예제 #1
0
def post(request):
    """Create a post from the submitted form and attach it to a topic.

    On GET (or any non-POST method) an empty ``PostForm`` is rendered with
    the ``micro/post.html`` template.

    On POST the form is validated, saved for ``request.user`` with the
    current time as ``pub_date``, and the post text is run through RAKE.
    The top phrase returned by ``rake_object.run`` is used as the topic
    name; when that returns nothing, the best phrase from the step-by-step
    pipeline is used instead.  The post is then added to the matching
    ``Topic`` (created on demand) and the response of ``home(request)`` is
    returned.

    Relies on module-level ``stoppath`` and ``rake_object``.
    """
    if request.method != 'POST':
        # Pass a form *instance* to the template, not the PostForm class.
        return render(request, 'micro/post.html', {'form': PostForm()})

    form = PostForm(request.POST)
    if not form.is_valid():
        # Saving an invalid ModelForm raises ValueError; re-render the page
        # so the user sees the validation errors instead of a server error.
        return render(request, 'micro/post.html', {'form': form})

    new_post = form.save(commit=False)
    new_post.user = request.user
    new_post.pub_date = timezone.now()
    new_post.save()
    text = new_post.text

    sentenceList = rake.split_sentences(text)
    for sentence in sentenceList:
        print("Sentence:", sentence)

    # generate candidate keywords
    stopwords = rake.load_stop_words(stoppath)
    stopwordpattern = rake.build_stop_word_regex(stoppath)
    phraseList = rake.generate_candidate_keywords(sentenceList,
                                                  stopwordpattern,
                                                  stopwords)
    print("Phrases:", phraseList)

    # calculate individual word scores
    wordscores = rake.calculate_word_scores(phraseList)

    # generate candidate keyword scores
    keywordcandidates = rake.generate_candidate_keyword_scores(
        phraseList, wordscores)
    for candidate, score in six.iteritems(keywordcandidates):
        print("Candidate: ", candidate, ", score: ", score)

    # sort candidates by score to determine top-scoring keywords
    sortedKeywords = sorted(six.iteritems(keywordcandidates),
                            key=operator.itemgetter(1),
                            reverse=True)
    for keywords in sortedKeywords:
        print("Keyword: " + str(keywords) + "\n")

    # Run the packaged extractor once and reuse the result.
    ranked = rake_object.run(text)
    print(ranked)

    if ranked:
        topic_name = ranked[0][0]
    elif sortedKeywords:
        topic_name = sortedKeywords[0][0]
    else:
        # Neither extractor produced a phrase (e.g. the text is empty or
        # made only of stop words): keep the post but assign no topic.
        topic_name = None

    if topic_name is not None:
        # Reuse the existing topic row or create it, then link the post.
        topic, _created = Topic.objects.get_or_create(topic=topic_name)
        topic.posts.add(new_post)

    return home(request)
def RakeExtract(text, stoppath="SmartStoplist.txt"):
    """Return the top third of RAKE key phrases found in ``text``.

    :param text: string type
    :param stoppath: path of the stopword list file
    :return: a list of ``(keyword, score)`` tuples sorted by descending
        score, truncated to the first ``len // 3`` entries (so fewer than
        three candidates yield an empty list)
    """
    # Split the text into sentences, then into stopword-delimited phrases.
    sentenceList = rake.split_sentences(text)
    stopwordpattern = rake.build_stop_word_regex(stoppath)
    phraseList = rake.generate_candidate_keywords(sentenceList,
                                                  stopwordpattern)

    # Score words, then phrases.
    wordscores = rake.calculate_word_scores(phraseList)
    keywordcandidates = rake.generate_candidate_keyword_scores(
        phraseList, wordscores)

    # items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    sortedKeywords = sorted(keywordcandidates.items(),
                            key=operator.itemgetter(1),
                            reverse=True)

    # Floor division keeps the slice bound an int on Python 3.
    return sortedKeywords[:len(sortedKeywords) // 3]
    def Analyse_Text(self):
        """Run RAKE over the text in ``self.bodyOfText`` and show key terms.

        Every intermediate result (sentences, stopword pattern, phrases,
        word scores, candidate scores, sorted keywords) is stored on the
        instance.  The 20 best-scoring phrases are joined, each preceded by
        a newline, into ``self.keyterms`` and written to
        ``self.keywordField``.  Extra output is printed when the
        module-level ``debug`` flag is truthy.
        """
        text = self.bodyOfText.toPlainText()
        self.sentenceList = rake.split_sentences(text)

        # SMART stoplist misses some of the lower-scoring keywords in
        # Figure 1.5, which means that the top 1/3 cuts off one of the 4.0
        # score words in Table 1.1.  (The Fox stoplist contains "numbers",
        # so it would not find "natural numbers" like in Table 1.1.)
        # NOTE(review): this is a machine-specific absolute path.
        self.stoppath = 'C:\\Users\\EmmaAdeiza\\PycharmProjects\\needed_project_main\\Ok_keyword-rake-master\\SmartStoplist.txt'
        self.stopwordpattern = rake.build_stop_word_regex(self.stoppath)

        # generate candidate keywords
        self.phraseList = rake.generate_candidate_keywords(
            self.sentenceList, self.stopwordpattern)

        # calculate individual word scores
        self.wordscores = rake.calculate_word_scores(self.phraseList)

        # generate candidate keyword scores
        self.keywordcandidates = rake.generate_candidate_keyword_scores(
            self.phraseList, self.wordscores)
        if debug:
            print(self.keywordcandidates)

        self.sortedKeywords = sorted(self.keywordcandidates.items(),
                                     key=operator.itemgetter(1),
                                     reverse=True)
        if debug:
            print(self.sortedKeywords)

        totalKeywords = len(self.sortedKeywords)
        if debug:
            # Print the local count; self.totalKeywords is never assigned,
            # so reading it raised AttributeError in debug mode.
            print(totalKeywords)

        # Each key term is prefixed with a newline, as before.
        self.keyterms = ''.join(
            '\n' + keyword for keyword, _score in self.sortedKeywords[:20])

        self.keywordField.setText(self.keyterms)
def extract_key(text):
    """Return up to four top-scoring RAKE key phrases for ``text``.

    Uses the module-level ``stoppath`` as the stopword file.

    :param text: the text to analyse
    :return: list of key phrases (strings only, scores dropped), best first
    """
    # Split text into sentences
    sentenceList = rake.split_sentences(text)

    # generate candidate keywords
    stopwordpattern = rake.build_stop_word_regex(stoppath)
    phraseList = rake.generate_candidate_keywords(sentenceList,
                                                  stopwordpattern)

    # calculate individual word scores
    wordscores = rake.calculate_word_scores(phraseList)

    # generate candidate keyword scores
    keywordcandidates = rake.generate_candidate_keyword_scores(
        phraseList, wordscores)

    # sort candidates by score to determine top-scoring keywords;
    # items() works on both Python 2 and 3 (iteritems() is Python 2 only)
    sortedKeywords = sorted(keywordcandidates.items(),
                            key=operator.itemgetter(1),
                            reverse=True)

    # take the top four as the final keywords
    return [keyword for keyword, _score in sortedKeywords[:4]]


#Example call
#print extract_key("The index had gained 68.22 points in the previous session on Thursday.Nifty acted very range bound facing a strong resistance at 8750 level and found immediate support at 8680 level which we mentioned earlier.Nifty still holds the immediate support placed at 8680 level.Prominent losers among the 30 sensex stocks were TCS, Axis Bank, Bharti Airtel, BHEL, GAIL, HDFC, HDFC Bank, Hero MotoCorp, Infosys, M&M, Maruti Suzuki, NTPC, ONGC, SBI, Sesa Sterlite, Sun Pharma and Tata Motors.These payments amount to around Rs 26 crore, the company said in a release here.Dabur India Ltd: The Burman Family Office, the investment arm of the Burman family, promoters of fast-moving consumer group conglomerate Dabur India, is in the final stages of negotiations to invest an undisclosed amount in online insurance policy aggregator EasyPolicy.com.")
예제 #5
0
def keywordExtract(filename):
    """Read ``filename`` and return its RAKE key phrases sorted by score.

    The file is decoded as ISO-8859-1 and the stopword list is read from
    ``SmartStoplist.txt`` (relative to the working directory).  A separator
    line of dashes is printed before processing.

    :param filename: path of the text file to analyse
    :return: list of ``(phrase, score)`` tuples, highest score first
    """
    stoppath = "SmartStoplist.txt"

    print("---------------------------------------------")

    # The with-block closes the file even if reading fails.
    with io.open(filename, 'r', encoding="iso-8859-1") as sample_file:
        text = sample_file.read()

    # 1. Split text into sentences
    sentenceList = rake.split_sentences(text)

    # 2. generate candidate keywords
    stopwordpattern = rake.build_stop_word_regex(stoppath)
    phraseList = rake.generate_candidate_keywords(sentenceList,
                                                  stopwordpattern)

    # 3. calculate individual word scores
    wordscores = rake.calculate_word_scores(phraseList)

    # 4. generate candidate keyword scores
    keywordcandidates = rake.generate_candidate_keyword_scores(
        phraseList, wordscores)

    # 5. sort candidates by score to determine top-scoring keywords
    return sorted(six.iteritems(keywordcandidates),
                  key=operator.itemgetter(1),
                  reverse=True)
예제 #6
0
def keyword_tokenize(text):
    '''
    Split text into RAKE candidate phrases.

    INPUT: String
    OUTPUT: the candidate phrases returned by
            rake.generate_candidate_keywords for the sentences of the
            input (phrases are not scored or ranked here)
    '''
    stoppath = 'RAKE-tutorial/SmartStoplist.txt'
    sentenceList = rake.split_sentences(text)
    stopwordpattern = rake.build_stop_word_regex(stoppath)
    # The Rake object and the word scores the previous version built were
    # never used, so only the phrase-generation steps remain.
    return rake.generate_candidate_keywords(sentenceList, stopwordpattern)
예제 #7
0
def extract_phrase(sentence):
    """Spell-correct ``sentence`` and return its RAKE candidate phrases.

    :param sentence: raw input text; it is passed through
        ``spellcheck.sentence_correct`` before phrase extraction
    :returns: the result of ``rake.generate_candidate_keywords`` for the
        corrected text, using ``SmartStoplist_mod.txt`` as stopword list
    """
    stoppath = "SmartStoplist_mod.txt"

    # 1. Correct spelling, then split the text into sentences
    corrected = spellcheck.sentence_correct(sentence)
    sentenceList = rake.split_sentences(corrected)

    # 2. Generate candidate keywords
    stopwordpattern = rake.build_stop_word_regex(stoppath)
    return rake.generate_candidate_keywords(sentenceList, stopwordpattern)

#print(extract_phrase("Haa got you"))
예제 #8
0
def get_keywords_of_single_abstract_RAKE(abstract):
    """Return the stemmed top third of RAKE key phrases for ``abstract``.

    Candidate phrases are scored with RAKE (stopwords read from the
    module-level ``STOPPATH``), every word of every phrase is stemmed with
    ``PorterStemmer``, and the phrases are sorted by descending score.

    :param abstract: text of a single abstract
    :return: list of stemmed phrases (scores dropped), limited to the first
        ``len // 3`` entries
    """
    sentence_list = rake.split_sentences(abstract)
    stopword_pattern = rake.build_stop_word_regex(STOPPATH)
    phrase_list = rake.generate_candidate_keywords(sentence_list,
                                                   stopword_pattern)
    word_scores = rake.calculate_word_scores(phrase_list)
    keyword_candidates = rake.generate_candidate_keyword_scores(
        phrase_list, word_scores)

    stemmer = PorterStemmer()
    # items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    stemmed = [(' '.join(stemmer.stem(word) for word in phrase.split(' ')),
                score)
               for phrase, score in keyword_candidates.items()]

    sorted_keywords = sorted(stemmed,
                             key=operator.itemgetter(1),
                             reverse=True)

    # Floor division keeps the slice bound an int on Python 3.
    top_third = sorted_keywords[:len(sorted_keywords) // 3]
    return [phrase for phrase, _score in top_third]
예제 #9
0
def main(args):
    """Interactive loop: read text, print its top RAKE keywords, repeat.

    For every line entered at the prompt the step-by-step pipeline prints
    the top third of the scored phrases, followed by the result of
    ``rake_instance.run`` on the same text.  The loop ends on EOF (Ctrl-D).

    Written to run unchanged on Python 2 and Python 3: every ``print`` call
    takes a single argument, so it behaves the same with or without
    ``print_function``.

    :param args: parsed command-line options; only ``args.verbose`` is
        read, and it enables printing of intermediate results
    """
    # raw_input exists only on Python 2; Python 3 renamed it to input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    atexit.register(quit_repl)
    rake_instance = rake.Rake()
    # The stop-word pattern does not depend on the input text, so build it
    # once instead of once per prompt.
    stopwordpattern = rake.build_stop_word_regex()

    while True:
        print('-' * 80)
        try:
            text = read_line('Enter text to analyze\n%s' % PROMPT_STR)
        except EOFError:
            # Exit gracefully on Ctrl-D
            break

        # Split text into sentences
        sentenceList = rake.split_sentences(text)

        # generate candidate keywords
        phraseList = rake.generate_candidate_keywords(sentenceList,
                                                      stopwordpattern)

        # calculate individual word scores
        wordscores = rake.calculate_word_scores(phraseList)

        # generate candidate keyword scores
        keywordcandidates = rake.generate_candidate_keyword_scores(
            phraseList, wordscores)
        if args.verbose:
            print(keywordcandidates)

        sortedKeywords = sorted(keywordcandidates.items(),
                                key=itemgetter(1),
                                reverse=True)
        if args.verbose:
            print(sortedKeywords)

        totalKeywords = len(sortedKeywords)
        if args.verbose:
            print(totalKeywords)
        # Floor division keeps the slice bound an int on Python 3.
        print(sortedKeywords[0:(totalKeywords // 3)])

        keywords = rake_instance.run(text)
        print(keywords)
예제 #10
0
def extract_phrase(sentence):
    """Spell-correct ``sentence`` and extract its RAKE candidate phrases.

    :param sentence: raw input text; it is passed through
        ``spellcheck.sentence_correct`` before phrase extraction
    :type sentence: str
    :returns: the result of ``rake.generate_candidate_keywords`` for the
        corrected text, using ``SmartStoplist_mod.txt`` as stopword list

    NOTE(review): the previous docstring listed AttributeError and KeyError
    as raised; nothing here raises them explicitly -- confirm against the
    ``spellcheck`` and ``rake`` helpers.
    """
    stoppath = "SmartStoplist_mod.txt"

    # 1. Correct spelling, then split the text into sentences
    corrected = spellcheck.sentence_correct(sentence)
    sentenceList = rake.split_sentences(corrected)

    # 2. Generate candidate keywords
    stopwordpattern = rake.build_stop_word_regex(stoppath)
    return rake.generate_candidate_keywords(sentenceList, stopwordpattern)
예제 #11
0
def findKeywords(company_news):
    """Return the ten best RAKE key phrases for ``company_news``.

    :param company_news: text to analyse
    :return: list of at most ten phrases (scores dropped), in the order
        returned by ``Rake.run``
    """
    stoppath = "../Data/data/stoplists/SmartStoplist.txt"

    # The previous version also read an unrelated sample document (through
    # a file handle it never closed) and ran a second, manual RAKE pipeline;
    # both results were discarded, so only the call that produces the
    # return value is kept.
    rake_obj = rake.Rake(stoppath)
    return [entry[0] for entry in rake_obj.run(company_news)[0:10]]
예제 #12
0
def extract_phrase(sentence):
    """Spell-correct ``sentence`` and extract its RAKE candidate phrases.

    :param sentence: raw input text; it is passed through
        ``spellcheck.sentence_correct`` before phrase extraction
    :type sentence: str
    :returns: the result of ``rake.generate_candidate_keywords`` for the
        corrected text, using ``SmartStoplist_mod.txt`` as stopword list

    NOTE(review): the previous docstring listed AttributeError and KeyError
    as raised; nothing here raises them explicitly -- confirm against the
    ``spellcheck`` and ``rake`` helpers.
    """
    stoppath = "SmartStoplist_mod.txt"

    # 1. Correct spelling, then split the text into sentences
    corrected = spellcheck.sentence_correct(sentence)
    sentenceList = rake.split_sentences(corrected)

    # 2. Generate candidate keywords
    stopwordpattern = rake.build_stop_word_regex(stoppath)
    return rake.generate_candidate_keywords(sentenceList, stopwordpattern)
예제 #13
0
def keywordExtract(filename):
    """Read ``filename`` and return its RAKE key phrases sorted by score.

    The file is decoded as ISO-8859-1 and the stopword list is read from
    ``SmartStoplist.txt`` (relative to the working directory).  A separator
    line of dashes is printed before processing.

    :param filename: path of the text file to analyse
    :return: list of ``(phrase, score)`` tuples, highest score first
    """
    stoppath = "SmartStoplist.txt"

    print("---------------------------------------------")

    # The with-block closes the file even if reading fails.
    with io.open(filename, 'r', encoding="iso-8859-1") as sample_file:
        text = sample_file.read()

    # 1. Split text into sentences
    sentenceList = rake.split_sentences(text)

    # 2. generate candidate keywords
    stopwordpattern = rake.build_stop_word_regex(stoppath)
    phraseList = rake.generate_candidate_keywords(sentenceList,
                                                  stopwordpattern)

    # 3. calculate individual word scores
    wordscores = rake.calculate_word_scores(phraseList)

    # 4. generate candidate keyword scores
    keywordcandidates = rake.generate_candidate_keyword_scores(
        phraseList, wordscores)

    # 5. sort candidates by score to determine top-scoring keywords
    return sorted(six.iteritems(keywordcandidates),
                  key=operator.itemgetter(1),
                  reverse=True)
import rake
import operator

# EXAMPLE ONE - SIMPLE
# Builds a Rake object for the SMART stoplist, then prints the candidate
# phrases RAKE finds in a fixed sample abstract.
stoppath = "SmartStoplist.txt"

rake_object = rake.Rake(stoppath)

text = ("Compatibility of systems of linear constraints over the set of natural numbers. Criteria of compatibility "
        "of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered. "
        "Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating"
        " sets of solutions for all types of systems are given. These criteria and the corresponding algorithms "
        "for constructing a minimal supporting set of solutions can be used in solving all the considered types of "
        "systems and systems of mixed types.")
sentenceList = rake.split_sentences(text)

stopwordpattern = rake.build_stop_word_regex(stoppath)
phraseList = rake.generate_candidate_keywords(sentenceList, stopwordpattern)
# Single-argument print call with %-formatting: same output as the old
# `print "Phrases:", phraseList` statement on Python 2, and valid on Python 3.
print("Phrases: %s" % (phraseList,))


'''

flag = False
reftext = text.split(' ')
# for x in
# print text

sentence = " "
for x in reftext:
    #	print x
    if (flag == True):
        sentence = sentence + " " + x
    if "EFERENCES" in x:
        flag = True

referencesentencelist = rake.split_sentences(sentence)
stopwordpattern = rake.build_stop_word_regex(stoppath)
refphraseList = rake.generate_candidate_keywords(referencesentencelist,
                                                 stopwordpattern)

# print sentence
'''
'''

# 1. Split text into sentences
sentenceList = rake.split_sentences(text)

# for sentence in sentenceList:
#    print "Sentence:", sentence

# generate candidate keywords
# EXAMPLE TWO - BEHIND THE SCENES (from https://github.com/aneesha/RAKE/rake.py)

# 1. initialize RAKE by providing a path to a stopwords file
rake_object = rake.Rake(stoppath)

text = "Compatibility of systems of linear constraints over the set of natural numbers. Criteria of compatibility " \
       "of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered. " \
       "Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating"\
       " sets of solutions for all types of systems are given. These criteria and the corresponding algorithms " \
       "for constructing a minimal supporting set of solutions can be used in solving all the considered types of " \
       "systems and systems of mixed types."



# 1. Split text into sentences
sentenceList = rake.split_sentences(text)

for sentence in sentenceList:
    print "Sentence:", sentence

# generate candidate keywords
stopwordpattern = rake.build_stop_word_regex(stoppath)
phraseList = rake.generate_candidate_keywords(sentenceList, stopwordpattern)
print "Phrases:", phraseList

# calculate individual word scores
wordscores = rake.calculate_word_scores(phraseList)

# generate candidate keyword scores
keywordcandidates = rake.generate_candidate_keyword_scores(phraseList, wordscores)
for candidate in keywordcandidates.keys():
예제 #17
0
from __future__ import absolute_import
from __future__ import print_function
import six
import rake as q
import operator
import io

# Prompt for a question, then print its sentences, candidate phrases and
# per-phrase RAKE scores.
stoppath = "SmartStoplist.txt"

# six.moves.input is raw_input on Python 2 and input on Python 3, so the
# prompt works on both (raw_input alone fails on Python 3).
text = six.moves.input("Enter your Question: ")

sentenceList = q.split_sentences(text)

for sentence in sentenceList:
    print("Sentence:", sentence)

stopwordpattern = q.build_stop_word_regex(stoppath)
phraseList = q.generate_candidate_keywords(sentenceList, stopwordpattern)
print("Phrases:", phraseList)

wordscores = q.calculate_word_scores(phraseList)

keywordcandidates = q.generate_candidate_keyword_scores(phraseList, wordscores)
for candidate, score in six.iteritems(keywordcandidates):
    print("Keyword candidate: ", candidate, ", score: ", score)

# Candidates ordered by descending score.
sortedKeywords = sorted(six.iteritems(keywordcandidates),
                        key=operator.itemgetter(1),
                        reverse=True)
def enFunc():
    """Grade the answer typed into ``entry`` out of 50 and display the marks.

    Three partial scores are computed:

    * length (``/10``): 10 for 200 or more words, otherwise 5;
    * keywords (``/20``): share of the reference keywords (third
      blank-line-separated section of ``data/docs/mpques1.txt``) that
      exactly equal a phrase extracted by RAKE from the answer;
    * language (``/20``): up to 10 from the number of issues reported by
      ``language_check`` plus 10 or 5 for vocabulary variety.

    Results are printed and shown in ``Message`` widgets placed on ``root``.
    """
    ans = entry.get('1.0', 'end')

    # ---- length ---------------------------------------------------------
    # Count the words of the whole answer.  The previous loop iterated over
    # single characters, so the count was always 0 or 1.
    n = len(ans.split())
    marks1 = 10 if n >= 200 else 5
    print("Marks obtained for word length", marks1, "/10")
    a = marks1

    # ---- keywords -------------------------------------------------------
    stoppath = "data/stoplists/SmartStoplist.txt"

    rake_object = rake.Rake(stoppath)
    text = ans

    sentenceList = rake.split_sentences(text)

    for sentence in sentenceList:
        print("Sentence:", sentence)

    stopwords = rake.load_stop_words(stoppath)
    stopwordpattern = rake.build_stop_word_regex(stoppath)
    phraseList = rake.generate_candidate_keywords(sentenceList,
                                                  stopwordpattern, stopwords)
    print("Phrases:", phraseList)

    wordscores = rake.calculate_word_scores(phraseList)

    keywordcandidates = rake.generate_candidate_keyword_scores(
        phraseList, wordscores)
    for candidate, score in six.iteritems(keywordcandidates):
        print("Candidate: ", candidate, ", score: ", score)

    sortedKeywords = sorted(six.iteritems(keywordcandidates),
                            key=operator.itemgetter(1),
                            reverse=True)
    totalKeywords = len(sortedKeywords)

    for keyword in sortedKeywords[0:int(totalKeywords / 3)]:
        print("Keyword: ", keyword[0], ", score: ", keyword[1])

    keyw = dict(rake_object.run(text))
    print(keyw)

    # The with-block closes the reference file once it has been read.
    with io.open("data/docs/mpques1.txt", 'r', encoding="iso-8859-1") as f1:
        text1 = f1.read()
    sections = text1.split("\n\n")
    kw = sections[2].split("\n")
    print("keyword in original file:", kw)

    # One hit per reference keyword that is also an extracted phrase.
    hits = sum(1 for reference in kw if reference in keyw)
    print("count:", hits)

    total = len(kw)
    percentage = (hits / total) * 100

    # Highest matching band wins.  The previous `elif (x >= a or x < b)`
    # tests were always true, so every score below 90% was given 18 marks.
    for threshold, marks in ((90, 20), (80, 18), (70, 16), (60, 14), (50, 12)):
        if percentage >= threshold:
            marks2 = marks
            message = "Marks obtained for keyword:" + str(marks2) + "/20"
            break
    else:
        marks2 = 10
        message = "Score:" + str(marks2) + "/20"

    mes = Message(root, text=message, width=50)
    mes.grid(row=150)
    b = marks2

    # ---- language -------------------------------------------------------
    tool = language_check.LanguageTool('en-US')
    text = str(ans)
    txtlen = len(text.split())
    setlen = len(set(text.split()))
    matches = tool.check(text)
    print("No. of Errors:", len(matches))
    noOfError = len(matches)
    if noOfError <= 5:
        marks3 = 10
    elif noOfError <= 10:
        marks3 = 8
    elif noOfError <= 15:
        marks3 = 5
    else:
        marks3 = 3

    # Bonus for variety: more than half of the words are distinct.
    if setlen > (txtlen / 2):
        marks3 += 10
    else:
        marks3 += 5
    print("Marks obtained after parsing:", marks3, "/20")

    mes2 = Message(root,
                   text="Marks obtained after parsing:" + str(marks3) + "/20",
                   width=500)
    mes2.grid(row=200, column=50)

    # ---- total ----------------------------------------------------------
    overall = a + b + marks3
    print("Marks obtained out of 50 is:", overall, "/50")
    # The integer total must be converted before concatenation; adding the
    # ints directly to the string raised TypeError.
    mes3 = Message(root,
                   text="Marks obtained out of 50 is:" + str(overall) + "/50",
                   width=500)
    mes3.grid(row=250, column=50)
예제 #19
0
    
    
    link = article_url[i]
        
    new_link = url[0:26] + link[3:]
    
    html = urllib.urlopen(new_link).read()

    # get article name
    soup = BeautifulSoup(html , 'html.parser')
    
    tags = soup.findAll('textplugin')
    article = re.findall(r'_blank\">(.*?)<\/p>',str(tags))

    # 1. Split text into sentences
    sentenceList = rake.split_sentences(str(article))

    # generate candidate keywords
    stopwordpattern = rake.build_stop_word_regex(stoppath)
    phraseList = rake.generate_candidate_keywords(sentenceList, stopwordpattern)
    # print("Phrases:", phraseList)
    # print("---------------------------------------------")

    # calculate individual word scores
    wordscores = rake.calculate_word_scores(phraseList)
    
    # generate candidate keyword scores
    keywordcandidates = rake.generate_candidate_keyword_scores(phraseList, wordscores)
    # print keywordcandidates
    """
    for candidate in keywordcandidates.keys():
def make_prediction():
    """Classify an uploaded CSV and recommend journals for the prediction.

    On POST the uploaded file is read with pandas, tokenised and cleaned,
    its five best RAKE phrases are printed, and the text is passed through
    ``weight_model`` and ``model`` to obtain a label.  The label is then
    searched on scimagojr.com and up to five ``(title, url)`` pairs are
    taken from the result page.  ``index.html`` is rendered with the label,
    the top five keywords and the recommendations.

    NOTE(review): for any method other than POST the function falls through
    and returns ``None`` -- confirm the route only accepts POST.
    """
    # Local import: six is already used by this module; six.moves resolves
    # to the right urllib location on Python 2 and 3.
    from six.moves.urllib.parse import quote_plus

    if request.method == 'POST':

        # get uploaded document
        file = request.files['uploaded_file']
        if not file:
            return render_template('index.html', label="No file uploaded")

        test = pd.read_csv(file)
        # NOTE(review): iterating a DataFrame yields its column labels, not
        # its cell values -- confirm that the header row is the text wanted.
        line = list(test[test.columns.tolist()])

        X_test = ''.join(line)

        text = word_tokenize(X_test)
        cleaned_text = clean_text(text)
        joined_text = " ".join(cleaned_text)

        sentenceList = rake.split_sentences(joined_text)
        stopwords = rake.load_stop_words('stopwords.txt')
        stopwordpattern = rake.build_stop_word_regex('stopwords.txt')
        phraseList = rake.generate_candidate_keywords(sentenceList,
                                                      stopwordpattern,
                                                      stopwords)
        wordscores = rake.calculate_word_scores(phraseList)
        keywordcandidates = rake.generate_candidate_keyword_scores(
            phraseList, wordscores)
        sortedKeywords = sorted(six.iteritems(keywordcandidates),
                                key=operator.itemgetter(1),
                                reverse=True)
        top_keywords = sortedKeywords[0:5]
        for keyword in top_keywords:
            print("Keyword: ", keyword[0], ", score: ", keyword[1])

        x_test_mat = weight_model.transform(joined_text.split('.'))

        predict = model.predict(x_test_mat)
        print(predict)
        label = str(np.squeeze(predict[0]))
        print(label)

        browser = mechanicalsoup.StatefulBrowser()
        # URL-encode the label so spaces, '&' or '#' cannot break the query.
        browser.open("https://www.scimagojr.com/journalsearch.php?q=" +
                     quote_plus(label))
        soup = browser.get_current_page()

        divTag = str(soup.find_all("div", {"class": "search_results"}))

        recommend = []
        # Split once.  Fragment 0 is skipped and at most five fragments are
        # read, as before; slicing avoids an IndexError on short pages.
        for fragment in divTag.split('</a>\n')[1:6]:
            a = fragment.split('>')
            b = a[0].split('"')
            if len(a) < 3 or len(b) < 2:
                # Not an anchor fragment (e.g. the closing markup).
                continue
            # The href is HTML-escaped; drop 'amp;' to get a usable URL.
            link = b[1].replace('amp;', '')
            title = a[2].split('<')[0]
            recommend.append((title, "https://www.scimagojr.com/" + link))
        print(recommend)

        return render_template('index.html',
                               label=label,
                               keyword=top_keywords,
                               recommendations=recommend)
예제 #21
0
def splitUp(text):
    """Split ``text`` into sentences and wrap each in ``Sentence.Sentence``.

    :param text: the text to split with ``rake.split_sentences``
    :return: list with one ``Sentence.Sentence`` per sentence, in order
    """
    return [Sentence.Sentence(sentence)
            for sentence in rake.split_sentences(text)]
예제 #22
0
def enFunc():
    """Grade the answer in ``entry`` for question ``counter`` and store it.

    Three partial scores are added up to a total out of 50:

    * length (``/10``): 10 / 5 / 3 marks by word count, with thresholds
      850 and 400 for questions 1 and 2 and 250 and 100 for the others;
    * keywords (``/30``): how often a reference keyword (third
      blank-line-separated section of ``data/docs/mp<counter>.txt``)
      occurs, case-insensitively, inside a phrase RAKE extracted from the
      answer;
    * language (``/10``): by the number of issues ``language_check`` finds.

    The total is scaled to 12 marks (questions 1 and 2) or 4 marks (other
    questions), shown in a message box and written to
    ``totmark[counter - 1]``.
    """
    global counter
    global totmark

    ans = entry.get('1.0', 'end')

    # ---- length ---------------------------------------------------------
    # Word count of the answer.  The previous nested loop was quadratic and
    # produced the number of characters rather than words.
    # NOTE(review): the thresholds below are read as word counts -- confirm.
    n = len(ans.split())
    if counter == 1 or counter == 2:
        full_length, half_length = 850, 400
    else:
        full_length, half_length = 250, 100
    if n >= full_length:
        marks1 = 10
    elif n >= half_length:
        marks1 = 5
    else:
        marks1 = 3
    a = marks1

    # ---- keywords -------------------------------------------------------
    fname = "data/docs/mp" + str(counter) + ".txt"
    stoppath = "data/stoplists/SmartStoplist.txt"

    rake_object = rake.Rake(stoppath)
    keyw = dict(rake_object.run(ans))
    print(keyw)

    print(fname)
    # The with-block closes the reference file once it has been read.
    with io.open(fname, 'r', encoding="iso-8859-1") as f1:
        text1 = f1.read()
    que = text1.split("\n")
    print(que[0])
    sections = text1.split("\n\n")
    # Drop blank lines: an empty keyword is a substring of every phrase and
    # would be counted as a hit for each of them.
    kw = [keyword for keyword in sections[2].split("\n") if keyword.strip()]
    print("keyword in original file=", kw)
    total = len(kw)
    print("No of keywords in original file=", total)

    hits = 0
    for phrase in keyw:
        for keyword in kw:
            if keyword.lower() in phrase.lower():
                print("Detected= " + str(phrase))
                hits = hits + 1
    print("count=", hits)

    # No reference keywords means nothing can be matched.
    percentage = (hits / total) * 100 if total else 0

    # Highest matching band wins.
    # NOTE(review): the 50% and 40% bands used to award 28 and 25 marks,
    # more than the 60% band (24).  They continue the 2-mark steps here
    # (22 and 20) -- confirm the intended values.
    marks2 = 0
    for threshold, marks in ((90, 30), (80, 28), (70, 26), (60, 24),
                             (50, 22), (40, 20)):
        if percentage >= threshold:
            marks2 = marks
            break
    message = "Marks obtained for keyword:" + str(marks2) + "/30"

    mes2text = "\nMarks for length = " + str(a) + "/10" + "\nLength = " + str(n)
    print(mes2text)
    print(message)
    b = marks2

    # ---- language -------------------------------------------------------
    tool = language_check.LanguageTool('en-US')
    matches = tool.check(str(ans))
    noOfError = len(matches)
    print("No. of Errors=", noOfError)
    for match in matches:
        print(match.msg)

    # Full marks need a non-empty answer as well as few errors.
    if noOfError <= 3 and n > 0:
        marks3 = 10
    elif noOfError <= 5:
        marks3 = 8
    elif noOfError <= 8:
        marks3 = 5
    else:
        marks3 = 3
    print("Marks obtained after parsing=", marks3, "/10")

    # ---- total ----------------------------------------------------------
    d = a + b + marks3
    print("Marks obtained out of 50 is=", d, "/50")
    if counter == 1 or counter == 2:
        tot = (d / 50) * 12
    else:
        tot = (d / 50) * 4
    m = "\nMarks obtained for this question is" + str(tot)
    messagebox.showinfo("Result", m)
    totmark[counter - 1] = tot