def post(request):
    """Create a new micro post and file it under its top RAKE keyword.

    On GET, render an empty form.  On POST, validate and save the post,
    run RAKE keyword extraction over its text, and attach the post to
    the Topic named by the highest-scoring keyword, creating the Topic
    when it does not exist yet.
    """
    if request.method != 'POST':
        # BUG FIX: the original passed the PostForm *class*
        # ("form = PostForm") instead of an instance.
        return render(request, 'micro/post.html', {'form': PostForm()})

    form = PostForm(request.POST)
    # BUG FIX: the original called form.save(commit=False) without
    # validating, which raises ValueError on bad input.
    if not form.is_valid():
        return render(request, 'micro/post.html', {'form': form})

    new_post = form.save(commit=False)
    new_post.user = request.user
    new_post.pub_date = timezone.now()
    new_post.save()

    text = new_post.text
    # Manual RAKE pipeline, kept as the fallback keyword source when
    # the pre-built rake_object finds nothing (debug prints removed).
    sentence_list = rake.split_sentences(text)
    stopwords = rake.load_stop_words(stoppath)
    stopword_pattern = rake.build_stop_word_regex(stoppath)
    phrase_list = rake.generate_candidate_keywords(
        sentence_list, stopword_pattern, stopwords)
    word_scores = rake.calculate_word_scores(phrase_list)
    keyword_candidates = rake.generate_candidate_keyword_scores(
        phrase_list, word_scores)
    sorted_keywords = sorted(six.iteritems(keyword_candidates),
                             key=operator.itemgetter(1), reverse=True)

    ranked = rake_object.run(text)
    if ranked:
        topic_name = ranked[0][0]
    elif sorted_keywords:
        topic_name = sorted_keywords[0][0]
    else:
        # BUG FIX: the original raised IndexError when neither extractor
        # produced a keyword (e.g. an all-stopword post).
        topic_name = text.strip()[:50] or 'untitled'

    # BUG FIX / idiom: get_or_create replaces the racy
    # len(Topic.objects.filter(...)) > 0 check followed by get()/save().
    topic, _created = Topic.objects.get_or_create(topic=topic_name)
    topic.posts.add(new_post)
    return home(request)
def findKeywords(company_news):
    """Return the top 10 RAKE keyword phrases found in *company_news*.

    Parameters
    ----------
    company_news : str
        Free text to extract keywords from.

    Returns
    -------
    list[str]
        Up to 10 keyword phrases, highest-scoring first.
    """
    stoppath = "../Data/data/stoplists/SmartStoplist.txt"
    # BUG FIX: the original first built a Rake(stoppath, 5, 3, 4)
    # instance, read an unrelated sample file (fao_test/w2167e.txt) and
    # threw both the instance and its result away; it then re-ran the
    # whole manual RAKE pipeline (split_sentences, word scores, ...)
    # whose results were also unused.  All that dead I/O and work is
    # removed — only rake_obj.run() ever fed the return value.
    rake_obj = rake.Rake(stoppath)
    ranked = rake_obj.run(company_news)[:10]
    return [phrase for phrase, _score in ranked]
def enFunc():
    """Grade the answer typed into the Tk *entry* widget.

    Scoring (out of 50):
      * up to 10 marks for answer length,
      * up to 20 marks for keyword coverage against the model answer
        in data/docs/mpques1.txt,
      * up to 20 marks for grammar (LanguageTool) and vocabulary variety.

    Results are printed and shown in Message widgets on *root*.
    """
    ans = entry.get('1.0', 'end')
    # BUG FIX: the original iterated the answer character by character
    # ("for line in ans"), so n was always the word count of the final
    # character (0 or 1) and marks1 was effectively always 5.  Count
    # the words of the whole answer instead.
    n = len(ans.split())
    marks1 = 10 if n >= 200 else 5
    print("Marks obtained for word length", marks1, "/10")
    a = marks1

    stoppath = "data/stoplists/SmartStoplist.txt"
    rake_object = rake.Rake(stoppath)
    text = ans
    sentenceList = rake.split_sentences(text)
    for sentence in sentenceList:
        print("Sentence:", sentence)
    stopwords = rake.load_stop_words(stoppath)
    stopwordpattern = rake.build_stop_word_regex(stoppath)
    phraseList = rake.generate_candidate_keywords(sentenceList,
                                                  stopwordpattern, stopwords)
    print("Phrases:", phraseList)
    wordscores = rake.calculate_word_scores(phraseList)
    keywordcandidates = rake.generate_candidate_keyword_scores(
        phraseList, wordscores)
    for candidate, score in keywordcandidates.items():
        print("Candidate: ", candidate, ", score: ", score)
    sortedKeywords = sorted(six.iteritems(keywordcandidates),
                            key=operator.itemgetter(1), reverse=True)
    totalKeywords = len(sortedKeywords)
    for keyword in sortedKeywords[0:int(totalKeywords / 3)]:
        print("Keyword: ", keyword[0], ", score: ", keyword[1])

    keyw = dict(rake_object.run(text))
    print(keyw)

    # Model-answer keywords live in the third blank-line-separated
    # section of the question file, one keyword per line.
    f1 = io.open("data/docs/mpques1.txt", 'r', encoding="iso-8859-1")
    text1 = f1.read()
    f1.close()
    kw = text1.split("\n\n")[2].split("\n")
    print("keyword in original file:", kw)

    c = sum(1 for i in keyw for j in kw if i == j)
    print("count:", c)
    total = len(kw)
    percentage = (c / total) * 100
    # BUG FIX: the original chained "percentage >= X or percentage < Y"
    # tests; the "or" made every branch after the first vacuously true,
    # so any answer under 90% coverage scored 18/20.  In an elif chain
    # the lower bound alone is sufficient.
    if percentage >= 90:
        marks2 = 20
    elif percentage >= 80:
        marks2 = 18
    elif percentage >= 70:
        marks2 = 16
    elif percentage >= 60:
        marks2 = 14
    elif percentage >= 50:
        marks2 = 12
    else:
        marks2 = 10
    if marks2 > 10:
        message = "Marks obtained for keyword:" + str(marks2) + "/20"
    else:
        # The bottom band used a different label in the original; kept.
        message = "Score:" + str(marks2) + "/20"
    mes = Message(root, text=message, width=50)
    mes.grid(row=150)
    b = marks2

    # Grammar check: fewer LanguageTool matches -> more marks.
    tool = language_check.LanguageTool('en-US')
    text = str(ans)
    txtlen = len(text.split())
    setlen = len(set(text.split()))
    matches = tool.check(text)
    print("No. of Errors:", len(matches))
    noOfError = len(matches)
    if noOfError <= 5:
        marks3 = 10
    elif noOfError <= 10:
        marks3 = 8
    elif noOfError <= 15:
        marks3 = 5
    else:
        marks3 = 3
    # Vocabulary-variety bonus: more than half the words are distinct.
    marks3 += 10 if setlen > (txtlen / 2) else 5
    print("Marks obtained after parsing:", marks3, "/20")
    mes2 = Message(root,
                   text="Marks obtained after parsing:" + str(marks3) + "/20",
                   width=500)
    mes2.grid(row=200, column=50)
    c = marks3

    print("Marks obtained out of 50 is:", a + b + c, "/50")
    # BUG FIX: the original concatenated ints onto a str
    # ("..." + a + b + c + "/50"), a guaranteed TypeError.
    mes3 = Message(root,
                   text="Marks obtained out of 50 is:" + str(a + b + c) + "/50",
                   width=500)
    mes3.grid(row=250, column=50)
# Extract RAKE keyword phrases for every abstract in a CSV of test data.
import rake
import six
# BUG FIX: pandas was used below as "pd" without ever being imported.
import pandas as pd

filename = '/Users/iqra/Downloads/testing-data.csv'
stop_words_file = ('/Users/iqra/Documents/Rake Examples/RAKE-tutorial/'
                   'data/stoplists/SmartStoplist.txt')

# BUG FIX: the original did "r = Rake()" although only the
# RAKE-tutorial "rake" module is imported, so the script died with a
# NameError before doing any work (the Rake()/extract_keywords_from_text
# API belongs to the separate rake_nltk package).  Use rake.Rake, whose
# run() method returns (phrase, score) pairs ranked best-first.
rake_object = rake.Rake(stop_words_file)

data_frame = pd.read_csv(filename, encoding='latin1', engine='c')


def rake_implement(text, extractor):
    """Return the ranked keyword phrases of *text*, best first."""
    return [phrase for phrase, _score in extractor.run(text)]


data_frame['phrases'] = data_frame['abstract'].apply(
    lambda x: rake_implement(x, rake_object))
print(data_frame['phrases'])
def make_prediction():
    """Flask view: classify an uploaded CSV document and recommend journals.

    Reads the uploaded file, cleans and keyword-scores its text with
    RAKE, predicts a subject label with the global weight_model/model
    pair, then scrapes scimagojr.com for the top five matching journals.
    """
    if request.method == 'POST':
        # Get the uploaded document.
        file = request.files['uploaded_file']
        if not file:
            return render_template('index.html', label="No file uploaded")

        test = pd.read_csv(file)
        # Iterating the DataFrame yields its column names; in this
        # pipeline the header row carries the document text.
        X_test = ''.join(col for col in test[test.columns.tolist()])
        text = word_tokenize(X_test)
        cleaned_text = clean_text(text)
        joined_text = " ".join(cleaned_text)

        # Manual RAKE pipeline to score candidate keywords.
        # (The original also built a rake.Rake('stopwords.txt') instance
        # that was never used — dead work removed.)
        sentenceList = rake.split_sentences(joined_text)
        stopwords = rake.load_stop_words('stopwords.txt')
        stopwordpattern = rake.build_stop_word_regex('stopwords.txt')
        phraseList = rake.generate_candidate_keywords(sentenceList,
                                                      stopwordpattern,
                                                      stopwords)
        wordscores = rake.calculate_word_scores(phraseList)
        keywordcandidates = rake.generate_candidate_keyword_scores(
            phraseList, wordscores)
        sortedKeywords = sorted(six.iteritems(keywordcandidates),
                                key=operator.itemgetter(1), reverse=True)
        for keyword in sortedKeywords[:5]:
            print("Keyword: ", keyword[0], ", score: ", keyword[1])

        x_test_mat = weight_model.transform(joined_text.split('.'))
        predict = model.predict(x_test_mat)
        label = str(np.squeeze(predict[0]))

        # Scrape scimagojr for journals matching the predicted label.
        browser = mechanicalsoup.StatefulBrowser()
        browser.open("https://www.scimagojr.com/journalsearch.php?q=" + label)
        soup = browser.get_current_page()
        divTag = str(soup.find_all("div", {"class": "search_results"}))
        # Fragile HTML scraping: each result anchor ends with '</a>\n';
        # chunk 0 is pre-result markup, so take chunks 1..5.  Split once
        # instead of once per iteration as the original did.
        chunks = divTag.split('</a>\n')
        recommend = []
        for chunk in chunks[1:6]:
            parts = chunk.split('>')
            link = parts[0].split('"')[1]
            title = parts[2].split('<')[0]
            recommend.append((title, "https://www.scimagojr.com/" + link))
        # NOTE(review): this strips the literal text 'amp;' (leaving the
        # bare '&'), exactly as the original did — confirm that was the
        # intent rather than unescaping '&amp;'.
        recommend = [(t, u.replace('amp;', '')) for t, u in recommend]
        print(recommend)
        return render_template('index.html', label=label,
                               keyword=sortedKeywords[:5],
                               recommendations=recommend)
import rake import operator import io stoppath = "data/stoplists/SmartStoplist.txt" rake_object = rake.Rake(stoppath) sample_file = io.open("data/docs/mp.txt", 'r', encoding="iso-8859-1") text = sample_file.read() sentenceList = rake.split_sentences(text) for sentence in sentenceList: print("Sentence:", sentence) stopwords = rake.load_stop_words(stoppath) stopwordpattern = rake.build_stop_word_regex(stoppath) phraseList = rake.generate_candidate_keywords(sentenceList, stopwordpattern, stopwords) print("Phrases:", phraseList) wordscores = rake.calculate_word_scores(phraseList) keywordcandidates = rake.generate_candidate_keyword_scores( phraseList, wordscores) for candidate in keywordcandidates.keys(): print("Candidate: ", candidate, ", score: ", keywordcandidates.get(candidate)) sortedKeywords = sorted(six.iteritems(keywordcandidates), key=operator.itemgetter(1),
def enFunc():
    """Grade the answer in the Tk *entry* widget for question *counter*.

    Marks: up to 10 for length, up to 30 for keyword coverage against
    the model answer in data/docs/mp<counter>.txt, up to 10 for grammar
    (LanguageTool).  The weighted question score is stored in
    totmark[counter-1] and shown in a messagebox.
    """
    global counter, totmark
    ans = entry.get('1.0', 'end')
    # NOTE(review): the original computed n with a quadratic loop
    # ("for line in ans: words=[line.split(' ') for line in ans]"),
    # which reduces exactly to the *character* count of the answer.
    # That value is preserved here, but the 850/400/250 thresholds look
    # like they may have been meant for word counts — confirm.
    n = len(ans)
    if counter == 1 or counter == 2:
        if n >= 850:
            marks1 = 10
        elif n >= 400:
            marks1 = 5
        else:
            marks1 = 3
    else:
        if n >= 250:
            marks1 = 10
        elif n >= 100:
            marks1 = 5
        else:
            marks1 = 3
    a = marks1

    fname = "data/docs/mp" + str(counter) + ".txt"
    stoppath = "data/stoplists/SmartStoplist.txt"
    # The original also opened fname into an unread, never-closed
    # sample_file and ran the full manual RAKE pipeline whose outputs
    # were only used by commented-out prints — dead work removed; only
    # rake_object.run() feeds the grading below.
    rake_object = rake.Rake(stoppath)
    keyw = dict(rake_object.run(ans))
    print(keyw)
    print(fname)

    # Model-answer keywords: third blank-line-separated section of the
    # question file, one keyword per line.
    f1 = io.open(fname, 'r', encoding="iso-8859-1")
    text1 = f1.read()
    f1.close()
    print(text1.split("\n")[0])
    kw = text1.split("\n\n")[2].split("\n")
    print("keyword in original file=", kw)
    total = len(kw)
    print("No of keywords in original file=", total)

    # Substring match each model keyword against each extracted phrase.
    c = 0
    for phrase in keyw:
        for model_kw in kw:
            if model_kw.lower() in phrase.lower():
                print("Detected= " + str(phrase))
                c = c + 1
    print("count=", c)
    percentage = (c / total) * 100

    # The original wrote "percentage>=60 and percentage<80" for the 60s
    # band; since the >=70 case is consumed by the earlier branch, the
    # lower bound alone decides in an elif chain — simplified to it
    # (behavior unchanged).
    if percentage >= 90:
        marks2 = 30
    elif percentage >= 80:
        marks2 = 28
    elif percentage >= 70:
        marks2 = 26
    elif percentage >= 60:
        marks2 = 24
    elif percentage >= 50:
        # NOTE(review): 28 here outranks the 70-80 band's 26 — value
        # preserved from the original, but it looks like a typo (22
        # would keep the scale monotonic).  Confirm before changing.
        marks2 = 28
    elif percentage >= 40:
        marks2 = 25
    else:
        marks2 = 0
    message = "Marks obtained for keyword:" + str(marks2) + "/30"
    mes2text = "\nMarks for length = " + str(a) + "/10" + "\nLength = " + str(n)
    print(mes2text)
    print(message)
    b = marks2

    # Grammar: fewer LanguageTool matches -> more marks; an empty
    # answer (n == 0) cannot earn the top band.
    tool = language_check.LanguageTool('en-US')
    matches = tool.check(str(ans))
    noOfError = len(matches)
    print("No. of Errors=", noOfError)
    for match in matches:
        print(match.msg)
    if noOfError <= 3 and n > 0:
        marks3 = 10
    elif noOfError <= 5:
        marks3 = 8
    elif noOfError <= 8:
        marks3 = 5
    else:
        marks3 = 3
    print("Marks obtained after parsing=", marks3, "/10")
    c = marks3

    d = a + b + c
    print("Marks obtained out of 50 is=", d, "/50")
    # Questions 1-2 are weighted out of 12, the rest out of 4.
    tot = (d / 50) * 12 if counter in (1, 2) else (d / 50) * 4
    messagebox.showinfo("Result",
                        "\nMarks obtained for this question is" + str(tot))
    totmark[counter - 1] = tot
"of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered. " \ "Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating"\ " sets of solutions for all types of systems are given. These criteria and the corresponding algorithms " \ "for constructing a minimal supporting set of solutions can be used in solving all the considered types of " \ "systems and systems of mixed types." # 1. Split text into sentences sentenceList = rake.split_sentences(text) for sentence in sentenceList: print("Sentence:", sentence) # generate candidate keywords stopwords = rake.load_stop_words(stoppath) stopwordpattern = rake.build_stop_word_regex(stoppath) phraseList = rake.generate_candidate_keywords(sentenceList, stopwordpattern, stopwords) print("Phrases:", phraseList) # calculate individual word scores wordscores = rake.calculate_word_scores(phraseList) # generate candidate keyword scores keywordcandidates = rake.generate_candidate_keyword_scores(phraseList, wordscores) for candidate in keywordcandidates.keys(): print("Candidate: ", candidate, ", score: ", keywordcandidates.get(candidate)) # sort candidates by score to determine top-scoring keywords sortedKeywords = sorted(six.iteritems(keywordcandidates), key=operator.itemgetter(1), reverse=True) totalKeywords = len(sortedKeywords)
# git clone https://github.com/zelandiya/RAKE-tutorial
import rake
import operator
import nltk
from nltk.stem import *
from nltk.tokenize import RegexpTokenizer

STOPPATH = 'SmartStoplist.txt'
STOPWORDS = rake.load_stop_words(STOPPATH)


def get_keywords_of_single_abstract_grams(abstract):
    """Return the top third of stemmed bi/tri-gram keywords of *abstract*.

    The abstract is lowercased, tokenized on word characters,
    stopword-filtered and Porter-stemmed; bigrams and trigrams are
    ranked by frequency and the best-scoring third is returned as a
    list of space-joined gram strings.
    """
    tokenizer = RegexpTokenizer(r'\w+')
    tokens = tokenizer.tokenize(abstract.lower())
    stemmer = PorterStemmer()
    tokens = [stemmer.stem(t) for t in tokens if t not in STOPWORDS]

    # Merge bigram and trigram frequency distributions into one ranking.
    keyword_candidates = nltk.FreqDist(nltk.bigrams(tokens)) + \
        nltk.FreqDist(nltk.trigrams(tokens))
    # BUG FIX: FreqDist has no iteritems() on Python 3 — use items().
    keywords = [(' '.join(gram), score)
                for gram, score in keyword_candidates.items()]
    sorted_keywords = sorted(keywords, key=operator.itemgetter(1),
                             reverse=True)
    total_keywords = len(sorted_keywords)
    # BUG FIX: total_keywords / 3 is a float on Python 3 and cannot be
    # used as a slice index (TypeError) — use floor division.
    return [k[0] for k in sorted_keywords[0:total_keywords // 3]]