def send_relevant_entry_updates(self,max_entries=4, decay=.8): print 'send_relevant_entry_updates called' with Timer() as t: #Do the decay for the displayed entries: #TODO: handle duplicate keywords and updated scores for entry in self.displayed_entries: entry["score"] *= decay keywords = self.ke.getKeywordsDruid(self.complete_transcript[-1]) print keywords new_relevant_entries = wiki_search.getSummariesSingleKeyword(keywords,max_entries,lang=self.lang,pics_folder='pics/') new_relevant_entries_set = set(new_relevant_entries) relevant_entries_set = set(self.relevant_entries) #generate del relevant entries #for key in relevant_entries_set - new_relevant_entries_set: # entry = self.relevant_entries[key] # self.delDisplayEntry("wiki", entry["title"]) #generate add relevant entries for key in new_relevant_entries_set - relevant_entries_set: entry = new_relevant_entries[key] if self.addDisplayEntry("wiki", entry): for category in entry["categories"]: self.categories[category] += 1 #now look for changed scores (happens if a keyword got more important and gets mentioned again) for key in (new_relevant_entries_set & relevant_entries_set): entry = new_relevant_entries[key] if entry["score"] > self.relevant_entries[key]["score"]: print "score change for:",entry["title"], self.relevant_entries[key]["score"], "->", entry["score"] found_displayed_entry = False for display_entry in self.displayed_entries: #already displayed, we could delete and read it, to reflect the new placement if display_entry["title"] == key: found_displayed_entry = True #self.delDisplayEntry("wiki", entry["title"]) #self.addDisplayEntry("wiki", entry) break if not found_displayed_entry: #not displayed, try to see if the higher score gets results in a document that is more important self.addDisplayEntry("wiki", entry) for key in new_relevant_entries_set - relevant_entries_set: self.relevant_entries[key] = new_relevant_entries[key] topCategories_Event = self.topCategories() print topCategories_Event # TODO: 
only send something if topCategories actually changes self.keyword_client.sendCategories(topCategories_Event) print 'send_relevant_entry_updates finished. Time needed:', t.secs, 'seconds.' print 'Displayed entries should now be:',[entry['title'] for entry in self.displayed_entries]
def send_relevant_entry_updates(self,max_entries=4): print 'send_relevant_entry_updates called' keywords = self.ke.getKeywordsDruid('\n'.join([sentence[:-1] for sentence in self.complete_transcript])) new_relevant_entries = wiki_search.getSummariesSingleKeyword(keywords,max_entries,lang='en',pics_folder='pics/') print new_relevant_entries #generate del relevant entries for key in set(self.relevant_entries) - set(new_relevant_entries): entry = self.relevant_entries[key] self.keyword_client.delRelevantEntry("wiki", entry["title"]) print 'del',key #generate add relevant entries for key in set(new_relevant_entries) - set(self.relevant_entries): entry = new_relevant_entries[key] self.keyword_client.addRelevantEntry("wiki", entry["title"], entry["text"], entry["url"], entry["score"]) print 'add',key #TODO: Update scores of existing entries in self.displayed_entries (?) self.relevant_entries = new_relevant_entries
def send_relevant_entry_updates(self, max_entries=4): print 'send_relevant_entry_updates called' keywords = self.ke.getKeywordsDruid('\n'.join( [sentence[:-1] for sentence in self.complete_transcript])) new_relevant_entries = wiki_search.getSummariesSingleKeyword( keywords, max_entries, lang='en', pics_folder='pics/') print new_relevant_entries #generate del relevant entries for key in set(self.relevant_entries) - set(new_relevant_entries): entry = self.relevant_entries[key] self.keyword_client.delRelevantEntry("wiki", entry["title"]) print 'del', key #generate add relevant entries for key in set(new_relevant_entries) - set(self.relevant_entries): entry = new_relevant_entries[key] self.keyword_client.addRelevantEntry("wiki", entry["title"], entry["text"], entry["url"], entry["score"]) print 'add', key #TODO: Update scores of existing entries in self.displayed_entries (?) self.relevant_entries = new_relevant_entries
has_number = self.RE_D.search(words) #exlude any lines that have one or more numbers in them if not has_number: words_split = [filterHyphens(word) for word in words.split(u' ')] float_druid_score = float(druid_score) if float_druid_score > cutoff_druid_score: if not any((word in self.stopwords) for word in words_split): self.keyword_dict[words] = float_druid_score num_added_words += 1 if num_added_words % 1000 == 0: print words, self.keyword_dict[words] else: break if self.extra_keywords != '': with codecs.open(self.extra_keywords) as infile: for line in infile: words = line[:-1].lower() print 'Loading user set keyword:',words self.keyword_dict[words] = 3.0 if __name__ == "__main__": print 'Scripting directly called, I will perform some testing.' ke = KeywordExtract(lang="en") ke.buildDruidCache() test = ke.getKeywordsDruid(u"A columbia university law professor stood in a hotel lobby one morning and noticed a sign apologizing for an elevator that was out of order. it had dropped unexpectedly three stories a few days earlier. the professor, eben moglen, tried to imagine what the world would be like if elevators were not built so that people could inspect them. mr. moglen was on his way to give a talk about the dangers of secret code, known as proprietary software, that controls more and more devices every day. proprietary software is an unsafe building material, mr. moglen had said. you can't inspect it. he then went to the golden gate bridge and jumped.") print test print wiki_search.getSummariesSingleKeyword(test) test = ke.getKeywordsDruid(u"So i was walking down the golden gate bridge, i had the epiphany that in order to be a good computer scientist, i need to learn and practise machine learning. Also proprietary software is the root of all evil and I should better use open source software.") print test print wiki_search.getSummariesSingleKeyword(test)
# NOTE(review): this span starts in the middle of a loop body; the enclosing
# method and loop headers are not visible here. The nesting below is
# reconstructed from the statement order -- confirm against the full file.
                # Third field of the split input line holds the score (as text).
                druid_score = split[2]
                has_number = RE_D.search(words)
                # Exclude any lines that have one or more numbers in them.
                if not has_number:
                    words_split = words.split(u' ')
                    float_druid_score = float(druid_score)
                    if float_druid_score > cutoff_druid_score:
                        # Keep the phrase only if none of its words is a stopword.
                        if not any((word in stopwords) for word in words_split):
                            self.keyword_dict[words] = float_druid_score
                            num_added_words += 1
                            # Progress output every 1000 added keywords.
                            if num_added_words % 1000 == 0:
                                print words, self.keyword_dict[words]
                    else:
                        # Stop reading at the first score that is not above the
                        # cutoff (presumably the input is sorted by descending
                        # score -- TODO confirm).
                        break

# Manual smoke test: build the keyword cache, extract keywords from two sample
# texts and print the keywords together with the wiki summaries found for them.
if __name__ == "__main__":
    print 'Scripting directly called, I will perform some testing.'
    ke = KeywordExtract()
    ke.buildDruidCache()
    test = ke.getKeywordsDruid(
        u"A columbia university law professor stood in a hotel lobby one morning and noticed a sign apologizing for an elevator that was out of order. it had dropped unexpectedly three stories a few days earlier. the professor, eben moglen, tried to imagine what the world would be like if elevators were not built so that people could inspect them. mr. moglen was on his way to give a talk about the dangers of secret code, known as proprietary software, that controls more and more devices every day. proprietary software is an unsafe building material, mr. moglen had said. you can't inspect it. he then went to the golden gate bridge and jumped."
    )
    print test
    print wiki_search.getSummariesSingleKeyword(test)
    test = ke.getKeywordsDruid(
        u"So i was walking down the golden gate bridge, i had the epiphany that in order to be a good computer scientist, i need to learn and practise machine learning. Also proprietary software is the root of all evil and I should better use open source software."
    )
    print test
    print wiki_search.getSummariesSingleKeyword(test)