def send_relevant_entry_updates(self,max_entries=4, decay=.8):

        print 'send_relevant_entry_updates called'
        with Timer() as t:

            #Do the decay for the displayed entries:
            #TODO: handle duplicate keywords and updated scores
            for entry in self.displayed_entries:
                entry["score"] *= decay

            keywords = self.ke.getKeywordsDruid(self.complete_transcript[-1])
            print keywords
            new_relevant_entries = wiki_search.getSummariesSingleKeyword(keywords,max_entries,lang=self.lang,pics_folder='pics/')

            new_relevant_entries_set = set(new_relevant_entries)
            relevant_entries_set = set(self.relevant_entries)

            #generate del relevant entries
            #for key in relevant_entries_set - new_relevant_entries_set:
            #    entry = self.relevant_entries[key]
            #    self.delDisplayEntry("wiki", entry["title"])
                
            #generate add relevant entries
            for key in new_relevant_entries_set - relevant_entries_set:
                entry = new_relevant_entries[key]
                if self.addDisplayEntry("wiki", entry):
                    for category in entry["categories"]:
                        self.categories[category] += 1  

            #now look for changed scores (happens if a keyword got more important and gets mentioned again)   
            for key in (new_relevant_entries_set & relevant_entries_set):
                entry = new_relevant_entries[key]
                if entry["score"] > self.relevant_entries[key]["score"]:
                    print "score change for:",entry["title"], self.relevant_entries[key]["score"], "->", entry["score"]
                    found_displayed_entry = False
                    for display_entry in self.displayed_entries:
                        #already displayed, we could delete and read it, to reflect the new placement
                        if display_entry["title"] == key:
                            found_displayed_entry = True
                            #self.delDisplayEntry("wiki", entry["title"])
                            #self.addDisplayEntry("wiki", entry)
                            break

                    if not found_displayed_entry:
                        #not displayed, try to see if the higher score gets results in a document that is more important
                        self.addDisplayEntry("wiki", entry)

            for key in new_relevant_entries_set - relevant_entries_set:
                self.relevant_entries[key] = new_relevant_entries[key]

        topCategories_Event = self.topCategories()
        print topCategories_Event
        # TODO: only send something if topCategories actually changes
        self.keyword_client.sendCategories(topCategories_Event)

        print 'send_relevant_entry_updates finished. Time needed:', t.secs, 'seconds.'
        print 'Displayed entries should now be:',[entry['title'] for entry in self.displayed_entries]
    def send_relevant_entry_updates(self,max_entries=4):
        print 'send_relevant_entry_updates called'
        keywords = self.ke.getKeywordsDruid('\n'.join([sentence[:-1] for sentence in self.complete_transcript]))
        new_relevant_entries = wiki_search.getSummariesSingleKeyword(keywords,max_entries,lang='en',pics_folder='pics/')
        print new_relevant_entries

        #generate del relevant entries
        for key in set(self.relevant_entries) - set(new_relevant_entries):
            entry = self.relevant_entries[key]
            self.keyword_client.delRelevantEntry("wiki", entry["title"])
            print 'del',key
        #generate add relevant entries
        for key in set(new_relevant_entries) - set(self.relevant_entries):
            entry = new_relevant_entries[key]
            self.keyword_client.addRelevantEntry("wiki", entry["title"], entry["text"], entry["url"], entry["score"])
            print 'add',key

        #TODO: Update scores of existing entries in self.displayed_entries (?)

        self.relevant_entries = new_relevant_entries
    def send_relevant_entry_updates(self, max_entries=4):
        print 'send_relevant_entry_updates called'
        keywords = self.ke.getKeywordsDruid('\n'.join(
            [sentence[:-1] for sentence in self.complete_transcript]))
        new_relevant_entries = wiki_search.getSummariesSingleKeyword(
            keywords, max_entries, lang='en', pics_folder='pics/')
        print new_relevant_entries

        #generate del relevant entries
        for key in set(self.relevant_entries) - set(new_relevant_entries):
            entry = self.relevant_entries[key]
            self.keyword_client.delRelevantEntry("wiki", entry["title"])
            print 'del', key
        #generate add relevant entries
        for key in set(new_relevant_entries) - set(self.relevant_entries):
            entry = new_relevant_entries[key]
            self.keyword_client.addRelevantEntry("wiki", entry["title"],
                                                 entry["text"], entry["url"],
                                                 entry["score"])
            print 'add', key

        #TODO: Update scores of existing entries in self.displayed_entries (?)

        self.relevant_entries = new_relevant_entries
Esempio n. 4
0
            has_number = self.RE_D.search(words)
            #exclude any lines that have one or more numbers in them
            if not has_number:
                words_split = [filterHyphens(word) for word in words.split(u' ')]
                float_druid_score = float(druid_score)
                if float_druid_score > cutoff_druid_score:
                    if not any((word in self.stopwords) for word in words_split):
                        self.keyword_dict[words] = float_druid_score
                        num_added_words += 1
                        if num_added_words % 1000 == 0:
                            print words, self.keyword_dict[words]
                else:
                    break
        if self.extra_keywords != '':
            with codecs.open(self.extra_keywords) as infile:
                for line in infile:
                    words = line[:-1].lower()
                    print 'Loading user set keyword:',words
                    self.keyword_dict[words] = 3.0

if __name__ == "__main__":
    print 'Scripting directly called, I will perform some testing.'
    ke = KeywordExtract(lang="en")
    ke.buildDruidCache()
    test = ke.getKeywordsDruid(u"A columbia university law professor stood in a hotel lobby one morning and noticed a sign apologizing for an elevator that was out of order. it had dropped unexpectedly three stories a few days earlier. the professor, eben moglen, tried to imagine what the world would be like if elevators were not built so that people could inspect them. mr. moglen was on his way to give a talk about the dangers of secret code, known as proprietary software, that controls more and more devices every day. proprietary software is an unsafe building material, mr. moglen had said. you can't inspect it. he then went to the golden gate bridge and jumped.")
    print test
    print wiki_search.getSummariesSingleKeyword(test)
    test = ke.getKeywordsDruid(u"So i was walking down the golden gate bridge, i had the epiphany that in order to be a good computer scientist, i need to learn and practise machine learning. Also proprietary software is the root of all evil and I should better use open source software.")
    print test
    print wiki_search.getSummariesSingleKeyword(test)
            druid_score = split[2]
            has_number = RE_D.search(words)
            #exclude any lines that have one or more numbers in them
            if not has_number:
                words_split = words.split(u' ')
                float_druid_score = float(druid_score)
                if float_druid_score > cutoff_druid_score:
                    if not any((word in stopwords) for word in words_split):
                        self.keyword_dict[words] = float_druid_score
                        num_added_words += 1
                        if num_added_words % 1000 == 0:
                            print words, self.keyword_dict[words]
                else:
                    break


if __name__ == "__main__":
    print 'Scripting directly called, I will perform some testing.'
    ke = KeywordExtract()
    ke.buildDruidCache()
    test = ke.getKeywordsDruid(
        u"A columbia university law professor stood in a hotel lobby one morning and noticed a sign apologizing for an elevator that was out of order. it had dropped unexpectedly three stories a few days earlier. the professor, eben moglen, tried to imagine what the world would be like if elevators were not built so that people could inspect them. mr. moglen was on his way to give a talk about the dangers of secret code, known as proprietary software, that controls more and more devices every day. proprietary software is an unsafe building material, mr. moglen had said. you can't inspect it. he then went to the golden gate bridge and jumped."
    )
    print test
    print wiki_search.getSummariesSingleKeyword(test)
    test = ke.getKeywordsDruid(
        u"So i was walking down the golden gate bridge, i had the epiphany that in order to be a good computer scientist, i need to learn and practise machine learning. Also proprietary software is the root of all evil and I should better use open source software."
    )
    print test
    print wiki_search.getSummariesSingleKeyword(test)