Esempio n. 1
0
    def init_text_to_text_links(self):
        """Number all documents, lemmatize tweets and build text-to-text links.

        News documents receive consecutive ``index`` values starting at 0;
        tweets continue the same numbering right after the last news
        document.  Each tweet additionally gets a ``words`` attribute holding
        the result of ``Lemmatizer.split_text_to_lemmas`` applied to its
        ``text``.  The outcome of ``get_text_to_text_relation`` is stored in
        ``self.text_to_text_links``.
        """
        message = 'Finding text to text links for {NAME}'.format(NAME=self.name())
        logging.info(message)
        lemmatizer = Lemmatizer()

        # Counting from 1 leaves ``news_total`` equal to the number of news
        # documents once the loop ends (and 0 when there are none).
        news_total = 0
        for news_total, article in enumerate(self.news.get_documents(), 1):
            article.index = news_total - 1

        # Tweets pick up the numbering where the news documents stopped.
        for position, tweet in enumerate(self.tweets.get_documents(), news_total):
            tweet.words = lemmatizer.split_text_to_lemmas(tweet.text)
            tweet.index = position

        # The full document collection is compared against itself.
        # NOTE(review): presumably ``self.get_documents()`` yields news first
        # and tweets second, so the ``index`` values above address the
        # matrix -- confirm against get_similarity_matrix.
        left_documents = self.get_documents()
        right_documents = self.get_documents()
        similarity_matrix = get_similarity_matrix(
            left_documents,
            right_documents,
            self.corpus,
            self.tf_idf_matrix,
        )

        news_documents = self.news.get_documents()
        tweet_documents = self.tweets.get_documents()
        self.text_to_text_links = get_text_to_text_relation(
            news_documents,
            tweet_documents,
            similarity_matrix,
        )