# NOTE(review): the two functions below take `self` and reach sibling methods
# through it, so they presumably belong to a class whose header precedes this
# chunk -- re-indent them under that class when merging.
def GetTfidf(self, dictionary, corpus):
    """Return the enumerated similarity row of the second indexed document.

    Builds a ``Similarity`` index over ``corpus`` (output prefix
    ``'t_index'``, feature count ``len(dictionary)``) and iterates it,
    stopping at the second row the index yields.

    Args:
        dictionary: Sized collection; only its length is used here.
        corpus: Documents handed to ``Similarity`` unchanged.

    Returns:
        ``list(enumerate(row))`` for the second row produced by iterating
        the index, or ``None`` when the index yields fewer than two rows.
    """
    # NOTE(review): no TF-IDF weighting is applied to `corpus` before it is
    # indexed. The previous revision built a TfidfModel and a query vector
    # but never used either, so they were dropped as dead code. If weighted
    # similarities are the intent (as the method name suggests), the corpus
    # would need to be transformed before indexing -- confirm with the author.
    index = Similarity('t_index', corpus, len(dictionary))

    for position, similarities in enumerate(index):
        if position == 1:
            return list(enumerate(similarities))

    # Fewer than two rows: make the fall-through result explicit.
    return None


def GetSimilarities(self, dictionary, corpus):
    """Run the similarity computations and return two of their results.

    Calls ``self.GetWord2Vec`` first (its return value is discarded), then
    ``self.GetLsm`` and ``self.GetTfidf``, all with the same arguments.

    Args:
        dictionary: Forwarded unchanged to each sibling method.
        corpus: Forwarded unchanged to each sibling method.

    Returns:
        A 2-tuple ``(tsims, lsims)``: the ``GetTfidf`` result first, the
        ``GetLsm`` result second.
    """
    # Result intentionally unused here; the call is kept because it ran in
    # the original and may have effects that are not visible in this chunk.
    self.GetWord2Vec(dictionary, corpus)
    lsims = self.GetLsm(dictionary, corpus)
    tsims = self.GetTfidf(dictionary, corpus)
    return (
        tsims,
        lsims,
    )


if __name__ == '__main__':
    reader = Reader(r"C:\Users\CCrowe\Documents\Kaggle\Quora\train.csv",
                    r"C:\Users\CCrowe\Documents\Kaggle\Quora\test.csv")
    documents = reader.GetDocuments()
    print("documents retrieved")
    # NOTE(review): `texts` is not defined anywhere in the visible portion of
    # this script -- verify it is assigned at module level elsewhere,
    # otherwise this line raises NameError.
    similar = Similar(documents, texts)