def __getitem__(self, doc):
    """Return the similarities of `doc` to all documents in the corpus.

    When `self.normalize` is set, the query is first passed through
    `matutils.unitVec` (defined elsewhere; presumably scales the vector to
    unit length -- confirm against matutils).

    The shape of the result depends on `self.numBest`, a setting taken from
    the constructor:

    * `None` -- the full output of `self.getSimilarities` is returned as is;
    * otherwise -- that output is handed to `matutils.full2sparse_clipped`
      together with `self.numBest`, so that only the `numBest` most similar
      documents are returned.
    """
    query = matutils.unitVec(doc) if self.normalize else doc
    similarities = self.getSimilarities(query)

    # A concrete numBest means the caller only wants the top matches.
    if self.numBest is not None:
        return matutils.full2sparse_clipped(similarities, self.numBest)
    return similarities
def __getitem__(self, doc):
    """Return the similarities of `doc` to all documents in the corpus.

    When `self.normalize` is set, the query is first passed through
    `matutils.unitVec` (defined elsewhere; presumably scales the vector to
    unit length -- confirm against matutils).

    The shape of the result depends on `self.numBest`, a setting taken from
    the constructor:

    * `None` -- the full output of `self.getSimilarities` is returned as is;
    * otherwise -- a list of at most `numBest` 2-tuples `(docId, docSim)`,
      highest similarity first.  Only documents with a strictly positive
      similarity are reported; documents with equal similarity keep their
      ascending `docId` order.
    """
    # Function-scope import: the file's import block is not visible here.
    import heapq

    if self.normalize:
        doc = matutils.unitVec(doc)
    allSims = self.getSimilarities(doc)

    if self.numBest is None:
        return allSims

    # Stream the positive-similarity candidates instead of materialising and
    # fully sorting them: nlargest is equivalent to
    # sorted(candidates, key=..., reverse=True)[:numBest] but only keeps
    # numBest items in memory at a time.
    candidates = ((docNo, sim) for docNo, sim in enumerate(allSims) if sim > 0)
    return heapq.nlargest(self.numBest, candidates, key=lambda item: item[1])