def cosSim(self, doc1, doc2):
    """Return the cosine similarity of two term-count mappings.

    Every count ``c`` is log-scaled to the weight ``1 + log(c)`` before the
    dot product and the two normalisation factors are computed.

    Args:
        doc1: Mapping of term -> count for the first document.
        doc2: Mapping of term -> count for the second document.

    Returns:
        The weighted dot product divided by the product of
        ``getScalar`` applied to each document's weight list, or ``0``
        when the dot product is not positive (e.g. no shared terms).

    Raises:
        ValueError: Propagated from ``math.log`` when a count that is
            actually read is zero or negative.
    """
    dot = 0
    for term in doc1:
        # A term missing from doc2 has weight 0 there and adds nothing to
        # the dot product; skipping it also avoids a KeyError on doc2[term].
        if term in doc2:
            weight1 = 1 + math.log(doc1[term])
            weight2 = 1 + math.log(doc2[term])
            dot += weight1 * weight2

    if dot > 0:
        weights1 = [1 + math.log(doc1[term]) for term in doc1]
        weights2 = [1 + math.log(doc2[term]) for term in doc2]
        # getScalar is defined elsewhere in this file; presumably it returns
        # the Euclidean norm of a weight list -- TODO confirm.
        return float(dot) / (getScalar(weights1) * getScalar(weights2))
    return 0
def __init__(self, topVocabDic, relevTh, docs):
    """Store the reference vocabulary and cache its normalisation factor.

    Args:
        topVocabDic: Mapping of term -> weight used as the reference
            vector by ``cosSim``.
        relevTh: Value stored as ``self.relevanceth`` (presumably a
            relevance threshold -- confirm against callers).
        docs: Value stored unchanged as ``self.docs``.
    """
    self.topVocabDic = topVocabDic
    self.relevanceth = relevTh
    self.docs = docs

    # The vocabulary weights are used as given (no log scaling), and the
    # result of getScalar is computed once here so that cosSim can reuse it
    # for every document it scores.
    vocab_weights = list(topVocabDic.values())
    self.vocabScalar = getScalar(vocab_weights)
def cosSim(self, doc2):
    """Return the cosine similarity between the stored vocabulary and *doc2*.

    Weights are used as given, with no log scaling.

    Args:
        doc2: Mapping of term -> weight for the document being scored.

    Returns:
        The dot product over the terms shared with ``self.topVocabDic``,
        divided by ``self.vocabScalar * getScalar(<doc2 weights>)``, or
        ``0`` when that dot product is not positive.
    """
    vocab = self.topVocabDic
    # Only terms present on both sides contribute to the dot product.
    dot = sum(vocab[term] * doc2[term] for term in vocab if term in doc2)

    if dot > 0:
        doc_weights = [doc2[term] for term in doc2]
        # The vocabulary-side factor was precomputed in __init__.
        return float(dot) / (self.vocabScalar * getScalar(doc_weights))
    return 0