Ejemplo n.º 1
0
 def cosSim(self, doc1,doc2):
     """Return the cosine similarity of two term-count mappings.

     Every count ``c`` is converted to the weight ``1 + log(c)``.  The dot
     product is accumulated over the terms present in both documents; the
     denominator is the product of ``getScalar`` applied to each
     document's full list of weights (``getScalar`` is defined elsewhere;
     presumably it returns the vector length -- confirm).

     Args:
         doc1: mapping of term -> count.
         doc2: mapping of term -> count.

     Returns:
         0 when the dot product is not positive (for instance when the
         documents share no term); otherwise the dot product divided by
         the product of the two ``getScalar`` values, as a float.

     Raises:
         ValueError: from ``math.log`` when a count that gets weighted is
             zero or negative.
     """
     dot = 0
     for term in doc1:
         # A term missing from doc2 contributes nothing to the dot product.
         # Indexing doc2 unconditionally would raise KeyError instead.
         if term in doc2:
             weight1 = 1 + math.log(doc1[term])
             weight2 = 1 + math.log(doc2[term])
             dot += weight1 * weight2

     if dot > 0:
         # The normalisers cover every term of each document, not only
         # the shared ones.
         weights1 = [1 + math.log(doc1[term]) for term in doc1]
         weights2 = [1 + math.log(doc2[term]) for term in doc2]
         return float(dot) / (getScalar(weights1) * getScalar(weights2))

     return 0
Ejemplo n.º 2
0
 def __init__(self, topVocabDic,relevTh,docs):
     """Store the constructor arguments and cache the vocabulary scalar.

     Args:
         topVocabDic: mapping whose values are handed, unmodified (no log
             scaling), to ``getScalar``; kept as ``self.topVocabDic``.
         relevTh: kept as ``self.relevanceth``.
         docs: kept as ``self.docs``.
     """
     self.topVocabDic = topVocabDic
     self.relevanceth = relevTh
     self.docs = docs

     # Evaluated once at construction time and kept on the instance.
     vocab_values = list(topVocabDic.values())
     self.vocabScalar = getScalar(vocab_values)
Ejemplo n.º 3
0
 def cosSim(self,doc2):
     """Score ``doc2`` against ``self.topVocabDic`` by cosine similarity.

     Values from both mappings are used as-is (no log scaling).  The dot
     product runs over the vocabulary terms that also occur in ``doc2``;
     the denominator multiplies the cached ``self.vocabScalar`` by
     ``getScalar`` of all ``doc2`` values.

     Args:
         doc2: mapping of term -> numeric value.

     Returns:
         0 when the dot product is not positive; otherwise the dot
         product divided by the denominator described above, as a float.
     """
     vocab = self.topVocabDic
     dot = sum(vocab[term] * doc2[term] for term in vocab if term in doc2)

     # Nothing shared (or a non-positive product): report no similarity.
     if not dot > 0:
         return 0

     doc_values = [doc2[term] for term in doc2]
     return float(dot) / (self.vocabScalar * getScalar(doc_values))