def ExtractForOne(self,ExpTerm):
    """Score one expansion term against its query by vector cosine similarity.

    The query is represented by the average of the vectors of its
    whitespace-separated terms.  The score is ``VectorC.cosine`` between
    that average and the expansion term's own vector.

    Args:
        ExpTerm: object exposing ``term`` (the expansion term) and ``query``
            (the query string, split on whitespace here).

    Returns:
        dict with the single key ``'word2vecsim'``.  The value stays 0 when
        the expansion term has no vector, or when no query term has one.
    """
    hFeature = {'word2vecsim': 0}

    # An index of -1 marks a term without a vector.  A term that is missing
    # from the table altogether is treated the same way instead of raising
    # KeyError.
    TermIndex = self.hTargetTerm.get(ExpTerm.term, -1)
    if TermIndex == -1:
        print("term [%s] not appear in word2vec" %(ExpTerm.term))
        return hFeature
    TermVector = self.lVector[TermIndex]

    # Accumulate the vectors of the query terms that do have a vector.
    QVector = VectorC()
    QTermCnt = 0
    for qterm in ExpTerm.query.split():
        QTermIndex = self.hTargetTerm.get(qterm, -1)
        if QTermIndex == -1:
            print("qterm [%s] not appear in word2vec" %(qterm))
            continue
        QTermCnt += 1
        QVector += self.lVector[QTermIndex]

    if QTermCnt == 0:
        # Nothing to average: keep the default score of 0.
        return hFeature

    # float() keeps this a true division under Python 2 as well.
    QVector /= float(QTermCnt)

    hFeature['word2vecsim'] = VectorC.cosine(QVector,TermVector)
    return hFeature
예제 #2
0
 def TfIdfCosine(LmA,LmB,CtfCenter):
     """Return the cosine similarity of two language models after idf weighting.

     Each model is converted with its own ``TransferToVectorWithIdf(CtfCenter)``
     and the two resulting vectors are compared with ``VectorC.cosine``.

     Args:
         LmA, LmB: language-model objects exposing ``len`` and
             ``TransferToVectorWithIdf``.
         CtfCenter: passed through unchanged to ``TransferToVectorWithIdf``.

     Returns:
         0 when either model has ``len == 0``; otherwise the cosine score.

     Side effects:
         Prints the score and both vectors' ``hDim`` (as JSON) to stdout.
     """
     # Logical `or` instead of bitwise `|`: same result for these boolean
     # comparisons, and it short-circuits.
     if LmA.len == 0 or LmB.len == 0:
         return 0

     vA = LmA.TransferToVectorWithIdf(CtfCenter)
     vB = LmB.TransferToVectorWithIdf(CtfCenter)

     score = VectorC.cosine(vA, vB)

     # Single-argument print(...) behaves identically on Python 2 and 3.
     print("cosine [%f] of:\n%s\n%s" %(score, json.dumps(vA.hDim),json.dumps(vB.hDim)))
     return score
예제 #3
0
 def TfIdfCosine(LmA,LmB,CtfCenter):
     """Return the cosine similarity of two language models with tf-idf weights.

     Each model's ``hTermTF`` is wrapped in a ``VectorC``, divided by the
     model's ``len``, and every dimension is multiplied by
     ``log(1 / CtfCenter.GetCtfProb(term))``.  The two weighted vectors are
     then compared with ``VectorC.cosine``.

     Args:
         LmA, LmB: language-model objects exposing ``hTermTF`` and ``len``.
         CtfCenter: object exposing ``GetCtfProb(term)``.

     Returns:
         0 when either model has ``len == 0``; otherwise the cosine score.

     Side effects:
         Prints the score and both vectors' ``hDim`` (as JSON) to stdout.
     """
     # Check for empty models first so no vector is built only to be dropped.
     if LmA.len == 0 or LmB.len == 0:
         return 0

     # NOTE(review): if VectorC keeps a reference to hTermTF rather than
     # copying it, the in-place updates below also modify the language
     # models - confirm against VectorC.
     vA = VectorC(LmA.hTermTF)
     vB = VectorC(LmB.hTermTF)

     # float() forces true division; with integer counts and an integer
     # length, Python 2 would otherwise truncate the normalised weights.
     vA /= float(LmA.len)
     vB /= float(LmB.len)

     # Apply the idf factor to every dimension of both vectors.
     # NOTE(review): assumes GetCtfProb never returns 0 (1.0/CTF would raise
     # ZeroDivisionError) - confirm against CtfCenter.
     for Vector in (vA, vB):
         for item in Vector.hDim:
             CTF = CtfCenter.GetCtfProb(item)
             Vector.hDim[item] *= math.log(1.0/CTF)

     score = VectorC.cosine(vA, vB)

     # Single-argument print(...) behaves identically on Python 2 and 3.
     print("cosine [%f] of:\n%s\n%s" %(score, json.dumps(vA.hDim),json.dumps(vB.hDim)))
     return score