def ExtractForOne(self, ExpTerm):
    """Compute the ``word2vecsim`` feature for one expansion term.

    The feature is the cosine (via ``VectorC.cosine``) between the mean of
    the query-term vectors and the vector of the expansion term.

    Args:
        ExpTerm: object exposing ``term`` (the expansion term) and ``query``
            (a whitespace-separated query string).

    Returns:
        dict with the single key ``'word2vecsim'``. The value is 0 when the
        expansion term has index -1 in ``self.hTargetTerm`` or when no query
        term has a vector; otherwise it is the cosine score.

    Note:
        ``self.hTargetTerm`` is indexed directly, so every looked-up term is
        expected to be present in it (with -1 marking "no vector").
        NOTE(review): a term missing from a plain dict would raise KeyError.
    """
    hFeature = {'word2vecsim': 0}

    TermIndex = self.hTargetTerm[ExpTerm.term]
    if TermIndex == -1:
        print("term [%s] not appear in word2vec" % (ExpTerm.term,))
        return hFeature
    TermVector = self.lVector[TermIndex]

    # Collect the vectors of the query terms that do have an embedding.
    lQTermVector = []
    for qterm in ExpTerm.query.split():
        QTermIndex = self.hTargetTerm[qterm]
        if QTermIndex == -1:
            print("qterm [%s] not appear in word2vec" % (qterm,))
            continue
        lQTermVector.append(self.lVector[QTermIndex])
    if not lQTermVector:
        return hFeature

    # Only build the accumulator once we know there is something to average.
    QVector = VectorC()
    for vector in lQTermVector:
        QVector += vector
    QVector /= float(len(lQTermVector))

    hFeature['word2vecsim'] = VectorC.cosine(QVector, TermVector)
    return hFeature
def TfIdfCosine(LmA, LmB, CtfCenter):
    """Return the cosine between the IDF-weighted vectors of two models.

    Args:
        LmA, LmB: objects exposing ``len`` and
            ``TransferToVectorWithIdf(CtfCenter)``; the latter must return a
            vector with an ``hDim`` attribute that ``json.dumps`` can
            serialize.
        CtfCenter: passed through unchanged to ``TransferToVectorWithIdf``.

    Returns:
        0 if either model has ``len == 0``; otherwise the value returned by
        ``VectorC.cosine`` for the two vectors.

    Side effects:
        Prints the score and both vectors' ``hDim`` to stdout.
    """
    # Logical `or` (not bitwise `|`): an empty model on either side means
    # there is nothing to compare.
    if LmA.len == 0 or LmB.len == 0:
        return 0

    vA = LmA.TransferToVectorWithIdf(CtfCenter)
    vB = LmB.TransferToVectorWithIdf(CtfCenter)
    score = VectorC.cosine(vA, vB)
    # Single-argument print call: same output on Python 2 and Python 3.
    print("cosine [%f] of:\n%s\n%s" % (score, json.dumps(vA.hDim), json.dumps(vB.hDim)))
    return score
def TfIdfCosine(LmA, LmB, CtfCenter):
    """Return the cosine between the TF-IDF style vectors of two models.

    Each model's ``hTermTF`` is wrapped in a ``VectorC`` and divided by the
    model's ``len``. Every dimension is then multiplied by
    ``log(1 / CtfCenter.GetCtfProb(term))``.

    Args:
        LmA, LmB: objects exposing ``len`` and ``hTermTF``.
        CtfCenter: object exposing ``GetCtfProb(term)``. The returned value
            must be positive; 0 would raise ZeroDivisionError here.

    Returns:
        0 if either model has ``len == 0``; otherwise the value returned by
        ``VectorC.cosine`` for the two weighted vectors.

    Side effects:
        Prints the score and both vectors' ``hDim`` to stdout.
    """
    # Check for empty models first so no vectors are built needlessly.
    if LmA.len == 0 or LmB.len == 0:
        return 0

    vA = VectorC(LmA.hTermTF)
    vB = VectorC(LmB.hTermTF)
    # float() keeps the normalization from truncating under Python 2 integer
    # division when both the counts and the length are ints.
    # NOTE(review): harmless if VectorC already divides in floating point.
    vA /= float(LmA.len)
    vB /= float(LmB.len)

    # Apply the IDF weight to every dimension of both vectors (vA, then vB).
    for vector in (vA, vB):
        for item in vector.hDim:
            CTF = CtfCenter.GetCtfProb(item)
            vector.hDim[item] *= math.log(1.0 / CTF)

    score = VectorC.cosine(vA, vB)
    # Single-argument print call: same output on Python 2 and Python 3.
    print("cosine [%f] of:\n%s\n%s" % (score, json.dumps(vA.hDim), json.dumps(vB.hDim)))
    return score