def searchtfjabPlusRelevent(self, searchList, releventdocstr, documentList, relevanceWeight=0.5):
    """Score documents against a query, boosted by a relevance-feedback text.

    Each score is
    ``util.jaccard(query, doc) + relevanceWeight * util.jaccard(relevant, doc)``.

    Args:
        searchList: Query input, vectorised with ``self.getVectorKeywordIndex``.
        releventdocstr: Relevance-feedback input, vectorised with the same
            ``self.getVectorKeywordIndex`` call.
        documentList: Documents, vectorised with
            ``self.getVectorKeywordIndexSeprated``.
        relevanceWeight: Multiplier applied to the relevance-feedback
            similarity. Defaults to 0.5, the value that used to be hard-coded.

    Returns:
        list: One combined score per entry of ``self.documentVectors``, in
        iteration order.

    Side effects:
        Overwrites ``self.documentVectors`` with the vectors built from
        ``documentList``.
    """
    queryVector = self.getVectorKeywordIndex(searchList)
    relevanceVector = self.getVectorKeywordIndex(releventdocstr)
    self.documentVectors = self.getVectorKeywordIndexSeprated(documentList)

    # A single pass keeps the two similarities aligned per document without
    # building parallel lists and patching one of them by index afterwards.
    return [
        util.jaccard(queryVector, documentVector)
        + util.jaccard(relevanceVector, documentVector) * relevanceWeight
        for documentVector in self.documentVectors
    ]
def pair_features(hashes1, hashes2):
    """Build a 7-element feature list comparing two hash matrices.

    The first feature is ``jaccard`` applied to the ``binary_matrix_to_int``
    encodings of both inputs. The remaining six summarise the pairwise
    Hamming distance matrix, oriented so it never has more rows than columns:
    min/max/mean of the row-wise minima, then min/max/mean of the column-wise
    maxima. When the distance matrix has a zero-length dimension those six
    entries are NaN.

    Returns:
        list: ``[jaccard, rowmin.min, rowmin.max, rowmin.mean,
        colmax.min, colmax.max, colmax.mean]``.
    """
    features = [
        jaccard(binary_matrix_to_int(hashes1), binary_matrix_to_int(hashes2))
    ]

    distances = pairwise_distances(hashes1, hashes2, metric='hamming')
    n_rows, n_cols = distances.shape[0], distances.shape[1]

    # Emptiness is symmetric under transposition, so it can be settled before
    # the matrix is re-oriented.
    if n_rows == 0 or n_cols == 0:
        return features + [np.nan] * 6

    # Put the shorter dimension on axis 0.
    if n_rows > n_cols:
        distances = distances.T

    row_minima = distances.min(axis=1)
    # NOTE(review): a max over axis 0 is paired with a min over axis 1 --
    # kept as-is; confirm the asymmetry is intended.
    col_maxima = distances.max(axis=0)

    for summary in (row_minima, col_maxima):
        features.extend((summary.min(), summary.max(), summary.mean()))
    return features
def searchtfjab(self, searchList, documentList):
    """Return the ``util.jaccard`` score of the query against each document.

    The query is vectorised with ``self.getVectorKeywordIndex`` and the
    documents with ``self.getVectorKeywordIndexSeprated``. The document
    vectors are stored on ``self.documentVectors`` as a side effect.

    Returns:
        list: One score per entry of ``self.documentVectors``, in iteration
        order.
    """
    query = self.getVectorKeywordIndex(searchList)
    self.documentVectors = self.getVectorKeywordIndexSeprated(documentList)
    return [util.jaccard(query, docVector) for docVector in self.documentVectors]