def __computeSimilarity(self, keyHashesInput, keyHashesBase, outputFile, lsh_band):
    """Score every input entry against every base entry and write the matches.

    Each entry of ``keyHashesInput`` and ``keyHashesBase`` is an indexable
    sequence whose first element is the key and whose remaining elements
    are the values compared (presumably a min-hash signature -- confirm
    with the caller).

    A pair is written to ``outputFile`` as
    ``<input key><separator><base key><separator><score>`` followed by a
    newline when its score is at least ``self.scorethreshold`` and either
    the score is below 1.0 or ``lsh_band`` equals ``"000"``.

    Args:
        keyHashesInput: Re-iterable collection of input entries.
        keyHashesBase: Re-iterable collection of base entries; it is
            traversed once per input entry.
        outputFile: Object exposing ``write(str)``.
        lsh_band: Band identifier; only the ``"000"`` band emits pairs
            whose score is exactly 1.0 or higher.

    Returns:
        The number of lines written to ``outputFile``.
    """
    written = 0
    for input_entry in keyHashesInput:
        input_key, input_hashes = input_entry[0], input_entry[1:]
        for base_entry in keyHashesBase:
            base_key, base_hashes = base_entry[0], base_entry[1:]

            # Equal value lists are scored 1.0 without calling the helper.
            similarity = (
                util.compute_list_similarity(input_hashes, base_hashes)
                if input_hashes != base_hashes
                else 1.0
            )

            # NOTE(review): perfect scores are restricted to band "000",
            # presumably so they are not repeated for every band -- confirm.
            if similarity >= self.scorethreshold and (
                similarity < 1.0 or lsh_band == "000"
            ):
                sep = self.separator
                outputFile.write(
                    input_key + sep + base_key + sep + str(similarity) + "\n"
                )
                written += 1
    return written
def __compute_similarity(self, key_hashes_array, output_file, lsh_band):
    """Score the entries of one collection against each other and write matches.

    Each entry of ``key_hashes_array`` is an indexable sequence whose first
    element is the key and whose remaining elements are the values compared
    (presumably a min-hash signature -- confirm with the caller). Only
    ordered pairs with ``key1 < key2`` are scored, so an entry is never
    paired with itself and each unordered pair of distinct keys is
    considered once.

    A pair is written to ``output_file`` as
    ``<key1><separator><key2><separator><score>`` followed by a newline
    when its score is at least ``self.score_threshold`` and either the
    score is below 1.0 or ``lsh_band`` equals ``"000"``.

    Args:
        key_hashes_array: Re-iterable collection of entries; it is
            traversed once per entry.
        output_file: Object exposing ``write(str)``.
        lsh_band: Band identifier; only the ``"000"`` band emits pairs
            whose score is exactly 1.0 or higher.

    Returns:
        The number of lines written to ``output_file``.
    """
    written = 0
    for left_entry in key_hashes_array:
        left_key, left_hashes = left_entry[0], left_entry[1:]
        for right_entry in key_hashes_array:
            right_key = right_entry[0]
            if not (left_key < right_key):
                continue

            right_hashes = right_entry[1:]

            # Equal value lists are scored 1.0 without calling the helper.
            similarity = (
                util.compute_list_similarity(left_hashes, right_hashes)
                if left_hashes != right_hashes
                else 1.0
            )

            # NOTE(review): perfect scores are restricted to band "000",
            # presumably so they are not repeated for every band -- confirm.
            if similarity >= self.score_threshold and (
                similarity < 1.0 or lsh_band == "000"
            ):
                sep = self.separator
                output_file.write(
                    left_key + sep + right_key + sep + str(similarity) + "\n"
                )
                written += 1
    return written