def search_hash(h1, arr):
    """Find the entry of ``arr`` with the largest ``hamming_distance`` to ``h1``.

    Entries equal to ``h1`` are skipped. On ties the earliest entry wins,
    because a later entry must score strictly higher to replace it.

    NOTE(review): the original locals were named ``lowest`` although the
    comparison keeps the maximum; the comparison is left unchanged here --
    confirm the intended direction against ``hamming_distance``.

    :param h1: reference hash
    :param arr: iterable of hashes to scan
    :return: list ``[score, index, hash]`` describing the selected entry,
        or ``[-math.inf, None, None]`` when ``arr`` contains no entry
        different from ``h1``
    """
    best_score = -math.inf
    # Pre-bind the outputs so an empty or all-identical ``arr`` returns
    # placeholders instead of raising UnboundLocalError at the return.
    best_index = None
    best_hash = None
    for index, h in enumerate(arr):
        score = hamming_distance(h1, h)
        if h != h1 and score > best_score:
            best_score, best_index, best_hash = score, index, h
    return [best_score, best_index, best_hash]
def hamming_dist(self, x, y):
    """Compute ``hamming_distance(x, yi)`` for every element ``yi`` of ``y``.

    :param x: hex_str, e.g. 'f012d208' (a 32bits binary code)
    :param y: list, whose elements are hex_str
    :return: list with one distance per element of ``y``, in the same order
    """
    return [hamming_distance(x, code) for code in y]
def hammingsimilarity_countingsort(self, x, y, max_dist, threshold):
    """Sort the indices of ``y`` by distance to ``x`` using counting sort.

    Each index of ``y`` is appended to the bucket numbered by
    ``hamming_distance(x, yi)``. Concatenating the buckets in ascending
    order gives the indices ordered by distance, with ties kept in input
    order.

    :param x: hex_str, e.g. 'f012d208' (a 32bits binary code)
    :param y: list, whose elements are hex_str
    :param max_dist: int, equal to code length; buckets ``0..max_dist``
        are created, so a distance above it raises ``IndexError``
    :param threshold: int, number of the last bucket included in the top
        result (samples up to this distance should be further refined)
    :return: tuple ``(top_result, final_result)``. ``final_result`` is the
        full list of indices ordered by distance. ``top_result`` is the
        prefix of it covering buckets ``0..threshold``, or ``None`` when
        ``threshold`` does not name an existing bucket.
    """
    # Compute each distance once and drop the index into its bucket.
    buckets = [[] for _ in range(max_dist + 1)]
    for idx, yi in enumerate(y):
        buckets[hamming_distance(x, yi)].append(idx)

    # Merge the buckets in ascending distance order. ``top_result`` is
    # initialised once, before the loop, so the snapshot taken at the
    # threshold bucket is not overwritten by later iterations.
    final_result = []
    top_result = None
    for dist, bucket in enumerate(buckets):
        final_result += bucket
        if dist == threshold:
            # The list holds only int indices, so a shallow copy is an
            # independent snapshot.
            top_result = list(final_result)
    return top_result, final_result
def func(x, sub_y, start_idx, suby_len):
    """Pair each element of ``sub_y`` with its distance to ``x`` and an offset index.

    Slot ``i`` of the output holds
    ``(hamming_distance(x, sub_y[i]), start_idx + i)``.
    NOTE(review): presumably ``sub_y`` is a slice of a larger list and
    ``start_idx`` its starting position there -- confirm against callers.

    :param x: value compared against every element of ``sub_y``
    :param sub_y: iterable of values to compare with ``x``
    :param start_idx: offset added to each local position
    :param suby_len: length of the returned list; slots beyond the
        elements of ``sub_y`` stay ``None``
    :return: list of ``(distance, index)`` tuples
    """
    pairs = [None] * suby_len
    for offset, code in enumerate(sub_y):
        distance = hamming_distance(x, code)
        pairs[offset] = (distance, start_idx + offset)
    return pairs
def all_hamming(h1, arr):
    """Return ``hamming_distance(h1, h)`` for every ``h`` in ``arr``, in order.

    :param h1: reference hash
    :param arr: iterable of hashes to compare with ``h1``
    :return: list of distances, one per element of ``arr``
    """
    return [hamming_distance(h1, candidate) for candidate in arr]
def test_hamming_distance_errors(hex1, hex2, exception, msg):
    """Check that ``hamming_distance`` rejects the given inputs.

    The call must raise ``exception`` and the text of the raised error
    must contain ``msg``. The arguments are presumably supplied by a
    parametrization or fixtures defined outside this block.
    """
    with pytest.raises(exception) as raised:
        hamming_distance(hex1, hex2)
    error_text = str(raised.value)
    assert msg in error_text
def test_hamming_distance(hex1, hex2, expected):
    """Check that ``hamming_distance(hex1, hex2)`` equals ``expected``.

    The arguments are presumably supplied by a parametrization or
    fixtures defined outside this block.
    """
    actual = hamming_distance(hex1, hex2)
    assert actual == expected