def test_set_abundance_num_hypothesis(hashes, abundances, sketch_size):
    """Check that set_abundances round-trips on a fixed-size sketch.

    The expected mapping is built by zipping ``hashes`` with ``abundances``
    (a repeated hash keeps the last abundance paired with it). After loading
    it into a ``MinHash`` created with ``sketch_size`` as its first argument,
    the sketch must hold exactly ``min(#positive-abundance hashes,
    sketch_size)`` entries, and every stored abundance must equal the one
    that was supplied.

    The arguments are presumably generated by a hypothesis ``@given``
    decorator that is not visible in this chunk -- confirm against the file.
    """
    expected = dict(zip(hashes, abundances))

    sketch = MinHash(sketch_size, 10, track_abundance=True)
    sketch.set_abundances(expected)
    stored = sketch.get_mins(with_abundance=True)

    # Only hashes with a strictly positive abundance are expected to be kept,
    # and the sketch can never hold more than sketch_size of them.
    positive_count = len([abund for abund in expected.values() if abund > 0])
    assert len(stored) == min(positive_count, sketch_size)

    for hashval, abund in stored.items():
        assert expected[hashval] == abund
def test_set_abundance_scaled_hypothesis(hashes, abundances, scaled):
    """Check that set_abundances round-trips on a scaled sketch.

    The expected mapping is built by zipping ``hashes`` with ``abundances``
    (a repeated hash keeps the last abundance paired with it). After loading
    it into a ``MinHash`` created with ``scaled=scaled``, the sketch must hold
    exactly the hashes that are at or below ``get_max_hash_for_scaled(scaled)``
    and have a strictly positive abundance, each with the abundance that was
    supplied.

    The arguments are presumably generated by a hypothesis ``@given``
    decorator that is not visible in this chunk -- confirm against the file.
    """
    expected = dict(zip(hashes, abundances))

    sketch = MinHash(0, 10, track_abundance=True, scaled=scaled)
    sketch.set_abundances(expected)

    hash_ceiling = get_max_hash_for_scaled(scaled)
    # Entries eligible for the sketch: within the hash ceiling and with a
    # strictly positive abundance.
    eligible = [
        hashval
        for hashval, abund in expected.items()
        if hashval <= hash_ceiling and abund > 0
    ]

    stored = sketch.get_mins(with_abundance=True)
    assert len(stored) == len(eligible)

    for hashval, abund in stored.items():
        assert expected[hashval] == abund
        assert hashval <= hash_ceiling
        assert abund > 0