예제 #1
0
    def setUp(self):
        """Build a deterministic Hamming-distance fixture for each test.

        Seeds the `random` module, then stores on the instance:
            family: an lsh.HammingDistanceFamily constructed with 20
            dist_thres: the distance threshold, 5
            dist_fn: a function counting mismatched positions between
                two equal-length sequences
        """
        # Set a random seed so hash functions are always the same
        random.seed(0)

        self.family = lsh.HammingDistanceFamily(20)
        self.dist_thres = 5

        def f(a, b):
            """Return the number of positions at which a and b differ."""
            # Hamming distance is only defined for equal-length inputs
            assert len(a) == len(b)
            # Lengths match, so zip pairs every position without
            # dropping any elements
            return sum(1 for x, y in zip(a, b) if x != y)

        self.dist_fn = f
예제 #2
0
    def __init__(self, dist_thres, probe_length):
        """Configure near-duplicate detection using Hamming distance.

        Args:
            dist_thres: largest Hamming distance at which two probes
                are still called near-duplicates; choose a value equal
                to or commensurate with (and no greater than) the
                number of mismatches at/below which a probe is
                considered to hybridize to a target sequence, so that
                candidate probes further apart than this value are not
                collapsed as near-duplicates
            probe_length: length of probes
        """
        super().__init__(k=20)

        def hamming_dist(probe_a, probe_b):
            # Both arguments are probe.Probe objects
            return probe_a.mismatches(probe_b)

        self.lsh_family = lsh.HammingDistanceFamily(probe_length)
        self.dist_thres = dist_thres
        self.dist_fn = hamming_dist
예제 #3
0
    def setUp(self):
        # Set a random seed so hash functions are always the same
        random.seed(0)

        self.family = lsh.HammingDistanceFamily(20)