예제 #1
0
def test_hash_murmur3():
    """Check khmer.hash_murmur3 against fixed values and non-zero results.

    The pinned digests are identical for 'AAAA'/'TTTT' and for
    'CCCC'/'GGGG'; the remaining k-mers are only required to hash to a
    non-zero value.
    """
    pinned = (
        ('AAAA', 526240128537019279),
        ('TTTT', 526240128537019279),
        ('CCCC', 14391997331386449225),
        ('GGGG', 14391997331386449225),
    )
    for kmer, digest in pinned:
        assert khmer.hash_murmur3(kmer) == digest

    for kmer in ('TATATATATATATATATATA', 'TTTTGCAAAA', 'GAAAATTTTC'):
        assert khmer.hash_murmur3(kmer) != 0
예제 #2
0
def test_hash_murmur3():
    """Verify khmer.hash_murmur3 on a handful of short DNA strings.

    Four homopolymer 4-mers are compared against hard-coded digests (the
    same digest is expected for 'AAAA' and 'TTTT', and likewise for 'CCCC'
    and 'GGGG'); three longer strings must simply not hash to zero.
    """
    murmur = khmer.hash_murmur3

    # Each expected digest is listed once, followed by the k-mers that
    # must produce it, in the order they are checked.
    at_digest = 526240128537019279
    cg_digest = 14391997331386449225
    assert murmur('AAAA') == at_digest
    assert murmur('TTTT') == at_digest
    assert murmur('CCCC') == cg_digest
    assert murmur('GGGG') == cg_digest

    nonzero_kmers = ['TATATATATATATATATATA', 'TTTTGCAAAA', 'GAAAATTTTC']
    for sequence in nonzero_kmers:
        assert murmur(sequence) != 0
예제 #3
0
    def add(self, kmer, rev_comp=False):
        """
        Add kmer into sketch, keeping sketch sorted, update counts accordingly.

        The k-mer is hashed with ``khmer.hash_murmur3`` and reduced modulo
        ``self.p``. If the reduced hash is already present in ``self._mins``
        its entry in ``self._counts`` is incremented; if it is smaller than
        the current largest entry it is inserted at its sorted position and
        the largest entry is dropped, so the lists keep their length.

        :param kmer: the k-mer to add.
        :param rev_comp: when true, the k-mer and its
            ``khmer.reverse_complement`` are both hashed, the smaller hash is
            used, and the sequence that produced it is the one recorded in
            ``self._kmers``.
        :return: None. ``self._mins``, ``self._counts`` and (when truthy)
            ``self._kmers`` are modified in place.
        """
        _mins = self._mins
        _counts = self._counts
        _kmers = self._kmers

        if rev_comp:
            h1 = khmer.hash_murmur3(kmer)
            # Compute the reverse complement once and reuse it below.
            rc_kmer = khmer.reverse_complement(kmer)
            h2 = khmer.hash_murmur3(rc_kmer)
            h = min(h1, h2)
            if h == h2:
                kmer = rc_kmer
        else:
            h = khmer.hash_murmur3(kmer)

        h = h % self.p
        # When a hash_list is supplied, only hashes it contains are admitted.
        if self.hash_list and h not in self.hash_list:
            return

        # Strictly greater: a hash equal to the largest retained value is
        # already in the sketch and must still have its count incremented.
        if h > _mins[-1]:
            return

        # h <= _mins[-1] here, so the insertion index is always in range.
        i = bisect.bisect_left(_mins, h)
        if _mins[i] == h:  # already present: just bump the count
            _counts[i] += 1
            return

        # New hash: insert in sorted position and drop the largest entry so
        # the parallel lists keep a fixed length.
        _mins.insert(i, h)
        _mins.pop()
        _counts.insert(i, 1)
        _counts.pop()
        if _kmers:
            # np.bytes_ is the same type the removed np.string_ alias named.
            _kmers.insert(i, np.bytes_(kmer))
            _kmers.pop()
예제 #4
0
    def add(self, kmer, rev_comp=False):
        """
        Add kmer into sketch, keeping sketch sorted, update counts accordingly.

        Hashes the k-mer with ``khmer.hash_murmur3``, reduces the hash modulo
        ``self.p`` and then either increments the matching entry of
        ``self._counts`` (hash already in ``self._mins``) or inserts the hash
        at its sorted position while discarding the current largest entry.

        :param kmer: the k-mer to add.
        :param rev_comp: if true, hash both the k-mer and the result of
            ``khmer.reverse_complement`` on it and keep the smaller hash; the
            sequence that yielded that hash is what gets stored in
            ``self._kmers``.
        :return: None. The sketch lists on ``self`` are updated in place.
        """
        _mins = self._mins
        _counts = self._counts
        _kmers = self._kmers

        if rev_comp:
            h1 = khmer.hash_murmur3(kmer)
            # Reverse complement is needed for hashing and possibly storage;
            # compute it a single time.
            rc_kmer = khmer.reverse_complement(kmer)
            h2 = khmer.hash_murmur3(rc_kmer)
            h = min(h1, h2)
            if h == h2:
                kmer = rc_kmer
        else:
            h = khmer.hash_murmur3(kmer)

        h = h % self.p
        # An optional hash_list restricts which hashes may enter the sketch.
        if self.hash_list and h not in self.hash_list:
            return

        # Only hashes strictly larger than the current maximum are rejected.
        # Rejecting on equality would skip the count update for a hash that
        # is already stored in the last slot.
        if h > _mins[-1]:
            return

        # Because h <= _mins[-1], bisect_left returns a valid index.
        i = bisect.bisect_left(_mins, h)
        if _mins[i] == h:  # hash already tracked: increment its count
            _counts[i] += 1
            return

        # Otherwise insert h with a count of 1 and pop the tail of each
        # parallel list so their lengths stay unchanged.
        _mins.insert(i, h)
        _mins.pop()
        _counts.insert(i, 1)
        _counts.pop()
        if _kmers:
            # np.string_ no longer exists in NumPy 2.0; np.bytes_ is the
            # identical type under its current name.
            _kmers.insert(i, np.bytes_(kmer))
            _kmers.pop()
예제 #5
0
 def add(self, kmer):
     """Add the murmur3 hash of ``kmer`` to one of ``self.sketches``.

     The sketch is selected by indexing ``self.sketches`` with
     ``khmer.forward_hash(kmer, self.prefixsize)``; the value added to it
     is ``khmer.hash_murmur3(kmer)``.
     """
     sketch = self.sketches[khmer.forward_hash(kmer, self.prefixsize)]
     kmer_hash = khmer.hash_murmur3(kmer)
     sketch.add(kmer_hash)
예제 #6
0
    def add(self, kmer):
        """Dispatch ``kmer`` to a sub-sketch and record its murmur3 hash there.

        ``khmer.forward_hash(kmer, self.prefixsize)`` yields the index into
        ``self.sketches``; the selected sketch then receives
        ``khmer.hash_murmur3(kmer)`` through its own ``add`` method.
        """
        bucket_index = khmer.forward_hash(kmer, self.prefixsize)
        bucket = self.sketches[bucket_index]

        digest = khmer.hash_murmur3(kmer)
        bucket.add(digest)
예제 #7
0
def test_hash_murmur3():
    """Check khmer.hash_murmur3 against hard-coded digests for four 4-mers.

    "AAAA" and "TTTT" are expected to share one digest, and "CCCC" and
    "GGGG" another.
    """
    expected_digests = {
        "AAAA": 526240128537019279,
        "TTTT": 526240128537019279,
        "CCCC": 14391997331386449225,
        "GGGG": 14391997331386449225,
    }
    for kmer, digest in expected_digests.items():
        assert khmer.hash_murmur3(kmer) == digest