Ejemplo n.º 1
0
 def test_make_canon_kmers(self):
     """Check that the canonical 4-mers of a short sequence are exactly right.

     The 4-long windows of 'ATCGGTA' are ATCG, TCGG, CGGT and GGTA. The
     expected set holds, for each window, the lexicographically smaller of
     the window and its reverse complement.
     """
     seq = 'ATCGGTA'
     expected_kmers = {'ATCG', 'CCGA', 'ACCG', 'GGTA'}
     # Materialise once so the comparison works whether make_kmers returns
     # a list, a set or a one-shot iterator.
     actual_kmers = set(make_kmers(seq, 4, canon=True))
     # Equality checks both directions: no unexpected kmers and no missing
     # ones. A membership-only loop would pass on an empty result.
     self.assertEqual(actual_kmers, expected_kmers)
 def test_kmer_stats(self):
     """Smoke-test stats() and search() on a cover built from 31-mers.

     Makes no assertions; the test passes as long as nothing raises.
     """
     contig_prefix = ECOLI.longest_contig()[:1000]
     canon_kmers = make_kmers(contig_prefix, 31, canon=True)
     cover = GreedyRadialCover(hamming_distance, 2)
     for canon_kmer in canon_kmers:
         cover.add(canon_kmer)
     cover.stats()
     # Only the first ten kmers are used as search queries.
     for query in canon_kmers[:10]:
         cover.search(query, 1)
Ejemplo n.º 3
0
def kmer_entropy(seqs, k=31):
    """Return the base-2 entropy of the kmer counts pooled over ``seqs``.

    Every kmer yielded by ``make_kmers(seq, k, canon=True)`` for each
    sequence is tallied. With ``p`` the share of one distinct kmer in the
    overall tally, the result is ``-sum(p * log2(p))``. When no kmers are
    produced the result is ``0``.
    """
    counts = {}
    all_kmers = (
        kmer
        for seq in seqs
        for kmer in make_kmers(seq, k, canon=True)
    )
    for kmer in all_kmers:
        counts[kmer] = counts.get(kmer, 0) + 1

    total = sum(counts.values())
    weighted_log_sum = 0
    for count in counts.values():
        p = count / total
        weighted_log_sum += p * log2(p)
    # The accumulated sum is non-positive; flip the sign on the way out.
    return -weighted_log_sum
Ejemplo n.º 4
0
 def kmer_search(cls, seq, eps=0.5):
     """Return contigs that match enough of the kmers of ``seq``.

     Each kmer from ``make_kmers(seq, MAX_K)`` is looked up with
     ``cls.query.filter_by(kmer=...)``, and every returned result is
     tallied once per matching kmer. Results whose tally is not below
     ``eps * len(kmers)`` are passed to ``Contig.from_uuid`` and the
     resulting objects are returned as a list.

     NOTE(review): the tallied keys are whatever ``.all()`` yields; this
     presumably needs to be something ``Contig.from_uuid`` accepts -- confirm.
     """
     query_kmers = make_kmers(seq, MAX_K)
     hit_counts = {}
     for query_kmer in query_kmers:
         matches = cls.query.filter_by(kmer=query_kmer).all()
         for match in matches:
             hit_counts[match] = hit_counts.get(match, 0) + 1
     return [
         Contig.from_uuid(match)
         for match, hits in hit_counts.items()
         if not (hits < (eps * len(query_kmers)))
     ]
Ejemplo n.º 5
0
 def sub_k_dist(row):
     """Return the share of the query's kmers that also occur in the target.

     Kmers of length ``sub_k`` (taken from the enclosing scope) are made
     from ``row['query']`` and ``row['target']``. The result is the size of
     their ``&`` intersection divided by the number of query kmers.

     Raises ZeroDivisionError when the query yields no kmers.
     Assumes make_kmers returns a set-like object here -- TODO confirm.
     """
     query_kmers = make_kmers(row['query'], sub_k)
     target_kmers = make_kmers(row['target'], sub_k)
     shared = query_kmers & target_kmers
     return len(shared) / len(query_kmers)
Ejemplo n.º 6
0
def num_unique_subwords(seq, n):
    """Count the distinct items ``make_kmers(seq, n, canon=False)`` yields."""
    return len(set(make_kmers(seq, n, canon=False)))
Ejemplo n.º 7
0
 def add_contig(cls, contig):
     """Store one ``cls`` record per distinct kmer of ``contig.seq``.

     Kmers come from ``make_kmers(contig.seq, MAX_K)`` and are
     de-duplicated first. Each record is built as
     ``cls(contig.uuid, kmer)`` and added to ``db.session``, which is
     committed once after all records have been added.
     """
     distinct_kmers = set(make_kmers(contig.seq, MAX_K))
     session = db.session
     for kmer in distinct_kmers:
         record = cls(contig.uuid, kmer)
         session.add(record)
     session.commit()
 def test_add_to_cover(self):
     """Smoke-test adding 31-mers to a GreedyRadialCover.

     Makes no assertions; the test passes as long as nothing raises.
     """
     contig_prefix = ECOLI.longest_contig()[:1000]
     canon_kmers = make_kmers(contig_prefix, 31, canon=True)
     cover = GreedyRadialCover(hamming_distance, 2)
     for canon_kmer in canon_kmers:
         cover.add(canon_kmer)
 def test_make_kmers(self):
     """Sanity check: make_kmers runs on a 1000-long contig prefix without raising."""
     contig_prefix = ECOLI.longest_contig()[:1000]
     make_kmers(contig_prefix, 31, canon=True)