Exemple #1
0
def bcalm(input_filename, output_filename, k, m):
    """Partition the input k-mers into minimizer buckets and compact each bucket.

    The function works in two phases:

    1. Every line of ``input_filename`` is turned into a k-mer and stored in
       the bucket selected by ``minimizer(kmer, m)``.
    2. Buckets are visited in the order produced by ``buckets.iterate()``.
       For each one a ``Graph(k)`` is built from the bucket file, compressed,
       and every resulting node is put back into the bucket chosen by
       ``minbutbiggerthan``.

    Args:
        input_filename: Path of the text file holding one k-mer record per line.
        output_filename: Passed through to the bucket container constructor.
        k: k-mer length; nodes are compared on their (k-1)-prefix and suffix.
        m: Minimizer length. NOTE: on the superbucket path (the only one
            currently enabled) this value is doubled before any use.

    Returns:
        None. Results are produced through the side effects of ``buckets``.
    """
    # Developer toggle: flip to True to use plain Buckets with the original m.
    simple_buckets = False

    # The input is opened before any bucket is created so that a missing file
    # fails early; the context manager guarantees the handle is closed as soon
    # as partitioning is done (or if anything in the setup raises).
    with open(input_filename) as input_file:
        if not simple_buckets:
            # Every later use of m (hashes, minimizers, compression) sees the
            # doubled value.
            m *= 2
        minimizers = sorted(
            {minimizer("".join(letters), m) for letters in product("acgt", repeat=m)}
        )
        if simple_buckets:
            buckets = Buckets(minimizers, output_filename)
        else:
            buckets = Superbuckets(minimizers, output_filename)
        precompute_hashes(m)

        # partition k-mers
        for line in input_file:
            # Strip surrounding whitespace, then drop the last remaining
            # character (presumably a record terminator -- TODO confirm
            # against the input format).
            kmer = line.strip()[:-1]
            bucket_minimizer = minimizer(kmer, m)
            buckets.put(kmer, bucket_minimizer)

    buckets.flush()
    buckets.stats()

    # process each bucket in minimizer order
    for bucket_file, bucket_minimizer in buckets.iterate():
        G = Graph(k)
        # Flush before importing -- presumably so nodes put during earlier
        # iterations have reached bucket_file; TODO confirm.
        buckets.flush()
        G.importg(bucket_file)
        G.debruijn()
        G.compress(bucket_minimizer, m)
        for node in G.nodes.values():
            if use_tags:
                node = untag(node)
            # Named to avoid shadowing the builtin min().
            target_minimizer = minbutbiggerthan(
                node[: k - 1], node[-(k - 1) :], bucket_minimizer, m
            )
            buckets.put(node, target_minimizer)
Exemple #2
0
 def can_compact(self, node_idx, node_label, bucket, minimizer_size):
     """Tell whether node ``node_idx`` may be compacted with respect to ``bucket``.

     An empty-string ``bucket`` always allows compaction. Otherwise the
     (k-1)-character overlap is taken from the end of the node when
     ``node_label`` is 'O' and from its start for any other label, and the
     answer is whether the minimizer of that overlap equals ``bucket``.
     """
     if bucket == "":
         return True
     sequence = self.nodes[node_idx]
     span = self.k - 1
     overlap = sequence[-span:] if node_label == 'O' else sequence[:span]
     return minimizer(overlap, minimizer_size) == bucket