Ejemplo n.º 1
0
def main():
    """Print the de Bruijn graph of the fragments read from stdin, plus an Eulerian cycle.

    Input is read from stdin until EOF; lines are stripped and blank lines are
    dropped. The first remaining line is the command and every later line is
    one fragment. A fragment that appears ``c`` times produces ``c`` objects,
    each given its copy index (0 .. c-1) as the second constructor argument:

    * ``reads`` -- each line is passed as-is to ``Read``.
    * ``read-pairs`` -- each line holds ``|``-separated fields; the first and
      third are passed to ``Kdmer`` as its first two arguments and the second,
      converted to ``int``, as its third.

    The output is wrapped in a bordered ``<div>`` between ``{bm-disable-all}``
    and ``{bm-enable-all}`` markers; the closing markers are printed even when
    an error occurs.

    Raises:
        ValueError: if the command is neither ``reads`` nor ``read-pairs``.
        IndexError: if stdin contains no non-blank lines.
    """
    print("<div style=\"border:1px solid black;\">", end="\n\n")
    print("`{bm-disable-all}`", end="\n\n")
    try:
        # Collect every non-blank line until stdin is exhausted.
        lines = []
        while True:
            try:
                line = input().strip()
            except EOFError:
                break
            if line:
                lines.append(line)

        command = lines[0]
        lines = lines[1:]
        counter = Counter(lines)
        if command == 'reads':
            frags = [Read(r, i) for r, c in counter.items() for i in range(c)]
        elif command == 'read-pairs':
            frags = []
            for r, c in counter.items():
                fields = r.split('|')  # split once per distinct fragment
                for i in range(c):
                    frags.append(ReadPair(Kdmer(fields[0], fields[2], int(fields[1])), i))
        else:
            # A bare `raise` here would only produce "No active exception to
            # reraise"; report the offending command instead.
            raise ValueError(
                f"Unrecognized command {command!r}; expected 'reads' or 'read-pairs'"
            )
        graph = to_debruijn_graph(frags)
        print(f'Given the fragments {lines}, the de Bruijn graph is...', end="\n\n")
        print(f'```{{dot}}\n{to_graphviz(graph)}\n```\n\n')
        print('... and a Eulerian cycle is ...', end="\n\n")
        path = walk_eulerian_cycle(graph, frags[0].prefix())
        print(f'{" -> ".join([str(p) for p in path])}')
    finally:
        print("</div>", end="\n\n")
        print("`{bm-enable-all}`", end="\n\n")
Ejemplo n.º 2
0
def main():
    """Print the artificially balanced de Bruijn graph of the fragments read from stdin.

    Input is read from stdin until EOF; lines are stripped and blank lines are
    dropped. The first remaining line is the command and every later line is
    one fragment. A fragment that appears ``c`` times produces ``c`` objects,
    each given its copy index (0 .. c-1) as the second constructor argument:

    * ``reads`` -- each line is passed as-is to ``Read``.
    * ``read-pairs`` -- each line holds ``|``-separated fields; the first and
      third are passed to ``Kdmer`` as its first two arguments and the second,
      converted to ``int``, as its third.

    The graph built from the fragments is passed through ``balance_graph`` and
    the resulting graph, head nodes and tail nodes are printed. The output is
    wrapped in a bordered ``<div>`` between ``{bm-disable-all}`` and
    ``{bm-enable-all}`` markers; the closing markers are printed even when an
    error occurs.

    Raises:
        ValueError: if the command is neither ``reads`` nor ``read-pairs``.
        IndexError: if stdin contains no non-blank lines.
    """
    print("<div style=\"border:1px solid black;\">", end="\n\n")
    print("`{bm-disable-all}`", end="\n\n")
    try:
        # Collect every non-blank line until stdin is exhausted.
        lines = []
        while True:
            try:
                line = input().strip()
            except EOFError:
                break
            if line:
                lines.append(line)

        command = lines[0]
        lines = lines[1:]
        counter = Counter(lines)
        if command == 'reads':
            frags = [Read(r, i) for r, c in counter.items() for i in range(c)]
        elif command == 'read-pairs':
            frags = []
            for r, c in counter.items():
                fields = r.split('|')  # split once per distinct fragment
                for i in range(c):
                    frags.append(ReadPair(Kdmer(fields[0], fields[2], int(fields[1])), i))
        else:
            # A bare `raise` here would only produce "No active exception to
            # reraise"; report the offending command instead.
            raise ValueError(
                f"Unrecognized command {command!r}; expected 'reads' or 'read-pairs'"
            )
        graph = to_debruijn_graph(frags)
        graph, head_nodes, tail_nodes = balance_graph(graph)
        print(
            f'Given the fragments {lines}, the artificially balanced de Bruijn graph is...',
            end="\n\n")
        print(f'```{{dot}}\n{to_graphviz(graph)}\n```\n\n')
        print(
            f'... with original head nodes at {head_nodes} and tail nodes at {tail_nodes}.'
        )
    finally:
        print("</div>", end="\n\n")
        print("`{bm-enable-all}`", end="\n\n")
Ejemplo n.º 3
0
from Read import Read
from ToDeBruijnGraph import to_debruijn_graph
from Utils import slide_window

# Dataset layout: the first line holds k, the second line holds the DNA string.
dataset_path = '/home/user/Downloads/dataset_240257_6(1).txt'
with open(dataset_path, mode='r', encoding='utf-8') as f:
    data = f.read()

lines = data.split('\n')
k, dna = int(lines[0].strip()), lines[1].strip()

# One Read per window produced by slide_window over the DNA string.
reads = []
for kmer, _ in slide_window(dna, k):
    reads.append(Read(kmer))

graph = to_debruijn_graph(reads)

# Print "node -> out1,out2,..." for every node that has at least one output.
for node, other_nodes in graph.get_all_outputs():
    other_nodes = list(other_nodes)
    if other_nodes:
        targets = ",".join(str(x) for x in other_nodes)
        print(f'{node} -> {targets}')
Ejemplo n.º 4
0
from ToDeBruijnGraph import to_debruijn_graph

# Load the xz-compressed reads file, keeping only stripped, non-empty lines.
reads_filepath = 'FinalChallengeReads.txt.xz'
with lzma.open(reads_filepath, mode='rt', encoding='utf-8') as f:
    lines = [text for text in map(str.strip, f.read().splitlines()) if text]

# Each line is split on '|' and its two fields become a Kdmer with a fixed
# third argument of 1000; every Kdmer is then wrapped in a ReadPair.
lines_split = [tuple(entry.split('|', maxsplit=2)) for entry in lines]
kdmers = [Kdmer(k1, k2, 1000) for k1, k2 in lines_split]
rps = list(map(ReadPair, kdmers))

# Shatter every read-pair with argument 40 and pool all the pieces.
broken_rps = []
for rp in rps:
    broken_rps.extend(rp.shatter(40))

# Remove duplicate pieces (set iteration order decides the resulting order).
broken_rps = [*set(broken_rps)]

graph = to_debruijn_graph(broken_rps)
contig_paths = find_maximal_non_branching_paths(graph)

# Shortest paths first.
contig_paths.sort(key=len)

for path in contig_paths:
    if len(path) >= path[0].d:
        # Path has at least `d` elements: stitch the whole path in one go.
        out = path[0].stitch(path)
        print(f'{len(path)} kd-mers = {out}')
        continue

    # Otherwise stitch the head and tail sequences independently.
    heads = [Read(p.data.head) for p in path]
    heads_out = heads[0].stitch(heads)
    tails = [Read(p.data.tail) for p in path]
    tails_out = tails[0].stitch(tails)
    print(f'{len(heads)} k-mers = {heads_out}')
    print(f'{len(tails)} k-mers = {tails_out}')