from BalanceNearlyBalancedGraph import balance_graph
from Graph import Graph
from WalkRandomEulerianCycle import walk_eulerian_cycle

# Load an adjacency list (one "<node> -> <successor>,<successor>,..." entry per
# line), build a graph from it, balance it, and print the walk through it.
with open('/home/user/Downloads/dataset_240261_6(1).txt',
          mode='r',
          encoding='utf-8') as f:
    data = f.read()

lines = data.split('\n')
adjacency_list = []
for raw_line in lines:
    entry = raw_line.strip()  # get rid of whitespace
    if not entry:  # get rid of empty lines
        continue
    # fields[0] is the source node, fields[1] the comma-separated successors.
    fields = entry.split(' -> ')
    adjacency_list.append((fields[0], fields[1].split(',')))

graph = Graph()
# Plain loops rather than a comprehension: insert_edge is called only for its
# effect on the graph, so there is no list worth building.
for from_node, to_nodes in adjacency_list:
    for to_node in to_nodes:
        graph.insert_edge(from_node, to_node)
graph, roots, tails = balance_graph(graph)

path = walk_eulerian_cycle(graph, roots.pop())
# The last connection in the cycle is artificial -- it was created by balancing
# the graph so that generating this path would be fast -- so drop it.
path.pop()
print(f'{"->".join(path)}')
    for i in range(len(cycle_path) - 1):
        kmer = cycle_path[i].data + cycle_path[i + 1].data[-1]
        out.append(Read(kmer))
    return out


def do_kmers_cycle(reads: List[Read]) -> bool:
    """Return whether the reads chain end-to-end into a closed cycle.

    Each read's ``suffix()`` must equal the ``prefix()`` of the read that
    follows it, and the last read must wrap around to the first one.

    Args:
        reads: Reads in the order they are expected to chain.

    Returns:
        ``True`` if every adjacent pair (including last -> first) matches,
        ``False`` as soon as one pair does not. An empty list has no pair
        that could mismatch, so it yields ``True`` instead of raising.
    """
    # Rotating the list by one lines every read up with its successor and
    # supplies the wrap-around (last -> first) pair as the final element.
    successors = reads[1:] + reads[:1]
    return not any(
        current.suffix() != following.prefix()
        for current, following in zip(reads, successors)
    )


# For each start node in turn: walk the graph from it, convert the walk to
# k-mers, then print the k-mers, their stitched string, and whether they chain
# into a cycle.
for start_label in ('00', '01', '10'):
    cycle_path = walk_eulerian_cycle(graph, Read(start_label))
    cycle_path_as_kmers = eularian_path_to_kmers(cycle_path)
    print(f'{cycle_path_as_kmers}')
    print(f'{cycle_path_as_kmers[0].stitch(cycle_path_as_kmers)}')
    print(f'{do_kmers_cycle(cycle_path_as_kmers)}')
# The walk from '11' is only started here; nothing that follows it is visible
# in this excerpt, so no output is produced for it in this block.
cycle_path = walk_eulerian_cycle(graph, Read('11'))
# Example no. 3
# 0
from Graph import Graph
from WalkRandomEulerianCycle import walk_eulerian_cycle

# Load an adjacency list (one "<node> -> <successor>,<successor>,..." entry per
# line), build a graph from it, and print a walk starting at its first node.
with open('/home/user/Downloads/dataset_240261_2(2).txt',
          mode='r',
          encoding='utf-8') as f:
    data = f.read()

lines = data.split('\n')
adjacency_list = []
for raw_line in lines:
    entry = raw_line.strip()  # get rid of whitespace
    if not entry:  # get rid of empty lines
        continue
    # fields[0] is the source node, fields[1] the comma-separated successors.
    fields = entry.split(' -> ')
    adjacency_list.append((fields[0], fields[1].split(',')))

graph = Graph()
# Plain loops rather than a comprehension: insert_edge is called only for its
# effect on the graph, so there is no list worth building.
for from_node, to_nodes in adjacency_list:
    for to_node in to_nodes:
        graph.insert_edge(from_node, to_node)

# Start from whichever node the graph yields first.
cycle_path = walk_eulerian_cycle(graph, next(graph.get_nodes()))
print(f'{"->".join(cycle_path)}')
from BalanceNearlyBalancedGraph import balance_graph
from ReadPair import ReadPair
from Kdmer import Kdmer
from ToOverlapGraphHash import to_overlap_graph
from WalkRandomEulerianCycle import walk_eulerian_cycle

# Read "<k> <d>" from the first non-blank line and one "<kmer>|<kmer>" pair from
# each following line, then stitch a genome from a walk over their overlap graph.
with open('/home/user/Downloads/dataset_240262_16(1).txt', mode='r', encoding='utf-8') as f:
    data = f.read()


# Trim surrounding whitespace from every line and keep only the non-empty ones.
lines = [trimmed for trimmed in map(str.strip, data.split('\n')) if trimmed]

k, d = map(int, lines[0].split(' '))

# Split each remaining line on '|' into its two halves before wrapping them.
split_pairs = (tuple(text.split('|', maxsplit=2)) for text in lines[1:])
kdmers = [Kdmer(first, second, d) for first, second in split_pairs]
read_pairs = list(map(ReadPair, kdmers))

graph = to_overlap_graph(read_pairs)
graph, roots, tails = balance_graph(graph)
start_node = list(roots)[0]
path = walk_eulerian_cycle(graph, start_node)
# Remove the last kdmer: the cycle created when balancing the graph is
# artificial -- it exists only so the path can be obtained via Eulerian cycles,
# which is an efficient way of reconstructing the string.
path.pop()

genome = path[0].stitch(path)
print(f'{genome}')