"""Read an adjacency list, balance the graph, and print a path found by an Eulerian cycle walk."""
from BalanceNearlyBalancedGraph import balance_graph
from Graph import Graph
from WalkRandomEulerianCycle import walk_eulerian_cycle

INPUT_PATH = '/home/user/Downloads/dataset_240261_6(1).txt'


def parse_adjacency_list(text):
    """Parse ``node -> a,b,c`` lines into ``(node, [a, b, c])`` pairs.

    Each line is stripped of surrounding whitespace and blank lines are
    skipped. Only the text between the first and second ``' -> '``
    separators is treated as the comma-separated list of target nodes.

    Raises:
        IndexError: if a non-blank line does not contain ``' -> '``.
    """
    entries = []
    for raw_line in text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        parts = line.split(' -> ')
        entries.append((parts[0], parts[1].split(',')))
    return entries


def main():
    """Load the graph from ``INPUT_PATH`` and print the path as ``a->b->c``."""
    with open(INPUT_PATH, mode='r', encoding='utf-8') as f:
        data = f.read()

    graph = Graph()
    # insert_edge is called for its side effect, so use a plain loop rather
    # than a comprehension that would build a throwaway list.
    for from_node, to_nodes in parse_adjacency_list(data):
        for to_node in to_nodes:
            graph.insert_edge(from_node, to_node)

    graph, roots, _tails = balance_graph(graph)
    path = walk_eulerian_cycle(graph, roots.pop())
    # The last connection in the cycle is artificial: it was created by
    # balancing so that this path could be generated quickly.
    path.pop()
    print('->'.join(path))


if __name__ == '__main__':
    main()
for i in range(len(cycle_path) - 1): kmer = cycle_path[i].data + cycle_path[i + 1].data[-1] out.append(Read(kmer)) return out def do_kmers_cycle(reads: List[Read]) -> bool: for i in range(len(reads) - 1): if reads[i].suffix() != reads[i + 1].prefix(): return False if reads[-1].suffix() != reads[0].prefix(): return False return True cycle_path = walk_eulerian_cycle(graph, Read('00')) cycle_path_as_kmers = eularian_path_to_kmers(cycle_path) print(f'{cycle_path_as_kmers}') print(f'{cycle_path_as_kmers[0].stitch(cycle_path_as_kmers)}') print(f'{do_kmers_cycle(cycle_path_as_kmers)}') cycle_path = walk_eulerian_cycle(graph, Read('01')) cycle_path_as_kmers = eularian_path_to_kmers(cycle_path) print(f'{cycle_path_as_kmers}') print(f'{cycle_path_as_kmers[0].stitch(cycle_path_as_kmers)}') print(f'{do_kmers_cycle(cycle_path_as_kmers)}') cycle_path = walk_eulerian_cycle(graph, Read('10')) cycle_path_as_kmers = eularian_path_to_kmers(cycle_path) print(f'{cycle_path_as_kmers}') print(f'{cycle_path_as_kmers[0].stitch(cycle_path_as_kmers)}') print(f'{do_kmers_cycle(cycle_path_as_kmers)}') cycle_path = walk_eulerian_cycle(graph, Read('11'))
"""Read an adjacency list and print an Eulerian cycle walk starting at the graph's first node."""
from Graph import Graph
from WalkRandomEulerianCycle import walk_eulerian_cycle

INPUT_PATH = '/home/user/Downloads/dataset_240261_2(2).txt'


def parse_adjacency_list(text):
    """Parse ``node -> a,b,c`` lines into ``(node, [a, b, c])`` pairs.

    Each line is stripped of surrounding whitespace and blank lines are
    skipped. Only the text between the first and second ``' -> '``
    separators is treated as the comma-separated list of target nodes.

    Raises:
        IndexError: if a non-blank line does not contain ``' -> '``.
    """
    entries = []
    for raw_line in text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        parts = line.split(' -> ')
        entries.append((parts[0], parts[1].split(',')))
    return entries


def main():
    """Load the graph from ``INPUT_PATH`` and print the cycle as ``a->b->c``."""
    with open(INPUT_PATH, mode='r', encoding='utf-8') as f:
        data = f.read()

    graph = Graph()
    # insert_edge is called for its side effect, so use a plain loop rather
    # than a comprehension that would build a throwaway list.
    for from_node, to_nodes in parse_adjacency_list(data):
        for to_node in to_nodes:
            graph.insert_edge(from_node, to_node)

    # Start the walk from the first node yielded by graph.get_nodes().
    cycle_path = walk_eulerian_cycle(graph, next(graph.get_nodes()))
    print('->'.join(cycle_path))


if __name__ == '__main__':
    main()
"""Reconstruct a string from paired reads via an overlap graph and an Eulerian cycle walk."""
from BalanceNearlyBalancedGraph import balance_graph
from ReadPair import ReadPair
from Kdmer import Kdmer
from ToOverlapGraphHash import to_overlap_graph
from WalkRandomEulerianCycle import walk_eulerian_cycle

INPUT_PATH = '/home/user/Downloads/dataset_240262_16(1).txt'


def parse_kdmer_input(text):
    """Parse the input text into ``(k, d, pairs)``.

    The first non-blank line holds two space-separated integers ``k d``.
    Every following non-blank line holds one pair written as ``first|second``.
    Lines are stripped of surrounding whitespace and blank lines are skipped.

    Returns:
        A tuple ``(k, d, pairs)`` where ``pairs`` is a list of
        ``(first, second)`` string tuples in file order.

    Raises:
        IndexError: if the text contains no non-blank lines.
        ValueError: if the header is not exactly two integers, or a pair
            line does not contain exactly one ``'|'``.
    """
    lines = [line.strip() for line in text.split('\n')]  # get rid of whitespace
    lines = [line for line in lines if line]  # get rid of empty lines
    k, d = [int(s) for s in lines[0].split(' ')]
    pairs = []
    for line in lines[1:]:
        # Unpacking into exactly two names rejects malformed lines outright.
        first, second = line.split('|')
        pairs.append((first, second))
    return k, d, pairs


def main():
    """Load the paired reads from ``INPUT_PATH`` and print the stitched result."""
    with open(INPUT_PATH, mode='r', encoding='utf-8') as f:
        data = f.read()

    # k is part of the file header but only d is needed to build each Kdmer.
    _k, d, pairs = parse_kdmer_input(data)
    read_pairs = [ReadPair(Kdmer(first, second, d)) for first, second in pairs]

    graph = to_overlap_graph(read_pairs)
    graph, roots, _tails = balance_graph(graph)
    # Start from the first root in iteration order, without copying roots.
    path = walk_eulerian_cycle(graph, next(iter(roots)))
    # Remove the last kdmer because the cycle created when balancing the graph
    # is artificial -- it exists only so the path can be found using Eulerian
    # cycles, which is an efficient way of reconstructing the string.
    path.pop()
    genome = path[0].stitch(path)
    print(f'{genome}')


if __name__ == '__main__':
    main()