def test_circular(self, circular_linear_structure):
    """Assemble rightwards from the first K bases of the fixture contig.

    Uses the ``circular_linear_structure`` fixture, which yields a
    ``(graph, contig)`` pair.  The assembled path is compared (via
    ``utils._equals_rc``) against the contig prefix of the same length.
    """
    graph, contig = circular_linear_structure
    assembler = khmer.LinearAssembler(graph)

    seed = contig[:K]
    path = assembler.assemble(seed, direction='R')

    # Kept for debugging output when the assertion below fails.
    print(path, ',', contig)

    prefix = contig[:len(path)]
    assert utils._equals_rc(path, prefix)
def test_branch_to_end(self, left_tip_structure):
    """Assemble from the branch point (HDN) through to the contig end.

    The path must have exactly as many bases as ``contig[HDN.pos:]``
    and match that suffix according to ``utils._equals_rc``.
    """
    # Only graph, contig and HDN are needed from the fixture tuple.
    graph, contig, _, HDN, _, _ = left_tip_structure
    assembler = khmer.LinearAssembler(graph)

    path = assembler.assemble(HDN)

    suffix = contig[HDN.pos:]
    assert len(path) == len(contig) - HDN.pos
    assert utils._equals_rc(path, suffix)
def test_end_to_beginning(self, right_tip_structure):
    """Assemble from the last K bases and expect the full contig back.

    Should have exactly the same behavior as the
    right-of-branch-outwards case: the whole contig is recovered.
    """
    # Only graph and contig are needed from the fixture tuple.
    graph, contig, _, _, _, _ = right_tip_structure
    assembler = khmer.LinearAssembler(graph)

    seed = contig[-K:]
    path = assembler.assemble(seed)

    assert len(path) == len(contig)
    assert utils._equals_rc(path, contig)
def test_left_of_branch_to_beginning_revcomp(self, right_tip_structure):
    """Assemble starting from the reverse complement of ``L``.

    ``L`` comes from the fixture (presumably the k-mer just left of the
    branch -- confirm against the fixture definition).  The resulting
    path must be ``HDN.pos + K`` bases long and match the contig prefix
    of that length.
    """
    # R and tip from the fixture tuple are not used here.
    graph, contig, L, HDN, _, _ = right_tip_structure
    assembler = khmer.LinearAssembler(graph)

    seed = revcomp(L)
    path = assembler.assemble(seed)

    assert len(path) == HDN.pos + K
    prefix = contig[:len(path)]
    assert utils._equals_rc(path, prefix)
def test_beginning_to_branch(self, right_tip_structure):
    """Assemble from the start of the contig up until the branch point.

    The path must be ``HDN.pos + K`` bases long and match the contig
    prefix of that length.
    """
    # Only graph, contig and HDN are needed from the fixture tuple.
    graph, contig, _, HDN, _, _ = right_tip_structure
    assembler = khmer.LinearAssembler(graph)

    seed = contig[0:K]
    path = assembler.assemble(seed)

    assert len(path) == HDN.pos + K
    prefix = contig[:len(path)]
    assert utils._equals_rc(path, prefix)
def test_all_start_positions(self, linear_structure):
    """Assemble the entire contig, starting from wherever.

    Seeds are taken every 150 bases along the contig; each seed must
    assemble to the whole contig.  The failing offset is reported as
    the assertion message.
    """
    graph, contig = linear_structure
    assembler = khmer.LinearAssembler(graph)

    step = 150
    for start in range(0, len(contig), step):
        seed = contig[start:start + K]
        path = assembler.assemble(seed)
        assert utils._equals_rc(path, contig), start
def test_end_to_branch(self, left_tip_structure):
    """Assemble from the contig end back until the branch point.

    The result should include the HDN: it must match
    ``contig[HDN.pos:]`` and have the same length as that suffix.
    """
    # Only graph, contig and HDN are needed from the fixture tuple.
    graph, contig, _, HDN, _, _ = left_tip_structure
    assembler = khmer.LinearAssembler(graph)

    seed = contig[-K:]
    path = assembler.assemble(seed)

    suffix = contig[HDN.pos:]
    assert len(path) == len(contig) - HDN.pos
    assert utils._equals_rc(path, suffix)
def test_all_right_to_end(self, linear_structure):
    """Assemble directed right from seeds taken every 150 bases.

    Each rightward assembly must match the contig from the seed's
    start offset to the end.  The failing offset is reported as the
    assertion message.
    """
    graph, contig = linear_structure
    assembler = khmer.LinearAssembler(graph)

    step = 150
    for start in range(0, len(contig), step):
        seed = contig[start:start + K]
        path = assembler.assemble(seed, direction='R')

        # Debug output: the assembled path and the part of the contig
        # that lies before the seed.
        print(path, ', ', contig[:start])

        remainder = contig[start:]
        assert utils._equals_rc(path, remainder), start
def test_from_branch_to_ends_with_stopbf_revcomp(self, left_tip_structure):
    """Block the tip with a stop filter and assemble from revcomp(HDN).

    With the tip counted into the stop Nodegraph, the assembly should
    return a full-length contig.
    """
    # L and R from the fixture tuple are not used here.
    graph, contig, _, HDN, _, tip = left_tip_structure
    assembler = khmer.LinearAssembler(graph)

    # The stop filter holds only the tip k-mer.
    stop_bf = khmer.Nodegraph(K, 1e5, 4)
    stop_bf.count(tip)

    seed = revcomp(HDN)
    path = assembler.assemble(seed, stop_bf)

    assert len(path) == len(contig)
    assert utils._equals_rc(path, contig)
def test_right_of_branch_outwards_to_ends(self, right_tip_structure):
    """Assemble from right of the branch point (at R).

    Should get the *entire* original contig, as the assembler will
    move left relative to the branch and not consider it a high
    degree node.
    """
    # Only graph, contig and R are needed from the fixture tuple.
    graph, contig, _, _, R, _ = right_tip_structure
    assembler = khmer.LinearAssembler(graph)

    path = assembler.assemble(R)

    assert len(path) == len(contig)
    assert utils._equals_rc(path, contig)
def test_single_node_flanked_by_hdns(self, left_tip_structure):
    """Assemble a single node flanked by high-degree nodes.

    The fixture graph is modified in place: a copy of the contig
    mutated at position ``HDN.pos + K`` is consumed into it before
    assembling from the HDN.  The assembly is then expected to be
    just the HDN k-mer itself (length K).
    """
    # Only graph, contig and HDN are needed from the fixture tuple.
    graph, contig, _, HDN, _, _ = left_tip_structure
    assembler = khmer.LinearAssembler(graph)

    # Consume the mutated contig into the graph the assembler uses.
    mutated = mutate_position(contig, HDN.pos + K)
    graph.consume(mutated)

    path = assembler.assemble(HDN)

    assert len(path) == K
    assert utils._equals_rc(path, HDN)
def test_end_thru_tip_with_stopbf(self, left_tip_structure):
    """Assemble from the contig end with the original path blocked.

    ``L`` is counted into the stop filter, so the assembly is expected
    to run up to the branch point and then include the introduced
    branch (the tip) instead of the original path.
    """
    # R from the fixture tuple is not used here.
    graph, contig, L, HDN, _, tip = left_tip_structure
    assembler = khmer.LinearAssembler(graph)

    # Block the original path by putting L in the stop filter.
    stop_bf = khmer.Nodegraph(K, 1e5, 4)
    stop_bf.count(L)

    seed = contig[-K:]
    path = assembler.assemble(seed, stop_bf)

    assert len(path) == len(contig) - HDN.pos + 1

    # Expected: the tip k-mer, plus the last base of the HDN through
    # the end of the contig.
    expected = tip + contig[HDN.pos + K - 1:]
    assert utils._equals_rc(path, expected)
def traverse_and_mark_linear_paths(graph, nk, stop_bf, pathy, degree_nodes):
    """Traverse the linear path seeded at ``nk`` and record it in ``pathy``.

    Calls ``graph.traverse_linear_path(nk, degree_nodes, stop_bf)``; a
    zero-size result is ignored.  Otherwise a new linear node is created
    in ``pathy``, the path is assembled into a contig and stored with
    ``pathy.add_assembly``, and the contig's first and last k-mers are
    added to ``stop_bf``.  Then either every adjacent k-mer is linked to
    the new node, or, if there are none, every visited k-mer is added to
    ``stop_bf``.

    Several sanity checks only print a diagnostic instead of failing.
    When ``pathy.assemblyfp`` is falsy the function hits ``assert 0``.
    """
    size, adj_kmers, visited = graph.traverse_linear_path(
        nk, degree_nodes, stop_bf)

    # 0 length paths
    if not size:
        return

    # get an ID for the new path
    path_id = pathy.new_linear_node()

    # Running without an assembly output is treated as an error here.
    if not pathy.assemblyfp:
        assert 0
    else:
        assembler = khmer.LinearAssembler(graph, stop_bf)
        contig = assembler.assemble(nk)
        pathy.add_assembly(path_id, contig)

        # Diagnostics only: report inconsistencies but carry on.
        if size and not contig:
            print('nonzero size, but contig is not produced. WTF.')

        if contig and graph.get_min_count(contig) == 0:
            print('generated k-mers not in BF. sigh.')

        # Add both terminal k-mers of the contig to the stop filter.
        if contig:
            stop_bf.add(contig[:graph.ksize()])
            stop_bf.add(contig[-graph.ksize():])

        # A path of `size` k-mers should span size + ksize - 1 bases.
        if len(contig) and size + graph.ksize() - 1 != len(contig):
            print('visited k-mers != contig size. WTF?')

    if not adj_kmers:
        # a purely linear path; add all the k-mers to stop bf to prevent
        # traversing in both directions.
        for seen_kmer in visited:
            stop_bf.add(seen_kmer)
        return

    # add all adjacencies
    for adj_kmer in adj_kmers:
        neighbour_id = pathy.kmers_to_nodes[adj_kmer]
        pathy.add_adjacency(path_id, neighbour_id)