def test_assemble_left_double_fork(self, left_double_fork_structure):
    """Assemble from the last K bases of the contig across a double fork.

    The assembler is first run with no labels, where a single path equal
    to ``contig[HDN.pos:]`` is expected.  The contig and the branch are
    then labeled (1 and 2) across the high-degree nodes, after which two
    paths are expected, one accepted by ``utils._equals_rc`` for each
    sequence.
    """
    # assemble entire contig + branch points b/c of labels; start from end
    graph, contig, L, HDN, R, branch = left_double_fork_structure
    labeller = khmer._GraphLabels(graph)
    assembler = khmer.SimpleLabeledAssembler(labeller)
    seed = contig[-K:]

    # first try without the labels
    unlabeled = assembler.assemble(seed)
    assert len(unlabeled) == 1
    # without labels, should get the beginning of the HDN thru the end
    assert unlabeled[0] == contig[HDN.pos:]

    # now add labels and check that we get two full length paths
    sequences = (contig, branch)
    high_degree = graph.find_high_degree_nodes(contig)
    high_degree += graph.find_high_degree_nodes(branch)
    print(list(high_degree))
    for label, sequence in enumerate(sequences, start=1):
        labeller.label_across_high_degree_nodes(sequence, high_degree, label)
    print(labeller.get_tag_labels(list(high_degree)[0]))

    labeled = assembler.assemble(seed)
    assert len(labeled) == 2
    for sequence in sequences:
        assert any(utils._equals_rc(found, sequence) for found in labeled)
def test_hash_as_seed(self, linear_structure):
    """Seed the assembler with ``graph.hash`` of the first K bases.

    The value popped from the assembled result must be accepted by
    ``utils._equals_rc`` when compared with the contig.
    """
    graph, contig = linear_structure
    assembler = khmer.SimpleLabeledAssembler(khmer.GraphLabels(graph))

    # Use the hashed k-mer, rather than the k-mer string, as the seed.
    seed_hash = graph.hash(contig[:K])
    assembled = assembler.assemble(seed_hash)
    assert utils._equals_rc(assembled.pop(), contig)
def test_assemble_tandem_repeats(self, tandem_repeat_structure):
    """Assemble from the first K bases of a tandem repeat.

    Exactly one path is expected, and its length must be
    ``len(repeat) + K - 1``.
    """
    # assemble one copy of a tandem repeat
    graph, repeat, tandem_repeats = tandem_repeat_structure
    assembler = khmer.SimpleLabeledAssembler(khmer._GraphLabels(graph))

    found = assembler.assemble(repeat[:K])
    assert len(found) == 1

    # There are K-1 k-mers spanning the junction between
    # the beginning and end of the repeat
    expected_length = len(repeat) + K - 1
    assert len(found[0]) == expected_length
def test_assemble_snp_bubble_single(self, snp_bubble_structure):
    """Assemble a SNP bubble with only the wildtype labeled.

    Two high-degree nodes are expected on the wildtype; after labeling
    the wildtype with 1, a single path accepted by ``utils._equals_rc``
    against the wildtype is expected.
    """
    # assemble entire contig + one of two paths through a bubble
    graph, wildtype, mutant, HDN_L, HDN_R = snp_bubble_structure
    labeller = khmer._GraphLabels(graph)
    assembler = khmer.SimpleLabeledAssembler(labeller)

    high_degree = graph.find_high_degree_nodes(wildtype)
    assert len(high_degree) == 2
    labeller.label_across_high_degree_nodes(wildtype, high_degree, 1)

    found = assembler.assemble(wildtype[:K])
    assert len(found) == 1
    only_path = found[0]
    assert utils._equals_rc(only_path, wildtype)
def test_beginning_to_end_across_tip(self, right_tip_structure):
    """Assemble the whole contig past a tip using labels.

    The contig is labeled with 1 across its high-degree nodes; the single
    assembled path must have the contig's length and be accepted by
    ``utils._equals_rc`` against it.
    """
    # assemble entire contig, ignoring branch point b/c of labels
    graph, contig, L, HDN, R, tip = right_tip_structure
    labeller = khmer._GraphLabels(graph)
    assembler = khmer.SimpleLabeledAssembler(labeller)

    high_degree = graph.find_high_degree_nodes(contig)
    # L, HDN, and R will be labeled with 1
    labeller.label_across_high_degree_nodes(contig, high_degree, 1)

    paths = assembler.assemble(contig[:K])
    assert len(paths) == 1, "there should only be one path"
    path = paths[0]  # @CTB

    assert len(path) == len(contig)
    assert utils._equals_rc(path, contig)
def test_assemble_snp_bubble_both(self, snp_bubble_structure):
    """Assemble a SNP bubble with both sides labeled.

    The wildtype is labeled 1 and the mutant 2 across the combined
    high-degree nodes (two are expected).  Two paths must come back, and
    ``utils._contains_rc`` must accept at least one path for each of the
    wildtype and the mutant.
    """
    # assemble entire contig + both paths
    graph, wildtype, mutant, HDN_L, HDN_R = snp_bubble_structure
    labeller = khmer._GraphLabels(graph)
    assembler = khmer.SimpleLabeledAssembler(labeller)
    alleles = (wildtype, mutant)

    high_degree = graph.find_high_degree_nodes(wildtype)
    high_degree += graph.find_high_degree_nodes(mutant)
    assert len(high_degree) == 2

    for label, allele in enumerate(alleles, start=1):
        labeller.label_across_high_degree_nodes(allele, high_degree, label)

    found = assembler.assemble(wildtype[:K])
    assert len(found) == 2
    for allele in alleles:
        assert any(utils._contains_rc(allele, candidate)
                   for candidate in found)
def test_assemble_right_double_fork(self, right_double_fork_structure):
    """Assemble from the first K bases of the contig across a double fork.

    The contig is labeled 1 and the branch 2 across the combined
    high-degree nodes; two paths are expected, one accepted by
    ``utils._equals_rc`` for each sequence.
    """
    # assemble two contigs from a double forked structure
    graph, contig, L, HDN, R, branch = right_double_fork_structure
    labeller = khmer._GraphLabels(graph)
    assembler = khmer.SimpleLabeledAssembler(labeller)
    sequences = (contig, branch)

    high_degree = graph.find_high_degree_nodes(contig)
    high_degree += graph.find_high_degree_nodes(branch)
    print(list(high_degree))

    for label, sequence in enumerate(sequences, start=1):
        labeller.label_across_high_degree_nodes(sequence, high_degree, label)
    print(labeller.get_tag_labels(list(high_degree)[0]))

    found = assembler.assemble(contig[:K])
    print('Path lengths', [len(candidate) for candidate in found])

    assert len(found) == 2
    for sequence in sequences:
        assert any(utils._equals_rc(candidate, sequence)
                   for candidate in found)
def test_assemble_snp_bubble_stopbf(self, snp_bubble_structure):
    """Assemble a doubly-labeled SNP bubble with the mutant side blocked.

    Both alleles are labeled (wildtype 1, mutant 2), then the K-length
    window of the mutant starting at ``HDN_L.pos + 1`` is added to a stop
    filter that is handed to ``assemble``.  Only one path is expected, and
    it must be accepted by ``utils._equals_rc`` against the wildtype.
    """
    # assemble one side of bubble, blocked with stop_bf,
    # when labels on both branches
    # stop_bf should trip a filter failure, negating the label spanning
    graph, wildtype, mutant, HDN_L, HDN_R = snp_bubble_structure
    stop_bf = khmer.Nodegraph(K, 1e5, 4)
    labeller = khmer._GraphLabels(graph)
    assembler = khmer.SimpleLabeledAssembler(labeller)

    high_degree = graph.find_high_degree_nodes(wildtype)
    high_degree += graph.find_high_degree_nodes(mutant)
    assert len(high_degree) == 2

    for label, allele in enumerate((wildtype, mutant), start=1):
        labeller.label_across_high_degree_nodes(allele, high_degree, label)

    # do the labeling, but block the mutant with stop_bf
    block_start = HDN_L.pos + 1
    stop_bf.count(mutant[block_start:HDN_L.pos + K + 1])

    found = assembler.assemble(wildtype[:K], stop_bf)
    assert len(found) == 1
    assert any(utils._equals_rc(candidate, wildtype) for candidate in found)
def test_assemble_right_triple_fork(self, right_triple_fork_structure):
    """Assemble from the first K bases of the contig across a triple fork.

    The contig, top sequence and bottom sequence are labeled 1, 2 and 3
    across the combined high-degree nodes; three paths are expected, one
    accepted by ``utils._equals_rc`` for each sequence.
    """
    # assemble three contigs from a trip fork
    (graph, contig, L, HDN, R,
     top_sequence, bottom_sequence) = right_triple_fork_structure
    labeller = khmer._GraphLabels(graph)
    assembler = khmer.SimpleLabeledAssembler(labeller)
    sequences = (contig, top_sequence, bottom_sequence)

    # Collect the high-degree nodes of every sequence into one set.
    high_degree = graph.find_high_degree_nodes(contig)
    for sequence in sequences[1:]:
        high_degree += graph.find_high_degree_nodes(sequence)
    print(list(high_degree))

    for label, sequence in enumerate(sequences, start=1):
        labeller.label_across_high_degree_nodes(sequence, high_degree, label)
    print(labeller.get_tag_labels(list(high_degree)[0]))

    found = assembler.assemble(contig[:K])
    print([len(candidate) for candidate in found])

    assert len(found) == 3
    for sequence in sequences:
        assert any(utils._equals_rc(candidate, sequence)
                   for candidate in found)