def test_double_deletion_with_snp_inside_first_deletiod_and_false_deletion_path():
    """Check edges after dummy nodes are added for two deletions.

    Nodes 5 and 6 share the same sequence, and node 1 has direct edges to
    both of them in addition to node 2.  After running DummyNodeAdder the
    test asserts the outgoing edges of nodes 1, 2, 3, 4, 9 and 8.
    """
    repeat = "AGGTCCCAGGTCCATCT"
    node_sequences = {
        1: "TTTT",
        2: "AGGTCC",
        3: "C",
        4: "A",
        5: repeat,
        6: repeat,
    }
    edges = {
        1: [2, 5, 6],
        2: [3, 4],
        3: [5, 6],
        4: [5, 6],
        5: [6],
    }
    graph = Graph.from_dicts(node_sequences, edges, [1, 2, 3, 5, 6])

    first_deletion = VcfVariant(1, 4, "TAGGTCCC", "T", type="DELETION")
    second_deletion = VcfVariant(
        1, 11, "CAGGTCCCAGGTCCATCT", "C", type="DELETION")
    variants = VcfVariants([first_deletion, second_deletion])

    adder = DummyNodeAdder(graph, variants)
    new_graph = adder.create_new_graph_with_dummy_nodes()
    print(new_graph)

    # Nodes 8 and 9 do not exist in the input graph; presumably they are
    # the dummy nodes created for the two deletions.
    expected_edges = [
        (1, [2, 8]),
        (2, [3, 4]),
        (3, [5, 9]),
        (4, [5, 9]),
        (9, [6]),
        (8, [5, 9]),
    ]
    for node, targets in expected_edges:
        assert list(new_graph.get_edges(node)) == targets
def test_double_deletion_with_snp_inside_first_deletion():
    """Run DummyNodeAdder on a graph with two deletions and print the result.

    This test makes no assertions; it only checks that building the new
    graph does not raise.
    """
    node_sequences = {
        1: "ACTG",
        2: "A",
        3: "C",
        4: "T",
        5: "AAA",
        6: "G",
    }
    edges = {
        1: [2, 5, 6],
        2: [3, 4],
        3: [5, 6],
        4: [5, 6],
        5: [6],
    }
    graph = Graph.from_dicts(node_sequences, edges, [1, 2, 4, 6])

    deletions = [
        VcfVariant(1, 4, "GAT", "G", type="DELETION"),
        VcfVariant(1, 6, "TAAA", "T", type="DELETION"),
    ]
    variants = VcfVariants(deletions)

    adder = DummyNodeAdder(graph, variants)
    new_graph = adder.create_new_graph_with_dummy_nodes()
    print(new_graph)
def test_overlapping_deletions():
    """Check edges and variant nodes for two deletions followed by a SNP.

    Asserts the outgoing edges of nodes 1, 8, 2 and 9 in the graph returned
    by DummyNodeAdder, and that the second variant resolves to reference
    node 3 and variant node 9.
    """
    node_sequences = {
        1: "AA",
        2: "TCTG",
        3: "TCT",
        4: "G",
        5: "A",
        6: "GG",
    }
    edges = {
        1: [2, 3],
        2: [3, 6],
        3: [4, 5],
        4: [6],
        5: [6],
    }
    graph = Graph.from_dicts(node_sequences, edges, [1, 2, 3, 5, 6])

    variants = VcfVariants([
        VcfVariant(1, 2, "ATCTG", "A", type="DELETION"),
        VcfVariant(1, 6, "GTCTA", "T", type="DELETION"),
        VcfVariant(1, 10, "A", "G", type="SNP"),
    ])

    adder = DummyNodeAdder(graph, variants)
    new_graph = adder.create_new_graph_with_dummy_nodes()

    expected_edges = [
        (1, [2, 8]),
        (8, [3, 9]),
        (2, [3, 9]),
        (9, [6]),
    ]
    for node, targets in expected_edges:
        assert list(new_graph.get_edges(node)) == targets

    # The second deletion is looked up in the rewritten graph.
    second_deletion = variants[1]
    ref_node, var_node = new_graph.get_variant_nodes(second_deletion)
    assert ref_node == 3
    assert var_node == 9

    print(new_graph)
def test_insertion_with_identical_false_path():
    """Check an insertion whose inserted sequence equals the following node.

    Nodes 2 and 3 both hold "TCTG".  After dummy nodes are added the test
    asserts the edges of nodes 1, 6 and 2, and that the insertion resolves
    to reference node 6 and variant node 2.
    """
    node_sequences = {1: "AA", 2: "TCTG", 3: "TCTG", 4: "GG"}
    edges = {1: [2, 3], 2: [3], 3: [4]}
    graph = Graph.from_dicts(node_sequences, edges, [1, 3, 4])

    insertion = VcfVariant(1, 2, "A", "ATCTG", type="INSERTION")
    variants = VcfVariants([insertion])

    adder = DummyNodeAdder(graph, variants)
    new_graph = adder.create_new_graph_with_dummy_nodes()
    print(new_graph)

    expected_edges = [
        (1, [2, 6]),
        (6, [3]),
        (2, [3]),
    ]
    for node, targets in expected_edges:
        assert list(new_graph.get_edges(node)) == targets

    ref_node, var_node = new_graph.get_variant_nodes(variants[0])
    assert ref_node == 6
    assert var_node == 2
def test_simple():
    """Merge two four-node graphs and check the merged graph's layout.

    Both inputs have the same topology and linear reference path
    ([1, 2, 4]); only the sequence of node 1 differs.  The test checks
    edges, offset-to-node and node-to-offset lookups, linear reference
    membership and chromosome start nodes on the merged graph, then writes
    it to "merged_graph.npz" and reads it back.
    """
    shared_edges = {1: [2, 3], 2: [4], 3: [4]}
    shared_ref_path = [1, 2, 4]

    graph1 = Graph.from_dicts(
        {1: "ACTG", 2: "A", 3: "C", 4: "ACT"},
        {node: list(targets) for node, targets in shared_edges.items()},
        list(shared_ref_path),
    )
    graph2 = Graph.from_dicts(
        {1: "AAAA", 2: "A", 3: "C", 4: "ACT"},
        {node: list(targets) for node, targets in shared_edges.items()},
        list(shared_ref_path),
    )

    merged_graph = merge_graphs([graph1, graph2])
    print(merged_graph)

    def node_at(offset):
        # Thin wrapper to keep the assertions below readable.
        return merged_graph.get_node_at_ref_offset(offset)

    assert list(merged_graph.get_edges(1)) == [2, 3]
    assert node_at(0) == 1

    # Sequence lookups through reference offsets.
    assert merged_graph.get_node_sequence(node_at(8)) == "AAAA"
    assert merged_graph.get_node_sequence(node_at(4)) == "A"

    # Out-degree of the nodes found at offsets 8 and 11.
    assert len(merged_graph.get_edges(node_at(8))) == 2
    assert len(merged_graph.get_edges(node_at(11))) == 2

    assert merged_graph.get_ref_offset_at_node(6) == 8
    assert 7 in merged_graph.linear_ref_nodes()
    assert merged_graph.get_ref_offset_at_node(7) == 12
    assert list(merged_graph.chromosome_start_nodes) == [1, 6]

    # Round trip through disk; the loaded graph is not inspected further.
    merged_graph.to_file("merged_graph.npz")
    Graph.from_file("merged_graph.npz")
def test_find_insertion_nodes():
    """Check that an insertion maps to the empty node and the inserted node.

    Node 4 has an empty sequence and node 2 holds "AA"; both sit between
    nodes 1 and 3.  The test asserts that get_variant_nodes returns node 4
    as the reference node and node 2 as the variant node.
    """
    node_sequences = {1: "CTACCA", 2: "AA", 3: "TAAATAA", 4: ""}
    edges = {1: [2, 4], 2: [3], 4: [3]}
    g = Graph.from_dicts(node_sequences, edges, [1, 3])

    insertion = VcfVariant(1, 6, "A", "AAA", "", "INSERTION")
    nodes = g.get_variant_nodes(insertion)

    ref_node, variant_node = nodes
    assert ref_node == 4
    assert variant_node == 2
def test_indel_graph2():
    """Run SnpKmerFinder with k=31 on an indel graph and print found kmers.

    The graph has an empty node (100) parallel to node 2.  There are no
    assertions; the test only checks that kmer finding does not raise.
    """
    left_flank = "gggggaggcttgtggttagcagagagtgggtggaagacagaggtttgag"
    right_flank = (
        "gagagagacccaggggagaaaaccagctgcagaggcaggaggggtccagggcagcccgaggcc"
        "agagatgggcgtcttccttacagccacctgtggtccc"
    )
    node_sequences = {1: left_flank, 2: "ga", 3: right_flank, 100: ""}
    edges = {1: [2, 100], 2: [3], 100: [3]}
    graph = Graph.from_dicts(node_sequences, edges, [1, 2, 3])

    kmer_finder = SnpKmerFinder(graph, k=31)
    # The return value is not used here; only kmers_found is printed.
    kmer_finder.find_kmers()
    print(kmer_finder.kmers_found)
def test_simple_insertion():
    """Check edges after a dummy node is added for a single insertion.

    Asserts, via node_has_edges, that node 5 points to [3], node 1 points
    to [2, 5] and node 2 points to [3] in the graph returned by
    DummyNodeAdder.
    """
    node_sequences = {1: "ACTG", 2: "C", 3: "AAAA"}
    edges = {1: [2, 3], 2: [3]}
    graph = Graph.from_dicts(node_sequences, edges, [1, 3])

    insertion = VcfVariant(1, 4, "G", "GC", type="INSERTION")
    variants = VcfVariants([insertion])

    adder = DummyNodeAdder(graph, variants)
    new_graph = adder.create_new_graph_with_dummy_nodes()

    # Node 5 is not in the input graph; presumably it is the dummy node.
    for node, targets in ((5, [3]), (1, [2, 5]), (2, [3])):
        assert new_graph.node_has_edges(node, targets)
def simple_test():
    """Run UniqueVariantKmersFinder on a small deletion graph.

    Builds a four-node graph in which node 2 ("AA") can be bypassed through
    the empty node 4, prints two graph properties, and runs the unique
    kmer finder with k=4 for one deletion variant.  Nothing is asserted.

    NOTE(review): the function name does not start with ``test_``, so it is
    presumably not collected by pytest's default discovery -- confirm
    whether that is intentional.
    """
    g = Graph.from_dicts(
        {1: "CTACCA", 2: "AA", 3: "TAAATAA", 4: ""},
        {1: [2, 4], 2: [3], 4: [3]},
        [1, 2, 3],
    )
    print(g.ref_offset_to_node)
    print(g.get_node_size(3))

    k = 4
    # Every other VcfVariant call in this file passes the chromosome first,
    # followed by position, reference and variant sequence.  Without the
    # chromosome all positional arguments are shifted by one, so it is
    # passed here too.
    variants = VcfVariants([VcfVariant(1, 6, "AAA", "A", "", "DELETION")])
    reference_kmers = ReferenceKmerIndex.from_sequence("CTACCAAATAAATAA", k)
    finder = UniqueVariantKmersFinder(g, reference_kmers, variants, k)
    finder.find_unique_kmers()
def test_from_dicts():
    """Check node sizes, edges and sequence of a graph built from dicts."""
    node_sequences = {1: "ACTG", 2: "A", 3: "G", 4: "AAA"}
    edges = {1: [2, 3], 2: [4], 3: [4]}
    g = Graph.from_dicts(node_sequences, edges, [1, 2, 4])

    # Expected sizes are the lengths of the sequences given above.
    for node, size in ((1, 4), (2, 1), (3, 1), (4, 3)):
        assert g.get_node_size(node) == size

    assert list(g.get_edges(1)) == [2, 3]
    assert g.get_node_sequence(2) == "A"
def test_indel_graph():
    """Index the kmers of a graph with an empty node and look two of them up.

    Node 3 has an empty sequence and is parallel to node 2 ("A").  With
    k=3 the test looks up "GTA" and "GAT" in a KmerIndex built from the
    found kmers and checks the second element of each result against the
    expected node lists [1, 3, 4] and [1, 2, 4].
    """
    graph = Graph.from_dicts(
        {1: "ACTG", 2: "A", 3: "", 4: "TAAT"},
        {1: [2, 3], 2: [4], 3: [4]},
        [1, 2, 4],
    )
    kmer_finder = SnpKmerFinder(graph, k=3)
    flat_kmers = kmer_finder.find_kmers()
    print(kmer_finder.kmers_found)

    index = KmerIndex.from_flat_kmers(flat_kmers)

    # Materialise hits[1] first and compare the resulting list.  Wrapping
    # the comparison itself in list() would assert on the truthiness of a
    # list of per-element results (hits[1] is presumably a numpy array),
    # which passes for any non-empty result regardless of its values.
    hits = index.get(sequence_to_kmer_hash("GTA"))
    assert list(hits[1]) == [1, 3, 4]
    print(hits)

    hits = index.get(sequence_to_kmer_hash("GAT"))
    assert list(hits[1]) == [1, 2, 4]
    print(hits)
def test_tricky_case_nested_deletions():
    """Check edges after dummy nodes are added for two deletions and a SNP.

    Both node 1 and node 2 have a direct edge to node 6 in the input graph.
    The test asserts the outgoing edges of nodes 1, 2, 11 and 12 in the
    graph returned by DummyNodeAdder.
    """
    node_sequences = {
        1: "TATAT",
        2: "AT",
        3: "A",
        4: "T",
        5: "A",
        6: "A",
        7: "T",
        8: "A",
        9: "GG",
    }
    edges = {
        1: [2, 6],
        2: [3, 6],
        3: [4, 5],
        4: [6],
        5: [6],
        6: [7, 8],
        7: [9],
        8: [9],
    }
    graph = Graph.from_dicts(node_sequences, edges, [1, 2, 3, 5, 6, 8, 9])

    variants = VcfVariants([
        VcfVariant(1, 5, "TATAA", "T", type="DELETION"),
        VcfVariant(1, 7, "TAA", "T", type="DELETION"),
        VcfVariant(1, 5, "A", "T", type="SNP"),
    ])

    adder = DummyNodeAdder(graph, variants)
    new_graph = adder.create_new_graph_with_dummy_nodes()
    print(new_graph)

    # Nodes 11 and 12 are not in the input graph; presumably dummy nodes.
    expected_edges = [
        (1, [2, 11]),
        (2, [3, 12]),
        (11, [6]),
        (12, [6]),
    ]
    for node, targets in expected_edges:
        assert list(new_graph.get_edges(node)) == targets
def test_insertion_with_multiple_paths():
    """Check edges after a dummy node is added for one insertion.

    Asserts that node 1 points to [2, 8] and node 8 points to [3] in the
    graph returned by DummyNodeAdder, then prints that graph.
    """
    node_sequences = {
        1: "AAAG",
        2: "GAGT",
        3: "GA",
        4: "C",
        5: "G",
        6: "T",
    }
    edges = {
        1: [2, 3],
        2: [3],
        3: [4, 5],
        4: [6],
        5: [6],
    }
    graph = Graph.from_dicts(node_sequences, edges, [1, 3, 5, 6])

    insertion = VcfVariant(1, 4, "G", "GGAGT", type="INSERTION")
    variants = VcfVariants([insertion])

    adder = DummyNodeAdder(graph, variants)
    new_graph = adder.create_new_graph_with_dummy_nodes()

    for node, targets in ((1, [2, 8]), (8, [3])):
        assert list(new_graph.get_edges(node)) == targets

    print(new_graph)
def test_simple_snp_graph():
    """Find 3-mers in a one-SNP graph and check four of them.

    Nodes 2 ("A") and 3 ("G") are alternatives between nodes 1 and 4.
    After printing the finder's results and the flat kmers' internal
    arrays, the test asserts via has_kmer that four kmers were found with
    the given node sets.
    """
    node_sequences = {1: "ACTG", 2: "A", 3: "G", 4: "AAAT"}
    edges = {1: [2, 3], 2: [4], 3: [4]}
    graph = Graph.from_dicts(node_sequences, edges, [1, 2, 4])

    kmer_finder = SnpKmerFinder(graph, k=3)
    flat_kmers = kmer_finder.find_kmers()

    # Debug output, kept in the same order as before.
    print(kmer_finder.kmers_found)
    print(flat_kmers._ref_offsets)
    print(flat_kmers._nodes)
    print(flat_kmers._hashes)

    expected_kmers = [
        ("ACT", {1}),
        ("GAA", {1, 2, 4}),
        ("GGA", {1, 3, 4}),
        ("AAT", {4}),
    ]
    for kmer, nodes in expected_kmers:
        assert kmer_finder.has_kmer(kmer, nodes)