def test_simple_variant_with_nearby_snp(self): v = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="A", start=31, alternate_bases=["T"], ) v2 = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="A", start=32, alternate_bases=["T"], ) panel = self.pg.create(v, context=[v2]) assert_no_overlapping_kmers(panel) assert set(panel.refs) == set([ "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG", "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTG", ]) assert set(panel.alts) == set([ "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTG", "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTG", ])
def test_simple_variant_with_multiple_nearby_snps(self): v = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="A", start=31, alternate_bases=["T"]) v2 = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="A", start=32, alternate_bases=["T"]) v3 = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="C", start=30, alternate_bases=["G"]) panel = self.pg.create(v, context=[v2, v3]) assert_no_overlapping_kmers(panel) assert panel.refs == ['CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT', 'CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGAT', 'CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTGAT', 'CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTGAT'] assert panel.alts == [ "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTGAT", "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTGAT", "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTGAT", "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTGAT"]
def test_indel_snp_indel_context(self): v = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="TCGCGTGGC", start=4021459, alternate_bases=["GCGAGCAGA"]) v1 = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="A", start=4021455, alternate_bases=["ATCTAGCCGCAAG"]) v2 = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="T", start=4021489, alternate_bases=["G"]) panel = self.pg2.create(v) # , context = [v1, v2]) assert_no_overlapping_kmers(panel) assert "ATCATGCGATTCTGCGTCTGCTCGCGAGGCTCGCGTGGCCGCCGGCGCTGGCGGGCGATCT" in panel.refs panel = self.pg2.create(v, context=[v1, v2]) assert_no_overlapping_kmers(panel) assert sorted( panel.alts) == sorted( [ "ATCATGCGATTCTGCGTCTGCTCGCGAGGCGCGAGCAGACGCCGGCGCTGGCGGGCGATCG", "ATCATGCGATTCTGCGTCTGCTCGCGAGGCGCGAGCAGACGCCGGCGCTGGCGGGCGATCT", "TGCGTCTGCTCGCGATCTAGCCGCAAGGGCGCGAGCAGACGCCGGCGCTGGCGGGCGATCG", "TGCGTCTGCTCGCGATCTAGCCGCAAGGGCGCGAGCAGACGCCGGCGCTGGCGGGCGATCT"])
def test_del_with_ins_context5(self): v = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="ATC", start=32, alternate_bases=["A"]) v2 = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="T", start=5, alternate_bases=["TT"]) v3 = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="A", start=6, alternate_bases=["AG"]) panel = self.pg.create(v, context=[v2, v3]) assert_no_overlapping_kmers(panel) assert self.pg._remove_overlapping_contexts(v, [v2, v3]) == [v2, v3] assert "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA" in panel.refs assert sorted( panel.alts) == sorted( [ "TTAGAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGA", "TTAGAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT", "TTTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT", "ATTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT"])
def test_complex_context(self): v = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="ATTT", start=1503643, alternate_bases=["A"]) v1 = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="CCT", start=1503615, alternate_bases=["C"]) v2 = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="A", start=1503655, alternate_bases=["ATGCCGCCGCC"]) panel = self.pg2.create(v, context=[v1, v2]) assert_no_overlapping_kmers(panel) assert "ATCCTGGAGCCCACCAGCGGAAACACCGGCATTTCGCTGGCGATGGCGGCCCGGTTGAAGG" in panel.refs assert set(panel.alts) == set([ "CCATCGGAGCCCACCAGCGGAAACACCGGCACGCTGGCGATGGCGGCCCGGTTGAAGGGGT", "TCCTGGAGCCCACCAGCGGAAACACCGGCACGCTGGCGATGGCGGCCCGGTTGAAGGGG", "ATCGGAGCCCACCAGCGGAAACACCGGCACGCTGGCGATGCCGCCGCCTGGCGGCCCGG", "TCCTGGAGCCCACCAGCGGAAACACCGGCACGCTGGCGATGCCGCCGCCTGGCGGCCCGG", ])
def test_del_with_SNP_context2(self): v = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="AA", start=31, alternate_bases=["A"], ) v2 = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="A", start=32, alternate_bases=["T"], ) panel = self.pg.create(v, context=[v2]) assert_no_overlapping_kmers(panel) assert self.pg._remove_overlapping_contexts(v, [v2]) == [] assert ( "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG" in panel.refs ) assert sorted(panel.alts) == sorted( ["GATTAAAGATAGAAATACACGATGCGAGCATCAAATTTCATAACATCACCATGAGTTTG"] )
def test_snp_with_replace_context(self): v = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="G", start=2338961, alternate_bases=["A"], ) v1 = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="GGATG", start=2338990, alternate_bases=["CGATA"], ) panel = self.pg2.create(v, context=[v1]) assert_no_overlapping_kmers(panel) assert ( "CGACTAGCCACCATCGCGCATCAGTGCGAGGTCAAAAGCGACCAAAGCGAGCAAGTCGCGG" in panel.refs ) assert set(panel.alts) == set( [ "CGACTAGCCACCATCGCGCATCAGTGCGAGATCAAAAGCGACCAAAGCGAGCAAGTCGCCG", "CGACTAGCCACCATCGCGCATCAGTGCGAGATCAAAAGCGACCAAAGCGAGCAAGTCGCGG", ] )
def test_simple_variant_end(self): v = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="A", start=2902618, alternate_bases=["T"], ) panel = self.pg.create(v) assert_no_overlapping_kmers(panel) assert panel.refs == [ "TTTATACTACTGCTCAATTTTTTTACTTTTATNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" ] assert panel.alts == [ "TTTATACTACTGCTCAATTTTTTTACTTTTTTNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" ] v = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="T", start=2902616, alternate_bases=["C"], ) panel = self.pg.create(v) assert panel.refs == [ "ATTTTATACTACTGCTCAATTTTTTTACTTTTATNNNNNNNNNNNNNNNNNNNNNNNNNNN" ] assert panel.alts == [ "ATTTTATACTACTGCTCAATTTTTTTACTTCTATNNNNNNNNNNNNNNNNNNNNNNNNNNN" ]
def test_simple_variant_with_multiple_nearby_snps(self): v = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="A", start=31, alternate_bases=["T"], ) v2 = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="A", start=32, alternate_bases=["T"], ) v5 = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="A", start=32, alternate_bases=["G"], ) v3 = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="C", start=30, alternate_bases=["G"], ) v4 = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="C", start=30, alternate_bases=["T"], ) panel = self.pg.create(v, context=[v2, v3, v4, v5]) assert_no_overlapping_kmers(panel) assert sorted(panel.refs) == sorted([ "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG", "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTG", "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTG", "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTG", "CGATTAAAGATAGAAATACACGATGCGAGTAATCAAATTTCATAACATCACCATGAGTTTG", "CGATTAAAGATAGAAATACACGATGCGAGTATTCAAATTTCATAACATCACCATGAGTTTG", "CGATTAAAGATAGAAATACACGATGCGAGCAGTCAAATTTCATAACATCACCATGAGTTTG", "CGATTAAAGATAGAAATACACGATGCGAGGAGTCAAATTTCATAACATCACCATGAGTTTG", "CGATTAAAGATAGAAATACACGATGCGAGTAGTCAAATTTCATAACATCACCATGAGTTTG", ]) assert sorted(panel.alts) == sorted([ "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTG", "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTG", "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTG", "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTG", "CGATTAAAGATAGAAATACACGATGCGAGTTATCAAATTTCATAACATCACCATGAGTTTG", "CGATTAAAGATAGAAATACACGATGCGAGTTTTCAAATTTCATAACATCACCATGAGTTTG", "CGATTAAAGATAGAAATACACGATGCGAGCTGTCAAATTTCATAACATCACCATGAGTTTG", "CGATTAAAGATAGAAATACACGATGCGAGGTGTCAAATTTCATAACATCACCATGAGTTTG", "CGATTAAAGATAGAAATACACGATGCGAGTTGTCAAATTTCATAACATCACCATGAGTTTG", ])
def test_simple_deletion4(self): v = Variant.create(variant_sets=self.variant_sets, reference=self.reference, reference_bases="ATC", start=32, alternate_bases=["A"]) panel = self.pg.create(v) assert_no_overlapping_kmers(panel) assert "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA" in panel.refs assert panel.alts == [ "ATTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT" ]
def test_simple_deletion3(self): v = Variant.create(variant_sets=self.variant_sets, reference=self.reference, reference_bases="AT", start=2902618, alternate_bases=["T"]) panel = self.pg.create(v) assert_no_overlapping_kmers(panel) assert "TTTATACTACTGCTCAATTTTTTTACTTTTATNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" in panel.refs assert panel.alts == [ "TTTATACTACTGCTCAATTTTTTTACTTTTTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" ]
def test_del_with_ins_context_where_base_is_deleted2(self): v = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="ATC", start=32, alternate_bases=["A"], ) v2 = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="TAAA", start=5, alternate_bases=["T"], ) v3 = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="A", start=7, alternate_bases=["AG"], ) panel = self.pg.create(v, context=[v2, v3]) assert_no_overlapping_kmers(panel) assert ( "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA" in panel.refs ) assert sorted(panel.alts) == sorted( [ "ATTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT", "CGATTGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGATC", "TTAAGAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT", ] ) panel = self.pg.create(v, context=[v3, v2]) assert_no_overlapping_kmers(panel) assert ( "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA" in panel.refs ) assert sorted(panel.alts) == sorted( [ "ATTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT", "CGATTGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGATC", "TTAAGAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT", ] )
def test_simple_insertion3(self): v = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="A", start=31, alternate_bases=["ATTT"], ) panel = self.pg.create(v) assert_no_overlapping_kmers(panel) assert ("CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG" in panel.refs) assert panel.alts == [ "GATTAAAGATAGAAATACACGATGCGAGCATTTATCAAATTTCATAACATCACCATGAGTTTG" ]
def test_simple_deletion1(self): v = Variant.create(variant_sets=self.variant_sets, reference=self.reference, reference_bases="AA", start=31, alternate_bases=["A"]) assert v.is_indel assert v.is_deletion panel = self.pg.create(v) assert_no_overlapping_kmers(panel) assert "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG" in panel.refs assert self.pg._calculate_length_delta_from_indels(v, []) == 1 assert panel.alts == [ "GATTAAAGATAGAAATACACGATGCGAGCATCAAATTTCATAACATCACCATGAGTTTG" ]
def test_large_insertion(self): v = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="CCGCCGGCCCCGCCGTTT", start=1636155, alternate_bases=[ "CTGCCGGCCCCGCCGGCGCCGCCCAATCCACCGAAGCCCCTCCCTTCGGTGGGGTCGCTGCCGCCGTCGCCGCCGTCACCGCCCTTGCCGCCGGCCCCGCCGTCGCCGCCGGCTCCGGCGGTGCCGTCGCCGCCCTGGCCGCCGGCCCCGCCGTTTCCG" ]) panel = self.pg2.create(v, context=[]) assert_no_overlapping_kmers(panel) assert "AGACCTAGCAGGGTGCCGGCGCCGCCCTTGCCGCCGGCCCCGCCGTTTCCGCCGCCGCCAT" in panel.refs assert panel.alts == [ "GACCTAGCAGGGTGCCGGCGCCGCCCTTGCTGCCGGCCCCGCCGGCGCCGCCCAATCCACCGAAGCCCCTCCCTTCGGTGGGGTCGCTGCCGCCGTCGCCGCCGTCACCGCCCTTGCCGCCGGCCCCGCCGTCGCCGCCGGCTCCGGCGGTGCCGTCGCCGCCCTGGCCGCCGGCCCCGCCGTTTCCGCCGCCGCCGCCATCGCCGATGATGTTTTCC" ]
def test_large_insertion(self): v = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="C", start=2352065, alternate_bases=[ "CCTCGCCTGGGCTGGCGAGCAGACGCAAAATCCCCCGCACGCCCGGCGTGTCGGGGGATTTTGCGTCTG" ]) panel = self.pg.create(v) assert_no_overlapping_kmers(panel) assert "AGCTCGGCCAGCTCAGTCACGTCGCCGCCGCCTCGCCAGTTGACCGCGCCCGCTCGCGGCT" in panel.refs assert panel.alts == [ "CCAGCTCAGTCACGTCGCCGCCGCCTCGCCTGGGCTGGCGAGCAGACGCAAAATCCCCCGCACGCCCGGCGTGTCGGGGGATTTTGCGTCTGCTCGCCAGTTGACCGCGCCCGCTCGCGGCT" ]
def test_large_variant2(self): v = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases= "AACGCCCGGTATCTGAGGATCTGTGTTCTCACCCAATACAAGTCGCATTCAC", start=1355983, alternate_bases=[ "ACCGCCCGGTATCTGAGGATTGGTTTTCCACCCAAATACAAGTCGCATTCGC" ]) panel = self.pg.create(v) assert_no_overlapping_kmers(panel) assert "TCGTCAACGCCCGGTATCTGAGGATCTGTGTTCTCACCCAATACAAGTCGCATTCACTGGA" in panel.refs assert panel.alts == [ "TCGTCACCGCCCGGTATCTGAGGATTGGTTTTCCACCCAAATACAAGTCGCATTCGCTGGA" ]
def test_large_var1(self): v = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases= "CGCGGGAGTAGAACGATCGCCAAGTGGTCGGTCTTGGCTGCCCACTTCATCCCCGGCGCCACCGGCAGGTCTCGCGGTCATCTCGACCAACGGAGGGCCGTCGGTGGTTCGTATCCGGCCAAGAACGGCGAGAACGGTTTGTGCCTCTATGCCAGGGTGAATGTCTCATCTCCCAGGCGGACGGTGATATCCAGTTCTCCGCCAAGAGCGGACACGTATTTGCGCAGTGTGTTGACCTGTGCGGAGCCGATGTCGCCGTTCTCGATGCTGGATACCCGGCTCTGCCGGATGTGCGCCAGCGCAGCCACCTGGACCTGGGTGAGTGACTGAGCCGCGCGCAGCTCCCGGAGCCGGAATGCCCGCACTTCATCGCGCATTCGTGCCTTGTGCCGGTCCACCGCCTCCCGGTTAACGGGACGTACGGCGTCCATGTCCCGTAGTGTCATCGCCATCGTGCCACTTACCCTTTCTTGCGCTTGCGCCTCTTTGGCTTCGTGTCCTCGAACTGTGCGAGATGTTCGGCAAACATCTCATCGGCCGCTTTGATCTTCTCGTCGTACCACTGGGTCCACCGCCCGGCCTTGTTACCGGCGGCCAGCATGATCGCCTGCCGCGCCGGGTCGAAGGCGAACAGAATGCGGACCTCGGACCGCCCTTGTGATCCTGGACGCAGCTCCTTCATGTTCTTGTGGCGCGACCCACGCACCGTGTCCACCAGAGGACAGCCAAGTGCGGGGCCCTCTTCCTCGAGAACCTCGATAGCTGCGAACACCAATTCGTAGGTCTCTCGGTCCAAGCCGTTGAGCCAGGCGGAGATGCGCTCCACATCCGCCGTCCACCCCACAGAGTCGCAGAGTAGCGCGATACGCGATATCACACAAGGGTGATATTCCTCCGGGTAAGAGCAGCGGGCGACGGGGCTACCGTCGAGGAAATGCCGGCAGGCGAGGACGGACTCTGCGCACCCGGGCCGTTGAAACAGTAGCCTGTGCCAGGCCGAGAATTCATCCCCACGTATGAGGCAGTACAGTGCGCCGCCGTGCGCGTTCTCCCATGGAACGTTCACGGGCTCCCGTGGATGACAGGCGTTTCATGAACGCCAGCGCCGCCGCAACCCGACCGAAAGCGGTTGACCCCAAGGAGAGCTGGAAGTCGAGGCCACCACCTTCGCCGCGGAGTTGCTCATGCCCGAGAGCGAGACTCGTCCCGAAATACGCCGGCTCGATTTCGGCAAGTTGCTCGAACTGAAGCGGGAATGGGCGTCGACCCGCTCGACCAGCCCCAGCCGGGTGACCAGCCCCAGCCGGGTGACCAGCCGATGCACCGCGGCGATCCCACCGAAGCCGGTGGCATCGATGTTGGCGCCGACCTCGTAGCGCACCGCGCCCGAACCCAGCATCGGCCTGGGCTGCGCCGCCCAGCGTCCAGCCCGCGCGTGCCGCGCCGCCACCCTGCGCCCTCGGCGTGTGATGTTTCGCCGACTCTGTTCATGGGTTATCTTCTTCACCACAAAGGCCTTTCCTGCTGGGCTGTGTTGAGGTCGCAAACCCAGCCAGGGTAAGGCCTTTGGCCTCTCCTACCCGGCCGACACGCTTACTGAAGGCCTAGTCTAGGCAGGCCATTCAATCTGCGGAATCGAAAAATTCGGTTCCAGCCTGCTCGTTTCCTTTCCGACAGCGATCTGACGTTGCGTAACGTCATTTGTACGGACTCTTTTAGCGGCATTGATTTCAGATGCCAACGCCGTCTGTGCTGTAGCGCCGATTGGCCGAAACTGTAAATTTGTATGATTATTTAAATCTTTGACGAACACGCGCCACAAACGTACTATCTCTTTGGCAAAGTCCACCGGCATCTCATTCAACGGTTTTGTTTGCGCGTGGTCGTCATATGTTGGTAACTGTGTAACCGGCCGCCTATCTTGCGCGTGCATCATATGACTATGAATCGGCCTTCTCCAGTGAAATTGATACAAGATCGATCCGATAAGCGGTACCTTGTACACAGTGCAATTGTAGTAATTCGCGTTTTGTCCTACGCTTGTATTCTGCGTGAAGAATTCA", start=2266659, alternate_bases=[ "CACGCGAGTTGTAGATGATCGTTGAGTGGTCTTGCTTGGACTTCCATTTCATCTTTTCGACGCGCCAGGTCTCGCGGTCCTCCGGATCTGCGCCCGGTTTGAGTTGCACATCAAGGGGATACGGCTTGACCGACTCGTAGCCGACATGTAAGTCGGCTAGTTTCCGGCCGGCGCTGGCGAGCTGGTCGAAGCGTTCGCGGGTCTCCGGTGTTGGGATGTGCGGGAGCATCTTCTTGAGGTCAGCGGCGTATTTTGTGCGGTAGGCGGGGTCATGCAGCAGGCCGTAGACGTAGTAGAAGATGTCGTCTTTGGTGACTTGGTCGCCGATCGTGTCGCGGTAGAGCTTGAGGATGACGCCGGTGATGTTGTCGACGCGGCGGTAGCCGTGGTCGTCTACTTCGGCGTTGGTGGTGGACTCGAAATCGAGTTCGCCGTCACGTGGTTCGGTCTTCTCGTAGGTCCAGCGCGGGAAGAATTGACCGTTGCTTGAGCCCCAGAATGCGAGATCGGGGATAGCGTTTAGCATCAGACACGAGAAGGGCTTGTCTGAGCCCATGCCAACCACGTAGTAACCGACATTCCCGTGCTCCGGCGTCGGAAACATCGACGGAAGCTGGTAGGTACAGTTGTTGAGCTGCTGGTTGGGGTCGAGGTAGGCGTGCTCTTTCGTAAATGGTCGGTACGTGCCGAGCCGCATTCCCGCGGGAGCGAATTCGATGCGAATGCCTTGTGCCACTTGCCGCTTGTTGATGCGGTCCCAGCTGAACTTGGCCGAGTCCACGGTAATGAGGGCGTCAACCGGCGGGGTCTTGGCGTCCCTTCCGCGGATCTCGTTGATCCGGTCGACCTCCGAGTTGTAGAAGTCGATCGTGCGTCCGATGTTGGCCTCGAGCGCACCACGTGAAAAGTTGTAACACCACGCATCCCGGCTGGTCTTCAAGCCCGCGGAATAGTTCGCGAAGACACGTGTCACGTCAAGAGCAGCCTTCTTGTCGCCGATAACCGGCCACGCGCTGAACGCGTCGTCGCGTTGGTTGACCCAGTCACCGTGCAAGTTGGGTGTGACTGTCTGCCATTCCACCGTGTCGAGGTAGCCGTCGCCGACGATCCGCAACTTCTCCTCGCGACTCAGGTAATCGCCGATGTCGCGGTAAAGGACATCGCATGGCCCGCTGTGCTTCGGATCCTTGATGCCAAGGAAGATCGCCACCGTGTTGCGACTCCCCCCGCCAAAGACCTTGCCGCCTTCCTGGCGTGAGAGTTCCCCAGCTGTGCGCTGGTTCCCCCGCAGGTTGTACACATATACCGCCGCGTAGTCGTCGGCGAGCGACAACCGCATGCCGTCTGCCGTGTTGCCGTCTATGTACCCACCATTGGAGACGAATCCGACAACACCGTTGTCACCAATGCGGTCGGTCGCCCACCGGAACGCGCGAATATACGAGTCGTACAGGCTGTTCTTCAGCTGCGCCGTCGACCGCTTCGCGTACGTCTGCTCAATCCGCCCGTCCAACGTCGGATACTTCACGTTGGCGTTCAGGTCGTTCGCGCTGCTCTGCCCCACCGAGTACGGCGGATTCCCGATGATCACGCTGATCGGCGTCGCCAGCTGTCGCAAGATCCGAGCGTTGTTGTACGGGAACATGATCGCGTCCATCGAGTCCCCGGCTTCGGAAATCTGGAACGTGTCGGCCAGCGCCATCCCGGGGAACGGCTCATAGGCGTCGGCGTCGGCGGTCTTGCCCGCCAAAGCATGGTAGGTCGACTCGATGTTCACCGCGGCGATGTAGTACGCCAGCAGCATGATCTCGTTGGCGTGCAGCTCTTGCGAGTACTTTCGGGTGAGGTCGGCGGCCGTGATCAGGTCGGACTGCAGCAGCCGGGTAATGAATGTGCCCGTCCCGGCGAAGCCGTCCAGAATATGCACGCCCTCGTCGGTCAGCCCGCGCCCGAAATGCTTGCGCGACACGAAATCAGCCGCCCGCACAATGAAGTCCACGACCTCGACCGGCGTGTACACGATCCCCAGCGCCTCGGCCTGCTTCTTGAAGCCGATGCGGAAGAACTTCTCGTACAGCTCGGCGATCACCTGCTGCTTGCCCTCGGCGCTGGTGACCTCGCCGGCGCGCCGTCGCACCGATTCGTAAAAGCCTTCCAACCGAGCGGTTTCGGCCTCCAGGCCGGCACCCCCGACGGTGTCGACCATCTTCTGCATGGCCCGCGACACCGGGTTGTGCGACGCGAAGTCATGCCCGGCGAACAGCGCGTCGAACACCGGCTTGGTGATCAGGTGCTGCGAGAGCATGCTGATCGCGTCATCGGGGGTGATCGAGTCATTGAGGTTATCGCGCAGCCCGGCCAGGAACTGCTCGAACGCCGCCGCCGCCGTAGCGTCGGCGCCGCCGAGCAGGGCGTGGATACGGGTGGTCAGCGTCGCGGCGATGTCGGCGACATCGGCGGCCCACTGCTCCCAATAGGTCCGGGTGCCAACCTTGTCGACGATGCGCGCGTAGATCGCTTCCTGCCACTGCGACAACGAGAACATCGCCAACTGCTCCGCGACGGCGGGTCCCGCCTCGTCGGAGGTCGGCCCGATGTGACCGCCCAACAGCTTGTCGCTGCCTTCACCGGTCTTCGTCGGCTTCACGTTCAGCGCAATGCTGTTCACCATCGCGTCGAAGCGCTCGTCGTGCGACCGCAACGCGTTGAGGACCTGCCACACCACCTTGAACCGTTTGTTGTCGGCCAACGCGGCAGACGGCTCGACACCCTCGGGCACCGCCACCGGCAAGATGACGTACCCGTAGTCCTTGCCGGGCGACTTGCGCATCACCCGACCGACCGACTGCACCACGTCGACGATGGAATTGCGCGGATTCAGGAACAGCACCGCGTCCAGCGCGGGCACGTCGACCCCTTCGGAGAGGCAGCGGGCGTTGGACAGGATGCGGCATTCATCCTCGGCGACCACGCCTTTGAGCCAGGCCAGCTGTTCGTTGCGGACCAGCGCGTTGAACGTCCCGTCCACGTGGCGCACCG" ]) panel = self.pg.create(v) assert_no_overlapping_kmers(panel) assert "TGGTGACGCGGGAGTAGAACGATCGCCAAGTGGTCGGTCTTGGCTGCCCACTTCATCCCCGGCGCCACCGGCAGGTCTCGCGGTCATCTCGACCAACGGAGGGCCGTCGGTGGTTCGTATCCGGCCAAGAACGGCGAGAACGGTTTGTGCCTCTATGCCAGGGTGAATGTCTCATCTCCCAGGCGGACGGTGATATCCAGTTCTCCGCCAAGAGCGGACACGTATTTGCGCAGTGTGTTGACCTGTGCGGAGCCGATGTCGCCGTTCTCGATGCTGGATACCCGGCTCTGCCGGATGTGCGCCAGCGCAGCCACCTGGACCTGGGTGAGTGACTGAGCCGCGCGCAGCTCCCGGAGCCGGAATGCCCGCACTTCATCGCGCATTCGTGCCTTGTGCCGGTCCACCGCCTCCCGGTTAACGGGACGTACGGCGTCCATGTCCCGTAGTGTCATCGCCATCGTGCCACTTACCCTTTCTTGCGCTTGCGCCTCTTTGGCTTCGTGTCCTCGAACTGTGCGAGATGTTCGGCAAACATCTCATCGGCCGCTTTGATCTTCTCGTCGTACCACTGGGTCCACCGCCCGGCCTTGTTACCGGCGGCCAGCATGATCGCCTGCCGCGCCGGGTCGAAGGCGAACAGAATGCGGACCTCGGACCGCCCTTGTGATCCTGGACGCAGCTCCTTCATGTTCTTGTGGCGCGACCCACGCACCGTGTCCACCAGAGGACAGCCAAGTGCGGGGCCCTCTTCCTCGAGAACCTCGATAGCTGCGAACACCAATTCGTAGGTCTCTCGGTCCAAGCCGTTGAGCCAGGCGGAGATGCGCTCCACATCCGCCGTCCACCCCACAGAGTCGCAGAGTAGCGCGATACGCGATATCACACAAGGGTGATATTCCTCCGGGTAAGAGCAGCGGGCGACGGGGCTACCGTCGAGGAAATGCCGGCAGGCGAGGACGGACTCTGCGCACCCGGGCCGTTGAAACAGTAGCCTGTGCCAGGCCGAGAATTCATCCCCACGTATGAGGCAGTACAGTGCGCCGCCGTGCGCGTTCTCCCATGGAACGTTCACGGGCTCCCGTGGATGACAGGCGTTTCATGAACGCCAGCGCCGCCGCAACCCGACCGAAAGCGGTTGACCCCAAGGAGAGCTGGAAGTCGAGGCCACCACCTTCGCCGCGGAGTTGCTCATGCCCGAGAGCGAGACTCGTCCCGAAATACGCCGGCTCGATTTCGGCAAGTTGCTCGAACTGAAGCGGGAATGGGCGTCGACCCGCTCGACCAGCCCCAGCCGGGTGACCAGCCCCAGCCGGGTGACCAGCCGATGCACCGCGGCGATCCCACCGAAGCCGGTGGCATCGATGTTGGCGCCGACCTCGTAGCGCACCGCGCCCGAACCCAGCATCGGCCTGGGCTGCGCCGCCCAGCGTCCAGCCCGCGCGTGCCGCGCCGCCACCCTGCGCCCTCGGCGTGTGATGTTTCGCCGACTCTGTTCATGGGTTATCTTCTTCACCACAAAGGCCTTTCCTGCTGGGCTGTGTTGAGGTCGCAAACCCAGCCAGGGTAAGGCCTTTGGCCTCTCCTACCCGGCCGACACGCTTACTGAAGGCCTAGTCTAGGCAGGCCATTCAATCTGCGGAATCGAAAAATTCGGTTCCAGCCTGCTCGTTTCCTTTCCGACAGCGATCTGACGTTGCGTAACGTCATTTGTACGGACTCTTTTAGCGGCATTGATTTCAGATGCCAACGCCGTCTGTGCTGTAGCGCCGATTGGCCGAAACTGTAAATTTGTATGATTATTTAAATCTTTGACGAACACGCGCCACAAACGTACTATCTCTTTGGCAAAGTCCACCGGCATCTCATTCAACGGTTTTGTTTGCGCGTGGTCGTCATATGTTGGTAACTGTGTAACCGGCCGCCTATCTTGCGCGTGCATCATATGACTATGAATCGGCCTTCTCCAGTGAAATTGATACAAGATCGATCCGATAAGCGGTACCTTGTACACAGTGCAATTGTAGTAATTCGCGTTTTGTCCTACGCTTGTATTCTGCGTGAAGAATTCAAACA" in panel.refs assert panel.alts == [ "GGCCTCGTCGGGAATGCCGGCGATGGTGACACGCGAGTTGTAGATGATCGTTGAGTGGTCTTGCTTGGACTTCCATTTCATCTTTTCGACGCGCCAGGTCTCGCGGTCCTCCGGATCTGCGCCCGGTTTGAGTTGCACATCAAGGGGATACGGCTTGACCGACTCGTAGCCGACATGTAAGTCGGCTAGTTTCCGGCCGGCGCTGGCGAGCTGGTCGAAGCGTTCGCGGGTCTCCGGTGTTGGGATGTGCGGGAGCATCTTCTTGAGGTCAGCGGCGTATTTTGTGCGGTAGGCGGGGTCATGCAGCAGGCCGTAGACGTAGTAGAAGATGTCGTCTTTGGTGACTTGGTCGCCGATCGTGTCGCGGTAGAGCTTGAGGATGACGCCGGTGATGTTGTCGACGCGGCGGTAGCCGTGGTCGTCTACTTCGGCGTTGGTGGTGGACTCGAAATCGAGTTCGCCGTCACGTGGTTCGGTCTTCTCGTAGGTCCAGCGCGGGAAGAATTGACCGTTGCTTGAGCCCCAGAATGCGAGATCGGGGATAGCGTTTAGCATCAGACACGAGAAGGGCTTGTCTGAGCCCATGCCAACCACGTAGTAACCGACATTCCCGTGCTCCGGCGTCGGAAACATCGACGGAAGCTGGTAGGTACAGTTGTTGAGCTGCTGGTTGGGGTCGAGGTAGGCGTGCTCTTTCGTAAATGGTCGGTACGTGCCGAGCCGCATTCCCGCGGGAGCGAATTCGATGCGAATGCCTTGTGCCACTTGCCGCTTGTTGATGCGGTCCCAGCTGAACTTGGCCGAGTCCACGGTAATGAGGGCGTCAACCGGCGGGGTCTTGGCGTCCCTTCCGCGGATCTCGTTGATCCGGTCGACCTCCGAGTTGTAGAAGTCGATCGTGCGTCCGATGTTGGCCTCGAGCGCACCACGTGAAAAGTTGTAACACCACGCATCCCGGCTGGTCTTCAAGCCCGCGGAATAGTTCGCGAAGACACGTGTCACGTCAAGAGCAGCCTTCTTGTCGCCGATAACCGGCCACGCGCTGAACGCGTCGTCGCGTTGGTTGACCCAGTCACCGTGCAAGTTGGGTGTGACTGTCTGCCATTCCACCGTGTCGAGGTAGCCGTCGCCGACGATCCGCAACTTCTCCTCGCGACTCAGGTAATCGCCGATGTCGCGGTAAAGGACATCGCATGGCCCGCTGTGCTTCGGATCCTTGATGCCAAGGAAGATCGCCACCGTGTTGCGACTCCCCCCGCCAAAGACCTTGCCGCCTTCCTGGCGTGAGAGTTCCCCAGCTGTGCGCTGGTTCCCCCGCAGGTTGTACACATATACCGCCGCGTAGTCGTCGGCGAGCGACAACCGCATGCCGTCTGCCGTGTTGCCGTCTATGTACCCACCATTGGAGACGAATCCGACAACACCGTTGTCACCAATGCGGTCGGTCGCCCACCGGAACGCGCGAATATACGAGTCGTACAGGCTGTTCTTCAGCTGCGCCGTCGACCGCTTCGCGTACGTCTGCTCAATCCGCCCGTCCAACGTCGGATACTTCACGTTGGCGTTCAGGTCGTTCGCGCTGCTCTGCCCCACCGAGTACGGCGGATTCCCGATGATCACGCTGATCGGCGTCGCCAGCTGTCGCAAGATCCGAGCGTTGTTGTACGGGAACATGATCGCGTCCATCGAGTCCCCGGCTTCGGAAATCTGGAACGTGTCGGCCAGCGCCATCCCGGGGAACGGCTCATAGGCGTCGGCGTCGGCGGTCTTGCCCGCCAAAGCATGGTAGGTCGACTCGATGTTCACCGCGGCGATGTAGTACGCCAGCAGCATGATCTCGTTGGCGTGCAGCTCTTGCGAGTACTTTCGGGTGAGGTCGGCGGCCGTGATCAGGTCGGACTGCAGCAGCCGGGTAATGAATGTGCCCGTCCCGGCGAAGCCGTCCAGAATATGCACGCCCTCGTCGGTCAGCCCGCGCCCGAAATGCTTGCGCGACACGAAATCAGCCGCCCGCACAATGAAGTCCACGACCTCGACCGGCGTGTACACGATCCCCAGCGCCTCGGCCTGCTTCTTGAAGCCGATGCGGAAGAACTTCTCGTACAGCTCGGCGATCACCTGCTGCTTGCCCTCGGCGCTGGTGACCTCGCCGGCGCGCCGTCGCACCGATTCGTAAAAGCCTTCCAACCGAGCGGTTTCGGCCTCCAGGCCGGCACCCCCGACGGTGTCGACCATCTTCTGCATGGCCCGCGACACCGGGTTGTGCGACGCGAAGTCATGCCCGGCGAACAGCGCGTCGAACACCGGCTTGGTGATCAGGTGCTGCGAGAGCATGCTGATCGCGTCATCGGGGGTGATCGAGTCATTGAGGTTATCGCGCAGCCCGGCCAGGAACTGCTCGAACGCCGCCGCCGCCGTAGCGTCGGCGCCGCCGAGCAGGGCGTGGATACGGGTGGTCAGCGTCGCGGCGATGTCGGCGACATCGGCGGCCCACTGCTCCCAATAGGTCCGGGTGCCAACCTTGTCGACGATGCGCGCGTAGATCGCTTCCTGCCACTGCGACAACGAGAACATCGCCAACTGCTCCGCGACGGCGGGTCCCGCCTCGTCGGAGGTCGGCCCGATGTGACCGCCCAACAGCTTGTCGCTGCCTTCACCGGTCTTCGTCGGCTTCACGTTCAGCGCAATGCTGTTCACCATCGCGTCGAAGCGCTCGTCGTGCGACCGCAACGCGTTGAGGACCTGCCACACCACCTTGAACCGTTTGTTGTCGGCCAACGCGGCAGACGGCTCGACACCCTCGGGCACCGCCACCGGCAAGATGACGTACCCGTAGTCCTTGCCGGGCGACTTGCGCATCACCCGACCGACCGACTGCACCACGTCGACGATGGAATTGCGCGGATTCAGGAACAGCACCGCGTCCAGCGCGGGCACGTCGACCCCTTCGGAGAGGCAGCGGGCGTTGGACAGGATGCGGCATTCATCCTCGGCGACCACGCCTTTGAGCCAGGCCAGCTGTTCGTTGCGGACCAGCGCGTTGAACGTCCCGTCCACGTGGCGCACCGAACACGCCAGGCCCGGGCCGTCGTCAACCA" ]
def test_double_insertion(self): v = Variant.create(variant_sets=self.variant_sets, reference=self.reference, reference_bases="A", start=4021408, alternate_bases=["ACGCTGGCGGGCG"]) v1 = Variant.create(variant_sets=self.variant_sets, reference=self.reference, reference_bases="AGA", start=4021406, alternate_bases=["CGG"]) context = [v1] assert self.pg2._remove_overlapping_contexts(v, [v1]) == [] panel = self.pg2.create(v, context=context) assert_no_overlapping_kmers(panel) assert "ATCTAGCCGCAAGGGCGCGAGCAGACGCAGAATCGCATGATTTGAGCTCAAATCATGCGAT" in panel.refs assert panel.alts == [ "TCTAGCCGCAAGGGCGCGAGCAGACGCAGACGCTGGCGGGCGATCGCATGATTTGAGCTCAAATCATGCGAT" ]
def test_simple_snp_variant(self): v = Variant.create( variant_sets=self.variant_sets, reference=self.reference, reference_bases="A", start=31, alternate_bases=["T"]) panel = self.pg.create(v) assert panel.refs[0][:31] != panel.alts[0][:31] assert panel.refs[0][-32:] != panel.alts[0][-32:] assert panel.refs[0][-31:] != panel.alts[0][-31:] assert_no_overlapping_kmers(panel) assert panel.refs == [ "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG"] assert panel.alts == [ "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTG"] assert self.pg._calculate_length_delta_from_indels(v, []) == 0 assert v.is_indel is False