class TestSNPAlleleGenerator:
    """SNP probe tests for ``AlleleGenerator`` built without an explicit ``kmer``.

    All expected probes in this class are 63 bases long and are taken from
    the start or end of the BX571856.1 reference.

    NOTE(review): a second class named ``TestSNPAlleleGenerator`` appears
    later in this file. If both really live in the same module, the later
    definition replaces this one and these tests are never collected --
    confirm whether this class is still meant to run.
    """

    def setup_method(self):
        """Reset the test database and build the fixtures shared by every test.

        This is pytest's xunit-style ``setup_method`` hook rather than the
        nose-style ``setup``: pytest deprecated the latter in 7.2 and stopped
        calling it in 8.0, which would leave ``self.pg`` and friends unset.
        """
        DB.drop_database('mykrobe-test')
        self.pg = AlleleGenerator(
            reference_filepath="src/mykrobe/data/BX571856.1.fasta")
        self.reference_set = ReferenceSet().create_and_save(name="ref_set")
        self.variant_set = VariantSet.create_and_save(
            name="this_vcf_file", reference_set=self.reference_set)
        self.variant_sets = [self.variant_set]
        self.reference = Reference().create_and_save(
            name="ref", md5checksum="sre",
            reference_sets=[self.reference_set])

    def _make_variant(self, reference_bases, start, alternate_bases):
        """Create a ``Variant`` bound to the shared variant sets and reference."""
        return Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=reference_bases,
            start=start,
            alternate_bases=alternate_bases,
        )

    def test_panel_generator(self):
        """A freshly constructed generator exposes a non-None ``ref``."""
        pg = AlleleGenerator(
            reference_filepath="src/mykrobe/data/BX571856.1.fasta")
        assert pg.ref is not None

    def test_simple_variant(self):
        """A->T SNP at start=31 yields one ref probe and one alt probe."""
        v = self._make_variant("A", 31, ["T"])
        panel = self.pg.create(v)
        assert panel.refs == [
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT"
        ]
        assert panel.alts == [
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTGAT"
        ]
        # A SNP changes no length and is not classified as an indel.
        assert self.pg._calculate_length_delta_from_indels(v, []) == 0
        assert v.is_indel is False

    def test_simple_variant2(self):
        """A->T SNP at start=32: the probe window is shifted by one base."""
        v = self._make_variant("A", 32, ["T"])
        panel = self.pg.create(v)
        assert panel.refs == [
            "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGATC"
        ]
        assert panel.alts == [
            "GATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGATC"
        ]

    def test_simple_variant_invalid(self):
        """Declaring 'T' as the reference base at start=31 raises ValueError.

        The other tests in this class use 'A' as the reference base there.
        """
        # Both calls stay inside the context manager: SOURCE does not show
        # which of them performs the validation.
        with pytest.raises(ValueError):
            v = self._make_variant("T", 31, ["T"])
            self.pg.create(v)

    def test_simple_variant_start(self):
        """C->T SNP on the very first base (start=1) of the reference."""
        v = self._make_variant("C", 1, ["T"])
        panel = self.pg.create(v)
        assert panel.refs == [
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT"
        ]
        assert panel.alts == [
            "TGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT"
        ]

    def test_simple_variant_end(self):
        """SNPs at starts 2902618 and 2902616 share the same ref probe."""
        v = self._make_variant("A", 2902618, ["T"])
        panel = self.pg.create(v)
        assert panel.refs == [
            "TAACAAAATCCTTTTTATAACGCAAGTTCATTTTATACTACTGCTCAATTTTTTTACTTTTAT"
        ]
        assert panel.alts == [
            "TAACAAAATCCTTTTTATAACGCAAGTTCATTTTATACTACTGCTCAATTTTTTTACTTTTTT"
        ]

        v = self._make_variant("T", 2902616, ["C"])
        panel = self.pg.create(v)
        assert panel.refs == [
            "TAACAAAATCCTTTTTATAACGCAAGTTCATTTTATACTACTGCTCAATTTTTTTACTTTTAT"
        ]
        assert panel.alts == [
            "TAACAAAATCCTTTTTATAACGCAAGTTCATTTTATACTACTGCTCAATTTTTTTACTTCTAT"
        ]

    def test_simple_variant_with_nearby_snp(self):
        """One context SNP (start=32) doubles both the ref and alt probes."""
        v = self._make_variant("A", 31, ["T"])
        v2 = self._make_variant("A", 32, ["T"])
        panel = self.pg.create(v, context=[v2])
        assert panel.refs == [
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGAT",
        ]
        assert panel.alts == [
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTGAT",
        ]

    def test_simple_variant_with_multiple_nearby_snps(self):
        """Context SNPs at starts 30 and 32 give four ref and four alt probes."""
        v = self._make_variant("A", 31, ["T"])
        v2 = self._make_variant("A", 32, ["T"])
        v3 = self._make_variant("C", 30, ["G"])
        panel = self.pg.create(v, context=[v2, v3])
        assert panel.refs == [
            'CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT',
            'CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGAT',
            'CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTGAT',
            'CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTGAT',
        ]
        assert panel.alts == [
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTGAT",
        ]

    def test_simple_variant_with_multiple_nearby_snps2(self):
        """Three alternative alleles at start=30 plus one context SNP at 32."""
        v = self._make_variant("A", 31, ["T"])
        v2 = self._make_variant("A", 32, ["T"])
        v3 = self._make_variant("C", 30, ["G"])
        v4 = self._make_variant("C", 30, ["T"])
        v5 = self._make_variant("C", 30, ["A"])

        # Expected groupings returned by _split_context for variants that
        # share a start position.
        assert sorted(self.pg._split_context([v, v3, v4])) == sorted(
            [[v, v4], [v, v3]])
        assert (self.pg._split_context([v3, v4])) == [[v4], [v3]]
        assert (self.pg._split_context([v, v3, v4, v5])) == [
            [v, v4, v5], [v, v3, v5]]

        panel = self.pg.create(v, context=[v2, v3, v4, v5])
        assert sorted(panel.refs) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTATTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGAAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGAATTCAAATTTCATAACATCACCATGAGTTTGAT",
        ])
        assert sorted(panel.alts) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGATATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGATTTCAAATTTCATAACATCACCATGAGTTTGAT",
        ])

    def test_simple_variant_with_multiple_nearby_snps3(self):
        """Two alleles at start=32 and two at start=30 give nine probes each.

        This test used to reuse the name
        ``test_simple_variant_with_multiple_nearby_snps``, which replaced the
        earlier method of that name so that only one of the two ever ran.
        """
        v = self._make_variant("A", 31, ["T"])
        v2 = self._make_variant("A", 32, ["T"])
        v5 = self._make_variant("A", 32, ["G"])
        v3 = self._make_variant("C", 30, ["G"])
        v4 = self._make_variant("C", 30, ["T"])
        panel = self.pg.create(v, context=[v2, v3, v4, v5])
        assert sorted(panel.refs) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTATTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCAGTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGAGTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTAGTCAAATTTCATAACATCACCATGAGTTTGAT",
        ])
        assert sorted(panel.alts) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCTGTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTGTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTTGTCAAATTTCATAACATCACCATGAGTTTGAT",
        ])
class TestINDELAlleleGenerator:
    """Insertion/deletion probe tests for ``AlleleGenerator``.

    ``self.pg`` is built on the BX571856.1 reference and ``self.pg2`` on
    NC_000962.3; both FASTA files are read from ``DATA_DIR``.
    """

    def setup_method(self):
        """Reset the test database and build the fixtures shared by every test.

        This is pytest's xunit-style ``setup_method`` hook rather than the
        nose-style ``setup``: pytest deprecated the latter in 7.2 and stopped
        calling it in 8.0, which would leave ``self.pg`` and friends unset.
        """
        DB.drop_database('mykrobe-test')
        self.pg = AlleleGenerator(
            reference_filepath=f"{DATA_DIR}/BX571856.1.fasta")
        self.pg2 = AlleleGenerator(
            reference_filepath=f"{DATA_DIR}/NC_000962.3.fasta")
        self.reference_set = ReferenceSet().create_and_save(name="ref_set")
        self.variant_set = VariantSet.create_and_save(
            name="this_vcf_file", reference_set=self.reference_set)
        self.variant_sets = [self.variant_set]
        self.reference = Reference().create_and_save(
            name="ref", md5checksum="sre",
            reference_sets=[self.reference_set])

    def _make_variant(self, reference_bases, start, alternate_bases):
        """Create a ``Variant`` bound to the shared variant sets and reference."""
        return Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=reference_bases,
            start=start,
            alternate_bases=alternate_bases,
        )

    def test_simple_deletion1(self):
        """AA->A at start=31 is a one-base deletion with a single alt probe."""
        v = self._make_variant("AA", 31, ["A"])
        assert v.is_indel
        assert v.is_deletion
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG" in panel.refs
        assert self.pg._calculate_length_delta_from_indels(v, []) == 1
        assert panel.alts == [
            "GATTAAAGATAGAAATACACGATGCGAGCATCAAATTTCATAACATCACCATGAGTTTG"
        ]

    def test_simple_deletion2(self):
        """AT->A deletion at start=32."""
        v = self._make_variant("AT", 32, ["A"])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA" in panel.refs
        assert panel.alts == [
            "ATTAAAGATAGAAATACACGATGCGAGCAACAAATTTCATAACATCACCATGAGTTTGAT"
        ]

    def test_simple_deletion3(self):
        """AT->T deletion at start=2902618; expected probes are N-padded."""
        v = self._make_variant("AT", 2902618, ["T"])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "TTTATACTACTGCTCAATTTTTTTACTTTTATNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" in panel.refs
        assert panel.alts == [
            "TTTATACTACTGCTCAATTTTTTTACTTTTTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]

    def test_simple_deletion4(self):
        """ATC->A two-base deletion at start=32."""
        v = self._make_variant("ATC", 32, ["A"])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA" in panel.refs
        assert panel.alts == [
            "ATTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT"
        ]

    def test_simple_insertion1(self):
        """C->TTTC insertion on the first base of the reference (start=1)."""
        v = self._make_variant("C", 1, ["TTTC"])
        panel = self.pg.create(v)
        # assert_no_overlapping_kmers(panel) is deliberately not called:
        # the check is skipped for variants in the first k bases of the ref.
        assert v.is_indel
        assert v.is_insertion
        assert "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG" in panel.refs
        assert panel.alts == ["TTTCGATTAAAGATAGAAATACACGATGCGAGC"]

    def test_simple_insertion2(self):
        """C->CTTT insertion on the first base of the reference (start=1)."""
        v = self._make_variant("C", 1, ["CTTT"])
        panel = self.pg.create(v)
        # assert_no_overlapping_kmers(panel) is deliberately not called:
        # the check is skipped for variants in the first k bases of the ref.
        assert "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG" in panel.refs
        assert panel.alts == ["CTTTGATTAAAGATAGAAATACACGATGCGAGCA"]

    def test_simple_insertion3(self):
        """A->ATTT insertion at start=31."""
        v = self._make_variant("A", 31, ["ATTT"])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG" in panel.refs
        assert panel.alts == [
            "GATTAAAGATAGAAATACACGATGCGAGCATTTATCAAATTTCATAACATCACCATGAGTTTG"
        ]

    def test_simple_insertion4(self):
        """A->AGGGG insertion at start=32."""
        v = self._make_variant("A", 32, ["AGGGG"])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA" in panel.refs
        assert panel.alts == [
            "ATTAAAGATAGAAATACACGATGCGAGCAAGGGGTCAAATTTCATAACATCACCATGAGTTTGA"
        ]

    def test_simple_insertion5(self):
        """A->ATGC insertion at start=2902618; expected probes are N-padded."""
        v = self._make_variant("A", 2902618, ["ATGC"])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "TTTATACTACTGCTCAATTTTTTTACTTTTATNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" in panel.refs
        assert panel.alts == [
            "TATACTACTGCTCAATTTTTTTACTTTTATGCTNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]

    def test_double_insertion(self):
        """A context variant overlapping the target is dropped from the panel.

        ``v1`` (AGA->CGG at 4021406) spans the target position 4021408, so
        ``_remove_overlapping_contexts`` is expected to return an empty list
        and the panel to contain a single alt probe.
        """
        v = self._make_variant("A", 4021408, ["ACGCTGGCGGGCG"])
        v1 = self._make_variant("AGA", 4021406, ["CGG"])
        context = [v1]
        assert self.pg2._remove_overlapping_contexts(v, [v1]) == []
        panel = self.pg2.create(v, context=context)
        assert_no_overlapping_kmers(panel)
        assert "ATCTAGCCGCAAGGGCGCGAGCAGACGCAGAATCGCATGATTTGAGCTCAAATCATGCGAT" in panel.refs
        assert panel.alts == [
            "TCTAGCCGCAAGGGCGCGAGCAGACGCAGACGCTGGCGGGCGATCGCATGATTTGAGCTCAAATCATGCGAT"
        ]

    def test_double_indel_fail(self):
        """An adjacent insertion in context must not make an alt equal the ref.

        The context insertion (A->ACC at 2288850) sits directly before the
        target deletion (CCA->A at 2288851); the reference probe must not
        show up among the alt probes.
        """
        v = self._make_variant("CCA", 2288851, ["A"])
        v1 = self._make_variant("A", 2288850, ["ACC"])
        context = [v1]
        panel = self.pg2.create(v, context=context)
        assert "GGCGCACACAATGATCGGTGGCAATACCGACCACATCGACCTCATCGACGCCGCGTTGCCG" in panel.refs
        assert "GGCGCACACAATGATCGGTGGCAATACCGACCACATCGACCTCATCGACGCCGCGTTGCCG" not in panel.alts

    def test_large_insertion(self):
        """A long alternate allele replacing 18 reference bases at 1636155."""
        v = self._make_variant(
            "CCGCCGGCCCCGCCGTTT",
            1636155,
            [
                "CTGCCGGCCCCGCCGGCGCCGCCCAATCCACCGAAGCCCCTCCCTTCGGTGGGGTCGCTGCCGCCGTCGCCGCCGTCACCGCCCTTGCCGCCGGCCCCGCCGTCGCCGCCGGCTCCGGCGGTGCCGTCGCCGCCCTGGCCGCCGGCCCCGCCGTTTCCG"
            ],
        )
        panel = self.pg2.create(v, context=[])
        assert_no_overlapping_kmers(panel)
        assert "AGACCTAGCAGGGTGCCGGCGCCGCCCTTGCCGCCGGCCCCGCCGTTTCCGCCGCCGCCAT" in panel.refs
        assert panel.alts == [
            "GACCTAGCAGGGTGCCGGCGCCGCCCTTGCTGCCGGCCCCGCCGGCGCCGCCCAATCCACCGAAGCCCCTCCCTTCGGTGGGGTCGCTGCCGCCGTCGCCGCCGTCACCGCCCTTGCCGCCGGCCCCGCCGTCGCCGCCGGCTCCGGCGGTGCCGTCGCCGCCCTGGCCGCCGGCCCCGCCGTTTCCGCCGCCGCCGCCATCGCCGATGATGTTTTCC"
        ]
class TestSNPAlleleGenerator:
    """SNP probe tests for ``AlleleGenerator`` built with ``kmer=31``.

    Expected probes in this class are 61 bases long and are taken from the
    start or end of the BX571856.1 reference read from ``DATA_DIR``.
    """

    def setup_method(self):
        """Reset the test database and build the fixtures shared by every test.

        This is pytest's xunit-style ``setup_method`` hook rather than the
        nose-style ``setup``: pytest deprecated the latter in 7.2 and stopped
        calling it in 8.0, which would leave ``self.pg`` and friends unset.
        """
        DB.drop_database("mykrobe-test")
        self.pg = AlleleGenerator(
            reference_filepath=f"{DATA_DIR}/BX571856.1.fasta", kmer=31)
        self.reference_set = ReferenceSet().create_and_save(name="ref_set")
        self.variant_set = VariantSet.create_and_save(
            name="this_vcf_file", reference_set=self.reference_set)
        self.variant_sets = [self.variant_set]
        self.reference = Reference().create_and_save(
            name="ref", md5checksum="sre",
            reference_sets=[self.reference_set])

    def _make_variant(self, reference_bases, start, alternate_bases):
        """Create a ``Variant`` bound to the shared variant sets and reference."""
        return Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=reference_bases,
            start=start,
            alternate_bases=alternate_bases,
        )

    def test_panel_generator(self):
        """A freshly constructed generator exposes a non-None ``ref``."""
        pg = AlleleGenerator(
            reference_filepath=f"{DATA_DIR}/BX571856.1.fasta", kmer=31)
        assert pg.ref is not None

    def test_simple_snp_variant(self):
        """A->T SNP at start=31 yields one 61-base ref and alt probe."""
        v = self._make_variant("A", 31, ["T"])
        panel = self.pg.create(v)
        # The leading 31 bases and the trailing 31/32 bases of the ref and
        # alt probes must all differ.
        assert panel.refs[0][:31] != panel.alts[0][:31]
        assert panel.refs[0][-32:] != panel.alts[0][-32:]
        assert panel.refs[0][-31:] != panel.alts[0][-31:]
        assert_no_overlapping_kmers(panel)
        assert panel.refs == [
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG"
        ]
        assert panel.alts == [
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTG"
        ]
        # A SNP changes no length and is not classified as an indel.
        assert self.pg._calculate_length_delta_from_indels(v, []) == 0
        assert v.is_indel is False

    def test_simple_variant2(self):
        """A->T SNP at start=32: the probe window is shifted by one base."""
        v = self._make_variant("A", 32, ["T"])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert panel.refs == [
            "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA"
        ]
        assert panel.alts == [
            "GATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGA"
        ]

    def test_simple_variant_invalid(self):
        """Declaring 'T' as the reference base at start=31 raises ValueError.

        The other tests in this class use 'A' as the reference base there.
        """
        # Both calls stay inside the context manager: SOURCE does not show
        # which of them performs the validation.
        with pytest.raises(ValueError):
            v = self._make_variant("T", 31, ["T"])
            self.pg.create(v)

    def test_simple_variant_start(self):
        """C->T SNP on the very first base (start=1) of the reference."""
        v = self._make_variant("C", 1, ["T"])
        panel = self.pg.create(v)
        # assert_no_overlapping_kmers(panel) is deliberately not called here.
        # Original note (truncated in the source): "Will have overlapping
        # kmers only if the SNP is in the i".
        assert panel.refs == [
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG"
        ]
        assert panel.alts == [
            "TGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG"
        ]

    def test_simple_variant_end(self):
        """SNPs at starts 2902618 and 2902616; expected probes are N-padded."""
        v = self._make_variant("A", 2902618, ["T"])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert panel.refs == [
            "TTTATACTACTGCTCAATTTTTTTACTTTTATNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]
        assert panel.alts == [
            "TTTATACTACTGCTCAATTTTTTTACTTTTTTNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]

        v = self._make_variant("T", 2902616, ["C"])
        panel = self.pg.create(v)
        assert panel.refs == [
            "ATTTTATACTACTGCTCAATTTTTTTACTTTTATNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]
        assert panel.alts == [
            "ATTTTATACTACTGCTCAATTTTTTTACTTCTATNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]

    def test_simple_variant_with_nearby_snp(self):
        """One context SNP (start=32) doubles both the ref and alt probes."""
        v = self._make_variant("A", 31, ["T"])
        v2 = self._make_variant("A", 32, ["T"])
        panel = self.pg.create(v, context=[v2])
        assert_no_overlapping_kmers(panel)
        assert set(panel.refs) == {
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTG",
        }
        assert set(panel.alts) == {
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTG",
        }

    def test_simple_variant_with_multiple_nearby_snps(self):
        """Context SNPs at starts 30 and 32 give four ref and four alt probes.

        NOTE(review): this method used to be replaced by a later method of
        the same name, so it never ran, and it still expected 63-base probes
        although every other test in this class expects 61-base probes for
        the same target. The expectations below were trimmed to 61 bases and
        are compared order-insensitively; they are derived from the sibling
        tests, not from a run -- confirm against the real generator.
        """
        v = self._make_variant("A", 31, ["T"])
        v2 = self._make_variant("A", 32, ["T"])
        v3 = self._make_variant("C", 30, ["G"])
        panel = self.pg.create(v, context=[v2, v3])
        assert_no_overlapping_kmers(panel)
        assert sorted(panel.refs) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTG",
        ])
        assert sorted(panel.alts) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTG",
        ])

    def test_simple_variant_with_multiple_nearby_snps2(self):
        """Three alternative alleles at start=30 plus one context SNP at 32."""
        v = self._make_variant("A", 31, ["T"])
        v2 = self._make_variant("A", 32, ["T"])
        v3 = self._make_variant("C", 30, ["G"])
        v4 = self._make_variant("C", 30, ["T"])
        v5 = self._make_variant("C", 30, ["A"])

        # Expected groupings returned by _split_context for variants that
        # share a start position.
        assert sorted(self.pg._split_context([v, v3, v4])) == sorted(
            [[v, v4], [v, v3]])
        assert (self.pg._split_context([v3, v4])) == [[v4], [v3]]
        assert (self.pg._split_context([v, v3, v4, v5])) == [
            [v, v4, v5], [v, v3, v5]]

        panel = self.pg.create(v, context=[v2, v3, v4, v5])
        assert_no_overlapping_kmers(panel)
        assert sorted(panel.refs) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTATTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGAAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGAATTCAAATTTCATAACATCACCATGAGTTTG",
        ])
        assert sorted(panel.alts) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTTTTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGATATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGATTTCAAATTTCATAACATCACCATGAGTTTG",
        ])

    def test_simple_variant_with_multiple_nearby_snps3(self):
        """Two alleles at start=32 and two at start=30 give nine probes each.

        This test used to reuse the name
        ``test_simple_variant_with_multiple_nearby_snps``, which replaced the
        earlier method of that name so that only one of the two ever ran.
        """
        v = self._make_variant("A", 31, ["T"])
        v2 = self._make_variant("A", 32, ["T"])
        v5 = self._make_variant("A", 32, ["G"])
        v3 = self._make_variant("C", 30, ["G"])
        v4 = self._make_variant("C", 30, ["T"])
        panel = self.pg.create(v, context=[v2, v3, v4, v5])
        assert_no_overlapping_kmers(panel)
        assert sorted(panel.refs) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTATTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCAGTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGAGTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTAGTCAAATTTCATAACATCACCATGAGTTTG",
        ])
        assert sorted(panel.alts) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTTTTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCTGTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGTGTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTTGTCAAATTTCATAACATCACCATGAGTTTG",
        ])