コード例 #1
0
class TestSNPAlleleGenerator():
    """Check the ref/alt probe sequences ``AlleleGenerator`` builds for SNPs.

    Each test creates one or more single-base ``Variant`` objects and compares
    the ``refs`` / ``alts`` of the panel returned by ``AlleleGenerator.create``
    with hard-coded expected sequences.
    """

    # FASTA file every generator in this class is built from.
    REFERENCE_FILEPATH = "src/mykrobe/data/BX571856.1.fasta"

    def setup_method(self):
        """Drop the test database and rebuild the shared fixtures."""
        DB.drop_database('mykrobe-test')
        self.pg = AlleleGenerator(reference_filepath=self.REFERENCE_FILEPATH)
        self.reference_set = ReferenceSet().create_and_save(name="ref_set")
        self.variant_set = VariantSet.create_and_save(
            name="this_vcf_file", reference_set=self.reference_set)
        self.variant_sets = [self.variant_set]
        self.reference = Reference().create_and_save(
            name="ref", md5checksum="sre", reference_sets=[self.reference_set])

    # Nose-style ``setup`` is no longer invoked by pytest >= 8; the fixture is
    # therefore named ``setup_method`` and the old name is kept as an alias.
    setup = setup_method

    def _snp(self, reference_bases, start, alternate_base):
        """Create a single-alternate Variant bound to the shared fixtures."""
        return Variant.create(variant_sets=self.variant_sets,
                              reference=self.reference,
                              reference_bases=reference_bases,
                              start=start,
                              alternate_bases=[alternate_base])

    def test_panel_generator(self):
        """A freshly built generator exposes a non-None ``ref``."""
        pg = AlleleGenerator(reference_filepath=self.REFERENCE_FILEPATH)
        assert pg.ref is not None

    def test_simple_variant(self):
        """A>T at position 31 with no context variants."""
        v = self._snp("A", 31, "T")
        panel = self.pg.create(v)
        assert panel.refs == [
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT"
        ]
        assert panel.alts == [
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTGAT"
        ]
        assert self.pg._calculate_length_delta_from_indels(v, []) == 0
        assert v.is_indel is False

    def test_simple_variant2(self):
        """A>T at position 32 with no context variants."""
        v = self._snp("A", 32, "T")
        panel = self.pg.create(v)
        assert panel.refs == [
            "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGATC"
        ]
        assert panel.alts == [
            "GATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGATC"
        ]

    def test_simple_variant_invalid(self):
        """A variant whose ref and alt are both "T" at 31 raises ValueError."""
        # Both calls stay inside the context manager, as in the original test:
        # either one is allowed to be the call that raises.
        with pytest.raises(ValueError):
            v = self._snp("T", 31, "T")
            self.pg.create(v)

    def test_simple_variant_start(self):
        """C>T at position 1."""
        v = self._snp("C", 1, "T")
        panel = self.pg.create(v)
        assert panel.refs == [
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT"
        ]
        assert panel.alts == [
            "TGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT"
        ]

    def test_simple_variant_end(self):
        """SNPs at positions 2902618 and 2902616."""
        v = self._snp("A", 2902618, "T")
        panel = self.pg.create(v)
        assert panel.refs == [
            "TAACAAAATCCTTTTTATAACGCAAGTTCATTTTATACTACTGCTCAATTTTTTTACTTTTAT"
        ]
        assert panel.alts == [
            "TAACAAAATCCTTTTTATAACGCAAGTTCATTTTATACTACTGCTCAATTTTTTTACTTTTTT"
        ]

        v = self._snp("T", 2902616, "C")
        panel = self.pg.create(v)
        assert panel.refs == [
            "TAACAAAATCCTTTTTATAACGCAAGTTCATTTTATACTACTGCTCAATTTTTTTACTTTTAT"
        ]
        assert panel.alts == [
            "TAACAAAATCCTTTTTATAACGCAAGTTCATTTTATACTACTGCTCAATTTTTTTACTTCTAT"
        ]

    def test_simple_variant_with_nearby_snp(self):
        """A>T at 31 with one context SNP at 32."""
        v = self._snp("A", 31, "T")
        v2 = self._snp("A", 32, "T")
        panel = self.pg.create(v, context=[v2])
        assert panel.refs == [
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGAT"
        ]
        assert panel.alts == [
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTGAT"
        ]

    def test_simple_variant_with_multiple_nearby_snps(self):
        """A>T at 31 with context SNPs at 32 and 30."""
        v = self._snp("A", 31, "T")
        v2 = self._snp("A", 32, "T")
        v3 = self._snp("C", 30, "G")

        panel = self.pg.create(v, context=[v2, v3])
        assert panel.refs == [
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTGAT"
        ]
        assert panel.alts == [
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTGAT"
        ]

    def test_simple_variant_with_multiple_nearby_snps2(self):
        """Three alternates at position 30 plus one context SNP at 32."""
        v = self._snp("A", 31, "T")
        v2 = self._snp("A", 32, "T")
        v3 = self._snp("C", 30, "G")
        v4 = self._snp("C", 30, "T")
        v5 = self._snp("C", 30, "A")
        assert sorted(self.pg._split_context([v, v3, v4])) == sorted([[v, v4],
                                                                      [v, v3]])
        assert (self.pg._split_context([v3, v4])) == [[v4], [v3]]
        assert (self.pg._split_context([v, v3, v4, v5])) == [[v, v4, v5],
                                                             [v, v3, v5]]
        panel = self.pg.create(v, context=[v2, v3, v4, v5])
        assert sorted(panel.refs) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTATTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGAAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGAATTCAAATTTCATAACATCACCATGAGTTTGAT"
        ])
        assert sorted(panel.alts) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGATATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGATTTCAAATTTCATAACATCACCATGAGTTTGAT"
        ])

    def test_simple_variant_with_multiple_nearby_snps3(self):
        """Two alternates at position 32 and two alternates at position 30.

        This test used to share its name with
        ``test_simple_variant_with_multiple_nearby_snps``; the later definition
        replaced the earlier one in the class namespace, so the earlier test
        never ran. It now has its own name so both are collected.
        """
        v = self._snp("A", 31, "T")
        v2 = self._snp("A", 32, "T")
        v5 = self._snp("A", 32, "G")
        v3 = self._snp("C", 30, "G")
        v4 = self._snp("C", 30, "T")
        panel = self.pg.create(v, context=[v2, v3, v4, v5])
        assert sorted(panel.refs) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTATTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCAGTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGAGTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTAGTCAAATTTCATAACATCACCATGAGTTTGAT"
        ])
        assert sorted(panel.alts) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCTGTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTGTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGTTGTCAAATTTCATAACATCACCATGAGTTTGAT"
        ])
コード例 #2
0
class TestINDELAlleleGenerator():
    """Check the ref/alt probe sequences ``AlleleGenerator`` builds for indels.

    ``self.pg`` is built from the BX571856.1 FASTA and ``self.pg2`` from the
    NC_000962.3 FASTA. Each test creates insertion/deletion ``Variant`` objects
    and compares the panel returned by ``AlleleGenerator.create`` with
    hard-coded expected sequences.
    """

    def setup_method(self):
        """Drop the test database and rebuild the shared fixtures."""
        DB.drop_database('mykrobe-test')

        self.pg = AlleleGenerator(
            reference_filepath=f"{DATA_DIR}/BX571856.1.fasta")
        self.pg2 = AlleleGenerator(
            reference_filepath=f"{DATA_DIR}/NC_000962.3.fasta")
        self.reference_set = ReferenceSet().create_and_save(name="ref_set")
        self.variant_set = VariantSet.create_and_save(
            name="this_vcf_file", reference_set=self.reference_set)
        self.variant_sets = [self.variant_set]
        self.reference = Reference().create_and_save(
            name="ref", md5checksum="sre", reference_sets=[self.reference_set])

    # Nose-style ``setup`` is no longer invoked by pytest >= 8; the fixture is
    # therefore named ``setup_method`` and the old name is kept as an alias.
    setup = setup_method

    def _variant(self, reference_bases, start, alternate_bases):
        """Create a Variant bound to the shared variant sets and reference.

        ``alternate_bases`` is passed through unchanged, so callers supply a
        list exactly as they would to ``Variant.create``.
        """
        return Variant.create(variant_sets=self.variant_sets,
                              reference=self.reference,
                              reference_bases=reference_bases,
                              start=start,
                              alternate_bases=alternate_bases)

    def test_simple_deletion1(self):
        """AA>A at position 31."""
        v = self._variant("AA", 31, ["A"])
        assert v.is_indel
        assert v.is_deletion
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG" in panel.refs
        assert self.pg._calculate_length_delta_from_indels(v, []) == 1
        assert panel.alts == [
            "GATTAAAGATAGAAATACACGATGCGAGCATCAAATTTCATAACATCACCATGAGTTTG"
        ]

    def test_simple_deletion2(self):
        """AT>A at position 32."""
        v = self._variant("AT", 32, ["A"])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA" in panel.refs
        assert panel.alts == [
            "ATTAAAGATAGAAATACACGATGCGAGCAACAAATTTCATAACATCACCATGAGTTTGAT"
        ]

    def test_simple_deletion3(self):
        """AT>T at position 2902618; expected probes are padded with N."""
        v = self._variant("AT", 2902618, ["T"])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "TTTATACTACTGCTCAATTTTTTTACTTTTATNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" in panel.refs
        assert panel.alts == [
            "TTTATACTACTGCTCAATTTTTTTACTTTTTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]

    def test_simple_deletion4(self):
        """ATC>A at position 32."""
        v = self._variant("ATC", 32, ["A"])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA" in panel.refs
        assert panel.alts == [
            "ATTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT"
        ]

    def test_simple_insertion1(self):
        """C>TTTC at position 1."""
        v = self._variant("C", 1, ["TTTC"])
        panel = self.pg.create(v)
        # assert_no_overlapping_kmers(panel) is deliberately not called here:
        # that check is skipped for variants in the first k bases of the ref.
        assert v.is_indel
        assert v.is_insertion
        assert "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG" in panel.refs
        assert panel.alts == ["TTTCGATTAAAGATAGAAATACACGATGCGAGC"]

    def test_simple_insertion2(self):
        """C>CTTT at position 1."""
        v = self._variant("C", 1, ["CTTT"])
        panel = self.pg.create(v)
        # assert_no_overlapping_kmers(panel) is deliberately not called here:
        # that check is skipped for variants in the first k bases of the ref.
        assert "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG" in panel.refs
        assert panel.alts == ["CTTTGATTAAAGATAGAAATACACGATGCGAGCA"]

    def test_simple_insertion3(self):
        """A>ATTT at position 31."""
        v = self._variant("A", 31, ["ATTT"])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG" in panel.refs
        assert panel.alts == [
            "GATTAAAGATAGAAATACACGATGCGAGCATTTATCAAATTTCATAACATCACCATGAGTTTG"
        ]

    def test_simple_insertion4(self):
        """A>AGGGG at position 32."""
        v = self._variant("A", 32, ["AGGGG"])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA" in panel.refs
        assert panel.alts == [
            "ATTAAAGATAGAAATACACGATGCGAGCAAGGGGTCAAATTTCATAACATCACCATGAGTTTGA"
        ]

    def test_simple_insertion5(self):
        """A>ATGC at position 2902618; expected probes are padded with N."""
        v = self._variant("A", 2902618, ["ATGC"])
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)
        assert "TTTATACTACTGCTCAATTTTTTTACTTTTATNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" in panel.refs
        assert panel.alts == [
            "TATACTACTGCTCAATTTTTTTACTTTTATGCTNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]

    def test_double_insertion(self):
        """An insertion whose only context variant overlaps it."""
        v = self._variant("A", 4021408, ["ACGCTGGCGGGCG"])
        v1 = self._variant("AGA", 4021406, ["CGG"])
        context = [v1]
        # The overlapping context variant is expected to be filtered out.
        assert self.pg2._remove_overlapping_contexts(v, [v1]) == []
        panel = self.pg2.create(v, context=context)
        assert_no_overlapping_kmers(panel)
        assert "ATCTAGCCGCAAGGGCGCGAGCAGACGCAGAATCGCATGATTTGAGCTCAAATCATGCGAT" in panel.refs
        assert panel.alts == [
            "TCTAGCCGCAAGGGCGCGAGCAGACGCAGACGCTGGCGGGCGATCGCATGATTTGAGCTCAAATCATGCGAT"
        ]

    def test_double_indel_fail(self):
        """A deletion with an adjacent insertion as context.

        The reference probe must be present in ``refs`` and absent from
        ``alts``.
        """
        v = self._variant("CCA", 2288851, ["A"])
        v1 = self._variant("A", 2288850, ["ACC"])
        context = [v1]
        panel = self.pg2.create(v, context=context)
        assert "GGCGCACACAATGATCGGTGGCAATACCGACCACATCGACCTCATCGACGCCGCGTTGCCG" in panel.refs
        assert "GGCGCACACAATGATCGGTGGCAATACCGACCACATCGACCTCATCGACGCCGCGTTGCCG" not in panel.alts

    def test_large_insertion(self):
        """An 18-base reference allele replaced by a much longer alternate."""
        v = self._variant(
            "CCGCCGGCCCCGCCGTTT",
            1636155,
            [
                "CTGCCGGCCCCGCCGGCGCCGCCCAATCCACCGAAGCCCCTCCCTTCGGTGGGGTCGCTGCCGCCGTCGCCGCCGTCACCGCCCTTGCCGCCGGCCCCGCCGTCGCCGCCGGCTCCGGCGGTGCCGTCGCCGCCCTGGCCGCCGGCCCCGCCGTTTCCG"
            ])
        panel = self.pg2.create(v, context=[])
        assert_no_overlapping_kmers(panel)
        assert "AGACCTAGCAGGGTGCCGGCGCCGCCCTTGCCGCCGGCCCCGCCGTTTCCGCCGCCGCCAT" in panel.refs
        assert panel.alts == [
            "GACCTAGCAGGGTGCCGGCGCCGCCCTTGCTGCCGGCCCCGCCGGCGCCGCCCAATCCACCGAAGCCCCTCCCTTCGGTGGGGTCGCTGCCGCCGTCGCCGCCGTCACCGCCCTTGCCGCCGGCCCCGCCGTCGCCGCCGGCTCCGGCGGTGCCGTCGCCGCCCTGGCCGCCGGCCCCGCCGTTTCCGCCGCCGCCGCCATCGCCGATGATGTTTTCC"
        ]
コード例 #3
0
ファイル: test_snp_only.py プロジェクト: martinghunt/mykrobe
class TestSNPAlleleGenerator:
    """Check the SNP probes built by ``AlleleGenerator`` with ``kmer=31``.

    Each test creates one or more single-base ``Variant`` objects and compares
    the ``refs`` / ``alts`` of the panel returned by ``AlleleGenerator.create``
    with hard-coded expected sequences.
    """

    def setup_method(self):
        """Drop the test database and rebuild the shared fixtures."""
        DB.drop_database("mykrobe-test")
        self.pg = AlleleGenerator(
            reference_filepath=f"{DATA_DIR}/BX571856.1.fasta", kmer=31)
        self.reference_set = ReferenceSet().create_and_save(name="ref_set")
        self.variant_set = VariantSet.create_and_save(
            name="this_vcf_file", reference_set=self.reference_set)
        self.variant_sets = [self.variant_set]
        self.reference = Reference().create_and_save(
            name="ref", md5checksum="sre", reference_sets=[self.reference_set])

    # Nose-style ``setup`` is no longer invoked by pytest >= 8; the fixture is
    # therefore named ``setup_method`` and the old name is kept as an alias.
    setup = setup_method

    def _snp(self, reference_bases, start, alternate_base):
        """Create a single-alternate Variant bound to the shared fixtures."""
        return Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=reference_bases,
            start=start,
            alternate_bases=[alternate_base],
        )

    def test_panel_generator(self):
        """A freshly built generator exposes a non-None ``ref``."""
        pg = AlleleGenerator(reference_filepath=f"{DATA_DIR}/BX571856.1.fasta",
                             kmer=31)
        assert pg.ref is not None

    def test_simple_snp_variant(self):
        """A>T at position 31 with no context variants."""
        v = self._snp("A", 31, "T")
        panel = self.pg.create(v)
        # The leading 31 bases and the trailing 31/32 bases of the first ref
        # and alt probes must all differ.
        assert panel.refs[0][:31] != panel.alts[0][:31]
        assert panel.refs[0][-32:] != panel.alts[0][-32:]
        assert panel.refs[0][-31:] != panel.alts[0][-31:]

        assert_no_overlapping_kmers(panel)

        assert panel.refs == [
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG"
        ]
        assert panel.alts == [
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTG"
        ]
        assert self.pg._calculate_length_delta_from_indels(v, []) == 0
        assert v.is_indel is False

    def test_simple_variant2(self):
        """A>T at position 32 with no context variants."""
        v = self._snp("A", 32, "T")
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)

        assert panel.refs == [
            "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA"
        ]
        assert panel.alts == [
            "GATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGA"
        ]

    def test_simple_variant_invalid(self):
        """A variant whose ref and alt are both "T" at 31 raises ValueError."""
        # Both calls stay inside the context manager, as in the original test:
        # either one is allowed to be the call that raises.
        with pytest.raises(ValueError):
            v = self._snp("T", 31, "T")
            self.pg.create(v)

    def test_simple_variant_start(self):
        """C>T at position 1."""
        v = self._snp("C", 1, "T")
        panel = self.pg.create(v)
        # assert_no_overlapping_kmers(panel) is intentionally not called for
        # this case. NOTE(review): the original explanatory comment is cut off
        # ("Will have overlapping kmers only if the SNP is in the i...") --
        # confirm the exact condition with the author.
        assert panel.refs == [
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG"
        ]
        assert panel.alts == [
            "TGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG"
        ]

    def test_simple_variant_end(self):
        """SNPs at 2902618 and 2902616; expected probes are padded with N."""
        v = self._snp("A", 2902618, "T")
        panel = self.pg.create(v)
        assert_no_overlapping_kmers(panel)

        assert panel.refs == [
            "TTTATACTACTGCTCAATTTTTTTACTTTTATNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]
        assert panel.alts == [
            "TTTATACTACTGCTCAATTTTTTTACTTTTTTNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]

        v = self._snp("T", 2902616, "C")
        panel = self.pg.create(v)
        assert panel.refs == [
            "ATTTTATACTACTGCTCAATTTTTTTACTTTTATNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]
        assert panel.alts == [
            "ATTTTATACTACTGCTCAATTTTTTTACTTCTATNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]

    def test_simple_variant_with_nearby_snp(self):
        """A>T at 31 with one context SNP at 32 (order-insensitive check)."""
        v = self._snp("A", 31, "T")
        v2 = self._snp("A", 32, "T")
        panel = self.pg.create(v, context=[v2])
        assert_no_overlapping_kmers(panel)

        assert set(panel.refs) == {
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTG",
        }
        assert set(panel.alts) == {
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTG",
        }

    def test_simple_variant_with_multiple_nearby_snps(self):
        """A>T at 31 with context SNPs at 32 and 30.

        This test was previously dead code: a later method in the class had
        the same name and replaced it, and its expected values were still
        63-base probes. The expectations below are the same four ref/alt
        combinations trimmed to the 61-base form every other test in this
        class asserts, compared order-insensitively like the sibling
        multi-SNP tests.
        NOTE(review): the trimmed values are inferred from the sibling tests,
        not from a test run -- confirm against the generator's real output.
        """
        v = self._snp("A", 31, "T")
        v2 = self._snp("A", 32, "T")
        v3 = self._snp("C", 30, "G")

        panel = self.pg.create(v, context=[v2, v3])
        assert_no_overlapping_kmers(panel)

        assert sorted(panel.refs) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTG",
        ])
        assert sorted(panel.alts) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTG",
        ])

    def test_simple_variant_with_multiple_nearby_snps2(self):
        """Three alternates at position 30 plus one context SNP at 32."""
        v = self._snp("A", 31, "T")
        v2 = self._snp("A", 32, "T")
        v3 = self._snp("C", 30, "G")
        v4 = self._snp("C", 30, "T")
        v5 = self._snp("C", 30, "A")

        assert sorted(self.pg._split_context([v, v3, v4])) == sorted([[v, v4],
                                                                      [v, v3]])
        assert (self.pg._split_context([v3, v4])) == [[v4], [v3]]
        assert (self.pg._split_context([v, v3, v4, v5])) == [[v, v4, v5],
                                                             [v, v3, v5]]
        panel = self.pg.create(v, context=[v2, v3, v4, v5])
        assert_no_overlapping_kmers(panel)
        assert sorted(panel.refs) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTATTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGAAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGAATTCAAATTTCATAACATCACCATGAGTTTG",
        ])
        assert sorted(panel.alts) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTTTTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGATATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGATTTCAAATTTCATAACATCACCATGAGTTTG",
        ])

    def test_simple_variant_with_multiple_nearby_snps3(self):
        """Two alternates at position 32 and two alternates at position 30.

        This test used to share its name with
        ``test_simple_variant_with_multiple_nearby_snps``; it now has its own
        name so that both tests are collected.
        """
        v = self._snp("A", 31, "T")
        v2 = self._snp("A", 32, "T")
        v5 = self._snp("A", 32, "G")
        v3 = self._snp("C", 30, "G")
        v4 = self._snp("C", 30, "T")
        panel = self.pg.create(v, context=[v2, v3, v4, v5])
        assert_no_overlapping_kmers(panel)
        assert sorted(panel.refs) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTATTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCAGTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGAGTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTAGTCAAATTTCATAACATCACCATGAGTTTG",
        ])
        assert sorted(panel.alts) == sorted([
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTTTTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCTGTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGGTGTCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGTTGTCAAATTTCATAACATCACCATGAGTTTG",
        ])