Example #1
0
def run(parser, args):
    """Write probe sequences for the selected SNP variants to stdout.

    Connects to the database named ``<DB_PREFIX>-<args.db_name>`` at
    ``args.db_uri``, sets the module logger level from ``args.verbose``,
    and pages (100 at a time, ordered by ``start``) through the SNPs whose
    ids are returned by ``get_non_singelton_variants``. For each variant a
    probe panel is built with ``make_variant_probe``; every ref and alt
    sequence is written as a ``>``-prefixed header line followed by the
    sequence on its own line.

    Args:
        parser: Not used by this function.
        args: Object providing ``db_name``, ``db_uri``, ``verbose``,
            ``reference_filepath`` and ``kmer``.
    """
    db_name = '%s-%s' % (DB_PREFIX, args.db_name)
    DB = connect(db_name, host=args.db_uri)
    logger.setLevel(level=logging.DEBUG if args.verbose else logging.INFO)
    al = AlleleGenerator(reference_filepath=args.reference_filepath,
                         kmer=args.kmer)
    _variant_ids = get_non_singelton_variants(db_name)
    total = Variant.snps(id__in=_variant_ids).count()
    N = 100
    # When total is 0 there are no pages, so the percentage below is never
    # computed with a zero divisor.
    pages = math.ceil(total / N)
    for page in range(pages):
        offset = page * N
        logger.info("%i of %i - %f%%",
                    offset, total, round(100 * offset / total, 2))
        page_variants = Variant.snps(
            id__in=_variant_ids).order_by("start").skip(offset).limit(N)
        for variant in page_variants:
            variant_panel = make_variant_probe(al, variant, args.kmer, DB=DB)
            for i, ref in enumerate(variant_panel.refs):
                # Fields are '&'-separated, matching the alt header below;
                # the separator between var_name and num_alts used to be
                # missing, which fused the two fields together.
                sys.stdout.write(
                    ">ref-%s?var_name=%s&num_alts=%i&ref=%s&enum=%i\n" %
                    (variant_panel.variant.var_hash, variant.var_name[:100],
                     len(variant_panel.alts),
                     variant_panel.variant.reference.id, i))
                sys.stdout.write("%s\n" % ref)
            for i, a in enumerate(variant_panel.alts):
                sys.stdout.write(">alt-%s?var_name=%s&enum=%i\n" %
                                 (variant_panel.variant.var_hash,
                                  variant.var_name[:100], i))
                sys.stdout.write("%s\n" % a)
    def test_simple_variant_end(self):
        """Check the single ref/alt probe produced for two standalone SNPs.

        An A->T SNP at start 2902618 and a T->C SNP at start 2902616 are
        each passed to ``self.pg.create`` without context; ``refs`` and
        ``alts`` of the resulting panel must each equal a one-element list.
        """
        first_snp = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=2902618,
            alternate_bases=["T"],
        )
        first_panel = self.pg.create(first_snp)
        first_expected_refs = [
            "TAACAAAATCCTTTTTATAACGCAAGTTCATTTTATACTACTGCTCAATTTTTTTACTTTTAT",
        ]
        first_expected_alts = [
            "TAACAAAATCCTTTTTATAACGCAAGTTCATTTTATACTACTGCTCAATTTTTTTACTTTTTT",
        ]
        assert first_panel.refs == first_expected_refs
        assert first_panel.alts == first_expected_alts

        second_snp = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="T",
            start=2902616,
            alternate_bases=["C"],
        )
        second_panel = self.pg.create(second_snp)
        second_expected_refs = [
            "TAACAAAATCCTTTTTATAACGCAAGTTCATTTTATACTACTGCTCAATTTTTTTACTTTTAT",
        ]
        second_expected_alts = [
            "TAACAAAATCCTTTTTATAACGCAAGTTCATTTTATACTACTGCTCAATTTTTTTACTTCTAT",
        ]
        assert second_panel.refs == second_expected_refs
        assert second_panel.alts == second_expected_alts
    def test_indel_snp_indel_context(self):
        """Check a 9-base replacement with and without two context variants.

        The target replaces ``TCGCGTGGC`` at start 4021459. It is first
        turned into a panel on its own (only the ref probe is checked), then
        again with an insertion at 4021455 and a SNP at 4021489 as context
        (the full set of alt probes is checked, ignoring order).
        """
        target = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="TCGCGTGGC",
            start=4021459,
            alternate_bases=["GCGAGCAGA"],
        )
        insertion_before = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=4021455,
            alternate_bases=["ATCTAGCCGCAAG"],
        )
        snp_after = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="T",
            start=4021489,
            alternate_bases=["G"],
        )

        # First pass: no context variants supplied.
        bare_panel = self.pg2.create(target)
        assert_no_overlapping_kmers(bare_panel)
        expected_ref = (
            "ATCATGCGATTCTGCGTCTGCTCGCGAGGCTCGCGTGGCCGCCGGCGCTGGCGGGCGATCT"
        )
        assert expected_ref in bare_panel.refs

        # Second pass: both neighbouring variants supplied as context.
        context_panel = self.pg2.create(
            target, context=[insertion_before, snp_after])
        assert_no_overlapping_kmers(context_panel)
        expected_alts = [
            "ATCATGCGATTCTGCGTCTGCTCGCGAGGCGCGAGCAGACGCCGGCGCTGGCGGGCGATCG",
            "ATCATGCGATTCTGCGTCTGCTCGCGAGGCGCGAGCAGACGCCGGCGCTGGCGGGCGATCT",
            "TGCGTCTGCTCGCGATCTAGCCGCAAGGGCGCGAGCAGACGCCGGCGCTGGCGGGCGATCG",
            "TGCGTCTGCTCGCGATCTAGCCGCAAGGGCGCGAGCAGACGCCGGCGCTGGCGGGCGATCT",
        ]
        assert sorted(context_panel.alts) == sorted(expected_alts)
Example #4
0
    def test_simple_variant_end(self):
        """Check N-padded probes for two standalone SNPs.

        An A->T SNP at start 2902618 and a T->C SNP at start 2902616 are
        each passed to ``self.pg.create``; the expected ref/alt probes end
        in a run of ``N`` characters. Only the first panel is additionally
        passed to ``assert_no_overlapping_kmers``.
        """
        first_snp = Variant.create(variant_sets=self.variant_sets,
                                   reference=self.reference,
                                   reference_bases="A",
                                   start=2902618,
                                   alternate_bases=["T"])
        first_panel = self.pg.create(first_snp)
        assert_no_overlapping_kmers(first_panel)

        first_expected_refs = [
            "TTTATACTACTGCTCAATTTTTTTACTTTTATNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]
        first_expected_alts = [
            "TTTATACTACTGCTCAATTTTTTTACTTTTTTNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]
        assert first_panel.refs == first_expected_refs
        assert first_panel.alts == first_expected_alts

        second_snp = Variant.create(variant_sets=self.variant_sets,
                                    reference=self.reference,
                                    reference_bases="T",
                                    start=2902616,
                                    alternate_bases=["C"])
        second_panel = self.pg.create(second_snp)
        second_expected_refs = [
            "ATTTTATACTACTGCTCAATTTTTTTACTTTTATNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]
        second_expected_alts = [
            "ATTTTATACTACTGCTCAATTTTTTTACTTCTATNNNNNNNNNNNNNNNNNNNNNNNNNNN"
        ]
        assert second_panel.refs == second_expected_refs
        assert second_panel.alts == second_expected_alts
Example #5
0
    def test_del_with_ins_context_where_base_is_deleted2(self):
        """Check a deletion's panel with a deletion and an insertion as context.

        The target deletes ``TC`` after the ``A`` at start 32. The context is
        a ``TAAA``->``T`` deletion at start 5 and an ``A``->``AG`` insertion
        at start 7. The panel is built once per ordering of the two context
        variants and must satisfy the same expectations both times.
        """
        target = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="ATC",
            start=32,
            alternate_bases=["A"],
        )
        context_deletion = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="TAAA",
            start=5,
            alternate_bases=["T"],
        )
        context_insertion = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=7,
            alternate_bases=["AG"],
        )

        expected_ref = (
            "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGATC"
        )
        expected_alts = [
            "CGATTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGATCC",
            "CGATTGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGATCCAAA",
            "GATTAAGAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGATCC",
        ]

        # Same expectations regardless of the order the context is given in.
        orderings = (
            [context_deletion, context_insertion],
            [context_insertion, context_deletion],
        )
        for context in orderings:
            panel = self.pg.create(target, context=context)
            assert expected_ref in panel.refs
            assert sorted(panel.alts) == sorted(expected_alts)
Example #6
0
    def test_simple_variant_with_nearby_snp(self):
        """Check an A->T SNP at start 31 with an adjacent A->T SNP as context.

        With the SNP at start 32 supplied as context, the panel is expected
        to hold two ref probes and two alt probes (compared as sets).
        """
        target = Variant.create(variant_sets=self.variant_sets,
                                reference=self.reference,
                                reference_bases="A",
                                start=31,
                                alternate_bases=["T"])
        neighbour = Variant.create(variant_sets=self.variant_sets,
                                   reference=self.reference,
                                   reference_bases="A",
                                   start=32,
                                   alternate_bases=["T"])
        panel = self.pg.create(target, context=[neighbour])
        assert_no_overlapping_kmers(panel)

        expected_refs = {
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTG",
        }
        expected_alts = {
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTG",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTG",
        }
        assert set(panel.refs) == expected_refs
        assert set(panel.alts) == expected_alts
Example #7
0
    def setup(self):
        """Reset the test database and create the shared fixtures.

        Drops the ``mykrobe-test`` database, then creates a reference set, a
        variant set, a reference, a call set for sample ``C00123`` and two
        SNP variants spanning 0-1 (one with a single alternate base, one
        with two), storing each on ``self``.
        """
        DB.drop_database('mykrobe-test')

        reference_set = ReferenceSet().create_and_save(name="ref_set")
        self.reference_set = reference_set

        variant_set = VariantSet.create_and_save(
            name="this_vcf_file2",
            reference_set=reference_set,
        )
        self.variant_set = variant_set
        self.variant_sets = [variant_set]

        self.reference = Reference().create_and_save(
            name="ref",
            md5checksum="sre",
            reference_sets=[reference_set],
        )
        self.call_set = VariantCallSet.create(
            sample_id="C00123",
            name="C00123",
            variant_sets=self.variant_sets,
        )

        # Single-alternate SNP.
        self.variant_snp = Variant.create(
            variant_sets=self.variant_sets,
            start=0,
            end=1,
            reference_bases="A",
            alternate_bases=["T"],
            reference=self.reference,
        )
        # SNP with two alternate bases at the same position.
        self.variant_snp_mult_alts = Variant.create(
            variant_sets=self.variant_sets,
            start=0,
            end=1,
            reference_bases="T",
            alternate_bases=["A", "C"],
            reference=self.reference,
        )
    def test_simple_variant_with_multiple_nearby_snps(self):
        """Check an A->T SNP at start 31 with SNPs on both sides as context.

        Context is an A->T SNP at start 32 and a C->G SNP at start 30. The
        panel's ``refs`` and ``alts`` are compared as ordered lists of four
        probes each.
        """
        target = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=31,
            alternate_bases=["T"],
        )
        snp_after = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=32,
            alternate_bases=["T"],
        )
        snp_before = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="C",
            start=30,
            alternate_bases=["G"],
        )

        panel = self.pg.create(target, context=[snp_after, snp_before])

        # Order matters here: the lists are compared directly, not sorted.
        expected_refs = [
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTGAT",
        ]
        expected_alts = [
            "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTGAT",
            "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTGAT",
        ]
        assert panel.refs == expected_refs
        assert panel.alts == expected_alts
Example #9
0
 def test_simple_variant_with_multiple_nearby_snps(self):
     """Check an A->T SNP at start 31 with four neighbouring SNPs as context.

     Context is two SNPs at start 32 (A->T, A->G) and two at start 30
     (C->G, C->T). Nine ref probes and nine alt probes are expected,
     compared after sorting.
     """
     target = Variant.create(variant_sets=self.variant_sets,
                             reference=self.reference,
                             reference_bases="A",
                             start=31,
                             alternate_bases=["T"])
     after_a_to_t = Variant.create(variant_sets=self.variant_sets,
                                   reference=self.reference,
                                   reference_bases="A",
                                   start=32,
                                   alternate_bases=["T"])
     after_a_to_g = Variant.create(variant_sets=self.variant_sets,
                                   reference=self.reference,
                                   reference_bases="A",
                                   start=32,
                                   alternate_bases=["G"])
     before_c_to_g = Variant.create(variant_sets=self.variant_sets,
                                    reference=self.reference,
                                    reference_bases="C",
                                    start=30,
                                    alternate_bases=["G"])
     before_c_to_t = Variant.create(variant_sets=self.variant_sets,
                                    reference=self.reference,
                                    reference_bases="C",
                                    start=30,
                                    alternate_bases=["T"])

     context = [after_a_to_t, before_c_to_g, before_c_to_t, after_a_to_g]
     panel = self.pg.create(target, context=context)
     assert_no_overlapping_kmers(panel)

     expected_refs = [
         "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG",
         "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTG",
         "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTG",
         "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTG",
         "CGATTAAAGATAGAAATACACGATGCGAGTAATCAAATTTCATAACATCACCATGAGTTTG",
         "CGATTAAAGATAGAAATACACGATGCGAGTATTCAAATTTCATAACATCACCATGAGTTTG",
         "CGATTAAAGATAGAAATACACGATGCGAGCAGTCAAATTTCATAACATCACCATGAGTTTG",
         "CGATTAAAGATAGAAATACACGATGCGAGGAGTCAAATTTCATAACATCACCATGAGTTTG",
         "CGATTAAAGATAGAAATACACGATGCGAGTAGTCAAATTTCATAACATCACCATGAGTTTG",
     ]
     expected_alts = [
         "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTG",
         "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTG",
         "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTG",
         "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTG",
         "CGATTAAAGATAGAAATACACGATGCGAGTTATCAAATTTCATAACATCACCATGAGTTTG",
         "CGATTAAAGATAGAAATACACGATGCGAGTTTTCAAATTTCATAACATCACCATGAGTTTG",
         "CGATTAAAGATAGAAATACACGATGCGAGCTGTCAAATTTCATAACATCACCATGAGTTTG",
         "CGATTAAAGATAGAAATACACGATGCGAGGTGTCAAATTTCATAACATCACCATGAGTTTG",
         "CGATTAAAGATAGAAATACACGATGCGAGTTGTCAAATTTCATAACATCACCATGAGTTTG",
     ]
     assert sorted(panel.refs) == sorted(expected_refs)
     assert sorted(panel.alts) == sorted(expected_alts)
 def test_del_with_SNP_context2(self):
     """Check an ``AA``->``A`` deletion at start 31 with a SNP at start 32.

     ``_remove_overlapping_contexts`` is expected to return an empty list
     for this pair, and the panel is expected to contain exactly one alt
     probe.
     """
     deletion = Variant.create(variant_sets=self.variant_sets,
                               reference=self.reference,
                               reference_bases="AA",
                               start=31,
                               alternate_bases=["A"])
     context_snp = Variant.create(variant_sets=self.variant_sets,
                                  reference=self.reference,
                                  reference_bases="A",
                                  start=32,
                                  alternate_bases=["T"])

     panel = self.pg.create(deletion, context=[context_snp])
     assert_no_overlapping_kmers(panel)

     remaining = self.pg._remove_overlapping_contexts(deletion, [context_snp])
     assert remaining == []

     expected_ref = (
         "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG"
     )
     assert expected_ref in panel.refs

     expected_alts = [
         "GATTAAAGATAGAAATACACGATGCGAGCATCAAATTTCATAACATCACCATGAGTTTG",
     ]
     assert sorted(panel.alts) == sorted(expected_alts)
    def test_ins_with_SNP_context(self):
        """Check an ``A``->``ATTT`` insertion at start 31 with a SNP at 32.

        The reference probe must be present in ``panel.refs`` and the alt
        probes (compared after sorting) must be the two listed sequences.
        """
        insertion = Variant.create(variant_sets=self.variant_sets,
                                   reference=self.reference,
                                   reference_bases="A",
                                   start=31,
                                   alternate_bases=["ATTT"])
        context_snp = Variant.create(variant_sets=self.variant_sets,
                                     reference=self.reference,
                                     reference_bases="A",
                                     start=32,
                                     alternate_bases=["T"])
        panel = self.pg.create(insertion, context=[context_snp])
        # The assert_no_overlapping_kmers(panel) check is deliberately left
        # out: per the original author's note it fails intermittently.

        expected_ref = (
            "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG"
        )
        assert expected_ref in panel.refs

        expected_alts = [
            "GATTAAAGATAGAAATACACGATGCGAGCATTTATCAAATTTCATAACATCACCATGAGTTTG",
            "TTAAAGATAGAAATACACGATGCGAGCATTTTTCAAATTTCATAACATCACCATGAGTTTG",
        ]
        assert sorted(panel.alts) == sorted(expected_alts)
    def test_snp_with_replace_context(self):
        """Check a G->A SNP with a 5-base replacement as context.

        The SNP is at start 2338961; the context replaces ``GGATG`` with
        ``CGATA`` at start 2338990. The panel from ``self.pg2`` must contain
        the reference probe and exactly the two listed alt probes.
        """
        snp = Variant.create(variant_sets=self.variant_sets,
                             reference=self.reference,
                             reference_bases="G",
                             start=2338961,
                             alternate_bases=["A"])
        replacement = Variant.create(variant_sets=self.variant_sets,
                                     reference=self.reference,
                                     reference_bases="GGATG",
                                     start=2338990,
                                     alternate_bases=["CGATA"])
        panel = self.pg2.create(snp, context=[replacement])
        assert_no_overlapping_kmers(panel)

        expected_ref = (
            "CGACTAGCCACCATCGCGCATCAGTGCGAGGTCAAAAGCGACCAAAGCGAGCAAGTCGCGG"
        )
        assert expected_ref in panel.refs

        expected_alts = {
            "CGACTAGCCACCATCGCGCATCAGTGCGAGATCAAAAGCGACCAAAGCGAGCAAGTCGCCG",
            "CGACTAGCCACCATCGCGCATCAGTGCGAGATCAAAAGCGACCAAAGCGAGCAAGTCGCGG",
        }
        assert set(panel.alts) == expected_alts
 def test_del_with_ins_context5(self):
     """Check an ``ATC``->``A`` deletion with two insertions as context.

     The deletion is at start 32; the context insertions are ``T``->``TT``
     at start 5 and ``A``->``AG`` at start 6.
     ``_remove_overlapping_contexts`` is expected to keep both context
     variants, and four alt probes are expected (compared after sorting).
     """
     deletion = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference,
         reference_bases="ATC",
         start=32,
         alternate_bases=["A"],
     )
     first_insertion = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference,
         reference_bases="T",
         start=5,
         alternate_bases=["TT"],
     )
     second_insertion = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference,
         reference_bases="A",
         start=6,
         alternate_bases=["AG"],
     )

     panel = self.pg.create(
         deletion, context=[first_insertion, second_insertion])
     assert_no_overlapping_kmers(panel)

     kept = self.pg._remove_overlapping_contexts(
         deletion, [first_insertion, second_insertion])
     assert kept == [first_insertion, second_insertion]

     expected_ref = (
         "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA"
     )
     assert expected_ref in panel.refs

     expected_alts = [
         "TTAGAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGA",
         "TTAGAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT",
         "TTTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT",
         "ATTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT",
     ]
     assert sorted(panel.alts) == sorted(expected_alts)
 def test_complex_context(self):
     """Check an ``ATTT``->``A`` deletion with a deletion and an insertion nearby.

     The target is at start 1503643; the context is a ``CCT``->``C``
     deletion at start 1503615 and an ``A``->``ATGCCGCCGCC`` insertion at
     start 1503655. The panel from ``self.pg2`` must contain the reference
     probe and exactly the four listed alt probes.
     """
     target = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference,
         reference_bases="ATTT",
         start=1503643,
         alternate_bases=["A"],
     )
     context_deletion = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference,
         reference_bases="CCT",
         start=1503615,
         alternate_bases=["C"],
     )
     context_insertion = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference,
         reference_bases="A",
         start=1503655,
         alternate_bases=["ATGCCGCCGCC"],
     )

     panel = self.pg2.create(
         target, context=[context_deletion, context_insertion])
     assert_no_overlapping_kmers(panel)

     expected_ref = (
         "ATCCTGGAGCCCACCAGCGGAAACACCGGCATTTCGCTGGCGATGGCGGCCCGGTTGAAGG"
     )
     assert expected_ref in panel.refs

     expected_alts = {
         "CCATCGGAGCCCACCAGCGGAAACACCGGCACGCTGGCGATGGCGGCCCGGTTGAAGGGGT",
         "TCCTGGAGCCCACCAGCGGAAACACCGGCACGCTGGCGATGGCGGCCCGGTTGAAGGGG",
         "ATCGGAGCCCACCAGCGGAAACACCGGCACGCTGGCGATGCCGCCGCCTGGCGGCCCGG",
         "TCCTGGAGCCCACCAGCGGAAACACCGGCACGCTGGCGATGCCGCCGCCTGGCGGCCCGG",
     }
     assert set(panel.alts) == expected_alts
 def test_simple_variant_with_multiple_nearby_snps2(self):
     """Check ``_split_context`` and the panel for SNPs sharing a position.

     The target is an A->T SNP at start 31. Context is an A->T SNP at
     start 32 and three SNPs at start 30 (C->G, C->T, C->A). The expected
     ``_split_context`` groupings are asserted first, then the eight ref
     and eight alt probes of the panel (compared after sorting).
     """
     target = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference,
         reference_bases="A",
         start=31,
         alternate_bases=["T"],
     )
     after_a_to_t = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference,
         reference_bases="A",
         start=32,
         alternate_bases=["T"],
     )
     before_c_to_g = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference,
         reference_bases="C",
         start=30,
         alternate_bases=["G"],
     )
     before_c_to_t = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference,
         reference_bases="C",
         start=30,
         alternate_bases=["T"],
     )
     before_c_to_a = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference,
         reference_bases="C",
         start=30,
         alternate_bases=["A"],
     )

     # Expected groupings returned by _split_context for these inputs.
     split_three = self.pg._split_context(
         [target, before_c_to_g, before_c_to_t])
     assert sorted(split_three) == sorted(
         [[target, before_c_to_t], [target, before_c_to_g]])

     split_pair = self.pg._split_context([before_c_to_g, before_c_to_t])
     assert split_pair == [[before_c_to_t], [before_c_to_g]]

     split_four = self.pg._split_context(
         [target, before_c_to_g, before_c_to_t, before_c_to_a])
     assert split_four == [
         [target, before_c_to_t, before_c_to_a],
         [target, before_c_to_g, before_c_to_a],
     ]

     panel = self.pg.create(
         target,
         context=[after_a_to_t, before_c_to_g, before_c_to_t, before_c_to_a],
     )
     expected_refs = [
         "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT",
         "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGAT",
         "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTGAT",
         "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTGAT",
         "CGATTAAAGATAGAAATACACGATGCGAGTAATCAAATTTCATAACATCACCATGAGTTTGAT",
         "CGATTAAAGATAGAAATACACGATGCGAGTATTCAAATTTCATAACATCACCATGAGTTTGAT",
         "CGATTAAAGATAGAAATACACGATGCGAGAAATCAAATTTCATAACATCACCATGAGTTTGAT",
         "CGATTAAAGATAGAAATACACGATGCGAGAATTCAAATTTCATAACATCACCATGAGTTTGAT",
     ]
     expected_alts = [
         "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTGAT",
         "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTGAT",
         "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTGAT",
         "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTGAT",
         "CGATTAAAGATAGAAATACACGATGCGAGTTATCAAATTTCATAACATCACCATGAGTTTGAT",
         "CGATTAAAGATAGAAATACACGATGCGAGTTTTCAAATTTCATAACATCACCATGAGTTTGAT",
         "CGATTAAAGATAGAAATACACGATGCGAGATATCAAATTTCATAACATCACCATGAGTTTGAT",
         "CGATTAAAGATAGAAATACACGATGCGAGATTTCAAATTTCATAACATCACCATGAGTTTGAT",
     ]
     assert sorted(panel.refs) == sorted(expected_refs)
     assert sorted(panel.alts) == sorted(expected_alts)
 def test_add_second_vcf_variant_set(self):
     """Load ``test2.vcf`` and check the resulting document counts.

     Per the original author's note, this VCF has only one variant that is
     not already in the first VCF. After loading, the counts of variant
     sets, call sets, calls and variants are asserted, along with the
     number of variant sets attached to two specific variants.
     """
     second_vcf = VCF(
         f="tests/vcf_tests/test2.vcf",
         reference_set_id=self.reference_set.id,
         method="CORTEX",
     )
     second_vcf.add_to_database()

     assert VariantSet.objects().count() == 3
     assert VariantCallSet.objects().count() == 2
     assert VariantCall.objects().count() == 42
     assert Variant.objects().count() == 22

     first_variant = Variant.objects()[0]
     assert len(first_variant.variant_sets) == 3

     named_variant = Variant.objects.get(names="UNION_BC_k31_var_147")
     assert len(named_variant.variant_sets) == 3
Example #17
0
 def _remove_variant_set(self, variant_set_name):
     """Delete a variant set and the documents that depend only on it.

     Looks up the variant set by ``variant_set_name`` within
     ``self.reference_set``, detaches it from every call set that refers
     to it, deletes call sets (and their calls) left with fewer than two
     variant sets, deletes variants that list this set and have exactly
     two variant sets, then deletes the set's metadata and the set itself.

     NOTE(review): the "fewer than two" / "size == 2" thresholds presumably
     account for one extra variant set shared by every document -- confirm
     against the data model.
     """
     target_set = VariantSet.objects.get(
         name=variant_set_name,
         reference_set=self.reference_set,
     )
     linked_call_sets = VariantCallSet.objects(variant_sets=target_set)
     for linked in linked_call_sets:
         # Detach first and persist, so the length check below sees the
         # call set without the variant set being removed.
         linked.variant_sets.remove(target_set)
         linked.save()
         if len(linked.variant_sets) >= 2:
             continue
         # Call set no longer has enough variant sets: drop its calls,
         # then the call set itself.
         VariantCall.objects(call_set=linked).delete()
         linked.delete()

     # Variants that belong to this variant set and have exactly two sets.
     Variant.objects(variant_sets=target_set, variant_sets__size=2).delete()
     VariantSetMetadata.objects(variant_set=target_set).delete()
     target_set.delete()
 def test_add_second_vcf_variant_set(self):
     """Load ``test3.vcf`` and check document counts by variant type.

     NOTE(review): the original inline comment said this VCF "only has one
     Variant which is not in the first VCF"; that wording matches a sibling
     test and may have been copied -- confirm.
     """
     loaded_vcf = VCF(
         f="tests/vcf_tests/test3.vcf",
         reference_set_id=self.reference_set.id,
         method="CORTEX",
     )
     loaded_vcf.add_to_database()

     # Totals across the collections.
     assert VariantSet.objects().count() == 2
     assert VariantCallSet.objects().count() == 1
     assert VariantCall.objects().count() == 106
     assert Variant.objects().count() == 106

     # Breakdown by variant type.
     assert Variant.snps().count() == 89
     assert Variant.indels().count() == 17
     assert Variant.insertions().count() == 8
     assert Variant.deletions().count() == 8
     assert Variant.ph_snps.count() == 1
Example #19
0
 def test_double_indel_fail(self):
     """Check a ``CCA``->``A`` deletion with an adjacent insertion as context.

     The deletion is at start 2288851 and the ``A``->``ACC`` insertion at
     start 2288850. The reference probe must appear in ``panel.refs`` and
     must not appear in ``panel.alts``.
     """
     deletion = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference,
         reference_bases="CCA",
         start=2288851,
         alternate_bases=["A"],
     )
     insertion = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference,
         reference_bases="A",
         start=2288850,
         alternate_bases=["ACC"],
     )
     panel = self.pg2.create(deletion, context=[insertion])

     reference_probe = (
         "GGCGCACACAATGATCGGTGGCAATACCGACCACATCGACCTCATCGACGCCGCGTTGCCG"
     )
     assert reference_probe in panel.refs
     assert reference_probe not in panel.alts
Example #20
0
 def test_make_variant_panel8(self):
     """Check the panel for the ``eis`` ``TG-1T`` variant against the reference.

     The variant name is resolved through ``self.gm`` and must parse to
     ``CA`` -> ``A`` at start 2715332. The single alt probe must equal the
     ref probe with the character at index 30 removed. The
     ``mykrobe-test`` database is dropped at the end.
     """
     ag = AlleleGenerator("src/mykrobe/data/NC_000962.3.fasta")
     # Result is not used below; the lookup itself is kept.
     gene = self.gm.get_gene("eis")
     variant_names = list(
         self.gm.get_variant_names(
             "eis", "TG-1T", protein_coding_var=False))
     assert len(variant_names) == 1

     ref_bases, start, alt_bases = split_var_name(variant_names[0])
     assert ref_bases == 'CA'
     assert start == 2715332
     assert alt_bases == 'A'

     variant = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference_id,
         reference_bases=ref_bases,
         start=start,
         alternate_bases=[alt_bases],
     )
     panel = ag.create(variant)
     assert len(panel.alts) == 1
     alt_probe = panel.alts[0]

     # The panel ref/alt sequences go past the end of the gene, so they
     # cannot be compared against the gene sequence; take the matching
     # subsequence from the reference sequence instead.
     panel_ref_start = self.reference_seq.find(panel.refs[0])
     panel_ref_end = panel_ref_start + len(panel.refs[0])
     assert panel_ref_start < start < panel_ref_end

     seq = str(self.reference_seq[panel_ref_start:panel_ref_start +
                                  len(panel.refs[0])])
     assert seq == panel.refs[0]
     print(alt_probe, seq[:31] + seq[31:])
     assert alt_probe == seq[:30] + seq[31:]
     DB.drop_database('mykrobe-test')
 def test_add_add_variants_and_calls(self):
     """Load ``test.vcf`` and expect 21 variant calls and 21 variants."""
     source_vcf = VCF(
         f="tests/vcf_tests/test.vcf",
         reference_set_id=self.reference_set.id,
         method="CORTEX",
     )
     source_vcf.add_to_database()

     call_count = VariantCall.objects().count()
     assert call_count == 21
     variant_count = Variant.objects().count()
     assert variant_count == 21
Example #22
0
    def setUp(self):
        """Create an A->T SNP fixture and an empty ``TBPredictor``."""
        snp = Variant.create(
            start=0,
            end=1,
            reference_bases="A",
            alternate_bases=["T"],
        )
        self.variant_snp = snp

        # Predictor starts with no variant calls and no called genes.
        self.predictor = TBPredictor(variant_calls={}, called_genes={})
Example #23
0
 def test_make_variant_panel6(self):
     """Check the panel for the ``pncA`` ``CAG28TAA`` variant against the reference.

     The variant name is resolved through ``self.gm`` and must parse to
     ``CTG`` -> ``TTA`` at start 2289212. The single alt probe must equal
     the ref probe with the three characters at indices 30-32 replaced by
     ``TTA``. The ``mykrobe-test`` database is dropped at the end.
     """
     ag = AlleleGenerator(f"{DATA_DIR}/NC_000962.3.fasta", kmer=31)
     # Result is not used below; the lookup itself is kept.
     gene = self.gm.get_gene("pncA")
     variant_names = list(
         self.gm.get_variant_names(
             "pncA", "CAG28TAA", protein_coding_var=False))
     assert len(variant_names) == 1

     ref_bases, start, alt_bases = split_var_name(variant_names[0])
     assert ref_bases == "CTG"
     assert start == 2289212
     assert alt_bases == "TTA"

     variant = Variant.create(variant_sets=self.variant_sets,
                              reference=self.reference_id,
                              reference_bases=ref_bases,
                              start=start,
                              alternate_bases=[alt_bases])
     panel = ag.create(variant)
     assert len(panel.alts) == 1
     alt_probe = panel.alts[0]

     # The panel ref/alt sequences go past the end of the gene, so they
     # cannot be compared against the gene sequence; take the matching
     # subsequence from the reference sequence instead.
     panel_ref_start = self.reference_seq.find(panel.refs[0])
     panel_ref_end = panel_ref_start + len(panel.refs[0])
     assert panel_ref_start < start < panel_ref_end

     seq = str(self.reference_seq[panel_ref_start:panel_ref_start +
                                  len(panel.refs[0])])
     assert seq == panel.refs[0]
     assert alt_probe == seq[:30] + "TTA" + seq[33:]
     DB.drop_database("mykrobe-test")
Example #24
0
 def test_large_var1(self):
     v = Variant.create(variant_sets=self.variant_sets, reference=self.reference, reference_bases="CGCGGGAGTAGAACGATCGCCAAGTGGTCGGTCTTGGCTGCCCACTTCATCCCCGGCGCCACCGGCAGGTCTCGCGGTCATCTCGACCAACGGAGGGCCGTCGGTGGTTCGTATCCGGCCAAGAACGGCGAGAACGGTTTGTGCCTCTATGCCAGGGTGAATGTCTCATCTCCCAGGCGGACGGTGATATCCAGTTCTCCGCCAAGAGCGGACACGTATTTGCGCAGTGTGTTGACCTGTGCGGAGCCGATGTCGCCGTTCTCGATGCTGGATACCCGGCTCTGCCGGATGTGCGCCAGCGCAGCCACCTGGACCTGGGTGAGTGACTGAGCCGCGCGCAGCTCCCGGAGCCGGAATGCCCGCACTTCATCGCGCATTCGTGCCTTGTGCCGGTCCACCGCCTCCCGGTTAACGGGACGTACGGCGTCCATGTCCCGTAGTGTCATCGCCATCGTGCCACTTACCCTTTCTTGCGCTTGCGCCTCTTTGGCTTCGTGTCCTCGAACTGTGCGAGATGTTCGGCAAACATCTCATCGGCCGCTTTGATCTTCTCGTCGTACCACTGGGTCCACCGCCCGGCCTTGTTACCGGCGGCCAGCATGATCGCCTGCCGCGCCGGGTCGAAGGCGAACAGAATGCGGACCTCGGACCGCCCTTGTGATCCTGGACGCAGCTCCTTCATGTTCTTGTGGCGCGACCCACGCACCGTGTCCACCAGAGGACAGCCAAGTGCGGGGCCCTCTTCCTCGAGAACCTCGATAGCTGCGAACACCAATTCGTAGGTCTCTCGGTCCAAGCCGTTGAGCCAGGCGGAGATGCGCTCCACATCCGCCGTCCACCCCACAGAGTCGCAGAGTAGCGCGATACGCGATATCACACAAGGGTGATATTCCTCCGGGTAAGAGCAGCGGGCGACGGGGCTACCGTCGAGGAAATGCCGGCAGGCGAGGACGGACTCTGCGCACCCGGGCCGTTGAAACAGTAGCCTGTGCCAGGCCGAGAATTCATCCCCACGTATGAGGCAGTACAGTGCGCCGCCGTGCGCGTTCTCCCATGGAACGTTCACGGGCTCCCGTGGATGACAGGCGTTTCATGAACGCCAGCGCCGCCGCAACCCGACCGAAAGCGGTTGACCCCAAGGAGAGCTGGAAGTCGAGGCCACCACCTTCGCCGCGGAGTTGCTCATGCCCGAGAGCGAGACTCGTCCCGAAATACGCCGGCTCGATTTCGGCAAGTTGCTCGAACTGAAGCGGGAATGGGCGTCGACCCGCTCGACCAGCCCCAGCCGGGTGACCAGCCCCAGCCGGGTGACCAGCCGATGCACCGCGGCGATCCCACCGAAGCCGGTGGCATCGATGTTGGCGCCGACCTCGTAGCGCACCGCGCCCGAACCCAGCATCGGCCTGGGCTGCGCCGCCCAGCGTCCAGCCCGCGCGTGCCGCGCCGCCACCCTGCGCCCTCGGCGTGTGATGTTTCGCCGACTCTGTTCATGGGTTATCTTCTTCACCACAAAGGCCTTTCCTGCTGGGCTGTGTTGAGGTCGCAAACCCAGCCAGGGTAAGGCCTTTGGCCTCTCCTACCCGGCCGACACGCTTACTGAAGGCCTAGTCTAGGCAGGCCATTCAATCTGCGGAATCGAAAAATTCGGTTCCAGCCTGCTCGTTTCCTTTCCGACAGCGATCTGACGTTGCGTAACGTCATTTGTACGGACTCTTTTAGCGGCATTGATTTCAGATGCCAACGCCGTCTGTGCTGTAGCGCCGATTGGCCGAAACTGTAAATTTGTATGATTATTTAAATCTTTGACGAACACGCGCCACAAACGTACTATCTCTTTGGCAAAGTCCACCGGCATCTCATTCAACGGTTTTGTTTGCGCGTGGTCGTCATATGTTGGTAAC
TGTGTAACCGGCCGCCTATCTTGCGCGTGCATCATATGACTATGAATCGGCCTTCTCCAGTGAAATTGATACAAGATCGATCCGATAAGCGGTACCTTGTACACAGTGCAATTGTAGTAATTCGCGTTTTGTCCTACGCTTGTATTCTGCGTGAAGAATTCA", start=2266659, alternate_bases=[
         "CACGCGAGTTGTAGATGATCGTTGAGTGGTCTTGCTTGGACTTCCATTTCATCTTTTCGACGCGCCAGGTCTCGCGGTCCTCCGGATCTGCGCCCGGTTTGAGTTGCACATCAAGGGGATACGGCTTGACCGACTCGTAGCCGACATGTAAGTCGGCTAGTTTCCGGCCGGCGCTGGCGAGCTGGTCGAAGCGTTCGCGGGTCTCCGGTGTTGGGATGTGCGGGAGCATCTTCTTGAGGTCAGCGGCGTATTTTGTGCGGTAGGCGGGGTCATGCAGCAGGCCGTAGACGTAGTAGAAGATGTCGTCTTTGGTGACTTGGTCGCCGATCGTGTCGCGGTAGAGCTTGAGGATGACGCCGGTGATGTTGTCGACGCGGCGGTAGCCGTGGTCGTCTACTTCGGCGTTGGTGGTGGACTCGAAATCGAGTTCGCCGTCACGTGGTTCGGTCTTCTCGTAGGTCCAGCGCGGGAAGAATTGACCGTTGCTTGAGCCCCAGAATGCGAGATCGGGGATAGCGTTTAGCATCAGACACGAGAAGGGCTTGTCTGAGCCCATGCCAACCACGTAGTAACCGACATTCCCGTGCTCCGGCGTCGGAAACATCGACGGAAGCTGGTAGGTACAGTTGTTGAGCTGCTGGTTGGGGTCGAGGTAGGCGTGCTCTTTCGTAAATGGTCGGTACGTGCCGAGCCGCATTCCCGCGGGAGCGAATTCGATGCGAATGCCTTGTGCCACTTGCCGCTTGTTGATGCGGTCCCAGCTGAACTTGGCCGAGTCCACGGTAATGAGGGCGTCAACCGGCGGGGTCTTGGCGTCCCTTCCGCGGATCTCGTTGATCCGGTCGACCTCCGAGTTGTAGAAGTCGATCGTGCGTCCGATGTTGGCCTCGAGCGCACCACGTGAAAAGTTGTAACACCACGCATCCCGGCTGGTCTTCAAGCCCGCGGAATAGTTCGCGAAGACACGTGTCACGTCAAGAGCAGCCTTCTTGTCGCCGATAACCGGCCACGCGCTGAACGCGTCGTCGCGTTGGTTGACCCAGTCACCGTGCAAGTTGGGTGTGACTGTCTGCCATTCCACCGTGTCGAGGTAGCCGTCGCCGACGATCCGCAACTTCTCCTCGCGACTCAGGTAATCGCCGATGTCGCGGTAAAGGACATCGCATGGCCCGCTGTGCTTCGGATCCTTGATGCCAAGGAAGATCGCCACCGTGTTGCGACTCCCCCCGCCAAAGACCTTGCCGCCTTCCTGGCGTGAGAGTTCCCCAGCTGTGCGCTGGTTCCCCCGCAGGTTGTACACATATACCGCCGCGTAGTCGTCGGCGAGCGACAACCGCATGCCGTCTGCCGTGTTGCCGTCTATGTACCCACCATTGGAGACGAATCCGACAACACCGTTGTCACCAATGCGGTCGGTCGCCCACCGGAACGCGCGAATATACGAGTCGTACAGGCTGTTCTTCAGCTGCGCCGTCGACCGCTTCGCGTACGTCTGCTCAATCCGCCCGTCCAACGTCGGATACTTCACGTTGGCGTTCAGGTCGTTCGCGCTGCTCTGCCCCACCGAGTACGGCGGATTCCCGATGATCACGCTGATCGGCGTCGCCAGCTGTCGCAAGATCCGAGCGTTGTTGTACGGGAACATGATCGCGTCCATCGAGTCCCCGGCTTCGGAAATCTGGAACGTGTCGGCCAGCGCCATCCCGGGGAACGGCTCATAGGCGTCGGCGTCGGCGGTCTTGCCCGCCAAAGCATGGTAGGTCGACTCGATGTTCACCGCGGCGATGTAGTACGCCAGCAGCATGATCTCGTTGGCGTGCAGCTCTTGCGAGTACTTTCGGGTGAGGTCGGCGGCCGTGATCAGGTCGGACTGCAGCAGCCGGGTAATGAATGTGCCCGTCCCGGCGAAGCCGTCCAGAATATGCACGCCCTCGTCGGTCAGCCCGCGCCCGAAATGCTTGCGCGACACGAAATCAGCCGCCCGCACAAT
GAAGTCCACGACCTCGACCGGCGTGTACACGATCCCCAGCGCCTCGGCCTGCTTCTTGAAGCCGATGCGGAAGAACTTCTCGTACAGCTCGGCGATCACCTGCTGCTTGCCCTCGGCGCTGGTGACCTCGCCGGCGCGCCGTCGCACCGATTCGTAAAAGCCTTCCAACCGAGCGGTTTCGGCCTCCAGGCCGGCACCCCCGACGGTGTCGACCATCTTCTGCATGGCCCGCGACACCGGGTTGTGCGACGCGAAGTCATGCCCGGCGAACAGCGCGTCGAACACCGGCTTGGTGATCAGGTGCTGCGAGAGCATGCTGATCGCGTCATCGGGGGTGATCGAGTCATTGAGGTTATCGCGCAGCCCGGCCAGGAACTGCTCGAACGCCGCCGCCGCCGTAGCGTCGGCGCCGCCGAGCAGGGCGTGGATACGGGTGGTCAGCGTCGCGGCGATGTCGGCGACATCGGCGGCCCACTGCTCCCAATAGGTCCGGGTGCCAACCTTGTCGACGATGCGCGCGTAGATCGCTTCCTGCCACTGCGACAACGAGAACATCGCCAACTGCTCCGCGACGGCGGGTCCCGCCTCGTCGGAGGTCGGCCCGATGTGACCGCCCAACAGCTTGTCGCTGCCTTCACCGGTCTTCGTCGGCTTCACGTTCAGCGCAATGCTGTTCACCATCGCGTCGAAGCGCTCGTCGTGCGACCGCAACGCGTTGAGGACCTGCCACACCACCTTGAACCGTTTGTTGTCGGCCAACGCGGCAGACGGCTCGACACCCTCGGGCACCGCCACCGGCAAGATGACGTACCCGTAGTCCTTGCCGGGCGACTTGCGCATCACCCGACCGACCGACTGCACCACGTCGACGATGGAATTGCGCGGATTCAGGAACAGCACCGCGTCCAGCGCGGGCACGTCGACCCCTTCGGAGAGGCAGCGGGCGTTGGACAGGATGCGGCATTCATCCTCGGCGACCACGCCTTTGAGCCAGGCCAGCTGTTCGTTGCGGACCAGCGCGTTGAACGTCCCGTCCACGTGGCGCACCG"])
     panel = self.pg.create(v)
     assert "TGGTGACGCGGGAGTAGAACGATCGCCAAGTGGTCGGTCTTGGCTGCCCACTTCATCCCCGGCGCCACCGGCAGGTCTCGCGGTCATCTCGACCAACGGAGGGCCGTCGGTGGTTCGTATCCGGCCAAGAACGGCGAGAACGGTTTGTGCCTCTATGCCAGGGTGAATGTCTCATCTCCCAGGCGGACGGTGATATCCAGTTCTCCGCCAAGAGCGGACACGTATTTGCGCAGTGTGTTGACCTGTGCGGAGCCGATGTCGCCGTTCTCGATGCTGGATACCCGGCTCTGCCGGATGTGCGCCAGCGCAGCCACCTGGACCTGGGTGAGTGACTGAGCCGCGCGCAGCTCCCGGAGCCGGAATGCCCGCACTTCATCGCGCATTCGTGCCTTGTGCCGGTCCACCGCCTCCCGGTTAACGGGACGTACGGCGTCCATGTCCCGTAGTGTCATCGCCATCGTGCCACTTACCCTTTCTTGCGCTTGCGCCTCTTTGGCTTCGTGTCCTCGAACTGTGCGAGATGTTCGGCAAACATCTCATCGGCCGCTTTGATCTTCTCGTCGTACCACTGGGTCCACCGCCCGGCCTTGTTACCGGCGGCCAGCATGATCGCCTGCCGCGCCGGGTCGAAGGCGAACAGAATGCGGACCTCGGACCGCCCTTGTGATCCTGGACGCAGCTCCTTCATGTTCTTGTGGCGCGACCCACGCACCGTGTCCACCAGAGGACAGCCAAGTGCGGGGCCCTCTTCCTCGAGAACCTCGATAGCTGCGAACACCAATTCGTAGGTCTCTCGGTCCAAGCCGTTGAGCCAGGCGGAGATGCGCTCCACATCCGCCGTCCACCCCACAGAGTCGCAGAGTAGCGCGATACGCGATATCACACAAGGGTGATATTCCTCCGGGTAAGAGCAGCGGGCGACGGGGCTACCGTCGAGGAAATGCCGGCAGGCGAGGACGGACTCTGCGCACCCGGGCCGTTGAAACAGTAGCCTGTGCCAGGCCGAGAATTCATCCCCACGTATGAGGCAGTACAGTGCGCCGCCGTGCGCGTTCTCCCATGGAACGTTCACGGGCTCCCGTGGATGACAGGCGTTTCATGAACGCCAGCGCCGCCGCAACCCGACCGAAAGCGGTTGACCCCAAGGAGAGCTGGAAGTCGAGGCCACCACCTTCGCCGCGGAGTTGCTCATGCCCGAGAGCGAGACTCGTCCCGAAATACGCCGGCTCGATTTCGGCAAGTTGCTCGAACTGAAGCGGGAATGGGCGTCGACCCGCTCGACCAGCCCCAGCCGGGTGACCAGCCCCAGCCGGGTGACCAGCCGATGCACCGCGGCGATCCCACCGAAGCCGGTGGCATCGATGTTGGCGCCGACCTCGTAGCGCACCGCGCCCGAACCCAGCATCGGCCTGGGCTGCGCCGCCCAGCGTCCAGCCCGCGCGTGCCGCGCCGCCACCCTGCGCCCTCGGCGTGTGATGTTTCGCCGACTCTGTTCATGGGTTATCTTCTTCACCACAAAGGCCTTTCCTGCTGGGCTGTGTTGAGGTCGCAAACCCAGCCAGGGTAAGGCCTTTGGCCTCTCCTACCCGGCCGACACGCTTACTGAAGGCCTAGTCTAGGCAGGCCATTCAATCTGCGGAATCGAAAAATTCGGTTCCAGCCTGCTCGTTTCCTTTCCGACAGCGATCTGACGTTGCGTAACGTCATTTGTACGGACTCTTTTAGCGGCATTGATTTCAGATGCCAACGCCGTCTGTGCTGTAGCGCCGATTGGCCGAAACTGTAAATTTGTATGATTATTTAAATCTTTGACGAACACGCGCCACAAACGTACTATCTCTTTGGCAAAGTCCACCGGCATCTCATTCAACGGTTTTGTTTGCGCGTGGTCGTCATATGTTGGTAACTGTGTAACCGGCCGCCTATCTTGCGCGTGCATCATATGACTATGAATCGGCCTTCTCCAGTGAAATTGATACAAGATCGA
TCCGATAAGCGGTACCTTGTACACAGTGCAATTGTAGTAATTCGCGTTTTGTCCTACGCTTGTATTCTGCGTGAAGAATTCAAACACG" in panel.refs
     assert panel.alts == [
         "GACCGCCGAGTGCGGCTGGATTGGATTTCACAAGGATGCCAATATCCGGCGCAACGCCGTCGAGCGACGGACGGTGCTCGACACGGGAGCCCGGCTATTCTGTGTGCCGCGGGCCGACATCCTGGCAGAGCAAGTCGCGGCACGGTATATTGCGTCCCTTGCGGCGATTGCCCGTGCCGCACGATTTCCGGGACCATTCATCTACACGGTTCACCCGAGCAAGATCGTTCGCGTGCTCTAGTCGTTCATCGCTCCGTTAACCGCCGGCGAGGCCGTCGACGATCTTCATGGTCTCGACGCTGACGGTGGTCACCTTCTTGATGAGGTCGACGATGTAGGTGGGATCGTCGTGTTCGTCGCACCAGTCGTTGGGGTCGTTGACGATGCCCGACGCTTTGTCGGTGGTGACGCGGTAGCGCTCGATGATCCAGCCGAGCGCCGAGCGGGAGCGAGCAGGTAGCGCTCGGCCTCGTCGGGAATGCCGGCGATGGTGACACGCGAGTTGTAGATGATCGTTGAGTGGTCTTGCTTGGACTTCCATTTCATCTTTTCGACGCGCCAGGTCTCGCGGTCCTCCGGATCTGCGCCCGGTTTGAGTTGCACATCAAGGGGATACGGCTTGACCGACTCGTAGCCGACATGTAAGTCGGCTAGTTTCCGGCCGGCGCTGGCGAGCTGGTCGAAGCGTTCGCGGGTCTCCGGTGTTGGGATGTGCGGGAGCATCTTCTTGAGGTCAGCGGCGTATTTTGTGCGGTAGGCGGGGTCATGCAGCAGGCCGTAGACGTAGTAGAAGATGTCGTCTTTGGTGACTTGGTCGCCGATCGTGTCGCGGTAGAGCTTGAGGATGACGCCGGTGATGTTGTCGACGCGGCGGTAGCCGTGGTCGTCTACTTCGGCGTTGGTGGTGGACTCGAAATCGAGTTCGCCGTCACGTGGTTCGGTCTTCTCGTAGGTCCAGCGCGGGAAGAATTGACCGTTGCTTGAGCCCCAGAATGCGAGATCGGGGATAGCGTTTAGCATCAGACACGAGAAGGGCTTGTCTGAGCCCATGCCAACCACGTAGTAACCGACATTCCCGTGCTCCGGCGTCGGAAACATCGACGGAAGCTGGTAGGTACAGTTGTTGAGCTGCTGGTTGGGGTCGAGGTAGGCGTGCTCTTTCGTAAATGGTCGGTACGTGCCGAGCCGCATTCCCGCGGGAGCGAATTCGATGCGAATGCCTTGTGCCACTTGCCGCTTGTTGATGCGGTCCCAGCTGAACTTGGCCGAGTCCACGGTAATGAGGGCGTCAACCGGCGGGGTCTTGGCGTCCCTTCCGCGGATCTCGTTGATCCGGTCGACCTCCGAGTTGTAGAAGTCGATCGTGCGTCCGATGTTGGCCTCGAGCGCACCACGTGAAAAGTTGTAACACCACGCATCCCGGCTGGTCTTCAAGCCCGCGGAATAGTTCGCGAAGACACGTGTCACGTCAAGAGCAGCCTTCTTGTCGCCGATAACCGGCCACGCGCTGAACGCGTCGTCGCGTTGGTTGACCCAGTCACCGTGCAAGTTGGGTGTGACTGTCTGCCATTCCACCGTGTCGAGGTAGCCGTCGCCGACGATCCGCAACTTCTCCTCGCGACTCAGGTAATCGCCGATGTCGCGGTAAAGGACATCGCATGGCCCGCTGTGCTTCGGATCCTTGATGCCAAGGAAGATCGCCACCGTGTTGCGACTCCCCCCGCCAAAGACCTTGCCGCCTTCCTGGCGTGAGAGTTCCCCAGCTGTGCGCTGGTTCCCCCGCAGGTTGTACACATATACCGCCGCGTAGTCGTCGGCGAGCGACAACCGCATGCCGTCTGCCGTGTTGCCGTCTATGTACCCACCATTGGAGACGAATCCGACAACACCGTTGTCACCAATGCGGTCGGTCGCCCACCGGAACGCGCGAATATACGAGTCGTACAGGCTGTTCTTCAGCTGCGCCGTCGACCGCTTCGCGTACGTCTGC
TCAATCCGCCCGTCCAACGTCGGATACTTCACGTTGGCGTTCAGGTCGTTCGCGCTGCTCTGCCCCACCGAGTACGGCGGATTCCCGATGATCACGCTGATCGGCGTCGCCAGCTGTCGCAAGATCCGAGCGTTGTTGTACGGGAACATGATCGCGTCCATCGAGTCCCCGGCTTCGGAAATCTGGAACGTGTCGGCCAGCGCCATCCCGGGGAACGGCTCATAGGCGTCGGCGTCGGCGGTCTTGCCCGCCAAAGCATGGTAGGTCGACTCGATGTTCACCGCGGCGATGTAGTACGCCAGCAGCATGATCTCGTTGGCGTGCAGCTCTTGCGAGTACTTTCGGGTGAGGTCGGCGGCCGTGATCAGGTCGGACTGCAGCAGCCGGGTAATGAATGTGCCCGTCCCGGCGAAGCCGTCCAGAATATGCACGCCCTCGTCGGTCAGCCCGCGCCCGAAATGCTTGCGCGACACGAAATCAGCCGCCCGCACAATGAAGTCCACGACCTCGACCGGCGTGTACACGATCCCCAGCGCCTCGGCCTGCTTCTTGAAGCCGATGCGGAAGAACTTCTCGTACAGCTCGGCGATCACCTGCTGCTTGCCCTCGGCGCTGGTGACCTCGCCGGCGCGCCGTCGCACCGATTCGTAAAAGCCTTCCAACCGAGCGGTTTCGGCCTCCAGGCCGGCACCCCCGACGGTGTCGACCATCTTCTGCATGGCCCGCGACACCGGGTTGTGCGACGCGAAGTCATGCCCGGCGAACAGCGCGTCGAACACCGGCTTGGTGATCAGGTGCTGCGAGAGCATGCTGATCGCGTCATCGGGGGTGATCGAGTCATTGAGGTTATCGCGCAGCCCGGCCAGGAACTGCTCGAACGCCGCCGCCGCCGTAGCGTCGGCGCCGCCGAGCAGGGCGTGGATACGGGTGGTCAGCGTCGCGGCGATGTCGGCGACATCGGCGGCCCACTGCTCCCAATAGGTCCGGGTGCCAACCTTGTCGACGATGCGCGCGTAGATCGCTTCCTGCCACTGCGACAACGAGAACATCGCCAACTGCTCCGCGACGGCGGGTCCCGCCTCGTCGGAGGTCGGCCCGATGTGACCGCCCAACAGCTTGTCGCTGCCTTCACCGGTCTTCGTCGGCTTCACGTTCAGCGCAATGCTGTTCACCATCGCGTCGAAGCGCTCGTCGTGCGACCGCAACGCGTTGAGGACCTGCCACACCACCTTGAACCGTTTGTTGTCGGCCAACGCGGCAGACGGCTCGACACCCTCGGGCACCGCCACCGGCAAGATGACGTACCCGTAGTCCTTGCCGGGCGACTTGCGCATCACCCGACCGACCGACTGCACCACGTCGACGATGGAATTGCGCGGATTCAGGAACAGCACCGCGTCCAGCGCGGGCACGTCGACCCCTTCGGAGAGGCAGCGGGCGTTGGACAGGATGCGGCATTCATCCTCGGCGACCACGCCTTTGAGCCAGGCCAGCTGTTCGTTGCGGACCAGCGCGTTGAACGTCCCGTCCACGTGGCGCACCGAACACGCCAGGCCCGGGCCGTCGTCAACCAATTCGCGGTATGCCTCAACCACTTTCGGGAACAGCTCGGCAACCTGCTTGGACGTCTTGATGTCCTTGGCGAACGCCACCGCCCGACGCATCGGCGGCTCACCGGCGACAATGCCGGTACCGGACCGCTTGGCCAGGCCATTCCAGCAGCCGACGATCTTGGAGGCGTCGTCGAGCATCAGCTCGCCGGAAACCCCGGAGAGTTCCTGCTGCAACCGGGGCGCGATCACGCCCTGATCGACGGTGAGCACCATCACCTTGTAGTCGGTGAGCAGCCCGCGCTCCACCGCCTCGCCGAACGACAGCCGGTGAAACTCCGGCCCGAACGTCAGCTCGTCGTCCATCGACACCAACTCGGCGGAGTGCTGGTCGGCCCTGTCCTTGATGCTCTCGGTGAAAATCCTTGGCGTGGCGGTCATATACAGCCG
CCGGGCCGCCTTCAGATACTGACCGTCGTGCACCCGC"]
Example #25
0
def get_context(pos, kmer):
    """Gather the split pieces of variants that start near ``pos``.

    Queries ``Variant.objects`` with ``start__ne=pos``,
    ``start__gt=pos - kmer`` and ``start__lt=pos + kmer``. These look like
    mongoengine-style operators selecting variants whose start lies strictly
    inside the window around ``pos`` but not at ``pos`` itself -- TODO
    confirm against the ``Variant`` model.

    Returns a flat list of everything yielded by ``split()`` on each
    matching variant, in the order the query returns them.
    """
    nearby = Variant.objects(start__ne=pos,
                             start__gt=pos - kmer,
                             start__lt=pos + kmer)
    pieces = []
    for neighbour in nearby:
        pieces.extend(neighbour.split())
    return pieces
Example #26
0
 def test_del_with_ins_context_where_base_is_deleted(self):
     """Deletion "ATC"->"A" at 32 with a "T"->"C" context variant at 33.

     The context variant is expected to be discarded by
     ``_remove_overlapping_contexts``, so the panel carries exactly one
     alternate probe.
     """
     deletion = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference,
         reference_bases="ATC",
         start=32,
         alternate_bases=["A"],
     )
     overlapping_snp = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference,
         reference_bases="T",
         start=33,
         alternate_bases=["C"],
     )
     expected_ref = (
         "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGATC"
     )
     expected_alts = [
         "CGATTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGATCC",
     ]

     panel = self.pg.create(deletion, context=[overlapping_snp])

     # The context sits inside the deleted bases, so nothing should survive.
     remaining = self.pg._remove_overlapping_contexts(deletion,
                                                      [overlapping_snp])
     assert remaining == []
     assert expected_ref in panel.refs
     assert sorted(panel.alts) == sorted(expected_alts)
Example #27
0
    def test_snp_with_replace_context(self):
        """SNP "G"->"A" at 2338961 with a "GGATG"->"CGATA" context at 2338990.

        The reference probe must be among ``panel.refs`` and ``panel.alts``
        must equal the two expected probes in this exact order.
        """
        snp = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="G",
            start=2338961,
            alternate_bases=["A"],
        )
        replacement = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="GGATG",
            start=2338990,
            alternate_bases=["CGATA"],
        )
        expected_ref = (
            "CGACTAGCCACCATCGCGCATCAGTGCGAGGTCAAAAGCGACCAAAGCGAGCAAGTCGCGGAT"
        )
        expected_alts = [
            "CGACTAGCCACCATCGCGCATCAGTGCGAGATCAAAAGCGACCAAAGCGAGCAAGTCGCGGAT",
            "CGACTAGCCACCATCGCGCATCAGTGCGAGATCAAAAGCGACCAAAGCGAGCAAGTCGCCGAT",
        ]

        panel = self.pg2.create(snp, context=[replacement])

        assert expected_ref in panel.refs
        # Order-sensitive comparison, as in the original assertion.
        assert panel.alts == expected_alts
Example #28
0
 def test_del_with_SNP_context1(self):
     """Deletion "AA"->"A" at 31 with a "T"->"A" SNP context at 33.

     Expects the reference probe in ``panel.refs`` and two alternate
     probes, compared without regard to order.
     """
     deletion = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference,
         reference_bases="AA",
         start=31,
         alternate_bases=["A"],
     )
     snp_context = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference,
         reference_bases="T",
         start=33,
         alternate_bases=["A"],
     )
     expected_ref = (
         "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT"
     )
     expected_alts = [
         "CGATTAAAGATAGAAATACACGATGCGAGCATCAAATTTCATAACATCACCATGAGTTTGATC",
         "CGATTAAAGATAGAAATACACGATGCGAGCAACAAATTTCATAACATCACCATGAGTTTGATC",
     ]

     panel = self.pg.create(deletion, context=[snp_context])

     assert expected_ref in panel.refs
     assert sorted(panel.alts) == sorted(expected_alts)
Example #29
0
 def variant(self):
     """Build a ``Variant`` from the pieces encoded in ``self.var_name``.

     ``split_var_name`` is unpacked into reference bases, start and
     alternate bases; the start is converted with ``int``. The variant is
     created against ``self.reference`` with ``variant_sets=None`` and
     ``end=0``.
     """
     ref_bases, raw_start, alt_bases = split_var_name(self.var_name)
     start_position = int(raw_start)
     # NOTE(review): end is hard-coded to 0 -- confirm Variant.create
     # derives or ignores it.
     return Variant.create(
         variant_sets=None,
         start=start_position,
         end=0,
         reference_bases=ref_bases,
         alternate_bases=[alt_bases],
         reference=self.reference,
     )
 def test_simple_variant_invalid(self):
     """Expect ``ValueError`` for a variant whose alt base equals its ref base."""
     # Both Variant.create and self.pg.create sit inside the pytest.raises
     # block, so the test passes whichever of them raises ValueError.
     # NOTE(review): `cm`, `v` and `panel` are not inspected in the visible
     # lines -- confirm nothing after this point uses them.
     with pytest.raises(ValueError) as cm:
         v = Variant.create(variant_sets=self.variant_sets,
                            reference=self.reference,
                            reference_bases="T",
                            start=31,
                            alternate_bases=["T"])
         panel = self.pg.create(v)