def run(parser, args):
    """Write ref/alt probe sequences for the selected SNPs to stdout.

    Output is FASTA-like: each sequence is preceded by a ``>ref-...`` or
    ``>alt-...`` header whose part after ``?`` is a ``key=value`` list joined
    with ``&``.

    Args:
        parser: Unused here; kept so the command signature is unchanged.
        args: Parsed options. Reads ``db_name``, ``db_uri``, ``verbose``,
            ``reference_filepath`` and ``kmer``.
    """
    db_name = "%s-%s" % (DB_PREFIX, args.db_name)
    DB = connect(db_name, host=args.db_uri)
    logger.setLevel(level=logging.DEBUG if args.verbose else logging.INFO)

    al = AlleleGenerator(
        reference_filepath=args.reference_filepath, kmer=args.kmer)
    # NOTE(review): presumably the ids of variants seen in more than one
    # sample -- confirm against get_non_singelton_variants.
    _variant_ids = get_non_singelton_variants(db_name)
    total = Variant.snps(id__in=_variant_ids).count()

    # Walk the SNPs in fixed-size pages ordered by start position.
    N = 100
    pages = math.ceil(total / N)
    for page in range(pages):
        logger.info(
            "%i of %i - %f%%",
            page * N, total, round(100 * (page * N) / total, 2))
        page_of_snps = Variant.snps(id__in=_variant_ids).order_by(
            "start").skip(N * page).limit(N)
        for variant in page_of_snps:
            variant_panel = make_variant_probe(
                al, variant, args.kmer, DB=DB)
            for i, ref in enumerate(variant_panel.refs):
                # Fix: the original format string had no "&" between the
                # var_name and num_alts parameters, fusing them together.
                sys.stdout.write(
                    ">ref-%s?var_name=%s&num_alts=%i&ref=%s&enum=%i\n" %
                    (variant_panel.variant.var_hash,
                     variant.var_name[:100],
                     len(variant_panel.alts),
                     variant_panel.variant.reference.id,
                     i))
                sys.stdout.write("%s\n" % ref)
            for i, a in enumerate(variant_panel.alts):
                sys.stdout.write(
                    ">alt-%s?var_name=%s&enum=%i\n" %
                    (variant_panel.variant.var_hash,
                     variant.var_name[:100],
                     i))
                sys.stdout.write("%s\n" % a)
def test_simple_variant_end(self):
    """Check the ref/alt probes generated for two SNPs at high coordinates."""
    shared_ref = "TAACAAAATCCTTTTTATAACGCAAGTTCATTTTATACTACTGCTCAATTTTTTTACTTTTAT"
    # (reference base, start, alternate base, expected alt probe)
    cases = (
        ("A", 2902618, "T",
         "TAACAAAATCCTTTTTATAACGCAAGTTCATTTTATACTACTGCTCAATTTTTTTACTTTTTT"),
        ("T", 2902616, "C",
         "TAACAAAATCCTTTTTATAACGCAAGTTCATTTTATACTACTGCTCAATTTTTTTACTTCTAT"),
    )
    for ref_base, position, alt_base, expected_alt in cases:
        snp = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=ref_base,
            start=position,
            alternate_bases=[alt_base],
        )
        probes = self.pg.create(snp)
        assert probes.refs == [shared_ref]
        assert probes.alts == [expected_alt]
def test_indel_snp_indel_context(self):
    """Probe a 9-base replacement alone, then with an insertion and a SNP as context."""

    def new_variant(ref_bases, position, alt_bases):
        return Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=ref_bases,
            start=position,
            alternate_bases=[alt_bases],
        )

    target = new_variant("TCGCGTGGC", 4021459, "GCGAGCAGA")
    upstream_ins = new_variant("A", 4021455, "ATCTAGCCGCAAG")
    downstream_snp = new_variant("T", 4021489, "G")

    # Without context: only the reference probe is checked.
    without_context = self.pg2.create(target)
    assert_no_overlapping_kmers(without_context)
    expected_ref = "ATCATGCGATTCTGCGTCTGCTCGCGAGGCTCGCGTGGCCGCCGGCGCTGGCGGGCGATCT"
    assert expected_ref in without_context.refs

    # With both neighbours as context: four alternate probes are expected.
    with_context = self.pg2.create(
        target, context=[upstream_ins, downstream_snp])
    assert_no_overlapping_kmers(with_context)
    expected_alts = [
        "ATCATGCGATTCTGCGTCTGCTCGCGAGGCGCGAGCAGACGCCGGCGCTGGCGGGCGATCG",
        "ATCATGCGATTCTGCGTCTGCTCGCGAGGCGCGAGCAGACGCCGGCGCTGGCGGGCGATCT",
        "TGCGTCTGCTCGCGATCTAGCCGCAAGGGCGCGAGCAGACGCCGGCGCTGGCGGGCGATCG",
        "TGCGTCTGCTCGCGATCTAGCCGCAAGGGCGCGAGCAGACGCCGGCGCTGGCGGGCGATCT",
    ]
    assert sorted(with_context.alts) == sorted(expected_alts)
def test_simple_variant_end(self):
    """Check the N-padded ref/alt probes generated for two SNPs at high coordinates."""

    def panel_for(ref_base, position, alt_base):
        snp = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=ref_base,
            start=position,
            alternate_bases=[alt_base],
        )
        return self.pg.create(snp)

    first = panel_for("A", 2902618, "T")
    assert_no_overlapping_kmers(first)
    assert first.refs == [
        "TTTATACTACTGCTCAATTTTTTTACTTTTATNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"
    ]
    assert first.alts == [
        "TTTATACTACTGCTCAATTTTTTTACTTTTTTNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"
    ]

    second = panel_for("T", 2902616, "C")
    assert second.refs == [
        "ATTTTATACTACTGCTCAATTTTTTTACTTTTATNNNNNNNNNNNNNNNNNNNNNNNNNNN"
    ]
    assert second.alts == [
        "ATTTTATACTACTGCTCAATTTTTTTACTTCTATNNNNNNNNNNNNNNNNNNNNNNNNNNN"
    ]
def test_del_with_ins_context_where_base_is_deleted2(self):
    """A deletion with a deletion and an insertion as context gives the same probes for either context order."""

    def new_variant(ref_bases, position, alt_bases):
        return Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=ref_bases,
            start=position,
            alternate_bases=[alt_bases],
        )

    target = new_variant("ATC", 32, "A")
    far_deletion = new_variant("TAAA", 5, "T")
    far_insertion = new_variant("A", 7, "AG")

    expected_ref = "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGATC"
    expected_alts = [
        "CGATTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGATCC",
        "CGATTGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGATCCAAA",
        "GATTAAGAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGATCC",
    ]

    # Same expectations regardless of the order the context is supplied in.
    for context in ([far_deletion, far_insertion],
                    [far_insertion, far_deletion]):
        panel = self.pg.create(target, context=context)
        assert expected_ref in panel.refs
        assert sorted(panel.alts) == sorted(expected_alts)
def test_simple_variant_with_nearby_snp(self):
    """A SNP with an adjacent SNP as context yields two ref and two alt probes."""

    def a_to_t_at(position):
        return Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="A",
            start=position,
            alternate_bases=["T"],
        )

    target = a_to_t_at(31)
    neighbour = a_to_t_at(32)
    panel = self.pg.create(target, context=[neighbour])
    assert_no_overlapping_kmers(panel)

    expected_refs = {
        "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG",
        "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTG",
    }
    expected_alts = {
        "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTG",
        "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTG",
    }
    assert set(panel.refs) == expected_refs
    assert set(panel.alts) == expected_alts
def setup(self):
    """Reset the test database and create the reference, variant-set, call-set and SNP fixtures."""
    DB.drop_database('mykrobe-test')

    self.reference_set = ReferenceSet().create_and_save(name="ref_set")
    self.variant_set = VariantSet.create_and_save(
        name="this_vcf_file2",
        reference_set=self.reference_set,
    )
    self.variant_sets = [self.variant_set]
    self.reference = Reference().create_and_save(
        name="ref",
        md5checksum="sre",
        reference_sets=[self.reference_set],
    )
    self.call_set = VariantCallSet.create(
        sample_id="C00123",
        name="C00123",
        variant_sets=self.variant_sets,
    )

    # Both fixture variants sit at the same coordinates and differ only in
    # their bases.
    position = {"start": 0, "end": 1}
    self.variant_snp = Variant.create(
        variant_sets=self.variant_sets,
        reference_bases="A",
        alternate_bases=["T"],
        reference=self.reference,
        **position
    )
    self.variant_snp_mult_alts = Variant.create(
        variant_sets=self.variant_sets,
        reference_bases="T",
        alternate_bases=["A", "C"],
        reference=self.reference,
        **position
    )
def test_simple_variant_with_multiple_nearby_snps(self):
    """A SNP with one context SNP on each side yields four ref and four alt probes, in order."""

    def snp(ref_base, position, alt_base):
        return Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=ref_base,
            start=position,
            alternate_bases=[alt_base],
        )

    target = snp("A", 31, "T")
    right_neighbour = snp("A", 32, "T")
    left_neighbour = snp("C", 30, "G")
    panel = self.pg.create(target, context=[right_neighbour, left_neighbour])

    expected_refs = [
        'CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT',
        'CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGAT',
        'CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTGAT',
        'CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTGAT',
    ]
    expected_alts = [
        "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTGAT",
        "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTGAT",
        "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTGAT",
        "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTGAT",
    ]
    # Exact list equality: the probe order is part of what is asserted.
    assert panel.refs == expected_refs
    assert panel.alts == expected_alts
def test_simple_variant_with_multiple_nearby_snps(self):
    """A SNP with two alternative SNPs on each side as context yields nine ref and nine alt probes."""

    def snp(ref_base, position, alt_base):
        return Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=ref_base,
            start=position,
            alternate_bases=[alt_base],
        )

    # Created in the same order as the original fixture code.
    target = snp("A", 31, "T")
    right_t = snp("A", 32, "T")
    right_g = snp("A", 32, "G")
    left_g = snp("C", 30, "G")
    left_t = snp("C", 30, "T")

    panel = self.pg.create(target, context=[right_t, left_g, left_t, right_g])
    assert_no_overlapping_kmers(panel)

    expected_refs = [
        "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG",
        "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTG",
        "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTG",
        "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTG",
        "CGATTAAAGATAGAAATACACGATGCGAGTAATCAAATTTCATAACATCACCATGAGTTTG",
        "CGATTAAAGATAGAAATACACGATGCGAGTATTCAAATTTCATAACATCACCATGAGTTTG",
        "CGATTAAAGATAGAAATACACGATGCGAGCAGTCAAATTTCATAACATCACCATGAGTTTG",
        "CGATTAAAGATAGAAATACACGATGCGAGGAGTCAAATTTCATAACATCACCATGAGTTTG",
        "CGATTAAAGATAGAAATACACGATGCGAGTAGTCAAATTTCATAACATCACCATGAGTTTG",
    ]
    expected_alts = [
        "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTG",
        "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTG",
        "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTG",
        "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTG",
        "CGATTAAAGATAGAAATACACGATGCGAGTTATCAAATTTCATAACATCACCATGAGTTTG",
        "CGATTAAAGATAGAAATACACGATGCGAGTTTTCAAATTTCATAACATCACCATGAGTTTG",
        "CGATTAAAGATAGAAATACACGATGCGAGCTGTCAAATTTCATAACATCACCATGAGTTTG",
        "CGATTAAAGATAGAAATACACGATGCGAGGTGTCAAATTTCATAACATCACCATGAGTTTG",
        "CGATTAAAGATAGAAATACACGATGCGAGTTGTCAAATTTCATAACATCACCATGAGTTTG",
    ]
    assert sorted(panel.refs) == sorted(expected_refs)
    assert sorted(panel.alts) == sorted(expected_alts)
def test_del_with_SNP_context2(self):
    """A one-base deletion whose context SNP overlaps it: the context is dropped and one alt probe remains."""

    def new_variant(ref_bases, position, alt_bases):
        return Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=ref_bases,
            start=position,
            alternate_bases=[alt_bases],
        )

    deletion = new_variant("AA", 31, "A")
    overlapping_snp = new_variant("A", 32, "T")

    panel = self.pg.create(deletion, context=[overlapping_snp])
    assert_no_overlapping_kmers(panel)

    remaining_context = self.pg._remove_overlapping_contexts(
        deletion, [overlapping_snp])
    assert remaining_context == []

    expected_ref = "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG"
    assert expected_ref in panel.refs

    expected_alts = [
        "GATTAAAGATAGAAATACACGATGCGAGCATCAAATTTCATAACATCACCATGAGTTTG",
    ]
    assert sorted(panel.alts) == sorted(expected_alts)
def test_ins_with_SNP_context(self):
    """An insertion with a SNP one base downstream as context yields two alt probes."""

    def new_variant(ref_bases, position, alt_bases):
        return Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=ref_bases,
            start=position,
            alternate_bases=[alt_bases],
        )

    insertion = new_variant("A", 31, "ATTT")
    nearby_snp = new_variant("A", 32, "T")
    panel = self.pg.create(insertion, context=[nearby_snp])

    # The overlapping-kmer check is deliberately disabled here: it was
    # observed to fail on some runs and pass on others.
    # assert_no_overlapping_kmers(panel)

    expected_ref = "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTG"
    assert expected_ref in panel.refs

    expected_alts = [
        "GATTAAAGATAGAAATACACGATGCGAGCATTTATCAAATTTCATAACATCACCATGAGTTTG",
        "TTAAAGATAGAAATACACGATGCGAGCATTTTTCAAATTTCATAACATCACCATGAGTTTG",
    ]
    assert sorted(panel.alts) == sorted(expected_alts)
def test_snp_with_replace_context(self):
    """A SNP with a 5-base replacement downstream as context yields two alt probes."""

    def new_variant(ref_bases, position, alt_bases):
        return Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=ref_bases,
            start=position,
            alternate_bases=[alt_bases],
        )

    snp = new_variant("G", 2338961, "A")
    replacement = new_variant("GGATG", 2338990, "CGATA")

    panel = self.pg2.create(snp, context=[replacement])
    assert_no_overlapping_kmers(panel)

    expected_ref = "CGACTAGCCACCATCGCGCATCAGTGCGAGGTCAAAAGCGACCAAAGCGAGCAAGTCGCGG"
    assert expected_ref in panel.refs

    expected_alts = {
        "CGACTAGCCACCATCGCGCATCAGTGCGAGATCAAAAGCGACCAAAGCGAGCAAGTCGCCG",
        "CGACTAGCCACCATCGCGCATCAGTGCGAGATCAAAAGCGACCAAAGCGAGCAAGTCGCGG",
    }
    assert set(panel.alts) == expected_alts
def test_del_with_ins_context5(self):
    """A deletion with two distant insertions as context: both are kept and four alt probes result."""

    def new_variant(ref_bases, position, alt_bases):
        return Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=ref_bases,
            start=position,
            alternate_bases=[alt_bases],
        )

    deletion = new_variant("ATC", 32, "A")
    insertion_at_5 = new_variant("T", 5, "TT")
    insertion_at_6 = new_variant("A", 6, "AG")
    context = [insertion_at_5, insertion_at_6]

    # A fresh list is passed to each call, as in the original.
    panel = self.pg.create(deletion, context=list(context))
    assert_no_overlapping_kmers(panel)

    kept = self.pg._remove_overlapping_contexts(deletion, list(context))
    assert kept == [insertion_at_5, insertion_at_6]

    expected_ref = "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGA"
    assert expected_ref in panel.refs

    expected_alts = [
        "TTAGAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGA",
        "TTAGAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT",
        "TTTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT",
        "ATTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGAT",
    ]
    assert sorted(panel.alts) == sorted(expected_alts)
def test_complex_context(self):
    """A deletion with an upstream deletion and a downstream insertion as context yields four alt probes."""

    def new_variant(ref_bases, position, alt_bases):
        return Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=ref_bases,
            start=position,
            alternate_bases=[alt_bases],
        )

    target = new_variant("ATTT", 1503643, "A")
    upstream_deletion = new_variant("CCT", 1503615, "C")
    downstream_insertion = new_variant("A", 1503655, "ATGCCGCCGCC")

    panel = self.pg2.create(
        target, context=[upstream_deletion, downstream_insertion])
    assert_no_overlapping_kmers(panel)

    expected_ref = "ATCCTGGAGCCCACCAGCGGAAACACCGGCATTTCGCTGGCGATGGCGGCCCGGTTGAAGG"
    assert expected_ref in panel.refs

    expected_alts = {
        "CCATCGGAGCCCACCAGCGGAAACACCGGCACGCTGGCGATGGCGGCCCGGTTGAAGGGGT",
        "TCCTGGAGCCCACCAGCGGAAACACCGGCACGCTGGCGATGGCGGCCCGGTTGAAGGGG",
        "ATCGGAGCCCACCAGCGGAAACACCGGCACGCTGGCGATGCCGCCGCCTGGCGGCCCGG",
        "TCCTGGAGCCCACCAGCGGAAACACCGGCACGCTGGCGATGCCGCCGCCTGGCGGCCCGG",
    }
    assert set(panel.alts) == expected_alts
def test_simple_variant_with_multiple_nearby_snps2(self):
    """Check ``_split_context`` on same-position SNPs, then the probes for a SNP with four context SNPs."""

    def snp(ref_base, position, alt_base):
        return Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=ref_base,
            start=position,
            alternate_bases=[alt_base],
        )

    v = snp("A", 31, "T")
    v2 = snp("A", 32, "T")
    # v3, v4 and v5 are three alternative SNPs at the same position.
    v3 = snp("C", 30, "G")
    v4 = snp("C", 30, "T")
    v5 = snp("C", 30, "A")

    split = self.pg._split_context
    assert sorted(split([v, v3, v4])) == sorted([[v, v4], [v, v3]])
    assert split([v3, v4]) == [[v4], [v3]]
    assert split([v, v3, v4, v5]) == [[v, v4, v5], [v, v3, v5]]

    panel = self.pg.create(v, context=[v2, v3, v4, v5])

    expected_refs = [
        "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT",
        "CGATTAAAGATAGAAATACACGATGCGAGCATTCAAATTTCATAACATCACCATGAGTTTGAT",
        "CGATTAAAGATAGAAATACACGATGCGAGGAATCAAATTTCATAACATCACCATGAGTTTGAT",
        "CGATTAAAGATAGAAATACACGATGCGAGGATTCAAATTTCATAACATCACCATGAGTTTGAT",
        "CGATTAAAGATAGAAATACACGATGCGAGTAATCAAATTTCATAACATCACCATGAGTTTGAT",
        "CGATTAAAGATAGAAATACACGATGCGAGTATTCAAATTTCATAACATCACCATGAGTTTGAT",
        "CGATTAAAGATAGAAATACACGATGCGAGAAATCAAATTTCATAACATCACCATGAGTTTGAT",
        "CGATTAAAGATAGAAATACACGATGCGAGAATTCAAATTTCATAACATCACCATGAGTTTGAT",
    ]
    expected_alts = [
        "CGATTAAAGATAGAAATACACGATGCGAGCTATCAAATTTCATAACATCACCATGAGTTTGAT",
        "CGATTAAAGATAGAAATACACGATGCGAGCTTTCAAATTTCATAACATCACCATGAGTTTGAT",
        "CGATTAAAGATAGAAATACACGATGCGAGGTATCAAATTTCATAACATCACCATGAGTTTGAT",
        "CGATTAAAGATAGAAATACACGATGCGAGGTTTCAAATTTCATAACATCACCATGAGTTTGAT",
        "CGATTAAAGATAGAAATACACGATGCGAGTTATCAAATTTCATAACATCACCATGAGTTTGAT",
        "CGATTAAAGATAGAAATACACGATGCGAGTTTTCAAATTTCATAACATCACCATGAGTTTGAT",
        "CGATTAAAGATAGAAATACACGATGCGAGATATCAAATTTCATAACATCACCATGAGTTTGAT",
        "CGATTAAAGATAGAAATACACGATGCGAGATTTCAAATTTCATAACATCACCATGAGTTTGAT",
    ]
    assert sorted(panel.refs) == sorted(expected_refs)
    assert sorted(panel.alts) == sorted(expected_alts)
def test_add_second_vcf_variant_set(self):
    """Load test2.vcf and check the resulting document counts and variant-set memberships."""
    # This VCF only has one Variant which is not in the first VCF
    second_vcf = VCF(
        f="tests/vcf_tests/test2.vcf",
        reference_set_id=self.reference_set.id,
        method="CORTEX",
    )
    second_vcf.add_to_database()

    expected_counts = (
        (VariantSet, 3),
        (VariantCallSet, 2),
        (VariantCall, 42),
        (Variant, 22),
    )
    for model, expected in expected_counts:
        assert model.objects().count() == expected

    first_variant = Variant.objects()[0]
    assert len(first_variant.variant_sets) == 3
    named_variant = Variant.objects.get(names="UNION_BC_k31_var_147")
    assert len(named_variant.variant_sets) == 3
def _remove_variant_set(self, variant_set_name):
    """Delete the named variant set and the documents that depend on it.

    Looks the set up by ``variant_set_name`` within ``self.reference_set``,
    detaches it from every call set that references it, then deletes the
    matching variants, the set's metadata, and finally the set itself.

    Args:
        variant_set_name: Value matched against ``VariantSet.name``.
    """
    vs = VariantSet.objects.get(name=variant_set_name, reference_set=self.reference_set)
    for call_set in VariantCallSet.objects(variant_sets=vs):
        call_set.variant_sets.remove(vs)
        call_set.save()
        # Remove calls from callsets that only have this variantset
        # NOTE(review): the check runs after ``vs`` was removed, so it fires
        # when at most one set is left -- presumably a shared/global set;
        # confirm.
        if len(call_set.variant_sets) < 2:
            VariantCall.objects(call_set=call_set).delete()
            call_set.delete()
    # Remove variants that are ONLY from this variant set
    # NOTE(review): "only" is expressed as exactly two variant sets, which
    # presumably means this set plus a shared/global one -- confirm.
    Variant.objects(variant_sets=vs, variant_sets__size=2).delete()
    VariantSetMetadata.objects(variant_set=vs).delete()
    vs.delete()
def test_add_second_vcf_variant_set(self):
    """Load test3.vcf and check the document counts, including the per-type variant counts."""
    # This VCF only has one Variant which is not in the first VCF
    third_vcf = VCF(
        f="tests/vcf_tests/test3.vcf",
        reference_set_id=self.reference_set.id,
        method="CORTEX",
    )
    third_vcf.add_to_database()

    # Collection-level counts.
    for model, expected in ((VariantSet, 2), (VariantCallSet, 1),
                            (VariantCall, 106), (Variant, 106)):
        assert model.objects().count() == expected

    # Counts per variant type.
    assert Variant.snps().count() == 89
    assert Variant.indels().count() == 17
    assert Variant.insertions().count() == 8
    assert Variant.deletions().count() == 8
    assert Variant.ph_snps.count() == 1
def test_double_indel_fail(self):
    """A deletion with an adjacent insertion as context must not produce the reference sequence as an alt."""

    def new_variant(ref_bases, position, alt_bases):
        return Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=ref_bases,
            start=position,
            alternate_bases=[alt_bases],
        )

    deletion = new_variant("CCA", 2288851, "A")
    insertion = new_variant("A", 2288850, "ACC")

    panel = self.pg2.create(deletion, context=[insertion])

    reference_probe = "GGCGCACACAATGATCGGTGGCAATACCGACCACATCGACCTCATCGACGCCGCGTTGCCG"
    assert reference_probe in panel.refs
    assert reference_probe not in panel.alts
def test_make_variant_panel8(self):
    """Build the probe for the eis ``TG-1T`` variant and compare it with the reference sequence."""
    generator = AlleleGenerator("src/mykrobe/data/NC_000962.3.fasta")
    # Gene lookup kept for parity with the original; its result is unused.
    self.gm.get_gene("eis")

    names = list(
        self.gm.get_variant_names("eis", "TG-1T", protein_coding_var=False))
    assert len(names) == 1
    ref, start, alt = split_var_name(names[0])
    assert ref == 'CA'
    assert start == 2715332
    assert alt == 'A'

    variant = Variant.create(
        variant_sets=self.variant_sets,
        reference=self.reference_id,
        reference_bases=ref,
        start=start,
        alternate_bases=[alt],
    )
    panel = generator.create(variant)
    assert len(panel.alts) == 1
    alt_probe = panel.alts[0]

    # The panel ref/alt sequences extend past the end of the gene, so they
    # cannot be compared against the gene sequence; take the matching
    # stretch of the reference sequence instead.
    window_start = self.reference_seq.find(panel.refs[0])
    window_end = window_start + len(panel.refs[0])
    assert window_start < start < window_end
    seq = str(self.reference_seq[window_start:window_end])
    assert seq == panel.refs[0]

    print(alt_probe, seq[:31] + seq[31:])
    # The alt probe is the reference window with the base at index 30 removed.
    assert alt_probe == seq[:30] + seq[31:]
    DB.drop_database('mykrobe-test')
def test_add_add_variants_and_calls(self):
    """Load test.vcf and check that 21 variant calls and 21 variants were stored."""
    source_vcf = VCF(
        f="tests/vcf_tests/test.vcf",
        reference_set_id=self.reference_set.id,
        method="CORTEX",
    )
    source_vcf.add_to_database()

    for model in (VariantCall, Variant):
        assert model.objects().count() == 21
def setUp(self):
    """Create an A->T SNP fixture and a predictor with no calls and no genes."""
    snp_fields = {
        "start": 0,
        "end": 1,
        "reference_bases": "A",
        "alternate_bases": ["T"],
    }
    self.variant_snp = Variant.create(**snp_fields)
    self.predictor = TBPredictor(variant_calls={}, called_genes={})
def test_make_variant_panel6(self):
    """Build the probe for the pncA ``CAG28TAA`` variant and compare it with the reference sequence."""
    generator = AlleleGenerator(f"{DATA_DIR}/NC_000962.3.fasta", kmer=31)
    # Gene lookup kept for parity with the original; its result is unused.
    self.gm.get_gene("pncA")

    names = list(
        self.gm.get_variant_names("pncA", "CAG28TAA", protein_coding_var=False))
    assert len(names) == 1
    ref, start, alt = split_var_name(names[0])
    assert ref == "CTG"
    assert start == 2289212
    assert alt == "TTA"

    variant = Variant.create(
        variant_sets=self.variant_sets,
        reference=self.reference_id,
        reference_bases=ref,
        start=start,
        alternate_bases=[alt],
    )
    panel = generator.create(variant)
    assert len(panel.alts) == 1
    alt_probe = panel.alts[0]

    # The panel ref/alt sequences extend past the end of the gene, so they
    # cannot be compared against the gene sequence; take the matching
    # stretch of the reference sequence instead.
    window_start = self.reference_seq.find(panel.refs[0])
    window_end = window_start + len(panel.refs[0])
    assert window_start < start < window_end
    seq = str(self.reference_seq[window_start:window_end])
    assert seq == panel.refs[0]

    # The alt probe is the reference window with indices 30-32 replaced.
    assert alt_probe == seq[:30] + "TTA" + seq[33:]
    DB.drop_database("mykrobe-test")
def test_large_var1(self):
    """Probe a multi-kilobase replacement and check the single expected ref and alt sequences."""
    # Each long sequence is written as adjacent string literals, which Python
    # concatenates into one string.
    replaced_bases = (
        "CGCGGGAGTAGAACGATCGCCAAGTGGTCGGTCTTGGCTGCCCACTTCATCCCCGGCGCCACCGGCAGGTCTCGCGGTCATCTCGACCAACGGAGGGCCGTCGGTGGTTCGTATCCGGCCAAGAACGGCGAGAACGGTTTGTGCCTCTATGCCAGGGTGAATGTCTCATCTCCCAGGCGGACGGTGATATCCAGTTCTCCGCCAAGAGCGGACACGTATTTGCGCAGTGTGTTGACCTGTGCGGAGCCGATGTCGCCGTTCTCGATGCTGGATACCCGGCTCTGCCGGATGTGCGCCAGCGCAGCCACCTGGACCTGGGTGAGTGACTGAGCCGCGCGCAGCTCCCGGAGCCGGAATGCCCGCACTTCATCGCGCATTCGTGCCTTGTGCCGGTCCACCGCCTCCCGGTTAACGGGACGTACGGCGTCCATGTCCCGTAGTGTCATCGCCATCGTGCCACTTACCCTTTCTTGCGCTTGCGCCTCTTTGGCTTCGTGTCCTCGAACTGTGCGAGATGTTCGGCAAACATCTCATCGGCCGCTTTGATCTTCTCGTCGTACCACTGGGTCCACCGCCCGGCCTTGTTACCGGCGGCCAGCATGATCGCCTGCCGCGCCGGGTCGAAGGCGAACAGAATGCGGACCTCGGACCGCCCTTGTGATCCTGGACGCAGCTCCTTCATGTTCTTGTGGCGCGACCCACGCACCGTGTCCACCAGAGGACAGCCAAGTGCGGGGCCCTCTTCCTCGAGAACCTCGATAGCTGCGAACACCAATTCGTAGGTCTCTCGGTCCAAGCCGTTGAGCCAGGCGGAGATGCGCTCCACATCCGCCGTCCACCCCACAGAGTCGCAGAGTAGCGCGATACGCGATATCACACAAGGGTGATATTCCTCCGGGTAAGAGCAGCGGGCGACGGGGCTACCGTCGAGGAAATGCCGGCAGGCGAGGACGGACTCTGCGCACCCGGGCCGTTGAAACAGTAGCCTGTGCCAGGCCGAGAATTCATCCCCACGTATGAGGCAGTACAGTGCGCCGCCGTGCGCGTTCTCCCATGGAACGTTCACGGGCTCCCGTGGATGACAGGCGTTTCATGAACGCCAGCGCCGCCGCAACCCGACCGAAAGCGGTTGACCCCAAGGAGAGCTGGAAGTCGAGGCCACCACCTTCGCCGCGGAGTTGCTCATGCCCGAGAGCGAGACTCGTCCCGAAATACGCCGGCTCGATTTCGGCAAGTTGCTCGAACTGAAGCGGGAATGGGCGTCGACCCGCTCGACCAGCCCCAGCCGGGTGACCAGCCCCAGCCGGGTGACCAGCCGATGCACCGCGGCGATCCCACCGAAGCCGGTGGCATCGATGTTGGCGCCGACCTCGTAGCGCACCGCGCCCGAACCCAGCATCGGCCTGGGCTGCGCCGCCCAGCGTCCAGCCCGCGCGTGCCGCGCCGCCACCCTGCGCCCTCGGCGTGTGATGTTTCGCCGACTCTGTTCATGGGTTATCTTCTTCACCACAAAGGCCTTTCCTGCTGGGCTGTGTTGAGGTCGCAAACCCAGCCAGGGTAAGGCCTTTGGCCTCTCCTACCCGGCCGACACGCTTACTGAAGGCCTAGTCTAGGCAGGCCATTCAATCTGCGGAATCGAAAAATTCGGTTCCAGCCTGCTCGTTTCCTTTCCGACAGCGATCTGACGTTGCGTAACGTCATTTGTACGGACTCTTTTAGCGGCATTGATTTCAGATGCCAACGCCGTCTGTGCTGTAGCGCCGATTGGCCGAAACTGTAAATTTGTATGATTATTTAAATCTTTGACGAACACGCGCCACAAACGTACTATCTCTTTGGCAAAGTCCACCGGCATCTCATTCAACGGTTTTGTTTGCGC"
        "GTGGTCGTCATATGTTGGTAACTGTGTAACCGGCCGCCTATCTTGCGCGTGCATCATATGACTATGAATCGGCCTTCTCCAGTGAAATTGATACAAGATCGATCCGATAAGCGGTACCTTGTACACAGTGCAATTGTAGTAATTCGCGTTTTGTCCTACGCTTGTATTCTGCGTGAAGAATTCA"
    )
    replacement_bases = (
        "CACGCGAGTTGTAGATGATCGTTGAGTGGTCTTGCTTGGACTTCCATTTCATCTTTTCGACGCGCCAGGTCTCGCGGTCCTCCGGATCTGCGCCCGGTTTGAGTTGCACATCAAGGGGATACGGCTTGACCGACTCGTAGCCGACATGTAAGTCGGCTAGTTTCCGGCCGGCGCTGGCGAGCTGGTCGAAGCGTTCGCGGGTCTCCGGTGTTGGGATGTGCGGGAGCATCTTCTTGAGGTCAGCGGCGTATTTTGTGCGGTAGGCGGGGTCATGCAGCAGGCCGTAGACGTAGTAGAAGATGTCGTCTTTGGTGACTTGGTCGCCGATCGTGTCGCGGTAGAGCTTGAGGATGACGCCGGTGATGTTGTCGACGCGGCGGTAGCCGTGGTCGTCTACTTCGGCGTTGGTGGTGGACTCGAAATCGAGTTCGCCGTCACGTGGTTCGGTCTTCTCGTAGGTCCAGCGCGGGAAGAATTGACCGTTGCTTGAGCCCCAGAATGCGAGATCGGGGATAGCGTTTAGCATCAGACACGAGAAGGGCTTGTCTGAGCCCATGCCAACCACGTAGTAACCGACATTCCCGTGCTCCGGCGTCGGAAACATCGACGGAAGCTGGTAGGTACAGTTGTTGAGCTGCTGGTTGGGGTCGAGGTAGGCGTGCTCTTTCGTAAATGGTCGGTACGTGCCGAGCCGCATTCCCGCGGGAGCGAATTCGATGCGAATGCCTTGTGCCACTTGCCGCTTGTTGATGCGGTCCCAGCTGAACTTGGCCGAGTCCACGGTAATGAGGGCGTCAACCGGCGGGGTCTTGGCGTCCCTTCCGCGGATCTCGTTGATCCGGTCGACCTCCGAGTTGTAGAAGTCGATCGTGCGTCCGATGTTGGCCTCGAGCGCACCACGTGAAAAGTTGTAACACCACGCATCCCGGCTGGTCTTCAAGCCCGCGGAATAGTTCGCGAAGACACGTGTCACGTCAAGAGCAGCCTTCTTGTCGCCGATAACCGGCCACGCGCTGAACGCGTCGTCGCGTTGGTTGACCCAGTCACCGTGCAAGTTGGGTGTGACTGTCTGCCATTCCACCGTGTCGAGGTAGCCGTCGCCGACGATCCGCAACTTCTCCTCGCGACTCAGGTAATCGCCGATGTCGCGGTAAAGGACATCGCATGGCCCGCTGTGCTTCGGATCCTTGATGCCAAGGAAGATCGCCACCGTGTTGCGACTCCCCCCGCCAAAGACCTTGCCGCCTTCCTGGCGTGAGAGTTCCCCAGCTGTGCGCTGGTTCCCCCGCAGGTTGTACACATATACCGCCGCGTAGTCGTCGGCGAGCGACAACCGCATGCCGTCTGCCGTGTTGCCGTCTATGTACCCACCATTGGAGACGAATCCGACAACACCGTTGTCACCAATGCGGTCGGTCGCCCACCGGAACGCGCGAATATACGAGTCGTACAGGCTGTTCTTCAGCTGCGCCGTCGACCGCTTCGCGTACGTCTGCTCAATCCGCCCGTCCAACGTCGGATACTTCACGTTGGCGTTCAGGTCGTTCGCGCTGCTCTGCCCCACCGAGTACGGCGGATTCCCGATGATCACGCTGATCGGCGTCGCCAGCTGTCGCAAGATCCGAGCGTTGTTGTACGGGAACATGATCGCGTCCATCGAGTCCCCGGCTTCGGAAATCTGGAACGTGTCGGCCAGCGCCATCCCGGGGAACGGCTCATAGGCGTCGGCGTCGGCGGTCTTGCCCGCCAAAGCATGGTAGGTCGACTCGATGTTCACCG"
        "CGGCGATGTAGTACGCCAGCAGCATGATCTCGTTGGCGTGCAGCTCTTGCGAGTACTTTCGGGTGAGGTCGGCGGCCGTGATCAGGTCGGACTGCAGCAGCCGGGTAATGAATGTGCCCGTCCCGGCGAAGCCGTCCAGAATATGCACGCCCTCGTCGGTCAGCCCGCGCCCGAAATGCTTGCGCGACACGAAATCAGCCGCCCGCACAATGAAGTCCACGACCTCGACCGGCGTGTACACGATCCCCAGCGCCTCGGCCTGCTTCTTGAAGCCGATGCGGAAGAACTTCTCGTACAGCTCGGCGATCACCTGCTGCTTGCCCTCGGCGCTGGTGACCTCGCCGGCGCGCCGTCGCACCGATTCGTAAAAGCCTTCCAACCGAGCGGTTTCGGCCTCCAGGCCGGCACCCCCGACGGTGTCGACCATCTTCTGCATGGCCCGCGACACCGGGTTGTGCGACGCGAAGTCATGCCCGGCGAACAGCGCGTCGAACACCGGCTTGGTGATCAGGTGCTGCGAGAGCATGCTGATCGCGTCATCGGGGGTGATCGAGTCATTGAGGTTATCGCGCAGCCCGGCCAGGAACTGCTCGAACGCCGCCGCCGCCGTAGCGTCGGCGCCGCCGAGCAGGGCGTGGATACGGGTGGTCAGCGTCGCGGCGATGTCGGCGACATCGGCGGCCCACTGCTCCCAATAGGTCCGGGTGCCAACCTTGTCGACGATGCGCGCGTAGATCGCTTCCTGCCACTGCGACAACGAGAACATCGCCAACTGCTCCGCGACGGCGGGTCCCGCCTCGTCGGAGGTCGGCCCGATGTGACCGCCCAACAGCTTGTCGCTGCCTTCACCGGTCTTCGTCGGCTTCACGTTCAGCGCAATGCTGTTCACCATCGCGTCGAAGCGCTCGTCGTGCGACCGCAACGCGTTGAGGACCTGCCACACCACCTTGAACCGTTTGTTGTCGGCCAACGCGGCAGACGGCTCGACACCCTCGGGCACCGCCACCGGCAAGATGACGTACCCGTAGTCCTTGCCGGGCGACTTGCGCATCACCCGACCGACCGACTGCACCACGTCGACGATGGAATTGCGCGGATTCAGGAACAGCACCGCGTCCAGCGCGGGCACGTCGACCCCTTCGGAGAGGCAGCGGGCGTTGGACAGGATGCGGCATTCATCCTCGGCGACCACGCCTTTGAGCCAGGCCAGCTGTTCGTTGCGGACCAGCGCGTTGAACGTCCCGTCCACGTGGCGCACCG"
    )
    expected_ref = (
        "TGGTGACGCGGGAGTAGAACGATCGCCAAGTGGTCGGTCTTGGCTGCCCACTTCATCCCCGGCGCCACCGGCAGGTCTCGCGGTCATCTCGACCAACGGAGGGCCGTCGGTGGTTCGTATCCGGCCAAGAACGGCGAGAACGGTTTGTGCCTCTATGCCAGGGTGAATGTCTCATCTCCCAGGCGGACGGTGATATCCAGTTCTCCGCCAAGAGCGGACACGTATTTGCGCAGTGTGTTGACCTGTGCGGAGCCGATGTCGCCGTTCTCGATGCTGGATACCCGGCTCTGCCGGATGTGCGCCAGCGCAGCCACCTGGACCTGGGTGAGTGACTGAGCCGCGCGCAGCTCCCGGAGCCGGAATGCCCGCACTTCATCGCGCATTCGTGCCTTGTGCCGGTCCACCGCCTCCCGGTTAACGGGACGTACGGCGTCCATGTCCCGTAGTGTCATCGCCATCGTGCCACTTACCCTTTCTTGCGCTTGCGCCTCTTTGGCTTCGTGTCCTCGAACTGTGCGAGATGTTCGGCAAACATCTCATCGGCCGCTTTGATCTTCTCGTCGTACCACTGGGTCCACCGCCCGGCCTTGTTACCGGCGGCCAGCATGATCGCCTGCCGCGCCGGGTCGAAGGCGAACAGAATGCGGACCTCGGACCGCCCTTGTGATCCTGGACGCAGCTCCTTCATGTTCTTGTGGCGCGACCCACGCACCGTGTCCACCAGAGGACAGCCAAGTGCGGGGCCCTCTTCCTCGAGAACCTCGATAGCTGCGAACACCAATTCGTAGGTCTCTCGGTCCAAGCCGTTGAGCCAGGCGGAGATGCGCTCCACATCCGCCGTCCACCCCACAGAGTCGCAGAGTAGCGCGATACGCGATATCACACAAGGGTGATATTCCTCCGGGTAAGAGCAGCGGGCGACGGGGCTACCGTCGAGGAAATGCCGGCAGGCGAGGACGGACTCTGCGCACCCGGGCCGTTGAAACAGTAGCCTGTGCCAGGCCGAGAATTCATCCCCACGTATGAGGCAGTACAGTGCGCCGCCGTGCGCGTTCTCCCATGGAACGTTCACGGGCTCCCGTGGATGACAGGCGTTTCATGAACGCCAGCGCCGCCGCAACCCGACCGAAAGCGGTTGACCCCAAGGAGAGCTGGAAGTCGAGGCCACCACCTTCGCCGCGGAGTTGCTCATGCCCGAGAGCGAGACTCGTCCCGAAATACGCCGGCTCGATTTCGGCAAGTTGCTCGAACTGAAGCGGGAATGGGCGTCGACCCGCTCGACCAGCCCCAGCCGGGTGACCAGCCCCAGCCGGGTGACCAGCCGATGCACCGCGGCGATCCCACCGAAGCCGGTGGCATCGATGTTGGCGCCGACCTCGTAGCGCACCGCGCCCGAACCCAGCATCGGCCTGGGCTGCGCCGCCCAGCGTCCAGCCCGCGCGTGCCGCGCCGCCACCCTGCGCCCTCGGCGTGTGATGTTTCGCCGACTCTGTTCATGGGTTATCTTCTTCACCACAAAGGCCTTTCCTGCTGGGCTGTGTTGAGGTCGCAAACCCAGCCAGGGTAAGGCCTTTGGCCTCTCCTACCCGGCCGACACGCTTACTGAAGGCCTAGTCTAGGCAGGCCATTCAATCTGCGGAATCGAAAAATTCGGTTCCAGCCTGCTCGTTTCCTTTCCGACAGCGATCTGACGTTGCGTAACGTCATTTGTACGGACTCTTTTAGCGGCATTGATTTCAGATGCCAACGCCGTCTGTGCTGTAGCGCCGATTGGCCGAAACTGTAAATTTGTATGATTATTTAAATCTTTGACGAACACGCGCCACAAACGTACTATCTCTTTGGCAAAGTCCACCGGCATCTCATTCAACGGTTTTGTTTGCGCGTGGTCGTCATATGTTGGTAACTGTGTAACCGGCCGCCTATCTTGCGCGTGCATCATATGACTATGAATCGGCCTTCTCCAGTGAAATTGATACAAGATCGATCCGATAAGCGG"
        "TACCTTGTACACAGTGCAATTGTAGTAATTCGCGTTTTGTCCTACGCTTGTATTCTGCGTGAAGAATTCAAACACG"
    )
    expected_alt = (
        "GACCGCCGAGTGCGGCTGGATTGGATTTCACAAGGATGCCAATATCCGGCGCAACGCCGTCGAGCGACGGACGGTGCTCGACACGGGAGCCCGGCTATTCTGTGTGCCGCGGGCCGACATCCTGGCAGAGCAAGTCGCGGCACGGTATATTGCGTCCCTTGCGGCGATTGCCCGTGCCGCACGATTTCCGGGACCATTCATCTACACGGTTCACCCGAGCAAGATCGTTCGCGTGCTCTAGTCGTTCATCGCTCCGTTAACCGCCGGCGAGGCCGTCGACGATCTTCATGGTCTCGACGCTGACGGTGGTCACCTTCTTGATGAGGTCGACGATGTAGGTGGGATCGTCGTGTTCGTCGCACCAGTCGTTGGGGTCGTTGACGATGCCCGACGCTTTGTCGGTGGTGACGCGGTAGCGCTCGATGATCCAGCCGAGCGCCGAGCGGGAGCGAGCAGGTAGCGCTCGGCCTCGTCGGGAATGCCGGCGATGGTGACACGCGAGTTGTAGATGATCGTTGAGTGGTCTTGCTTGGACTTCCATTTCATCTTTTCGACGCGCCAGGTCTCGCGGTCCTCCGGATCTGCGCCCGGTTTGAGTTGCACATCAAGGGGATACGGCTTGACCGACTCGTAGCCGACATGTAAGTCGGCTAGTTTCCGGCCGGCGCTGGCGAGCTGGTCGAAGCGTTCGCGGGTCTCCGGTGTTGGGATGTGCGGGAGCATCTTCTTGAGGTCAGCGGCGTATTTTGTGCGGTAGGCGGGGTCATGCAGCAGGCCGTAGACGTAGTAGAAGATGTCGTCTTTGGTGACTTGGTCGCCGATCGTGTCGCGGTAGAGCTTGAGGATGACGCCGGTGATGTTGTCGACGCGGCGGTAGCCGTGGTCGTCTACTTCGGCGTTGGTGGTGGACTCGAAATCGAGTTCGCCGTCACGTGGTTCGGTCTTCTCGTAGGTCCAGCGCGGGAAGAATTGACCGTTGCTTGAGCCCCAGAATGCGAGATCGGGGATAGCGTTTAGCATCAGACACGAGAAGGGCTTGTCTGAGCCCATGCCAACCACGTAGTAACCGACATTCCCGTGCTCCGGCGTCGGAAACATCGACGGAAGCTGGTAGGTACAGTTGTTGAGCTGCTGGTTGGGGTCGAGGTAGGCGTGCTCTTTCGTAAATGGTCGGTACGTGCCGAGCCGCATTCCCGCGGGAGCGAATTCGATGCGAATGCCTTGTGCCACTTGCCGCTTGTTGATGCGGTCCCAGCTGAACTTGGCCGAGTCCACGGTAATGAGGGCGTCAACCGGCGGGGTCTTGGCGTCCCTTCCGCGGATCTCGTTGATCCGGTCGACCTCCGAGTTGTAGAAGTCGATCGTGCGTCCGATGTTGGCCTCGAGCGCACCACGTGAAAAGTTGTAACACCACGCATCCCGGCTGGTCTTCAAGCCCGCGGAATAGTTCGCGAAGACACGTGTCACGTCAAGAGCAGCCTTCTTGTCGCCGATAACCGGCCACGCGCTGAACGCGTCGTCGCGTTGGTTGACCCAGTCACCGTGCAAGTTGGGTGTGACTGTCTGCCATTCCACCGTGTCGAGGTAGCCGTCGCCGACGATCCGCAACTTCTCCTCGCGACTCAGGTAATCGCCGATGTCGCGGTAAAGGACATCGCATGGCCCGCTGTGCTTCGGATCCTTGATGCCAAGGAAGATCGCCACCGTGTTGCGACTCCCCCCGCCAAAGACCTTGCCGCCTTCCTGGCGTGAGAGTTCCCCAGCTGTGCGCTGGTTCCCCCGCAGGTTGTACACATATACCGCCGCGTAGTCGTCGGCGAGCGACAACCGCATGCCGTCTGCCGTGTTGCCGTCTATGTACCCACCATTGGAGACGAATCCGAC"
        "AACACCGTTGTCACCAATGCGGTCGGTCGCCCACCGGAACGCGCGAATATACGAGTCGTACAGGCTGTTCTTCAGCTGCGCCGTCGACCGCTTCGCGTACGTCTGCTCAATCCGCCCGTCCAACGTCGGATACTTCACGTTGGCGTTCAGGTCGTTCGCGCTGCTCTGCCCCACCGAGTACGGCGGATTCCCGATGATCACGCTGATCGGCGTCGCCAGCTGTCGCAAGATCCGAGCGTTGTTGTACGGGAACATGATCGCGTCCATCGAGTCCCCGGCTTCGGAAATCTGGAACGTGTCGGCCAGCGCCATCCCGGGGAACGGCTCATAGGCGTCGGCGTCGGCGGTCTTGCCCGCCAAAGCATGGTAGGTCGACTCGATGTTCACCGCGGCGATGTAGTACGCCAGCAGCATGATCTCGTTGGCGTGCAGCTCTTGCGAGTACTTTCGGGTGAGGTCGGCGGCCGTGATCAGGTCGGACTGCAGCAGCCGGGTAATGAATGTGCCCGTCCCGGCGAAGCCGTCCAGAATATGCACGCCCTCGTCGGTCAGCCCGCGCCCGAAATGCTTGCGCGACACGAAATCAGCCGCCCGCACAATGAAGTCCACGACCTCGACCGGCGTGTACACGATCCCCAGCGCCTCGGCCTGCTTCTTGAAGCCGATGCGGAAGAACTTCTCGTACAGCTCGGCGATCACCTGCTGCTTGCCCTCGGCGCTGGTGACCTCGCCGGCGCGCCGTCGCACCGATTCGTAAAAGCCTTCCAACCGAGCGGTTTCGGCCTCCAGGCCGGCACCCCCGACGGTGTCGACCATCTTCTGCATGGCCCGCGACACCGGGTTGTGCGACGCGAAGTCATGCCCGGCGAACAGCGCGTCGAACACCGGCTTGGTGATCAGGTGCTGCGAGAGCATGCTGATCGCGTCATCGGGGGTGATCGAGTCATTGAGGTTATCGCGCAGCCCGGCCAGGAACTGCTCGAACGCCGCCGCCGCCGTAGCGTCGGCGCCGCCGAGCAGGGCGTGGATACGGGTGGTCAGCGTCGCGGCGATGTCGGCGACATCGGCGGCCCACTGCTCCCAATAGGTCCGGGTGCCAACCTTGTCGACGATGCGCGCGTAGATCGCTTCCTGCCACTGCGACAACGAGAACATCGCCAACTGCTCCGCGACGGCGGGTCCCGCCTCGTCGGAGGTCGGCCCGATGTGACCGCCCAACAGCTTGTCGCTGCCTTCACCGGTCTTCGTCGGCTTCACGTTCAGCGCAATGCTGTTCACCATCGCGTCGAAGCGCTCGTCGTGCGACCGCAACGCGTTGAGGACCTGCCACACCACCTTGAACCGTTTGTTGTCGGCCAACGCGGCAGACGGCTCGACACCCTCGGGCACCGCCACCGGCAAGATGACGTACCCGTAGTCCTTGCCGGGCGACTTGCGCATCACCCGACCGACCGACTGCACCACGTCGACGATGGAATTGCGCGGATTCAGGAACAGCACCGCGTCCAGCGCGGGCACGTCGACCCCTTCGGAGAGGCAGCGGGCGTTGGACAGGATGCGGCATTCATCCTCGGCGACCACGCCTTTGAGCCAGGCCAGCTGTTCGTTGCGGACCAGCGCGTTGAACGTCCCGTCCACGTGGCGCACCGAACACGCCAGGCCCGGGCCGTCGTCAACCAATTCGCGGTATGCCTCAACCACTTTCGGGAACAGCTCGGCAACCTGCTTGGACGTCTTGATGTCCTTGGCGAACGCCACCGCCCGACGCATCGGCGGCTCACCGGCGACAATGCCGGTACCGGACCGCTTGGCCAGGCCATTCCAGCAGCCGACGATCTTGGAGGCGTCGTCGAGCATCAGCTCGCCGGAAACCCCGGAGAGTTCCTGCTGCAACCGGGGCGCGATCACGCCCTGATCGACGGTGAGCACCATCACCTTGTAGTCGGTGAGCAGCCCGCGCTCCACCGCCTCGCCGAACGACAGCCGGTGAAACTCCGGCC"
        "CGAACGTCAGCTCGTCGTCCATCGACACCAACTCGGCGGAGTGCTGGTCGGCCCTGTCCTTGATGCTCTCGGTGAAAATCCTTGGCGTGGCGGTCATATACAGCCGCCGGGCCGCCTTCAGATACTGACCGTCGTGCACCCGC"
    )

    large_variant = Variant.create(
        variant_sets=self.variant_sets,
        reference=self.reference,
        reference_bases=replaced_bases,
        start=2266659,
        alternate_bases=[replacement_bases],
    )
    panel = self.pg.create(large_variant)

    assert expected_ref in panel.refs
    assert panel.alts == [expected_alt]
def get_context(pos, kmer):
    """Return the split forms of every variant starting near ``pos``.

    Queries variants whose ``start`` is strictly between ``pos - kmer`` and
    ``pos + kmer`` and is not ``pos`` itself, then flattens the result of
    calling ``split()`` on each one into a single list.
    """
    nearby = Variant.objects(
        start__ne=pos,
        start__gt=pos - kmer,
        start__lt=pos + kmer,
    )
    return [piece for found in nearby for piece in found.split()]
def test_del_with_ins_context_where_base_is_deleted(self):
    """A context SNP on a base removed by the deletion is dropped, leaving one alt probe."""

    def new_variant(ref_bases, position, alt_bases):
        return Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=ref_bases,
            start=position,
            alternate_bases=[alt_bases],
        )

    deletion = new_variant("ATC", 32, "A")
    snp_on_deleted_base = new_variant("T", 33, "C")

    panel = self.pg.create(deletion, context=[snp_on_deleted_base])

    remaining_context = self.pg._remove_overlapping_contexts(
        deletion, [snp_on_deleted_base])
    assert remaining_context == []

    expected_ref = "GATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGATC"
    assert expected_ref in panel.refs

    expected_alts = [
        "CGATTAAAGATAGAAATACACGATGCGAGCAAAAATTTCATAACATCACCATGAGTTTGATCC",
    ]
    assert sorted(panel.alts) == sorted(expected_alts)
def test_snp_with_replace_context(self):
    """A SNP with a 5-base replacement downstream as context yields two alt probes, in order."""

    def new_variant(ref_bases, position, alt_bases):
        return Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=ref_bases,
            start=position,
            alternate_bases=[alt_bases],
        )

    snp = new_variant("G", 2338961, "A")
    replacement = new_variant("GGATG", 2338990, "CGATA")
    panel = self.pg2.create(snp, context=[replacement])

    expected_ref = "CGACTAGCCACCATCGCGCATCAGTGCGAGGTCAAAAGCGACCAAAGCGAGCAAGTCGCGGAT"
    assert expected_ref in panel.refs

    # Exact list equality: the probe order is part of what is asserted.
    expected_alts = [
        "CGACTAGCCACCATCGCGCATCAGTGCGAGATCAAAAGCGACCAAAGCGAGCAAGTCGCGGAT",
        "CGACTAGCCACCATCGCGCATCAGTGCGAGATCAAAAGCGACCAAAGCGAGCAAGTCGCCGAT",
    ]
    assert panel.alts == expected_alts
def test_del_with_SNP_context1(self):
    """A one-base deletion with a SNP two bases downstream as context yields two alt probes."""

    def new_variant(ref_bases, position, alt_bases):
        return Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases=ref_bases,
            start=position,
            alternate_bases=[alt_bases],
        )

    deletion = new_variant("AA", 31, "A")
    downstream_snp = new_variant("T", 33, "A")
    panel = self.pg.create(deletion, context=[downstream_snp])

    expected_ref = "CGATTAAAGATAGAAATACACGATGCGAGCAATCAAATTTCATAACATCACCATGAGTTTGAT"
    assert expected_ref in panel.refs

    expected_alts = [
        "CGATTAAAGATAGAAATACACGATGCGAGCATCAAATTTCATAACATCACCATGAGTTTGATC",
        "CGATTAAAGATAGAAATACACGATGCGAGCAACAAATTTCATAACATCACCATGAGTTTGATC",
    ]
    assert sorted(panel.alts) == sorted(expected_alts)
def variant(self):
    """Build a ``Variant`` from the fields parsed out of ``self.var_name``.

    The variant is created with no variant sets, ``end`` set to 0 and
    ``self.reference`` as its reference.
    """
    ref_bases, position, alt_bases = split_var_name(self.var_name)
    fields = {
        "variant_sets": None,
        "start": int(position),
        "end": 0,
        "reference_bases": ref_bases,
        "alternate_bases": [alt_bases],
        "reference": self.reference,
    }
    return Variant.create(**fields)
def test_simple_variant_invalid(self):
    """A SNP whose alternate base equals its reference base must raise ``ValueError``."""
    with pytest.raises(ValueError):
        # Either the construction or the probe creation may be what raises.
        same_base_snp = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference,
            reference_bases="T",
            start=31,
            alternate_bases=["T"],
        )
        self.pg.create(same_base_snp)