Exemple #1
0
 def test_make_variant_panel_stop_codon(self):
     """Check that the stop-codon mutation katG W90* expands as expected.

     Three DNA-level variant names are expected, each with reference
     bases "CCA" and with alternate bases "TTA", "CTA" and "TCA"; the
     first one must start at position 2155842.
     """
     names = list(
         self.gm.get_variant_names("katG", "W90*", protein_coding_var=True))
     assert len(names) == 3
     parsed = [split_var_name(name) for name in names]
     ref_bases = sorted(fields[0] for fields in parsed)
     alt_bases = sorted(fields[-1] for fields in parsed)
     _, first_start, _ = parsed[0]
     assert first_start == 2155842
     assert ref_bases == ["CCA"] * 3
     assert alt_bases == sorted(["TTA", "CTA", "TCA"])
Exemple #2
0
 def __init__(
         self,
         var_name,
         reference,
         gene=None,
         mut=None):
     """Store the variant name, its parsed parts and the reference.

     Args:
         var_name: variant name; parsed with split_var_name to fill
             ``self.ref`` and ``self.alt``.
         reference: stored unchanged on ``self.reference``.
         gene: optional gene, stored unchanged on ``self.gene``.
         mut: optional mutation name; when truthy its position part is
             stored on ``self.start``.
     """
     self.var_name = var_name
     self.gene = gene
     # NOTE(review): self.start is only defined when `mut` is truthy;
     # confirm that every reader of self.start is guarded accordingly.
     if mut:
         _, self.start, _ = split_var_name(mut)
     parsed_ref, _, parsed_alt = split_var_name(var_name)
     self.ref = parsed_ref
     self.alt = parsed_alt
     self.standard_table = CodonTable.unambiguous_dna_by_name["Standard"]
     self.reference = reference
Exemple #3
0
 def __init__(self,
              var_name,
              reference,
              gene=None,
              mut=None,
              protein_coding_var=False):
     """Store the variant name, its parsed parts and the reference.

     Args:
         var_name: variant name; parsed with split_var_name to fill
             ``self.ref`` and ``self.alt``.
         reference: stored unchanged on ``self.reference``.
         gene: optional gene, stored unchanged on ``self.gene``.
         mut: optional mutation name; kept as
             ``self.input_mutation_name`` and, when truthy, its
             position part is stored on ``self.start``.
         protein_coding_var: flag stored unchanged on
             ``self.protein_coding_var``.
     """
     self.var_name = var_name
     self.gene = gene
     # NOTE(review): self.start is only defined when `mut` is truthy;
     # confirm that every reader of self.start is guarded accordingly.
     if mut:
         _, self.start, _ = split_var_name(mut)
     parsed_ref, _, parsed_alt = split_var_name(var_name)
     self.ref = parsed_ref
     self.alt = parsed_alt
     self.standard_table = CodonTable.unambiguous_dna_by_name["Standard"]
     self.reference = reference
     self.input_mutation_name = mut
     self.protein_coding_var = protein_coding_var
Exemple #4
0
 def test_make_variant_panel8(self):
     """Check the probe panel built for the DNA-level deletion eis TG-1T.

     The mutation must resolve to the single reference-space variant
     with ref "CA", start 2715332 and alt "A", and the alt probe must
     equal the reference probe with the base at index 30 removed.
     """
     ag = AlleleGenerator("src/mykrobe/data/NC_000962.3.fasta")
     variants = list(
         self.gm.get_variant_names("eis", "TG-1T",
                                   protein_coding_var=False))
     assert len(variants) == 1
     ref, start, alt = split_var_name(variants[0])
     assert ref == 'CA'
     assert start == 2715332
     assert alt == 'A'
     v = Variant.create(variant_sets=self.variant_sets,
                        reference=self.reference_id,
                        reference_bases=ref,
                        start=start,
                        alternate_bases=[alt])
     panel = ag.create(v)
     assert len(panel.alts) == 1
     panel_alt = panel.alts[0]
     panel_ref = panel.refs[0]
     # The panel ref/alt sequences extend past the end of the gene, so
     # they cannot be compared against the gene sequence; take the
     # matching window from the reference sequence instead.
     panel_ref_start = self.reference_seq.find(panel_ref)
     panel_ref_end = panel_ref_start + len(panel_ref)
     assert panel_ref_start < start < panel_ref_end
     seq = str(self.reference_seq[panel_ref_start:panel_ref_end])
     assert seq == panel_ref
     # Report the expected allele in the failure message; the former
     # debug print showed seq[:31] + seq[31:], i.e. the unmodified
     # reference, which did not match what is asserted here.
     expected_alt = seq[:30] + seq[31:]
     assert panel_alt == expected_alt, (
         "panel alt %s != expected %s" % (panel_alt, expected_alt))
     DB.drop_database('mykrobe-test')
Exemple #5
0
 def test_make_variant_panel6(self):
     """Check the probe panel built for the DNA-level change pncA CAG28TAA.

     The mutation must resolve to a single variant with ref "CTG",
     start 2289212 and alt "TTA"; the alt probe must equal the
     reference probe with indices 30-32 replaced by "TTA".
     """
     generator = AlleleGenerator(f"{DATA_DIR}/NC_000962.3.fasta", kmer=31)
     gene = self.gm.get_gene("pncA")
     names = list(
         self.gm.get_variant_names(
             "pncA", "CAG28TAA", protein_coding_var=False))
     assert len(names) == 1
     ref_bases, position, alt_bases = split_var_name(names[0])
     assert ref_bases == "CTG"
     assert position == 2289212
     assert alt_bases == "TTA"
     variant = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference_id,
         reference_bases=ref_bases,
         start=position,
         alternate_bases=[alt_bases],
     )
     panel = generator.create(variant)
     assert len(panel.alts) == 1
     probe_alt = panel.alts[0]
     probe_ref = panel.refs[0]
     # The probe sequences run past the end of the gene, so the gene
     # sequence cannot be used for comparison; locate the probe in the
     # reference sequence and slice out the same window instead.
     window_start = self.reference_seq.find(probe_ref)
     window_end = window_start + len(probe_ref)
     assert window_start < position < window_end
     window = str(self.reference_seq[window_start:window_end])
     assert window == probe_ref
     assert probe_alt == window[:30] + "TTA" + window[33:]
     DB.drop_database("mykrobe-test")
Exemple #6
0
 def variant(self):
     """Build a Variant from ``self.var_name`` and ``self.reference``.

     The name is parsed with split_var_name; the position is coerced
     to ``int``, ``end`` is passed as 0 and ``variant_sets`` as None.
     """
     ref_bases, position, alt_bases = split_var_name(self.var_name)
     fields = dict(
         variant_sets=None,
         start=int(position),
         end=0,
         reference_bases=ref_bases,
         alternate_bases=[alt_bases],
         reference=self.reference,
     )
     return Variant.create(**fields)
Exemple #7
0
 def get_variant_names(self, gene, mutation, protein_coding_var=True):
     """Translate a gene-space mutation into variant names.

     Args:
         gene: gene identifier, resolved through ``self.get_gene``.
         mutation: mutation name, parsed with split_var_name into
             reference, position and alternate parts.
         protein_coding_var: when False the mutation is always handled
             as a DNA-level change; when True only negative positions
             are handled as DNA-level and positive positions are
             handled as coding changes.

     Returns:
         Whatever ``_process_DNA_mutation`` or
         ``_process_coding_mutation`` returns for the mutation.

     Raises:
         ValueError: if the mutation position is 0. Positions are
             1-based, so 0 is invalid for DNA-level and coding
             mutations alike.
     """
     ref, start, alt = split_var_name(mutation)
     gene = self.get_gene(gene)
     # Reject position 0 before dispatching: previously the check was
     # skipped whenever protein_coding_var was False, so an invalid
     # position was silently passed on to the DNA-mutation path.
     if start == 0:
         raise ValueError(
             "Variants are defined in 1-based coordinates. You can't have pos 0. ")
     if start < 0 or not protein_coding_var:
         return self._process_DNA_mutation(gene, ref, start, alt)
     return self._process_coding_mutation(gene, ref, start, alt)
Exemple #8
0
 def test_make_variant_panel5(self):
     """Check that no probe for gyrA D94X leaves "D" at protein index 93.

     For every variant name produced for the mutation, each alt probe
     is substituted into the gene sequence and the translation is
     inspected.
     """
     generator = AlleleGenerator("src/mykrobe/data/NC_000962.3.fasta")
     gyra = self.gm.get_gene("gyrA")
     for name in self.gm.get_variant_names("gyrA", "D94X"):
         ref_bases, position, alt_bases = split_var_name(name)
         variant = Variant.create(variant_sets=self.variant_sets,
                                  reference=self.reference_id,
                                  reference_bases=ref_bases,
                                  start=position,
                                  alternate_bases=[alt_bases])
         panel = generator.create(variant)
         for probe_alt in panel.alts:
             gene_seq = copy.copy(str(gyra.seq))
             mutated = gene_seq.replace(panel.refs[0], probe_alt)
             protein = Seq(mutated).translate()
             assert protein[93] != "D"
     DB.drop_database('mykrobe-test')
Exemple #9
0
 def test_make_variant_panel4(self):
     """Check that every probe for katG W90R yields "R" at protein index 89.

     Each alt probe is substituted into the reverse complement of the
     gene sequence; the result must differ from the unmodified reverse
     complement and, once reverse-complemented back, must translate to
     "R" at index 89.
     """
     ag = AlleleGenerator("src/mykrobe/data/NC_000962.3.fasta")
     gene = self.gm.get_gene("katG")
     for var in self.gm.get_variant_names("katG", "W90R"):
         ref, start, alt = split_var_name(var)
         v = Variant.create(variant_sets=self.variant_sets,
                            reference=self.reference_id,
                            reference_bases=ref,
                            start=start,
                            alternate_bases=[alt])
         panel = ag.create(v)
         for panel_alt in panel.alts:
             unmodified = str(gene.seq.reverse_complement())
             seq = unmodified.replace(panel.refs[0], panel_alt)
             # Compare against the same strand the replacement was made
             # on; comparing against str(gene.seq) would pass even if
             # the replacement matched nothing.
             assert seq != unmodified
             assert Seq(seq).reverse_complement().translate()[89] == "R"
     DB.drop_database('mykrobe-test')
Exemple #10
0
 def _create_variant(self, probe_name):
     """Build a Variant from a probe name, or return None.

     The variant name is the second "-"-separated field of the part of
     ``probe_name`` before the first "?". When the probe parameters
     carry a truthy "mut", a "<gene>_<mut>" name is listed first.

     Returns:
         The created Variant, or None when an AttributeError is raised
         while parsing the variant name or creating the Variant.
     """
     params = get_params(probe_name)
     names = []
     if params.get("mut"):
         gene_and_mut = [params.get("gene"), params.get("mut")]
         names.append("_".join(gene_and_mut))
     before_query = probe_name.split('?')[0]
     var_name = before_query.split('-')[1]
     names.append(var_name)
     try:
         # Only variant panels have a name that parses as a variant.
         # NOTE(review): presumably split_var_name raises AttributeError
         # for non-variant names -- confirm against its implementation.
         ref_bases, position, alt_bases = split_var_name(var_name)
         variant = Variant.create(start=position,
                                  reference_bases=ref_bases,
                                  alternate_bases=[alt_bases],
                                  names=names,
                                  info=params)
     except AttributeError:
         return None
     return variant
Exemple #11
0
 def test_make_variant_panel7(self):
     """Check a DNA change upstream of a gene on the reverse strand.

     The mutation eis G-10A must resolve to a single variant with ref
     "C", start 2715342 and alt "T", and the alt probe must equal the
     reference probe with index 30 replaced by "T".
     """
     # G-10A is expressed in "gene space": 10 bases upstream of eis the
     # nucleotide on the reverse strand is G. In the genome that is
     # position 2715342, which is C on the forward strand.
     # Diagram:
     #             | <- This C is at -10 in "gene space", so variant G-10A has ref=G
     #             |    ref coord is 2715342, and variant in "ref space" is C2715342T
     # CACAGAATCCGACTGTGGCATATGCCGC
     #   |
     #   | <- C = last nucleotide of gene, at 2715332
     generator = AlleleGenerator(f"{DATA_DIR}/NC_000962.3.fasta", kmer=31)
     gene = self.gm.get_gene("eis")
     names = list(
         self.gm.get_variant_names(
             "eis", "G-10A", protein_coding_var=False))
     assert len(names) == 1
     ref_bases, position, alt_bases = split_var_name(names[0])
     assert ref_bases == "C"
     assert position == 2715342
     assert alt_bases == "T"
     variant = Variant.create(
         variant_sets=self.variant_sets,
         reference=self.reference_id,
         reference_bases=ref_bases,
         start=position,
         alternate_bases=[alt_bases],
     )
     panel = generator.create(variant)
     assert len(panel.alts) == 1
     probe_alt = panel.alts[0]
     probe_ref = panel.refs[0]
     # The probe sequences run past the end of the gene, so the gene
     # sequence cannot be used for comparison; locate the probe in the
     # reference sequence and slice out the same window instead.
     window_start = self.reference_seq.find(probe_ref)
     window_end = window_start + len(probe_ref)
     assert window_start < position < window_end
     window = str(self.reference_seq[window_start:window_end])
     assert window == probe_ref
     assert probe_alt == window[:30] + "T" + window[31:]
     DB.drop_database("mykrobe-test")
 def test_make_variant_panel1(self):
     """Check that every probe for rpoB D3A turns protein index 2 into "A".

     The unmodified gene sequence must translate to "D" at index 2;
     after substituting the tail of the reference probe with the tail
     of each alt probe, the sequence must change and translate to "A".
     """
     generator = AlleleGenerator(f"{DATA_DIR}/NC_000962.3.fasta")
     rpob = self.gm.get_gene("rpoB")
     for name in self.gm.get_variant_names("rpoB", "D3A"):
         ref_bases, position, alt_bases = split_var_name(name)
         variant = Variant.create(
             variant_sets=self.variant_sets,
             reference=self.reference_id,
             reference_bases=ref_bases,
             start=position,
             alternate_bases=[alt_bases])
         panel = generator.create(variant)
         for probe_alt in panel.alts:
             gene_seq = copy.copy(str(rpob.seq))
             assert Seq(gene_seq).translate()[2] == "D"
             # Only the probe tails are swapped: the reference probe
             # from index 25 on, the alt probe from index 24 on.
             ref_tail = panel.refs[0][25:]
             alt_tail = probe_alt[24:]
             mutated = gene_seq.replace(ref_tail, alt_tail)
             assert mutated != str(rpob.seq)
             assert Seq(mutated).translate()[2] == "A"
     DB.drop_database('mykrobe-test')
Exemple #13
0
 def test_make_variant_panel2(self):
     """Check that every probe for katG E3A yields "A" at protein index 2.

     The head of each alt probe is substituted for the head of the
     reference probe in the reverse complement of the gene sequence;
     the result must differ from the unmodified reverse complement
     and, once reverse-complemented back, must translate to "A" at
     index 2.
     """
     ag = AlleleGenerator(f"{DATA_DIR}/NC_000962.3.fasta", kmer=31)
     gene = self.gm.get_gene("katG")
     for var in self.gm.get_variant_names("katG", "E3A"):
         ref, start, alt = split_var_name(var)
         v = Variant.create(
             variant_sets=self.variant_sets,
             reference=self.reference_id,
             reference_bases=ref,
             start=start,
             alternate_bases=[alt],
         )
         panel = ag.create(v)
         for panel_alt in panel.alts:
             unmodified = str(gene.seq.reverse_complement())
             # Swap only the first 39 bases of the reference probe; the
             # alt slice is adjusted by the ref/alt length difference.
             alt_head_len = 39 + len(panel_alt) - len(panel.refs[0])
             seq = unmodified.replace(panel.refs[0][:39],
                                      panel_alt[:alt_head_len])
             # Compare against the same strand the replacement was made
             # on; comparing against str(gene.seq) would pass even if
             # the replacement matched nothing.
             assert seq != unmodified
             assert Seq(seq).reverse_complement().translate()[2] == "A"
     DB.drop_database("mykrobe-test")
Exemple #14
0
 def test_split_name_del(self):
     """A multi-base reference ("AA12T") splits into "AA", 12 and "T"."""
     ref_bases, position, alt_bases = split_var_name("AA12T")
     assert (ref_bases, position, alt_bases) == ("AA", 12, "T")
Exemple #15
0
 def test_split_name3(self):
     """A negative position ("C-54T") splits into "C", -54 and "T"."""
     ref_bases, position, alt_bases = split_var_name("C-54T")
     assert (ref_bases, position, alt_bases) == ("C", -54, "T")
Exemple #16
0
 def test_split_name2(self):
     """A slash-separated alt ("A12T/A") is kept whole as "T/A"."""
     ref_bases, position, alt_bases = split_var_name("A12T/A")
     assert (ref_bases, position, alt_bases) == ("A", 12, "T/A")