class TestAltSeqBuilder(unittest.TestCase):
    """Check the variant transcript sequences built by ``altseqbuilder.AltSeqBuilder``.

    Every test parses an HGVS c. variant, builds the altered transcript
    through ``_run_comparison`` and compares it with a hand-written
    expected nucleotide string.  Transcript data is served by a
    ``MockInputSource`` backed by ``data/sanity_cp.tsv``.
    """

    # root sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
    # NOTE(review): the original comment was left empty; the value above is
    # inferred by reverting c.1A>T in test_substitution_start -- confirm
    # against data/sanity_cp.tsv.
    fn = os.path.join(os.path.dirname(__file__), "data", "sanity_cp.tsv")
    _datasource = mock_input_data_source.MockInputSource(fn)
    _parser = hgvs.parser.Parser()

    def test_substitution_start(self):
        hgvsc = "NM_999999.1:c.1A>T"
        expected_sequence = "AAAATCAAATTGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_substitution_middle(self):
        hgvsc = "NM_999999.1:c.6A>T"
        expected_sequence = "AAAATCAAAATGAATGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_substitution_end(self):
        hgvsc = "NM_999999.1:c.30G>C"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATACGGG"
        self._run_comparison(hgvsc, expected_sequence)

    # TODO - build in support when system can handle variants in 5'utr region
    # def test_insertion_before_start(self):
    #     hgvsc = "NM_999999.1:c.-1_1insGGG"
    #     expected_sequence = "AAAATCAAAGGGATGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
    #     self._run_comparison(hgvsc, expected_sequence)

    def test_insertion_start(self):
        hgvsc = "NM_999999.1:c.1_2insAAA"
        expected_sequence = "AAAATCAAAAAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_insertion_middle(self):
        hgvsc = "NM_999999.1:c.22_23insT"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGTCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_insertion_end(self):
        hgvsc = "NM_999999.1:c.29_30insGG"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGGGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    # TODO - build in support when system can handle variants in 3'utr region
    # def test_insertion_after_end(self):
    #     hgvsc = "NM_999999.1:c.30_*1insAA"
    #     expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGAAGGGN"
    #     self._run_comparison(hgvsc, expected_sequence)

    def test_deletion_start(self):
        hgvsc = "NM_999999.1:c.1del"
        expected_sequence = "AAAATCAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_deletion_middle(self):
        hgvsc = "NM_999999.1:c.2_7del"
        expected_sequence = "AAAATCAAAACGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_deletion_end(self):
        hgvsc = "NM_999999.1:c.30del"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_delins_start(self):
        hgvsc = "NM_999999.1:c.1delinsTTTT"
        expected_sequence = "AAAATCAAATTTTTGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_delins_middle(self):
        hgvsc = "NM_999999.1:c.2_3delinsAA"
        expected_sequence = "AAAATCAAAAAAAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_delins_end(self):
        hgvsc = "NM_999999.1:c.30delinsCCCC"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATACCCCGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_dup(self):
        hgvsc = "NM_999999.1:c.16_24dup"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_delete_gene(self):
        hgvsc = "NM_999999.1:c.-3_*1del"
        expected_sequence = ""
        self._run_comparison(hgvsc, expected_sequence)

    def test_sequence_with_length_that_is_not_divisible_by_3(self):
        hgvsc = "NM_999992.2:c.1del"
        # The original test carried an unused expected alt sequence
        # ("AAAATCAAATGGGGTAGGCCCGGCAGCCAGCTTTATAGAGGAGGCAGTTTCGCC"); it was
        # never compared because building the reference data must fail first.
        #
        # Parse outside the assertRaises block so that only the
        # RefTranscriptData construction is allowed to raise; a
        # NotImplementedError coming from the parser must not satisfy the test.
        var = self._parser.parse_hgvs_variant(hgvsc)
        with self.assertRaises(NotImplementedError):
            RefTranscriptData(hdp=self._datasource, tx_ac=var.ac, pro_ac="DUMMY")

    # def test_2_substitutions(self):
    #     pass
    #
    # def test_2_indel_no_net_frameshift(self):
    #     pass
    #
    # def test_2_indel_net_frameshift(self):
    #     pass

    def _run_comparison(self, hgvsc, expected_sequence):
        """Build the alt transcript for ``hgvsc`` and compare it with the expectation.

        :param hgvsc: HGVS c. variant string handed to the parser
        :param expected_sequence: transcript sequence the builder should produce
        """
        ac_p = "DUMMY"
        var = self._parser.parse_hgvs_variant(hgvsc)
        transcript_data = RefTranscriptData(hdp=self._datasource, tx_ac=var.ac, pro_ac=ac_p)
        builder = altseqbuilder.AltSeqBuilder(var, transcript_data)
        insert_result = builder.build_altseq()
        # only the first element of the builder's result is inspected
        actual_sequence = insert_result[0].transcript_sequence
        msg = "expected: {}\nactual : {}".format(expected_sequence, actual_sequence)
        self.assertEqual(expected_sequence, actual_sequence, msg)
class TestHgvsCToP(unittest.TestCase):
    """Exercise ``VariantMapper.c_to_p`` against mock transcript data.

    Each test passes an HGVS c. string and the p. string it should map to
    into ``_run_conversion``.  The data source is a ``MockInputSource`` over
    ``data/sanity_cp.tsv`` and the protein accession is always "MOCK".
    """

    fn = os.path.join(os.path.dirname(__file__), "data", "sanity_cp.tsv")
    _datasource = mock_input_data_source.MockInputSource(fn)
    _mapper = variantmapper.VariantMapper(_datasource, prevalidation_level="INTRINSIC")
    _parser = hgvs.parser.Parser()

    # --- substitutions ---------------------------------------------------

    def test_silent(self):
        self._run_conversion("NM_999999.1:c.6A>G", "MOCK:p.(Lys2=)")

    def test_substitution(self):
        self._run_conversion("NM_999999.1:c.6A>T", "MOCK:p.(Lys2Asn)")

    def test_substitution_introduces_stop_codon(self):
        self._run_conversion("NM_999996.1:c.8C>A", "MOCK:p.(Ser3Ter)")

    def test_substitution_removes_stop_codon(self):
        self._run_conversion("NM_999998.1:c.30G>T", "MOCK:p.(Ter10TyrextTer3)")

    # --- insertions ------------------------------------------------------

    def test_insertion_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.6_7insGGG", "MOCK:p.(Lys2_Ala3insGly)")

    def test_insertion_frameshift(self):
        self._run_conversion("NM_999999.1:c.22_23insT", "MOCK:p.(Ala8ValfsTer?)")

    def test_insertion_adds_stop(self):
        self._run_conversion("NM_999999.1:c.8_9insTT", "MOCK:p.(Lys4Ter)")

    # --- deletions -------------------------------------------------------

    def test_deletion_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.10_12del", "MOCK:p.(Lys4del)")

    def test_deletion2_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.4_15del", "MOCK:p.(Lys2_Ala5del)")

    def test_deletion3_no_frameshift_c_term(self):
        self._run_conversion("NM_999995.1:c.4_6del", "MOCK:p.(Lys3del)")

    def test_deletion4_no_frameshift_c_term(self):
        self._run_conversion("NM_999994.1:c.4_9del", "MOCK:p.(Lys3_Lys4del)")

    def test_deletion5_no_frameshift(self):
        self._run_conversion("NM_999994.1:c.20_25del", "MOCK:p.(Ala7_Arg9delinsGly)")

    def test_deletion6_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.5_7del", "MOCK:p.(Lys2_Ala3delinsThr)")

    def test_deletion7_no_frameshift(self):
        self._run_conversion("NM_999993.1:c.13_24del", "MOCK:p.(Arg5_Ala8del)")

    def test_deletion_frameshift_nostop(self):
        self._run_conversion("NM_999999.1:c.11_12del", "MOCK:p.(Lys4SerfsTer?)")

    def test_deletion_frameshift_adds_stop(self):
        self._run_conversion("NM_999997.1:c.7del", "MOCK:p.(Ala3ArgfsTer6)")

    def test_deletion_no_frameshift_removes_stop_plus_previous(self):
        self._run_conversion("NM_999999.1:c.25_30del", "MOCK:p.(Lys9_Ter10delinsGly)")

    # --- delins ----------------------------------------------------------

    def test_indel_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.11_12delinsTCCCA", "MOCK:p.(Lys4delinsIlePro)")

    def test_indel2_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.11_18delinsTCCCA", "MOCK:p.(Lys4_Phe6delinsIlePro)")

    def test_indel_frameshift_nostop(self):
        self._run_conversion("NM_999999.1:c.8delinsGG", "MOCK:p.(Ala3GlyfsTer?)")

    # --- duplications ----------------------------------------------------

    def test_dup_1AA_no_frameshift_2(self):
        self._run_conversion("NM_999999.1:c.10_12dup", "MOCK:p.(Lys4dup)")

    def test_dup_1AA_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.16_18dup", "MOCK:p.(Phe6dup)")

    def test_dup_2AA_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.16_21dup", "MOCK:p.(Phe6_Arg7dup)")

    def test_dup_2AA2_no_frameshift(self):
        self._run_conversion("NM_999995.1:c.4_6dup", "MOCK:p.(Lys3dup)")

    def test_dup_3AA_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.16_24dup", "MOCK:p.(Phe6_Ala8dup)")

    def test_dup_frameshift(self):
        self._run_conversion("NM_999999.1:c.12_13dup", "MOCK:p.(Ala5GlufsTer?)")

    # --- intronic / UTR variants ------------------------------------------

    def test_intron(self):
        self._run_conversion("NM_999999.1:c.12+1G>A", "MOCK:p.?")

    def test_five_prime_utr(self):
        self._run_conversion("NM_999999.1:c.-2A>G", "MOCK:p.?")

    def test_three_prime_utr(self):
        self._run_conversion("NM_999999.1:c.*3G>A", "MOCK:p.?")

    def test_deletion_into_three_prime_utr_frameshift(self):
        self._run_conversion("NM_999999.1:c.27_*3del", "MOCK:p.(Lys9XaafsTer?)")

    def test_deletion_into_three_prime_utr_no_frameshift(self):
        self._run_conversion(
            "NM_999995.1:c.28_*3del", "MOCK:p.(Lys10_Ter11delinsArgGlnPheArg)"
        )

    def test_delins_into_three_prime_utr_no_frameshift(self):
        self._run_conversion(
            "NM_999995.1:c.28_*3delinsGGG", "MOCK:p.(Lys10_Ter11delinsGlyArgGlnPheArg)"
        )

    # --- variants touching the start codon / whole gene --------------------
    # See recommendations re p.? (p.Met1?) at:
    # http://varnomen.hgvs.org/recommendations/protein/variant/substitution/

    def test_substitution_removes_start_codon(self):
        self._run_conversion("NM_999999.1:c.1A>G", "MOCK:p.?")

    def test_deletion_from_five_prime_utr_frameshift(self):
        self._run_conversion("NM_999999.1:c.-3_1del", "MOCK:p.?")

    def test_deletion_from_five_prime_utr_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.-3_3del", "MOCK:p.?")

    def test_delins_from_five_prime_utr_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.-3_3delinsAAA", "MOCK:p.?")

    def test_delete_entire_gene(self):
        self._run_conversion("NM_999999.1:c.-3_*1del", "MOCK:p.0?")

    def test_multiple_stop_codons(self):
        self._run_conversion("NM_999992.1:c.4G>A", "MOCK:p.?")

    # The following are unsupported
    #
    # def test_repeats(self):
    #     hgvsc = "NM_999999.1:c.12_13[3]"
    #     hgvsp_expected = ""
    #     self._run_conversion(hgvsc, hgvsp_expected)
    #
    # Further placeholders that were stubbed out with `pass`:
    #   test_variable_repeats, test_indeterminate_entire_exon_del,
    #   test_indeterminate_entire_exon_dup, test_mosaic, test_chimera,
    #   test_two_changes_same_allele, test_two_changes_diff_allele,
    #   test_two_changes_unknown_allele

    def _run_conversion(self, hgvsc, hgvsp_expected):
        """Map one c. variant to protein level and assert on its string form.

        :param hgvsc: HGVS c. variant string to parse and convert
        :param hgvsp_expected: expected ``str()`` of the resulting p. variant
        """
        protein_accession = "MOCK"
        parsed_variant = TestHgvsCToP._parser.parse_hgvs_variant(hgvsc)
        protein_variant = TestHgvsCToP._mapper.c_to_p(parsed_variant, protein_accession)
        hgvsp_actual = str(protein_variant)
        failure_note = "hgvsc: {} hgvsp expected: {} actual: {}".format(
            hgvsc, hgvsp_expected, hgvsp_actual
        )
        self.assertEqual(hgvsp_expected, hgvsp_actual, failure_note)
class TestAltSeqBuilder(unittest.TestCase):
    """Check the variant transcript sequences built by ``altseqbuilder.AltSeqBuilder``.

    Every test parses an HGVS c. variant, builds the altered transcript
    through ``_run_comparison`` and compares it with a hand-written
    expected nucleotide string.  Transcript data is served by a
    ``MockInputSource`` backed by ``data/sanity_cp.tsv``.
    """

    # root sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
    # NOTE(review): the original comment was left empty; the value above is
    # inferred by reverting c.1A>T in test_substitution_start -- confirm
    # against data/sanity_cp.tsv.
    fn = os.path.join(os.path.dirname(__file__), "data", "sanity_cp.tsv")
    _datasource = mock_input_data_source.MockInputSource(fn)
    _parser = hgvs.parser.Parser()

    def test_substitution_start(self):
        hgvsc = "NM_999999.1:c.1A>T"
        expected_sequence = "AAAATCAAATTGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_substitution_middle(self):
        hgvsc = "NM_999999.1:c.6A>T"
        expected_sequence = "AAAATCAAAATGAATGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_substitution_end(self):
        hgvsc = "NM_999999.1:c.30G>C"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATACGGG"
        self._run_comparison(hgvsc, expected_sequence)

    # TODO - build in support when system can handle variants in 5'utr region
    # def test_insertion_before_start(self):
    #     hgvsc = "NM_999999.1:c.-1_1insGGG"
    #     expected_sequence = "AAAATCAAAGGGATGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
    #     self._run_comparison(hgvsc, expected_sequence)

    def test_insertion_start(self):
        hgvsc = "NM_999999.1:c.1_2insAAA"
        expected_sequence = "AAAATCAAAAAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_insertion_middle(self):
        hgvsc = "NM_999999.1:c.22_23insT"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGTCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_insertion_end(self):
        hgvsc = "NM_999999.1:c.29_30insGG"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGGGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    # TODO - build in support when system can handle variants in 3'utr region
    # def test_insertion_after_end(self):
    #     hgvsc = "NM_999999.1:c.30_*1insAA"
    #     expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGAAGGGN"
    #     self._run_comparison(hgvsc, expected_sequence)

    def test_deletion_start(self):
        hgvsc = "NM_999999.1:c.1del"
        expected_sequence = "AAAATCAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_deletion_middle(self):
        hgvsc = "NM_999999.1:c.2_7del"
        expected_sequence = "AAAATCAAAACGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_deletion_end(self):
        hgvsc = "NM_999999.1:c.30del"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_delins_start(self):
        hgvsc = "NM_999999.1:c.1delinsTTTT"
        expected_sequence = "AAAATCAAATTTTTGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_delins_middle(self):
        hgvsc = "NM_999999.1:c.2_3delinsAA"
        expected_sequence = "AAAATCAAAAAAAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_delins_end(self):
        hgvsc = "NM_999999.1:c.30delinsCCCC"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATACCCCGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_dup(self):
        hgvsc = "NM_999999.1:c.16_24dup"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_delete_gene(self):
        hgvsc = "NM_999999.1:c.-3_*1del"
        expected_sequence = ""
        self._run_comparison(hgvsc, expected_sequence)

    # def test_2_substitutions(self):
    #     pass
    #
    # def test_2_indel_no_net_frameshift(self):
    #     pass
    #
    # def test_2_indel_net_frameshift(self):
    #     pass

    def _run_comparison(self, hgvsc, expected_sequence):
        """Build the alt transcript for ``hgvsc`` and compare it with the expectation.

        :param hgvsc: HGVS c. variant string handed to the parser
        :param expected_sequence: transcript sequence the builder should produce
        """

        # test replicates the internal reference-data class used for c_to_p
        # NOTE(review): the original comment said "p_to_c"; the helper's own
        # docstring says c_to_p, so that is presumably what was meant.
        @attr.s(slots=True)
        class RefTranscriptData(object):
            transcript_sequence = attr.ib()
            aa_sequence = attr.ib()
            cds_start = attr.ib()
            cds_stop = attr.ib()
            protein_accession = attr.ib()

            @classmethod
            def setup_transcript_data(cls, ac, ac_p, db, ref="GRCh37.p10"):
                """helper for generating RefTranscriptData for c_to_p

                :param ac: transcript accession looked up in ``db``
                :param ac_p: protein accession stored on the result
                :param db: data provider offering ``get_tx_info`` / ``get_tx_seq``
                :param ref: unused here; kept so the signature stays unchanged
                :raises hgvs.exceptions.HGVSError: if transcript info or
                    sequence is missing for ``ac``
                """
                tx_info = db.get_tx_info(ac)
                tx_seq = db.get_tx_seq(ac)

                if tx_info is None or tx_seq is None:
                    raise hgvs.exceptions.HGVSError(
                        "Missing transcript data for accession: {}".format(ac))

                # use 1-based hgvs coords
                cds_start = tx_info["cds_start_i"] + 1
                cds_stop = tx_info["cds_end_i"]

                # Pad the CDS with N up to a whole number of codons so
                # biopython won't complain during the conversion.  The
                # previous code passed the None returned by list.extend() to
                # str.join (a TypeError) and discarded the result anyway.
                tx_seq_to_translate = tx_seq[cds_start - 1:cds_stop]
                overhang = len(tx_seq_to_translate) % 3
                if overhang:
                    tx_seq_to_translate += "N" * (3 - overhang)

                tx_seq_cds = Seq(tx_seq_to_translate)
                protein_seq = str(tx_seq_cds.translate())

                return cls(tx_seq, protein_seq, cds_start, cds_stop, ac_p)

        ac_p = "DUMMY"
        var = self._parser.parse_hgvs_variant(hgvsc)
        transcript_data = RefTranscriptData.setup_transcript_data(var.ac, ac_p, self._datasource)

        builder = altseqbuilder.AltSeqBuilder(var, transcript_data)
        insert_result = builder.build_altseq()
        # only the first element of the builder's result is inspected
        actual_sequence = insert_result[0].transcript_sequence
        msg = "expected: {}\nactual : {}".format(expected_sequence, actual_sequence)
        self.assertEqual(expected_sequence, actual_sequence, msg)