class TestAltSeqBuilder(unittest.TestCase):
    """Check the transcript sequences produced by ``AltSeqBuilder``.

    Each test parses an HGVS c. variant, builds the altered transcript from
    the mock data in ``data/sanity_cp.tsv`` and compares the resulting
    transcript sequence with a hand-written expectation.
    """

    # root sequence = ""
    fn = os.path.join(os.path.dirname(__file__), "data", "sanity_cp.tsv")
    _datasource = mock_input_data_source.MockInputSource(fn)
    _parser = hgvs.parser.Parser()

    def test_substitution_start(self):
        hgvsc = "NM_999999.1:c.1A>T"
        expected_sequence = "AAAATCAAATTGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_substitution_middle(self):
        hgvsc = "NM_999999.1:c.6A>T"
        expected_sequence = "AAAATCAAAATGAATGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_substitution_end(self):
        hgvsc = "NM_999999.1:c.30G>C"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATACGGG"
        self._run_comparison(hgvsc, expected_sequence)

    # TODO - build in support when system can handle variants in 5'utr region
    # def test_insertion_before_start(self):
    #     hgvsc = "NM_999999.1:c.-1_1insGGG"
    #     expected_sequence = "AAAATCAAAGGGATGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
    #     self._run_comparison(hgvsc, expected_sequence)

    def test_insertion_start(self):
        hgvsc = "NM_999999.1:c.1_2insAAA"
        expected_sequence = "AAAATCAAAAAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_insertion_middle(self):
        hgvsc = "NM_999999.1:c.22_23insT"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGTCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_insertion_end(self):
        hgvsc = "NM_999999.1:c.29_30insGG"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGGGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    # TODO - build in support when system can handle variants in 3'utr region
    # def test_insertion_after_end(self):
    #     hgvsc = "NM_999999.1:c.30_*1insAA"
    #     expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGAAGGGN"
    #     self._run_comparison(hgvsc, expected_sequence)

    def test_deletion_start(self):
        hgvsc = "NM_999999.1:c.1del"
        expected_sequence = "AAAATCAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_deletion_middle(self):
        hgvsc = "NM_999999.1:c.2_7del"
        expected_sequence = "AAAATCAAAACGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_deletion_end(self):
        hgvsc = "NM_999999.1:c.30del"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_delins_start(self):
        hgvsc = "NM_999999.1:c.1delinsTTTT"
        expected_sequence = "AAAATCAAATTTTTGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_delins_middle(self):
        hgvsc = "NM_999999.1:c.2_3delinsAA"
        expected_sequence = "AAAATCAAAAAAAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_delins_end(self):
        hgvsc = "NM_999999.1:c.30delinsCCCC"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATACCCCGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_dup(self):
        hgvsc = "NM_999999.1:c.16_24dup"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_delete_gene(self):
        hgvsc = "NM_999999.1:c.-3_*1del"
        expected_sequence = ""
        self._run_comparison(hgvsc, expected_sequence)

    def test_sequence_with_length_that_is_not_divisible_by_3(self):
        hgvsc = "NM_999992.2:c.1del"
        # Parse outside the assertRaises context so that only the
        # construction of the reference transcript data is allowed to raise;
        # a NotImplementedError from the parser must not satisfy this test.
        var = self._parser.parse_hgvs_variant(hgvsc)
        with self.assertRaises(NotImplementedError):
            RefTranscriptData(hdp=self._datasource,
                              tx_ac=var.ac,
                              pro_ac="DUMMY")

    # def test_2_substitutions(self):
    #     pass
    #
    # def test_2_indel_no_net_frameshift(self):
    #     pass
    #
    # def test_2_indel_net_frameshift(self):
    #     pass

    def _run_comparison(self, hgvsc, expected_sequence):
        """Build the altered transcript for ``hgvsc`` and check its sequence.

        :param hgvsc: HGVS c. variant string to parse
        :param expected_sequence: transcript sequence expected from the first
            result returned by ``AltSeqBuilder.build_altseq()``
        """
        ac_p = "DUMMY"
        var = self._parser.parse_hgvs_variant(hgvsc)
        transcript_data = RefTranscriptData(hdp=self._datasource,
                                            tx_ac=var.ac,
                                            pro_ac=ac_p)

        builder = altseqbuilder.AltSeqBuilder(var, transcript_data)
        insert_result = builder.build_altseq()
        # Only the first entry of the build result is compared.
        actual_sequence = insert_result[0].transcript_sequence
        msg = "expected: {}\nactual  : {}".format(expected_sequence,
                                                  actual_sequence)
        self.assertEqual(expected_sequence, actual_sequence, msg)
class TestHgvsCToP(unittest.TestCase):
    """Check c. -> p. conversion performed by ``VariantMapper.c_to_p``.

    Every test hands one HGVS c. string and the expected string form of the
    resulting p. variant to ``_run_conversion``. Transcript data comes from
    the mock source backed by ``data/sanity_cp.tsv``.
    """

    fn = os.path.join(os.path.dirname(__file__), "data", "sanity_cp.tsv")
    _datasource = mock_input_data_source.MockInputSource(fn)
    _mapper = variantmapper.VariantMapper(
        _datasource, prevalidation_level="INTRINSIC")
    _parser = hgvs.parser.Parser()

    # --- substitutions ---

    def test_silent(self):
        self._run_conversion("NM_999999.1:c.6A>G", "MOCK:p.(Lys2=)")

    def test_substitution(self):
        self._run_conversion("NM_999999.1:c.6A>T", "MOCK:p.(Lys2Asn)")

    def test_substitution_introduces_stop_codon(self):
        self._run_conversion("NM_999996.1:c.8C>A", "MOCK:p.(Ser3Ter)")

    def test_substitution_removes_stop_codon(self):
        self._run_conversion("NM_999998.1:c.30G>T",
                             "MOCK:p.(Ter10TyrextTer3)")

    # --- insertions ---

    def test_insertion_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.6_7insGGG",
                             "MOCK:p.(Lys2_Ala3insGly)")

    def test_insertion_frameshift(self):
        self._run_conversion("NM_999999.1:c.22_23insT",
                             "MOCK:p.(Ala8ValfsTer?)")

    def test_insertion_adds_stop(self):
        self._run_conversion("NM_999999.1:c.8_9insTT", "MOCK:p.(Lys4Ter)")

    # --- deletions ---

    def test_deletion_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.10_12del", "MOCK:p.(Lys4del)")

    def test_deletion2_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.4_15del",
                             "MOCK:p.(Lys2_Ala5del)")

    def test_deletion3_no_frameshift_c_term(self):
        self._run_conversion("NM_999995.1:c.4_6del", "MOCK:p.(Lys3del)")

    def test_deletion4_no_frameshift_c_term(self):
        self._run_conversion("NM_999994.1:c.4_9del",
                             "MOCK:p.(Lys3_Lys4del)")

    def test_deletion5_no_frameshift(self):
        self._run_conversion("NM_999994.1:c.20_25del",
                             "MOCK:p.(Ala7_Arg9delinsGly)")

    def test_deletion6_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.5_7del",
                             "MOCK:p.(Lys2_Ala3delinsThr)")

    def test_deletion7_no_frameshift(self):
        self._run_conversion("NM_999993.1:c.13_24del",
                             "MOCK:p.(Arg5_Ala8del)")

    def test_deletion_frameshift_nostop(self):
        self._run_conversion("NM_999999.1:c.11_12del",
                             "MOCK:p.(Lys4SerfsTer?)")

    def test_deletion_frameshift_adds_stop(self):
        self._run_conversion("NM_999997.1:c.7del",
                             "MOCK:p.(Ala3ArgfsTer6)")

    def test_deletion_no_frameshift_removes_stop_plus_previous(self):
        self._run_conversion("NM_999999.1:c.25_30del",
                             "MOCK:p.(Lys9_Ter10delinsGly)")

    # --- deletion-insertions ---

    def test_indel_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.11_12delinsTCCCA",
                             "MOCK:p.(Lys4delinsIlePro)")

    def test_indel2_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.11_18delinsTCCCA",
                             "MOCK:p.(Lys4_Phe6delinsIlePro)")

    def test_indel_frameshift_nostop(self):
        self._run_conversion("NM_999999.1:c.8delinsGG",
                             "MOCK:p.(Ala3GlyfsTer?)")

    # --- duplications ---

    def test_dup_1AA_no_frameshift_2(self):
        self._run_conversion("NM_999999.1:c.10_12dup", "MOCK:p.(Lys4dup)")

    def test_dup_1AA_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.16_18dup", "MOCK:p.(Phe6dup)")

    def test_dup_2AA_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.16_21dup",
                             "MOCK:p.(Phe6_Arg7dup)")

    def test_dup_2AA2_no_frameshift(self):
        self._run_conversion("NM_999995.1:c.4_6dup", "MOCK:p.(Lys3dup)")

    def test_dup_3AA_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.16_24dup",
                             "MOCK:p.(Phe6_Ala8dup)")

    def test_dup_frameshift(self):
        self._run_conversion("NM_999999.1:c.12_13dup",
                             "MOCK:p.(Ala5GlufsTer?)")

    # --- intron and UTR positions ---

    def test_intron(self):
        self._run_conversion("NM_999999.1:c.12+1G>A", "MOCK:p.?")

    def test_five_prime_utr(self):
        self._run_conversion("NM_999999.1:c.-2A>G", "MOCK:p.?")

    def test_three_prime_utr(self):
        self._run_conversion("NM_999999.1:c.*3G>A", "MOCK:p.?")

    def test_deletion_into_three_prime_utr_frameshift(self):
        self._run_conversion("NM_999999.1:c.27_*3del",
                             "MOCK:p.(Lys9XaafsTer?)")

    def test_deletion_into_three_prime_utr_no_frameshift(self):
        self._run_conversion("NM_999995.1:c.28_*3del",
                             "MOCK:p.(Lys10_Ter11delinsArgGlnPheArg)")

    def test_delins_into_three_prime_utr_no_frameshift(self):
        self._run_conversion("NM_999995.1:c.28_*3delinsGGG",
                             "MOCK:p.(Lys10_Ter11delinsGlyArgGlnPheArg)")

    # See recommendations re p.? (p.Met1?) at:
    # http://varnomen.hgvs.org/recommendations/protein/variant/substitution/
    def test_substitution_removes_start_codon(self):
        self._run_conversion("NM_999999.1:c.1A>G", "MOCK:p.?")

    def test_deletion_from_five_prime_utr_frameshift(self):
        self._run_conversion("NM_999999.1:c.-3_1del", "MOCK:p.?")

    def test_deletion_from_five_prime_utr_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.-3_3del", "MOCK:p.?")

    def test_delins_from_five_prime_utr_no_frameshift(self):
        self._run_conversion("NM_999999.1:c.-3_3delinsAAA", "MOCK:p.?")

    def test_delete_entire_gene(self):
        self._run_conversion("NM_999999.1:c.-3_*1del", "MOCK:p.0?")

    def test_multiple_stop_codons(self):
        self._run_conversion("NM_999992.1:c.4G>A", "MOCK:p.?")

    # The following are unsupported
    #
    # def test_repeats(self):
    #     hgvsc = "NM_999999.1:c.12_13[3]"
    #     hgvsp_expected = ""
    #     self._run_conversion(hgvsc, hgvsp_expected)
    #
    # def test_variable_repeats(self):
    #     pass
    #
    # def test_indeterminate_entire_exon_del(self):
    #     pass
    #
    # def test_indeterminate_entire_exon_dup(self):
    #     pass
    #
    # def test_mosaic(self):
    #     pass
    #
    # def test_chimera(self):
    #     pass
    #
    # def test_two_changes_same_allele(self):
    #     pass
    #
    # def test_two_changes_diff_allele(self):
    #     pass
    #
    # def test_two_changes_unknown_allele(self):
    #     pass

    def _run_conversion(self, hgvsc, hgvsp_expected):
        """Convert one c. variant to p. and compare it with the expectation.

        :param hgvsc: HGVS c. variant string to parse and convert
        :param hgvsp_expected: expected ``str()`` of the converted p. variant
        """
        parsed = TestHgvsCToP._parser.parse_hgvs_variant(hgvsc)
        # "MOCK" is the protein accession handed to the mapper; it shows up
        # as the prefix of every expected string above.
        hgvsp_actual = str(TestHgvsCToP._mapper.c_to_p(parsed, "MOCK"))
        self.assertEqual(
            hgvsp_expected,
            hgvsp_actual,
            "hgvsc: {} hgvsp expected: {} actual: {}".format(
                hgvsc, hgvsp_expected, hgvsp_actual))
# Example no. 3
# 0
class TestAltSeqBuilder(unittest.TestCase):
    """Check the transcript sequences produced by ``AltSeqBuilder``.

    Each test parses an HGVS c. variant, builds the altered transcript from
    the mock data in ``data/sanity_cp.tsv`` and compares the resulting
    transcript sequence with a hand-written expectation.
    """

    # root sequence = ""
    fn = os.path.join(os.path.dirname(__file__), "data", "sanity_cp.tsv")
    _datasource = mock_input_data_source.MockInputSource(fn)
    _parser = hgvs.parser.Parser()

    def test_substitution_start(self):
        hgvsc = "NM_999999.1:c.1A>T"
        expected_sequence = "AAAATCAAATTGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_substitution_middle(self):
        hgvsc = "NM_999999.1:c.6A>T"
        expected_sequence = "AAAATCAAAATGAATGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_substitution_end(self):
        hgvsc = "NM_999999.1:c.30G>C"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATACGGG"
        self._run_comparison(hgvsc, expected_sequence)

    # TODO - build in support when system can handle variants in 5'utr region
    # def test_insertion_before_start(self):
    #     hgvsc = "NM_999999.1:c.-1_1insGGG"
    #     expected_sequence = "AAAATCAAAGGGATGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
    #     self._run_comparison(hgvsc, expected_sequence)

    def test_insertion_start(self):
        hgvsc = "NM_999999.1:c.1_2insAAA"
        expected_sequence = "AAAATCAAAAAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_insertion_middle(self):
        hgvsc = "NM_999999.1:c.22_23insT"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGTCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_insertion_end(self):
        hgvsc = "NM_999999.1:c.29_30insGG"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGGGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    # TODO - build in support when system can handle variants in 3'utr region
    # def test_insertion_after_end(self):
    #     hgvsc = "NM_999999.1:c.30_*1insAA"
    #     expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGAAGGGN"
    #     self._run_comparison(hgvsc, expected_sequence)

    def test_deletion_start(self):
        hgvsc = "NM_999999.1:c.1del"
        expected_sequence = "AAAATCAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_deletion_middle(self):
        hgvsc = "NM_999999.1:c.2_7del"
        expected_sequence = "AAAATCAAAACGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_deletion_end(self):
        hgvsc = "NM_999999.1:c.30del"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATAGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_delins_start(self):
        hgvsc = "NM_999999.1:c.1delinsTTTT"
        expected_sequence = "AAAATCAAATTTTTGAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_delins_middle(self):
        hgvsc = "NM_999999.1:c.2_3delinsAA"
        expected_sequence = "AAAATCAAAAAAAAAGCGAAAGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_delins_end(self):
        hgvsc = "NM_999999.1:c.30delinsCCCC"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGAAATACCCCGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_dup(self):
        hgvsc = "NM_999999.1:c.16_24dup"
        expected_sequence = "AAAATCAAAATGAAAGCGAAAGCGTTTCGCGCGTTTCGCGCGAAATAGGGG"
        self._run_comparison(hgvsc, expected_sequence)

    def test_delete_gene(self):
        hgvsc = "NM_999999.1:c.-3_*1del"
        expected_sequence = ""
        self._run_comparison(hgvsc, expected_sequence)

    # def test_2_substitutions(self):
    #     pass
    #
    # def test_2_indel_no_net_frameshift(self):
    #     pass
    #
    # def test_2_indel_net_frameshift(self):
    #     pass

    def _run_comparison(self, hgvsc, expected_sequence):
        """Build the altered transcript for ``hgvsc`` and check its sequence.

        :param hgvsc: HGVS c. variant string to parse
        :param expected_sequence: transcript sequence expected from the first
            result returned by ``AltSeqBuilder.build_altseq()``
        """

        # Test-local replica of the transcript-data container consumed by
        # AltSeqBuilder (the helper below describes itself as being for
        # c_to_p).
        @attr.s(slots=True)
        class RefTranscriptData(object):
            transcript_sequence = attr.ib()
            aa_sequence = attr.ib()
            cds_start = attr.ib()
            cds_stop = attr.ib()
            protein_accession = attr.ib()

            @classmethod
            def setup_transcript_data(cls, ac, ac_p, db, ref="GRCh37.p10"):
                """Build a RefTranscriptData instance for c_to_p from ``db``.

                :param ac: transcript accession to look up
                :param ac_p: protein accession stored on the result
                :param db: data source providing ``get_tx_info`` and
                    ``get_tx_seq``
                :param ref: accepted for signature compatibility; not used
                :raises hgvs.exceptions.HGVSError: if the transcript info or
                    sequence is missing
                """
                tx_info = db.get_tx_info(ac)
                tx_seq = db.get_tx_seq(ac)

                if tx_info is None or tx_seq is None:
                    raise hgvs.exceptions.HGVSError("Missing transcript data for accession: {}".format(ac))

                # use 1-based hgvs coords
                cds_start = tx_info["cds_start_i"] + 1
                cds_stop = tx_info["cds_end_i"]

                # Pad the CDS with "N" up to a whole number of codons so
                # biopython won't complain during the conversion.  The
                # previous code joined the return value of list.extend()
                # (always None) and discarded the result, so it raised
                # TypeError instead of padding.
                tx_seq_to_translate = tx_seq[cds_start - 1:cds_stop]
                remainder = len(tx_seq_to_translate) % 3
                if remainder:
                    tx_seq_to_translate += "N" * (3 - remainder)

                protein_seq = str(Seq(tx_seq_to_translate).translate())

                return cls(tx_seq, protein_seq, cds_start, cds_stop, ac_p)

        ac_p = "DUMMY"
        var = self._parser.parse_hgvs_variant(hgvsc)
        transcript_data = RefTranscriptData.setup_transcript_data(var.ac, ac_p, self._datasource)

        builder = altseqbuilder.AltSeqBuilder(var, transcript_data)
        insert_result = builder.build_altseq()
        # Only the first entry of the build result is compared.
        actual_sequence = insert_result[0].transcript_sequence
        msg = "expected: {}\nactual  : {}".format(expected_sequence, actual_sequence)
        self.assertEqual(expected_sequence, actual_sequence, msg)