Esempio n. 1
0
    def c_to_p(self, var_c, pro_ac=None):
        """
        Project a coding (c.) SequenceVariant onto a protein and return the
        resulting p. SequenceVariant.
        Author: Rudy Rico

        :param SequenceVariant var_c: variant whose ``type`` must be "c"
        :param str pro_ac: protein accession, forwarded to RefTranscriptData
        :rtype: hgvs.sequencevariant.SequenceVariant
        :raises HGVSInvalidVariantError: if ``var_c.type`` is not "c"

        """

        is_coding = var_c.type == "c"
        if not is_coding:
            raise HGVSInvalidVariantError("Expected a cDNA (c.) variant; got " + str(var_c))

        validator = self._validator
        if validator:
            validator.validate(var_c)

        ref_data = RefTranscriptData(self.hdp, var_c.ac, pro_ac)
        alt_seqs = altseqbuilder.AltSeqBuilder(var_c, ref_data).build_altseq()

        # TODO: handle the case where 2+ alt sequences come back. Every alt
        # sequence is converted below, but only the first result is returned.
        protein_variants = [
            altseq_to_hgvsp.AltSeqToHgvsp(ref_data, alt_seq).build_hgvsp()
            for alt_seq in alt_seqs
        ]
        first_p = protein_variants[0]

        if self.add_gene_symbol:
            self._update_gene_symbol(first_p, var_c.gene)

        return first_p
Esempio n. 2
0
    def _run_comparison(self, hgvsc, expected_sequence):
        """Assert that the alternate transcript sequence built for ``hgvsc``
        equals ``expected_sequence``.

        :param hgvsc: variant string handed to ``self._parser.parse_hgvs_variant``
        :param expected_sequence: value compared against the
            ``transcript_sequence`` of the first build_altseq() result
        """
        parsed = self._parser.parse_hgvs_variant(hgvsc)

        # "DUMMY" is passed as the protein accession for the reference data.
        ref_data = RefTranscriptData(hdp=self._datasource, tx_ac=parsed.ac, pro_ac="DUMMY")

        alt_results = altseqbuilder.AltSeqBuilder(parsed, ref_data).build_altseq()
        actual_sequence = alt_results[0].transcript_sequence

        self.assertEqual(
            expected_sequence,
            actual_sequence,
            "expected: {}\nactual  : {}".format(expected_sequence, actual_sequence),
        )
Esempio n. 3
0
    def _run_comparison(self, hgvsc, expected_sequence):
        """Assert that the alternate transcript sequence built for ``hgvsc``
        equals ``expected_sequence``.

        The reference transcript data is assembled by a local
        ``RefTranscriptData`` class that replicates the one used by c_to_p.

        :param hgvsc: variant string handed to ``self._parser.parse_hgvs_variant``
        :param expected_sequence: value compared against the
            ``transcript_sequence`` of the first build_altseq() result
        """

        # test replicates the internal class of p_to_c
        @attr.s(slots=True)
        class RefTranscriptData(object):
            transcript_sequence = attr.ib()
            aa_sequence = attr.ib()
            cds_start = attr.ib()
            cds_stop = attr.ib()
            protein_accession = attr.ib()

            @classmethod
            def setup_transcript_data(cls, ac, ac_p, db, ref="GRCh37.p10"):
                """Helper for generating RefTranscriptData for c_to_p.

                :param ac: transcript accession looked up in ``db``
                :param ac_p: protein accession stored on the result
                :param db: data source providing get_tx_info() and get_tx_seq()
                :param ref: unused; kept so existing callers keep working
                :raises hgvs.exceptions.HGVSError: if ``db`` returns None for
                    the transcript info or the transcript sequence
                """
                tx_info = db.get_tx_info(ac)
                tx_seq = db.get_tx_seq(ac)

                if tx_info is None or tx_seq is None:
                    raise hgvs.exceptions.HGVSError("Missing transcript data for accession: {}".format(ac))

                # use 1-based hgvs coords
                cds_start = tx_info["cds_start_i"] + 1
                cds_stop = tx_info["cds_end_i"]

                # Pad a trailing partial codon with "N" so biopython won't
                # complain during the conversion. The padded value must be
                # assigned back: list.extend() returns None, so chaining it
                # into "".join() raises TypeError instead of padding.
                tx_seq_to_translate = tx_seq[cds_start - 1:cds_stop]
                remainder = len(tx_seq_to_translate) % 3
                if remainder:
                    tx_seq_to_translate += "N" * (3 - remainder)

                tx_seq_cds = Seq(tx_seq_to_translate)
                protein_seq = str(tx_seq_cds.translate())

                return cls(tx_seq, protein_seq, cds_start, cds_stop, ac_p)

        ac_p = "DUMMY"
        var = self._parser.parse_hgvs_variant(hgvsc)
        transcript_data = RefTranscriptData.setup_transcript_data(var.ac, ac_p, self._datasource)

        builder = altseqbuilder.AltSeqBuilder(var, transcript_data)
        insert_result = builder.build_altseq()
        actual_sequence = insert_result[0].transcript_sequence
        msg = "expected: {}\nactual  : {}".format(expected_sequence, actual_sequence)
        self.assertEqual(expected_sequence, actual_sequence, msg)
Esempio n. 4
0
    def c_to_p(self, var_c, pro_ac=None):
        """
        Converts a c. SequenceVariant to a p. SequenceVariant on the specified protein accession
        Author: Rudy Rico

        :param SequenceVariant var_c: hgvsc tag
        :param str pro_ac: protein accession; when None it is looked up from
            the data provider (see RefTranscriptData.setup_transcript_data)
        :rtype: hgvs.sequencevariant.SequenceVariant
        :raises HGVSInvalidVariantError: if ``var_c.type`` is not "c"
        :raises HGVSDataNotAvailableError: if the data provider returns None
            for the transcript info or the transcript sequence

        """

        @attr.s(slots=True)
        class RefTranscriptData(object):
            transcript_sequence = attr.ib()
            aa_sequence = attr.ib()
            cds_start = attr.ib()
            cds_stop = attr.ib()
            protein_accession = attr.ib()

            @classmethod
            def setup_transcript_data(cls, hdp, tx_ac, pro_ac):
                """Helper for generating RefTranscriptData for c_to_p.

                :param hdp: data provider queried for transcript info,
                    sequence and protein accessions
                :param str tx_ac: transcript accession
                :param str pro_ac: protein accession, or None to look it up
                """
                # Use the tx_ac argument (not the enclosing var_c) so that all
                # lookups in this helper refer to the same accession.
                tx_info = hdp.get_tx_identity_info(tx_ac)
                tx_seq = hdp.get_seq(tx_ac)

                if tx_info is None or tx_seq is None:
                    raise HGVSDataNotAvailableError("Missing transcript data for accession: {}".format(tx_ac))

                # use 1-based hgvs coords
                cds_start = tx_info["cds_start_i"] + 1
                cds_stop = tx_info["cds_end_i"]

                # Pad a trailing partial codon with "N" so biopython won't
                # complain during the conversion. The padded value must be
                # assigned back: list.extend() returns None, so chaining it
                # into "".join() raises TypeError instead of padding.
                tx_seq_to_translate = tx_seq[cds_start - 1:cds_stop]
                remainder = len(tx_seq_to_translate) % 3
                if remainder:
                    tx_seq_to_translate += "N" * (3 - remainder)

                tx_seq_cds = Seq(tx_seq_to_translate)
                protein_seq = str(tx_seq_cds.translate())

                if pro_ac is None:
                    # get_acs... will always return at least the MD5_ accession
                    pro_ac = (hdp.get_pro_ac_for_tx_ac(tx_ac) or hdp.get_acs_for_protein_seq(protein_seq)[0])

                return cls(tx_seq, protein_seq, cds_start, cds_stop, pro_ac)

        if not (var_c.type == "c"):
            raise HGVSInvalidVariantError("Expected a cDNA (c.); got " + str(var_c))
        if self._validator:
            self._validator.validate(var_c)
        reference_data = RefTranscriptData.setup_transcript_data(self.hdp, var_c.ac, pro_ac)
        builder = altseqbuilder.AltSeqBuilder(var_c, reference_data)

        # TODO: handle case where you get 2+ alt sequences back;
        # currently get list of 1 element loop structure implemented
        # to handle this, but doesn't really do anything currently.
        all_alt_data = builder.build_altseq()

        # Every alt sequence is converted, but only the first result is returned.
        var_ps = [
            altseq_to_hgvsp.AltSeqToHgvsp(reference_data, alt_data).build_hgvsp()
            for alt_data in all_alt_data
        ]

        return var_ps[0]