예제 #1
0
    def _calculate_protein_sequence(self, exons, seq, cds_start_genomic_space, cds_stop_genomic_space, strand):
        """Translate the CDS portion of ``seq`` and drop one trailing '*'.

        The genomic CDS bounds are cast to int and converted to feature-space
        offsets through TranscriptProviderUtils.  ``seq`` is sliced with those
        offsets (each cast to int again) and the slice is handed to
        MutUtils.translate_sequence.

        :param exons: exon collection forwarded to the coordinate conversion
        :param seq: sequence to slice (presumably the spliced transcript -- confirm with caller)
        :param cds_start_genomic_space: CDS start in genomic coordinates (int-convertible)
        :param cds_stop_genomic_space: CDS stop in genomic coordinates (int-convertible)
        :param strand: strand value forwarded to the coordinate conversion
        :return: the translation, without its final character when that character is '*'
        """
        genomic_start = int(cds_start_genomic_space)
        genomic_stop = int(cds_stop_genomic_space)
        feature_start, feature_stop = TranscriptProviderUtils._convert_genomic_space_to_feature_space(
            genomic_start, genomic_stop, exons, strand)

        coding_seq = seq[int(feature_start):int(feature_stop)]
        translated = MutUtils.translate_sequence(coding_seq)

        # The length check keeps the [-1] lookup safe on an empty translation.
        has_trailing_star = len(translated) > 0 and translated[-1] == '*'
        return translated[:-1] if has_trailing_star else translated
예제 #2
0
    def _calculate_protein_sequence(self, exons, seq, cds_start_genomic_space,
                                    cds_stop_genomic_space, strand):
        """Return the translation of the coding slice of ``seq``.

        Steps, in order:
          1. cast both genomic CDS bounds to int and map them to feature-space
             offsets with TranscriptProviderUtils;
          2. slice ``seq`` with those offsets (cast to int) and translate the
             slice with MutUtils.translate_sequence;
          3. remove the last character of the translation when it is '*'.

        :param exons: exon collection forwarded to the coordinate conversion
        :param seq: sequence that is sliced and translated
        :param cds_start_genomic_space: CDS start, genomic coordinates (int-convertible)
        :param cds_stop_genomic_space: CDS stop, genomic coordinates (int-convertible)
        :param strand: strand value forwarded to the coordinate conversion
        :return: translated sequence with at most one trailing '*' removed
        """
        convert = TranscriptProviderUtils._convert_genomic_space_to_feature_space
        start_offset, stop_offset = convert(
            int(cds_start_genomic_space), int(cds_stop_genomic_space), exons,
            strand)

        protein = MutUtils.translate_sequence(
            seq[int(start_offset):int(stop_offset)])

        # An empty translation has no last character to inspect.
        if len(protein) > 0 and protein[-1] == '*':
            return protein[:-1]
        return protein
예제 #3
0
파일: Gaf.py 프로젝트: Tmacme/oncotator
    def get_protein_seq(self, transcript_id):
        """Return the translated protein sequence for a transcript.

        Looks up the transcript record and the transcript sequence, slices the
        sequence from ``cds_start - 1`` up to ``cds_stop`` and translates the
        slice with MutUtils.translate_sequence.  One trailing '*' is removed
        from the translation if present.

        :param transcript_id: identifier passed to get_transcript / get_transcript_seq
        :return: the translated sequence, or None when the record or sequence is
            missing/empty, or when the record has no truthy 'cds_start'
        :raises KeyError: if the record has 'cds_start' but no 'cds_stop'
        """
        gaf_record = self.get_transcript(transcript_id)
        tx_seq = self.get_transcript_seq(transcript_id)
        if not gaf_record or not tx_seq:
            return None

        if 'cds_start' not in gaf_record or not gaf_record['cds_start']:
            return None

        # cds_start is shifted down by one for slicing (presumably it is stored
        # 1-based -- confirm against the record loader).
        prot_seq = MutUtils.translate_sequence(tx_seq[gaf_record['cds_start']-1:gaf_record['cds_stop']])

        # Check the length first: an empty translation (e.g. an empty CDS slice)
        # would otherwise raise IndexError on the [-1] lookup.
        if len(prot_seq) > 0 and prot_seq[-1] == '*':
            prot_seq = prot_seq[:-1]

        return prot_seq
예제 #4
0
    def get_protein_seq(self, transcript_id):
        """Return the translated protein sequence for a transcript.

        The transcript record and transcript sequence are fetched by id; the
        sequence is sliced from ``cds_start - 1`` up to ``cds_stop`` and the
        slice is translated with MutUtils.translate_sequence.  One trailing '*'
        is removed from the translation if present.

        :param transcript_id: identifier passed to get_transcript / get_transcript_seq
        :return: the translated sequence, or None when the record or sequence is
            missing/empty, or when the record has no truthy 'cds_start'
        :raises KeyError: if the record has 'cds_start' but no 'cds_stop'
        """
        gaf_record = self.get_transcript(transcript_id)
        tx_seq = self.get_transcript_seq(transcript_id)
        if not gaf_record or not tx_seq:
            return None

        if 'cds_start' not in gaf_record or not gaf_record['cds_start']:
            return None

        # cds_start is shifted down by one for slicing (presumably it is stored
        # 1-based -- confirm against the record loader).
        prot_seq = MutUtils.translate_sequence(
            tx_seq[gaf_record['cds_start'] - 1:gaf_record['cds_stop']])

        # Check the length first: an empty translation (e.g. an empty CDS slice)
        # would otherwise raise IndexError on the [-1] lookup.
        if len(prot_seq) > 0 and prot_seq[-1] == '*':
            prot_seq = prot_seq[:-1]

        return prot_seq
예제 #5
0
    def _determine_vc_for_cds_overlap(self, start, end, ref_allele, alt_allele,
                                      is_frameshift_indel, is_splice_site, tx,
                                      variant_type, is_start_codon):
        """
        Build the VariantClassification for a variant that overlaps the CDS of a transcript.

        Note: This method can also handle start and stop codons.

        The variant is mapped into exon space, the affected reference codon(s)
        are extracted and mutated with the stranded observed allele, both are
        translated, and the result is passed to infer_variant_classification.

        :param start: variant start position, converted to exon space here and
            cast to int for the exon-index lookup
        :param end: variant end position, handled the same way as ``start``
        :param ref_allele: reference allele as given (stranded via _get_stranded_alleles)
        :param alt_allele: alternate allele as given (stranded via _get_stranded_alleles)
        :param is_frameshift_indel: forwarded to infer_variant_classification
        :param is_splice_site: forwarded to infer_variant_classification
        :param tx: transcript object; must provide get_strand, get_seq,
            get_protein_seq, get_contig and get_transcript_id
        :param variant_type: variant type, compared against
            VariantClassification.VT_INS / VT_SNP and the literals "DEL" / "INS"
        :param is_start_codon: forwarded to infer_variant_classification
        :return: a VariantClassification populated with the codon, amino-acid,
            protein-position and exon information computed here
        """
        observed_allele_stranded, reference_allele_stranded = self._get_stranded_alleles(
            ref_allele, alt_allele, tx)
        transcript_position_start, transcript_position_end = TranscriptProviderUtils.convert_genomic_space_to_exon_space(
            start, end, tx)

        # On the + strand every variant type except insertions is shifted down by one.
        if tx.get_strand(
        ) == "+" and not variant_type == VariantClassification.VT_INS:
            transcript_position_start -= 1
            transcript_position_end -= 1

        transcript_seq = tx.get_seq()
        protein_seq = tx.get_protein_seq()
        cds_start, _ = TranscriptProviderUtils.determine_cds_in_exon_space(tx)
        protein_position_start, protein_position_end = TranscriptProviderUtils.get_protein_positions(
            transcript_position_start, transcript_position_end, cds_start)

        # When the transcript sequence disagrees with the stranded reference
        # allele (non-insertions only), splice the reference allele into a copy
        # of the transcript sequence and remember that this happened: the
        # reference amino acids must then be re-translated rather than read
        # from the transcript's protein sequence.
        new_ref_transcript_seq = transcript_seq
        if (transcript_seq[transcript_position_start:transcript_position_end +
                           1] != reference_allele_stranded
            ) and variant_type != VariantClassification.VT_INS:
            new_ref_transcript_seq = list(transcript_seq)
            new_ref_transcript_seq[
                transcript_position_start:transcript_position_end +
                1] = reference_allele_stranded
            new_ref_transcript_seq = ''.join(new_ref_transcript_seq)
            ref_tx_seq_has_been_changed = True
        else:
            ref_tx_seq_has_been_changed = False
        cds_codon_start, cds_codon_end = TranscriptProviderUtils.get_cds_codon_positions(
            protein_position_start, protein_position_end, cds_start)

        # Deletions use the lower-cased codon slice directly; every other type
        # goes through mutate_reference_sequence with the reference allele.
        if variant_type == "DEL":
            reference_codon_seq = new_ref_transcript_seq[
                cds_codon_start:cds_codon_end + 1].lower()
        else:
            reference_codon_seq = TranscriptProviderUtils.mutate_reference_sequence(
                new_ref_transcript_seq[cds_codon_start:cds_codon_end +
                                       1].lower(), cds_codon_start,
                transcript_position_start, transcript_position_end,
                reference_allele_stranded, variant_type)

        # Insertions on the - strand pass a codon start that is one lower.
        if variant_type == "INS" and tx.get_strand() == "-":
            mutated_codon_seq = TranscriptProviderUtils.mutate_reference_sequence(
                reference_codon_seq.lower(), cds_codon_start - 1,
                transcript_position_start, transcript_position_end,
                observed_allele_stranded, variant_type)
        else:
            mutated_codon_seq = TranscriptProviderUtils.mutate_reference_sequence(
                reference_codon_seq.lower(), cds_codon_start,
                transcript_position_start, transcript_position_end,
                observed_allele_stranded, variant_type)

        observed_aa = MutUtils.translate_sequence(mutated_codon_seq)
        if ref_tx_seq_has_been_changed:
            reference_aa = MutUtils.translate_sequence(reference_codon_seq)
        else:
            reference_aa = protein_seq[protein_position_start -
                                       1:protein_position_end]

        if variant_type != VariantClassification.VT_SNP:

            try:
                reference_aa, observed_aa, protein_position_start, protein_position_end = \
                    self._adjust_protein_position_and_alleles(protein_seq, protein_position_start,
                        protein_position_end, reference_aa, observed_aa)
            except InvalidVariantException as ive:
                # Best effort: the failure is logged and processing continues
                # with the unadjusted amino acids and protein positions.
                # Logger.warning is used because the Logger.warn alias is
                # deprecated and no longer exists in recent Python releases.
                logger = logging.getLogger(__name__)
                logger.error(
                    "Could not properly adjust protein position for variant: %s, %s, %s, %s, %s VT: %s"
                    % (tx.get_contig(), start, end, ref_allele, alt_allele,
                       variant_type))
                logger.error(str(ive))
                logger.warning(
                    "Above error may not have exact start and end positions if this is a VCF input."
                )
                logger.warning(
                    "Variant type is likely incorrect.  This can happen with some GATK VCFs"
                )
                logger.warning(
                    TranscriptProviderUtils.is_valid_xNP(
                        variant_type, ref_allele, alt_allele))
                logger.warning(
                    "The protein_change annotation may not be properly rendered."
                )

        vc_tmp, vc_tmp_secondary = self.infer_variant_classification(
            variant_type,
            reference_aa,
            observed_aa,
            ref_allele,
            alt_allele,
            is_frameshift_indel=is_frameshift_indel,
            is_splice_site=is_splice_site,
            is_start_codon=is_start_codon)

        # NOTE(review): this repeats the determine_cds_in_exon_space(tx) call
        # made earlier; it is kept in case the helper is not side-effect free.
        cds_start_exon_space, _ = TranscriptProviderUtils.determine_cds_in_exon_space(
            tx)
        exon_i = TranscriptProviderUtils.determine_exon_index(
            int(start), int(end), tx, variant_type)
        final_vc = VariantClassification(
            vc_tmp,
            variant_type,
            transcript_id=tx.get_transcript_id(),
            alt_codon=mutated_codon_seq,
            ref_codon=reference_codon_seq,
            ref_aa=reference_aa,
            ref_protein_start=protein_position_start,
            ref_protein_end=protein_position_end,
            alt_aa=observed_aa,
            alt_codon_start_in_exon=cds_codon_start,
            alt_codon_end_in_exon=cds_codon_end,
            ref_codon_start_in_exon=cds_codon_start,
            ref_codon_end_in_exon=cds_codon_end,
            cds_start_in_exon_space=cds_start_exon_space,
            ref_allele_stranded=reference_allele_stranded,
            alt_allele_stranded=observed_allele_stranded,
            exon_i=exon_i,
            vc_secondary=vc_tmp_secondary)
        return final_vc