Example #1
0
    def variant_classify(self, tx, ref_allele, alt_allele, start, end, variant_type, dist=2):
        """Classify a variant against a single transcript.

        Everything is handled in genomic space.  The checks below run in order
        and the first one that matches determines the result:

        1. Gene type is not "protein_coding": ``VariantClassification.LINCRNA``
           when the gene type equals that constant, otherwise
           ``VariantClassification.RNA``.
        2. No exon overlap and not beyond the exons: Splice_Site (with Intron
           as the secondary classification) or plain Intron.
        3. No exon overlap and beyond the exons: 3'/5' flank, or IGR.
        4. Start/stop codon overlap by a variant whose type does not end in
           "NP": 'Start_Codon_<Type>' / 'Stop_Codon_<Type>', where <Type> is
           ``variant_type.capitalize()``.
        5. Exon overlap outside the CDS and the start/stop codons: 3'/5' UTR,
           passed through ``_determine_de_novo`` (De_novo_Start detection).
        6. CDS, start codon or stop codon overlap: delegated to
           ``_determine_vc_for_cds_overlap`` (Missense, Nonsense, Nonstop,
           Silent, exonic Splice_Site, ...).

        Args:
            tx: Transcript to classify against.  Must provide
                ``get_gene_type()``, ``get_transcript_id()``,
                ``get_start_codon()`` and ``get_stop_codon()``.
            ref_allele: Reference allele; "-" is treated as the empty string.
            alt_allele: Alternate allele; "-" is treated as the empty string.
            start: Variant start position; anything ``int()`` accepts.
            end: Variant end position; anything ``int()`` accepts.
            variant_type: Variant type string (presumably values such as
                "SNP", "DNP", "INS", "DEL" -- confirm against callers).
            dist: Forwarded to ``_determine_if_splice_site_overlap``
                (presumably the splice-site window around exon edges, in
                bases -- confirm in that helper).

        Returns:
            A ``VariantClassification`` instance.

        Raises:
            ValueError: If none of the checks above matched.
        """
        gene_type = tx.get_gene_type()
        if gene_type != "protein_coding":
            if gene_type == VariantClassification.LINCRNA:
                return VariantClassification(VariantClassification.LINCRNA, variant_type, tx.get_transcript_id())
            else:
                return VariantClassification(VariantClassification.RNA, variant_type, tx.get_transcript_id())

        # "-" denotes an empty allele; normalize it before any sequence work.
        if ref_allele == "-":
            ref_allele = ""
        if alt_allele == "-":
            alt_allele = ""

        s = int(start)
        e = int(end)
        is_exon_overlap = TranscriptProviderUtils.determine_if_exon_overlap(s, e, tx, variant_type)

        # Only the first element of the returned tuple (the overlap flag) is used here.
        is_splice_site_tuple = self._determine_if_splice_site_overlap(s, e, tx, variant_type, dist)
        is_splice_site = is_splice_site_tuple[0]

        # start/end are handed over unconverted, exactly as the caller supplied them.
        is_beyond_exons, side, is_flank = self._determine_beyond_exon_info_vt(start, end, tx, variant_type)

        if not is_exon_overlap and not is_beyond_exons:
            # Between exons of the transcript: intronic.
            exon_i = TranscriptProviderUtils.determine_closest_exon(tx, s, e)
            if is_splice_site:
                # Intron Splice Site
                return VariantClassification(VariantClassification.SPLICE_SITE, variant_type, tx.get_transcript_id(), vc_secondary=VariantClassification.INTRON, exon_i=exon_i)
            else:
                return VariantClassification(VariantClassification.INTRON, variant_type, tx.get_transcript_id(), exon_i=exon_i)

        if not is_exon_overlap and is_beyond_exons:
            if is_flank:
                # Flanks
                if side.startswith("3"):
                    return VariantClassification(VariantClassification.THREE_PRIME_PRIME_FLANK, variant_type, transcript_id=tx.get_transcript_id())
                else:
                    return VariantClassification(VariantClassification.FIVE_PRIME_PRIME_FLANK, variant_type, transcript_id=tx.get_transcript_id())

            else:
                # IGR
                return VariantClassification(VariantClassification.IGR, variant_type)

        is_start_codon_overlap = self._determine_codon_overlap(s, e, tx.get_start_codon(), variant_type)
        is_stop_codon_overlap = self._determine_codon_overlap(s, e, tx.get_stop_codon(), variant_type)

        # Variant types ending in "NP" are not given the dedicated codon classes;
        # they fall through to the CDS handling further down.
        if is_start_codon_overlap and not variant_type.endswith("NP"):
            return VariantClassification('Start_Codon_' + variant_type.capitalize(), variant_type, transcript_id=tx.get_transcript_id())
        if is_stop_codon_overlap and not variant_type.endswith("NP"):
            return VariantClassification('Stop_Codon_' + variant_type.capitalize(), variant_type, transcript_id=tx.get_transcript_id())

        is_cds_overlap = self._determine_if_cds_overlap(s, e, tx, variant_type)
        if is_exon_overlap and not is_cds_overlap and not is_start_codon_overlap and not is_stop_codon_overlap:
            # UTR -- ``side`` comes from the beyond-exon helper above.
            if side.startswith("3"):
                vc_tmp = VariantClassification.THREE_PRIME_UTR
            else:
                vc_tmp = VariantClassification.FIVE_PRIME_UTR
            # Only the exon-space start is needed for the de novo check.
            transcript_position_exon_space_start, _ = TranscriptProviderUtils.convert_genomic_space_to_exon_space(start, end, tx)
            vc = self._determine_de_novo(vc_tmp, transcript_position_exon_space_start, ref_allele, alt_allele, tx, variant_type)
            return VariantClassification(vc, variant_type, transcript_id=tx.get_transcript_id())

        # We have a clean overlap in the CDS.  Includes start codon or stop codon.
        if is_cds_overlap or is_stop_codon_overlap or is_start_codon_overlap:
            is_frameshift_indel = self.is_frameshift_indel(variant_type, s, e, alt_allele)
            return self._determine_vc_for_cds_overlap(start, end, ref_allele, alt_allele, is_frameshift_indel, is_splice_site, tx, variant_type, is_start_codon_overlap)

        # Defensive fallback: the branches above are expected to cover every
        # case.  The accessor name was previously misspelled here, which would
        # have raised AttributeError instead of this ValueError.
        raise ValueError("Could not determine variant classification:  " + str(tx.get_transcript_id()) + " " + str([ref_allele, alt_allele, start, end]))
Example #2
0
    def variant_classify(self,
                         tx,
                         ref_allele,
                         alt_allele,
                         start,
                         end,
                         variant_type,
                         dist=2):
        """Classify a variant against a single transcript.

        Everything is handled in genomic space.  The checks below run in order
        and the first one that matches determines the result:

        1. Gene type is not "protein_coding": ``VariantClassification.LINCRNA``
           when the gene type equals that constant, otherwise
           ``VariantClassification.RNA``.
        2. No exon overlap and not beyond the exons: Splice_Site (with Intron
           as the secondary classification) or plain Intron.
        3. No exon overlap and beyond the exons: 3'/5' flank, or IGR.
        4. Start/stop codon overlap by a variant whose type does not end in
           "NP": 'Start_Codon_<Type>' / 'Stop_Codon_<Type>', where <Type> is
           ``variant_type.capitalize()``.
        5. Exon overlap outside the CDS and the start/stop codons: 3'/5' UTR,
           passed through ``_determine_de_novo`` (De_novo_Start detection).
        6. CDS, start codon or stop codon overlap: delegated to
           ``_determine_vc_for_cds_overlap`` (Missense, Nonsense, Nonstop,
           Silent, exonic Splice_Site, ...).

        Args:
            tx: Transcript to classify against.  Must provide
                ``get_gene_type()``, ``get_transcript_id()``,
                ``get_start_codon()`` and ``get_stop_codon()``.
            ref_allele: Reference allele; "-" is treated as the empty string.
            alt_allele: Alternate allele; "-" is treated as the empty string.
            start: Variant start position; anything ``int()`` accepts.
            end: Variant end position; anything ``int()`` accepts.
            variant_type: Variant type string (presumably values such as
                "SNP", "DNP", "INS", "DEL" -- confirm against callers).
            dist: Forwarded to ``_determine_if_splice_site_overlap``
                (presumably the splice-site window around exon edges, in
                bases -- confirm in that helper).

        Returns:
            A ``VariantClassification`` instance.

        Raises:
            ValueError: If none of the checks above matched.
        """
        gene_type = tx.get_gene_type()
        if gene_type != "protein_coding":
            if gene_type == VariantClassification.LINCRNA:
                return VariantClassification(VariantClassification.LINCRNA,
                                             variant_type,
                                             tx.get_transcript_id())
            else:
                return VariantClassification(VariantClassification.RNA,
                                             variant_type,
                                             tx.get_transcript_id())

        # "-" denotes an empty allele; normalize it before any sequence work.
        if ref_allele == "-":
            ref_allele = ""
        if alt_allele == "-":
            alt_allele = ""

        s = int(start)
        e = int(end)
        is_exon_overlap = TranscriptProviderUtils.determine_if_exon_overlap(
            s, e, tx, variant_type)

        # Only the first element of the returned tuple (the overlap flag) is
        # used here.
        is_splice_site_tuple = self._determine_if_splice_site_overlap(
            s, e, tx, variant_type, dist)
        is_splice_site = is_splice_site_tuple[0]

        # start/end are handed over unconverted, exactly as the caller
        # supplied them.
        is_beyond_exons, side, is_flank = self._determine_beyond_exon_info_vt(
            start, end, tx, variant_type)

        if not is_exon_overlap and not is_beyond_exons:
            # Between exons of the transcript: intronic.
            exon_i = TranscriptProviderUtils.determine_closest_exon(tx, s, e)
            if is_splice_site:
                # Intron Splice Site
                return VariantClassification(
                    VariantClassification.SPLICE_SITE,
                    variant_type,
                    tx.get_transcript_id(),
                    vc_secondary=VariantClassification.INTRON,
                    exon_i=exon_i)
            else:
                return VariantClassification(VariantClassification.INTRON,
                                             variant_type,
                                             tx.get_transcript_id(),
                                             exon_i=exon_i)

        if not is_exon_overlap and is_beyond_exons:
            if is_flank:
                # Flanks
                if side.startswith("3"):
                    return VariantClassification(
                        VariantClassification.THREE_PRIME_PRIME_FLANK,
                        variant_type,
                        transcript_id=tx.get_transcript_id())
                else:
                    return VariantClassification(
                        VariantClassification.FIVE_PRIME_PRIME_FLANK,
                        variant_type,
                        transcript_id=tx.get_transcript_id())

            else:
                # IGR
                return VariantClassification(VariantClassification.IGR,
                                             variant_type)

        is_start_codon_overlap = self._determine_codon_overlap(
            s, e, tx.get_start_codon(), variant_type)
        is_stop_codon_overlap = self._determine_codon_overlap(
            s, e, tx.get_stop_codon(), variant_type)

        # Variant types ending in "NP" are not given the dedicated codon
        # classes; they fall through to the CDS handling further down.
        if is_start_codon_overlap and not variant_type.endswith("NP"):
            return VariantClassification('Start_Codon_' +
                                         variant_type.capitalize(),
                                         variant_type,
                                         transcript_id=tx.get_transcript_id())
        if is_stop_codon_overlap and not variant_type.endswith("NP"):
            return VariantClassification('Stop_Codon_' +
                                         variant_type.capitalize(),
                                         variant_type,
                                         transcript_id=tx.get_transcript_id())

        is_cds_overlap = self._determine_if_cds_overlap(s, e, tx, variant_type)
        if (is_exon_overlap and not is_cds_overlap
                and not is_start_codon_overlap
                and not is_stop_codon_overlap):
            # UTR -- ``side`` comes from the beyond-exon helper above.
            if side.startswith("3"):
                vc_tmp = VariantClassification.THREE_PRIME_UTR
            else:
                vc_tmp = VariantClassification.FIVE_PRIME_UTR
            # Only the exon-space start is needed for the de novo check.
            transcript_position_exon_space_start, _ = (
                TranscriptProviderUtils.convert_genomic_space_to_exon_space(
                    start, end, tx))
            vc = self._determine_de_novo(vc_tmp,
                                         transcript_position_exon_space_start,
                                         ref_allele, alt_allele, tx,
                                         variant_type)
            return VariantClassification(
                vc,
                variant_type,
                transcript_id=tx.get_transcript_id(),
            )

        # We have a clean overlap in the CDS.  Includes start codon or stop codon.
        if is_cds_overlap or is_stop_codon_overlap or is_start_codon_overlap:
            is_frameshift_indel = self.is_frameshift_indel(
                variant_type, s, e, alt_allele)
            return self._determine_vc_for_cds_overlap(
                start, end, ref_allele, alt_allele, is_frameshift_indel,
                is_splice_site, tx, variant_type, is_start_codon_overlap)

        # Defensive fallback: the branches above are expected to cover every
        # case.  The accessor name was previously misspelled here, which would
        # have raised AttributeError instead of this ValueError.
        raise ValueError("Could not determine variant classification:  " +
                         str(tx.get_transcript_id()) + " " +
                         str([ref_allele, alt_allele, start, end]))