def _extract_exon_info(self, position, tx):
    """
    Summarize where a single genomic position lies relative to the exons
    of a transcript.

    :param int position: in genomic space
    :param Transcript tx:
    :return tuple: four elements
        [0]: closest exon index of the position, as returned by
             TranscriptProviderUtils.determine_closest_exon
        [1]: True when both the left and the right distance to that exon
             are strictly positive
        [2]: True when the position overlaps the exon (left distance <= 0
             and right distance >= 0)
        [3]: True when the transcript strand is "-"
        When no closest exon is found, all four elements are None.
    """
    closest_exon = TranscriptProviderUtils.determine_closest_exon(tx, position, position)
    if closest_exon is None:
        # No exon to measure against, so nothing else can be computed.
        return None, None, None, None

    distances = TranscriptProviderUtils.determine_closest_distance_from_exon(
        position, position, closest_exon, tx)
    dist_left, dist_right = distances

    overlaps_exon = (dist_left <= 0) and (dist_right >= 0)
    both_positive = (dist_left > 0) and (dist_right > 0)
    on_minus_strand = (tx.get_strand() == "-")

    return closest_exon, both_positive, overlaps_exon, on_minus_strand
def variant_classify(self, tx, ref_allele, alt_allele, start, end, variant_type, dist=2):
    """Perform classifications.  Everything handled in genomic space.

    Classifications this method can lead to:
        *RNA*
        x'UTR
        Splice_Site (Intron)
        Intron
        Splice_Site (Exon)
        {Missense, Silent}
        {Nonsense, Silent}
        {Nonstop, Silent}
        IGR
        x'Flank
        De_novo_Start

    :param Transcript tx: transcript the variant is classified against
    :param str ref_allele: reference allele; "-" is treated as an empty string
    :param str alt_allele: alternate allele; "-" is treated as an empty string
    :param start: genomic start position (anything int() accepts)
    :param end: genomic end position (anything int() accepts)
    :param str variant_type: variant type; values ending in "NP" skip the
        dedicated start/stop codon classifications
    :param int dist: forwarded to the splice site overlap check
    :return: a VariantClassification instance (or whatever
        _determine_vc_for_cds_overlap returns for CDS overlaps)
    :raises ValueError: if none of the classification branches applies
    """
    gene_type = tx.get_gene_type()
    if gene_type != "protein_coding":
        # Non-coding transcripts are only split into lincRNA vs. generic RNA.
        if gene_type == VariantClassification.LINCRNA:
            return VariantClassification(VariantClassification.LINCRNA, variant_type,
                                         tx.get_transcript_id())
        else:
            return VariantClassification(VariantClassification.RNA, variant_type,
                                         tx.get_transcript_id())

    if ref_allele == "-":
        ref_allele = ""
    if alt_allele == "-":
        alt_allele = ""

    s = int(start)
    e = int(end)
    is_exon_overlap = TranscriptProviderUtils.determine_if_exon_overlap(s, e, tx, variant_type)
    is_splice_site_tuple = self._determine_if_splice_site_overlap(s, e, tx, variant_type, dist)
    is_splice_site = is_splice_site_tuple[0]

    # start/end are deliberately passed through unconverted here, as in the
    # original call.
    is_beyond_exons, side, is_flank = self._determine_beyond_exon_info_vt(start, end, tx, variant_type)

    if not is_exon_overlap and not is_beyond_exons:
        exon_i = TranscriptProviderUtils.determine_closest_exon(tx, s, e)
        if is_splice_site:
            # Intron Splice Site
            return VariantClassification(VariantClassification.SPLICE_SITE, variant_type,
                                         tx.get_transcript_id(),
                                         vc_secondary=VariantClassification.INTRON,
                                         exon_i=exon_i)
        else:
            return VariantClassification(VariantClassification.INTRON, variant_type,
                                         tx.get_transcript_id(), exon_i=exon_i)

    if not is_exon_overlap and is_beyond_exons:
        if is_flank:
            # Flanks
            if side.startswith("3"):
                return VariantClassification(VariantClassification.THREE_PRIME_PRIME_FLANK,
                                             variant_type,
                                             transcript_id=tx.get_transcript_id())
            else:
                return VariantClassification(VariantClassification.FIVE_PRIME_PRIME_FLANK,
                                             variant_type,
                                             transcript_id=tx.get_transcript_id())
        else:
            # IGR
            return VariantClassification(VariantClassification.IGR, variant_type)

    is_start_codon_overlap = self._determine_codon_overlap(s, e, tx.get_start_codon(), variant_type)
    is_stop_codon_overlap = self._determine_codon_overlap(s, e, tx.get_stop_codon(), variant_type)

    # Variant types not ending in "NP" that touch the start/stop codon get a
    # dedicated classification named after the variant type.
    if is_start_codon_overlap and not variant_type.endswith("NP"):
        return VariantClassification('Start_Codon_' + variant_type.capitalize(), variant_type,
                                     transcript_id=tx.get_transcript_id())
    if is_stop_codon_overlap and not variant_type.endswith("NP"):
        return VariantClassification('Stop_Codon_' + variant_type.capitalize(), variant_type,
                                     transcript_id=tx.get_transcript_id())

    is_cds_overlap = self._determine_if_cds_overlap(s, e, tx, variant_type)
    if is_exon_overlap and not is_cds_overlap and not is_start_codon_overlap and not is_stop_codon_overlap:
        # UTR
        if side.startswith("3"):
            vc_tmp = VariantClassification.THREE_PRIME_UTR
        else:
            vc_tmp = VariantClassification.FIVE_PRIME_UTR
        # Only the exon-space start is needed for the de novo check.
        exon_space_start, _ = TranscriptProviderUtils.convert_genomic_space_to_exon_space(start, end, tx)
        vc = self._determine_de_novo(vc_tmp, exon_space_start, ref_allele, alt_allele, tx, variant_type)
        return VariantClassification(vc, variant_type, transcript_id=tx.get_transcript_id())

    # We have a clean overlap in the CDS.  Includes start codon or stop codon.
    if is_cds_overlap or is_stop_codon_overlap or is_start_codon_overlap:
        is_frameshift_indel = self.is_frameshift_indel(variant_type, s, e, alt_allele)
        return self._determine_vc_for_cds_overlap(start, end, ref_allele, alt_allele,
                                                  is_frameshift_indel, is_splice_site, tx,
                                                  variant_type, is_start_codon_overlap)

    # Defensive fallback.  The accessor name was previously misspelled here,
    # which would have raised AttributeError instead of this ValueError.
    raise ValueError("Could not determine variant classification: " + tx.get_transcript_id() +
                     " " + str([ref_allele, alt_allele, start, end]))
def variant_classify(self, tx, ref_allele, alt_allele, start, end, variant_type, dist=2):
    """Perform classifications.  Everything handled in genomic space.

    Classifications this method can lead to:
        *RNA*
        x'UTR
        Splice_Site (Intron)
        Intron
        Splice_Site (Exon)
        {Missense, Silent}
        {Nonsense, Silent}
        {Nonstop, Silent}
        IGR
        x'Flank
        De_novo_Start

    :param Transcript tx: transcript the variant is classified against
    :param str ref_allele: reference allele; "-" is treated as an empty string
    :param str alt_allele: alternate allele; "-" is treated as an empty string
    :param start: genomic start position (anything int() accepts)
    :param end: genomic end position (anything int() accepts)
    :param str variant_type: variant type; values ending in "NP" skip the
        dedicated start/stop codon classifications
    :param int dist: forwarded to the splice site overlap check
    :return: a VariantClassification instance (or whatever
        _determine_vc_for_cds_overlap returns for CDS overlaps)
    :raises ValueError: if none of the classification branches applies
    """
    transcript_id = tx.get_transcript_id
    gene_type = tx.get_gene_type()
    if gene_type != "protein_coding":
        # Non-coding transcripts are only split into lincRNA vs. generic RNA.
        if gene_type == VariantClassification.LINCRNA:
            return VariantClassification(
                VariantClassification.LINCRNA, variant_type, transcript_id())
        else:
            return VariantClassification(
                VariantClassification.RNA, variant_type, transcript_id())

    if ref_allele == "-":
        ref_allele = ""
    if alt_allele == "-":
        alt_allele = ""

    s = int(start)
    e = int(end)
    is_exon_overlap = TranscriptProviderUtils.determine_if_exon_overlap(
        s, e, tx, variant_type)
    is_splice_site_tuple = self._determine_if_splice_site_overlap(
        s, e, tx, variant_type, dist)
    is_splice_site = is_splice_site_tuple[0]

    # start/end are deliberately passed through unconverted here, as in the
    # original call.
    is_beyond_exons, side, is_flank = self._determine_beyond_exon_info_vt(
        start, end, tx, variant_type)

    if not is_exon_overlap and not is_beyond_exons:
        exon_i = TranscriptProviderUtils.determine_closest_exon(tx, s, e)
        if is_splice_site:
            # Intron Splice Site
            return VariantClassification(
                VariantClassification.SPLICE_SITE, variant_type,
                transcript_id(),
                vc_secondary=VariantClassification.INTRON,
                exon_i=exon_i)
        else:
            return VariantClassification(
                VariantClassification.INTRON, variant_type,
                transcript_id(), exon_i=exon_i)

    if not is_exon_overlap and is_beyond_exons:
        if is_flank:
            # Flanks
            if side.startswith("3"):
                return VariantClassification(
                    VariantClassification.THREE_PRIME_PRIME_FLANK,
                    variant_type,
                    transcript_id=transcript_id())
            else:
                return VariantClassification(
                    VariantClassification.FIVE_PRIME_PRIME_FLANK,
                    variant_type,
                    transcript_id=transcript_id())
        else:
            # IGR
            return VariantClassification(VariantClassification.IGR, variant_type)

    is_start_codon_overlap = self._determine_codon_overlap(
        s, e, tx.get_start_codon(), variant_type)
    is_stop_codon_overlap = self._determine_codon_overlap(
        s, e, tx.get_stop_codon(), variant_type)

    # Variant types not ending in "NP" that touch the start/stop codon get a
    # dedicated classification named after the variant type.
    if is_start_codon_overlap and not variant_type.endswith("NP"):
        return VariantClassification(
            'Start_Codon_' + variant_type.capitalize(), variant_type,
            transcript_id=transcript_id())
    if is_stop_codon_overlap and not variant_type.endswith("NP"):
        return VariantClassification(
            'Stop_Codon_' + variant_type.capitalize(), variant_type,
            transcript_id=transcript_id())

    is_cds_overlap = self._determine_if_cds_overlap(s, e, tx, variant_type)
    if is_exon_overlap and not is_cds_overlap and not is_start_codon_overlap and not is_stop_codon_overlap:
        # UTR
        if side.startswith("3"):
            vc_tmp = VariantClassification.THREE_PRIME_UTR
        else:
            vc_tmp = VariantClassification.FIVE_PRIME_UTR
        # Only the exon-space start is needed for the de novo check.
        exon_space_start, _ = TranscriptProviderUtils.convert_genomic_space_to_exon_space(
            start, end, tx)
        vc = self._determine_de_novo(
            vc_tmp, exon_space_start, ref_allele, alt_allele, tx, variant_type)
        return VariantClassification(
            vc, variant_type, transcript_id=transcript_id())

    # We have a clean overlap in the CDS.  Includes start codon or stop codon.
    if is_cds_overlap or is_stop_codon_overlap or is_start_codon_overlap:
        is_frameshift_indel = self.is_frameshift_indel(
            variant_type, s, e, alt_allele)
        return self._determine_vc_for_cds_overlap(
            start, end, ref_allele, alt_allele, is_frameshift_indel,
            is_splice_site, tx, variant_type, is_start_codon_overlap)

    # Defensive fallback.  The accessor name was previously misspelled here,
    # which would have raised AttributeError instead of this ValueError.
    raise ValueError(
        "Could not determine variant classification: " + transcript_id() +
        " " + str([ref_allele, alt_allele, start, end]))