Python VariantClassifier 예제들, oncotator.utils.VariantClassifier.VariantClassifier Python 예제들

예제 #1

0

파일 보기

파일: EnsemblTranscriptDatasource.py 프로젝트: ihuerga/oncotator

    def annotate_mutation(self, mutation):
        chr = mutation.chr
        start = int(mutation.start)
        end = int(mutation.end)
        txs = self.get_transcripts_by_pos(chr, start, end)
        final_annotation_dict = self._create_blank_set_of_annotations()
        final_annotation_dict['variant_type'] = Annotation(value=TranscriptProviderUtils.infer_variant_type(mutation.ref_allele, mutation.alt_allele), datasourceName=self.title)
        chosen_tx = None

        # We have hit IGR if no transcripts come back.  Most annotations can just use the blank set.
        if len(txs) == 0:
            final_annotation_dict['variant_classification'] = self._create_basic_annotation(VariantClassification.IGR)
            nearest_genes = self._get_nearest_genes(chr, int(start), int(end))
            final_annotation_dict['other_transcripts'] = self._create_basic_annotation(value='%s (%s upstream) : %s (%s downstream)' % (nearest_genes[0][0], nearest_genes[0][1], nearest_genes[1][0], nearest_genes[1][1]))
            final_annotation_dict['gene'] = self._create_basic_annotation('Unknown')
            final_annotation_dict['gene_id'] = self._create_basic_annotation('0')
            final_annotation_dict['genome_change'] = self._create_basic_annotation(TranscriptProviderUtils.determine_genome_change(mutation.chr, mutation.start, mutation.end, mutation.ref_allele, mutation.alt_allele, final_annotation_dict['variant_type'].value))
        else:
            # Choose the best effect transcript
            chosen_tx = self._choose_transcript(txs, self.get_tx_mode(), final_annotation_dict['variant_type'].value, mutation.ref_allele, mutation.alt_allele, start, end)
            vcer = VariantClassifier()

            final_annotation_dict['annotation_transcript'] = self._create_basic_annotation(chosen_tx.get_transcript_id())
            final_annotation_dict['genome_change'] = self._create_basic_annotation(TranscriptProviderUtils.determine_genome_change(mutation.chr, mutation.start, mutation.end, mutation.ref_allele, mutation.alt_allele, final_annotation_dict['variant_type'].value))
            final_annotation_dict['strand'] = self._create_basic_annotation(chosen_tx.get_strand())

            final_annotation_dict['transcript_position'] = self._create_basic_annotation(TranscriptProviderUtils.render_transcript_position(int(start), int(end), chosen_tx))

            final_annotation_dict['transcript_id'] = self._create_basic_annotation(chosen_tx.get_transcript_id())

            variant_classfication = vcer.variant_classify(tx=chosen_tx, variant_type=final_annotation_dict['variant_type'].value,
                                             ref_allele=mutation.ref_allele, alt_allele=mutation.alt_allele, start=mutation.start, end=mutation.end)
            final_annotation_dict['transcript_exon'] = self._create_basic_annotation(str(variant_classfication.get_exon_i()+1))
            final_annotation_dict['variant_classification'] = self._create_basic_annotation(variant_classfication.get_vc())
            final_annotation_dict['secondary_variant_classification'] = self._create_basic_annotation(variant_classfication.get_secondary_vc())
            final_annotation_dict['protein_change'] = self._create_basic_annotation(vcer.generate_protein_change_from_vc(variant_classfication))
            final_annotation_dict['codon_change'] = self._create_basic_annotation(vcer.generate_codon_change_from_vc(chosen_tx, start, end, variant_classfication))
            final_annotation_dict['transcript_change'] = self._create_basic_annotation(vcer.generate_transcript_change_from_tx(chosen_tx, final_annotation_dict['variant_type'].value, variant_classfication, start, end, mutation.ref_allele, mutation.alt_allele))

            final_annotation_dict['transcript_strand'] = self._create_basic_annotation(chosen_tx.get_strand())
            final_annotation_dict['gene'] = self._create_basic_annotation(chosen_tx.get_gene())
            final_annotation_dict['gene_type'] = self._create_basic_annotation(chosen_tx.get_gene_type())
            final_annotation_dict['gencode_transcript_tags'] = self._create_basic_annotation(self._retrieve_gencode_tag_value(chosen_tx, 'tag'))
            final_annotation_dict['gencode_transcript_status'] = self._create_basic_annotation(self._retrieve_gencode_tag_value(chosen_tx, 'transcript_status'))
            final_annotation_dict['havana_transcript'] = self._create_basic_annotation(self._retrieve_gencode_tag_value(chosen_tx, 'havana_transcript'))
            final_annotation_dict['ccds_id'] = self._create_basic_annotation(self._retrieve_gencode_tag_value(chosen_tx, 'ccdsid'))
            final_annotation_dict['gencode_transcript_type'] = self._create_basic_annotation(self._retrieve_gencode_tag_value(chosen_tx, 'transcript_type'))
            final_annotation_dict['gencode_transcript_name'] = self._create_basic_annotation(self._retrieve_gencode_tag_value(chosen_tx, 'transcript_name'))

            other_transcript_value = self._render_other_transcripts(txs, [txs.index(chosen_tx)], final_annotation_dict['variant_type'].value, mutation.ref_allele, mutation.alt_allele, mutation.start, mutation.end)
            final_annotation_dict['other_transcripts'] = self._create_basic_annotation(other_transcript_value)
            # final_annotation_dict['gene_id'].value

        mutation.addAnnotations(final_annotation_dict)

        # Add the HGVS annotations ... setting to "" if not available.
        hgvs_dict_annotations = self._create_hgvs_annotation_dict(mutation, chosen_tx)
        mutation.addAnnotations(hgvs_dict_annotations)

        return mutation

예제 #2

0

파일 보기

파일: EnsemblTranscriptDatasource.py 프로젝트: ihuerga/oncotator

    def _render_other_transcripts(self, txs, transcriptIndicesToSkip, variant_type, ref_allele, alt_allele, start, end):
        """
        Create a list of transcripts that are not being chosen.

        Other transcripts are formatted <gene>_<transcript_id>_<variant_classification>_<protein_change>
            Note:  There are other areas of Oncotator (e.g. Generic_GeneProteinPositionDatasource) that depend
                on this format.  Changing it here may introduce bugs in other pieces of code.

                Also, do not include any transcript that would render as IGR.

        txs -- a list of transcripts to render.
        transcriptIndicesToSkip -- a list of transcripts that are being used (i.e. not an "other transcript").  This will usually be the canonical or any transcript chosen by tx_mode.
        """
        vcer = VariantClassifier()
        other_transcripts = list()
        for i, ot in enumerate(txs):
            if i not in transcriptIndicesToSkip:
                vc = vcer.variant_classify(tx=ot, variant_type=variant_type, ref_allele=ref_allele, alt_allele=alt_allele, start=start, end=end)
                if vc.get_vc() == VariantClassification.IGR:
                    continue
                o = '_'.join([ot.get_gene(), ot.get_transcript_id(),
                              vc.get_vc(), vcer.generate_protein_change_from_vc(vc)])
                o = o.strip('_')
                other_transcripts.append(o)

        return '|'.join(other_transcripts)

예제 #3

0

파일 보기

파일: EnsemblTranscriptDatasource.py 프로젝트: alexramos/oncotator

    def _render_other_transcripts(self, txs, transcriptIndicesToSkip, variant_type, ref_allele, alt_allele, start, end):
        """
        Create a list of transcripts that are not being chosen.

        Other transcripts are formatted <gene>_<transcript_id>_<variant_classification>_<protein_change>
            Note:  There are other areas of Oncotator (e.g. Generic_GeneProteinPositionDatasource) that depend
                on this format.  Changing it here may introduce bugs in other pieces of code.

                Also, do not include any transcript that would render as IGR.

        txs -- a list of transcripts to render.
        transcriptIndicesToSkip -- a list of transcripts that are being used (i.e. not an "other transcript").  This will usually be the canonical or any transcript chosen by tx_mode.
        """
        vcer = VariantClassifier()
        other_transcripts = list()
        for i, ot in enumerate(txs):
            if i not in transcriptIndicesToSkip:
                vc = vcer.variant_classify(tx=ot, variant_type=variant_type, ref_allele=ref_allele, alt_allele=alt_allele, start=start, end=end)
                if vc.get_vc() == VariantClassification.IGR:
                    continue
                o = '_'.join([ot.get_gene(), ot.get_transcript_id(),
                              vc.get_vc(), vcer.generate_protein_change_from_vc(vc)])
                o = o.strip('_')
                other_transcripts.append(o)

        return '|'.join(other_transcripts)

예제 #4

0

파일 보기

파일: EnsemblTranscriptDatasource.py 프로젝트: alexramos/oncotator

    def _choose_best_effect_transcript(self, txs, variant_type, ref_allele, alt_allele, start, end):
        """Choose the transcript with the most detrimental effect.
         The rankings are in TranscriptProviderUtils.
         Ties are broken by which transcript has the longer coding length.

        :param list txs: list of Transcript
        :param str variant_type:
        :param str ref_allele:
        :param str alt_allele:
        :param str start:
        :param str end:
        :return Transcript:
         """
        vcer = VariantClassifier()
        effect_dict = TranscriptProviderUtils.retrieve_effect_dict()
        best_effect_score = 100000000 # lower score is more likely to get picked
        best_effect_tx = None
        for tx in txs:
            if (ref_allele == "" or ref_allele == "-") and (alt_allele == "" or alt_allele == "-"):
                vc = VariantClassification.SILENT
            else:
                vc = vcer.variant_classify(tx, ref_allele, alt_allele, start, end, variant_type).get_vc()
            effect_score = effect_dict.get(vc, 25)
            if effect_score < best_effect_score:
                best_effect_score = effect_score
                best_effect_tx = tx
            elif (effect_score == best_effect_score) and (len(best_effect_tx.get_seq()) < len(tx.get_seq())):
                best_effect_score = effect_score
                best_effect_tx = tx

        return best_effect_tx

예제 #5

0

파일 보기

파일: EnsemblTranscriptDatasource.py 프로젝트: alexramos/oncotator

    def annotate_mutation(self, mutation):
        chr = mutation.chr
        start = int(mutation.start)
        end = int(mutation.end)
        txs = self.get_transcripts_by_pos(chr, start, end)
        final_annotation_dict = self._create_blank_set_of_annotations()
        final_annotation_dict['variant_type'] = Annotation(value=TranscriptProviderUtils.infer_variant_type(mutation.ref_allele, mutation.alt_allele), datasourceName=self.title)
        chosen_tx = None

        # We have hit IGR if no transcripts come back.  Most annotations can just use the blank set.
        if len(txs) == 0:
            final_annotation_dict['variant_classification'] = self._create_basic_annotation(VariantClassification.IGR)
            nearest_genes = self._get_nearest_genes(chr, int(start), int(end))
            final_annotation_dict['other_transcripts'] = self._create_basic_annotation(value='%s (%s upstream) : %s (%s downstream)' % (nearest_genes[0][0], nearest_genes[0][1], nearest_genes[1][0], nearest_genes[1][1]))
            final_annotation_dict['gene'] = self._create_basic_annotation('Unknown')
            final_annotation_dict['gene_id'] = self._create_basic_annotation('0')
            final_annotation_dict['genome_change'] = self._create_basic_annotation(TranscriptProviderUtils.determine_genome_change(mutation.chr, mutation.start, mutation.end, mutation.ref_allele, mutation.alt_allele, final_annotation_dict['variant_type'].value))
        else:
            # Choose the best effect transcript
            chosen_tx = self._choose_transcript(txs, self.get_tx_mode(), final_annotation_dict['variant_type'].value, mutation.ref_allele, mutation.alt_allele, start, end)
            vcer = VariantClassifier()

            final_annotation_dict['annotation_transcript'] = self._create_basic_annotation(chosen_tx.get_transcript_id())
            final_annotation_dict['genome_change'] = self._create_basic_annotation(TranscriptProviderUtils.determine_genome_change(mutation.chr, mutation.start, mutation.end, mutation.ref_allele, mutation.alt_allele, final_annotation_dict['variant_type'].value))
            final_annotation_dict['strand'] = self._create_basic_annotation(chosen_tx.get_strand())

            final_annotation_dict['transcript_position'] = self._create_basic_annotation(TranscriptProviderUtils.render_transcript_position(int(start), int(end), chosen_tx))

            final_annotation_dict['transcript_id'] = self._create_basic_annotation(chosen_tx.get_transcript_id())

            variant_classfication = vcer.variant_classify(tx=chosen_tx, variant_type=final_annotation_dict['variant_type'].value,
                                             ref_allele=mutation.ref_allele, alt_allele=mutation.alt_allele, start=mutation.start, end=mutation.end)
            final_annotation_dict['transcript_exon'] = self._create_basic_annotation(str(variant_classfication.get_exon_i()+1))
            final_annotation_dict['variant_classification'] = self._create_basic_annotation(variant_classfication.get_vc())
            final_annotation_dict['secondary_variant_classification'] = self._create_basic_annotation(variant_classfication.get_secondary_vc())
            final_annotation_dict['protein_change'] = self._create_basic_annotation(vcer.generate_protein_change_from_vc(variant_classfication))
            final_annotation_dict['codon_change'] = self._create_basic_annotation(vcer.generate_codon_change_from_vc(chosen_tx, start, end, variant_classfication))
            final_annotation_dict['transcript_change'] = self._create_basic_annotation(vcer.generate_transcript_change_from_tx(chosen_tx, final_annotation_dict['variant_type'].value, variant_classfication, start, end, mutation.ref_allele, mutation.alt_allele))

            final_annotation_dict['transcript_strand'] = self._create_basic_annotation(chosen_tx.get_strand())
            final_annotation_dict['gene'] = self._create_basic_annotation(chosen_tx.get_gene())
            final_annotation_dict['gene_type'] = self._create_basic_annotation(chosen_tx.get_gene_type())
            final_annotation_dict['gencode_transcript_tags'] = self._create_basic_annotation(self._retrieve_gencode_tag_value(chosen_tx, 'tag'))
            final_annotation_dict['gencode_transcript_status'] = self._create_basic_annotation(self._retrieve_gencode_tag_value(chosen_tx, 'transcript_status'))
            final_annotation_dict['havana_transcript'] = self._create_basic_annotation(self._retrieve_gencode_tag_value(chosen_tx, 'havana_transcript'))
            final_annotation_dict['ccds_id'] = self._create_basic_annotation(self._retrieve_gencode_tag_value(chosen_tx, 'ccdsid'))
            final_annotation_dict['gencode_transcript_type'] = self._create_basic_annotation(self._retrieve_gencode_tag_value(chosen_tx, 'transcript_type'))
            final_annotation_dict['gencode_transcript_name'] = self._create_basic_annotation(self._retrieve_gencode_tag_value(chosen_tx, 'transcript_name'))

            other_transcript_value = self._render_other_transcripts(txs, [txs.index(chosen_tx)], final_annotation_dict['variant_type'].value, mutation.ref_allele, mutation.alt_allele, mutation.start, mutation.end)
            final_annotation_dict['other_transcripts'] = self._create_basic_annotation(other_transcript_value)
            # final_annotation_dict['gene_id'].value

        mutation.addAnnotations(final_annotation_dict)

        # Add the HGVS annotations ... setting to "" if not available.
        hgvs_dict_annotations = self._create_hgvs_annotation_dict(mutation, chosen_tx)
        mutation.addAnnotations(hgvs_dict_annotations)

        return mutation

예제 #6

0

파일 보기

파일: EnsemblTranscriptDatasource.py 프로젝트: ihuerga/oncotator

 def _calculate_effect_score(tx, start, end, alt_allele, ref_allele, variant_type):
     """Compute the effect score"""
     effect_dict = TranscriptProviderUtils.retrieve_effect_dict()
     vcer = VariantClassifier()
     if (ref_allele == "" or ref_allele == "-") and (alt_allele == "" or alt_allele == "-"):
         vc = VariantClassification.SILENT
     else:
         vc = vcer.variant_classify(tx, ref_allele, alt_allele, start, end, variant_type).get_vc()
     effect_score = effect_dict.get(vc, 25)
     return effect_score

예제 #7

0

파일 보기

파일: EnsemblTranscriptDatasource.py 프로젝트: Tmacme/oncotator

 def _calculate_effect_score(tx, start, end, alt_allele, ref_allele, variant_type):
     """Compute the effect score"""
     effect_dict = TranscriptProviderUtils.retrieve_effect_dict()
     vcer = VariantClassifier()
     if (ref_allele == "" or ref_allele == "-") and (alt_allele == "" or alt_allele == "-"):
         vc = VariantClassification.SILENT
     else:
         vc = vcer.variant_classify(tx, ref_allele, alt_allele, start, end, variant_type).get_vc()
     effect_score = effect_dict.get(vc, 25)
     return effect_score