Beispiel #1
0
    def _render_other_transcripts(self, txs, transcriptIndicesToSkip,
                                  variant_type, ref_allele, alt_allele,
                                  start, end, is_longer_field=False):
        """
        Render the transcripts that were not chosen as a single '|'-delimited string.

        Each entry is formatted <gene>_<transcript_id>_<variant_classification>_<protein_change>
        and has any leading or trailing underscores stripped.
            Note:  There are other areas of Oncotator (e.g. Generic_GeneProteinPositionDatasource) that depend
                on this format.  Changing it here may introduce bugs in other pieces of code.

                Transcripts that classify as IGR are left out entirely.

        txs -- a list of transcripts to render.
        transcriptIndicesToSkip -- positions within txs of the transcripts already in use (i.e. not an
            "other transcript").  This will usually be the canonical or any transcript chosen by tx_mode.
        variant_type, ref_allele, alt_allele, start, end -- passed through unchanged to the variant classifier.
        is_longer_field -- when True, the transcript change is appended to each entry (only if it is
            neither None nor an empty string).

        Returns the rendered entries joined with '|' (an empty string when nothing is rendered).
        """
        classifier = VariantClassifier()
        rendered = []
        for idx, tx in enumerate(txs):
            # Transcripts already in use are not "other" transcripts.
            if idx in transcriptIndicesToSkip:
                continue

            vc = classifier.variant_classify(tx=tx,
                                             variant_type=variant_type,
                                             ref_allele=ref_allele,
                                             alt_allele=alt_allele,
                                             start=start,
                                             end=end)
            # Never report a transcript that would render as IGR.
            if vc.get_vc() == VariantClassification.IGR:
                continue

            fields = [tx.get_gene(),
                      tx.get_transcript_id(),
                      vc.get_vc(),
                      classifier.generate_protein_change_from_vc(vc)]

            if is_longer_field:
                tx_change = classifier.generate_transcript_change_from_tx(
                    tx, variant_type, vc, start, end, ref_allele, alt_allele)
                if tx_change not in (None, ""):
                    fields.append(tx_change)

            # Empty trailing (or leading) fields would leave dangling underscores.
            rendered.append('_'.join(fields).strip('_'))

        return '|'.join(rendered)
    def annotate_mutation(self, mutation):
        """
        Add transcript-derived annotations to the given mutation and return it.

        Transcripts for the mutation's position are fetched with get_transcripts_by_pos.  If none come
        back, the mutation is classified as IGR and other_transcripts describes the nearest genes.
        Otherwise a transcript is selected with _choose_transcript (using the current tx mode) and the
        transcript, classification, gene and GENCODE tag annotations are filled in from it.
        HGVS annotations are added afterwards in both cases.

        mutation -- object exposing chr, start, end, ref_allele and alt_allele.  start and end must be
            convertible with int().  Annotations are attached through mutation.addAnnotations.

        Returns the same mutation instance that was passed in.
        """
        chrom = mutation.chr
        start = int(mutation.start)
        end = int(mutation.end)
        txs = self.get_transcripts_by_pos(chrom, start, end)

        annotations = self._create_blank_set_of_annotations()
        inferred_type = TranscriptProviderUtils.infer_variant_type(
            mutation.ref_allele, mutation.alt_allele)
        annotations['variant_type'] = Annotation(value=inferred_type,
                                                 datasourceName=self.title)
        variant_type = annotations['variant_type'].value

        basic = self._create_basic_annotation
        chosen_tx = None

        # We have hit IGR if no transcripts come back.  Most annotations can just use the blank set.
        if len(txs) == 0:
            annotations['variant_classification'] = basic(
                VariantClassification.IGR)

            nearest_genes = self._get_nearest_genes(chrom, start, end)
            upstream, downstream = nearest_genes[0], nearest_genes[1]
            annotations['other_transcripts'] = basic(
                value='%s (%s upstream) : %s (%s downstream)' %
                (upstream[0], upstream[1], downstream[0], downstream[1]))

            annotations['gene'] = basic('Unknown')
            annotations['gene_id'] = basic('0')

            genome_change = TranscriptProviderUtils.determine_genome_change(
                mutation.chr, mutation.start, mutation.end,
                mutation.ref_allele, mutation.alt_allele, variant_type)
            annotations['genome_change'] = basic(genome_change)
        else:
            # Choose the best effect transcript
            chosen_tx = self._choose_transcript(
                txs, self.get_tx_mode(), variant_type,
                mutation.ref_allele, mutation.alt_allele, start, end)
            classifier = VariantClassifier()

            annotations['annotation_transcript'] = basic(
                chosen_tx.get_transcript_id())

            genome_change = TranscriptProviderUtils.determine_genome_change(
                mutation.chr, mutation.start, mutation.end,
                mutation.ref_allele, mutation.alt_allele, variant_type)
            annotations['genome_change'] = basic(genome_change)

            transcript_position = TranscriptProviderUtils.render_transcript_position(
                start, end, chosen_tx)
            annotations['transcript_position'] = basic(transcript_position)

            annotations['transcript_id'] = basic(
                chosen_tx.get_transcript_id())

            # NOTE(review): the raw mutation.start / mutation.end are passed here rather than the
            # int-converted locals -- confirm the classifier accepts both forms.
            vc = classifier.variant_classify(tx=chosen_tx,
                                             variant_type=variant_type,
                                             ref_allele=mutation.ref_allele,
                                             alt_allele=mutation.alt_allele,
                                             start=mutation.start,
                                             end=mutation.end)

            # The exon index from the classifier is shifted by one before being reported.
            annotations['transcript_exon'] = basic(str(vc.get_exon_i() + 1))
            annotations['variant_classification'] = basic(vc.get_vc())
            annotations['secondary_variant_classification'] = basic(
                vc.get_secondary_vc())
            annotations['protein_change'] = basic(
                classifier.generate_protein_change_from_vc(vc))
            annotations['codon_change'] = basic(
                classifier.generate_codon_change_from_vc(
                    chosen_tx, start, end, vc))
            annotations['transcript_change'] = basic(
                classifier.generate_transcript_change_from_tx(
                    chosen_tx, variant_type, vc, start, end,
                    mutation.ref_allele, mutation.alt_allele))

            annotations['transcript_strand'] = basic(chosen_tx.get_strand())
            annotations['gene'] = basic(chosen_tx.get_gene())
            annotations['gene_type'] = basic(chosen_tx.get_gene_type())

            # (annotation name, GENCODE tag key) pairs, in the order they are assigned.
            gencode_fields = (
                ('gencode_transcript_tags', 'tag'),
                ('gencode_transcript_status', 'transcript_status'),
                ('havana_transcript', 'havana_transcript'),
                ('ccds_id', 'ccdsid'),
                ('gencode_transcript_type', 'transcript_type'),
                ('gencode_transcript_name', 'transcript_name'),
            )
            for annotation_name, tag_key in gencode_fields:
                annotations[annotation_name] = basic(
                    self._retrieve_gencode_tag_value(chosen_tx, tag_key))

            # Every transcript except the chosen one is reported as an "other transcript".
            other_transcript_value = self._render_other_transcripts(
                txs, [txs.index(chosen_tx)], variant_type,
                mutation.ref_allele, mutation.alt_allele,
                mutation.start, mutation.end)
            annotations['other_transcripts'] = basic(other_transcript_value)

        mutation.addAnnotations(annotations)

        # Add the HGVS annotations ... setting to "" if not available.
        # chosen_tx is still None here when no transcript was found.
        mutation.addAnnotations(
            self._create_hgvs_annotation_dict(mutation, chosen_tx))

        return mutation
    def annotate_mutation(self, mutation):
        """
        Add transcript-derived annotations (including strand) to the given mutation and return it.

        Transcripts for the mutation's position are fetched with get_transcripts_by_pos.  If none come
        back, the mutation is classified as IGR and other_transcripts describes the nearest genes.
        Otherwise a transcript is selected with _choose_transcript (using the current tx mode) and the
        transcript, strand, classification, gene and GENCODE tag annotations are filled in from it.
        HGVS annotations are added afterwards in both cases.

        mutation -- object exposing chr, start, end, ref_allele and alt_allele.  start and end must be
            convertible with int().  Annotations are attached through mutation.addAnnotations.

        Returns the same mutation instance that was passed in.
        """
        chrom = mutation.chr
        start = int(mutation.start)
        end = int(mutation.end)
        txs = self.get_transcripts_by_pos(chrom, start, end)

        annotations = self._create_blank_set_of_annotations()
        inferred_type = TranscriptProviderUtils.infer_variant_type(
            mutation.ref_allele, mutation.alt_allele)
        annotations['variant_type'] = Annotation(value=inferred_type,
                                                 datasourceName=self.title)
        variant_type = annotations['variant_type'].value

        basic = self._create_basic_annotation
        chosen_tx = None

        # We have hit IGR if no transcripts come back.  Most annotations can just use the blank set.
        if len(txs) == 0:
            annotations['variant_classification'] = basic(
                VariantClassification.IGR)

            nearest_genes = self._get_nearest_genes(chrom, start, end)
            upstream, downstream = nearest_genes[0], nearest_genes[1]
            annotations['other_transcripts'] = basic(
                value='%s (%s upstream) : %s (%s downstream)' %
                (upstream[0], upstream[1], downstream[0], downstream[1]))

            annotations['gene'] = basic('Unknown')
            annotations['gene_id'] = basic('0')

            genome_change = TranscriptProviderUtils.determine_genome_change(
                mutation.chr, mutation.start, mutation.end,
                mutation.ref_allele, mutation.alt_allele, variant_type)
            annotations['genome_change'] = basic(genome_change)
        else:
            # Choose the best effect transcript
            chosen_tx = self._choose_transcript(
                txs, self.get_tx_mode(), variant_type,
                mutation.ref_allele, mutation.alt_allele, start, end)
            classifier = VariantClassifier()

            annotations['annotation_transcript'] = basic(
                chosen_tx.get_transcript_id())

            genome_change = TranscriptProviderUtils.determine_genome_change(
                mutation.chr, mutation.start, mutation.end,
                mutation.ref_allele, mutation.alt_allele, variant_type)
            annotations['genome_change'] = basic(genome_change)

            # The strand is recorded under 'strand' here and again under 'transcript_strand' below.
            annotations['strand'] = basic(chosen_tx.get_strand())

            transcript_position = TranscriptProviderUtils.render_transcript_position(
                start, end, chosen_tx)
            annotations['transcript_position'] = basic(transcript_position)

            annotations['transcript_id'] = basic(
                chosen_tx.get_transcript_id())

            # NOTE(review): the raw mutation.start / mutation.end are passed here rather than the
            # int-converted locals -- confirm the classifier accepts both forms.
            vc = classifier.variant_classify(tx=chosen_tx,
                                             variant_type=variant_type,
                                             ref_allele=mutation.ref_allele,
                                             alt_allele=mutation.alt_allele,
                                             start=mutation.start,
                                             end=mutation.end)

            # The exon index from the classifier is shifted by one before being reported.
            annotations['transcript_exon'] = basic(str(vc.get_exon_i() + 1))
            annotations['variant_classification'] = basic(vc.get_vc())
            annotations['secondary_variant_classification'] = basic(
                vc.get_secondary_vc())
            annotations['protein_change'] = basic(
                classifier.generate_protein_change_from_vc(vc))
            annotations['codon_change'] = basic(
                classifier.generate_codon_change_from_vc(
                    chosen_tx, start, end, vc))
            annotations['transcript_change'] = basic(
                classifier.generate_transcript_change_from_tx(
                    chosen_tx, variant_type, vc, start, end,
                    mutation.ref_allele, mutation.alt_allele))

            annotations['transcript_strand'] = basic(chosen_tx.get_strand())
            annotations['gene'] = basic(chosen_tx.get_gene())
            annotations['gene_type'] = basic(chosen_tx.get_gene_type())

            # (annotation name, GENCODE tag key) pairs, in the order they are assigned.
            gencode_fields = (
                ('gencode_transcript_tags', 'tag'),
                ('gencode_transcript_status', 'transcript_status'),
                ('havana_transcript', 'havana_transcript'),
                ('ccds_id', 'ccdsid'),
                ('gencode_transcript_type', 'transcript_type'),
                ('gencode_transcript_name', 'transcript_name'),
            )
            for annotation_name, tag_key in gencode_fields:
                annotations[annotation_name] = basic(
                    self._retrieve_gencode_tag_value(chosen_tx, tag_key))

            # Every transcript except the chosen one is reported as an "other transcript".
            other_transcript_value = self._render_other_transcripts(
                txs, [txs.index(chosen_tx)], variant_type,
                mutation.ref_allele, mutation.alt_allele,
                mutation.start, mutation.end)
            annotations['other_transcripts'] = basic(other_transcript_value)

        mutation.addAnnotations(annotations)

        # Add the HGVS annotations ... setting to "" if not available.
        # chosen_tx is still None here when no transcript was found.
        mutation.addAnnotations(
            self._create_hgvs_annotation_dict(mutation, chosen_tx))

        return mutation