def annotate_mutation(self, mutation):
    """Annotate ``mutation`` with transcript-derived annotations and return it.

    The transcripts overlapping the mutation's position are fetched with
    ``self.get_transcripts_by_pos``.  When none come back the mutation is
    classified as ``VariantClassification.IGR`` and annotated with the result
    of ``self._get_nearest_genes``.  Otherwise one transcript is picked with
    ``self._choose_transcript`` and the classification, protein / codon /
    transcript changes and GENCODE tag annotations are derived from it.
    HGVS annotations are added last, for both paths.

    :param mutation: object exposing ``chr``, ``start``, ``end``,
        ``ref_allele``, ``alt_allele`` and ``addAnnotations``.
    :return: the same ``mutation`` object, after the annotations were added.
    """
    annotate = self._create_basic_annotation

    chrom = mutation.chr
    start = int(mutation.start)
    end = int(mutation.end)
    txs = self.get_transcripts_by_pos(chrom, start, end)
    final_annotation_dict = self._create_blank_set_of_annotations()

    ref_allele = mutation.ref_allele
    alt_allele = mutation.alt_allele
    final_annotation_dict['variant_type'] = Annotation(
        value=TranscriptProviderUtils.infer_variant_type(ref_allele, alt_allele),
        datasourceName=self.title)
    # Read the value back from the stored annotation so that every later use
    # sees exactly what the annotation holds.
    variant_type = final_annotation_dict['variant_type'].value
    chosen_tx = None

    # We have hit IGR if no transcripts come back.  Most annotations can just use the blank set.
    if not txs:
        final_annotation_dict['variant_classification'] = annotate(VariantClassification.IGR)
        nearest_genes = self._get_nearest_genes(chrom, start, end)
        # The first entry is rendered as "upstream", the second as "downstream".
        upstream, downstream = nearest_genes[0], nearest_genes[1]
        final_annotation_dict['other_transcripts'] = annotate(
            value='%s (%s upstream) : %s (%s downstream)' % (
                upstream[0], upstream[1], downstream[0], downstream[1]))
        final_annotation_dict['gene'] = annotate('Unknown')
        final_annotation_dict['gene_id'] = annotate('0')
        # NOTE: the mutation's own (unconverted) chr/start/end are passed here, not the int copies.
        final_annotation_dict['genome_change'] = annotate(
            TranscriptProviderUtils.determine_genome_change(
                mutation.chr, mutation.start, mutation.end, ref_allele, alt_allele, variant_type))
    else:
        # Choose the best effect transcript
        chosen_tx = self._choose_transcript(
            txs, self.get_tx_mode(), variant_type, ref_allele, alt_allele, start, end)
        vcer = VariantClassifier()

        final_annotation_dict['annotation_transcript'] = annotate(chosen_tx.get_transcript_id())
        # NOTE: the mutation's own (unconverted) chr/start/end are passed here, not the int copies.
        final_annotation_dict['genome_change'] = annotate(
            TranscriptProviderUtils.determine_genome_change(
                mutation.chr, mutation.start, mutation.end, ref_allele, alt_allele, variant_type))
        final_annotation_dict['strand'] = annotate(chosen_tx.get_strand())
        final_annotation_dict['transcript_position'] = annotate(
            TranscriptProviderUtils.render_transcript_position(start, end, chosen_tx))
        final_annotation_dict['transcript_id'] = annotate(chosen_tx.get_transcript_id())

        # The classifier receives the mutation's own start/end values (not the int copies).
        variant_classification = vcer.variant_classify(
            tx=chosen_tx, variant_type=variant_type,
            ref_allele=ref_allele, alt_allele=alt_allele,
            start=mutation.start, end=mutation.end)
        # get_exon_i() is shifted by one before being rendered as text.
        final_annotation_dict['transcript_exon'] = annotate(str(variant_classification.get_exon_i() + 1))
        final_annotation_dict['variant_classification'] = annotate(variant_classification.get_vc())
        final_annotation_dict['secondary_variant_classification'] = annotate(
            variant_classification.get_secondary_vc())
        final_annotation_dict['protein_change'] = annotate(
            vcer.generate_protein_change_from_vc(variant_classification))
        final_annotation_dict['codon_change'] = annotate(
            vcer.generate_codon_change_from_vc(chosen_tx, start, end, variant_classification))
        final_annotation_dict['transcript_change'] = annotate(
            vcer.generate_transcript_change_from_tx(
                chosen_tx, variant_type, variant_classification, start, end, ref_allele, alt_allele))
        final_annotation_dict['transcript_strand'] = annotate(chosen_tx.get_strand())
        final_annotation_dict['gene'] = annotate(chosen_tx.get_gene())
        final_annotation_dict['gene_type'] = annotate(chosen_tx.get_gene_type())

        # (annotation name, GENCODE tag key) pairs, kept in their original order.
        gencode_fields = (
            ('gencode_transcript_tags', 'tag'),
            ('gencode_transcript_status', 'transcript_status'),
            ('havana_transcript', 'havana_transcript'),
            ('ccds_id', 'ccdsid'),
            ('gencode_transcript_type', 'transcript_type'),
            ('gencode_transcript_name', 'transcript_name'),
        )
        for annotation_name, tag_key in gencode_fields:
            final_annotation_dict[annotation_name] = annotate(
                self._retrieve_gencode_tag_value(chosen_tx, tag_key))

        # The chosen transcript's index is handed to the renderer alongside the full list.
        other_transcript_value = self._render_other_transcripts(
            txs, [txs.index(chosen_tx)], variant_type, ref_allele, alt_allele,
            mutation.start, mutation.end)
        final_annotation_dict['other_transcripts'] = annotate(other_transcript_value)

    mutation.addAnnotations(final_annotation_dict)

    # Add the HGVS annotations ... setting to "" if not available.
    hgvs_dict_annotations = self._create_hgvs_annotation_dict(mutation, chosen_tx)
    mutation.addAnnotations(hgvs_dict_annotations)

    return mutation
def initializeMutFromAttributes(chr, start, end, ref_allele, alt_allele, build, mutation_data_factory=None):
    """Create a mutation from raw attributes and attach its preceding-bases annotation.

    The mutation is built with ``mutation_data_factory.create`` (a fresh
    ``MutationDataFactory`` is used when none is supplied); ``chr``, ``start``,
    ``end`` and ``build`` are passed through ``str`` first.

    * xNP variants get an empty preceding-bases annotation.
    * Deletions and insertions have their alleles and coordinates replaced by
      the values returned from ``MutUtils.retrievePrecedingBasesForDeletions``
      / ``MutUtils.retrievePrecedingBasesForInsertions``, and the returned
      preceding bases are stored as the annotation.

    :return: the created (and possibly updated) mutation.
    """
    if mutation_data_factory is None:
        mutation_data_factory = MutationDataFactory()

    mut = mutation_data_factory.create(str(chr), str(start), str(end), ref_allele, alt_allele, str(build))
    variant_type = TranscriptProviderUtils.infer_variant_type(mut.ref_allele, mut.alt_allele)

    # Snps and other xNPs
    if TranscriptProviderUtils.is_xnp(variant_type):
        mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME, annotationValue="")

    if variant_type == VariantClassification.VT_DEL:
        # deletion: the alt allele becomes "-"
        preceding_bases, new_ref, new_start, new_end = MutUtils.retrievePrecedingBasesForDeletions(mut)
        new_alt = "-"
    elif variant_type == VariantClassification.VT_INS:
        # insertion: the ref allele becomes "-"
        preceding_bases, new_alt, new_start, new_end = MutUtils.retrievePrecedingBasesForInsertions(mut)
        new_ref = "-"
    else:
        return mut

    # Each field is written both as an attribute and as an item, in this order.
    updates = (
        ("ref_allele", new_ref),
        ("alt_allele", new_alt),
        ("start", new_start),
        ("end", new_end),
    )
    for field_name, field_value in updates:
        setattr(mut, field_name, field_value)
        mut[field_name] = field_value

    mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                         annotationValue=preceding_bases)
    return mut
def _add(self, mutation):
    """Queue ``mutation`` according to its inferred variant type.

    xNP mutations are appended to the list stored in ``self.queue`` under the
    key ``self.sns.getSampleName(mutation)``; every other mutation is appended
    to ``self.indel_queue``.
    """
    inferred_type = TranscriptProviderUtils.infer_variant_type(mutation.ref_allele, mutation.alt_allele)

    # only combine ONPs, not indels
    if TranscriptProviderUtils.is_xnp(inferred_type):
        sample_queue = self.queue[self.sns.getSampleName(mutation)]
        sample_queue.append(mutation)
    else:
        self.indel_queue.append(mutation)
def initializeMutFromAttributes(chr, start, end, ref_allele, alt_allele, build):
    """Create a ``MutationData`` from raw attributes and attach its preceding-bases annotation.

    ``chr``, ``start``, ``end`` and ``build`` are passed through ``str``
    before the mutation is constructed.

    * xNP variants get an empty preceding-bases annotation.
    * Deletions and insertions have their alleles and coordinates replaced by
      the values returned from ``MutUtils.retrievePrecedingBasesForDeletions``
      / ``MutUtils.retrievePrecedingBasesForInsertions``, and the returned
      preceding bases are stored as the annotation.

    :return: the created (and possibly updated) mutation.
    """
    mut = MutationData(str(chr), str(start), str(end), ref_allele, alt_allele, str(build))
    variant_type = TranscriptProviderUtils.infer_variant_type(mut.ref_allele, mut.alt_allele)

    # Snps and other xNPs
    if TranscriptProviderUtils.is_xnp(variant_type):
        mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME, annotationValue="")

    if variant_type == VariantClassification.VT_DEL:
        # deletion: the alt allele becomes "-"
        preceding_bases, new_ref, new_start, new_end = MutUtils.retrievePrecedingBasesForDeletions(mut)
        new_alt = "-"
    elif variant_type == VariantClassification.VT_INS:
        # insertion: the ref allele becomes "-"
        preceding_bases, new_alt, new_start, new_end = MutUtils.retrievePrecedingBasesForInsertions(mut)
        new_ref = "-"
    else:
        return mut

    # Each field is written both as an attribute and as an item, in this order.
    updates = (
        ("ref_allele", new_ref),
        ("alt_allele", new_alt),
        ("start", new_start),
        ("end", new_end),
    )
    for field_name, field_value in updates:
        setattr(mut, field_name, field_value)
        mut[field_name] = field_value

    mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                         annotationValue=preceding_bases)
    return mut
def _add(self, mutation):
    """Append ``mutation`` to the queue matching its inferred variant type.

    Non-xNP mutations go to ``self.indel_queue``; xNP mutations go to the list
    found in ``self.queue`` under ``self.sns.getSampleName(mutation)``.
    """
    mutation_type = TranscriptProviderUtils.infer_variant_type(
        mutation.ref_allele, mutation.alt_allele)
    is_indel = not TranscriptProviderUtils.is_xnp(mutation_type)

    # only combine ONPs, not indels
    if is_indel:
        destination = self.indel_queue
    else:
        destination = self.queue[self.sns.getSampleName(mutation)]
    destination.append(mutation)
def annotate_mutation(self, mutation, upstream_padding=3000, downstream_padding=0):
    """Annotate a single mutation through the GAF annotation stages and return it.

    A ``variant_type`` annotation is created on ``mutation`` first.  The
    mutation is then wrapped in a one-element list and passed through the
    ``gaf_annotation`` stages and ``self._annotateMutationFromTranscripts``;
    the first item of the resulting iterator is returned.

    :param mutation: mutation exposing ``ref_allele``, ``alt_allele`` and
        ``createAnnotation``.
    :param upstream_padding: accepted for interface compatibility; not read
        by this method body.
    :param downstream_padding: accepted for interface compatibility; not read
        by this method body.
    :return: the first item produced by ``self._annotateMutationFromTranscripts``.
    """
    variant_type = TranscriptProviderUtils.infer_variant_type(mutation.ref_allele, mutation.alt_allele)
    mutation.createAnnotation('variant_type', variant_type, self.title)

    data = [mutation]
    data = gaf_annotation.find_mut_in_gaf(data, self)
    data = gaf_annotation.identify_best_effect_transcript(data, self)
    data = gaf_annotation.identify_best_canonical_transcript(data, self)
    data = gaf_annotation.correct_transcript_coordinates(data, self)
    data = gaf_annotation.infer_output_fields(data, self)
    data = self._annotateMutationFromTranscripts(data)

    # Use the builtin next() rather than the Python 2-only ``.next()`` method
    # so the call also works with Python 3 iterators.
    return next(data)
def annotate_mutation(self, mutation, upstream_padding=3000, downstream_padding=0):
    """Annotate a single mutation through the GAF annotation stages and return it.

    A ``variant_type`` annotation is created on ``mutation`` first.  The
    mutation is then wrapped in a one-element list and passed through the
    ``gaf_annotation`` stages and ``self._annotateMutationFromTranscripts``;
    the first item of the resulting iterator is returned.

    :param mutation: mutation exposing ``ref_allele``, ``alt_allele`` and
        ``createAnnotation``.
    :param upstream_padding: accepted for interface compatibility; not read
        by this method body.
    :param downstream_padding: accepted for interface compatibility; not read
        by this method body.
    :return: the first item produced by ``self._annotateMutationFromTranscripts``.
    """
    variant_type = TranscriptProviderUtils.infer_variant_type(mutation.ref_allele, mutation.alt_allele)
    mutation.createAnnotation('variant_type', variant_type, self.title)

    data = [mutation]
    data = gaf_annotation.find_mut_in_gaf(data, self)
    data = gaf_annotation.identify_best_effect_transcript(data, self)
    data = gaf_annotation.identify_best_canonical_transcript(data, self)
    data = gaf_annotation.correct_transcript_coordinates(data, self)
    data = gaf_annotation.infer_output_fields(data, self)
    data = self._annotateMutationFromTranscripts(data)

    # Use the builtin next() rather than the Python 2-only ``.next()`` method
    # so the call also works with Python 3 iterators.
    return next(data)
def test_infer_variant_type(self,ref,alt,vt_gt):
    """test that we can tell a snp from an indel

    ``vt_gt`` is the expected variant type for the ``ref`` / ``alt`` allele pair.
    """
    inferred = TranscriptProviderUtils.infer_variant_type(ref, alt)
    self.assertEqual(inferred, vt_gt)