def _add_variant_protein_variant_assoc_to_graph(self, row):
    """
    Generates relationships between variants and protein variants
    given a row of data

    The variant is added as a sequence alteration and linked to its
    transcript, to any polypeptide(s) resolved for that transcript,
    and to its gene.  For missense variants whose amino acid variant
    has the form ``p.<ref><position><alt>`` (e.g. ``p.V600E``), the
    reference/altered amino acids and a one-residue region on the
    amino acid sequence are added as well.

    :param row: iterable, row of data, see add_variant_info_to_graph()
                docstring for expected structure; exactly the first
                11 columns are unpacked here
    :return: None
    """
    gu = GraphUtils(curie_map.get())
    geno = Genotype(self.graph)
    is_literal = True

    # Columns prefixed with "_" are part of the row layout
    # but are not used by this method.
    (variant_key, variant_label, amino_acid_variant, _amino_acid_position,
     transcript_id, _transcript_priority, protein_variant_type,
     _functional_impact, _stop_gain_loss, transcript_gene,
     _protein_variant_source) = row[0:11]

    variant_id = self.make_cgd_id('variant{0}'.format(variant_key))

    transcript_curie = self._make_transcript_curie(transcript_id)
    uniprot_curie = self._make_uniprot_polypeptide_curie(transcript_id)
    ncbi_protein_curie = self._make_ncbi_polypeptide_curie(transcript_id)

    geno.addGenotype(variant_id, variant_label,
                     geno.genoparts['sequence_alteration'])

    # Add Transcript:
    geno.addTranscript(variant_id, transcript_curie, transcript_id,
                       geno.genoparts['transcript'])

    # Add polypeptide(s)
    if ncbi_protein_curie is not None:
        geno.addPolypeptide(ncbi_protein_curie,
                            self.transcript_xrefs['RefSeq'][transcript_id],
                            transcript_curie)
    if uniprot_curie is not None:
        geno.addPolypeptide(uniprot_curie,
                            self.transcript_xrefs['UniProt'][transcript_id],
                            transcript_curie)
    if ncbi_protein_curie is not None and uniprot_curie is not None:
        gu.addSameIndividual(self.graph, ncbi_protein_curie, uniprot_curie)

    # Choose the amino acid sequence the variant region is located on.
    # UniProt wins over NCBI when both exist (they are declared the same
    # individual above).  The fake CGD sequence id is used only when
    # neither mapping is available; previously it also replaced a real id
    # whenever just one of the two mappings existed.
    if uniprot_curie is not None:
        aa_seq_id = uniprot_curie
    elif ncbi_protein_curie is not None:
        aa_seq_id = ncbi_protein_curie
    else:
        aa_seq_id = self.make_cgd_id(
            'transcript{0}'.format(amino_acid_variant))

    # A missing label cannot mention "missense"; guard so it is not an error.
    is_missense = (
        protein_variant_type == 'nonsynonymous - missense'
        or (variant_label is not None and 'missense' in variant_label)
    )
    if is_missense:
        geno.addGenotype(variant_id, variant_label,
                         geno.genoparts['missense_variant'])

    # Get gene ID from gene map
    self._add_variant_gene_relationship(variant_id, transcript_gene)

    amino_acid_regex = re.compile(
        r'^p\.([A-Za-z]{1,3})(\d+)([A-Za-z]{1,3})$')

    # Parsing is only attempted for missense variants with a value present.
    match = None
    if is_missense and amino_acid_variant is not None:
        match = amino_acid_regex.match(amino_acid_variant.rstrip())

    if match is None:
        logger.debug("Could not parse amino acid information"
                     " from {0} variant:"
                     " {1} type: {2}".format(amino_acid_variant,
                                             variant_label,
                                             protein_variant_type))
        return

    # Add amino acid change to model
    ref_amino_acid, position, altered_amino_acid = match.groups()

    gu.addTriple(self.graph, variant_id,
                 geno.properties['reference_amino_acid'],
                 ref_amino_acid, is_literal)
    gu.addTriple(self.graph, variant_id,
                 geno.properties['results_in_amino_acid_change'],
                 altered_amino_acid, is_literal)

    # The region starts and ends at the same residue position.
    aa_region_id = ":_{0}{1}{2}Region".format(position, position,
                                              aa_seq_id)
    self._add_feature_with_coords(variant_id, position, position,
                                  aa_seq_id, aa_region_id)

    return