def __init__(self, identifier, ptms=None):
    '''
    Store the protein identifier and build its list of PTM objects.

    identifier : str
        UniProt AC of the protein.
    ptms : list
        List of 3 element tuples, containing residue name, number and
        the PTM type (e.g. phosphorylation). ``None`` means no PTMs.
    '''

    self.id = identifier
    self.ptms = []

    if ptms is not None:

        for name, number, typ in ptms:

            res = intera.Residue(number, name, identifier)
            # The Ptm object used to be created and then dropped, which
            # left `self.ptms` empty regardless of the `ptms` argument.
            self.ptms.append(
                intera.Ptm(identifier, residue=res, typ=typ)
            )
def translate_ptm(self, ptm):
    '''
    Translate one PTM to the target (``self.target``) and return the
    resulting PTM objects.

    ptm : intera.Ptm
        The PTM to translate. Its ``protein``, ``residue`` (``name``,
        ``number``, ``isoform``), ``typ`` and ``sources`` attributes
        are read.

    Returns a list of ``intera.Ptm`` objects; the list is empty if
    ``self.translate_site`` gives nothing for ``self.target``. With
    ``self.strict`` set, sites whose sequence or isoform is not
    available are skipped.
    '''

    tptms = self.translate_site(
        ptm.protein,
        ptm.residue.name,
        ptm.residue.number,
        ptm.residue.isoform,
        ptm.typ,
    )

    result = []

    if self.target not in tptms:

        return result

    # Each `x` is indexed as: 0 = protein, 1 = isoform, 2 = residue name,
    # 3 = residue number, 5 = PTM type.
    for x in tptms[self.target]:

        uniprot, isoform = x[0], x[1]
        se = self.get_seq(uniprot)
        has_isoform = se is not None and isoform in se.isof

        if not has_isoform and self.strict:

            continue

        res = intera.Residue(x[3], x[2], uniprot, isoform=isoform)

        # `_process` treats None as a possible result of `get_region`,
        # so it is guarded here too instead of unpacking blindly.
        region = (
            se.get_region(x[3], isoform=isoform)
            if has_isoform else
            None
        )
        start, end, instance = (
            (None, None, None) if region is None else region
        )

        mot = intera.Motif(
            uniprot,
            start=start,
            end=end,
            instance=instance,
            isoform=isoform,
        )

        # A separate local name keeps the `ptm` argument intact, so the
        # sources always come from the input PTM and not from the object
        # created in the previous iteration.
        translated = intera.Ptm(
            uniprot,
            motif=mot,
            residue=res,
            typ=x[5],
            isoform=isoform,
            source=ptm.sources,
        )
        result.append(translated)

    return result
def _process(self, p):
    '''
    Generate ``intera.DomainMotif`` objects from one enzyme-substrate
    record.

    p : dict
        One input record. Keys read here: ``kinase``, ``substrate``,
        ``resaa``, ``resnum``, ``instance``, ``start``, ``end``;
        optionally ``substrate_isoform``, ``substrate_refseq``, ``typ``
        and ``references``; and depending on the input method
        ``databases``, ``npmid``, ``score`` or ``source``.
        ``p['kinase']`` is converted to a list in place, and defaults
        for ``typ`` and ``references`` are written into ``p``.

    Yields one ``intera.DomainMotif`` for every combination of mapped
    enzyme and matching substrate isoform. Nothing is yielded if the
    record has no enzyme or its substrate name starts with ``HLA``.
    '''

    # human leukocyte antigens result in an extremely
    # high number of combinations
    if (
        not p['kinase'] or (
            isinstance(p['substrate'], common.basestring) and
            p['substrate'].startswith('HLA')
        )
    ):

        return

    if not isinstance(p['kinase'], list):

        p['kinase'] = [p['kinase']]

    kinase_ups = mapping.map_names(
        p['kinase'],
        self.enzyme_id_type,
        'uniprot',
        ncbi_tax_id=self.ncbi_tax_id,
    )

    substrate_ups_all = set()

    for sub_id_type in (
        self.substrate_id_types[self.input_method.lower()]
        if self.input_is(self.substrate_id_types, '__contains__') else
        [self.substrate_id_type]
    ):

        # a tuple carries both the ID type and the key of `p`
        # the identifier should be taken from
        if isinstance(sub_id_type, tuple):

            sub_id_type, sub_id_attr = sub_id_type

        else:

            sub_id_attr = 'substrate'

        substrate_ups_all.update(
            mapping.map_name(
                p[sub_id_attr],
                sub_id_type,
                'uniprot',
                self.ncbi_tax_id,
            )
        )

    # looking up sequences in all isoforms:
    substrate_ups = []

    for s in substrate_ups_all:

        if 'substrate_isoform' in p and p['substrate_isoform']:

            substrate_ups.append((s, p['substrate_isoform']))

        else:

            se = self.get_seq(s)

            if se is None:

                continue

            for isof in se.isoforms():

                if p['instance'] is not None:

                    if se.match(
                        p['instance'],
                        p['start'],
                        p['end'],
                        isoform=isof,
                    ):

                        substrate_ups.append((s, isof))

                else:

                    if se.match(p['resaa'], p['resnum'], isoform=isof):

                        substrate_ups.append((s, isof))

    if self.trace:

        if p['substrate'] not in self.sub_ambig:

            self.sub_ambig[p['substrate']] = substrate_ups

        for k in p['kinase']:

            if k not in self.kin_ambig:

                self.kin_ambig[k] = kinase_ups

    # generating report on non matching substrates
    # NOTE(review): the elements of `substrate_ups_all` are used as plain
    # identifiers above (`self.get_seq(s)`), yet they are indexed here as
    # `s[0]` / `s[1]`; also `nomatch` is not defined within this method.
    # Logic left unchanged -- confirm the intended behaviour.
    if not substrate_ups:

        for s in substrate_ups_all:

            se = self.get_seq(s[0])

            if se is None:

                continue

            nomatch.append(
                (
                    s[0],
                    s[1],
                    (
                        (
                            p['substrate_refseq']
                            if 'substrate_refseq' in p else
                            ''
                        ),
                        s,
                        p['instance'],
                        se.get(p['start'], p['end']),
                    ),
                )
            )

    # adding kinase-substrate interactions
    for k in kinase_ups:

        for s in substrate_ups:

            if (
                not self.allow_mixed_organisms and (
                    self.get_taxon(k) != self.ncbi_tax_id or
                    self.get_taxon(s[0]) != self.ncbi_tax_id
                )
            ):

                continue

            se = self.get_seq(s[0])

            if se is None:

                continue

            res = intera.Residue(
                p['resnum'],
                p['resaa'],
                s[0],
                isoform=s[1],
            )

            # The motif is kept in locals: writing the region of the
            # first substrate back into `p` made all subsequent
            # substrates and isoforms reuse that same region.
            start, end, instance = p['start'], p['end'], p['instance']

            if instance is None:

                reg = se.get_region(
                    p['resnum'],
                    start,
                    end,
                    isoform=s[1],
                )

                if reg is not None:

                    start, end, instance = reg[0], reg[1], reg[2]

            if 'typ' not in p:

                p['typ'] = 'phosphorylation'

            mot = intera.Motif(
                s[0],
                start,
                end,
                instance=instance,
                isoform=s[1],
            )

            ptm = intera.Ptm(
                s[0],
                motif=mot,
                residue=res,
                typ=p['typ'],
                source=[self.name],
                isoform=s[1],
            )

            dom = intera.Domain(protein=k)

            if 'references' not in p:

                p['references'] = []

            dommot = intera.DomainMotif(
                domain=dom,
                ptm=ptm,
                sources=[self.name],
                refs=p['references'],
            )

            if self.input_is('mimp'):

                # was `';'.split(p['databases'])`, which splits the
                # literal ';' instead of the databases string
                dommot.mimp_sources = p['databases'].split(';')
                dommot.npmid = p['npmid']

            elif self.input_is('phosphonetworks'):

                dommot.pnetw_score = p['score']

            elif self.input_is('dbptm'):

                dommot.dbptm_sources = [p['source']]

            yield dommot