Python Ptm Examples

Programming Language: Python

Namespace/Package Name: pypath.internals.intera

Method/Function: Ptm

Examples at hotexamples.com: 5

Python Ptm - 5 examples found. These are the top rated real world Python examples of pypath.internals.intera.Ptm extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

def li2012_dmi():
    """
    Converts table read by ``pypath.inputs.li2012.get_li2012`` to
    list of ``pypath.internals.intera.DomainMotif`` objects.
    Translates GeneSymbols to UniProt IDs.

    For every substrate UniProt ID present in the SwissProt sequences,
    the first isoform carrying a tyrosine at the reported position is
    selected. A motif of up to 7 residues on each side of the site
    (clipped to the bounds of that isoform's sequence) is built and
    combined with each kinase mapped from the third column.

    :return:
        List of ``intera.DomainMotif`` objects; empty if nothing matched.
    """

    result = []
    nondigit = re.compile(r'[^\d]+')
    se = uniprot_input.swissprot_seq(isoforms=True)

    for l in get_li2012():

        # second column looks like ``<genesymbol>/<residue>``
        subs_field = l[1].split('/')
        subs_protein = subs_field[0]
        subs_resnum = int(nondigit.sub('', subs_field[1]))
        tk_protein = l[2].split()[0]

        subs_uniprots = mapping.map_name(
            subs_protein,
            'genesymbol',
            'uniprot',
        )
        tk_uniprots = mapping.map_name(tk_protein, 'genesymbol', 'uniprot')

        for su in subs_uniprots:

            if su not in se:
                continue

            seq = se[su]

            # the first isoform having tyrosine at the given position
            subs_iso = None

            for iso, _ in iteritems(seq.isof):

                if seq.get(subs_resnum, isoform=iso) == 'Y':
                    subs_iso = iso
                    break

            if subs_iso is None:
                continue

            # +/-7 residue window, kept within the sequence boundaries
            start = max(1, subs_resnum - 7)
            end = min(subs_resnum + 7, len(seq.isof[subs_iso]))

            for ku in tk_uniprots:

                res = intera.Residue(subs_resnum,
                                     'Y',
                                     su,
                                     isoform=subs_iso)
                mot = intera.Motif(su,
                                   start,
                                   end,
                                   isoform=subs_iso,
                                   instance=seq.get(start,
                                                    end,
                                                    isoform=subs_iso))
                ptm = intera.Ptm(su,
                                 motif=mot,
                                 residue=res,
                                 isoform=subs_iso,
                                 source='Li2012')
                dom = intera.Domain(ku)
                dommot = intera.DomainMotif(domain=dom,
                                            ptm=ptm,
                                            sources=['Li2012'])
                result.append(dommot)

    return result

Example #2

Show file

def phosphosite_ptms(organism='human'):
    """
    Downloads the phosphorylation site dataset from PhosphoSitePlus.

    :param str,NoneType organism:
        Only rows whose 7th tab separated field equals this value are
        processed. If ``None``, no filtering by organism is done.

    :return:
        List of ``intera.Ptm`` objects, one for each row processed.
    """

    result = []
    url = urls.urls['psite_p']['url']
    nondigit = re.compile(r'[^\d]+')
    remot = re.compile(r'(_*)([A-Za-z]+)(_*)')

    c = curl.Curl(url, silent=False, large=True)
    data = c.result

    # the first four lines are discarded
    # (presumably the preamble and header of the file)
    for _ in range(4):
        data.readline()

    for r in data:

        r = r.split('\t')

        if len(r) <= 9 or (organism is not None and r[6] != organism):
            continue

        uniprot = r[2]
        # isoform number is the suffix after the dash, e.g. ``P12345-2``
        isoform = 1 if '-' not in uniprot else int(uniprot.split('-')[1])
        uniprot = uniprot.split('-')[0]

        typ = r[3].lower()

        if not typ:
            # falling back to the suffix of the residue field
            typ = r[4].split('-')[1] if '-' in r[4] else None

        aa = r[4][0]
        num = int(nondigit.sub('', r[4]))
        motif = remot.match(r[9])

        if motif:
            # underscores pad the +/-7 window where it overhangs the
            # sequence, so the window is shortened by their number
            pad_left, _, pad_right = motif.groups()
            start = num - 7 + len(pad_left)
            end = num + 7 - len(pad_right)
            instance = r[9].replace('_', '').upper()
        else:
            start = None
            end = None
            instance = None

        res = intera.Residue(num, aa, uniprot, isoform=isoform)
        mot = intera.Motif(uniprot,
                           start,
                           end,
                           instance=instance,
                           isoform=isoform)
        ptm = intera.Ptm(uniprot,
                         typ=typ,
                         motif=mot,
                         residue=res,
                         source='PhosphoSite',
                         isoform=isoform)
        result.append(ptm)

    return result

Example #3

Show file

File: homology.py Project: rfour92/pypath

    def translate_ptm(self, ptm):
        """
        Builds new ``intera.Ptm`` objects from the sites returned by
        ``self.translate_site`` for the residue of ``ptm``.

        :param ptm:
            An object with ``protein``, ``residue`` (having ``name``,
            ``number`` and ``isoform``), ``typ`` and ``evidences``
            attributes.

        :return:
            List of ``intera.Ptm`` objects created with
            ``ncbi_tax_id=self.target``; all of them carry the
            ``evidences`` of the input ``ptm``. If ``self.strict`` is
            true, sites without a sequence or with an unknown isoform
            are omitted.
        """

        tptms = self.translate_site(
            ptm.protein,
            ptm.residue.name,
            ptm.residue.number,
            ptm.residue.isoform,
            ptm.typ,
        )

        result = []

        for x in tptms:

            # assumes x is (protein, isoform, residue name,
            # residue number, <unused here>, ptm type) -- TODO confirm
            se = self.get_seq(x[0])
            has_seq = se is not None and x[1] in se.isof

            if not has_seq and self.strict:
                continue

            res = intera.Residue(
                number=x[3],
                name=x[2],
                protein=x[0],
                isoform=x[1],
                ncbi_tax_id=self.target,
            )

            if has_seq:
                start, end, region = se.get_region(x[3], isoform=x[1])
            else:
                start, end, region = None, None, None

            mot = intera.Motif(
                protein=x[0],
                start=start,
                end=end,
                instance=region,
                isoform=x[1],
                ncbi_tax_id=self.target,
            )

            # a distinct name is used here: rebinding ``ptm`` would make
            # subsequent iterations read ``evidences`` from the object
            # created in the previous iteration instead of the input
            translated = intera.Ptm(
                protein=x[0],
                motif=mot,
                residue=res,
                typ=x[5],
                isoform=x[1],
                evidences=ptm.evidences,
                ncbi_tax_id=self.target,
            )

            result.append(translated)

        return result

Example #4

Show file

    def _process(self, p):
        """
        Generates ``intera.DomainMotif`` objects from one record.

        :param dict p:
            A record with the keys ``kinase`` (one identifier or a list),
            ``substrate``, ``resaa`` and ``resnum``. The optional keys
            ``substrate_isoform``, ``instance``, ``start``, ``end``,
            ``typ``, ``databases``, ``references`` and
            ``substrate_refseq`` are also read. If ``instance`` is given
            and not ``None``, ``start`` and ``end`` are required.
            The record is modified: ``kinase`` is converted to list and
            ``typ`` is set to ``'phosphorylation'`` if missing.

        :return:
            Yields one ``intera.DomainMotif`` for each combination of
            the mapped enzymes and the matching substrate isoforms.
            Nothing is yielded if ``kinase`` is empty or the substrate
            name starts with ``HLA``.
        """

        # human leukocyte antigens result in an extremely
        # high number of combinations, hence these are skipped
        if (not p['kinase'] or (isinstance(p['substrate'], common.basestring)
                                and p['substrate'].startswith('HLA'))):

            return

        if not isinstance(p['kinase'], list):
            p['kinase'] = [p['kinase']]

        kinase_ups = mapping.map_names(
            p['kinase'],
            self.id_type_enzyme,
            'uniprot',
            ncbi_tax_id=self.ncbi_tax_id,
        )

        substrate_ups_all = set()

        for sub_id_type in self.id_type_substrate:

            # an ID type is either a name (the ID is taken from
            # ``p['substrate']``) or an (ID type, key in ``p``) pair
            if isinstance(sub_id_type, (list, tuple)):
                sub_id_type, sub_id_attr = sub_id_type
            else:
                sub_id_attr = 'substrate'

            substrate_ups_all.update(
                mapping.map_name(
                    p[sub_id_attr],
                    sub_id_type,
                    'uniprot',
                    self.ncbi_tax_id,
                ))

        # looking up sequences in all isoforms:
        substrate_ups = []

        for s in substrate_ups_all:

            if 'substrate_isoform' in p and p['substrate_isoform']:

                substrate_ups.append((s, p['substrate_isoform']))

            else:

                se = self.get_seq(s)

                if se is None:
                    continue

                for isof in se.isoforms():

                    if 'instance' in p and p['instance'] is not None:

                        if se.match(
                                p['instance'],
                                p['start'],
                                p['end'],
                                isoform=isof,
                        ):

                            substrate_ups.append((s, isof))

                    else:

                        if se.match(
                                p['resaa'],
                                p['resnum'],
                                isoform=isof,
                        ):

                            substrate_ups.append((s, isof))

        if self.trace:

            if p['substrate'] not in self.sub_ambig:

                self.sub_ambig[p['substrate']] = substrate_ups

            for k in p['kinase']:

                if k not in self.kin_ambig:

                    self.kin_ambig[k] = kinase_ups

            # generating report on non matching substrates
            if not substrate_ups:

                # these keys are optional in the record
                rep_instance = p['instance'] if 'instance' in p else None
                rep_start = p['start'] if 'start' in p else None
                rep_end = p['end'] if 'end' in p else None

                # ``substrate_ups_all`` contains plain IDs, not
                # (ID, isoform) pairs, hence these must not be indexed
                for s in substrate_ups_all:

                    se = self.get_seq(s)

                    if se is None:
                        continue

                    self.nomatch.append((
                        s,
                        # no isoform matched
                        None,
                        (
                            p['substrate_refseq']
                            if 'substrate_refseq' in p else '',
                            s,
                            rep_instance,
                            se.get(rep_start, rep_end)
                            if rep_start is not None and rep_end is not None
                            else None,
                        ),
                    ))

        # building objects representing the enzyme-substrate interaction(s)

        if 'typ' not in p:
            p['typ'] = 'phosphorylation'

        _resources = tuple(
            (self.input_param.
             get_via(name) if hasattr(self.input_param, 'get_via') else name)
            for name in (p['databases'] if 'databases' in p else ()))
        _resources += ((self.name, ) if isinstance(
            self.input_param, common.basestring) else (self.input_param, ))

        # collecting the evidences
        evidences = evidence.Evidences(
            evidence.Evidence(resource=_res,
                              references=p['references'] if 'references' in
                              p else None) for _res in _resources)

        for s in substrate_ups:

            # building the objects representing the substrate
            se = self.get_seq(s[0])

            if se is None:
                continue

            res = intera.Residue(
                p['resnum'],
                p['resaa'],
                s[0],
                isoform=s[1],
                ncbi_tax_id=self.ncbi_tax_id,
            )

            # the motif coordinates are kept in local variables: writing
            # them back to ``p`` would make the region found for the
            # first substrate be reused for all subsequent ones
            start = p['start'] if 'start' in p else None
            end = p['end'] if 'end' in p else None
            instance = p['instance'] if 'instance' in p else None

            if instance is None:

                reg = se.get_region(
                    p['resnum'],
                    start,
                    end,
                    isoform=s[1],
                )

                if reg is not None:

                    start, end, instance = reg

            mot = intera.Motif(
                s[0],
                start,
                end,
                instance=instance,
                isoform=s[1],
                ncbi_tax_id=self.ncbi_tax_id,
            )

            ptm = intera.Ptm(
                s[0],
                motif=mot,
                residue=res,
                typ=p['typ'],
                evidences=evidences,
                isoform=s[1],
                ncbi_tax_id=self.ncbi_tax_id,
            )

            for k in kinase_ups:

                if (not self.allow_mixed_organisms
                        and (self.get_taxon(k) != self.ncbi_tax_id
                             or self.get_taxon(s[0]) != self.ncbi_tax_id)):
                    continue

                # the enzyme (kinase)
                dom = intera.Domain(
                    protein=k,
                    ncbi_tax_id=self.ncbi_tax_id,
                )

                dommot = intera.DomainMotif(
                    domain=dom,
                    ptm=ptm,
                    evidences=evidences,
                )

                if hasattr(self.input_param, 'extra_attrs'):

                    # copying additional fields of the record
                    # to attributes of the resulting object
                    for attr, key in iteritems(self.input_param.extra_attrs):

                        if key in p:

                            setattr(dommot, attr, p[key])

                yield dommot

Example #5

Show file

def phosphosite_enzyme_substrate(
    raw=True,
    organism='human',
    strict=True,
):
    """
    Downloads and preprocesses phosphorylation site data from PhosphoSitePlus.

    :param bool raw:
        If true, a list of dicts is returned, one for each kinase of each
        row, with the fields read from the table and the additional keys
        ``resaa``, ``resnum``, ``start``, ``end`` and ``instance``.
        Otherwise ``intera.DomainMotif`` objects are created.
    :param str,NoneType organism:
        Only rows with this substrate organism are processed. If ``None``,
        all rows are processed and the kinases are used as they are.
    :param bool strict:
        If true, the organism of the kinase must also be ``organism``.
        Otherwise the kinases of other organisms are translated by
        orthology; rows where this is not possible are omitted.

    :return:
        List of dicts or ``intera.DomainMotif`` objects, see ``raw``.
    """

    url = urls.urls['psite_kin']['url']
    c = curl.Curl(
        url,
        silent=False,
        compr='gz',
        encoding='iso-8859-1',
        large=True,
    )
    # orthology tables by source organism
    orto = {}
    cols = {
        'kinase': 2,
        'kinase_org': 3,
        'substrate': 6,
        'substrate_org': 8,
        'residue': 9,
        'motif': 11
    }
    data = inputs_common.read_table(
        cols=cols,
        fileObject=c.result,
        sep='\t',
        hdr=4,
    )
    result = []
    non_digit = re.compile(r'[^\d]+')
    motre = re.compile(r'(_*)([A-Za-z]+)(_*)')

    for r in data:

        if organism is not None and (
                r['substrate_org'] != organism or
            (strict and r['kinase_org'] != organism)):
            continue

        if organism is None or r['kinase_org'] == organism:

            kin_uniprot = [r['kinase']]

        else:

            # reset for each row, so the kinases of
            # a previous row can never be reused
            kin_uniprot = []
            korg = r['kinase_org']

            # attempting to map by orthology:
            if korg in taxonomy.taxa and organism in taxonomy.taxa:

                ktaxid = taxonomy.taxa[korg]
                taxid = taxonomy.taxa[organism]

                if korg not in orto:

                    orto[korg] = homology.homologene_dict(
                        ktaxid,
                        taxid,
                        'refseqp',
                    )

                korg_refseq = mapping.map_name(r['kinase'], 'uniprot',
                                               'refseqp', ktaxid)

                # UniProt -> RefSeq (source organism)
                # -> orthologous RefSeq -> UniProt (target organism)
                kin_uniprot = [
                    uniprot for refseq in korg_refseq
                    if refseq in orto[korg]
                    for ortholog in orto[korg][refseq]
                    for uniprot in mapping.map_name(
                        ortholog,
                        'refseqp',
                        'uniprot',
                        taxid,
                    )
                ]

        if not kin_uniprot:
            continue

        # the site and the substrate are the same for all kinases of the
        # row, hence these are processed only once, from the original
        # fields
        resaa = r['residue'][0]
        resnum = int(non_digit.sub('', r['residue'][1:]))

        # excluding e.g. Q12809_VAR_014388
        substrate = r['substrate'].split('_')[0]
        sisoform = (1 if '-' not in substrate else int(
            substrate.split('-')[1]))
        substrate = substrate.split('-')[0]

        motmatch = motre.match(r['motif'])

        if motmatch:
            # underscores pad the +/-7 window where it overhangs the
            # sequence, so the window is shortened by their number
            start = resnum - 7 + len(motmatch.groups()[0])
            end = resnum + 7 - len(motmatch.groups()[2])
            instance = r['motif'].replace('_', '').upper()
        else:
            start = None
            end = None
            instance = None

        for kinase in kin_uniprot:

            kisoform = (1 if '-' not in kinase else int(
                kinase.split('-')[1]))
            kinase = kinase.split('-')[0]

            if raw:

                # a separate record for each kinase; appending the same
                # dict would leave only the last kinase in all of them
                rec = r.copy()
                rec['resaa'] = resaa
                rec['resnum'] = resnum
                rec['substrate'] = substrate
                rec['start'] = start
                rec['end'] = end
                rec['instance'] = instance
                rec['kinase'] = kinase
                result.append(rec)

            else:

                res = intera.Residue(resnum,
                                     resaa,
                                     substrate,
                                     isoform=sisoform)

                mot = intera.Motif(substrate,
                                   start,
                                   end,
                                   instance=instance,
                                   isoform=sisoform)

                ptm = intera.Ptm(protein=substrate,
                                 residue=res,
                                 motif=mot,
                                 typ='phosphorylation',
                                 source='PhosphoSite',
                                 isoform=sisoform)

                dom = intera.Domain(protein=kinase, isoform=kisoform)

                dommot = intera.DomainMotif(domain=dom,
                                            ptm=ptm,
                                            sources=['PhosphoSite'])

                result.append(dommot)

    return result