Example #1
0
    def __init__(
        self,
        members,
        name=None,
        parent=None,
        aspect='functional',
        source='resource_specific',
        scope='specific',
        resource=None,
        transmitter=None,
        receiver=None,
        limit=None,
        avoid=None,
        enabled=True,
    ):
        """
        Creates a named group of members with descriptive attributes.

        :param members:
            Iterable of the members; stored as a new ``set``.
        :param str name:
            Name of the group, ``'unnamed'`` if not provided.
        :param str parent:
            Name of the parent group; falls back to the name of this group.
        :param aspect:
            Stored unchanged in the ``aspect`` attribute.
        :param source:
            Stored unchanged in the ``source`` attribute.
        :param scope:
            Stored unchanged in the ``scope`` attribute.
        :param resource:
            Name of the resource; if empty, the value of the
            ``annot_composite_database_name`` setting is used, and if that
            is empty too, ``'Unknown'``.
        :param transmitter:
            Stored unchanged in the ``transmitter`` attribute.
        :param receiver:
            Stored unchanged in the ``receiver`` attribute.
        :param limit:
            Passed through ``common.to_set``.
        :param avoid:
            Passed through ``common.to_set``.
        :param enabled:
            Stored unchanged in the ``enabled`` attribute.
        """

        collections_abc.Set.__init__(self)

        self.members = set(members)
        self.name = name if name else 'unnamed'
        self.parent = parent if parent else self.name
        self.aspect, self.source, self.scope = aspect, source, scope

        if not resource:

            # fall back to the configured composite database name
            resource = (
                settings.get('annot_composite_database_name') or 'Unknown'
            )

        self.resource = resource
        self.transmitter, self.receiver = transmitter, receiver
        self.limit = common.to_set(limit)
        self.avoid = common.to_set(avoid)
        self.enabled = enabled
Example #2
0
 def __init__(
         self,
         components,
         name = None,
         ids = None,
         sources = None,
         interactions = None,
         references = None,
         proteins = None,
         attrs = None,
     ):
     """
     Represents a molecular complex.

     components : list,dict
         Either a dict with identifiers as keys and stoichiometric
         coefficients as values, or a list of identifiers where the
         stoichiometry is expressed by repeating the identifiers
         (the repetitions are counted).
     name : str
         A custom name or identifier of the complex.
     ids : dict
         Identifiers. If ``sources`` is a set, list or tuple it should be
         a dict with database names as keys and set of identifiers as
         values. If ``sources`` is a string, it can be a set of
         identifiers or a single identifier.
     sources : set,str
         Database(s) the complex has been defined in.
     interactions : list,dict
         Interactions between the components of the complex. Either
         a list of tuples of component IDs or a dict with tuples as
         keys and custom interaction properties as values. Stored as
         provided.
     references : set,str
         Reference(s); passed through ``common.to_set``.
     proteins : list,dict
         Synonym for `components`, kept for compatibility; used only if
         ``components`` is empty.
     attrs : dict
         Custom attributes; copied into a new dict. Anything which is
         not a dict is ignored.
     """

     components = components or proteins

     # anything but a dict is counted to obtain the stoichiometry
     self.components = (
         components
             if isinstance(components, dict) else
         dict(collections.Counter(components))
     )
     self.proteins = self.components
     self.name = name

     self.ids = collections.defaultdict(set)
     self.add_ids(ids, source = sources)

     self.sources = common.to_set(sources)
     self.references = common.to_set(references)
     self.attrs = dict(attrs) if isinstance(attrs, dict) else {}
     self.interactions = interactions
Example #3
0
 def filter_entity_type(cls, entities, entity_type):
     """
     Keeps only those entities or identifiers from ``entities`` which
     are of the type(s) given in ``entity_type``.

     :param iterable entities:
         A list, set, tuple or other iterable yielding entities or
         identifiers.
     :param str,set entity_type:
         One or more entity types e.g. ``{'protein', 'mirna'}``.

     :returns:
         ``entities`` itself if either argument is empty. Otherwise
         an object of the same type as ``entities`` if that is list-like
         (according to ``common.list_like``), else a generator.
     """

     if not (entity_type and entities):

         return entities

     wanted = common.to_set(entity_type)

     kept = (
         entity
         for entity in entities
         if cls._get_entity_type(entity) in wanted
     )

     if isinstance(entities, common.list_like):

         # rebuild the same kind of container the caller provided
         return type(entities)(kept)

     return kept
Example #4
0
    def _process_references(references):
        """
        Converts ``references`` to a set of ``refs.Reference`` objects.

        The input is first passed through ``common.to_set``; elements
        which are already ``refs.Reference`` instances are kept as they
        are, all others are wrapped into ``refs.Reference``.
        """

        return {
            ref if isinstance(ref, refs.Reference) else refs.Reference(ref)
            for ref in common.to_set(references)
        }
Example #5
0
 def add_ids(self, ids, source = None):
     """
     Adds identifiers to the ``ids`` attribute of this object.

     :param ids:
         Either a dict with sources as keys and identifier(s) as values,
         or one or more identifiers not in a dict. In the latter case
         they are registered under each of the sources in ``source``;
         without ``source`` nothing is added.
     :param source:
         One or more sources; used only if ``ids`` is not a dict.
     """

     if not isinstance(ids, dict):

         ids = common.to_set(ids)

         if isinstance(ids, set) and source:

             # the same identifiers belong to each of the sources
             ids = {src: ids for src in common.to_set(source)}

     if not isinstance(ids, dict):

         return

     for src, src_ids in iteritems(ids):

         self.ids[src].update(common.to_set(src_ids))
Example #6
0
    def _process_boolean_group_args(values, postfix):
        """
        Joins ``values`` into one string separated by ``' or '``.

        If ``postfix`` is not empty it is appended to each value first.
        The values are passed through ``common.to_set`` before joining.
        """

        terms = (
            {
                '%s%s' % (value, postfix)
                for value in common.to_list(values)
            }
                if postfix else
            values
        )

        return ' or '.join(common.to_set(terms))
Example #7
0
    def filter_df(
        cls,
        annot_df,
        category=None,
        name=None,
        parent=None,
        database=None,
        scope=None,
        aspect=None,
        source=None,
        entities=None,
        entity_type=None,
        causality=None,
        topology=None,
        postfix=None,
    ):
        """
        Filters ``annot_df`` by a query string assembled from the arguments.

        The query terms are created by ``cls._process_query_args`` from the
        arguments of this call; one group of alternatives (joined by
        ``or``) is added for ``causality`` and one for ``topology`` if
        provided. All terms are joined by ``and`` and evaluated by
        ``annot_df.query``.

        :param annot_df:
            The data frame to filter; only its ``query`` method is used here.
        :param category:
            Falls back to ``name`` if empty.
        :param causality:
            One or more values for the causality group.
        :param topology:
            One or more values for the topology group; the short codes
            ``pmtm``, ``pmp`` and ``sec`` are translated to their long names.
        :param postfix:
            Appended to each value in the causality and topology groups and
            also passed to ``cls._process_query_args``.

        :returns:
            The result of ``annot_df.query``, or ``annot_df`` itself if the
            query is empty.
        """

        category = category or name
        # snapshot of the local namespace at this point: the call arguments
        # (including `cls` and `annot_df`), `category` already defaulted
        args = locals()

        # short codes accepted in `topology` and their long names
        _topologies = {
            'pmtm': 'plasma_membrane_transmembrane',
            'pmp': 'plasma_membrane_peripheral',
            'sec': 'secreted',
        }

        # these are handled separately, hence removed from `args`
        entities = args.pop('entities')
        causality = args.pop('causality') or ()
        topology = args.pop('topology') or ()

        topology = [
            _topologies[top] if top in _topologies else top
            for top in common.to_set(topology)
        ]

        query = cls._process_query_args(
            df=annot_df,
            entities=entities,
            args=args,
            postfix=postfix,
        )

        if causality:

            query.append(cls._process_boolean_group_args(causality, postfix))

        if topology:

            query.append(cls._process_boolean_group_args(topology, postfix))

        # NOTE(review): the value assigned to `args` here is not used below,
        # the query terms have already been created -- confirm whether this
        # call is still needed
        args = cls._args_add_postfix(args, postfix)

        query = ' and '.join(query)

        return annot_df.query(query) if query else annot_df
Example #8
0
    def _foreign_resources_set(resources):
        """
        Creates a set of resources from one or more resources or objects
        carrying a resource.

        :param resources:
            One or more resources, or objects with a ``resource``
            attribute. The argument is normalized by ``common.to_set``
            (presumably this wraps a single value into a set and turns
            ``None`` into an empty set -- verify against ``common``).

        :returns:
            A set containing the ``resource`` attribute of each element
            which has one, and the element itself otherwise.
        """

        # iterate the normalized set, not the raw argument: otherwise
        # a single string would be decomposed to characters and
        # `None` would raise TypeError
        return {
            (
                res.resource
                    if hasattr(res, 'resource') else
                res
            )
            for res in common.to_set(resources)
        }
Example #9
0
    def get_desc(rec, attr):
        """
        Extracts the description belonging to ``attr`` from the record
        ``rec`` as a sorted tuple of lowercase strings.

        The description is read from the ``<attr>_desc`` field. It is
        considered empty if the ``attr`` field is present with the value
        ``'False'``, or if the ``attr`` field is absent and the
        description field is empty. If the description field is empty
        but the ``attr`` field is present with another value, the name
        of the attribute is used. The substitutions in ``replacements``
        are applied, then the string is split by commas.
        """

        desc_key = '%s_desc' % attr

        if attr in rec:

            is_empty = rec[attr] == 'False'

        else:

            is_empty = not rec[desc_key]

        text = '' if is_empty else (rec[desc_key] or attr)

        for pattern, repl in iteritems(replacements):

            text = text.replace(pattern, repl)

        items = text.lower().split(',') if text else None

        return tuple(sorted(common.to_set(items)))
Example #10
0
    def _dip_urls(self, e):
        """
        Builds the DIP URLs for the DIP IDs of ``e``.

        :param e:
            An object with either an ``attrs`` or an ``attributes``
            mapping; the DIP IDs are read from its ``dip_id`` key.

        :returns:
            The URLs joined by ``;`` in the sorted order of the IDs;
            an empty string if there is no ``dip_id`` key or none of the
            IDs could be processed.
        """

        attrs = e.attrs if hasattr(e, 'attrs') else e.attributes

        if 'dip_id' not in attrs:

            return ''

        result = []

        for dip_id in sorted(common.to_set(attrs['dip_id'])):

            try:

                # the number is the part after the first dash, without
                # its last character
                dip_number = int(dip_id.split('-')[1][:-1])

            except (AttributeError, IndexError, ValueError):

                # only malformed IDs are tolerated here; a bare `except`
                # would hide unrelated errors and keyboard interrupts
                self._log('Could not find DIP ID: %s' % dip_id)
                continue

            result.append(urls.urls['dip']['ik'] % dip_number)

        return ';'.join(result)
Example #11
0
    def match(
        self,
        resource=None,
        data_model=None,
        interaction_type=None,
        via=False,
        references=None,
    ):
        """
        Tells if this object matches the criteria provided.

        Each criterion is ignored if its value is ``None`` (for
        ``references`` and ``data_model``: if it is empty).

        :param resource:
            A resource to compare to ``self.resource``, a set of names to
            look up ``self.resource.name`` in, or an ``Evidence`` whose
            ``resource`` attribute is used.
        :param data_model:
            A value, or a list-like (``common.list_like``) of values, for
            the ``data_model`` attribute of ``self.resource``.
        :param interaction_type:
            Same as above for the ``interaction_type`` attribute.
        :param via:
            ``False`` matches if ``self.resource.via`` is empty, ``True``
            if it is not; other values are compared to the ``via``
            attribute of ``self.resource``.
        :param references:
            One or more references; matches if any of them is in
            ``self.references``.

        :returns:
            The value of an ``and``/``or`` chain, i.e. not necessarily
            a bool (for example an empty set if the references do not
            overlap); intended to be used for its truth value.
        """
        def _match(attr, value):
            # membership test for list-like values, equality otherwise

            return (getattr(self.resource, attr) in value
                    if isinstance(value, common.list_like) else getattr(
                        self.resource, attr) == value)

        resource = (resource.resource
                    if isinstance(resource, Evidence) else resource)

        # if not provided, the criterion is taken from `resource`
        interaction_type = (resource.interaction_type if
                            (interaction_type is None
                             and hasattr(resource, 'interaction_type')) else
                            interaction_type)

        # NOTE(review): the default of `via` is False, hence this fallback
        # applies only if the caller passes `via = None` explicitly
        via = (resource.via if
               (via is None and hasattr(resource, 'via')) else via)

        # NOTE(review): unlike above, here the attribute of `resource`
        # takes precedence even over a `data_model` provided by the caller
        data_model = (resource.data_model
                      if hasattr(resource, 'data_model') else data_model)

        references = common.to_set(references)

        return ((resource is None or
                 (self.resource.name in resource
                  if isinstance(resource, set) else self.resource == resource))
                and (interaction_type is None
                     or _match('interaction_type', interaction_type))
                and (via is None or (via == False and not self.resource.via) or
                     (via == True and self.resource.via) or _match('via', via))
                and (not references or self.references & references)
                and (not data_model or _match('data_model', data_model)))
Example #12
0
def protmapper_enzyme_substrate(
    only_evidences=None,
    only_literature=False,
    interactions=False,
):
    """
    Processes the records provided by ``get_protmapper``.

    Only the records with ``UP`` in their ``CTRL_NS`` field are used.

    :arg str,set,NoneType only_evidences:
        Keep only the interactions with these evidence type, e.g. `VALID`.
        See the 'descriptions' column in the 'evidences.csv' supplementary
        table.
    :arg bool only_literature:
        Keep only the records which have at least one evidence with
        a PMID.
    :arg bool interactions:
        If True, each record is a list of the controller ID, the target
        ID, the set of sources and the set of references. Otherwise
        a dict with the keys `kinase`, `resaa`, `resnum`, `references`,
        `substrate` and `databases`.

    :returns:
        A list of records.
    """

    # labels in the `SOURCES` field and the names used for them in the output
    databases = {
        'signor': 'SIGNOR',
        'psp': 'PhosphoSite',
        'sparser': 'Sparser',
        'reach': 'REACH',
        'pid': 'NCI-PID',
        'reactome': 'Reactome',
        'rlimsp': 'RLIMS-P',
        'bel': 'BEL-Large-Corpus',
    }

    only_evidences = common.to_set(only_evidences)
    records, evidences = get_protmapper()
    result = []

    for rec in records:

        if rec['CTRL_NS'] != 'UP':

            continue

        if only_evidences and not (
            only_evidences & {
                ev['DESCRIPTION']
                for ev in evidences[rec['ID']]
            }
        ):

            continue

        references = {
            ev['PMID']
            for ev in evidences[rec['ID']]
            if ev['PMID']
        }

        if only_literature and not references:

            continue

        # computed but not included in the output records
        typ = (
            'phosphorylation'
                if rec['CTRL_IS_KINASE'] == 'True' else
            'unknown'
        )

        sources = {
            databases.get(label, label)
            for label in rec['SOURCES'].strip('"').split(',')
        }

        if interactions:

            record = [
                rec['CTRL_ID'],
                rec['TARGET_UP_ID'],
                sources,
                references,
            ]

        else:

            record = {
                'kinase': rec['CTRL_ID'],
                'resaa': rec['TARGET_RES'],
                'resnum': int(rec['TARGET_POS']),
                'references': references,
                'substrate': rec['TARGET_UP_ID'],
                'databases': sources,
            }

        result.append(record)

    return result
Example #13
0
    def sets(*args):
        """
        Yields a set for each of the arguments.

        Sets are passed through unchanged; for objects with a ``members``
        attribute that attribute is used; anything else is converted by
        ``common.to_set``.

        :returns:
            A generator, one item for each argument.
        """

        def as_set(obj):

            if isinstance(obj, set):

                return obj

            if hasattr(obj, 'members'):

                return obj.members

            return common.to_set(obj)

        return (as_set(a) for a in args)
Example #14
0
def pathwaycommons_interactions(
    resources=None,
    types=None,
    by_interaction=False,
    version=12,
):
    """
    Downloads and processes interactions from PathwayCommons.

    :param resources:
        One or more resource names or labels (case insensitive for the
        names); by default all resources in ``pathwaycommons_resources``.
    :param types:
        One or more interaction types (second field of each line);
        by default all types are included.
    :param bool by_interaction:
        If True, the records are grouped in a dict of sets with
        ``(id_a, type, id_b)`` tuples as keys. Records of types not in
        ``pathwaycommons_directed_types`` are added under the reversed
        key if only that one exists already.
    :param version:
        The version to download; for each resource the smaller of this
        and the version of the resource is used.

    :returns:
        A list of ``PathwayCommonsInteraction`` records, or a dict of
        sets of such records if ``by_interaction`` is True.
    """

    interactions = collections.defaultdict(set) if by_interaction else []

    types = common.to_set(types)

    requested = common.to_list(resources) or (
        pc_res.name
        for pc_res in pathwaycommons_resources
    )
    resources = {res.lower() for res in requested}

    prg = progress.Progress(
        len(resources),
        'Processing PathwayCommons',
        1,
        percent=False,
    )

    url = urls.urls['pwcommons']['url']

    for resource in pathwaycommons_resources:

        labels = {resource.pc_label, resource.name.lower()}

        if not resources & labels:

            continue

        prg.step()
        _version = min(resource.version, version)
        resource_url = url % (_version, _version, resource.pc_label)
        c = curl.Curl(resource_url, silent=False, large=True)

        for line in c.result:

            if hasattr(line, 'decode'):

                line = line.decode('ascii')

            fields = line.strip('\n\r').split('\t')

            if types and fields[1] not in types:

                continue

            if not by_interaction:

                record = fields + [resource.name]
                interactions.append(PathwayCommonsInteraction(*record))
                continue

            forward = (fields[0], fields[1], fields[2])
            backward = (fields[2], fields[1], fields[0])

            directed = fields[1] in pathwaycommons_directed_types

            # undirected records are collected under the reversed key
            # if only that one has been seen before
            use_backward = (
                not directed and
                forward not in interactions and
                backward in interactions
            )
            key = backward if use_backward else forward

            interactions[key].add(
                PathwayCommonsInteraction(*key, resource=resource.name)
            )

    return interactions
Example #15
0
def hippie_interactions(
    score_threshold=.75,
    only_human=False,
    only_sources=None,
    only_methods=None,
    methods=False,
    sources=False,
    references=True,
    organisms=False,
):
    """
    Downloads and processes interactions from the HIPPIE database.

    Each line of the input is expected to be tab separated, with
    identifiers of the first partner in the first two fields (UniProt
    entry name and Entrez ID), identifiers of the second partner in the
    next two, the score in the fifth and the details in the sixth field.
    The details have the format ``key:value,value;key:value,...``.
    The identifiers are translated to UniProt IDs and one record is
    created for each pair of them.

    :param float score_threshold:
        Interactions with lower score are ignored.
    :param bool only_human:
        Ignore the interactions which have species information that does
        not include human (NCBI Taxonomy ID 9606).
    :param str,set only_sources:
        Keep only the interactions from any of these sources.
    :param str,set only_methods:
        Keep only the interactions detected by any of these methods
        (the ``experiments`` key of the details).
    :param bool methods:
        Include the methods in the records.
    :param bool sources:
        Include the sources in the records.
    :param bool references:
        Include the references (the ``pmids`` key of the details) in the
        records; an empty tuple if the key is missing.
    :param bool organisms:
        Include the organisms in the records; human is assumed if no
        species information is available.

    :returns:
        A list of unique ``HippieInteraction`` named tuples. The optional
        fields are sorted tuples if included, otherwise ``None``.
    """

    only_sources = common.to_set(only_sources)
    only_methods = common.to_set(only_methods)

    HippieInteraction = collections.namedtuple(
        'HippieInteraction',
        [
            'id_a',
            'id_b',
            'score',
            'methods',
            'references',
            'sources',
            'organisms',
        ],
    )

    def tps(i):

        return tuple(sorted(i))

    url = urls.urls['hippie']['url']
    c = curl.Curl(url, large=True, silent=False)

    result = set()

    for l in c.result:

        l = l.strip('\r\n').split('\t')

        score = float(l[4])

        if score < score_threshold:

            continue

        # the details and the filters depend only on the line, not on the
        # identifiers, hence these are processed once, before the mapping
        details = dict(
            (
                dd[0],
                set(dd[1].split(',')),
            )
            for dd in (d.split(':') for d in l[5].split(';'))
        )

        _sources = details.get('sources', set())
        experiments = details.get('experiments', set())

        if only_methods and not experiments & only_methods:

            continue

        # the source filter applies if `only_sources` is provided
        # (earlier it was wrongly conditioned on `only_methods`)
        if only_sources and not _sources & only_sources:

            continue

        _organisms = {9606}

        if 'species' in details:

            names = {
                spec.split('(')[0].strip()
                for spec in details['species']
            }
            _organisms = {
                taxonomy.ensure_ncbi_tax_id(name)
                for name in names
            }
            _organisms.discard(None)

            if only_human and 9606 not in _organisms:

                continue

        rec_methods = tps(experiments) if methods else None
        rec_references = (
            tps(details.get('pmids', ()))
                if references else
            None
        )
        rec_sources = tps(_sources) if sources else None
        rec_organisms = tps(_organisms) if organisms else None

        ids_a_1 = mapping.map_name(l[0], 'uniprot-entry', 'uniprot')
        ids_a_2 = mapping.map_name(l[1], 'entrez', 'uniprot')
        ids_b_1 = mapping.map_name(l[2], 'uniprot-entry', 'uniprot')
        ids_b_2 = mapping.map_name(l[3], 'entrez', 'uniprot')

        for id_a, id_b in itertools.product(
            ids_a_1 | ids_a_2,
            ids_b_1 | ids_b_2,
        ):

            result.add(
                HippieInteraction(
                    id_a=id_a,
                    id_b=id_b,
                    score=score,
                    methods=rec_methods,
                    references=rec_references,
                    sources=rec_sources,
                    organisms=rec_organisms,
                )
            )

    return list(result)
Example #16
0
    def __init__(self,
                 ncbi_tax_id,
                 input_param=None,
                 input_method=None,
                 map_by_homology_from=None,
                 trace=False,
                 id_type_enzyme=None,
                 id_type_substrate=None,
                 name=None,
                 homology_only_swissprot=True,
                 ptm_homology_strict=False,
                 **kwargs):
        """
        Unifies a `pypath.core.enz_sub.EnzymeSubstrateProcessor` and
        a `pypath.utils.homology.EnzymeSubstrateHomology` object to build
        a set of enzyme-substrate interactions from a database and
        subsequently translate them by homology to one different organism.
        Multiple organisms can be chosen as the source of the
        enzyme-substrate interactions. For example if you want mouse
        interactions, you can translate them from human and from rat.
        To get the original mouse interactions themselves, use
        another instance of the `EnzymeSubstrateProcessor`.
        To have both the original and the homology translated set,
        and also from multiple databases, and what is more, all these
        merged into a single set, use the `EnzymeSubstrateAggregator`.

        :param int ncbi_tax_id: The NCBI Taxonomy ID the interactions
                                should be translated to.
        :param input_param: Stored in the ``input_param`` attribute.
        :param str input_method: Data source for `EnzymeSubstrateProcessor`.
        :param int,set map_by_homology_from: NCBI Taxonomy ID(s) of the
                                             source organism(s); by default
                                             human, mouse and rat. The
                                             target organism is always
                                             removed from these.
        :param trace: Stored in the ``trace`` attribute.
        :param id_type_enzyme: Stored in the ``id_type_enzyme`` attribute.
        :param id_type_substrate: Stored in the ``id_type_substrate``
                                  attribute.
        :param name: Stored in the ``name`` attribute.
        :param bool homology_only_swissprot: Use only SwissProt
                                             (i.e. not Trembl) at homology
                                             translation.
        :param bool ptm_homology_strict: Use only those homologous PTM pairs
                                         which are in PhosphoSite data, i.e.
                                         do not look for residues with same
                                         offset in protein sequence.
        :param kwargs: Stored in the ``ptmprocargs`` attribute.

        See further options at `EnzymeSubstrateProcessor`.

        """

        if not hasattr(self, '_log'):

            # calling `__init__` on the class requires the instance
            # to be passed explicitly
            session_mod.Logger.__init__(self, name='enz_sub_homology')

        self.target_taxon = ncbi_tax_id
        # a new set is created, this way `discard` below never modifies
        # a set owned by the caller
        self.map_by_homology_from = set(
            common.to_set(map_by_homology_from or {9606, 10090, 10116})
        )
        self.map_by_homology_from.discard(self.target_taxon)

        self.input_param = input_param
        self.input_method = input_method
        self.trace = trace
        self.id_type_enzyme = id_type_enzyme
        self.id_type_substrate = id_type_substrate
        self.name = name
        self.ptmprocargs = kwargs

        homology.PtmHomology.__init__(
            self,
            target=ncbi_tax_id,
            only_swissprot=homology_only_swissprot,
            strict=ptm_homology_strict,
        )
Example #17
0
    def remove(lst, to_remove):
        """
        Returns a new list with the elements of ``lst`` which are not
        among ``to_remove``, keeping their order.

        ``to_remove`` is passed through ``common.to_set``.
        """

        excluded = common.to_set(to_remove)
        kept = []

        for item in lst:

            if item not in excluded:

                kept.append(item)

        return kept