def __init__(
        self,
        members,
        name = None,
        parent = None,
        aspect = 'functional',
        source = 'resource_specific',
        scope = 'specific',
        resource = None,
        transmitter = None,
        receiver = None,
        limit = None,
        avoid = None,
        enabled = True,
    ):
    """
    A named group (set) of annotated entities.

    members : iterable
        Identifiers of the entities belonging to this group.
    name : str
        Label of the group; falls back to ``'unnamed'``.
    parent : str
        Label of the parent category; defaults to the group's own name.
    resource : str
        Name of the originating database; when not provided, taken from
        the ``annot_composite_database_name`` setting, or ``'Unknown'``.
    limit, avoid
        One or more values used for filtering; normalized to sets.
    """

    collections_abc.Set.__init__(self)

    self.members = set(members)
    self.name = name or 'unnamed'
    # must follow the assignment of `self.name`: by default the group
    # is its own parent
    self.parent = parent or self.name
    self.aspect = aspect
    self.source = source
    self.scope = scope
    self.transmitter = transmitter
    self.receiver = receiver
    self.limit = common.to_set(limit)
    self.avoid = common.to_set(avoid)
    self.enabled = enabled

    if not resource:

        resource = (
            settings.get('annot_composite_database_name') or
            'Unknown'
        )

    self.resource = resource
def __init__(
        self,
        components,
        name = None,
        ids = None,
        sources = None,
        interactions = None,
        references = None,
        proteins = None,
        attrs = None,
    ):
    """
    Represents a molecular complex.

    components : list,dict
        Either a list of identifiers or a dict with identifiers as keys
        and stoichiometric coefficients as values. A list of identifiers
        is also assumed to represent stoichiometry by repetition of
        identifiers.
    name : str
        A custom name or identifier of the complex.
    ids : dict
        Identifiers. If ``sources`` is a set, list or tuple it should be
        a dict with database names as keys and set of identifiers as
        values. If ``sources`` is a string, it can be a set of
        identifiers or a single identifier.
    sources : set,str
        Database(s) the complex has been defined in.
    interactions : list,dict
        Interactions between the components of the complex. Either a
        list of tuples of component IDs or a dict with tuples as keys
        and custom interaction properties as values.
    proteins : list,dict
        Synonym for ``components``, kept for compatibility.
    """

    components = components or proteins

    # a list encodes stoichiometry by repetition: count the occurrences
    self.components = (
        components
            if isinstance(components, dict) else
        dict(collections.Counter(components))
    )
    # legacy alias, points at the very same dict
    self.proteins = self.components

    self.name = name
    self.ids = collections.defaultdict(set)
    self.add_ids(ids, source = sources)
    self.sources = common.to_set(sources)
    self.references = common.to_set(references)

    # always hold an own copy of the custom attributes
    self.attrs = dict(attrs) if isinstance(attrs, dict) else {}

    self.interactions = interactions
def filter_entity_type(cls, entities, entity_type):
    """
    Filters an iterable of entities or identifiers keeping only the
    ones of type(s) in ``entity_type``.

    :param iterable entities:
        A list, set, tuple or other iterable yielding entities or
        identifiers.
    :param str,set entity_type:
        One or more entity types e.g. ``{'protein', 'mirna'}``.
    :returns:
        Same type of object as ``entities`` if the type of the object
        is list, set or tuple, otherwise a generator.
    """

    # nothing to filter or nothing to filter by
    if not entities or not entity_type:

        return entities

    entity_type = common.to_set(entity_type)

    filtered = (
        e
        for e in entities
        if cls._get_entity_type(e) in entity_type
    )

    return (
        type(entities)(filtered)
            if isinstance(entities, common.list_like) else
        filtered
    )
def _process_references(references):
    """
    Normalizes one or more references into a set of
    ``refs.Reference`` instances.
    """

    result = set()

    for ref in common.to_set(references):

        if not isinstance(ref, refs.Reference):

            ref = refs.Reference(ref)

        result.add(ref)

    return result
def add_ids(self, ids, source = None):
    """
    Adds identifiers to the ``ids`` dict of this object.

    ids : str,set,dict
        One or more identifiers; if a dict, keys are database (source)
        names and values are identifiers.
    source : str,set
        Database name(s) the identifiers belong to; used only when
        ``ids`` is not already a dict.
    """

    if not isinstance(ids, dict):

        ids = common.to_set(ids)

        if isinstance(ids, set) and source:

            # the same set of identifiers assigned to each source
            ids = {
                s: ids
                for s in common.to_set(source)
            }

    if isinstance(ids, dict):

        for id_source, these_ids in iteritems(ids):

            self.ids[id_source].update(common.to_set(these_ids))
def _process_boolean_group_args(values, postfix):
    """
    Builds a boolean expression (suitable for `pandas.DataFrame.query`)
    joining one or more boolean column names by ``or``.

    values : str,list,set
        One or more column names.
    postfix : str,NoneType
        Suffix appended to each column name (e.g. to address one side
        of an interaction data frame).

    :returns:
        The expression as a string.
    """

    if postfix:

        values = {
            '%s%s' % (val, postfix)
            for val in common.to_list(values)
        }

    # sorted for a deterministic expression: set iteration order varies
    # between interpreter runs, which previously made the generated
    # query string non-reproducible
    return ' or '.join(sorted(common.to_set(values)))
def filter_df(
        cls,
        annot_df,
        category=None,
        name=None,
        parent=None,
        database=None,
        scope=None,
        aspect=None,
        source=None,
        entities=None,
        entity_type=None,
        causality=None,
        topology=None,
        postfix=None,
    ):
    """
    Filters an annotation data frame by various criteria.

    Most keyword arguments (category, parent, database, scope, aspect,
    source, entity_type) are captured via ``locals()`` and forwarded in
    bulk to ``_process_query_args``; ``entities``, ``causality`` and
    ``topology`` receive dedicated handling. Returns the filtered data
    frame, or ``annot_df`` unchanged if no query was built.
    """

    # `name` is accepted as a synonym of `category`
    category = category or name
    # snapshot of all arguments (including `cls`, `annot_df` and both
    # `category` and `name`); must be taken before `_topologies` is
    # defined so that it contains only the arguments
    args = locals()

    # short labels accepted for the membrane topology values
    _topologies = {
        'pmtm': 'plasma_membrane_transmembrane',
        'pmp': 'plasma_membrane_peripheral',
        'sec': 'secreted',
    }

    # these three get dedicated handling, remove them from the bulk args
    entities = args.pop('entities')
    causality = args.pop('causality') or ()
    topology = args.pop('topology') or ()
    topology = [
        _topologies[top] if top in _topologies else top
        for top in common.to_set(topology)
    ]

    query = cls._process_query_args(
        df=annot_df,
        entities=entities,
        args=args,
        postfix=postfix,
    )

    # causality and topology are boolean columns joined by `or`
    if causality:
        query.append(cls._process_boolean_group_args(causality, postfix))
    if topology:
        query.append(cls._process_boolean_group_args(topology, postfix))

    # NOTE(review): the result of this call is never used afterwards —
    # either the reassignment is redundant or the postfixed args were
    # meant to be applied earlier; confirm against `_args_add_postfix`
    args = cls._args_add_postfix(args, postfix)

    query = ' and '.join(query)

    return annot_df.query(query) if query else annot_df
def _foreign_resources_set(resources):
    """
    Normalizes one or more resources into a set, replacing objects that
    carry a ``resource`` attribute by that attribute.

    resources : str,object,list,set
        One or more resource names or resource-carrying objects.

    :returns:
        Set of resource names or objects.
    """

    # bug fix: iterate the normalized set. Previously the normalized
    # value was bound to an unused variable (`other`) and the raw
    # argument was iterated instead, so a single string argument would
    # have been iterated character by character.
    resources = common.to_set(resources)

    return {
        (
            res.resource
                if hasattr(res, 'resource') else
            res
        )
        for res in resources
    }
def get_desc(rec, attr):
    """
    Extracts and normalizes the description belonging to an attribute
    of a record.

    rec : dict
        A record expected to carry ``<attr>`` and ``<attr>_desc`` keys.
    attr : str
        Name of the attribute.

    :returns:
        Tuple of sorted, lower case, comma-separated description
        tokens; empty tuple if no description is available.
    """

    desc = '%s_desc' % attr

    # empty if the attribute is explicitly 'False', or both the
    # attribute and its description are missing; otherwise the
    # description, falling back to the attribute name itself.
    # NOTE(review): `rec[desc]` is read even when `attr` is absent —
    # assumes the `_desc` key always exists; confirm against the
    # input format.
    value = (
        ''
            if (
                attr in rec and rec[attr] == 'False' or
                attr not in rec and not rec[desc]
            ) else
        rec[desc] if rec[desc] else attr
    )

    # `replacements` comes from the enclosing (module or closure) scope
    for pattern, repl in iteritems(replacements):

        value = value.replace(pattern, repl)

    # split the cleaned string into tokens; None collapses to an empty
    # set in `common.to_set` below
    value = value.lower().split(',') if value else None

    return tuple(sorted(common.to_set(value)))
def _dip_urls(self, e):
    """
    Builds semicolon separated DIP interaction URLs from the ``dip_id``
    attribute of an interaction record.

    e :
        An interaction object carrying either an ``attrs`` or an
        ``attributes`` dict.

    :returns:
        The URLs joined by semicolons; empty string if no DIP ID is
        available.
    """

    attrs = e.attrs if hasattr(e, 'attrs') else e.attributes
    result = []

    if 'dip_id' in attrs:

        # loop-invariant, hoisted out of the loop
        url_tmpl = urls.urls['dip']['ik']

        for dip_id in sorted(common.to_set(attrs['dip_id'])):

            try:
                # apparently IDs carry a numeric part between the dash
                # and a trailing letter (e.g. `DIP-12345E`)
                num = int(dip_id.split('-')[1][:-1])

            # narrowed from a bare `except`: only a malformed ID is a
            # recoverable condition here
            except (IndexError, ValueError):
                self._log('Could not find DIP ID: %s' % dip_id)

            else:
                result.append(url_tmpl % num)

    return ';'.join(result)
def match(
        self,
        resource=None,
        data_model=None,
        interaction_type=None,
        via=False,
        references=None,
    ):
    """
    Tells whether this evidence matches the given criteria.

    Criteria left at their defaults are not applied. A criterion given
    as a list-like is matched by membership, a scalar by equality.

    :returns:
        bool -- True if all provided criteria match.
    """

    # membership test for list-likes, equality otherwise
    def _match(attr, value):

        return (
            getattr(self.resource, attr) in value
                if isinstance(value, common.list_like) else
            getattr(
                self.resource, attr) == value
        )

    # an Evidence can be passed in place of a resource
    resource = (
        resource.resource
            if isinstance(resource, Evidence) else
        resource
    )
    # missing criteria are borrowed from the resource object if it
    # carries them
    interaction_type = (
        resource.interaction_type
            if (
                interaction_type is None and
                hasattr(resource, 'interaction_type')
            ) else
        interaction_type
    )
    # NOTE(review): the default is False, not None, so this borrowing
    # branch only fires when the caller explicitly passes via=None —
    # confirm this asymmetry with the defaults is intended
    via = (
        resource.via
            if (via is None and hasattr(resource, 'via')) else
        via
    )
    # NOTE(review): unlike the two above, this overrides a caller
    # provided `data_model` unconditionally whenever the resource has
    # one — looks inconsistent; verify
    data_model = (
        resource.data_model
            if hasattr(resource, 'data_model') else
        data_model
    )
    references = common.to_set(references)

    return (
        (
            resource is None or
            (
                self.resource.name in resource
                    if isinstance(resource, set) else
                self.resource == resource
            )
        ) and
        (
            interaction_type is None or
            _match('interaction_type', interaction_type)
        ) and
        (
            via is None or
            (via == False and not self.resource.via) or
            (via == True and self.resource.via) or
            _match('via', via)
        ) and
        (not references or self.references & references) and
        (not data_model or _match('data_model', data_model))
    )
def protmapper_enzyme_substrate(
        only_evidences=None,
        only_literature=False,
        interactions=False,
    ):
    """
    Enzyme-substrate relationships from the ProtMapper database.

    :arg str,set,NoneType only_evidences:
        Keep only the interactions with these evidence type, e.g.
        `VALID`. See the 'descriptions' column in the 'evidences.csv'
        supplementary table.
    :arg bool only_literature:
        Keep only records with at least one literature reference
        (PMID).
    :arg bool interactions:
        Return plain interaction records (lists of controller,
        target, sources, references) instead of enzyme-substrate
        dicts.
    """

    # ProtMapper source labels translated to the resource names used
    # in this module
    databases = {
        'signor': 'SIGNOR',
        'psp': 'PhosphoSite',
        'sparser': 'Sparser',
        'reach': 'REACH',
        'pid': 'NCI-PID',
        'reactome': 'Reactome',
        'rlimsp': 'RLIMS-P',
        'bel': 'BEL-Large-Corpus',
    }

    result = []
    only_evidences = common.to_set(only_evidences)

    records, evidences = get_protmapper()

    for rec in records:

        # keep only UniProt identified controllers
        if rec['CTRL_NS'] != 'UP':
            continue

        # look up the evidence list once per record
        rec_evidences = evidences[rec['ID']]

        if only_evidences:

            ev_types = {ev['DESCRIPTION'] for ev in rec_evidences}

            if not only_evidences & ev_types:
                continue

        references = {
            ev['PMID']
            for ev in rec_evidences
            if ev['PMID']
        }

        if only_literature and not references:
            continue

        sources = {
            databases.get(source, source)
            for source in rec['SOURCES'].strip('"').split(',')
        }

        # note: a modification type ('phosphorylation' vs. 'unknown',
        # derived from CTRL_IS_KINASE) used to be computed here but was
        # never included in either output format; the dead variable
        # has been removed

        if interactions:

            result.append([
                rec['CTRL_ID'],
                rec['TARGET_UP_ID'],
                sources,
                references,
            ])

        else:

            result.append({
                'kinase': rec['CTRL_ID'],
                'resaa': rec['TARGET_RES'],
                'resnum': int(rec['TARGET_POS']),
                'references': references,
                'substrate': rec['TARGET_UP_ID'],
                'databases': sources,
            })

    return result
def sets(*args):
    """
    Yields each argument as a set: sets pass through unchanged, objects
    with a ``members`` attribute contribute their members, anything
    else is normalized by ``common.to_set``.

    :returns:
        Generator of sets.
    """

    for a in args:

        if isinstance(a, set):

            yield a

        elif hasattr(a, 'members'):

            yield a.members

        else:

            yield common.to_set(a)
def pathwaycommons_interactions(
        resources=None,
        types=None,
        by_interaction=False,
        version=12,
    ):
    """
    Interactions from PathwayCommons.

    resources : str,list,set
        One or more resource names (matched case-insensitively against
        the known PathwayCommons resources); all resources by default.
    types : str,set
        Keep only these interaction types (second column of the files).
    by_interaction : bool
        Group the records into a dict keyed by (partner_a, type,
        partner_b) tuples instead of returning a flat list.
    version : int
        Maximum PathwayCommons version to download; capped per resource
        by the version the resource provides.

    :returns:
        List of ``PathwayCommonsInteraction`` records, or — with
        ``by_interaction=True`` — a dict of sets of such records.
    """

    interactions = collections.defaultdict(set) if by_interaction else []

    types = common.to_set(types)

    # requested resource names, lower case; all known resources when
    # none requested
    resources = {
        res.lower()
        for res in (
            common.to_list(resources) or
            (
                pc_res.name
                for pc_res in pathwaycommons_resources
            )
        )
    }

    prg = progress.Progress(
        len(resources),
        'Processing PathwayCommons',
        1,
        percent=False,
    )

    url = urls.urls['pwcommons']['url']

    for resource in pathwaycommons_resources:

        # match by either the PC label or the lower case name
        # NOTE(review): `pc_label` is compared without lowercasing while
        # the requested names are lowercased — assumes PC labels are
        # already lower case; confirm
        if not resources & {resource.pc_label, resource.name.lower()}:
            continue

        prg.step()

        # never request a newer version than the resource provides
        _version = min(resource.version, version)

        resource_url = url % (_version, _version, resource.pc_label)
        c = curl.Curl(resource_url, silent=False, large=True)

        for l in c.result:

            if hasattr(l, 'decode'):
                l = l.decode('ascii')

            # fields: partner_a, interaction type, partner_b
            l = l.strip('\n\r').split('\t')

            if not types or l[1] in types:

                if by_interaction:

                    a_b = (l[0], l[1], l[2])
                    b_a = (l[2], l[1], l[0])

                    directed = l[1] in pathwaycommons_directed_types

                    # an undirected interaction already recorded in the
                    # reverse orientation is merged under that key
                    key = (
                        b_a
                            if (
                                a_b not in interactions and
                                not directed and
                                b_a in interactions
                            ) else
                        a_b
                    )

                    interactions[key].add(
                        PathwayCommonsInteraction(
                            *key,
                            resource=resource.name
                        )
                    )

                else:

                    l.append(resource.name)
                    interactions.append(PathwayCommonsInteraction(*l))

    return interactions
def hippie_interactions(
        score_threshold=.75,
        only_human=False,
        only_sources=None,
        only_methods=None,
        methods=False,
        sources=False,
        references=True,
        organisms=False,
    ):
    """
    Interactions from the HIPPIE database.

    score_threshold : float
        Keep only interactions with a confidence score of at least this
        value.
    only_human : bool
        Keep only interactions where human (NCBI tax ID 9606) is among
        the detected organisms.
    only_sources : str,set
        Keep only interactions annotated by at least one of these
        source databases.
    only_methods : str,set
        Keep only interactions detected by at least one of these
        experimental methods.
    methods, sources, references, organisms : bool
        Include the respective detail fields in the output records
        (``None`` otherwise).

    :returns:
        List of ``HippieInteraction`` named tuples.
    """

    only_sources = common.to_set(only_sources)
    only_methods = common.to_set(only_methods)

    HippieInteraction = collections.namedtuple(
        'HippieInteraction',
        [
            'id_a',
            'id_b',
            'score',
            'methods',
            'references',
            'sources',
            'organisms',
        ],
    )

    tps = lambda i: tuple(sorted(i))

    url = urls.urls['hippie']['url']
    c = curl.Curl(url, large=True, silent=False)

    result = set()

    for l in c.result:

        l = l.strip('\r\n').split('\t')

        score = float(l[4])

        if score < score_threshold:
            continue

        ids_a_1 = mapping.map_name(l[0], 'uniprot-entry', 'uniprot')
        ids_a_2 = mapping.map_name(l[1], 'entrez', 'uniprot')
        ids_b_1 = mapping.map_name(l[2], 'uniprot-entry', 'uniprot')
        ids_b_2 = mapping.map_name(l[3], 'entrez', 'uniprot')

        # the detail fields depend only on the line, not on the ID
        # pair: parse and filter once per line (previously done
        # redundantly inside the per-pair loop)
        details = dict(
            (
                dd[0],
                set(dd[1].split(',')),
            )
            for dd in (d.split(':') for d in l[5].split(';'))
        )

        _sources = details['sources'] if 'sources' in details else set()
        experiments = (
            details['experiments'] if 'experiments' in details else set()
        )

        if only_methods and not experiments & only_methods:
            continue

        # bug fix: this condition was previously guarded by
        # `only_methods` instead of `only_sources`, so the source
        # filter was skipped whenever no method filter was given,
        # and spuriously applied (against an empty set) when one was
        if only_sources and not _sources & only_sources:
            continue

        _organisms = {9606}

        if 'species' in details:

            names = {
                spec.split('(')[0].strip()
                for spec in details['species']
            }
            _organisms = {
                taxonomy.ensure_ncbi_tax_id(name)
                for name in names
            }
            _organisms.discard(None)

        if only_human and 9606 not in _organisms:
            continue

        for id_a, id_b in itertools.product(
            ids_a_1 | ids_a_2,
            ids_b_1 | ids_b_2,
        ):

            result.add(
                HippieInteraction(
                    id_a=id_a,
                    id_b=id_b,
                    score=score,
                    methods=tps(experiments) if methods else None,
                    references=(
                        tps(details['pmids']) if references else None
                    ),
                    sources=tps(_sources) if sources else None,
                    organisms=tps(_organisms) if organisms else None,
                )
            )

    return list(result)
def __init__(self,
             ncbi_tax_id,
             input_param=None,
             input_method=None,
             map_by_homology_from=None,
             trace=False,
             id_type_enzyme=None,
             id_type_substrate=None,
             name=None,
             homology_only_swissprot=True,
             ptm_homology_strict=False,
             **kwargs):
    """
    Unifies a `pypath.core.enz_sub.EnzymeSubstrateProcessor` and a
    `pypath.utils.homology.EnzymeSubstrateHomology` object to build
    a set of enzyme-substrate interactions from a database and
    subsequently translate them by homology to one different organism.
    Multiple organism can be chosen as the source of the
    enzyme-substrate interactions. For example if you want mouse
    interactions, you can translate them from human and from rat.
    To get the original mouse interactions themselves, use an
    other instance of the `EnzymeSubstrateProcessor`.
    To have both the original and the homology translated set,
    and also from multiple databases, whatmore all these merged
    into a single set, use the `EnzymeSubstrateAggregator`.

    :param str input_method:
        Data source for `EnzymeSubstrateProcessor`.
    :param int ncbi_tax_id:
        The NCBI Taxonomy ID the interactions should be
        translated to.
    :param bool homology_only_swissprot:
        Use only SwissProt (i.e. not Trembl) at homology translation.
    :param bool ptm_homology_strict:
        Use only those homologous PTM pairs which are in PhosphoSite
        data, i.e. do not look for residues with same offset in
        protein sequence.

    See further options at `EnzymeSubstrateProcessor`.
    """

    if not hasattr(self, '_log'):

        # bug fix: `__init__` called through the class needs the
        # instance as its first argument; `self` was previously
        # missing, raising a TypeError whenever this branch ran
        session_mod.Logger.__init__(self, name='enz_sub_homology')

    self.target_taxon = ncbi_tax_id

    # default source organisms: human, mouse, rat
    self.map_by_homology_from = (
        map_by_homology_from or {9606, 10090, 10116}
    )
    self.map_by_homology_from = common.to_set(self.map_by_homology_from)
    # the target organism itself needs no homology translation
    self.map_by_homology_from.discard(self.target_taxon)

    self.input_param = input_param
    self.input_method = input_method
    self.trace = trace
    self.id_type_enzyme = id_type_enzyme
    self.id_type_substrate = id_type_substrate
    self.name = name
    # remaining keyword arguments are forwarded to the
    # EnzymeSubstrateProcessor
    self.ptmprocargs = kwargs

    homology.PtmHomology.__init__(
        self,
        target=ncbi_tax_id,
        only_swissprot=homology_only_swissprot,
        strict=ptm_homology_strict,
    )
def remove(lst, to_remove):
    """
    Returns a new list keeping only the elements of ``lst`` that are
    not in ``to_remove`` (one value or an iterable of values).
    """

    exclude = common.to_set(to_remove)

    return [
        element
        for element in lst
        if element not in exclude
    ]