def interactions(self, req):
    """
    Serve the interaction list for the edges selected by *req*.

    Builds one row per supported direction of each edge (plus one
    undirected row when the edge carries no direction), optionally
    appending `sources` and `references` columns, and returns either
    JSON or tabular output depending on the request's `format` arg.

    :param req: request object; ``req.args`` maps byte-string argument
        names to byte-string values (e.g. ``b'fields'``, ``b'format'``).
    :return: JSON string if ``format=json`` was requested, otherwise
        whatever ``self._table_output`` produces.
    """
    fields = [b'sources', b'references']
    elist = self._get_eids(req)
    res = []
    hdr = [
        'source', 'target', 'is_directed', 'is_stimulation',
        'is_inhibition'
    ]

    # optional extra columns, kept in the order of `fields`
    if b'fields' in req.args:
        hdr += [
            f.decode('utf-8') for f in fields
            if f in req.args[b'fields']
        ]

    all_sources = set()

    for eid in elist:
        e = self.g.es[eid]
        all_sources = all_sources | e['sources']

        # one row for each direction the edge supports
        for d in ['straight', 'reverse']:
            uniprots = getattr(e['dirs'], d)

            if e['dirs'].dirs[uniprots]:
                thisEdge = [
                    uniprots[0], uniprots[1], 1,
                    int(e['dirs'].is_stimulation(uniprots)),
                    int(e['dirs'].is_inhibition(uniprots))
                ]
                dsources = e['dirs'].get_dir(uniprots, sources=True)
                dsources = dsources | e['dirs'].get_dir(
                    'undirected', sources=True)

                if 'sources' in hdr:
                    thisEdge.append(list(dsources))

                if 'references' in hdr:
                    thisEdge.append([
                        r.pmid for r in flatList([
                            rs for s, rs in iteritems(e['refs_by_source'])
                            if s in dsources
                        ])
                    ])

                thisEdge.append(self._dip_urls(e))
                res.append(thisEdge)

        # undirected edges get a single row with zeroed flags
        if not e['dirs'].is_directed():
            thisEdge = [e['dirs'].nodes[0], e['dirs'].nodes[1], 0, 0, 0]

            if 'sources' in hdr:
                thisEdge.append(list(e['sources']))

            if 'references' in hdr:
                thisEdge.append([r.pmid for r in e['references']])

            thisEdge.append(self._dip_urls(e))
            res.append(thisEdge)

    # the DIP URL column is only kept when a DIP interaction is present
    if 'DIP' in all_sources:
        hdr.append('dip_url')
    else:
        # list comprehension instead of `map`: on Python 3 `map` is a
        # one-shot iterator and would be exhausted by a first traversal
        res = [r[:-1] for r in res]

    if b'format' in req.args and req.args[b'format'] == b'json':
        return json.dumps([dict(zip(hdr, r)) for r in res])
    else:
        return self._table_output(res, hdr, req)
def toplist(self,
            length=None,
            alpha=None,
            significant=True,
            min_set_size=0,
            groups=None,
            filtr=lambda x: True,
            **kwargs):
    """
    Return the top enriched terms, restricted to the gene sets that
    belong to the selected `groups`.

    Collects the positional/keyword arguments via ``get_args(locals())``
    (excluding `filtr` and `groups`) and forwards them to the parent
    class' ``toplist``, wrapping the caller's `filtr` so that only
    entries whose first element is a member of the selected sets pass.

    NOTE(review): because of the ``locals()`` snapshot, the local
    variable names and their order are part of the behavior here —
    do not rename them.
    """
    # capture all call arguments except the two handled locally
    args = get_args(locals(), ['filtr', 'groups'])

    if groups is None:
        groups = self.gsea.groups.keys()  # all by default

    # union of all gene sets belonging to the selected groups
    sets = set(
        common.flatList(s for g, s in iteritems(self.gsea.groups)
                        if g in groups))

    # delegate to the parent, additionally filtering by set membership
    return super(GSEABinaryEnrichmentSet, self).toplist(
        filtr=lambda x: x[0] in sets and filtr(x), **args)
def write_set(self, id_list, setname, id_type, map_ids=True):
    """
    Store a set of identifiers under `setname` in ``self.sets``.

    With `map_ids` True each identifier in `id_list` is translated by
    the mapper from `id_type` to the target ID type and the mapped
    results are flattened and de-duplicated; otherwise the identifiers
    are stored as given.
    """
    if map_ids:
        # translate every input ID, then flatten and de-duplicate
        translated = (
            self.mapper.map_name(n, self.ids[id_type], self.target_id)
            for n in id_list
        )
        members = set(common.uniqList(common.flatList(translated)))
    else:
        members = set(id_list)

    self.sets[setname] = members
def get_pubmed_data(pp, cachefile=None, htp_threshold=20):
    """
    For one PyPath object, obtains metadata for all PubMed IDs
    through NCBI E-utils.

    :param pp: ``pypath.PyPath`` object
    :param cachefile: Path of a pickle file caching previously
        downloaded PubMed records. If ``None``, the path is read from
        ``settings.get('pubmed_cache')``.
    :param htp_threshold: The number of interactions for one reference
        above which the study is considered to be high-throughput; such
        references are excluded. ``None`` disables the filter.
    :return: Tuple of two ``pandas.DataFrame`` objects: one record per
        (source, pmid) pair, and the earliest publication year per
        source and edge.
    """
    if cachefile is None:
        cachefile = settings.get('pubmed_cache')

    if htp_threshold is not None:
        pp.htp_stats()

    pubmeds = common.uniqList(
        common.flatList([[r.pmid for r in e['references']]
                         for e in pp.graph.es]))

    if htp_threshold is not None:
        # drop references coming from high-throughput studies
        pubmeds = set(pubmeds) - pp.htp[htp_threshold]['htrefs']

    notpmid = [i for i in pubmeds if not i.isdigit()]
    sys.stdout.write('\t:: Number of non PubMed ID references: %u\n' %
                     len(notpmid))

    pmdata = {}

    if os.path.exists(cachefile):
        sys.stdout.write('\t:: Loading data previously downloaded '
                         'from PubMed, from file `%s`\n' % cachefile)
        # context manager: close the handle even if unpickling fails
        with open(cachefile, 'rb') as fp:
            pmdata = pickle.load(fp)

    missing = list(set(pubmeds) - set(pmdata.keys()))
    sys.stdout.write('\t:: Downloading data from PubMed about %s papers\n' %
                     len(missing))
    cached_pubmeds_len = len(pmdata)
    pmdata_new = dataio.get_pubmeds(missing)
    pmdata.update(pmdata_new)

    sys.stdout.write('\t:: Saving PubMed data to file `%s`\n' % cachefile)

    # only rewrite the cache if new records were actually downloaded
    if len(pmdata) > cached_pubmeds_len:
        with open(cachefile, 'wb') as fp:
            pickle.dump(pmdata, fp)

    # keep only the records relevant for this network
    pmdata = {pmid: rec for pmid, rec in pmdata.items() if pmid in pubmeds}

    points = []
    earliest = []

    for e in pp.graph.es:
        for s, rs in iteritems(e['refs_by_source']):
            # references passing the HTP filter with a known pubdate
            pms = [
                r.pmid for r in rs
                if (htp_threshold is None
                    or r.pmid not in pp.htp[htp_threshold]['htrefs'])
                and r.pmid in pmdata and 'pubdate' in pmdata[r.pmid]
            ]
            if len(pms) > 0:
                yrs = [int(pmdata[pm]['pubdate'][:4]) for pm in pms]
                earliest.append((s, 0, min(yrs), '', e.index))

                for pm in pms:
                    points.append((s, pm, int(pmdata[pm]['pubdate'][:4]),
                                   pmdata[pm]['source'], e.index))

    points = common.uniqList(points)
    earliest = common.uniqList(earliest)
    points = pd.DataFrame.from_records(points)
    earliest = pd.DataFrame.from_records(earliest)
    points.columns = ['database', 'pmid', 'year', 'journal', 'eid']
    earliest.columns = ['database', 'none', 'year', 'none', 'eid']

    return points, earliest