Example #1
 def interactions(self, req):
     # Build a table of directed and undirected edges for the edge IDs
     # selected by the request, then return it as JSON or as a table.
     fields = [b'sources', b'references']
     elist = self._get_eids(req)
     res = []
     hdr = [
         'source', 'target', 'is_directed', 'is_stimulation',
         'is_inhibition'
     ]
     if b'fields' in req.args:
         hdr += [
             f.decode('utf-8') for f in fields if f in req.args[b'fields']
         ]
     all_sources = set()
     for eid in elist:
         e = self.g.es[eid]
         all_sources = all_sources | e['sources']
         # Emit one row per direction that is supported by any source.
         for d in ['straight', 'reverse']:
             uniprots = getattr(e['dirs'], d)
             if e['dirs'].dirs[uniprots]:
                 thisEdge = [
                     uniprots[0], uniprots[1], 1,
                     int(e['dirs'].is_stimulation(uniprots)),
                     int(e['dirs'].is_inhibition(uniprots))
                 ]
                 dsources = e['dirs'].get_dir(uniprots, sources=True)
                 dsources = dsources | e['dirs'].get_dir('undirected',
                                                         sources=True)
                 if 'sources' in hdr:
                     thisEdge.append(list(dsources))
                 if 'references' in hdr:
                     thisEdge.append([
                         r.pmid for r in flatList([
                             rs for s, rs in iteritems(e['refs_by_source'])
                             if s in dsources
                         ])
                     ])
                 thisEdge.append(self._dip_urls(e))
                 res.append(thisEdge)
         # Edges without direction information get one row with zero flags.
         if not e['dirs'].is_directed():
             thisEdge = [e['dirs'].nodes[0], e['dirs'].nodes[1], 0, 0, 0]
             if 'sources' in hdr:
                 thisEdge.append(list(e['sources']))
             if 'references' in hdr:
                 thisEdge.append([r.pmid for r in e['references']])
             thisEdge.append(self._dip_urls(e))
             res.append(thisEdge)
     if 'DIP' in all_sources:
         hdr.append('dip_url')
     else:
         res = [r[:-1] for r in res]  # drop the trailing dip_url column
     if b'format' in req.args and req.args[b'format'] == b'json':
         return json.dumps([dict(zip(hdr, r)) for r in res])
     else:
         return self._table_output(res, hdr, req)
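The closing branch zips each row with the header to build one JSON record per edge. Below is a minimal standalone sketch of that pattern; the header comes from the code above, while the row values are made-up placeholders:

import json

hdr = ['source', 'target', 'is_directed', 'is_stimulation', 'is_inhibition']
res = [
    ['P00533', 'P04626', 1, 1, 0],  # hypothetical directed edge
    ['P04626', 'P00533', 0, 0, 0],  # hypothetical undirected edge
]

# One dict per row, keyed by the column names, as in the JSON branch above.
print(json.dumps([dict(zip(hdr, r)) for r in res], indent=2))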
Example #2
 def toplist(self,
             length=None,
             alpha=None,
             significant=True,
             min_set_size=0,
             groups=None,
             filtr=lambda x: True,
             **kwargs):
     # Collect all the other local arguments so they can be forwarded
     # to the parent class' toplist.
     args = get_args(locals(), ['filtr', 'groups'])
     if groups is None:
         groups = self.gsea.groups.keys()  # all groups by default
     sets = set(
         common.flatList(s for g, s in iteritems(self.gsea.groups)
                         if g in groups))
     return super(GSEABinaryEnrichmentSet,
                  self).toplist(filtr=lambda x: x[0] in sets and filtr(x),
                                **args)
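The method narrows the parent's toplist by composing the caller's predicate with a set-membership test. A minimal sketch of that filter composition, using hypothetical records and sets:

sets = {'TP53', 'EGFR'}                  # hypothetical gene sets
filtr = lambda x: x[1] < 0.05            # hypothetical user predicate

combined = lambda x: x[0] in sets and filtr(x)

records = [('TP53', 0.01), ('MYC', 0.2), ('EGFR', 0.3)]
print([r for r in records if combined(r)])  # -> [('TP53', 0.01)]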
Example #3
 def write_set(self, id_list, setname, id_type, map_ids=True):
     # Optionally translate the IDs to the target ID type; the mapper
     # returns a list per input ID, hence the flatten + deduplicate step.
     self.sets[setname] = (
         set(common.uniqList(common.flatList(
             self.mapper.map_name(n, self.ids[id_type], self.target_id)
             for n in id_list
         )))
         if map_ids else set(id_list)
     )
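common.flatList and common.uniqList flatten the per-ID mapping results and deduplicate them; since the result is stored as a set anyway, the same effect can be sketched with itertools.chain (the mapper output below is made up):

from itertools import chain

mapped = [['P00533'], ['P00533', 'Q9Y6K9'], []]  # hypothetical map_name output
id_set = set(chain.from_iterable(mapped))
print(id_set)  # {'P00533', 'Q9Y6K9'}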
Example #4
import os
import sys
import pickle

import pandas as pd
from future.utils import iteritems

# pypath submodules referenced below (pypath 0.x layout).
from pypath import common, dataio, settings


def get_pubmed_data(pp, cachefile=None, htp_threshold=20):
    """
    For one PyPath object, obtains metadata for all PubMed IDs
    through NCBI E-utils.

    :param pp:
        ``pypath.PyPath`` object
    :param htp_threshold:
        The number of interactions for one reference
        above the study considered to be high-throughput.
    """

    if cachefile is None:

        cachefile = settings.get('pubmed_cache')

    if htp_threshold is not None:
        pp.htp_stats()

    pubmeds = common.uniqList(
        common.flatList([[r.pmid for r in e['references']]
                         for e in pp.graph.es]))

    if htp_threshold is not None:
        pubmeds = set(pubmeds) - pp.htp[htp_threshold]['htrefs']

    notpmid = [i for i in pubmeds if not i.isdigit()]

    sys.stdout.write('\t:: Number of non-PubMed ID references: %u\n' %
                     len(notpmid))

    pmdata = {}
    if os.path.exists(cachefile):
        sys.stdout.write('\t:: Loading data previously downloaded '
                         'from PubMed, from file `%s`\n' % cachefile)
        with open(cachefile, 'rb') as fp:
            pmdata = pickle.load(fp)

    missing = list(set(pubmeds) - set(pmdata.keys()))
    sys.stdout.write('\t:: Downloading data from PubMed about %s papers\n' %
                     len(missing))
    cached_pubmeds_len = len(pmdata)
    pmdata_new = dataio.get_pubmeds(missing)
    pmdata.update(pmdata_new)

    sys.stdout.write('\t:: Saving PubMed data to file `%s`\n' % cachefile)

    if len(pmdata) > cached_pubmeds_len:
        with open(cachefile, 'wb') as fp:
            pickle.dump(pmdata, fp)

    # Keep only the entries for the references selected above.
    pmdata = {pmid: record for pmid, record in pmdata.items()
              if pmid in pubmeds}

    points = []
    earliest = []

    # Collect one (database, pmid, year, journal, edge) record per
    # reference, plus the earliest year per database and edge.
    for e in pp.graph.es:

        for s, rs in iteritems(e['refs_by_source']):

            pms = [
                r.pmid for r in rs
                if (htp_threshold is None
                    or r.pmid not in pp.htp[htp_threshold]['htrefs'])
                and r.pmid in pmdata and 'pubdate' in pmdata[r.pmid]
            ]
            if len(pms) > 0:
                yrs = [int(pmdata[pm]['pubdate'][:4]) for pm in pms]
                earliest.append((s, 0, min(yrs), '', e.index))
                for pm in pms:
                    points.append((s, pm, int(pmdata[pm]['pubdate'][:4]),
                                   pmdata[pm]['source'], e.index))

    points = common.uniqList(points)
    earliest = common.uniqList(earliest)

    points = pd.DataFrame.from_records(points)
    earliest = pd.DataFrame.from_records(earliest)
    points.columns = ['database', 'pmid', 'year', 'journal', 'eid']
    earliest.columns = ['database', 'none', 'year', 'none', 'eid']

    return points, earliest
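The pickle handling above is a load, fetch-missing, save cache pattern. A minimal standalone sketch of it, with a made-up cache path and a stand-in for the dataio.get_pubmeds download step:

import os
import pickle

cachefile = 'pubmed_cache.pickle'   # hypothetical path
wanted = {'111', '222', '333'}      # hypothetical PubMed IDs

data = {}
if os.path.exists(cachefile):
    with open(cachefile, 'rb') as fp:
        data = pickle.load(fp)

missing = wanted - set(data)
# Stand-in for the real download (dataio.get_pubmeds(missing)):
data.update({pmid: {'pubdate': '2020 Jan'} for pmid in missing})

if missing:
    with open(cachefile, 'wb') as fp:
        pickle.dump(data, fp)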