Esempio n. 1
0
    def __init__(
        self,
        total=None,
        name="Progress",
        interval=None,
        percent=True,
        status='initializing',
        done=0,
        init=True,
        unit='it',
        off=None,
    ):
        """
        Progress indicator.

        total : int
            Total number of items to process.
        name : str
            Label shown next to the progress indicator.
        interval : int
            Number of items between two updates; by default 1% of
            `total` (but at least 1).
        status : str
            Initial status message.
        done : int
            Number of items already processed.
        init : bool
            Whether to set up the tqdm instance immediately.
        unit : str
            Name of the unit of the processed items.
        off : bool
            Disable the progress indicator; if None, the module level
            `progressbars` setting decides.
        """

        # the module level setting decides unless explicitly overridden
        self.off = not settings.get('progressbars') if off is None else off

        self.name = name
        # default: report at each 1% of the total, but at least every item
        self.interval = (
            interval
                if interval is not None else
            max(int(total / 100), 1)
        )
        self.total = total
        self.done = done
        self.status = status
        self.unit = unit
        self.start_time = time.time()
        # do not refresh the display more often than this (seconds)
        self.min_update_interval = 0.1
        self.last_printed_value = 0

        if init and not self.off:

            self.init_tqdm()
Esempio n. 2
0
File: log.py Progetto: kkaris/pypath
def new_logger(name=None, logdir=None, verbosity=None, **kwargs):
    """
    Returns a new logger with default settings (can be customized).

    Parameters
    ----------
    name : str
        Custom name for the log.
    logdir : str
        Path to the directory to store log files.
    verbosity : int
        Verbosity level, lowest is 0. Messages from levels above this
        won't be written to the log. If None, ``Logger`` falls back to
        the module level `log_verbosity` setting.

    Returns
    -------
    ``log.Logger`` instance.
    """

    name = name or settings.get('module_name')
    logdir = logdir or '%s_log' % name

    # make the timestamp filesystem friendly
    stamp = Logger.timestamp().replace(' ', '_').replace(':', '.')

    return Logger(
        fname='%s__%s.log' % (name, stamp),
        # pass the requested verbosity through instead of
        # hard-coding 0, which silenced this parameter entirely
        verbosity=verbosity,
        logdir=logdir,
        **kwargs
    )
Esempio n. 3
0
    def cache_dir_exists(self):
        """
        Ensures the cache directory exists.

        Falls back to the default cache directory from the module
        settings if ``self.cache_dir`` is not set, and creates the
        directory if it does not exist yet.
        """

        if self.cache_dir is None:

            self.cache_dir = settings.get('cachedir')

        # exist_ok avoids a race between the existence check and the
        # creation, and matches how `get_cachedir` does the same thing
        os.makedirs(self.cache_dir, exist_ok=True)
Esempio n. 4
0
File: log.py Progetto: kkaris/pypath
    def __init__(
        self,
        fname,
        verbosity=None,
        console_level=None,
        logdir=None,
        max_width=200,
    ):
        """
        fname : str
            Name of the log file.
        verbosity : int
            Messages at and below this level are written into the
            logfile; all other messages are dropped.
        console_level : int
            Messages below this level are printed to the console in
            addition to the logfile.
        logdir : str
            Path to the directory containing the log files.
        max_width : int
            Maximum width of the lines in the log file.
        """

        # register a periodic job flushing the log buffer to disk
        @_log_flush_timeloop.job(interval=datetime.timedelta(
            seconds=settings.get('log_flush_interval')))
        def _flush():

            self.flush()

        _log_flush_timeloop.start(block=False)

        self.wrapper = textwrap.TextWrapper(
            width=max_width,
            subsequent_indent=' ' * 22,
            break_long_words=False,
        )
        self.logdir = self.get_logdir(logdir)
        self.fname = os.path.join(self.logdir, fname)

        # fall back to the module level settings where not provided
        if verbosity is None:
            verbosity = settings.get('log_verbosity')

        self.verbosity = verbosity

        if console_level is None:
            console_level = settings.get('console_verbosity')

        self.console_level = console_level

        self.open_logfile()

        # sending some greetings
        self.msg('Welcome!')
        self.msg('Logger started, logging into `%s`.' % self.fname)
Esempio n. 5
0
def get_cachedir(cachedir=None):
    """
    Ensures the cache directory exists and returns its path.
    """

    if not cachedir:

        # fall back to the module level default cache directory
        cachedir = settings.get('cachedir')

    os.makedirs(cachedir, exist_ok=True)

    return cachedir
Esempio n. 6
0
    def __init__(
        self,
        type_,
        id_type_a,
        id_type_b,
        ncbi_tax_id=None,
    ):
        """
        type_ : str
            Type of this mapping definition.
        id_type_a : str
            One identifier type.
        id_type_b : str
            The other identifier type.
        ncbi_tax_id : int
            NCBI Taxonomy ID of the organism; falls back to the module
            level default organism if not provided.
        """

        self.type = type_
        self.id_type_a = id_type_a
        self.id_type_b = id_type_b

        if not ncbi_tax_id:

            ncbi_tax_id = settings.get('default_organism')

        self.ncbi_tax_id = ncbi_tax_id
Esempio n. 7
0
File: log.py Progetto: kkaris/pypath
    def get_logdir(self, dirname=None):
        """
        Returns the absolute path to the log directory.
        Also creates the directory if it does not exist.

        dirname : str
            Name (path) of the log directory; by default
            ``<module_name>_log``.
        """

        dirname = dirname or '%s_log' % settings.get('module_name')

        # exist_ok avoids a race between the existence check and the
        # creation, and matches how `get_cachedir` does the same thing
        os.makedirs(dirname, exist_ok=True)

        return os.path.abspath(dirname)
Esempio n. 8
0
    def webservice_interactions_df(self):
        """
        Builds the interactions data frame for the web service.
        Adds one column per query type (dataset), flagging for each
        interaction and direction whether it belongs to that dataset,
        based on the resources supporting it.
        """

        # sets of resource names defining each dataset
        sources_omnipath = set(f.name for f in data_formats.omnipath.values())
        sources_extra_directions = settings.get('network_extra_directions')
        sources_kinase_extra = set(f.name
                                   for f in data_formats.ptm_misc.values())
        sources_ligrec_extra = set(
            f.name for f in data_formats.ligand_receptor.values())
        sources_pathway_extra = set(
            f.name for f in data_formats.pathway_noref.values())
        sources_mirna = set(f.name for f in data_formats.mirna_target.values())

        self.make_df(
            unique_pairs=False,
            extra_node_attrs={'ncbi_tax_id': 'ncbi_tax_id'},
            extra_edge_attrs={
                # `omnipath` dataset: either this direction is supported by
                # an omnipath resource, or the undirected interaction is
                # from omnipath and the direction comes from one of the
                # extra direction resources; PPI interactions only
                'omnipath':
                lambda e, d:
                ((bool(e['dirs'].sources[d] & sources_omnipath) or
                  (bool(e['dirs'].sources['undirected'] & sources_omnipath) and
                   bool(e['dirs'].sources[d] & sources_extra_directions))) and
                 'PPI' in e['type']),
                'kinaseextra':
                lambda e, d: (bool(e['dirs'].sources[d] & sources_kinase_extra)
                              and 'PPI' in e['type']),
                'ligrecextra':
                lambda e, d: (bool(e['dirs'].sources[d] & sources_ligrec_extra)
                              and 'PPI' in e['type']),
                'pathwayextra':
                lambda e, d: (bool(e['dirs'].sources[d] & sources_pathway_extra
                                   ) and 'PPI' in e['type']),
                # miRNA-target interactions
                'mirnatarget':
                lambda e, d: (bool(e['dirs'].sources[d] & sources_mirna) and
                              'MTI' in e['type']),
                # transcriptional regulation: a 'TF' typed resource must
                # support this direction
                'tfregulons':
                lambda e, d: ('TF' in e['sources_by_type'] and bool(e[
                    'sources_by_type']['TF'] & e['dirs'].sources[d])),
                # these four are copied straight from edge attributes
                'tfregulons_curated':
                'tfregulons_curated',
                'tfregulons_chipseq':
                'tfregulons_chipseq',
                'tfregulons_tfbs':
                'tfregulons_tfbs',
                'tfregulons_coexp':
                'tfregulons_coexp',
                # semicolon separated, sorted TF Regulons confidence levels,
                # empty string if not a TF Regulons supported direction
                'tfregulons_level':
                lambda e, d:
                (';'.join(sorted(e['tfregulons_level'])) if 'tfregulons_level'
                 in e.attributes() and 'TF' in e['sources_by_type'] and bool(e[
                     'sources_by_type']['TF'] & e['dirs'].sources[d]) else ''),
                'type':
                lambda e: e['type'][0]
            })
Esempio n. 9
0
    def __init__(self, input_args = None, **kwargs):
        """
        Signor complexes resource.

        input_args : dict
            Arguments for the ``signor_complexes`` input method;
            `organism` defaults to the module level default organism.
        """

        # copy so we never mutate the dict provided by the caller
        input_args = dict(input_args) if input_args else {}

        if 'organism' not in input_args:

            input_args['organism'] = settings.get('default_organism')

        AbstractComplexResource.__init__(
            self,
            name = 'Signor',
            input_method = 'signor_complexes',
            input_args = input_args,
        )
Esempio n. 10
0
    def which_list(self, id_type, ncbi_tax_id=None):
        """
        Returns the list of identifiers of type `id_type` for the
        organism `ncbi_tax_id`, loading it on demand. Returns None
        if the list could not be loaded.
        """

        ncbi_tax_id = ncbi_tax_id or settings.get('default_organism')
        key = (id_type, ncbi_tax_id)

        # record the access time for the expiry based cleanup
        self.expiry[key] = time.time()

        if key not in self.lists:

            self.load(key)

        if key in self.lists:

            return self.lists[key]
Esempio n. 11
0
File: go.py Progetto: kkaris/pypath
    def _set_pickle_path(self):
        """
        Sets the path of the pickle cache file. If no path has been
        provided, composes the default one in the cache directory,
        with the organism inserted into the file name.
        """

        if not self._pickle_file:

            self._pickle_file = os.path.join(
                cache.get_cachedir(),
                settings.get('go_pickle_cache_fname') % self.organism,
            )
Esempio n. 12
0
def get_pubmed_data(pp, cachefile=None, htp_threshold=20):
    """
    For one PyPath object, obtains metadata for all PubMed IDs
    through NCBI E-utils.

    :param pp:
        ``pypath.PyPath`` object
    :param cachefile:
        Path to the PubMed metadata cache pickle; defaults to the
        module level `pubmed_cache` setting.
    :param htp_threshold:
        The number of interactions for one reference above which
        the study is considered to be high-throughput (and its
        references excluded). None disables the filtering.

    :return:
        Tuple of two pandas data frames: one record per reference
        per resource per edge, and the earliest publication year
        per resource per edge.
    """

    if cachefile is None:

        cachefile = settings.get('pubmed_cache')

    if htp_threshold is not None:
        pp.htp_stats()

    # all PubMed IDs referenced by any edge of the network
    pubmeds = common.uniqList(
        common.flatList([[r.pmid for r in e['references']]
                         for e in pp.graph.es]))

    if htp_threshold is not None:
        pubmeds = set(pubmeds) - pp.htp[htp_threshold]['htrefs']

    notpmid = [i for i in pubmeds if not i.isdigit()]

    sys.stdout.write('\t:: Number of non PubMed ID references: %u\n' %
                     len(notpmid))

    pmdata = {}
    if os.path.exists(cachefile):
        sys.stdout.write('\t:: Loading data previously downloaded '
                         'from PubMed, from file `%s`\n' % cachefile)
        # `with` closes the cache file instead of leaking the handle
        with open(cachefile, 'rb') as fp:
            pmdata = pickle.load(fp)

    # download only what is not in the cache yet
    missing = list(set(pubmeds) - set(pmdata.keys()))
    sys.stdout.write('\t:: Downloading data from PubMed about %s papers\n' %
                     len(missing))
    cached_pubmeds_len = len(pmdata)
    pmdata_new = dataio.get_pubmeds(missing)
    pmdata.update(pmdata_new)

    sys.stdout.write('\t:: Saving PubMed data to file `%s`\n' % cachefile)

    if len(pmdata) > cached_pubmeds_len:
        with open(cachefile, 'wb') as fp:
            pickle.dump(pmdata, fp)

    # restrict to the references relevant for this network
    pmdata = dict(i for i in pmdata.items() if i[0] in pubmeds)

    points = []
    earliest = []

    for e in pp.graph.es:

        for s, rs in iteritems(e['refs_by_source']):

            # PubMed IDs with known publication date, excluding the
            # high-throughput studies if a threshold is set
            pms = [
                r.pmid for r in rs
                if (htp_threshold is None
                    or r.pmid not in pp.htp[htp_threshold]['htrefs'])
                and r.pmid in pmdata and 'pubdate' in pmdata[r.pmid]
            ]
            if len(pms) > 0:
                yrs = [int(pmdata[pm]['pubdate'][:4]) for pm in pms]
                earliest.append((s, 0, min(yrs), '', e.index))
                for pm in pms:
                    points.append((s, pm, int(pmdata[pm]['pubdate'][:4]),
                                   pmdata[pm]['source'], e.index))

    points = common.uniqList(points)
    earliest = common.uniqList(earliest)

    points = pd.DataFrame.from_records(points)
    earliest = pd.DataFrame.from_records(earliest)
    points.columns = ['database', 'pmid', 'year', 'journal', 'eid']
    earliest.columns = ['database', 'none', 'year', 'none', 'eid']

    return points, earliest
Esempio n. 13
0
}

# Paths to auxiliary data files of various resources: some are shipped
# in the module `data` directory, others live in the cache directory.
files = {
    'signalink': {
        'edges': 'signalink3_edges.tsv',
        'nodes': 'signalink3_nodes.tsv'
    },
    'acsn': {
        'names': os.path.join(common.ROOT, 'data', 'acsn_names.gmt'),
        'ppi': os.path.join(common.ROOT, 'data', 'acsn_ppi.txt')
    },
    'phosphopoint': {
        'data': os.path.join(common.ROOT, 'data', 'phosphopoint.csv')
    },
    'phosphosite': {
        'curated':
        os.path.join(
            settings.get('cachedir'),
            'phosphosite_curated.pickle',
        ),
        'noref':
        os.path.join(
            settings.get('cachedir'),
            'phosphosite_noref.pickle',
        )
    },
    'dbptm': {
        'old_dbptm': os.path.join(common.ROOT, 'data', 'old_dbptm.tab'),
    },
}
Esempio n. 14
0
import re

import time
import datetime
import timeloop
timeloop.app.logging.disable(level=9999)

import pypath.urls as urls
import pypath.curl as curl
import pypath.session_mod as session_mod
import pypath.settings as settings

_logger = session_mod.Logger(name='uniprot_input')

# module level cache of downloaded UniProt data
db = {}
# interval between cleanups, taken from the module settings
_cleanup_period = settings.get('mapper_cleanup_interval')
# presumably how long (in seconds) an unused cache entry is kept
# before removal — TODO confirm against the cleanup routine
_lifetime = 300
# last access times of the cache entries, keyed like `db`
_last_used = {}


def _all_uniprots(organism=9606, swissprot=None):

    swissprot = 'yes' if swissprot == True else swissprot
    rev = '' if not swissprot else ' AND reviewed: %s' % swissprot
    url = urls.urls['uniprot_basic']['url']
    get = {
        'query': 'organism:%s%s' % (str(organism), rev),
        'format': 'tab',
        'columns': 'id',
    }
    c = curl.Curl(url, get=get, silent=False)