def __init__(
        self,
        total=None,
        name="Progress",
        interval=None,
        percent=True,
        status='initializing',
        done=0,
        init=True,
        unit='it',
        off=None,
    ):

    if off is None:
        self.off = not settings.get('progressbars')
    else:
        self.off = off

    self.name = name
    self.interval = (
        max(int(total / 100), 1) if interval is None else interval
    )
    self.total = total
    self.done = done
    self.status = status
    self.unit = unit
    self.start_time = time.time()
    self.min_update_interval = 0.1
    self.last_printed_value = 0

    if init and not self.off:
        self.init_tqdm()
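# Hypothetical usage sketch: the `step` and `terminate` calls below are
# assumptions about the rest of this class, which is not shown in this
# excerpt:
#
#     prg = Progress(total=len(items), name='Processing', unit='items')
#     for it in items:
#         process(it)
#         prg.step()
#     prg.terminate()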
def new_logger(name=None, logdir=None, verbosity=None, **kwargs):
    """
    Returns a new logger with default settings (can be customized).

    Parameters
    ----------
    name : str
        Custom name for the log.
    logdir : str
        Path to the directory to store log files.
    verbosity : int
        Verbosity level, lowest is 0. Messages from levels above this
        won't be written to the log.

    Returns
    -------
    ``log.Logger`` instance.
    """

    name = name or settings.get('module_name')
    logdir = logdir or '%s_log' % name

    return Logger(
        fname='%s__%s.log' % (
            name,
            Logger.timestamp().replace(' ', '_').replace(':', '.'),
        ),
        verbosity=verbosity,
        logdir=logdir,
        **kwargs
    )
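# A minimal usage sketch, assuming `settings` defines `module_name` and
# the `Logger.msg` method shown further below:
#
#     logger = new_logger(name='my_module', verbosity=1)
#     logger.msg('Hello from my_module.')
#     # -> appended to `my_module_log/my_module__<timestamp>.log`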
def cache_dir_exists(self):

    if self.cache_dir is None:
        self.cache_dir = settings.get('cachedir')

    if not os.path.exists(self.cache_dir):
        os.makedirs(self.cache_dir)
def __init__(
        self,
        fname,
        verbosity=None,
        console_level=None,
        logdir=None,
        max_width=200,
    ):
    """
    fname : str
        Log file name.
    logdir : str
        Path to the directory containing the log files.
    verbosity : int
        Messages at and below this level will be written into the
        logfile. All other messages will be dropped.
    console_level : int
        Messages below this log level will be printed not only into
        the logfile but also to the console.
    """

    @_log_flush_timeloop.job(
        interval=datetime.timedelta(
            seconds=settings.get('log_flush_interval')
        )
    )
    def _flush():
        self.flush()

    _log_flush_timeloop.start(block=False)

    self.wrapper = textwrap.TextWrapper(
        width=max_width,
        subsequent_indent=' ' * 22,
        break_long_words=False,
    )
    self.logdir = self.get_logdir(logdir)
    self.fname = os.path.join(self.logdir, fname)
    self.verbosity = (
        verbosity
            if verbosity is not None else
        settings.get('log_verbosity')
    )
    self.console_level = (
        console_level
            if console_level is not None else
        settings.get('console_verbosity')
    )
    self.open_logfile()

    # sending some greetings
    self.msg('Welcome!')
    self.msg('Logger started, logging into `%s`.' % self.fname)
def get_cachedir(cachedir=None):
    """
    Ensures the cache directory exists and returns its path.
    """

    cachedir = cachedir or settings.get('cachedir')
    os.makedirs(cachedir, exist_ok=True)

    return cachedir
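# Usage sketch: resolve the cache directory once and build file paths
# under it (the pickle file name below is illustrative only):
#
#     cachedir = get_cachedir()
#     path = os.path.join(cachedir, 'my_dataset.pickle')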
def __init__(
        self,
        type_,
        id_type_a,
        id_type_b,
        ncbi_tax_id=None,
    ):

    self.type = type_
    self.id_type_a = id_type_a
    self.id_type_b = id_type_b
    self.ncbi_tax_id = ncbi_tax_id or settings.get('default_organism')
def get_logdir(self, dirname=None):
    """
    Returns the path to the log directory, creating the directory
    if it does not exist.
    """

    dirname = dirname or '%s_log' % settings.get('module_name')
    os.makedirs(dirname, exist_ok=True)

    return os.path.abspath(dirname)
def webservice_interactions_df(self):
    """
    Compiles the interactions data frame for the web service: each
    record is flagged by the resource categories (omnipath,
    kinaseextra, ligrecextra, pathwayextra, mirnatarget, tfregulons)
    it belongs to in the given direction.
    """

    sources_omnipath = set(
        f.name for f in data_formats.omnipath.values()
    )
    sources_extra_directions = settings.get('network_extra_directions')
    sources_kinase_extra = set(
        f.name for f in data_formats.ptm_misc.values()
    )
    sources_ligrec_extra = set(
        f.name for f in data_formats.ligand_receptor.values()
    )
    sources_pathway_extra = set(
        f.name for f in data_formats.pathway_noref.values()
    )
    sources_mirna = set(
        f.name for f in data_formats.mirna_target.values()
    )

    self.make_df(
        unique_pairs=False,
        extra_node_attrs={
            'ncbi_tax_id': 'ncbi_tax_id',
        },
        extra_edge_attrs={
            'omnipath': lambda e, d: (
                (
                    bool(e['dirs'].sources[d] & sources_omnipath) or
                    (
                        bool(
                            e['dirs'].sources['undirected'] &
                            sources_omnipath
                        ) and
                        bool(
                            e['dirs'].sources[d] &
                            sources_extra_directions
                        )
                    )
                ) and
                'PPI' in e['type']
            ),
            'kinaseextra': lambda e, d: (
                bool(e['dirs'].sources[d] & sources_kinase_extra) and
                'PPI' in e['type']
            ),
            'ligrecextra': lambda e, d: (
                bool(e['dirs'].sources[d] & sources_ligrec_extra) and
                'PPI' in e['type']
            ),
            'pathwayextra': lambda e, d: (
                bool(e['dirs'].sources[d] & sources_pathway_extra) and
                'PPI' in e['type']
            ),
            'mirnatarget': lambda e, d: (
                bool(e['dirs'].sources[d] & sources_mirna) and
                'MTI' in e['type']
            ),
            'tfregulons': lambda e, d: (
                'TF' in e['sources_by_type'] and
                bool(
                    e['sources_by_type']['TF'] &
                    e['dirs'].sources[d]
                )
            ),
            'tfregulons_curated': 'tfregulons_curated',
            'tfregulons_chipseq': 'tfregulons_chipseq',
            'tfregulons_tfbs': 'tfregulons_tfbs',
            'tfregulons_coexp': 'tfregulons_coexp',
            'tfregulons_level': lambda e, d: (
                ';'.join(sorted(e['tfregulons_level']))
                    if (
                        'tfregulons_level' in e.attributes() and
                        'TF' in e['sources_by_type'] and
                        bool(
                            e['sources_by_type']['TF'] &
                            e['dirs'].sources[d]
                        )
                    ) else
                ''
            ),
            'type': lambda e: e['type'][0],
        },
    )
def __init__(self, input_args = None, **kwargs):

    input_args = input_args or {}

    if 'organism' not in input_args:
        input_args['organism'] = settings.get('default_organism')

    AbstractComplexResource.__init__(
        self,
        name = 'Signor',
        input_method = 'signor_complexes',
        input_args = input_args,
    )
def which_list(self, id_type, ncbi_tax_id=None):

    ncbi_tax_id = ncbi_tax_id or settings.get('default_organism')
    key = (id_type, ncbi_tax_id)

    self.expiry[key] = time.time()

    if key not in self.lists:
        self.load(key)

    if key in self.lists:
        return self.lists[key]
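# Usage sketch (`reflists` is a hypothetical instance of this class):
# the list for (id_type, taxon) is loaded lazily on first access and
# its expiry timestamp is refreshed on every call:
#
#     uniprots = reflists.which_list('uniprot', ncbi_tax_id=9606)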
def _set_pickle_path(self):

    self._pickle_file = (
        self._pickle_file or
        os.path.join(
            cache.get_cachedir(),
            settings.get('go_pickle_cache_fname') % self.organism,
        )
    )
def get_pubmed_data(pp, cachefile=None, htp_threshold=20):
    """
    For one PyPath object, obtains metadata for all PubMed IDs
    through NCBI E-utils.

    :param pp:
        ``pypath.PyPath`` object
    :param cachefile:
        Path to the file where downloaded PubMed data is cached.
    :param htp_threshold:
        The number of interactions per reference above which the
        study is considered to be high-throughput.
    """

    if cachefile is None:
        cachefile = settings.get('pubmed_cache')

    if htp_threshold is not None:
        pp.htp_stats()

    pubmeds = common.uniqList(
        common.flatList(
            [[r.pmid for r in e['references']] for e in pp.graph.es]
        )
    )

    if htp_threshold is not None:
        pubmeds = set(pubmeds) - pp.htp[htp_threshold]['htrefs']

    notpmid = [i for i in pubmeds if not i.isdigit()]

    sys.stdout.write(
        '\t:: Number of non PubMed ID references: %u\n' % len(notpmid)
    )

    pmdata = {}

    if os.path.exists(cachefile):
        sys.stdout.write(
            '\t:: Loading data previously downloaded '
            'from PubMed, from file `%s`\n' % cachefile
        )
        with open(cachefile, 'rb') as fp:
            pmdata = pickle.load(fp)

    missing = list(set(pubmeds) - set(pmdata.keys()))

    sys.stdout.write(
        '\t:: Downloading data from PubMed about %s papers\n' %
        len(missing)
    )

    cached_pubmeds_len = len(pmdata)
    pmdata_new = dataio.get_pubmeds(missing)
    pmdata.update(pmdata_new)

    sys.stdout.write(
        '\t:: Saving PubMed data to file `%s`\n' % cachefile
    )

    if len(pmdata) > cached_pubmeds_len:
        with open(cachefile, 'wb') as fp:
            pickle.dump(pmdata, fp)

    pmdata = dict(i for i in pmdata.items() if i[0] in pubmeds)

    points = []
    earliest = []

    for e in pp.graph.es:
        for s, rs in iteritems(e['refs_by_source']):
            pms = [
                r.pmid for r in rs
                if (
                    htp_threshold is None or
                    r.pmid not in pp.htp[htp_threshold]['htrefs']
                ) and
                r.pmid in pmdata and
                'pubdate' in pmdata[r.pmid]
            ]

            if len(pms) > 0:
                yrs = [int(pmdata[pm]['pubdate'][:4]) for pm in pms]
                earliest.append((s, 0, min(yrs), '', e.index))

                for pm in pms:
                    points.append((
                        s,
                        pm,
                        int(pmdata[pm]['pubdate'][:4]),
                        pmdata[pm]['source'],
                        e.index,
                    ))

    points = common.uniqList(points)
    earliest = common.uniqList(earliest)
    points = pd.DataFrame.from_records(points)
    earliest = pd.DataFrame.from_records(earliest)
    points.columns = ['database', 'pmid', 'year', 'journal', 'eid']
    earliest.columns = ['database', 'none', 'year', 'none', 'eid']

    return points, earliest
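# Usage sketch: `pp` is assumed to be a `pypath.PyPath` instance with a
# network already built. `points` holds one row per (database, paper,
# edge) record, `earliest` the first publication year per edge:
#
#     points, earliest = get_pubmed_data(pp, htp_threshold=20)
#     points.groupby('database')['year'].min()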
files = {
    'signalink': {
        'edges': 'signalink3_edges.tsv',
        'nodes': 'signalink3_nodes.tsv',
    },
    'acsn': {
        'names': os.path.join(common.ROOT, 'data', 'acsn_names.gmt'),
        'ppi': os.path.join(common.ROOT, 'data', 'acsn_ppi.txt'),
    },
    'phosphopoint': {
        'data': os.path.join(common.ROOT, 'data', 'phosphopoint.csv'),
    },
    'phosphosite': {
        'curated': os.path.join(
            settings.get('cachedir'),
            'phosphosite_curated.pickle',
        ),
        'noref': os.path.join(
            settings.get('cachedir'),
            'phosphosite_noref.pickle',
        ),
    },
    'dbptm': {
        'old_dbptm': os.path.join(common.ROOT, 'data', 'old_dbptm.tab'),
    },
}
import re
import time
import datetime

import timeloop

timeloop.app.logging.disable(level=9999)

import pypath.urls as urls
import pypath.curl as curl
import pypath.session_mod as session_mod
import pypath.settings as settings

_logger = session_mod.Logger(name='uniprot_input')

db = {}
_cleanup_period = settings.get('mapper_cleanup_interval')
_lifetime = 300
_last_used = {}


def _all_uniprots(organism=9606, swissprot=None):

    swissprot = 'yes' if swissprot == True else swissprot
    rev = '' if not swissprot else ' AND reviewed: %s' % swissprot
    url = urls.urls['uniprot_basic']['url']
    get = {
        'query': 'organism:%s%s' % (str(organism), rev),
        'format': 'tab',
        'columns': 'id',
    }
    c = curl.Curl(url, get=get, silent=False)
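# Usage sketch: query all reviewed (SwissProt) UniProt accessions for
# human. The body above ends with the `Curl` call; parsing `c.result`
# into accessions is assumed to happen in the remainder of the
# function, outside this excerpt:
#
#     uniprots = _all_uniprots(organism=9606, swissprot='yes')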