def createdb(self, inname, outname, force):
    """Build the sqlite3 database unless it is already up to date.

    The database is (re)built when `outname` does not exist, when
    `fileutils.file_newer(inname, outname)` reports true, or when
    `force` is set. Otherwise a message is logged and nothing is built.

    Args:
        inname (str): Name of the input file from which the database
                      will be created
        outname (str): Name of sqlite3 database
        force (bool): If True rebuild the database even when the
                      freshness check says it is current
    """
    # A database that does not exist yet always counts as stale.
    stale = True
    if os.path.exists(outname):
        stale = fileutils.file_newer(inname, outname)

    self.logger = fileconfig.getlogger()

    if force or stale:
        self.inname = inname
        self.outname = outname
        self._makedb()
        return

    self.logger.info('Not Updating. %s database already uptodate'
                     % inname)
def createdb(self, inname, outname, force):
    """Create the UTR database from a directory of input files.

    The database is rebuilt when `outname` does not exist, when any
    entry directly inside `inname` is reported newer than `outname` by
    `fileutils.file_newer`, or when `force` is set. Otherwise a message
    is logged and nothing is built.

    Args:
        inname (str): Directory whose entries are the UTR input files
                      (it is passed to `os.listdir`)
        outname (str): Name of sqlite3 database
        force (bool): If True overwrite existing database even if it
                      is newer than every file in `inname`
    """
    # Stored up front so the attribute is set even when the build is
    # skipped.
    self.inname = inname
    if os.path.exists(outname):
        # any() is False for an empty directory, so `newer` is always
        # bound; it also stops at the first newer input file.
        newer = any(
            fileutils.file_newer(os.path.join(inname, fn), outname)
            for fn in os.listdir(inname))
    else:
        newer = True
    self.logger = fileconfig.getlogger()
    if not newer and not force:
        self.logger.info('Not Updating. UTRdb database already uptodate')
    else:
        self.outname = outname
        t1 = time.time()
        self._makedb()
        time_taken = (time.time() - t1) / 60
        self.logger.info("Time Taken for creating %s is %f min"
                         % (self.outname, time_taken))
    return
def createdb(self, inname, outname, force):
    """Build the OMIM sqlite3 database unless it is already current.

    Args:
        inname (str): Swissprot file. Note (from the original author) -
                      the trEMBL file does not contain OMIM annotation,
                      and where it does the information is partial:
                      only the mim id is given, without saying whether
                      it is a gene or a phenotype OMIM id.
        outname (str): Name of sqlite3 database
        force (bool): If True overwrite the existing database even if
                      it is newer than `inname`
    """
    self.logger = fileconfig.getlogger()

    # A missing database is always treated as out of date.
    db_present = os.path.exists(outname)
    stale = fileutils.file_newer(inname, outname) if db_present else True

    if not (stale or force):
        self.logger.info('Not Updating. OMIM database already uptodate')
        return

    self.inname = inname
    self.outname = outname
    started = time.time()
    self._makedb()
    minutes = (time.time() - started) / 60
    self.logger.info("Time Taken for creating %s is %f min"
                     % (self.outname, minutes))
def createdb(self, inname, outname, force):
    """Build the REFMRNA sqlite3 database unless it is already current.

    The existing database is considered stale when either `inname` or
    the file named by `dbconfig.DBCONFIG['REFGENE']['name']` is reported
    newer than `outname` by `fileutils.file_newer`.

    Args:
        inname (str): Path to human genome reference file
        outname (str): Name of sqlite3 database
        force (bool): If True overwrite existing database even if it
                      is newer than its inputs
    """
    stale = True
    if os.path.exists(outname):
        # The REFGENE entry is only looked up when the reference file
        # itself is not newer (short-circuit `or`).
        stale = bool(
            fileutils.file_newer(inname, outname) or
            fileutils.file_newer(dbconfig.DBCONFIG['REFGENE']['name'],
                                 outname))

    self.logger = fileconfig.getlogger()

    if not (stale or force):
        self.logger.info('Not Updating. REFMRNA database already uptodate')
        return

    self.inname = inname
    self.outname = outname
    started = time.time()
    self._makedb()
    minutes = (time.time() - started) / 60
    self.logger.info("Time Taken for creating %s is %f min"
                     % (self.outname, minutes))
def createdb(self, inname, outname, force):
    """Build the MIRNA sqlite3 database unless it is already current.

    Args:
        inname (str): Name of the csv file containing mirna data
        outname (str): Name of sqlite3 database
        force (bool): If True overwrite existing database even if it
                      is newer than `inname`
    """
    # Only consult the freshness helper when a database already exists.
    if not os.path.exists(outname):
        stale = True
    else:
        stale = fileutils.file_newer(inname, outname)

    self.logger = fileconfig.getlogger()

    if stale or force:
        self.inname = inname
        self.outname = outname
        started = time.time()
        self._makedb()
        elapsed_min = (time.time() - started) / 60
        self.logger.info("Time Taken for creating %s is %f min"
                         % (self.outname, elapsed_min))
    else:
        self.logger.info('Not Updating. MIRNA database already uptodate')
def createdb(self, inname, outname, force, hgmd_on=0):
    """Build the INTERPRO sqlite3 database unless it is already current.

    Args:
        inname (str): interpro file name from ucsc SQL results
        outname (str): Name of sqlite3 database
        force (bool): If True overwrite existing database even if it
                      is newer than `inname`
        hgmd_on: Stored as `self.hgmd_on` before the build starts
                 (defaults to 0)
    """
    self.logger = fileconfig.getlogger()

    # A missing database is always treated as out of date.
    stale = True
    if os.path.exists(outname):
        stale = fileutils.file_newer(inname, outname)

    if not (stale or force):
        self.logger.info(
            'Not Updating. INTERPRO database already uptodate')
        return

    self.inname = inname
    self.outname = outname
    self.hgmd_on = hgmd_on
    started = time.time()
    self._makedb()
    minutes = (time.time() - started) / 60
    self.logger.info("Time Taken for creating %s is %f min"
                     % (self.outname, minutes))
def createdb(self, inname, outname, force):
    """Build the NSFP sqlite3 database unless it is already current.

    The directory scanned for input files is `os.path.dirname(inname)`
    (stored as `self.indir`). Only entries whose extension, as returned
    by `os.path.splitext`, contains 'chr' take part in the freshness
    check.

    Args:
        inname (str): Path whose parent directory holds the NSFP
                      Database files
        outname (str): Name of sqlite3 database
        force (bool): If True overwrite existing database even if it
                      is newer than the input files
    """
    self.indir = os.path.dirname(inname)

    if not os.path.exists(outname):
        stale = True
    else:
        chr_files = (os.path.join(self.indir, entry)
                     for entry in os.listdir(self.indir)
                     if 'chr' in os.path.splitext(entry)[1])
        # Stops at the first per-chromosome file newer than the database.
        stale = any(fileutils.file_newer(path, outname)
                    for path in chr_files)

    self.logger = fileconfig.getlogger()

    if not (stale or force):
        self.logger.info('Not Updating. NSFP database already uptodate')
        return

    self.outname = outname
    started = time.time()
    self._makedb()
    minutes = (time.time() - started) / 60
    self.logger.info("Time Taken for creating %s is %f min"
                     % (self.outname, minutes))
def createdb(self, inname, goaname, outname, force):
    """Build the GO sqlite3 database unless it is already current.

    Only `inname` takes part in the freshness check; `goaname` is
    stored on the instance for the build steps but is not compared
    against `outname`.

    Args:
        inname (str): Name of gene ontology similarity tab delimited
                      file generated by app_build_go.py
        goaname (str): Stored as `self.goaname` before the build
        outname (str): Name of sqlite3 database
        force (bool): If True overwrite existing database even if it
                      is newer than `inname`
    """
    # TODO: to handle more than two files or pick one of them
    stale = True
    if os.path.exists(outname):
        stale = fileutils.file_newer(inname, outname)

    self.logger = fileconfig.getlogger()

    if not (stale or force):
        self.logger.info('Not Updating. GO database already update')
        return

    self.inname = inname
    self.goaname = goaname
    self.outname = outname
    self.uni2gene = {}

    started = time.time()
    # Preprocessing (filtering and sorting) runs before the build.
    self._preproces()
    self._makedb()
    minutes = (time.time() - started) / 60
    self.logger.info("Time Taken for creating %s is %f min"
                     % (self.outname, minutes))
def main(proc):
    """Download and build all configured Varant data sources.

    Points FILECONFIG['LOGFILE'] at a timestamped log file under the
    directory named by the ``GCN`` environment variable, loads the
    download links, builds REFGENOME entries serially and the remaining
    data sources through a pool of `proc` worker processes. The splice
    database is built afterwards when REFGENE has download links and
    neither REFGENE nor REFMRNA failed. Data sources that could not be
    built are logged and printed, followed by the total elapsed time.

    Args:
        proc (int): Number of worker processes for the pool
    """
    global logger
    # NOTE: GCN must be set in the environment; os.path.join() below
    # fails when code_loc is None.
    code_loc = os.environ.get('GCN')
    ts = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M')
    logfile = os.path.join(code_loc, 'varant_install_%s.log' % ts)
    FILECONFIG['LOGFILE'] = logfile
    logger = getlogger()
    logger.info('Loading Data download links from %s' % DATA_LINKS)
    dlinks = _get_download_links()
    logger.info('Data download links loaded to memory')

    p = Pool(proc)
    try:
        data_inputs = []
        not_build = []
        refgenome = []
        for ds, links in dlinks.items():
            if ds in FILECONFIG:
                data_dest = os.path.dirname(FILECONFIG[ds])
                if ds == 'REFGENOME':
                    refgenome.append((ds, links, data_dest))
                else:
                    data_inputs.append((ds, links, data_dest))
            else:
                not_build.append(ds)
                logger.error('Download data destination folder is not set '
                             'for %s in etc/fileconfig.py and thus cannot '
                             'be build..' % ds)

        t1 = time.time()
        # The reference genome is built serially before the pooled builds.
        # build() yields a truthy value on failure (the pooled results
        # below are filtered the same way), so record it here as well
        # instead of dropping it.
        for e in refgenome:
            failed = build(e)
            if failed:
                not_build.append(failed)
        r = p.map(build, data_inputs)
    finally:
        # Always release the worker processes, even if a build raised.
        p.close()
        p.join()

    not_build.extend(e for e in r if e)

    if 'REFGENE' in dlinks and 'REFGENE' not in not_build and \
            'REFMRNA' not in not_build:
        r = build_splicedb()
        if r is not None:
            not_build.append(r)
    t2 = time.time()
    tt = float(t2 - t1) / float(60)

    if not_build:
        logger.warning('Failed to build the following data sources - ')
        print('Failed to build the following data sources - ')
        for idx, d in enumerate(not_build):
            logger.warning(str(idx + 1) + ':' + d)
            print(str(idx + 1) + ':' + d)

    logger.info('(%d Processor) Total time taken to set up Varant '
                'databases is %f min' % (proc, tt))
    print('(%d Processor) Total time taken to set up is %f min' % (proc, tt))
def get_configuration(progname, logger=None, reload=False):
    """Return the configuration dictionary for a program.

    A configuration already held in PROGCONFIG is returned as is unless
    `reload` is set. Otherwise a fresh dictionary is stored in
    PROGCONFIG and filled from the file ``config/<progname>`` found
    under each of these directories, in this order, later files
    overriding earlier ones:

      1. the directory named by the GCNHOME environment variable
      2. ``$HOME/.gcn`` (only if that directory exists)
      3. the directory named by the GCNPROGDIR environment variable

    Args:
        progname (string): Name of the program. The PROGCONFIG key is
                           the lower-cased name; the file name uses
                           `progname` as given.
        logger (file): A logging object. If None, the system logger is
                       used
        reload (bool): If true, then the configuration will be reloaded
                       from file, even if it had been loaded previously

    Returns:
        A configuration dictionary with the configuration options as key
    """
    key = progname.lower()
    cached = PROGCONFIG.get(key, None)
    if cached is not None and not reload:
        return cached

    config = PROGCONFIG[key] = {}

    sysdir = os.environ.get('GCNHOME', None)
    custdir = os.environ.get('GCNPROGDIR', None)
    userdir = os.environ.get('HOME', None)
    if userdir is not None:
        userdir = os.path.join(userdir, '.gcn')
        if not os.path.exists(userdir):
            userdir = None

    if logger is None:
        logger = getlogger()

    for cfdir in (sysdir, userdir, custdir):
        if cfdir is None:
            continue
        cfgfile = os.path.join(cfdir, 'config', progname)
        if not os.path.exists(cfgfile):
            continue
        if logger:
            logger.info('Loading configuration from %s' % cfgfile)
        config.update(load_file(cfgfile))
        if logger:
            logger.info('Loaded configuration successfully')

    return config
def set_configuration(progname, cfgfile, logger=None):
    """Load a program's configuration from a file and cache it.

    The loaded dictionary replaces any entry stored in PROGCONFIG under
    the lower-cased program name.

    Args:
        progname (string): Name of the program (case insenstive)
        cfgfile (string): Name of file with configuration information
        logger (file): A logging object. If None, the system logger is
                       used

    Returns:
        A configuration dictionary with the configuration options as key
    """
    log = getlogger() if logger is None else logger
    details = (progname, cfgfile)

    log.info('Loading configuration for %s from file %s' % details)
    cfgdict = load_file(cfgfile)
    PROGCONFIG[progname.lower()] = cfgdict
    log.info('Loaded configuration for %s from file %s' % details)

    return cfgdict
def __init__(self, invcf, max_exac_maf, capture_kit_name, probe_ext_bp,
             outvcf=None, cosmic_on=False, hgmd_on=False,
             dblink=False, log_dir=None):
    """Store the annotation options and set up logging.

    Args:
        invcf (str): Input VCF path
        max_exac_maf: Stored as `self.max_exac_af`
        capture_kit_name: Stored as `self.capture_kit_name`
        probe_ext_bp: Stored as `self.probe_ext_bp`
        outvcf (str): Output VCF path. When not given it is derived
                      from `invcf` with a timestamped
                      ``.varant_<ts>.vcf`` suffix.
        cosmic_on: Stored as `self.cosmic_on`
        hgmd_on: Stored as `self.hgmd_on`
        dblink: Stored as `self.dblink`
        log_dir (str): Directory for the log file. When not given the
                       log is written next to the output (or input)
                       VCF.
    """
    stamp = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y%m%d_%H%M')

    self.invcf = invcf
    self.capture_kit_name = capture_kit_name
    self.probe_ext_bp = probe_ext_bp
    self.cosmic_on = cosmic_on
    self.hgmd_on = hgmd_on
    self.dblink = dblink
    self.max_exac_af = max_exac_maf

    if outvcf:
        self.outvcf = outvcf
    else:
        self.outvcf = os.path.splitext(invcf)[0] + '.varant_%s.vcf' % stamp

    # Log file location: explicit log_dir first, then next to the
    # caller-supplied output VCF, then next to the input VCF.
    # NOTE(review): the last branch uses '.varant.<ts>.log' while the
    # others use 'varant_<ts>' - kept as is; confirm whether intended.
    if log_dir:
        logfile = os.path.join(log_dir, 'varant_%s.log' % stamp)
    elif outvcf:
        logfile = os.path.splitext(self.outvcf)[0] + \
            '.varant_%s.log' % stamp
    else:
        logfile = os.path.splitext(invcf)[0] + '.varant.%s.log' % stamp
    FILECONFIG['LOGFILE'] = logfile

    self.logger = getlogger()

    # Check Environment variable setting; a status of 1 is reported as
    # a problem but does not abort construction.
    if self._check_env() == 1:
        msg = 'There seems to be problem with setting the '\
              'environment variables..'
        self.logger.error(msg)
        print(msg)
    else:
        self.logger.info('Environment variable check successful..')