Exemple #1
0
    def createdb(self, inname, outname, force):
        """Build the sqlite3 database from `inname` unless it is up to date.

        Args:
            inname (str): Name of the input file from which the database
                          will be created
            outname (str): Name of sqlite3 database
            force (bool): If True overwrite existing database even if it
                          is newer than `inname`
        """
        # A missing database always needs building; otherwise the decision
        # is delegated to fileutils.file_newer (presumably truthy when
        # `inname` is newer than `outname` -- confirm against fileutils).
        stale = (not os.path.exists(outname)
                 or fileutils.file_newer(inname, outname))

        self.logger = fileconfig.getlogger()

        if stale or force:
            # _makedb() reads its input/output names from the instance.
            self.inname = inname
            self.outname = outname
            self._makedb()
        else:
            self.logger.info('Not Updating. %s database already uptodate'
                             % inname)
Exemple #2
0
    def createdb(self, inname, outname, force):
        """Create the UTR database unless the existing one is up to date.

        Args:
            inname (str): Directory holding the UTR input files; every
                          entry returned by `os.listdir(inname)` is
                          compared against `outname`
            outname (str): Name of sqlite3 database
            force (bool): If True overwrite existing database even if it
                          is newer than the files in `inname`
        """
        self.inname = inname
        if os.path.exists(outname):
            # any() yields False for an empty directory, so `newer` is
            # always bound, and it stops at the first input file that
            # fileutils.file_newer reports as newer than the database.
            newer = any(
                fileutils.file_newer(os.path.join(inname, fn), outname)
                for fn in os.listdir(inname))
        else:
            # No database yet: it always has to be built.
            newer = True

        self.logger = fileconfig.getlogger()

        if not newer and not force:
            self.logger.info('Not Updating. UTRdb '
                             'database already uptodate')
        else:
            self.outname = outname
            t1 = time.time()
            self._makedb()
            # Elapsed wall-clock time, reported in minutes.
            time_taken = (time.time() - t1) / 60
            self.logger.info("Time Taken for creating %s is %f min"
                             % (self.outname, time_taken))
        return
Exemple #3
0
    def createdb(self, inname, outname, force):
        """Create the OMIM database from a Swissprot file when needed.

        Args:
            inname (str): Swissprot file. Note - the trEMBL file does not
                          contain OMIM annotation, and where it exists
                          the information is partial: only the mimid is
                          given, without saying whether it is a gene
                          OMIM id or a phenotype OMIM id.
            outname (str): Name of sqlite3 database
            force (bool): If True overwrite existing database even if it
                          is newer than `inname`
        """
        self.logger = fileconfig.getlogger()

        # Build by default; only an existing database can be judged
        # up to date by fileutils.file_newer.
        needs_build = True
        if os.path.exists(outname):
            needs_build = fileutils.file_newer(inname, outname)

        if not (needs_build or force):
            self.logger.info('Not Updating. OMIM database already uptodate')
            return

        self.inname = inname
        self.outname = outname
        started = time.time()
        self._makedb()
        # Elapsed wall-clock time, reported in minutes.
        minutes = (time.time() - started) / 60
        self.logger.info("Time Taken for creating %s is %f min" %
                         (self.outname, minutes))
Exemple #4
0
    def createdb(self, inname, outname, force):
        """Create the REFMRNA database when either of its inputs changed.

        The existing database is checked against both `inname` and the
        file named by dbconfig.DBCONFIG['REFGENE']['name'].

        Args:
            inname (str): Path to human genome reference file
            outname (str): Name of sqlite3 database
            force (bool): If True overwrite existing database even if it
                          is newer than `inname`
        """
        newer = True
        if os.path.exists(outname):
            # Short-circuit: the REFGENE entry is only looked up and
            # compared when `inname` itself is not reported as newer.
            newer = bool(
                fileutils.file_newer(inname, outname)
                or fileutils.file_newer(
                    dbconfig.DBCONFIG['REFGENE']['name'], outname))

        self.logger = fileconfig.getlogger()

        if newer or force:
            self.inname = inname
            self.outname = outname
            started = time.time()
            self._makedb()
            # Elapsed wall-clock time, reported in minutes.
            minutes = (time.time() - started) / 60
            self.logger.info("Time Taken for creating %s is %f min"
                             % (self.outname, minutes))
        else:
            self.logger.info('Not Updating. REFMRNA database already uptodate')
Exemple #5
0
    def createdb(self, inname, outname, force):
        """Create the MIRNA database unless the existing one is current.

        Args:
            inname (str): Name of the csv file containing mirna data
            outname (str): Name of sqlite3 database
            force (bool): If True overwrite existing database even if it
                          is newer than `inname`
        """
        if not os.path.exists(outname):
            # Nothing on disk yet, so the database must be built.
            rebuild = True
        else:
            rebuild = fileutils.file_newer(inname, outname)

        self.logger = fileconfig.getlogger()

        if not (rebuild or force):
            self.logger.info('Not Updating. MIRNA database already uptodate')
            return

        self.inname = inname
        self.outname = outname
        start = time.time()
        self._makedb()
        # Elapsed wall-clock time, reported in minutes.
        elapsed_min = (time.time() - start) / 60
        self.logger.info("Time Taken for creating %s is %f min"
                         % (self.outname, elapsed_min))
Exemple #6
0
    def createdb(self, inname, outname, force, hgmd_on=0):
        """Create the INTERPRO database unless the existing one is current.

        Args:
            inname (str): interpro file name from ucsc SQL results
            outname (str): Name of sqlite3 database
            force (bool): If True overwrite existing database even if it
                          is newer than `inname`
            hgmd_on: Stored as `self.hgmd_on` before `_makedb()` runs;
                     how it is interpreted is not visible in this method
        """
        self.logger = fileconfig.getlogger()

        # A database that does not exist yet is always built.
        out_of_date = (not os.path.exists(outname)
                       or fileutils.file_newer(inname, outname))

        if out_of_date or force:
            self.inname = inname
            self.outname = outname
            self.hgmd_on = hgmd_on
            began = time.time()
            self._makedb()
            # Elapsed wall-clock time, reported in minutes.
            took_min = (time.time() - began) / 60
            self.logger.info("Time Taken for creating %s is %f min" %
                             (self.outname, took_min))
        else:
            self.logger.info(
                'Not Updating. INTERPRO database already uptodate')
Exemple #7
0
    def createdb(self, inname, outname, force):
        """Create the NSFP database when any per-chromosome input is newer.

        Args:
            inname (str): Path inside the directory containing the NSFP
                          Database files; only its directory part
                          (`os.path.dirname(inname)`) is used
            outname (str): Name of sqlite3 database
            force (bool): If True overwrite existing database even if it
                          is newer than the input files
        """
        self.indir = os.path.dirname(inname)

        if not os.path.exists(outname):
            newer = True
        else:
            # Only entries whose extension contains 'chr' are considered;
            # any() stops at the first one reported as newer.
            newer = any(
                fileutils.file_newer(os.path.join(self.indir, entry), outname)
                for entry in os.listdir(self.indir)
                if 'chr' in os.path.splitext(entry)[1])

        self.logger = fileconfig.getlogger()

        if not (newer or force):
            self.logger.info('Not Updating. NSFP database already uptodate')
            return

        self.outname = outname
        started = time.time()
        self._makedb()
        # Elapsed wall-clock time, reported in minutes.
        minutes = (time.time() - started) / 60
        self.logger.info("Time Taken for creating %s is %f min" %
                         (self.outname, minutes))
Exemple #8
0
	def createdb(self, inname, goaname, outname, force):
		"""Create the GO database unless the existing one is current.

		Args:
			inname (str): Name of gene ontology similarity tab delimited
				file generated by app_build_go.py
			goaname: Stored as `self.goaname` for the build steps; its
				exact meaning is not visible in this method
			outname (str): Name of sqlite3 database
			force (bool): If True overwrite existing database even if it
				is newer than `inname`
		"""
		# TODO: to handle more than two files or pick one of them
		# (only `inname` is compared against the database; `goaname` is not)
		stale = (not os.path.exists(outname)
				or fileutils.file_newer(inname, outname))
		self.logger = fileconfig.getlogger()

		if not (stale or force):
			self.logger.info('Not Updating. GO database already update')
			return

		self.inname = inname
		self.goaname = goaname
		self.outname = outname
		self.uni2gene = {}

		started = time.time()
		# Preprocessing (filtering and sorting) has to finish before the
		# database itself is written.
		self._preproces()
		self._makedb()
		# Elapsed wall-clock time, reported in minutes.
		minutes = (time.time() - started) / 60
		self.logger.info("Time Taken for creating %s is %f min" % (self.outname, minutes))
Exemple #9
0
def main(proc):
    """Download and build every data source that has a download link.

    The log file is created under the directory named by the `GCN`
    environment variable. REFGENOME entries are built first in this
    process; all other sources are built through a `multiprocessing`
    pool, and the splice database is built last when REFGENE and REFMRNA
    did not fail. Sources that could not be built are logged and printed.

    Args:
        proc (int): Number of worker processes given to `Pool`
    """
    global logger
    code_loc = os.environ.get('GCN')
    ts = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M')
    logfile = os.path.join(code_loc, 'varant_install_%s.log' % ts)
    # The log file must be registered before getlogger() is called.
    FILECONFIG['LOGFILE'] = logfile
    logger = getlogger()
    logger.info('Loading Data download links from %s' % DATA_LINKS)
    dlinks = _get_download_links()
    logger.info('Data download links loaded to memory')

    p = Pool(proc)
    try:
        data_inputs = []
        not_build = []
        refgenome = []

        for ds, links in dlinks.items():
            if ds in FILECONFIG:
                data_dest = os.path.dirname(FILECONFIG[ds])
                if ds == 'REFGENOME':
                    refgenome.append((ds, links, data_dest))
                else:
                    data_inputs.append((ds, links, data_dest))
            else:
                not_build.append(ds)
                logger.error('Download data destination folder is not set '
                             'for %s in etc/fileconfig.py and thus cannot '
                             'be build..' % ds)
        t1 = time.time()
        # The reference genome is built serially, before the parallel run.
        for e in refgenome:
            build(e)
        r = p.map(build, data_inputs)
    finally:
        # Shut the workers down on success and on failure alike; once
        # map() has returned or raised, no result is still awaited.
        p.terminate()
        p.join()

    # build() results are collected as failures whenever they are truthy.
    not_build.extend(e for e in r if e)
    if 'REFGENE' in dlinks and 'REFGENE' not in not_build and \
            'REFMRNA' not in not_build:
        r = build_splicedb()
        if r is not None:
            not_build.append(r)
    t2 = time.time()
    tt = (t2 - t1) / 60.0
    if not_build:
        logger.warning('Failed to build the following data sources - ')
        print('Failed to build the following data sources - ')
        for idx, d in enumerate(not_build, 1):
            entry = str(idx) + ':' + d
            logger.warning(entry)
            print(entry)
    logger.info('(%d Processor) Total time taken to set up Varant '
                'databases is %f min' % (proc, tt))
    print('(%d Processor) Total time taken to set up is %f min' % (proc, tt))
Exemple #10
0
def get_configuration(progname, logger=None, reload=False):
    """Return the configuration dictionary of a program, loading on demand.

    Configuration files named `progname` are looked up in the `config`
    subdirectory of, in this order, $GCNHOME, $HOME/.gcn (only if that
    directory exists) and $GCNPROGDIR; later files update earlier ones.
    The result is cached in PROGCONFIG under the lower-cased name.

    Args:
        progname (string): Name of the program (case insensitive for the
                           cache key; used as given for the file name)
        logger (file): A logging object. If None, the system logger
                       is used
        reload (bool): If true, then the configuration will be reloaded
                       from file, even if it had been loaded previously

    Returns:
        A configuration dictionary with the configuration options
        as key
    """
    key = progname.lower()

    cached = PROGCONFIG.get(key)
    if cached is not None and not reload:
        return cached

    # Register the fresh dictionary first, then fill it in place.
    config = {}
    PROGCONFIG[key] = config

    sysdir = os.environ.get('GCNHOME')
    home = os.environ.get('HOME')
    custdir = os.environ.get('GCNPROGDIR')

    # The per-user directory only counts when it exists on disk.
    userdir = None
    if home is not None:
        candidate = os.path.join(home, '.gcn')
        if os.path.exists(candidate):
            userdir = candidate

    if logger is None:
        logger = getlogger()

    for cfdir in (sysdir, userdir, custdir):
        if cfdir is None:
            continue
        cfgfile = os.path.join(cfdir, 'config', progname)
        if not os.path.exists(cfgfile):
            continue
        if logger:
            logger.info('Loading configuration from %s' % cfgfile)
        config.update(load_file(cfgfile))
        if logger:
            logger.info('Loaded configuration successfully')

    return config
Exemple #11
0
def set_configuration(progname, cfgfile, logger=None):
    """Load a program's configuration from `cfgfile` and cache it.

    The loaded dictionary replaces any entry already stored in
    PROGCONFIG under the lower-cased program name.

    Args:
        progname (string): Name of the program (case insensitive)
        cfgfile (string): Name of file with configuration information
        logger (file): A logging object. If None, the system logger
                       is used

    Returns:
        A configuration dictionary with the configuration options
        as key

    """
    log = getlogger() if logger is None else logger
    names = (progname, cfgfile)

    log.info('Loading configuration for %s from file %s' % names)
    cfgdict = load_file(cfgfile)
    PROGCONFIG[progname.lower()] = cfgdict
    log.info('Loaded configuration for %s from file %s' % names)
    return cfgdict
Exemple #12
0
    def __init__(self, invcf, max_exac_maf, capture_kit_name, probe_ext_bp,
                 outvcf=None, cosmic_on=False, hgmd_on=False,
                 dblink=False, log_dir=None):
        """Store the annotation options and set up output and log files.

        Args:
            invcf (str): Path of the input VCF file
            max_exac_maf: Stored as `self.max_exac_af`
            capture_kit_name: Stored as `self.capture_kit_name`
            probe_ext_bp: Stored as `self.probe_ext_bp`
            outvcf (str): Path of the output VCF. If not given, it is
                          derived from `invcf` with a timestamp suffix
            cosmic_on (bool): Stored as `self.cosmic_on`
            hgmd_on (bool): Stored as `self.hgmd_on`
            dblink (bool): Stored as `self.dblink`
            log_dir (str): Directory for the log file. If not given, the
                           log is written next to the output VCF (when
                           `outvcf` is given) or next to `invcf`
        """
        self.invcf = invcf
        # One timestamp (minute resolution) is shared by all file names.
        ts = datetime.datetime.fromtimestamp(
            time.time()).strftime('%Y%m%d_%H%M')

        self.capture_kit_name = capture_kit_name
        self.probe_ext_bp = probe_ext_bp
        self.cosmic_on = cosmic_on
        self.hgmd_on = hgmd_on
        self.dblink = dblink
        self.max_exac_af = max_exac_maf

        if outvcf:
            self.outvcf = outvcf
        else:
            self.outvcf = os.path.splitext(invcf)[0] + '.varant_%s.vcf' % ts

        # The log file must be registered before getlogger() is called.
        if log_dir:
            FILECONFIG['LOGFILE'] = os.path.join(log_dir, 'varant_%s.log' % ts)
        elif outvcf:
            FILECONFIG['LOGFILE'] = (os.path.splitext(self.outvcf)[0] +
                                     '.varant_%s.log' % ts)
        else:
            FILECONFIG['LOGFILE'] = (os.path.splitext(invcf)[0] +
                                     '.varant.%s.log' % ts)

        self.logger = getlogger()
        status = self._check_env()  # Check Environment variable setting
        if status == 1:
            # A failed check is reported but does not abort construction.
            d = ('There seems to be problem with setting the '
                 'environment variables..')
            self.logger.error(d)
            print(d)
        else:
            self.logger.info('Environment variable check successful..')