def check_options(opts):
    """Validate generic run options and abort on an already-computed job.

    Requires an output directory, downgrades an impossible resume request,
    canonicalises the filter list, stages the trace DB in a tmp directory
    when requested (LUSTRE-friendly), and exits when the same parameter
    digest was already registered.
    """
    if not opts.workdir:
        raise Exception('ERROR: output option required.')
    # resuming only makes sense when the output directory already exists
    if not path.exists(opts.workdir) and opts.resume:
        print('WARNING: can use output files, found, not resuming...')
        opts.resume = False
    # keep the list of filters in a canonical order
    if opts.apply:
        opts.apply.sort()
    # LUSTRE friendliness: work on a randomly-named private copy of the DB
    if 'tmpdb' in opts and opts.tmpdb:
        tmp_dir = opts.tmpdb
        rand_tag = ''.join(ascii_letters[int(random() * 52)]
                           for _ in range(10))
        tmp_name = 'trace_%s' % rand_tag
        opts.tmpdb = path.join(tmp_dir, tmp_name)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass
    # an md5 digest of the parameters identifies previously run jobs
    if already_run(opts) and not opts.force:
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(tmp_dir, tmp_name))
        exit('WARNING: exact same job already computed, see JOBs table above')
def check_options(opts):
    """Check generic options; move the trace DB to a tmp location if asked.

    Exits the process when the exact same job (by parameter digest) has
    already been computed and ``--force`` was not given.
    """
    if not opts.workdir:
        raise Exception('ERROR: output option required.')
    # a resume request without existing output falls back to a fresh run
    if opts.resume and not path.exists(opts.workdir):
        print('WARNING: can use output files, found, not resuming...')
        opts.resume = False
    # keep the filter list sorted
    if opts.apply:
        opts.apply.sort()
    # LUSTRE friendliness: use a randomly named private DB copy
    if 'tmpdb' in opts and opts.tmpdb:
        base_dir = opts.tmpdb
        name = 'trace_' + ''.join(ascii_letters[int(random() * 52)]
                                  for _ in range(10))
        opts.tmpdb = path.join(base_dir, name)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass
    # refuse duplicate jobs unless forced
    if already_run(opts) and not opts.force:
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(base_dir, name))
        exit('WARNING: exact same job already computed, see JOBs table above')
def check_options(opts):
    """Validate plotting/analysis options and normalise derived settings.

    Creates the working directory, converts read filters to a bit mask,
    resolves plotting flags, stages the trace DB on LUSTRE-style tmp
    storage, clamps the CPU count, and aborts (or warns with ``--force``)
    when the exact same job was already computed.

    Fix: warning text said "use TADbit since the beginning"; the sibling
    version of this function says "from the beginning" — made consistent.

    :raises Exception: interactive plotting combined with noX
    :raises IOError: missing working directory
    :raises NotImplementedError: triangular view on a non-symmetric matrix
    """
    mkdir(opts.workdir)
    # transform filtering reads option
    opts.filter = filters_to_bin(opts.filter)
    # enlighten plotting parameter writing
    if opts.only_plot:
        opts.plot = True
    if opts.interactive:
        if opts.nox:
            raise Exception('ERROR: no screen no fun.\n'
                            'Interactive plot incompatible with noX option.')
        opts.plot = True
        opts.only_plot = True
    # check resume
    if not path.exists(opts.workdir):
        raise IOError('ERROR: workdir not found.')
    # triangular view is only defined for symmetric matrices
    if opts.triangular and opts.coord2:
        raise NotImplementedError('ERROR: triangular is only available for '
                                  'symmetric matrices.')
    # for LUSTRE file system....
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        # tmp file
        dbfile = 'trace_%s' % (''.join(
            [ascii_letters[int(random() * 52)] for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass
    # number of cpus
    if opts.cpus == 0:
        opts.cpus = cpu_count()
    else:
        opts.cpus = min(opts.cpus, cpu_count())
    # check if job already run using md5 digestion of parameters
    try:
        if already_run(opts):
            if not opts.force:
                if 'tmpdb' in opts and opts.tmpdb:
                    remove(path.join(dbdir, dbfile))
                exit('WARNING: exact same job already computed, see JOBs table above')
            else:
                warn('WARNING: exact same job already computed, overwriting...')
    except IOError:
        warn(("" "\nWARNING:\n new working directory created. It's ok... "
              "but next time use TADbit from the beginning!! :)"))
def check_options(opts):
    """Check that the working directory exists and the job was not run before.

    Fix: error message typo 'wordir' -> 'workdir' (matches sibling tools).

    :raises IOError: when the working directory does not exist
    """
    # check resume
    if not path.exists(opts.workdir):
        raise IOError('ERROR: workdir not found.')
    # check if job already run using md5 digestion of parameters
    if already_run(opts) and not opts.force:
        exit('WARNING: exact same job already computed, see JOBs table above')
def check_options(opts):
    """Validate options for this tool and normalise derived settings.

    Creates the working directory, converts the read filters to their
    binary representation, resolves plotting flags, clamps the CPU count,
    and aborts (or warns with ``--force``) when the exact same job was
    already computed.
    """
    mkdir(opts.workdir)
    # filters are handled internally as a bit mask
    opts.filter = filters_to_bin(opts.filter)
    # plotting flags imply each other
    if opts.only_plot:
        opts.plot = True
    if opts.interactive:
        if opts.nox:
            raise Exception('ERROR: no screen no fun.\n'
                            'Interactive plot incompatible with noX option.')
        opts.plot = True
        opts.only_plot = True
    if not path.exists(opts.workdir):
        raise IOError('ERROR: workdir not found.')
    if opts.triangular and opts.coord2:
        raise NotImplementedError('ERROR: triangular is only available for '
                                  'symmetric matrices.')
    # LUSTRE-friendly: operate on a private copy of the trace DB
    if 'tmpdb' in opts and opts.tmpdb:
        tmp_dir = opts.tmpdb
        token = ''.join(ascii_letters[int(random() * 52)] for _ in range(10))
        tmp_name = 'trace_%s' % token
        opts.tmpdb = path.join(tmp_dir, tmp_name)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass
    # 0 means "all available CPUs"; otherwise never exceed the machine
    opts.cpus = cpu_count() if opts.cpus == 0 else min(opts.cpus, cpu_count())
    # an md5 digest of the parameters identifies already-run jobs
    try:
        if already_run(opts):
            if opts.force:
                warn('WARNING: exact same job already computed, overwriting...')
            else:
                if 'tmpdb' in opts and opts.tmpdb:
                    remove(path.join(tmp_dir, tmp_name))
                exit('WARNING: exact same job already computed, see JOBs table above')
    except IOError:
        warn(("" "\nWARNING:\n new working directory created. It's ok... "
              "but next time use TADbit from the beginning!! :)"))
def check_options(opts): mkdir(opts.workdir) # create empty DB if don't exists dbpath = path.join(opts.workdir, 'trace.db') open(dbpath, 'a').close() # for lustre file system.... if 'tmpdb' in opts and opts.tmpdb: dbdir = opts.tmpdb # tmp file dbfile = 'trace_%s' % (''.join([ascii_letters[int(random() * 52)] for _ in range(10)])) opts.tmpdb = path.join(dbdir, dbfile) try: copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb) except IOError: pass if opts.workdir1: # tmp file dbfile1 = 'trace1_%s' % (''.join([ascii_letters[int(random() * 52)] for _ in range(10)])) opts.tmpdb1 = path.join(dbdir, dbfile1) try: copyfile(path.join(opts.workdir1, 'trace.db'), opts.tmpdb1) except IOError: pass if opts.workdir2: # tmp file dbfile2 = 'trace2_%s' % (''.join([ascii_letters[int(random() * 52)] for _ in range(10)])) opts.tmpdb2 = path.join(dbdir, dbfile2) try: copyfile(path.join(opts.workdir2, 'trace.db'), opts.tmpdb2) except IOError: pass else: if opts.workdir1: opts.tmpdb1 = path.join(opts.workdir1, 'trace.db') if opts.workdir2: opts.tmpdb2 = path.join(opts.workdir2, 'trace.db') # resolution needed to compare if not opts.skip_comparison and not opts.reso: raise Exception('ERROR: need to define resolution at which to compare') # check if job already run using md5 digestion of parameters if already_run(opts): if 'tmpdb' in opts and opts.tmpdb: remove(path.join(dbdir, dbfile)) if opts.workdir1: remove(path.join(dbdir, dbfile1)) if opts.workdir2: remove(path.join(dbdir, dbfile2)) exit('WARNING: exact same job already computed, see JOBs table above')
def check_options(opts):
    """Validate parsing options, set up logging, and refuse duplicate jobs.

    Fixes: ``filemode='aw'`` is not a valid :func:`open` mode (raises
    ``ValueError`` on Python 3) — replaced with ``'a+'`` as in the sibling
    version of this function; 'Writting' typo corrected; print statement
    converted to a function call (works on Python 2 and 3).
    """
    if not opts.workdir:
        raise Exception('ERROR: output option required.')
    if opts.type != 'map':
        raise NotImplementedError('ERROR: not yet there')
    if not opts.genome:
        raise Exception('ERROR: genome parameter required.')
    if not opts.workdir:
        raise Exception('ERROR: workdir parameter required.')
    # check skip
    if not path.exists(opts.workdir) and opts.skip:
        print('WARNING: can use output files, found, not skipping...')
        opts.skip = False
    if opts.workdir.endswith('/'):
        opts.workdir = opts.workdir[:-1]
    # write log
    log_format = '[PARSING] %(message)s'
    # reset logging
    logging.getLogger().handlers = []
    try:
        print('Writing log to ' + path.join(opts.workdir, 'process.log'))
        logging.basicConfig(level=logging.INFO,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log'),
                            filemode='a+')
    except IOError:
        logging.basicConfig(level=logging.DEBUG,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log2'),
                            filemode='a+')
    # to display log on stdout also
    logging.getLogger().addHandler(logging.StreamHandler())
    # write version log
    vlog_path = path.join(opts.workdir, 'TADbit_and_dependencies_versions.log')
    dependencies = get_dependencies_version()
    # NOTE(review): readlines() yields a list while `dependencies` is
    # written below as one blob — the comparison presumably always
    # differs; kept as-is to preserve behavior.
    if not path.exists(vlog_path) or open(vlog_path).readlines() != dependencies:
        logging.info('Writing versions of TADbit and dependencies')
        vlog = open(vlog_path, 'w')
        vlog.write(dependencies)
        vlog.close()
    # check if job already run using md5 digestion of parameters
    if already_run(opts):
        exit('WARNING: exact same job already computed, see JOBs table above')
def check_options(opts):
    """Minimal option validation: workdir, resume flag, filters, duplicates."""
    if not opts.workdir:
        raise Exception('ERROR: output option required.')
    # cannot resume when the output directory does not exist yet
    if not path.exists(opts.workdir) and opts.resume:
        print('WARNING: can use output files, found, not resuming...')
        opts.resume = False
    # keep the filters sorted
    if opts.apply:
        opts.apply.sort()
    # same parameter digest already registered and no --force: nothing to do
    if already_run(opts) and not opts.force:
        exit('WARNING: exact same job already computed, see JOBs table above')
def check_options(opts):
    """Validate generic output/resume/filter options before running."""
    if not opts.workdir:
        raise Exception('ERROR: output option required.')
    # a resume request against a missing output directory is downgraded
    if not path.exists(opts.workdir) and opts.resume:
        print('WARNING: can use output files, found, not resuming...')
        opts.resume = False
    # canonical order for the list of filters to apply
    if opts.apply:
        opts.apply.sort()
    # refuse to redo a job whose parameter digest is already registered
    if already_run(opts) and not opts.force:
        exit('WARNING: exact same job already computed, see JOBs table above')
def check_options(opts):
    """Validate normalization inputs and runtime settings; abort on duplicates."""
    mkdir(opts.workdir)
    # reads filters are encoded as a bit mask
    opts.filter = filters_to_bin(opts.filter)
    # custom normalization requires an existing biases file
    if opts.normalization == 'custom':
        if not opts.biases_path:
            raise IOError('ERROR: biases file required for "custom" normalization.')
        elif not path.exists(opts.biases_path):
            raise IOError('ERROR: biases not found at path: %s' % opts.biases_path)
    # check resume
    if not path.exists(opts.workdir):
        raise IOError('ERROR: workdir not found.')
    # for lustre file system....
    if 'tmpdb' in opts and opts.tmpdb:
        tmp_dir = opts.tmpdb
        rand_tag = ''.join(ascii_letters[int(random() * 52)]
                           for _ in range(10))
        db_name = 'trace_%s' % rand_tag
        opts.tmpdb = path.join(tmp_dir, db_name)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass
    # 0 means all cores; otherwise cap at the machine's core count
    opts.cpus = cpu_count() if opts.cpus == 0 else min(opts.cpus, cpu_count())
    # check if job already run using md5 digestion of parameters
    try:
        if already_run(opts):
            if 'tmpdb' in opts and opts.tmpdb:
                remove(path.join(tmp_dir, db_name))
            exit('WARNING: exact same job already computed, see JOBs table above')
    except IOError:
        # new working directory
        pass
def check_options(opts):
    """Validate compartment-calling options.

    Checks the working directory, resolves the CPU count, enforces
    FASTA / rich_in_A exclusivity, grows the number of computed
    eigenvectors to cover the requested indices, and stages the trace DB
    in a tmp folder (LUSTRE) before refusing duplicate jobs.

    Fixes (user-facing messages only): duplicated word in
    "GC content will'+'will be used", and "u=input" -> "input".
    """
    # check resume
    if not path.exists(opts.workdir):
        raise IOError('ERROR: %s does not exists' % opts.workdir)
    # number of cpus
    if opts.cpus == 0:
        opts.cpus = cpu_count()
    else:
        opts.cpus = min(opts.cpus, cpu_count())
    if opts.rich_in_A and opts.fasta:
        raise Exception('ERROR: should choose one of FASTA or rich_in_A')
    # rich_in_A
    if opts.fasta:
        if opts.rich_in_A:
            # defensive: unreachable in practice, the exclusivity check
            # above already raised when both options are set
            raise Exception(('ERROR: if you input a FASTA file, GC content '
                             'will be used as "rich in A" metric to infer '
                             'compartments.'))
        opts.rich_in_A = opts.fasta
    # N EVs
    if opts.ev_index:
        if max(opts.ev_index) > opts.n_evs:
            warn('WARNING: increasing number of calculated eigenvectors to %d, '
                 'to match the input eigenvectors indices' % max(opts.ev_index))
            opts.n_evs = max(opts.ev_index)
    # tmp folder
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        # tmp file
        dbfile = 'trace_%s' % (''.join(
            [ascii_letters[int(random() * 52)] for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
        copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
    if already_run(opts) and not opts.force:
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(dbdir, dbfile))
        exit('WARNING: exact same job already computed, see JOBs table above')
def check_options(opts):
    """Validate plotting options, stage the trace DB, and handle duplicates.

    Fix: 'overwritting' -> 'overwriting' in the warning (matches the
    spelling used by the sibling versions of this function).
    """
    mkdir(opts.workdir)
    # transform filtering reads option
    opts.filter = filters_to_bin(opts.filter)
    # plotting flags imply each other
    if opts.only_plot:
        opts.plot = True
    if opts.interactive:
        opts.plot = True
        opts.only_plot = True
    # check resume
    if not path.exists(opts.workdir):
        raise IOError('ERROR: workdir not found.')
    # for lustre file system....
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        # tmp file
        dbfile = 'trace_%s' % (''.join(
            [ascii_letters[int(random() * 52)] for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass
    # number of cpus
    if opts.cpus == 0:
        opts.cpus = cpu_count()
    else:
        opts.cpus = min(opts.cpus, cpu_count())
    # check if job already run using md5 digestion of parameters
    if already_run(opts):
        if not opts.force:
            if 'tmpdb' in opts and opts.tmpdb:
                remove(path.join(dbdir, dbfile))
            exit('WARNING: exact same job already computed, see JOBs table above')
        else:
            warn('WARNING: exact same job already computed, overwriting...')
def check_options(opts):
    """Stage trace DBs (main plus the two compared workdirs); stop on duplicates.

    Bug fix: the non-tmpdb branch dereferenced ``opts.workdir1`` and
    ``opts.workdir2`` unconditionally (``path.join(None, ...)`` raises when
    either is unset); they are now guarded exactly as in the sibling
    comparison tool.
    """
    mkdir(opts.workdir)
    # for lustre file system....
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        # tmp file
        dbfile = 'trace_%s' % (''.join(
            [ascii_letters[int(random() * 52)] for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass
        if opts.workdir1:
            # tmp file
            dbfile1 = 'trace1_%s' % (''.join(
                [ascii_letters[int(random() * 52)] for _ in range(10)]))
            opts.tmpdb1 = path.join(dbdir, dbfile1)
            try:
                copyfile(path.join(opts.workdir1, 'trace.db'), opts.tmpdb1)
            except IOError:
                pass
        if opts.workdir2:
            # tmp file
            dbfile2 = 'trace2_%s' % (''.join(
                [ascii_letters[int(random() * 52)] for _ in range(10)]))
            opts.tmpdb2 = path.join(dbdir, dbfile2)
            try:
                copyfile(path.join(opts.workdir2, 'trace.db'), opts.tmpdb2)
            except IOError:
                pass
    else:
        # guard against unset comparison workdirs (was unconditional)
        if opts.workdir1:
            opts.tmpdb1 = path.join(opts.workdir1, 'trace.db')
        if opts.workdir2:
            opts.tmpdb2 = path.join(opts.workdir2, 'trace.db')
    # check if job already run using md5 digestion of parameters
    if already_run(opts):
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(dbdir, dbfile))
            if opts.workdir1:
                remove(path.join(dbdir, dbfile1))
            if opts.workdir2:
                remove(path.join(dbdir, dbfile2))
        exit('WARNING: exact same job already computed, see JOBs table above')
def check_options(opts):
    """Check workdir existence, stage the trace DB, and stop on duplicate jobs."""
    # check resume
    if not path.exists(opts.workdir):
        raise IOError('ERROR: %s does not exists' % opts.workdir)
    # stage the trace DB under the requested tmp directory
    if 'tmpdb' in opts and opts.tmpdb:
        tmp_dir = opts.tmpdb
        suffix = ''.join(ascii_letters[int(random() * 52)]
                         for _ in range(10))
        tmp_name = 'trace_%s' % suffix
        opts.tmpdb = path.join(tmp_dir, tmp_name)
        copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
    # same parameter digest already registered and no --force: bail out
    if already_run(opts) and not opts.force:
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(tmp_dir, tmp_name))
        exit('WARNING: exact same job already computed, see JOBs table above')
def check_options(opts):
    """Handle the resume flag, tmp staging of the trace DB, and duplicates."""
    # resuming is pointless when the output directory does not exist yet
    if not path.exists(opts.workdir) and opts.resume:
        print('WARNING: can use output files, found, not resuming...')
        opts.resume = False
    # stage the trace DB under the requested tmp directory
    if 'tmp' in opts and opts.tmp:
        base_dir = opts.tmp
        token = ''.join(ascii_letters[int(random() * 52)]
                        for _ in range(10))
        db_name = 'trace_%s' % token
        opts.tmp = path.join(base_dir, db_name)
        copyfile(path.join(opts.workdir, 'trace.db'), opts.tmp)
    # duplicate parameter digest and no --force: clean up and exit
    if already_run(opts) and not opts.force:
        if 'tmp' in opts and opts.tmp:
            remove(path.join(base_dir, db_name))
        exit('WARNING: exact same job already computed, see JOBs table above')
def check_options(opts):
    """Validate compartment options; tolerate a missing trace DB.

    Resolves the CPU count, enforces FASTA / rich_in_A exclusivity, grows
    the number of computed eigenvectors to cover the requested indices,
    stages the trace DB in a tmp folder, and refuses duplicate jobs
    (treating an IOError from the duplicate check as "never run").

    Fixes (user-facing messages only): duplicated word in
    "GC content will'+'will be used", and "u=input" -> "input".
    """
    # number of cpus
    if opts.cpus == 0:
        opts.cpus = cpu_count()
    else:
        opts.cpus = min(opts.cpus, cpu_count())
    if opts.rich_in_A and opts.fasta:
        raise Exception('ERROR: should choose one of FASTA or rich_in_A')
    # rich_in_A
    if opts.fasta:
        if opts.rich_in_A:
            # defensive: unreachable in practice, the exclusivity check
            # above already raised when both options are set
            raise Exception(('ERROR: if you input a FASTA file, GC content '
                             'will be used as "rich in A" metric to infer '
                             'compartments.'))
        opts.rich_in_A = opts.fasta
    # N EVs
    if opts.ev_index:
        if max(opts.ev_index) > opts.n_evs:
            warn('WARNING: increasing number of calculated eigenvectors to %d, '
                 'to match the input eigenvectors indices' % max(opts.ev_index))
            opts.n_evs = max(opts.ev_index)
    # tmp folder
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        # tmp file
        dbfile = 'trace_%s' % (''.join([ascii_letters[int(random() * 52)]
                                        for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
        copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
    # a missing trace DB counts as "never run" instead of crashing
    try:
        _already_run = already_run(opts)
    except IOError:
        _already_run = False
    if _already_run and not opts.force:
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(dbdir, dbfile))
        exit('WARNING: exact same job already computed, see JOBs table above')
def check_options(opts):
    """Validate normalization options and derived settings; abort on duplicates."""
    mkdir(opts.workdir)
    # filters are stored as a bit mask internally
    opts.filter = filters_to_bin(opts.filter)
    # custom normalization needs a readable biases file
    if opts.normalization == 'custom':
        if not opts.biases_path:
            raise IOError('ERROR: biases file required for "custom" normalization.')
        elif not path.exists(opts.biases_path):
            raise IOError('ERROR: biases not found at path: %s' % opts.biases_path)
    if not path.exists(opts.workdir):
        raise IOError('ERROR: workdir not found.')
    # LUSTRE friendly: use a private copy of the trace DB
    if 'tmpdb' in opts and opts.tmpdb:
        tmp_dir = opts.tmpdb
        token = ''.join(ascii_letters[int(random() * 52)]
                        for _ in range(10))
        tmp_name = 'trace_%s' % token
        opts.tmpdb = path.join(tmp_dir, tmp_name)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass
    # CPU count: 0 means every core, otherwise cap at the machine size
    opts.cpus = cpu_count() if opts.cpus == 0 else min(opts.cpus, cpu_count())
    # md5 digest of parameters identifies already-computed jobs
    try:
        if already_run(opts):
            if 'tmpdb' in opts and opts.tmpdb:
                remove(path.join(tmp_dir, tmp_name))
            exit('WARNING: exact same job already computed, see JOBs table above')
    except IOError:
        # new working directory
        pass
def check_options(opts): mkdir(opts.workdir) # for lustre file system.... if 'tmpdb' in opts and opts.tmpdb: dbdir = opts.tmpdb # tmp file dbfile = 'trace_%s' % (''.join([ascii_letters[int(random() * 52)] for _ in range(10)])) opts.tmpdb = path.join(dbdir, dbfile) try: copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb) except IOError: pass if opts.workdir1: # tmp file dbfile1 = 'trace1_%s' % (''.join([ascii_letters[int(random() * 52)] for _ in range(10)])) opts.tmpdb1 = path.join(dbdir, dbfile1) try: copyfile(path.join(opts.workdir1, 'trace.db'), opts.tmpdb1) except IOError: pass if opts.workdir2: # tmp file dbfile2 = 'trace2_%s' % (''.join([ascii_letters[int(random() * 52)] for _ in range(10)])) opts.tmpdb2 = path.join(dbdir, dbfile2) try: copyfile(path.join(opts.workdir2, 'trace.db'), opts.tmpdb2) except IOError: pass # check if job already run using md5 digestion of parameters if already_run(opts): if 'tmpdb' in opts and opts.tmpdb: remove(path.join(dbdir, dbfile)) if opts.workdir1: remove(path.join(dbdir, dbfile1)) if opts.workdir2: remove(path.join(dbdir, dbfile2)) exit('WARNING: exact same job already computed, see JOBs table above')
def check_options(opts):
    """Check the working directory, stage the trace DB, refuse duplicate jobs.

    Fix: error message typo 'wordir' -> 'workdir' (matches sibling tools).
    """
    # check resume
    if not path.exists(opts.workdir):
        raise IOError('ERROR: workdir not found.')
    # for lustre file system....
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        # tmp file
        dbfile = 'trace_%s' % (''.join(
            [ascii_letters[int(random() * 52)] for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass
    # check if job already run using md5 digestion of parameters
    if already_run(opts):
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(dbdir, dbfile))
        exit('WARNING: exact same job already computed, see JOBs table above')
def check_options(opts):
    """Validate the working directory and stop when the job already ran.

    Stages the trace DB in a tmp directory when requested (LUSTRE).
    Fix: error message typo 'wordir' -> 'workdir' (matches sibling tools).
    """
    # check resume
    if not path.exists(opts.workdir):
        raise IOError('ERROR: workdir not found.')
    # for lustre file system....
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        # tmp file
        dbfile = 'trace_%s' % (''.join([ascii_letters[int(random() * 52)]
                                        for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass
    # check if job already run using md5 digestion of parameters
    if already_run(opts):
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(dbdir, dbfile))
        exit('WARNING: exact same job already computed, see JOBs table above')
def check_options(opts):
    """Validate parsing options, set up logging and the version log, and
    refuse to re-run an identical job.

    :raises Exception: missing workdir/genome option
    :raises NotImplementedError: unsupported parsing type
    """
    if not opts.workdir:
        raise Exception('ERROR: output option required.')
    if opts.type != 'map':
        raise NotImplementedError('ERROR: not yet there')
    if not opts.genome:
        raise Exception('ERROR: genome parameter required.')
    if not opts.workdir:
        raise Exception('ERROR: workdir parameter required.')
    # check skip
    if not path.exists(opts.workdir) and opts.skip:
        print('WARNING: can use output files, found, not skipping...')
        opts.skip = False
    if opts.workdir.endswith('/'):
        opts.workdir = opts.workdir[:-1]
    # write log
    # newbie flag is not read again within this function
    newbie = False
    if not path.exists(opts.workdir):
        newbie = True
        mkdir(opts.workdir)
    log_format = '[PARSING] %(message)s'
    # reset logging
    logging.getLogger().handlers = []
    try:
        print('Writing log to ' + path.join(opts.workdir, 'process.log'))
        logging.basicConfig(level=logging.INFO,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log'),
                            filemode='a+')
    except IOError:
        # fall back to a secondary log file when the primary is unwritable
        logging.basicConfig(level=logging.DEBUG,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log2'),
                            filemode='a+')
    # to display log on stdout also
    logging.getLogger().addHandler(logging.StreamHandler())
    # write version log
    vlog_path = path.join(opts.workdir, 'TADbit_and_dependencies_versions.log')
    dependencies = get_dependencies_version()
    # NOTE(review): readlines() yields a list of lines while `dependencies`
    # is written below as a single blob — this comparison presumably always
    # differs; confirm before relying on it.
    if not path.exists(
            vlog_path) or open(vlog_path).readlines() != dependencies:
        logging.info('Writing versions of TADbit and dependencies')
        vlog = open(vlog_path, 'w')
        vlog.write(dependencies)
        vlog.close()
    # for lustre file system....
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        # tmp file
        dbfile = 'trace_%s' % (''.join(
            [ascii_letters[int(random() * 52)] for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass
    # check if job already run using md5 digestion of parameters
    try:
        if already_run(opts):
            if 'tmpdb' in opts and opts.tmpdb:
                remove(path.join(dbdir, dbfile))
            exit('WARNING: exact same job already computed, see JOBs table above')
    except OSError:
        pass
def check_options(opts):
    """Validate mapping options: mapper binary and version, fast-fragment
    constraints, restriction enzymes, input paths, logging, mapper extra
    parameters; abort when an identical job was already computed.
    """
    if not opts.mapper_binary:
        if opts.mapper == 'gem':
            opts.mapper_binary = 'gem-mapper'
        else:
            opts.mapper_binary = opts.mapper
    opts.mapper_binary = which(opts.mapper_binary)
    if not opts.mapper_binary:
        raise Exception('\n\nERROR: Mapper binary not found, for GEM install it from:'
                        '\nhttps://sourceforge.net/projects/gemlibrary/files/gem-library/Binary%20pre-release%202/'
                        '\n - Download the GEM-binaries-Linux-x86_64-core_i3 if'
                        'have a recent computer, the '
                        'GEM-binaries-Linux-x86_64-core_2 otherwise\n - '
                        'Uncompress with "tar xjvf GEM-binaries-xxx.tbz2"\n - '
                        'Copy the binary gem-mapper to /usr/local/bin/ for '
                        'example (somewhere in your PATH).\n\nNOTE: GEM does '
                        'not provide any binary for MAC-OS.')
    opts.gem_version = 0
    if opts.mapper == 'gem':
        opts.gem_version = None
        try:
            out, _ = Popen([opts.mapper_binary, '--version'],
                           stdout=PIPE, stderr=STDOUT,
                           universal_newlines=True).communicate()
            # NOTE(review): takes the second character of the version output
            # as the major version — verify against the actual
            # `gem-mapper --version` output format.
            opts.gem_version = int(out[1])
        except ValueError as e:
            # unparsable version output: assume the older GEM v2
            opts.gem_version = 2
            print('Falling to gem v2')
    if opts.fast_fragment:
        if opts.gem_version < 3:
            raise Exception('ERROR: Fast fragment mapping needs GEM v3')
        if not opts.fastq2 or not path.exists(opts.fastq2):
            raise Exception('ERROR: Fast fragment mapping needs both fastq files. '
                            'Please specify --fastq2')
        if opts.read != 0:
            raise Exception('ERROR: Fast fragment mapping needs to be specified with --read 0')
        if not opts.genome:
            raise Exception('ERROR: Fast fragment mapping needs '
                            'the genome parameter.')
    # check RE name
    if opts.renz == ['CHECK']:
        print('\nSearching for most probable restriction enzyme in file: %s' % (opts.fastq))
        try:
            pat, enz, pv = identify_re(opts.fastq, nreads=100000)
            print(' -> Most probable digested site: %s (pv: %f)' % (pat, pv))
            print(' -> Enzymes matching: %s' % (', '.join(enz)))
        except ValueError:
            print(' -> Nothing found...')
        exit()
    for n, renz in enumerate(opts.renz):
        if renz == 'NONE':
            opts.renz[n] = None
            continue
        try:
            # lookup only: raises KeyError for unknown enzyme names
            _ = RESTRICTION_ENZYMES[renz]
        except KeyError:
            print('\n\nERROR: restriction enzyme %s not found.' % (renz) +
                  'Use one of:\n\n' + ' '.join(sorted(RESTRICTION_ENZYMES)) +
                  '\n\n')
            raise KeyError()
        except AttributeError:
            pass
    # check skip
    if not path.exists(opts.workdir) and opts.skip:
        print('WARNING: can use output files, found, not skipping...')
        opts.skip = False
    # number of cpus
    if opts.cpus == 0:
        opts.cpus = cpu_count()
    else:
        opts.cpus = min(opts.cpus, cpu_count())
    # check paths
    if opts.mapper == 'gem' and not path.exists(opts.index):
        raise IOError('ERROR: index file not found at ' + opts.index)
    if not path.exists(opts.fastq):
        raise IOError('ERROR: FASTQ file not found at ' + opts.fastq)
    if not is_fastq(opts.fastq):
        raise IOError(('ERROR: FASTQ file %s wrong format, check') % (opts.fastq))
    try:
        # windows given as 'start:end' strings on the command line
        opts.windows = [[int(i) for i in win.split(':')]
                        for win in opts.windows]
    except TypeError:
        pass
    mkdir(opts.workdir)
    # write log
    # if opts.mapping_only:
    log_format = '[MAPPING {} READ{}] %(message)s'.format(
        opts.fastq, opts.read)
    # else:
    #     log_format = '[DEFAULT] %(message)s'
    # reset logging
    logging.getLogger().handlers = []
    try:
        print('Writing log to ' + path.join(opts.workdir, 'process.log'))
        logging.basicConfig(level=logging.INFO,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log'),
                            filemode='a+')
    except IOError:
        # fall back to a secondary log file when the primary is unwritable
        logging.basicConfig(level=logging.DEBUG,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log2'),
                            filemode='a+')
    # to display log on stdout also
    logging.getLogger().addHandler(logging.StreamHandler())
    # write version log
    vlog_path = path.join(opts.workdir, 'TADbit_and_dependencies_versions.log')
    dependencies = get_dependencies_version()
    # NOTE(review): readlines() yields a list while `dependencies` is
    # written below as one blob — comparison presumably always differs.
    if not path.exists(
            vlog_path) or open(vlog_path).readlines() != dependencies:
        logging.info('Writing versions of TADbit and dependencies')
        vlog = open(vlog_path, 'w')
        vlog.write(dependencies)
        vlog.close()
    # check mapper extra options
    if opts.mapper_param:
        if (len(opts.mapper_param) == 1 and
                ('-' in opts.mapper_param[0] or '--' in opts.mapper_param[0])):
            # Single string surrounded by quotes
            opts.mapper_param = opts.mapper_param[0].split()
        else:
            opts.mapper_param = dict([o.split(':') for o in opts.mapper_param])
    else:
        opts.mapper_param = {}
    if opts.mapper == 'gem' and opts.gem_version < 3:
        gem_valid_option = set([
            "granularity", "q", "quality-format", "gem-quality-threshold",
            "mismatch-alphabet", "m", "e", "min-matched-bases",
            "max-big-indel-length", "s", "strata-after-best", "fast-mapping",
            "unique-mapping", "d", "D", "allow-incomplete-strata",
            "max-decoded-matches", "min-decoded-strata", "p",
            "paired-end-alignment", "b", "map-both-ends", "min-insert-size",
            "max-insert-size", "E", "max-extendable-matches",
            "max-extensions-per-match", "unique-pairing"
        ])
        for k in opts.mapper_param:
            if not k in gem_valid_option:
                # NOTE(review): adjacent literals concatenate without a
                # space ("optionor"); also 'suported' typo — fix candidates.
                raise NotImplementedError(
                    ('ERROR: option "%s" not a valid GEM option'
                     'or not suported by this tool.') % k)
    # create empty DB if don't exists
    dbpath = path.join(opts.workdir, 'trace.db')
    open(dbpath, 'a').close()
    # for lustre file system....
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        # tmp file
        dbfile = 'trace_%s' % (''.join(
            [ascii_letters[int(random() * 52)] for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass
    # check if job already run using md5 digestion of parameters
    if already_run(opts):
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(dbdir, dbfile))
        exit('WARNING: exact same job already computed, see JOBs table above')
def check_options(opts):
    """Validate mapping options (legacy GEM tool), configure logging and
    GEM extra parameters, and refuse to redo an identical job.

    Fixes: ``filemode='aw'`` is not a valid :func:`open` mode (raises
    ``ValueError`` on Python 3) — replaced with ``'a+'`` as in the newer
    version of this function; the invalid-option message had a missing
    space ("optionor") and a 'suported' typo; print statement converted
    to a function call.
    """
    if opts.cfg:
        get_options_from_cfg(opts.cfg, opts)
    opts.gem_binary = which(opts.gem_binary)
    if not opts.gem_binary:
        raise Exception('\n\nERROR: GEM binary not found, install it from:'
                        '\nhttps://sourceforge.net/projects/gemlibrary/files/gem-library/Binary%20pre-release%202/'
                        '\n - Download the GEM-binaries-Linux-x86_64-core_i3 if'
                        'have a recent computer, the '
                        'GEM-binaries-Linux-x86_64-core_2 otherwise\n - '
                        'Uncompress with "tar xjvf GEM-binaries-xxx.tbz2"\n - '
                        'Copy the binary gem-mapper to /usr/local/bin/ for '
                        'example (somewhere in your PATH).\n\nNOTE: GEM does '
                        'not provide any binary for MAC-OS.')
    # check RE name
    try:
        # lookup only: raises KeyError for unknown enzyme names
        _ = RESTRICTION_ENZYMES[opts.renz]
    except KeyError:
        print('\n\nERROR: restriction enzyme not found. Use one of:\n\n' +
              ' '.join(sorted(RESTRICTION_ENZYMES)) + '\n\n')
        raise KeyError()
    except AttributeError:
        pass
    # check skip
    if not path.exists(opts.workdir) and opts.skip:
        print('WARNING: can use output files, found, not skipping...')
        opts.skip = False
    # number of cpus
    if opts.cpus == 0:
        opts.cpus = cpu_count()
    else:
        opts.cpus = min(opts.cpus, cpu_count())
    # check paths
    if not path.exists(opts.index):
        raise IOError('ERROR: index file not found at ' + opts.index)
    if not path.exists(opts.fastq):
        raise IOError('ERROR: FASTQ file not found at ' + opts.fastq)
    # create tmp directory
    if not opts.tmp:
        opts.tmp = opts.workdir + '_tmp_r%d' % opts.read
    try:
        # windows given as 'start:end' strings on the command line
        opts.windows = [[int(i) for i in win.split(':')]
                        for win in opts.windows]
    except TypeError:
        pass
    mkdir(opts.workdir)
    # write log
    # if opts.mapping_only:
    log_format = '[MAPPING {} READ{}] %(message)s'.format(opts.fastq,
                                                          opts.read)
    # else:
    #     log_format = '[DEFAULT] %(message)s'
    # reset logging
    logging.getLogger().handlers = []
    try:
        print('Writing log to ' + path.join(opts.workdir, 'process.log'))
        # 'aw' is not a valid open() mode; append with read ('a+') instead
        logging.basicConfig(level=logging.INFO,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log'),
                            filemode='a+')
    except IOError:
        logging.basicConfig(level=logging.DEBUG,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log2'),
                            filemode='a+')
    # to display log on stdout also
    logging.getLogger().addHandler(logging.StreamHandler())
    # write version log
    vlog_path = path.join(opts.workdir, 'TADbit_and_dependencies_versions.log')
    dependencies = get_dependencies_version()
    # NOTE(review): readlines() returns a list while `dependencies` is a
    # single blob written below — comparison presumably always differs;
    # kept as-is to preserve behavior.
    if not path.exists(vlog_path) or open(vlog_path).readlines() != dependencies:
        logging.info('Writing versions of TADbit and dependencies')
        vlog = open(vlog_path, 'w')
        vlog.write(dependencies)
        vlog.close()
    # check GEM mapper extra options
    if opts.gem_param:
        opts.gem_param = dict([o.split(':') for o in opts.gem_param])
    else:
        opts.gem_param = {}
    gem_valid_option = set(["granularity", "q", "quality-format",
                            "gem-quality-threshold", "mismatch-alphabet",
                            "m", "e", "min-matched-bases",
                            "max-big-indel-length", "s", "strata-after-best",
                            "fast-mapping", "unique-mapping", "d", "D",
                            "allow-incomplete-strata", "max-decoded-matches",
                            "min-decoded-strata", "p", "paired-end-alignment",
                            "b", "map-both-ends", "min-insert-size",
                            "max-insert-size", "E", "max-extendable-matches",
                            "max-extensions-per-match", "unique-pairing"])
    for k in opts.gem_param:
        if k not in gem_valid_option:
            raise NotImplementedError(('ERROR: option "%s" not a valid GEM option '
                                       'or not supported by this tool.') % k)
    # check if job already run using md5 digestion of parameters
    if already_run(opts):
        exit('WARNING: exact same job already computed, see JOBs table above')