def main():
    """Entry point: draw a Hi-C QC plot, or run the full read mapping.

    Reads everything from the parsed command-line options; writes the
    mapping outputs under ``opts.output`` and appends a machine-readable
    record to ``trace.log``.
    """
    opts = get_options()

    # QC-only mode: generate the quality figure next to the FASTQ name and stop
    if opts.quality_plot:
        logging.info('Generating Hi-C QC plot at:\n ' +
                     path.join(opts.output,
                               path.split(opts.fastq)[-1] + '.pdf'))
        quality_plot(opts.fastq, r_enz=opts.renz, nreads=100000,
                     paired=False,
                     savefig=path.join(opts.output,
                                       path.split(opts.fastq)[-1] + '.pdf'))
        return

    windows = opts.windows

    logging.info('mapping %s read %s to %s', opts.fastq, opts.read,
                 opts.output)
    outfiles = full_mapping(opts.index, opts.fastq,
                            path.join(opts.output,
                                      '01_mapped_r' + opts.read),
                            opts.renz, temp_dir=opts.tmp,
                            frag_map=opts.strategy == 'frag', clean=True,
                            windows=windows, get_nread=True)

    # write machine log; flock guards against concurrent jobs appending
    # to the same trace.log
    with open(path.join(opts.output, 'trace.log'), "a") as mlog:
        fcntl.flock(mlog, fcntl.LOCK_EX)
        mlog.write('\n'.join([('# MAPPED READ%s PATH\t%d\t'
                               % (opts.read, num)) + out
                              for out, num in outfiles]) + '\n')
        fcntl.flock(mlog, fcntl.LOCK_UN)

    logging.info('cleaning temporary files')
    # clean
    # BUGFIX: was system('rm -rf ' + opts.tmp) -- building a shell command
    # from an option string breaks on spaces and allows shell injection.
    # shutil.rmtree(ignore_errors=True) matches rm -rf semantics safely.
    import shutil
    shutil.rmtree(opts.tmp, ignore_errors=True)
def run(opts):
    """Run the mapping pipeline for one read end of a Hi-C experiment.

    Either produces the QC plot (``opts.quality_plot``) and returns, or
    maps ``opts.fastq`` with GEM, records the job in the sqlite DB and
    appends the produced files to ``trace.log`` under a cooperative
    lock-file.
    """
    check_options(opts)
    launch_time = time.localtime()

    # hash appended to output file names so runs with different
    # parameters do not overwrite each other
    param_hash = digest_parameters(opts, get_md5=True)

    if opts.quality_plot:
        logging.info('Generating Hi-C QC plot at:\n ' +
                     path.join(opts.workdir,
                               path.split(opts.fastq)[-1] + '.pdf'))
        dangling_ends, ligated = quality_plot(
            opts.fastq, r_enz=opts.renz, nreads=100000, paired=False,
            savefig=path.join(opts.workdir,
                              path.split(opts.fastq)[-1] + '.pdf'))
        logging.info(' - Dangling-ends (sensu-stricto): %.3f%%',
                     dangling_ends)
        logging.info(' - Ligation sites: %.3f%%', ligated)
        return

    logging.info('mapping %s read %s to %s', opts.fastq, opts.read,
                 opts.workdir)
    outfiles = full_mapping(opts.index, opts.fastq,
                            path.join(opts.workdir,
                                      '01_mapped_r%d' % (opts.read)),
                            r_enz=opts.renz, temp_dir=opts.tmp,
                            nthreads=opts.cpus,
                            frag_map=not opts.iterative,
                            clean=not opts.keep_tmp, windows=opts.windows,
                            get_nread=True, skip=opts.skip,
                            suffix=param_hash, **opts.gem_param)

    # adjust line count: when windows were skipped, subtract the reads
    # already consumed by the previous window's output file
    if opts.skip:
        for i, (out, _) in enumerate(outfiles[1:], 1):
            outfiles[i] = out, outfiles[i - 1][1] - sum(
                1 for _ in open(outfiles[i - 1][0]))

    finish_time = time.localtime()

    # save all job information to sqlite DB
    save_to_db(opts, outfiles, launch_time, finish_time)

    # write machine log, serialized between jobs by the __lock_log file.
    # BUGFIX: the lock is now released in a finally block -- previously a
    # failure while writing trace.log skipped the remove() and left a
    # stale lock that blocked every subsequent job forever.
    while path.exists(path.join(opts.workdir, '__lock_log')):
        time.sleep(0.5)
    open(path.join(opts.workdir, '__lock_log'), 'a').close()
    try:
        with open(path.join(opts.workdir, 'trace.log'), "a") as mlog:
            mlog.write('\n'.join([
                ('# MAPPED READ%s\t%d\t%s' % (opts.read, num, out))
                for out, num in outfiles]) + '\n')
    finally:
        # release lock
        try:
            remove(path.join(opts.workdir, '__lock_log'))
        except OSError:
            pass
def run(opts):
    """Run the mapping pipeline for one read end of a Hi-C experiment.

    Either produces the QC plot (``opts.quality_plot``) and returns, or
    maps ``opts.fastq`` with GEM, records the job in the sqlite DB,
    appends the produced files to ``trace.log`` and optionally removes
    the temporary directory.
    """
    check_options(opts)
    launch_time = time.localtime()

    # hash appended to output file names so runs with different
    # parameters do not overwrite each other
    param_hash = digest_parameters(opts, get_md5=True)

    if opts.quality_plot:
        logging.info('Generating Hi-C QC plot at:\n ' +
                     path.join(opts.workdir,
                               path.split(opts.fastq)[-1] + '.pdf'))
        dangling_ends, ligated = quality_plot(
            opts.fastq, r_enz=opts.renz, nreads=100000, paired=False,
            savefig=path.join(opts.workdir,
                              path.split(opts.fastq)[-1] + '.pdf'))
        logging.info(' - Dangling-ends (sensu-stricto): %.3f%%',
                     dangling_ends)
        logging.info(' - Ligation sites: %.3f%%', ligated)
        return

    logging.info('mapping %s read %s to %s', opts.fastq, opts.read,
                 opts.workdir)
    outfiles = full_mapping(opts.index, opts.fastq,
                            path.join(opts.workdir,
                                      '01_mapped_r%d' % (opts.read)),
                            opts.renz, temp_dir=opts.tmp,
                            nthreads=opts.cpus,
                            frag_map=not opts.iterative,
                            # BUGFIX: was clean=opts.keep_tmp, i.e. the
                            # mapper cleaned its intermediates exactly when
                            # the user asked to KEEP them; must be negated
                            # (consistent with the rm -rf guard below).
                            clean=not opts.keep_tmp,
                            windows=opts.windows, get_nread=True,
                            skip=opts.skip, suffix=param_hash,
                            **opts.gem_param)

    # adjust line count: when windows were skipped, subtract the reads
    # already consumed by the previous window's output file
    if opts.skip:
        for i, (out, _) in enumerate(outfiles[1:], 1):
            outfiles[i] = out, outfiles[i - 1][1] - sum(
                1 for _ in open(outfiles[i - 1][0]))

    finish_time = time.localtime()

    # save all job information to sqlite DB
    save_to_db(opts, outfiles, launch_time, finish_time)

    # write machine log; flock guards concurrent appenders
    with open(path.join(opts.workdir, 'trace.log'), "a") as mlog:
        fcntl.flock(mlog, fcntl.LOCK_EX)
        mlog.write('\n'.join([
            ('# MAPPED READ%s\t%d\t%s' % (opts.read, num, out))
            for out, num in outfiles]) + '\n')
        fcntl.flock(mlog, fcntl.LOCK_UN)

    # clean
    if not opts.keep_tmp:
        logging.info('cleaning temporary files')
        system('rm -rf ' + opts.tmp)
def main():
    """Entry point: draw a Hi-C QC plot, or run the full read mapping.

    Behaviour is driven entirely by the parsed command-line options;
    mapping output lands under ``opts.output`` and a machine-readable
    record is appended to ``trace.log``.
    """
    opts = get_options()

    # QC-only mode: generate the quality figure next to the FASTQ name and stop
    if opts.quality_plot:
        logging.info('Generating Hi-C QC plot at:\n ' +
                     path.join(opts.output,
                               path.split(opts.fastq)[-1] + '.pdf'))
        quality_plot(opts.fastq, r_enz=opts.renz, nreads=100000,
                     paired=False,
                     savefig=path.join(opts.output,
                                       path.split(opts.fastq)[-1] + '.pdf'))
        return

    windows = opts.windows

    logging.info('mapping %s read %s to %s', opts.fastq, opts.read,
                 opts.output)
    outfiles = full_mapping(opts.index, opts.fastq,
                            path.join(opts.output,
                                      '01_mapped_r' + opts.read),
                            opts.renz, temp_dir=opts.tmp,
                            frag_map=opts.strategy == 'frag', clean=True,
                            windows=windows, get_nread=True)

    # write machine log; flock guards against concurrent jobs appending
    # to the same trace.log
    with open(path.join(opts.output, 'trace.log'), "a") as mlog:
        fcntl.flock(mlog, fcntl.LOCK_EX)
        mlog.write('\n'.join([('# MAPPED READ%s PATH\t%d\t'
                               % (opts.read, num)) + out
                              for out, num in outfiles]) + '\n')
        fcntl.flock(mlog, fcntl.LOCK_UN)

    logging.info('cleaning temporary files')
    # clean
    # BUGFIX: was system('rm -rf ' + opts.tmp) -- building a shell command
    # from an option string breaks on spaces and allows shell injection.
    # shutil.rmtree(ignore_errors=True) matches rm -rf semantics safely.
    import shutil
    shutil.rmtree(opts.tmp, ignore_errors=True)
def run(opts):
    """Run the mapping pipeline for a Hi-C experiment.

    Always generates the QC plot; then (unless ``opts.skip_mapping``)
    maps the reads, either per-end with ``full_mapping`` or both ends at
    once with ``fast_fragment_mapping``, records the job in the sqlite
    DB and appends the produced files to ``trace.log`` under a
    cooperative lock-file.
    """
    check_options(opts)
    launch_time = time.localtime()

    # hash appended to output file names so runs with different
    # parameters do not overwrite each other
    param_hash = digest_parameters(opts, get_md5=True)

    # create tmp directory
    if not opts.tmp:
        temp_dir = opts.workdir + '_tmp_r%d_%s' % (opts.read, param_hash)
    else:
        temp_dir = path.join(opts.tmp,
                             'TADbit_tmp_r%d_%s' % (opts.read, param_hash))

    # QC plot
    fig_path = path.join(opts.workdir, '%s_%s_%s.png' % (
        path.split(opts.fastq)[-1],
        '-'.join(map(str, opts.renz)), param_hash))
    logging.info('Generating Hi-C QC plot')
    dangling_ends, ligated = quality_plot(opts.fastq, r_enz=opts.renz,
                                          nreads=100000, paired=False,
                                          savefig=fig_path)
    # one statistic per restriction enzyme
    for renz in dangling_ends:
        logging.info(' - Dangling-ends (sensu-stricto): %.3f%%',
                     dangling_ends[renz])
    for renz in ligated:
        logging.info(' - Ligation sites: %.3f%%', ligated[renz])

    if opts.skip_mapping:
        save_to_db(opts, dangling_ends, ligated, fig_path, [],
                   launch_time, time.localtime())
        return

    # Mapping
    if opts.fast_fragment:
        # map both ends and intersect them in a single pass
        mkdir(path.join(opts.workdir, '03_filtered_reads'))
        logging.info('parsing genomic sequence')
        try:
            # allows the use of pickle genome to make it faster
            genome_seq = load(open(opts.genome[0], 'rb'))
        except (UnpicklingError, KeyError):
            genome_seq = parse_fasta(opts.genome)
        logging.info('mapping %s and %s to %s', opts.fastq, opts.fastq2,
                     opts.workdir)
        outfiles = fast_fragment_mapping(
            opts.index, opts.fastq, opts.fastq2, opts.renz, genome_seq,
            path.join(opts.workdir, '03_filtered_reads',
                      'all_r1-r2_intersection_%s.tsv' % param_hash),
            clean=not opts.keep_tmp, get_nread=True,
            mapper_binary=opts.mapper_binary,
            mapper_params=opts.mapper_param, suffix=param_hash,
            temp_dir=temp_dir, nthreads=opts.cpus)
    else:
        logging.info('mapping %s read %s to %s', opts.fastq, opts.read,
                     opts.workdir)
        outfiles = full_mapping(
            opts.index, opts.fastq,
            path.join(opts.workdir, '01_mapped_r%d' % (opts.read)),
            mapper=opts.mapper, r_enz=opts.renz, temp_dir=temp_dir,
            nthreads=opts.cpus, frag_map=not opts.iterative,
            clean=not opts.keep_tmp, windows=opts.windows,
            get_nread=True, skip=opts.skip, suffix=param_hash,
            mapper_binary=opts.mapper_binary,
            mapper_params=opts.mapper_param)

    # adjust line count: when windows were skipped, subtract the reads
    # already consumed by the previous window's output file
    if opts.skip:
        for i, (out, _) in enumerate(outfiles[1:], 1):
            outfiles[i] = out, outfiles[i - 1][1] - sum(
                1 for _ in open(outfiles[i - 1][0]))

    finish_time = time.localtime()

    # save all job information to sqlite DB
    # BUGFIX: save_to_db was called twice with identical arguments (once
    # bare, once inside the try), recording every mapping job twice in
    # the DB; only the guarded call is kept.
    try:
        save_to_db(opts, dangling_ends, ligated, fig_path, outfiles,
                   launch_time, finish_time)
    except Exception:
        # release DB lock before bailing out
        remove(path.join(opts.workdir, '__lock_db'))
        print_exc()
        exit(1)

    # write machine log, serialized between jobs by the __lock_log file
    try:
        while path.exists(path.join(opts.workdir, '__lock_log')):
            time.sleep(0.5)
        open(path.join(opts.workdir, '__lock_log'), 'a').close()
        with open(path.join(opts.workdir, 'trace.log'), "a") as mlog:
            mlog.write('\n'.join([('# MAPPED READ%s\t%d\t%s'
                                   % (opts.read, num, out))
                                  for out, num in outfiles]) + '\n')
        # release lock
        try:
            remove(path.join(opts.workdir, '__lock_log'))
        except OSError:
            pass
    except Exception:
        # BUGFIX: on failure this branch removed '__lock_db' (copy-paste
        # from the DB section) while the lock actually held here is
        # '__lock_log'; release the correct lock, tolerating its absence.
        try:
            remove(path.join(opts.workdir, '__lock_log'))
        except OSError:
            pass
        print_exc()
        exit(1)

    # clean
    if not opts.keep_tmp:
        logging.info('cleaning temporary files')
        system('rm -rf ' + temp_dir)
# NOTE(review): Python 2 syntax (print statements) -- this chunk predates
# the py3 migration; keep it py2 or port the whole file at once.
from pytadbit.mapping.filter import filter_reads, apply_filter

# Dispatch on the mapping strategy being benchmarked:
#   mapper == 1 -> iterative_mapping per read end, results parsed as SAM
#   mapper == 2 -> full_mapping with explicit windows, frag_map disabled
#   mapper == 3 -> full_mapping with explicit windows (frag_map default)
# r_beg*/r_end* (window coordinates), fastq, the output/temp dirs and
# `mapper` itself are defined outside this chunk.
if mapper == 1:
    print 'read 1'
    # end coordinates shifted by +2 -- presumably to make the window
    # bounds inclusive of the ligation site; TODO confirm against
    # iterative_mapping's coordinate convention
    outfiles1 = iterative_mapping(gem_index_path, fastq, out_map_dir1,
                                  r_beg1, [e + 2 for e in r_end1],
                                  temp_dir=temp_dir1)
    print 'read 2'
    outfiles2 = iterative_mapping(gem_index_path, fastq, out_map_dir2,
                                  r_beg2, [e + 2 for e in r_end2],
                                  temp_dir=temp_dir2)
    parse_thing = parse_sam
elif mapper == 2:
    print 'read 1'
    outfiles1 = full_mapping(gem_index_path, fastq, out_map_dir1,
                             'HindIII', temp_dir=temp_dir1,
                             frag_map=False,
                             windows=(zip(*(r_beg1, r_end1))))
    print 'read 2'
    outfiles2 = full_mapping(gem_index_path, fastq, out_map_dir2,
                             'HindIII', temp_dir=temp_dir2,
                             frag_map=False,
                             windows=(zip(*(r_beg2, r_end2))))
    parse_thing = parse_map
elif mapper == 3:
    # NOTE(review): unlike branches 1 and 2, this branch does not set
    # parse_thing -- if mapper == 3 the variable keeps whatever value it
    # had before (or is undefined); verify downstream usage.
    print 'read 1'
    outfiles1 = full_mapping(gem_index_path, fastq, out_map_dir1,
                             'HindIII', temp_dir=temp_dir1,
                             windows=(zip(*(r_beg1, r_end1))))
    print 'read 2'
    outfiles2 = full_mapping(gem_index_path, fastq, out_map_dir2,
                             'HindIII', temp_dir=temp_dir2,
                             windows=(zip(*(r_beg2, r_end2))))
# frag_map arrives as a string (e.g. from the command line / cluster
# submission script); normalise it to a boolean and derive the mapping
# windows accordingly.
if frag_map == 'True':
    frag_map = True
    windows = None  # fragment-based mapping needs no explicit windows
elif frag_map == 'False':
    frag_map = False
    # iterative mapping: windows of growing length 20, 25, ... read_length,
    # all anchored at position 1
    range_stop = range(20, int(read_length) + 1, 5)
    range_start = [1] * len(range_stop)
    windows = (zip(*(range_start, range_stop)))
else:
    # BUGFIX: any other value previously fell through, leaving both
    # `frag_map` (still a string, hence truthy) and `windows` in an
    # inconsistent/undefined state and crashing later with a NameError;
    # fail fast with a clear message instead.
    raise ValueError("frag_map must be 'True' or 'False', got %r"
                     % (frag_map,))

# call mapping function for read1 and read2
for infile in [paired1, paired2]:
    # per-sample base name: strip directories and the .fastq.gz suffix
    bname = infile.split("/")[-1].replace(".fastq.gz", "")
    maps = full_mapping(gem_index_path=gem_index, fastq_path=infile,
                        out_map_dir='%s/%s/' % (MAP_DIR, bname),
                        r_enz=restriction_enzyme, windows=windows,
                        temp_dir='%s/tmp_dir_%s/' % (MAP_DIR, bname),
                        frag_map=frag_map, nthreads=slots)

# ============================================================================
# Process mapped reads according to restriction enzyme fragments,
# merging mapped "read1" and "read2"
# ============================================================================

# Import python modules/functions
import glob
from pytadbit.parsers.map_parser import parse_map
from pytadbit.parsers.genome_parser import parse_fasta
from pytadbit.mapping import get_intersection

# Load the genome