def main():
    """Map Hi-C FASTQ reads to a reference index, or only draw a QC plot.

    All inputs come from the command line via ``get_options()``.  When
    ``--quality_plot`` is set, only the QC figure is produced and the
    function returns early; otherwise reads are mapped with
    ``full_mapping`` and the resulting output paths and read counts are
    appended to ``trace.log`` under an exclusive advisory lock.
    """
    opts = get_options()
    # QC-only mode: save the quality figure next to the output dir and stop.
    if opts.quality_plot:
        logging.info('Generating Hi-C QC plot at:\n ' +
                     path.join(opts.output, path.split(opts.fastq)[-1] + '.pdf'))
        quality_plot(opts.fastq, r_enz=opts.renz, nreads=100000, paired=False,
                     savefig=path.join(opts.output,
                                       path.split(opts.fastq)[-1] + '.pdf'))
        return
    windows = opts.windows
    logging.info('mapping %s read %s to %s', opts.fastq, opts.read, opts.output)
    # NOTE(review): '01_mapped_r' + opts.read implies opts.read is a string
    # here (other variants in this file format it with %d) -- confirm
    # against get_options().
    outfiles = full_mapping(opts.index, opts.fastq,
                            path.join(opts.output, '01_mapped_r' + opts.read),
                            opts.renz, temp_dir=opts.tmp,
                            frag_map=opts.strategy=='frag', clean=True,
                            windows=windows, get_nread=True)
    # write machine log; flock serialises concurrent runs appending to the
    # shared trace.log.
    with open(path.join(opts.output, 'trace.log'), "a") as mlog:
        fcntl.flock(mlog, fcntl.LOCK_EX)
        mlog.write('\n'.join([('# MAPPED READ%s PATH\t%d\t' % (opts.read, num)) + out
                              for out, num in outfiles]) + '\n')
        fcntl.flock(mlog, fcntl.LOCK_UN)
    logging.info('cleaning temporary files')
    # clean
    # HACK: temp dir removed through the shell; opts.tmp comes from the CLI.
    system('rm -rf ' + opts.tmp)
def run(opts):
    """Run one mapping job: QC-plot-only mode, or full read mapping.

    Parsed/validated ``opts`` drive everything: in ``--quality_plot`` mode
    the QC figure and dangling-end/ligation statistics are produced and the
    function returns; otherwise reads are mapped with ``full_mapping``, the
    job is recorded in the SQLite DB, and results are appended to
    ``trace.log`` guarded by a ``__lock_log`` lock file.
    """
    check_options(opts)
    launch_time = time.localtime()
    # hash appended to output file names to disambiguate parameter sets
    param_hash = digest_parameters(opts, get_md5=True)
    # QC-only mode: draw the figure, report statistics and stop.
    if opts.quality_plot:
        logging.info('Generating Hi-C QC plot at:\n ' +
                     path.join(opts.workdir, path.split(opts.fastq)[-1] + '.pdf'))
        dangling_ends, ligated = quality_plot(opts.fastq, r_enz=opts.renz,
                                              nreads=100000, paired=False,
                                              savefig=path.join(
                                                  opts.workdir,
                                                  path.split(opts.fastq)[-1] + '.pdf'))
        logging.info(' - Dangling-ends (sensu-stricto): %.3f%%', dangling_ends)
        logging.info(' - Ligation sites: %.3f%%', ligated)
        return
    logging.info('mapping %s read %s to %s', opts.fastq, opts.read,
                 opts.workdir)
    outfiles = full_mapping(opts.index, opts.fastq,
                            path.join(opts.workdir,
                                      '01_mapped_r%d' % (opts.read)),
                            r_enz=opts.renz, temp_dir=opts.tmp,
                            nthreads=opts.cpus, frag_map=not opts.iterative,
                            clean=not opts.keep_tmp, windows=opts.windows,
                            get_nread=True, skip=opts.skip,
                            suffix=param_hash, **opts.gem_param)
    # adjust line count: in skip mode each entry's count is derived by
    # subtracting the previous file's line count from its running total
    if opts.skip:
        for i, (out, _) in enumerate(outfiles[1:], 1):
            outfiles[i] = out, outfiles[i-1][1] - sum(1 for _ in open(outfiles[i-1][0]))
    finish_time = time.localtime()
    # save all job information to sqlite DB
    save_to_db(opts, outfiles, launch_time, finish_time)
    # write machine log, guarded by a cooperative lock file.
    # NOTE(review): there is a window between the exists() poll and the
    # lock-file creation where two processes can both proceed; also the
    # lock is not removed if writing trace.log raises -- confirm intended.
    while path.exists(path.join(opts.workdir, '__lock_log')):
        time.sleep(0.5)
    open(path.join(opts.workdir, '__lock_log'), 'a').close()
    with open(path.join(opts.workdir, 'trace.log'), "a") as mlog:
        mlog.write('\n'.join([
            ('# MAPPED READ%s\t%d\t%s' % (opts.read, num, out))
            for out, num in outfiles]) + '\n')
    # release lock
    try:
        remove(path.join(opts.workdir, '__lock_log'))
    except OSError:
        pass
def main():
    """Command-line entry point: draw a QC plot only, or map the reads.

    Options come from ``get_options()``.  In ``--quality_plot`` mode only
    the QC figure is written and the function returns; otherwise reads are
    mapped and the resulting paths/read counts are appended to
    ``trace.log`` under an exclusive advisory lock before the temporary
    directory is removed.
    """
    opts = get_options()

    if opts.quality_plot:
        # Quality-control mode: render the figure and exit without mapping.
        fig_path = path.join(opts.output, path.split(opts.fastq)[-1] + '.pdf')
        logging.info('Generating Hi-C QC plot at:\n ' + fig_path)
        quality_plot(opts.fastq, r_enz=opts.renz, nreads=100000,
                     paired=False, savefig=fig_path)
        return

    logging.info('mapping %s read %s to %s', opts.fastq, opts.read, opts.output)
    out_prefix = path.join(opts.output, '01_mapped_r' + opts.read)
    outfiles = full_mapping(opts.index, opts.fastq, out_prefix, opts.renz,
                            temp_dir=opts.tmp,
                            frag_map=opts.strategy == 'frag', clean=True,
                            windows=opts.windows, get_nread=True)

    # Append mapping results to the shared machine log; the exclusive
    # flock keeps concurrent runs from interleaving their writes.
    trace_lines = [('# MAPPED READ%s PATH\t%d\t' % (opts.read, num)) + out
                   for out, num in outfiles]
    with open(path.join(opts.output, 'trace.log'), "a") as mlog:
        fcntl.flock(mlog, fcntl.LOCK_EX)
        mlog.write('\n'.join(trace_lines) + '\n')
        fcntl.flock(mlog, fcntl.LOCK_UN)

    logging.info('cleaning temporary files')
    # Remove the temporary working directory through the shell.
    system('rm -rf ' + opts.tmp)
def run(opts):
    """Run one mapping job: QC-plot-only mode, or full read mapping.

    In ``--quality_plot`` mode the QC figure and dangling-end/ligation
    statistics are produced and the function returns.  Otherwise reads are
    mapped with ``full_mapping``, job information is stored in the SQLite
    DB, results are appended to ``trace.log`` under an exclusive flock,
    and (unless ``--keep_tmp``) the temporary directory is removed.
    """
    check_options(opts)
    launch_time = time.localtime()
    # hash appended to output file names to disambiguate parameter sets
    param_hash = digest_parameters(opts, get_md5=True)
    # QC-only mode: draw the figure, report statistics and stop.
    if opts.quality_plot:
        logging.info('Generating Hi-C QC plot at:\n ' +
                     path.join(opts.workdir, path.split(opts.fastq)[-1] + '.pdf'))
        dangling_ends, ligated = quality_plot(opts.fastq, r_enz=opts.renz,
                                              nreads=100000, paired=False,
                                              savefig=path.join(
                                                  opts.workdir,
                                                  path.split(opts.fastq)[-1] + '.pdf'))
        logging.info(' - Dangling-ends (sensu-stricto): %.3f%%', dangling_ends)
        logging.info(' - Ligation sites: %.3f%%', ligated)
        return
    logging.info('mapping %s read %s to %s', opts.fastq, opts.read,
                 opts.workdir)
    # BUG FIX: was clean=opts.keep_tmp (inverted). Temporaries must be
    # cleaned exactly when the user did NOT ask to keep them, matching the
    # other run() variants in this file and the final cleanup below.
    outfiles = full_mapping(opts.index, opts.fastq,
                            path.join(opts.workdir,
                                      '01_mapped_r%d' % (opts.read)),
                            opts.renz, temp_dir=opts.tmp, nthreads=opts.cpus,
                            frag_map=not opts.iterative,
                            clean=not opts.keep_tmp, windows=opts.windows,
                            get_nread=True, skip=opts.skip,
                            suffix=param_hash, **opts.gem_param)
    # adjust line count: in skip mode each entry's count is derived by
    # subtracting the previous file's line count from its running total
    if opts.skip:
        for i, (out, _) in enumerate(outfiles[1:], 1):
            outfiles[i] = out, outfiles[i-1][1] - sum(1 for _ in open(outfiles[i-1][0]))
    finish_time = time.localtime()
    # save all job information to sqlite DB
    save_to_db(opts, outfiles, launch_time, finish_time)
    # write machine log; flock serialises concurrent runs appending to the
    # shared trace.log.
    with open(path.join(opts.workdir, 'trace.log'), "a") as mlog:
        fcntl.flock(mlog, fcntl.LOCK_EX)
        mlog.write('\n'.join([
            ('# MAPPED READ%s\t%d\t%s' % (opts.read, num, out))
            for out, num in outfiles]) + '\n')
        fcntl.flock(mlog, fcntl.LOCK_UN)
    # clean
    if not opts.keep_tmp:
        logging.info('cleaning temporary files')
        # HACK: temp dir removed through the shell; opts.tmp comes from CLI.
        system('rm -rf ' + opts.tmp)
def run(opts):
    """Run one mapping job with QC: plot, map, record to DB and trace log.

    Always produces the QC figure and per-enzyme dangling-end/ligation
    statistics.  With ``--skip_mapping`` only the QC results are saved to
    the DB.  Otherwise reads are mapped either with
    ``fast_fragment_mapping`` (``--fast_fragment``) or ``full_mapping``,
    the job is saved to the SQLite DB, results are appended to
    ``trace.log`` guarded by a ``__lock_log`` lock file, and (unless
    ``--keep_tmp``) the temporary directory is removed.
    """
    check_options(opts)
    launch_time = time.localtime()
    # hash appended to output file names to disambiguate parameter sets
    param_hash = digest_parameters(opts, get_md5=True)

    # create tmp directory (per-read, per-parameter-hash, to avoid clashes)
    if not opts.tmp:
        temp_dir = opts.workdir + '_tmp_r%d_%s' % (opts.read, param_hash)
    else:
        temp_dir = path.join(opts.tmp,
                             'TADbit_tmp_r%d_%s' % (opts.read, param_hash))

    # QC plot
    fig_path = path.join(opts.workdir, '%s_%s_%s.png' % (
        path.split(opts.fastq)[-1],
        '-'.join(map(str, opts.renz)), param_hash))
    logging.info('Generating Hi-C QC plot')
    dangling_ends, ligated = quality_plot(opts.fastq, r_enz=opts.renz,
                                          nreads=100000, paired=False,
                                          savefig=fig_path)
    # quality_plot returns per-enzyme dicts here (iterated by key below)
    for renz in dangling_ends:
        logging.info('  - Dangling-ends (sensu-stricto): %.3f%%',
                     dangling_ends[renz])
    for renz in ligated:
        logging.info('  - Ligation sites: %.3f%%', ligated[renz])
    if opts.skip_mapping:
        save_to_db(opts, dangling_ends, ligated, fig_path, [],
                   launch_time, time.localtime())
        return

    # Mapping
    if opts.fast_fragment:
        mkdir(path.join(opts.workdir, '03_filtered_reads'))
        logging.info('parsing genomic sequence')
        try:
            # allows the use of a pickled genome to make it faster
            # (FIX: close the file handle even when unpickling fails)
            with open(opts.genome[0], 'rb') as genome_fh:
                genome_seq = load(genome_fh)
        except (UnpicklingError, KeyError):
            genome_seq = parse_fasta(opts.genome)
        logging.info('mapping %s and %s to %s', opts.fastq, opts.fastq2,
                     opts.workdir)
        outfiles = fast_fragment_mapping(
            opts.index, opts.fastq, opts.fastq2, opts.renz, genome_seq,
            path.join(opts.workdir, '03_filtered_reads',
                      'all_r1-r2_intersection_%s.tsv' % param_hash),
            clean=not opts.keep_tmp, get_nread=True,
            mapper_binary=opts.mapper_binary,
            mapper_params=opts.mapper_param, suffix=param_hash,
            temp_dir=temp_dir, nthreads=opts.cpus)
    else:
        logging.info('mapping %s read %s to %s', opts.fastq, opts.read,
                     opts.workdir)
        outfiles = full_mapping(
            opts.index, opts.fastq,
            path.join(opts.workdir, '01_mapped_r%d' % (opts.read)),
            mapper=opts.mapper, r_enz=opts.renz, temp_dir=temp_dir,
            nthreads=opts.cpus, frag_map=not opts.iterative,
            clean=not opts.keep_tmp, windows=opts.windows,
            get_nread=True, skip=opts.skip, suffix=param_hash,
            mapper_binary=opts.mapper_binary,
            mapper_params=opts.mapper_param)

    # adjust line count: in skip mode each entry's count is derived by
    # subtracting the previous file's line count from its running total
    if opts.skip:
        for i, (out, _) in enumerate(outfiles[1:], 1):
            outfiles[i] = out, outfiles[i - 1][1] - sum(
                1 for _ in open(outfiles[i - 1][0]))

    finish_time = time.localtime()

    # save all job information to sqlite DB
    # BUG FIX: save_to_db was called twice with identical arguments (once
    # unconditionally, once inside the try block), duplicating DB records;
    # keep only the guarded call so the DB lock is released on failure.
    try:
        save_to_db(opts, dangling_ends, ligated, fig_path, outfiles,
                   launch_time, finish_time)
    except Exception:
        # release lock anyway
        remove(path.join(opts.workdir, '__lock_db'))
        print_exc()
        exit(1)

    # write machine log, guarded by a cooperative lock file
    try:
        while path.exists(path.join(opts.workdir, '__lock_log')):
            time.sleep(0.5)
        open(path.join(opts.workdir, '__lock_log'), 'a').close()
        with open(path.join(opts.workdir, 'trace.log'), "a") as mlog:
            mlog.write('\n'.join([
                ('# MAPPED READ%s\t%d\t%s' % (opts.read, num, out))
                for out, num in outfiles]) + '\n')
        # release lock
        try:
            remove(path.join(opts.workdir, '__lock_log'))
        except OSError:
            pass
    except Exception:
        # release lock anyway
        remove(path.join(opts.workdir, '__lock_db'))
        print_exc()
        exit(1)

    # clean
    if not opts.keep_tmp:
        logging.info('cleaning temporary files')
        # HACK: temp dir removed through the shell
        system('rm -rf ' + temp_dir)
# Created on: Aug 6th, 2015
# Usage: python fastqs_quality_plots.py
# Goal: generate per-FASTQ plots showing quality metrics for the reads

# Import python modules/functions
import sys

import matplotlib
matplotlib.use('Agg')  # Must be before importing matplotlib.pyplot or pylab!
from matplotlib import pyplot as plt
from pytadbit.utils.fastq_utils import quality_plot

# Positional command-line arguments.
QUALITY_PLOTS = sys.argv[1]       # output directory for the PNG figures
paired1 = sys.argv[2]             # first-mate FASTQ file (may be .fastq.gz)
paired2 = sys.argv[3]             # second-mate FASTQ file
reads_number_qc = sys.argv[4]     # number of reads to sample for QC (FIX: was 'nummber' typo)
restriction_enzyme = sys.argv[5]  # restriction enzyme name

# Generate quality plots for each processed FASTQ
# and collect the percentage of dangling-ends and ligated sites per read.
values = []
for infile in [paired1, paired2]:
    bname = infile.split("/")[-1].replace(".fastq.gz", "")
    outfile = '%s/%s_processed_reads_quality.png' % (QUALITY_PLOTS, bname)
    dangling, ligated = quality_plot(infile, nreads=int(reads_number_qc),
                                     r_enz=restriction_enzyme,
                                     savefig=outfile)
    values.append(dangling)
    values.append(ligated)

# BUG FIX: 'print <expr>' is Python-2-only statement syntax and a
# SyntaxError under Python 3; the call form prints identically on both.
print(';'.join([str(i) for i in values]))