def main():
    """Run the MACS2 peak-calling task on the first two entries of ``args.tas``.

    The function only sequences helpers that are defined outside of it:
    create the output directory, call ``macs2``, blacklist-filter the
    resulting peak file, check the filtered file with
    ``assert_file_not_empty``, convert it to bigBed and hammock, compute
    the fragment-length-shifted FRiP and finally list the output directory.
    Nothing is returned.
    """
    # read params
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    out_dir = args.out_dir
    mkdir_p(out_dir)

    log.info('Calling peaks and generating signal tracks with MACS2...')
    macs2_inputs = (
        args.tas[0], args.tas[1],
        args.chrsz, args.gensz, args.pval_thresh,
        args.shift, args.fraglen, args.cap_num_peak,
        args.make_signal, out_dir,
    )
    # macs2() must hand back exactly four values; only the first one (the
    # peak file) is used by the remaining steps.
    raw_peak, fc_bigwig, pval_bigwig, cov_bigwig = macs2(*macs2_inputs)

    log.info('Blacklist-filtering peaks...')
    filtered_peak = blacklist_filter(
        raw_peak, args.blacklist, args.keep_irregular_chr, out_dir)

    log.info('Checking if output is empty...')
    assert_file_not_empty(filtered_peak)

    log.info('Converting peak to bigbed...')
    peak_to_bigbed(
        filtered_peak, 'narrowPeak', args.chrsz,
        args.keep_irregular_chr, out_dir)

    log.info('Converting peak to hammock...')
    peak_to_hammock(filtered_peak, out_dir)

    log.info('Shifted FRiP with fragment length...')
    # The result is kept in a local only; this function does not use it.
    frip_qc = frip_shifted(
        args.tas[0], filtered_peak, args.chrsz, args.fraglen, out_dir)

    log.info('List all files in output directory...')
    ls_l(out_dir)

    log.info('All done.')
def main():
    """Run the naive-overlap task and its peak post-processing.

    Sequence (every step is a helper defined outside this function):
    create the output directory, call ``naive_overlap`` on ``args.peak1``,
    ``args.peak2`` and ``args.peak_pooled``, blacklist-filter the result,
    convert it to bigBed, and compute a FRiP value when ``args.ta`` is
    set (shifted if ``args.fraglen`` is truthy, unshifted otherwise).
    Nothing is returned.
    """
    # read params
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    out_dir = args.out_dir
    mkdir_p(out_dir)

    log.info('Do naive overlap...')
    raw_overlap = naive_overlap(
        args.prefix,
        args.peak1, args.peak2, args.peak_pooled,
        args.peak_type, args.nonamecheck,
        out_dir)

    log.info('Blacklist-filtering peaks...')
    filtered_overlap = blacklist_filter(
        raw_overlap, args.blacklist, False, out_dir)

    log.info('Converting peak to bigbed...')
    peak_to_bigbed(filtered_overlap, args.peak_type, args.chrsz, out_dir)

    # FRiP is computed only when a TAG-ALIGN is given; the result is kept
    # in a local and not used further by this function.
    if not args.ta:
        frip_qc = '/dev/null'
    elif args.fraglen:
        # chip-seq
        log.info('Shifted FRiP with fragment length...')
        frip_qc = frip_shifted(
            args.ta, filtered_overlap, args.chrsz, args.fraglen, out_dir)
    else:
        # atac-seq
        log.info('FRiP without fragment length...')
        frip_qc = frip(args.ta, filtered_overlap, out_dir)

    log.info('List all files in output directory...')
    ls_l(out_dir)

    log.info('All done.')
def main():
    """Run the spp peak-calling task on the first two entries of ``args.tas``.

    Sequence (every step is a helper defined outside this function):
    create the output directory, call ``spp``, check its output with
    ``assert_file_not_empty``, blacklist-filter it, convert the filtered
    file to bigBed and hammock, compute the fragment-length-shifted FRiP
    and list the output directory. Nothing is returned.
    """
    # read params
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    out_dir = args.out_dir
    mkdir_p(out_dir)

    log.info('Calling peaks and generating signal tracks with spp...')
    raw_peak = spp(
        args.tas[0], args.tas[1],
        args.fraglen, args.cap_num_peak, args.nth,
        out_dir)

    # Unlike the filtered file, it is the raw spp output that is checked.
    log.info('Checking if output is empty...')
    assert_file_not_empty(raw_peak)

    log.info('Blacklist-filtering peaks...')
    filtered_peak = blacklist_filter(raw_peak, args.blacklist, False, out_dir)

    log.info('Converting peak to bigbed...')
    peak_to_bigbed(filtered_peak, 'regionPeak', args.chrsz, out_dir)

    log.info('Converting peak to hammock...')
    peak_to_hammock(filtered_peak, out_dir)

    log.info('Shifted FRiP with fragment length...')
    # The result is kept in a local only; this function does not use it.
    frip_qc = frip_shifted(
        args.tas[0], filtered_peak, args.chrsz, args.fraglen, out_dir)

    log.info('List all files in output directory...')
    ls_l(out_dir)

    log.info('All done.')
def main():
    """Run the MACS2 peak-calling task on a single ``args.ta`` input.

    Sequence (every step is a helper defined outside this function):
    create the output directory, call ``macs2``, blacklist-filter the
    peak file, check the filtered file with ``assert_file_not_empty``,
    convert it to bigBed and hammock, compute an unshifted FRiP when
    ``args.ta`` is truthy, and list the output directory. Nothing is
    returned.
    """
    # read params
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    out_dir = args.out_dir
    mkdir_p(out_dir)

    log.info('Calling peaks and generating signal tracks with MACS2...')
    raw_peak = macs2(
        args.ta, args.chrsz, args.gensz,
        args.pval_thresh, args.smooth_win, args.cap_num_peak,
        out_dir)

    log.info('Blacklist-filtering peaks...')
    filtered_peak = blacklist_filter(
        raw_peak, args.blacklist, args.keep_irregular_chr, out_dir)

    log.info('Checking if output is empty...')
    assert_file_not_empty(filtered_peak)

    log.info('Converting peak to bigbed...')
    peak_to_bigbed(
        filtered_peak, 'narrowPeak', args.chrsz,
        args.keep_irregular_chr, out_dir)

    log.info('Converting peak to hammock...')
    peak_to_hammock(filtered_peak, args.keep_irregular_chr, out_dir)

    # FRiP is computed only when a TAG-ALIGN is given; the result is kept
    # in a local and not used further by this function.
    if not args.ta:
        frip_qc = '/dev/null'
    else:
        log.info('FRiP without fragment length...')
        frip_qc = frip(args.ta, filtered_peak, out_dir)

    log.info('List all files in output directory...')
    ls_l(out_dir)

    log.info('All done.')
def main():
    """Run the IDR task and its peak post-processing.

    Sequence (every step is a helper defined outside this function):
    create the output directory, call ``idr`` on ``args.peak1``,
    ``args.peak2`` and ``args.peak_pooled``, blacklist-filter the IDR
    peak file, check it with ``assert_file_not_empty``, convert it to
    bigBed and hammock, and compute a FRiP value when ``args.ta`` is set
    (shifted if ``args.fraglen`` is truthy, unshifted otherwise).
    Nothing is returned.
    """
    # read params
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    out_dir = args.out_dir
    mkdir_p(out_dir)

    log.info('Do IDR...')
    idr_inputs = (
        args.prefix,
        args.peak1, args.peak2, args.peak_pooled,
        args.peak_type, args.idr_thresh, args.idr_rank,
        out_dir,
    )
    # idr() must hand back exactly four values; only the first one (the
    # peak file) is used by the remaining steps.
    raw_idr_peak, idr_plot, idr_out_gz, idr_stdout = idr(*idr_inputs)

    log.info('Blacklist-filtering peaks...')
    filtered_idr_peak = blacklist_filter(
        raw_idr_peak, args.blacklist, args.keep_irregular_chr, out_dir)

    log.info('Checking if output is empty...')
    assert_file_not_empty(filtered_idr_peak)

    log.info('Converting peak to bigbed...')
    peak_to_bigbed(
        filtered_idr_peak, args.peak_type, args.chrsz,
        args.keep_irregular_chr, out_dir)

    log.info('Converting peak to hammock...')
    peak_to_hammock(filtered_idr_peak, args.keep_irregular_chr, out_dir)

    # FRiP is computed only when a TAG-ALIGN is given; the result is kept
    # in a local and not used further by this function.
    if not args.ta:
        frip_qc = '/dev/null'
    elif args.fraglen:
        # chip-seq
        log.info('Shifted FRiP with fragment length...')
        frip_qc = frip_shifted(
            args.ta, filtered_idr_peak, args.chrsz, args.fraglen, out_dir)
    else:
        # atac-seq
        log.info('FRiP without fragment length...')
        frip_qc = frip(args.ta, filtered_idr_peak, out_dir)

    log.info('List all files in output directory...')
    ls_l(out_dir)

    log.info('All done.')
def _max_min_ratio(counts):
    """Return ``max(counts) / min(counts)`` as a float.

    A minimum of zero yields ``float('inf')`` instead of raising
    ``ZeroDivisionError``, so an empty peak file pushes the ratio over any
    finite threshold rather than aborting the QC report.
    """
    smallest = min(counts)
    if smallest == 0:
        return float('inf')
    return float(max(counts)) / float(smallest)


def main():
    """Compute reproducibility QC and write optimal/conservative peak sets.

    Peak counts are the line counts (``get_num_lines``) of the given files:

    * ``N``  -- one count per file in ``args.peaks_pr``
    * ``Nt`` -- largest count among the files in ``args.peaks``
    * ``Np`` -- count of ``args.peak_ppr``

    With ``args.peaks`` given, the conservative set is the ``args.peaks``
    file with the most lines and the optimal set is that same file when
    ``Nt > Np``, otherwise ``args.peak_ppr``. Without ``args.peaks`` both
    sets are ``args.peaks_pr[0]``.

    The rescue ratio is ``max(Np, Nt) / min(Np, Nt)`` and the
    self-consistency ratio is ``max(N) / min(N)``; a zero denominator gives
    an infinite ratio. Reproducibility is ``'fail'`` when both ratios
    exceed 2.0, ``'borderline'`` when exactly one does, ``'pass'``
    otherwise.

    Outputs written under ``args.out_dir``: the optimal and conservative
    peak files, their bigBed/hammock conversions when ``args.chrsz`` is
    set, and a two-line tab-separated ``reproducibility.qc`` file
    (prefixed with ``args.prefix`` when given).
    """
    # read params
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    log.info('Reproducibility QC...')
    # description for variables
    # N: list of number of peaks in peak files from pseudo replicates
    # Nt: top number of peaks in peak files from true replicates
    #     (rep-x_vs_rep-y)
    # Np: number of peaks in peak files from pooled pseudo replicate
    N = [get_num_lines(peak) for peak in args.peaks_pr]

    # Truthiness instead of len(): an unset (None) list takes the
    # single-replicate branch rather than raising TypeError.
    if args.peaks:
        # multiple replicate case
        num_rep = infer_n_from_nC2(len(args.peaks))
        num_peaks_tr = [get_num_lines(peak) for peak in args.peaks]
        Nt = max(num_peaks_tr)
        Np = get_num_lines(args.peak_ppr)
        rescue_ratio = _max_min_ratio([Np, Nt])
        self_consistency_ratio = _max_min_ratio(N)

        # list.index returns the first file that reaches the top count.
        Nt_idx = num_peaks_tr.index(Nt)
        conservative_set = infer_pair_label_from_idx(num_rep, Nt_idx)
        conservative_peak = make_hard_link(args.peaks[Nt_idx], args.out_dir)
        N_conservative = Nt
        if Nt > Np:
            optimal_set = conservative_set
            optimal_peak = conservative_peak
            N_optimal = N_conservative
        else:
            optimal_set = "ppr"
            optimal_peak = make_hard_link(args.peak_ppr, args.out_dir)
            N_optimal = Np
    else:
        # single replicate case
        num_rep = 1
        Nt = 0
        Np = 0
        rescue_ratio = 0.0
        self_consistency_ratio = 1.0
        conservative_set = 'rep1-pr'
        conservative_peak = make_hard_link(args.peaks_pr[0], args.out_dir)
        N_conservative = N[0]
        optimal_set = conservative_set
        optimal_peak = conservative_peak
        N_optimal = N_conservative

    over_threshold = (rescue_ratio > 2.0, self_consistency_ratio > 2.0)
    if all(over_threshold):
        reproducibility = 'fail'
    elif any(over_threshold):
        reproducibility = 'borderline'
    else:
        reproducibility = 'pass'

    log.info('Writing optimal/conservative peak files...')
    optimal_peak_file = os.path.join(
        args.out_dir, 'optimal_peak.{}.gz'.format(args.peak_type))
    conservative_peak_file = os.path.join(
        args.out_dir, 'conservative_peak.{}.gz'.format(args.peak_type))
    copy_f_to_f(optimal_peak, optimal_peak_file)
    copy_f_to_f(conservative_peak, conservative_peak_file)

    if args.chrsz:
        log.info('Converting peak to bigbed...')
        peak_to_bigbed(optimal_peak_file, args.peak_type, args.chrsz,
                       args.keep_irregular_chr, args.out_dir)
        peak_to_bigbed(conservative_peak_file, args.peak_type, args.chrsz,
                       args.keep_irregular_chr, args.out_dir)

        log.info('Converting peak to hammock...')
        peak_to_hammock(optimal_peak_file,
                        args.keep_irregular_chr, args.out_dir)
        peak_to_hammock(conservative_peak_file,
                        args.keep_irregular_chr, args.out_dir)

    log.info('Writing reproducibility QC log...')
    if args.prefix:
        reproducibility_qc = '{}.reproducibility.qc'.format(args.prefix)
    else:
        reproducibility_qc = 'reproducibility.qc'
    reproducibility_qc = os.path.join(args.out_dir, reproducibility_qc)

    # The per-replicate columns are pre-joined into ONE field so the row
    # layout stays the same as a fixed ten-field format string would give,
    # even when that field is empty.
    header_fields = [
        'Nt',
        '\t'.join('N{}'.format(i + 1) for i in range(num_rep)),
        'Np',
        'N_opt',
        'N_consv',
        'opt_set',
        'consv_set',
        'rescue_ratio',
        'self_consistency_ratio',
        'reproducibility',
    ]
    value_fields = [
        Nt,
        '\t'.join(str(count) for count in N),
        Np,
        N_optimal,
        N_conservative,
        optimal_set,
        conservative_set,
        rescue_ratio,
        self_consistency_ratio,
        reproducibility,
    ]
    with open(reproducibility_qc, 'w') as fp:
        fp.write('\t'.join(header_fields) + '\n')
        fp.write('\t'.join(str(value) for value in value_fields) + '\n')

    log.info('All done.')