def main():
    """Run the MACS2 peak-calling task end to end.

    Parses the command line, creates ``args.out_dir``, calls ``macs2`` on
    ``args.tas[0]`` and ``args.tas[1]``, blacklist-filters the resulting peak
    file, asserts that the filtered file is not empty, hands it to
    ``peak_to_bigbed`` and ``peak_to_hammock``, runs ``frip_shifted`` on
    ``args.tas[0]`` and finally lists the output directory.
    """
    # read params
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    out_dir = args.out_dir
    mkdir_p(out_dir)

    log.info('Calling peaks and generating signal tracks with MACS2...')
    # macs2() hands back four values; only the first one is used below.
    macs2_outputs = macs2(
        args.tas[0],
        args.tas[1],
        args.chrsz,
        args.gensz,
        args.pval_thresh,
        args.shift,
        args.fraglen,
        args.cap_num_peak,
        args.make_signal,
        out_dir,
    )
    peak_file, _fc_bigwig, _pval_bigwig, _cov_bigwig = macs2_outputs

    log.info('Blacklist-filtering peaks...')
    filtered_peak = blacklist_filter(
        peak_file, args.blacklist, args.keep_irregular_chr, out_dir)

    log.info('Checking if output is empty...')
    assert_file_not_empty(filtered_peak)

    log.info('Converting peak to bigbed...')
    peak_to_bigbed(
        filtered_peak, 'narrowPeak', args.chrsz, args.keep_irregular_chr,
        out_dir)

    log.info('Converting peak to hammock...')
    peak_to_hammock(filtered_peak, out_dir)

    log.info('Shifted FRiP with fragment length...')
    # The return value is not used further in this function.
    _frip_qc = frip_shifted(
        args.tas[0], filtered_peak, args.chrsz, args.fraglen, out_dir)

    log.info('List all files in output directory...')
    ls_l(out_dir)

    log.info('All done.')
예제 #2
0
def main():
    """Run the naive-overlap task end to end.

    Parses the command line, creates ``args.out_dir``, calls
    ``naive_overlap`` on the two peak files and the pooled peak file,
    blacklist-filters the result and hands it to ``peak_to_bigbed``.
    When ``args.ta`` is set, a FRiP helper is run as well: ``frip_shifted``
    if ``args.fraglen`` is truthy, plain ``frip`` otherwise. The output
    directory is listed at the end.
    """
    # read params
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    out_dir = args.out_dir
    mkdir_p(out_dir)

    log.info('Do naive overlap...')
    merged_peak = naive_overlap(
        args.prefix,
        args.peak1,
        args.peak2,
        args.peak_pooled,
        args.peak_type,
        args.nonamecheck,
        out_dir,
    )

    log.info('Blacklist-filtering peaks...')
    filtered_peak = blacklist_filter(
        merged_peak, args.blacklist, False, out_dir)

    log.info('Converting peak to bigbed...')
    peak_to_bigbed(filtered_peak, args.peak_type, args.chrsz, out_dir)

    # FRiP is only computed when a TAG-ALIGN is given; the result is not
    # used further in this function.
    if not args.ta:
        _frip_qc = '/dev/null'
    elif args.fraglen:  # chip-seq
        log.info('Shifted FRiP with fragment length...')
        _frip_qc = frip_shifted(
            args.ta, filtered_peak, args.chrsz, args.fraglen, out_dir)
    else:  # atac-seq
        log.info('FRiP without fragment length...')
        _frip_qc = frip(args.ta, filtered_peak, out_dir)

    log.info('List all files in output directory...')
    ls_l(out_dir)

    log.info('All done.')
예제 #3
0
def main():
    """Run the spp peak-calling task end to end.

    Parses the command line, creates ``args.out_dir``, calls ``spp`` on
    ``args.tas[0]`` and ``args.tas[1]``, asserts that the returned peak file
    is not empty, blacklist-filters it, hands the filtered file to
    ``peak_to_bigbed`` and ``peak_to_hammock``, runs ``frip_shifted`` on
    ``args.tas[0]`` and finally lists the output directory.
    """
    # read params
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    out_dir = args.out_dir
    mkdir_p(out_dir)

    log.info('Calling peaks and generating signal tracks with spp...')
    region_peak = spp(
        args.tas[0],
        args.tas[1],
        args.fraglen,
        args.cap_num_peak,
        args.nth,
        out_dir,
    )

    # Unlike the MACS2 tasks, the emptiness check runs before filtering.
    log.info('Checking if output is empty...')
    assert_file_not_empty(region_peak)

    log.info('Blacklist-filtering peaks...')
    filtered_peak = blacklist_filter(
        region_peak, args.blacklist, False, out_dir)

    log.info('Converting peak to bigbed...')
    peak_to_bigbed(filtered_peak, 'regionPeak', args.chrsz, out_dir)

    log.info('Converting peak to hammock...')
    peak_to_hammock(filtered_peak, out_dir)

    log.info('Shifted FRiP with fragment length...')
    # The return value is not used further in this function.
    _frip_qc = frip_shifted(
        args.tas[0], filtered_peak, args.chrsz, args.fraglen, out_dir)

    log.info('List all files in output directory...')
    ls_l(out_dir)

    log.info('All done.')
예제 #4
0
def main():
    """Run the single-TAG-ALIGN MACS2 peak-calling task end to end.

    Parses the command line, creates ``args.out_dir``, calls ``macs2`` on
    ``args.ta``, blacklist-filters the returned peak file, asserts that the
    filtered file is not empty and hands it to ``peak_to_bigbed`` and
    ``peak_to_hammock``. When ``args.ta`` is truthy, ``frip`` is run on the
    filtered peaks. The output directory is listed at the end.
    """
    # read params
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    out_dir = args.out_dir
    mkdir_p(out_dir)

    log.info('Calling peaks and generating signal tracks with MACS2...')
    peak_file = macs2(
        args.ta,
        args.chrsz,
        args.gensz,
        args.pval_thresh,
        args.smooth_win,
        args.cap_num_peak,
        out_dir,
    )

    log.info('Blacklist-filtering peaks...')
    filtered_peak = blacklist_filter(
        peak_file, args.blacklist, args.keep_irregular_chr, out_dir)

    log.info('Checking if output is empty...')
    assert_file_not_empty(filtered_peak)

    log.info('Converting peak to bigbed...')
    peak_to_bigbed(
        filtered_peak, 'narrowPeak', args.chrsz, args.keep_irregular_chr,
        out_dir)

    log.info('Converting peak to hammock...')
    peak_to_hammock(filtered_peak, args.keep_irregular_chr, out_dir)

    # FRiP is only computed when a TAG-ALIGN is given; the result is not
    # used further in this function.
    if not args.ta:
        _frip_qc = '/dev/null'
    else:
        log.info('FRiP without fragment length...')
        _frip_qc = frip(args.ta, filtered_peak, out_dir)

    log.info('List all files in output directory...')
    ls_l(out_dir)

    log.info('All done.')
예제 #5
0
def main():
    """Run the IDR task end to end.

    Parses the command line, creates ``args.out_dir``, calls ``idr`` on the
    two peak files and the pooled peak file, blacklist-filters the returned
    peak file, asserts that the filtered file is not empty and hands it to
    ``peak_to_bigbed`` and ``peak_to_hammock``. When ``args.ta`` is set, a
    FRiP helper is run as well: ``frip_shifted`` if ``args.fraglen`` is
    truthy, plain ``frip`` otherwise. The output directory is listed at the
    end.
    """
    # read params
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    out_dir = args.out_dir
    mkdir_p(out_dir)

    log.info('Do IDR...')
    # idr() hands back four values; only the first one is used below.
    idr_outputs = idr(
        args.prefix,
        args.peak1,
        args.peak2,
        args.peak_pooled,
        args.peak_type,
        args.idr_thresh,
        args.idr_rank,
        out_dir,
    )
    idr_peak, _idr_plot, _idr_out_gz, _idr_stdout = idr_outputs

    log.info('Blacklist-filtering peaks...')
    filtered_peak = blacklist_filter(
        idr_peak, args.blacklist, args.keep_irregular_chr, out_dir)

    log.info('Checking if output is empty...')
    assert_file_not_empty(filtered_peak)

    log.info('Converting peak to bigbed...')
    peak_to_bigbed(
        filtered_peak, args.peak_type, args.chrsz, args.keep_irregular_chr,
        out_dir)

    log.info('Converting peak to hammock...')
    peak_to_hammock(filtered_peak, args.keep_irregular_chr, out_dir)

    # FRiP is only computed when a TAG-ALIGN is given; the result is not
    # used further in this function.
    if not args.ta:
        _frip_qc = '/dev/null'
    elif args.fraglen:  # chip-seq
        log.info('Shifted FRiP with fragment length...')
        _frip_qc = frip_shifted(
            args.ta, filtered_peak, args.chrsz, args.fraglen, out_dir)
    else:  # atac-seq
        log.info('FRiP without fragment length...')
        _frip_qc = frip(args.ta, filtered_peak, out_dir)

    log.info('List all files in output directory...')
    ls_l(out_dir)

    log.info('All done.')
def _peak_count_ratio(count_a, count_b):
    """Return the larger of two peak counts divided by the smaller.

    A zero denominator yields ``float('inf')`` instead of raising
    ``ZeroDivisionError``, so an empty peak file pushes the ratio over the
    QC threshold rather than aborting the run.
    """
    larger, smaller = max(count_a, count_b), min(count_a, count_b)
    if smaller == 0:
        return float('inf')
    return float(larger) / float(smaller)


def main():
    """Compute reproducibility QC and write optimal/conservative peak sets.

    Counts the lines of every peak file in ``args.peaks_pr`` and, when
    ``args.peaks`` is non-empty, of every file in ``args.peaks`` and of
    ``args.peak_ppr``. From these counts it derives the rescue ratio and the
    self-consistency ratio, picks the conservative and optimal peak files,
    copies them to ``conservative_peak.<peak_type>.gz`` and
    ``optimal_peak.<peak_type>.gz`` in ``args.out_dir``, optionally converts
    them with ``peak_to_bigbed`` / ``peak_to_hammock`` (only when
    ``args.chrsz`` is set), and writes a two-line, tab-separated QC file.

    Reproducibility is 'pass' when neither ratio exceeds 2.0, 'borderline'
    when exactly one does and 'fail' when both do.
    """
    # read params
    args = parse_arguments()
    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    log.info('Reproducibility QC...')
    # description for variables
    # N: list of number of peaks in peak files from pseudo replicates
    # Nt: top number of peaks in peak files from true replicates
    #     (rep-x_vs_rep-y)
    # Np: number of peaks in peak files from pooled pseudo replicate
    N = [get_num_lines(peak) for peak in args.peaks_pr]
    # Truthiness instead of len() so that a missing (None) list of true
    # replicate peaks is treated like an empty one.
    if args.peaks:
        # multiple replicate case
        num_rep = infer_n_from_nC2(len(args.peaks))
        num_peaks_tr = [get_num_lines(peak) for peak in args.peaks]

        Nt = max(num_peaks_tr)
        Np = get_num_lines(args.peak_ppr)
        rescue_ratio = _peak_count_ratio(Np, Nt)
        self_consistency_ratio = _peak_count_ratio(max(N), min(N))

        # The true-replicate pair with the most peaks is the conservative set.
        Nt_idx = num_peaks_tr.index(Nt)
        label_tr = infer_pair_label_from_idx(num_rep, Nt_idx)

        conservative_set = label_tr
        conservative_peak = make_hard_link(args.peaks[Nt_idx], args.out_dir)
        N_conservative = Nt
        if Nt > Np:
            optimal_set = conservative_set
            optimal_peak = conservative_peak
            N_optimal = N_conservative
        else:
            optimal_set = "ppr"
            optimal_peak = make_hard_link(args.peak_ppr, args.out_dir)
            N_optimal = Np
    else:
        # single replicate case
        Nt = 0
        Np = 0
        rescue_ratio = 0.0
        self_consistency_ratio = 1.0

        conservative_set = 'rep1-pr'
        conservative_peak = make_hard_link(args.peaks_pr[0], args.out_dir)
        N_conservative = N[0]
        optimal_set = conservative_set
        optimal_peak = conservative_peak
        N_optimal = N_conservative

    rescue_high = rescue_ratio > 2.0
    self_consistency_high = self_consistency_ratio > 2.0
    if rescue_high and self_consistency_high:
        reproducibility = 'fail'
    elif rescue_high or self_consistency_high:
        reproducibility = 'borderline'
    else:
        reproducibility = 'pass'

    log.info('Writing optimal/conservative peak files...')
    optimal_peak_file = os.path.join(
        args.out_dir, 'optimal_peak.{}.gz'.format(args.peak_type))
    conservative_peak_file = os.path.join(
        args.out_dir, 'conservative_peak.{}.gz'.format(args.peak_type))
    copy_f_to_f(optimal_peak, optimal_peak_file)
    copy_f_to_f(conservative_peak, conservative_peak_file)

    if args.chrsz:
        log.info('Converting peak to bigbed...')
        peak_to_bigbed(optimal_peak_file, args.peak_type, args.chrsz,
                       args.keep_irregular_chr, args.out_dir)
        peak_to_bigbed(conservative_peak_file, args.peak_type, args.chrsz,
                       args.keep_irregular_chr, args.out_dir)

        log.info('Converting peak to hammock...')
        peak_to_hammock(optimal_peak_file, args.keep_irregular_chr,
                        args.out_dir)
        peak_to_hammock(conservative_peak_file, args.keep_irregular_chr,
                        args.out_dir)

    log.info('Writing reproducibility QC log...')
    if args.prefix:
        reproducibility_qc = '{}.reproducibility.qc'.format(args.prefix)
    else:
        reproducibility_qc = 'reproducibility.qc'
    reproducibility_qc = os.path.join(args.out_dir, reproducibility_qc)

    # The N columns of the header are derived from N itself so that the
    # header and the data row always have the same number of fields.
    header_fields = (
        ['Nt']
        + ['N{}'.format(i + 1) for i in range(len(N))]
        + ['Np', 'N_opt', 'N_consv', 'opt_set', 'consv_set',
           'rescue_ratio', 'self_consistency_ratio', 'reproducibility']
    )
    value_fields = (
        [Nt]
        + N
        + [Np, N_optimal, N_conservative, optimal_set, conservative_set,
           rescue_ratio, self_consistency_ratio, reproducibility]
    )

    with open(reproducibility_qc, 'w') as fp:
        fp.write('\t'.join(header_fields) + '\n')
        fp.write('\t'.join(str(value) for value in value_fields) + '\n')

    log.info('All done.')