Example #1
0
def main():
    """Create two self-pseudoreplicates from a TAG-ALIGN and check them.

    Parses the command-line arguments, creates ``args.out_dir``, calls
    ``spr_pe`` or ``spr_se`` depending on ``args.paired_end``, lists the
    output directory and runs ``assert_file_not_empty`` on both results.
    """
    args = parse_arguments()
    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    log.info('Making self-pseudo replicates...')
    # Both helpers take the same positional arguments here, so only the
    # callee depends on the read layout.
    make_pseudoreplicates = spr_pe if args.paired_end else spr_se
    ta_pr1, ta_pr2 = make_pseudoreplicates(
        args.ta, args.pseudoreplication_random_seed, args.out_dir)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('Checking if output is empty...')
    for pseudo_replicate in (ta_pr1, ta_pr2):
        assert_file_not_empty(pseudo_replicate)

    log.info('All done.')
def main():
    """Subsample a TAG-ALIGN and check that the result is not empty.

    Parses the command-line arguments, creates ``args.out_dir``, calls
    ``subsample_ta_pe`` or ``subsample_ta_se`` depending on
    ``args.paired_end``, runs ``assert_file_not_empty`` on the result and
    lists the output directory.
    """
    args = parse_arguments()
    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    # Keyword options are collected in the same order the helpers are
    # called with; only the paired-end helper receives ``r1_only``.
    options = {'non_mito': False, 'mito_chr_name': None}
    if args.paired_end:
        subsample = subsample_ta_pe
        options['r1_only'] = False
    else:
        subsample = subsample_ta_se
    options['out_dir'] = args.out_dir

    subsampled_ta = subsample(args.ta, args.subsample, **options)

    log.info('Checking if output is empty...')
    assert_file_not_empty(subsampled_ta)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
def main():
    """Blacklist-filter a peak file, convert it and run QC steps on it.

    Steps, in order: parse the command-line arguments, create
    ``args.out_dir``, run ``blacklist_filter`` on ``args.peak``, check the
    filtered peak with ``assert_file_not_empty``, convert it with
    ``peak_to_bigbed`` and ``peak_to_hammock``, call ``frip_shifted`` with
    ``args.fraglen``, call ``get_region_size_metrics`` and
    ``get_num_peaks``, and finally list the output directory.

    The return values of the FRiP/QC helpers are not used by this
    function, so they are deliberately not bound to local names.
    """
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    log.info('Blacklist-filtering peaks...')
    bfilt_peak = blacklist_filter(
        args.peak, args.blacklist, args.keep_irregular_chr, args.out_dir)

    log.info('Checking if output is empty...')
    assert_file_not_empty(bfilt_peak)

    log.info('Converting peak to bigbed...')
    peak_to_bigbed(bfilt_peak, args.peak_type, args.chrsz,
                   args.keep_irregular_chr, args.out_dir)

    log.info('Converting peak to hammock...')
    peak_to_hammock(bfilt_peak, args.keep_irregular_chr, args.out_dir)

    log.info('Shifted FRiP with fragment length...')
    frip_shifted(args.ta, bfilt_peak,
                 args.chrsz, args.fraglen, args.out_dir)

    log.info('Calculating (blacklist-filtered) peak region size QC/plot...')
    get_region_size_metrics(bfilt_peak)

    log.info('Calculating number of peaks (blacklist-filtered)...')
    get_num_peaks(bfilt_peak)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
Example #4
0
def main():
    """Call peaks with ``macs2`` on a pair of TAG-ALIGNs and check the output.

    Parses the command-line arguments, creates ``args.out_dir``, passes the
    first two entries of ``args.tas`` plus the peak-calling options to
    ``macs2``, runs ``assert_file_not_empty`` on the returned peak file and
    lists the output directory.
    """
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    log.info('Calling peaks with macs2...')
    first_ta = args.tas[0]
    second_ta = args.tas[1]
    npeak = macs2(
        first_ta, second_ta,
        args.chrsz, args.gensz, args.pval_thresh,
        args.shift, args.fraglen, args.cap_num_peak,
        args.ctl_subsample, args.ctl_paired_end,
        args.mem_gb, args.out_dir,
    )

    log.info('Checking if output is empty...')
    assert_file_not_empty(npeak)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
Example #5
0
def main():
    """Crop FASTQ(s) with Trimmomatic and check that reads remain.

    Parses the command-line arguments, creates ``args.out_dir_R1`` (and
    ``args.out_dir_R2`` when ``args.paired_end`` is set), calls
    ``trimmomatic_pe`` or ``trimmomatic_se`` with ``args.crop_length``,
    lists the output directories and runs ``assert_file_not_empty`` on
    every cropped FASTQ that was produced.

    In paired-end mode the R2 output is validated as well as R1, so an
    empty or missing second-mate file is reported here rather than by a
    later step.
    """
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir_R1)
    if args.paired_end:
        mkdir_p(args.out_dir_R2)

    log.info('Cropping fastqs ({} bp) with Trimmomatic...'.format(
        args.crop_length))
    if args.paired_end:
        cropped_R1, cropped_R2 = trimmomatic_pe(args.fastq1, args.fastq2,
                                                args.crop_length,
                                                args.out_dir_R1,
                                                args.out_dir_R2, args.nth,
                                                args.trimmomatic_java_heap)
        cropped_fastqs = [cropped_R1, cropped_R2]
    else:
        cropped_R1 = trimmomatic_se(args.fastq1, args.crop_length,
                                    args.out_dir_R1, args.nth,
                                    args.trimmomatic_java_heap)
        cropped_fastqs = [cropped_R1]

    log.info('List all files in output directory...')
    ls_l(args.out_dir_R1)
    if args.paired_end:
        ls_l(args.out_dir_R2)

    log.info('Checking if output is empty...')
    empty_help = (
        'No reads in FASTQ after cropping. crop_length might be too high? '
        'While cropping, Trimmomatic (with MINLEN) excludes all reads '
        'SHORTER than crop_length.')
    # R1 is checked first so the single-end behaviour is unchanged.
    for cropped_fastq in cropped_fastqs:
        assert_file_not_empty(cropped_fastq, help=empty_help)

    log.info('All done.')
def main():
    """Call peaks with ``macs2`` on a single TAG-ALIGN and check the output.

    Parses the command-line arguments, creates ``args.out_dir``, calls
    ``macs2`` with ``args.ta`` and the peak-calling options, runs
    ``assert_file_not_empty`` on the returned peak file and lists the
    output directory.
    """
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    log.info('Calling peaks macs2...')
    npeak = macs2(
        args.ta, args.chrsz, args.gensz, args.pval_thresh,
        args.smooth_win, args.cap_num_peak, args.mem_gb, args.out_dir,
    )

    log.info('Checking if output is empty...')
    assert_file_not_empty(npeak)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
def main():
    """Run naive overlap on peaks, filter the result and derive outputs.

    Steps, in order: parse the command-line arguments, create
    ``args.out_dir``, call ``naive_overlap``, pass its result through
    ``blacklist_filter``, check the filtered peak with
    ``assert_file_not_empty``, convert it with ``peak_to_bigbed``,
    ``peak_to_starch`` and ``peak_to_hammock``, optionally compute FRiP
    when ``args.ta`` is set, and list the output directory.
    """
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    log.info('Do naive overlap...')
    overlapped = naive_overlap(
        args.prefix,
        args.peak1, args.peak2, args.peak_pooled,
        args.peak_type,
        args.nonamecheck,
        args.mem_gb,
        args.out_dir,
    )

    log.info('Blacklist-filtering peaks...')
    filtered = blacklist_filter(
        overlapped, args.blacklist, args.regex_bfilt_peak_chr_name,
        args.out_dir)

    log.info('Checking if output is empty...')
    assert_file_not_empty(filtered)

    log.info('Converting peak to bigbed...')
    peak_to_bigbed(
        filtered, args.peak_type, args.chrsz, args.mem_gb, args.out_dir)

    log.info('Converting peak to starch...')
    peak_to_starch(filtered, args.out_dir)

    log.info('Converting peak to hammock...')
    peak_to_hammock(filtered, args.mem_gb, args.out_dir)

    # FRiP is only computed when a TAG-ALIGN is given; a truthy fragment
    # length selects the shifted variant (chip-seq), otherwise atac-seq.
    if args.ta and args.fraglen:
        log.info('Shifted FRiP with fragment length...')
        frip_shifted(
            args.ta, filtered, args.chrsz, args.fraglen, args.out_dir)
    elif args.ta:
        log.info('FRiP without fragment length...')
        frip(args.ta, filtered, args.out_dir)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
Example #8
0
def main():
    """Trim a FASTQ to ``args.trim_bp`` and check the result is not empty.

    Parses the command-line arguments, creates ``args.out_dir``, calls
    ``trim_fastq``, runs ``assert_file_not_empty`` on its result and lists
    the output directory.
    """
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    progress_message = 'Trimming fastqs ({} bp)...'.format(args.trim_bp)
    log.info(progress_message)
    trimmed_fastq = trim_fastq(args.fastq, args.trim_bp, args.out_dir)
    assert_file_not_empty(trimmed_fastq)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
def main():
    """Run IDR on peaks, filter the result and derive outputs.

    Steps, in order: parse the command-line arguments, create
    ``args.out_dir``, call ``idr``, check the IDR peak file with
    ``assert_file_not_empty``, pass it through ``blacklist_filter``,
    convert the filtered peak with ``peak_to_bigbed``, ``peak_to_starch``
    and ``peak_to_hammock``, optionally compute FRiP when ``args.ta`` is
    set, and list the output directory.
    """
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    log.info('Do IDR...')
    # Only the peak file is used below; the other three results of idr()
    # are unpacked but not read.
    idr_peak, _idr_plot, _idr_out_gz, _idr_stdout = idr(
        args.prefix,
        args.peak1,
        args.peak2,
        args.peak_pooled,
        args.peak_type,
        args.chrsz,
        args.idr_thresh,
        args.idr_rank,
        args.mem_gb,
        args.out_dir,
    )

    log.info('Checking if output is empty...')
    assert_file_not_empty(
        idr_peak,
        help='No IDR peaks found. IDR threshold might be too stringent '
        'or replicates have very poor concordance.')

    log.info('Blacklist-filtering peaks...')
    filtered = blacklist_filter(
        idr_peak, args.blacklist, args.regex_bfilt_peak_chr_name,
        args.out_dir)

    log.info('Converting peak to bigbed...')
    peak_to_bigbed(
        filtered, args.peak_type, args.chrsz, args.mem_gb, args.out_dir)

    log.info('Converting peak to starch...')
    peak_to_starch(filtered, args.out_dir)

    log.info('Converting peak to hammock...')
    peak_to_hammock(filtered, args.mem_gb, args.out_dir)

    # FRiP is only computed when a TAG-ALIGN is given; a truthy fragment
    # length selects the shifted variant (chip-seq), otherwise atac-seq.
    if args.ta and args.fraglen:
        log.info('Shifted FRiP with fragment length...')
        frip_shifted(
            args.ta, filtered, args.chrsz, args.fraglen, args.out_dir)
    elif args.ta:
        log.info('FRiP without fragment length...')
        frip(args.ta, filtered, args.out_dir)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
Example #10
0
def main():
    """Convert a BAM to TAGALIGN, optionally subsample and TN5-shift it.

    Parses the command-line arguments, creates ``args.out_dir``, converts
    ``args.bam`` with ``bam2ta_pe`` or ``bam2ta_se``, subsamples the result
    when ``args.subsample`` is truthy, applies ``tn5_shift_ta`` unless
    ``args.disable_tn5_shift`` is set, checks the final file with
    ``assert_file_not_empty``, removes superseded intermediate files with
    ``rm_f`` and lists the output directory.
    """
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    # Intermediate files that are deleted at the end.
    intermediates = []

    log.info('Converting BAM to TAGALIGN...')
    if args.paired_end:
        ta = bam2ta_pe(args.bam, args.nth, args.out_dir)
    else:
        ta = bam2ta_se(args.bam, args.out_dir)

    # Without subsampling the converted TAGALIGN is passed on unchanged.
    subsampled_ta = ta
    if args.subsample:
        log.info('Subsampling TAGALIGN...')
        if args.paired_end:
            subsampled_ta = subsample_ta_pe(
                ta, args.subsample, False, args.mito_chr_name, False,
                args.out_dir)
        else:
            subsampled_ta = subsample_ta_se(
                ta, args.subsample, False, args.mito_chr_name,
                args.out_dir)
        intermediates.append(ta)

    # Without shifting the (possibly subsampled) TAGALIGN is the final file
    # and must therefore not be scheduled for removal.
    shifted_ta = subsampled_ta
    if not args.disable_tn5_shift:
        log.info("TN5-shifting TAGALIGN...")
        shifted_ta = tn5_shift_ta(subsampled_ta, args.out_dir)
        intermediates.append(subsampled_ta)

    log.info('Checking if output is empty...')
    assert_file_not_empty(shifted_ta)

    log.info('Removing temporary files...')
    rm_f(intermediates)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
def main():
    """Call peaks with ``spp`` on a pair of TAG-ALIGNs and check the output.

    Parses the command-line arguments, creates ``args.out_dir``, passes the
    first two entries of ``args.tas`` plus the peak-calling options to
    ``spp``, runs ``assert_file_not_empty`` on the returned peak file and
    lists the output directory.
    """
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    log.info('Calling peaks with spp...')
    first_ta = args.tas[0]
    second_ta = args.tas[1]
    rpeak = spp(
        first_ta, second_ta,
        args.fraglen, args.cap_num_peak,
        args.nth, args.out_dir,
    )

    log.info('Checking if output is empty...')
    assert_file_not_empty(rpeak)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
Example #12
0
def main():
    """Call peaks with ``spp`` using an FDR threshold and check the output.

    Parses the command-line arguments, creates ``args.out_dir``, passes the
    first two entries of ``args.tas`` plus the peak-calling options
    (including ``args.fdr_thresh``) to ``spp``, runs
    ``assert_file_not_empty`` on the returned peak file with an explanatory
    help message, and lists the output directory.
    """
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    log.info('Calling peaks with spp...')
    first_ta = args.tas[0]
    second_ta = args.tas[1]
    rpeak = spp(
        first_ta, second_ta,
        args.fraglen, args.cap_num_peak, args.fdr_thresh,
        args.nth, args.out_dir,
    )

    log.info('Checking if output is empty...')
    assert_file_not_empty(
        rpeak,
        help='No peaks found. FDR threshold (fdr_thresh in your input JSON) '
        'might be too stringent or poor quality sample?')

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
Example #13
0
def main():
    """Run IDR on peaks, filter the result and derive outputs.

    Steps, in order: parse the command-line arguments, create
    ``args.out_dir``, call ``idr``, pass the IDR peak file through
    ``blacklist_filter``, check the filtered peak with
    ``assert_file_not_empty``, convert it with ``peak_to_bigbed`` and
    ``peak_to_hammock``, optionally compute FRiP when ``args.ta`` is set,
    and list the output directory.
    """
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    log.info('Do IDR...')
    # Only the peak file is used below; the other three results of idr()
    # are unpacked but not read.
    idr_peak, _idr_plot, _idr_out_gz, _idr_stdout = idr(
        args.prefix,
        args.peak1,
        args.peak2,
        args.peak_pooled,
        args.peak_type,
        args.idr_thresh,
        args.idr_rank,
        args.out_dir,
    )

    log.info('Blacklist-filtering peaks...')
    filtered = blacklist_filter(
        idr_peak, args.blacklist, args.keep_irregular_chr, args.out_dir)

    log.info('Checking if output is empty...')
    assert_file_not_empty(filtered)

    log.info('Converting peak to bigbed...')
    peak_to_bigbed(
        filtered, args.peak_type, args.chrsz, args.keep_irregular_chr,
        args.out_dir)

    log.info('Converting peak to hammock...')
    peak_to_hammock(filtered, args.keep_irregular_chr, args.out_dir)

    # FRiP is only computed when a TAG-ALIGN is given; a truthy fragment
    # length selects the shifted variant (chip-seq), otherwise atac-seq.
    if args.ta and args.fraglen:
        log.info('Shifted FRiP with fragment length...')
        frip_shifted(
            args.ta, filtered, args.chrsz, args.fraglen, args.out_dir)
    elif args.ta:
        log.info('FRiP without fragment length...')
        frip(args.ta, filtered, args.out_dir)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
def main():
    """Blacklist-filter a peak file, convert it and run QC steps on it.

    Steps, in order: parse the command-line arguments, create
    ``args.out_dir``, run ``blacklist_filter`` on ``args.peak``, check the
    filtered peak with ``assert_file_not_empty``, convert it with
    ``peak_to_bigbed``, ``peak_to_starch`` and ``peak_to_hammock``, call
    ``frip`` with ``args.ta``, call ``get_region_size_metrics`` and
    ``get_num_peaks``, and list the output directory.
    """
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    log.info('Blacklist-filtering peaks...')
    filtered_peak = blacklist_filter(
        args.peak, args.blacklist, args.regex_bfilt_peak_chr_name,
        args.out_dir)

    log.info('Checking if output is empty...')
    assert_file_not_empty(filtered_peak)

    log.info('Converting peak to bigbed...')
    peak_to_bigbed(
        filtered_peak, args.peak_type, args.chrsz, args.mem_gb,
        args.out_dir)

    log.info('Converting peak to starch...')
    peak_to_starch(filtered_peak, args.out_dir)

    log.info('Converting peak to hammock...')
    peak_to_hammock(filtered_peak, args.mem_gb, args.out_dir)

    log.info('FRiP without fragment length...')
    frip(args.ta, filtered_peak, args.out_dir)

    log.info('Calculating (blacklist-filtered) peak region size QC/plot...')
    get_region_size_metrics(filtered_peak)

    log.info('Calculating number of peaks (blacklist-filtered)...')
    get_num_peaks(filtered_peak)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')