def parse_arguments():
    """Parse the command line for the TSS enrichment script.

    After parsing, the ``log`` object in scope is set to ``--log-level``
    and ``sys.argv`` is passed to ``log.info``.

    Returns:
        argparse.Namespace: attributes ``read_len_log``, ``nodup_bam``,
        ``chrsz``, ``tss``, ``out_dir`` (default ``''``) and ``log_level``
        (default ``'INFO'``).
    """
    parser = argparse.ArgumentParser(prog='ENCODE TSS enrichment.')
    parser.add_argument('--read-len-log', type=str,
                        help='Read length log file (from aligner task).')
    parser.add_argument('--nodup-bam', type=str,
                        help='Raw BAM file (from task filter).')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--tss', type=str, help='TSS definition bed file.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once, in ascending severity, so the
    # generated usage/help text shows no repeated choice.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()
    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
# Example no. 2
# 0
def parse_arguments():
    """Parse the command line for count signal track generation.

    After parsing, the ``log`` object in scope is set to ``--log-level``
    and ``sys.argv`` is passed to ``log.info``.

    Returns:
        argparse.Namespace: attributes ``ta`` (positional), ``chrsz``,
        ``mem_gb`` (float, default 4.0), ``out_dir`` (default ``''``) and
        ``log_level`` (default ``'INFO'``).
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC Count signal track generation')
    parser.add_argument('ta', type=str, help='Path for TAGALIGN file.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument(
        '--mem-gb', type=float, default=4.0,
        help='Max. memory for this job in GB. '
             'This will be used to determine GNU sort -S '
             '(defaulting to 0.5 of this value). '
             'It should be total memory for this task '
             '(not memory per thread).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once, in ascending severity, so the
    # generated usage/help text shows no repeated choice.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()
    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse the command line for the mito read fraction script.

    After parsing, the ``log`` object in scope is set to ``--log-level``
    and ``sys.argv`` is passed to ``log.info``.

    Returns:
        argparse.Namespace: attributes ``non_mito_samstat`` and
        ``mito_samstat`` (both positional), ``out_dir`` (default ``''``)
        and ``log_level`` (default ``'INFO'``).
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE frac mito',
        description='Calculates fraction of mito reads')
    parser.add_argument('non_mito_samstat', type=str,
                        help='Path for SAMstats log file')
    parser.add_argument('mito_samstat', type=str,
                        help='Path for SAMstats log file (mito only)')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once, in ascending severity, so the
    # generated usage/help text shows no repeated choice.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def main():
    """Drive one ``macs2`` peak-calling run from the command line.

    In order: parse arguments, call ``mkdir_p`` on ``args.out_dir``, run
    ``macs2``, pass its return value to ``assert_file_not_empty`` and
    finally call ``ls_l`` on the output directory. Progress is reported
    through ``log.info``.
    """
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    log.info('Calling peaks macs2...')
    # Positional order below is the order ``macs2`` is called with.
    macs2_inputs = (
        args.ta,
        args.chrsz,
        args.gensz,
        args.pval_thresh,
        args.smooth_win,
        args.cap_num_peak,
        args.mem_gb,
        args.out_dir,
    )
    peak_file = macs2(*macs2_inputs)

    log.info('Checking if output is empty...')
    assert_file_not_empty(peak_file)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
def parse_arguments():
    """Parse the command line for the signal-to-roadmap comparison script.

    After parsing, the ``log`` object in scope is set to ``--log-level``
    and ``sys.argv`` is passed to ``log.info``.

    Returns:
        argparse.Namespace: attributes ``bigwig``, ``dnase``, ``reg2map``,
        ``reg2map_bed``, ``roadmap_meta``, ``out_dir`` (default ``''``)
        and ``log_level`` (default ``'INFO'``).
    """
    parser = argparse.ArgumentParser(prog='ENCODE compare signal to roadmap')
    parser.add_argument('--bigwig', type=str,
                        help='BIGWIG file (from task macs2).')
    parser.add_argument('--dnase', type=str, help='DNase file.')
    parser.add_argument('--reg2map', type=str, help='Reg2map file.')
    parser.add_argument('--reg2map-bed', type=str, help='Reg2map bed file.')
    parser.add_argument('--roadmap-meta', type=str,
                        help='Roadmap metadata file.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once, in ascending severity, so the
    # generated usage/help text shows no repeated choice.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()
    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
# Example no. 6
# 0
def parse_arguments():
    """Parse the command line for the preseq script.

    After parsing, the ``log`` object in scope is set to ``--log-level``
    and ``sys.argv`` is passed to ``log.info``.

    Returns:
        argparse.Namespace: attributes ``paired_end`` (bool), ``bam``,
        ``picard_java_heap``, ``nth`` (int, default 1), ``mem_gb`` (float,
        no default), ``out_dir`` (default ``''``) and ``log_level``
        (default ``'INFO'``).
    """
    parser = argparse.ArgumentParser(prog='ENCODE preseq')
    parser.add_argument('--paired-end', action='store_true',
                        help='Paired-end BAM.')
    parser.add_argument('--bam', type=str, help='Raw BAM file.')
    parser.add_argument('--picard-java-heap',
                        help="Picard's Java max. heap: java -jar picard.jar "
                             '-Xmx[MAX_HEAP]')
    parser.add_argument('--nth', type=int, default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument(
        '--mem-gb', type=float,
        help='Max. memory for samtools sort in GB. '
             'It should be total memory for this task '
             '(not memory per thread).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once, in ascending severity, so the
    # generated usage/help text shows no repeated choice.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()
    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse the command line for the control TAG-ALIGN subsampler.

    After parsing and validation, the ``log`` object in scope is set to
    ``--log-level`` and ``sys.argv`` is passed to ``log.info``.

    Returns:
        argparse.Namespace: attributes ``ta`` (positional), ``paired_end``
        (bool), ``subsample`` (int), ``out_dir`` (default ``''``) and
        ``log_level`` (default ``'INFO'``).

    Raises:
        ValueError: if ``--subsample`` is missing, zero or negative.
    """
    # The explanatory text belongs in ``description``; keeping it in
    # ``prog`` would repeat the whole paragraph in every usage/error line.
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC control TAG-ALIGN subsampler.',
        description='This script does not check if number of reads in TA '
                    'is higher than subsampling number (--subsample). '
                    'If number of reads in TA is lower than subsampling '
                    'number then TA will be just shuffled.')
    parser.add_argument('ta', type=str,
                        help='Path for control TAGALIGN file.')
    parser.add_argument('--paired-end', action='store_true',
                        help='Paired-end TAGALIGN.')
    parser.add_argument('--subsample', default=0, type=int,
                        help='Number of reads to subsample.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once, in ascending severity, so the
    # generated usage/help text shows no repeated choice.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()
    # A plain truthiness test would let negative counts through, so compare
    # against zero explicitly.
    if args.subsample <= 0:
        raise ValueError('--subsample should be a positive integer.')

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
# Example no. 8
# 0
def parse_arguments():
    """Parse the command line for the MACS2 signal track script.

    After parsing, the ``log`` object in scope is set to ``--log-level``
    and ``sys.argv`` is passed to ``log.info``.

    Returns:
        argparse.Namespace: attributes ``ta`` (positional), ``chrsz``,
        ``gensz``, ``pval_thresh`` (float, default 0.01), ``smooth_win``
        (int, default 150), ``mem_gb`` (float, default 4.0), ``out_dir``
        (default ``''``) and ``log_level`` (default ``'INFO'``).
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC MACS2 signal track',
                                     description='')
    parser.add_argument('ta', type=str,
                        help='Path for TAGALIGN file.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    # Adjacent literals are used instead of a backslash continuation so the
    # help string does not carry the source file's indentation.
    parser.add_argument('--gensz', type=str,
                        help='Genome size (sum of entries in 2nd column of '
                             'chr. sizes file, or hs for human, ms for '
                             'mouse).')
    parser.add_argument('--pval-thresh', default=0.01, type=float,
                        help='P-Value threshold.')
    parser.add_argument('--smooth-win', default=150, type=int,
                        help='Smoothing window size.')
    parser.add_argument(
        '--mem-gb', type=float, default=4.0,
        help='Max. memory for this job in GB. '
             'This will be used to determine GNU sort -S '
             '(defaulting to 0.5 of this value). '
             'It should be total memory for this task '
             '(not memory per thread).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once, in ascending severity, so the
    # generated usage/help text shows no repeated choice.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
# Example no. 9
# 0
def main():
    """Compute library-complexity outputs for the BAM given on the CLI.

    In order: derive an output prefix from ``args.out_dir`` and the BAM's
    extension-stripped basename, build a read-group-free BAM via
    ``remove_read_group``, run ``get_picard_complexity_metrics`` (paired-end
    only), run ``run_preseq`` and ``get_preseq_plot``, write the Picard
    estimate to ``<prefix>.picard_est_lib_size.qc`` when there is one,
    remove the read-group-free BAM with ``rm_f`` and list the output
    directory with ``ls_l``.
    """
    args = parse_arguments()

    aligned_bam = args.bam
    out_prefix = os.path.join(args.out_dir,
                              os.path.basename(strip_ext_bam(aligned_bam)))
    rg_free_bam = remove_read_group(aligned_bam)
    java_heap = args.picard_java_heap

    # Library complexity: Preseq results, NRF, PBC1, PBC2
    # The Picard estimate is only computed for paired-end input.
    picard_est_lib_size = (
        get_picard_complexity_metrics(rg_free_bam, out_prefix, java_heap)
        if args.paired_end
        else None
    )

    # run_preseq is given the original BAM (SORTED BAM), not the
    # read-group-free copy.
    preseq_data, preseq_log = run_preseq(aligned_bam, out_prefix)
    get_preseq_plot(preseq_data, out_prefix)

    # Persist the Picard estimate only when it was computed.
    if picard_est_lib_size is not None:
        qc_path = out_prefix + '.picard_est_lib_size.qc'
        with open(qc_path, 'w') as qc_file:
            qc_file.write(str(picard_est_lib_size) + '\n')

    rm_f(rg_free_bam)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
def main():
    """Produce GC-bias outputs for the BAM given on the CLI.

    In order: derive an output prefix from ``args.out_dir`` and the BAM's
    extension-stripped basename, build a read-group-free BAM via
    ``remove_read_group``, call ``get_gc`` and ``plot_gc``, remove the
    read-group-free BAM with ``rm_f`` and list the output directory with
    ``ls_l``.
    """
    args = parse_arguments()

    ref_fasta = args.ref_fa
    nodup_bam = args.nodup_bam
    out_prefix = os.path.join(args.out_dir,
                              os.path.basename(strip_ext_bam(nodup_bam)))
    rg_free_bam = remove_read_group(nodup_bam)
    java_heap = args.picard_java_heap

    # Only the first of the three values returned by get_gc is used below.
    gc_out, gc_plot_pdf, gc_summary = get_gc(
        rg_free_bam, ref_fasta, out_prefix, java_heap)

    # will generate PNG format from gc_out
    plot_gc(gc_out, out_prefix)

    rm_f(rg_free_bam)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
def parse_arguments():
    """Parse the command line for the naive overlap script.

    ``args.blacklist`` is normalised to ``''`` when the option is absent or
    its value ends with ``'null'``. After parsing, the ``log`` object in
    scope is set to ``--log-level`` and ``sys.argv`` is passed to
    ``log.info``.

    Returns:
        argparse.Namespace: attributes ``peak1``, ``peak2``, ``peak_pooled``
        (positional), ``prefix`` (default ``'overlap'``), ``peak_type``
        (required), ``nonamecheck`` (bool), ``blacklist``,
        ``regex_bfilt_peak_chr_name``, ``ta``, ``chrsz``, ``fraglen`` (int,
        default 0), ``mem_gb`` (float, default 4.0), ``out_dir`` (default
        ``''``) and ``log_level`` (default ``'INFO'``).
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC Naive overlap.',
        description='NarrowPeak or RegionPeak only.')
    parser.add_argument('peak1', type=str, help='Peak 1.')
    parser.add_argument('peak2', type=str, help='Peak 2.')
    parser.add_argument('peak_pooled', type=str, help='Pooled peak.')
    parser.add_argument('--prefix', default='overlap', type=str,
                        help='Prefix basename for output overlap peak.')
    parser.add_argument(
        '--peak-type', type=str, required=True,
        choices=['narrowPeak', 'regionPeak', 'broadPeak', 'gappedPeak'],
        help='Peak file type.')
    # Help strings below use adjacent literals rather than backslash
    # continuations so they do not carry the source file's indentation.
    parser.add_argument('--nonamecheck', action='store_true',
                        help='bedtools intersect -nonamecheck '
                             '(use this if you get bedtools intersect '
                             'naming convention warnings/errors).')
    parser.add_argument('--blacklist', type=str, help='Blacklist BED file.')
    parser.add_argument('--regex-bfilt-peak-chr-name',
                        help='Keep chromosomes matching this pattern only '
                             'in .bfilt. peak files.')
    parser.add_argument('--ta', type=str, help='TAGALIGN file for FRiP.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--fraglen', type=int, default=0,
                        help='Fragment length for TAGALIGN file. '
                             'If given, do shifted FRiP (for ChIP-Seq).')
    parser.add_argument(
        '--mem-gb', type=float, default=4.0,
        help='Max. memory for this job in GB. '
             'This will be used to determine GNU sort -S '
             '(defaulting to 0.5 of this value). '
             'It should be total memory for this task '
             '(not memory per thread).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once, in ascending severity, so the
    # generated usage/help text shows no repeated choice.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()
    # A missing blacklist and a path ending in 'null' both mean "none".
    if args.blacklist is None or args.blacklist.endswith('null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
# Example no. 12
# 0
def parse_arguments():
    """Parse the command line for the MACS2 script taking one or two TAs.

    When only one TAGALIGN path is given, an empty string is appended so
    ``args.tas`` always has at least two entries. After parsing, the
    ``log`` object in scope is set to ``--log-level`` and ``sys.argv`` is
    passed to ``log.info``.

    Returns:
        argparse.Namespace: attributes ``tas`` (list), ``fraglen`` (int,
        required), ``shift`` (int, default 0), ``chrsz``, ``gensz``,
        ``pval_thresh`` (float, default 0.01), ``out_dir`` (default ``''``)
        and ``log_level`` (default ``'INFO'``).
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC MACS2 signal track',
                                     description='')
    parser.add_argument('tas', type=str, nargs='+',
                        help='Path for TAGALIGN file (first) and control '
                             'TAGALIGN file (second; optional).')
    parser.add_argument('--fraglen', type=int, required=True,
                        help='Fragment length.')
    parser.add_argument('--shift', type=int, default=0,
                        help='macs2 callpeak --shift.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    # Adjacent literals are used instead of a backslash continuation so the
    # help string does not carry the source file's indentation.
    parser.add_argument('--gensz', type=str,
                        help='Genome size (sum of entries in 2nd column of '
                             'chr. sizes file, or hs for human, ms for '
                             'mouse).')
    parser.add_argument('--pval-thresh', default=0.01, type=float,
                        help='P-Value threshold.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once, in ascending severity, so the
    # generated usage/help text shows no repeated choice.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()
    # Pad a missing control with '' so callers can index tas[1].
    if len(args.tas) == 1:
        args.tas.append('')
    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
# Example no. 13
# 0
def parse_arguments():
    """Parse the command line for the pseudo replicator script.

    After parsing, the ``log`` object in scope is set to ``--log-level``
    and ``sys.argv`` is passed to ``log.info``.

    Returns:
        argparse.Namespace: attributes ``ta`` (positional), ``paired_end``
        (bool), ``pseudoreplication_random_seed`` (int, default 0),
        ``out_dir`` (default ``''``) and ``log_level`` (default ``'INFO'``).
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC pseudo replicator.')
    parser.add_argument('ta', type=str, help='Path for TAGALIGN file.')
    parser.add_argument('--paired-end', action='store_true',
                        help='Paired-end TAGALIGN.')
    # Each fragment ends with a space so the sentences do not run together
    # once the adjacent literals are concatenated.
    parser.add_argument(
        '--pseudoreplication-random-seed', type=int, default=0,
        help="Set it to 0 to use file's size (in bytes) as random seed. "
             'Otherwise this seed will be used for GNU shuf '
             '--random-source=sha256(seed). '
             'It is useful when random seed based on input file size '
             'does not work.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once, in ascending severity, so the
    # generated usage/help text shows no repeated choice.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
# Example no. 14
# 0
def parse_arguments():
    """Parse the command line for the blacklist filter script.

    ``args.blacklist`` is replaced by ``''`` when its value ends with
    ``'null'``. After parsing, the ``log`` object in scope is set to
    ``--log-level`` and ``sys.argv`` is passed to ``log.info``.

    Returns:
        argparse.Namespace: attributes ``peak`` (positional), ``blacklist``
        (required), ``keep_irregular_chr`` (bool), ``out_dir`` (default
        ``''``) and ``log_level`` (default ``'INFO'``).
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC Blacklist filter.')
    parser.add_argument('peak', type=str, help='Peak file.')
    parser.add_argument('--blacklist', type=str, required=True,
                        help='Blacklist BED file.')
    parser.add_argument('--keep-irregular-chr', action='store_true',
                        help='Keep reads with non-canonical chromosome names.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once, in ascending severity, so the
    # generated usage/help text shows no repeated choice.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()
    # A path ending in 'null' (e.g. /dev/null) means "no blacklist".
    if args.blacklist.endswith('null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
# Example no. 15
# 0
def parse_arguments():
    """Parse the command line for the spp peak-calling script.

    After parsing, the ``log`` object in scope is set to ``--log-level``
    and ``sys.argv`` is passed to ``log.info``.

    Returns:
        argparse.Namespace: attributes ``tas`` (list of exactly two paths),
        ``chrsz``, ``fraglen`` (int, required), ``fdr_thresh`` (float,
        default 0.01), ``cap_num_peak`` (int, default 300000), ``nth``
        (int, default 1), ``out_dir`` (default ``''``) and ``log_level``
        (default ``'INFO'``).
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE spp call_peak')
    parser.add_argument(
        'tas', type=str, nargs=2,
        help='Path for TAGALIGN file and control TAGALIGN file.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--fraglen', type=int, required=True,
                        help='Fragment length.')
    parser.add_argument('--fdr-thresh', default=0.01, type=float,
                        help='FDR threshold for run_spp.R -fdr parameter.')
    parser.add_argument('--cap-num-peak', default=300000, type=int,
                        help='Capping number of peaks by taking top N peaks.')
    parser.add_argument('--nth', type=int, default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once, in ascending severity, so the
    # generated usage/help text shows no repeated choice.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse the command line for the MACS2 signal track script.

    After parsing, the ``log`` object in scope is set to ``--log-level``
    and ``sys.argv`` is passed to ``log.info``.

    Returns:
        argparse.Namespace: attributes ``ta`` (positional), ``chrsz``,
        ``gensz``, ``pval_thresh`` (float, default 0.01), ``smooth_win``
        (int, default 73), ``out_dir`` (default ``''``) and ``log_level``
        (default ``'INFO'``).
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC MACS2 signal track',
                                     description='')
    parser.add_argument('ta', type=str, help='Path for TAGALIGN file.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    # Adjacent literals are used instead of a backslash continuation so the
    # help string does not carry the source file's indentation.
    parser.add_argument('--gensz', type=str,
                        help='Genome size (sum of entries in 2nd column of '
                             'chr. sizes file, or hs for human, ms for '
                             'mouse).')
    parser.add_argument('--pval-thresh', default=0.01, type=float,
                        help='P-Value threshold.')
    parser.add_argument('--smooth-win', default=73, type=int,
                        help='Smoothing window size.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once, in ascending severity, so the
    # generated usage/help text shows no repeated choice.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
# Example no. 17
# 0
def main():
    """Split the TAGALIGN given on the CLI into two self-pseudo replicates.

    In order: parse arguments, call ``mkdir_p`` on ``args.out_dir``, run
    ``spr_pe`` (paired-end) or ``spr_se`` (otherwise), list the output
    directory with ``ls_l`` and pass both returned paths to
    ``assert_file_not_empty``. Progress is reported through ``log.info``.
    """
    args = parse_arguments()
    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    log.info('Making self-pseudo replicates...')
    # Both splitters are called with the same three positional arguments.
    make_pseudo_reps = spr_pe if args.paired_end else spr_se
    ta_pr1, ta_pr2 = make_pseudo_reps(
        args.ta,
        args.pseudoreplication_random_seed,
        args.out_dir,
    )

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('Checking if output is empty...')
    for pseudo_rep in (ta_pr1, ta_pr2):
        assert_file_not_empty(pseudo_rep)

    log.info('All done.')
def parse_arguments():
    """Parse the command line for the FRiP script.

    After parsing, the ``log`` object in scope is set to ``--log-level``
    and ``sys.argv`` is passed to ``log.info``.

    Returns:
        argparse.Namespace: attributes ``peak`` and ``ta`` (positional),
        ``chrsz``, ``fraglen`` (int, default 0), ``out_dir`` (default
        ``''``) and ``log_level`` (default ``'INFO'``).
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC FRiP.', description='')
    parser.add_argument('peak', type=str, help='Peak file.')
    parser.add_argument('ta', type=str, help='TAGALIGN file.')
    # Help strings use adjacent literals rather than backslash
    # continuations so they do not carry the source file's indentation.
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file. '
                             'If given, do shifted FRiP (for ChIP-Seq).')
    parser.add_argument('--fraglen', type=int, default=0,
                        help='Fragment length for TAGALIGN file. '
                             'If given, do shifted FRiP (for ChIP-Seq).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once, in ascending severity, so the
    # generated usage/help text shows no repeated choice.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()
    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
# Example no. 19
# 0
def parse_arguments():
    """Parse the command line for the TAGALIGN pooler script.

    After parsing, the ``log`` object in scope is set to ``--log-level``
    and ``sys.argv`` is passed to ``log.info``.

    Returns:
        argparse.Namespace: attributes ``tas`` (list with one or more
        paths), ``prefix``, ``out_dir`` (default ``''``), ``col`` (left as
        a string because no ``type`` is given) and ``log_level`` (default
        ``'INFO'``).
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC TAGALIGN pooler.',
                                     description='')
    parser.add_argument('tas', nargs='+', type=str,
                        help='List of TAGALIGNs to be pooled.')
    parser.add_argument('--prefix', type=str, help='Basename prefix.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--col',
                        help='Number of columns to keep in a pooled TAGALIGN. '
                             'Keep all columns if not defined.')
    # Each level is listed exactly once, in ascending severity, so the
    # generated usage/help text shows no repeated choice.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse the command line for the blacklist filter script.

    ``args.blacklist`` is normalised to ``''`` when the option is absent or
    its value ends with ``'null'``. After parsing, the ``log`` object in
    scope is set to ``--log-level`` and ``sys.argv`` is passed to
    ``log.info``.

    Returns:
        argparse.Namespace: attributes ``peak`` (positional), ``blacklist``,
        ``regex_bfilt_peak_chr_name``, ``out_dir`` (default ``''``) and
        ``log_level`` (default ``'INFO'``).
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC Blacklist filter.')
    parser.add_argument('peak', type=str, help='Peak file.')
    parser.add_argument('--blacklist', type=str, help='Blacklist BED file.')
    parser.add_argument('--regex-bfilt-peak-chr-name',
                        help='Keep chromosomes matching this pattern only '
                             'in .bfilt. peak files.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once, in ascending severity, so the
    # generated usage/help text shows no repeated choice.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()
    # A missing blacklist and a path ending in 'null' both mean "none".
    if args.blacklist is None or args.blacklist.endswith('null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
# Example no. 21
# 0
def main():
    """Crop the FASTQ(s) given on the CLI with Trimmomatic.

    In order: parse arguments, call ``mkdir_p`` on the R1 output directory
    (and the R2 one for paired-end), run ``trimmomatic_pe`` or
    ``trimmomatic_se``, list the output directories with ``ls_l`` and pass
    the cropped R1 result to ``assert_file_not_empty``. Progress is
    reported through ``log.info``.
    """
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir_R1)
    if args.paired_end:
        mkdir_p(args.out_dir_R2)

    crop_message = 'Cropping fastqs ({} bp) with Trimmomatic...'.format(
        args.crop_length)
    log.info(crop_message)
    if args.paired_end:
        pe_inputs = (
            args.fastq1,
            args.fastq2,
            args.crop_length,
            args.out_dir_R1,
            args.out_dir_R2,
            args.nth,
            args.trimmomatic_java_heap,
        )
        cropped_r1, cropped_r2 = trimmomatic_pe(*pe_inputs)
    else:
        se_inputs = (
            args.fastq1,
            args.crop_length,
            args.out_dir_R1,
            args.nth,
            args.trimmomatic_java_heap,
        )
        cropped_r1 = trimmomatic_se(*se_inputs)

    log.info('List all files in output directory...')
    ls_l(args.out_dir_R1)
    if args.paired_end:
        ls_l(args.out_dir_R2)

    log.info('Checking if output is empty...')
    # Only the R1 result is checked, with a hint about the likely cause.
    empty_hint = (
        'No reads in FASTQ after cropping. crop_length might be too high? '
        'While cropping, Trimmomatic (with MINLEN) excludes all reads '
        'SHORTER than crop_length.'
    )
    assert_file_not_empty(cropped_r1, help=empty_hint)

    log.info('All done.')
def parse_arguments():
    """Parse the command line for the post-align script.

    After parsing, the ``log`` object in scope is set to ``--log-level``
    and ``sys.argv`` is passed to ``log.info``.

    Returns:
        argparse.Namespace: attributes ``fastq`` and ``bam`` (positional),
        ``chrsz``, ``mito_chr_name`` (default ``'chrM'``), ``nth`` (int,
        default 1), ``out_dir`` (default ``''``) and ``log_level`` (default
        ``'INFO'``).
    """
    parser = argparse.ArgumentParser(prog='ENCODE post align', description='')
    parser.add_argument('fastq', type=str, help='Path for FASTQ R1')
    parser.add_argument('bam', type=str, help='Path for BAM')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file. If not given then '
                             'SAMstats on mito-free BAM will not be '
                             'calculated.')
    parser.add_argument('--mito-chr-name', default='chrM',
                        help='Mito chromosome name.')
    parser.add_argument('--nth', type=int, default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once, in ascending severity, so the
    # generated usage/help text shows no repeated choice.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse the command line for the ATAC-seq BSUB submission wrapper.

    After parsing, the ``log`` object in scope is set to ``--log-level``
    and ``sys.argv`` is passed to ``log.info``.

    Returns:
        argparse.Namespace: attributes ``path_to_fastqs`` (default ``''``),
        ``memory`` (default ``'10GB'``), ``queue`` (default
        ``'standard'``), ``out_dir`` (no default, so ``None`` when omitted)
        and ``log_level`` (default ``'INFO'``).
    """
    parser = argparse.ArgumentParser(
        prog='Wrapper for BSUB job submission for ATAC-seq data.',
        description='')
    parser.add_argument('--path-to-fastqs', default='', type=str,
                        help='Path to FASTQ files.')
    parser.add_argument('--memory', default='10GB', type=str,
                        help='Memory requested to run the analysis.')
    parser.add_argument(
        '--queue', default='standard', type=str,
        help='Queue to submit the job in HPCF (use bqueues to choose).')
    parser.add_argument('--out-dir', type=str, help='Output Directory.')
    # Each level is listed exactly once, in ascending severity, so the
    # generated usage/help text shows no repeated choice.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()
    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
# Example no. 24
# 0
def parse_arguments():
    """Parse the command line for the BAM filter script.

    After parsing, the ``log`` object in scope is set to ``--log-level``
    and ``sys.argv`` is passed to ``log.info``.

    Returns:
        argparse.Namespace: attributes ``bam`` (positional), ``dup_marker``
        (``'picard'`` or ``'sambamba'``, default ``'picard'``),
        ``mapq_thresh`` (int, default 30), ``no_dup_removal`` (bool),
        ``paired_end`` (bool), ``multimapping`` (int, default 0),
        ``filter_chrs`` (list or ``None``), ``chrsz``, ``mito_chr_name``
        (default ``'chrM'``), ``nth`` (int, default 1), ``mem_gb`` (float,
        no default), ``picard_java_heap``, ``out_dir`` (default ``''``)
        and ``log_level`` (default ``'INFO'``).
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC filter.')
    parser.add_argument('bam', type=str, help='Path for raw BAM file.')
    parser.add_argument('--dup-marker', type=str,
                        choices=['picard', 'sambamba'], default='picard',
                        help='Dupe marker for filtering mapped reads in BAM.')
    parser.add_argument('--mapq-thresh', default=30, type=int,
                        help='Threshold for low MAPQ reads removal.')
    parser.add_argument('--no-dup-removal', action='store_true',
                        help='No dupe reads removal when filtering BAM.')
    parser.add_argument('--paired-end', action='store_true',
                        help='Paired-end BAM.')
    parser.add_argument('--multimapping', default=0, type=int,
                        help='Multimapping reads.')
    parser.add_argument(
        '--filter-chrs', nargs='*',
        help='Chromosomes to be filtered for final (nodup/filt) BAM.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--mito-chr-name', default='chrM',
                        help='Mito chromosome name.')
    parser.add_argument('--nth', type=int, default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument(
        '--mem-gb', type=float,
        help='Max. memory for samtools sort in GB. '
             'It should be total memory for this task '
             '(not memory per thread).')
    parser.add_argument('--picard-java-heap',
                        help="Picard's Java max. heap: java -jar picard.jar "
                             '-Xmx[MAX_HEAP]')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once, in ascending severity, so the
    # generated usage/help text shows no repeated choice.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse the command line for the annotation enrichment script.

    After parsing, the ``log`` object in scope is set to ``--log-level``
    and ``sys.argv`` is passed to ``log.info``.

    Returns:
        argparse.Namespace: attributes ``ta``, ``dnase``, ``blacklist``,
        ``prom``, ``enh``, ``out_dir`` (default ``''``) and ``log_level``
        (default ``'INFO'``).
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE annot_enrich (fraction of reads in annotated regions)')
    parser.add_argument('--ta', type=str,
                        help='TAG-ALIGN file (from task bam2ta).')
    parser.add_argument('--dnase', type=str, help='DNase definition bed file.')
    parser.add_argument('--blacklist', type=str, help='Blacklist bed file.')
    parser.add_argument('--prom', type=str,
                        help='Promoter definition bed file.')
    parser.add_argument('--enh', type=str,
                        help='Enhancer definition bed file.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once, in ascending severity, so the
    # generated usage/help text shows no repeated choice.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()
    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line options for the post_call_peak (chip) task.

    ``--peak-type``, ``--fraglen`` and ``--blacklist`` are required. A
    blacklist path ending in ``/dev/null`` is replaced with an empty string.
    After parsing, the module-level ``log`` is set to the requested
    ``--log-level`` and the raw ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: attributes ``peak``, ``ta``, ``peak_type``,
        ``fraglen``, ``chrsz``, ``blacklist``, ``keep_irregular_chr``,
        ``out_dir`` and ``log_level``.

    Raises:
        SystemExit: raised by argparse for ``--help`` or invalid arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE post_call_peak (chip)',
                                     description='')
    parser.add_argument('peak', type=str,
                        help='Path for PEAK file. Peak filename should be "*.*Peak.gz". '
                             'e.g. rep1.narrowPeak.gz')
    parser.add_argument('--ta', type=str,
                        help='TAG-ALIGN file.')
    parser.add_argument('--peak-type', type=str, required=True,
                        choices=['narrowPeak', 'regionPeak',
                                 'broadPeak', 'gappedPeak'],
                        help='Peak file type.')
    parser.add_argument('--fraglen', type=int, required=True,
                        help='Fragment length.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--blacklist', type=str, required=True,
                        help='Blacklist BED file.')
    parser.add_argument('--keep-irregular-chr', action="store_true",
                        help='Keep reads with non-canonical chromosome names.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Every level appears exactly once, in increasing severity, so the
    # usage/help text shows no duplicate choice.
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO',
                                 'WARNING', 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()
    # A blacklist of /dev/null is normalized to the empty string.
    if args.blacklist.endswith('/dev/null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
Ejemplo n.º 27
0
def main():
    """Run the MACS2 peak-calling step from command-line arguments.

    Parses the arguments, creates the output directory, calls ``macs2`` with
    the first two entries of ``args.tas`` plus the remaining tuning options,
    checks that the resulting peak file is not empty, and finally lists the
    output directory.
    """
    # Read parameters.
    opts = parse_arguments()
    output_dir = opts.out_dir

    log.info('Initializing and making output directory...')
    mkdir_p(output_dir)

    log.info('Calling peaks with macs2...')
    # NOTE(review): tas[0]/tas[1] are presumably the treatment and control
    # TAG-ALIGN files -- confirm against the matching parse_arguments().
    tas = opts.tas
    peak_file = macs2(tas[0], tas[1],
                      opts.chrsz, opts.gensz,
                      opts.pval_thresh, opts.shift, opts.fraglen,
                      opts.cap_num_peak,
                      opts.ctl_subsample, opts.ctl_paired_end,
                      opts.mem_gb,
                      output_dir)

    log.info('Checking if output is empty...')
    assert_file_not_empty(peak_file)

    log.info('List all files in output directory...')
    ls_l(output_dir)

    log.info('All done.')
Ejemplo n.º 28
0
def parse_arguments():
    """Parse and validate command-line options for the bwa aligner task.

    After argparse parsing, the FASTQ count is checked against the layout and
    the ``bwa mem`` flags are checked for consistency. Then the module-level
    ``log`` is set to the requested ``--log-level`` and the raw ``sys.argv``
    is logged at INFO level.

    Returns:
        argparse.Namespace: attributes ``bwa_index_prefix_or_tar``,
        ``fastqs`` (list), ``use_bwa_mem_for_pe``,
        ``rescue_reads_for_bwa_mem``, ``bwa_mem_read_len_limit``
        (default 70), ``paired_end``, ``nth`` (default 1), ``mem_gb``
        (``None`` when not given), ``out_dir`` and ``log_level``.

    Raises:
        argparse.ArgumentTypeError: if the number of FASTQs does not match
            the layout (2 with ``--paired-end``, 1 without).
        ValueError: if ``--use-bwa-mem-for-pe`` is given without
            ``--paired-end``, or ``--rescue-reads-for-bwa-mem`` is given
            without ``--use-bwa-mem-for-pe``.
        SystemExit: raised by argparse for ``--help`` or invalid arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC bwa aligner.',
                                     description='')
    parser.add_argument('bwa_index_prefix_or_tar', type=str,
                        help='Path for prefix (or a tarball .tar) \
                            for reference bwa index. \
                            Prefix must be like [PREFIX].sa. \
                            TAR ball can have any [PREFIX] but it should not \
                            have a directory structure in it.')
    parser.add_argument('fastqs', nargs='+', type=str,
                        help='List of FASTQs (R1 and R2). \
                            FASTQs must be compressed with gzip (with .gz).')
    parser.add_argument(
        '--use-bwa-mem-for-pe', action="store_true",
        help='Use "bwa mem" for PAIRED-ENDED dataset with R1 FASTQ\'s read length >= --bwa-mem-read-len-limit. '
             'For shorter reads, bwa aln will be used. ')
    parser.add_argument(
        '--rescue-reads-for-bwa-mem', action="store_true",
        help='Use -P for "bwa mem" to rescue missing hits only (by using SW algorithm) '
             'but do not try to find hits that fit a proper pair.'
    )
    parser.add_argument('--bwa-mem-read-len-limit', type=int, default=70,
                        help='Read length limit for bwa mem (for paired-ended FASTQs only). '
                             'bwa aln will be used instead of bwa mem if R1 reads are shorter than this.')
    parser.add_argument('--paired-end', action="store_true",
                        help='Paired-end FASTQs.')
    parser.add_argument('--nth', type=int, default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument('--mem-gb', type=float,
                        help='Max. memory for samtools sort and GNU sort -S '
                        '(half of this value will be used for GNU sort) in GB. '
                        'It should be total memory for this task (not memory per thread).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Every level appears exactly once, in increasing severity, so the
    # usage/help text shows no duplicate choice.
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO',
                                 'WARNING', 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    # check if fastqs have correct dimension
    if args.paired_end and len(args.fastqs) != 2:
        raise argparse.ArgumentTypeError('Need 2 fastqs for paired end.')
    if not args.paired_end and len(args.fastqs) != 1:
        raise argparse.ArgumentTypeError('Need 1 fastq for single end.')

    # The bwa mem flags build on each other:
    # rescue requires bwa mem, which requires paired-end input.
    if args.use_bwa_mem_for_pe and not args.paired_end:
        raise ValueError(
            '--use-bwa-mem-for-pe is for paired ended FASTQs only.'
        )
    if not args.use_bwa_mem_for_pe and args.rescue_reads_for_bwa_mem:
        raise ValueError(
            '--rescue-reads-for-bwa-mem is available only when --use-bwa-mem-for-pe is activated.'
        )

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line options for the naive overlap task.

    ``--peak-type`` and ``--blacklist`` are required. A blacklist path ending
    in ``/dev/null`` is replaced with an empty string. After parsing, the
    module-level ``log`` is set to the requested ``--log-level`` and the raw
    ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: attributes ``peak1``, ``peak2``, ``peak_pooled``,
        ``prefix`` (default ``'overlap'``), ``peak_type``, ``nonamecheck``,
        ``blacklist``, ``keep_irregular_chr``, ``ta``, ``chrsz``,
        ``fraglen`` (default 0), ``out_dir`` and ``log_level``.

    Raises:
        SystemExit: raised by argparse for ``--help`` or invalid arguments.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC Naive overlap.',
        description='NarrowPeak or RegionPeak only.')
    parser.add_argument('peak1', type=str, help='Peak 1.')
    parser.add_argument('peak2', type=str, help='Peak 2.')
    parser.add_argument('peak_pooled', type=str, help='Pooled peak.')
    parser.add_argument('--prefix',
                        default='overlap',
                        type=str,
                        help='Prefix basename for output overlap peak.')
    parser.add_argument(
        '--peak-type',
        type=str,
        required=True,
        choices=['narrowPeak', 'regionPeak', 'broadPeak', 'gappedPeak'],
        help='Peak file type.')
    parser.add_argument('--nonamecheck',
                        action='store_true',
                        help='bedtools intersect -nonamecheck. \
                        use this if you get bedtools intersect \
                        naming convenction warnings/errors).')
    parser.add_argument('--blacklist',
                        type=str,
                        required=True,
                        help='Blacklist BED file.')
    parser.add_argument('--keep-irregular-chr',
                        action="store_true",
                        help='Keep reads with non-canonical chromosome names.')
    parser.add_argument('--ta', type=str, help='TAGALIGN file for FRiP.')
    parser.add_argument('--chrsz',
                        type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--fraglen',
                        type=int,
                        default=0,
                        help='Fragment length for TAGALIGN file. \
                        If given, do shifted FRiP (for ChIP-Seq).')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        # Every level appears exactly once, in increasing
                        # severity, so usage/help shows no duplicate choice.
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR',
                            'CRITICAL'
                        ],
                        help='Log level')
    args = parser.parse_args()
    # A blacklist of /dev/null is normalized to the empty string.
    if args.blacklist.endswith('/dev/null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
Ejemplo n.º 30
0
def parse_arguments():
    """Parse command-line options for the IDR task.

    ``--peak-type`` is required. A missing blacklist, or one whose path ends
    in ``null``, is replaced with an empty string. After parsing, the
    module-level ``log`` is set to the requested ``--log-level`` and the raw
    ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: attributes ``peak1``, ``peak2``, ``peak_pooled``,
        ``prefix`` (default ``'idr'``), ``peak_type``, ``idr_thresh``
        (default 0.1), ``idr_rank`` (default ``'p.value'``), ``blacklist``,
        ``regex_bfilt_peak_chr_name``, ``ta``, ``chrsz``, ``fraglen``
        (default 0), ``out_dir`` and ``log_level``.

    Raises:
        SystemExit: raised by argparse for ``--help`` or invalid arguments.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC IDR.', description='NarrowPeak or RegionPeak only.')
    parser.add_argument('peak1', type=str, help='Peak file 1.')
    parser.add_argument('peak2', type=str, help='Peak file 2.')
    parser.add_argument('peak_pooled', type=str, help='Pooled peak file.')
    parser.add_argument('--prefix',
                        default='idr',
                        type=str,
                        help='Prefix basename for output IDR peak.')
    parser.add_argument(
        '--peak-type',
        type=str,
        required=True,
        choices=['narrowPeak', 'regionPeak', 'broadPeak', 'gappedPeak'],
        help='Peak file type.')
    parser.add_argument('--idr-thresh',
                        default=0.1,
                        type=float,
                        help='IDR threshold.')
    parser.add_argument('--idr-rank',
                        default='p.value',
                        type=str,
                        choices=['p.value', 'q.value', 'signal.value'],
                        help='IDR ranking method.')
    parser.add_argument('--blacklist', type=str, help='Blacklist BED file.')
    parser.add_argument('--regex-bfilt-peak-chr-name',
                        help='Keep chromosomes matching this pattern only '
                        'in .bfilt. peak files.')
    parser.add_argument('--ta', type=str, help='TAGALIGN file for FRiP.')
    parser.add_argument('--chrsz',
                        type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--fraglen',
                        type=int,
                        default=0,
                        help='Fragment length for TAGALIGN file. \
                        If given, do shifted FRiP (for ChIP-Seq).')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        # Every level appears exactly once, in increasing
                        # severity, so usage/help shows no duplicate choice.
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR',
                            'CRITICAL'
                        ],
                        help='Log level')
    args = parser.parse_args()
    # No blacklist, or a path ending in "null", is normalized to ''.
    if args.blacklist is None or args.blacklist.endswith('null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args