def parse_arguments():
    """Parse command-line options for the compare-signal-to-roadmap task.

    After parsing, ``--log-level`` is applied to the ``log`` object (defined
    elsewhere in this file) and the raw ``sys.argv`` is logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE compare signal to roadmap')
    parser.add_argument('--bigwig',
                        type=str,
                        help='BIGWIG file (from task macs2).')
    parser.add_argument('--dnase', type=str, help='DNase file.')
    parser.add_argument('--reg2map', type=str, help='Reg2map file.')
    parser.add_argument('--reg2map-bed', type=str, help='Reg2map bed file.')
    parser.add_argument('--roadmap-meta',
                        type=str,
                        help='Roadmap metadata file.')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        help='Log level',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'CRITICAL',
                            'ERROR'
                        ])
    args = parser.parse_args()
    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the FRiP task.

    Takes two positionals (peak file, TAGALIGN file) plus optional
    ``--chrsz`` / ``--fraglen``. After parsing, ``--log-level`` is applied to
    the ``log`` object (defined elsewhere in this file) and ``sys.argv`` is
    logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC FRiP.', description='')
    parser.add_argument('peak', type=str, help='Peak file.')
    parser.add_argument('ta', type=str, help='TAGALIGN file.')
    parser.add_argument('--chrsz',
                        type=str,
                        help='2-col chromosome sizes file. \
                        If given, do shifted FRiP (for ChIP-Seq).')
    parser.add_argument('--fraglen',
                        type=int,
                        default=0,
                        help='Fragment length for TAGALIGN file. \
                        If given, do shifted FRiP (for ChIP-Seq).')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'CRITICAL',
                            'ERROR'
                        ],
                        help='Log level')
    args = parser.parse_args()
    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
Example #3
0
def parse_arguments():
    """Parse command-line arguments for the MACS2 signal track task.

    After parsing, ``--log-level`` is applied to the ``log`` object (defined
    elsewhere in this file) and ``sys.argv`` is logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC MACS2 signal track',
                                     description='')
    parser.add_argument('ta', type=str,
                        help='Path for TAGALIGN file.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--gensz', type=str,
                        help='Genome size (sum of entries in 2nd column of \
                            chr. sizes file, or hs for human, ms for mouse).')
    parser.add_argument('--pval-thresh', default=0.01, type=float,
                        help='P-Value threshold.')
    parser.add_argument('--smooth-win', default=150, type=int,
                        help='Smoothing window size.')
    parser.add_argument('--mem-gb', type=float, default=4.0,
                        help='Max. memory for this job in GB. '
                        'This will be used to determine GNU sort -S (defaulting to 0.5 of this value). '
                        'It should be total memory for this task (not memory per thread).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO',
                                 'WARNING', 'CRITICAL', 'ERROR'],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the control TAGALIGN subsampler.

    After parsing, ``--subsample`` is validated, ``--log-level`` is applied
    to the ``log`` object (defined elsewhere in this file) and ``sys.argv``
    is logged.

    Returns:
        argparse.Namespace: The parsed arguments.

    Raises:
        ValueError: If ``--subsample`` is missing, zero or negative.
    """
    # The explanatory text belongs in ``description``; ``prog`` is only the
    # short program name shown in the usage line.
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC control TAG-ALIGN subsampler.',
        description='This script does not check if number of reads in TA is '
                    'higher than subsampling number (--subsample). '
                    'If number of reads in TA is lower than subsampling '
                    'number then TA will be just shuffled.')
    parser.add_argument('ta', type=str,
                        help='Path for control TAGALIGN file.')
    parser.add_argument('--paired-end', action="store_true",
                        help='Paired-end TAGALIGN.')
    parser.add_argument('--subsample', default=0, type=int,
                        help='Number of reads to subsample.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO',
                                 'WARNING', 'CRITICAL', 'ERROR'],
                        help='Log level')
    args = parser.parse_args()
    # Reject negatives as well as the default 0, matching the error message.
    if args.subsample <= 0:
        raise ValueError('--subsample should be a positive integer.')

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
Example #5
0
def parse_arguments():
    """Parse command-line arguments for count signal track generation.

    After parsing, ``--log-level`` is applied to the ``log`` object (defined
    elsewhere in this file) and ``sys.argv`` is logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC Count signal track generation')
    parser.add_argument('ta', type=str, help='Path for TAGALIGN file.')
    parser.add_argument('--chrsz',
                        type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument(
        '--mem-gb',
        type=float,
        default=4.0,
        help='Max. memory for this job in GB. '
        'This will be used to determine GNU sort -S (defaulting to 0.5 of this value). '
        'It should be total memory for this task (not memory per thread).')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'CRITICAL',
                            'ERROR'
                        ],
                        help='Log level')
    args = parser.parse_args()
    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
Example #6
0
def parse_arguments():
    """Parse command-line options for the preseq task.

    After parsing, ``--log-level`` is applied to the ``log`` object (defined
    elsewhere in this file) and ``sys.argv`` is logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE preseq')
    parser.add_argument('--paired-end',
                        action="store_true",
                        help='Paired-end BAM.')
    parser.add_argument('--bam', type=str, help='Raw BAM file.')
    parser.add_argument('--picard-java-heap',
                        help='Picard\'s Java max. heap: java -jar picard.jar '
                        '-Xmx[MAX_HEAP]')
    parser.add_argument('--nth',
                        type=int,
                        default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument(
        '--mem-gb',
        type=float,
        help='Max. memory for samtools sort in GB. '
        'It should be total memory for this task (not memory per thread).')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        help='Log level',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'CRITICAL',
                            'ERROR'
                        ])
    args = parser.parse_args()
    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line options for the TSS enrichment task.

    After parsing, ``--log-level`` is applied to the ``log`` object (defined
    elsewhere in this file) and ``sys.argv`` is logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE TSS enrichment.')
    parser.add_argument('--read-len-log',
                        type=str,
                        help='Read length log file (from aligner task).')
    parser.add_argument('--nodup-bam',
                        type=str,
                        help='Raw BAM file (from task filter).')
    parser.add_argument('--chrsz',
                        type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--tss', type=str, help='TSS definition bed file.')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        help='Log level',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'CRITICAL',
                            'ERROR'
                        ])
    args = parser.parse_args()
    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
Example #8
0
def parse_arguments():
    """Parse command-line arguments for the TAGALIGN pooler.

    ``tas`` collects one or more positional TAGALIGN paths. After parsing,
    ``--log-level`` is applied to the ``log`` object (defined elsewhere in
    this file) and ``sys.argv`` is logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC TAGALIGN pooler.',
                                     description='')
    parser.add_argument('tas',
                        nargs='+',
                        type=str,
                        help='List of TAGALIGNs to be pooled.')
    parser.add_argument('--prefix', type=str, help='Basename prefix.')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--col',
                        help='Number of columns to keep in a pooled TAGALIGN. '
                        'Keep all columns if not defined.')
    parser.add_argument('--log-level',
                        default='INFO',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'CRITICAL',
                            'ERROR'
                        ],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the BAM filter task.

    After parsing, ``--log-level`` is applied to the ``log`` object (defined
    elsewhere in this file) and ``sys.argv`` is logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC filter.')
    parser.add_argument('bam', type=str, help='Path for raw BAM file.')
    parser.add_argument('--dup-marker',
                        type=str,
                        choices=['picard', 'sambamba'],
                        default='picard',
                        help='Dupe marker for filtering mapped reads in BAM.')
    parser.add_argument('--mapq-thresh',
                        default=30,
                        type=int,
                        help='Threshold for low MAPQ reads removal.')
    parser.add_argument('--no-dup-removal',
                        action="store_true",
                        help='No dupe reads removal when filtering BAM.')
    parser.add_argument('--paired-end',
                        action="store_true",
                        help='Paired-end BAM.')
    parser.add_argument('--multimapping',
                        default=0,
                        type=int,
                        help='Multimapping reads.')
    parser.add_argument(
        '--filter-chrs',
        nargs='*',
        help='Chromosomes to be filtered for final (nodup/filt) BAM.')
    parser.add_argument('--chrsz',
                        type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--mito-chr-name',
                        default='chrM',
                        help='Mito chromosome name.')
    parser.add_argument('--nth',
                        type=int,
                        default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument(
        '--mem-gb',
        type=float,
        help='Max. memory for samtools sort in GB. '
        'It should be total memory for this task (not memory per thread).')
    parser.add_argument('--picard-java-heap',
                        help='Picard\'s Java max. heap: java -jar picard.jar '
                        '-Xmx[MAX_HEAP]')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'CRITICAL',
                            'ERROR'
                        ],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the post_call_peak (chip) task.

    ``--peak-type``, ``--fraglen`` and ``--blacklist`` are required. A
    blacklist path ending in ``/dev/null`` is normalized to an empty string.
    After parsing, ``--log-level`` is applied to the ``log`` object (defined
    elsewhere in this file) and ``sys.argv`` is logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE post_call_peak (chip)',
                                     description='')
    parser.add_argument('peak', type=str,
                        help='Path for PEAK file. Peak filename should be "*.*Peak.gz". '
                             'e.g. rep1.narrowPeak.gz')
    parser.add_argument('--ta', type=str,
                        help='TAG-ALIGN file.')
    parser.add_argument('--peak-type', type=str, required=True,
                        choices=['narrowPeak', 'regionPeak',
                                 'broadPeak', 'gappedPeak'],
                        help='Peak file type.')
    parser.add_argument('--fraglen', type=int, required=True,
                        help='Fragment length.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--blacklist', type=str, required=True,
                        help='Blacklist BED file.')
    parser.add_argument('--keep-irregular-chr', action="store_true",
                        help='Keep reads with non-canonical chromosome names.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO',
                                 'WARNING', 'CRITICAL', 'ERROR'],
                        help='Log level')
    args = parser.parse_args()
    # A /dev/null blacklist is treated as "no blacklist".
    if args.blacklist.endswith('/dev/null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
Example #11
0
def parse_arguments():
    """Parse command-line arguments for the MACS2 task with optional control.

    ``tas`` holds the TAGALIGN path followed by an optional control TAGALIGN
    path. When only one path is given, an empty string is appended so that
    ``args.tas`` has a second element. After parsing, ``--log-level`` is
    applied to the ``log`` object (defined elsewhere in this file) and
    ``sys.argv`` is logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC MACS2 signal track',
                                     description='')
    parser.add_argument('tas', type=str, nargs='+',
                        help='Path for TAGALIGN file (first) and control TAGALIGN file (second; optional).')
    parser.add_argument('--fraglen', type=int, required=True,
                        help='Fragment length.')
    parser.add_argument('--shift', type=int, default=0,
                        help='macs2 callpeak --shift.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--gensz', type=str,
                        help='Genome size (sum of entries in 2nd column of \
                            chr. sizes file, or hs for human, ms for mouse).')
    parser.add_argument('--pval-thresh', default=0.01, type=float,
                        help='P-Value threshold.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO',
                                 'WARNING', 'CRITICAL', 'ERROR'],
                        help='Log level')
    args = parser.parse_args()
    # No control given: pad with an empty placeholder for the control slot.
    if len(args.tas) == 1:
        args.tas.append('')
    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the post-align task.

    Takes two positionals (FASTQ R1 path, BAM path). After parsing,
    ``--log-level`` is applied to the ``log`` object (defined elsewhere in
    this file) and ``sys.argv`` is logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE post align', description='')
    parser.add_argument('fastq', type=str, help='Path for FASTQ R1')
    parser.add_argument('bam', type=str, help='Path for BAM')
    parser.add_argument('--chrsz',
                        type=str,
                        help='2-col chromosome sizes file. If not given then '
                        'SAMstats on mito-free BAM will not be calculated.')
    parser.add_argument('--mito-chr-name',
                        default='chrM',
                        help='Mito chromosome name.')
    parser.add_argument('--nth',
                        type=int,
                        default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'CRITICAL',
                            'ERROR'
                        ],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line options for the annot_enrich task.

    After parsing, ``--log-level`` is applied to the ``log`` object (defined
    elsewhere in this file) and ``sys.argv`` is logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE annot_enrich (fraction of reads in annotated regions)')
    parser.add_argument('--ta',
                        type=str,
                        help='TAG-ALIGN file (from task bam2ta).')
    parser.add_argument('--dnase', type=str, help='DNase definition bed file.')
    parser.add_argument('--blacklist', type=str, help='Blacklist bed file.')
    parser.add_argument('--prom',
                        type=str,
                        help='Promoter definition bed file.')
    parser.add_argument('--enh',
                        type=str,
                        help='Enhancer definition bed file.')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        help='Log level',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'CRITICAL',
                            'ERROR'
                        ])
    args = parser.parse_args()
    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
Example #14
0
def parse_arguments():
    """Parse command-line arguments for the blacklist filter task.

    ``--blacklist`` is required; a value ending in ``null`` is normalized to
    an empty string. After parsing, ``--log-level`` is applied to the ``log``
    object (defined elsewhere in this file) and ``sys.argv`` is logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC Blacklist filter.')
    parser.add_argument('peak', type=str, help='Peak file.')
    parser.add_argument('--blacklist',
                        type=str,
                        required=True,
                        help='Blacklist BED file.')
    parser.add_argument('--keep-irregular-chr',
                        action="store_true",
                        help='Keep reads with non-canonical chromosome names.')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'CRITICAL',
                            'ERROR'
                        ],
                        help='Log level')
    args = parser.parse_args()
    # Any path ending in "null" is treated as "no blacklist".
    if args.blacklist.endswith('null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the fraction-of-mito-reads task.

    Takes two positional SAMstats log paths (non-mito, then mito-only).
    After parsing, ``--log-level`` is applied to the ``log`` object (defined
    elsewhere in this file) and ``sys.argv`` is logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE frac mito',
        description='Calculates fraction of mito reads')
    parser.add_argument('non_mito_samstat',
                        type=str,
                        help='Path for SAMstats log file')
    parser.add_argument('mito_samstat',
                        type=str,
                        help='Path for SAMstats log file (mito only)')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'CRITICAL',
                            'ERROR'
                        ],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line options for the BSUB job-submission wrapper.

    After parsing, ``--log-level`` is applied to the ``log`` object (defined
    elsewhere in this file) and ``sys.argv`` is logged.

    Returns:
        argparse.Namespace: The parsed arguments. ``out_dir`` is ``None``
        when ``--out-dir`` is not supplied (no default is set here).
    """
    parser = argparse.ArgumentParser(
        prog='Wrapper for BSUB job submission for ATAC-seq data.',
        description='')
    parser.add_argument('--path-to-fastqs',
                        default='',
                        type=str,
                        help='Path to FASTQ files.')
    parser.add_argument('--memory',
                        default='10GB',
                        type=str,
                        help='Memory requested to run the analysis.')
    parser.add_argument(
        '--queue',
        default='standard',
        type=str,
        help='Queue to submit the job in HPCF (use bqueues to choose).')
    parser.add_argument('--out-dir', type=str, help='Output Directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'CRITICAL',
                            'ERROR'
                        ],
                        help='Log level')
    args = parser.parse_args()
    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
Example #17
0
def parse_arguments():
    """Parse command-line arguments for the pseudo replicator task.

    After parsing, ``--log-level`` is applied to the ``log`` object (defined
    elsewhere in this file) and ``sys.argv`` is logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC pseudo replicator.')
    parser.add_argument('ta', type=str, help='Path for TAGALIGN file.')
    parser.add_argument('--paired-end',
                        action="store_true",
                        help='Paired-end TAGALIGN.')
    # Adjacent literals are concatenated verbatim, so each sentence carries
    # its own trailing space.
    parser.add_argument(
        '--pseudoreplication-random-seed',
        type=int,
        default=0,
        help='Set it to 0 to use file\'s size (in bytes) as random seed. '
        'Otherwise this seed will be used for GNU shuf --random-source=sha256(seed). '
        'It is useful when random seed based on input file size does not work.'
    )
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'CRITICAL',
                            'ERROR'
                        ],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
Example #18
0
def parse_arguments():
    """Parse command-line arguments for the spp call_peak task.

    ``tas`` takes exactly two positional paths (TAGALIGN, control TAGALIGN)
    and ``--fraglen`` is required. After parsing, ``--log-level`` is applied
    to the ``log`` object (defined elsewhere in this file) and ``sys.argv``
    is logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE spp call_peak')
    parser.add_argument(
        'tas', type=str, nargs=2,
        help='Path for TAGALIGN file and control TAGALIGN file.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--fraglen', type=int, required=True,
                        help='Fragment length.')
    parser.add_argument('--fdr-thresh', default=0.01, type=float,
                        help='FDR threshold for run_spp.R -fdr parameter.')
    parser.add_argument('--cap-num-peak', default=300000, type=int,
                        help='Capping number of peaks by taking top N peaks.')
    parser.add_argument('--nth', type=int, default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO',
                                 'WARNING', 'CRITICAL', 'ERROR'],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the MACS2 signal track task.

    After parsing, ``--log-level`` is applied to the ``log`` object (defined
    elsewhere in this file) and ``sys.argv`` is logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC MACS2 signal track',
                                     description='')
    parser.add_argument('ta', type=str, help='Path for TAGALIGN file.')
    parser.add_argument('--chrsz',
                        type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--gensz',
                        type=str,
                        help='Genome size (sum of entries in 2nd column of \
                            chr. sizes file, or hs for human, ms for mouse).')
    parser.add_argument('--pval-thresh',
                        default=0.01,
                        type=float,
                        help='P-Value threshold.')
    parser.add_argument('--smooth-win',
                        default=73,
                        type=int,
                        help='Smoothing window size.')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'CRITICAL',
                            'ERROR'
                        ],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the blacklist filter task.

    ``--blacklist`` is optional; a missing value or one ending in ``null``
    is normalized to an empty string. After parsing, ``--log-level`` is
    applied to the ``log`` object (defined elsewhere in this file) and
    ``sys.argv`` is logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC Blacklist filter.')
    parser.add_argument('peak', type=str, help='Peak file.')
    parser.add_argument('--blacklist', type=str, help='Blacklist BED file.')
    parser.add_argument('--regex-bfilt-peak-chr-name',
                        help='Keep chromosomes matching this pattern only '
                        'in .bfilt. peak files.')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'CRITICAL',
                            'ERROR'
                        ],
                        help='Log level')
    args = parser.parse_args()
    # Absent or "...null" blacklist both mean "no blacklist".
    if args.blacklist is None or args.blacklist.endswith('null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the naive overlap task.

    Takes three positional peak files (``peak1``, ``peak2``,
    ``peak_pooled``) and requires ``--peak-type``. A missing ``--blacklist``
    or one ending in ``null`` is normalized to an empty string. After
    parsing, ``--log-level`` is applied to the ``log`` object (defined
    elsewhere in this file) and ``sys.argv`` is logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC Naive overlap.',
        description='NarrowPeak or RegionPeak only.')
    parser.add_argument('peak1', type=str, help='Peak 1.')
    parser.add_argument('peak2', type=str, help='Peak 2.')
    parser.add_argument('peak_pooled', type=str, help='Pooled peak.')
    parser.add_argument('--prefix',
                        default='overlap',
                        type=str,
                        help='Prefix basename for output overlap peak.')
    parser.add_argument(
        '--peak-type',
        type=str,
        required=True,
        choices=['narrowPeak', 'regionPeak', 'broadPeak', 'gappedPeak'],
        help='Peak file type.')
    parser.add_argument('--nonamecheck',
                        action='store_true',
                        help='bedtools intersect -nonamecheck. '
                        'Use this if you get bedtools intersect '
                        'naming convention warnings/errors.')
    parser.add_argument('--blacklist', type=str, help='Blacklist BED file.')
    parser.add_argument('--regex-bfilt-peak-chr-name',
                        help='Keep chromosomes matching this pattern only '
                        'in .bfilt. peak files.')
    parser.add_argument('--ta', type=str, help='TAGALIGN file for FRiP.')
    parser.add_argument('--chrsz',
                        type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--fraglen',
                        type=int,
                        default=0,
                        help='Fragment length for TAGALIGN file. \
                        If given, do shifted FRiP (for ChIP-Seq).')
    parser.add_argument(
        '--mem-gb',
        type=float,
        default=4.0,
        help='Max. memory for this job in GB. '
        'This will be used to determine GNU sort -S (defaulting to 0.5 of this value). '
        'It should be total memory for this task (not memory per thread).')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'CRITICAL',
                            'ERROR'
                        ],
                        help='Log level')
    args = parser.parse_args()
    # Absent or "...null" blacklist both mean "no blacklist".
    if args.blacklist is None or args.blacklist.endswith('null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
Example #22
0
def parse_arguments():
    """Parse and validate command-line arguments for the bwa aligner task.

    After parsing, the number of FASTQs is checked against ``--paired-end``
    and the bwa-mem flags are checked for consistency. Then ``--log-level``
    is applied to the ``log`` object (defined elsewhere in this file) and
    ``sys.argv`` is logged.

    Returns:
        argparse.Namespace: The parsed arguments.

    Raises:
        argparse.ArgumentTypeError: If the number of FASTQs is not 2 with
            ``--paired-end`` or not 1 without it.
        ValueError: If ``--use-bwa-mem-for-pe`` is given without
            ``--paired-end``, or ``--rescue-reads-for-bwa-mem`` is given
            without ``--use-bwa-mem-for-pe``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC bwa aligner.',
                                     description='')
    parser.add_argument('bwa_index_prefix_or_tar', type=str,
                        help='Path for prefix (or a tarball .tar) \
                            for reference bwa index. \
                            Prefix must be like [PREFIX].sa. \
                            TAR ball can have any [PREFIX] but it should not \
                            have a directory structure in it.')
    parser.add_argument('fastqs', nargs='+', type=str,
                        help='List of FASTQs (R1 and R2). \
                            FASTQs must be compressed with gzip (with .gz).')
    parser.add_argument(
        '--use-bwa-mem-for-pe', action="store_true",
        help='Use "bwa mem" for PAIRED-ENDED dataset with R1 FASTQ\'s read length >= --bwa-mem-read-len-limit. '
             'For shorter reads, bwa aln will be used. ')
    parser.add_argument(
        '--rescue-reads-for-bwa-mem', action="store_true",
        help='Use -P for "bwa mem" to rescue missing hits only (by using SW algorithm) '
             'but do not try to find hits that fit a proper pair.'
    )
    parser.add_argument('--bwa-mem-read-len-limit', type=int, default=70,
                        help='Read length limit for bwa mem (for paired-ended FASTQs only). '
                             'bwa aln will be used instead of bwa mem if R1 reads are shorter than this.')
    parser.add_argument('--paired-end', action="store_true",
                        help='Paired-end FASTQs.')
    parser.add_argument('--nth', type=int, default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument('--mem-gb', type=float,
                        help='Max. memory for samtools sort and GNU sort -S '
                        '(half of this value will be used for GNU sort) in GB. '
                        'It should be total memory for this task (not memory per thread).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO',
                                 'WARNING', 'CRITICAL', 'ERROR'],
                        help='Log level')
    args = parser.parse_args()

    # check if fastqs have correct dimension
    if args.paired_end and len(args.fastqs) != 2:
        raise argparse.ArgumentTypeError('Need 2 fastqs for paired end.')
    if not args.paired_end and len(args.fastqs) != 1:
        raise argparse.ArgumentTypeError('Need 1 fastq for single end.')

    # bwa-mem flags only make sense for paired-end data, and the rescue
    # option only when bwa mem is actually enabled.
    if args.use_bwa_mem_for_pe and not args.paired_end:
        raise ValueError(
            '--use-bwa-mem-for-pe is for paired ended FASTQs only.'
        )
    if not args.use_bwa_mem_for_pe and args.rescue_reads_for_bwa_mem:
        raise ValueError(
            '--rescue-reads-for-bwa-mem is available only when --use-bwa-mem-for-pe is activated.'
        )

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse and validate command-line arguments for the bowtie2 aligner.

    Arguments are read from ``sys.argv``. After parsing, the number of
    FASTQs is checked against the single-/paired-end mode, ``--log-level``
    is applied to ``log`` and the raw command line is logged.

    Returns:
        argparse.Namespace: The parsed arguments.

    Raises:
        argparse.ArgumentTypeError: If ``--paired-end`` is given without
            exactly 2 FASTQs, or single-end mode without exactly 1 FASTQ.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC bowtie2 aligner.',
                                     description='')
    parser.add_argument(
        'bowtie2_index_prefix_or_tar',
        type=str,
        help='Path for prefix (or a tarball .tar) '
        'for reference bowtie2 index. '
        'Prefix must be like [PREFIX].1.bt2*. '
        'TAR ball can have any [PREFIX] but it should not '
        'have a directory structure in it.')
    parser.add_argument(
        'fastqs',
        nargs='+',
        type=str,
        help='List of FASTQs (R1 and R2). '
        'FASTQs must be compressed with gzip (with .gz).')
    parser.add_argument('--paired-end',
                        action='store_true',
                        help='Paired-end FASTQs.')
    parser.add_argument(
        '--multimapping',
        default=0,
        type=int,
        help='Multimapping reads (for bowtie2 -k(m+1)). '
        'This will be incremented in an actual bowtie2 command line, '
        'e.g. --multimapping 3 will be bowtie2 -k 4.')
    parser.add_argument('--nth',
                        type=int,
                        default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument(
        '--mem-gb',
        type=float,
        help='Max. memory for samtools sort in GB. '
        'It should be total memory for this task (not memory per thread).')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR',
                            'CRITICAL'
                        ],
                        help='Log level')
    args = parser.parse_args()

    # 'fastqs' is nargs='+', so argparse only guarantees at least one entry;
    # the exact count depends on the sequencing mode.
    num_fastqs = len(args.fastqs)
    if args.paired_end and num_fastqs != 2:
        raise argparse.ArgumentTypeError('Need 2 fastqs for paired end.')
    if not args.paired_end and num_fastqs != 1:
        raise argparse.ArgumentTypeError('Need 1 fastq for single end.')

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the IDR step.

    Arguments are read from ``sys.argv``. A missing ``--blacklist`` or one
    whose path ends with ``null`` is normalised to an empty string. Then
    ``--log-level`` is applied to ``log`` and the raw command line is logged.

    Returns:
        argparse.Namespace: The parsed arguments, with ``blacklist`` always
        a string (possibly empty).
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC IDR.', description='NarrowPeak or RegionPeak only.')
    parser.add_argument('peak1', type=str, help='Peak file 1.')
    parser.add_argument('peak2', type=str, help='Peak file 2.')
    parser.add_argument('peak_pooled', type=str, help='Pooled peak file.')
    parser.add_argument('--prefix',
                        default='idr',
                        type=str,
                        help='Prefix basename for output IDR peak.')
    parser.add_argument(
        '--peak-type',
        type=str,
        required=True,
        choices=['narrowPeak', 'regionPeak', 'broadPeak', 'gappedPeak'],
        help='Peak file type.')
    parser.add_argument('--idr-thresh',
                        default=0.1,
                        type=float,
                        help='IDR threshold.')
    parser.add_argument('--idr-rank',
                        default='p.value',
                        type=str,
                        choices=['p.value', 'q.value', 'signal.value'],
                        help='IDR ranking method.')
    parser.add_argument('--blacklist', type=str, help='Blacklist BED file.')
    parser.add_argument('--regex-bfilt-peak-chr-name',
                        help='Keep chromosomes matching this pattern only '
                        'in .bfilt. peak files.')
    parser.add_argument('--ta', type=str, help='TAGALIGN file for FRiP.')
    parser.add_argument('--chrsz',
                        type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--fraglen',
                        type=int,
                        default=0,
                        help='Fragment length for TAGALIGN file. '
                        'If given, do shifted FRiP (for ChIP-Seq).')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR',
                            'CRITICAL'
                        ],
                        help='Log level')
    args = parser.parse_args()

    # Treat "no blacklist" uniformly: an omitted option and a placeholder
    # path ending in 'null' (e.g. /dev/null) both become ''.
    if args.blacklist is None or args.blacklist.endswith('null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the naive-overlap step.

    Arguments are read from ``sys.argv``. ``--blacklist`` is mandatory; a
    value ending with ``/dev/null`` is normalised to an empty string. Then
    ``--log-level`` is applied to ``log`` and the raw command line is logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC Naive overlap.',
        description='NarrowPeak or RegionPeak only.')
    parser.add_argument('peak1', type=str, help='Peak 1.')
    parser.add_argument('peak2', type=str, help='Peak 2.')
    parser.add_argument('peak_pooled', type=str, help='Pooled peak.')
    parser.add_argument('--prefix',
                        default='overlap',
                        type=str,
                        help='Prefix basename for output overlap peak.')
    parser.add_argument(
        '--peak-type',
        type=str,
        required=True,
        choices=['narrowPeak', 'regionPeak', 'broadPeak', 'gappedPeak'],
        help='Peak file type.')
    parser.add_argument('--nonamecheck',
                        action='store_true',
                        help='bedtools intersect -nonamecheck. '
                        'Use this if you get bedtools intersect '
                        'naming convention warnings/errors.')
    parser.add_argument('--blacklist',
                        type=str,
                        required=True,
                        help='Blacklist BED file.')
    parser.add_argument('--keep-irregular-chr',
                        action='store_true',
                        help='Keep reads with non-canonical chromosome names.')
    parser.add_argument('--ta', type=str, help='TAGALIGN file for FRiP.')
    parser.add_argument('--chrsz',
                        type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--fraglen',
                        type=int,
                        default=0,
                        help='Fragment length for TAGALIGN file. '
                        'If given, do shifted FRiP (for ChIP-Seq).')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR',
                            'CRITICAL'
                        ],
                        help='Log level')
    args = parser.parse_args()

    # --blacklist is required, so it is never None here; a /dev/null
    # placeholder is mapped to '' to mean "no blacklist".
    if args.blacklist.endswith('/dev/null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
Example #26
0
def parse_arguments(debug=False):
    """Parse and validate command-line arguments for the Trimmomatic wrapper.

    Arguments are read from ``sys.argv``. After parsing, ``--crop-length``
    is checked to be strictly positive, ``--log-level`` is applied to
    ``log`` and the raw command line is logged.

    Args:
        debug: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        argparse.Namespace: The parsed arguments.

    Raises:
        ValueError: If ``--crop-length`` is zero or negative.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC Trimmomatic wrapper.')
    parser.add_argument('--fastq1', help='FASTQ R1 to be trimmed.')
    parser.add_argument('--fastq2', help='FASTQ R2 to be trimmed.')
    parser.add_argument('--paired-end',
                        action='store_true',
                        help='Paired-end FASTQs.')
    parser.add_argument('--crop-length',
                        type=int,
                        required=True,
                        help='Number of basepair to crop. '
                        "Trimmomatic's parameter CROP.")
    parser.add_argument(
        '--crop-length-tol',
        type=int,
        default=2,
        help='Crop length tolerance to keep shorter reads '
        'around the crop length. '
        "Trimmomatic's parameter MINLEN will be --crop-length "
        '- abs(--crop-length-tol).')
    parser.add_argument('--phred-score-format',
                        default='auto',
                        choices=['auto', 'phred33', 'phred64'],
                        help='Base encoding for Phred scores in FASTQs. '
                        'If it is not auto then -phred33 or -phred64 to '
                        "Trimmomatic's command line.")
    parser.add_argument('--out-dir-R1',
                        default='',
                        type=str,
                        help='Output directory for cropped R1 fastq.')
    parser.add_argument('--out-dir-R2',
                        default='',
                        type=str,
                        help='Output directory for cropped R2 fastq.')
    parser.add_argument(
        '--trimmomatic-java-heap',
        help="Trimmomatic's Java max. heap: java -jar Trimmomatic.jar "
        '-Xmx[MAX_HEAP]')
    parser.add_argument('--nth',
                        type=int,
                        default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument('--log-level',
                        default='INFO',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR',
                            'CRITICAL'
                        ],
                        help='Log level')
    args = parser.parse_args()

    # --crop-length is required and typed int, so it is always an int here.
    # Reject negatives as well as zero, matching the error message.
    if args.crop_length <= 0:
        raise ValueError('Crop length must be > 0.')

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the MACS2 callpeak step.

    Arguments are read from ``sys.argv``. If only one TAGALIGN is given,
    an empty string is appended to ``args.tas`` as the control entry. Then
    ``--log-level`` is applied to ``log`` and the raw command line is logged.

    Returns:
        argparse.Namespace: The parsed arguments; ``tas`` has at least two
        entries (the second is '' when no control was given).
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC MACS2 callpeak')
    parser.add_argument('tas',
                        type=str,
                        nargs='+',
                        help='Path for TAGALIGN file (first) and '
                        'control TAGALIGN file (second; optional).')
    parser.add_argument('--fraglen',
                        type=int,
                        required=True,
                        help='Fragment length.')
    parser.add_argument('--shift',
                        type=int,
                        default=0,
                        help='macs2 callpeak --shift.')
    parser.add_argument('--chrsz',
                        type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--gensz',
                        type=str,
                        help='Genome size (sum of entries in 2nd column of '
                        'chr. sizes file, or hs for human, mm for mouse).')
    parser.add_argument('--pval-thresh',
                        default=0.01,
                        type=float,
                        help='P-Value threshold.')
    parser.add_argument('--cap-num-peak',
                        default=500000,
                        type=int,
                        help='Capping number of peaks by taking top N peaks.')
    parser.add_argument('--ctl-subsample',
                        default=0,
                        type=int,
                        help='Subsample control to this read depth '
                        '(0: no subsampling).')
    parser.add_argument('--ctl-paired-end',
                        action='store_true',
                        help='Paired-end control TA.')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR',
                            'CRITICAL'
                        ],
                        help='Log level')
    args = parser.parse_args()

    # Pad with an empty control path so args.tas[1] can always be indexed.
    if len(args.tas) == 1:
        args.tas.append('')

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the choose-control step.

    Arguments are read from ``sys.argv``; afterwards ``--log-level`` is
    applied to ``log`` and the raw command line is logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC Choose control.',
        description='Choose appropriate control for each IP replicate. '
                    'ctl_for_repN.tagAlign.gz will be generated for each '
                    'IP replicate on --out-dir. '
                    'This outputs a file with integers '
                    '(chosen control index for each replicate per line).')
    parser.add_argument('--tas', type=str, nargs='+', required=True,
                        help='List of experiment TAG-ALIGN per IP replicate.')
    parser.add_argument('--ctl-tas', type=str, nargs='+', required=True,
                        help='List of control TAG-ALIGN per IP replicate.')
    parser.add_argument('--ta-pooled', type=str, nargs='*',
                        help='Pooled experiment TAG-ALIGN.')
    parser.add_argument('--ctl-ta-pooled', type=str, nargs='*',
                        help='Pooled control TAG-ALIGN.')
    parser.add_argument('--ctl-depth-ratio', type=float, required=True,
                        help='Control depth ratio (between any two controls).')
    parser.add_argument('--ctl-depth-limit', type=int, default=200000000,
                        help='Control depth limit. '
                             'If read depth of chosen control is '
                             'over this limit then such control should be '
                             'subsampled.')
    parser.add_argument('--exp-ctl-depth-ratio-limit', type=float,
                        default=5.0,
                        help='Exp vs. control depth ratio limit.')
    parser.add_argument('--always-use-pooled-ctl', action='store_true',
                        help='Always use pooled control for all IP '
                             'replicates.')
    parser.add_argument('--out-tsv-basename', default='chosen_ctl.tsv',
                        type=str,
                        help='Output TSV basename '
                             '(will be written on directory --out-dir). '
                             'This TSV file has chosen control index '
                             'per line (for each exp replicate).')
    parser.add_argument('--out-tsv-subsample-basename',
                        default='chosen_ctl_subsample.tsv', type=str,
                        help='Output TSV subsample basename '
                             '(will be written on directory --out-dir). '
                             'This TSV file has number of reads to '
                             'subsample control '
                             'per line (for each exp replicate). '
                             '0 means no subsampling for control.')
    parser.add_argument('--out-txt-subsample-pooled-basename',
                        default='chosen_ctl_subsample_pooled.txt', type=str,
                        help='Output TXT subsample basename for pooled '
                             'control '
                             '(will be written on directory --out-dir). '
                             'This TXT file has a single line for '
                             'number of reads to subsample pooled control. '
                             '0 means no subsampling for control.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO',
                                 'WARNING', 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
Example #29
0
def parse_arguments():
    """Parse command-line arguments for the cross-correlation analysis.

    Arguments are read from ``sys.argv``; afterwards ``--log-level`` is
    applied to ``log`` and the raw command line is logged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC cross-correlation analysis.')
    parser.add_argument('ta', type=str, help='Path for TAGALIGN file.')
    parser.add_argument('--mito-chr-name',
                        default='chrM',
                        help='Mito chromosome name.')
    parser.add_argument('--subsample',
                        type=int,
                        default=0,
                        help='Subsample TAGALIGN.')
    parser.add_argument('--speak',
                        type=int,
                        default=-1,
                        help='User-defined cross-corr. peak strandshift '
                        '(-speak= in run_spp.R). Disabled if -1.')
    parser.add_argument('--exclusion-range-min',
                        type=int,
                        help='User-defined exclusion range minimum used for '
                        '-x=${xcor_exclusion_range_min}:'
                        '${xcor_exclusion_range_max}')
    parser.add_argument('--exclusion-range-max',
                        type=int,
                        help='User-defined exclusion range maximum used for '
                        '-x=${xcor_exclusion_range_min}:'
                        '${xcor_exclusion_range_max}')
    parser.add_argument('--chip-seq-type',
                        choices=['tf', 'histone'],
                        help='Type of ChIP-seq pipeline (histone or tf)')
    parser.add_argument('--paired-end',
                        action='store_true',
                        help='Paired-end TAGALIGN.')
    parser.add_argument('--nth',
                        type=int,
                        default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR',
                            'CRITICAL'
                        ],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the ChIP post_call_peak step.

    Arguments are read from ``sys.argv``. A missing ``--blacklist`` or one
    whose path ends with ``null`` is normalised to an empty string. Then
    ``--log-level`` is applied to ``log`` and the raw command line is logged.

    Returns:
        argparse.Namespace: The parsed arguments, with ``blacklist`` always
        a string (possibly empty).
    """
    parser = argparse.ArgumentParser(prog='ENCODE post_call_peak (chip)',
                                     description='')
    parser.add_argument(
        'peak',
        type=str,
        help='Path for PEAK file. Peak filename should be "*.*Peak.gz". '
        'e.g. rep1.narrowPeak.gz')
    parser.add_argument('--ta', type=str, help='TAG-ALIGN file.')
    parser.add_argument(
        '--peak-type',
        type=str,
        required=True,
        choices=['narrowPeak', 'regionPeak', 'broadPeak', 'gappedPeak'],
        help='Peak file type.')
    parser.add_argument('--fraglen',
                        type=int,
                        required=True,
                        help='Fragment length.')
    parser.add_argument('--chrsz',
                        type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--blacklist', type=str, help='Blacklist BED file.')
    parser.add_argument('--regex-bfilt-peak-chr-name',
                        help='Keep chromosomes matching this pattern only '
                        'in .bfilt. peak files.')
    parser.add_argument(
        '--mem-gb',
        type=float,
        default=4.0,
        help='Max. memory for this job in GB. '
        'This will be used to determine GNU sort -S '
        '(defaulting to 0.5 of this value). '
        'It should be total memory for this task (not memory per thread).')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR',
                            'CRITICAL'
                        ],
                        help='Log level')
    args = parser.parse_args()

    # Treat "no blacklist" uniformly: an omitted option and a placeholder
    # path ending in 'null' (e.g. /dev/null) both become ''.
    if args.blacklist is None or args.blacklist.endswith('null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args