def parse_arguments():
    """Parse command-line arguments for 'ENCODE compare signal to roadmap'.

    Returns:
        argparse.Namespace: Parsed arguments.

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE compare signal to roadmap')
    parser.add_argument('--bigwig', type=str,
                        help='BIGWIG file (from task macs2).')
    parser.add_argument('--dnase', type=str, help='DNase file.')
    parser.add_argument('--reg2map', type=str, help='Reg2map file.')
    parser.add_argument('--reg2map-bed', type=str, help='Reg2map bed file.')
    parser.add_argument('--roadmap-meta', type=str,
                        help='Roadmap metadata file.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Every level appears exactly once so the usage text has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE DCC FRiP.'.

    Returns:
        argparse.Namespace: Parsed arguments (``peak`` and ``ta`` are
        positional; the rest are optional).

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC FRiP.', description='')
    parser.add_argument('peak', type=str, help='Peak file.')
    parser.add_argument('ta', type=str, help='TAGALIGN file.')
    # Adjacent literals are used instead of a backslash continuation so no
    # stray escape/indentation ends up inside the help text.
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file. '
                             'If given, do shifted FRiP (for ChIP-Seq).')
    parser.add_argument('--fraglen', type=int, default=0,
                        help='Fragment length for TAGALIGN file. '
                             'If given, do shifted FRiP (for ChIP-Seq).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE DCC MACS2 signal track'.

    Returns:
        argparse.Namespace: Parsed arguments (``ta`` is positional).

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC MACS2 signal track',
                                     description='')
    parser.add_argument('ta', type=str, help='Path for TAGALIGN file.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    # Adjacent literals replace the in-string backslash continuation.
    parser.add_argument('--gensz', type=str,
                        help='Genome size (sum of entries in 2nd column of '
                             'chr. sizes file, or hs for human, ms for mouse).')
    parser.add_argument('--pval-thresh', default=0.01, type=float,
                        help='P-Value threshold.')
    parser.add_argument('--smooth-win', default=150, type=int,
                        help='Smoothing window size.')
    parser.add_argument('--mem-gb', type=float, default=4.0,
                        help='Max. memory for this job in GB. '
                             'This will be used to determine GNU sort -S (defaulting to 0.5 of this value). '
                             'It should be total memory for this task (not memory per thread).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the control TAG-ALIGN subsampler.

    Returns:
        argparse.Namespace: Parsed arguments (``ta`` is positional).

    Raises:
        ValueError: If ``--subsample`` is missing, zero or negative.

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    # The explanatory sentences belong in ``description``; passing them as
    # ``prog`` would splice them into the one-line usage string.
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC control TAG-ALIGN subsampler.',
        description='This script does not check if number of reads in TA is higher than '
                    'subsampling number (--subsample). '
                    'If number of reads in TA is lower than subsampling number then '
                    'TA will be just shuffled.')
    parser.add_argument('ta', type=str,
                        help='Path for control TAGALIGN file.')
    parser.add_argument('--paired-end', action="store_true",
                        help='Paired-end TAGALIGN.')
    parser.add_argument('--subsample', default=0, type=int,
                        help='Number of reads to subsample.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    # Reject negatives as well as the default 0: the message promises a
    # positive integer, and a truthiness test lets negative values through.
    if args.subsample <= 0:
        raise ValueError('--subsample should be a positive integer.')

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE DCC Count signal track generation'.

    Returns:
        argparse.Namespace: Parsed arguments (``ta`` is positional).

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC Count signal track generation')
    parser.add_argument('ta', type=str, help='Path for TAGALIGN file.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--mem-gb', type=float, default=4.0,
                        help='Max. memory for this job in GB. '
                             'This will be used to determine GNU sort -S (defaulting to 0.5 of this value). '
                             'It should be total memory for this task (not memory per thread).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Every level appears exactly once so the usage text has no duplicates.
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE preseq'.

    Returns:
        argparse.Namespace: Parsed arguments (all are optional flags).

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE preseq')
    parser.add_argument('--paired-end', action="store_true",
                        help='Paired-end BAM.')
    parser.add_argument('--bam', type=str, help='Raw BAM file.')
    parser.add_argument('--picard-java-heap',
                        help='Picard\'s Java max. heap: java -jar picard.jar '
                             '-Xmx[MAX_HEAP]')
    parser.add_argument('--nth', type=int, default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument('--mem-gb', type=float,
                        help='Max. memory for samtools sort in GB. '
                             'It should be total memory for this task (not memory per thread).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Every level appears exactly once so the usage text has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE TSS enrichment.'.

    Returns:
        argparse.Namespace: Parsed arguments (all are optional flags).

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE TSS enrichment.')
    parser.add_argument('--read-len-log', type=str,
                        help='Read length log file (from aligner task).')
    parser.add_argument('--nodup-bam', type=str,
                        help='Raw BAM file (from task filter).')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--tss', type=str, help='TSS definition bed file.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Every level appears exactly once so the usage text has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE DCC TAGALIGN pooler.'.

    Returns:
        argparse.Namespace: Parsed arguments; ``tas`` is a list holding one
        or more positional values.

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC TAGALIGN pooler.',
                                     description='')
    parser.add_argument('tas', nargs='+', type=str,
                        help='List of TAGALIGNs to be pooled.')
    parser.add_argument('--prefix', type=str, help='Basename prefix.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # No ``type`` is given, so a supplied --col value stays a string.
    parser.add_argument('--col',
                        help='Number of columns to keep in a pooled TAGALIGN. '
                             'Keep all columns if not defined.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE DCC filter.'.

    Returns:
        argparse.Namespace: Parsed arguments (``bam`` is positional).

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC filter.')
    parser.add_argument('bam', type=str, help='Path for raw BAM file.')
    parser.add_argument('--dup-marker', type=str, default='picard',
                        choices=['picard', 'sambamba'],
                        help='Dupe marker for filtering mapped reads in BAM.')
    parser.add_argument('--mapq-thresh', default=30, type=int,
                        help='Threshold for low MAPQ reads removal.')
    parser.add_argument('--no-dup-removal', action="store_true",
                        help='No dupe reads removal when filtering BAM.')
    parser.add_argument('--paired-end', action="store_true",
                        help='Paired-end BAM.')
    parser.add_argument('--multimapping', default=0, type=int,
                        help='Multimapping reads.')
    parser.add_argument('--filter-chrs', nargs='*',
                        help='Chromosomes to be filtered for final (nodup/filt) BAM.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--mito-chr-name', default='chrM',
                        help='Mito chromosome name.')
    parser.add_argument('--nth', type=int, default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument('--mem-gb', type=float,
                        help='Max. memory for samtools sort in GB. '
                             'It should be total memory for this task (not memory per thread).')
    parser.add_argument('--picard-java-heap',
                        help='Picard\'s Java max. heap: java -jar picard.jar '
                             '-Xmx[MAX_HEAP]')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Every level appears exactly once so the usage text has no duplicates.
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE post_call_peak (chip)'.

    ``--peak-type``, ``--fraglen`` and ``--blacklist`` are required.

    Returns:
        argparse.Namespace: Parsed arguments; ``blacklist`` is replaced by an
        empty string when the given path ends with ``/dev/null``.

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE post_call_peak (chip)',
                                     description='')
    parser.add_argument('peak', type=str,
                        help='Path for PEAK file. Peak filename should be "*.*Peak.gz". '
                             'e.g. rep1.narrowPeak.gz')
    parser.add_argument('--ta', type=str, help='TAG-ALIGN file.')
    parser.add_argument('--peak-type', type=str, required=True,
                        choices=['narrowPeak', 'regionPeak',
                                 'broadPeak', 'gappedPeak'],
                        help='Peak file type.')
    parser.add_argument('--fraglen', type=int, required=True,
                        help='Fragment length.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--blacklist', type=str, required=True,
                        help='Blacklist BED file.')
    parser.add_argument('--keep-irregular-chr', action="store_true",
                        help='Keep reads with non-canonical chromosome names.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    # A path ending in /dev/null is normalised to '' (no blacklist path).
    if args.blacklist.endswith('/dev/null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE DCC MACS2 signal track'.

    Returns:
        argparse.Namespace: Parsed arguments; when only one positional value
        is supplied for ``tas``, an empty string is appended so the list has
        a second element.

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC MACS2 signal track',
                                     description='')
    parser.add_argument('tas', type=str, nargs='+',
                        help='Path for TAGALIGN file (first) and control TAGALIGN file (second; optional).')
    parser.add_argument('--fraglen', type=int, required=True,
                        help='Fragment length.')
    parser.add_argument('--shift', type=int, default=0,
                        help='macs2 callpeak --shift.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    # Adjacent literals replace the in-string backslash continuation.
    parser.add_argument('--gensz', type=str,
                        help='Genome size (sum of entries in 2nd column of '
                             'chr. sizes file, or hs for human, ms for mouse).')
    parser.add_argument('--pval-thresh', default=0.01, type=float,
                        help='P-Value threshold.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    # Pad with '' so index 1 (the control slot per the help text) exists.
    if len(args.tas) == 1:
        args.tas.append('')

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE post align'.

    Returns:
        argparse.Namespace: Parsed arguments (``fastq`` and ``bam`` are
        positional).

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE post align',
                                     description='')
    parser.add_argument('fastq', type=str, help='Path for FASTQ R1')
    parser.add_argument('bam', type=str, help='Path for BAM')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file. If not given then '
                             'SAMstats on mito-free BAM will not be calcaulted.')
    parser.add_argument('--mito-chr-name', default='chrM',
                        help='Mito chromosome name.')
    parser.add_argument('--nth', type=int, default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Every level appears exactly once so the usage text has no duplicates.
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE annot_enrich'.

    Returns:
        argparse.Namespace: Parsed arguments (all are optional flags).

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE annot_enrich (fraction of reads in annotated regions)')
    parser.add_argument('--ta', type=str,
                        help='TAG-ALIGN file (from task bam2ta).')
    parser.add_argument('--dnase', type=str,
                        help='DNase definition bed file.')
    parser.add_argument('--blacklist', type=str, help='Blacklist bed file.')
    parser.add_argument('--prom', type=str,
                        help='Promoter definition bed file.')
    parser.add_argument('--enh', type=str,
                        help='Enhancer definition bed file.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Every level appears exactly once so the usage text has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE DCC Blacklist filter.'.

    ``--blacklist`` is required.

    Returns:
        argparse.Namespace: Parsed arguments; ``blacklist`` is replaced by an
        empty string when the given value ends with ``null``.

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC Blacklist filter.')
    parser.add_argument('peak', type=str, help='Peak file.')
    parser.add_argument('--blacklist', type=str, required=True,
                        help='Blacklist BED file.')
    parser.add_argument('--keep-irregular-chr', action="store_true",
                        help='Keep reads with non-canonical chromosome names.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    # Any value ending in 'null' (e.g. /dev/null) is normalised to ''.
    if args.blacklist.endswith('null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE frac mito'.

    Returns:
        argparse.Namespace: Parsed arguments (``non_mito_samstat`` and
        ``mito_samstat`` are positional).

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE frac mito',
        description='Calculates fraction of mito reads')
    parser.add_argument('non_mito_samstat', type=str,
                        help='Path for SAMstats log file')
    parser.add_argument('mito_samstat', type=str,
                        help='Path for SAMstats log file (mito only)')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Every level appears exactly once so the usage text has no duplicates.
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the BSUB job-submission wrapper.

    Returns:
        argparse.Namespace: Parsed arguments. ``--out-dir`` has no default
        here, so ``out_dir`` is ``None`` when the flag is omitted.

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(
        prog='Wrapper for BSUB job submission for ATAC-seq data.',
        description='')
    parser.add_argument('--path-to-fastqs', default='', type=str,
                        help='Path to FASTQ files.')
    parser.add_argument('--memory', default='10GB', type=str,
                        help='Memory requested to run the analysis.')
    parser.add_argument('--queue', default='standard', type=str,
                        help='Queue to submit the job in HPCF (use bqueues to choose).')
    parser.add_argument('--out-dir', type=str, help='Output Directory.')
    # Every level appears exactly once so the usage text has no duplicates.
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE DCC pseudo replicator.'.

    Returns:
        argparse.Namespace: Parsed arguments (``ta`` is positional).

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC pseudo replicator.')
    parser.add_argument('ta', type=str, help='Path for TAGALIGN file.')
    parser.add_argument('--paired-end', action="store_true",
                        help='Paired-end TAGALIGN.')
    parser.add_argument('--pseudoreplication-random-seed', type=int,
                        default=0,
                        help='Set it to 0 to use file\'s size (in bytes) as random seed.'
                             'Otherwise this seed will be used for GNU shuf --random-source=sha256(seed).'
                             'It is useful when random seed based on input file size does not work.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Every level appears exactly once so the usage text has no duplicates.
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE spp call_peak'.

    ``--fraglen`` is required and ``tas`` takes exactly two positional
    values.

    Returns:
        argparse.Namespace: Parsed arguments.

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE spp call_peak')
    parser.add_argument('tas', type=str, nargs=2,
                        help='Path for TAGALIGN file and control TAGALIGN file.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--fraglen', type=int, required=True,
                        help='Fragment length.')
    parser.add_argument('--fdr-thresh', default=0.01, type=float,
                        help='FDR threshold for run_spp.R -fdr parameter.')
    parser.add_argument('--cap-num-peak', default=300000, type=int,
                        help='Capping number of peaks by taking top N peaks.')
    parser.add_argument('--nth', type=int, default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Every level appears exactly once so the usage text has no duplicates.
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE DCC MACS2 signal track'.

    Returns:
        argparse.Namespace: Parsed arguments (``ta`` is positional).

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC MACS2 signal track',
                                     description='')
    parser.add_argument('ta', type=str, help='Path for TAGALIGN file.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    # Adjacent literals replace the in-string backslash continuation.
    parser.add_argument('--gensz', type=str,
                        help='Genome size (sum of entries in 2nd column of '
                             'chr. sizes file, or hs for human, ms for mouse).')
    parser.add_argument('--pval-thresh', default=0.01, type=float,
                        help='P-Value threshold.')
    parser.add_argument('--smooth-win', default=73, type=int,
                        help='Smoothing window size.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE DCC Blacklist filter.'.

    Returns:
        argparse.Namespace: Parsed arguments; ``blacklist`` is an empty
        string when the flag is omitted or its value ends with ``null``.

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC Blacklist filter.')
    parser.add_argument('peak', type=str, help='Peak file.')
    parser.add_argument('--blacklist', type=str, help='Blacklist BED file.')
    parser.add_argument('--regex-bfilt-peak-chr-name',
                        help='Keep chromosomes matching this pattern only '
                             'in .bfilt. peak files.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    # Missing flag and '...null' values both collapse to '' so the attribute
    # is always a string.
    if args.blacklist is None or args.blacklist.endswith('null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE DCC Naive overlap.'.

    ``--peak-type`` is required; ``peak1``, ``peak2`` and ``peak_pooled``
    are positional.

    Returns:
        argparse.Namespace: Parsed arguments; ``blacklist`` is an empty
        string when the flag is omitted or its value ends with ``null``.

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC Naive overlap.',
        description='NarrowPeak or RegionPeak only.')
    parser.add_argument('peak1', type=str, help='Peak 1.')
    parser.add_argument('peak2', type=str, help='Peak 2.')
    parser.add_argument('peak_pooled', type=str, help='Pooled peak.')
    parser.add_argument('--prefix', default='overlap', type=str,
                        help='Prefix basename for output overlap peak.')
    parser.add_argument('--peak-type', type=str, required=True,
                        choices=['narrowPeak', 'regionPeak',
                                 'broadPeak', 'gappedPeak'],
                        help='Peak file type.')
    # Adjacent literals replace the in-string backslash continuations.
    parser.add_argument('--nonamecheck', action='store_true',
                        help='bedtools intersect -nonamecheck. '
                             'use this if you get bedtools intersect '
                             'naming convenction warnings/errors).')
    parser.add_argument('--blacklist', type=str, help='Blacklist BED file.')
    parser.add_argument('--regex-bfilt-peak-chr-name',
                        help='Keep chromosomes matching this pattern only '
                             'in .bfilt. peak files.')
    parser.add_argument('--ta', type=str, help='TAGALIGN file for FRiP.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--fraglen', type=int, default=0,
                        help='Fragment length for TAGALIGN file. '
                             'If given, do shifted FRiP (for ChIP-Seq).')
    parser.add_argument('--mem-gb', type=float, default=4.0,
                        help='Max. memory for this job in GB. '
                             'This will be used to determine GNU sort -S (defaulting to 0.5 of this value). '
                             'It should be total memory for this task (not memory per thread).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    # Missing flag and '...null' values both collapse to ''.
    if args.blacklist is None or args.blacklist.endswith('null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse and validate command-line arguments for 'ENCODE DCC bwa aligner.'.

    Returns:
        argparse.Namespace: Parsed arguments (``bwa_index_prefix_or_tar`` and
        ``fastqs`` are positional).

    Raises:
        argparse.ArgumentTypeError: If the number of FASTQs does not match
            the mode (2 with ``--paired-end``, otherwise 1).
        ValueError: If ``--use-bwa-mem-for-pe`` is given without
            ``--paired-end``, or ``--rescue-reads-for-bwa-mem`` is given
            without ``--use-bwa-mem-for-pe``.

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC bwa aligner.',
                                     description='')
    # Adjacent literals replace the in-string backslash continuations.
    parser.add_argument('bwa_index_prefix_or_tar', type=str,
                        help='Path for prefix (or a tarball .tar) '
                             'for reference bwa index. '
                             'Prefix must be like [PREFIX].sa. '
                             'TAR ball can have any [PREFIX] but it should not '
                             'have a directory structure in it.')
    parser.add_argument('fastqs', nargs='+', type=str,
                        help='List of FASTQs (R1 and R2). '
                             'FASTQs must be compressed with gzip (with .gz).')
    parser.add_argument('--use-bwa-mem-for-pe', action="store_true",
                        help='Use "bwa mem" for PAIRED-ENDED dataset with R1 FASTQ\'s read length >= --bwa-mem-read-len-limit. '
                             'For shorter reads, bwa aln will be used. ')
    parser.add_argument('--rescue-reads-for-bwa-mem', action="store_true",
                        help='Use -P for "bwa mem" to rescue missing hits only (by using SW algorithm) '
                             'but do not try to find hits that fit a proper pair.')
    parser.add_argument('--bwa-mem-read-len-limit', type=int, default=70,
                        help='Read length limit for bwa mem (for paired-ended FASTQs only). '
                             'bwa aln will be used instead of bwa mem if R1 reads are shorter than this.')
    parser.add_argument('--paired-end', action="store_true",
                        help='Paired-end FASTQs.')
    parser.add_argument('--nth', type=int, default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument('--mem-gb', type=float,
                        help='Max. memory for samtools sort and GNU sort -S '
                             '(half of this value will be used for GNU sort) in GB. '
                             'It should be total memory for this task (not memory per thread).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    # check if fastqs have correct dimension
    if args.paired_end and len(args.fastqs) != 2:
        raise argparse.ArgumentTypeError('Need 2 fastqs for paired end.')
    if not args.paired_end and len(args.fastqs) != 1:
        raise argparse.ArgumentTypeError('Need 1 fastq for single end.')

    # bwa-mem options only make sense in the combinations checked below.
    if args.use_bwa_mem_for_pe and not args.paired_end:
        raise ValueError(
            '--use-bwa-mem-for-pe is for paired ended FASTQs only.'
        )
    if not args.use_bwa_mem_for_pe and args.rescue_reads_for_bwa_mem:
        raise ValueError(
            '--rescue-reads-for-bwa-mem is available only when --use-bwa-mem-for-pe is activated.'
        )

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse and validate command-line arguments for 'ENCODE DCC bowtie2 aligner.'.

    Returns:
        argparse.Namespace: Parsed arguments (``bowtie2_index_prefix_or_tar``
        and ``fastqs`` are positional).

    Raises:
        argparse.ArgumentTypeError: If the number of FASTQs does not match
            the mode (2 with ``--paired-end``, otherwise 1).

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC bowtie2 aligner.',
                                     description='')
    # Adjacent literals replace the in-string backslash continuations.
    parser.add_argument('bowtie2_index_prefix_or_tar', type=str,
                        help='Path for prefix (or a tarball .tar) '
                             'for reference bowtie2 index. '
                             'Prefix must be like [PREFIX].1.bt2*. '
                             'TAR ball can have any [PREFIX] but it should not '
                             'have a directory structure in it.')
    parser.add_argument('fastqs', nargs='+', type=str,
                        help='List of FASTQs (R1 and R2). '
                             'FASTQs must be compressed with gzip (with .gz).')
    parser.add_argument('--paired-end', action="store_true",
                        help='Paired-end FASTQs.')
    parser.add_argument('--multimapping', default=0, type=int,
                        help='Multimapping reads (for bowtie2 -k(m+1). '
                             'This will be incremented in an actual bowtie2 command line'
                             'e.g. --multimapping 3 will be bowtie2 -k 4')
    parser.add_argument('--nth', type=int, default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument('--mem-gb', type=float,
                        help='Max. memory for samtools sort in GB. '
                             'It should be total memory for this task (not memory per thread).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    # check if fastqs have correct dimension
    if args.paired_end and len(args.fastqs) != 2:
        raise argparse.ArgumentTypeError('Need 2 fastqs for paired end.')
    if not args.paired_end and len(args.fastqs) != 1:
        raise argparse.ArgumentTypeError('Need 1 fastq for single end.')

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE DCC IDR.'.

    ``--peak-type`` is required; ``peak1``, ``peak2`` and ``peak_pooled``
    are positional.

    Returns:
        argparse.Namespace: Parsed arguments; ``blacklist`` is an empty
        string when the flag is omitted or its value ends with ``null``.

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC IDR.',
        description='NarrowPeak or RegionPeak only.')
    parser.add_argument('peak1', type=str, help='Peak file 1.')
    parser.add_argument('peak2', type=str, help='Peak file 2.')
    parser.add_argument('peak_pooled', type=str, help='Pooled peak file.')
    parser.add_argument('--prefix', default='idr', type=str,
                        help='Prefix basename for output IDR peak.')
    parser.add_argument('--peak-type', type=str, required=True,
                        choices=['narrowPeak', 'regionPeak',
                                 'broadPeak', 'gappedPeak'],
                        help='Peak file type.')
    parser.add_argument('--idr-thresh', default=0.1, type=float,
                        help='IDR threshold.')
    parser.add_argument('--idr-rank', default='p.value', type=str,
                        choices=['p.value', 'q.value', 'signal.value'],
                        help='IDR ranking method.')
    parser.add_argument('--blacklist', type=str, help='Blacklist BED file.')
    parser.add_argument('--regex-bfilt-peak-chr-name',
                        help='Keep chromosomes matching this pattern only '
                             'in .bfilt. peak files.')
    parser.add_argument('--ta', type=str, help='TAGALIGN file for FRiP.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    # Adjacent literals replace the in-string backslash continuation.
    parser.add_argument('--fraglen', type=int, default=0,
                        help='Fragment length for TAGALIGN file. '
                             'If given, do shifted FRiP (for ChIP-Seq).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    # Missing flag and '...null' values both collapse to ''.
    if args.blacklist is None or args.blacklist.endswith('null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE DCC Naive overlap.'.

    ``--peak-type`` and ``--blacklist`` are required; ``peak1``, ``peak2``
    and ``peak_pooled`` are positional.

    Returns:
        argparse.Namespace: Parsed arguments; ``blacklist`` is replaced by an
        empty string when the given path ends with ``/dev/null``.

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC Naive overlap.',
        description='NarrowPeak or RegionPeak only.')
    parser.add_argument('peak1', type=str, help='Peak 1.')
    parser.add_argument('peak2', type=str, help='Peak 2.')
    parser.add_argument('peak_pooled', type=str, help='Pooled peak.')
    parser.add_argument('--prefix', default='overlap', type=str,
                        help='Prefix basename for output overlap peak.')
    parser.add_argument('--peak-type', type=str, required=True,
                        choices=['narrowPeak', 'regionPeak',
                                 'broadPeak', 'gappedPeak'],
                        help='Peak file type.')
    # Adjacent literals replace the in-string backslash continuations.
    parser.add_argument('--nonamecheck', action='store_true',
                        help='bedtools intersect -nonamecheck. '
                             'use this if you get bedtools intersect '
                             'naming convenction warnings/errors).')
    parser.add_argument('--blacklist', type=str, required=True,
                        help='Blacklist BED file.')
    parser.add_argument('--keep-irregular-chr', action="store_true",
                        help='Keep reads with non-canonical chromosome names.')
    parser.add_argument('--ta', type=str, help='TAGALIGN file for FRiP.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--fraglen', type=int, default=0,
                        help='Fragment length for TAGALIGN file. '
                             'If given, do shifted FRiP (for ChIP-Seq).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    # A path ending in /dev/null is normalised to '' (no blacklist path).
    if args.blacklist.endswith('/dev/null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments(debug=False):
    """Parse command-line arguments for 'ENCODE DCC Trimmomatic wrapper.'.

    Args:
        debug: Not used by this function; kept so existing callers that pass
            it continue to work.

    Returns:
        argparse.Namespace: Parsed arguments (``--crop-length`` is required).

    Raises:
        ValueError: If ``--crop-length`` is zero or negative.

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC Trimmomatic wrapper.')
    parser.add_argument('--fastq1', help='FASTQ R1 to be trimmed.')
    parser.add_argument('--fastq2', help='FASTQ R2 to be trimmed.')
    parser.add_argument('--paired-end', action="store_true",
                        help='Paired-end FASTQs.')
    parser.add_argument('--crop-length', type=int, required=True,
                        help='Number of basepair to crop.'
                             'Trimmomatic\'s parameter CROP.')
    parser.add_argument('--crop-length-tol', type=int, default=2,
                        help='Crop length tolerance to keep shorter reads '
                             'around the crop length. '
                             'Trimmomatic\'s parameter MINLEN will be --crop-length '
                             '- abs(--crop-length-tol).')
    parser.add_argument('--phred-score-format', default='auto',
                        choices=['auto', 'phred33', 'phred64'],
                        help='Base encoding for Phred scores in FASTQs. '
                             'If it is not auto then -phred33 or -phred64 to '
                             'Trimmomatic\'s command line.')
    parser.add_argument('--out-dir-R1', default='', type=str,
                        help='Output directory for cropped R1 fastq.')
    parser.add_argument('--out-dir-R2', default='', type=str,
                        help='Output directory for cropped R2 fastq.')
    parser.add_argument('--trimmomatic-java-heap',
                        help='Trimmomatic\'s Java max. heap: java -jar Trimmomatic.jar '
                             '-Xmx[MAX_HEAP]')
    parser.add_argument('--nth', type=int, default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    # The message promises "> 0", so negatives are rejected too; a plain
    # truthiness test would only catch 0.
    if args.crop_length <= 0:
        raise ValueError('Crop length must be > 0.')

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE DCC MACS2 callpeak'.

    Returns:
        argparse.Namespace: Parsed arguments; when only one positional value
        is supplied for ``tas``, an empty string is appended so the list has
        a second element.

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC MACS2 callpeak')
    parser.add_argument('tas', type=str, nargs='+',
                        help='Path for TAGALIGN file (first) and '
                             'control TAGALIGN file (second; optional).')
    parser.add_argument('--fraglen', type=int, required=True,
                        help='Fragment length.')
    parser.add_argument('--shift', type=int, default=0,
                        help='macs2 callpeak --shift.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    # Adjacent literals replace the in-string backslash continuation.
    parser.add_argument('--gensz', type=str,
                        help='Genome size (sum of entries in 2nd column of '
                             'chr. sizes file, or hs for human, ms for mouse).')
    parser.add_argument('--pval-thresh', default=0.01, type=float,
                        help='P-Value threshold.')
    parser.add_argument('--cap-num-peak', default=500000, type=int,
                        help='Capping number of peaks by taking top N peaks.')
    parser.add_argument('--ctl-subsample', default=0, type=int,
                        help='Subsample control to this read depth '
                             '(0: no subsampling).')
    parser.add_argument('--ctl-paired-end', action="store_true",
                        help='Paired-end control TA.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    # Pad with '' so index 1 (the control slot per the help text) exists.
    if len(args.tas) == 1:
        args.tas.append('')

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE DCC Choose control.'.

    ``--tas``, ``--ctl-tas`` and ``--ctl-depth-ratio`` are required.

    Returns:
        argparse.Namespace: Parsed arguments.

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC Choose control.',
        description='Choose appropriate control for each IP replicate.'
                    'ctl_for_repN.tagAlign.gz will be generated for each '
                    'IP replicate on --out-dir. '
                    'This outputs a file with integers '
                    '(chosen control index for each replicate per line).')
    parser.add_argument('--tas', type=str, nargs='+', required=True,
                        help='List of experiment TAG-ALIGN per IP replicate.')
    parser.add_argument('--ctl-tas', type=str, nargs='+', required=True,
                        help='List of control TAG-ALIGN per IP replicate.')
    parser.add_argument('--ta-pooled', type=str, nargs='*',
                        help='Pooled experiment TAG-ALIGN.')
    parser.add_argument('--ctl-ta-pooled', type=str, nargs='*',
                        help='Pooled control TAG-ALIGN.')
    parser.add_argument('--ctl-depth-ratio', type=float, required=True,
                        help='Control depth ratio (between any two controls).')
    parser.add_argument('--ctl-depth-limit', type=int, default=200000000,
                        help='Control depth limit. If read depth of chosen control is '
                             'over this limit then such control should be subsampled.')
    parser.add_argument('--exp-ctl-depth-ratio-limit', type=float,
                        default=5.0,
                        help='Exp vs. control depth ratio limit. ')
    parser.add_argument('--always-use-pooled-ctl', action="store_true",
                        help='Always use pooled control for all IP '
                             'replicates.')
    parser.add_argument('--out-tsv-basename', default='chosen_ctl.tsv',
                        type=str,
                        help='Output TSV basename '
                             '(will be written on directory --out-dir). '
                             'This TSV file has chosen control index '
                             'per line (for each exp replicate).')
    parser.add_argument('--out-tsv-subsample-basename',
                        default='chosen_ctl_subsample.tsv', type=str,
                        help='Output TSV subsample basename '
                             '(will be written on directory --out-dir). '
                             'This TSV file has number of reads to subsample control '
                             'per line (for each exp replicate). '
                             '0 means no subsampling for control.')
    parser.add_argument('--out-txt-subsample-pooled-basename',
                        default='chosen_ctl_subsample_pooled.txt', type=str,
                        help='Output TXT subsample basename for pooled control'
                             '(will be written on directory --out-dir). '
                             'This TXT file has a single line for '
                             'number of reads to subsample pooled control control'
                             '0 means no subsampling for control.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Every level appears exactly once so the usage text has no duplicates.
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE DCC cross-correlation analysis.'.

    Returns:
        argparse.Namespace: Parsed arguments (``ta`` is positional).

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC cross-correlation analysis.')
    parser.add_argument('ta', type=str, help='Path for TAGALIGN file.')
    parser.add_argument('--mito-chr-name', default='chrM',
                        help='Mito chromosome name.')
    parser.add_argument('--subsample', type=int, default=0,
                        help='Subsample TAGALIGN.')
    # Adjacent literals replace the in-string backslash continuation.
    parser.add_argument('--speak', type=int, default=-1,
                        help='User-defined cross-corr. peak strandshift '
                             '(-speak= in run_spp.R). Disabled if -1.')
    parser.add_argument('--exclusion-range-min', type=int,
                        help='User-defined exclusion range minimum used for '
                             '-x=${xcor_exclusion_range_min}:'
                             '${xcor_exclusion_range_max}')
    parser.add_argument('--exclusion-range-max', type=int,
                        help='User-defined exclusion range maximum used for '
                             '-x=${xcor_exclusion_range_min}:'
                             '${xcor_exclusion_range_max}')
    parser.add_argument('--chip-seq-type', choices=['tf', 'histone'],
                        help='Type of ChIP-seq pipeline (histone of tf)')
    parser.add_argument('--paired-end', action="store_true",
                        help='Paired-end TAGALIGN.')
    parser.add_argument('--nth', type=int, default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for 'ENCODE post_call_peak (chip)'.

    ``--peak-type`` and ``--fraglen`` are required; ``peak`` is positional.

    Returns:
        argparse.Namespace: Parsed arguments; ``blacklist`` is an empty
        string when the flag is omitted or its value ends with ``null``.

    Side effects:
        Sets the level of the externally defined ``log`` object from
        ``--log-level`` and logs ``sys.argv`` via ``log.info``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE post_call_peak (chip)',
                                     description='')
    parser.add_argument('peak', type=str,
                        help='Path for PEAK file. Peak filename should be "*.*Peak.gz". '
                             'e.g. rep1.narrowPeak.gz')
    parser.add_argument('--ta', type=str, help='TAG-ALIGN file.')
    parser.add_argument('--peak-type', type=str, required=True,
                        choices=['narrowPeak', 'regionPeak',
                                 'broadPeak', 'gappedPeak'],
                        help='Peak file type.')
    parser.add_argument('--fraglen', type=int, required=True,
                        help='Fragment length.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--blacklist', type=str, help='Blacklist BED file.')
    parser.add_argument('--regex-bfilt-peak-chr-name',
                        help='Keep chromosomes matching this pattern only '
                             'in .bfilt. peak files.')
    parser.add_argument('--mem-gb', type=float, default=4.0,
                        help='Max. memory for this job in GB. '
                             'This will be used to determine GNU sort -S (defaulting to 0.5 of this value). '
                             'It should be total memory for this task (not memory per thread).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument('--log-level', default='INFO',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'ERROR', 'CRITICAL'],
                        help='Log level')
    args = parser.parse_args()

    # Missing flag and '...null' values both collapse to ''.
    if args.blacklist is None or args.blacklist.endswith('null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args