def parse_arguments():
    """Parse command-line arguments for the TSS enrichment script.

    After parsing, the level of the ``log`` logger is set from
    ``--log-level`` and the raw ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE TSS enrichment.')
    parser.add_argument('--read-len-log', type=str,
                        help='Read length log file (from aligner task).')
    parser.add_argument('--nodup-bam', type=str,
                        help='Raw BAM file (from task filter).')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--tss', type=str, help='TSS definition bed file.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for count signal track generation.

    After parsing, the level of the ``log`` logger is set from
    ``--log-level`` and the raw ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC Count signal track generation')
    parser.add_argument('ta', type=str, help='Path for TAGALIGN file.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument(
        '--mem-gb', type=float, default=4.0,
        help='Max. memory for this job in GB. '
             'This will be used to determine GNU sort -S '
             '(defaulting to 0.5 of this value). '
             'It should be total memory for this task '
             '(not memory per thread).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the mito-read fraction script.

    After parsing, the level of the ``log`` logger is set from
    ``--log-level`` and the raw ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE frac mito',
        description='Calculates fraction of mito reads')
    parser.add_argument('non_mito_samstat', type=str,
                        help='Path for SAMstats log file')
    parser.add_argument('mito_samstat', type=str,
                        help='Path for SAMstats log file (mito only)')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def main():
    """Run the MACS2 peak-calling task from the command line.

    Parses the arguments, creates the output directory, calls ``macs2``,
    checks that the file it returns is not empty and finally lists the
    contents of the output directory.
    """
    args = parse_arguments()
    out_dir = args.out_dir

    log.info('Initializing and making output directory...')
    mkdir_p(out_dir)

    log.info('Calling peaks macs2...')
    # Positional order must match the signature of macs2().
    macs2_params = [
        args.ta,
        args.chrsz,
        args.gensz,
        args.pval_thresh,
        args.smooth_win,
        args.cap_num_peak,
        args.mem_gb,
        out_dir,
    ]
    peak_file = macs2(*macs2_params)

    log.info('Checking if output is empty...')
    assert_file_not_empty(peak_file)

    log.info('List all files in output directory...')
    ls_l(out_dir)

    log.info('All done.')
def parse_arguments():
    """Parse command-line arguments for the signal-vs-roadmap comparison.

    After parsing, the level of the ``log`` logger is set from
    ``--log-level`` and the raw ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE compare signal to roadmap')
    parser.add_argument('--bigwig', type=str,
                        help='BIGWIG file (from task macs2).')
    parser.add_argument('--dnase', type=str, help='DNase file.')
    parser.add_argument('--reg2map', type=str, help='Reg2map file.')
    parser.add_argument('--reg2map-bed', type=str, help='Reg2map bed file.')
    parser.add_argument('--roadmap-meta', type=str,
                        help='Roadmap metadata file.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the preseq task.

    After parsing, the level of the ``log`` logger is set from
    ``--log-level`` and the raw ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE preseq')
    parser.add_argument('--paired-end', action='store_true',
                        help='Paired-end BAM.')
    parser.add_argument('--bam', type=str, help='Raw BAM file.')
    parser.add_argument(
        '--picard-java-heap',
        help="Picard's Java max. heap: java -jar picard.jar "
             "-Xmx[MAX_HEAP]")
    parser.add_argument('--nth', type=int, default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument(
        '--mem-gb', type=float,
        help='Max. memory for samtools sort in GB. '
             'It should be total memory for this task '
             '(not memory per thread).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the control TAG-ALIGN subsampler.

    After successful validation, the level of the ``log`` logger is set
    from ``--log-level`` and the raw ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.

    Raises:
        ValueError: If ``--subsample`` is missing, zero or negative.
    """
    # The explanatory text belongs in ``description``; ``prog`` is only the
    # program name shown in the usage line.
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC control TAG-ALIGN subsampler.',
        description='This script does not check if number of reads in TA '
                    'is higher than subsampling number (--subsample). '
                    'If number of reads in TA is lower than subsampling '
                    'number then TA will be just shuffled.')
    parser.add_argument('ta', type=str,
                        help='Path for control TAGALIGN file.')
    parser.add_argument('--paired-end', action='store_true',
                        help='Paired-end TAGALIGN.')
    parser.add_argument('--subsample', default=0, type=int,
                        help='Number of reads to subsample.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    # Reject the default (0) as well as negative values, which a plain
    # truthiness test would let through.
    if args.subsample <= 0:
        raise ValueError('--subsample should be a positive integer.')

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the MACS2 signal track script.

    After parsing, the level of the ``log`` logger is set from
    ``--log-level`` and the raw ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC MACS2 signal track',
                                     description='')
    parser.add_argument('ta', type=str, help='Path for TAGALIGN file.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    # Adjacent literals instead of a backslash continuation inside the string,
    # so no source indentation leaks into the help text.
    parser.add_argument(
        '--gensz', type=str,
        help='Genome size (sum of entries in 2nd column of '
             'chr. sizes file, or hs for human, ms for mouse).')
    parser.add_argument('--pval-thresh', default=0.01, type=float,
                        help='P-Value threshold.')
    parser.add_argument('--smooth-win', default=150, type=int,
                        help='Smoothing window size.')
    parser.add_argument(
        '--mem-gb', type=float, default=4.0,
        help='Max. memory for this job in GB. '
             'This will be used to determine GNU sort -S '
             '(defaulting to 0.5 of this value). '
             'It should be total memory for this task '
             '(not memory per thread).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def main():
    """Run the library-complexity (preseq / Picard) QC task.

    Steps, in order:
      1. Parse arguments and derive the output prefix from ``--out-dir`` and
         the BAM basename (extension stripped by ``strip_ext_bam``).
      2. Obtain a temporary BAM from ``remove_read_group``.
      3. For paired-end data only, compute the Picard estimated library size
         from that temporary BAM.
      4. Run preseq on the original BAM and plot its data.
      5. If a Picard estimate exists, write it to
         ``<prefix>.picard_est_lib_size.qc``.
      6. Remove the temporary BAM and list the output directory.

    The temporary BAM is removed even when one of the steps raises, so a
    failed run does not leave it behind.
    """
    args = parse_arguments()

    aligned_bam = args.bam
    output_prefix = os.path.join(
        args.out_dir, os.path.basename(strip_ext_bam(aligned_bam)))
    java_heap = args.picard_java_heap
    rg_free_aligned_bam = remove_read_group(aligned_bam)

    try:
        # Library complexity: Preseq results, NRF, PBC1, PBC2
        if args.paired_end:
            picard_est_lib_size = get_picard_complexity_metrics(
                rg_free_aligned_bam, output_prefix, java_heap)
        else:
            picard_est_lib_size = None

        # NOTE(review): the original comment here read "SORTED BAM" --
        # presumably run_preseq expects a sorted BAM; confirm with its caller.
        # The second return value (preseq log) is not used by this task.
        preseq_data, _ = run_preseq(aligned_bam, output_prefix)

        get_preseq_plot(preseq_data, output_prefix)

        # write picard_est_lib_size to file
        if picard_est_lib_size is not None:
            picard_est_lib_size_file = (
                output_prefix + '.picard_est_lib_size.qc')
            with open(picard_est_lib_size_file, 'w') as fp:
                fp.write(str(picard_est_lib_size) + '\n')
    finally:
        # Always clean up the temporary BAM, including on failure.
        rm_f(rg_free_aligned_bam)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
def main():
    """Run the GC-bias QC task.

    Parses arguments, derives the output prefix from ``--out-dir`` and the
    BAM basename (extension stripped by ``strip_ext_bam``), obtains a
    temporary BAM from ``remove_read_group``, runs ``get_gc`` on it and
    plots the result with ``plot_gc``. The temporary BAM is then removed and
    the output directory is listed.

    The temporary BAM is removed even when ``get_gc`` or ``plot_gc`` raises,
    so a failed run does not leave it behind.
    """
    args = parse_arguments()

    ref_fa = args.ref_fa
    final_bam = args.nodup_bam
    output_prefix = os.path.join(
        args.out_dir, os.path.basename(strip_ext_bam(final_bam)))
    java_heap = args.picard_java_heap
    rg_free_final_bam = remove_read_group(final_bam)

    try:
        # Only the first return value is used here; the other two
        # (named gc_plot_pdf and gc_summary in the original) are ignored.
        gc_out, _, _ = get_gc(rg_free_final_bam, ref_fa,
                              output_prefix, java_heap)
        # will generate PNG format from gc_out
        plot_gc(gc_out, output_prefix)
    finally:
        # Always clean up the temporary BAM, including on failure.
        rm_f(rg_free_final_bam)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
def parse_arguments():
    """Parse command-line arguments for the naive-overlap script.

    A missing ``--blacklist`` or one whose value ends with ``'null'``
    (e.g. ``/dev/null``) is normalized to an empty string. After parsing,
    the level of the ``log`` logger is set from ``--log-level`` and the raw
    ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC Naive overlap.',
        description='NarrowPeak or RegionPeak only.')
    parser.add_argument('peak1', type=str, help='Peak 1.')
    parser.add_argument('peak2', type=str, help='Peak 2.')
    parser.add_argument('peak_pooled', type=str, help='Pooled peak.')
    parser.add_argument('--prefix', default='overlap', type=str,
                        help='Prefix basename for output overlap peak.')
    parser.add_argument(
        '--peak-type', type=str, required=True,
        choices=['narrowPeak', 'regionPeak', 'broadPeak', 'gappedPeak'],
        help='Peak file type.')
    parser.add_argument(
        '--nonamecheck', action='store_true',
        help='bedtools intersect -nonamecheck (use this if you get '
             'bedtools intersect naming convention warnings/errors).')
    parser.add_argument('--blacklist', type=str, help='Blacklist BED file.')
    parser.add_argument(
        '--regex-bfilt-peak-chr-name',
        help='Keep chromosomes matching this pattern only '
             'in .bfilt. peak files.')
    parser.add_argument('--ta', type=str, help='TAGALIGN file for FRiP.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument(
        '--fraglen', type=int, default=0,
        help='Fragment length for TAGALIGN file. '
             'If given, do shifted FRiP (for ChIP-Seq).')
    parser.add_argument(
        '--mem-gb', type=float, default=4.0,
        help='Max. memory for this job in GB. '
             'This will be used to determine GNU sort -S '
             '(defaulting to 0.5 of this value). '
             'It should be total memory for this task '
             '(not memory per thread).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    # Treat "no blacklist" and a null-device path the same way.
    if args.blacklist is None or args.blacklist.endswith('null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for MACS2 with an optional control.

    When only one TAGALIGN path is given, an empty string is appended to
    ``args.tas`` so that ``args.tas[1]`` (the control) always exists. After
    parsing, the level of the ``log`` logger is set from ``--log-level`` and
    the raw ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC MACS2 signal track',
                                     description='')
    parser.add_argument(
        'tas', type=str, nargs='+',
        help='Path for TAGALIGN file (first) and control TAGALIGN file '
             '(second; optional).')
    parser.add_argument('--fraglen', type=int, required=True,
                        help='Fragment length.')
    parser.add_argument('--shift', type=int, default=0,
                        help='macs2 callpeak --shift.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    # Adjacent literals instead of a backslash continuation inside the string,
    # so no source indentation leaks into the help text.
    parser.add_argument(
        '--gensz', type=str,
        help='Genome size (sum of entries in 2nd column of '
             'chr. sizes file, or hs for human, ms for mouse).')
    parser.add_argument('--pval-thresh', default=0.01, type=float,
                        help='P-Value threshold.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    # Pad with an empty control path when no control was supplied.
    if len(args.tas) == 1:
        args.tas.append('')

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the pseudo replicator.

    After parsing, the level of the ``log`` logger is set from
    ``--log-level`` and the raw ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC pseudo replicator.')
    parser.add_argument('ta', type=str, help='Path for TAGALIGN file.')
    parser.add_argument('--paired-end', action='store_true',
                        help='Paired-end TAGALIGN.')
    # Each fragment ends with a space so the sentences do not run together
    # when the adjacent literals are concatenated.
    parser.add_argument(
        '--pseudoreplication-random-seed', type=int, default=0,
        help="Set it to 0 to use file's size (in bytes) as random seed. "
             "Otherwise this seed will be used for GNU shuf "
             "--random-source=sha256(seed). "
             "It is useful when random seed based on input file size "
             "does not work.")
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the blacklist filter.

    A ``--blacklist`` value ending with ``'null'`` (e.g. ``/dev/null``) is
    normalized to an empty string. After parsing, the level of the ``log``
    logger is set from ``--log-level`` and the raw ``sys.argv`` is logged at
    INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC Blacklist filter.')
    parser.add_argument('peak', type=str, help='Peak file.')
    parser.add_argument('--blacklist', type=str, required=True,
                        help='Blacklist BED file.')
    parser.add_argument(
        '--keep-irregular-chr', action='store_true',
        help='Keep reads with non-canonical chromosome names.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    # A null-device path means "no blacklist".
    if args.blacklist.endswith('null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the SPP peak caller.

    After parsing, the level of the ``log`` logger is set from
    ``--log-level`` and the raw ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE spp call_peak')
    parser.add_argument(
        'tas', type=str, nargs=2,
        help='Path for TAGALIGN file and control TAGALIGN file.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--fraglen', type=int, required=True,
                        help='Fragment length.')
    parser.add_argument('--fdr-thresh', default=0.01, type=float,
                        help='FDR threshold for run_spp.R -fdr parameter.')
    parser.add_argument(
        '--cap-num-peak', default=300000, type=int,
        help='Capping number of peaks by taking top N peaks.')
    parser.add_argument('--nth', type=int, default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the MACS2 signal track script.

    After parsing, the level of the ``log`` logger is set from
    ``--log-level`` and the raw ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC MACS2 signal track',
                                     description='')
    parser.add_argument('ta', type=str, help='Path for TAGALIGN file.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    # Adjacent literals instead of a backslash continuation inside the string,
    # so no source indentation leaks into the help text.
    parser.add_argument(
        '--gensz', type=str,
        help='Genome size (sum of entries in 2nd column of '
             'chr. sizes file, or hs for human, ms for mouse).')
    parser.add_argument('--pval-thresh', default=0.01, type=float,
                        help='P-Value threshold.')
    parser.add_argument('--smooth-win', default=73, type=int,
                        help='Smoothing window size.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def main():
    """Create two self-pseudo-replicates from a TAGALIGN file.

    Parses the arguments, creates the output directory, runs ``spr_pe`` for
    paired-end input or ``spr_se`` otherwise, lists the output directory and
    finally checks that both returned files are not empty.
    """
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    log.info('Making self-pseudo replicates...')
    # Both implementations are called with the same three arguments; only
    # the one matching the library layout is looked up and invoked.
    make_pseudoreps = spr_pe if args.paired_end else spr_se
    pseudoreps = make_pseudoreps(
        args.ta,
        args.pseudoreplication_random_seed,
        args.out_dir,
    )
    ta_pr1, ta_pr2 = pseudoreps

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('Checking if output is empty...')
    for pseudorep in (ta_pr1, ta_pr2):
        assert_file_not_empty(pseudorep)

    log.info('All done.')
def parse_arguments():
    """Parse command-line arguments for the FRiP script.

    After parsing, the level of the ``log`` logger is set from
    ``--log-level`` and the raw ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC FRiP.', description='')
    parser.add_argument('peak', type=str, help='Peak file.')
    parser.add_argument('ta', type=str, help='TAGALIGN file.')
    # Adjacent literals instead of backslash continuations inside the
    # strings, so no source indentation leaks into the help text.
    parser.add_argument(
        '--chrsz', type=str,
        help='2-col chromosome sizes file. '
             'If given, do shifted FRiP (for ChIP-Seq).')
    parser.add_argument(
        '--fraglen', type=int, default=0,
        help='Fragment length for TAGALIGN file. '
             'If given, do shifted FRiP (for ChIP-Seq).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the TAGALIGN pooler.

    ``--col`` has no ``type`` and is therefore returned as a string (or
    ``None`` when omitted). After parsing, the level of the ``log`` logger
    is set from ``--log-level`` and the raw ``sys.argv`` is logged at INFO
    level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC TAGALIGN pooler.',
                                     description='')
    parser.add_argument('tas', nargs='+', type=str,
                        help='List of TAGALIGNs to be pooled.')
    parser.add_argument('--prefix', type=str, help='Basename prefix.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    parser.add_argument(
        '--col',
        help='Number of columns to keep in a pooled TAGALIGN. '
             'Keep all columns if not defined.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the blacklist filter.

    A missing ``--blacklist`` or one whose value ends with ``'null'``
    (e.g. ``/dev/null``) is normalized to an empty string. After parsing,
    the level of the ``log`` logger is set from ``--log-level`` and the raw
    ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC Blacklist filter.')
    parser.add_argument('peak', type=str, help='Peak file.')
    parser.add_argument('--blacklist', type=str, help='Blacklist BED file.')
    parser.add_argument(
        '--regex-bfilt-peak-chr-name',
        help='Keep chromosomes matching this pattern only '
             'in .bfilt. peak files.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    # Treat "no blacklist" and a null-device path the same way.
    if args.blacklist is None or args.blacklist.endswith('null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def main():
    """Crop FASTQ reads with Trimmomatic.

    Parses the arguments, creates the R1 output directory (and the R2 one
    for paired-end data), runs ``trimmomatic_pe`` or ``trimmomatic_se``,
    lists the output directories and checks that the cropped R1 FASTQ is
    not empty.
    """
    args = parse_arguments()
    is_paired = args.paired_end

    # The R2 directory only takes part in paired-end runs.
    out_dirs = [args.out_dir_R1]
    if is_paired:
        out_dirs.append(args.out_dir_R2)

    log.info('Initializing and making output directory...')
    for out_dir in out_dirs:
        mkdir_p(out_dir)

    crop_msg = 'Cropping fastqs ({} bp) with Trimmomatic...'
    log.info(crop_msg.format(args.crop_length))

    if not is_paired:
        cropped_R1 = trimmomatic_se(
            args.fastq1,
            args.crop_length,
            args.out_dir_R1,
            args.nth,
            args.trimmomatic_java_heap)
    else:
        # Only the R1 result is checked below; R2 is returned but unused.
        cropped_R1, _ = trimmomatic_pe(
            args.fastq1, args.fastq2,
            args.crop_length,
            args.out_dir_R1, args.out_dir_R2,
            args.nth,
            args.trimmomatic_java_heap)

    log.info('List all files in output directory...')
    for out_dir in out_dirs:
        ls_l(out_dir)

    log.info('Checking if output is empty...')
    empty_help = (
        'No reads in FASTQ after cropping. crop_length might be too high? '
        'While cropping, Trimmomatic (with MINLEN) excludes all reads '
        'SHORTER than crop_length.')
    assert_file_not_empty(cropped_R1, help=empty_help)

    log.info('All done.')
def parse_arguments():
    """Parse command-line arguments for the post-align script.

    After parsing, the level of the ``log`` logger is set from
    ``--log-level`` and the raw ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE post align',
                                     description='')
    parser.add_argument('fastq', type=str, help='Path for FASTQ R1')
    parser.add_argument('bam', type=str, help='Path for BAM')
    parser.add_argument(
        '--chrsz', type=str,
        help='2-col chromosome sizes file. If not given then '
             'SAMstats on mito-free BAM will not be calculated.')
    parser.add_argument('--mito-chr-name', default='chrM',
                        help='Mito chromosome name.')
    parser.add_argument('--nth', type=int, default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the BSUB job-submission wrapper.

    ``--out-dir`` has no default here, so it is ``None`` when omitted.
    After parsing, the level of the ``log`` logger is set from
    ``--log-level`` and the raw ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(
        prog='Wrapper for BSUB job submission for ATAC-seq data.',
        description='')
    parser.add_argument('--path-to-fastqs', default='', type=str,
                        help='Path to FASTQ files.')
    parser.add_argument('--memory', default='10GB', type=str,
                        help='Memory requested to run the analysis.')
    parser.add_argument(
        '--queue', default='standard', type=str,
        help='Queue to submit the job in HPCF (use bqueues to choose).')
    parser.add_argument('--out-dir', type=str, help='Output Directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the BAM filter script.

    After parsing, the level of the ``log`` logger is set from
    ``--log-level`` and the raw ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC filter.')
    parser.add_argument('bam', type=str, help='Path for raw BAM file.')
    parser.add_argument(
        '--dup-marker', type=str, default='picard',
        choices=['picard', 'sambamba'],
        help='Dupe marker for filtering mapped reads in BAM.')
    parser.add_argument('--mapq-thresh', default=30, type=int,
                        help='Threshold for low MAPQ reads removal.')
    parser.add_argument('--no-dup-removal', action='store_true',
                        help='No dupe reads removal when filtering BAM.')
    parser.add_argument('--paired-end', action='store_true',
                        help='Paired-end BAM.')
    parser.add_argument('--multimapping', default=0, type=int,
                        help='Multimapping reads.')
    parser.add_argument(
        '--filter-chrs', nargs='*',
        help='Chromosomes to be filtered for final (nodup/filt) BAM.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--mito-chr-name', default='chrM',
                        help='Mito chromosome name.')
    parser.add_argument('--nth', type=int, default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument(
        '--mem-gb', type=float,
        help='Max. memory for samtools sort in GB. '
             'It should be total memory for this task '
             '(not memory per thread).')
    parser.add_argument(
        '--picard-java-heap',
        help="Picard's Java max. heap: java -jar picard.jar "
             "-Xmx[MAX_HEAP]")
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the annotation enrichment script.

    After parsing, the level of the ``log`` logger is set from
    ``--log-level`` and the raw ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE annot_enrich (fraction of reads in annotated regions)')
    parser.add_argument('--ta', type=str,
                        help='TAG-ALIGN file (from task bam2ta).')
    parser.add_argument('--dnase', type=str,
                        help='DNase definition bed file.')
    parser.add_argument('--blacklist', type=str, help='Blacklist bed file.')
    parser.add_argument('--prom', type=str,
                        help='Promoter definition bed file.')
    parser.add_argument('--enh', type=str,
                        help='Enhancer definition bed file.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the ChIP post_call_peak script.

    A ``--blacklist`` value ending with ``'/dev/null'`` is normalized to an
    empty string. After parsing, the level of the ``log`` logger is set from
    ``--log-level`` and the raw ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(prog='ENCODE post_call_peak (chip)',
                                     description='')
    parser.add_argument(
        'peak', type=str,
        help='Path for PEAK file. Peak filename should be "*.*Peak.gz". '
             'e.g. rep1.narrowPeak.gz')
    parser.add_argument('--ta', type=str, help='TAG-ALIGN file.')
    parser.add_argument(
        '--peak-type', type=str, required=True,
        choices=['narrowPeak', 'regionPeak', 'broadPeak', 'gappedPeak'],
        help='Peak file type.')
    parser.add_argument('--fraglen', type=int, required=True,
                        help='Fragment length.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument('--blacklist', type=str, required=True,
                        help='Blacklist BED file.')
    parser.add_argument(
        '--keep-irregular-chr', action='store_true',
        help='Keep reads with non-canonical chromosome names.')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    # The null device means "no blacklist".
    if args.blacklist.endswith('/dev/null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def main():
    """Run the MACS2 peak-calling task (with control) from the command line.

    Parses the arguments, creates the output directory, calls ``macs2`` with
    the two entries of ``args.tas`` followed by the remaining options,
    checks that the file it returns is not empty and finally lists the
    contents of the output directory.
    """
    args = parse_arguments()
    out_dir = args.out_dir

    log.info('Initializing and making output directory...')
    mkdir_p(out_dir)

    log.info('Calling peaks with macs2...')
    # Positional order must match the signature of macs2().
    macs2_params = [
        args.tas[0],
        args.tas[1],
        args.chrsz,
        args.gensz,
        args.pval_thresh,
        args.shift,
        args.fraglen,
        args.cap_num_peak,
        args.ctl_subsample,
        args.ctl_paired_end,
        args.mem_gb,
        out_dir,
    ]
    peak_file = macs2(*macs2_params)

    log.info('Checking if output is empty...')
    assert_file_not_empty(peak_file)

    log.info('List all files in output directory...')
    ls_l(out_dir)

    log.info('All done.')
def parse_arguments():
    """Parse and validate command-line arguments for the bwa aligner.

    After successful validation, the level of the ``log`` logger is set
    from ``--log-level`` and the raw ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.

    Raises:
        argparse.ArgumentTypeError: If the number of FASTQs does not match
            the library layout (2 for paired-end, 1 for single-end).
        ValueError: If ``--use-bwa-mem-for-pe`` is given without
            ``--paired-end``, or ``--rescue-reads-for-bwa-mem`` is given
            without ``--use-bwa-mem-for-pe``.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC bwa aligner.',
                                     description='')
    # Help texts use adjacent literals instead of backslash continuations
    # inside the strings, so no source indentation leaks into them.
    parser.add_argument(
        'bwa_index_prefix_or_tar', type=str,
        help='Path for prefix (or a tarball .tar) '
             'for reference bwa index. '
             'Prefix must be like [PREFIX].sa. '
             'TAR ball can have any [PREFIX] but it should not '
             'have a directory structure in it.')
    parser.add_argument(
        'fastqs', nargs='+', type=str,
        help='List of FASTQs (R1 and R2). '
             'FASTQs must be compressed with gzip (with .gz).')
    parser.add_argument(
        '--use-bwa-mem-for-pe', action='store_true',
        help='Use "bwa mem" for PAIRED-ENDED dataset with R1 FASTQ\'s '
             'read length >= --bwa-mem-read-len-limit. '
             'For shorter reads, bwa aln will be used. ')
    parser.add_argument(
        '--rescue-reads-for-bwa-mem', action='store_true',
        help='Use -P for "bwa mem" to rescue missing hits only '
             '(by using SW algorithm) '
             'but do not try to find hits that fit a proper pair.')
    parser.add_argument(
        '--bwa-mem-read-len-limit', type=int, default=70,
        help='Read length limit for bwa mem '
             '(for paired-ended FASTQs only). '
             'bwa aln will be used instead of bwa mem if R1 reads are '
             'shorter than this.')
    parser.add_argument('--paired-end', action='store_true',
                        help='Paired-end FASTQs.')
    parser.add_argument('--nth', type=int, default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument(
        '--mem-gb', type=float,
        help='Max. memory for samtools sort and GNU sort -S '
             '(half of this value will be used for GNU sort) in GB. '
             'It should be total memory for this task '
             '(not memory per thread).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    # check if fastqs have correct dimension
    if args.paired_end and len(args.fastqs) != 2:
        raise argparse.ArgumentTypeError('Need 2 fastqs for paired end.')
    if not args.paired_end and len(args.fastqs) != 1:
        raise argparse.ArgumentTypeError('Need 1 fastq for single end.')

    # bwa-mem related flags only make sense in their enabling context.
    if args.use_bwa_mem_for_pe and not args.paired_end:
        raise ValueError(
            '--use-bwa-mem-for-pe is for paired ended FASTQs only.'
        )
    if not args.use_bwa_mem_for_pe and args.rescue_reads_for_bwa_mem:
        raise ValueError(
            '--rescue-reads-for-bwa-mem is available only when '
            '--use-bwa-mem-for-pe is activated.'
        )

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the naive-overlap script.

    A ``--blacklist`` value ending with ``'/dev/null'`` is normalized to an
    empty string. After parsing, the level of the ``log`` logger is set from
    ``--log-level`` and the raw ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC Naive overlap.',
        description='NarrowPeak or RegionPeak only.')
    parser.add_argument('peak1', type=str, help='Peak 1.')
    parser.add_argument('peak2', type=str, help='Peak 2.')
    parser.add_argument('peak_pooled', type=str, help='Pooled peak.')
    parser.add_argument('--prefix', default='overlap', type=str,
                        help='Prefix basename for output overlap peak.')
    parser.add_argument(
        '--peak-type', type=str, required=True,
        choices=['narrowPeak', 'regionPeak', 'broadPeak', 'gappedPeak'],
        help='Peak file type.')
    parser.add_argument(
        '--nonamecheck', action='store_true',
        help='bedtools intersect -nonamecheck (use this if you get '
             'bedtools intersect naming convention warnings/errors).')
    parser.add_argument('--blacklist', type=str, required=True,
                        help='Blacklist BED file.')
    parser.add_argument(
        '--keep-irregular-chr', action='store_true',
        help='Keep reads with non-canonical chromosome names.')
    parser.add_argument('--ta', type=str, help='TAGALIGN file for FRiP.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    parser.add_argument(
        '--fraglen', type=int, default=0,
        help='Fragment length for TAGALIGN file. '
             'If given, do shifted FRiP (for ChIP-Seq).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    # The null device means "no blacklist".
    if args.blacklist.endswith('/dev/null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
def parse_arguments():
    """Parse command-line arguments for the IDR script.

    A missing ``--blacklist`` or one whose value ends with ``'null'``
    (e.g. ``/dev/null``) is normalized to an empty string. After parsing,
    the level of the ``log`` logger is set from ``--log-level`` and the raw
    ``sys.argv`` is logged at INFO level.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(
        prog='ENCODE DCC IDR.',
        description='NarrowPeak or RegionPeak only.')
    parser.add_argument('peak1', type=str, help='Peak file 1.')
    parser.add_argument('peak2', type=str, help='Peak file 2.')
    parser.add_argument('peak_pooled', type=str, help='Pooled peak file.')
    parser.add_argument('--prefix', default='idr', type=str,
                        help='Prefix basename for output IDR peak.')
    parser.add_argument(
        '--peak-type', type=str, required=True,
        choices=['narrowPeak', 'regionPeak', 'broadPeak', 'gappedPeak'],
        help='Peak file type.')
    parser.add_argument('--idr-thresh', default=0.1, type=float,
                        help='IDR threshold.')
    parser.add_argument('--idr-rank', default='p.value', type=str,
                        choices=['p.value', 'q.value', 'signal.value'],
                        help='IDR ranking method.')
    parser.add_argument('--blacklist', type=str, help='Blacklist BED file.')
    parser.add_argument(
        '--regex-bfilt-peak-chr-name',
        help='Keep chromosomes matching this pattern only '
             'in .bfilt. peak files.')
    parser.add_argument('--ta', type=str, help='TAGALIGN file for FRiP.')
    parser.add_argument('--chrsz', type=str,
                        help='2-col chromosome sizes file.')
    # Adjacent literals instead of a backslash continuation inside the string,
    # so no source indentation leaks into the help text.
    parser.add_argument(
        '--fraglen', type=int, default=0,
        help='Fragment length for TAGALIGN file. '
             'If given, do shifted FRiP (for ChIP-Seq).')
    parser.add_argument('--out-dir', default='', type=str,
                        help='Output directory.')
    # Each level is listed exactly once so the help output has no duplicates.
    parser.add_argument('--log-level', default='INFO', help='Log level',
                        choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING',
                                 'CRITICAL', 'ERROR'])
    args = parser.parse_args()

    # Treat "no blacklist" and a null-device path the same way.
    if args.blacklist is None or args.blacklist.endswith('null'):
        args.blacklist = ''

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args