def main():
    """
    Generate homog files. Given a blocks file and pat[s],
    count the number of U,X,M reads for each block for each file
    """
    # Raises IllegalArgumentError for any invalid argument (nr_bits, rlen,
    # thresholds) and for a blocks file rejected by is_block_file_nice.
    args = parse_args()
    if args.nr_bits not in (8, 16):
        raise IllegalArgumentError('nr_bits must be in {8, 16}')
    if args.rlen < 3:
        raise IllegalArgumentError('rlen must be >= 3')

    if args.thresholds is not None:
        # thresholds must be exactly two comma separated numbers
        # satisfying 0 < low < high < 1
        th = args.thresholds.split(',')
        if len(th) != 2:
            raise IllegalArgumentError('Invalid thresholds')
        try:
            low, high = float(th[0]), float(th[1])
        except ValueError:
            # non-numeric input: report it like any other invalid
            # threshold instead of leaking a bare ValueError
            raise IllegalArgumentError('Invalid thresholds') from None
        if not (1 > high > low > 0):
            raise IllegalArgumentError('Invalid thresholds')

    # make sure homog tool is valid:
    validate_local_exe(homog_tool)

    pats = args.input_files
    validate_file_list(pats, '.pat.gz')

    outdir, prefix = parse_outdir_prefix(args)

    # load blocks:
    blocks_df = load_blocks_file(args.blocks_file)
    is_nice, msg = is_block_file_nice(blocks_df)
    if not is_nice:
        # log the reason before failing
        homog_log(msg)
        raise IllegalArgumentError(f'Invalid blocks file: {args.blocks_file}')

    # process the pat files one by one, in sorted order
    for pat in sorted(pats):
        homog_process(pat, blocks_df, args, outdir, prefix)
def view_gr(pat, args, get_cmd=False):
    """
    Build (and optionally run) the shell pipeline that views a pat file
    over the genomic region described by args.

    :param pat: path to the pat file (checked with validate_single_file
                against the '.pat.gz' suffix)
    :param args: parsed arguments; used to build the GenomicRegion and the
                 view flags. 'sub_sample' is optional, 'out_path' is read
                 only when the command is executed.
    :param get_cmd: if True, return the command string instead of running it
    :return: the command string if get_cmd is True, otherwise None
    """
    validate_single_file(pat, '.pat.gz')
    region = GenomicRegion(args)

    if not region.is_whole():
        # specific region: fetch with tabix, starting MAX_PAT_LEN sites
        # before the region start (but never before site 1)
        start, end = region.sites
        fetch_from = max(1, start - MAX_PAT_LEN)
        cmd = f'tabix {pat} {region.chrom}:{fetch_from}-{end - 1} '
    else:
        # whole genome: stream the entire file
        start = 1
        end = region.genome.get_nr_sites() + 1
        cmd = f'gunzip -c {pat} '

    flags = set_view_flags(args)
    cmd += f' | {cview_tool} --sites "{start}\t{end}" ' + flags

    # sub-sample reads (the attribute may be absent from args)
    if getattr(args, 'sub_sample', None) is not None:
        validate_local_exe(pat_sampler)
        cmd += f' | {pat_sampler} {args.sub_sample} '

    # the sort step is added for region views only
    if not region.is_whole():
        cmd += f' | sort -k2,2n -k3,3 '
    cmd += f' | {collapse_pat_script} - '

    if get_cmd:
        return cmd

    if args.out_path is not None:
        cmd += f' > {args.out_path}'
    subprocess_wrap_sigpipe(cmd)
def add_bed_to_cpgs(site_file, genome, out_path=None):
    """
    Pipe site_file through add_loci_tool, using the dict_path and
    chrom_cpg_sizes files of the given genome.

    :param site_file: path of the file that is fed (via cat) to the tool
    :param genome: genome identifier passed to GenomeRefPaths
    :param out_path: when given and different from sys.stdout, the shell
                     output is redirected to this path; otherwise no
                     redirection is added to the command
    :raises subprocess.CalledProcessError: if the shell command exits
                                           with a non-zero status
    """
    validate_local_exe(add_loci_tool)
    ref = GenomeRefPaths(genome)
    cmd = f'cat {site_file} | {add_loci_tool} {ref.dict_path} {ref.chrom_cpg_sizes}'

    # redirect only for a real output path
    write_to_file = not (out_path is None or out_path == sys.stdout)
    if write_to_file:
        cmd = cmd + f' > {out_path}'

    subprocess.check_call(cmd, shell=True)
def main():
    """
    Generate a beta file from a pat file
    """
    args = parse_args()

    # check the pat2beta executable once, before touching any input
    validate_local_exe(pat2beta_tool)

    # convert every input pat file; the output dir and force flag are shared
    out_dir, force = args.out_dir, args.force
    for pat_path in args.pat_paths:
        pat2beta(pat_path, out_dir, args, force)
def main():
    """
    Segment the genome, or a subset region, to homogenously methylated blocks.
    Input: one or more beta files to segment
    Output: blocks file (BED format + startCpG, endCpG columns)
    """
    args = parse_args()

    # check the segmentation executable before parsing the inputs
    validate_local_exe(segment_tool)

    # resolve the beta files from the arguments and run the segmentation
    beta_files = parse_betas_input(args)
    segmenter = SegmentByChunks(args, beta_files)
    segmenter.run()
def main():
    """ view pat file with the c++ engine """
    parser = parse_args()
    args = parser.parse_args()

    # validate input file
    pat_path = args.pat
    validate_single_file(pat_path)

    # a negative sub-sampling rate is a usage error, reported via the parser
    rate = args.sub_sample
    if rate is not None and rate < 0:
        parser.error('[wt view] sub-sampling rate must be >= 0')

    validate_local_exe(cview_tool)
    cview(pat_path, args)
def main():
    """
    Add to bam file an extra field, YI:Z:{nr_meth},{nr_unmeth},
    to count Cytosine retention at CpG context.
    """
    # NOTE: the docstring above doubles as the command line description.
    base_parser = argparse.ArgumentParser(description=main.__doc__)
    parser = add_cpg_args(add_args(base_parser))
    args = parser.parse_args()

    # check the counting executable before processing any bam
    validate_local_exe(add_cpg_count_tool)

    # process the input bam files one after the other
    for bam_path in args.bam:
        worker = BamMethylData(args, bam_path)
        worker.start_threads()
def main():
    """
    Run the WGBS pipeline to generate pat & beta files out of an input bam file
    """
    args = parse_args_snp_split(add_args_snp_splitt())

    # validate output dir:
    out_dir_ok = op.isdir(args.out_dir)
    if not out_dir_ok:
        raise IllegalArgumentError(f'Invalid output dir: {args.out_dir}')

    validate_local_exe(allele_split_tool)

    # a single bam is handled here (args.bam is not iterated)
    bam = args.bam
    if not validate_bam(bam):
        eprint(f'[wt bam2pat] Skipping {bam}')
        return

    # expected pat path: bam basename without its last 4 characters + PAT_SUFF
    bam_stem = op.basename(bam)[:-4]
    pat = op.join(args.out_dir, bam_stem + PAT_SUFF)

    # run only if delete_or_skip allows (re)writing the pat file
    if delete_or_skip(pat, args.force):
        SNPSplit(args, bam)
def main():
    """
    Run the WGBS pipeline to generate pat & beta files out of an input bam file
    """
    # NOTE: the docstring above doubles as the command line description.
    parser = add_args(argparse.ArgumentParser(description=main.__doc__))
    args = parse_args(parser)

    # validate output dir:
    if not op.isdir(args.out_dir):
        raise IllegalArgumentError(f'Invalid output dir: {args.out_dir}')

    # both executables are checked before any bam is processed
    for tool in (match_maker_tool, patter_tool):
        validate_local_exe(tool)

    for bam_path in args.bam:
        if validate_bam(bam_path):
            # expected pat path: bam basename without its last 4 characters + PAT_SUFF
            pat_name = op.basename(bam_path)[:-4] + PAT_SUFF
            pat = op.join(args.out_dir, pat_name)
            # run only if delete_or_skip allows (re)writing the pat file
            if delete_or_skip(pat, args.force):
                Bam2Pat(args, bam_path)
        else:
            eprint(f'[wt bam2pat] Skipping {bam_path}')
def view_bed(pat, args):
    """
    View the reads of a pat file restricted to the blocks of a bed file.

    Builds a shell pipeline (read selection, cview_tool, optional
    sub-sampling, sort and collapse) and runs it with
    subprocess_wrap_sigpipe.

    :param pat: path to the pat file
    :param args: parsed arguments. 'bed_file' and 'out_path' are required;
                 'sub_sample' is optional (treated as None when absent,
                 like in view_gr).
    """
    # assume columns 4-5 of args.bed_file are startCpG, endCpG:
    bpath = args.bed_file

    # validate blocks file. If it's long, and starts with "chr1", use gunzip instead of tabix.
    # The row limit is an int (not the float 1e6), as expected for a row count.
    max_rows = 1000000
    df = load_blocks_file(bpath, nrows=max_rows)
    if df.shape[0] == max_rows and df.iloc[0, 0] in ('1', 'chr1'):
        tabix_cmd = f'gunzip -c {pat} '
    else:
        # extended blocks:
        tabix_cmd = 'gunzip -c' if bpath.endswith('.gz') else 'cat'
        tabix_cmd += f' {bpath} | {cview_extend_blocks_script} | tabix -R - {pat} '

    view_flags = set_view_flags(args)
    cmd = tabix_cmd + f' | {cview_tool} {view_flags} --blocks_path {bpath}'

    # sub-sample reads; tolerate a namespace without the attribute
    sub_sample = getattr(args, 'sub_sample', None)
    if sub_sample is not None:
        validate_local_exe(pat_sampler)
        cmd += f' | {pat_sampler} {sub_sample} '

    cmd += f' | sort -k2,2n -k3,3 | {collapse_pat_script} - '
    if args.out_path is not None:
        cmd += f' > {args.out_path}'
    subprocess_wrap_sigpipe(cmd)