def main():
    """
    Merge several input files into a single output file of the same format.

    All reads / observations of the (at least two) input files are
    accumulated. The format is taken from the extension of the first input
    file: 'beta' / 'bin' are handled by merge_betas, 'pat.gz' by MergePats
    and 'unq.gz' by merge_unqs. Any other extension is reported as unknown.
    """
    args = parse_args()

    # at least two input files are required
    input_files = args.input_files
    validate_files_list(input_files, min_len=2)

    # the output path is the prefix plus the extension of the first input
    first_file = input_files[0]
    extension = splitextgz(first_file)[1]
    out_path = args.prefix + extension

    # nothing to do when delete_or_skip returns a falsy value
    if not delete_or_skip(out_path, args.force):
        return

    # extension without its first character (presumably the leading dot)
    files_type = extension[1:]

    if files_type == 'pat.gz':
        merger = MergePats(input_files, args.prefix + '.pat', args.labels, args)
        merger.merge_pats()
    elif files_type == 'unq.gz':
        merge_unqs()
    elif files_type in ('beta', 'bin'):
        merge_betas(input_files, out_path)
    else:
        print('Unknown input format:', first_file)
def main():
    """
    Collapse beta file to blocks binary file, of the same beta format.

    The blocks file is read as a tab separated table without a header; its
    first five columns are used as chr, sloc, eloc, ssite, esite. Only blocks
    with ssite < esite are kept. One apply_filter_wrapper task is submitted
    to a process pool for every input beta file. A task that raises is
    reported on stderr instead of being silently discarded.
    """
    args = parse_args()
    files = args.input_files
    validate_files_list(files, '.beta')

    if not op.isfile(args.blocks_file):
        eprint('Invalid blocks file:', args.blocks_file)
        return

    names = ['chr', 'sloc', 'eloc', 'ssite', 'esite']
    df = pd.read_csv(args.blocks_file, sep='\t', usecols=[0, 1, 2, 3, 4],
                     header=None, names=names)

    # blocks whose start site equals their end site are reported as removed
    empty_blocks = df[df.ssite == df.esite]
    nr_removed = empty_blocks.shape[0]
    if nr_removed:
        eprint('removed {} regions with no CpGs'.format(nr_removed))
        if args.debug:
            eprint(empty_blocks)
    # NOTE: blocks with ssite > esite are dropped by this filter as well,
    # but they are not included in the count reported above.
    df = df[df.ssite < df.esite]

    blocks_bins, filtered_indices = get_bins(df)

    # the coordinates table is identical for all tasks, so build it once
    coords = df[['chr', 'sloc', 'eloc']]

    with Pool() as p:
        # keep the AsyncResult of every task; otherwise an exception raised
        # in a worker (or while pickling the arguments) is lost
        pending = []
        for beta_path in files:
            params = (args, blocks_bins, filtered_indices, beta_path, coords)
            pending.append((beta_path, p.apply_async(apply_filter_wrapper, params)))
        p.close()
        p.join()

        # report failures without aborting the remaining files
        for beta_path, result in pending:
            try:
                result.get()
            except Exception as err:
                eprint('Failed to collapse {}: {}'.format(beta_path, err))
def main(args):
    """
    Print the results of PatVis for every input pat file.

    The input files are validated against the '.pat.gz' suffix, the
    GenomicRegion built from args is printed, and then, for each file, the
    first part of splitextgz of its base name is printed, followed by the
    output of PatVis(args, file).print_results().
    """
    validate_files_list(args.input_files, '.pat.gz')

    region = GenomicRegion(args)
    print(region)

    for path in args.input_files:
        # header line: the file name
        name = splitextgz(op.basename(path))[0]
        print(name)
        PatVis(args, path).print_results()
def main():
    """
    Plot a histogram of the read lengths of unq file[s].
    The histogram values are written to stdout if requested.

    The files in args.unq_paths are validated against the 'unq.gz' suffix
    and the parsed arguments are then handed to multi_FragLen.
    """
    args = parse_args()
    unq_files = args.unq_paths
    validate_files_list(unq_files, 'unq.gz')
    multi_FragLen(args)
def main():
    """
    Convert beta file[s] to bed file[s].

    Every path in args.beta_paths (validated against the '.beta' suffix) is
    passed, in order, to run_beta_to_bed of a single BetaToBigWig instance.
    """
    args = parse_args()
    validate_files_list(args.beta_paths, '.beta')

    # one converter instance serves all the input files
    to_bed = BetaToBigWig(args).run_beta_to_bed
    for beta_path in args.beta_paths:
        to_bed(beta_path)
def main():
    """
    Compare pairs of beta files by plotting a 2d histogram for every pair.
    Sites with low coverage (below the min_cov argument) are dropped,
    for performance and robustness.

    At least two '.beta' files are required. The comparison is restricted
    to the sites of the GenomicRegion built from the arguments.
    """
    args = parse_args()
    validate_files_list(args.betas, '.beta', min_len=2)

    betas, min_cov = args.betas, args.min_cov
    sites = GenomicRegion(args).sites
    compare_all_paires(betas, min_cov, sites)
def main():
    """
    Convert beta file[s] to Illumina-450K format.

    Output: a csv file with ~480K rows (one per Illumina site) and one
    column per beta file. All values are in the range [0, 1], or NaN.

    The files in args.input_files are validated against the '.beta' suffix
    and the parsed arguments are then handed to betas2csv.
    """
    args = parse_args()
    beta_files = args.input_files
    validate_files_list(beta_files, '.beta')
    betas2csv(args)
def main():
    """
    Convert bed[.gz] file[s] to beta file[s].

    Each bed file is expected to be tab-separated, with the columns:
        chr    start    end    meth    total

    The files in args.bed_paths are validated (no suffix is enforced here)
    and the parsed arguments are then handed to bed2betas.
    """
    # todo: bed or bedGraph?
    args = parse_args()
    bed_files = args.bed_paths
    validate_files_list(bed_files)
    bed2betas(args)
def main():
    """
    Convert beta file[s] to bigwig file[s].
    Assumes bedGraphToBigWig is installed and in PATH.

    Every path in args.beta_paths (validated against the '.beta' suffix) is
    passed, in order, to run_beta_to_bw of a single BetaToBigWig instance.
    """
    args = parse_args()
    validate_files_list(args.beta_paths, '.beta')

    # one converter instance serves all the input files
    to_bigwig = BetaToBigWig(args).run_beta_to_bw
    for beta_path in args.beta_paths:
        to_bigwig(beta_path)
def main():
    """
    Mix samples from K different pat files.
    Output a single mixed pat.gz[.csi] file - sorted, bgzipped and
    indexed - with an informative name.

    At least two 'pat.gz' files are required. When a bed file is given
    together with a region or a sites range, an error is printed to stderr
    and nothing is mixed.
    """
    args = parse_args()
    validate_files_list(args.pat_files, 'pat.gz', 2)

    # a bed file cannot be combined with a region or with a sites range
    no_conflict = not args.bed_file or not (args.region or args.sites)
    if no_conflict:
        mult_mix(args)
    else:
        eprint('-L, -s and -r are mutually exclusive')
def main(args):
    """
    Run BetaVis on the input beta files.

    The files in args.input_files are validated against the '.beta' suffix,
    and a BetaVis object is then constructed from args; the constructed
    object itself is not used here.
    """
    beta_files = args.input_files
    validate_files_list(beta_files, '.beta')
    BetaVis(args)