def average_exome_coverage(args):
    out_file = args.out
    if file_exists(out_file):
        return out_file
    with file_transaction(out_file) as tx_out_file:
        # dfs = [_calc_total_exome_coverage(bam, bed_file) for bam in in_bams]
        resources = {'name': 'bedtools', 'mem': 12, 'cores': 1}
        cluster.send_job(_calc_total_exome_coverage, args.bams, args, resources)
        # df = rbind(dfs)
        # df.to_csv(tx_out_file, mode='a', index=False,
        #           header=["r10", "r25", "r50", "region", "size", "sample"])
    return out_file
def test_cluster(self):
    def fake_fn(n, args):
        print('inside function with %s' % n)
        time.sleep(n * 2)

    start = time.time()
    parser = ArgumentParser(description="Test cluster")
    parser = arguments.myargs(parser)
    args = parser.parse_args()
    logger.info(args)
    resources = {'name': 'step1', 'mem': 1, 'cores': 1}
    cluster.send_job(fake_fn, [1, 2], args, resources)
    logger.info('It took %.3f minutes without ipython' % ((time.time() - start) / 60))
def detect(args):
    assert args.files, "Need a set of fastq files"
    # sweep memory limits, running the job once per allocation in its own directory
    for mem in [6, 8, 10, 12, 14, 16, 18, 20, 24]:
        safe_makedir("mem%s" % mem)
        resources = {'name': "mem%s" % mem, 'mem': mem, 'cores': 1}
        data = _prepare_samples(args)
        data = _update_algorithm(data, resources)
        data = cluster.send_job(trymemory, data, args, resources)
def detect_positions(data, args):
    assert args.reference, "Need --reference"
    assert args.index, "Need --index"
    assert args.files, "Need a set of fastq files"
    assert args.snp, "Need --snp"
    # trimming
    resources = {'name': 'trimming', 'mem': 4, 'cores': 1}
    data = _update_algorithm(data, resources)
    data = cluster.send_job(prepare, data, args, resources)
    # alignment
    resources = {'name': 'align', 'mem': 2, 'cores': 8}
    data = _update_algorithm(data, resources)
    data = cluster.send_job(create_bam, data, args, resources)
    # variant calling with BisSNP
    resources = {'name': 'bissnp', 'mem': 3, 'cores': 8}
    data = _update_algorithm(data, resources)
    data = cluster.send_job(call_variations, data, args, resources)
    # report
    resources = {'name': 'report', 'mem': 2, 'cores': 5}
    data = _update_algorithm(data, resources)
    data = cluster.send_job(create_report, data, args, resources)
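# _update_algorithm is called before every send_job but is not defined in this
# section. A minimal sketch of what such a helper could look like, assuming each
# sample travels as a one-element list wrapping its dict (the shape implied by
# sample[0]['asm'] in link_sites below). Hypothetical, not the project's code:
def _update_algorithm_sketch(data, resources):
    """Attach the per-step resources to each sample's algorithm config."""
    for sample in data:
        config = sample[0].setdefault('config', {})
        config['algorithm'] = resources
    return data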
def link_sites(data, args):
    assert args.files, "Need a set of fastq files"
    assert args.out, "Need prefix"
    resources = {'name': 'link', 'mem': 6, 'cores': 1}
    workdir = op.abspath(safe_makedir(args.out))
    data = _update_algorithm(data, resources)
    data = cluster.send_job(detect_asm, data, args, resources)
    vcf_res = [sample[0]['asm'] for sample in data]
    vcf_merged = op.join(workdir, args.out + ".vcf")
    post_processing(vcf_res, vcf_merged, op.join(workdir, "link"))
def bias_exome_coverage(args):
    resources = {'name': 'bias', 'mem': 1, 'cores': 1}
    cluster.send_job(calculate_bias_over_multiple_regions, args.bams, args, resources)
def calculate_cg_depth_coverage(args):
    resources = {'name': 'vcf_stats', 'mem': 1, 'cores': 1}
    cluster.send_job(calc_variants_stats, args.bams, args, resources)
def average_exome_coverage(data, args):
    # dfs = [_calc_total_exome_coverage(bam, bed_file) for bam in in_bams]
    safe_makedir(args.out)
    resources = {'name': 'bedtools', 'mem': 8, 'cores': 1}
    data = _update_algorithm(data, resources)
    cluster.send_job(_calc_total_exome_coverage, data, args, resources)
def bias_exome_coverage(data, args):
    safe_makedir(args.out)
    resources = {'name': 'bias', 'mem': 1, 'cores': 1}
    data = _update_algorithm(data, resources)
    cluster.send_job(calculate_bias_over_multiple_regions, data, args, resources)
def calculate_cg_depth_coverage(data, args):
    safe_makedir(args.out)
    resources = {'name': 'vcf_stats', 'mem': 2, 'cores': 1}
    data = _update_algorithm(data, resources)
    cluster.send_job(calc_variants_stats, data, args, resources)
import time
from argparse import ArgumentParser

from ichwrapper import arguments, cluster, fake
from ichwrapper.log import logger

if __name__ == "__main__":
    start = time.time()
    parser = ArgumentParser(description="Test cluster")
    parser = arguments.myargs(parser)
    parser.add_argument("--cmd", default=False, action='store_true')
    args = parser.parse_args()
    # args = parser.parse_args(["--parallel", "ipython", "--local"])
    logger.info(args)
    resources = {'name': 'step1', 'mem': 1, 'cores': 1}
    data = [{'num': 1}, {'num': 2}]
    data = cluster.update_samples(data, resources, args)
    cluster.send_job(fake.fake_fn, data, args, resources)
    if args.cmd:
        cluster.send_job(fake.fake_cmd_fn, data, args, resources)
    logger.info('It took %.3f minutes with ipython' % ((time.time() - start) / 60))
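# Functions handed to cluster.send_job appear to receive one work item plus the
# parsed arguments, mirroring fake_fn(n, args) in test_cluster above. A minimal
# sketch of a compatible worker; my_step is a hypothetical name, not part of
# ichwrapper:
def my_step(sample, args):
    # sample is one entry from the data list, e.g. {'num': 1}
    logger.info('processing %s' % sample)
    return sample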