import json
import os
import tarfile
import time

from work_queue import WORK_QUEUE_RESULT_SUCCESS
from DivisibleJob import Job  # assumed module name, matching this repo's file list

def retrieve_task(wq):
    """Wait briefly on the Work Queue and retrieve one completed task, if any."""
    retrieved_tasks = 0
    print "{} Waiting on tasks to complete...\n".format(time.asctime())
    t = wq.wait(30)
    if t:
        print "{} Task (id# {}) complete: {} (return code {})\n".format(time.asctime(), t.tag, t.command, t.return_status)
        if (t.return_status != 0) or (t.result != WORK_QUEUE_RESULT_SUCCESS):
            print "{} Task (id# {}) failed : Return Code {} : Result {}\n".format(time.asctime(), t.tag, t.return_status, t.result)
            print "{} .\n".format(t.output)

        # Per-task bookkeeping file names are derived from the task tag.
        desc_name = "task_input_desc_{}.json".format(t.tag)
        desc_oname = "task_output_desc_{}.json".format(t.tag)
        desc_otar = "task_output_desc_{}.tar".format(t.tag)

        # Reload the completed job and its timing report from the output description.
        with open(desc_oname, 'r') as desc_file:
            report = json.load(desc_file)
            job = Job.from_description(report["job"])

        # Unpack the task's output files, then clean up the bookkeeping files.
        job_otar = tarfile.open(desc_otar)
        job_otar.extractall()
        job_otar.close()
        os.unlink(desc_name)
        os.unlink(desc_oname)
        os.unlink(desc_otar)

        retrieved_tasks += 1
        print "{} Retrieved {} tasks.\n".format(time.asctime(), retrieved_tasks)
        print "{} .\n".format(t.output)
        return (job, report["sys_time"], report["comp_time"], report["split_time"], report["join_time"])
    else:
        print "{} Retrieved {} tasks.\n".format(time.asctime(), retrieved_tasks)
        return None
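# Usage sketch (an assumption, not part of the original source): drain the
# queue by polling retrieve_task() until every submitted task has come back.
# Assumes `wq` is a work_queue.WorkQueue with tasks already submitted.
def drain_queue(wq):
    results = []
    while not wq.empty():
        result = retrieve_task(wq)
        if result:
            results.append(result)
    return results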
elif args.type == "logical-vf":
    bwa_seq = BWAQueryLogicalVF(args.input, args.output, input_files, [], environment={'reference': args.ref})
elif args.type == "index-vf":
    data_file = FastqFile.from_file(args.input)
    bwa_seq = BWAQueryIndexVF(data_file, args.output, input_files, [], environment={'reference': args.ref})

print len(bwa_seq)  # total number of query units in the input

# Dispatch on the requested test mode: flat, tiered, time-limited,
# time-limited with adjustment, or dynamic.
completed_size = 0
job = Job(specs=[bwa_seq])
if args.test == "ft":
    completed_size = flat_run(job, args.output, args.first)
elif args.test == "tr":
    completed_size = tiered_run(job, args.output, args.first, args.second)
elif args.test == "tl":
    completed_size = time_limit_run(job, args.output, args.first, args.second)
elif args.test == "tla":
    completed_size = time_limit_adjust_run(job, args.output, args.first, args.second, args.time, args.lock)
elif args.test == "dyn":
    completed_size = dyn_run(job, args.output, args.first)

# Compare the completed count against the total; `result` is the count left undone.
print "{} {}".format(len(bwa_seq), completed_size)
result = len(bwa_seq) - completed_size
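# Follow-up sketch (assumption, not from the original source): fail the process
# when queries were left unfinished, so calling scripts can detect partial runs.
import sys
if result > 0:
    print "{} of {} queries not completed".format(result, len(bwa_seq))
sys.exit(1 if result > 0 else 0)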
parser.add_argument('input', type=str, help='JSON file input job description')
parser.add_argument('output', type=str, help='JSON file output destination')
parser.add_argument('output_tar', type=str, help='JSON file output files tar destination')
parser.add_argument('size', type=int, help='Size of sub-job partitions')
parser.add_argument('count', type=int, help='Number of sub-job partitions')
parser.add_argument('time', type=int, help='Time allowed for execution')
args = parser.parse_args()

# Rebuild the Job object from its JSON description.
job = None
with open(args.input, 'r') as job_desc:
    job_json = json.load(job_desc)
    job = Job.from_description(job_json)

print "Running {}".format(str(job))
(output_job, sys_time, avail_time, split_time, join_time) = run_jobs(job, args.size, args.count, args.time)

# Package the resulting job description and timings into a report.
output_json = output_job.to_description()
report = {}
report["job"] = output_json
report["sys_time"] = sys_time
report["comp_time"] = avail_time
report["split_time"] = split_time
report["join_time"] = join_time

with tarfile.open(args.output_tar, "w") as output_tar:
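# Round-trip sketch (assumption about the Job API used above): to_description()
# produces a JSON-serializable dict that from_description() accepts back, so a
# description written by one process can be reloaded by another.
import json
from DivisibleJob import Job  # assumed module name, matching this repo's file list

def save_job(job, path):
    with open(path, 'w') as f:
        json.dump(job.to_description(), f)

def load_job(path):
    with open(path, 'r') as f:
        return Job.from_description(json.load(f))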
else:
    raise Exception('{} root|sql'.format(sys.argv[0]))

# Files that must ship with each task.
input_files = [
    "dimuon.py",
    "EventFile.py",
    "DimounRootTask.py",
    "DivisibleJob.py",
    "SQLEventFile",
    "UpRootEventFile",
    data_file
]
output_file = 'output.json'

dimdetect = DimuonTask(event_file, output_file, input_files, environment={}, repeat=repeat)
job = Job(specs=[dimdetect])

# size = 50000  # Number of slices per job; assumed set earlier in the script
count = len(dimdetect) / size  # number of sub-jobs of that size (integer division)
jobs = job.split(size, count)
for j in jobs:
    j.execute()

# Create the histogram from every sub-job's output.
import dimuon
for j in jobs:
    for s in j.specs:
        dimuon.histogram_fill_from_file(s.output_file)
dimuon.histogram(output='output.png')
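# Generalized sketch of the flat-run pattern above (an assumption, not code
# from the original source): split a job into fixed-size pieces, execute each,
# and hand every spec's output file to a caller-supplied callback.
def flat_execute(job, size, callback):
    count = len(job) / size  # Python 2 integer division
    pieces = job.split(size, count)
    for piece in pieces:
        piece.execute()
        for spec in piece.specs:
            callback(spec.output_file)
    return pieces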
else:
    raise Exception('{} root|sql'.format(sys.argv[0]))

input_files = [
    "dimuon.py",
    "EventFile.py",
    "DimounRootTask.py",
    "DivisibleJob.py",
    "SQLEventFile",
    "UpRootEventFile",
    data_file
]
output_file = 'output.json'

dimdetect = DimuonTask(event_file, output_file, input_files, environment={}, repeat=repeat)
job = Job(specs=[dimdetect])

time = 30000  # Time in seconds to run a batch of jobs (note: shadows the time module)
# size = 50000  # Number of slices per job; assumed set earlier in the script
count = len(dimdetect) / size  # number of sub-jobs of that size

# "ft" selects the flat strategy; otherwise the two trailing flags to
# run_jobs() select the dynamic variant.
if dyn == "ft":
    (jobs, sys_time, avail_time, split_time, join_time) = run_jobs(job, size, count, time)
else:
    (jobs, sys_time, avail_time, split_time, join_time) = run_jobs(job, size, count, time, False, True)

print "Size of jobs vs initial : {} : {}".format(len(jobs), len(job))
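# Report sketch (assumption): collect the timings in the same report layout the
# driver above uses, so results can be dumped to JSON for later comparison.
# The output path is hypothetical.
import json
report = {
    "sys_time": sys_time,
    "comp_time": avail_time,
    "split_time": split_time,
    "join_time": join_time,
}
with open('timing_report.json', 'w') as f:
    json.dump(report, f)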