def test_job_complete(self):
    """check if a job is complete"""
    tree = mergeorder(['A', 'B', 'C', 'D', 'E'], 'foo')

    # internal nodes with no poll file present are not complete
    self.assertFalse(job_complete(tree))
    self.assertFalse(job_complete(tree.Children[0]))
    self.assertFalse(job_complete(tree.Children[1].Children[1]))

    # asking about a tip is an error
    self.assertRaises(JobError, job_complete, tree.Children[0].Children[0])

    poll_fp = 'test_parallel_merge_otus_JOB_COMPLETE_TEST.poll'
    self.assertFalse(os.path.exists(poll_fp))

    poll_file = open(poll_fp, 'w')
    poll_file.write('0\n')
    poll_file.close()

    tree.PollPath = poll_fp
    tree.StartTime = 10

    # a poll file containing exit status 0 marks the node done
    # and records timing information
    self.assertTrue(job_complete(tree))
    self.assertNotEqual(tree.EndTime, None)
    self.assertNotEqual(tree.TotalTime, None)

    poll_file = open(poll_fp, 'w')
    poll_file.write('1\n')
    poll_file.close()

    # a nonzero exit status raises, whether or not the node
    # was previously marked processed
    self.assertRaises(JobError, job_complete, tree)
    tree.Processed = False
    self.assertRaises(JobError, job_complete, tree)

    os.remove(poll_fp)
def test_job_complete(self):
    """check if a job is complete"""
    root = mergeorder(["A", "B", "C", "D", "E"], "foo")

    # nodes without poll files report incomplete
    for node in (root, root.Children[0], root.Children[1].Children[1]):
        self.assertFalse(job_complete(node))

    # tips cannot be polled for completion
    self.assertRaises(JobError, job_complete, root.Children[0].Children[0])

    poll_path = "test_parallel_merge_otus_JOB_COMPLETE_TEST.poll"
    self.assertFalse(os.path.exists(poll_path))

    fh = open(poll_path, "w")
    fh.write("0\n")
    fh.close()

    root.PollPath = poll_path
    root.StartTime = 10

    # zero exit status -> complete, with timing recorded
    self.assertTrue(job_complete(root))
    self.assertNotEqual(root.EndTime, None)
    self.assertNotEqual(root.TotalTime, None)

    fh = open(poll_path, "w")
    fh.write("1\n")
    fh.close()

    # nonzero exit status -> JobError, regardless of Processed state
    self.assertRaises(JobError, job_complete, root)
    root.Processed = False
    self.assertRaises(JobError, job_complete, root)

    os.remove(poll_path)
def test_job_complete(self):
    """check if a job is complete"""
    t = mergeorder(['A', 'B', 'C', 'D', 'E'], 'foo')

    # no poll file yet: nothing is complete
    self.assertFalse(job_complete(t))
    self.assertFalse(job_complete(t.Children[0]))
    self.assertFalse(job_complete(t.Children[1].Children[1]))

    # tips are not jobs and cannot complete
    self.assertRaises(JobError, job_complete, t.Children[0].Children[0])

    poll = 'test_parallel_merge_otus_JOB_COMPLETE_TEST.poll'
    self.assertFalse(os.path.exists(poll))

    out = open(poll, 'w')
    out.write('0\n')
    out.close()

    t.PollPath = poll
    t.StartTime = 10

    # successful exit status marks the job complete with timings
    self.assertTrue(job_complete(t))
    self.assertNotEqual(t.EndTime, None)
    self.assertNotEqual(t.TotalTime, None)

    out = open(poll, 'w')
    out.write('1\n')
    out.close()

    # failed exit status raises even after resetting Processed
    self.assertRaises(JobError, job_complete, t)
    t.Processed = False
    self.assertRaises(JobError, job_complete, t)

    os.remove(poll)
def test_initial_nodes_to_merge(self):
    """determine the first nodes to merge"""
    tree = mergeorder(['A', 'B', 'C', 'D', 'E'], 'foo')
    # the internal nodes whose children are all tips go first
    expected = set([tree.Children[0], tree.Children[1].Children[1]])
    observed = initial_nodes_to_merge(tree)
    self.assertEqual(observed, expected)
def test_mergeorder(self):
    """recursively build and join all the subtrees"""
    result = mergeorder(['A', 'B', 'C', 'D', 'E'], 'foo')
    # internal node labels reflect the order merges are scheduled
    self.assertEqual(result.getNewick(escape_name=False),
                     "((A,B)0,(C,(D,E)1)2)3;")
def test_start_job(self):
    """start a job"""
    # expected torque submission wrapping the merge command and
    # the poll-file write
    expected = ('echo "y -i A.biom,B.biom -o foo/0.biom; '
                'echo $? > foo/0.biom.poll" | qsub -k oe -N MOTU -q ignored')
    tree = mergeorder(['A.biom', 'B.biom', 'C', 'D', 'E'], 'foo')
    node = tree.Children[0]
    start_job(node, 'y', 'ignored', torque_job, False)
    self.assertEqual(node.FullCommand, expected)
def test_initial_has_dependencies(self):
    """determine initial has_dependencies"""
    tree = mergeorder(['A', 'B', 'C', 'D', 'E'], 'foo')
    observed = initial_has_dependencies(tree, initial_nodes_to_merge(tree))
    # the root and the right subtree still wait on child merges
    self.assertEqual(observed, [tree, tree.Children[1]])
def test_initial_nodes_to_merge(self):
    """determine the first nodes to merge"""
    root = mergeorder(['A', 'B', 'C', 'D', 'E'], 'foo')
    # mergeable immediately: internal nodes with only tip children
    wanted = set([root.Children[0], root.Children[1].Children[1]])
    self.assertEqual(initial_nodes_to_merge(root), wanted)
def test_mergeorder(self):
    """recursively build and join all the subtrees"""
    # five inputs produce a tree with four numbered merge nodes
    tree = mergeorder(["A", "B", "C", "D", "E"], "foo")
    newick = tree.getNewick(escape_name=False)
    self.assertEqual(newick, "((A,B)0,(C,(D,E)1)2)3;")
def test_start_job(self):
    """start a job"""
    # full torque submission: wrapped merge command plus poll-file write
    expected = ('echo "x y -i A.biom,B.biom -o foo/0.biom; '
                'echo $? > foo/0.biom.poll" | qsub -k oe -N MOTU -q ignored')
    tree = mergeorder(['A.biom', 'B.biom', 'C', 'D', 'E'], 'foo')
    start_job(tree.Children[0], 'x', 'y', 'ignored', torque_job, False)
    self.assertEqual(tree.Children[0].FullCommand, expected)
def main(): option_parser, opts, args =\ parse_command_line_parameters(**script_info) input_fps = opts.input_fps output_dir = opts.output_dir seconds_to_sleep = opts.seconds_to_sleep verbose = opts.verbose merge_otus_serial_script = 'merge_otu_tables.py' created_temp_paths = [] # set the job_prefix either based on what the user passed in, # or a random string beginning with MOTU job_prefix = opts.job_prefix or get_random_job_prefix('MOTU') # A temporary output directory is created in output_dir named # job_prefix. Output files are then moved from the temporary # directory to the output directory when they are complete, allowing # a poller to detect when runs complete by the presence of their # output files. working_dir = '%s/%s' % (output_dir, job_prefix) try: makedirs(working_dir) except OSError: # working dir already exists pass import os.path # wrapper log output contains run details log_fp = os.path.join(output_dir, 'parallel_merge_otus.log') wrapper_log_output = open(log_fp, 'w') wrapper_log_output.write("Parallel merge output\n\n") # construct the dependency tree import os for f in input_fps: if not os.path.exists(f): raise IOError("%f does not exist!" 
% f) tree = mergeorder(input_fps, working_dir) if verbose: print tree.asciiArt() wrapper_log_output.write('Dependency tree:\n') wrapper_log_output.write(tree.asciiArt()) wrapper_log_output.write('\n\n') wrapper_log_output.flush() to_process = initial_nodes_to_merge(tree) has_dependencies = initial_has_dependencies(tree, to_process) # loop until the whole shabang is done pending = [] # jobs that are currently running while not tree.Processed: # check if we have nodes to process, if so, shoot them off for node in to_process: if opts.cluster: start_job(node, merge_otus_serial_script, qiime_config['torque_queue'], wrap_call=torque_job) else: start_job(node, merge_otus_serial_script, qiime_config['torque_queue'], wrap_call=local_job) wrapper_log_output.write(node.FullCommand) wrapper_log_output.write('\n') wrapper_log_output.flush() pending.append(node) to_process = set([]) # check running jobs current_pending = [] for pending_node in pending: # if we're complete, update state if job_complete(pending_node): wrapper_log_output.write( "Node %s completed in %f seconds" % (pending_node.Name, pending_node.TotalTime)) wrapper_log_output.write('\n') wrapper_log_output.flush() else: current_pending.append(pending_node) pending = current_pending # check for new jobs to add current_dependencies = [] for dep_node in has_dependencies: # if children are satisfied, then allow for processing # the logic here is odd to handle the case where an internal node # has both a tip that is a child and child that is an internal node children_are_complete = [(c.Processed or c.istip()) for c in dep_node.Children] if all(children_are_complete): to_process.add(dep_node) else: current_dependencies.append(dep_node) has_dependencies = current_dependencies sleep(seconds_to_sleep) os.rename(tree.FilePath, "%s/%s" % (output_dir, "merged.biom"))
def main(): option_parser, opts, args =\ parse_command_line_parameters(**script_info) input_fps = opts.input_fps output_dir = opts.output_dir seconds_to_sleep = opts.seconds_to_sleep verbose = opts.verbose merge_otus_serial_script = 'merge_otu_tables.py' created_temp_paths = [] # set the job_prefix either based on what the user passed in, # or a random string beginning with MOTU job_prefix = opts.job_prefix or get_random_job_prefix('MOTU') # A temporary output directory is created in output_dir named # job_prefix. Output files are then moved from the temporary # directory to the output directory when they are complete, allowing # a poller to detect when runs complete by the presence of their # output files. working_dir = '%s/%s' % (output_dir, job_prefix) try: makedirs(working_dir) except OSError: # working dir already exists pass import os.path # wrapper log output contains run details log_fp = os.path.join(output_dir, 'parallel_merge_otus.log') wrapper_log_output = open(log_fp, 'w') wrapper_log_output.write("Parallel merge output\n\n") # construct the dependency tree import os for f in input_fps: if not os.path.exists(f): raise IOError("%f does not exist!" 
% f) tree = mergeorder(input_fps, working_dir) if verbose: print tree.asciiArt() wrapper_log_output.write('Dependency tree:\n') wrapper_log_output.write(tree.asciiArt()) wrapper_log_output.write('\n\n') wrapper_log_output.flush() to_process = initial_nodes_to_merge(tree) has_dependencies = initial_has_dependencies(tree, to_process) # loop until the whole shabang is done pending = [] # jobs that are currently running while not tree.Processed: # check if we have nodes to process, if so, shoot them off for node in to_process: if opts.cluster: start_job(node, merge_otus_serial_script, qiime_config['torque_queue'], wrap_call=torque_job) else: start_job(node, merge_otus_serial_script, qiime_config['torque_queue'], wrap_call=local_job) wrapper_log_output.write(node.FullCommand) wrapper_log_output.write('\n') wrapper_log_output.flush() pending.append(node) to_process = set([]) # check running jobs current_pending = [] for pending_node in pending: # if we're complete, update state if job_complete(pending_node): wrapper_log_output.write("Node %s completed in %f seconds" % (pending_node.Name, pending_node.TotalTime)) wrapper_log_output.write('\n') wrapper_log_output.flush() else: current_pending.append(pending_node) pending = current_pending # check for new jobs to add current_dependencies = [] for dep_node in has_dependencies: # if children are satisfied, then allow for processing # the logic here is odd to handle the case where an internal node # has both a tip that is a child and child that is an internal node children_are_complete = [(c.Processed or c.istip()) for c in dep_node.Children] if all(children_are_complete): to_process.add(dep_node) else: current_dependencies.append(dep_node) has_dependencies = current_dependencies sleep(seconds_to_sleep) os.rename(tree.FilePath, "%s/%s" % (output_dir, "merged.biom"))
def test_initial_has_dependencies(self):
    """determine initial has_dependencies"""
    root = mergeorder(["A", "B", "C", "D", "E"], "foo")
    ready = initial_nodes_to_merge(root)
    # nodes not yet runnable: the root and its right child
    self.assertEqual(initial_has_dependencies(root, ready),
                     [root, root.Children[1]])
def test_initial_nodes_to_merge(self):
    """determine the first nodes to merge"""
    t = mergeorder(["A", "B", "C", "D", "E"], "foo")
    result = initial_nodes_to_merge(t)
    # the left child and the deepest right internal node start first
    self.assertEqual(result, set([t.Children[0],
                                  t.Children[1].Children[1]]))
def test_initial_has_dependencies(self):
    """determine initial has_dependencies"""
    t = mergeorder(['A', 'B', 'C', 'D', 'E'], 'foo')
    dependent = initial_has_dependencies(t, initial_nodes_to_merge(t))
    # root and right subtree must wait for their children to merge
    expected = [t, t.Children[1]]
    self.assertEqual(dependent, expected)
def test_mergeorder(self):
    """recursively build and join all the subtrees"""
    expected_newick = "((A,B)0,(C,(D,E)1)2)3;"
    built = mergeorder(['A', 'B', 'C', 'D', 'E'], 'foo')
    # merge nodes are labeled 0..3 in construction order
    self.assertEqual(built.getNewick(escape_name=False), expected_newick)
def test_start_job(self):
    """start a job"""
    tree = mergeorder(["A.biom", "B.biom", "C", "D", "E"], "foo")
    target = tree.Children[0]
    start_job(target, "y", "ignored", torque_job, False)
    # torque wrapping: merge command, poll-file write, qsub submission
    self.assertEqual(
        target.FullCommand,
        'echo "y -i A.biom,B.biom -o foo/0.biom; '
        'echo $? > foo/0.biom.poll" | qsub -k oe -N MOTU -q ignored')