def test_initial_has_dependencies(self):
    """determine initial has_dependencies"""
    # Build a 5-leaf merge order tree rooted at a working dir of 'foo'.
    tree = mergeorder(['A', 'B', 'C', 'D', 'E'], 'foo')
    # Nodes not mergeable up front: the root and its second child.
    expected = [tree, tree.Children[1]]
    starting_nodes = initial_nodes_to_merge(tree)
    observed = initial_has_dependencies(tree, starting_nodes)
    self.assertEqual(observed, expected)
def main():
    """Run a parallel merge of OTU tables driven by a dependency tree.

    Builds a binary merge-order tree over the input OTU table filepaths,
    submits a merge job for each node whose children are satisfied, and
    polls until the root of the tree has been processed. Run details are
    appended to ``parallel_merge_otus.log`` inside a per-run working
    directory created under the output path.

    Raises:
        IOError: if the wrapper log file already exists in the working dir.
    """
    option_parser, opts, args = \
        parse_command_line_parameters(**script_info)

    input_fps = opts.input_fps
    python_exe_fp = opts.python_exe_fp
    output_fp = opts.output_fp
    merge_otus_fp = opts.merge_otus_fp
    seconds_to_sleep = opts.seconds_to_sleep
    verbose = opts.verbose

    # set the job_prefix either based on what the user passed in,
    # or a random string beginning with MOTU
    job_prefix = opts.job_prefix or get_random_job_prefix('MOTU')

    # A temporary output directory is created in output_dir named
    # job_prefix. Output files are then moved from the temporary
    # directory to the output directory when they are complete, allowing
    # a poller to detect when runs complete by the presence of their
    # output files.
    working_dir = '%s/%s' % (output_fp, job_prefix)
    try:
        makedirs(working_dir)
    except OSError:
        # working dir already exists; reuse it
        pass

    import os

    # wrapper log output contains run details
    log_fp = os.path.join(working_dir, 'parallel_merge_otus.log')

    if os.path.exists(log_fp):
        # refuse to clobber an existing log from a previous run
        # (exception-instance form: valid on both Python 2 and 3)
        raise IOError("log file already exists!")

    wrapper_log_output = open(log_fp, 'w')
    wrapper_log_output.write("Parallel merge output\n\n")

    # construct the dependency tree
    tree = mergeorder(input_fps, working_dir)

    if verbose:
        print(tree.asciiArt())

    wrapper_log_output.write('Dependency tree:\n')
    wrapper_log_output.write(tree.asciiArt())
    wrapper_log_output.write('\n\n')
    wrapper_log_output.flush()

    to_process = initial_nodes_to_merge(tree)
    has_dependencies = initial_has_dependencies(tree, to_process)

    # poll until the whole shabang is done
    pending = []  # jobs that are currently running

    while not tree.Processed:
        # if we have nodes ready to process, shoot them off
        for node in to_process:
            start_job(node, python_exe_fp, merge_otus_fp,
                      wrap_call=torque_job)

            wrapper_log_output.write(node.FullCommand)
            wrapper_log_output.write('\n')
            wrapper_log_output.flush()

            pending.append(node)
        to_process = set([])

        # check running jobs
        current_pending = []
        for pending_node in pending:
            # if we're complete, log completion; otherwise keep waiting
            if job_complete(pending_node):
                wrapper_log_output.write("Node %s completed in %f seconds" %
                                         (pending_node.Name,
                                          pending_node.TotalTime))
                wrapper_log_output.write('\n')
                wrapper_log_output.flush()
            else:
                current_pending.append(pending_node)
        pending = current_pending

        # check for new jobs whose dependencies are now satisfied
        current_dependencies = []
        for dep_node in has_dependencies:
            # if children are satisfied, then allow for processing
            # the logic here is odd to handle the case where an internal
            # node has both a tip that is a child and a child that is an
            # internal node
            children_are_complete = [(c.Processed or c.istip())
                                     for c in dep_node.Children]
            if all(children_are_complete):
                to_process.add(dep_node)
            else:
                current_dependencies.append(dep_node)
        has_dependencies = current_dependencies

        sleep(seconds_to_sleep)

    # all merges complete; release the log file handle
    wrapper_log_output.close()