def run_reducer(self, **kwargs):
    """ Inside the run() function, apply the reducer to all of the mapped-aggregated result values. """
    if self.qsub is False:
        reducer = kwargs['reducer']
        mapped_results = kwargs['mapped_results']
        return reducer(mapped_results[0], parameters=self.parameters[0])

    # qsub path: stage inputs and scripts into a temp directory and run the
    # reducer in a container via the helper shell script.
    import shutil
    import subprocess

    random_string = str(uuid.uuid4())
    temp_job_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                      "tmpRed_" + random_string)

    # Create temp_job_directory.
    if not os.path.exists(temp_job_directory):
        os.makedirs(temp_job_directory)

    unpickled_inp = dict(mapped_results=kwargs['mapped_results'], parameters=self.parameters)
    input_file_path = os.path.join(temp_job_directory, constants.reduce_input_file_name)

    # Write input file.
    with open(input_file_path, "wb") as input_file:
        cloudpickle.dump(unpickled_inp, input_file)

    # Copy input files to working directory.
    shutil.copyfile(kwargs['pickled_cluster_input_file'],
                    os.path.join(temp_job_directory, constants.pickled_cluster_input_file))
    shutil.copyfile(input_file_path,
                    os.path.join(temp_job_directory, constants.job_input_file_name))

    # Copy library scripts to working directory.
    shutil.copyfile(constants.parameter_sweep_run_reducer_shell_script,
                    os.path.join(temp_job_directory,
                                 os.path.basename(constants.parameter_sweep_run_reducer_shell_script)))
    shutil.copyfile(constants.parameter_sweep_run_reducer_pyfile,
                    os.path.join(temp_job_directory,
                                 os.path.basename(constants.parameter_sweep_run_reducer_pyfile)))

    reduce_script_file = os.path.join(temp_job_directory,
                                      os.path.basename(constants.parameter_sweep_run_reducer_shell_script))
    container_name = os.path.basename(temp_job_directory)

    # Invoke parameter_sweep_run_reducer.
    subprocess.call("bash {0} {1} {2}".format(reduce_script_file, container_name,
                                              temp_job_directory), shell=True)

    self._wait_for_all_results_to_return([temp_job_directory])

    with open(os.path.join(temp_job_directory, constants.job_output_file_name), "r") as of:
        result = of.read()

    # Remove job directory and container.
    clean_up([temp_job_directory], [container_name])
    return result
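# Hedged usage sketch for the local (self.qsub is False) path above. The
# reducer signature and the mapped_results layout shown here are assumptions
# for illustration, not part of the original API: one inner list of mapped
# values per parameter set, reduced here by averaging.
def example_reducer(mapped_result, parameters=None):
    """Example reducer: average the per-trajectory mapped values."""
    return sum(mapped_result) / float(len(mapped_result))

# sweep.run_reducer(reducer=example_reducer,             # sweep is hypothetical
#                   mapped_results=[[1.0, 2.0, 3.0]],
#                   pickled_cluster_input_file="cluster_input")  # qsub path only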
def _submit_qsub_job(self, job_program_file, job_name, job_input, containers, dirs,
                     temp_job_directory):
    import shutil
    from subprocess import Popen

    pickled_cluster_input_file = job_input['pickled_cluster_input_file']

    # Write input file for qsub job.
    with open(os.path.join(temp_job_directory, constants.job_input_file_name), "wb") as input_file:
        cloudpickle.dump(job_input, input_file)

    if self.cluster_execution is True:
        shutil.copyfile(pickled_cluster_input_file,
                        os.path.join(temp_job_directory, constants.pickled_cluster_input_file))

    # Write job program file.
    shutil.copyfile(job_program_file, os.path.join(temp_job_directory, constants.qsub_job_name))

    # Append to list of related job containers.
    containers.append(job_name)

    # Invoke qsub to start a container with the same name as job_name.
    Popen(['qsub', '-d', temp_job_directory, '-N', job_name, constants.qsub_file], shell=False)

    # Append to list of related job directories.
    dirs.append(temp_job_directory)
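# Hedged sketch of the caller-side contract: the caller creates
# temp_job_directory and passes shared `containers`/`dirs` lists, which
# _submit_qsub_job appends to so that later polling and clean-up can find
# every job. All names below (sweep, base_dir, job_inputs, job_program_file)
# are illustrative assumptions, not part of the original API.
#
#   containers, dirs = [], []
#   for idx, job_input in enumerate(job_inputs):
#       job_name = "ps_job_{0}".format(idx)
#       temp_job_directory = os.path.join(base_dir, job_name)
#       os.makedirs(temp_job_directory)
#       sweep._submit_qsub_job(job_program_file, job_name, job_input,
#                              containers, dirs, temp_job_directory)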
def put(self, filename, data):
    file_path = os.path.join(self.folder_name, filename)
    if self.serialization_method == "cloudpickle":
        with open(file_path, "wb") as fh:
            cloudpickle.dump(data, fh)
    elif self.serialization_method == "json":
        # json.dump writes text, so the file must be opened in text mode.
        with open(file_path, "w") as fh:
            json.dump(data, fh)
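# A minimal sketch of a matching get() for the two serialization methods
# above; this counterpart is an assumption, not code from the source, and it
# presumes `pickle` and `json` are imported alongside cloudpickle. Note that
# cloudpickle output is readable with the standard pickle module.
def get(self, filename):
    file_path = os.path.join(self.folder_name, filename)
    if self.serialization_method == "cloudpickle":
        with open(file_path, "rb") as fh:
            return pickle.load(fh)
    elif self.serialization_method == "json":
        with open(file_path, "r") as fh:
            return json.load(fh)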
def put(self, filename, data):
    with open(os.path.join(self.folder_name, filename), 'wb') as fh:
        cloudpickle.dump(data, fh)
def run_qsub_parameter_sweep(parameters, mapper_fn=mapAnalysis, reducer_fn=reduceAnalysis):
    pset_list = []
    dat = []

    # Build the list of parameter sets. Note: the multi-parameter branch only
    # crosses the first two parameters; more than two are not supported here.
    if len(parameters) > 1:
        pnames = list(parameters.keys())
        for pvals1 in parameters[pnames[0]]:
            for pvals2 in parameters[pnames[1]]:
                pset = {pnames[0]: pvals1, pnames[1]: pvals2}
                pset_list.append(pset)
    else:
        for pname, pvals in parameters.items():
            for pval in pvals:
                pset = {pname: pval}
                pset_list.append(pset)

    if 'seed' in StochSSModel.json_data and int(StochSSModel.json_data['seed']) != -1:
        seed = int(StochSSModel.json_data['seed'])
    else:
        random.seed()
        seed = random.randint(0, 2147483647)

    base_dir = os.path.join('/home/aviral/CSE/qsub_experiments/', "temp_" + str(uuid.uuid4()))
    qsub_file = "/home/aviral/CSE/qsub_experiments/job_submission.pbs"
    job_file = "/home/aviral/CSE/qsub_experiments/ComputeEnsemble.py"
    job_name_prefix = "xyz_ps_job_"
    molns_cloudpickle_file = "/home/aviral/CSE/molnsutil/molnsutil/molns_cloudpickle.py"
    dirs = []
    containers = []

    if not os.path.exists(base_dir):
        os.makedirs(base_dir)

    for pndx, pset in enumerate(pset_list):
        model = StochSSModel(**pset)
        seed_plus_pndx = seed + pndx
        number_of_trajectories = StochSSModel.json_data['trajectories']
        unpickled_list = [number_of_trajectories, seed_plus_pndx, model, mapper_fn]

        job_name = job_name_prefix + str(pndx)

        # Create temp directory for this job.
        temp_job_directory = os.path.join(base_dir, job_name + "/")
        if not os.path.exists(temp_job_directory):
            os.makedirs(temp_job_directory)

        # Write input file for qsub job.
        with open(os.path.join(temp_job_directory, "input"), "wb") as output:
            CloudPickle.dump(unpickled_list, output)

        # Write job program file.
        shutil.copyfile(job_file, os.path.join(temp_job_directory, "ComputeEnsemble.py"))

        # Write molns_cloudpickle.
        shutil.copyfile(molns_cloudpickle_file,
                        os.path.join(temp_job_directory, "molns_cloudpickle.py"))

        containers.append(job_name)

        # Invoke qsub to start a container with the same name as job_name.
        Popen(['qsub', '-d', temp_job_directory, '-N', job_name, qsub_file], shell=False)

        dirs.append(temp_job_directory)

    temp_dirs = dirs[:]
    wait_for_all_results_to_return(temp_dirs)

    print("reducing results..")
    # Pair each job directory with the parameter set that produced it.
    for pset, job_dir in zip(pset_list, dirs):
        mapped_list = get_unpickled_result(job_dir)
        dat.append({'parameters': pset, 'result': reducer_fn(mapped_list)})

    # Remove temporary files and finished containers.
    clean_up([base_dir], containers)
    return dat
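# Hedged usage sketch; the parameter names and value ranges are made up, and
# the sweep crosses at most the first two parameters (see the note above).
#
#   parameters = {'k1': [0.1, 0.2, 0.4], 'k2': [1.0, 2.0]}
#   results = run_qsub_parameter_sweep(parameters)
#   for entry in results:
#       print(entry['parameters'], entry['result'])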
def put(self, filename, data):
    with open(os.path.join(self.folder_name, filename), 'wb') as fh:
        cloudpickle.dump(data, fh)
#! /usr/bin/python
import pickle

import molns_cloudpickle as CloudPickle

# Read the job input staged by the submitting process.
with open("input", "rb") as input_file:
    unpickled_list = pickle.load(input_file)

number_of_trajectories = unpickled_list[0]
seed_plus_pndx = unpickled_list[1]
model = unpickled_list[2]
mapper_fn = unpickled_list[3]

# Run the ensemble and apply the mapper to each trajectory.
results = model.run(number_of_trajectories=number_of_trajectories,
                    seed=seed_plus_pndx, show_labels=True)
if not isinstance(results, list):
    results = [results]

mapped_list = [mapper_fn(r) for r in results]

with open("output", "wb") as output:
    CloudPickle.dump(mapped_list, output)
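# Hedged sketch of reading this worker's "output" file back on the submit
# side; get_unpickled_result() in run_qsub_parameter_sweep is assumed to do
# the equivalent of this for each job directory.
#
#   with open(os.path.join(temp_job_directory, "output"), "rb") as fh:
#       mapped_list = pickle.load(fh)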