def submit_cluster(self, mh_dict):
    """Submits the job to the cluster.

    Splits the requested number of trials into condor tasks of
    ``self.trials_per_task`` trials each, writes the executable and the
    submit file, then submits the job array via ssh to the WIPAC submit
    host. The resulting condor cluster id is stored in ``self.job_id``.

    :param mh_dict: analysis dictionary; its ``'n_trials'`` entry is
        overwritten with the per-task trial count before pickling.
    """
    # Get the number of tasks that will have to be submitted in order to get ntrials.
    # FIX: use ceiling division. The previous int(ntrials / trials_per_task)
    # truncated, which dropped the remainder trials and submitted *zero*
    # tasks whenever ntrials < trials_per_task.
    ntrials = self.mh_dict['n_trials']
    n_tasks = -(-ntrials // self.trials_per_task)
    logger.debug(f'running {ntrials} trials in {n_tasks} tasks')

    # The mh_dict will be submitted n_task times and will perform
    # mh_dict['n_trials'] each time. Therefore we have to adjust
    # mh_dict['n_trials'] in order to actually perform the number
    # specified in self.mh_dict['n_trials'].
    mh_dict['n_trials'] = self.trials_per_task
    path = make_analysis_pickle(mh_dict)

    # make the executable and the submit file
    if not os.path.isdir(self.cluster_files_directory):
        logger.debug(f'making directory {self.cluster_files_directory}')
        os.makedirs(self.cluster_files_directory)

    self.make_executable_file(path)
    self.make_submit_file(n_tasks)

    # condor_submit has to run on the submit host, so go through ssh.
    cmd = f"ssh {WIPACSubmitter.username}@submit-1.icecube.wisc.edu " \
          f"'condor_submit {self.submit_file}'"
    logger.debug(f'command is {cmd}')

    prc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
    msg = prc.stdout.read().decode()
    logger.info(msg)
    # condor_submit reports "... submitted to cluster <id>." — extract the id.
    self.job_id = str(msg).split('cluster ')[-1].split('.')[0]
def submit_cluster(self, mh_dict):
    """Submits the job to the cluster"""
    # Clear out stale log files first, when requested.
    if self.remove_old_logs:
        self.clear_log_dir()

    # Translate the requested trial count into a number of array tasks.
    ntrials = mh_dict['n_trials']
    n_tasks = int(ntrials / self.trials_per_task)
    logger.debug(f'running {ntrials} trials in {n_tasks} tasks')

    # Every task performs mh_dict['n_trials'] trials, so overwrite that
    # entry with the per-task count before pickling it for the workers.
    mh_dict['n_trials'] = self.trials_per_task
    pickle_path = make_analysis_pickle(mh_dict)

    # Assemble the submit command from its pieces.
    cmd_parts = [DESYSubmitter.submit_cmd]
    if self.cluster_cpu > 1:
        cmd_parts.append(f" -pe multicore {self.cluster_cpu} -R y ")
    cmd_parts.append(
        f"-t 1-{n_tasks}:1 {DESYSubmitter.submit_file} {pickle_path} {self.cluster_cpu}"
    )
    submit_cmd = "".join(cmd_parts)

    logger.debug(f"Ram per core: {self.ram_per_core}")
    logger.info(f"{time.asctime(time.localtime())}: {submit_cmd}")

    self.make_cluster_submission_script()

    # Fire off the command and remember the job-array id the scheduler
    # reports back.
    proc = subprocess.Popen(submit_cmd, stdout=subprocess.PIPE, shell=True)
    reply = proc.stdout.read().decode()
    logger.info(str(reply))
    self.job_id = int(str(reply).split('job-array')[1].split('.')[0])
def submit_cluster(self, mh_dict): """Submits the job to the cluster""" # if specified, remove old logs from log directory if self.remove_old_logs: self.clear_log_dir() # Get the number of tasks that will have to be submitted in order to get ntrials ntrials = mh_dict["n_trials"] n_tasks = int(ntrials / self.trials_per_task) logger.debug(f"running {ntrials} trials in {n_tasks} tasks") # The mh_dict will be submitted n_task times and will perform mh_dict['n_trials'] each time. # Therefore we have to adjust mh_dict['n_trials'] in order to actually perform the number # specified in self.mh_dict['n_trials'] mh_dict["n_trials"] = self.trials_per_task path = make_analysis_pickle(mh_dict) # assemble the submit command submit_cmd = DESYSubmitter.submit_cmd if self.cluster_cpu > 1: submit_cmd += " -pe multicore {0} -R y".format(self.cluster_cpu) submit_cmd += ( f" -t 1-{n_tasks}:1 {DESYSubmitter.submit_file} {path} {self.cluster_cpu}" ) logger.info(f"Ram per core: {self.ram_per_core}") logger.info(f"{time.asctime(time.localtime())}: {submit_cmd}") self.make_cluster_submission_script() if not self.manual_submit: process = subprocess.Popen(submit_cmd, stdout=subprocess.PIPE, shell=True) msg = process.stdout.read().decode() logger.info(str(msg)) self.job_id = int(str(msg).split("job-array")[1].split(".")[0]) else: input( f"Running in 'manual_submit' mode. Login to a submission host and launch the following command:\n" f"{submit_cmd}\n" f"Press enter to continue after the jobs are finished.\n" f"[ENTER]")
def submit_cluster(self, mh_dict):
    """Submits the job to the cluster.

    Splits the requested number of trials into condor tasks of
    ``self.trials_per_task`` trials each, writes the executable and submit
    files, and either submits them via ssh or (in ``manual_submit`` mode)
    asks the user to submit by hand. On automatic submission the condor
    cluster id is stored in ``self.job_id``.

    :param mh_dict: analysis dictionary; its ``'n_trials'`` entry is
        overwritten with the per-task trial count before pickling.
    """
    # Get the number of tasks that will have to be submitted in order to get ntrials.
    # FIX: use ceiling division. The previous int(ntrials / trials_per_task)
    # truncated, which dropped the remainder trials and submitted *zero*
    # tasks whenever ntrials < trials_per_task.
    ntrials = self.mh_dict["n_trials"]
    n_tasks = -(-ntrials // self.trials_per_task)
    logger.debug(f"running {ntrials} trials in {n_tasks} tasks")

    # The mh_dict will be submitted n_task times and will perform
    # mh_dict['n_trials'] each time. Therefore we have to adjust
    # mh_dict['n_trials'] in order to actually perform the number
    # specified in self.mh_dict['n_trials'].
    mh_dict["n_trials"] = self.trials_per_task
    path = make_analysis_pickle(mh_dict)

    # make the executable and the submit file
    if not os.path.isdir(self.cluster_files_directory):
        logger.debug(f"making directory {self.cluster_files_directory}")
        os.makedirs(self.cluster_files_directory)

    self.make_executable_file(path)
    self.make_submit_file(n_tasks)

    if not self.manual_submit:
        # condor_submit has to run on the submit host, so go through ssh
        cmd = (f"ssh {WIPACSubmitter.username}@submit-1.icecube.wisc.edu "
               f"'condor_submit {self.submit_file}'")
        logger.debug(f"command is {cmd}")

        prc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        msg = prc.stdout.read().decode()
        logger.info(msg)
        # condor_submit reports "... submitted to cluster <id>." — extract the id
        self.job_id = str(msg).split("cluster ")[-1].split(".")[0]
    else:
        # manual mode: the user submits the prepared file and confirms completion
        input(
            f"You selected manual submit mode: \n"
            f"\tThe submit file can be found here: \n"
            f"\t{self.submit_file} \n"
            f"\tPlease submit this to the cluster and hit enter when all jobs are done! \n"
            f"[ENTER]")
# Creates the Minimisation Handler dictionary, which contains all relevant # information to run an analysis mh_dict = { "name": name, "mh_name": "fixed_weights", "datasets": [IC86_234_dict], "catalogue": txs_cat_path, "inj kwargs": inj_kwargs, "llh_dict": llh_kwargs, "scale": scale, "n_trials": 100, "n_steps": 11, } pkl_file = make_analysis_pickle(mh_dict) # Creates a Minimisation Handler using the dictionary, and runs the trials # mh_power_law = MinimisationHandler.create(mh_dict_power_law) # mh_power_law.iterate_run(mh_dict_power_law["scale"], n_steps=mh_dict_power_law["n_steps"], # n_trials=mh_dict_power_law["n_trials"]) rd.submit_to_cluster(pkl_file, n_jobs=5) res_dict[i] = mh_dict rd.wait_for_cluster() sens = [] disc = []
def submit_local(self, mh_dict):
    """Uses the MultiprocessWrapper to split the trials into jobs and run
    them locally.

    :param mh_dict: analysis dictionary, pickled before running so the
        worker processes can load it.
    """
    make_analysis_pickle(mh_dict)
    # Leave one core free ("all but one"). FIX: os.cpu_count() may return
    # None (TypeError on `None - 1`), and on a single-core machine the
    # "all but one" rule would yield 0 workers — clamp to at least 1.
    available = (os.cpu_count() or 1) - 1
    n_cpu = max(1, min(self.n_cpu, available))
    run_multiprocess(n_cpu=n_cpu, mh_dict=mh_dict)