def run_dataset(prob_label):
    """Run the experiment for one problem label.

    Submits one job per (repetition, method) pair to the batch engine
    (reusing cached per-job results on disk unless is_rerun), waits for all
    jobs, then collects the test results and saves one aggregated pickle.

    BUG FIX: the aggregated ``results`` dict previously hard-coded
    ``'tr_proportion': 0.5`` even though the per-job filenames (and the rest
    of the module) use the module-level ``tr_proportion`` variable; the
    saved metadata was wrong whenever ``tr_proportion != 0.5``.
    """
    sample_source, n = get_sample_source(prob_label)

    # /////// submit jobs //////////
    # create folder name string
    home = os.path.expanduser("~")
    foldername = os.path.join(home, "freqopttest_slurm", 'e%d' % ex)
    logger.info("Setting engine folder to %s" % foldername)

    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(
        foldername=foldername, job_name_base="e%d_" % ex, parameter_prefix="")

    # Use the following line if Slurm queue is not used.
    #engine = SerialComputationEngine()
    engine = SlurmComputationEngine(batch_parameters, do_clean_up=True)
    n_methods = len(method_job_funcs)
    # aggregators: repetitions x #methods; each cell collects one job's result
    aggregators = np.empty((reps, n_methods), dtype=object)
    d = sample_source.dim()
    for r in range(reps):
        for mi, f in enumerate(method_job_funcs):
            # name used to save the result
            func_name = f.__name__
            fname = '%s-%s-J%d_r%d_d%d_a%.3f_trp%.2f.p' \
                % (prob_label, func_name, J, r, d, alpha, tr_proportion)
            if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                # Result cached on disk: wrap it in an aggregator instead of
                # submitting a new job.
                logger.info('%s exists. Load and return.' % fname)
                test_result = glo.ex_load_result(ex, prob_label, fname)
                sra = SingleResultAggregator()
                sra.submit_result(SingleResult(test_result))
                aggregators[r, mi] = sra
            else:
                # result not exists or rerun
                job = Ex5Job(SingleResultAggregator(), prob_label, r, n, f)
                agg = engine.submit_job(job)
                aggregators[r, mi] = agg

    # let the engine finish its business
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    test_results = np.empty((reps, n_methods), dtype=object)
    for r in range(reps):
        for mi, f in enumerate(method_job_funcs):
            logger.info("Collecting result (%s, r=%d)" % (f.__name__, r))
            # let the aggregator finalize things
            aggregators[r, mi].finalize()

            # aggregators[i].get_final_result() returns a SingleResult
            # instance, from which we extract the actual result
            test_result = aggregators[r, mi].get_final_result().result
            test_results[r, mi] = test_result

            # re-save the individual result so reruns can reuse it
            func_name = f.__name__
            fname = '%s-%s-J%d_r%d_d%d_a%.3f_trp%.2f.p' \
                % (prob_label, func_name, J, r, d, alpha, tr_proportion)
            glo.ex_save_result(ex, test_result, prob_label, fname)

    func_names = [f.__name__ for f in method_job_funcs]
    func2labels = exglobal.get_func2label_map()
    method_labels = [func2labels[f] for f in func_names if f in func2labels]

    # save results
    results = {'results': test_results, 'n': n,
               'data_fname': label2fname[prob_label],
               'alpha': alpha, 'J': J,
               'sample_source': sample_source,
               # was hard-coded 0.5; now records the value actually used
               'tr_proportion': tr_proportion,
               'method_job_funcs': method_job_funcs,
               'prob_label': prob_label,
               'method_labels': method_labels}

    # aggregated file name encodes the experiment settings
    fname = 'ex%d-%s-me%d_J%d_rs%d_nma%d_d%d_a%.3f_trp%.2f.p' \
        % (ex, prob_label, n_methods, J, reps, n, d, alpha, tr_proportion)
    glo.ex_save_result(ex, results, fname)
    logger.info('Saved aggregated results to %s' % fname)
def run_dataset(prob_label):
    """Run the experiment for one problem label: submit one batch job per
    (repetition, method) pair, wait, then collect and save all results.
    """
    sample_source, n = get_sample_source(prob_label)

    # /////// submit jobs //////////
    # Engine scratch folder lives under the user's home directory.
    engine_dir = os.path.join(
        os.path.expanduser("~"), "freqopttest_slurm", 'e%d' % ex)
    logger.info("Setting engine folder to %s" % engine_dir)

    # Batch-parameter object required by any batch computation engine.
    logger.info("Creating batch parameter instance")
    cluster_params = BatchClusterParameters(
        foldername=engine_dir, job_name_base="e%d_" % ex, parameter_prefix="")

    # Swap in SerialComputationEngine() here to run without a Slurm queue.
    #engine = SerialComputationEngine()
    engine = SlurmComputationEngine(cluster_params, do_clean_up=True)

    n_methods = len(method_job_funcs)
    d = sample_source.dim()

    def per_job_fname(method_name, rep):
        # Cache file name for one (method, repetition) result.
        return '%s-%s-J%d_r%d_d%d_a%.3f_trp%.2f.p' % (
            prob_label, method_name, J, rep, d, alpha, tr_proportion)

    # aggregators[rep, method] gathers the result of one job.
    aggregators = np.empty((reps, n_methods), dtype=object)
    for rep in range(reps):
        for mj, method in enumerate(method_job_funcs):
            fname = per_job_fname(method.__name__, rep)
            if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                # Cached on disk: wrap the stored result in an aggregator.
                logger.info('%s exists. Load and return.' % fname)
                cached = glo.ex_load_result(ex, prob_label, fname)
                sra = SingleResultAggregator()
                sra.submit_result(SingleResult(cached))
                aggregators[rep, mj] = sra
            else:
                # No cached result (or a forced rerun): submit a fresh job.
                new_job = Ex5Job(SingleResultAggregator(), prob_label, rep, n,
                                 method)
                aggregators[rep, mj] = engine.submit_job(new_job)

    # Block until every submitted job has completed.
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    test_results = np.empty((reps, n_methods), dtype=object)
    for rep in range(reps):
        for mj, method in enumerate(method_job_funcs):
            logger.info("Collecting result (%s, r=%d)" % (method.__name__, rep))
            agg = aggregators[rep, mj]
            agg.finalize()
            # get_final_result() yields a SingleResult; unwrap its payload.
            one_result = agg.get_final_result().result
            test_results[rep, mj] = one_result
            # Re-save the individual result so future runs can reuse it.
            glo.ex_save_result(ex, one_result, prob_label,
                               per_job_fname(method.__name__, rep))

    func_names = [g.__name__ for g in method_job_funcs]
    func2labels = exglobal.get_func2label_map()
    method_labels = [func2labels[name] for name in func_names
                     if name in func2labels]

    # Aggregate everything into a single pickle.
    results = {
        'results': test_results,
        'n': n,
        'data_fname': label2fname[prob_label],
        'alpha': alpha,
        'J': J,
        'sample_source': sample_source,
        'tr_proportion': tr_proportion,
        'method_job_funcs': method_job_funcs,
        'prob_label': prob_label,
        'method_labels': method_labels,
    }
    agg_fname = 'ex%d-%s-me%d_J%d_rs%d_nma%d_d%d_a%.3f_trp%.2f.p' % (
        ex, prob_label, n_methods, J, reps, n, d, alpha, tr_proportion)
    glo.ex_save_result(ex, results, agg_fname)
    logger.info('Saved aggregated results to %s' % agg_fname)
def run_problem(prob_label):
    """Run the experiment for one problem label.

    Builds a (problem parameter, density, data source) list for the label,
    submits one batch job per (repetition, parameter, method) triple
    (reusing per-job cached results on disk unless is_rerun), waits for all
    jobs, then collects the results and saves one aggregated pickle.
    """
    L = get_pqsource_list(prob_label)
    # L is a list of (prob_param, p, data_source) triples; unzip it.
    prob_params, ps, data_sources = zip(*L)
    # make them lists
    prob_params = list(prob_params)
    ps = list(ps)
    data_sources = list(data_sources)

    # /////// submit jobs //////////
    # create folder name string
    #result_folder = glo.result_folder()
    from kgof.config import expr_configs
    tmp_dir = expr_configs['scratch_path']
    foldername = os.path.join(tmp_dir, 'kgof_slurm', 'e%d' % ex)
    logger.info("Setting engine folder to %s" % foldername)

    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(
        foldername=foldername, job_name_base="e%d_" % ex, parameter_prefix="")

    # Use the following line if Slurm queue is not used.
    #engine = SerialComputationEngine()
    engine = SlurmComputationEngine(batch_parameters)
    #engine = SlurmComputationEngine(batch_parameters, partition='wrkstn,compute')
    n_methods = len(method_job_funcs)

    # aggregators: repetitions x len(prob_params) x #methods; each cell
    # collects the result of exactly one job
    aggregators = np.empty((reps, len(prob_params), n_methods), dtype=object)
    for r in range(reps):
        for pi, param in enumerate(prob_params):
            for mi, f in enumerate(method_job_funcs):
                # name used to save the result
                func_name = f.__name__
                fname = '%s-%s-n%d_r%d_p%g_a%.3f_trp%.2f.p' \
                    %(prob_label, func_name, sample_size, r, param, alpha, tr_proportion)
                if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                    # Cached result exists: wrap it instead of resubmitting.
                    logger.info('%s exists. Load and return.' % fname)
                    job_result = glo.ex_load_result(ex, prob_label, fname)
                    sra = SingleResultAggregator()
                    sra.submit_result(SingleResult(job_result))
                    aggregators[r, pi, mi] = sra
                else:
                    # result not exists or rerun

                    # p: an UnnormalizedDensity object
                    p = ps[pi]
                    job = Ex2Job(SingleResultAggregator(), p, data_sources[pi],
                                 prob_label, r, f, param)
                    agg = engine.submit_job(job)
                    aggregators[r, pi, mi] = agg

    # let the engine finish its business
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    job_results = np.empty((reps, len(prob_params), n_methods), dtype=object)
    for r in range(reps):
        for pi, param in enumerate(prob_params):
            for mi, f in enumerate(method_job_funcs):
                logger.info("Collecting result (%s, r=%d, param=%.3g)" %
                        (f.__name__, r, param))
                # let the aggregator finalize things
                aggregators[r, pi, mi].finalize()

                # aggregators[i].get_final_result() returns a SingleResult instance,
                # which we need to extract the actual result
                job_result = aggregators[r, pi, mi].get_final_result().result
                job_results[r, pi, mi] = job_result

    #func_names = [f.__name__ for f in method_job_funcs]
    #func2labels = exglobal.get_func2label_map()
    #method_labels = [func2labels[f] for f in func_names if f in func2labels]

    # save results
    results = {'job_results': job_results,
            'prob_params': prob_params,
            'alpha': alpha,
            'repeats': reps,
            'ps': ps,
            'list_data_source': data_sources,
            'tr_proportion': tr_proportion,
            'method_job_funcs': method_job_funcs,
            'prob_label': prob_label,
            'sample_size': sample_size,
            }

    # class name
    # aggregated file name encodes n_methods, sample size, repetitions and
    # the min/max problem parameter
    fname = 'ex%d-%s-me%d_n%d_rs%d_pmi%g_pma%g_a%.3f_trp%.2f.p' \
        %(ex, prob_label, n_methods, sample_size, reps, min(prob_params),
                max(prob_params), alpha, tr_proportion)
    glo.ex_save_result(ex, results, fname)
    logger.info('Saved aggregated results to %s' % fname)
def run_problem(folder_path, prob_label):
    """Run the experiment for one problem label: submit one batch job per
    (repetition, method) pair, wait, then collect and save all results.
    """
    # Decode problem settings from the label string.
    parsed = exglo.parse_prob_label(prob_label)
    is_h0 = parsed['is_h0']
    n = parsed['n']

    # /////// submit jobs //////////
    # create folder name string
    #result_folder = glo.result_folder()
    #tmp_dir = tempfile.gettempdir()
    from fsic.config import expr_configs
    scratch = expr_configs['scratch_dir']
    engine_dir = os.path.join(scratch, 'wj_slurm', 'e%d' % ex)
    logger.info("Setting engine folder to %s" % engine_dir)

    # Batch-parameter object required by any batch computation engine.
    logger.info("Creating batch parameter instance")
    cluster_params = BatchClusterParameters(
        foldername=engine_dir, job_name_base="e%d_" % ex, parameter_prefix="")

    # Swap in SerialComputationEngine() here to run without a Slurm queue.
    #engine = SerialComputationEngine()
    engine = SlurmComputationEngine(cluster_params)

    n_methods = len(method_job_funcs)

    def per_job_fname(method_name, rep):
        # Cache file name for one (method, repetition) result.
        return '%s-%s-r%d_a%.3f_trp%.2f.p' % (
            prob_label, method_name, rep, alpha, tr_proportion)

    # aggregators[rep, method] gathers the result of one job.
    aggregators = np.empty((reps, n_methods), dtype=object)
    for rep in range(reps):
        for mj, method in enumerate(method_job_funcs):
            fname = per_job_fname(method.__name__, rep)
            if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                # Cached on disk: wrap the stored result in an aggregator.
                logger.info('%s exists. Load and return.' % fname)
                cached = glo.ex_load_result(ex, prob_label, fname)
                sra = SingleResultAggregator()
                sra.submit_result(SingleResult(cached))
                aggregators[rep, mj] = sra
            else:
                # No cached result (or a forced rerun): submit a fresh job.
                new_job = Ex4Job(SingleResultAggregator(), folder_path,
                                 prob_label, rep, method)
                aggregators[rep, mj] = engine.submit_job(new_job)

    # Block until every submitted job has completed.
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    job_results = np.empty((reps, n_methods), dtype=object)
    for rep in range(reps):
        for mj, method in enumerate(method_job_funcs):
            logger.info("Collecting result (%s, r=%d, n=%d)" %
                        (method.__name__, rep, n))
            agg = aggregators[rep, mj]
            agg.finalize()
            # get_final_result() yields a SingleResult; unwrap its payload.
            job_results[rep, mj] = agg.get_final_result().result

    #func_names = [f.__name__ for f in method_job_funcs]
    #func2labels = exglobal.get_func2label_map()
    #method_labels = [func2labels[f] for f in func_names if f in func2labels]

    # Aggregate everything into a single pickle.
    # - Do not store PairedSource because it can be very big.
    results = {
        'job_results': job_results,
        'n': n,
        'is_h0': is_h0,
        'alpha': alpha,
        'repeats': reps,
        'tr_proportion': tr_proportion,
        'method_job_funcs': method_job_funcs,
        'prob_label': prob_label,
    }
    agg_fname = 'ex%d-%s-me%d_rs%d_a%.3f_trp%.2f.p' % (
        ex, prob_label, n_methods, reps, alpha, tr_proportion)
    glo.ex_save_result(ex, results, agg_fname)
    logger.info('Saved aggregated results to %s' % agg_fname)
def run_problem(prob_label):
    """Run the experiment for one problem label.

    Submits one batch job per (repetition, sample size, method) triple
    (reusing per-job cached results on disk unless is_rerun), waits for all
    jobs, then collects the results and saves one aggregated pickle.
    """
    # /////// submit jobs //////////
    # create folder name string
    #result_folder = glo.result_folder()
    from kmod.config import expr_configs
    tmp_dir = expr_configs['scratch_path']
    foldername = os.path.join(tmp_dir, 'kmod_slurm', 'e%d' % ex)
    logger.info("Setting engine folder to %s" % foldername)

    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(
        foldername=foldername, job_name_base="e%d_" % ex, parameter_prefix="")

    # Use the following line if Slurm queue is not used.
    #engine = SerialComputationEngine()
    # Honor an explicit Slurm partition list from the config, if any.
    partitions = expr_configs['slurm_partitions']
    if partitions is None:
        engine = SlurmComputationEngine(batch_parameters)
    else:
        engine = SlurmComputationEngine(batch_parameters, partition=partitions)
    n_methods = len(method_funcs)

    # problem setting
    # ns: list of sample sizes; P, Q: the two models; ds: a data source
    ns, P, Q, ds, = get_ns_pqrsource(prob_label)

    # aggregators: repetitions x len(ns) x #methods; each cell collects the
    # result of exactly one job
    aggregators = np.empty((reps, len(ns), n_methods), dtype=object)
    for r in range(reps):
        for ni, n in enumerate(ns):
            for mi, f in enumerate(method_funcs):
                # name used to save the result
                func_name = f.__name__
                fname = '%s-%s-n%d_r%d_a%.3f.p' \
                    %(prob_label, func_name, n, r, alpha,)
                if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                    # Cached result exists: wrap it instead of resubmitting.
                    logger.info('%s exists. Load and return.' % fname)
                    job_result = glo.ex_load_result(ex, prob_label, fname)
                    sra = SingleResultAggregator()
                    sra.submit_result(SingleResult(job_result))
                    aggregators[r, ni, mi] = sra
                else:
                    # result not exists or rerun
                    job = Ex1Job(SingleResultAggregator(), P, Q, ds,
                                 prob_label, r, f, n)
                    agg = engine.submit_job(job)
                    aggregators[r, ni, mi] = agg

    # let the engine finish its business
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    job_results = np.empty((reps, len(ns), n_methods), dtype=object)
    for r in range(reps):
        for ni, n in enumerate(ns):
            for mi, f in enumerate(method_funcs):
                logger.info("Collecting result (%s, r=%d, n=%d)" %
                        (f.__name__, r, n))
                # let the aggregator finalize things
                aggregators[r, ni, mi].finalize()

                # aggregators[i].get_final_result() returns a SingleResult instance,
                # which we need to extract the actual result
                job_result = aggregators[r, ni, mi].get_final_result().result
                job_results[r, ni, mi] = job_result

    #func_names = [f.__name__ for f in method_funcs]
    #func2labels = exglobal.get_func2label_map()
    #method_labels = [func2labels[f] for f in func_names if f in func2labels]

    # save results
    results = {'job_results': job_results,
            'P': P,
            'Q': Q,
            'data_source': ds,
            'alpha': alpha,
            'repeats': reps,
            'ns': ns,
            'method_funcs': method_funcs,
            'prob_label': prob_label,
            }

    # class name
    # aggregated file name encodes n_methods, repetitions, min/max sample size
    fname = 'ex%d-%s-me%d_rs%d_nmi%d_nma%d_a%.3f.p' \
        %(ex, prob_label, n_methods, reps, min(ns), max(ns), alpha,)
    glo.ex_save_result(ex, results, fname)
    logger.info('Saved aggregated results to %s' % fname)
def run_problem(prob_label):
    """Run the experiment for one problem label.

    Submits one batch job per (repetition, sample size, method) triple
    (reusing per-job cached results on disk unless is_rerun), waits for all
    jobs, then collects the results and saves one aggregated pickle.

    BUG FIX: the collection log message used the format "n=%rd", which
    renders repr(n) followed by a literal 'd' (e.g. "n=500d"); corrected to
    "n=%d".
    """
    ns, p, ds = get_ns_pqsource(prob_label)
    # /////// submit jobs //////////
    # create folder name string
    # result_folder = glo.result_folder()
    from sbibm.third_party.kgof.config import expr_configs

    tmp_dir = expr_configs["scratch_path"]
    foldername = os.path.join(tmp_dir, "kgof_slurm", "e%d" % ex)
    logger.info("Setting engine folder to %s" % foldername)

    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(
        foldername=foldername, job_name_base="e%d_" % ex, parameter_prefix="")

    # Use the following line if Slurm queue is not used.
    # engine = SerialComputationEngine()
    # engine = SlurmComputationEngine(batch_parameters, partition='wrkstn,compute')
    engine = SlurmComputationEngine(batch_parameters)
    n_methods = len(method_job_funcs)

    # aggregators: repetitions x len(ns) x #methods; each cell collects the
    # result of exactly one job
    aggregators = np.empty((reps, len(ns), n_methods), dtype=object)
    for r in range(reps):
        for ni, n in enumerate(ns):
            for mi, f in enumerate(method_job_funcs):
                # name used to save the result
                func_name = f.__name__
                fname = "%s-%s-n%d_r%d_a%.3f_trp%.2f.p" % (
                    prob_label,
                    func_name,
                    n,
                    r,
                    alpha,
                    tr_proportion,
                )
                if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                    # Cached result exists: wrap it instead of resubmitting.
                    logger.info("%s exists. Load and return." % fname)
                    job_result = glo.ex_load_result(ex, prob_label, fname)
                    sra = SingleResultAggregator()
                    sra.submit_result(SingleResult(job_result))
                    aggregators[r, ni, mi] = sra
                else:
                    # result not exists or rerun

                    # p: an UnnormalizedDensity object
                    job = Ex1Job(SingleResultAggregator(), p, ds, prob_label, r, f, n)
                    agg = engine.submit_job(job)
                    aggregators[r, ni, mi] = agg

    # let the engine finish its business
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    job_results = np.empty((reps, len(ns), n_methods), dtype=object)
    for r in range(reps):
        for ni, n in enumerate(ns):
            for mi, f in enumerate(method_job_funcs):
                # was "n=%rd", which printed e.g. "n=500d"
                logger.info(
                    "Collecting result (%s, r=%d, n=%d)" % (f.__name__, r, n))
                # let the aggregator finalize things
                aggregators[r, ni, mi].finalize()

                # aggregators[i].get_final_result() returns a SingleResult instance,
                # which we need to extract the actual result
                job_result = aggregators[r, ni, mi].get_final_result().result
                job_results[r, ni, mi] = job_result

    # func_names = [f.__name__ for f in method_job_funcs]
    # func2labels = exglobal.get_func2label_map()
    # method_labels = [func2labels[f] for f in func_names if f in func2labels]

    # save results
    results = {
        "job_results": job_results,
        "data_source": ds,
        "alpha": alpha,
        "repeats": reps,
        "ns": ns,
        "p": p,
        "tr_proportion": tr_proportion,
        "method_job_funcs": method_job_funcs,
        "prob_label": prob_label,
    }

    # class name
    # aggregated file name encodes n_methods, repetitions, min/max sample size
    fname = "ex%d-%s-me%d_rs%d_nmi%d_nma%d_a%.3f_trp%.2f.p" % (
        ex,
        prob_label,
        n_methods,
        reps,
        min(ns),
        max(ns),
        alpha,
        tr_proportion,
    )
    glo.ex_save_result(ex, results, fname)
    logger.info("Saved aggregated results to %s" % fname)
sra = SingleResultAggregator() if test_result is SingleResult: sra.submit_result(test_result) else: sra.submit_result(SingleResult(test_result)) aggregators[r, mi] = sra else: # result not exists or rerun job = Ex4Job(SingleResultAggregator(), prob_label, r, n, f) agg = engine.submit_job(job) aggregators[r, mi] = agg # let the engine finish its business logger.info("Wait for all call in engine") engine.wait_for_all() # ////// collect the results /////////// logger.info("Collecting results") test_results = np.empty((reps, n_methods), dtype=object) for r in range(reps): for mi, f in enumerate(method_job_funcs): logger.info("Collecting result (%s, r=%d)" % (f.__name__, r )) # let the aggregator finalize things aggregators[r, mi].finalize() # aggregators[i].get_final_result() returns a SingleResult instance, # which we need to extract the actual result test_result = aggregators[r, mi].get_final_result().result if isinstance(test_result, SingleResult): test_result = test_result.result