def test_dump_and_load():
    res = gp_minimize(bench3,
                      [(-2.0, 2.0)],
                      x0=[0.],
                      acq_func="LCB",
                      n_calls=2,
                      n_random_starts=0,
                      random_state=1)

    # Test normal dumping and loading
    with tempfile.TemporaryFile() as f:
        dump(res, f)
        f.seek(0)
        res_loaded = load(f)
        check_optimization_results_equality(res, res_loaded)
        assert_true("func" in res_loaded.specs["args"])

    # Test dumping without objective function
    with tempfile.TemporaryFile() as f:
        dump(res, f, store_objective=False)
        f.seek(0)
        res_loaded = load(f)
        check_optimization_results_equality(res, res_loaded)
        assert_true(not ("func" in res_loaded.specs["args"]))

    # Delete the objective function and dump the modified object
    del res.specs["args"]["func"]
    with tempfile.TemporaryFile() as f:
        dump(res, f, store_objective=False)
        f.seek(0)
        res_loaded = load(f)
        check_optimization_results_equality(res, res_loaded)
        assert_true(not ("func" in res_loaded.specs["args"]))
def test_dump_and_load_optimizer():
    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(-2.0, 2.0)], base_estimator,
                    n_random_starts=1, acq_optimizer="sampling")
    opt.run(bench1, n_iter=3)

    with tempfile.TemporaryFile() as f:
        dump(opt, f)
        f.seek(0)  # rewind before reading, otherwise load() sees an empty stream
        load(f)
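# A minimal round-trip sketch, not taken from any snippet above; the
# objective and file name are illustrative. It shows the behavior the
# tests above assert: with store_objective=False the loaded result has
# no "func" entry in res.specs["args"], so the objective must be
# reattached by hand before the result can be reused.
from skopt import gp_minimize, dump, load

def _objective(x):
    return (x[0] - 0.3) ** 2

_res = gp_minimize(_objective, [(-2.0, 2.0)], n_calls=10, random_state=0)
dump(_res, "result.gz", store_objective=False)
_res_loaded = load("result.gz")
assert "func" not in _res_loaded.specs["args"]
_res_loaded.specs["args"]["func"] = _objective  # reattach the objective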
def get_optimizer(logger, optimizer_path, n_iter):
    logger.info('Retrieving model stored at: {}'.format(optimizer_path))
    try:
        optimizer = load(optimizer_path)
        logger.info('Loaded model stored at: {}'.format(optimizer_path))
    except (KeyError, ValueError):
        logger.error('Cannot open the file {}'.format(optimizer_path))
        optimizer = None
    except FileNotFoundError:
        logger.error('No such file or directory: {}'.format(optimizer_path))
        optimizer = None

    if optimizer is not None:
        finished_iterations = np.array(optimizer.yi).shape[0]
        if finished_iterations == 0:
            optimizer = None
            logger.info('Optimizer did not finish any iterations, '
                        'so setting optimizer to None')
        else:
            n_iter = max(n_iter - finished_iterations, 0)
            logger.info('Iterations already done: {}, iterations left to run: {}'
                        .format(finished_iterations, n_iter))
    return optimizer, n_iter
def main():
    # filename = 'results_test.pickle'
    # with open(filename, 'rb') as f:
    results = load('result.gz')
    print(results)
    skopt.plots.plot_convergence(results)
    plt.show()
def load_model(hyperparams_file=None, model_file=None):
    if hyperparams_file is not None:
        try:
            res = load(hyperparams_file)
            param_dict = unpack_params(res.x)
            print('Loaded hyperparameters: ')
            print_hyperparams(res)
            return CatBoostRegressor(learning_rate=param_dict['learning_rate'],
                                     depth=param_dict['depth'],
                                     l2_leaf_reg=param_dict['l2_leaf_reg'],
                                     n_estimators=param_dict['n_estimators'],
                                     thread_count=-1)
        except FileNotFoundError:
            pass
    if model_file is not None:
        try:
            print('Loaded model from ', model_file)
            cb = load_data(model_file)
            return cb
        except FileNotFoundError:
            pass
    # default settings
    return CatBoostRegressor(thread_count=-1)
def load_model(hyperparams_file=None, model_file=None):
    if hyperparams_file is not None:
        try:
            res = load(hyperparams_file)
            param_dict = unpack_params(res.x)
            print('Loaded hyperparameters: ')
            print_hyperparams(res)
            return GradientBoostingRegressor(
                max_depth=param_dict['max_depth'],
                learning_rate=param_dict['learning_rate'],
                n_estimators=param_dict['n_estimators'],
                min_samples_split=param_dict['min_samples_split'],
                min_samples_leaf=param_dict['min_samples_leaf'],
                subsample=param_dict['subsample'],
                max_features=param_dict['max_features'],
                alpha=param_dict['alpha'],
                verbose=0)
        except FileNotFoundError:
            pass
    if model_file is not None:
        try:
            print('Loaded model from ', model_file)
            gbr = load_data(model_file)
            return gbr
        except FileNotFoundError:
            pass
    # default settings
    return GradientBoostingRegressor(verbose=0)
def load_model(hyperparams_file=None, model_file=None):
    if hyperparams_file is not None:
        try:
            res = load(hyperparams_file)
            param_dict = unpack_params(res.x)
            print('Loaded hyperparameters: ')
            print_hyperparams(res)
            return SVR(C=param_dict['C'],
                       gamma=param_dict['gamma'],
                       epsilon=param_dict['epsilon'],
                       tol=param_dict['tol'],
                       kernel='rbf')
        except FileNotFoundError:
            pass
    if model_file is not None:
        try:
            print('Loaded model from ', model_file)
            svr = load_data(model_file)
            return svr
        except FileNotFoundError:
            pass
    # default settings
    return SVR(kernel='rbf', C=100, gamma=0.1)
def load_model(hyperparams_file=None, model_file=None):
    if hyperparams_file is not None:
        try:
            res = load(hyperparams_file)
            param_dict = unpack_params(res.x)
            print('Loaded hyperparameters: ')
            print_hyperparams(res)
            return RandomForestRegressor(
                n_estimators=param_dict['n_estimators'],
                min_samples_split=param_dict['min_samples_split'],
                min_samples_leaf=param_dict['min_samples_leaf'],
                max_features=param_dict['max_features'],
                verbose=0,
                n_jobs=-1)
        except FileNotFoundError:
            pass
    if model_file is not None:
        try:
            print('Loaded model from ', model_file)
            rf = load_data(model_file)
            return rf
        except FileNotFoundError:
            pass
    # default settings
    return RandomForestRegressor(verbose=0, n_jobs=-1)
def test(param_names, fname, obj_name):
    from parametersConfig import N_TRIALS, test_tools, test_actions
    with tqdm(total=N_TRIALS - 1, file=sys.stdout) as pbar:
        func = gen_run_experiment(pbar, param_names,
                                  tools=test_tools, actions=test_actions)
        c_all = []
        costs = []
        best = []
        best_iters = []
        best_params = []
        max_target = 1000.0
        res = load(fname)
        for ind, xi in enumerate(res.x_iters):
            # data = json.loads(line)
            pprint(res.func_vals[ind])
            ctarget = res.func_vals[ind]
            c_all.append(ctarget)
            if ctarget < 0.9 * max_target:
                max_target = ctarget
                best.append(ctarget)
                best_iters.append(ind)
                print("new best:{}".format(ctarget))
                params = xi  # data['params']
                best_params.append(params)
                pprint(params)
                c = func(params)
                costs.append(c)
        print(c_all)
        print(best)
        print(costs)
        print(best_params)
        print(ind)
def load_results(results_path, sort=False, reverse_sort=False):
    """
    Loads results from a distributed run.

    Parameters
    ----------
    * `results_path` [string]
        Path where results from the distributed run are stored.

    * `sort` [bool, default=False]
        Sort results by objective function minimum (lowest first).

    * `reverse_sort` [bool, default=False]
        Sort results by objective function minimum (highest first);
        `sort` must be set to True.

    Returns
    -------
    * `results` [list]
    """
    results = []
    for file in os.listdir(results_path):
        full_path = os.path.join(results_path, file)
        results.append(load(str(full_path)))
    if reverse_sort and not sort:
        sort = True
    if sort:
        results = sorted(results,
                         key=lambda result: result.fun,
                         reverse=reverse_sort)
    print("Number of results: {}\n".format(len(results)))
    return results
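# Hypothetical usage of load_results above; the directory name is an
# assumption. The loaded objects are skopt OptimizeResult instances, so
# the best run after an ascending sort exposes .x and .fun directly.
results = load_results("results/", sort=True)
best = results[0]
print("best parameters:", best.x)
print("best objective:", best.fun)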
def plot_cumulative_distribution(revisions):
    Y_3000s = []
    for revision, title in revisions:
        # FIXME
        label = title
        # FIXME
        # res = skopt.load('../generated/5.5/bayesOptResults.0.1.' + str(revision) + '-' + title + '.sav')
        res = skopt.load('../experiments/bayesOptResults.0.1.sav')
        func_vals = res.func_vals
        Y_3000s.append((-np.array([i for i in func_vals]), label))

    color = plt.cm.viridis(np.linspace(0, 4, 18))
    plt.rc('axes', prop_cycle=(cycler('color', color)))
    # + cycler('linestyle', ['-', '--', ':', '-.'])))
    fig = plt.gcf()
    for data, label in Y_3000s:
        values, base = np.histogram(data, bins=40)
        cumulative = np.cumsum(values) / 60
        plt.plot(base[:-1], cumulative, linewidth=4, label=label)
    plt.legend()
    plt.grid()
    plt.tick_params(top=True, direction='in')
    plt.grid(which='major', linestyle='--', alpha=0.4)
    plt.show()
    return fig
def previousRuns(filename):
    data = load(filename)
    # OptimizeResult is a dict subclass, so key access works here
    x0 = data['x_iters']
    y0 = data['func_vals']
    return x0, y0
def load_result(tag: str, path: PosixPath = CACHE_ROOTPATH, extension: str = "pkl"):
    filepath = path / f"{tag}.{extension}"
    if os.path.isfile(filepath):
        return load(filepath)
    return None
def find_best_hyperparameters(model, X, y, dynamic_params_space, scoring, plot, nfold, **HPO_params):
    # Filter these warnings - they are not consistent and arise even for float features
    from warnings import filterwarnings
    # simplefilter("ignore", UserWarning)
    filterwarnings("ignore",
                   message="The objective has been evaluated at this point before",
                   category=UserWarning)

    # Get model name
    model_name = model.__class__.__name__

    # Get dynamic parameter names (use_named_args maps the dimensions'
    # names onto whatever sequence is passed in, so the keys are correct)
    @use_named_args(dynamic_params_space)
    def get_params_names(**dynamic_params):
        return list(dynamic_params.keys())

    param_names = get_params_names(dynamic_params_space)

    # Define an objective function
    @use_named_args(dynamic_params_space)
    def objective(**dynamic_params):
        # model.set_params(**static_params)
        model.set_params(**dynamic_params)
        cv = StratifiedKFold(n_splits=nfold, random_state=seed, shuffle=True)
        scores = cross_validate(model, X, y, cv=cv, scoring=scoring, n_jobs=-1)
        val_score = np.mean(scores['test_score'])
        return -val_score

    print(model_name, 'model training...')
    # Load previously trained results and use the best model from the
    # previous run as the starting point (x0)
    try:
        res = load(r'output/models/' + model_name)
        x0 = res.x
    except Exception:
        # Not trained before -> no initial point provided
        x0 = None

    res = forest_minimize(objective, dynamic_params_space, x0=x0, **HPO_params)
    # Attach the parameter names to the result
    res.param_names = param_names

    print('Optimized parameters: ', res.param_names)
    print('Previous best parameters:', x0)
    print('Current best parameters:', res.x)
    print('Best score:', -res.fun)

    # Save the optimization result
    dump(res, r'output/models/' + model_name, store_objective=False)

    if plot:
        plt.figure(figsize=(5, 2))
        plot_convergence(res)
        try:
            # plot_objective does not work if only one parameter was searched
            plot_objective(res)
        except Exception:
            pass
        plt.show()
def from_checkpoint(path):
    """
    Restore a search from a checkpoint directory.

    :param path: directory containing `search_result.pkl` and `dimensions.p`
    :return: tuple of (search_result, dimensions)
    """
    search_result = skopt.load(path / 'search_result.pkl')
    dimensions = Dimensions()
    dimensions.load(path / 'dimensions.p')
    return search_result, dimensions
def load_best_parameters(model):
    model_name = model.__class__.__name__
    try:
        res = load(r'output/models/' + model_name)
        param_names = res.param_names
        param_values = res.x
        best_parameters_dict = dict(zip(param_names, param_values))
        model.set_params(**best_parameters_dict)
        print(model_name, 'optimized parameters:', best_parameters_dict)
    except Exception:
        print(model_name, 'parameters were not previously optimized')
    return model
def _load_checkpoint(self, tune_name=None):
    if tune_name is None:
        tune_name = self.tune_name
    if '.pkl' not in tune_name:
        tune_name += '.pkl'
    if os.path.isfile(tune_name):
        self.saved_res = skopt.load(tune_name)
        self.x0 = self.saved_res.x_iters
        self.y0 = self.saved_res.func_vals
        print('Checkpoint {} with score {:.2f} loaded !!!'.format(
            tune_name, min(self.y0)))
def reload_multiple(scenario, init_iters, addtl_iters, seeds=range(5),
                    func_names=["gp", "dummy", "forest", "gbrt"],
                    verb_model=False, verb=False, mute_reload=True):
    """
    Call reload() across several functions and seeds. Automatically dump results.

    PARAMETERS
    ----------
    scenario [int]:
        Id of the scenario. Used to get_params(). Determines the directory to save in.
    init_iters [int]:
        Iteration count of optimizations to load.
    addtl_iters [int]:
        Number of additional iterations to run the optimization for.
        The new total is used in filenames upon saving.
    seeds [list of int]:
        Values to be passed as seeds. Used in filenames upon saving.
    func_names [list of str]:
        Names of optimization functions to reload. Used in filenames upon saving.
    """
    opt_params = get_params(scenario)
    for seed in seeds:
        print("Seed: " + str(seed))
        for func_name in func_names:
            print(func_name + ':')
            result_loaded = load("optims/scenario" + str(scenario) + '/' +
                                 func_name + '_' + str(seed) + "_" +
                                 str(init_iters) + ".opt")
            result = reload(result_loaded, opt_params, addtl_iters, seed,
                            verb_model=verb_model, verb=verb,
                            mute_reload=mute_reload)
            dump(result,
                 "optims/scenario" + str(scenario) + '/' + func_name + '_' +
                 str(seed) + "_" + str(init_iters + addtl_iters) + ".opt")
def inspect(file):
    _log.info('loading file %s', file)
    opt = skopt.load(file)
    n = len(opt.x_iters)
    print('iterations:', n)
    print('optimal HR:', -opt.fun)
    for i in range(n):
        x = opt.x_iters[i]
        nhr = opt.func_vals[i]
        if hasattr(opt, 'iter_time'):
            time = opt.iter_time[i]
        else:
            time = np.nan
        print('iter[{}]: {!r} -> {:f} ({:.1f}s)'.format(i, x, -nhr, time))
def run_search(data, algo, out_name, evaluate):
    opts = dict()
    algo_mod = evaluate.module
    opts.update(getattr(algo_mod, 'options', {}))
    afn = dt.afname(algo)
    if not out_name:
        out_name = afn

    ddir = data_dir / data
    tdir = ddir / 'tuning'
    ofile = tdir / f'{out_name}.opt'
    cpfile = tdir / f'{out_name}.cp'
    if cpfile.exists():
        _log.info('loading checkpoint file %s', cpfile)
        initial = skopt.load(cpfile)
        opts['x0'] = initial.x_iters
        opts['y0'] = initial.func_vals
        opts['n_random_starts'] = max(0, 10 - len(initial.x_iters))
        opts['n_calls'] = 100 - len(initial.x_iters)
        _log.info('checkpoint has %d iterations', len(initial.x_iters))

    saver = skopt.callbacks.CheckpointSaver(cpfile)
    stopper = ImprovementStopper(0.01, min_runs=20)
    timer = skopt.callbacks.TimerCallback()
    with LogFile(tdir / f'{afn}-search.log'):
        res = skopt.gp_minimize(evaluate, algo_mod.dimensions,
                                callback=[timer, saver, stopper], **opts)

    _log.info('%s: optimal MRR of %f at %s after %d searches',
              algo, -res.fun, res.x, len(res.x_iters))
    res.iter_time = timer.iter_time
    _log.info('writing results to %s', ofile)
    skopt.dump(res, ofile)
    with (tdir / f'{out_name}.json').open('w') as jsf:
        json.dump({
            'params': [x.item() for x in res.x],
            'iters': len(res.x_iters),
            'MRR': -res.fun
        }, jsf)

    _log.info('removing checkpoint file %s', cpfile)
    if cpfile.exists():
        cpfile.unlink()
def Baye_search_resume(self, func, path: str, space):
    assert os.path.exists(path)
    ckpt = load(path)
    # Currently unused: the callback below is commented out
    checkpoint_saver = CheckpointSaver(args.hyper_ckpt)
    rlt = gp_minimize(func,
                      dimensions=space,
                      x0=ckpt.x_iters,
                      y0=ckpt.func_vals,
                      n_calls=20,
                      n_random_starts=3,
                      # callback=[checkpoint_saver],
                      random_state=42)
    logger.debug(rlt)
    plot_convergence(rlt)
    return rlt
def get_mc_icalc(opt, out_prefix):
    '''Calculate and save diffuse intensity from BGO-optimized weights'''
    res = skopt.load(opt.output_fname)
    print('s =', res.x)
    print('Objective function =', res.fun)

    # Calculate diffuse scattering
    i_calc = opt.get_mc_intens(res.x)

    # Save calculated diffuse intensity
    if out_prefix is not None:
        h5_output_fname = out_prefix + '_diffcalc.h5'
    else:
        h5_output_fname = op.splitext(opt.output_fname)[0] + '_diffcalc.h5'
    print('Writing Imc to', h5_output_fname)
    with h5py.File(h5_output_fname, 'w') as fptr:
        fptr['diff_intens'] = i_calc
    return i_calc
def load_best_parameters_sampling(model):
    model_name = model.__class__.__name__
    # Defaults so the return statement cannot raise a NameError when no
    # previously optimized parameters are found
    alpha_over = None
    k_neighbors = None
    try:
        res = load(r'output/models/' + model_name)
        param_names = res.param_names
        param_values = res.x
        best_parameters_dict = dict(zip(param_names, param_values))
        # Remove sampling parameters
        alpha_over = best_parameters_dict.pop('alpha_over')
        k_neighbors = best_parameters_dict.pop('k_neighbors')
        model.set_params(**best_parameters_dict)
        print(model_name, 'optimized parameters:', best_parameters_dict)
    except Exception:
        print(model_name, 'parameters were not previously optimized')
    return model, alpha_over, k_neighbors
def _load_checkpoint(results_path, rank):
    """
    Loads a checkpoint to resume optimization.

    * `results_path` [str]
        Path to the previously saved results.

    * `rank` [int]
        Rank to which the saved results belong.
    """
    files = _listfiles(results_path)
    for file in files:
        saved_rank = re.findall(r'\d+', file)
        if rank == int(saved_rank[0]):
            filepath = os.path.join(results_path, file)
            checkpoint = load(str(filepath))
            print(f'loading checkpoint for rank {int(saved_rank[0])}')
            return checkpoint
def test(param_names, fname, obj_name):
    test_tools = ("hook", )
    test_actions = ("tap_from_right", )
    with tqdm(total=N_TRIALS - 1, file=sys.stdout) as pbar:
        func = gen_run_experiment(pbar, param_names,
                                  tools=test_tools,
                                  actions=test_actions,
                                  object_name=obj_name)
        c_all = []
        costs = []
        best = []
        best_iters = []
        best_params = []
        max_target = 1000.0
        res = load(fname)
        for ind, xi in enumerate(res.x_iters):
            pprint(res.func_vals[ind])
            ctarget = res.func_vals[ind]
            c_all.append(ctarget)
            if ctarget < max_target:
                max_target = ctarget
                best.append(ctarget)
                best_iters.append(ind)
                print("new best:{}".format(ctarget))
                params = xi  # data['params']
                best_params.append(params)
                pprint(params)
                c = func(params)
                costs.append(c)
        print(c_all)
        logging.info("tr costs:{}".format(c_all))
        print(best)
        logging.info("tr best:{}".format(best))
        print(costs)
        logging.info("tst best:{}".format(costs))
        print(best_params)
        logging.info("best params:{}".format(best_params))
        print(ind)
        logging.info("ind:{}".format(ind))
def log_best_params(file):
    opt = load(file)
    if "ps" in opt.acq_func:
        # With a "*ps" acquisition function each y is a (loss, time) pair
        best_i = np.argmin(np.array(opt.yi)[:, 0])
        best_loss = opt.yi[best_i]
        best_params = opt.Xi[best_i]
        logger.info(
            "Best parameters so far with a loss for file {} of {:.4f} and a time of {:.4f}:\n {}"
            .format(os.path.basename(file), best_loss[0], best_loss[1], best_params))
    else:
        best_i = np.argmin(opt.yi)
        best_loss = opt.yi[best_i]
        best_params = opt.Xi[best_i]
        logger.info(
            "Best parameters so far with a loss for file {} of {:.4f}:\n {}"
            .format(os.path.basename(file), best_loss, best_params))
    return best_loss
def plotSkopt(filename):
    # The loaded result supports dict-style access with keys:
    # ['x', 'fun', 'func_vals', 'x_iters', 'models', 'space', 'random_state', 'specs']
    # data['space'] in this run was:
    #   Space([Real(low=1e-10, high=0.001, prior='log-uniform', transform='normalize'),
    #          Categorical(categories=('low', 'mid', 'high', 'up', 'down'), prior=None),
    #          Real(low=1e-09, high=0.0001, prior='log-uniform', transform='normalize'),
    #          Integer(low=5, high=128),
    #          Categorical(categories=('RMSProp', 'Adagrad', 'Adadelta', 'Adam'), prior=None)])
    # i.e. l2_reg, dropouts, learning_rate, batch_size, optimizer = args
    data = skopt.load(filename)
    print("Best loss: {}".format(data['fun']))
    print("   Values: {}".format(data['x']))

    values = np.array(data['x_iters'])  # shape (n_calls, 5)
    losses = data['func_vals']

    l2 = np.array(values[:, 0]).astype(float)
    lr = np.array(values[:, 2]).astype(float)
    batch = np.array(values[:, 3]).astype(float)
    plotRegressionHparam(losses, l2, "L2 Reg", logx=True, logy=True)
    plotRegressionHparam(losses, lr, "Learning Rate", logx=True, logy=True)
    plotRegressionHparam(losses, batch, "Batch Size", logx=False, logy=True)

    def makeCatDict(values_idx):
        catDict = defaultdict(list)
        for idx, run in enumerate(values):
            opt = run[values_idx]
            loss = losses[idx]
            catDict[opt].append(loss)
        return catDict

    plotCategoricalHparam(makeCatDict(4), "Optimizers")
    plotCategoricalHparam(makeCatDict(1), "Dropout")
def convert_res_to_json(input_res=None, input_filename=None, output_filename=None):
    if input_res is None and input_filename is not None:
        res = load(input_filename)
    elif input_res is not None and input_filename is None:
        res = input_res
    else:
        print('input res must be provided either as a filename of a dump '
              'or as the res object')
        return None

    results = []
    for input_parameters, output_parameter in zip(res.x_iters, res.func_vals):
        result = {}
        result['tbr'] = 1. / output_parameter
        # result['tbr_std_dev'] = tally_std_dev
        result['firstwall_coolant'] = input_parameters[0]
        blanket_structural_fraction = input_parameters[1]
        result['blanket_structural_fraction'] = blanket_structural_fraction
        result['blanket_multiplier_material'] = input_parameters[2]
        result['blanket_breeder_material'] = input_parameters[3]
        result['blanket_breeder_li6_enrichment_fraction'] = input_parameters[4]
        breeder_to_breeder_plus_multiplier_fraction = input_parameters[5]
        result['fraction_of_breeder_in_breeder_plus_multiplier_volume'] = \
            breeder_to_breeder_plus_multiplier_fraction
        result['blanket_multiplier_fraction'] = (
            (1. - breeder_to_breeder_plus_multiplier_fraction) *
            (1. - blanket_structural_fraction))
        result['blanket_breeder_fraction'] = (
            breeder_to_breeder_plus_multiplier_fraction *
            (1. - blanket_structural_fraction))
        results.append(result)

    if output_filename is not None:
        with open(output_filename, 'w') as fp:
            json.dump(results, fp, indent=4)
    return results
def main():
    parser = argparse.ArgumentParser(description='Setup experiment.')
    parser.add_argument('--results_dir', type=str, help='Path to results directory.')
    args = parser.parse_args()

    hparams = [(0.001, 0.1),
               (0.0, 0.90),
               (0.001, 0.1),
               (0.0, 0.90),
               (16, 32),
               (3, 13)]

    res = load('./checkpoint.pkl')
    x0 = res.x_iters
    y0 = res.func_vals
    print(f'Previous iteration made it through {len(y0)} iterations')

    checkpoint_saver = CheckpointSaver("./checkpoint.pkl")
    res_gp = gp_minimize(objective, hparams,
                         x0=x0, y0=y0,
                         n_calls=20,
                         callback=[checkpoint_saver],
                         random_state=0,
                         verbose=True)
    dump(res_gp, 'smbo20')
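# A condensed, hedged sketch of the resume idiom shared by run_search,
# Baye_search_resume, and the main() above: seed gp_minimize with x0/y0
# from a saved checkpoint when one exists, and let CheckpointSaver
# persist progress either way. All names and budgets are illustrative.
import os
from skopt import gp_minimize, load
from skopt.callbacks import CheckpointSaver

def resumable_minimize(objective, dimensions, ckpt_path="checkpoint.pkl",
                       total_calls=50, n_random=10):
    x0 = y0 = None
    done = 0
    if os.path.isfile(ckpt_path):
        prev = load(ckpt_path)
        x0, y0 = prev.x_iters, list(prev.func_vals)
        done = len(x0)
    saver = CheckpointSaver(ckpt_path)  # dumps the result after every call
    return gp_minimize(objective, dimensions,
                       x0=x0, y0=y0,
                       n_calls=max(n_random, total_calls - done),
                       n_random_starts=max(0, n_random - done),
                       callback=[saver],
                       random_state=0)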
def reeval_optimum(scenario, iterations, best_seed=0, best_func="forest",
                   seeds=range(5), verb_model=False):
    """
    Reload an already completed optimization and re-evaluate on its optimum.

    PARAMETERS
    ----------
    scenario [int]:
        Id of the loaded optimization. Used in the filename of the loaded optimization.
    iterations [int]:
        Number of iterations of the loaded optimization. Used in the filename.
    best_seed [int, default=0]:
        Seed of the loaded optimization. Used in the filename.
    best_func [str, default="forest"]:
        Optimization function of the loaded optimization. Used in the filename.
    seeds [list of int]:
        Seed values used for re-evaluating.
    verb_model [bool, default=False]:
        Whether to pass verbose=True to the model.
    """
    optimum = res_optimum(
        load("./optims/scenario" + str(scenario) + "/" + best_func + "_" +
             str(best_seed) + "_" + str(iterations) + ".opt"))
    print("optimum:")
    print(optimum)
    return reeval(scenario, optimum[0], seeds, verb_model)