def _format_keras_history(history):
    """Convert a Keras history object into a plain dictionary.

    # Arguments
        history: object exposing the attributes `params`, `epoch` and
            `history` (as returned by Keras `model.fit`).

    # Returns
        dictionary with two entries: `"params"` (the `history.params`
        value) and `"loss"` (the `history.history` entries merged with an
        additional `"epoch"` entry holding `history.epoch`).
    """
    fit_params = history.params
    per_epoch = merge_dicts({"epoch": history.epoch}, history.history)
    return {"params": fit_params, "loss": per_epoch}
def eval_model(model, valid, test, add_eval_metrics=None):
    """Evaluate model's performance on the validation- and test-set.

    The metrics compiled into the model are computed with `model.evaluate`
    on `valid`; the additional metrics are computed from `model.predict`
    on `test`.

    # Arguments
        model: Keras model
        valid: validation-dataset. Tuple of inputs `x` and target `y` - `(x, y)`.
            Used for the model's own metrics.
        test: test-dataset. Tuple of inputs `x` and target `y` - `(x, y)`.
            Used for the additional evaluation metrics.
        add_eval_metrics: Additional evaluation metrics to use. Dictionary
            mapping a metric name to a function accepting arguments:
            `y_true`, `y_predicted`. `None` (default) means no additional
            metrics.

    # Returns
        dictionary with evaluation metrics. When a name occurs both in the
        model metrics and in `add_eval_metrics`, the model metric is kept.
    """
    # A fresh dict per call instead of a shared mutable default argument.
    if add_eval_metrics is None:
        add_eval_metrics = {}

    # evaluate the model
    logger.info("Evaluate...")

    # - model_metrics
    # evaluation is done in a single pass (batch_size == number of samples)
    # to have more precise metrics
    model_metrics_values = model.evaluate(valid[0], valid[1],
                                          verbose=0,
                                          batch_size=valid[1].shape[0])
    model_metrics = dict(zip(_listify(model.metrics_names),
                             _listify(model_metrics_values)))

    # - eval_metrics
    y_true = test[1]
    y_pred = model.predict(test[0], verbose=0)
    eval_metrics = {k: v(y_true, y_pred) for k, v in add_eval_metrics.items()}

    # handle the case where the two metrics names intersect
    # - omit duplicates from eval_metrics
    intersected_keys = set(model_metrics).intersection(set(eval_metrics))
    if len(intersected_keys) > 0:
        logger.warning("Some metric names intersect: {0}. Ignoring the add_eval_metrics ones".
                       format(intersected_keys))
        eval_metrics = _delete_keys(eval_metrics, intersected_keys)

    return merge_dicts(model_metrics, eval_metrics)
def get_ok_results(self, verbose=True):
    """Collect the results of all trials whose status is "ok".

    # Arguments
        verbose: if True, print a short report about the trials that
            did not finish with status "ok".

    # Returns
        list of dictionaries, one per ok trial, each holding the trial's
        `"tid"` merged with its result converted via `to_dict()`.
        An empty list is returned when there are no trials.
    """
    if len(self.trials) == 0:
        return []

    # positions of the trials whose status differs from "ok"
    failed_idx = np.where(np.array(self.statuses()) != "ok")[0]
    if len(failed_idx) > 0:
        if verbose:
            print("{0}/{1} trials were not ok.".format(len(failed_idx), len(self.trials)))
            print("Trials: " + str(failed_idx))
            print("Statuses: " + str(np.array(self.statuses())[failed_idx]))

    ok_results = []
    for trial in self.trials:
        if trial["result"]["status"] == "ok":
            ok_results.append(
                merge_dicts({"tid": trial["tid"]}, trial["result"].to_dict())
            )
    return ok_results
def get_data(data_fn, param):
    """Call `data_fn` with keyword arguments taken from `param`.

    # Arguments
        data_fn: callable accepting the merged entries as keyword arguments.
        param: dictionary with a mandatory `"data"` entry and an optional
            `"shared"` entry; both are merged (via `merge_dicts`) into the
            keyword arguments.

    # Returns
        whatever `data_fn` returns
    """
    data_kwargs = param["data"]
    shared_kwargs = param.get("shared", {})
    call_kwargs = merge_dicts(data_kwargs, shared_kwargs)
    return data_fn(**call_kwargs)
def get_model(model_fn, train_data, param):
    """Call `model_fn` with the training data and the entries of `param`.

    # Arguments
        model_fn: callable accepting `train_data` plus the merged entries
            as keyword arguments.
        train_data: passed to `model_fn` as the `train_data` keyword argument.
        param: dictionary with a mandatory `"model"` entry and an optional
            `"shared"` entry; both are merged (via `merge_dicts`) into the
            keyword arguments.

    # Returns
        whatever `model_fn` returns
    """
    data_kwarg = {"train_data": train_data}
    model_kwargs = param["model"]
    shared_kwargs = param.get("shared", {})
    return model_fn(**merge_dicts(data_kwarg, model_kwargs, shared_kwargs))
def test_hyopt(tmpdir):
    """End-to-end test of a hyperopt run backed by a local mongod instance.

    Starts `mongod` and one `hyperopt-mongo-worker` as subprocesses, runs
    two short optimizations (plain and cross-validated) and exercises the
    helper methods of `KMongoTrials`. Both subprocesses are terminated even
    when an assertion fails, so a failing run does not leave processes
    holding port 22334.

    # Arguments
        tmpdir: pytest temporary-directory fixture.
    """
    # get the base dir
    mongodb_path = str(tmpdir.mkdir('mongodb'))
    results_path = str(tmpdir.mkdir('results'))

    proc_args = [
        "mongod",
        "--dbpath=%s" % mongodb_path,
        "--noprealloc",
        "--port=22334",
    ]
    print("starting mongod", proc_args)
    mongodb_proc = subprocess.Popen(
        proc_args,
        cwd=mongodb_path,  # this prevented mongod assertion fail
    )
    mongo_worker_proc = None
    try:
        # wait a bit
        time.sleep(1)

        proc_args_worker = [
            "hyperopt-mongo-worker",
            "--mongo=localhost:22334/test",
            "--poll-interval=0.1",
        ]
        mongo_worker_proc = subprocess.Popen(
            proc_args_worker,
            env=merge_dicts(os.environ, {"PYTHONPATH": os.getcwd()}),
        )

        # --------------------------------------------
        db_name = "test"
        exp_name = "test2"

        fn = CompileFN(db_name, exp_name,
                       data_fn=data.data,
                       model_fn=model.build_model,
                       optim_metric="acc",
                       optim_metric_mode="max",
                       save_dir=results_path)
        hyper_params = {
            "data": {},
            "shared": {
                "max_features": 100,
                "maxlen": 20
            },
            "model": {
                "filters": hp.choice("m_filters", (2, 5)),
                "hidden_dims": 3,
            },
            "fit": {
                "epochs": 1
            }
        }
        fn_test(fn, hyper_params, tmp_dir=str(tmpdir))
        trials = KMongoTrials(db_name, exp_name,
                              ip="localhost",
                              kill_timeout=5 * 60,
                              port=22334)

        best = fmin(fn, hyper_params,
                    trials=trials,
                    algo=tpe.suggest,
                    max_evals=2)
        assert len(trials) == 2
        assert len(trials) == trials.n_ok()
        assert isinstance(best, dict)
        assert "m_filters" in best

        # test my custom functions
        trials.as_df()
        trials.train_history(trials.valid_tid()[0])
        trials.train_history(trials.valid_tid())
        trials.get_ok_results()
        tid_best = trials.best_trial_tid()
        assert tid_best == trials.best_trial["tid"]
        assert trials.optimal_epochs(tid_best) == 1

        # --------------------------------------------
        # cross-validation
        db_name = "test"
        exp_name = "test2_cv"

        fn = CompileFN(db_name, exp_name,
                       data_fn=data.data,
                       model_fn=model.build_model,
                       cv_n_folds=3,
                       save_dir=results_path)
        trials = KMongoTrials(db_name, exp_name,
                              ip="localhost",
                              kill_timeout=5 * 60,
                              port=22334)
        fn_test(fn, hyper_params, tmp_dir=str(tmpdir))

        best = fmin(fn, deepcopy(hyper_params),
                    trials=trials,
                    algo=tpe.suggest,
                    max_evals=2)
        assert len(trials) == 2
        assert len(trials) == trials.n_ok()
        assert isinstance(best, dict)
        assert "m_filters" in best

        # test my custom functions
        trials.as_df()
        trials.train_history(trials.valid_tid()[0])
        trials.train_history(trials.valid_tid())
        trials.get_ok_results()
        tid_best = trials.best_trial_tid()
        assert tid_best == trials.best_trial["tid"]
        assert trials.optimal_epochs(tid_best) == 1
        assert trials.best_trial_tid() == trials.best_trial["tid"]
    finally:
        # --------------------------------------------
        # close - always, so a failed assertion does not leak the processes
        if mongo_worker_proc is not None:
            mongo_worker_proc.terminate()
        mongodb_proc.terminate()
def __call__(self):
    """Run the hyper-parameter optimization and store the best parameters.

    Steps:
      1. When `self.run_on_mongodb` is set, optionally (`self.start_mongodb`)
         spawn a `mongod` and a `hyperopt-mongo-worker` subprocess, then
         connect through `CMongoTrials`. Otherwise use in-memory `Trials`.
      2. Build the objective with `CompileFN` for the metric selected by
         `self.metric` ("OutlierLoss" or "OutlierRecall").
      3. Run `fmin` with `tpe.suggest` for `self.max_evals` evaluations.
      4. Translate the chosen indices of `encoding_dim`, `batch_size` and
         `epochs` back into values via `self.values` and write the result
         to `<DIR_OUT_RESULTS>/<exp_name>_best.json`.

    Subprocesses spawned here are killed on every exit path (success,
    exception or `sys.exit`).

    # Raises
        ValueError: if `self.metric` is not one of the supported metrics.
        SystemExit: if no mongod server can be reached.
    """
    mongodb_proc = None
    mongo_worker_proc = None
    m_pid = None
    w_pid = None

    try:
        if self.run_on_mongodb:
            if self.start_mongodb:
                mongodb_path = tempfile.mkdtemp()
                proc_args = [
                    "mongod",
                    "--dbpath=%s" % mongodb_path,
                    "--noprealloc",
                    "--port=" + str(self.port),
                ]
                print("starting mongod", proc_args)
                mongodb_proc = subprocess.Popen(
                    proc_args,
                    cwd=mongodb_path,
                )

                proc_args_worker = [
                    "hyperopt-mongo-worker",
                    # host:port/db is a URL, not a filesystem path: build it
                    # with "/" instead of the platform-specific os.path.join.
                    "--mongo={0}:{1}/{2}".format(self.ip, self.port, self.db_name),
                    "--poll-interval=0.1",
                ]
                mongo_worker_proc = subprocess.Popen(
                    proc_args_worker,
                    env=merge_dicts(os.environ, {"PYTHONPATH": os.getcwd()}),
                )
                m_pid = mongodb_proc.pid
                w_pid = mongo_worker_proc.pid

            try:
                trials = CMongoTrials(self.db_name,
                                      self.exp_name,
                                      ip=self.ip,
                                      port=self.port,
                                      kill_timeout=KILL_TIMEOUT)
            except pymongo.errors.ServerSelectionTimeoutError:
                print(
                    "No mongod process detected! Please use flag --start_mongodb or"
                    + " start mongoDB and workers. Port: " + str(self.port)
                    + " Host: " + str(self.ip)
                    + " DB name: " + str(self.db_name))
                # NOTE(review): exits with status 0 although this is an error
                # path; kept as-is for callers that rely on it - confirm.
                sys.exit(0)
        else:
            trials = Trials()

        dat = OptimizationData(m_pid, w_pid, self.path, self.sep)
        mod = OptimizationModel()

        # Select the metric to optimize; everything else passed to
        # CompileFN is identical for both metrics.
        if self.metric == "OutlierLoss":
            metric_name = "outlier_loss"
            metric_fn = OutlierLoss()
            metric_mode = "min"  # lower is better
        elif self.metric == "OutlierRecall":
            metric_name = "outlier_recall"
            metric_fn = OutlierRecall(theta=25, threshold=1000)
            metric_mode = "max"  # higher is better
        else:
            raise ValueError(
                "No such metric: " + str(self.metric) +
                " Available metrics for --use_metric are: 'OutlierLoss'(default), 'OutlierRecall'."
            )

        fn = CompileFN(
            self.db_name,
            self.exp_name,
            data_fn=dat.data,
            model_fn=mod.model,
            add_eval_metrics={metric_name: metric_fn},
            loss_metric=metric_name,  # which metric to optimize for
            loss_metric_mode=metric_mode,
            # NOTE(review): earlier comments described a 20% validation split
            # and best-model checkpointing, but None is passed for both -
            # confirm the intended behavior.
            valid_split=None,
            save_model=None,
            save_results=True,  # save the results as .json (in addition to mongoDB)
            save_dir=DIR_OUT_TRIALS)

        best = fmin(fn,
                    self.hyper_params,
                    trials=trials,
                    algo=tpe.suggest,
                    max_evals=self.max_evals)

        # fmin reports indices for these parameters; map them to the values.
        best['encoding_dim'] = self.values.q[best['encoding_dim']]
        best['batch_size'] = self.values.batch[best['batch_size']]
        best['epochs'] = self.values.epochs[best['epochs']]

        with open(os.path.join(DIR_OUT_RESULTS, self.exp_name + "_best.json"),
                  'wt') as f:
            json.dump(best, f)

        print("----------------------------------------------------")
        print("best_parameters: " + str(best))
        print("----------------------------------------------------")
    finally:
        # Kill only what was actually started here (worker first, then the
        # server), on every exit path.
        for proc in (mongo_worker_proc, mongodb_proc):
            if proc is not None:
                proc.kill()