Example #1
def _format_keras_history(history):
    """Nicely format a Keras History object.
    """
    return {
        "params": history.params,
        "loss": merge_dicts({"epoch": history.epoch}, history.history),
    }
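All of the examples on this page rely on a `merge_dicts` helper that is not shown. A plausible minimal sketch, assuming it simply merges any number of dictionaries with later keys overriding earlier ones (a guess, not the library's actual implementation):

def merge_dicts(*dicts):
    # Assumed behaviour: combine several dicts into a new dict,
    # letting keys from later dicts override earlier ones.
    merged = {}
    for d in dicts:
        merged.update(d)
    return merged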
Example #2
def eval_model(model, valid, test, add_eval_metrics={}):
    """Evaluate the model's performance on the validation and test sets.
    # Arguments
        model: Keras model
        valid: validation dataset. Tuple of inputs `x` and targets `y` - `(x, y)`.
        test: test dataset. Tuple of inputs `x` and targets `y` - `(x, y)`.
        add_eval_metrics: additional evaluation metrics to use. Can be a dictionary or a list of functions
            accepting two arguments: `y_true`, `y_predicted`. Alternatively, you can provide names of
            functions from the `kopt.eval_metrics` module.
    # Returns
        dictionary with the evaluation metrics
    """
    # evaluate the model
    logger.info("Evaluate...")
    # - model_metrics
    model_metrics_values = model.evaluate(valid[0], valid[1], verbose=0,
                                          batch_size=valid[1].shape[0])
    # evaluation is done in a single pass to have more precise metrics
    model_metrics = dict(zip(_listify(model.metrics_names),
                             _listify(model_metrics_values)))
    # - eval_metrics
    y_true = test[1]
    y_pred = model.predict(test[0], verbose=0)
    eval_metrics = {k: v(y_true, y_pred) for k, v in add_eval_metrics.items()}

    # handle the case where the two metrics names intersect
    # - omit duplicates from eval_metrics
    intersected_keys = set(model_metrics).intersection(set(eval_metrics))
    if len(intersected_keys) > 0:
        logger.warning("Some metric names intersect: {0}. Ignoring the add_eval_metrics ones".
                       format(intersected_keys))
        eval_metrics = _delete_keys(eval_metrics, intersected_keys)

    return merge_dicts(model_metrics, eval_metrics)
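A hedged usage sketch for `eval_model`; the model and data names (`my_model`, `x_va`, ...) are placeholders, and `roc_auc_score` is just one example of a callable taking `(y_true, y_predicted)`:

from sklearn.metrics import roc_auc_score  # any metric accepting (y_true, y_predicted) would work

metrics = eval_model(my_model,
                     valid=(x_va, y_va),
                     test=(x_te, y_te),
                     add_eval_metrics={"auc": roc_auc_score})
# -> a dict such as {"loss": ..., "acc": ..., "auc": ...}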
Example #3
    def get_ok_results(self, verbose=True):
        """Return a list of results with ok status
        """
        if len(self.trials) == 0:
            return []

        not_ok = np.where(np.array(self.statuses()) != "ok")[0]

        if len(not_ok) > 0 and verbose:
            print("{0}/{1} trials were not ok.".format(len(not_ok), len(self.trials)))
            print("Trials: " + str(not_ok))
            print("Statuses: " + str(np.array(self.statuses())[not_ok]))

        r = [merge_dicts({"tid": t["tid"]}, t["result"].to_dict())
             for t in self.trials if t["result"]["status"] == "ok"]
        return r
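Since each entry merges the trial id with the flattened result dict, the returned list converts directly into a pandas DataFrame. A small sketch, assuming `trials` is a populated trials object and the result dicts contain a "loss" field:

import pandas as pd

results = trials.get_ok_results(verbose=False)
df = pd.DataFrame(results)                   # one row per successful trial: "tid" plus the result fields
best_row = df.sort_values("loss").head(1)    # assumes a "loss" key in the result dicts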
Example #4
def get_data(data_fn, param):
    """Feed data_fn with param
    """
    return data_fn(**merge_dicts(param["data"], param.get("shared", {})))
Example #5
def get_model(model_fn, train_data, param):
    """Feed model_fn with train_data and param
    """
    model_param = merge_dicts({"train_data": train_data}, param["model"],
                              param.get("shared", {}))
    return model_fn(**model_param)
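Together, `get_data` and `get_model` split a single `param` dict into its "data", "model" and "shared" sections, with the "shared" keys passed to both callables. A hedged end-to-end sketch with made-up `data_fn`/`model_fn` whose keyword names match those sections:

param = {
    "data": {"maxlen": 20},
    "model": {"hidden_dims": 3},
    "shared": {"max_features": 100},
}

def data_fn(maxlen, max_features):
    # placeholder loader; a real one would return the training data
    return list(range(max_features))[:maxlen]

def model_fn(train_data, hidden_dims, max_features):
    # placeholder builder; a real one would return a compiled Keras model
    return {"n_train": len(train_data), "hidden_dims": hidden_dims}

train_data = get_data(data_fn, param)            # data_fn(maxlen=20, max_features=100)
model = get_model(model_fn, train_data, param)   # model_fn(train_data=..., hidden_dims=3, max_features=100)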
Example #6
def test_hyopt(tmpdir):
    # get the base dir
    mongodb_path = str(tmpdir.mkdir('mongodb'))
    results_path = str(tmpdir.mkdir('results'))
    # mongodb_path = "/tmp/mongodb_test/"
    # results_path = "/tmp/results/"

    proc_args = [
        "mongod",
        "--dbpath=%s" % mongodb_path, "--noprealloc", "--port=22334"
    ]
    print("starting mongod", proc_args)
    mongodb_proc = subprocess.Popen(
        proc_args,
        # stdout=subprocess.PIPE,
        # stderr=subprocess.PIPE,
        cwd=mongodb_path,  # this prevents a mongod assertion failure
    )

    # wait a bit
    time.sleep(1)
    proc_args_worker = [
        "hyperopt-mongo-worker", "--mongo=localhost:22334/test",
        "--poll-interval=0.1"
    ]

    mongo_worker_proc = subprocess.Popen(
        proc_args_worker,
        # stdout=subprocess.PIPE,
        # stderr=subprocess.PIPE,
        env=merge_dicts(os.environ, {"PYTHONPATH": os.getcwd()}),
    )
    # --------------------------------------------

    db_name = "test"
    exp_name = "test2"

    fn = CompileFN(db_name,
                   exp_name,
                   data_fn=data.data,
                   model_fn=model.build_model,
                   optim_metric="acc",
                   optim_metric_mode="max",
                   save_dir=results_path)
    hyper_params = {
        "data": {},
        "shared": {
            "max_features": 100,
            "maxlen": 20
        },
        "model": {
            "filters": hp.choice("m_filters", (2, 5)),
            "hidden_dims": 3,
        },
        "fit": {
            "epochs": 1
        }
    }
    fn_test(fn, hyper_params, tmp_dir=str(tmpdir))
    trials = KMongoTrials(db_name,
                          exp_name,
                          ip="localhost",
                          kill_timeout=5 * 60,
                          port=22334)

    best = fmin(fn, hyper_params, trials=trials, algo=tpe.suggest, max_evals=2)
    assert len(trials) == 2
    assert len(trials) == trials.n_ok()
    assert isinstance(best, dict)
    assert "m_filters" in best

    # test my custom functions
    trials.as_df()
    trials.train_history(trials.valid_tid()[0])
    trials.train_history(trials.valid_tid())
    trials.get_ok_results()
    tid_best = trials.best_trial_tid()
    assert tid_best == trials.best_trial["tid"]
    assert trials.optimal_epochs(tid_best) == 1

    # --------------------------------------------
    # cross-validation
    db_name = "test"
    exp_name = "test2_cv"

    fn = CompileFN(db_name,
                   exp_name,
                   data_fn=data.data,
                   model_fn=model.build_model,
                   cv_n_folds=3,
                   save_dir=results_path)

    trials = KMongoTrials(db_name,
                          exp_name,
                          ip="localhost",
                          kill_timeout=5 * 60,
                          port=22334)
    fn_test(fn, hyper_params, tmp_dir=str(tmpdir))
    best = fmin(fn,
                deepcopy(hyper_params),
                trials=trials,
                algo=tpe.suggest,
                max_evals=2)
    assert len(trials) == 2
    assert len(trials) == trials.n_ok()
    assert isinstance(best, dict)
    assert "m_filters" in best

    # test my custom functions
    trials.as_df()
    trials.train_history(trials.valid_tid()[0])
    trials.train_history(trials.valid_tid())
    trials.get_ok_results()
    tid_best = trials.best_trial_tid()
    assert tid_best == trials.best_trial["tid"]
    assert trials.optimal_epochs(tid_best) == 1

    assert trials.best_trial_tid() == trials.best_trial["tid"]
    # --------------------------------------------
    # close
    mongo_worker_proc.terminate()
    mongodb_proc.terminate()
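The same objective can also be optimized without MongoDB by using hyperopt's in-memory `Trials`, which Example #7 falls back to when it is not run on MongoDB. A minimal sketch reusing `fn` and `hyper_params` from the test above:

from hyperopt import Trials, fmin, tpe

trials = Trials()    # in-memory trials; no mongod or hyperopt-mongo-worker processes needed
best = fmin(fn, hyper_params, trials=trials, algo=tpe.suggest, max_evals=2)
print(best)          # e.g. {"m_filters": 0}; hp.choice parameters come back as indices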
Example #7
    def __call__(self):
        m_pid = None
        w_pid = None
        if self.run_on_mongodb:
            if self.start_mongodb:
                mongodb_path = tempfile.mkdtemp()

                proc_args = [
                    "mongod",
                    "--dbpath=%s" % mongodb_path, "--noprealloc",
                    "--port=" + str(self.port)
                ]
                print("starting mongod", proc_args)
                mongodb_proc = subprocess.Popen(
                    proc_args,
                    cwd=mongodb_path,
                )
                #workers_list = []
                #if self.nr_of_workers > self.max_evals:
                #       self.nr_of_workers = copy(self.max_evals)
                #for p in range(1,self.nr_of_workers):
                proc_args_worker = [
                    "hyperopt-mongo-worker", "--mongo=" + str(self.ip) + ":" +
                    os.path.join(str(self.port), str(self.db_name)),
                    "--poll-interval=0.1"
                ]
                mongo_worker_proc = subprocess.Popen(
                    proc_args_worker,
                    env=merge_dicts(os.environ, {"PYTHONPATH": os.getcwd()}),
                )
                #workers_list.append(mongo_worker_proc)

                m_pid = mongodb_proc.pid
                w_pid = mongo_worker_proc.pid
            try:
                trials = CMongoTrials(self.db_name,
                                      self.exp_name,
                                      ip=self.ip,
                                      port=self.port,
                                      kill_timeout=KILL_TIMEOUT)
            except pymongo.errors.ServerSelectionTimeoutError:
                print(
                    "No mongod process detected! Please use flag --start_mongodb or"
                    + " start mongoDB and workers. Port: " + str(self.port) +
                    " Host: " + str(self.ip) + " DB name: " +
                    str(self.db_name))
                sys.exit(0)
        else:
            trials = Trials()
        dat = OptimizationData(m_pid, w_pid, self.path, self.sep)
        mod = OptimizationModel()
        if self.metric == "OutlierLoss":
            fn = CompileFN(
                self.db_name,
                self.exp_name,
                data_fn=dat.data,
                model_fn=mod.model,
                add_eval_metrics={"outlier_loss": OutlierLoss()},
                loss_metric="outlier_loss",  # which metric to optimize for
                loss_metric_mode="min",  # try to maximize the metric
                valid_split=
                None,  # use 20% of the training data for the validation set
                save_model=None,  # checkpoint the best model
                save_results=
                True,  # save the results as .json (in addition to mongoDB)
                save_dir=DIR_OUT_TRIALS)
        elif self.metric == "OutlierRecall":
            fn = CompileFN(
                self.db_name,
                self.exp_name,
                data_fn=dat.data,
                model_fn=mod.model,
                add_eval_metrics={
                    "outlier_recall": OutlierRecall(theta=25, threshold=1000)
                },
                loss_metric="outlier_recall",  # which metric to optimize for
                loss_metric_mode="max",  # try to maximize the metric
                valid_split=
                None,  # use 20% of the training data for the validation set
                save_model=None,  # checkpoint the best model
                save_results=
                True,  # save the results as .json (in addition to mongoDB)
                save_dir=DIR_OUT_TRIALS)
        else:
            raise ValueError(
                "No such metric: " + str(self.metric) +
                " Available metrics for --use_metric are: 'OutlierLoss'(default), 'OutlierRecall'."
            )
        best = fmin(fn,
                    self.hyper_params,
                    trials=trials,
                    algo=tpe.suggest,
                    max_evals=self.max_evals)
        best['encoding_dim'] = self.values.q[best['encoding_dim']]
        best['batch_size'] = self.values.batch[best['batch_size']]
        best['epochs'] = self.values.epochs[best['epochs']]
        with open(os.path.join(DIR_OUT_RESULTS, self.exp_name + "_best.json"),
                  'wt') as f:
            json.dump(best, f)
        print("----------------------------------------------------")
        print("best_parameters: " + str(best))
        print("----------------------------------------------------")
        if self.start_mongodb:
            #for proc in workers_list:
            #proc.kill()
            #os.kill(proc.pid, signal.SIGKILL)
            mongo_worker_proc.kill()
            mongodb_proc.kill()
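The three assignments that re-index `best['encoding_dim']`, `best['batch_size']` and `best['epochs']` translate `hp.choice` indices back into concrete values by hand. hyperopt's `space_eval` performs the same translation generically; a hedged alternative sketch, assuming `self.hyper_params` is the search space that was passed to `fmin`:

from hyperopt import space_eval

# map the index-valued `best` dict returned by fmin back onto the actual parameter values
best_values = space_eval(self.hyper_params, best)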