Example #1
def test_report_bin():
    t1 = Trial("a", dict(a=1, b=2), keys=["x", "y"])
    r1 = TrialReport(t1, 0.8, sort_metric=-0.8)
    t2 = Trial("b", dict(a=11, b=12), keys=["xx", "y"])
    r2 = TrialReport(t2, 0.7, sort_metric=-0.7)
    t3 = Trial("c", dict(a=10, b=20), keys=["x", "y"])
    r3 = TrialReport(t3, 0.9, sort_metric=-0.9)

    # an empty bin has no records and no best report
    b = _ReportBin()
    assert 0 == len(b.records)
    assert b.best is None
    # on_report returns True when the incoming report becomes the new best
    assert b.on_report(r1)
    assert b.on_report(r3)
    assert r3 is b.best
    assert 2 == len(b.records)

    # a report that does not improve on the best is recorded but returns False
    b = _ReportBin()
    assert b.on_report(r3)
    assert not b.on_report(r1)
    assert r3 is b.best
    assert 2 == len(b.records)

    # with new_best_only=True, non-improving reports are not recorded at all
    b = _ReportBin(new_best_only=True)
    assert b.on_report(r3)
    assert not b.on_report(r1)
    assert r3 is b.best
    assert 1 == len(b.records)
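The reports above pass sort_metric = -metric: sort_metric is the value used for ranking (lower is better), so negating a higher-is-better score turns maximization into minimization. A minimal sketch of the convention, assuming Trial and TrialReport are importable from the top-level tune package:

from tune import Trial, TrialReport

trial = Trial("demo", dict(a=1), keys=["x"])
# metric is the raw score (higher is better here); sort_metric is what gets
# compared, so negating it makes the better report sort lower
good = TrialReport(trial, 0.9, sort_metric=-0.9)
ok = TrialReport(trial, 0.8, sort_metric=-0.8)
assert good.sort_metric < ok.sort_metric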
Example #2
def test_print_best():
    t1 = Trial("a", dict(a=1, b=2), keys=["x", "y"])
    r1 = TrialReport(t1, 0.8, sort_metric=-0.8)
    t2 = Trial("b", dict(a=11, b=12), keys=["xx", "y"])
    r2 = TrialReport(t2, 0.7, sort_metric=-0.7)
    t3 = Trial("c", dict(a=10, b=20), keys=["x", "y"])
    r3 = TrialReport(t3, 0.9, sort_metric=-0.9)

    b = PrintBest()
    b.on_report(r3)
    b.on_report(r2)
    b.on_report(r1)
Example #3
    def run(self, trial: Trial) -> TrialReport:
        params = dict(trial.params)
        # resolve the model class and reload train/validation data only when a
        # new trial_id arrives; otherwise reuse the cached arrays
        if trial.trial_id != self._last_id:
            self._model_type = to_sk_model(params.pop(SPACE_MODEL_NAME))
            self._model_expr = to_sk_model_expr(self._model_type)
            self._train_x, self._train_y = self._reset_xy(
                trial.dfs[TUNE_DATASET_DF_DEFAULT_NAME]
            )
            self._test_x, self._test_y = self._reset_xy(
                trial.dfs[TUNE_DATASET_VALIDATION_DF_DEFAULT_NAME]
            )
            self._last_id = trial.trial_id
        else:
            # data is already cached; still drop the model name from the fit params
            params.pop(SPACE_MODEL_NAME)

        # fit on the training set, then score on the held-out validation set
        model = self._model_type(**params).fit(self._train_x, self._train_y)
        metric = get_scorer(self._scoring)(model, self._test_x, self._test_y)
        metadata = dict(model=self._model_expr)
        if self._checkpoint_path is not None:
            # optionally persist the fitted model and record where it was saved
            fp = os.path.join(self._checkpoint_path, str(uuid4()) + ".pkl")
            with FileSystem().openbin(fp, mode="wb") as f:
                pickle.dump(model, f)
            metadata["checkpoint_path"] = fp
        return TrialReport(
            trial,
            metric=metric,
            metadata=metadata,
            sort_metric=self.generate_sort_metric(metric),
        )
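When checkpoint_path is set, the fitted model is pickled and its location is returned under metadata["checkpoint_path"]. A minimal loading sketch; the triad import is an assumption (use whatever FileSystem the objective above uses), and report stands for the returned TrialReport:

import pickle

from triad import FileSystem  # assumed import path for the FileSystem used above

def load_checkpointed_model(report):
    # the objective stored the pickle path under "checkpoint_path"
    fp = report.metadata["checkpoint_path"]
    with FileSystem().openbin(fp, mode="rb") as f:
        return pickle.load(f)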
Example #4
    def run(self, trial: Trial) -> TrialReport:
        params = dict(trial.params)
        if trial.trial_id != self._last_id:
            self._model_type = to_sk_model(params.pop(SPACE_MODEL_NAME))
            self._model_expr = to_sk_model_expr(self._model_type)
            self._train_x, self._train_y = self._reset_xy(
                trial.dfs[TUNE_DATASET_DF_DEFAULT_NAME]
            )
            self._last_id = trial.trial_id
        else:
            params.pop(SPACE_MODEL_NAME)

        # score with k-fold cross validation instead of a held-out set
        model = self._model_type(**params)
        s = cross_val_score(
            model, self._train_x, self._train_y, cv=self._cv, scoring=self._scoring
        )
        metadata = dict(model=self._model_expr, cv_scores=[float(x) for x in s])
        if self._checkpoint_path is not None:
            # cross_val_score fits clones, so fit the estimator itself once on the
            # full training data before checkpointing it
            model.fit(self._train_x, self._train_y)
            fp = os.path.join(self._checkpoint_path, str(uuid4()) + ".pkl")
            with FileSystem().openbin(fp, mode="wb") as f:
                pickle.dump(model, f)
            metadata["checkpoint_path"] = fp
        # the reported metric is the mean of the fold scores
        metric = float(np.mean(s))
        return TrialReport(
            trial,
            metric=metric,
            metadata=metadata,
            sort_metric=self.generate_sort_metric(metric),
        )
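Here the per-fold scores stay in the metadata while the reported metric is their mean, so callers can inspect both. A small sketch, with report standing for the TrialReport returned above:

fold_scores = report.metadata["cv_scores"]  # one float per CV fold
assert abs(report.metric - sum(fold_scores) / len(fold_scores)) < 1e-9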
Example #5
    def run_single_iteration(self) -> TrialReport:
        # advance one step and report f evaluated at the current parameters
        self.step += 1
        trial = self.current_trial
        return TrialReport(
            trial=trial,
            metric=f(
                self.step,
                trial.params.simple_value["a"],
                trial.params.simple_value["b"],
            ),
        )
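f is defined elsewhere in the original test; any function of the step counter and the two tuned parameters works. A hypothetical stand-in, purely for illustration:

def f(step: int, a: float, b: float) -> float:
    # hypothetical objective: approaches -(a + b) as the step count grows
    return -(a + b) * (1.0 - 1.0 / (step + 1))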
Example #6
def _test_charts():
    t1 = Trial("a", dict(a=1, b=2), keys=["x", "y"])
    r1 = TrialReport(t1, 0.8, sort_metric=-0.8)
    t2 = Trial("b", dict(a=11, b=12), keys=["xx", "y"])
    r2 = TrialReport(t2, 0.7, sort_metric=-0.7)
    t3 = Trial("c", dict(a=10, b=20), keys=["x", "y"])
    r3 = TrialReport(t3, 0.9, sort_metric=-0.9)

    b = NotebookSimpleTimeSeries("1sec")
    b.on_report(r3)
    b.on_report(r2)
    b.on_report(r1)

    b = NotebookSimpleHist("2sec")
    b.on_report(r3)
    b.on_report(r2)
    b.on_report(r1)

    b = NotebookSimpleRungs("3sec")
    b.on_report(r3)
    b.on_report(r2)
    b.on_report(r1)
Example #7
    def run_single_rung(self, budget: float) -> TrialReport:
        trial = self.current_trial
        fit_args, fit_kwargs = self.spec.get_fit_params()
        fit_kwargs = dict(fit_kwargs)
        # continue training from the epochs already spent, for int(budget) more
        fit_kwargs.update(
            dict(epochs=self._epochs + int(budget), initial_epoch=self._epochs)
        )
        h = self.model.fit(*fit_args, **fit_kwargs)
        metric = self.spec.get_fit_metric(h)
        self._epochs += int(budget)
        return TrialReport(trial=trial, metric=metric, cost=budget, rung=self.rung)
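The _epochs counter is what lets successive rungs continue training rather than restart: each rung fits for int(budget) more epochs starting at initial_epoch=self._epochs. A tiny arithmetic sketch of that bookkeeping (plain Python, no Keras involved):

epochs = 0
for budget in [1, 2, 4]:  # successive rung budgets, e.g. from a pruning scheduler
    start, end = epochs, epochs + int(budget)
    print(f"this rung trains epochs [{start}, {end})")
    epochs = end
assert epochs == 7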
Example #8
def suggest_sk_models_by_cv(
    space: Space,
    train_df: Any,
    scoring: str,
    cv: int = 5,
    temp_path: str = "",
    feature_prefix: str = "",
    label_col: str = "label",
    save_model: bool = False,
    partition_keys: Optional[List[str]] = None,
    top_n: int = 1,
    objective_runner: Optional[NonIterativeObjectiveRunner] = None,
    monitor: Any = None,
    distributed: Optional[bool] = None,
    execution_engine: Any = None,
    execution_engine_conf: Any = None,
) -> List[TrialReport]:
    dag = FugueWorkflow()
    dataset = TUNE_OBJECT_FACTORY.make_dataset(
        dag,
        space,
        df=train_df,
        partition_keys=partition_keys,
        temp_path=temp_path,
    )
    objective = SKCVObjective(
        scoring=scoring,
        cv=cv,
        feature_prefix=feature_prefix,
        label_col=label_col,
        checkpoint_path=temp_path if save_model else None,
    )
    study = optimize_noniterative(
        objective=objective,
        dataset=dataset,
        runner=objective_runner,
        distributed=distributed,
        monitor=monitor,
    )
    # keep only the top_n reports and expose them as the "result" output
    study.result(top_n).yield_dataframe_as("result")

    rows = list(
        dag.run(
            execution_engine,
            conf=execution_engine_conf,
        )["result"].as_dict_iterable()
    )
    # deserialize each report, returning them sorted by the stored metric column
    return [
        TrialReport.from_jsondict(json.loads(r[TUNE_REPORT]))
        for r in sorted(rows, key=lambda r: r[TUNE_REPORT_METRIC])
    ]
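A hedged usage sketch of the function above. The imports, the sk_space helper, and the Grid expression are assumptions about how the tune project exposes them and are not verified against a specific version; the keyword arguments match the signature defined above:

import pandas as pd

from tune import Grid  # assumed top-level export
from tune_sklearn import sk_space, suggest_sk_models_by_cv  # sk_space assumed

# toy training data with the default "label" column
train_df = pd.DataFrame(
    {"x1": [1.0, 2.0, 3.0, 4.0], "x2": [0.1, 0.2, 0.3, 0.4], "label": [1, 2, 3, 4]}
)

# hypothetical search space: Ridge regression with two candidate alphas
space = sk_space("sklearn.linear_model.Ridge", alpha=Grid(0.1, 1.0))

reports = suggest_sk_models_by_cv(
    space,
    train_df,
    scoring="neg_mean_absolute_error",
    cv=2,
    temp_path="/tmp/tune",  # also used for checkpoints when save_model=True
    top_n=1,
)
print(reports[0].metric, reports[0].metadata["model"])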