Example 1
    def run(self, trial: Trial) -> TrialReport:  # pragma: no cover
        judge = _NonIterativeJudgeWrapper(self._budget)
        # makedirs(..., recreate=True) returns a sub-filesystem rooted at the
        # checkpoint directory; the objective writes its checkpoints there.
        base_fs = FileSystem()
        fs = base_fs.makedirs(self._checkpoint_path, recreate=True)
        self._func = self._func.copy()
        self._func.run(trial, judge=judge, checkpoint_basedir_fs=fs)
        return judge.report
Example 2
    def _compute(
        self,
        df: Iterable[Dict[str, Any]],
        entrypoint: Callable[[str, Dict[str, Any]], Any],
    ) -> Iterable[Dict[str, Any]]:
        # Create (or reuse) the checkpoint directory once, then run every
        # trial extracted from each input row against it.
        fs = FileSystem()
        ck_fs = fs.makedirs(self._checkpoint_path, recreate=True)
        for row in df:
            for trial in _get_trials_from_row(row):
                rjudge = RemoteTrialJudge(entrypoint)
                self._objective.copy().run(trial, rjudge, ck_fs)
                if rjudge.report is not None:
                    yield rjudge.report.fill_dict(dict(row))
Example 3
def optimize_by_continuous_asha(
    objective: Any,
    dataset: TuneDataset,
    plan: List[Tuple[float, int]],
    checkpoint_path: str = "",
    always_checkpoint: bool = False,
    study_early_stop: Optional[Callable[[List[Any], List[RungHeap]], bool]] = None,
    trial_early_stop: Optional[
        Callable[[TrialReport, List[TrialReport], List[RungHeap]], bool]
    ] = None,
    monitor: Any = None,
) -> StudyResult:
    _objective = TUNE_OBJECT_FACTORY.make_iterative_objective(objective)
    _monitor = TUNE_OBJECT_FACTORY.make_monitor(monitor)
    checkpoint_path = TUNE_OBJECT_FACTORY.get_path_or_temp(checkpoint_path)
    judge = ASHAJudge(
        schedule=plan,
        always_checkpoint=always_checkpoint,
        study_early_stop=study_early_stop,
        trial_early_stop=trial_early_stop,
        monitor=_monitor,
    )
    # Each study writes its checkpoints under a fresh, unique sub-directory.
    path = os.path.join(checkpoint_path, str(uuid4()))
    FileSystem().makedirs(path, recreate=True)
    study = IterativeStudy(_objective, checkpoint_path=path)
    return study.optimize(dataset, judge=judge)
Example 4
def _sk_cv(
    _sk__model: str,
    _sk__train_df: pd.DataFrame,
    _sk__scoring: Any,
    _sk__cv: int = 5,
    _sk__feature_prefix: str = "",
    _sk__label_col: str = "label",
    _sk__save_path: str = "",
    **kwargs: Any,
) -> Dict[str, Any]:
    model = _to_model(_sk__model)(**kwargs)
    # Shuffle the training data deterministically before cross validation.
    train_df = _sk__train_df.sample(frac=1,
                                    random_state=0).reset_index(drop=True)

    train_x = train_df.drop([_sk__label_col], axis=1)
    cols = [x for x in train_x.columns if x.startswith(_sk__feature_prefix)]
    train_x = train_x[cols]
    train_y = train_df[_sk__label_col]

    s = cross_val_score(model,
                        train_x,
                        train_y,
                        cv=_sk__cv,
                        scoring=_sk__scoring)
    metadata = dict(sk_model=_sk__model, cv_scores=[float(x) for x in s])
    if _sk__save_path != "":
        model.fit(train_x, train_y)
        fp = os.path.join(_sk__save_path, str(uuid4()) + ".pkl")
        with FileSystem().openbin(fp, mode="wb") as f:
            pickle.dump(model, f)
        metadata["model_path"] = fp
    return dict(error=-np.mean(s),
                hp=dict(_sk__model=_sk__model, **kwargs),
                metadata=metadata)
Example 5
    def run(self, trial: Trial) -> TrialReport:
        params = dict(trial.params)
        # The model type and datasets are cached; rebuild them only when a
        # new trial id is seen.
        if trial.trial_id != self._last_id:
            self._model_type = to_sk_model(params.pop(SPACE_MODEL_NAME))
            self._model_expr = to_sk_model_expr(self._model_type)
            self._train_x, self._train_y = self._reset_xy(
                trial.dfs[TUNE_DATASET_DF_DEFAULT_NAME]
            )
            self._test_x, self._test_y = self._reset_xy(
                trial.dfs[TUNE_DATASET_VALIDATION_DF_DEFAULT_NAME]
            )
            self._last_id = trial.trial_id
        else:
            params.pop(SPACE_MODEL_NAME)

        model = self._model_type(**params).fit(self._train_x, self._train_y)
        metric = get_scorer(self._scoring)(model, self._test_x, self._test_y)
        metadata = dict(model=self._model_expr)
        if self._checkpoint_path is not None:
            fp = os.path.join(self._checkpoint_path, str(uuid4()) + ".pkl")
            with FileSystem().openbin(fp, mode="wb") as f:
                pickle.dump(model, f)
            metadata["checkpoint_path"] = fp
        return TrialReport(
            trial,
            metric=metric,
            metadata=metadata,
            sort_metric=self.generate_sort_metric(metric),
        )
Example 6
    def run(self, trial: Trial) -> TrialReport:
        params = dict(trial.params)
        if trial.trial_id != self._last_id:
            self._model_type = to_sk_model(params.pop(SPACE_MODEL_NAME))
            self._model_expr = to_sk_model_expr(self._model_type)
            self._train_x, self._train_y = self._reset_xy(
                trial.dfs[TUNE_DATASET_DF_DEFAULT_NAME]
            )
            self._last_id = trial.trial_id
        else:
            params.pop(SPACE_MODEL_NAME)

        model = self._model_type(**params)
        s = cross_val_score(
            model, self._train_x, self._train_y, cv=self._cv, scoring=self._scoring
        )
        metadata = dict(model=self._model_expr, cv_scores=[float(x) for x in s])
        if self._checkpoint_path is not None:
            model.fit(self._train_x, self._train_y)
            fp = os.path.join(self._checkpoint_path, str(uuid4()) + ".pkl")
            with FileSystem().openbin(fp, mode="wb") as f:
                pickle.dump(model, f)
            metadata["checkpoint_path"] = fp
        metric = float(np.mean(s))
        return TrialReport(
            trial,
            metric=metric,
            metadata=metadata,
            sort_metric=self.generate_sort_metric(metric),
        )
Example 7
def test_checkpoint(tmpdir):
    fs = FileSystem().opendir(str(tmpdir))
    cp = Checkpoint(fs)
    assert 0 == len(cp)
    with raises(AssertionError):
        cp.latest
    try:
        # The 4th write raises inside the create() context, so that
        # checkpoint is never committed.
        for i in range(4):
            with cp.create() as sfs:
                sfs.writetext("a.txt", str(i))
                if i == 3:
                    raise Exception
    except Exception:
        pass
    assert 3 == len(cp)
    assert "2" == cp.latest.readtext("a.txt")
    files = fs.listdir(".")
    assert 4 == len(files)
    cp2 = Checkpoint(fs)
    assert 3 == len(cp2)
    assert "2" == cp2.latest.readtext("a.txt")
Example 8
def test_objective_func(tmpdir):
    fs = FileSystem().opendir(str(tmpdir))
    j = J([3, 3, 2])
    f = F().copy()
    t = Trial("abc", {"a": 1})
    f.run(t, judge=j, checkpoint_basedir_fs=fs)
    assert -10 == f.v
    f.run(t, judge=j, checkpoint_basedir_fs=fs)
    assert -10 == f.v
    assert 6.0 == j.report.metric
    assert -6.0 == j.report.sort_metric
    f.run(t, judge=j, checkpoint_basedir_fs=fs)
    assert -10 == f.v
    assert 8.0 == j.report.metric
    assert -8.0 == j.report.sort_metric
Example 9
    def __init__(self, spark_session: Optional[SparkSession] = None, conf: Any = None):
        if spark_session is None:
            spark_session = SparkSession.builder.getOrCreate()
        self._spark_session = spark_session
        cf = dict(FUGUE_SPARK_DEFAULT_CONF)
        cf.update({x[0]: x[1] for x in spark_session.sparkContext.getConf().getAll()})
        cf.update(ParamDict(conf))
        super().__init__(cf)
        self._fs = FileSystem()
        self._log = logging.getLogger()
        # RunOnce caches each call keyed by the id of its first argument, so
        # broadcast/persist/register run at most once per object.
        self._broadcast_func = RunOnce(
            self._broadcast, lambda *args, **kwargs: id(args[0])
        )
        self._persist_func = RunOnce(self._persist, lambda *args, **kwargs: id(args[0]))
        self._register_func = RunOnce(
            self._register, lambda *args, **kwargs: id(args[0])
        )
        self._io = SparkIO(self.spark_session, self.fs)
Example 10
def test_serialize_df(tmpdir):
    def assert_eq(df, df_expected=None, raw=False):
        if df_expected is None:
            df_expected = df
        df_actual = deserialize_df(serialize_df(df))
        if raw:
            assert df_expected.native == df_actual.native
        else:
            df_eq(df_expected, df_actual, throw=True)

    fs = FileSystem()
    assert deserialize_df(serialize_df(None)) is None
    assert_eq(ArrayDataFrame([], "a:int,b:int"))
    assert_eq(ArrayDataFrame([[None, None]], "a:int,b:int"))
    assert_eq(ArrayDataFrame([[None, "abc"]], "a:int,b:str"))
    assert_eq(ArrayDataFrame([[None, [1, 2], dict(x=1)]],
                             "a:int,b:[int],c:{x:int}"),
              raw=True)
    assert_eq(
        IterableDataFrame([[None, [1, 2], dict(x=1)]],
                          "a:int,b:[int],c:{x:int}"),
        ArrayDataFrame([[None, [1, 2], dict(x=1)]], "a:int,b:[int],c:{x:int}"),
        raw=True,
    )
    assert_eq(PandasDataFrame([[None, None]], "a:int,b:int"))
    assert_eq(PandasDataFrame([[None, "abc"]], "a:int,b:str"))

    raises(
        InvalidOperationError,
        lambda: serialize_df(ArrayDataFrame([], "a:int,b:int"), 0),
    )

    path = os.path.join(tmpdir, "1.pkl")

    df = ArrayDataFrame([[None, None]], "a:int,b:int")
    s = serialize_df(df, 0, path, fs)
    df_eq(df, deserialize_df(s, fs), throw=True)
    df_eq(df, deserialize_df(s), throw=True)

    s = serialize_df(df, 0, path)
    df_eq(df, deserialize_df(s), throw=True)

    raises(ValueError, lambda: deserialize_df('{"x":1}'))
Example 11
    def load_checkpoint(self, fs: FSBase, model: keras.models.Model) -> None:
        # Copy the weights file from the checkpoint filesystem to a local
        # temporary file, since keras loads weights from a local path.
        with tempfile.NamedTemporaryFile(suffix=".h5") as tf:
            local_fs = FileSystem()
            with fs.open("model.h5", "rb") as fin:
                local_fs.writefile(tf.name, fin)
            model.load_weights(tf.name)
Example 12
def _sk_stack_cv(
    _sk__model: str,
    _sk__estimators: str,
    _sk__train_df: pd.DataFrame,
    _sk__scoring: Any,
    _sk__stack_cv: int = 2,
    _sk__method: str = "auto",
    _sk__passthrough: bool = False,
    _sk__cv: int = 5,
    _sk__feature_prefix: str = "",
    _sk__label_col: str = "label",
    _sk__save_path: str = "",
    **kwargs: Any,
) -> Dict[str, Any]:
    final_estimator = _to_model(_sk__model)(**kwargs)
    estimators: List[Tuple[str, Any]] = []
    for i, d in enumerate(json.loads(_sk__estimators)):
        key = f"_{i}"
        m = _to_model(d.pop("_sk__model"))
        estimators.append((key, m(**d)))
    if is_classifier(final_estimator):
        model = StackingClassifier(
            estimators,
            final_estimator,
            cv=_sk__stack_cv,
            stack_method=_sk__method,
            passthrough=_sk__passthrough,
            n_jobs=kwargs.get("n_jobs", 1),
        )
    else:
        model = StackingRegressor(
            estimators,
            final_estimator,
            cv=_sk__stack_cv,
            passthrough=_sk__passthrough,
            n_jobs=kwargs.get("n_jobs", 1),
        )
    train_df = _sk__train_df.sample(frac=1,
                                    random_state=0).reset_index(drop=True)

    train_x = train_df.drop([_sk__label_col], axis=1)
    cols = [x for x in train_x.columns if x.startswith(_sk__feature_prefix)]
    train_x = train_x[cols]
    train_y = train_df[_sk__label_col]

    s = cross_val_score(model,
                        train_x,
                        train_y,
                        cv=_sk__cv,
                        scoring=_sk__scoring)
    metadata = dict(sk_model=get_full_type_path(model),
                    cv_scores=[float(x) for x in s])
    if _sk__save_path != "":
        model.fit(train_x, train_y)
        fp = os.path.join(_sk__save_path, str(uuid4()) + ".pkl")
        with FileSystem().openbin(fp, mode="wb") as f:
            pickle.dump(model, f)
        metadata["model_path"] = fp
    return dict(
        error=-np.mean(s),
        hp=dict(
            _sk__model=get_full_type_path(model),
            _sk__estimators=dict(
                **{
                    f"_{i}": d
                    for i, d in enumerate(json.loads(_sk__estimators))
                },
                stacking=dict(_sk__model=_sk__model, **kwargs),
            ),
            _sk__stack_cv=_sk__stack_cv,
            _sk__method=_sk__method,
            _sk__passthrough=_sk__passthrough,
        ),
        metadata=metadata,
    )