Example #1
def extract_optimization_trace(  # pragma: no cover
        experiment: Experiment, problem: BenchmarkProblem) -> np.ndarray:
    """Extract outcomes of an experiment: best cumulative objective as numpy ND-
    array, and total model-fitting time and candidate generation time as floats.
    """
    # Get true values by evaluating the synthetic function noiselessly
    if isinstance(problem,
                  SimpleBenchmarkProblem) and problem.uses_synthetic_function:
        return _extract_optimization_trace_from_synthetic_function(
            experiment=experiment, problem=problem)

    # True values are not available, so just use the known values
    elif isinstance(problem, SimpleBenchmarkProblem):
        logger.info(
            "Cannot obtain true best objectives since an ad-hoc function was used."
        )
        assert len(experiment.optimization_config.outcome_constraints) == 0
        values = np.array([
            checked_cast(Trial, trial).objective_mean
            for trial in experiment.trials.values()
        ])
        return best_feasible_objective(
            optimization_config=experiment.optimization_config,
            values={problem.name: values},
        )

    else:  # Get true values for every outcome for each iteration
        return _extract_optimization_trace_from_metrics(experiment=experiment)
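
To make the shape of the returned trace concrete, here is a minimal standalone sketch in plain numpy (no Ax imports; the data and the maximization direction are illustrative assumptions): the trace is simply the best objective value attained so far at each iteration.

import numpy as np

# Per-trial objective values in run order (made-up data for illustration).
objective_values = np.array([1.0, 3.0, 2.0, 5.0])

# The optimization trace reports the best value attained so far at each
# iteration; for a maximization problem that is the running maximum.
trace = np.maximum.accumulate(objective_values)
print(trace.tolist())  # [1.0, 3.0, 3.0, 5.0]
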
Example #2
def _extract_optimization_trace_from_metrics(
        experiment: Experiment) -> np.ndarray:
    names = []
    for trial in experiment.trials.values():
        for i, arm in enumerate(trial.arms):
            reps = int(trial.weights[i]) if isinstance(trial,
                                                       BatchTrial) else 1
            names.extend([arm.name] * reps)
    iters_df = pd.DataFrame({"arm_name": names})
    data_df = experiment.fetch_data(noisy=False).df
    metrics = data_df["metric_name"].unique()
    true_values = {}
    for metric in metrics:
        df_m = data_df[data_df["metric_name"] == metric]
        # Get one row per arm
        df_m = df_m.groupby("arm_name").first().reset_index()
        df_b = pd.merge(iters_df, df_m, how="left", on="arm_name")
        true_values[metric] = df_b["mean"].values
    if isinstance(experiment.optimization_config,
                  MultiObjectiveOptimizationConfig):
        return feasible_hypervolume(
            # pyre-fixme[6]: Expected `OptimizationConfig` for 1st param but got
            #  `Optional[ax.core.optimization_config.OptimizationConfig]`.
            optimization_config=experiment.optimization_config,
            values=true_values,
        )
    return best_feasible_objective(
        # pyre-fixme[6]: Expected `OptimizationConfig` for 1st param but got
        #  `Optional[ax.core.optimization_config.OptimizationConfig]`.
        optimization_config=experiment.optimization_config,
        values=true_values,
    )
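
Below is a small self-contained sketch of the per-metric alignment step above, with a made-up three-arm data frame standing in for `experiment.fetch_data(noisy=False).df` (arm names, metric names, and values are all illustrative).

import numpy as np
import pandas as pd

# Illustrative stand-in for the fetched data: one row per (arm, metric)
# observation, possibly with repeated fetches for the same arm.
data_df = pd.DataFrame(
    {
        "arm_name": ["0_0", "0_0", "1_0", "2_0", "0_0", "1_0", "2_0"],
        "metric_name": ["obj", "obj", "obj", "obj", "con", "con", "con"],
        "mean": [1.0, 1.0, 3.0, 2.0, 0.0, -1.0, 0.0],
    }
)

# Arm names in iteration order (what the `names` list above collects).
iters_df = pd.DataFrame({"arm_name": ["0_0", "1_0", "2_0"]})

true_values = {}
for metric in data_df["metric_name"].unique():
    df_m = data_df[data_df["metric_name"] == metric]
    # Keep one row per arm, then align to the iteration order via a left merge.
    df_m = df_m.groupby("arm_name").first().reset_index()
    df_b = pd.merge(iters_df, df_m, how="left", on="arm_name")
    true_values[metric] = df_b["mean"].values

print({k: v.tolist() for k, v in true_values.items()})
# {'obj': [1.0, 3.0, 2.0], 'con': [0.0, -1.0, 0.0]}
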
Example #3
def test_best_feasible_objective(self):
    bfo = best_feasible_objective(
        self.optimization_config,
        values={
            "a": np.array([1.0, 3.0, 2.0]),
            "b": np.array([0.0, -1.0, 0.0]),
        },
    )
    self.assertEqual(list(bfo), [1.0, 1.0, 2.0])
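
The expected output can be reproduced with a short standalone sketch of the feasibility-masked running best. The constraint direction (`b >= 0`) and the maximization of `a` are assumptions inferred from the expected `[1.0, 1.0, 2.0]`; the real `best_feasible_objective` reads both from the optimization config.

import numpy as np

values = {"a": np.array([1.0, 3.0, 2.0]), "b": np.array([0.0, -1.0, 0.0])}

# Assumed setup: maximize objective "a" subject to the outcome constraint b >= 0.
feasible = values["b"] >= 0.0

# Replace infeasible objective values with -inf so they never become the best.
masked = np.where(feasible, values["a"], -np.inf)

# Running best over feasible trials only.
bfo = np.maximum.accumulate(masked)
print(bfo.tolist())  # [1.0, 1.0, 2.0]
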
Example #4
def _extract_optimization_trace_from_synthetic_function(
    experiment: Experiment, problem: SimpleBenchmarkProblem
) -> np.ndarray:
    if any(isinstance(trial, BatchTrial) for trial in experiment.trials.values()):
        raise NotImplementedError("Batched trials are not yet supported.")
    true_values = []
    for trial in experiment.trials.values():
        parameters = not_none(checked_cast(Trial, trial).arm).parameters
        # Expecting numerical parameters only.
        value = problem.f(*[float(x) for x in parameters.values()])  # pyre-ignore[6]
        true_values.append(value)
    return best_feasible_objective(
        optimization_config=experiment.optimization_config,
        values={problem.name: true_values},
    )
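
The same idea in a compact, self-contained sketch, with a hypothetical quadratic standing in for `problem.f` (the function, parameter names, and minimization direction are illustrative assumptions).

import numpy as np

# Hypothetical stand-in for `problem.f`: a noiseless 2-D synthetic function.
def f(x1: float, x2: float) -> float:
    return (x1 - 1.0) ** 2 + (x2 + 2.0) ** 2

# Parameters recorded on the (single-arm) trials, in run order (made-up data).
trial_parameters = [
    {"x1": 0.0, "x2": 0.0},
    {"x1": 1.0, "x2": -1.5},
    {"x1": 0.5, "x2": -2.0},
]

# Evaluate the function noiselessly at each arm, as the helper above does.
true_values = [f(*[float(x) for x in p.values()]) for p in trial_parameters]

# For a minimization problem the trace is the running minimum of these values.
trace = np.minimum.accumulate(true_values)
print(trace.tolist())  # [5.0, 0.25, 0.25]
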
Example #5
def extract_optimization_trace(  # pragma: no cover
    experiment: Experiment,
    problem: BenchmarkProblem,
    is_asynchronous: bool,
    **kwargs,
) -> np.ndarray:
    """Extract outcomes of an experiment: best cumulative objective as numpy ND-
    array, and total model-fitting time and candidate generation time as floats.
    """
    if is_asynchronous:
        return _extract_asynchronous_optimization_trace(
            experiment=experiment,
            start_time=kwargs.get("start_time", 0.0),
            end_time=kwargs.get("end_time", 100.0),
            delta_t=kwargs.get("delta_t", 1.0),
            completed_time_key=kwargs.get("completed_time_key", "completed_time"),
            include_only_completed_trials=kwargs.get(
                "include_only_completed_trials", True
            ),
        )

    # Get true values by evaluating the synthetic function noiselessly
    elif (
        isinstance(problem, SimpleBenchmarkProblem) and problem.uses_synthetic_function
    ):
        return _extract_optimization_trace_from_synthetic_function(
            experiment=experiment, problem=problem
        )

    # True values are not available, so just use the known values
    elif isinstance(problem, SimpleBenchmarkProblem):
        logger.info(
            "Cannot obtain true best objectives since an ad-hoc function was used."
        )
        # pyre-fixme[16]: `Optional` has no attribute `outcome_constraints`.
        assert len(experiment.optimization_config.outcome_constraints) == 0
        values = np.array(
            [
                checked_cast(Trial, trial).objective_mean
                for trial in experiment.trials.values()
            ]
        )
        return best_feasible_objective(
            # pyre-fixme[6]: Expected `OptimizationConfig` for 1st param but got
            #  `Optional[ax.core.optimization_config.OptimizationConfig]`.
            optimization_config=experiment.optimization_config,
            values={problem.name: values},
        )

    else:  # Get true values for every outcome for each iteration
        return _extract_optimization_trace_from_metrics(experiment=experiment)
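
For the asynchronous branch, the keyword-argument defaults above imply the following time grid (a small worked sketch mirroring the grid construction in `_extract_asynchronous_optimization_trace` below).

import numpy as np

start_time, end_time, delta_t = 0.0, 100.0, 1.0  # the kwargs defaults above

# Same grid construction as in `_extract_asynchronous_optimization_trace`.
num_periods_running = int((end_time - start_time) // delta_t + 1)
times = np.arange(num_periods_running) * delta_t + start_time

print(num_periods_running)  # 101
print(times[0], times[-1])  # 0.0 100.0
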
Example #6
def _extract_asynchronous_optimization_trace(
    experiment: Experiment,
    start_time: float,
    end_time: float,
    delta_t: float,
    completed_time_key: str,
    include_only_completed_trials: bool,
) -> np.ndarray:
    """Extract optimization trace for an asynchronous benchmark run. This involves
    getting the `completed_time` from the trial `run_metadata`, as described by
    the `completed_time_key`. From the `start_time`, `end_time`, and `delta_t`
    arguments, a sequence of times is constructed. The returned optimization trace
    is the best achieved value so far for each time, amongst completed (or early
    stopped) trials.

    Args:
        experiment: The experiment from which to generate results.
        start_time: The starting time.
        end_time: The ending time.
        delta_t: The increment between successive time points.
        completed_time_key: The key from which we look up completed run times
            from trial `run_metadata`.
        include_only_completed_trials: Include results only from completed trials.
            This will ignore trials that were early stopped.

    Returns:
        An array representing the optimization trace as a function of time.
    """
    if any(isinstance(trial, BatchTrial) for trial in experiment.trials.values()):
        raise NotImplementedError("Batched trials are not yet supported.")

    def get_completed_time(row):
        time = experiment.trials[row.trial_index].run_metadata[completed_time_key]
        return pd.Series({"completed_time": time})

    if include_only_completed_trials:
        completed_trials = experiment.trial_indices_by_status[TrialStatus.COMPLETED]
        data_df = experiment.fetch_trials_data(
            trial_indices=completed_trials, noisy=False
        ).df
    else:
        data_df = experiment.fetch_data(noisy=False).df

    minimize = experiment.optimization_config.objective.minimize  # pyre-ignore[16]
    num_periods_running = int((end_time - start_time) // delta_t + 1)
    # TODO: Currently, the timestamps generated below must exactly match the
    # `completed_time` column
    iters_df = pd.DataFrame(
        {"completed_time": np.arange(num_periods_running) * delta_t + start_time}
    )
    true_values = {}
    for metric, df_m in data_df.groupby("metric_name"):
        df_m = data_df[data_df["metric_name"] == metric]

        # only keep the last data point for each arm
        df_m = (
            df_m.sort_values(["timestamp"], ascending=True)
            .groupby("arm_name")
            .tail(n=1)
        )

        # get completed times from run metadata
        df_m["completed_time"] = df_m.apply(get_completed_time, axis=1)

        # for trials that completed at the same time, keep only the best
        df_m_g = df_m.groupby("completed_time")
        df_m = (df_m_g.min() if minimize else df_m_g.max()).reset_index()

        # take cumulative best wrt the completed time
        df_m = df_m.sort_index()
        df_m["mean"] = df_m["mean"].cummin(
        ) if minimize else df_m["mean"].cummax()
        df_b = pd.merge(iters_df, df_m, how="left", on="completed_time")

        # replace nans with Infs, which can be handled by `best_feasible_objective`
        true_values[metric] = df_b["mean"].fillna(np.Inf if minimize else -np.Inf)
    return best_feasible_objective(
        # pyre-fixme[6]: Expected `OptimizationConfig` for 1st param but got
        #  `Optional[ax.core.optimization_config.OptimizationConfig]`.
        optimization_config=experiment.optimization_config,
        values=true_values,
    )
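
To illustrate the time-indexed trace, here is a self-contained sketch of the per-metric core (best value per completion time, cumulative best, left merge onto the time grid, NaN-to-inf fill), using made-up completion times and a hand-set `minimize` flag in place of an Ax `Experiment`.

import numpy as np
import pandas as pd

minimize = True  # assumed objective direction for this sketch

# Hypothetical completed trials: objective value and wall-clock completion time.
df_m = pd.DataFrame({"completed_time": [2.0, 1.0, 4.0], "mean": [0.8, 1.5, 0.5]})

# For trials that completed at the same time, keep only the best, then take the
# cumulative best with respect to completion time.
df_m_g = df_m.groupby("completed_time")
df_m = (df_m_g.min() if minimize else df_m_g.max()).reset_index()
df_m["mean"] = df_m["mean"].cummin() if minimize else df_m["mean"].cummax()

# Evaluate the trace on a regular time grid; times with no newly completed trial
# become NaN after the left merge and are filled with +/-inf, matching the
# "replace nans with Infs" step above.
iters_df = pd.DataFrame({"completed_time": np.arange(6) * 1.0})
df_b = pd.merge(iters_df, df_m, how="left", on="completed_time")
trace = df_b["mean"].fillna(np.inf if minimize else -np.inf)
print(trace.tolist())  # [inf, 1.5, 0.8, inf, 0.5, inf]
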