Example #1
def cross_validate_by_trial(model: ModelBridge, trial: int = -1) -> List[CVResult]:
    """Cross validation for model predictions on a particular trial.

    Uses all of the data up until the specified trial to predict each of the
    arms that were launched in that trial. Defaults to the last trial.

    Args:
        model: Fitted model (ModelBridge) to cross validate.
        trial: Trial for which predictions are evaluated.

    Returns:
        A CVResult for each observation in the training data.
    """
    # Get in-design training points
    training_data = [
        obs
        for i, obs in enumerate(model.get_training_data())
        if model.training_in_design[i]
    ]
    all_trials = {
        # pyre-fixme[6]: Expected `Union[bytes, str, typing.SupportsInt]` for 1st
        #  param but got `Optional[np.int64]`.
        int(d.features.trial_index)
        for d in training_data
        if d.features.trial_index is not None
    }
    if len(all_trials) < 2:
        raise ValueError(f"Training data has fewer than 2 trials ({all_trials})")
    if trial < 0:
        trial = max(all_trials)
    elif trial not in all_trials:
        raise ValueError(f"Trial {trial} not found in training data")
    # Construct train/test data
    cv_training_data = []
    cv_test_data = []
    cv_test_points = []
    for obs in training_data:
        if obs.features.trial_index is None:
            continue
        # pyre-fixme[58]: `<` is not supported for operand types
        #  `Optional[np.int64]` and `int`.
        elif obs.features.trial_index < trial:
            cv_training_data.append(obs)
        elif obs.features.trial_index == trial:
            cv_test_points.append(obs.features)
            cv_test_data.append(obs)
    # Make the prediction
    cv_test_predictions = model.cross_validate(
        cv_training_data=cv_training_data, cv_test_points=cv_test_points
    )
    # Form CVResult objects
    result = [
        CVResult(observed=obs, predicted=cv_test_predictions[i])
        for i, obs in enumerate(cv_test_data)
    ]
    return result
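
# --- Illustrative usage sketch (not part of the library) --------------------
# A minimal helper showing how the CVResults returned above might be
# summarized, assuming each CVResult exposes `observed` (an Observation with
# `.data.means` / `.data.metric_names`) and `predicted` (ObservationData), as
# implied by the CVResult construction above. The helper name is hypothetical.
from collections import defaultdict
from typing import Dict, List


def summarize_cv_error(cv_results: List[CVResult]) -> Dict[str, float]:
    """Mean absolute prediction error per metric across CV results."""
    errors: Dict[str, List[float]] = defaultdict(list)
    for cv in cv_results:
        observed = {
            name: cv.observed.data.means[j]
            for j, name in enumerate(cv.observed.data.metric_names)
        }
        for j, name in enumerate(cv.predicted.metric_names):
            if name in observed:
                errors[name].append(abs(cv.predicted.means[j] - observed[name]))
    return {name: sum(vals) / len(vals) for name, vals in errors.items()}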
Example #2
def get_fixed_values(
    model: ModelBridge,
    slice_values: Optional[Dict[str, Any]] = None,
    trial_index: Optional[int] = None,
) -> TParameterization:
    """Get fixed values for parameters in a slice plot.

    If there is an in-design status quo, those values will be used. Otherwise,
    the mean of RangeParameters, the mode of ChoiceParameters, or the value of
    FixedParameters is used.

    Any value in slice_values will override the above.

    Args:
        model: ModelBridge being used for plotting.
        slice_values: Map from parameter name to the value at which it should
            be fixed.
        trial_index: Optional trial index; if given, it is added to
            slice_values under the "TRIAL_PARAM" key.

    Returns: Map from parameter name to fixed value.
    """

    if trial_index is not None:
        if slice_values is None:
            slice_values = {}
        slice_values["TRIAL_PARAM"] = str(trial_index)

    # Check if status_quo is in design
    if model.status_quo is not None and model.model_space.check_membership(
        # pyre-fixme[16]: `Optional` has no attribute `features`.
        model.status_quo.features.parameters
    ):
        setx = model.status_quo.features.parameters
    else:
        observations = model.get_training_data()
        setx = {}
        for p_name, parameter in model.model_space.parameters.items():
            # Exclude out of design status quo (no parameters)
            vals = [
                obs.features.parameters[p_name]
                for obs in observations
                if (
                    len(obs.features.parameters) > 0
                    and parameter.validate(obs.features.parameters[p_name])
                )
            ]
            if isinstance(parameter, FixedParameter):
                setx[p_name] = parameter.value
            elif isinstance(parameter, ChoiceParameter):
                setx[p_name] = Counter(vals).most_common(1)[0][0]
            elif isinstance(parameter, RangeParameter):
                setx[p_name] = parameter.cast(np.mean(vals))

    if slice_values is not None:
        # slice_values has type Dict[str, Any]
        setx.update(slice_values)
    return setx
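
# --- Illustrative usage sketch (not part of the library) --------------------
# One way get_fixed_values might be used when building a slice plot: hold all
# parameters at their fixed values while sweeping a single parameter over a
# grid. ObservationFeatures is assumed importable from ax.core.observation;
# the helper name is hypothetical.
from typing import Any, Dict, List, Optional

from ax.core.observation import ObservationFeatures


def make_slice_points(
    model: ModelBridge,
    slice_param: str,
    grid: List[float],
    slice_values: Optional[Dict[str, Any]] = None,
) -> List[ObservationFeatures]:
    """Prediction points that vary slice_param over grid, others held fixed."""
    fixed = get_fixed_values(model, slice_values=slice_values)
    points = []
    for value in grid:
        parameters = dict(fixed)
        parameters[slice_param] = value
        points.append(ObservationFeatures(parameters=parameters))
    return points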
Example #3
    def testModelBridge(self, mock_fit, mock_gen_arms,
                        mock_observations_from_data):
        # Test that on init transforms are stored and applied in the correct order
        transforms = [transform_1, transform_2]
        exp = get_experiment_for_value()
        ss = get_search_space_for_value()
        modelbridge = ModelBridge(ss, 0, transforms, exp, 0)
        self.assertEqual(list(modelbridge.transforms.keys()),
                         ["transform_1", "transform_2"])
        fit_args = mock_fit.mock_calls[0][2]
        self.assertTrue(
            fit_args["search_space"] == get_search_space_for_value(8.0))
        self.assertTrue(fit_args["observation_features"] == [])
        self.assertTrue(fit_args["observation_data"] == [])
        self.assertTrue(mock_observations_from_data.called)

        # Test prediction on out of design features.
        modelbridge._predict = mock.MagicMock(
            "ax.modelbridge.base.ModelBridge._predict",
            autospec=True,
            side_effect=ValueError("Out of Design"),
        )
        # This point is in design, and thus failures in predict are legitimate.
        with mock.patch.object(ModelBridge,
                               "model_space",
                               return_value=get_search_space_for_range_values):
            with self.assertRaises(ValueError):
                modelbridge.predict([get_observation2().features])

        # This point is out of design, and not in training data.
        with self.assertRaises(ValueError):
            modelbridge.predict([get_observation_status_quo0().features])

        # Now it's in the training data.
        with mock.patch.object(
                ModelBridge,
                "get_training_data",
                return_value=[get_observation_status_quo0()],
        ):
            # Return raw training value.
            self.assertEqual(
                modelbridge.predict([get_observation_status_quo0().features]),
                unwrap_observation_data([get_observation_status_quo0().data]),
            )

        # Test that transforms are applied correctly on predict
        modelbridge._predict = mock.MagicMock(
            "ax.modelbridge.base.ModelBridge._predict",
            autospec=True,
            return_value=[get_observation2trans().data],
        )
        modelbridge.predict([get_observation2().features])
        # Observation features sent to _predict are un-transformed afterwards
        modelbridge._predict.assert_called_with([get_observation2().features])

        # Check that _single_predict is equivalent here.
        modelbridge._single_predict([get_observation2().features])
        # Observation features sent to _predict are un-transformed afterwards
        modelbridge._predict.assert_called_with([get_observation2().features])

        # Test transforms applied on gen
        modelbridge._gen = mock.MagicMock(
            "ax.modelbridge.base.ModelBridge._gen",
            autospec=True,
            return_value=([get_observation1trans().features], [2], None, {}),
        )
        oc = OptimizationConfig(objective=Objective(metric=Metric(
            name="test_metric")))
        modelbridge._set_kwargs_to_save(model_key="TestModel",
                                        model_kwargs={},
                                        bridge_kwargs={})
        gr = modelbridge.gen(
            n=1,
            search_space=get_search_space_for_value(),
            optimization_config=oc,
            pending_observations={"a": [get_observation2().features]},
            fixed_features=ObservationFeatures({"x": 5}),
        )
        self.assertEqual(gr._model_key, "TestModel")
        modelbridge._gen.assert_called_with(
            n=1,
            search_space=SearchSpace(
                [FixedParameter("x", ParameterType.FLOAT, 8.0)]),
            optimization_config=oc,
            pending_observations={"a": [get_observation2trans().features]},
            fixed_features=ObservationFeatures({"x": 36}),
            model_gen_options=None,
        )
        mock_gen_arms.assert_called_with(
            arms_by_signature={},
            observation_features=[get_observation1().features])

        # Gen with no pending observations and no fixed features
        modelbridge.gen(n=1,
                        search_space=get_search_space_for_value(),
                        optimization_config=None)
        modelbridge._gen.assert_called_with(
            n=1,
            search_space=SearchSpace(
                [FixedParameter("x", ParameterType.FLOAT, 8.0)]),
            optimization_config=None,
            pending_observations={},
            fixed_features=ObservationFeatures({}),
            model_gen_options=None,
        )

        # Gen with multi-objective optimization config.
        oc2 = OptimizationConfig(objective=ScalarizedObjective(
            metrics=[Metric(name="test_metric"),
                     Metric(name="test_metric_2")]))
        modelbridge.gen(n=1,
                        search_space=get_search_space_for_value(),
                        optimization_config=oc2)
        modelbridge._gen.assert_called_with(
            n=1,
            search_space=SearchSpace(
                [FixedParameter("x", ParameterType.FLOAT, 8.0)]),
            optimization_config=oc2,
            pending_observations={},
            fixed_features=ObservationFeatures({}),
            model_gen_options=None,
        )

        # Test transforms applied on cross_validate
        modelbridge._cross_validate = mock.MagicMock(
            "ax.modelbridge.base.ModelBridge._cross_validate",
            autospec=True,
            return_value=[get_observation1trans().data],
        )
        cv_training_data = [get_observation2()]
        cv_test_points = [get_observation1().features]
        cv_predictions = modelbridge.cross_validate(
            cv_training_data=cv_training_data, cv_test_points=cv_test_points)
        modelbridge._cross_validate.assert_called_with(
            obs_feats=[get_observation2trans().features],
            obs_data=[get_observation2trans().data],
            cv_test_points=[get_observation1().features],  # untransformed after
        )
        self.assertTrue(cv_predictions == [get_observation1().data])

        # Test stored training data
        obs = modelbridge.get_training_data()
        self.assertTrue(obs == [get_observation1(), get_observation2()])
        self.assertEqual(modelbridge.metric_names, {"a", "b"})
        self.assertIsNone(modelbridge.status_quo)
        self.assertTrue(
            modelbridge.model_space == get_search_space_for_value())
        self.assertEqual(modelbridge.training_in_design, [False, False])

        with self.assertRaises(ValueError):
            modelbridge.training_in_design = [True, True, False]

        # Test feature_importances
        with self.assertRaises(NotImplementedError):
            modelbridge.feature_importances("a")
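
# --- Illustrative sketch (not part of the test above) ------------------------
# The test signature implies three stacked mock.patch decorators; with
# unittest.mock, the bottom-most decorator is bound to the first mock argument
# after self. A plausible arrangement is sketched below; the patch targets and
# return values are assumptions, not taken from the source.
#
# @mock.patch(
#     "ax.modelbridge.base.observations_from_data",
#     autospec=True,
#     return_value=[get_observation1(), get_observation2()],
# )
# @mock.patch("ax.modelbridge.base.gen_arms", autospec=True, return_value=[])
# @mock.patch("ax.modelbridge.base.ModelBridge._fit", autospec=True)
# def testModelBridge(self, mock_fit, mock_gen_arms, mock_observations_from_data):
#     ...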
Example #4
def _get_in_sample_arms(
    model: ModelBridge,
    metric_names: Set[str],
    fixed_features: Optional[ObservationFeatures] = None,
) -> Tuple[Dict[str, PlotInSampleArm], RawData, Dict[str, TParameterization]]:
    """Get in-sample arms from a model with observed and predicted values
    for specified metrics.

    Returns a PlotInSampleArm object in which repeated observations are merged
    with IVW, and a RawData object in which every observation is listed.

    Fixed features input can be used to override fields of the in-sample arms
    when making model predictions.

    Args:
        model: An instance of the model bridge.
        metric_names: Restrict predictions to these metrics. If None, uses all
            metrics in the model.
        fixed_features: Features that should be fixed in the arms this function
            will obtain predictions for.

    Returns:
        A tuple containing

        - Map from arm name to PlotInSampleArm.
        - List of the data for each observation like::

            {'metric_name': 'likes', 'arm_name': '0_0', 'mean': 1., 'sem': 0.1}

        - Map from arm name to parameters
    """
    observations = model.get_training_data()
    # Calculate raw data
    raw_data = []
    arm_name_to_parameters = {}
    for obs in observations:
        arm_name_to_parameters[obs.arm_name] = obs.features.parameters
        for j, metric_name in enumerate(obs.data.metric_names):
            if metric_name in metric_names:
                raw_data.append({
                    "metric_name": metric_name,
                    "arm_name": obs.arm_name,
                    "mean": obs.data.means[j],
                    "sem": np.sqrt(obs.data.covariance[j, j]),
                })

    # Check that we have one ObservationFeatures per arm name, since we key by
    # arm name and the model is not multi-task. If the "TrialAsTask" transform
    # is present, repeated arm names are expected and an arbitrary one is used.
    if ("TrialAsTask" not in model.transforms.keys()) and (
            len(arm_name_to_parameters) != len(observations)):
        logger.error(
            "Have observations of arms with different features but same"
            " name. Arbitrary one will be plotted.")

    # Merge multiple measurements within each Observation with IVW to get
    # un-modeled prediction
    t = IVW(None, [], [])
    obs_data = t.transform_observation_data([obs.data for obs in observations],
                                            [])
    # Start filling in plot data
    in_sample_plot: Dict[str, PlotInSampleArm] = {}
    for i, obs in enumerate(observations):
        if obs.arm_name is None:
            raise ValueError("Observation must have arm name for plotting.")

        # Extract raw measurement
        obs_y = {}  # Observed metric means.
        obs_se = {}  # Observed metric standard errors.
        # Use the IVW data, not obs.data
        for j, metric_name in enumerate(obs_data[i].metric_names):
            if metric_name in metric_names:
                obs_y[metric_name] = obs_data[i].means[j]
                obs_se[metric_name] = np.sqrt(obs_data[i].covariance[j, j])
        # Make a prediction.
        if model.training_in_design[i]:
            features = obs.features
            if fixed_features is not None:
                features.update_features(fixed_features)
            pred_y, pred_se = _predict_at_point(model, features, metric_names)
        else:
            # Use raw data for out-of-design points
            pred_y = obs_y
            pred_se = obs_se
        in_sample_plot[not_none(obs.arm_name)] = PlotInSampleArm(
            name=not_none(obs.arm_name),
            y=obs_y,
            se=obs_se,
            parameters=obs.features.parameters,
            y_hat=pred_y,
            se_hat=pred_se,
            context_stratum=None,
        )
    return in_sample_plot, raw_data, arm_name_to_parameters
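
# --- Illustrative usage sketch (not part of the library) --------------------
# One plausible use of the fixed_features argument: predict every in-sample
# arm as if it had been run in a specific trial. ObservationFeatures is
# assumed importable from ax.core.observation; the helper name is
# hypothetical, and the return types are reused from the function above.
from ax.core.observation import ObservationFeatures


def in_sample_arms_at_trial(
    model: ModelBridge, metric_names: Set[str], trial_index: int
) -> Tuple[Dict[str, PlotInSampleArm], RawData, Dict[str, TParameterization]]:
    """In-sample arms with predictions pinned to one trial index."""
    return _get_in_sample_arms(
        model=model,
        metric_names=metric_names,
        fixed_features=ObservationFeatures(parameters={}, trial_index=trial_index),
    )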
Example #5
def cross_validate(model: ModelBridge,
                   folds: int = -1,
                   test_selector: Optional[Callable] = None) -> List[CVResult]:
    """Cross validation for model predictions.

    Splits the model's training data into train/test folds and makes
    out-of-sample predictions on the test folds.

    Train/test splits are made based on arm names, so that repeated
    observations of an arm will always be in the train or test set
    together.

    The test set can be limited to a specific set of observations by passing
    in a test_selector callable. This function should take in an Observation
    and return a boolean indicating whether it should be used in the test set.
    For example, the test set can be limited to arms from trial 0 with
    test_selector = lambda obs: obs.features.trial_index == 0.
    If not provided, all observations will be available for the test set.

    Args:
        model: Fitted model (ModelBridge) to cross validate.
        folds: Number of folds. Use -1 for leave-one-out, otherwise will be
            k-fold.
        test_selector: Function for selecting observations for the test set.

    Returns:
        A CVResult for each observation in the training data.
    """
    # Get in-design training points
    training_data = [
        obs for i, obs in enumerate(model.get_training_data())
        if model.training_in_design[i]
    ]
    arm_names = {obs.arm_name for obs in training_data}
    n = len(arm_names)
    if folds > n:
        raise ValueError(
            f"Training data only has {n} arms, which is less than folds")
    elif folds < 2 and folds != -1:
        raise ValueError("Folds must be -1 for LOO, or > 1.")
    elif folds == -1:
        folds = n

    arm_names_rnd = np.array(list(arm_names))
    np.random.shuffle(arm_names_rnd)
    result = []
    for train_names, test_names in _gen_train_test_split(
            folds=folds, arm_names=arm_names_rnd):
        # Construct train/test data
        cv_training_data = []
        cv_test_data = []
        cv_test_points = []
        for obs in training_data:
            if obs.arm_name in train_names:
                cv_training_data.append(obs)
            elif obs.arm_name in test_names and (test_selector is None
                                                 or test_selector(obs)):
                cv_test_points.append(obs.features)
                cv_test_data.append(obs)
        if len(cv_test_points) == 0:
            continue
        # Make the prediction
        cv_test_predictions = model.cross_validate(
            cv_training_data=cv_training_data, cv_test_points=cv_test_points)
        # Form CVResult objects
        for i, obs in enumerate(cv_test_data):
            result.append(
                CVResult(observed=obs, predicted=cv_test_predictions[i]))
    return result
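
# --- Illustrative usage sketch (not part of the library) --------------------
# The invocation suggested by the docstring above, wrapped in a helper so the
# snippet stays self-contained: leave-one-out CV restricted to observations
# from a single trial via test_selector. The helper name is hypothetical.
def loo_cv_for_trial(model: ModelBridge, trial_index: int) -> List[CVResult]:
    """Leave-one-out CV over arms, testing only observations from one trial."""
    return cross_validate(
        model,
        folds=-1,  # -1 requests leave-one-out
        test_selector=lambda obs: obs.features.trial_index == trial_index,
    )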
Example #6
def _get_in_sample_arms(
    model: ModelBridge, metric_names: Set[str]
) -> Tuple[Dict[str, PlotInSampleArm], RawData, Dict[str, TParameterization]]:
    """Get in-sample arms from a model with observed and predicted values
    for specified metrics.

    Returns a PlotInSampleArm object in which repeated observations are merged
    with IVW, and a RawData object in which every observation is listed.

    Args:
        model: An instance of the model bridge.
        metric_names: Restrict predictions to these metrics. If None, uses all
            metrics in the model.

    Returns:
        A tuple containing

        - Map from arm name to PlotInSampleArm.
        - List of the data for each observation like::

            {'metric_name': 'likes', 'arm_name': '0_0', 'mean': 1., 'sem': 0.1}

        - Map from arm name to parameters
    """
    observations = model.get_training_data()
    # Calculate raw data
    raw_data = []
    cond_name_to_parameters = {}
    for obs in observations:
        cond_name_to_parameters[obs.arm_name] = obs.features.parameters
        for j, metric_name in enumerate(obs.data.metric_names):
            if metric_name in metric_names:
                raw_data.append({
                    "metric_name": metric_name,
                    "arm_name": obs.arm_name,
                    "mean": obs.data.means[j],
                    "sem": np.sqrt(obs.data.covariance[j, j]),
                })
    # Check that we have one ObservationFeatures per arm name since we
    # key by arm name.
    if len(cond_name_to_parameters) != len(observations):
        logger.error(
            "Have observations of arms with different features but same"
            " name. Arbitrary one will be plotted.")
    # Merge multiple measurements within each Observation with IVW to get
    # un-modeled prediction
    t = IVW(None, [], [])
    obs_data = t.transform_observation_data([obs.data for obs in observations],
                                            [])
    # Start filling in plot data
    in_sample_plot: Dict[str, PlotInSampleArm] = {}
    for i, obs in enumerate(observations):
        if obs.arm_name is None:
            raise ValueError("Observation must have arm name for plotting.")

        # Extract raw measurement
        obs_y = {}
        obs_se = {}
        # Use the IVW data, not obs.data
        for j, metric_name in enumerate(obs_data[i].metric_names):
            if metric_name in metric_names:
                obs_y[metric_name] = obs_data[i].means[j]
                obs_se[metric_name] = np.sqrt(obs_data[i].covariance[j, j])
        # Make a prediction.
        if model.training_in_design[i]:
            pred_y, pred_se = _predict_at_point(model, obs.features,
                                                metric_names)
        else:
            # Use raw data for out-of-design points
            pred_y = obs_y
            pred_se = obs_se
        in_sample_plot[obs.arm_name] = PlotInSampleArm(
            name=obs.arm_name,
            y=obs_y,
            se=obs_se,
            parameters=obs.features.parameters,
            y_hat=pred_y,
            se_hat=pred_se,
            context_stratum=None,
        )
    return in_sample_plot, raw_data, cond_name_to_parameters
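
# --- Illustrative usage sketch (not part of the library) --------------------
# How the returned tuple might be consumed: compare the IVW-merged observed
# means (`y`) with the model predictions (`y_hat`) for each in-sample arm.
# Field names follow the PlotInSampleArm construction above; the helper name
# is hypothetical, and y and y_hat are assumed to share metric keys, as in the
# loop above.
def observed_vs_predicted(
    model: ModelBridge, metric_names: Set[str]
) -> List[Tuple[str, str, float, float]]:
    """Rows of (arm_name, metric, observed mean, predicted mean)."""
    in_sample_plot, raw_data, cond_name_to_parameters = _get_in_sample_arms(
        model, metric_names)
    rows = []
    for arm_name, arm in in_sample_plot.items():
        for metric, observed_mean in arm.y.items():
            rows.append((arm_name, metric, observed_mean, arm.y_hat[metric]))
    return rows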
Example #7
    def testModelBridge(self, mock_fit, mock_gen_arms, mock_observations_from_data):
        # Test that on init transforms are stored and applied in the correct order
        transforms = [t1, t2]
        exp = get_experiment()
        modelbridge = ModelBridge(search_space_for_value(), 0, transforms, exp, 0)
        self.assertEqual(list(modelbridge.transforms.keys()), ["t1", "t2"])
        fit_args = mock_fit.mock_calls[0][2]
        self.assertTrue(fit_args["search_space"] == search_space_for_value(8.0))
        self.assertTrue(
            fit_args["observation_features"]
            == [observation1trans().features, observation2trans().features]
        )
        self.assertTrue(
            fit_args["observation_data"]
            == [observation1trans().data, observation2trans().data]
        )
        self.assertTrue(mock_observations_from_data.called)

        # Test that transforms are applied correctly on predict
        modelbridge._predict = mock.MagicMock(
            "ax.modelbridge.base.ModelBridge._predict",
            autospec=True,
            return_value=[observation2trans().data],
        )

        modelbridge.predict([observation2().features])
        # Observation features sent to _predict are un-transformed afterwards
        modelbridge._predict.assert_called_with([observation2().features])

        # Test transforms applied on gen
        modelbridge._gen = mock.MagicMock(
            "ax.modelbridge.base.ModelBridge._gen",
            autospec=True,
            return_value=([observation1trans().features], [2], None),
        )
        oc = OptimizationConfig(objective=Objective(metric=Metric(name="test_metric")))
        modelbridge._set_kwargs_to_save(
            model_key="TestModel", model_kwargs={}, bridge_kwargs={}
        )
        gr = modelbridge.gen(
            n=1,
            search_space=search_space_for_value(),
            optimization_config=oc,
            pending_observations={"a": [observation2().features]},
            fixed_features=ObservationFeatures({"x": 5}),
        )
        self.assertEqual(gr._model_key, "TestModel")
        modelbridge._gen.assert_called_with(
            n=1,
            search_space=SearchSpace([FixedParameter("x", ParameterType.FLOAT, 8.0)]),
            optimization_config=oc,
            pending_observations={"a": [observation2trans().features]},
            fixed_features=ObservationFeatures({"x": 36}),
            model_gen_options=None,
        )
        mock_gen_arms.assert_called_with(
            arms_by_signature={}, observation_features=[observation1().features]
        )

        # Gen with no pending observations and no fixed features
        modelbridge.gen(
            n=1, search_space=search_space_for_value(), optimization_config=None
        )
        modelbridge._gen.assert_called_with(
            n=1,
            search_space=SearchSpace([FixedParameter("x", ParameterType.FLOAT, 8.0)]),
            optimization_config=None,
            pending_observations={},
            fixed_features=ObservationFeatures({}),
            model_gen_options=None,
        )

        # Gen with multi-objective optimization config.
        oc2 = OptimizationConfig(
            objective=ScalarizedObjective(
                metrics=[Metric(name="test_metric"), Metric(name="test_metric_2")]
            )
        )
        modelbridge.gen(
            n=1, search_space=search_space_for_value(), optimization_config=oc2
        )
        modelbridge._gen.assert_called_with(
            n=1,
            search_space=SearchSpace([FixedParameter("x", ParameterType.FLOAT, 8.0)]),
            optimization_config=oc2,
            pending_observations={},
            fixed_features=ObservationFeatures({}),
            model_gen_options=None,
        )

        # Test transforms applied on cross_validate
        modelbridge._cross_validate = mock.MagicMock(
            "ax.modelbridge.base.ModelBridge._cross_validate",
            autospec=True,
            return_value=[observation1trans().data],
        )
        cv_training_data = [observation2()]
        cv_test_points = [observation1().features]
        cv_predictions = modelbridge.cross_validate(
            cv_training_data=cv_training_data, cv_test_points=cv_test_points
        )
        modelbridge._cross_validate.assert_called_with(
            obs_feats=[observation2trans().features],
            obs_data=[observation2trans().data],
            cv_test_points=[observation1().features],  # untransformed after
        )
        self.assertTrue(cv_predictions == [observation1().data])

        # Test stored training data
        obs = modelbridge.get_training_data()
        self.assertTrue(obs == [observation1(), observation2()])
        self.assertEqual(modelbridge.metric_names, {"a", "b"})
        self.assertIsNone(modelbridge.status_quo)
        self.assertTrue(modelbridge.model_space == search_space_for_value())
        self.assertEqual(modelbridge.training_in_design, [True, True])

        modelbridge.training_in_design = [True, False]
        with self.assertRaises(ValueError):
            modelbridge.training_in_design = [True, True, False]

        ood_obs = modelbridge.out_of_design_data()
        self.assertTrue(ood_obs == unwrap_observation_data([observation2().data]))
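
# --- Illustrative note (not from the source) --------------------------------
# The assertion above compares out_of_design_data() against
# unwrap_observation_data(); in Ax that helper flattens a list of
# ObservationData into the model-prediction format, i.e. a tuple of per-metric
# means and per-metric-pair covariances keyed by metric name. A hedged
# illustration of the expected shape (metric names and values hypothetical):
#
# f, cov = unwrap_observation_data([observation2().data])
# # f   == {"a": [2.0], "b": [4.0]}
# # cov == {"a": {"a": [1.0], "b": [0.0]}, "b": {"a": [0.0], "b": [1.0]}}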