Example 1
def cross_validate_by_trial(model: ModelBridge, trial: int = -1) -> List[CVResult]:
    """Cross validation for model predictions on a particular trial.

    Uses all of the data up until the specified trial to predict each of the
    arms launched in that trial. Defaults to the last trial.

    Args:
        model: Fitted model (ModelBridge) to cross validate.
        trial: Trial for which predictions are evaluated.

    Returns:
        A CVResult for each observation in the selected trial.
    """
    # Get in-design training points
    training_data = [
        obs
        for i, obs in enumerate(model.get_training_data())
        if model.training_in_design[i]
    ]
    all_trials = {
        # pyre-fixme[6]: Expected `Union[bytes, str, typing.SupportsInt]` for 1st
        #  param but got `Optional[np.int64]`.
        int(d.features.trial_index)
        for d in training_data
        if d.features.trial_index is not None
    }
    if len(all_trials) < 2:
        raise ValueError(f"Training data has fewer than 2 trials ({all_trials})")
    if trial < 0:
        trial = max(all_trials)
    elif trial not in all_trials:
        raise ValueError(f"Trial {trial} not found in training data")
    # Construct train/test data
    cv_training_data = []
    cv_test_data = []
    cv_test_points = []
    for obs in training_data:
        if obs.features.trial_index is None:
            continue
        # pyre-fixme[58]: `<` is not supported for operand types
        #  `Optional[np.int64]` and `int`.
        elif obs.features.trial_index < trial:
            cv_training_data.append(obs)
        elif obs.features.trial_index == trial:
            cv_test_points.append(obs.features)
            cv_test_data.append(obs)
    # Make the prediction
    cv_test_predictions = model.cross_validate(
        cv_training_data=cv_training_data, cv_test_points=cv_test_points
    )
    # Form CVResult objects
    result = [
        CVResult(observed=obs, predicted=cv_test_predictions[i])
        for i, obs in enumerate(cv_test_data)
    ]
    return result
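
A minimal usage sketch, assuming a fitted ModelBridge named model_bridge whose
training data spans at least two trials (the name model_bridge is illustrative,
not taken from the code above):

# Hypothetical usage: cross-validate the most recent trial.
cv_results = cross_validate_by_trial(model_bridge)
for cv in cv_results:
    # Each CVResult pairs the observed Observation with its out-of-sample
    # prediction (an ObservationData with means and covariance).
    print(cv.observed.arm_name, cv.predicted.means)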
Example 2
    def testModelBridge(self, mock_fit, mock_gen_arms,
                        mock_observations_from_data):
        # Test that on init transforms are stored and applied in the correct order
        transforms = [transform_1, transform_2]
        exp = get_experiment_for_value()
        ss = get_search_space_for_value()
        modelbridge = ModelBridge(ss, 0, transforms, exp, 0)
        self.assertEqual(list(modelbridge.transforms.keys()),
                         ["transform_1", "transform_2"])
        fit_args = mock_fit.mock_calls[0][2]
        self.assertTrue(
            fit_args["search_space"] == get_search_space_for_value(8.0))
        self.assertTrue(fit_args["observation_features"] == [])
        self.assertTrue(fit_args["observation_data"] == [])
        self.assertTrue(mock_observations_from_data.called)

        # Test prediction on out of design features.
        modelbridge._predict = mock.MagicMock(
            "ax.modelbridge.base.ModelBridge._predict",
            autospec=True,
            side_effect=ValueError("Out of Design"),
        )
        # This point is in design, and thus failures in predict are legitimate.
        with mock.patch.object(ModelBridge,
                               "model_space",
                               return_value=get_search_space_for_range_values):
            with self.assertRaises(ValueError):
                modelbridge.predict([get_observation2().features])

        # This point is out of design, and not in training data.
        with self.assertRaises(ValueError):
            modelbridge.predict([get_observation_status_quo0().features])

        # Now it's in the training data.
        with mock.patch.object(
                ModelBridge,
                "get_training_data",
                return_value=[get_observation_status_quo0()],
        ):
            # Return raw training value.
            self.assertEqual(
                modelbridge.predict([get_observation_status_quo0().features]),
                unwrap_observation_data([get_observation_status_quo0().data]),
            )

        # Test that transforms are applied correctly on predict
        modelbridge._predict = mock.MagicMock(
            "ax.modelbridge.base.ModelBridge._predict",
            autospec=True,
            return_value=[get_observation2trans().data],
        )
        modelbridge.predict([get_observation2().features])
        # Observation features sent to _predict are un-transformed afterwards
        modelbridge._predict.assert_called_with([get_observation2().features])

        # Check that _single_predict is equivalent here.
        modelbridge._single_predict([get_observation2().features])
        # Observation features sent to _predict are un-transformed afterwards
        modelbridge._predict.assert_called_with([get_observation2().features])

        # Test transforms applied on gen
        modelbridge._gen = mock.MagicMock(
            "ax.modelbridge.base.ModelBridge._gen",
            autospec=True,
            return_value=([get_observation1trans().features], [2], None, {}),
        )
        oc = OptimizationConfig(objective=Objective(metric=Metric(
            name="test_metric")))
        modelbridge._set_kwargs_to_save(model_key="TestModel",
                                        model_kwargs={},
                                        bridge_kwargs={})
        gr = modelbridge.gen(
            n=1,
            search_space=get_search_space_for_value(),
            optimization_config=oc,
            pending_observations={"a": [get_observation2().features]},
            fixed_features=ObservationFeatures({"x": 5}),
        )
        self.assertEqual(gr._model_key, "TestModel")
        modelbridge._gen.assert_called_with(
            n=1,
            search_space=SearchSpace(
                [FixedParameter("x", ParameterType.FLOAT, 8.0)]),
            optimization_config=oc,
            pending_observations={"a": [get_observation2trans().features]},
            fixed_features=ObservationFeatures({"x": 36}),
            model_gen_options=None,
        )
        mock_gen_arms.assert_called_with(
            arms_by_signature={},
            observation_features=[get_observation1().features])

        # Gen with no pending observations and no fixed features
        modelbridge.gen(n=1,
                        search_space=get_search_space_for_value(),
                        optimization_config=None)
        modelbridge._gen.assert_called_with(
            n=1,
            search_space=SearchSpace(
                [FixedParameter("x", ParameterType.FLOAT, 8.0)]),
            optimization_config=None,
            pending_observations={},
            fixed_features=ObservationFeatures({}),
            model_gen_options=None,
        )

        # Gen with multi-objective optimization config.
        oc2 = OptimizationConfig(objective=ScalarizedObjective(
            metrics=[Metric(name="test_metric"),
                     Metric(name="test_metric_2")]))
        modelbridge.gen(n=1,
                        search_space=get_search_space_for_value(),
                        optimization_config=oc2)
        modelbridge._gen.assert_called_with(
            n=1,
            search_space=SearchSpace(
                [FixedParameter("x", ParameterType.FLOAT, 8.0)]),
            optimization_config=oc2,
            pending_observations={},
            fixed_features=ObservationFeatures({}),
            model_gen_options=None,
        )

        # Test transforms applied on cross_validate
        modelbridge._cross_validate = mock.MagicMock(
            "ax.modelbridge.base.ModelBridge._cross_validate",
            autospec=True,
            return_value=[get_observation1trans().data],
        )
        cv_training_data = [get_observation2()]
        cv_test_points = [get_observation1().features]
        cv_predictions = modelbridge.cross_validate(
            cv_training_data=cv_training_data, cv_test_points=cv_test_points)
        modelbridge._cross_validate.assert_called_with(
            obs_feats=[get_observation2trans().features],
            obs_data=[get_observation2trans().data],
            # untransformed after
            cv_test_points=[get_observation1().features],
        )
        self.assertTrue(cv_predictions == [get_observation1().data])

        # Test stored training data
        obs = modelbridge.get_training_data()
        self.assertTrue(obs == [get_observation1(), get_observation2()])
        self.assertEqual(modelbridge.metric_names, {"a", "b"})
        self.assertIsNone(modelbridge.status_quo)
        self.assertTrue(
            modelbridge.model_space == get_search_space_for_value())
        self.assertEqual(modelbridge.training_in_design, [False, False])

        with self.assertRaises(ValueError):
            modelbridge.training_in_design = [True, True, False]

        # Test feature_importances
        with self.assertRaises(NotImplementedError):
            modelbridge.feature_importances("a")
Example 3
def cross_validate(model: ModelBridge,
                   folds: int = -1,
                   test_selector: Optional[Callable] = None) -> List[CVResult]:
    """Cross validation for model predictions.

    Splits the model's training data into train/test folds and makes
    out-of-sample predictions on the test folds.

    Train/test splits are made based on arm names, so that repeated
    observations of an arm will always be in the train or test set
    together.

    The test set can be limited to a specific set of observations by passing in
    a test_selector callable. This function should take in an Observation
    and return a boolean indicating whether it should be used in the test set.
    For example, the test set can be limited to arms from trial 0 with
    test_selector = lambda obs: obs.features.trial_index == 0
    If not provided, all observations will be available for the test set.

    Args:
        model: Fitted model (ModelBridge) to cross validate.
        folds: Number of folds. Use -1 for leave-one-out; otherwise k-fold
            cross validation is performed.
        test_selector: Function for selecting observations for the test set.

    Returns:
        A CVResult for each observation in the training data.
    """
    # Get in-design training points
    training_data = [
        obs for i, obs in enumerate(model.get_training_data())
        if model.training_in_design[i]
    ]
    arm_names = {obs.arm_name for obs in training_data}
    n = len(arm_names)
    if folds > n:
        raise ValueError(
            f"Training data only has {n} arms, fewer than {folds} folds")
    elif folds < 2 and folds != -1:
        raise ValueError("Folds must be -1 for LOO, or > 1.")
    elif folds == -1:
        folds = n

    arm_names_rnd = np.array(list(arm_names))
    np.random.shuffle(arm_names_rnd)
    result = []
    for train_names, test_names in _gen_train_test_split(
            folds=folds, arm_names=arm_names_rnd):
        # Construct train/test data
        cv_training_data = []
        cv_test_data = []
        cv_test_points = []
        for obs in training_data:
            if obs.arm_name in train_names:
                cv_training_data.append(obs)
            elif obs.arm_name in test_names and (test_selector is None
                                                 or test_selector(obs)):
                cv_test_points.append(obs.features)
                cv_test_data.append(obs)
        if len(cv_test_points) == 0:
            continue
        # Make the prediction
        cv_test_predictions = model.cross_validate(
            cv_training_data=cv_training_data, cv_test_points=cv_test_points)
        # Form CVResult objects
        for i, obs in enumerate(cv_test_data):
            result.append(
                CVResult(observed=obs, predicted=cv_test_predictions[i]))
    return result
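
A minimal usage sketch for the fold-based variant, again assuming a fitted
ModelBridge named model_bridge (an illustrative name, not from the code above):

# Hypothetical usage: 5-fold CV, restricting the test set to observations from
# trial 0, as in the docstring's test_selector example.
cv_results = cross_validate(
    model=model_bridge,
    folds=5,
    test_selector=lambda obs: obs.features.trial_index == 0,
)
# Leave-one-out CV over all in-design arms (folds defaults to -1).
loo_results = cross_validate(model_bridge)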
Example 4
    def testModelBridge(self, mock_fit, mock_gen_arms, mock_observations_from_data):
        # Test that on init transforms are stored and applied in the correct order
        transforms = [t1, t2]
        exp = get_experiment()
        modelbridge = ModelBridge(search_space_for_value(), 0, transforms, exp, 0)
        self.assertEqual(list(modelbridge.transforms.keys()), ["t1", "t2"])
        fit_args = mock_fit.mock_calls[0][2]
        self.assertTrue(fit_args["search_space"] == search_space_for_value(8.0))
        self.assertTrue(
            fit_args["observation_features"]
            == [observation1trans().features, observation2trans().features]
        )
        self.assertTrue(
            fit_args["observation_data"]
            == [observation1trans().data, observation2trans().data]
        )
        self.assertTrue(mock_observations_from_data.called)

        # Test that transforms are applied correctly on predict
        modelbridge._predict = mock.MagicMock(
            "ax.modelbridge.base.ModelBridge._predict",
            autospec=True,
            return_value=[observation2trans().data],
        )

        modelbridge.predict([observation2().features])
        # Observation features sent to _predict are un-transformed afterwards
        modelbridge._predict.assert_called_with([observation2().features])

        # Test transforms applied on gen
        modelbridge._gen = mock.MagicMock(
            "ax.modelbridge.base.ModelBridge._gen",
            autospec=True,
            return_value=([observation1trans().features], [2], None),
        )
        oc = OptimizationConfig(objective=Objective(metric=Metric(name="test_metric")))
        modelbridge._set_kwargs_to_save(
            model_key="TestModel", model_kwargs={}, bridge_kwargs={}
        )
        gr = modelbridge.gen(
            n=1,
            search_space=search_space_for_value(),
            optimization_config=oc,
            pending_observations={"a": [observation2().features]},
            fixed_features=ObservationFeatures({"x": 5}),
        )
        self.assertEqual(gr._model_key, "TestModel")
        modelbridge._gen.assert_called_with(
            n=1,
            search_space=SearchSpace([FixedParameter("x", ParameterType.FLOAT, 8.0)]),
            optimization_config=oc,
            pending_observations={"a": [observation2trans().features]},
            fixed_features=ObservationFeatures({"x": 36}),
            model_gen_options=None,
        )
        mock_gen_arms.assert_called_with(
            arms_by_signature={}, observation_features=[observation1().features]
        )

        # Gen with no pending observations and no fixed features
        modelbridge.gen(
            n=1, search_space=search_space_for_value(), optimization_config=None
        )
        modelbridge._gen.assert_called_with(
            n=1,
            search_space=SearchSpace([FixedParameter("x", ParameterType.FLOAT, 8.0)]),
            optimization_config=None,
            pending_observations={},
            fixed_features=ObservationFeatures({}),
            model_gen_options=None,
        )

        # Gen with multi-objective optimization config.
        oc2 = OptimizationConfig(
            objective=ScalarizedObjective(
                metrics=[Metric(name="test_metric"), Metric(name="test_metric_2")]
            )
        )
        modelbridge.gen(
            n=1, search_space=search_space_for_value(), optimization_config=oc2
        )
        modelbridge._gen.assert_called_with(
            n=1,
            search_space=SearchSpace([FixedParameter("x", ParameterType.FLOAT, 8.0)]),
            optimization_config=oc2,
            pending_observations={},
            fixed_features=ObservationFeatures({}),
            model_gen_options=None,
        )

        # Test transforms applied on cross_validate
        modelbridge._cross_validate = mock.MagicMock(
            "ax.modelbridge.base.ModelBridge._cross_validate",
            autospec=True,
            return_value=[observation1trans().data],
        )
        cv_training_data = [observation2()]
        cv_test_points = [observation1().features]
        cv_predictions = modelbridge.cross_validate(
            cv_training_data=cv_training_data, cv_test_points=cv_test_points
        )
        modelbridge._cross_validate.assert_called_with(
            obs_feats=[observation2trans().features],
            obs_data=[observation2trans().data],
            cv_test_points=[observation1().features],  # untransformed after
        )
        self.assertTrue(cv_predictions == [observation1().data])

        # Test stored training data
        obs = modelbridge.get_training_data()
        self.assertTrue(obs == [observation1(), observation2()])
        self.assertEqual(modelbridge.metric_names, {"a", "b"})
        self.assertIsNone(modelbridge.status_quo)
        self.assertTrue(modelbridge.model_space == search_space_for_value())
        self.assertEqual(modelbridge.training_in_design, [True, True])

        modelbridge.training_in_design = [True, False]
        with self.assertRaises(ValueError):
            modelbridge.training_in_design = [True, True, False]

        ood_obs = modelbridge.out_of_design_data()
        self.assertTrue(ood_obs == unwrap_observation_data([observation2().data]))