def cross_validate_by_trial(model: ModelBridge, trial: int = -1) -> List[CVResult]: """Cross validation for model predictions on a particular trial. Uses all of the data up until the specified trial to predict each of the arms that was launched in that trial. Defaults to the last trial. Args: model: Fitted model (ModelBridge) to cross validate. trial: Trial for which predictions are evaluated. Returns: A CVResult for each observation in the training data. """ # Get in-design training points training_data = [ obs for i, obs in enumerate(model.get_training_data()) if model.training_in_design[i] ] all_trials = { # pyre-fixme[6]: Expected `Union[bytes, str, typing.SupportsInt]` for 1st # param but got `Optional[np.int64]`. int(d.features.trial_index) for d in training_data if d.features.trial_index is not None } if len(all_trials) < 2: raise ValueError(f"Training data has fewer than 2 trials ({all_trials})") if trial < 0: trial = max(all_trials) elif trial not in all_trials: raise ValueError(f"Trial {trial} not found in training data") # Construct train/test data cv_training_data = [] cv_test_data = [] cv_test_points = [] for obs in training_data: if obs.features.trial_index is None: continue # pyre-fixme[58]: `<` is not supported for operand types # `Optional[np.int64]` and `int`. elif obs.features.trial_index < trial: cv_training_data.append(obs) elif obs.features.trial_index == trial: cv_test_points.append(obs.features) cv_test_data.append(obs) # Make the prediction cv_test_predictions = model.cross_validate( cv_training_data=cv_training_data, cv_test_points=cv_test_points ) # Form CVResult objects result = [ CVResult(observed=obs, predicted=cv_test_predictions[i]) for i, obs in enumerate(cv_test_data) ] return result
def testModelBridge(self, mock_fit, mock_gen_arms, mock_observations_from_data): # Test that on init transforms are stored and applied in the correct order transforms = [transform_1, transform_2] exp = get_experiment_for_value() ss = get_search_space_for_value() modelbridge = ModelBridge(ss, 0, transforms, exp, 0) self.assertEqual(list(modelbridge.transforms.keys()), ["transform_1", "transform_2"]) fit_args = mock_fit.mock_calls[0][2] self.assertTrue( fit_args["search_space"] == get_search_space_for_value(8.0)) self.assertTrue(fit_args["observation_features"] == []) self.assertTrue(fit_args["observation_data"] == []) self.assertTrue(mock_observations_from_data.called) # Test prediction on out of design features. modelbridge._predict = mock.MagicMock( "ax.modelbridge.base.ModelBridge._predict", autospec=True, side_effect=ValueError("Out of Design"), ) # This point is in design, and thus failures in predict are legitimate. with mock.patch.object(ModelBridge, "model_space", return_value=get_search_space_for_range_values): with self.assertRaises(ValueError): modelbridge.predict([get_observation2().features]) # This point is out of design, and not in training data. with self.assertRaises(ValueError): modelbridge.predict([get_observation_status_quo0().features]) # Now it's in the training data. with mock.patch.object( ModelBridge, "get_training_data", return_value=[get_observation_status_quo0()], ): # Return raw training value. self.assertEqual( modelbridge.predict([get_observation_status_quo0().features]), unwrap_observation_data([get_observation_status_quo0().data]), ) # Test that transforms are applied correctly on predict modelbridge._predict = mock.MagicMock( "ax.modelbridge.base.ModelBridge._predict", autospec=True, return_value=[get_observation2trans().data], ) modelbridge.predict([get_observation2().features]) # Observation features sent to _predict are un-transformed afterwards modelbridge._predict.assert_called_with([get_observation2().features]) # Check that _single_predict is equivalent here. modelbridge._single_predict([get_observation2().features]) # Observation features sent to _predict are un-transformed afterwards modelbridge._predict.assert_called_with([get_observation2().features]) # Test transforms applied on gen modelbridge._gen = mock.MagicMock( "ax.modelbridge.base.ModelBridge._gen", autospec=True, return_value=([get_observation1trans().features], [2], None, {}), ) oc = OptimizationConfig(objective=Objective(metric=Metric( name="test_metric"))) modelbridge._set_kwargs_to_save(model_key="TestModel", model_kwargs={}, bridge_kwargs={}) gr = modelbridge.gen( n=1, search_space=get_search_space_for_value(), optimization_config=oc, pending_observations={"a": [get_observation2().features]}, fixed_features=ObservationFeatures({"x": 5}), ) self.assertEqual(gr._model_key, "TestModel") modelbridge._gen.assert_called_with( n=1, search_space=SearchSpace( [FixedParameter("x", ParameterType.FLOAT, 8.0)]), optimization_config=oc, pending_observations={"a": [get_observation2trans().features]}, fixed_features=ObservationFeatures({"x": 36}), model_gen_options=None, ) mock_gen_arms.assert_called_with( arms_by_signature={}, observation_features=[get_observation1().features]) # Gen with no pending observations and no fixed features modelbridge.gen(n=1, search_space=get_search_space_for_value(), optimization_config=None) modelbridge._gen.assert_called_with( n=1, search_space=SearchSpace( [FixedParameter("x", ParameterType.FLOAT, 8.0)]), optimization_config=None, pending_observations={}, fixed_features=ObservationFeatures({}), model_gen_options=None, ) # Gen with multi-objective optimization config. oc2 = OptimizationConfig(objective=ScalarizedObjective( metrics=[Metric(name="test_metric"), Metric(name="test_metric_2")])) modelbridge.gen(n=1, search_space=get_search_space_for_value(), optimization_config=oc2) modelbridge._gen.assert_called_with( n=1, search_space=SearchSpace( [FixedParameter("x", ParameterType.FLOAT, 8.0)]), optimization_config=oc2, pending_observations={}, fixed_features=ObservationFeatures({}), model_gen_options=None, ) # Test transforms applied on cross_validate modelbridge._cross_validate = mock.MagicMock( "ax.modelbridge.base.ModelBridge._cross_validate", autospec=True, return_value=[get_observation1trans().data], ) cv_training_data = [get_observation2()] cv_test_points = [get_observation1().features] cv_predictions = modelbridge.cross_validate( cv_training_data=cv_training_data, cv_test_points=cv_test_points) modelbridge._cross_validate.assert_called_with( obs_feats=[get_observation2trans().features], obs_data=[get_observation2trans().data], cv_test_points=[get_observation1().features ], # untransformed after ) self.assertTrue(cv_predictions == [get_observation1().data]) # Test stored training data obs = modelbridge.get_training_data() self.assertTrue(obs == [get_observation1(), get_observation2()]) self.assertEqual(modelbridge.metric_names, {"a", "b"}) self.assertIsNone(modelbridge.status_quo) self.assertTrue( modelbridge.model_space == get_search_space_for_value()) self.assertEqual(modelbridge.training_in_design, [False, False]) with self.assertRaises(ValueError): modelbridge.training_in_design = [True, True, False] with self.assertRaises(ValueError): modelbridge.training_in_design = [True, True, False] # Test feature_importances with self.assertRaises(NotImplementedError): modelbridge.feature_importances("a")
def cross_validate(model: ModelBridge, folds: int = -1, test_selector: Optional[Callable] = None) -> List[CVResult]: """Cross validation for model predictions. Splits the model's training data into train/test folds and makes out-of-sample predictions on the test folds. Train/test splits are made based on arm names, so that repeated observations of a arm will always be in the train or test set together. The test set can be limited to a specific set of observations by passing in a test_selector callable. This function should take in an Observation and return a boolean indiciating if it should be used in the test set or not. For example, we can limit the test set to arms with trial 0 with test_selector = lambda obs: obs.features.trial_index == 0 If not provided, all observations will be available for the test set. Args: model: Fitted model (ModelBridge) to cross validate. folds: Number of folds. Use -1 for leave-one-out, otherwise will be k-fold. test_selector: Function for selecting observations for the test set. Returns: A CVResult for each observation in the training data. """ # Get in-design training points training_data = [ obs for i, obs in enumerate(model.get_training_data()) if model.training_in_design[i] ] arm_names = {obs.arm_name for obs in training_data} n = len(arm_names) if folds > n: raise ValueError( f"Training data only has {n} arms, which is less than folds") elif folds < 2 and folds != -1: raise ValueError("Folds must be -1 for LOO, or > 1.") elif folds == -1: folds = n arm_names_rnd = np.array(list(arm_names)) np.random.shuffle(arm_names_rnd) result = [] for train_names, test_names in _gen_train_test_split( folds=folds, arm_names=arm_names_rnd): # Construct train/test data cv_training_data = [] cv_test_data = [] cv_test_points = [] for obs in training_data: if obs.arm_name in train_names: cv_training_data.append(obs) elif obs.arm_name in test_names and (test_selector is None or test_selector(obs)): cv_test_points.append(obs.features) cv_test_data.append(obs) if len(cv_test_points) == 0: continue # Make the prediction cv_test_predictions = model.cross_validate( cv_training_data=cv_training_data, cv_test_points=cv_test_points) # Form CVResult objects for i, obs in enumerate(cv_test_data): result.append( CVResult(observed=obs, predicted=cv_test_predictions[i])) return result
def testModelBridge(self, mock_fit, mock_gen_arms, mock_observations_from_data): # Test that on init transforms are stored and applied in the correct order transforms = [t1, t2] exp = get_experiment() modelbridge = ModelBridge(search_space_for_value(), 0, transforms, exp, 0) self.assertEqual(list(modelbridge.transforms.keys()), ["t1", "t2"]) fit_args = mock_fit.mock_calls[0][2] self.assertTrue(fit_args["search_space"] == search_space_for_value(8.0)) self.assertTrue( fit_args["observation_features"] == [observation1trans().features, observation2trans().features] ) self.assertTrue( fit_args["observation_data"] == [observation1trans().data, observation2trans().data] ) self.assertTrue(mock_observations_from_data.called) # Test that transforms are applied correctly on predict modelbridge._predict = mock.MagicMock( "ax.modelbridge.base.ModelBridge._predict", autospec=True, return_value=[observation2trans().data], ) modelbridge.predict([observation2().features]) # Observation features sent to _predict are un-transformed afterwards modelbridge._predict.assert_called_with([observation2().features]) # Test transforms applied on gen modelbridge._gen = mock.MagicMock( "ax.modelbridge.base.ModelBridge._gen", autospec=True, return_value=([observation1trans().features], [2], None), ) oc = OptimizationConfig(objective=Objective(metric=Metric(name="test_metric"))) modelbridge._set_kwargs_to_save( model_key="TestModel", model_kwargs={}, bridge_kwargs={} ) gr = modelbridge.gen( n=1, search_space=search_space_for_value(), optimization_config=oc, pending_observations={"a": [observation2().features]}, fixed_features=ObservationFeatures({"x": 5}), ) self.assertEqual(gr._model_key, "TestModel") modelbridge._gen.assert_called_with( n=1, search_space=SearchSpace([FixedParameter("x", ParameterType.FLOAT, 8.0)]), optimization_config=oc, pending_observations={"a": [observation2trans().features]}, fixed_features=ObservationFeatures({"x": 36}), model_gen_options=None, ) mock_gen_arms.assert_called_with( arms_by_signature={}, observation_features=[observation1().features] ) # Gen with no pending observations and no fixed features modelbridge.gen( n=1, search_space=search_space_for_value(), optimization_config=None ) modelbridge._gen.assert_called_with( n=1, search_space=SearchSpace([FixedParameter("x", ParameterType.FLOAT, 8.0)]), optimization_config=None, pending_observations={}, fixed_features=ObservationFeatures({}), model_gen_options=None, ) # Gen with multi-objective optimization config. oc2 = OptimizationConfig( objective=ScalarizedObjective( metrics=[Metric(name="test_metric"), Metric(name="test_metric_2")] ) ) modelbridge.gen( n=1, search_space=search_space_for_value(), optimization_config=oc2 ) modelbridge._gen.assert_called_with( n=1, search_space=SearchSpace([FixedParameter("x", ParameterType.FLOAT, 8.0)]), optimization_config=oc2, pending_observations={}, fixed_features=ObservationFeatures({}), model_gen_options=None, ) # Test transforms applied on cross_validate modelbridge._cross_validate = mock.MagicMock( "ax.modelbridge.base.ModelBridge._cross_validate", autospec=True, return_value=[observation1trans().data], ) cv_training_data = [observation2()] cv_test_points = [observation1().features] cv_predictions = modelbridge.cross_validate( cv_training_data=cv_training_data, cv_test_points=cv_test_points ) modelbridge._cross_validate.assert_called_with( obs_feats=[observation2trans().features], obs_data=[observation2trans().data], cv_test_points=[observation1().features], # untransformed after ) self.assertTrue(cv_predictions == [observation1().data]) # Test stored training data obs = modelbridge.get_training_data() self.assertTrue(obs == [observation1(), observation2()]) self.assertEqual(modelbridge.metric_names, {"a", "b"}) self.assertIsNone(modelbridge.status_quo) self.assertTrue(modelbridge.model_space == search_space_for_value()) self.assertEqual(modelbridge.training_in_design, [True, True]) modelbridge.training_in_design = [True, False] with self.assertRaises(ValueError): modelbridge.training_in_design = [True, True, False] ood_obs = modelbridge.out_of_design_data() self.assertTrue(ood_obs == unwrap_observation_data([observation2().data]))