Example #1
def get_pending_observation_features(
    experiment: Experiment,
    include_failed_as_pending: bool = False
) -> Optional[Dict[str, List[ObservationFeatures]]]:
    """Computes a list of pending observation features (corresponding to arms that
    have been generated and deployed in the course of the experiment, but have not
    been completed with data, or to arms that have been abandoned or belong to
    abandoned trials).

    NOTE: Pending observation features are passed to the model to
    instruct it to not generate the same points again.

    Args:
        experiment: Experiment for which to compute the pending observation features.
        include_failed_as_pending: Whether to include failed trials as pending
            (for example, to avoid the model suggesting them again).

    Returns:
        An optional mapping from metric names to a list of observation features,
        pending for that metric (i.e. do not have evaluation data for that metric).
        If there are no pending features for any of the metrics, return is None.
    """
    pending_features = {}
    # Note that this assumes that if a metric appears in fetched data, the trial
    # is not pending for that metric. For cases where only the most recent data
    # matters, this will work, but we may need to add logic to check previously
    # added data objects, too.
    for trial_index, trial in experiment.trials.items():
        dat = trial.lookup_data()
        for metric_name in experiment.metrics:
            if metric_name not in pending_features:
                pending_features[metric_name] = []
            include_since_failed = include_failed_as_pending and trial.status.is_failed
            if isinstance(trial, BatchTrial):
                if trial.status.is_abandoned or (
                    (trial.status.is_deployed or include_since_failed)
                        and metric_name not in dat.df.metric_name.values
                        and trial.arms is not None):
                    for arm in trial.arms:
                        not_none(pending_features.get(metric_name)).append(
                            ObservationFeatures.from_arm(
                                arm=arm, trial_index=np.int64(trial_index)))
                abandoned_arms = trial.abandoned_arms
                for abandoned_arm in abandoned_arms:
                    not_none(pending_features.get(metric_name)).append(
                        ObservationFeatures.from_arm(
                            arm=abandoned_arm,
                            trial_index=np.int64(trial_index)))

            if isinstance(trial, Trial):
                if trial.status.is_abandoned or (
                    (trial.status.is_deployed or include_since_failed)
                        and metric_name not in dat.df.metric_name.values
                        and trial.arm is not None):
                    not_none(pending_features.get(metric_name)).append(
                        ObservationFeatures.from_arm(
                            arm=not_none(trial.arm),
                            trial_index=np.int64(trial_index)))
    return pending_features if any(
        x for x in pending_features.values()) else None
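
A minimal usage sketch of the helper above, assuming an existing Ax ``Experiment`` named ``experiment`` and a fitted ``ModelBridge`` named ``model_bridge`` (neither is defined in these examples): the returned pending features are typically forwarded to the model's ``gen`` call so that already-deployed or abandoned arms are not suggested again.

# Hypothetical usage sketch; `experiment` and `model_bridge` are assumed to
# already exist and are not defined in these examples.
pending = get_pending_observation_features(
    experiment, include_failed_as_pending=True
)
generator_run = model_bridge.gen(
    n=1,
    pending_observations=pending,  # may be None if nothing is pending
)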
Example #2
 def test_flatten_observation_features(self):
     # Ensure that during casting, full parameterization is saved
     # in metadata and actual parameterization is cast to HSS; during
     # flattening, the parameterization in metadata is used to inject back
     # the parameters removed during casting.
     hss_1_obs_feats_1 = ObservationFeatures.from_arm(
         arm=self.hss_1_arm_1_flat)
     hss_1_obs_feats_1_cast = self.hss_1.cast_observation_features(
         observation_features=hss_1_obs_feats_1)
     hss_1_obs_feats_1_flattened = self.hss_1.flatten_observation_features(
         observation_features=hss_1_obs_feats_1_cast)
     self.assertEqual(  # Cast-flatten roundtrip.
         hss_1_obs_feats_1.parameters,
         hss_1_obs_feats_1_flattened.parameters,
     )
     self.assertEqual(  # Check that both cast and flattened have full params.
         hss_1_obs_feats_1_cast.metadata.get(Keys.FULL_PARAMETERIZATION),
         hss_1_obs_feats_1_flattened.metadata.get(
             Keys.FULL_PARAMETERIZATION),
     )
     # Check that flattening observation features without metadata does nothing.
     self.assertEqual(
         self.hss_1.flatten_observation_features(
             observation_features=hss_1_obs_feats_1),
         hss_1_obs_feats_1,
     )
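
The cast/flatten mechanics exercised by this test can be summarized in a short hedged sketch, assuming an existing ``HierarchicalSearchSpace`` named ``hss`` and a hypothetical ``Arm`` named ``flat_arm`` whose parameterization covers the full, flat search space (neither is defined in these examples).

# Hypothetical roundtrip sketch; `hss` and `flat_arm` are assumed to exist.
obs_feats = ObservationFeatures.from_arm(arm=flat_arm)
# Casting drops parameters that are inactive in the hierarchical structure but
# stores the original parameterization in metadata under
# Keys.FULL_PARAMETERIZATION.
cast_feats = hss.cast_observation_features(observation_features=obs_feats)
# Flattening uses that metadata to inject the removed parameters back.
flat_feats = hss.flatten_observation_features(observation_features=cast_feats)
assert flat_feats.parameters == obs_feats.parameters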
Example #3
 def setUp(self) -> None:
     self.experiment = get_experiment()
     self.arm = Arm({"x": 1, "y": "foo", "z": True, "w": 4})
     self.trial = self.experiment.new_trial(GeneratorRun([self.arm]))
     self.experiment_2 = get_experiment()
     self.batch_trial = self.experiment_2.new_batch_trial(GeneratorRun([self.arm]))
     self.batch_trial.set_status_quo_with_weight(self.experiment_2.status_quo, 1)
     self.obs_feat = ObservationFeatures.from_arm(
         arm=self.trial.arm, trial_index=np.int64(self.trial.index)
     )
Example #4
    def testClone(self):
        # Test simple cloning.
        arm = Arm({"x": 0, "y": "a"})
        obsf = ObservationFeatures.from_arm(arm, trial_index=3)
        self.assertIsNot(obsf, obsf.clone())
        self.assertEqual(obsf, obsf.clone())

        # Test cloning with swapping parameters.
        clone_with_new_params = obsf.clone(replace_parameters={"x": 1, "y": "b"})
        self.assertNotEqual(obsf, clone_with_new_params)
        obsf.parameters = {"x": 1, "y": "b"}
        self.assertEqual(obsf, clone_with_new_params)
Example #5
def _get_out_of_sample_arms(
    model: ModelBridge,
    generator_runs_dict: Dict[str, GeneratorRun],
    metric_names: Set[str],
    fixed_features: Optional[ObservationFeatures] = None,
) -> Dict[str, Dict[str, PlotOutOfSampleArm]]:
    """Get out-of-sample predictions from a model given a dict of generator runs.

    Fixed features input can be used to override fields of the candidate arms
    when making model predictions.

    Args:
        model: The model used to make predictions.
        generator_runs_dict: A mapping from generator run name to generator run.
        metric_names: Metrics to include in the plot.
        fixed_features: Observation features used to override fields of the
            candidate arms when making model predictions.

    Returns:
        A mapping from generator run name to a mapping from arm name to the
        corresponding ``PlotOutOfSampleArm``.

    """
    out_of_sample_plot: Dict[str, Dict[str, PlotOutOfSampleArm]] = {}
    for generator_run_name, generator_run in generator_runs_dict.items():
        out_of_sample_plot[generator_run_name] = {}
        for arm in generator_run.arms:
            # This assumes context is None
            obsf = ObservationFeatures.from_arm(arm)
            if fixed_features is not None:
                obsf.update_features(fixed_features)

            # Make a prediction
            try:
                pred_y, pred_se = _predict_at_point(model, obsf, metric_names)
            except Exception:
                # Check if it is an out-of-design arm.
                if not model.model_space.check_membership(obsf.parameters):
                    # Skip this point
                    continue
                else:
                    # It should have worked
                    raise
            arm_name = arm.name_or_short_signature
            out_of_sample_plot[generator_run_name][
                arm_name] = PlotOutOfSampleArm(
                    name=arm_name,
                    parameters=obsf.parameters,
                    y_hat=pred_y,
                    se_hat=pred_se,
                    context_stratum=None,
                )
    return out_of_sample_plot
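
A brief invocation sketch for the helper above, assuming a fitted ``ModelBridge`` named ``model`` and a ``GeneratorRun`` of candidate arms named ``gr_candidates``; the metric names are placeholders (none of these are defined in these examples).

# Hypothetical invocation sketch; `model` and `gr_candidates` are assumed to
# exist and are not defined in these examples.
out_of_sample = _get_out_of_sample_arms(
    model=model,
    generator_runs_dict={"candidates": gr_candidates},
    metric_names={"m1", "m2"},
    fixed_features=ObservationFeatures(parameters={}, trial_index=np.int64(0)),
)
for gr_name, arms_by_name in out_of_sample.items():
    for arm_name, plot_arm in arms_by_name.items():
        # Each PlotOutOfSampleArm carries predicted means and standard errors.
        print(gr_name, arm_name, plot_arm.y_hat, plot_arm.se_hat)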
Example #6
    def test_get_pending_observation_features(self):
        # Pending observations should be none if there aren't any.
        self.assertIsNone(get_pending_observation_features(self.experiment))

        self.trial.mark_dispatched()
        # Now that the trial is deployed, it should become a pending trial on the
        # experiment and appear as pending for all metrics.
        obs_feat = ObservationFeatures.from_arm(
            arm=self.trial.arm, trial_index=np.int64(self.trial.index)
        )
        self.assertEqual(
            get_pending_observation_features(self.experiment),
            {
                "tracking": [obs_feat],
                "m2": [obs_feat],
                "m1": [obs_feat]
            },
        )
        self.experiment.attach_data(
            Data.from_evaluations(
                {self.trial.arm.name: {"m2": (1, 0)}},
                trial_index=self.trial.index,
            )
        )
        # Now m2 should have empty pending features, since the trial was updated
        # for m2.
        self.assertEqual(
            get_pending_observation_features(self.experiment),
            {
                "tracking": [obs_feat],
                "m2": [],
                "m1": [obs_feat]
            },
        )
        # When a trial is marked failed, it should no longer appear in pending...
        self.trial.mark_failed()
        self.assertIsNone(get_pending_observation_features(self.experiment))
        # ... unless specified to include failed trials in pending observations.
        self.assertEqual(
            get_pending_observation_features(self.experiment,
                                             include_failed_as_pending=True),
            {
                "tracking": [obs_feat],
                "m2": [obs_feat],
                "m1": [obs_feat]
            },
        )
        self.experiment.new_batch_trial(GeneratorRun([]))
        # Batch trials are not yet supported.
        with self.assertRaises(NotImplementedError):
            get_pending_observation_features(self.experiment)
Example #7
 def test_cast_observation_features(self):
     # Ensure that during casting, full parameterization is saved
     # in metadata and actual parameterization is cast to HSS.
     hss_1_obs_feats_1 = ObservationFeatures.from_arm(
         arm=self.hss_1_arm_1_flat)
     hss_1_obs_feats_1_cast = self.hss_1.cast_observation_features(
         observation_features=hss_1_obs_feats_1)
     self.assertEqual(  # Check one subtree.
         hss_1_obs_feats_1_cast.parameters,
         ObservationFeatures.from_arm(arm=self.hss_1_arm_1_cast).parameters,
     )
     self.assertEqual(  # Check that the full parameterization is in metadata.
         hss_1_obs_feats_1_cast.metadata.get(Keys.FULL_PARAMETERIZATION),
         hss_1_obs_feats_1.parameters,
     )
     # Check that difference with observation features made from cast arm
     # is only in metadata (to ensure only parameters and metadata are
     # manipulated during casting).
     hss_1_obs_feats_1_cast.metadata = None
     self.assertEqual(
         hss_1_obs_feats_1_cast,
         ObservationFeatures.from_arm(arm=self.hss_1_arm_1_cast),
     )
Example #8
def get_pending_observation_features_based_on_trial_status(
    experiment: Experiment,
) -> Optional[Dict[str, List[ObservationFeatures]]]:
    """A faster analogue of ``get_pending_observation_features`` that makes
    assumptions about the trials in the experiment in order to speed up extraction
    of pending points.

    Assumptions:

    * All arms in all trials in ``STAGED``, ``RUNNING``, and ``ABANDONED`` statuses
      are to be considered pending for all outcomes.
    * All arms in all trials in other statuses are to be considered not pending for
      all outcomes.

    This entails:

    * No actual data-fetching for trials to determine whether arms in them are pending
      for specific outcomes.
    * Even if data is present for some outcomes in ``RUNNING`` trials, their arms will
      still be considered pending for those outcomes.

    NOTE: This function should not be used to extract pending features in field
    experiments, where arms in running trials should not be considered pending if
    there is data for those arms.

    Args:
        experiment: Experiment for which to compute the pending observation features.

    Returns:
        An optional mapping from metric names to a list of observation features,
        pending for that metric (i.e. do not have evaluation data for that metric).
        If there are no pending features for any of the metrics, return is None.
    """
    pending_features = defaultdict(list)
    for status in [
            TrialStatus.STAGED, TrialStatus.RUNNING, TrialStatus.ABANDONED
    ]:
        for trial in experiment.trials_by_status[status]:
            for metric_name in experiment.metrics:
                pending_features[metric_name].extend(
                    ObservationFeatures.from_arm(
                        arm=arm,
                        trial_index=np.int64(trial.index),
                        metadata=trial._get_candidate_metadata(
                            arm_name=arm.name),
                    ) for arm in trial.arms)

    return dict(pending_features) if any(
        x for x in pending_features.values()) else None
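
A short hedged comparison of the two pending-features helpers, assuming an existing Ax ``Experiment`` named ``experiment`` with staged or running trials (not defined in these examples): the status-based variant skips data fetching entirely, so it is faster but coarser, per the NOTE in its docstring.

# Hypothetical sketch; `experiment` is assumed to exist.
pending_exact = get_pending_observation_features(experiment)
pending_fast = get_pending_observation_features_based_on_trial_status(experiment)
# Both return Optional[Dict[str, List[ObservationFeatures]]]. The status-based
# variant may still report arms as pending for a metric even when data for that
# metric has already been attached to a RUNNING trial.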
Example #9
 def setUp(self) -> None:
     self.experiment = get_experiment()
     self.arm = Arm({"x": 1, "y": "foo", "z": True, "w": 4})
     self.trial = self.experiment.new_trial(GeneratorRun([self.arm]))
     self.experiment_2 = get_experiment()
     self.batch_trial = self.experiment_2.new_batch_trial(
         GeneratorRun([self.arm]))
     self.batch_trial.set_status_quo_with_weight(
         self.experiment_2.status_quo, 1)
     self.obs_feat = ObservationFeatures.from_arm(
         arm=self.trial.arm, trial_index=np.int64(self.trial.index)
     )
     self.hss_exp = get_hierarchical_search_space_experiment()
     self.hss_sobol = Models.SOBOL(search_space=self.hss_exp.search_space)
     self.hss_gr = self.hss_sobol.gen(n=1)
     self.hss_trial = self.hss_exp.new_trial(self.hss_gr)
     self.hss_arm = not_none(self.hss_trial.arm)
     self.hss_cand_metadata = self.hss_trial._get_candidate_metadata(
         arm_name=self.hss_arm.name)
     self.hss_full_parameterization = self.hss_cand_metadata.get(
         Keys.FULL_PARAMETERIZATION).copy()
     self.assertTrue(
         all(p_name in self.hss_full_parameterization
             for p_name in self.hss_exp.search_space.parameters))
     self.hss_obs_feat = ObservationFeatures.from_arm(
         arm=self.hss_arm,
         trial_index=np.int64(self.hss_trial.index),
         metadata=self.hss_cand_metadata,
     )
     self.hss_obs_feat_all_params = ObservationFeatures.from_arm(
         arm=Arm(self.hss_full_parameterization),
         trial_index=np.int64(self.hss_trial.index),
         metadata={
             Keys.FULL_PARAMETERIZATION: self.hss_full_parameterization
         },
     )
Example #10
 def test_get_pending_observation_features_batch_trial(self):
     # Check the same functionality for batched trials.
     self.assertIsNone(get_pending_observation_features(self.experiment_2))
     self.batch_trial.mark_running(no_runner_required=True)
     sq_obs_feat = ObservationFeatures.from_arm(
         self.batch_trial.arms_by_name.get("status_quo"),
         trial_index=self.batch_trial.index,
     )
     self.assertEqual(
         get_pending_observation_features(self.experiment_2),
         {
             "tracking": [self.obs_feat, sq_obs_feat],
             "m2": [self.obs_feat, sq_obs_feat],
             "m1": [self.obs_feat, sq_obs_feat],
         },
     )
Example #11
def get_pending_observation_features(
    experiment: Experiment,
    include_failed_as_pending: bool = False
) -> Optional[Dict[str, List[ObservationFeatures]]]:
    """Computes a list of pending observation features (corresponding to arms that
    have been generated and deployed in the course of the experiment, but have not
    been completed with data).

    Args:
        experiment: Experiment for which to compute the pending observation features.
        include_failed_as_pending: Whether to include failed trials as pending
            (for example, to avoid the model suggesting them again).

    Returns:
        An optional mapping from metric names to a list of observation features,
        pending for that metric (i.e. do not have evaluation data for that metric).
        If there are no pending features for any of the metrics, return is None.
    """
    pending_features = {}
    for trial_index, trial in experiment.trials.items():
        if isinstance(trial, BatchTrial):
            raise NotImplementedError("BatchTrials are not yet supported.")
        assert isinstance(trial, Trial)
        for metric_name in experiment.metrics:
            if metric_name not in pending_features:
                pending_features[metric_name] = []
            # Note that this assumes that if a metric appears in fetched data,
            # the trial is not pending for the metric. For the cases where we are
            # only concerned with the most recent data, this will work, but we
            # may need to add logic to check previously added data objects, too.
            include_since_failed = include_failed_as_pending and trial.status.is_failed
            if ((trial.status.is_deployed or include_since_failed) and
                    metric_name not in trial.fetch_data().df.metric_name.values
                    and trial.arm is not None):
                # pyre-fixme[16]: `Optional` has no attribute `append`.
                pending_features.get(metric_name).append(
                    ObservationFeatures.from_arm(
                        arm=trial.arm, trial_index=np.int64(trial_index)))
    return pending_features if any(
        x for x in pending_features.values()) else None
Example #12
 def testObservationFeaturesFromArm(self):
     arm = Arm({"x": 0, "y": "a"})
     obsf = ObservationFeatures.from_arm(arm, trial_index=3)
     self.assertEqual(obsf.parameters, arm.parameters)
     self.assertEqual(obsf.trial_index, 3)