Ejemplo n.º 1
0
    def _set_current_model(self, data: Optional[Data]) -> None:
        """Build the model for the current generation step.

        Keyword arguments start from the current step's ``model_kwargs``. If
        the last generator run was produced by this same step and recorded a
        model state, that state is folded in through
        ``_combine_model_kwargs_and_state``.

        Args:
            data: Data to instantiate the model with. When ``None``, data is
                fetched from the experiment; if the current step has
                ``use_update`` set, only ``COMPLETED`` trials are fetched.

        Raises:
            NoDataError: If the data frame is empty although the previous
                step had ``min_trials_observed > 0``.
        """
        step = self._curr
        kwargs = step.model_kwargs or {}

        # Only reuse the model state saved on the last generator run when that
        # run belongs to the step we are currently on and a state was saved.
        last_run = self.last_generator_run
        if last_run is not None and last_run._generation_step_index == step.index:
            if last_run._model_state_after_gen:
                kwargs = _combine_model_kwargs_and_state(
                    model_kwargs=kwargs,
                    generator_run=last_run,
                    model_class=not_none(not_none(self.model).model.__class__),
                )

        if data is None:
            if not step.use_update:
                data = self.experiment.fetch_data()
            else:
                # A step that relies on `update` must start from data for
                # completed trials only, so that data for trials completing
                # later can be fed in through `update`. A plain `fetch_data`
                # may also return data for non-completed trials (depending on
                # how the metrics implement `fetch_experiment_data`), so we
                # fetch explicitly for the `COMPLETED` trial indices instead.
                completed_indices = self.experiment.trial_indices_by_status[
                    TrialStatus.COMPLETED
                ]
                data = self.experiment.fetch_trials_data(completed_indices)

        # The transition to the new step has already happened by now. If the
        # previous step required observed data, empty data here points to an
        # invalid state, even when enough trials were completed; raising a
        # dedicated error makes that state easier to detect and debug.
        prior_step_needed_observations = False
        if step.index > 0:
            prior_step = self._steps[step.index - 1]
            prior_step_needed_observations = prior_step.min_trials_observed > 0
        if data.df.empty and prior_step_needed_observations:
            message = (
                f"Observed data is required for generation step #{step.index} "
                f"(model {step.model_name}), but fetched data was empty. "
                "Something is wrong with experiment setup -- likely metrics do not "
                "implement fetching logic (check your metrics) or no data was "
                "attached to experiment for completed trials."
            )
            raise NoDataError(message)

        # A model that is not a `Models` member was given as a factory function.
        if isinstance(step.model, Models):
            set_model = self._set_current_model_from_models_enum
        else:
            set_model = self._set_current_model_from_factory_function
        set_model(data=data, **kwargs)
Ejemplo n.º 2
0
    def _set_current_model(self, data: Optional[Data]) -> None:
        """Build the model for the current generation step.

        Keyword arguments start from the current step's ``model_kwargs``. If
        the last generator run was produced by this same step and recorded a
        model state, that state is folded in through
        ``_combine_model_kwargs_and_state``.

        Args:
            data: Data to instantiate the model with. When ``None``, data is
                fetched from the experiment; if the current step has
                ``use_update`` set, only ``COMPLETED`` trials are fetched.
        """
        step = self._curr
        kwargs = step.model_kwargs or {}

        # Only reuse the model state saved on the last generator run when that
        # run belongs to the step we are currently on and a state was saved.
        last_run = self.last_generator_run
        if last_run is not None and last_run._generation_step_index == step.index:
            if last_run._model_state_after_gen:
                kwargs = _combine_model_kwargs_and_state(
                    model_kwargs=kwargs,
                    generator_run=last_run,
                    model_class=not_none(not_none(self.model).model.__class__),
                )

        # TODO[T65857344]: move from fetching all data to using cached data
        if data is None:
            if not step.use_update:
                data = self.experiment.fetch_data()
            else:
                # A step that relies on `update` must start from data for
                # completed trials only, so that data for trials completing
                # later can be fed in through `update`. A plain `fetch_data`
                # may also return data for non-completed trials (depending on
                # how the metrics implement `fetch_experiment_data`), so we
                # fetch explicitly for the `COMPLETED` trial indices instead.
                completed_indices = self.experiment.trial_indices_by_status[
                    TrialStatus.COMPLETED
                ]
                data = self.experiment.fetch_trials_data(completed_indices)

        # A model that is not a `Models` member was given as a factory function.
        if isinstance(step.model, Models):
            set_model = self._set_current_model_from_models_enum
        else:
            set_model = self._set_current_model_from_factory_function
        set_model(data=data, **kwargs)
Ejemplo n.º 3
0
    def _set_current_model(self, data: Optional[Data]) -> None:
        """Build the model for the current generation step.

        Keyword arguments start from the current step's ``model_kwargs``. If
        the last generator run was produced by this same step and recorded a
        model state, that state is folded in through
        ``_combine_model_kwargs_and_state``.

        Args:
            data: Data to instantiate the model with. When ``None``, data is
                looked up on the experiment; if the current step has
                ``use_update`` set, only ``COMPLETED`` trials are looked up.

        Raises:
            NotImplementedError: If data has to be looked up for a step using
                ``update`` while some experiment metric reports being
                available while running.
            NoDataError: If the data frame is empty although the previous
                step had ``min_trials_observed > 0``.
        """
        step = self._curr
        kwargs = step.model_kwargs or {}

        # Only reuse the model state saved on the last generator run when that
        # run belongs to the step we are currently on and a state was saved.
        last_run = self.last_generator_run
        if last_run is not None and last_run._generation_step_index == step.index:
            if last_run._model_state_after_gen:
                kwargs = _combine_model_kwargs_and_state(
                    model_kwargs=kwargs,
                    generator_run=last_run,
                    model_class=not_none(not_none(self.model).model.__class__),
                )

        if data is None:
            if not step.use_update:
                data = self.experiment.lookup_data()
            else:
                # A step that relies on `update` must start from data for
                # completed trials only, so that data for trials completing
                # later can be fed in through `update`. A plain `lookup_data`
                # may also return data for non-completed trials (depending on
                # how the metrics implement `fetch_experiment_data`), so we
                # look up data explicitly for the `COMPLETED` trial indices.
                # Metrics that produce data while a trial is still running
                # do not fit that scheme and are rejected up front.
                metrics_live_while_running = {
                    metric.name
                    for metric in self.experiment.metrics.values()
                    if metric.is_available_while_running()
                }
                if metrics_live_while_running:
                    raise NotImplementedError(
                        f"Metrics {metrics_live_while_running} are available while "
                        "trial is running, but use of `update` functionality in "
                        "generation strategy relies on new data being available upon "
                        "trial completion."
                    )
                completed_indices = self.experiment.trial_indices_by_status[
                    TrialStatus.COMPLETED
                ]
                data = self.experiment.lookup_data(trial_indices=completed_indices)

        # The transition to the new step has already happened by now. If the
        # previous step required observed data, empty data here points to an
        # invalid state, even when enough trials were completed; raising a
        # dedicated error makes that state easier to detect and debug.
        prior_step_needed_observations = False
        if step.index > 0:
            prior_step = self._steps[step.index - 1]
            prior_step_needed_observations = prior_step.min_trials_observed > 0

        if data.df.empty:
            if prior_step_needed_observations:
                raise NoDataError(
                    f"Observed data is required for generation step #{step.index} "
                    f"(model {step.model_name}), but fetched data was empty. "
                    "Something is wrong with experiment setup -- likely metrics do not "
                    "implement fetching logic (check your metrics) or no data was "
                    "attached to experiment for completed trials."
                )
        else:
            trials_with_data = sorted(data.df["trial_index"].unique())
            logger.debug(f"Setting model with data for trials: {trials_with_data}")

        # TODO(jej)[T87591836] Support non-`Data` data types.
        if not isinstance(step.model, ModelRegistryBase):
            # A model that is not a registry member was given as a factory
            # function.
            # pyre-fixme [6]: Incompat param: Expect `Data` got `AbstractDataFrameData`
            self._set_current_model_from_factory_function(data=data, **kwargs)
        else:
            # pyre-fixme [6]: Incompat param: Expect `Data` got `AbstractDataFrameData`
            self._set_current_model_from_models_enum(data=data, **kwargs)