Example #1
import pandas as pd
import plotly.graph_objs as go
from plotly import subplots

# Ax-specific imports; paths assume the standard Ax layout (ax.exceptions.core, ax.plot.base).
from ax.exceptions.core import NoDataError
from ax.plot.base import AxPlotConfig, AxPlotTypes


def plot_feature_importance(df: pd.DataFrame, title: str) -> AxPlotConfig:
    if df.empty:
        raise NoDataError("No Data on Feature Importances found.")
    df.set_index(df.columns[0], inplace=True)
    data = [
        go.Bar(y=df.index,
               x=df[column_name],
               name=column_name,
               orientation="h") for column_name in df.columns
    ]
    fig = subplots.make_subplots(
        rows=len(df.columns),
        cols=1,
        subplot_titles=df.columns,
        print_grid=False,
        shared_xaxes=True,
    )

    for idx, item in enumerate(data):
        fig.append_trace(item, idx + 1, 1)
    fig.layout.showlegend = False
    fig.layout.margin = go.layout.Margin(
        l=8 * min(max(len(idx) for idx in df.index), 75)  # noqa E741
    )
    fig.layout.title = title
    return AxPlotConfig(data=fig, plot_type=AxPlotTypes.GENERIC)
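
A minimal usage sketch for Example #1 (not from the Ax sources): it assumes the imports added above and a synthetic DataFrame whose first column holds feature names and whose remaining columns hold per-metric importance scores.

feature_df = pd.DataFrame({
    "feature": ["x1", "x2", "x3"],      # first column becomes the index
    "metric_a": [0.7, 0.2, 0.1],        # each remaining column gets its own bar subplot
    "metric_b": [0.4, 0.4, 0.2],
})
config = plot_feature_importance(feature_df, title="Feature importances")
# `config` is an AxPlotConfig wrapping the assembled plotly figure.
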
Example #2
    def _set_current_model(self, data: Optional[Data]) -> None:
        """Instantiate the current model with all available data.
        """
        model_kwargs = self._curr.model_kwargs or {}

        # If last generator run's index matches the current step, extract
        # model state from last generator run and pass it to the model
        # being instantiated in this function.
        lgr = self.last_generator_run
        if (
            lgr is not None
            and lgr._generation_step_index == self._curr.index
            and lgr._model_state_after_gen
        ):
            model_kwargs = _combine_model_kwargs_and_state(
                model_kwargs=model_kwargs,
                generator_run=lgr,
                model_class=not_none(not_none(self.model).model.__class__),
            )

        if data is None:
            if self._curr.use_update:
                # If the new step is using `update`, it's important to instantiate
                # the model with data for completed trials only, so later we can
                # update it with data for new trials as they become completed.
                # `experiment.fetch_data` can fetch all available data, including
                # for non-completed trials (depending on how the experiment's metrics
                # implement `fetch_experiment_data`). We avoid fetching data for
                # trials with statuses other than `COMPLETED`, by fetching specifically
                # for `COMPLETED` trials.
                data = self.experiment.fetch_trials_data(
                    self.experiment.trial_indices_by_status[TrialStatus.COMPLETED]
                )
            else:
                data = self.experiment.fetch_data()
        # By the time we get here, we will have already transitioned
        # to a new step, but if previous step required observed data,
        # we should raise an error even if enough trials were completed.
        # Such an empty data case does indicate an invalid state; this
        # check is to improve the experience of detecting and debugging
        # the invalid state that led to this.
        previous_step_required_observations = (
            self._curr.index > 0
            and self._steps[self._curr.index - 1].min_trials_observed > 0
        )
        if data.df.empty and previous_step_required_observations:
            raise NoDataError(
                f"Observed data is required for generation step #{self._curr.index} "
                f"(model {self._curr.model_name}), but fetched data was empty. "
                "Something is wrong with experiment setup -- likely metrics do not "
                "implement fetching logic (check your metrics) or no data was "
                "attached to experiment for completed trials."
            )
        if isinstance(self._curr.model, Models):
            self._set_current_model_from_models_enum(data=data, **model_kwargs)
        else:
            # If model was not specified as Models member, it was specified as a
            # factory function.
            self._set_current_model_from_factory_function(data=data, **model_kwargs)
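
The state-carryover step in Example #2 can be sketched on its own; the helper below is illustrative only (it is not Ax's `_combine_model_kwargs_and_state`) and simply shows saved model state overriding statically configured kwargs.

from typing import Any, Dict

def combine_kwargs_and_state_sketch(
    model_kwargs: Dict[str, Any],
    model_state_after_gen: Dict[str, Any],
) -> Dict[str, Any]:
    # State captured after the last generation (e.g. fitted hyperparameters)
    # wins over the step's static kwargs, so the re-instantiated model resumes
    # where the previous one left off.
    combined = dict(model_kwargs)
    combined.update(model_state_after_gen)
    return combined

combine_kwargs_and_state_sketch({"num_samples": 128}, {"num_samples": 256})
# -> {"num_samples": 256}
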
Example #3
 def _get_data_for_fit(self, passed_in_data: Optional[Data]) -> Data:
     if passed_in_data is None:
         if self._curr.use_update:
             # If the new step is using `update`, it's important to instantiate
             # the model with data for completed trials only, so later we can
             # update it with data for new trials as they become completed.
             # `experiment.lookup_data` can look up all available data, including
             # for non-completed trials (depending on how the experiment's metrics
             # implement `fetch_experiment_data`). We avoid fetching data for
             # trials with statuses other than `COMPLETED`, by fetching specifically
             # for `COMPLETED` trials.
             avail_while_running_metrics = {
                 m.name
                 for m in self.experiment.metrics.values()
                 if m.is_available_while_running()
             }
             if avail_while_running_metrics:
                 raise NotImplementedError(
                     f"Metrics {avail_while_running_metrics} are available while "
                     "trial is running, but use of `update` functionality in "
                     "generation strategy relies on new data being available upon "
                     "trial completion.")
             data = self.experiment.lookup_data(
                 trial_indices=self.experiment.trial_indices_by_status[
                     TrialStatus.COMPLETED])
         else:
             data = self.experiment.lookup_data()
     else:
         data = passed_in_data
     # By the time we get here, we will have already transitioned
     # to a new step, but if previous step required observed data,
     # we should raise an error even if enough trials were completed.
     # Such an empty data case does indicate an invalid state; this
     # check is to improve the experience of detecting and debugging
     # the invalid state that led to this.
     previous_step_required_observations = (
         self._curr.index > 0
         and self._steps[self._curr.index - 1].min_trials_observed > 0)
     if data.df.empty and previous_step_required_observations:
         raise NoDataError(
             f"Observed data is required for generation step #{self._curr.index} "
             f"(model {self._curr.model_name}), but fetched data was empty. "
             "Something is wrong with experiment setup -- likely metrics do not "
             "implement fetching logic (check your metrics) or no data was "
             "attached to experiment for completed trials.")
     return data
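
The `is_available_while_running` guard in Example #3, shown in isolation with a stand-in metric class (hypothetical, mimicking only the one method the guard calls):

class StubMetric:
    def __init__(self, name: str, available_while_running: bool) -> None:
        self.name = name
        self._available_while_running = available_while_running

    def is_available_while_running(self) -> bool:
        return self._available_while_running

metrics = {
    "latency": StubMetric("latency", available_while_running=True),
    "accuracy": StubMetric("accuracy", available_while_running=False),
}
# Same set comprehension as in Example #3: any metric that can report data
# mid-trial is incompatible with update-on-completion semantics and triggers
# the NotImplementedError above.
avail_while_running_metrics = {
    m.name for m in metrics.values() if m.is_available_while_running()
}
assert avail_while_running_metrics == {"latency"}
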
Example #4
    def _set_current_model(self, data: Optional[Data]) -> None:
        """Instantiate the current model with all available data."""
        model_kwargs = self._curr.model_kwargs or {}

        # If last generator run's index matches the current step, extract
        # model state from last generator run and pass it to the model
        # being instantiated in this function.
        lgr = self.last_generator_run
        if (lgr is not None and lgr._generation_step_index == self._curr.index
                and lgr._model_state_after_gen):
            model_kwargs = _combine_model_kwargs_and_state(
                model_kwargs=model_kwargs,
                generator_run=lgr,
                model_class=not_none(not_none(self.model).model.__class__),
            )

        if data is None:
            if self._curr.use_update:
                # If the new step is using `update`, it's important to instantiate
                # the model with data for completed trials only, so later we can
                # update it with data for new trials as they become completed.
                # `experiment.lookup_data` can look up all available data, including
                # for non-completed trials (depending on how the experiment's metrics
                # implement `fetch_experiment_data`). We avoid fetching data for
                # trials with statuses other than `COMPLETED`, by fetching specifically
                # for `COMPLETED` trials.
                avail_while_running_metrics = {
                    m.name
                    for m in self.experiment.metrics.values()
                    if m.is_available_while_running()
                }
                if avail_while_running_metrics:
                    raise NotImplementedError(
                        f"Metrics {avail_while_running_metrics} are available while "
                        "trial is running, but use of `update` functionality in "
                        "generation strategy relies on new data being available upon "
                        "trial completion.")
                data = self.experiment.lookup_data(
                    trial_indices=self.experiment.trial_indices_by_status[
                        TrialStatus.COMPLETED])
            else:
                data = self.experiment.lookup_data()
        # By the time we get here, we will have already transitioned
        # to a new step, but if previous step required observed data,
        # we should raise an error even if enough trials were completed.
        # Such an empty data case does indicate an invalid state; this
        # check is to improve the experience of detecting and debugging
        # the invalid state that led to this.
        previous_step_required_observations = (
            self._curr.index > 0
            and self._steps[self._curr.index - 1].min_trials_observed > 0)
        if data.df.empty and previous_step_required_observations:
            raise NoDataError(
                f"Observed data is required for generation step #{self._curr.index} "
                f"(model {self._curr.model_name}), but fetched data was empty. "
                "Something is wrong with experiment setup -- likely metrics do not "
                "implement fetching logic (check your metrics) or no data was "
                "attached to experiment for completed trials.")
        if not data.df.empty:
            trial_indices_in_data = sorted(data.df["trial_index"].unique())
            logger.debug(
                f"Setting model with data for trials: {trial_indices_in_data}")
        # TODO(jej)[T87591836] Support non-`Data` data types.
        if isinstance(self._curr.model, ModelRegistryBase):
            # pyre-fixme [6]: Incompat param: Expect `Data` got `AbstractDataFrameData`
            self._set_current_model_from_models_enum(data=data, **model_kwargs)
        else:
            # If model was not specified as Models member, it was specified as a
            # factory function.
            # pyre-fixme [6]: Incompat param: Expect `Data` got `AbstractDataFrameData`
            self._set_current_model_from_factory_function(data=data,
                                                          **model_kwargs)
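
The empty-data guard shared by Examples #2-#4, reduced to a standalone check; the step objects here are stand-ins carrying only the two fields the guard reads.

from dataclasses import dataclass
from typing import List

@dataclass
class StepStub:
    index: int
    min_trials_observed: int

def previous_step_required_observations(steps: List[StepStub], curr: StepStub) -> bool:
    # True only when a previous step exists and it insisted on observed data;
    # in that case an empty dataset signals a broken experiment setup rather
    # than a normal cold start, and the methods above raise NoDataError.
    return curr.index > 0 and steps[curr.index - 1].min_trials_observed > 0

steps = [StepStub(index=0, min_trials_observed=5), StepStub(index=1, min_trials_observed=0)]
assert previous_step_required_observations(steps, curr=steps[1])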