def plot_feature_importance(df: pd.DataFrame, title: str) -> AxPlotConfig:
    """Render horizontal bar charts of feature importances, one subplot per metric.

    Args:
        df: Frame whose first column holds the feature names and whose
            remaining columns each hold importances for one metric.
        title: Title for the assembled figure.

    Returns:
        An ``AxPlotConfig`` of type ``GENERIC`` wrapping the plotly figure.

    Raises:
        NoDataError: If ``df`` is empty.
    """
    if df.empty:
        raise NoDataError("No Data on Feature Importances found.")
    # Re-index on a copy so the caller's frame is not mutated as a side
    # effect (previously used `inplace=True`, clobbering the caller's index).
    df = df.set_index(df.columns[0])
    traces = [
        go.Bar(y=df.index, x=df[column_name], name=column_name, orientation="h")
        for column_name in df.columns
    ]
    fig = subplots.make_subplots(
        rows=len(df.columns),
        cols=1,
        subplot_titles=df.columns,
        print_grid=False,
        shared_xaxes=True,
    )
    for row_idx, trace in enumerate(traces):
        fig.append_trace(trace, row_idx + 1, 1)
    fig.layout.showlegend = False
    # Scale the left margin with the longest feature name (capped at 75
    # characters) so y-axis labels are not clipped.
    fig.layout.margin = go.layout.Margin(
        l=8 * min(max(len(feature) for feature in df.index), 75)  # noqa E741
    )
    fig.layout.title = title
    return AxPlotConfig(data=fig, plot_type=AxPlotTypes.GENERIC)
def _set_current_model(self, data: Optional[Data]) -> None:
    """Instantiate the current step's model with all available data.

    Args:
        data: Data to fit the model with. If ``None``, data is fetched from
            the experiment (restricted to ``COMPLETED`` trials when the
            current step uses ``update``).

    Raises:
        NoDataError: If the previous step required observed data but the
            fetched data is empty.
    """
    model_kwargs = self._curr.model_kwargs or {}
    # If last generator run's index matches the current step, extract
    # model state from last generator run and pass it to the model
    # being instantiated in this function.
    lgr = self.last_generator_run
    if (
        lgr is not None
        and lgr._generation_step_index == self._curr.index
        and lgr._model_state_after_gen
    ):
        model_kwargs = _combine_model_kwargs_and_state(
            model_kwargs=model_kwargs,
            generator_run=lgr,
            model_class=not_none(not_none(self.model).model.__class__),
        )
    if data is None:
        if self._curr.use_update:
            # If the new step is using `update`, it's important to instantiate
            # the model with data for completed trials only, so later we can
            # update it with data for new trials as they become completed.
            # `experiment.fetch_data` can fetch all available data, including
            # for non-completed trials (depending on how the experiment's metrics
            # implement `fetch_experiment_data`). We avoid fetching data for
            # trials with statuses other than `COMPLETED`, by fetching specifically
            # for `COMPLETED` trials.
            data = self.experiment.fetch_trials_data(
                self.experiment.trial_indices_by_status[TrialStatus.COMPLETED]
            )
        else:
            data = self.experiment.fetch_data()
    # By the time we get here, we will have already transitioned
    # to a new step, but if the previous step required observed data,
    # we should raise an error even if enough trials were completed.
    # Such an empty data case does indicate an invalid state; this
    # check is to improve the experience of detecting and debugging
    # the invalid state that led to this.
    previous_step_required_observations = (
        self._curr.index > 0
        and self._steps[self._curr.index - 1].min_trials_observed > 0
    )
    if data.df.empty and previous_step_required_observations:
        raise NoDataError(
            f"Observed data is required for generation step #{self._curr.index} "
            f"(model {self._curr.model_name}), but fetched data was empty. "
            "Something is wrong with experiment setup -- likely metrics do not "
            "implement fetching logic (check your metrics) or no data was "
            "attached to experiment for completed trials."
        )
    if isinstance(self._curr.model, Models):
        self._set_current_model_from_models_enum(data=data, **model_kwargs)
    else:
        # If model was not specified as Models member, it was specified as a
        # factory function.
        self._set_current_model_from_factory_function(data=data, **model_kwargs)
def _get_data_for_fit(self, passed_in_data: Optional[Data]) -> Data:
    """Resolve the data the current step's model should be fit on.

    Uses ``passed_in_data`` when provided; otherwise looks up data on the
    experiment (restricted to ``COMPLETED`` trials when the current step
    relies on ``update``).

    Raises:
        NotImplementedError: If the step uses ``update`` but some metrics
            report results while trials are still running.
        NoDataError: If the previous step required observed data and the
            resolved data is empty.
    """
    if passed_in_data is not None:
        data = passed_in_data
    elif not self._curr.use_update:
        data = self.experiment.lookup_data()
    else:
        # `update` assumes new data arrives only upon trial completion, so the
        # model must be seeded with COMPLETED-trial data exclusively; metrics
        # that are available while a trial runs would violate that assumption.
        running_metric_names = {
            metric.name
            for metric in self.experiment.metrics.values()
            if metric.is_available_while_running()
        }
        if running_metric_names:
            raise NotImplementedError(
                f"Metrics {running_metric_names} are available while "
                "trial is running, but use of `update` functionality in "
                "generation strategy relies on new data being available upon "
                "trial completion.")
        completed_trials = self.experiment.trial_indices_by_status[
            TrialStatus.COMPLETED]
        data = self.experiment.lookup_data(trial_indices=completed_trials)
    # We have already transitioned to this step; if the prior step demanded
    # observed data, empty data here indicates an invalid experiment state
    # (e.g. broken metric fetching), so surface it loudly to ease debugging.
    prior_step_needed_observations = (
        self._curr.index > 0
        and self._steps[self._curr.index - 1].min_trials_observed > 0)
    if data.df.empty and prior_step_needed_observations:
        raise NoDataError(
            f"Observed data is required for generation step #{self._curr.index} "
            f"(model {self._curr.model_name}), but fetched data was empty. "
            "Something is wrong with experiment setup -- likely metrics do not "
            "implement fetching logic (check your metrics) or no data was "
            "attached to experiment for completed trials.")
    return data
def _set_current_model(self, data: Optional[Data]) -> None:
    """Instantiate the current step's model with all available data.

    Args:
        data: Data to fit the model with. If ``None``, data is looked up on
            the experiment (restricted to ``COMPLETED`` trials when the
            current step uses ``update``).

    Raises:
        NotImplementedError: If the step uses ``update`` but some metrics
            are available while trials are running.
        NoDataError: If the previous step required observed data but the
            resolved data is empty.
    """
    model_kwargs = self._curr.model_kwargs or {}
    # If last generator run's index matches the current step, extract
    # model state from last generator run and pass it to the model
    # being instantiated in this function.
    lgr = self.last_generator_run
    if (lgr is not None and lgr._generation_step_index == self._curr.index
            and lgr._model_state_after_gen):
        model_kwargs = _combine_model_kwargs_and_state(
            model_kwargs=model_kwargs,
            generator_run=lgr,
            model_class=not_none(not_none(self.model).model.__class__),
        )
    if data is None:
        if self._curr.use_update:
            # If the new step is using `update`, it's important to instantiate
            # the model with data for completed trials only, so later we can
            # update it with data for new trials as they become completed.
            # `experiment.lookup_data` can lookup all available data, including
            # for non-completed trials (depending on how the experiment's metrics
            # implement `fetch_experiment_data`). We avoid fetching data for
            # trials with statuses other than `COMPLETED`, by fetching specifically
            # for `COMPLETED` trials.
            avail_while_running_metrics = {
                m.name
                for m in self.experiment.metrics.values()
                if m.is_available_while_running()
            }
            if avail_while_running_metrics:
                raise NotImplementedError(
                    f"Metrics {avail_while_running_metrics} are available while "
                    "trial is running, but use of `update` functionality in "
                    "generation strategy relies on new data being available upon "
                    "trial completion.")
            data = self.experiment.lookup_data(
                trial_indices=self.experiment.trial_indices_by_status[
                    TrialStatus.COMPLETED])
        else:
            data = self.experiment.lookup_data()
    # By the time we get here, we will have already transitioned
    # to a new step, but if the previous step required observed data,
    # we should raise an error even if enough trials were completed.
    # Such an empty data case does indicate an invalid state; this
    # check is to improve the experience of detecting and debugging
    # the invalid state that led to this.
    previous_step_required_observations = (
        self._curr.index > 0
        and self._steps[self._curr.index - 1].min_trials_observed > 0)
    if data.df.empty and previous_step_required_observations:
        raise NoDataError(
            f"Observed data is required for generation step #{self._curr.index} "
            f"(model {self._curr.model_name}), but fetched data was empty. "
            "Something is wrong with experiment setup -- likely metrics do not "
            "implement fetching logic (check your metrics) or no data was "
            "attached to experiment for completed trials.")
    if not data.df.empty:
        trial_indices_in_data = sorted(data.df["trial_index"].unique())
        logger.debug(
            f"Setting model with data for trials: {trial_indices_in_data}")
    # TODO(jej)[T87591836] Support non-`Data` data types.
    if isinstance(self._curr.model, ModelRegistryBase):
        # pyre-fixme [6]: Incompat param: Expect `Data` got `AbstractDataFrameData`
        self._set_current_model_from_models_enum(data=data, **model_kwargs)
    else:
        # If model was not specified as Models member, it was specified as a
        # factory function.
        # pyre-fixme [6]: Incompat param: Expect `Data` got `AbstractDataFrameData`
        self._set_current_model_from_factory_function(data=data, **model_kwargs)