    def test_projecting_point_from_categorical_hierarchical_to_discrete_flat_hypergrid(
            self):
        """ Exercises the stacking functionality.

        This is a major use case for our models.

        :return:
        """
        first_adapter = HierarchicalToFlatHypergridAdapter(
            adaptee=self.hierarchical_hypergrid)
        adapter = CategoricalToDiscreteHypergridAdapter(adaptee=first_adapter)
        self.assertFalse(
            any(
                isinstance(dimension, CategoricalDimension)
                for dimension in adapter.dimensions))
        self.assertFalse(
            any("." in dimension.name for dimension in adapter.dimensions))

        for _ in range(1000):
            original_point = self.hierarchical_hypergrid.random()
            projected_point = adapter.project_point(original_point)

            self.assertTrue(
                all(
                    isinstance(dim_value, Number)
                    for dim_name, dim_value in projected_point))
            self.assertFalse(
                any("." in dim_name for dim_name, value in projected_point))
            self.assertFalse(projected_point == original_point)

            unprojected_point = adapter.unproject_point(projected_point)
            self.assertTrue(unprojected_point in self.hierarchical_hypergrid)
            self.assertTrue(original_point == unprojected_point)
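
To make the flattening assertion above concrete: the hierarchical hypergrid exposes nested dimensions with dotted names, and the flat adapter rewrites them so no "." survives. A minimal sketch of the idea, with hypothetical names (not the adapter's actual implementation):

# Hypothetical illustration of the name flattening the test asserts on.
# The real mapping is maintained by HierarchicalToFlatHypergridAdapter.
nested_name = "child_grid.cache_size"
flat_name = nested_name.replace(".", "_")  # one plausible flattening scheme
assert "." not in flat_name
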
Example #2
    def __init__(self,
                 model_config: Point,
                 input_space: Hypergrid,
                 output_space: Hypergrid,
                 logger=None):
        if logger is None:
            logger = create_logger("DecisionTreeRegressionModel")
        self.logger = logger

        assert model_config in decision_tree_config_store.parameter_space
        RegressionModel.__init__(self,
                                 model_type=type(self),
                                 model_config=model_config,
                                 input_space=input_space,
                                 output_space=output_space)

        self._input_space_adapter = CategoricalToDiscreteHypergridAdapter(
            adaptee=self.input_space)

        self.input_dimension_names = [
            dimension.name
            for dimension in self._input_space_adapter.dimensions
        ]
        self.target_dimension_names = [
            dimension.name for dimension in self.output_space.dimensions
        ]
        self.logger.debug(
            f"Input dimensions: {self.input_dimension_names}; Target dimensions: {self.target_dimension_names}.")

        assert len(self.target_dimension_names) == 1, \
            "For now (and perhaps forever) we only support a single target per tree."

        self._regressor = DecisionTreeRegressor(
            criterion=self.model_config.criterion,
            splitter=self.model_config.splitter,
            max_depth=self.model_config.max_depth
            if self.model_config.max_depth != 0 else None,
            min_samples_split=self.model_config.min_samples_split,
            min_samples_leaf=self.model_config.min_samples_leaf,
            min_weight_fraction_leaf=self.model_config.min_weight_fraction_leaf,
            max_features=self.model_config.max_features,
            random_state=self.model_config.get("random_state", None),
            max_leaf_nodes=self.model_config.max_leaf_nodes
            if self.model_config.max_leaf_nodes not in (0, 1) else None,
            min_impurity_decrease=self.model_config.min_impurity_decrease,
            ccp_alpha=self.model_config.ccp_alpha)

        # These are used to compute the variance in predictions
        self._observations_per_leaf = dict()
        self._mean_per_leaf = dict()
        self._mean_variance_per_leaf = dict()
        self._sample_variance_per_leaf = dict()
        self._count_per_leaf = dict()

        self._trained = False
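
Note the sentinel translation in the constructor above: the config encodes "no limit" numerically (0 for max_depth; 0 or 1 for max_leaf_nodes), presumably because hypergrid dimensions must be numeric, while sklearn's DecisionTreeRegressor expects None for "no limit". A standalone restatement of that mapping:

# Illustrative only: the 0 -> None sentinel translation performed above.
def unlimited_to_none(value, sentinels=(0,)):
    return None if value in sentinels else value

assert unlimited_to_none(0) is None   # "no limit" in the config
assert unlimited_to_none(12) == 12    # a real limit passes through unchanged
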
Example #3
    def test_translating_dataframe_from_categorical_hierarchical_to_discrete_flat_hypergrid(self):
        adapter = CategoricalToDiscreteHypergridAdapter(
            adaptee=HierarchicalToFlatHypergridAdapter(
                adaptee=self.hierarchical_hypergrid
            )
        )
        self.assertFalse(any(isinstance(dimension, CategoricalDimension) for dimension in adapter.dimensions))
        self.assertFalse(any("." in dimension.name for dimension in adapter.dimensions))

        original_df = self.hierarchical_hypergrid.random_dataframe(num_samples=10000)
        translated_df = adapter.translate_dataframe(df=original_df, in_place=False)
        untranslated_df = adapter.untranslate_dataframe(df=translated_df, in_place=False)
        self.assertTrue(original_df.equals(untranslated_df))
Example #4
    def test_translating_point_from_categorical_to_discrete_simple_hypergrid(self):
        """ Tests if we can successfully execute all HypergridAdapters on a simple hypergrid.

        :return:
        """
        adapter = CategoricalToDiscreteHypergridAdapter(adaptee=self.simple_hypergrid)
        self._test_translating_categorical_to_discrete_point_from_adaptee(adaptee=self.simple_hypergrid, adapter=adapter)
Example #5
    def test_projecting_point_from_hierarchical_categorical_to_discrete_hypergrid(
            self):
        # This used to raise, but now it's handled internally, so let's make sure it doesn't raise anymore.
        hierarchical_adapter = CategoricalToDiscreteHypergridAdapter(
            adaptee=self.hierarchical_hypergrid)
        self._test_projecting_categorical_to_discrete_point_from_adaptee(
            adaptee=self.hierarchical_hypergrid, adapter=hierarchical_adapter)
Example #6
    def __init__(self,
                 optimization_problem: OptimizationProblem,
                 objective_name: str,
                 observations_data_source: ObservationsDataSource,
                 logger=None):
        if logger is None:
            logger = create_logger(self.__class__.__name__)
        self.logger = logger

        # The data source is maintained by the tomograph.
        #
        self._observations_data_source = observations_data_source

        # Metadata - what dimensions are we going to be plotting here?
        #
        self.optimization_problem = optimization_problem
        assert objective_name in self.optimization_problem.objective_space.dimension_names
        self.objective_name = objective_name

        # The adapter is needed if we want to create plots of categorical dimensions. It maps categorical values to integers so
        # that we can consistently place them on the plots.
        #
        self._feature_space_adapter = CategoricalToDiscreteHypergridAdapter(
            adaptee=self.optimization_problem.feature_space)

        self.feature_dimension_names: List[str] = [
            feature_name
            for feature_name in self._feature_space_adapter.dimension_names
            if feature_name != "contains_context"
        ]
        self.num_features = len(self.feature_dimension_names)

        # Stores figure ranges by name so that we can synchronize zooming and panning
        #
        self._x_ranges_by_name = {}
        self._y_ranges_by_name = {}

        # Stores an array of all plots for all objectives.
        #
        self._figures = [[None for col in range(self.num_features)]
                         for row in range(self.num_features)]

        self._title = Div(text=f"<h1>{self.objective_name}</h1>")

        # Stores the bokeh gridplot object.
        #
        self._grid_plot = None
Example #7
    def __init__(self, optimization_problem: OptimizationProblem,
                 parameters_df: pd.DataFrame, context_df: pd.DataFrame,
                 objectives_df: pd.DataFrame, pareto_df: pd.DataFrame):
        self.optimization_problem = optimization_problem
        self._feature_space_adapter = CategoricalToDiscreteHypergridAdapter(
            adaptee=self.optimization_problem.feature_space)

        self.parameters_df: pd.DataFrame = None
        self.context_df: pd.DataFrame = None
        self.objectives_df: pd.DataFrame = None
        self.pareto_df: pd.DataFrame = None
        self.observations_df: pd.DataFrame = None

        self.data_source: ColumnDataSource = ColumnDataSource()
        self.update_data(parameters_df=parameters_df,
                         context_df=context_df,
                         objectives_df=objectives_df,
                         pareto_df=pareto_df)
Example #8
    def test_projecting_dataframe_from_categorical_hierarchical_to_discrete_flat_hypergrid(
            self):
        adapter = CategoricalToDiscreteHypergridAdapter(
            adaptee=HierarchicalToFlatHypergridAdapter(
                adaptee=self.hierarchical_hypergrid))
        assert not any(
            isinstance(dimension, CategoricalDimension)
            for dimension in adapter.dimensions)
        assert not any("." in dimension.name
                       for dimension in adapter.dimensions)

        original_df = self.hierarchical_hypergrid.random_dataframe(
            num_samples=10000)
        projected_df = adapter.project_dataframe(df=original_df,
                                                 in_place=False)
        unprojected_df = adapter.unproject_dataframe(df=projected_df,
                                                     in_place=False)
        assert original_df.equals(unprojected_df)
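
The category-to-integer projection that makes this round trip lossless can be pictured with plain pandas. This is just the underlying idea, not the adapter's actual implementation:

import pandas as pd

values = pd.Series(["lo", "hi", "lo", "mid"])
codes, categories = pd.factorize(values)     # e.g. codes = [0, 1, 0, 2]
recovered = pd.Series(categories.take(codes))
assert recovered.equals(values)              # the projection is invertible
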
Example #9
    def test_translating_dataframe_from_categorical_to_discrete_simple_hypergrid(self):
        adapter = CategoricalToDiscreteHypergridAdapter(adaptee=self.simple_hypergrid)
        original_df = self.simple_hypergrid.random_dataframe(num_samples=10000)
        translated_df = adapter.translate_dataframe(original_df, in_place=False)
        # Let's make sure a deep copy was made.
        #
        self.assertTrue(id(original_df) != id(translated_df))
        self.assertFalse(original_df.equals(translated_df))

        # Let's copy the translated_df before testing that all values are numeric - the test might change the data.
        copied_df = translated_df.copy(deep=True)
        columns = copied_df.columns.values.tolist()
        for column in columns:
            # For each column let's validate that it contains only numerics. We'll do this by coercing all values to numerics.
            # If such coercion fails, it produces a null value, so we can validate that there are no nulls in the output.
            self.assertTrue(pd.to_numeric(copied_df[column], errors='coerce').notnull().all())

        # To make sure the check above is capable of failing, let's try the same trick on an input column that we know contains non-numeric values.
        #
        copied_original_df = original_df.copy(deep=True)
        self.assertFalse(pd.to_numeric(copied_original_df['categorical_mixed_types'], errors='coerce').notnull().all())


        untranslated_df = adapter.untranslate_dataframe(translated_df, in_place=False)
        self.assertTrue(id(original_df) != id(untranslated_df))
        self.assertTrue(original_df.equals(untranslated_df))

        # Let's make sure that translating in place works as expected.
        translated_in_place_df = adapter.translate_dataframe(original_df)
        self.assertTrue(id(original_df) == id(translated_in_place_df))
        self.assertTrue(translated_in_place_df.equals(translated_df))
        untranslated_in_place_df = adapter.untranslate_dataframe(translated_in_place_df)
        self.assertTrue(id(original_df) == id(untranslated_in_place_df))
        self.assertTrue(untranslated_in_place_df.equals(untranslated_df))
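
The coercion trick used for the all-numeric check above, in isolation:

import pandas as pd

s = pd.Series([1, "2", "banana"])
coerced = pd.to_numeric(s, errors='coerce')  # -> [1.0, 2.0, NaN]
assert not coerced.notnull().all()           # non-numeric values surface as NaN
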
Example #10
class ObservationsDataSource:
    """Maintains data source that the individual GridPlots can use.
    """
    def __init__(self, optimization_problem: OptimizationProblem,
                 parameters_df: pd.DataFrame, context_df: pd.DataFrame,
                 objectives_df: pd.DataFrame, pareto_df: pd.DataFrame):
        self.optimization_problem = optimization_problem
        self._feature_space_adapter = CategoricalToDiscreteHypergridAdapter(
            adaptee=self.optimization_problem.feature_space)

        self.parameters_df: pd.DataFrame = None
        self.context_df: pd.DataFrame = None
        self.objectives_df: pd.DataFrame = None
        self.pareto_df: pd.DataFrame = None
        self.observations_df: pd.DataFrame = None

        self.data_source: ColumnDataSource = ColumnDataSource()
        self.update_data(parameters_df=parameters_df,
                         context_df=context_df,
                         objectives_df=objectives_df,
                         pareto_df=pareto_df)

    def update_data(self, parameters_df: pd.DataFrame,
                    context_df: pd.DataFrame, objectives_df: pd.DataFrame,
                    pareto_df: pd.DataFrame):
        self.parameters_df = parameters_df
        self.context_df = context_df
        self.objectives_df = objectives_df
        self.pareto_df = pareto_df
        self.observations_df = self._construct_observations()

        # In order to preserve the identity of the data source, we create temporary ones, and then copy their data over to the data
        # sources in use by the grid plots.
        #
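        # (Bokeh glyphs hold a reference to the ColumnDataSource they were created
        # with, so replacing .data on the existing source is what propagates the
        # new observations to every plot already attached to it.)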
        temp_data_source = ColumnDataSource(data=self.observations_df)
        self.data_source.data = dict(temp_data_source.data)

    def _construct_observations(self):
        features_df = self.optimization_problem.construct_feature_dataframe(
            parameters_df=self.parameters_df,
            context_df=self.context_df,
            product=False)
        projected_features_df = self._feature_space_adapter.project_dataframe(
            features_df)
        observations_df = pd.concat(
            [projected_features_df, self.objectives_df], axis=1)
        observations_df['is_pareto'] = False
        observations_df.loc[self.pareto_df.index, 'is_pareto'] = True
        return observations_df
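
The is_pareto flag set in _construct_observations() relies on pareto_df sharing its index with observations_df. The same idiom in a minimal pandas sketch:

import pandas as pd

observations = pd.DataFrame({'objective': [3.0, 1.0, 2.0]})
pareto = observations.loc[[1]]                      # hypothetical pareto subset
observations['is_pareto'] = False
observations.loc[pareto.index, 'is_pareto'] = True  # only row 1 gets flagged
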
Example #11
    def __init__(self,
                 optimization_problem: OptimizationProblem,
                 observations_data_source: ObservationsDataSource,
                 logger=None):
        if logger is None:
            logger = create_logger(self.__class__.__name__)
        self.logger = logger

        # The data source is maintained by the tomograph.
        #
        self._observations_data_source = observations_data_source

        # Metadata - what dimensions are we going to be plotting here?
        #
        self.optimization_problem = optimization_problem
        self.num_objectives = len(
            optimization_problem.objective_space.dimension_names)
        self.objective_names = optimization_problem.objective_space.dimension_names
        self._feature_space_adapter = CategoricalToDiscreteHypergridAdapter(
            adaptee=self.optimization_problem.feature_space)

        # Stores figure ranges by name so that we can synchronize zooming and panning
        #
        self._x_ranges_by_name = {}
        self._y_ranges_by_name = {}

        # Stores an array of all plots for all objectives.
        #
        self._figures = [[None for col in range(self.num_objectives)]
                         for row in range(self.num_objectives)]

        self._title = Div(text="<h1>Objectives</h1>")

        # Stores the bokeh gridplot object.
        #
        self._grid_plot = None
Example #12
    def test_translating_point_from_categorical_to_discrete_composite_hypergrid(self):
        with self.assertRaises(NotImplementedError):
            hierarchical_adapter = CategoricalToDiscreteHypergridAdapter(adaptee=self.hierarchical_hypergrid)
            self._test_translating_categorical_to_discrete_point_from_adaptee(adaptee=self.hierarchical_hypergrid, adapter=hierarchical_adapter)
Example #13
class DecisionTreeRegressionModel(RegressionModel):
    """ Wraps sklearn's DecisionTreeRegressor.

    TODO: Beef up the RegressionModel base class and actually enforce a consistent interface.
    TODO: See how much boilerplate we can remove from model creation.
    """

    _PREDICTOR_OUTPUT_COLUMNS = [
        Prediction.LegalColumnNames.IS_VALID_INPUT,
        Prediction.LegalColumnNames.PREDICTED_VALUE,
        Prediction.LegalColumnNames.PREDICTED_VALUE_VARIANCE,
        Prediction.LegalColumnNames.SAMPLE_VARIANCE,
        Prediction.LegalColumnNames.SAMPLE_SIZE,
        Prediction.LegalColumnNames.PREDICTED_VALUE_DEGREES_OF_FREEDOM
    ]

    def __init__(self,
                 model_config: Point,
                 input_space: Hypergrid,
                 output_space: Hypergrid,
                 logger=None):
        if logger is None:
            logger = create_logger("DecisionTreeRegressionModel")
        self.logger = logger

        assert model_config in decision_tree_config_store.parameter_space
        RegressionModel.__init__(self,
                                 model_type=type(self),
                                 model_config=model_config,
                                 input_space=input_space,
                                 output_space=output_space)

        self._input_space_adapter = CategoricalToDiscreteHypergridAdapter(
            adaptee=self.input_space)

        self.input_dimension_names = [
            dimension.name
            for dimension in self._input_space_adapter.dimensions
        ]
        self.target_dimension_names = [
            dimension.name for dimension in self.output_space.dimensions
        ]
        self.logger.debug(
            f"Input dimensions: {self.input_dimension_names}; Target dimensions: {self.target_dimension_names}.")

        assert len(self.target_dimension_names) == 1, \
            "For now (and perhaps forever) we only support a single target per tree."

        self._regressor = DecisionTreeRegressor(
            criterion=self.model_config.criterion,
            splitter=self.model_config.splitter,
            max_depth=self.model_config.max_depth
            if self.model_config.max_depth != 0 else None,
            min_samples_split=self.model_config.min_samples_split,
            min_samples_leaf=self.model_config.min_samples_leaf,
            min_weight_fraction_leaf=self.model_config.min_weight_fraction_leaf,
            max_features=self.model_config.max_features,
            random_state=self.model_config.get("random_state", None),
            max_leaf_nodes=self.model_config.max_leaf_nodes
            if self.model_config.max_leaf_nodes not in (0, 1) else None,
            min_impurity_decrease=self.model_config.min_impurity_decrease,
            ccp_alpha=self.model_config.ccp_alpha)

        # These are used to compute the variance in predictions
        self._observations_per_leaf = dict()
        self._mean_per_leaf = dict()
        self._mean_variance_per_leaf = dict()
        self._sample_variance_per_leaf = dict()
        self._count_per_leaf = dict()

        self._trained = False

    @property
    def trained(self):
        return self._trained

    @property
    def num_observations_used_to_fit(self):
        return self.last_refit_iteration_number

    def should_fit(self, num_samples):
        """ Returns true if the model should be fitted.

        This model should be fitted under the following conditions:
        1) It has not been fitted yet and num_samples is larger than min_samples_to_fit
        2) The model has been fitted and the number of new samples is larger than n_new_samples_before_refit

        :param num_samples:
        :return:
        """
        if not self.trained:
            return num_samples > self.model_config.min_samples_to_fit
        num_new_samples = num_samples - self.num_observations_used_to_fit
        return num_new_samples >= self.model_config.n_new_samples_before_refit
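
        # Illustrative walk-through (hypothetical config values): with
        # min_samples_to_fit=10 and n_new_samples_before_refit=5, an untrained
        # model first fits once num_samples reaches 11, and thereafter refits
        # whenever at least 5 new observations have arrived since the last fit.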

    @trace()
    def fit(self, feature_values_pandas_frame, target_values_pandas_frame,
            iteration_number):
        self.logger.debug(
            f"Fitting a {self.__class__.__name__} with {len(feature_values_pandas_frame.index)} observations."
        )

        # Let's get the numpy arrays out of the panda frames
        #
        feature_values_pandas_frame = self._input_space_adapter.project_dataframe(
            feature_values_pandas_frame, in_place=False)

        feature_values = feature_values_pandas_frame[
            self.input_dimension_names].to_numpy()
        target_values = target_values_pandas_frame[
            self.target_dimension_names].to_numpy()

        # Clean up state before fitting again
        self._observations_per_leaf = dict()

        self._regressor.fit(feature_values, target_values)

        # Now that we have fit the model, we can augment our tree by computing the per-leaf variance.
        # TODO: this code can be easily optimized, but premature optimization is the root of all evil.
        node_indices = self._regressor.apply(feature_values)
        self.logger.debug(
            f"The fitted tree assigned {len(node_indices)} observations to leaf nodes.")

        for node_index, sample_target_value in zip(node_indices,
                                                   target_values):
            observations_at_leaf = self._observations_per_leaf.get(
                node_index, [])
            observations_at_leaf.append(sample_target_value)
            self._observations_per_leaf[node_index] = observations_at_leaf

        # Now let's compute all predictions
        for node_index in self._observations_per_leaf:
            # First convert the observations to a numpy array.
            observations_at_leaf = np.array(
                self._observations_per_leaf[node_index])
            self._observations_per_leaf[node_index] = observations_at_leaf

            leaf_mean = np.mean(observations_at_leaf)
            leaf_sample_variance = np.var(
                observations_at_leaf, ddof=1
            )  # ddof = delta degrees of freedom. We want sample variance.
            leaf_mean_variance = leaf_sample_variance / len(
                observations_at_leaf)

            self._mean_per_leaf[node_index] = leaf_mean
            self._mean_variance_per_leaf[node_index] = leaf_mean_variance
            self._sample_variance_per_leaf[node_index] = leaf_sample_variance
            self._count_per_leaf[node_index] = len(observations_at_leaf)

        self._trained = True
        self.last_refit_iteration_number = iteration_number

    @trace()
    def predict(self,
                feature_values_pandas_frame,
                include_only_valid_rows=True):
        self.logger.debug(
            f"Creating predictions for {len(feature_values_pandas_frame.index)} samples."
        )

        # dataframe column shortcuts
        is_valid_input_col = Prediction.LegalColumnNames.IS_VALID_INPUT.value
        predicted_value_col = Prediction.LegalColumnNames.PREDICTED_VALUE.value
        predicted_value_var_col = Prediction.LegalColumnNames.PREDICTED_VALUE_VARIANCE.value
        sample_var_col = Prediction.LegalColumnNames.SAMPLE_VARIANCE.value
        sample_size_col = Prediction.LegalColumnNames.SAMPLE_SIZE.value
        dof_col = Prediction.LegalColumnNames.PREDICTED_VALUE_DEGREES_OF_FREEDOM.value

        valid_rows_index = None
        features_df = None
        if self.trained:
            valid_features_df = self.input_space.filter_out_invalid_rows(
                original_dataframe=feature_values_pandas_frame,
                exclude_extra_columns=True)
            features_df = self._input_space_adapter.project_dataframe(
                valid_features_df, in_place=False)
            valid_rows_index = features_df.index

        predictions = Prediction(
            objective_name=self.target_dimension_names[0],
            predictor_outputs=self._PREDICTOR_OUTPUT_COLUMNS,
            dataframe_index=valid_rows_index)
        prediction_dataframe = predictions.get_dataframe()

        if valid_rows_index is not None and not valid_rows_index.empty:
            prediction_dataframe['leaf_node_index'] = self._regressor.apply(
                features_df.loc[valid_rows_index].to_numpy())
            prediction_dataframe[predicted_value_col] = prediction_dataframe[
                'leaf_node_index'].map(self._mean_per_leaf)
            prediction_dataframe[
                predicted_value_var_col] = prediction_dataframe[
                    'leaf_node_index'].map(self._mean_variance_per_leaf)
            prediction_dataframe[sample_var_col] = prediction_dataframe[
                'leaf_node_index'].map(self._sample_variance_per_leaf)
            prediction_dataframe[sample_size_col] = prediction_dataframe[
                'leaf_node_index'].map(self._count_per_leaf)
            prediction_dataframe[
                dof_col] = prediction_dataframe[sample_size_col] - 1
            prediction_dataframe[is_valid_input_col] = True
            prediction_dataframe.drop(columns=['leaf_node_index'],
                                      inplace=True)

        predictions.validate_dataframe(prediction_dataframe)
        if not include_only_valid_rows:
            predictions.add_invalid_rows_at_missing_indices(
                desired_index=feature_values_pandas_frame.index)
        return predictions
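
To sanity-check the per-leaf statistics computed in fit() above, here is the same arithmetic on a toy leaf (illustrative values only):

import numpy as np

observations_at_leaf = np.array([3.0, 5.0, 7.0])
leaf_mean = np.mean(observations_at_leaf)                               # 5.0
leaf_sample_variance = np.var(observations_at_leaf, ddof=1)             # 4.0 (unbiased)
leaf_mean_variance = leaf_sample_variance / len(observations_at_leaf)   # ~1.33
degrees_of_freedom = len(observations_at_leaf) - 1                      # 2, as in predict()
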
Example #14
class GridPlot:
    """Maintains all data, meta-data and styling information required to produce a grid-plot.

    The grid plot is built based on the OptimizationProblem instance, to find out what objectives and what
    features are to be plotted. We use information contained in the dimensions to compute the ranges for all
    axes/ranges on the plot, as well as to configure the color map.

    If the range is infinite (as can be the case with many objectives) we can use the observed range of values to
    configure the range of values to be plotted.

    Each figure in the grid plot contains:
    * Either a scatter plot of feature vs. feature where the color of each point corresponds to the objective value
    * Or a scatter plot of feature vs. objective (if we are on a diagonal).

    Additionally, we could also plot the predicted values as a background heatmap for the feature vs. feature
    plots, and predicted values with confidence intervals for the feature vs. objective plots. This, of course,
    introduces the complication of needing to query the optimizer for each pixel, so we will add it later.
    """
    def __init__(self,
                 optimization_problem: OptimizationProblem,
                 objective_name: str,
                 observations_data_source: ObservationsDataSource,
                 logger=None):
        if logger is None:
            logger = create_logger(self.__class__.__name__)
        self.logger = logger

        # The data source is maintained by the tomograph.
        #
        self._observations_data_source = observations_data_source

        # Metadata - what dimensions are we going to be plotting here?
        #
        self.optimization_problem = optimization_problem
        assert objective_name in self.optimization_problem.objective_space.dimension_names
        self.objective_name = objective_name

        # The adapter is needed if we want to create plots of categorical dimensions. It maps categorical values to integers so
        # that we can consistently place them on the plots.
        #
        self._feature_space_adapter = CategoricalToDiscreteHypergridAdapter(
            adaptee=self.optimization_problem.feature_space)

        self.feature_dimension_names: List[str] = [
            feature_name
            for feature_name in self._feature_space_adapter.dimension_names
            if feature_name != "contains_context"
        ]
        self.num_features = len(self.feature_dimension_names)

        # Stores figure ranges by name so that we can synchronize zooming and panning
        #
        self._x_ranges_by_name = {}
        self._y_ranges_by_name = {}

        # Stores an array of all plots for all objectives.
        #
        self._figures = [[None for col in range(self.num_features)]
                         for row in range(self.num_features)]

        self._title = Div(text=f"<h1>{self.objective_name}</h1>")

        # Stores the bokeh gridplot object.
        #
        self._grid_plot = None

    @property
    def formatted_plots(self):
        return column([self._title, self._grid_plot])

    def update_plots(self):
        """Updates the plot with observations from data source.
        """

        self._x_ranges_by_name = {}
        self._y_ranges_by_name = {}
        self._grid_plot = None

        tooltips = [(f"{feature_name}", f"@{feature_name}")
                    for feature_name in self.feature_dimension_names]
        tooltips.extend([
            (f"{objective_name}", f"@{objective_name}")
            for objective_name in self.optimization_problem.objective_names
        ])
        hover = HoverTool(tooltips=tooltips)

        plot_options = dict(plot_width=int(2000 / self.num_features),
                            plot_height=int(2000 / self.num_features),
                            tools=[
                                'box_select', 'lasso_select', 'box_zoom',
                                'wheel_zoom', 'reset', hover
                            ])

        final_column_plot_options = dict(
            plot_width=int(2000 / self.num_features) + 75,
            plot_height=int(2000 / self.num_features),
            tools=[
                'box_select', 'lasso_select', 'box_zoom', 'wheel_zoom',
                'reset', hover
            ])

        color_mapper = LinearColorMapper(
            palette='Turbo256',
            low=self._observations_data_source.observations_df[
                self.objective_name].min(),
            high=self._observations_data_source.observations_df[
                self.objective_name].max())

        for row, row_dimension_name in enumerate(self.feature_dimension_names):
            for col, col_dimension_name in enumerate(
                    self.feature_dimension_names):

                x_axis_name = col_dimension_name
                x_ticks, x_tick_label_mapping = self._get_feature_ticks_and_tick_label_mapping(
                    x_axis_name)

                if row == col:
                    # For plots on the diagonals, we want to plot the row dimension vs. objective
                    #
                    y_axis_name = self.objective_name

                    # Since objectives are always continuous, the default ticks and tick-labels provided by bokeh work well.
                    #
                    y_ticks, y_tick_label_mapping = None, None
                else:
                    y_axis_name = row_dimension_name
                    y_ticks, y_tick_label_mapping = self._get_feature_ticks_and_tick_label_mapping(
                        y_axis_name)

                if col == (self.num_features - 1):
                    fig = figure(**final_column_plot_options)
                else:
                    fig = figure(**plot_options)

                fig.scatter(
                    x_axis_name,
                    y_axis_name,
                    color={
                        'field': self.objective_name,
                        'transform': color_mapper
                    },
                    marker='circle',
                    source=self._observations_data_source.data_source,
                )

                fig.xaxis.axis_label = x_axis_name
                fig.yaxis.axis_label = y_axis_name

                fig.xaxis.ticker = x_ticks
                fig.axis.major_label_overrides = x_tick_label_mapping

                if y_ticks is not None:
                    fig.yaxis.ticker = y_ticks
                    fig.yaxis.major_label_overrides = y_tick_label_mapping

                self._set_ranges(fig, x_axis_name, y_axis_name)

                self.logger.debug(
                    f"Assigning figure to [{row}][{col}]. {self.objective_name}, {row_dimension_name}, {col_dimension_name}"
                )
                self._figures[row][col] = fig

            color_bar = ColorBar(color_mapper=color_mapper,
                                 label_standoff=12,
                                 location=(0, 0),
                                 title=self.objective_name)
            self._figures[row][-1].add_layout(color_bar, 'right')

        self._grid_plot = gridplot(self._figures)

    def _get_feature_ticks_and_tick_label_mapping(self, axis_name):
        """Returns tick positions as well as labels for each tick.

        The complication is that tick labels can be categorical, but ticks must be plotted at locations specified by integers.

        Once again adapters come to the rescue: we simply use an adapter to construct a (persistent) mapping between the categorical
        values (needed to label the ticks) and integer values (needed to position the ticks). This mapping is persisted in the
        adapter and here we dole it out to each plot on an as-needed basis.

        :param axis_name:
        :return:
        """
        projected_ticks = self._feature_space_adapter[axis_name].linspace(5)
        projected_ticks_df = pd.DataFrame({axis_name: projected_ticks})
        unprojected_ticks_df = self._feature_space_adapter.unproject_dataframe(
            projected_ticks_df)
        unprojected_col_name = unprojected_ticks_df.columns[0]
        tick_mapping = {
            projected_tick: f"{unprojected_tick:.2f}" if isinstance(
                unprojected_tick, float) else str(unprojected_tick)
            for projected_tick, unprojected_tick in zip(
                projected_ticks, unprojected_ticks_df[unprojected_col_name])
        }
        return projected_ticks, tick_mapping

    def _set_ranges(self, fig, x_axis_name, y_axis_name):
        """Sets the ranges on each axis to enable synchronized panning and zooming.

        Basically, when we see a given range name for the first time, we cache the range and reuse that cached range
        for all subsequent figures. This way all plots that share the same range name (and so the same dimension) are
        synchronized for panning and zooming.
        """
        if x_axis_name in self._x_ranges_by_name:
            fig.x_range = self._x_ranges_by_name[x_axis_name]
        else:
            self._x_ranges_by_name[x_axis_name] = fig.x_range

        if y_axis_name in self._y_ranges_by_name:
            fig.y_range = self._y_ranges_by_name[y_axis_name]
        else:
            self._y_ranges_by_name[y_axis_name] = fig.y_range
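
The range-sharing idiom in _set_ranges(), reduced to its essence as a standalone bokeh sketch:

from bokeh.plotting import figure

fig_a = figure()
fig_b = figure(x_range=fig_a.x_range)  # both figures now share one Range object,
                                       # so panning/zooming x on either moves both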