def test_projecting_point_from_categorical_hierarchical_to_discrete_flat_hypergrid(self):
    """ Exercises the adapter stacking functionality.

    This is a major use case for our models.

    :return:
    """
    first_adapter = HierarchicalToFlatHypergridAdapter(adaptee=self.hierarchical_hypergrid)
    adapter = CategoricalToDiscreteHypergridAdapter(adaptee=first_adapter)
    self.assertFalse(any(isinstance(dimension, CategoricalDimension) for dimension in adapter.dimensions))
    self.assertFalse(any("." in dimension.name for dimension in adapter.dimensions))

    for _ in range(1000):
        original_point = self.hierarchical_hypergrid.random()
        projected_point = adapter.project_point(original_point)

        self.assertTrue(all(isinstance(dim_value, Number) for dim_name, dim_value in projected_point))
        self.assertFalse(any("." in dim_name for dim_name, value in projected_point))
        self.assertFalse(projected_point == original_point)

        unprojected_point = adapter.unproject_point(projected_point)
        self.assertTrue(unprojected_point in self.hierarchical_hypergrid)
        self.assertTrue(original_point == unprojected_point)
def test_translating_dataframe_from_categorical_hierarchical_to_discrete_flat_hypergrid(self):
    adapter = CategoricalToDiscreteHypergridAdapter(
        adaptee=HierarchicalToFlatHypergridAdapter(
            adaptee=self.hierarchical_hypergrid
        )
    )
    self.assertFalse(any(isinstance(dimension, CategoricalDimension) for dimension in adapter.dimensions))
    self.assertFalse(any("." in dimension.name for dimension in adapter.dimensions))

    original_df = self.hierarchical_hypergrid.random_dataframe(num_samples=10000)
    translated_df = adapter.translate_dataframe(df=original_df, in_place=False)
    untranslated_df = adapter.untranslate_dataframe(df=translated_df, in_place=False)
    self.assertTrue(original_df.equals(untranslated_df))
def test_translating_point_from_categorical_to_discrete_simple_hypergrid(self):
    """ Tests if we can successfully execute the CategoricalToDiscreteHypergridAdapter on a simple hypergrid.

    :return:
    """
    adapter = CategoricalToDiscreteHypergridAdapter(adaptee=self.simple_hypergrid)
    self._test_translating_categorical_to_discrete_point_from_adaptee(adaptee=self.simple_hypergrid, adapter=adapter)
def test_projecting_point_from_hierarchical_categorical_to_discrete_hypergrid(self):
    # This used to raise, but now it's handled internally so let's make sure it doesn't raise anymore.
    #
    hierarchical_adapter = CategoricalToDiscreteHypergridAdapter(adaptee=self.hierarchical_hypergrid)
    self._test_projecting_categorical_to_discrete_point_from_adaptee(adaptee=self.hierarchical_hypergrid, adapter=hierarchical_adapter)
def test_projecting_dataframe_from_categorical_hierarchical_to_discrete_flat_hypergrid(self):
    adapter = CategoricalToDiscreteHypergridAdapter(
        adaptee=HierarchicalToFlatHypergridAdapter(adaptee=self.hierarchical_hypergrid)
    )
    assert not any(isinstance(dimension, CategoricalDimension) for dimension in adapter.dimensions)
    assert not any("." in dimension.name for dimension in adapter.dimensions)

    original_df = self.hierarchical_hypergrid.random_dataframe(num_samples=10000)
    projected_df = adapter.project_dataframe(df=original_df, in_place=False)
    unprojected_df = adapter.unproject_dataframe(df=projected_df, in_place=False)
    assert original_df.equals(unprojected_df)
def test_translating_dataframe_from_categorical_to_discrete_simple_hypergrid(self):
    adapter = CategoricalToDiscreteHypergridAdapter(adaptee=self.simple_hypergrid)
    original_df = self.simple_hypergrid.random_dataframe(num_samples=10000)
    translated_df = adapter.translate_dataframe(original_df, in_place=False)

    # Let's make sure we have a deep copy.
    #
    self.assertTrue(id(original_df) != id(translated_df))
    self.assertFalse(original_df.equals(translated_df))

    # Let's copy the translated_df before testing if all is numeric - the test might change the data.
    #
    copied_df = translated_df.copy(deep=True)
    columns = copied_df.columns.values.tolist()
    for column in columns:
        # For each column let's validate that it contains only numerics. We'll do this by coercing all values to numerics.
        # If such coercion fails, it produces a null value, so we can validate that there are no nulls in the output.
        #
        self.assertTrue(pd.to_numeric(copied_df[column], errors='coerce').notnull().all())

    # To make sure the check above is capable of failing, let's try the same trick on the input where we know there are non-numeric values.
    #
    copied_original_df = original_df.copy(deep=True)
    self.assertFalse(pd.to_numeric(copied_original_df['categorical_mixed_types'], errors='coerce').notnull().all())

    untranslated_df = adapter.untranslate_dataframe(translated_df, in_place=False)
    self.assertTrue(id(original_df) != id(untranslated_df))
    self.assertTrue(original_df.equals(untranslated_df))

    # Let's make sure that translating in place works as expected.
    #
    translated_in_place_df = adapter.translate_dataframe(original_df)
    self.assertTrue(id(original_df) == id(translated_in_place_df))
    self.assertTrue(translated_in_place_df.equals(translated_df))
    untranslated_in_place_df = adapter.untranslate_dataframe(translated_in_place_df)
    self.assertTrue(id(original_df) == id(untranslated_in_place_df))
    self.assertTrue(untranslated_in_place_df.equals(untranslated_df))
class ObservationsDataSource:
    """Maintains the data source that the individual GridPlots can use."""

    def __init__(
        self,
        optimization_problem: OptimizationProblem,
        parameters_df: pd.DataFrame,
        context_df: pd.DataFrame,
        objectives_df: pd.DataFrame,
        pareto_df: pd.DataFrame
    ):
        self.optimization_problem = optimization_problem
        self._feature_space_adapter = CategoricalToDiscreteHypergridAdapter(adaptee=self.optimization_problem.feature_space)
        self.parameters_df: pd.DataFrame = None
        self.context_df: pd.DataFrame = None
        self.objectives_df: pd.DataFrame = None
        self.pareto_df: pd.DataFrame = None
        self.observations_df: pd.DataFrame = None
        self.data_source: ColumnDataSource = ColumnDataSource()
        self.update_data(parameters_df=parameters_df, context_df=context_df, objectives_df=objectives_df, pareto_df=pareto_df)

    def update_data(self, parameters_df: pd.DataFrame, context_df: pd.DataFrame, objectives_df: pd.DataFrame, pareto_df: pd.DataFrame):
        self.parameters_df = parameters_df
        self.context_df = context_df
        self.objectives_df = objectives_df
        self.pareto_df = pareto_df
        self.observations_df = self._construct_observations()

        # In order to preserve the identity of the data source, we create temporary ones, and then copy their data
        # over to the data sources in use by the grid plots.
        #
        temp_data_source = ColumnDataSource(data=self.observations_df)
        self.data_source.data = dict(temp_data_source.data)

    def _construct_observations(self):
        features_df = self.optimization_problem.construct_feature_dataframe(
            parameters_df=self.parameters_df,
            context_df=self.context_df,
            product=False
        )
        projected_features_df = self._feature_space_adapter.project_dataframe(features_df)
        observations_df = pd.concat([projected_features_df, self.objectives_df], axis=1)
        observations_df['is_pareto'] = False
        observations_df.loc[self.pareto_df.index, 'is_pareto'] = True
        return observations_df
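# A minimal sketch (assumed, not part of the original module) of why update_data() copies data into the existing
# ColumnDataSource rather than re-binding the attribute: bokeh glyphs hold a reference to the source object, so
# mutating its .data in place keeps every plot wired to the same source. The column name 'x' is hypothetical.
def _demo_column_data_source_identity():
    from bokeh.models import ColumnDataSource

    data_source = ColumnDataSource(data={'x': [1, 2, 3]})
    original_id = id(data_source)

    # Copy new data over instead of replacing the object - any glyph holding a reference sees the update.
    temp_data_source = ColumnDataSource(data={'x': [4, 5, 6]})
    data_source.data = dict(temp_data_source.data)

    assert id(data_source) == original_id
    return data_source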
def __init__(self, optimization_problem: OptimizationProblem, observations_data_source: ObservationsDataSource, logger=None):
    if logger is None:
        logger = create_logger(self.__class__.__name__)
    self.logger = logger

    # The data source is maintained by the tomograph.
    #
    self._observations_data_source = observations_data_source

    # Metadata - what dimensions are we going to be plotting here?
    #
    self.optimization_problem = optimization_problem
    self.num_objectives = len(optimization_problem.objective_space.dimension_names)
    self.objective_names = optimization_problem.objective_space.dimension_names
    self._feature_space_adapter = CategoricalToDiscreteHypergridAdapter(adaptee=self.optimization_problem.feature_space)

    # Stores figure ranges by name so that we can synchronize zooming and panning.
    #
    self._x_ranges_by_name = {}
    self._y_ranges_by_name = {}

    # Stores an array of all plots for all objectives.
    #
    self._figures = [[None for col in range(self.num_objectives)] for row in range(self.num_objectives)]
    self._title = Div(text="<h1>Objectives</h1>")

    # Stores the bokeh gridplot object.
    #
    self._grid_plot = None
def test_translating_point_from_categorical_to_discrete_composite_hypergrid(self):
    with self.assertRaises(NotImplementedError):
        hierarchical_adapter = CategoricalToDiscreteHypergridAdapter(adaptee=self.hierarchical_hypergrid)
        self._test_translating_categorical_to_discrete_point_from_adaptee(adaptee=self.hierarchical_hypergrid, adapter=hierarchical_adapter)
class DecisionTreeRegressionModel(RegressionModel):
    """ Wraps sklearn's DecisionTreeRegressor.

    TODO: Beef up the RegressionModel base class and actually enforce a consistent interface.
    TODO: See how much boilerplate we can remove from model creation.
    """

    _PREDICTOR_OUTPUT_COLUMNS = [
        Prediction.LegalColumnNames.IS_VALID_INPUT,
        Prediction.LegalColumnNames.PREDICTED_VALUE,
        Prediction.LegalColumnNames.PREDICTED_VALUE_VARIANCE,
        Prediction.LegalColumnNames.SAMPLE_VARIANCE,
        Prediction.LegalColumnNames.SAMPLE_SIZE,
        Prediction.LegalColumnNames.PREDICTED_VALUE_DEGREES_OF_FREEDOM
    ]

    def __init__(self, model_config: Point, input_space: Hypergrid, output_space: Hypergrid, logger=None):
        if logger is None:
            logger = create_logger("DecisionTreeRegressionModel")
        self.logger = logger

        assert model_config in decision_tree_config_store.parameter_space
        RegressionModel.__init__(
            self,
            model_type=type(self),
            model_config=model_config,
            input_space=input_space,
            output_space=output_space
        )

        self._input_space_adapter = CategoricalToDiscreteHypergridAdapter(adaptee=self.input_space)

        self.input_dimension_names = [dimension.name for dimension in self._input_space_adapter.dimensions]
        self.target_dimension_names = [dimension.name for dimension in self.output_space.dimensions]
        self.logger.debug(f"Input dimensions: {str(self.input_dimension_names)}; Target dimensions: {str(self.target_dimension_names)}.")

        assert len(self.target_dimension_names) == 1, "For now (and perhaps forever) we only support a single target per tree."

        self._regressor = DecisionTreeRegressor(
            criterion=self.model_config.criterion,
            splitter=self.model_config.splitter,
            max_depth=self.model_config.max_depth if self.model_config.max_depth != 0 else None,
            min_samples_split=self.model_config.min_samples_split,
            min_samples_leaf=self.model_config.min_samples_leaf,
            min_weight_fraction_leaf=self.model_config.min_weight_fraction_leaf,
            max_features=self.model_config.max_features,
            random_state=self.model_config.get("random_state", None),
            max_leaf_nodes=self.model_config.max_leaf_nodes if self.model_config.max_leaf_nodes not in (0, 1) else None,
            min_impurity_decrease=self.model_config.min_impurity_decrease,
            ccp_alpha=self.model_config.ccp_alpha
        )

        # These are used to compute the variance in predictions.
        #
        self._observations_per_leaf = dict()
        self._mean_per_leaf = dict()
        self._mean_variance_per_leaf = dict()
        self._sample_variance_per_leaf = dict()
        self._count_per_leaf = dict()

        self._trained = False

    @property
    def trained(self):
        return self._trained

    @property
    def num_observations_used_to_fit(self):
        return self.last_refit_iteration_number

    def should_fit(self, num_samples):
        """ Returns true if the model should be fitted.

        This model should be fitted under the following conditions:
        1) It has not been fitted yet and num_samples is larger than min_samples_to_fit
        2) The model has been fitted and the number of new samples is larger than n_new_samples_before_refit

        :param num_samples:
        :return:
        """
        if not self.trained:
            return num_samples > self.model_config.min_samples_to_fit
        num_new_samples = num_samples - self.num_observations_used_to_fit
        return num_new_samples >= self.model_config.n_new_samples_before_refit

    @trace()
    def fit(self, feature_values_pandas_frame, target_values_pandas_frame, iteration_number):
        self.logger.debug(f"Fitting a {self.__class__.__name__} with {len(feature_values_pandas_frame.index)} observations.")

        # Let's get the numpy arrays out of the pandas frames.
        #
        feature_values_pandas_frame = self._input_space_adapter.project_dataframe(feature_values_pandas_frame, in_place=False)
        feature_values = feature_values_pandas_frame[self.input_dimension_names].to_numpy()
        target_values = target_values_pandas_frame[self.target_dimension_names].to_numpy()

        # Clean up state before fitting again.
        #
        self._observations_per_leaf = dict()

        self._regressor.fit(feature_values, target_values)

        # Now that we have fit the model we can augment our tree by computing the variance.
        # TODO: this code can be easily optimized, but premature optimization is the root of all evil.
        #
        node_indices = self._regressor.apply(feature_values)
        self.logger.debug(f"The resulting tree has {len(set(node_indices))} leaf nodes.")

        for node_index, sample_target_value in zip(node_indices, target_values):
            observations_at_leaf = self._observations_per_leaf.get(node_index, [])
            observations_at_leaf.append(sample_target_value)
            self._observations_per_leaf[node_index] = observations_at_leaf

        # Now let's compute all predictions.
        #
        for node_index in self._observations_per_leaf:
            # First convert the observations to a numpy array.
            #
            observations_at_leaf = np.array(self._observations_per_leaf[node_index])
            self._observations_per_leaf[node_index] = observations_at_leaf

            leaf_mean = np.mean(observations_at_leaf)
            leaf_sample_variance = np.var(observations_at_leaf, ddof=1)  # ddof = delta degrees of freedom. We want sample variance.
            leaf_mean_variance = leaf_sample_variance / len(observations_at_leaf)

            self._mean_per_leaf[node_index] = leaf_mean
            self._mean_variance_per_leaf[node_index] = leaf_mean_variance
            self._sample_variance_per_leaf[node_index] = leaf_sample_variance
            self._count_per_leaf[node_index] = len(observations_at_leaf)

        self._trained = True
        self.last_refit_iteration_number = iteration_number

    @trace()
    def predict(self, feature_values_pandas_frame, include_only_valid_rows=True):
        self.logger.debug(f"Creating predictions for {len(feature_values_pandas_frame.index)} samples.")

        # Dataframe column shortcuts.
        #
        is_valid_input_col = Prediction.LegalColumnNames.IS_VALID_INPUT.value
        predicted_value_col = Prediction.LegalColumnNames.PREDICTED_VALUE.value
        predicted_value_var_col = Prediction.LegalColumnNames.PREDICTED_VALUE_VARIANCE.value
        sample_var_col = Prediction.LegalColumnNames.SAMPLE_VARIANCE.value
        sample_size_col = Prediction.LegalColumnNames.SAMPLE_SIZE.value
        dof_col = Prediction.LegalColumnNames.PREDICTED_VALUE_DEGREES_OF_FREEDOM.value

        valid_rows_index = None
        features_df = None
        if self.trained:
            valid_features_df = self.input_space.filter_out_invalid_rows(original_dataframe=feature_values_pandas_frame, exclude_extra_columns=True)
            features_df = self._input_space_adapter.project_dataframe(valid_features_df, in_place=False)
            valid_rows_index = features_df.index

        predictions = Prediction(
            objective_name=self.target_dimension_names[0],
            predictor_outputs=self._PREDICTOR_OUTPUT_COLUMNS,
            dataframe_index=valid_rows_index
        )
        prediction_dataframe = predictions.get_dataframe()

        if valid_rows_index is not None and not valid_rows_index.empty:
            prediction_dataframe['leaf_node_index'] = self._regressor.apply(features_df.loc[valid_rows_index].to_numpy())
            prediction_dataframe[predicted_value_col] = prediction_dataframe['leaf_node_index'].map(self._mean_per_leaf)
            prediction_dataframe[predicted_value_var_col] = prediction_dataframe['leaf_node_index'].map(self._mean_variance_per_leaf)
            prediction_dataframe[sample_var_col] = prediction_dataframe['leaf_node_index'].map(self._sample_variance_per_leaf)
            prediction_dataframe[sample_size_col] = prediction_dataframe['leaf_node_index'].map(self._count_per_leaf)
            prediction_dataframe[dof_col] = prediction_dataframe[sample_size_col] - 1
            prediction_dataframe[is_valid_input_col] = True
            prediction_dataframe.drop(columns=['leaf_node_index'], inplace=True)

        predictions.validate_dataframe(prediction_dataframe)
        if not include_only_valid_rows:
            predictions.add_invalid_rows_at_missing_indices(desired_index=feature_values_pandas_frame.index)
        return predictions
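# A minimal, self-contained sketch (assumed, not part of the original module) of the per-leaf variance bookkeeping
# that fit() performs above: the predicted value for a leaf is the mean of its observations, SAMPLE_VARIANCE uses
# ddof=1, PREDICTED_VALUE_VARIANCE is the sample variance divided by the leaf's sample count, and the degrees of
# freedom are the sample count minus one.
def _demo_leaf_variance_bookkeeping():
    import numpy as np

    observations_at_leaf = np.array([1.0, 2.0, 3.0, 4.0])

    leaf_mean = np.mean(observations_at_leaf)                              # 2.5 - PREDICTED_VALUE
    leaf_sample_variance = np.var(observations_at_leaf, ddof=1)            # 5/3 - SAMPLE_VARIANCE
    leaf_mean_variance = leaf_sample_variance / len(observations_at_leaf)  # 5/12 - PREDICTED_VALUE_VARIANCE
    degrees_of_freedom = len(observations_at_leaf) - 1                     # 3 - PREDICTED_VALUE_DEGREES_OF_FREEDOM

    assert abs(leaf_mean - 2.5) < 1e-9
    assert abs(leaf_sample_variance - 5.0 / 3.0) < 1e-9
    assert degrees_of_freedom == 3
    return leaf_mean, leaf_sample_variance, leaf_mean_variance, degrees_of_freedom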
class GridPlot:
    """Maintains all data, metadata and styling information required to produce a grid-plot.

    The grid plot is built based on the OptimizationProblem instance, to find out what objectives and what features
    are to be plotted. We use information contained in the dimensions to compute the ranges for all axes/ranges on
    the plot, as well as to configure the color map. If the range is infinite (as can be the case with many
    objectives) we can use the observed range of values to configure the range of values to be plotted.

    Each figure in the grid plot contains:
    * Either a scatter plot of feature vs. feature, where the color of each point corresponds to the objective value,
    * Or a scatter plot of feature vs. objective (if we are on a diagonal).

    Additionally, we could also plot the predicted values as a background heatmap for the feature vs. feature plots,
    and a predicted value with confidence intervals plot for feature vs. objective plots. This of course introduces
    the complication of needing to query the optimizer for each pixel, so we will add it later.
    """

    def __init__(self, optimization_problem: OptimizationProblem, objective_name: str, observations_data_source: ObservationsDataSource, logger=None):
        if logger is None:
            logger = create_logger(self.__class__.__name__)
        self.logger = logger

        # The data source is maintained by the tomograph.
        #
        self._observations_data_source = observations_data_source

        # Metadata - what dimensions are we going to be plotting here?
        #
        self.optimization_problem = optimization_problem
        assert objective_name in self.optimization_problem.objective_space.dimension_names
        self.objective_name = objective_name

        # The adapter is needed if we want to create plots of categorical dimensions. It maps categorical values to
        # integers so that we can consistently place them on the plots.
        #
        self._feature_space_adapter = CategoricalToDiscreteHypergridAdapter(adaptee=self.optimization_problem.feature_space)
        self.feature_dimension_names: List[str] = [
            feature_name
            for feature_name in self._feature_space_adapter.dimension_names
            if feature_name != "contains_context"
        ]
        self.num_features = len(self.feature_dimension_names)

        # Stores figure ranges by name so that we can synchronize zooming and panning.
        #
        self._x_ranges_by_name = {}
        self._y_ranges_by_name = {}

        # Stores an array of all plots for all objectives.
        #
        self._figures = [[None for col in range(self.num_features)] for row in range(self.num_features)]
        self._title = Div(text=f"<h1>{self.objective_name}</h1>")

        # Stores the bokeh gridplot object.
        #
        self._grid_plot = None

    @property
    def formatted_plots(self):
        return column([self._title, self._grid_plot])

    def update_plots(self):
        """Updates the plot with observations from the data source."""
        self._x_ranges_by_name = {}
        self._y_ranges_by_name = {}
        self._grid_plot = None

        tooltips = [(f"{feature_name}", f"@{feature_name}") for feature_name in self.feature_dimension_names]
        tooltips.extend([(f"{objective_name}", f"@{objective_name}") for objective_name in self.optimization_problem.objective_names])
        hover = HoverTool(tooltips=tooltips)

        plot_options = dict(
            plot_width=int(2000 / self.num_features),
            plot_height=int(2000 / self.num_features),
            tools=['box_select', 'lasso_select', 'box_zoom', 'wheel_zoom', 'reset', hover]
        )
        final_column_plot_options = dict(
            plot_width=int(2000 / self.num_features) + 75,
            plot_height=int(2000 / self.num_features),
            tools=['box_select', 'lasso_select', 'box_zoom', 'wheel_zoom', 'reset', hover]
        )

        color_mapper = LinearColorMapper(
            palette='Turbo256',
            low=self._observations_data_source.observations_df[self.objective_name].min(),
            high=self._observations_data_source.observations_df[self.objective_name].max()
        )

        for row, row_dimension_name in enumerate(self.feature_dimension_names):
            for col, col_dimension_name in enumerate(self.feature_dimension_names):
                x_axis_name = col_dimension_name
                x_ticks, x_tick_label_mapping = self._get_feature_ticks_and_tick_label_mapping(x_axis_name)

                if row == col:
                    # For plots on the diagonal, we want to plot the row dimension vs. the objective.
                    #
                    y_axis_name = self.objective_name

                    # Since objectives are always continuous, the default ticks and tick-labels provided by bokeh work well.
                    #
                    y_ticks, y_tick_label_mapping = None, None
                else:
                    y_axis_name = row_dimension_name
                    y_ticks, y_tick_label_mapping = self._get_feature_ticks_and_tick_label_mapping(y_axis_name)

                if col == (self.num_features - 1):
                    fig = figure(**final_column_plot_options)
                else:
                    fig = figure(**plot_options)

                fig.scatter(
                    x_axis_name,
                    y_axis_name,
                    color={'field': self.objective_name, 'transform': color_mapper},
                    marker='circle',
                    source=self._observations_data_source.data_source,
                )

                fig.xaxis.axis_label = x_axis_name
                fig.yaxis.axis_label = y_axis_name
                fig.xaxis.ticker = x_ticks
                fig.xaxis.major_label_overrides = x_tick_label_mapping

                if y_ticks is not None:
                    fig.yaxis.ticker = y_ticks
                    fig.yaxis.major_label_overrides = y_tick_label_mapping

                self._set_ranges(fig, x_axis_name, y_axis_name)
                self.logger.debug(f"Assigning figure to [{row}][{col}]. {self.objective_name}, {row_dimension_name}, {col_dimension_name}")
                self._figures[row][col] = fig

        color_bar = ColorBar(color_mapper=color_mapper, label_standoff=12, location=(0, 0), title=self.objective_name)
        self._figures[row][-1].add_layout(color_bar, 'right')
        self._grid_plot = gridplot(self._figures)

    def _get_feature_ticks_and_tick_label_mapping(self, axis_name):
        """Returns tick positions as well as labels for each tick.

        The complication is that tick labels can be categorical, but ticks must be plotted at locations specified by
        integers. Once again adapters come to the rescue: we simply use an adapter to construct a (persistent) mapping
        between the categorical values (needed to label the ticks) and integer values (needed to position the ticks).
        This mapping is persisted in the adapter, and here we dole it out to each plot on an as-needed basis.

        :param axis_name:
        :return:
        """
        projected_ticks = self._feature_space_adapter[axis_name].linspace(5)
        projected_ticks_df = pd.DataFrame({axis_name: projected_ticks})
        unprojected_ticks_df = self._feature_space_adapter.unproject_dataframe(projected_ticks_df)
        unprojected_col_name = unprojected_ticks_df.columns[0]
        tick_mapping = {
            projected_tick: f"{unprojected_tick:.2f}" if isinstance(unprojected_tick, float) else str(unprojected_tick)
            for projected_tick, unprojected_tick in zip(projected_ticks, unprojected_ticks_df[unprojected_col_name])
        }
        return projected_ticks, tick_mapping

    def _set_ranges(self, fig, x_axis_name, y_axis_name):
        """Sets the ranges on each axis to enable synchronized panning and zooming.

        Basically, when we see a given range name for the first time, we cache the range, and set that cached range
        for all figures in the future. This way all plots that share the same range name (so the same dimension) are
        synchronized for panning and zooming.
        """
        if x_axis_name in self._x_ranges_by_name:
            fig.x_range = self._x_ranges_by_name[x_axis_name]
        else:
            self._x_ranges_by_name[x_axis_name] = fig.x_range

        if y_axis_name in self._y_ranges_by_name:
            fig.y_range = self._y_ranges_by_name[y_axis_name]
        else:
            self._y_ranges_by_name[y_axis_name] = fig.y_range
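# A minimal sketch (assumed, not part of the original module) of the tick/label mapping that
# _get_feature_ticks_and_tick_label_mapping() produces for a categorical dimension: the adapter projects the
# categories onto integers, so ticks are placed at integer positions while major_label_overrides restores the
# original category names as labels. The category values below are hypothetical.
def _demo_categorical_tick_mapping():
    from bokeh.plotting import figure

    fig = figure(plot_width=300, plot_height=300)

    projected_ticks = [0, 1, 2]                       # integer positions produced by the adapter
    tick_mapping = {0: 'lru', 1: 'mru', 2: 'random'}  # integer position -> categorical label

    fig.xaxis.ticker = projected_ticks
    fig.xaxis.major_label_overrides = tick_mapping
    return fig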