Exemple #1
0
    def test_translating_point_from_categorical_to_discrete_simple_hypergrid(self):
        """ Runs the shared point-translation checks against the simple hypergrid.

        A CategoricalToDiscreteHypergridAdapter is built on top of self.simple_hypergrid and both are handed
        to the _test_translating_categorical_to_discrete_point_from_adaptee helper, which performs the
        actual assertions.

        :return:
        """
        simple_hypergrid = self.simple_hypergrid
        self._test_translating_categorical_to_discrete_point_from_adaptee(
            adaptee=simple_hypergrid,
            adapter=CategoricalToDiscreteHypergridAdapter(adaptee=simple_hypergrid)
        )
Exemple #2
0
    def test_translating_dataframe_from_categorical_to_discrete_simple_hypergrid(self):
        """ Checks dataframe translation on the simple hypergrid, first with in_place=False and then in place.

        The test verifies that:
          * translating with in_place=False returns a different object with different contents,
          * every column of the translated dataframe can be coerced to numeric values,
          * untranslating restores a dataframe equal to the original,
          * calling translate/untranslate without in_place returns the very object that was passed in.

        :return:
        """
        adapter = CategoricalToDiscreteHypergridAdapter(adaptee=self.simple_hypergrid)
        source_df = self.simple_hypergrid.random_dataframe(num_samples=10000)

        # With in_place=False we expect a brand new object whose contents differ from the input.
        #
        translated_df = adapter.translate_dataframe(source_df, in_place=False)
        self.assertIsNot(source_df, translated_df)
        self.assertFalse(source_df.equals(translated_df))

        # The numeric check runs on a copy so that it cannot disturb translated_df, which we still need below.
        # Coercion turns anything non-numeric into a null, so a column is numeric iff no nulls come out.
        #
        numeric_probe_df = translated_df.copy(deep=True)
        for column_name in numeric_probe_df.columns.values.tolist():
            coerced_column = pd.to_numeric(numeric_probe_df[column_name], errors='coerce')
            self.assertTrue(coerced_column.notnull().all())

        # Sanity check: the same coercion must produce nulls on the untranslated input, where a column
        # is known to hold non-numeric values. Otherwise the loop above could never fail.
        #
        source_probe_df = source_df.copy(deep=True)
        coerced_mixed_column = pd.to_numeric(source_probe_df['categorical_mixed_types'], errors='coerce')
        self.assertFalse(coerced_mixed_column.notnull().all())

        # Round trip without touching the inputs.
        #
        untranslated_df = adapter.untranslate_dataframe(translated_df, in_place=False)
        self.assertIsNot(source_df, untranslated_df)
        self.assertTrue(source_df.equals(untranslated_df))

        # Now the in-place variants: the object handed in must be the object handed back. Note that from
        # here on source_df itself is being modified, so these checks have to come last.
        #
        in_place_translated_df = adapter.translate_dataframe(source_df)
        self.assertIs(source_df, in_place_translated_df)
        self.assertTrue(in_place_translated_df.equals(translated_df))

        in_place_untranslated_df = adapter.untranslate_dataframe(in_place_translated_df)
        self.assertIs(source_df, in_place_untranslated_df)
        self.assertTrue(in_place_untranslated_df.equals(untranslated_df))
    def test_projecting_point_from_categorical_hierachical_to_discrete_flat_hypergrid(self):
        """ Stacks a categorical-to-discrete adapter on top of a hierarchical-to-flat adapter.

        The stacked adapter must expose no categorical dimensions and no dotted dimension names. Random
        points from the hierarchical hypergrid are then projected and unprojected: the projection must
        consist solely of numbers under dot-free names and differ from the input, while the unprojected
        point must belong to the hierarchical hypergrid and equal the original.

        :return:
        """
        flattening_adapter = HierarchicalToFlatHypergridAdapter(adaptee=self.hierarchical_hypergrid)
        adapter = CategoricalToDiscreteHypergridAdapter(adaptee=flattening_adapter)

        exposes_categorical_dimension = any(
            isinstance(dimension, CategoricalDimension) for dimension in adapter.dimensions
        )
        self.assertFalse(exposes_categorical_dimension)

        exposes_dotted_dimension_name = any("." in dimension.name for dimension in adapter.dimensions)
        self.assertFalse(exposes_dotted_dimension_name)

        for _ in range(1000):
            original_point = self.hierarchical_hypergrid.random()
            projected_point = adapter.project_point(original_point)

            # Projection: numbers only, flat names only, and not the same as what we put in.
            #
            only_numbers = all(isinstance(dim_value, Number) for dim_name, dim_value in projected_point)
            self.assertTrue(only_numbers)
            any_dotted_name = any("." in dim_name for dim_name, value in projected_point)
            self.assertFalse(any_dotted_name)
            self.assertFalse(projected_point == original_point)

            # Unprojection: we must land back inside the original space, on the original point.
            #
            unprojected_point = adapter.unproject_point(projected_point)
            self.assertTrue(unprojected_point in self.hierarchical_hypergrid)
            self.assertTrue(original_point == unprojected_point)
 def test_projecting_point_from_hierarchical_categorical_to_discrete_hypergrid(self):
     """ Runs the shared point-projection checks with the adapter placed directly on the hierarchical hypergrid.

     Per the original note, building this adapter over a hierarchical hypergrid used to raise and is now
     handled internally, so this test guards against that exception coming back.
     """
     hierarchical_hypergrid = self.hierarchical_hypergrid
     adapter_over_hierarchy = CategoricalToDiscreteHypergridAdapter(adaptee=hierarchical_hypergrid)
     self._test_projecting_categorical_to_discrete_point_from_adaptee(
         adaptee=hierarchical_hypergrid,
         adapter=adapter_over_hierarchy
     )
Exemple #5
0
    def __init__(self,
                 model_config: Point,
                 input_space: Hypergrid,
                 output_space: Hypergrid,
                 logger=None):
        """ Builds an untrained single-target decision tree regression model.

        :param model_config: Point that must belong to decision_tree_config_store.parameter_space; its fields
            are forwarded to the DecisionTreeRegressor constructor.
        :param input_space: Hypergrid describing the features. It is wrapped in a
            CategoricalToDiscreteHypergridAdapter whose dimensions become the input dimension names.
        :param output_space: Hypergrid describing the targets. Exactly one dimension is supported.
        :param logger: Optional logger. When None, one named "DecisionTreeRegressionModel" is created.
        """
        self.logger = create_logger("DecisionTreeRegressionModel") if logger is None else logger

        assert model_config in decision_tree_config_store.parameter_space
        RegressionModel.__init__(
            self,
            model_type=type(self),
            model_config=model_config,
            input_space=input_space,
            output_space=output_space
        )

        self._input_space_adapter = CategoricalToDiscreteHypergridAdapter(adaptee=self.input_space)

        self.input_dimension_names = [dimension.name for dimension in self._input_space_adapter.dimensions]
        self.target_dimension_names = [dimension.name for dimension in self.output_space.dimensions]
        self.logger.debug(
            f"Input dimensions: {self.input_dimension_names}; Target dimensions: {self.target_dimension_names}."
        )

        num_targets = len(self.target_dimension_names)
        assert num_targets == 1, "For now (and perhaps forever) we only support single target per tree."

        config = self.model_config

        # A configured max_depth of 0 is passed to the regressor as None.
        #
        max_depth = None if config.max_depth == 0 else config.max_depth

        # Likewise, a configured max_leaf_nodes of 0 or 1 is passed to the regressor as None.
        #
        max_leaf_nodes = None if config.max_leaf_nodes in (0, 1) else config.max_leaf_nodes

        self._regressor = DecisionTreeRegressor(
            criterion=config.criterion,
            splitter=config.splitter,
            max_depth=max_depth,
            min_samples_split=config.min_samples_split,
            min_samples_leaf=config.min_samples_leaf,
            min_weight_fraction_leaf=config.min_weight_fraction_leaf,
            max_features=config.max_features,
            random_state=config.get("random_state", None),
            max_leaf_nodes=max_leaf_nodes,
            min_impurity_decrease=config.min_impurity_decrease,
            ccp_alpha=config.ccp_alpha
        )

        # Per-leaf bookkeeping used to compute the variance in predictions.
        #
        self._observations_per_leaf = {}
        self._mean_per_leaf = {}
        self._mean_variance_per_leaf = {}
        self._sample_variance_per_leaf = {}
        self._count_per_leaf = {}

        self._trained = False
Exemple #6
0
    def test_translating_dataframe_from_categorical_hierarchical_to_discrete_flat_hypergrid(self):
        """ Round-trips a dataframe through stacked hierarchical-to-flat and categorical-to-discrete adapters.

        The stacked adapter must expose neither categorical dimensions nor dotted dimension names, and
        translating followed by untranslating (both with in_place=False) must give back a dataframe equal
        to the original.

        :return:
        """
        flattening_adapter = HierarchicalToFlatHypergridAdapter(adaptee=self.hierarchical_hypergrid)
        adapter = CategoricalToDiscreteHypergridAdapter(adaptee=flattening_adapter)

        exposes_categorical_dimension = any(
            isinstance(dimension, CategoricalDimension) for dimension in adapter.dimensions
        )
        self.assertFalse(exposes_categorical_dimension)
        exposes_dotted_dimension_name = any("." in dimension.name for dimension in adapter.dimensions)
        self.assertFalse(exposes_dotted_dimension_name)

        original_df = self.hierarchical_hypergrid.random_dataframe(num_samples=10000)
        round_tripped_df = adapter.untranslate_dataframe(
            df=adapter.translate_dataframe(df=original_df, in_place=False),
            in_place=False
        )
        self.assertTrue(original_df.equals(round_tripped_df))
Exemple #7
0
    def __init__(self,
                 optimization_problem: OptimizationProblem,
                 objective_name: str,
                 observations_data_source: ObservationsDataSource,
                 logger=None):
        """ Prepares the state needed to plot a single objective against the problem's features.

        :param optimization_problem: Problem whose feature space and objective space are being plotted.
        :param objective_name: Name of the objective to plot. Must be one of the objective space's dimension names.
        :param observations_data_source: Data source holding the observations; it is maintained by the tomograph.
        :param logger: Optional logger. When None, one named after this class is created.
        """
        self.logger = create_logger(self.__class__.__name__) if logger is None else logger

        # The data source is maintained by the tomograph.
        #
        self._observations_data_source = observations_data_source

        # Metadata - what dimensions are we going to be plotting here?
        #
        self.optimization_problem = optimization_problem
        assert objective_name in self.optimization_problem.objective_space.dimension_names
        self.objective_name = objective_name

        # Plotting categorical dimensions requires mapping categorical values to integers, so that they can be
        # placed consistently on the plots. The adapter provides that mapping.
        #
        self._feature_space_adapter = CategoricalToDiscreteHypergridAdapter(
            adaptee=self.optimization_problem.feature_space
        )

        # Every adapted dimension is a feature to plot, with the exception of the "contains_context" one.
        #
        feature_names = []
        for feature_name in self._feature_space_adapter.dimension_names:
            if feature_name != "contains_context":
                feature_names.append(feature_name)
        self.feature_dimension_names: List[str] = feature_names
        self.num_features = len(self.feature_dimension_names)

        # Figure ranges are stored by name so that zooming and panning can be synchronized.
        #
        self._x_ranges_by_name = {}
        self._y_ranges_by_name = {}

        # A num_features x num_features array of plots, all empty for now.
        #
        self._figures = [[None] * self.num_features for _ in range(self.num_features)]

        self._title = Div(text=f"<h1>{self.objective_name}</h1>")

        # Stores the bokeh gridplot object.
        #
        self._grid_plot = None
Exemple #8
0
    def __init__(self,
                 optimization_problem: OptimizationProblem,
                 parameters_df: pd.DataFrame,
                 context_df: pd.DataFrame,
                 objectives_df: pd.DataFrame,
                 pareto_df: pd.DataFrame):
        """ Creates the data source and immediately loads it with the supplied dataframes.

        All dataframe attributes start out as None and an empty ColumnDataSource is created; update_data is
        then called with the four dataframes passed in here.

        :param optimization_problem: Problem whose feature space is wrapped in a categorical-to-discrete adapter.
        :param parameters_df: Forwarded to update_data.
        :param context_df: Forwarded to update_data.
        :param objectives_df: Forwarded to update_data.
        :param pareto_df: Forwarded to update_data.
        """
        self.optimization_problem = optimization_problem
        self._feature_space_adapter = CategoricalToDiscreteHypergridAdapter(
            adaptee=self.optimization_problem.feature_space
        )

        # Placeholders for the dataframes, filled in by the update_data call below.
        #
        self.parameters_df: pd.DataFrame = None
        self.context_df: pd.DataFrame = None
        self.objectives_df: pd.DataFrame = None
        self.pareto_df: pd.DataFrame = None
        self.observations_df: pd.DataFrame = None

        self.data_source: ColumnDataSource = ColumnDataSource()

        initial_data = {
            "parameters_df": parameters_df,
            "context_df": context_df,
            "objectives_df": objectives_df,
            "pareto_df": pareto_df,
        }
        self.update_data(**initial_data)
    def test_projecting_dataframe_from_categorical_hierarchical_to_discrete_flat_hypergrid(self):
        """ Round-trips a dataframe through stacked hierarchical-to-flat and categorical-to-discrete adapters.

        The stacked adapter must expose neither categorical dimensions nor dotted dimension names, and
        projecting followed by unprojecting (both with in_place=False) must give back a dataframe equal to
        the original.
        """
        flattening_adapter = HierarchicalToFlatHypergridAdapter(adaptee=self.hierarchical_hypergrid)
        adapter = CategoricalToDiscreteHypergridAdapter(adaptee=flattening_adapter)

        exposes_categorical_dimension = any(
            isinstance(dimension, CategoricalDimension) for dimension in adapter.dimensions
        )
        assert not exposes_categorical_dimension
        exposes_dotted_dimension_name = any("." in dimension.name for dimension in adapter.dimensions)
        assert not exposes_dotted_dimension_name

        original_df = self.hierarchical_hypergrid.random_dataframe(num_samples=10000)
        projected_df = adapter.project_dataframe(df=original_df, in_place=False)
        round_tripped_df = adapter.unproject_dataframe(df=projected_df, in_place=False)
        assert original_df.equals(round_tripped_df)
Exemple #10
0
    def __init__(self,
                 optimization_problem: OptimizationProblem,
                 observations_data_source: ObservationsDataSource,
                 logger=None):
        """ Prepares the state needed to plot the problem's objectives against one another.

        :param optimization_problem: Problem whose objective space supplies the names and count of objectives.
        :param observations_data_source: Data source holding the observations; it is maintained by the tomograph.
        :param logger: Optional logger. When None, one named after this class is created.
        """
        self.logger = create_logger(self.__class__.__name__) if logger is None else logger

        # The data source is maintained by the tomograph.
        #
        self._observations_data_source = observations_data_source

        # Metadata - what dimensions are we going to be plotting here?
        #
        self.optimization_problem = optimization_problem
        objective_space = optimization_problem.objective_space
        self.num_objectives = len(objective_space.dimension_names)
        self.objective_names = objective_space.dimension_names
        self._feature_space_adapter = CategoricalToDiscreteHypergridAdapter(
            adaptee=self.optimization_problem.feature_space
        )

        # Figure ranges are stored by name so that zooming and panning can be synchronized.
        #
        self._x_ranges_by_name = {}
        self._y_ranges_by_name = {}

        # A num_objectives x num_objectives array of plots, all empty for now.
        #
        self._figures = [[None] * self.num_objectives for _ in range(self.num_objectives)]

        self._title = Div(text="<h1>Objectives</h1>")

        # Stores the bokeh gridplot object.
        #
        self._grid_plot = None
Exemple #11
0
 def test_translating_point_from_categorical_to_discrete_composite_hypergrid(self):
     """ Expects NotImplementedError when the adapter is used directly on the hierarchical hypergrid.

     Both the adapter construction and the shared translation helper run inside the assertRaises block,
     so the test passes if either of them raises.
     """
     hierarchical_hypergrid = self.hierarchical_hypergrid
     with self.assertRaises(NotImplementedError):
         adapter_over_hierarchy = CategoricalToDiscreteHypergridAdapter(adaptee=hierarchical_hypergrid)
         self._test_translating_categorical_to_discrete_point_from_adaptee(
             adaptee=hierarchical_hypergrid,
             adapter=adapter_over_hierarchy
         )