Example #1
    def test_basic_functionality_on_2d_objective_space(self):
        """Basic sanity check. Mainly used to help us develop the API.
        """

        # Let's just create a bunch of random points, build a Pareto frontier,
        # and verify that the invariants hold.
        #
        parameter_space = SimpleHypergrid(
            name='params',
            dimensions=[
                ContinuousDimension(name='x1', min=0, max=10)
            ]
        )

        objective_space = SimpleHypergrid(
            name='objectives',
            dimensions=[
                ContinuousDimension(name='y1', min=0, max=10),
                ContinuousDimension(name='y2', min=0, max=10)
            ]
        )

        optimization_problem = OptimizationProblem(
            parameter_space=parameter_space,
            objective_space=objective_space,
            objectives=[
                Objective(name='y1', minimize=False),
                Objective(name='y2', minimize=False)
            ]
        )

        num_rows = 100000
        random_objectives_df = objective_space.random_dataframe(num_rows)

        # These random parameters don't correspond to the random objectives above,
        # but they don't need to for this test.
        #
        random_params_df = parameter_space.random_dataframe(num_rows)

        pareto_frontier = ParetoFrontier(
            optimization_problem=optimization_problem,
            objectives_df=random_objectives_df,
            parameters_df=random_params_df
        )
        pareto_df = pareto_frontier.pareto_df

        non_pareto_index = random_objectives_df.index.difference(pareto_df.index)
        for i, row in pareto_df.iterrows():
            # Make sure that no point on the Pareto frontier is dominated by any
            # non-Pareto point: every non-Pareto point must be strictly worse in
            # at least one objective.
            #
            assert (random_objectives_df.loc[non_pareto_index] < row).any(axis=1).sum() == len(non_pareto_index)

            # Also make sure that no point on the Pareto frontier is strictly
            # dominated by any other point on the frontier.
            #
            other_rows = pareto_df.index.difference([i])
            assert (pareto_df.loc[other_rows] > row).all(axis=1).sum() == 0
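
# Hedged sketch (not part of the original test): the per-row loop above can be
# expressed as a single vectorized check of the same two Pareto invariants.
# The helper name `assert_pareto_invariants` is hypothetical and assumes, as the
# problem above does, that every objective is maximized.
def assert_pareto_invariants(objectives_df, pareto_df):
    non_pareto_df = objectives_df.loc[objectives_df.index.difference(pareto_df.index)]
    pareto_values = pareto_df.to_numpy()          # shape: (num_pareto, num_objectives)
    non_pareto_values = non_pareto_df.to_numpy()  # shape: (num_non_pareto, num_objectives)

    # Every non-Pareto point must be strictly worse than every Pareto point in at
    # least one objective, i.e. no non-Pareto point weakly dominates a Pareto point.
    assert (non_pareto_values[:, None, :] < pareto_values[None, :, :]).any(axis=2).all()

    # No Pareto point may be strictly better than another Pareto point in every
    # objective (the diagonal compares each point with itself and is already False).
    assert not (pareto_values[:, None, :] > pareto_values[None, :, :]).all(axis=2).any()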
Example #2
    def test_construct_feature_dataframe_context(self):
        def f(parameters, context):
            return pd.DataFrame({
                'function_value':
                -np.exp(-50 * (parameters.x - 0.5 * context.y - 0.5)**2)
            })

        input_space = SimpleHypergrid(
            name="my_input_name",
            dimensions=[ContinuousDimension(name="x", min=0, max=1)])
        output_space = SimpleHypergrid(name="objective",
                                       dimensions=[
                                           ContinuousDimension(
                                               name="function_value",
                                               min=-10,
                                               max=10)
                                       ])
        context_space = SimpleHypergrid(
            name="my_context_name",
            dimensions=[ContinuousDimension(name="y", min=-1, max=1)])

        optimization_problem = OptimizationProblem(
            parameter_space=input_space,
            objective_space=output_space,
            # we want to minimize the function
            objectives=[Objective(name="function_value", minimize=True)],
            context_space=context_space)
        n_samples = 100
        parameter_df = input_space.random_dataframe(n_samples)
        context_df = context_space.random_dataframe(n_samples)
        with pytest.raises(ValueError, match="Context required"):
            optimization_problem.construct_feature_dataframe(
                parameters_df=parameter_df)

        feature_df = optimization_problem.construct_feature_dataframe(
            parameters_df=parameter_df, context_df=context_df)

        assert isinstance(feature_df, pd.DataFrame)
        assert feature_df.shape == (n_samples, 3)
        assert (feature_df.columns == [
            'my_input_name.x', 'contains_context', 'my_context_name.y'
        ]).all()
        assert feature_df.contains_context.all()
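
# Hedged sketch (not part of the original test): a hand-built example of the feature
# dataframe layout implied by the assertions above, i.e. flattened
# "<space_name>.<dimension_name>" columns plus a `contains_context` flag. The values
# are made up for illustration; this is not how construct_feature_dataframe builds it.
import pandas as pd

illustrative_feature_df = pd.DataFrame({
    'my_input_name.x': [0.25, 0.75],      # flattened parameter dimension
    'contains_context': [True, True],     # flag checked by feature_df.contains_context.all()
    'my_context_name.y': [-0.5, 0.9],     # flattened context dimension
})
assert (illustrative_feature_df.columns == [
    'my_input_name.x', 'contains_context', 'my_context_name.y'
]).all()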
Example #3
class TestDecisionTreeRegressionModel:
    @classmethod
    def setup_class(cls) -> None:
        global_values.declare_singletons()
        global_values.tracer = Tracer(actor_id=cls.__name__, thread_id=0)

    @classmethod
    def teardown_class(cls) -> None:
        temp_dir = os.path.join(os.getcwd(), "temp")
        if not os.path.exists(temp_dir):
            os.mkdir(temp_dir)
        trace_output_path = os.path.join(
            temp_dir, "TestDecisionTreeRegressionModel.json")
        print(f"Dumping trace to {trace_output_path}")
        global_values.tracer.dump_trace_to_file(
            output_file_path=trace_output_path)

    def setup_method(self, method):
        # Let's create a simple linear mapping: y = 10 * x + 10 over x in [0, 100].
        self.gradient = 10
        self.y_intercept = 10
        self.input_values = np.linspace(start=0,
                                        stop=100,
                                        num=101,
                                        endpoint=True)
        self.output_values = self.input_values * self.gradient + self.y_intercept

        self.input_space = SimpleHypergrid(
            name="input",
            dimensions=[ContinuousDimension(name="x", min=0, max=100)])

        self.output_space = SimpleHypergrid(name="output",
                                            dimensions=[
                                                ContinuousDimension(
                                                    name="y",
                                                    min=-math.inf,
                                                    max=math.inf)
                                            ])

        self.input_pandas_dataframe = pd.DataFrame({"x": self.input_values})
        self.output_pandas_dataframe = pd.DataFrame({"y": self.output_values})

    def test_default_decision_tree_model(self):
        model_config = decision_tree_config_store.default
        model = DecisionTreeRegressionModel(model_config=model_config,
                                            input_space=self.input_space,
                                            output_space=self.output_space)
        model.fit(self.input_pandas_dataframe,
                  self.output_pandas_dataframe,
                  iteration_number=len(self.input_pandas_dataframe.index))
        gof_metrics = model.compute_goodness_of_fit(
            features_df=self.input_pandas_dataframe,
            target_df=self.output_pandas_dataframe,
            data_set_type=DataSetType.TRAIN)
        print(gof_metrics)
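
    def test_plain_sklearn_tree_on_linear_mapping(self):
        # Hedged sketch (not part of the original suite): fit the same noiseless
        # linear mapping with a plain scikit-learn DecisionTreeRegressor, outside
        # the MLOS DecisionTreeRegressionModel wrapper, as a sanity baseline. The
        # test name and the direct use of sklearn here are illustrative assumptions.
        from sklearn.tree import DecisionTreeRegressor

        sklearn_tree = DecisionTreeRegressor()
        sklearn_tree.fit(self.input_values.reshape(-1, 1), self.output_values)

        # On noiseless training data a fully grown tree memorizes the mapping,
        # so the training R^2 should be essentially 1.
        assert sklearn_tree.score(self.input_values.reshape(-1, 1), self.output_values) > 0.99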

    def test_random_decision_tree_models(self):
        sample_inputs_pandas_dataframe = self.input_space.random_dataframe(
            num_samples=100)

        num_iterations = 50
        for i in range(num_iterations):
            if i % 10 == 0:
                print(f"{datetime.datetime.utcnow()} {i}/{num_iterations}")
            model_config = decision_tree_config_store.parameter_space.random()
            print(str(model_config))
            model = DecisionTreeRegressionModel(model_config=model_config,
                                                input_space=self.input_space,
                                                output_space=self.output_space)
            model.fit(self.input_pandas_dataframe,
                      self.output_pandas_dataframe,
                      iteration_number=len(
                          sample_inputs_pandas_dataframe.index))
            gof_metrics = model.compute_goodness_of_fit(
                features_df=self.input_pandas_dataframe,
                target_df=self.output_pandas_dataframe,
                data_set_type=DataSetType.TRAIN)
            print(gof_metrics)
Example #4
    def test_optimization_with_context(self):
        # Gaussian blob in x whose peak position depends on the context variable y:
        # the blob is centered at x = 0.5 * y + 0.5.
        def f(parameters, context):
            if isinstance(parameters, pd.DataFrame):
                index = parameters.index
            else:
                index = [0]
            return pd.DataFrame(
                {
                    'function_value':
                    -np.exp(-50 * (parameters.x - 0.5 * context.y - 0.5)**2)
                },
                index=index)

        input_space = SimpleHypergrid(
            name="input",
            dimensions=[ContinuousDimension(name="x", min=0, max=1)])
        output_space = SimpleHypergrid(name="objective",
                                       dimensions=[
                                           ContinuousDimension(
                                               name="function_value",
                                               min=-10,
                                               max=10)
                                       ])
        context_space = SimpleHypergrid(
            name="context",
            dimensions=[ContinuousDimension(name="y", min=-1, max=1)])

        optimization_problem = OptimizationProblem(
            parameter_space=input_space,
            objective_space=output_space,
            # we want to minimize the function
            objectives=[Objective(name="function_value", minimize=True)],
            context_space=context_space)

        # Create some data points to evaluate.
        n_samples = 5000
        parameter_df = input_space.random_dataframe(n_samples)
        context_df = context_space.random_dataframe(n_samples)

        target_df = f(parameter_df, context_df)

        local_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
            optimization_problem=optimization_problem)

        with pytest.raises(ValueError, match="Context required"):
            local_optimizer.register(
                parameter_values_pandas_frame=parameter_df,
                target_values_pandas_frame=target_df)

        with pytest.raises(
                ValueError,
                match="Incompatible shape of parameters and context"):
            local_optimizer.register(
                parameter_values_pandas_frame=parameter_df,
                target_values_pandas_frame=target_df,
                context_values_pandas_frame=context_df.iloc[:-1])

        local_optimizer.register(parameter_values_pandas_frame=parameter_df,
                                 target_values_pandas_frame=target_df,
                                 context_values_pandas_frame=context_df)

        with pytest.raises(ValueError, match="Context required"):
            local_optimizer.suggest()

        with pytest.raises(ValueError, match="Context required"):
            local_optimizer.predict(parameter_values_pandas_frame=parameter_df)

        suggestion = local_optimizer.suggest(context=context_space.random())
        assert isinstance(suggestion, Point)
        assert suggestion in input_space

        with pytest.raises(
                ValueError,
                match="Incompatible shape of parameters and context"):
            # unaligned parameters and context
            local_optimizer.predict(
                parameter_values_pandas_frame=parameter_df,
                context_values_pandas_frame=context_df.iloc[:-1])

        predictions = local_optimizer.predict(
            parameter_values_pandas_frame=parameter_df,
            context_values_pandas_frame=context_df)
        predictions_df = predictions.get_dataframe()
        assert len(predictions_df) == len(parameter_df)

        remote_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
            optimization_problem=optimization_problem)

        with pytest.raises(ValueError,
                           match="not supported if context is provided"):
            local_optimizer.optimum(
                optimum_definition=OptimumDefinition.BEST_OBSERVATION,
                context=Point(y=0).to_dataframe())

        with pytest.raises(ValueError,
                           match="not supported if context is provided"):
            local_optimizer.optimum(
                optimum_definition=OptimumDefinition.BEST_OBSERVATION)

        with pytest.raises(ValueError,
                           match="requires context to be not None"):
            local_optimizer.optimum(
                optimum_definition=OptimumDefinition.BEST_SPECULATIVE_WITHIN_CONTEXT)
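
        # Summary of the contract exercised above (as asserted by this test):
        # BEST_OBSERVATION raises both with and without an explicit context argument
        # on this context-aware problem, while BEST_SPECULATIVE_WITHIN_CONTEXT
        # requires a non-None context.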

        # Can't register, predict, or suggest with context on the remote optimizer.
        with pytest.raises(NotImplementedError,
                           match="Context not currently supported"):
            remote_optimizer.register(
                parameter_values_pandas_frame=parameter_df,
                target_values_pandas_frame=target_df,
                context_values_pandas_frame=context_df)

        with pytest.raises(NotImplementedError,
                           match="Context not currently supported"):
            remote_optimizer.predict(
                parameter_values_pandas_frame=parameter_df,
                context_values_pandas_frame=context_df)

        with pytest.raises(NotImplementedError,
                           match="Context not currently supported"):
            remote_optimizer.suggest(context=context_df)

        # Context is required by the problem but missing here, so the remote call should fail.
        with pytest.raises(grpc.RpcError):
            remote_optimizer.register(
                parameter_values_pandas_frame=parameter_df,
                target_values_pandas_frame=target_df)

        # Run some iterations on the local optimizer to check that it does something sensible.
        for _ in range(100):
            # Pick a context at random.
            context = context_space.random()
            suggested_config = local_optimizer.suggest(context=context)
            target_values = f(suggested_config, context)
            local_optimizer.register(
                parameter_values_pandas_frame=suggested_config.to_dataframe(),
                target_values_pandas_frame=target_values,
                context_values_pandas_frame=context.to_dataframe())

        optimum_at_y_minus_1 = local_optimizer.optimum(
            optimum_definition=OptimumDefinition.BEST_SPECULATIVE_WITHIN_CONTEXT,
            context=Point(y=-1).to_dataframe())
        optimum_at_y_plus_1 = local_optimizer.optimum(
            optimum_definition=OptimumDefinition.BEST_SPECULATIVE_WITHIN_CONTEXT,
            context=Point(y=1).to_dataframe())
        assert optimum_at_y_plus_1.x > 0.6
        assert optimum_at_y_minus_1.x < 0.4
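
        # Hedged illustration (not part of the original test): f's exponent is zero at
        # x = 0.5 * y + 0.5, so the true optimum is x = 1.0 for y = 1 and x = 0.0 for
        # y = -1; the loose bounds above (0.6 and 0.4) leave room for optimizer noise.
        for y_value, expected_x in [(1.0, 1.0), (-1.0, 0.0)]:
            xs = np.linspace(0, 1, 1001)
            best_x = xs[np.argmin(-np.exp(-50 * (xs - 0.5 * y_value - 0.5) ** 2))]
            assert abs(best_x - expected_x) < 1e-2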