Exemple #1
0
    def setUp(self):
        """Build a four-wrapper model population and the DE variable selection under test.

        All wrappers share the same settings, dependent variable and list of
        candidate independent variables; the first wrapper is used as the
        target.
        """
        target_variable = DatasetVariable(0)
        candidates = [DatasetVariable(column) for column in (1, 2, 3)]
        settings = AbstractSettings({
            "differential_evolution.crossover_probability": 1.0,
            "differential_evolution.differential_weight": 1.0,
            "independent_variable_selection.initial_independent_variables_percentage": 0.25,
        })

        # Each wrapper pairs a freshly built model with a freshly built
        # variable selection (model first, then selection).
        population = [
            OptimizationAlgorithmModelWrapper(
                PredictiveModel(settings, ParameterSet({}), target_variable,
                                candidates),
                IndependentVariableSelection(settings, target_variable,
                                             candidates))
            for _ in xrange(4)
        ]

        self.model_population = population
        self.settings = settings
        self.potential_independent_variables = candidates
        self.target_wrapper = population[0]

        self.de_variable_selection = DEIndependentVariableSelection(
            self.settings, self.target_wrapper, self.model_population,
            self.potential_independent_variables)
Exemple #2
0
    def test_required_parameters(self):
        """Constructing the model with param1, param2 and param3 supplied must not raise."""
        range_spec = {
            "type": "range",
            "lower_bound": 0.0,
            "upper_bound": 1.0,
            "value": 0.5,
        }
        # Every parameter gets its own copy of the same range description.
        dictionary = dict(
            (name, dict(range_spec))
            for name in ("param1", "param2", "param3"))

        settings = {}
        parameter_set = ParameterSet.create_from_dict(dictionary)
        dependent_variable = DatasetVariable(0)
        independent_variables = [DatasetVariable(1)]

        # The test passes as long as construction completes.
        RequiredParametersPredictiveModel(
            settings, parameter_set, dependent_variable,
            independent_variables)
Exemple #3
0
    def test_missing_required_parameters_raises_error(self):
        """Only param1 and param3 are supplied; construction must raise AssertionError."""
        range_spec = {
            "type": "range",
            "lower_bound": 0.0,
            "upper_bound": 1.0,
            "value": 0.5,
        }
        # Every parameter gets its own copy of the same range description.
        dictionary = dict(
            (name, dict(range_spec)) for name in ("param1", "param3"))

        settings = {}
        parameter_set = ParameterSet.create_from_dict(dictionary)
        dependent_variable = DatasetVariable(0)
        independent_variables = [DatasetVariable(1)]

        with self.assertRaises(AssertionError):
            RequiredParametersPredictiveModel(
                settings, parameter_set, dependent_variable,
                independent_variables)
Exemple #4
0
    def setUp(self):
        """Create the same 3x3 dataset with and without headers, plus a dependent variable for each."""
        rows = [[1, 2, 3], [2, 3, 4], [5, 6, 7]]
        column_names = ["column_0", "column_1", "column_2"]

        self.header_dataset = Dataset(rows, column_names)
        self.nonheader_dataset = Dataset(rows)

        # The first column is addressed by header in one case and by index in
        # the other.
        self.headered_dependent_variable = DatasetVariable("column_0")
        self.nonheadered_dependent_variable = DatasetVariable(0)
Exemple #5
0
    def test_get_filtered_matrix(self):
        """Filtering by an index variable and a header variable yields four two-column rows."""
        selected = [DatasetVariable(0), DatasetVariable("string")]

        filtered_matrix = self.dataset.get_filtered_matrix(selected)
        self.assertEqual(4, len(filtered_matrix))
        for row in filtered_matrix:
            self.assertEqual(2, len(row))

        expected_rows = (
            [0, "detective"],
            [1, "pablo"],
            [2, "african"],
            [3, "applause"],
        )
        for position, expected in enumerate(expected_rows):
            self.assertListEqual(expected, filtered_matrix[position])
Exemple #6
0
    def test_increasing_probability_of_variables(self):
        """Increasing column_1's probability puts it above 0.5 and column_2 below 0.5."""
        settings = AbstractSettings()
        dependent = self.headered_dependent_variable
        candidates = self.header_dataset.get_independent_variables(dependent)
        selection = IndependentVariableSelection(settings, dependent,
                                                 candidates)

        selection.increase_probability(DatasetVariable("column_1"))

        boosted = selection.get_probability(DatasetVariable("column_1"))
        self.assertLess(0.5, boosted)

        other = selection.get_probability(DatasetVariable("column_2"))
        self.assertGreater(0.5, other)
Exemple #7
0
    def test_training_decision_tree_on_simple_dataset(self):
        """Train on a four-row dataset and check that one prediction per row comes back.

        NOTE(review): the test name says "decision tree" but the model built
        here is RandomForestRegression -- confirm which is intended.
        """
        regression = RandomForestRegression(
            AbstractSettings({}),
            self.parameter_set,
            DatasetVariable(0),
            [DatasetVariable(1)])

        dataset = Dataset([[1, 1], [2, 2], [3, 3], [4, 4]])

        predictions = regression.train(dataset).predict(dataset)

        self.assertEqual(4, len(predictions))
Exemple #8
0
    def test_header_transformation_for_dataset_with_headers(self):
        """Transformed headers are expected for variables given by index and by header name."""
        dataset = Dataset([[1, 2, 3], [4, 5, 6]], ["h0", "h1", "h2"])

        # (variable key, expected transformed header)
        cases = (
            (0, "identitytransformation_h0"),
            ("h2", "identitytransformation_h2"),
        )
        for key, expected_header in cases:
            header = self.dataset_transformation.get_transformed_header(
                dataset, DatasetVariable(key))
            self.assertEqual(expected_header, header)
Exemple #9
0
    def test_getting_independent_variables(self):
        """The two non-dependent columns are returned whether the dependent variable is given by index or by header."""
        dependent_variables = (DatasetVariable(0), DatasetVariable("number"))
        expected_names = ["string", "another_string"]

        for dependent in dependent_variables:
            found = self.dataset.get_independent_variables(dependent)
            self.assertEqual(2, len(found))
            for candidate in found:
                self.assertIn(candidate.variable, expected_names)
Exemple #10
0
    def setUp(self):
        """Create a five-row mixed-type dataset and a SklearnModel over columns 1-3 predicting column 0."""
        rows = [
            [1, 2, 3, "a"],
            [2, 3, 2, "b"],
            [3, 2, 1, "a"],
            [5, 5, 1, "c"],
            [2, 2, 2, "a"],
        ]
        self.dataset = Dataset(rows)
        self.settings = AbstractSettings({})
        self.parameter_set = ParameterSet({})
        self.dependent_variable = DatasetVariable(0)
        self.independent_variables = [
            DatasetVariable(column) for column in (1, 2, 3)
        ]

        self.sklearn_model = SklearnModel(
            self.settings, self.parameter_set, self.dependent_variable,
            self.independent_variables)
Exemple #11
0
    def test_header_transformation_for_dataset_without_headers(self):
        """For a dataset built without headers the transformed header is expected to be None."""
        headerless = Dataset([[1, 2, 3], [4, 5, 6]])

        header = self.dataset_transformation.get_transformed_header(
            headerless, DatasetVariable(0))

        self.assertEqual(None, header)
Exemple #12
0
    def test_training_ridge_on_simple_dataset(self):
        """Train on y = x with four rows and compare the predictions with fixed expected values.

        NOTE(review): the test name says "ridge" but the model built here is
        LassoRegression -- confirm which is intended.
        """
        settings = {}
        target = DatasetVariable(0)
        predictors = [DatasetVariable(1)]
        regression = LassoRegression(settings, self.parameter_set, target,
                                     predictors)

        dataset = Dataset([[1, 1], [2, 2], [3, 3], [4, 4]])

        predictions = regression.train(dataset).predict(dataset)

        for row, expected in enumerate((1.6, 2.2, 2.8, 3.4)):
            self.assertAlmostEqual(expected, predictions[row])
Exemple #13
0
    def setUp(self):
        """Load sample_regression_data.csv from this file's directory and pick "X1" as the dependent variable."""
        self.settings = AbstractSettings()
        self.initialization = WallaceInitialization(self.settings)

        # Resolve the fixture relative to this file, not the working directory.
        here = os.path.dirname(__file__)
        path = os.path.abspath(
            os.path.join(here, './sample_regression_data.csv'))

        self.dataset = self.initialization.read_filename(path)
        self.dependent_variable = DatasetVariable("X1")
Exemple #14
0
    def initialize(klass, settings, dependent_variable, dataset_filename):
        """Read ``dataset_filename`` and run differential evolution on it.

        The filename is stored in the initialization's settings under
        "dataset.dataset_filename" before the file is read.
        ``dependent_variable`` may already be a DatasetVariable; any other
        value is wrapped in one.

        NOTE(review): the first parameter is named ``klass``, which suggests a
        classmethod; no decorator is visible here -- confirm.
        """
        runner = WallaceInitialization(settings)
        runner.settings.set("dataset.dataset_filename", dataset_filename)
        loaded_dataset = runner.read_filename(dataset_filename)

        if isinstance(dependent_variable, DatasetVariable):
            target = dependent_variable
        else:
            target = DatasetVariable(dependent_variable)

        runner.run_differential_evolution(loaded_dataset, target)
    def test_training_ols_on_simple_dataset(self):
        """OLS trained on y = x with four rows must reproduce its training targets.

        The predictions are the floating-point output of a fit, so they are
        compared with assertAlmostEqual rather than exact equality, which
        could fail on a last-digit rounding difference.
        """
        settings = {}
        parameter_set = {}
        dependent_variable = DatasetVariable(0)
        independent_variables = [DatasetVariable(1)]
        regression = OLSLinearRegression(settings, parameter_set,
                                         dependent_variable,
                                         independent_variables)

        data_matrix = [[1, 1], [2, 2], [3, 3], [4, 4]]
        dataset = Dataset(data_matrix)

        trained = regression.train(dataset)
        array = trained.predict(dataset)

        for row, expected in enumerate((1, 2, 3, 4)):
            self.assertAlmostEqual(expected, array[row])
    def transform(self, dataset, variables=None):
        """Run every transformation in ``self.transformations`` and merge the results.

        Args:
            dataset: the dataset handed to each transformation; it is also
                always the first dataset passed to ``merge_datasets``.
            variables: the variables passed to each transformation. When
                omitted, one DatasetVariable per column index of ``dataset``
                is used.

        Returns:
            Whatever ``self.merge_datasets`` returns for the original dataset
            followed by every transformation result that is not None.
        """
        # Identity check: a dataset-like object may overload ==/!=.
        if variables is None:
            variables = [DatasetVariable(i) for i in range(dataset.num_cols)]

        resulting_datasets = [dataset]
        for transformation in self.transformations:
            current_dataset = transformation.transform(dataset, variables)
            # A transformation returning None produced nothing to merge.
            if current_dataset is not None:
                resulting_datasets.append(current_dataset)

        return self.merge_datasets(resulting_datasets)
    def setUp(self):
        """Initialize an OptimizationAlgorithm population of FakePredictiveModel and keep it on the test case."""
        settings = AbstractSettings(
            {"optimization_algorithm.population_size": 5})

        generator = PredictiveModelGenerator(settings)
        generator.add_model_type(FakePredictiveModel)

        dataset = Dataset([
            [1, 2, 3, 4],
            [2, 3, 4, 5],
            [3, 4, 5, 6],
            [0, 1, 3, 4],
        ])
        target = DatasetVariable(0)

        algorithm = OptimizationAlgorithm(dataset, target, settings,
                                          generator)
        algorithm.initialize_population()
        self.model_population = algorithm.model_population
Exemple #18
0
    def select_independent_variables(self, probabilities=None):
        """Sample a list of independent variables.

        The number of variables is drawn from
        ``self.variable_count_probabilities.choose()``; the variables
        themselves come from ``WeightedSelection.sample``.

        Args:
            probabilities: passed to ``WeightedSelection.sample``. Defaults
                to ``self.selection_probabilities`` when omitted.

        Returns:
            A list of DatasetVariable. Sampled items that are not already
            DatasetVariable instances are wrapped in one.
        """
        # The count is drawn before the default is resolved, as before.
        num_variables = self.variable_count_probabilities.choose()
        # Identity check: ``== None`` misbehaves for arguments that overload
        # equality (for example array-like containers).
        if probabilities is None:
            probabilities = self.selection_probabilities

        sampled = WeightedSelection.sample(probabilities, num_variables)
        return [
            var if isinstance(var, DatasetVariable) else DatasetVariable(var)
            for var in sampled
        ]
    def setUp(self):
        """Build four wrapped FakePredictiveModels and the DE parameter selection under test.

        The models differ in "range_param_0" (0.25 + 0.1 * index) and in
        "range_param_1" (0.2 for the first model, 0.3 for the rest); the
        first wrapper is used as the target.
        """
        dependent_variable = DatasetVariable(0)
        independent_variables = [DatasetVariable(1), DatasetVariable(2)]
        settings = AbstractSettings({
            "differential_evolution.crossover_probability": 1.0,
            "differential_evolution.differential_weight": 1.0,
        })

        def build_wrapper(index):
            # One wrapped fake model whose parameters depend on its position.
            parameter_set = ParameterSet({
                "range_param_0": 0.25 + 0.1 * index,
                "range_param_1": 0.2 if index == 0 else 0.3,
                "range_param_2": 0.5,
                "category_param_0": "0",
                "category_param_1": "0",
            })
            model = FakePredictiveModel(settings, parameter_set,
                                        dependent_variable,
                                        independent_variables)
            return OptimizationAlgorithmModelWrapper(
                model, "fake_independent_variable_selection")

        population = [build_wrapper(index) for index in xrange(4)]

        self.model_population = population
        self.validity_check = FakePredictiveModel.validity_check()
        self.settings = settings
        self.target_wrapper = population[0]
        self.de_parameter_selection = DEParameterSelection(
            settings, self.target_wrapper, population, self.validity_check)
Exemple #20
0
    def transform(self, dataset, variables=None):
        """Transform every selected column whose data type this transformation accepts.

        Args:
            dataset: source of the filtered matrix, the filtered data types
                and the transformed headers.
            variables: the variables selecting the columns to consider. When
                omitted, one DatasetVariable per column index of ``dataset``
                is used.

        Returns:
            A new Dataset built from the transformed columns, or None when
            the filtered matrix has no rows or no selected column has a data
            type listed in ``self.valid_data_types()``.
        """
        if variables is None:
            variables = [DatasetVariable(i) for i in range(dataset.num_cols)]

        filtered_matrix = dataset.get_filtered_matrix(variables)
        filtered_data_types = dataset.get_filtered_data_types(variables)

        # Without rows there is no first row to measure the column count
        # from, and nothing to transform.
        if len(filtered_matrix) == 0:
            return None

        num_cols = len(filtered_matrix[0])
        transformed_columns = []
        for j in range(num_cols):
            if filtered_data_types[j].data_type in self.valid_data_types():
                self.transform_and_append_column(j, filtered_matrix,
                                                 transformed_columns)

        if not transformed_columns:
            return None

        # transformed_columns is handed to rotate_matrix to produce the data
        # matrix for the new Dataset.
        data_matrix = self.rotate_matrix(transformed_columns)
        headers = self.get_transformed_headers(dataset, variables)
        return Dataset(data_matrix, headers)
Exemple #21
0
    def setUp(self):
        """Create an OptimizationAlgorithm over a 4x4 dataset that generates FakePredictiveModel instances."""
        dataset = Dataset([
            [1, 2, 3, 4],
            [2, 3, 4, 5],
            [3, 4, 5, 6],
            [0, 1, 3, 4],
        ])
        target = DatasetVariable(0)
        settings = AbstractSettings({
            "differential_evolution.crossover_probability": 1.0,
            "differential_evolution.differential_weight": 1.0,
            "optimization_algorithm.population_size": 5,
            "independent_variable_selection.initial_independent_variables_percentage": 1.0,
        })

        generator = PredictiveModelGenerator(settings)
        generator.add_model_type(FakePredictiveModel)

        self.optimization_algorithm = OptimizationAlgorithm(
            dataset, target, settings, generator)