def setUp(self):
    """Build a four-wrapper model population and the DE variable selector under test.

    Every wrapper pairs a PredictiveModel with its own
    IndependentVariableSelection; the first wrapper is used as the target.
    """
    target_variable = DatasetVariable(0)
    candidate_variables = [DatasetVariable(1), DatasetVariable(2), DatasetVariable(3)]
    settings = AbstractSettings({
        "differential_evolution.crossover_probability": 1.0,
        "differential_evolution.differential_weight": 1.0,
        "independent_variable_selection.initial_independent_variables_percentage": 0.25
    })

    def build_wrapper():
        # Model first, then its selection, mirroring the construction order
        # used for every population member.
        model = PredictiveModel(
            settings, ParameterSet({}), target_variable, candidate_variables)
        selection = IndependentVariableSelection(
            settings, target_variable, candidate_variables)
        return OptimizationAlgorithmModelWrapper(model, selection)

    self.model_population = [build_wrapper() for _ in range(4)]
    self.settings = settings
    self.potential_independent_variables = candidate_variables
    self.target_wrapper = self.model_population[0]
    self.de_variable_selection = DEIndependentVariableSelection(
        self.settings,
        self.target_wrapper,
        self.model_population,
        self.potential_independent_variables)
def test_required_parameters(self):
    # Supplies param1, param2 and param3; the test passes as long as
    # constructing the model does not raise.
    def unit_range():
        # A fresh dict per parameter so no two entries share one object.
        return {
            "type": "range",
            "lower_bound": 0.0,
            "upper_bound": 1.0,
            "value": 0.5
        }

    dictionary = {
        "param1": unit_range(),
        "param2": unit_range(),
        "param3": unit_range()
    }
    settings = {}
    parameter_set = ParameterSet.create_from_dict(dictionary)
    target_variable = DatasetVariable(0)
    predictors = [DatasetVariable(1)]
    RequiredParametersPredictiveModel(
        settings, parameter_set, target_variable, predictors)
def test_missing_required_parameters_raises_error(self):
    # Only param1 and param3 are supplied (param2 is left out); building the
    # model is expected to raise AssertionError.
    def unit_range():
        # A fresh dict per parameter so no two entries share one object.
        return {
            "type": "range",
            "lower_bound": 0.0,
            "upper_bound": 1.0,
            "value": 0.5
        }

    dictionary = {
        "param1": unit_range(),
        "param3": unit_range()
    }
    settings = {}
    parameter_set = ParameterSet.create_from_dict(dictionary)
    target_variable = DatasetVariable(0)
    predictors = [DatasetVariable(1)]
    with self.assertRaises(AssertionError):
        RequiredParametersPredictiveModel(
            settings, parameter_set, target_variable, predictors)
def setUp(self):
    """Create one 3x3 matrix as both a headered and a header-less dataset.

    Also prepares the dependent variable in both addressing forms: by header
    name ("column_0") and by column index (0).
    """
    rows = [
        [1, 2, 3],
        [2, 3, 4],
        [5, 6, 7],
    ]
    column_names = ["column_0", "column_1", "column_2"]

    # Both datasets are built from the very same matrix object.
    self.header_dataset = Dataset(rows, column_names)
    self.nonheader_dataset = Dataset(rows)

    self.headered_dependent_variable = DatasetVariable("column_0")
    self.nonheadered_dependent_variable = DatasetVariable(0)
def test_get_filtered_matrix(self):
    # Filtering by column index 0 and by header "string" should return four
    # rows of exactly two values each, in the expected order.
    selected = [DatasetVariable(0), DatasetVariable("string")]
    rows = self.dataset.get_filtered_matrix(selected)

    self.assertEqual(4, len(rows))
    for row in rows:
        self.assertEqual(2, len(row))

    expected_rows = [
        [0, "detective"],
        [1, "pablo"],
        [2, "african"],
        [3, "applause"],
    ]
    for position, expected in enumerate(expected_rows):
        self.assertListEqual(expected, rows[position])
def test_increasing_probability_of_variables(self):
    # After increasing the probability of "column_1", it should sit above 0.5
    # while "column_2" should sit below 0.5.
    settings = AbstractSettings()
    dependent = self.headered_dependent_variable
    candidates = self.header_dataset.get_independent_variables(dependent)
    selection = IndependentVariableSelection(settings, dependent, candidates)

    selection.increase_probability(DatasetVariable("column_1"))

    boosted = selection.get_probability(DatasetVariable("column_1"))
    self.assertLess(0.5, boosted)
    untouched = selection.get_probability(DatasetVariable("column_2"))
    self.assertGreater(0.5, untouched)
def test_training_decision_tree_on_simple_dataset(self):
    # Train on a four-row dataset and check that predicting on the same data
    # yields one prediction per row.
    # NOTE(review): the test name says "decision tree" but the model built
    # here is RandomForestRegression -- confirm which one is intended.
    settings = AbstractSettings({})
    target_variable = DatasetVariable(0)
    predictors = [DatasetVariable(1)]
    regression = RandomForestRegression(
        settings, self.parameter_set, target_variable, predictors)

    dataset = Dataset([[1,1], [2,2], [3,3], [4,4]])
    predictions = regression.train(dataset).predict(dataset)

    self.assertEqual(4, len(predictions))
def test_header_transformation_for_dataset_with_headers(self):
    # Whether the variable is given by column index or by header name, the
    # transformed header should be the original header prefixed with
    # "identitytransformation_".
    dataset = Dataset([[1, 2, 3], [4, 5, 6]], ["h0", "h1", "h2"])
    cases = (
        (0, "identitytransformation_h0"),
        ("h2", "identitytransformation_h2"),
    )
    for key, expected_header in cases:
        variable = DatasetVariable(key)
        header = self.dataset_transformation.get_transformed_header(
            dataset, variable)
        self.assertEqual(expected_header, header)
def test_getting_independent_variables(self):
    # The dependent variable is addressed once by index (0) and once by
    # header ("number"); either way the two remaining variables should be
    # "string" and "another_string".
    expected_names = ["string", "another_string"]
    dependent_forms = (DatasetVariable(0), DatasetVariable("number"))
    for dependent in dependent_forms:
        remaining = self.dataset.get_independent_variables(dependent)
        self.assertEqual(2, len(remaining))
        for variable in remaining:
            self.assertIn(variable.variable, expected_names)
def setUp(self):
    """Prepare a five-row, four-column dataset and a SklearnModel over it.

    Column 0 is the dependent variable; columns 1-3 (the last one holding
    strings) are the independent variables.
    """
    rows = [
        [1, 2, 3, "a"],
        [2, 3, 2, "b"],
        [3, 2, 1, "a"],
        [5, 5, 1, "c"],
        [2, 2, 2, "a"],
    ]
    self.dataset = Dataset(rows)
    self.settings = AbstractSettings({})
    self.parameter_set = ParameterSet({})
    self.dependent_variable = DatasetVariable(0)
    self.independent_variables = [
        DatasetVariable(1),
        DatasetVariable(2),
        DatasetVariable(3),
    ]
    self.sklearn_model = SklearnModel(
        self.settings,
        self.parameter_set,
        self.dependent_variable,
        self.independent_variables)
def test_header_transformation_for_dataset_without_headers(self):
    # A dataset built without headers has nothing to derive a transformed
    # header from, so the transformation is expected to return None.
    data_matrix = [[1, 2, 3], [4, 5, 6]]
    dataset = Dataset(data_matrix)
    variable = DatasetVariable(0)
    header = self.dataset_transformation.get_transformed_header(
        dataset, variable)
    # assertIsNone checks identity, so an object that merely compares equal
    # to None cannot satisfy the assertion by accident.
    self.assertIsNone(header)
def test_training_ridge_on_simple_dataset(self):
    # Train on four points of y = x and compare the predictions on the same
    # data against the expected values.
    # NOTE(review): the test name says "ridge" but the model built here is
    # LassoRegression -- confirm which one is intended.
    settings = {}
    target_variable = DatasetVariable(0)
    predictors = [DatasetVariable(1)]
    regression = LassoRegression(
        settings, self.parameter_set, target_variable, predictors)

    dataset = Dataset([[1, 1], [2, 2], [3, 3], [4, 4]])
    predictions = regression.train(dataset).predict(dataset)

    expected_predictions = (1.6, 2.2, 2.8, 3.4)
    for position, expected in enumerate(expected_predictions):
        self.assertAlmostEqual(expected, predictions[position])
def setUp(self):
    """Load sample_regression_data.csv, located next to this file, as the fixture.

    The dataset is read through a WallaceInitialization built from default
    settings, and "X1" is used as the dependent variable.
    """
    self.settings = AbstractSettings()
    self.initialization = WallaceInitialization(self.settings)

    this_directory = os.path.dirname(__file__)
    relative_path = os.path.join(this_directory, './sample_regression_data.csv')
    csv_path = os.path.abspath(relative_path)

    self.dataset = self.initialization.read_filename(csv_path)
    self.dependent_variable = DatasetVariable("X1")
def initialize(klass, settings, dependent_variable, dataset_filename):
    """Read a dataset file and run differential evolution on it.

    The filename is recorded in the settings under
    "dataset.dataset_filename" before the file is read. Nothing is returned.

    NOTE(review): `klass` is never used in the body; presumably this is a
    classmethod -- confirm against the enclosing class.

    Args:
        klass: Unused first argument.
        settings: Settings handed to WallaceInitialization.
        dependent_variable: A DatasetVariable, or a raw value that is wrapped
            in one.
        dataset_filename: Path of the dataset file to read.
    """
    runner = WallaceInitialization(settings)
    runner.settings.set("dataset.dataset_filename", dataset_filename)
    loaded_dataset = runner.read_filename(dataset_filename)

    # Accept a raw column reference for convenience and wrap it here.
    target = (
        dependent_variable
        if isinstance(dependent_variable, DatasetVariable)
        else DatasetVariable(dependent_variable)
    )
    runner.run_differential_evolution(loaded_dataset, target)
def test_training_ols_on_simple_dataset(self):
    # Fit OLS on four points where column 0 (dependent) equals column 1
    # (independent); predictions on the training data should reproduce the
    # dependent column.
    settings = {}
    parameter_set = {}
    dependent_variable = DatasetVariable(0)
    independent_variables = [DatasetVariable(1)]
    regression = OLSLinearRegression(settings, parameter_set,
                                     dependent_variable,
                                     independent_variables)
    data_matrix = [[1, 1], [2, 2], [3, 3], [4, 4]]
    dataset = Dataset(data_matrix)
    trained = regression.train(dataset)
    array = trained.predict(dataset)
    # Predictions are floating-point results of a numeric fit; compare with a
    # tolerance rather than exact equality so harmless rounding differences
    # do not fail the test.
    for position, expected in enumerate((1, 2, 3, 4)):
        self.assertAlmostEqual(expected, array[position])
def transform(self, dataset, variables=None):
    """Apply every transformation to `dataset` and merge the results.

    The original dataset always comes first in the list handed to
    `self.merge_datasets`; each transformation that returns something other
    than None contributes one further dataset, in transformation order.

    Args:
        dataset: The dataset to transform.
        variables: Variables passed on to each transformation. When None,
            one DatasetVariable per column index of `dataset` is used.

    Returns:
        Whatever `self.merge_datasets` returns for the collected datasets.
    """
    # Identity checks: `== None` / `!= None` would dispatch to a
    # user-defined __eq__/__ne__ and could misclassify a real object.
    if variables is None:
        variables = [DatasetVariable(i) for i in range(dataset.num_cols)]

    resulting_datasets = [dataset]
    for transformation in self.transformations:
        current_dataset = transformation.transform(dataset, variables)
        # A transformation returns None when it produced nothing to merge.
        if current_dataset is not None:
            resulting_datasets.append(current_dataset)
    return self.merge_datasets(resulting_datasets)
def setUp(self):
    """Initialize an optimization algorithm's population and keep it as the fixture.

    Uses a population size setting of 5, FakePredictiveModel as the only
    model type, and column 0 of a 4x4 dataset as the dependent variable.
    """
    settings = AbstractSettings({"optimization_algorithm.population_size": 5})

    generator = PredictiveModelGenerator(settings)
    generator.add_model_type(FakePredictiveModel)

    rows = [
        [1, 2, 3, 4],
        [2, 3, 4, 5],
        [3, 4, 5, 6],
        [0, 1, 3, 4],
    ]
    algorithm = OptimizationAlgorithm(
        Dataset(rows), DatasetVariable(0), settings, generator)
    algorithm.initialize_population()

    self.model_population = algorithm.model_population
def select_independent_variables(self, probabilities=None):
    """Sample a set of independent variables.

    The number of variables comes from
    `self.variable_count_probabilities.choose()`; the variables themselves
    are then drawn with `WeightedSelection.sample`.

    Args:
        probabilities: Selection weights passed to `WeightedSelection.sample`.
            When None, `self.selection_probabilities` is used.

    Returns:
        A list of DatasetVariable objects; sampled items that are not
        already DatasetVariable instances are wrapped in one.
    """
    num_variables = self.variable_count_probabilities.choose()
    # Identity check: `== None` would call the container's own __eq__, which
    # may be overridden or elementwise (e.g. array-like weights).
    if probabilities is None:
        probabilities = self.selection_probabilities
    sampled = WeightedSelection.sample(probabilities, num_variables)
    return [
        var if isinstance(var, DatasetVariable) else DatasetVariable(var)
        for var in sampled
    ]
def setUp(self):
    """Build four wrapped FakePredictiveModels and the DE parameter selector under test.

    Member i gets "range_param_0" = 0.25 + 0.1 * i. "range_param_1" is 0.2
    for the first member (the target) and 0.3 for the others; the remaining
    parameters are identical across members.
    """
    target_variable = DatasetVariable(0)
    predictors = [DatasetVariable(1), DatasetVariable(2)]
    settings = AbstractSettings({
        "differential_evolution.crossover_probability": 1.0,
        "differential_evolution.differential_weight": 1.0,
    })

    def build_wrapper(index):
        second_range_value = 0.2 if index == 0 else 0.3
        parameter_set = ParameterSet({
            "range_param_0": 0.25 + 0.1 * index,
            "range_param_1": second_range_value,
            "range_param_2": 0.5,
            "category_param_0": "0",
            "category_param_1": "0"
        })
        model = FakePredictiveModel(
            settings, parameter_set, target_variable, predictors)
        # A placeholder string stands in for the variable selection.
        return OptimizationAlgorithmModelWrapper(
            model, "fake_independent_variable_selection")

    wrappers = [build_wrapper(index) for index in range(4)]

    self.model_population = wrappers
    self.validity_check = FakePredictiveModel.validity_check()
    self.settings = settings
    self.target_wrapper = wrappers[0]
    self.de_parameter_selection = DEParameterSelection(
        settings, self.target_wrapper, wrappers, self.validity_check)
def transform(self, dataset, variables=None):
    """Transform the selected columns that have a supported data type.

    Args:
        dataset: Source dataset; must provide `get_filtered_matrix`,
            `get_filtered_data_types` and, when `variables` is None,
            `num_cols`.
        variables: Variables selecting the columns to consider. When None,
            one DatasetVariable per column index of `dataset` is used.

    Returns:
        A new Dataset built from the transformed columns, or None when no
        column was transformed (including when the filtered matrix is empty).
    """
    # Identity check: `== None` would dispatch to a user-defined __eq__.
    if variables is None:
        variables = [DatasetVariable(i) for i in range(dataset.num_cols)]

    filtered_matrix = dataset.get_filtered_matrix(variables)
    filtered_data_types = dataset.get_filtered_data_types(variables)

    # An empty matrix has no first row to measure; treat it as zero columns
    # instead of failing with IndexError.
    num_cols = len(filtered_matrix[0]) if len(filtered_matrix) > 0 else 0

    valid_data_types = self.valid_data_types()
    transformed_columns = []
    for j in range(num_cols):
        if filtered_data_types[j].data_type in valid_data_types:
            self.transform_and_append_column(j, filtered_matrix,
                                             transformed_columns)

    if len(transformed_columns) == 0:
        return None

    # Columns were collected one by one; rotate them back into row layout.
    data_matrix = self.rotate_matrix(transformed_columns)
    # NOTE(review): headers are requested for all `variables`, not only the
    # columns that passed the data-type filter -- confirm that
    # get_transformed_headers applies the same filter.
    headers = self.get_transformed_headers(dataset, variables)
    return Dataset(data_matrix, headers)
def setUp(self):
    """Create the OptimizationAlgorithm under test over a 4x4 dataset.

    Column 0 is the dependent variable and FakePredictiveModel is the only
    registered model type. The population itself is not initialized here.
    """
    rows = [
        [1, 2, 3, 4],
        [2, 3, 4, 5],
        [3, 4, 5, 6],
        [0, 1, 3, 4],
    ]
    dataset = Dataset(rows)
    target_variable = DatasetVariable(0)
    settings = AbstractSettings({
        "differential_evolution.crossover_probability": 1.0,
        "differential_evolution.differential_weight": 1.0,
        "optimization_algorithm.population_size": 5,
        "independent_variable_selection.initial_independent_variables_percentage": 1.0
    })

    generator = PredictiveModelGenerator(settings)
    generator.add_model_type(FakePredictiveModel)

    self.optimization_algorithm = OptimizationAlgorithm(
        dataset, target_variable, settings, generator)