def __init__(self, settings, models=None):
    """Store settings (wrapping plain values in AbstractSettings) and optional models."""
    if not isinstance(settings, AbstractSettings):
        settings = AbstractSettings(settings)
    self.settings = settings
    self.models = models
    self.logger = logging.getLogger(__name__)
    self.logger.info("Initializing Wallace.")
def setUp(self):
    """Build a four-wrapper model population and a DE independent-variable selector."""
    target = DatasetVariable(0)
    candidates = [DatasetVariable(index) for index in (1, 2, 3)]
    settings = AbstractSettings({
        "differential_evolution.crossover_probability": 1.0,
        "differential_evolution.differential_weight": 1.0,
        "independent_variable_selection.initial_independent_variables_percentage": 0.25
    })

    population = []
    for _ in xrange(4):
        model = PredictiveModel(settings, ParameterSet({}), target, candidates)
        selection = IndependentVariableSelection(settings, target, candidates)
        population.append(OptimizationAlgorithmModelWrapper(model, selection))

    self.model_population = population
    self.settings = settings
    self.potential_independent_variables = candidates
    self.target_wrapper = population[0]
    self.de_variable_selection = DEIndependentVariableSelection(
        self.settings, self.target_wrapper, self.model_population,
        self.potential_independent_variables)
def setUp(self):
    """Create a DESelection configured with full crossover probability and weight."""
    config = {
        "differential_evolution.crossover_probability": 1.0,
        "differential_evolution.differential_weight": 1.0
    }
    self.de_selection = DESelection(AbstractSettings(config))
def setUp(self):
    """Load the sample regression CSV through WallaceInitialization."""
    self.settings = AbstractSettings()
    self.initialization = WallaceInitialization(self.settings)
    data_path = os.path.join(
        os.path.dirname(__file__), './sample_regression_data.csv')
    self.dataset = self.initialization.read_filename(os.path.abspath(data_path))
    self.dependent_variable = DatasetVariable("X1")
def test_initializing_small_dataset_with_header(self):
    """Initializing two variables from the headered dataset yields both header columns."""
    settings = AbstractSettings()
    candidates = self.header_dataset.get_independent_variables(
        self.headered_dependent_variable)
    picker = IndependentVariableSelection(
        settings, self.headered_dependent_variable, candidates)
    chosen = [variable.variable
              for variable in picker.initialize_independent_variables(2)]
    self.assertIn("column_1", chosen)
    self.assertIn("column_2", chosen)
def test_mutate_parameters_without_crossover_probability(self):
    """With zero crossover probability, mutation produces no result (None)."""
    de_selection = DESelection(AbstractSettings({
        "differential_evolution.crossover_probability": 0.0,
        "differential_evolution.differential_weight": 1.0
    }))
    self.assertEqual(None, de_selection.mutate(1.0, 1.5, 2.0))
def test_increasing_probability_of_variables(self):
    """Boosting one variable raises its probability above 0.5; others stay below."""
    settings = AbstractSettings()
    candidates = self.header_dataset.get_independent_variables(
        self.headered_dependent_variable)
    selection = IndependentVariableSelection(
        settings, self.headered_dependent_variable, candidates)
    selection.increase_probability(DatasetVariable("column_1"))
    self.assertLess(
        0.5, selection.get_probability(DatasetVariable("column_1")))
    self.assertGreater(
        0.5, selection.get_probability(DatasetVariable("column_2")))
def setUp(self):
    """Prepare a DatasetCleaner over a small matrix containing missing values."""
    self.settings = AbstractSettings({
        "dataset.remove_rows_with_missing_data": True,
        "dataset.maximum_missing_data_percentage": 1.0
    })
    # Third row has an empty cell; fourth has "NaN" markers in numeric columns.
    self.data_matrix = [
        ["3421", "1232", "hello", "t", "5/12/2003"],
        ["2123", "2221", "mello", "f", "3/12/1995"],
        ["5234", "", "treble", "f", "5/5/2013"],
        ["NaN", "NaN", "bobble", "t", "3/1/2004"]
    ]
    self.dataset_cleaner = DatasetCleaner(self.settings, self.data_matrix)
def test_training_decision_tree_on_simple_dataset(self):
    """Training on a 4-row dataset yields one prediction per row."""
    # NOTE(review): the name says "decision tree" but the model under test is
    # RandomForestRegression — consider renaming the test.
    settings = AbstractSettings({})
    regression = RandomForestRegression(settings, self.parameter_set,
                                        DatasetVariable(0), [DatasetVariable(1)])
    dataset = Dataset([[1, 1], [2, 2], [3, 3], [4, 4]])
    predictions = regression.train(dataset).predict(dataset)
    self.assertEqual(4, len(predictions))
def setUp(self):
    """Initialize an optimization-algorithm population of fake predictive models."""
    settings = AbstractSettings({"optimization_algorithm.population_size": 5})
    generator = PredictiveModelGenerator(settings)
    generator.add_model_type(FakePredictiveModel)
    dataset = Dataset([[1, 2, 3, 4], [2, 3, 4, 5], [3, 4, 5, 6], [0, 1, 3, 4]])
    algorithm = OptimizationAlgorithm(
        dataset, DatasetVariable(0), settings, generator)
    algorithm.initialize_population()
    self.model_population = algorithm.model_population
def setUp(self):
    """Construct an SklearnModel over a five-row mixed numeric/categorical matrix."""
    matrix = [
        [1, 2, 3, "a"],
        [2, 3, 2, "b"],
        [3, 2, 1, "a"],
        [5, 5, 1, "c"],
        [2, 2, 2, "a"]
    ]
    self.dataset = Dataset(matrix)
    self.settings = AbstractSettings({})
    self.parameter_set = ParameterSet({})
    self.dependent_variable = DatasetVariable(0)
    self.independent_variables = [
        DatasetVariable(column) for column in (1, 2, 3)]
    self.sklearn_model = SklearnModel(
        self.settings, self.parameter_set, self.dependent_variable,
        self.independent_variables)
def test_writing_tracking_history_to_file(self):
    """Two tracked steps round-trip through the tracking log file."""
    log_file = tempfile.NamedTemporaryFile()
    tracking = OptimizationAlgorithmTracking(AbstractSettings({
        "optimization_algorithm_tracking.tracking_log_filename": log_file.name
    }))
    for step in (0, 1):
        tracking.track_step(step, self.model_population)
    results = tracking.read_logging_file(log_file.name)
    self.assertEqual(2, len(results))
    for step in (0, 1):
        self.assertEqual(step, results[step]["step"])
        self.assertEqual(5, len(results[step]["model_population"]))
def test_cleaning_inconsistent_columns(self):
    """Rows holding 'null' markers are dropped; surviving numeric cells parse to floats."""
    matrix = [["102", "512", "null", "null"],
              ["212", "234", "ss", "mm"],
              ["2.1", "4.3", "ss", "mm"],
              ["231", "321", "ss", "mm"],
              ["null", "null", "bb", "cc"],
              ["4.1", "3.2", "kk", "vv"]]
    cleaner = DatasetCleaner(AbstractSettings({
        "dataset.remove_rows_with_missing_data": True,
        "dataset.maximum_missing_data_percentage": 0.25
    }), matrix)
    cleaned = cleaner.clean()
    self.assertEqual(4, len(cleaned))
    expected_rows = [
        [212.0, 234.0, "ss", "mm"],
        [2.1, 4.3, "ss", "mm"],
        [231.0, 321.0, "ss", "mm"],
        [4.1, 3.2, "kk", "vv"]
    ]
    for expected, actual in zip(expected_rows, cleaned):
        self.assertListEqual(expected, actual)
def setUp(self):
    """Create an OptimizationAlgorithm wired with DE settings and a fake model type."""
    settings = AbstractSettings({
        "differential_evolution.crossover_probability": 1.0,
        "differential_evolution.differential_weight": 1.0,
        "optimization_algorithm.population_size": 5,
        "independent_variable_selection.initial_independent_variables_percentage": 1.0
    })
    generator = PredictiveModelGenerator(settings)
    generator.add_model_type(FakePredictiveModel)
    dataset = Dataset([[1, 2, 3, 4], [2, 3, 4, 5], [3, 4, 5, 6], [0, 1, 3, 4]])
    self.optimization_algorithm = OptimizationAlgorithm(
        dataset, DatasetVariable(0), settings, generator)
def test_generation_of_parameter_set_without_crossover(self):
    """Without crossover, the target wrapper's range parameters pass through unchanged."""
    de_parameter_selection = DEParameterSelection(
        AbstractSettings({
            "differential_evolution.crossover_probability": 0.0,
            "differential_evolution.differential_weight": 0.75,
        }),
        self.target_wrapper, self.model_population, self.validity_check)
    parameter_set = de_parameter_selection.generate_parameter_set()
    for name, expected in (("range_param_0", 0.25),
                           ("range_param_1", 0.2),
                           ("range_param_2", 0.5)):
        self.assertEqual(expected, parameter_set.get(name))
    for name in ("category_param_0", "category_param_1"):
        self.assertIn(parameter_set.get(name), ["0", "1", "2", "3"])
def test_generation_of_parameter_set_with_different_mutation_params(self):
    """With full crossover, range_param_0 lands on one of the expected mutated values."""
    de_parameter_selection = DEParameterSelection(
        AbstractSettings({
            "differential_evolution.crossover_probability": 1.0,
            "differential_evolution.differential_weight": 0.75,
        }),
        self.target_wrapper, self.model_population, self.validity_check)
    parameter_set = de_parameter_selection.generate_parameter_set()
    self.assertIn(round(parameter_set.get("range_param_0"), 3),
                  [0.275, 0.300, 0.425, 0.475, 0.600, 0.625])
    self.assertAlmostEqual(0.3, parameter_set.get("range_param_1"))
    self.assertAlmostEqual(0.5, parameter_set.get("range_param_2"))
    for name in ("category_param_0", "category_param_1"):
        self.assertIn(parameter_set.get(name), ["0", "1", "2", "3"])
def test_cleaning_sparse_columns(self):
    """Cleaning drops rows over the missing-data threshold but keeps sparse-column Nones."""
    settings = AbstractSettings({
        "dataset.remove_rows_with_missing_data": True,
        "dataset.maximum_missing_data_percentage": 0.25
    })
    non_sparse_rows = [["13", "123"], ["23", "234"], ["34", "455"],
                       ["12", "345"], ["11", "235"], ["34", "234"],
                       [None, "234"]]
    sparse_rows = [[None, "324"], [None, "232"], [None, "123"],
                   [None, "234"], [None, "234"], ["1111", "234"],
                   ["4324", None]]

    cleaned = DatasetCleaner(settings, non_sparse_rows).clean()
    self.assertEqual(6, len(cleaned))
    expected = [[13, 123], [23, 234], [34, 455],
                [12, 345], [11, 235], [34, 234]]
    for row_expected, row_actual in zip(expected, cleaned):
        self.assertListEqual(row_expected, row_actual)

    # A mostly-None column is treated as sparse: its Nones survive cleaning.
    cleaned = DatasetCleaner(settings, sparse_rows).clean()
    self.assertEqual(6, len(cleaned))
    expected = [[None, 324], [None, 232], [None, 123],
                [None, 234], [None, 234], [1111, 234]]
    for row_expected, row_actual in zip(expected, cleaned):
        self.assertListEqual(row_expected, row_actual)
def setUp(self):
    """Build a four-wrapper population of fake models and a DE parameter selector."""
    dependent_variable = DatasetVariable(0)
    independent_variables = [DatasetVariable(1), DatasetVariable(2)]
    settings = AbstractSettings({
        "differential_evolution.crossover_probability": 1.0,
        "differential_evolution.differential_weight": 1.0,
    })
    model_population = []
    for i in xrange(4):
        parameter_set = ParameterSet({
            "range_param_0": 0.25 + 0.1 * i,
            # The first wrapper gets a distinct value so mutation is observable.
            "range_param_1": 0.2 if i == 0 else 0.3,
            "range_param_2": 0.5,
            "category_param_0": "0",
            "category_param_1": "0"
        })
        model = FakePredictiveModel(settings, parameter_set,
                                    dependent_variable, independent_variables)
        model_population.append(OptimizationAlgorithmModelWrapper(
            model, "fake_independent_variable_selection"))
    self.model_population = model_population
    self.validity_check = FakePredictiveModel.validity_check()
    self.settings = settings
    self.target_wrapper = model_population[0]
    self.de_parameter_selection = DEParameterSelection(
        settings, self.target_wrapper, model_population, self.validity_check)
def setUp(self):
    """Point a DatasetFileReader at the sample CSV next to this test file."""
    sample_path = os.path.join(os.path.dirname(__file__), './sample_dataset.csv')
    self.dataset_file_reader = DatasetFileReader(
        AbstractSettings({}), os.path.abspath(sample_path))
class WallaceInitialization(object):
    """Entry point wiring settings, dataset reading, and differential evolution."""

    # Model classes eligible for selection; None means "use the default weight".
    DEFAULT_PREDICTIVE_MODELS = {
        BayesianRidgeRegression: None,
        DecisionTreeRegression: None,
        LarsLassoRegression: None,
        LarsRegression: None,
        LassoRegression: None,
        OLSLinearRegression: None,
        RidgeRegression: None,
        SvmSvcRegression: None
    }

    def __init__(self, settings, models=None):
        """Wrap raw settings in AbstractSettings; keep an optional models collection."""
        if isinstance(settings, AbstractSettings):
            self.settings = settings
        else:
            self.settings = AbstractSettings(settings)
        self.models = models
        self.logger = logging.getLogger(__name__)
        self.logger.info("Initializing Wallace.")

    def create_predictive_model_generator(self, models=None):
        """Build a PredictiveModelGenerator from a model dict, list, or the defaults."""
        if models is None:  # was `== None`; identity comparison is the correct idiom
            models = WeightedSelection(self.DEFAULT_PREDICTIVE_MODELS).normalize_weights()
        elif isinstance(models, list):
            # A bare list means equal (default) weights for every model class.
            predictive_models = {}
            for model in models:
                predictive_models[model] = None
            models = WeightedSelection(predictive_models).normalize_weights()
        else:
            models = WeightedSelection(models).normalize_weights()
        predictive_model_generator = PredictiveModelGenerator(self.settings)
        predictive_model_generator.set_model_types(models)
        return predictive_model_generator

    def run_differential_evolution(self, dataset, dependent_variable):
        """Run the differential evolution search over the given dataset."""
        self.logger.info("Running differential evolution on dataset.")
        predictive_model_generator = self.create_predictive_model_generator()
        differential_evolution = DifferentialEvolution(
            dataset, dependent_variable, self.settings, predictive_model_generator)
        differential_evolution.run()

    def read_filename(self, dataset_filename):
        """Read a dataset file, applying transformations when settings enable them."""
        dataset = DatasetFileReader(self.settings, dataset_filename).read()
        if self.settings.get("dataset_transformation.transform_datasets"):
            return self.clean_and_transform_data(dataset)
        else:
            return dataset

    def clean_and_transform_data(self, dataset):
        """Apply the configured dataset transformations."""
        return DatasetTransformer(self.settings).transform(dataset)

    @classmethod
    def initialize(klass, settings, dependent_variable, dataset_filename):
        """Read the dataset and run differential evolution end to end."""
        initialization = WallaceInitialization(settings)
        initialization.settings.set("dataset.dataset_filename", dataset_filename)
        dataset = initialization.read_filename(dataset_filename)
        if not isinstance(dependent_variable, DatasetVariable):
            dependent_variable = DatasetVariable(dependent_variable)
        initialization.run_differential_evolution(dataset, dependent_variable)

    @classmethod
    def initialize_multiprocess_pool(klass, settings, dependent_variable,
                                     dataset_filename, processes=10):
        """Run `initialize` asynchronously in a worker pool.

        Returns the AsyncResult so callers can wait on (or inspect) the run.
        The original discarded the result and never closed the pool, so the
        pool leaked and any failure in the worker was silently lost.
        """
        pool = Pool(processes=processes)
        result = pool.apply_async(
            klass.initialize,
            args=(settings, dependent_variable, dataset_filename))
        pool.close()  # no further tasks; workers exit once this one finishes
        return result
class WallaceInitialization(object):
    """Entry point wiring settings, dataset reading, and differential evolution."""

    # Model classes eligible for selection; None means "use the default weight".
    DEFAULT_PREDICTIVE_MODELS = {
        BayesianRidgeRegression: None,
        DecisionTreeRegression: None,
        LarsLassoRegression: None,
        LarsRegression: None,
        LassoRegression: None,
        OLSLinearRegression: None,
        RidgeRegression: None,
        SvmSvcRegression: None
    }

    def __init__(self, settings, models=None):
        """Wrap raw settings in AbstractSettings; keep an optional models collection."""
        if isinstance(settings, AbstractSettings):
            self.settings = settings
        else:
            self.settings = AbstractSettings(settings)
        self.models = models
        self.logger = logging.getLogger(__name__)
        self.logger.info("Initializing Wallace.")

    def create_predictive_model_generator(self, models=None):
        """Build a PredictiveModelGenerator from a model dict, list, or the defaults."""
        if models is None:  # was `== None`; identity comparison is the correct idiom
            models = WeightedSelection(
                self.DEFAULT_PREDICTIVE_MODELS).normalize_weights()
        elif isinstance(models, list):
            # A bare list means equal (default) weights for every model class.
            predictive_models = {}
            for model in models:
                predictive_models[model] = None
            models = WeightedSelection(predictive_models).normalize_weights()
        else:
            models = WeightedSelection(models).normalize_weights()
        predictive_model_generator = PredictiveModelGenerator(self.settings)
        predictive_model_generator.set_model_types(models)
        return predictive_model_generator

    def run_differential_evolution(self, dataset, dependent_variable):
        """Run the differential evolution search over the given dataset."""
        self.logger.info("Running differential evolution on dataset.")
        predictive_model_generator = self.create_predictive_model_generator()
        differential_evolution = DifferentialEvolution(
            dataset, dependent_variable, self.settings,
            predictive_model_generator)
        differential_evolution.run()

    def read_filename(self, dataset_filename):
        """Read a dataset file, applying transformations when settings enable them."""
        dataset = DatasetFileReader(self.settings, dataset_filename).read()
        if self.settings.get("dataset_transformation.transform_datasets"):
            return self.clean_and_transform_data(dataset)
        else:
            return dataset

    def clean_and_transform_data(self, dataset):
        """Apply the configured dataset transformations."""
        return DatasetTransformer(self.settings).transform(dataset)

    @classmethod
    def initialize(klass, settings, dependent_variable, dataset_filename):
        """Read the dataset and run differential evolution end to end."""
        initialization = WallaceInitialization(settings)
        initialization.settings.set("dataset.dataset_filename", dataset_filename)
        dataset = initialization.read_filename(dataset_filename)
        if not isinstance(dependent_variable, DatasetVariable):
            dependent_variable = DatasetVariable(dependent_variable)
        initialization.run_differential_evolution(dataset, dependent_variable)

    @classmethod
    def initialize_multiprocess_pool(klass, settings, dependent_variable,
                                     dataset_filename, processes=10):
        """Run `initialize` asynchronously in a worker pool.

        Returns the AsyncResult so callers can wait on (or inspect) the run.
        The original discarded the result and never closed the pool, so the
        pool leaked and any failure in the worker was silently lost.
        """
        pool = Pool(processes=processes)
        result = pool.apply_async(
            klass.initialize,
            args=(settings, dependent_variable, dataset_filename))
        pool.close()  # no further tasks; workers exit once this one finishes
        return result
def setUp(self):
    """Create a DatasetTransformer limited to the identity transformation."""
    config = {"dataset_transformation.default_transformations": True}
    self.settings = AbstractSettings(config)
    self.transformations = [IdentityTransformation]
    self.transformer = DatasetTransformer(self.settings, self.transformations)
def setUp(self):
    # Fresh default settings for each test; no overrides needed here.
    self.settings = AbstractSettings()
def setUp(self):
    """Instantiate the identity transformation under default settings."""
    self.dataset_transformation = IdentityTransformation(AbstractSettings())