def make_snapshot_simulation(name, identity, data_id, learner, path):
    simulation = autem.Simulation(
        name,
        [
            loaders.OpenMLLoader(data_id),
            scorers.LeagueScorer(scorers.accuracy_score, [[1, 4, 5]]),
            workflows.SnapshotWorkflow(),
            baselines.BaselineStats(identity['dataset']),
            learner_builders[learner](),
            reporters.Csv(path),
        ])
    return simulation
def make_short_standard_simulation(name, identity, data_id, learner, path):
    simulation = autem.Simulation(
        name,
        [
            loaders.OpenMLLoader(data_id),
            scorers.LeagueScorer(scorers.accuracy_score, [[1, 4, 5]]),
            workflows.StandardWorkflow(),
            baselines.BaselineStats(identity['dataset']),
            learner_builders[learner](),
            reporters.Csv(path),
        ])
    settings = autem.SimulationSettings(simulation)
    settings.set_max_species(3)
    return simulation
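# Usage sketch (an assumption, not part of the original module): the two factory
# functions above only build a Simulation, so the caller runs it afterwards, as the
# run_* functions below do. Every argument value here is hypothetical, including
# the learner_builders key and the OpenML data_id.
def example_snapshot_run():
    import pathlib
    identity = {'dataset': 'example-dataset'}   # the factory only reads identity['dataset']
    path = pathlib.Path("simulations/example")
    simulation = make_snapshot_simulation(
        "example snapshot", identity,
        data_id=11,        # hypothetical OpenML dataset id
        learner="SVM",     # hypothetical learner_builders key
        path=path)
    simulation.run()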
def run_test_simulation(baseline_name=None, seed=None):
    # Quick snapshot-workflow test run against a baseline OpenML task.
    baseline_name = get_test_baseline_name() if baseline_name is None else baseline_name
    experiment = baseline_name if seed is None else "%s_%d" % (baseline_name, seed)
    study = get_test_study()
    seed = seed if seed is not None else 2
    version = get_test_version()
    configuration = baselines.get_baseline_configuration(baseline_name)
    path = get_test_simulations_path().joinpath(study).joinpath(experiment)
    memory = str(path.joinpath("cache"))

    # Resolve the OpenML task and its underlying dataset.
    utility.prepare_OpenML()
    task_id = configuration["task_id"]
    task = openml.tasks.get_task(task_id)
    data_id = task.dataset_id
    dataset = openml.datasets.get_dataset(data_id)
    dataset_name = dataset.name

    simulation_name = "Test %s_%s_v%d" % (study, experiment, version)
    identity = {
        'study': study,
        'experiment': experiment,
        'dataset': dataset_name,
        'version': version,
    }
    simulation = autem.Simulation(
        simulation_name,
        [
            loaders.OpenMLLoader(data_id),
            scorers.Accuracy(),
            workflows.Snapshot(),
            baselines.BaselineStats(baseline_name),
            reporters.Csv(path),
            hyper_learners.ClassificationSnapshot(),
        ],
        seed=seed, n_jobs=4, identity=identity, memory=memory)
    simulation.run()
def run_balance_scale_mastery(seed):
    # Mastery-workflow SVM run on the balance-scale baseline.
    baseline_name = "balance-scale"
    experiment = "mastery_%s_s%d" % (baseline_name, seed)
    study = "DEV"
    version = benchmark.get_version()
    simulation_name = "%s_%s_v%d" % (study, experiment, version)
    configuration = baselines.get_baseline_configuration(baseline_name)
    path = benchmark.get_simulations_path().joinpath(study).joinpath(experiment)

    # Resolve the OpenML task and its underlying dataset.
    utility.prepare_OpenML()
    task_id = configuration["task_id"]
    task = openml.tasks.get_task(task_id)
    data_id = task.dataset_id
    dataset = openml.datasets.get_dataset(data_id)
    dataset_name = dataset.name

    identity = {
        'study': study,
        'experiment': experiment,
        'dataset': dataset_name,
        'version': version,
    }
    simulation = autem.Simulation(
        simulation_name,
        [
            loaders.OpenMLLoader(data_id),
            scorers.LeagueScorer(scorers.accuracy_score),
            workflows.MasteryWorkflow(),
            baselines.BaselineStats(baseline_name),
            hyper_learners.ClassificationSVM(),
            reporters.Csv(path),
        ])
    settings = autem.SimulationSettings(simulation)
    settings.set_identity(identity)
    settings.set_n_jobs(4)
    settings.set_seed(seed)
    simulation.run()
def run_cylinder_bands_mastery():
    # Mastery-workflow SVM run on the cylinder-bands baseline.
    seed = 1
    study = "DEV"
    baseline_name = "cylinder-bands"
    experiment = baseline_name
    version = benchmark.get_version()
    simulation_name = "%s_%s_v%d" % (study, experiment, version)
    configuration = baselines.get_baseline_configuration(baseline_name)
    path = benchmark.get_simulations_path().joinpath(study).joinpath(experiment)

    # Resolve the OpenML task and its underlying dataset.
    utility.prepare_OpenML()
    task_id = configuration["task_id"]
    task = openml.tasks.get_task(task_id)
    data_id = task.dataset_id
    dataset = openml.datasets.get_dataset(data_id)
    dataset_name = dataset.name

    identity = {
        'study': study,
        'experiment': experiment,
        'dataset': dataset_name,
        'version': version,
    }
    simulation = autem.Simulation(
        simulation_name,
        [
            loaders.OpenMLLoader(data_id),
            scorers.Accuracy(),
            workflows.Mastery(["Learner"]),
            baselines.BaselineStats(baseline_name),
            hyper_learners.ClassificationSVM(),
            reporters.Csv(path),
        ],
        seed=seed, n_jobs=4, identity=identity)
    simulation.run()
def make_standard_simulation(study, baseline_name, hyperlearner):
    # Build a standard-workflow simulation for a hyper-learner benchmark run.
    prepare_OpenML()
    hyper_configuration = configuration.get_hyper_configuration(baseline_name)
    task_id = hyper_configuration["task_id"]
    task = openml.tasks.get_task(task_id)
    data_id = task.dataset_id

    name = "%s %s" % (baseline_name, study)
    path = configuration.get_hyper_simulations_path().joinpath(study).joinpath(baseline_name)
    n_jobs = 4
    seed = 1
    memory = str(path.joinpath("cache"))
    identity = {
        'study': study,
        'dataset': baseline_name,
        'scorer': 'League1x10',
        'workflow': 'standard',
        'learner': hyperlearner,
    }
    simulation = autem.Simulation(
        name,
        [
            loaders.OpenMLLoader(data_id),
            scorers.LeagueScorer(scorers.accuracy_score, [[1, 4, 5]]),
            workflows.StandardWorkflow(),
            learner_builders[hyperlearner](),
            reporters.Csv(path),
        ])
    settings = autem.SimulationSettings(simulation)
    settings.set_identity(identity)
    settings.set_n_jobs(n_jobs)
    settings.set_seed(seed)
    settings.set_memory(memory)
    return simulation
def make_openml_light_classifier_simulation(study, experiment, baseline_name, task_id, seed, population_size, path, properties=None):
    # Build a light classifier simulation from an explicit component list
    # (evaluators, judges, raters and choices) rather than a prebuilt workflow.
    # Use None instead of a mutable default so repeated calls don't share state.
    properties = {} if properties is None else properties

    task = openml.tasks.get_task(task_id)
    data_id = task.dataset_id
    dataset = openml.datasets.get_dataset(data_id)
    dataset_name = dataset.name

    version = get_version()
    simulation_name = "%s_%s_v%d" % (study, experiment, version)
    properties['study'] = study
    properties['experiment'] = experiment
    properties['dataset'] = dataset_name
    properties['version'] = version

    simulation = autem.Simulation(
        simulation_name,
        [
            loaders.OpenMLLoader(data_id),
            scorers.Accuracy(),

            evaluators.ChoicePredictedScoreEvaluator(),
            makers.TopChoiceMaker(),
            evaluators.ScoreEvaluator(),
            evaluators.AccuracyContest(),
            evaluators.DiverseContest(0.99),
            evaluators.VotingContest(),
            evaluators.SurvivalJudge(),
            evaluators.PromotionJudge(),
            evaluators.CrossValidationRater(),
            evaluators.OpenMLRater(task_id),
            evaluators.DummyClassifierAccuracy(),
            evaluators.ValidationAccuracy(),
            baselines.BaselineStats(baseline_name),
            reporters.Path(path),

            # Scalers
            autem.Choice("Scaler", [
                preprocessors.MaxAbsScaler(),
                preprocessors.MinMaxScaler(),
                preprocessors.Normalizer(),
                preprocessors.RobustScaler(),
                preprocessors.StandardScaler(),
                preprocessors.Binarizer(),
                preprocessors.BoxCoxTransform(),
                preprocessors.YeoJohnsonTransform(),
            ]),

            # Feature Selectors
            autem.Choice("Selector", [
                preprocessors.NoSelector(),
                preprocessors.SelectPercentile(),
                preprocessors.VarianceThreshold(),
            ]),

            # Feature Reducers
            autem.Choice("Reducer", [
                preprocessors.NoReducer(),
                preprocessors.FastICA(),
                preprocessors.FeatureAgglomeration(),
                preprocessors.PCA(),
            ]),

            # Approximators
            autem.Choice("Approximator", [
                preprocessors.NoApproximator(),
                preprocessors.RBFSampler(),
                preprocessors.Nystroem(),
            ]),

            # Learners
            autem.Choice("Learner", [
                learners.GaussianNB(),
                learners.BernoulliNB(),
                learners.MultinomialNB(),
                learners.DecisionTreeClassifier(),
                learners.KNeighborsClassifier(),
                learners.LinearSVC(),
                learners.RadialBasisSVC(),
                learners.PolySVC(),
                learners.LogisticRegression(),
                learners.LinearDiscriminantAnalysis(),
                learners.RandomForestClassifier(),
                learners.ExtraTreesClassifier(),
            ]),
        ],
        population_size=population_size, seed=seed, properties=properties, n_jobs=6)
    return simulation
def run_cylinder_bands_custom():
    # Standard-workflow run on cylinder-bands with a hand-picked search space;
    # the commented-out components below are kept from the original configuration.
    seed = 1
    study = "DEV"
    baseline_name = "cylinder-bands"
    experiment = baseline_name
    version = benchmark.get_version()
    simulation_name = "%s_%s_v%d" % (study, experiment, version)
    configuration = baselines.get_baseline_configuration(baseline_name)
    path = benchmark.get_simulations_path().joinpath(study).joinpath(experiment)

    # Resolve the OpenML task and its underlying dataset.
    utility.prepare_OpenML()
    task_id = configuration["task_id"]
    task = openml.tasks.get_task(task_id)
    data_id = task.dataset_id
    dataset = openml.datasets.get_dataset(data_id)
    dataset_name = dataset.name

    identity = {
        'study': study,
        'experiment': experiment,
        'dataset': dataset_name,
        'version': version,
    }
    simulation = autem.Simulation(
        simulation_name,
        [
            loaders.OpenMLLoader(data_id),
            scorers.Accuracy(),
            workflows.Standard(),
            baselines.BaselineStats(baseline_name),

            # Scalers
            autem.Choice("Scaler", [
                preprocessors.RobustScaler(),
                preprocessors.StandardScaler(),
                preprocessors.BoxCoxTransform(),
                preprocessors.YeoJohnsonTransform(),
            ]),

            # Feature Selectors
            autem.Choice("Selector", [
                # preprocessors.NoSelector(),
                # preprocessors.SelectPercentile(),
                preprocessors.VarianceThreshold(),
            ]),

            # Feature Reducers
            autem.Choice("Reducer", [
                preprocessors.NoReducer(),
                # preprocessors.FastICA(),
                # preprocessors.FeatureAgglomeration(),
                # preprocessors.PCA(),
            ]),

            # Approximators
            autem.Choice("Approximator", [
                preprocessors.NoApproximator(),
            ]),

            autem.Choice("Learner", [
                # learners.LinearSVC(),
                learners.PolySVC(),
                # learners.RadialBasisSVC(),
            ]),

            reporters.Csv(path),
        ],
        seed=seed, n_jobs=4, identity=identity)
    simulation.run()
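# Entry-point sketch (an assumption about how these benchmark scripts are launched):
# running the module directly kicks off the quick test simulation defined above.
if __name__ == "__main__":
    run_test_simulation()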