def run_balance_scale_mastery(seed):
    """Run the DEV mastery simulation for the "balance-scale" baseline.

    The seed is embedded in the experiment name, so every seed gets its own
    experiment directory below the DEV simulations path.

    Args:
        seed: Seed for the run. It is formatted with ``%d`` into the
            experiment name and handed to the simulation settings.
    """
    study = "DEV"
    baseline_name = "balance-scale"
    experiment = "mastery_%s_s%d" % (baseline_name, seed)

    version = benchmark.get_version()
    simulation_name = "%s_%s_v%d" % (study, experiment, version)
    configuration = baselines.get_baseline_configuration(baseline_name)
    output_path = (
        benchmark.get_simulations_path()
        .joinpath(study)
        .joinpath(experiment)
    )

    # Resolve the OpenML dataset behind the baseline's task.
    utility.prepare_OpenML()
    task = openml.tasks.get_task(configuration["task_id"])
    dataset_id = task.dataset_id
    dataset_name = openml.datasets.get_dataset(dataset_id).name

    # Identity fields attached to the simulation via its settings.
    identity = dict(
        study=study,
        experiment=experiment,
        dataset=dataset_name,
        version=version,
    )

    components = [
        loaders.OpenMLLoader(dataset_id),
        scorers.LeagueScorer(scorers.accuracy_score),
        workflows.MasteryWorkflow(),
        baselines.BaselineStats(baseline_name),
        hyper_learners.ClassificationSVM(),
        reporters.Csv(output_path),
    ]
    simulation = autem.Simulation(simulation_name, components)

    # Unlike the keyword-argument style, this run is configured through a
    # SimulationSettings wrapper around the simulation.
    settings = autem.SimulationSettings(simulation)
    settings.set_identity(identity)
    settings.set_n_jobs(4)
    settings.set_seed(seed)

    simulation.run()
def run_cylinder_bands_mastery():
    """Run the DEV mastery simulation for the "cylinder-bands" baseline.

    Uses a fixed seed of 1 and 4 jobs. The experiment is named after the
    baseline, and a Csv reporter is given the experiment directory below
    the DEV simulations path.
    """
    seed = 1
    study = "DEV"
    baseline_name = "cylinder-bands"
    experiment = baseline_name

    version = benchmark.get_version()
    simulation_name = "%s_%s_v%d" % (study, experiment, version)
    configuration = baselines.get_baseline_configuration(baseline_name)
    output_path = (
        benchmark.get_simulations_path()
        .joinpath(study)
        .joinpath(experiment)
    )

    # Resolve the OpenML dataset behind the baseline's task.
    utility.prepare_OpenML()
    task = openml.tasks.get_task(configuration["task_id"])
    dataset_id = task.dataset_id
    dataset_name = openml.datasets.get_dataset(dataset_id).name

    identity = dict(
        study=study,
        experiment=experiment,
        dataset=dataset_name,
        version=version,
    )

    components = [
        loaders.OpenMLLoader(dataset_id),
        scorers.Accuracy(),
        workflows.Mastery(["Learner"]),
        baselines.BaselineStats(baseline_name),
        hyper_learners.ClassificationSVM(),
        reporters.Csv(output_path),
    ]
    simulation = autem.Simulation(
        simulation_name,
        components,
        seed=seed,
        n_jobs=4,
        identity=identity,
    )
    simulation.run()
def get_test_version():
    """Return the version reported by ``benchmark.get_version()``."""
    version = benchmark.get_version()
    return version
def run_cylinder_bands_custom():
    """Run a hand-assembled DEV simulation for the "cylinder-bands" baseline.

    Uses a fixed seed of 1 and 4 jobs. Instead of a hyper learner, the
    pipeline is spelled out as explicit ``autem.Choice`` groups: Scaler,
    Selector, Reducer, Approximator and Learner. A Csv reporter is given
    the experiment directory below the DEV simulations path.
    """
    seed = 1
    study = "DEV"
    baseline_name = "cylinder-bands"
    experiment = baseline_name

    version = benchmark.get_version()
    simulation_name = "%s_%s_v%d" % (study, experiment, version)
    configuration = baselines.get_baseline_configuration(baseline_name)
    output_path = (
        benchmark.get_simulations_path()
        .joinpath(study)
        .joinpath(experiment)
    )

    # Resolve the OpenML dataset behind the baseline's task.
    utility.prepare_OpenML()
    task = openml.tasks.get_task(configuration["task_id"])
    dataset_id = task.dataset_id
    dataset_name = openml.datasets.get_dataset(dataset_id).name

    identity = dict(
        study=study,
        experiment=experiment,
        dataset=dataset_name,
        version=version,
    )

    # Components are appended in the order they are handed to the simulation.
    components = [
        loaders.OpenMLLoader(dataset_id),
        scorers.Accuracy(),
        workflows.Standard(),
        baselines.BaselineStats(baseline_name),
    ]

    # Scalers
    components.append(
        autem.Choice(
            "Scaler",
            [
                preprocessors.RobustScaler(),
                preprocessors.StandardScaler(),
                preprocessors.BoxCoxTransform(),
                preprocessors.YeoJohnsonTransform(),
            ],
        )
    )

    # Feature selectors.
    # Currently disabled candidates: NoSelector, SelectPercentile.
    components.append(
        autem.Choice("Selector", [preprocessors.VarianceThreshold()])
    )

    # Feature reducers.
    # Currently disabled candidates: FastICA, FeatureAgglomeration, PCA.
    components.append(
        autem.Choice("Reducer", [preprocessors.NoReducer()])
    )

    # Approximators
    components.append(
        autem.Choice("Approximator", [preprocessors.NoApproximator()])
    )

    # Learners.
    # Currently disabled candidates: LinearSVC, RadialBasisSVC.
    components.append(autem.Choice("Learner", [learners.PolySVC()]))

    components.append(reporters.Csv(output_path))

    simulation = autem.Simulation(
        simulation_name,
        components,
        seed=seed,
        n_jobs=4,
        identity=identity,
    )
    simulation.run()