def fit(self, pipeline_config, run_result_dir):
    """Read a benchmark run's instance.info and build a matching DataManager.

    Parses ``<run_result_dir>/instance.info`` with a ConfigFileParser,
    applies defaults, and derives the data manager's problem type from the
    ``is_multilabel`` / ``is_classification`` flags.

    Arguments:
        pipeline_config: pipeline configuration dict (not used here).
        run_result_dir: directory of the run containing ``instance.info``.

    Returns:
        dict with keys ``instance_info`` (parsed config) and ``data_manager``.
    """
    instance_file_config_parser = ConfigFileParser([
        ConfigOption(name='path', type='directory', required=True),
        ConfigOption(name='is_classification', type=to_bool, required=True),
        ConfigOption(name='is_multilabel', type=to_bool, required=True),
        ConfigOption(name='num_features', type=int, required=True),
        ConfigOption(name='categorical_features', type=bool, required=True, list=True),
        # instance_shape is stored as a literal string, e.g. "(28, 28)";
        # parse it and validate that it really is a tuple.
        ConfigOption(name='instance_shape',
                     type=[ast.literal_eval, lambda x: isinstance(x, tuple)],
                     required=True)
    ])
    instance_info = instance_file_config_parser.read(
        os.path.join(run_result_dir, 'instance.info'))
    instance_info = instance_file_config_parser.set_defaults(instance_info)

    dm = DataManager()
    if instance_info["is_multilabel"]:
        dm.problem_type = ProblemType.FeatureMultilabel
    elif instance_info["is_classification"]:
        dm.problem_type = ProblemType.FeatureClassification
    else:
        # BUG FIX: an instance that is neither multilabel nor classification
        # is a regression task. The original fell through to
        # FeatureClassification here, mislabeling every regression instance.
        dm.problem_type = ProblemType.FeatureRegression
    return {'instance_info': instance_info, 'data_manager': dm}
def fit(self, pipeline_config, instance):
    """Load *instance* into a fresh DataManager and return it.

    Arguments:
        pipeline_config: pipeline configuration; must define a feature
            problem type, ``data_manager_verbose`` and ``test_split``.
        instance: dataset specifier handed to ``DataManager.read_data``.

    Returns:
        dict with key ``data_manager`` holding the populated manager.
    """
    problem_type = pipeline_config['problem_type']
    assert problem_type in ['feature_classification', 'feature_multilabel', 'feature_regression']

    # Multilabel problems are read as classification data as well.
    classification_task = problem_type in ['feature_classification', 'feature_multilabel']

    data_manager = DataManager(verbose=pipeline_config["data_manager_verbose"])
    data_manager.read_data(instance,
                           is_classification=classification_task,
                           test_split=pipeline_config["test_split"])
    return {"data_manager": data_manager}
# Example/smoke-test script: run Auto-PyTorch on AutoML-challenge datasets.
# NOTE(review): this chunk appears truncated — only test cases 1 and 2 of the
# advertised 5 are visible here.
__version__ = "0.0.1"
__license__ = "BSD"

import os, sys
# Make the repository root importable when running this file directly.
sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", "..")))

import logging

from autoPyTorch import AutoNetClassification, AutoNetMultilabel
import autoPyTorch.pipeline.nodes as autonet_nodes
from autoPyTorch.components.metrics.additional_logs import test_result
import autoPyTorch.components.metrics as autonet_metrics

from autoPyTorch.data_management.data_manager import DataManager

dm = DataManager(verbose=1)
# Datasets live two directories above this script's location.
dataset_dir = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', '..', 'datasets'))

# choose between the 5 defined testcases
TEST_CASE = 1

""" TEST CASE 1: Sparse data """
if TEST_CASE == 1:
    dm.read_data(os.path.join(dataset_dir, "automl/newsgroups/newsgroups_public.info"), is_classification=True)
    metric = "pac_metric"
    additional_metrices = ["accuracy"]

""" TEST CASE 2: Sparse binary data """
if TEST_CASE == 2:
    dm.read_data(os.path.join(dataset_dir, "automl/dorothea/dorothea_public.info"), is_classification=True)
    metric = "auc_metric"
# Example script: train a classifier on optdigits while logging memory usage.
# NOTE(review): chunk may continue past this view (mem_logger is started but
# never stopped here).
__author__ = "Max Dippel, Michael Burkart and Matthias Urban"
__version__ = "0.0.1"
__license__ = "BSD"

import os
import sys
import logging

from autoPyTorch import AutoNetClassification
from autoPyTorch.data_management.data_manager import DataManager
from autoPyTorch.utils.mem_test_thread import MemoryLogger

dm = DataManager(verbose=1)
# Datasets live one directory above this script's location.
dataset_dir = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'datasets'))

dm.read_data(os.path.join(dataset_dir, "classification/dataset_28_optdigits.csv"), is_classification=True)
# 5620 samples, 10 classes, 65 features ---> 98% validation accuracy

# Track memory consumption in a background thread during the run.
mem_logger = MemoryLogger()
mem_logger.start()
# Minimal end-to-end example: fit AutoNetClassification on generated data.
__author__ = "Max Dippel, Michael Burkart and Matthias Urban"
__version__ = "0.0.1"
__license__ = "BSD"

import os, sys
# Make the repository root importable when running this file directly.
sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", "..")))

from autoPyTorch import AutoNetClassification
from autoPyTorch.data_management.data_manager import DataManager

# Note: You can write your own datamanager! Call fit with respective train, valid data (numpy matrices)
dm = DataManager()
dm.generate_classification(num_classes=3, num_features=21, num_samples=1500)

# Note: every parameter has a default value, you do not have to specify anything. The given parameter allow a fast test.
autonet = AutoNetClassification(budget_type='epochs', min_budget=1, max_budget=9, num_iterations=1, log_level='info')

# NOTE(review): dm.X/dm.Y are used for training while dm.X_train/dm.Y_train
# serve as validation and scoring data — confirm this split is intentional
# in DataManager.generate_classification.
res = autonet.fit(X_train=dm.X, Y_train=dm.Y, X_valid=dm.X_train, Y_valid=dm.Y_train)

print(res)
print("Score:", autonet.score(X_test=dm.X_train, Y_test=dm.Y_train))
# Minimal end-to-end example: fit AutoNetRegression on generated data.
__author__ = "Max Dippel, Michael Burkart and Matthias Urban"
__version__ = "0.0.1"
__license__ = "BSD"

import os, sys
# Make the repository root importable when running this file directly.
sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", "..")))

from autoPyTorch import AutoNetRegression
from autoPyTorch.data_management.data_manager import DataManager

# Note: You can write your own datamanager! Call fit train, valid data (numpy matrices)
dm = DataManager()
dm.generate_regression(num_features=21, num_samples=1500)

# NOTE(review): dm.X/dm.Y are used for training while dm.X_train/dm.Y_train
# serve as validation data — confirm this split is intentional in
# DataManager.generate_regression.
X_train = dm.X
Y_train = dm.Y
X_valid = dm.X_train
Y_valid = dm.Y_train

# Note: every parameter has a default value, you do not have to specify anything. The given parameter allow a fast test.
autonet = AutoNetRegression(budget_type='epochs', min_budget=1, max_budget=9, num_iterations=1, log_level='info')

res = autonet.fit(X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid)

print(res)
# NOTE(review): this chunk is cut at BOTH edges — names such as
# outputs_folder, output_base_dir, result_dir, runs_range, replacement_dict,
# configs_range, all_configs, benchmark_config, instances_range and
# all_instances are defined outside the visible span, and the final
# SetAutoNetConfig().fit(...) call is truncated mid-argument-list.
os.mkdir(outputs_folder)
# Ensure the output directory hierarchy exists before writing results.
if not os.path.exists(output_base_dir):
    os.mkdir(output_base_dir)
if not os.path.exists(result_dir):
    os.mkdir(result_dir)

# iterate over all runs
for run_number in runs_range:
    replacement_dict["RUN_NUMBER"] = run_number

    for config_id in configs_range:
        replacement_dict["CONFIG_ID"] = config_id
        replacement_dict["CONFIG_FILE"] = all_configs[config_id]

        # get autonet
        dm = DataManager()
        # Map the benchmark's problem-type string onto the ProblemType enum.
        dm.problem_type = {
            "feature_classification": ProblemType.FeatureClassification,
            "feature_multilabel": ProblemType.FeatureMultilabel,
            "feature_regression": ProblemType.FeatureRegression
        }[benchmark_config["problem_type"]]
        autonet = CreateAutoNet().fit(benchmark_config, dm)["autonet"]
        autonet_config_file = benchmark_config["autonet_configs"][config_id]

        for instance_id in instances_range:
            replacement_dict["INSTANCE_ID"] = instance_id
            replacement_dict["INSTANCE_FILE"] = all_instances[instance_id]

            # read autonet config
            SetAutoNetConfig().fit(benchmark_config, autonet,
def fit(self, pipeline_config, instance):
    """Build the data manager holding train/val/test data for *instance*.

    Chooses DataManager for feature problems and ImageManager otherwise.
    When ``pipeline_config['test_instances']`` is set, that dataset is loaded
    as the explicit test set (no split); otherwise ``test_split`` carves a
    test set out of *instance*.

    Arguments:
        pipeline_config: pipeline configuration with ``problem_type``,
            ``data_manager_verbose``, ``test_instances`` and ``test_split``.
        instance: dataset specifier handed to ``read_data``.

    Returns:
        dict with key ``data_manager`` holding the populated manager.
    """
    feature_problem = pipeline_config['problem_type'] in [
        'feature_classification', 'feature_multilabel', 'feature_regression']
    manager_class = DataManager if feature_problem else ImageManager
    dm = manager_class(verbose=pipeline_config["data_manager_verbose"])

    # Hoisted: the same membership test was repeated three times inline.
    is_classification = pipeline_config["problem_type"] in [
        'feature_classification', 'feature_multilabel', 'image_classification']

    if pipeline_config['test_instances'] is not None:
        # Use given test set: load both datasets whole, without splitting.
        dm_test = manager_class(verbose=pipeline_config["data_manager_verbose"])
        dm.read_data(instance,
                     is_classification=is_classification,
                     test_split=0.0)
        dm_test.read_data(pipeline_config['test_instances'],
                          is_classification=is_classification,
                          test_split=0.0)
        dm.X_test, dm.Y_test = dm_test.X_train, dm_test.Y_train
        if is_classification:
            # BUG FIX: cast labels to int only for classification problems.
            # The original applied astype(np.int32) unconditionally, which
            # truncated continuous regression targets in the test set.
            dm.Y_test = dm.Y_test.astype(np.int32)
    else:
        # Use test split carved out of the training instance.
        dm.read_data(instance,
                     is_classification=is_classification,
                     test_split=pipeline_config["test_split"])
    return {"data_manager": dm}
# Smoke-test script: classification test cases on real CSV datasets.
# NOTE(review): this chunk is truncated — the body of TEST_CASE 3 and the
# remaining cases (up to 10) are outside the visible span.
__author__ = "Max Dippel, Michael Burkart and Matthias Urban"
__version__ = "0.0.1"
__license__ = "BSD"

import os, sys
# Make the repository root importable when running this file directly.
sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", "..")))

import logging

from autoPyTorch import AutoNetClassification
from autoPyTorch.data_management.data_manager import DataManager

dm = DataManager(verbose=1)
# Datasets live two directories above this script's location.
dataset_dir = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', '..', 'datasets'))

# choose between the 10 classification testcases on real data.
TEST_CASE = 4

if TEST_CASE == 1:
    dm.read_data(os.path.join(dataset_dir, "classification/dataset_22_mfeat-zernike.csv"), is_classification=True)
    # 2000 samples, 10 classes, 48 features

if TEST_CASE == 2:
    dm.read_data(os.path.join(dataset_dir, "classification/phpbL6t4U.csv"), is_classification=True)
    # 13910 samples, 6 classes, 128 features

if TEST_CASE == 3:
# Smoke-test script: classification test cases fetched from OpenML by id.
# NOTE(review): this chunk may be truncated — cases beyond TEST_CASE 3
# (up to the advertised 10) are not visible here.
__author__ = "Max Dippel, Michael Burkart and Matthias Urban"
__version__ = "0.0.1"
__license__ = "BSD"

import os, sys
# Make the repository root importable when running this file directly.
sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", "..")))

import logging

from autoPyTorch import AutoNetClassification
from autoPyTorch.data_management.data_manager import DataManager

dm = DataManager(verbose=1)
# Datasets live two directories above this script's location (unused for
# the "openml:<id>" specifiers below, which download from OpenML).
dataset_dir = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', '..', 'datasets'))

# choose between the 10 classification testcases on real data.
TEST_CASE = 4

if TEST_CASE == 1:
    dm.read_data("openml:22", is_classification=True)
    # 2000 samples, 10 classes, 48 features

if TEST_CASE == 2:
    dm.read_data("openml:1476", is_classification=True)
    # 13910 samples, 6 classes, 128 features

if TEST_CASE == 3:
    dm.read_data("openml:1464", is_classification=True)
    # 748 samples, 2 classes, 4 features