def test_grid_scores(self):
    output = os.path.join(self.test_dir, '..', '.tmp_grid_scores')
    self._setUp(output)

    cls = AutoSklearnClassifier(time_left_for_this_task=15,
                                per_run_time_limit=5,
                                output_folder=output,
                                tmp_folder=output,
                                shared_mode=False,
                                seed=1,
                                initial_configurations_via_metalearning=0,
                                ensemble_size=0)
    cls_ = cls.build_automl()
    automl = cls_._automl
    automl.runhistory_ = unittest.mock.MagicMock()

    RunKey = collections.namedtuple(
        'RunKey', ['config_id', 'instance_id', 'seed'])
    RunValue = collections.namedtuple(
        'RunValue', ['cost', 'time', 'status', 'additional_info'])
    runhistory = dict()
    runhistory[RunKey(1, 1, 1)] = RunValue(1, 1, 1, '')
    automl.runhistory_.data = runhistory

    grid_scores_ = automl.grid_scores_

    self.assertIsInstance(grid_scores_[0], _CVScoreTuple)
    # In the runhistory we store losses, thus the score is zero
    self.assertEqual(grid_scores_[0].mean_validation_score, 0)
    self.assertEqual(grid_scores_[0].cv_validation_scores, [0])
    self.assertIsInstance(grid_scores_[0].parameters,
                          unittest.mock.MagicMock)

    del automl
    self._tearDown(output)
def test_grid_scores(self):
    output = os.path.join(self.test_dir, '..', '.tmp_grid_scores')
    self._setUp(output)

    cls = AutoSklearnClassifier(time_left_for_this_task=15,
                                per_run_time_limit=15,
                                output_folder=output,
                                tmp_folder=output,
                                shared_mode=False,
                                seed=1,
                                initial_configurations_via_metalearning=0,
                                ensemble_size=0)
    cls_ = cls.build_automl()
    automl = cls_._automl
    automl._proc_smac = mock.MagicMock()

    RunKey = collections.namedtuple(
        'RunKey', ['config_id', 'instance_id', 'seed'])
    RunValue = collections.namedtuple(
        'RunValue', ['cost', 'time', 'status', 'additional_info'])
    runhistory = dict()
    runhistory[RunKey(1, 1, 1)] = RunValue(1, 1, 1, '')
    automl._proc_smac.runhistory.data = runhistory

    grid_scores_ = automl.grid_scores_

    self.assertIsInstance(grid_scores_[0], _CVScoreTuple)
    # In the runhistory we store losses, thus the score is zero
    self.assertEqual(grid_scores_[0].mean_validation_score, 0)
    self.assertEqual(grid_scores_[0].cv_validation_scores, [0])
    self.assertIsInstance(grid_scores_[0].parameters, mock.MagicMock)

    del automl
    self._tearDown(output)
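# Both test variants above assert the same relationship: the runhistory
# stores losses, and grid_scores_ reports score = 1 - loss (the same
# conversion the benchmark script below uses), so a cost of 1 yields a
# score of 0. A minimal, self-contained sketch of that conversion; the
# scores_from_runhistory helper is hypothetical, not part of auto-sklearn.
import collections

RunKey = collections.namedtuple(
    'RunKey', ['config_id', 'instance_id', 'seed'])
RunValue = collections.namedtuple(
    'RunValue', ['cost', 'time', 'status', 'additional_info'])


def scores_from_runhistory(data):
    # Costs are losses; convert each to a score via 1 - loss.
    return [1 - value.cost for value in data.values()]


runhistory = {RunKey(1, 1, 1): RunValue(1, 1, 1, '')}
assert scores_from_runhistory(runhistory) == [0]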
# Imports needed by main(); load_task and remove_dataset are helper
# functions defined elsewhere in this script.
import os

from sklearn.utils.multiclass import type_of_target

from autosklearn.classification import AutoSklearnClassifier
from autosklearn.metrics import balanced_accuracy


def main(working_directory, time_limit, per_run_time_limit, task_id, seed):
    # Load data and other info.
    X_train, y_train, X_test, y_test, cat = load_task(task_id)

    # Path to the metadata directory. Is there a better way to get this?
    metadata_directory = os.path.abspath(os.path.dirname(__file__))
    # NOTE: joining with an absolute path discards metadata_directory above.
    metadata_directory = os.path.join(
        metadata_directory,
        "/home/tau/hrakotoa/Code/reproduce/auto-sklearn/auto-sklearn/autosklearn/metalearning/files/"
    )
    # metadata_directory = os.path.dirname(autosklearn.metalearning.files.__file__)

    # Create a new metadata directory that does not contain task_id.
    new_metadata_directory = os.path.abspath(
        os.path.join(working_directory, "metadata_%i" % task_id))
    try:
        os.makedirs(new_metadata_directory)
        remove_dataset(metadata_directory, new_metadata_directory, task_id)
    except Exception:
        # Pass because the metadata for this task has already been created.
        pass

    # We need the task type, metric and sparse/dense information to
    # construct the path to the specific metadata directory. For details see
    # get_metalearning_suggestion() in smbo.py.
    TASK_TYPES_TO_STRING = {
        # Mimic the same dict in autosklearn.constants
        'binary': 'binary.classification',
        'multiclass': 'multiclass.classification',
    }
    task_type = type_of_target(y_train)
    metadata_for_this_task = os.path.abspath(
        os.path.join(
            working_directory,
            "metadata_%i/balanced_accuracy_%s_sparse"
            % (task_id, TASK_TYPES_TO_STRING[task_type])))
    # How can we check whether the data is sparse before running?

    configuration_output_dir = os.path.join(working_directory, str(seed))
    tmp_dir = os.path.join(configuration_output_dir, str(task_id))
    try:
        if not os.path.exists(configuration_output_dir):
            os.makedirs(configuration_output_dir)
    except Exception:
        print("Directory {0} already created.".format(
            configuration_output_dir))

    automl_arguments = {
        'time_left_for_this_task': time_limit,
        'per_run_time_limit': per_run_time_limit,
        'initial_configurations_via_metalearning': 25,
        'ensemble_size': 0,
        'seed': seed,
        'ml_memory_limit': 3072,
        'resampling_strategy': 'holdout',
        'resampling_strategy_arguments': {'train_size': 0.67},
        'tmp_folder': tmp_dir,
        'delete_tmp_folder_after_terminate': False,
        'disable_evaluator_output': False,
    }

    automl = AutoSklearnClassifier(**automl_arguments)
    # Setting automl._automl._metadata_directory directly does not work
    # because clf._automl is not created until fit() is called. Therefore we
    # build automl._automl manually and set the metadata directory on it.
    automl._automl = automl.build_automl()
    automl._automl._metadata_directory = metadata_for_this_task

    # Fit.
    automl._automl.fit(
        X_train,
        y_train,
        dataset_name=str(task_id),
        X_test=X_test,
        y_test=y_test,
        metric=balanced_accuracy,
    )

    with open(os.path.join(tmp_dir, "score_metalearning.csv"), 'w') as fh:
        T = 0
        fh.write("Time,Train Performance,Test Performance\n")
        # Add a starting point: time 0, train performance 0, test performance 0.
        best_loss = 1
        fh.write("{0},{1},{2}\n".format(T, 0, 0))
        for key, value in automl._automl.runhistory_.data.items():
            t = value.time
            loss = value.cost
            T += t
            if loss < best_loss:
                fh.write("{0},{1},{2}\n".format(
                    T,
                    1 - loss,
                    1 - value.additional_info.get('test_loss', 1.0)))
                best_loss = loss
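# A sketch of a command-line entry point for the benchmark script above; the
# flag names mirror main()'s signature, while the defaults shown here are
# assumptions rather than values taken from the original script.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--working-directory', required=True)
    parser.add_argument('--time-limit', type=int, default=3600)
    parser.add_argument('--per-run-time-limit', type=int, default=360)
    parser.add_argument('--task-id', type=int, required=True)
    parser.add_argument('--seed', type=int, default=1)
    args = parser.parse_args()

    main(args.working_directory, args.time_limit, args.per_run_time_limit,
         args.task_id, args.seed)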