def main(basename, input_dir, params):
    """Fit a holdout evaluator for one dataset and one configuration.

    The dataset ``basename`` is loaded from ``input_dir`` through
    ``store_and_or_load_data``; the current working directory is used as
    the output directory for both the data loader and the evaluator.

    The evaluator is published through the module-level global
    ``evaluator`` rather than being returned.

    Args:
        basename: Dataset name forwarded as ``dataset``.
        input_dir: Directory forwarded as ``data_dir``.
        params: Mapping expanded as keyword arguments into
            ``configuration_space.Configuration``.
    """
    global evaluator

    working_dir = os.getcwd()
    datamanager = store_and_or_load_data(
        data_dir=input_dir,
        dataset=basename,
        outputdir=working_dir,
    )

    # The search space is derived from the dataset's info attribute.
    search_space = get_class(datamanager.info).get_hyperparameter_search_space()
    chosen_configuration = configuration_space.Configuration(
        search_space, **params)

    evaluator = HoldoutEvaluator(
        datamanager=datamanager,
        configuration=chosen_configuration,
        with_predictions=True,
        all_scoring_functions=True,
        output_dir=working_dir,
    )
    evaluator.fit()
    evaluator.finish_up()
def make_mode_holdout(data, seed, configuration, num_run):
    """Run a holdout evaluation and pickle the fitted model to disk.

    A ``HoldoutEvaluator`` is built from ``data`` and ``configuration``
    (plus the keyword arguments returned by ``_get_base_dict()``), fitted
    and finished. The fitted model is then pickled to
    ``<cwd>/models_<seed>/<num_run>.model``.

    Args:
        data: Data manager passed positionally to ``HoldoutEvaluator``.
        seed: Passed to the evaluator as ``seed``; also used (via ``%d``)
            to build the model directory name.
        configuration: Configuration passed positionally to the evaluator.
        num_run: Passed to the evaluator as ``num_run``; also used as the
            model file's base name.

    Any ``AssertionError`` raised inside the body, including the
    missing-model-directory check, is logged through ``debug_log`` and
    swallowed. Other exceptions propagate to the caller.
    """
    try:
        debug_log("Run: %s" % make_mode_holdout.__name__)
        evaluator = HoldoutEvaluator(data, configuration,
                                     seed=seed,
                                     num_run=num_run,
                                     **_get_base_dict())

        debug_log("Fit evaluator")
        evaluator.fit()

        # Swap the SIGTERM (signal 15) handler once fitting is done.
        # NOTE(review): presumably so a termination request cannot
        # interrupt finish_up() / model saving -- confirm against
        # empty_signal_handler.
        signal.signal(signal.SIGTERM, empty_signal_handler)

        debug_log("Fit finish up")
        evaluator.finish_up()

        model_directory = os.path.join(os.getcwd(), 'models_%d' % seed)
        debug_log("Check model directory: %s" % model_directory)
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped when Python runs with -O; the handler below still
        # receives the same exception type and message.
        if not os.path.exists(model_directory):
            raise AssertionError(
                "Not found model directory: %s" % model_directory)

        debug_log("Save models in files")
        model_filename = os.path.join(model_directory,
                                      '%s.model' % num_run)
        # Protocol -1 selects the highest (binary) pickle protocol, so the
        # file must be opened in binary mode.
        with open(model_filename, 'wb') as fh:
            pickle.dump(evaluator.model, fh, -1)
    except AssertionError as e:
        debug_log(str(e))
def test_5000_classes(self):
    """Fit a holdout evaluator on a synthetic 5000-class problem.

    The generated data has 10000 samples and 20 features. The class
    weights are chosen so that 250 classes occur exactly once, which is
    asserted before the evaluator is built. The test passes when
    ``HoldoutEvaluator.fit`` completes without raising.
    """
    class_weights = ([0.0002] * 4750) + ([0.0001] * 250)
    features, labels = sklearn.datasets.make_classification(
        n_samples=10000, n_features=20, n_classes=5000,
        n_clusters_per_class=1,
        n_informative=15, n_redundant=5, n_repeated=0,
        weights=class_weights,
        flip_y=0, class_sep=1.0, hypercube=True,
        shift=None, scale=1.0,
        shuffle=True, random_state=1)

    # Sanity check on the generated labels: 250 singleton classes.
    singleton_classes = np.sum(np.bincount(labels) == 1)
    self.assertEqual(250, singleton_classes)

    datamanager = Dummy()
    datamanager.info = {
        'metric': 'r2_metric',
        'task': MULTICLASS_CLASSIFICATION,
        'is_sparse': False,
        'target_num': 1,
    }
    # The same feature matrix is reused for train, valid and test.
    datamanager.data = {
        'X_train': features,
        'Y_train': labels,
        'X_valid': features,
        'X_test': features,
    }
    # NOTE(review): 5000 entries here although the data has 20 features --
    # confirm whether the length is intentional.
    datamanager.feat_type = ['numerical'] * 5000

    search_space = get_configuration_space(
        datamanager.info,
        include_estimators=['extra_trees'],
        include_preprocessors=['no_preprocessing'])
    configuration = search_space.sample_configuration()

    # The evaluator is given a deep copy of the data manager.
    evaluator = HoldoutEvaluator(copy.deepcopy(datamanager), configuration)
    evaluator.fit()