def test_model_workflow_partial_mode():
    """Check that a partial-mode model predicts the same after a save/load cycle.

    A classifier is built from the shared ``kwargs`` with ``partial_mode``
    switched on, fitted, saved to ``save_dir``, reloaded into a fresh
    instance, and the predictions from both instances are compared.
    """
    params = copy.deepcopy(kwargs)
    params["partial_mode"] = True

    # Fit and record the reference predictions.
    forest = CascadeForestClassifier(**params)
    forest.fit(X_train, y_train)
    expected = forest.predict(X_test)

    # Round-trip the fitted model through the on-disk format.
    forest.save(save_dir)
    forest = CascadeForestClassifier(**params)
    forest.load(save_dir)
    actual = forest.predict(X_test)

    # Serialization must not alter the predictions.
    assert_array_equal(expected, actual)

    # Tear down: clear the model buffer and drop the saved model directory.
    forest.clean()
    shutil.rmtree(save_dir)
def test_model_sample_weight():
    """Check how ``sample_weight`` influences the predictions of deep forest.

    Three models are trained from the same ``kwargs``:

    * without ``sample_weight``;
    * with a uniform ``sample_weight`` of ones, which must reproduce the
      predictions of the unweighted model;
    * with a skewed ``sample_weight``, which must produce predictions that
      differ from the uniformly weighted model.
    """
    case_kwargs = copy.deepcopy(kwargs)

    # Training without sample_weight
    model = CascadeForestClassifier(**case_kwargs)
    model.fit(X_train, y_train)
    y_pred_no_sample_weight = model.predict(X_test)

    # Training with equal sample_weight
    model = CascadeForestClassifier(**case_kwargs)
    sample_weight = np.ones(y_train.size)
    model.fit(X_train, y_train, sample_weight=sample_weight)
    y_pred_equal_sample_weight = model.predict(X_test)

    # Make sure the same predictions with None and equal sample_weight
    assert_array_equal(y_pred_no_sample_weight, y_pred_equal_sample_weight)

    # Training with skewed sample_weight: samples labelled 0 get weight 0.1,
    # every other sample is weighted by its own label value.
    model = CascadeForestClassifier(**case_kwargs)
    sample_weight = np.where(y_train == 0, 0.1, y_train)
    # Pass the weights computed above; the previous code passed `y_train`
    # directly and left the computed `sample_weight` unused.
    model.fit(X_train, y_train, sample_weight=sample_weight)
    y_pred_skewed_sample_weight = model.predict(X_test)

    # Make sure the predictions differ between skewed and equal sample_weight
    assert_raises(
        AssertionError,
        assert_array_equal,
        y_pred_skewed_sample_weight,
        y_pred_equal_sample_weight,
    )

    model.clean()  # clear the buffer
def test_model_workflow_partial_mode(backend):
    """Check the partial-mode save/load workflow for the given ``backend``.

    Besides the round-trip comparison of predictions, this also exercises
    ``get_layer_feature_importances``: the call is expected to succeed with
    the ``"sklearn"`` backend and to raise ``RuntimeError`` otherwise.
    """
    params = copy.deepcopy(kwargs)
    params["partial_mode"] = True
    params["backend"] = backend

    forest = CascadeForestClassifier(**params)
    forest.fit(X_train, y_train)

    # Feature importances: only expected to work with the sklearn backend.
    if backend != "sklearn":
        with pytest.raises(RuntimeError) as exc_info:
            forest.get_layer_feature_importances(0)
        message = str(exc_info.value)
        assert "Please use the sklearn backend" in message
    else:
        forest.get_layer_feature_importances(0)

    # Reference predictions from the freshly fitted model.
    expected = forest.predict(X_test)

    # Round-trip the fitted model through the on-disk format.
    forest.save(save_dir)
    forest = CascadeForestClassifier(**params)
    forest.load(save_dir)
    actual = forest.predict(X_test)

    # Serialization must not alter the predictions.
    assert_array_equal(expected, actual)

    # Tear down: clear the model buffer and drop the saved model directory.
    forest.clean()
    shutil.rmtree(save_dir)
def test_model_input_label_encoder():
    """Check that integer and string class labels give matching predictions.

    The same data is fitted twice with ``random_state=1``: once with the
    integer labels and once with those labels rendered as strings carrying a
    ``"label_"`` prefix. The integer predictions, converted to the same
    string form, must equal the string predictions.
    """
    X, y = load_digits(return_X_y=True)
    y_as_str = np.char.add("label_", y.astype(str))

    # Model trained on the integer labels.
    clf = CascadeForestClassifier(random_state=1)
    clf.fit(X, y)
    pred_from_int = clf.predict(X)

    # Model trained on the prefixed string labels.
    clf = CascadeForestClassifier(random_state=1)
    clf.fit(X, y_as_str)
    pred_from_str = clf.predict(X)

    # Bring the integer predictions into the string form before comparing.
    pred_from_int_as_str = np.char.add("label_", pred_from_int.astype(str))
    assert_array_equal(pred_from_str, pred_from_int_as_str)

    # Clean up buffer
    clf.clean()
def test_custom_cascade_layer_workflow_partial_mode():
    """Check save/load of a partial-mode model built from custom estimators.

    Four ``DecisionTreeClassifier`` instances are installed as the base
    estimators and another one as the predictor. Predictions made before
    saving must equal those made by a freshly constructed model that loaded
    the saved state.
    """
    n_trees = 4

    forest = CascadeForestClassifier(partial_mode=True)
    forest.set_estimator(
        [DecisionTreeClassifier() for _ in range(n_trees)]
    )  # custom base estimators
    forest.set_predictor(DecisionTreeClassifier())

    forest.fit(X_train, y_train)
    expected = forest.predict(X_test)

    # Round-trip the fitted model through the on-disk format.
    forest.save(save_dir)
    forest = CascadeForestClassifier()
    forest.load(save_dir)
    actual = forest.predict(X_test)

    # Serialization must not alter the predictions.
    assert_array_equal(expected, actual)

    # Tear down: clear the model buffer and drop the saved model directory.
    forest.clean()
    shutil.rmtree(save_dir)
def test_classifier_custom_cascade_layer_workflow_in_memory():
    """Check save/load of a default-mode model built from custom estimators.

    Four ``DecisionTreeClassifier`` instances are installed as the base
    estimators and another one as the predictor. After a save/load cycle
    the predictions must be unchanged, and ``get_estimator(0, 0, "custom")``
    must return the very object stored under ``"0-0-custom"`` in layer 0.
    """
    n_trees = 4

    forest = CascadeForestClassifier()
    forest.set_estimator(
        [DecisionTreeClassifier() for _ in range(n_trees)]
    )  # custom base estimators
    forest.set_predictor(DecisionTreeClassifier())

    forest.fit(X_train_clf, y_train_clf)
    expected = forest.predict(X_test_clf)

    # Round-trip the fitted model through the on-disk format.
    forest.save(save_dir)
    forest = CascadeForestClassifier()
    forest.load(save_dir)
    actual = forest.predict(X_test_clf)

    # Serialization must not alter the predictions.
    assert_array_equal(expected, actual)

    # The public accessor must hand back the identical wrapped estimator.
    via_accessor = forest.get_estimator(0, 0, "custom")
    via_layer = forest._get_layer(0).estimators_["0-0-custom"].estimator_
    assert via_accessor is via_layer

    # Tear down: clear the model buffer and drop the saved model directory.
    forest.clean()
    shutil.rmtree(save_dir)
partial_mode=partial_mode, delta=delta, n_jobs=n_jobs, random_state=random_state, verbose=verbose, ) tic = time.time() model.fit(X_train, y_train) toc = time.time() training_time = toc - tic tic = time.time() y_pred = model.predict(X_test) toc = time.time() testing_time = toc - tic testing_acc = accuracy_score(y_test, y_pred) records.append( (training_time, testing_time, testing_acc, len(model))) model.clean() # Writing with open("{}_deep_forest_classification.txt".format(dataset), 'w') as file: for training_time, testing_time, testing_acc, n_layers in records: string = "{:.5f}\t{:.5f}\t{:.5f}\t{}\n".format( training_time, testing_time, testing_acc, n_layers) file.write(string) file.close()