def test_incremental_score(self):
    X_train = [[-1], [-2], [3], [4]]
    y_train = [0, 1, 2, 2]
    X_test = [[5], [6]]
    y_test = [2, 2]
    model = ClassicalModel(2, GradientBoostingClassifier, clf_params={
        "n_estimators": 1,
        "max_depth": 2
    })
    sklearn.utils.shuffle = MagicMock()
    sklearn.utils.shuffle.return_value = X_train, y_train
    train_scores, test_scores = model.incremental_score(X_train, y_train, X_test, y_test, increments=2)
    # Gets 0.5 of the training set right at first, as it only sees the first 2 points,
    # then 1.0 once it sees all points and can fully capture the data.
    self.assertListEqual(train_scores, [0.5, 1.0])
    # Gets 0 of the test set right at first, as it has not yet seen any points with label 2,
    # then 1.0 once it sees all points and can fully capture the data.
    self.assertListEqual(test_scores, [0., 1.0])
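# A hedged sketch of the behaviour incremental_score is assumed to have, inferred
# from the assertions above (illustrative only; the real method may differ in how
# it shuffles, rounds increment sizes, or resets the classifier):
#
#   def incremental_score(self, X_train, y_train, X_test, y_test, increments):
#       X_train, y_train = sklearn.utils.shuffle(X_train, y_train)  # mocked in the test
#       train_scores, test_scores = [], []
#       step = len(X_train) // increments
#       for i in range(1, increments + 1):
#           self.train(X_train[:i * step], y_train[:i * step])  # refit on a growing prefix
#           train_scores.append(self.score(X_train, y_train))   # scored on the full train set
#           test_scores.append(self.score(X_test, y_test))
#       return train_scores, test_scores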
def test_train_and_predict(self):
    x = pd.DataFrame([[-1], [-5], [1], [5]])
    y = pd.Series([0, 0, 1, 1])
    model = ClassicalModel(2, GradientBoostingClassifier, clf_params={
        "n_estimators": 1,
        "max_depth": 1
    })
    model.train(x, y)
    y_pred = pd.Series(model.predict(x))
    self.assertTrue(y.equals(y_pred))
def test_get_confusion_matrix(self):
    model = ClassicalModel(3, GradientBoostingClassifier, clf_params={
        "n_estimators": 1,
        "max_depth": 2
    })
    model.predict = MagicMock()
    model.predict.return_value = [0, 0, 0, 1, 1, 1, 2, 2, 2]
    y_true = [0, 1, 2, 0, 1, 2, 0, 1, 2]
    conf_mat = model.get_confusion_matrix(y_true, X_test=[[1], [2], [3]])
    true_conf_mat = np.array([[1, 1, 1],
                              [1, 1, 1],
                              [1, 1, 1]])
    self.assertTrue(np.array_equal(conf_mat, true_conf_mat))
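# get_confusion_matrix is presumably a thin wrapper over sklearn's confusion
# matrix; a minimal sketch consistent with the mocked test above (an assumption,
# not the actual implementation):
#
#   from sklearn.metrics import confusion_matrix
#
#   def get_confusion_matrix(self, y_true, X_test):
#       return confusion_matrix(y_true, self.predict(X_test))
#
# With y_true cycling 0, 1, 2 and the mocked predictions grouped as
# [0, 0, 0, 1, 1, 1, 2, 2, 2], each (true, predicted) pair occurs exactly once,
# hence the all-ones expected matrix.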
def test_score(self):
    x = pd.DataFrame([[-1], [-5], [1], [5]])
    y = pd.Series([0, 0, 1, 2])
    model = ClassicalModel(2, GradientBoostingClassifier, clf_params={
        "n_estimators": 1,
        "max_depth": 1
    }, label_mapper=lambda label: label > 0)
    model.train(x, y)
    score = model.score(x, y)
    y_pred = pd.Series(model.predict(x))
    self.assertTrue(y_pred.equals(pd.Series([False, False, True, True])))
    self.assertEqual(score, 1)
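# The test implies the label_mapper is applied to the labels before fitting and
# scoring, so predictions come back in the mapped space (False/True here). A
# hedged sketch of that assumed behaviour (method bodies are illustrative):
#
#   def train(self, X, y):
#       if self.label_mapper is not None:
#           y = y.map(self.label_mapper)  # 0, 0, 1, 2 -> False, False, True, True
#       self.spawn_clf()
#       self.clf.fit(X, y)
#
#   def score(self, X, y):
#       if self.label_mapper is not None:
#           y = y.map(self.label_mapper)
#       return self.clf.score(X, y)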
def test_spawn_clf(self):
    model = ClassicalModel(2, GradientBoostingClassifier, clf_params={"n_estimators": 0})
    self.assertEqual(GradientBoostingClassifier, model.clf_type)
    self.assertIsNone(model.clf)

    # Create a clf and check that it is created correctly
    model.spawn_clf()
    self.assertEqual(GradientBoostingClassifier, model.clf_type)
    self.assertIsNotNone(model.clf)
    self.assertEqual(type(model.clf), model.clf_type)
    self.assertEqual(model.clf.n_estimators, 0)

    # Spawn a new clf and check that a new one is created and created correctly
    old_clf = model.clf
    model.spawn_clf({"n_estimators": 1})
    self.assertEqual(GradientBoostingClassifier, model.clf_type)
    self.assertIsNotNone(model.clf)
    self.assertEqual(type(model.clf), model.clf_type)
    self.assertNotEqual(old_clf, model.clf)
    self.assertEqual(model.clf.n_estimators, 1)
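# The assertions pin spawn_clf down fairly tightly: it constructs a fresh
# clf_type instance, optionally with new parameters. A minimal sketch (the
# parameter-handling details are an assumption):
#
#   def spawn_clf(self, clf_params=None):
#       if clf_params is not None:
#           self.clf_params = clf_params
#       self.clf = self.clf_type(**self.clf_params)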
# "fully_connected_cells": 8, # "num_rnn_units": 128, # "learning_rate": 0.001 # } # Read and setup the dataset dataset = DataSet() dataset.setup(ignore_trials=settings.ignore_trials) # Calculate the features feature_names = feature_extraction.calc_features(dataset, get_lengths=True) # Binary Classical Model model = ClassicalModel(2, clf_type, clf_params=clf_params_bin, label_mapper=label_mapper, upsampling=True) # Multiclass Classical Model # model = ClassicalModel(3, clf_type, clf_params=clf_params_bin, label_mapper=None, upsampling=True) # Binary RNN Model # model = RNNModel(2, len(feature_names), label_mapper=label_mapper, clf_params=clf_params_bin, num_epochs=20, upsampling=True) # Multiclass RNN Model # model = RNNModel(3, len(feature_names), label_mapper=None, clf_params=clf_params_bin, num_epochs=20, upsampling=True) # The splitter - Leave One (whole participant) Out splitter splitter = LeaveOneOutSplitter( split_param_name="Participant code", split_param_values=dataset.get_data_attribute("Participant code").unique())
splitter.reset()
print("Sensors", settings.used_sensors, "Score", cum_score / 100)

# The name of the label in the dataset
label_name = "Group"
# The label mappers to be used
label_maps = [lambda y: y > 0, None]
# The various classifiers and their parameters
clfs = [
    (
        GradientBoostingClassifier,
        {'min_samples_leaf': 2, 'learning_rate': 0.15, 'n_estimators': 80, 'max_depth': 1}
    ),
    (
        RandomForestClassifier,
        {'min_samples_leaf': 5, 'n_estimators': 120, 'max_depth': None}
    )
]

for label_map in label_maps:
    print("Binary" if label_map else "Multi-Class", "Results")
    print("------------------------------------------------------------------------------------------------------------------------")
    for clf_type, clf_params in clfs:
        print("Model:", clf_type, clf_params)
        clf = ClassicalModel(2 if label_map else 3, clf_type, label_mapper=label_map,
                             upsampling=True, clf_params=clf_params)
        all_sensor_perms(clf)
        print()
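# all_sensor_perms is assumed to re-evaluate the model over every combination of
# the available sensors; the splitter.reset() / cum_score fragment above reads
# like the tail of such a loop. A hedged sketch (settings.all_sensors, the
# splitter iteration protocol, and the /100 normalisation are assumptions):
#
#   from itertools import combinations
#
#   def all_sensor_perms(model):
#       for r in range(1, len(settings.all_sensors) + 1):
#           for subset in combinations(settings.all_sensors, r):
#               settings.used_sensors = list(subset)
#               cum_score = 0
#               for X_train, y_train, X_test, y_test in splitter:
#                   model.train(X_train, y_train)
#                   cum_score += model.score(X_test, y_test)
#               splitter.reset()
#               print("Sensors", settings.used_sensors, "Score", cum_score / 100)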