def test_dualsgd_softmax():
    """Fit DualSGD on the iris split and print online/offline error figures.

    Prints the sample counts, the fitted model's ``mistake`` and
    ``budget_size`` attributes, and the train/test error derived from
    ``predict`` through ``metrics.accuracy_score``.
    """
    print("========== Test DualSGD for multiclass classification ==========")

    np.random.seed(random_seed())

    train_part, test_part = demo.load_iris()
    x_train, y_train = train_part
    x_test, y_test = test_part
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    settings = dict(
        model_name="DualSGD_hinge",
        k=20,
        D=200,
        gamma=1.0,
        lbd=3.3593684387335183e-05,
        loss='hinge',
        maintain='k-merging',
        max_budget_size=100,
        random_state=random_seed(),
    )
    model = DualSGD(**settings)
    model.fit(x_train, y_train)

    print("Mistake rate = %.4f" % model.mistake)
    print("Budget size = %d" % model.budget_size)

    # Offline prediction: re-score the fitted model on both splits.
    print("Offline prediction")
    pred_train = model.predict(x_train)
    pred_test = model.predict(x_test)
    train_err = 1 - metrics.accuracy_score(y_train, pred_train)
    test_err = 1 - metrics.accuracy_score(y_test, pred_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
def test_fogd_softmax():
    """Fit FOGD on the iris split and print its mistake rate and errors.

    After fitting, the model's ``mistake`` attribute is printed, followed by
    train/test errors computed from ``predict`` and
    ``metrics.accuracy_score``.
    """
    print("========== Test FOGD for multiclass classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    n_train = x_train.shape[0]
    n_test = x_test.shape[0]
    print("Number of training samples = {}".format(n_train))
    print("Number of testing samples = {}".format(n_test))

    learner = FOGD(
        model_name="FOGD_hinge",
        D=100,
        lbd=0.0,
        gamma=0.5,
        loss='hinge',
        random_state=random_seed(),
    )
    learner.fit(x_train, y_train)
    print("Mistake rate = %.4f" % learner.mistake)

    # Offline prediction on both splits with the already-fitted model.
    print("Offline prediction")
    predictions = (learner.predict(x_train), learner.predict(x_test))
    train_err = 1 - metrics.accuracy_score(y_train, predictions[0])
    test_err = 1 - metrics.accuracy_score(y_test, predictions[1])
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
def test_glm_softmax():
    """Fit a softmax GLM on iris twice: default optimizer, then ``'sgd'``.

    For each configuration the model's ``optimizer`` attribute is printed
    before fitting, and the train/test errors (``1 - score``) afterwards.
    """
    print("========== Test GLM for multiclass classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    # First run leaves the optimizer at the constructor default; the second
    # one requests 'sgd' explicitly.
    for optimizer_override in ({}, {'optimizer': 'sgd'}):
        glm = GLM(model_name="GLM_softmax",
                  link='softmax',
                  loss='softmax',
                  random_state=random_seed(),
                  **optimizer_override)
        print("Use {} optimizer".format(glm.optimizer))
        glm.fit(x_train, y_train)

        train_err = 1.0 - glm.score(x_train, y_train)
        test_err = 1.0 - glm.score(x_test, y_test)
        print("Training error = %.4f" % train_err)
        print("Testing error = %.4f" % test_err)
def test_save_load():
    """Check that a TensorFlowGLM scores the same after save and reload.

    Trains on the iris training split, records train/test errors, saves the
    model, reloads it through ``TensorFlowModel.load_model`` and asserts that
    both errors match the pre-save values to within ``1e-6``.
    """
    print("========== Test save, load tensorflow models ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    original = TensorFlowGLM(
        model_name="iris_TensorFlowGLM_softmax",
        link='softmax',
        loss='softmax',
        num_epochs=5,
        random_state=random_seed(),
    )
    original.fit(x_train, y_train)

    print("After training:")
    err_train_before = 1.0 - original.score(x_train, y_train)
    err_test_before = 1.0 - original.score(x_test, y_test)
    print("Training error = %.4f" % err_train_before)
    print("Testing error = %.4f" % err_test_before)

    saved_path = original.save()
    restored = TensorFlowModel.load_model(saved_path)

    print("After save and load:")
    err_train_after = 1.0 - restored.score(x_train, y_train)
    err_test_after = 1.0 - restored.score(x_test, y_test)
    print("Training error = %.4f" % err_train_after)
    print("Testing error = %.4f" % err_test_after)

    # The reloaded model must reproduce the original's errors.
    assert abs(err_train_before - err_train_after) < 1e-6
    assert abs(err_test_before - err_test_after) < 1e-6
def test_rrf_cv_gridsearch():
    """Grid-search RRF hyper-parameters with an internal validation split.

    The data passed to ``GridSearchCV`` is train + test + test: the first two
    parts are marked ``-1`` in the ``PredefinedSplit`` (never in a test fold)
    and the final copy of the test split is fold ``1``.  The estimator's own
    ``cv`` argument marks the first ``n_train`` rows ``-1`` and the next
    ``n_test`` rows ``0``.  The best parameters are then refit on train + test
    and the resulting test error is asserted to match the grid-search score.
    """
    print(
        "========== Tune parameters for RRF including cross-validation =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    n_train, n_test = x_train.shape[0], x_test.shape[0]
    print("Number of training samples = {}".format(n_train))
    print("Number of testing samples = {}".format(n_test))

    x = np.vstack([x_train, x_test, x_test])
    y = np.concatenate([y_train, y_test, y_test])

    param_grid = {'gamma': [0.5, 1.0], 'learning_rate': [0.01, 0.05, 0.1]}

    outer_fold = [-1] * n_train + [-1] * n_test + [1] * n_test
    split = PredefinedSplit(test_fold=outer_fold)

    stopper = EarlyStopping(monitor='val_err', patience=2)

    # NOTE(review): the file name says "mnist" although the data comes from
    # demo.load_iris() -- presumably a copy/paste leftover; confirm.
    ckpt_pattern = os.path.join(
        model_dir(), "male/RRF/search/mnist_{epoch:04d}_{val_err:.6f}.pkl")
    saver = ModelCheckpoint(ckpt_pattern,
                            mode='min',
                            monitor='val_err',
                            verbose=0,
                            save_best_only=True)

    base = RRF(model_name="RRF_hinge",
               D=100,
               lbd=0.01,
               gamma=0.125,
               mode='batch',
               loss='hinge',
               num_epochs=10,
               learning_rate=0.001,
               learning_rate_gamma=0.001,
               metrics=['loss', 'err'],
               callbacks=[stopper, saver],
               cv=[-1] * n_train + [0] * n_test,
               catch_exception=True,
               random_state=random_seed())

    search = GridSearchCV(base, param_grid, cv=split, n_jobs=-1,
                          refit=False, verbose=True)
    search.fit(x, y)
    print("Best error {} @ params {}".format(1 - search.best_score_,
                                             search.best_params_))

    # Refit a fresh copy with the winning parameters on train + test.
    winner = clone(base).set_params(**search.best_params_)
    winner.fit(np.vstack([x_train, x_test]),
               np.concatenate([y_train, y_test]))

    train_err = 1.0 - winner.score(x_train, y_train)
    test_err = 1.0 - winner.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
    assert abs(test_err - (1.0 - search.best_score_)) < 1e-4
def test_kmm_cv_gridsearch():
    """Grid-search KMM hyper-parameters with an internal validation split.

    ``GridSearchCV`` receives train + test + test, with only the last copy of
    the test split assigned to a test fold by ``PredefinedSplit``.  The best
    parameters are refit on train + test and the resulting test error is
    asserted to agree with ``1 - best_score_`` to within ``1e-4``.
    """
    print(
        "========== Tune parameters for KMM including cross-validation =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    n_train, n_test = x_train.shape[0], x_test.shape[0]
    print("Number of training samples = {}".format(n_train))
    print("Number of testing samples = {}".format(n_test))

    x = np.vstack([x_train, x_test, x_test])
    y = np.concatenate([y_train, y_test, y_test])

    param_grid = {'gamma': [0.5, 1.0], 'num_kernels': [1, 2, 4]}

    split = PredefinedSplit(
        test_fold=[-1] * n_train + [-1] * n_test + [1] * n_test)

    stopper = EarlyStopping(monitor='val_loss', patience=2)

    kmm_settings = dict(
        model_name="KMM_hinge",
        D=20,
        lbd=0.0,
        gamma=0.1,
        mode='batch',
        loss='hinge',
        num_kernels=4,
        batch_size=100,
        temperature=1.0,
        num_epochs=10,
        num_nested_epochs=1,
        learning_rate=0.1,
        learning_rate_mu=0.0,
        learning_rate_gamma=0.1,
        learning_rate_alpha=0.1,
        metrics=['loss', 'err'],
        callbacks=[stopper],
        cv=[-1] * n_train + [0] * n_test,
        catch_exception=True,
        random_state=random_seed(),
    )
    base = KMM(**kmm_settings)

    search = GridSearchCV(base, param_grid, cv=split, n_jobs=-1,
                          refit=False, verbose=True)
    search.fit(x, y)
    print("Best error {} @ params {}".format(1 - search.best_score_,
                                             search.best_params_))

    # Refit a fresh copy with the winning parameters on train + test.
    winner = clone(base).set_params(**search.best_params_)
    winner.fit(np.vstack([x_train, x_test]),
               np.concatenate([y_train, y_test]))

    train_err = 1.0 - winner.score(x_train, y_train)
    test_err = 1.0 - winner.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
    assert abs(test_err - (1 - search.best_score_)) < 1e-4
def test_kmm_softmax():
    """Fit KMM on iris in ``'batch'`` mode and then in ``'online'`` mode.

    The batch run prints train/test errors (``1 - score``); the online run
    prints the fitted model's ``mistake`` attribute.
    """
    print("========== Test KMM for multiclass classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    # --- batch mode -------------------------------------------------------
    batch_model = KMM(
        model_name="KMM_hinge",
        D=4,
        lbd=0.01,
        gamma=0.01,
        mode='batch',
        loss='hinge',
        num_kernels=4,
        batch_size=100,
        temperature=0.1,
        num_epochs=10,
        num_nested_epochs=1,
        learning_rate=0.001,
        learning_rate_mu=0.001,
        learning_rate_gamma=0.001,
        learning_rate_alpha=0.001,
        random_state=random_seed(),
    )
    batch_model.fit(x_train, y_train)

    train_err = 1.0 - batch_model.score(x_train, y_train)
    test_err = 1.0 - batch_model.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    # --- online mode ------------------------------------------------------
    online_model = KMM(
        model_name="KMM_hinge",
        D=100,
        lbd=0.0,
        gamma=0.01,
        mode='online',
        loss='hinge',
        num_kernels=4,
        batch_size=100,
        temperature=0.1,
        num_nested_epochs=1,
        learning_rate=0.001,
        learning_rate_mu=0.001,
        learning_rate_gamma=0.001,
        learning_rate_alpha=0.001,
        random_state=random_seed(),
        verbose=1,
    )
    online_model.fit(x_train, y_train)
    print("Mistake rate = %.4f" % online_model.mistake)
def test_continue_training():
    """Train a PyTorchMLP in stages, raising ``num_epochs`` between fits.

    Steps:
      1. Fit with the initial epoch count and print train/test errors.
      2. Set ``num_epochs`` to 10 on the same object, fit again, print errors.
      3. Save, reload through ``PyTorchModel.load_model``, set ``num_epochs``
         to 15 on the reloaded object, fit again, print errors.

    The iris arrays are cast to ``float32`` (features) and ``uint8`` (labels)
    before use.
    """
    print("========== Test continue training pytorch models ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    x_train = x_train.astype(np.float32)
    y_train = y_train.astype(np.uint8)
    x_test = x_test.astype(np.float32)
    y_test = y_test.astype(np.uint8)

    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    # Single source of truth for the initial epoch count: the same value is
    # passed to the model and reported in the progress message below, so the
    # two cannot drift apart.
    num_epochs = 4

    clf = PyTorchMLP(model_name='PyTorchMLP',
                     arch='MLPv1',
                     num_epochs=num_epochs,
                     batch_size=10,
                     metrics=['loss', 'err'],
                     random_state=random_seed(),
                     verbose=1)

    clf.fit(x_train, y_train)

    print("After training for {0:d} epochs".format(num_epochs))
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    clf.num_epochs = 10
    print("Set number of epoch to {0:d}, then continue training...".format(
        clf.num_epochs))
    clf.fit(x_train, y_train)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    save_file_path = clf.save()
    clf1 = PyTorchModel.load_model(save_file_path)
    clf1.num_epochs = 15
    print("Save, load, set number of epoch to {0:d}, "
          "then continue training...".format(clf1.num_epochs))
    clf1.fit(x_train, y_train)
    train_err = 1.0 - clf1.score(x_train, y_train)
    test_err = 1.0 - clf1.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
def test_kmm_cv():
    """Fit KMM on train + test with a predefined validation split.

    The ``cv`` list marks the training rows ``-1`` and the test rows ``0``.
    ``EarlyStopping`` and ``ModelCheckpoint`` callbacks both monitor
    ``'val_err'``.  Train/test errors are printed after fitting.
    """
    print("========== Test cross-validation for KMM ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    n_train, n_test = x_train.shape[0], x_test.shape[0]
    print("Number of training samples = {}".format(n_train))
    print("Number of testing samples = {}".format(n_test))

    x = np.vstack([x_train, x_test])
    y = np.concatenate([y_train, y_test])

    stopper = EarlyStopping(monitor='val_err', patience=2, verbose=1)
    ckpt_pattern = os.path.join(
        model_dir(), "male/KMM/iris_{epoch:04d}_{val_err:.6f}.pkl")
    saver = ModelCheckpoint(ckpt_pattern,
                            mode='min',
                            monitor='val_err',
                            verbose=0,
                            save_best_only=True)

    kmm_settings = dict(
        model_name="KMM_hinge",
        D=20,
        lbd=0.0,
        gamma=0.1,
        mode='batch',
        loss='hinge',
        num_kernels=3,
        batch_size=100,
        temperature=1.0,
        num_epochs=10,
        num_nested_epochs=1,
        learning_rate=0.1,
        learning_rate_mu=0.0,
        learning_rate_gamma=0.1,
        learning_rate_alpha=0.1,
        metrics=['loss', 'err'],
        callbacks=[stopper, saver],
        cv=[-1] * n_train + [0] * n_test,
        random_state=random_seed(),
        verbose=1,
    )
    model = KMM(**kmm_settings)
    model.fit(x, y)

    train_err = 1.0 - model.score(x_train, y_train)
    test_err = 1.0 - model.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
def test_early_stopping():
    """Fit a softmax GLM three times with an ``EarlyStopping`` callback.

    The model is fit on train + test (``cv`` marks the test rows as fold 0),
    fit again on the same object, and finally fit once more after its
    ``callbacks`` list is emptied.  Train/test errors are printed after each
    fit; the first two fits also print the model's ``epoch`` attribute.
    """
    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    n_train, n_test = x_train.shape[0], x_test.shape[0]
    print("Number of training samples = {}".format(n_train))
    print("Number of testing samples = {}".format(n_test))

    x = np.vstack([x_train, x_test])
    y = np.concatenate([y_train, y_test])

    stopper = EarlyStopping(monitor='val_err', patience=2, verbose=1)
    sgd = SGD(learning_rate=0.01)
    glm = GLM(model_name="early_stopping_callback",
              link='softmax',
              loss='softmax',
              optimizer=sgd,
              num_epochs=20,
              batch_size=10,
              task='classification',
              metrics=['loss', 'err'],
              callbacks=[stopper],
              cv=[-1] * n_train + [0] * n_test,
              random_state=random_seed(),
              verbose=1)

    def fit_and_report(show_epoch):
        # Fit on the combined data, score both splits, then print.
        glm.fit(x, y)
        train_err = 1.0 - glm.score(x_train, y_train)
        test_err = 1.0 - glm.score(x_test, y_test)
        if show_epoch:
            print("Model has been stopped at epoch #{0:d}".format(glm.epoch))
        print("Training error = %.4f" % train_err)
        print("Testing error = %.4f" % test_err)

    fit_and_report(show_epoch=True)

    print("Continue training...")
    fit_and_report(show_epoch=True)

    print("Disable early stopping and continue training to the end...")
    glm.callbacks = []
    fit_and_report(show_epoch=False)
def test_continue_training():
    """Train a softmax GLM in stages, raising ``num_epochs`` between fits.

    Steps:
      1. Fit for ``num_epochs`` (5) epochs and print train/test errors.
      2. Set ``num_epochs`` to 10 on the same object, fit again, print errors.
      3. Save, reload through ``Model.load_model``, set ``num_epochs`` to 15
         on the reloaded object, fit again, print errors.
    """
    print("========== Test continue training the models ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    num_epochs = 5
    clf = GLM(model_name="iris_glm_softmax",
              link='softmax',
              loss='softmax',
              optimizer='sgd',
              batch_size=10,
              num_epochs=num_epochs,
              random_state=random_seed(),
              verbose=1)

    clf.fit(x_train, y_train)

    print("After training for {0:d} epochs".format(num_epochs))
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    clf.num_epochs = 10
    print("Set number of epoch to {0:d}, then continue training...".format(
        clf.num_epochs))
    clf.fit(x_train, y_train)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    save_file_path = clf.save()
    clf1 = Model.load_model(save_file_path)
    clf1.num_epochs = 15
    # Report the epoch count of the reloaded model (clf1), which is the one
    # that was just updated and is about to be trained.
    print("Save, load, set number of epoch to {0:d}, "
          "then continue training...".format(clf1.num_epochs))
    clf1.fit(x_train, y_train)
    train_err = 1.0 - clf1.score(x_train, y_train)
    test_err = 1.0 - clf1.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
def test_fogd_softmax_gridsearch():
    """Grid-search FOGD hyper-parameters, then refit the best setting.

    ``GridSearchCV`` runs over train + test with a ``PredefinedSplit`` whose
    only test fold is the test split.  The best parameters are applied to a
    clone, which is fit on the training split; its ``mistake`` attribute and
    offline train/test errors are printed.
    """
    print(
        "========== Tune parameters for FOGD for multiclass classification =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    n_train, n_test = x_train.shape[0], x_test.shape[0]
    print("Number of training samples = {}".format(n_train))
    print("Number of testing samples = {}".format(n_test))

    x = np.vstack((x_train, x_test))
    y = np.concatenate((y_train, y_test))

    param_grid = {'gamma': [0.5, 1.0], 'learning_rate': [0.01, 0.5, 0.1]}
    split = PredefinedSplit(test_fold=[-1] * n_train + [1] * n_test)

    base = FOGD(model_name="FOGD_hinge",
                D=100,
                lbd=0.0,
                gamma=0.5,
                loss='hinge',
                catch_exception=True,
                random_state=random_seed())

    search = GridSearchCV(base, param_grid, cv=split, n_jobs=-1,
                          refit=False, verbose=True)
    search.fit(x, y)
    # The best score is negated before being reported as an error.
    print("Best error {} @ params {}".format(-search.best_score_,
                                             search.best_params_))

    winner = clone(base).set_params(**search.best_params_)
    winner.fit(x_train, y_train)
    print("Mistake rate = %.4f" % winner.mistake)

    # Offline prediction with the refit model.
    print("Offline prediction")
    pred_train = winner.predict(x_train)
    pred_test = winner.predict(x_test)
    train_err = 1 - metrics.accuracy_score(y_train, pred_train)
    test_err = 1 - metrics.accuracy_score(y_test, pred_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
def test_checkpoint():
    """Fit a softmax GLM with a ``ModelCheckpoint`` callback and reload it.

    The model is fit on train + test (``cv`` marks the test rows as fold 0)
    while a checkpoint callback monitors ``'val_loss'``.  Afterwards a
    checkpoint file is loaded through ``Model.load_model`` and scored on both
    splits.
    """
    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    n_train, n_test = x_train.shape[0], x_test.shape[0]
    print("Number of training samples = {}".format(n_train))
    print("Number of testing samples = {}".format(n_test))

    x = np.vstack([x_train, x_test])
    y = np.concatenate([y_train, y_test])

    ckpt_pattern = os.path.join(
        model_dir(), "male/glm/checkpoint_{epoch:04d}_{val_loss:.6f}.pkl")
    saver = ModelCheckpoint(ckpt_pattern,
                            mode='min',
                            monitor='val_loss',
                            verbose=1,
                            save_best_only=True)

    sgd = SGD(learning_rate=0.01)
    glm = GLM(model_name="checkpoint_callback",
              link='softmax',
              loss='softmax',
              optimizer=sgd,
              num_epochs=5,
              batch_size=10,
              task='classification',
              metrics=['loss', 'err'],
              callbacks=[saver],
              cv=[-1] * n_train + [0] * n_test,
              random_state=random_seed(),
              verbose=1)
    glm.fit(x, y)

    train_err = 1.0 - glm.score(x_train, y_train)
    test_err = 1.0 - glm.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    # NOTE(review): epoch and validation loss are hard-coded here; loading
    # presumably fails if this run did not write a checkpoint with exactly
    # these values -- confirm.
    ckpt_file = ckpt_pattern.format(epoch=5, val_loss=0.968786)
    print("Load model at checkpoint: ", ckpt_file, ", and predict:")
    restored = Model.load_model(ckpt_file)
    train_err = 1.0 - restored.score(x_train, y_train)
    test_err = 1.0 - restored.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
def test_rks_softmax_gridsearch():
    """Grid-search RKS hyper-parameters and verify the refit test error.

    ``GridSearchCV`` runs over train + test with the test split as the only
    test fold.  A clone with the best parameters is fit on the training split
    and its test error is asserted to match ``1 - best_score_`` within
    ``1e-4``.
    """
    print(
        "========== Tune parameters for RKS for multiclass classification =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    n_train, n_test = x_train.shape[0], x_test.shape[0]
    print("Number of training samples = {}".format(n_train))
    print("Number of testing samples = {}".format(n_test))

    x = np.vstack((x_train, x_test))
    y = np.concatenate((y_train, y_test))

    param_grid = {'gamma': [0.5, 1.0], 'learning_rate': [0.01, 0.03, 0.1]}
    split = PredefinedSplit(test_fold=[-1] * n_train + [1] * n_test)

    base = RKS(model_name="RKS_hinge",
               D=100,
               lbd=0.0,
               gamma=0.5,
               loss='hinge',
               num_epochs=10,
               catch_exception=True,
               random_state=random_seed())

    search = GridSearchCV(base, param_grid, cv=split, n_jobs=-1,
                          refit=False, verbose=True)
    search.fit(x, y)
    print("Best error {} @ params {}".format(1 - search.best_score_,
                                             search.best_params_))

    winner = clone(base).set_params(**search.best_params_)
    winner.fit(x_train, y_train)

    train_err = 1.0 - winner.score(x_train, y_train)
    test_err = 1.0 - winner.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
    assert abs(test_err - (1.0 - search.best_score_)) < 1e-4
def test_mlp_softmax_gridsearch():
    """Grid-search MLP hyper-parameters and verify the refit test error.

    The grid covers ``learning_rate``, ``hidden_units_list`` and
    ``reg_lambda``.  A clone with the best parameters is fit on the training
    split and its test error is asserted to match ``1 - best_score_`` within
    ``1e-4``.
    """
    print(
        "========== Tune parameters for MLP for multiclass classification =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    n_train, n_test = x_train.shape[0], x_test.shape[0]
    print("Number of training samples = {}".format(n_train))
    print("Number of testing samples = {}".format(n_test))

    x = np.vstack((x_train, x_test))
    y = np.concatenate((y_train, y_test))

    param_grid = {
        'learning_rate': [0.1, 0.05, 0.01],
        'hidden_units_list': [(1, ), (5, ), (20, )],
        'reg_lambda': [0.01, 0.001, 0.0001]
    }
    split = PredefinedSplit(test_fold=[-1] * n_train + [1] * n_test)

    base = MLP(model_name="mlp_softmax_gridsearch",
               num_epochs=4,
               catch_exception=True,
               random_state=random_seed())

    search = GridSearchCV(base, param_grid, cv=split, n_jobs=-1,
                          refit=False, verbose=True)
    search.fit(x, y)
    print("Best error {} @ params {}".format(1 - search.best_score_,
                                             search.best_params_))

    winner = clone(base).set_params(**search.best_params_)
    winner.fit(x_train, y_train)

    train_err = 1.0 - winner.score(x_train, y_train)
    test_err = 1.0 - winner.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
    assert abs(test_err - (1.0 - search.best_score_)) < 1e-4
def test_tfglm_softmax_gridsearch():
    """Grid-search TensorFlowGLM penalties and verify the refit test error.

    The grid covers ``l1_penalty`` and ``l2_penalty`` and is searched with
    ``n_jobs=1``.  A clone with the best parameters is fit on the training
    split and its test error is asserted to match ``1 - best_score_`` within
    ``1e-4``.
    """
    print(
        "========== Tune parameters for TensorFlowGLM for multiclass classification =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    n_train, n_test = x_train.shape[0], x_test.shape[0]
    print("Number of training samples = {}".format(n_train))
    print("Number of testing samples = {}".format(n_test))

    x = np.vstack((x_train, x_test))
    y = np.concatenate((y_train, y_test))

    param_grid = {'l1_penalty': [0.0, 0.0001],
                  'l2_penalty': [0.0001, 0.001, 0.01]}
    split = PredefinedSplit(test_fold=[-1] * n_train + [1] * n_test)

    base = TensorFlowGLM(model_name="TensorFlowGLM_softmax_gridsearch",
                         link='softmax',
                         loss='softmax',
                         num_epochs=10,
                         catch_exception=True,
                         random_state=random_seed())

    search = GridSearchCV(base, param_grid, cv=split, n_jobs=1,
                          refit=False, verbose=True)
    search.fit(x, y)
    print("Best error {} @ params {}".format(1 - search.best_score_,
                                             search.best_params_))

    winner = clone(base).set_params(**search.best_params_)
    winner.fit(x_train, y_train)

    train_err = 1.0 - winner.score(x_train, y_train)
    test_err = 1.0 - winner.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
    assert abs(test_err - (1.0 - search.best_score_)) < 1e-4
def test_rrf_softmax():
    """Fit RRF on iris in ``'batch'`` mode and then in ``'online'`` mode.

    The batch run prints train/test errors (``1 - score``); the online run
    prints the fitted model's ``mistake`` attribute.
    """
    print("========== Test RRF for multiclass classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    # --- batch mode -------------------------------------------------------
    batch_model = RRF(
        model_name="RRF_hinge",
        D=100,
        lbd=0.01,
        gamma=0.125,
        mode='batch',
        loss='hinge',
        num_epochs=10,
        learning_rate=0.001,
        learning_rate_gamma=0.001,
        random_state=random_seed(),
    )
    batch_model.fit(x_train, y_train)

    train_err = 1.0 - batch_model.score(x_train, y_train)
    test_err = 1.0 - batch_model.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    # --- online mode ------------------------------------------------------
    online_model = RRF(
        model_name="RRF_hinge",
        D=100,
        lbd=0.01,
        gamma=0.125,
        mode='online',
        loss='hinge',
        learning_rate=0.001,
        learning_rate_gamma=0.001,
        random_state=random_seed(),
    )
    online_model.fit(x_train, y_train)
    print("Mistake rate = %.4f" % online_model.mistake)
def test_save_load():
    """Check that a PyTorchMLP scores the same after save and reload.

    The iris arrays are cast to ``float32`` (features) and ``uint8`` (labels).
    After training, the model is saved, reloaded through
    ``PyTorchModel.load_model`` and both train and test errors are asserted
    to match the pre-save values to within ``1e-6``.
    """
    print("========== Test save, load pytorch models ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    x_train = x_train.astype(np.float32)
    y_train = y_train.astype(np.uint8)
    x_test = x_test.astype(np.float32)
    y_test = y_test.astype(np.uint8)

    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    original = PyTorchMLP(
        model_name='PyTorchMLP',
        arch='MLPv1',
        num_epochs=4,
        batch_size=10,
        metrics=['loss', 'err'],
        random_state=random_seed(),
        verbose=1,
    )
    original.fit(x_train, y_train)

    print("After training:")
    err_train_before = 1.0 - original.score(x_train, y_train)
    err_test_before = 1.0 - original.score(x_test, y_test)
    print("Training error = %.4f" % err_train_before)
    print("Testing error = %.4f" % err_test_before)

    saved_path = original.save()
    restored = PyTorchModel.load_model(saved_path)

    print("After save and load:")
    err_train_after = 1.0 - restored.score(x_train, y_train)
    err_test_after = 1.0 - restored.score(x_test, y_test)
    print("Training error = %.4f" % err_train_after)
    print("Testing error = %.4f" % err_test_after)

    # The reloaded model must reproduce the original's errors.
    assert abs(err_train_before - err_train_after) < 1e-6
    assert abs(err_test_before - err_test_after) < 1e-6
def test_tfglm_softmax():
    """Fit a softmax TensorFlowGLM on iris and print train/test errors."""
    print(
        "========== Test TensorFlowGLM for multiclass classification =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    settings = dict(
        model_name="TensorFlowGLM_softmax",
        link='softmax',
        loss='softmax',
        num_epochs=10,
        random_state=random_seed(),
    )
    model = TensorFlowGLM(**settings)
    model.fit(x_train, y_train)

    # Error is reported as the complement of the model's score.
    train_err, test_err = (1.0 - model.score(x_train, y_train),
                           1.0 - model.score(x_test, y_test))
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
def test_rks_softmax():
    """Fit RKS with hinge loss on iris and print train/test errors."""
    print("========== Test RKS for multiclass classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    settings = dict(
        model_name="RKS_hinge",
        D=100,
        lbd=0.0,
        gamma=0.5,
        loss='hinge',
        num_epochs=10,
        random_state=random_seed(),
    )
    model = RKS(**settings)
    model.fit(x_train, y_train)

    # Error is reported as the complement of the model's score.
    train_err, test_err = (1.0 - model.score(x_train, y_train),
                           1.0 - model.score(x_test, y_test))
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
def test_mlp_softmax():
    """Fit an MLP with one hidden layer spec ``(5,)`` on iris and print errors."""
    print("========== Test MLP for multiclass classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    settings = dict(
        model_name="MLP_softmax",
        hidden_units_list=(5, ),
        batch_size=16,
        num_epochs=4,
        learning_rate=0.1,
        reg_lambda=0.01,
        random_state=random_seed(),
    )
    model = MLP(**settings)
    model.fit(x_train, y_train)

    # Error is reported as the complement of the model's score.
    train_err, test_err = (1.0 - model.score(x_train, y_train),
                           1.0 - model.score(x_test, y_test))
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
def test_rmsprop_glm():
    """Fit a softmax GLM with RMSProp, by name and then as an object.

    The first run passes ``optimizer='rmsprop'``; the second passes an
    ``RMSProp(learning_rate=0.01)`` instance.  Train/test errors are printed
    after each run.
    """
    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    # Each entry is (banner, optimizer factory); the factory is called only
    # after the banner is printed so the optimizer is built at that point.
    runs = (
        ("Training GLM using RMSProp with default parameters...",
         lambda: 'rmsprop'),
        ("Training GLM using RMSProp with customized parameters...",
         lambda: RMSProp(learning_rate=0.01)),
    )

    for banner, make_optimizer in runs:
        print(banner)
        glm = GLM(model_name="rmsprop_glm",
                  optimizer=make_optimizer(),
                  num_epochs=10,
                  link='softmax',
                  loss='softmax',
                  random_state=random_seed())
        glm.fit(x_train, y_train)

        train_err = 1.0 - glm.score(x_train, y_train)
        test_err = 1.0 - glm.score(x_test, y_test)
        print("Training error = %.4f" % train_err)
        print("Testing error = %.4f" % test_err)
def test_kmm_softmax_gridsearch():
    """Grid-search KMM over ``gamma``/``num_kernels`` in batch and online mode.

    Both searches use train + test with the test split as the only test fold.
    The batch search is followed by a refit on the training split and printed
    train/test errors; the online search is followed by a refit and the
    refit model's ``mistake`` attribute.
    """
    print(
        "========== Tune parameters for KMM for multiclass classification =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    n_train, n_test = x_train.shape[0], x_test.shape[0]
    print("Number of training samples = {}".format(n_train))
    print("Number of testing samples = {}".format(n_test))

    x = np.vstack((x_train, x_test))
    y = np.concatenate((y_train, y_test))

    param_grid = {'gamma': [0.5, 1.0], 'num_kernels': [1, 2, 4]}
    split = PredefinedSplit(test_fold=[-1] * n_train + [1] * n_test)

    # --- batch mode -------------------------------------------------------
    batch_base = KMM(
        model_name="KMM_hinge",
        D=4,
        lbd=0.01,
        gamma=0.01,
        mode='batch',
        loss='hinge',
        num_kernels=4,
        batch_size=100,
        temperature=0.1,
        num_epochs=50,
        num_nested_epochs=1,
        learning_rate=0.001,
        learning_rate_mu=0.001,
        learning_rate_gamma=0.001,
        learning_rate_alpha=0.001,
        random_state=random_seed(),
    )

    batch_search = GridSearchCV(batch_base, param_grid, cv=split, n_jobs=-1,
                                refit=False, verbose=True)
    batch_search.fit(x, y)
    print("Best error {} @ params {}".format(1.0 - batch_search.best_score_,
                                             batch_search.best_params_))

    batch_winner = clone(batch_base).set_params(**batch_search.best_params_)
    batch_winner.fit(x_train, y_train)

    pred_train = batch_winner.predict(x_train)
    pred_test = batch_winner.predict(x_test)
    train_err = 1.0 - metrics.accuracy_score(y_train, pred_train)
    test_err = 1.0 - metrics.accuracy_score(y_test, pred_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    # --- online mode ------------------------------------------------------
    online_base = KMM(
        model_name="KMM_hinge",
        D=100,
        lbd=0.01,
        gamma=0.01,
        mode='online',
        loss='hinge',
        num_kernels=4,
        batch_size=100,
        temperature=0.1,
        num_epochs=10,
        num_nested_epochs=1,
        learning_rate=0.001,
        learning_rate_mu=0.001,
        learning_rate_gamma=0.001,
        learning_rate_alpha=0.001,
        catch_exception=True,
        random_state=random_seed(),
    )

    online_search = GridSearchCV(online_base, param_grid, cv=split,
                                 n_jobs=-1, refit=False, verbose=True)
    online_search.fit(x, y)
    # The best score is negated before being reported as an error.
    print("Best error {} @ params {}".format(-online_search.best_score_,
                                             online_search.best_params_))

    online_winner = clone(online_base).set_params(
        **online_search.best_params_)
    online_winner.fit(x_train, y_train)
    print("Mistake rate = %.4f" % online_winner.mistake)
def test_glm_save_load(show=False, block_figure_on_end=False):
    """Fit a softmax GLM with callbacks, save it, reload it and keep training.

    Args:
        show: Forwarded as ``show`` to both ``Display`` callbacks.
        block_figure_on_end: Forwarded as ``block_on_end`` to both
            ``Display`` callbacks.

    The model is fit on train + test (``cv`` marks the test rows as fold 0)
    with early stopping, checkpointing and two ``Display`` callbacks.  It is
    then saved to ``male/GLM/saved_model.pkl`` under ``model_dir()``,
    reloaded through ``Model.load_model``, given ``num_epochs = 10`` and fit
    again.  Train/test errors are printed after each fit.
    """
    print("========== Test Save and Load functions for GLM ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    n_train, n_test = x_train.shape[0], x_test.shape[0]
    print("Number of training samples = {}".format(n_train))
    print("Number of testing samples = {}".format(n_test))

    x = np.vstack([x_train, x_test])
    y = np.concatenate([y_train, y_test])

    stopper = EarlyStopping(monitor='val_err', patience=5, verbose=1)
    ckpt_pattern = os.path.join(
        model_dir(), "male/GLM/iris_{epoch:04d}_{val_err:.6f}.pkl")
    saver = ModelCheckpoint(ckpt_pattern,
                            mode='min',
                            monitor='val_err',
                            verbose=0,
                            save_best_only=True)

    # Three line plots: losses, train/validation errors, training error only.
    loss_panel = {
        'metrics': ['loss', 'val_loss'],
        'type': 'line',
        'labels': ["training loss", "validation loss"],
        'title': "Learning losses",
        'xlabel': "epoch",
        'ylabel': "loss",
    }
    err_panel = {
        'metrics': ['err', 'val_err'],
        'type': 'line',
        'title': "Learning errors",
        'xlabel': "epoch",
        'ylabel': "error",
    }
    train_err_panel = {
        'metrics': ['err'],
        'type': 'line',
        'labels': ["training error"],
        'title': "Learning errors",
        'xlabel': "epoch",
        'ylabel': "error",
    }
    curves = Display(title="Learning curves",
                     dpi='auto',
                     layout=(3, 1),
                     freq=1,
                     show=show,
                     block_on_end=block_figure_on_end,
                     monitor=[loss_panel, err_panel, train_err_panel])

    weights_panel = {
        'metrics': ['weights'],
        'title': "Learned weights",
        'type': 'img',
        'disp_dim': (2, 2),
        'tile_shape': (3, 1),
    }
    filters = Display(title="Filters",
                      dpi='auto',
                      layout=(1, 1),
                      figsize=(6, 15),
                      freq=1,
                      show=show,
                      block_on_end=block_figure_on_end,
                      monitor=[weights_panel])

    glm = GLM(model_name="GLM_softmax_cv",
              link='softmax',
              loss='softmax',
              optimizer='sgd',
              num_epochs=4,
              batch_size=10,
              task='classification',
              metrics=['loss', 'err'],
              callbacks=[stopper, saver, curves, filters],
              cv=[-1] * n_train + [0] * n_test,
              random_state=random_seed(),
              verbose=1)
    glm.fit(x, y)

    train_err = 1.0 - glm.score(x_train, y_train)
    test_err = 1.0 - glm.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    # Round-trip through disk, then continue training the reloaded model.
    saved_path = os.path.join(model_dir(), "male/GLM/saved_model.pkl")
    glm.save(file_path=saved_path)
    restored = Model.load_model(saved_path)
    restored.num_epochs = 10
    restored.fit(x, y)

    train_err = 1.0 - restored.score(x_train, y_train)
    test_err = 1.0 - restored.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
def test_kmm_cv_disp(show=False, block_figure_on_end=False):
    """Fit KMM with a validation split plus a ``Display`` callback.

    Args:
        show: Forwarded as ``show`` to the ``Display`` callback.
        block_figure_on_end: Forwarded as ``block_on_end`` to the ``Display``
            callback.

    The model is fit on train + test (``cv`` marks the test rows as fold 0)
    with display, early-stopping and checkpoint callbacks, after which the
    train/test errors are printed.
    """
    print("========== Test cross-validation for KMM with Display ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    n_train, n_test = x_train.shape[0], x_test.shape[0]
    print("Number of training samples = {}".format(n_train))
    print("Number of testing samples = {}".format(n_test))

    x = np.vstack([x_train, x_test])
    y = np.concatenate([y_train, y_test])

    stopper = EarlyStopping(monitor='val_err', patience=2, verbose=1)
    ckpt_pattern = os.path.join(
        model_dir(), "male/KMM/iris_{epoch:04d}_{val_err:.6f}.pkl")
    saver = ModelCheckpoint(ckpt_pattern,
                            mode='min',
                            monitor='val_err',
                            verbose=0,
                            save_best_only=True)

    # Three line plots: losses, train/validation errors, training error only.
    loss_panel = {
        'metrics': ['loss', 'val_loss'],
        'type': 'line',
        'labels': ["training loss", "validation loss"],
        'title': "Learning losses",
        'xlabel': "epoch",
        'ylabel': "loss",
    }
    err_panel = {
        'metrics': ['err', 'val_err'],
        'type': 'line',
        'title': "Learning errors",
        'xlabel': "epoch",
        'ylabel': "error",
    }
    train_err_panel = {
        'metrics': ['err'],
        'type': 'line',
        'labels': ["training error"],
        'title': "Learning errors",
        'xlabel': "epoch",
        'ylabel': "error",
    }
    curves = Display(layout=(3, 1),
                     dpi='auto',
                     show=show,
                     block_on_end=block_figure_on_end,
                     monitor=[loss_panel, err_panel, train_err_panel])

    kmm_settings = dict(
        model_name="KMM_hinge",
        D=20,
        lbd=0.0,
        gamma=0.1,
        mode='batch',
        loss='hinge',
        num_kernels=3,
        batch_size=100,
        temperature=1.0,
        num_epochs=10,
        num_nested_epochs=1,
        learning_rate=0.1,
        learning_rate_mu=0.0,
        learning_rate_gamma=0.1,
        learning_rate_alpha=0.1,
        metrics=['loss', 'err'],
        callbacks=[curves, stopper, saver],
        cv=[-1] * n_train + [0] * n_test,
        random_state=random_seed(),
        verbose=1,
    )
    model = KMM(**kmm_settings)
    model.fit(x, y)

    train_err = 1.0 - model.score(x_train, y_train)
    test_err = 1.0 - model.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)