def test_bank_svmguide1():
    """Fit BaNK on the svmguide1 dataset and print train/test error rates."""
    print("========== Test BaNK on svmguide1 dataset ==========")
    data_name = 'svmguide1'
    (x_train, y_train), (x_test, y_test) = demo.load_svmguide1()

    # Hyper-parameters are fixed for this smoke test; gamma is 1 / (empirical scale).
    learner = BaNK(
        gamma=1.0 / 1.25727,
        rf_dim=384,
        inner_regularization=0.125,
        outer_regularization=0.125,
        alpha=10.0,
        kappa=0.1,
        inner_max_loop=1,
        max_outer_loop=5,
        batch_size=5,
    )
    learner.fit(x_train, y_train)

    train_pred = learner.predict(x_train)
    test_pred = learner.predict(x_test)

    print("Dataset: {}".format(data_name))
    print("Training error = %.4f" % (1 - metrics.accuracy_score(y_train, train_pred)))
    print("Test error = %.4f" % (1 - metrics.accuracy_score(y_test, test_pred)))
def test_svmguide1(show_figure=False, block_figure_on_end=False):
    """Fit SVRG (online mode) on svmguide1, optionally plotting the learning losses.

    Parameters
    ----------
    show_figure : bool
        Whether to display the loss figure while training.
    block_figure_on_end : bool
        Whether the figure blocks the process when training finishes.
    """
    print("========== Test SVRG on svmguide1 dataset ==========")
    data_name = 'svmguide1'
    (x_train, y_train), (x_test, y_test) = demo.load_svmguide1()

    # Live monitor for the training loss and objective value.
    loss_display = Display(
        freq=1,
        dpi='auto',
        show=show_figure,
        block_on_end=block_figure_on_end,
        monitor=[{'metrics': ['train_loss', 'obj_func'],
                  'type': 'line',
                  'title': "Learning losses",
                  'xlabel': "X1",
                  'ylabel': "X2",
                  }],
    )

    print(x_train.shape)

    learner = SVRG(
        mode='online',
        regular_param=0.01,
        learning_rate_scale=0.7,
        gamma=2.0,
        rf_dim=400,
        num_epochs=1,
        cache_size=50,
        freq_update_full_model=100,
        oracle=SVRG.COVERAGE,
        core_max=100,
        coverage_radius=0.9,
        loss_func=SVRG.LOGISTIC,
        smooth_hinge_theta=0.5,
        smooth_hinge_tau=0.5,
        callbacks=[loss_display],
        metrics=['train_loss', 'obj_func'],
        freq_calc_metrics=300,
        random_state=random_seed(),
    )
    learner.fit(x_train, y_train)

    train_pred = learner.predict(x_train)
    test_pred = learner.predict(x_test)

    print("Dataset: {}".format(data_name))
    print("Training error = %.4f" % (1 - metrics.accuracy_score(y_train, train_pred)))
    print("Test error = %.4f" % (1 - metrics.accuracy_score(y_test, test_pred)))
def test_sgd_svmguide1_bin():
    """Fit kernel SGD (hinge loss, Gaussian kernel) on svmguide1 and print errors."""
    (x_train, y_train), (x_test, y_test) = demo.load_svmguide1()

    model = KSGD(
        lbd=0.1,
        eps=0.01,
        gamma=2,
        kernel='gaussian',
        loss='hinge',
        batch_size=10,
        avg_weight=False,
        random_state=random_seed(),
    )
    model.fit(x_train, y_train)

    # score() returns accuracy, so error = 1 - accuracy.
    err_train = 1.0 - model.score(x_train, y_train)
    err_test = 1.0 - model.score(x_test, y_test)
    print("Training error = %.4f" % err_train)
    print("Testing error = %.4f" % err_test)
def test_svmguide1():
    """Fit OnlineDualSVRG on svmguide1 with a mistake-rate monitor, then evaluate offline."""
    print("========== Test IGKOL on svmguide1 dataset ==========")
    data_name = 'svmguide1'
    (x_train, y_train), (x_test, y_test) = demo.load_svmguide1()

    # Online mistake-rate curve, updated every iteration.
    display = Display(
        freq=1,
        dpi='auto',
        block_on_end=True,
        monitor=[{'metrics': ['mistake_rate'],
                  'type': 'line',
                  'title': "Mistake Rate",
                  'xlabel': "data points",
                  'ylabel': "Error",
                  }],
    )

    learner = OnlineDualSVRG(
        regular_param=0.01,
        learning_rate_scale=1.0,
        gamma=2.0,
        rf_dim=400,
        cache_size=50,
        freq_update_full_model=10,
        oracle='budget',
        core_max=100,
        coverage_radius=0.5,
        loss_func='hinge',
        smooth_hinge_theta=0.5,
        smooth_hinge_tau=0.5,
        callbacks=[display],
        metrics=['mistake_rate'],
        random_state=random_seed(),
    )
    learner.fit(x_train, y_train)

    print('OFFLINE Testing')
    train_pred = learner.predict(x_train)
    test_pred = learner.predict(x_test)

    print("Dataset: {}".format(data_name))
    print("Training error = %.4f" % (1 - metrics.accuracy_score(y_train, train_pred)))
    print("Test error = %.4f" % (1 - metrics.accuracy_score(y_test, test_pred)))
def run_svmguide1_gridsearch():
    """Grid-search SVRG hyper-parameters on svmguide1 with a fixed train/valid split,
    then refit the best configuration on the full training set and report errors."""
    print("========== Tune parameters for SVRG for classification ==========")
    data_name = 'svmguide1'
    (x_total, y_total), (x_test, y_test) = demo.load_svmguide1()

    # Carve an 80/20 train/validation split out of the training pool.
    n_total = x_total.shape[0]
    percent = 0.8
    n_train = int(percent * n_total)
    idx_train = np.random.permutation(n_total)[:n_train]
    train_mask = np.zeros(n_total, dtype=bool)
    train_mask[idx_train] = True
    x_train, y_train = x_total[train_mask, :], y_total[train_mask]
    x_valid, y_valid = x_total[~train_mask, :], y_total[~train_mask]

    print("Number of training samples = {}".format(x_train.shape[0]))
    # NOTE(review): this message says "testing" but prints the validation-set size
    # (x_valid) — presumably intentional shorthand; confirm with the author.
    print("Number of testing samples = {}".format(x_valid.shape[0]))

    # Stack train-then-valid so PredefinedSplit can mark the validation fold.
    x = np.vstack((x_train, x_valid))
    y = np.concatenate((y_train, y_valid))

    params = {'regular_param': [0.0001, 0.00001],
              'gamma': [0.25, 0.5]}
    # -1 = always in training, 1 = the single validation fold.
    split = PredefinedSplit(test_fold=[-1] * x_train.shape[0] + [1] * x_valid.shape[0])

    base_model = SVRG(
        mode='batch',
        regular_param=0.01,
        learning_rate_scale=0.8,
        gamma=2.0,
        rf_dim=400,
        num_epochs=1,
        cache_size=50,
        freq_update_full_model=100,
        oracle=SVRG.COVERAGE,
        core_max=100,
        coverage_radius=0.9,
        loss_func=SVRG.LOGISTIC,
        smooth_hinge_theta=0.5,
        smooth_hinge_tau=0.5,
        random_state=random_seed(),
    )

    search = GridSearchCV(base_model, params, cv=split, n_jobs=-1, refit=False, verbose=True)
    search.fit(x, y)
    print("Best error {} @ params {}".format(1.0 - search.best_score_, search.best_params_))

    # best param prediction
    print("Best param prediction")
    tuned = clone(base_model).set_params(**search.best_params_)
    tuned.fit(x_total, y_total)

    total_err = 1 - metrics.accuracy_score(y_total, tuned.predict(x_total))
    test_err = 1 - metrics.accuracy_score(y_test, tuned.predict(x_test))
    print("Training error = %.4f" % total_err)
    print("Testing error = %.4f" % test_err)