Example no. 1
0
def test_dualsgd_bin():
    """Smoke-test DualSGD with a hinge loss on the Pima binary-classification data."""
    print("========== Test DualSGD for binary classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_pima()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    learner = DualSGD(model_name="DualSGD_hinge",
                      k=20,
                      D=200,
                      gamma=1.0,
                      lbd=3.3593684387335183e-05,
                      loss='hinge',
                      maintain='k-merging',
                      max_budget_size=100,
                      random_state=random_seed())

    learner.fit(x_train, y_train)

    # Online statistics gathered during the sequential pass over the data.
    print("Mistake rate = %.4f" % learner.mistake)
    print("Budget size = %d" % learner.budget_size)

    # offline prediction
    print("Offline prediction")
    err_train = 1 - metrics.accuracy_score(y_train, learner.predict(x_train))
    err_test = 1 - metrics.accuracy_score(y_test, learner.predict(x_test))
    print("Training error = %.4f" % err_train)
    print("Testing error = %.4f" % err_test)
Example no. 2
0
def test_fogd_bin():
    """Smoke-test FOGD (Fourier Online Gradient Descent) on the Pima binary task."""
    print("========== Test FOGD for binary classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_pima()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    learner = FOGD(model_name="FOGD_hinge",
                   D=100,
                   lbd=0.0,
                   gamma=0.5,
                   loss='hinge',
                   random_state=random_seed())

    learner.fit(x_train, y_train)

    # Online mistake rate accumulated during the single sequential pass.
    print("Mistake rate = %.4f" % learner.mistake)

    # offline prediction
    print("Offline prediction")
    err_train = 1 - metrics.accuracy_score(y_train, learner.predict(x_train))
    err_test = 1 - metrics.accuracy_score(y_test, learner.predict(x_test))
    print("Training error = %.4f" % err_train)
    print("Testing error = %.4f" % err_test)
Example no. 3
0
def test_kmm_bin():
    """Run KMM on the Pima binary task in both batch and online modes."""
    print("========== Test KMM for binary classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_pima()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    # Hyperparameters shared by the batch and online configurations.
    shared = dict(model_name="KMM_hinge",
                  D=4,
                  lbd=0.01,
                  gamma=0.125,
                  loss='hinge',
                  num_kernels=4,
                  temperature=0.1,
                  learning_rate=0.001,
                  learning_rate_mu=0.001,
                  learning_rate_gamma=0.001,
                  learning_rate_alpha=0.001)

    # Batch mode: full multi-epoch training, then offline scoring.
    batch_clf = KMM(mode='batch',
                    num_epochs=10,
                    num_nested_epochs=2,
                    random_state=random_seed(),
                    **shared)
    batch_clf.fit(x_train, y_train)

    err_train = 1.0 - batch_clf.score(x_train, y_train)
    err_test = 1.0 - batch_clf.score(x_test, y_test)
    print("Training error = %.4f" % err_train)
    print("Testing error = %.4f" % err_test)

    # Online mode: one sequential pass, reporting the online mistake rate.
    online_clf = KMM(mode='online',
                     num_nested_epochs=1,
                     random_state=random_seed(),
                     **shared)
    online_clf.fit(x_train, y_train)

    print("Mistake rate = %.4f" % online_clf.mistake)
Example no. 4
0
def test_auc():
    """Fit a logistic GLM on Pima and report train/test AUC scores."""
    print("========== Test AUC score ==========")

    (x_train, y_train), (x_test, y_test) = demo.load_pima()

    model = GLM(model_name="GLM_logit",
                l1_penalty=0.0,
                l2_penalty=0.0,
                random_state=random_seed())
    model.fit(x_train, y_train)

    # AUC computed against the model's predictions on each split.
    auc_train = auc(y_train, model.predict(x_train))
    auc_test = auc(y_test, model.predict(x_test))
    print("Training AUC = %.4f" % auc_train)
    print("Testing AUC = %.4f" % auc_test)
Example no. 5
0
def test_glm_logit():
    """Fit logistic GLMs with the default, a named ('sgd'), and a custom SGD optimizer."""
    print("========== Test GLM for binary classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_pima()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    def fit_and_report(estimator):
        # Train one configuration and print its train/test error rates.
        print("Use {} optimizer".format(estimator.optimizer))
        estimator.fit(x_train, y_train)
        print("Training error = %.4f" % (1.0 - estimator.score(x_train, y_train)))
        print("Testing error = %.4f" % (1.0 - estimator.score(x_test, y_test)))

    # Default optimizer.
    fit_and_report(GLM(model_name="GLM_logit",
                       l1_penalty=0.0,
                       l2_penalty=0.0,
                       random_state=random_seed()))

    # Optimizer selected by name.
    fit_and_report(GLM(model_name="GLM_logit",
                       optimizer='sgd',
                       l1_penalty=0.0,
                       l2_penalty=0.0,
                       random_state=random_seed()))

    # Optimizer passed as a pre-built object.
    fit_and_report(GLM(model_name="GLM_logit",
                       optimizer=SGD(learning_rate=0.01, momentum=0.9,
                                     nesterov=True),
                       l1_penalty=0.0,
                       l2_penalty=0.0,
                       random_state=random_seed()))
Example no. 6
0
def test_srbm_load_to_continue_training():
    """Verify a saved Supervised RBM reloads losslessly and can resume training."""
    print(
        "========== Test load to continue training Supervised RBM ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_pima()

    rbm = SupervisedRBM(num_hidden=5,
                        num_visible=x_train.shape[1],
                        batch_size=10,
                        num_epochs=2,
                        learning_rate=0.1,
                        momentum_method='sudden',
                        weight_cost=0.0,
                        inference_engine='variational_inference',
                        approx_method='second_order',
                        metrics=['recon_err', 'loss', 'err'],
                        random_state=random_seed(),
                        verbose=1)
    rbm.fit(x_train, y_train)

    print("After training for {0:d} epochs".format(rbm.num_epochs))
    err_train = 1.0 - rbm.score(x_train, y_train)
    err_test = 1.0 - rbm.score(x_test, y_test)
    print("Training error = %.4f" % err_train)
    print("Testing error = %.4f" % err_test)

    # Round-trip through disk and check the scores are bitwise-stable.
    restored = Model.load_model(rbm.save())
    print("After save and load:")
    restored_err_train = 1.0 - restored.score(x_train, y_train)
    restored_err_test = 1.0 - restored.score(x_test, y_test)
    print("Training error = %.4f" % restored_err_train)
    print("Testing error = %.4f" % restored_err_test)
    assert abs(err_train - restored_err_train) < 1e-6
    assert abs(err_test - restored_err_test) < 1e-6

    # Raise the epoch budget on the in-memory model and keep training.
    rbm.num_epochs = 4
    rbm.fit(x_train, y_train)
    print("Set number of epoch to {0:d}, then continue training...".format(
        rbm.num_epochs))
    print("Training error = %.4f" % (1.0 - rbm.score(x_train, y_train)))
    print("Testing error = %.4f" % (1.0 - rbm.score(x_test, y_test)))
Example no. 7
0
def test_tfglm_logit():
    """Smoke-test the TensorFlow-backed GLM on the Pima binary task."""
    print("========== Test TensorFlowGLM for binary classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_pima()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    model = TensorFlowGLM(model_name="TensorFlowGLM_logit",
                          l1_penalty=0.0,
                          l2_penalty=0.0,
                          num_epochs=10,
                          random_state=random_seed())
    model.fit(x_train, y_train)

    err_train = 1.0 - model.score(x_train, y_train)
    err_test = 1.0 - model.score(x_test, y_test)
    print("Training error = %.4f" % err_train)
    print("Testing error = %.4f" % err_test)
Example no. 8
0
def test_pytorch_glm_logit():
    """Smoke-test the PyTorch-backed GLM with SGD on the Pima binary task."""
    print('========== Test PytorchGLM for binary classification ==========')

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_pima()
    print('Number of training samples = {}'.format(x_train.shape[0]))
    print('Number of testing samples = {}'.format(x_test.shape[0]))

    model = PyTorchGLM(model_name='PytorchGLM_logit',
                       l1_penalty=0.0001,
                       l2_penalty=0.001,
                       optimizer='sgd',
                       random_state=random_seed())

    print('Use {} optimizer'.format(model.optimizer))
    model.fit(x_train, y_train)

    err_train = 1.0 - model.score(x_train, y_train)
    err_test = 1.0 - model.score(x_test, y_test)
    print('Training error = %.4f' % err_train)
    print('Testing error = %.4f' % err_test)
Example no. 9
0
def test_rrf_bin():
    """Run RRF on the Pima binary task in both batch and online modes."""
    print("========== Test RRF for binary classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_pima()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    # Hyperparameters common to the batch and online configurations.
    common = dict(model_name="RRF_hinge",
                  D=100,
                  lbd=0.01,
                  gamma=0.125,
                  loss='hinge',
                  learning_rate=0.001,
                  learning_rate_gamma=0.001)

    # Batch mode: multi-epoch training followed by offline scoring.
    batch_model = RRF(mode='batch',
                      num_epochs=10,
                      random_state=random_seed(),
                      **common)
    batch_model.fit(x_train, y_train)

    err_train = 1.0 - batch_model.score(x_train, y_train)
    err_test = 1.0 - batch_model.score(x_test, y_test)
    print("Training error = %.4f" % err_train)
    print("Testing error = %.4f" % err_test)

    # Online mode: one sequential pass, reporting the online mistake rate.
    online_model = RRF(mode='online',
                       random_state=random_seed(),
                       **common)
    online_model.fit(x_train, y_train)

    print("Mistake rate = %.4f" % online_model.mistake)
Example no. 10
0
def test_tfglm_logit_gridsearch():
    """Grid-search TensorFlowGLM penalties over a predefined train/test split."""
    print(
        "========== Tune parameters for TensorFlowGLM for binary classification =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_pima()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x_all = np.vstack((x_train, x_test))
    y_all = np.concatenate((y_train, y_test))

    param_grid = {'l1_penalty': [0.0, 0.0001],
                  'l2_penalty': [0.0001, 0.001, 0.01]}

    # -1 marks rows used only for training; 1 marks the single validation fold.
    fold = [-1] * x_train.shape[0] + [1] * x_test.shape[0]
    split = PredefinedSplit(test_fold=fold)

    base = TensorFlowGLM(model_name="TensorFlowGLM_logit_gridsearch",
                         num_epochs=10,
                         catch_exception=True,
                         random_state=random_seed())

    search = GridSearchCV(base, param_grid, cv=split, n_jobs=1,
                          refit=False, verbose=True)
    search.fit(x_all, y_all)

    print("Best error {} @ params {}".format(1 - search.best_score_,
                                             search.best_params_))

    # Refit the winning configuration on the training split only.
    best = clone(base).set_params(**search.best_params_)
    best.fit(x_train, y_train)

    err_train = 1.0 - best.score(x_train, y_train)
    err_test = 1.0 - best.score(x_test, y_test)
    print("Training error = %.4f" % err_train)
    print("Testing error = %.4f" % err_test)
    # The held-out error must match what the grid search reported.
    assert abs(err_test - (1.0 - search.best_score_)) < 1e-4
Example no. 11
0
def test_rks_bin():
    """Smoke-test RKS (Random Kitchen Sinks) on the Pima binary task."""
    print("========== Test RKS for binary classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_pima()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    model = RKS(model_name="RKS_hinge",
                D=100,
                lbd=0.0,
                gamma=0.5,
                loss='hinge',
                num_epochs=10,
                random_state=random_seed())
    model.fit(x_train, y_train)

    err_train = 1.0 - model.score(x_train, y_train)
    err_test = 1.0 - model.score(x_test, y_test)
    print("Training error = %.4f" % err_train)
    print("Testing error = %.4f" % err_test)