Example #1
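All snippets in this listing share a preamble that the page omits. A minimal sketch of the assumed imports follows: the numpy/sklearn lines are standard, but every `male` path is an assumption about that package's layout, not a verified API.

import os

import numpy as np
from sklearn import metrics
from sklearn.base import clone
from sklearn.model_selection import GridSearchCV, PredefinedSplit

# Assumed locations inside the `male` package (not verified). The model
# classes (GLM, KMM, FOGD, RKS, RRF, DualSGD, MLP, ...), optimizers (SGD,
# RMSProp), and callbacks (EarlyStopping, ModelCheckpoint, Display) used
# below are likewise imported from `male` submodules.
from male.datasets import demo                    # assumed path
from male.configs import model_dir, random_seed   # assumed path
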
def test_dualsgd_softmax():
    print("========== Test DualSGD for multiclass classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    clf = DualSGD(model_name="DualSGD_hinge",
                  k=20,
                  D=200,
                  gamma=1.0,
                  lbd=3.3593684387335183e-05,
                  loss='hinge',
                  maintain='k-merging',
                  max_budget_size=100,
                  random_state=random_seed())

    clf.fit(x_train, y_train)

    print("Mistake rate = %.4f" % clf.mistake)
    print("Budget size = %d" % clf.budget_size)

    # offline prediction
    print("Offline prediction")
    y_train_pred = clf.predict(x_train)
    y_test_pred = clf.predict(x_test)
    train_err = 1 - metrics.accuracy_score(y_train, y_train_pred)
    test_err = 1 - metrics.accuracy_score(y_test, y_test_pred)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example #2
def test_fogd_softmax():
    print("========== Test FOGD for multiclass classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    clf = FOGD(model_name="FOGD_hinge",
               D=100,
               lbd=0.0,
               gamma=0.5,
               loss='hinge',
               random_state=random_seed())

    clf.fit(x_train, y_train)

    print("Mistake rate = %.4f" % clf.mistake)

    # offline prediction
    print("Offline prediction")
    y_train_pred = clf.predict(x_train)
    y_test_pred = clf.predict(x_test)
    train_err = 1 - metrics.accuracy_score(y_train, y_train_pred)
    test_err = 1 - metrics.accuracy_score(y_test, y_test_pred)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example #3
def test_glm_softmax():
    print("========== Test GLM for multiclass classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    clf = GLM(model_name="GLM_softmax",
              link='softmax',
              loss='softmax',
              random_state=random_seed())

    print("Use {} optimizer".format(clf.optimizer))
    clf.fit(x_train, y_train)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    clf = GLM(model_name="GLM_softmax",
              optimizer='sgd',
              link='softmax',
              loss='softmax',
              random_state=random_seed())

    print("Use {} optimizer".format(clf.optimizer))
    clf.fit(x_train, y_train)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example #4
def test_save_load():
    print("========== Test save, load tensorflow models ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    clf = TensorFlowGLM(model_name="iris_TensorFlowGLM_softmax",
                        link='softmax',
                        loss='softmax',
                        num_epochs=5,
                        random_state=random_seed())

    clf.fit(x_train, y_train)

    print("After training:")
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    save_file_path = clf.save()

    clf1 = TensorFlowModel.load_model(save_file_path)

    print("After save and load:")
    train_err1 = 1.0 - clf1.score(x_train, y_train)
    test_err1 = 1.0 - clf1.score(x_test, y_test)
    print("Training error = %.4f" % train_err1)
    print("Testing error = %.4f" % test_err1)
    assert abs(train_err - train_err1) < 1e-6
    assert abs(test_err - test_err1) < 1e-6
Example #5
def test_rrf_cv_gridsearch():
    print(
        "========== Tune parameters for RRF including cross-validation =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack([x_train, x_test, x_test])
    y = np.concatenate([y_train, y_test, y_test])

    params = {'gamma': [0.5, 1.0], 'learning_rate': [0.01, 0.05, 0.1]}

    ps = PredefinedSplit(test_fold=[-1] * x_train.shape[0] +
                         [-1] * x_test.shape[0] + [1] * x_test.shape[0])

    early_stopping = EarlyStopping(monitor='val_err', patience=2)
    filepath = os.path.join(
        model_dir(), "male/RRF/search/mnist_{epoch:04d}_{val_err:.6f}.pkl")
    checkpoint = ModelCheckpoint(filepath,
                                 mode='min',
                                 monitor='val_err',
                                 verbose=0,
                                 save_best_only=True)

    clf = RRF(model_name="RRF_hinge",
              D=100,
              lbd=0.01,
              gamma=0.125,
              mode='batch',
              loss='hinge',
              num_epochs=10,
              learning_rate=0.001,
              learning_rate_gamma=0.001,
              metrics=['loss', 'err'],
              callbacks=[early_stopping, checkpoint],
              cv=[-1] * x_train.shape[0] + [0] * x_test.shape[0],
              catch_exception=True,
              random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=-1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best error {} @ params {}".format(1 - gs.best_score_,
                                             gs.best_params_))

    best_clf = clone(clf).set_params(**gs.best_params_)

    best_clf.fit(np.vstack([x_train, x_test]),
                 np.concatenate([y_train, y_test]))

    train_err = 1.0 - best_clf.score(x_train, y_train)
    test_err = 1.0 - best_clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
    assert abs(test_err - (1.0 - gs.best_score_)) < 1e-4
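
PredefinedSplit pins the search to one fixed train/validation split instead of random folds: samples whose test_fold entry is -1 never enter a validation fold, while non-negative entries name the fold they belong to. A minimal standalone illustration with toy sizes:

from sklearn.model_selection import PredefinedSplit

ps = PredefinedSplit(test_fold=[-1, -1, -1, -1, 1, 1])
print(ps.get_n_splits())             # 1: only fold "1" is ever held out
for train_idx, valid_idx in ps.split():
    print(train_idx, valid_idx)      # [0 1 2 3] [4 5]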
Example #6
def test_kmm_cv_gridsearch():
    print(
        "========== Tune parameters for KMM including cross-validation =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack([x_train, x_test, x_test])
    y = np.concatenate([y_train, y_test, y_test])

    params = {'gamma': [0.5, 1.0], 'num_kernels': [1, 2, 4]}

    ps = PredefinedSplit(test_fold=[-1] * x_train.shape[0] +
                         [-1] * x_test.shape[0] + [1] * x_test.shape[0])

    early_stopping = EarlyStopping(monitor='val_loss', patience=2)

    clf = KMM(model_name="KMM_hinge",
              D=20,
              lbd=0.0,
              gamma=0.1,
              mode='batch',
              loss='hinge',
              num_kernels=4,
              batch_size=100,
              temperature=1.0,
              num_epochs=10,
              num_nested_epochs=1,
              learning_rate=0.1,
              learning_rate_mu=0.0,
              learning_rate_gamma=0.1,
              learning_rate_alpha=0.1,
              metrics=['loss', 'err'],
              callbacks=[early_stopping],
              cv=[-1] * x_train.shape[0] + [0] * x_test.shape[0],
              catch_exception=True,
              random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=-1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best error {} @ params {}".format(1 - gs.best_score_,
                                             gs.best_params_))

    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(np.vstack([x_train, x_test]),
                 np.concatenate([y_train, y_test]))

    train_err = 1.0 - best_clf.score(x_train, y_train)
    test_err = 1.0 - best_clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
    assert abs(test_err - (1 - gs.best_score_)) < 1e-4
Example #7
def test_kmm_softmax():
    print("========== Test KMM for multiclass classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    clf = KMM(model_name="KMM_hinge",
              D=4,
              lbd=0.01,
              gamma=0.01,
              mode='batch',
              loss='hinge',
              num_kernels=4,
              batch_size=100,
              temperature=0.1,
              num_epochs=10,
              num_nested_epochs=1,
              learning_rate=0.001,
              learning_rate_mu=0.001,
              learning_rate_gamma=0.001,
              learning_rate_alpha=0.001,
              random_state=random_seed())

    clf.fit(x_train, y_train)

    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    clf = KMM(model_name="KMM_hinge",
              D=100,
              lbd=0.0,
              gamma=0.01,
              mode='online',
              loss='hinge',
              num_kernels=4,
              batch_size=100,
              temperature=0.1,
              num_nested_epochs=1,
              learning_rate=0.001,
              learning_rate_mu=0.001,
              learning_rate_gamma=0.001,
              learning_rate_alpha=0.001,
              random_state=random_seed(),
              verbose=1)

    clf.fit(x_train, y_train)

    print("Mistake rate = %.4f" % clf.mistake)
Example #8
def test_continue_training():
    print("========== Test continue training pytorch models ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    x_train = x_train.astype(np.float32)
    y_train = y_train.astype(np.uint8)
    x_test = x_test.astype(np.float32)
    y_test = y_test.astype(np.uint8)
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    num_epochs = 5
    clf = PyTorchMLP(model_name='PyTorchMLP',
                     arch='MLPv1',
                     num_epochs=num_epochs,
                     batch_size=10,
                     metrics=['loss', 'err'],
                     random_state=random_seed(),
                     verbose=1)

    clf.fit(x_train, y_train)

    print("After training for {0:d} epochs".format(num_epochs))
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    clf.num_epochs = 10
    print("Set number of epoch to {0:d}, then continue training...".format(
        clf.num_epochs))
    clf.fit(x_train, y_train)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    save_file_path = clf.save()
    clf1 = PyTorchModel.load_model(save_file_path)
    clf1.num_epochs = 15
    print("Save, load, set number of epoch to {0:d}, "
          "then continue training...".format(clf1.num_epochs))
    clf1.fit(x_train, y_train)
    train_err = 1.0 - clf1.score(x_train, y_train)
    test_err = 1.0 - clf1.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example #9
def test_kmm_cv():
    print("========== Test cross-validation for KMM ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack([x_train, x_test])
    y = np.concatenate([y_train, y_test])

    early_stopping = EarlyStopping(monitor='val_err', patience=2, verbose=1)
    filepath = os.path.join(model_dir(),
                            "male/KMM/iris_{epoch:04d}_{val_err:.6f}.pkl")
    checkpoint = ModelCheckpoint(filepath,
                                 mode='min',
                                 monitor='val_err',
                                 verbose=0,
                                 save_best_only=True)

    clf = KMM(model_name="KMM_hinge",
              D=20,
              lbd=0.0,
              gamma=0.1,
              mode='batch',
              loss='hinge',
              num_kernels=3,
              batch_size=100,
              temperature=1.0,
              num_epochs=10,
              num_nested_epochs=1,
              learning_rate=0.1,
              learning_rate_mu=0.0,
              learning_rate_gamma=0.1,
              learning_rate_alpha=0.1,
              metrics=['loss', 'err'],
              callbacks=[early_stopping, checkpoint],
              cv=[-1] * x_train.shape[0] + [0] * x_test.shape[0],
              random_state=random_seed(),
              verbose=1)

    clf.fit(x, y)

    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example #10
def test_early_stopping():
    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack([x_train, x_test])
    y = np.concatenate([y_train, y_test])

    early_stopping = EarlyStopping(monitor='val_err', patience=2, verbose=1)
    optz = SGD(learning_rate=0.01)
    clf = GLM(model_name="early_stopping_callback",
              link='softmax',
              loss='softmax',
              optimizer=optz,
              num_epochs=20,
              batch_size=10,
              task='classification',
              metrics=['loss', 'err'],
              callbacks=[early_stopping],
              cv=[-1] * x_train.shape[0] + [0] * x_test.shape[0],
              random_state=random_seed(),
              verbose=1)

    clf.fit(x, y)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Model has been stopped at epoch #{0:d}".format(clf.epoch))
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    print("Continue training...")
    clf.fit(x, y)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Model has been stopped at epoch #{0:d}".format(clf.epoch))
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    print("Disable early stopping and continue training to the end...")
    clf.callbacks = []
    clf.fit(x, y)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example #11
def test_continue_training():
    print("========== Test continue training the models ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    num_epochs = 5
    clf = GLM(model_name="iris_glm_softmax",
              link='softmax',
              loss='softmax',
              optimizer='sgd',
              batch_size=10,
              num_epochs=num_epochs,
              random_state=random_seed(),
              verbose=1)

    clf.fit(x_train, y_train)

    print("After training for {0:d} epochs".format(num_epochs))
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    clf.num_epochs = 10
    print("Set number of epoch to {0:d}, then continue training...".format(
        clf.num_epochs))
    clf.fit(x_train, y_train)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    save_file_path = clf.save()
    clf1 = Model.load_model(save_file_path)
    clf1.num_epochs = 15
    print("Save, load, set number of epoch to {0:d}, "
          "then continue training...".format(clf.num_epochs))
    clf1.fit(x_train, y_train)
    train_err = 1.0 - clf1.score(x_train, y_train)
    test_err = 1.0 - clf1.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example #12
def test_fogd_softmax_gridsearch():
    print(
        "========== Tune parameters for FOGD for multiclass classification =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack((x_train, x_test))
    y = np.concatenate((y_train, y_test))

    params = {'gamma': [0.5, 1.0], 'learning_rate': [0.01, 0.5, 0.1]}

    ps = PredefinedSplit(test_fold=[-1] * x_train.shape[0] +
                         [1] * x_test.shape[0])

    clf = FOGD(model_name="FOGD_hinge",
               D=100,
               lbd=0.0,
               gamma=0.5,
               loss='hinge',
               catch_exception=True,
               random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=-1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best error {} @ params {}".format(-gs.best_score_, gs.best_params_))

    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(x_train, y_train)

    print("Mistake rate = %.4f" % best_clf.mistake)

    # offline prediction
    print("Offline prediction")
    y_train_pred = best_clf.predict(x_train)
    y_test_pred = best_clf.predict(x_test)
    train_err = 1 - metrics.accuracy_score(y_train, y_train_pred)
    test_err = 1 - metrics.accuracy_score(y_test, y_test_pred)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example #13
def test_checkpoint():
    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack([x_train, x_test])
    y = np.concatenate([y_train, y_test])

    filepath = os.path.join(
        model_dir(), "male/glm/checkpoint_{epoch:04d}_{val_loss:.6f}.pkl")
    checkpoint = ModelCheckpoint(filepath,
                                 mode='min',
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True)
    optz = SGD(learning_rate=0.01)
    clf = GLM(model_name="checkpoint_callback",
              link='softmax',
              loss='softmax',
              optimizer=optz,
              num_epochs=5,
              batch_size=10,
              task='classification',
              metrics=['loss', 'err'],
              callbacks=[checkpoint],
              cv=[-1] * x_train.shape[0] + [0] * x_test.shape[0],
              random_state=random_seed(),
              verbose=1)

    clf.fit(x, y)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    model_filepath = filepath.format(epoch=5, val_loss=0.968786)
    print("Load model at checkpoint: ", model_filepath, ", and predict:")
    clf1 = Model.load_model(model_filepath)
    train_err = 1.0 - clf1.score(x_train, y_train)
    test_err = 1.0 - clf1.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example #14
def test_rks_softmax_gridsearch():
    print(
        "========== Tune parameters for RKS for multiclass classification =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack((x_train, x_test))
    y = np.concatenate((y_train, y_test))

    params = {'gamma': [0.5, 1.0], 'learning_rate': [0.01, 0.03, 0.1]}

    ps = PredefinedSplit(test_fold=[-1] * x_train.shape[0] +
                         [1] * x_test.shape[0])

    clf = RKS(model_name="RKS_hinge",
              D=100,
              lbd=0.0,
              gamma=0.5,
              loss='hinge',
              num_epochs=10,
              catch_exception=True,
              random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=-1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best error {} @ params {}".format(1 - gs.best_score_,
                                             gs.best_params_))

    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(x_train, y_train)

    train_err = 1.0 - best_clf.score(x_train, y_train)
    test_err = 1.0 - best_clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
    assert abs(test_err - (1.0 - gs.best_score_)) < 1e-4
Example #15
def test_mlp_softmax_gridsearch():
    print(
        "========== Tune parameters for MLP for multiclass classification =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack((x_train, x_test))
    y = np.concatenate((y_train, y_test))

    params = {
        'learning_rate': [0.1, 0.05, 0.01],
        'hidden_units_list': [(1, ), (5, ), (20, )],
        'reg_lambda': [0.01, 0.001, 0.0001]
    }

    ps = PredefinedSplit(test_fold=[-1] * x_train.shape[0] +
                         [1] * x_test.shape[0])

    clf = MLP(model_name="mlp_softmax_gridsearch",
              num_epochs=4,
              catch_exception=True,
              random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=-1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best error {} @ params {}".format(1 - gs.best_score_,
                                             gs.best_params_))

    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(x_train, y_train)

    train_err = 1.0 - best_clf.score(x_train, y_train)
    test_err = 1.0 - best_clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
    assert abs(test_err - (1.0 - gs.best_score_)) < 1e-4
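
GridSearchCV fits one candidate per element of the Cartesian product of the grid (times the number of folds); sklearn's ParameterGrid makes the enumeration explicit:

from sklearn.model_selection import ParameterGrid

params = {'learning_rate': [0.1, 0.05, 0.01],
          'hidden_units_list': [(1,), (5,), (20,)],
          'reg_lambda': [0.01, 0.001, 0.0001]}
print(len(ParameterGrid(params)))  # 27 candidates (3 x 3 x 3)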
Example #16
def test_tfglm_softmax_gridsearch():
    print(
        "========== Tune parameters for TensorFlowGLM for multiclass classification =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack((x_train, x_test))
    y = np.concatenate((y_train, y_test))

    params = {'l1_penalty': [0.0, 0.0001], 'l2_penalty': [0.0001, 0.001, 0.01]}

    ps = PredefinedSplit(test_fold=[-1] * x_train.shape[0] +
                         [1] * x_test.shape[0])

    clf = TensorFlowGLM(model_name="TensorFlowGLM_softmax_gridsearch",
                        link='softmax',
                        loss='softmax',
                        num_epochs=10,
                        catch_exception=True,
                        random_state=random_seed())

    # n_jobs=1 here, unlike the other searches: presumably because TensorFlow
    # sessions do not survive joblib's process forking.
    gs = GridSearchCV(clf, params, cv=ps, n_jobs=1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best error {} @ params {}".format(1 - gs.best_score_,
                                             gs.best_params_))

    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(x_train, y_train)

    train_err = 1.0 - best_clf.score(x_train, y_train)
    test_err = 1.0 - best_clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
    assert abs(test_err - (1.0 - gs.best_score_)) < 1e-4
Example #17
def test_rrf_softmax():
    print("========== Test RRF for multiclass classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    clf = RRF(model_name="RRF_hinge",
              D=100,
              lbd=0.01,
              gamma=0.125,
              mode='batch',
              loss='hinge',
              num_epochs=10,
              learning_rate=0.001,
              learning_rate_gamma=0.001,
              random_state=random_seed())

    clf.fit(x_train, y_train)

    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    clf = RRF(model_name="RRF_hinge",
              D=100,
              lbd=0.01,
              gamma=0.125,
              mode='online',
              loss='hinge',
              learning_rate=0.001,
              learning_rate_gamma=0.001,
              random_state=random_seed())

    clf.fit(x_train, y_train)

    print("Mistake rate = %.4f" % clf.mistake)
Example #18
def test_save_load():
    print("========== Test save, load pytorch models ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    x_train = x_train.astype(np.float32)
    y_train = y_train.astype(np.uint8)
    x_test = x_test.astype(np.float32)
    y_test = y_test.astype(np.uint8)
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    clf = PyTorchMLP(model_name='PyTorchMLP',
                     arch='MLPv1',
                     num_epochs=4,
                     batch_size=10,
                     metrics=['loss', 'err'],
                     random_state=random_seed(),
                     verbose=1)

    clf.fit(x_train, y_train)
    print("After training:")

    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    save_file_path = clf.save()
    clf1 = PyTorchModel.load_model(save_file_path)

    print("After save and load:")
    train_err1 = 1.0 - clf1.score(x_train, y_train)
    test_err1 = 1.0 - clf1.score(x_test, y_test)
    print("Training error = %.4f" % train_err1)
    print("Testing error = %.4f" % test_err1)
    assert abs(train_err - train_err1) < 1e-6
    assert abs(test_err - test_err1) < 1e-6
Example #19
def test_tfglm_softmax():
    print(
        "========== Test TensorFlowGLM for multiclass classification =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    clf = TensorFlowGLM(model_name="TensorFlowGLM_softmax",
                        link='softmax',
                        loss='softmax',
                        num_epochs=10,
                        random_state=random_seed())

    clf.fit(x_train, y_train)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example #20
def test_rks_softmax():
    print("========== Test RKS for multiclass classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    clf = RKS(model_name="RKS_hinge",
              D=100,
              lbd=0.0,
              gamma=0.5,
              loss='hinge',
              num_epochs=10,
              random_state=random_seed())

    clf.fit(x_train, y_train)

    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example #21
def test_mlp_softmax():
    print("========== Test MLP for multiclass classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()

    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    clf = MLP(model_name="MLP_softmax",
              hidden_units_list=(5, ),
              batch_size=16,
              num_epochs=4,
              learning_rate=0.1,
              reg_lambda=0.01,
              random_state=random_seed())

    clf.fit(x_train, y_train)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example #22
def test_rmsprop_glm():
    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    print("Training GLM using RMSProp with default parameters...")
    clf = GLM(model_name="rmsprop_glm",
              optimizer='rmsprop',
              num_epochs=10,
              link='softmax',
              loss='softmax',
              random_state=random_seed())

    clf.fit(x_train, y_train)

    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    print("Training GLM using RMSProp with customized parameters...")
    optz = RMSProp(learning_rate=0.01)
    clf = GLM(model_name="rmsprop_glm",
              optimizer=optz,
              num_epochs=10,
              link='softmax',
              loss='softmax',
              random_state=random_seed())

    clf.fit(x_train, y_train)

    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example #23
def test_kmm_softmax_gridsearch():
    print(
        "========== Tune parameters for KMM for multiclass classification =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack((x_train, x_test))
    y = np.concatenate((y_train, y_test))

    params = {'gamma': [0.5, 1.0], 'num_kernels': [1, 2, 4]}

    ps = PredefinedSplit(test_fold=[-1] * x_train.shape[0] +
                         [1] * x_test.shape[0])

    clf = KMM(model_name="KMM_hinge",
              D=4,
              lbd=0.01,
              gamma=0.01,
              mode='batch',
              loss='hinge',
              num_kernels=4,
              batch_size=100,
              temperature=0.1,
              num_epochs=50,
              num_nested_epochs=1,
              learning_rate=0.001,
              learning_rate_mu=0.001,
              learning_rate_gamma=0.001,
              learning_rate_alpha=0.001,
              random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=-1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best error {} @ params {}".format(1.0 - gs.best_score_,
                                             gs.best_params_))

    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(x_train, y_train)

    y_train_pred = best_clf.predict(x_train)
    y_test_pred = best_clf.predict(x_test)
    train_err = 1.0 - metrics.accuracy_score(y_train, y_train_pred)
    test_err = 1.0 - metrics.accuracy_score(y_test, y_test_pred)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    clf = KMM(model_name="KMM_hinge",
              D=100,
              lbd=0.01,
              gamma=0.01,
              mode='online',
              loss='hinge',
              num_kernels=4,
              batch_size=100,
              temperature=0.1,
              num_epochs=10,
              num_nested_epochs=1,
              learning_rate=0.001,
              learning_rate_mu=0.001,
              learning_rate_gamma=0.001,
              learning_rate_alpha=0.001,
              catch_exception=True,
              random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=-1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best error {} @ params {}".format(-gs.best_score_, gs.best_params_))

    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(x_train, y_train)
    print("Mistake rate = %.4f" % best_clf.mistake)
Example #24
def test_glm_save_load(show=False, block_figure_on_end=False):
    print("========== Test Save and Load functions for GLM ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack([x_train, x_test])
    y = np.concatenate([y_train, y_test])

    early_stopping = EarlyStopping(monitor='val_err', patience=5, verbose=1)
    filepath = os.path.join(model_dir(),
                            "male/GLM/iris_{epoch:04d}_{val_err:.6f}.pkl")
    checkpoint = ModelCheckpoint(filepath,
                                 mode='min',
                                 monitor='val_err',
                                 verbose=0,
                                 save_best_only=True)
    loss_display = Display(title="Learning curves",
                           dpi='auto',
                           layout=(3, 1),
                           freq=1,
                           show=show,
                           block_on_end=block_figure_on_end,
                           monitor=[
                               {
                                   'metrics': ['loss', 'val_loss'],
                                   'type': 'line',
                                   'labels':
                                   ["training loss", "validation loss"],
                                   'title': "Learning losses",
                                   'xlabel': "epoch",
                                   'ylabel': "loss",
                               },
                               {
                                   'metrics': ['err', 'val_err'],
                                   'type': 'line',
                                   'title': "Learning errors",
                                   'xlabel': "epoch",
                                   'ylabel': "error",
                               },
                               {
                                   'metrics': ['err'],
                                   'type': 'line',
                                   'labels': ["training error"],
                                   'title': "Learning errors",
                                   'xlabel': "epoch",
                                   'ylabel': "error",
                               },
                           ])

    weight_display = Display(title="Filters",
                             dpi='auto',
                             layout=(1, 1),
                             figsize=(6, 15),
                             freq=1,
                             show=show,
                             block_on_end=block_figure_on_end,
                             monitor=[
                                 {
                                     'metrics': ['weights'],
                                     'title': "Learned weights",
                                     'type': 'img',
                                     'disp_dim': (2, 2),
                                     'tile_shape': (3, 1),
                                 },
                             ])

    clf = GLM(
        model_name="GLM_softmax_cv",
        link='softmax',
        loss='softmax',
        optimizer='sgd',
        num_epochs=4,
        batch_size=10,
        task='classification',
        metrics=['loss', 'err'],
        callbacks=[early_stopping, checkpoint, loss_display, weight_display],
        cv=[-1] * x_train.shape[0] + [0] * x_test.shape[0],
        random_state=random_seed(),
        verbose=1)

    clf.fit(x, y)

    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    save_file_path = os.path.join(model_dir(), "male/GLM/saved_model.pkl")
    clf.save(file_path=save_file_path)
    clf1 = Model.load_model(save_file_path)
    clf1.num_epochs = 10
    clf1.fit(x, y)

    train_err = 1.0 - clf1.score(x_train, y_train)
    test_err = 1.0 - clf1.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example #25
def test_kmm_cv_disp(show=False, block_figure_on_end=False):
    print("========== Test cross-validation for KMM with Display ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack([x_train, x_test])
    y = np.concatenate([y_train, y_test])

    early_stopping = EarlyStopping(monitor='val_err', patience=2, verbose=1)
    filepath = os.path.join(model_dir(),
                            "male/KMM/iris_{epoch:04d}_{val_err:.6f}.pkl")
    checkpoint = ModelCheckpoint(filepath,
                                 mode='min',
                                 monitor='val_err',
                                 verbose=0,
                                 save_best_only=True)
    display = Display(layout=(3, 1),
                      dpi='auto',
                      show=show,
                      block_on_end=block_figure_on_end,
                      monitor=[
                          {
                              'metrics': ['loss', 'val_loss'],
                              'type': 'line',
                              'labels': ["training loss", "validation loss"],
                              'title': "Learning losses",
                              'xlabel': "epoch",
                              'ylabel': "loss",
                          },
                          {
                              'metrics': ['err', 'val_err'],
                              'type': 'line',
                              'title': "Learning errors",
                              'xlabel': "epoch",
                              'ylabel': "error",
                          },
                          {
                              'metrics': ['err'],
                              'type': 'line',
                              'labels': ["training error"],
                              'title': "Learning errors",
                              'xlabel': "epoch",
                              'ylabel': "error",
                          },
                      ])

    clf = KMM(model_name="KMM_hinge",
              D=20,
              lbd=0.0,
              gamma=0.1,
              mode='batch',
              loss='hinge',
              num_kernels=3,
              batch_size=100,
              temperature=1.0,
              num_epochs=10,
              num_nested_epochs=1,
              learning_rate=0.1,
              learning_rate_mu=0.0,
              learning_rate_gamma=0.1,
              learning_rate_alpha=0.1,
              metrics=['loss', 'err'],
              callbacks=[display, early_stopping, checkpoint],
              cv=[-1] * x_train.shape[0] + [0] * x_test.shape[0],
              random_state=random_seed(),
              verbose=1)

    clf.fit(x, y)

    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)