Beispiel #1
0
def test_rsrbm_pipeline():
    """Fit a scikit-learn pipeline (ReplicatedSoftmaxRBM features -> 1-NN)
    on 20 Newsgroups and print train/test error rates."""
    print(
        "========== Test the pipeline of "
        "ReplicatedSoftmaxRBM followed by k-nearest-neighbors (kNN) =========="
    )

    np.random.seed(random_seed())

    from sklearn.pipeline import Pipeline
    from sklearn.neighbors import KNeighborsClassifier

    (x_train, y_train), (x_test, y_test) = demo.load_20newsgroups()

    # Unsupervised feature extractor over 5000-dimensional inputs.
    feature_extractor = ReplicatedSoftmaxRBM(num_hidden=15,
                                             num_visible=5000,
                                             batch_size=32,
                                             num_epochs=4,
                                             learning_rate=0.001,
                                             learning_rate_hidden=0.00001,
                                             momentum_method='sudden',
                                             weight_cost=2e-4,
                                             random_state=random_seed(),
                                             verbose=0)
    classifier = KNeighborsClassifier(n_neighbors=1)

    clf = Pipeline([('rbm', feature_extractor), ('knn', classifier)])
    clf.fit(x_train, y_train)

    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Beispiel #2
0
def test_mlp_check_grad():
    """Check MLP analytic gradients against numerical gradients.

    Builds the same one-hidden-layer softmax classifier for three
    regularization strengths and asserts that ``check_grad`` (gradient
    discrepancy on random data) stays below ``eps``.

    Improvement over the original: the three byte-identical model
    constructions (differing only in ``reg_lambda``) are collapsed
    into a single loop.
    """
    eps = 1e-6
    num_data = 10
    num_features = 4
    num_classes = 3
    x = np.random.rand(num_data, num_features)
    y = np.random.randint(0, num_classes, num_data)

    # Identical architecture; only the regularization strength varies.
    for reg_lambda in (0.0, 0.01, 0.1):
        model = MLP(model_name="checkgrad_MLP_softmax",
                    task='classification',
                    hidden_units_list=(5, ),
                    reg_lambda=reg_lambda,
                    random_state=random_seed())
        assert model.check_grad(x, y) < eps
Beispiel #3
0
def test_bbrbm_logpartition():
    """Train a small BernoulliBernoulliRBM on MNIST, then report the exact
    log-partition function and exact/CSL test log-likelihoods."""
    print(
        "========== Test Computing log-partition function of BernoulliBernoulliRBM =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_mnist()

    # Small num_hidden keeps the exact log-partition computation feasible
    # (presumably enumerates hidden configurations -- verify in RBM code).
    rbm_config = dict(num_hidden=15,
                      num_visible=784,
                      batch_size=100,
                      num_epochs=4,
                      momentum_method='sudden',
                      weight_cost=2e-4,
                      metrics=['recon_err'],
                      random_state=random_seed(),
                      verbose=1)
    model = BernoulliBernoulliRBM(**rbm_config)
    model.fit(x_train)

    exact_logpart = model.get_logpartition(method='exact')
    print("Exact log-partition function = %.4f" % exact_logpart)

    test_exact_loglik = model.get_loglik(x_test, method='exact').mean()
    print("Exact log-likelihood of testing data = %.4f" % test_exact_loglik)

    test_csl_loglik = model.get_loglik(x_test,
                                       method='csl',
                                       num_hidden_samples=100,
                                       num_steps=100).mean()
    print("CSL log-likelihood of testing data = %.4f" % test_csl_loglik)
Beispiel #4
0
def test_glm_softmax():
    """Softmax GLM on Iris, trained with the default optimizer and with SGD.

    Improvement over the original: the duplicated build/fit/score/print
    sequence (identical except for the optimizer argument) is collapsed
    into one loop over the two optimizer configurations. Printed output
    is unchanged.
    """
    print("========== Test GLM for multiclass classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    # First run uses the class-default optimizer, second forces plain SGD.
    for extra_params in ({}, {'optimizer': 'sgd'}):
        clf = GLM(model_name="GLM_softmax",
                  link='softmax',
                  loss='softmax',
                  random_state=random_seed(),
                  **extra_params)

        print("Use {} optimizer".format(clf.optimizer))
        clf.fit(x_train, y_train)
        train_err = 1.0 - clf.score(x_train, y_train)
        test_err = 1.0 - clf.score(x_test, y_test)
        print("Training error = %.4f" % train_err)
        print("Testing error = %.4f" % test_err)
Beispiel #5
0
def test_fogd_softmax():
    """FOGD with hinge loss on Iris: online mistake rate plus offline
    train/test error rates."""
    print("========== Test FOGD for multiclass classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    learner = FOGD(model_name="FOGD_hinge",
                   D=100,
                   lbd=0.0,
                   gamma=0.5,
                   loss='hinge',
                   random_state=random_seed())
    learner.fit(x_train, y_train)

    # Online statistic accumulated during the training pass.
    print("Mistake rate = %.4f" % learner.mistake)

    # offline prediction
    print("Offline prediction")
    y_train_pred = learner.predict(x_train)
    y_test_pred = learner.predict(x_test)
    train_err = 1 - metrics.accuracy_score(y_train, y_train_pred)
    test_err = 1 - metrics.accuracy_score(y_test, y_test_pred)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Beispiel #6
0
def test_bbrbm_csl():
    """Conservative Sampling-based Likelihood (CSL) estimates of a small
    BernoulliBernoulliRBM trained on MNIST, on both data splits.

    Improvement over the original: the duplicated get_loglik/print
    sequence for train and test data is collapsed into one loop; the
    printed output is unchanged.
    """
    print("========== Test Conservative Sampling-based Likelihood (CSL) "
          "of BernoulliBernoulliRBM ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_mnist()

    model = BernoulliBernoulliRBM(num_hidden=15,
                                  num_visible=784,
                                  batch_size=100,
                                  num_epochs=4,
                                  momentum_method='sudden',
                                  weight_cost=2e-4,
                                  metrics=['recon_err'],
                                  random_state=random_seed(),
                                  verbose=1)

    model.fit(x_train)

    # Identical CSL estimate on the training and testing splits.
    for split_name, data in (("Training", x_train), ("Testing", x_test)):
        csl = model.get_loglik(data,
                               method='csl',
                               num_hidden_samples=100,
                               num_steps=10).mean()
        print("%s log-likelihood computed using CSL = %.4f" % (split_name, csl))
Beispiel #7
0
def test_cgan_mnist(show_figure=False, block_figure_on_end=False):
    """Train a tiny conditional GAN on MNIST (scaled-down settings for a
    quick test run; see inline comments for full-run values)."""
    print("========== Test CGAN on MNIST data ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_mnist()
    # Rescale pixels (presumably in [0, 1] -- verify loader) to [-1, 1]
    # and restore the (H, W, C) image shape.
    x_train = x_train.astype(np.float32).reshape([-1, 28, 28, 1]) / 0.5 - 1.
    x_test = x_test.astype(np.float32).reshape([-1, 28, 28, 1]) / 0.5 - 1.

    loss_display = Display(layout=(1, 1),
                           dpi='auto',
                           show=show_figure,
                           block_on_end=block_figure_on_end,
                           monitor=[{
                               'metrics': ['d_loss', 'g_loss'],
                               'type': 'line',
                               'labels': ["discriminator loss", "generator loss"],
                               'title': "Losses",
                               'xlabel': "epoch",
                               'ylabel': "loss",
                           }])
    sample_display = Display(layout=(1, 1),
                             dpi='auto',
                             figsize=(10, 10),
                             freq=1,
                             show=show_figure,
                             block_on_end=block_figure_on_end,
                             monitor=[{
                                 'metrics': ['x_samples'],
                                 'title': "Generated data",
                                 'type': 'img',
                                 'num_samples': 100,
                                 'tile_shape': (10, 10),
                             }])

    model = CGAN(model_name="CGAN_MNIST",
                 num_z=10,  # set to 100 for a full run
                 z_prior=Uniform1D(low=-1.0, high=1.0),
                 img_size=(28, 28, 1),
                 batch_size=64,
                 num_conv_layers=3,
                 num_gen_feature_maps=2,  # set to 32 for a full run
                 num_dis_feature_maps=2,  # set to 32 for a full run
                 metrics=['d_loss', 'g_loss'],
                 callbacks=[loss_display, sample_display],
                 num_epochs=1,  # set to 100 for a full run
                 random_state=random_seed(),
                 verbose=1)

    model.fit(x_train, y_train)
Beispiel #8
0
def test_glm_regression():
    """Linear GLM with quadratic loss on the housing data; prints
    train/test MSE."""
    print("========== Test GLM for regression ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_housing()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    regressor = GLM(
        model_name="GLM_regression",
        task='regression',
        link='linear',  # link function
        loss='quadratic',  # loss function
        l2_penalty=0.0,  # ridge regularization (disabled)
        l1_penalty=0.0,  # Lasso regularization (disabled)
        l1_smooth=1E-5,  # smoothing for Lasso regularization
        l1_method='pseudo_huber',  # approximation method for L1-norm
        random_state=random_seed())
    regressor.fit(x_train, y_train)

    # score() is negated here; presumably it returns negative MSE -- verify.
    train_err = -regressor.score(x_train, y_train)
    test_err = -regressor.score(x_test, y_test)
    print("Training MSE = %.4f" % train_err)
    print("Testing MSE = %.4f" % test_err)
Beispiel #9
0
def test_dualsgd_softmax():
    """DualSGD with hinge loss and k-merging budget maintenance on Iris."""
    print("========== Test DualSGD for multiclass classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    learner = DualSGD(model_name="DualSGD_hinge",
                      k=20,
                      D=200,
                      gamma=1.0,
                      lbd=3.3593684387335183e-05,
                      loss='hinge',
                      maintain='k-merging',  # budget maintenance strategy
                      max_budget_size=100,
                      random_state=random_seed())
    learner.fit(x_train, y_train)

    # Online statistics gathered during the training pass.
    print("Mistake rate = %.4f" % learner.mistake)
    print("Budget size = %d" % learner.budget_size)

    # offline prediction
    print("Offline prediction")
    y_train_pred = learner.predict(x_train)
    y_test_pred = learner.predict(x_test)
    train_err = 1 - metrics.accuracy_score(y_train, y_train_pred)
    test_err = 1 - metrics.accuracy_score(y_test, y_test_pred)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Beispiel #10
0
def test_dualsgd_regression():
    """DualSGD with eps-insensitive loss on the housing regression data."""
    print("========== Test DualSGD for Regression ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_housing()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    regressor = DualSGD(model_name="DualSGD_eps_insensitive",
                        k=20,
                        D=200,
                        gamma=1.0,
                        eps=0.001,
                        lbd=0.00128,
                        loss='eps_insensitive',
                        maintain='k-merging',  # budget maintenance strategy
                        max_budget_size=100,
                        random_state=random_seed())
    regressor.fit(x_train, y_train)

    # Online statistics gathered during the training pass.
    print("Mistake rate = %.4f" % regressor.mistake)
    print("Budget size = %d" % regressor.budget_size)

    # offline prediction
    print("Offline prediction")
    y_train_pred = regressor.predict(x_train)
    y_test_pred = regressor.predict(x_test)
    train_err = metrics.mean_squared_error(y_train, y_train_pred)
    test_err = metrics.mean_squared_error(y_test, y_test_pred)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Beispiel #11
0
def test_save_load():
    """Round-trip a trained TensorFlowGLM through save()/load_model() and
    assert that train/test errors are unchanged afterwards."""
    print("========== Test save, load tensorflow models ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    clf = TensorFlowGLM(model_name="iris_TensorFlowGLM_softmax",
                        link='softmax',
                        loss='softmax',
                        num_epochs=5,
                        random_state=random_seed())
    clf.fit(x_train, y_train)

    print("After training:")
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    # Persist the trained model, then reload it into a fresh object.
    restored = TensorFlowModel.load_model(clf.save())

    print("After save and load:")
    train_err1 = 1.0 - restored.score(x_train, y_train)
    test_err1 = 1.0 - restored.score(x_test, y_test)
    print("Training error = %.4f" % train_err1)
    print("Testing error = %.4f" % test_err1)
    # The reloaded model must reproduce the original scores.
    assert abs(train_err - train_err1) < 1e-6
    assert abs(test_err - test_err1) < 1e-6
Beispiel #12
0
def test_node2vec_wiki_pos(show_figure=False, block_figure_on_end=False):
    """Learn Node2Vec embeddings on the Wikipedia-POS graph, then evaluate
    them with a multilabel logistic GLM (weighted F1)."""
    graph, labels, walks = demo.load_wikipos()
    y = labels
    x = np.arange(len(labels))  # node ids 0..n-1

    embedder = Node2Vec(model_name='Node2Vec',
                        emb_size=16,
                        window=3,
                        num_walks=2,
                        walk_length=10,
                        p=1.0,
                        q=1.0,
                        num_workers=4,
                        directed=False)
    embedder.fit(x, y, walks=walks)

    # Hold out 10% of the nodes for evaluation.
    splitter = ShuffleSplit(n_splits=1, test_size=0.1,
                            random_state=random_seed())
    train_idx, test_idx = next(splitter.split(x))
    x_train, y_train = embedder.transform(x[train_idx]), y[train_idx]
    x_test, y_test = embedder.transform(x[test_idx]), y[test_idx]

    clf = GLM(model_name="GLM_multilogit",
              task='multilabel',
              link='logit',
              loss='multilogit',
              random_state=random_seed())
    clf.fit(x_train, y_train)

    train_f1 = clf.score(x_train, y_train)
    test_f1 = clf.score(x_test, y_test)
    print("Training weighted-F1-macro = %.4f" % train_f1)
    print("Testing weighted-F1-macro = %.4f" % test_f1)
Beispiel #13
0
def test_bbrbm_pipeline():
    """Fit a scikit-learn pipeline (BernoulliBernoulliRBM features -> 1-NN)
    on MNIST and print train/test error rates."""
    print(
        "========== Test the pipeline of "
        "BernoulliBernoulliRBM followed by k-nearest-neighbors (kNN) =========="
    )

    np.random.seed(random_seed())

    from sklearn.pipeline import Pipeline
    from sklearn.neighbors import KNeighborsClassifier

    (x_train, y_train), (x_test, y_test) = demo.load_mnist()

    # Unsupervised feature extractor over 784-dimensional pixel vectors.
    feature_extractor = BernoulliBernoulliRBM(num_hidden=15,
                                              num_visible=784,
                                              batch_size=100,
                                              num_epochs=4,
                                              momentum_method='sudden',
                                              weight_cost=2e-4,
                                              random_state=random_seed(),
                                              verbose=0)
    classifier = KNeighborsClassifier(n_neighbors=1)

    clf = Pipeline([('rbm', feature_extractor), ('knn', classifier)])
    clf.fit(x_train, y_train)

    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Beispiel #14
0
def test_dfm_save_and_load(show_figure=False, block_figure_on_end=False):
    """Train a tiny DFM on MNIST, save it, reload it, and continue training
    the reloaded model."""
    print("========== Test Save and Load functions of DFM on MNIST data ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_mnist()
    # Rescale pixels (presumably in [0, 1] -- verify loader) to [-1, 1]
    # and restore the (H, W, C) image shape.
    x_train = x_train.astype(np.float32).reshape([-1, 28, 28, 1]) / 0.5 - 1.
    x_test = x_test.astype(np.float32).reshape([-1, 28, 28, 1]) / 0.5 - 1.

    loss_display = Display(layout=(1, 1),
                           dpi='auto',
                           show=show_figure,
                           block_on_end=block_figure_on_end,
                           monitor=[{
                               'metrics': ['d_loss', 'g_loss'],
                               'type': 'line',
                               'labels': ["discriminator loss", "generator loss"],
                               'title': "Losses",
                               'xlabel': "epoch",
                               'ylabel': "loss",
                           }])
    sample_display = Display(layout=(1, 1),
                             dpi='auto',
                             figsize=(10, 10),
                             freq=1,
                             show=show_figure,
                             block_on_end=block_figure_on_end,
                             monitor=[{
                                 'metrics': ['x_samples'],
                                 'title': "Generated data",
                                 'type': 'img',
                                 'num_samples': 100,
                                 'tile_shape': (10, 10),
                             }])

    model = DFM(model_name="DFM_MNIST_SaveLoad",
                num_z=10,  # set to 100 for a full run
                img_size=(28, 28, 1),
                batch_size=32,  # set to 64 for a full run
                num_conv_layers=3,
                num_gen_feature_maps=4,  # set to 32 for a full run
                num_dis_feature_maps=4,  # set to 32 for a full run
                alpha=0.03 / 10,  # 0.03 / 1024
                noise_std=1.0,
                num_dfm_layers=1,  # 2
                num_dfm_hidden=10,  # 1024
                metrics=['d_loss', 'g_loss'],
                callbacks=[loss_display, sample_display],
                num_epochs=4,  # set to 100 for a full run
                random_state=random_seed(),
                verbose=1)

    model.fit(x_train)

    # Persist, reload, and make sure the restored model can keep training.
    save_file_path = model.save()
    reloaded = TensorFlowModel.load_model(save_file_path)
    reloaded.num_epochs = 10
    reloaded.fit(x_train)
Beispiel #15
0
def test_rrf_cv_gridsearch():
    """Tune RRF hyper-parameters (gamma, learning_rate) with GridSearchCV
    over a single predefined train/validation fold, then manually refit the
    best configuration and check it reproduces the grid-search score.
    """
    print(
        "========== Tune parameters for RRF including cross-validation =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    # The test set is stacked twice: the first copy serves as the model's
    # internal validation data (via `cv` below), the second copy is the
    # single held-out fold that GridSearchCV scores on.
    x = np.vstack([x_train, x_test, x_test])
    y = np.concatenate([y_train, y_test, y_test])

    params = {'gamma': [0.5, 1.0], 'learning_rate': [0.01, 0.05, 0.1]}

    # test_fold: -1 = always in the training split; 1 = the evaluation fold.
    ps = PredefinedSplit(test_fold=[-1] * x_train.shape[0] +
                         [-1] * x_test.shape[0] + [1] * x_test.shape[0])

    early_stopping = EarlyStopping(monitor='val_err', patience=2)
    # Checkpoint filename template is filled with epoch and val_err.
    filepath = os.path.join(
        model_dir(), "male/RRF/search/mnist_{epoch:04d}_{val_err:.6f}.pkl")
    checkpoint = ModelCheckpoint(filepath,
                                 mode='min',
                                 monitor='val_err',
                                 verbose=0,
                                 save_best_only=True)

    clf = RRF(model_name="RRF_hinge",
              D=100,
              lbd=0.01,
              gamma=0.125,
              mode='batch',
              loss='hinge',
              num_epochs=10,
              learning_rate=0.001,
              learning_rate_gamma=0.001,
              metrics=['loss', 'err'],
              callbacks=[early_stopping, checkpoint],
              # Internal validation split: train part (-1) vs first x_test
              # copy (fold 0) of the data each grid-search candidate sees.
              cv=[-1] * x_train.shape[0] + [0] * x_test.shape[0],
              # Keep the grid search alive if a candidate crashes.
              catch_exception=True,
              random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=-1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best error {} @ params {}".format(1 - gs.best_score_,
                                             gs.best_params_))

    # refit=False above, so rebuild the best model manually and train it on
    # the same data the winning candidate was fitted on (train + test).
    best_clf = clone(clf).set_params(**gs.best_params_)

    best_clf.fit(np.vstack([x_train, x_test]),
                 np.concatenate([y_train, y_test]))

    train_err = 1.0 - best_clf.score(x_train, y_train)
    test_err = 1.0 - best_clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
    # The manual refit should reproduce the grid-search evaluation.
    assert abs(test_err - (1.0 - gs.best_score_)) < 1e-4
Beispiel #16
0
def test_kmm_cv_gridsearch():
    """Tune KMM hyper-parameters (gamma, num_kernels) with GridSearchCV over
    a single predefined train/validation fold, then manually refit the best
    configuration and check it reproduces the grid-search score.
    """
    print(
        "========== Tune parameters for KMM including cross-validation =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    # The test set is stacked twice: the first copy serves as the model's
    # internal validation data (via `cv` below), the second copy is the
    # single held-out fold that GridSearchCV scores on.
    x = np.vstack([x_train, x_test, x_test])
    y = np.concatenate([y_train, y_test, y_test])

    params = {'gamma': [0.5, 1.0], 'num_kernels': [1, 2, 4]}

    # test_fold: -1 = always in the training split; 1 = the evaluation fold.
    ps = PredefinedSplit(test_fold=[-1] * x_train.shape[0] +
                         [-1] * x_test.shape[0] + [1] * x_test.shape[0])

    early_stopping = EarlyStopping(monitor='val_loss', patience=2)

    clf = KMM(model_name="KMM_hinge",
              D=20,
              lbd=0.0,
              gamma=0.1,
              mode='batch',
              loss='hinge',
              num_kernels=4,
              batch_size=100,
              temperature=1.0,
              num_epochs=10,
              num_nested_epochs=1,
              learning_rate=0.1,
              learning_rate_mu=0.0,
              learning_rate_gamma=0.1,
              learning_rate_alpha=0.1,
              metrics=['loss', 'err'],
              callbacks=[early_stopping],
              # Internal validation split: train part (-1) vs first x_test
              # copy (fold 0) of the data each grid-search candidate sees.
              cv=[-1] * x_train.shape[0] + [0] * x_test.shape[0],
              # Keep the grid search alive if a candidate crashes.
              catch_exception=True,
              random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=-1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best error {} @ params {}".format(1 - gs.best_score_,
                                             gs.best_params_))

    # refit=False above, so rebuild the best model manually and train it on
    # the same data the winning candidate was fitted on (train + test).
    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(np.vstack([x_train, x_test]),
                 np.concatenate([y_train, y_test]))

    train_err = 1.0 - best_clf.score(x_train, y_train)
    test_err = 1.0 - best_clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
    # The manual refit should reproduce the grid-search evaluation.
    assert abs(test_err - (1 - gs.best_score_)) < 1e-4
Beispiel #17
0
def test_wgan_cifar10(show_figure=False, block_figure_on_end=False):
    """Train a tiny WGAN on a 128-image subset of CIFAR10, writing loss and
    sample figures plus logs under the model directory."""
    print("========== Test WGAN on CIFAR10 data ==========")

    np.random.seed(random_seed())

    num_data = 128
    (x_train, y_train), (x_test, y_test) = demo.load_cifar10()
    # Keep only a small training subset; rescale pixels (presumably in
    # [0, 1] -- verify loader) to [-1, 1].
    x_train = x_train[:num_data].astype(np.float32).reshape([-1, 32, 32, 3]) / 0.5 - 1.
    x_test = x_test.astype(np.float32).reshape([-1, 32, 32, 3]) / 0.5 - 1.

    root_dir = os.path.join(model_dir(), "male/WGAN/CIFAR10")
    loss_filepaths = [os.path.join(root_dir, "loss/loss_{epoch:04d}.png"),
                      os.path.join(root_dir, "loss/loss_{epoch:04d}.pdf")]
    loss_display = Display(layout=(1, 1),
                           dpi='auto',
                           show=show_figure,
                           block_on_end=block_figure_on_end,
                           filepath=loss_filepaths,
                           monitor=[{
                               'metrics': ['d_loss', 'g_loss'],
                               'type': 'line',
                               'labels': ["discriminator loss", "generator loss"],
                               'title': "Losses",
                               'xlabel': "epoch",
                               'ylabel': "loss",
                           }])
    sample_filepath = os.path.join(root_dir, "samples/samples_{epoch:04d}.png")
    sample_display = Display(layout=(1, 1),
                             dpi='auto',
                             figsize=(10, 10),
                             freq=1,
                             show=show_figure,
                             block_on_end=block_figure_on_end,
                             filepath=sample_filepath,
                             monitor=[{
                                 'metrics': ['x_samples'],
                                 'title': "Generated data",
                                 'type': 'img',
                                 'num_samples': 100,
                                 'tile_shape': (10, 10),
                             }])

    model = WGAN(model_name="WGAN_CIFAR10",
                 num_z=10,  # set to 100 for a full run
                 z_prior=Uniform1D(low=-1.0, high=1.0),
                 img_size=(32, 32, 3),
                 batch_size=16,  # set to 64 for a full run
                 num_conv_layers=3,
                 num_gen_feature_maps=4,  # set to 32 for a full run
                 num_dis_feature_maps=4,  # set to 32 for a full run
                 metrics=['d_loss', 'g_loss'],
                 callbacks=[loss_display, sample_display],
                 num_epochs=4,  # set to 100 for a full run
                 log_path=os.path.join(root_dir, "logs"),
                 random_state=random_seed(),
                 verbose=1)

    model.fit(x_train)
Beispiel #18
0
def test_kmm_softmax():
    """KMM with hinge loss on Iris, first in batch mode (offline scores),
    then in online mode (mistake rate)."""
    print("========== Test KMM for multiclass classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    # Batch mode: train for a fixed number of epochs, then score offline.
    batch_clf = KMM(model_name="KMM_hinge",
                    D=4,
                    lbd=0.01,
                    gamma=0.01,
                    mode='batch',
                    loss='hinge',
                    num_kernels=4,
                    batch_size=100,
                    temperature=0.1,
                    num_epochs=10,
                    num_nested_epochs=1,
                    learning_rate=0.001,
                    learning_rate_mu=0.001,
                    learning_rate_gamma=0.001,
                    learning_rate_alpha=0.001,
                    random_state=random_seed())
    batch_clf.fit(x_train, y_train)

    train_err = 1.0 - batch_clf.score(x_train, y_train)
    test_err = 1.0 - batch_clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    # Online mode: report the mistake rate accumulated during training.
    online_clf = KMM(model_name="KMM_hinge",
                     D=100,
                     lbd=0.0,
                     gamma=0.01,
                     mode='online',
                     loss='hinge',
                     num_kernels=4,
                     batch_size=100,
                     temperature=0.1,
                     num_nested_epochs=1,
                     learning_rate=0.001,
                     learning_rate_mu=0.001,
                     learning_rate_gamma=0.001,
                     learning_rate_alpha=0.001,
                     random_state=random_seed(),
                     verbose=1)
    online_clf.fit(x_train, y_train)

    print("Mistake rate = %.4f" % online_clf.mistake)
def run_one_candidate(candidate_params):
    """Evaluate one OnlineDualSVRG hyper-parameter candidate, averaged
    over several runs.

    Relies on module-level globals ``dataset``, ``oracle`` and
    ``loss_func`` (NOTE(review): assumed to be defined elsewhere in this
    module -- confirm).

    Args:
        candidate_params: dict of keyword overrides forwarded to
            OnlineDualSVRG.

    Returns:
        Tuple ``(avg_mistake_rate, avg_train_time, candidate_params)``.

    Raises:
        FileNotFoundError: if a required libsvm data file is missing.
            (Improvement over the original, which raised a bare
            ``Exception('File not found')`` without naming the file;
            FileNotFoundError is a subclass of Exception, so existing
            handlers still catch it.)
    """
    num_runs = 3
    np.random.seed(random_seed())
    mistake_rate_avg = 0
    train_time_avg = 0
    for ri in range(num_runs):
        print('----------------------------------')
        print('Run #{0}:'.format(ri + 1))

        data_name = dataset  # module-level global
        n_features = 300

        train_file_name = os.path.join(data_dir(), data_name + '_train.libsvm')
        test_file_name = os.path.join(data_dir(), data_name + '_test.libsvm')

        # Fail early, naming the offending path in the message.
        for file_name in (train_file_name, test_file_name):
            if not os.path.exists(file_name):
                raise FileNotFoundError('File not found: {}'.format(file_name))

        x_train, y_train = load_svmlight_file(train_file_name,
                                              n_features=n_features)
        x_test, y_test = load_svmlight_file(test_file_name,
                                            n_features=n_features)

        # Densify the sparse libsvm matrices before stacking.
        x_train = x_train.toarray()
        x_test = x_test.toarray()

        # Online evaluation runs over the full data stream (train + test).
        x_total = np.vstack((x_train, x_test))
        y_total = np.concatenate((y_train, y_test))

        print('Num total samples: {}'.format(x_total.shape[0]))

        clf = OnlineDualSVRG(
            # regular_param=0.01,
            learning_rate_scale=0.8,
            # gamma=2.0,
            rf_dim=4000,
            num_epochs=1,
            freq_update_full_model=100,
            oracle=oracle,  # module-level global
            core_max=100,
            coverage_radius=0.9,
            loss_func=loss_func,  # module-level global
            smooth_hinge_theta=0.5,
            smooth_hinge_tau=0.5,
            random_state=random_seed(),
            **candidate_params,
        )
        print('Running ...')
        clf.fit(x_total, y_total)
        print('Mistake rate: {0:.2f}%, Training time: {1} seconds'.format(
            clf.mistake_rate * 100, int(clf.train_time)))
        mistake_rate_avg += clf.mistake_rate
        train_time_avg += clf.train_time
    return mistake_rate_avg / num_runs, train_time_avg / num_runs, candidate_params
Beispiel #20
0
def test_weighted_gan_cifar10(show_figure=False, block_figure_on_end=False):
    """Train a tiny Weighted-GAN on CIFAR10 (scaled-down settings for a
    quick test run; see inline comments for full-run values)."""
    print("========== Test Weighted-GAN on CIFAR10 data ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_cifar10()
    # Rescale pixels (presumably in [0, 1] -- verify loader) to [-1, 1]
    # and restore the (H, W, C) image shape.
    x_train = x_train.astype(np.float32).reshape([-1, 32, 32, 3]) / 0.5 - 1.
    x_test = x_test.astype(np.float32).reshape([-1, 32, 32, 3]) / 0.5 - 1.

    loss_display = Display(layout=(1, 1),
                           dpi='auto',
                           show=show_figure,
                           block_on_end=block_figure_on_end,
                           monitor=[{
                               'metrics': ['d_loss', 'g_loss'],
                               'type': 'line',
                               'labels': ["discriminator loss", "generator loss"],
                               'title': "Losses",
                               'xlabel': "epoch",
                               'ylabel': "loss",
                           }])
    sample_display = Display(layout=(1, 1),
                             dpi='auto',
                             figsize=(10, 10),
                             freq=1,
                             show=show_figure,
                             block_on_end=block_figure_on_end,
                             monitor=[{
                                 'metrics': ['x_samples'],
                                 'title': "Generated data",
                                 'type': 'img',
                                 'num_samples': 100,
                                 'tile_shape': (10, 10),
                             }])

    model = WeightedGAN(model_name="WeightedGAN_CIFAR10",
                        alpha=0.5,
                        num_z=10,  # set to 100 for a full run
                        img_size=(32, 32, 3),
                        batch_size=32,  # set to 64 for a full run
                        num_conv_layers=3,
                        num_gen_feature_maps=4,  # set to 32 for a full run
                        num_dis_feature_maps=4,  # set to 32 for a full run
                        metrics=['d_loss', 'g_loss'],
                        callbacks=[loss_display, sample_display],
                        num_epochs=4,  # set to 100 for a full run
                        random_state=random_seed(),
                        verbose=1)

    model.fit(x_train)
Beispiel #21
0
def test_srbm_gridsearch():
    """Tune Supervised RBM hyper-parameters with a grid search over a
    predefined train/validation split of MNIST."""
    print("========== Tune parameters for Supervised RBM ==========")

    from sklearn.model_selection import GridSearchCV, PredefinedSplit

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_mnist()

    num_train, num_test = x_train.shape[0], x_test.shape[0]

    # The test portion is stacked twice: the first copy serves as the
    # validation set for early stopping, the second as the held-out fold
    # used by the grid search.
    data = np.vstack([x_train, x_test, x_test])
    labels = np.concatenate([y_train, y_test, y_test])

    stopper = EarlyStopping(monitor='val_loss', patience=2, verbose=1)

    param_grid = dict(batch_size=[64, 100],
                      learning_rate=[0.1, 0.01],
                      weight_cost=[0.001, 0.0001])

    rbm = SupervisedRBM(num_hidden=15,
                        num_visible=784,
                        batch_size=100,
                        num_epochs=4,
                        learning_rate=0.01,
                        momentum_method='sudden',
                        weight_cost=0.0,
                        inference_engine='variational_inference',
                        approx_method='first_order',
                        metrics=['loss'],
                        callbacks=[stopper],
                        cv=[-1] * num_train + [0] * num_test,
                        random_state=random_seed(),
                        verbose=0)

    # -1 marks samples never used as a test fold; 1 marks the search fold.
    split = PredefinedSplit(
        test_fold=[-1] * (num_train + num_test) + [1] * num_test)

    searcher = GridSearchCV(rbm,
                            param_grid,
                            cv=split,
                            n_jobs=-1,
                            refit=False,
                            verbose=True)
    searcher.fit(data, labels)

    print("Best error {} @ params {}".format(1.0 - searcher.best_score_,
                                             searcher.best_params_))
Beispiel #22
0
def test_continue_training():
    """Train a PyTorch MLP, continue training the same instance with more
    epochs, then save/load the model and continue training the copy."""
    print("========== Test continue training pytorch models ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    # PyTorch models expect float32 features and integer class labels.
    x_train = x_train.astype(np.float32)
    y_train = y_train.astype(np.uint8)
    x_test = x_test.astype(np.float32)
    y_test = y_test.astype(np.uint8)
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    num_epochs = 5
    clf = PyTorchMLP(model_name='PyTorchMLP',
                     arch='MLPv1',
                     # Bug fix: pass the declared `num_epochs` variable.
                     # The original hard-coded 4 here while the message
                     # below reported training for 5 epochs (the sibling
                     # GLM test passes the variable, too).
                     num_epochs=num_epochs,
                     batch_size=10,
                     metrics=['loss', 'err'],
                     random_state=random_seed(),
                     verbose=1)

    clf.fit(x_train, y_train)

    print("After training for {0:d} epochs".format(num_epochs))
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    clf.num_epochs = 10
    print("Set number of epoch to {0:d}, then continue training...".format(
        clf.num_epochs))
    clf.fit(x_train, y_train)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    save_file_path = clf.save()
    clf1 = PyTorchModel.load_model(save_file_path)
    clf1.num_epochs = 15
    print("Save, load, set number of epoch to {0:d}, "
          "then continue training...".format(clf1.num_epochs))
    clf1.fit(x_train, y_train)
    train_err = 1.0 - clf1.score(x_train, y_train)
    test_err = 1.0 - clf1.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Beispiel #23
0
def test_kmm_cv():
    """Cross-validate a KMM classifier on Iris with early stopping and
    best-model checkpointing driven by the validation error."""
    print("========== Test cross-validation for KMM ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    data = np.vstack([x_train, x_test])
    labels = np.concatenate([y_train, y_test])

    stopper = EarlyStopping(monitor='val_err', patience=2, verbose=1)
    checkpoint_path = os.path.join(
        model_dir(), "male/KMM/iris_{epoch:04d}_{val_err:.6f}.pkl")
    saver = ModelCheckpoint(checkpoint_path,
                            mode='min',
                            monitor='val_err',
                            verbose=0,
                            save_best_only=True)

    # -1 marks training points, 0 marks the validation fold.
    fold = [-1] * x_train.shape[0] + [0] * x_test.shape[0]

    learner = KMM(model_name="KMM_hinge",
                  D=20,
                  lbd=0.0,
                  gamma=0.1,
                  mode='batch',
                  loss='hinge',
                  num_kernels=3,
                  batch_size=100,
                  temperature=1.0,
                  num_epochs=10,
                  num_nested_epochs=1,
                  learning_rate=0.1,
                  learning_rate_mu=0.0,
                  learning_rate_gamma=0.1,
                  learning_rate_alpha=0.1,
                  metrics=['loss', 'err'],
                  callbacks=[stopper, saver],
                  cv=fold,
                  random_state=random_seed(),
                  verbose=1)

    learner.fit(data, labels)

    print("Training error = %.4f" % (1.0 - learner.score(x_train, y_train)))
    print("Testing error = %.4f" % (1.0 - learner.score(x_test, y_test)))
Beispiel #24
0
def check_grad():
    """Numerically verify SVRG gradients on random binary-classification
    data, one sample at a time."""
    print("========== Check gradients ==========")
    np.random.seed(random_seed())

    # Binary classification setup.
    eps = 1e-6
    num_data = 10
    num_features = 5
    num_classes = 2
    features = np.random.rand(num_data, num_features)
    targets = np.random.randint(0, num_classes, num_data)
    print(np.unique(targets))

    learner = SVRG(
        regular_param=0.01,
        learning_rate_scale=1.0,
        gamma=10,
        rf_dim=400,
        num_epochs=2,
        cache_size=6,
        freq_update_full_model=10,
        oracle=SVRG.COVERAGE,
        core_max=10,
        coverage_radius=100.0,
        loss_func=SVRG.LOGISTIC,
        smooth_hinge_theta=0.5,
        smooth_hinge_tau=0.5,
        freq_calc_metrics=20,
    )

    learner.fit(features, targets)

    # Analytic gradient must agree with the numerical one per sample.
    for xi, yi in zip(features, targets):
        assert learner.check_grad(xi, yi) < eps
Beispiel #25
0
def test_sgd_visualization_2d(show=False, block_figure_on_end=False):
    """Fit a kernel SGD learner on 2-D synthetic data while a Display
    callback plots its predictions every 10 iterations."""
    (x_train, y_train), (_, _) = demo.load_synthetic_2d()

    # Single panel showing the model's predictions over a 2-D grid.
    predict_panel = {'metrics': ['predict'],
                     'title': "Learning losses",
                     'xlabel': "X1",
                     'ylabel': "X2",
                     'grid_size': 10,
                     'marker_size': 10,
                     'left': None,
                     'right': None,
                     'top': None,
                     'bottom': None}
    display = Display(freq=10,
                      dpi='auto',
                      show=show,
                      block_on_end=block_figure_on_end,
                      monitor=[predict_panel])

    model = KSGD(lbd=0.0001,
                 eps=0.001,
                 gamma=30,
                 kernel='gaussian',
                 loss='hinge',
                 batch_size=1,
                 callbacks=[display],
                 avg_weight=False,
                 random_state=random_seed())

    model.fit(x_train, y_train)
    print("Training error = %.4f" % (1 - model.score(x_train, y_train)))
Beispiel #26
0
def test_early_stopping():
    """Exercise the EarlyStopping callback with a softmax GLM: stop
    early, resume training, then drop the callback and train to the end."""
    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    data = np.vstack([x_train, x_test])
    labels = np.concatenate([y_train, y_test])

    stopper = EarlyStopping(monitor='val_err', patience=2, verbose=1)
    clf = GLM(model_name="early_stopping_callback",
              link='softmax',
              loss='softmax',
              optimizer=SGD(learning_rate=0.01),
              num_epochs=20,
              batch_size=10,
              task='classification',
              metrics=['loss', 'err'],
              callbacks=[stopper],
              # -1 marks training points, 0 marks the validation fold.
              cv=[-1] * x_train.shape[0] + [0] * x_test.shape[0],
              random_state=random_seed(),
              verbose=1)

    clf.fit(data, labels)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Model has been stopped at epoch #{0:d}".format(clf.epoch))
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    print("Continue training...")
    clf.fit(data, labels)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Model has been stopped at epoch #{0:d}".format(clf.epoch))
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    print("Disable early stopping and continue training to the end...")
    clf.callbacks = []
    clf.fit(data, labels)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Beispiel #27
0
def test_srbm_load_to_continue_training():
    """Train a Supervised RBM, verify that save/load preserves its scores,
    then continue training the original instance for more epochs."""
    print(
        "========== Test load to continue training Supervised RBM ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_pima()

    model = SupervisedRBM(num_hidden=5,
                          num_visible=x_train.shape[1],
                          batch_size=10,
                          num_epochs=2,
                          learning_rate=0.1,
                          momentum_method='sudden',
                          weight_cost=0.0,
                          inference_engine='variational_inference',
                          approx_method='second_order',
                          metrics=['recon_err', 'loss', 'err'],
                          random_state=random_seed(),
                          verbose=1)

    model.fit(x_train, y_train)

    print("After training for {0:d} epochs".format(model.num_epochs))
    train_err = 1.0 - model.score(x_train, y_train)
    test_err = 1.0 - model.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    save_file_path = model.save()
    model1 = Model.load_model(save_file_path)
    print("After save and load:")
    train_err1 = 1.0 - model1.score(x_train, y_train)
    test_err1 = 1.0 - model1.score(x_test, y_test)
    print("Training error = %.4f" % train_err1)
    print("Testing error = %.4f" % test_err1)
    # The reloaded model must score identically to the original.
    assert abs(train_err - train_err1) < 1e-6
    assert abs(test_err - test_err1) < 1e-6

    model.num_epochs = 4
    # Bug fix: announce the continuation *before* training resumes; the
    # original printed this message only after fit() had already run.
    print("Set number of epoch to {0:d}, then continue training...".format(
        model.num_epochs))
    model.fit(x_train, y_train)
    train_err = 1.0 - model.score(x_train, y_train)
    test_err = 1.0 - model.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Beispiel #28
0
def test_glm_logit():
    """Fit logistic-regression GLMs on the Pima dataset three ways: with
    the default optimizer, with 'sgd' selected by name, and with an
    explicit SGD instance using Nesterov momentum."""
    print("========== Test GLM for binary classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_pima()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    # Default optimizer.
    model = GLM(model_name="GLM_logit",
                l1_penalty=0.0,
                l2_penalty=0.0,
                random_state=random_seed())
    print("Use {} optimizer".format(model.optimizer))
    model.fit(x_train, y_train)
    print("Training error = %.4f" % (1.0 - model.score(x_train, y_train)))
    print("Testing error = %.4f" % (1.0 - model.score(x_test, y_test)))

    # Optimizer selected by name.
    model = GLM(model_name="GLM_logit",
                optimizer='sgd',
                l1_penalty=0.0,
                l2_penalty=0.0,
                random_state=random_seed())
    print("Use {} optimizer".format(model.optimizer))
    model.fit(x_train, y_train)
    print("Training error = %.4f" % (1.0 - model.score(x_train, y_train)))
    print("Testing error = %.4f" % (1.0 - model.score(x_test, y_test)))

    # Explicit optimizer instance with Nesterov momentum.
    model = GLM(model_name="GLM_logit",
                optimizer=SGD(learning_rate=0.01, momentum=0.9,
                              nesterov=True),
                l1_penalty=0.0,
                l2_penalty=0.0,
                random_state=random_seed())
    print("Use {} optimizer".format(model.optimizer))
    model.fit(x_train, y_train)
    print("Training error = %.4f" % (1.0 - model.score(x_train, y_train)))
    print("Testing error = %.4f" % (1.0 - model.score(x_test, y_test)))
Beispiel #29
0
def test_logsumeone():
    """Check logsumone (log(1 + exp(x))) on a scalar and a 2x3 array
    against precomputed reference values."""
    np.random.seed(random_seed())
    from male.utils.func_utils import logsumone
    # Scalar case: logsumone(10) ~= 10 + log(1 + e^-10).
    assert abs(logsumone(10.0) - 10.000045398899218) < 1e-6
    data = np.random.rand(2, 3)
    expected = np.array([[1.30170729, 1.0712702, 1.00492985],
                         [1.14584669, 1.10508236, 0.94158162]])
    assert np.max(np.abs(logsumone(data) - expected)) < 1e-2
Beispiel #30
0
def test_continue_training():
    """Train a softmax GLM, continue training the same instance with more
    epochs, then save/load and continue training the reloaded copy."""
    print("========== Test continue training the models ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    num_epochs = 5
    clf = GLM(model_name="iris_glm_softmax",
              link='softmax',
              loss='softmax',
              optimizer='sgd',
              batch_size=10,
              num_epochs=num_epochs,
              random_state=random_seed(),
              verbose=1)

    clf.fit(x_train, y_train)

    print("After training for {0:d} epochs".format(num_epochs))
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    clf.num_epochs = 10
    print("Set number of epoch to {0:d}, then continue training...".format(
        clf.num_epochs))
    clf.fit(x_train, y_train)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    save_file_path = clf.save()
    clf1 = Model.load_model(save_file_path)
    clf1.num_epochs = 15
    # Bug fix: report the reloaded model's epoch count (clf1.num_epochs,
    # i.e. 15); the original printed clf.num_epochs (10) here.
    print("Save, load, set number of epoch to {0:d}, "
          "then continue training...".format(clf1.num_epochs))
    clf1.fit(x_train, y_train)
    train_err = 1.0 - clf1.score(x_train, y_train)
    test_err = 1.0 - clf1.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)