Example #1
def test_save_load():
    print("========== Test save and load models ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    clf = GLM(model_name="iris_glm_softmax",
              link='softmax',
              loss='softmax',
              random_state=random_seed(),
              verbose=1)

    clf.fit(x_train, y_train)

    print("After training:")
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

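    # save() with no explicit path writes the model to a default location and returns the saved file path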
    save_file_path = clf.save()
    clf1 = Model.load_model(save_file_path)
    print("After save and load:")
    train_err1 = 1.0 - clf1.score(x_train, y_train)
    test_err1 = 1.0 - clf1.score(x_test, y_test)
    print("Training error = %.4f" % train_err1)
    print("Testing error = %.4f" % test_err1)
    assert abs(train_err - train_err1) < 1e-6
    assert abs(test_err - test_err1) < 1e-6
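These snippets omit their import statements. A minimal preamble sketch is shown below; the numpy and scikit-learn imports are standard, while the male module paths are assumptions based on the library's typical test-suite layout and may need adjusting:

import os

import numpy as np
from sklearn.base import clone
from sklearn.model_selection import GridSearchCV, PredefinedSplit, ShuffleSplit

# NOTE: the male paths below are assumed, not verified against a specific release.
from male import Model
from male.configs import model_dir, random_seed
from male.datasets import demo
from male.models.linear import GLM
from male.optimizers import SGD, RMSProp
from male.callbacks import Display, EarlyStopping, ImageSaver, ModelCheckpoint
from male.metrics import auc
# Node2Vec (Example #4) also ships with male; its exact module path is omitted here.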
Example #2
def test_image_saver_callback():
    np.random.seed(random_seed())

    (x_train, y_train), (_, _) = demo.load_mnist()
    (cifar10_train, _), (_, _) = demo.load_cifar10()

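    # Each ImageSaver tiles the monitored images and writes them to its filepath every 'freq' epochs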
    imgsaver1 = ImageSaver(freq=1,
                           filepath=os.path.join(
                               model_dir(), "male/callbacks/imagesaver/"
                               "mnist/mnist_{epoch:04d}.png"),
                           monitor={
                               'metrics': 'x_data',
                               'img_size': (28, 28, 1),
                               'tile_shape': (10, 10),
                               'images': x_train[:100].reshape([-1, 28, 28, 1])
                           })
    imgsaver2 = ImageSaver(freq=1,
                           filepath=os.path.join(
                               model_dir(), "male/callbacks/imagesaver/"
                               "cifar10/cifar10_{epoch:04d}.png"),
                           monitor={
                               'metrics': 'x_data',
                               'img_size': (32, 32, 3),
                               'tile_shape': (10, 10),
                               'images': cifar10_train[:100].reshape([-1, 32, 32, 3])
                           })

    optz = SGD(learning_rate=0.001)
    clf = GLM(model_name="imagesaver_callback",
              link='softmax',
              loss='softmax',
              optimizer=optz,
              num_epochs=4,
              batch_size=100,
              task='classification',
              callbacks=[imgsaver1, imgsaver2],
              random_state=random_seed(),
              verbose=1)
    clf.fit(x_train, y_train)
Example #3
def test_glm_regression():
    print("========== Test GLM for regression ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_housing()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    clf = GLM(
        model_name="GLM_regression",
        task='regression',
        link='linear',  # link function
        loss='quadratic',  # loss function
        l2_penalty=0.0,  # ridge regularization
        l1_penalty=0.0,  # Lasso regularization
        l1_smooth=1E-5,  # smoothing for Lasso regularization
        l1_method='pseudo_huber',  # approximation method for L1-norm
        random_state=random_seed())

    clf.fit(x_train, y_train)

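    # For regression, GLM.score() returns the negative MSE (higher is better), so negate it to report the MSE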
    train_err = -clf.score(x_train, y_train)
    test_err = -clf.score(x_test, y_test)
    print("Training MSE = %.4f" % train_err)
    print("Testing MSE = %.4f" % test_err)
Example #4
def test_node2vec_wiki_pos(show_figure=False, block_figure_on_end=False):
    graph, labels, walks = demo.load_wikipos()
    y = labels
    x = np.array(range(len(labels)))
    model = Node2Vec(
        model_name='Node2Vec',
        emb_size=16,
        window=3,
        num_walks=2,
        walk_length=10,
        p=1.0,
        q=1.0,
        num_workers=4,
        directed=False,
    )
    model.fit(x, y, walks=walks)

    train_idx, test_idx = next(
        ShuffleSplit(n_splits=1, test_size=0.1,
                     random_state=random_seed()).split(x))
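    # transform() maps node indices to their learned embeddings, which feed the downstream GLM classifier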
    x_train, y_train = model.transform(x[train_idx]), y[train_idx]
    x_test, y_test = model.transform(x[test_idx]), y[test_idx]

    clf = GLM(model_name="GLM_multilogit",
              task='multilabel',
              link='logit',
              loss='multilogit',
              random_state=random_seed())

    clf.fit(x_train, y_train)
    train_f1 = clf.score(x_train, y_train)
    test_f1 = clf.score(x_test, y_test)
    print("Training weighted-F1-macro = %.4f" % train_f1)
    print("Testing weighted-F1-macro = %.4f" % test_f1)
Example #5
def test_glm_regression_gridsearch():
    print("========== Tune parameters for GLM for regression ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_housing()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack((x_train, x_test))
    y = np.concatenate((y_train, y_test))

    params = {'l1_penalty': [0.0, 0.0001], 'l2_penalty': [0.0001, 0.001, 0.01]}

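    # PredefinedSplit: -1 marks rows used only for training, non-negative values mark the validation fold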
    ps = PredefinedSplit(test_fold=[-1] * x_train.shape[0] +
                         [1] * x_test.shape[0])

    clf = GLM(
        model_name="GLM_regression_gridsearch",
        task='regression',
        link='linear',  # link function
        loss='quadratic',  # loss function
        l2_penalty=0.0,  # ridge regularization
        l1_penalty=0.0,  # Lasso regularization
        l1_smooth=1E-5,  # smoothing for Lasso regularization
        l1_method='pseudo_huber',  # approximation method for L1-norm
        catch_exception=True,
        random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=-1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best MSE {} @ params {}".format(-gs.best_score_, gs.best_params_))

    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(x_train, y_train)

    train_err = -best_clf.score(x_train, y_train)
    test_err = -best_clf.score(x_test, y_test)
    print("Training MSE = %.4f" % train_err)
    print("Testing MSE = %.4f" % test_err)
    assert abs(test_err + gs.best_score_) < 1e-4
Example #6
def test_auc():
    print("========== Test AUC score ==========")

    (x_train, y_train), (x_test, y_test) = demo.load_pima()

    model = GLM(model_name="GLM_logit",
                l1_penalty=0.0,
                l2_penalty=0.0,
                random_state=random_seed())

    model.fit(x_train, y_train)
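    # auc() scores the model's predictions against the true labels by the area under the ROC curve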
    train_auc = auc(y_train, model.predict(x_train))
    test_auc = auc(y_test, model.predict(x_test))
    print("Training AUC = %.4f" % train_auc)
    print("Testing AUC = %.4f" % test_auc)
Example #7
def test_glm_multilogit_gridsearch():
    print(
        "========== Tune parameters for GLM for multilabel classification =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_yeast()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack((x_train, x_test))
    y = np.concatenate((y_train, y_test))

    params = {'l1_penalty': [0.0, 0.0001], 'l2_penalty': [0.0001, 0.001, 0.01]}

    ps = PredefinedSplit(test_fold=[-1] * x_train.shape[0] +
                         [1] * x_test.shape[0])

    clf = GLM(model_name="GLM_multilogit_gridsearch",
              task='multilabel',
              link='logit',
              loss='multilogit',
              catch_exception=True,
              random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=-1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best weighted-F1-macro {} @ params {}".format(
        gs.best_score_, gs.best_params_))

    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(x_train, y_train)

    train_f1 = best_clf.score(x_train, y_train)
    test_f1 = best_clf.score(x_test, y_test)
    print("Training weighted-F1-macro = %.4f" % train_f1)
    print("Testing weighted-F1-macro = %.4f" % test_f1)
    assert abs(test_f1 - gs.best_score_) < 1e-4
Example #8
def test_glm_softmax_gridsearch():
    print(
        "========== Tune parameters for GLM for multiclass classification =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack((x_train, x_test))
    y = np.concatenate((y_train, y_test))

    params = {'l1_penalty': [0.0, 0.0001], 'l2_penalty': [0.0001, 0.001, 0.01]}

    ps = PredefinedSplit(test_fold=[-1] * x_train.shape[0] +
                         [1] * x_test.shape[0])

    clf = GLM(model_name="GLM_softmax_gridsearch",
              link='softmax',
              loss='softmax',
              catch_exception=True,
              random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=-1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best error {} @ params {}".format(1 - gs.best_score_,
                                             gs.best_params_))

    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(x_train, y_train)

    train_err = 1.0 - best_clf.score(x_train, y_train)
    test_err = 1.0 - best_clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
    assert abs(test_err - (1.0 - gs.best_score_)) < 1e-4
Example #9
def test_checkpoint():
    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack([x_train, x_test])
    y = np.concatenate([y_train, y_test])

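    # Checkpoint filenames are templated with the epoch number and the monitored validation loss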
    filepath = os.path.join(
        model_dir(), "male/glm/checkpoint_{epoch:04d}_{val_loss:.6f}.pkl")
    checkpoint = ModelCheckpoint(filepath,
                                 mode='min',
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True)
    optz = SGD(learning_rate=0.01)
    clf = GLM(model_name="checkpoint_callback",
              link='softmax',
              loss='softmax',
              optimizer=optz,
              num_epochs=5,
              batch_size=10,
              task='classification',
              metrics=['loss', 'err'],
              callbacks=[checkpoint],
              cv=[-1] * x_train.shape[0] + [0] * x_test.shape[0],
              random_state=random_seed(),
              verbose=1)

    clf.fit(x, y)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    model_filepath = filepath.format(epoch=5, val_loss=0.968786)
    print("Load model at checkpoint: ", model_filepath, ", and predict:")
    clf1 = Model.load_model(model_filepath)
    train_err = 1.0 - clf1.score(x_train, y_train)
    test_err = 1.0 - clf1.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example #10
def test_glm_save_load(show=False, block_figure_on_end=False):
    print("========== Test Save and Load functions for GLM ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack([x_train, x_test])
    y = np.concatenate([y_train, y_test])

    early_stopping = EarlyStopping(monitor='val_err', patience=5, verbose=1)
    filepath = os.path.join(model_dir(),
                            "male/GLM/iris_{epoch:04d}_{val_err:.6f}.pkl")
    checkpoint = ModelCheckpoint(filepath,
                                 mode='min',
                                 monitor='val_err',
                                 verbose=0,
                                 save_best_only=True)
    loss_display = Display(title="Learning curves",
                           dpi='auto',
                           layout=(3, 1),
                           freq=1,
                           show=show,
                           block_on_end=block_figure_on_end,
                           monitor=[
                               {
                                   'metrics': ['loss', 'val_loss'],
                                   'type': 'line',
                                   'labels':
                                   ["training loss", "validation loss"],
                                   'title': "Learning losses",
                                   'xlabel': "epoch",
                                   'ylabel': "loss",
                               },
                               {
                                   'metrics': ['err', 'val_err'],
                                   'type': 'line',
                                   'title': "Learning errors",
                                   'xlabel': "epoch",
                                   'ylabel': "error",
                               },
                               {
                                   'metrics': ['err'],
                                   'type': 'line',
                                   'labels': ["training error"],
                                   'title': "Learning errors",
                                   'xlabel': "epoch",
                                   'ylabel': "error",
                               },
                           ])

    weight_display = Display(title="Filters",
                             dpi='auto',
                             layout=(1, 1),
                             figsize=(6, 15),
                             freq=1,
                             show=show,
                             block_on_end=block_figure_on_end,
                             monitor=[
                                 {
                                     'metrics': ['weights'],
                                     'title': "Learned weights",
                                     'type': 'img',
                                     'disp_dim': (2, 2),
                                     'tile_shape': (3, 1),
                                 },
                             ])

    clf = GLM(
        model_name="GLM_softmax_cv",
        link='softmax',
        loss='softmax',
        optimizer='sgd',
        num_epochs=4,
        batch_size=10,
        task='classification',
        metrics=['loss', 'err'],
        callbacks=[early_stopping, checkpoint, loss_display, weight_display],
        cv=[-1] * x_train.shape[0] + [0] * x_test.shape[0],
        random_state=random_seed(),
        verbose=1)

    clf.fit(x, y)

    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

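    # Save the trained model to an explicit path, reload it, and continue training for more epochs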
    save_file_path = os.path.join(model_dir(), "male/GLM/saved_model.pkl")
    clf.save(file_path=save_file_path)
    clf1 = Model.load_model(save_file_path)
    clf1.num_epochs = 10
    clf1.fit(x, y)

    train_err = 1.0 - clf1.score(x_train, y_train)
    test_err = 1.0 - clf1.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example #11
def test_glm_multilogit():
    print("========== Test GLM for multilabel classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_yeast()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    clf = GLM(model_name="GLM_multilogit",
              task='multilabel',
              link='logit',
              loss='multilogit',
              random_state=random_seed())

    print("Use {} optimizer".format(clf.optimizer))
    clf.fit(x_train, y_train)
    train_f1 = clf.score(x_train, y_train)
    test_f1 = clf.score(x_test, y_test)
    print("Training weighted-F1-macro = %.4f" % train_f1)
    print("Testing weighted-F1-macro = %.4f" % test_f1)

    optz = SGD(learning_rate=0.1, momentum=0.9, nesterov=True)
    clf = GLM(model_name="GLM_multilogit",
              task='multilabel',
              optimizer=optz,
              link='logit',
              loss='multilogit',
              random_state=random_seed(),
              verbose=1)

    print("Use {} optimizer".format(clf.optimizer))
    clf.fit(x_train, y_train)
    train_f1 = clf.score(x_train, y_train)
    test_f1 = clf.score(x_test, y_test)
    print("Training weighted-F1-macro = %.4f" % train_f1)
    print("Testing weighted-F1-macro = %.4f" % test_f1)
Example #12
def test_early_stopping():
    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack([x_train, x_test])
    y = np.concatenate([y_train, y_test])

    early_stopping = EarlyStopping(monitor='val_err', patience=2, verbose=1)
    optz = SGD(learning_rate=0.01)
    clf = GLM(model_name="early_stopping_callback",
              link='softmax',
              loss='softmax',
              optimizer=optz,
              num_epochs=20,
              batch_size=10,
              task='classification',
              metrics=['loss', 'err'],
              callbacks=[early_stopping],
              cv=[-1] * x_train.shape[0] + [0] * x_test.shape[0],
              random_state=random_seed(),
              verbose=1)

    clf.fit(x, y)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Model has been stopped at epoch #{0:d}".format(clf.epoch))
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    print("Continue training...")
    clf.fit(x, y)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Model has been stopped at epoch #{0:d}".format(clf.epoch))
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    print("Disable early stopping and continue training to the end...")
    clf.callbacks = []
    clf.fit(x, y)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example #13
def test_glm_logit():
    print("========== Test GLM for binary classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_pima()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    clf = GLM(model_name="GLM_logit",
              l1_penalty=0.0,
              l2_penalty=0.0,
              random_state=random_seed())

    print("Use {} optimizer".format(clf.optimizer))
    clf.fit(x_train, y_train)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    clf = GLM(model_name="GLM_logit",
              optimizer='sgd',
              l1_penalty=0.0,
              l2_penalty=0.0,
              random_state=random_seed())

    print("Use {} optimizer".format(clf.optimizer))
    clf.fit(x_train, y_train)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    optz = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
    clf = GLM(model_name="GLM_logit",
              optimizer=optz,
              l1_penalty=0.0,
              l2_penalty=0.0,
              random_state=random_seed())

    print("Use {} optimizer".format(clf.optimizer))
    clf.fit(x_train, y_train)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example #14
def test_glm_check_grad():
    print("========== Check gradients ==========")
    # <editor-fold desc="Binary classification">
    eps = 1e-6
    num_data = 10
    num_features = 5
    num_classes = 2
    x = np.random.rand(num_data, num_features)
    y = np.random.randint(0, num_classes, num_data)

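    # check_grad() returns the discrepancy between analytic and numerical gradients; it should be near zero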
    model = GLM(
        model_name="checkgrad_GLM_logit",
        task='classification',
        link='logit',  # link function
        loss='logit',  # loss function
        l2_penalty=0.0,  # ridge regularization
        l1_penalty=0.0,  # Lasso regularization
        l1_smooth=1E-5,  # smoothing for Lasso regularization
        l1_method='pseudo_huber',  # approximation method for L1-norm
        random_state=random_seed())
    assert model.check_grad(x, y) < eps

    model = GLM(
        model_name="checkgrad_GLM_logit",
        task='classification',
        link='logit',  # link function
        loss='logit',  # loss function
        l2_penalty=0.1,  # ridge regularization
        l1_penalty=0.0,  # Lasso regularization
        l1_smooth=1E-5,  # smoothing for Lasso regularization
        l1_method='pseudo_huber',  # approximation method for L1-norm
        random_state=random_seed())
    assert model.check_grad(x, y) < eps

    model = GLM(
        model_name="checkgrad_GLM_logit",
        task='classification',
        link='logit',  # link function
        loss='logit',  # loss function
        l2_penalty=0.0,  # ridge regularization
        l1_penalty=0.01,  # Lasso regularization
        l1_smooth=1E-5,  # smoothing for Lasso regularization
        l1_method='pseudo_huber',  # approximation method for L1-norm
        random_state=random_seed())
    assert model.check_grad(x, y) < eps

    model = GLM(
        model_name="checkgrad_GLM_logit",
        task='classification',
        link='logit',  # link function
        loss='logit',  # loss function
        l2_penalty=0.1,  # ridge regularization
        l1_penalty=0.01,  # Lasso regularization
        l1_smooth=1E-5,  # smoothing for Lasso regularization
        l1_method='pseudo_huber',  # approximation method for L1-norm
        random_state=random_seed())
    assert model.check_grad(x, y) < eps
    # </editor-fold>

    # <editor-fold desc="Multiclass classification">
    eps = 1e-6
    num_data = 10
    num_features = 5
    num_classes = 3
    x = np.random.rand(num_data, num_features)
    y = np.random.randint(0, num_classes, num_data)

    model = GLM(
        model_name="checkgrad_GLM_softmax",
        task='classification',
        link='softmax',  # link function
        loss='softmax',  # loss function
        l2_penalty=0.0,  # ridge regularization
        l1_penalty=0.0,  # Lasso regularization
        l1_smooth=1E-5,  # smoothing for Lasso regularization
        l1_method='pseudo_huber',  # approximation method for L1-norm
        random_state=random_seed())
    assert model.check_grad(x, y) < eps

    model = GLM(
        model_name="checkgrad_GLM_softmax",
        task='classification',
        link='softmax',  # link function
        loss='softmax',  # loss function
        l2_penalty=0.1,  # ridge regularization
        l1_penalty=0.0,  # Lasso regularization
        l1_smooth=1E-5,  # smoothing for Lasso regularization
        l1_method='pseudo_huber',  # approximation method for L1-norm
        random_state=random_seed())
    assert model.check_grad(x, y) < eps

    model = GLM(
        model_name="checkgrad_GLM_softmax",
        task='classification',
        link='softmax',  # link function
        loss='softmax',  # loss function
        l2_penalty=0.0,  # ridge regularization
        l1_penalty=0.01,  # Lasso regularization
        l1_smooth=1E-5,  # smoothing for Lasso regularization
        l1_method='pseudo_huber',  # approximation method for L1-norm
        random_state=random_seed())
    assert model.check_grad(x, y) < eps

    model = GLM(
        model_name="checkgrad_GLM_softmax",
        task='classification',
        link='softmax',  # link function
        loss='softmax',  # loss function
        l2_penalty=0.1,  # ridge regularization
        l1_penalty=0.01,  # Lasso regularization
        l1_smooth=1E-5,  # smoothing for Lasso regularization
        l1_method='pseudo_huber',  # approximation method for L1-norm
        random_state=random_seed())
    assert model.check_grad(x, y) < eps
    # </editor-fold>

    # <editor-fold desc="Multilabel classification">
    eps = 1e-6
    num_data = 10
    num_features = 5
    num_classes = 3
    x = np.random.rand(num_data, num_features)
    y = np.empty(num_data, dtype=object)  # 'object' dtype; np.object was removed in newer NumPy versions
    for i in range(num_data):
        y[i] = tuple(
            np.random.choice(num_classes,
                             np.random.randint(num_classes) + 1,
                             replace=False))

    model = GLM(
        model_name="checkgrad_GLM_multilogit",
        task='multilabel',
        link='logit',  # link function
        loss='multilogit',  # loss function
        l2_penalty=0.0,  # ridge regularization
        l1_penalty=0.0,  # Lasso regularization
        l1_smooth=1E-5,  # smoothing for Lasso regularization
        l1_method='pseudo_huber',  # approximation method for L1-norm
        random_state=random_seed())
    assert model.check_grad(x, y) < eps

    model = GLM(
        model_name="checkgrad_GLM_multilogit",
        task='multilabel',
        link='logit',  # link function
        loss='multilogit',  # loss function
        l2_penalty=0.1,  # ridge regularization
        l1_penalty=0.0,  # Lasso regularization
        l1_smooth=1E-5,  # smoothing for Lasso regularization
        l1_method='pseudo_huber',  # approximation method for L1-norm
        random_state=random_seed())
    assert model.check_grad(x, y) < eps

    model = GLM(
        model_name="checkgrad_GLM_multilogit",
        task='multilabel',
        link='logit',  # link function
        loss='multilogit',  # loss function
        l2_penalty=0.0,  # ridge regularization
        l1_penalty=0.01,  # Lasso regularization
        l1_smooth=1E-5,  # smoothing for Lasso regularization
        l1_method='pseudo_huber',  # approximation method for L1-norm
        random_state=random_seed())
    assert model.check_grad(x, y) < eps

    model = GLM(
        model_name="checkgrad_GLM_multilogit",
        task='multilabel',
        link='logit',  # link function
        loss='multilogit',  # loss function
        l2_penalty=0.1,  # ridge regularization
        l1_penalty=0.01,  # Lasso regularization
        l1_smooth=1E-5,  # smoothing for Lasso regularization
        l1_method='pseudo_huber',  # approximation method for L1-norm
        random_state=random_seed())
    assert model.check_grad(x, y) < eps
    # </editor-fold>

    # <editor-fold desc="Regression">
    eps = 1e-6
    num_data = 10
    num_features = 5
    x = np.random.rand(num_data, num_features)
    y = np.random.rand(num_data)

    model = GLM(
        model_name="checkgrad_GLM_quad",
        task='regression',
        link='linear',  # link function
        loss='quadratic',  # loss function
        l2_penalty=0.0,  # ridge regularization
        l1_penalty=0.0,  # Lasso regularization
        l1_smooth=1E-5,  # smoothing for Lasso regularization
        l1_method='pseudo_huber',  # approximation method for L1-norm
        random_state=random_seed())
    assert model.check_grad(x, y) < eps

    model = GLM(
        model_name="checkgrad_GLM_quad",
        task='regression',
        link='linear',  # link function
        loss='quadratic',  # loss function
        l2_penalty=0.1,  # ridge regularization
        l1_penalty=0.0,  # Lasso regularization
        l1_smooth=1E-5,  # smoothing for Lasso regularization
        l1_method='pseudo_huber',  # approximation method for L1-norm
        random_state=random_seed())
    assert model.check_grad(x, y) < eps

    model = GLM(
        model_name="checkgrad_GLM_quad",
        task='regression',
        link='linear',  # link function
        loss='quadratic',  # loss function
        l2_penalty=0.0,  # ridge regularization
        l1_penalty=0.01,  # Lasso regularization
        l1_smooth=1E-5,  # smoothing for Lasso regularization
        l1_method='pseudo_huber',  # approximation method for L1-norm
        random_state=random_seed())
    assert model.check_grad(x, y) < eps

    model = GLM(
        model_name="checkgrad_GLM_quad",
        task='regression',
        link='linear',  # link function
        loss='quadratic',  # loss function
        l2_penalty=0.1,  # ridge regularization
        l1_penalty=0.01,  # Lasso regularization
        l1_smooth=1E-5,  # smoothing for Lasso regularization
        l1_method='pseudo_huber',  # approximation method for L1-norm
        random_state=random_seed())
    assert model.check_grad(x, y) < eps
Example #15
def test_rmsprop_glm():
    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    print("Training GLM using RMSProp with default parameters...")
    clf = GLM(model_name="rmsprop_glm",
              optimizer='rmsprop',
              num_epochs=10,
              link='softmax',
              loss='softmax',
              random_state=random_seed())

    clf.fit(x_train, y_train)

    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    print("Training GLM using RMSProp with customized parameters...")
    optz = RMSProp(learning_rate=0.01)
    clf = GLM(model_name="rmsprop_glm",
              optimizer=optz,
              num_epochs=10,
              link='softmax',
              loss='softmax',
              random_state=random_seed())

    clf.fit(x_train, y_train)

    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example #16
def test_glm_softmax():
    print("========== Test GLM for multiclass classification ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    clf = GLM(model_name="GLM_softmax",
              link='softmax',
              loss='softmax',
              random_state=random_seed())

    print("Use {} optimizer".format(clf.optimizer))
    clf.fit(x_train, y_train)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    clf = GLM(model_name="GLM_softmax",
              optimizer='sgd',
              link='softmax',
              loss='softmax',
              random_state=random_seed())

    print("Use {} optimizer".format(clf.optimizer))
    clf.fit(x_train, y_train)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example #17
def test_continue_training():
    print("========== Test continue training the models ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_iris()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    num_epochs = 5
    clf = GLM(model_name="iris_glm_softmax",
              link='softmax',
              loss='softmax',
              optimizer='sgd',
              batch_size=10,
              num_epochs=num_epochs,
              random_state=random_seed(),
              verbose=1)

    clf.fit(x_train, y_train)

    print("After training for {0:d} epochs".format(num_epochs))
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

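    # Raising num_epochs and calling fit() again resumes training from the last completed epoch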
    clf.num_epochs = 10
    print("Set number of epoch to {0:d}, then continue training...".format(
        clf.num_epochs))
    clf.fit(x_train, y_train)
    train_err = 1.0 - clf.score(x_train, y_train)
    test_err = 1.0 - clf.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)

    save_file_path = clf.save()
    clf1 = Model.load_model(save_file_path)
    clf1.num_epochs = 15
    print("Save, load, set number of epoch to {0:d}, "
          "then continue training...".format(clf.num_epochs))
    clf1.fit(x_train, y_train)
    train_err = 1.0 - clf1.score(x_train, y_train)
    test_err = 1.0 - clf1.score(x_test, y_test)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example #18
def test_display_callbacks(show=False, block_figure_on_end=False):
    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_mnist()

    idx_train = np.random.permutation(x_train.shape[0])
    x_train = x_train[idx_train]
    y_train = y_train[idx_train]
    print("Number of training samples = {}".format(x_train.shape[0]))

    idx_test = np.random.permutation(x_test.shape[0])
    x_test = x_test[idx_test]
    y_test = y_test[idx_test]
    print("Number of testing samples = {}".format(x_test.shape[0]))

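    # Stack train and test data; the cv argument of GLM below marks the test rows as the validation split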
    x = np.vstack([x_train, x_test])
    y = np.concatenate([y_train, y_test])

    err_display = Display(title="Error curves",
                          dpi='auto',
                          layout=(1, 1),
                          freq=1,
                          show=show,
                          block_on_end=block_figure_on_end,
                          monitor=[{
                              'metrics': ['err', 'val_err'],
                              'type': 'line',
                              'title': "Learning errors",
                              'xlabel': "epoch",
                              'ylabel': "error",
                          }])
    loss_display = Display(
        title="Learning curves",
        dpi='auto',
        layout=(3, 1),
        freq=1,
        show=show,
        block_on_end=block_figure_on_end,
        filepath=[
            os.path.join(model_dir(), "male/callbacks/"
                         "display/loss/loss_{epoch:04d}.png"),
            os.path.join(model_dir(), "male/callbacks/"
                         "display/loss/loss_{epoch:04d}.pdf")
        ],
        monitor=[
            {
                'metrics': ['loss', 'val_loss'],
                'type': 'line',
                'labels': ["training loss", "validation loss"],
                'title': "Learning losses",
                'xlabel': "epoch",
                'xlabel_params': {
                    'fontsize': 50
                },
                'ylabel': "loss",
            },
            {
                'metrics': ['err', 'val_err'],
                'type': 'line',
                'title': "Learning errors",
                'xlabel': "epoch",
                'ylabel': "error",
            },
            {
                'metrics': ['err'],
                'type': 'line',
                'labels': ["training error"],
                'title': "Learning errors",
                'xlabel': "epoch",
                'ylabel': "error",
            },
        ])

    weight_display = Display(title="Filters",
                             dpi='auto',
                             layout=(1, 1),
                             figsize=(6, 15),
                             freq=1,
                             show=show,
                             block_on_end=block_figure_on_end,
                             filepath=os.path.join(
                                 model_dir(), "male/callbacks/display/"
                                 "weights/weights_{epoch:04d}.png"),
                             monitor=[
                                 {
                                     'metrics': ['weights'],
                                     'title': "Learned weights",
                                     'type': 'img',
                                     'tile_shape': (5, 2),
                                 },
                             ])

    optz = SGD(learning_rate=0.001)
    clf = GLM(model_name="display_callbacks",
              link='softmax',
              loss='softmax',
              optimizer=optz,
              num_epochs=20,
              batch_size=100,
              task='classification',
              metrics=['loss', 'err'],
              callbacks=[loss_display, weight_display, err_display],
              cv=[-1] * x_train.shape[0] + [0] * x_test.shape[0],
              random_state=random_seed(),
              verbose=1)

    clf.fit(x, y)
    print("Training error = %.4f" % (1.0 - clf.score(x_train, y_train)))
    print("Testing error = %.4f" % (1.0 - clf.score(x_test, y_test)))