コード例 #1
0
 def setUp(self):
     """Create the estimator under test and a small two-cluster data set.

     Stores the optimizer in ``self.sgd``, the estimator in
     ``self.estimator``, the 20x2 sample matrix in ``self.x`` and the
     matching -1/+1 labels in ``self.y``.
     """
     self.sgd = LogisticSGD(1, 1e-3)
     self.estimator = LogisticRegression(optimizer=self.sgd)

     # Fixed seed: the sampled points are identical on every run.
     np.random.seed(0)
     negatives = np.random.normal(loc=(-1, -1), scale=(1, 1), size=(10, 2))
     positives = np.random.normal(loc=(1, 1), scale=(1, 1), size=(10, 2))
     self.x = np.concatenate([negatives, positives])

     # First ten samples are labelled -1, the last ten +1.
     self.y = np.concatenate([-np.ones(shape=10), np.ones(shape=10)])
def plot_gausian_arr():
    """Plot SGD and SDCA training curves on the Arrhythmia data set.

    The data returned by ``import_data_arrhythmia`` is split into train and
    test parts, mapped through ``gaussian_proj`` and normalised. Both
    optimizers are then trained for ``nb_epoch`` epochs with history enabled.

    Three figures are created: the SGD loss history, the SDCA loss history,
    and the test accuracies computed by ``get_hist_accuracy`` from the two
    weight histories.

    The projection base and the normaliser are built from the training part
    only, so the held-out part cannot influence the features it is scored on.
    """
    data_name = "Arrhythmia"
    nb_epoch = 40
    h = 10
    prop = 0.1

    x, y = import_data_arrhythmia()

    # Split first: everything fitted below must only see the training part.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15)

    # base of projection
    base_proj = create_base(x_train, prop=prop)
    X_train = gaussian_proj(x_train, base_proj, h)
    X_test = gaussian_proj(x_test, base_proj, h)

    # normalization (statistics come from the training part)
    normalizer = Normalizer(X_train)
    X_train = normalizer.normalize(X_train)
    X_test = normalizer.normalize(X_test)

    # make estimator
    sgd = LogisticSGD(c=10**3, eps=10**-6)
    sgd_clf = LogisticRegression(optimizer=sgd)

    sdca = LogisticSDCA(c=10**-1)
    sdca_clf = LogisticRegression(optimizer=sdca)

    # train estimator with history
    sgd_hist_w, sgd_hist_loss = sgd_clf.fit(X_train,
                                            y_train,
                                            epochs=nb_epoch,
                                            save_hist=True)
    sgd_hist_w = np.array(sgd_hist_w)

    sdca_hist_w, sdca_hist_loss = sdca_clf.fit(X_train,
                                               y_train,
                                               epochs=nb_epoch,
                                               save_hist=True)
    sdca_hist_w = np.array(sdca_hist_w)

    # loss curves, one figure per optimizer
    plt.figure()
    plt.plot(sgd_hist_loss)
    plt.title("SGD learning loss vs. iteration\non data set " + data_name)
    plt.xlabel("Iteration")
    plt.ylabel("Loss")

    plt.figure()
    plt.title("SDCA learning loss vs. iteration\non data set " + data_name)
    plt.xlabel("Iteration")
    plt.ylabel("Loss")
    plt.plot(sdca_hist_loss)

    # test accuracy derived from the recorded weight histories
    sgd_hist_accuracy = get_hist_accuracy(X_test, y_test, sgd_hist_w, sgd_clf)
    sdca_hist_accuracy = get_hist_accuracy(X_test, y_test, sdca_hist_w,
                                           sdca_clf)
    plt.figure()
    plt.plot(sgd_hist_accuracy, c='b', label="SGD")
    plt.plot(sdca_hist_accuracy, c='g', label="SDCA")
    plt.title("Test accuracy vs. iteration\non data set " + data_name)
    plt.xlabel("Iteration")
    plt.ylabel("Accuracy")
    plt.legend()
def eval_eps(data, labels, vect_param, nb_epoch, data_name, param_c=10**1):
    """Plot SGD train/test accuracy against the ``eps`` hyperparameter.

    For every value in ``vect_param`` a fresh ``LogisticSGD(c=param_c,
    eps=value)`` estimator is trained for ``nb_epoch`` epochs on a random
    85/15 train/test split of ``data``/``labels``. Both accuracies are drawn
    on a single semilog-x figure whose title ends with ``data_name``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        data, labels, test_size=0.15)

    train_scores = []
    test_scores = []

    for eps in vect_param:
        # one new estimator per candidate value
        clf = LogisticRegression(optimizer=LogisticSGD(c=param_c, eps=eps))

        # no history needed, only the final scores are plotted
        clf.fit(X_train, y_train, epochs=nb_epoch, save_hist=False)

        train_scores.append(clf.score_accuracy(X_train, y_train))
        test_scores.append(clf.score_accuracy(X_test, y_test))

    plt.figure()
    curves = ((train_scores, 'b', "train"), (test_scores, 'r', "test"))
    for scores, colour, legend in curves:
        plt.semilogx(vect_param, scores, colour, label=legend)
    plt.title("Accuracy of SGD vs. hyperparameter epsilon \non data set " +
              data_name)
    plt.xlabel("Epsilon")
    plt.ylabel("Accuracy")
    plt.legend()
コード例 #4
0
class TestLogisticSGD(unittest.TestCase):
    """Tests for ``LogisticRegression`` driven by a ``LogisticSGD`` optimizer."""

    def setUp(self):
        """Create the estimator and a seeded 20-sample, two-cluster data set."""
        self.sgd = LogisticSGD(1, 1e-3)
        self.estimator = LogisticRegression(optimizer=self.sgd)

        # Fixed seed: the sampled points are identical on every run.
        np.random.seed(0)
        negatives = np.random.normal(loc=(-1, -1), scale=(1, 1), size=(10, 2))
        positives = np.random.normal(loc=(1, 1), scale=(1, 1), size=(10, 2))
        self.x = np.concatenate([negatives, positives])

        # First ten samples are labelled -1, the last ten +1.
        self.y = np.concatenate([-np.ones(shape=10), np.ones(shape=10)])

    def testFit(self):
        """Check that fitting on the fixture runs without raising."""
        np.random.seed(10)
        self.estimator.fit(self.x, self.y)

    def testPredict(self):
        """Check the error rate obtained with a fixed weight vector."""
        self.estimator.w = [0.20140517, 0.26121764]
        mismatches = self.estimator.predict(self.x) != self.y
        # regression test
        self.assertEqual(sum(mismatches) / len(mismatches), 0.1)
def compute_search(
    x_train: np.ndarray,
    y_train: np.ndarray,
    x_test: np.ndarray,
    y_test: np.ndarray,
    param_name: str,
    param_values: Union[list, np.ndarray],
    optimizer_type,
    optimizer_kwargs: Union[dict, None] = None,
    projection: Callable[[np.ndarray],
                         np.ndarray] = projections.identity_projection):
    """Score an estimator for each value of a single optimizer parameter.

    For every entry of ``param_values`` an optimizer is built as
    ``optimizer_type(**optimizer_kwargs, param_name=value)``, wrapped in a
    ``LogisticRegression`` with the given ``projection``, trained for 15
    epochs and scored on both data sets.

    Args:
        x_train, y_train: data the estimator is fitted and scored on.
        x_test, y_test: data the estimator is only scored on.
        param_name: keyword name of the optimizer parameter being searched.
        param_values: candidate values for that parameter.
        optimizer_type: callable returning an optimizer from keyword args.
        optimizer_kwargs: extra keyword arguments shared by all candidates.
            The mapping is copied and never modified; an entry named
            ``param_name`` is overridden by the searched value.
        projection: projection handed to ``LogisticRegression``.

    Returns:
        ``(scores_train, scores_test)``: two lists with one accuracy per
        entry of ``param_values``, in the same order.
    """
    # Copy once so the caller's dict is never mutated by the search.
    base_kwargs = {} if optimizer_kwargs is None else dict(optimizer_kwargs)

    scores_train = []
    scores_test = []

    for param_value in param_values:
        # Same seed for every candidate so runs differ only by the parameter.
        np.random.seed(50307)

        # gather parameters (the searched value wins over any base entry)
        candidate_kwargs = {**base_kwargs, param_name: param_value}

        # init optimizer and estimator
        optimizer = optimizer_type(**candidate_kwargs)
        estimator = LogisticRegression(optimizer=optimizer,
                                       projection=projection)

        # fit estimator
        # NOTE(review): history is requested but the return value is unused.
        estimator.fit(x_train, y_train, epochs=15, save_hist=True)

        # evaluate
        scores_train.append(estimator.score_accuracy(x_train, y_train))
        scores_test.append(estimator.score_accuracy(x_test, y_test))

    return scores_train, scores_test
def eval_c(data, labels, vect_param, nb_epoch, data_name, eps_base=10**-6):
    """Plot SGD and SDCA train/test accuracy against the hyperparameter C.

    ``data``/``labels`` are split 85/15 at random. For each value in
    ``vect_param`` one ``LogisticSGD(c=value, eps=eps_base)`` estimator and
    one ``LogisticSDCA(c=value)`` estimator are trained for ``nb_epoch``
    epochs. Two semilog-x figures are produced, one per optimizer, each with
    a train and a test curve and a title ending with ``data_name``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        data, labels, test_size=0.15)

    sgd_train, sgd_test = [], []
    sdca_train, sdca_test = [], []

    for c_value in vect_param:
        # make estimator
        sgd_clf = LogisticRegression(
            optimizer=LogisticSGD(c=c_value, eps=eps_base))
        sdca_clf = LogisticRegression(optimizer=LogisticSDCA(c=c_value))

        # train estimators without history (SGD first, then SDCA)
        for clf in (sgd_clf, sdca_clf):
            clf.fit(X_train, y_train, epochs=nb_epoch, save_hist=False)

        # accuracy on the training split
        sgd_train.append(sgd_clf.score_accuracy(X_train, y_train))
        sdca_train.append(sdca_clf.score_accuracy(X_train, y_train))

        # accuracy on the held-out split
        sgd_test.append(sgd_clf.score_accuracy(X_test, y_test))
        sdca_test.append(sdca_clf.score_accuracy(X_test, y_test))

    # one figure per optimizer, identical layout
    figures = (
        ("SGD accuracy vs. hyperparameter C\n on data set ", sgd_train,
         sgd_test),
        ("SDCA accuracy vs. hyperparameter C\n on data set ", sdca_train,
         sdca_test),
    )
    for title_prefix, train_curve, test_curve in figures:
        plt.figure()
        plt.semilogx(vect_param, train_curve, 'b', label="train")
        plt.semilogx(vect_param, test_curve, 'r', label="test")
        plt.title(title_prefix + data_name)
        plt.xlabel("C")
        plt.ylabel("Accuracy")
        plt.legend()
def eval_h(data, labels, vect_param, nb_epoch, data_name, prop_base, c_sgd,
           c_sdca, eps_sgd):
    """Plot SGD and SDCA train/test accuracy against the projection parameter h.

    ``data``/``labels`` are split 85/15 at random and a projection base is
    built from the training part with ``create_base(..., prop=prop_base)``.
    For each value in ``vect_param`` both parts are passed through
    ``gaussian_proj`` with that value and normalised with a ``Normalizer``
    built on the projected training part. One ``LogisticSGD(c=c_sgd,
    eps=eps_sgd)`` and one ``LogisticSDCA(c=c_sdca)`` estimator are then
    trained for ``nb_epoch`` epochs.

    Prints the first dimension of the projection base and produces two
    semilog-x figures (SGD, SDCA), each with a train and a test curve.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        data, labels, test_size=0.15)

    Base_proj = create_base(x_train, prop=prop_base)
    dim, _ = Base_proj.shape
    print("dim :", dim)

    sgd_train, sgd_test = [], []
    sdca_train, sdca_test = [], []

    for h_value in vect_param:
        proj_train = gaussian_proj(x_train, Base_proj, h_value)
        proj_test = gaussian_proj(x_test, Base_proj, h_value)

        # normalisation, built on the training projection only
        normalizer = Normalizer(proj_train)
        proj_train = normalizer.normalize(proj_train)
        proj_test = normalizer.normalize(proj_test)

        # make estimator
        sgd_clf = LogisticRegression(
            optimizer=LogisticSGD(c=c_sgd, eps=eps_sgd))
        sdca_clf = LogisticRegression(optimizer=LogisticSDCA(c=c_sdca))

        # train estimators without history (SGD first, then SDCA)
        for clf in (sgd_clf, sdca_clf):
            clf.fit(proj_train, y_train, epochs=nb_epoch, save_hist=False)

        # accuracy on the training split
        sgd_train.append(sgd_clf.score_accuracy(proj_train, y_train))
        sdca_train.append(sdca_clf.score_accuracy(proj_train, y_train))

        # accuracy on the held-out split
        sgd_test.append(sgd_clf.score_accuracy(proj_test, y_test))
        sdca_test.append(sdca_clf.score_accuracy(proj_test, y_test))

    # one figure per optimizer, identical layout
    figures = (
        ("SGD accuracy vs. hyperparameter h\nfor gaussian projection (dim = {})\n on data set ",
         sgd_train, sgd_test),
        ("SDCA accuracy vs. hyperparameter h\nfor gaussian projection (dim = {})\n on data set ",
         sdca_train, sdca_test),
    )
    for title_template, train_curve, test_curve in figures:
        plt.figure()
        plt.semilogx(vect_param, train_curve, 'b', label="train")
        plt.semilogx(vect_param, test_curve, 'r', label="test")
        plt.title(title_template.format(dim) + data_name)
        plt.xlabel("h")
        plt.ylabel("Accuracy")
        plt.legend()
def plot_training(data, labels, nb_epoch, data_name, c_sgd, c_sdca, eps_sgd):
    """Train SGD and SDCA with history and plot their learning curves.

    ``data``/``labels`` are split 85/15 at random. A ``LogisticSGD(c=c_sgd,
    eps=eps_sgd)`` estimator and a ``LogisticSDCA(c=c_sdca)`` estimator are
    each trained for ``nb_epoch`` epochs with ``save_hist=True``.

    Side effects: creates three figures (SGD loss, SDCA loss, test accuracy
    of both as computed by ``get_hist_accuracy``) whose titles end with
    ``data_name``, and prints the final test accuracy of each estimator.
    """
    X_train, X_test, y_train, y_test = train_test_split(data,
                                                        labels,
                                                        test_size=0.15)

    # make estimator
    sgd = LogisticSGD(c=c_sgd, eps=eps_sgd)
    sgd_clf = LogisticRegression(optimizer=sgd)

    sdca = LogisticSDCA(c=c_sdca)
    sdca_clf = LogisticRegression(optimizer=sdca)

    # train estimator with history
    sgd_hist_w, sgd_hist_loss = sgd_clf.fit(X_train,
                                            y_train,
                                            epochs=nb_epoch,
                                            save_hist=True)
    sgd_hist_w = np.array(sgd_hist_w)

    plt.figure()
    plt.plot(sgd_hist_loss)
    plt.title("SGD learning loss vs. iteration\non data set " + data_name)
    plt.xlabel("Iteration")
    plt.ylabel("Loss")

    # final accuracy
    print("final accuracy SGD :", sgd_clf.score_accuracy(X_test, y_test))

    # do it again with SDCA !
    sdca_hist_w, sdca_hist_loss = sdca_clf.fit(X_train,
                                               y_train,
                                               epochs=nb_epoch,
                                               save_hist=True)
    sdca_hist_w = np.array(sdca_hist_w)

    plt.figure()
    plt.title("SDCA learning loss vs. iteration\non data set " + data_name)
    plt.xlabel("Iteration")
    plt.ylabel("Loss")
    plt.plot(sdca_hist_loss)

    # final accuracy
    print("final accuracy SDCA :", sdca_clf.score_accuracy(X_test, y_test))

    # test accuracy derived from the recorded weight histories
    sgd_hist_accuracy = get_hist_accuracy(X_test, y_test, sgd_hist_w, sgd_clf)
    sdca_hist_accuracy = get_hist_accuracy(X_test, y_test, sdca_hist_w,
                                           sdca_clf)
    plt.figure()
    plt.plot(sgd_hist_accuracy, c='b', label="SGD")
    plt.plot(sdca_hist_accuracy, c='g', label="SDCA")
    plt.title("Test accuracy vs. iteration\non data set " + data_name)
    plt.xlabel("Iteration")
    plt.ylabel("Accuracy")
    plt.legend()
コード例 #9
0
def _fit_and_report(optimizer, x, y, nb_epochs, projection, verbose_all,
                    is_malaptool):
    """Train one estimator with history, optionally plot it, return its loss history."""
    clf = LogisticRegression(optimizer=optimizer, projection=projection)

    # train estimator with history
    hist_w, hist_loss = clf.fit(x, y, epochs=nb_epochs, save_hist=True)

    if verbose_all:
        # The weight history is only needed for the per-dimension plot.
        hist_w = np.array(hist_w)

        plt.figure()
        plt.title("Evolution of the weights")
        for d in range(hist_w.shape[1]):
            plt.plot(hist_w[:, d])
        plt.show()

        plt.figure()
        plt.title("Evolution of the loss")
        plt.plot(hist_loss)
        plt.show()

        # verify result
        if is_malaptool:
            plt.figure()
            plt.title("Estimator regions")
            malaptools.plot_frontiere(x, clf.predict)
            malaptools.plot_data(x, y)
            plt.show()

    return hist_loss


def plot_learning(
    x,
    y,
    chosen_sgd=DEFAULT_SGD,
    chosen_sdca=DEFAULT_SDCA,
    nb_epochs=1,
    comp_sgd=True,
    comp_sdca=True,
    is_malaptool=False,
    verbose_all=False,
    projection: Callable[[np.ndarray],
                         np.ndarray] = projections.identity_projection):
    """Train SGD and/or SDCA estimators on ``(x, y)`` and plot their learning.

    Args:
        x, y: training data passed to ``LogisticRegression.fit``.
        chosen_sgd: optimizer used when ``comp_sgd`` is true.
        chosen_sdca: optimizer used when ``comp_sdca`` is true.
        nb_epochs: number of epochs for each fit.
        comp_sgd: train and report the SGD estimator.
        comp_sdca: train and report the SDCA estimator.
        is_malaptool: with ``verbose_all``, also draw the estimator regions
            and the data through ``malaptools``.
        verbose_all: show per-estimator weight and loss figures.
        projection: projection handed to ``LogisticRegression``.

    When both estimators are trained, a final figure compares their loss
    histories. The function returns nothing; figures are shown with
    ``plt.show()``.
    """
    if comp_sgd:
        sgd_hist_loss = _fit_and_report(chosen_sgd, x, y, nb_epochs,
                                        projection, verbose_all, is_malaptool)

    # do it again with SDCA !
    if comp_sdca:
        sdca_hist_loss = _fit_and_report(chosen_sdca, x, y, nb_epochs,
                                         projection, verbose_all,
                                         is_malaptool)

    # comparison
    if comp_sgd and comp_sdca:
        plt.figure()
        plt.title("Comparison of the evolution of the loss")
        plt.plot(sgd_hist_loss, label="SGD")
        plt.plot(sdca_hist_loss, label="SDCA")
        plt.legend()
        plt.show()
コード例 #10
0
        k += 2
        for j in range(i + 1, dim):
            Z[:, k] = np.multiply(X[:, i], X[:, j])
            k += 1
    return Z


# Disabled experiment: the ``if False`` guard means this never runs.
# It would project X with proj_degr2 and normalise the result with a
# Normalizer built on the projected data.
if False:
    X_poly = proj_degr2(X)
    normalizer = Normalizer(X_poly)
    Xnorm_poly = normalizer.normalize(X_poly)

# Disabled experiment: the ``if False`` guard means this never runs.
# It builds SGD and SDCA estimators and trains the SGD one, with history,
# on the projected and normalised data.
if False:
    # make estimator
    sgd = LogisticSGD(c=10, eps=1e-38)
    sgd_clf = LogisticRegression(optimizer=sgd)

    # the SDCA estimator is created here but not used in the lines below
    sdca = LogisticSDCA(c=10)
    sdca_clf = LogisticRegression(optimizer=sdca)

    nb_epoch = 5

    X_proj = proj_degr2(X)
    # NOTE(review): calls a bare normalize() while other code in this file
    # uses Normalizer(...).normalize(...) -- confirm which one is intended.
    X_proj_norm = normalize(X_proj)
    # train estimator with history
    sgd_hist_w_proj, sgd_hist_loss_proj = sgd_clf.fit(X_proj_norm,
                                                      Y,
                                                      epochs=nb_epoch,
                                                      save_hist=True)
    # array form of the recorded weight history
    sgd_hist_w_proj = np.array(sgd_hist_w_proj)