def setUp(self):
    """Prepare an SGD-backed estimator and a small two-cluster data set.

    The fixture stores:
      * ``self.sgd`` / ``self.estimator`` -- the optimizer and the
        ``LogisticRegression`` wrapping it;
      * ``self.x`` -- 20 two-dimensional points, the first 10 drawn around
        (-1, -1) and the last 10 around (1, 1), both with unit scale;
      * ``self.y`` -- the matching labels, -1 for the first cluster and +1
        for the second.
    """
    # NOTE(review): positional arguments are presumably (c, eps) -- confirm
    # against the LogisticSGD signature.
    optimizer = LogisticSGD(1, 1e-3)
    self.sgd = optimizer
    self.estimator = LogisticRegression(optimizer=optimizer)

    # Fixed seed so the sampled points are the same on every run.
    np.random.seed(0)
    n_per_class = 10
    negatives = np.random.normal(loc=(-1, -1), scale=(1, 1),
                                 size=(n_per_class, 2))
    positives = np.random.normal(loc=(1, 1), scale=(1, 1),
                                 size=(n_per_class, 2))
    self.x = np.concatenate([negatives, positives])

    labels_neg = -np.ones(shape=n_per_class)
    labels_pos = np.ones(shape=n_per_class)
    self.y = np.concatenate([labels_neg, labels_pos])
def plot_gausian_arr():
    """Plot SGD and SDCA training curves on the Gaussian-projected
    Arrhythmia data.

    The data returned by ``import_data_arrhythmia`` is split into train and
    test parts (``test_size=0.15``), projected with ``gaussian_proj`` and
    normalized.  Both estimators are then trained for 40 epochs with
    history, and three figures are created: the SGD loss, the SDCA loss and
    the test accuracy of both along the training history.

    The projection base and the normalizer are fitted on the training part
    only and then applied to the test part (same procedure as ``eval_h``),
    so that no information from the held-out rows leaks into the
    preprocessing.
    """
    data_name = "Arrhythmia"
    nb_epoch = 40
    h = 10
    prop = 0.1

    x, y = import_data_arrhythmia()

    # Split first: everything fitted below must only see training rows.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15)

    # base of projection, built from the training rows only
    Base_proj = create_base(x_train, prop=prop)
    X_train = gaussian_proj(x_train, Base_proj, h)
    X_test = gaussian_proj(x_test, Base_proj, h)

    # normalization: statistics come from the training part and are reused
    # unchanged on the test part
    normalizer = Normalizer(X_train)
    X_train = normalizer.normalize(X_train)
    X_test = normalizer.normalize(X_test)

    # make estimator
    sgd = LogisticSGD(c=10**3, eps=10**-6)
    sgd_clf = LogisticRegression(optimizer=sgd)
    sdca = LogisticSDCA(c=10**-1)
    sdca_clf = LogisticRegression(optimizer=sdca)

    # train estimator with history
    sgd_hist_w, sgd_hist_loss = sgd_clf.fit(X_train, y_train,
                                            epochs=nb_epoch, save_hist=True)
    sgd_hist_w = np.array(sgd_hist_w)
    sdca_hist_w, sdca_hist_loss = sdca_clf.fit(X_train, y_train,
                                               epochs=nb_epoch,
                                               save_hist=True)
    sdca_hist_w = np.array(sdca_hist_w)

    plt.figure()
    plt.plot(sgd_hist_loss)
    plt.title("SGD learning loss vs. iteration\non data set " + data_name)
    plt.xlabel("Iteration")
    plt.ylabel("Loss")

    plt.figure()
    plt.title("SDCA learning loss vs. iteration\non data set " + data_name)
    plt.xlabel("Iteration")
    plt.ylabel("Loss")
    plt.plot(sdca_hist_loss)

    # Accuracy on the held-out part for every weight vector in the history.
    sgd_hist_accuracy = get_hist_accuracy(X_test, y_test, sgd_hist_w, sgd_clf)
    sdca_hist_accuracy = get_hist_accuracy(X_test, y_test, sdca_hist_w,
                                           sdca_clf)

    plt.figure()
    plt.plot(sgd_hist_accuracy, c='b', label="SGD")
    plt.plot(sdca_hist_accuracy, c='g', label="SDCA")
    plt.title("Test accuracy vs. iteration\non data set " + data_name)
    plt.xlabel("Iteration")
    plt.ylabel("Accuracy")
    plt.legend()
def eval_eps(data, labels, vect_param, nb_epoch, data_name, param_c=10**1):
    """Plot SGD train/test accuracy as a function of the ``eps`` parameter.

    ``data``/``labels`` are split once (``test_size=0.15``).  For every
    value in ``vect_param`` a fresh ``LogisticSGD(c=param_c, eps=value)``
    estimator is trained for ``nb_epoch`` epochs and scored on both parts.
    The two accuracy curves are drawn on a new figure with a logarithmic
    x axis; ``data_name`` only appears in the title.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        data, labels, test_size=0.15)

    # One (train accuracy, test accuracy) pair per tested epsilon.
    accuracy_pairs = []
    for eps_value in vect_param:
        classifier = LogisticRegression(
            optimizer=LogisticSGD(c=param_c, eps=eps_value))
        # history is not needed here
        classifier.fit(X_train, y_train, epochs=nb_epoch, save_hist=False)
        accuracy_pairs.append((
            classifier.score_accuracy(X_train, y_train),
            classifier.score_accuracy(X_test, y_test),
        ))

    train_curve = [train_acc for train_acc, _ in accuracy_pairs]
    test_curve = [test_acc for _, test_acc in accuracy_pairs]

    plt.figure()
    plt.semilogx(vect_param, train_curve, 'b', label="train")
    plt.semilogx(vect_param, test_curve, 'r', label="test")
    plt.title("Accuracy of SGD vs. hyperparameter epsilon \non data set "
              + data_name)
    plt.xlabel("Epsilon")
    plt.ylabel("Accuracy")
    plt.legend()
class TestLogisticSGD(unittest.TestCase):
    """Smoke and regression tests for ``LogisticRegression`` with SGD."""

    def setUp(self):
        """Create the estimator and a seeded two-cluster toy data set.

        ``self.x`` holds 10 points drawn around (-1, -1) followed by 10
        points drawn around (1, 1); ``self.y`` holds the labels -1 and +1
        in the same order.
        """
        optimizer = LogisticSGD(1, 1e-3)
        self.sgd = optimizer
        self.estimator = LogisticRegression(optimizer=optimizer)

        # Fixed seed: testPredict depends on the exact sampled points.
        np.random.seed(0)
        n_per_class = 10
        negatives = np.random.normal(loc=(-1, -1), scale=(1, 1),
                                     size=(n_per_class, 2))
        positives = np.random.normal(loc=(1, 1), scale=(1, 1),
                                     size=(n_per_class, 2))
        self.x = np.concatenate([negatives, positives])

        labels_neg = -np.ones(shape=n_per_class)
        labels_pos = np.ones(shape=n_per_class)
        self.y = np.concatenate([labels_neg, labels_pos])

    def testFit(self):
        """Fitting on the toy data must run without raising."""
        np.random.seed(10)
        self.estimator.fit(self.x, self.y)

    def testPredict(self):
        """Predictions with fixed weights give the recorded error rate."""
        self.estimator.w = [0.20140517, 0.26121764]
        mismatches = self.estimator.predict(self.x) != self.y
        error_rate = sum(mismatches) / len(mismatches)
        # regression test: value recorded for these weights and this data
        self.assertEqual(error_rate, 0.1)
def compute_search(
        x_train: np.ndarray,
        y_train: np.ndarray,
        x_test: np.ndarray,
        y_test: np.ndarray,
        param_name: str,
        param_values: Union[list, np.ndarray],
        optimizer_type,
        optimizer_kwargs: Union[dict, None] = None,
        projection: Callable[[np.ndarray],
                             np.ndarray] = projections.identity_projection):
    """Evaluate one optimizer hyperparameter over a list of values.

    For every value in ``param_values`` an optimizer is built as
    ``optimizer_type(**kwargs)``, wrapped in a ``LogisticRegression`` with
    the given ``projection``, trained for 15 epochs on the training data and
    scored on both the training and the test data.

    Args:
        x_train, y_train: data used to fit each estimator.
        x_test, y_test: data used for the test score.
        param_name: keyword name of the searched optimizer parameter.
        param_values: values to try for ``param_name``.
        optimizer_type: callable building an optimizer from keyword
            arguments.
        optimizer_kwargs: extra keyword arguments shared by all runs.  The
            searched parameter overrides an entry with the same name.  The
            dict passed in is never modified.
        projection: projection handed to ``LogisticRegression``.

    Returns:
        ``(scores_train, scores_test)``: two lists with one accuracy per
        entry of ``param_values``, in the same order.
    """
    # Copy once so the caller's dict is left untouched (the previous
    # implementation updated it in place, leaking the last searched value).
    base_kwargs = dict(optimizer_kwargs) if optimizer_kwargs is not None else {}

    scores_train = []
    scores_test = []
    for param_value in param_values:
        # Same seed for every run so only the searched parameter changes.
        np.random.seed(50307)

        # gather parameters: the searched value takes precedence
        run_kwargs = {**base_kwargs, param_name: param_value}

        # init optimizer and estimator
        optimizer = optimizer_type(**run_kwargs)
        estimator = LogisticRegression(optimizer=optimizer,
                                       projection=projection)

        # fit estimator (the history requested here is not used afterwards)
        estimator.fit(x_train, y_train, epochs=15, save_hist=True)

        # evaluate
        scores_train.append(estimator.score_accuracy(x_train, y_train))
        scores_test.append(estimator.score_accuracy(x_test, y_test))

    return scores_train, scores_test
def eval_c(data, labels, vect_param, nb_epoch, data_name, eps_base=10**-6):
    """Plot SGD and SDCA train/test accuracy as a function of ``c``.

    ``data``/``labels`` are split once (``test_size=0.15``).  For every
    value in ``vect_param`` a ``LogisticSGD(c=value, eps=eps_base)`` and a
    ``LogisticSDCA(c=value)`` estimator are trained for ``nb_epoch`` epochs
    and scored on both parts.  One figure per optimizer is created, each
    with a logarithmic x axis; ``data_name`` only appears in the titles.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        data, labels, test_size=0.15)

    sgd_train, sgd_test = [], []
    sdca_train, sdca_test = [], []

    for c_value in vect_param:
        # make estimators
        sgd_clf = LogisticRegression(
            optimizer=LogisticSGD(c=c_value, eps=eps_base))
        sdca_clf = LogisticRegression(optimizer=LogisticSDCA(c=c_value))

        # train both without history
        for classifier in (sgd_clf, sdca_clf):
            classifier.fit(X_train, y_train, epochs=nb_epoch,
                           save_hist=False)

        # train scores first, then test scores
        sgd_train.append(sgd_clf.score_accuracy(X_train, y_train))
        sdca_train.append(sdca_clf.score_accuracy(X_train, y_train))
        sgd_test.append(sgd_clf.score_accuracy(X_test, y_test))
        sdca_test.append(sdca_clf.score_accuracy(X_test, y_test))

    figures = (
        (sgd_train, sgd_test,
         "SGD accuracy vs. hyperparameter C\n on data set " + data_name),
        (sdca_train, sdca_test,
         "SDCA accuracy vs. hyperparameter C\n on data set " + data_name),
    )
    for train_curve, test_curve, title in figures:
        plt.figure()
        plt.semilogx(vect_param, train_curve, 'b', label="train")
        plt.semilogx(vect_param, test_curve, 'r', label="test")
        plt.title(title)
        plt.xlabel("C")
        plt.ylabel("Accuracy")
        plt.legend()
def eval_h(data, labels, vect_param, nb_epoch, data_name, prop_base, c_sgd,
           c_sdca, eps_sgd):
    """Plot SGD and SDCA accuracy as a function of the projection parameter h.

    ``data``/``labels`` are split once (``test_size=0.15``) and a projection
    base is built from the training part with
    ``create_base(x_train, prop=prop_base)``.  For every value in
    ``vect_param`` both parts are projected with ``gaussian_proj``,
    normalized with a ``Normalizer`` built on the projected training part,
    and a ``LogisticSGD(c=c_sgd, eps=eps_sgd)`` and a
    ``LogisticSDCA(c=c_sdca)`` estimator are trained for ``nb_epoch`` epochs.
    One figure per optimizer shows train and test accuracy on a logarithmic
    x axis.  The first dimension of the base is printed and shown in the
    titles.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        data, labels, test_size=0.15)

    Base_proj = create_base(x_train, prop=prop_base)
    dim = Base_proj.shape[0]
    print("dim :", dim)

    sgd_train, sgd_test = [], []
    sdca_train, sdca_test = [], []

    for h_value in vect_param:
        projected_train = gaussian_proj(x_train, Base_proj, h_value)
        projected_test = gaussian_proj(x_test, Base_proj, h_value)

        # normalisation: the normalizer is built on the training part and
        # applied to both parts
        normalizer = Normalizer(projected_train)
        projected_train = normalizer.normalize(projected_train)
        projected_test = normalizer.normalize(projected_test)

        # make estimators
        sgd_clf = LogisticRegression(
            optimizer=LogisticSGD(c=c_sgd, eps=eps_sgd))
        sdca_clf = LogisticRegression(optimizer=LogisticSDCA(c=c_sdca))

        # train both without history
        for classifier in (sgd_clf, sdca_clf):
            classifier.fit(projected_train, y_train, epochs=nb_epoch,
                           save_hist=False)

        # train scores first, then test scores
        sgd_train.append(sgd_clf.score_accuracy(projected_train, y_train))
        sdca_train.append(sdca_clf.score_accuracy(projected_train, y_train))
        sgd_test.append(sgd_clf.score_accuracy(projected_test, y_test))
        sdca_test.append(sdca_clf.score_accuracy(projected_test, y_test))

    figures = (
        (sgd_train, sgd_test,
         "SGD accuracy vs. hyperparameter h\nfor gaussian projection (dim = {})\n on data set "
         .format(dim) + data_name),
        (sdca_train, sdca_test,
         "SDCA accuracy vs. hyperparameter h\nfor gaussian projection (dim = {})\n on data set "
         .format(dim) + data_name),
    )
    for train_curve, test_curve, title in figures:
        plt.figure()
        plt.semilogx(vect_param, train_curve, 'b', label="train")
        plt.semilogx(vect_param, test_curve, 'r', label="test")
        plt.title(title)
        plt.xlabel("h")
        plt.ylabel("Accuracy")
        plt.legend()
def plot_training(data, labels, nb_epoch, data_name, c_sgd, c_sdca, eps_sgd):
    """Train SGD and SDCA with history and plot their learning curves.

    ``data``/``labels`` are split once (``test_size=0.15``).  A
    ``LogisticSGD(c=c_sgd, eps=eps_sgd)`` and a ``LogisticSDCA(c=c_sdca)``
    estimator are trained for ``nb_epoch`` epochs with ``save_hist=True``.
    Three figures are created: the SGD loss, the SDCA loss, and the test
    accuracy of both along the weight history (via ``get_hist_accuracy``).
    The final test accuracy of each estimator is printed.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        data, labels, test_size=0.15)

    # make estimators
    sgd_clf = LogisticRegression(optimizer=LogisticSGD(c=c_sgd, eps=eps_sgd))
    sdca_clf = LogisticRegression(optimizer=LogisticSDCA(c=c_sdca))

    # --- SGD: train with history ---
    sgd_weights, sgd_losses = sgd_clf.fit(
        X_train, y_train, epochs=nb_epoch, save_hist=True)
    sgd_weights = np.array(sgd_weights)

    # Disabled plot of the weight history, kept for reference:
    # plt.figure()
    # plt.title("Evolution of the weights : SGD")
    # for d in range(sgd_hist_w.shape[1]):
    #     plt.plot(sgd_hist_w[:, d])

    plt.figure()
    plt.plot(sgd_losses)
    plt.title("SGD learning loss vs. iteration\non data set " + data_name)
    plt.xlabel("Iteration")
    plt.ylabel("Loss")

    # final accuracy
    print("final accuracy SGD :", sgd_clf.score_accuracy(X_test, y_test))

    # --- SDCA: same procedure ---
    sdca_weights, sdca_losses = sdca_clf.fit(
        X_train, y_train, epochs=nb_epoch, save_hist=True)
    sdca_weights = np.array(sdca_weights)

    # Disabled plot of the weight history, kept for reference:
    # plt.figure()
    # plt.title("Evolution of the weights : SDCA")
    # for d in range(sdca_hist_w.shape[1]):
    #     plt.plot(sdca_hist_w[:, d])

    plt.figure()
    plt.title("SDCA learning loss vs. iteration\non data set " + data_name)
    plt.xlabel("Iteration")
    plt.ylabel("Loss")
    plt.plot(sdca_losses)

    # final accuracy
    print("final accuracy SDCA :", sdca_clf.score_accuracy(X_test, y_test))

    # --- test accuracy along both histories ---
    sgd_accuracy = get_hist_accuracy(X_test, y_test, sgd_weights, sgd_clf)
    sdca_accuracy = get_hist_accuracy(X_test, y_test, sdca_weights, sdca_clf)

    plt.figure()
    plt.plot(sgd_accuracy, c='b', label="SGD")
    plt.plot(sdca_accuracy, c='g', label="SDCA")
    plt.title("Test accuracy vs. iteration\non data set " + data_name)
    plt.xlabel("Iteration")
    plt.ylabel("Accuracy")
    plt.legend()
def plot_learning(
        x,
        y,
        chosen_sgd=DEFAULT_SGD,
        chosen_sdca=DEFAULT_SDCA,
        nb_epochs=1,
        comp_sgd=True,
        comp_sdca=True,
        is_malaptool=False,
        verbose_all=False,
        projection: Callable[[np.ndarray],
                             np.ndarray] = projections.identity_projection):
    """Train the selected optimizers on ``(x, y)`` and show diagnostics.

    For each enabled optimizer (``comp_sgd`` -> ``chosen_sgd``,
    ``comp_sdca`` -> ``chosen_sdca``) a ``LogisticRegression`` with the
    given ``projection`` is trained for ``nb_epochs`` epochs with history.
    With ``verbose_all`` the weight and loss histories are plotted; with
    ``is_malaptool`` the decision regions and the data are drawn through
    ``malaptools``.  When both optimizers are enabled, a final figure
    compares their loss histories.
    """

    def _train_and_show(optimizer):
        """Fit one estimator, show its optional plots, return its losses."""
        classifier = LogisticRegression(optimizer=optimizer,
                                        projection=projection)
        weight_hist, loss_hist = classifier.fit(x, y, epochs=nb_epochs,
                                                save_hist=True)
        weight_hist = np.array(weight_hist)

        if verbose_all:
            # one curve per column of the weight history
            plt.figure()
            plt.title("Evolution of the weights")
            for column in range(weight_hist.shape[1]):
                plt.plot(weight_hist[:, column])
            plt.show()

            plt.figure()
            plt.title("Evolution of the loss")
            plt.plot(loss_hist)
            plt.show()

        if is_malaptool:
            # visual check of the learned decision regions
            plt.figure()
            plt.title("Estimator regions")
            malaptools.plot_frontiere(x, classifier.predict)
            malaptools.plot_data(x, y)
            plt.show()

        return loss_hist

    if comp_sgd:
        sgd_hist_loss = _train_and_show(chosen_sgd)

    if comp_sdca:
        sdca_hist_loss = _train_and_show(chosen_sdca)

    # comparison, only meaningful when both were trained
    if comp_sgd and comp_sdca:
        plt.figure()
        plt.title("Comparison of the evolution of the loss")
        plt.plot(sgd_hist_loss, label="SGD")
        plt.plot(sdca_hist_loss, label="SDCA")
        plt.legend()
        plt.show()
k += 2 for j in range(i + 1, dim): Z[:, k] = np.multiply(X[:, i], X[:, j]) k += 1 return Z if False: X_poly = proj_degr2(X) normalizer = Normalizer(X_poly) Xnorm_poly = normalizer.normalize(X_poly) if False: # make estimator sgd = LogisticSGD(c=10, eps=1e-38) sgd_clf = LogisticRegression(optimizer=sgd) sdca = LogisticSDCA(c=10) sdca_clf = LogisticRegression(optimizer=sdca) nb_epoch = 5 X_proj = proj_degr2(X) X_proj_norm = normalize(X_proj) # train estimator with history sgd_hist_w_proj, sgd_hist_loss_proj = sgd_clf.fit(X_proj_norm, Y, epochs=nb_epoch, save_hist=True) sgd_hist_w_proj = np.array(sgd_hist_w_proj)