def test_sdca_hinge_elastic():
    clf = SDCAClassifier(alpha=0.5, l1_ratio=0.85, loss="hinge",
                         random_state=0)
    clf.fit(X_bin, y_bin)
    assert clf.score(X_bin, y_bin) == 1.0
def linear_cv(dataset_name, max_iter=1000, tol=1e-3, compute_jac=True):
    max_iter = max_iters[dataset_name]
    X, y = load_libsvm(dataset_name)
    X = X.tocsr()

    # Drop all-zero rows.
    num_nonzeros = np.diff(X.indptr)
    X = X[num_nonzeros != 0]
    y = y[num_nonzeros != 0]

    n_samples, n_features = X.shape
    C = Cs[dataset_name]

    # Reference dual solution of the SVM, obtained by running SDCA
    # to (near) machine precision.
    clf = SDCAClassifier(
        alpha=1 / (C * n_samples), loss='hinge', verbose=True,
        tol=1e-16, max_iter=max_iter)
    clf.fit(X, y)
    beta_star = np.abs(clf.dual_coef_[0])
    primal_star = np.sum(X.T.multiply(y * beta_star), axis=1)

    # Generalized support: dual coefficients strictly between 0 and C.
    full_supp = np.logical_and(
        np.logical_not(np.isclose(beta_star, 0)),
        np.logical_not(np.isclose(beta_star, C)))

    yX = X.multiply(y[:, np.newaxis])
    yX = yX.tocsr()

    # TODO to optimize
    # Right-hand side of the linear system: contribution of the
    # coefficients saturated at C.
    temp3 = np.zeros(n_samples)
    temp3[np.isclose(beta_star, C)] = C
    v = temp3[full_supp] - yX[full_supp, :] @ (
        yX[np.isclose(beta_star, C), :].T @
        temp3[np.isclose(beta_star, C)])

    temp = yX[full_supp, :] @ yX[full_supp, :].T
    temp = csc_matrix(temp)

    print("size system to solve %i" % v.shape[0])
    # Solve the restricted system with conjugate gradient.
    jac_dense = cg(temp, v, tol=1e-12)
    jac_star = np.zeros(n_samples)
    jac_star[full_supp] = jac_dense[0]
    jac_star[np.isclose(beta_star, C)] = C
    primal_jac_star = np.sum(X.T.multiply(y * jac_star), axis=1)

    model = SVM(X, y, np.log(C), max_iter=max_iter, tol=tol)
    list_beta, list_jac = compute_beta(
        X, y, np.log(C), model, save_iterates=True, tol=1e-32,
        max_iter=max_iter, compute_jac=True)

    M = X.T @ (list_beta * y).T
    M_jac = X.T @ (list_jac * y).T
    diff_beta = norm(M - primal_star, axis=0)
    diff_jac = norm(M_jac - primal_jac_star, axis=0)

    full_supp_star = np.logical_and(
        np.logical_not(np.isclose(list_beta[-1], 0)),
        np.logical_not(np.isclose(list_beta[-1], C)))

    # Walk backwards over the iterates to find the first iterate whose
    # generalized support matches the final one; supp_id stays 0 if the
    # support never changes.
    supp_id = 0
    n_iter = list_beta.shape[0]
    for i in np.arange(n_iter)[::-1]:
        full_supp = np.logical_and(
            np.logical_not(np.isclose(list_beta[i, :], 0)),
            np.logical_not(np.isclose(list_beta[i, :], C)))
        if not np.all(full_supp == full_supp_star):
            supp_id = i + 1
            break

    return dataset_name, C, diff_beta, diff_jac, n_iter, supp_id
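# linear_cv above reconstructs the primal SVM weight vector from the dual
# coefficients as w = sum_i beta_i * y_i * x_i, written with sparse
# operations. A minimal sketch on synthetic data (shapes and values are
# illustrative assumptions, not part of the original script) checking the
# sparse expression against its dense equivalent:
import numpy as np
from scipy.sparse import csr_matrix

rng = np.random.RandomState(0)
X_demo = csr_matrix(rng.randn(20, 5))
y_demo = rng.choice([-1.0, 1.0], size=20)
beta = rng.rand(20)

# Sparse form used in linear_cv ...
w_sparse = np.asarray(
    np.sum(X_demo.T.multiply(y_demo * beta), axis=1)).ravel()
# ... and its dense equivalent w = X^T (y * beta).
w_dense = X_demo.toarray().T @ (y_demo * beta)
np.testing.assert_allclose(w_sparse, w_dense)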
def test_sdca_squared_hinge_elastic(bin_train_data):
    X_bin, y_bin = bin_train_data
    clf = SDCAClassifier(alpha=0.5, l1_ratio=0.85, loss="squared_hinge",
                         random_state=0)
    clf.fit(X_bin, y_bin)
    assert clf.score(X_bin, y_bin) == 1.0
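# The fixture-style tests take `bin_train_data` and `train_data` arguments,
# and the older tests reference module-level X_bin, y_bin, X, y; none of
# these are defined in this excerpt. Below is a minimal sketch of fixtures
# consistent with the classes_ values asserted in test_bin_classes and
# test_multiclass_classes ([-1, 1] and [0, 1, 2]); the use of the iris
# dataset is an assumption, not part of the original code.
import pytest
from sklearn.datasets import load_iris


@pytest.fixture(scope="module")
def train_data():
    # Full three-class problem: classes_ == [0, 1, 2].
    iris = load_iris()
    return iris.data, iris.target


@pytest.fixture(scope="module")
def bin_train_data(train_data):
    # Binary sub-problem on the first two classes, relabeled to {-1, +1}:
    # classes_ == [-1, 1].
    X, y = train_data
    X_bin, y_bin = X[y <= 1], y[y <= 1] * 2 - 1
    return X_bin, y_bin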
def test_sdca_hinge_multiclass(train_data):
    X, y = train_data
    clf = SDCAClassifier(alpha=1e-2, max_iter=100, loss="hinge",
                         random_state=0)
    clf.fit(X, y)
    np.testing.assert_almost_equal(clf.score(X, y), 0.933, 3)
def test_sdca_smooth_hinge_l1_only():
    clf = SDCAClassifier(alpha=0.5, l1_ratio=1.0, loss="smooth_hinge",
                         tol=1e-2, max_iter=200, random_state=0)
    clf.fit(X_bin, y_bin)
    assert clf.score(X_bin, y_bin) == 1.0
def test_sdca_smooth_hinge_elastic(bin_train_data):
    X_bin, y_bin = bin_train_data
    clf = SDCAClassifier(alpha=0.5, l1_ratio=0.85, loss="smooth_hinge",
                         random_state=0)
    clf.fit(X_bin, y_bin)
    assert not hasattr(clf, 'predict_proba')
    assert clf.score(X_bin, y_bin) == 1.0
def test_sdca_squared_l1_only(bin_train_data):
    X_bin, y_bin = bin_train_data
    clf = SDCAClassifier(alpha=0.5, l1_ratio=1.0, loss="squared",
                         tol=1e-2, max_iter=100, random_state=0)
    clf.fit(X_bin, y_bin)
    assert clf.score(X_bin, y_bin) == 1.0
def test_sdca_callback():

    class Callback(object):

        def __init__(self, X, y):
            self.X = X
            self.y = y
            self.acc = []

        def __call__(self, clf):
            score = clf.score(self.X, self.y)
            self.acc.append(score)

    cb = Callback(X_bin, y_bin)
    clf = SDCAClassifier(alpha=0.5, l1_ratio=0.85, loss="hinge",
                         callback=cb, random_state=0)
    clf.fit(X_bin, y_bin)
    assert cb.acc[0] == 0.5
    assert cb.acc[-1] == 1.0
def test_sdca_squared():
    clf = SDCAClassifier(loss="squared", random_state=0)
    clf.fit(X_bin, y_bin)
    assert not hasattr(clf, 'predict_proba')
    assert clf.score(X_bin, y_bin) == 1.0
def test_sdca_absolute_l1_only():
    clf = SDCAClassifier(alpha=0.5, l1_ratio=1.0, loss="absolute",
                         tol=1e-2, max_iter=200, random_state=0)
    clf.fit(X_bin, y_bin)
    assert clf.score(X_bin, y_bin) == 1.0
def test_sdca_absolute():
    clf = SDCAClassifier(loss="absolute", random_state=0)
    clf.fit(X_bin, y_bin)
    assert not hasattr(clf, 'predict_proba')
    assert clf.score(X_bin, y_bin) == 1.0
def test_bin_classes(bin_train_data):
    X_bin, y_bin = bin_train_data
    clf = SDCAClassifier()
    clf.fit(X_bin, y_bin)
    assert list(clf.classes_) == [-1, 1]
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

from lightning.classification import SDCAClassifier

df = pd.read_csv("iris-data.txt", index_col=0, header=None)

# Encode the string class labels as integers.
le = preprocessing.LabelEncoder()
le.fit(df.values[:, 3])
data = df.values[:, :3]
result = le.transform(df.values[:, 3])

data_train, data_test, result_train, result_test = train_test_split(
    data, result, test_size=0.3, random_state=100)

clf = SDCAClassifier()
clf.fit(data_train, result_train)

# Map the integer predictions back to the original labels and save them.
predicted = le.inverse_transform(clf.predict(data_test))
with open("./result.csv", "w") as f:
    for line in predicted:
        f.write(line + "\n")
import sys

import joblib  # sklearn.externals.joblib was removed in recent scikit-learn

from lightning.classification import SDCAClassifier

if len(sys.argv) == 1:
    print("""
Please enter the path to amazon7_uncompressed_pkl/amazon7.pkl

Download data from http://www.mblondel.org/data/amazon7_uncompressed_pkl.tar.bz2
""")
    exit()

data = joblib.load(sys.argv[1], mmap_mode="r")
X = data["X"]
y = data["y"].copy()  # copy is needed to modify y.
y[y >= 1] = 1  # Create a binary classification problem.

clf = SDCAClassifier(tol=1e-5, max_iter=10, verbose=1)
clf.fit(X, y)
print(clf.score(X, y))
def test_multiclass_classes(train_data):
    X, y = train_data
    clf = SDCAClassifier()
    clf.fit(X, y)
    assert list(clf.classes_) == [0, 1, 2]
def test_sdca_hinge(bin_train_data):
    X_bin, y_bin = bin_train_data
    clf = SDCAClassifier(loss="hinge", random_state=0)
    clf.fit(X_bin, y_bin)
    assert not hasattr(clf, 'predict_proba')
    assert clf.score(X_bin, y_bin) == 1.0
times = [[], [], []]
obj = [[], [], []]
dual_obj = [[], [], []]
gap = [[], [], []]

for intercept in [0, 1]:  # [0, 1, 2]:
    # intercept == 0: sdca
    # intercept == 1: primal-dual cd
    # intercept == 2: necoara's constrained cd
    print(intercept)

    clf = SDCAClassifier(loss="hinge", alpha=alpha, C=C, max_iter=200,
                         n_calls=X.shape[0], random_state=0, l1_ratio=0,
                         verbose=0, tol=0, intercept=intercept)
    cb = Callback(X, y)
    clf.callback = cb
    clf.fit(X.tocsr(), y)

    times[intercept] = cb.times
    obj[intercept] = cb.obj
    dual_obj[intercept] = cb.dual_obj
    gap[intercept] = cb.gap

plt.figure()
plt.plot(times[0], gap[0], '.-',
         times[1], gap[1], '-',
         times[2], gap[2], '--', linewidth=2)
plt.yscale("log")
plt.xlabel("CPU time (s)")
plt.ylabel("Duality gap")
# Legend labels mirror the intercept modes listed above.
plt.legend(["SDCA (Shalev-Shwartz & Zhang)",
            "Primal-dual CD",
            "Necoara's constrained CD"])
plt.show()
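# The benchmark above assumes a Callback object exposing times, obj,
# dual_obj and gap. Below is a minimal sketch, modeled on the Callback in
# test_sdca_callback and not part of lightning's API: it records only
# wall-clock time and the primal objective (mean hinge loss plus L2
# penalty, computed from clf.coef_ and clf.alpha). dual_obj and gap need
# the solver's dual variables and are left empty here, so only the obj
# curves can be plotted from this version.
import time

import numpy as np


class Callback(object):

    def __init__(self, X, y):
        self.X = X
        self.y = y
        self.times = []
        self.obj = []
        self.dual_obj = []  # would require the dual coefficients
        self.gap = []       # would require primal and dual objectives
        self._start = time.time()

    def __call__(self, clf):
        self.times.append(time.time() - self._start)
        # Primal objective of the L2-regularized hinge-loss problem.
        w = clf.coef_.ravel()
        margins = self.y * (self.X @ w)
        hinge = np.mean(np.maximum(0.0, 1.0 - margins))
        self.obj.append(hinge + 0.5 * clf.alpha * np.dot(w, w))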