# Build an imbalanced two-class data set: a broad majority cloud around the
# origin and a tight minority cloud shifted to (2, 2).
np.random.seed(0)
n_samples_1, n_samples_2 = 1000, 100
X = np.concatenate((
    1.5 * np.random.randn(n_samples_1, 2),
    0.5 * np.random.randn(n_samples_2, 2) + [2, 2],
), axis=0)
y = np.array([0] * n_samples_1 + [1] * n_samples_2, dtype=np.float64)

# Shuffle samples and labels with the same permutation.
idx = np.arange(len(y))
np.random.shuffle(idx)
X, y = X[idx], y[idx]

# Standardize every feature column: subtract its mean, divide by its std.
mean, std = X.mean(axis=0), X.std(axis=0)
X = (X - mean) / std

# Unweighted fit; the separating line is  yy = a * xx - intercept / w[1].
clf = SGDClassifier(n_iter=100, alpha=0.01)
clf.fit(X, y)
w = clf.coef_.ravel()
a = -w[0] / w[1]
xx = np.linspace(-5, 5)
yy = a * xx - clf.intercept_ / w[1]

# Same model, but class 1 is given a weight of 10 at fit time.
wclf = SGDClassifier(n_iter=100, alpha=0.01)
wclf.fit(X, y, class_weight={1: 10})
ww = wclf.coef_.ravel()
wa = -ww[0] / ww[1]
wyy = wa * xx - wclf.intercept_ / ww[1]
} liblinear_res = benchmark(LinearSVC(**liblinear_parameters)) liblinear_err, liblinear_train_time, liblinear_test_time = liblinear_res ###################################################################### ## Train GNB model gnb_err, gnb_train_time, gnb_test_time = benchmark(GNB()) ###################################################################### ## Train SGD model sgd_parameters = { 'alpha': 0.001, 'n_iter': 2, } sgd_err, sgd_train_time, sgd_test_time = benchmark( SGDClassifier(**sgd_parameters)) ###################################################################### ## Print classification performance print("") print("Classification performance:") print("===========================") print("") def print_row(clf_type, train_time, test_time, err): print("%s %s %s %s" % (clf_type.ljust(12), ("%.4fs" % train_time).center(10), ("%.4fs" % test_time).center(10), ("%.4f" % err).center(10)))
# Shuffle samples and labels together; the fixed seed keeps the order
# reproducible between runs.
idx = np.arange(X.shape[0])
np.random.seed(13)
np.random.shuffle(idx)
X = X[idx]
y = y[idx]

# Standardize: subtract the per-feature mean and divide by the per-feature
# standard deviation.
mean = X.mean(axis=0)
std = X.std(axis=0)
X = (X - mean) / std

h = .02  # step size in the mesh

clf = SGDClassifier(alpha=0.001, n_iter=100).fit(X, y)

# Create a mesh to plot in, padded by one unit beyond the data range of the
# first two feature columns.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))

# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot.
Z = Z.reshape(xx.shape)
# The colormap is selected once, right before plotting; the original called
# set_cmap twice with no plotting in between, which was redundant.
pl.set_cmap(pl.cm.Paired)
'eps': 1e-3, } liblinear_res = benchmark(LinearSVC(**liblinear_parameters)) liblinear_err, liblinear_train_time, liblinear_test_time = liblinear_res ###################################################################### ## Train GNB model gnb_err, gnb_train_time, gnb_test_time = benchmark(GNB()) ###################################################################### ## Train SGD model sgd_parameters = { 'alpha': 0.001, 'n_iter': 2, } sgd_err, sgd_train_time, sgd_test_time = benchmark(SGDClassifier( **sgd_parameters)) ###################################################################### ## Print classification performance print("") print("Classification performance:") print("===========================") print("") def print_row(clf_type, train_time, test_time, err): print("%s %s %s %s" % (clf_type.ljust(12), ("%.4fs" % train_time).center(10), ("%.4fs" % test_time).center(10), ("%.4f" % err).center(10)))
separable dataset using a linear Support Vector Machines classifier trained using SGD. """ print __doc__ import numpy as np import pylab as pl from scikits.learn.linear_model import SGDClassifier # we create 40 separable points np.random.seed(0) X = np.r_[np.random.randn(20, 2) - [2,2], np.random.randn(20, 2) + [2, 2]] Y = [0]*20 + [1]*20 # fit the model clf = SGDClassifier(loss="hinge", alpha = 0.01, n_iter=50, fit_intercept=True) clf.fit(X, Y) # plot the line, the points, and the nearest vectors to the plane xx = np.linspace(-5, 5, 10) yy = np.linspace(-5, 5, 10) X1, X2 = np.meshgrid(xx, yy) Z = np.empty(X1.shape) for (i,j), val in np.ndenumerate(X1): x1 = val x2 = X2[i,j] p = clf.decision_function([x1, x2]) Z[i,j] = p[0] levels = [-1.0, 0.0, 1.0] linestyles = ['dashed','solid', 'dashed'] colors = 'k'