the plotted HP is not the max margin HP. """ print __doc__ import numpy as np import pylab as pl from scikits.learn.sgd.sparse import SGD # we create 40 separable points np.random.seed(0) X = np.r_[np.random.randn(20, 2) - [2, 2], np.random.randn(20, 2) + [2, 2]] Y = [0] * 20 + [1] * 20 # fit the model clf = SGD(loss="hinge", alpha=0.01, n_iter=50, fit_intercept=True) clf.fit(X, Y) # plot the line, the points, and the nearest vectors to the plane xx = np.linspace(-5, 5, 10) yy = np.linspace(-5, 5, 10) X1, X2 = np.meshgrid(xx, yy) Z = np.empty(X1.shape) for (i, j), val in np.ndenumerate(X1): x1 = val x2 = X2[i, j] p = clf.predict_margin([x1, x2]) Z[i, j] = p[0] levels = [-1.0, 0.0, 1.0] linestyles = ['dashed', 'solid', 'dashed'] colors = 'k' pl.set_cmap(pl.cm.Paired)
print("")

# Train the classifier.  The estimator object itself is printed (rather than
# a hand-copied parameter string) so the log cannot drift out of sync with
# the hyper-parameters that are actually used.
print("Training a linear SVM (hinge loss and L2 regularizer) using SGD:")
clf = SGD(n_iter=50, alpha=0.00001, fit_intercept=True)
print(clf)
# Start the timer after construction so that only fit() is measured.
t0 = time()
clf.fit(data, target)
print("done in %fs" % (time() - t0))
print("Percentage of non zeros coef: %f" % (np.mean(clf.coef_ != 0) * 100))

# Load the held-out split and time the load.
print("Loading 20 newsgroups test set... ")
t0 = time()
news_test = load_mlcomp('20news-18828', 'test', sparse=True)
print("done in %fs" % (time() - t0))

# Keep only the test samples labelled `pos` or `neg`, positives first.
# NOTE(review): `pos` and `neg` are defined outside this section; presumably
# they are the two class labels of the binary task -- confirm.
target = news_test.target
pos_idx = np.where(target == pos)[0]
neg_idx = np.where(target == neg)[0]
idx = np.concatenate((pos_idx, neg_idx))
data = news_test.data[idx]
target = news_test.target[idx]
print("%d categories" % len(news_train.target_names))

# Turn the raw training documents into a sparse feature matrix.
print("Extracting features from the dataset using a sparse vectorizer")
t0 = time()
vectorizer = Vectorizer()
# The generator reads one file at a time.
# NOTE: the file handles are never closed explicitly here.
X_train = vectorizer.fit_transform((open(f).read()
                                    for f in news_train.filenames))
print("done in %fs" % (time() - t0))
print("n_samples: %d, n_features: %d" % X_train.shape)
assert sp.issparse(X_train)
y_train = news_train.target

# Fit the classifier and report training time and coefficient density.
print("Training a linear SVM (hinge loss and L2 regularizer) using SGD.\n"
      "SGD(n_iter=50, alpha=0.00001, fit_intercept=True)")
t0 = time()
clf = SGD(n_iter=50, alpha=0.00001, fit_intercept=True)
clf.fit(X_train, y_train)
print("done in %fs" % (time() - t0))
print("Percentage of non zeros coef: %f" % (np.mean(clf.coef_ != 0) * 100))

# Load the test split.  The timer must start *before* load_mlcomp(): it was
# previously reset after the load, so the elapsed time printed below was
# always close to zero.
print("Loading 20 newsgroups test set... ")
t0 = time()
news_test = load_mlcomp('20news-18828', 'test',
                        categories=['alt.atheism', 'comp.graphics'])
print("done in %fs" % (time() - t0))

print("Predicting the labels of the test set...")
print("%d documents" % len(news_test.filenames))
print("%d categories" % len(news_test.target_names))

print("Extracting features from the dataset using the same vectorizer")
the plotted HP is not the max margin HP. """ print __doc__ import numpy as np import pylab as pl from scikits.learn.sgd.sparse import SGD # we create 40 separable points np.random.seed(0) X = np.r_[np.random.randn(20, 2) - [2,2], np.random.randn(20, 2) + [2, 2]] Y = [0]*20 + [1]*20 # fit the model clf = SGD(loss="hinge", alpha = 0.01, n_iter=50, fit_intercept=True) clf.fit(X, Y) # plot the line, the points, and the nearest vectors to the plane xx = np.linspace(-5, 5, 10) yy = np.linspace(-5, 5, 10) X1, X2 = np.meshgrid(xx, yy) Z = np.empty(X1.shape) for (i,j), val in np.ndenumerate(X1): x1 = val x2 = X2[i,j] p = clf.predict_margin([x1, x2]) Z[i,j] = p[0] levels = [-1.0, 0.0, 1.0] linestyles = ['dashed','solid', 'dashed'] colors = 'k' pl.set_cmap(pl.cm.Paired)