def linear_reg(algo={},tmp={},testset={},X=None,Y=None):
    """Fit a ridge model and a logistic model on one sample split and report errors.

    ``X`` and ``Y`` are forwarded to ``getSample`` together with ``ratio=0.6``;
    the four values it returns are used as train inputs, train targets, test
    inputs and test targets, in that order.  Each model is fitted on the train
    part, asked to predict the test part, and the predictions are handed to
    ``RMSE2``, ``MAE`` and ``ClassifyError`` (their return values are ignored
    here -- presumably they print or record the metric; confirm against their
    definitions).

    ``algo``, ``tmp`` and ``testset`` are accepted but never read by this
    function.  Nothing is returned.
    """
    Xtrain, Ytrain, Xtest, Ytest = getSample(X=X, Y=Y, ratio=0.6)

    def fit_and_report(estimator):
        # Same evaluation sequence for every model: fit, predict, three metrics.
        predicted = estimator.fit(Xtrain, Ytrain).predict(Xtest)
        RMSE2(pred=predicted, test=Ytest)
        MAE(pred=predicted, test=Ytest)
        ClassifyError(pred=predicted, test=Ytest)

    # Alternatives noted by the original author for this first model:
    # lm.LinearRegression and lm.Lasso(alpha=0.0005).
    print("linear_reg")
    fit_and_report(lm.Ridge(alpha=0.25))

    print("LogisticRegression")
    fit_and_report(lm.LogisticRegression(penalty="l2", C=2))
def test_weight():
    """
    Test class weights.

    First checks that giving class 1 a small weight makes an SVC predict
    class 2 for every sample of the module-level ``X``/``Y`` data.  Then, on a
    dataset produced by ``test_dataset_classif``, checks that each of three
    classifiers fitted with ``class_weight={0: 5}`` gets at least 11 of the
    held-out predictions right.
    """
    # A small weight on class 1 should push all predictions to class 2.
    down_weighted = svm.SVC()
    down_weighted.fit(X, Y, {1: 0.1})
    predicted = down_weighted.predict(X)
    assert_array_almost_equal(predicted, [2] * 6)

    data, labels = test_dataset_classif(n_samples=200, n_features=100,
                                        param=[5, 1], seed=0)

    # The first 180 samples are used for fitting, the remainder for checking.
    n_fit = 180
    estimators = (linear_model.LogisticRegression(),
                  svm.LinearSVC(),
                  svm.SVC())
    for estimator in estimators:
        estimator.fit(data[:n_fit], labels[:n_fit], class_weight={0: 5})
        held_out_pred = estimator.predict(data[n_fit:])
        n_correct = np.sum(held_out_pred == labels[n_fit:])
        assert n_correct >= 11
# Compute and plot the coefficients of an L1-penalized logistic regression
# for a range of regularization strengths.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Drop every sample whose target is 2.  X has to be filtered first because
# the mask is built from the still-unfiltered y.
X = X[y != 2]
y = y[y != 2]

# Subtract the mean taken along axis 0.
X -= np.mean(X, 0)

################################################################################
# Demo path functions

# 100 values, log-spaced from 1e2 down to 1e-4.
alphas = np.logspace(2, -4, 100)

print("Computing regularization path ...")
start = datetime.now()
clf = linear_model.LogisticRegression(C=1.0, penalty='l1', tol=1e-6)
# The same estimator is refitted for each alpha with C = 1 / alpha; the
# coefficients are copied so a later fit cannot overwrite an earlier result.
coefs_ = [clf.fit(X, y, C=1.0/alpha).coef_.ravel().copy() for alpha in alphas]
# Single-argument print works on Python 2 and 3; the two spaces reproduce the
# output of the original two-item print statement exactly.
print("This took  %s" % (datetime.now() - start))

# One row per alpha, plotted against -log10(alpha).
coefs_ = np.array(coefs_)
pl.plot(-np.log10(alphas), coefs_)
pl.xlabel('-log(alpha)')
pl.ylabel('Coefficients')
pl.title('Logistic Regression Path')
pl.axis('tight')
pl.show()
from scikits.learn import datasets, neighbors, linear_model

# Compare a nearest-neighbours classifier with logistic regression on the
# digits dataset, using the first 90% of the samples for fitting and the
# remaining 10% for scoring.
digits = datasets.load_digits()
X_digits = digits.data
y_digits = digits.target

n_samples = len(X_digits)
# Slice bounds must be integers: a float bound such as .9 * n_samples is
# rejected by current NumPy.  int() truncates, so the split point is the one
# the float bound used to select.
n_train = int(.9 * n_samples)

X_train = X_digits[:n_train]
y_train = y_digits[:n_train]
X_test = X_digits[n_train:]
y_test = y_digits[n_train:]

knn = neighbors.NeighborsClassifier()
logistic = linear_model.LogisticRegression()

# Single-argument print with %s formatting gives the same text as the
# original print statements and is valid on both Python 2 and 3.
print('KNN score: %s' % (knn.fit(X_train, y_train).score(X_test, y_test),))
print('LogisticRegression score: %s'
      % (logistic.fit(X_train, y_train).score(X_test, y_test),))
from scikits.learn import linear_model

# Build a one-feature binary problem, fit a logistic regression to it and
# plot the samples together with the fitted logistic curve.

# NOTE(review): xmin/xmax are not referenced in the visible part of this
# script; kept in case code further down uses them.
xmin, xmax = -5, 5
n_samples = 100
np.random.seed(0)
X = np.random.normal(size=n_samples)
# Labels come from the sign of the clean draw, before the positive values are
# stretched and noise is added.  The builtin float replaces np.float, which
# was only an alias for it and has been removed from NumPy.
y = (X > 0).astype(float)
X[X > 0] *= 4
X += .3 * np.random.normal(size=n_samples)

# Turn the 1-D sample vector into a single-column 2-D array for fit().
X = X[:, np.newaxis]
# run the classifier
clf = linear_model.LogisticRegression(C=1e5)
clf.fit(X, y)

# and plot the result
pl.figure(1, figsize=(4, 3))
pl.clf()
pl.scatter(X.ravel(), y, color='black', zorder=20)
X_test = np.linspace(-5, 10, 300)


def model(x):
    """Return the logistic function 1 / (1 + exp(-x)) of *x*."""
    return 1 / (1 + np.exp(-x))


# Despite its name, `loss` holds the logistic function evaluated on the
# fitted linear term coef_ * x + intercept_ over the X_test grid.
loss = model(X_test * clf.coef_ + clf.intercept_).ravel()
pl.plot(X_test, loss, color='blue', linewidth=3)