def extra_material():
    """Compare a hand-written linear SVM with scikit-learn's SVC and SGDClassifier.

    Trains ``MyLinearSVC``, ``SVC(kernel="linear")`` and a hinge-loss
    ``SGDClassifier`` on two iris features (petal length and petal width)
    with a binary target (class 2, Iris-Virginica, vs. the rest). Prints the
    learned intercepts/coefficients and saves three figures via
    ``plt.savefig(PNG_PATH + name)``: ``svm_cost_vs_epochs``,
    ``mine_vs_theirs`` and ``sgd_clssifier``.

    Takes no arguments and returns ``None``; the results are the printed
    parameters and the saved figures.
    """
    # Only consumed by the disabled tolerance-vs-training-time experiment
    # below; X and y are reassigned from the iris data before any live use.
    X, y = make_moons(n_samples=1000, noise=0.4, random_state=42)
    # plt.plot(X[:, 0][y==0], X[:, 1][y==0], "bs")
    # plt.plot(X[:, 0][y==1], X[:, 1][y==1], "g^")
    # plt.savefig(PNG_PATH + "extra_moons", dpi=300)
    # plt.close()
    # tol = 0.1
    # tols = []
    # times = []
    # for i in range(10):
    #     svm_clf = SVC(kernel="poly", gamma=3, C=10, tol=tol, verbose=1)
    #     t1 = time.time()
    #     svm_clf.fit(X, y)
    #     t2 = time.time()
    #     times.append(t2-t1)
    #     tols.append(tol)
    #     print(i, tol, t2-t1)
    #     tol /= 10
    # plt.semilogx(tols, times)
    # plt.savefig(PNG_PATH + "learning_time", dpi=300)
    # plt.close()

    # Training set
    iris = datasets.load_iris()
    X = iris["data"][:, (2, 3)]  # petal length, petal width
    y = (iris["target"] == 2).astype(np.float64).reshape(-1, 1)  # Iris-Virginica

    C = 2
    svm_clf = MyLinearSVC(C=C, eta0=10, eta_d=1000, n_epochs=60000, random_state=2)
    # A leftover pdb.set_trace() used to sit here; it stopped every run in the
    # interactive debugger, so it has been removed.
    svm_clf.fit(X, y)
    # The prediction is discarded; the call only exercises predict().
    svm_clf.predict(np.array([[5, 2], [4, 1]]))

    # Cost recorded by MyLinearSVC (its Js attribute) against the epoch index.
    plt.plot(range(svm_clf.n_epochs), svm_clf.Js)
    plt.axis([0, svm_clf.n_epochs, 0, 100])
    plt.savefig(PNG_PATH + "svm_cost_vs_epochs", dpi=300)
    plt.close()
    print(svm_clf.intercept_, svm_clf.coef_)

    # Reference model: scikit-learn's SVC with a linear kernel and the same C.
    svm_clf2 = SVC(kernel="linear", C=C)
    svm_clf2.fit(X, y.ravel())
    print(svm_clf2.intercept_, svm_clf2.coef_)

    # Side-by-side decision boundaries: MyLinearSVC (left) vs. SVC (right).
    yr = y.ravel()
    plt.figure(figsize=(12, 3.2))
    plt.subplot(121)
    plt.plot(X[:, 0][yr == 1], X[:, 1][yr == 1], "g^", label="Iris-Virginica")
    plt.plot(X[:, 0][yr == 0], X[:, 1][yr == 0], "bs", label="Not Iris-Virginica")
    plot_svc_decision_boundary(svm_clf, 4, 6)
    plt.xlabel("Petal length", fontsize=14)
    plt.ylabel("Petal width", fontsize=14)
    plt.title("MyLinearSVC", fontsize=14)
    plt.axis([4, 6, 0.8, 2.8])

    plt.subplot(122)
    plt.plot(X[:, 0][yr == 1], X[:, 1][yr == 1], "g^")
    plt.plot(X[:, 0][yr == 0], X[:, 1][yr == 0], "bs")
    plot_svc_decision_boundary(svm_clf2, 4, 6)
    plt.xlabel("Petal length", fontsize=14)
    plt.title("SVC", fontsize=14)
    plt.axis([4, 6, 0.8, 2.8])
    plt.savefig(PNG_PATH + "mine_vs_theirs", dpi=300)
    plt.close()

    # Third model: SGDClassifier with hinge loss.
    sgd_clf = SGDClassifier(loss="hinge", alpha=0.017, max_iter=50, random_state=42)
    sgd_clf.fit(X, y.ravel())

    m = len(X)
    t = y * 2 - 1  # -1 if y==0, +1 if y==1
    X_b = np.c_[np.ones((m, 1)), X]  # Add bias input x0=1
    X_b_t = X_b * t
    sgd_theta = np.r_[sgd_clf.intercept_[0], sgd_clf.coef_[0]]
    print(sgd_theta)

    # Samples whose signed score t * (theta . x_b) is below 1 are stored as the
    # model's "support vectors". support_vectors_ and C are attached by hand
    # before the model goes to plot_svc_decision_boundary.
    # NOTE(review): presumably that helper reads these attributes - confirm.
    support_vectors_idx = (X_b_t.dot(sgd_theta) < 1).ravel()
    sgd_clf.support_vectors_ = X[support_vectors_idx]
    sgd_clf.C = C

    plt.figure(figsize=(5.5, 3.2))
    plt.plot(X[:, 0][yr == 1], X[:, 1][yr == 1], "g^")
    plt.plot(X[:, 0][yr == 0], X[:, 1][yr == 0], "bs")
    plot_svc_decision_boundary(sgd_clf, 4, 6)
    plt.xlabel("Petal length", fontsize=14)
    plt.ylabel("Petal width", fontsize=14)
    plt.title("SGDClassifier", fontsize=14)
    plt.axis([4, 6, 0.8, 2.8])
    # NOTE(review): "sgd_clssifier" looks like a typo for "sgd_classifier"; the
    # name is kept because other tooling may reference this output file.
    plt.savefig(PNG_PATH + "sgd_clssifier", dpi=300)
    plt.close()
def mySVC_vs_scikit():
    """Fit three linear classifiers on iris petal data and plot them side by side.

    The models are ``MyBinaryLinearSVC``, scikit-learn's
    ``SVC(kernel="linear")`` and a hinge-loss ``SGDClassifier``. All are
    trained on iris feature columns 2 and 3 with a binary target
    (class 2 vs. the rest). Their weights and biases are printed, then each
    decision boundary is drawn in its own subplot, the figure is passed to
    ``save_fig("Comparison_mySVM_vs_Scikit")`` and shown.

    Takes no arguments and returns ``None``.
    """
    from sklearn.svm import SVC
    from sklearn.linear_model import SGDClassifier
    from sklearn.datasets import load_iris

    dataset = load_iris()
    X = dataset["data"][:, (2, 3)]
    y = (dataset["target"] == 2).astype(np.float64).reshape(-1, 1)
    C = 2

    # --- fit the three models (same order as they are later plotted) -------
    my_clf = MyBinaryLinearSVC(
        C=C, eta0=10, eta_d=1000, n_epochs=60000, random_state=42
    )
    my_clf.fit(X, y)

    scikit_svc = SVC(kernel="linear", C=C)
    scikit_svc.fit(X, y.ravel())

    scikit_SGD = SGDClassifier(
        loss="hinge", alpha=0.017, max_iter=50, random_state=42
    )
    scikit_SGD.fit(X, y.ravel())

    # --- attach hand-computed "support vectors" to the SGD model -----------
    n_samples = len(X)
    signed_targets = y * 2 - 1  # 0/1 labels -> -1/+1
    with_bias = np.c_[np.ones((n_samples, 1)), X]  # prepend constant column
    signed_inputs = with_bias * signed_targets
    theta = np.r_[scikit_SGD.intercept_[0], scikit_SGD.coef_[0]]
    inside_margin = (signed_inputs.dot(theta) < 1).ravel()
    scikit_SGD.support_vectors_ = X[inside_margin]
    scikit_SGD.C = C

    # --- report learned parameters -----------------------------------------
    print("Parameters of my SVC model - ")
    print(
        "weight : {}, bias : {}".format(
            np.squeeze(my_clf.coef_, axis=0), my_clf.intercept_
        )
    )
    print("Parameters of scikit SVC model - ")
    print("weight : {}, bias : {}".format(scikit_svc.coef_, scikit_svc.intercept_))
    print("Parameters of scikit SGD model - ")
    print(
        "weight : {}, bias : {}".format(
            scikit_SGD.coef_[0], scikit_SGD.intercept_[0]
        )
    )

    # --- one subplot per model ---------------------------------------------
    flat_labels = y.ravel()
    is_virginica = flat_labels == 1
    is_other = flat_labels == 0

    # NOTE(review): the middle title says "LinearSVC" although the model is
    # SVC(kernel="linear"); the string is kept unchanged here.
    panels = (
        (131, my_clf, "MyLinearSVM"),
        (132, scikit_svc, "scikit LinearSVC"),
        (133, scikit_SGD, "SGDClassifier"),
    )

    plt.figure(figsize=(12, 4.8))
    for position, model, heading in panels:
        plt.subplot(position)
        plt.plot(
            X[:, 0][is_virginica], X[:, 1][is_virginica], "g^",
            label="Iris-Virginica",
        )
        plt.plot(
            X[:, 0][is_other], X[:, 1][is_other], "bs",
            label="Not Iris-Virginica",
        )
        plt.xlabel("Petal Length(cm)", fontsize=14)
        if position == 131:
            # Only the left-most panel carries the y-axis label.
            plt.ylabel("Petal Width(cm)", fontsize=14)
        plot_svc_decision_boundary(model, 4, 6)
        plt.title(heading)
        plt.axis([4, 6, 0.8, 2.8])

    save_fig("Comparison_mySVM_vs_Scikit")
    plt.show()
# Fit a hinge-loss SGDClassifier and plot its decision boundary.
# X, y, yr and C are not defined in this span; they must already exist in
# the enclosing scope when these statements run.
sgd_clf = SGDClassifier(loss="hinge", alpha=0.017, max_iter=50, random_state=42)
sgd_clf.fit(X, y.ravel())

m = len(X)
t = y * 2 - 1  # -1 if y==0, +1 if y==1 (assumes y holds 0/1 labels - TODO confirm)
X_b = np.c_[np.ones((m, 1)), X]  # Add bias input x0=1
X_b_t = X_b * t
# Parameter vector with the intercept first, matching the column order of X_b.
sgd_theta = np.r_[sgd_clf.intercept_[0], sgd_clf.coef_[0]]
print(sgd_theta)

# Samples whose signed score t * (theta . x_b) is below 1 are stored as the
# model's "support vectors". support_vectors_ and C are attached by hand
# before the model is handed to plot_svc_decision_boundary.
# NOTE(review): presumably that helper reads these attributes - confirm.
support_vectors_idx = (X_b_t.dot(sgd_theta) < 1).ravel()
sgd_clf.support_vectors_ = X[support_vectors_idx]
sgd_clf.C = C

plt.figure(figsize=(5.5, 3.2))
plt.plot(X[:, 0][yr == 1], X[:, 1][yr == 1], "g^")
plt.plot(X[:, 0][yr == 0], X[:, 1][yr == 0], "bs")
plot_svc_decision_boundary(sgd_clf, 4, 6)
plt.xlabel("Petal length", fontsize=14)
plt.ylabel("Petal width", fontsize=14)
plt.title("SGDClassifier", fontsize=14)
plt.axis([4, 6, 0.8, 2.8])

# # Exercise solutions

# ## 1. to 7.

# See appendix A.
# initialize NB classifier #clf = MultinomialNB() #clf = BernoulliNB() #clf.class_prior =[0.041175856307435255,0.9588241436925647] #clf = svm.SVC() #clf.cache_size = 4000 #clf.n_jobs = -1 #clf.C = .1 clf = SGDClassifier() clf.n_jobs = -1 clf.C =1 clf.alpha = .00000001 clf.n_iter = 10000 #clf = DecisionTreeClassifier() #clf.max_depth = 3 #scores = cross_validation.cross_val_score(clf, feat_vecs, labels, cv=10,scoring='recall') #print scores #set_trace() def mp(t,k=None): # set_trace() trainI,testI = t if k: ch2 = SelectKBest(chi2, k=k)