def main2():
    X, y = get_data_sklearn()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    # Standardize features using statistics estimated on the training set only.
    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)

    X_combined_std = np.vstack((X_train_std, X_test_std))
    y_combined = np.hstack((y_train, y_test))

    # C is the inverse regularization strength, so C=1000.0 means very weak regularization.
    lr = LogisticRegression(C=1000.0, random_state=0)
    lr.fit(X_train_std, y_train)

    plot_decision_regions(X_combined_std, y_combined, classifier=lr,
                          test_idx=range(105, 150))
    plt.xlabel('petal length [standardized]')
    plt.ylabel('petal width [standardized]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.savefig(PIC_LOC + 'logistic_regression.png', dpi=300)
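
# The logistic regression fitted in main2() models class-membership
# probabilities directly. A small sketch of how to inspect them with
# predict_proba; the helper name is illustrative only and reuses the same data
# loading and standardization as main2() above.
def show_class_probabilities():
    X, y = get_data_sklearn()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
    sc = StandardScaler().fit(X_train)
    lr = LogisticRegression(C=1000.0, random_state=0)
    lr.fit(sc.transform(X_train), y_train)
    # One probability per class for the first test sample; each row sums to 1.
    print(lr.predict_proba(sc.transform(X_test[:1, :])))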
def lda_scikit():
    df_wine = pd.read_csv('https://archive.ics.uci.edu/ml/'
                          'machine-learning-databases/wine/wine.data',
                          header=None)
    df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash',
                       'Alcalinity of ash', 'Magnesium', 'Total phenols',
                       'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins',
                       'Color intensity', 'Hue',
                       'OD280/OD315 of diluted wines', 'Proline']
    X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values
    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.3, random_state=0)

    sc = StandardScaler()
    X_train_std = sc.fit_transform(X_train)
    X_test_std = sc.transform(X_test)

    # LDA yields at most (n_classes - 1) discriminants; the wine data has 3
    # classes, so request 2 components (the plots below use LD 1 and LD 2).
    lda = LDA(n_components=2)
    X_train_lda = lda.fit_transform(X_train_std, y_train)

    lr = LogisticRegression()
    lr = lr.fit(X_train_lda, y_train)

    plot_decision_regions(X_train_lda, y_train, classifier=lr)
    plt.xlabel('LD 1')
    plt.ylabel('LD 2')
    plt.legend(loc='lower left')
    plt.tight_layout()
    plt.savefig(PL5 + 'lda_scikit.png', dpi=300)
    plt.close()

    # Project the test set with the LDA fitted on the training data.
    X_test_lda = lda.transform(X_test_std)
    plot_decision_regions(X_test_lda, y_test, classifier=lr)
    plt.xlabel('LD 1')
    plt.ylabel('LD 2')
    plt.legend(loc='lower left')
    plt.tight_layout()
    plt.savefig(PL5 + 'lda_scikit_test.png', dpi=300)
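
# A hedged sketch of how one could check how much class-discriminatory variance
# each linear discriminant captures. It assumes a fitted LDA as in lda_scikit()
# and that the installed scikit-learn exposes explained_variance_ratio_ on the
# LDA estimator (true for recent versions with the default 'svd' solver).
def lda_discriminant_ratios(lda_fitted):
    for i, ratio in enumerate(lda_fitted.explained_variance_ratio_, start=1):
        print('LD %d explains %.3f of the between-class variance' % (i, ratio))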
def build_dec_tree():
    X, y = get_data_sklearn()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    # Entropy split criterion; max_depth=3 keeps the tree small and interpretable.
    tree = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=0)
    tree.fit(X_train, y_train)

    X_combined = np.vstack((X_train, X_test))
    y_combined = np.hstack((y_train, y_test))
    plot_decision_regions(X_combined, y_combined, classifier=tree,
                          test_idx=range(105, 150))
    plt.xlabel('petal length [cm]')
    plt.ylabel('petal width [cm]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.savefig(PIC_LOC + 'decision_tree_decision.png', dpi=300)

    # Dump the fitted tree in GraphViz format for visualization.
    export_graphviz(tree, out_file='tree.dot',
                    feature_names=['petal length', 'petal width'])
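
# The tree.dot file written by build_dec_tree() can be rendered to an image
# with the GraphViz command-line tool. A minimal sketch, assuming the `dot`
# executable is installed and on the PATH (the helper name is illustrative):
def render_tree_dot(dot_path='tree.dot', png_path='tree.png'):
    import subprocess
    # Equivalent to running: dot -Tpng tree.dot -o tree.png
    subprocess.run(['dot', '-Tpng', dot_path, '-o', png_path], check=True)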
def KNN_model():
    X, y = get_data_sklearn()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)

    X_combined = np.vstack((X_train, X_test))
    X_combined_std = np.vstack((X_train_std, X_test_std))
    y_combined = np.hstack((y_train, y_test))

    # Minkowski distance with p=2 is the Euclidean distance.
    knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
    knn.fit(X_train_std, y_train)

    plot_decision_regions(X_combined_std, y_combined, classifier=knn,
                          test_idx=range(105, 150))
    plt.xlabel('petal length [standardized]')
    plt.ylabel('petal width [standardized]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.savefig(PIC_LOC + 'k_nearest_neighbors.png', dpi=300)
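
# Every classifier above calls a get_data_sklearn() helper defined elsewhere in
# this project. Judging by the axis labels and test_idx=range(105, 150), it is
# assumed to return the two Iris petal features and the integer class labels;
# the sketch below is an illustrative stand-in, not the original definition.
def get_data_sklearn_sketch():
    from sklearn import datasets
    iris = datasets.load_iris()
    X = iris.data[:, [2, 3]]   # petal length [cm], petal width [cm]
    y = iris.target            # class labels 0, 1, 2
    return X, y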
def nonlinear_svm():
    np.random.seed(0)
    X_xor = np.random.randn(200, 2)
    y_xor = np.logical_xor(X_xor[:, 0] > 0, X_xor[:, 1] > 0)
    y_xor = np.where(y_xor, 1, -1)

    plt.scatter(X_xor[y_xor == 1, 0], X_xor[y_xor == 1, 1],
                c='b', marker='x', label='1')
    plt.scatter(X_xor[y_xor == -1, 0], X_xor[y_xor == -1, 1],
                c='r', marker='s', label='-1')
    plt.xlim([-3, 3])
    plt.ylim([-3, 3])
    plt.legend(loc='best')
    plt.tight_layout()
    plt.savefig(PIC_LOC + 'xor.png', dpi=300)
    # plt.show()
    plt.close()

    # Dealing with the nonlinear (XOR) dataset
    svm = SVC(kernel='rbf', random_state=0, gamma=0.10, C=10.0)
    svm.fit(X_xor, y_xor)
    plot_decision_regions(X_xor, y_xor, classifier=svm)
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.savefig(PIC_LOC + 'rbf_xor.png', dpi=300)
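
# The RBF (Gaussian) kernel used in nonlinear_svm() scores the similarity of two
# samples as k(x, x') = exp(-gamma * ||x - x'||^2). A tiny standalone sketch:
def rbf_kernel_value(x1, x2, gamma=0.10):
    # 1.0 when the points coincide and approaching 0 as they move apart;
    # a larger gamma makes the similarity fall off faster, i.e. a "tighter" kernel.
    x1, x2 = np.asarray(x1, dtype=float), np.asarray(x2, dtype=float)
    return np.exp(-gamma * np.sum((x1 - x2) ** 2))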
def main1():
    X, y = get_data_sklearn()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)

    ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0)
    ppn.fit(X_train_std, y_train)

    y_pred = ppn.predict(X_test_std)
    print('Misclassified samples: %d' % (y_test != y_pred).sum())
    print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

    X_combined_std = np.vstack((X_train_std, X_test_std))
    y_combined = np.hstack((y_train, y_test))
    plot_decision_regions(X=X_combined_std, y=y_combined,
                          classifier=ppn, test_idx=range(105, 150))
    plt.xlabel('petal length [standardized]')
    plt.ylabel('petal width [standardized]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.savefig(PIC_LOC + 'iris_perceptron_scikit.png', dpi=300)
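
# Version note: the n_iter argument used in main1() was deprecated and later
# removed from scikit-learn's Perceptron in favour of max_iter (and tol). The
# sketch below is a hedged, version-tolerant constructor; the fallback logic is
# an assumption, not something this module originally contained.
def make_perceptron(eta0=0.1, epochs=40, random_state=0):
    try:
        # Newer scikit-learn: the number of passes over the data is max_iter.
        return Perceptron(max_iter=epochs, eta0=eta0, tol=1e-3, random_state=random_state)
    except TypeError:
        # Older scikit-learn: the same setting was called n_iter.
        return Perceptron(n_iter=epochs, eta0=eta0, random_state=random_state)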
def linear_svm():
    X, y = get_data_sklearn()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)

    X_combined_std = np.vstack((X_train_std, X_test_std))
    y_combined = np.hstack((y_train, y_test))

    # RBF-kernel SVM with a small gamma: a smooth, relatively soft decision boundary.
    svm = SVC(kernel='rbf', random_state=0, gamma=0.10, C=10.0)
    svm.fit(X_train_std, y_train)
    plot_decision_regions(X_combined_std, y_combined, classifier=svm,
                          test_idx=range(105, 150))
    plt.xlabel('petal length [standardized]')
    plt.ylabel('petal width [standardized]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.savefig(PIC_LOC + 'support_vector_machine_linear.png', dpi=300)
    # plt.show()
    plt.close()

    # The same model with a large gamma: the boundary wraps tightly around the
    # training samples, which is a typical sign of overfitting.
    svm = SVC(kernel='rbf', random_state=0, gamma=100.0, C=10.0)
    svm.fit(X_train_std, y_train)
    plot_decision_regions(X_combined_std, y_combined, classifier=svm,
                          test_idx=range(105, 150))
    plt.xlabel('petal length [standardized]')
    plt.ylabel('petal width [standardized]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.savefig(PIC_LOC + 'svm_linear_highgamma.png', dpi=300)
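
# To quantify the effect of gamma instead of only comparing the plots from
# linear_svm(), one could compare train and test accuracy of the two models.
# A hedged sketch; it assumes the same standardized splits as above:
def compare_gamma(X_train_std, y_train, X_test_std, y_test):
    for gamma in (0.10, 100.0):
        svm = SVC(kernel='rbf', random_state=0, gamma=gamma, C=10.0)
        svm.fit(X_train_std, y_train)
        # A very large gamma typically scores near-perfectly on the training
        # data but generalizes worse to the test data.
        print('gamma=%6.2f  train acc=%.2f  test acc=%.2f'
              % (gamma, svm.score(X_train_std, y_train), svm.score(X_test_std, y_test)))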
def random_forests():
    X, y = get_data_sklearn()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    forest = RandomForestClassifier(criterion='entropy', n_estimators=10,
                                    random_state=1, n_jobs=2)
    forest.fit(X_train, y_train)

    X_combined = np.vstack((X_train, X_test))
    y_combined = np.hstack((y_train, y_test))
    plot_decision_regions(X_combined, y_combined, classifier=forest,
                          test_idx=range(105, 150))
    plt.xlabel('petal length [cm]')
    plt.ylabel('petal width [cm]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.savefig(PIC_LOC + 'random_forest.png', dpi=300)
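
# Random forests also expose per-feature importances. A short sketch of how to
# print them for the forest fitted in random_forests(); the two feature names
# match the Iris features assumed throughout this file.
def print_forest_importances(forest_fitted, feature_names=('petal length', 'petal width')):
    for name, importance in zip(feature_names, forest_fitted.feature_importances_):
        print('%s: %.3f' % (name, importance))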
plt.show()

# Train our Perceptron and plot the number of misclassifications (errors) for
# each epoch (iteration). The goal is that the more we train the perceptron,
# the fewer the misclassifications.
#
# The model is ready when the number of errors converges and finally reaches 0.
ppn = pt.Perceptron(eta=0.1, n_iter=10)
ppn.fit(X, y)

plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Number of updates')
plt.show()

# Test the model with the same training examples it was fitted on
helpers.plot_decision_regions(X, y, classifier=ppn)
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.legend(loc='upper left')
plt.show()

# pg 91
# It often requires some experimentation to find a good learning rate for optimal
# convergence. Here we'll experiment with learning rates of 0.1 and 0.0001, and
# plot the cost function versus the epochs to see how well the Adaline
# implementation learns from the training data (see the sketch that follows).
# FYI, the learning rate and the number of epochs are the hyperparameters
# (tuning parameters) of the perceptron and Adaline learning algorithms.
# Findings:
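
# Sketch of the learning-rate experiment described above. The book-style
# AdalineGD class below is a minimal stand-in (this project's real Adaline
# implementation may live in another module); it assumes numpy is available as
# np and matplotlib.pyplot as plt, as elsewhere in this file.
class AdalineGD:
    """ADAptive LInear NEuron trained with batch gradient descent."""
    def __init__(self, eta=0.01, n_iter=50):
        self.eta = eta
        self.n_iter = n_iter

    def fit(self, X, y):
        self.w_ = np.zeros(1 + X.shape[1])
        self.cost_ = []
        for _ in range(self.n_iter):
            output = np.dot(X, self.w_[1:]) + self.w_[0]   # net input (identity activation)
            errors = y - output
            self.w_[1:] += self.eta * X.T.dot(errors)      # gradient step for the weights
            self.w_[0] += self.eta * errors.sum()          # gradient step for the bias unit
            self.cost_.append((errors ** 2).sum() / 2.0)   # sum-squared-error cost
        return self

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))

# eta=0.1 is too large here: the cost grows with every epoch (it overshoots the minimum).
ada1 = AdalineGD(n_iter=10, eta=0.1).fit(X, y)
ax[0].plot(range(1, len(ada1.cost_) + 1), np.log10(ada1.cost_), marker='o')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('log(Sum-squared-error)')
ax[0].set_title('Adaline - Learning rate 0.1')

# eta=0.0001 is so small that the cost barely decreases within 10 epochs.
ada2 = AdalineGD(n_iter=10, eta=0.0001).fit(X, y)
ax[1].plot(range(1, len(ada2.cost_) + 1), ada2.cost_, marker='o')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Sum-squared-error')
ax[1].set_title('Adaline - Learning rate 0.0001')
plt.show()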