    y, test_size=0.3, random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

lr = LogisticRegression(C=1000.0, random_state=0)
lr.fit(X_train_std, y_train)

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

ocr_utils.plot_decision_regions(
    X=X_combined_std,
    y=y_combined,
    classifier=lr,
    labels=labels,
    test_idx=range(len(X_train_std), len(X_combined_std)),
    title='logistic_regression')

# sweep the inverse regularization strength C and record the weight coefficients
weights, params = [], []
for c in np.arange(-5, 5):
    lr = LogisticRegression(C=10**c, random_state=0)
    lr.fit(X_train_std, y_train)
    weights.append(lr.coef_[0])
    params.append(10**c)
title = 'regression_path'
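# A minimal sketch (not in the original chunk) of plotting the recorded
# regularization path; assumes matplotlib is available, as in the other
# scripts in this repo, and reuses weights/params/title from above.
import matplotlib.pyplot as plt

weights = np.array(weights)
plt.plot(params, weights[:, 0], label='weight 0')
plt.plot(params, weights[:, 1], linestyle='--', label='weight 1')
plt.ylabel('weight coefficient')
plt.xlabel('C')
plt.xscale('log')  # C was swept over powers of ten
plt.legend(loc='upper left')
plt.title(title)
ocr_utils.show_figures(plt, title)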
@author: richard lyman
'''
import numpy as np
import ocr_utils
from sklearn.preprocessing import StandardScaler

y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(
    chars_to_train=(48, 49, 50),
    columns=(9, 17),
    test_size=0.3,
    nChars=300,
    random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
X_combined = np.vstack((X_train, X_test))
y_combined = np.hstack((y_train, y_test))

from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
knn.fit(X_train_std, y_train)

# test samples follow the training samples in the stacked array
ocr_utils.plot_decision_regions(
    X=X_combined_std,
    y=y_combined,
    classifier=knn,
    labels=labels,
    test_idx=range(len(X_train_std), len(X_combined_std)),
    title='k_nearest_neighbors')

print('\n########################### No Errors ####################################')
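# A short hedged sketch (not part of the original script): sweeping n_neighbors
# to see how the KNN test accuracy responds; names reuse the arrays above and
# the k values are illustrative only.
from sklearn.metrics import accuracy_score

for k in (1, 3, 5, 15, 31):
    knn_k = KNeighborsClassifier(n_neighbors=k, p=2, metric='minkowski')
    knn_k.fit(X_train_std, y_train)
    print('k={:2d} test accuracy={:.3f}'.format(
        k, accuracy_score(y_test, knn_k.predict(X_test_std))))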
    accuracy_score(y_train, y_train_pred), lda.n_components, lr.coef_.shape))
# print('LDA Test Accuracy: {:4.6f}, n_components={} coefficients={}'.format(accuracy_score(y_test, y_test_pred), lda.n_components, lr.coef_.shape))

X_errors_image = X_train[y_train != y_train_pred]
X_errors2D = np.reshape(X_errors_image,
                        (X_errors_image.shape[0], character_size, character_size))
ocr_utils.montage(X_errors2D,
                  title='LDA Error Images, components={}'.format(n_components))

# X_combined = np.vstack((X_train_lda, X_test_lda))
# y_combined = np.hstack((y_train, y_test))

if X_train_lda.shape[1] > 1:
    ocr_utils.plot_decision_regions(
        X=X_train_lda,
        y=y_train,
        classifier=lr,
        labels=['LDA1', 'LDA2'],
        title='logistic_regression after 2 component LDA')

######################################################################################
# now that the font is trained, pick up some text and encode a message
######################################################################################
base_file = '15-01-01 459_Mont_Lyman'
output_base = '/tmp/plots/15-01-01 459_Mont_Lyman_encrypted'
base_file, skew_indices = encode_and_save_file(
    base_file,
    output_base,
    character_size,
    white_space,
    secret_message='your first born is mine')
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

from sklearn.tree import DecisionTreeClassifier

tree = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=0)
tree.fit(X_train, y_train)

X_combined = np.vstack((X_train, X_test))
y_combined = np.hstack((y_train, y_test))

# test samples follow the training samples in the stacked array
ocr_utils.plot_decision_regions(
    X=X_combined,
    y=y_combined,
    classifier=tree,
    test_idx=range(len(X_train), len(X_combined)),
    labels=labels,
    title='decision tree entropy')

from sklearn.ensemble import RandomForestClassifier

forest = RandomForestClassifier(criterion='entropy',
                                n_estimators=10,
                                random_state=1,
                                n_jobs=2)
forest.fit(X_train, y_train)

ocr_utils.plot_decision_regions(X=X_combined,
                                y=y_combined,
                                classifier=forest,
                                labels=labels,
                                test_idx=range(len(X_train), len(X_combined)),
                                title='random_forest')
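# Hedged sketch (not in the original script): the fitted decision tree can be
# exported for offline inspection with GraphViz; 'tree.dot' is an assumed
# output path, and feature_names assumes labels holds one name per input
# column, as the plotting calls above suggest.
from sklearn.tree import export_graphviz

export_graphviz(tree, out_file='tree.dot', feature_names=list(labels))
# render with, e.g.:  dot -Tpng tree.dot -o tree.png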
plt.tight_layout()
ocr_utils.show_figures(plt, title)

###############################################################################
lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train_std, y_train)
X_test_lda = lda.transform(X_test_std)

from sklearn.linear_model import LogisticRegression

lr = LogisticRegression()
lr = lr.fit(X_train_lda, y_train)

title = 'Linear Discriminant Analysis Training Set'
ocr_utils.plot_decision_regions(X_train_lda,
                                y_train,
                                classifier=lr,
                                labels=['LD 1', 'LD 2'],
                                title=title)

title = 'Linear Discriminant Analysis Test Set'
ocr_utils.plot_decision_regions(X_test_lda,
                                y_test,
                                classifier=lr,
                                labels=['LD 1', 'LD 2'],
                                title=title)

###############################################################################
n_components = 10
lda = LDA(n_components=n_components)
X_train_lda = lda.fit_transform(X_train_std, y_train)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

lr = LogisticRegression(C=1000.0, random_state=0)
lr.fit(X_train_std, y_train)

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

ocr_utils.plot_decision_regions(
    X=X_combined_std,
    y=y_combined,
    classifier=lr,
    labels=labels,
    test_idx=range(len(X_train_std), len(X_combined_std)),
    title='logistic_regression')

# sweep the inverse regularization strength C and record the weight coefficients
weights, params = [], []
for c in np.arange(0, 5):
    lr = LogisticRegression(C=10**c, random_state=0)
    lr.fit(X_train_std, y_train)
    weights.append(lr.coef_[0])
    params.append(10**c)
title = 'regression_path'
y_train_pred = logistic_fitted.predict(X_train_lda)
print('\nLDA Train Accuracy: {:4.6f}, n_components={} coefficients={}'.format(
    accuracy_score(y_train, y_train_pred), lda.n_components, lr.coef_.shape))
# print('LDA Test Accuracy: {:4.6f}, n_components={} coefficients={}'.format(accuracy_score(y_test, y_test_pred), lda.n_components, lr.coef_.shape))

X_errors_image = X_train[y_train != y_train_pred]
X_errors2D = np.reshape(X_errors_image,
                        (X_errors_image.shape[0], character_size, character_size))
ocr_utils.montage(X_errors2D,
                  title='LDA Error Images, components={}'.format(n_components))

# X_combined = np.vstack((X_train_lda, X_test_lda))
# y_combined = np.hstack((y_train, y_test))

if X_train_lda.shape[1] > 1:
    ocr_utils.plot_decision_regions(
        X=X_train_lda,
        y=y_train,
        classifier=lr,
        labels=['LDA1', 'LDA2'],
        title='logistic_regression after 2 component LDA')

######################################################################################
# now that the font is trained, pick up some text and encode a message
######################################################################################
base_file = '15-01-01 459_Mont_Lyman'
output_base = '/tmp/plots/15-01-01 459_Mont_Lyman_encrypted'
base_file, skew_indices = encode_and_save_file(
    base_file,
    output_base,
    character_size,
    white_space,
    secret_message='your first born is mine')

print('base file to decode = {}'.format(base_file))
df, t1 = ocr_utils.file_to_df(base_file,
                              character_size,
                              title='Encrypted File',
                              white_space=white_space,
                              input_filters_dict=input_filters_dict)
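# Hedged sketch (added): a confusion matrix gives a per-class view of the LDA
# training errors counted above; it uses only names already defined in this script.
from sklearn.metrics import confusion_matrix

print('LDA train confusion matrix (rows = true, cols = predicted):')
print(confusion_matrix(y_train, y_train_pred))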
print('PCA Test Accuracy: {:4.6f}, n_components={} coefficients={}'.format(
    accuracy_score(y_test, y_test_pred), pca.n_components, lr.coef_.shape))

X_errors_image = X_test[y_test != y_test_pred]
y_errors = y_test[y_test != y_test_pred]
X_errors_pca = X_test_pca[y_test != y_test_pred]

# change to a 2D shape
X_errors2D = np.reshape(X_errors_image, (X_errors_image.shape[0], 20, 20))
ocr_utils.montage(X_errors2D,
                  title='PCA Error Images, components={}'.format(n_components))

X_combined = np.vstack((X_train_pca, X_test_pca))
y_combined = np.hstack((y_train, y_test))

ocr_utils.plot_decision_regions(
    X=X_combined,
    y=y_combined,
    classifier=lr,
    labels=['PC1', 'PC2'],
    title='logistic_regression after 2 component PCA')

#########################################################################
# run Linear Discriminant Analysis first then Logistic Regression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

n_components = 2
lda = LDA(n_components=n_components)
X_train_lda = lda.fit_transform(X_train, y_train)
X_test_lda = lda.transform(X_test)
# report the LDA projection directions (the original printed pca.components_ here)
print('\nLDA components = {}'.format(lda.scalings_.shape))

lr = LogisticRegression()
logistic_fitted = lr.fit(X_train_lda, y_train)
if __name__ == '__main__':

    y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(
        chars_to_train=(48, 49, 50),
        columns=(9, 17),
        test_size=0.3,
        nChars=300,
        random_state=0)

    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)

    X_combined_std = np.vstack((X_train_std, X_test_std))
    y_combined = np.hstack((y_train, y_test))
    X_combined = np.vstack((X_train, X_test))
    y_combined = np.hstack((y_train, y_test))

    from sklearn.ensemble import RandomForestClassifier

    forest = RandomForestClassifier(criterion='entropy',
                                    n_estimators=10,
                                    random_state=1,
                                    n_jobs=2)
    forest.fit(X_train, y_train)

    # test samples follow the training samples in the stacked array
    ocr_utils.plot_decision_regions(
        X=X_combined,
        y=y_combined,
        classifier=forest,
        labels=labels,
        test_idx=range(len(X_train), len(X_combined)),
        title='random_forest')

    print('\n########################### No Errors ####################################')
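    # Hedged sketch (added): feature_importances_ from the fitted forest ranks
    # the input columns; assumes labels holds one name per column.
    for name, importance in zip(labels, forest.feature_importances_):
        print('{}: importance {:.3f}'.format(name, importance))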
    def predict(self, X):
        """Return class label after unit step"""
        return np.where(self.net_input(X) >= 0.0, 1, -1)


#############################################################################
# convert the targets ('0', '1') to -1, +1
# fit (train) the Perceptron
# plot the misclassifications versus epochs
# plot the decision regions
y = np.where(y == ascii_characters_to_train[0], -1, 1)

ppn = Perceptron(eta=0.1, n_iter=10)
ppn.fit(X, y)

title = 'Simple Perceptron'
plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Number of misclassifications')
plt.title(title)
plt.tight_layout()
ocr_utils.show_figures(plt, title)

ocr_utils.plot_decision_regions(
    X=X,
    y=y,
    classifier=ppn,
    labels=['column {} sum'.format(columnsXY[i]) for i in range(len(columnsXY))],
    title="Perceptron Decision Regions")

print('\n########################### No Errors ####################################')
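# Hedged sketch (added): if the two classes are linearly separable in these
# column sums, the per-epoch misclassification counts in ppn.errors_ (the list
# plotted above) should reach zero before the last epoch.
if ppn.errors_[-1] == 0:
    print('converged after {} epochs'.format(ppn.errors_.index(0) + 1))
else:
    print('did not converge within {} epochs'.format(len(ppn.errors_)))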
# ada2 = AdalineGD(n_iter=15, eta=0.0001).fit(X, y)
# title = 'Gradient Descent Learning rate 0.0001'
# plt.plot(range(1, len(ada2.cost_) + 1), np.log10(ada2.cost_), marker='x', label=title)
# plt.title(title)
# ocr_utils.show_figures(plt, title)

# standardize features
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

ada = AdalineGD(n_iter=15, eta=0.01)
ada.fit(X_std, y)

ocr_utils.plot_decision_regions(
    X=X_std,
    y=y,
    classifier=ada,
    labels=labels,
    title='Adaline - Gradient Descent standardized rate 0.01')

title = 'Standardized Gradient Descent Learning rate 0.01'
# ada2 is commented out above, so plot the standardized fit (ada) here
plt.plot(range(1, len(ada.cost_) + 1), np.log10(ada.cost_), marker='x', label=title)
plt.title(title)
ocr_utils.show_figures(plt, title)

plt.plot(range(1, len(ada.cost_) + 1),
         np.log10(ada.cost_),
         marker='v',
         label='standardized rate 0.01')
plt.xlabel('Epochs')
plt.ylabel('log(Sum-squared-error)')
plt.legend(loc='lower left')
plt.title('Adaline - Gradient Descent')
plt.tight_layout()
ocr_utils.show_figures(plt, 'Adaline - Gradient Descent')
X_test_pca = pca.transform(X_test_image)

lr = LogisticRegression()
logistic_fitted = lr.fit(X_train_pca, y_train)

print('\nPCA Train Accuracy: {:4.6f}, n_components={}'.format(
    accuracy_score(y_train, logistic_fitted.predict(X_train_pca)), pca.n_components))
print('PCA Test Accuracy: {:4.6f}, n_components={}'.format(
    accuracy_score(y_test, logistic_fitted.predict(X_test_pca)), pca.n_components))

title = 'train pc1 versus pc2'
ocr_utils.plot_decision_regions(X=X_train_pca,
                                y=y_train,
                                classifier=lr,
                                labels=['pc1', 'pc2'],
                                title=title)

title = 'test pc1 versus pc2'
ocr_utils.plot_decision_regions(X=X_test_pca,
                                y=y_test,
                                classifier=lr,
                                labels=['pc1', 'pc2'],
                                title=title)

X_train_pca = pca.fit_transform(X_train_image)
X_test_pca = pca.transform(X_test_image)

########################################################################################
pca = PCA(n_components=n_components)
X_train_pca = pca.fit_transform(X_train_image)
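# Hedged sketch (added): before settling on n_components, the cumulative
# explained variance of a full PCA fit can guide the choice; X_train_image and
# the PCA import are reused from this script.
pca_full = PCA(n_components=None)
pca_full.fit(X_train_image)
cum_var = np.cumsum(pca_full.explained_variance_ratio_)
print('components needed for 95% of the variance: {}'.format(
    int(np.argmax(cum_var >= 0.95)) + 1))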
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@author: richard lyman
'''
import numpy as np
import ocr_utils
from sklearn.svm import SVC

np.random.seed(0)
X_xor = np.random.randn(200, 2)
y_xor = np.logical_xor(X_xor[:, 0] > 0, X_xor[:, 1] > 0)
y_xor = np.where(y_xor, 1, -1)

ocr_utils.scatter_plot(X=X_xor,
                       y=y_xor,
                       title='xor',
                       xlim=(-3, 3),
                       ylim=(-3, 3))

svm = SVC(kernel='rbf', random_state=0, gamma=0.10, C=10.0)
svm.fit(X_xor, y_xor)
ocr_utils.plot_decision_regions(X=X_xor,
                                y=y_xor,
                                classifier=svm,
                                title='support vector machine rbf xor')

print('\n########################### No Errors ####################################')
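# Hedged sketch (added): a small grid search over gamma and C for the same RBF
# kernel; GridSearchCV lives in sklearn.model_selection in scikit-learn >= 0.18,
# and the grid values here are illustrative only.
from sklearn.model_selection import GridSearchCV

param_grid = {'gamma': [0.01, 0.1, 1.0, 10.0], 'C': [0.1, 1.0, 10.0, 100.0]}
gs = GridSearchCV(SVC(kernel='rbf', random_state=0), param_grid, cv=5)
gs.fit(X_xor, y_xor)
print('best params: {}  best cv accuracy: {:.3f}'.format(gs.best_params_,
                                                         gs.best_score_))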
    def predict(self, X):
        """Return class label after unit step"""
        return np.where(self.activation(X) >= 0.0, 1, -1)


#############################################################################
# standardize features, fit, and plot
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

ada = AdalineSGD(n_iter=15, eta=0.01, random_state=1)
ada.fit(X_std, y)

ocr_utils.plot_decision_regions(X=X_std,
                                y=y,
                                classifier=ada,
                                title='Adaline - Stochastic Gradient Descent',
                                labels=labels)

title = 'Adaline - Stochastic Gradient Descent'
plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Average Cost')
plt.title(title)
plt.tight_layout()
ocr_utils.show_figures(plt, title)

print('\n########################### No Errors ####################################')
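# Hedged sketch (added): scikit-learn's SGDClassifier is a library counterpart
# to the hand-rolled AdalineSGD above; Adaline's squared-error cost corresponds
# to loss='squared_error' in recent scikit-learn (older versions spell it
# 'squared_loss'), and the hyperparameters here are illustrative.
from sklearn.linear_model import SGDClassifier

sgd = SGDClassifier(loss='squared_error',
                    eta0=0.01,
                    learning_rate='constant',
                    random_state=1)
sgd.fit(X_std, y)
print('SGDClassifier training accuracy: {:.3f}'.format(sgd.score(X_std, y)))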
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@author: richard lyman
'''
import numpy as np
import ocr_utils
from sklearn.preprocessing import StandardScaler

y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(
    chars_to_train=(48, 49, 50),
    columns=(9, 17),
    test_size=0.3,
    nChars=300,
    random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

from sklearn.svm import SVC

svm = SVC(kernel='linear', C=1.0, random_state=0)
svm.fit(X_train_std, y_train)

# test samples follow the training samples in the stacked array
ocr_utils.plot_decision_regions(
    X=X_combined_std,
    y=y_combined,
    classifier=svm,
    test_idx=range(len(X_train_std), len(X_combined_std)),
    labels=labels,
    title='support_vector_machine_linear')

print('\n########################### No Errors ####################################')
y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(
    chars_to_train=(48, 49, 50),
    columns=(9, 17),
    test_size=0.3,
    nChars=300,
    random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

from sklearn.svm import SVC

svm = SVC(kernel='linear', C=1.0, random_state=0)
svm.fit(X_train_std, y_train)

# test samples follow the training samples in the stacked array
ocr_utils.plot_decision_regions(
    X=X_combined_std,
    y=y_combined,
    classifier=svm,
    test_idx=range(len(X_train_std), len(X_combined_std)),
    labels=labels,
    title='support_vector_machine_linear')

print('\n########################### No Errors ####################################')
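# Hedged sketch (added): for data sets too large for a kernel SVM, a linear SVM
# can instead be trained with stochastic gradient descent via SGDClassifier and
# hinge loss; the setup below reuses the standardized arrays from above.
from sklearn.linear_model import SGDClassifier

svm_sgd = SGDClassifier(loss='hinge', random_state=0)
svm_sgd.fit(X_train_std, y_train)
print('linear SVM via SGD, test accuracy: {:.3f}'.format(
    svm_sgd.score(X_test_std, y_test)))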
# standardize the features
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0)
ppn.fit(X_train_std, y_train)
y_pred = ppn.predict(X_test_std)
print('Misclassified samples: %d' % (y_test != y_pred).sum())

from sklearn.metrics import accuracy_score
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

# test samples follow the training samples in the stacked array
ocr_utils.plot_decision_regions(
    X_combined_std,
    y_combined,
    ppn,
    test_idx=range(len(X_train_std), len(X_combined_std)),
    labels=labels,
    title='perceptron_scikit')

print('\n########################### No Errors ####################################')
title = 'Projecting Feature Set onto New Feature Space'
plt.title(title)
plt.tight_layout()
ocr_utils.show_figures(plt, title)

###############################################################################
lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train_std, y_train)
X_test_lda = lda.transform(X_test_std)

from sklearn.linear_model import LogisticRegression

lr = LogisticRegression()
lr = lr.fit(X_train_lda, y_train)

title = 'Linear Discriminant Analysis Training Set'
ocr_utils.plot_decision_regions(X_train_lda,
                                y_train,
                                classifier=lr,
                                labels=['LD 1', 'LD 2'],
                                title=title)

title = 'Linear Discriminant Analysis Test Set'
ocr_utils.plot_decision_regions(X_test_lda,
                                y_test,
                                classifier=lr,
                                labels=['LD 1', 'LD 2'],
                                title=title)

###############################################################################
n_components = 10
lda = LDA(n_components=n_components)
X_train_lda = lda.fit_transform(X_train_std, y_train)
X_test_lda = lda.transform(X_test_std)
print('n_components={}'.format(lda.n_components))

lr = LogisticRegression()
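# Hedged sketch (added): finishing this block the same way the earlier LDA
# section does, by fitting the logistic regression on the LDA features and
# reporting test accuracy; the accuracy_score import is assumed.
from sklearn.metrics import accuracy_score

lr = lr.fit(X_train_lda, y_train)
print('LDA({} components) test accuracy: {:.4f}'.format(
    n_components, accuracy_score(y_test, lr.predict(X_test_lda))))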
y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(
    chars_to_train=(48, 49, 50),
    columns=(9, 17),
    test_size=0.3,
    nChars=300,
    random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

# a small gamma gives a soft, smooth decision boundary
svm = SVC(kernel='rbf', random_state=0, gamma=0.2, C=1.0)
svm.fit(X_train_std, y_train)
ocr_utils.plot_decision_regions(
    X=X_combined_std,
    y=y_combined,
    classifier=svm,
    labels=labels,
    test_idx=range(len(X_train_std), len(X_combined_std)),
    title='SVM with gamma 0.2')

# a large gamma fits tightly around the training points and tends to overfit
svm = SVC(kernel='rbf', random_state=0, gamma=100.0, C=1.0)
svm.fit(X_train_std, y_train)
ocr_utils.plot_decision_regions(
    X=X_combined_std,
    y=y_combined,
    classifier=svm,
    labels=labels,
    test_idx=range(len(X_train_std), len(X_combined_std)),
    title='SVM with gamma 100')

print('\n########################### No Errors ####################################')
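# Hedged sketch (added): the number of support vectors is a quick proxy for how
# much the two gamma settings over- or under-fit; n_support_ is a standard SVC
# attribute holding the per-class counts.
for gamma in (0.2, 100.0):
    svm_g = SVC(kernel='rbf', random_state=0, gamma=gamma, C=1.0)
    svm_g.fit(X_train_std, y_train)
    print('gamma={:6.1f}  support vectors per class: {}'.format(
        gamma, svm_g.n_support_))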
y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(
    chars_to_train=(48, 49, 50),
    columns=(9, 17),
    test_size=0.3,
    nChars=300,
    random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
X_combined = np.vstack((X_train, X_test))
y_combined = np.hstack((y_train, y_test))

from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
knn.fit(X_train_std, y_train)

# test samples follow the training samples in the stacked array
ocr_utils.plot_decision_regions(
    X=X_combined_std,
    y=y_combined,
    classifier=knn,
    labels=labels,
    test_idx=range(len(X_train_std), len(X_combined_std)),
    title='k_nearest_neighbors')

print('\n########################### No Errors ####################################')
    random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0)
ppn.fit(X_train_std, y_train)
y_pred = ppn.predict(X_test_std)
print('Misclassified samples: %d' % (y_test != y_pred).sum())

from sklearn.metrics import accuracy_score
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

# test samples follow the training samples in the stacked array
ocr_utils.plot_decision_regions(
    X_combined_std,
    y_combined,
    ppn,
    test_idx=range(len(X_train_std), len(X_combined_std)),
    labels=labels,
    title='perceptron_scikit')

print('\n########################### No Errors ####################################')
########################################################################################
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train_image)
X_test_pca = pca.transform(X_test_image)

lr = LogisticRegression()
logistic_fitted = lr.fit(X_train_pca, y_train)

print('\nPCA Train Accuracy: {:4.6f}, n_components={}'.format(
    accuracy_score(y_train, logistic_fitted.predict(X_train_pca)), pca.n_components))
print('PCA Test Accuracy: {:4.6f}, n_components={}'.format(
    accuracy_score(y_test, logistic_fitted.predict(X_test_pca)), pca.n_components))

title = 'train pc1 versus pc2'
ocr_utils.plot_decision_regions(X=X_train_pca,
                                y=y_train,
                                classifier=lr,
                                labels=['pc1', 'pc2'],
                                title=title)

title = 'test pc1 versus pc2'
ocr_utils.plot_decision_regions(X=X_test_pca,
                                y=y_test,
                                classifier=lr,
                                labels=['pc1', 'pc2'],
                                title=title)

X_train_pca = pca.fit_transform(X_train_image)
X_test_pca = pca.transform(X_test_image)

########################################################################################
pca = PCA(n_components=n_components)
X_train_pca = pca.fit_transform(X_train_image)
X_test_pca = pca.transform(X_test_image)

lr = LogisticRegression()
logistic_fitted = lr.fit(X_train_pca, y_train)
y_train_pred = logistic_fitted.predict(X_train_pca)
# ada2 = AdalineGD(n_iter=15, eta=0.0001).fit(X, y)
# title = 'Gradient Descent Learning rate 0.0001'
# plt.plot(range(1, len(ada2.cost_) + 1), np.log10(ada2.cost_), marker='x', label=title)
# plt.title(title)
# ocr_utils.show_figures(plt, title)

# standardize features
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

ada = AdalineGD(n_iter=15, eta=0.01)
ada.fit(X_std, y)

ocr_utils.plot_decision_regions(
    X=X_std,
    y=y,
    classifier=ada,
    labels=labels,
    title='Adaline - Gradient Descent standardized rate 0.01')

title = 'Standardized Gradient Descent Learning rate 0.01'
# ada2 is commented out above, so plot the standardized fit (ada) here
plt.plot(range(1, len(ada.cost_) + 1), np.log10(ada.cost_), marker='x', label=title)
plt.title(title)
ocr_utils.show_figures(plt, title)

plt.plot(range(1, len(ada.cost_) + 1),
         np.log10(ada.cost_),
         marker='v',
         label='standardized rate 0.01')