X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
lr = LogisticRegression(C=1000.0, random_state=0)
lr.fit(X_train_std, y_train)
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
ocr_utils.plot_decision_regions(X=X_combined_std,
                                y=y_combined,
                                classifier=lr,
                                labels=labels,
                                test_idx=range(len(X_train_std),
                                               len(X_combined_std)),
                                title='logistic_regression')

weights, params = [], []
for c in np.arange(-5, 5):
    # float base avoids numpy's integer-to-negative-power error for c < 0
    lr = LogisticRegression(C=10.**c, random_state=0)
    lr.fit(X_train_std, y_train)
    weights.append(lr.coef_[0])
    params.append(10.**c)

# plot the regularization path (plt is assumed to be matplotlib.pyplot,
# imported in the part of this snippet that was cut off)
title = 'regression_path'
weights = np.array(weights)
plt.plot(params, weights[:, 0], label=labels[0])
plt.plot(params, weights[:, 1], linestyle='--', label=labels[1])
plt.xscale('log')
plt.title(title)
ocr_utils.show_figures(plt, title)
@author: richard lyman
'''
import numpy as np
import ocr_utils
from sklearn.preprocessing import StandardScaler

y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(
    chars_to_train=(48, 49, 50),
    columns=(9, 17),
    test_size=0.3,
    nChars=300,
    random_state=0)


sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
X_combined = np.vstack((X_train, X_test))
y_combined = np.hstack((y_train, y_test))

from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
knn.fit(X_train_std, y_train)

ocr_utils.plot_decision_regions(X=X_combined_std,
                                y=y_combined,
                                classifier=knn,
                                labels=labels,
                                test_idx=range(len(X_train_std),
                                               len(X_combined_std)),
                                title='k_nearest_neighbors')
print ('\n########################### No Errors ####################################')
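
######################################################################################
# ocr_utils.plot_decision_regions is called throughout these snippets but its
# body is never shown.  The sketch below is a plausible minimal version,
# assuming two feature columns, the usual meshgrid contour plot, and test_idx
# holding the row indices of the test samples inside X; it is illustrative
# only, not ocr_utils' actual code.
######################################################################################
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

def plot_decision_regions_sketch(X, y, classifier, test_idx=None,
                                 labels=('x1', 'x2'), title='',
                                 resolution=0.02):
    # predict over a grid spanning the two feature columns
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.c_[xx1.ravel(), xx2.ravel()]).reshape(xx1.shape)
    cmap = ListedColormap(('red', 'blue', 'lightgreen', 'gray', 'cyan'))
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    # plot the samples, one scatter call per class label
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(X[y == cl, 0], X[y == cl, 1],
                    color=cmap(idx), alpha=0.8, label=cl)
    if test_idx is not None:
        # circle the test samples so they stand out from the training data
        plt.scatter(X[list(test_idx), 0], X[list(test_idx), 1],
                    facecolors='none', edgecolors='black', s=100,
                    label='test set')
    plt.xlabel(labels[0])
    plt.ylabel(labels[1])
    plt.title(title)
    plt.legend(loc='upper left')
    plt.show()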
Example #3
print('\nLDA Train Accuracy: {:4.6f}, n_components={} coefficients={}'.format(
    accuracy_score(y_train, y_train_pred), lda.n_components, lr.coef_.shape))
# print('LDA Test Accuracy: {:4.6f}, n_components={} coefficients={}'.format(accuracy_score(y_test, y_test_pred),lda.n_components,lr.coef_.shape))

X_errors_image = X_train[y_train != y_train_pred]

X_errors2D = np.reshape(
    X_errors_image, (X_errors_image.shape[0], character_size, character_size))
ocr_utils.montage(X_errors2D,
                  title='LDA Error Images, components={}'.format(n_components))

#  X_combined = np.vstack((X_train_lda, X_test_lda))
#  y_combined = np.hstack((y_train, y_test))
if X_train_lda.shape[1] > 1:
    ocr_utils.plot_decision_regions(
        X=X_train_lda,
        y=y_train,
        classifier=lr,
        labels=['LDA1', 'LDA2'],
        title='logistic_regression after 2 component LDA')

######################################################################################
# now that the font is trained, pick up some text and encode a message
######################################################################################

base_file = '15-01-01 459_Mont_Lyman'
output_base = '/tmp/plots/15-01-01 459_Mont_Lyman_encrypted'
base_file, skew_indices = encode_and_save_file(
    base_file,
    output_base,
    character_size,
    white_space,
    secret_message='your first born is mine')
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

from sklearn.tree import DecisionTreeClassifier

tree = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=0)
tree.fit(X_train, y_train)

X_combined = np.vstack((X_train, X_test))
y_combined = np.hstack((y_train, y_test))
ocr_utils.plot_decision_regions(X=X_combined,
                                y=y_combined,
                                classifier=tree,
                                test_idx=range(len(X_train), len(X_combined)),
                                labels=labels,
                                title='decision tree entropy')

from sklearn.ensemble import RandomForestClassifier

forest = RandomForestClassifier(criterion='entropy',
                                n_estimators=10,
                                random_state=1,
                                n_jobs=2)
forest.fit(X_train, y_train)

ocr_utils.plot_decision_regions(X=X_combined,
                                y=y_combined,
                                classifier=forest,
                                labels=labels,
                                test_idx=range(len(X_train), len(X_combined)),
                                title='random_forest')
Example #5
plt.tight_layout()
ocr_utils.show_figures(plt, title)

###############################################################################
lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train_std, y_train)
X_test_lda = lda.transform(X_test_std)

from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr = lr.fit(X_train_lda, y_train)

title = 'Linear Discriminant Analysis Training Set'
ocr_utils.plot_decision_regions(X_train_lda,
                                y_train,
                                classifier=lr,
                                labels=['LD 1', 'LD 2'],
                                title=title)

title = 'Linear Discriminant Analysis Test Set'

ocr_utils.plot_decision_regions(X_test_lda,
                                y_test,
                                classifier=lr,
                                labels=['LD 1', 'LD 2'],
                                title=title)

###############################################################################
n_components = 10
lda = LDA(n_components=n_components)
X_train_lda = lda.fit_transform(X_train_std, y_train)
X_train, X_test, y_train, y_test = train_test_split(
         X, y, test_size=0.3, random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
lr = LogisticRegression(C=1000.0, random_state=0)
lr.fit(X_train_std, y_train)
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
ocr_utils.plot_decision_regions(X=X_combined_std,
                                y=y_combined,
                                classifier=lr,
                                labels=labels,
                                test_idx=range(len(X_train_std),
                                               len(X_combined_std)),
                                title='logistic_regression')


weights, params = [], []
for c in np.arange(0, 5):
    lr = LogisticRegression(C=10**c, random_state=0)
    lr.fit(X_train_std, y_train)
    weights.append(lr.coef_[0])
    params.append(10**c)

# plot the regularization path (plt is assumed to be matplotlib.pyplot,
# imported in the part of this snippet that was cut off)
title = 'regression_path'
weights = np.array(weights)
plt.plot(params, weights[:, 0], label=labels[0])
plt.plot(params, weights[:, 1], linestyle='--', label=labels[1])
plt.xscale('log')
plt.title(title)
ocr_utils.show_figures(plt, title)
y_train_pred = logistic_fitted.predict(X_train_lda)

print('\nLDA Train Accuracy: {:4.6f}, n_components={} coefficients={}'.format(accuracy_score(y_train, y_train_pred),lda.n_components,lr.coef_.shape))
# print('LDA Test Accuracy: {:4.6f}, n_components={} coefficients={}'.format(accuracy_score(y_test, y_test_pred),lda.n_components,lr.coef_.shape))

X_errors_image = X_train[y_train != y_train_pred]

X_errors2D = np.reshape(
    X_errors_image, (X_errors_image.shape[0], character_size, character_size))
ocr_utils.montage(X_errors2D,
                  title='LDA Error Images, components={}'.format(n_components))

#  X_combined = np.vstack((X_train_lda, X_test_lda))
#  y_combined = np.hstack((y_train, y_test))
if X_train_lda.shape[1] > 1:
    ocr_utils.plot_decision_regions(
        X=X_train_lda,
        y=y_train,
        classifier=lr,
        labels=['LDA1', 'LDA2'],
        title='logistic_regression after 2 component LDA')
    
######################################################################################
# now that the font is trained, pick up some text and encode a message
######################################################################################

base_file = '15-01-01 459_Mont_Lyman'
output_base = '/tmp/plots/15-01-01 459_Mont_Lyman_encrypted'
base_file, skew_indices = encode_and_save_file(
    base_file,
    output_base,
    character_size,
    white_space,
    secret_message='your first born is mine')
print('base file to decode = {}'.format(base_file))


df, t1 = ocr_utils.file_to_df(base_file,
                              character_size,
                              title='Encrypted File',
                              white_space=white_space,
                              input_filters_dict=input_filters_dict)
Example #8
print('PCA Test Accuracy: {:4.6f}, n_components={} coefficients={}'.format(accuracy_score(y_test, y_test_pred),pca.n_components,lr.coef_.shape))

X_errors_image = X_test[y_test != y_test_pred]
y_errors = y_test[y_test != y_test_pred]
X_errors_pca = X_test_pca[y_test != y_test_pred]

# change to a 2D shape
X_errors2D = np.reshape(X_errors_image, (X_errors_image.shape[0], 20, 20))
ocr_utils.montage(X_errors2D,
                  title='PCA Error Images, components={}'.format(n_components))
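
######################################################################################
# ocr_utils.montage is likewise not shown in these snippets.  A minimal
# sketch, under the assumption that it simply tiles a (n, h, w) stack of
# character images into one grid figure; illustrative only, not ocr_utils'
# actual code.
######################################################################################
import numpy as np
import matplotlib.pyplot as plt

def montage_sketch(images2D, title=''):
    n = images2D.shape[0]
    cols = int(np.ceil(np.sqrt(n)))          # roughly square grid
    rows = int(np.ceil(n / float(cols)))
    fig, axes = plt.subplots(rows, cols, figsize=(cols, rows))
    axes = np.atleast_1d(axes).ravel()
    for ax, img in zip(axes, images2D):      # one image per grid cell
        ax.imshow(img, cmap='gray')
    for ax in axes:
        ax.axis('off')                       # hide ticks on every cell
    fig.suptitle(title)
    plt.show()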

X_combined = np.vstack((X_train_pca, X_test_pca))
y_combined = np.hstack((y_train, y_test))

ocr_utils.plot_decision_regions(
    X=X_combined,
    y=y_combined,
    classifier=lr,
    labels=['PC1', 'PC2'],
    title='logistic_regression after 2 component PCA')

#########################################################################
# run Linear Discriminant Analysis first then Logistic Regression

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
n_components = 2
lda = LDA(n_components=n_components)

X_train_lda = lda.fit_transform(X_train, y_train)
X_test_lda = lda.transform(X_test)
print('\nLDA scalings = {}'.format(lda.scalings_.shape))
lr = LogisticRegression()
logistic_fitted = lr.fit(X_train_lda, y_train)
if __name__ == '__main__':
    
    y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(
        chars_to_train=(48, 49, 50),
        columns=(9, 17),
        test_size=0.3,
        nChars=300,
        random_state=0)


    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)
    X_combined_std = np.vstack((X_train_std, X_test_std))
    y_combined = np.hstack((y_train, y_test))
    X_combined = np.vstack((X_train, X_test))
    y_combined = np.hstack((y_train, y_test))

    from sklearn.ensemble import RandomForestClassifier

    forest = RandomForestClassifier(criterion='entropy',
                                    n_estimators=10, 
                                    random_state=1,
                                    n_jobs=2)
    forest.fit(X_train, y_train)

    ocr_utils.plot_decision_regions(X=X_combined,
                                    y=y_combined,
                                    classifier=forest,
                                    labels=labels,
                                    test_idx=range(len(X_train),
                                                   len(X_combined)),
                                    title='random_forest')

    print ('\n########################### No Errors ####################################')
    def predict(self, X):
        """Return class label after unit step"""
        return np.where(self.net_input(X) >= 0.0, 1, -1)

#############################################################################
# convert targets ('0','1') to -1,+1
# fit train the Perceptron
# plot the misclassifications versus Epochs
# plot the decision regions
 
y = np.where(y == ascii_characters_to_train[0], -1, 1)
ppn = Perceptron(eta=0.1, n_iter=10)
ppn.fit(X, y)

title = 'Simple Perceptron'
plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Number of misclassifications')
plt.title(title)
plt.tight_layout()
ocr_utils.show_figures(plt, title)

ocr_utils.plot_decision_regions(X=X,
                                y=y,
                                classifier=ppn,
                                labels=['column {} sum'.format(columnsXY[i])
                                        for i in range(len(columnsXY))],
                                title='Perceptron Decision Regions')



print ('\n########################### No Errors ####################################')
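
######################################################################################
# The Perceptron class trained above (its predict method is excerpted in
# another fragment) is never reproduced in full.  A minimal sketch in the
# classic Rosenblatt style, assuming the eta/n_iter constructor used above and
# the errors_ list that the misclassification plot reads; illustrative only.
######################################################################################
import numpy as np

class Perceptron(object):

    def __init__(self, eta=0.1, n_iter=10):
        self.eta = eta          # learning rate
        self.n_iter = n_iter    # passes over the training set

    def fit(self, X, y):
        self.w_ = np.zeros(1 + X.shape[1])   # w_[0] is the bias unit
        self.errors_ = []                    # misclassifications per epoch
        for _ in range(self.n_iter):
            errors = 0
            for xi, target in zip(X, y):
                update = self.eta * (target - self.predict(xi))
                self.w_[1:] += update * xi
                self.w_[0] += update
                errors += int(update != 0.0)
            self.errors_.append(errors)
        return self

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        """Return class label after unit step"""
        return np.where(self.net_input(X) >= 0.0, 1, -1)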
# 
# ada2 = AdalineGD(n_iter=15, eta=0.0001).fit(X, y)
# title = 'Gradient Descent Learning rate 0.0001'
# plt.plot(range(1,len(ada2.cost_)+1), np.log10(ada2.cost_) ,marker='x',label = title)
# plt.title(title)
# ocr_utils.show_figures(plt, title)
# standardize features
X_std = np.copy(X)
X_std[:,0] = (X[:,0] - X[:,0].mean()) / X[:,0].std()
X_std[:,1] = (X[:,1] - X[:,1].mean()) / X[:,1].std()

ada = AdalineGD(n_iter=15, eta=0.01)
ada.fit(X_std, y)
ocr_utils.plot_decision_regions(X=X_std, 
                            y=y,
                            classifier=ada, 
                            labels= labels,
                            title='Adaline - Gradient Descent standardized rate 0.01')

title = 'Standardized Gradient Descent Learning rate 0.01'
plt.plot(range(1, len(ada.cost_) + 1), np.log10(ada.cost_), marker='x', label=title)
plt.title(title)
ocr_utils.show_figures(plt, title)

plt.plot(range(1,len(ada.cost_)+1), np.log10(ada.cost_), marker='v', label='standardized rate 0.01')
plt.xlabel('Epochs')
plt.ylabel('log(Sum-squared-error)')
plt.legend(loc='lower left')
plt.title('Adaline - Gradient Descent')
plt.tight_layout()
ocr_utils.show_figures(plt, 'Adaline - Gradient Descent')
Example #12
X_test_pca = pca.transform(X_test_image)

lr = LogisticRegression()
logistic_fitted = lr.fit(X_train_pca, y_train)

print('\nPCA Train Accuracy: {:4.6f}, n_components={}'.format(
    accuracy_score(y_train, logistic_fitted.predict(X_train_pca)),
    pca.n_components))
print('PCA Test Accuracy: {:4.6f}, n_components={}'.format(
    accuracy_score(y_test, logistic_fitted.predict(X_test_pca)),
    pca.n_components))

title = 'train pc1 versus pc2'
ocr_utils.plot_decision_regions(X=X_train_pca,
                                y=y_train,
                                classifier=lr,
                                labels=['pc1', 'pc2'],
                                title=title)

title = 'test pc1 versus pc2'
ocr_utils.plot_decision_regions(X=X_test_pca,
                                y=y_test,
                                classifier=lr,
                                labels=['pc1', 'pc2'],
                                title=title)
X_train_pca = pca.fit_transform(X_train_image)
X_test_pca = pca.transform(X_test_image)

########################################################################################
pca = PCA(n_components=n_components)
X_train_pca = pca.fit_transform(X_train_image)
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@author: richard lyman
'''
import numpy as np

import ocr_utils

from sklearn.svm import SVC

np.random.seed(0)
X_xor = np.random.randn(200, 2)
y_xor = np.logical_xor(X_xor[:, 0] > 0, X_xor[:, 1] > 0)
y_xor = np.where(y_xor, 1, -1)

ocr_utils.scatter_plot(X=X_xor,
                       y=y_xor,
                       title='xor',
                       xlim=(-3, 3),
                       ylim=(-3, 3))


svm = SVC(kernel='rbf', random_state=0, gamma=0.10, C=10.0)
svm.fit(X_xor, y_xor)
ocr_utils.plot_decision_regions(X=X_xor,
                                y=y_xor,
                                classifier=svm,
                                title='support vector machine rbf xor')
print ('\n########################### No Errors ####################################')
Example #14
    def predict(self, X):
        """Return class label after unit step"""
        return np.where(self.activation(X) >= 0.0, 1, -1)


#############################################################################
# standardize features,fit, and plot
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()
ada = AdalineSGD(n_iter=15, eta=0.01, random_state=1)
ada.fit(X_std, y)

ocr_utils.plot_decision_regions(X=X_std,
                                y=y,
                                classifier=ada,
                                title='Adaline - Stochastic Gradient Descent',
                                labels=labels)

title = 'Adaline - Stochastic Gradient Descent'
plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Average Cost')
plt.title(title)
plt.tight_layout()
ocr_utils.show_figures(plt, title)

print(
    '\n########################### No Errors ####################################'
)
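
######################################################################################
# Neither AdalineGD nor AdalineSGD is defined in these fragments.  A minimal
# batch-gradient-descent sketch of AdalineGD with the cost_ list the plots
# read; AdalineSGD differs mainly in updating per sample and averaging the
# cost.  Illustrative only, not the repo's actual code.
######################################################################################
import numpy as np

class AdalineGD(object):

    def __init__(self, eta=0.01, n_iter=50):
        self.eta = eta
        self.n_iter = n_iter

    def fit(self, X, y):
        self.w_ = np.zeros(1 + X.shape[1])
        self.cost_ = []                      # sum-squared error per epoch
        for _ in range(self.n_iter):
            errors = y - self.net_input(X)   # linear activation
            self.w_[1:] += self.eta * X.T.dot(errors)
            self.w_[0] += self.eta * errors.sum()
            self.cost_.append((errors ** 2).sum() / 2.0)
        return self

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def activation(self, X):
        return self.net_input(X)             # identity activation

    def predict(self, X):
        """Return class label after unit step"""
        return np.where(self.activation(X) >= 0.0, 1, -1)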
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@author: richard lyman
'''
import numpy as np
import ocr_utils
from sklearn.preprocessing import StandardScaler

y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(
    chars_to_train=(48, 49, 50),
    columns=(9, 17),
    test_size=0.3,
    nChars=300,
    random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

from sklearn.svm import SVC

svm = SVC(kernel='linear', C=1.0, random_state=0)
svm.fit(X_train_std, y_train)

ocr_utils.plot_decision_regions(X=X_combined_std,
                                y=y_combined,
                                classifier=svm,
                                test_idx=range(len(X_train_std),
                                               len(X_combined_std)),
                                labels=labels,
                                title='support_vector_machine_linear')
print ('\n########################### No Errors ####################################')
y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(
    chars_to_train=(48, 49, 50),
    columns=(9, 17),
    test_size=0.3,
    nChars=300,
    random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

from sklearn.svm import SVC

svm = SVC(kernel='linear', C=1.0, random_state=0)
svm.fit(X_train_std, y_train)

ocr_utils.plot_decision_regions(X=X_combined_std,
                                y=y_combined,
                                classifier=svm,
                                test_idx=range(len(X_train_std),
                                               len(X_combined_std)),
                                labels=labels,
                                title='support_vector_machine_linear')
print(
    '\n########################### No Errors ####################################'
)
# standardize the features

X_train, X_test, y_train, y_test = train_test_split(
         X, y, test_size=0.3, random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0)
ppn.fit(X_train_std, y_train)

y_pred = ppn.predict(X_test_std)
print('Misclassified samples: %d' % (y_test != y_pred).sum())

from sklearn.metrics import accuracy_score

print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))
       
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

ocr_utils.plot_decision_regions(X_combined_std, y_combined, ppn,
                                test_idx=range(len(X_train_std),
                                               len(X_combined_std)),
                                labels=labels,
                                title='perceptron_scikit')



print ('\n########################### No Errors ####################################')
title = 'Projecting Feature Set onto New Feature Space'
plt.title(title)
plt.tight_layout()
ocr_utils.show_figures(plt,title)

###############################################################################
lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train_std, y_train)
X_test_lda = lda.transform(X_test_std)

from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr = lr.fit(X_train_lda, y_train)

title = 'Linear Discriminant Analysis Training Set'
ocr_utils.plot_decision_regions(X_train_lda, y_train, classifier=lr, labels=['LD 1','LD 2'], title=title)

title = 'Linear Discriminant Analysis Test Set'

ocr_utils.plot_decision_regions(X_test_lda, y_test, classifier=lr, labels=['LD 1','LD 2'], title=title)


###############################################################################
n_components = 10
lda = LDA(n_components=n_components)
X_train_lda = lda.fit_transform(X_train_std, y_train)
X_test_lda = lda.transform(X_test_std)

print ('n_components={}'.format(lda.n_components))
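
# NOTE: LDA yields at most (n_classes - 1) discriminants, so with the three
# characters trained here the transform has at most 2 columns even though
# n_components=10 was requested above (older scikit-learn capped this
# silently).  A sanity check under that assumption:
assert X_train_lda.shape[1] <= min(X_train_std.shape[1],
                                   np.unique(y_train).shape[0] - 1)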

lr = LogisticRegression()
y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(
    chars_to_train=(48, 49, 50),
    columns=(9, 17),
    test_size=0.3,
    nChars=300,
    random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))


svm = SVC(kernel='rbf', random_state=0, gamma=0.2, C=1.0)
svm.fit(X_train_std, y_train)

ocr_utils.plot_decision_regions(X=X_combined_std,
                                y=y_combined,
                                classifier=svm,
                                labels=labels,
                                test_idx=range(len(X_train_std),
                                               len(X_combined_std)),
                                title='SVM with gamma 0.2')

svm = SVC(kernel='rbf', random_state=0, gamma=100.0, C=1.0)
svm.fit(X_train_std, y_train)

ocr_utils.plot_decision_regions(X=X_combined_std,
                                y=y_combined,
                                classifier=svm,
                                labels=labels,
                                test_idx=range(len(X_train_std),
                                               len(X_combined_std)),
                                title='SVM with gamma 100')


print ('\n########################### No Errors ####################################')
y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(
    chars_to_train=(48, 49, 50),
    columns=(9, 17),
    test_size=0.3,
    nChars=300,
    random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
X_combined = np.vstack((X_train, X_test))
y_combined = np.hstack((y_train, y_test))

from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
knn.fit(X_train_std, y_train)

ocr_utils.plot_decision_regions(X=X_combined_std,
                                y=y_combined,
                                classifier=knn,
                                labels=labels,
                                test_idx=range(len(X_train_std),
                                               len(X_combined_std)),
                                title='k_nearest_neighbors')
print(
    '\n########################### No Errors ####################################'
)
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0)
ppn.fit(X_train_std, y_train)

y_pred = ppn.predict(X_test_std)
print('Misclassified samples: %d' % (y_test != y_pred).sum())

from sklearn.metrics import accuracy_score

print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

ocr_utils.plot_decision_regions(X_combined_std,
                                y_combined,
                                ppn,
                                test_idx=range(len(X_train_std),
                                               len(X_combined_std)),
                                labels=labels,
                                title='perceptron_scikit')

print(
    '\n########################### No Errors ####################################'
)
########################################################################################


pca = PCA(n_components=2)

X_train_pca = pca.fit_transform(X_train_image)
X_test_pca = pca.transform(X_test_image)

lr = LogisticRegression()
logistic_fitted = lr.fit(X_train_pca, y_train)

print('\nPCA Train Accuracy: {:4.6f}, n_components={}'.format(
    accuracy_score(y_train, logistic_fitted.predict(X_train_pca)),
    pca.n_components))
print('PCA Test Accuracy: {:4.6f}, n_components={}'.format(
    accuracy_score(y_test, logistic_fitted.predict(X_test_pca)),
    pca.n_components))

title = 'train pc1 versus pc2'
ocr_utils.plot_decision_regions(X=X_train_pca, y=y_train, classifier=lr,
                                labels=['pc1', 'pc2'], title=title)

title = 'test pc1 versus pc2'
ocr_utils.plot_decision_regions(X=X_test_pca, y=y_test, classifier=lr,
                                labels=['pc1', 'pc2'], title=title)
X_train_pca = pca.fit_transform(X_train_image)
X_test_pca = pca.transform(X_test_image)

########################################################################################
pca = PCA(n_components=n_components)
X_train_pca = pca.fit_transform(X_train_image)
X_test_pca = pca.transform(X_test_image)

lr = LogisticRegression()
logistic_fitted = lr.fit(X_train_pca, y_train)

y_train_pred = logistic_fitted.predict(X_train_pca)
#
# ada2 = AdalineGD(n_iter=15, eta=0.0001).fit(X, y)
# title = 'Gradient Descent Learning rate 0.0001'
# plt.plot(range(1,len(ada2.cost_)+1), np.log10(ada2.cost_) ,marker='x',label = title)
# plt.title(title)
# ocr_utils.show_figures(plt, title)
# standardize features
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

ada = AdalineGD(n_iter=15, eta=0.01)
ada.fit(X_std, y)
ocr_utils.plot_decision_regions(
    X=X_std,
    y=y,
    classifier=ada,
    labels=labels,
    title='Adaline - Gradient Descent standardized rate 0.01')

title = 'Standardized Gradient Descent Learning rate 0.01'
plt.plot(range(1,
               len(ada.cost_) + 1),
         np.log10(ada.cost_),
         marker='x',
         label=title)
plt.title(title)
ocr_utils.show_figures(plt, title)

plt.plot(range(1,
               len(ada.cost_) + 1),
         np.log10(ada.cost_),
         marker='v',
         label='standardized rate 0.01')