def test(self, truthed_features):
    """Evaluate the network on a full feature set and show the misclassified images.

    Every array in ``truthed_features.features`` is fed to the placeholder at
    the same index in ``self._ph`` with the keep probability set to 1.0.  The
    accuracy, image and correct-prediction tensors are evaluated in a single
    run; the accuracy is printed and the images whose prediction was wrong
    are displayed with ``ocr_utils.montage``.

    Args:
        truthed_features: object exposing ``num_features`` and an indexable
            ``features`` sequence ordered like ``self._ph``.

    Returns:
        None.
    """
    feed = {self._keep_prob: 1.0}
    # assign feature data to each placeholder
    for j in range(truthed_features.num_features):
        feed[self._ph[j]] = truthed_features.features[j]

    test_accuracy = 0
    m = 0
    accuracy, images, correct = self._sess.run(
        [self._accuracy, self._x_image, self._correct_prediction],
        feed_dict=feed)
    test_accuracy += accuracy
    m += 1

    # Keep channel 0 of every image whose prediction was wrong.
    error_images = np.append(
        np.empty((0, self._nRows, self._nCols)),
        images[:, :, :, 0][np.logical_not(correct)],
        axis=0)

    # Exactly one evaluation pass is made, so m is always 1 here and the old
    # "m == 0" fallback could never run.
    mean_accuracy = test_accuracy / m

    # Only the lookup of the optional module-level font filter is guarded.
    # The previous bare ``except:`` also hid plotting failures (and
    # KeyboardInterrupt) and then printed and plotted a second time.
    try:
        font = input_filters_dict['font']
    except (NameError, KeyError, TypeError):
        print("test accuracy {}".format(mean_accuracy), flush=True)
        ocr_utils.montage(error_images, title='TensorFlow Error Images')
    else:
        print("test accuracy {} for font: {}".format(mean_accuracy, font),
              flush=True)
        ocr_utils.montage(
            error_images,
            title='TensorFlow {} Error Images'.format(font))
def test2(self, truthed_data, title=''):
    """Evaluate the network batch by batch and show its input and output images.

    Pulls ``len(truthed_data.features[0]) // 100`` batches of 100 samples
    from ``truthed_data.next_batch``, feeds each batch to the placeholders in
    ``self._ph`` with the keep probability set to 1.0, and accumulates the
    batch accuracy together with channel 0 of ``self._x_image`` (inputs) and
    ``self._x_image2`` (outputs, collected at half the row/column size).
    The mean batch accuracy is printed and both image stacks are displayed
    with ``ocr_utils.montage``.

    Args:
        truthed_data: object exposing ``features``, ``num_features`` and
            ``next_batch(n)``; the batch list is returned in the same order
            as the placeholders.
        title: label inserted into the printed message and montage titles.

    Returns:
        None.
    """
    batch_size = 100

    # Chunks are collected in lists and joined once; growing the arrays with
    # np.append inside the loop copied all earlier images on every batch.
    input_chunks = [np.empty((0, int(self._nRows), int(self._nCols)))]
    output_chunks = [
        np.empty((0, int(self._nRows / 2), int(self._nCols / 2)))]

    test_accuracy = 0
    m = 0
    for _ in range(len(truthed_data.features[0]) // batch_size):
        batch = truthed_data.next_batch(batch_size)

        # assign feature data to each placeholder
        # the batch list is returned in the same order as the features requested
        feed = {self._keep_prob: 1.0}
        for j in range(truthed_data.num_features):
            feed[self._ph[j]] = batch[j]

        accuracy, x_in, x_out = self._sess.run(
            [self._accuracy, self._x_image, self._x_image2],
            feed_dict=feed)

        test_accuracy += accuracy
        input_chunks.append(x_in[:, :, :, 0])
        output_chunks.append(x_out[:, :, :, 0])
        m += 1

    input_images = np.concatenate(input_chunks, axis=0)
    output_images = np.concatenate(output_chunks, axis=0)

    if m == 0:
        # Fewer than one full batch was available.  This case used to be
        # reached through a ZeroDivisionError caught by a bare ``except:``,
        # which also hid genuine plotting errors; it is now explicit.
        print("test accuracy 1", flush=True)
        ocr_utils.montage(output_images, title='TensorFlow Output Images')
        ocr_utils.montage(input_images, title='TensorFlow Input Images')
        return

    print("test accuracy {} for : {}".format(test_accuracy / m, title),
          flush=True)
    ocr_utils.montage(input_images,
                      title='TensorFlow {} Input Images'.format(title))
    ocr_utils.montage(output_images,
                      title='TensorFlow {} Output Images'.format(title))
def test(self, truthed_features):
    """Evaluate the network on a full feature set and show the misclassified images.

    Every array in ``truthed_features.features`` is fed to the placeholder at
    the same index in ``self._ph`` with the keep probability set to 1.0.  The
    accuracy, image and correct-prediction tensors are evaluated in a single
    run; the accuracy is printed and the images whose prediction was wrong
    are displayed with ``ocr_utils.montage``.

    Args:
        truthed_features: object exposing ``num_features`` and an indexable
            ``features`` sequence ordered like ``self._ph``.

    Returns:
        None.
    """
    feed = {self._keep_prob: 1.0}
    # assign feature data to each placeholder
    for j in range(truthed_features.num_features):
        feed[self._ph[j]] = truthed_features.features[j]

    test_accuracy = 0
    m = 0
    accuracy, images, correct = self._sess.run(
        [self._accuracy, self._x_image, self._correct_prediction],
        feed_dict=feed)
    test_accuracy += accuracy
    m += 1

    # Keep channel 0 of every image whose prediction was wrong.
    error_images = np.append(
        np.empty((0, self._nRows, self._nCols)),
        images[:, :, :, 0][np.logical_not(correct)],
        axis=0)

    # Exactly one evaluation pass is made, so m is always 1 here and the old
    # "m == 0" fallback could never run.
    mean_accuracy = test_accuracy / m

    # Only the lookup of the optional module-level font filter is guarded.
    # The previous bare ``except:`` also hid plotting failures (and
    # KeyboardInterrupt) and then printed and plotted a second time.
    try:
        font = input_filters_dict['font']
    except (NameError, KeyError, TypeError):
        print("test accuracy {}".format(mean_accuracy), flush=True)
        ocr_utils.montage(error_images, title='TensorFlow Error Images')
    else:
        print("test accuracy {} for font: {}".format(mean_accuracy, font),
              flush=True)
        ocr_utils.montage(
            error_images,
            title='TensorFlow {} Error Images'.format(font))
def test2(self, truthed_data, title = ''):
    """Evaluate the network batch by batch and show its input and output images.

    Pulls ``len(truthed_data.features[0]) // 100`` batches of 100 samples
    from ``truthed_data.next_batch``, feeds each batch to the placeholders in
    ``self._ph`` with the keep probability set to 1.0, and accumulates the
    batch accuracy together with channel 0 of ``self._x_image`` (inputs) and
    ``self._x_image2`` (outputs, collected at half the row/column size).
    The mean batch accuracy is printed and both image stacks are displayed
    with ``ocr_utils.montage``.

    Args:
        truthed_data: object exposing ``features``, ``num_features`` and
            ``next_batch(n)``; the batch list is returned in the same order
            as the placeholders.
        title: label inserted into the printed message and montage titles.

    Returns:
        None.
    """
    batch_size = 100

    # Chunks are collected in lists and joined once; growing the arrays with
    # np.append inside the loop copied all earlier images on every batch.
    input_chunks = [np.empty((0, int(self._nRows), int(self._nCols)))]
    output_chunks = [
        np.empty((0, int(self._nRows / 2), int(self._nCols / 2)))]

    test_accuracy = 0
    m = 0
    for _ in range(len(truthed_data.features[0]) // batch_size):
        batch = truthed_data.next_batch(batch_size)

        # assign feature data to each placeholder
        # the batch list is returned in the same order as the features requested
        feed = {self._keep_prob: 1.0}
        for j in range(truthed_data.num_features):
            feed[self._ph[j]] = batch[j]

        accuracy, x_in, x_out = self._sess.run(
            [self._accuracy, self._x_image, self._x_image2],
            feed_dict=feed)

        test_accuracy += accuracy
        input_chunks.append(x_in[:, :, :, 0])
        output_chunks.append(x_out[:, :, :, 0])
        m += 1

    input_images = np.concatenate(input_chunks, axis=0)
    output_images = np.concatenate(output_chunks, axis=0)

    if m == 0:
        # Fewer than one full batch was available.  This case used to be
        # reached through a ZeroDivisionError caught by a bare ``except:``,
        # which also hid genuine plotting errors; it is now explicit.
        print("test accuracy 1", flush=True)
        ocr_utils.montage(output_images, title='TensorFlow Output Images')
        ocr_utils.montage(input_images, title='TensorFlow Input Images')
        return

    print("test accuracy {} for : {}".format(test_accuracy / m, title),
          flush=True)
    ocr_utils.montage(input_images,
                      title='TensorFlow {} Input Images'.format(title))
    ocr_utils.montage(output_images,
                      title='TensorFlow {} Output Images'.format(title))
# (tail of a plotting call whose opening line is above this excerpt)
s=250, marker='*', c='red', label='k++')
# Scatter the samples coloured by their cluster assignment y_km; one axis
# label is generated per entry of columnsXY.
ocr_utils.scatter_plot(X=X, y=y_km, legend_entries='', axis_labels = ['column {} sum'.format(columnsXY[i]) for i in range(len(columnsXY))], title='column sums k++ means centroids')

# For every cluster centre, gather the images assigned to that cluster and
# display them as a montage of num_rows x num_columns tiles.
for i in range(0,km.cluster_centers_.shape[0]):
    # np.argwhere returns one row of indices per matching sample
    image_index2 = np.argwhere(y_km == i)
    x2d = X_image[image_index2].reshape((image_index2.shape[0],ds.train.num_rows, ds.train.num_columns))
    ocr_utils.montage(x2d,title='k++ cluster {}'.format(i))

##############################################
# separate the original images by cluster
# print(km.cluster_centers_.shape)

# Settings for the next section of the script.
n=30000
# 48..57 are the code points of the characters '0'..'9'
chars_to_train = range(48,58)
columnsXY=range(0,20)
column_str = 'column_sum{}'.format(list(columnsXY))
# 81 evenly spaced skew values from -0.5 to 0.5
skewRange = np.linspace(-0.5,0.5,81)
input_filters_dict = {'m_label': chars_to_train, 'font': 'E13B'}

# output the character label and the image and column sums
# NOTE(review): only 'm_label' and 'image' are listed below; column_str is
# built above but not included here - confirm whether that is intended.
output_feature_list = ['m_label','image']
# Build a small synthetic data set: every base shape (plus, box, vee) is
# sheared by each skew in skewRange and labelled 0, 1 or 2 respectively.
# make some skewed versions of the shapes
skewRange = np.linspace(-0.5, 0.5, 81)
images = np.empty((3 * len(skewRange), 20, 20))
ys = np.empty((3 * len(skewRange)))

# make sheared versions of shapes
# Rows are interleaved: 3*i is the plus, 3*i+1 the box, 3*i+2 the vee.
for i, skew in enumerate(skewRange):
    images[3 * i] = shear(plus, skew)
    images[3 * i + 1] = shear(box, skew)
    images[3 * i + 2] = shear(vee, skew)
    ys[3 * i] = 0
    ys[3 * i + 1] = 1
    ys[3 * i + 2] = 2

title = 'skewed versions of shapes'
ocr_utils.montage(images, title=title)

# Flatten every 20x20 image into a 400-wide feature row.
num_image = images.shape[0]
images_reshaped = np.reshape(images, (num_image, 20 * 20))

#########################################################################
# run a Logistic Regression on the raw features with 20 rows, 20 columns
from sklearn.linear_model import LogisticRegression
# train_test_split moved to sklearn.model_selection in scikit-learn 0.18 and
# sklearn.cross_validation was removed in 0.20; fall back only for old
# installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    images_reshaped, ys, test_size=0.3, random_state=0)

lr = LogisticRegression()
lr.fit(X_train, y_train)

y_train_pred = lr.predict(X_train)
import skimage.transform as af

# Expand every base character into one sheared copy per skew value and
# repeat that character's metadata alongside each copy.
for j in range(shp[0]):
    for i, skew in enumerate(skewRange):
        k = j * len(skewRange) + i
        images.append(ocr_utils.shear(t1[j], skew))
        # copy the per-character columns of df, in the original order
        for _target, _column in ((originalH, 'originalH'),
                                 (tops, 'm_top'),
                                 (originalW, 'originalW'),
                                 (lefts, 'm_left')):
            _target.append(df[_column][j])
        orientation.append(skew)
        recognized_label.append(df['m_label'][j])

# Show the sheared characters, then flatten each image into one feature row.
images = np.array(images)
ocr_utils.montage(images, title='Base Characters Skewed')
_flat_shape = (images.shape[0], images.shape[1] * images.shape[2])
images = np.reshape(images, _flat_shape)

# Rebuild the data frame from the sheared images and their metadata.
df = ocr_utils.make_df(
    images,
    character_size,
    character_size,
    originalH,
    originalW,
    tops,
    lefts,
    orientation,
    recognized_label)

# Keep the labels 48..57 and 65..90 and request the one-hot orientation as
# the first output feature, followed by the image.
input_filters_dict = {
    'm_label': list(range(48, 58)) + list(range(65, 91)),
}
output_feature_list = ['orientation_one_hot', 'image']
ds = ocr_utils.read_df(
    df,
    input_filters_dict=input_filters_dict,
    output_feature_list=output_feature_list,
    test_size=0,
    engine_type='tensorflow',
    dtype=dtype)
def train_a_font(input_filters_dict, output_feature_list, nEpochs=5000):
    """Train a stack of residual fully connected sections and report test accuracy.

    The data set is read with ``ocr_utils.read_data`` (10% held out for
    testing).  The network consists of ``nSections`` (10) sections; each
    section is three ReLU layers (image width -> width/4 -> width/4 -> image
    width) whose output is added to the section input.  Dropout and a softmax
    readout follow.  Training minimises the summed cross entropy with Adam
    (learning rate 1e-4) on batches of 100.  Every 100 steps the accuracy on
    the current batch is evaluated without dropout, logged for TensorBoard
    under ``/tmp/ds_logs/<timestamp>`` and printed; training stops early after
    10 consecutive evaluations with an accuracy of exactly 1.  Finally the
    test split is evaluated in slices of 100, the mean slice accuracy is
    printed and the misclassified images are shown with ``ocr_utils.montage``.

    Args:
        input_filters_dict: filter passed through to ``ocr_utils.read_data``;
            when it has a ``'font'`` entry, that value is used in the report.
        output_feature_list: feature names to read.  Index 0 is the target
            and index 1 is the image fed to the first section.
        nEpochs: maximum number of training batches.

    Returns:
        None.
    """
    ds = ocr_utils.read_data(input_filters_dict=input_filters_dict,
                             output_feature_list=output_feature_list,
                             test_size=.1,
                             engine_type='tensorflow', dtype=dtype)

    # ==========================================================================
    # Start TensorFlow Interactive Session
    # ==========================================================================
    sess = tf.InteractiveSession()
    writer = None
    try:
        # ======================================================================
        # Placeholders
        # Create a tensorflow Placeholder for each feature of data returned
        # from the dataset
        # ======================================================================
        lst = []
        for i, nm in enumerate(output_feature_list):
            # features[0] is always the target, for instance m_label_one_hot;
            # features[1] is the image that is fed to the first section.
            # names are assigned to make them look pretty on the tensorboard
            # graph.
            nm = ('y_' if i == 0 else 'x_') + nm
            print(ds.train.features[i].dtype)
            lst.append(tf.placeholder(
                dtype, shape=[None, ds.train.feature_width[i]], name=nm))

        # ph is a named tuple with key names like 'image', 'm_label', and
        # values that are tensors. The display names on the Chrome graph are
        # 'y_m_label', 'x_image', x_upper_case etc.
        Place_Holders = namedtuple('Place_Holders', ds.train.feature_names)
        ph = Place_Holders(*lst)   # unpack placeholders into named Tuple

        nRows = ds.train.num_rows      # image height
        nCols = ds.train.num_columns   # image width

        nSections = 10
        # Every slot below is filled in by section(); h has one extra slot
        # for the network input h[0].
        w = [None] * (nSections * 3)
        b = [None] * (nSections * 3)
        h = [None] * (nSections * 3 + 1)

        in_out_width = nRows * nCols
        internal_width = int(in_out_width / 4)

        # the number of one_hot features in the target, 'm_label'
        nTarget = ds.train.feature_width[0]

        # ======================================================================
        # Weight Initialization
        # ======================================================================
        def weight_variable(shape, dtype):
            initial = tf.truncated_normal(shape, stddev=0.1, dtype=dtype)
            return tf.Variable(initial)

        def bias_variable(shape, dtype):
            initial = tf.constant(0.1, shape=shape, dtype=dtype)
            return tf.Variable(initial)

        # ======================================================================
        # Sections
        # ======================================================================
        def shapeOuts(n):
            # debugging aid: print the shapes going into and out of layer n
            print('n={}, hin={},w={}, b={} ,hout={}\n'.format(
                n, h[n]._shape, w[n]._variable._shape,
                b[n]._variable._shape, h[n+1]._shape))

        def section(n):
            # Layer n: image width -> internal width
            with tf.name_scope('section_' + str(n) + '_0'):
                w[n] = weight_variable([in_out_width, internal_width], dtype)
                b[n] = bias_variable([internal_width], dtype)
                h[n+1] = tf.nn.relu(tf.matmul(h[n], w[n]) + b[n])
                shapeOuts(n)

            # Layer n+1: internal width -> internal width
            with tf.name_scope('section_' + str(n) + '_1'):
                w[n+1] = weight_variable([internal_width, internal_width], dtype)
                b[n+1] = bias_variable([internal_width], dtype)
                h[n+2] = tf.nn.relu(tf.matmul(h[n+1], w[n+1]) + b[n+1])
                shapeOuts(n+1)

            # Layer n+2: internal width -> image width, plus the skip
            # connection from the section input h[n]
            with tf.name_scope('section_' + str(n) + '_2'):
                w[n+2] = weight_variable([internal_width, in_out_width], dtype)
                b[n+2] = bias_variable([in_out_width], dtype)
                z = tf.nn.relu(tf.matmul(h[n+2], w[n+2]) + b[n+2])
                h[n+3] = tf.add(z, h[n])

                print('z shape ={}'.format(z._shape))
                shapeOuts(n+2)

        def computeSize(s, tens):
            # print and return the number of elements in tensor `tens`
            sumC = 1
            tShape = tens.get_shape()
            nDims = len(tShape)
            for i in range(nDims):
                sumC *= tShape[i].value
            print('\t{}\t{}'.format(s, sumC), flush=True)
            return sumC

        # ======================================================================
        # Build sectional network
        # ======================================================================
        h[0] = ph[1]
        for i in range(nSections):
            section(3 * i)

        # ======================================================================
        # Dropout
        # ======================================================================
        keep_prob = tf.placeholder(dtype, name='keep_prob')

        with tf.name_scope("drop"):
            h_fc2_drop = tf.nn.dropout(h[nSections * 3], keep_prob)

        # ======================================================================
        # Readout Layer
        # ======================================================================
        with tf.name_scope("softmax"):
            w_fc3 = weight_variable([in_out_width, nTarget], dtype)
            b_fc3 = bias_variable([nTarget], dtype)
            y_conv = tf.nn.softmax(tf.matmul(h_fc2_drop, w_fc3) + b_fc3)

        print('network size:', flush=True)
        total = 0
        for i in range(nSections * 3):
            total = total + computeSize("w{}".format(i), w[i])
        total = total + computeSize("b_fc3", b_fc3) + \
            computeSize("w_fc3", w_fc3)

        print('\ttotal\t{}'.format(total), flush=True)

        # ======================================================================
        # Train and Evaluate the Model
        # ======================================================================
        with tf.name_scope("reshape_x_image"):
            # Rows come first so the result matches the (n, nRows, nCols)
            # error-image array built below.  The previous [-1, nCols, nRows, 1]
            # order only worked for square images.
            x_image = tf.reshape(ph.image, [-1, nRows, nCols, 1])

        with tf.name_scope("xent"):
            # 1e-8 added to eliminate the crash of training when taking log of 0
            cross_entropy = -tf.reduce_sum(ph[0] * tf.log(y_conv + 1e-8))
            tf.scalar_summary("cross entropy", cross_entropy)

        with tf.name_scope("train"):
            train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

        with tf.name_scope("test"):
            correct_prediction = tf.equal(tf.argmax(y_conv, 1),
                                          tf.argmax(ph[0], 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype))
            tf.scalar_summary("accuracy", accuracy)

        merged = tf.merge_all_summaries()

        # Log directory name: year-month-day-hour-minute
        tp = datetime.datetime.now().timetuple()
        tm = '-'.join(str(tp[i]) for i in range(5))
        writer = tf.train.SummaryWriter("/tmp/ds_logs/" + tm, sess.graph)

        # To see the results in Chrome,
        # Run the following in terminal to activate server.
        # tensorboard --logdir '/tmp/ds_logs/'
        # See results on localhost:6006

        sess.run(tf.initialize_all_variables())

        perfect_count = 10
        for i in range(nEpochs):
            batch = ds.train.next_batch(100)
            # assign feature data to each placeholder
            # the batch list is returned in the same order as the features
            # requested
            feed = {keep_prob: 0.5}
            for j in range(ds.train.num_features):
                feed[ph[j]] = batch[j]

            if i % 100 == 0:
                # Evaluate without dropout on a copy of the feed, so that the
                # training step below still runs with keep_prob 0.5.  The
                # summary and the accuracy come from one run instead of two.
                eval_feed = dict(feed)
                eval_feed[keep_prob] = 1.0
                summary_str, train_accuracy = sess.run(
                    [merged, accuracy], feed_dict=eval_feed)
                writer.add_summary(summary_str, i)
                if train_accuracy != 1:
                    perfect_count = 10
                else:
                    perfect_count -= 1
                    if perfect_count == 0:
                        break

                print("step %d, training accuracy %g" % (i, train_accuracy),
                      flush=True)
            train_step.run(feed_dict=feed)

        # ----------------------------------------------------------------------
        # Evaluate the held-out test split in slices of 100 samples
        # ----------------------------------------------------------------------
        feed = {keep_prob: 1.0}
        error_chunks = [np.empty((0, nRows, nCols))]
        test_accuracy = 0
        m = 0
        for n in range(0, ds.test.features[0].shape[0], 100):
            # assign feature data to each placeholder
            for i in range(ds.train.num_features):
                feed[ph[i]] = ds.test.features[i][n:n + 100]
            batch_accuracy, images, correct = sess.run(
                [accuracy, x_image, correct_prediction], feed_dict=feed)
            test_accuracy += batch_accuracy
            # keep channel 0 of the misclassified images of this slice
            error_chunks.append(images[:, :, :, 0][np.logical_not(correct)])
            m += 1
        error_images = np.concatenate(error_chunks, axis=0)

        # An empty test split used to raise ZeroDivisionError from inside the
        # bare ``except:`` handler; report NaN instead.
        mean_accuracy = test_accuracy / m if m else float('nan')

        # Only the optional 'font' lookup is guarded; plotting errors are no
        # longer swallowed and retried.
        try:
            font = input_filters_dict['font']
        except (KeyError, TypeError):
            print("test accuracy {}".format(mean_accuracy), flush=True)
            ocr_utils.montage(error_images, title='TensorFlow Error Images')
        else:
            print("test accuracy {} for font: {}".format(mean_accuracy, font),
                  flush=True)
            ocr_utils.montage(
                error_images,
                title='TensorFlow {} Error Images'.format(font))
    finally:
        # Release resources even when training fails.  The session is closed
        # before the default graph is reset, because resetting the graph while
        # a session is active is undefined behaviour.
        if writer is not None:
            writer.close()
        sess.close()
        tf.reset_default_graph()   # only necessary when iterating through fonts
# Predict labels for X_test_pca with the already fitted model.
y_test_pred = logistic_fitted.predict(X_test_pca)

# Report the accuracy on the training and test predictions together with the
# number of components configured on `pca`.
print('\nPCA Train Accuracy: {:4.6f}, n_components={}'.format(
    accuracy_score(y_train, y_train_pred), pca.n_components))
print('PCA Test Accuracy: {:4.6f}, n_components={}'.format(
    accuracy_score(y_test, y_test_pred), pca.n_components))

# Select the test rows whose prediction differs from the true label, from
# X_test_image, y_test and X_test_pca.
X_errors_image = X_test_image[y_test != y_test_pred]
y_errors = y_test[y_test != y_test_pred]
X_errors_pca = X_test_pca[y_test != y_test_pred]

# Show the first 500 rows of X_train_image as num_rows x num_columns tiles.
X_orig = X_train_image[:500]
title = 'originals'
X2D = np.reshape(X_orig, (X_orig.shape[0], ds.train.num_rows, ds.train.num_columns))
ocr_utils.montage(X2D, title=title)

# Map the first 500 rows of X_train_pca back through pca.inverse_transform
# and show the result.
X_orig = X_train_pca[:500]
title = 'inverse original'
X_inverse = pca.inverse_transform(X_orig)
X2D = np.reshape(X_inverse, (X_inverse.shape[0], ds.train.num_rows, ds.train.num_columns))
# shift the values so that the smallest one becomes 0 before display
X2D = X2D - np.min(X2D)
ocr_utils.montage(X2D, title=title)

# change to a 2D shape
X_errors2D = np.reshape(
    X_errors_image,
    (X_errors_image.shape[0], ds.train.num_rows, ds.train.num_columns))
# (this call continues below this excerpt)
ocr_utils.montage(
    X_errors2D,
# Fit the classifier on the PCA features and predict both splits.
logistic_fitted = lr.fit(X_train_pca, y_train)
y_train_pred = logistic_fitted.predict(X_train_pca)
y_test_pred = logistic_fitted.predict(X_test_pca)

# Report train and test accuracy next to the configured component count.
print('\nPCA Train Accuracy: {:4.6f}, n_components={}'.format(
    accuracy_score(y_train, y_train_pred),
    pca.n_components))
print('PCA Test Accuracy: {:4.6f}, n_components={}'.format(
    accuracy_score(y_test, y_test_pred),
    pca.n_components))

# Collect the misclassified test samples: images, labels and PCA features.
X_errors_image = X_test_image[y_test != y_test_pred]
y_errors = y_test[y_test != y_test_pred]
X_errors_pca = X_test_pca[y_test != y_test_pred]

# First 500 training images, as they were read.
title = 'originals'
X_orig = X_train_image[:500]
X2D = np.reshape(
    X_orig,
    (X_orig.shape[0], ds.train.num_rows, ds.train.num_columns))
ocr_utils.montage(X2D, title=title)

# The same 500 samples reconstructed from their PCA features, shifted so
# that the smallest value is 0.
title = 'inverse original'
X_orig = X_train_pca[:500]
X_inverse = pca.inverse_transform(X_orig)
X2D = np.reshape(
    X_inverse,
    (X_inverse.shape[0], ds.train.num_rows, ds.train.num_columns))
X2D = X2D - np.min(X2D)
ocr_utils.montage(X2D, title=title)

# The misclassified test images, reshaped to 2D tiles.
X_errors2D = np.reshape(
    X_errors_image,
    (X_errors_image.shape[0], ds.train.num_rows, ds.train.num_columns))
ocr_utils.montage(
    X_errors2D,
    title='PCA Error Characters, components={}'.format(n_components))

# Reconstruct the misclassified samples from their PCA features.
title = 'inverse transform errors'
X_inverse = pca.inverse_transform(X_errors_pca)
X2D = np.reshape(
    X_inverse,
    (X_inverse.shape[0], ds.train.num_rows, ds.train.num_columns))
# NOTE(review): the statements down to `images.append(image)` presumably run
# once per character box inside a loop whose header is above this excerpt -
# confirm against the full file.
# Cut the region described by `box` out of `im` and paste it into `img2`,
# offset by `white_space` in both directions.
img = im.crop(box=(box._left, box._top, box._right, box._bottom))
img2.paste(img, box=(white_space, white_space))
# Turn the raw bytes of img2 into floats scaled by 1/255, then invert them.
imgByteArr = img2.tobytes()
lst = list(imgByteArr)
image = np.array(lst) / 255.0
image = 1.0 - image
images.append(image)

height = im.height
width = im.width

# Stack the collected characters into (count, character_size, character_size)
# and display them.
t1 = np.array(images)
t1 = np.reshape(t1, (t1.shape[0], character_size, character_size))
ocr_utils.montage(t1, title='characters from file')
shp = t1.shape
# number of (character, skew) combinations generated below
totalN = len(skewRange) * shp[0]

# Rebuild `images` with one sheared copy of every character per skew value;
# `orientation` receives the skew used for each copy.
images = []
# NOTE(review): `af` is not used in the visible part of the script.
import skimage.transform as af
for j in range(shp[0]):
    for i, skew in enumerate(skewRange):
        images.append(ocr_utils.shear(t1[j], skew))
        orientation.append(skew)

images = np.array(images)
ocr_utils.montage(images, title='characters being trained')
# flatten every image into one row of character_size*character_size values
images = np.reshape(images, (len(images), character_size * character_size))
ys = ocr_utils.convert_to_unique(orientation)
# (tail of a plotting call whose opening line is above this excerpt)
y=y, legend_entries=legend, axis_labels=[ 'column {} sum'.format(columnsXY[i]) for i in range(len(columnsXY)) ], title='E13B sum of columns')

#############################################################################
# read and show character images for '0', and '1'
# select the digits in columnsXY in the E13B font
fd = {'m_label': ascii_characters_to_train, 'font': 'E13B'}

# output only the character label and the image
fl = ['m_label', 'image']

# read the complete image (20x20) = 400 pixels for each character
ds = ocr_utils.read_data(input_filters_dict=fd, output_feature_list=fl, dtype=np.int32)

# the training features come back in the order requested in fl: label, image
y, X = ds.train.features

# change to a 2D shape
X = np.reshape(X, (X.shape[0], ds.train.num_rows, ds.train.num_columns))
ocr_utils.montage(X, title='some E13B Characters')

# banner that closes this section of the script output
print( '\n########################### No Errors ####################################' )
# NOTE(review): the statements down to `images.append(image)` presumably run
# once per character box inside a loop whose header is above this excerpt -
# confirm against the full file.
# Cut the region described by `box` out of `im` and paste it into `img2`,
# offset by `white_space` in both directions.
img = im.crop(box=(box._left, box._top, box._right, box._bottom))
img2.paste(img,box=(white_space,white_space))
# Turn the raw bytes of img2 into floats scaled by 1/255, then invert them.
imgByteArr = img2.tobytes()
lst = list(imgByteArr)
image = np.array(lst)/255.0
image = 1.0 - image
images.append(image)

height = im.height
width = im.width

# Stack the collected characters into (count, character_size, character_size)
# and display them.
t1 = np.array(images)
t1=np.reshape(t1,(t1.shape[0],character_size,character_size))
ocr_utils.montage(t1, title='characters from file')
shp = t1.shape
# number of (character, skew) combinations generated below
totalN = len(skewRange)*shp[0]

# Rebuild `images` with one sheared copy of every character per skew value;
# `orientation` receives the skew used for each copy.
images = []
# NOTE(review): `af` is not used in the visible part of the script.
import skimage.transform as af
for j in range(shp[0]):
    for i,skew in enumerate(skewRange):
        images.append(ocr_utils.shear(t1[j],skew))
        orientation.append(skew)

images=np.array(images)
ocr_utils.montage(images, title='characters being trained')
# flatten every image into one row of character_size*character_size values
images=np.reshape(images,(len(images),character_size*character_size))
ys = ocr_utils.convert_to_unique(orientation)
# (tail of a plotting call whose opening line is above this excerpt)
label='k++')
# Scatter the samples coloured by their cluster assignment y_km; one axis
# label is generated per entry of columnsXY.
ocr_utils.scatter_plot(X=X, y=y_km, legend_entries='', axis_labels=[ 'column {} sum'.format(columnsXY[i]) for i in range(len(columnsXY)) ], title='column sums k++ means centroids')

# For every cluster centre, gather the images assigned to that cluster and
# display them as a montage of num_rows x num_columns tiles.
for i in range(0, km.cluster_centers_.shape[0]):
    # np.argwhere returns one row of indices per matching sample
    image_index2 = np.argwhere(y_km == i)
    x2d = X_image[image_index2].reshape( (image_index2.shape[0], ds.train.num_rows, ds.train.num_columns))
    ocr_utils.montage(x2d, title='k++ cluster {}'.format(i))

##############################################
# separate the original images by cluster
# print(km.cluster_centers_.shape)

# Settings for the next section of the script.
n = 30000
# 48..57 are the code points of the characters '0'..'9'
chars_to_train = range(48, 58)
columnsXY = range(0, 20)
column_str = 'column_sum{}'.format(list(columnsXY))
# 81 evenly spaced skew values from -0.5 to 0.5
skewRange = np.linspace(-0.5, 0.5, 81)
input_filters_dict = {'m_label': chars_to_train, 'font': 'E13B'}

# output the character label and the image and column sums
# NOTE(review): only 'm_label' and 'image' are listed below; column_str is
# built above but not included here - confirm whether that is intended.
output_feature_list = ['m_label', 'image']
# Project the training data with LDA and report the configured components.
X_train_lda = lda.fit_transform(X_train, y_train)
print('\nLDA components = {}'.format(lda.n_components))

# Fit a logistic regression on the LDA features and score it on the same
# training data.
lr = LogisticRegression()
logistic_fitted = lr.fit(X_train_lda, y_train)
y_train_pred = logistic_fitted.predict(X_train_lda)

print('\nLDA Train Accuracy: {:4.6f}, n_components={} coefficients={}'.format(
    accuracy_score(y_train, y_train_pred),
    lda.n_components,
    lr.coef_.shape))

# Display the training images that were classified incorrectly.
X_errors_image = X_train[y_train != y_train_pred]
X_errors2D = np.reshape(
    X_errors_image,
    (X_errors_image.shape[0], character_size, character_size))
ocr_utils.montage(
    X_errors2D,
    title='LDA Error Images, components={}'.format(n_components))

# Decision regions are only drawn when the projection has more than one
# component.
if X_train_lda.shape[1] > 1:
    ocr_utils.plot_decision_regions(
        X=X_train_lda,
        y=y_train,
        classifier=lr,
        labels=['LDA1', 'LDA2'],
        title='logistic_regression after 2 component LDA')

######################################################################################
# now that the font is trained, pick up some text and encode a message
######################################################################################
from sklearn.metrics import accuracy_score

# Fit a logistic regression on the current LDA features and predict both
# splits.
lr = LogisticRegression()
logistic_fitted = lr.fit(X_train_lda, y_train)
y_pred_train = logistic_fitted.predict(X_train_lda)
y_pred_test = logistic_fitted.predict(X_test_lda)

print('\nLDA Train Accuracy: {:4.6f}, n_components={}'.format(
    accuracy_score(y_train, y_pred_train),
    lda.n_components))
print('LDA Test Accuracy: {:4.6f}, n_components={}'.format(
    accuracy_score(y_test, y_pred_test),
    lda.n_components))

# Misclassified test samples: images and their true labels.
X_errors_image = X_test_image[y_test != y_pred_test]
y_errors = y_test[y_test != y_pred_test]

# Reshape the error images to 2D tiles and display them.
X2D = np.reshape(
    X_errors_image,
    (X_errors_image.shape[0], ds.train.num_rows, ds.train.num_columns))
ocr_utils.montage(
    X2D,
    title='LDA E13B Error Character,components={}'.format(n_components))

###############################################################################
# Repeat with a 10-component LDA (eigen solver) fitted on the standardised
# data.
n_components = 10
lda = LDA(n_components=n_components, solver='eigen')
X_train_lda = lda.fit_transform(X_train_std, y_train)
X_test_lda = lda.transform(X_test_std)
print('n_components={}'.format(lda.n_components))

lr = LogisticRegression()
logistic_fitted = lr.fit(X_train_lda, y_train)
y_pred_train = logistic_fitted.predict(X_train_lda)
# Predict both splits with the classifier fitted on the current LDA features.
y_pred_train = logistic_fitted.predict(X_train_lda)
y_pred_test = logistic_fitted.predict(X_test_lda)

print('\nLDA Train Accuracy: {:4.6f}, n_components={}'.format(
    accuracy_score(y_train, y_pred_train),
    lda.n_components))
print('LDA Test Accuracy: {:4.6f}, n_components={}'.format(
    accuracy_score(y_test, y_pred_test),
    lda.n_components))

# Misclassified test samples: images and their true labels.
X_errors_image = X_test_image[y_test != y_pred_test]
y_errors = y_test[y_test != y_pred_test]

# Reshape the error images to 2D tiles and display them.
X2D = np.reshape(
    X_errors_image,
    (X_errors_image.shape[0], ds.train.num_rows, ds.train.num_columns))
ocr_utils.montage(
    X2D,
    title='LDA E13B Error Character,components={}'.format(n_components))

###############################################################################
# Repeat with a 10-component LDA (eigen solver) fitted on the standardised
# data.
n_components = 10
lda = LDA(n_components=n_components, solver='eigen')
X_train_lda = lda.fit_transform(X_train_std, y_train)
X_test_lda = lda.transform(X_test_std)
print('n_components={}'.format(lda.n_components))

lr = LogisticRegression()
logistic_fitted = lr.fit(X_train_lda, y_train)

from sklearn.metrics import accuracy_score
y_pred_train = logistic_fitted.predict(X_train_lda)
# One legend entry per distinct label: the code followed by its character.
legend = []
for ys in np.unique(y):
    legend += ['{} \'{}\''.format(ys, chr(ys))]

# Scatter plot with one axis label per selected column.
ocr_utils.scatter_plot(
    X=X,
    y=y,
    legend_entries=legend,
    axis_labels=['column {} sum'.format(col) for col in columnsXY],
    title='E13B sum of columns')

#############################################################################
# read and show character images for '0', and '1'
# select the digits in columnsXY in the E13B font
# fd filters the rows, fl lists the features to return (label, then image)
fd = {'m_label': ascii_characters_to_train, 'font': 'E13B'}
fl = ['m_label', 'image']

# read the complete image (20x20) = 400 pixels for each character
ds = ocr_utils.read_data(
    input_filters_dict=fd,
    output_feature_list=fl,
    dtype=np.int32)
y, X = ds.train.features

# change to a 2D shape
X = np.reshape(X, (X.shape[0], ds.train.num_rows, ds.train.num_columns))
ocr_utils.montage(X, title='some E13B Characters')

print('\n########################### No Errors ####################################')
def train_a_font(input_filters_dict, output_feature_list, nEpochs=5000):
    """Build, train and test a convolutional network on a selection of OCR data.

    The data set is read with ``ocr_utils.read_data`` with 10% held out for
    testing.  The network has two convolution/pooling layers, three fully
    connected layers, dropout and a softmax readout.  It is trained with
    Adam on batches of 100 while summaries are written for TensorBoard under
    ``/tmp/ds_logs/``.  Afterwards the size of each variable is printed, the
    test accuracy is printed, and the misclassified test images are shown
    with ``ocr_utils.montage``.

    Training stops early once the training accuracy, measured every 100
    steps, has been exactly 1 for 10 consecutive measurements.

    Args:
        input_filters_dict: filter passed to ``ocr_utils.read_data``.  When
            it has a ``'font'`` entry, that value is included in the report.
        output_feature_list: names of the features to read.  Entry 0 is the
            target (its width is the number of output classes), entry 1 is
            expected to be ``'image'`` and feeds the convolution layers, and
            any further entries bypass the convolutions and are appended to
            the input of the first fully connected layer.
        nEpochs: maximum number of training steps (one batch per step).

    Returns:
        None.
    """

    def weight_variable(shape, dtype):
        # weights start as truncated-normal noise with stddev 0.1
        return tf.Variable(tf.truncated_normal(shape, stddev=0.1, dtype=dtype))

    def bias_variable(shape, dtype):
        # biases start at the constant 0.1
        return tf.Variable(tf.constant(0.1, shape=shape, dtype=dtype))

    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_2x2(x):
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1], padding='SAME')

    def computeSize(s, tens):
        # print and return the number of elements in a variable
        sumC = 1
        tShape = tens.get_shape()
        for i in range(len(tShape)):
            sumC *= tShape[i].value
        print('\t{}\t{}'.format(s, sumC), flush=True)
        return sumC

    ds = ocr_utils.read_data(input_filters_dict=input_filters_dict,
                             output_feature_list=output_feature_list,
                             test_size=.1,
                             engine_type='tensorflow',
                             dtype=dtype)

    # ==========================================================================
    # Start TensorFlow Interactive Session
    # ==========================================================================
    sess = tf.InteractiveSession()
    writer = None
    try:
        # ======================================================================
        # Placeholders
        #
        # Create a placeholder for each feature returned from the dataset and
        # compute the width of the features that skip the convolution layers.
        # ======================================================================
        lst = []
        extra_features_width = 0  # width of extra features
        for i, nm in enumerate(output_feature_list):
            # features[0] is always the target, for instance m_label_one_hot.
            # features[1] is the 'image' that is passed to the convolution
            # layers.  Any additional features bypass the convolution layers
            # and go directly into the fully connected layer, so their width
            # is summed in order to allocate weights of the correct size.
            # The 'y_' / 'x_' prefixes make the names readable on the
            # TensorBoard graph.
            prefix = 'y_' if i == 0 else 'x_'
            if i > 1:
                extra_features_width += ds.train.feature_width[i]
            lst.append(tf.placeholder(dtype,
                                      shape=[None, ds.train.feature_width[i]],
                                      name=prefix + nm))

        # ph is a named tuple with field names like 'image', 'm_label' and
        # values that are tensors.  The display names on the graph are
        # 'y_m_label', 'x_image', 'x_upper_case' etc.
        Place_Holders = namedtuple('Place_Holders', output_feature_list)
        ph = Place_Holders(*lst)

        nRows = ds.train.num_rows     # image height
        nCols = ds.train.num_columns  # image width

        nFc0 = 2048   # size of fully connected layer 0
        nFc1 = 2048   # size of fully connected layer 1
        nFc2 = 2048   # size of fully connected layer 2
        nConv1 = 32   # number of features in the first convolution layer
        nConv2 = 64   # number of features in the second convolution layer
        nTarget = ds.train.feature_width[0]  # width of the target feature

        # Each 'SAME'-padded 2x2/stride-2 pooling yields ceil(n / 2) outputs,
        # so after two poolings each side has ceil(n / 4) elements.
        pooled_rows = -(-nRows // 4)
        pooled_cols = -(-nCols // 4)
        n_h_pool2_outputs = pooled_rows * pooled_cols * nConv2
        n_h_pool2_outputsx = n_h_pool2_outputs + extra_features_width

        # ======================================================================
        # First Convolutional Layer
        # ======================================================================
        with tf.name_scope("w_conv1"):
            W_conv1 = weight_variable([5, 5, 1, nConv1], dtype)
            b_conv1 = bias_variable([nConv1], dtype)

        with tf.name_scope("reshape_x_image"):
            # rows first, so fetched images match the (n, nRows, nCols)
            # error-image buffer used during testing
            x_image = tf.reshape(ph.image, [-1, nRows, nCols, 1])

        tf.image_summary("x_image", x_image)

        # Convolve x_image with the weight tensor, add the bias, apply the
        # ReLU function, and finally max pool.
        with tf.name_scope("convolve_1"):
            h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

        with tf.name_scope("pool_1"):
            h_pool1 = max_pool_2x2(h_conv1)

        # ======================================================================
        # Second Convolutional Layer: nConv2 features for each 5x5 patch
        # ======================================================================
        with tf.name_scope("convolve_2"):
            W_conv2 = weight_variable([5, 5, nConv1, nConv2], dtype)
            b_conv2 = bias_variable([nConv2], dtype)
            h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

        with tf.name_scope("pool_2"):
            h_pool2 = max_pool_2x2(h_conv2)

        # ======================================================================
        # Densely Connected Layer 0
        #
        # Flatten the pooled tensor into a batch of vectors, append the extra
        # features, multiply by a weight matrix, add a bias, apply a ReLU.
        # ======================================================================
        with tf.name_scope("W_fc0_b"):
            W_fc0 = weight_variable([n_h_pool2_outputsx, nFc0], dtype)
            b_fc0 = bias_variable([nFc0], dtype)

            h_pool2_flat = tf.reshape(h_pool2, [-1, n_h_pool2_outputs])

            # append the features, the 2nd on, that go directly to the fully
            # connected layer
            for i in range(2, ds.train.num_features):
                h_pool2_flat = tf.concat(1, [h_pool2_flat, ph[i]])
            h_fc0 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc0) + b_fc0)

        # ======================================================================
        # Densely Connected Layers 1 and 2
        # ======================================================================
        with tf.name_scope("W_fc1_b"):
            W_fc1 = weight_variable([nFc0, nFc1], dtype)
            b_fc1 = bias_variable([nFc1], dtype)
            h_fc1 = tf.nn.relu(tf.matmul(h_fc0, W_fc1) + b_fc1)

        with tf.name_scope("W_fc2_b"):
            W_fc2 = weight_variable([nFc1, nFc2], dtype)
            b_fc2 = bias_variable([nFc2], dtype)
            h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2)

        # ======================================================================
        # Dropout
        # ======================================================================
        keep_prob = tf.placeholder(dtype, name='keep_prob')

        with tf.name_scope("drop"):
            h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob)

        # ======================================================================
        # Readout Layer
        # ======================================================================
        with tf.name_scope("softmax"):
            W_fc3 = weight_variable([nFc2, nTarget], dtype)
            b_fc3 = bias_variable([nTarget], dtype)
            y_conv = tf.nn.softmax(tf.matmul(h_fc2_drop, W_fc3) + b_fc3)

        # ======================================================================
        # Train and Evaluate the Model
        # ======================================================================
        with tf.name_scope("xent"):
            # 1e-8 added to eliminate the crash of training when taking
            # log of 0
            cross_entropy = -tf.reduce_sum(ph[0] * tf.log(y_conv + 1e-8))
            tf.scalar_summary("cross entropy", cross_entropy)

        with tf.name_scope("train"):
            train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

        with tf.name_scope("test"):
            correct_prediction = tf.equal(tf.argmax(y_conv, 1),
                                          tf.argmax(ph[0], 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype))
            tf.scalar_summary("accuracy", accuracy)

        merged = tf.merge_all_summaries()

        # log directory named year-month-day-hour-minute
        tp = datetime.datetime.now().timetuple()
        tm = '-'.join(str(field) for field in tp[:5])
        writer = tf.train.SummaryWriter("/tmp/ds_logs/" + tm, sess.graph)

        # To see the results in a browser, run the following in a terminal
        # to activate the server:
        #     tensorboard --logdir '/tmp/ds_logs/'
        # and see the results on localhost:6006

        sess.run(tf.initialize_all_variables())

        perfect_count = 10
        for i in range(nEpochs):
            batch = ds.train.next_batch(100)
            # assign feature data to each placeholder; the batch list is
            # returned in the same order as the features requested
            feed = {keep_prob: 0.5}
            for j in range(ds.train.num_features):
                feed[ph[j]] = batch[j]

            if i % 100 == 0:
                # Evaluate on a copy of the feed with dropout disabled, so
                # the training step below still runs with keep_prob 0.5.
                eval_feed = dict(feed)
                eval_feed[keep_prob] = 1.0
                summary_str, train_accuracy = sess.run([merged, accuracy],
                                                       feed_dict=eval_feed)
                writer.add_summary(summary_str, i)
                if train_accuracy != 1:
                    perfect_count = 10
                else:
                    perfect_count -= 1
                    if perfect_count == 0:
                        break

                print("step %d, training accuracy %g" % (i, train_accuracy),
                      flush=True)
            train_step.run(feed_dict=feed)

        # Report the size of every trainable variable exactly once.
        print('network size:', flush=True)
        variables = [
            ("W_conv1", W_conv1), ("b_conv1", b_conv1),
            ("W_conv2", W_conv2), ("b_conv2", b_conv2),
            ("W_fc0", W_fc0), ("b_fc0", b_fc0),
            ("W_fc1", W_fc1), ("b_fc1", b_fc1),
            ("W_fc2", W_fc2), ("b_fc2", b_fc2),
            ("W_fc3", W_fc3), ("b_fc3", b_fc3),
        ]
        total = sum(computeSize(name, tens) for name, tens in variables)
        print('\ttotal\t{}'.format(total), flush=True)

        # Test in slices of 100 with dropout disabled.
        feed = {keep_prob: 1.0}
        error_images = np.empty((0, nRows, nCols))
        n_correct = 0
        n_seen = 0
        for n in range(0, ds.test.features[0].shape[0], 100):
            # assign feature data to each placeholder
            for i in range(ds.train.num_features):
                feed[ph[i]] = ds.test.features[i][n:n + 100]
            images, correct = sess.run([x_image, correct_prediction],
                                       feed_dict=feed)
            # Count samples rather than averaging per-batch accuracies, so
            # a short final slice is not over-weighted.
            n_correct += int(np.count_nonzero(correct))
            n_seen += correct.shape[0]
            error_images = np.append(
                error_images,
                images[:, :, :, 0][np.logical_not(correct)],
                axis=0)

        # An empty test split has no errors; report 1 rather than dividing
        # by zero.
        test_accuracy = n_correct / n_seen if n_seen else 1

        try:
            font = input_filters_dict['font']
        except (KeyError, TypeError):
            # no font was selected, so report without a font name
            print("test accuracy {}".format(test_accuracy), flush=True)
            ocr_utils.montage(error_images, title='TensorFlow Error Images')
        else:
            print("test accuracy {} for font: {}".format(test_accuracy, font),
                  flush=True)
            ocr_utils.montage(
                error_images,
                title='TensorFlow {} Error Images'.format(font))
    finally:
        # Release resources even when building or training fails.  The
        # session is closed before the default graph is reset; the reset is
        # only necessary when iterating through fonts.
        if writer is not None:
            writer.close()
        sess.close()
        tf.reset_default_graph()
print('\nLDA components = {}'.format(lda.n_components))

# Fit a logistic regression on the LDA projection and score it on the
# training data.
lr = LogisticRegression()
logistic_fitted = lr.fit(X_train_lda, y_train)
y_train_pred = logistic_fitted.predict(X_train_lda)

print('\nLDA Train Accuracy: {:4.6f}, n_components={} coefficients={}'.format(
    accuracy_score(y_train, y_train_pred),
    lda.n_components,
    lr.coef_.shape))

# Show the training images whose prediction disagrees with the label.
train_errors = y_train != y_train_pred
X_errors_image = X_train[train_errors]
errors_shape = (X_errors_image.shape[0], character_size, character_size)
X_errors2D = np.reshape(X_errors_image, errors_shape)
ocr_utils.montage(
    X_errors2D,
    title='LDA Error Images, components={}'.format(n_components))

# Decision regions are only plotted when the projection has more than one
# component.
if X_train_lda.shape[1] > 1:
    ocr_utils.plot_decision_regions(
        X=X_train_lda,
        y=y_train,
        classifier=lr,
        labels=['LDA1', 'LDA2'],
        title='logistic_regression after 2 component LDA')

######################################################################################
# now that the font is trained, pick up some text and encode a message
######################################################################################