def train_a_font(input_filters_dict, output_feature_list, nEpochs=5000):
    """Train and test a convolutional network on OCR character data.

    The data set is read with ``ocr_utils.read_data``.  A TensorFlow graph
    with two convolution/max-pool stages, three fully connected layers,
    dropout and a softmax readout is built, trained with Adam and scored on
    the test split.  Misclassified test images are displayed with
    ``ocr_utils.montage``.

    Args:
        input_filters_dict: filters forwarded to ``ocr_utils.read_data``.
            When it has a 'font' entry, that value is added to the report.
        output_feature_list: names of the features to read.  Entry 0 is the
            target (for instance 'm_label_one_hot'), entry 1 is the image
            passed to the convolution layers; any further entries bypass
            the convolutions and are concatenated onto the input of the
            first fully connected layer.  Must contain 'image'.
        nEpochs: maximum number of training steps (one mini-batch each).

    The element type ``dtype`` is read from module scope.
    """
    ds = ocr_utils.read_data(input_filters_dict=input_filters_dict,
                             output_feature_list=output_feature_list,
                             test_size=.1,
                             engine_type='tensorflow',
                             dtype=dtype)
    batch_size = 100       # rows per training / test mini-batch
    report_interval = 100  # training steps between accuracy reports

    # ------------------------------------------------------------------
    # Start TensorFlow Interactive Session
    # ------------------------------------------------------------------
    sess = tf.InteractiveSession()
    try:
        # --------------------------------------------------------------
        # Placeholders: one per feature returned from the data set.
        # Names are assigned to make them look pretty on the tensorboard
        # graph ('y_m_label_one_hot', 'x_image', 'x_upper_case', ...).
        # --------------------------------------------------------------
        placeholders = []
        extra_features_width = 0  # total width of features after the image
        for i, nm in enumerate(output_feature_list):
            if i > 1:
                extra_features_width += ds.train.feature_width[i]
            placeholders.append(
                tf.placeholder(dtype,
                               shape=[None, ds.train.feature_width[i]],
                               name=('y_' if i == 0 else 'x_') + nm))

        # ph is a named tuple keyed by feature name whose values are the
        # placeholder tensors.
        Place_Holders = namedtuple('Place_Holders', output_feature_list)
        ph = Place_Holders(*placeholders)

        nRows = ds.train.num_rows     # image height
        nCols = ds.train.num_columns  # image width
        nFc0 = 2048   # size of fully connected layer 0
        nFc1 = 2048   # size of fully connected layer 1
        nFc2 = 2048   # size of fully connected layer 2
        nConv1 = 32   # number of filters in the first convolution layer
        nConv2 = 64   # number of filters in the second convolution layer
        nTarget = ds.train.feature_width[0]  # number of one-hot target columns

        def pooled(n):
            # a stride-2 pool with 'SAME' padding produces ceil(n / 2) outputs
            return (n + 1) // 2

        # size of the flattened second pooling layer, then the same plus the
        # features that skip the convolutions
        n_h_pool2_outputs = (pooled(pooled(nRows)) * pooled(pooled(nCols))
                             * nConv2)
        n_h_pool2_outputsx = n_h_pool2_outputs + extra_features_width

        # --------------------------------------------------------------
        # Weight initialisation, convolution and pooling helpers
        # --------------------------------------------------------------
        def weight_variable(shape, dtype):
            initial = tf.truncated_normal(shape, stddev=0.1, dtype=dtype)
            return tf.Variable(initial)

        def bias_variable(shape, dtype):
            initial = tf.constant(0.1, shape=shape, dtype=dtype)
            return tf.Variable(initial)

        def conv2d(x, W):
            return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

        def max_pool_2x2(x):
            return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                                  strides=[1, 2, 2, 1], padding='SAME')

        # --------------------------------------------------------------
        # First convolutional layer: convolve, add bias, ReLU, max pool
        # --------------------------------------------------------------
        with tf.name_scope("w_conv1"):
            W_conv1 = weight_variable([5, 5, 1, nConv1], dtype)
            b_conv1 = bias_variable([nConv1], dtype)

        with tf.name_scope("reshape_x_image"):
            # (batch, height, width, channels); this matches the
            # (nRows, nCols) layout of the error image array built below
            x_image = tf.reshape(ph.image, [-1, nRows, nCols, 1])

        tf.image_summary("x_image", x_image)

        with tf.name_scope("convolve_1"):
            h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

        with tf.name_scope("pool_1"):
            h_pool1 = max_pool_2x2(h_conv1)

        # --------------------------------------------------------------
        # Second convolutional layer: nConv2 features for each 5x5 patch
        # --------------------------------------------------------------
        with tf.name_scope("convolve_2"):
            W_conv2 = weight_variable([5, 5, nConv1, nConv2], dtype)
            b_conv2 = bias_variable([nConv2], dtype)
            h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

        with tf.name_scope("pool_2"):
            h_pool2 = max_pool_2x2(h_conv2)

        # --------------------------------------------------------------
        # Densely connected layer 0.  The pooled tensor is flattened into
        # a batch of vectors and the extra features are appended to it.
        # --------------------------------------------------------------
        with tf.name_scope("W_fc0_b"):
            W_fc0 = weight_variable([n_h_pool2_outputsx, nFc0], dtype)
            b_fc0 = bias_variable([nFc0], dtype)
            h_pool2_flat = tf.reshape(h_pool2, [-1, n_h_pool2_outputs])
            # append the features, the 2nd on, that go directly to the
            # fully connected layer
            for i in range(2, ds.train.num_features):
                h_pool2_flat = tf.concat(1, [h_pool2_flat, ph[i]])
            h_fc0 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc0) + b_fc0)

        # Densely connected layer 1
        with tf.name_scope("W_fc1_b"):
            W_fc1 = weight_variable([nFc0, nFc1], dtype)
            b_fc1 = bias_variable([nFc1], dtype)
            h_fc1 = tf.nn.relu(tf.matmul(h_fc0, W_fc1) + b_fc1)

        # Densely connected layer 2
        with tf.name_scope("W_fc2_b"):
            W_fc2 = weight_variable([nFc1, nFc2], dtype)
            b_fc2 = bias_variable([nFc2], dtype)
            h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2)

        # Dropout
        keep_prob = tf.placeholder(dtype, name='keep_prob')
        with tf.name_scope("drop"):
            h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob)

        # Readout layer
        with tf.name_scope("softmax"):
            W_fc3 = weight_variable([nFc2, nTarget], dtype)
            b_fc3 = bias_variable([nTarget], dtype)
            y_conv = tf.nn.softmax(tf.matmul(h_fc2_drop, W_fc3) + b_fc3)

        # --------------------------------------------------------------
        # Train and evaluate the model
        # --------------------------------------------------------------
        with tf.name_scope("xent"):
            # 1e-8 added to eliminate the crash of training when taking
            # log of 0
            cross_entropy = -tf.reduce_sum(ph[0] * tf.log(y_conv + 1e-8))
            tf.scalar_summary("cross entropy", cross_entropy)

        with tf.name_scope("train"):
            train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

        with tf.name_scope("test"):
            correct_prediction = tf.equal(tf.argmax(y_conv, 1),
                                          tf.argmax(ph[0], 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype))
            tf.scalar_summary("accuracy", accuracy)

        merged = tf.merge_all_summaries()

        # log directory named year-month-day-hour-minute
        tp = datetime.datetime.now().timetuple()
        tm = '-'.join(str(field) for field in tp[:5])
        writer = tf.train.SummaryWriter("/tmp/ds_logs/" + tm, sess.graph)

        # To see the results in Chrome,
        # Run the following in terminal to activate server.
        # tensorboard --logdir '/tmp/ds_logs/'
        # See results on localhost:6006

        sess.run(tf.initialize_all_variables())

        # stop early after 10 consecutive reports of perfect accuracy
        perfect_count = 10
        for step in range(nEpochs):
            batch = ds.train.next_batch(batch_size)
            # the batch list is returned in the same order as the features
            # requested
            train_feed = {ph[j]: batch[j]
                          for j in range(ds.train.num_features)}
            train_feed[keep_prob] = 0.5
            if step % report_interval == 0:
                # evaluate without dropout on a copy so that the training
                # step below still runs with keep_prob 0.5
                eval_feed = dict(train_feed)
                eval_feed[keep_prob] = 1.0
                summary_str, train_accuracy = sess.run([merged, accuracy],
                                                       feed_dict=eval_feed)
                writer.add_summary(summary_str, step)
                if train_accuracy != 1:
                    perfect_count = 10
                else:
                    perfect_count -= 1
                    if perfect_count == 0:
                        break
                print("step %d, training accuracy %g" % (step, train_accuracy),
                      flush=True)
            train_step.run(feed_dict=train_feed)

        def computeSize(s, tens):
            """Print and return the number of elements of variable tens."""
            sumC = 1
            tShape = tens.get_shape()
            for i in range(len(tShape)):
                sumC *= tShape[i].value
            print('\t{}\t{}'.format(s, sumC), flush=True)
            return sumC

        print('network size:', flush=True)
        # every trainable variable exactly once, readout layer included
        sized_variables = [
            ("W_conv1", W_conv1), ("b_conv1", b_conv1),
            ("W_conv2", W_conv2), ("b_conv2", b_conv2),
            ("W_fc0", W_fc0), ("b_fc0", b_fc0),
            ("W_fc1", W_fc1), ("b_fc1", b_fc1),
            ("W_fc2", W_fc2), ("b_fc2", b_fc2),
            ("W_fc3", W_fc3), ("b_fc3", b_fc3),
        ]
        total = sum(computeSize(label, variable)
                    for label, variable in sized_variables)
        print('\ttotal\t{}'.format(total), flush=True)

        # --------------------------------------------------------------
        # Score the test split and collect the misclassified images
        # --------------------------------------------------------------
        test_feed = {keep_prob: 1.0}
        n_test = ds.test.features[0].shape[0]
        # the empty seed keeps the result well defined when there is no
        # test data or no error
        error_chunks = [np.empty((0, nRows, nCols))]
        n_correct = 0
        for start in range(0, n_test, batch_size):
            for i in range(ds.train.num_features):
                test_feed[ph[i]] = ds.test.features[i][start:start + batch_size]
            images, correct = sess.run([x_image, correct_prediction],
                                       feed_dict=test_feed)
            n_correct += int(np.sum(correct))
            error_chunks.append(images[:, :, :, 0][np.logical_not(correct)])
        error_images = np.concatenate(error_chunks, axis=0)
        # counting rows weights a short final batch correctly
        test_accuracy = n_correct / n_test if n_test else float('nan')

        try:
            font = input_filters_dict['font']
        except (KeyError, TypeError):
            font = None
        if font is None:
            print("test accuracy {}".format(test_accuracy), flush=True)
            ocr_utils.montage(error_images, title='TensorFlow Error Images')
        else:
            print("test accuracy {} for font: {}".format(test_accuracy, font),
                  flush=True)
            ocr_utils.montage(
                error_images,
                title='TensorFlow {} Error Images'.format(font))
    finally:
        tf.reset_default_graph()  # only necessary when iterating through fonts
        sess.close()
# identify the font given the input images #output_feature_list = ['font_one_hot','image','italic','aspect_ratio','upper_case'] # train the digits 0-9 for all fonts input_filters_dict = { 'm_label': list(range(48, 58)) + list(range(65, 91)) + list(range(97, 123)), 'fontVariant': 'scanned' } #input_filters_dict = {} output_feature_list = [ 'm_label_one_hot', 'image', 'italic', 'aspect_ratio', 'upper_case' ] ds = ocr_utils.read_data(input_filters_dict=input_filters_dict, output_feature_list=output_feature_list, test_size=.1, engine_type='tensorflow', dtype=dtype) nn = nnetwork.network(ds.train) nn.fit(ds.train, nEpochs=5000) nn.test(ds.test) else: # loop through all the fonts and train individually # pick up the entire list of fonts and font variants. Train each one. df1 = ocr_utils.get_list(input_filters_dict={'font': ()}) import pprint as pprint pp = pprint.PrettyPrinter(indent=4) pp.pprint(df1)
def main(model='mlp', num_epochs=50):
    """Train a Lasagne network on HANDPRINT digit images and report results.

    Args:
        model: 'mlp', 'cnn', or 'custom_mlp:DEPTH,WIDTH,DROP_IN,DROP_HID'.
            Any other value prints a message and returns without training.
        num_epochs: number of full passes over the training data.

    Per-epoch training loss, validation loss and validation accuracy are
    printed, followed by the loss and accuracy on the test split.
    """
    print("Loading data...")
    # ASCII codes 48..57 are the digits '0'..'9'; the upper bound of range()
    # is exclusive, so 58 is needed to include '9'.
    input_filters_dict = {'font': ('HANDPRINT',), 'm_label': range(48, 58)}
    output_feature_list = ['m_label', 'image']
    ds = ocr_utils.read_data(input_filters_dict=input_filters_dict,
                             output_feature_list=output_feature_list,
                             engine_type='theano',
                             test_size=.1,
                             evaluation_size=.1,
                             dtype='float32')
    nRows = ds.train.num_rows
    nCols = ds.train.num_columns

    X_train = ds.train.features[1]
    X_val = ds.evaluation.features[1]
    X_test = ds.test.features[1]

    # shift the ASCII labels so that '0' becomes class 0
    y_train = np.array(ds.train.features[0] - 48, dtype=np.int32)
    y_test = np.array(ds.test.features[0] - 48, dtype=np.int32)
    y_val = np.array(ds.evaluation.features[0] - 48, dtype=np.int32)

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")
    if model == 'mlp':
        network = build_mlp(input_var, nRows, nCols)
    elif model.startswith('custom_mlp:'):
        depth, width, drop_in, drop_hid = model.split(':', 1)[1].split(',')
        network = build_custom_mlp(input_var, int(depth), int(width),
                                   float(drop_in), float(drop_hid))
    elif model == 'cnn':
        network = build_cnn(input_var)
    else:
        print("Unrecognized model type %r." % model, flush=True)
        return

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=0.01, momentum=0.9)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype='float32')

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    def batch_mean(total, n_batches):
        # a split that produced no mini-batch has no defined mean
        return total / n_batches if n_batches else float('nan')

    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for inputs, targets in iterate_minibatches(X_train, y_train, 500,
                                                   shuffle=True):
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for inputs, targets in iterate_minibatches(X_val, y_val, 500,
                                                   shuffle=False):
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time), flush=True)
        print("  training loss:\t\t{:.6f}".format(
            batch_mean(train_err, train_batches)), flush=True)
        print("  validation loss:\t\t{:.6f}".format(
            batch_mean(val_err, val_batches)), flush=True)
        print("  validation accuracy:\t\t{:.2f} %".format(
            batch_mean(val_acc, val_batches) * 100), flush=True)

    # After training, we compute and print the test error.  The accumulators
    # get their own names so that the Theano expressions above stay intact.
    test_err_sum = 0
    test_acc_sum = 0
    test_batches = 0
    for inputs, targets in iterate_minibatches(X_test, y_test, 500,
                                               shuffle=False):
        err, acc = val_fn(inputs, targets)
        test_err_sum += err
        test_acc_sum += acc
        test_batches += 1
    print("Final results:", flush=True)
    print("  test loss:\t\t\t{:.6f}".format(
        batch_mean(test_err_sum, test_batches)), flush=True)
    print("  test accuracy:\t\t{:.2f} %".format(
        batch_mean(test_acc_sum, test_batches) * 100), flush=True)
from sklearn.decomposition import PCA
from sklearn.decomposition import KernelPCA

# ASCII codes of the characters to read and the image columns to sum
chars_to_train = range(48, 58)
columnsXY = range(0, 20)
column_str = 'column_sum{}'.format(list(columnsXY))

# restrict the data to the selected characters of the E13B font
input_filters_dict = {'m_label': chars_to_train, 'font': 'E13B'}

# output the character label and the image and column sums
output_feature_list = ['m_label', 'image', column_str]

# read the complete image (20x20) = 400 pixels for each character
ds = ocr_utils.read_data(
    input_filters_dict=input_filters_dict,
    output_feature_list=output_feature_list,
    test_size=.2,
    random_state=0,
)

# uses too much memory for my 32 bit windows computer so limit size of sample
windows_limit = 5000

# features come back in the order requested: label, image, column sums
y_train, X_train_image, X_train = (
    ds.train.features[k][:windows_limit] for k in range(3)
)
y_test, X_test_image, X_test = (ds.test.features[k] for k in range(3))

# eigen-decomposition of the covariance matrix of the training images
cov_mat = np.cov(X_train_image.T)
eigen_vals, eigen_vecs = np.linalg.eig(cov_mat)
print('\nEigenvalues \n{}'.format(eigen_vals[:2*n_components]))
def train_a_font(input_filters_dict, output_feature_list, nEpochs=5000):
    """Train and test a sectional fully connected network on OCR data.

    The data set is read with ``ocr_utils.read_data``.  The network is a
    stack of ``nSections`` sections.  Each section is three fully connected
    ReLU layers (image width -> width/4 -> width/4 -> image width) whose
    output is added to the section input.  The stack is followed by dropout
    and a softmax readout.  The network is trained with Adam and scored on
    the test split; misclassified test images are displayed with
    ``ocr_utils.montage``.

    Args:
        input_filters_dict: filters forwarded to ``ocr_utils.read_data``.
            When it has a 'font' entry, that value is added to the report.
        output_feature_list: names of the features to read.  Entry 0 is the
            target (for instance 'm_label_one_hot') and entry 1 is the image
            fed to the network.  Further entries get a placeholder and are
            fed, but no layer of this network reads them.
        nEpochs: maximum number of training steps (one mini-batch each).

    The element type ``dtype`` is read from module scope.
    """
    ds = ocr_utils.read_data(input_filters_dict=input_filters_dict,
                             output_feature_list=output_feature_list,
                             test_size=.1,
                             engine_type='tensorflow',
                             dtype=dtype)
    batch_size = 100       # rows per training / test mini-batch
    report_interval = 100  # training steps between accuracy reports

    # ------------------------------------------------------------------
    # Start TensorFlow Interactive Session
    # ------------------------------------------------------------------
    sess = tf.InteractiveSession()
    try:
        # --------------------------------------------------------------
        # Placeholders: one per feature returned from the data set.
        # Names are assigned to make them look pretty on the tensorboard
        # graph ('y_m_label_one_hot', 'x_image', 'x_upper_case', ...).
        # --------------------------------------------------------------
        placeholders = []
        for i, nm in enumerate(output_feature_list):
            # diagnostic: element type of the feature as read
            print(ds.train.features[i].dtype)
            placeholders.append(
                tf.placeholder(dtype,
                               shape=[None, ds.train.feature_width[i]],
                               name=('y_' if i == 0 else 'x_') + nm))

        # ph is a named tuple keyed by feature name whose values are the
        # placeholder tensors.
        Place_Holders = namedtuple('Place_Holders', ds.train.feature_names)
        ph = Place_Holders(*placeholders)

        nRows = ds.train.num_rows     # image height
        nCols = ds.train.num_columns  # image width

        nSections = 10
        # slots filled in by section(): weights, biases and layer outputs
        w = [None] * (nSections * 3)
        b = [None] * (nSections * 3)
        h = [None] * (nSections * 3 + 1)

        in_out_width = nRows * nCols
        internal_width = int(in_out_width / 4)
        nTarget = ds.train.feature_width[0]  # number of one-hot target columns

        # --------------------------------------------------------------
        # Weight initialisation
        # --------------------------------------------------------------
        def weight_variable(shape, dtype):
            initial = tf.truncated_normal(shape, stddev=0.1, dtype=dtype)
            return tf.Variable(initial)

        def bias_variable(shape, dtype):
            initial = tf.constant(0.1, shape=shape, dtype=dtype)
            return tf.Variable(initial)

        def shapeOuts(n):
            """Print the shapes of layer n: input, weights, bias, output."""
            print('n={}, hin={},w={}, b={} ,hout={}\n'.format(
                n, h[n].get_shape(), w[n].get_shape(), b[n].get_shape(),
                h[n + 1].get_shape()))

        def dense_relu(idx, n_in, n_out):
            """Create w[idx] and b[idx]; return relu(h[idx] . w + b)."""
            w[idx] = weight_variable([n_in, n_out], dtype)
            b[idx] = bias_variable([n_out], dtype)
            return tf.nn.relu(tf.matmul(h[idx], w[idx]) + b[idx])

        def section(n):
            """Build layers n, n+1, n+2 and add the section input h[n]."""
            with tf.name_scope('section_' + str(n) + '_0'):
                h[n + 1] = dense_relu(n, in_out_width, internal_width)
                shapeOuts(n)
            with tf.name_scope('section_' + str(n) + '_1'):
                h[n + 2] = dense_relu(n + 1, internal_width, internal_width)
                shapeOuts(n + 1)
            with tf.name_scope('section_' + str(n) + '_2'):
                z = dense_relu(n + 2, internal_width, in_out_width)
                h[n + 3] = tf.add(z, h[n])
                print('z shape ={}'.format(z.get_shape()))
                shapeOuts(n + 2)

        def computeSize(s, tens):
            """Print and return the number of elements of variable tens."""
            sumC = 1
            tShape = tens.get_shape()
            for i in range(len(tShape)):
                sumC *= tShape[i].value
            print('\t{}\t{}'.format(s, sumC), flush=True)
            return sumC

        # --------------------------------------------------------------
        # Build sectional network
        # --------------------------------------------------------------
        h[0] = ph[1]
        for i in range(nSections):
            section(3 * i)

        # Dropout
        keep_prob = tf.placeholder(dtype, name='keep_prob')
        with tf.name_scope("drop"):
            h_fc2_drop = tf.nn.dropout(h[nSections * 3], keep_prob)

        # Readout layer
        with tf.name_scope("softmax"):
            w_fc3 = weight_variable([in_out_width, nTarget], dtype)
            b_fc3 = bias_variable([nTarget], dtype)
            y_conv = tf.nn.softmax(tf.matmul(h_fc2_drop, w_fc3) + b_fc3)

        print('network size:', flush=True)
        # weights and biases of every section layer, then the readout layer
        total = 0
        for i in range(nSections * 3):
            total += computeSize("w{}".format(i), w[i])
            total += computeSize("b{}".format(i), b[i])
        total += computeSize("b_fc3", b_fc3) + computeSize("w_fc3", w_fc3)
        print('\ttotal\t{}'.format(total), flush=True)

        # --------------------------------------------------------------
        # Train and evaluate the model
        # --------------------------------------------------------------
        with tf.name_scope("reshape_x_image"):
            # (batch, height, width, channels); this matches the
            # (nRows, nCols) layout of the error image array built below
            x_image = tf.reshape(ph.image, [-1, nRows, nCols, 1])

        with tf.name_scope("xent"):
            # 1e-8 added to eliminate the crash of training when taking
            # log of 0
            cross_entropy = -tf.reduce_sum(ph[0] * tf.log(y_conv + 1e-8))
            tf.scalar_summary("cross entropy", cross_entropy)

        with tf.name_scope("train"):
            train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

        with tf.name_scope("test"):
            correct_prediction = tf.equal(tf.argmax(y_conv, 1),
                                          tf.argmax(ph[0], 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype))
            tf.scalar_summary("accuracy", accuracy)

        merged = tf.merge_all_summaries()

        # log directory named year-month-day-hour-minute
        tp = datetime.datetime.now().timetuple()
        tm = '-'.join(str(field) for field in tp[:5])
        writer = tf.train.SummaryWriter("/tmp/ds_logs/" + tm, sess.graph)

        # To see the results in Chrome,
        # Run the following in terminal to activate server.
        # tensorboard --logdir '/tmp/ds_logs/'
        # See results on localhost:6006

        sess.run(tf.initialize_all_variables())

        # stop early after 10 consecutive reports of perfect accuracy
        perfect_count = 10
        for step in range(nEpochs):
            batch = ds.train.next_batch(batch_size)
            # the batch list is returned in the same order as the features
            # requested
            train_feed = {ph[j]: batch[j]
                          for j in range(ds.train.num_features)}
            train_feed[keep_prob] = 0.5
            if step % report_interval == 0:
                # evaluate without dropout on a copy so that the training
                # step below still runs with keep_prob 0.5
                eval_feed = dict(train_feed)
                eval_feed[keep_prob] = 1.0
                summary_str, train_accuracy = sess.run([merged, accuracy],
                                                       feed_dict=eval_feed)
                writer.add_summary(summary_str, step)
                if train_accuracy != 1:
                    perfect_count = 10
                else:
                    perfect_count -= 1
                    if perfect_count == 0:
                        break
                print("step %d, training accuracy %g" % (step, train_accuracy),
                      flush=True)
            train_step.run(feed_dict=train_feed)

        # --------------------------------------------------------------
        # Score the test split and collect the misclassified images
        # --------------------------------------------------------------
        test_feed = {keep_prob: 1.0}
        n_test = ds.test.features[0].shape[0]
        # the empty seed keeps the result well defined when there is no
        # test data or no error
        error_chunks = [np.empty((0, nRows, nCols))]
        n_correct = 0
        for start in range(0, n_test, batch_size):
            for i in range(ds.train.num_features):
                test_feed[ph[i]] = ds.test.features[i][start:start + batch_size]
            images, correct = sess.run([x_image, correct_prediction],
                                       feed_dict=test_feed)
            n_correct += int(np.sum(correct))
            error_chunks.append(images[:, :, :, 0][np.logical_not(correct)])
        error_images = np.concatenate(error_chunks, axis=0)
        # counting rows weights a short final batch correctly
        test_accuracy = n_correct / n_test if n_test else float('nan')

        try:
            font = input_filters_dict['font']
        except (KeyError, TypeError):
            font = None
        if font is None:
            print("test accuracy {}".format(test_accuracy), flush=True)
            ocr_utils.montage(error_images, title='TensorFlow Error Images')
        else:
            print("test accuracy {} for font: {}".format(test_accuracy, font),
                  flush=True)
            ocr_utils.montage(
                error_images,
                title='TensorFlow {} Error Images'.format(font))
    finally:
        tf.reset_default_graph()  # only necessary when iterating through fonts
        sess.close()
import matplotlib.pyplot as plt
try:
    # location of the class in newer scikit-learn releases
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
except ImportError:
    # older scikit-learn releases only provide the sklearn.lda module
    from sklearn.lda import LDA
from sklearn.preprocessing import StandardScaler

print_limit = 20

# ASCII codes of the characters to read and the image columns to sum
chars_to_train = range(48, 58)
columnsXY = range(0, 20)
column_str = 'column_sum{}'.format(list(columnsXY))

# restrict the data to the selected characters of the E13B font
input_filters_dict = {'m_label': chars_to_train, 'font': 'E13B'}

# output the character label and the image and column sums
output_feature_list = ['m_label', 'image', column_str]

# read the complete image (20x20) = 400 pixels for each character
ds = ocr_utils.read_data(input_filters_dict=input_filters_dict,
                         output_feature_list=output_feature_list,
                         test_size=.2,
                         random_state=0)

# features come back in the order requested: label, image, column sums
y_train = ds.train.features[0]
X_train_image = ds.train.features[1]
X_train = ds.train.features[2]

y_test = ds.test.features[0]
X_test_image = ds.test.features[1]
X_test = ds.test.features[2]

# Standardize the images.  The scaler is fitted on the training images only;
# the test images are transformed with those training statistics so that
# both splits are on the same scale and no test information is used.
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train_image)
X_test_std = sc.transform(X_test_image)
# output the character label, image, italic flag, aspect_ratio and upper_case flag # output_feature_list = ['m_label_one_hot','image','italic','aspect_ratio','upper_case'] # output only the character label and the image # output_feature_list = ['m_label_one_hot','image'] # identify the font given the input images #output_feature_list = ['font_one_hot','image','italic','aspect_ratio','upper_case'] # train the digits 0-9 for all fonts input_filters_dict = {'m_label': list(range(48,58))+list(range(65,91))+list(range(97,123)),'fontVariant':'scanned'} #input_filters_dict = {} output_feature_list = ['m_label_one_hot','image','italic','aspect_ratio','upper_case'] ds = ocr_utils.read_data(input_filters_dict = input_filters_dict, output_feature_list=output_feature_list, test_size = .1, engine_type='tensorflow',dtype=dtype) nn = nnetwork.network( ds.train) nn.fit( ds.train, nEpochs=5000) nn.test(ds.test) else: # loop through all the fonts and train individually # pick up the entire list of fonts and font variants. Train each one. df1 = ocr_utils.get_list(input_filters_dict={'font': ()}) import pprint as pprint pp = pprint.PrettyPrinter(indent=4) pp.pprint(df1)
# one legend entry per distinct label: its code and the character it encodes
legend = ['{} \'{}\''.format(ys, chr(ys)) for ys in np.unique(y)]

# one axis label per selected image column
column_axis_labels = ['column {} sum'.format(column) for column in columnsXY]

ocr_utils.scatter_plot(
    X=X,
    y=y,
    legend_entries=legend,
    axis_labels=column_axis_labels,
    title='E13B sum of columns',
)

#############################################################################
# read and show character images
# select the characters in ascii_characters_to_train in the E13B font
fd = {'m_label': ascii_characters_to_train, 'font': 'E13B'}

# output only the character label and the image
fl = ['m_label', 'image']

# read the complete image (20x20) = 400 pixels for each character
ds = ocr_utils.read_data(
    input_filters_dict=fd,
    output_feature_list=fl,
    dtype=np.int32,
)
y, X = ds.train.features

# change each flat image row to a 2D (rows, columns) shape
image_shape = (X.shape[0], ds.train.num_rows, ds.train.num_columns)
X = np.reshape(X, image_shape)
ocr_utils.montage(X, title='some E13B Characters')

print('\n########################### No Errors ####################################')
) y_train_pred = model.predict_classes(X_train, verbose=0) print('First 3 predictions: ', y_train_pred[:3]) train_acc = np.sum(y_train == y_train_pred, axis=0) / X_train.shape[0] print('Training accuracy: %.2f%%' % (train_acc * 100)) y_test_pred = model.predict_classes(X_test, verbose=0) test_acc = np.sum(y_test == y_test_pred, axis=0) / X_test.shape[0] print('Test accuracy: %.2f%%' % (test_acc * 100)) input_filters_dict = {'font': ('HANDPRINT',)} output_feature_list = ['m_label_one_hot','image','m_label'] ds = ocr_utils.read_data(input_filters_dict = input_filters_dict, output_feature_list=output_feature_list, engine_type='keras', test_size = .1, dtype=np.float32, random_state=0) X_train = ds.train.features[1] X_test = ds.test.features[1] y_train_ohe = ds.train.features[0] y_train = ds.train.features[2]-48 y_test = ds.test.features[2]-48 do_keras(X_train,X_test, y_train_ohe, y_train, y_test) print ('\n########################### No Errors ####################################')
# Walk over every font: update the running sets of distinct attribute values
# and show example images for each variant of the font.
for font in df1:
    # empty tuples request the column without filtering on it
    font_filter = {
        'font': font,
        'fontVariant': (),
        'm_label': (),
        'strength': (),
        'italic': (),
        'orientation': (),
    }
    df2 = ocr_utils.get_list(input_filters_dict=font_filter)

    unique_fonts = np.unique(np.append(unique_fonts, df2['font']))

    # distinct values for this font, merged into the running totals
    u1 = np.unique(df2['fontVariant'])
    u2 = np.unique(df2['m_label'])
    u3 = np.unique(df2['strength'])
    u4 = np.unique(df2['italic'])
    u5 = np.unique(df2['orientation'])

    unique_fontVariants = np.unique(np.append(unique_fontVariants, u1))
    unique_m_labels = np.unique(np.append(unique_m_labels, u2))
    unique_strengths = np.unique(np.append(unique_strengths, u3))
    unique_italics = np.unique(np.append(unique_italics, u4))
    unique_orientations = np.unique(np.append(unique_orientations, u5))

    per_font_counts = (len(u1), len(u2), len(u3), len(u4), len(u5))
    print('\n{}, fontVariants={}, labels = {}, strengths = {}, italics = {}, orientations = {}\n'.format(
        font[0], *per_font_counts))

    # read and display the images of each variant of this font
    for fontVariant in u1:
        fd = {'font': font, 'fontVariant': fontVariant}
        ds = ocr_utils.read_data(
            input_filters_dict=fd,
            output_feature_list=['m_label', 'image'],
            dtype=np.int32,
        )
        y, X = ds.train.features
        # change each flat image row to a 2D (rows, columns) shape
        image_shape = (X.shape[0], ds.train.num_rows, ds.train.num_columns)
        X2D = np.reshape(X, image_shape)
        title = '{}-{}'.format(font[0], fontVariant)
        ocr_utils.show_examples(X2D, y, title=title)

# totals over all fonts
overall_counts = (
    len(unique_fonts),
    len(unique_fontVariants),
    len(unique_m_labels),
    len(unique_strengths),
    len(unique_italics),
    len(unique_orientations),
)
print('unique fonts={}, fontVariants={}, labels = {}, strengths = {}, italics = {}, orientations = {}'.format(
    *overall_counts))

print('\n########################### No Errors ####################################')
y=y, legend_entries=legend, axis_labels=[ 'column {} sum'.format(columnsXY[i]) for i in range(len(columnsXY)) ], title='E13B sum of columns') ############################################################################# # read and show character images for '0', and '1' # select the digits in columnsXY in the E13B font fd = {'m_label': ascii_characters_to_train, 'font': 'E13B'} # output only the character label and the image fl = ['m_label', 'image'] # read the complete image (20x20) = 400 pixels for each character ds = ocr_utils.read_data(input_filters_dict=fd, output_feature_list=fl, dtype=np.int32) y, X = ds.train.features # change to a 2D shape X = np.reshape(X, (X.shape[0], ds.train.num_rows, ds.train.num_columns)) ocr_utils.montage(X, title='some E13B Characters') print( '\n########################### No Errors ####################################' )
validation_split=0.1) y_train_pred = model.predict_classes(X_train, verbose=0) print('First 3 predictions: ', y_train_pred[:3]) train_acc = np.sum(y_train == y_train_pred, axis=0) / X_train.shape[0] print('Training accuracy: %.2f%%' % (train_acc * 100)) y_test_pred = model.predict_classes(X_test, verbose=0) test_acc = np.sum(y_test == y_test_pred, axis=0) / X_test.shape[0] print('Test accuracy: %.2f%%' % (test_acc * 100)) input_filters_dict = {'font': ('HANDPRINT', )} output_feature_list = ['m_label_one_hot', 'image', 'm_label'] ds = ocr_utils.read_data(input_filters_dict=input_filters_dict, output_feature_list=output_feature_list, engine_type='keras', test_size=.1, dtype=np.float32, random_state=0) X_train = ds.train.features[1] X_test = ds.test.features[1] y_train_ohe = ds.train.features[0] y_train = ds.train.features[2] - 48 y_test = ds.test.features[2] - 48 do_keras(X_train, X_test, y_train_ohe, y_train, y_test) print( '\n########################### No Errors ####################################' )
unique_fontVariants = np.unique(np.append(unique_fontVariants, u1)) u2 = np.unique(df2['m_label']) unique_m_labels = np.unique(np.append(unique_m_labels, u2)) u3 = np.unique(df2['strength']) unique_strengths = np.unique(np.append(unique_strengths, u3)) u4 = np.unique(df2['italic']) unique_italics = np.unique(np.append(unique_italics, u4)) u5 = np.unique(df2['orientation']) unique_orientations = np.unique(np.append(unique_orientations, u5)) print( '\n{}, fontVariants={}, labels = {}, strengths = {}, italics = {}, orientations = {}\n' .format(font[0], len(u1), len(u2), len(u3), len(u4), len(u5))) for fontVariant in u1: fd = {'font': font, 'fontVariant': fontVariant} ds = ocr_utils.read_data(input_filters_dict=fd, output_feature_list=['m_label', 'image'], dtype=np.int32) y, X = ds.train.features X2D = np.reshape(X, (X.shape[0], ds.train.num_rows, ds.train.num_columns)) title = '{}-{}'.format(font[0], fontVariant) ocr_utils.show_examples(X2D, y, title=title) print( 'unique fonts={}, fontVariants={}, labels = {}, strengths = {}, italics = {}, orientations = {}' .format(len(unique_fonts), len(unique_fontVariants), len(unique_m_labels), len(unique_strengths), len(unique_italics), len(unique_orientations))) print( '\n########################### No Errors ####################################'