def train_a_font(input_filters_dict,output_feature_list, nEpochs=5000):
    """Train a small convolutional network on the selected data and test it.

    The data is read with ``ocr_utils.read_data`` (10% held out for testing).
    The graph is two 5x5 convolution + 2x2 max-pool stages, three fully
    connected ReLU layers, dropout and a softmax readout.  Summaries are
    written under ``/tmp/ds_logs/`` for TensorBoard, the parameter count and
    the test accuracy are printed, and the misclassified test images are shown
    with ``ocr_utils.montage``.

    Training stops early once the training-batch accuracy, sampled every 100
    steps, has been exactly 1 on ten consecutive samples.

    Args:
        input_filters_dict: filter specification passed to
            ``ocr_utils.read_data``.  If it has a ``'font'`` key, its value is
            included in the printed result and in the montage title.
        output_feature_list: names of the features to read.  Element 0 is the
            target, element 1 is the image fed to the convolution layers, and
            any further features bypass the convolutions and are concatenated
            onto the input of the first fully connected layer.
        nEpochs: maximum number of training steps (one batch of 100 per step).

    Returns:
        None.
    """
    ds = ocr_utils.read_data(input_filters_dict=input_filters_dict,
                             output_feature_list=output_feature_list,
                             test_size=.1,
                             engine_type='tensorflow',
                             dtype=dtype)

    sess = tf.InteractiveSession()
    writer = None
    try:
        # ------------------------------------------------------------------
        # Placeholders: one per requested feature.  The names ('y_<target>',
        # 'x_<feature>') only affect how nodes are labelled in TensorBoard.
        # ------------------------------------------------------------------
        placeholders = []
        extra_features_width = 0  # total width of the features after the image
        for i, nm in enumerate(output_feature_list):
            width = ds.train.feature_width[i]
            if i > 1:
                extra_features_width += width
            prefix = 'y_' if i == 0 else 'x_'
            placeholders.append(
                tf.placeholder(dtype, shape=[None, width], name=prefix + nm))

        # ph is addressable both by feature name (ph.image) and by position.
        Place_Holders = namedtuple('Place_Holders', output_feature_list)
        ph = Place_Holders(*placeholders)

        # ------------------------------------------------------------------
        # Layer sizes
        # ------------------------------------------------------------------
        nRows = ds.train.num_rows      # image height
        nCols = ds.train.num_columns   # image width
        nFc0 = 2048                    # fully connected layer 0
        nFc1 = 2048                    # fully connected layer 1
        nFc2 = 2048                    # fully connected layer 2
        nConv1 = 32                    # feature maps of the first convolution
        nConv2 = 64                    # feature maps of the second convolution
        nTarget = ds.train.feature_width[0]  # width of the one-hot target

        # A 'SAME' 2x2 pool with stride 2 outputs ceil(n/2) per axis, so two of
        # them give ceil(n/4).  Truncating with int(n/4) under-sizes the
        # flattened layer whenever n is not a multiple of 4.
        pooled_rows = -(-int(nRows) // 4)
        pooled_cols = -(-int(nCols) // 4)
        n_h_pool2_outputs = pooled_rows * pooled_cols * nConv2
        n_h_pool2_outputsx = n_h_pool2_outputs + extra_features_width

        # ------------------------------------------------------------------
        # Helpers
        # ------------------------------------------------------------------
        def weight_variable(shape, dtype):
            """Variable initialised from a truncated normal (stddev 0.1)."""
            return tf.Variable(
                tf.truncated_normal(shape, stddev=0.1, dtype=dtype))

        def bias_variable(shape, dtype):
            """Variable initialised to the constant 0.1."""
            return tf.Variable(tf.constant(0.1, shape=shape, dtype=dtype))

        def conv2d(x, W):
            """Stride-1 convolution that preserves the spatial size."""
            return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

        def max_pool_2x2(x):
            """2x2 max pooling with stride 2."""
            return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                                  strides=[1, 2, 2, 1], padding='SAME')

        def computeSize(s, tens):
            """Print and return the number of elements of *tens*."""
            sumC = 1
            for dim_size in tens.get_shape().as_list():
                sumC *= dim_size
            print('\t{}\t{}'.format(s, sumC), flush=True)
            return sumC

        # ------------------------------------------------------------------
        # First convolutional layer
        # ------------------------------------------------------------------
        with tf.name_scope("w_conv1"):
            W_conv1 = weight_variable([5, 5, 1, nConv1], dtype)
            b_conv1 = bias_variable([nConv1], dtype)

        with tf.name_scope("reshape_x_image"):
            # conv2d expects [batch, height, width, channels].  Using
            # (nRows, nCols) also matches the (0, nRows, nCols) buffer the
            # misclassified images are collected into below.
            # NOTE(review): assumes the flat image is stored row by row.
            x_image = tf.reshape(ph.image, [-1, nRows, nCols, 1])

        tf.image_summary("x_image", x_image)

        with tf.name_scope("convolve_1"):
            h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

        with tf.name_scope("pool_1"):
            h_pool1 = max_pool_2x2(h_conv1)

        # ------------------------------------------------------------------
        # Second convolutional layer
        # ------------------------------------------------------------------
        with tf.name_scope("convolve_2"):
            W_conv2 = weight_variable([5, 5, nConv1, nConv2], dtype)
            b_conv2 = bias_variable([nConv2], dtype)
            h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

        with tf.name_scope("pool_2"):
            h_pool2 = max_pool_2x2(h_conv2)

        # ------------------------------------------------------------------
        # Fully connected layer 0: flattened pool output plus extra features
        # ------------------------------------------------------------------
        with tf.name_scope("W_fc0_b"):
            W_fc0 = weight_variable([n_h_pool2_outputsx, nFc0], dtype)
            b_fc0 = bias_variable([nFc0], dtype)

            h_pool2_flat = tf.reshape(h_pool2, [-1, n_h_pool2_outputs])

            # Features from index 2 on skip the convolutions and are appended
            # column-wise to the flattened image features.
            for i in range(2, ds.train.num_features):
                h_pool2_flat = tf.concat(1, [h_pool2_flat, ph[i]])
            h_fc0 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc0) + b_fc0)

        # ------------------------------------------------------------------
        # Fully connected layers 1 and 2
        # ------------------------------------------------------------------
        with tf.name_scope("W_fc1_b"):
            W_fc1 = weight_variable([nFc0, nFc1], dtype)
            b_fc1 = bias_variable([nFc1], dtype)
            h_fc1 = tf.nn.relu(tf.matmul(h_fc0, W_fc1) + b_fc1)

        with tf.name_scope("W_fc2_b"):
            W_fc2 = weight_variable([nFc1, nFc2], dtype)
            b_fc2 = bias_variable([nFc2], dtype)
            h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2)

        # ------------------------------------------------------------------
        # Dropout and readout
        # ------------------------------------------------------------------
        keep_prob = tf.placeholder(dtype, name='keep_prob')

        with tf.name_scope("drop"):
            h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob)

        with tf.name_scope("softmax"):
            W_fc3 = weight_variable([nFc2, nTarget], dtype)
            b_fc3 = bias_variable([nTarget], dtype)
            y_conv = tf.nn.softmax(tf.matmul(h_fc2_drop, W_fc3) + b_fc3)

        # ------------------------------------------------------------------
        # Loss, optimiser, accuracy and summaries
        # ------------------------------------------------------------------
        with tf.name_scope("xent"):
            # 1e-8 keeps log() finite when a predicted probability is 0.
            cross_entropy = -tf.reduce_sum(ph[0] * tf.log(y_conv + 1e-8))
            tf.scalar_summary("cross entropy", cross_entropy)

        with tf.name_scope("train"):
            train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

        with tf.name_scope("test"):
            correct_prediction = tf.equal(tf.argmax(y_conv, 1),
                                          tf.argmax(ph[0], 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype))
            tf.scalar_summary("accuracy", accuracy)

        merged = tf.merge_all_summaries()

        # Log directory name: year-month-day-hour-minute.
        tp = datetime.datetime.now().timetuple()
        tm = '-'.join(str(part) for part in tp[:5])
        writer = tf.train.SummaryWriter("/tmp/ds_logs/" + tm, sess.graph)

        # To see the results in a browser, start the server with
        #   tensorboard --logdir '/tmp/ds_logs/'
        # and open localhost:6006.

        sess.run(tf.initialize_all_variables())

        # ------------------------------------------------------------------
        # Training
        # ------------------------------------------------------------------
        perfect_count = 10
        for i in range(nEpochs):
            batch = ds.train.next_batch(100)
            # The batch list comes back in the order the features were
            # requested, which is also the order of the placeholders.
            feed = {keep_prob: 0.5}
            for j in range(ds.train.num_features):
                feed[ph[j]] = batch[j]

            if i % 100 == 0:
                # Evaluate without dropout on a copy of the feed, so that the
                # training step below still runs with keep_prob 0.5.
                eval_feed = dict(feed)
                eval_feed[keep_prob] = 1.0
                summary_str, train_accuracy = sess.run(
                    [merged, accuracy], feed_dict=eval_feed)
                writer.add_summary(summary_str, i)
                if train_accuracy != 1:
                    perfect_count = 10
                else:
                    perfect_count -= 1
                    if perfect_count == 0:
                        break

                print("step %d, training accuracy %g" % (i, train_accuracy),
                      flush=True)
            train_step.run(feed_dict=feed)

        # ------------------------------------------------------------------
        # Parameter count: every weight and bias exactly once
        # ------------------------------------------------------------------
        print('network size:', flush=True)
        named_parameters = [
            ("W_conv1", W_conv1), ("b_conv1", b_conv1),
            ("W_conv2", W_conv2), ("b_conv2", b_conv2),
            ("W_fc0", W_fc0), ("b_fc0", b_fc0),
            ("W_fc1", W_fc1), ("b_fc1", b_fc1),
            ("W_fc2", W_fc2), ("b_fc2", b_fc2),
            ("W_fc3", W_fc3), ("b_fc3", b_fc3),
        ]
        total = 0
        for label, variable in named_parameters:
            total += computeSize(label, variable)
        print('\ttotal\t{}'.format(total), flush=True)

        # ------------------------------------------------------------------
        # Testing, in slices of 100 samples
        # ------------------------------------------------------------------
        feed = {keep_prob: 1.0}
        error_batches = [np.empty((0, nRows, nCols))]
        n_correct = 0
        n_tested = 0
        n_test_samples = ds.test.features[0].shape[0]
        for n in range(0, n_test_samples, 100):
            for i in range(ds.train.num_features):
                feed[ph[i]] = ds.test.features[i][n:n + 100]
            images, is_correct = sess.run([x_image, correct_prediction],
                                          feed_dict=feed)
            # Count samples rather than averaging per-slice accuracies, so a
            # short final slice is not over-weighted.
            n_correct += int(np.sum(is_correct))
            n_tested += len(is_correct)
            error_batches.append(images[:, :, :, 0][np.logical_not(is_correct)])
        error_images = np.concatenate(error_batches, axis=0)
        test_accuracy = n_correct / n_tested if n_tested else float('nan')

        # Only the 'font' lookup is optional; montage errors are not hidden.
        try:
            font = input_filters_dict['font']
        except (KeyError, TypeError):
            print("test accuracy {}".format(test_accuracy), flush=True)
            title = 'TensorFlow Error Images'
        else:
            print("test accuracy {} for font: {}".format(test_accuracy, font),
                  flush=True)
            title = 'TensorFlow {} Error Images'.format(font)
        ocr_utils.montage(error_images, title=title)
    finally:
        if writer is not None:
            writer.close()
        # Close the session before resetting the graph: resetting while a
        # session is active is documented as undefined behaviour.  The reset is
        # only needed when this function is called repeatedly (one font after
        # another).
        sess.close()
        tf.reset_default_graph()
# Example #2
    #   identify the font given the input images
    #output_feature_list = ['font_one_hot','image','italic','aspect_ratio','upper_case']

    # train the digits 0-9 and the upper- and lower-case letters of the 'scanned' font variant
    input_filters_dict = {
        'm_label':
        list(range(48, 58)) + list(range(65, 91)) + list(range(97, 123)),
        'fontVariant': 'scanned'
    }
    #input_filters_dict = {}
    output_feature_list = [
        'm_label_one_hot', 'image', 'italic', 'aspect_ratio', 'upper_case'
    ]
    ds = ocr_utils.read_data(input_filters_dict=input_filters_dict,
                             output_feature_list=output_feature_list,
                             test_size=.1,
                             engine_type='tensorflow',
                             dtype=dtype)
    nn = nnetwork.network(ds.train)
    nn.fit(ds.train, nEpochs=5000)
    nn.test(ds.test)

else:
    # loop through all the fonts and train individually

    # pick up the entire list of fonts and font variants. Train each one.
    df1 = ocr_utils.get_list(input_filters_dict={'font': ()})

    import pprint as pprint
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(df1)
def main(model='mlp', num_epochs=50):
    """Train a Lasagne network on HANDPRINT digit images and report test results.

    The data is read with ``ocr_utils.read_data`` and split into training,
    evaluation (10%) and test (10%) sets.  The network is trained with
    Nesterov-momentum SGD on the mean categorical cross-entropy; after every
    epoch the training loss and the validation loss/accuracy are printed, and
    after the last epoch the test loss/accuracy.

    Args:
        model: ``'mlp'``, ``'cnn'`` or
            ``'custom_mlp:DEPTH,WIDTH,DROP_IN,DROP_HID'``.  Any other value
            prints a message and returns without training.
        num_epochs: number of full passes over the training data.

    Returns:
        None.
    """
    batch_size = 500

    def _mean(total, count):
        """Average *total* over *count* batches; NaN when there were none."""
        return total / count if count else float('nan')

    print("Loading data...", flush=True)
    # ASCII 48..57 are '0'..'9'.  The upper bound of range() is exclusive, so
    # 58 is needed to include '9'.
    input_filters_dict = {'font': ('HANDPRINT',), 'm_label': range(48, 58)}
    output_feature_list = ['m_label', 'image']
    ds = ocr_utils.read_data(input_filters_dict=input_filters_dict,
                             output_feature_list=output_feature_list,
                             engine_type='theano',
                             test_size=.1,
                             evaluation_size=.1,
                             dtype='float32')
    nRows = ds.train.num_rows
    nCols = ds.train.num_columns

    X_train = ds.train.features[1]
    X_val = ds.evaluation.features[1]
    X_test = ds.test.features[1]

    # Shift the ASCII codes so that '0' becomes class 0.
    y_train = np.array(ds.train.features[0] - 48, dtype=np.int32)
    y_val = np.array(ds.evaluation.features[0] - 48, dtype=np.int32)
    y_test = np.array(ds.test.features[0] - 48, dtype=np.int32)

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Create the neural network model selected by *model*
    print("Building model and compiling functions...", flush=True)
    if model == 'mlp':
        network = build_mlp(input_var, nRows, nCols)
    elif model.startswith('custom_mlp:'):
        depth, width, drop_in, drop_hid = model.split(':', 1)[1].split(',')
        network = build_custom_mlp(input_var, int(depth), int(width),
                                   float(drop_in), float(drop_hid))
    elif model == 'cnn':
        network = build_cnn(input_var)
    else:
        print("Unrecognized model type %r." % model, flush=True)
        return

    # Training loss: mean categorical cross-entropy of the stochastic
    # (dropout-enabled) forward pass.
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()

    # Parameter updates: SGD with Nesterov momentum.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=0.01, momentum=0.9)

    # Validation/test expressions use a deterministic forward pass, which
    # disables dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    test_loss = test_loss.mean()
    # Symbolic accuracy.  It has its own name so that the numeric accumulators
    # below never rebind the expression.
    test_acc_expr = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                           dtype='float32')

    # One training step on a mini-batch; returns the training loss.
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Loss and accuracy of a mini-batch without updating parameters.
    val_fn = theano.function([input_var, target_var],
                             [test_loss, test_acc_expr])

    def _evaluate(X, y):
        """Return (summed loss, summed accuracy, batch count) over X, y."""
        err_sum = 0
        acc_sum = 0
        n_batches = 0
        for inputs, targets in iterate_minibatches(X, y, batch_size,
                                                   shuffle=False):
            err, acc = val_fn(inputs, targets)
            err_sum += err
            acc_sum += acc
            n_batches += 1
        return err_sum, acc_sum, n_batches

    # Finally, launch the training loop.
    print("Starting training...", flush=True)
    for epoch in range(num_epochs):
        # A full pass over the training data ...
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for inputs, targets in iterate_minibatches(X_train, y_train,
                                                   batch_size, shuffle=True):
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # ... and a full pass over the validation data.
        val_err, val_acc, val_batches = _evaluate(X_val, y_val)

        # _mean() guards the case where a split produced no batches at all.
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time), flush=True)
        print("  training loss:\t\t{:.6f}".format(
            _mean(train_err, train_batches)), flush=True)
        print("  validation loss:\t\t{:.6f}".format(
            _mean(val_err, val_batches)), flush=True)
        print("  validation accuracy:\t\t{:.2f} %".format(
            _mean(val_acc, val_batches) * 100), flush=True)

    # After training, compute and print the test error.
    test_err, test_acc, test_batches = _evaluate(X_test, y_test)
    print("Final results:", flush=True)
    print("  test loss:\t\t\t{:.6f}".format(
        _mean(test_err, test_batches)), flush=True)
    print("  test accuracy:\t\t{:.2f} %".format(
        _mean(test_acc, test_batches) * 100), flush=True)
from sklearn.decomposition import PCA
from sklearn.decomposition import KernelPCA


# Select the characters with ASCII codes 48..57 from the E13B font and ask for
# the label, the image and the column sums of columns 0..19.
chars_to_train = range(48, 58)
columnsXY = range(0, 20)
column_str = 'column_sum' + str(list(columnsXY))

input_filters_dict = dict(m_label=chars_to_train, font='E13B')

# requested features, in order: character label, image, column sums
output_feature_list = ['m_label', 'image', column_str]

# read the complete image (20x20) = 400 pixels for each character
ds = ocr_utils.read_data(
    input_filters_dict=input_filters_dict,
    output_feature_list=output_feature_list,
    test_size=.2,
    random_state=0,
)

# The full sample uses too much memory on a 32 bit Windows machine, so only the
# first `windows_limit` training rows are kept.
windows_limit = 5000
y_train, X_train_image, X_train = (
    ds.train.features[k][:windows_limit] for k in range(3)
)

# The test set is used in full.
y_test, X_test_image, X_test = (ds.test.features[k] for k in range(3))

# Covariance between pixels (columns of the image matrix) and its
# eigen-decomposition.
cov_mat = np.cov(np.transpose(X_train_image))
eigen_vals, eigen_vecs = np.linalg.eig(cov_mat)

print('\nEigenvalues \n' + str(eigen_vals[:2 * n_components]))
def train_a_font(input_filters_dict,output_feature_list, nEpochs=5000):
    """Train a stack of fully connected residual sections and test it.

    The data is read with ``ocr_utils.read_data`` (10% held out for testing).
    The flat image feeds ten sections.  Each section is three ReLU layers
    (image width -> width/4 -> width/4 -> image width) whose output is added
    to the section's input.  Dropout and a softmax readout follow.  Summaries
    are written under ``/tmp/ds_logs/`` for TensorBoard, the parameter count
    and the test accuracy are printed, and the misclassified test images are
    shown with ``ocr_utils.montage``.

    Training stops early once the training-batch accuracy, sampled every 100
    steps, has been exactly 1 on ten consecutive samples.

    Args:
        input_filters_dict: filter specification passed to
            ``ocr_utils.read_data``.  If it has a ``'font'`` key, its value is
            included in the printed result and in the montage title.
        output_feature_list: names of the features to read.  Element 0 is the
            target and element 1 the image.  Further features get a
            placeholder and are fed, but this network does not use them.
        nEpochs: maximum number of training steps (one batch of 100 per step).

    Returns:
        None.
    """
    ds = ocr_utils.read_data(input_filters_dict=input_filters_dict,
                             output_feature_list=output_feature_list,
                             test_size=.1,
                             engine_type='tensorflow', dtype=dtype)

    sess = tf.InteractiveSession()
    writer = None
    try:
        # ------------------------------------------------------------------
        # Placeholders: one per requested feature.  The names ('y_<target>',
        # 'x_<feature>') only affect how nodes are labelled in TensorBoard.
        # ------------------------------------------------------------------
        placeholders = []
        for i, nm in enumerate(output_feature_list):
            prefix = 'y_' if i == 0 else 'x_'
            print(ds.train.features[i].dtype)
            placeholders.append(
                tf.placeholder(dtype,
                               shape=[None, ds.train.feature_width[i]],
                               name=prefix + nm))

        # ph is addressable both by feature name (ph.image) and by position.
        Place_Holders = namedtuple('Place_Holders', ds.train.feature_names)
        ph = Place_Holders(*placeholders)

        nRows = ds.train.num_rows      # image height
        nCols = ds.train.num_columns   # image width

        # Three layers per section.  h has one extra slot because h[0] is the
        # network input and h[3*k] the output of section k-1.
        nSections = 10
        w = [None] * (nSections * 3)
        b = [None] * (nSections * 3)
        h = [None] * (nSections * 3 + 1)

        in_out_width = nRows * nCols
        internal_width = int(in_out_width / 4)
        nTarget = ds.train.feature_width[0]  # width of the one-hot target

        # ------------------------------------------------------------------
        # Helpers
        # ------------------------------------------------------------------
        def weight_variable(shape, dtype):
            """Variable initialised from a truncated normal (stddev 0.1)."""
            return tf.Variable(
                tf.truncated_normal(shape, stddev=0.1, dtype=dtype))

        def bias_variable(shape, dtype):
            """Variable initialised to the constant 0.1."""
            return tf.Variable(tf.constant(0.1, shape=shape, dtype=dtype))

        def shapeOuts(n):
            """Print the static shapes of layer n's input, weights, bias, output."""
            print('n={}, hin={},w={}, b={} ,hout={}\n'.format(
                n, h[n].get_shape(), w[n].get_shape(), b[n].get_shape(),
                h[n + 1].get_shape()))

        def section(n):
            """Build layers n, n+1, n+2 and the skip connection h[n] -> h[n+3]."""
            with tf.name_scope('section_' + str(n) + '_0'):
                w[n] = weight_variable([in_out_width, internal_width], dtype)
                b[n] = bias_variable([internal_width], dtype)
                h[n + 1] = tf.nn.relu(tf.matmul(h[n], w[n]) + b[n])
                shapeOuts(n)

            with tf.name_scope('section_' + str(n) + '_1'):
                w[n + 1] = weight_variable([internal_width, internal_width],
                                           dtype)
                b[n + 1] = bias_variable([internal_width], dtype)
                h[n + 2] = tf.nn.relu(
                    tf.matmul(h[n + 1], w[n + 1]) + b[n + 1])
                shapeOuts(n + 1)

            with tf.name_scope('section_' + str(n) + '_2'):
                w[n + 2] = weight_variable([internal_width, in_out_width],
                                           dtype)
                b[n + 2] = bias_variable([in_out_width], dtype)
                z = tf.nn.relu(tf.matmul(h[n + 2], w[n + 2]) + b[n + 2])
                # Residual connection: add the section input to its output.
                h[n + 3] = tf.add(z, h[n])

                print('z shape ={}'.format(z.get_shape()))
                shapeOuts(n + 2)

        def computeSize(s, tens):
            """Print and return the number of elements of *tens*."""
            sumC = 1
            for dim_size in tens.get_shape().as_list():
                sumC *= dim_size
            print('\t{}\t{}'.format(s, sumC), flush=True)
            return sumC

        # ------------------------------------------------------------------
        # Build the sectional network on the flat image
        # ------------------------------------------------------------------
        h[0] = ph[1]
        for i in range(nSections):
            section(3 * i)

        # ------------------------------------------------------------------
        # Dropout and readout
        # ------------------------------------------------------------------
        keep_prob = tf.placeholder(dtype, name='keep_prob')

        with tf.name_scope("drop"):
            h_fc2_drop = tf.nn.dropout(h[nSections * 3], keep_prob)

        with tf.name_scope("softmax"):
            w_fc3 = weight_variable([in_out_width, nTarget], dtype)
            b_fc3 = bias_variable([nTarget], dtype)
            y_conv = tf.nn.softmax(tf.matmul(h_fc2_drop, w_fc3) + b_fc3)

        # ------------------------------------------------------------------
        # Parameter count: every weight and bias, including section biases
        # ------------------------------------------------------------------
        print('network size:', flush=True)
        total = 0
        for i in range(nSections * 3):
            total += computeSize("w{}".format(i), w[i])
            total += computeSize("b{}".format(i), b[i])
        total += computeSize("b_fc3", b_fc3) + computeSize("w_fc3", w_fc3)
        print('\ttotal\t{}'.format(total), flush=True)

        # ------------------------------------------------------------------
        # Loss, optimiser, accuracy and summaries
        # ------------------------------------------------------------------
        with tf.name_scope("reshape_x_image"):
            # Only used to recover 2-D images of misclassified samples.  The
            # (nRows, nCols) order matches the (0, nRows, nCols) buffer below.
            # NOTE(review): assumes the flat image is stored row by row.
            x_image = tf.reshape(ph.image, [-1, nRows, nCols, 1])

        with tf.name_scope("xent"):
            # 1e-8 keeps log() finite when a predicted probability is 0.
            cross_entropy = -tf.reduce_sum(ph[0] * tf.log(y_conv + 1e-8))
            tf.scalar_summary("cross entropy", cross_entropy)

        with tf.name_scope("train"):
            train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

        with tf.name_scope("test"):
            correct_prediction = tf.equal(tf.argmax(y_conv, 1),
                                          tf.argmax(ph[0], 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype))
            tf.scalar_summary("accuracy", accuracy)

        merged = tf.merge_all_summaries()

        # Log directory name: year-month-day-hour-minute.
        tp = datetime.datetime.now().timetuple()
        tm = '-'.join(str(part) for part in tp[:5])
        writer = tf.train.SummaryWriter("/tmp/ds_logs/" + tm, sess.graph)

        # To see the results in a browser, start the server with
        #   tensorboard --logdir '/tmp/ds_logs/'
        # and open localhost:6006.

        sess.run(tf.initialize_all_variables())

        # ------------------------------------------------------------------
        # Training
        # ------------------------------------------------------------------
        perfect_count = 10
        for i in range(nEpochs):
            batch = ds.train.next_batch(100)
            # The batch list comes back in the order the features were
            # requested, which is also the order of the placeholders.
            feed = {keep_prob: 0.5}
            for j in range(ds.train.num_features):
                feed[ph[j]] = batch[j]

            if i % 100 == 0:
                # Evaluate without dropout on a copy of the feed, so that the
                # training step below still runs with keep_prob 0.5.
                eval_feed = dict(feed)
                eval_feed[keep_prob] = 1.0
                summary_str, train_accuracy = sess.run(
                    [merged, accuracy], feed_dict=eval_feed)
                writer.add_summary(summary_str, i)
                if train_accuracy != 1:
                    perfect_count = 10
                else:
                    perfect_count -= 1
                    if perfect_count == 0:
                        break

                print("step %d, training accuracy %g" % (i, train_accuracy),
                      flush=True)
            train_step.run(feed_dict=feed)

        # ------------------------------------------------------------------
        # Testing, in slices of 100 samples
        # ------------------------------------------------------------------
        feed = {keep_prob: 1.0}
        error_batches = [np.empty((0, nRows, nCols))]
        n_correct = 0
        n_tested = 0
        n_test_samples = ds.test.features[0].shape[0]
        for n in range(0, n_test_samples, 100):
            for i in range(ds.train.num_features):
                feed[ph[i]] = ds.test.features[i][n:n + 100]
            images, is_correct = sess.run([x_image, correct_prediction],
                                          feed_dict=feed)
            # Count samples rather than averaging per-slice accuracies, so a
            # short final slice is not over-weighted.
            n_correct += int(np.sum(is_correct))
            n_tested += len(is_correct)
            error_batches.append(images[:, :, :, 0][np.logical_not(is_correct)])
        error_images = np.concatenate(error_batches, axis=0)
        test_accuracy = n_correct / n_tested if n_tested else float('nan')

        # Only the 'font' lookup is optional; montage errors are not hidden.
        try:
            font = input_filters_dict['font']
        except (KeyError, TypeError):
            print("test accuracy {}".format(test_accuracy), flush=True)
            title = 'TensorFlow Error Images'
        else:
            print("test accuracy {} for font: {}".format(test_accuracy, font),
                  flush=True)
            title = 'TensorFlow {} Error Images'.format(font)
        ocr_utils.montage(error_images, title=title)
    finally:
        if writer is not None:
            writer.close()
        # Close the session before resetting the graph: resetting while a
        # session is active is documented as undefined behaviour.  The reset is
        # only needed when this function is called repeatedly (one font after
        # another).
        sess.close()
        tf.reset_default_graph()
# Example #6
import matplotlib.pyplot as plt
from sklearn.lda import LDA

# ---- run configuration ------------------------------------------------------
print_limit = 20
chars_to_train = range(ord('0'), ord('9') + 1)  # character codes 48..57
columnsXY = range(20)
# Feature name of the form 'column_sum[0, 1, ..., 19]'.
column_str = 'column_sum{}'.format(list(columnsXY))

# Filter passed to read_data: E13B font, labels restricted to chars_to_train.
input_filters_dict = dict(m_label=chars_to_train, font='E13B')

# Features requested, in order: character label, image, column sums.
output_feature_list = ['m_label', 'image', column_str]

# Load the data set.
# NOTE(review): presumably test_size=.2 holds out 20% of the samples and
# random_state=0 makes the split reproducible -- confirm against ocr_utils.
ds = ocr_utils.read_data(
    input_filters_dict=input_filters_dict,
    output_feature_list=output_feature_list,
    test_size=.2,
    random_state=0,
)

# Pull the three requested features out of each split by position:
# index 0 -> label, index 1 -> image, index 2 -> column sums.
_train_features = ds.train.features
_test_features = ds.test.features

y_train, X_train_image, X_train = (_train_features[0],
                                   _train_features[1],
                                   _train_features[2])
y_test, X_test_image, X_test = (_test_features[0],
                                _test_features[1],
                                _test_features[2])

from sklearn.preprocessing import StandardScaler
#
# Standardize the image features.
# The scaler is fitted on the training split ONLY and the same fitted
# statistics are then applied to the test split.  Re-fitting on the test
# split (fit_transform) would scale the two splits differently, leak test-set
# statistics into preprocessing, and leave `sc` holding test statistics for
# any later sc.transform() call.
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train_image)
X_test_std = sc.transform(X_test_image)
    
    # output the character label, image, italic flag, aspect_ratio and upper_case flag
    # output_feature_list = ['m_label_one_hot','image','italic','aspect_ratio','upper_case']    
    
    # output only the character label and the image
    # output_feature_list = ['m_label_one_hot','image'] 
    
    #   identify the font given the input images
    #output_feature_list = ['font_one_hot','image','italic','aspect_ratio','upper_case']   

    # train the digits 0-9 plus the letters A-Z and a-z, restricted to the 'scanned' fontVariant
    input_filters_dict = {'m_label': list(range(48,58))+list(range(65,91))+list(range(97,123)),'fontVariant':'scanned'}
    #input_filters_dict = {}    
    output_feature_list = ['m_label_one_hot','image','italic','aspect_ratio','upper_case']    
    ds = ocr_utils.read_data(input_filters_dict = input_filters_dict, 
                                output_feature_list=output_feature_list,
                                test_size = .1,
                                engine_type='tensorflow',dtype=dtype)    
    nn = nnetwork.network( ds.train)
    nn.fit( ds.train,  nEpochs=5000)  
    nn.test(ds.test)
    
else:
    # loop through all the fonts and train individually

    # pick up the entire list of fonts and font variants. Train each one.
    df1 = ocr_utils.get_list(input_filters_dict={'font': ()})      
    
    import pprint as pprint
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(df1)
   
# One legend entry per distinct label value in y: the numeric code followed
# by the corresponding character in single quotes.
legend = ["{} '{}'".format(ys, chr(ys)) for ys in np.unique(y)]

# Plot X against y; each axis is labelled after the column it represents.
ocr_utils.scatter_plot(
    X=X,
    y=y,
    legend_entries=legend,
    axis_labels=['column {} sum'.format(column) for column in columnsXY],
    title='E13B sum of columns',
)

#############################################################################
# read and show character images for '0', and '1'
# select the digits in columnsXY in the E13B font

# Filter: E13B font, labels limited to ascii_characters_to_train.
fd = dict(m_label=ascii_characters_to_train, font='E13B')

# Two features per sample, in this order: character label, then image.
fl = ['m_label', 'image']

# Load the selected samples with int32 data.
ds = ocr_utils.read_data(
    input_filters_dict=fd,
    output_feature_list=fl,
    dtype=np.int32,
)
y, X = ds.train.features

# Give every sample a (num_rows, num_columns) image shape, keeping the
# sample count on the first axis, then display the images.
X = np.reshape(X, (len(X), ds.train.num_rows, ds.train.num_columns))
ocr_utils.montage(X, title='some E13B Characters')

# Banner printed once the script has run to completion.
print('\n########################### No Errors ####################################')
              )
    y_train_pred = model.predict_classes(X_train, verbose=0)
    print('First 3 predictions: ', y_train_pred[:3])
    train_acc = np.sum(y_train == y_train_pred, axis=0) / X_train.shape[0]
    print('Training accuracy: %.2f%%' % (train_acc * 100))
    
    
    y_test_pred = model.predict_classes(X_test, verbose=0)
    test_acc = np.sum(y_test == y_test_pred, axis=0) / X_test.shape[0]
    print('Test accuracy: %.2f%%' % (test_acc * 100))

# Select the HANDPRINT font and request the one-hot label, the image and the
# raw label, in that order.
input_filters_dict = dict(font=('HANDPRINT',))
output_feature_list = ['m_label_one_hot', 'image', 'm_label']
ds = ocr_utils.read_data(
    input_filters_dict=input_filters_dict,
    output_feature_list=output_feature_list,
    engine_type='keras',
    test_size=.1,
    dtype=np.float32,
    random_state=0,
)

# Feature positions follow output_feature_list:
#   0 -> one-hot label, 1 -> image, 2 -> raw label.
y_train_ohe = ds.train.features[0]
X_train, X_test = ds.train.features[1], ds.test.features[1]

# Shift the raw labels down by 48 (== ord('0')).
# NOTE(review): presumably this maps digit character codes onto 0..9 --
# confirm that the selected labels are digits only.
y_train, y_test = ds.train.features[2] - 48, ds.test.features[2] - 48

do_keras(X_train, X_test, y_train_ohe, y_train, y_test)


print('\n########################### No Errors ####################################')

def _union_unique(accumulated, new_values):
    """Return the distinct values of *accumulated* extended by *new_values*."""
    return np.unique(np.append(accumulated, new_values))


# For every font: query its attribute columns, fold the distinct values into
# the running unique_* collections, report per-font counts, and show example
# images for each of the font's variants.
for font in df1:
    df2 = ocr_utils.get_list(input_filters_dict={
        'font': font,
        'fontVariant': (),
        'm_label': (),
        'strength': (),
        'italic': (),
        'orientation': (),
    })

    # Distinct values of each attribute for this font.
    u1 = np.unique(df2['fontVariant'])
    u2 = np.unique(df2['m_label'])
    u3 = np.unique(df2['strength'])
    u4 = np.unique(df2['italic'])
    u5 = np.unique(df2['orientation'])

    # Merge this font's values into the totals accumulated so far.
    unique_fonts = _union_unique(unique_fonts, df2['font'])
    unique_fontVariants = _union_unique(unique_fontVariants, u1)
    unique_m_labels = _union_unique(unique_m_labels, u2)
    unique_strengths = _union_unique(unique_strengths, u3)
    unique_italics = _union_unique(unique_italics, u4)
    unique_orientations = _union_unique(unique_orientations, u5)

    print('\n{}, fontVariants={}, labels = {}, strengths = {}, italics = {}, orientations = {}\n'
          .format(font[0], *map(len, (u1, u2, u3, u4, u5))))

    # Show example images for each variant of this font.
    for fontVariant in u1:
        fd = dict(font=font, fontVariant=fontVariant)
        ds = ocr_utils.read_data(
            input_filters_dict=fd,
            output_feature_list=['m_label', 'image'],
            dtype=np.int32,
        )
        y, X = ds.train.features
        X2D = np.reshape(
            X, (X.shape[0], ds.train.num_rows, ds.train.num_columns))
        title = '{}-{}'.format(font[0], fontVariant)
        ocr_utils.show_examples(X2D, y, title=title)

# Totals across all fonts visited above.
print('unique fonts={}, fontVariants={}, labels = {}, strengths = {}, italics = {}, orientations = {}'
      .format(*map(len, (unique_fonts, unique_fontVariants, unique_m_labels,
                         unique_strengths, unique_italics,
                         unique_orientations))))


print('\n########################### No Errors ####################################')
                       y=y,
                       legend_entries=legend,
                       axis_labels=[
                           'column {} sum'.format(columnsXY[i])
                           for i in range(len(columnsXY))
                       ],
                       title='E13B sum of columns')

#############################################################################
# read and show character images for '0', and '1'
# select the digits in columnsXY in the E13B font

# Filter: E13B font, labels limited to ascii_characters_to_train.
fd = dict(m_label=ascii_characters_to_train, font='E13B')

# Two features per sample, in this order: character label, then image.
fl = ['m_label', 'image']

# Load the selected samples with int32 data.
ds = ocr_utils.read_data(input_filters_dict=fd, output_feature_list=fl,
                         dtype=np.int32)
y, X = ds.train.features

# Give every sample a (num_rows, num_columns) image shape, keeping the
# sample count on the first axis, then display the images.
X = np.reshape(X, (len(X), ds.train.num_rows, ds.train.num_columns))
ocr_utils.montage(X, title='some E13B Characters')

# Banner printed once the script has run to completion.
print('\n########################### No Errors ####################################')
Exemple #12
0
              validation_split=0.1)
    y_train_pred = model.predict_classes(X_train, verbose=0)
    print('First 3 predictions: ', y_train_pred[:3])
    train_acc = np.sum(y_train == y_train_pred, axis=0) / X_train.shape[0]
    print('Training accuracy: %.2f%%' % (train_acc * 100))

    y_test_pred = model.predict_classes(X_test, verbose=0)
    test_acc = np.sum(y_test == y_test_pred, axis=0) / X_test.shape[0]
    print('Test accuracy: %.2f%%' % (test_acc * 100))


# Select the HANDPRINT font and request the one-hot label, the image and the
# raw label, in that order.
input_filters_dict = dict(font=('HANDPRINT', ))
output_feature_list = ['m_label_one_hot', 'image', 'm_label']
ds = ocr_utils.read_data(
    input_filters_dict=input_filters_dict,
    output_feature_list=output_feature_list,
    engine_type='keras',
    test_size=.1,
    dtype=np.float32,
    random_state=0,
)

# Feature positions follow output_feature_list:
#   0 -> one-hot label, 1 -> image, 2 -> raw label.
y_train_ohe = ds.train.features[0]
X_train, X_test = ds.train.features[1], ds.test.features[1]

# Shift the raw labels down by 48 (== ord('0')).
# NOTE(review): presumably this maps digit character codes onto 0..9 --
# confirm that the selected labels are digits only.
y_train, y_test = ds.train.features[2] - 48, ds.test.features[2] - 48

do_keras(X_train, X_test, y_train_ohe, y_train, y_test)

print('\n########################### No Errors ####################################')
    unique_fontVariants = np.unique(np.append(unique_fontVariants, u1))
    u2 = np.unique(df2['m_label'])
    unique_m_labels = np.unique(np.append(unique_m_labels, u2))
    u3 = np.unique(df2['strength'])
    unique_strengths = np.unique(np.append(unique_strengths, u3))
    u4 = np.unique(df2['italic'])
    unique_italics = np.unique(np.append(unique_italics, u4))
    u5 = np.unique(df2['orientation'])
    unique_orientations = np.unique(np.append(unique_orientations, u5))
    print(
        '\n{}, fontVariants={}, labels = {}, strengths = {}, italics = {}, orientations = {}\n'
        .format(font[0], len(u1), len(u2), len(u3), len(u4), len(u5)))
    for fontVariant in u1:
        fd = {'font': font, 'fontVariant': fontVariant}
        ds = ocr_utils.read_data(input_filters_dict=fd,
                                 output_feature_list=['m_label', 'image'],
                                 dtype=np.int32)
        y, X = ds.train.features
        X2D = np.reshape(X,
                         (X.shape[0], ds.train.num_rows, ds.train.num_columns))
        title = '{}-{}'.format(font[0], fontVariant)
        ocr_utils.show_examples(X2D, y, title=title)

# Report how many distinct values were collected for each attribute.
print('unique fonts={}, fontVariants={}, labels = {}, strengths = {}, italics = {}, orientations = {}'
      .format(*map(len, (unique_fonts, unique_fontVariants, unique_m_labels,
                         unique_strengths, unique_italics,
                         unique_orientations))))

print(
    '\n########################### No Errors ####################################'