logits, raw_pred, rnn_out = model(x_batch)
loss = tf.reduce_mean(
    tf.nn.ctc_loss(labels=y_batch_sparse,
                   logits=rnn_out,
                   label_length=[len(i) for i in y_batch],
                   logit_length=[47] * len(y_batch),
                   blank_index=62))
grads = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(grads, model.trainable_variables))

if iter % 100 == 0:
    model.save_weights('checkpoints/model_default')

    # Greedy-decode the current batch and print a few predictions.
    decoded, log_prob = tf.nn.ctc_greedy_decoder(
        logits.numpy().transpose((1, 0, 2)),
        sequence_length=[47] * len(y_batch),
        merge_repeated=True)
    decoded = tf.sparse.to_dense(decoded[0]).numpy()
    print(iter, loss.numpy().round(1), [
        decode_to_text(char_dict, [j for j in i if j != 0]) for i in decoded
    ][:4])

    # Append the loss and rewrite the history file.
    loss_hist.append(loss.numpy().round(1))
    with open('loss_hist.txt', 'w') as file:
        for s in loss_hist:
            file.write(str(s) + '\n')

iter += 1
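# y_batch_sparse has to be built from the dense label lists before the step
# above. A minimal sketch of that conversion, assuming a sparse_tuple_from
# helper like the one referenced in the test-loss snippet further below; the
# repo's own helper may differ in detail.
import numpy as np
import tensorflow as tf

def sparse_tuple_from(sequences):
    """Convert a list of label sequences into the (indices, values,
    dense_shape) triple expected by tf.sparse.SparseTensor."""
    indices, values = [], []
    for n, seq in enumerate(sequences):
        indices.extend(zip([n] * len(seq), range(len(seq))))
        values.extend(seq)
    indices = np.asarray(indices, dtype=np.int64)
    values = np.asarray(values, dtype=np.int32)
    dense_shape = np.asarray(
        [len(sequences), max(len(seq) for seq in sequences)], dtype=np.int64)
    return indices, values, dense_shape

# Usage before the training step above:
# indices, values, dense_shape = sparse_tuple_from(y_batch)
# y_batch_sparse = tf.sparse.SparseTensor(indices, values, dense_shape)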
def main(args):
  """Main function to train the model.

  Args:
    args: Parsed arguments.

  Returns:
    Execution status defined by `constants.ExitCode`.
  """
  # Validate paths.
  if not validate_paths(args):
    return constants.ExitCode.INVALID_PATH

  # Extract paths.
  input_dir = args.input_dir
  model_dir = args.model_dir
  log_dir = args.log_dir
  existing_model = args.existing_model

  # Extract model parameters.
  batch_size = args.batch_size
  dropout_pkeep = args.dropout_pkeep
  hidden_state_size = args.hidden_state_size
  hidden_layer_size = args.hidden_layer_size
  learning_rate = args.learning_rate

  # Extract additional flags.
  debug = args.debug
  validation = args.validation

  # Split corpus for training and validation.
  # validation_text will be empty if validation is False.
  code_text, validation_text, input_ranges = utils.read_data_files(
      input_dir, validation=validation)

  # Bail out if we don't have enough corpus for training.
  if len(code_text) < batch_size * constants.TRAINING_SEQLEN + 1:
    return constants.ExitCode.CORPUS_TOO_SMALL

  # Get corpus files info. Will be used in debug mode to generate sample text.
  files_info_list = []
  if debug:
    files_info_list = utils.get_files_info(input_dir)
    assert files_info_list

  # Calculate validation batch size. It will be 0 if we choose not to validate.
  validation_batch_size = len(validation_text) // constants.VALIDATION_SEQLEN

  # Display some stats on the data.
  epoch_size = len(code_text) // (batch_size * constants.TRAINING_SEQLEN)
  utils.print_data_stats(len(code_text), len(validation_text), epoch_size)

  # Set graph-level random seed, so any random sequence generated in this
  # graph is repeatable. It could also be removed.
  tf.set_random_seed(0)

  # Define placeholders for learning rate, dropout and batch size.
  lr = tf.placeholder(tf.float32, name='lr')
  pkeep = tf.placeholder(tf.float32, name='pkeep')
  batchsize = tf.placeholder(tf.int32, name='batchsize')

  # Input data.
  input_bytes = tf.placeholder(tf.uint8, [None, None], name='input_bytes')
  input_onehot = tf.one_hot(input_bytes, constants.ALPHA_SIZE, 1.0, 0.0)

  # Expected outputs = same sequence shifted by 1, since we are trying to
  # predict the next character.
  expected_bytes = tf.placeholder(
      tf.uint8, [None, None], name='expected_bytes')
  expected_onehot = tf.one_hot(expected_bytes, constants.ALPHA_SIZE, 1.0, 0.0)

  # Input state.
  hidden_state = tf.placeholder(
      tf.float32, [None, hidden_state_size * hidden_layer_size],
      name='hidden_state')

  # "Naive dropout" implementation.
  cells = [rnn.GRUCell(hidden_state_size) for _ in range(hidden_layer_size)]
  dropcells = [
      rnn.DropoutWrapper(cell, input_keep_prob=pkeep) for cell in cells
  ]
  multicell = rnn.MultiRNNCell(dropcells, state_is_tuple=False)
  multicell = rnn.DropoutWrapper(multicell, output_keep_prob=pkeep)

  output_raw, next_state = tf.nn.dynamic_rnn(
      multicell, input_onehot, dtype=tf.float32, initial_state=hidden_state)
  next_state = tf.identity(next_state, name='next_state')

  # Reshape training outputs.
  output_flat = tf.reshape(output_raw, [-1, hidden_state_size])
  output_logits = layers.linear(output_flat, constants.ALPHA_SIZE)

  # Reshape expected outputs.
  expected_flat = tf.reshape(expected_onehot, [-1, constants.ALPHA_SIZE])

  # Compute training loss.
  loss = tf.nn.softmax_cross_entropy_with_logits_v2(
      logits=output_logits, labels=expected_flat)
  loss = tf.reshape(loss, [batchsize, -1])

  # Use softmax to normalize training outputs.
  output_onehot = tf.nn.softmax(output_logits, name='output_onehot')

  # Use argmax to get the max value, which is the predicted byte.
  output_bytes = tf.argmax(output_onehot, 1)
  output_bytes = tf.reshape(output_bytes, [batchsize, -1], name='output_bytes')

  # Choose Adam optimizer to compute gradients.
  optimizer = tf.train.AdamOptimizer(lr).minimize(loss)

  # Stats for display.
  seqloss = tf.reduce_mean(loss, 1)
  batchloss = tf.reduce_mean(seqloss)
  accuracy = tf.reduce_mean(
      tf.cast(
          tf.equal(expected_bytes, tf.cast(output_bytes, tf.uint8)),
          tf.float32))
  loss_summary = tf.summary.scalar('batch_loss', batchloss)
  acc_summary = tf.summary.scalar('batch_accuracy', accuracy)
  summaries = tf.summary.merge([loss_summary, acc_summary])

  # Init Tensorboard stuff.
  # This will save Tensorboard information in the folder specified on the
  # command line. Two sets of data are saved so that you can compare training
  # and validation curves visually in Tensorboard.
  timestamp = str(math.trunc(time.time()))
  summary_writer = tf.summary.FileWriter(
      os.path.join(log_dir, timestamp + '-training'))
  validation_writer = tf.summary.FileWriter(
      os.path.join(log_dir, timestamp + '-validation'))

  # Init for saving models.
  # They will be saved into a directory specified on the command line.
  saver = tf.train.Saver(max_to_keep=constants.MAX_TO_KEEP)

  # For display: init the progress bar.
  step_size = batch_size * constants.TRAINING_SEQLEN
  frequency = constants.DISPLAY_FREQ * step_size
  progress = utils.Progress(
      constants.DISPLAY_FREQ,
      size=constants.DISPLAY_LEN,
      msg='Training on next {} batches'.format(constants.DISPLAY_FREQ))

  # Set initial state.
  state = np.zeros([batch_size, hidden_state_size * hidden_layer_size])
  session = tf.Session()

  # Continue training on an existing model, or start with a new model.
  if existing_model:
    print('Continue training on existing model: {}'.format(existing_model))
    try:
      saver.restore(session, existing_model)
    except:
      print(
          ('Failed to restore existing model since model '
           'parameters do not match.'),
          file=sys.stderr)
      return constants.ExitCode.TENSORFLOW_ERROR
  else:
    print('No existing model provided. Start training with a new model.')
    session.run(tf.global_variables_initializer())

  # Number of bytes we have trained so far.
  steps = 0

  # Training loop.
  for input_batch, expected_batch, epoch in utils.rnn_minibatch_sequencer(
      code_text,
      batch_size,
      constants.TRAINING_SEQLEN,
      nb_epochs=constants.EPOCHS):

    # Train on one mini-batch.
    feed_dict = {
        input_bytes: input_batch,
        expected_bytes: expected_batch,
        hidden_state: state,
        lr: learning_rate,
        pkeep: dropout_pkeep,
        batchsize: batch_size
    }
    _, predicted, new_state = session.run(
        [optimizer, output_bytes, next_state], feed_dict=feed_dict)

    # Log training data for Tensorboard display: a mini-batch of sequences
    # every `frequency` batches.
    if debug and steps % frequency == 0:
      feed_dict = {
          input_bytes: input_batch,
          expected_bytes: expected_batch,
          hidden_state: state,
          pkeep: 1.0,
          batchsize: batch_size
      }
      predicted, seq_loss, batch_loss, acc_value, summaries_value = session.run(
          [output_bytes, seqloss, batchloss, accuracy, summaries],
          feed_dict=feed_dict)
      utils.print_learning_learned_comparison(
          input_batch, predicted, seq_loss, input_ranges, batch_loss,
          acc_value, epoch_size, steps, epoch)
      summary_writer.add_summary(summaries_value, steps)

    # Run a validation step every `frequency` batches.
    # The validation text should be a single sequence, but that's too slow.
    # We cut it up and batch the pieces (slightly inaccurate).
    if validation and steps % frequency == 0 and validation_batch_size:
      utils.print_validation_header(len(code_text), input_ranges)
      validation_x, validation_y, _ = next(
          utils.rnn_minibatch_sequencer(validation_text,
                                        validation_batch_size,
                                        constants.VALIDATION_SEQLEN, 1))
      null_state = np.zeros(
          [validation_batch_size, hidden_state_size * hidden_layer_size])
      feed_dict = {
          input_bytes: validation_x,
          expected_bytes: validation_y,
          hidden_state: null_state,
          pkeep: 1.0,
          batchsize: validation_batch_size
      }
      batch_loss, acc_value, summaries_value = session.run(
          [batchloss, accuracy, summaries], feed_dict=feed_dict)
      utils.print_validation_stats(batch_loss, acc_value)

      # Save validation data for Tensorboard.
      validation_writer.add_summary(summaries_value, steps)

    # Display a short text generated with the current weights and biases.
    # If enabled, there will be a large output.
    if debug and steps // 4 % frequency == 0:
      utils.print_text_generation_header()
      file_info = utils.random_element_from_list(files_info_list)
      first_byte, file_size = file_info['first_byte'], file_info['file_size']
      ry = np.array([[first_byte]])
      rh = np.zeros([1, hidden_state_size * hidden_layer_size])
      sample = [first_byte]
      for _ in range(file_size - 1):
        feed_dict = {
            input_bytes: ry,
            pkeep: 1.0,
            hidden_state: rh,
            batchsize: 1
        }
        ryo, rh = session.run(
            [output_onehot, next_state], feed_dict=feed_dict)
        rc = utils.sample_from_probabilities(
            ryo, topn=10 if epoch <= 1 else 2)
        sample.append(rc)
        ry = np.array([[rc]])
      print(repr(utils.decode_to_text(sample)))
      utils.print_text_generation_footer()

    # Save a checkpoint every `10 * frequency` batches. Each checkpoint is
    # a version of the model.
    if steps // 10 % frequency == 0:
      saved_model_name = constants.RNN_MODEL_NAME + '_' + timestamp
      saved_model_path = os.path.join(model_dir, saved_model_name)
      saved_model = saver.save(session, saved_model_path, global_step=steps)
      print('Saved model: {}'.format(saved_model))

    # Display the progress bar.
    if debug:
      progress.step(reset=steps % frequency == 0)

    # Update state.
    state = new_state
    steps += step_size

  # Save the model after training is done.
  saved_model_name = constants.RNN_MODEL_NAME + '_' + timestamp
  saved_model_path = os.path.join(model_dir, saved_model_name)
  saved_model = saver.save(session, saved_model_path, global_step=steps)
  print('Saved model: {}'.format(saved_model))

  return constants.ExitCode.SUCCESS
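# A minimal inference sketch, and an assumption rather than part of the
# training script above: it reloads a checkpoint written by saver.save() and
# samples bytes from it through the tensors the graph names explicitly
# ('input_bytes', 'hidden_state', 'pkeep', 'batchsize', 'output_onehot',
# 'next_state'). The sample_from_checkpoint helper name is hypothetical.
import numpy as np
import tensorflow as tf

def sample_from_checkpoint(checkpoint_path, first_byte, length,
                           hidden_state_size, hidden_layer_size):
  with tf.Graph().as_default(), tf.Session() as session:
    # Rebuild the graph from the saved meta file and restore the weights.
    saver = tf.train.import_meta_graph(checkpoint_path + '.meta')
    saver.restore(session, checkpoint_path)
    graph = tf.get_default_graph()

    input_bytes = graph.get_tensor_by_name('input_bytes:0')
    hidden_state = graph.get_tensor_by_name('hidden_state:0')
    pkeep = graph.get_tensor_by_name('pkeep:0')
    batchsize = graph.get_tensor_by_name('batchsize:0')
    output_onehot = graph.get_tensor_by_name('output_onehot:0')
    next_state = graph.get_tensor_by_name('next_state:0')

    # Feed one byte at a time and carry the hidden state forward.
    ry = np.array([[first_byte]])
    rh = np.zeros([1, hidden_state_size * hidden_layer_size])
    sample = [first_byte]
    for _ in range(length - 1):
      ryo, rh = session.run(
          [output_onehot, next_state],
          feed_dict={input_bytes: ry, hidden_state: rh,
                     pkeep: 1.0, batchsize: 1})
      # Greedy choice here; the training script samples with
      # utils.sample_from_probabilities() instead.
      rc = int(np.argmax(ryo[-1]))
      sample.append(rc)
      ry = np.array([[rc]])
    return sample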
# height = geo_score[indices[:, 0], indices[:, 1], :][:, 0:2].sum(axis=1)
# width = geo_score[indices[:, 0], indices[:, 1], :][:, 2:4].sum(axis=1)
# angle = geo_score[indices[:, 0], indices[:, 1], :][:, -1]
#
# text_coords = np.concatenate([indices, height.reshape(-1, 1), width.reshape(-1, 1)], axis=1).astype(np.int)
# text_crop = text_coords.copy()
# text_crop[:, 0:2] = 0
# rboxes = [[text_coords.tolist(), text_crop.tolist(), angle.tolist()]]

# RoIRotate the shared features for each text region, then run recognition.
features, ws = model_RoIrotate(sharedconv, rboxes, expand_px=0, plot=False)  # x_batch['rboxes']
logits = model_recognition(features)
decoded, log_prob = tf.nn.ctc_greedy_decoder(
    logits.numpy().transpose((1, 0, 2)),
    sequence_length=[64] * logits.shape[0])
decoded = tf.sparse.to_dense(decoded[0]).numpy()
recognition = [decode_to_text(CHAR_VECTOR, [j for j in i if j != 0]) for i in decoded]
print(recognition)

# Plot boxes.
for i, box in enumerate(text_box_restored[selected_indices, :, :]):
    im_padded = cv2.polylines(im_padded[:, :, :].copy(), [box.astype(np.int32)],
                              True, color=(255, 255, 0), thickness=1)

    # Draw the recognition results area.
    if len(selected_indices) > 0:
        text_area = box.copy()
        text_area[2, 1], text_area[3, 1], text_area[0, 1], text_area[1, 1] = (
            text_area[1, 1], text_area[0, 1],
            text_area[0, 1] - 15, text_area[1, 1] - 15)
        im_padded = cv2.fillPoly(im_padded.copy(),
                                 [text_area.astype(np.int32).reshape((-1, 1, 2))],
                                 color=(255, 255, 0))
        im_padded = cv2.putText(im_padded.copy(), recognition[i],
                                (box.astype(np.int32)[0, 0], box.astype(np.int32)[0, 1]),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

detection = cv2.resize(f_score_.numpy().copy()[0, :, :, :], im_padded.shape[:2]) * 255
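# decode_to_text maps the decoded class indices back to a string using
# CHAR_VECTOR; its implementation is not shown in this snippet. A minimal
# sketch of what such a helper typically looks like (a hypothetical
# decode_to_text_sketch, assuming CHAR_VECTOR is indexable by class id and
# that index 0 is padding/blank, which the callers above already filter out):
def decode_to_text_sketch(char_vector, decoded_indices):
    """Join the characters for a sequence of non-blank class indices."""
    return ''.join(char_vector[i] for i in decoded_indices)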
import os

import cv2
import numpy as np
import tensorflow as tf

from crnn_model import CRNN
from utils import preprocess_input_image, params, char_dict, decode_to_text

# Model.
model = CRNN(num_classes=params['NUM_CLASSES'], training=False)
model.load_weights('checkpoints/model_default')

# Input: a single image.
x = cv2.imread('test_images/test.jpg', 0)
x = preprocess_input_image(x)
x = x[np.newaxis, :, :, :].astype(np.float32)

# Input: all images in test_images/ (overrides the single image above).
x = []
for img_dir in os.listdir('test_images'):
    print('/test_images/' + img_dir)
    img = cv2.imread('test_images/{}'.format(img_dir), 0)
    img = preprocess_input_image(img)
    x.append(img)
x = np.array(x).astype(np.float32)

# Predict.
logits, raw_pred, rnn_out = model(x)
decoded, log_prob = tf.nn.ctc_greedy_decoder(
    logits.numpy().transpose((1, 0, 2)),
    sequence_length=[params['SEQ_LENGTH']] * x.shape[0],
    merge_repeated=True)
decoded = tf.sparse.to_dense(decoded[0]).numpy()
print([decode_to_text(char_dict, [j for j in i if j != 0]) for i in decoded])
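# Optional variant (not part of the script above): beam-search decoding can be
# swapped in for the greedy decoder when the single best path is too noisy.
# tf.nn.ctc_beam_search_decoder is standard TensorFlow API; beam_width here is
# an arbitrary example value.
decoded_beam, beam_log_prob = tf.nn.ctc_beam_search_decoder(
    logits.numpy().transpose((1, 0, 2)),                  # [time, batch, classes]
    sequence_length=[params['SEQ_LENGTH']] * x.shape[0],
    beam_width=20,
    top_paths=1)
decoded_beam = tf.sparse.to_dense(decoded_beam[0]).numpy()
print([decode_to_text(char_dict, [j for j in i if j != 0]) for i in decoded_beam])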
rboxes = [[
    text_coords.tolist(),
    text_crop.tolist(),
    angle.tolist()
]]

features, ws = model_RoIrotate(sharedconv, rboxes)  # x_batch['rboxes']
logits = model_recognition(features)
decoded, log_prob = tf.nn.ctc_greedy_decoder(
    logits.numpy().transpose((1, 0, 2)),
    sequence_length=[64] * logits.shape[0])
decoded = tf.sparse.to_dense(decoded[0]).numpy()
recognition = [
    decode_to_text(CHAR_VECTOR, [j for j in i if j != 0]) for i in decoded
]
print(recognition)

# Plot boxes.
for i, box in enumerate(text_box_restored[selected_indices, :, :]):
    im_padded = cv2.polylines(im_padded[:, :, :].copy(), [box.astype(np.int32)],
                              True, color=(255, 255, 0), thickness=1)

    # Draw the recognition results area.
    if len(selected_indices) > 0:
# Every 100 iterations:
#   - save the weights of the model
#   - print the current model results
#   - compute the loss on one batch of the test set
if iter % 100 == 0:
    # model.save_weights('checkpoints/model_default')
    decoded, log_prob = tf.nn.ctc_greedy_decoder(
        logits.numpy().transpose((1, 0, 2)),
        sequence_length=[47] * len(y_batch),
        merge_repeated=True)
    decoded = tf.sparse.to_dense(decoded[0]).numpy()
    print(iter, loss.numpy().round(1), [
        decode_to_text(char_dict, [char for char in np.trim_zeros(word, 'b')])
        for word in decoded[:4]
    ])

    loss_train.append(loss.numpy().round(1))
    with open('loss_train.txt', 'w') as file:
        for s in loss_train:
            file.write(str(s) + '\n')

    # Test loss on one batch of data.
    for x_test, y_test in data_generator(batches=1, batch_size=124, epochs=1,
                                         dataset='test'):
        indices, values, dense_shape = sparse_tuple_from(y_test)
        y_test_sparse = tf.sparse.SparseTensor(indices=indices,
                                               values=values,
                                               dense_shape=dense_shape)
loss_hist.append([
    loss_cls.numpy(),
    loss_iou.numpy(),
    loss_angle.numpy(),
    loss_recongition.numpy()
])

# Recognition results: compare decoded predictions against the ground truth.
y_true = tf.sparse.to_dense(
    tf.SparseTensor(*x_batch['text_labels_sparse'])).numpy()
decoded, _ = tf.nn.ctc_greedy_decoder(
    logits.numpy().transpose((1, 0, 2)),
    sequence_length=[logits.shape[1]] * logits.shape[0])
decoded = tf.sparse.to_dense(decoded[0]).numpy()
print([
    decode_to_text(CHAR_VECTOR, [j for j in i if j != 0])
    for i in decoded[:4, :]
], [
    decode_to_text(CHAR_VECTOR, [j for j in i if j != 0])
    for i in y_true[:4, :]
])

# Save checkpoints and the loss history.
if iter % save_iter == 0:
    model_sharedconv.save_weights(cpkt_dir + 'sharedconv')
    model_detection.save_weights(cpkt_dir + 'detection')
    model_recognition.save_weights(cpkt_dir + 'recognition')

    with open('loss.txt', 'a' if load_models else 'w') as file:
        for line in loss_hist:
            for i in line:
                file.write(str(i) + ' ')
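# A minimal sketch (an assumption, not shown in the snippet above) of how the
# three checkpoints written by save_weights() can be restored before resuming
# training or running inference; cpkt_dir and load_models are the same names
# used above.
if load_models:
    model_sharedconv.load_weights(cpkt_dir + 'sharedconv')
    model_detection.load_weights(cpkt_dir + 'detection')
    model_recognition.load_weights(cpkt_dir + 'recognition')
    print('Restored weights from {}'.format(cpkt_dir))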