def do_train(model, params):
    """Run training.

    If target labels are phone, the model is evaluated by PER with 39 phones.

    Every ``params['print_step']`` steps, the loss and the label error rate
    (LER) are logged for the current training mini-batch and for one dev
    mini-batch. At each epoch boundary the loss/LER curves are plotted and,
    once ``params['eval_start_epoch']`` is reached, the dev set is evaluated.
    A checkpoint is written and the test set is scored only when the dev
    error improves on the best value seen so far. The learning rate is
    decayed by the controller after each dev evaluation. An empty
    ``complete.txt`` is written to ``model.save_path`` when training ends.

    Args:
        model: the model to train
        params (dict): A dictionary of parameters
    """
    # Options shared by the train, dev and test sets
    shared_kwargs = {
        'eos_index': params['eos_index'],
        'splice': params['splice'],
        'num_stack': params['num_stack'],
        'num_skip': params['num_skip'],
    }
    is_char = 'char' in params['label_type']
    metric_name = 'CER' if is_char else 'PER'

    # Load dataset
    train_data = Dataset(
        data_type='train', label_type=params['label_type'],
        batch_size=params['batch_size'], max_epoch=params['num_epoch'],
        sort_utt=True, **shared_kwargs)
    dev_data = Dataset(
        data_type='dev', label_type=params['label_type'],
        batch_size=params['batch_size'], sort_utt=False, **shared_kwargs)
    # Phone models are scored against the 39-phone test set
    test_data = Dataset(
        data_type='test',
        label_type=params['label_type'] if is_char else 'phone39',
        batch_size=1, sort_utt=False, **shared_kwargs)
    # TODO(hirofumi): add frame_stacking and splice

    # Tell TensorFlow that the model will be built into the default graph
    with tf.Graph().as_default():

        # Define placeholders
        model.create_placeholders()
        learning_rate_pl = tf.placeholder(tf.float32, name='learning_rate')

        # Add to the graph each operation (including model definition)
        loss_op, _, _, decoder_outputs_train, decoder_outputs_infer = \
            model.compute_loss(
                model.inputs_pl_list[0],
                model.att_labels_pl_list[0],
                model.inputs_seq_len_pl_list[0],
                model.att_labels_seq_len_pl_list[0],
                model.ctc_labels_pl_list[0],
                model.keep_prob_input_pl_list[0],
                model.keep_prob_hidden_pl_list[0],
                model.keep_prob_output_pl_list[0])
        train_op = model.train(loss_op,
                               optimizer=params['optimizer'],
                               learning_rate=learning_rate_pl)
        _, decode_op_infer = model.decoder(decoder_outputs_train,
                                           decoder_outputs_infer,
                                           decode_type='greedy',
                                           beam_width=20)
        ler_op = model.compute_ler(model.att_labels_st_true_pl,
                                   model.att_labels_st_pred_pl)

        # Define learning rate controller
        lr_controller = Controller(
            learning_rate_init=params['learning_rate'],
            decay_start_epoch=params['decay_start_epoch'],
            decay_rate=params['decay_rate'],
            decay_patient_epoch=params['decay_patient_epoch'],
            lower_better=True)

        # Build the summary tensor based on the TensorFlow collection of
        # summaries
        summary_train = tf.summary.merge(model.summaries_train)
        summary_dev = tf.summary.merge(model.summaries_dev)

        # Add the variable initializer operation
        init_op = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints
        saver = tf.train.Saver(max_to_keep=None)

        # Count total param
        parameters_dict, total_parameters = count_total_parameters(
            tf.trainable_variables())
        for parameter_name in sorted(parameters_dict):
            print("%s %d" % (parameter_name, parameters_dict[parameter_name]))
        print("Total %d variables, %s M param" %
              (len(parameters_dict),
               "{:,}".format(total_parameters / 1000000)))

        csv_steps, csv_loss_train, csv_loss_dev = [], [], []
        csv_ler_train, csv_ler_dev = [], []
        # Create a session for running operation on the graph
        with tf.Session() as sess:

            def to_sparse(label_ids):
                # Labels are fed to the LER op as sparse tensors
                return list2sparsetensor(label_ids,
                                         padded_value=params['eos_index'])

            def evaluate(dataset):
                # Score a whole dataset with the metric matching the labels
                if is_char:
                    return do_eval_cer(session=sess,
                                       decode_op=decode_op_infer,
                                       model=model,
                                       dataset=dataset,
                                       eval_batch_size=1)
                return do_eval_per(session=sess,
                                   decode_op=decode_op_infer,
                                   per_op=ler_op,
                                   model=model,
                                   dataset=dataset,
                                   label_type=params['label_type'],
                                   eval_batch_size=1)

            # Instantiate a SummaryWriter to output summaries and the graph
            summary_writer = tf.summary.FileWriter(model.save_path,
                                                   sess.graph)

            # Initialize param
            sess.run(init_op)

            # Train model
            start_time_train = time.time()
            start_time_epoch = time.time()
            start_time_step = time.time()
            ler_dev_best = 1
            learning_rate = float(params['learning_rate'])
            for step, (data, is_new_epoch) in enumerate(train_data):

                # Create feed dictionary for next mini batch (train)
                (inputs, att_labels_train, ctc_labels, inputs_seq_len,
                 att_labels_seq_len, _) = data
                feed_dict_train = {
                    model.inputs_pl_list[0]: inputs,
                    model.att_labels_pl_list[0]: att_labels_train,
                    model.inputs_seq_len_pl_list[0]: inputs_seq_len,
                    model.att_labels_seq_len_pl_list[0]: att_labels_seq_len,
                    model.ctc_labels_pl_list[0]: list2sparsetensor(
                        ctc_labels,
                        padded_value=train_data.ctc_padded_value),
                    model.keep_prob_input_pl_list[0]:
                        params['dropout_input'],
                    model.keep_prob_hidden_pl_list[0]:
                        params['dropout_hidden'],
                    model.keep_prob_output_pl_list[0]:
                        params['dropout_output'],
                    learning_rate_pl: learning_rate
                }

                # Update param
                sess.run(train_op, feed_dict=feed_dict_train)

                if (step + 1) % params['print_step'] == 0:

                    # Create feed dictionary for next mini batch (dev)
                    (inputs, att_labels_dev, ctc_labels, inputs_seq_len,
                     att_labels_seq_len, _), _ = dev_data().next()
                    feed_dict_dev = {
                        model.inputs_pl_list[0]: inputs,
                        model.att_labels_pl_list[0]: att_labels_dev,
                        model.inputs_seq_len_pl_list[0]: inputs_seq_len,
                        model.att_labels_seq_len_pl_list[0]:
                            att_labels_seq_len,
                        model.ctc_labels_pl_list[0]: list2sparsetensor(
                            ctc_labels,
                            padded_value=dev_data.ctc_padded_value),
                        model.keep_prob_input_pl_list[0]: 1.0,
                        model.keep_prob_hidden_pl_list[0]: 1.0,
                        model.keep_prob_output_pl_list[0]: 1.0
                    }

                    # Compute loss (the train loss still uses the training
                    # keep probabilities at this point)
                    loss_train = sess.run(loss_op, feed_dict=feed_dict_train)
                    loss_dev = sess.run(loss_op, feed_dict=feed_dict_dev)
                    csv_steps.append(step)
                    csv_loss_train.append(loss_train)
                    csv_loss_dev.append(loss_dev)

                    # Change to evaluation mode
                    feed_dict_train[model.keep_prob_input_pl_list[0]] = 1.0
                    feed_dict_train[model.keep_prob_hidden_pl_list[0]] = 1.0
                    feed_dict_train[model.keep_prob_output_pl_list[0]] = 1.0

                    # Predict class ids & update event files
                    predicted_ids_train, summary_str_train = sess.run(
                        [decode_op_infer, summary_train],
                        feed_dict=feed_dict_train)
                    predicted_ids_dev, summary_str_dev = sess.run(
                        [decode_op_infer, summary_dev],
                        feed_dict=feed_dict_dev)
                    summary_writer.add_summary(summary_str_train, step + 1)
                    summary_writer.add_summary(summary_str_dev, step + 1)
                    summary_writer.flush()

                    # Convert to sparsetensor to compute LER. The keys must
                    # be the very placeholders ler_op was built from.
                    feed_dict_ler_train = {
                        model.att_labels_st_true_pl:
                            to_sparse(att_labels_train),
                        model.att_labels_st_pred_pl:
                            to_sparse(predicted_ids_train)
                    }
                    feed_dict_ler_dev = {
                        model.att_labels_st_true_pl:
                            to_sparse(att_labels_dev),
                        model.att_labels_st_pred_pl:
                            to_sparse(predicted_ids_dev)
                    }

                    # Compute accuracy
                    ler_train = sess.run(ler_op,
                                         feed_dict=feed_dict_ler_train)
                    ler_dev = sess.run(ler_op, feed_dict=feed_dict_ler_dev)
                    csv_ler_train.append(ler_train)
                    csv_ler_dev.append(ler_dev)

                    duration_step = time.time() - start_time_step
                    print(
                        "Step %d (epoch: %.3f): loss = %.3f (%.3f) / ler = %.3f (%.3f) / lr = %.5f (%.3f min)" %
                        (step + 1, train_data.epoch_detail,
                         loss_train, loss_dev, ler_train, ler_dev,
                         learning_rate, duration_step / 60))
                    start_time_step = time.time()

                # Save checkpoint and evaluate model per epoch
                if not is_new_epoch:
                    continue

                duration_epoch = time.time() - start_time_epoch
                print('-----EPOCH:%d (%.3f min)-----' %
                      (train_data.epoch, duration_epoch / 60))

                # Save figure of loss & ler
                plot_loss(csv_loss_train, csv_loss_dev, csv_steps,
                          save_path=model.save_path)
                plot_ler(csv_ler_train, csv_ler_dev, csv_steps,
                         label_type=params['label_type'],
                         save_path=model.save_path)

                if train_data.epoch >= params['eval_start_epoch']:
                    start_time_eval = time.time()
                    print('=== Dev Data Evaluation ===')
                    ler_dev_epoch = evaluate(dev_data)
                    print(' %s: %f %%' % (metric_name, ler_dev_epoch * 100))

                    if ler_dev_epoch < ler_dev_best:
                        ler_dev_best = ler_dev_epoch
                        print('■■■ ↑Best Score (%s)↑ ■■■' % metric_name)

                        # Save model only when best accuracy is
                        # obtained (check point)
                        checkpoint_file = join(model.save_path, 'model.ckpt')
                        save_path = saver.save(
                            sess, checkpoint_file,
                            global_step=train_data.epoch)
                        print("Model saved in file: %s" % save_path)

                        print('=== Test Data Evaluation ===')
                        ler_test = evaluate(test_data)
                        print(' %s: %f %%' % (metric_name, ler_test * 100))

                    duration_eval = time.time() - start_time_eval
                    print('Evaluation time: %.3f min' % (duration_eval / 60))

                    # Update learning rate. This needs the dev error of the
                    # current epoch, so it only runs once evaluation has.
                    learning_rate = lr_controller.decay_lr(
                        learning_rate=learning_rate,
                        epoch=train_data.epoch,
                        value=ler_dev_epoch)

                start_time_epoch = time.time()

            duration_train = time.time() - start_time_train
            print('Total time: %.3f hour' % (duration_train / 3600))

            # Training was finished correctly
            with open(join(model.save_path, 'complete.txt'), 'w') as f:
                f.write('')
def do_eval(model, params, epoch=None):
    """Evaluate the model.

    Restores a checkpoint from ``model.save_path`` and scores the test set:
    CER and WER when the label type contains 'char', PER otherwise.

    Args:
        model: the model to restore
        params (dict): A dictionary of parameters
        epoch (int): the epoch to restore. ``None`` or ``-1`` restores the
            most recently saved checkpoint.

    Raises:
        ValueError: if no checkpoint state is found in ``model.save_path``
    """
    import os

    # Load dataset (phone models are scored against the 39-phone test set)
    if 'phone' in params['label_type']:
        test_label_type = 'phone39'
    else:
        test_label_type = params['label_type']
    test_data = Dataset(
        data_type='test', label_type=test_label_type,
        batch_size=1, eos_index=params['eos_index'],
        splice=params['splice'],
        num_stack=params['num_stack'], num_skip=params['num_skip'],
        shuffle=False, progressbar=True)
    # TODO(hirofumi): add frame_stacking and splice

    # Define placeholders
    model.create_placeholders()

    # Add to the graph each operation (including model definition)
    _, _, decoder_outputs_train, decoder_outputs_infer = model.compute_loss(
        model.inputs_pl_list[0],
        model.labels_pl_list[0],
        model.inputs_seq_len_pl_list[0],
        model.labels_seq_len_pl_list[0],
        model.keep_prob_input_pl_list[0],
        model.keep_prob_hidden_pl_list[0],
        model.keep_prob_output_pl_list[0])
    _, decode_op_infer = model.decoder(
        decoder_outputs_train,
        decoder_outputs_infer)
    per_op = model.compute_ler(
        model.labels_st_true_pl, model.labels_st_pred_pl)

    # Create a saver for restoring training checkpoints
    saver = tf.train.Saver()

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(model.save_path)
        if not ckpt:
            raise ValueError('There are not any checkpoints.')

        # Use last saved model unless a specific epoch is requested
        model_path = ckpt.model_checkpoint_path
        if epoch is not None and epoch != -1:
            # NOTE: In the training stage, parameters are saved only when
            # accuracies are improved, so the requested epoch must be one
            # that actually produced a checkpoint
            model_path = os.path.join(os.path.dirname(model_path),
                                      'model.ckpt-' + str(epoch))
        saver.restore(sess, model_path)
        print("Model restored: " + model_path)

        print('Test Data Evaluation:')
        if 'char' in params['label_type']:
            cer_test, wer_test = do_eval_cer(
                session=sess,
                decode_op=decode_op_infer,
                model=model,
                dataset=test_data,
                label_type=params['label_type'],
                eval_batch_size=1,
                progressbar=True)
            print(' CER: %f %%' % (cer_test * 100))
            print(' WER: %f %%' % (wer_test * 100))
        else:
            per_test = do_eval_per(
                session=sess,
                decode_op=decode_op_infer,
                per_op=per_op,
                model=model,
                dataset=test_data,
                label_type=params['label_type'],
                eval_batch_size=1,
                progressbar=True)
            print(' PER: %f %%' % (per_test * 100))
def do_eval(model, params, epoch, beam_width, eval_batch_size):
    """Evaluate the model.

    Restores a checkpoint from ``model.save_path`` and scores the test set:
    CER and WER when the label type contains 'char', PER otherwise.

    Args:
        model: the model to restore
        params (dict): A dictionary of parameters
        epoch (int): the epoch to restore. ``-1`` (or ``None``) restores the
            most recently saved checkpoint.
        beam_width (int): beam width for beam search. 1 disables beam
            search, which means greedy decoding.
            NOTE(review): this value is currently not forwarded to
            ``model.decode`` -- confirm whether the decoder should
            receive it.
        eval_batch_size (int): the size of mini-batch when evaluation

    Raises:
        ValueError: if no checkpoint state is found in ``model.save_path``
    """
    import os

    map_file_path = '../metrics/mapping_files/' + \
        params['label_type'] + '.txt'

    # Load dataset (phone models are scored against the 39-phone test set)
    if 'phone' in params['label_type']:
        test_label_type = 'phone39'
    else:
        test_label_type = params['label_type']
    test_data = Dataset(data_type='test', label_type=test_label_type,
                        batch_size=eval_batch_size,
                        map_file_path=map_file_path,
                        splice=params['splice'],
                        num_stack=params['num_stack'],
                        num_skip=params['num_skip'],
                        shuffle=False, progressbar=True)

    # Define placeholders
    model.create_placeholders()

    # Add to the graph each operation (including model definition)
    _, _, decoder_outputs_train, decoder_outputs_infer = model.compute_loss(
        model.inputs_pl_list[0],
        model.labels_pl_list[0],
        model.inputs_seq_len_pl_list[0],
        model.labels_seq_len_pl_list[0],
        model.keep_prob_encoder_pl_list[0],
        model.keep_prob_decoder_pl_list[0],
        model.keep_prob_embedding_pl_list[0])
    _, decode_op_infer = model.decode(decoder_outputs_train,
                                      decoder_outputs_infer)
    per_op = model.compute_ler(model.labels_st_true_pl,
                               model.labels_st_pred_pl)

    # Create a saver for restoring training checkpoints
    saver = tf.train.Saver()

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(model.save_path)
        if not ckpt:
            raise ValueError('There are not any checkpoints.')

        # Use last saved model unless a specific epoch is requested
        model_path = ckpt.model_checkpoint_path
        if epoch is not None and epoch != -1:
            model_path = os.path.join(os.path.dirname(model_path),
                                      'model.ckpt-' + str(epoch))
        saver.restore(sess, model_path)
        print("Model restored: " + model_path)

        print('Test Data Evaluation:')
        if 'char' in params['label_type']:
            cer_test, wer_test = do_eval_cer(
                session=sess,
                decode_op=decode_op_infer,
                model=model,
                dataset=test_data,
                label_type=params['label_type'],
                is_test=True,
                eval_batch_size=eval_batch_size,
                progressbar=True)
            print(' CER: %f %%' % (cer_test * 100))
            print(' WER: %f %%' % (wer_test * 100))
        else:
            per_test = do_eval_per(
                session=sess,
                decode_op=decode_op_infer,
                per_op=per_op,
                model=model,
                dataset=test_data,
                label_type=params['label_type'],
                is_test=True,
                eval_batch_size=eval_batch_size,
                progressbar=True)
            print(' PER: %f %%' % (per_test * 100))
def do_eval(network, param, epoch=None):
    """Evaluate the model.

    Builds the placeholders and the inference graph on ``network``, restores
    a checkpoint from ``network.model_dir`` and scores the test set: CER when
    the label type is 'character', PER (39-phone test set) otherwise.

    Args:
        network: model to restore
        param: A dictionary of parameters
        epoch: int, the epoch to restore. ``None`` restores the most
            recently saved checkpoint.

    Raises:
        ValueError: if no checkpoint state is found in ``network.model_dir``
    """
    import os

    is_char = param['label_type'] == 'character'

    # Load dataset
    test_data = Dataset(data_type='test',
                        label_type='character' if is_char else 'phone39',
                        batch_size=1,
                        eos_index=param['eos_index'],
                        is_sorted=False, is_progressbar=True)

    # Define placeholders
    network.inputs = tf.placeholder(tf.float32,
                                    shape=[None, None, network.input_size],
                                    name='input')
    network.labels = tf.placeholder(tf.int32,
                                    shape=[None, None],
                                    name='label')

    # These are prepared for computing LER
    # NOTE(review): do_eval_per receives `network`; confirm it reads the
    # attribute names used here (labels_st_true / labels_st_pred).
    indices_true_pl = tf.placeholder(tf.int64, name='indices_true')
    values_true_pl = tf.placeholder(tf.int32, name='values_true')
    shape_true_pl = tf.placeholder(tf.int64, name='shape_true')
    network.labels_st_true = tf.SparseTensor(indices_true_pl,
                                             values_true_pl,
                                             shape_true_pl)
    indices_pred_pl = tf.placeholder(tf.int64, name='indices_pred')
    values_pred_pl = tf.placeholder(tf.int32, name='values_pred')
    shape_pred_pl = tf.placeholder(tf.int64, name='shape_pred')
    network.labels_st_pred = tf.SparseTensor(indices_pred_pl,
                                             values_pred_pl,
                                             shape_pred_pl)

    network.inputs_seq_len = tf.placeholder(tf.int32,
                                            shape=[None],
                                            name='inputs_seq_len')
    network.labels_seq_len = tf.placeholder(tf.int32,
                                            shape=[None],
                                            name='labels_seq_len')
    network.keep_prob_input = tf.placeholder(tf.float32,
                                             name='keep_prob_input')
    network.keep_prob_hidden = tf.placeholder(tf.float32,
                                              name='keep_prob_hidden')

    # Add to the graph each operation (including model definition)
    _, _, decoder_outputs_train, decoder_outputs_infer = \
        network.compute_loss(network.inputs,
                             network.labels,
                             network.inputs_seq_len,
                             network.labels_seq_len,
                             network.keep_prob_input,
                             network.keep_prob_hidden)
    _, decode_op_infer = network.decoder(decoder_outputs_train,
                                         decoder_outputs_infer,
                                         decode_type='greedy',
                                         beam_width=20)
    per_op = network.compute_ler(network.labels_st_true,
                                 network.labels_st_pred)

    # Create a saver for restoring training checkpoints
    saver = tf.train.Saver()

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(network.model_dir)
        if not ckpt:
            raise ValueError('There are not any checkpoints.')

        # Use last saved model unless a specific epoch is requested
        model_path = ckpt.model_checkpoint_path
        if epoch is not None:
            model_path = os.path.join(os.path.dirname(model_path),
                                      'model.ckpt-' + str(epoch))
        saver.restore(sess, model_path)
        print("Model restored: " + model_path)

        print('Test Data Evaluation:')
        if is_char:
            cer_test = do_eval_cer(session=sess,
                                   decode_op=decode_op_infer,
                                   network=network,
                                   dataset=test_data,
                                   is_progressbar=True)
            print(' CER: %f %%' % (cer_test * 100))
        else:
            per_test = do_eval_per(session=sess,
                                   decode_op=decode_op_infer,
                                   per_op=per_op,
                                   network=network,
                                   dataset=test_data,
                                   label_type=param['label_type'],
                                   eos_index=param['eos_index'],
                                   is_progressbar=True)
            print(' PER: %f %%' % (per_test * 100))
def do_train(network, param):
    """Run training.

    If target labels are phone, the model is evaluated by PER with 39 phones.

    Every 10 steps, the loss and the label error rate (LER) are logged for
    the current training mini-batch and for one dev mini-batch. A checkpoint
    is saved at the end of every epoch. From epoch 20 onwards the dev set is
    evaluated each epoch, and the test set is scored whenever the dev error
    improves on the best value seen so far. When training ends, the logged
    loss/LER values are saved and an empty ``complete.txt`` is written to
    ``network.model_dir``.

    Args:
        network: network to train
        param: A dictionary of parameters
    """
    import math

    is_char = param['label_type'] == 'character'
    metric_name = 'CER' if is_char else 'PER'

    # Load dataset
    train_data = Dataset(data_type='train',
                         label_type=param['label_type'],
                         batch_size=param['batch_size'],
                         eos_index=param['eos_index'],
                         is_sorted=True)
    dev_data = Dataset(data_type='dev',
                       label_type=param['label_type'],
                       batch_size=param['batch_size'],
                       eos_index=param['eos_index'],
                       is_sorted=False)
    test_data = Dataset(data_type='test',
                        label_type='character' if is_char else 'phone39',
                        batch_size=1,
                        eos_index=param['eos_index'],
                        is_sorted=False)

    # Tell TensorFlow that the model will be built into the default graph
    with tf.Graph().as_default():

        # Define placeholders
        network.inputs = tf.placeholder(
            tf.float32,
            shape=[None, None, network.input_size],
            name='inputs')
        network.labels = tf.placeholder(tf.int32,
                                        shape=[None, None],
                                        name='labels')

        # These are prepared for computing LER
        indices_true_pl = tf.placeholder(tf.int64, name='indices_true')
        values_true_pl = tf.placeholder(tf.int32, name='values_true')
        shape_true_pl = tf.placeholder(tf.int64, name='shape_true')
        network.labels_true_st = tf.SparseTensor(indices_true_pl,
                                                 values_true_pl,
                                                 shape_true_pl)
        indices_pred_pl = tf.placeholder(tf.int64, name='indices_pred')
        values_pred_pl = tf.placeholder(tf.int32, name='values_pred')
        shape_pred_pl = tf.placeholder(tf.int64, name='shape_pred')
        network.labels_pred_st = tf.SparseTensor(indices_pred_pl,
                                                 values_pred_pl,
                                                 shape_pred_pl)

        network.inputs_seq_len = tf.placeholder(tf.int32,
                                                shape=[None],
                                                name='inputs_seq_len')
        network.labels_seq_len = tf.placeholder(tf.int32,
                                                shape=[None],
                                                name='labels_seq_len')
        network.keep_prob_input = tf.placeholder(tf.float32,
                                                 name='keep_prob_input')
        network.keep_prob_hidden = tf.placeholder(tf.float32,
                                                  name='keep_prob_hidden')

        # Add to the graph each operation (including model definition)
        loss_op, _, decoder_outputs_train, decoder_outputs_infer = \
            network.compute_loss(network.inputs,
                                 network.labels,
                                 network.inputs_seq_len,
                                 network.labels_seq_len,
                                 network.keep_prob_input,
                                 network.keep_prob_hidden)
        train_op = network.train(
            loss_op,
            optimizer=param['optimizer'],
            learning_rate_init=float(param['learning_rate']),
            is_scheduled=False)
        _, decode_op_infer = network.decoder(decoder_outputs_train,
                                             decoder_outputs_infer,
                                             decode_type='greedy',
                                             beam_width=20)
        ler_op = network.compute_ler(network.labels_true_st,
                                     network.labels_pred_st)

        # Build the summary tensor based on the TensorFlow collection of
        # summaries
        summary_train = tf.summary.merge(network.summaries_train)
        summary_dev = tf.summary.merge(network.summaries_dev)

        # Add the variable initializer operation
        init_op = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints
        saver = tf.train.Saver(max_to_keep=None)

        # Count total param
        parameters_dict, total_parameters = count_total_parameters(
            tf.trainable_variables())
        for parameter_name in sorted(parameters_dict):
            print("%s %d" % (parameter_name, parameters_dict[parameter_name]))
        print("Total %d variables, %s M param" %
              (len(parameters_dict),
               "{:,}".format(total_parameters / 1000000)))

        # Make mini-batch generator
        mini_batch_train = train_data.next_batch()
        mini_batch_dev = dev_data.next_batch()

        csv_steps, csv_loss_train, csv_loss_dev = [], [], []
        csv_ler_train, csv_ler_dev = [], []
        # Create a session for running operation on the graph
        with tf.Session() as sess:

            def to_sparse(label_ids):
                # Labels are fed to the LER op as sparse tensors
                return list2sparsetensor(label_ids,
                                         padded_value=param['eos_index'])

            def evaluate(dataset):
                # Score a whole dataset with the metric matching the labels
                if is_char:
                    return do_eval_cer(session=sess,
                                       decode_op=decode_op_infer,
                                       network=network,
                                       dataset=dataset,
                                       eval_batch_size=1)
                return do_eval_per(session=sess,
                                   decode_op=decode_op_infer,
                                   per_op=ler_op,
                                   network=network,
                                   dataset=dataset,
                                   label_type=param['label_type'],
                                   eos_index=param['eos_index'],
                                   eval_batch_size=1)

            # Instantiate a SummaryWriter to output summaries and the graph
            summary_writer = tf.summary.FileWriter(network.model_dir,
                                                   sess.graph)

            # Initialize param
            sess.run(init_op)

            # Train model. A partially filled last mini-batch still counts
            # as one iteration, hence the ceiling.
            iter_per_epoch = int(math.ceil(
                train_data.data_num / float(param['batch_size'])))
            max_steps = iter_per_epoch * param['num_epoch']
            start_time_train = time.time()
            start_time_epoch = time.time()
            start_time_step = time.time()
            error_best = 1
            for step in range(max_steps):

                # Create feed dictionary for next mini batch (train)
                inputs, labels_train, inputs_seq_len, labels_seq_len, _ = \
                    next(mini_batch_train)
                feed_dict_train = {
                    network.inputs: inputs,
                    network.labels: labels_train,
                    network.inputs_seq_len: inputs_seq_len,
                    network.labels_seq_len: labels_seq_len,
                    network.keep_prob_input: network.dropout_ratio_input,
                    network.keep_prob_hidden: network.dropout_ratio_hidden,
                    network.lr: float(param['learning_rate'])
                }

                # Update param
                sess.run(train_op, feed_dict=feed_dict_train)

                if (step + 1) % 10 == 0:

                    # Create feed dictionary for next mini batch (dev).
                    # Dev batches are only needed when logging, and are
                    # always run without dropout.
                    inputs, labels_dev, inputs_seq_len, labels_seq_len, _ = \
                        next(mini_batch_dev)
                    feed_dict_dev = {
                        network.inputs: inputs,
                        network.labels: labels_dev,
                        network.inputs_seq_len: inputs_seq_len,
                        network.labels_seq_len: labels_seq_len,
                        network.keep_prob_input: 1.0,
                        network.keep_prob_hidden: 1.0
                    }

                    # Compute loss (the train loss still uses the training
                    # keep probabilities at this point)
                    loss_train = sess.run(loss_op, feed_dict=feed_dict_train)
                    loss_dev = sess.run(loss_op, feed_dict=feed_dict_dev)
                    csv_steps.append(step)
                    csv_loss_train.append(loss_train)
                    csv_loss_dev.append(loss_dev)

                    # Change to evaluation mode
                    feed_dict_train[network.keep_prob_input] = 1.0
                    feed_dict_train[network.keep_prob_hidden] = 1.0

                    # Predict class ids & update event file
                    predicted_ids_train, summary_str_train = sess.run(
                        [decode_op_infer, summary_train],
                        feed_dict=feed_dict_train)
                    predicted_ids_dev, summary_str_dev = sess.run(
                        [decode_op_infer, summary_dev],
                        feed_dict=feed_dict_dev)
                    summary_writer.add_summary(summary_str_train, step + 1)
                    summary_writer.add_summary(summary_str_dev, step + 1)
                    summary_writer.flush()

                    # Convert to sparsetensor to compute LER
                    feed_dict_ler_train = {
                        network.labels_true_st: to_sparse(labels_train),
                        network.labels_pred_st:
                            to_sparse(predicted_ids_train)
                    }
                    feed_dict_ler_dev = {
                        network.labels_true_st: to_sparse(labels_dev),
                        network.labels_pred_st: to_sparse(predicted_ids_dev)
                    }

                    # Compute accuracy
                    ler_train = sess.run(ler_op,
                                         feed_dict=feed_dict_ler_train)
                    ler_dev = sess.run(ler_op, feed_dict=feed_dict_ler_dev)
                    csv_ler_train.append(ler_train)
                    csv_ler_dev.append(ler_dev)

                    duration_step = time.time() - start_time_step
                    print(
                        "Step %d: loss = %.3f (%.3f) / ler = %.4f (%.4f) (%.3f min)" %
                        (step + 1, loss_train, loss_dev, ler_train, ler_dev,
                         duration_step / 60))
                    sys.stdout.flush()
                    start_time_step = time.time()

                # Save checkpoint and evaluate model per epoch
                if (step + 1) % iter_per_epoch == 0 or \
                        (step + 1) == max_steps:
                    duration_epoch = time.time() - start_time_epoch
                    epoch = (step + 1) // iter_per_epoch
                    print('-----EPOCH:%d (%.3f min)-----' %
                          (epoch, duration_epoch / 60))

                    # Save model (check point)
                    checkpoint_file = join(network.model_dir, 'model.ckpt')
                    save_path = saver.save(sess, checkpoint_file,
                                           global_step=epoch)
                    print("Model saved in file: %s" % save_path)

                    if epoch >= 20:
                        start_time_eval = time.time()
                        print('=== Dev Data Evaluation ===')
                        error_dev_epoch = evaluate(dev_data)
                        print(' %s: %f %%' %
                              (metric_name, error_dev_epoch * 100))

                        if error_dev_epoch < error_best:
                            error_best = error_dev_epoch
                            print('■■■ ↑Best Score (%s)↑ ■■■' % metric_name)

                            print('=== Test Data Evaluation ===')
                            error_test = evaluate(test_data)
                            print(' %s: %f %%' %
                                  (metric_name, error_test * 100))

                        duration_eval = time.time() - start_time_eval
                        print('Evaluation time: %.3f min' %
                              (duration_eval / 60))

                    start_time_epoch = time.time()
                    start_time_step = time.time()

            duration_train = time.time() - start_time_train
            print('Total time: %.3f hour' % (duration_train / 3600))

            # Save train & dev loss, ler
            save_loss(csv_steps, csv_loss_train, csv_loss_dev,
                      save_path=network.model_dir)
            save_ler(csv_steps, csv_ler_train, csv_ler_dev,
                     save_path=network.model_dir)

            # Training was finished correctly
            with open(join(network.model_dir, 'complete.txt'), 'w') as f:
                f.write('')