def do_train(model, params):
    """Run training. If target labels are phone, the model is evaluated by PER
    with 39 phones.
    Args:
        model: the model to train
        params (dict): A dictionary of parameters
    """
    # Load dataset
    train_data = Dataset(data_type='train',
                         label_type=params['label_type'],
                         batch_size=params['batch_size'],
                         eos_index=params['eos_index'],
                         max_epoch=params['num_epoch'],
                         splice=params['splice'],
                         num_stack=params['num_stack'],
                         num_skip=params['num_skip'],
                         sort_utt=True)
    dev_data = Dataset(data_type='dev',
                       label_type=params['label_type'],
                       batch_size=params['batch_size'],
                       eos_index=params['eos_index'],
                       splice=params['splice'],
                       num_stack=params['num_stack'],
                       num_skip=params['num_skip'],
                       sort_utt=False)
    if 'char' in params['label_type']:
        test_data = Dataset(data_type='test',
                            label_type=params['label_type'],
                            batch_size=1,
                            eos_index=params['eos_index'],
                            splice=params['splice'],
                            num_stack=params['num_stack'],
                            num_skip=params['num_skip'],
                            sort_utt=False)
    else:
        test_data = Dataset(data_type='test',
                            label_type='phone39',
                            batch_size=1,
                            eos_index=params['eos_index'],
                            splice=params['splice'],
                            num_stack=params['num_stack'],
                            num_skip=params['num_skip'],
                            sort_utt=False)
    # TODO(hirofumi): add frame_stacking and splice

    # Tell TensorFlow that the model will be built into the default graph
    with tf.Graph().as_default():

        # Define placeholders
        model.create_placeholders()
        learning_rate_pl = tf.placeholder(tf.float32, name='learning_rate')

        # Add to the graph each operation (including model definition)
        loss_op, att_logits, ctc_logits, decoder_outputs_train, decoder_outputs_infer = model.compute_loss(
            model.inputs_pl_list[0], model.att_labels_pl_list[0],
            model.inputs_seq_len_pl_list[0],
            model.att_labels_seq_len_pl_list[0], model.ctc_labels_pl_list[0],
            model.keep_prob_input_pl_list[0],
            model.keep_prob_hidden_pl_list[0],
            model.keep_prob_output_pl_list[0])
        train_op = model.train(loss_op,
                               optimizer=params['optimizer'],
                               learning_rate=learning_rate_pl)
        _, decode_op_infer = model.decoder(decoder_outputs_train,
                                           decoder_outputs_infer,
                                           decode_type='greedy',
                                           beam_width=20)
        ler_op = model.compute_ler(model.att_labels_st_true_pl,
                                   model.att_labels_st_pred_pl)

        # Define learning rate controller
        lr_controller = Controller(
            learning_rate_init=params['learning_rate'],
            decay_start_epoch=params['decay_start_epoch'],
            decay_rate=params['decay_rate'],
            decay_patient_epoch=params['decay_patient_epoch'],
            lower_better=True)

        # Build the summary tensor based on the TensorFlow collection of
        # summaries
        summary_train = tf.summary.merge(model.summaries_train)
        summary_dev = tf.summary.merge(model.summaries_dev)

        # Add the variable initializer operation
        init_op = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints
        saver = tf.train.Saver(max_to_keep=None)

        # Count total parameters
        parameters_dict, total_parameters = count_total_parameters(
            tf.trainable_variables())
        for parameter_name in sorted(parameters_dict.keys()):
            print("%s %d" % (parameter_name, parameters_dict[parameter_name]))
        print("Total %d variables, %s M param" %
              (len(parameters_dict.keys()), "{:,}".format(
                  total_parameters / 1000000)))

        csv_steps, csv_loss_train, csv_loss_dev = [], [], []
        csv_ler_train, csv_ler_dev = [], []
        # Create a session for running operations on the graph
        with tf.Session() as sess:

            # Instantiate a SummaryWriter to output summaries and the graph
            summary_writer = tf.summary.FileWriter(model.save_path, sess.graph)

            # Initialize parameters
            sess.run(init_op)

            # Train model
            start_time_train = time.time()
            start_time_epoch = time.time()
            start_time_step = time.time()
            ler_dev_best = 1
            learning_rate = float(params['learning_rate'])
            for step, (data, is_new_epoch) in enumerate(train_data):

                # Create feed dictionary for next mini batch (train)
                inputs, att_labels_train, ctc_labels, inputs_seq_len, att_labels_seq_len, _ = data
                feed_dict_train = {
                    model.inputs_pl_list[0]:
                    inputs,
                    model.att_labels_pl_list[0]:
                    att_labels_train,
                    model.inputs_seq_len_pl_list[0]:
                    inputs_seq_len,
                    model.att_labels_seq_len_pl_list[0]:
                    att_labels_seq_len,
                    model.ctc_labels_pl_list[0]:
                    list2sparsetensor(
                        ctc_labels, padded_value=train_data.ctc_padded_value),
                    model.keep_prob_input_pl_list[0]:
                    params['dropout_input'],
                    model.keep_prob_hidden_pl_list[0]:
                    params['dropout_hidden'],
                    model.keep_prob_output_pl_list[0]:
                    params['dropout_output'],
                    learning_rate_pl:
                    learning_rate
                }

                # Update parameters
                sess.run(train_op, feed_dict=feed_dict_train)

                if (step + 1) % params['print_step'] == 0:

                    # Create feed dictionary for next mini batch (dev)
                    (inputs, att_labels_dev, ctc_labels, inputs_seq_len,
                     att_labels_seq_len, _), _ = dev_data.next()
                    feed_dict_dev = {
                        model.inputs_pl_list[0]:
                        inputs,
                        model.att_labels_pl_list[0]:
                        att_labels_dev,
                        model.inputs_seq_len_pl_list[0]:
                        inputs_seq_len,
                        model.att_labels_seq_len_pl_list[0]:
                        att_labels_seq_len,
                        model.ctc_labels_pl_list[0]:
                        list2sparsetensor(
                            ctc_labels,
                            padded_value=dev_data.ctc_padded_value),
                        model.keep_prob_input_pl_list[0]:
                        1.0,
                        model.keep_prob_hidden_pl_list[0]:
                        1.0,
                        model.keep_prob_output_pl_list[0]:
                        1.0
                    }

                    # Compute loss
                    loss_train = sess.run(loss_op, feed_dict=feed_dict_train)
                    loss_dev = sess.run(loss_op, feed_dict=feed_dict_dev)
                    csv_steps.append(step)
                    csv_loss_train.append(loss_train)
                    csv_loss_dev.append(loss_dev)

                    # Change to evaluation mode
                    feed_dict_train[model.keep_prob_input_pl_list[0]] = 1.0
                    feed_dict_train[model.keep_prob_hidden_pl_list[0]] = 1.0
                    feed_dict_train[model.keep_prob_output_pl_list[0]] = 1.0

                    # Predict class ids & update event files
                    predicted_ids_train, summary_str_train = sess.run(
                        [decode_op_infer, summary_train],
                        feed_dict=feed_dict_train)
                    predicted_ids_dev, summary_str_dev = sess.run(
                        [decode_op_infer, summary_dev],
                        feed_dict=feed_dict_dev)
                    summary_writer.add_summary(summary_str_train, step + 1)
                    summary_writer.add_summary(summary_str_dev, step + 1)
                    summary_writer.flush()

                    # Convert to sparsetensor to compute LER
                    feed_dict_ler_train = {
                        model.att_labels_st_true_pl:
                        list2sparsetensor(att_labels_train,
                                          padded_value=params['eos_index']),
                        model.att_labels_st_pred_pl:
                        list2sparsetensor(predicted_ids_train,
                                          padded_value=params['eos_index'])
                    }
                    feed_dict_ler_dev = {
                        model.att_labels_st_true_pl:
                        list2sparsetensor(att_labels_dev,
                                          padded_value=params['eos_index']),
                        model.att_labels_st_pred_pl:
                        list2sparsetensor(predicted_ids_dev,
                                          padded_value=params['eos_index'])
                    }

                    # Compute accuracy
                    ler_train = sess.run(ler_op, feed_dict=feed_dict_ler_train)
                    ler_dev = sess.run(ler_op, feed_dict=feed_dict_ler_dev)
                    csv_ler_train.append(ler_train)
                    csv_ler_dev.append(ler_dev)

                    duration_step = time.time() - start_time_step
                    print(
                        "Step %d (epoch: %.3f): loss = %.3f (%.3f) / ler = %.3f (%.3f) / lr = %.5f (%.3f min)"
                        % (step + 1, train_data.epoch_detail, loss_train,
                           loss_dev, ler_train, ler_dev, learning_rate,
                           duration_step / 60))
                    # sys.stdout.flush()
                    start_time_step = time.time()

                # Save checkpoint and evaluate model per epoch
                if is_new_epoch:
                    duration_epoch = time.time() - start_time_epoch
                    print('-----EPOCH:%d (%.3f min)-----' %
                          (train_data.epoch, duration_epoch / 60))

                    # Save figure of loss & ler
                    plot_loss(csv_loss_train,
                              csv_loss_dev,
                              csv_steps,
                              save_path=model.save_path)
                    plot_ler(csv_ler_train,
                             csv_ler_dev,
                             csv_steps,
                             label_type=params['label_type'],
                             save_path=model.save_path)

                    if train_data.epoch >= params['eval_start_epoch']:
                        start_time_eval = time.time()
                        if 'char' in params['label_type']:
                            print('=== Dev Data Evaluation ===')
                            ler_dev_epoch = do_eval_cer(
                                session=sess,
                                decode_op=decode_op_infer,
                                model=model,
                                dataset=dev_data,
                                eval_batch_size=1)
                            print('  CER: %f %%' % (ler_dev_epoch * 100))

                            if ler_dev_epoch < ler_dev_best:
                                ler_dev_best = ler_dev_epoch
                                print('■■■ ↑Best Score (CER)↑ ■■■')

                                # Save model only when best accuracy is
                                # obtained (check point)
                                checkpoint_file = join(model.save_path,
                                                       'model.ckpt')
                                save_path = saver.save(
                                    sess,
                                    checkpoint_file,
                                    global_step=train_data.epoch)
                                print("Model saved in file: %s" % save_path)

                                print('=== Test Data Evaluation ===')
                                ler_test = do_eval_cer(
                                    session=sess,
                                    decode_op=decode_op_infer,
                                    model=model,
                                    dataset=test_data,
                                    eval_batch_size=1)
                                print('  CER: %f %%' % (ler_test * 100))

                        else:
                            print('=== Dev Data Evaluation ===')
                            ler_dev_epoch = do_eval_per(
                                session=sess,
                                decode_op=decode_op_infer,
                                per_op=ler_op,
                                model=model,
                                dataset=dev_data,
                                label_type=params['label_type'],
                                eval_batch_size=1)
                            print('  PER: %f %%' % (ler_dev_epoch * 100))

                            if ler_dev_epoch < ler_dev_best:
                                ler_dev_best = ler_dev_epoch
                                print('■■■ ↑Best Score (PER)↑ ■■■')

                                # Save model only when best accuracy is
                                # obtained (check point)
                                checkpoint_file = join(model.save_path,
                                                       'model.ckpt')
                                save_path = saver.save(
                                    sess,
                                    checkpoint_file,
                                    global_step=train_data.epoch)
                                print("Model saved in file: %s" % save_path)

                                print('=== Test Data Evaluation ===')
                                ler_test = do_eval_per(
                                    session=sess,
                                    decode_op=decode_op_infer,
                                    per_op=ler_op,
                                    model=model,
                                    dataset=test_data,
                                    label_type=params['label_type'],
                                    eval_batch_size=1)
                                print('  PER: %f %%' % (ler_test * 100))

                        duration_eval = time.time() - start_time_eval
                        print('Evaluation time: %.3f min' %
                              (duration_eval / 60))

                        # Update learning rate
                        learning_rate = lr_controller.decay_lr(
                            learning_rate=learning_rate,
                            epoch=train_data.epoch,
                            value=ler_dev_epoch)

                    start_time_epoch = time.time()

            duration_train = time.time() - start_time_train
            print('Total time: %.3f hour' % (duration_train / 3600))

            # Training was finished correctly
            with open(join(model.save_path, 'complete.txt'), 'w') as f:
                f.write('')
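
The `Controller` used above for learning-rate decay is constructed but not defined in this example. Below is a minimal sketch of what such a controller might look like, assuming it keeps the rate fixed until `decay_start_epoch` and then multiplies it by `decay_rate` whenever the monitored dev metric has not improved for `decay_patient_epoch` epochs; the project's actual implementation may differ.

class Controller(object):
    """Hypothetical learning-rate controller (sketch only, not the original code)."""

    def __init__(self, learning_rate_init, decay_start_epoch, decay_rate,
                 decay_patient_epoch, lower_better=True):
        self.learning_rate_init = learning_rate_init
        self.decay_start_epoch = decay_start_epoch
        self.decay_rate = decay_rate
        self.decay_patient_epoch = decay_patient_epoch
        self.lower_better = lower_better
        self.best_value = float('inf')
        self.not_improved_epoch = 0

    def decay_lr(self, learning_rate, epoch, value):
        # Flip the sign of higher-is-better metrics so "smaller is better" applies everywhere
        if not self.lower_better:
            value = -value
        if epoch < self.decay_start_epoch:
            return learning_rate
        if value < self.best_value:
            # The dev metric improved: remember it and keep the current rate
            self.best_value = value
            self.not_improved_epoch = 0
            return learning_rate
        self.not_improved_epoch += 1
        if self.not_improved_epoch < self.decay_patient_epoch:
            return learning_rate
        # No improvement for decay_patient_epoch epochs: decay the rate
        self.not_improved_epoch = 0
        return learning_rate * self.decay_rate

Code Example #2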
def do_train(model, params, gpu_indices):
    """Run training.
    Args:
        model: the model to train
        params (dict): A dictionary of parameters
        gpu_indices (list): GPU indices
    """
    if 'kanji' in params['label_type']:
        map_file_path = '../metrics/mapping_files/' + \
            params['label_type'] + '_' + params['train_data_size'] + '.txt'
    elif 'kana' in params['label_type']:
        map_file_path = '../metrics/mapping_files/' + \
            params['label_type'] + '.txt'

    # Load dataset
    train_data = Dataset(
        data_type='train', train_data_size=params['train_data_size'],
        label_type=params['label_type'], map_file_path=map_file_path,
        batch_size=params['batch_size'], max_epoch=params['num_epoch'],
        splice=params['splice'],
        num_stack=params['num_stack'], num_skip=params['num_skip'],
        sort_utt=True, sort_stop_epoch=params['sort_stop_epoch'],
        num_gpu=len(gpu_indices))
    dev_data = Dataset(
        data_type='dev', train_data_size=params['train_data_size'],
        label_type=params['label_type'], map_file_path=map_file_path,
        batch_size=params['batch_size'], splice=params['splice'],
        num_stack=params['num_stack'], num_skip=params['num_skip'],
        sort_utt=False, num_gpu=len(gpu_indices))

    # Tell TensorFlow that the model will be built into the default graph
    with tf.Graph().as_default(), tf.device('/cpu:0'):

        # Create a variable to track the global step
        global_step = tf.Variable(0, name='global_step', trainable=False)

        # Set optimizer
        learning_rate_pl = tf.placeholder(tf.float32, name='learning_rate')
        optimizer = model._set_optimizer(
            params['optimizer'], learning_rate_pl)

        # Calculate the gradients for each model tower
        total_grads_and_vars, total_losses = [], []
        decode_ops_infer, ler_ops = [], []
        all_devices = ['/gpu:%d' % i_gpu for i_gpu in range(len(gpu_indices))]
        # NOTE: /cpu:0 is prepared for evaluation
        with tf.variable_scope(tf.get_variable_scope()):
            for i_gpu in range(len(all_devices)):
                with tf.device(all_devices[i_gpu]):
                    with tf.name_scope('tower_gpu%d' % i_gpu) as scope:

                        # Define placeholders in each tower
                        model.create_placeholders()

                        # Calculate the total loss for the current tower of the
                        # model. This function constructs the entire model but
                        # shares the variables across all towers.
                        tower_loss, tower_logits, tower_decoder_outputs_train, tower_decoder_outputs_infer = model.compute_loss(
                            model.inputs_pl_list[i_gpu],
                            model.labels_pl_list[i_gpu],
                            model.inputs_seq_len_pl_list[i_gpu],
                            model.labels_seq_len_pl_list[i_gpu],
                            model.keep_prob_encoder_pl_list[i_gpu],
                            model.keep_prob_decoder_pl_list[i_gpu],
                            model.keep_prob_embedding_pl_list[i_gpu],
                            scope)
                        tower_loss = tf.expand_dims(tower_loss, axis=0)
                        total_losses.append(tower_loss)

                        # Reuse variables for the next tower
                        tf.get_variable_scope().reuse_variables()

                        # Calculate the gradients for the batch of data on this
                        # tower
                        tower_grads_and_vars = optimizer.compute_gradients(
                            tower_loss)

                        # Gradient clipping
                        tower_grads_and_vars = model._clip_gradients(
                            tower_grads_and_vars)

                        # TODO: Optionally add gradient noise

                        # Keep track of the gradients across all towers
                        total_grads_and_vars.append(tower_grads_and_vars)

                        # Add to the graph each operation per tower
                        _, decode_op_tower_infer = model.decode(
                            tower_decoder_outputs_train,
                            tower_decoder_outputs_infer)
                        decode_ops_infer.append(decode_op_tower_infer)
                        # ler_op_tower = model.compute_ler(
                        #     decode_op_tower, model.labels_pl_list[i_gpu])
                        ler_op_tower = model.compute_ler(
                            model.labels_st_true_pl_list[i_gpu],
                            model.labels_st_pred_pl_list[i_gpu])
                        ler_op_tower = tf.expand_dims(ler_op_tower, axis=0)
                        ler_ops.append(ler_op_tower)

        # Aggregate losses, then calculate average loss
        total_losses = tf.concat(axis=0, values=total_losses)
        loss_op = tf.reduce_mean(total_losses, axis=0)
        ler_ops = tf.concat(axis=0, values=ler_ops)
        ler_op = tf.reduce_mean(ler_ops, axis=0)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers
        average_grads_and_vars = average_gradients(total_grads_and_vars)

        # Apply the gradients to adjust the shared variables.
        train_op = optimizer.apply_gradients(average_grads_and_vars,
                                             global_step=global_step)

        # Define learning rate controller
        lr_controller = Controller(
            learning_rate_init=params['learning_rate'],
            decay_start_epoch=params['decay_start_epoch'],
            decay_rate=params['decay_rate'],
            decay_patient_epoch=params['decay_patient_epoch'],
            lower_better=True)

        # Build the summary tensor based on the TensorFlow collection of
        # summaries
        summary_train = tf.summary.merge(model.summaries_train)
        summary_dev = tf.summary.merge(model.summaries_dev)

        # Add the variable initializer operation
        init_op = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints
        saver = tf.train.Saver(max_to_keep=None)

        # Count total parameters
        parameters_dict, total_parameters = count_total_parameters(
            tf.trainable_variables())
        for parameter_name in sorted(parameters_dict.keys()):
            print("%s %d" %
                  (parameter_name, parameters_dict[parameter_name]))
        print("Total %d variables, %s M parameters" %
              (len(parameters_dict.keys()),
               "{:,}".format(total_parameters / 1000000)))

        csv_steps, csv_loss_train, csv_loss_dev = [], [], []
        csv_ler_train, csv_ler_dev = [], []
        # Create a session for running operations on the graph
        # NOTE: Start running operations on the Graph. allow_soft_placement
        # must be set to True to build towers on GPU, as some of the ops do not
        # have GPU implementations.
        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                              log_device_placement=False)) as sess:

            # Instantiate a SummaryWriter to output summaries and the graph
            summary_writer = tf.summary.FileWriter(
                model.save_path, sess.graph)

            # Initialize parameters
            sess.run(init_op)

            # Train model
            start_time_train = time.time()
            start_time_epoch = time.time()
            start_time_step = time.time()
            cer_dev_best = 1
            not_improved_epoch = 0
            learning_rate = float(params['learning_rate'])
            for step, (data, is_new_epoch) in enumerate(train_data):

                # Create feed dictionary for next mini batch (train)
                inputs, labels_train, inputs_seq_len, labels_seq_len, _ = data
                feed_dict_train = {}
                for i_gpu in range(len(gpu_indices)):
                    feed_dict_train[model.inputs_pl_list[i_gpu]
                                    ] = inputs[i_gpu]
                    feed_dict_train[model.labels_pl_list[i_gpu]
                                    ] = labels_train[i_gpu]
                    feed_dict_train[model.inputs_seq_len_pl_list[i_gpu]
                                    ] = inputs_seq_len[i_gpu]
                    feed_dict_train[model.labels_seq_len_pl_list[i_gpu]
                                    ] = labels_seq_len[i_gpu]
                    feed_dict_train[model.keep_prob_encoder_pl_list[i_gpu]
                                    ] = 1 - float(params['dropout_encoder'])
                    feed_dict_train[model.keep_prob_decoder_pl_list[i_gpu]
                                    ] = 1 - float(params['dropout_decoder'])
                    feed_dict_train[model.keep_prob_embedding_pl_list[i_gpu]
                                    ] = 1 - float(params['dropout_embedding'])
                feed_dict_train[learning_rate_pl] = learning_rate

                # Update parameters
                sess.run(train_op, feed_dict=feed_dict_train)

                if (step + 1) % int(params['print_step'] / len(gpu_indices)) == 0:

                    # Create feed dictionary for next mini batch (dev)
                    inputs, labels_dev, inputs_seq_len, labels_seq_len, _ = dev_data.next()[
                        0]
                    feed_dict_dev = {}
                    for i_gpu in range(len(gpu_indices)):
                        feed_dict_dev[model.inputs_pl_list[i_gpu]
                                      ] = inputs[i_gpu]
                        feed_dict_dev[model.labels_pl_list[i_gpu]
                                      ] = labels_dev[i_gpu]
                        feed_dict_dev[model.inputs_seq_len_pl_list[i_gpu]
                                      ] = inputs_seq_len[i_gpu]
                        feed_dict_dev[model.labels_seq_len_pl_list[i_gpu]
                                      ] = labels_seq_len[i_gpu]
                        feed_dict_dev[model.keep_prob_encoder_pl_list[i_gpu]
                                      ] = 1.0
                        feed_dict_dev[model.keep_prob_decoder_pl_list[i_gpu]
                                      ] = 1.0
                        feed_dict_dev[model.keep_prob_embedding_pl_list[i_gpu]
                                      ] = 1.0

                    # Compute loss
                    loss_train = sess.run(
                        loss_op, feed_dict=feed_dict_train)
                    loss_dev = sess.run(loss_op, feed_dict=feed_dict_dev)
                    csv_steps.append(step)
                    csv_loss_train.append(loss_train)
                    csv_loss_dev.append(loss_dev)

                    # Change to evaluation mode
                    for i_gpu in range(len(gpu_indices)):
                        feed_dict_train[model.keep_prob_encoder_pl_list[i_gpu]] = 1.0
                        feed_dict_train[model.keep_prob_decoder_pl_list[i_gpu]] = 1.0
                        feed_dict_train[model.keep_prob_embedding_pl_list[i_gpu]] = 1.0

                    # Predict class ids
                    predicted_ids_train_list, summary_str_train = sess.run(
                        [decode_ops_infer, summary_train], feed_dict=feed_dict_train)
                    predicted_ids_dev_list, summary_str_dev = sess.run(
                        [decode_ops_infer, summary_dev], feed_dict=feed_dict_dev)

                    # Convert to sparsetensor to compute LER
                    feed_dict_ler_train = {}
                    for i_gpu in range(len(gpu_indices)):
                        feed_dict_ler_train[model.labels_st_true_pl_list[i_gpu]] = list2sparsetensor(
                            labels_train[i_gpu],
                            padded_value=train_data.padded_value)
                        feed_dict_ler_train[model.labels_st_pred_pl_list[i_gpu]] = list2sparsetensor(
                            predicted_ids_train_list[i_gpu],
                            padded_value=train_data.padded_value)
                    feed_dict_ler_dev = {}
                    for i_gpu in range(len(gpu_indices)):
                        feed_dict_ler_dev[model.labels_st_true_pl_list[i_gpu]] = list2sparsetensor(
                            labels_dev[i_gpu],
                            padded_value=dev_data.padded_value)
                        feed_dict_ler_dev[model.labels_st_pred_pl_list[i_gpu]] = list2sparsetensor(
                            predicted_ids_dev_list[i_gpu],
                            padded_value=dev_data.padded_value)

                    # Compute accuracy
                    # ler_train = sess.run(ler_op, feed_dict=feed_dict_ler_train)
                    # ler_dev = sess.run(ler_op, feed_dict=feed_dict_ler_dev)
                    ler_train = 1
                    ler_dev = 1
                    csv_ler_train.append(ler_train)
                    csv_ler_dev.append(ler_dev)
                    # TODO: fix this

                    # Update event files
                    summary_writer.add_summary(summary_str_train, step + 1)
                    summary_writer.add_summary(summary_str_dev, step + 1)
                    summary_writer.flush()

                    duration_step = time.time() - start_time_step
                    print("Step %d (epoch: %.3f): loss = %.3f (%.3f) / ler = %.3f (%.3f) / lr = %.5f (%.3f min)" %
                          (step + 1, train_data.epoch_detail, loss_train, loss_dev, ler_train, ler_dev,
                           learning_rate, duration_step / 60))
                    sys.stdout.flush()
                    start_time_step = time.time()

                # Save checkpoint and evaluate model per epoch
                if is_new_epoch:
                    duration_epoch = time.time() - start_time_epoch
                    print('-----EPOCH:%d (%.3f min)-----' %
                          (train_data.epoch, duration_epoch / 60))

                    # Save figure of loss & ler
                    plot_loss(csv_loss_train, csv_loss_dev, csv_steps,
                              save_path=model.save_path)
                    plot_ler(csv_ler_train, csv_ler_dev, csv_steps,
                             label_type=params['label_type'],
                             save_path=model.save_path)

                    if train_data.epoch >= params['eval_start_epoch']:
                        start_time_eval = time.time()
                        print('=== Dev Data Evaluation ===')
                        cer_dev_epoch = do_eval_cer(
                            session=sess,
                            decode_ops=decode_ops_infer,
                            model=model,
                            dataset=dev_data,
                            label_type=params['label_type'],
                            train_data_size=params['train_data_size'],
                            eval_batch_size=1)
                        print('  CER: %f %%' % (cer_dev_epoch * 100))

                        if cer_dev_epoch < cer_dev_best:
                            cer_dev_best = cer_dev_epoch
                            print('■■■ ↑Best Score (CER)↑ ■■■')

                            # Save model (check point)
                            checkpoint_file = join(
                                model.save_path, 'model.ckpt')
                            save_path = saver.save(
                                sess, checkpoint_file, global_step=train_data.epoch)
                            print("Model saved in file: %s" % save_path)
                        else:
                            not_improved_epoch += 1

                        duration_eval = time.time() - start_time_eval
                        print('Evaluation time: %.3f min' %
                              (duration_eval / 60))

                        # Early stopping
                        if not_improved_epoch == params['not_improved_patient_epoch']:
                            break

                        # Update learning rate
                        learning_rate = lr_controller.decay_lr(
                            learning_rate=learning_rate,
                            epoch=train_data.epoch,
                            value=cer_dev_epoch)

                    start_time_epoch = time.time()

            duration_train = time.time() - start_time_train
            print('Total time: %.3f hour' % (duration_train / 3600))

            # Training was finished correctly
            with open(join(model.save_path, 'complete.txt'), 'w') as f:
                f.write('')
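
The multi-GPU example above relies on `average_gradients` to merge the per-tower gradients before `apply_gradients`. A minimal sketch following the standard TensorFlow multi-tower pattern is shown below; the helper actually used by the project may differ in details such as None-gradient handling.

import tensorflow as tf

def average_gradients(tower_grads_and_vars):
    """Average gradients over towers (sketch of the standard multi-GPU pattern).

    Args:
        tower_grads_and_vars: a list over towers, where each element is the
            list of (gradient, variable) tuples returned by compute_gradients()
    Returns:
        A single list of (averaged_gradient, variable) tuples
    """
    average_grads_and_vars = []
    # zip(*...) groups the (grad, var) pairs that belong to the same variable
    for grads_and_vars_per_var in zip(*tower_grads_and_vars):
        grads = [tf.expand_dims(grad, axis=0)
                 for grad, _ in grads_and_vars_per_var]
        # Average over the tower dimension
        grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        # Variables are shared across towers, so any tower's variable will do
        var = grads_and_vars_per_var[0][1]
        average_grads_and_vars.append((grad, var))
    return average_grads_and_vars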
Code Example #3
def do_train(model, params):
    """Run training.
    Args:
        model: the model to train
        params (dict): A dictionary of parameters
    """
    # Load dataset
    train_data = Dataset(
        data_type='train', label_type=params['label_type'],
        batch_size=params['batch_size'], max_epoch=params['num_epoch'],
        splice=params['splice'],
        num_stack=params['num_stack'], num_skip=params['num_skip'],
        shuffle=True)
    dev_data = Dataset(
        data_type='dev', label_type=params['label_type'],
        batch_size=params['batch_size'], splice=params['splice'],
        num_stack=params['num_stack'], num_skip=params['num_skip'],
        shuffle=False)
    test_data = Dataset(
        data_type='dev', label_type=params['label_type'],
        batch_size=params['batch_size'], splice=params['splice'],
        num_stack=params['num_stack'], num_skip=params['num_skip'],
        shuffle=False)

    # Tell TensorFlow that the model will be built into the default graph
    with tf.Graph().as_default():

        # Define placeholders
        model.create_placeholders()
        learning_rate_pl = tf.placeholder(tf.float32, name='learning_rate')

        # Add to the graph each operation (including model definition)
        loss_op, logits = model.compute_loss(
            model.inputs_pl_list[0],
            model.labels_pl_list[0],
            model.inputs_seq_len_pl_list[0],
            model.keep_prob_pl_list[0])
        train_op = model.train(
            loss_op,
            optimizer=params['optimizer'],
            learning_rate=learning_rate_pl)
        decode_op = model.decoder(logits,
                                  model.inputs_seq_len_pl_list[0],
                                  beam_width=params['beam_width'])
        ler_op = model.compute_ler(decode_op, model.labels_pl_list[0])
        posteriors_op = model.posteriors(logits, blank_prior=1)

        # Define learning rate controller
        lr_controller = Controller(
            learning_rate_init=params['learning_rate'],
            decay_start_epoch=params['decay_start_epoch'],
            decay_rate=params['decay_rate'],
            decay_patient_epoch=params['decay_patient_epoch'],
            lower_better=False)

        # Build the summary tensor based on the TensorFlow collection of
        # summaries
        summary_train = tf.summary.merge(model.summaries_train)
        summary_dev = tf.summary.merge(model.summaries_dev)

        # Add the variable initializer operation
        init_op = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints
        saver = tf.train.Saver(max_to_keep=None)

        # Count total parameters
        parameters_dict, total_parameters = count_total_parameters(
            tf.trainable_variables())
        for parameter_name in sorted(parameters_dict.keys()):
            print("%s %d" % (parameter_name, parameters_dict[parameter_name]))
        print("Total %d variables, %s M parameters" %
              (len(parameters_dict.keys()),
               "{:,}".format(total_parameters / 1000000)))

        csv_steps, csv_loss_train, csv_loss_dev = [], [], []
        csv_ler_train, csv_ler_dev = [], []
        # Create a session for running operations on the graph
        with tf.Session() as sess:

            # Instantiate a SummaryWriter to output summaries and the graph
            summary_writer = tf.summary.FileWriter(
                model.save_path, sess.graph)

            # Initialize parameters
            sess.run(init_op)

            # Train model
            start_time_train = time.time()
            start_time_epoch = time.time()
            start_time_step = time.time()
            fmean_dev_best = 0
            fmean_time_dev_best = 0
            learning_rate = float(params['learning_rate'])
            for step, (data, is_new_epoch) in enumerate(train_data):

                # Create feed dictionary for next mini batch (train)
                inputs, labels, inputs_seq_len, _ = data
                feed_dict_train = {
                    model.inputs_pl_list[0]: inputs[0],
                    model.labels_pl_list[0]: list2sparsetensor(
                        labels[0], padded_value=train_data.padded_value),
                    model.inputs_seq_len_pl_list[0]: inputs_seq_len[0],
                    model.keep_prob_pl_list[0]: 1 - float(params['dropout']),
                    learning_rate_pl: learning_rate
                }

                # Update parameters
                sess.run(train_op, feed_dict=feed_dict_train)

                if (step + 1) % params['print_step'] == 0:

                    # Create feed dictionary for next mini batch (dev)
                    (inputs, labels, inputs_seq_len, _), _ = dev_data.next()
                    feed_dict_dev = {
                        model.inputs_pl_list[0]: inputs[0],
                        model.labels_pl_list[0]: list2sparsetensor(
                            labels[0], padded_value=dev_data.padded_value),
                        model.inputs_seq_len_pl_list[0]: inputs_seq_len[0],
                        model.keep_prob_pl_list[0]: 1.0
                    }

                    # Compute loss
                    loss_train = sess.run(loss_op, feed_dict=feed_dict_train)
                    loss_dev = sess.run(loss_op, feed_dict=feed_dict_dev)
                    csv_steps.append(step)
                    csv_loss_train.append(loss_train)
                    csv_loss_dev.append(loss_dev)

                    # Change to evaluation mode
                    feed_dict_train[model.keep_prob_pl_list[0]] = 1.0

                    # Compute accuracy & update event files
                    ler_train, summary_str_train = sess.run(
                        [ler_op, summary_train], feed_dict=feed_dict_train)
                    ler_dev, summary_str_dev = sess.run(
                        [ler_op, summary_dev], feed_dict=feed_dict_dev)
                    csv_ler_train.append(ler_train)
                    csv_ler_dev.append(ler_dev)
                    summary_writer.add_summary(summary_str_train, step + 1)
                    summary_writer.add_summary(summary_str_dev, step + 1)
                    summary_writer.flush()

                    duration_step = time.time() - start_time_step
                    print("Step %d (epoch: %.3f): loss = %.3f (%.3f) / ler = %.3f (%.3f) / lr = %.5f (%.3f min)" %
                          (step + 1, train_data.epoch_detail, loss_train, loss_dev, ler_train, ler_dev,
                           learning_rate, duration_step / 60))
                    sys.stdout.flush()
                    start_time_step = time.time()

                # Save checkpoint and evaluate model per epoch
                if is_new_epoch:
                    duration_epoch = time.time() - start_time_epoch
                    print('-----EPOCH:%d (%.3f min)-----' %
                          (train_data.epoch, duration_epoch / 60))

                    # Save figure of loss & ler
                    plot_loss(csv_loss_train, csv_loss_dev, csv_steps,
                              save_path=model.save_path)
                    plot_ler(csv_ler_train, csv_ler_dev, csv_steps,
                             label_type=params['label_type'],
                             save_path=model.save_path)

                    if train_data.epoch >= params['eval_start_epoch']:
                        start_time_eval = time.time()
                        print('=== Dev Data Evaluation ===')
                        fmean_dev_epoch, df_acc = do_eval_fmeasure(
                            session=sess,
                            decode_op=decode_op,
                            model=model,
                            dataset=dev_data,
                            eval_batch_size=params['batch_size'])
                        print(df_acc)
                        print('  F-measure: %f %%' % (fmean_dev_epoch))

                        if fmean_dev_epoch > fmean_dev_best:
                            fmean_dev_best = fmean_dev_epoch
                            print('■■■ ↑Best Score (F-measure)↑ ■■■')

                            # Save model only when best accuracy is
                            # obtained (check point)
                            checkpoint_file = join(
                                model.save_path, 'model.ckpt')
                            save_path = saver.save(
                                sess, checkpoint_file, global_step=train_data.epoch)
                            print("Model saved in file: %s" % save_path)

                            print('=== Test Data Evaluation ===')
                            fmean_test_epoch, df_acc = do_eval_fmeasure(
                                session=sess,
                                decode_op=decode_op,
                                model=model,
                                dataset=test_data,
                                eval_batch_size=params['batch_size'])
                            print(df_acc)
                            print('  F-measure: %f %%' % (fmean_test_epoch))

                        # fmean_time_dev_epoch, df_acc = do_eval_fmeasure_time(
                        #     session=sess,
                        #     decode_op=decode_op,
                        #     posteriors_op=posteriors_op,
                        #     model=model,
                        #     dataset=dev_data,
                        #     eval_batch_size=params['batch_size'])
                        # print(df_acc)
                        # print('  Time F-measure: %f %%' %
                        #       (fmean_time_dev_epoch))

                        # if fmean_time_dev_best < fmean_time_dev_epoch:
                        #     fmean_time_dev_best = fmean_time_dev_epoch
                        #     print('■■■ ↑Best Score (Time F-measure)↑ ■■■')

                        # fmean_time_test_epoch, df_acc = do_eval_fmeasure_time(
                        #     session=sess,
                        #     decode_op=decode_op,
                        #     posteriors_op=posteriors_op,
                        #     model=model,
                        #     dataset=test_data,
                        #     eval_batch_size=params['batch_size'])
                        # print(df_acc)
                        # print('  Time F-measure: %f %%' %
                        #       (fmean_time_test_epoch))

                        duration_eval = time.time() - start_time_eval
                        print('Evaluation time: %.3f min' %
                              (duration_eval / 60))

                        # Update learning rate
                        learning_rate = lr_controller.decay_lr(
                            learning_rate=learning_rate,
                            epoch=train_data.epoch,
                            value=fmean_dev_epoch)

                    start_time_epoch = time.time()

            duration_train = time.time() - start_time_train
            print('Total time: %.3f hour' % (duration_train / 3600))

            # Training was finished correctly
            with open(join(model.save_path, 'complete.txt'), 'w') as f:
                f.write('')
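
All three examples above feed label sequences through `list2sparsetensor`, which turns padded Python lists into the sparse representation expected by `tf.sparse_placeholder` (used for the CTC loss and the edit-distance based LER). A minimal sketch is given below, assuming each sequence is padded with `padded_value`; the project's own helper may differ.

import numpy as np

def list2sparsetensor(labels, padded_value=-1):
    """Convert padded label sequences to (indices, values, dense_shape) (sketch).

    Args:
        labels: list of label sequences, each padded with `padded_value`
        padded_value (int): padding symbol to strip before conversion
    Returns:
        A tuple that can be fed to a tf.sparse_placeholder
    """
    indices, values = [], []
    max_len = 1
    for i_utt, label_seq in enumerate(labels):
        # Drop the padding symbols before building the sparse representation
        label_seq = [l for l in label_seq if l != padded_value]
        max_len = max(max_len, len(label_seq))
        for i_label, label in enumerate(label_seq):
            indices.append([i_utt, i_label])
            values.append(label)
    dense_shape = [len(labels), max_len]
    return (np.array(indices, dtype=np.int64),
            np.array(values, dtype=np.int32),
            np.array(dense_shape, dtype=np.int64))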
Code Example #4
def main():

    args = parser.parse_args()

    ##################################################
    # DATASET
    ##################################################
    if args.model_save_path is not None:
        # Load a config file (.yml)
        params = load_config(args.config_path)
    # NOTE: Retrain the saved model from the last checkpoint
    elif args.saved_model_path is not None:
        params = load_config(os.path.join(args.saved_model_path, 'config.yml'))
    else:
        raise ValueError("Set model_save_path or saved_model_path.")

    # Load dataset
    train_data = Dataset(data_save_path=args.data_save_path,
                         backend=params['backend'],
                         input_channel=params['input_channel'],
                         use_delta=params['use_delta'],
                         use_double_delta=params['use_double_delta'],
                         data_type='train',
                         data_size=params['data_size'],
                         label_type=params['label_type'],
                         batch_size=params['batch_size'],
                         max_epoch=params['num_epoch'],
                         splice=params['splice'],
                         num_stack=params['num_stack'],
                         num_skip=params['num_skip'],
                         sort_utt=True,
                         sort_stop_epoch=params['sort_stop_epoch'],
                         tool=params['tool'],
                         num_enque=None,
                         dynamic_batching=params['dynamic_batching'])
    dev_clean_data = Dataset(data_save_path=args.data_save_path,
                             backend=params['backend'],
                             input_channel=params['input_channel'],
                             use_delta=params['use_delta'],
                             use_double_delta=params['use_double_delta'],
                             data_type='dev_clean',
                             data_size=params['data_size'],
                             label_type=params['label_type'],
                             batch_size=params['batch_size'],
                             splice=params['splice'],
                             num_stack=params['num_stack'],
                             num_skip=params['num_skip'],
                             shuffle=True,
                             tool=params['tool'])
    dev_other_data = Dataset(data_save_path=args.data_save_path,
                             backend=params['backend'],
                             input_channel=params['input_channel'],
                             use_delta=params['use_delta'],
                             use_double_delta=params['use_double_delta'],
                             data_type='dev_other',
                             data_size=params['data_size'],
                             label_type=params['label_type'],
                             batch_size=params['batch_size'],
                             splice=params['splice'],
                             num_stack=params['num_stack'],
                             num_skip=params['num_skip'],
                             shuffle=True,
                             tool=params['tool'])
    test_clean_data = Dataset(data_save_path=args.data_save_path,
                              backend=params['backend'],
                              input_channel=params['input_channel'],
                              use_delta=params['use_delta'],
                              use_double_delta=params['use_double_delta'],
                              data_type='test_clean',
                              data_size=params['data_size'],
                              label_type=params['label_type'],
                              batch_size=params['batch_size'],
                              splice=params['splice'],
                              num_stack=params['num_stack'],
                              num_skip=params['num_skip'],
                              tool=params['tool'])
    test_other_data = Dataset(data_save_path=args.data_save_path,
                              backend=params['backend'],
                              input_channel=params['input_channel'],
                              use_delta=params['use_delta'],
                              use_double_delta=params['use_double_delta'],
                              data_type='test_other',
                              data_size=params['data_size'],
                              label_type=params['label_type'],
                              batch_size=params['batch_size'],
                              splice=params['splice'],
                              num_stack=params['num_stack'],
                              num_skip=params['num_skip'],
                              tool=params['tool'])

    params['num_classes'] = train_data.num_classes

    ##################################################
    # MODEL
    ##################################################
    # Model setting
    model = load(model_type=params['model_type'],
                 params=params,
                 backend=params['backend'])

    if args.model_save_path is not None:

        # Set save path
        save_path = mkdir_join(args.model_save_path, params['backend'],
                               params['model_type'], params['label_type'],
                               params['data_size'], model.name)
        model.set_save_path(save_path)

        # Save config file
        save_config(config_path=args.config_path, save_path=model.save_path)

        # Setting for logging
        logger = set_logger(model.save_path)

        if os.path.isdir(params['char_init']):
            # NOTE: Start training from the pre-trained character model
            model.load_checkpoint(save_path=params['char_init'],
                                  epoch=-1,
                                  load_pretrained_model=True)

        # Count total parameters
        for name in sorted(list(model.num_params_dict.keys())):
            num_params = model.num_params_dict[name]
            logger.info("%s %d" % (name, num_params))
        logger.info("Total %.3f M parameters" %
                    (model.total_parameters / 1000000))

        # Define optimizer
        model.set_optimizer(optimizer=params['optimizer'],
                            learning_rate_init=float(params['learning_rate']),
                            weight_decay=float(params['weight_decay']),
                            clip_grad_norm=params['clip_grad_norm'],
                            lr_schedule=False,
                            factor=params['decay_rate'],
                            patience_epoch=params['decay_patient_epoch'])

        epoch, step = 1, 0
        learning_rate = float(params['learning_rate'])
        metric_dev_best = 1

    # NOTE: Retrain the saved model from the last checkpoint
    elif args.saved_model_path is not None:

        # Set save path
        model.save_path = args.saved_model_path

        # Setting for logging
        logger = set_logger(model.save_path, restart=True)

        # Define optimizer
        model.set_optimizer(
            optimizer=params['optimizer'],
            learning_rate_init=float(params['learning_rate']),  # on-the-fly
            weight_decay=float(params['weight_decay']),
            clip_grad_norm=params['clip_grad_norm'],
            lr_schedule=False,
            factor=params['decay_rate'],
            patience_epoch=params['decay_patient_epoch'])

        # Restore the last saved model
        epoch, step, learning_rate, metric_dev_best = model.load_checkpoint(
            save_path=args.saved_model_path, epoch=-1, restart=True)

    else:
        raise ValueError("Set model_save_path or saved_model_path.")

    train_data.epoch = epoch - 1

    # GPU setting
    model.set_cuda(deterministic=False, benchmark=True)

    logger.info('PID: %s' % os.getpid())
    logger.info('HOSTNAME: %s' % os.uname()[1])

    # Set process name
    setproctitle('libri_' + params['backend'] + '_' + params['model_type'] +
                 '_' + params['label_type'] + '_' + params['data_size'])

    ##################################################
    # TRAINING LOOP
    ##################################################
    # Define learning rate controller
    lr_controller = Controller(
        learning_rate_init=learning_rate,
        backend=params['backend'],
        decay_start_epoch=params['decay_start_epoch'],
        decay_rate=params['decay_rate'],
        decay_patient_epoch=params['decay_patient_epoch'],
        lower_better=True)

    # Setting for tensorboard
    if params['backend'] == 'pytorch':
        tf_writer = SummaryWriter(model.save_path)

    # Train model
    csv_steps, csv_loss_train, csv_loss_dev = [], [], []
    start_time_train = time.time()
    start_time_epoch = time.time()
    start_time_step = time.time()
    not_improved_epoch = 0
    best_model = model
    loss_train_mean = 0.
    pbar_epoch = tqdm(total=len(train_data))
    while True:
        # Compute loss in the training set (including parameter update)
        batch_train, is_new_epoch = train_data.next()
        model, loss_train_val = train_step(model,
                                           batch_train,
                                           params['clip_grad_norm'],
                                           backend=params['backend'])
        loss_train_mean += loss_train_val

        pbar_epoch.update(len(batch_train['xs']))

        if (step + 1) % params['print_step'] == 0:

            # Compute loss in the dev set
            batch_dev = dev_clean_data.next()[0]
            loss_dev = model(batch_dev['xs'],
                             batch_dev['ys'],
                             batch_dev['x_lens'],
                             batch_dev['y_lens'],
                             is_eval=True)

            loss_train_mean /= params['print_step']
            csv_steps.append(step)
            csv_loss_train.append(loss_train_mean)
            csv_loss_dev.append(loss_dev)

            # Logging by tensorboard
            if params['backend'] == 'pytorch':
                tf_writer.add_scalar('train/loss', loss_train_mean, step + 1)
                tf_writer.add_scalar('dev/loss', loss_dev, step + 1)
                for name, param in model.named_parameters():
                    name = name.replace('.', '/')
                    tf_writer.add_histogram(name,
                                            param.data.cpu().numpy(), step + 1)
                    tf_writer.add_histogram(name + '/grad',
                                            param.grad.data.cpu().numpy(),
                                            step + 1)

            duration_step = time.time() - start_time_step
            logger.info(
                "...Step:%d(epoch:%.3f) loss:%.3f(%.3f)/lr:%.5f/batch:%d/x_lens:%d (%.3f min)"
                % (step + 1, train_data.epoch_detail, loss_train_mean,
                   loss_dev, learning_rate, train_data.current_batch_size,
                   max(batch_train['x_lens']) * params['num_stack'],
                   duration_step / 60))
            start_time_step = time.time()
            loss_train_mean = 0.
        step += 1

        # Save checkpoint and evaluate model per epoch
        if is_new_epoch:
            duration_epoch = time.time() - start_time_epoch
            logger.info('===== EPOCH:%d (%.3f min) =====' %
                        (epoch, duration_epoch / 60))

            # Save figure of loss
            plot_loss(csv_loss_train,
                      csv_loss_dev,
                      csv_steps,
                      save_path=model.save_path)

            if epoch < params['eval_start_epoch']:
                # Save the model
                model.save_checkpoint(model.save_path, epoch, step,
                                      learning_rate, metric_dev_best)
            else:
                start_time_eval = time.time()
                # dev
                if 'word' in params['label_type']:
                    metric_dev_epoch, _ = do_eval_wer(
                        models=[model],
                        dataset=dev_clean_data,
                        beam_width=1,
                        max_decode_len=MAX_DECODE_LEN_WORD,
                        eval_batch_size=1)
                    logger.info('  WER (dev-clean): %.3f %%' %
                                (metric_dev_epoch * 100))
                else:
                    metric_dev_epoch, wer_dev_clean_epoch, _ = do_eval_cer(
                        models=[model],
                        dataset=dev_clean_data,
                        beam_width=1,
                        max_decode_len=MAX_DECODE_LEN_CHAR,
                        eval_batch_size=1)
                    logger.info('  CER / WER (dev-clean): %.3f %% / %.3f %%' %
                                ((metric_dev_epoch * 100),
                                 (wer_dev_clean_epoch * 100)))

                if metric_dev_epoch < metric_dev_best:
                    metric_dev_best = metric_dev_epoch
                    not_improved_epoch = 0
                    best_model = copy.deepcopy(model)
                    logger.info('||||| Best Score |||||')

                    # Save the model
                    model.save_checkpoint(model.save_path, epoch, step,
                                          learning_rate, metric_dev_best)

                    # dev-other & test
                    if 'word' in params['label_type']:
                        metric_dev_other_epoch, _ = do_eval_wer(
                            models=[model],
                            dataset=dev_other_data,
                            beam_width=1,
                            max_decode_len=MAX_DECODE_LEN_WORD,
                            eval_batch_size=1)
                        logger.info('  WER (dev-other): %.3f %%' %
                                    (metric_dev_other_epoch * 100))

                        wer_test_clean, _ = do_eval_wer(
                            models=[model],
                            dataset=test_clean_data,
                            beam_width=1,
                            max_decode_len=MAX_DECODE_LEN_WORD,
                            eval_batch_size=1)
                        logger.info('  WER (test-clean): %.3f %%' %
                                    (wer_test_clean * 100))

                        wer_test_other, _ = do_eval_wer(
                            models=[model],
                            dataset=test_other_data,
                            beam_width=1,
                            max_decode_len=MAX_DECODE_LEN_WORD,
                            eval_batch_size=1)
                        logger.info('  WER (test-other): %.3f %%' %
                                    (wer_test_other * 100))

                        logger.info(
                            '  WER (test-mean): %.3f %%' %
                            ((wer_test_clean + wer_test_other) * 100 / 2))
                    else:
                        metric_dev_other_epoch, wer_dev_other_epoch, _ = do_eval_cer(
                            models=[model],
                            dataset=dev_other_data,
                            beam_width=1,
                            max_decode_len=MAX_DECODE_LEN_CHAR,
                            eval_batch_size=1)
                        logger.info(
                            '  CER / WER (dev-other): %.3f %% / %.3f %%' %
                            ((metric_dev_other_epoch * 100),
                             (wer_dev_other_epoch * 100)))

                        cer_test_clean, wer_test_clean, _ = do_eval_cer(
                            models=[model],
                            dataset=test_clean_data,
                            beam_width=1,
                            max_decode_len=MAX_DECODE_LEN_CHAR,
                            eval_batch_size=1)
                        logger.info(
                            '  CER / WER (test-clean): %.3f %% / %.3f %%' %
                            ((cer_test_clean * 100), (wer_test_clean * 100)))

                        cer_test_other, wer_test_other, _ = do_eval_cer(
                            models=[model],
                            dataset=test_other_data,
                            beam_width=1,
                            max_decode_len=MAX_DECODE_LEN_CHAR,
                            eval_batch_size=1)
                        logger.info(
                            '  CER / WER (test-other): %.3f %% / %.3f %%' %
                            ((cer_test_other * 100), (wer_test_other * 100)))

                        logger.info(
                            '  CER / WER (test-mean): %.3f %% / %.3f %%' %
                            (((cer_test_clean + cer_test_other) * 100 / 2),
                             ((wer_test_clean + wer_test_other) * 100 / 2)))

                else:
                    not_improved_epoch += 1

                duration_eval = time.time() - start_time_eval
                logger.info('Evaluation time: %.3f min' % (duration_eval / 60))

                # Early stopping
                if not_improved_epoch == params['not_improved_patient_epoch']:
                    break

                # Update learning rate
                model.optimizer, learning_rate = lr_controller.decay_lr(
                    optimizer=model.optimizer,
                    learning_rate=learning_rate,
                    epoch=epoch,
                    value=metric_dev_epoch)

                if epoch == params['convert_to_sgd_epoch']:
                    # Convert to fine-tuning stage
                    model.set_optimizer(
                        'sgd',
                        learning_rate_init=learning_rate,
                        weight_decay=float(params['weight_decay']),
                        clip_grad_norm=params['clip_grad_norm'],
                        lr_schedule=False,
                        factor=params['decay_rate'],
                        patience_epoch=params['decay_patient_epoch'])
                    logger.info('========== Convert to SGD ==========')

                    # Inject Gaussian noise to all parameters
                    if float(params['weight_noise_std']) > 0:
                        model.weight_noise_injection = True

            pbar_epoch = tqdm(total=len(train_data))
            print('========== EPOCH:%d (%.3f min) ==========' %
                  (epoch, duration_epoch / 60))

            if epoch == params['num_epoch']:
                break

            start_time_step = time.time()
            start_time_epoch = time.time()
            epoch += 1

    # TODO: evaluate the best model by beam search here

    duration_train = time.time() - start_time_train
    logger.info('Total time: %.3f hour' % (duration_train / 3600))

    if params['backend'] == 'pytorch':
        tf_writer.close()

    # Training was finished correctly
    with open(os.path.join(model.save_path, 'COMPLETE'), 'w') as f:
        f.write('')
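
The training loop above lowers the learning rate through lr_controller.decay_lr(...) whenever the dev metric stops improving, but the Controller class itself is defined elsewhere in the repository. The following is a minimal sketch of a patience-based controller with the PyTorch-backend interface used above; the class body is an assumption, and only the constructor arguments and the decay_lr call signature are taken from the surrounding code (the TensorFlow examples further below call decay_lr without an optimizer and use only the returned rate).

class Controller(object):
    """Patience-based learning-rate decay (sketch, not the repository's code)."""

    def __init__(self, learning_rate_init, decay_start_epoch, decay_rate,
                 decay_patient_epoch=1, lower_better=True):
        self.learning_rate_init = learning_rate_init
        self.decay_start_epoch = decay_start_epoch
        self.decay_rate = decay_rate
        self.decay_patient_epoch = decay_patient_epoch
        self.lower_better = lower_better
        self.best_value = float('inf')  # tracked internally as "lower is better"
        self.not_improved_epoch = 0

    def decay_lr(self, optimizer, learning_rate, epoch, value):
        if not self.lower_better:
            value = -value  # flip so that smaller always means better

        if value < self.best_value:
            # The dev metric improved: remember it and reset the patience counter
            self.best_value = value
            self.not_improved_epoch = 0
            return optimizer, learning_rate

        if epoch < self.decay_start_epoch:
            # No decay before the warm-up period
            return optimizer, learning_rate

        self.not_improved_epoch += 1
        if self.not_improved_epoch < self.decay_patient_epoch:
            # Not improved, but still within patience
            return optimizer, learning_rate

        # Out of patience: scale the learning rate and push it into the optimizer
        self.not_improved_epoch = 0
        learning_rate *= self.decay_rate
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate
        return optimizer, learning_rate
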
Code example #5
def do_train(model, params, gpu_indices):
    """Run training.
    Args:
        model: the model to train
        params (dict): A dictionary of parameters
        gpu_indices (list): GPU indices
    """
    if 'kanji' in params['label_type']:
        map_file_path = '../metrics/mapping_files/' + \
            params['label_type'] + '_' + params['train_data_size'] + '.txt'
    elif 'kana' in params['label_type']:
        map_file_path = '../metrics/mapping_files/' + \
            params['label_type'] + '.txt'

    # Load dataset
    train_data = Dataset(data_type='train',
                         train_data_size=params['train_data_size'],
                         label_type=params['label_type'],
                         map_file_path=map_file_path,
                         batch_size=params['batch_size'],
                         max_epoch=params['num_epoch'],
                         splice=params['splice'],
                         num_stack=params['num_stack'],
                         num_skip=params['num_skip'],
                         sort_utt=True,
                         sort_stop_epoch=params['sort_stop_epoch'],
                         num_gpu=len(gpu_indices))
    dev_data = Dataset(data_type='dev',
                       train_data_size=params['train_data_size'],
                       label_type=params['label_type'],
                       map_file_path=map_file_path,
                       batch_size=params['batch_size'],
                       splice=params['splice'],
                       num_stack=params['num_stack'],
                       num_skip=params['num_skip'],
                       sort_utt=False,
                       num_gpu=len(gpu_indices))

    # Tell TensorFlow that the model will be built into the default graph
    with tf.Graph().as_default(), tf.device('/cpu:0'):

        # Create a variable to track the global step
        global_step = tf.Variable(0, name='global_step', trainable=False)

        # Set optimizer
        learning_rate_pl = tf.placeholder(tf.float32, name='learning_rate')
        optimizer = model._set_optimizer(params['optimizer'], learning_rate_pl)

        # Calculate the gradients for each model tower
        total_grads_and_vars, total_losses = [], []
        decode_ops_infer, ler_ops = [], []
        all_devices = ['/gpu:%d' % i_gpu for i_gpu in range(len(gpu_indices))]
        # NOTE: /cpu:0 is prepared for evaluation
        with tf.variable_scope(tf.get_variable_scope()):
            for i_gpu in range(len(all_devices)):
                with tf.device(all_devices[i_gpu]):
                    with tf.name_scope('tower_gpu%d' % i_gpu) as scope:

                        # Define placeholders in each tower
                        model.create_placeholders()

                        # Calculate the total loss for the current tower of the
                        # model. This function constructs the entire model but
                        # shares the variables across all towers.
                        tower_loss, tower_logits, tower_decoder_outputs_train, tower_decoder_outputs_infer = model.compute_loss(
                            model.inputs_pl_list[i_gpu],
                            model.labels_pl_list[i_gpu],
                            model.inputs_seq_len_pl_list[i_gpu],
                            model.labels_seq_len_pl_list[i_gpu],
                            model.keep_prob_encoder_pl_list[i_gpu],
                            model.keep_prob_decoder_pl_list[i_gpu],
                            model.keep_prob_embedding_pl_list[i_gpu], scope)
                        tower_loss = tf.expand_dims(tower_loss, axis=0)
                        total_losses.append(tower_loss)

                        # Reuse variables for the next tower
                        tf.get_variable_scope().reuse_variables()

                        # Calculate the gradients for the batch of data on this
                        # tower
                        tower_grads_and_vars = optimizer.compute_gradients(
                            tower_loss)

                        # Gradient clipping
                        tower_grads_and_vars = model._clip_gradients(
                            tower_grads_and_vars)

                        # TODO: Optionally add gradient noise

                        # Keep track of the gradients across all towers
                        total_grads_and_vars.append(tower_grads_and_vars)

                        # Add to the graph each operation per tower
                        _, decode_op_tower_infer = model.decode(
                            tower_decoder_outputs_train,
                            tower_decoder_outputs_infer)
                        decode_ops_infer.append(decode_op_tower_infer)
                        # ler_op_tower = model.compute_ler(
                        #     decode_op_tower, model.labels_pl_list[i_gpu])
                        ler_op_tower = model.compute_ler(
                            model.labels_st_true_pl_list[i_gpu],
                            model.labels_st_pred_pl_list[i_gpu])
                        ler_op_tower = tf.expand_dims(ler_op_tower, axis=0)
                        ler_ops.append(ler_op_tower)

        # Aggregate losses, then calculate average loss
        total_losses = tf.concat(axis=0, values=total_losses)
        loss_op = tf.reduce_mean(total_losses, axis=0)
        ler_ops = tf.concat(axis=0, values=ler_ops)
        ler_op = tf.reduce_mean(ler_ops, axis=0)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers
        average_grads_and_vars = average_gradients(total_grads_and_vars)

        # Apply the gradients to adjust the shared variables.
        train_op = optimizer.apply_gradients(average_grads_and_vars,
                                             global_step=global_step)

        # Define learning rate controller
        lr_controller = Controller(
            learning_rate_init=params['learning_rate'],
            decay_start_epoch=params['decay_start_epoch'],
            decay_rate=params['decay_rate'],
            decay_patient_epoch=params['decay_patient_epoch'],
            lower_better=True)

        # Build the summary tensor based on the TensorFlow collection of
        # summaries
        summary_train = tf.summary.merge(model.summaries_train)
        summary_dev = tf.summary.merge(model.summaries_dev)

        # Add the variable initializer operation
        init_op = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints
        saver = tf.train.Saver(max_to_keep=None)

        # Count total parameters
        parameters_dict, total_parameters = count_total_parameters(
            tf.trainable_variables())
        for parameter_name in sorted(parameters_dict.keys()):
            print("%s %d" % (parameter_name, parameters_dict[parameter_name]))
        print("Total %d variables, %s M parameters" %
              (len(parameters_dict.keys()), "{:,}".format(
                  total_parameters / 1000000)))

        csv_steps, csv_loss_train, csv_loss_dev = [], [], []
        csv_ler_train, csv_ler_dev = [], []
        # Create a session for running operation on the graph
        # NOTE: Start running operations on the Graph. allow_soft_placement
        # must be set to True to build towers on GPU, as some of the ops do not
        # have GPU implementations.
        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False)) as sess:

            # Instantiate a SummaryWriter to output summaries and the graph
            summary_writer = tf.summary.FileWriter(model.save_path, sess.graph)

            # Initialize param
            sess.run(init_op)

            # Train model
            start_time_train = time.time()
            start_time_epoch = time.time()
            start_time_step = time.time()
            cer_dev_best = 1
            not_improved_epoch = 0
            learning_rate = float(params['learning_rate'])
            for step, (data, is_new_epoch) in enumerate(train_data):

                # Create feed dictionary for next mini batch (train)
                inputs, labels_train, inputs_seq_len, labels_seq_len, _ = data
                feed_dict_train = {}
                for i_gpu in range(len(gpu_indices)):
                    feed_dict_train[
                        model.inputs_pl_list[i_gpu]] = inputs[i_gpu]
                    feed_dict_train[
                        model.labels_pl_list[i_gpu]] = labels_train[i_gpu]
                    feed_dict_train[model.inputs_seq_len_pl_list[
                        i_gpu]] = inputs_seq_len[i_gpu]
                    feed_dict_train[model.labels_seq_len_pl_list[
                        i_gpu]] = labels_seq_len[i_gpu]
                    feed_dict_train[
                        model.keep_prob_encoder_pl_list[i_gpu]] = 1 - float(
                            params['dropout_encoder'])
                    feed_dict_train[
                        model.keep_prob_decoder_pl_list[i_gpu]] = 1 - float(
                            params['dropout_decoder'])
                    feed_dict_train[
                        model.keep_prob_embedding_pl_list[i_gpu]] = 1 - float(
                            params['dropout_embedding'])
                feed_dict_train[learning_rate_pl] = learning_rate

                # Update parameters
                sess.run(train_op, feed_dict=feed_dict_train)

                if (step + 1) % int(
                        params['print_step'] / len(gpu_indices)) == 0:

                    # Create feed dictionary for next mini batch (dev)
                    inputs, labels_dev, inputs_seq_len, labels_seq_len, _ = dev_data.next(
                    )[0]
                    feed_dict_dev = {}
                    for i_gpu in range(len(gpu_indices)):
                        feed_dict_dev[
                            model.inputs_pl_list[i_gpu]] = inputs[i_gpu]
                        feed_dict_dev[
                            model.labels_pl_list[i_gpu]] = labels_dev[i_gpu]
                        feed_dict_dev[model.inputs_seq_len_pl_list[
                            i_gpu]] = inputs_seq_len[i_gpu]
                        feed_dict_dev[model.labels_seq_len_pl_list[
                            i_gpu]] = labels_seq_len[i_gpu]
                        feed_dict_dev[
                            model.keep_prob_encoder_pl_list[i_gpu]] = 1.0
                        feed_dict_dev[
                            model.keep_prob_decoder_pl_list[i_gpu]] = 1.0
                        feed_dict_dev[
                            model.keep_prob_embedding_pl_list[i_gpu]] = 1.0

                    # Compute loss
                    loss_train = sess.run(loss_op, feed_dict=feed_dict_train)
                    loss_dev = sess.run(loss_op, feed_dict=feed_dict_dev)
                    csv_steps.append(step)
                    csv_loss_train.append(loss_train)
                    csv_loss_dev.append(loss_dev)

                    # Change to evaluation mode
                    for i_gpu in range(len(gpu_indices)):
                        feed_dict_train[
                            model.keep_prob_encoder_pl_list[i_gpu]] = 1.0
                        feed_dict_train[
                            model.keep_prob_decoder_pl_list[i_gpu]] = 1.0
                        feed_dict_train[
                            model.keep_prob_embedding_pl_list[i_gpu]] = 1.0

                    # Predict class ids
                    predicted_ids_train_list, summary_str_train = sess.run(
                        [decode_ops_infer, summary_train],
                        feed_dict=feed_dict_train)
                    predicted_ids_dev_list, summary_str_dev = sess.run(
                        [decode_ops_infer, summary_dev],
                        feed_dict=feed_dict_dev)

                    # Convert to sparsetensor to compute LER
                    feed_dict_ler_train = {}
                    for i_gpu in range(len(gpu_indices)):
                        feed_dict_ler_train[model.labels_st_true_pl_list[
                            i_gpu]] = list2sparsetensor(
                                labels_train[i_gpu],
                                padded_value=train_data.padded_value)
                        feed_dict_ler_train[model.labels_st_pred_pl_list[
                            i_gpu]] = list2sparsetensor(
                                predicted_ids_train_list[i_gpu],
                                padded_value=train_data.padded_value)
                    feed_dict_ler_dev = {}
                    for i_gpu in range(len(gpu_indices)):
                        feed_dict_ler_dev[model.labels_st_true_pl_list[
                            i_gpu]] = list2sparsetensor(
                                labels_dev[i_gpu],
                                padded_value=dev_data.padded_value)
                        feed_dict_ler_dev[model.labels_st_pred_pl_list[
                            i_gpu]] = list2sparsetensor(
                                predicted_ids_dev_list[i_gpu],
                                padded_value=dev_data.padded_value)

                    # Compute accuracy
                    # ler_train = sess.run(ler_op, feed_dict=feed_dict_ler_train)
                    # ler_dev = sess.run(ler_op, feed_dict=feed_dict_ler_dev)
                    ler_train = 1
                    ler_dev = 1
                    csv_ler_train.append(ler_train)
                    csv_ler_dev.append(ler_dev)
                    # TODO: fix this

                    # Update event files
                    summary_writer.add_summary(summary_str_train, step + 1)
                    summary_writer.add_summary(summary_str_dev, step + 1)
                    summary_writer.flush()

                    duration_step = time.time() - start_time_step
                    print(
                        "Step %d (epoch: %.3f): loss = %.3f (%.3f) / ler = %.3f (%.3f) / lr = %.5f (%.3f min)"
                        % (step + 1, train_data.epoch_detail, loss_train,
                           loss_dev, ler_train, ler_dev, learning_rate,
                           duration_step / 60))
                    sys.stdout.flush()
                    start_time_step = time.time()

                # Save checkpoint and evaluate model per epoch
                if is_new_epoch:
                    duration_epoch = time.time() - start_time_epoch
                    print('-----EPOCH:%d (%.3f min)-----' %
                          (train_data.epoch, duration_epoch / 60))

                    # Save figure of loss & ler
                    plot_loss(csv_loss_train,
                              csv_loss_dev,
                              csv_steps,
                              save_path=model.save_path)
                    plot_ler(csv_ler_train,
                             csv_ler_dev,
                             csv_steps,
                             label_type=params['label_type'],
                             save_path=model.save_path)

                    if train_data.epoch >= params['eval_start_epoch']:
                        start_time_eval = time.time()
                        print('=== Dev Data Evaluation ===')
                        cer_dev_epoch = do_eval_cer(
                            session=sess,
                            decode_ops=decode_ops_infer,
                            model=model,
                            dataset=dev_data,
                            label_type=params['label_type'],
                            train_data_size=params['train_data_size'],
                            eval_batch_size=1)
                        print('  CER: %f %%' % (cer_dev_epoch * 100))

                        if cer_dev_epoch < cer_dev_best:
                            cer_dev_best = cer_dev_epoch
                            print('■■■ ↑Best Score (CER)↑ ■■■')

                            # Save model (check point)
                            checkpoint_file = join(model.save_path,
                                                   'model.ckpt')
                            save_path = saver.save(
                                sess,
                                checkpoint_file,
                                global_step=train_data.epoch)
                            print("Model saved in file: %s" % save_path)
                        else:
                            not_improved_epoch += 1

                        duration_eval = time.time() - start_time_eval
                        print('Evaluation time: %.3f min' %
                              (duration_eval / 60))

                        # Early stopping
                        if not_improved_epoch == params[
                                'not_improved_patient_epoch']:
                            break

                        # Update learning rate
                        learning_rate = lr_controller.decay_lr(
                            learning_rate=learning_rate,
                            epoch=train_data.epoch,
                            value=cer_dev_epoch)

                    start_time_epoch = time.time()

            duration_train = time.time() - start_time_train
            print('Total time: %.3f hour' % (duration_train / 3600))

            # Training was finished correctly
            with open(join(model.save_path, 'complete.txt'), 'w') as f:
                f.write('')
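
The multi-GPU example above synchronizes the towers through average_gradients(total_grads_and_vars), whose definition is not included here. Below is a sketch in the style of the classic TensorFlow multi-tower pattern, assuming each entry of total_grads_and_vars is one tower's list of (gradient, variable) pairs as produced by optimizer.compute_gradients:

import tensorflow as tf


def average_gradients(tower_grads_and_vars):
    """Average gradients over towers (sketch; the repository's helper may differ).

    Args:
        tower_grads_and_vars: list, one entry per tower, each a list of
            (gradient, variable) pairs in the same variable order
    Returns:
        A single list of (averaged gradient, variable) pairs. The averaging here
        is the synchronization point across all towers.
    """
    average_grads_and_vars = []
    # zip(*...) regroups the pairs so that each iteration sees the same
    # variable's gradient from every tower
    for grads_and_vars_per_var in zip(*tower_grads_and_vars):
        grads = [tf.expand_dims(grad, axis=0)
                 for grad, _ in grads_and_vars_per_var]
        # Stack along the new tower axis and take the mean
        mean_grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        # Variables are shared across towers, so the first tower's handle suffices
        var = grads_and_vars_per_var[0][1]
        average_grads_and_vars.append((mean_grad, var))
    return average_grads_and_vars
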
def do_train(model, params):
    """Run training. If target labels are phone, the model is evaluated by PER
    with 39 phones.
    Args:
        model: the model to train
        params (dict): A dictionary of parameters
    """
    map_file_path_train = '../metrics/mapping_files/' + \
        params['label_type'] + '.txt'
    if 'phone' in params['label_type']:
        map_file_path_eval = '../metrics/mapping_files/phone39.txt'
    else:
        map_file_path_eval = '../metrics/mapping_files/' + \
            params['label_type'] + '.txt'

    # Load dataset
    train_data = Dataset(
        data_type='train', label_type=params['label_type'],
        batch_size=params['batch_size'], map_file_path=map_file_path_train,
        max_epoch=params['num_epoch'], splice=params['splice'],
        num_stack=params['num_stack'], num_skip=params['num_skip'],
        sort_utt=True, sort_stop_epoch=params['sort_stop_epoch'])
    dev_data = Dataset(
        data_type='dev', label_type=params['label_type'],
        batch_size=params['batch_size'], map_file_path=map_file_path_train,
        splice=params['splice'],
        num_stack=params['num_stack'], num_skip=params['num_skip'],
        sort_utt=False)
    if 'char' in params['label_type']:
        test_data = Dataset(
            data_type='test', label_type=params['label_type'],
            batch_size=1, map_file_path=map_file_path_eval,
            splice=params['splice'],
            num_stack=params['num_stack'], num_skip=params['num_skip'],
            sort_utt=False)
    else:
        test_data = Dataset(
            data_type='test', label_type='phone39',
            batch_size=1, map_file_path=map_file_path_eval,
            splice=params['splice'],
            num_stack=params['num_stack'], num_skip=params['num_skip'],
            sort_utt=False)

    # Tell TensorFlow that the model will be built into the default graph
    with tf.Graph().as_default():

        # Define placeholders
        model.create_placeholders()
        learning_rate_pl = tf.placeholder(tf.float32, name='learning_rate')

        # Add to the graph each operation (including model definition)
        loss_op, logits, decoder_outputs_train, decoder_outputs_infer = model.compute_loss(
            model.inputs_pl_list[0],
            model.labels_pl_list[0],
            model.inputs_seq_len_pl_list[0],
            model.labels_seq_len_pl_list[0],
            model.keep_prob_encoder_pl_list[0],
            model.keep_prob_decoder_pl_list[0],
            model.keep_prob_embedding_pl_list[0])
        train_op = model.train(loss_op,
                               optimizer=params['optimizer'],
                               learning_rate=learning_rate_pl)
        _, decode_op_infer = model.decode(
            decoder_outputs_train,
            decoder_outputs_infer)
        ler_op = model.compute_ler(model.labels_st_true_pl,
                                   model.labels_st_pred_pl)

        # Define learning rate controller
        lr_controller = Controller(
            learning_rate_init=params['learning_rate'],
            decay_start_epoch=params['decay_start_epoch'],
            decay_rate=params['decay_rate'],
            decay_patient_epoch=params['decay_patient_epoch'],
            lower_better=True)

        # Build the summary tensor based on the TensorFlow collection of
        # summaries
        summary_train = tf.summary.merge(model.summaries_train)
        summary_dev = tf.summary.merge(model.summaries_dev)

        # Add the variable initializer operation
        init_op = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints
        saver = tf.train.Saver(max_to_keep=None)

        # Count total parameters
        parameters_dict, total_parameters = count_total_parameters(
            tf.trainable_variables())
        for parameter_name in sorted(parameters_dict.keys()):
            print("%s %d" % (parameter_name, parameters_dict[parameter_name]))
        print("Total %d variables, %s M param" %
              (len(parameters_dict.keys()),
               "{:,}".format(total_parameters / 1000000)))

        csv_steps, csv_loss_train, csv_loss_dev = [], [], []
        csv_ler_train, csv_ler_dev = [], []
        # Create a session for running operation on the graph
        with tf.Session() as sess:

            # Instantiate a SummaryWriter to output summaries and the graph
            summary_writer = tf.summary.FileWriter(
                model.save_path, sess.graph)

            # Initialize param
            sess.run(init_op)

            # Train model
            start_time_train = time.time()
            start_time_epoch = time.time()
            start_time_step = time.time()
            ler_dev_best = 1
            learning_rate = float(params['learning_rate'])
            for step, (data, is_new_epoch) in enumerate(train_data):

                # Create feed dictionary for next mini batch (train)
                inputs, labels_train, inputs_seq_len, labels_seq_len, _ = data
                feed_dict_train = {
                    model.inputs_pl_list[0]: inputs[0],
                    model.labels_pl_list[0]: labels_train[0],
                    model.inputs_seq_len_pl_list[0]: inputs_seq_len[0],
                    model.labels_seq_len_pl_list[0]: labels_seq_len[0],
                    model.keep_prob_encoder_pl_list[0]: 1 - float(params['dropout_encoder']),
                    model.keep_prob_decoder_pl_list[0]: 1 - float(params['dropout_decoder']),
                    model.keep_prob_embedding_pl_list[0]: 1 - float(params['dropout_embedding']),
                    learning_rate_pl: learning_rate
                }

                # Update parameters
                sess.run(train_op, feed_dict=feed_dict_train)

                if (step + 1) % params['print_step'] == 0:

                    # Create feed dictionary for next mini batch (dev)
                    (inputs, labels_dev, inputs_seq_len,
                     labels_seq_len, _), _ = dev_data.next()
                    feed_dict_dev = {
                        model.inputs_pl_list[0]: inputs[0],
                        model.labels_pl_list[0]: labels_dev[0],
                        model.inputs_seq_len_pl_list[0]: inputs_seq_len[0],
                        model.labels_seq_len_pl_list[0]: labels_seq_len[0],
                        model.keep_prob_encoder_pl_list[0]: 1.0,
                        model.keep_prob_decoder_pl_list[0]: 1.0,
                        model.keep_prob_embedding_pl_list[0]: 1.0
                    }

                    # Compute loss
                    loss_train = sess.run(loss_op, feed_dict=feed_dict_train)
                    loss_dev = sess.run(loss_op, feed_dict=feed_dict_dev)
                    csv_steps.append(step)
                    csv_loss_train.append(loss_train)
                    csv_loss_dev.append(loss_dev)

                    # Change to evaluation mode
                    feed_dict_train[model.keep_prob_encoder_pl_list[0]] = 1.0
                    feed_dict_train[model.keep_prob_decoder_pl_list[0]] = 1.0
                    feed_dict_train[model.keep_prob_embedding_pl_list[0]] = 1.0

                    # Predict class ids & update event files
                    predicted_ids_train, summary_str_train = sess.run(
                        [decode_op_infer, summary_train], feed_dict=feed_dict_train)
                    predicted_ids_dev, summary_str_dev = sess.run(
                        [decode_op_infer, summary_dev], feed_dict=feed_dict_dev)
                    summary_writer.add_summary(summary_str_train, step + 1)
                    summary_writer.add_summary(summary_str_dev, step + 1)
                    summary_writer.flush()

                    # Convert to sparsetensor to compute LER
                    feed_dict_ler_train = {
                        model.labels_st_true_pl: list2sparsetensor(
                            labels_train[0], padded_value=train_data.padded_value),
                        model.labels_st_pred_pl: list2sparsetensor(
                            predicted_ids_train, padded_value=train_data.padded_value)
                    }
                    feed_dict_ler_dev = {
                        model.labels_st_true_pl: list2sparsetensor(
                            labels_dev[0], padded_value=dev_data.padded_value),
                        model.labels_st_pred_pl: list2sparsetensor(
                            predicted_ids_dev, padded_value=dev_data.padded_value)
                    }

                    # Compute accuracy
                    ler_train = sess.run(ler_op, feed_dict=feed_dict_ler_train)
                    ler_dev = sess.run(ler_op, feed_dict=feed_dict_ler_dev)
                    csv_ler_train.append(ler_train)
                    csv_ler_dev.append(ler_dev)

                    duration_step = time.time() - start_time_step
                    print("Step %d (epoch: %.3f): loss = %.3f (%.3f) / ler = %.3f (%.3f) / lr = %.5f (%.3f min)" %
                          (step + 1, train_data.epoch_detail, loss_train, loss_dev, ler_train, ler_dev,
                           learning_rate, duration_step / 60))
                    sys.stdout.flush()
                    start_time_step = time.time()

                # Save checkpoint and evaluate model per epoch
                if is_new_epoch:
                    duration_epoch = time.time() - start_time_epoch
                    print('-----EPOCH:%d (%.3f min)-----' %
                          (train_data.epoch, duration_epoch / 60))

                    # Save figure of loss & ler
                    plot_loss(csv_loss_train, csv_loss_dev, csv_steps,
                              save_path=model.save_path)
                    plot_ler(csv_ler_train, csv_ler_dev, csv_steps,
                             label_type=params['label_type'],
                             save_path=model.save_path)

                    if train_data.epoch >= params['eval_start_epoch']:
                        start_time_eval = time.time()
                        if 'char' in params['label_type']:
                            print('=== Dev Data Evaluation ===')
                            ler_dev_epoch, wer_dev_epoch = do_eval_cer(
                                session=sess,
                                decode_op=decode_op_infer,
                                model=model,
                                dataset=dev_data,
                                label_type=params['label_type'],
                                eval_batch_size=1)
                            print('  CER: %f %%' % (ler_dev_epoch * 100))
                            print('  WER: %f %%' % (wer_dev_epoch * 100))

                            if ler_dev_epoch < ler_dev_best:
                                ler_dev_best = ler_dev_epoch
                                print('■■■ ↑Best Score (CER)↑ ■■■')

                                # Save model only when best accuracy is
                                # obtained (check point)
                                checkpoint_file = join(
                                    model.save_path, 'model.ckpt')
                                save_path = saver.save(
                                    sess, checkpoint_file, global_step=train_data.epoch)
                                print("Model saved in file: %s" % save_path)

                                print('=== Test Data Evaluation ===')
                                ler_test, wer_test = do_eval_cer(
                                    session=sess,
                                    decode_op=decode_op_infer,
                                    model=model,
                                    dataset=test_data,
                                    label_type=params['label_type'],
                                    is_test=True,
                                    eval_batch_size=1)
                                print('  CER: %f %%' % (ler_test * 100))
                                print('  WER: %f %%' % (wer_test * 100))

                        else:
                            print('=== Dev Data Evaluation ===')
                            ler_dev_epoch = do_eval_per(
                                session=sess,
                                decode_op=decode_op_infer,
                                per_op=ler_op,
                                model=model,
                                dataset=dev_data,
                                label_type=params['label_type'],
                                eval_batch_size=1)
                            print('  PER: %f %%' % (ler_dev_epoch * 100))

                            if ler_dev_epoch < ler_dev_best:
                                ler_dev_best = ler_dev_epoch
                                print('■■■ ↑Best Score (PER)↑ ■■■')

                                # Save model only when best accuracy is
                                # obtained (check point)
                                checkpoint_file = join(
                                    model.save_path, 'model.ckpt')
                                save_path = saver.save(
                                    sess, checkpoint_file, global_step=train_data.epoch)
                                print("Model saved in file: %s" % save_path)

                                print('=== Test Data Evaluation ===')
                                ler_test = do_eval_per(
                                    session=sess,
                                    decode_op=decode_op_infer,
                                    per_op=ler_op,
                                    model=model,
                                    dataset=test_data,
                                    label_type=params['label_type'],
                                    is_test=True,
                                    eval_batch_size=1)
                                print('  PER: %f %%' % (ler_test * 100))

                        duration_eval = time.time() - start_time_eval
                        print('Evaluation time: %.3f min' %
                              (duration_eval / 60))

                        # Update learning rate
                        learning_rate = lr_controller.decay_lr(
                            learning_rate=learning_rate,
                            epoch=train_data.epoch,
                            value=ler_dev_epoch)

                    start_time_step = time.time()
                    start_time_epoch = time.time()

            duration_train = time.time() - start_time_train
            print('Total time: %.3f hour' % (duration_train / 3600))

            # Training was finished correctly
            with open(join(model.save_path, 'complete.txt'), 'w') as f:
                f.write('')
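
All three examples convert padded label batches with list2sparsetensor(labels, padded_value=...) before feeding the labels_st_* placeholders used by compute_ler. The helper itself is not shown above; here is a minimal sketch, assuming it simply drops padded positions and builds the tf.SparseTensorValue expected by TensorFlow's edit-distance op:

import numpy as np
import tensorflow as tf


def list2sparsetensor(labels, padded_value=-1):
    """Convert a padded batch of label-id sequences into a tf.SparseTensorValue
    (sketch; the repository's helper may differ in details).

    Args:
        labels: list or array of shape (batch_size, max_label_len)
        padded_value (int): id used for padding, skipped in the output
    Returns:
        tf.SparseTensorValue(indices, values, dense_shape)
    """
    indices, values = [], []
    for b, seq in enumerate(labels):
        for t, label_id in enumerate(seq):
            if label_id == padded_value:
                continue  # ignore padding positions
            indices.append([b, t])
            values.append(label_id)
    dense_shape = [len(labels), max(len(seq) for seq in labels)]
    return tf.SparseTensorValue(
        indices=np.array(indices, dtype=np.int64).reshape(-1, 2),
        values=np.array(values, dtype=np.int32),
        dense_shape=np.array(dense_shape, dtype=np.int64))
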