def do_eval(model, params, epoch, beam_width, eval_batch_size, temperature):
    """Evaluate the model.
    Args:
        model: the model to restore
        params (dict): A dictionary of parameters
        epoch (int): the epoch to restore
        beam_width (int): beam width for beam search.
            1 disables beam search, which means greedy decoding.
        eval_batch_size (int): the size of mini-batch in evaluation
        temperature (int): softmax temperature in the inference stage
    """
    if 'temp1' in params['teacher_model_path']:
        teacher_train_temperature = 1
    elif 'temp2' in params['teacher_model_path']:
        teacher_train_temperature = 2

    print('=' * 40)
    print(' frame stack %d' % int(params['num_stack']))
    print(' splice %d' % int(params['splice']))
    print(' beam width: %d' % beam_width)
    print(' temperature (teacher, training): %d' % teacher_train_temperature)
    print(' temperature (teacher, inference): %d' % params['teacher_temperature'])
    print(' temperature (training): %d' % params['student_temperature'])
    print(' temperature (inference): %d' % temperature)
    print('=' * 40)

    # Load dataset
    test_clean_data = Dataset(
        data_type='test_clean',
        train_data_size=params['train_data_size'],
        label_type=params['label_type'],
        batch_size=params['batch_size'] if eval_batch_size == -1 else eval_batch_size,
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        shuffle=False)
    test_other_data = Dataset(
        data_type='test_other',
        train_data_size=params['train_data_size'],
        label_type=params['label_type'],
        batch_size=eval_batch_size,
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        shuffle=False)

    with tf.name_scope('tower_gpu0') as scope:
        # Define placeholders
        model.create_placeholders_ctc()

        # Add to the graph each operation (including model definition)
        _, logits = model.compute_ctc_loss(
            model.inputs_pl_list[0],
            model.labels_pl_list[0],
            model.inputs_seq_len_pl_list[0],
            model.keep_prob_pl_list[0],
            scope,
            softmax_temperature=temperature,  # this is for training
            # is_training=False)
            is_training=True)
        logits /= temperature
        decode_op = model.decoder(logits,
                                  model.inputs_seq_len_pl_list[0],
                                  beam_width=beam_width)

    # Create a saver for writing training checkpoints
    saver = tf.train.Saver()

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(model.save_path)

        # If check point exists
        if ckpt:
            model_path = ckpt.model_checkpoint_path
            if epoch != -1:
                model_path = model_path.split('/')[:-1]
                model_path = '/'.join(model_path) + '/model.ckpt-' + str(epoch)
            saver.restore(sess, model_path)
            print("Model restored: " + model_path)
        else:
            raise ValueError('There are not any checkpoints.')

        print('Test Data Evaluation:')
        cer_clean_test, wer_clean_test = do_eval_cer(
            session=sess,
            decode_ops=[decode_op],
            model=model,
            dataset=test_clean_data,
            label_type=params['label_type'],
            is_test=True,
            eval_batch_size=eval_batch_size,
            progressbar=True)
        print(' CER (clean): %f %%' % (cer_clean_test * 100))
        print(' WER (clean): %f %%' % (wer_clean_test * 100))

        cer_other_test, wer_other_test = do_eval_cer(
            session=sess,
            decode_ops=[decode_op],
            model=model,
            dataset=test_other_data,
            label_type=params['label_type'],
            is_test=True,
            eval_batch_size=eval_batch_size,
            progressbar=True)
        print(' CER (other): %f %%' % (cer_other_test * 100))
        print(' WER (other): %f %%' % (wer_other_test * 100))
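

# ----------------------------------------------------------------------------
# NOTE: illustrative sketch, not part of the original script. The evaluation
# above divides the logits by `temperature` before decoding; the snippet below
# shows, in plain NumPy with made-up values, how a temperature > 1 flattens
# the resulting posterior distribution (the usual setting for distillation).
# ----------------------------------------------------------------------------
import numpy as np


def softmax_with_temperature(logits, temperature=1.0):
    """Temperature-scaled softmax: larger temperature gives smoother outputs."""
    scaled = logits / temperature
    scaled -= scaled.max(axis=-1, keepdims=True)  # numerical stability
    exp = np.exp(scaled)
    return exp / exp.sum(axis=-1, keepdims=True)


# Example: one frame of hypothetical logits over 4 classes
frame_logits = np.array([4.0, 1.0, 0.5, 0.1])
print(softmax_with_temperature(frame_logits, temperature=1))  # peaky
print(softmax_with_temperature(frame_logits, temperature=2))  # smoother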
def check_loading(self, label_type, data_type='dev_clean',
                  shuffle=False, sort_utt=False, sort_stop_epoch=None,
                  frame_stacking=False, splice=1, num_gpu=1):

    print('========================================')
    print(' label_type: %s' % label_type)
    print(' data_type: %s' % data_type)
    print(' shuffle: %s' % str(shuffle))
    print(' sort_utt: %s' % str(sort_utt))
    print(' sort_stop_epoch: %s' % str(sort_stop_epoch))
    print(' frame_stacking: %s' % str(frame_stacking))
    print(' splice: %d' % splice)
    print(' num_gpu: %d' % num_gpu)
    print('========================================')

    num_stack = 3 if frame_stacking else 1
    num_skip = 3 if frame_stacking else 1
    dataset = Dataset(
        data_type=data_type,
        train_data_size='train_clean100',
        label_type=label_type,
        batch_size=64,
        max_epoch=1,
        splice=splice,
        num_stack=num_stack,
        num_skip=num_skip,
        shuffle=shuffle,
        sort_utt=sort_utt,
        sort_stop_epoch=sort_stop_epoch,
        progressbar=True,
        num_gpu=num_gpu)

    print('=> Loading mini-batch...')
    if label_type == 'character':
        map_file_path = '../../metrics/mapping_files/ctc/character.txt'
    elif label_type == 'character_capital_divide':
        map_file_path = '../../metrics/mapping_files/ctc/character_capital.txt'
    elif label_type == 'word':
        map_file_path = '../../metrics/mapping_files/ctc/word_' + \
            dataset.train_data_size + '.txt'
    idx2char = Idx2char(map_file_path)
    idx2word = Idx2word(map_file_path)

    for data, is_new_epoch in dataset:
        inputs, labels, inputs_seq_len, input_names = data

        if not self.length_check:
            for i, l in zip(inputs[0], labels[0]):
                if len(i) < len(l):
                    raise ValueError(
                        'input length must be longer than label length.')
            self.length_check = True

        if num_gpu > 1:
            for inputs_gpu in inputs:
                print(inputs_gpu.shape)

        if label_type == 'word':
            if 'test' not in data_type:
                str_true = ' '.join(idx2word(labels[0][0]))
            else:
                word_list = np.delete(labels[0][0], np.where(
                    labels[0][0] == None), axis=0)
                str_true = ' '.join(word_list)
        else:
            str_true = idx2char(labels[0][0])
            str_true = re.sub(r'_', ' ', str_true)

        print('----- %s (epoch: %.3f) -----' %
              (input_names[0][0], dataset.epoch_detail))
        print(inputs[0].shape)
        print(str_true)

        if dataset.epoch_detail >= 0.05:
            break
def do_decode(model, params, epoch, beam_width):
    """Decode the CTC outputs.
    Args:
        model: the model to restore
        params (dict): A dictionary of parameters
        epoch (int): the epoch to restore
        beam_width (int, optional): beam width for beam search.
            1 disables beam search, which means greedy decoding.
    """
    # Load dataset
    test_clean_data = Dataset(
        data_type='test_clean',
        train_data_size=params['train_data_size'],
        label_type=params['label_type'],
        batch_size=1,
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        shuffle=False)
    test_other_data = Dataset(
        data_type='test_other',
        train_data_size=params['train_data_size'],
        label_type=params['label_type'],
        batch_size=1,
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        shuffle=False)

    with tf.name_scope('tower_gpu0'):
        # Define placeholders
        model.create_placeholders()

        # Add to the graph each operation (including model definition)
        _, logits = model.compute_loss(model.inputs_pl_list[0],
                                       model.labels_pl_list[0],
                                       model.inputs_seq_len_pl_list[0],
                                       model.keep_prob_input_pl_list[0],
                                       model.keep_prob_hidden_pl_list[0],
                                       model.keep_prob_output_pl_list[0])
        decode_op = model.decoder(logits,
                                  model.inputs_seq_len_pl_list[0],
                                  beam_width=beam_width)

    # Create a saver for writing training checkpoints
    saver = tf.train.Saver()

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(model.save_path)

        # If check point exists
        if ckpt:
            # Use last saved model
            model_path = ckpt.model_checkpoint_path
            if epoch != -1:
                model_path = model_path.split('/')[:-1]
                model_path = '/'.join(model_path) + '/model.ckpt-' + str(epoch)
            saver.restore(sess, model_path)
            print("Model restored: " + model_path)
        else:
            raise ValueError('There are not any checkpoints.')

        # Visualize
        decode_test(session=sess,
                    decode_op=decode_op,
                    model=model,
                    dataset=test_clean_data,
                    label_type=params['label_type'],
                    train_data_size=params['train_data_size'],
                    save_path=None)
        # save_path=model.save_path)
        decode_test(session=sess,
                    decode_op=decode_op,
                    model=model,
                    dataset=test_other_data,
                    label_type=params['label_type'],
                    train_data_size=params['train_data_size'],
                    save_path=None)
def do_eval(save_paths, params, beam_width, temperature_infer,
            result_save_path):
    """Evaluate the model.
    Args:
        save_paths (list): paths to the trained models used for the ensemble
        params (dict): A dictionary of parameters
        beam_width (int): beam width for beam search.
            1 disables beam search, which means greedy decoding.
        temperature_infer (int): temperature in the inference stage
        result_save_path (string, optional): path to save the evaluation log
    """
    if 'temp1' in save_paths[0]:
        temperature_train = 1
    elif 'temp2' in save_paths[0]:
        temperature_train = 2
    else:
        raise ValueError

    if result_save_path is not None:
        sys.stdout = open(
            join(result_save_path,
                 '4models_traintemp' + str(temperature_train) +
                 '_inftemp' + str(temperature_infer) + '.log'), 'w')

    print('=' * 30)
    print(' frame stack %d' % int(params['num_stack']))
    print(' beam width: %d' % beam_width)
    print(' ensemble: %d' % len(save_paths))
    print(' temperature (training): %d' % temperature_train)
    print(' temperature (inference): %d' % temperature_infer)
    print('=' * 30)

    # Load dataset
    test_clean_data = Dataset(
        data_type='test_clean',
        train_data_size=params['train_data_size'],
        label_type=params['label_type'],
        batch_size=1,
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        sort_utt=True)
    test_other_data = Dataset(
        data_type='test_other',
        train_data_size=params['train_data_size'],
        label_type=params['label_type'],
        batch_size=1,
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        sort_utt=True)

    print('Test Data Evaluation:')
    cer_clean_test, wer_clean_test = do_eval_cer(
        save_paths=save_paths,
        dataset=test_clean_data,
        data_type='test_clean',
        label_type=params['label_type'],
        num_classes=params['num_classes'] + 1,
        beam_width=beam_width,
        temperature_infer=temperature_infer,
        is_test=True,
        progressbar=True)
    print(' CER (clean): %f %%' % (cer_clean_test * 100))
    print(' WER (clean): %f %%' % (wer_clean_test * 100))

    cer_other_test, wer_other_test = do_eval_cer(
        save_paths=save_paths,
        dataset=test_other_data,
        data_type='test_other',
        label_type=params['label_type'],
        num_classes=params['num_classes'] + 1,
        beam_width=beam_width,
        temperature_infer=temperature_infer,
        is_test=True,
        progressbar=True)
    print(' CER (other): %f %%' % (cer_other_test * 100))
    print(' WER (other): %f %%' % (wer_other_test * 100))
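

# ----------------------------------------------------------------------------
# NOTE: illustrative sketch, not part of the original script. The ensemble
# evaluation above delegates the actual model combination to do_eval_cer; a
# common way to combine CTC models (not necessarily this project's exact
# implementation) is to average the frame-level posteriors of the restored
# models before beam-search decoding. The helper below is hypothetical.
# ----------------------------------------------------------------------------
import numpy as np


def average_ensemble_posteriors(posteriors_list):
    """Average per-frame posteriors from several models.

    posteriors_list: list of arrays, each of shape (num_frames, num_classes),
    one per model in the ensemble (assumed input format).
    """
    stacked = np.stack(posteriors_list, axis=0)   # (num_models, T, C)
    averaged = stacked.mean(axis=0)               # (T, C)
    # Re-normalize in case of numerical drift
    return averaged / averaged.sum(axis=-1, keepdims=True)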
def do_eval(model, params, epoch, eval_batch_size, beam_width):
    """Evaluate the model.
    Args:
        model: the model to restore
        params (dict): A dictionary of parameters
        epoch (int): the epoch to restore
        eval_batch_size (int): the size of mini-batch in evaluation
        beam_width (int, optional): beam width for beam search.
            1 disables beam search, which means greedy decoding.
    """
    # Load dataset
    test_clean_data = Dataset(
        data_type='test_clean',
        train_data_size=params['train_data_size'],
        label_type=params['label_type'],
        batch_size=params['batch_size'] if eval_batch_size == -1 else eval_batch_size,
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        shuffle=False)
    test_other_data = Dataset(
        data_type='test_other',
        train_data_size=params['train_data_size'],
        label_type=params['label_type'],
        batch_size=params['batch_size'] if eval_batch_size == -1 else eval_batch_size,
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        shuffle=False)

    with tf.name_scope('tower_gpu0'):
        # Define placeholders
        model.create_placeholders()

        # Add to the graph each operation (including model definition)
        _, logits = model.compute_loss(model.inputs_pl_list[0],
                                       model.labels_pl_list[0],
                                       model.inputs_seq_len_pl_list[0],
                                       model.keep_prob_pl_list[0],
                                       is_training=False)
        decode_op = model.decoder(logits,
                                  model.inputs_seq_len_pl_list[0],
                                  beam_width=beam_width)
        posteriors_op = model.posteriors(logits)

    # Create a saver for writing training checkpoints
    saver = tf.train.Saver()

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(model.save_path)

        # If check point exists
        if ckpt:
            model_path = ckpt.model_checkpoint_path
            if epoch != -1:
                model_path = model_path.split('/')[:-1]
                model_path = '/'.join(model_path) + '/model.ckpt-' + str(epoch)
            saver.restore(sess, model_path)
            print("Model restored: " + model_path)
        else:
            raise ValueError('There are not any checkpoints.')

        print('Test Data Evaluation:')
        if 'char' in params['label_type']:
            if DECODER_TYPE == 1:
                cer_clean_test, wer_clean_test = do_eval_cer(
                    session=sess,
                    decode_ops=[decode_op],
                    model=model,
                    dataset=test_clean_data,
                    label_type=params['label_type'],
                    is_test=True,
                    # eval_batch_size=eval_batch_size,
                    progressbar=True)
                print(' WER (clean): %f %%' % (wer_clean_test * 100))
                print(' CER (clean): %f %%' % (cer_clean_test * 100))

                cer_other_test, wer_other_test = do_eval_cer(
                    session=sess,
                    decode_ops=[decode_op],
                    model=model,
                    dataset=test_other_data,
                    label_type=params['label_type'],
                    is_test=True,
                    # eval_batch_size=eval_batch_size,
                    progressbar=True)
                print(' WER (other): %f %%' % (wer_other_test * 100))
                print(' CER (other): %f %%' % (cer_other_test * 100))

            elif DECODER_TYPE == 2:
                cer_clean_test, wer_clean_test = do_eval_cer2(
                    session=sess,
                    # beam_width=beam_width,
                    beam_width=20,
                    posteriors_ops=[posteriors_op],
                    model=model,
                    dataset=test_clean_data,
                    label_type=params['label_type'],
                    is_test=True,
                    eval_batch_size=20,
                    # eval_batch_size=eval_batch_size,
                    progressbar=True)
                print(' WER (clean): %f %%' % (wer_clean_test * 100))
                print(' CER (clean): %f %%' % (cer_clean_test * 100))

                cer_other_test, wer_other_test = do_eval_cer2(
                    session=sess,
                    # beam_width=beam_width,
                    beam_width=20,
                    posteriors_ops=[posteriors_op],
                    model=model,
                    dataset=test_other_data,
                    label_type=params['label_type'],
                    is_test=True,
                    eval_batch_size=20,
                    # eval_batch_size=eval_batch_size,
                    progressbar=True)
                print(' WER (other): %f %%' % (wer_other_test * 100))
                print(' CER (other): %f %%' % (cer_other_test * 100))
        else:
            wer_clean_test = do_eval_wer(
                session=sess,
                decode_ops=[decode_op],
                model=model,
                dataset=test_clean_data,
                train_data_size=params['train_data_size'],
                is_test=True,
                # eval_batch_size=eval_batch_size,
                progressbar=True)
            print(' WER (clean): %f %%' % (wer_clean_test * 100))

            wer_other_test = do_eval_wer(
                session=sess,
                decode_ops=[decode_op],
                model=model,
                dataset=test_other_data,
                train_data_size=params['train_data_size'],
                is_test=True,
                # eval_batch_size=eval_batch_size,
                progressbar=True)
            print(' WER (other): %f %%' % (wer_other_test * 100))
def check(self, label_type, data_type='dev_clean',
          shuffle=False, sort_utt=False, sort_stop_epoch=None,
          frame_stacking=False, splice=1, num_gpu=1):

    print('========================================')
    print(' label_type: %s' % label_type)
    print(' data_type: %s' % data_type)
    print(' shuffle: %s' % str(shuffle))
    print(' sort_utt: %s' % str(sort_utt))
    print(' sort_stop_epoch: %s' % str(sort_stop_epoch))
    print(' frame_stacking: %s' % str(frame_stacking))
    print(' splice: %d' % splice)
    print(' num_gpu: %d' % num_gpu)
    print('========================================')

    num_stack = 3 if frame_stacking else 1
    num_skip = 3 if frame_stacking else 1
    dataset = Dataset(data_type=data_type,
                      train_data_size='train100h',
                      label_type=label_type,
                      batch_size=64,
                      max_epoch=2,
                      splice=splice,
                      num_stack=num_stack,
                      num_skip=num_skip,
                      shuffle=shuffle,
                      sort_utt=sort_utt,
                      sort_stop_epoch=sort_stop_epoch,
                      progressbar=True,
                      num_gpu=num_gpu)

    print('=> Loading mini-batch...')
    if label_type == 'character':
        map_file_path = '../../metrics/mapping_files/character.txt'
    else:
        map_file_path = '../../metrics/mapping_files/' + label_type + '_' + \
            dataset.train_data_size + '.txt'
    idx2char = Idx2char(map_file_path)
    idx2word = Idx2word(map_file_path)

    for data, is_new_epoch in dataset:
        inputs, labels, inputs_seq_len, input_names = data

        if data_type == 'train':
            for i, l in zip(inputs[0], labels[0]):
                if len(i) < len(l):
                    raise ValueError(
                        'input length must be longer than label length.')

        if num_gpu > 1:
            for inputs_gpu in inputs:
                print(inputs_gpu.shape)

        if 'test' in data_type:
            str_true = labels[0][0][0]
        else:
            if 'word' in label_type:
                str_true = '_'.join(idx2word(labels[0][0]))
            else:
                str_true = idx2char(labels[0][0])

        print('----- %s (epoch: %.3f) -----' %
              (input_names[0][0], dataset.epoch_detail))
        print(inputs[0].shape)
        print(str_true)

        if dataset.epoch_detail >= 0.1:
            break
def do_train(model, params, gpu_indices):
    """Run CTC training.
    Args:
        model: the model to train
        params (dict): A dictionary of parameters
        gpu_indices (list): GPU indices
    """
    # Load dataset
    train_data = Dataset(data_type='train',
                         train_data_size=params['train_data_size'],
                         label_type=params['label_type'],
                         batch_size=params['batch_size'],
                         max_epoch=params['num_epoch'],
                         splice=params['splice'],
                         num_stack=params['num_stack'],
                         num_skip=params['num_skip'],
                         sort_utt=True,
                         sort_stop_epoch=params['sort_stop_epoch'],
                         num_gpu=len(gpu_indices))
    dev_data_clean = Dataset(data_type='dev_clean',
                             train_data_size=params['train_data_size'],
                             label_type=params['label_type'],
                             batch_size=params['batch_size'],
                             splice=params['splice'],
                             num_stack=params['num_stack'],
                             num_skip=params['num_skip'],
                             sort_utt=False,
                             num_gpu=len(gpu_indices))
    dev_data_other = Dataset(data_type='dev_other',
                             train_data_size=params['train_data_size'],
                             label_type=params['label_type'],
                             batch_size=params['batch_size'],
                             splice=params['splice'],
                             num_stack=params['num_stack'],
                             num_skip=params['num_skip'],
                             sort_utt=False,
                             num_gpu=len(gpu_indices))

    # Tell TensorFlow that the model will be built into the default graph
    with tf.Graph().as_default(), tf.device('/cpu:0'):

        # Create a variable to track the global step
        global_step = tf.Variable(0, name='global_step', trainable=False)

        # Set optimizer
        learning_rate_pl = tf.placeholder(tf.float32, name='learning_rate')
        optimizer = model._set_optimizer(params['optimizer'], learning_rate_pl)

        # Calculate the gradients for each model tower
        total_grads_and_vars, total_losses = [], []
        decode_ops, ler_ops = [], []
        all_devices = ['/gpu:%d' % i_gpu for i_gpu in range(len(gpu_indices))]
        # NOTE: /cpu:0 is prepared for evaluation
        with tf.variable_scope(tf.get_variable_scope()):
            for i_gpu in range(len(all_devices)):
                with tf.device(all_devices[i_gpu]):
                    with tf.name_scope('tower_gpu%d' % i_gpu) as scope:
                        # Define placeholders in each tower
                        model.create_placeholders()

                        # Calculate the total loss for the current tower of the
                        # model. This function constructs the entire model but
                        # shares the variables across all towers.
                        tower_loss, tower_logits = model.compute_loss(
                            model.inputs_pl_list[i_gpu],
                            model.labels_pl_list[i_gpu],
                            model.inputs_seq_len_pl_list[i_gpu],
                            model.keep_prob_input_pl_list[i_gpu],
                            model.keep_prob_hidden_pl_list[i_gpu],
                            model.keep_prob_output_pl_list[i_gpu],
                            scope)
                        tower_loss = tf.expand_dims(tower_loss, axis=0)
                        total_losses.append(tower_loss)

                        # Reuse variables for the next tower
                        tf.get_variable_scope().reuse_variables()

                        # Calculate the gradients for the batch of data on this
                        # tower
                        tower_grads_and_vars = optimizer.compute_gradients(
                            tower_loss)

                        # Gradient clipping
                        tower_grads_and_vars = model._clip_gradients(
                            tower_grads_and_vars, _clip_norm=False)

                        # TODO: Optionally add gradient noise

                        # Keep track of the gradients across all towers
                        total_grads_and_vars.append(tower_grads_and_vars)

                        # Add to the graph each operation per tower
                        decode_op_tower = model.decoder(
                            tower_logits,
                            model.inputs_seq_len_pl_list[i_gpu],
                            beam_width=params['beam_width'])
                        decode_ops.append(decode_op_tower)
                        ler_op_tower = model.compute_ler(
                            decode_op_tower, model.labels_pl_list[i_gpu])
                        ler_op_tower = tf.expand_dims(ler_op_tower, axis=0)
                        ler_ops.append(ler_op_tower)

        # Aggregate losses, then calculate average loss
        total_losses = tf.concat(axis=0, values=total_losses)
        loss_op = tf.reduce_mean(total_losses, axis=0)
        ler_ops = tf.concat(axis=0, values=ler_ops)
        ler_op = tf.reduce_mean(ler_ops, axis=0)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers
        average_grads_and_vars = average_gradients(total_grads_and_vars)

        # Apply the gradients to adjust the shared variables.
        train_op = optimizer.apply_gradients(average_grads_and_vars,
                                             global_step=global_step)

        # Define learning rate controller
        lr_controller = Controller(
            learning_rate_init=params['learning_rate'],
            decay_start_epoch=params['decay_start_epoch'],
            decay_rate=params['decay_rate'],
            decay_patient_epoch=params['decay_patient_epoch'],
            lower_better=True)

        # Build the summary tensor based on the TensorFlow collection of
        # summaries
        summary_train = tf.summary.merge(model.summaries_train)
        summary_dev = tf.summary.merge(model.summaries_dev)

        # Add the variable initializer operation
        init_op = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints
        saver = tf.train.Saver(max_to_keep=None)

        # Count total parameters
        parameters_dict, total_parameters = count_total_parameters(
            tf.trainable_variables())
        for parameter_name in sorted(parameters_dict.keys()):
            print("%s %d" % (parameter_name, parameters_dict[parameter_name]))
        print("Total %d variables, %s M parameters" %
              (len(parameters_dict.keys()),
               "{:,}".format(total_parameters / 1000000)))

        csv_steps, csv_loss_train, csv_loss_dev = [], [], []
        csv_ler_train, csv_ler_dev = [], []
        # Create a session for running operation on the graph
        # NOTE: Start running operations on the Graph. allow_soft_placement
        # must be set to True to build towers on GPU, as some of the ops do not
        # have GPU implementations.
        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False)) as sess:

            # Instantiate a SummaryWriter to output summaries and the graph
            summary_writer = tf.summary.FileWriter(model.save_path, sess.graph)

            # Initialize parameters
            sess.run(init_op)

            # Train model
            start_time_train = time.time()
            start_time_epoch = time.time()
            start_time_step = time.time()
            ler_dev_best = 1
            learning_rate = float(params['learning_rate'])
            for step, (data, is_new_epoch) in enumerate(train_data):

                # Create feed dictionary for next mini batch (train)
                inputs, labels, inputs_seq_len, _ = data
                feed_dict_train = {}
                for i_gpu in range(len(gpu_indices)):
                    feed_dict_train[model.inputs_pl_list[i_gpu]] = inputs[i_gpu]
                    feed_dict_train[model.labels_pl_list[i_gpu]] = list2sparsetensor(
                        labels[i_gpu], padded_value=train_data.padded_value)
                    feed_dict_train[model.inputs_seq_len_pl_list[i_gpu]] = inputs_seq_len[i_gpu]
                    feed_dict_train[model.keep_prob_input_pl_list[i_gpu]] = params['dropout_input']
                    feed_dict_train[model.keep_prob_hidden_pl_list[i_gpu]] = params['dropout_hidden']
                    feed_dict_train[model.keep_prob_output_pl_list[i_gpu]] = params['dropout_output']
                feed_dict_train[learning_rate_pl] = learning_rate

                # Update parameters
                sess.run(train_op, feed_dict=feed_dict_train)

                if (step + 1) % int(params['print_step'] / len(gpu_indices)) == 0:

                    # Create feed dictionary for next mini batch (dev)
                    (inputs, labels, inputs_seq_len, _), _ = dev_data_other.next()
                    feed_dict_dev = {}
                    for i_gpu in range(len(gpu_indices)):
                        feed_dict_dev[model.inputs_pl_list[i_gpu]] = inputs[i_gpu]
                        feed_dict_dev[model.labels_pl_list[i_gpu]] = list2sparsetensor(
                            labels[i_gpu], padded_value=dev_data_other.padded_value)
                        feed_dict_dev[model.inputs_seq_len_pl_list[i_gpu]] = inputs_seq_len[i_gpu]
                        feed_dict_dev[model.keep_prob_input_pl_list[i_gpu]] = 1.0
                        feed_dict_dev[model.keep_prob_hidden_pl_list[i_gpu]] = 1.0
                        feed_dict_dev[model.keep_prob_output_pl_list[i_gpu]] = 1.0

                    # Compute loss
                    loss_train = sess.run(loss_op, feed_dict=feed_dict_train)
                    loss_dev = sess.run(loss_op, feed_dict=feed_dict_dev)
                    csv_steps.append(step)
                    csv_loss_train.append(loss_train)
                    csv_loss_dev.append(loss_dev)

                    # Change to evaluation mode
                    for i_gpu in range(len(gpu_indices)):
                        feed_dict_train[model.keep_prob_input_pl_list[i_gpu]] = 1.0
                        feed_dict_train[model.keep_prob_hidden_pl_list[i_gpu]] = 1.0
                        feed_dict_train[model.keep_prob_output_pl_list[i_gpu]] = 1.0

                    # Compute accuracy & update event files
                    ler_train, summary_str_train = sess.run(
                        [ler_op, summary_train], feed_dict=feed_dict_train)
                    ler_dev, summary_str_dev = sess.run(
                        [ler_op, summary_dev], feed_dict=feed_dict_dev)
                    csv_ler_train.append(ler_train)
                    csv_ler_dev.append(ler_dev)
                    summary_writer.add_summary(summary_str_train, step + 1)
                    summary_writer.add_summary(summary_str_dev, step + 1)
                    summary_writer.flush()

                    duration_step = time.time() - start_time_step
                    print("Step %d (epoch: %.3f): loss = %.3f (%.3f) / ler = %.3f (%.3f) / lr = %.5f (%.3f min)" %
                          (step + 1, train_data.epoch_detail, loss_train, loss_dev,
                           ler_train, ler_dev, learning_rate, duration_step / 60))
                    sys.stdout.flush()
                    start_time_step = time.time()

                # Save checkpoint and evaluate model per epoch
                if is_new_epoch:
                    duration_epoch = time.time() - start_time_epoch
                    print('-----EPOCH:%d (%.3f min)-----' %
                          (train_data.epoch, duration_epoch / 60))

                    # Save figure of loss & ler
                    plot_loss(csv_loss_train, csv_loss_dev, csv_steps,
                              save_path=model.save_path)
                    plot_ler(csv_ler_train, csv_ler_dev, csv_steps,
                             label_type=params['label_type'],
                             save_path=model.save_path)

                    # Save model (check point)
                    checkpoint_file = join(model.save_path, 'model.ckpt')
                    save_path = saver.save(
                        sess, checkpoint_file, global_step=train_data.epoch)
                    print("Model saved in file: %s" % save_path)

                    if train_data.epoch >= params['eval_start_epoch']:
                        start_time_eval = time.time()
                        if params['label_type'] != 'word':
                            print('=== Dev Data Evaluation ===')
                            # Dev-clean
                            ler_dev_clean_epoch, wer_dev_clean_epoch = do_eval_cer(
                                session=sess,
                                decode_ops=decode_ops,
                                model=model,
                                dataset=dev_data_clean,
                                label_type=params['label_type'],
                                eval_batch_size=params['batch_size'])
                            print(' CER (clean): %f %%' %
                                  (ler_dev_clean_epoch * 100))
                            print(' WER (clean): %f %%' %
                                  (wer_dev_clean_epoch * 100))

                            # Dev-other
                            ler_dev_other_epoch, wer_dev_other_epoch = do_eval_cer(
                                session=sess,
                                decode_ops=decode_ops,
                                model=model,
                                dataset=dev_data_other,
                                label_type=params['label_type'],
                                eval_batch_size=params['batch_size'])
                            print(' CER (other): %f %%' %
                                  (ler_dev_other_epoch * 100))
                            print(' WER (other): %f %%' %
                                  (wer_dev_other_epoch * 100))

                            if ler_dev_other_epoch < ler_dev_best:
                                ler_dev_best = ler_dev_other_epoch
                                print('■■■ ↑Best Score (CER)↑ ■■■')
                        else:
                            print('=== Dev Data Evaluation ===')
                            # Dev-clean
                            ler_dev_clean_epoch = do_eval_wer(
                                session=sess,
                                decode_ops=decode_ops,
                                model=model,
                                dataset=dev_data_clean,
                                train_data_size=params['train_data_size'],
                                eval_batch_size=params['batch_size'])
                            print(' WER (clean): %f %%' %
                                  (ler_dev_clean_epoch * 100))

                            # Dev-other
                            ler_dev_other_epoch = do_eval_wer(
                                session=sess,
                                decode_ops=decode_ops,
                                model=model,
                                dataset=dev_data_other,
                                train_data_size=params['train_data_size'],
                                eval_batch_size=params['batch_size'])
                            print(' WER (other): %f %%' %
                                  (ler_dev_other_epoch * 100))

                            if ler_dev_other_epoch < ler_dev_best:
                                ler_dev_best = ler_dev_other_epoch
                                print('■■■ ↑Best Score (WER)↑ ■■■')

                        duration_eval = time.time() - start_time_eval
                        print('Evaluation time: %.3f min' %
                              (duration_eval / 60))

                        # Update learning rate
                        learning_rate = lr_controller.decay_lr(
                            learning_rate=learning_rate,
                            epoch=train_data.epoch,
                            value=ler_dev_other_epoch)

                    start_time_epoch = time.time()

            duration_train = time.time() - start_time_train
            print('Total time: %.3f hour' % (duration_train / 3600))

            # Training was finished correctly
            with open(join(model.save_path, 'complete.txt'), 'w') as f:
                f.write('')
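

# ----------------------------------------------------------------------------
# NOTE: illustrative sketch, not part of the original script. `average_gradients`
# is called in do_train above but defined elsewhere; a common multi-tower
# implementation (patterned on the TensorFlow multi-GPU tutorials, not
# necessarily identical to this project's version) looks like this. Gradients
# are assumed to be non-None for every variable on every tower.
# ----------------------------------------------------------------------------
import tensorflow as tf


def average_gradients(tower_grads_and_vars):
    """Average the gradient of each shared variable across all towers.

    tower_grads_and_vars: list (one entry per tower) of lists of
    (gradient, variable) pairs, as returned by optimizer.compute_gradients().
    """
    average_grads_and_vars = []
    for grads_and_vars in zip(*tower_grads_and_vars):
        # grads_and_vars is ((grad_gpu0, var), (grad_gpu1, var), ...)
        grads = [tf.expand_dims(g, 0) for g, _ in grads_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        # The variable is shared across towers, so the first tower's is enough
        average_grads_and_vars.append((grad, grads_and_vars[0][1]))
    return average_grads_and_vars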
def do_save(model, params, epoch, eval_batch_size):
    """Save the CTC outputs.
    Args:
        model: the model to restore
        params (dict): A dictionary of parameters
        epoch (int): the epoch to restore
        eval_batch_size (int): the size of mini-batch in evaluation
    """
    # Load dataset
    train_data = Dataset(data_type='train',
                         train_data_size=params['train_data_size'],
                         label_type=params['label_type'],
                         batch_size=eval_batch_size,
                         splice=params['splice'],
                         num_stack=params['num_stack'],
                         num_skip=params['num_skip'],
                         sort_utt=True)

    with tf.name_scope('tower_gpu0'):
        # Define placeholders
        model.create_placeholders()

        # Add to the graph each operation (including model definition)
        _, logits = model.compute_loss(
            model.inputs_pl_list[0],
            model.labels_pl_list[0],
            model.inputs_seq_len_pl_list[0],
            model.keep_prob_input_pl_list[0],
            model.keep_prob_hidden_pl_list[0],
            model.keep_prob_output_pl_list[0],
            softmax_temperature=params['softmax_temperature'])
        posteriors_op = model.posteriors(logits, blank_prior=1)

    # Create a saver for writing training checkpoints
    saver = tf.train.Saver()

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(model.save_path)

        # If check point exists
        if ckpt:
            # Use last saved model
            model_path = ckpt.model_checkpoint_path
            if epoch != -1:
                model_path = model_path.split('/')[:-1]
                model_path = '/'.join(model_path) + '/model.ckpt-' + str(epoch)
            saver.restore(sess, model_path)
            print("Model restored: " + model_path)
        else:
            raise ValueError('There are not any checkpoints.')

        for data, is_new_epoch in train_data:

            # Create feed dictionary for next mini batch
            inputs, _, inputs_seq_len, input_names = data
            feed_dict = {
                model.inputs_pl_list[0]: inputs[0],
                model.inputs_seq_len_pl_list[0]: inputs_seq_len[0],
                model.keep_prob_input_pl_list[0]: 1.0,
                model.keep_prob_hidden_pl_list[0]: 1.0,
                model.keep_prob_output_pl_list[0]: 1.0
            }

            batch_size, max_frame_num = inputs[0].shape[:2]
            posteriors = sess.run(posteriors_op, feed_dict=feed_dict)
            posteriors = posteriors.reshape(-1, max_frame_num,
                                            model.num_classes)

            for i_batch in range(batch_size):
                prob = posteriors[i_batch][:int(inputs_seq_len[0][i_batch]), :]

                # Save as a npy file
                np.save(mkdir_join(model.save_path, 'probs',
                                   input_names[0][i_batch]), prob)

            if is_new_epoch:
                break
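

# ----------------------------------------------------------------------------
# NOTE: illustrative sketch, not part of the original script. The loop above
# writes one .npy posterior file per utterance under <save_path>/probs; the
# files can be read back with NumPy as shown below. The path and utterance id
# are made up for the example.
# ----------------------------------------------------------------------------
import os
import numpy as np

save_path = '/path/to/model'       # hypothetical value of model.save_path
utt_name = '1089-134686-0000'      # hypothetical utterance id (input_names entry)
prob = np.load(os.path.join(save_path, 'probs', utt_name + '.npy'))

# prob has shape (num_frames, num_classes); each row sums to roughly 1
print(prob.shape, prob[0].sum())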
def do_save(model, params, epoch, eval_batch_size, temperature):
    """Save the CTC outputs.
    Args:
        model: the model to restore
        params (dict): A dictionary of parameters
        epoch (int): the epoch to restore
        eval_batch_size (int): the size of mini-batch in evaluation
        temperature (int): softmax temperature applied to the logits
            before computing the posteriors
    """
    print('=' * 30)
    print(' frame stack %d' % int(params['num_stack']))
    print(' splice %d' % int(params['splice']))
    print(' temperature (training): %d' % temperature)
    print('=' * 30)

    # Load dataset
    train_data = Dataset(
        data_type='train',
        train_data_size=params['train_data_size'],
        label_type=params['label_type'],
        batch_size=params['batch_size'] if eval_batch_size == -1 else eval_batch_size,
        max_epoch=3,
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        shuffle=True,
        num_gpu=1)
    dev_clean_data = Dataset(
        data_type='dev_clean',
        train_data_size=params['train_data_size'],
        label_type=params['label_type'],
        batch_size=params['batch_size'] if eval_batch_size == -1 else eval_batch_size,
        max_epoch=3,
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        shuffle=True,
        num_gpu=1)
    dev_other_data = Dataset(
        data_type='dev_other',
        train_data_size=params['train_data_size'],
        label_type=params['label_type'],
        batch_size=params['batch_size'] if eval_batch_size == -1 else eval_batch_size,
        max_epoch=3,
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        shuffle=True,
        num_gpu=1)
    test_clean_data = Dataset(
        data_type='test_clean',
        train_data_size=params['train_data_size'],
        label_type=params['label_type'],
        batch_size=params['batch_size'] if eval_batch_size == -1 else eval_batch_size,
        max_epoch=3,
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        shuffle=True,
        num_gpu=1)
    test_other_data = Dataset(
        data_type='test_other',
        train_data_size=params['train_data_size'],
        label_type=params['label_type'],
        batch_size=params['batch_size'] if eval_batch_size == -1 else eval_batch_size,
        max_epoch=3,
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        shuffle=True,
        num_gpu=1)

    with tf.name_scope('tower_gpu0'):
        # Define placeholders
        model.create_placeholders()

        # Add to the graph each operation (including model definition)
        _, logits = model.compute_loss(
            model.inputs_pl_list[0],
            model.labels_pl_list[0],
            model.inputs_seq_len_pl_list[0],
            model.keep_prob_pl_list[0])
        logits /= temperature
        posteriors_op = model.posteriors(logits, blank_prior=1)

    # Create a saver for writing training checkpoints
    saver = tf.train.Saver()

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(model.save_path)

        # If check point exists
        if ckpt:
            model_path = ckpt.model_checkpoint_path
            if epoch != -1:
                model_path = model_path.split('/')[:-1]
                model_path = '/'.join(model_path) + '/model.ckpt-' + str(epoch)
            saver.restore(sess, model_path)
            print("Model restored: " + model_path)
        else:
            raise ValueError('There are not any checkpoints.')

        #########################
        # Save soft targets
        #########################
        # train100h
        # save(session=sess,
        #      posteriors_op=posteriors_op,
        #      model=model,
        #      dataset=train_data,
        #      data_type='train',
        #      num_stack=params['num_stack'],
        #      save_prob=False,
        #      save_soft_targets=True,
        #      save_path=mkdir_join(model.save_path, 'temp' + str(temperature), 'train'))

        # dev
        # save(session=sess,
        #      posteriors_op=posteriors_op,
        #      model=model,
        #      dataset=dev_clean_data,
        #      data_type='dev_clean',
        #      num_stack=params['num_stack'],
        #      save_prob=False,
        #      save_soft_targets=True,
        #      save_path=mkdir_join(model.save_path, 'temp' + str(temperature), 'dev_clean'))
        # save(session=sess,
        #      posteriors_op=posteriors_op,
        #      model=model,
        #      dataset=dev_other_data,
        #      data_type='dev_other',
        #      num_stack=params['num_stack'],
        #      save_prob=False,
        #      save_soft_targets=True,
        #      save_path=mkdir_join(model.save_path, 'temp' + str(temperature), 'dev_other'))

        # test
        save(session=sess,
             posteriors_op=posteriors_op,
             model=model,
             dataset=test_clean_data,
             data_type='test_clean',
             num_stack=params['num_stack'],
             save_prob=True,
             save_soft_targets=False,
             save_path=mkdir_join(model.save_path,
                                  'temp' + str(temperature), 'test_clean'))
        save(session=sess,
             posteriors_op=posteriors_op,
             model=model,
             dataset=test_other_data,
             data_type='test_other',
             num_stack=params['num_stack'],
             save_prob=True,
             save_soft_targets=False,
             save_path=mkdir_join(model.save_path,
                                  'temp' + str(temperature), 'test_other'))
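

# ----------------------------------------------------------------------------
# NOTE: illustrative sketch, not part of the original script. The smoothed
# posteriors saved above are presumably used as teacher soft targets for
# knowledge distillation. The NumPy function below shows, under that
# assumption, the usual frame-level soft-target cross-entropy; it is not this
# repository's actual training loss, and the shapes and temperature are
# illustrative.
# ----------------------------------------------------------------------------
import numpy as np


def soft_target_loss(student_logits, teacher_probs, temperature=2.0):
    """Cross-entropy between teacher soft targets and the student's
    temperature-softened output distribution.

    student_logits: (num_frames, num_classes) raw student outputs
    teacher_probs:  (num_frames, num_classes) saved teacher posteriors
    """
    scaled = student_logits / temperature
    scaled -= scaled.max(axis=-1, keepdims=True)
    log_probs = scaled - np.log(np.exp(scaled).sum(axis=-1, keepdims=True))
    # The T**2 factor keeps gradient magnitudes comparable to the
    # hard-target loss (Hinton et al., 2015)
    return -(teacher_probs * log_probs).sum(axis=-1).mean() * temperature ** 2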
def do_train(model, params, gpu_indices):
    """Run CTC training.
    Args:
        model: the model to train
        params (dict): A dictionary of parameters
        gpu_indices (list): GPU indices
    """
    # Load dataset
    train_data = Dataset(
        data_type='train',
        train_data_size=params['train_data_size'],
        label_type=params['label_type'],
        batch_size=params['batch_size'],
        max_epoch=params['num_epoch'],
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        sort_utt=True,
        sort_stop_epoch=params['sort_stop_epoch'],
        num_gpu=len(gpu_indices))
    dev_clean_data = Dataset(
        data_type='dev_clean',
        train_data_size=params['train_data_size'],
        label_type=params['label_type'],
        batch_size=params['batch_size'],
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        sort_utt=False,
        num_gpu=len(gpu_indices))
    dev_other_data = Dataset(
        data_type='dev_other',
        train_data_size=params['train_data_size'],
        label_type=params['label_type'],
        batch_size=params['batch_size'],
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        sort_utt=False,
        num_gpu=len(gpu_indices))
    test_clean_data = Dataset(
        data_type='test_clean',
        train_data_size=params['train_data_size'],
        label_type=params['label_type'],
        batch_size=params['batch_size'],
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        sort_utt=False)
    test_other_data = Dataset(
        data_type='test_other',
        train_data_size=params['train_data_size'],
        label_type=params['label_type'],
        batch_size=params['batch_size'],
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        sort_utt=False)

    # Tell TensorFlow that the model will be built into the default graph
    with tf.Graph().as_default(), tf.device('/cpu:0'):

        # Create a variable to track the global step
        global_step = tf.Variable(0, name='global_step', trainable=False)

        # Set optimizer
        learning_rate_pl = tf.placeholder(tf.float32, name='learning_rate')
        optimizer = model._set_optimizer(params['optimizer'], learning_rate_pl)

        # Calculate the gradients for each model tower
        total_grads_and_vars, total_losses = [], []
        decode_ops, ler_ops = [], []
        all_devices = ['/gpu:%d' % i_gpu for i_gpu in range(len(gpu_indices))]
        # NOTE: /cpu:0 is prepared for evaluation
        with tf.variable_scope(tf.get_variable_scope()):
            for i_gpu in range(len(all_devices)):
                with tf.device(all_devices[i_gpu]):
                    with tf.name_scope('tower_gpu%d' % i_gpu) as scope:
                        # Define placeholders in each tower
                        model.create_placeholders()

                        # Calculate the total loss for the current tower of the
                        # model. This function constructs the entire model but
                        # shares the variables across all towers.
                        tower_loss, tower_logits = model.compute_loss(
                            model.inputs_pl_list[i_gpu],
                            model.labels_pl_list[i_gpu],
                            model.inputs_seq_len_pl_list[i_gpu],
                            model.keep_prob_pl_list[i_gpu],
                            scope,
                            softmax_temperature=params['softmax_temperature'])
                        # NOTE: tower_logits have NOT been divided by
                        # softmax_temperature
                        tower_loss = tf.expand_dims(tower_loss, axis=0)
                        total_losses.append(tower_loss)

                        # Reuse variables for the next tower
                        tf.get_variable_scope().reuse_variables()

                        # Calculate the gradients for the batch of data on this
                        # tower
                        tower_grads_and_vars = optimizer.compute_gradients(
                            tower_loss)

                        # Gradient clipping
                        tower_grads_and_vars = model._clip_gradients(
                            tower_grads_and_vars)

                        # TODO: Optionally add gradient noise

                        # Keep track of the gradients across all towers
                        total_grads_and_vars.append(tower_grads_and_vars)

                        # Add to the graph each operation per tower
                        decode_op_tower = model.decoder(
                            tower_logits,
                            model.inputs_seq_len_pl_list[i_gpu],
                            beam_width=params['beam_width'])
                        decode_ops.append(decode_op_tower)
                        ler_op_tower = model.compute_ler(
                            decode_op_tower, model.labels_pl_list[i_gpu])
                        ler_op_tower = tf.expand_dims(ler_op_tower, axis=0)
                        ler_ops.append(ler_op_tower)

        # Aggregate losses, then calculate average loss
        total_losses = tf.concat(axis=0, values=total_losses)
        loss_op = tf.reduce_mean(total_losses, axis=0)
        ler_ops = tf.concat(axis=0, values=ler_ops)
        ler_op = tf.reduce_mean(ler_ops, axis=0)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers
        average_grads_and_vars = average_gradients(total_grads_and_vars)

        # Apply the gradients to adjust the shared variables.
        train_op = optimizer.apply_gradients(average_grads_and_vars,
                                             global_step=global_step)

        # Define learning rate controller
        lr_controller = Controller(
            learning_rate_init=params['learning_rate'],
            decay_start_epoch=params['decay_start_epoch'],
            decay_rate=params['decay_rate'],
            decay_patient_epoch=params['decay_patient_epoch'],
            lower_better=True)

        # Build the summary tensor based on the TensorFlow collection of
        # summaries
        summary_train = tf.summary.merge(model.summaries_train)
        summary_dev = tf.summary.merge(model.summaries_dev)

        # Add the variable initializer operation
        init_op = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints
        saver = tf.train.Saver(max_to_keep=None)

        # Count total parameters
        parameters_dict, total_parameters = count_total_parameters(
            tf.trainable_variables())
        for parameter_name in sorted(parameters_dict.keys()):
            print("%s %d" % (parameter_name, parameters_dict[parameter_name]))
        print("Total %d variables, %s M parameters" %
              (len(parameters_dict.keys()),
               "{:,}".format(total_parameters / 1000000)))

        csv_steps, csv_loss_train, csv_loss_dev = [], [], []
        csv_ler_train, csv_ler_dev = [], []
        # Create a session for running operation on the graph
        # NOTE: Start running operations on the Graph. allow_soft_placement
        # must be set to True to build towers on GPU, as some of the ops do not
        # have GPU implementations.
        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                              log_device_placement=False)) as sess:

            # Instantiate a SummaryWriter to output summaries and the graph
            summary_writer = tf.summary.FileWriter(model.save_path, sess.graph)

            # Initialize parameters
            sess.run(init_op)

            # Train model
            start_time_train = time.time()
            start_time_epoch = time.time()
            start_time_step = time.time()
            ler_dev_best = 1
            not_improved_epoch = 0
            learning_rate = float(params['learning_rate'])
            for step, (data, is_new_epoch) in enumerate(train_data):

                # Create feed dictionary for next mini batch (train)
                inputs, labels, inputs_seq_len, _ = data
                feed_dict_train = {}
                for i_gpu in range(len(gpu_indices)):
                    feed_dict_train[model.inputs_pl_list[i_gpu]] = inputs[i_gpu]
                    feed_dict_train[model.labels_pl_list[i_gpu]] = list2sparsetensor(
                        labels[i_gpu], padded_value=train_data.padded_value)
                    feed_dict_train[model.inputs_seq_len_pl_list[i_gpu]] = inputs_seq_len[i_gpu]
                    feed_dict_train[model.keep_prob_pl_list[i_gpu]] = 1 - \
                        float(params['dropout'])
                feed_dict_train[learning_rate_pl] = learning_rate

                # Update parameters
                sess.run(train_op, feed_dict=feed_dict_train)

                if (step + 1) % int(params['print_step'] / len(gpu_indices)) == 0:

                    # Create feed dictionary for next mini batch (dev)
                    if params['train_data_size'] in ['train100h', 'train460h']:
                        inputs, labels, inputs_seq_len, _ = dev_clean_data.next()[0]
                    else:
                        inputs, labels, inputs_seq_len, _ = dev_other_data.next()[0]
                    feed_dict_dev = {}
                    for i_gpu in range(len(gpu_indices)):
                        feed_dict_dev[model.inputs_pl_list[i_gpu]] = inputs[i_gpu]
                        feed_dict_dev[model.labels_pl_list[i_gpu]] = list2sparsetensor(
                            labels[i_gpu], padded_value=dev_other_data.padded_value)
                        feed_dict_dev[model.inputs_seq_len_pl_list[i_gpu]] = inputs_seq_len[i_gpu]
                        feed_dict_dev[model.keep_prob_pl_list[i_gpu]] = 1.0

                    # Compute loss
                    loss_train = sess.run(loss_op, feed_dict=feed_dict_train)
                    loss_dev = sess.run(loss_op, feed_dict=feed_dict_dev)
                    csv_steps.append(step)
                    csv_loss_train.append(loss_train)
                    csv_loss_dev.append(loss_dev)

                    # Change to evaluation mode
                    for i_gpu in range(len(gpu_indices)):
                        feed_dict_train[model.keep_prob_pl_list[i_gpu]] = 1.0

                    # Compute accuracy & update event files
                    ler_train, summary_str_train = sess.run(
                        [ler_op, summary_train], feed_dict=feed_dict_train)
                    ler_dev, summary_str_dev = sess.run(
                        [ler_op, summary_dev], feed_dict=feed_dict_dev)
                    csv_ler_train.append(ler_train)
                    csv_ler_dev.append(ler_dev)
                    summary_writer.add_summary(summary_str_train, step + 1)
                    summary_writer.add_summary(summary_str_dev, step + 1)
                    summary_writer.flush()

                    duration_step = time.time() - start_time_step
                    print("Step %d (epoch: %.3f): loss = %.3f (%.3f) / ler = %.3f (%.3f) / lr = %.5f (%.3f min)" %
                          (step + 1, train_data.epoch_detail, loss_train, loss_dev,
                           ler_train, ler_dev, learning_rate, duration_step / 60))
                    sys.stdout.flush()
                    start_time_step = time.time()

                # Save checkpoint and evaluate model per epoch
                if is_new_epoch:
                    duration_epoch = time.time() - start_time_epoch
                    print('-----EPOCH:%d (%.3f min)-----' %
                          (train_data.epoch, duration_epoch / 60))

                    # Save figure of loss & ler
                    plot_loss(csv_loss_train, csv_loss_dev, csv_steps,
                              save_path=model.save_path)
                    plot_ler(csv_ler_train, csv_ler_dev, csv_steps,
                             label_type=params['label_type'],
                             save_path=model.save_path)

                    if train_data.epoch >= params['eval_start_epoch']:
                        start_time_eval = time.time()
                        print('=== Dev Data Evaluation ===')
                        # dev-clean
                        cer_dev_clean_epoch, wer_dev_clean_epoch = do_eval_cer(
                            session=sess,
                            decode_ops=decode_ops,
                            model=model,
                            dataset=dev_clean_data,
                            label_type=params['label_type'],
                            eval_batch_size=params['batch_size'])
                        print(' CER (clean): %f %%' %
                              (cer_dev_clean_epoch * 100))
                        print(' WER (clean): %f %%' %
                              (wer_dev_clean_epoch * 100))

                        # dev-other
                        cer_dev_other_epoch, wer_dev_other_epoch = do_eval_cer(
                            session=sess,
                            decode_ops=decode_ops,
                            model=model,
                            dataset=dev_other_data,
                            label_type=params['label_type'],
                            eval_batch_size=params['batch_size'])
                        print(' CER (other): %f %%' %
                              (cer_dev_other_epoch * 100))
                        print(' WER (other): %f %%' %
                              (wer_dev_other_epoch * 100))

                        if params['train_data_size'] in ['train100h', 'train460h']:
                            metric_epoch = cer_dev_clean_epoch
                        else:
                            metric_epoch = cer_dev_other_epoch

                        if metric_epoch < ler_dev_best:
                            ler_dev_best = metric_epoch
                            not_improved_epoch = 0
                            print('■■■ ↑Best Score (CER)↑ ■■■')

                            # Save model (check point)
                            checkpoint_file = join(model.save_path, 'model.ckpt')
                            save_path = saver.save(
                                sess, checkpoint_file,
                                global_step=train_data.epoch)
                            print("Model saved in file: %s" % save_path)

                            print('=== Test Data Evaluation ===')
                            # test-clean
                            cer_test_clean_epoch, wer_test_clean_epoch = do_eval_cer(
                                session=sess,
                                decode_ops=decode_ops,
                                model=model,
                                dataset=test_clean_data,
                                label_type=params['label_type'],
                                is_test=True,
                                eval_batch_size=params['batch_size'])
                            print(' CER (clean): %f %%' %
                                  (cer_test_clean_epoch * 100))
                            print(' WER (clean): %f %%' %
                                  (wer_test_clean_epoch * 100))

                            # test-other
                            cer_test_other_epoch, wer_test_other_epoch = do_eval_cer(
                                session=sess,
                                decode_ops=decode_ops,
                                model=model,
                                dataset=test_other_data,
                                label_type=params['label_type'],
                                is_test=True,
                                eval_batch_size=params['batch_size'])
                            print(' CER (other): %f %%' %
                                  (cer_test_other_epoch * 100))
                            print(' WER (other): %f %%' %
                                  (wer_test_other_epoch * 100))
                        else:
                            not_improved_epoch += 1

                        duration_eval = time.time() - start_time_eval
                        print('Evaluation time: %.3f min' %
                              (duration_eval / 60))

                        # Early stopping
                        if not_improved_epoch == params['not_improved_patient_epoch']:
                            break

                        # Update learning rate
                        learning_rate = lr_controller.decay_lr(
                            learning_rate=learning_rate,
                            epoch=train_data.epoch,
                            value=metric_epoch)

                    start_time_epoch = time.time()

            duration_train = time.time() - start_time_train
            print('Total time: %.3f hour' % (duration_train / 3600))

            # Training was finished correctly
            with open(join(model.save_path, 'complete.txt'), 'w') as f:
                f.write('')
def do_plot(model, params, epoch, eval_batch_size):
    """Plot the CTC posteriors.
    Args:
        model: the model to restore
        params (dict): A dictionary of parameters
        epoch (int): the epoch to restore
        eval_batch_size (int): the size of mini-batch in evaluation
    """
    # Load dataset
    test_clean_data = Dataset(
        data_type='test_clean',
        train_data_size=params['train_data_size'],
        label_type=params['label_type'],
        batch_size=params['batch_size'] if eval_batch_size == -1 else eval_batch_size,
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        sort_utt=True)
    test_other_data = Dataset(
        data_type='test_other',
        train_data_size=params['train_data_size'],
        label_type=params['label_type'],
        batch_size=params['batch_size'] if eval_batch_size == -1 else eval_batch_size,
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        shuffle=False)

    with tf.name_scope('tower_gpu0'):
        # Define placeholders
        model.create_placeholders()

        # Add to the graph each operation (including model definition)
        _, logits = model.compute_loss(
            model.inputs_pl_list[0],
            model.labels_pl_list[0],
            model.inputs_seq_len_pl_list[0],
            model.keep_prob_hidden_pl_list[0],
            # softmax_temperature=params['softmax_temperature'])
            softmax_temperature=10)
        posteriors_op = model.posteriors(logits, blank_prior=1)

    # Create a saver for writing training checkpoints
    saver = tf.train.Saver()

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(model.save_path)

        # If check point exists
        if ckpt:
            # Use last saved model
            model_path = ckpt.model_checkpoint_path
            if epoch != -1:
                model_path = model_path.split('/')[:-1]
                model_path = '/'.join(model_path) + '/model.ckpt-' + str(epoch)
            saver.restore(sess, model_path)
            print("Model restored: " + model_path)
        else:
            raise ValueError('There are not any checkpoints.')

        plot(session=sess,
             posteriors_op=posteriors_op,
             model=model,
             dataset=test_clean_data,
             label_type=params['label_type'],
             num_stack=params['num_stack'],
             # save_path=None)
             save_path=mkdir_join(model.save_path, 'ctc_output', 'test-clean'))
        plot(session=sess,
             posteriors_op=posteriors_op,
             model=model,
             dataset=test_other_data,
             label_type=params['label_type'],
             num_stack=params['num_stack'],
             # save_path=None)
             save_path=mkdir_join(model.save_path, 'ctc_output', 'test-other'))
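

# ----------------------------------------------------------------------------
# NOTE: illustrative sketch, not part of the original script. The plot()
# helper used above is defined elsewhere; the hypothetical stand-in below
# shows one straightforward way to visualize a (num_frames, num_classes)
# posterior matrix with matplotlib. The data in the usage example is random.
# ----------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt


def plot_ctc_posteriors(probs, blank_index, save_path=None):
    """Plot per-class posterior curves over time.

    probs: array of shape (num_frames, num_classes); real CTC posteriors
    typically show a dominant, spiky blank class between label spikes.
    """
    times = np.arange(probs.shape[0])
    for c in range(probs.shape[1]):
        style = '--' if c == blank_index else '-'
        plt.plot(times, probs[:, c], style, linewidth=1)
    plt.xlabel('frame')
    plt.ylabel('posterior')
    plt.ylim(0, 1)
    if save_path is not None:
        plt.savefig(save_path, dpi=150)
    else:
        plt.show()
    plt.close()


# Usage example with random data (100 frames, 30 classes including the blank)
plot_ctc_posteriors(np.random.dirichlet(np.ones(30), size=100), blank_index=29)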