Python load 예제들, models.ctc.load_model.load Python 예제들

예제 #1

0

파일 보기

def main(model_path, epoch):
    """Rebuild a trained CTC model from its saved config and evaluate it.

    Args:
        model_path: directory of the trained model; it must contain
            ``config.yml``.
        epoch: forwarded unchanged to ``do_eval``.
    """
    # Load config file (.yml).
    # safe_load is used because a bare ``yaml.load(f)`` raises TypeError on
    # PyYAML >= 6 (the Loader argument is mandatory) and, on older versions,
    # can construct arbitrary Python objects from the file.
    with open(os.path.join(model_path, 'config.yml'), "r") as f:
        config = yaml.safe_load(f)
    param = config['param']

    # Number of classes per label type (except for a blank label).
    # Unknown label types leave ``param`` untouched, as before.
    num_classes_by_label = {'phone': 38, 'kana': 147, 'kanji': 3386}
    label_type = param['label_type']
    if label_type in num_classes_by_label:
        param['num_classes'] = num_classes_by_label[label_type]

    # Model setting
    CTCModel = load(model_type=param['model'])
    network = CTCModel(batch_size=param['batch_size'],
                       input_size=param['input_size'] * param['num_stack'],
                       num_unit=param['num_unit'],
                       num_layer=param['num_layer'],
                       bottleneck_dim=param['bottleneck_dim'],
                       num_classes=param['num_classes'],
                       parameter_init=param['weight_init'],
                       clip_grad=param['clip_grad'],
                       clip_activation=param['clip_activation'],
                       dropout_ratio_input=param['dropout_input'],
                       dropout_ratio_hidden=param['dropout_hidden'],
                       num_proj=param['num_proj'],
                       weight_decay=param['weight_decay'])

    network.model_dir = model_path
    print(network.model_dir)
    do_eval(network=network, param=param, epoch=epoch)

예제 #2

0

파일 보기

파일: decode_ctc.py 프로젝트: fresty/tensorflow_end2end_speech_recognition

def main(model_path, epoch):
    """Rebuild a trained CTC model from its saved config and decode with it.

    The network is always built with ``batch_size=1``.

    Args:
        model_path: directory of the trained model; it must contain
            ``config.yml``.
        epoch: forwarded unchanged to ``do_decode``.
    """
    # Load config file.
    # safe_load is used because a bare ``yaml.load(f)`` raises TypeError on
    # PyYAML >= 6 (the Loader argument is mandatory) and, on older versions,
    # can construct arbitrary Python objects from the file.
    with open(os.path.join(model_path, 'config.yml'), "r") as f:
        config = yaml.safe_load(f)
    param = config['param']

    # Number of classes per label type (except for a blank label).
    # Unknown label types leave ``param`` untouched, as before.
    num_classes_by_label = {
        'phone61': 61,
        'phone48': 48,
        'phone39': 39,
        'character': 33,
    }
    label_type = param['label_type']
    if label_type in num_classes_by_label:
        param['num_classes'] = num_classes_by_label[label_type]

    # Model setting
    CTCModel = load(model_type=param['model'])
    network = CTCModel(batch_size=1,
                       input_size=param['input_size'] * param['num_stack'],
                       num_unit=param['num_unit'],
                       num_layer=param['num_layer'],
                       num_classes=param['num_classes'],
                       parameter_init=param['weight_init'],
                       clip_grad=param['clip_grad'],
                       clip_activation=param['clip_activation'],
                       dropout_ratio_input=param['dropout_input'],
                       dropout_ratio_hidden=param['dropout_hidden'],
                       num_proj=param['num_proj'],
                       weight_decay=param['weight_decay'])

    network.model_dir = model_path
    print(network.model_dir)
    do_decode(network=network, param=param, epoch=epoch)

예제 #3

0

파일 보기

파일: eval_ctc.py 프로젝트: PuchoDeepLearningLabs/tensorflow_end2end_speech_recognition

def main(model_path):
    """Rebuild a trained CTC model from its saved config and evaluate it.

    Args:
        model_path: directory of the trained model; it must contain
            ``config.yml``.

    Raises:
        ValueError: if ``corpus.label_type`` in the config is not one of
            ``phone``, ``character`` or ``kanji``.
    """
    epoch = None  # if None, restore the final epoch

    # Load config file (.yml).
    # safe_load is used because a bare ``yaml.load(f)`` raises TypeError on
    # PyYAML >= 6 (the Loader argument is mandatory) and, on older versions,
    # can construct arbitrary Python objects from the file.
    with open(os.path.join(model_path, 'config.yml'), "r") as f:
        config = yaml.safe_load(f)
    corpus = config['corpus']
    feature = config['feature']
    param = config['param']

    # Fail early with a clear message instead of hitting an unbound
    # ``output_size`` further down.
    output_size_by_label = {'phone': 38, 'character': 147, 'kanji': 3386}
    label_type = corpus['label_type']
    if label_type not in output_size_by_label:
        raise ValueError('Unsupported label_type: %r' % (label_type,))
    output_size = output_size_by_label[label_type]

    # Model setting
    CTCModel = load(model_type=config['model_name'])
    network = CTCModel(batch_size=param['batch_size'],
                       input_size=feature['input_size'] * feature['num_stack'],
                       num_unit=param['num_unit'],
                       num_layer=param['num_layer'],
                       bottleneck_dim=param['bottleneck_dim'],
                       output_size=output_size,
                       parameter_init=param['weight_init'],
                       clip_grad=param['clip_grad'],
                       clip_activation=param['clip_activation'],
                       dropout_ratio_input=param['dropout_input'],
                       dropout_ratio_hidden=param['dropout_hidden'],
                       num_proj=param['num_proj'],
                       weight_decay=param['weight_decay'])
    network.model_name = config['model_name']
    network.model_dir = model_path

    print(network.model_dir)
    do_eval(network=network,
            label_type=label_type,
            num_stack=feature['num_stack'],
            num_skip=feature['num_skip'],
            train_data_size=corpus['train_data_size'],
            epoch=epoch)

예제 #4

0

파일 보기

def main(model_path):
    """Rebuild a trained CTC model from its saved config and run ``do_restore``.

    The network is always built with ``batch_size=1``.

    Args:
        model_path: directory of the trained model; it must contain
            ``config.yml``.

    Raises:
        ValueError: if ``corpus.label_type`` in the config is not one of
            ``phone61``, ``phone48``, ``phone39`` or ``character``.
    """
    epoch = None  # if None, restore the final epoch

    # Read config file.
    # safe_load is used because a bare ``yaml.load(f)`` raises TypeError on
    # PyYAML >= 6 (the Loader argument is mandatory) and, on older versions,
    # can construct arbitrary Python objects from the file.
    with open(os.path.join(model_path, 'config.yml'), "r") as f:
        config = yaml.safe_load(f)
    corpus = config['corpus']
    feature = config['feature']
    param = config['param']

    # Fail early with a clear message instead of hitting an unbound
    # ``output_size`` further down.
    output_size_by_label = {
        'phone61': 61,
        'phone48': 48,
        'phone39': 39,
        'character': 30,
    }
    label_type = corpus['label_type']
    if label_type not in output_size_by_label:
        raise ValueError('Unsupported label_type: %r' % (label_type,))
    output_size = output_size_by_label[label_type]

    # Model setting
    CTCModel = load(model_type=config['model_name'])
    network = CTCModel(batch_size=1,
                       input_size=feature['input_size'] * feature['num_stack'],
                       num_cell=param['num_cell'],
                       num_layer=param['num_layer'],
                       output_size=output_size,
                       clip_gradients=param['clip_grad'],
                       clip_activation=param['clip_activation'],
                       dropout_ratio_input=param['dropout_input'],
                       dropout_ratio_hidden=param['dropout_hidden'],
                       num_proj=param['num_proj'],
                       weight_decay=param['weight_decay'])
    network.model_name = config['model_name']
    network.model_dir = model_path

    print(network.model_dir)
    do_restore(network=network,
               label_type=label_type,
               num_stack=feature['num_stack'],
               num_skip=feature['num_skip'],
               epoch=epoch)

예제 #5

0

파일 보기

파일: plot_ctc_posterior.py 프로젝트: seasky100/tensorflow_end2end_speech_recognition

def main(model_path, epoch):
    """Rebuild a trained CTC model from its saved config and run ``do_plot``.

    The network is always built with ``batch_size=1``.

    Args:
        model_path: directory of the trained model; it must contain
            ``config.yml``.
        epoch: forwarded unchanged to ``do_plot``.
    """
    # Load config file.
    # safe_load is used because a bare ``yaml.load(f)`` raises TypeError on
    # PyYAML >= 6 (the Loader argument is mandatory) and, on older versions,
    # can construct arbitrary Python objects from the file.
    with open(os.path.join(model_path, 'config.yml'), "r") as f:
        config = yaml.safe_load(f)
    param = config['param']

    # Number of classes per label type (except for a blank label).
    # Unknown label types leave ``param`` untouched, as before.
    num_classes_by_label = {'kanji': 3386, 'kana': 147, 'phone': 38}
    label_type = param['label_type']
    if label_type in num_classes_by_label:
        param['num_classes'] = num_classes_by_label[label_type]

    # Model setting
    model = load(model_type=param['model'])
    # The first three settings used to be read from an undefined name
    # ``params`` (NameError at call time); the config dict here is ``param``.
    network = model(
        batch_size=1,
        input_size=param['input_size'],
        splice=param['splice'],
        num_stack=param['num_stack'],
        num_units=param['num_units'],
        num_layer=param['num_layer'],
        bottleneck_dim=param['bottleneck_dim'],
        num_classes=param['num_classes'],
        parameter_init=param['weight_init'],
        clip_grad=param['clip_grad'],
        clip_activation=param['clip_activation'],
        dropout_ratio_input=param['dropout_input'],
        dropout_ratio_hidden=param['dropout_hidden'],
        num_proj=param['num_proj'],
        weight_decay=param['weight_decay'])

    network.model_dir = model_path
    do_plot(network=network, param=param, epoch=epoch)

예제 #6

0

파일 보기

def main(config_path, gpu_indices):
    """Train a CTC model on several GPUs from a YAML config file.

    Builds the network described by the config, derives the model name and
    save directory from the hyperparameters, then runs ``do_train`` with
    stdout redirected to ``train.log`` inside the model directory.

    Args:
        config_path: path to the YAML config; it is also copied into the
            model directory as ``config.yml``.
        gpu_indices: sized collection forwarded to ``do_train``; its length
            becomes part of the model name.

    Raises:
        ValueError: if ``corpus.label_type`` is unsupported, or if the model
            directory already contains ``complete.txt``.
    """
    # Load a config file (.yml).
    # safe_load is used because a bare ``yaml.load(f)`` raises TypeError on
    # PyYAML >= 6 (the Loader argument is mandatory) and, on older versions,
    # can construct arbitrary Python objects from the file.
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)
    corpus = config['corpus']
    feature = config['feature']
    param = config['param']

    # Fail early with a clear message instead of hitting an unbound
    # ``output_size`` further down.
    output_size_by_label = {
        'phone61': 61,
        'phone48': 48,
        'phone39': 39,
        'character': 30,
    }
    label_type = corpus['label_type']
    if label_type not in output_size_by_label:
        raise ValueError('Unsupported label_type: %r' % (label_type,))
    output_size = output_size_by_label[label_type]

    # Model setting
    CTCModel = load(model_type=config['model_name'])
    network = CTCModel(batch_size=param['batch_size'],
                       input_size=feature['input_size'] * feature['num_stack'],
                       num_unit=param['num_unit'],
                       num_layer=param['num_layer'],
                       output_size=output_size,
                       parameter_init=param['weight_init'],
                       clip_grad=param['clip_grad'],
                       clip_activation=param['clip_activation'],
                       dropout_ratio_input=param['dropout_input'],
                       dropout_ratio_hidden=param['dropout_hidden'],
                       num_proj=param['num_proj'],
                       weight_decay=param['weight_decay'])

    # Model name: underscore-joined hyperparameters; optional parts are only
    # included when they differ from their "off" value.
    name_parts = [
        config['model_name'].upper(),
        str(param['num_unit']),
        str(param['num_layer']),
        param['optimizer'],
        'lr' + str(param['learning_rate']),
    ]
    if param['num_proj'] != 0:
        name_parts.append('proj' + str(param['num_proj']))
    if feature['num_stack'] != 1:
        name_parts.append('stack' + str(feature['num_stack']))
    if param['weight_decay'] != 0:
        name_parts.append('weightdecay' + str(param['weight_decay']))
    name_parts.append(str(len(gpu_indices)) + 'gpu')
    network.model_name = '_'.join(name_parts)

    # Set save path
    network.model_dir = mkdir('/n/sd8/inaguma/result/timit/ctc/')
    network.model_dir = mkdir_join(network.model_dir, label_type)
    network.model_dir = mkdir_join(network.model_dir, network.model_name)

    # Reset model directory unless a finished run is already stored there
    if isfile(join(network.model_dir, 'complete.txt')):
        raise ValueError('File exists.')
    tf.gfile.DeleteRecursively(network.model_dir)
    tf.gfile.MakeDirs(network.model_dir)

    # Set process name
    setproctitle('multigpu_ctc_timit_' + label_type)

    # Save config file
    shutil.copyfile(config_path, join(network.model_dir, 'config.yml'))

    # Redirect stdout to the training log. The ``with``/``finally`` pair
    # makes sure the log is closed and stdout is restored even when
    # training raises.
    with open(join(network.model_dir, 'train.log'), 'w') as log_file:
        sys.stdout = log_file
        try:
            print(network.model_name)
            do_train(network=network,
                     optimizer=param['optimizer'],
                     learning_rate=param['learning_rate'],
                     batch_size=param['batch_size'],
                     epoch_num=param['num_epoch'],
                     label_type=label_type,
                     num_stack=feature['num_stack'],
                     num_skip=feature['num_skip'],
                     gpu_indices=gpu_indices)
        finally:
            sys.stdout = sys.__stdout__

예제 #7

0

파일 보기

파일: finetune_ctc_dialog.py 프로젝트: seasky100/tensorflow_end2end_speech_recognition

def main(config_path, trained_model_path):
    """Fine-tune a CTC model from a previously trained model.

    Builds the network described by the config, derives the model name and
    save directory from the hyperparameters, then runs ``do_fine_tune`` with
    stdout redirected to ``train.log`` inside the model directory.

    Args:
        config_path: path to the YAML config; it is also copied into the
            model directory as ``config.yml``.
        trained_model_path: forwarded unchanged to ``do_fine_tune``.

    Raises:
        ValueError: if the (label_type, social_signal_type) pair in the
            config is unsupported, or if the model directory already
            contains ``complete.txt``.
    """
    restore_epoch = None  # if None, restore the final epoch

    # Read a config file (.yml).
    # safe_load is used because a bare ``yaml.load(f)`` raises TypeError on
    # PyYAML >= 6 (the Loader argument is mandatory) and, on older versions,
    # can construct arbitrary Python objects from the file.
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)
    corpus = config['corpus']
    feature = config['feature']
    param = config['param']

    # Output size per (label_type, social_signal_type). An unknown pair used
    # to leave ``output_size`` unbound; now it is reported explicitly.
    output_sizes = {
        'phone': {'insert': 41, 'insert3': 41, 'insert2': 44, 'remove': 38},
        'character': {'insert': 150, 'insert3': 150, 'insert2': 153,
                      'remove': 147},
    }
    label_type = corpus['label_type']
    social_signal_type = corpus['social_signal_type']
    try:
        output_size = output_sizes[label_type][social_signal_type]
    except KeyError:
        raise ValueError(
            'Unsupported label_type / social_signal_type: %r / %r' %
            (label_type, social_signal_type))

    # Load model
    CTCModel = load(model_type=config['model_name'])
    network = CTCModel(batch_size=param['batch_size'],
                       input_size=feature['input_size'] * feature['num_stack'],
                       num_cell=param['num_cell'],
                       num_layer=param['num_layer'],
                       output_size=output_size,
                       clip_gradients=param['clip_grad'],
                       clip_activation=param['clip_activation'],
                       dropout_ratio_input=param['dropout_input'],
                       dropout_ratio_hidden=param['dropout_hidden'],
                       num_proj=param['num_proj'],
                       weight_decay=param['weight_decay'])

    # Model name: underscore-joined hyperparameters
    name_parts = [
        config['model_name'].upper(),
        str(param['num_cell']),
        str(param['num_layer']),
        param['optimizer'],
        'lr' + str(param['learning_rate']),
    ]
    if feature['num_stack'] != 1:
        name_parts.append('stack' + str(feature['num_stack']))
    name_parts.append('transfer_' + corpus['transfer_data_size'])
    network.model_name = '_'.join(name_parts)

    # Set save path
    network.model_dir = os.path.join(
        mkdir('/n/sd8/inaguma/result/csj/dialog/'),
        'ctc', label_type, social_signal_type, network.model_name)

    # Reset model directory unless a finished run is already stored there
    if os.path.isfile(os.path.join(network.model_dir, 'complete.txt')):
        raise ValueError('File exists.')
    # The nested directories above are only joined, not created, so the
    # target may not exist yet; only delete it when it does.
    if tf.gfile.Exists(network.model_dir):
        tf.gfile.DeleteRecursively(network.model_dir)
    tf.gfile.MakeDirs(network.model_dir)

    # Set process name
    setproctitle('ctc_csj_dialog_' + label_type + '_' +
                 param['optimizer'] + '_' + social_signal_type +
                 '_transfer_' + corpus['transfer_data_size'])

    # Save config file
    shutil.copyfile(config_path, os.path.join(network.model_dir, 'config.yml'))

    # Redirect stdout to the training log. The ``with``/``finally`` pair
    # makes sure the log is closed and stdout is restored even when
    # fine-tuning raises.
    log_path = os.path.join(network.model_dir, 'train.log')
    with open(log_path, 'w') as log_file:
        sys.stdout = log_file
        try:
            print(network.model_name)
            do_fine_tune(network=network,
                         optimizer=param['optimizer'],
                         learning_rate=param['learning_rate'],
                         batch_size=param['batch_size'],
                         epoch_num=param['num_epoch'],
                         label_type=label_type,
                         num_stack=feature['num_stack'],
                         num_skip=feature['num_skip'],
                         social_signal_type=social_signal_type,
                         trained_model_path=trained_model_path,
                         restore_epoch=restore_epoch)
        finally:
            sys.stdout = sys.__stdout__

예제 #8

0

파일 보기

파일: train_ctc.py 프로젝트: fresty/tensorflow_end2end_speech_recognition

def main(config_path):
    """Train a CTC model from a YAML config file.

    Builds the network described by the config, derives the model name and
    save directory from the hyperparameters, then runs ``do_train`` with
    stdout redirected to ``train.log`` inside the model directory.

    Args:
        config_path: path to the YAML config; it is also copied into the
            model directory as ``config.yml``.

    Raises:
        ValueError: if the model directory already contains
            ``complete.txt``.
    """
    # Load a config file (.yml).
    # safe_load is used because a bare ``yaml.load(f)`` raises TypeError on
    # PyYAML >= 6 (the Loader argument is mandatory) and, on older versions,
    # can construct arbitrary Python objects from the file.
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)
    param = config['param']

    # Number of classes per label type (except for a blank label).
    # Unknown label types leave ``param`` untouched, as before.
    num_classes_by_label = {'kanji': 3386, 'kana': 147, 'phone': 38}
    label_type = param['label_type']
    if label_type in num_classes_by_label:
        param['num_classes'] = num_classes_by_label[label_type]

    # Model setting
    CTCModel = load(model_type=param['model'])
    network = CTCModel(batch_size=param['batch_size'],
                       input_size=param['input_size'] * param['num_stack'],
                       num_unit=param['num_unit'],
                       num_layer=param['num_layer'],
                       bottleneck_dim=param['bottleneck_dim'],
                       num_classes=param['num_classes'],
                       parameter_init=param['weight_init'],
                       clip_grad=param['clip_grad'],
                       clip_activation=param['clip_activation'],
                       dropout_ratio_input=param['dropout_input'],
                       dropout_ratio_hidden=param['dropout_hidden'],
                       num_proj=param['num_proj'],
                       weight_decay=param['weight_decay'])

    # Model name: underscore-joined hyperparameters; optional parts are only
    # included when they differ from their "off" value. The name doubles as
    # a directory name, so the existing spellings are kept verbatim.
    name_parts = [
        param['model'],
        str(param['num_unit']),
        str(param['num_layer']),
        param['optimizer'],
        'lr' + str(param['learning_rate']),
    ]
    if param['bottleneck_dim'] != 0:
        name_parts.append('bottoleneck' + str(param['bottleneck_dim']))
    if param['num_proj'] != 0:
        name_parts.append('proj' + str(param['num_proj']))
    if param['num_stack'] != 1:
        name_parts.append('stack' + str(param['num_stack']))
    if param['weight_decay'] != 0:
        name_parts.append('weightdecay' + str(param['weight_decay']))
    if param['train_data_size'] == 'large':
        name_parts.append('large')
    network.model_name = '_'.join(name_parts)

    # Set save path
    network.model_dir = mkdir('/n/sd8/inaguma/result/csj/')
    network.model_dir = mkdir_join(network.model_dir, 'ctc')
    network.model_dir = mkdir_join(network.model_dir, label_type)
    network.model_dir = mkdir_join(network.model_dir, network.model_name)

    # Reset model directory unless a finished run is already stored there
    if isfile(join(network.model_dir, 'complete.txt')):
        raise ValueError('File exists.')
    tf.gfile.DeleteRecursively(network.model_dir)
    tf.gfile.MakeDirs(network.model_dir)

    # Set process name
    setproctitle('csj_ctc_' + label_type + '_' +
                 param['train_data_size'])

    # Save config file
    shutil.copyfile(config_path, join(network.model_dir, 'config.yml'))

    # Redirect stdout to the training log. The ``with``/``finally`` pair
    # makes sure the log is closed and stdout is restored even when
    # training raises.
    with open(join(network.model_dir, 'train.log'), 'w') as log_file:
        sys.stdout = log_file
        try:
            print(network.model_name)
            do_train(network=network, param=param)
        finally:
            sys.stdout = sys.__stdout__

예제 #9

0

파일 보기

def main(config_path, trained_model_path):
    """Fine-tune a CTC model, starting from an already trained model.

    Reads the YAML config, constructs the network, works out the model name
    and output directory, and calls ``do_fine_tune`` while stdout is
    redirected to ``train.log`` in that directory.

    Args:
        config_path: path to the YAML config; a copy is stored in the model
            directory as ``config.yml``.
        trained_model_path: passed through to ``do_fine_tune``.

    Raises:
        ValueError: when the config's label_type / social_signal_type
            combination is not supported, or when ``complete.txt`` already
            exists in the model directory.
    """
    restore_epoch = None  # if None, restore the final epoch

    # Read a config file (.yml).
    # safe_load is used because a bare ``yaml.load(f)`` raises TypeError on
    # PyYAML >= 6 (the Loader argument is mandatory) and, on older versions,
    # can construct arbitrary Python objects from the file.
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)
    corpus = config['corpus']
    feature = config['feature']
    param = config['param']

    label_type = corpus['label_type']
    social_signal_type = corpus['social_signal_type']

    # Output size keyed by (label_type, social_signal_type). A missing key
    # previously surfaced as an unbound ``output_size``; raise a descriptive
    # error instead.
    output_size_table = {
        ('phone', 'insert'): 41,
        ('phone', 'insert3'): 41,
        ('phone', 'insert2'): 44,
        ('phone', 'remove'): 38,
        ('character', 'insert'): 150,
        ('character', 'insert3'): 150,
        ('character', 'insert2'): 153,
        ('character', 'remove'): 147,
    }
    if (label_type, social_signal_type) not in output_size_table:
        raise ValueError(
            'Unsupported label_type / social_signal_type: %r / %r' %
            (label_type, social_signal_type))
    output_size = output_size_table[(label_type, social_signal_type)]

    # Load model
    CTCModel = load(model_type=config['model_name'])
    network = CTCModel(batch_size=param['batch_size'],
                       input_size=feature['input_size'] * feature['num_stack'],
                       num_cell=param['num_cell'],
                       num_layer=param['num_layer'],
                       output_size=output_size,
                       clip_gradients=param['clip_grad'],
                       clip_activation=param['clip_activation'],
                       dropout_ratio_input=param['dropout_input'],
                       dropout_ratio_hidden=param['dropout_hidden'],
                       num_proj=param['num_proj'],
                       weight_decay=param['weight_decay'])

    # Model name built from the hyperparameters
    model_name = config['model_name'].upper()
    model_name += '_' + str(param['num_cell'])
    model_name += '_' + str(param['num_layer'])
    model_name += '_' + param['optimizer']
    model_name += '_lr' + str(param['learning_rate'])
    if feature['num_stack'] != 1:
        model_name += '_stack' + str(feature['num_stack'])
    model_name += '_transfer_' + corpus['transfer_data_size']
    network.model_name = model_name

    # Set save path
    model_dir = mkdir('/n/sd8/inaguma/result/csj/dialog/')
    for component in ('ctc', label_type, social_signal_type, model_name):
        model_dir = os.path.join(model_dir, component)
    network.model_dir = model_dir

    # Reset model directory unless a finished run is already stored there
    if os.path.isfile(os.path.join(model_dir, 'complete.txt')):
        raise ValueError('File exists.')
    # Only the root directory is created above; the nested path is merely
    # joined and may not exist yet, so delete it only when present.
    if tf.gfile.Exists(model_dir):
        tf.gfile.DeleteRecursively(model_dir)
    tf.gfile.MakeDirs(model_dir)

    # Set process name
    setproctitle('ctc_csj_dialog_' + label_type + '_' +
                 param['optimizer'] + '_' + social_signal_type +
                 '_transfer_' + corpus['transfer_data_size'])

    # Save config file
    shutil.copyfile(config_path, os.path.join(model_dir, 'config.yml'))

    # Send stdout to the training log; close the log and restore stdout
    # whether or not fine-tuning succeeds.
    with open(os.path.join(model_dir, 'train.log'), 'w') as log_file:
        sys.stdout = log_file
        try:
            print(network.model_name)
            do_fine_tune(network=network,
                         optimizer=param['optimizer'],
                         learning_rate=param['learning_rate'],
                         batch_size=param['batch_size'],
                         epoch_num=param['num_epoch'],
                         label_type=label_type,
                         num_stack=feature['num_stack'],
                         num_skip=feature['num_skip'],
                         social_signal_type=social_signal_type,
                         trained_model_path=trained_model_path,
                         restore_epoch=restore_epoch)
        finally:
            sys.stdout = sys.__stdout__

예제 #10

0

파일 보기

    def check_training(self, model_type, label_type):
        """Train a small CTC model on one generated batch and print progress.

        Builds the graph for the model class returned by
        ``load(model_type=...)``, runs up to 400 training steps on a single
        fixed batch, and every 10 steps prints the loss, the label error
        rate and the first true / predicted label sequence. Training stops
        early once the label error rate has failed to improve on five
        consecutive checks.

        Args:
            model_type: model identifier passed to ``load``.
            label_type: ``'character'`` selects 26 output classes and
                ``num2alpha`` for printing; any other value selects 61
                classes and ``num2phone``.
        """
        print('----- ' + model_type + ', ' + label_type + ' -----')
        tf.reset_default_graph()
        with tf.Graph().as_default():
            # Load batch data
            batch_size = 4
            inputs, labels_true_st, inputs_seq_len = generate_data(
                label_type=label_type, model='ctc', batch_size=batch_size)

            # Define placeholders
            inputs_pl = tf.placeholder(tf.float32,
                                       shape=[None, None, inputs.shape[-1]],
                                       name='inputs')
            # The labels are fed as a sparse tensor assembled from three
            # separate placeholders (indices, values, dense shape).
            indices_pl = tf.placeholder(tf.int64, name='indices')
            values_pl = tf.placeholder(tf.int32, name='values')
            shape_pl = tf.placeholder(tf.int64, name='shape')
            labels_pl = tf.SparseTensor(indices_pl, values_pl, shape_pl)
            inputs_seq_len_pl = tf.placeholder(tf.int64,
                                               shape=[None],
                                               name='inputs_seq_len')
            keep_prob_input_pl = tf.placeholder(tf.float32,
                                                name='keep_prob_input')
            keep_prob_hidden_pl = tf.placeholder(tf.float32,
                                                 name='keep_prob_hidden')

            # Define model graph
            num_classes = 26 if label_type == 'character' else 61
            model = load(model_type=model_type)
            network = model(batch_size=batch_size,
                            input_size=inputs[0].shape[1],
                            num_unit=256,
                            num_layer=2,
                            bottleneck_dim=0,
                            num_classes=num_classes,
                            parameter_init=0.1,
                            clip_grad=5.0,
                            clip_activation=50,
                            dropout_ratio_input=1.0,
                            dropout_ratio_hidden=1.0,
                            num_proj=None,
                            weight_decay=1e-6)

            # Add to the graph each operation
            loss_op, logits = network.compute_loss(inputs_pl, labels_pl,
                                                   inputs_seq_len_pl,
                                                   keep_prob_input_pl,
                                                   keep_prob_hidden_pl)
            learning_rate = 1e-3
            train_op = network.train(loss_op,
                                     optimizer='rmsprop',
                                     learning_rate_init=learning_rate,
                                     is_scheduled=False)
            decode_op = network.decoder(logits,
                                        inputs_seq_len_pl,
                                        decode_type='beam_search',
                                        beam_width=20)
            ler_op = network.compute_ler(decode_op, labels_pl)

            # Add the variable initializer operation
            init_op = tf.global_variables_initializer()

            # Count total parameters
            parameters_dict, total_parameters = count_total_parameters(
                tf.trainable_variables())
            for parameter_name in sorted(parameters_dict.keys()):
                print("%s %d" %
                      (parameter_name, parameters_dict[parameter_name]))
            print("Total %d variables, %s M parameters" %
                  (len(parameters_dict.keys()), "{:,}".format(
                      total_parameters / 1000000)))

            # Make feed dict (the same batch is reused for every step)
            feed_dict = {
                inputs_pl: inputs,
                labels_pl: labels_true_st,
                inputs_seq_len_pl: inputs_seq_len,
                keep_prob_input_pl: network.dropout_ratio_input,
                keep_prob_hidden_pl: network.dropout_ratio_hidden,
                network.lr: learning_rate
            }

            with tf.Session() as sess:
                # Initialize parameters
                sess.run(init_op)

                # Wrapper for tfdbg
                # sess = tf_debug.LocalCLIDebugWrapperSession(sess)

                # Train model
                max_steps = 400
                start_time_global = time.time()
                start_time_step = time.time()
                ler_train_pre = 1
                not_improved_count = 0
                for step in range(max_steps):

                    # Run one training step and fetch the loss
                    _, loss_train = sess.run([train_op, loss_op],
                                             feed_dict=feed_dict)

                    # Gradient check
                    # grads = sess.run(network.clipped_grads,
                    #                  feed_dict=feed_dict)
                    # for grad in grads:
                    #     print(np.max(grad))

                    if (step + 1) % 10 == 0:
                        # Change to evaluation mode
                        # NOTE(review): the keep-prob entries are never set
                        # back afterwards, so later training steps also run
                        # with 1.0; presumably harmless here because the
                        # model is built with dropout ratios of 1.0 --
                        # confirm before reusing with real dropout.
                        feed_dict[keep_prob_input_pl] = 1.0
                        feed_dict[keep_prob_hidden_pl] = 1.0

                        # Compute the label error rate on the training batch
                        ler_train = sess.run(ler_op, feed_dict=feed_dict)

                        duration_step = time.time() - start_time_step
                        print('Step %d: loss = %.3f / ler = %.4f (%.3f sec)' %
                              (step + 1, loss_train, ler_train, duration_step))
                        start_time_step = time.time()

                        # Visualize the first sequence of the batch
                        labels_pred_st = sess.run(decode_op,
                                                  feed_dict=feed_dict)
                        labels_true = sparsetensor2list(labels_true_st,
                                                        batch_size=batch_size)
                        labels_pred = sparsetensor2list(labels_pred_st,
                                                        batch_size=batch_size)
                        if label_type == 'character':
                            print('True: %s' % num2alpha(labels_true[0]))
                            print('Pred: %s' % num2alpha(labels_pred[0]))
                        else:
                            print('True: %s' % num2phone(labels_true[0]))
                            print('Pred: %s' % num2phone(labels_pred[0]))

                        # Early stopping: give up after five consecutive
                        # checks without a strictly lower error rate.
                        if ler_train >= ler_train_pre:
                            not_improved_count += 1
                        else:
                            not_improved_count = 0
                        if not_improved_count >= 5:
                            print('Modle is Converged.')
                            break
                        ler_train_pre = ler_train

                duration_global = time.time() - start_time_global
                print('Total time: %.3f sec' % (duration_global))
예제 #11

0

파일 보기

def main(config_path):
    """Train a CTC model from a YAML config file.

    Builds the network described by the config, derives the model name and
    save directory from the hyperparameters, then runs ``do_train`` with
    stdout redirected to ``train.log`` inside the model directory.

    Args:
        config_path: path to the YAML config; it is also copied into the
            model directory as ``config.yml``.

    Raises:
        ValueError: if ``corpus.label_type`` is not one of ``phone``,
            ``character`` or ``kanji``, or if the model directory already
            contains ``complete.txt``.
    """
    # Read a config file (.yml).
    # safe_load is used because a bare ``yaml.load(f)`` raises TypeError on
    # PyYAML >= 6 (the Loader argument is mandatory) and, on older versions,
    # can construct arbitrary Python objects from the file.
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)
    corpus = config['corpus']
    feature = config['feature']
    param = config['param']

    # Fail early with a clear message instead of hitting an unbound
    # ``output_size`` further down.
    output_size_by_label = {'phone': 38, 'character': 147, 'kanji': 3386}
    label_type = corpus['label_type']
    if label_type not in output_size_by_label:
        raise ValueError('Unsupported label_type: %r' % (label_type,))
    output_size = output_size_by_label[label_type]

    # Model setting
    CTCModel = load(model_type=config['model_name'])
    network = CTCModel(batch_size=param['batch_size'],
                       input_size=feature['input_size'] * feature['num_stack'],
                       num_cell=param['num_cell'],
                       num_layer=param['num_layer'],
                       bottleneck_dim=param['bottleneck_dim'],
                       output_size=output_size,
                       clip_gradients=param['clip_grad'],
                       clip_activation=param['clip_activation'],
                       dropout_ratio_input=param['dropout_input'],
                       dropout_ratio_hidden=param['dropout_hidden'],
                       num_proj=param['num_proj'],
                       weight_decay=param['weight_decay'])

    # Model name: underscore-joined hyperparameters; optional parts are only
    # included when they differ from their "off" value.
    name_parts = [
        config['model_name'].upper(),
        str(param['num_cell']),
        str(param['num_layer']),
        param['optimizer'],
        'lr' + str(param['learning_rate']),
    ]
    if param['num_proj'] != 0:
        name_parts.append('proj' + str(param['num_proj']))
    if feature['num_stack'] != 1:
        name_parts.append('stack' + str(feature['num_stack']))
    if param['weight_decay'] != 0:
        name_parts.append('weightdecay' + str(param['weight_decay']))
    network.model_name = '_'.join(name_parts)

    # Set save path
    network.model_dir = mkdir('/n/sd8/inaguma/result/csj/monolog/')
    network.model_dir = mkdir_join(network.model_dir, 'ctc')
    network.model_dir = mkdir_join(network.model_dir, label_type)
    network.model_dir = mkdir_join(network.model_dir,
                                   corpus['train_data_size'])
    network.model_dir = mkdir_join(network.model_dir, network.model_name)

    # Reset model directory unless a finished run is already stored there
    if os.path.isfile(join(network.model_dir, 'complete.txt')):
        raise ValueError('File exists.')
    tf.gfile.DeleteRecursively(network.model_dir)
    tf.gfile.MakeDirs(network.model_dir)

    # Set process name
    setproctitle('ctc_csj_' + label_type + '_' +
                 corpus['train_data_size'])

    # Save config file
    shutil.copyfile(config_path, join(network.model_dir, 'config.yml'))

    # Redirect stdout to the training log. The ``with``/``finally`` pair
    # makes sure the log is closed and stdout is restored even when
    # training raises.
    with open(join(network.model_dir, 'train.log'), 'w') as log_file:
        sys.stdout = log_file
        try:
            print(network.model_name)
            do_train(network=network,
                     optimizer=param['optimizer'],
                     learning_rate=param['learning_rate'],
                     batch_size=param['batch_size'],
                     epoch_num=param['num_epoch'],
                     label_type=label_type,
                     num_stack=feature['num_stack'],
                     num_skip=feature['num_skip'],
                     train_data_size=corpus['train_data_size'])
        finally:
            sys.stdout = sys.__stdout__

예제 #12

0

파일 보기

파일: train_multitask_ctc.py 프로젝트: zz12375/tensorflow_end2end_speech_recognition

def main(config_path, model_save_path):
    """Train a multitask (main + sub label) CTC model from a YAML config.

    Builds the model described by the config, derives the model name and
    save directory from the hyperparameters, redirects stdout to
    ``train.log`` inside that directory and runs ``do_train``.

    Args:
        config_path: path to the YAML config; it is also copied into the
            save directory as ``config.yml``.
        model_save_path: root directory under which the model directory is
            created.

    Raises:
        TypeError: if ``label_type_main`` is not ``kanji`` / ``kana`` or
            ``label_type_sub`` is not ``kana`` / ``phone``.
        ValueError: if the save directory already contains
            ``complete.txt``.
    """
    # Read a config file (.yml).
    # safe_load is used because a bare ``yaml.load(f)`` raises TypeError on
    # PyYAML >= 6 (the Loader argument is mandatory) and, on older versions,
    # can construct arbitrary Python objects from the file.
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)
    params = config['param']

    # Except for a blank label
    if params['label_type_main'] == 'kanji':
        params['num_classes_main'] = 3386
    elif params['label_type_main'] == 'kana':
        params['num_classes_main'] = 147
    else:
        raise TypeError(
            'Unsupported label_type_main: %r' % (params['label_type_main'],))

    if params['label_type_sub'] == 'kana':
        params['num_classes_sub'] = 147
    elif params['label_type_sub'] == 'phone':
        params['num_classes_sub'] = 38
    else:
        # This branch used to be the bare expression ``TypeError`` (no
        # ``raise``), so an invalid sub label type was silently accepted.
        raise TypeError(
            'Unsupported label_type_sub: %r' % (params['label_type_sub'],))

    # Model setting
    model = load(model_type=params['model'])
    model = model(batch_size=params['batch_size'],
                  input_size=params['input_size'],
                  splice=params['splice'],
                  num_stack=params['num_stack'],
                  num_units=params['num_units'],
                  num_layer_main=params['num_layer_main'],
                  num_layer_sub=params['num_layer_sub'],
                  #    bottleneck_dim=params['bottleneck_dim'],
                  num_classes_main=params['num_classes_main'],
                  num_classes_sub=params['num_classes_sub'],
                  main_task_weight=params['main_task_weight'],
                  parameter_init=params['weight_init'],
                  clip_grad_norm=params['clip_grad_norm'],
                  clip_activation=params['clip_activation'],
                  num_proj=params['num_proj'],
                  weight_decay=params['weight_decay'])

    # Model name built from the hyperparameters. It doubles as a directory
    # name, so the existing spellings are kept verbatim.
    model.model_name = params['model']
    model.model_name += '_' + str(params['num_units'])
    model.model_name += '_main' + str(params['num_layer_main'])
    model.model_name += '_sub' + str(params['num_layer_sub'])
    model.model_name += '_' + params['optimizer']
    model.model_name += '_lr' + str(params['learning_rate'])
    if params['bottleneck_dim'] != 0:
        model.model_name += '_bottoleneck' + str(params['bottleneck_dim'])
    if params['num_proj'] != 0:
        model.model_name += '_proj' + str(params['num_proj'])
    if params['num_stack'] != 1:
        model.model_name += '_stack' + str(params['num_stack'])
    if params['weight_decay'] != 0:
        model.model_name += '_weightdecay' + str(params['weight_decay'])
    model.model_name += '_taskweight' + str(params['main_task_weight'])
    if params['train_data_size'] == 'large':
        model.model_name += '_large'

    # Set save path
    model.save_path = mkdir(model_save_path)
    model.save_path = mkdir_join(model.save_path, 'ctc')
    model.save_path = mkdir_join(
        model.save_path,
        params['label_type_main'] + '_' + params['label_type_sub'])
    model.save_path = mkdir_join(model.save_path, model.model_name)

    # Reset model directory unless a finished run is already stored there
    if not isfile(join(model.save_path, 'complete.txt')):
        tf.gfile.DeleteRecursively(model.save_path)
        tf.gfile.MakeDirs(model.save_path)
    else:
        raise ValueError('File exists.')

    # Set process name
    setproctitle('csj_multictc_' + params['label_type_main'] + '_' +
                 params['label_type_sub'] + '_' + params['train_data_size'])

    # Save config file
    shutil.copyfile(config_path, join(model.save_path, 'config.yml'))

    # From here on everything printed goes to the training log
    sys.stdout = open(join(model.save_path, 'train.log'), 'w')
    do_train(model=model, params=params)