Exemple #1
0
def posterior_test_multitask(session,
                             posteriors_op_main,
                             posteriors_op_second,
                             network,
                             dataset,
                             label_type_second,
                             save_path=None,
                             show=False):
    """Visualize label posteriors of the multi-task CTC model.

    For every utterance in `dataset`, the posteriors of both tasks are
    computed and handed to `plot_probs_ctc_char_phone`.

    Args:
        session: session of training model
        posteriors_op_main: operation for computing posteriors in the main
            task
        posteriors_op_second: operation for computing posteriors in the
            second task
        network: network to evaluate
        dataset: An instance of a `Dataset` class
        label_type_second: string, phone39 or phone48 or phone61
        save_path: path to save ctc outputs; a `ctc_output` sub-directory is
            requested from `mkdir_join`
        show: if True, show each figure
    """
    # Batch size is expected to be 1, so one step per utterance
    iteration = dataset.data_num

    # Make data generator
    mini_batch = dataset.next_batch(batch_size=1)

    # NOTE(review): save_path defaults to None but is passed straight to
    # mkdir_join; confirm that the helper accepts None.
    save_path = mkdir_join(save_path, 'ctc_output')

    i_batch = 0  # index in mini-batch

    for _step in range(iteration):
        # Create feed dictionary for next mini batch
        inputs, _, _, inputs_seq_len, input_names = next(mini_batch)

        feed_dict = {
            network.inputs: inputs,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0
        }

        # Fetch both posteriors in a single run so that the graph is
        # executed only once per utterance.
        max_frame_num = inputs.shape[1]
        posteriors_char, posteriors_phone = session.run(
            [posteriors_op_main, posteriors_op_second],
            feed_dict=feed_dict)

        # Rows that belong to utterance `i_batch`.
        # assumes posteriors are laid out as (batch * time, classes)
        # -- TODO confirm against the posteriors ops
        offset = i_batch * max_frame_num
        posteriors_index = np.arange(offset, offset + max_frame_num)

        # Drop the padded frames beyond the true sequence length
        frame_num = int(inputs_seq_len[0])

        # Visualize
        plot_probs_ctc_char_phone(
            probs_char=posteriors_char[posteriors_index][:frame_num, :],
            probs_phone=posteriors_phone[posteriors_index][:frame_num, :],
            wav_index=input_names[0],
            label_type_second=label_type_second,
            save_path=save_path,
            show=show)
def posterior_test(session, posteriors_op, network, dataset, label_type,
                   save_path=None, show=False):
    """Plot the label posteriors of a CTC model, one utterance at a time.

    Args:
        session: session of training model
        posteriors_op: operation for computing posteriors
        network: network to evaluate
        dataset: An instance of a `Dataset` class
        label_type: string, kanji or kana or phone
        save_path: path to save ctc outputs
        show: if True, show each figure
    """
    # The batch size is expected to be 1, hence one step per sample
    num_steps = dataset.data_num

    # Generator that yields the mini batches
    batch_generator = dataset.next_batch(batch_size=1)

    save_path = mkdir_join(save_path, 'ctc_output')

    utt = 0  # position of the utterance inside the mini-batch

    for _step in range(num_steps):
        inputs, _, inputs_seq_len, input_names = next(batch_generator)

        # All keep probabilities are fed as 1.0
        feed_dict = {
            network.inputs: inputs,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0,
            network.keep_prob_output: 1.0
        }

        frame_num = inputs.shape[1]
        posteriors = session.run(posteriors_op, feed_dict=feed_dict)

        # Row indices of this utterance in the posteriors
        offset = utt * frame_num
        rows = np.array([offset + t for t in range(frame_num)])

        # Keep only the frames within the true sequence length
        utt_posteriors = posteriors[rows]
        valid_len = int(inputs_seq_len[utt])

        # Visualize
        plot_probs_ctc(
            probs=utt_posteriors[:valid_len, :],
            wav_index=input_names[utt],
            label_type=label_type,
            save_path=save_path,
            show=show)
def main(config_path):
    """Train a CTC model described by a YAML config file.

    Builds the network from the `param` section of the config, derives the
    model name and the result directory from the hyperparameters, copies the
    config into that directory and runs `do_train` while stdout is
    redirected to `train.log` in the same directory.

    Args:
        config_path: path to the config file (.yml); it must contain a
            `param` mapping
    Raises:
        ValueError: if `complete.txt` already exists in the model directory
    """
    # Load a config file (.yml).
    # safe_load is used because yaml.load without an explicit Loader can
    # construct arbitrary Python objects and is rejected by recent PyYAML.
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)
    param = config['param']

    # Number of classes, except for a blank label
    num_classes_by_label = {'kanji': 3386, 'kana': 147, 'phone': 38}
    if param['label_type'] in num_classes_by_label:
        param['num_classes'] = num_classes_by_label[param['label_type']]

    # Model setting
    CTCModel = load(model_type=param['model'])
    network = CTCModel(batch_size=param['batch_size'],
                       input_size=param['input_size'] * param['num_stack'],
                       num_unit=param['num_unit'],
                       num_layer=param['num_layer'],
                       bottleneck_dim=param['bottleneck_dim'],
                       num_classes=param['num_classes'],
                       parameter_init=param['weight_init'],
                       clip_grad=param['clip_grad'],
                       clip_activation=param['clip_activation'],
                       dropout_ratio_input=param['dropout_input'],
                       dropout_ratio_hidden=param['dropout_hidden'],
                       num_proj=param['num_proj'],
                       weight_decay=param['weight_decay'])

    # Encode the hyperparameters in the model name; optional parts are
    # appended only when they differ from their neutral value.
    name_parts = [
        param['model'],
        '_' + str(param['num_unit']),
        '_' + str(param['num_layer']),
        '_' + param['optimizer'],
        '_lr' + str(param['learning_rate']),
    ]
    if param['bottleneck_dim'] != 0:
        # The '_bottoleneck' spelling is kept so that the generated
        # directory name is unchanged.
        name_parts.append('_bottoleneck' + str(param['bottleneck_dim']))
    if param['num_proj'] != 0:
        name_parts.append('_proj' + str(param['num_proj']))
    if param['num_stack'] != 1:
        name_parts.append('_stack' + str(param['num_stack']))
    if param['weight_decay'] != 0:
        name_parts.append('_weightdecay' + str(param['weight_decay']))
    if param['train_data_size'] == 'large':
        name_parts.append('_large')
    network.model_name = ''.join(name_parts)

    # Set save path: <root>/ctc/<label_type>/<model_name>
    model_dir = mkdir('/n/sd8/inaguma/result/csj/')
    for sub_dir in ('ctc', param['label_type'], network.model_name):
        model_dir = mkdir_join(model_dir, sub_dir)
    network.model_dir = model_dir

    # Reset model directory unless a previous run was marked as complete
    if isfile(join(network.model_dir, 'complete.txt')):
        raise ValueError('File exists.')
    tf.gfile.DeleteRecursively(network.model_dir)
    tf.gfile.MakeDirs(network.model_dir)

    # Set process name
    setproctitle('csj_ctc_' + param['label_type'] + '_' +
                 param['train_data_size'])

    # Save config file
    shutil.copyfile(config_path, join(network.model_dir, 'config.yml'))

    # Redirect stdout to the training log. The log is closed and stdout is
    # restored even if training raises.
    with open(join(network.model_dir, 'train.log'), 'w') as log_file:
        sys.stdout = log_file
        try:
            print(network.model_name)
            do_train(network=network, param=param)
        finally:
            sys.stdout = sys.__stdout__
def main(config_path):
    """Train a multi-task CTC model described by a YAML config file.

    Builds the network from the `param` section of the config, derives the
    model name and the result directory from the hyperparameters, copies the
    config into that directory and runs `do_train` while stdout is
    redirected to `train.log` in the same directory.

    Args:
        config_path: path to the config file (.yml); it must contain a
            `param` mapping
    Raises:
        ValueError: if `complete.txt` already exists in the model directory
    """
    # Load a config file (.yml).
    # safe_load is used because yaml.load without an explicit Loader can
    # construct arbitrary Python objects and is rejected by recent PyYAML.
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)
    param = config['param']

    # Number of classes of the sub task
    num_classes_sub_by_label = {'phone61': 61, 'phone48': 48, 'phone39': 39}
    if param['label_type_sub'] in num_classes_sub_by_label:
        param['num_classes_sub'] = \
            num_classes_sub_by_label[param['label_type_sub']]

    # Model setting
    CTCModel = load(model_type=param['model'])
    network = CTCModel(batch_size=param['batch_size'],
                       input_size=param['input_size'] * param['num_stack'],
                       num_unit=param['num_unit'],
                       num_layer_main=param['num_layer_main'],
                       num_layer_sub=param['num_layer_sub'],
                       num_classes_main=33,
                       num_classes_sub=param['num_classes_sub'],
                       main_task_weight=param['main_task_weight'],
                       parameter_init=param['weight_init'],
                       clip_grad=param['clip_grad'],
                       clip_activation=param['clip_activation'],
                       dropout_ratio_input=param['dropout_input'],
                       dropout_ratio_hidden=param['dropout_hidden'],
                       num_proj=param['num_proj'],
                       weight_decay=param['weight_decay'])

    # Encode the hyperparameters in the model name; optional parts are
    # appended only when they differ from their neutral value.
    name_parts = [
        param['model'],
        '_' + str(param['num_unit']),
        '_main' + str(param['num_layer_main']),
        '_sub' + str(param['num_layer_sub']),
        '_' + param['optimizer'],
        '_lr' + str(param['learning_rate']),
    ]
    if param['num_proj'] != 0:
        name_parts.append('_proj' + str(param['num_proj']))
    if param['dropout_input'] != 1:
        name_parts.append('_dropi' + str(param['dropout_input']))
    if param['dropout_hidden'] != 1:
        name_parts.append('_droph' + str(param['dropout_hidden']))
    if param['num_stack'] != 1:
        name_parts.append('_stack' + str(param['num_stack']))
    if param['weight_decay'] != 0:
        name_parts.append('_weightdecay' + str(param['weight_decay']))
    name_parts.append('_taskweight' + str(param['main_task_weight']))
    if param['decay_rate'] != 1:
        # NOTE(review): decay_steps and decay_rate are summed numerically
        # before being converted to a string; kept as-is so the generated
        # directory name is unchanged -- confirm this is intended.
        name_parts.append(
            '_lrdecay' + str(param['decay_steps'] + param['decay_rate']))
    network.model_name = ''.join(name_parts)

    # Set save path: <root>/ctc/char_<label_type_sub>/<model_name>
    model_dir = mkdir('/n/sd8/inaguma/result/timit/')
    for sub_dir in ('ctc',
                    'char_' + param['label_type_sub'],
                    network.model_name):
        model_dir = mkdir_join(model_dir, sub_dir)
    network.model_dir = model_dir

    # Reset model directory unless a previous run was marked as complete
    if isfile(join(network.model_dir, 'complete.txt')):
        raise ValueError('File exists.')
    tf.gfile.DeleteRecursively(network.model_dir)
    tf.gfile.MakeDirs(network.model_dir)

    # Set process name
    setproctitle('timit_multictc')

    # Save config file
    shutil.copyfile(config_path, join(network.model_dir, 'config.yml'))

    # Redirect stdout to the training log. The log is closed and stdout is
    # restored when training ends, even if it raises.
    with open(join(network.model_dir, 'train.log'), 'w') as log_file:
        sys.stdout = log_file
        try:
            print(network.model_name)
            do_train(network=network, param=param)
        finally:
            sys.stdout = sys.__stdout__
Exemple #5
0
def main(config_path):
    """Train a joint CTC-attention model described by a YAML config file.

    Builds a `JointCTCAttention` network from the `param` section of the
    config, derives the model name and the result directory from the
    hyperparameters, copies the config into that directory and runs
    `do_train` while stdout is redirected to `train.log` in the same
    directory.

    Args:
        config_path: path to the config file (.yml); it must contain a
            `param` mapping
    Raises:
        ValueError: if `complete.txt` already exists in the model directory
    """
    # Load a config file (.yml).
    # safe_load is used because yaml.load without an explicit Loader can
    # construct arbitrary Python objects and is rejected by recent PyYAML.
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)
    param = config['param']

    # Class counts and <SOS>/<EOS> indices for each supported label type
    label_settings = {
        'phone61': {'att_num_classes': 63, 'ctc_num_classes': 61,
                    'sos_index': 0, 'eos_index': 1},
        'phone48': {'att_num_classes': 50, 'ctc_num_classes': 48,
                    'sos_index': 0, 'eos_index': 1},
        'phone39': {'att_num_classes': 41, 'ctc_num_classes': 39,
                    'sos_index': 0, 'eos_index': 1},
        'character': {'att_num_classes': 35, 'ctc_num_classes': 33,
                      'sos_index': 1, 'eos_index': 2},
    }
    if param['label_type'] in label_settings:
        param.update(label_settings[param['label_type']])

    # Model setting
    # NOTE: attention_smoothing is currently not passed to the model.
    network = JointCTCAttention(
        batch_size=param['batch_size'],
        input_size=param['input_size'],
        encoder_num_unit=param['encoder_num_unit'],
        encoder_num_layer=param['encoder_num_layer'],
        attention_dim=param['attention_dim'],
        attention_type=param['attention_type'],
        decoder_num_unit=param['decoder_num_unit'],
        decoder_num_layer=param['decoder_num_layer'],
        embedding_dim=param['embedding_dim'],
        att_num_classes=param['att_num_classes'],
        ctc_num_classes=param['ctc_num_classes'],
        att_task_weight=param['att_task_weight'],
        sos_index=param['sos_index'],
        eos_index=param['eos_index'],
        max_decode_length=param['max_decode_length'],
        attention_weights_tempareture=param['attention_weights_tempareture'],
        logits_tempareture=param['logits_tempareture'],
        parameter_init=param['weight_init'],
        clip_grad=param['clip_grad'],
        clip_activation_encoder=param['clip_activation_encoder'],
        clip_activation_decoder=param['clip_activation_decoder'],
        dropout_ratio_input=param['dropout_input'],
        dropout_ratio_hidden=param['dropout_hidden'],
        weight_decay=param['weight_decay'])

    # Encode the hyperparameters in the model name; optional parts are
    # appended only when they differ from their neutral value.
    name_parts = [
        param['model'],
        '_encoder' + str(param['encoder_num_unit']),
        '_' + str(param['encoder_num_layer']),
        '_attdim' + str(param['attention_dim']),
        '_decoder' + str(param['decoder_num_unit']),
        '_' + str(param['decoder_num_layer']),
        '_' + param['optimizer'],
        '_lr' + str(param['learning_rate']),
        '_' + param['attention_type'],
    ]
    if param['attention_weights_tempareture'] != 1:
        name_parts.append(
            '_sharpening' + str(param['attention_weights_tempareture']))
    if param['weight_decay'] != 0:
        name_parts.append('_weightdecay' + str(param['weight_decay']))
    network.model_name = ''.join(name_parts)

    # Set save path: <root>/attention/<label_type>/<model_name>
    model_dir = mkdir('/n/sd8/inaguma/result/timit/')
    for sub_dir in ('attention', param['label_type'], network.model_name):
        model_dir = mkdir_join(model_dir, sub_dir)
    network.model_dir = model_dir

    # Reset model directory unless a previous run was marked as complete
    if isfile(join(network.model_dir, 'complete.txt')):
        raise ValueError('File exists.')
    tf.gfile.DeleteRecursively(network.model_dir)
    tf.gfile.MakeDirs(network.model_dir)

    # Set process name
    setproctitle('timit_jointctcatt_' + param['label_type'])

    # Save config file
    shutil.copyfile(config_path, join(network.model_dir, 'config.yml'))

    # Redirect stdout to the training log. The log is closed and stdout is
    # restored when training ends, even if it raises.
    with open(join(network.model_dir, 'train.log'), 'w') as log_file:
        sys.stdout = log_file
        try:
            print(network.model_name)
            do_train(network=network, param=param)
        finally:
            sys.stdout = sys.__stdout__