def posterior_test_multitask(session, posteriors_op_main, posteriors_op_second,
                             network, dataset, label_type_second,
                             save_path=None, show=False):
    """Visualize label posteriors of the multi-task CTC model.

    One figure is produced per utterance via `plot_probs_ctc_char_phone`.

    Args:
        session: session of training model
        posteriors_op_main: operation for computing posteriors in the main
            task
        posteriors_op_second: operation for computing posteriors in the
            second task
        network: network to evaluate
        dataset: An instance of a `Dataset` class
        label_type_second: string, phone39 or phone48 or phone61
        save_path: path to save ctc outputs
        show: if True, show each figure
    """
    # Batch size is expected to be 1
    iteration = dataset.data_num

    # Make data generator
    mini_batch = dataset.next_batch(batch_size=1)

    save_path = mkdir_join(save_path, 'ctc_output')

    for _ in range(iteration):
        # Create feed dictionary for next mini batch
        inputs, _, _, inputs_seq_len, input_names = next(mini_batch)

        # Dropout is disabled (keep probability 1.0) for visualization
        feed_dict = {
            network.inputs: inputs,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0
        }

        # Visualize
        max_frame_num = inputs.shape[1]

        # Fetch both posteriors in a single run so that the forward pass is
        # computed once instead of once per task.
        posteriors_char, posteriors_phone = session.run(
            [posteriors_op_main, posteriors_op_second], feed_dict=feed_dict)

        # Rows belonging to utterance `i_batch`; the indexing assumes the
        # posteriors are flattened to (batch * max_frame_num, num_classes)
        # -- TODO confirm against the model definition.
        i_batch = 0  # index in mini-batch
        posteriors_index = np.arange(max_frame_num) + i_batch * max_frame_num

        # Trim the padded frames before plotting
        frame_num = int(inputs_seq_len[0])

        plot_probs_ctc_char_phone(
            probs_char=posteriors_char[posteriors_index][:frame_num, :],
            probs_phone=posteriors_phone[posteriors_index][:frame_num, :],
            wav_index=input_names[0],
            label_type_second=label_type_second,
            save_path=save_path,
            show=show)
def posterior_test(session, posteriors_op, network, dataset, label_type,
                   save_path=None, show=False):
    """Plot the CTC label posteriors of every utterance in `dataset`.

    Args:
        session: session of training model
        posteriors_op: operation for computing posteriors
        network: network to evaluate
        dataset: An instance of a `Dataset` class
        label_type: string, kanji or kana or phone
        save_path: path to save ctc outputs
        show: if True, show each figure
    """
    # The generator yields one utterance at a time (batch size 1)
    num_utterances = dataset.data_num
    batch_generator = dataset.next_batch(batch_size=1)

    save_path = mkdir_join(save_path, 'ctc_output')

    utt_index = 0  # position of the utterance inside the mini-batch

    for _ in range(num_utterances):
        inputs, _, inputs_seq_len, input_names = next(batch_generator)

        # All dropout keep probabilities are fed as 1.0
        feed_dict = {}
        feed_dict[network.inputs] = inputs
        feed_dict[network.inputs_seq_len] = inputs_seq_len
        feed_dict[network.keep_prob_input] = 1.0
        feed_dict[network.keep_prob_hidden] = 1.0
        feed_dict[network.keep_prob_output] = 1.0

        max_frame_num = inputs.shape[1]
        posteriors = session.run(posteriors_op, feed_dict=feed_dict)

        # Row indices of this utterance inside the posterior matrix
        row_offset = utt_index * max_frame_num
        row_indices = np.array(
            [row_offset + frame for frame in range(max_frame_num)])

        # Keep only the frames that are not padding
        valid_frames = int(inputs_seq_len[0])
        utterance_probs = posteriors[row_indices][:valid_frames, :]

        plot_probs_ctc(probs=utterance_probs,
                       wav_index=input_names[0],
                       label_type=label_type,
                       save_path=save_path,
                       show=show)
def main(config_path):
    """Build a CTC model from a config file and train it on CSJ.

    The model directory is derived from the hyperparameters, wiped, and then
    receives a copy of the config plus `train.log`, which captures everything
    printed to stdout during training.

    Args:
        config_path: path to the config file (.yml); it must contain a
            `param` mapping
    Raises:
        ValueError: if the model directory already contains `complete.txt`
    """
    # Load a config file (.yml)
    # NOTE: `yaml.load(f)` without a Loader can construct arbitrary Python
    # objects on old PyYAML and is a TypeError on PyYAML >= 6; `safe_load`
    # reads plain-data configs identically on every version.
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)

    param = config['param']

    # Number of classes per label type (except for a blank label)
    num_classes_by_label = {'kanji': 3386, 'kana': 147, 'phone': 38}
    if param['label_type'] in num_classes_by_label:
        param['num_classes'] = num_classes_by_label[param['label_type']]

    # Model setting
    CTCModel = load(model_type=param['model'])
    network = CTCModel(batch_size=param['batch_size'],
                       input_size=param['input_size'] * param['num_stack'],
                       num_unit=param['num_unit'],
                       num_layer=param['num_layer'],
                       bottleneck_dim=param['bottleneck_dim'],
                       num_classes=param['num_classes'],
                       parameter_init=param['weight_init'],
                       clip_grad=param['clip_grad'],
                       clip_activation=param['clip_activation'],
                       dropout_ratio_input=param['dropout_input'],
                       dropout_ratio_hidden=param['dropout_hidden'],
                       num_proj=param['num_proj'],
                       weight_decay=param['weight_decay'])

    # Encode the hyperparameters in the model name. The name is also the
    # directory name, so the suffix strings must stay exactly as they are.
    model_name = param['model']
    model_name += '_' + str(param['num_unit'])
    model_name += '_' + str(param['num_layer'])
    model_name += '_' + param['optimizer']
    model_name += '_lr' + str(param['learning_rate'])
    if param['bottleneck_dim'] != 0:
        model_name += '_bottoleneck' + str(param['bottleneck_dim'])
    if param['num_proj'] != 0:
        model_name += '_proj' + str(param['num_proj'])
    if param['num_stack'] != 1:
        model_name += '_stack' + str(param['num_stack'])
    if param['weight_decay'] != 0:
        model_name += '_weightdecay' + str(param['weight_decay'])
    if param['train_data_size'] == 'large':
        model_name += '_large'
    network.model_name = model_name

    # Set save path
    model_dir = mkdir('/n/sd8/inaguma/result/csj/')
    model_dir = mkdir_join(model_dir, 'ctc')
    model_dir = mkdir_join(model_dir, param['label_type'])
    model_dir = mkdir_join(model_dir, network.model_name)
    network.model_dir = model_dir

    # Reset model directory unless it is marked by `complete.txt`
    if isfile(join(network.model_dir, 'complete.txt')):
        raise ValueError('File exists.')
    tf.gfile.DeleteRecursively(network.model_dir)
    tf.gfile.MakeDirs(network.model_dir)

    # Set process name
    setproctitle('csj_ctc_' + param['label_type'] + '_' +
                 param['train_data_size'])

    # Save config file
    shutil.copyfile(config_path, join(network.model_dir, 'config.yml'))

    # Redirect stdout to the training log. The log is closed and stdout is
    # restored even when training raises.
    with open(join(network.model_dir, 'train.log'), 'w') as log_file:
        sys.stdout = log_file
        try:
            print(network.model_name)
            do_train(network=network, param=param)
        finally:
            sys.stdout = sys.__stdout__
def main(config_path):
    """Build a multi-task CTC model from a config file and train it on TIMIT.

    The main task uses a fixed 33 classes; the number of classes of the sub
    task is chosen from `label_type_sub`. The model directory is derived from
    the hyperparameters, wiped, and then receives a copy of the config plus
    `train.log`, which captures everything printed to stdout during training.

    Args:
        config_path: path to the config file (.yml); it must contain a
            `param` mapping
    Raises:
        ValueError: if the model directory already contains `complete.txt`
    """
    # Load a config file (.yml)
    # NOTE: `yaml.load(f)` without a Loader can construct arbitrary Python
    # objects on old PyYAML and is a TypeError on PyYAML >= 6; `safe_load`
    # reads plain-data configs identically on every version.
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)

    param = config['param']

    # Number of classes of the sub task
    num_classes_by_label = {'phone61': 61, 'phone48': 48, 'phone39': 39}
    if param['label_type_sub'] in num_classes_by_label:
        param['num_classes_sub'] = num_classes_by_label[
            param['label_type_sub']]

    # Model setting
    CTCModel = load(model_type=param['model'])
    network = CTCModel(batch_size=param['batch_size'],
                       input_size=param['input_size'] * param['num_stack'],
                       num_unit=param['num_unit'],
                       num_layer_main=param['num_layer_main'],
                       num_layer_sub=param['num_layer_sub'],
                       num_classes_main=33,
                       num_classes_sub=param['num_classes_sub'],
                       main_task_weight=param['main_task_weight'],
                       parameter_init=param['weight_init'],
                       clip_grad=param['clip_grad'],
                       clip_activation=param['clip_activation'],
                       dropout_ratio_input=param['dropout_input'],
                       dropout_ratio_hidden=param['dropout_hidden'],
                       num_proj=param['num_proj'],
                       weight_decay=param['weight_decay'])

    # Encode the hyperparameters in the model name. The name is also the
    # directory name, so the suffix strings must stay exactly as they are.
    model_name = param['model']
    model_name += '_' + str(param['num_unit'])
    model_name += '_main' + str(param['num_layer_main'])
    model_name += '_sub' + str(param['num_layer_sub'])
    model_name += '_' + param['optimizer']
    model_name += '_lr' + str(param['learning_rate'])
    if param['num_proj'] != 0:
        model_name += '_proj' + str(param['num_proj'])
    if param['dropout_input'] != 1:
        model_name += '_dropi' + str(param['dropout_input'])
    if param['dropout_hidden'] != 1:
        model_name += '_droph' + str(param['dropout_hidden'])
    if param['num_stack'] != 1:
        model_name += '_stack' + str(param['num_stack'])
    if param['weight_decay'] != 0:
        model_name += '_weightdecay' + str(param['weight_decay'])
    model_name += '_taskweight' + str(param['main_task_weight'])
    if param['decay_rate'] != 1:
        # NOTE(review): this stringifies the numeric SUM of decay_steps and
        # decay_rate. Kept as-is so existing directory names do not change;
        # confirm whether two separate fields were intended.
        model_name += '_lrdecay' + \
            str(param['decay_steps'] + param['decay_rate'])
    network.model_name = model_name

    # Set save path
    model_dir = mkdir('/n/sd8/inaguma/result/timit/')
    model_dir = mkdir_join(model_dir, 'ctc')
    model_dir = mkdir_join(model_dir, 'char_' + param['label_type_sub'])
    model_dir = mkdir_join(model_dir, network.model_name)
    network.model_dir = model_dir

    # Reset model directory unless it is marked by `complete.txt`
    if isfile(join(network.model_dir, 'complete.txt')):
        raise ValueError('File exists.')
    tf.gfile.DeleteRecursively(network.model_dir)
    tf.gfile.MakeDirs(network.model_dir)

    # Set process name
    setproctitle('timit_multictc')

    # Save config file
    shutil.copyfile(config_path, join(network.model_dir, 'config.yml'))

    # Redirect stdout to the training log. The log is closed and stdout is
    # restored even when training raises.
    with open(join(network.model_dir, 'train.log'), 'w') as log_file:
        sys.stdout = log_file
        try:
            print(network.model_name)
            do_train(network=network, param=param)
        finally:
            sys.stdout = sys.__stdout__
def main(config_path):
    """Build a joint CTC-attention model from a config file and train it on
    TIMIT.

    Class counts and the <SOS>/<EOS> indices are chosen from `label_type`.
    The model directory is derived from the hyperparameters, wiped, and then
    receives a copy of the config plus `train.log`, which captures everything
    printed to stdout during training.

    Args:
        config_path: path to the config file (.yml); it must contain a
            `param` mapping
    Raises:
        ValueError: if the model directory already contains `complete.txt`
    """
    # Load a config file (.yml)
    # NOTE: `yaml.load(f)` without a Loader can construct arbitrary Python
    # objects on old PyYAML and is a TypeError on PyYAML >= 6; `safe_load`
    # reads plain-data configs identically on every version.
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)

    param = config['param']

    # label_type -> (att_num_classes, ctc_num_classes, sos_index, eos_index)
    label_settings = {
        'phone61': (63, 61, 0, 1),
        'phone48': (50, 48, 0, 1),
        'phone39': (41, 39, 0, 1),
        'character': (35, 33, 1, 2),
    }
    if param['label_type'] in label_settings:
        (param['att_num_classes'],
         param['ctc_num_classes'],
         param['sos_index'],
         param['eos_index']) = label_settings[param['label_type']]

    # Model setting
    # (attention_smoothing is currently not passed to the model)
    network = JointCTCAttention(
        batch_size=param['batch_size'],
        input_size=param['input_size'],
        encoder_num_unit=param['encoder_num_unit'],
        encoder_num_layer=param['encoder_num_layer'],
        attention_dim=param['attention_dim'],
        attention_type=param['attention_type'],
        decoder_num_unit=param['decoder_num_unit'],
        decoder_num_layer=param['decoder_num_layer'],
        embedding_dim=param['embedding_dim'],
        att_num_classes=param['att_num_classes'],
        ctc_num_classes=param['ctc_num_classes'],
        att_task_weight=param['att_task_weight'],
        sos_index=param['sos_index'],
        eos_index=param['eos_index'],
        max_decode_length=param['max_decode_length'],
        attention_weights_tempareture=param['attention_weights_tempareture'],
        logits_tempareture=param['logits_tempareture'],
        parameter_init=param['weight_init'],
        clip_grad=param['clip_grad'],
        clip_activation_encoder=param['clip_activation_encoder'],
        clip_activation_decoder=param['clip_activation_decoder'],
        dropout_ratio_input=param['dropout_input'],
        dropout_ratio_hidden=param['dropout_hidden'],
        weight_decay=param['weight_decay'])

    # Encode the hyperparameters in the model name. The name is also the
    # directory name, so the suffix strings must stay exactly as they are.
    model_name = param['model']
    model_name += '_encoder' + str(param['encoder_num_unit'])
    model_name += '_' + str(param['encoder_num_layer'])
    model_name += '_attdim' + str(param['attention_dim'])
    model_name += '_decoder' + str(param['decoder_num_unit'])
    model_name += '_' + str(param['decoder_num_layer'])
    model_name += '_' + param['optimizer']
    model_name += '_lr' + str(param['learning_rate'])
    model_name += '_' + param['attention_type']
    if param['attention_weights_tempareture'] != 1:
        model_name += '_sharpening' + \
            str(param['attention_weights_tempareture'])
    if param['weight_decay'] != 0:
        model_name += '_weightdecay' + str(param['weight_decay'])
    network.model_name = model_name

    # Set save path
    model_dir = mkdir('/n/sd8/inaguma/result/timit/')
    model_dir = mkdir_join(model_dir, 'attention')
    model_dir = mkdir_join(model_dir, param['label_type'])
    model_dir = mkdir_join(model_dir, network.model_name)
    network.model_dir = model_dir

    # Reset model directory unless it is marked by `complete.txt`
    if isfile(join(network.model_dir, 'complete.txt')):
        raise ValueError('File exists.')
    tf.gfile.DeleteRecursively(network.model_dir)
    tf.gfile.MakeDirs(network.model_dir)

    # Set process name
    setproctitle('timit_jointctcatt_' + param['label_type'])

    # Save config file
    shutil.copyfile(config_path, join(network.model_dir, 'config.yml'))

    # Redirect stdout to the training log. The log is closed and stdout is
    # restored even when training raises.
    with open(join(network.model_dir, 'train.log'), 'w') as log_file:
        sys.stdout = log_file
        try:
            print(network.model_name)
            do_train(network=network, param=param)
        finally:
            sys.stdout = sys.__stdout__