def decode_test(session, decode_op, network, dataset, label_type,
                save_path=None):
    """Print the reference and decoded label sequence of every utterance.

    Args:
        session: session of training model
        decode_op: operation for decoding
        network: network to evaluate
        dataset: An instance of a `Dataset` class
        label_type: string, phone39 or phone48 or phone61 or character
        save_path: path to save decoding results (not used at present;
            the results are always printed to stdout)
    """
    # One utterance per step, so the number of steps equals the data size
    num_utterances = dataset.data_num
    batch_generator = dataset.next_batch(batch_size=1)

    is_character = label_type == 'character'
    if is_character:
        map_file_path = '../metrics/mapping_files/attention/char2num.txt'
    else:
        # label_type[5:7] is the phone-set size, e.g. '61' for 'phone61'
        map_file_path = '../metrics/mapping_files/attention/phone2num_' + \
            label_type[5:7] + '.txt'

    for _step in range(num_utterances):
        inputs, labels_true, inputs_seq_len, _, input_names = next(
            batch_generator)

        # Dropout is disabled (keep probability 1.0) while decoding
        feed_dict = {
            network.inputs: inputs,
            network.labels: labels_true,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0,
        }
        labels_pred = session.run(decode_op, feed_dict=feed_dict)

        # Both label types print the same report; only the converter differs
        idx2str = num2char if is_character else num2phone

        print('----- wav: %s -----' % input_names[0])
        # The first and last reference tokens are sliced off before printing
        print('True: %s' % idx2str(labels_true[0][1:-1], map_file_path))
        print('Pred: %s' % idx2str(labels_pred[0],
                                   map_file_path).replace('>', ''))
def decode_test(session, decode_op, network, dataset, label_type,
                save_path=None):
    """Visualize label outputs of CTC model.

    Args:
        session: session of training model
        decode_op: operation for decoding
        network: network to evaluate
        dataset: An instance of a `Dataset` class
        label_type: string, kanji or kana or phone
        save_path: path to save decoding results. When it is not None the
            output is written to `decode.txt` under `network.model_dir`
            instead of being printed to the console; stdout is restored
            and the file is closed when this function returns.
    """
    import contextlib

    # Batch size is expected to be 1
    iteration = dataset.data_num

    # Make data generator
    mini_batch = dataset.next_batch(batch_size=1)

    if label_type == 'kanji':
        map_file_path = '../metrics/mapping_files/ctc/kanji2num.txt'
    elif label_type == 'kana':
        map_file_path = '../metrics/mapping_files/ctc/kana2num.txt'
    elif label_type == 'phone':
        map_file_path = '../metrics/mapping_files/ctc/phone2num.txt'

    with contextlib.ExitStack() as stack:
        if save_path is not None:
            # Redirect stdout only for the duration of the decoding so that
            # the file is flushed/closed and the caller's stdout is restored
            decode_file = stack.enter_context(
                open(join(network.model_dir, 'decode.txt'), 'w'))
            stack.enter_context(contextlib.redirect_stdout(decode_file))

        for _step in range(iteration):
            # Create feed dictionary for next mini batch
            inputs, labels_true, inputs_seq_len, input_names = next(
                mini_batch)
            # NOTE: labels_true is expected to be a list of string when
            # evaluation using dataset where label_type is kanji or kana

            feed_dict = {
                network.inputs: inputs,
                network.inputs_seq_len: inputs_seq_len,
                network.keep_prob_input: 1.0,
                network.keep_prob_hidden: 1.0,
            }

            # Visualize
            labels_pred_st = session.run(decode_op, feed_dict=feed_dict)
            labels_pred = sparsetensor2list(labels_pred_st, batch_size=1)

            if label_type in ['kanji', 'kana']:
                print('----- wav: %s -----' % input_names[0])
                # The reference is already text, so it is printed as-is
                print('True: %s' % labels_true[0])
                print('Pred: %s' % num2char(labels_pred[0], map_file_path))
            elif label_type == 'phone':
                print('----- wav: %s -----' % input_names[0])
                print('True: %s' % num2phone(labels_true[0], map_file_path))
                print('Pred: %s' % num2phone(labels_pred[0], map_file_path))
def check_loading(self, num_gpu, is_sorted):
    """Iterate over the multi-task training set and sanity-check batches.

    For the single-GPU case every input must be at least as long as its
    character and phone label sequences; otherwise both lengths are printed
    and a ValueError is raised.

    Args:
        num_gpu: int, the number of GPUs passed to `Dataset`
        is_sorted: bool, passed to `Dataset` as `is_sorted`
    """
    header = '----- num_gpu: ' + str(num_gpu) + \
        ', is_sorted: ' + str(is_sorted) + ' -----'
    print(header)

    batch_size = 64
    dataset = Dataset(data_type='train',
                      label_type_main='character',
                      label_type_sub='phone61',
                      batch_size=batch_size,
                      num_stack=3,
                      num_skip=3,
                      is_sorted=is_sorted,
                      is_progressbar=True,
                      num_gpu=num_gpu)

    def _check_lengths(input_list, label_list):
        # An input shorter than its label sequence is reported and rejected
        for input_seq, label_seq in zip(input_list, label_list):
            if len(input_seq) >= len(label_seq):
                continue
            print(len(input_seq))
            print(len(label_seq))
            raise ValueError

    tf.reset_default_graph()
    with tf.Session().as_default() as sess:
        print('=> Loading mini-batch...')
        map_file_path_char = '../metrics/mapping_files/ctc/char2num.txt'
        map_file_path_phone = '../metrics/mapping_files/ctc/phone2num_61.txt'

        batch_generator = dataset.next_batch(session=sess)

        iter_per_epoch = int(dataset.data_num / (batch_size * num_gpu)) + 1
        for _ in range(iter_per_epoch + 1):
            batch = next(batch_generator)
            inputs, labels_char_st, labels_phone_st = (
                batch[0], batch[1], batch[2])

            if num_gpu > 1:
                for inputs_per_gpu in inputs:
                    print(inputs_per_gpu.shape)
                # Only the first element of the per-GPU labels is inspected
                labels_char_st = labels_char_st[0]
                labels_phone_st = labels_phone_st[0]

            num_inputs = len(inputs)
            labels_char = sparsetensor2list(labels_char_st,
                                            batch_size=num_inputs)
            labels_phone = sparsetensor2list(labels_phone_st,
                                             batch_size=num_inputs)

            if num_gpu == 1:
                _check_lengths(inputs, labels_char)
                _check_lengths(inputs, labels_phone)

            # Convert the first utterance's labels to text; underscores in
            # the character string are replaced with spaces
            str_true_char = re.sub(
                r'_', ' ', num2char(labels_char[0], map_file_path_char))
            str_true_phone = num2phone(labels_phone[0], map_file_path_phone)
def _print_decoded_utterance(session, decode_op, network, inputs,
                             inputs_seq_len, labels_true_st, wav_name,
                             idx2str, map_file_path):
    """Decode one utterance and print its reference and hypothesis."""
    feed_dict = {
        network.inputs: inputs,
        network.inputs_seq_len: inputs_seq_len,
        network.keep_prob_input: 1.0,
        network.keep_prob_hidden: 1.0,
    }

    labels_pred_st = session.run(decode_op, feed_dict=feed_dict)
    labels_true = sparsetensor2list(labels_true_st, batch_size=1)
    labels_pred = sparsetensor2list(labels_pred_st, batch_size=1)

    print('----- wav: %s -----' % wav_name)
    print('True: %s' % idx2str(labels_true[0], map_file_path))
    print('Pred: %s' % idx2str(labels_pred[0], map_file_path))


def decode_test_multitask(session, decode_op_main, decode_op_second, network,
                          dataset, label_type_second, save_path=None):
    """Visualize label outputs of Multi-task CTC model.

    The main (character) task is decoded for `dataset.data_num` utterances
    first, then the second (phone) task is decoded for the same number of
    utterances drawn from the same batch generator.

    Args:
        session: session of training model
        decode_op_main: operation for decoding in the main task
        decode_op_second: operation for decoding in the second task
        network: network to evaluate
        dataset: An instance of a `Dataset` class
        label_type_second: string, phone39 or phone48 or phone61
        save_path: path to save decoding results. When it is not None the
            output is written to `decode.txt` under `network.model_dir`
            instead of being printed to the console; stdout is restored
            and the file is closed when this function returns.
    """
    import contextlib

    # Batch size is expected to be 1
    iteration = dataset.data_num

    # Make data generator
    mini_batch = dataset.next_batch(batch_size=1)

    with contextlib.ExitStack() as stack:
        if save_path is not None:
            # Redirect stdout only for the duration of the decoding so that
            # the file is flushed/closed and the caller's stdout is restored
            decode_file = stack.enter_context(
                open(join(network.model_dir, 'decode.txt'), 'w'))
            stack.enter_context(contextlib.redirect_stdout(decode_file))

        # Decode character
        print('===== character =====')
        map_file_path = '../metrics/mapping_files/ctc/char2num.txt'
        for _step in range(iteration):
            # The main-task labels are the second element of the batch
            inputs, labels_true_st, _, inputs_seq_len, input_names = next(
                mini_batch)
            _print_decoded_utterance(
                session, decode_op_main, network, inputs, inputs_seq_len,
                labels_true_st, input_names[0], num2char, map_file_path)

        # Decode phone
        print('\n===== phone =====')
        # label_type_second[5:7] is the phone-set size, e.g. '61'
        map_file_path = '../metrics/mapping_files/ctc/phone2num_' + \
            label_type_second[5:7] + '.txt'
        for _step in range(iteration):
            # The second-task labels are the third element of the batch
            inputs, _, labels_true_st, inputs_seq_len, input_names = next(
                mini_batch)
            _print_decoded_utterance(
                session, decode_op_second, network, inputs, inputs_seq_len,
                labels_true_st, input_names[0], num2phone, map_file_path)
def do_eval_per(session, decode_op, per_op, network, dataset, label_type,
                eos_index, eval_batch_size=None, is_progressbar=False):
    """Evaluate trained model by Phone Error Rate.

    Hypotheses (and references, unless the dataset is already labelled
    with phone39) are mapped to the 39-phone set before the edit distance
    is computed.

    Args:
        session: session of training model
        decode_op: operation for decoding
        per_op: operation for computing phone error rate (kept for
            interface compatibility; the evaluation always goes through
            the 39-phone mapping and does not run this op)
        network: network to evaluate
        dataset: An instance of a `Dataset' class
        label_type: string, phone39 or phone48 or phone61
        eos_index: int, the index of <EOS> class
        eval_batch_size: int, the batch size when evaluating the model
        is_progressbar: if True, visualize the progressbar
    Returns:
        per_global: An average of PER
    """
    if eval_batch_size is not None:
        batch_size = eval_batch_size
    else:
        batch_size = dataset.batch_size

    train_label_type = label_type
    data_label_type = dataset.label_type

    # Number of mini-batches, counting a final partial batch
    num_examples = dataset.data_num
    iteration = int(num_examples / batch_size)
    if (num_examples / batch_size) != iteration:
        iteration += 1
    per_global = 0

    # Make data generator
    mini_batch = dataset.next_batch(batch_size=batch_size)

    phone2num_map_file_path = '../metrics/mapping_files/attention/phone2num_' + \
        train_label_type[5:7] + '.txt'
    phone2num_39_map_file_path = '../metrics/mapping_files/attention/phone2num_39.txt'
    phone2phone_map_file_path = '../metrics/mapping_files/phone2phone.txt'

    def _to_phone39_indices(label_indices):
        """Map indices of the training phone set to 39-phone indices."""
        # Convert from num to phone (-> list of phone strings)
        phone_list = num2phone(label_indices,
                               phone2num_map_file_path).split(' ')
        # Mapping to 39 phones (-> list of phone strings)
        phone_list = map_to_39phone(phone_list, train_label_type,
                                    phone2phone_map_file_path)
        # Convert from phone to num (-> list of phone indices)
        return phone2num(phone_list, phone2num_39_map_file_path)

    for _step in wrap_iterator(range(iteration), is_progressbar):
        # Create feed dictionary for next mini-batch
        inputs, att_labels_true, _, inputs_seq_len, _, _ = next(mini_batch)

        feed_dict = {
            network.inputs: inputs,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0,
        }

        # The last mini-batch may be smaller than batch_size
        batch_size_each = len(inputs_seq_len)

        # Evaluate by 39 phones
        predicted_ids = session.run(decode_op, feed_dict=feed_dict)
        predicted_ids_phone39 = [
            _to_phone39_indices(predicted_ids[i_batch])
            for i_batch in range(batch_size_each)]

        if data_label_type != 'phone39':
            labels_true_phone39 = [
                _to_phone39_indices(att_labels_true[i_batch])
                for i_batch in range(batch_size_each)]
        else:
            # References are already in the 39-phone set
            labels_true_phone39 = att_labels_true

        # Compute edit distance
        labels_true_st = list2sparsetensor(labels_true_phone39,
                                           padded_value=eos_index)
        labels_pred_st = list2sparsetensor(predicted_ids_phone39,
                                           padded_value=eos_index)
        per_local = compute_edit_distance(session,
                                          labels_true_st,
                                          labels_pred_st)
        # Weight by the batch size so the final value is a per-utterance mean
        per_global += per_local * batch_size_each

    per_global /= dataset.data_num

    return per_global
def do_eval_per(session, decode_op, per_op, network, dataset, label_type,
                eos_index, eval_batch_size=None, is_progressbar=False,
                is_multitask=False):
    """Evaluate trained model by Phone Error Rate.

    Both hypotheses and references are mapped to the 39-phone set before
    the edit distance is computed. Hypotheses are decoded with the mapping
    file of the training label type, references with the mapping file of
    the dataset's own label type.

    Args:
        session: session of training model
        decode_op: operation for decoding
        per_op: operation for computing phone error rate (not run by this
            function; kept for interface compatibility)
        network: network to evaluate
        dataset: An instance of a `Dataset' class
        label_type: string, phone39 or phone48 or phone61
        eos_index: int, the index of <EOS> class
        eval_batch_size: int, the batch size when evaluating the model
        is_progressbar: if True, visualize the progressbar
        is_multitask: if True, evaluate the multitask model
    Returns:
        per_mean: An average of PER
    """
    if eval_batch_size is not None:
        batch_size = eval_batch_size
    else:
        batch_size = dataset.batch_size

    train_label_type = label_type
    eval_label_type = dataset.label_type

    # Number of mini-batches, counting a final partial batch
    num_examples = dataset.data_num
    iteration = int(num_examples / batch_size)
    if (num_examples / batch_size) != iteration:
        iteration += 1
    per_mean = 0

    # Make data generator
    mini_batch = dataset.next_batch(batch_size=batch_size)

    train_phone2num_map_file_path = '../metrics/mapping_files/ctc/' + \
        train_label_type + '_to_num.txt'
    # References are indexed in the dataset's label set, so they must be
    # decoded with that label set's mapping file (not the training one)
    eval_phone2num_map_file_path = '../metrics/mapping_files/ctc/' + \
        eval_label_type + '_to_num.txt'
    phone2num_39_map_file_path = '../metrics/mapping_files/ctc/phone39_to_num.txt'
    phone2phone_map_file_path = '../metrics/mapping_files/phone2phone.txt'

    def _to_phone39_indices(label_indices, source_label_type, map_file_path):
        """Map indices of `source_label_type` to 39-phone indices."""
        # Convert from num to phone (-> list of phone strings)
        phone_list = num2phone(label_indices, map_file_path).split(' ')
        # Mapping to 39 phones (-> list of phone strings)
        phone_list = map_to_39phone(phone_list, source_label_type,
                                    phone2phone_map_file_path)
        # Convert from phone to num (-> list of phone indices)
        return phone2num(phone_list, phone2num_39_map_file_path)

    for _step in wrap_iterator(range(iteration), is_progressbar):
        # Create feed dictionary for next mini-batch
        if not is_multitask:
            inputs, labels_true, inputs_seq_len, _, _ = next(mini_batch)
        else:
            # Multitask batches carry the phone labels as the third element
            inputs, _, labels_true, inputs_seq_len, _, _ = next(mini_batch)

        feed_dict = {
            network.inputs: inputs,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0,
        }

        # The last mini-batch may be smaller than batch_size
        batch_size_each = len(inputs_seq_len)

        # Evaluate by 39 phones
        predicted_ids = session.run(decode_op, feed_dict=feed_dict)

        labels_pred_mapped, labels_true_mapped = [], []
        for i_batch in range(batch_size_each):
            # Hypothesis
            labels_pred_mapped.append(_to_phone39_indices(
                predicted_ids[i_batch], train_label_type,
                train_phone2num_map_file_path))

            # Reference
            labels_true_mapped.append(_to_phone39_indices(
                labels_true[i_batch], eval_label_type,
                eval_phone2num_map_file_path))

        # Compute edit distance
        labels_true_st = list2sparsetensor(labels_true_mapped,
                                           padded_value=eos_index)
        labels_pred_st = list2sparsetensor(labels_pred_mapped,
                                           padded_value=eos_index)
        per_each = compute_edit_distance(session,
                                         labels_true_st,
                                         labels_pred_st)
        # Weight by the batch size so the final value is a per-utterance mean
        per_mean += per_each * batch_size_each

    per_mean /= dataset.data_num

    return per_mean