import re
import sys
from os.path import join

import Levenshtein
import tensorflow as tf

# NOTE: DataSet, sparsetensor2list, list2sparsetensor, num2char, num2phone,
# phone2num, map_to_39phone, compute_edit_distance and wrap_iterator are
# project utilities imported from elsewhere in this repository.


def check_reading(self, num_gpu, is_sorted):
    print('----- num_gpu: ' + str(num_gpu) +
          ', is_sorted: ' + str(is_sorted) + ' -----')

    batch_size = 64
    dataset = DataSet(data_type='train', label_type_second='phone61',
                      batch_size=batch_size, num_stack=3, num_skip=3,
                      is_sorted=is_sorted, is_progressbar=True,
                      num_gpu=num_gpu)

    tf.reset_default_graph()
    with tf.Session().as_default() as sess:
        print('=> Reading mini-batch...')
        map_file_path_char = '../metric/mapping_files/ctc/char2num.txt'
        map_file_path_phone = '../metric/mapping_files/ctc/phone2num_61.txt'

        mini_batch = dataset.next_batch(session=sess)

        iter_per_epoch = int(dataset.data_num / (batch_size * num_gpu)) + 1
        for i in range(iter_per_epoch + 1):
            inputs, labels_char_st, labels_phone_st, inputs_seq_len, input_names = mini_batch.__next__()
            if num_gpu > 1:
                for inputs_gpu in inputs:
                    print(inputs_gpu.shape)
                # Use the first GPU's shard for the checks below
                inputs = inputs[0]
                labels_char_st = labels_char_st[0]
                labels_phone_st = labels_phone_st[0]
            labels_char = sparsetensor2list(labels_char_st,
                                            batch_size=len(inputs))
            labels_phone = sparsetensor2list(labels_phone_st,
                                             batch_size=len(inputs))

            if num_gpu == 1:
                for inputs_i, labels_i in zip(inputs, labels_char):
                    if len(inputs_i) < len(labels_i):
                        print(len(inputs_i))
                        print(len(labels_i))
                        raise ValueError
                for inputs_i, labels_i in zip(inputs, labels_phone):
                    if len(inputs_i) < len(labels_i):
                        print(len(inputs_i))
                        print(len(labels_i))
                        raise ValueError

            str_true_char = num2char(labels_char[0], map_file_path_char)
            str_true_char = re.sub(r'_', ' ', str_true_char)
            str_true_phone = num2phone(labels_phone[0], map_file_path_phone)
            print(str_true_char)
            print(str_true_phone)
            print('-----')
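
# sparsetensor2list() is a project helper; below is a minimal sketch of the
# conversion it appears to perform, inferred only from how it is called in
# this file (an assumption, not the repository's actual implementation).
def example_sparsetensor2list(labels_st, batch_size):
    """Split a tf.SparseTensorValue (indices, values, dense_shape) back into
    one label list per utterance (hypothetical stand-in)."""
    labels = [[] for _ in range(batch_size)]
    # Each index row is (utterance_index, time_index); values are label ids
    for (utt_index, _), value in zip(labels_st.indices, labels_st.values):
        labels[utt_index].append(value)
    return labels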
def decode_test(session, decode_op, network, dataset, label_type,
                save_path=None):
    """Visualize label outputs of CTC model.
    Args:
        session: session of training model
        decode_op: operation for decoding
        network: network to evaluate
        dataset: An instance of a `Dataset` class
        label_type: string, phone39 or phone48 or phone61 or character
        save_path: path to save decoding results
    """
    # Batch size is expected to be 1
    iteration = dataset.data_num

    # Make data generator
    mini_batch = dataset.next_batch(batch_size=1)

    if label_type == 'character':
        map_file_path = '../metric/mapping_files/ctc/char2num.txt'
    else:
        map_file_path = '../metric/mapping_files/ctc/phone2num_' + \
            label_type[5:7] + '.txt'

    if save_path is not None:
        sys.stdout = open(join(network.model_dir, 'decode.txt'), 'w')

    for step in range(iteration):
        # Create feed dictionary for next mini batch
        inputs, labels_true_st, inputs_seq_len, input_names = mini_batch.__next__()

        feed_dict = {
            network.inputs: inputs,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0
        }

        # Visualize
        labels_pred_st = session.run(decode_op, feed_dict=feed_dict)
        labels_true = sparsetensor2list(labels_true_st, batch_size=1)
        labels_pred = sparsetensor2list(labels_pred_st, batch_size=1)
        if label_type == 'character':
            print('----- wav: %s -----' % input_names[0])
            print('True: %s' % num2char(labels_true[0], map_file_path))
            print('Pred: %s' % num2char(labels_pred[0], map_file_path))
        else:
            print('----- wav: %s -----' % input_names[0])
            print('True: %s' % num2phone(labels_true[0], map_file_path))
            print('Pred: %s' % num2phone(labels_pred[0], map_file_path))
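
# A hedged sketch of how the `decode_op` fed to decode_test() might be
# built. It assumes `network` exposes a time-major logits tensor
# [max_time, batch, num_classes] next to the placeholders fed above;
# tf.nn.ctc_beam_search_decoder is one plausible wiring, not necessarily
# how this repository constructs the op.
def example_build_decode_op(network, beam_width=20):
    """Build a CTC beam-search decode op from the network's logits
    (hypothetical helper for illustration)."""
    decoded, _ = tf.nn.ctc_beam_search_decoder(
        network.logits, network.inputs_seq_len, beam_width=beam_width)
    # decoded[0] is a SparseTensor, the same kind of result that
    # sparsetensor2list() consumes above
    return decoded[0]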
def check_reading(self, label_type, num_gpu, is_sorted):
    print('----- label_type: ' + label_type + ', num_gpu: ' +
          str(num_gpu) + ', is_sorted: ' + str(is_sorted) + ' -----')

    batch_size = 64
    dataset = DataSet(data_type='train', train_data_size='default',
                      label_type=label_type, batch_size=batch_size,
                      num_stack=3, num_skip=3, is_sorted=is_sorted,
                      is_progressbar=True, num_gpu=num_gpu)

    tf.reset_default_graph()
    with tf.Session().as_default() as sess:
        print('=> Reading mini-batch...')
        if label_type == 'kanji':
            map_file_path = '../metric/mapping_files/ctc/kanji2num.txt'
            map_fn = num2char
        elif label_type == 'character':
            map_file_path = '../metric/mapping_files/ctc/char2num.txt'
            map_fn = num2char
        else:
            map_file_path = '../metric/mapping_files/ctc/phone2num.txt'
            map_fn = num2phone

        mini_batch = dataset.next_batch(session=sess)

        iter_per_epoch = int(dataset.data_num / (batch_size * num_gpu)) + 1
        for i in range(iter_per_epoch + 1):
            inputs, labels_st, inputs_seq_len, input_names = mini_batch.__next__()
            if num_gpu > 1:
                for inputs_gpu in inputs:
                    print(inputs_gpu.shape)
                # Use the first GPU's shard for the checks below
                inputs = inputs[0]
                labels_st = labels_st[0]
            labels = sparsetensor2list(labels_st, batch_size=len(inputs))

            if num_gpu == 1:
                for inputs_i, labels_i in zip(inputs, labels):
                    if len(inputs_i) < len(labels_i):
                        print(len(inputs_i))
                        print(len(labels_i))
                        raise ValueError

            str_true = map_fn(labels[0], map_file_path)
            str_true = re.sub(r'_', ' ', str_true)
            print(str_true)
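
# The length checks in both check_reading() variants enforce the CTC
# constraint that every utterance must supply at least as many input frames
# as output labels; frame stacking (num_stack=3, num_skip=3) divides the
# frame count by 3, which is exactly what could break this. A standalone
# version of that invariant (hypothetical helper mirroring the tests):
def example_ctc_length_check(inputs, labels):
    """Raise if any utterance has fewer frames than labels."""
    for inputs_i, labels_i in zip(inputs, labels):
        if len(inputs_i) < len(labels_i):
            raise ValueError('CTC needs len(frames) >= len(labels): %d < %d'
                             % (len(inputs_i), len(labels_i)))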
def do_eval_cer(session, decode_op, network, dataset, label_type,
                is_test=False, eval_batch_size=None, is_progressbar=False,
                is_multitask=False, is_main=False):
    """Evaluate trained model by Character Error Rate.
    Args:
        session: session of training model
        decode_op: operation for decoding
        network: network to evaluate
        dataset: An instance of `Dataset` class
        label_type: string, character or kanji
        is_test: bool, set to True when evaluating by the test set
        eval_batch_size: int, the batch size when evaluating the model
        is_progressbar: if True, visualize progressbar
        is_multitask: if True, evaluate the multitask model
        is_main: if True, evaluate the main task
    Return:
        cer_mean: An average of CER
    """
    if eval_batch_size is None:
        batch_size = network.batch_size
    else:
        batch_size = eval_batch_size

    num_examples = dataset.data_num
    iteration = int(num_examples / batch_size)
    if (num_examples / batch_size) != int(num_examples / batch_size):
        iteration += 1
    cer_sum = 0

    # Make data generator
    mini_batch = dataset.next_batch(batch_size=batch_size)

    if label_type == 'character':
        map_file_path = '../metric/mapping_files/ctc/char2num.txt'
    elif label_type == 'kanji':
        map_file_path = '../metric/mapping_files/ctc/kanji2num.txt'

    for step in wrap_iterator(range(iteration), is_progressbar):
        # Create feed dictionary for next mini batch
        if not is_multitask:
            inputs, labels_true_st, inputs_seq_len, _ = mini_batch.__next__()
        else:
            if is_main:
                inputs, labels_true_st, _, inputs_seq_len, _ = mini_batch.__next__()
            else:
                inputs, _, labels_true_st, inputs_seq_len, _ = mini_batch.__next__()

        feed_dict = {
            network.inputs: inputs,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0
        }

        batch_size_each = len(inputs_seq_len)

        labels_pred_st = session.run(decode_op, feed_dict=feed_dict)
        labels_true = sparsetensor2list(labels_true_st, batch_size_each)
        labels_pred = sparsetensor2list(labels_pred_st, batch_size_each)
        for i_batch in range(batch_size_each):
            # Convert from list of indices to string
            str_pred = num2char(labels_pred[i_batch], map_file_path)
            # TODO: change in case of character
            if label_type == 'kanji' and is_test:
                # NOTE: for kanji, the test-set labels are stored as raw strings
                str_true = ''.join(labels_true[i_batch])
            else:
                str_true = num2char(labels_true[i_batch], map_file_path)

            # Remove silence (_) labels
            str_true = re.sub(r'[_]+', "", str_true)
            str_pred = re.sub(r'[_]+', "", str_pred)

            # Compute edit distance, normalized by the reference length
            cer_each = Levenshtein.distance(str_pred, str_true) / len(str_true)
            cer_sum += cer_each

    cer_mean = cer_sum / dataset.data_num
    return cer_mean
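
# A self-contained sketch of the per-utterance CER computed above: strip the
# silence mark '_' from both strings, then take the Levenshtein distance
# normalized by the reference length. The function name is illustrative only.
def example_cer(str_pred, str_true):
    """Character error rate between two strings (hypothetical helper)."""
    str_true = re.sub(r'[_]+', '', str_true)
    str_pred = re.sub(r'[_]+', '', str_pred)
    return Levenshtein.distance(str_pred, str_true) / len(str_true)

# e.g. example_cer('helo', 'hello') == 0.2 (one edit over 5 reference chars)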
def do_eval_per(session, decode_op, per_op, network, dataset,
                train_label_type, eval_batch_size=None, is_progressbar=False,
                is_multitask=False):
    """Evaluate trained model by Phone Error Rate.
    Args:
        session: session of training model
        decode_op: operation for decoding
        per_op: operation for computing phone error rate
        network: network to evaluate
        dataset: An instance of a `Dataset` class
        train_label_type: string, phone39 or phone48 or phone61
        eval_batch_size: int, the batch size when evaluating the model
        is_progressbar: if True, visualize the progressbar
        is_multitask: if True, evaluate the multitask model
    Returns:
        per_global: An average of PER
    """
    if eval_batch_size is not None:
        batch_size = eval_batch_size
    else:
        batch_size = dataset.batch_size

    data_label_type = dataset.label_type
    num_examples = dataset.data_num
    iteration = int(num_examples / batch_size)
    if (num_examples / batch_size) != int(num_examples / batch_size):
        iteration += 1
    per_global = 0

    # Make data generator
    mini_batch = dataset.next_batch(batch_size=batch_size)

    phone2num_map_file_path = '../metric/mapping_files/ctc/phone2num_' + \
        train_label_type[5:7] + '.txt'
    phone2num_39_map_file_path = '../metric/mapping_files/ctc/phone2num_39.txt'
    phone2phone_map_file_path = '../metric/mapping_files/phone2phone.txt'

    for step in wrap_iterator(range(iteration), is_progressbar):
        # Create feed dictionary for next mini batch
        if not is_multitask:
            inputs, labels_true_st, inputs_seq_len, _ = mini_batch.__next__()
        else:
            inputs, _, labels_true_st, inputs_seq_len, _ = mini_batch.__next__()

        feed_dict = {
            network.inputs: inputs,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0
        }

        batch_size_each = len(inputs_seq_len)

        # Evaluation with the training phone set (per_op) is disabled here;
        # the model is always scored on the reduced 39-phone set below.
        if False:
            # Evaluate by the same phones as used when training
            per_local = session.run(per_op, feed_dict=feed_dict)
            per_global += per_local * batch_size_each
        else:
            # Evaluate by 39 phones
            labels_pred_st = session.run(decode_op, feed_dict=feed_dict)
            labels_true = sparsetensor2list(labels_true_st, batch_size_each)
            labels_pred = sparsetensor2list(labels_pred_st, batch_size_each)
            for i_batch in range(batch_size_each):
                # Convert from num to phone (-> list of phone strings)
                phone_pred_seq = num2phone(labels_pred[i_batch],
                                           phone2num_map_file_path)
                phone_pred_list = phone_pred_seq.split(' ')

                # Map to 39 phones (-> list of phone strings)
                phone_pred_list = map_to_39phone(phone_pred_list,
                                                 train_label_type,
                                                 phone2phone_map_file_path)

                # Convert from phone to num (-> list of phone indices)
                phone_pred_list = phone2num(phone_pred_list,
                                            phone2num_39_map_file_path)
                labels_pred[i_batch] = phone_pred_list

                if data_label_type != 'phone39':
                    # Convert from num to phone (-> list of phone strings)
                    phone_true_seq = num2phone(labels_true[i_batch],
                                               phone2num_map_file_path)
                    phone_true_list = phone_true_seq.split(' ')

                    # Map to 39 phones (-> list of phone strings)
                    phone_true_list = map_to_39phone(phone_true_list,
                                                     data_label_type,
                                                     phone2phone_map_file_path)

                    # Convert from phone to num (-> list of phone indices)
                    phone_true_list = phone2num(phone_true_list,
                                                phone2num_39_map_file_path)
                    labels_true[i_batch] = phone_true_list

            # Compute edit distance
            labels_true_st = list2sparsetensor(labels_true)
            labels_pred_st = list2sparsetensor(labels_pred)
            per_local = compute_edit_distance(session, labels_true_st,
                                              labels_pred_st)
            per_global += per_local * batch_size_each

    per_global /= dataset.data_num
    return per_global
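
# compute_edit_distance() is a project helper; a minimal sketch of what it
# presumably wraps is tf.edit_distance over the two sparse label batches,
# averaged across utterances (an assumption, not this repository's
# confirmed implementation).
def example_edit_distance(session, labels_true_st, labels_pred_st):
    """Mean normalized edit distance between two batches of label sequences
    (hypothetical stand-in for compute_edit_distance)."""
    # Wrap the SparseTensorValue triples as graph-level SparseTensors
    truth = tf.SparseTensor(labels_true_st.indices, labels_true_st.values,
                            labels_true_st.dense_shape)
    hypothesis = tf.SparseTensor(labels_pred_st.indices,
                                 labels_pred_st.values,
                                 labels_pred_st.dense_shape)
    # normalize=True divides each distance by the reference length,
    # matching the PER convention used above
    distance_op = tf.reduce_mean(tf.edit_distance(
        tf.cast(hypothesis, tf.int32), tf.cast(truth, tf.int32),
        normalize=True))
    return session.run(distance_op)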