def iter_asr(set_name, set_train_mode, set_rr):
    """Run a single ASR batch on one data split and accumulate its statistics.

    Args:
        set_name: Split the batch belongs to; must be 'train', 'dev' or 'test'.
        set_train_mode: Forwarded to ``fn_batch_asr`` as ``train_step``.
        set_rr: Batch indices, resolved to keys through ``get_key_by_index``.

    Side effects:
        Adds the batch loss and accuracy, each weighted by the batch size, to
        ``m_asr_loss`` / ``m_asr_acc`` and the batch size to ``m_asr_count``.
        Calls ``auto_writer_info_asr`` when ``tf_writer`` is not None.
    """
    assert set_name in ('train', 'dev', 'test')

    # Order the indices with sort_reverse, keyed on this split's feature
    # lengths (presumably longest-first - confirm against sort_reverse).
    batch_idx = sort_reverse(set_rr, feat_len[set_name])
    batch_keys = feat_iterator[set_name].get_key_by_index(batch_idx)
    feats = feat_iterator[set_name].get_feat_by_key(batch_keys)
    texts = text_iterator[set_name].get_text_by_key(batch_keys)

    feat_mat, batch_feat_len, text_mat, batch_text_len = batch_speech_text(
        opts['gpu'], feats, texts)

    batch_loss, batch_acc = fn_batch_asr(
        model_asr, feat_mat, batch_feat_len, text_mat, batch_text_len,
        train_step=set_train_mode, coeff_loss=opts['coeff_pair'])
    # The same coefficient was handed to fn_batch_asr as coeff_loss; divide it
    # back out before the value is checked, accumulated and logged.
    batch_loss /= opts['coeff_pair']
    assert_nan(batch_loss)

    batch_size = len(batch_idx)
    for accumulator, value in ((m_asr_loss, batch_loss),
                               (m_asr_acc, batch_acc)):
        accumulator[set_name] += value * batch_size
    m_asr_count[set_name] += batch_size

    if tf_writer is None:
        return
    auto_writer_info_asr(set_name, batch_loss, batch_acc)
def iter_tts(set_name, set_train_mode, set_rr):
    """Run a single TTS batch on one data split and accumulate its statistics.

    Args:
        set_name: Split the batch belongs to; must be 'train', 'dev' or 'test'.
        set_train_mode: Forwarded to ``fn_batch_tts`` as ``train_step``.
        set_rr: Batch indices, resolved to keys through ``get_key_by_index``.

    Side effects:
        Adds every value returned by ``fn_batch_tts`` (weighted by the batch
        size) to its ``m_tts_*`` accumulator and the batch size to
        ``m_tts_count``. Calls ``auto_writer_info_tts`` when ``tf_writer`` is
        not None.
    """
    assert set_name in ('train', 'dev', 'test')

    # Order the indices with sort_reverse, keyed on this split's text lengths
    # (presumably longest-first - confirm against sort_reverse).
    batch_idx = sort_reverse(set_rr, text_len[set_name])
    batch_keys = text_iterator[set_name].get_key_by_index(batch_idx)
    feats = feat_iterator[set_name].get_feat_by_key(batch_keys)
    texts = text_iterator[set_name].get_text_by_key(batch_keys)

    # Only the multi-speaker model receives speaker vectors as extra input.
    aux_info = None
    if model_tts.TYPE == TacotronType.MULTI_SPEAKER:
        spkvecs = feat_spkvec_iterator.get_feat_by_key(batch_keys)
        aux_info = {'speaker_vector': spkvecs}

    feat_mat, batch_feat_len, text_mat, batch_text_len = batch_speech_text(
        opts['gpu'], feats, texts,
        feat_sil=feat_sil, group=opts['tts_group'],
        start_sil=1, end_sil=opts['tts_pad_sil'])

    (batch_loss, loss_feat, loss_bce_fend,
     loss_spk_emb, acc_fend) = fn_batch_tts(
        model_tts, text_mat, batch_text_len, feat_mat, batch_feat_len,
        aux_info=aux_info, train_step=set_train_mode,
        coeff_loss=opts['coeff_pair'])
    # The same coefficient was handed to fn_batch_tts as coeff_loss; divide it
    # back out before the value is checked, accumulated and logged.
    batch_loss /= opts['coeff_pair']
    assert_nan(batch_loss)

    batch_size = len(batch_idx)
    for accumulator, value in ((m_tts_loss, batch_loss),
                               (m_tts_loss_feat, loss_feat),
                               (m_tts_loss_bce, loss_bce_fend),
                               (m_tts_loss_spk_emb, loss_spk_emb),
                               (m_tts_acc, acc_fend)):
        accumulator[set_name] += value * batch_size
    m_tts_count[set_name] += batch_size

    if tf_writer is None:
        return
    auto_writer_info_tts(set_name, batch_loss, loss_feat, loss_bce_fend,
                         loss_spk_emb, acc_fend)
curr_key_list) if opts['strip_sil']: curr_feat_list = list_feat_sil_strip(curr_feat_list) curr_label_list = text_iterator[set_name].get_text_by_key( curr_key_list) aux_info = None if model.TYPE == TacotronType.MULTI_SPEAKER: curr_spkvec_list = feat_spkvec_iterator.get_feat_by_key( curr_key_list) aux_info = {'speaker_vector': curr_spkvec_list} # print(1, timeit.default_timer() - tic); tic = timeit.default_timer() feat_mat, feat_len, text_mat, text_len = batch_speech_text( opts['gpu'], curr_feat_list, curr_label_list, feat_sil=feat_sil, group=opts['group'], start_sil=1, end_sil=opts['pad_sil']) # print(2, timeit.default_timer() - tic); tic = timeit.default_timer() _tmp_loss, _tmp_loss_feat, _tmp_loss_bernend, _tmp_acc_bernend = fn_batch( text_mat, text_len, feat_mat, feat_len, aux_info=aux_info, train_step=set_train_mode) # print(3, timeit.default_timer() - tic); tic = timeit.default_timer() _tmp_count = len(rr) assert_nan(_tmp_loss) mloss[set_name] += _tmp_loss * _tmp_count
# Process every batch of every split once. Only the 'train' split is run with
# train_step=True; 'dev' and 'test' are run with train_step=False.
for set_name, set_rr, set_train_mode in [('train', train_rr, True),
                                         ('dev', dev_rr, False),
                                         ('test', test_rr, False)]:
    for rr in tqdm_wrapper(set_rr):
        start = time.time()  # retained for ad-hoc per-batch timing
        curr_key_list = feat_iterator[set_name].get_key_by_index(rr)
        # Check that both iterators map these indices to the same keys BEFORE
        # the feature-derived keys are used to fetch text, so a misaligned
        # dataset fails here with a clear message instead of a lookup error
        # (or silently mismatched pairs) further down.
        assert (curr_key_list ==
                text_iterator[set_name].get_key_by_index(rr)
                ), 'key(s) not same'
        curr_feat_list = feat_iterator[set_name].get_feat_by_key(
            curr_key_list)
        curr_label_list = text_iterator[set_name].get_text_by_key(
            curr_key_list)

        feat_mat, feat_len, text_mat, text_len = batch_speech_text(
            opts['gpu'], curr_feat_list, curr_label_list)
        _tmp_loss, _tmp_acc = fn_batch(feat_mat, feat_len,
                                       text_mat, text_len,
                                       train_step=set_train_mode)

        # Weight by batch size so the totals can later be divided by mcount
        # to obtain per-sample averages (presumably done by the reporting
        # code that follows - confirm).
        _tmp_count = len(rr)
        assert_nan(_tmp_loss)
        mloss[set_name] += _tmp_loss * _tmp_count
        macc[set_name] += _tmp_acc * _tmp_count
        mcount[set_name] += _tmp_count

# Containers for the per-split summary assembled after the loop.
info_header = ['set', 'loss', 'acc']
info_table = []
max_target=opts['max_target']) elif opts['mode'] == 'tf': curr_key_list = text_iterator.get_key_by_index(rr) curr_text_list = text_iterator.get_text_by_key(curr_key_list) curr_feat_list = data_iterator.get_feat_by_key(curr_key_list) if model.TYPE == TacotronType.MULTI_SPEAKER: _spk_vec = np.stack( feat_spkvec_iterator.get_feat_by_key( curr_key_list)).astype('float32') _spk_vec = Variable( tensorauto(opts['gpu'], torch.from_numpy(_spk_vec))) aux_info = {'speaker_vector': _spk_vec} feat_mat, feat_len, text_mat, text_len = batch_speech_text( opts['gpu'], curr_feat_list, curr_text_list, feat_sil=feat_sil, group=group, start_sil=1, end_sil=0) pred_feat, pred_len, pred_att = generator_speech.decode_greedy_tf_torch( model, Variable(text_mat), text_len, Variable(feat_mat), feat_len, group=group, feat_sil=feat_sil, aux_info=aux_info) pred_len = data_iterator.get_feat_length_by_key( text_iterator.get_key_by_index(rr))