def gen_figure_from_output(self, id_name, labels, hidden, hparams):
    """Plot the post-processed network output over the reference waveform.

    The figure is written to ``<out_dir>/<id>.<net>.Raw<gen_figure_ext>``.

    Args:
        id_name: Identifier of the sample. It is handed unchanged to the
            loader; directory and extension are stripped afterwards for the
            plot title and the output file name.
        labels: Network output, coming in as T x C.
        hidden: Not used by this method.
        hparams: Hyper-parameter container; ``model_name``, ``out_dir``,
            ``frame_rate_output_Hz`` and ``gen_figure_ext`` are read.
    """
    # Labels come in as T x C.
    predicted_raw = self.dataset_train.postprocess_sample(labels)
    reference_raw = RawWaveformLabelGen.load_sample(
        id_name, self.OutputGen.frame_rate_output_Hz)

    plotter = DataPlotter()

    # Derive display/file names only after the loader has seen the raw id.
    net_name = os.path.basename(hparams.model_name)
    id_name = os.path.basename(id_name).rsplit('.', 1)[0]
    filename = os.path.join(hparams.out_dir, id_name + "." + net_name)
    plotter.set_title(id_name + " - " + net_name)

    panel = 0
    curves = [(reference_raw, 'Org'), (predicted_raw, 'Wavenet')]
    plotter.set_data_list(grid_idx=panel, data_list=curves)
    plotter.set_linewidth(grid_idx=panel, linewidth=[0.1])
    plotter.set_colors(grid_idx=panel, alpha=0.8)
    plotter.set_lim(panel, ymin=-1, ymax=1)
    plotter.set_label(
        grid_idx=panel,
        xlabel='frames [{} Hz]'.format(hparams.frame_rate_output_Hz),
        ylabel='raw')

    plotter.gen_plot()
    plotter.save_to_file(filename + '.Raw' + hparams.gen_figure_ext)
def gen_figure_from_output(self, id_name, labels, hidden, hparams):
    """Plot network output, extracted peaks, target atoms and LF0 curves.

    Four panels are produced (raw NN output per theta, post-processed peaks,
    target labels, LF0 comparison) and the figure is saved to
    ``<out_dir>/<id_name>.<net>.BASE<gen_figure_ext>``.

    Args:
        id_name: Identifier of the sample to load references for.
        labels: Network output; a 1-D array is expanded to two dimensions.
        hidden: Not used by this method.
        hparams: Hyper-parameter container; reads ``k``, ``thetas``,
            ``num_coded_sps``, ``frame_size_ms``, ``min_atom_amp``,
            ``model_name``, ``out_dir``, ``gen_figure_ext`` and, optionally,
            ``world_dir``.
    """
    if labels.ndim < 2:
        labels = np.expand_dims(labels, axis=1)
    labels_post = self.OutputGen.postprocess_sample(
        labels, identify_peaks=True, peak_range=100)
    lf0 = self.OutputGen.labels_to_lf0(labels_post, hparams.k)
    lf0, vuv = interpolate_lin(lf0)
    # Builtin bool: the ``np.bool`` alias was removed in NumPy 1.24.
    vuv = vuv.astype(bool)

    # Load original lf0 and vuv.
    world_dir = getattr(hparams, "world_dir", None)
    if world_dir is None:
        world_dir = os.path.join(self.OutputGen.dir_labels,
                                 self.dir_extracted_acoustic_features)
    org_labels = WorldFeatLabelGen.load_sample(
        id_name, world_dir, num_coded_sps=hparams.num_coded_sps)
    _, original_lf0, original_vuv, _ = WorldFeatLabelGen.convert_to_world_features(
        org_labels, num_coded_sps=hparams.num_coded_sps)
    original_lf0, _ = interpolate_lin(original_lf0)
    original_vuv = original_vuv.astype(bool)

    # Remove the phrase component so the reference is comparable to the
    # atom-based curves plotted below.
    phrase_curve = np.fromfile(
        os.path.join(self.OutputGen.dir_labels,
                     id_name + self.OutputGen.ext_phrase),
        dtype=np.float32).reshape(-1, 1)
    original_lf0 -= phrase_curve

    # Bring the references to the length of the prediction by trimming both
    # ends (one call per end, the second with ``reverse=True``).
    len_diff = len(original_lf0) - len(lf0)
    trim = int(len_diff / 2.0)
    original_lf0 = WorldFeatLabelGen.trim_end_sample(original_lf0, trim)
    original_lf0 = WorldFeatLabelGen.trim_end_sample(
        original_lf0, trim + 1, reverse=True)

    org_labels = self.OutputGen.load_sample(
        id_name, self.OutputGen.dir_labels, len(hparams.thetas))
    org_labels = self.OutputGen.trim_end_sample(org_labels, trim)
    org_labels = self.OutputGen.trim_end_sample(
        org_labels, trim + 1, reverse=True)
    org_atoms = self.OutputGen.labels_to_atoms(
        org_labels, k=hparams.k, frame_size=hparams.frame_size_ms)

    # Get a data plotter.
    net_name = os.path.basename(hparams.model_name)
    filename = str(os.path.join(hparams.out_dir, id_name + '.' + net_name))
    plotter = DataPlotter()
    plotter.set_title(id_name + " - " + net_name)

    frame_axis = 'frames [' + str(hparams.frame_size_ms) + ' ms]'

    # Panel 0: raw network output, one curve per theta (highest index first).
    grid_idx = 0
    graphs_output = [
        (labels[:, idx],
         r'$\theta$=' + "{0:.3f}".format(hparams.thetas[idx]))
        for idx in reversed(range(labels.shape[1]))
    ]
    plotter.set_label(grid_idx=grid_idx, xlabel=frame_axis,
                      ylabel='NN output')
    plotter.set_data_list(grid_idx=grid_idx, data_list=graphs_output)

    # Panel 1: post-processed output with predicted unvoiced regions shaded.
    grid_idx += 1
    graphs_peaks = [
        (labels_post[:, idx, 0],)
        for idx in reversed(range(labels_post.shape[1]))
    ]
    plotter.set_label(grid_idx=grid_idx, xlabel=frame_axis,
                      ylabel='NN post-processed')
    plotter.set_data_list(grid_idx=grid_idx, data_list=graphs_peaks)
    plotter.set_area_list(grid_idx=grid_idx,
                          area_list=[(np.invert(vuv), '0.8', 1.0)])
    plotter.set_lim(grid_idx=grid_idx, ymin=-1.8, ymax=1.8)

    # Panel 2: target labels with reference unvoiced regions shaded.
    grid_idx += 1
    graphs_target = [
        (org_labels[:, idx, 0],)
        for idx in reversed(range(org_labels.shape[1]))
    ]
    plotter.set_label(grid_idx=grid_idx, xlabel=frame_axis, ylabel='target')
    plotter.set_data_list(grid_idx=grid_idx, data_list=graphs_target)
    plotter.set_area_list(grid_idx=grid_idx,
                          area_list=[(np.invert(original_vuv), '0.8', 1.0)])
    plotter.set_lim(grid_idx=grid_idx, ymin=-1.8, ymax=1.8)

    # Panel 3: LF0 rebuilt from target atoms, reference LF0, predicted LF0.
    grid_idx += 1
    output_atoms = AtomLabelGen.labels_to_atoms(
        labels_post, hparams.k, hparams.frame_size_ms,
        amp_threshold=hparams.min_atom_amp)
    wcad_lf0 = AtomLabelGen.atoms_to_lf0(org_atoms, len(labels))
    output_lf0 = AtomLabelGen.atoms_to_lf0(output_atoms, len(labels))
    graphs_lf0 = [
        (wcad_lf0, "wcad lf0"),
        (original_lf0, "org lf0"),
        (output_lf0, "predicted lf0"),
    ]
    plotter.set_data_list(grid_idx=grid_idx, data_list=graphs_lf0)
    plotter.set_area_list(grid_idx=grid_idx,
                          area_list=[(np.invert(original_vuv), '0.8', 1.0)])
    plotter.set_label(grid_idx=grid_idx, xlabel=frame_axis, ylabel='lf0')
    # Symmetric y-range with 10 % head-room around the larger atom curve.
    amp_lim = max(np.max(np.abs(wcad_lf0)),
                  np.max(np.abs(output_lf0))) * 1.1
    plotter.set_lim(grid_idx=grid_idx, ymin=-amp_lim, ymax=amp_lim)
    plotter.set_linestyles(grid_idx=grid_idx, linestyles=[':', '--', '-'])

    plotter.gen_plot()
    plotter.save_to_file(filename + ".BASE" + hparams.gen_figure_ext)
def gen_figure_from_output(self, id_name, label, hidden, hparams):
    """Plot pre-network, warped and reference spectra together with alpha.

    Four panels are drawn (pre-network spectrum, final "VTLN" spectrum,
    alpha with V/UV shading and optional phoneme annotations, reference
    spectrum) and saved to ``<out_dir>/<id_name>.<net>.VTLN<gen_figure_ext>``.

    Args:
        id_name: Identifier of the sample to load the reference for.
        label: Network output for the sample.
        hidden: Pair whose second element holds the predicted alphas.
        hparams: Hyper-parameter container; reads ``num_coded_sps``,
            ``synth_fs``, ``frame_size_ms``, ``model_name``, ``out_dir``,
            ``gen_figure_ext`` and, for annotations, ``phoneme_indices``,
            ``question_file``, ``voice`` and ``num_questions``.
    """
    _, alphas = hidden
    num_sps = hparams.num_coded_sps

    labels_post = self.OutputGen.postprocess_sample(label)
    coded_sp, lf0, vuv, _bap = WorldFeatLabelGen.convert_to_world_features(
        labels_post, contains_deltas=False, num_coded_sps=num_sps)
    sp = WorldFeatLabelGen.mcep_to_amp_sp(coded_sp, hparams.synth_fs)
    lf0, _ = interpolate_lin(lf0)

    # Load original LF0.
    org_labels_post = WorldFeatLabelGen.load_sample(
        id_name,
        dir_out=self.OutputGen.dir_labels,
        add_deltas=self.OutputGen.add_deltas,
        num_coded_sps=num_sps)
    original_mgc, original_lf0, original_vuv, *_ = \
        WorldFeatLabelGen.convert_to_world_features(
            sample=org_labels_post,
            contains_deltas=self.OutputGen.add_deltas,
            num_coded_sps=num_sps)
    original_lf0, _ = interpolate_lin(original_lf0)

    sp = sp[:, :150]  # Zoom into spectral features.
    num_bins = sp.shape[1]

    # Get a data plotter.
    plotter = DataPlotter()
    net_name = os.path.basename(hparams.model_name)
    filename = str(os.path.join(hparams.out_dir, id_name + '.' + net_name))
    plotter.set_title(id_name + ' - ' + net_name)
    plotter.set_num_colors(3)

    # Panel indices in drawing order (the LF0 panel is currently disabled).
    pre_net_panel, vtln_panel, alpha_panel, reference_panel = range(4)
    frame_axis = 'frames [{}] ms'.format(hparams.frame_size_ms)

    # Reverse the warping.
    wl = self._get_dummy_warping_layer(hparams)
    norm_offset = self.OutputGen.norm_params[0][:num_sps]
    norm_scale = self.OutputGen.norm_params[1][:num_sps]
    pre_net_output, _ = wl.forward_sample(label, -alphas)

    # Postprocess sample manually.
    pre_net_output = pre_net_output.detach().cpu().numpy()
    pre_net_mgc = pre_net_output[:, 0, :num_sps] * norm_scale + norm_offset

    # Plot spectral features predicted by pre-network.
    pre_net_sp = WorldFeatLabelGen.mcep_to_amp_sp(pre_net_mgc,
                                                  hparams.synth_fs)
    plotter.set_label(grid_idx=pre_net_panel, xlabel=frame_axis,
                      ylabel='Pre-network')
    plotter.set_specshow(grid_idx=pre_net_panel,
                         spec=np.abs(pre_net_sp[:, :num_bins]))

    # Plot final predicted spectral features.
    plotter.set_label(grid_idx=vtln_panel, xlabel=frame_axis, ylabel='VTLN')
    plotter.set_specshow(grid_idx=vtln_panel, spec=np.abs(sp))

    # Plot predicted alpha value and V/UV flag.
    plotter.set_label(grid_idx=alpha_panel, xlabel=frame_axis,
                      ylabel='alpha')
    plotter.set_data_list(grid_idx=alpha_panel,
                          data_list=[(alphas, 'NN alpha')])
    unvoiced_areas = [
        (np.invert(vuv.astype(bool)), '0.8', 1.0),
        (np.invert(original_vuv.astype(bool)), 'red', 0.2),
    ]
    plotter.set_area_list(grid_idx=alpha_panel, area_list=unvoiced_areas)

    # Add phoneme annotations if given.
    has_phonemes = getattr(hparams, "phoneme_indices", None) is not None
    if has_phonemes and getattr(hparams, "question_file", None) is not None:
        questions = QuestionLabelGen.load_sample(
            id_name,
            os.path.join("experiments", hparams.voice, "questions"),
            num_questions=hparams.num_questions)[:len(lf0)]
        np_phonemes = QuestionLabelGen.questions_to_phonemes(
            questions, hparams.phoneme_indices, hparams.question_file)
        plotter.set_annotations(alpha_panel, np_phonemes)

    # Plot reference spectral features.
    reference_sp = WorldFeatLabelGen.mcep_to_amp_sp(original_mgc,
                                                    hparams.synth_fs)
    plotter.set_label(grid_idx=reference_panel, xlabel=frame_axis,
                      ylabel='Original spectrogram')
    plotter.set_specshow(grid_idx=reference_panel,
                         spec=np.abs(reference_sp[:, :num_bins]))

    plotter.gen_plot()
    plotter.save_to_file(filename + '.VTLN' + hparams.gen_figure_ext)
def gen_figure_from_output(self, id_name, label, hidden, hparams):
    """Plot LF0 and predicted alpha against the phoneme-derived alpha.

    Two panels are drawn (log-F0 comparison, alpha comparison with optional
    phoneme annotations) and the figure is saved to
    ``<out_dir>/<id_name>.<net>.VTLN<gen_figure_ext>``.

    Args:
        id_name: Identifier of the sample to load references for.
        label: Network output for the sample.
        hidden: Pair whose second element holds the predicted alphas.
        hparams: Hyper-parameter container; reads ``num_coded_sps``,
            ``synth_fs``, ``voice``, ``num_questions``, ``phoneme_indices``,
            ``frame_size_ms``, ``model_name``, ``out_dir``,
            ``gen_figure_ext`` and, for annotations, ``question_file``.
    """
    _, alphas = hidden
    question_dir = os.path.join("experiments", hparams.voice, "questions")

    labels_post = self.OutputGen.postprocess_sample(label)
    coded_sp, lf0, vuv, _bap = WorldFeatLabelGen.convert_to_world_features(
        labels_post,
        contains_deltas=False,
        num_coded_sps=hparams.num_coded_sps)
    # Only the disabled spectrogram panels would consume this; kept so the
    # sequence of calls is unchanged.
    sp = WorldFeatLabelGen.mcep_to_amp_sp(coded_sp, hparams.synth_fs)
    lf0, _ = interpolate_lin(lf0)

    # Load original lf0.
    org_labels_post = WorldFeatLabelGen.load_sample(
        id_name,
        self.OutputGen.dir_labels,
        add_deltas=self.OutputGen.add_deltas,
        num_coded_sps=hparams.num_coded_sps)
    original_mgc, original_lf0, original_vuv, *_ = \
        WorldFeatLabelGen.convert_to_world_features(
            org_labels_post,
            contains_deltas=self.OutputGen.add_deltas,
            num_coded_sps=hparams.num_coded_sps)
    original_lf0, _ = interpolate_lin(original_lf0)

    # Look up one alpha per frame from the frame's phoneme index; the modulo
    # keeps indices inside the lookup table.
    questions = QuestionLabelGen.load_sample(
        id_name, question_dir,
        num_questions=hparams.num_questions)[:len(alphas)]
    phoneme_indices = QuestionLabelGen.questions_to_phoneme_indices(
        questions, hparams.phoneme_indices)
    table_size = len(self.phonemes_to_alpha_tensor)
    alpha_vec = self.phonemes_to_alpha_tensor[phoneme_indices % table_size]

    # Get a data plotter.
    lf0_panel, alpha_panel = 0, 1
    plotter = DataPlotter()
    net_name = os.path.basename(hparams.model_name)
    filename = str(os.path.join(hparams.out_dir, id_name + '.' + net_name))
    plotter.set_title(id_name + ' - ' + net_name)
    plotter.set_num_colors(3)

    frame_axis = 'frames [' + str(hparams.frame_size_ms) + ' ms]'

    # Panel 0: reference vs. predicted LF0 with unvoiced regions shaded.
    plotter.set_label(grid_idx=lf0_panel, xlabel=frame_axis,
                      ylabel='log(f0)')
    plotter.set_data_list(
        grid_idx=lf0_panel,
        data_list=[(original_lf0, 'Original LF0'), (lf0, 'NN LF0')])
    plotter.set_area_list(
        grid_idx=lf0_panel,
        area_list=[(np.invert(vuv.astype(bool)), '0.8', 1.0),
                   (np.invert(original_vuv.astype(bool)), 'red', 0.2)])

    # Panel 1: phoneme-derived vs. predicted alpha, same shading.
    plotter.set_label(grid_idx=alpha_panel, xlabel=frame_axis,
                      ylabel='alpha')
    plotter.set_data_list(
        grid_idx=alpha_panel,
        data_list=[(alpha_vec, 'Original alpha'), (alphas, 'NN alpha')])
    plotter.set_area_list(
        grid_idx=alpha_panel,
        area_list=[(np.invert(vuv.astype(bool)), '0.8', 1.0),
                   (np.invert(original_vuv.astype(bool)), 'red', 0.2)])

    # Annotate the alpha panel with phonemes when a question file is given.
    has_phonemes = getattr(hparams, "phoneme_indices", None) is not None
    if has_phonemes and getattr(hparams, "question_file", None) is not None:
        questions = QuestionLabelGen.load_sample(
            id_name, question_dir,
            num_questions=hparams.num_questions)[:len(lf0)]
        np_phonemes = QuestionLabelGen.questions_to_phonemes(
            questions, hparams.phoneme_indices, hparams.question_file)
        plotter.set_annotations(alpha_panel, np_phonemes)

    plotter.gen_plot()
    plotter.save_to_file(filename + '.VTLN' + hparams.gen_figure_ext)
def gen_figure_from_output(self, id_name, label, hidden, hparams):
    """Plot amplitudes, position output, peaks, targets and LF0 curves.

    Five panels are drawn (NN amplitudes per theta, NN position output,
    post-processed peaks, target labels, LF0 comparison) and the figure is
    saved to ``<out_dir>/<id_name>.<net>.VUV_DIST_POS<gen_figure_ext>``.

    Args:
        id_name: Identifier of the sample to load references for.
        label: Network output; columns ``1:-1`` are plotted as amplitudes
            and the last column as position.
        hidden: Not used by this method.
        hparams: Hyper-parameter container; reads ``k``, ``thetas``,
            ``min_atom_amp``, ``frame_size_ms``, ``model_name``,
            ``out_dir``, ``gen_figure_ext`` and, optionally, ``world_dir``.
    """
    # Retrieve data from label.
    output_amps = label[:, 1:-1]
    output_pos = label[:, -1]
    labels_post = self.OutputGen.postprocess_sample(label)
    output_vuv = labels_post[:, 0, 1].astype(bool)
    output_atoms = self.OutputGen.labels_to_atoms(
        labels_post, k=hparams.k, amp_threshold=hparams.min_atom_amp)
    output_lf0 = self.OutputGen.atoms_to_lf0(output_atoms, len(label))

    # Load original lf0 and vuv.
    world_dir = getattr(hparams, "world_dir", None)
    if world_dir is None:
        world_dir = os.path.join(self.OutputGen.dir_labels,
                                 self.dir_extracted_acoustic_features)
    org_labels = LF0LabelGen.load_sample(id_name, world_dir)
    original_lf0, _ = LF0LabelGen.convert_to_world_features(org_labels)
    original_lf0, _ = interpolate_lin(original_lf0)

    # Subtract the phrase component over the overlapping frames only, then
    # cut the reference to the length of the prediction.
    phrase_curve = np.fromfile(
        os.path.join(self.OutputGen.dir_labels,
                     id_name + self.OutputGen.ext_phrase),
        dtype=np.float32).reshape(-1, 1)
    original_lf0[:len(phrase_curve)] -= phrase_curve[:len(original_lf0)]
    original_lf0 = original_lf0[:len(output_lf0)]

    org_labels = self.OutputGen.load_sample(
        id_name, self.OutputGen.dir_labels, len(hparams.thetas),
        self.OutputGen.dir_world_labels)
    # First column carries the V/UV flag; the rest are the target labels.
    org_vuv = org_labels[:, 0, 0].astype(bool)
    org_labels = org_labels[:, 1:]

    len_diff = len(org_labels) - len(labels_post)
    trim = int(len_diff / 2.0)
    # NOTE(review): both calls use the default trim direction here and
    # org_vuv is left untrimmed - confirm this is intended.
    org_labels = self.OutputGen.trim_end_sample(org_labels, trim)
    org_labels = self.OutputGen.trim_end_sample(org_labels, trim + 1)
    org_atoms = AtomLabelGen.labels_to_atoms(
        org_labels, k=hparams.k, frame_size=hparams.frame_size_ms)
    wcad_lf0 = self.OutputGen.atoms_to_lf0(org_atoms, len(org_labels))

    # Get a data plotter.
    net_name = os.path.basename(hparams.model_name)
    filename = str(os.path.join(hparams.out_dir, id_name + '.' + net_name))
    plotter = DataPlotter()
    plotter.set_title(id_name + " - " + net_name)

    # Panel 0: amplitude outputs, one curve per theta (highest index first).
    grid_idx = 0
    graphs_output = [
        (output_amps[:, idx],
         r'$\theta$={0:.3f}'.format(hparams.thetas[idx]))
        for idx in reversed(range(output_amps.shape[1]))
    ]
    plotter.set_data_list(grid_idx=grid_idx, data_list=graphs_output)
    plotter.set_label(grid_idx=grid_idx, ylabel='NN amps')
    # Pad by 10 % of the magnitude *outwards*. A plain ``* 1.1`` would pull
    # a positive minimum (or negative maximum) into the data and clip it.
    amp_max = np.max(output_amps)
    amp_min = np.min(output_amps)
    plotter.set_lim(grid_idx=grid_idx,
                    ymin=amp_min - 0.1 * abs(amp_min),
                    ymax=amp_max + 0.1 * abs(amp_max))

    # Panel 1: position output.
    grid_idx += 1
    plotter.set_data_list(grid_idx=grid_idx, data_list=[(output_pos,)])
    plotter.set_label(grid_idx=grid_idx, ylabel='NN pos')

    # Panel 2: post-processed peaks with predicted unvoiced regions shaded.
    grid_idx += 1
    graphs_peaks = [
        (labels_post[:, 1 + idx, 0],)
        for idx in reversed(range(label.shape[1] - 2))
    ]
    plotter.set_data_list(grid_idx=grid_idx, data_list=graphs_peaks)
    plotter.set_area_list(
        grid_idx=grid_idx,
        area_list=[(np.invert(output_vuv), '0.75', 1.0, 'Unvoiced')])
    plotter.set_label(grid_idx=grid_idx, ylabel='NN peaks')
    plotter.set_lim(grid_idx=grid_idx, ymin=-1.8, ymax=1.8)

    # Panel 3: target labels with reference unvoiced regions hatched.
    grid_idx += 1
    graphs_target = [
        (org_labels[:, idx, 0],)
        for idx in reversed(range(org_labels.shape[1]))
    ]
    plotter.set_data_list(grid_idx=grid_idx, data_list=graphs_target)
    plotter.set_hatchstyles(grid_idx=grid_idx, hatchstyles=['\\\\'])
    plotter.set_area_list(
        grid_idx=grid_idx,
        area_list=[(np.invert(org_vuv), '0.75', 1.0, 'Reference unvoiced')])
    plotter.set_label(grid_idx=grid_idx, ylabel='target')
    plotter.set_lim(grid_idx=grid_idx, ymin=-1.8, ymax=1.8)

    # Panel 4: LF0 rebuilt from target atoms, reference LF0, predicted LF0.
    grid_idx += 1
    graphs_lf0 = [
        (wcad_lf0, "wcad lf0"),
        (original_lf0, "org lf0"),
        (output_lf0, "predicted lf0"),
    ]
    plotter.set_data_list(grid_idx=grid_idx, data_list=graphs_lf0)
    plotter.set_area_list(grid_idx=grid_idx,
                          area_list=[(np.invert(org_vuv), '0.75', 1.0)])
    plotter.set_hatchstyles(grid_idx=grid_idx, hatchstyles=['\\\\'])
    plotter.set_label(
        grid_idx=grid_idx,
        xlabel='frames [' + str(hparams.frame_size_ms) + ' ms]',
        ylabel='lf0')
    # Symmetric y-range with 10 % head-room around the larger atom curve.
    amp_lim = max(np.max(np.abs(wcad_lf0)),
                  np.max(np.abs(output_lf0))) * 1.1
    plotter.set_lim(grid_idx=grid_idx, ymin=-amp_lim, ymax=amp_lim)
    plotter.set_linestyles(grid_idx=grid_idx, linestyles=[':', '--', '-'])

    # NOTE(review): the plot is generated twice, monochrome first and then
    # with default settings; presumably only the last render is saved -
    # confirm which variant is wanted.
    plotter.gen_plot(monochrome=True)
    plotter.gen_plot()
    plotter.save_to_file(filename + ".VUV_DIST_POS" + hparams.gen_figure_ext)