def synthesize(self, id_list, synth_output, hparams): """ Synthesise LF0 from atoms. The run_atom_synth function either loads the original acoustic features or uses an acoustic model to predict them. """ full_output = self.run_atom_synth(id_list, synth_output, hparams) for id_name, labels in full_output.items(): lf0 = labels[:, -3] lf0, _ = interpolate_lin(lf0) vuv = synth_output[id_name][:, 0, 1] len_diff = len(labels) - len(vuv) labels = WorldFeatLabelGen.trim_end_sample(labels, int(len_diff / 2), reverse=True) labels = WorldFeatLabelGen.trim_end_sample(labels, len_diff - int(len_diff / 2)) labels[:, -2] = vuv # Run the vocoder. ModelTrainer.synthesize(self, id_list, full_output, hparams)
def gen_figure_from_output(self, id_name, labels, hidden, hparams): if labels.ndim < 2: labels = np.expand_dims(labels, axis=1) labels_post = self.OutputGen.postprocess_sample(labels, identify_peaks=True, peak_range=100) lf0 = self.OutputGen.labels_to_lf0(labels_post, hparams.k) lf0, vuv = interpolate_lin(lf0) vuv = vuv.astype(np.bool) # Load original lf0 and vuv. world_dir = hparams.world_dir if hasattr(hparams, "world_dir") and hparams.world_dir is not None\ else os.path.join(self.OutputGen.dir_labels, self.dir_extracted_acoustic_features) org_labels = WorldFeatLabelGen.load_sample( id_name, world_dir, num_coded_sps=hparams.num_coded_sps) _, original_lf0, original_vuv, _ = WorldFeatLabelGen.convert_to_world_features( org_labels, num_coded_sps=hparams.num_coded_sps) original_lf0, _ = interpolate_lin(original_lf0) original_vuv = original_vuv.astype(np.bool) phrase_curve = np.fromfile(os.path.join( self.OutputGen.dir_labels, id_name + self.OutputGen.ext_phrase), dtype=np.float32).reshape(-1, 1) original_lf0 -= phrase_curve len_diff = len(original_lf0) - len(lf0) original_lf0 = WorldFeatLabelGen.trim_end_sample( original_lf0, int(len_diff / 2.0)) original_lf0 = WorldFeatLabelGen.trim_end_sample(original_lf0, int(len_diff / 2.0) + 1, reverse=True) org_labels = self.OutputGen.load_sample(id_name, self.OutputGen.dir_labels, len(hparams.thetas)) org_labels = self.OutputGen.trim_end_sample(org_labels, int(len_diff / 2.0)) org_labels = self.OutputGen.trim_end_sample(org_labels, int(len_diff / 2.0) + 1, reverse=True) org_atoms = self.OutputGen.labels_to_atoms( org_labels, k=hparams.k, frame_size=hparams.frame_size_ms) # Get a data plotter. net_name = os.path.basename(hparams.model_name) filename = str(os.path.join(hparams.out_dir, id_name + '.' + net_name)) plotter = DataPlotter() plotter.set_title(id_name + " - " + net_name) graphs_output = list() grid_idx = 0 for idx in reversed(range(labels.shape[1])): graphs_output.append( (labels[:, idx], r'$\theta$=' + "{0:.3f}".format(hparams.thetas[idx]))) plotter.set_label(grid_idx=grid_idx, xlabel='frames [' + str(hparams.frame_size_ms) + ' ms]', ylabel='NN output') plotter.set_data_list(grid_idx=grid_idx, data_list=graphs_output) # plotter.set_lim(grid_idx=0, ymin=-1.8, ymax=1.8) grid_idx += 1 graphs_peaks = list() for idx in reversed(range(labels_post.shape[1])): graphs_peaks.append((labels_post[:, idx, 0], )) plotter.set_label(grid_idx=grid_idx, xlabel='frames [' + str(hparams.frame_size_ms) + ' ms]', ylabel='NN post-processed') plotter.set_data_list(grid_idx=grid_idx, data_list=graphs_peaks) plotter.set_area_list(grid_idx=grid_idx, area_list=[(np.invert(vuv), '0.8', 1.0)]) plotter.set_lim(grid_idx=grid_idx, ymin=-1.8, ymax=1.8) grid_idx += 1 graphs_target = list() for idx in reversed(range(org_labels.shape[1])): graphs_target.append((org_labels[:, idx, 0], )) plotter.set_label(grid_idx=grid_idx, xlabel='frames [' + str(hparams.frame_size_ms) + ' ms]', ylabel='target') plotter.set_data_list(grid_idx=grid_idx, data_list=graphs_target) plotter.set_area_list(grid_idx=grid_idx, area_list=[(np.invert(original_vuv), '0.8', 1.0) ]) plotter.set_lim(grid_idx=grid_idx, ymin=-1.8, ymax=1.8) grid_idx += 1 output_atoms = AtomLabelGen.labels_to_atoms( labels_post, hparams.k, hparams.frame_size_ms, amp_threshold=hparams.min_atom_amp) wcad_lf0 = AtomLabelGen.atoms_to_lf0(org_atoms, len(labels)) output_lf0 = AtomLabelGen.atoms_to_lf0(output_atoms, len(labels)) graphs_lf0 = list() graphs_lf0.append((wcad_lf0, "wcad lf0")) graphs_lf0.append((original_lf0, "org lf0")) graphs_lf0.append((output_lf0, "predicted lf0")) plotter.set_data_list(grid_idx=grid_idx, data_list=graphs_lf0) plotter.set_area_list(grid_idx=grid_idx, area_list=[(np.invert(original_vuv), '0.8', 1.0) ]) plotter.set_label(grid_idx=grid_idx, xlabel='frames [' + str(hparams.frame_size_ms) + ' ms]', ylabel='lf0') amp_lim = max(np.max(np.abs(wcad_lf0)), np.max( np.abs(output_lf0))) * 1.1 plotter.set_lim(grid_idx=grid_idx, ymin=-amp_lim, ymax=amp_lim) plotter.set_linestyles(grid_idx=grid_idx, linestyles=[':', '--', '-']) # plotter.set_lim(xmin=300, xmax=1100) plotter.gen_plot() plotter.save_to_file(filename + ".BASE" + hparams.gen_figure_ext)
def gen_figure_phrase(self, hparams, ids_input): id_list = ModelTrainer._input_to_str_list(ids_input) model_output, model_output_post = self._forward_batched( hparams, id_list, hparams.batch_size_gen_figure, synth=False, benchmark=False, gen_figure=False) for id_name, outputs_post in model_output_post.items(): if outputs_post.ndim < 2: outputs_post = np.expand_dims(outputs_post, axis=1) lf0 = outputs_post[:, 0] output_lf0, _ = interpolate_lin(lf0) output_vuv = outputs_post[:, 1] output_vuv[output_vuv < 0.5] = 0.0 output_vuv[output_vuv >= 0.5] = 1.0 output_vuv = output_vuv.astype(np.bool) # Load original lf0 and vuv. world_dir = hparams.world_dir if hasattr(hparams, "world_dir") and hparams.world_dir is not None\ else os.path.join(hparams.out_dir, self.dir_extracted_acoustic_features) org_labels = WorldFeatLabelGen.load_sample( id_name, world_dir, num_coded_sps=hparams.num_coded_sps, num_bap=hparams.num_bap)[:len(output_lf0)] _, original_lf0, original_vuv, _ = WorldFeatLabelGen.convert_to_world_features( org_labels, num_coded_sps=hparams.num_coded_sps, num_bap=hparams.num_bap) original_lf0, _ = interpolate_lin(original_lf0) original_vuv = original_vuv.astype(np.bool) phrase_curve = np.fromfile(os.path.join( self.flat_trainer.atom_trainer.OutputGen.dir_labels, id_name + self.OutputGen.ext_phrase), dtype=np.float32).reshape( -1, 1)[:len(original_lf0)] f0_mse = (np.exp(original_lf0.squeeze(-1)) - np.exp(phrase_curve.squeeze(-1)))**2 f0_rmse = math.sqrt( (f0_mse * original_vuv[:len(output_lf0)]).sum() / original_vuv[:len(output_lf0)].sum()) self.logger.info("RMSE of {} phrase curve: {} Hz.".format( id_name, f0_rmse)) len_diff = len(original_lf0) - len(lf0) original_lf0 = WorldFeatLabelGen.trim_end_sample( original_lf0, int(len_diff / 2.0)) original_lf0 = WorldFeatLabelGen.trim_end_sample( original_lf0, int(len_diff / 2.0) + 1, reverse=True) # Get a data plotter. net_name = os.path.basename(hparams.model_name) filename = str( os.path.join(hparams.out_dir, id_name + '.' + net_name)) plotter = DataPlotter() # plotter.set_title(id_name + " - " + net_name) grid_idx = 0 graphs_lf0 = list() graphs_lf0.append((original_lf0, "Original")) graphs_lf0.append((phrase_curve, "Predicted")) plotter.set_data_list(grid_idx=grid_idx, data_list=graphs_lf0) plotter.set_area_list(grid_idx=grid_idx, area_list=[(np.invert(original_vuv), '0.8', 1.0, 'Reference unvoiced')]) plotter.set_label(grid_idx=grid_idx, xlabel='frames [' + str(hparams.frame_size_ms) + ' ms]', ylabel='LF0') # amp_lim = max(np.max(np.abs(wcad_lf0)), np.max(np.abs(output_lf0))) * 1.1 # plotter.set_lim(grid_idx=grid_idx, ymin=-amp_lim, ymax=amp_lim) plotter.set_lim(grid_idx=grid_idx, ymin=4.2, ymax=5.4) # plotter.set_linestyles(grid_idx=grid_idx, linestyles=[':', '--', '-']) # plotter.set_lim(xmin=300, xmax=1100) plotter.gen_plot() plotter.save_to_file(filename + ".PHRASE" + hparams.gen_figure_ext)
def gen_figure_from_output(self, id_name, label, hidden, hparams): _, alphas = hidden labels_post = self.OutputGen.postprocess_sample(label) coded_sp, lf0, vuv, bap = WorldFeatLabelGen.convert_to_world_features( labels_post, contains_deltas=False, num_coded_sps=hparams.num_coded_sps) sp = WorldFeatLabelGen.mcep_to_amp_sp(coded_sp, hparams.synth_fs) lf0, _ = interpolate_lin(lf0) # Load original LF0. org_labels_post = WorldFeatLabelGen.load_sample( id_name, dir_out=self.OutputGen.dir_labels, add_deltas=self.OutputGen.add_deltas, num_coded_sps=hparams.num_coded_sps) original_mgc, original_lf0, original_vuv, *_ = WorldFeatLabelGen.convert_to_world_features( sample=org_labels_post, contains_deltas=self.OutputGen.add_deltas, num_coded_sps=hparams.num_coded_sps) original_lf0, _ = interpolate_lin(original_lf0) sp = sp[:, :150] # Zoom into spectral features. # Get a data plotter. grid_idx = -1 plotter = DataPlotter() net_name = os.path.basename(hparams.model_name) filename = str(os.path.join(hparams.out_dir, id_name + '.' + net_name)) plotter.set_title(id_name + ' - ' + net_name) plotter.set_num_colors(3) # plotter.set_lim(grid_idx=0, ymin=math.log(60), ymax=math.log(250)) # # Plot LF0 # grid_idx += 1 # graphs.append((original_lf0, 'Original LF0')) # graphs.append((lf0, 'NN LF0')) # plotter.set_data_list(grid_idx=grid_idx, data_list=graphs) # plotter.set_area_list(grid_idx=grid_idx, area_list=[(np.invert(vuv.astype(bool)), '0.8', 1.0), # (np.invert(original_vuv.astype(bool)), 'red', 0.2)]) # plotter.set_label(grid_idx=grid_idx, xlabel='frames [{}] ms'.format(hparams.frame_length), ylabel='log(f0)') # Reverse the warping. wl = self._get_dummy_warping_layer(hparams) norm_params_no_deltas = ( self.OutputGen.norm_params[0][:hparams.num_coded_sps], self.OutputGen.norm_params[1][:hparams.num_coded_sps]) pre_net_output, _ = wl.forward_sample(label, -alphas) # Postprocess sample manually. pre_net_output = pre_net_output.detach().cpu().numpy() pre_net_mgc = pre_net_output[:, 0, :hparams. num_coded_sps] * norm_params_no_deltas[ 1] + norm_params_no_deltas[0] # Plot spectral features predicted by pre-network. grid_idx += 1 plotter.set_label(grid_idx=grid_idx, xlabel='frames [{}] ms'.format( hparams.frame_size_ms), ylabel='Pre-network') plotter.set_specshow(grid_idx=grid_idx, spec=np.abs( WorldFeatLabelGen.mcep_to_amp_sp( pre_net_mgc, hparams.synth_fs)[:, :sp.shape[1]])) # Plot final predicted spectral features. grid_idx += 1 plotter.set_label(grid_idx=grid_idx, xlabel='frames [{}] ms'.format( hparams.frame_size_ms), ylabel='VTLN') plotter.set_specshow(grid_idx=grid_idx, spec=np.abs(sp)) # Plot predicted alpha value and V/UV flag. grid_idx += 1 plotter.set_label(grid_idx=grid_idx, xlabel='frames [{}] ms'.format( hparams.frame_size_ms), ylabel='alpha') graphs = list() graphs.append((alphas, 'NN alpha')) plotter.set_data_list(grid_idx=grid_idx, data_list=graphs) plotter.set_area_list(grid_idx=grid_idx, area_list=[(np.invert(vuv.astype(bool)), '0.8', 1.0), (np.invert(original_vuv.astype(bool)), 'red', 0.2)]) # Add phoneme annotations if given. if hasattr(hparams, "phoneme_indices") and hparams.phoneme_indices is not None \ and hasattr(hparams, "question_file") and hparams.question_file is not None: questions = QuestionLabelGen.load_sample( id_name, os.path.join("experiments", hparams.voice, "questions"), num_questions=hparams.num_questions)[:len(lf0)] np_phonemes = QuestionLabelGen.questions_to_phonemes( questions, hparams.phoneme_indices, hparams.question_file) plotter.set_annotations(grid_idx, np_phonemes) # Plot reference spectral features. grid_idx += 1 plotter.set_label(grid_idx=grid_idx, xlabel='frames [{}] ms'.format( hparams.frame_size_ms), ylabel='Original spectrogram') plotter.set_specshow(grid_idx=grid_idx, spec=np.abs( WorldFeatLabelGen.mcep_to_amp_sp( original_mgc, hparams.synth_fs)[:, :sp.shape[1]])) plotter.gen_plot() plotter.save_to_file(filename + '.VTLN' + hparams.gen_figure_ext)
def gen_figure_from_output(self, id_name, label, hidden, hparams): _, alphas = hidden labels_post = self.OutputGen.postprocess_sample(label) coded_sp, lf0, vuv, bap = WorldFeatLabelGen.convert_to_world_features( labels_post, contains_deltas=False, num_coded_sps=hparams.num_coded_sps) sp = WorldFeatLabelGen.mcep_to_amp_sp(coded_sp, hparams.synth_fs) lf0, _ = interpolate_lin(lf0) # Load original lf0. org_labels_post = WorldFeatLabelGen.load_sample( id_name, self.OutputGen.dir_labels, add_deltas=self.OutputGen.add_deltas, num_coded_sps=hparams.num_coded_sps) original_mgc, original_lf0, original_vuv, *_ = WorldFeatLabelGen.convert_to_world_features( org_labels_post, contains_deltas=self.OutputGen.add_deltas, num_coded_sps=hparams.num_coded_sps) original_lf0, _ = interpolate_lin(original_lf0) questions = QuestionLabelGen.load_sample( id_name, os.path.join("experiments", hparams.voice, "questions"), num_questions=hparams.num_questions)[:len(alphas)] phoneme_indices = QuestionLabelGen.questions_to_phoneme_indices( questions, hparams.phoneme_indices) alpha_vec = self.phonemes_to_alpha_tensor[phoneme_indices % len( self.phonemes_to_alpha_tensor)] # Get a data plotter. grid_idx = 0 plotter = DataPlotter() net_name = os.path.basename(hparams.model_name) filename = str(os.path.join(hparams.out_dir, id_name + '.' + net_name)) plotter.set_title(id_name + ' - ' + net_name) plotter.set_num_colors(3) # plotter.set_lim(grid_idx=0, ymin=math.log(60), ymax=math.log(250)) plotter.set_label(grid_idx=grid_idx, xlabel='frames [' + str(hparams.frame_size_ms) + ' ms]', ylabel='log(f0)') graphs = list() graphs.append((original_lf0, 'Original LF0')) graphs.append((lf0, 'NN LF0')) plotter.set_data_list(grid_idx=grid_idx, data_list=graphs) plotter.set_area_list(grid_idx=grid_idx, area_list=[(np.invert(vuv.astype(bool)), '0.8', 1.0), (np.invert(original_vuv.astype(bool)), 'red', 0.2)]) # grid_idx += 1 # plotter.set_label(grid_idx=grid_idx, xlabel='frames [' + str(hparams.frame_size_ms) + ' ms]', ylabel='Original spectrogram') # plotter.set_specshow(grid_idx=grid_idx, spec=WorldFeatLabelGen.mgc_to_sp(original_mgc, hparams.synth_fs)) # # grid_idx += 1 # plotter.set_label(grid_idx=grid_idx, xlabel='frames [' + str(hparams.frame_size_ms) + ' ms]', ylabel='NN spectrogram') # plotter.set_specshow(grid_idx=grid_idx, spec=sp) grid_idx += 1 plotter.set_label(grid_idx=grid_idx, xlabel='frames [' + str(hparams.frame_size_ms) + ' ms]', ylabel='alpha') graphs = list() graphs.append((alpha_vec, 'Original alpha')) graphs.append((alphas, 'NN alpha')) plotter.set_data_list(grid_idx=grid_idx, data_list=graphs) plotter.set_area_list(grid_idx=grid_idx, area_list=[(np.invert(vuv.astype(bool)), '0.8', 1.0), (np.invert(original_vuv.astype(bool)), 'red', 0.2)]) if hasattr(hparams, "phoneme_indices") and hparams.phoneme_indices is not None \ and hasattr(hparams, "question_file") and hparams.question_file is not None: questions = QuestionLabelGen.load_sample( id_name, os.path.join("experiments", hparams.voice, "questions"), num_questions=hparams.num_questions)[:len(lf0)] np_phonemes = QuestionLabelGen.questions_to_phonemes( questions, hparams.phoneme_indices, hparams.question_file) plotter.set_annotations(grid_idx, np_phonemes) plotter.gen_plot() plotter.save_to_file(filename + '.VTLN' + hparams.gen_figure_ext)
def gen_data(self, dir_in, dir_out=None, file_id_list="", id_list=None, add_deltas=False, return_dict=False): """ Prepare LF0 and V/UV features from audio files. If add_delta is false each numpy array has the dimension num_frames x 2 [f0, vuv], otherwise the deltas and double deltas are added between the features resulting in num_frames x 4 [lf0(3*1), vuv]. :param dir_in: Directory where the .wav files are stored for each utterance to process. :param dir_out: Main directory where the labels and normalisation parameters are saved to subdirectories. If None, labels are not saved. :param file_id_list: Name of the file containing the ids. Normalisation parameters are saved using this name to differentiate parameters between subsets. :param id_list: The list of utterances to process. Should have the form uttId1 \\n uttId2 \\n ...\\n uttIdN. If None, all file in audio_dir are used. :param add_deltas: Add deltas and double deltas to all features except vuv. :param return_dict: If true, returns an OrderedDict of all samples as first output. :return: Returns two normalisation parameters as tuple. If return_dict is True it returns all processed labels in an OrderedDict followed by the two normalisation parameters. """ # Fill file_id_list by .wav files in dir_in if not given and set an appropriate file_id_list_name. if id_list is None: id_list = list() filenames = glob.glob(os.path.join(dir_in, "*.wav")) for filename in filenames: id_list.append(os.path.splitext(os.path.basename(filename))[0]) file_id_list_name = "all" else: file_id_list_name = os.path.splitext(os.path.basename(file_id_list))[0] # Create directories in dir_out if it is given. if dir_out is not None: if add_deltas: makedirs_safe(os.path.join(dir_out, LF0LabelGen.dir_deltas)) else: makedirs_safe(os.path.join(dir_out, LF0LabelGen.dir_lf0)) makedirs_safe(os.path.join(dir_out, LF0LabelGen.dir_vuv)) # Create the return dictionary if required. if return_dict: label_dict = OrderedDict() # Create normalisation computation units. norm_params_ext_lf0 = MeanStdDevExtractor() # norm_params_ext_vuv = MeanStdDevExtractor() norm_params_ext_deltas = MeanStdDevExtractor() logging.info("Extract WORLD LF0 features for " + "[{0}]".format(", ".join(str(i) for i in id_list))) for file_name in id_list: logging.debug("Extract WORLD LF0 features from " + file_name) # Load audio file and extract features. audio_name = os.path.join(dir_in, file_name + ".wav") raw, fs = soundfile.read(audio_name) _f0, t = pyworld.dio(raw, fs) # Raw pitch extraction. TODO: Use magphase here? f0 = pyworld.stonemask(raw, _f0, t, fs) # Pitch refinement. # Compute lf0 and vuv information. lf0 = np.log(f0, dtype=np.float32) lf0[lf0 <= math.log(LF0LabelGen.f0_silence_threshold)] = LF0LabelGen.lf0_zero lf0, vuv = interpolate_lin(lf0) if add_deltas: # Compute the deltas and double deltas for all features. lf0_deltas, lf0_double_deltas = compute_deltas(lf0) # Combine them to a single feature sample. labels = np.concatenate((lf0, lf0_deltas, lf0_double_deltas, vuv), axis=1) # Save into return dictionary and/or file. if return_dict: label_dict[file_name] = labels if dir_out is not None: labels.tofile(os.path.join(dir_out, LF0LabelGen.dir_deltas, file_name + LF0LabelGen.ext_deltas)) # Add sample to normalisation computation unit. norm_params_ext_deltas.add_sample(labels) else: # Save into return dictionary and/or file. if return_dict: label_dict[file_name] = np.concatenate((lf0, vuv), axis=1) if dir_out is not None: lf0.tofile(os.path.join(dir_out, LF0LabelGen.dir_lf0, file_name + LF0LabelGen.ext_lf0)) vuv.astype(np.float32).tofile(os.path.join(dir_out, LF0LabelGen.dir_vuv, file_name + LF0LabelGen.ext_vuv)) # Add sample to normalisation computation unit. norm_params_ext_lf0.add_sample(lf0) # norm_params_ext_vuv.add_sample(vuv) # Save mean and std dev of all features. if not add_deltas: norm_params_ext_lf0.save(os.path.join(dir_out, LF0LabelGen.dir_lf0, file_id_list_name)) # norm_params_ext_vuv.save(os.path.join(dir_out, LF0LabelGen.dir_vuv, file_id_list_name)) else: # Manually set vuv normalisation parameters before saving. norm_params_ext_deltas.sum_frames[-1] = 0.0 # Mean = 0.0 norm_params_ext_deltas.sum_squared_frames[-1] = norm_params_ext_deltas.sum_length # Variance = 1.0 norm_params_ext_deltas.save(os.path.join(dir_out, LF0LabelGen.dir_deltas, file_id_list_name)) # Get normalisation parameters. if not add_deltas: norm_lf0 = norm_params_ext_lf0.get_params() # norm_vuv = norm_params_ext_vuv.get_params() norm_first = np.concatenate((norm_lf0[0], (0.0,)), axis=0) norm_second = np.concatenate((norm_lf0[1], (1.0,)), axis=0) else: norm_first, norm_second = norm_params_ext_deltas.get_params() if return_dict: # Return dict of labels for all utterances. return label_dict, norm_first, norm_second else: return norm_first, norm_second
def gen_figure_from_output(self, id_name, label, hidden, hparams): # Retrieve data from label. output_amps = label[:, 1:-1] output_pos = label[:, -1] labels_post = self.OutputGen.postprocess_sample(label) output_vuv = labels_post[:, 0, 1].astype(bool) output_atoms = self.OutputGen.labels_to_atoms(labels_post, k=hparams.k, amp_threshold=hparams.min_atom_amp) output_lf0 = self.OutputGen.atoms_to_lf0(output_atoms, len(label)) # Load original lf0 and vuv. world_dir = hparams.world_dir if hasattr(hparams, "world_dir") and hparams.world_dir is not None\ else os.path.join(self.OutputGen.dir_labels, self.dir_extracted_acoustic_features) org_labels = LF0LabelGen.load_sample(id_name, world_dir) original_lf0, _ = LF0LabelGen.convert_to_world_features(org_labels) original_lf0, _ = interpolate_lin(original_lf0) phrase_curve = np.fromfile(os.path.join(self.OutputGen.dir_labels, id_name + self.OutputGen.ext_phrase), dtype=np.float32).reshape(-1, 1) original_lf0[:len(phrase_curve)] -= phrase_curve[:len(original_lf0)] original_lf0 = original_lf0[:len(output_lf0)] org_labels = self.OutputGen.load_sample(id_name, self.OutputGen.dir_labels, len(hparams.thetas), self.OutputGen.dir_world_labels) org_vuv = org_labels[:, 0, 0].astype(bool) org_labels = org_labels[:, 1:] len_diff = len(org_labels) - len(labels_post) org_labels = self.OutputGen.trim_end_sample(org_labels, int(len_diff / 2.0)) org_labels = self.OutputGen.trim_end_sample(org_labels, int(len_diff / 2.0) + 1) org_atoms = AtomLabelGen.labels_to_atoms(org_labels, k=hparams.k, frame_size=hparams.frame_size_ms) wcad_lf0 = self.OutputGen.atoms_to_lf0(org_atoms, len(org_labels)) # Get a data plotter net_name = os.path.basename(hparams.model_name) filename = str(os.path.join(hparams.out_dir, id_name + '.' + net_name)) plotter = DataPlotter() plotter.set_title(id_name + " - " + net_name) grid_idx = 0 graphs_output = list() for idx in reversed(range(output_amps.shape[1])): graphs_output.append((output_amps[:, idx], r'$\theta$={0:.3f}'.format(hparams.thetas[idx]))) plotter.set_data_list(grid_idx=grid_idx, data_list=graphs_output) plotter.set_label(grid_idx=grid_idx, ylabel='NN amps') amp_max = np.max(output_amps) * 1.1 amp_min = np.min(output_amps) * 1.1 plotter.set_lim(grid_idx=grid_idx, ymin=amp_min, ymax=amp_max) grid_idx += 1 graphs_pos_flag = list() graphs_pos_flag.append((output_pos,)) plotter.set_data_list(grid_idx=grid_idx, data_list=graphs_pos_flag) plotter.set_label(grid_idx=grid_idx, ylabel='NN pos') grid_idx += 1 graphs_peaks = list() for idx in reversed(range(label.shape[1] - 2)): graphs_peaks.append((labels_post[:, 1 + idx, 0],)) plotter.set_data_list(grid_idx=grid_idx, data_list=graphs_peaks) plotter.set_area_list(grid_idx=grid_idx, area_list=[(np.invert(output_vuv), '0.75', 1.0, 'Unvoiced')]) plotter.set_label(grid_idx=grid_idx, ylabel='NN peaks') plotter.set_lim(grid_idx=grid_idx, ymin=-1.8, ymax=1.8) grid_idx += 1 graphs_target = list() for idx in reversed(range(org_labels.shape[1])): graphs_target.append((org_labels[:, idx, 0],)) plotter.set_data_list(grid_idx=grid_idx, data_list=graphs_target) plotter.set_hatchstyles(grid_idx=grid_idx, hatchstyles=['\\\\']) plotter.set_area_list(grid_idx=grid_idx, area_list=[(np.invert(org_vuv.astype(bool)), '0.75', 1.0, 'Reference unvoiced')]) plotter.set_label(grid_idx=grid_idx, ylabel='target') plotter.set_lim(grid_idx=grid_idx, ymin=-1.8, ymax=1.8) grid_idx += 1 graphs_lf0 = list() graphs_lf0.append((wcad_lf0, "wcad lf0")) graphs_lf0.append((original_lf0, "org lf0")) graphs_lf0.append((output_lf0, "predicted lf0")) plotter.set_data_list(grid_idx=grid_idx, data_list=graphs_lf0) plotter.set_area_list(grid_idx=grid_idx, area_list=[(np.invert(org_vuv.astype(bool)), '0.75', 1.0)]) plotter.set_hatchstyles(grid_idx=grid_idx, hatchstyles=['\\\\']) plotter.set_label(grid_idx=grid_idx, xlabel='frames [' + str(hparams.frame_size_ms) + ' ms]', ylabel='lf0') amp_lim = max(np.max(np.abs(wcad_lf0)), np.max(np.abs(output_lf0))) * 1.1 plotter.set_lim(grid_idx=grid_idx, ymin=-amp_lim, ymax=amp_lim) plotter.set_linestyles(grid_idx=grid_idx, linestyles=[':', '--', '-']) # # Compute F0 RMSE for sample and add it to title. # org_f0 = (np.exp(lf0.squeeze() + phrase_curve[:len(lf0)].squeeze()) * vuv)[:len(output_lf0)] # Fix minor negligible length mismatch. # output_f0 = np.exp(output_lf0 + phrase_curve[:len(output_lf0)].squeeze()) * output_vuv[:len(output_lf0)] # f0_mse = (org_f0 - output_f0) ** 2 # # non_zero_count = np.logical_and(vuv[:len(output_lf0)], output_vuv).sum() # f0_rmse = math.sqrt(f0_mse.sum() / (np.logical_and(vuv[:len(output_lf0)], output_vuv).sum())) # # Compute vuv error rate. # num_errors = (vuv[:len(output_lf0)] != output_vuv) # vuv_error_rate = float(num_errors.sum()) / len(output_lf0) # plotter.set_title(id_name + " - " + net_name + " - F0_RMSE_" + "{:4.2f}Hz".format(f0_rmse) + " - VUV_" + "{:2.2f}%".format(vuv_error_rate * 100)) # plotter.set_lim(xmin=300, xmax=1100)g plotter.gen_plot(monochrome=True) plotter.gen_plot() plotter.save_to_file(filename + ".VUV_DIST_POS" + hparams.gen_figure_ext)