def __getitem__(self, index):
    """Assemble one example from the metadata row at ``index``.

    The row supplies the duration string (column 2), the whitespace-separated
    text tokens (column 3) and the file name (column 4, of which only the part
    before the first "." is kept as the utterance id). The matching mel,
    energy and pitch ``.npy`` files are loaded from beneath ``self.path``.

    Returns:
        A 7-tuple ``(tokens, mel.T, utterance_id, mel_len, durations, energy,
        pitch)``. ``tokens`` and ``durations`` are wrapped with ``np.array``,
        ``mel_len`` is ``mel.shape[1]``, and energy / pitch are the loaded
        arrays after ``remove_outlier``.
    """
    row = self._metadata[index]
    utt_id = row[4].split(".")[0]
    tokens = row[3].split()

    # Pick the text front-end according to the dataset configuration.
    sequence = (
        phonemes_to_sequence(tokens)
        if self.use_phonemes
        else text_to_sequence(tokens, self.tts_cleaner_names, self.eos)
    )

    mel = np.load(f"{self.path}mels/{utt_id}.npy")
    durations = str_to_int_list(row[2])

    # Energy and pitch are only outlier-filtered here; no mean/std
    # normalisation is applied in this method.
    energy = remove_outlier(np.load(f"{self.path}energy/{utt_id}.npy"))
    pitch = remove_outlier(np.load(f"{self.path}pitch/{utt_id}.npy"))

    mel_len = mel.shape[1]

    # Keep at most one duration per input symbol, then fold whatever frame
    # count is left over into the last symbol so the durations add up to the
    # mel length.
    # NOTE(review): the correction can be negative when the truncated
    # durations already exceed mel_len, and an empty duration list raises
    # IndexError here -- confirm the upstream data rules both out.
    durations = durations[: len(sequence)]
    durations[-1] += mel_len - sum(durations)
    assert mel_len == sum(durations)

    # Mel is returned transposed; the original author annotates it as
    # [T, num_mel].
    return (
        np.array(sequence),
        mel.T,
        utt_id,
        mel_len,
        np.array(durations),
        energy,
        pitch,
    )
def _norm_mean_std(self, x, mean, std, is_remove_outlier = False): if is_remove_outlier: x = remove_outlier(x) zero_idxs = np.where(x == 0.0)[0] x = (x - mean) / std x[zero_idxs] = 0.0 return x
nz_min_p = [] nz_min_e = [] energy_path = os.path.join(hp.data.data_dir, "energy") pitch_path = os.path.join(hp.data.data_dir, "pitch") mel_path = os.path.join(hp.data.data_dir, "mels") energy_files = get_files(energy_path, extension=".npy") pitch_files = get_files(pitch_path, extension=".npy") mel_files = get_files(mel_path, extension=".npy") assert len(energy_files) == len(pitch_files) == len(mel_files) energy_vecs = [] for f in tqdm(energy_files): e = np.load(f) e = remove_outlier(e) energy_vecs.append(e) min_e.append(e.min()) nz_min_e.append(e[e > 0].min()) max_e.append(e.max()) nonzeros = np.concatenate([v[np.where(v != 0.0)[0]] for v in energy_vecs]) e_mean, e_std = np.mean(nonzeros), np.std(nonzeros) print("Non zero Min Energy : {}".format(min(nz_min_e))) print("Max Energy : {}".format(max(max_e))) print("Energy mean : {}".format(e_mean)) print("Energy std: {}".format(e_std)) pitch_vecs = [] bad_pitch = [] for f in tqdm(pitch_files):