Esempio n. 1
0
 def __getitem__(self, index):
     """Assemble the training example stored at position ``index``.

     Reads the metadata row, turns its text field into a token sequence
     (phoneme-based or text-based depending on ``self.use_phonemes``) and
     loads the matching mel, energy and pitch ``.npy`` files found under
     ``self.path``.

     Returns:
         A 7-tuple ``(tokens, mel, utterance id, mel length, durations,
         energy, pitch)``. The mel is returned transposed (annotated in
         the original code as ``[T, num_mel]``).
     """
     row = self._metadata[index]
     utt_id = row[4].split(".")[0]
     words = row[3].split()
     sequence = (
         phonemes_to_sequence(words)
         if self.use_phonemes
         else text_to_sequence(words, self.tts_cleaner_names, self.eos)
     )

     mel = np.load(f"{self.path}mels/{utt_id}.npy")
     durations = str_to_int_list(row[2])
     # Energy and pitch are passed through ``remove_outlier`` only; no
     # mean/std normalisation is applied to them here.
     energy = remove_outlier(np.load(f"{self.path}energy/{utt_id}.npy"))
     pitch = remove_outlier(np.load(f"{self.path}pitch/{utt_id}.npy"))

     n_frames = mel.shape[1]
     # Keep at most one duration per input token, then let the last entry
     # absorb the remaining frames so the durations add up to the mel length.
     durations = durations[: len(sequence)]
     durations[-1] += n_frames - sum(durations)
     # Sanity check: after the adjustment the totals are expected to agree.
     assert n_frames == sum(durations)

     return (
         np.array(sequence),
         mel.T,
         utt_id,
         n_frames,
         np.array(durations),
         energy,
         pitch,
     )
Esempio n. 2
0
 def _norm_mean_std(self, x, mean, std, is_remove_outlier = False):
     if is_remove_outlier:
         x = remove_outlier(x)
     zero_idxs = np.where(x == 0.0)[0]
     x = (x - mean) / std
     x[zero_idxs] = 0.0
     return x
Esempio n. 3
0
    nz_min_p = []
    nz_min_e = []

    energy_path = os.path.join(hp.data.data_dir, "energy")
    pitch_path = os.path.join(hp.data.data_dir, "pitch")
    mel_path = os.path.join(hp.data.data_dir, "mels")
    energy_files = get_files(energy_path, extension=".npy")
    pitch_files = get_files(pitch_path, extension=".npy")
    mel_files = get_files(mel_path, extension=".npy")

    assert len(energy_files) == len(pitch_files) == len(mel_files)

    energy_vecs = []
    for f in tqdm(energy_files):
        e = np.load(f)
        e = remove_outlier(e)
        energy_vecs.append(e)
        min_e.append(e.min())
        nz_min_e.append(e[e > 0].min())
        max_e.append(e.max())

    nonzeros = np.concatenate([v[np.where(v != 0.0)[0]] for v in energy_vecs])
    e_mean, e_std = np.mean(nonzeros), np.std(nonzeros)
    print("Non zero Min Energy : {}".format(min(nz_min_e)))
    print("Max Energy : {}".format(max(max_e)))
    print("Energy mean : {}".format(e_mean))
    print("Energy std: {}".format(e_std))

    pitch_vecs = []
    bad_pitch = []
    for f in tqdm(pitch_files):