def __init__(self, metadata_file_path, hparams):
        """Read the metadata file and cache every utterance in memory.

        Each metadata entry is a '|'-separated record. The fields this
        constructor reads are:

            [0]   utterance id, used to build the ``.npy`` file names
            [2]   style id (read only when ``hparams.is_multi_styles``)
            [4]   speaker id (read only when ``hparams.is_multi_speakers``)
            [5]   text passed to ``self.text_to_sequence``
            [-1]  mel frame count, stored in ``self.utt_list``

        Args:
            metadata_file_path: Passed to ``_read_meta_yyh``; presumably the
                path of the metadata file -- confirm against that helper.
            hparams: Hyper-parameter object. Attributes read here:
                ``mel_dir``, ``use_phone``, ``phone_set_file`` (only when
                ``use_phone`` is true), ``text_cleaners``,
                ``is_multi_speakers``, ``is_multi_styles`` and
                ``is_refine_style``.

        Populates:
            self.utt_list: one ``{"mel_frame": int}`` dict per utterance.
            self.utt_data: dict keyed by ``str(index)`` (index after the
                shuffle) with "text", "mel", "dur", "speaker" and "style".
            self.utt_mels: rescaled mels with a leading axis of size 1,
                filled only when ``hparams.is_refine_style`` is true.
        """
        self._metadata = _read_meta_yyh(metadata_file_path)

        # NOTE: file names are built by plain string concatenation, so
        # mel_dir has to end with a path separator.
        self.mel_dir = hparams.mel_dir

        if hparams.use_phone:
            from text.phones import Phones
            self.phone_class = Phones(hparams.phone_set_file)
            self.text_to_sequence = self.phone_class.text_to_sequence
        else:
            from text import text_to_sequence
            self.text_to_sequence = text_to_sequence

        self.text_cleaners = hparams.text_cleaners
        self.is_multi_speakers = hparams.is_multi_speakers
        self.is_multi_styles = hparams.is_multi_styles

        # NOTE: the shuffle is not seeded here, so the index -> utterance
        # mapping depends on the global `random` state at call time.
        random.shuffle(self._metadata)
        self.utt_list = []
        self.utt_data = {}
        self.utt_mels = []
        for index, item in enumerate(self._metadata):
            # Parse the record once instead of re-splitting it per field.
            fields = item.strip().split('|')
            utt_id = fields[0]

            self.utt_list.append({"mel_frame": int(fields[-1])})

            text = torch.IntTensor(
                self.text_to_sequence(fields[5], self.text_cleaners))

            # Rescale the stored mel: x * 8 - 4 (presumably maps values
            # saved in [0, 1] onto [-4, 4] -- confirm against the extractor).
            mel = torch.from_numpy(
                np.load(self.mel_dir + 'out_' + utt_id + '.npy')) * 8.0 - 4.0

            dur = DurationCalculator._calculate_duration(
                torch.from_numpy(
                    np.load(self.mel_dir + 'encdec_' + utt_id + '.npy')))

            self.utt_data[str(index)] = {
                "text": text,
                "mel": mel,
                "dur": dur,
                "speaker":
                int(fields[4]) if self.is_multi_speakers else None,
                "style":
                int(fields[2]) if self.is_multi_styles else None,
            }

            if hparams.is_refine_style:
                # Reuse the mel loaded above rather than reading the same
                # file again; clone() keeps this copy's storage independent
                # of the tensor stored in utt_data.
                self.utt_mels.append(mel.clone().unsqueeze(0))
Example #2
0
 def get_dur(self, att_ws):
     """Load a saved array and turn it into durations.

     Args:
         att_ws: Handed straight to ``np.load``, so a path (or file object)
             of a ``.npy`` file; presumably saved attention weights --
             confirm with the caller.

     Returns:
         Whatever ``DurationCalculator._calculate_duration`` returns for the
         loaded array wrapped as a torch tensor.
     """
     attention_array = np.load(att_ws)
     attention_tensor = torch.from_numpy(attention_array)
     return DurationCalculator._calculate_duration(attention_tensor)