def reconstruct_audio(filename, tier_to_breakdown):
    hp = HParam('./config/blizzard_compressed_experiments.yaml')
    melgen = MelGen(hp)
    tierutil = TierUtil(hp)
    final_reconstruction = None

    # Verify that tier 2 is conditionally generated from just tier 1
    assert (tier_to_breakdown[2][0] == tier_to_breakdown[1][1]
            ).all(), "Tier 2 not created from Tier 1"

    for tier in range(2, 7):
        source = tier_to_breakdown[tier][0]
        target = tier_to_breakdown[tier][1]

        source_tensor = torch.unsqueeze(torch.from_numpy(source), 0)
        target_tensor = torch.unsqueeze(torch.from_numpy(target), 0)
        reconstructed_mel_tensor = tierutil.interleave(source_tensor,
                                                       target_tensor, tier + 1)
        reconstructed_mel = reconstructed_mel_tensor.numpy()[0]

        # Verify that interleaving the source and target of the current tier conditionally generates the source of the next tier
        if tier < 6:
            next_tier = tier_to_breakdown[tier + 1][0]
            assert (reconstructed_mel == next_tier).all(
            ), "Tier %d not created from Tier %d" % (tier + 1, tier)
        else:
            final_reconstruction = reconstructed_mel
    print('reconstructing audio...')
    reconstructed_audio = melgen.reconstruct_audio(final_reconstruction)
    melgen.save_audio('reconstructed_' + filename, reconstructed_audio)
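The assertions in reconstruct_audio only hold if TierUtil.interleave exactly undoes cut_divide_tiers. As a rough sketch of what such an interleave can look like for a tier that was split along the frequency axis (an assumption for illustration, not the repo's actual implementation):

import torch

def interleave_rows(source, target):
    # Hypothetical stand-in for TierUtil.interleave: rows of the output
    # alternate between the source and target halves, so interleaving the
    # two halves produced by a frequency-axis split recovers the original.
    assert source.shape == target.shape
    batch, n_mels, n_frames = source.shape
    out = torch.empty(batch, 2 * n_mels, n_frames, dtype=source.dtype)
    out[:, 0::2] = source  # even rows from the source half
    out[:, 1::2] = target  # odd rows from the target half
    return out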
Example #2
    def __init__(self, hp, args, train):
        self.hp = hp
        self.args = args
        self.train = train
        self.data = hp.data.path
        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)

        # this will search all files within hp.data.path
        self.file_list = glob.glob(os.path.join(hp.data.path, '**',
                                                hp.data.extension),
                                   recursive=True)

        random.seed(123)
        random.shuffle(self.file_list)
        if train:
            self.file_list = self.file_list[:int(0.95 * len(self.file_list))]
        else:
            self.file_list = self.file_list[int(0.95 * len(self.file_list)):]

        self.wavlen = int(hp.audio.sr * hp.audio.duration)
        self.tier = self.args.tier

        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)
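Several of the loaders below repeat this seeded 95/5 split verbatim. A small helper capturing the idiom (the function name is hypothetical; sorted() is added because glob order is filesystem-dependent):

import random

def deterministic_split(paths, train_ratio=0.95, seed=123):
    # A fixed seed makes every process derive the same
    # train/test partition from the same file list.
    paths = sorted(paths)
    random.Random(seed).shuffle(paths)
    cut = int(train_ratio * len(paths))
    return paths[:cut], paths[cut:]

# train_files, test_files = deterministic_split(file_list)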
Example #3
    def __init__(self, hp, args, train):
        self.hp = hp
        self.args = args
        self.train = train
        self.data = hp.data.path
        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)
        self.file_list = []
        self.root_dir = hp.data.path
        txt_path = os.path.join(
            self.root_dir,
            'blizzard_train.csv' if train else 'blizzard_test.csv')
        with open(txt_path, 'r') as read_obj:
            csv_reader = csv.reader(read_obj)
            headers = next(csv_reader)
            for row in csv_reader:
                [original_sentence, parsed_sentence, wav_path,
                 wav_length] = row
                if 0.4 < float(wav_length) < hp.audio.duration:
                    self.file_list.append(wav_path)
        # Seeded shuffle so the file order is deterministic across runs
        random.Random(123).shuffle(self.file_list)
        self.wavlen = int(hp.audio.sr * hp.audio.duration)
        self.tier = self.args.tier
        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)
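This loader expects a header row followed by four columns per utterance. A minimal sketch of a CSV that would satisfy it (the row values are invented for illustration):

import csv

with open('blizzard_train.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['original_sentence', 'parsed_sentence',
                     'wav_path', 'wav_length'])
    writer.writerow(['Hello there.', 'hello there',
                     'wavs/0001.wav', '2.37'])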
Example #4
    def __init__(self, hp, args, train):
        self.hp = hp
        self.args = args
        self.train = train
        self.data = hp.data.path
        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)

        # this will search all files within hp.data.path
        self.file_list = []
        # for i, f in enumerate(glob.glob(os.path.join(hp.data.path, '**', hp.data.extension), recursive=True)):
        #     wav = read_wav_np(f)
        #     duration = (len(wav)/hp.audio.sr)
        #     if duration < hp.audio.duration:
        #         self.file_list.append(f)
        self.file_list = glob.glob(os.path.join(hp.data.path, '**',
                                                hp.data.extension),
                                   recursive=True)

        random.seed(123)
        random.shuffle(self.file_list)
        if train:
            self.file_list = self.file_list[:int(0.95 * len(self.file_list))]
        else:
            self.file_list = self.file_list[int(0.95 * len(self.file_list)):]

        self.wavlen = int(hp.audio.sr * hp.audio.duration)
        self.tier = self.args.tier

        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)
Example #5
    def __init__(self, hp, args, train):
        self.hp = hp
        self.args = args
        self.train = train
        self.data = hp.data.path
        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)

        # this will search all files within hp.data.path
        self.root_dir = hp.data.path
        self.dataset = []

        txt_path = os.path.join(
            self.root_dir,
            'blizzard_train.csv' if train else 'blizzard_test.csv')
        # open file in read mode
        with open(txt_path, 'r') as read_obj:
            csv_reader = csv.reader(read_obj)
            headers = next(csv_reader)
            for row in csv_reader:
                [original_sentence, parsed_sentence, wav_path,
                 wav_length] = row
                if 0.4 < float(wav_length) < hp.audio.duration:
                    self.dataset.append((wav_path, parsed_sentence))
        random.Random(123).shuffle(self.dataset)
        self.wavlen = int(hp.audio.sr * hp.audio.duration)
        self.tier = self.args.tier
        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)
Example #6
def deconstruct_audio(wav):
  hp = HParam('./config/blizzard_compressed_experiments.yaml')
  melgen = MelGen(hp)
  tierutil = TierUtil(hp)
  mel = melgen.get_normalized_mel(wav)
  tier_to_breakdown = {}
  for tier in range(1, 7):
    source, target = tierutil.cut_divide_tiers(mel, tier)
    print("Tier %d has source dims: %s, target dims %s" % (tier, source.shape, target.shape))
    tier_to_breakdown[tier] = (source, target)
  tier_to_breakdown[7] = (mel, mel)
  return tier_to_breakdown
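A hypothetical round-trip through the two helpers, assuming a wav file and the read_wav_np signature used elsewhere in these examples:

wav = read_wav_np('example.wav', sample_rate=22050)  # input file and rate are assumptions
breakdown = deconstruct_audio(wav)
mel = breakdown[7][0]  # tier 7 stores the full normalized mel in both slots
reconstruct_audio('example.wav', breakdown)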
Example #7
    def __init__(self, hp, args, train):
        self.hp = hp
        self.args = args
        self.train = train
        self.data = hp.data.path
        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)

        # this will search all files within hp.data.path
        self.root_dir = hp.data.path
        self.dataset = []
        if hp.data.name == 'KSS':
            with open(os.path.join(self.root_dir, 'transcript.v.1.4.txt'),
                      'r') as f:
                lines = f.read().splitlines()
                for line in tqdm(lines):
                    wav_name, _, _, text, length, _ = line.split('|')

                    wav_path = os.path.join(self.root_dir, 'kss', wav_name)
                    duration = float(length)
                    if duration < hp.audio.duration:
                        self.dataset.append((wav_path, text))

                # if len(self.dataset) > 100: break
        elif hp.data.name.startswith('Blizzard'):
            with open(os.path.join(self.root_dir, 'prompts.gui'), 'r') as f:
                lines = f.read().splitlines()
                filenames = lines[::3]
                sentences = lines[1::3]
                for filename, sentence in tqdm(zip(filenames, sentences),
                                               total=len(filenames)):
                    wav_path = os.path.join(self.root_dir, 'wavn',
                                            filename + '.wav')
                    length = get_length(wav_path, hp.audio.sr)
                    if length < hp.audio.duration:
                        self.dataset.append((wav_path, sentence))

        else:
            raise NotImplementedError

        random.seed(123)
        random.shuffle(self.dataset)
        if train:
            self.dataset = self.dataset[:int(0.95 * len(self.dataset))]
        else:
            self.dataset = self.dataset[int(0.95 * len(self.dataset)):]

        self.wavlen = int(hp.audio.sr * hp.audio.duration)
        self.tier = self.args.tier

        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)
Example #8
    def __init__(self, hp, args, train):
        self.hp = hp
        self.args = args
        self.train = train
        self.data = hp.data.path
        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)

        # this will search all files within hp.data.path
        self.root_dir = hp.data.path
        self.dataset = []

        txt_path = os.path.join(
            self.root_dir, 'complete_blizzard/train_prompts.gui'
            if train else 'complete_blizzard/test_prompts.gui')
        # txt_file_list = glob.glob(
        #     os.path.join(txt_path, '**', '*.txt'),
        #     recursive=True
        # )
        # for txt_filepath in tqdm(txt_file_list, total=len(txt_file_list)):
        #     wav_filepath = txt_filepath.replace('_txt', '_wav').replace('.txt', '.wav')
        #     f = open(txt_filepath, "r")
        #     sentence = f.read().strip()
        #     f.close()
        #     # Skip the length filtering below because we already filtered the dataset
        #     length = get_length(wav_filepath, hp.audio.sr)
        #     if length < hp.audio.duration and length > 0.56 and len(sentence) > 5:
        #         self.dataset.append((wav_filepath, sentence))
        with open(txt_path, 'r') as f:
            lines = f.read().splitlines()
            wav_paths = lines[::2]
            sentences = lines[1::2]
            for wav_path, sentence in tqdm(
                    zip(wav_paths, sentences),
                    desc='Audio/text data loader for %s' % txt_path,
                    total=len(wav_paths)):
                # Skip the length filtering below because we already filtered the dataset
                # length = get_length(wav_path, hp.audio.sr)
                # if length < hp.audio.duration:
                self.dataset.append((wav_path, sentence))

        random.seed(123)
        random.shuffle(self.dataset)
        self.wavlen = int(hp.audio.sr * hp.audio.duration)
        self.tier = self.args.tier

        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)
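This variant expects a flat prompt file with two lines per utterance: a wav path, then its sentence. A sketch of the assumed layout (paths and text are invented):

sample = (
    'datasets/complete_blizzard/train_wav/utt_0001.wav\n'  # line 1: wav path
    'It was a dark and stormy night.\n'                    # line 2: sentence
)
with open('train_prompts.gui', 'w') as f:
    f.write(sample)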
Example #9
class AudioOnlyDataset(Dataset):
    def __init__(self, hp, args, train):
        self.hp = hp
        self.args = args
        self.train = train
        self.data = hp.data.path
        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)

        # this will search all files within hp.data.path
        self.file_list = glob.glob(os.path.join(hp.data.path, '**',
                                                hp.data.extension),
                                   recursive=True)

        random.seed(123)
        random.shuffle(self.file_list)
        if train:
            self.file_list = self.file_list[:int(0.95 * len(self.file_list))]
        else:
            self.file_list = self.file_list[int(0.95 * len(self.file_list)):]

        self.wavlen = int(hp.audio.sr * hp.audio.duration)
        self.tier = self.args.tier

        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        wav = read_wav_np(self.file_list[idx], sample_rate=self.hp.audio.sr)
        # wav = cut_wav(self.wavlen, wav)
        mel = self.melgen.get_normalized_mel(wav)
        source, target = self.tierutil.cut_divide_tiers(mel, self.tier)

        # # Reconstruct audio for testing
        # filename = os.path.basename(self.file_list[idx])
        # plt.imsave('./reconstructed_audio/original_'+filename+'.png', mel)
        # plt.imsave('./reconstructed_audio/source_'+filename+'.png', source)
        # plt.imsave('./reconstructed_audio/target_'+filename+'.png', target)
        # self.melgen.save_audio('source_'+filename, wav)

        # source_tensor = torch.unsqueeze(torch.from_numpy(source), 0)
        # target_tensor = torch.unsqueeze(torch.from_numpy(target), 0)
        # reconstructed_mel_tensor = self.tierutil.interleave(source_tensor, target_tensor, self.tier)
        # reconstructed_mel = reconstructed_mel_tensor.numpy()
        # print('Shapes: [mel, source, target, reconstruction], [%s, %s, %s, %s]' % (
        #     mel.shape,
        #     source.shape,
        #     target.shape,
        #     reconstructed_mel.shape,
        #     ))
        # reconstructed_audio = self.melgen.reconstruct_audio(reconstructed_mel)
        # self.melgen.save_audio('reconstructed_'+filename, reconstructed_audio)

        return source, target
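A hypothetical way to wire this dataset into training. Since cut_wav is commented out in __getitem__, clips can differ in length, so the default collate only works with batch_size=1 (or pre-trimmed audio); hp and args stand in for the config and CLI objects used throughout:

from torch.utils.data import DataLoader

dataset = AudioOnlyDataset(hp, args, train=True)
loader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=4)
source, target = next(iter(loader))  # tensors shaped by the chosen tier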
Example #10
class AudioTextDataset(Dataset):
    def __init__(self, hp, args, train):
        self.hp = hp
        self.args = args
        self.train = train
        self.data = hp.data.path
        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)

        # this will search all files within hp.data.path
        self.root_dir = hp.data.path
        self.dataset = []
        with open(os.path.join(self.root_dir, 'transcript.v.1.2.txt'),
                  'r') as f:
            lines = f.read().splitlines()
            for line in lines:
                wav_name, _, _, text, _ = line.split('|')
                wav_name = wav_name[2:-4] + '.wav'

                wav_path = os.path.join(self.root_dir, 'wavs', wav_name)
                wav = read_wav_np(wav_path)
                duration = (len(wav) / hp.audio.sr)
                if duration < hp.audio.duration:
                    self.dataset.append((wav_path, text))

                #if len(self.dataset) > 100: break

        random.seed(123)
        random.shuffle(self.dataset)
        if train:
            self.dataset = self.dataset[:int(0.95 * len(self.dataset))]
        else:
            self.dataset = self.dataset[int(0.95 * len(self.dataset)):]

        self.wavlen = int(hp.audio.sr * hp.audio.duration)
        self.tier = self.args.tier

        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        text = self.dataset[idx][1]
        seq = text_to_sequence(text)

        wav = read_wav_np(self.dataset[idx][0])
        wav = cut_wav(self.wavlen, wav)
        mel = self.melgen.get_normalized_mel(wav)
        source, target = self.tierutil.cut_divide_tiers(mel, self.tier)

        return seq, source, target
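Because text sequences vary in length, batching seq alongside the fixed-size mels needs a custom collate_fn. A minimal sketch (the function is an assumption, not part of the repo):

import numpy as np
import torch

def pad_collate(batch):
    # Pad integer text sequences to the longest in the batch; the mels
    # already share a shape here because cut_wav fixes the clip length.
    seqs, sources, targets = zip(*batch)
    max_len = max(len(s) for s in seqs)
    seq_batch = torch.zeros(len(seqs), max_len, dtype=torch.long)
    for i, s in enumerate(seqs):
        seq_batch[i, :len(s)] = torch.as_tensor(s, dtype=torch.long)
    return (seq_batch,
            torch.as_tensor(np.stack(sources)),
            torch.as_tensor(np.stack(targets)))

# loader = DataLoader(dataset, batch_size=8, collate_fn=pad_collate)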
Example #11
    def __init__(self, hp, args, train):
        self.hp = hp
        self.args = args
        self.train = train
        self.data = hp.data.path
        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)
        self.file_list = []

        # if train:
        #     self.file_list = glob.glob(
        #         os.path.join(hp.data.path, 'complete_blizzard/train_wav', '**', hp.data.extension),
        #         recursive=True
        #     )
        # else:
        #     self.file_list = glob.glob(
        #         os.path.join(hp.data.path, 'complete_blizzard/test_wav', '**', hp.data.extension),
        #         recursive=True
        #     )
        txt_path = ('datasets/complete_blizzard/train_prompts.gui'
                    if train else 'datasets/complete_blizzard/test_prompts.gui')
        with open(txt_path, 'r') as f:
            lines = f.read().splitlines()
            wav_paths = lines[::2]
            for wav_path in tqdm(wav_paths,
                                 desc='Audio data loader',
                                 total=len(wav_paths)):
                # Skip the length filtering below because we already filtered the dataset
                # length = get_length(wav_path, hp.audio.sr)
                # if length < hp.audio.duration:
                self.file_list.append(wav_path)

        # Seeded shuffle so the file list order is deterministic across runs
        random.seed(123)
        random.shuffle(self.file_list)

        self.wavlen = int(hp.audio.sr * hp.audio.duration)
        self.tier = self.args.tier

        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)
Example #12
    def __init__(self, hp, args, train):
        self.hp = hp
        self.args = args
        self.train = train
        self.data = hp.data.path
        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)

        # this will search all files within hp.data.path
        self.root_dir = hp.data.path
        self.dataset = []
        with open(os.path.join(self.root_dir, 'transcript.v.1.2.txt'),
                  'r') as f:
            lines = f.read().splitlines()
            for line in lines:
                wav_name, _, _, text, _ = line.split('|')
                wav_name = wav_name[2:-4] + '.wav'

                wav_path = os.path.join(self.root_dir, 'wavs', wav_name)
                wav = read_wav_np(wav_path)
                duration = (len(wav) / hp.audio.sr)
                if duration < hp.audio.duration:
                    self.dataset.append((wav_path, text))

                #if len(self.dataset) > 100: break

        random.seed(123)
        random.shuffle(self.dataset)
        if train:
            self.dataset = self.dataset[:int(0.95 * len(self.dataset))]
        else:
            self.dataset = self.dataset[int(0.95 * len(self.dataset)):]

        self.wavlen = int(hp.audio.sr * hp.audio.duration)
        self.tier = self.args.tier

        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)
Example #13
class AudioOnlyDataset(Dataset):
    def __init__(self, hp, args, train):
        self.hp = hp
        self.args = args
        self.train = train
        self.data = hp.data.path
        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)

        # this will search all files within hp.data.path
        self.file_list = []
        # for i, f in enumerate(glob.glob(os.path.join(hp.data.path, '**', hp.data.extension), recursive=True)):
        #     wav = read_wav_np(f)
        #     duration = (len(wav)/hp.audio.sr)
        #     if duration < hp.audio.duration:
        #         self.file_list.append(f)
        self.file_list = glob.glob(os.path.join(hp.data.path, '**',
                                                hp.data.extension),
                                   recursive=True)

        random.seed(123)
        random.shuffle(self.file_list)
        if train:
            self.file_list = self.file_list[:int(0.95 * len(self.file_list))]
        else:
            self.file_list = self.file_list[int(0.95 * len(self.file_list)):]

        self.wavlen = int(hp.audio.sr * hp.audio.duration)
        self.tier = self.args.tier

        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        wav = read_wav_np(self.file_list[idx])
        wav = cut_wav(self.wavlen, wav)
        mel = self.melgen.get_normalized_mel(wav)
        source, target = self.tierutil.cut_divide_tiers(mel, self.tier)

        return source, target
Example #14
class CompleteAudioTextDatasetv3(AudioTextDataset):
    def __init__(self, hp, args, train):
        self.hp = hp
        self.args = args
        self.train = train
        self.data = hp.data.path
        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)

        # this will search all files within hp.data.path
        self.root_dir = hp.data.path
        self.dataset = []

        txt_path = os.path.join(
            self.root_dir,
            'blizzard_train.csv' if train else 'blizzard_test.csv')
        # open file in read mode
        with open(txt_path, 'r') as read_obj:
            csv_reader = csv.reader(read_obj)
            headers = next(csv_reader)
            for row in csv_reader:
                [original_sentence, parsed_sentence, wav_path,
                 wav_length] = row
                if 0.4 < float(wav_length) < hp.audio.duration:
                    self.dataset.append((wav_path, parsed_sentence))
        random.Random(123).shuffle(self.dataset)
        self.wavlen = int(hp.audio.sr * hp.audio.duration)
        self.tier = self.args.tier
        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)

    def __getitem__(self, idx):
        sentence = self.dataset[idx][1]
        seq = seq_to_array(sentence)
        wav = read_wav_np(self.dataset[idx][0], sample_rate=self.hp.audio.sr)
        mel = self.melgen.get_normalized_mel(wav)
        source, target = self.tierutil.cut_divide_tiers(mel, self.tier)
        return seq, source, target
Example #15
breakdown = None
audio_files = get_audio()
for filename, wav in audio_files:
    breakdown = deconstruct_audio(wav)
    reconstruct_audio(filename, breakdown)
    hp = HParam('./config/blizzard_compressed_experiments.yaml')
    melgen = MelGen(hp)
    melgen.save_audio('original_' + filename, wav)
    print('')
    print('')
    break
Example #16
  #   inference_breakdown[i][0]
  #   save_image('tier%d_inferred_breakdown_%s.png' % (i, filename), inference_breakdown[i][0])
  # save_image('final_inferred_%s.png' % filename, inferred)

  tier = 5
  source = breakdown[tier][0]
  print("Source tier 5 shape: %s" % str(source.shape))
  save_image('source_tier_%d_%s.png' % (tier, filename), breakdown[tier][0])
  inferred_source_6, inferred_5 = run_inference_on_tier(source, tier, text, timestep)
  print("inferred tier 5 target shape: %s" % str(inferred_5.shape))
  print("inferred tier 6 source shape: %s" % str(inferred_source_6.shape))
  tier = 6
  inferred_final, inferred_6 = run_inference_on_tier(inferred_source_6, tier, text, timestep)
  print("inferred tier 6 target shape: %s" % str(inferred_6.shape))
  print("inferred final shape: %s" % str(inferred_final.shape))
  print("original final shape: %s" % str(breakdown[tier+1][0].shape))
  save_image('target_tier_%d_%s.png' % (tier, filename), breakdown[tier][1])
  save_image('next_tier_%d_%s.png' % (tier, filename), breakdown[tier+1][0])
  save_image('inferred_tier_%d_%s.png' % (tier, filename), inferred_6)
  save_image('inferred_next_tier_%d_%s.png' % (tier, filename), inferred_final)

  # Save the actual audio
  hp = HParam('./config/blizzard_compressed_experiments.yaml')
  melgen = MelGen(hp)
  source_wav = melgen.reconstruct_audio(breakdown[tier+1][0])
  inference_wav = melgen.reconstruct_audio(inferred_final)
  melgen.save_audio('source_'+filename, source_wav)
  melgen.save_audio('inference_'+filename, inference_wav)

  break
  
Example #17
        required=False,
        help="Input for conditional generation, leave empty for unconditional")
    return parser.parse_args(args)


if __name__ == '__main__':

    args = parse_args(sys.argv[1:])

    hp = HParam(args.config)
    infer_hp = HParam(args.infer_config)

    assert args.timestep % t_div[hp.model.tier] == 0, \
        "timestep should be divisible by %d, got %d" % (t_div[hp.model.tier], args.timestep)

    melgen = MelGen(hp)
    model = MelNet(hp, args, infer_hp).cuda()
    model.load_tiers()
    model.eval()

    with torch.no_grad():
        generated = model.sample(args.input)

    os.makedirs('temp', exist_ok=True)
    torch.save(generated, os.path.join('temp', args.name + '.pt'))
    spectrogram = plot_spectrogram_to_numpy(
        generated[0].cpu().detach().numpy())
    plt.imsave(os.path.join('temp', args.name + '.png'),
               spectrogram.transpose((1, 2, 0)))

    # waveform, wavespec = Reconstruct(hp).inverse(generated[0])
Example #18
def save_audio(filename, final_reconstruction):
    hp = HParam('./config/blizzard_alldata_v5.yaml')
    melgen = MelGen(hp)
    reconstructed_audio = melgen.reconstruct_audio(final_reconstruction)
    melgen.save_audio('temp/reconstructed_' + filename, reconstructed_audio)
Example #19
class AudioTextDataset(Dataset):
    def __init__(self, hp, args, train):
        self.hp = hp
        self.args = args
        self.train = train
        self.data = hp.data.path
        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)

        # this will search all files within hp.data.path
        self.root_dir = hp.data.path
        self.dataset = []
        if hp.data.name == 'KSS':
            with open(os.path.join(self.root_dir, 'transcript.v.1.4.txt'),
                      'r') as f:
                lines = f.read().splitlines()
                for line in tqdm(lines):
                    wav_name, _, _, text, length, _ = line.split('|')

                    wav_path = os.path.join(self.root_dir, 'kss', wav_name)
                    duration = float(length)
                    if duration < hp.audio.duration:
                        self.dataset.append((wav_path, text))

                # if len(self.dataset) > 100: break
        elif hp.data.name.startswith('Blizzard'):
            with open(os.path.join(self.root_dir, 'prompts.gui'), 'r') as f:
                lines = f.read().splitlines()
                filenames = lines[::3]
                sentences = lines[1::3]
                for filename, sentence in tqdm(zip(filenames, sentences),
                                               total=len(filenames)):
                    wav_path = os.path.join(self.root_dir, 'wavn',
                                            filename + '.wav')
                    length = get_length(wav_path, hp.audio.sr)
                    if length < hp.audio.duration:
                        self.dataset.append((wav_path, sentence))

        else:
            raise NotImplementedError

        random.seed(123)
        random.shuffle(self.dataset)
        if train:
            self.dataset = self.dataset[:int(0.95 * len(self.dataset))]
        else:
            self.dataset = self.dataset[int(0.95 * len(self.dataset)):]

        self.wavlen = int(hp.audio.sr * hp.audio.duration)
        self.tier = self.args.tier

        self.melgen = MelGen(hp)
        self.tierutil = TierUtil(hp)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        text = self.dataset[idx][1]
        if self.hp.data.name == 'KSS':
            seq = text_to_sequence(text)
        elif self.hp.data.name.startswith('Blizzard'):
            seq = process_blizzard(text)
        else:
            raise NotImplementedError

        wav = read_wav_np(self.dataset[idx][0], sample_rate=self.hp.audio.sr)
        # wav = cut_wav(self.wavlen, wav)
        mel = self.melgen.get_normalized_mel(wav)
        source, target = self.tierutil.cut_divide_tiers(mel, self.tier)
        # print(text)

        return seq, source, target