Beispiel #1
0
def reprocess(batch, cut_list):
    """Collate the samples selected by ``cut_list`` into padded batch arrays.

    Args:
        batch: Indexable collection of per-sample dicts. Each selected sample
            is read for the keys ``"text"``, ``"cemb"``, ``"D"`` and
            ``"mel_tac2_target"``.
        cut_list: Indices into ``batch``; they define which samples are used
            and in which order.

    Returns:
        dict with the outputs of ``pad_1D`` / ``pad_2D`` under ``"text"``,
        ``"mel_tac2_target"``, ``"cemb"`` and ``"D"``, plus
        ``"length_text"`` and ``"length_mel"``: the unpadded first-axis sizes
        of the text and mel arrays as float64 NumPy arrays.
    """
    def gather(key):
        # One field from every selected sample, in cut_list order.
        return [batch[idx][key] for idx in cut_list]

    def first_dims(arrays):
        # Lengths are returned as a float64 array.
        return np.array([arr.shape[0] for arr in arrays], dtype=np.float64)

    texts = gather("text")
    cembs = gather("cemb")
    Ds = gather("D")
    mel_tac2_targets = gather("mel_tac2_target")

    # Record the true lengths before padding hides them.
    length_text = first_dims(texts)
    length_mel = first_dims(mel_tac2_targets)

    texts = pad_1D(texts)
    Ds = pad_1D(Ds)
    mel_tac2_targets = pad_2D(mel_tac2_targets)
    cembs = pad_2D(cembs)

    return {
        "text": texts,
        "mel_tac2_target": mel_tac2_targets,
        "cemb": cembs,
        "D": Ds,
        "length_mel": length_mel,
        "length_text": length_text,
    }
Beispiel #2
0
def reprocess(batch, cut_list):
    """Collate the selected samples and build position indices for them.

    Args:
        batch: Indexable collection of per-sample dicts. Each selected sample
            is read for ``"condition1"``, ``"condition2"``, ``"mel_target"``,
            ``"norm_f0"``, ``"D"`` and ``"mel_in"``.
        cut_list: Indices into ``batch`` selecting and ordering the samples.

    Returns:
        dict with the padded feature batches, ``"src_pos"`` / ``"mel_pos"``
        (per row: 1..length followed by zeros up to the longest row),
        ``"lens"`` (mel lengths as a ``torch.LongTensor``) and
        ``"mel_max_len"`` (longest mel length as an int).

    Raises:
        ValueError: if ``cut_list`` is empty (``max`` of an empty sequence).
    """
    def gather(key):
        return [batch[idx][key] for idx in cut_list]

    def first_dims(arrays):
        # Lengths are kept as a float64 array.
        return np.array([arr.shape[0] for arr in arrays], dtype=np.float64)

    def position_rows(lengths, width):
        # Row k holds 1..lengths[k], right-padded with zeros to `width`.
        rows = []
        for length in lengths:
            count = int(length)
            rows.append(
                np.pad(list(range(1, count + 1)), (0, width - count),
                       'constant'))
        return np.array(rows)

    C1s = gather("condition1")
    C2s = gather("condition2")
    mel_targets = gather("mel_target")
    norm_f0s = gather("norm_f0")
    Ds = gather("D")
    mel_ins = gather("mel_in")

    length_C = first_dims(C1s)
    max_len = int(max(length_C))
    src_pos = position_rows(length_C, max_len)

    length_mel = first_dims(mel_targets)
    lens = torch.LongTensor(length_mel)
    max_mel_len = int(max(length_mel))
    mel_pos = position_rows(length_mel, max_mel_len)

    C1s = pad_1D(C1s)
    C2s = pad_1D(C2s)
    Ds = pad_1D(Ds)
    # All frame-level features are padded to the same mel length.
    norm_f0s = pad_2D(norm_f0s, maxlen=max_mel_len)
    mel_targets = pad_2D(mel_targets, maxlen=max_mel_len)
    mel_ins = pad_2D(mel_ins, maxlen=max_mel_len)

    return {
        "condition1": C1s,
        "condition2": C2s,
        "mel_target": mel_targets,
        "norm_f0": norm_f0s,
        "mel_in": mel_ins,
        "D": Ds,
        "mel_pos": mel_pos,
        "src_pos": src_pos,
        "lens": lens,
        "mel_max_len": max_mel_len,
    }
Beispiel #3
0
def preprocess_audio(mel, energy, f0, f0_norm):
    """Wrap one utterance's features as single-item batch tensors on ``device``.

    Each input is indexed with ``[None]`` to add a leading axis, passed
    through ``utils.pad_2D`` (mel) or ``utils.pad_1D`` (the others) and then
    converted to a float tensor on the module-level ``device``.

    Args:
        mel: Array of mel features for one utterance.
        energy: Array of energy values for the same utterance.
        f0: Array of f0 values.
        f0_norm: Array of normalised f0 values.

    Returns:
        Tuple ``(mel_target, mel_len, energy, f0, f0_norm)``. ``mel_len`` is
        a one-element long tensor holding the size of axis 1 of the padded
        mel batch.
    """
    def as_float_tensor(array):
        return torch.from_numpy(array).float().to(device)

    mel_batch = utils.pad_2D(mel[None])
    f0_batch = utils.pad_1D(f0[None])
    f0_norm_batch = utils.pad_1D(f0_norm[None])
    energy_batch = utils.pad_1D(energy[None])

    mel_target = as_float_tensor(mel_batch)
    frame_count = np.array([mel_batch.shape[1]])
    mel_len = torch.from_numpy(frame_count).long().to(device)
    f0_tensor = as_float_tensor(f0_batch)
    f0_norm_tensor = as_float_tensor(f0_norm_batch)
    energy_tensor = as_float_tensor(energy_batch)

    return mel_target, mel_len, energy_tensor, f0_tensor, f0_norm_tensor
Beispiel #4
0
    def reprocess(self, batch, cut_list):
        """Collate the selected samples, optionally including hanzi text.

        Args:
            batch: Indexable collection of per-sample dicts. Each selected
                sample is read for ``"id"``, ``"text"``, ``"mel_target"`` and
                ``"D"``, and for ``"hz_text"`` when ``hp.with_hanzi`` is
                truthy.
            cut_list: Indices into ``batch`` selecting and ordering the
                samples.

        Returns:
            dict with ``"id"``, padded ``"text"``, ``"hz_text"`` (padded, or
            ``None`` when hanzi is disabled), padded ``"mel_target"``,
            ``"D"`` and ``"log_D"`` (both the same padded array of durations
            shifted by ``hp.duration_mean``; no logarithm is applied), and
            the float64 length arrays ``"src_len"`` / ``"mel_len"``.
        """
        def gather(key):
            return [batch[idx][key] for idx in cut_list]

        def first_dims(arrays):
            return np.array([arr.shape[0] for arr in arrays],
                            dtype=np.float64)

        use_hanzi = hp.with_hanzi

        ids = gather("id")
        texts = gather("text")
        hz_texts = gather("hz_text") if use_hanzi else None
        mel_targets = gather("mel_target")
        Ds = gather("D")

        # Report, without aborting, any sample whose text and duration
        # sequences disagree in length.
        for tokens, durations, utt_id in zip(texts, Ds, ids):
            if len(tokens) == len(durations):
                continue
            print('error:', tokens, tokens.shape, durations, durations.shape,
                  utt_id)

        length_text = first_dims(texts)
        length_mel = first_dims(mel_targets)

        texts = pad_1D(texts)
        if use_hanzi:
            hz_texts = pad_1D(hz_texts)

        # Centre the durations on the configured mean before padding.
        Ds = pad_1D([durations - hp.duration_mean for durations in Ds])
        mel_targets = pad_2D(mel_targets)

        return {
            "id": ids,
            "text": texts,
            "hz_text": hz_texts,
            "mel_target": mel_targets,
            "D": Ds,
            "log_D": Ds,
            "src_len": length_text,
            "mel_len": length_mel,
        }
Beispiel #5
0
    def reprocess(self, batch, cut_list):
        """Collate the selected samples, optionally attaching speaker ids.

        Args:
            batch: Indexable collection of per-sample dicts. Each selected
                sample is read for ``"id"``, ``"text"``, ``"mel_target"``,
                ``"D"``, ``"f0"`` and ``"energy"``.
            cut_list: Indices into ``batch`` selecting and ordering the
                samples.

        Returns:
            dict with ``"id"``, the padded ``"text"``, ``"mel_target"``,
            ``"D"``, ``"f0"`` and ``"energy"`` batches, ``"log_D"``
            (``log(D + hp.log_offset)``), the float64 length arrays
            ``"src_len"`` / ``"mel_len"`` and, when ``hp.use_spk_embed`` is
            truthy, ``"spk_ids"`` looked up in ``self.spk_table`` by the part
            of each id before the first underscore.

        Raises:
            NotImplementedError: if speaker embeddings are enabled for a
                dataset other than ``"VCTK"`` or ``"LibriTTS"``.
        """
        def gather(key):
            return [batch[idx][key] for idx in cut_list]

        def first_dims(arrays):
            return np.array([arr.shape[0] for arr in arrays],
                            dtype=np.float64)

        ids = gather("id")

        with_speakers = hp.use_spk_embed
        if with_speakers:
            if hp.dataset not in ("VCTK", "LibriTTS"):
                raise NotImplementedError(
                    "Looking up datset {} speaker table not implemented".
                    format(hp.dataset))
            spk_ids = [self.spk_table[utt_id.split("_")[0]] for utt_id in ids]

        texts = gather("text")
        mel_targets = gather("mel_target")
        Ds = gather("D")
        f0s = gather("f0")
        energies = gather("energy")

        # Report, without aborting, any text/duration length mismatch.
        for tokens, durations, utt_id in zip(texts, Ds, ids):
            if len(tokens) == len(durations):
                continue
            print(tokens, tokens.shape, durations, durations.shape, utt_id)

        length_text = first_dims(texts)
        length_mel = first_dims(mel_targets)

        texts = pad_1D(texts)
        Ds = pad_1D(Ds)
        mel_targets = pad_2D(mel_targets)
        f0s = pad_1D(f0s)
        energies = pad_1D(energies)
        log_Ds = np.log(Ds + hp.log_offset)

        out = {
            "id": ids,
            "text": texts,
            "mel_target": mel_targets,
            "D": Ds,
            "log_D": log_Ds,
            "f0": f0s,
            "energy": energies,
            "src_len": length_text,
            "mel_len": length_mel,
        }
        if with_speakers:
            out["spk_ids"] = spk_ids

        return out
Beispiel #6
0
    def reprocess(self, batch, cut_list):
        """Collate the selected samples and build position indices.

        Args:
            batch: Indexable collection of per-sample dicts. Each selected
                sample is read for ``"text"``, ``"mel_target"``, ``"D"``,
                ``"f0"`` and ``"energy"``.
            cut_list: Indices into ``batch`` selecting and ordering the
                samples.

        Returns:
            dict with the padded ``"text"``, ``"mel_target"``, ``"D"``,
            ``"f0"`` and ``"energy"`` batches, ``"src_pos"`` / ``"mel_pos"``
            (per row: 1..length followed by zeros up to the longest row) and
            ``"mel_len"`` (unpadded mel lengths as a float64 array).

        Raises:
            ValueError: if ``cut_list`` is empty (``max`` of an empty
                sequence).
        """
        def gather(key):
            return [batch[idx][key] for idx in cut_list]

        def first_dims(arrays):
            return np.array([arr.shape[0] for arr in arrays],
                            dtype=np.float64)

        def position_rows(lengths, width):
            # Row k holds 1..lengths[k], right-padded with zeros to `width`.
            rows = []
            for length in lengths:
                count = int(length)
                rows.append(
                    np.pad(list(range(1, count + 1)), (0, width - count),
                           'constant'))
            return np.array(rows)

        texts = gather("text")
        mel_targets = gather("mel_target")
        Ds = gather("D")
        f0s = gather("f0")
        energies = gather("energy")

        length_text = first_dims(texts)
        src_pos = position_rows(length_text, int(max(length_text)))

        length_mel = first_dims(mel_targets)
        mel_pos = position_rows(length_mel, int(max(length_mel)))

        texts = pad_1D(texts)
        Ds = pad_1D(Ds)
        mel_targets = pad_2D(mel_targets)
        f0s = pad_1D(f0s)
        energies = pad_1D(energies)

        return {
            "text": texts,
            "mel_target": mel_targets,
            "D": Ds,
            "f0": f0s,
            "energy": energies,
            "mel_pos": mel_pos,
            "src_pos": src_pos,
            "mel_len": length_mel,
        }
    def reprocess(self, batch, cut_list):
        """Collate the selected samples with min-max normalised f0 and energy.

        Args:
            batch: Indexable collection of per-sample dicts. Each selected
                sample is read for ``"id"``, ``"text"``, ``"mel_target"``,
                ``"D"``, ``"f0"`` and ``"energy"``.
            cut_list: Indices into ``batch`` selecting and ordering the
                samples.

        Returns:
            dict with ``"id"``, the padded ``"text"``, ``"mel_target"`` and
            ``"D"`` batches, ``"log_D"`` (``log(D + hparams.log_offset)``),
            padded ``"f0"`` / ``"energy"`` after ``min_max_norm`` with the
            bounds configured in ``hparams``, and the float64 length arrays
            ``"src_len"`` / ``"mel_len"``.
        """
        def gather(key):
            return [batch[idx][key] for idx in cut_list]

        def first_dims(arrays):
            return np.array([arr.shape[0] for arr in arrays],
                            dtype=np.float64)

        ids = gather("id")
        texts = gather("text")
        mel_targets = gather("mel_target")
        Ds = gather("D")
        f0s = [
            min_max_norm(contour,
                         min_val=hparams.f0_min,
                         max_val=hparams.f0_max)
            for contour in gather("f0")
        ]
        energies = [
            min_max_norm(contour,
                         min_val=hparams.energy_min,
                         max_val=hparams.energy_max)
            for contour in gather("energy")
        ]

        # Report, without aborting, any text/duration length mismatch.
        for tokens, durations, utt_id in zip(texts, Ds, ids):
            if len(tokens) == len(durations):
                continue
            print('the dimension of text and duration should be the same')
            print('text: ', sequence_to_text(tokens))
            print(tokens, tokens.shape, durations, durations.shape, utt_id)

        length_text = first_dims(texts)
        length_mel = first_dims(mel_targets)

        texts = pad_1D(texts)
        Ds = pad_1D(Ds)
        mel_targets = pad_2D(mel_targets)
        f0s = pad_1D(f0s)
        energies = pad_1D(energies)
        log_Ds = np.log(Ds + hparams.log_offset)

        return {
            "id": ids,
            "text": texts,
            "mel_target": mel_targets,
            "D": Ds,
            "log_D": log_Ds,
            "f0": f0s,
            "energy": energies,
            "src_len": length_text,
            "mel_len": length_mel,
        }
Beispiel #8
0
    def reprocess(self, batch, cut_list):
        """Collate the selected samples including augmented features.

        Args:
            batch: Indexable collection of per-sample dicts. Each selected
                sample is read for ``"id"``, ``"text"``, ``"mel_target"``,
                ``"mel_aug"``, ``"D"``, ``"f0"``, ``"f0_norm"``,
                ``"f0_norm_aug"``, ``"energy"``, ``"energy_input"``,
                ``"energy_input_aug"`` and ``"speaker_embed"``.
            cut_list: Indices into ``batch`` selecting and ordering the
                samples.

        Returns:
            dict with ``"id"``, every feature above padded with ``pad_1D`` or
            ``pad_2D``, ``"log_D"`` (``log(D + hparams.log_offset)``),
            ``"speaker_embed"`` (the per-sample embeddings concatenated along
            axis 0) and the float64 length arrays ``"src_len"`` /
            ``"mel_len"``.
        """
        def gather(key):
            return [batch[idx][key] for idx in cut_list]

        def first_dims(arrays):
            return np.array([arr.shape[0] for arr in arrays],
                            dtype=np.float64)

        ids = gather("id")
        texts = gather("text")
        mel_targets = gather("mel_target")
        mel_augs = gather("mel_aug")
        Ds = gather("D")
        f0s = gather("f0")
        f0_norms = gather("f0_norm")
        f0_norm_augs = gather("f0_norm_aug")
        energies = gather("energy")
        energy_inputs = gather("energy_input")
        energy_input_augs = gather("energy_input_aug")
        speaker_embed = gather("speaker_embed")

        # Report, without aborting, any text/duration length mismatch.
        for tokens, durations, utt_id in zip(texts, Ds, ids):
            if len(tokens) == len(durations):
                continue
            print(tokens, tokens.shape, durations, durations.shape, utt_id)

        length_text = first_dims(texts)
        length_mel = first_dims(mel_targets)

        texts = pad_1D(texts)
        Ds = pad_1D(Ds)
        mel_targets = pad_2D(mel_targets)
        mel_augs = pad_2D(mel_augs)
        f0s = pad_1D(f0s)
        f0_norms = pad_1D(f0_norms)
        f0_norm_augs = pad_1D(f0_norm_augs)
        energies = pad_1D(energies)
        energy_inputs = pad_1D(energy_inputs)
        energy_input_augs = pad_1D(energy_input_augs)
        log_Ds = np.log(Ds + hparams.log_offset)
        speaker_embeds = np.concatenate(speaker_embed, axis=0)

        return {
            "id": ids,
            "text": texts,
            "mel_target": mel_targets,
            "mel_aug": mel_augs,
            "D": Ds,
            "log_D": log_Ds,
            "f0": f0s,
            "f0_norm": f0_norms,
            "f0_norm_aug": f0_norm_augs,
            "energy": energies,
            "energy_input": energy_inputs,
            "energy_input_aug": energy_input_augs,
            "speaker_embed": speaker_embeds,
            "src_len": length_text,
            "mel_len": length_mel,
        }
Beispiel #9
0
def reprocess(batch, cut_list):
    """Collate the selected samples, including stop tokens and positions.

    Args:
        batch: Indexable collection of per-sample dicts. Each selected sample
            is read for ``"text"``, ``"mel_target"``, ``"D"`` and
            ``"stop_token"``.
        cut_list: Indices into ``batch`` selecting and ordering the samples.

    Returns:
        dict with the padded ``"text"``, ``"mel_target"`` and ``"D"``
        batches, ``"stop_token"`` (padded by ``pad_1D`` with ``PAD=1.``),
        ``"src_pos"`` / ``"mel_pos"`` (per row: 1..length followed by zeros
        up to the longest row) and ``"mel_max_len"`` (longest mel length as
        an int).

    Raises:
        ValueError: if ``cut_list`` is empty (``max`` of an empty sequence).
    """
    def gather(key):
        return [batch[idx][key] for idx in cut_list]

    def first_dims(arrays):
        return np.array([arr.shape[0] for arr in arrays], dtype=np.float64)

    def position_rows(lengths, width):
        # Row k holds 1..lengths[k], right-padded with zeros to `width`.
        rows = []
        for length in lengths:
            count = int(length)
            rows.append(
                np.pad(list(range(1, count + 1)), (0, width - count),
                       'constant'))
        return np.array(rows)

    texts = gather("text")
    mel_targets = gather("mel_target")
    Ds = gather("D")
    stop_tokens = gather("stop_token")

    length_text = first_dims(texts)
    src_pos = position_rows(length_text, int(max(length_text)))

    length_mel = first_dims(mel_targets)
    max_mel_len = int(max(length_mel))
    mel_pos = position_rows(length_mel, max_mel_len)

    texts = pad_1D(texts)
    Ds = pad_1D(Ds)
    mel_targets = pad_2D(mel_targets)
    stop_tokens = pad_1D(stop_tokens, PAD=1.)

    return {
        "text": texts,
        "mel_target": mel_targets,
        "D": Ds,
        "stop_token": stop_tokens,
        "mel_pos": mel_pos,
        "src_pos": src_pos,
        "mel_max_len": max_mel_len,
    }
Beispiel #10
0
    def reprocess(self, batch, cut_list):
        """Collate the selected samples into padded batch arrays.

        Args:
            batch: Indexable collection of per-sample dicts. Each selected
                sample is read for ``"id"``, ``"text"``, ``"mel_target"``,
                ``"D"``, ``"f0"`` and ``"energy"``.
            cut_list: Indices into ``batch`` selecting and ordering the
                samples.

        Returns:
            dict with ``"id"``, the padded ``"text"``, ``"mel_target"``,
            ``"D"``, ``"f0"`` and ``"energy"`` batches, ``"log_D"``
            (``log(D + hparams.log_offset)``), ``"src_len"`` (number of text
            symbols per sample) and ``"mel_len"`` (number of mel frames per
            sample), both as float64 arrays.
        """
        def gather(key):
            return [batch[idx][key] for idx in cut_list]

        def first_dims(arrays):
            return np.array([arr.shape[0] for arr in arrays],
                            dtype=np.float64)

        ids = gather("id")
        texts = gather("text")
        mel_targets = gather("mel_target")
        Ds = gather("D")
        f0s = gather("f0")
        energies = gather("energy")

        # The text (phonemes) and the durations must have the same number of
        # entries; per the original author, the durations sum to the frame
        # count shared by mel, f0 and energy.
        for tokens, durations, utt_id in zip(texts, Ds, ids):
            if len(tokens) == len(durations):
                continue
            print(tokens, tokens.shape, durations, durations.shape, utt_id)

        # Phoneme count and mel frame count generally differ.
        length_text = first_dims(texts)
        length_mel = first_dims(mel_targets)

        texts = pad_1D(texts)
        Ds = pad_1D(Ds)
        # Author's example: a mel of shape <831, 80> has 831 time frames and
        # 80 frequency coefficients used as features.
        mel_targets = pad_2D(mel_targets)
        f0s = pad_1D(f0s)
        energies = pad_1D(energies)
        log_Ds = np.log(Ds + hparams.log_offset)

        return {
            "id": ids,
            "text": texts,
            "mel_target": mel_targets,
            "D": Ds,
            "log_D": log_Ds,
            "f0": f0s,
            "energy": energies,
            "src_len": length_text,  # number of phonemes
            "mel_len": length_mel,  # number of frames
        }
Beispiel #11
0
    def cal_D_target(self, duration_predictor_output, length_c, length_mel):
        """Turn predicted durations into integer targets that sum to the mel length.

        For each sequence ``i`` the first ``length_c[i]`` predicted durations
        are rescaled so that they sum to ``length_mel[i]``, rounded one by one
        with ``self.rounding`` and then nudged by +/-1 on selected entries to
        remove whatever difference the rounding left. The per-sequence targets
        are padded with ``utils.pad_1D`` and moved to the GPU.

        Args:
            duration_predictor_output: Indexable by sequence; each entry is
                sliced to ``length_c[i]`` items and summed with ``torch.sum``.
            length_c: Per-sequence number of valid input positions.
            length_mel: Per-sequence target total (mel length).

        Returns:
            CUDA tensor built from the padded integer duration targets.
        """
        targets = list()
        for i, l in enumerate(length_c):
            # Keep only the valid (unpadded) predictions for this sequence.
            origin_batch = duration_predictor_output[i][0:l]
            sum_batch = torch.sum(origin_batch, 0).data

            # Rescale so the durations add up to the mel length.
            ratio = length_mel[i] / sum_batch
            processed_batch = origin_batch * ratio

            target_batch = [self.rounding(ele) for ele in processed_batch]
            temp_sum = sum(target_batch)
            # Leftover after rounding; positive means frames are still missing.
            # NOTE(review): range(diff) below requires this to be an int, which
            # depends on the dtypes of length_mel and self.rounding's output —
            # confirm.
            diff = (length_mel[i] - temp_sum).item()
            # Offset of each scaled value from the midpoint of its truncated
            # integer bin: negative means the fractional part is below 0.5.
            bias = [ele.item() - (int(ele) + 0.5) for ele in processed_batch]

            if diff != 0:
                if diff > 0:
                    # One [value, index] slot per missing frame; index -1 is the
                    # placeholder for a slot that never gets filled.
                    index_list = [[-1000, -1] for _ in range(diff)]
                    # NOTE(review): this loop reuses `i`, shadowing the outer
                    # loop index. The outer `i` is not read again in this
                    # iteration, so it is harmless but easy to break.
                    for i, ele in enumerate(bias):
                        if ele < 0:
                            # Overwrite the first slot holding a smaller value,
                            # then stop; displaced entries are not shifted down.
                            for ind, (value, _) in enumerate(index_list):
                                if ele > value:
                                    index_list[ind][0] = ele
                                    index_list[ind][1] = i
                                    break
                    # Unfilled slots still carry index -1 and therefore bump the
                    # last element of target_batch.
                    for (_, index) in index_list:
                        target_batch[index] += 1
                else:
                    # Mirror image of the branch above for surplus frames.
                    index_list = [[1000, -1] for _ in range(-diff)]
                    for i, ele in enumerate(bias):
                        if ele > 0:
                            for ind, (value, _) in enumerate(index_list):
                                if ele < value:
                                    index_list[ind][0] = ele
                                    index_list[ind][1] = i
                                    break
                    for (_, index) in index_list:
                        target_batch[index] -= 1
            targets.append(target_batch)

        # Convert each per-sequence list to an array so it can be padded.
        for i, target in enumerate(targets):
            targets[i] = np.array(target)
        # NOTE(review): hard-coded .cuda() — this fails on CPU-only hosts.
        D = torch.from_numpy(utils.pad_1D(targets)).cuda()

        return D
Beispiel #12
0
    def __call__(self, batch):
        """Collate ``(mel, speaker embedding, pitch)`` samples into tensors.

        The previous version printed ``len(batch[1])`` and ``len(batch[2])``
        unconditionally, which raised ``IndexError`` for batches with fewer
        than three samples. Shape diagnostics now go through ``logging`` at
        DEBUG level and no longer index fixed batch positions.

        Args:
            batch: Sequence of samples. Each sample is indexed as
                ``sample[0]`` (mel array; its first-axis size is recorded as
                the original length), ``sample[1]`` (speaker embedding) and
                ``sample[2]`` (pitch array).

        Returns:
            Tuple ``(melsp, spk_emb, pitch, len_org)``:
            ``melsp`` is the ``pad_2D`` result as a tensor, ``spk_emb`` the
            stacked speaker embeddings, ``pitch`` the ``pad_1D`` result with
            a trailing singleton axis added, and ``len_org`` the unpadded mel
            lengths as a float64 tensor.
        """
        import logging

        log = logging.getLogger(__name__)

        mels = [sample[0] for sample in batch]
        speaker_embeddings = [sample[1] for sample in batch]
        pitches = [sample[2] for sample in batch]

        # Lengths are kept as float64 so the returned tensor dtype is
        # unchanged for existing callers.
        lengths = np.array([mel.shape[0] for mel in mels], dtype=np.float64)

        melsp = torch.from_numpy(pad_2D(mels))
        spk_emb = torch.from_numpy(np.array(speaker_embeddings))
        pitch = torch.from_numpy(pad_1D(pitches)).unsqueeze(-1)
        len_org = torch.from_numpy(lengths)

        log.debug(
            "collated %d samples: melsp=%s spk_emb=%s pitch=%s len_org=%s",
            len(batch), tuple(melsp.shape), tuple(spk_emb.shape),
            tuple(pitch.shape), tuple(len_org.shape))

        return melsp, spk_emb, pitch, len_org
Beispiel #13
0
def synthesize(
    model,
    vocoder,
    d_vec,
    x_vec,
    adain,
    speaker,
    gst,
    texts,
    file_ids,
    prefix="",
):
    """Run the model on a batch of texts and write audio, mels and plots.

    For every entry of ``file_ids`` this writes ``{prefix}_{file_id}.wav``
    (through ``utils.vocoder_infer``), ``{prefix}_{file_id}.npy`` (the
    post-net mel) and ``{prefix}_{file_id}.png`` (through ``plot_mel``) into
    ``hp.test_path``, creating that directory when needed.

    Args:
        model: Callable acoustic model; must return a 9-tuple whose items
            0-5 and 8 are mel, post-net mel, log-duration, duration, f0,
            energy and mel length.
        vocoder: Vocoder object forwarded to ``utils.vocoder_infer``.
        d_vec: Optional embedding, broadcast to one row per file id.
        x_vec: Optional embedding, broadcast like ``d_vec``.
        adain: Optional embedding, broadcast like ``d_vec``.
        speaker: Optional speaker id, repeated once per file id.
        gst: Optional style embedding, broadcast like ``d_vec``.
        texts: Sequence of per-utterance arrays; padded with
            ``utils.pad_1D`` before being fed to the model.
        file_ids: Output name stems, one per utterance.
        prefix: String prepended to every output file name.

    Returns:
        None. All results are written to disk.
    """
    batch_size = len(file_ids)

    def _broadcast(vec):
        # Turn a single embedding into a (batch_size, dim) view; None passes
        # through so the model can tell the conditioning is absent.
        if vec is None:
            return None
        tensor = torch.from_numpy(np.array(vec)).to(device)
        return tensor.unsqueeze(0).expand(batch_size, -1)

    src_len = torch.from_numpy(np.array([len(t) for t in texts])).to(device)
    texts = torch.from_numpy(utils.pad_1D(texts)).to(device)
    d_vec = _broadcast(d_vec)
    x_vec = _broadcast(x_vec)
    adain = _broadcast(adain)
    speakers = (torch.from_numpy(
        np.array([speaker])).to(device).expand(batch_size)
                if speaker is not None else None)
    gst = _broadcast(gst)

    (
        mel,
        mel_postnet,
        log_duration_output,
        duration_output,
        f0_output,
        energy_output,
        _,
        _,
        mel_len,
    ) = model(
        texts,
        src_len,
        max_src_len=torch.max(src_len).item(),
        d_vec=d_vec,
        x_vec=x_vec,
        adain=adain,
        speaker=speakers,
        use_gst=args.gst,
        gst=gst,
    )

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(hp.test_path, exist_ok=True)

    utils.vocoder_infer(
        mel_postnet.transpose(1, 2),
        vocoder,
        [
            os.path.join(hp.test_path, "{}_{}.wav".format(prefix, file_id))
            for file_id in file_ids
        ],
        mel_len * hp.hop_length,
    )

    for i in range(len(texts)):
        file_id = file_ids[i]
        src_length = src_len[i]
        mel_length = mel_len[i]
        # Strip the padding from each output before saving or plotting.
        mel_postnet_ = (mel_postnet[i, :mel_length].transpose(
            0, 1).detach().cpu().numpy())
        f0_output_ = f0_output[i, :src_length].detach().cpu().numpy()
        energy_output_ = energy_output[i, :src_length].detach().cpu().numpy()
        # The `np.int` alias was removed in NumPy 1.24; builtin int is the
        # exact equivalent.
        duration_output_ = (
            duration_output[i, :src_length].detach().cpu().numpy().astype(
                int))

        np.save(
            os.path.join(hp.test_path, "{}_{}.npy".format(prefix, file_id)),
            mel_postnet_.T,
        )

        plot_mel(
            [(mel_postnet_, f0_output_, energy_output_, duration_output_)],
            ["Synthesized Spectrogram"],
            filename=os.path.join(hp.test_path,
                                  "{}_{}.png".format(prefix, file_id)),
        )
Beispiel #14
0
    def reprocess(self, batch, cut_list):
        """Collate the selected samples for either the WORLD or a mel vocoder.

        Args:
            batch: Indexable collection of per-sample dicts. Each selected
                sample is read for ``"id"``, ``"condition"``,
                ``"mel_refer"``, ``"D"``, ``"f0"`` and ``"energy"``, plus
                ``"ap_target"`` and ``"sp_target"`` when ``hp.vocoder`` is
                ``'WORLD'`` or ``"mel_target"`` otherwise.
            cut_list: Indices into ``batch`` selecting and ordering the
                samples.

        Returns:
            dict with ``"id"``, padded ``"condition"`` and ``"mel_refer"``,
            the padded targets (``"ap_target"`` and ``"sp_target"``, or
            ``"mel_target"``), padded ``"D"``, ``"f0"`` and ``"energy"``,
            ``"log_D"`` (``log(D + hp.log_offset)``), and the float64 length
            arrays ``"src_len"`` (from the conditions) and ``"mel_len"``
            (from ``sp_target`` in WORLD mode, else from ``mel_target``).
        """
        def gather(key):
            return [batch[idx][key] for idx in cut_list]

        def first_dims(arrays):
            return np.array([arr.shape[0] for arr in arrays],
                            dtype=np.float64)

        use_world = hp.vocoder == 'WORLD'

        ids = gather("id")
        conditions = gather("condition")
        mel_refers = gather("mel_refer")
        if use_world:
            ap_targets = gather("ap_target")
            sp_targets = gather("sp_target")
            frame_source = sp_targets
        else:
            mel_targets = gather("mel_target")
            frame_source = mel_targets
        Ds = gather("D")
        f0s = gather("f0")
        energies = gather("energy")

        # Report, without aborting, any condition/duration length mismatch.
        for cond, durations, utt_id in zip(conditions, Ds, ids):
            if len(cond) == len(durations):
                continue
            print(cond, cond.shape, durations, durations.shape, utt_id)

        length_condition = first_dims(conditions)
        length_mel = first_dims(frame_source)

        conditions = pad_2D(conditions)
        Ds = pad_1D(Ds)
        mel_refers = pad_2D(mel_refers)
        if use_world:
            targets = {
                "ap_target": pad_2D(ap_targets),
                "sp_target": pad_2D(sp_targets),
            }
        else:
            targets = {"mel_target": pad_2D(mel_targets)}
        f0s = pad_1D(f0s)
        energies = pad_1D(energies)
        log_Ds = np.log(Ds + hp.log_offset)

        # Build the result in three steps so the vocoder-specific targets
        # sit between the shared leading and trailing keys.
        out = {
            "id": ids,
            "condition": conditions,
            "mel_refer": mel_refers,
        }
        out.update(targets)
        out.update({
            "D": Ds,
            "log_D": log_Ds,
            "f0": f0s,
            "energy": energies,
            "src_len": length_condition,
            "mel_len": length_mel,
        })

        return out