Python pad_2D Beispiele, utils.pad_2D Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: dataset.py Projekt: xzm2004260/LightSpeech

def reprocess(batch, cut_list):
    """Assemble the samples selected by ``cut_list`` into one padded batch.

    Args:
        batch: Indexable collection of per-sample dicts providing the keys
            "text", "cemb", "D" and "mel_tac2_target".
        cut_list: Indices into ``batch``; their order fixes the batch order.

    Returns:
        dict holding the padded "text", "mel_tac2_target", "cemb" and "D"
        entries plus "length_text" / "length_mel", the axis-0 size of every
        text and mel target.
    """
    def gather(key):
        # Pull one field from every selected sample, in cut_list order.
        return [batch[idx][key] for idx in cut_list]

    raw_texts = gather("text")
    raw_cembs = gather("cemb")
    raw_durations = gather("D")
    raw_mels = gather("mel_tac2_target")

    # Sequence lengths (size of axis 0), stored as float64 arrays.
    length_text = np.array([seq.shape[0] for seq in raw_texts],
                           dtype=np.float64)
    length_mel = np.array([mel.shape[0] for mel in raw_mels],
                          dtype=np.float64)

    # pad_1D / pad_2D are defined elsewhere; presumably they pad every item
    # to a common length -- TODO confirm.
    padded_texts = pad_1D(raw_texts)
    padded_durations = pad_1D(raw_durations)
    padded_mels = pad_2D(raw_mels)
    padded_cembs = pad_2D(raw_cembs)

    return {
        "text": padded_texts,
        "mel_tac2_target": padded_mels,
        "cemb": padded_cembs,
        "D": padded_durations,
        "length_mel": length_mel,
        "length_text": length_text
    }

Beispiel #2

0

Datei anzeigen

    def reprocess(self, batch, cut_list):
        """Build one padded batch from the samples picked by ``cut_list``.

        Args:
            batch: Indexable collection of per-sample dicts.
            cut_list: Indices into ``batch``; their order fixes the batch
                order.

        Returns:
            dict with the sample ids, the padded per-sample fields,
            "log_D" (``np.log`` of the padded durations plus
            ``hparams.log_offset``), "speaker_embed" (per-sample embeddings
            concatenated along axis 0) and "src_len" / "mel_len" (axis-0
            size of every text / mel target).
        """
        def gather(key):
            # Pull one field from every selected sample, in cut_list order.
            return [batch[idx][key] for idx in cut_list]

        ids = gather("id")
        raw_texts = gather("text")
        raw_mels = gather("mel_target")
        raw_mel_augs = gather("mel_aug")
        raw_durations = gather("D")
        raw_f0s = gather("f0")
        raw_f0_norms = gather("f0_norm")
        raw_f0_norm_augs = gather("f0_norm_aug")
        raw_energies = gather("energy")
        raw_energy_inputs = gather("energy_input")
        raw_energy_input_augs = gather("energy_input_aug")
        raw_speaker_embeds = gather("speaker_embed")

        # Diagnostic only: report samples whose text and duration lengths
        # disagree; processing continues regardless.
        for seq, dur, sample_id in zip(raw_texts, raw_durations, ids):
            if len(seq) == len(dur):
                continue
            print(seq, seq.shape, dur, dur.shape, sample_id)

        # Sequence lengths (size of axis 0), stored as float64 arrays.
        length_text = np.array([seq.shape[0] for seq in raw_texts],
                               dtype=np.float64)
        length_mel = np.array([mel.shape[0] for mel in raw_mels],
                              dtype=np.float64)

        padded_texts = pad_1D(raw_texts)
        padded_durations = pad_1D(raw_durations)
        padded_mels = pad_2D(raw_mels)
        padded_mel_augs = pad_2D(raw_mel_augs)
        padded_f0s = pad_1D(raw_f0s)
        padded_f0_norms = pad_1D(raw_f0_norms)
        padded_f0_norm_augs = pad_1D(raw_f0_norm_augs)
        padded_energies = pad_1D(raw_energies)
        padded_energy_inputs = pad_1D(raw_energy_inputs)
        padded_energy_input_augs = pad_1D(raw_energy_input_augs)
        # The offset is added before the log; presumably it guards against
        # log(0) on padded / zero durations -- TODO confirm.
        log_durations = np.log(padded_durations + hparams.log_offset)
        speaker_embeds = np.concatenate(raw_speaker_embeds, axis=0)

        return {
            "id": ids,
            "text": padded_texts,
            "mel_target": padded_mels,
            "mel_aug": padded_mel_augs,
            "D": padded_durations,
            "log_D": log_durations,
            "f0": padded_f0s,
            "f0_norm": padded_f0_norms,
            "f0_norm_aug": padded_f0_norm_augs,
            "energy": padded_energies,
            "energy_input": padded_energy_inputs,
            "energy_input_aug": padded_energy_input_augs,
            "speaker_embed": speaker_embeds,
            "src_len": length_text,
            "mel_len": length_mel
        }

Beispiel #3

0

Datei anzeigen

Datei: dataset.py Projekt: yang123qwe/Fastsinging

def reprocess(batch, cut_list):
    """Assemble the samples selected by ``cut_list`` into one padded batch.

    Args:
        batch: Indexable collection of per-sample dicts providing the keys
            "condition1", "condition2", "mel_target", "norm_f0", "D" and
            "mel_in".
        cut_list: Indices into ``batch``; their order fixes the batch order.

    Returns:
        dict with the padded fields, "src_pos" / "mel_pos" (1-based position
        indices per sequence, right-padded with 0 to the longest length),
        "lens" (mel lengths as a ``torch.LongTensor``) and "mel_max_len"
        (the longest mel length as an int).
    """
    def gather(key):
        # Pull one field from every selected sample, in cut_list order.
        return [batch[idx][key] for idx in cut_list]

    def position_rows(lengths):
        # One row per sequence: 1..n followed by zeros up to the longest n.
        longest = int(max(lengths))
        rows = [
            np.pad(list(range(1, int(n) + 1)), (0, longest - int(n)),
                   'constant')
            for n in lengths
        ]
        return np.array(rows), longest

    raw_c1 = gather("condition1")
    raw_c2 = gather("condition2")
    raw_mels = gather("mel_target")
    raw_norm_f0s = gather("norm_f0")
    raw_durations = gather("D")
    raw_mel_ins = gather("mel_in")

    # Source lengths come from condition1 only (size of axis 0), as float64.
    length_C = np.array([cond.shape[0] for cond in raw_c1], dtype=np.float64)
    src_pos, _ = position_rows(length_C)

    length_mel = np.array([mel.shape[0] for mel in raw_mels],
                          dtype=np.float64)
    lens = torch.LongTensor(length_mel)
    mel_pos, max_mel_len = position_rows(length_mel)

    padded_c1 = pad_1D(raw_c1)
    padded_c2 = pad_1D(raw_c2)
    padded_durations = pad_1D(raw_durations)
    # The frame-level features are all padded to the same maxlen.
    padded_norm_f0s = pad_2D(raw_norm_f0s, maxlen=max_mel_len)
    padded_mels = pad_2D(raw_mels, maxlen=max_mel_len)
    padded_mel_ins = pad_2D(raw_mel_ins, maxlen=max_mel_len)

    return {
        "condition1": padded_c1,
        "condition2": padded_c2,
        "mel_target": padded_mels,
        "norm_f0": padded_norm_f0s,
        "mel_in": padded_mel_ins,
        "D": padded_durations,
        "mel_pos": mel_pos,
        "src_pos": src_pos,
        "lens": lens,
        "mel_max_len": max_mel_len
    }

Beispiel #4

0

Datei anzeigen

Datei: synthesize.py Projekt: qqwwaass11/STYLER

def preprocess_audio(mel, energy, f0, f0_norm):
    """Turn one utterance's features into single-item batch tensors.

    Each input gets a leading axis of size 1 (``x[None]``), is passed through
    ``utils.pad_2D`` / ``utils.pad_1D`` and moved to ``device`` as a float
    tensor.

    Returns:
        Tuple ``(mel_target, mel_len, energy, f0, f0_norm)`` where
        ``mel_len`` is a one-element long tensor holding the size of axis 1
        of the padded mel array.
    """
    def as_float_tensor(array):
        # numpy -> float tensor on the module-level device.
        return torch.from_numpy(array).float().to(device)

    padded_mel = utils.pad_2D(mel[None])
    padded_f0 = utils.pad_1D(f0[None])
    padded_f0_norm = utils.pad_1D(f0_norm[None])
    padded_energy = utils.pad_1D(energy[None])

    mel_target = as_float_tensor(padded_mel)
    frame_count = np.array([padded_mel.shape[1]])
    mel_len = torch.from_numpy(frame_count).long().to(device)
    f0_tensor = as_float_tensor(padded_f0)
    f0_norm_tensor = as_float_tensor(padded_f0_norm)
    energy_tensor = as_float_tensor(padded_energy)

    return mel_target, mel_len, energy_tensor, f0_tensor, f0_norm_tensor

Beispiel #5

0

Datei anzeigen

Datei: dataset.py Projekt: hommmm/mandarin-tts

    def reprocess(self, batch, cut_list):
        """Build one padded batch from the samples picked by ``cut_list``.

        Args:
            batch: Indexable collection of per-sample dicts.
            cut_list: Indices into ``batch``; their order fixes the batch
                order.

        Returns:
            dict with ids, padded "text", "hz_text" (padded when
            ``hp.with_hanzi`` is set, otherwise ``None``), padded
            "mel_target", "D" and "log_D" (both the same padded,
            mean-shifted durations) and "src_len" / "mel_len" (axis-0 size
            of every text / mel target).
        """
        def gather(key):
            # Pull one field from every selected sample, in cut_list order.
            return [batch[idx][key] for idx in cut_list]

        ids = gather("id")
        raw_texts = gather("text")
        # Hanzi text is optional; None marks "not requested".
        raw_hz_texts = gather("hz_text") if hp.with_hanzi else None
        raw_mels = gather("mel_target")
        raw_durations = gather("D")

        # Diagnostic only: report samples whose text and duration lengths
        # disagree; processing continues regardless.
        for seq, dur, sample_id in zip(raw_texts, raw_durations, ids):
            if len(seq) == len(dur):
                continue
            print('error:', seq, seq.shape, dur, dur.shape, sample_id)

        # Sequence lengths (size of axis 0), stored as float64 arrays.
        length_text = np.array([seq.shape[0] for seq in raw_texts],
                               dtype=np.float64)
        length_mel = np.array([mel.shape[0] for mel in raw_mels],
                              dtype=np.float64)

        padded_texts = pad_1D(raw_texts)
        hz_texts = None if raw_hz_texts is None else pad_1D(raw_hz_texts)

        # Durations are shifted by the configured mean before padding.
        shifted_durations = [dur - hp.duration_mean for dur in raw_durations]
        padded_durations = pad_1D(shifted_durations)
        padded_mels = pad_2D(raw_mels)

        return {
            "id": ids,
            "text": padded_texts,
            "hz_text": hz_texts,
            "mel_target": padded_mels,
            "D": padded_durations,
            # NOTE(review): "log_D" carries the same array as "D"; no log is
            # applied here -- confirm this is what consumers expect.
            "log_D": padded_durations,
            "src_len": length_text,
            "mel_len": length_mel
        }

Beispiel #6

0

Datei anzeigen

Datei: dataset.py Projekt: pohanchi/FastSpeech2

    def reprocess(self, batch, cut_list):
        """Build one padded batch from the samples picked by ``cut_list``.

        Args:
            batch: Indexable collection of per-sample dicts.
            cut_list: Indices into ``batch``; their order fixes the batch
                order.

        Returns:
            dict with ids, padded "text", "mel_target", "D", "f0", "energy",
            "log_D" (``np.log`` of the padded durations plus
            ``hp.log_offset``) and "src_len" / "mel_len" (axis-0 size of
            every text / mel target). When ``hp.use_spk_embed`` is set the
            dict also holds "spk_ids".

        Raises:
            NotImplementedError: ``hp.use_spk_embed`` is set but
                ``hp.dataset`` is neither "VCTK" nor "LibriTTS".
        """
        def gather(key):
            # Pull one field from every selected sample, in cut_list order.
            return [batch[idx][key] for idx in cut_list]

        ids = gather("id")
        with_speakers = hp.use_spk_embed
        if with_speakers:
            if hp.dataset not in ("VCTK", "LibriTTS"):
                raise NotImplementedError(
                    "Looking up datset {} speaker table not implemented".
                    format(hp.dataset))
            # The speaker key is the part of the id before the first "_".
            spk_ids = [
                self.spk_table[sample_id.split("_")[0]] for sample_id in ids
            ]

        raw_texts = gather("text")
        raw_mels = gather("mel_target")
        raw_durations = gather("D")
        raw_f0s = gather("f0")
        raw_energies = gather("energy")

        # Diagnostic only: report samples whose text and duration lengths
        # disagree; processing continues regardless.
        for seq, dur, sample_id in zip(raw_texts, raw_durations, ids):
            if len(seq) == len(dur):
                continue
            print(seq, seq.shape, dur, dur.shape, sample_id)

        # Sequence lengths (size of axis 0), stored as float64 arrays.
        length_text = np.array([seq.shape[0] for seq in raw_texts],
                               dtype=np.float64)
        length_mel = np.array([mel.shape[0] for mel in raw_mels],
                              dtype=np.float64)

        padded_texts = pad_1D(raw_texts)
        padded_durations = pad_1D(raw_durations)
        padded_mels = pad_2D(raw_mels)
        padded_f0s = pad_1D(raw_f0s)
        padded_energies = pad_1D(raw_energies)
        log_durations = np.log(padded_durations + hp.log_offset)

        out = {
            "id": ids,
            "text": padded_texts,
            "mel_target": padded_mels,
            "D": padded_durations,
            "log_D": log_durations,
            "f0": padded_f0s,
            "energy": padded_energies,
            "src_len": length_text,
            "mel_len": length_mel
        }
        if with_speakers:
            out["spk_ids"] = spk_ids

        return out

Beispiel #7

0

Datei anzeigen

    def reprocess(self, batch, cut_list):
        """Build one padded batch from the samples picked by ``cut_list``.

        Args:
            batch: Indexable collection of per-sample dicts providing the
                keys "text", "mel_target", "D", "f0" and "energy".
            cut_list: Indices into ``batch``; their order fixes the batch
                order.

        Returns:
            dict with the padded fields, "src_pos" / "mel_pos" (1-based
            position indices per sequence, right-padded with 0 to the
            longest length) and "mel_len" (axis-0 size of every mel target).
        """
        def gather(key):
            # Pull one field from every selected sample, in cut_list order.
            return [batch[idx][key] for idx in cut_list]

        def position_rows(lengths):
            # One row per sequence: 1..n followed by zeros up to the
            # longest n.
            longest = int(max(lengths))
            rows = [
                np.pad(list(range(1, int(n) + 1)), (0, longest - int(n)),
                       'constant')
                for n in lengths
            ]
            return np.array(rows)

        raw_texts = gather("text")
        raw_mels = gather("mel_target")
        raw_durations = gather("D")
        raw_f0s = gather("f0")
        raw_energies = gather("energy")

        # Sequence lengths (size of axis 0), stored as float64 arrays.
        length_text = np.array([seq.shape[0] for seq in raw_texts],
                               dtype=np.float64)
        src_pos = position_rows(length_text)

        length_mel = np.array([mel.shape[0] for mel in raw_mels],
                              dtype=np.float64)
        mel_pos = position_rows(length_mel)

        padded_texts = pad_1D(raw_texts)
        padded_durations = pad_1D(raw_durations)
        padded_mels = pad_2D(raw_mels)
        padded_f0s = pad_1D(raw_f0s)
        padded_energies = pad_1D(raw_energies)

        return {
            "text": padded_texts,
            "mel_target": padded_mels,
            "D": padded_durations,
            "f0": padded_f0s,
            "energy": padded_energies,
            "mel_pos": mel_pos,
            "src_pos": src_pos,
            "mel_len": length_mel
        }

Beispiel #8

0

Datei anzeigen

Datei: dataset.py Projekt: wonwizard/Korean-FastSpeech2-Pytorch

    def reprocess(self, batch, cut_list):
        """Build one padded batch from the samples picked by ``cut_list``.

        f0 and energy are passed through ``min_max_norm`` with the bounds
        configured in ``hparams`` before being padded.

        Args:
            batch: Indexable collection of per-sample dicts.
            cut_list: Indices into ``batch``; their order fixes the batch
                order.

        Returns:
            dict with ids, padded "text", "mel_target", "D", "f0", "energy",
            "log_D" (``np.log`` of the padded durations plus
            ``hparams.log_offset``) and "src_len" / "mel_len" (axis-0 size
            of every text / mel target).
        """
        def gather(key):
            # Pull one field from every selected sample, in cut_list order.
            return [batch[idx][key] for idx in cut_list]

        ids = gather("id")
        raw_texts = gather("text")
        raw_mels = gather("mel_target")
        raw_durations = gather("D")
        normed_f0s = [
            min_max_norm(f0,
                         min_val=hparams.f0_min,
                         max_val=hparams.f0_max) for f0 in gather("f0")
        ]
        normed_energies = [
            min_max_norm(energy,
                         min_val=hparams.energy_min,
                         max_val=hparams.energy_max)
            for energy in gather("energy")
        ]

        # Diagnostic only: report samples whose text and duration lengths
        # disagree; processing continues regardless.
        for seq, dur, sample_id in zip(raw_texts, raw_durations, ids):
            if len(seq) == len(dur):
                continue
            print('the dimension of text and duration should be the same')
            print('text: ', sequence_to_text(seq))
            print(seq, seq.shape, dur, dur.shape, sample_id)

        # Sequence lengths (size of axis 0), stored as float64 arrays.
        length_text = np.array([seq.shape[0] for seq in raw_texts],
                               dtype=np.float64)
        length_mel = np.array([mel.shape[0] for mel in raw_mels],
                              dtype=np.float64)

        padded_texts = pad_1D(raw_texts)
        padded_durations = pad_1D(raw_durations)
        padded_mels = pad_2D(raw_mels)
        padded_f0s = pad_1D(normed_f0s)
        padded_energies = pad_1D(normed_energies)
        log_durations = np.log(padded_durations + hparams.log_offset)

        return {
            "id": ids,
            "text": padded_texts,
            "mel_target": padded_mels,
            "D": padded_durations,
            "log_D": log_durations,
            "f0": padded_f0s,
            "energy": padded_energies,
            "src_len": length_text,
            "mel_len": length_mel
        }

Beispiel #9

0

Datei anzeigen

Datei: dataset.py Projekt: xiaochunan/acc-tacotron2

def reprocess(batch, cut_list):
    """Assemble the samples selected by ``cut_list`` into one padded batch.

    Args:
        batch: Indexable collection of per-sample dicts providing the keys
            "text", "mel_target", "D" and "stop_token".
        cut_list: Indices into ``batch``; their order fixes the batch order.

    Returns:
        dict with the padded fields (stop tokens are padded with ``PAD=1.``),
        "src_pos" / "mel_pos" (1-based position indices per sequence,
        right-padded with 0 to the longest length) and "mel_max_len" (the
        longest mel length as an int).
    """
    def gather(key):
        # Pull one field from every selected sample, in cut_list order.
        return [batch[idx][key] for idx in cut_list]

    def position_rows(lengths):
        # One row per sequence: 1..n followed by zeros up to the longest n.
        longest = int(max(lengths))
        rows = [
            np.pad(list(range(1, int(n) + 1)), (0, longest - int(n)),
                   'constant')
            for n in lengths
        ]
        return np.array(rows), longest

    raw_texts = gather("text")
    raw_mels = gather("mel_target")
    raw_durations = gather("D")
    raw_stop_tokens = gather("stop_token")

    # Sequence lengths (size of axis 0), stored as float64 arrays.
    length_text = np.array([seq.shape[0] for seq in raw_texts],
                           dtype=np.float64)
    src_pos, _ = position_rows(length_text)

    length_mel = np.array([mel.shape[0] for mel in raw_mels],
                          dtype=np.float64)
    mel_pos, max_mel_len = position_rows(length_mel)

    padded_texts = pad_1D(raw_texts)
    padded_durations = pad_1D(raw_durations)
    padded_mels = pad_2D(raw_mels)
    padded_stop_tokens = pad_1D(raw_stop_tokens, PAD=1.)

    return {
        "text": padded_texts,
        "mel_target": padded_mels,
        "D": padded_durations,
        "stop_token": padded_stop_tokens,
        "mel_pos": mel_pos,
        "src_pos": src_pos,
        "mel_max_len": max_mel_len
    }

Beispiel #10

0

Datei anzeigen

    def reprocess(self, batch, cut_list):
        """Build one padded batch from the samples picked by ``cut_list``.

        Args:
            batch: Indexable collection of per-sample dicts.
            cut_list: Indices into ``batch``; their order fixes the batch
                order.

        Returns:
            dict with ids, padded "text", "mel_target", "D", "f0", "energy",
            "log_D" (``np.log`` of the padded durations plus
            ``hparams.log_offset``) and "src_len" / "mel_len" (axis-0 size
            of every text / mel target).
        """
        def gather(key):
            # Pull one field from every selected sample, in cut_list order.
            return [batch[idx][key] for idx in cut_list]

        ids = gather("id")
        raw_texts = gather("text")
        raw_mels = gather("mel_target")
        raw_durations = gather("D")
        raw_f0s = gather("f0")
        raw_energies = gather("energy")

        # Author's note: text (phonemes) and duration must have the same
        # number of entries; the durations sum to the frame count of the
        # mel spectrogram, f0 and energy. Mismatches are only reported.
        for seq, dur, sample_id in zip(raw_texts, raw_durations, ids):
            if len(seq) == len(dur):
                continue
            print(seq, seq.shape, dur, dur.shape, sample_id)

        # Author's note: the phoneme count and the mel frame count differ,
        # so both lengths are recorded (as float64 arrays).
        length_text = np.array([seq.shape[0] for seq in raw_texts],
                               dtype=np.float64)
        length_mel = np.array([mel.shape[0] for mel in raw_mels],
                              dtype=np.float64)

        padded_texts = pad_1D(raw_texts)
        padded_durations = pad_1D(raw_durations)
        # Author's example: a mel is <831, 80> -- 831 time frames, each with
        # 80 frequency coefficients as features.
        padded_mels = pad_2D(raw_mels)
        padded_f0s = pad_1D(raw_f0s)
        padded_energies = pad_1D(raw_energies)
        log_durations = np.log(padded_durations + hparams.log_offset)

        return {
            "id": ids,
            "text": padded_texts,
            "mel_target": padded_mels,
            "D": padded_durations,
            "log_D": log_durations,
            "f0": padded_f0s,
            "energy": padded_energies,
            "src_len": length_text,  # number of phonemes
            "mel_len": length_mel  # number of frames
        }

Beispiel #11

0

Datei anzeigen

    def __call__(self, batch):
        """Collate a list of samples into padded tensors.

        Each ``batch[i]`` is a tuple of ``__getitem__`` outputs; the first
        three items are used here as (mel, speaker embedding, pitch).

        Args:
            batch: Sequence of per-sample tuples.

        Returns:
            Tuple ``(melsp, spk_emb, pitch, len_org)``: the mels after
            ``pad_2D``, the stacked speaker embeddings, the pitch tracks
            after ``pad_1D`` with a trailing axis added, and the axis-0 size
            of every mel as a float64 tensor.
        """
        # NOTE: an earlier variant that randomly cropped each sample and
        # padded it to a fixed length was disabled in favour of padding the
        # full sequences below.
        print(len(batch))
        # Debug output: inspect at most the first three samples. Indexing
        # batch[1] / batch[2] directly would raise IndexError on batches
        # smaller than three (e.g. a short final batch).
        for sample in batch[:3]:
            print(len(sample))
        print("BEFORE batch[0][0].shape:", batch[0][0].shape)
        print("BEFORE batch[0][1].shape:", batch[0][1].shape)
        print("BEFORE batch[0][2].shape:", batch[0][2].shape)

        mels = [sample[0] for sample in batch]
        embeddings = [sample[1] for sample in batch]
        pitches = [sample[2] for sample in batch]

        # Mel lengths (size of axis 0), stored as float64.
        len_org = np.array([mel.shape[0] for mel in mels], dtype=np.float64)
        print("len_org:", len_org)

        melsp = torch.from_numpy(pad_2D(mels))
        spk_emb = torch.from_numpy(np.array(embeddings))
        pitch = torch.from_numpy(pad_1D(pitches)).unsqueeze(-1)
        len_org = torch.from_numpy(len_org)
        print("AFTER batch[0][0].shape:", melsp[0].shape)
        print("AFTER batch[0][1].shape:", spk_emb[0].shape)
        print("AFTER batch[0][2].shape:", pitch[0].shape)
        print("AFTER melsp.shape:", melsp.shape)
        print("AFTER spk_emb.shape:", spk_emb.shape)
        print("AFTER pitch.shape:", pitch.shape)
        print("AFTER len_org.shape:", len_org.shape)

        return melsp, spk_emb, pitch, len_org

Beispiel #12

0

Datei anzeigen

    def reprocess(self, batch, cut_list):
        """Build one padded batch from the samples picked by ``cut_list``.

        The target features depend on ``hp.vocoder``: with 'WORLD' the batch
        carries "ap_target" and "sp_target", otherwise "mel_target".

        Args:
            batch: Indexable collection of per-sample dicts.
            cut_list: Indices into ``batch``; their order fixes the batch
                order.

        Returns:
            dict with ids, padded "condition", "mel_refer", the vocoder
            specific target(s), "D", "f0", "energy", "log_D" (``np.log`` of
            the padded durations plus ``hp.log_offset``), "src_len" (axis-0
            size of every condition) and "mel_len" (axis-0 size of every
            "sp_target" for WORLD, else of every "mel_target").
        """
        def gather(key):
            # Pull one field from every selected sample, in cut_list order.
            return [batch[idx][key] for idx in cut_list]

        use_world = hp.vocoder == 'WORLD'

        ids = gather("id")
        raw_conditions = gather("condition")
        raw_mel_refers = gather("mel_refer")
        if use_world:
            raw_aps = gather("ap_target")
            raw_sps = gather("sp_target")
        else:
            raw_mels = gather("mel_target")
        raw_durations = gather("D")
        raw_f0s = gather("f0")
        raw_energies = gather("energy")

        # Diagnostic only: report samples whose condition and duration
        # lengths disagree; processing continues regardless.
        for cond, dur, sample_id in zip(raw_conditions, raw_durations, ids):
            if len(cond) == len(dur):
                continue
            print(cond, cond.shape, dur, dur.shape, sample_id)

        # Sequence lengths (size of axis 0), stored as float64 arrays.
        length_condition = np.array(
            [cond.shape[0] for cond in raw_conditions], dtype=np.float64)
        frame_source = raw_sps if use_world else raw_mels
        length_mel = np.array([frames.shape[0] for frames in frame_source],
                              dtype=np.float64)

        padded_conditions = pad_2D(raw_conditions)
        padded_durations = pad_1D(raw_durations)
        padded_mel_refers = pad_2D(raw_mel_refers)
        if use_world:
            padded_aps = pad_2D(raw_aps)
            padded_sps = pad_2D(raw_sps)
        else:
            padded_mels = pad_2D(raw_mels)
        padded_f0s = pad_1D(raw_f0s)
        padded_energies = pad_1D(raw_energies)
        log_durations = np.log(padded_durations + hp.log_offset)

        # Keys are inserted in the same order for both vocoder variants;
        # only the target entries in the middle differ.
        out = {
            "id": ids,
            "condition": padded_conditions,
            "mel_refer": padded_mel_refers,
        }
        if use_world:
            out["ap_target"] = padded_aps
            out["sp_target"] = padded_sps
        else:
            out["mel_target"] = padded_mels
        out["D"] = padded_durations
        out["log_D"] = log_durations
        out["f0"] = padded_f0s
        out["energy"] = padded_energies
        out["src_len"] = length_condition
        out["mel_len"] = length_mel

        return out