def main():
    """Construct the four corpus loaders and a model, then train and evaluate.

    Loaders cover English sentences, the GT4HistOCR German corpus, and two
    generated datasets; all use 25-sample batches and (32, 256) image size.
    """
    loaders = (
        DataLoader('data/sentences/', 25, (32, 256)),
        DataLoader('data/GT4HistOCR/', 25, (32, 256)),
        DataLoader('data/english_generated/', 25, (32, 256)),
        DataLoader('data/something/', 25, (32, 256)),
    )
    model = Model()
    train(model, *loaders)
    print(test(model, *loaders))
	def get_vhrd_embeddings(self, data_fname, mode, save_name, use_saved_embeddings):
		"""Return ``(x, y)`` built from pretrainer embeddings, with a pickle cache.

		If ``use_saved_embeddings`` is true and ``save_name`` exists, embeddings
		are loaded from that pickle. Otherwise the raw data is loaded from
		``data_fname`` (in ``mode``), embedded via ``self.pretrainer``, and the
		result is cached to ``save_name`` for subsequent runs.
		"""
		if use_saved_embeddings and os.path.exists(save_name):
			with open(save_name, 'rb') as handle:
				data = cPickle.load(handle)
		else:
			data_loader = DataLoader(data_fname, mode)
			data = data_loader.load_data()
			# Embedding requires a pretrainer; fail loudly if it was never set.
			# (Fixed `assert not x is None` to the idiomatic `x is not None`.)
			assert self.pretrainer is not None
			data = self.pretrainer.get_embeddings(data)

			with open(save_name, 'wb') as handle:
				cPickle.dump(data, handle)

		x, y = self._build_data(data)
		return x, y
Ejemplo n.º 3
0
def init_classes():
    """Load the class list from the configured split files and publish it.

    File paths are read from the `gol` global registry; the resulting list
    is stored back under the key "classes".
    """
    classes = DataLoader.loadClasses(
        gol.get_val("trainFile"),
        gol.get_val("validationFile"),
        gol.get_val("testFile"),
    )  # a list
    gol.set_val("classes", classes)
Ejemplo n.º 4
0
def main():
    """Train the PostNet (mel-spectrogram -> linear magnitude) model.

    Builds the post-processing dataset, trains with Adam and MSE loss,
    logs to TensorBoard, and checkpoints every ``hp.save_step`` steps.
    """
    dataset = get_post_dataset()
    global_step = 0

    m = nn.DataParallel(ModelPostNet().cuda())

    m.train()
    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)

    writer = SummaryWriter()

    for epoch in range(hp.epochs):

        dataloader = DataLoader(dataset,
                                batch_size=hp.batch_size,
                                shuffle=True,
                                collate_fn=collate_fn_postnet,
                                drop_last=True,
                                num_workers=0)
        pbar = tqdm(dataloader)
        for i, data in enumerate(pbar):
            pbar.set_description("Processing at epoch %d" % epoch)
            global_step += 1
            # Learning-rate schedule applies only for the first 400k steps.
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            mel, mag = data

            mel = mel.cuda()
            mag = mag.cuda()

            mag_pred = m.forward(mel)

            loss = nn.MSELoss()(mag_pred, mag)
            if global_step % 10 == 0:
                print('total_loss==', loss.item())
            writer.add_scalars('training_loss', {
                'loss': loss,
            }, global_step)

            optimizer.zero_grad()
            # Calculate gradients
            loss.backward()

            # Clip gradient norm to 1.0 to stabilise training.
            nn.utils.clip_grad_norm_(m.parameters(), 1.)

            # Update weights
            optimizer.step()

            if global_step % hp.save_step == 0:
                t.save(
                    {
                        'model': m.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(hp.checkpoint_path,
                                 'checkpoint_postnet_%d.pth.tar' %
                                 global_step))
Ejemplo n.º 5
0
def main():
    """Train the transformer model, logging per-step and per-epoch losses.

    Logs the model graph once, writes scalar losses to TensorBoard, and
    checkpoints every ``hp.save_step`` epochs.
    """
    dataset = get_dataset()

    modelo.train()
    writer = SummaryWriter("runs/tranformer")

    estep = 0  # global step counter across all epochs
    for epoch in range(NUM_EPOCHS):
        dataloader = DataLoader(dataset,
                                batch_size=hp.batch_size,
                                collate_fn=collate_fn_transformer,
                                drop_last=True,
                                shuffle=True)
        pbar = tqdm(dataloader)
        losses = 0
        for i, data in enumerate(pbar):
            estep = estep + 1
            pbar.set_description("Processing at epoch %d" % epoch)
            character, mel_input, pos_text, pos_mel, _ = data
            character = character.to(DEVICE)
            mel_input = mel_input.to(DEVICE)
            pos_text = pos_text.to(DEVICE)
            pos_mel = pos_mel.to(DEVICE)

            output = modelo(character, mel_input, pos_text, pos_mel)
            # Log the model graph once, on the very first step.
            if estep == 1:
                writer.add_graph(
                    modelo,
                    input_to_model=[character, mel_input, pos_text, pos_mel])

            optimizer.zero_grad()
            loss = loss_fn(output.reshape(-1, output.shape[-1]),
                           character.reshape(-1))
            output = output.transpose(0, 1)
            loss2 = loss.item()
            writer.add_scalar("loss :", loss2, estep)
            print("loss..........." + str(loss2))

            loss.backward()
            optimizer.step()
            losses += loss.item()
        writer.add_scalar("loss2 :", losses, epoch)
        # BUG FIX: the original `epoch + 1 % hp.save_step == 0` parsed as
        # `epoch + (1 % hp.save_step) == 0` due to operator precedence, so
        # checkpoints were saved on the wrong epochs (or never, once
        # hp.save_step > 1 makes 1 % hp.save_step == 1). Parenthesize so a
        # checkpoint is written every `hp.save_step` epochs.
        if (epoch + 1) % hp.save_step == 0:
            t.save(
                {
                    'model': modelo.state_dict(),
                    'optimizer': optimizer.state_dict()
                },
                os.path.join(hp.checkpoint_path,
                             'checkpoint_transformer_%d.pth.tar' % epoch))
    writer.close()
Ejemplo n.º 6
0
def train():
    """Train an MLP classifier on the fetal-health CSV dataset."""
    num_epochs = 1
    learning_rate = 0.05
    batch_size = 8

    loader = DataLoader(os.path.join("data", "fetal_health.csv"))
    loader.standardize_column("baseline value")
    x_train, y_train = loader.load_data(subset="train")
    x_valid, y_valid = loader.load_data(subset="valid")

    num_classes = len(np.unique(y_train))
    num_samples, num_features = x_train.shape

    # Sanity-check that train and validation splits are shape-consistent.
    assert x_train.shape[1] == x_valid.shape[1], \
        "Number of features should be equal!"
    assert x_train.shape[0] == y_train.shape[0], \
        "Number of training samples should be equal!"
    assert x_valid.shape[0] == y_valid.shape[0], \
        "Number of validation samples should be equal!"

    # Device-resident input/label tensors, filled from numpy each epoch.
    dev = get_default_device()
    tx = tensor.Tensor((num_samples, num_features), dev, tensor.float32)
    ty = tensor.Tensor((num_samples, ), dev, tensor.int32)

    sgd = opt.SGD(learning_rate)
    model = create_MLP_model(perceptron_size=10, num_classes=num_classes)
    model.set_optimizer(sgd)
    model.compile([tx], is_train=True, use_graph=True, sequential=False)
    model.train()

    for epoch in range(num_epochs):
        tx.copy_from_numpy(x_train.astype(np.float32))
        ty.copy_from_numpy(y_train.astype(np.int32))
        out, loss = model(tx, ty, 'fp32', spars=None)

        # TODO: Add metric evaluation on validation data
        if epoch % 10 == 0:
            print("training loss = {:.3f}".format(tensor.to_numpy(loss)[0]))
Ejemplo n.º 7
0
def init_dataset():
    """Load the train/validation/test splits and publish them via `gol`.

    File paths come from the `gol` global registry; each resulting array is
    stored back into `gol` under its split name.
    """
    trainFile = gol.get_val("trainFile")
    testFile = gol.get_val("testFile")
    validationFile = gol.get_val("validationFile")
    Train_X, Train_Y, validation_X, validation_Y, Test_X, Test_Y = DataLoader.loadDataset(
        trainFile, validationFile, testFile)
    # NOTE: the original also computed `class_num` and `length` here, but
    # neither was used or stored anywhere — the dead computations were removed.
    gol.set_val("Train_X", Train_X)
    gol.set_val("Train_Y", Train_Y)
    gol.set_val("validation_X", validation_X)
    gol.set_val("validation_Y", validation_Y)
    gol.set_val("Test_X", Test_X)
    gol.set_val("Test_Y", Test_Y)
Ejemplo n.º 8
0
def synthesis(args):
    """Synthesize audio for the test set with transformer + stop-token + postnet.

    Loads three checkpoints (``args.restore_step{1,2,3}``), runs
    autoregressive mel decoding up to ``args.max_len`` frames (halting when
    the predicted stop token exceeds 0.5), converts the predicted magnitude
    spectrogram to a wav, and writes attention-alignment figures.
    """
    m = Model()
    m_post = ModelPostNet()
    m_stop = ModelStopToken()
    m.load_state_dict(load_checkpoint(args.restore_step1, "transformer"))
    m_stop.load_state_dict(load_checkpoint(args.restore_step3, "stop_token"))
    m_post.load_state_dict(load_checkpoint(args.restore_step2, "postnet"))

    m=m.cuda()
    m_post = m_post.cuda()
    m_stop = m_stop.cuda()
    # Inference mode: disable dropout / batch-norm updates.
    m.train(False)
    m_post.train(False)
    m_stop.train(False)
    test_dataset = get_dataset(hp.test_data_csv)
    test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn_transformer, drop_last=True, num_workers=1)
    # Reference utterances (style) drawn from the same CSV, but shuffled.
    ref_dataset = get_dataset(hp.test_data_csv)
    ref_dataloader = DataLoader(ref_dataset, batch_size=1, shuffle=True, collate_fn=collate_fn_transformer, drop_last=True, num_workers=1)

    writer = get_writer(hp.checkpoint_path, hp.log_directory)

    ref_dataloader_iter = iter(ref_dataloader)
    for i, data in enumerate(test_dataloader):
        character, mel, mel_input, pos_text, pos_mel, text_length, mel_length, fname = data
        ref_character, ref_mel, ref_mel_input, ref_pos_text, ref_pos_mel, ref_text_length, ref_mel_length, ref_fname = next(ref_dataloader_iter)
        # NOTE(review): stop_tokens is computed but never used below.
        stop_tokens = t.abs(pos_mel.ne(0).type(t.float) - 1)
        # Replace the ground-truth mel input with a single all-zero GO frame.
        mel_input = t.zeros([1,1,80]).cuda()
        stop=[]
        character = character.cuda()
        mel = mel.cuda()
        mel_input = mel_input.cuda()
        pos_text = pos_text.cuda()
        pos_mel = pos_mel.cuda()
        ref_character = ref_character.cuda()
        ref_mel = ref_mel.cuda()
        ref_mel_input = ref_mel_input.cuda()
        ref_pos_text = ref_pos_text.cuda()
        ref_pos_mel = ref_pos_mel.cuda()

        with t.no_grad():
            start=time.time()
            # Autoregressive frame-by-frame decoding.
            # NOTE(review): this inner loop reuses the name `i`, shadowing the
            # outer enumerate counter for the rest of the iteration.
            for i in range(args.max_len):
                pos_mel = t.arange(1,mel_input.size(1)+1).unsqueeze(0).cuda()
                mel_pred, postnet_pred, attn_probs, decoder_output, attns_enc, attns_dec, attns_style = m.forward(character, mel_input, pos_text, pos_mel, ref_mel, ref_pos_mel)
                stop_token = m_stop.forward(decoder_output)
                # Append the newest predicted frame and continue decoding.
                mel_input = t.cat([mel_input, postnet_pred[:,-1:,:]], dim=1)
                stop.append(t.sigmoid(stop_token).squeeze(-1)[0,-1])
                if stop[-1] > 0.5:
                    print("stop token at " + str(i) + " is :", stop[-1])
                    print("model inference time: ", time.time() - start)
                    break
            # Skip this sample if decoding never produced a stop signal.
            if stop[-1] == 0:
                continue
            mag_pred = m_post.forward(postnet_pred)
            inf_time = time.time() - start
            print("inference time: ", inf_time)

        wav = spectrogram2wav(mag_pred.squeeze(0).cpu().numpy())
        print("rtx : ", (len(wav)/hp.sr) / inf_time)
        wav_path = os.path.join(hp.sample_path, 'wav')
        if not os.path.exists(wav_path):
            os.makedirs(wav_path)
        write(os.path.join(wav_path, "text_{}_ref_{}_synth.wav".format(fname, ref_fname)), hp.sr, wav)
        print("written as text{}_ref_{}_synth.wav".format(fname, ref_fname))
        # Stack the per-layer attention tensors (one list entry per layer)
        # into single tensors for the alignment writer.
        attns_enc_new=[]
        attns_dec_new=[]
        attn_probs_new=[]
        attns_style_new=[]
        for i in range(len(attns_enc)):
            attns_enc_new.append(attns_enc[i].unsqueeze(0))
            attns_dec_new.append(attns_dec[i].unsqueeze(0))
            attn_probs_new.append(attn_probs[i].unsqueeze(0))
            attns_style_new.append(attns_style[i].unsqueeze(0))
        attns_enc = t.cat(attns_enc_new, 0)
        attns_dec = t.cat(attns_dec_new, 0)
        attn_probs = t.cat(attn_probs_new, 0)
        attns_style = t.cat(attns_style_new, 0)

        # Split the head dimension out and move the batch axis first
        # (layers, heads, T_q, T_k) -> (1, layers, heads, T_q, T_k).
        attns_enc = attns_enc.contiguous().view(attns_enc.size(0), 1, hp.n_heads, attns_enc.size(2), attns_enc.size(3))
        attns_enc = attns_enc.permute(1,0,2,3,4)
        attns_dec = attns_dec.contiguous().view(attns_dec.size(0), 1, hp.n_heads, attns_dec.size(2), attns_dec.size(3))
        attns_dec = attns_dec.permute(1,0,2,3,4)
        attn_probs = attn_probs.contiguous().view(attn_probs.size(0), 1, hp.n_heads, attn_probs.size(2), attn_probs.size(3))
        attn_probs = attn_probs.permute(1,0,2,3,4)
        attns_style = attns_style.contiguous().view(attns_style.size(0), 1, hp.n_heads, attns_style.size(2), attns_style.size(3))
        attns_style = attns_style.permute(1,0,2,3,4)

        save_dir = os.path.join(hp.sample_path, 'figure', "text_{}_ref_{}_synth.wav".format(fname, ref_fname))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        writer.add_alignments(attns_enc.detach().cpu(), attns_dec.detach().cpu(), attn_probs.detach().cpu(), attns_style.detach().cpu(), mel_length, text_length, args.restore_step1, 'Validation', save_dir)
Ejemplo n.º 9
0
def main():
    """Train the duration-informed transformer TTS model.

    Optionally restores from checkpoint step ``hp.restore_step``, trains with
    gradient accumulation over ``hp.accum`` batches, validates every
    ``hp.val_step`` steps using a frozen pretrained vocoder, and checkpoints
    every ``hp.save_step`` steps.
    """
    train_dataset = get_dataset(hp.train_data_csv)
    val_dataset = get_dataset(hp.val_data_csv)
    restore_step = hp.restore_step
    global_step = restore_step
    if restore_step != 0:
        restore_flag = True
    else:
        restore_flag = False

    m = Model()
    if os.path.exists('./checkpoints/checkpoint_%s_%d.pth.tar' %
                      ('transformer', global_step)):
        state_dict = t.load('./checkpoints/checkpoint_%s_%d.pth.tar' %
                            ('transformer', global_step))
        new_state_dict = OrderedDict()
        # Strip the 'module.' prefix that DataParallel prepends to keys.
        for k, value in state_dict['model'].items():
            key = k[7:]
            new_state_dict[key] = value

        m.load_state_dict(new_state_dict)

    m = nn.DataParallel(m.cuda())
    m.train()

    # Frozen pretrained vocoder, used only for validation audio.
    vocoder = SmartVocoder(Hyperparameters(parse_args()))
    vocoder.load_state_dict(
        t.load('./mel2audio/checkpoint_step000588458.pth')["state_dict"])
    vocoder = vocoder.cuda()
    vocoder.eval()

    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)

    writer = get_writer(hp.checkpoint_path, hp.log_directory)
    cur_epoch = 0

    for epochs in range(hp.epochs):
        train_dataloader = DataLoader(train_dataset,
                                      batch_size=hp.batch_size,
                                      shuffle=True,
                                      collate_fn=collate_fn_transformer,
                                      drop_last=True,
                                      num_workers=1)
        val_dataloader = DataLoader(val_dataset,
                                    batch_size=hp.batch_size,
                                    shuffle=True,
                                    collate_fn=collate_fn_transformer,
                                    drop_last=True)
        if restore_flag:
            # Recover the epoch counter implied by the restored step count.
            cur_epoch = int(restore_step / len(train_dataloader))
            restore_flag = not restore_flag
        for i, data in enumerate(train_dataloader):
            global_step += 1
            # Learning-rate schedule applies only for the first 400k steps.
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            character, mel, mag, mel_input, pos_text, pos_mel, text_length, mel_length, fname = data

            # Every entry holds the batch-wide maximum mel length.
            mel_max_length_array = t.zeros(mel_length.size(0)).long()
            mel_max_length_array = t.LongTensor(mel_max_length_array)
            mel_max_length_array[:] = t.max(mel_length)
            mel_max_length_array = mel_max_length_array.cuda()

            character = character.cuda()
            mel = mel.cuda()
            mag = mag.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()
            text_length = text_length.cuda()
            mel_length = mel_length.cuda()
            loading_time = time.time()
            mask = get_mask_from_lengths(mel_length).cuda()

            mel_pred, postnet_pred, attn_probs, decoder_outputs, attns_enc, attns_dec, attns_style, post_linear, duration_predictor_output, duration, weights = m.forward(
                character,
                mel_input,
                pos_text,
                pos_mel,
                mel,
                pos_mel,
                mel_max_length_array=mel_max_length_array)

            # L1 losses restricted to valid (non-padded) frames via the mask.
            mel_loss = t.mean(
                t.abs(mel_pred - mel).masked_select(mask.unsqueeze(-1)))
            post_mel_loss = t.mean(
                t.abs(postnet_pred - mel).masked_select(mask.unsqueeze(-1)))
            # Extra weight on linear-spectrogram bins below ~2 kHz.
            n_priority_freq = int(2000 / (hp.sr * 0.5) * (hp.n_fft / 2 + 1))
            post_linear_loss = 0.5 * t.mean(
                t.abs(post_linear - mag).masked_select(mask.unsqueeze(-1))
            ) + 0.5 * t.mean(
                t.abs(post_linear - mag)[:, :, :n_priority_freq].masked_select(
                    mask.unsqueeze(-1)))
            duration_loss = nn.L1Loss()(t.sum(
                duration_predictor_output, -1, keepdim=True),
                                        mel_length) / t.sum(text_length)

            # Scale by 1/hp.accum because gradients accumulate across batches.
            loss = (mel_loss + post_mel_loss + 0.3 * post_linear_loss +
                    duration_loss) / hp.accum
            writer.add_losses(mel_loss.item(), post_mel_loss.item(),
                              0.3 * post_linear_loss, duration_loss,
                              global_step, 'Train')

            # Calculate gradients
            loss.backward()
            msg = "| Epoch: {}, {}/{}th loss : {:.4f} + {:.4f} + {:.4f} + {:.4f} = {:.4f}".format(
                cur_epoch, i, len(train_dataloader), mel_loss, post_mel_loss,
                0.3 * post_linear_loss, duration_loss, loss)
            stream(msg)

            # Step only every hp.accum batches (gradient accumulation).
            if global_step % hp.accum == 0:
                nn.utils.clip_grad_norm_(m.parameters(), 1.)
                # Update weights
                optimizer.step()
                optimizer.zero_grad()

            if global_step % hp.val_step == 0 or global_step == 1:
                validate(m, vocoder, val_dataloader, global_step, writer)

            if global_step % hp.save_step == 0:
                t.save(
                    {
                        'model': m.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(
                        hp.checkpoint_path,
                        'checkpoint_transformer_%d.pth.tar' % global_step))
        if cur_epoch == hp.stop_epoch:
            break
        cur_epoch += 1
        print(' ')
Ejemplo n.º 10
0

# --- Alignment-extraction script (module level) ---
# Loads a trained transformer checkpoint and iterates the dataset in order
# (batch_size=1, no shuffle) to dump attention alignments into ./alignments.
if not os.path.exists('alignments'):
    os.mkdir('alignments')
check_point = './checkpoint/checkpoint_transformer_820000.pth.tar'
# Remap tensors saved on cuda:5 onto cuda:0.
para_file = t.load(check_point, map_location={'cuda:5': 'cuda:0'})

model = nn.DataParallel(Model().cuda())
model.load_state_dict(para_file['model'])
model.eval()
for epoch in range(1):

    dataset = get_dataset()
    dataloader = DataLoader(dataset,
                            batch_size=1,
                            shuffle=False,
                            collate_fn=collate_fn_transformer,
                            drop_last=False,
                            num_workers=1)
    k = 0
    # pbar = tqdm(dataloader)
    # for i, data in enumerate(pbar):
    for character, mel, mel_input, pos_text, pos_mel, _ in dataloader:
        # pbar.set_description("Processing at epoch %d"%epoch)

        # character, mel, mel_input, pos_text, pos_mel, _ = data

        # stop_tokens: 1.0 where pos_mel is padding (== 0), else 0.0.
        stop_tokens = t.abs(pos_mel.ne(0).type(t.float) - 1)

        character = character.cuda()
        mel = mel.cuda()
        mel_input = mel_input.cuda()
        # NOTE(review): the loop body appears truncated at this point in the
        # excerpt — the forward pass / alignment dump presumably follows.
def main():
    """Train the style-transfer transformer TTS model (mel losses only).

    Restores from ``hp.restore_step`` when a matching checkpoint exists,
    trains with gradient accumulation over ``hp.accum`` batches, validates
    every ``hp.val_step`` steps, and checkpoints every ``hp.save_step`` steps.
    """
    train_dataset = get_dataset(hp.train_data_csv)
    val_dataset = get_dataset(hp.val_data_csv)
    restore_step = hp.restore_step
    global_step = restore_step
    if restore_step != 0:
        restore_flag = True
    else:
        restore_flag = False

    m = Model()
    if os.path.exists('./checkpoints/checkpoint_%s_%d.pth.tar' %
                      ('transformer', global_step)):
        state_dict = t.load('./checkpoints/checkpoint_%s_%d.pth.tar' %
                            ('transformer', global_step))
        new_state_dict = OrderedDict()
        # Strip the 'module.' prefix that DataParallel prepends to keys.
        for k, value in state_dict['model'].items():
            key = k[7:]
            new_state_dict[key] = value

        m.load_state_dict(new_state_dict)

    m = nn.DataParallel(m.cuda())
    m.train()
    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)

    writer = get_writer(hp.checkpoint_path, hp.log_directory)
    cur_epoch = 0

    for epochs in range(hp.epochs):
        train_dataloader = DataLoader(train_dataset,
                                      batch_size=hp.batch_size,
                                      shuffle=True,
                                      collate_fn=collate_fn_transformer,
                                      drop_last=True,
                                      num_workers=1)
        val_dataloader = DataLoader(val_dataset,
                                    batch_size=hp.batch_size,
                                    shuffle=True,
                                    collate_fn=collate_fn_transformer,
                                    drop_last=True)
        if restore_flag:
            # Recover the epoch counter implied by the restored step count.
            cur_epoch = int(restore_step / len(train_dataloader))
            restore_flag = not restore_flag
        for i, data in enumerate(train_dataloader):
            global_step += 1
            # Learning-rate schedule applies only for the first 400k steps.
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            character, mel, mel_input, pos_text, pos_mel, text_length, mel_length, fname = data
            character = character.cuda()
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()
            text_length = text_length.cuda()
            mel_length = mel_length.cuda()
            loading_time = time.time()
            mel_pred, postnet_pred, attn_probs, decoder_output, attns_enc, attns_dec, attns_style = m.forward(
                character, mel_input, pos_text, pos_mel, mel, pos_mel)
            mel_loss = nn.L1Loss()(mel_pred, mel)
            post_mel_loss = nn.L1Loss()(postnet_pred, mel)

            # Scale by 1/hp.accum because gradients accumulate across batches.
            loss = (mel_loss + post_mel_loss) / hp.accum
            writer.add_losses(mel_loss.item(), post_mel_loss.item(),
                              global_step, 'Train')

            # Calculate gradients
            loss.backward()
            msg = "| Epoch: {}, {}/{}th loss : {:.4f} + {:.4f} = {:.4f}".format(
                cur_epoch, i, len(train_dataloader), mel_loss, post_mel_loss,
                loss)
            stream(msg)

            # Step only every hp.accum batches (gradient accumulation).
            if global_step % hp.accum == 0:
                nn.utils.clip_grad_norm_(m.parameters(), 1.)
                # Update weights
                optimizer.step()
                optimizer.zero_grad()

            if global_step % hp.val_step == 0 or global_step == 1:
                validate(m, val_dataloader, global_step, writer)

            if global_step % hp.save_step == 0:
                t.save(
                    {
                        'model': m.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(
                        hp.checkpoint_path,
                        'checkpoint_transformer_%d.pth.tar' % global_step))
        if cur_epoch == hp.stop_epoch:
            break
        cur_epoch += 1
        print(' ')
Ejemplo n.º 12
0
def synthesis(args):
    """Synthesize test-set audio with the duration model and two vocoders.

    Restores the transformer from ``args.restore_step1``, predicts durations
    from a reference utterance's style embedding, decodes mel frames
    autoregressively under a moving key/value mask, then renders waveforms
    three ways: flow vocoder on the mel, flow vocoder on the mel derived from
    the post-linear spectrogram, and HiFi-GAN. Mels, linears, wavs, and
    attention figures are written under ``hp.sample_path + '_' + rhythm_scale``.
    """
    m = Model()
    m.load_state_dict(load_checkpoint(args.restore_step1, "transformer"))
    m = m.cuda()
    # Inference mode: disable dropout / batch-norm updates.
    m.train(False)
    vocoder = SmartVocoder(Hyperparameters(parse_args()))
    vocoder.load_state_dict(
        t.load('./mel2audio/merged_STFT_checkpoint.pth')["state_dict"])
    vocoder = vocoder.cuda()
    vocoder.eval()
    # HiFi-GAN generator as an alternative neural vocoder.
    with open('./hifi_gan/config.json') as f:
        data = f.read()
    json_config = json.loads(data)
    h = AttrDict(json_config)
    hifi_gan = Generator(h).cuda()
    state_dict_g = t.load('./hifi_gan/g_00334000', map_location='cuda')
    hifi_gan.load_state_dict(state_dict_g['generator'])
    hifi_gan.eval()
    hifi_gan.remove_weight_norm()

    test_dataset = get_dataset(hp.test_data_csv)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=1,
                                 shuffle=False,
                                 collate_fn=collate_fn_transformer,
                                 drop_last=True,
                                 num_workers=1)
    ref_dataset = get_dataset(hp.test_data_csv_shuf)
    ref_dataloader = DataLoader(ref_dataset,
                                batch_size=1,
                                shuffle=False,
                                collate_fn=collate_fn_transformer,
                                drop_last=True,
                                num_workers=1)

    writer = get_writer(hp.checkpoint_path, hp.log_directory)

    # Mel filterbank used to project predicted linear spectra back to mels.
    mel_basis = t.from_numpy(
        librosa.filters.mel(hp.sr, hp.n_fft, hp.n_mels, 50,
                            11000)).unsqueeze(0)  # (n_mels, 1+n_fft//2)

    # A single reference utterance provides the style for every test sample.
    ref_dataloader_iter = iter(ref_dataloader)
    _, ref_mel, _, _, _, ref_pos_mel, _, _, ref_fname = next(
        ref_dataloader_iter)

    for i, data in enumerate(test_dataloader):
        character, _, _, _, pos_text, _, text_length, _, fname = data
        # Start decoding from a single all-zero GO frame.
        mel_input = t.zeros([1, 1, 80]).cuda()
        character = character.cuda()
        ref_mel = ref_mel.cuda()
        mel_input = mel_input.cuda()
        pos_text = pos_text.cuda()
        with t.no_grad():
            start = time.time()
            # Encode text, fuse in the reference style embedding, and predict
            # per-token durations.
            memory, c_mask, attns_enc, duration_mask = m.encoder(character,
                                                                 pos=pos_text)
            style, coarse_emb = m.ref_encoder(ref_mel)
            memory = t.cat((memory, coarse_emb.expand(-1, memory.size(1), -1)),
                           -1)
            memory = m.memory_coarse_layer(memory)
            duration_predictor_output = m.duration_predictor(
                memory, duration_mask)
            duration = t.ceil(duration_predictor_output)
            duration = duration * duration_mask
            #            max_length = t.sum(duration).type(t.LongTensor)
            #            print("length : ", max_length)

            monotonic_interpolation, pos_mel_, weights = m.length_regulator(
                memory, duration, duration_mask)
            # Key/value mask initially exposes only the first 3 text tokens;
            # it is advanced by update_kv_mask as decoding progresses.
            kv_mask = t.zeros([1, mel_input.size(1),
                               character.size(1)]).cuda()  # B, t', N
            kv_mask[:, :, :3] = 1
            kv_mask = kv_mask.eq(0)
            stop_flag = False
            ctr = 0
            # Autoregressive decoding, up to 1200 frames; after the stop
            # condition fires, decode 10 extra frames before breaking.
            for j in range(1200):
                pos_mel = t.arange(1,
                                   mel_input.size(1) + 1).unsqueeze(0).cuda()
                mel_pred, postnet_pred, attn_probs, decoder_output, attns_dec, attns_style = m.decoder(
                    memory,
                    style,
                    mel_input,
                    c_mask,
                    pos=pos_mel,
                    ref_pos=ref_pos_mel,
                    mono_inter=monotonic_interpolation[:, :mel_input.shape[1]],
                    kv_mask=kv_mask)
                mel_input = t.cat([mel_input, postnet_pred[:, -1:, :]], dim=1)
                #                print("j", j, "mel_input", mel_input.shape)
                if stop_flag and ctr == 10:
                    break
                elif stop_flag:
                    ctr += 1
                kv_mask, stop_flag = update_kv_mask(
                    kv_mask, attn_probs)  # B, t', N --> B, t'+1, N
            # Pad 5 trailing frames, then resample time to args.rhythm_scale.
            postnet_pred = t.cat((postnet_pred,
                                  t.zeros(postnet_pred.size(0), 5,
                                          postnet_pred.size(-1)).cuda()), 1)
            gen_length = mel_input.size(1)
            #            print("gen_length", gen_length)
            post_linear = m.postnet(postnet_pred)
            post_linear = resample(post_linear,
                                   seq_len=mel_input.size(1),
                                   scale=args.rhythm_scale)
            postnet_pred = resample(mel_input,
                                    seq_len=mel_input.size(1),
                                    scale=args.rhythm_scale)
            inf_time = time.time() - start
            print("inference time: ", inf_time)
            #            print("speech_rate: ", len(postnet_pred[0])/len(character[0]))

            # Flow vocoder: invert the (de-normalised) mel with a noise seed.
            postnet_pred_v = postnet_pred.transpose(2, 1)
            postnet_pred_v = (postnet_pred_v * 100 + 20 - 100) / 20
            B, C, T = postnet_pred_v.shape
            z = t.randn(1, 1, T * hp.hop_length).cuda()
            z = z * 0.6  # Temp
            t.cuda.synchronize()
            timestemp = time.time()
            with t.no_grad():
                y_gen = vocoder.reverse(z, postnet_pred_v).squeeze()
            t.cuda.synchronize()
            print('{} seconds'.format(time.time() - timestemp))
            wav = y_gen.to(t.device("cpu")).data.numpy()
            wav = np.pad(
                wav, [0, 4800], mode='constant',
                constant_values=0)  #pad 0 for 0.21 sec silence at the end

            # Second rendering: project the predicted linear spectrogram to a
            # mel via the filterbank, then invert with the flow vocoder again.
            post_linear_v = post_linear.transpose(1, 2)
            post_linear_v = 10**((post_linear_v * 100 + 20 - 100) / 20)
            mel_basis = mel_basis.repeat(post_linear_v.shape[0], 1, 1)
            post_linear_mel_v = t.log10(t.bmm(mel_basis.cuda(), post_linear_v))
            B, C, T = post_linear_mel_v.shape
            z = t.randn(1, 1, T * hp.hop_length).cuda()
            z = z * 0.6  # Temp
            t.cuda.synchronize()
            timestemp = time.time()
            with t.no_grad():
                y_gen_linear = vocoder.reverse(z, post_linear_mel_v).squeeze()
            t.cuda.synchronize()
            wav_linear = y_gen_linear.to(t.device("cpu")).data.numpy()
            wav_linear = np.pad(
                wav_linear, [0, 4800], mode='constant',
                constant_values=0)  #pad 0 for 0.21 sec silence at the end

            # Third rendering: HiFi-GAN on the same projected mel.
            wav_hifi = hifi_gan(post_linear_mel_v).squeeze().clamp(
                -1, 1).detach().cpu().numpy()
            wav_hifi = np.pad(
                wav_hifi, [0, 4800], mode='constant',
                constant_values=0)  #pad 0 for 0.21 sec silence at the end

        # Persist mel / linear predictions and the three waveforms.
        mel_path = os.path.join(hp.sample_path + '_' + str(args.rhythm_scale),
                                'mel')
        if not os.path.exists(mel_path):
            os.makedirs(mel_path)
        np.save(
            os.path.join(
                mel_path,
                'text_{}_ref_{}_synth_{}.mel'.format(i, ref_fname,
                                                     str(args.rhythm_scale))),
            postnet_pred.cpu())

        linear_path = os.path.join(
            hp.sample_path + '_' + str(args.rhythm_scale), 'linear')
        if not os.path.exists(linear_path):
            os.makedirs(linear_path)
        np.save(
            os.path.join(
                linear_path, 'text_{}_ref_{}_synth_{}.linear'.format(
                    i, ref_fname, str(args.rhythm_scale))), post_linear.cpu())

        wav_path = os.path.join(hp.sample_path + '_' + str(args.rhythm_scale),
                                'wav')
        if not os.path.exists(wav_path):
            os.makedirs(wav_path)
        write(
            os.path.join(
                wav_path,
                "text_{}_ref_{}_synth_{}.wav".format(i, ref_fname,
                                                     str(args.rhythm_scale))),
            hp.sr, wav)
        print("rtx : ", (len(wav) / hp.sr) / inf_time)

        wav_linear_path = os.path.join(
            hp.sample_path + '_' + str(args.rhythm_scale), 'wav_linear')
        if not os.path.exists(wav_linear_path):
            os.makedirs(wav_linear_path)
        write(
            os.path.join(
                wav_linear_path,
                "text_{}_ref_{}_synth_{}.wav".format(i, ref_fname,
                                                     str(args.rhythm_scale))),
            hp.sr, wav_linear)

        wav_hifi_path = os.path.join(
            hp.sample_path + '_' + str(args.rhythm_scale), 'wav_hifi')
        if not os.path.exists(wav_hifi_path):
            os.makedirs(wav_hifi_path)
        write(
            os.path.join(
                wav_hifi_path,
                "text_{}_ref_{}_synth_{}.wav".format(i, ref_fname,
                                                     str(args.rhythm_scale))),
            hp.sr, wav_hifi)

        show_weights = weights.contiguous().view(weights.size(0), 1, 1,
                                                 weights.size(1),
                                                 weights.size(2))
        # Stack per-layer attention tensors for the alignment writer.
        # NOTE(review): this loop reuses the name `i`, shadowing the outer
        # enumerate counter for the remainder of the iteration.
        attns_enc_new = []
        attns_dec_new = []
        attn_probs_new = []
        attns_style_new = []
        for i in range(len(attns_enc)):
            attns_enc_new.append(attns_enc[i].unsqueeze(0))
            attns_dec_new.append(attns_dec[i].unsqueeze(0))
            attn_probs_new.append(attn_probs[i].unsqueeze(0))
            attns_style_new.append(attns_style[i].unsqueeze(0))
        attns_enc = t.cat(attns_enc_new, 0)
        attns_dec = t.cat(attns_dec_new, 0)
        attn_probs = t.cat(attn_probs_new, 0)
        attns_style = t.cat(attns_style_new, 0)

        # Split the head dimension out and move the batch axis first.
        attns_enc = attns_enc.contiguous().view(attns_enc.size(0), 1,
                                                hp.n_heads, attns_enc.size(2),
                                                attns_enc.size(3))
        attns_enc = attns_enc.permute(1, 0, 2, 3, 4)
        attns_dec = attns_dec.contiguous().view(attns_dec.size(0), 1,
                                                hp.n_heads, attns_dec.size(2),
                                                attns_dec.size(3))
        attns_dec = attns_dec.permute(1, 0, 2, 3, 4)
        attn_probs = attn_probs.contiguous().view(attn_probs.size(0),
                                                  1, hp.n_heads,
                                                  attn_probs.size(2),
                                                  attn_probs.size(3))
        attn_probs = attn_probs.permute(1, 0, 2, 3, 4)
        attns_style = attns_style.contiguous().view(attns_style.size(0), 1,
                                                    hp.n_heads,
                                                    attns_style.size(2),
                                                    attns_style.size(3))
        attns_style = attns_style.permute(1, 0, 2, 3, 4)

        save_dir = os.path.join(
            hp.sample_path + '_' + str(args.rhythm_scale), 'figure',
            "text_{}_ref_{}_synth_{}.wav".format(fname, ref_fname,
                                                 str(args.rhythm_scale)))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        writer.add_alignments(attns_enc.detach().cpu(),
                              attns_dec.detach().cpu(),
                              attn_probs.detach().cpu(),
                              attns_style.detach().cpu(),
                              show_weights.detach().cpu(),
                              [t.tensor(gen_length).type(t.LongTensor)],
                              text_length, args.restore_step1, 'Inference',
                              save_dir)
Ejemplo n.º 13
0
def main():
    if not os.path.exists("logger"):
        os.mkdir("logger")

    dataset = get_dataset()
    global_step = 0

    m = nn.DataParallel(Model().cuda())
    num_param = sum(param.numel() for param in m.parameters())
    print('Number of Transformer-TTS Parameters:', num_param)

    m.train()
    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)

    pos_weight = t.FloatTensor([5.]).cuda()
    # writer = SummaryWriter()

    for epoch in range(hp.epochs):

        dataloader = DataLoader(dataset,
                                batch_size=hp.batch_size,
                                shuffle=True,
                                collate_fn=collate_fn_transformer,
                                drop_last=True,
                                num_workers=16)
        # pbar = tqdm(dataloader)
        for i, data in enumerate(dataloader):
            # pbar.set_description("Processing at epoch %d"%epoch)
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            character, mel, mel_input, pos_text, pos_mel, _ = data

            stop_tokens = t.abs(pos_mel.ne(0).type(t.float) - 1)

            character = character.cuda()
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()
            # print(mel)

            mel_pred, postnet_pred, attn_probs, stop_preds, attns_enc, attns_dec = m.forward(
                character, mel_input, pos_text, pos_mel)

            mel_loss = nn.L1Loss()(mel_pred, mel)
            post_mel_loss = nn.L1Loss()(postnet_pred, mel)

            loss = mel_loss + post_mel_loss

            t_l = loss.item()
            m_l = mel_loss.item()
            m_p_l = post_mel_loss.item()
            # s_l = stop_pred_loss.item()

            with open(os.path.join("logger", "total_loss.txt"),
                      "a") as f_total_loss:
                f_total_loss.write(str(t_l) + "\n")

            with open(os.path.join("logger", "mel_loss.txt"),
                      "a") as f_mel_loss:
                f_mel_loss.write(str(m_l) + "\n")

            with open(os.path.join("logger", "mel_postnet_loss.txt"),
                      "a") as f_mel_postnet_loss:
                f_mel_postnet_loss.write(str(m_p_l) + "\n")

            # with open(os.path.join("logger", "stop_pred_loss.txt"), "a") as f_s_loss:
            #     f_s_loss.write(str(s_l)+"\n")

            # Print
            if global_step % hp.log_step == 0:
                # Now = time.clock()

                str1 = "Epoch [{}/{}], Step [{}], Mel Loss: {:.4f}, Mel PostNet Loss: {:.4f};".format(
                    epoch + 1, hp.epochs, global_step, mel_loss.item(),
                    post_mel_loss.item())
                str2 = "Total Loss: {:.4f}.".format(loss.item())
                current_learning_rate = 0
                for param_group in optimizer.param_groups:
                    current_learning_rate = param_group['lr']
                str3 = "Current Learning Rate is {:.6f}.".format(
                    current_learning_rate)
                # str4 = "Time Used: {:.3f}s, Estimated Time Remaining: {:.3f}s.".format(
                #     (Now-Start), (total_step-current_step)*np.mean(Time))

                print("\n" + str1)
                print(str2)
                print(str3)
                # print(str4)

                with open(os.path.join("logger", "logger.txt"),
                          "a") as f_logger:
                    f_logger.write(str1 + "\n")
                    f_logger.write(str2 + "\n")
                    f_logger.write(str3 + "\n")
                    # f_logger.write(str4 + "\n")
                    f_logger.write("\n")

            # writer.add_scalars('training_loss',{
            #         'mel_loss':mel_loss,
            #         'post_mel_loss':post_mel_loss,

            #     }, global_step)

            # writer.add_scalars('alphas',{
            #         'encoder_alpha':m.module.encoder.alpha.data,
            #         'decoder_alpha':m.module.decoder.alpha.data,
            #     }, global_step)

            # if global_step % hp.image_step == 1:

            #     for i, prob in enumerate(attn_probs):

            #         num_h = prob.size(0)
            #         for j in range(4):

            #             x = vutils.make_grid(prob[j*16] * 255)
            #             writer.add_image('Attention_%d_0'%global_step, x, i*4+j)

            #     for i, prob in enumerate(attns_enc):
            #         num_h = prob.size(0)

            #         for j in range(4):

            #             x = vutils.make_grid(prob[j*16] * 255)
            #             writer.add_image('Attention_enc_%d_0'%global_step, x, i*4+j)

            #     for i, prob in enumerate(attns_dec):

            #         num_h = prob.size(0)
            #         for j in range(4):

            #             x = vutils.make_grid(prob[j*16] * 255)
            #             writer.add_image('Attention_dec_%d_0'%global_step, x, i*4+j)

            optimizer.zero_grad()
            # Calculate gradients
            loss.backward()

            nn.utils.clip_grad_norm_(m.parameters(), 1.)

            # Update weights
            optimizer.step()

            if global_step % hp.save_step == 0:
                t.save(
                    {
                        'model': m.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(
                        hp.checkpoint_path,
                        'checkpoint_transformer_%d.pth.tar' % global_step))
def main():
    """Train the stop-token predictor on top of a frozen Transformer-TTS.

    Restores the transformer acoustic model from its step-100000
    checkpoint, freezes all of its weights, and trains only the small
    ``ModelStopToken`` head with a pos-weighted BCE loss on per-frame
    stop targets.
    """

    dataset = get_dataset(hp.train_data_csv)
    global_step = 0

    m = nn.DataParallel(ModelStopToken().cuda())
    trans_model = Model()
    trans_model.load_state_dict(load_checkpoint(100000, "transformer"))
    for name, param in trans_model.named_parameters():
        param.requires_grad = False
        print(name, " : weight frozen")
    trans_model = nn.DataParallel(trans_model.cuda())

    m.train()
    trans_model.train(False)

    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)

    writer = SummaryWriter()

    # Make sure checkpoints can be written (consistent with the other
    # trainers in this file).
    if not os.path.exists(hp.checkpoint_path):
        os.makedirs(hp.checkpoint_path)

    # Loop-invariant hoist: build the criterion (and its pos_weight
    # tensor) once instead of re-creating them on every training step.
    criterion = nn.BCEWithLogitsLoss(pos_weight=t.tensor(hp.bce_pos_weight))

    for epoch in range(hp.epochs):

        dataloader = DataLoader(dataset,
                                batch_size=hp.batch_size,
                                shuffle=True,
                                collate_fn=collate_fn_transformer,
                                drop_last=True,
                                num_workers=8)
        for i, data in enumerate(dataloader):
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            character, mel, mel_input, pos_text, pos_mel, text_length, mel_length, fname = data
            character = character.cuda()
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()
            mel_length = mel_length.cuda()

            # Target is 1 wherever pos_mel == 0 (padding); additionally the
            # final real frame of each utterance gets +1.
            stop_tokens = t.abs(pos_mel.ne(0).type(t.float) - 1).cuda()
            for j, length in enumerate(mel_length):
                stop_tokens[j, length - 1] += 1

            mel_pred, postnet_pred, attn, decoder_output, _, attn_dec, attn_style = trans_model.forward(
                character, mel_input, pos_text, pos_mel, mel, pos_mel)
            stop_preds = m.forward(decoder_output)

            if global_step % 100 == 0:
                print("pos_mel", pos_mel[0])
                print("stop_pred", t.sigmoid(stop_preds.squeeze()[0]))
                print("stop_tokens", stop_tokens[0])

            # Score only real (unpadded) frames.
            mask = get_mask_from_lengths(mel_length)
            stop_preds = stop_preds.squeeze().masked_select(mask)
            stop_tokens = stop_tokens.masked_select(mask)

            loss = criterion(stop_preds, stop_tokens)

            print("| Epoch: {}, {}/{}th loss : {:.4f}".format(
                epoch, i, len(dataloader), loss))

            writer.add_scalars('training_loss', {
                'loss': loss,
            }, global_step)

            optimizer.zero_grad()
            # Calculate gradients
            loss.backward()

            nn.utils.clip_grad_norm_(m.parameters(), 1.)

            # Update weights
            optimizer.step()

            if global_step % hp.save_step == 0:
                t.save(
                    {
                        'model': m.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(
                        hp.checkpoint_path,
                        'checkpoint_stop_token_%d.pth.tar' % global_step))

        if epoch == hp.stop_epoch:
            break
Ejemplo n.º 15
0
def main():
    """Train Transformer-TTS and report the per-epoch mean loss.

    Same loop as the other transformer trainers, plus TensorBoard
    scalar/attention logging and a per-epoch average-loss printout.
    """

    dataset = get_dataset()
    global_step = 0

    m = nn.DataParallel(Model().cuda())  # TODO: multi-GPU DataParallel config

    m.train()
    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)

    writer = SummaryWriter()

    for epoch in range(hp.epochs):

        dataloader = DataLoader(dataset, batch_size=hp.batch_size, shuffle=True, collate_fn=collate_fn_transformer, drop_last=True, num_workers=16)
        pbar = tqdm(dataloader)
        sum_loss = 0
        for i, data in enumerate(pbar):
            pbar.set_description("Processing at epoch %d" % epoch)
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            character, mel, mel_input, pos_text, pos_mel, _ = data

            character = character.cuda()
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()

            mel_pred, postnet_pred, attn_probs, stop_preds, attns_enc, attns_dec = m.forward(character, mel_input, pos_text, pos_mel)

            mel_loss = nn.L1Loss()(mel_pred, mel)
            post_mel_loss = nn.L1Loss()(postnet_pred, mel)

            loss = mel_loss + post_mel_loss

            writer.add_scalars('training_loss', {
                'mel_loss': mel_loss,
                'post_mel_loss': post_mel_loss,
            }, global_step)

            writer.add_scalars('alphas', {
                'encoder_alpha': m.module.encoder.alpha.data,
                'decoder_alpha': m.module.decoder.alpha.data,
            }, global_step)

            if global_step % hp.image_step == 1:
                # BUG FIX: these loops previously reused the batch index
                # `i`, which corrupted the per-epoch average computed from
                # it below.  Use a distinct layer index instead.
                for layer, prob in enumerate(attn_probs):
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_%d_0' % global_step, x, layer * 4 + j)

                for layer, prob in enumerate(attns_enc):
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_enc_%d_0' % global_step, x, layer * 4 + j)

                for layer, prob in enumerate(attns_dec):
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_dec_%d_0' % global_step, x, layer * 4 + j)

            optimizer.zero_grad()
            # Calculate gradients
            loss.backward()

            nn.utils.clip_grad_norm_(m.parameters(), 1.)

            # Update weights
            optimizer.step()

            if global_step % hp.save_step == 0:
                t.save({'model': m.state_dict(),
                        'optimizer': optimizer.state_dict()},
                       os.path.join(hp.checkpoint_path, 'checkpoint_transformer_%d.pth.tar' % global_step))
            sum_loss += loss.item()

        # `i` is the last batch index, so `i + 1` batches contributed.
        print(f'epoch:{epoch}, sum_loss: {sum_loss / (i + 1)}')
Ejemplo n.º 16
0
def main():
    """Train the PostNet (mel -> linear magnitude) network.

    Supports resuming via ``--step``: restores both model and optimizer
    state from the matching checkpoint before training continues.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--step', type=int, help='Global step to restore checkpoint', default=0)
    args = parser.parse_args()

    dataset = get_post_dataset()
    global_step = args.step

    # NOTE(review): the model is pinned to GPUs 1..7, leaving GPU 0 free —
    # confirm this matches the machine's device layout.
    m = nn.DataParallel(ModelPostNet().cuda(1), device_ids=[i + 1 for i in range(7)])

    if not os.path.exists(hp.checkpoint_path):
        os.makedirs(hp.checkpoint_path)

    if args.step > 0:
        ckpt_path = os.path.join(hp.checkpoint_path, 'checkpoint_postnet_%d.pth.tar' % global_step)
        ckpt = torch.load(ckpt_path)
        m.load_state_dict(ckpt['model'])

    m.train()
    optimizer = torch.optim.Adam(m.parameters(), lr=hp.lr)

    if args.step > 0:
        optimizer.load_state_dict(ckpt['optimizer'])

    writer = SummaryWriter()

    # Loop-invariant hoist: build the criterion once rather than a new
    # nn.L1Loss module on every training step.
    criterion = nn.L1Loss()

    for epoch in range(hp.epochs):

        dataloader = DataLoader(dataset, batch_size=hp.batch_size, shuffle=True, collate_fn=collate_fn_postnet, drop_last=True, num_workers=8)
        pbar = tqdm(dataloader)
        for i, data in enumerate(pbar):
            pbar.set_description("Processing at epoch %d" % epoch)
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            mel, mag = data

            mel = mel.cuda(1)
            mag = mag.cuda(1)

            mag_pred = m.forward(mel)

            loss = criterion(mag_pred, mag)

            writer.add_scalars('training_loss', {
                'loss': loss,
            }, global_step)

            optimizer.zero_grad()

            loss.backward()

            nn.utils.clip_grad_norm_(m.parameters(), 1.)

            optimizer.step()

            if global_step % hp.save_step_post == 0:
                torch.save({'model': m.state_dict(),
                            'optimizer': optimizer.state_dict()},
                           os.path.join(hp.checkpoint_path, 'checkpoint_postnet_%d.pth.tar' % global_step))
Ejemplo n.º 17
0
def main():
    """Train Transformer-TTS with optional checkpoint restore.

    ``--step N`` (N > 0) restores both model and optimizer state from
    ``checkpoint_transformer_N.pth.tar`` and continues counting global
    steps from N.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--step',
                        type=int,
                        help='Global step to restore checkpoint',
                        default=0)
    args = parser.parse_args()

    dataset = get_dataset()

    global_step = args.step

    m = Model().cuda()
    m = nn.DataParallel(m, device_ids=[i for i in range(8)])

    if not os.path.exists(hp.checkpoint_path):
        os.makedirs(hp.checkpoint_path)

    if args.step > 0:
        ckpt_path = os.path.join(
            hp.checkpoint_path,
            'checkpoint_transformer_%d.pth.tar' % global_step)
        ckpt = torch.load(ckpt_path)
        m.load_state_dict(ckpt['model'])

    m.train()
    optimizer = torch.optim.Adam(m.parameters(), lr=hp.lr)

    if args.step > 0:
        optimizer.load_state_dict(ckpt['optimizer'])

    writer = SummaryWriter()

    for epoch in range(hp.epochs):

        dataloader = DataLoader(dataset,
                                batch_size=hp.batch_size,
                                shuffle=True,
                                collate_fn=collate_fn_transformer,
                                drop_last=True,
                                num_workers=16)
        pbar = tqdm(dataloader)
        for i, data in enumerate(pbar):
            pbar.set_description("Processing at epoch %d" % epoch)
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            character, mel, mel_input, pos_text, pos_mel, _ = data

            character = character.cuda()
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()

            mel_pred, postnet_pred, attn_probs, stop_preds, attns_enc, attns_dec = m.forward(
                character, mel_input, pos_text, pos_mel)

            mel_loss = nn.L1Loss()(mel_pred, mel)
            post_mel_loss = nn.L1Loss()(postnet_pred, mel)

            loss = mel_loss + post_mel_loss

            writer.add_scalars('training_loss', {
                'mel_loss': mel_loss,
                'post_mel_loss': post_mel_loss,
            }, global_step)

            writer.add_scalars(
                'alphas', {
                    'encoder_alpha': m.module.encoder.alpha.data,
                    'decoder_alpha': m.module.decoder.alpha.data,
                }, global_step)

            if global_step % hp.image_step == 1:
                # `layer` (not `i`) keeps the outer batch index intact.
                for layer, prob in enumerate(attn_probs):
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_%d_0' % global_step, x,
                                         layer * 4 + j)

                for layer, prob in enumerate(attns_enc):
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_enc_%d_0' % global_step, x,
                                         layer * 4 + j)

                for layer, prob in enumerate(attns_dec):
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_dec_%d_0' % global_step, x,
                                         layer * 4 + j)

            optimizer.zero_grad()

            loss.backward()

            nn.utils.clip_grad_norm_(m.parameters(), 1.)

            optimizer.step()

            if global_step % hp.save_step == 0:
                torch.save(
                    {
                        'model': m.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(
                        hp.checkpoint_path,
                        'checkpoint_transformer_%d.pth.tar' % global_step))
Ejemplo n.º 18
0
def main():
    """Train a Transformer model mapping EEG signals to mel spectrograms.

    Same loop shape as the text-to-speech trainers: L1 losses on the
    decoder and post-net mel outputs, warm-up LR for the first 400k
    steps, gradient clipping, TensorBoard logging, and periodic
    checkpointing.
    """
    print('starting here...')
    dataset = get_dataset()
    global_step = 0

    m = nn.DataParallel(Model().cuda())

    m.train()
    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)

    writer = SummaryWriter()

    for epoch in range(hp.epochs):
        print('at epoch', epoch)
        dataloader = DataLoader(dataset,
                                batch_size=hp.batch_size,
                                shuffle=True,
                                collate_fn=collate_fn_transformer,
                                drop_last=True,
                                num_workers=1)
        pbar = tqdm(dataloader)
        for i, data in enumerate(pbar):
            pbar.set_description("Processing at epoch %d" % epoch)
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            eeg_array, mel, mel_input, pos_eeg_signal, pos_mel, _ = data

            eeg_array = eeg_array.cuda()
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_eeg_signal = pos_eeg_signal.cuda()
            pos_mel = pos_mel.cuda()

            print('before m.forward()...')

            mel_pred, postnet_pred, attn_probs, stop_preds, attns_enc, attns_dec = m.forward(
                eeg_array, mel_input, pos_eeg_signal, pos_mel)

            mel_loss = nn.L1Loss()(mel_pred, mel)
            post_mel_loss = nn.L1Loss()(postnet_pred, mel)

            loss = mel_loss + post_mel_loss

            writer.add_scalars('training_loss', {
                'mel_loss': mel_loss,
                'post_mel_loss': post_mel_loss,
            }, global_step)

            writer.add_scalars(
                'alphas', {
                    'encoder_alpha': m.module.encoder.alpha.data,
                    'decoder_alpha': m.module.decoder.alpha.data,
                }, global_step)

            if global_step % hp.image_step == 1:

                # SummaryWriter add_image sampling parameters: one sample
                # per batch-size stride, starting mid-batch.
                num_images_per_loop = 4
                writer_start_val = int(hp.batch_size / 2)
                writer_end_val = int(hp.batch_size * num_images_per_loop)
                writer_step_val = int(hp.batch_size)

                # `layer` (not `i`) keeps the outer batch index intact.
                for layer, prob in enumerate(attn_probs):
                    for j in range(writer_start_val, writer_end_val,
                                   writer_step_val):
                        x = vutils.make_grid([prob[j] * 255])
                        writer.add_image('Attention_%d_0' % global_step, x,
                                         layer * num_images_per_loop + j)

                for layer, prob in enumerate(attns_enc):
                    for j in range(writer_start_val, writer_end_val,
                                   writer_step_val):
                        x = vutils.make_grid([prob[j] * 255])
                        writer.add_image('Attention_enc_%d_0' % global_step, x,
                                         layer * num_images_per_loop + j)

                for layer, prob in enumerate(attns_dec):
                    for j in range(writer_start_val, writer_end_val,
                                   writer_step_val):
                        x = vutils.make_grid([prob[j] * 255])
                        writer.add_image('Attention_dec_%d_0' % global_step, x,
                                         layer * num_images_per_loop + j)

            optimizer.zero_grad()
            # Calculate gradients
            loss.backward()

            nn.utils.clip_grad_norm_(m.parameters(), 1.)

            # Update weights
            optimizer.step()

            if global_step % hp.save_step == 0:
                t.save(
                    {
                        'model': m.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(
                        hp.checkpoint_path,
                        'checkpoint_transformer_%d.pth.tar' % global_step))
Ejemplo n.º 19
0
    # Ensure the run's log directory exists before any handlers write to it.
    if not os.path.isdir(log_dir):
        os.mkdir(log_dir)

    if args.p:
        logger.info("Multiple process mode")
    else:
        logger.info("Single process mode")

    data_pkl = os.path.join(pwd(__file__), './data/data.pkl')

    # Prefer the cached pickle; otherwise load from JSON, shuffle, and
    # serialize it for the next run.
    if os.path.isfile(data_pkl):
        logger.info("Existing pkl, loading...")
        datas = load_pickle(data_pkl)
    else:
        logger.info("Loading from json")
        loader = DataLoader()
        datas = loader()
        logger.info("Shuffle data")
        random.shuffle(datas)
        random.shuffle(datas)
        logger.info("Serialize data")
        dump_pickle(datas, data_pkl)

    logger.info("Loaded data {}".format(len(datas)))
    # data_per_worker = int(1e2)*6
    data_per_worker = int(args.s)
    # NOTE(review): num_worker can round to 0 for datasets smaller than
    # half a chunk — confirm downstream code guards against that.
    num_worker = round(len(datas) / data_per_worker)

    logger.info("Data per worker={}, num worker={}".format(
        data_per_worker, num_worker))
def main():
    """Train Transformer-TTS (annotated variant).

    Identical loop to the other transformer trainers in this file;
    comments below are translated from the original Chinese notes.
    """

    dataset = get_dataset()
    global_step = 0
    # inference: https://blog.csdn.net/weixin_40087578/article/details/87186613
    # DataParallel spreads each batch over the visible GPUs (defaults to
    # GPU 0; set device ids / CUDA_VISIBLE_DEVICES for multi-GPU runs).
    m = nn.DataParallel(Model().cuda())

    m.train()
    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)  # Adam

    writer = SummaryWriter()

    for epoch in range(hp.epochs):

        dataloader = DataLoader(dataset,
                                batch_size=hp.batch_size,
                                shuffle=True,
                                collate_fn=collate_fn_transformer,
                                drop_last=True,
                                num_workers=16)
        pbar = tqdm(dataloader)
        for i, data in enumerate(pbar):
            pbar.set_description("Processing at epoch %d" % epoch)
            global_step += 1
            if global_step < 400000:
                # Warm-up LR schedule; per the original author's note this
                # is arguably unnecessary for Adam.
                adjust_learning_rate(optimizer, global_step)
            # pos_text / pos_mel are global position indices.
            character, mel, mel_input, pos_text, pos_mel, _ = data  # unpack batch

            character = character.cuda()  # move batch tensors to GPU
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()

            mel_pred, postnet_pred, attn_probs, stop_preds, attns_enc, attns_dec = m.forward(
                character, mel_input, pos_text, pos_mel)
            # NOTE: a stop-token loss (marking the end of the audio) was
            # dropped because, per the original author, adding it kept the
            # model from converging; generation length is therefore chosen
            # empirically at inference time.
            mel_loss = nn.L1Loss()(mel_pred, mel)  # L1 loss
            post_mel_loss = nn.L1Loss()(postnet_pred, mel)

            loss = mel_loss + post_mel_loss

            writer.add_scalars('training_loss', {
                'mel_loss': mel_loss,
                'post_mel_loss': post_mel_loss,
            }, global_step)

            writer.add_scalars(
                'alphas', {
                    'encoder_alpha': m.module.encoder.alpha.data,
                    'decoder_alpha': m.module.decoder.alpha.data,
                }, global_step)

            if global_step % hp.image_step == 1:
                # `layer` (not `i`) keeps the outer batch index intact.
                for layer, prob in enumerate(attn_probs):
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_%d_0' % global_step, x,
                                         layer * 4 + j)

                for layer, prob in enumerate(attns_enc):
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_enc_%d_0' % global_step, x,
                                         layer * 4 + j)

                for layer, prob in enumerate(attns_dec):
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_dec_%d_0' % global_step, x,
                                         layer * 4 + j)

            optimizer.zero_grad()  # clear accumulated gradients
            # Calculate gradients
            loss.backward()  # backprop

            nn.utils.clip_grad_norm_(m.parameters(), 1.)  # gradient clipping

            # Update weights
            optimizer.step()

            if global_step % hp.save_step == 0:
                t.save(
                    {
                        'model': m.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(
                        hp.checkpoint_path,
                        'checkpoint_transformer_%d.pth.tar' % global_step))
Ejemplo n.º 21
0
def main(args):
    """Train the Transformer-TTS acoustic model.

    Tries to restore model/optimizer state from
    ``hp.checkpoint_path/checkpoint_transformer_<restore_step>.pth.tar``;
    falls back to a fresh run when the checkpoint is missing or unreadable.
    Logs mel losses, the encoder/decoder positional alphas and attention
    maps to TensorBoard, and saves a checkpoint every ``hp.save_step`` steps.

    Args:
        args: argparse namespace providing ``restore_step`` (int step count
            of the checkpoint to resume from) and ``start_epoch`` (int).
    """
    dataset = get_dataset()
    global_step = args.restore_step

    m = nn.DataParallel(Model().cuda())
    m.train()
    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)

    restore_path = os.path.join(
        hp.checkpoint_path,
        'checkpoint_transformer_%d.pth.tar' % args.restore_step)
    try:
        print(restore_path)
        # BUG FIX: the original called `torch.load`, but the module is
        # imported as `t` (cf. t.save/t.optim elsewhere); the resulting
        # NameError was swallowed by a bare `except:`, so restoring from a
        # checkpoint silently never worked.
        checkpoint = t.load(restore_path)
        m.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("\n---Model Restored at Step %d---\n" % args.restore_step)
    except (OSError, KeyError, RuntimeError):
        # Missing, corrupt, or architecture-incompatible checkpoint:
        # start training from scratch.
        print("\n---Start New Training---\n")
        os.makedirs(hp.checkpoint_path, exist_ok=True)

    writer = SummaryWriter()

    for epoch in range(args.start_epoch, hp.epochs):

        dataloader = DataLoader(dataset,
                                batch_size=hp.batch_size,
                                shuffle=True,
                                collate_fn=collate_fn_transformer,
                                drop_last=True,
                                num_workers=0)
        pbar = tqdm(dataloader)
        for _, data in enumerate(pbar):
            pbar.set_description("Processing at epoch %d" % epoch)
            global_step += 1
            # Warm-up/decay schedule only applies for the first 400k steps.
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            character, mel, mel_input, pos_text, pos_mel, _ = data

            character = character.cuda()
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()

            mel_pred, postnet_pred, attn_probs, stop_preds, attns_enc, attns_dec = m.forward(
                character, mel_input, pos_text, pos_mel)

            # NOTE(review): stop-token predictions are returned by the model
            # but no stop loss is trained here; the unused `stop_tokens` and
            # `pos_weight` locals from the original were dropped.
            mel_loss = nn.L1Loss()(mel_pred, mel)
            post_mel_loss = nn.L1Loss()(postnet_pred, mel)
            loss = mel_loss + post_mel_loss

            writer.add_scalars('training_loss', {
                'mel_loss': mel_loss,
                'post_mel_loss': post_mel_loss,
            }, global_step)

            # Track the learned positional-encoding scales of encoder/decoder.
            writer.add_scalars(
                'alphas', {
                    'encoder_alpha': m.module.encoder.alpha.data,
                    'decoder_alpha': m.module.decoder.alpha.data,
                }, global_step)

            # Periodically log attention maps as images.
            if global_step % hp.image_step == 1:
                _log_attention_images(writer, 'Attention_%d_0', attn_probs,
                                      global_step)
                _log_attention_images(writer, 'Attention_enc_%d_0', attns_enc,
                                      global_step)
                _log_attention_images(writer, 'Attention_dec_%d_0', attns_dec,
                                      global_step)

            optimizer.zero_grad()
            loss.backward()
            # Clip gradients to stabilise transformer training.
            nn.utils.clip_grad_norm_(m.parameters(), 1.)
            optimizer.step()

            # Checkpoint model + optimizer state every hp.save_step steps.
            if global_step % hp.save_step == 0:
                t.save(
                    {
                        'model': m.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(
                        hp.checkpoint_path,
                        'checkpoint_transformer_%d.pth.tar' % global_step))


def _log_attention_images(writer, tag_fmt, attn_list, global_step):
    """Write the first 4 heads of every attention layer to TensorBoard.

    `prob[head * 16]` presumably strides over a flattened batch*head
    dimension in chunks of 16 — TODO confirm against the model's
    attention-output layout.
    """
    for layer_idx, prob in enumerate(attn_list):
        for head in range(4):
            img = vutils.make_grid(prob[head * 16] * 255)
            writer.add_image(tag_fmt % global_step, img,
                             layer_idx * 4 + head)