Example 1
def Train():

    data_train = DataTXT_All('data/trump_full_speech.txt', sequence_length)
    data = DataLoader(data_train,
                      batch_size=batch_size,
                      shuffle=True,
                      drop_last=True)

    model = RNN(len(id2lettre), latent_size, len(id2lettre))

    loss = nn.CrossEntropyLoss()
    optim = torch.optim.Adam(model.parameters(), lr=5 * 10**-3)

    iterations = 15

    # GPU
    model.to(device)
    loss.to(device)

    writer = SummaryWriter("runs/exo4/runs_sequence_length" +
                           str(sequence_length) + "seq_pred_len" +
                           str(sequence_pred_length) +
                           datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    savepath = Path("seq_gen" + "seq_len" + str(sequence_length) +
                    "seq_pred_len" + str(sequence_pred_length) +
                    str(latent_size) + ".pch")
    if savepath.is_file():
        with savepath.open("rb") as fp:
            state = torch.load(fp)
    else:
        state = State(model, optim)
    for group in state.optim.param_groups:  # assigning optim.lr has no effect; set it via param_groups
        group["lr"] = 5e-2
    for i in range(iterations):

        train_loss = 0
        nt = 0
        for x in data:

            if nt % 1000 == 0:
                print(nt, end=', ')

            nt += 1

            # Targets are the raw character indices; inputs are one-hot encoded
            # and permuted to (seq_len, batch, features) for the RNN.
            y = x.to(device)
            x = one_hot(x, len(id2lettre)).to(device)
            h = torch.zeros(batch_size, latent_size).to(device)
            x = x.permute(1, 0, 2)

            # Hidden states for every time step, decoded to logits of shape
            # (batch, seq_len, vocab).
            h = state.model(x, h)

            yhat = state.model.decoder(h).transpose(0, 1)

            # Predict sequence_pred_length characters ahead: compare the early
            # logits with the targets shifted forward by that amount.
            l = loss(yhat[:, :-sequence_pred_length].flatten(0, 1),
                     y[:, sequence_pred_length:].flatten())

            state.optim.zero_grad()
            l.backward()
            state.optim.step()

            train_loss += l.item()

        train_loss = train_loss / nt

        writer.add_scalar('Loss/Train', train_loss, i)
        print()
        print('Epoch: ', i + 1, '\tError train: ', train_loss)
        Generate(state)
        with savepath.open("wb") as fp:
            state.epoch = i + 1
            torch.save(state, fp)
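
The loop above relies on a `State` checkpoint container and a `one_hot` helper, neither of which is shown (the `RNN` model is sketched after Example 2). A minimal sketch of the helpers, assuming only the attributes and call signatures the loops actually use; the real definitions may differ:

import torch
import torch.nn.functional as F


class State:
    """Hypothetical checkpoint container: the loops only read and write
    state.model, state.optim, state.epoch and state.iteration, and rely on
    torch.save / torch.load pickling the whole object."""

    def __init__(self, model, optim):
        self.model = model
        self.optim = optim
        self.epoch = 0
        self.iteration = 0


def one_hot(x, num_classes):
    """Assumed helper for Example 1: map a LongTensor of character indices of
    shape (batch, seq_len) to a float tensor (batch, seq_len, num_classes)."""
    return F.one_hot(x.long(), num_classes=num_classes).float()

With such a container, `torch.save(state, fp)` pickles the model weights, optimizer state and epoch counter together, which is what lets the loops resume from the saved `.pch` file.
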
Example 2
    def descente_gradient(self, X_train, Y_train, seq_len_train, X_test,
                          Y_test, seq_len_test):
        """
        X_train : dim(len_sequence, batch_size, nb_feature) contains all batchs concat following dim 0

        Y_train : 1D tensor,  
                  32 first label for the first batch, 
                  from 32 to 64 for second batch etc...
        
        seq_len_train: 1D tensor, 
                       seq_len_train[0]=25 ----> 1st batch sequence length = 25
                       seq_len_train[1]=5 ----> 2nd batch sequence length = 5 
        """

        # optimizer over the model parameters
        optim = torch.optim.SGD(self.model.parameters(), lr=self.eps)

        #cross entropy loss
        loss_func = nn.CrossEntropyLoss()

        #checkpoint
        if savepath.is_file():
            with savepath.open("rb") as fp:
                state = torch.load(fp)

        else:
            state = State(self.model, optim)

        rec_loss_train = [None] * self.epoch  # per-epoch train loss
        rec_loss_test = [None] * self.epoch  # per-epoch test loss
        rec_cost = [None] * self.epoch  # per-epoch 0-1 error
        for n_iter in range(state.epoch, self.epoch):
            cumul_loss = 0
            count_batch = 0
            ind = 0
            for i, len_seq in enumerate(seq_len_train):
                # Reset gradients
                state.optim.zero_grad()

                # Slice the next packed batch of length len_seq out of X_train.
                X_batch = X_train[ind:ind + len_seq]
                ind += len_seq
                h0 = torch.zeros(self.batch_size, self.dim_latent)
                H = state.model.forward(X_batch.to(device), h0.to(device))

                # Classify from the last hidden state only.
                Y_hat = state.model.decode(torch.unsqueeze(H[-1], 0))
                Y_batch = Y_train[i * self.batch_size:(i + 1) *
                                  self.batch_size]

                loss = loss_func(Y_hat[0], Y_batch.long().to(device))
                loss.backward()

                # Update model parameters
                state.optim.step()
                state.iteration += 1

                cumul_loss += loss.item()
                count_batch += 1

                # Free GPU memory by dropping references to the batch tensors
                del h0, H, X_batch, Y_hat, Y_batch

            with savepath.open("wb") as fp:
                state.epoch = state.epoch + 1
                torch.save(state, fp)

            # visualize with: tensorboard --logdir runs/
            writer.add_scalar('Loss/train', cumul_loss / count_batch, n_iter)

            # Console output
            print(f"Epoch {n_iter}: loss {cumul_loss / count_batch}")
            rec_loss_train[n_iter] = cumul_loss / count_batch

            # Evaluate loss on the test set
            with torch.no_grad():
                cost_0_1 = 0
                cumul_loss = 0
                count_batch = 0
                ind = 0
                for i, len_seq in enumerate(seq_len_test):
                    X_batch = X_test[ind:ind + len_seq]
                    ind += len_seq
                    h0 = torch.zeros(self.batch_size, self.dim_latent)
                    H = state.model.forward(X_batch.to(device), h0.to(device))
                    Y_hat_test = state.model.decode(torch.unsqueeze(H[-1], 0))
                    Y_batch = Y_test[i * self.batch_size:(i + 1) *
                                     self.batch_size]
                    loss_test = loss_func(Y_hat_test[0],
                                          Y_batch.long().to(device))

                    # 0-1 classification error on this batch
                    Y_hat_test = Y_hat_test.cpu()
                    Y_batch = Y_batch.cpu()
                    soft_m = nn.functional.softmax(Y_hat_test[0], dim=1)
                    label = torch.argmax(soft_m, dim=1)
                    cost_0_1 += (label !=
                                 Y_batch.long()).sum().item() / self.batch_size

                    cumul_loss += loss_test.item()
                    count_batch += 1

                    # Free GPU memory by dropping references to the batch tensors
                    del h0, H, X_batch

            writer.add_scalar('Loss/test', cumul_loss / count_batch, n_iter)
            rec_loss_test[n_iter] = cumul_loss / count_batch
            writer.add_scalar('Classification error rate',
                              cost_0_1 / count_batch, n_iter)
            rec_cost[n_iter] = cost_0_1 / count_batch

        return rec_loss_train, rec_loss_test, rec_cost
    def descente_gradient(self, X_train, X_test):

        # optimizer over the model parameters
        optim = torch.optim.Adam(self.model.parameters(), lr=self.eps)

        #cross entropy loss
        loss_func = nn.CrossEntropyLoss(reduction='sum')

        #checkpoint
        if savepath.is_file():
            with savepath.open("rb") as fp:
                state = torch.load(fp)

        else:
            state = State(self.model, optim)

        rec_loss_train = [None] * self.epoch  # per-epoch train loss
        rec_loss_test = [None] * self.epoch  # per-epoch test loss
        for n_iter in range(state.epoch, self.epoch):
            cumul_loss = 0
            count_batch = 0
            for batch in X_train:
                # Reset gradients
                state.optim.zero_grad()
                try:
                    # Split the long sequence into chunks of 100 steps and
                    # stack them along the batch dimension.
                    batch = torch.stack(batch.split(100), dim=1)
                    h0 = torch.zeros(self.batch_size, self.dim_latent)

                except RuntimeError:
                    # The last chunk is shorter: drop it and size h0 to the
                    # resulting number of chunks.
                    batch = torch.stack(batch.split(100)[:-1], dim=1)
                    h0 = torch.zeros(batch.shape[1], self.dim_latent)

                H = state.model.forward(batch.to(device), h0.to(device))

                # Next-character objective: logits at steps 0..T-2 are compared
                # with the (argmax of the one-hot) inputs at steps 1..T-1.
                pred = state.model.decode(H)
                pred = pred.permute(0, 2, 1)
                real = batch[1:].argmax(dim=2)

                loss = loss_func(pred.narrow(0, 0,
                                             len(pred) - 1), real.to(device))
                loss.backward()

                # Update model parameters
                state.optim.step()
                state.iteration += 1

                cumul_loss += loss.item()
                count_batch += 1

                # Free GPU memory by dropping references to the batch tensors
                del h0, H, batch, pred, real

            with savepath.open("wb") as fp:
                state.epoch = state.epoch + 1
                torch.save(state, fp)

            # visualize with: tensorboard --logdir runs/
            writer.add_scalar('Loss/train', cumul_loss, n_iter)

            # Console output
            print(f"Epoch {n_iter}: loss {cumul_loss}")
            rec_loss_train[n_iter] = cumul_loss

            # Evaluate loss on the test set
            with torch.no_grad():
                cumul_loss = 0
                count_batch = 0
                for batch in X_test:
                    try:
                        batch = torch.stack(batch.split(100), dim=1)
                        h0 = torch.zeros(self.batch_size, self.dim_latent)
                    except RuntimeError:
                        batch = torch.stack(batch.split(100)[:-1], dim=1)
                        h0 = torch.zeros(batch.shape[1], self.dim_latent)

                    # Feed the first T-1 steps and predict the final character.
                    H = state.model.forward(
                        batch.narrow(0, 0,
                                     len(batch) - 1).to(device), h0.to(device))

                    pred = state.model.decode(torch.unsqueeze(H[-1], 0))
                    real = batch.argmax(dim=2)
                    real = real[-1]
                    loss = loss_func(pred[0], real.to(device))

                    # Free GPU memory by dropping references to the batch tensors
                    del h0, H, batch, pred, real

                    cumul_loss += loss.item()
                    count_batch += 1

            writer.add_scalar('Loss/test', cumul_loss, n_iter)
            rec_loss_test[n_iter] = cumul_loss

        return rec_loss_train, rec_loss_test
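
Both examples call into an RNN model that is not shown here: `forward(x, h)` takes the whole sequence in (seq_len, batch, features) layout plus an initial hidden state and returns the hidden state of every time step, and the hidden states are projected to logits through `decoder` (Example 1) or `decode(...)` (Example 2). A minimal sketch consistent with those calls; the constructor signature and the cell internals are assumptions:

import torch
import torch.nn as nn


class RNN(nn.Module):
    """Hypothetical Elman-style RNN matching the calls made in both examples."""

    def __init__(self, input_dim, latent_dim, output_dim):
        super().__init__()
        self.cell = nn.Linear(input_dim + latent_dim, latent_dim)
        self.decoder = nn.Linear(latent_dim, output_dim)  # Example 1 uses model.decoder(h)

    def forward(self, x, h):
        # x: (seq_len, batch, input_dim), h: (batch, latent_dim)
        states = []
        for t in range(x.shape[0]):
            # One recurrent step: mix the current input with the previous state.
            h = torch.tanh(self.cell(torch.cat([x[t], h], dim=1)))
            states.append(h)
        # (seq_len, batch, latent_dim): one hidden state per time step
        return torch.stack(states, dim=0)

    def decode(self, h):
        # Example 2 uses model.decode(H): project hidden states to class logits.
        return self.decoder(h)

Because the decoding head is a plain linear layer applied over the last dimension, calling it on the full (seq_len, batch, latent) tensor (Example 1) or on a single unsqueezed last state (Example 2) both work without reshaping.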