Example #1
def train(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Setup the model that we are going to use
    print("Initializing Vanilla RNN model...")
    model = VanillaRNN(
        seq_length=config.input_length,
        input_dim=config.input_dim,
        num_hidden=config.num_hidden,
        num_classes=config.num_classes,
        batch_size=config.batch_size,
        device=device
    )

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length+1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    loss_function = torch.nn.NLLLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Move data to the configured device
        batch_inputs  = batch_inputs.unsqueeze(-1)  # add input dimensionality
        batch_inputs  = batch_inputs.to(device)     # [batch_size, seq_length, 1]
        batch_targets = batch_targets.to(device)    # [batch_size]

        # Reset for next iteration
        model.zero_grad()

        # Forward pass
        log_probs = model(batch_inputs)

        # Compute the loss, gradients and update network parameters
        loss = loss_function(log_probs, batch_targets)
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
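        # ANSWER: the gradients of all model parameters are rescaled so that their
        # joint L2 norm does not exceed config.max_norm. This guards against the
        # exploding-gradient problem in recurrent networks and keeps each parameter
        # update bounded in size.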
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        predictions = torch.argmax(log_probs, dim=1)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / log_probs.size(0)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)

        if step % 10 == 0:

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss
            ))

        # Check if training is finished
        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
Example #2
def train(config,n_run):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Train on T-1 first digits
    config.input_length = config.input_length - 1

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim, config.num_hidden, config.num_classes, config.batch_size, device=device)
    elif config.model_type == 'LSTM':
        model = LSTM(config.input_length, config.input_dim, config.num_hidden, config.num_classes, config.batch_size, device=device)


    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length+1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    model.to(device)

    train_loss = []
    train_acc = []
    t_loss = []
    t_acc = []

    #Convergence condition
    eps = 1e-6

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Clear stored gradient
        model.zero_grad()

        # Only for time measurement of step through network
        t1 = time.time()

        # Add more code here ...

        # Move inputs and labels to the target device
        x = batch_inputs.to(device)
        y = batch_targets.to(device)


        #Forward pass
        pred = model(x)
        loss = criterion(pred, y)
        t_loss.append(loss.item())
        optimizer.zero_grad()

        #Backward pass
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?

        # ANSWER: torch.nn.utils.clip_grad_norm_() is used to prevent exploding
        # gradients by 'clipping' the norm of the gradients, restraining the
        # gradient values to a certain threshold. This essentially acts as a limit
        # on the size of the updates of the parameters of every layer, ensuring
        # that the parameter values don't change too much from their previous values.

        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        # Add more code here ...

        optimizer.step()
        accuracy = get_accuracy(pred,y, config.batch_size)
        t_acc.append(accuracy.item())

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)

        if step % 1000 == 0:

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss
            ))

        if step % 100 == 0:
            #Get loss and accuracy averages over 100 steps
            train_loss.append(np.mean(t_loss))
            train_acc.append(np.mean(t_acc))
            t_loss = []
            t_acc = []

            if step > 0 and abs(train_loss[-1] - train_loss[-2]) < eps:
                break


        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break


    print('\nDone training.\n')
    #
    #Save trained model and results
    if config.model_type == 'RNN':
        #save model
        torch.save(model, "./Results/RNN/" + str(config.input_length) + "_RNN_model")
        #save train accuracy and loss
        np.save("./Results/RNN/" + str(config.input_length) + "_RNN_accuracy", train_acc)
        np.save("./Results/RNN/" + str(config.input_length) + "_RNN_loss", train_loss)

        # #save model ####################################################################### For SURFsara
        # torch.save(model, str(config.input_length+1) + "_RNN_model_" + str(n_run))
        # #save train accuracy and loss
        # np.save(str(config.input_length+1) + "_RNN_accuracy_" + str(n_run), train_acc)
        # np.save(str(config.input_length+1) + "_RNN_loss_" + str(n_run), train_loss)

    elif config.model_type == 'LSTM':
        #save model
        torch.save(model, "./Results/LSTM/" + str(config.input_length) + "_LSTM_model")
        #save train accuracy and loss
        np.save("./Results/LSTM/" + str(config.input_length) + "_LSTM_accuracy", train_acc)
        np.save("./Results/LSTM/" + str(config.input_length) + "_LSTM_loss", train_loss)
Example #3
def train(config):
    

    np.random.seed(42)
    torch.manual_seed(42)
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    print(device)

    # Initialize the model that we are going to use
    if config.model_type=="RNN":
        
        print("Training VanillaRNN")
        print()
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, config.device)
    else:
        print("Training LSTM")
        print()
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size, config.device)

    model = model.to(device)
    
    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length+1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)
    
    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    if config.optimizer == "adam":
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    else:
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    pl_loss = []
    average_loss = []
    acc = []
    
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()
        
        batch_targets = batch_targets.long()
        batch_inputs, batch_targets = batch_inputs.to(device), batch_targets.to(device)
        
        
        # zero the parameter gradients
        model.zero_grad()
        
        # Add more code here ...
        output = model(batch_inputs)

        out_loss = criterion(output, batch_targets)
        out_loss.backward()
        
        ############################################################################
        # QUESTION: what happens here and why?
        # ANSWER: helps prevent the exploding gradient problem in RNNs / LSTMs.
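        #         It rescales the parameter gradients whenever their joint L2 norm
        #         exceeds max_norm, so a single update cannot blow up the weights.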
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################
        optimizer.step()
        
        # Add more code here ...

        loss = out_loss.item()
        # get argmax (softmax is monotonic, so applying it would not change the argmax)
        predictions = torch.argmax(output, dim=1)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / config.batch_size
        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)
        
        pl_loss.append(loss)
        average_loss.append(np.mean(pl_loss[:-100:-1]))
        acc.append(accuracy)
        
        if step % 10 == 0:

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss
            ))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

        # if step%100==0:
        #     # save training loss
        #     plt.plot(pl_loss,'r-', label="Batch loss", alpha=0.5)
        #     plt.plot(average_loss,'g-', label="Average loss", alpha=0.5)
        #     plt.legend()
        #     plt.xlabel("Iterations")
        #     plt.ylabel("Loss")  
        #     plt.title("Training Loss")
        #     plt.grid(True)
        #     # plt.show()
        #     plt.savefig(config.optimizer+"_loss_"+config.model_type+"_"+str(config.input_length)+".png")
        #     plt.close()
    ################################training##################################################
    # plt.plot(acc,'g-', alpha=0.5)
    # plt.xlabel("Iterations")
    # plt.ylabel("Accuracy")
    # plt.title("Train Accuracy")
    # plt.grid(True)
    # plt.savefig("accuracy_"+config.sampling+"_"+str(config.temp)+".png")
    #  plt.close()
    # fl = config.optimizer+"_acc_"+config.model_type+"_"+str(config.input_length)
   
    
    # np.savez(fl, acc=acc)
    print('Done training.')
Example #4
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    tol = 0.

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, device)
    else:
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    accuracies = [0, 1]
    losses = [0, 1]

    if config.quite:
        bar = tqdm(total=config.train_steps)
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        batch_inputs = batch_inputs[..., None]  # inputs are scalars, so add a feature dimension
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # FORWARD, BACKWARD, AND STEP
        out = model(batch_inputs)
        model.zero_grad()
        loss = criterion(out, batch_targets)
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
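        # ANSWER: the gradient norm is clipped to config.max_norm to prevent
        # exploding gradients, to which RNNs/LSTMs are prone on long sequences.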
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        ############################################################################
        optimizer.step()

        # Add more code here ...
        accuracy = (out.argmax(dim=1) == batch_targets.long()).float().mean()
        losses.append(loss.item())
        accuracies.append(accuracy.item())

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0 and not config.quite:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracies[-1], losses[-1]))
        if config.quite:
            bar.update()
        if step == config.train_steps or np.isclose(losses[-1], losses[-2],
                                                    rtol=tol):
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break
    print('Done training.')
    return accuracies[2:], losses[2:]
Example #5
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(seq_length=config.input_length,
                           input_dim=config.input_dim,
                           num_hidden=config.num_hidden,
                           num_classes=config.num_classes,
                           batch_size=config.batch_size)
    elif config.model_type == 'LSTM':
        model = LSTM(seq_length=config.input_length,
                     input_dim=config.input_dim,
                     num_hidden=config.num_hidden,
                     num_classes=config.num_classes,
                     batch_size=config.batch_size)

    experiment_label = "{}_".format(datetime.now().strftime("%Y-%m-%d %H:%M"))
    for key, value in vars(config).items():
        experiment_label += "{}={}_".format(key, value)

    # TensorBoard API
    cc = CrayonClient(hostname='18.202.229.41', port=6007)
    xp = cc.create_experiment(xp_name=experiment_label)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length+1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        if config.input_dim == 10:
            batch_inputs = one_hot(batch_inputs.type(torch.long), config.input_dim)

        p = model(batch_inputs)

        loss = criterion(p, batch_targets)
        predictions = get_predictions(p)
        accuracy = (predictions == batch_targets).sum().item() / len(predictions)

        # backward pass
        model.zero_grad()
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
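        # ANSWER: clip_grad_norm_ rescales the gradients so that their total norm is
        # at most config.max_norm, protecting the recurrent model against exploding
        # gradients and overly large parameter updates.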
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)

        if step % 10 == 0:

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss
            ))

            # post metrics to tensorboard
            xp.add_scalar_dict({
                'accuracy' : accuracy,
                'loss' : loss.item()
            }, wall_time=time.time(), step=step)

        if step == config.train_steps:

            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    _ = xp.to_zip(experiment_label + ".zip")
    print('Done training.')
Example #6
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if (config.model_type == 'RNN'):
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, device)
    else:
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(
        model.parameters(), lr=config.learning_rate
    )  #, alpha=0.99, eps=1e-8, weight_decay=0, momentum=0, centered=False)

    accuracies = []
    losses = []
    old_loss = float('inf')
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # inputs are single numbers, so add a trailing feature dimension
        batch_inputs = batch_inputs[..., None]
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        batch_predictions = model(batch_inputs)
        loss = criterion(batch_predictions, batch_targets)
        losses.append(loss.item())
        model.zero_grad()  # clear gradients accumulated in the previous step
        loss.backward()

        torch.nn.utils.clip_grad_norm_(
            model.parameters(),
            max_norm=config.max_norm)  # prevents the exploding-gradient problem

        optimizer.step()  # step after clipping, so the update uses the clipped gradients

        accuracy = accuracy_(batch_predictions, batch_targets)
        accuracies.append(accuracy)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            with open(config.save_logs, 'a') as file:
                file.write(
                    "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, Accuracy = {:.2f}, Loss = {:.3f}"
                    .format(datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                            config.train_steps, config.batch_size,
                            examples_per_second, accuracy, loss) + '\n')

        # stop at the step limit, or if two consecutive losses are identical
        if step == config.train_steps or old_loss == loss.item():
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break
        old_loss = loss.item()

    print('Done training.')
    return losses, accuracies