Example 1
def test(config, seq_size, n_examples):


    # Initialize the dataset and data loader
    dataset = PalindromeDataset(seq_size+1)
    # dataset = PalindromeDataset(seq_size) ###################################
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    #Get one batch to test
    (batch_inputs, batch_targets) = next(iter(data_loader))

    #Move the inputs onto the configured device
    x = batch_inputs.to(config.device)

    # Load the trained model
    model = torch.load('./Results/RNN/' + str(seq_size) + '_RNN_model', map_location=config.device)
    # model = torch.load('./Results/RNN/Run 1/' + str(seq_size) + '_RNN_model_1', map_location=config.device) #############
    model.to(config.device)

    #get predictions for batch
    with torch.no_grad():
        pred = model.forward(x)

    print('\n----------------------\nSequence length: ',str(seq_size+1),'\n----------------------') #####

    for i in range(n_examples):
        print('\nTesting on palindrome',str(i+1),':\n---------------\n\nInput:',str(batch_inputs[i].tolist()),'\nPredicted last digit:',str(pred[i,:].argmax().item()),'\n')
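All of these snippets rely on a PalindromeDataset that is not shown here. A minimal sketch of such a dataset, assuming the convention used above (each item is a random palindrome of digits 0-9 whose last digit is withheld as the class label), could look like this:

import numpy as np
import torch.utils.data as data

class PalindromeDataset(data.Dataset):
    """Generates random digit palindromes on the fly (sketch, not the original)."""

    def __init__(self, seq_length):
        self.seq_length = seq_length

    def __len__(self):
        # Effectively infinite; the training loop decides when to stop.
        return int(1e9)

    def __getitem__(self, idx):
        # Build a palindrome: random left half mirrored onto the right.
        half = np.random.randint(0, 10, (self.seq_length + 1) // 2)
        full = np.concatenate((half, half[::-1][self.seq_length % 2:]))
        # Input: all digits except the last; target: the last digit (= the first).
        return full[:-1].astype(np.float32), int(full[-1])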
Example 2
def train(config, input_length):

    # Initialize the model that we are going to use
    model = VanillaRNN(input_length, config.input_dim, config.num_hidden,
                       config.num_classes, config.batch_size)  # fixme

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Initialize the dataset and data loader (leave the +1)
    dataset = PalindromeDataset(input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()  # fixme
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config.learning_rate)  # fixme

    losses = []
    accuracies = []
    running_loss = 0.0

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Add more code here ...
        optimizer.zero_grad()
        batch_inputs, batch_targets = batch_inputs.to(
            device), batch_targets.to(device)

        outputs = model(batch_inputs)
        loss = criterion(outputs, batch_targets)
        loss.backward()

        # Clip gradients after backward() and before step() to deal with
        # exploding gradients (clipping after the update would have no effect)
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        running_loss += loss.item()
        accu = 0.0

        if step % 10 == 0:
            # print accuracy/loss here
            print('[step: %5d] loss: %.4f' % (step, running_loss / 10))
            losses.append(running_loss / 10)
            running_loss = 0.0
            accu = accuracy(outputs, batch_targets)
            accuracies.append(accu)
            print('Accuracy on training dataset: %.3f %%' % (accu))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')

    return model, losses, accuracies
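Example 2 (and Example 6 below) call an accuracy helper that is not defined in the snippet. A plausible sketch, assuming logits of shape (batch, num_classes) and integer targets, returning a fraction in [0, 1] (scale by 100 where the surrounding code prints a percentage):

import torch

def accuracy(outputs, targets):
    """Fraction of samples whose argmax prediction matches the target (sketch)."""
    with torch.no_grad():
        return (outputs.argmax(dim=1) == targets).float().mean().item()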
Example 3
def train(config):
    print('Vanilla RNN is WORKING...')

    model = VanillaRNN(config.input_length, config.input_dim, config.num_hidden, config.num_classes, config.batch_size)
    dataset = PalindromeDataset(config.input_length+1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), config.learning_rate, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)
    #optimizer = torch.optim.SGD(model.parameters(), config.learning_rate)

    # model.train()
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # step: epoch
        # Add more code here ...
        optimizer.zero_grad()
        batch_inputs = batch_inputs.unsqueeze(0)
        output = model(batch_inputs)[0]
        loss = criterion(output, batch_targets)
        loss.backward()
        # clip after backward() and before step() to deal with exploding gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        optimizer.step()
        _, pred = torch.max(output, 1)
        total = len(pred)  # avoid shadowing the built-in all()
        correct = 0
        for i in range(len(pred)):
            if batch_targets[i] == pred[i]:
                correct += 1

        # Add more code here ...
        accuracy = correct / total

        if step % 25 == 0:
            plot_step.append(step)
            plot_loss.append(loss.item())
            plot_accuracy.append(accuracy*100)
            # print accuracy/loss here

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    fig1 = plt.subplot(2,1,1)
    fig2 = plt.subplot(2,1,2)
    fig1.plot(plot_step, plot_accuracy,  c='red', label='accuracy')
    fig1.legend()
    fig2.plot(plot_step, plot_loss, c='green', label='loss')
    fig2.legend()
    plt.show()
    print('Done training.')
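A recurring detail in these examples is where the gradient-clipping call goes: it only affects the update if it runs after loss.backward() (so the gradients exist) and before optimizer.step() (so the clipped gradients are the ones applied). A minimal reference sketch of one training step, using the same names as the snippets above:

optimizer.zero_grad()
loss = criterion(model(batch_inputs), batch_targets)
loss.backward()
# Clip once the gradients exist and before the optimizer consumes them.
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
optimizer.step()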
Example 4
def train(config):
    # Initialize the model that we are going to use
    model = VanillaRNN(config.input_length, config.input_dim,
                       config.num_hidden, config.num_classes,
                       config.batch_size)  # fixme

    # Initialize the dataset and data loader (leave the +1)
    dataset = PalindromeDataset(config.input_length + 1)

    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()  # fixme
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)  # fixme

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Add more code here ...
        hit = 0
        n, dim = batch_inputs.size()
        batch_inputs_T = torch.transpose(batch_inputs, 0, 1)
        # print(batch_inputs_T.size())
        y_hat_oh = model.forward(batch_inputs_T)
        for i in range(n):
            y_pre, _ = max(enumerate(y_hat_oh[i]), key=itemgetter(1))
            y = batch_targets[i].item()
            # print(y_pre, y)
            if y_pre == y:
                hit += 1
        # print("/////////")

        # Add more code here ...
        loss = criterion(y_hat_oh, batch_targets)
        accuracy = hit / n * 100

        optimizer.zero_grad()
        loss.backward()
        # clip after backward() and before step() to deal with exploding gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        if step % 10 == 0:
            print("loss: ", loss.item())
            print("accuracy: ", accuracy)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
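The per-sample Python loop over enumerate/itemgetter in Example 4 can be replaced by a vectorized argmax. A short equivalent sketch, assuming y_hat_oh has shape (batch, num_classes):

predictions = y_hat_oh.argmax(dim=1)               # class index per sample
hit = (predictions == batch_targets).sum().item()
accuracy = hit / n * 100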
Example 5
def main_grads(config):
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)
    for seq in range(config.input_length-1, config.input_length):
        # Initialize the model that we are going to use
        if config.model_type == "RNN":
            model_def = VanillaRNN
        else:
            model_def = LSTM
        model = model_def(seq, config.input_dim,
                          config.num_hidden, config.num_classes,
                          config.device).to(device)  # fixme

        # Initialize the dataset and data loader (note the +1)
        dataset = PalindromeDataset(seq + 1)
        data_loader = DataLoader(dataset, config.batch_size, num_workers=1)
        batch_inputs, batch_targets = next(iter(data_loader))
        batch_inputs.requires_grad_(True)
        # Setup the loss and optimizer
        criterion = nn.CrossEntropyLoss()  # fixme
        optimizer = optim.RMSprop(model.parameters(), config.learning_rate)  # fixme

        hidden_state, model_outputs = model.forward(batch_inputs)
        # retain grad before doing actual backward pass
        loss = criterion(model_outputs, batch_targets)

        # model.zero_grad()

        # optimizer.step()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        grads_file = "results/{}_grad_hidden_state_seq.txt".format(config.model_type)
        with open(grads_file, 'w+') as fd:
            writer = csv.writer(fd)
            for i, x in enumerate(model.hidden_states):
                print(x.grad.abs().mean().item())
                writer.writerow([i, x.grad.abs().mean().item()])
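Reading x.grad on the entries of model.hidden_states only works if the model keeps those intermediate tensors and calls retain_grad() on them during the forward pass (non-leaf tensors do not get a .grad field by default). A sketch of the forward-pass bookkeeping this example assumes, with hypothetical parameter names W_hx, W_hh, b_h, W_ph, b_p:

# Inside the model's forward pass (assumed sketch):
self.hidden_states = []
h = torch.zeros(x.size(0), self.num_hidden)
for t in range(self.seq_length):
    h = torch.tanh(x[:, t:t + 1] @ self.W_hx + h @ self.W_hh + self.b_h)
    h.retain_grad()                      # keep .grad for this non-leaf tensor
    self.hidden_states.append(h)
out = h @ self.W_ph + self.b_p           # logits from the last hidden state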
Example 6
def test(model, config, input_length):
    # Initialize the dataset and data loader (leave the +1)
    dataset = PalindromeDataset(input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)
    accuracies = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        with torch.no_grad():
            outputs = model(batch_inputs)
        accu = 0.0

        if step % 10 == 0:
            accu = accuracy(outputs, batch_targets)
            accuracies.append(accu)

        if step == 2000:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done testing.')

    return accuracies
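Putting Example 2 and Example 6 together, a minimal usage sketch: train a model for one palindrome length, then measure accuracy on freshly generated sequences.

model, train_losses, train_accs = train(config, input_length=10)
test_accs = test(model, config, input_length=10)
print('Mean test accuracy: %.3f' % (sum(test_accs) / len(test_accs)))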
Example 7
def train(config):
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == "RNN":
        model_def = VanillaRNN
    else:
        model_def = LSTM
    model = model_def(config.input_length, config.input_dim, config.num_hidden,
                      config.num_classes, config.device).to(device)  # fixme
    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()  # fixme
    optimizer = optim.RMSprop(model.parameters(),
                              config.learning_rate)  # fixme
    # init csv file
    for d in ["results", "checkpoints", "assets"]:
        if not os.path.exists(d):
            os.mkdir(d)
    cvs_file = 'results/w_grad_{}_inputlength_{}_hiddenunits_{}_lr_{}_batchsize_{}_{}.csv'.format(
        config.model_type, config.input_length, config.num_hidden,
        config.learning_rate, config.batch_size, int(time.time()))
    cols_data = ['step', 'train_loss', 'train_accuracy', "avg_grad_w"]
    with open(cvs_file, 'a') as fd:
        writer = csv.writer(fd)
        writer.writerow(cols_data)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Add more code here ...
        # convert tensors to device for gpu training
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)
        optimizer.zero_grad()
        _, model_outputs = model.forward(batch_inputs)
        loss = criterion(model_outputs, batch_targets)
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        ############################################################################
        # Add more code here ...
        optimizer.step()
        loss = loss.item()  # fixme
        accuracy = calc_accuracy(model_outputs, batch_targets)  # fixme

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0 and step > 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss))
            csv_data = [step, loss, accuracy]
            with open(cvs_file, 'a') as fd:
                writer = csv.writer(fd)
                writer.writerow(csv_data)
        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
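The calc_accuracy helper used in Example 7 is not shown. A straightforward sketch consistent with how it is called (model outputs of shape (batch, num_classes), integer targets):

def calc_accuracy(outputs, targets):
    """Mean fraction of correct argmax predictions (assumed helper, sketch)."""
    return (outputs.argmax(dim=1) == targets).float().mean().item()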
Example 8
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == "RNN":
        model = VanillaRNN(seq_length=config.input_length,
                           input_dim=config.input_dim,
                           num_hidden=config.num_hidden,
                           batch_size=config.batch_size,
                           num_classes=config.num_classes,
                           device=device)

    elif config.model_type == "LSTM":
        model = LSTM(seq_length=config.input_length,
                     input_dim=config.input_dim,
                     num_hidden=config.num_hidden,
                     num_classes=config.num_classes,
                     device=device,
                     batch_size=config.batch_size)
    # send model to device
    model.to(device)
    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    # track training statistics
    train_accuracies = []
    train_losses = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # batch inputs  to device for cuda
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # cast the input batches to the right dtypes on the device
        input_sequences = batch_inputs.to(device=device, dtype=torch.float)
        targets = batch_targets.to(device=device, dtype=torch.long)

        # print(input_sequences)
        # print(targets)

        # Backward pass
        # reset gradients
        optimizer.zero_grad()

        # Forward pass
        # Debugging
        # predict classes for input batches
        # a = input_sequences[:, 0].unsqueeze(1)
        # print(input_sequences.size())
        # print(a.size())
        # break

        # predict input sequences
        predictions = model.forward(input_sequences)
        # accuracy
        accuracy = torch.div(
            torch.sum(targets == predictions.argmax(dim=1)).to(torch.float),
            config.batch_size)
        # print(accuracy)
        # backpropagate loss
        # compute loss per batch
        loss = criterion(predictions, targets)
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # --> ANSWER: Gradients are multiplied through every unrolled time step, so
        #  they can grow very large ("explode"), which destabilises learning.
        #  Clipping the gradient norm to a fixed limit mitigates that issue.
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        ############################################################################
        # update weights according to optimizer
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)
        # save stats for each step
        train_accuracies.append(accuracy.item())
        train_losses.append(loss.item())

        if step % 10 == 0:

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss))

            # If the average of the last 50 accuracies is already 1, stop training:
            # convergence is reached and further computation is unnecessary.
            avg_accuracies = np.sum(train_accuracies[-50:]) / 50
            print(avg_accuracies)
            if avg_accuracies == 1:
                print(
                    "\nTraining finished for length: {} after {} steps".format(
                        config.input_length, step))
                print("Avg Accuracy : {:.3f}".format(avg_accuracies))
                break

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')

    return max(train_accuracies), step
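None of the snippets include the VanillaRNN model itself. A minimal sketch consistent with the keyword arguments used in Example 8 (seq_length, input_dim, num_hidden, num_classes, batch_size, device), assuming input_dim = 1 (raw digits) and implementing h_t = tanh(x_t W_hx + h_{t-1} W_hh + b_h) with a linear readout on the final hidden state; parameter names and initialisation are assumptions:

import torch
import torch.nn as nn

class VanillaRNN(nn.Module):
    """Plain Elman RNN with a linear readout on the last hidden state (sketch)."""

    def __init__(self, seq_length, input_dim, num_hidden, num_classes,
                 batch_size, device='cpu'):
        super().__init__()
        self.seq_length = seq_length
        self.num_hidden = num_hidden
        self.W_hx = nn.Parameter(torch.randn(input_dim, num_hidden) * 0.01)
        self.W_hh = nn.Parameter(torch.randn(num_hidden, num_hidden) * 0.01)
        self.b_h = nn.Parameter(torch.zeros(num_hidden))
        self.W_ph = nn.Parameter(torch.randn(num_hidden, num_classes) * 0.01)
        self.b_p = nn.Parameter(torch.zeros(num_classes))

    def forward(self, x):
        # x: (batch, seq_length) of digits, treated as 1-dimensional inputs.
        h = torch.zeros(x.size(0), self.num_hidden, device=x.device)
        for t in range(self.seq_length):
            x_t = x[:, t].unsqueeze(1)                       # (batch, input_dim)
            h = torch.tanh(x_t @ self.W_hx + h @ self.W_hh + self.b_h)
        return h @ self.W_ph + self.b_p                      # (batch, num_classes)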
Example 9
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim, \
                            config.num_hidden, config.num_classes, \
                            config.batch_size, device=config.device)

    elif config.model_type == 'LSTM':
        model = LSTM(config.input_length, config.input_dim, \
                     config.num_hidden, config.num_classes, \
                     config.batch_size, device=config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    results = 'palindrome length:' + str(config.input_length + 1) + '\n'

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Add more code here ...
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        nn_out = model(batch_inputs)
        loss = criterion(nn_out, batch_targets)
        loss.backward()

        ########################################################################
        # QUESTION: what happens here and why?
        ########################################################################
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ########################################################################

        optimizer.step()

        accuracy = torch.sum(nn_out.argmax(dim=1) == batch_targets)\
                             .to(torch.float) / (config.batch_size)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {},\
                   Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

            results += str(accuracy.item()) + ", "

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')

    with open('results/' + str(config.model_type) \
                         + str(config.input_length + 1) \
              + '.txt', 'w') as f:
        f.write(results)
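The LSTM class has the same constructor interface as the VanillaRNN sketch above. A compact sketch built on nn.LSTMCell (an assumption, the original may implement the gates by hand), again assuming input_dim = 1:

import torch
import torch.nn as nn

class LSTM(nn.Module):
    """LSTM over the digit sequence with a linear readout on the last state (sketch)."""

    def __init__(self, seq_length, input_dim, num_hidden, num_classes,
                 batch_size, device='cpu'):
        super().__init__()
        self.seq_length = seq_length
        self.num_hidden = num_hidden
        self.cell = nn.LSTMCell(input_dim, num_hidden)
        self.readout = nn.Linear(num_hidden, num_classes)

    def forward(self, x):
        h = torch.zeros(x.size(0), self.num_hidden, device=x.device)
        c = torch.zeros(x.size(0), self.num_hidden, device=x.device)
        for t in range(self.seq_length):
            h, c = self.cell(x[:, t].unsqueeze(1), (h, c))
        return self.readout(h)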
Example 10
def train(net, input_length, print_log):

    # Initialize the model that we are going to use
    model = net(input_length, INPUT_DIM, NUM_HIDDEN, OUTPUT_DIM,
                NUM_BATCH_SIZE).cuda()

    # Initialize the dataset and data loader (leave the +1)
    dataset = PalindromeDataset(input_length + 1)
    data_loader = DataLoader(dataset, NUM_BATCH_SIZE, num_workers=1)

    # Setup the loss and optimizer
    cross_entropy = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=LEARNING_RATE)
    # To avoid fluctuation
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=int(TRAIN_STEPS / 20),
                                                   gamma=0.96)

    record_epochs, accs, losses = [], [], []
    for step, train_data in enumerate(data_loader):
        X, y = train_data
        X, y = X.cuda().float(), y.cuda()

        y_pred = model(X)
        loss = cross_entropy(y_pred, y)

        optimizer.zero_grad()
        loss.backward()
        # clip after backward() and before step() to deal with exploding gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=MAX_NORM)
        optimizer.step()
        lr_scheduler.step()

        if step % 10 == 0:
            correct = 0
            total = 0
            with torch.no_grad():
                for i, test_data in enumerate(data_loader):
                    X, y = test_data
                    X, y = X.cuda().float(), y.cuda()

                    y_pred = model(X)
                    total += y.size(0)
                    _, predicted = torch.max(y_pred.data, 1)
                    correct += (predicted == y).sum().item()

                    if (i + 1) % TEST_SIZE == 0:
                        break

            acc = round(correct / total, 4)
            avg_loss = round(loss.item(), 6)
            record_epochs.append(step)
            accs.append(acc)
            losses.append(avg_loss)
            if print_log:
                print('step: {}, loss: {}, test acc: {}'.format(
                    step, avg_loss, acc))

        if step == TRAIN_STEPS:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    correct = 0
    total = 0
    with torch.no_grad():
        for i, test_data in enumerate(data_loader):
            X, y = test_data
            X, y = X.cuda().float(), y.cuda()

            y_pred = model(X)
            total += y.size(0)
            _, predicted = torch.max(y_pred.data, 1)
            correct += (predicted == y).sum().item()

            if (i + 1) % TEST_SIZE == 0:
                break
        acc = round(correct / total, 4)
    if print_log:
        print('Done training.')
    return record_epochs, accs, losses, acc
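Example 10 relies on module-level constants (INPUT_DIM, NUM_HIDDEN, OUTPUT_DIM, NUM_BATCH_SIZE, LEARNING_RATE, TRAIN_STEPS, MAX_NORM, TEST_SIZE) that are not shown. A hypothetical set of values and a usage sketch; the numbers are illustrative, not taken from the original script:

INPUT_DIM = 1            # each digit fed as a scalar
NUM_HIDDEN = 128
OUTPUT_DIM = 10          # classes 0-9
NUM_BATCH_SIZE = 128
LEARNING_RATE = 1e-3
TRAIN_STEPS = 10000
MAX_NORM = 10.0
TEST_SIZE = 10           # number of batches per evaluation pass

record_epochs, accs, losses, final_acc = train(VanillaRNN, input_length=10,
                                               print_log=True)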
Example 11
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    if config.device == 'best':
        config.device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.embed_dim, config.num_hidden, \
            config.num_classes, device)
    else:
        model = LSTM(config.embed_dim, config.num_hidden, \
            config.num_classes, device)

    # Initialize the dataset and data loader
    dataset = PalindromeDataset(config.input_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = F.cross_entropy
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    # Track metrics 
    losses = []
    losses_last10 = []
    accuracies = []
    accuracies_last10 = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Transform input to RNN input format (sequence, batch, input)
        # batch_inputs = batch_inputs.t().unsqueeze(2).to(device=device, dtype=torch.long)
        batch_inputs = batch_inputs.t().to(device=device, dtype=torch.long)
        batch_targets = batch_targets.to(device=device, dtype=torch.long)

        # Only for time measurement of step through network
        t1 = time.time()

        # forward pass
        logits = model.forward(batch_inputs)

        # backprop
        optimizer.zero_grad()
        loss = criterion(logits, batch_targets)
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        # Compute metrics
        accuracy = (logits.cpu().argmax(dim=1) == batch_targets.cpu()).numpy().mean()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)

        # track metrics
        accuracies_last10.append(accuracy.tolist())
        losses_last10.append(loss.tolist())

        if step % 10 == 0:

            message = "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss)
            print(message)
            if config.log_path != "":
                with open(config.log_path, "a") as f:
                    f.write(message + "\n")
            accuracies.append(np.mean(accuracies_last10))
            losses.append(np.mean(losses_last10))
            accuracies_last10 = []
            losses_last10 = []

        # Early stopping criterion: average accuracy over last 1000 iters was lower than the 1000 before that
        stopping_criterion =  len(accuracies) > 200 and \
            np.mean(accuracies[-100:]) <= np.mean(accuracies[-200:-100])

        if step == config.train_steps or stopping_criterion:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            print('Done training.')
            return losses, accuracies
Example 12
def train():
    train_on_gpu = torch.cuda.is_available()
    # Initialize the model that we are going to use
    model = LSTM(config.input_length, config.input_dim, config.num_hidden, config.num_classes, config.batch_size)
    if train_on_gpu:
        model.cuda()
    
    # Initialize the dataset and data loader (leave the +1)
    dataset = PalindromeDataset(config.input_length+1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    # Adjust learning rate
    lrs = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.96)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Add more code here ...
        if train_on_gpu:
            batch_inputs, batch_targets = batch_inputs.cuda().float(), batch_targets.cuda()
        prediction = model(batch_inputs)
        loss = criterion(prediction, batch_targets)
        optimizer.zero_grad()
        loss.backward(retain_graph=True)
        # clip after backward() and before step() to deal with exploding gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        optimizer.step()
        lrs.step()

        if step % 25 == 0:
            # print accuracy/loss here
            plot_epoch.append(step)
            correct = 0
            total = 0
            with torch.no_grad():
                for i, (test_input, test_target) in enumerate(data_loader):
                    if train_on_gpu:
                        test_input, test_target = test_input.cuda().float(), test_target.cuda()
                    eval_prediction = model(test_input)
                    num = len(test_target)
                    total += num
                    _, target = torch.max(eval_prediction.data, 1)
                    correct_batch = (target == test_target).sum().item()
                    correct += correct_batch
                    if i == len(test_target) - 1:
                        break

            accuracy = correct/total
            test_loss = loss.item()
            plot_test_accuracy.append(accuracy*100)
            plot_test_loss.append(test_loss)

        if step == config.train_steps:
            break

    fig1 = plt.subplot(2,1,1)
    fig2 = plt.subplot(2,1,2)
    fig1.plot(plot_epoch, plot_test_accuracy,  c='red', label='accuracy')
    fig1.legend()
    fig2.plot(plot_epoch, plot_test_loss, c='green', label='loss')
    fig2.legend()
    plt.show()
    print('Done training.')
Example 13
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, device)
    else:
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        output = model.forward(batch_inputs)
        loss = criterion(output, batch_targets)

        optimizer.zero_grad()
        loss.backward()

        ############################################################################
        # QUESTION: It clips the gradient norm so that gradients cannot explode
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        ############################################################################

        optimizer.step()
        loss = loss.item()
        accuracy = (torch.max(output, 1)[1] == batch_targets).float().mean()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
Example 14
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == "RNN":
        model = VanillaRNN(config.input_length, config.input_dim, config.num_hidden,\
                           config.num_classes, device=device)
    elif config.model_type == "LSTM":
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,\
                           config.num_classes, device=device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)
    test_loader = iter(DataLoader(dataset, config.test_size, num_workers=1))

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    results = open(config.out_file, "w+")
    results.write(
        "#model_type   : {}\n#input_length : {}\n#input_dim    : {}\n#num_classes  : {}\n#num_hidden   : {}\n#batch_size   : {}\n#learn_rate   : {}\n#train_steps  : {}\n#max_norm     : {}\n"
        .format(config.model_type, config.input_length, config.input_dim,
                config.num_classes, config.num_hidden, config.batch_size,
                config.learning_rate, config.train_steps, config.max_norm))
    results.write("#train_step accuracy loss\n")

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # fix the one-hot width so every batch has the same input dimension,
        # even when some digits happen to be absent from a batch
        batch_inputs = torch.nn.functional.one_hot(
            batch_inputs.type(torch.LongTensor),
            config.input_dim).type(torch.FloatTensor).to(device)
        batch_targets = batch_targets.to(device)

        # Only for time measurement of step through network
        t1 = time.time()

        optimizer.zero_grad()

        #         #for calculating gradients
        #         for timestep in range(config.input_length):
        #             model.zero_grad()
        #             batch_y, hGrad = model(batch_inputs, timestep) #without softmax
        #             #prevent gradients from exploding
        #             torch.nn.utils.clip_grad_norm(model.parameters(), max_norm=config.max_norm)
        #             loss = criterion(batch_y, batch_targets)
        #             loss.backward()
        #             results.write("{} {}\n".format(timestep,hGrad.grad.norm()))
        #         print("Done calculating gradients.")
        #         results.close()
        #         return

        batch_y = model(batch_inputs)  # logits, without softmax

        loss = criterion(batch_y, batch_targets)

        loss.backward()

        # prevent gradients from exploding (clip after backward, before the step)
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)

        optimizer.step() if step > 0 else 0  # to be able to test the initial model

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.eval_freq == 0:
            #             predictions = torch.argmax(torch.abs(batch_y),1) #training: smaller batch size than test
            #             accuracy = torch.sum(predictions == batch_targets).type(torch.FloatTensor)/config.batch_size
            with torch.no_grad():
                test_inputs, test_targets = next(test_loader)
                test_inputs = torch.nn.functional.one_hot(
                    test_inputs.type(torch.LongTensor),
                    config.input_dim).type(torch.FloatTensor).to(device)
                test_targets = test_targets.to(device)
                test_y = model(test_inputs)
                test_loss = criterion(test_y, test_targets)
                test_predictions = torch.argmax(test_y, 1)
                test_accuracy = torch.sum(
                    test_predictions == test_targets).type(
                        torch.FloatTensor) / config.test_size

                #                #uncomment for printing
                #                print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                #                      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                #                        datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                #                        config.train_steps, config.batch_size, examples_per_second,
                #                        test_accuracy, test_loss))

                results.write("%d %.3f %.3f\n" %
                              (step, test_accuracy, test_loss))

        optimizer.step() if step == 0 else 0

        if np.round(test_accuracy, 2) == 1.00:
            print("Achieved >99.95% accuracy.")
            break

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
    results.close()
Example 15
def train(config):
    

    np.random.seed(42)
    torch.manual_seed(42)
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    print(device)

    # Initialize the model that we are going to use
    if config.model_type=="RNN":
        
        print("Training VanillaRNN")
        print()
        model = VanillaRNN(config.input_length, config.input_dim,\
                                config.num_hidden, config.num_classes, config.batch_size, config.device)  # fixme
    else:
        print("Training LSTM")
        print()
        model = LSTM(config.input_length, config.input_dim,\
                                config.num_hidden, config.num_classes, config.batch_size, config.device)

    model = model.to(device)
    
    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length+1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)
    
    # Setup the loss and optimizer
    criterion =  nn.CrossEntropyLoss()  #fixme
    if config.optimizer=="adam":
        optimizer = optim.Adam(model.parameters(), lr = config.learning_rate) # fixme
    else: 
        optimizer = optim.RMSprop(model.parameters(), lr = config.learning_rate)   
    pl_loss =[]
    average_loss =[]
    acc =[]
    
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()
        
        batch_targets = torch.LongTensor(batch_targets)
        batch_inputs, batch_targets = batch_inputs.to(device), batch_targets.to(device)
        
        
        # zero the parameter gradients
        model.zero_grad()
        
        # Add more code here ...
        output = model(batch_inputs)

        out_loss = criterion(output, batch_targets)
        out_loss.backward()
        
        ############################################################################
        # QUESTION: what happens here and why?
        # ANSWER: helps prevent the exploding gradient problem in RNNs / LSTMs.
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################
        optimizer.step()
        
        # Add more code here ...

        loss = out_loss.item()
        # get argmax predictions (softmax is monotonic, so argmax of the logits is identical)
        predictions = torch.argmax(output, dim=1)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / config.batch_size
        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)
        
        pl_loss.append(loss)
        average_loss.append(np.mean(pl_loss[:-100:-1]))
        acc.append(accuracy)
        
        if step % 10 == 0:

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss
            ))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

        # if step%100==0:
        #     # save training loss
        #     plt.plot(pl_loss,'r-', label="Batch loss", alpha=0.5)
        #     plt.plot(average_loss,'g-', label="Average loss", alpha=0.5)
        #     plt.legend()
        #     plt.xlabel("Iterations")
        #     plt.ylabel("Loss")  
        #     plt.title("Training Loss")
        #     plt.grid(True)
        #     # plt.show()
        #     plt.savefig(config.optimizer+"_loss_"+config.model_type+"_"+str(config.input_length)+".png")
        #     plt.close()
    ################################training##################################################
    # plt.plot(acc,'g-', alpha=0.5)
    # plt.xlabel("Iterations")
    # plt.ylabel("Accuracy")
    # plt.title("Train Accuracy")
    # plt.grid(True)
    # plt.savefig("accuracy_"+config.sampling+"_"+str(config.temp)+".png")
    #  plt.close()
    # fl = config.optimizer+"_acc_"+config.model_type+"_"+str(config.input_length)
   
    
    # np.savez(fl, acc=acc)
    print('Done training.')
Example 16
def train(config, pallindrome_length, m):

    config.input_length = pallindrome_length
    config.model_type = m
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Initialize the model that we are going to use
    hyper_params = [
        config.input_length, config.input_dim, config.num_hidden,
        config.num_classes, config.batch_size, device
    ]
    model = globals()['Vanilla' + config.model_type](
        *hyper_params) if config.model_type == 'RNN' else globals()[
            config.model_type](*hyper_params)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    accuracies = []
    losses = []
    avg_loss = 0

    ########## One hot encoding buffer that you create out of the loop and just keep reusing
    # if config.input_dim != 1:
    #     nb_digits = 10
    #     x_onehot = torch.FloatTensor(config.batch_size, config.input_length, nb_digits)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)
        # Only for time measurement of step through network
        t1 = time.time()

        optimizer.zero_grad()

        # Forward pass:
        ########## Convert input to one-hot:
        # if config.input_dim != 1:
        #     batch_inputs = batch_inputs.type(torch.LongTensor).view(config.batch_size, config.input_length, 1)
        #     x_onehot.zero_()
        #     x_onehot.scatter_(2, batch_inputs, 1)
        #     y_pred = model.forward(x_onehot)
        # else:
        #     y_pred = model.forward(batch_inputs)

        y_pred = model.forward(batch_inputs)
        loss = criterion(y_pred, batch_targets)

        # Backward pass
        loss.backward(retain_graph=True)

        # clip after backward() and before step() to deal with exploding gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        accuracy = (y_pred.argmax(
            dim=1) == batch_targets).float().mean().item()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)
        # accuracies.append(accuracy)
        losses.append(loss.item())

        if step % 500 == 0:

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss))
            accuracies.append(accuracy)
            if loss < 0.01 or accuracy == 1:
                break
            else:
                avg_loss = np.average(losses)
                losses = []

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
    return max(accuracies)
Example 17
def train(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length + 1, config.input_dim,
                           config.num_hidden, config.num_classes, device)
    elif config.model_type == 'LSTM':
        model = LSTM(config.input_length + 1, config.input_dim,
                     config.num_hidden, config.num_classes, device)
    else:
        print("Unknown model type, please use RNN or LSTM")
        exit()

    model.store_hidden = True
    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    accuracies = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        outputs = model(batch_inputs)
        loss = criterion(outputs, batch_targets)
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        ############################################################################

        loss = loss.item()
        optimizer.step()
        outputs = outputs.cpu().detach().numpy()

        acc = accuracy(outputs, batch_targets.cpu().detach().numpy())
        accuracies.append(acc)
        grads = [
            torch.norm(t.grad).cpu().detach() for t in model.hiddenActivity
        ]

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    acc, loss))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
    drawPlotMagn(
        grads,
        './' + str(config.model_type) + '_len:' + str(config.input_length) +
        '_lr:' + str(config.learning_rate) + '_grads_over_time.jpg',
        "Gradients over time steps with " + str(config.model_type), 1)
Example 18
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Print all configs to confirm parameter settings
    print_flags()

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, device)
    else:
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device)
    model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(),
                              lr=config.learning_rate,
                              weight_decay=config.weight_decay,
                              momentum=config.momentum)

    # Store some measures
    best_acc = 0.
    los = list()
    iteration = list()
    tmp_acc = list()
    acc = list()

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        optimizer.zero_grad()
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)
        pred = model(batch_inputs)
        accuracy = compute_accuracy(pred, batch_targets)
        tmp_acc.append(accuracy)
        loss = criterion(pred, batch_targets)
        loss.backward()
        ############################################################################
        # QUESTION: what happens here and why?
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        ############################################################################
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / max(float(t2 - t1), 1e-9)

        if step % 10 == 0:

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss))
            iteration.append(step)
            acc.append(accuracy)
            los.append(loss.item())
            if accuracy > best_acc:
                best_acc = accuracy

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
    tmp_acc.sort(reverse=True)
    avg_acc = sum(tmp_acc[:50]) / 50
    print('Average of 50 best accuracies: {}'.format(avg_acc))
    with open('result/{}_acc.txt'.format(config.model_type), 'a') as file:
        file.write('{} {}\n'.format(config.input_length, avg_acc))
        file.close()
    fig, axs = plt.subplots(1, 2, figsize=(10, 5))
    axs[0].plot(iteration, acc)
    axs[0].set_xlabel('Iteration')
    axs[0].set_ylabel('Accuracy')
    axs[1].plot(iteration, los)
    axs[1].set_xlabel('Iteration')
    axs[1].set_ylabel('Loss')
    fig.tight_layout()
    plt.show()
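Example 18 calls compute_accuracy and print_flags, neither of which is shown. Plausible sketches, assuming a module-level config object (as the argument-free print_flags() call suggests):

def compute_accuracy(predictions, targets):
    """Fraction of correct argmax predictions (assumed helper, sketch)."""
    return (predictions.argmax(dim=1) == targets).float().mean().item()

def print_flags():
    """Print every configuration flag for the current run (assumed helper, sketch)."""
    for key, value in vars(config).items():
        print('{} : {}'.format(key, value))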
Example 19
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    print('Currently using: ', device)
    # Initialize the model that we are going to use
    input_length = config.input_length
    input_dim = config.input_dim
    num_classes = config.num_classes
    num_hidden = config.num_hidden
    batch_size = config.batch_size
    learning_rate = config.learning_rate
    
    if config.model_type == 'RNN':
    
        model = VanillaRNN(input_length, input_dim, num_hidden, num_classes
                           , batch_size, device).double()
        
    if config.model_type == 'LSTM':
        model = LSTM(input_length, input_dim, num_hidden, num_classes, batch_size, device).double()

    model = model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()  # fixme
    optimizer = torch.optim.RMSprop(model.parameters(), lr = learning_rate)  # fixme
    accuracy_list = []
    loss_list = []

## first 100 steps are to generate the test set
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        output = model.forward(batch_inputs.transpose(0,1).double())

        optimizer.zero_grad()
        
        output_indices = torch.argmax(output.transpose(0,1), dim=0)
        loss_for_backward = criterion(output,batch_targets).to(device)
        loss_for_backward.backward()
        
        ############################################################################
        # QUESTION: what happens here and why?
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        #print(output.shape)
        #print(batch_targets.shape)
        
        optimizer.step()
        
        #loss = criterion.forward(output, batch_targets)
        
        correct_indices = output_indices == batch_targets

        #if step == 4000:
        #    return correct_indices, output_indices, batch_targets, batch_inputs
        accuracy = int(sum(correct_indices)) / int(len(correct_indices))

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)

        if step % 10 == 0:

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss_for_backward
            ))
            accuracy_list.append(accuracy)
            loss_list.append(loss_for_backward)

        if step == config.train_steps or (len(accuracy_list) > 10 and (sum(accuracy_list[-3:])
        /len(accuracy_list[-3:])) == 1.0):
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
    line = ' '.join((str(config.model_type),'Palindrome length:',str(input_length),'Accuracy:',str(accuracy_list),'Loss', str(loss_list)))
    with open('LSTMMMMM.txt', 'a') as file:
        file.write(line + '\n')
Esempio n. 20
0
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    # if GPU was chosen, check if CUDA is available
    if str(config.device) != "cpu":
        if not torch.cuda.is_available():
            print('\n* GPU was selected but CUDA is not available.\nTraining on CPU ...')
            device = torch.device("cpu")
        else:
            print('\nCUDA is available!  Training on GPU ...')
            device = torch.device(config.device)
    else:
        print('\nTraining on CPU ...')
        device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes, config.batch_size, device)
    else:
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes, config.batch_size, device)

    # Print Configuration
    print("Model Type: {!s:5} Input Length: {!s:5} Learning Rate: {}\n"
          .format(config.model_type, config.input_length, config.learning_rate))

    # Initialize model
    model = torch.nn.DataParallel(model).to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length+1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    train_loss, train_accuracy, train_steps = [], [], []

    # Enable train mode
    model.train()

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # move tensors to GPU, if enabled
        batch_targets = batch_targets.long().to(device)
        batch_inputs = batch_inputs.to(device)

        # Forward pass
        predictions = model(batch_inputs)

        # Calculate loss
        loss = criterion(predictions, batch_targets)

        # Back-propagate
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # ANSWER: `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        # ref: https://medium.com/usf-msds/deep-learning-best-practices-1-weight-initialization-14e5c0295b94
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        # Update weights
        optimizer.step()

        # Clear weights gradients
        optimizer.zero_grad()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)

        if step % 10 == 0:

            # Store accuracy and loss
            train_steps.append(step)
            train_loss.append(loss.item())
            train_accuracy.append(accuracy(predictions, batch_targets))

            if step % 100 == 0:
                print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                          datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                          config.train_steps, config.batch_size, examples_per_second,
                          train_accuracy[-1], train_loss[-1]))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655

            # Save Train and Test accuracies and losses
            file_name = str(config.model_type) + '_' + str(config.input_length) + '.npz'
            np.savez(file_name,
                     train_steps=train_steps,
                     train_accuracy=train_accuracy,
                     model_type=config.model_type,
                     input_length=config.input_length)

            break

    print('Done training.')
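A minimal sketch of reading back the .npz file saved above; the filename is just one instance of the model_type + '_' + input_length pattern used in the code:

import numpy as np

# Read back the results written by the training loop above (same keys assumed).
data = np.load('LSTM_10.npz')
steps = data['train_steps']
acc = data['train_accuracy']
print(data['model_type'], data['input_length'])
print('final accuracy:', acc[-1] if len(acc) else None)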
Esempio n. 21
0
def train(config,n_run):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Train on T-1 first digits
    config.input_length = config.input_length - 1

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim, config.num_hidden, config.num_classes, config.batch_size, device=device)
    elif config.model_type == 'LSTM':
        model = LSTM(config.input_length, config.input_dim, config.num_hidden, config.num_classes, config.batch_size, device=device)


    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length+1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    model.to(device)

    train_loss = []
    train_acc = []
    t_loss = []
    t_acc = []

    #Convergence condition
    eps = 1e-6

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Clear stored gradient
        model.zero_grad()

        # Only for time measurement of step through network
        t1 = time.time()

        # Add more code here ...

        #Move inputs and labels to the device (they are already tensors)
        x = batch_inputs.to(device)
        y = batch_targets.to(device)


        #Forward pass
        pred = model.forward(x)
        loss = criterion(pred, y)
        t_loss.append(loss.item())
        optimizer.zero_grad()

        #Backward pass
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?

        # ANSWER : the function torch.nn.utils.clip_grad_norm() is used to prevent
        # exploding gradients by ‘clipping’ the norm of the gradients, to restrain
        # the gradient values to a certain threshold. This essentially acts as a
        # limit to the size of the updates of the parameters of every layer, ensuring
        # that the parameter values don't change too much from their previous values.

        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        # Add more code here ...

        optimizer.step()
        accuracy = get_accuracy(pred,y, config.batch_size)
        t_acc.append(accuracy.item())

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)

        if step % 1000 == 0:

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss
            ))

        if step % 100 == 0:
            #Get loss and accuracy averages over 100 steps
            train_loss.append(np.mean(t_loss))
            train_acc.append(np.mean(t_acc))
            t_loss = []
            t_acc = []

            if step > 0 and abs(train_loss[-1] - train_loss[-2]) < eps:
                break


        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break


    print('\nDone training.\n')
    #
    #Save trained model and results
    if config.model_type == 'RNN':
        #save model
        torch.save(model, "./Results/RNN/" + str(config.input_length) + "_RNN_model")
        #save train accuracy and loss
        np.save("./Results/RNN/" + str(config.input_length) + "_RNN_accuracy", train_acc)
        np.save("./Results/RNN/" + str(config.input_length) + "_RNN_loss", train_loss)

        # #save model ####################################################################### For SURFsara
        # torch.save(model, str(config.input_length+1) + "_RNN_model_" + str(n_run))
        # #save train accuracy and loss
        # np.save(str(config.input_length+1) + "_RNN_accuracy_" + str(n_run), train_acc)
        # np.save(str(config.input_length+1) + "_RNN_loss_" + str(n_run), train_loss)

    elif config.model_type == 'LSTM':
        #save model
        torch.save(model, "./Results/LSTM/" + str(config.input_length) + "_LSTM_model")
        #save train accuracy and loss
        np.save("./Results/LSTM/" + str(config.input_length) + "_LSTM_accuracy", train_acc)
        np.save("./Results/LSTM/" + str(config.input_length) + "_LSTM_loss", train_loss)
Esempio n. 22
0
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, device)  # fixme
    else:
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device)
    print(model)
    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer

    criterion = torch.nn.CrossEntropyLoss()  # fixme
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    config.learning_rate)  # fixme
    optimizer.zero_grad()
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()
        # Add more code here ...
        model_outputs = model.forward(batch_inputs)

        loss = criterion(torch.t(model_outputs), batch_targets)  # fixme
        accuracy = accuracy_(model_outputs, batch_targets)  # fixme

        optimizer.zero_grad()

        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # This function clips the norm of the gradients to an acceptable level.
        # It effectively puts a limit on the size of the parameter updates, and it
        # must run after loss.backward() so that the gradients actually exist.
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        #        writer.add_scalar('accuracy',accuracy,step)
        #        writer.add_scalar('loss',loss,step)

        #        if loss < 0.001:
        #            writer.add_scalar('loss',loss,10000)
        #            writer.add_scalar('accuracy',accuracy,10000)
        #            break

        if step % 10 == 0:

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
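The accuracy_ helper called above is not shown in this example. A plausible sketch (hypothetical), assuming, as the torch.t call suggests, that model_outputs has shape [num_classes, batch_size]:

import torch

def accuracy_(outputs, targets):
    """Hypothetical helper matching the call above: outputs are [num_classes, batch]."""
    preds = outputs.argmax(dim=0)                 # predicted class per batch element
    return (preds == targets).float().mean().item()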
Esempio n. 23
0
def train(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Setup the model that we are going to use
    print("Initializing Vanilla RNN model...")
    model = VanillaRNN(
        seq_length=config.input_length,
        input_dim=config.input_dim,
        num_hidden=config.num_hidden,
        num_classes=config.num_classes,
        batch_size=config.batch_size,
        device=device
    )

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length+1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    loss_function = torch.nn.NLLLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Move to GPU
        batch_inputs  = batch_inputs.unsqueeze(-1)  # add input dimensionality
        batch_inputs  = batch_inputs.to(device)     # [batch_size, seq_length, 1]
        batch_targets = batch_targets.to(device)    # [batch_size]

        # Reset for next iteration
        model.zero_grad()

        # Forward pass
        log_probs = model(batch_inputs)

        # Compute the loss, gradients and update network parameters
        loss = loss_function(log_probs, batch_targets)
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        predictions = torch.argmax(log_probs, dim=1)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / log_probs.size(0)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)

        if step % 10 == 0:

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss
            ))

        # Check if training is finished
        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
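The example above pairs NLLLoss with a model that returns log-probabilities. A small sketch showing that this combination matches CrossEntropyLoss applied to raw logits (toy tensors, not the model above):

import torch

logits = torch.randn(8, 10)                  # [batch, num_classes]
targets = torch.randint(0, 10, (8,))

ce = torch.nn.CrossEntropyLoss()(logits, targets)
nll = torch.nn.NLLLoss()(torch.log_softmax(logits, dim=1), targets)
print(torch.allclose(ce, nll))               # True: CrossEntropy = LogSoftmax + NLL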
Esempio n. 24
0
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    if config.device == 'cuda':
        if torch.cuda.is_available():
            device = torch.device(config.device)
        else:
            device = torch.device('cpu')
    else:
        device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(seq_length=config.input_length,
                           input_dim=config.input_dim,
                           num_hidden=config.num_hidden,
                           num_classes=config.num_classes,
                           batch_size=config.batch_size,
                           device=device)
    elif config.model_type == 'LSTM':
        model = LSTM(seq_length=config.input_length,
                     input_dim=config.input_dim,
                     num_hidden=config.num_hidden,
                     num_classes=config.num_classes,
                     batch_size=config.batch_size,
                     device=device)

    # make the results directory (if it doesn't exist)
    RESULTS_DIR = Path.cwd() / 'results'
    RESULTS_DIR.mkdir(parents=True, exist_ok=True)
    results_filepath = RESULTS_DIR / (model.__class__.__name__ + '.csv')

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    results = {
        'T': [],
        'step': [],
        'accuracy': [],
        'loss': [],
    }

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Add more code here ...
        # send the data to device
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # (re)set the optimizer gradient to 0
        optimizer.zero_grad()

        # forward pass the mini-batch
        pred_targets = model.forward(batch_inputs)
        loss = criterion.forward(pred_targets, batch_targets)

        # back-propagate the loss
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # clip_grad_norm is deprecated, use clip_grad_norm_ instead
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        ############################################################################

        # Add more code here ...
        optimizer.step()

        accuracy = (pred_targets.argmax(dim=1) == batch_targets).float().mean()

        # append the results
        results['T'].append(config.input_length)
        results['step'].append(step)
        results['accuracy'].append(accuracy.item())
        results['loss'].append(loss.item())

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 1000 == 0:
            print(
                f'[{datetime.now().strftime("%Y-%m-%d %H:%M")}] Train Step {step:04d}/{config.train_steps:04d}, Batch Size = {config.batch_size}, Examples/Sec = {examples_per_second:.2f}, Accuracy = {accuracy:.2f}, Loss = {loss:.3f}'
            )

        if step == config.train_steps:
            results_df = df.from_dict(results)

            if not results_filepath.exists():
                results_df.to_csv(results_filepath,
                                  sep=';',
                                  mode='w',
                                  encoding='utf-8',
                                  index=False)
            else:
                results_df.to_csv(results_filepath,
                                  sep=';',
                                  mode='a',
                                  header=False,
                                  encoding='utf-8',
                                  index=False)
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
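A minimal sketch of reading back the per-run CSV written above, assuming the same results directory, separator, and column names (and that pandas is available):

import pandas as pd

# Read back the per-run results appended above (same separator and columns assumed).
results = pd.read_csv('results/VanillaRNN.csv', sep=';')
print(results.groupby('T')['accuracy'].max())   # best accuracy per palindrome length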
Esempio n. 25
0
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use

    # print(torch.nn.init.constant_(torch.empty(5), 0))
    # print(asdasda)
    # print(torch.nn.Parameter(torch.nn.init.normal_((torch.empty(5, 5)))))

    if (config.model_type == 'RNN'):
        model = VanillaRNN(config.input_length,
                           config.input_dim,
                           config.num_hidden,
                           config.num_classes,
                           config.batch_size,
                           device=device)
        # model = model.to(device)
    else:
        model = LSTM(config.input_length,
                     config.input_dim,
                     config.num_hidden,
                     config.num_classes,
                     config.batch_size,
                     device=device)
        # model = model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    # import pdb
    # pdb.set_trace()
    optimizer = optim.RMSprop(
        model.parameters(),
        lr=config.learning_rate)  #, weight_decay=1/(200*9))
    # optimizer = optim.Adam(model.parameters(), lr=config.learning_rate) #, weight_decay=1/(200*9))
    # optimizer = torch.optim.SGD(model.parameters(), lr=config.learning_rate)

    accuracies = []
    losses = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Add more code here ...
        # print(batch_inputs.shape[1])
        # print(sadasd)
        ############################################################################
        # QUESTION: what happens here and why?
        # Clipping gradients helps prevent exploding gradients (hence "clipping").
        # However, it does nothing against vanishing gradients in RNNs;
        # for vanishing gradients, LSTMs are the more useful remedy.
        ############################################################################

        ############################################################################
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        out = model.forward(batch_inputs)
        # Add more code here ...

        # print(out.argmax(dim=1).shape, batch_targets.shape)

        loss = criterion(out, batch_targets)
        optimizer.zero_grad()
        loss.backward()
        # if (config.model_type == 'RNN'):
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 100 == 0:
            compare = (out.argmax(dim=1) == batch_targets)
            summed = compare.sum().item()
            accuracy = summed / compare.size()[0]
            accuracies.append(accuracy)
            losses.append(loss.item())
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    plt.plot(accuracies, label='accuracies')
    plt.plot(losses, label='losses')
    plt.tight_layout()
    plt.legend()
    plt.show()
    print('Done training.')
Esempio n. 26
0
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, config.device)
    elif config.model_type == 'LSTM':
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, config.device)
    else:
        raise AssertionError('Models available: RNN, LSTM')

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), config.learning_rate)

    # keep track of variables
    accuracy_list = []
    loss_list = []

    # loop through data (get batches)
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # get model predictions (USE THE PLAIN INTEGERS FROM THE PALINDROME TO INSERT INTO THE MODEL AS INPUT)
        predictions = model(batch_inputs)

        # calculate loss
        loss = criterion(predictions, batch_targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        #
        # it clips the gradient norm to a maximum value, to prevent exploding
        # gradients; it has to run after loss.backward() and before optimizer.step()
        # so that the freshly computed gradients are the ones being clipped.
        #
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        targets = batch_targets.data.numpy()
        predictions_np = predictions.data.numpy()

        # get amount of correct predictions
        correct = 0
        for i in range(len(targets)):
            if targets[i] == np.argmax(predictions_np[i]):
                correct += 1

        # get accuracy
        accuracy = correct / len(targets)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss))

            # keep track of losses
            loss_list.append(loss.data.numpy())

            # keep track of accuracies
            accuracy_list.append(accuracy)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    # save variables
    data = [loss_list, accuracy_list]
    filename = 'Result_' + config.model_type + '_inputlen_' + str(
        config.input_length) + '.p'
    pickle.dump(data, open(filename, 'wb'))

    print('Done training.')
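A minimal sketch of loading the pickled [loss_list, accuracy_list] pair written above; the filename below is just one example instance of the pattern used in the code:

import pickle

# Load the [loss_list, accuracy_list] pair pickled above (same filename pattern assumed).
with open('Result_RNN_inputlen_10.p', 'rb') as f:
    loss_list, accuracy_list = pickle.load(f)
print('final accuracy:', accuracy_list[-1])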
Esempio n. 27
0
def train(config, device="cpu"):
    assert config.model_type in ('RNN', 'LSTM')

    # Tensorboard summary writer
    run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S_" +
                                     config.model_type.lower() + '_' +
                                     str(config.input_length))
    log_dir = 'tensorboard/' + config.model_type.lower() + '/' + run_id
    writer = SummaryWriter(log_dir=log_dir)

    # Torch settings
    if device == 'cpu':
        torch.set_default_tensor_type(torch.FloatTensor)
    elif device == 'cuda:0':
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    dtype = torch.float

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length,
                           config.input_dim,
                           config.num_hidden,
                           config.num_classes,
                           config.batch_size,
                           device=device).to(device)
    elif config.model_type == 'LSTM':
        model = LSTM(config.input_length,
                     config.input_dim,
                     config.num_hidden,
                     config.num_classes,
                     config.batch_size,
                     device=device).to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    # Accuracy and loss to be saved
    accuracies = []
    losses = []

    # Useful for convergence check
    avg_range = 200
    last_accuracy = 0
    convergence_threshold = 1e-4

    model.train()
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Load batches in the GPU
        batch_inputs = batch_inputs.to(device=device)
        batch_targets = batch_targets.to(device=device)

        # Forward pass
        predictions = model.forward(batch_inputs)

        # Compute loss
        loss = criterion(predictions, batch_targets)

        # Reset gradients before backwards pass
        optimizer.zero_grad()

        # Backward pass
        loss.backward()

        # Clipping gradients to avoid exploding gradient problem
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)

        # Update weights
        optimizer.step()

        # Compute accuracy
        accuracy = get_accuracy(predictions, batch_targets)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Add accuracy and loss to the writer
        writer.add_scalars('accuracy_and_loss', {
            'acc': accuracy,
            'loss': loss
        }, step)

        # Store accuracy and loss
        accuracies.append(accuracy)
        losses.append(loss.item())

        # Print information
        if step % 100 == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss))

        # Check for convergence
        if step % avg_range == 0 and step != 0:
            avg_accuracy = np.mean(accuracies[-avg_range:])
            if np.abs(avg_accuracy - last_accuracy) < convergence_threshold:
                print(
                    "The model has converged with accuracy", avg_accuracy,
                    "(" + ("+" if avg_accuracy > last_accuracy else "-") +
                    str(np.abs(avg_accuracy - last_accuracy)) + ")")
                break
            last_accuracy = avg_accuracy

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    save_results(accuracies, losses, run_id, config.model_type,
                 config.input_length, last_accuracy)
    writer.close()
    print('Done training. Accuracy:', np.mean(accuracies[-avg_range:]))
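The convergence test in the loop above, factored out as a hypothetical helper to make the moving-average comparison explicit:

import numpy as np

def has_converged(accuracies, last_avg, avg_range=200, threshold=1e-4):
    """Same convergence test as above: compare the mean of the last `avg_range`
    accuracies against the previous window's mean (hypothetical helper)."""
    current_avg = np.mean(accuracies[-avg_range:])
    return abs(current_avg - last_avg) < threshold, current_avg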
Esempio n. 28
0
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    final_accuracy = []
    seq_list = []

    for i in range(30):
        input_length = config.input_length + i
        # Initialize the model that we are going to use
        if config.model_type == 'RNN':
            model = VanillaRNN(input_length,
                               config.input_dim,
                               config.num_hidden,
                               config.num_classes,
                               device=device)
        elif config.model_type == 'LSTM':
            model = LSTM(input_length,
                         config.input_dim,
                         config.num_hidden,
                         config.num_classes,
                         device=device)

        # Initialize the dataset and data loader (note the +1)
        dataset = PalindromeDataset(input_length + 1)
        data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

        # Setup the loss and optimizer
        optimizer = optim.RMSprop(model.parameters(), config.learning_rate)
        criterion = nn.CrossEntropyLoss()
        accuracies = []
        losses = []

        for step, (batch_inputs, batch_targets) in enumerate(data_loader):
            # Only for time measurement of step through network
            t1 = time.time()
            optimizer.zero_grad()
            prediction = model(batch_inputs.to(device))
            loss = criterion(prediction, batch_targets.to(device))
            loss.backward()

            ############################################################################
            # QUESTION: what happens here and why?
            # this function causes the gradient not to explode
            ############################################################################
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            ############################################################################

            optimizer.step()

            _, predicted = torch.max(prediction, 1)
            accuracy = (predicted == batch_targets.to(device)
                        ).sum().item() / len(predicted)

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            accuracies.append(accuracy * 100)
            losses.append(loss.item())

            if step % 10 == 0:

                print(
                    "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                        config.train_steps, config.batch_size,
                        examples_per_second, accuracy, loss))
            if step % 100 == 0:
                # stop training if the model predicted with 100% accuracy for the last 100 steps
                accuracy_average = sum(accuracies[-100:]) / len(accuracies[-100:])
                if accuracy_average == 100:
                    break

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

        final_accuracy.append(accuracy_average)
        seq_list.append(input_length)

        print('Done training.')
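A hypothetical follow-up for the sweep above: plotting the collected final accuracy against palindrome length (assumes matplotlib and the seq_list / final_accuracy lists produced by the loop):

import matplotlib.pyplot as plt

def plot_length_sweep(seq_list, final_accuracy):
    """Plot the accuracy collected per palindrome length by the sweep above."""
    plt.plot(seq_list, final_accuracy, marker='o')
    plt.xlabel('Palindrome length T')
    plt.ylabel('Final training accuracy (%)')
    plt.show()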
Esempio n. 29
0
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(seq_length=config.input_length,
                           input_dim=config.input_dim,
                           num_hidden=config.num_hidden,
                           num_classes=config.num_classes,
                           batch_size=config.batch_size,
                           device=device)
    elif config.model_type == 'LSTM':
        model = LSTM(seq_length=config.input_length,
                     input_dim=config.input_dim,
                     num_hidden=config.num_hidden,
                     num_classes=config.num_classes,
                     batch_size=config.batch_size,
                     device=device)

    model.to(device)
    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(params=model.parameters(),
                                    lr=config.learning_rate)

    # evaluation metrics
    results = []

    print_setting(config)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        s_inputs = batch_inputs.shape
        s_targets = batch_targets.shape

        #forward pass
        predictions = model.forward(batch_inputs)

        #compute loss
        loss = criterion(predictions, batch_targets)

        #backward pass & updates
        # set gradients to zero
        optimizer.zero_grad()
        loss.backward()
        ############################################################################
        # QUESTION: what happens here and why?
        # Prevents exploding gradients by rescaling to a limit specified by config.max_norm
        # Forcing gradients to be within a certain norm to ensure reasonable updates
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        accuracy = (predictions.argmax(dim=1)
                    == batch_targets).sum().float() / (config.batch_size)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.eval_freq == 0:

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss))

            #l = loss.float().item()
            results.append([step, accuracy.item(), loss.float().item()])

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training. \n')

    return results
Esempio n. 30
0
                X[:, t, :].view(1, -1, 1), X[:, t, :].view(1, -1, 1),
                X[:, t, :].view(1, -1, 1), X[:, t, :].view(1, -1, 1)
            ],
                            dim=0)
            # term1 = torch.einsum('kij,bij->kji', X_in, self.W_x)  # kij,kij->kij , kij,kjl->kil
            tmm1 = torch.einsum('kij,kjl->kil', X_t, self.W_x)
            tmm2 = torch.einsum('ij,kjl->kil', h_t, self.W_h)
            gates_t = self.activation_gates(tmm1 + tmm2 + self.bias_gates)
            c_t = gates_t[0, :, :] * gates_t[1, :, :] + c_t * gates_t[2, :, :]
            h_t = self.activation_hidden(c_t) * gates_t[3, :, :]

        return torch.matmul(h_t, self.W_p) + self.bias_p


if __name__ == '__main__':
    palindrom_generator = PalindromeDataset(3)
    pali = palindrom_generator.generate_palindrome()
    print(pali)

    ############################### Comment out Before Submission #######################
    # defaults
    config = {
        'model_type': 'LSTM',
        'seq_length': 5,
        'input_length': 10,
        'input_dim': 1,
        'num_classes': 10,
        'num_hidden': 100,
        'batch_size': 128,
        'learning_rate': 0.001,
        'train_steps': 10000,