コード例 #1
0
def train(config):
    """Train a VanillaRNN or LSTM on PalindromeDataset and save the curves.

    Args:
        config: Object exposing the attributes read below: ``model_type``
            ('RNN' or 'LSTM'), ``device``, ``input_length``, ``input_dim``,
            ``num_hidden``, ``num_classes``, ``batch_size``,
            ``learning_rate``, ``max_norm`` and ``train_steps``.

    Side effects:
        Prints progress every 100 steps. Once ``step == config.train_steps``
        it writes ``<model_type>_<input_length>.npz`` with the steps, losses
        and accuracies that were logged every 10 steps, then stops.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Pick the device; fall back to the CPU when a GPU was requested but
    # CUDA is not available.
    if str(config.device) != "cpu":
        if not torch.cuda.is_available():
            print('\n* GPU was selected but CUDA is not available.\nTraining on CPU ...')
            device = torch.device("cpu")
        else:
            print('\nCUDA is available!  Training on GPU ...')
            device = torch.device(config.device)
    else:
        # This branch is the explicit CPU request, so report CPU (the
        # original message incorrectly said GPU).
        print('\nTraining on CPU ...')
        device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes, config.batch_size, device)
    else:
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes, config.batch_size, device)

    # Print Configuration
    print("Model Type: {!s:5} Input Length: {!s:5} Learning Rate: {}\n"
          .format(config.model_type, config.input_length, config.learning_rate))

    # Wrap the model for (multi-)device execution and move it to the device
    model = torch.nn.DataParallel(model).to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length+1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    train_loss, train_accuracy, train_steps = [], [], []

    # Enable train mode
    model.train()

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Move tensors to the selected device
        batch_targets = batch_targets.long().to(device)
        batch_inputs = batch_inputs.to(device)

        # Forward pass
        predictions = model(batch_inputs)

        # Calculate loss
        loss = criterion(predictions, batch_targets)

        # Back-propagate
        loss.backward()

        # Rescale the gradients in place so that their total norm does not
        # exceed config.max_norm; this guards against exploding gradients.
        # `clip_grad_norm_` is the non-deprecated, in-place variant.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)

        # Update weights
        optimizer.step()

        # Clear the gradients so they do not accumulate into the next step
        optimizer.zero_grad()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)

        if step % 10 == 0:

            # Store accuracy and loss
            train_steps.append(step)
            train_loss.append(loss.item())
            train_accuracy.append(accuracy(predictions, batch_targets))

            if step % 100 == 0:
                print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                          datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                          config.train_steps, config.batch_size, examples_per_second,
                          train_accuracy[-1], train_loss[-1]))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655

            # Save the logged training steps, losses and accuracies.
            # `train_loss` was collected but never written before.
            file_name = str(config.model_type) + '_' + str(config.input_length) + '.npz'
            np.savez(file_name,
                     train_steps=train_steps,
                     train_loss=train_loss,
                     train_accuracy=train_accuracy,
                     model_type=config.model_type,
                     input_length=config.input_length)

            break

    print('Done training.')
コード例 #2
0
def train(config, device="cpu"):
    """Train a VanillaRNN or LSTM on PalindromeDataset with TensorBoard logging.

    Training stops when ``step == config.train_steps`` or when the mean
    accuracy over the last ``avg_range`` steps changes by less than
    ``convergence_threshold`` between two consecutive checks.

    Args:
        config: Object exposing ``model_type`` ('RNN' or 'LSTM'),
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``learning_rate``, ``max_norm`` and
            ``train_steps``.
        device: Device string passed to the model and to ``Tensor.to``.
            Only 'cpu' and 'cuda:0' change the default tensor type.

    Side effects:
        Writes TensorBoard scalars under ``tensorboard/<model_type>/<run_id>``,
        calls ``save_results`` with the per-step accuracies and losses, and
        prints progress every 100 steps.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Tensorboard summary writer
    run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S_" +
                                     config.model_type.lower() + '_' +
                                     str(config.input_length))
    log_dir = 'tensorboard/' + config.model_type.lower() + '/' + run_id
    writer = SummaryWriter(log_dir=log_dir)

    # Torch settings
    if device == 'cpu':
        torch.set_default_tensor_type(torch.FloatTensor)
    elif device == 'cuda:0':
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length,
                           config.input_dim,
                           config.num_hidden,
                           config.num_classes,
                           config.batch_size,
                           device=device).to(device)
    elif config.model_type == 'LSTM':
        model = LSTM(config.input_length,
                     config.input_dim,
                     config.num_hidden,
                     config.num_classes,
                     config.batch_size,
                     device=device).to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    # Accuracy and loss to be saved
    accuracies = []
    losses = []

    # Useful for convergence check
    avg_range = 200
    last_accuracy = 0
    convergence_threshold = 1e-4
    # Stays None until the first convergence check has run, so the final
    # report can tell "never computed" apart from a real average.
    avg_accuracy = None

    model.train()
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Move the batch to the target device
        batch_inputs = batch_inputs.to(device=device)
        batch_targets = batch_targets.to(device=device)

        # Forward pass (call the module rather than .forward so that any
        # registered hooks run)
        predictions = model(batch_inputs)

        # Compute loss
        loss = criterion(predictions, batch_targets)

        # Reset gradients before backwards pass
        optimizer.zero_grad()

        # Backward pass
        loss.backward()

        # Clipping gradients to avoid exploding gradient problem
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)

        # Update weights
        optimizer.step()

        # Compute accuracy
        accuracy = get_accuracy(predictions, batch_targets)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Plain float for logging; detached tensor for storage so the
        # history list does not keep every step's autograd graph alive.
        loss_value = loss.item()

        # Add accuracy and loss to the writer
        writer.add_scalars('accuracy_and_loss', {
            'acc': accuracy,
            'loss': loss_value
        }, step)

        # Store accuracy and loss
        accuracies.append(accuracy)
        losses.append(loss.detach())

        # Print information
        if step % 100 == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss_value))

        # Check for convergence
        if step % avg_range == 0 and step != 0:
            avg_accuracy = np.mean(accuracies[-avg_range:])
            if np.abs(avg_accuracy - last_accuracy) < convergence_threshold:
                print(
                    "The model has converged with accuracy", avg_accuracy,
                    "(" + ("+" if avg_accuracy > last_accuracy else "-") +
                    str(np.abs(avg_accuracy - last_accuracy)) + ")")
                break
            last_accuracy = avg_accuracy

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    save_results(accuracies, losses, run_id, config.model_type,
                 config.input_length, last_accuracy)
    writer.close()

    # Training can end before the first convergence check (fewer than
    # avg_range steps); previously that raised NameError on avg_accuracy.
    if avg_accuracy is None:
        avg_accuracy = np.mean(accuracies[-avg_range:]) if accuracies else 0.0
    print('Done training. Accuracy:', avg_accuracy)
コード例 #3
0
def train(config, inp_len):
    """Train a VanillaRNN or LSTM on palindromes and track held-out accuracy.

    The first ``num_test_batches`` batches from the data loader are kept
    aside as a test set; every later batch is used for one optimisation
    step. Every 10 steps the model is evaluated on the held-out batches.
    Training stops when ``step == config.train_steps`` or when more than 10
    evaluations have been recorded and the last three all reached 1.0.

    Args:
        config: Object exposing ``model_type`` ('RNN' or 'LSTM'),
            ``input_dim``, ``num_classes``, ``num_hidden``, ``batch_size``,
            ``learning_rate``, ``max_norm`` and ``train_steps``.
        inp_len: Input sequence length given to the model; the dataset is
            built with ``inp_len + 1``.

    Side effects:
        Prints progress and appends one summary line (model type, length,
        test accuracies, test losses) to ``LSTM.txt``. That file name is
        used for both model types.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print('Currently using: ', device)

    # Initialize the model that we are going to use
    input_length = inp_len
    input_dim = config.input_dim
    num_classes = config.num_classes
    num_hidden = config.num_hidden
    batch_size = config.batch_size
    learning_rate = config.learning_rate

    if config.model_type == 'RNN':
        model = VanillaRNN(input_length, input_dim, num_hidden, num_classes,
                           batch_size, device).double()
    else:
        model = LSTM(input_length, input_dim, num_hidden, num_classes,
                     batch_size, device).double()

    model = model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(inp_len + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)

    accuracy_list = []
    loss_list = []

    # The first `num_test_batches` batches form the held-out test set.
    num_test_batches = 50
    test_list_in = []
    test_list_ta = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        if step < num_test_batches:
            test_list_in.append(batch_inputs)
            test_list_ta.append(batch_targets)
            continue

        # Only for time measurement of step through network
        t1 = time.time()

        model.train()

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        output = model(batch_inputs.transpose(0, 1).double())
        loss_for_backward = criterion(output, batch_targets)
        loss_for_backward.backward()

        # Clip AFTER backward() and BEFORE step(): the gradients are rescaled
        # in place so their total norm is at most config.max_norm, which
        # guards against exploding gradients. Clipping before backward(), as
        # the original did, never touched the gradients used for the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # Predicted class = arg-max over dimension 1 of the output.
        output_indices = torch.argmax(output, dim=1)
        correct_indices = output_indices == batch_targets
        accuracy = correct_indices.sum().item() / len(correct_indices)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size,
                    examples_per_second, accuracy, loss_for_backward.item()))

            # Evaluate on the held-out batches. no_grad() avoids building
            # autograd graphs that would never be used.
            avg_test_acc = []
            avg_test_loss = []
            model.eval()
            with torch.no_grad():
                for test_inputs, test_targets in zip(test_list_in,
                                                     test_list_ta):
                    test_inputs = test_inputs.to(device)
                    test_targets = test_targets.to(device)

                    test_output = model(test_inputs.transpose(0, 1).double())
                    test_correct = (torch.argmax(test_output, dim=1) ==
                                    test_targets)

                    avg_test_loss.append(
                        float(criterion(test_output, test_targets)))
                    avg_test_acc.append(
                        test_correct.sum().item() / len(test_correct))

            avg_test = sum(avg_test_acc) / len(avg_test_acc)
            avg_loss = sum(avg_test_loss) / len(avg_test_loss)

            print('Test Accuracy: ', avg_test)

            accuracy_list.append(avg_test)
            loss_list.append(avg_loss)

        if step == config.train_steps or (
                len(accuracy_list) > 10 and
                (sum(accuracy_list[-3:]) / len(accuracy_list[-3:])) == 1.0):
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
    line = ' '.join(
        (str(config.model_type), 'Palindrome length:', str(input_length),
         'Accuracy:', str(accuracy_list), 'Loss', str(loss_list)))
    with open('LSTM.txt', 'a') as file:
        file.write(line + '\n')