Example #1
def train(config, seed=0, seq_length=0):
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
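    # The two flags above force deterministic cuDNN kernels and disable
    # benchmark autotuning, trading some speed for run-to-run reproducibility.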

    if seq_length != 0:
        config.input_length = seq_length

    # Initialize tensorboard writer
    # writer = SummaryWriter()

    # Initialize the device on which to run the model
    device = torch.device(config.device)
    print(device)

    # Load dataset
    if config.dataset == 'randomcomb':
        print('Load random combinations dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = config.input_length
        dataset = datasets.RandomCombinationsDataset(config.input_length)
        data_loader = DataLoader(dataset,
                                 config.batch_size,
                                 num_workers=1,
                                 drop_last=True)

    elif config.dataset == 'bss':
        print('Load bss dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = 2
        config.input_dim = 3
        dataset = datasets.BaumSweetSequenceDataset(config.input_length)
        data_loader = DataLoader(dataset,
                                 config.batch_size,
                                 num_workers=1,
                                 drop_last=True)

        config.input_length = 4 * config.input_length

    elif config.dataset == 'bipalindrome':
        print('Load binary palindrome dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = config.input_length
        dataset = datasets.BinaryPalindromeDataset(config.input_length)
        data_loader = DataLoader(dataset,
                                 config.batch_size,
                                 num_workers=1,
                                 drop_last=True)

        # BinaryPalindromeDataset evidently yields sequences of length
        # 4*input_length+1 (inferred from this code), so update config
        # to the true sequence length
        config.input_length = config.input_length * 4 + 2 - 1

    # Setup the model that we are going to use
    if config.model_type == 'LSTM':
        print("Initializing LSTM model ...")
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device).to(device)

    elif config.model_type == 'biLSTM':
        print("Initializing bidirectional LSTM model...")
        model = biLSTM(config.input_length, config.input_dim,
                       config.num_hidden, config.num_classes,
                       config.batch_size, device).to(device)

    elif config.model_type == 'GRU':
        print("Initializing GRU model ...")
        model = GRU(config.input_length, config.input_dim, config.num_hidden,
                    config.num_classes, config.batch_size, device).to(device)

    elif config.model_type == 'peepLSTM':
        print("Initializing peephole LSTM model ...")
        model = peepLSTM(config.input_length, config.input_dim,
                         config.num_hidden, config.num_classes,
                         config.batch_size, device).to(device)

    # Setup the loss and optimizer
    loss_function = torch.nn.NLLLoss()
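    # NLLLoss expects log-probabilities as input, so the models above are
    # presumably ending in a LogSoftmax layer (hence the name log_probs below)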
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    loss_history = []
    acc_history = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Move to GPU
        batch_inputs = batch_inputs.to(device)  # [batch_size, seq_length,1]
        batch_targets = batch_targets.to(device)  # [batch_size]

        # Reset for next iteration
        model.zero_grad()

        # Forward pass
        log_probs = model(batch_inputs)
        # print('log', log_probs.size())
        # print('batch', batch_targets.size)

        # Compute the loss, gradients and update network parameters
        loss = loss_function(log_probs, batch_targets)
        loss.backward()

        #######################################################################
        # Check for yourself: what happens here and why?
        #######################################################################
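        # Answer (sketch): clip_grad_norm_ rescales all gradients in place so
        # that their combined L2 norm is at most max_norm. Backpropagation
        # through many time steps can produce exploding gradients in RNNs;
        # clipping bounds the update size without changing its direction.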
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        #######################################################################

        optimizer.step()

        predictions = torch.argmax(log_probs, dim=1)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / log_probs.size(0)

        loss_history.append(loss.item())
        acc_history.append(accuracy)

        if step % 200 == 0:
            print('\nLoss:', loss.item())
            print('Acc:', accuracy)
        # writer.add_scalar("Loss", loss, step)
        # writer.add_scalar("Accuracy", accuracy, step)

        # print(predictions[0, ...], batch_targets[0, ...])

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 60 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        # Check if training is finished
        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report
            # https://github.com/pytorch/pytorch/pull/9655
            break

    # writer.flush()
    # writer.close()
    print(f'Done training with seed {seed} and seq_length {seq_length}')
    print('Final loss:', loss_history[-1])
    print('Final acc:', acc_history[-1])
    return loss_history, acc_history
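A minimal driver sketch for this variant, assuming a plain namespace object
stands in for the real config; the field names mirror what train() reads
above, but the concrete values are illustrative only.

from types import SimpleNamespace

import torch

config = SimpleNamespace(
    device='cuda:0' if torch.cuda.is_available() else 'cpu',
    dataset='randomcomb',  # or 'bss' / 'bipalindrome'
    input_length=10, input_dim=1, num_hidden=128, num_classes=None,
    batch_size=256, learning_rate=1e-3, max_norm=10.0,
    train_steps=3000, model_type='LSTM')

loss_history, acc_history = train(config, seed=0)

Note that train() mutates config in place (num_classes always, input_length
for the 'bss' and 'bipalindrome' datasets), so rebuild the namespace before
reusing it for another run.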
Example #2
def train(config):
    #np.random.seed(24)
    #torch.manual_seed(24)

    # Initialize the device on which to run the model
    device = torch.device(config.device)
    print(device)

    # Load dataset
    if config.dataset == 'randomcomb':
        print('Load random combinations dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = config.input_length
        dataset = datasets.RandomCombinationsDataset(config.input_length)
        data_loader = DataLoader(dataset,
                                 config.batch_size,
                                 num_workers=1,
                                 drop_last=True)

    elif config.dataset == 'bss':
        print('Load bss dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = 2
        config.input_dim = 3
        dataset = datasets.BaumSweetSequenceDataset(config.input_length)
        data_loader = DataLoader(dataset,
                                 config.batch_size,
                                 num_workers=1,
                                 drop_last=True)

        config.input_length = 4 * config.input_length

    elif config.dataset == 'bipalindrome':
        print('Load binary palindrome dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = config.input_length
        dataset = datasets.BinaryPalindromeDataset(config.input_length)
        data_loader = DataLoader(dataset,
                                 config.batch_size,
                                 num_workers=1,
                                 drop_last=True)

        config.input_length = config.input_length * 4 + 2 - 1

    # Setup the model that we are going to use
    if config.model_type == 'LSTM':
        print("Initializing LSTM model ...")
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device).to(device)

    elif config.model_type == 'biLSTM':
        print("Initializing bidirectional LSTM model...")
        model = biLSTM(config.input_length, config.input_dim,
                       config.num_hidden, config.num_classes,
                       config.batch_size, device).to(device)

    elif config.model_type == 'GRU':
        print("Initializing GRU model ...")
        model = GRU(config.input_length, config.input_dim, config.num_hidden,
                    config.num_classes, config.batch_size, device).to(device)

    elif config.model_type == 'peepLSTM':
        print("Initializing peephole LSTM model ...")
        model = peepLSTM(config.input_length, config.input_dim,
                         config.num_hidden, config.num_classes,
                         config.batch_size, device).to(device)

    # Setup the loss and optimizer
    loss_function = torch.nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    accuracy_list = []
    loss_list = []
    old_loss = 1.0
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Move to GPU

        batch_inputs = batch_inputs.to(device)  # [batch_size, seq_length,1]
        batch_targets = batch_targets.to(device)  # [batch_size]
        #print(batch_inputs[:,0,:].shape)
        #embedding = nn.Embedding(3, config.input_dim)
        #print(embedding(batch_inputs[:,0,:].long()).shape)
        # Reset for next iteration
        model.zero_grad()

        # Forward pass
        log_probs = model(batch_inputs)

        # Compute the loss, gradients and update network parameters
        loss = loss_function(log_probs, batch_targets)
        loss.backward()

        #######################################################################
        # Check for yourself: what happens here and why?
        #######################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        #######################################################################

        optimizer.step()

        predictions = torch.argmax(log_probs, dim=1)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / log_probs.size(0)
        accuracy_list.append(accuracy)
        loss_list.append(loss.item())
        # print(predictions[0, ...], batch_targets[0, ...])

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 60 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        # Check if training is finished: stop at the step budget, or when the
        # loss is exactly unchanged between consecutive steps (note that this
        # float-equality test is a very strict convergence criterion)
        if step == config.train_steps or old_loss == loss.item():
            # If you receive a PyTorch data-loader error, check this bug report
            # https://github.com/pytorch/pytorch/pull/9655
            break
        else:
            old_loss = loss.item()
    print('Done training.')
    ###########################################################################
    ###########################################################################

    print('Evaluating...')
    model.eval()  # switch to eval mode once, not inside the batch loop
    acc = []
    for i in range(3):
        acc_sublist = []
        with torch.no_grad():  # no gradients needed for evaluation
            for step, (batch_inputs, batch_targets) in enumerate(data_loader):
                batch_inputs = batch_inputs.to(device)  # [batch_size, seq_length, 1]
                batch_targets = batch_targets.to(device)  # [batch_size]
                pred = model(batch_inputs)
                predictions = torch.argmax(pred, dim=1)
                correct = (predictions == batch_targets).sum().item()
                accuracy = correct / pred.size(0)
                acc_sublist.append(accuracy)
                if step == 25:
                    break
        acc.append(np.mean(acc_sublist))
    print('Mean accuracy is {} and standard deviation is {}'.format(
        np.mean(acc), np.std(acc)))
    return accuracy_list, loss_list
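This variant stops when the loss is bit-for-bit identical between two
consecutive steps, which may never trigger in practice. A patience-based
criterion is the more common alternative; the sketch below is hypothetical
and not part of the original example.

class EarlyStopper:
    """Signals a stop after `patience` steps without a minimum improvement."""

    def __init__(self, patience=50, min_delta=1e-4):
        self.patience = patience
        self.min_delta = min_delta
        self.best = float('inf')
        self.stale = 0

    def should_stop(self, loss_value):
        # Reset the stale counter on any sufficiently large improvement
        if loss_value < self.best - self.min_delta:
            self.best = loss_value
            self.stale = 0
        else:
            self.stale += 1
        return self.stale >= self.patience

With stopper = EarlyStopper() created before the loop, the condition
old_loss == loss.item() would become stopper.should_stop(loss.item()).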
Example #3
def train(config, seed):
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Initialize the device on which to run the model
    device = torch.device(config.device)
    print(device)

    # Load dataset
    if config.dataset == 'randomcomb':
        print('Load random combinations dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = config.input_length
        dataset = datasets.RandomCombinationsDataset(config.input_length)
        data_loader = DataLoader(dataset,
                                 config.batch_size,
                                 num_workers=1,
                                 drop_last=True)

    elif config.dataset == 'bss':
        print('Load bss dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = 2
        config.input_dim = 3
        dataset = datasets.BaumSweetSequenceDataset(config.input_length)
        data_loader = DataLoader(dataset,
                                 config.batch_size,
                                 num_workers=1,
                                 drop_last=True)

        config.input_length = 4 * config.input_length

    elif config.dataset == 'bipalindrome':
        print('Load binary palindrome dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = 2
        dataset = datasets.BinaryPalindromeDataset(config.input_length)
        data_loader = DataLoader(dataset,
                                 config.batch_size,
                                 num_workers=1,
                                 drop_last=True)

        config.input_length = config.input_length * 4 + 2 - 1

    # Setup the model that we are going to use
    if config.model_type == 'LSTM':
        print("Initializing LSTM model ...")
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device).to(device)

    elif config.model_type == 'biLSTM':
        print("Initializing bidirectional LSTM model...")
        model = biLSTM(config.input_length, config.input_dim,
                       config.num_hidden, config.num_classes,
                       config.batch_size, device).to(device)

    elif config.model_type == 'GRU':
        print("Initializing GRU model ...")
        model = GRU(config.input_length, config.input_dim, config.num_hidden,
                    config.num_classes, config.batch_size, device).to(device)

    elif config.model_type == 'peepLSTM':
        print("Initializing peephole LSTM model ...")
        model = peepLSTM(config.input_length, config.input_dim,
                         config.num_hidden, config.num_classes,
                         config.batch_size, device).to(device)

    # Setup the loss and optimizer
    loss_function = torch.nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    losses = []
    train_accuracies = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Move to GPU
        batch_inputs = batch_inputs.to(device)  # [batch_size, seq_length,1]
        batch_targets = batch_targets.to(device)  # [batch_size]

        # Reset for next iteration
        model.zero_grad()

        # Forward pass
        log_probs = model(batch_inputs)

        # Compute the loss, gradients and update network parameters
        loss = loss_function(log_probs, batch_targets)
        loss.backward()
        losses.append(loss.item())

        #######################################################################
        # Check for yourself: what happens here and why?
        #######################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        #######################################################################

        optimizer.step()

        predictions = torch.argmax(log_probs, dim=1)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / log_probs.size(0)
        train_accuracies.append(accuracy)

        # print(predictions[0, ...], batch_targets[0, ...])

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 60 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        # Check if training is finished
        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report
            # https://github.com/pytorch/pytorch/pull/9655
            break

        # Stop early if the last 100 losses were all low enough (the length
        # guard keeps a lucky first batch from ending training immediately)
        if len(losses) >= 100 and all(x < 0.001 for x in losses[-100:]):
            break

    print('Done training.')

    # evaluate the model on new random data
    model.eval()
    test_accuracies = []

    # The synthetic datasets appear to generate sequences on the fly, so fresh
    # batches from the same data_loader serve as effectively unseen test data
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Move to GPU
        batch_inputs = batch_inputs.to(device)  # [batch_size, seq_length,1]
        batch_targets = batch_targets.to(device)  # [batch_size]

        # Forward pass
        with torch.no_grad():
            log_probs = model(batch_inputs)
            predictions = torch.argmax(log_probs, dim=1)
            correct = (predictions == batch_targets).sum().item()
            accuracy = correct / log_probs.size(0)
        test_accuracies.append(accuracy)

        if step >= 5000 / config.batch_size:
            # If you receive a PyTorch data-loader error, check this bug report
            # https://github.com/pytorch/pytorch/pull/9655
            break

    return losses, train_accuracies, torch.tensor(
        test_accuracies).mean().item()
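Since this variant also returns a held-out accuracy as its third value,
averaging it over seeds gives a more robust estimate. A hypothetical driver,
with config built as in the sketch after Example #1 (rebuild it per run for
the datasets that mutate input_length):

import numpy as np

test_accs = []
for seed in (0, 1, 2):
    _, _, test_acc = train(config, seed)
    test_accs.append(test_acc)
print('test accuracy: {:.3f} +/- {:.3f}'.format(
    np.mean(test_accs), np.std(test_accs)))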
Example #4
def train(config):
    seed = config.seed
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        #torch.backends.cudnn.deterministic=True
        #torch.backends.cudnn.benchmark=False

    # Initialize the device on which to run the model
    device = torch.device(config.device)
    print(device)

    # Load dataset
    if config.dataset == 'randomcomb':
        print('Load random combinations dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = config.input_length
        dataset = datasets.RandomCombinationsDataset(config.input_length)
        data_loader = DataLoader(dataset,
                                 config.batch_size,
                                 num_workers=1,
                                 drop_last=True)

    elif config.dataset == 'bss':
        print('Load bss dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = 2
        config.input_dim = 3
        dataset = datasets.BaumSweetSequenceDataset(config.input_length)
        data_loader = DataLoader(dataset,
                                 config.batch_size,
                                 num_workers=1,
                                 drop_last=True)

        config.input_length = 4 * config.input_length

    elif config.dataset == 'bipalindrome':
        print('Load binary palindrome dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = 2  #config.input_length
        dataset = datasets.BinaryPalindromeDataset(config.input_length)
        data_loader = DataLoader(dataset,
                                 config.batch_size,
                                 num_workers=1,
                                 drop_last=True)

        config.input_length = config.input_length * 4 + 2 - 1

    # Setup the model that we are going to use
    if config.model_type == 'LSTM':
        print("Initializing LSTM model ...")
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device).to(device)

    elif config.model_type == 'biLSTM':
        print("Initializing bidirectional LSTM model...")
        model = biLSTM(config.input_length, config.input_dim,
                       config.num_hidden, config.num_classes,
                       config.batch_size, device).to(device)

    elif config.model_type == 'GRU':
        print("Initializing GRU model ...")
        model = GRU(config.input_length, config.input_dim, config.num_hidden,
                    config.num_classes, config.batch_size, device).to(device)

    elif config.model_type == 'peepLSTM':
        print("Initializing peephole LSTM model ...")
        model = peepLSTM(config.input_length, config.input_dim,
                         config.num_hidden, config.num_classes,
                         config.batch_size, device).to(device)

    model.numTrainableParameters()
    # Setup the loss and optimizer
    loss_function = torch.nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    # For plotting
    acc_plt = []
    loss_plt = []
    convergenceCounter = 0  # to stop after consecutive accuracies of 1.0 on training
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Move to GPU
        batch_inputs = batch_inputs.to(device)  # [batch_size, seq_length,1]
        batch_targets = batch_targets.to(device)  # [batch_size]

        # Reset for next iteration
        model.zero_grad()

        # Forward pass

        log_probs = model(batch_inputs)

        # Compute the loss, gradients and update network parameters
        loss = loss_function(log_probs, batch_targets)
        loss.backward()

        #######################################################################
        # Check for yourself: what happens here and why?
        #######################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        #######################################################################

        optimizer.step()

        predictions = torch.argmax(log_probs, dim=1)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / log_probs.size(0)

        # print(predictions[0, ...], batch_targets[0, ...])
        acc_plt.append(accuracy)
        loss_plt.append(loss.item())  # .item() avoids retaining the autograd graph

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 60 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        # Check if training is finished: stop at the step budget or after
        # more than 100 consecutive near-perfect training batches
        if accuracy > 0.999:
            convergenceCounter += 1
        else:
            convergenceCounter = 0  # reset so only consecutive hits count
        if step == config.train_steps or convergenceCounter > 100:
            # If you receive a PyTorch data-loader error, check this bug report
            # https://github.com/pytorch/pytorch/pull/9655
            break

    # test on new data:
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        test_loss = 0
        numBatchesTestEval = 10
        for step, (x, t) in enumerate(data_loader):
            if device.type == 'cuda':
                x = x.to(device)
                t = t.to(device)
            log_probs = model(x)
            predictions = torch.argmax(log_probs, dim=1)
            correct += (predictions == t).sum().item()
            total += log_probs.size(0)
            test_loss += loss_function(log_probs, t).item() / numBatchesTestEval
            if step == numBatchesTestEval - 1:  # exactly numBatchesTestEval batches
                break
    test_accuracy = correct / total
    model.train()
    print('Done training.')
    print('Accuracy on testset of', total, 'examples:', test_accuracy)
    print('Avg. loss on testset:', test_loss)
    pltLossAcc(loss_plt, acc_plt, config)
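Neither pltLossAcc nor numTrainableParameters is defined in this snippet;
both come from the surrounding project. Minimal sketches of what they might
plausibly look like (hypothetical implementations, matplotlib assumed for
the plotting helper):

import matplotlib.pyplot as plt


def pltLossAcc(loss_plt, acc_plt, config):
    # Plot the training loss and accuracy curves side by side and save them.
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
    ax1.plot(loss_plt)
    ax1.set_xlabel('step')
    ax1.set_ylabel('NLL loss')
    ax2.plot(acc_plt)
    ax2.set_xlabel('step')
    ax2.set_ylabel('train accuracy')
    fig.suptitle('{} on {}'.format(config.model_type, config.dataset))
    fig.savefig('train_{}_{}.png'.format(config.model_type, config.dataset))


def numTrainableParameters(self):
    # Assumed to live on the model classes; counts trainable weights.
    total = sum(p.numel() for p in self.parameters() if p.requires_grad)
    print('Trainable parameters:', total)
    return total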