Example #1
0
def test1():  # baseline
    """Train and evaluate the baseline two-layer network.

    The data splits ``X_train``/``y_train``, ``X_dev``/``y_dev`` and
    ``X_test``/``y_test`` are not defined here; they are read from the
    enclosing (module) scope.

    Returns:
        The value returned by ``train_model`` (bound to ``val_acc`` —
        presumably the validation accuracy; confirm against ``train_model``).
    """
    # Alternatives explored in the original experiments: batch_size=64,
    # nn.LeakyReLU, lr=0.01, momentum=0.9.
    batch_size = 32

    # Batch all three splits with the same batch size.
    train_batches, dev_batches, test_batches = (
        batchify_data(features, labels, batch_size)
        for features, labels in (
            (X_train, y_train),
            (X_dev, y_dev),
            (X_test, y_test),
        )
    )

    # Model specification: 784 inputs -> 10 hidden units -> 10 outputs.
    layers = [
        nn.Linear(784, 10),
        nn.ReLU(),
        nn.Linear(10, 10),
    ]
    model = nn.Sequential(*layers)

    val_acc = train_model(
        train_batches, dev_batches, model, lr=0.1, momentum=0)

    # Evaluate on the test split with the model switched to eval mode.
    eval_model = model.eval()
    loss, accuracy = run_epoch(test_batches, eval_model, None)

    print(f"Loss on test set:{loss!s} Accuracy on test set: {accuracy!s}")
    return val_acc
def main():
    """Train ``CNN`` on the two-label dataset and print test metrics.

    Loads the data via ``U.get_data``, holds out the last tenth of the
    training examples as a dev set, shuffles the remaining training examples
    and trains with ``train_model``. ``path_to_data_dir``,
    ``use_mini_dataset``, ``batch_size``, ``img_rows`` and ``img_cols`` come
    from the enclosing scope.
    """
    X_train, y_train, X_test, y_test = U.get_data(path_to_data_dir, use_mini_dataset)

    # Hold out the trailing tenth of the training data for validation.
    cut = int(9 * len(X_train) / 10)
    X_train, X_dev = X_train[:cut], X_train[cut:]
    first_labels, second_labels = y_train[0], y_train[1]
    y_dev = [first_labels[cut:], second_labels[cut:]]
    y_train = [first_labels[:cut], second_labels[:cut]]

    # Shuffle the examples and both label sequences with one shared ordering.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [[labels[idx] for idx in order] for labels in y_train]

    # Batch each split.
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Build the model; the input size is the flattened image size.
    model = CNN(img_rows * img_cols)

    train_model(train_batches, dev_batches, model)

    # Evaluate on the test split; loss/acc hold one entry per label.
    eval_model = model.eval()
    loss, acc = run_epoch(test_batches, eval_model, None)
    print(f'Test loss1: {loss[0]:.6f}  accuracy1: {acc[0]:.6f}  loss2: {loss[1]:.6f}   accuracy2: {acc[1]:.6f}')
Example #3
0
def main():
    """Train ``MLP`` on the two-label dataset and print test metrics.

    Prints the active ``device``, loads the data via ``U.get_data``, holds out
    the last tenth of the training examples as a dev set, shuffles the rest,
    and trains for ``n_epochs`` epochs. ``device``, ``batch_size``,
    ``n_epochs``, ``img_rows`` and ``img_cols`` come from the enclosing scope.
    """
    print("Device: {}".format(device))

    X_train, y_train, X_test, y_test = U.get_data(path_to_data_dir, use_mini_dataset)

    # Hold out the trailing tenth of the training data for validation.
    cut = int(9 * len(X_train) / 10)
    X_train, X_dev = X_train[:cut], X_train[cut:]
    first_labels, second_labels = y_train[0], y_train[1]
    y_dev = [first_labels[cut:], second_labels[cut:]]
    y_train = [first_labels[:cut], second_labels[:cut]]

    # Shuffle the examples and both label sequences with one shared ordering.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [[labels[idx] for idx in order] for labels in y_train]

    # Batch each split.
    train_batches = batch_data(X_train, y_train, batch_size)
    dev_batches = batch_data(X_dev, y_dev, batch_size)
    test_batches = batch_data(X_test, y_test, batch_size)

    # Build the model on the selected device.
    # NOTE(review): only the model is moved here; whether the batches end up
    # on the same device depends on batch_data/run_epoch — confirm.
    flat_size = img_rows * img_cols
    model = MLP(flat_size).to(device)

    train_model(train_batches, dev_batches, model, n_epochs=n_epochs)

    # Evaluate on the test split; loss/acc hold one entry per label.
    eval_model = model.eval()
    loss, acc = run_epoch(test_batches, eval_model, None)
    print('Test loss1: {:.6f}  accuracy1: {:.6f}  loss2: {:.6f}   accuracy2: {:.6f}'.format(
        loss[0], acc[0], loss[1], acc[1]))
Example #4
0
def run_model(train_batches,
              dev_batches,
              test_batches,
              lr=0.1,
              momentum=0,
              act_fun='ReLU',
              hidden_size=10):
    """Train a one-hidden-layer network and evaluate it on the test batches.

    Args:
        train_batches: Batches passed to ``train_model`` for training.
        dev_batches: Batches passed to ``train_model`` for validation.
        test_batches: Batches passed to ``run_epoch`` for the final evaluation.
        lr: Learning rate forwarded to ``train_model``.
        momentum: Momentum forwarded to ``train_model``.
        act_fun: ``'LeakyReLU'`` selects ``nn.LeakyReLU``; any other value
            selects ``nn.ReLU``.
        hidden_size: Width of the hidden layer.

    Returns:
        A ``(val_acc, loss, accuracy)`` tuple: the value returned by
        ``train_model`` followed by the two values returned by ``run_epoch``.
    """
    # Fixed seed so repeated runs start from the same initial weights.
    torch.manual_seed(12321)

    activation_cls = nn.LeakyReLU if act_fun == 'LeakyReLU' else nn.ReLU

    # Model specification: 784 inputs -> hidden_size -> 10 outputs.
    layers = [
        nn.Linear(784, hidden_size),
        activation_cls(),
        nn.Linear(hidden_size, 10),
    ]
    model = nn.Sequential(*layers)

    val_acc = train_model(
        train_batches, dev_batches, model, lr=lr, momentum=momentum)

    # Evaluate on the test batches with the model in eval mode.
    eval_model = model.eval()
    test_loss, test_accuracy = run_epoch(test_batches, eval_model, None)

    return val_acc, test_loss, test_accuracy
Example #5
0
def main():
    """Train a small CNN on MNIST and print the test loss and accuracy.

    Reshapes the flat images to 1x28x28, holds out the last tenth of the
    training set as a dev set, shuffles the remaining training examples,
    trains with Nesterov momentum and evaluates on the test set.
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Conv2d expects (N, channels, height, width): restore the 1x28x28 images.
    X_train = np.reshape(X_train, (X_train.shape[0], 1, 28, 28))
    X_test = np.reshape(X_test, (X_test.shape[0], 1, 28, 28))

    # Hold out the trailing tenth of the training data for validation.
    cut = int(9 * len(X_train) / 10)
    X_train, X_dev = X_train[:cut], X_train[cut:]
    y_train, y_dev = y_train[:cut], y_train[cut:]

    # Shuffle the training examples and labels with one shared ordering.
    order = torch.randperm(X_train.shape[0])
    X_train, y_train = X_train[order], y_train[order]

    # Batch each split.
    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Model specification.
    layers = [
        nn.Conv2d(1, 32, (3, 3)),
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),
        nn.Conv2d(32, 64, (3, 3)),
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),
        Flatten(),
        nn.Linear(1600, 128),
        nn.Dropout(0.5),
        nn.Linear(128, num_classes),
    ]
    model = nn.Sequential(*layers)

    # Move the model to the GPU when one is available.
    # NOTE(review): only the model is moved; the batches built above are not
    # transferred here — confirm train_model/run_epoch handle the device.
    if not torch.cuda.is_available():
        print("----------------- Using the Device: CPU ----------------- ")
    else:
        print("----------------- Using the Device: CUDA -----------------")
        model = model.to('cuda')
    train_model(train_batches, dev_batches, model, nesterov=True)

    # Evaluate on the test split with the model in eval mode.
    eval_model = model.eval()
    loss, accuracy = run_epoch(test_batches, eval_model, None)

    print(f"Loss on test set:{loss!s} Accuracy on test set: {accuracy!s}")
Example #6
0
def main():
    """Train a small CNN on MNIST and print the test loss and accuracy.

    Reshapes the flat images to 1x28x28, holds out the last tenth of the
    training set as a dev set, shuffles the remaining training examples,
    trains with Nesterov momentum and evaluates on the test set.
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Conv2d expects (N, channels, height, width): restore the 1x28x28 images.
    X_train = np.reshape(X_train, (X_train.shape[0], 1, 28, 28))
    X_test = np.reshape(X_test, (X_test.shape[0], 1, 28, 28))

    # Hold out the trailing tenth of the training data for validation.
    cut = int(9 * len(X_train) / 10)
    X_train, X_dev = X_train[:cut], X_train[cut:]
    y_train, y_dev = y_train[:cut], y_train[cut:]

    # Shuffle the training examples and labels with one shared ordering.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [y_train[idx] for idx in order]

    # Batch each split.
    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Model specification. Shapes are (num pictures x channels x H x W).
    layers = [
        nn.Conv2d(1, 32, (3, 3)),    # num x 1 x 28 x 28 -> num x 32 x 26 x 26
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),        # -> num x 32 x 13 x 13
        nn.Conv2d(32, 64, (3, 3)),   # -> num x 64 x 11 x 11
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),        # -> num x 64 x 5 x 5
        Flatten(),                   # -> num x 1600 (64 * 5 * 5)
        nn.Linear(1600, 128),
        nn.Dropout(p=0.5),
        nn.Linear(128, num_classes),
    ]
    model = nn.Sequential(*layers)

    train_model(train_batches, dev_batches, model, nesterov=True)

    # Evaluate on the test split with the model in eval mode.
    eval_model = model.eval()
    loss, accuracy = run_epoch(test_batches, eval_model, None)

    print(f"Loss on test set:{loss!s} Accuracy on test set: {accuracy!s}")
Example #7
0
def main():
    """Train a small CNN on MNIST and print the test loss and accuracy.

    Reshapes the flat images to 1x28x28, holds out the last tenth of the
    training set as a dev set, shuffles the remaining training examples,
    trains with Nesterov momentum and evaluates on the test set.
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Conv2d expects (N, channels, height, width): restore the 1x28x28 images.
    X_train = np.reshape(X_train, (X_train.shape[0], 1, 28, 28))
    X_test = np.reshape(X_test, (X_test.shape[0], 1, 28, 28))

    # Hold out the trailing tenth of the training data for validation.
    cut = int(9 * len(X_train) / 10)
    X_train, X_dev = X_train[:cut], X_train[cut:]
    y_train, y_dev = y_train[:cut], y_train[cut:]

    # Shuffle the training examples and labels with one shared ordering.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [y_train[idx] for idx in order]

    # Batch each split.
    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    #################################
    ## Model specification
    #pragma: coderesponse template name="pytorchcnn" dedent="true"
    # A 3x3 convolution without padding shrinks each side by kernel_size - 1;
    # a 2x2 max-pool halves each side (rounding down).
    layers = [
        nn.Conv2d(1, 32, (3, 3)),    # out_size 26 x 26 x 32
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),        # out_size 13 x 13 x 32
        nn.Conv2d(32, 64, (3, 3)),   # out_size 11 x 11 x 64
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),        # out_size 5 x 5 x 64 = 1600
        Flatten(),
        nn.Linear(1600, 128),
        nn.Dropout(p=0.5),
        nn.Linear(128, num_classes),
    ]
    model = nn.Sequential(*layers)
    #pragma: coderesponse end
    ##################################

    train_model(train_batches, dev_batches, model, nesterov=True)

    # Evaluate on the test split with the model in eval mode.
    eval_model = model.eval()
    loss, accuracy = run_epoch(test_batches, eval_model, None)

    print(f"Loss on test set:{loss!s} Accuracy on test set: {accuracy!s}")
Example #8
0
def main():
    """Train a small CNN on MNIST and print the test loss and accuracy.

    Reshapes the flat images to 1x28x28, holds out the last tenth of the
    training set as a dev set, shuffles the remaining training examples,
    trains with Nesterov momentum and evaluates on the test set.
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Conv2d expects (N, channels, height, width): restore the 1x28x28 images.
    X_train = np.reshape(X_train, (X_train.shape[0], 1, 28, 28))
    X_test = np.reshape(X_test, (X_test.shape[0], 1, 28, 28))

    # Hold out the trailing tenth of the training data for validation.
    cut = int(9 * len(X_train) / 10)
    X_train, X_dev = X_train[:cut], X_train[cut:]
    y_train, y_dev = y_train[:cut], y_train[cut:]

    # Shuffle the training examples and labels with one shared ordering.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [y_train[idx] for idx in order]

    # Batch each split.
    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Model specification.
    layers = [
        nn.Conv2d(1, 32, (3, 3)),    # valid 3x3 convolution: 28x28 -> 26x26
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),        # 2x2 pooling: 26x26 -> 13x13
        nn.Conv2d(32, 64, (3, 3)),   # valid 3x3 convolution: 13x13 -> 11x11
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),        # 2x2 pooling: 11x11 -> 5x5
        Flatten(),                   # 5x5x64 -> 1600
        nn.Linear(1600, 128),
        nn.Dropout(),                # library default drop probability
        nn.Linear(128, num_classes),
    ]
    model = nn.Sequential(*layers)

    train_model(train_batches, dev_batches, model, nesterov=True)

    # Evaluate on the test split with the model in eval mode.
    eval_model = model.eval()
    loss, accuracy = run_epoch(test_batches, eval_model, None)

    print(f"Loss on test set:{loss!s} Accuracy on test set: {accuracy!s}")
Example #9
0
def train(batch_size=32,
          hidden_size=10,
          lr=0.1,
          momentum=0,
          activation=nn.ReLU):
    """Train a one-hidden-layer network on MNIST and evaluate it.

    Args:
        batch_size: Batch size used for the train, dev and test splits.
        hidden_size: Width of the hidden layer.
        lr: Learning rate forwarded to ``train_model``.
        momentum: Momentum forwarded to ``train_model``.
        activation: Zero-argument callable (e.g. an ``nn.Module`` class) that
            builds the hidden-layer activation.

    Returns:
        A ``(val_acc, accuracy)`` tuple: the value returned by
        ``train_model`` and the accuracy returned by ``run_epoch`` on the
        test batches.
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Hold out the trailing tenth of the training data for validation.
    dev_split_index = int(9 * len(X_train) / 10)
    X_dev = X_train[dev_split_index:]
    y_dev = y_train[dev_split_index:]
    X_train = X_train[:dev_split_index]
    y_train = y_train[:dev_split_index]

    # Shuffle the training examples and labels with one shared ordering.
    permutation = np.arange(len(X_train))
    np.random.shuffle(permutation)
    X_train = [X_train[i] for i in permutation]
    y_train = [y_train[i] for i in permutation]

    # Batch each split.
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Model specification: 784 inputs -> hidden_size -> num_classes outputs.
    model = nn.Sequential(
        nn.Linear(784, hidden_size),
        activation(),
        nn.Linear(hidden_size, num_classes),
    )

    val_acc = train_model(train_batches,
                          dev_batches,
                          model,
                          lr=lr,
                          momentum=momentum)

    # Evaluate on the test split with the model in eval mode.
    loss, accuracy = run_epoch(test_batches, model.eval(), None)

    print("Loss on test set:" + str(loss) + " Accuracy on test set: " +
          str(accuracy))
    return val_acc, accuracy
Example #10
0
def test_grid(batch_size=32, lr=0.1, momentum=0, LeakyReLU=False):
    """Train one grid-search configuration on MNIST and report its metrics.

    Both NumPy and PyTorch are re-seeded on every call. Exactly one network
    is built, so the ReLU and LeakyReLU variants start from the same seeded
    initial weights and differ only in the activation.

    Args:
        batch_size: Batch size used for the train, dev and test splits.
        lr: Learning rate forwarded to ``train_model``.
        momentum: Momentum forwarded to ``train_model``.
        LeakyReLU: When true, use ``nn.LeakyReLU`` instead of ``nn.ReLU`` as
            the hidden activation.

    Returns:
        The value returned by ``train_model`` (bound to ``val_acc``).
    """
    np.random.seed(12321)  # for reproducibility
    torch.manual_seed(12321)  # for reproducibility

    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Hold out the trailing tenth of the training data for validation.
    dev_split_index = int(9 * len(X_train) / 10)
    X_dev = X_train[dev_split_index:]
    y_dev = y_train[dev_split_index:]
    X_train = X_train[:dev_split_index]
    y_train = y_train[:dev_split_index]

    # Shuffle the training examples and labels with one shared ordering.
    permutation = np.arange(len(X_train))
    np.random.shuffle(permutation)
    X_train = [X_train[i] for i in permutation]
    y_train = [y_train[i] for i in permutation]

    # Batch each split.
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Model specification. Building only the selected network keeps the
    # linear layers' initialisation independent of the activation choice
    # (constructing a second, discarded model would advance the RNG first).
    activation = nn.LeakyReLU() if LeakyReLU else nn.ReLU()
    model = nn.Sequential(
        nn.Linear(784, 128),
        activation,
        nn.Linear(128, num_classes),
    )

    val_acc = train_model(train_batches, dev_batches, model, lr=lr, momentum=momentum)

    # Evaluate on the test split with the model in eval mode.
    loss, accuracy = run_epoch(test_batches, model.eval(), None)

    print("Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy))
    return val_acc
def oldmain(classes, batch, eta, momentum, LeakyReLU):
    """Train a 784-128-``classes`` network on MNIST and return test accuracy.

    Args:
        classes: Number of output units of the final linear layer.
        batch: Batch size used for the train, dev and test splits.
        eta: Learning rate forwarded to ``train_model``.
        momentum: Momentum forwarded to ``train_model``.
        LeakyReLU: When true, use ``nn.LeakyReLU`` instead of ``nn.ReLU`` as
            the hidden activation.

    Returns:
        The accuracy returned by ``run_epoch`` on the test batches.
    """
    num_classes = classes
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Hold out the trailing tenth of the training data for validation.
    dev_split_index = int(9 * len(X_train) / 10)
    X_dev = X_train[dev_split_index:]
    y_dev = y_train[dev_split_index:]
    X_train = X_train[:dev_split_index]
    y_train = y_train[:dev_split_index]

    # Shuffle the training examples and labels with one shared ordering.
    permutation = np.arange(len(X_train))
    np.random.shuffle(permutation)
    X_train = [X_train[i] for i in permutation]
    y_train = [y_train[i] for i in permutation]

    # Batch each split.
    train_batches = batchify_data(X_train, y_train, batch)
    dev_batches = batchify_data(X_dev, y_dev, batch)
    test_batches = batchify_data(X_test, y_test, batch)

    # Model specification. The output width follows ``classes``; it was
    # previously hard-coded to 10, which silently ignored the argument.
    activation = nn.LeakyReLU() if LeakyReLU else nn.ReLU()
    model = nn.Sequential(
        nn.Linear(784, 128),
        activation,
        nn.Linear(128, num_classes),
    )

    train_model(train_batches, dev_batches, model, lr=eta, momentum=momentum)

    # Evaluate on the test split with the model in eval mode.
    loss, accuracy = run_epoch(test_batches, model.eval(), None)

    print("Loss on test set:" + str(loss) + " Accuracy on test set: " +
          str(accuracy))
    return accuracy
Example #12
0
def run_model(train_batches, dev_batches, test_batches, conv_size,
              dropout_rate, act_func):
    """Train a ``CNN_1C`` model for 10 epochs and evaluate it on the test set.

    Args:
        train_batches: Batches passed to ``train_model`` for training.
        dev_batches: Batches passed to ``train_model`` for validation.
        test_batches: Batches passed to ``run_epoch`` for the final evaluation.
        conv_size: First argument forwarded to ``CNN_1C``.
        dropout_rate: Second argument forwarded to ``CNN_1C``.
        act_func: Third argument forwarded to ``CNN_1C``.

    Returns:
        A ``(val_acc, loss, acc)`` tuple: the value returned by
        ``train_model`` followed by the two values returned by ``run_epoch``.
    """
    # Fixed seed so repeated runs start from the same initial weights.
    torch.manual_seed(12321)
    network = CNN_1C(conv_size, dropout_rate, act_func)

    val_acc = train_model(train_batches, dev_batches, network, n_epochs=10)

    # Evaluate on the test batches with the model in eval mode.
    eval_network = network.eval()
    test_loss, test_acc = run_epoch(test_batches, eval_network, None)

    return val_acc, test_loss, test_acc
def run_kfold(args):
    """Run stratified k-fold cross-validation and print the results.

    For every fold a fresh model is built and trained with ``run_epochs``;
    the returned network is then evaluated on the train, val and test
    dataloaders. Per-fold metrics and their means are printed at the end.

    Args:
        args: Configuration mapping. Keys read here: ``'num_kfold'``,
            ``'random_seed'``, ``'tb_run_name'`` and ``'device'``. The keys
            ``'tb_writer'`` and ``'checkpoint_name_suffix'`` are overwritten
            for every fold.
    """
    datasets = get_datasets(args)
    cv_results = []
    skf = StratifiedKFold(args['num_kfold'],
                          shuffle=True,
                          random_state=args['random_seed'])
    # NOTE(review): ``whole_dataset`` is not defined in this function; it has
    # to exist in the enclosing scope — confirm it is not meant to come from
    # ``datasets``.
    for kfold, (train_index, test_index) in enumerate(
            skf.split(whole_dataset.X, whole_dataset.Y)):

        set_cv_indices(datasets, train_index, test_index)
        dataloaders = get_dataloaders(datasets, args)

        net, criterion, optimizer = get_model(args)
        fold_suffix = '_Fold_{}'.format(kfold)
        tb_writer = SummaryWriter(args['tb_run_name'] + fold_suffix)
        args['tb_writer'] = tb_writer
        args['checkpoint_name_suffix'] = fold_suffix
        try:
            best_net = run_epochs(net, optimizer, dataloaders, criterion, args)
        finally:
            # One writer is created per fold; close it so event files are
            # flushed and file handles do not accumulate across folds.
            tb_writer.close()

        # Score the selected network on every split in 'test' phase.
        fold_info = {}
        for dataset_phase in ('train', 'val', 'test'):
            loss, acc = run_epoch(best_net,
                                  optimizer=optimizer,
                                  dataloader=dataloaders[dataset_phase],
                                  criterion=criterion,
                                  phase='test',
                                  device=args['device'])
            fold_info['{}_loss'.format(dataset_phase)] = loss
            fold_info['{}_acc'.format(dataset_phase)] = acc
        cv_results.append(fold_info)

    print('\n')
    for kfold, result in enumerate(cv_results):
        print('Fold {}, '
              'train loss: {train_loss:.4f}, train acc: {train_acc:.2%}, '
              'val loss: {val_loss:.4f}, val acc: {val_acc:.2%}, '
              'test loss: {test_loss:.4f}, test acc: {test_acc:.2%}'.format(
                  kfold, **result))

    # Mean of each metric over all folds.
    print('{}-fold cross-validation'.format(len(cv_results)))
    for phase in ('train', 'val', 'test'):
        print('{} loss: {:.4f}'.format(
            phase,
            sum(x['{}_loss'.format(phase)]
                for x in cv_results) / len(cv_results)))
        print('{} acc: {:.2%}'.format(
            phase,
            sum(x['{}_acc'.format(phase)]
                for x in cv_results) / len(cv_results)))
Example #14
0
def main():
    """Train a 784-10-10 LeakyReLU network on MNIST with debug output.

    Prints a few diagnostics about the loaded data, holds out the last tenth
    of the training set as a dev set, shuffles the remaining training
    examples, trains the network and prints the test loss and accuracy.
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Diagnostics on the raw training data.
    print(len(X_train), 'len(X_train)')
    print(len(X_train[0]), 'len(X_train)[0]')
    print(y_train.shape, 'y_train.shape')

    # Hold out the trailing tenth of the training data for validation.
    dev_split_index = int(9 * len(X_train) / 10)
    print(dev_split_index, 'dev_split_index')
    X_train, X_dev = X_train[:dev_split_index], X_train[dev_split_index:]
    y_train, y_dev = y_train[:dev_split_index], y_train[dev_split_index:]

    # Shuffle the training examples and labels with one shared ordering.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [y_train[idx] for idx in order]
    print(X_train[1].shape, 'X_train')
    print(y_train[1], 'y_train[1]')

    # Batch each split.
    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Model specification: 784 inputs -> 10 hidden units -> 10 outputs.
    layers = [
        nn.Linear(784, 10),
        nn.LeakyReLU(),
        nn.Linear(10, num_classes),
    ]
    model = nn.Sequential(*layers)

    train_model(train_batches, dev_batches, model, lr=0.1, momentum=0)

    # Evaluate on the test split with the model in eval mode.
    eval_model = model.eval()
    loss, accuracy = run_epoch(test_batches, eval_model, None)

    print(f"Loss on test set:{loss!s} Accuracy on test set: {accuracy!s}")
Example #15
0
def main():
    """Train a 784-128-10 network on MNIST and print its configuration.

    Holds out the last tenth of the training set as a dev set, shuffles the
    remaining training examples, trains the network and prints the
    hyperparameters together with the test loss and accuracy.
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Hold out the trailing tenth of the training data for validation.
    dev_split_index = int(9 * len(X_train) / 10)
    X_dev = X_train[dev_split_index:]
    y_dev = y_train[dev_split_index:]
    X_train = X_train[:dev_split_index]
    y_train = y_train[:dev_split_index]

    # Shuffle the training examples and labels with one shared ordering.
    permutation = np.arange(len(X_train))
    np.random.shuffle(permutation)
    X_train = [X_train[i] for i in permutation]
    y_train = [y_train[i] for i in permutation]

    # Batch each split.
    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Model specification.
    # D_in is input dimension; H is hidden dimension; D_out is output dimension.
    D_in, H, D_out = 784, 128, num_classes
    lr = 0.1
    momentum = 0
    leaky_relu_active = False

    # The activation is derived from the flag so that the configuration
    # printed below always describes the model that was actually trained.
    activation = nn.LeakyReLU() if leaky_relu_active else nn.ReLU()
    model = nn.Sequential(
        nn.Linear(D_in, H),
        activation,
        nn.Linear(H, D_out),
    )

    train_model(train_batches, dev_batches, model, lr=lr, momentum=momentum)

    # Evaluate on the test split with the model in eval mode.
    loss, accuracy = run_epoch(test_batches, model.eval(), None)

    print("Batch size: {}; Learning Rate: {}; Momentum: {}; LeakyReLU: {}; Hidden Dimension: {}".
          format(batch_size, lr, momentum, leaky_relu_active, H))
    print("Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy))
Example #16
0
def main(batch_size=32, lr=0.1, momentum=0, act="ReLU", hsize=10):
    """Train a one-hidden-layer network on MNIST for one configuration.

    Args:
        batch_size: Batch size used for the train, dev and test splits.
        lr: Learning rate forwarded to ``train_model``.
        momentum: Momentum forwarded to ``train_model``.
        act: Activation identifier handed to ``get_activation``.
        hsize: Width of the hidden layer.
    """
    # The message text (including its spelling) is kept exactly as emitted
    # by the original so downstream log parsing is unaffected.
    banner = "batch: %d, learnign rate: %f, momentum: %f, activation: %s" % (
        batch_size, lr, momentum, act)
    print(banner)

    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Hold out the trailing tenth of the training data for validation.
    cut = int(9 * len(X_train) / 10)
    X_train, X_dev = X_train[:cut], X_train[cut:]
    y_train, y_dev = y_train[:cut], y_train[cut:]

    # Shuffle the training examples and labels with one shared ordering.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [y_train[idx] for idx in order]

    # Batch each split.
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Model specification: 784 inputs -> hsize -> 10 outputs.
    layers = [
        nn.Linear(784, hsize),
        get_activation(act),
        nn.Linear(hsize, num_classes),
    ]
    model = nn.Sequential(*layers)

    train_model(train_batches, dev_batches, model, lr=lr, momentum=momentum)

    # Evaluate on the test split with the model in eval mode.
    eval_model = model.eval()
    loss, accuracy = run_epoch(test_batches, eval_model, None)

    print(f"Loss on test set:{loss!s} Accuracy on test set: {accuracy!s}")
def main():
    """Train a 784-128-10 ReLU network on MNIST with momentum 0.9.

    Holds out the last tenth of the training set as a dev set, shuffles the
    remaining training examples, trains the network and prints the test loss
    and accuracy.

    Results recorded by the original author for other settings (Acc1/Acc2 as
    noted in the original comments):
        batch_size=64: Acc1 = 0.9314,  Acc2 = 0.976478
        nn.LeakyReLU:  Acc1 = 0.9207,  Acc2 = 0.978944
        lr=0.01:       Acc1 = 0.9206,  Acc2 = 0.955047
        momentum=0.9:  Acc1 = 0.8928,  Acc2 = 0.967246
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Hold out the trailing tenth of the training data for validation.
    cut = int(9 * len(X_train) / 10)
    X_train, X_dev = X_train[:cut], X_train[cut:]
    y_train, y_dev = y_train[:cut], y_train[cut:]

    # Shuffle the training examples and labels with one shared ordering.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [y_train[idx] for idx in order]

    # Batch each split.
    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Model specification: 784 inputs -> 128 hidden units -> 10 outputs.
    layers = [
        nn.Linear(784, 128),
        nn.ReLU(),
        nn.Linear(128, num_classes),
    ]
    model = nn.Sequential(*layers)

    train_model(train_batches, dev_batches, model, lr=0.1, momentum=0.9)

    # Evaluate on the test split with the model in eval mode.
    eval_model = model.eval()
    loss, accuracy = run_epoch(test_batches, eval_model, None)

    print(f"Loss on test set:{loss!s} Accuracy on test set: {accuracy!s}")
def main():
    """Train a model, or evaluate a saved checkpoint on the test set.

    Configuration comes from ``parse_args``. When ``args['inference_mode']``
    is exactly ``False`` the model is trained via ``run_epochs`` with a
    TensorBoard writer stored in ``args['tb_writer']``; otherwise the weights
    at ``args['checkpoint']`` are loaded and the test loss and accuracy are
    printed.
    """
    args = parse_args()
    manual_seed(args['random_seed'])
    net, criterion, optimizer = get_model(args)
    dataloaders = get_dataloaders(args)

    if args['inference_mode'] is False:
        tb_writer = SummaryWriter(args['tb_run_name'])
        args['tb_writer'] = tb_writer
        try:
            run_epochs(net, optimizer, dataloaders, criterion, args)
        finally:
            # Flush pending events and release the event file even when
            # training raises.
            tb_writer.close()
    else:
        # map_location lets a checkpoint saved on one device (e.g. a GPU) be
        # restored on the configured device (e.g. a CPU-only host).
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from trusted sources.
        state_dict = torch.load(args['checkpoint'],
                                map_location=args['device'])
        net.load_state_dict(state_dict)
        loss, acc = run_epoch(net,
                              optimizer=optimizer,
                              dataloader=dataloaders['test'],
                              criterion=criterion,
                              phase='test',
                              device=args['device'])
        print('Test loss: {:.3f}, Test acc: {:.2%}'.format(loss, acc))
Example #19
0
def main():
    """Train a small CNN on MNIST and print the test loss and accuracy.

    Reshapes the flat images to 1x28x28, holds out the last tenth of the
    training set as a dev set, shuffles the remaining training examples,
    trains with Nesterov momentum and evaluates on the test set.
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Conv2d expects (N, channels, height, width): restore the 1x28x28 images.
    X_train = np.reshape(X_train, (X_train.shape[0], 1, 28, 28))
    X_test = np.reshape(X_test, (X_test.shape[0], 1, 28, 28))

    # Hold out the trailing tenth of the training data for validation.
    dev_split_index = int(9 * len(X_train) / 10)
    X_dev = X_train[dev_split_index:]
    y_dev = y_train[dev_split_index:]
    X_train = X_train[:dev_split_index]
    y_train = y_train[:dev_split_index]

    # Shuffle the training examples and labels with one shared ordering.
    permutation = np.arange(len(X_train))
    np.random.shuffle(permutation)
    X_train = [X_train[i] for i in permutation]
    y_train = [y_train[i] for i in permutation]

    # Batch each split.
    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Model specification. The network previously stopped after the first
    # pooling layer, so it emitted N x 32 x 13 x 13 feature maps instead of
    # one score per class. The second convolution stage and the classifier
    # head complete it.
    model = nn.Sequential(
        nn.Conv2d(1, 32, (3, 3)),    # 1 x 28 x 28 -> 32 x 26 x 26
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),        # -> 32 x 13 x 13
        nn.Conv2d(32, 64, (3, 3)),   # -> 64 x 11 x 11
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),        # -> 64 x 5 x 5
        nn.Flatten(),                # -> 1600 (64 * 5 * 5)
        nn.Linear(1600, 128),
        nn.Dropout(p=0.5),
        nn.Linear(128, num_classes),
    )

    train_model(train_batches, dev_batches, model, nesterov=True)

    # Evaluate on the test split with the model in eval mode.
    loss, accuracy = run_epoch(test_batches, model.eval(), None)

    print("Loss on test set:" + str(loss) + " Accuracy on test set: " +
          str(accuracy))
Example #20
0
def main():
    """Train a 784-10-10 ReLU network on MNIST and print test metrics.

    Holds out the last tenth of the training set as a dev set, shuffles the
    remaining training examples, trains the network and prints the test loss
    and accuracy.
    """
    # ======== Load the dataset ===========
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # ======= Split into train and dev ==========
    cut = int(9 * len(X_train) / 10)
    X_train, X_dev = X_train[:cut], X_train[cut:]
    y_train, y_dev = y_train[:cut], y_train[cut:]

    # Shuffle the training examples and labels with one shared ordering.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [y_train[idx] for idx in order]

    # ========= Split dataset into batches ============
    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    ## =========== MODEL SPECIFICATION ============
    layers = [
        nn.Linear(784, 10),
        nn.ReLU(),
        nn.Linear(10, num_classes),
    ]
    model = nn.Sequential(*layers)

    train_model(train_batches, dev_batches, model, lr=0.1, momentum=0)

    # Evaluate on the test split with the model in eval mode.
    eval_model = model.eval()
    loss, accuracy = run_epoch(test_batches, eval_model, None)

    print(f"Loss on test set:{loss!s} Accuracy on test set: {accuracy!s}")
Example #21
0
def main():
    """Train ``MLP`` on the two-label dataset and print test metrics.

    Loads the data via ``U.get_data``, holds out the last tenth of the
    training examples as a dev set, shuffles the remaining training examples
    and trains with ``train_model``. ``path_to_data_dir``,
    ``use_mini_dataset``, ``batch_size``, ``img_rows`` and ``img_cols`` come
    from the enclosing scope.
    """
    X_train, y_train, X_test, y_test = U.get_data(path_to_data_dir,
                                                  use_mini_dataset)

    # Hold out the trailing tenth of the training data for validation.
    cut = int(9 * len(X_train) / 10)
    X_train, X_dev = X_train[:cut], X_train[cut:]
    first_labels, second_labels = y_train[0], y_train[1]
    y_dev = [first_labels[cut:], second_labels[cut:]]
    y_train = [first_labels[:cut], second_labels[:cut]]

    # Shuffle the examples and both label sequences with one shared ordering.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [[labels[idx] for idx in order] for labels in y_train]

    # Batch each split.
    # Original author's notes (not verified here): each batch's 'x' is
    # (batch_size, 1, img_rows, img_cols) = (64, 1, 42, 28) and its 'y' is
    # (num_labels, batch_size) = (2, 64); with 36000 train / 4000 dev
    # examples this gave 562 / 62 / 62 train / dev / test batches.
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Build the model; the input size is the flattened image size.
    model = MLP(img_rows * img_cols)

    train_model(train_batches, dev_batches, model)

    # Evaluate on the test split; loss/acc hold one entry per label.
    eval_model = model.eval()
    loss, acc = run_epoch(test_batches, eval_model, None)
    print(f'Test loss1: {loss[0]:.6f}  accuracy1: {acc[0]:.6f}  loss2: {loss[1]:.6f}   accuracy2: {acc[1]:.6f}')
Example #22
0
def main(batch_size=32, lr=1e-1, hidden_size=10, momentum=0):
    """Train a one-hidden-layer network on MNIST and report its test metrics.

    Args:
        batch_size: number of examples per batch passed to ``batchify_data``.
        lr: learning rate forwarded to ``train_model``.
        hidden_size: width of the single hidden layer.
        momentum: momentum forwarded to ``train_model``.

    Returns:
        The tuple ``(batch_size, lr, momentum, hidden_size, val_acc)`` where
        ``val_acc`` is whatever ``train_model`` returns.
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # The last 10% of the training examples become the dev set.
    split_at = int(9 * len(X_train) / 10)
    X_dev, X_train = X_train[split_at:], X_train[:split_at]
    y_dev, y_train = y_train[split_at:], y_train[:split_at]

    # Shuffle inputs and labels with one shared random order.
    order = np.arange(X_train.shape[0])
    np.random.shuffle(order)
    X_train, y_train = X_train[order], y_train[order]

    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Model: input features -> hidden layer -> ReLU -> class scores.
    input_size = X_train.shape[1]
    layers = [
        nn.Linear(input_size, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, num_classes),
    ]
    model = nn.Sequential(*layers)

    val_acc = train_model(train_batches,
                          dev_batches,
                          model,
                          lr=lr,
                          momentum=momentum)

    # Evaluate on the test split (no optimizer passed).
    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    report = ("Loss on test set:" + str(loss) + " Accuracy on test set: " +
              str(accuracy))
    print(report)

    return batch_size, lr, momentum, hidden_size, val_acc
Example #23
0
def main():
    """Train the ``CNN`` on the two-label dataset and print its test metrics.

    Loads the data through ``U.get_data``, keeps the last tenth of the
    training examples as a dev set, shuffles the rest with a random
    permutation, batches all three splits with the module-level
    ``batch_size``, trains the model and prints the loss and accuracy of both
    labels on the test set.
    """
    X_train, y_train, X_test, y_test = U.get_data(path_to_data_dir,
                                                  use_mini_dataset)

    # Split into train and dev (last 10% of the training examples)
    dev_split_index = int(9 * len(X_train) / 10)
    X_dev = X_train[dev_split_index:]
    y_dev = [y_train[0][dev_split_index:], y_train[1][dev_split_index:]]
    X_train = X_train[:dev_split_index]
    y_train = [y_train[0][:dev_split_index], y_train[1][:dev_split_index]]

    # One shared permutation keeps inputs and both label sets aligned.
    permutation = torch.randperm(len(X_train))
    X_train = X_train[permutation]
    y_train = [y_train[0][permutation], y_train[1][permutation]]

    # Split dataset into batches
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Load model. CNN() is constructed without arguments, so the previously
    # computed (and never used) input dimension has been dropped.
    model = CNN()

    # Move model to the GPU when one is available
    if torch.cuda.is_available():
        model = model.to(device)
        print("----------------- Using the Device: GPU -----------------")
    else:
        print("----------------- Using the Device: CPU -----------------")

    # Train
    train_model(train_batches, dev_batches, model)

    # Evaluate the model on test data (no optimizer passed)
    loss, acc = run_epoch(test_batches, model.eval(), None)
    print(
        'Test loss1: {:.6f}  accuracy1: {:.6f}  loss2: {:.6f}   accuracy2: {:.6f}'
        .format(loss[0], acc[0], loss[1], acc[1]))
Example #24
0
def main():
    """Train a small convolutional network on MNIST and print test metrics.

    The flat MNIST vectors are reshaped to 1x28x28 images, the last tenth of
    the training examples is held out as a dev set, the rest is shuffled, and
    the model is trained with Nesterov momentum before being scored on the
    test set.
    """
    # Number of digit classes; the final layer below uses the same value.
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Conv2d expects image-shaped input, so restore the 1x28x28 layout.
    X_train, X_test = (np.reshape(split, (split.shape[0], 1, 28, 28))
                       for split in (X_train, X_test))

    # Hold out the final 10% of the training examples for validation.
    split_at = int(9 * len(X_train) / 10)
    X_dev, X_train = X_train[split_at:], X_train[:split_at]
    y_dev, y_train = y_train[split_at:], y_train[:split_at]

    # Shuffle inputs and labels with one shared random order.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [y_train[idx] for idx in order]

    batch_size = 32
    train_batches, dev_batches, test_batches = (
        batchify_data(X_train, y_train, batch_size),
        batchify_data(X_dev, y_dev, batch_size),
        batchify_data(X_test, y_test, batch_size),
    )

    #################################
    ## Model specification
    #pragma: coderesponse template name="pytorchcnn" dedent="true"
    model = nn.Sequential(
        # input: 1x28x28 image
        nn.Conv2d(1, 32, (3, 3)),  # 32 filters of size 3x3 -> 32x26x26
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),  # 2x2 max pooling -> 32x13x13
        nn.Conv2d(32, 64, (3, 3)),  # 64 filters of size 3x3 -> 64x11x11
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),  # 2x2 max pooling -> 64x5x5
        Flatten(),  # 64 * 5 * 5 = 1600 features
        nn.Linear(1600, 128),  # fully connected, 1600 -> 128
        nn.Dropout(0.5),  # dropout with probability 0.5
        nn.Linear(128, 10),  # fully connected, 128 -> 10
    )
    #pragma: coderesponse end
    ##################################

    train_model(train_batches, dev_batches, model, nesterov=True)

    # Evaluate on the test split (no optimizer passed).
    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    report = ("Loss on test set:" + str(loss) + " Accuracy on test set: " +
              str(accuracy))
    print(report)
Example #25
0
def main():
    """Compare single-hyper-parameter variations of a baseline MNIST network.

    A baseline configuration (batch size 32, ReLU, lr 0.1, momentum 0) is
    trained first, followed by runs that each change exactly one setting.
    For every run the dev and test accuracies are recorded; the best dev
    accuracy among the non-baseline runs is reported at the end together with
    all collected scores.
    """
    # Load the dataset
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Split into train and dev (last 10% of the training examples)
    dev_split_index = int(9 * len(X_train) / 10)
    X_dev = X_train[dev_split_index:]
    y_dev = y_train[dev_split_index:]
    X_train = X_train[:dev_split_index]
    y_train = y_train[:dev_split_index]

    # Shuffle inputs and labels with one shared random order.
    permutation = np.arange(len(X_train))
    np.random.shuffle(permutation)
    X_train = [X_train[i] for i in permutation]
    y_train = [y_train[i] for i in permutation]

    # Scores recorded by the original author from earlier runs:
    #validation_scores = [0.932487, 0.944388, 0.937834, 0.907587, 0.936497]
    #validation_scores2 = [0.977440, 0.977487, 0.978610, 0.968416, 0.978443]
    validation_scores = []
    best_validation = {'score': 0, 'param': None}
    test_scores = []

    baseline = {
        'batch_size': 32,
        'activation': nn.ReLU(),
        'lr': 0.1,
        'momentum': 0
    }

    # The empty tuple is the unmodified baseline; every other entry overrides
    # one key.  The learning-rate variation must differ from the baseline's
    # 0.1, otherwise that run merely repeats the baseline.
    grid = [(), ('batch_size', 64), ('lr', 0.01), ('momentum', 0.9),
            ('activation', nn.LeakyReLU())]

    for p in grid:
        # Reset both RNGs so every configuration starts from the same state.
        np.random.seed(12321)  # for reproducibility
        torch.manual_seed(12321)  # for reproducibility
        print('Testing param:', p)
        params = copy.deepcopy(baseline)

        if p:
            params[p[0]] = p[1]

        # Split dataset into batches
        batch_size = params['batch_size']
        train_batches = batchify_data(X_train, y_train, batch_size)
        dev_batches = batchify_data(X_dev, y_dev, batch_size)
        test_batches = batchify_data(X_test, y_test, batch_size)

        #################################
        ## Model specification
        model = nn.Sequential(
            nn.Linear(784, 128),
            params['activation'],
            nn.Linear(128, num_classes),
        )
        lr = params['lr']
        momentum = params['momentum']
        ##################################

        train_model(train_batches,
                    dev_batches,
                    model,
                    lr=lr,
                    momentum=momentum)

        ## Evaluate on validation data
        loss, accuracy = run_epoch(dev_batches, model.eval(), None)
        validation_scores.append(accuracy)
        # Only the variations compete for "best"; the baseline is excluded.
        if accuracy > best_validation['score'] and p:
            best_validation['score'] = accuracy
            best_validation['param'] = p[0]

        ## Evaluate the model on test data
        loss, accuracy = run_epoch(test_batches, model.eval(), None)
        test_scores.append(accuracy)

        print("Loss on test set:" + str(loss) + " Accuracy on test set: " +
              str(accuracy))

    print('Best validation:', best_validation)
    print('Validation scores:', validation_scores)
    print('Test scores:', test_scores)
Example #26
0
def main():
    """Train a two-convolution CNN on MNIST and print its test metrics.

    The flat MNIST vectors are reshaped into 4D image batches, 10% of the
    training examples are held out for validation, the rest is shuffled, and
    the network is trained with Nesterov momentum before being evaluated on
    the test split.
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Conv2d consumes 4D input, so turn every example into a 1x28x28 image.
    X_train, X_test = (np.reshape(split, (split.shape[0], 1, 28, 28))
                       for split in (X_train, X_test))

    # Train (first 90%) / dev (last 10%) split.
    split_at = int(9 * len(X_train) / 10)
    X_dev, X_train = X_train[split_at:], X_train[:split_at]
    y_dev, y_train = y_train[split_at:], y_train[:split_at]

    # Shuffle inputs and labels with one shared random order.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [y_train[idx] for idx in order]

    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Layers are applied in sequence by nn.Sequential:
    #   Conv2d(in_channels, out_channels, kernel_size)
    #   MaxPool2d(kernel_size) - the stride defaults to the kernel size
    #   Linear(in_features, out_features)
    #   Dropout(p) - randomly deactivates units to reduce overfitting
    layers = [
        nn.Conv2d(1, 32, (3, 3)),  # 32 filters of size 3x3, one input channel
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),  # 2x2 max pooling
        nn.Conv2d(32, 64, (3, 3)),  # 64 filters of size 3x3
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),
        Flatten(),
        nn.Linear(1600, 128),  # fully connected layer with 128 units
        nn.Dropout(0.5),  # drop probability 0.5
        nn.Linear(128, num_classes),  # one output per class
    ]
    model = nn.Sequential(*layers)

    # train_model hands back five values here; none of them is used below.
    val_acc, train_acc, train_loss, v_loss, v_acc = train_model(
        train_batches, dev_batches, model, nesterov=True)

    # Evaluate on the test split (no optimizer passed).
    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    report = ("Loss on test set:" + str(loss) + " Accuracy on test set: " +
              str(accuracy))
    print(report)
def run():
    """Train the EN->JA translation model and checkpoint the best epoch.

    Reads the command-line arguments, builds the dataset, model, label
    smoothing criterion, iterators and optimizer, then trains for
    ``args.epoch`` epochs.  After every epoch the validation loss is printed,
    and whenever it improves on the best value seen so far the model and both
    vocab fields are serialized with ``dill`` to ``args.output_path``.
    """
    # Start from +inf so the first epoch always produces a checkpoint, however
    # large its validation loss is.
    best_val_loss = float("inf")

    args = get_args()
    train, val, EN, JA = get_dataset(args.data_path)

    devices = [0]

    pad_idx = EN.vocab.stoi['<blank>']
    model = make_model(len(EN.vocab), len(JA.vocab), n=6)
    model.cuda()
    criterion = LabelSmoothing(size=len(JA.vocab),
                               padding_idx=pad_idx,
                               smoothing=0.1)
    criterion.cuda()
    BATCH_SIZE = 540
    train_iter = MyIterator(train,
                            batch_size=BATCH_SIZE,
                            device=0,
                            repeat=False,
                            sort_key=lambda x: (len(x.src), len(x.trg)),
                            batch_size_fn=batch_size_fn,
                            train=True)
    valid_iter = MyIterator(val,
                            batch_size=BATCH_SIZE,
                            device=0,
                            repeat=False,
                            sort_key=lambda x: (len(x.src), len(x.trg)),
                            batch_size_fn=batch_size_fn,
                            train=False)
    # model_par = nn.DataParallel(model, device_ids=devices)

    # Adam is created with lr=0 and wrapped in NoamOpt; presumably NoamOpt
    # sets the actual learning rate per step - confirm against its definition.
    model_opt = NoamOpt(model.src_embed[0].d_model,
                        1,
                        2000,
                        torch.optim.Adam(model.parameters(),
                                         lr=0,
                                         betas=(0.9, 0.98),
                                         eps=1e-9))

    for epoch in range(args.epoch):
        # Training pass: the loss compute receives the optimizer.
        # model_par.train()
        model.train()
        run_epoch((rebatch(pad_idx, b) for b in train_iter),
                  # model_par,
                  model,
                  MultiGPULossCompute(model.generator,
                                      criterion,
                                      devices=devices,
                                      opt=model_opt))

        # Validation pass: same loss compute, but without an optimizer.
        # model_par.eval()
        model.eval()
        loss = run_epoch((rebatch(pad_idx, b) for b in valid_iter),
                         # model_par,
                         model,
                         MultiGPULossCompute(model.generator,
                                             criterion,
                                             devices=devices,
                                             opt=None))
        print(loss)
        if loss < best_val_loss:
            best_val_loss = loss
            # Serialize from the CPU, then return the model to the GPU so
            # training can continue.
            model.cpu()
            with open(args.output_path, 'wb') as f:
                dill.dump((model, EN, JA), f)
            model.cuda()
                            train=True)
    valid_iter = MyIterator(val,
                            batch_size=BATCH_SIZE,
                            device=0,
                            repeat=False,
                            sort_key=lambda x: (len(x.src), len(x.trg)),
                            batch_size_fn=batch_size_fn,
                            train=False)
    model_par = nn.DataParallel(model, device_ids=devices)

# Always-true guard: the training/validation loop below runs unconditionally.
if True:
    # Adam is created with lr=0 and wrapped in NoamOpt; presumably NoamOpt
    # sets the actual learning rate per step - confirm against its definition.
    model_opt = NoamOpt(
        model.src_embed[0].d_model, 1, 2000,
        torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98),
                         eps=1e-9))
    # Fixed number of epochs (10); each epoch is one training pass followed by
    # one validation pass over freshly re-batched data.
    for epoch in range(10):
        # Training pass: the loss compute is given the optimizer.
        model_par.train()
        run_epoch((rebatch(pad_idx, b) for b in train_iter), model_par,
                  MultiGPULossCompute(model.generator,
                                      criterion,
                                      devices=devices,
                                      opt=model_opt))

        # Validation pass: opt=None, so the loss compute has no optimizer.
        model_par.eval()
        loss = run_epoch((rebatch(pad_idx, b) for b in valid_iter), model_par,
                         MultiGPULossCompute(model.generator,
                                             criterion,
                                             devices=devices,
                                             opt=None))
        # Report the value returned by the validation pass.
        print(loss)
Example #29
0
    best_loss = 1e9
    patience = NUM_EARLY_STOPPING_PATIENCE

    tb_writer = SummaryWriter('runs/gtzan_fault_filtered_cnn_pretrain')

    manual_seed()
    with tqdm(range(EPOCHS), total=EPOCHS) as epoch_progress:
        for epoch in epoch_progress:
            train_loss = -1
            train_acc = -1
            val_loss = -1
            val_acc = -1
            for phase in ('train', 'val'):
                loss, acc = run_epoch(net,
                                      optimizer,
                                      dataloaders[phase],
                                      criterion,
                                      phase,
                                      pretrain=True)

                if phase == 'val':
                    val_loss = loss
                    val_acc = acc
                else:
                    train_loss = loss
                    train_acc = acc
                    scheduler.step()
            info = OrderedDict(train_loss=train_loss,
                               train_acc=train_acc,
                               val_loss=val_loss,
                               val_acc=val_acc)
def main():
    """Train a two-convolution CNN on MNIST and print its test metrics.

    The flat MNIST vectors are reshaped to 1x28x28 images, the last tenth of
    the training examples is held out as a dev set, the rest is shuffled, and
    the model is trained with Nesterov momentum before being scored on the
    test set.
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Restore the 1x28x28 image layout expected by the convolution layers.
    X_train, X_test = (np.reshape(split, (split.shape[0], 1, 28, 28))
                       for split in (X_train, X_test))

    # Hold out the final 10% of the training examples for validation.
    split_at = int(9 * len(X_train) / 10)
    X_dev, X_train = X_train[split_at:], X_train[:split_at]
    y_dev, y_train = y_train[split_at:], y_train[:split_at]

    # Shuffle inputs and labels with one shared random order.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [y_train[idx] for idx in order]

    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Layer indices match the keys of the model's state_dict.  Parameter
    # shapes noted by the original author:
    #   0.weight [32, 1, 3, 3]    0.bias [32]
    #   3.weight [64, 32, 3, 3]   3.bias [64]
    #   7.weight [128, 1600]      7.bias [128]
    #   9.weight [10, 128]        9.bias [10]
    # The author also recorded the optimizer's param group as lr 0.01,
    # momentum 0.9, dampening 0, weight_decay 0, nesterov True.
    layers = [
        nn.Conv2d(1, 32, (3, 3)),  # 0
        nn.ReLU(),  # 1
        nn.MaxPool2d((2, 2)),  # 2
        nn.Conv2d(32, 64, (3, 3)),  # 3
        nn.ReLU(),  # 4
        nn.MaxPool2d((2, 2)),  # 5
        Flatten(),  # 6: noted as [32, 64, 5, 5] in, [32, 1600] out
        nn.Linear(1600, 128),  # 7
        nn.Dropout(0.5),  # 8
        nn.Linear(128, num_classes),  # 9
    ]
    model = nn.Sequential(*layers)

    train_model(train_batches, dev_batches, model, nesterov=True)

    # Evaluate on the test split (no optimizer passed).
    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    report = ("Loss on test set:" + str(loss) + " Accuracy on test set: " +
              str(accuracy))
    print(report)