Example #1
def train():
    """
    Trains the ConvNet model and evaluates it on the whole test set
    every eval_freq iterations.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    # all external parameters in a readable format
    lr = FLAGS.learning_rate
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    eval_freq = FLAGS.eval_freq
    data_dir = FLAGS.data_dir

    #fetch data
    data = cifar10_utils.get_cifar10(data_dir)
    n_classes = 10
    n_channels = 3

    # number of mini-batches per training step; hard-coded to 1 here
    n_iter = 1  # int(np.ceil(data["train"]._num_examples / batch_size)) would cover the whole training set

    # number of test batches needed to cover the whole test set
    num_evals = int(np.ceil(data['test']._num_examples / batch_size))

    #load model
    cnn_model = ConvNet(n_channels, n_classes)

    #Loss function
    loss_XE = torch.nn.CrossEntropyLoss()

    # keep track of how loss and accuracy evolve over time
    loss_train = np.zeros(max_steps + 1)  #loss on training data
    acc_train = np.zeros(max_steps + 1)  #accuracy on training data
    loss_eval = np.zeros(max_steps + 1)  #loss on test data
    acc_eval = np.zeros(max_steps + 1)  #accuracy on test data

    # optimizer
    optimizer = optim.Adam(cnn_model.parameters(), lr=lr)

    # move the model to the available device (GPU if present)
    cnn_model.to(device)

    #index to keep track of the evaluations.
    eval_i = 0

    # training loop
    for s in range(max_steps):

        for n in range(n_iter):

            #fetch next batch of data
            X, y = data['train'].next_batch(batch_size)

            #use torch tensor + gpu
            X = torch.from_numpy(X).type(dtype).to(device)
            y = torch.from_numpy(y).type(dtype).to(device)

            # reset gradients to zero before computing new ones
            optimizer.zero_grad()

            # forward pass: the model returns logits; CrossEntropyLoss
            # applies log-softmax internally and expects class indices
            probs = cnn_model(X)  # calling the module invokes .forward()
            loss = loss_XE(probs, y.argmax(dim=1))

            # backward propagation and parameter update
            loss.backward()
            optimizer.step()

            # accumulate training loss and accuracy as a running average
            # over the batches seen between evaluations
            loss_train[eval_i] += loss.item() / (eval_freq * n_iter)
            acc_train[eval_i] += accuracy(probs, y) / (eval_freq * n_iter)

        if s % eval_freq == 0 or s == max_steps - 1:
            # evaluate on the whole test set, one batch at a time,
            # without building autograd graphs
            with torch.no_grad():
                for t in range(num_evals):
                    # fetch the next test batch
                    X, y = data['test'].next_batch(batch_size)

                    # torch tensors on the right device; no gradients needed
                    X = torch.from_numpy(X).type(dtype).to(device)
                    y = torch.from_numpy(y).type(dtype).to(device)

                    # loss and accuracy for this batch
                    probs = cnn_model(X)
                    loss_eval[eval_i] += loss_XE(probs, y.argmax(dim=1)).item()
                    acc_eval[eval_i] += accuracy(probs, y)

            # average the losses and accuracies across test batches
            loss_eval[eval_i] /= num_evals
            acc_eval[eval_i] /= num_evals

            # print performance
            print(f"step {s} out of {max_steps}")
            print(f"    test  loss: {loss_eval[eval_i]}, accuracy: {acc_eval[eval_i]}")
            print(f"    train loss: {loss_train[eval_i]}, accuracy: {acc_train[eval_i]}")

            #increments eval counter
            eval_i += 1

    # save the tracked metrics for later analysis
    print("saving results in folder...")
    np.save("loss_train", loss_train)
    np.save("accuracy_train", acc_train)
    np.save("loss_eval", loss_eval)
    np.save("accuracy_eval", acc_eval)

    print("savign model")
    torch.save(cnn_model.state_dict(), cnn_model.__class__.__name__ + ".pt")
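
Both examples assume a few names defined elsewhere in the surrounding module: a
global `device`, a tensor type `dtype`, and an `accuracy(predictions, targets)`
helper for one-hot targets. None of these appear in the source, so here is a
minimal sketch of plausible definitions (an assumption, not the scaffold's
actual code):

import torch

# assumed globals: CPU float tensors by default, GPU device when available
dtype = torch.FloatTensor
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def accuracy(predictions, targets):
    """Fraction of samples whose argmax prediction matches the one-hot
    target; both arguments are (batch_size, n_classes) tensors."""
    return (predictions.argmax(dim=1) == targets.argmax(dim=1)).float().mean().item()
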
Example #2
def train():
    """
    Performs training and evaluation of ConvNet model. 
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    # hyperparameters read from the command-line flags
    lr = FLAGS.learning_rate
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    eval_freq = FLAGS.eval_freq
    data_dir = FLAGS.data_dir
    optim = FLAGS.optimizer  # optimizer name as a string, e.g. 'adam'

    #fetch data
    cifar10 = cifar10_utils.get_cifar10(data_dir)
    n_classes = 10
    n_channels = 3

    eval_rounds = int(np.ceil(cifar10['test']._num_examples / batch_size))
    model = ConvNet(n_channels, n_classes)
    ce = torch.nn.CrossEntropyLoss()
    pars = model.parameters()

    # optimizer
    optim_pars = {'params': pars, 'lr': lr, 'weight_decay': FLAGS.weight_decay}
    if optim == 'adadelta':
        optimizer = torch.optim.Adadelta(**optim_pars)
    elif optim == 'adagrad':
        optimizer = torch.optim.Adagrad(**optim_pars)
    elif optim == 'rmsprop':
        optimizer = torch.optim.RMSprop(**optim_pars)
    elif optim == 'adam':
        optimizer = torch.optim.Adam(**optim_pars)
    else:  # SGD
        optimizer = torch.optim.SGD(**optim_pars)

    model.to(device)
    eval_i = 0

    cols = ['train_acc', 'test_acc', 'train_loss', 'test_loss', 'secs']

    # train
    results = []
    name = f'convnet-pytorch-{optim}'
    with SummaryWriter(name) as w:
        for step in tqdm(range(FLAGS.max_steps)):
            optimizer.zero_grad()

            # fetch a training batch and move it to the device
            X, y = cifar10['train'].next_batch(batch_size)
            X = torch.tensor(X).type(dtype).to(device)
            y = torch.tensor(y).type(dtype).to(device)

            # forward pass: logits, accuracy, and cross-entropy loss
            train_predictions = model(X)
            train_acc = accuracy(train_predictions, y)
            idx_train = torch.argmax(y, dim=-1).long()
            train_loss = ce(train_predictions, idx_train)

            # early stopping: halt once the mean test loss over the last
            # `check` evaluations has stopped improving by at least `threshold`
            check = 10
            if len(results) >= 2 * check:
                threshold = 1e-6
                losses = [result['test_loss'] for result in results]
                current = np.mean(losses[-check:])
                prev = np.mean(losses[-2 * check:-check])
                if (prev - current) < threshold:
                    break

            train_loss.backward()
            optimizer.step()

            # evaluate
            if step % FLAGS.eval_freq == 0:
                time = step // FLAGS.eval_freq  # evaluation index for the writer's x-axis
                start = timer()
                test_accs = []
                test_losses = []
                with torch.no_grad():
                    for t in range(eval_rounds):
                        X, y = cifar10['test'].next_batch(batch_size)
                        X = torch.tensor(X).type(dtype).to(device)
                        y = torch.tensor(y).type(dtype).to(device)
                        test_predictions = model(X)
                        test_accs.append(accuracy(test_predictions, y))
                        test_losses.append(
                            ce(test_predictions, y.argmax(dim=1)).item())
                end = timer()
                secs = end - start

                test_acc = np.mean(test_accs)
                test_loss = np.mean(test_losses)
                vals = [train_acc, test_acc, train_loss, test_loss, secs]
                # np.asscalar is deprecated; .item() extracts a Python
                # scalar from tensors and numpy values alike
                stats = dict(
                    zip(cols, [
                        i.item()
                        if isinstance(i, (torch.Tensor, np.ndarray, np.generic))
                        else i for i in vals
                    ]))
                print(
                    yaml.dump({
                        k: round(i, 3) if isinstance(i, float) else i
                        for k, i in stats.items()
                    }))
                w.add_scalars('metrics', stats, time)
                results.append(stats)

    df = pd.DataFrame(results, columns=cols)
    meta = {
        'framework': 'pytorch',
        'algo': 'convnet',
        'optimizer': optim,
        'batch_size': FLAGS.batch_size,
        'learning_rate': FLAGS.learning_rate,
        'dnn_hidden_units': '',
        'weight_decay': FLAGS.weight_decay,
        'max_steps': FLAGS.max_steps,
    }
    for k, v in meta.items():
        df[k] = v
    output_file = f'{name}.csv'
    # append to an existing results file, otherwise create it with a header
    if os.path.isfile(output_file):
        df.to_csv(output_file, header=False, mode='a')
    else:
        df.to_csv(output_file, header=True, mode='w')
    torch.save(model.state_dict(), f'{name}.pth')
    print('done!')
    return test_loss
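
Both versions read hyperparameters from a module-level `FLAGS` object populated
before `train()` runs. The flag names below follow the attributes used above;
the argparse wiring and defaults are an illustrative sketch, not the original
script's exact setup:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--learning_rate', type=float, default=1e-4)
parser.add_argument('--max_steps', type=int, default=5000)
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--eval_freq', type=int, default=500)
parser.add_argument('--data_dir', type=str, default='cifar10/cifar-10-batches-py')
parser.add_argument('--optimizer', type=str, default='adam')
parser.add_argument('--weight_decay', type=float, default=0.0)
FLAGS, unparsed = parser.parse_known_args()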