def n_layer_nn(optimiser_function, layer_dims=(28 * 28 + 1, 128, 10), learning_rate=0.1, epochs=100):
    """Build a fully connected network, train it, and report validation metrics.

    The network is ``Linear -> Relu`` for every hidden layer, followed by a
    final ``Linear -> Sigmoid``. Training reads the module-level
    ``train_loader``, ``val_loader``, ``device`` and ``val_size`` and uses the
    ``get_minibatches`` and ``EarlyStop`` helpers defined elsewhere.

    Args:
        optimiser_function: Callable invoked as
            ``optimiser_function(model.parameters(), lr=learning_rate)`` that
            returns an optimiser exposing ``zero_grad()`` and ``step()``.
        layer_dims: Layer widths, input first and output last. At least three
            entries (input, one hidden layer, output) are required.
        learning_rate: Value forwarded to the optimiser as ``lr``.
        epochs: Maximum number of training epochs.

    Returns:
        A ``(val_losses, accuracy)`` tuple of lists with one entry per
        validation pass. Validation runs on every third epoch, starting with
        epoch 0.

    Raises:
        ValueError: If ``layer_dims`` has fewer than three entries.
    """
    # Raise explicitly instead of asserting: asserts are stripped under -O.
    if len(layer_dims) < 3:
        raise ValueError("Please give at least 3 dimensions")

    # One Linear + Relu per hidden layer, then the output projection.
    modules = []
    for fan_in, fan_out in zip(layer_dims[:-2], layer_dims[1:-1]):
        modules.append(Linear(fan_in, fan_out))
        modules.append(Relu())
    modules.append(Linear(layer_dims[-2], layer_dims[-1]))
    # NOTE(review): if CrossEntropyLoss is torch.nn.CrossEntropyLoss it expects
    # raw logits, so this Sigmoid squashes them into (0, 1) before the loss.
    # Left in place because removing it changes training results - confirm.
    modules.append(Sigmoid())
    print(modules)

    # Put the model on the same device the minibatches are moved to, rather
    # than a hard-coded 'cuda:0'.
    # Assumes `device` is a torch.device or device string - TODO confirm.
    model = Sequential(*modules).to(device)
    loss_function = CrossEntropyLoss()
    optimiser = optimiser_function(model.parameters(), lr=learning_rate)
    stopper = EarlyStop(patience=3)

    val_losses = []
    accuracy = []
    for epoch in range(epochs):
        for X, y in get_minibatches(train_loader, device):
            optimiser.zero_grad()
            yhat = model(X)
            # Targets are reduced to class indices with argmax over dim 1
            # (presumably one-hot encoded labels - verify against the loader).
            loss = loss_function(yhat, y.argmax(1))
            loss.backward()
            optimiser.step()

        # Only validate on every third epoch.
        if epoch % 3 != 0:
            continue

        with torch.no_grad():
            losses = []
            # Accumulate as a plain int so an empty validation loader cannot
            # leave us calling tensor methods on the initial 0.
            corrects = 0
            for X, y in get_minibatches(val_loader, device):
                y = y.argmax(1)
                yhat = model(X)
                losses.append(loss_function(yhat, y).item())
                ypred = yhat.argmax(1)
                corrects += (ypred == y).sum().item()

            val_loss = np.mean(losses)
            val_losses.append(val_loss)
            accuracy.append(corrects / val_size)

            if not stopper.continue_still(val_loss):
                print("Early stop at epoch {}".format(epoch))
                break

    return val_losses, accuracy
Sigmoid() ).cuda('cuda:0') loss_function = CrossEntropyLoss() optimiser = SGD(model.parameters(), lr=0.02) train_loss=[] val_loss=[] accuracy=[] for epoch in range(100): losses=[] for i,(X, y) in enumerate(get_minibatches(train_loader, device)): optimiser.zero_grad() yhat = model.forward(X) loss = loss_function(yhat, y.argmax(1)) losses.append(loss.item()) loss.backward() optimiser.step() train_loss.append(np.mean(losses)) if epoch % 10 == 0: with torch.no_grad(): losses = [] corrects = 0 for i,(X, y) in enumerate(get_minibatches(val_loader, device)): y = y.argmax(1) yhat = model.forward(X) losses.append(loss_function(yhat, y).item())