def train_with_optimizer(opt_name, opt_class, fig):
    torch.manual_seed(seed)

    # Get hyperparameters
    hp = answers.part2_optim_hp()
    hidden_features = [128] * 5
    num_epochs = 10

    # Create model, loss and optimizer instances
    model = models.MLP(in_features, num_classes, hidden_features, wstd=hp['wstd'])
    loss_fn = blocks.CrossEntropyLoss()
    optimizer = opt_class(model.params(), learn_rate=hp[f'lr_{opt_name}'], reg=hp['reg'])

    # Train with the Trainer
    trainer = training.BlocksTrainer(model, loss_fn, optimizer)
    fit_res = trainer.fit(dl_train, dl_test, num_epochs, max_batches=max_batches)

    fig, axes = plot_fit(fit_res, fig=fig, legend=opt_name)
    return fig
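# Usage sketch: compare the optimizers on the same axes by reusing the figure.
# The class names MomentumSGD and RMSProp are assumptions inferred from the
# lr_momentum/lr_rmsprop hyperparameter keys; substitute the actual classes
# exposed by the optimizers module.
fig_optim = None
for opt_name, opt_class in [('vanilla', optimizers.VanillaSGD),
                            ('momentum', optimizers.MomentumSGD),  # assumed name
                            ('rmsprop', optimizers.RMSProp)]:      # assumed name
    fig_optim = train_with_optimizer(opt_name, opt_class, fig_optim)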
def test_cross_entropy():
    global cross_entropy

    # Test CrossEntropy
    cross_entropy = blocks.CrossEntropyLoss()
    scores = torch.randn(N, num_classes)
    labels = torch.randint(low=0, high=num_classes, size=(N,), dtype=torch.long)

    # Test forward pass
    loss = cross_entropy(scores, labels)
    expected_loss = torch.nn.functional.cross_entropy(scores, labels)
    test.assertLess(torch.abs(expected_loss - loss).item(), 1e-5)
    print('loss=', loss.item())

    # Test backward pass
    test_block_grad(cross_entropy, scores, y=labels)
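# For reference only: a minimal, numerically stable cross-entropy written
# directly with log-sum-exp. It illustrates what the test compares against
# and is independent of the blocks.CrossEntropyLoss implementation.
def cross_entropy_reference(scores, labels):
    # Subtract the per-sample max before exponentiating for numerical stability.
    shifted = scores - scores.max(dim=1, keepdim=True).values
    log_probs = shifted - shifted.exp().sum(dim=1, keepdim=True).log()
    # Mean negative log-probability of the correct class (matches the default
    # 'mean' reduction of torch.nn.functional.cross_entropy).
    return -log_probs[torch.arange(scores.shape[0]), labels].mean()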
# Scratch variant of train_with_optimizer: instead of reading the learning rate
# from answers.part2_optim_hp(), grid-search the momentum learning rate and keep
# the values that improve the final test accuracy.
def search_momentum_lr(opt_name, opt_class):
    torch.manual_seed(seed)
    res = []

    # Candidate values, log-spaced between the given endpoints.
    # np.geomspace treats the endpoints as actual values (np.logspace would
    # interpret them as powers of ten).
    reg_values = np.geomspace(0.01, 0.4, num=10)
    lr_rmsprop_values = np.geomspace(1e-4, 1, num=10)
    lr_momentum_values = np.geomspace(1e-4, 1, num=10)

    best_acc = 0
    for momentum in lr_momentum_values:
        hp = dict(wstd=0.3, lr_vanilla=0.001, lr_momentum=momentum,
                  lr_rmsprop=lr_rmsprop_values[0], reg=reg_values[0])
        hidden_features = [128] * 5
        num_epochs = 10

        # Create model, loss and optimizer instances
        model = models.MLP(in_features, num_classes, hidden_features, wstd=hp['wstd'])
        loss_fn = blocks.CrossEntropyLoss()
        optimizer = opt_class(model.params(), learn_rate=hp[f'lr_{opt_name}'], reg=hp['reg'])

        # Train with the Trainer
        trainer = training.BlocksTrainer(model, loss_fn, optimizer)
        fit_res = trainer.fit(dl_train, dl_test, num_epochs, max_batches=max_batches)

        # Track the best final test accuracy (assumes FitResult exposes a
        # per-epoch test_acc sequence).
        final_acc = fit_res.test_acc[-1]
        if final_acc > best_acc:
            best_acc = final_acc
            res.append((momentum, best_acc))

    return res
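# Usage sketch for the search above: run it with the momentum optimizer and read
# off the learning rate with the best final test accuracy (the last appended
# pair). MomentumSGD is an assumed class name; use the actual class from the
# optimizers module.
search_res = search_momentum_lr('momentum', optimizers.MomentumSGD)
best_lr, best_acc = search_res[-1]
print(f'best momentum lr={best_lr:.4g}, test accuracy={best_acc:.1f}')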
test.assertEqual(len(mlp.sequence), 7)

num_linear = 0
for b1, b2 in zip(mlp.sequence, mlp.sequence[1:]):
    if str(b2).lower() == activation:
        test.assertTrue(str(b1).startswith('Linear'))
        num_linear += 1

test.assertTrue(str(mlp.sequence[-1]).startswith('Linear'))
test.assertEqual(num_linear, 3)

# Test MLP gradients
# Test forward pass
x_test = torch.randn(N, in_features)
labels = torch.randint(low=0, high=num_classes, size=(N,), dtype=torch.long)
z = mlp(x_test)
test.assertSequenceEqual(z.shape, [N, num_classes])

# Create a sequence of MLPs and CE loss
# Note: deliberately using the same MLP instance multiple times to create a recurrence.
seq_mlp = blocks.Sequential(mlp, mlp, mlp, blocks.CrossEntropyLoss())
loss = seq_mlp(x_test, y=labels)
test.assertEqual(loss.dim(), 0)
print(f'MLP loss={loss}, activation={activation}')

# Test backward pass
test_block_grad(seq_mlp, x_test, y=labels)
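# For illustration only: a central-difference numeric gradient, the kind of
# reference a helper like test_block_grad typically compares a block's analytic
# backward pass against. The actual test_block_grad used above may differ.
def numeric_gradient(fn, x, eps=1e-3):
    # fn: callable mapping tensor x to a scalar tensor. Each element of x is
    # perturbed in place (via a view) and restored afterwards.
    grad = torch.zeros_like(x)
    x_flat, grad_flat = x.view(-1), grad.view(-1)
    for i in range(x_flat.numel()):
        orig = x_flat[i].item()
        x_flat[i] = orig + eps
        f_plus = fn(x).item()
        x_flat[i] = orig - eps
        f_minus = fn(x).item()
        x_flat[i] = orig
        grad_flat[i] = (f_plus - f_minus) / (2 * eps)
    return grad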
# Overfit to a very small dataset of 20 samples
batch_size = 10
max_batches = 2
dl_train = torch.utils.data.DataLoader(ds_train, batch_size, shuffle=False)

# Get hyperparameters
hp = answers.part2_overfit_hp()

torch.manual_seed(seed)

# Build a model and loss using our custom MLP and CE implementations
model = blocks.MLP(3 * 32 * 32, num_classes=10, hidden_features=[128] * 3, wstd=hp['wstd'])
loss_fn = blocks.CrossEntropyLoss()

# Use our custom optimizer
optimizer = optimizers.VanillaSGD(model.params(), learn_rate=hp['lr'], reg=hp['reg'])

# Run training over small dataset multiple times
# trainer = training.BlocksTrainer(model, loss_fn, optimizer)
# best_acc = 0
# for i in range(20):
#     res = trainer.train_epoch(dl_train, max_batches=max_batches)
#     best_acc = res.accuracy if res.accuracy > best_acc else best_acc
#
# test.assertGreaterEqual(best_acc, 98)