def main():
    # specify data transforms
    train_tfms = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize((0.1307,), (0.3081,))])
    test_tfms = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.1307,), (0.3081,))])

    # load data
    path = Path.cwd()
    print(path)
    train_ds = MNIST(path, train=True, download=True, transform=train_tfms)
    test_ds = MNIST(path, train=False, download=True, transform=test_tfms)

    # specify training/validation split
    val_pct = 0.2
    val_size = int(val_pct * len(train_ds))
    train_ds, val_ds = random_split(train_ds, [len(train_ds) - val_size, val_size])
    # note: random_split returns Subset objects, so assigning .transform here only sets
    # an attribute on the Subset; the underlying dataset keeps train_tfms (which is
    # identical to test_tfms in this script, so behaviour is unaffected)
    val_ds.transform = test_tfms
    print(f"Training set size: {len(train_ds)}")
    print(f"Validation set size: {len(val_ds)}")
    print(f"Test set size: {len(test_ds)}")

    # set up data loaders
    batch_size = 64
    print(f"Batch size: {batch_size}")
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
    test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=False)
    for label, dl in zip(['Training', 'Validation', 'Test'],
                         [train_dl, val_dl, test_dl]):
        x_b, y_b = next(iter(dl))
        print(f"{label} set: Input shape: {list(x_b.shape)}, Output shape: {list(y_b.shape)}")

    # specify model
    model = conv_net(ni=1, no=10, nf=[16, 32, 64], nh=[128, 64])
    print(model)

    # specify loss function
    def loss_fn(logits, labels):
        return F.cross_entropy(logits, labels)

    # specify optimizer (two parameter groups)
    optimizer = optim.Adam([{'params': model[0:3].parameters()},
                            {'params': model[3:8].parameters()}])

    # execute training loop
    run = Runner(model,
                 train_dl=train_dl,
                 val_dl=val_dl,
                 loss_fn=loss_fn,
                 metric_fns=[accuracy],
                 optimizer=optimizer,
                 callbacks=[Tracer()])
    run.train(n_epochs=1, lr=1e-3)
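# The script above passes an `accuracy` function via `metric_fns`, but its definition
# is not shown here. The following is a minimal sketch of what such a metric could look
# like, assuming it receives raw logits and integer class labels; the name
# accuracy_sketch is illustrative, not the repository's actual implementation.
import torch


def accuracy_sketch(logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
    # fraction of samples whose highest-scoring logit matches the target class
    return (logits.argmax(dim=1) == labels).float().mean()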
def main():
    # specify data transforms
    train_tfms = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize((0.1307,), (0.3081,))])
    test_tfms = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.1307,), (0.3081,))])

    # load data
    path = Path.cwd()
    print(path)
    train_ds = MNIST(path, train=True, download=True, transform=train_tfms)
    test_ds = MNIST(path, train=False, download=True, transform=test_tfms)

    # specify training/validation split
    val_pct = 0.2
    val_size = int(val_pct * len(train_ds))
    train_ds, val_ds = random_split(train_ds, [len(train_ds) - val_size, val_size])
    val_ds.transform = test_tfms
    print(f"Training set size: {len(train_ds)}")
    print(f"Validation set size: {len(val_ds)}")
    print(f"Test set size: {len(test_ds)}")

    # set up data loaders
    batch_size = 64
    print(f"Batch size: {batch_size}")
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
    test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=False)
    for label, dl in zip(['Training', 'Validation', 'Test'],
                         [train_dl, val_dl, test_dl]):
        x_b, y_b = next(iter(dl))
        print(f"{label} set: Input shape: {list(x_b.shape)}, Output shape: {list(y_b.shape)}")

    # specify model
    model = conv_net(ni=1, no=10, nf=[16, 32, 64], nh=[128, 64])
    print(model)

    # specify loss function
    def loss_fn(logits, labels):
        return F.cross_entropy(logits, labels)

    # specify optimizer
    optimizer = optim.Adam([{'params': model[0:3].parameters()},
                            {'params': model[3:8].parameters()}])

    # plot schedules
    torch.Tensor.ndim = property(lambda x: len(x.shape))  # monkey patch for plotting tensors
    annealings = "null linear cos exp".split()
    a = torch.arange(0, 100)
    p = torch.linspace(0.01, 1, 100)
    fns = [null_schedule, lin_schedule, cos_schedule, exp_schedule]
    plt.figure()
    for fn, t in zip(fns, annealings):
        f = fn(2, 1e-2)
        plt.plot(a, [f(o) for o in p], label=t)
    plt.legend()

    # specify schedules for learning rates
    sched_1 = combine_schedules([0.3, 0.7],
                                [cos_schedule(1e-4, 1e-3), cos_schedule(1e-3, 1e-5)])
    plt.figure()
    plt.plot(a, [sched_1(o) for o in p])
    sched_2 = combine_schedules([0.3, 0.7],
                                [cos_schedule(1e-3, 1e-2), cos_schedule(1e-2, 1e-4)])
    plt.figure()
    plt.plot(a, [sched_2(o) for o in p])
    plt.show()

    # execute training loop
    run = Runner(model,
                 train_dl=train_dl,
                 val_dl=val_dl,
                 loss_fn=loss_fn,
                 metric_fns=[accuracy],
                 optimizer=optimizer,
                 callbacks=[Logger(print_every=1),
                            WeightDecay(wd=1e-2),
                            OptimParamScheduler({'lr': [sched_1, sched_2]})])
    run.train(n_epochs=3, device='cuda')

    # plot learning rates
    lr = run.callbacks['OptimParamScheduler'].history['lr']
    lr = list(zip(*lr))
    plt.figure()
    plt.plot(lr[0])
    plt.figure()
    plt.plot(lr[1])
    plt.show()
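# The schedule helpers used above (null_schedule, lin_schedule, cos_schedule,
# exp_schedule, combine_schedules) are defined elsewhere in this repository. As a
# rough, assumed sketch of their contract: each factory takes (start, end) and returns
# a function of a training position p in [0, 1]; combine_schedules stitches several
# such functions together over fractional phases. The *_sketch names below are
# illustrative only, not the repository's API.
import math


def cos_schedule_sketch(start, end):
    # cosine annealing from `start` to `end` as p goes from 0 to 1
    return lambda p: start + (1 + math.cos(math.pi * (1 - p))) * (end - start) / 2


def combine_schedules_sketch(pcts, scheds):
    # run each schedule over its fraction of the run; pcts is assumed to sum to 1
    bounds = [sum(pcts[:i + 1]) for i in range(len(pcts))]

    def _inner(p):
        for i, b in enumerate(bounds):
            if p <= b or i == len(bounds) - 1:
                lo = bounds[i - 1] if i > 0 else 0.0
                return scheds[i]((p - lo) / (b - lo))

    return _inner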
def main():
    # specify data transforms
    train_tfms = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize((0.1307,), (0.3081,))])
    test_tfms = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.1307,), (0.3081,))])

    # load data
    path = Path.cwd()
    print(path)
    train_ds = MNIST(path, train=True, download=True, transform=train_tfms)
    test_ds = MNIST(path, train=False, download=True, transform=test_tfms)

    # specify training/validation split
    val_pct = 0.2
    val_size = int(val_pct * len(train_ds))
    train_ds, val_ds = random_split(train_ds, [len(train_ds) - val_size, val_size])
    val_ds.transform = test_tfms
    print(f"Training set size: {len(train_ds)}")
    print(f"Validation set size: {len(val_ds)}")
    print(f"Test set size: {len(test_ds)}")

    # set up data loaders
    batch_size = 64
    print(f"Batch size: {batch_size}")
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
    test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=False)
    for label, dl in zip(['Training', 'Validation', 'Test'],
                         [train_dl, val_dl, test_dl]):
        x_b, y_b = next(iter(dl))
        print(f"{label} set: Input shape: {list(x_b.shape)}, Output shape: {list(y_b.shape)}")

    # get first batch of data
    xb, yb = next(iter(train_dl))
    xb, yb = xb.cuda(), yb.cuda()

    # specify loss function
    def loss_fn(logits, labels):
        return F.cross_entropy(logits, labels)

    # specify model (18-layer CNN)
    nf = [8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64, 64,
          128, 128, 128, 256, 256, 256]

    # initialize model parameters using Kaiming-normal and print activation means/stds
    model = lsuv_conv_net(ni=1, no=10, nf=nf, nh=[128]).cuda()
    init_lsuv_conv_net(model, uniform=False)
    modules = find_modules(model, lambda l: isinstance(l, LSUVConvLayer))
    with OutputStatsHook.apply_to_modules(modules, mode='all') as hooks:
        model(xb)
        for hook in hooks:
            print(hook.means[-1].item(), hook.stds[-1].item())

    # execute training loop
    optimizer = optim.SGD(model.parameters(), lr=1e-3)
    run = Runner(model,
                 train_dl=train_dl,
                 val_dl=val_dl,
                 loss_fn=loss_fn,
                 metric_fns=[accuracy],
                 optimizer=optimizer,
                 callbacks=[Logger(print_every=1)])
    run.train(n_epochs=10, lr=1e-3, device='cuda')

    # re-initialize using Kaiming-normal and LSUV
    model = lsuv_conv_net(ni=1, no=10, nf=nf, nh=[128]).cuda()
    init_lsuv_conv_net(model, uniform=False)
    modules = find_modules(model, lambda l: isinstance(l, LSUVConvLayer))
    for m in modules:
        print(lsuv_module(m, model, xb))

    # execute training loop with LSUV initialization
    optimizer = optim.SGD(model.parameters(), lr=1e-3)
    run = Runner(model,
                 train_dl=train_dl,
                 val_dl=val_dl,
                 loss_fn=loss_fn,
                 metric_fns=[accuracy],
                 optimizer=optimizer,
                 callbacks=[Logger(print_every=1)])
    run.train(n_epochs=10, lr=1e-3, device='cuda')
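# lsuv_module above performs LSUV (layer-sequential unit-variance) initialization; its
# definition lives elsewhere in the repository. The sketch below shows the general idea
# only: repeatedly run a batch through the model, read the target layer's output
# statistics via a forward hook, and adjust that layer's bias/weight until the outputs
# have roughly zero mean and unit std. It assumes the layer exposes .weight and .bias;
# the _sketch name is illustrative, not the repository's API.
import torch


def lsuv_module_sketch(module, model, xb, tol=1e-3, max_iters=10):
    stats = {}

    def _hook(_m, _inp, out):
        stats['mean'], stats['std'] = out.mean().item(), out.std().item()

    handle = module.register_forward_hook(_hook)
    with torch.no_grad():
        for _ in range(max_iters):
            model(xb)  # forward pass records this module's output statistics
            if abs(stats['mean']) < tol and abs(stats['std'] - 1) < tol:
                break
            module.bias -= stats['mean']        # shift outputs toward zero mean
            module.weight.data /= stats['std']  # rescale outputs toward unit std
    handle.remove()
    return stats['mean'], stats['std']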
def main():
    # specify data transforms
    train_tfms = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize((0.1307,), (0.3081,))])
    test_tfms = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.1307,), (0.3081,))])

    # load data
    path = Path.cwd()
    print(path)
    train_ds = MNIST(path, train=True, download=True, transform=train_tfms)
    test_ds = MNIST(path, train=False, download=True, transform=test_tfms)

    # specify training/validation split
    val_pct = 0.2
    val_size = int(val_pct * len(train_ds))
    train_ds, val_ds = random_split(train_ds, [len(train_ds) - val_size, val_size])
    val_ds.transform = test_tfms
    print(f"Training set size: {len(train_ds)}")
    print(f"Validation set size: {len(val_ds)}")
    print(f"Test set size: {len(test_ds)}")

    # set up data loaders
    batch_size = 64
    print(f"Batch size: {batch_size}")
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
    test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=False)
    for label, dl in zip(['Training', 'Validation', 'Test'],
                         [train_dl, val_dl, test_dl]):
        x_b, y_b = next(iter(dl))
        print(f"{label} set: Input shape: {list(x_b.shape)}, Output shape: {list(y_b.shape)}")

    # specify model
    model = conv_net(ni=1, no=10, nf=[16, 32, 64], nh=[128, 64])
    print(model)

    # specify loss function
    def loss_fn(logits, labels):
        return F.cross_entropy(logits, labels)

    # specify optimizer
    optimizer = optim.Adam([{'params': model[0:3].parameters()},
                            {'params': model[3:8].parameters()}])

    # plot schedules
    torch.Tensor.ndim = property(lambda x: len(x.shape))  # monkey patch for plotting tensors
    a = torch.arange(0, 100)
    p = torch.linspace(0.01, 1, 100)
    sched_1 = combine_schedules([0.3, 0.7],
                                [cos_schedule(1e-4, 1e-3), cos_schedule(1e-3, 1e-5)])
    plt.plot(a, [sched_1(o) for o in p])
    sched_2 = combine_schedules([0.3, 0.7],
                                [cos_schedule(1e-3, 1e-2), cos_schedule(1e-2, 1e-4)])
    plt.figure()
    plt.plot(a, [sched_2(o) for o in p])
    sched_3 = combine_schedules([0.3, 0.7],
                                [cos_schedule(0.95, 0.85), cos_schedule(0.85, 0.95)])
    plt.figure()
    plt.plot(a, [sched_3(o) for o in p])
    sched_4 = combine_schedules([0.3, 0.7],
                                [cos_schedule(0.95, 0.85), cos_schedule(0.85, 0.95)])
    plt.figure()
    plt.plot(a, [sched_4(o) for o in p])
    plt.show()

    # execute training loop
    run = Runner(model,
                 train_dl=train_dl,
                 val_dl=val_dl,
                 loss_fn=loss_fn,
                 metric_fns=[accuracy],
                 optimizer=optimizer,
                 callbacks=[Logger(print_every=1),
                            WeightDecay(wd=1e-2),
                            OneCycleScheduler()])
    run.train(n_epochs=3, lr=(1e-3, 1e-2))

    # plot results
    lr = run.callbacks['OneCycleScheduler'].history['lr']
    lr = np.array(list(zip(*lr)))
    print(f"LR shape: {lr.shape}")
    plt.plot(lr[0])
    plt.figure()
    plt.plot(lr[1])
    betas = run.callbacks['OneCycleScheduler'].history['betas']
    betas = list(zip(*betas))
    betas = np.array(betas)
    print(f"Betas shape: {betas.shape}")
    plt.figure()
    plt.plot(betas[0, :, 0])
    plt.figure()
    plt.plot(betas[0, :, 1])
    plt.figure()
    plt.plot(betas[1, :, 0])
    plt.figure()
    plt.plot(betas[1, :, 1])
    plt.show()
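# OneCycleScheduler above is a repository callback whose internals are not shown. The
# schedules plotted in this script suggest the policy it applies: per parameter group,
# the learning rate warms up and then anneals (sched_1/sched_2) while the momentum term
# (Adam's beta1) moves in the opposite direction (sched_3/sched_4). A hedged sketch of
# a single per-batch update, assuming schedule callables like those above and a
# torch.optim.Adam optimizer; the _sketch name is illustrative only.
def one_cycle_step_sketch(optimizer, lr_scheds, mom_scheds, pos):
    # pos is the fraction of training completed (0..1); one schedule per param group
    for group, lr_s, mom_s in zip(optimizer.param_groups, lr_scheds, mom_scheds):
        group['lr'] = lr_s(pos)
        beta1, beta2 = group['betas']
        group['betas'] = (mom_s(pos), beta2)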
def main():
    # specify data transforms
    train_tfms = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize((0.1307,), (0.3081,))])
    test_tfms = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.1307,), (0.3081,))])

    # load data
    path = Path.cwd()
    print(path)
    train_ds = MNIST(path, train=True, download=True, transform=train_tfms)
    test_ds = MNIST(path, train=False, download=True, transform=test_tfms)

    # specify training/validation split
    val_pct = 0.2
    val_size = int(val_pct * len(train_ds))
    train_ds, val_ds = random_split(train_ds, [len(train_ds) - val_size, val_size])
    val_ds.transform = test_tfms
    print(f"Training set size: {len(train_ds)}")
    print(f"Validation set size: {len(val_ds)}")
    print(f"Test set size: {len(test_ds)}")

    # set up data loaders
    batch_size = 64
    print(f"Batch size: {batch_size}")
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
    test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=False)
    for label, dl in zip(['Training', 'Validation', 'Test'],
                         [train_dl, val_dl, test_dl]):
        x_b, y_b = next(iter(dl))
        print(f"{label} set: Input shape: {list(x_b.shape)}, Output shape: {list(y_b.shape)}")

    # specify model
    model = conv_net(ni=1, no=10, nf=[16, 32, 64], nh=[128, 64])
    print(model)

    # specify loss function
    def loss_fn(logits, labels):
        return F.cross_entropy(logits, labels)

    # specify optimizer
    optimizer = optim.Adam([{'params': model[0:3].parameters()},
                            {'params': model[3:8].parameters()}])

    # execute training loop
    n_epochs = 3
    max_lr = (1e-3, 1e-2)
    # note: this equals n_epochs * len(train_dl) only because the training set size
    # (48,000) is divisible by the batch size; otherwise prefer n_epochs * len(train_dl)
    total_steps = n_epochs * (len(train_dl.dataset) // train_dl.batch_size)
    run = Runner(model,
                 train_dl=train_dl,
                 val_dl=val_dl,
                 loss_fn=loss_fn,
                 metric_fns=[accuracy],
                 optimizer=optimizer,
                 callbacks=[Logger(print_every=1),
                            WeightDecay(wd=1e-2),
                            OptimLRScheduler(OneCycleLR,
                                             max_lr=max_lr,
                                             total_steps=total_steps,
                                             pct_start=0.3,
                                             div_factor=1e1,
                                             final_div_factor=1e4)])
    run.train(n_epochs=n_epochs, device='cuda')

    # plot learning rates
    lr = run.callbacks['OptimLRScheduler'].history['lr']
    lr = list(zip(*lr))
    plt.figure()
    plt.plot(lr[0])
    plt.figure()
    plt.plot(lr[1])
    plt.show()
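# OptimLRScheduler above drives PyTorch's built-in torch.optim.lr_scheduler.OneCycleLR
# from inside the Runner's callback system. For reference, the same schedule driven by
# hand in a plain PyTorch loop would look roughly like the sketch below (model,
# train_dl and loss_fn as in the script above; device handling omitted).
from torch import optim
from torch.optim.lr_scheduler import OneCycleLR


def plain_one_cycle_loop_sketch(model, train_dl, loss_fn, n_epochs=3):
    optimizer = optim.Adam([{'params': model[0:3].parameters()},
                            {'params': model[3:8].parameters()}])
    scheduler = OneCycleLR(optimizer,
                           max_lr=[1e-3, 1e-2],  # one peak lr per parameter group
                           total_steps=n_epochs * len(train_dl),
                           pct_start=0.3,
                           div_factor=1e1,
                           final_div_factor=1e4)
    for _ in range(n_epochs):
        for xb, yb in train_dl:
            loss = loss_fn(model(xb), yb)
            loss.backward()
            optimizer.step()
            scheduler.step()  # advance the one-cycle schedule once per batch
            optimizer.zero_grad()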
def main():
    # specify data transforms
    train_tfms = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize((0.1307,), (0.3081,))])
    test_tfms = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.1307,), (0.3081,))])

    # load data
    path = Path.cwd()
    print(path)
    train_ds = MNIST(path, train=True, download=True, transform=train_tfms)
    test_ds = MNIST(path, train=False, download=True, transform=test_tfms)

    # specify training/validation split
    val_pct = 0.2
    val_size = int(val_pct * len(train_ds))
    train_ds, val_ds = random_split(train_ds, [len(train_ds) - val_size, val_size])
    val_ds.transform = test_tfms
    print(f"Training set size: {len(train_ds)}")
    print(f"Validation set size: {len(val_ds)}")
    print(f"Test set size: {len(test_ds)}")

    # set up data loaders
    batch_size = 64
    print(f"Batch size: {batch_size}")
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
    test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=False)
    for label, dl in zip(['Training', 'Validation', 'Test'],
                         [train_dl, val_dl, test_dl]):
        x_b, y_b = next(iter(dl))
        print(f"{label} set: Input shape: {list(x_b.shape)}, Output shape: {list(y_b.shape)}")

    # specify model
    model = conv_net(ni=1, no=10, nf=[16, 32, 64], nh=[128, 64])
    print(model)

    # specify loss function
    def loss_fn(logits, labels):
        return F.cross_entropy(logits, labels)

    with OutputStatsHook.apply_to_modules(model[:4]) as hooks:
        print(hooks)

    model = conv_net(ni=1, no=10, nf=[16, 32, 64], nh=[128, 64])
    optimizer = optim.Adam([{'params': model[:3].parameters()},
                            {'params': model[3:].parameters()}])
    run = Runner(model,
                 train_dl=train_dl,
                 val_dl=val_dl,
                 loss_fn=loss_fn,
                 metric_fns=[accuracy],
                 optimizer=optimizer,
                 callbacks=[Logger(print_every=1),
                            WeightDecay(wd=1e-2),
                            OneCycleScheduler()])
    with OutputStatsHook.apply_to_modules(model[:3]) as hooks:
        run.train(n_epochs=2, lr=(1e-3, 1e-2), device='cuda')

    fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(10, 4))
    for h in hooks:
        means, stds, hists = h.means, h.stds, h.hists
        ax0.plot(means[:10])
        ax1.plot(stds[:10])
    plt.legend(range(6))

    fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(10, 4))
    for h in hooks:
        means, stds, hists = h.means, h.stds, h.hists
        ax0.plot(means)
        ax1.plot(stds)
    plt.legend(range(6))

    fig, axes = plt.subplots(3, 1, figsize=(6, 6))
    for ax, h in zip(axes.flatten(), hooks[:3]):
        ax.imshow(get_train_hist(h)[:, :100], origin='lower')
        ax.axis('off')
    plt.tight_layout()

    fig, axes = plt.subplots(3, 1, figsize=(6, 6))
    for ax, h in zip(axes.flatten(), hooks):
        ax.plot(get_train_min(h))
        ax.set_ylim(0, 1)
    plt.tight_layout()
    plt.show()

    # test hook manager callback
    model = conv_net(ni=1, no=10, nf=[16, 32, 64], nh=[128, 64])
    optimizer = optim.Adam([{'params': model[:3].parameters()},
                            {'params': model[3:].parameters()}])
    run = Runner(model,
                 train_dl=train_dl,
                 val_dl=val_dl,
                 loss_fn=loss_fn,
                 metric_fns=[accuracy],
                 optimizer=optimizer,
                 callbacks=[Logger(print_every=1),
                            WeightDecay(wd=1e-2),
                            OneCycleScheduler(),
                            HookManager(hook_factory=OutputStatsHook.apply_to_modules,
                                        modules=model[:3])])
    run.train(n_epochs=2, lr=(1e-3, 1e-2), device='cuda')

    hooks = run.callbacks['HookManager'].hooks
    fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(10, 4))
    for h in hooks:
        means, stds, hists = h.means, h.stds, h.hists
        ax0.plot(means[:10])
        ax1.plot(stds[:10])
    plt.legend(range(6))

    fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(10, 4))
    for h in hooks:
        means, stds, hists = h.means, h.stds, h.hists
        ax0.plot(means)
        ax1.plot(stds)
    plt.legend(range(6))

    fig, axes = plt.subplots(3, 1, figsize=(6, 6))
    for ax, h in zip(axes.flatten(), hooks[:3]):
        ax.imshow(get_train_hist(h)[:, :100], origin='lower')
        ax.axis('off')
    plt.tight_layout()

    fig, axes = plt.subplots(3, 1, figsize=(6, 6))
    for ax, h in zip(axes.flatten(), hooks):
        ax.plot(get_train_min(h))
        ax.set_ylim(0, 1)
    plt.tight_layout()
    plt.show()
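# OutputStatsHook and HookManager above are repository classes whose definitions are
# not shown. A minimal sketch of the underlying mechanism, using PyTorch forward hooks
# to record per-batch activation means and stds; the Sketch name is illustrative, not
# the repository's API, and the histogram/`hists` bookkeeping is omitted.
from torch import nn


class OutputStatsHookSketch:
    def __init__(self, module: nn.Module):
        self.means, self.stds = [], []
        self.handle = module.register_forward_hook(self._hook)

    def _hook(self, module, inputs, output):
        # record statistics of this module's activations for the current batch
        self.means.append(output.detach().mean().item())
        self.stds.append(output.detach().std().item())

    def remove(self):
        self.handle.remove()


# usage sketch: attach to the first few layers, run training or a forward pass,
# then detach and inspect hook.means / hook.stds as in the plotting code above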