예제 #1
0
def main():
    """Train a small conv net on MNIST for one epoch with a ``Tracer`` callback.

    Downloads MNIST into the current working directory, holds out 20% of the
    training set for validation, prints the dataset sizes, the shape of one
    batch per loader and the model, then calls ``Runner.train`` for a single
    epoch with ``lr=1e-3``.
    """
    # ---- transforms: tensor conversion followed by mean/std normalisation ----
    def make_tfms():
        return transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

    train_tfms = make_tfms()
    test_tfms = make_tfms()

    # ---- datasets ----
    data_root = Path.cwd()
    print(data_root)
    full_train_ds = MNIST(data_root, train=True, download=True, transform=train_tfms)
    test_ds = MNIST(data_root, train=False, download=True, transform=test_tfms)

    # ---- training/validation split ----
    val_pct = 0.2
    n_total = len(full_train_ds)
    n_val = int(val_pct * n_total)
    train_ds, val_ds = random_split(full_train_ds, [n_total - n_val, n_val])
    # NOTE(review): random_split returns wrapper objects around the original
    # dataset; setting ``transform`` on the wrapper presumably does not change
    # how samples are loaded -- confirm.
    val_ds.transform = test_tfms
    print(f"Training set size: {len(train_ds)}")
    print(f"Validation set size: {len(val_ds)}")
    print(f"Test set size: {len(test_ds)}")

    # ---- data loaders (only the training loader shuffles) ----
    batch_size = 64
    print(f"Batch size: {batch_size}")
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
    test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=False)
    labelled_loaders = (('Training', train_dl), ('Validation', val_dl), ('Test', test_dl))
    for label, dl in labelled_loaders:
        # pull a single batch just to report its shape
        first_batch = next(iter(dl))
        x_b, y_b = first_batch
        print(f"{label} set: Input shape: {list(x_b.shape)}, Output shape: {list(y_b.shape)}")

    # ---- model ----
    model = conv_net(ni=1, no=10, nf=[16, 32, 64], nh=[128, 64])
    print(model)

    # ---- loss ----
    def loss_fn(logits, labels):
        """Cross-entropy between the model outputs and the target labels."""
        return F.cross_entropy(logits, labels)

    # ---- optimizer: two parameter groups, model[0:3] and model[3:8] ----
    group_bounds = ((0, 3), (3, 8))
    param_groups = [{'params': model[lo:hi].parameters()} for lo, hi in group_bounds]
    optimizer = optim.Adam(param_groups)

    # ---- training loop ----
    runner_kwargs = dict(
        train_dl=train_dl,
        val_dl=val_dl,
        loss_fn=loss_fn,
        metric_fns=[accuracy],
        optimizer=optimizer,
        callbacks=[Tracer()],
    )
    run = Runner(model, **runner_kwargs)
    run.train(n_epochs=1, lr=1e-3)
def main():
    """Train a small conv net on MNIST with a separate LR schedule per parameter group.

    Downloads MNIST into the current working directory, holds out 20% of the
    training set for validation, plots the available annealing schedules and
    the two combined cosine schedules, trains for three epochs with
    ``Logger``, ``WeightDecay`` and ``OptimParamScheduler`` callbacks, and
    finally plots the learning-rate history recorded for each parameter group.
    """
    # specify data transforms
    train_tfms = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
    test_tfms = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

    # load data
    path = Path.cwd()
    print(path)
    train_ds = MNIST(path, train=True, download=True, transform=train_tfms)
    test_ds = MNIST(path, train=False, download=True, transform=test_tfms)

    # specify training/validation split
    val_pct = 0.2
    val_size = int(val_pct * len(train_ds))
    train_ds, val_ds = random_split(train_ds, [len(train_ds) - val_size, val_size])
    # NOTE(review): random_split returns wrapper objects around the original
    # dataset; setting ``transform`` on the wrapper presumably does not change
    # how samples are loaded -- confirm.
    val_ds.transform = test_tfms
    print(f"Training set size: {len(train_ds)}")
    print(f"Validation set size: {len(val_ds)}")
    print(f"Test set size: {len(test_ds)}")

    # set up data loaders
    batch_size = 64
    print(f"Batch size: {batch_size}")
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
    test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=False)
    for label, dl in zip(['Training', 'Validation', 'Test'], [train_dl, val_dl, test_dl]):
        # pull a single batch just to report its shape
        x_b, y_b = next(iter(dl))
        print(f"{label} set: Input shape: {list(x_b.shape)}, Output shape: {list(y_b.shape)}")

    # specify model
    model = conv_net(ni=1, no=10, nf=[16, 32, 64], nh=[128, 64])
    print(model)

    # specify loss function
    def loss_fn(logits, labels):
        """Cross-entropy between the model outputs and the target labels."""
        return F.cross_entropy(logits, labels)

    # specify optimizer: two parameter groups so each can follow its own schedule
    optimizer = optim.Adam([{'params': model[0:3].parameters()}, {'params': model[3:8].parameters()}])

    # plot schedules
    # Monkey patch for plotting tensors. Only applied when torch.Tensor has no
    # ``ndim`` attribute, so an implementation shipped with torch is never
    # overwritten by this process-wide patch.
    if not hasattr(torch.Tensor, 'ndim'):
        torch.Tensor.ndim = property(lambda x: len(x.shape))

    annealings = "null linear cos exp".split()
    a = torch.arange(0, 100)            # x-axis: 100 integer positions
    p = torch.linspace(0.01, 1, 100)    # schedule inputs from 0.01 to 1
    fns = [null_schedule, lin_schedule, cos_schedule, exp_schedule]
    plt.figure()
    for fn, t in zip(fns, annealings):
        f = fn(2, 1e-2)
        plt.plot(a, [f(o) for o in p], label=t)
    plt.legend()

    # specify schedules for learning rates (one per parameter group)
    sched_1 = combine_schedules([0.3, 0.7], [cos_schedule(1e-4, 1e-3), cos_schedule(1e-3, 1e-5)])
    plt.figure()
    plt.plot(a, [sched_1(o) for o in p])

    sched_2 = combine_schedules([0.3, 0.7], [cos_schedule(1e-3, 1e-2), cos_schedule(1e-2, 1e-4)])
    plt.figure()
    plt.plot(a, [sched_2(o) for o in p])

    plt.show()

    # execute training loop
    run = Runner(model, train_dl=train_dl, val_dl=val_dl, loss_fn=loss_fn,
                 metric_fns=[accuracy], optimizer=optimizer,
                 callbacks=[Logger(print_every=1), WeightDecay(wd=1e-2),
                            OptimParamScheduler({'lr': [sched_1, sched_2]})])
    # Prefer the GPU but fall back to the CPU instead of failing on machines
    # without CUDA. Assumes Runner.train accepts the 'cpu' device string --
    # TODO confirm against Runner.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    run.train(n_epochs=3, device=device)

    # plot learning rates: transpose the recorded history and plot its first
    # two rows, one figure each
    lr = run.callbacks['OptimParamScheduler'].history['lr']
    lr = list(zip(*lr))
    plt.figure()
    plt.plot(lr[0])
    plt.figure()
    plt.plot(lr[1])
    plt.show()
예제 #3
0
def main():
    """Compare training an 18-layer conv net with and without the LSUV step.

    Downloads MNIST into the current working directory, holds out 20% of the
    training set for validation, then trains two freshly initialised models
    for 10 epochs each with SGD on CUDA: the first after
    ``init_lsuv_conv_net`` only (printing each hooked layer's latest output
    mean/std), the second after additionally running ``lsuv_module`` on every
    ``LSUVConvLayer``.
    """
    # ---- transforms: tensor conversion followed by mean/std normalisation ----
    def make_tfms():
        return transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize((0.1307, ), (0.3081, ))])

    train_tfms = make_tfms()
    test_tfms = make_tfms()

    # ---- datasets ----
    data_root = Path.cwd()
    print(data_root)
    full_train_ds = MNIST(data_root, train=True, download=True, transform=train_tfms)
    test_ds = MNIST(data_root, train=False, download=True, transform=test_tfms)

    # ---- training/validation split ----
    val_pct = 0.2
    n_total = len(full_train_ds)
    n_val = int(val_pct * n_total)
    train_ds, val_ds = random_split(full_train_ds, [n_total - n_val, n_val])
    # NOTE(review): random_split returns wrapper objects around the original
    # dataset; setting ``transform`` on the wrapper presumably does not change
    # how samples are loaded -- confirm.
    val_ds.transform = test_tfms
    print(f"Training set size: {len(train_ds)}")
    print(f"Validation set size: {len(val_ds)}")
    print(f"Test set size: {len(test_ds)}")

    # ---- data loaders (only the training loader shuffles) ----
    batch_size = 64
    print(f"Batch size: {batch_size}")
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
    test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=False)
    labelled_loaders = (('Training', train_dl), ('Validation', val_dl), ('Test', test_dl))
    for label, dl in labelled_loaders:
        first_batch = next(iter(dl))
        x_b, y_b = first_batch
        print(
            f"{label} set: Input shape: {list(x_b.shape)}, Output shape: {list(y_b.shape)}"
        )

    # ---- one training batch, moved to the GPU, used for the statistics below ----
    xb, yb = next(iter(train_dl))
    xb = xb.cuda()
    yb = yb.cuda()

    # ---- loss ----
    def loss_fn(logits, labels):
        """Cross-entropy between the model outputs and the target labels."""
        return F.cross_entropy(logits, labels)

    # ---- model spec (18-layer CNN): six widths, three layers each ----
    nf = [width for width in (8, 16, 32, 64, 128, 256) for _ in range(3)]

    def is_lsuv_layer(layer):
        return isinstance(layer, LSUVConvLayer)

    def build_model():
        """Create the net on the GPU, initialise it, and collect its LSUV conv layers."""
        net = lsuv_conv_net(ni=1, no=10, nf=nf, nh=[128]).cuda()
        init_lsuv_conv_net(net, uniform=False)
        return net, find_modules(net, is_lsuv_layer)

    def fit(net):
        """Train ``net`` for 10 epochs with plain SGD and a per-epoch logger."""
        optimizer = optim.SGD(net.parameters(), lr=1e-3)
        run = Runner(net,
                     train_dl=train_dl,
                     val_dl=val_dl,
                     loss_fn=loss_fn,
                     metric_fns=[accuracy],
                     optimizer=optimizer,
                     callbacks=[Logger(print_every=1)])
        run.train(n_epochs=10, lr=1e-3, device='cuda')

    # ---- run 1: Kaiming-normal init; print activation means/stds ----
    model, modules = build_model()
    with OutputStatsHook.apply_to_modules(modules, mode='all') as hooks:
        model(xb)  # single forward pass so every hook records statistics
        for hook in hooks:
            latest_mean = hook.means[-1].item()
            latest_std = hook.stds[-1].item()
            print(latest_mean, latest_std)
    fit(model)

    # ---- run 2: re-initialise using Kaiming-normal and LSUV ----
    model, modules = build_model()
    for m in modules:
        print(lsuv_module(m, model, xb))
    fit(model)
예제 #4
0
def main():
    """Train a small conv net on MNIST with ``OneCycleScheduler`` and plot its history.

    Downloads MNIST into the current working directory, holds out 20% of the
    training set for validation, plots four combined cosine schedules, trains
    for three epochs with ``Logger``, ``WeightDecay`` and ``OneCycleScheduler``
    callbacks, then plots the ``lr`` and ``betas`` histories recorded by the
    scheduler callback.
    """
    # ---- transforms: tensor conversion followed by mean/std normalisation ----
    def make_tfms():
        return transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize((0.1307, ), (0.3081, ))])

    train_tfms = make_tfms()
    test_tfms = make_tfms()

    # ---- datasets ----
    data_root = Path.cwd()
    print(data_root)
    full_train_ds = MNIST(data_root, train=True, download=True, transform=train_tfms)
    test_ds = MNIST(data_root, train=False, download=True, transform=test_tfms)

    # ---- training/validation split ----
    val_pct = 0.2
    n_total = len(full_train_ds)
    n_val = int(val_pct * n_total)
    train_ds, val_ds = random_split(full_train_ds, [n_total - n_val, n_val])
    # NOTE(review): random_split returns wrapper objects around the original
    # dataset; setting ``transform`` on the wrapper presumably does not change
    # how samples are loaded -- confirm.
    val_ds.transform = test_tfms
    print(f"Training set size: {len(train_ds)}")
    print(f"Validation set size: {len(val_ds)}")
    print(f"Test set size: {len(test_ds)}")

    # ---- data loaders (only the training loader shuffles) ----
    batch_size = 64
    print(f"Batch size: {batch_size}")
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
    test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=False)
    labelled_loaders = (('Training', train_dl), ('Validation', val_dl), ('Test', test_dl))
    for label, dl in labelled_loaders:
        first_batch = next(iter(dl))
        x_b, y_b = first_batch
        print(
            f"{label} set: Input shape: {list(x_b.shape)}, Output shape: {list(y_b.shape)}"
        )

    # ---- model ----
    model = conv_net(ni=1, no=10, nf=[16, 32, 64], nh=[128, 64])
    print(model)

    # ---- loss ----
    def loss_fn(logits, labels):
        """Cross-entropy between the model outputs and the target labels."""
        return F.cross_entropy(logits, labels)

    # ---- optimizer: two parameter groups, model[0:3] and model[3:8] ----
    group_bounds = ((0, 3), (3, 8))
    optimizer = optim.Adam(
        [{'params': model[lo:hi].parameters()} for lo, hi in group_bounds])

    # ---- schedule plots ----
    # monkey patch for plotting tensors
    torch.Tensor.ndim = property(lambda x: len(x.shape))
    a = torch.arange(0, 100)
    p = torch.linspace(0.01, 1, 100)

    # (start, end) arguments of the two cosine legs of each combined schedule;
    # the last two entries are identical, exactly as in the four hand-written
    # schedules this table replaces
    schedule_specs = (
        ((1e-4, 1e-3), (1e-3, 1e-5)),
        ((1e-3, 1e-2), (1e-2, 1e-4)),
        ((0.95, 0.85), (0.85, 0.95)),
        ((0.95, 0.85), (0.85, 0.95)),
    )
    for idx, (first_leg, second_leg) in enumerate(schedule_specs):
        sched = combine_schedules(
            [0.3, 0.7], [cos_schedule(*first_leg),
                         cos_schedule(*second_leg)])
        # the first schedule is drawn on the implicit current figure
        if idx > 0:
            plt.figure()
        plt.plot(a, [sched(o) for o in p])

    plt.show()

    # ---- training loop ----
    callbacks = [
        Logger(print_every=1),
        WeightDecay(wd=1e-2),
        OneCycleScheduler(),
    ]
    run = Runner(model,
                 train_dl=train_dl,
                 val_dl=val_dl,
                 loss_fn=loss_fn,
                 metric_fns=[accuracy],
                 optimizer=optimizer,
                 callbacks=callbacks)
    run.train(n_epochs=3, lr=(1e-3, 1e-2))

    # ---- plot results ----
    history = run.callbacks['OneCycleScheduler'].history

    # learning rates: transpose the history and plot its first two rows
    lr = np.array(list(zip(*history['lr'])))
    print(f"LR shape: {lr.shape}")
    plt.plot(lr[0])
    plt.figure()
    plt.plot(lr[1])

    # betas: after transposing, one figure per (first index, last index) pair
    betas = np.array(list(zip(*history['betas'])))
    print(f"Betas shape: {betas.shape}")
    for group_idx in (0, 1):
        for beta_idx in (0, 1):
            plt.figure()
            plt.plot(betas[group_idx, :, beta_idx])

    plt.show()
예제 #5
0
def main():
    """Train a small conv net on MNIST using ``OneCycleLR`` through ``OptimLRScheduler``.

    Downloads MNIST into the current working directory, holds out 20% of the
    training set for validation, trains for three epochs on CUDA with
    ``Logger``, ``WeightDecay`` and an ``OptimLRScheduler`` wrapping
    ``OneCycleLR`` (presumably ``torch.optim.lr_scheduler.OneCycleLR`` --
    confirm against the file's imports), then plots the learning-rate history
    recorded for each parameter group.
    """
    # specify data transforms
    train_tfms = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])
    test_tfms = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])

    # load data
    path = Path.cwd()
    print(path)
    train_ds = MNIST(path, train=True, download=True, transform=train_tfms)
    test_ds = MNIST(path, train=False, download=True, transform=test_tfms)

    # specify training/validation split
    val_pct = 0.2
    val_size = int(val_pct * len(train_ds))
    train_ds, val_ds = random_split(train_ds,
                                    [len(train_ds) - val_size, val_size])
    # NOTE(review): random_split returns wrapper objects around the original
    # dataset; setting ``transform`` on the wrapper presumably does not change
    # how samples are loaded -- confirm.
    val_ds.transform = test_tfms
    print(f"Training set size: {len(train_ds)}")
    print(f"Validation set size: {len(val_ds)}")
    print(f"Test set size: {len(test_ds)}")

    # set up data loaders
    batch_size = 64
    print(f"Batch size: {batch_size}")
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
    test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=False)
    for label, dl in zip(['Training', 'Validation', 'Test'],
                         [train_dl, val_dl, test_dl]):
        # pull a single batch just to report its shape
        x_b, y_b = next(iter(dl))
        print(
            f"{label} set: Input shape: {list(x_b.shape)}, Output shape: {list(y_b.shape)}"
        )

    # specify model
    model = conv_net(ni=1, no=10, nf=[16, 32, 64], nh=[128, 64])
    print(model)

    # specify loss function
    def loss_fn(logits, labels):
        """Cross-entropy between the model outputs and the target labels."""
        return F.cross_entropy(logits, labels)

    # specify optimizer: two parameter groups, matched by the two max_lr values
    optimizer = optim.Adam([{
        'params': model[0:3].parameters()
    }, {
        'params': model[3:8].parameters()
    }])

    # execute training loop
    n_epochs = 3
    max_lr = (1e-3, 1e-2)
    # len(train_dl) is the number of batches per epoch, including a final
    # partial batch (the loader does not set drop_last). Floor-dividing the
    # dataset size by the batch size would undercount whenever the two do not
    # divide evenly, leaving the scheduler with too few total steps.
    # Assumes the scheduler is stepped once per training batch -- TODO confirm
    # against OptimLRScheduler.
    total_steps = n_epochs * len(train_dl)
    run = Runner(model,
                 train_dl=train_dl,
                 val_dl=val_dl,
                 loss_fn=loss_fn,
                 metric_fns=[accuracy],
                 optimizer=optimizer,
                 callbacks=[
                     Logger(print_every=1),
                     WeightDecay(wd=1e-2),
                     OptimLRScheduler(OneCycleLR,
                                      max_lr=max_lr,
                                      total_steps=total_steps,
                                      pct_start=0.3,
                                      div_factor=1e1,
                                      final_div_factor=1e4)
                 ])
    run.train(n_epochs=n_epochs, device='cuda')

    # plot learning rates: transpose the recorded history and plot its first
    # two rows, one figure each
    lr = run.callbacks['OptimLRScheduler'].history['lr']
    lr = list(zip(*lr))
    plt.figure()
    plt.plot(lr[0])
    plt.figure()
    plt.plot(lr[1])
    plt.show()
예제 #6
0
def _plot_hook_stats(hooks):
    """Draw the four diagnostic figures for ``hooks`` and show them.

    Figures, in order: means/stds truncated to their first 10 entries, the
    full means/stds, ``get_train_hist`` images for the first three hooks, and
    ``get_train_min`` curves. ``hooks`` must be iterable and sliceable, and
    each hook must expose ``means`` and ``stds``.
    """
    # first 10 recorded means (left axes) and stds (right axes) per hook
    _, (ax0, ax1) = plt.subplots(1, 2, figsize=(10, 4))
    for h in hooks:
        ax0.plot(h.means[:10])
        ax1.plot(h.stds[:10])
    plt.legend(range(6))

    # full means / stds history per hook
    _, (ax0, ax1) = plt.subplots(1, 2, figsize=(10, 4))
    for h in hooks:
        ax0.plot(h.means)
        ax1.plot(h.stds)
    plt.legend(range(6))

    # one image per hook (at most three), limited to the first 100 columns
    _, axes = plt.subplots(3, 1, figsize=(6, 6))
    for ax, h in zip(axes.flatten(), hooks[:3]):
        ax.imshow(get_train_hist(h)[:, :100], origin='lower')
        ax.axis('off')
    plt.tight_layout()

    # one get_train_min curve per hook, y-axis fixed to [0, 1]
    _, axes = plt.subplots(3, 1, figsize=(6, 6))
    for ax, h in zip(axes.flatten(), hooks):
        ax.plot(get_train_min(h))
        ax.set_ylim(0, 1)
    plt.tight_layout()

    plt.show()


def main():
    """Collect and plot layer-output statistics during training, in two ways.

    Downloads MNIST into the current working directory, holds out 20% of the
    training set for validation, then trains a fresh conv net for two epochs
    on CUDA twice: first with ``OutputStatsHook.apply_to_modules`` used
    directly as a context manager around training, then with the same hook
    factory handed to a ``HookManager`` callback. The same set of diagnostic
    figures is drawn after each run.
    """
    # specify data transforms
    train_tfms = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])
    test_tfms = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])

    # load data
    path = Path.cwd()
    print(path)
    train_ds = MNIST(path, train=True, download=True, transform=train_tfms)
    test_ds = MNIST(path, train=False, download=True, transform=test_tfms)

    # specify training/validation split
    val_pct = 0.2
    val_size = int(val_pct * len(train_ds))
    train_ds, val_ds = random_split(train_ds,
                                    [len(train_ds) - val_size, val_size])
    # NOTE(review): random_split returns wrapper objects around the original
    # dataset; setting ``transform`` on the wrapper presumably does not change
    # how samples are loaded -- confirm.
    val_ds.transform = test_tfms
    print(f"Training set size: {len(train_ds)}")
    print(f"Validation set size: {len(val_ds)}")
    print(f"Test set size: {len(test_ds)}")

    # set up data loaders
    batch_size = 64
    print(f"Batch size: {batch_size}")
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
    test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=False)
    for label, dl in zip(['Training', 'Validation', 'Test'],
                         [train_dl, val_dl, test_dl]):
        # pull a single batch just to report its shape
        x_b, y_b = next(iter(dl))
        print(
            f"{label} set: Input shape: {list(x_b.shape)}, Output shape: {list(y_b.shape)}"
        )

    # specify model
    model = conv_net(ni=1, no=10, nf=[16, 32, 64], nh=[128, 64])
    print(model)

    # specify loss function
    def loss_fn(logits, labels):
        """Cross-entropy between the model outputs and the target labels."""
        return F.cross_entropy(logits, labels)

    # smoke check: attach hooks to the first four modules and print them
    with OutputStatsHook.apply_to_modules(model[:4]) as hooks:
        print(hooks)

    # fresh model + optimizer (two parameter groups) for the first run
    model = conv_net(ni=1, no=10, nf=[16, 32, 64], nh=[128, 64])
    optimizer = optim.Adam([{
        'params': model[:3].parameters()
    }, {
        'params': model[3:].parameters()
    }])
    run = Runner(model,
                 train_dl=train_dl,
                 val_dl=val_dl,
                 loss_fn=loss_fn,
                 metric_fns=[accuracy],
                 optimizer=optimizer,
                 callbacks=[
                     Logger(print_every=1),
                     WeightDecay(wd=1e-2),
                     OneCycleScheduler()
                 ])

    # train and plot while the hooks are still attached
    with OutputStatsHook.apply_to_modules(model[:3]) as hooks:
        run.train(n_epochs=2, lr=(1e-3, 1e-2), device='cuda')
        _plot_hook_stats(hooks)

    # test hook manager callback
    model = conv_net(ni=1, no=10, nf=[16, 32, 64], nh=[128, 64])
    optimizer = optim.Adam([{
        'params': model[:3].parameters()
    }, {
        'params': model[3:].parameters()
    }])
    run = Runner(model,
                 train_dl=train_dl,
                 val_dl=val_dl,
                 loss_fn=loss_fn,
                 metric_fns=[accuracy],
                 optimizer=optimizer,
                 callbacks=[
                     Logger(print_every=1),
                     WeightDecay(wd=1e-2),
                     OneCycleScheduler(),
                     HookManager(hook_factory=OutputStatsHook.apply_to_modules,
                                 modules=model[:3])
                 ])
    run.train(n_epochs=2, lr=(1e-3, 1e-2), device='cuda')

    # same figures, this time from the hooks owned by the callback
    _plot_hook_stats(run.callbacks['HookManager'].hooks)