def run(count, memory_size, iteration, device='cuda'):
    """Train the MnistDraw model on MNIST for 100 epochs.

    Args:
        count: capacity/step count forwarded to ``MnistDraw``.
        memory_size: memory size forwarded to ``MnistDraw``; also part of the
            checkpoint directory name.
        iteration: tag embedded in the checkpoint file names.
        device: torch device string the trial runs on.
    """
    traintransform = transforms.Compose(
        [transforms.RandomRotation(20), transforms.ToTensor()])
    trainset = torchvision.datasets.MNIST(root='./data/mnist', train=True,
                                          download=True,
                                          transform=traintransform)
    trainloader = torch.utils.data.DataLoader(trainset, pin_memory=True,
                                              batch_size=128, shuffle=True,
                                              num_workers=10)

    testtransform = transforms.Compose([transforms.ToTensor()])
    testset = torchvision.datasets.MNIST(root='./data/mnist', train=False,
                                         download=True,
                                         transform=testtransform)
    # Fix: the validation loader previously used shuffle=True, which makes
    # per-epoch validation losses and the logged TensorBoard images
    # non-comparable across epochs; the sibling MNIST classifier script uses
    # shuffle=False for its test loader.
    testloader = torch.utils.data.DataLoader(testset, pin_memory=True,
                                             batch_size=128, shuffle=False,
                                             num_workers=10)

    # NOTE(review): the sub-directory is hard-coded to "4" rather than
    # str(count) as in the classifier scripts — presumably a fixed
    # experiment id; confirm before generalizing.
    base_dir = os.path.join('mnist_' + str(memory_size), "4")

    model = MnistDraw(count, memory_size)
    # Optimize only trainable parameters.
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()), lr=1e-3)

    from datetime import datetime
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')

    trial = Trial(
        model, optimizer, nn.MSELoss(reduction='sum'), ['loss'],
        pass_state=True,
        callbacks=[
            # KL term for the variational objective (MU/LOGVAR state keys).
            tm.kl_divergence(MU, LOGVAR),
            callbacks.MostRecent(
                os.path.join(base_dir,
                             'iter_' + str(iteration) + '.{epoch:02d}.pt')),
            callbacks.GradientClipping(5),
            callbacks.ExponentialLR(0.99),
            callbacks.TensorBoardImages(comment=current_time,
                                        name='Prediction',
                                        write_each_epoch=True,
                                        key=torchbearer.Y_PRED),
            callbacks.TensorBoardImages(comment=current_time + '_mnist',
                                        name='Target',
                                        write_each_epoch=True,
                                        key=torchbearer.Y_TRUE)
        ]).with_generators(train_generator=trainloader,
                           val_generator=testloader).to(device)
    trial.run(100)
def run(count, memory_size, device='cuda'):
    """Train a CifarClassifier on CIFAR-10 for 350 epochs, then evaluate on
    the held-out test set.
    """
    # Shared normalization statistics for CIFAR-10.
    cifar_mean = (0.4914, 0.4822, 0.4465)
    cifar_std = (0.2023, 0.1994, 0.2010)

    augment = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(cifar_mean, cifar_std),
    ])
    plain = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(cifar_mean, cifar_std),
    ])

    train_data = torchvision.datasets.CIFAR10(root='./data', train=True,
                                              download=True,
                                              transform=augment)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128,
                                               shuffle=True, num_workers=10)
    test_data = torchvision.datasets.CIFAR10(root='./data', train=False,
                                             download=True, transform=plain)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=100,
                                              shuffle=False, num_workers=10)

    base_dir = os.path.join('cifar_' + str(memory_size), str(count))

    model = nn.DataParallel(CifarClassifier(count, memory_size))
    # Only parameters that require gradients are handed to the optimizer.
    trainable = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(trainable, lr=0.001, momentum=0.9, weight_decay=5e-4)

    trial = Trial(model, optimizer, nn.NLLLoss(),
                  [torchbearer.metrics.CategoricalAccuracy(), 'loss'],
                  callbacks=[
                      callbacks.MostRecent(
                          os.path.join(base_dir, '{epoch:02d}.pt')),
                      callbacks.GradientClipping(5),
                      callbacks.MultiStepLR(milestones=[150, 250]),
                      callbacks.TensorBoard(write_graph=False,
                                            comment=base_dir),
                  ]).with_train_generator(train_loader).to(device)
    trial.run(350)
    # Final test-set evaluation after training completes.
    trial.with_test_generator(test_loader).evaluate(
        data_key=torchbearer.TEST_DATA)
def run(count, memory_size, file, device='cuda'):
    """Self-taught CIFAR-10 training: load a pretrained CifarDraw checkpoint,
    transplant its (mostly frozen) memory into a SelfTaught classifier, and
    train for 50 epochs with periodic validation.
    """
    augment = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(0.25, 0.25, 0.25, 0.25),
        transforms.ToTensor()
    ])
    plain = transforms.Compose([transforms.ToTensor()])

    train_data = torchvision.datasets.CIFAR10(root='./data', train=True,
                                              download=True,
                                              transform=augment)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128,
                                               shuffle=True, num_workers=10)
    test_data = torchvision.datasets.CIFAR10(root='./data', train=False,
                                             download=True, transform=plain)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=100,
                                              shuffle=False, num_workers=10)

    base_dir = os.path.join('cifarss_' + str(memory_size), "16")

    # Restore the pretrained draw model and reuse its memory module.
    pretrained = CifarDraw(count, memory_size)
    pretrained.load_state_dict(torch.load(file)[torchbearer.MODEL])
    model = SelfTaught(count, 512, memory_size, pretrained.memory)

    # Freeze the entire memory, then re-enable only its plasticity terms.
    for param in model.memory.parameters():
        param.requires_grad = False
    for plastic in (model.memory.decay, model.memory.learn,
                    model.memory.learn2):
        plastic.requires_grad = True

    trainable = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(trainable, lr=1e-3)

    trial = Trial(model, optimizer, nn.NLLLoss(), ['acc', 'loss'],
                  pass_state=True,
                  callbacks=[
                      callbacks.MultiStepLR([25, 40, 45]),
                      callbacks.MostRecent(
                          os.path.join(base_dir, '{epoch:02d}.pt')),
                      callbacks.GradientClipping(5)
                  ]).with_generators(
                      train_generator=train_loader,
                      val_generator=test_loader).for_val_steps(5).to(device)
    trial.run(50)
def run(count, memory_size, device='cuda'):
    """Train an MnistClassifier on MNIST for 200 epochs, then evaluate on the
    held-out test set.
    """
    # Standard MNIST normalization statistics.
    mnist_mean = (0.1307, )
    mnist_std = (0.3081, )

    augment = transforms.Compose([
        transforms.RandomRotation(20),
        transforms.ToTensor(),
        transforms.Normalize(mnist_mean, mnist_std)
    ])
    plain = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize(mnist_mean, mnist_std)])

    train_data = torchvision.datasets.MNIST(root='./data/mnist', train=True,
                                            download=True, transform=augment)
    train_loader = torch.utils.data.DataLoader(train_data, pin_memory=True,
                                               batch_size=128, shuffle=True,
                                               num_workers=10)
    test_data = torchvision.datasets.MNIST(root='./data/mnist', train=False,
                                           download=True, transform=plain)
    test_loader = torch.utils.data.DataLoader(test_data, pin_memory=True,
                                              batch_size=128, shuffle=False,
                                              num_workers=10)

    base_dir = os.path.join('mnist_' + str(memory_size), str(count))

    model = MnistClassifier(count, memory_size)
    # Only parameters that require gradients are handed to the optimizer.
    trainable = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(trainable, lr=0.001)

    trial = Trial(
        model, optimizer, nn.NLLLoss(), ['acc', 'loss'],
        callbacks=[
            callbacks.MostRecent(os.path.join(base_dir, '{epoch:02d}.pt')),
            callbacks.GradientClipping(5),
            callbacks.MultiStepLR(milestones=[50, 100, 150, 190, 195]),
            callbacks.ExponentialLR(0.99),
            callbacks.TensorBoard(write_graph=False, comment=base_dir)
        ]).with_train_generator(train_loader).to(device)
    trial.run(200)
    # Final test-set evaluation after training completes.
    trial.with_test_generator(test_loader).evaluate(
        data_key=torchbearer.TEST_DATA)
def run(count, glimpse_size, memory_size, iteration, device='cuda'):
    """Train the CelebDraw model on a cropped CelebA image folder for 100
    epochs, logging prediction/target images to TensorBoard.

    Args:
        count: capacity/step count forwarded to ``CelebDraw``.
        glimpse_size: glimpse size forwarded to ``CelebDraw``; also part of
            the checkpoint directory name.
        memory_size: memory size forwarded to ``CelebDraw``.
        iteration: tag embedded in the checkpoint file names.
        device: torch device string the trial runs on.
    """
    base_dir = os.path.join('celeba_' + str(memory_size), str(glimpse_size))
    # Fix: exists()+makedirs() is race-prone; exist_ok makes it atomic.
    os.makedirs(base_dir, exist_ok=True)

    transform_train = transforms.Compose([
        transforms.ToTensor()
    ])
    dataset = torchvision.datasets.ImageFolder(root='./cropped_celeba/',
                                               transform=transform_train)
    splitter = DatasetValidationSplitter(len(dataset), 0.05)
    trainset = splitter.get_train_dataset(dataset)

    # Save the ids so the split can be reproduced later.
    torch.save((splitter.train_ids, splitter.valid_ids),
               os.path.join(base_dir, 'split.dat'))

    trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                              shuffle=True, num_workers=10)

    model = CelebDraw(count, glimpse_size, memory_size)
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()), lr=1e-4)

    from datetime import datetime
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')

    call_a = callbacks.TensorBoardImages(comment=current_time,
                                         name='Prediction',
                                         write_each_epoch=True,
                                         key=torchbearer.Y_PRED)
    # Hack to make this log training samples
    call_a.on_step_training = call_a.on_step_validation
    call_b = callbacks.TensorBoardImages(comment=current_time + '_celeba',
                                         name='Target',
                                         write_each_epoch=True,
                                         key=torchbearer.Y_TRUE)
    # Hack to make this log training samples
    call_b.on_step_training = call_b.on_step_validation

    # Fix: dropped the 'acc' metric — categorical accuracy is meaningless for
    # an MSE image-reconstruction objective (targets are images, not class
    # labels); the analogous MNIST draw script tracks only 'loss'.
    trial = Trial(model, optimizer, nn.MSELoss(reduction='sum'), ['loss'],
                  pass_state=True,
                  callbacks=[
                      joint_kl_divergence(MU, LOGVAR),
                      callbacks.MostRecent(
                          os.path.join(base_dir,
                                       'iter_' + str(iteration) +
                                       '.{epoch:02d}.pt')),
                      callbacks.GradientClipping(5),
                      call_a,
                      call_b
                  ]).with_generators(train_generator=trainloader).to(device)
    trial.run(100)
def test_callbacks(self):
    """Smoke-test a broad selection of callbacks through a full
    run/predict/evaluate cycle on a tiny linear model.
    """
    from torch.utils.data import TensorDataset

    train_data = TensorDataset(torch.rand(10, 1, 3), torch.rand(10, 1))
    val_data = TensorDataset(torch.rand(10, 1, 3), torch.rand(10, 1))
    test_data = TensorDataset(torch.rand(10, 1, 3), torch.rand(10, 1))

    model = torch.nn.Linear(3, 1)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

    cbs = [
        c.EarlyStopping(),
        c.GradientClipping(10, model.parameters()),
        c.Best('test.pt'),
        c.MostRecent('test.pt'),
        c.ReduceLROnPlateau(),
        c.CosineAnnealingLR(0.1, 0.01),
        c.ExponentialLR(1),
        c.Interval('test.pt'),
        c.CSVLogger('test_csv.pt'),
        c.L1WeightDecay(),
        c.L2WeightDecay(),
        c.TerminateOnNaN(monitor='fail_metric'),
    ]

    trial = torchbearer.Trial(model, optimizer, torch.nn.MSELoss(),
                              metrics=['loss'], callbacks=cbs)
    trial = trial.with_generators(train_data, val_data, test_data)
    trial.run(2)
    trial.predict()
    trial.evaluate(data_key=torchbearer.TEST_DATA)
    trial.evaluate()

    # Clean up checkpoint and CSV artifacts written by the callbacks.
    import os
    os.remove('test.pt')
    os.remove('test_csv.pt')
def run(iteration, device='cuda:1'):
    """Train the CifarVAE on CIFAR-10 for 100 epochs with a beta-weighted KL
    term, logging prediction/target images to TensorBoard.

    Args:
        iteration: tag embedded in the checkpoint file names.
        device: torch device string the trial runs on.
    """
    transform_train = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(0.25, 0.25, 0.25, 0.25),
        transforms.ToTensor()
    ])
    transform_test = transforms.Compose([transforms.ToTensor()])

    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True,
                                            transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                              shuffle=True, num_workers=10)
    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=True,
                                           transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, batch_size=100,
                                             shuffle=False, num_workers=10)

    base_dir = 'cifar_vae'

    model = CifarVAE()
    # Optimize only trainable parameters.
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()), lr=5e-4)

    from datetime import datetime
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')

    # Fix: dropped the 'acc' metric — categorical accuracy is meaningless for
    # an MSE image-reconstruction objective (targets are images, not class
    # labels); the MNIST draw script correctly tracks only 'loss'.
    trial = Trial(
        model, optimizer, nn.MSELoss(reduction='sum'), ['loss'],
        pass_state=True,
        callbacks=[
            # beta-VAE style KL weighting.
            tm.kl_divergence(MU, LOGVAR, beta=2),
            callbacks.MultiStepLR([50, 90]),
            callbacks.MostRecent(
                os.path.join(base_dir,
                             'iter_' + str(iteration) + '.{epoch:02d}.pt')),
            callbacks.GradientClipping(5),
            callbacks.TensorBoardImages(comment=current_time,
                                        name='Prediction',
                                        write_each_epoch=True,
                                        key=torchbearer.Y_PRED),
            callbacks.TensorBoardImages(comment=current_time + '_cifar_vae',
                                        name='Target',
                                        write_each_epoch=False,
                                        key=torchbearer.Y_TRUE),
        ]).with_generators(
            train_generator=trainloader,
            val_generator=testloader).for_val_steps(5).to(device)
    trial.run(100)