def _new_trainer():
    """Build the ``pl.Lightning`` evaluator used for multi-model (CGO) training on MNIST.

    The evaluator wraps a ``MultiModelSupervisedLearningModule`` (cross-entropy
    loss, ``'acc'`` metric) and a ``cgo_trainer.Trainer`` limited to one epoch
    over a quarter of the training batches, with the progress bar disabled.

    Returns:
        The configured ``pl.Lightning`` instance.
    """
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])

    def _mnist_split(is_train):
        # Both splits share the root directory, download flag and transform.
        return serialize(MNIST, root='data/mnist', train=is_train,
                         download=True, transform=preprocessing)

    train_split = _mnist_split(True)
    test_split = _mnist_split(False)

    module = MultiModelSupervisedLearningModule(
        nn.CrossEntropyLoss,
        {'acc': pl._AccuracyWithLogits},
    )
    trainer = cgo_trainer.Trainer(
        use_cgo=True,
        max_epochs=1,
        limit_train_batches=0.25,
        progress_bar_refresh_rate=0,
    )
    train_loader = pl.DataLoader(train_split, batch_size=100)
    val_loader = pl.DataLoader(test_split, batch_size=100)

    return pl.Lightning(
        module,
        trainer,
        train_dataloader=train_loader,
        val_dataloaders=val_loader,
    )
def _multi_trial_test(epochs, batch_size, port, benchmark):
    """Run a local multi-trial Retiarii experiment for ``NasBench101`` on CIFAR10.

    Args:
        epochs: Passed as ``max_epochs`` to both the training module and the trainer.
        batch_size: Batch size for the training and validation data loaders.
        port: Port forwarded to ``RetiariiExperiment.run``.
        benchmark: When truthy, the config is switched to the ``'benchmark'``
            execution engine with ``benchmark = 'nasbench101'``.
    """
    # Initialize dataset. Note that 50k+10k is used. It's a little different from paper.
    augmentation = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]
    to_normalized_tensor = [
        transforms.ToTensor(),
        transforms.Normalize(
            [0.49139968, 0.48215827, 0.44653124],
            [0.24703233, 0.24348505, 0.26158768],
        ),
    ]
    # Augmentation is applied to the training split only.
    train_split = serialize(
        CIFAR10, 'data', train=True, download=True,
        transform=transforms.Compose(augmentation + to_normalized_tensor),
    )
    test_split = serialize(
        CIFAR10, 'data', train=False,
        transform=transforms.Compose(to_normalized_tensor),
    )

    # Specify training hyper-parameters.
    module = NasBench101TrainingModule(max_epochs=epochs)
    # FIXME: need to fix a bug in serializer for this to work
    # lr_monitor = serialize(LearningRateMonitor, logging_interval='step')
    pl_trainer = pl.Trainer(max_epochs=epochs, gpus=1)
    train_loader = pl.DataLoader(train_split, batch_size=batch_size, shuffle=True)
    val_loader = pl.DataLoader(test_split, batch_size=batch_size)
    evaluator = pl.Lightning(
        lightning_module=module,
        trainer=pl_trainer,
        train_dataloader=train_loader,
        val_dataloaders=val_loader,
    )

    search_strategy = Random()
    model_space = NasBench101()
    experiment = RetiariiExperiment(model_space, evaluator, [], search_strategy)

    config = RetiariiExeConfig('local')
    config.trial_concurrency = 2
    config.max_trial_number = 20
    config.trial_gpu_number = 1
    config.training_service.use_active_gpu = False

    if benchmark:
        config.benchmark = 'nasbench101'
        config.execution_engine = 'benchmark'

    experiment.run(config, port)
def _multi_trial_test(epochs, batch_size, port):
    """Run a local multi-trial Retiarii experiment for ``NasBench201`` on CIFAR100.

    Args:
        epochs: Passed as ``max_epochs`` to both the training module and the trainer.
        batch_size: Batch size for the training and validation data loaders.
        port: Port forwarded to ``RetiariiExperiment.run``.
    """
    # Initialize dataset. Note that 50k+10k is used. It's a little different from paper.
    augmentation = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]
    # Per-channel statistics are given on a 0-255 scale and rescaled here.
    channel_mean = [value / 255 for value in [129.3, 124.1, 112.4]]
    channel_std = [value / 255 for value in [68.2, 65.4, 70.4]]
    to_normalized_tensor = [
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ]
    # Augmentation is applied to the training split only.
    train_split = serialize(
        CIFAR100, 'data', train=True, download=True,
        transform=transforms.Compose(augmentation + to_normalized_tensor),
    )
    test_split = serialize(
        CIFAR100, 'data', train=False,
        transform=transforms.Compose(to_normalized_tensor),
    )

    # Specify training hyper-parameters.
    module = NasBench201TrainingModule(max_epochs=epochs)
    # FIXME: need to fix a bug in serializer for this to work
    # lr_monitor = serialize(LearningRateMonitor, logging_interval='step')
    pl_trainer = pl.Trainer(max_epochs=epochs, gpus=1)
    train_loader = pl.DataLoader(train_split, batch_size=batch_size, shuffle=True)
    val_loader = pl.DataLoader(test_split, batch_size=batch_size)
    evaluator = pl.Lightning(
        lightning_module=module,
        trainer=pl_trainer,
        train_dataloader=train_loader,
        val_dataloaders=val_loader,
    )

    search_strategy = Random()
    model_space = NasBench201()
    experiment = RetiariiExperiment(model_space, evaluator, [], search_strategy)

    config = RetiariiExeConfig('local')
    config.trial_concurrency = 2
    config.max_trial_number = 20
    config.trial_gpu_number = 1
    config.training_service.use_active_gpu = False

    experiment.run(config, port)
def test_multi_model_trainer_gpu(self):
    """Train two models jointly through the CGO trainer and check both reported results.

    Skipped unless CUDA is available with at least two devices. After
    executing the evaluator on ``_model_gpu``, exactly two final results are
    expected, each strictly greater than 0.8.
    """
    _reset()
    # Short-circuits exactly like the original: device_count() is only
    # queried when CUDA is available.
    if not torch.cuda.is_available() or torch.cuda.device_count() < 2:
        pytest.skip('test requires GPU and torch+cuda')

    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    train_split = serialize(MNIST, root='data/mnist', train=True,
                            download=True, transform=preprocessing)
    test_split = serialize(MNIST, root='data/mnist', train=False,
                           download=True, transform=preprocessing)

    module = _MultiModelSupervisedLearningModule(
        nn.CrossEntropyLoss,
        {'acc': pl._AccuracyWithLogits},
        n_models=2,
    )
    trainer = cgo_trainer.Trainer(use_cgo=True, max_epochs=1,
                                  limit_train_batches=0.25)
    train_loader = pl.DataLoader(train_split, batch_size=100)
    val_loader = pl.DataLoader(test_split, batch_size=100)
    evaluator = pl.Lightning(
        module,
        trainer,
        train_dataloader=train_loader,
        val_dataloaders=val_loader,
    )

    evaluator._execute(_model_gpu)

    final_results = _get_final_result()
    assert len(final_results) == 2
    for metric in final_results:
        assert metric > 0.8