def run(self, hparams):
    transform = T.Compose([T.ToTensor(), T.Normalize((0.1307,), (0.3081,))])

    # Download the dataset from a single process only, then sync the others.
    if self.is_global_zero:
        MNIST("./data", download=True)
    self.barrier()

    train_dataset = MNIST("./data", train=True, transform=transform)
    test_dataset = MNIST("./data", train=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=hparams.batch_size)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=hparams.test_batch_size)

    # Prepare the dataloaders for distributed training.
    train_loader, test_loader = self.setup_dataloaders(train_loader, test_loader)

    model = Net()
    optimizer = optim.Adadelta(model.parameters(), lr=hparams.lr)
    # Prepare the model and optimizer; the model is moved to the right device automatically.
    model, optimizer = self.setup(model, optimizer)
    scheduler = StepLR(optimizer, step_size=1, gamma=hparams.gamma)

    MainLoop(self, hparams, model, optimizer, scheduler, train_loader, test_loader).run()

    if hparams.save_model and self.is_global_zero:
        self.save(model.state_dict(), "mnist_cnn.pt")
def __init__(self, batch_size: int = 32):
    super().__init__()
    dataset = MNIST(_DATASETS_PATH, train=True, download=True, transform=transforms.ToTensor())
    self.mnist_test = MNIST(_DATASETS_PATH, train=False, download=True, transform=transforms.ToTensor())
    self.mnist_train, self.mnist_val = random_split(dataset, [55000, 5000])

    eii_train = ExternalMNISTInputIterator(self.mnist_train, batch_size)
    eii_val = ExternalMNISTInputIterator(self.mnist_val, batch_size)
    eii_test = ExternalMNISTInputIterator(self.mnist_test, batch_size)

    self.pipe_train = ExternalSourcePipeline(batch_size=batch_size, eii=eii_train, num_threads=2, device_id=0)
    self.pipe_val = ExternalSourcePipeline(batch_size=batch_size, eii=eii_val, num_threads=2, device_id=0)
    self.pipe_test = ExternalSourcePipeline(batch_size=batch_size, eii=eii_test, num_threads=2, device_id=0)
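To feed these pipelines to a trainer, the datamodule would typically wrap them in DALI's PyTorch iterators. A minimal sketch, assuming `DALIClassificationIterator` from `nvidia.dali.plugin.pytorch` and that each pipeline's external source yields (image, label) batches; these hooks are not in the original, and the `size` arguments just follow the splits built above:

# Hypothetical dataloader hooks for the DALI datamodule above (a sketch, not the original code).
from nvidia.dali.plugin.pytorch import DALIClassificationIterator

def train_dataloader(self):
    # Wrap the training pipeline; `size` is the number of samples per epoch.
    return DALIClassificationIterator(self.pipe_train, size=len(self.mnist_train))

def val_dataloader(self):
    return DALIClassificationIterator(self.pipe_val, size=len(self.mnist_val))

def test_dataloader(self):
    return DALIClassificationIterator(self.pipe_test, size=len(self.mnist_test))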
def __init__(self, batch_size: int = 32):
    super().__init__()
    dataset = MNIST(_DATASETS_PATH, train=True, download=True, transform=transforms.ToTensor())
    self.mnist_test = MNIST(_DATASETS_PATH, train=False, download=True, transform=transforms.ToTensor())
    self.mnist_train, self.mnist_val = random_split(dataset, [55000, 5000])
    self.batch_size = batch_size
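Because this variant stores the splits and `self.batch_size` directly, plain `DataLoader` hooks complete it. A minimal sketch (these methods are not in the original; the names follow the usual LightningDataModule convention):

# Hypothetical dataloader hooks matching the datamodule above (a sketch).
from torch.utils.data import DataLoader

def train_dataloader(self):
    return DataLoader(self.mnist_train, batch_size=self.batch_size, shuffle=True)

def val_dataloader(self):
    return DataLoader(self.mnist_val, batch_size=self.batch_size)

def test_dataloader(self):
    return DataLoader(self.mnist_test, batch_size=self.batch_size)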
def test_dataloader(self):
    test_dataset = MNIST("./data", train=False, download=False, transform=self.transform)
    return torch.utils.data.DataLoader(test_dataset, batch_size=self.hparams.batch_size)
def prepare_data(self):
    # Use this method for work that writes to disk or that must run from a single GPU
    # in distributed settings, such as downloading the dataset for the first time.
    MNIST(self.data_path, train=True, download=True, transform=transforms.ToTensor())
def setup(self, stage: Optional[str] = None) -> None:
    # Load the data.
    dataset = MNIST(
        _DATASETS_PATH,
        transform=T.Compose([T.ToTensor(), T.Normalize(mean=(0.5,), std=(0.5,))]),
    )
    self.train_dataset, self.test_dataset = random_split(dataset, [50000, 10000])
def run(self, hparams):
    self.hparams = hparams
    seed_everything(hparams.seed)  # instead of torch.manual_seed(...)

    transform = T.Compose([T.ToTensor(), T.Normalize((0.1307,), (0.3081,))])

    # This ensures the data is downloaded by only one process.
    if self.is_global_zero:
        MNIST("./data", download=True)
    self.barrier()

    train_dataset = MNIST("./data", train=True, transform=transform)
    test_dataset = MNIST("./data", train=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=hparams.batch_size)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=hparams.batch_size)

    # Don't forget to call `setup_dataloaders` to prepare the dataloaders for distributed training.
    train_loader, test_loader = self.setup_dataloaders(train_loader, test_loader)

    model = Net()  # no call to .to(device) needed
    optimizer = optim.Adadelta(model.parameters(), lr=hparams.lr)

    # Don't forget to call `setup` to prepare the model and optimizer for distributed training.
    # The model is moved automatically to the right device.
    model, optimizer = self.setup(model, optimizer)

    scheduler = StepLR(optimizer, step_size=1, gamma=hparams.gamma)

    # Use torchmetrics instead of computing the accuracy manually.
    test_acc = Accuracy().to(self.device)

    # EPOCH LOOP
    for epoch in range(1, hparams.epochs + 1):

        # TRAINING LOOP
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            # NOTE: no need to call `.to(device)` on the data and target
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            self.backward(loss)  # instead of loss.backward()
            optimizer.step()
            if (batch_idx == 0) or ((batch_idx + 1) % hparams.log_interval == 0):
                print(
                    "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                        epoch,
                        batch_idx * len(data),
                        len(train_loader.dataset),
                        100.0 * batch_idx / len(train_loader),
                        loss.item(),
                    )
                )
                if hparams.dry_run:
                    break

        scheduler.step()

        # TESTING LOOP
        model.eval()
        test_loss = 0
        with torch.no_grad():
            for data, target in test_loader:
                # NOTE: no need to call `.to(device)` on the data and target
                output = model(data)
                test_loss += F.nll_loss(output, target, reduction="sum").item()

                # WITHOUT TorchMetrics
                # pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
                # correct += pred.eq(target.view_as(pred)).sum().item()

                # WITH TorchMetrics
                test_acc(output, target)

                if hparams.dry_run:
                    break

        # `all_gather` is used to aggregate the value across processes.
        test_loss = self.all_gather(test_loss).sum() / len(test_loader.dataset)

        print(f"\nTest set: Average loss: {test_loss:.4f}, Accuracy: ({100 * test_acc.compute():.0f}%)\n")
        test_acc.reset()

        if hparams.dry_run:
            break

    # When using distributed training, use `self.save`
    # to ensure the current process is allowed to save a checkpoint.
    if hparams.save_model:
        self.save(model.state_dict(), "mnist_cnn.pt")
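A Lite `run` method like the one above is started through the class itself rather than called directly. A minimal launch sketch, assuming the method belongs to a `LightningLite` subclass here called `Lite` (the class name, hyperparameter values, and accelerator choice are illustrative, not from the original):

# Hypothetical launcher for the Lite run() above (a sketch; `Lite` is the assumed subclass name).
from types import SimpleNamespace

hparams = SimpleNamespace(
    batch_size=64, epochs=2, lr=1.0, gamma=0.7, seed=1,
    log_interval=10, dry_run=False, save_model=False,
)
# LightningLite handles device placement and, with more devices, process launching.
Lite(accelerator="cpu", devices=1).run(hparams)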
def prepare_data(self) -> None: MNIST("./data", download=True)
def prepare_data(self) -> None:
    # Download the data.
    MNIST(
        _DATASETS_PATH,
        download=True,
        transform=T.Compose([T.ToTensor(), T.Normalize(mean=(0.5,), std=(0.5,))]),
    )
def setup(self, stage=None):
    # There are also data operations you might want to perform on every GPU, such as applying
    # transforms defined explicitly in your datamodule or assigned in init.
    self.mnist_train = MNIST(self.data_path, train=True, transform=self.transform)
def run(hparams):
    torch.manual_seed(hparams.seed)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    transform = T.Compose([T.ToTensor(), T.Normalize((0.1307,), (0.3081,))])
    train_dataset = MNIST("./data", train=True, download=True, transform=transform)
    test_dataset = MNIST("./data", train=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=hparams.batch_size)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=hparams.batch_size)

    model = Net().to(device)
    optimizer = optim.Adadelta(model.parameters(), lr=hparams.lr)
    scheduler = StepLR(optimizer, step_size=1, gamma=hparams.gamma)

    # EPOCH LOOP
    for epoch in range(1, hparams.epochs + 1):

        # TRAINING LOOP
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()
            if (batch_idx == 0) or ((batch_idx + 1) % hparams.log_interval == 0):
                print(
                    "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                        epoch,
                        batch_idx * len(data),
                        len(train_loader.dataset),
                        100.0 * batch_idx / len(train_loader),
                        loss.item(),
                    )
                )
                if hparams.dry_run:
                    break

        scheduler.step()

        # TESTING LOOP
        model.eval()
        test_loss = 0
        correct = 0
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                test_loss += F.nll_loss(output, target, reduction="sum").item()  # sum up batch loss
                pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
                correct += pred.eq(target.view_as(pred)).sum().item()
                if hparams.dry_run:
                    break

        test_loss /= len(test_loader.dataset)

        print(
            "\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
                test_loss, correct, len(test_loader.dataset), 100.0 * correct / len(test_loader.dataset)
            )
        )

        if hparams.dry_run:
            break

    if hparams.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
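The vanilla loop above reads its settings from an `hparams` namespace. An argparse entry point along these lines would wire it up; the flag names mirror the attributes the function actually reads, while the defaults are illustrative, not from the original:

# Hypothetical argparse entry point for the vanilla run() above (a sketch).
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Plain PyTorch MNIST example")
    parser.add_argument("--batch-size", type=int, default=64)
    parser.add_argument("--epochs", type=int, default=14)
    parser.add_argument("--lr", type=float, default=1.0)
    parser.add_argument("--gamma", type=float, default=0.7)
    parser.add_argument("--seed", type=int, default=1)
    parser.add_argument("--log-interval", type=int, default=10)
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--save-model", action="store_true")
    run(parser.parse_args())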