Esempio n. 1
0
    def run(self, hparams):
        """Train and evaluate the MNIST classifier configured by ``hparams``.

        The dataset is downloaded by the global-zero process only; after the
        barrier every process builds its own datasets and dataloaders, the
        model and optimizer are passed through ``self.setup`` and the epoch
        loop is delegated to ``MainLoop``.

        Args:
            hparams: Settings object; this method reads ``batch_size``,
                ``test_batch_size``, ``lr``, ``gamma`` and ``save_model``
                and forwards the whole object to ``MainLoop``.
        """
        preprocessing = T.Compose([
            T.ToTensor(),
            T.Normalize((0.1307, ), (0.3081, )),
        ])

        # Fetch the files from one process, then synchronise all processes
        # before any of them opens the dataset.
        if self.is_global_zero:
            MNIST("./data", download=True)
        self.barrier()

        train_set = MNIST("./data", train=True, transform=preprocessing)
        test_set = MNIST("./data", train=False, transform=preprocessing)
        train_loader, test_loader = self.setup_dataloaders(
            torch.utils.data.DataLoader(train_set,
                                        batch_size=hparams.batch_size),
            torch.utils.data.DataLoader(test_set,
                                        batch_size=hparams.test_batch_size),
        )

        net = Net()
        net, opt = self.setup(
            net, optim.Adadelta(net.parameters(), lr=hparams.lr))
        lr_schedule = StepLR(opt, step_size=1, gamma=hparams.gamma)

        loop = MainLoop(self, hparams, net, opt, lr_schedule, train_loader,
                        test_loader)
        loop.run()

        # Checkpoint only when requested, and only from the global-zero
        # process.
        if not hparams.save_model:
            return
        if self.is_global_zero:
            self.save(net.state_dict(), "mnist_cnn.pt")
Esempio n. 2
0
    def run(self, hparams):
        """Train and evaluate an MNIST classifier with an explicit epoch loop.

        Seeds the run, downloads MNIST on the global-zero process, prepares
        dataloaders / model / optimizer for the current accelerator setup and
        then alternates one training pass and one test pass per epoch.

        Args:
            hparams: Settings object; this method reads ``seed``,
                ``batch_size``, ``lr``, ``gamma``, ``epochs``,
                ``log_interval``, ``dry_run`` and ``save_model``.
        """
        self.hparams = hparams
        seed_everything(hparams.seed)  # instead of torch.manual_seed(...)

        transform = T.Compose(
            [T.ToTensor(), T.Normalize((0.1307, ), (0.3081, ))])
        # Download the data from a single process only, then synchronise so
        # the other processes do not read the dataset before it is on disk.
        if self.is_global_zero:
            MNIST("./data", download=True)
        self.barrier()
        train_dataset = MNIST("./data", train=True, transform=transform)
        test_dataset = MNIST("./data", train=False, transform=transform)
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=hparams.batch_size,
        )
        test_loader = torch.utils.data.DataLoader(
            test_dataset, batch_size=hparams.batch_size)

        # `setup_dataloaders` prepares the dataloaders for distributed
        # training.
        train_loader, test_loader = self.setup_dataloaders(
            train_loader, test_loader)

        model = Net()  # no explicit call to .to(device)
        optimizer = optim.Adadelta(model.parameters(), lr=hparams.lr)

        # `setup` prepares the model / optimizer for distributed training and
        # moves the model to the right device.
        model, optimizer = self.setup(model, optimizer)

        scheduler = StepLR(optimizer, step_size=1, gamma=hparams.gamma)

        # Use torchmetrics instead of manually computing the accuracy.
        test_acc = Accuracy().to(self.device)

        # EPOCH LOOP
        for epoch in range(1, hparams.epochs + 1):

            # TRAINING LOOP
            model.train()
            for batch_idx, (data, target) in enumerate(train_loader):
                # NOTE: no need to call `.to(device)` on the data, target
                optimizer.zero_grad()
                output = model(data)
                loss = F.nll_loss(output, target)
                self.backward(loss)  # instead of loss.backward()

                optimizer.step()
                if (batch_idx == 0) or ((batch_idx + 1) % hparams.log_interval
                                        == 0):
                    print(
                        f"Train Epoch: {epoch} "
                        f"[{batch_idx * len(data)}/{len(train_loader.dataset)} "
                        f"({100.0 * batch_idx / len(train_loader):.0f}%)]"
                        f"\tLoss: {loss.item():.6f}")
                    # A dry run stops after the first logged batch.
                    if hparams.dry_run:
                        break

            scheduler.step()

            # TESTING LOOP
            model.eval()
            test_loss = 0
            with torch.no_grad():
                for data, target in test_loader:
                    # NOTE: no need to call `.to(device)` on the data, target
                    output = model(data)
                    # Accumulate the *summed* loss so the division by the
                    # dataset size below gives a per-sample average.
                    test_loss += F.nll_loss(output, target,
                                            reduction="sum").item()

                    # WITHOUT TorchMetrics
                    # pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
                    # correct += pred.eq(target.view_as(pred)).sum().item()

                    # WITH TorchMetrics
                    test_acc(output, target)

                    if hparams.dry_run:
                        break

            # all_gather is used to aggregate the value across processes
            test_loss = self.all_gather(test_loss).sum() / len(
                test_loader.dataset)

            # `Accuracy.compute()` yields a fraction in [0, 1]; scale it to a
            # percentage so it matches the "%" sign in the message.
            print(
                f"\nTest set: Average loss: {test_loss:.4f}, Accuracy: ({100 * test_acc.compute():.0f}%)\n"
            )
            test_acc.reset()

            if hparams.dry_run:
                break

        # When using distributed training, use `self.save`
        # to ensure the current process is allowed to save a checkpoint
        if hparams.save_model:
            self.save(model.state_dict(), "mnist_cnn.pt")
 def __init__(self, model=None, lr=1.0, gamma=0.7, batch_size=32):
     """Store the network to train and create the test-accuracy metric.

     Args:
         model: Network to use; a new ``Net()`` is built when this is
             falsy (e.g. ``None``).
         lr: Not referenced directly in this method.
         gamma: Not referenced directly in this method.
         batch_size: Not referenced directly in this method.
     """
     super().__init__()
     # NOTE(review): presumably this records the constructor arguments other
     # than `model` (lr, gamma, batch_size) as hyperparameters on the
     # instance -- confirm against the base class, which is not visible here.
     self.save_hyperparameters(ignore="model")
     # `or` falls back on any falsy value, not only on None.
     self.model = model or Net()
     self.test_acc = Accuracy()
Esempio n. 4
0
    def run(self, hparams):
        """Run the train/test epoch loop through the module-style hooks.

        The model, optimizer, scheduler, dataloaders and per-batch steps all
        come from methods on ``self`` (``configure_optimizers``,
        ``prepare_data``, ``train_dataloader``, ``test_dataloader``,
        ``training_step``, ``test_step``); this method only orchestrates
        them.

        Args:
            hparams: Settings object; this method reads ``seed``, ``epochs``,
                ``log_interval``, ``batch_size``, ``dry_run`` and
                ``save_model``.
        """
        self.hparams = hparams
        seed_everything(hparams.seed)  # instead of torch.manual_seed(...)

        self.model = Net()
        [optimizer], [scheduler] = self.configure_optimizers()
        # NOTE(review): the module returned by `setup` is bound to the local
        # `model` and is only used for the final checkpoint, while
        # `.train()` / `.eval()` are called on `self.model`. Verify that
        # `training_step` / `test_step` run the intended module.
        model, optimizer = self.setup(self.model, optimizer)

        if self.is_global_zero:
            # In multi-device training, this code will only run on the first process / GPU
            self.prepare_data()
        # Every process must wait here; otherwise the others could start
        # building their datasets before the data has been prepared.
        self.barrier()

        # The second loader must come from `test_dataloader()`; passing the
        # training loader twice would evaluate on the training set.
        train_loader, test_loader = self.setup_dataloaders(
            self.train_dataloader(), self.test_dataloader())

        self.test_acc = Accuracy().to(self.device)

        # EPOCH LOOP
        for epoch in range(1, hparams.epochs + 1):

            # TRAINING LOOP
            self.model.train()
            for batch_idx, batch in enumerate(train_loader):
                optimizer.zero_grad()
                loss = self.training_step(batch, batch_idx)
                self.backward(loss)
                optimizer.step()

                if (batch_idx == 0) or ((batch_idx + 1) % hparams.log_interval
                                        == 0):
                    print(
                        f"Train Epoch: {epoch} "
                        f"[{(batch_idx + 1) * self.hparams.batch_size}/{len(train_loader.dataset)} "
                        f"({100.0 * batch_idx / len(train_loader):.0f}%)]"
                        f"\tLoss: {loss.item():.6f}")
                    # A dry run stops after the first logged batch.
                    if hparams.dry_run:
                        break

            scheduler.step()

            # TESTING LOOP
            self.model.eval()
            test_loss = 0
            with torch.no_grad():
                for batch_idx, batch in enumerate(test_loader):
                    test_loss += self.test_step(batch, batch_idx)
                    if hparams.dry_run:
                        break

            # Aggregate across processes, then average over the dataset.
            # NOTE(review): dividing by the dataset size assumes `test_step`
            # returns a summed (not batch-mean) loss -- confirm.
            test_loss = self.all_gather(test_loss).sum() / len(
                test_loader.dataset)

            # The metric's `compute()` yields a fraction in [0, 1]; scale it
            # to a percentage so it matches the "%" sign in the message.
            print(
                f"\nTest set: Average loss: {test_loss:.4f}, Accuracy: ({100 * self.test_acc.compute():.0f}%)\n"
            )
            self.test_acc.reset()

            if hparams.dry_run:
                break

        if hparams.save_model:
            self.save(model.state_dict(), "mnist_cnn.pt")
Esempio n. 5
0
class Lite(LightningLite):
    """Lite is starting to look like a LightningModule."""

    def run(self, hparams):
        """Run the train/test epoch loop through the module-style hooks.

        Args:
            hparams: Settings object; the class reads ``seed``, ``lr``,
                ``gamma``, ``batch_size``, ``epochs``, ``log_interval``,
                ``dry_run`` and ``save_model`` from it.
        """
        self.hparams = hparams
        seed_everything(hparams.seed)  # instead of torch.manual_seed(...)

        self.model = Net()
        [optimizer], [scheduler] = self.configure_optimizers()
        # NOTE(review): the module returned by `setup` is bound to the local
        # `model` and only used for the final checkpoint, while `forward`
        # calls `self.model`. Verify this is intended for multi-device runs.
        model, optimizer = self.setup(self.model, optimizer)

        if self.is_global_zero:
            # In multi-device training, this code will only run on the first process / GPU
            self.prepare_data()
        # Every process must wait here; otherwise the others could open the
        # dataset (with download=False) before the download has finished.
        self.barrier()

        # Evaluate on the held-out split, not on the training loader again.
        train_loader, test_loader = self.setup_dataloaders(
            self.train_dataloader(), self.test_dataloader())

        self.test_acc = Accuracy().to(self.device)

        # EPOCH LOOP
        for epoch in range(1, hparams.epochs + 1):

            # TRAINING LOOP
            self.model.train()
            for batch_idx, batch in enumerate(train_loader):
                optimizer.zero_grad()
                loss = self.training_step(batch, batch_idx)
                self.backward(loss)
                optimizer.step()

                if (batch_idx == 0) or ((batch_idx + 1) % hparams.log_interval
                                        == 0):
                    print(
                        f"Train Epoch: {epoch} "
                        f"[{(batch_idx + 1) * self.hparams.batch_size}/{len(train_loader.dataset)} "
                        f"({100.0 * batch_idx / len(train_loader):.0f}%)]"
                        f"\tLoss: {loss.item():.6f}")
                    # A dry run stops after the first logged batch.
                    if hparams.dry_run:
                        break

            scheduler.step()

            # TESTING LOOP
            self.model.eval()
            test_loss = 0
            with torch.no_grad():
                for batch_idx, batch in enumerate(test_loader):
                    # `test_step` returns the batch-mean loss; weight it by
                    # the number of targets in the batch so that the
                    # division by the dataset size below yields a true
                    # per-sample average.
                    test_loss += self.test_step(batch,
                                                batch_idx) * len(batch[1])
                    if hparams.dry_run:
                        break

            # Aggregate across processes, then average over the dataset.
            test_loss = self.all_gather(test_loss).sum() / len(
                test_loader.dataset)

            # The metric's `compute()` yields a fraction in [0, 1]; scale it
            # to a percentage so it matches the "%" sign in the message.
            print(
                f"\nTest set: Average loss: {test_loss:.4f}, Accuracy: ({100 * self.test_acc.compute():.0f}%)\n"
            )
            self.test_acc.reset()

            if hparams.dry_run:
                break

        if hparams.save_model:
            self.save(model.state_dict(), "mnist_cnn.pt")

    # Methods for the `LightningModule` conversion

    def forward(self, x):
        """Return the output of ``self.model`` for the input batch ``x``."""
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Here you compute and return the training loss and compute extra training metrics."""
        x, y = batch
        logits = self.forward(x)
        loss = F.nll_loss(logits, y.long())
        return loss

    def test_step(self, batch, batch_idx):
        """Here you compute and return the testing loss and compute extra testing metrics."""
        x, y = batch
        logits = self.forward(x)
        loss = F.nll_loss(logits, y.long())
        # Update the running accuracy metric with this batch.
        self.test_acc(logits, y.long())
        return loss

    def configure_optimizers(self):
        """Return ``([optimizer], [scheduler])`` built from ``self.hparams``."""
        optimizer = optim.Adadelta(self.model.parameters(), lr=self.hparams.lr)
        return [optimizer
                ], [StepLR(optimizer, step_size=1, gamma=self.hparams.gamma)]

    # Methods for the `LightningDataModule` conversion

    @property
    def transform(self):
        """Tensor conversion followed by normalization, applied to each image."""
        return T.Compose([T.ToTensor(), T.Normalize((0.1307, ), (0.3081, ))])

    def prepare_data(self) -> None:
        """Download MNIST into ``./data``."""
        MNIST("./data", download=True)

    def train_dataloader(self):
        """Return a dataloader over the MNIST training split."""
        train_dataset = MNIST("./data",
                              train=True,
                              download=False,
                              transform=self.transform)
        return torch.utils.data.DataLoader(train_dataset,
                                           batch_size=self.hparams.batch_size)

    def test_dataloader(self):
        """Return a dataloader over the MNIST test split."""
        test_dataset = MNIST("./data",
                             train=False,
                             download=False,
                             transform=self.transform)
        return torch.utils.data.DataLoader(test_dataset,
                                           batch_size=self.hparams.batch_size)