예제 #1
0
    def run(self, hparams):
        """Prepare MNIST data, model and optimizer, then delegate to ``MainLoop``.

        Only the global-zero process triggers the dataset download; every
        process then waits on ``self.barrier()`` before opening the data.
        Loaders, model and optimizer are passed through
        ``self.setup_dataloaders`` / ``self.setup`` before the loop runs.
        When ``hparams.save_model`` is truthy, the global-zero process saves
        the model state dict to ``mnist_cnn.pt``.

        Args:
            hparams: object exposing ``batch_size``, ``test_batch_size``,
                ``lr``, ``gamma`` and ``save_model``.
        """
        data_root = "./data"
        preprocessing = T.Compose([
            T.ToTensor(),
            T.Normalize((0.1307, ), (0.3081, )),
        ])

        # Download from a single process, then let everyone catch up.
        if self.is_global_zero:
            MNIST(data_root, download=True)
        self.barrier()

        train_split = MNIST(data_root, train=True, transform=preprocessing)
        test_split = MNIST(data_root, train=False, transform=preprocessing)
        loaders = self.setup_dataloaders(
            torch.utils.data.DataLoader(train_split, hparams.batch_size),
            torch.utils.data.DataLoader(test_split, hparams.test_batch_size),
        )
        train_loader, test_loader = loaders

        net = Net()
        opt = optim.Adadelta(net.parameters(), lr=hparams.lr)
        net, opt = self.setup(net, opt)
        lr_schedule = StepLR(opt, step_size=1, gamma=hparams.gamma)

        loop = MainLoop(self, hparams, net, opt, lr_schedule, train_loader,
                        test_loader)
        loop.run()

        # Nothing left to do unless saving was requested on global zero.
        if not (hparams.save_model and self.is_global_zero):
            return
        self.save(net.state_dict(), "mnist_cnn.pt")
예제 #2
0
    def __init__(self, batch_size: int = 32):
        """Load MNIST and build one external-source pipeline per split.

        The training set is split 55000 / 5000 into train and validation
        subsets. Each of the three splits gets its own
        ``ExternalMNISTInputIterator`` and ``ExternalSourcePipeline``.

        Args:
            batch_size: batch size passed to every iterator and pipeline.
        """
        super().__init__()

        full_train = MNIST(_DATASETS_PATH,
                           train=True,
                           download=True,
                           transform=transforms.ToTensor())
        self.mnist_test = MNIST(_DATASETS_PATH,
                                train=False,
                                download=True,
                                transform=transforms.ToTensor())
        self.mnist_train, self.mnist_val = random_split(
            full_train, [55000, 5000])

        def build_pipeline(source):
            # All pipelines share the same thread count and device id.
            return ExternalSourcePipeline(batch_size=batch_size,
                                          eii=source,
                                          num_threads=2,
                                          device_id=0)

        # Create all iterators first, then the pipelines that consume them.
        train_iter, val_iter, test_iter = [
            ExternalMNISTInputIterator(split, batch_size)
            for split in (self.mnist_train, self.mnist_val, self.mnist_test)
        ]

        self.pipe_train = build_pipeline(train_iter)
        self.pipe_val = build_pipeline(val_iter)
        self.pipe_test = build_pipeline(test_iter)
예제 #3
0
 def __init__(self, batch_size: int = 32):
     """Load MNIST, split off a validation subset and store the batch size.

     Args:
         batch_size: value stored on ``self.batch_size``.
     """
     super().__init__()

     def load_split(is_train):
         # Both splits use the same root, download flag and transform type.
         return MNIST(_DATASETS_PATH,
                      train=is_train,
                      download=True,
                      transform=transforms.ToTensor())

     train_and_val = load_split(True)
     self.mnist_test = load_split(False)

     # 55000 samples for training, 5000 held out for validation.
     train_part, val_part = random_split(train_and_val, [55000, 5000])
     self.mnist_train = train_part
     self.mnist_val = val_part
     self.batch_size = batch_size
 def test_dataloader(self):
     """Return a DataLoader over the MNIST test split stored in ``./data``.

     The dataset is opened with ``download=False`` and ``self.transform``;
     the batch size comes from ``self.hparams.batch_size``.
     """
     held_out = MNIST("./data",
                      train=False,
                      download=False,
                      transform=self.transform)
     loader_options = {"batch_size": self.hparams.batch_size}
     return torch.utils.data.DataLoader(held_out, **loader_options)
 def prepare_data(self, stage=None):
     """Instantiate the MNIST training set with ``download=True``.

     This hook is for work that may write to disk or that must happen on
     a single GPU only in distributed settings, such as the first download
     of the dataset. The created dataset object is discarded.

     Args:
         stage: not read by this method.
     """
     root = self.data_path
     MNIST(root, download=True, train=True, transform=transforms.ToTensor())
예제 #6
0
 def setup(self, stage: Optional[str] = None) -> None:
     """Load MNIST and split it 50000 / 10000 into train and test subsets.

     The results are stored on ``self.train_dataset`` and
     ``self.test_dataset``.

     Args:
         stage: not read by this method.
     """
     steps = [T.ToTensor(), T.Normalize(mean=(0.5, ), std=(0.5, ))]
     full_set = MNIST(_DATASETS_PATH, transform=T.Compose(steps))

     train_part, test_part = random_split(full_set, [50000, 10000])
     self.train_dataset = train_part
     self.test_dataset = test_part
예제 #7
0
    def run(self, hparams):
        """Train and evaluate the MNIST classifier using this object's helpers.

        Seeds the run, downloads MNIST on the global-zero process only,
        builds loaders / model / optimizer and passes them through
        ``self.setup_dataloaders`` and ``self.setup``. It then alternates one
        training pass and one test pass per epoch. After each test pass the
        summed loss is gathered across processes with ``self.all_gather``
        and printed together with the accuracy as a percentage.

        Args:
            hparams: object exposing ``seed``, ``batch_size``, ``lr``,
                ``gamma``, ``epochs``, ``log_interval``, ``dry_run`` and
                ``save_model``. It is also stored on ``self.hparams``.
        """
        self.hparams = hparams
        seed_everything(hparams.seed)  # instead of torch.manual_seed(...)

        transform = T.Compose(
            [T.ToTensor(), T.Normalize((0.1307, ), (0.3081, ))])
        # Ensure the data is downloaded by only one process; the others wait
        # at the barrier until it is available.
        if self.is_global_zero:
            MNIST("./data", download=True)
        self.barrier()
        train_dataset = MNIST("./data", train=True, transform=transform)
        test_dataset = MNIST("./data", train=False, transform=transform)
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=hparams.batch_size,
        )
        test_loader = torch.utils.data.DataLoader(
            test_dataset, batch_size=hparams.batch_size)

        # Call `setup_dataloaders` to prepare the dataloaders for distributed
        # training.
        train_loader, test_loader = self.setup_dataloaders(
            train_loader, test_loader)

        model = Net()  # no call to .to(device) here
        optimizer = optim.Adadelta(model.parameters(), lr=hparams.lr)

        # Call `setup` to prepare the model / optimizer for distributed
        # training; the model is moved automatically to the right device.
        model, optimizer = self.setup(model, optimizer)

        scheduler = StepLR(optimizer, step_size=1, gamma=hparams.gamma)

        # Use torchmetrics instead of manually computing the accuracy.
        test_acc = Accuracy().to(self.device)

        # EPOCH LOOP
        for epoch in range(1, hparams.epochs + 1):

            # TRAINING LOOP
            model.train()
            for batch_idx, (data, target) in enumerate(train_loader):
                # NOTE: no need to call `.to(device)` on the data, target
                optimizer.zero_grad()
                output = model(data)
                loss = F.nll_loss(output, target)
                self.backward(loss)  # instead of loss.backward()

                optimizer.step()
                if (batch_idx == 0) or ((batch_idx + 1) % hparams.log_interval
                                        == 0):
                    print("Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".
                          format(
                              epoch,
                              batch_idx * len(data),
                              len(train_loader.dataset),
                              100.0 * batch_idx / len(train_loader),
                              loss.item(),
                          ))
                    # A dry run stops after the first logged batch.
                    if hparams.dry_run:
                        break

            scheduler.step()

            # TESTING LOOP
            model.eval()
            test_loss = 0
            with torch.no_grad():
                for data, target in test_loader:
                    # NOTE: no need to call `.to(device)` on the data, target
                    output = model(data)
                    test_loss += F.nll_loss(output, target,
                                            reduction="sum").item()

                    # WITHOUT TorchMetrics
                    # pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
                    # correct += pred.eq(target.view_as(pred)).sum().item()

                    # WITH TorchMetrics
                    test_acc(output, target)

                    if hparams.dry_run:
                        break

            # all_gather aggregates the per-process loss sums before the
            # division by the dataset size.
            test_loss = self.all_gather(test_loss).sum() / len(
                test_loader.dataset)

            # The metric value is a fraction in [0, 1]; scale it by 100 so
            # the "%" suffix is truthful (unscaled it prints "0%" or "1%").
            accuracy_pct = 100 * test_acc.compute()
            print(
                f"\nTest set: Average loss: {test_loss:.4f}, Accuracy: ({accuracy_pct:.0f}%)\n"
            )
            test_acc.reset()

            if hparams.dry_run:
                break

        # When using distributed training, use `self.save`
        # to ensure the current process is allowed to save a checkpoint
        if hparams.save_model:
            self.save(model.state_dict(), "mnist_cnn.pt")
 def prepare_data(self) -> None:
     """Instantiate ``MNIST`` on ``./data`` with ``download=True``.

     The resulting dataset object is not kept.
     """
     data_dir = "./data"
     MNIST(data_dir, download=True)
예제 #9
0
 def prepare_data(self) -> None:
     """Download MNIST into ``_DATASETS_PATH``.

     The dataset object is created only to trigger the download and is
     discarded afterwards.
     """
     transform = T.Compose(
         [T.ToTensor(),
          T.Normalize(mean=(0.5, ), std=(0.5, ))])
     # Request the download explicitly rather than relying on the dataset
     # class's default for `download`.
     # NOTE(review): torchvision's MNIST defaults to download=False; confirm
     # which MNIST class this file imports.
     MNIST(_DATASETS_PATH, download=True, transform=transform)
 def setup(self, stage=None):
     """Create the MNIST training dataset and store it on ``self.mnist_train``.

     This hook is for data operations that should run on every GPU, such
     as applying transforms defined in the datamodule or assigned in init.

     Args:
         stage: not read by this method.
     """
     root = self.data_path
     preprocessing = self.transform
     self.mnist_train = MNIST(root, train=True, transform=preprocessing)
def run(hparams):
    """Train and evaluate the MNIST network with a plain PyTorch loop.

    Seeds torch, picks CUDA when available (CPU otherwise), then runs one
    training pass and one test pass per epoch. Training progress is printed
    on the first batch and every ``hparams.log_interval`` batches; a test
    summary is printed after every epoch. When ``hparams.save_model`` is
    truthy the model state dict is written to ``mnist_cnn.pt``.

    Args:
        hparams: object exposing ``seed``, ``batch_size``, ``lr``, ``gamma``,
            ``epochs``, ``log_interval``, ``dry_run`` and ``save_model``.
    """
    torch.manual_seed(hparams.seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    preprocessing = T.Compose(
        [T.ToTensor(), T.Normalize((0.1307, ), (0.3081, ))])
    train_split = MNIST("./data",
                        train=True,
                        download=True,
                        transform=preprocessing)
    test_split = MNIST("./data", train=False, transform=preprocessing)
    train_loader = torch.utils.data.DataLoader(
        train_split, batch_size=hparams.batch_size)
    test_loader = torch.utils.data.DataLoader(
        test_split, batch_size=hparams.batch_size)

    model = Net().to(device)
    optimizer = optim.Adadelta(model.parameters(), lr=hparams.lr)
    scheduler = StepLR(optimizer, step_size=1, gamma=hparams.gamma)

    for epoch in range(1, hparams.epochs + 1):

        # ---- training pass ----
        model.train()
        for step, (inputs, labels) in enumerate(train_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            loss = F.nll_loss(model(inputs), labels)
            loss.backward()
            optimizer.step()

            # Log on the first batch and on every log_interval-th batch.
            should_log = step == 0 or (step + 1) % hparams.log_interval == 0
            if not should_log:
                continue
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                epoch,
                step * len(inputs),
                len(train_loader.dataset),
                100.0 * step / len(train_loader),
                loss.item(),
            ))
            # A dry run stops after the first logged batch.
            if hparams.dry_run:
                break
        scheduler.step()

        # ---- evaluation pass ----
        model.eval()
        loss_sum = 0
        n_correct = 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                log_probs = model(inputs)
                # Accumulate the summed (not averaged) batch loss.
                loss_sum += F.nll_loss(log_probs, labels,
                                       reduction="sum").item()
                # Predicted class = index of the max log-probability.
                predicted = log_probs.argmax(dim=1, keepdim=True)
                matches = predicted.eq(labels.view_as(predicted))
                n_correct += matches.sum().item()
                if hparams.dry_run:
                    break

        n_test = len(test_loader.dataset)
        mean_loss = loss_sum / n_test

        print("\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".
              format(mean_loss, n_correct, n_test,
                     100.0 * n_correct / n_test))

        if hparams.dry_run:
            break

    if hparams.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")