Example #1
from pytorch_lightning import Trainer, seed_everything

from pl_bolts.datamodules import FashionMNISTDataModule, MNISTDataModule
from pl_bolts.models.vision import ImageGPT


def test_igpt(tmpdir, datadir):
    seed_everything(0)
    dm = MNISTDataModule(data_dir=datadir, normalize=False)
    model = ImageGPT()

    trainer = Trainer(
        limit_train_batches=2,
        limit_val_batches=2,
        limit_test_batches=2,
        max_epochs=1,
    )
    trainer.fit(model, datamodule=dm)
    trainer.test(datamodule=dm)
    assert trainer.callback_metrics["test_loss"] < 1.7

    dm = FashionMNISTDataModule(data_dir=datadir, num_workers=1)
    model = ImageGPT(classify=True)
    trainer = Trainer(
        limit_train_batches=2,
        limit_val_batches=2,
        limit_test_batches=2,
        max_epochs=1,
        logger=False,
        checkpoint_callback=False,
    )
    trainer.fit(model, datamodule=dm)
Example #2

from argparse import ArgumentParser

import pytorch_lightning as pl

from pl_bolts.datamodules import FashionMNISTDataModule, ImagenetDataModule
from pl_bolts.models.vision import ImageGPT


def cli_main():
    parser = ArgumentParser()

    # trainer args
    parser = pl.Trainer.add_argparse_args(parser)

    # model args
    parser = ImageGPT.add_model_specific_args(parser)
    args = parser.parse_args()

    if args.dataset == "fashion_mnist":
        datamodule = FashionMNISTDataModule.from_argparse_args(args)

    elif args.dataset == "imagenet128":
        datamodule = ImagenetDataModule.from_argparse_args(args)

    else:
        # fall back to the model's default (FashionMNIST) datamodule
        datamodule = None

    model = ImageGPT(**args.__dict__, datamodule=datamodule)

    trainer = pl.Trainer.from_argparse_args(args)
    trainer.fit(model)
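
For reference, a minimal sketch of invoking cli_main programmatically; the --dataset flag is assumed to be one of the model-specific arguments (the branches above imply it), and the remaining values are illustrative only:

import sys

# hypothetical quick run: FashionMNIST, small batches, one epoch
sys.argv = [
    "igpt_module.py",
    "--dataset", "fashion_mnist",
    "--batch_size", "32",
    "--max_epochs", "1",
]
cli_main()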
Example #3
import pytorch_lightning as pl

from pl_bolts.datamodules import FashionMNISTDataModule, MNISTDataModule
from pl_bolts.models.vision import ImageGPT


def test_igpt(tmpdir):
    pl.seed_everything(0)
    dm = MNISTDataModule(tmpdir, normalize=False)
    model = ImageGPT(datamodule=dm)

    trainer = pl.Trainer(
        limit_train_batches=2,
        limit_val_batches=2,
        limit_test_batches=2,
        max_epochs=1,
    )
    trainer.fit(model)
    trainer.test()
    assert trainer.callback_metrics["test_loss"] < 1.7

    dm = FashionMNISTDataModule(tmpdir, num_workers=1)
    model = ImageGPT(classify=True, datamodule=dm)
    trainer = pl.Trainer(
        limit_train_batches=2,
        limit_val_batches=2,
        limit_test_batches=2,
        max_epochs=1,
    )
    trainer.fit(model)
from argparse import ArgumentParser

import pytorch_lightning as pl
from torch import nn

from pl_bolts.datamodules import FashionMNISTDataModule, ImagenetDataModule
from pl_bolts.models.vision.image_gpt.gpt2 import GPT2


class ImageGPT(pl.LightningModule):
    def __init__(
        self,
        datamodule: pl.LightningDataModule = None,
        embed_dim: int = 16,
        heads: int = 2,
        layers: int = 2,
        pixels: int = 28,
        vocab_size: int = 16,
        num_classes: int = 10,
        classify: bool = False,
        batch_size: int = 64,
        learning_rate: float = 1e-2,
        steps: int = 25_000,
        data_dir: str = ".",
        num_workers: int = 8,
        **kwargs,
    ):
        """
        **Paper**: `Generative Pretraining from Pixels
        <https://cdn.openai.com/papers/Generative_Pretraining_from_Pixels_V2.pdf>`_
        [original paper `code <https://github.com/openai/image-gpt>`_].

        **Paper by:** Mark Chen, Alec Radford, Rewon Child, Jeff Wu, Heewoo Jun,
        Prafulla Dhariwal, David Luan, Ilya Sutskever

        **Implementation contributed by**:

            - `Teddy Koker <https://github.com/teddykoker>`_

        **Original repo with results and more implementation details**:

            - `https://github.com/teddykoker/image-gpt <https://github.com/teddykoker/image-gpt>`_

        **Example Results (Photo credits: Teddy Koker)**:

        .. image:: https://raw.githubusercontent.com/teddykoker/image-gpt/master/figures/mnist.png
            :width: 250
            :alt: credit-Teddy-Koker

        .. image:: https://raw.githubusercontent.com/teddykoker/image-gpt/master/figures/fmnist.png
            :width: 250
            :alt: credit-Teddy-Koker

        **Default arguments:**

        .. list-table:: Argument Defaults
            :widths: 50 25 25
            :header-rows: 1

            * - Argument
              - Default
              - iGPT-S (`Chen et al. <https://cdn.openai.com/papers/Generative_Pretraining_from_Pixels_V2.pdf>`_)
            * - `--embed_dim`
              - 16
              - 512
            * - `--heads`
              - 2
              - 8
            * - `--layers`
              - 2
              - 24
            * - `--pixels`
              - 28
              - 32
            * - `--vocab_size`
              - 16
              - 512
            * - `--num_classes`
              - 10
              - 10
            * - `--batch_size`
              - 64
              - 128
            * - `--learning_rate`
              - 0.01
              - 0.01
            * - `--steps`
              - 25000
              - 1000000

        Example::

            import pytorch_lightning as pl
            from pl_bolts.datamodules import MNISTDataModule
            from pl_bolts.models.vision import ImageGPT

            dm = MNISTDataModule('.')
            model = ImageGPT(datamodule=dm)

            pl.Trainer(gpus=4).fit(model)

        As script:

        .. code-block:: bash

            cd pl_bolts/models/vision/image_gpt
            python igpt_module.py --learning_rate 1e-2 --batch_size 32 --gpus 4

        Args:

            datamodule: LightningDataModule
            embed_dim: the embedding dim
            heads: number of attention heads
            layers: number of layers
            pixels: number of input pixels
            vocab_size: vocab size
            num_classes: number of classes in the input
            classify: true if should classify
            batch_size: the batch size
            learning_rate: learning rate
            steps: number of steps for cosine annealing
            data_dir: where to store data
            num_workers: num_data workers
        """
        super().__init__()
        self.save_hyperparameters()

        # default to FashionMNIST if no datamodule is given
        if datamodule is None:
            datamodule = FashionMNISTDataModule(
                self.hparams.data_dir, num_workers=self.hparams.num_workers)
            self.hparams.pixels = datamodule.size(1)
            self.hparams.num_classes = datamodule.num_classes

        self.datamodule = datamodule

        self.gpt = GPT2(
            embed_dim=self.hparams.embed_dim,
            heads=self.hparams.heads,
            layers=self.hparams.layers,
            num_positions=self.hparams.pixels * self.hparams.pixels,
            vocab_size=self.hparams.vocab_size,
            num_classes=self.hparams.num_classes,
        )

        self.criterion = nn.CrossEntropyLoss()
        parser.add_argument("--classify", action="store_true", default=False)
        parser.add_argument("--batch_size", type=int, default=64)
        parser.add_argument("--learning_rate", type=float, default=1e-2)
        parser.add_argument("--steps", type=int, default=25_000)
        return parser
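
To make the defaults table in the docstring concrete, here is a hedged sketch of constructing the model at the iGPT-S scale from the table's right-hand column (values transcribed from the table; illustrative only, not a tested configuration):

# iGPT-S-scale settings, transcribed from the defaults table above
igpt_s = ImageGPT(
    embed_dim=512,
    heads=8,
    layers=24,
    pixels=32,
    vocab_size=512,
    num_classes=10,
    batch_size=128,
    learning_rate=0.01,
    steps=1_000_000,
)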


# todo: convert to CLI func and add test
if __name__ == "__main__":
    parser = ArgumentParser()

    # trainer args
    parser = pl.Trainer.add_argparse_args(parser)

    # model args
    parser = ImageGPT.add_model_specific_args(parser)
    args = parser.parse_args()

    if args.dataset == "fashion_mnist":
        datamodule = FashionMNISTDataModule.from_argparse_args(args)

    elif args.dataset == "imagenet128":
        datamodule = ImagenetDataModule.from_argparse_args(args)

    else:
        # fall back to the model's default (FashionMNIST) datamodule
        datamodule = None

    model = ImageGPT(**args.__dict__, datamodule=datamodule)

    trainer = pl.Trainer.from_argparse_args(args)
    trainer.fit(model)