from pytorch_lightning import Trainer, seed_everything

from pl_bolts.datamodules import FashionMNISTDataModule, MNISTDataModule
from pl_bolts.models.vision import ImageGPT


def test_igpt(tmpdir, datadir):
    seed_everything(0)
    dm = MNISTDataModule(data_dir=datadir, normalize=False)
    model = ImageGPT()

    trainer = Trainer(
        limit_train_batches=2,
        limit_val_batches=2,
        limit_test_batches=2,
        max_epochs=1,
    )
    trainer.fit(model, datamodule=dm)
    trainer.test(datamodule=dm)
    assert trainer.callback_metrics["test_loss"] < 1.7

    dm = FashionMNISTDataModule(data_dir=datadir, num_workers=1)
    model = ImageGPT(classify=True)

    trainer = Trainer(
        limit_train_batches=2,
        limit_val_batches=2,
        limit_test_batches=2,
        max_epochs=1,
        logger=False,
        checkpoint_callback=False,
    )
    trainer.fit(model, datamodule=dm)
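Under the hood, `ImageGPT` (defined below) treats each image as a flat sequence of one discrete token per pixel, so its GPT core is built with `num_positions = pixels * pixels`. The sketch below shows that reshaping with plain tensors; the uniform binning scheme and the `(seq_len, batch)` layout are assumptions for illustration (the iGPT paper clusters colors with k-means, and the exact layout expected by `GPT2.forward` should be checked in pl_bolts).

import torch

batch, pixels, vocab_size = 4, 28, 16

# Fake grayscale images in [0, 1).
imgs = torch.rand(batch, 1, pixels, pixels)

# Quantize each pixel into one of `vocab_size` intensity bins (assumed scheme;
# the iGPT paper uses k-means color clusters rather than uniform bins).
tokens = (imgs * vocab_size).long().clamp_(max=vocab_size - 1)

# One token per pixel: (batch, pixels*pixels) -> (seq_len, batch), matching
# num_positions = pixels * pixels in the ImageGPT constructor below.
seq = tokens.view(batch, -1).transpose(0, 1)
assert seq.shape == (pixels * pixels, batch)

# logits = model.gpt(seq)  # assumed call; check GPT2.forward for the real API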
from argparse import ArgumentParser

import pytorch_lightning as pl
from torch import nn

from pl_bolts.datamodules import FashionMNISTDataModule, ImagenetDataModule
from pl_bolts.models.vision.image_gpt.gpt2 import GPT2


def cli_main():
    parser = ArgumentParser()

    # trainer args
    parser = pl.Trainer.add_argparse_args(parser)

    # model args
    parser = ImageGPT.add_model_specific_args(parser)
    args = parser.parse_args()

    if args.dataset == "fashion_mnist":
        datamodule = FashionMNISTDataModule.from_argparse_args(args)
    elif args.dataset == "imagenet128":
        datamodule = ImagenetDataModule.from_argparse_args(args)
    else:
        raise ValueError(f"unsupported --dataset: {args.dataset}")

    model = ImageGPT(**args.__dict__, datamodule=datamodule)

    trainer = pl.Trainer.from_argparse_args(args)
    trainer.fit(model)
class ImageGPT(pl.LightningModule):
    def __init__(
        self,
        datamodule: pl.LightningDataModule = None,
        embed_dim: int = 16,
        heads: int = 2,
        layers: int = 2,
        pixels: int = 28,
        vocab_size: int = 16,
        num_classes: int = 10,
        classify: bool = False,
        batch_size: int = 64,
        learning_rate: float = 1e-2,
        steps: int = 25_000,
        data_dir: str = ".",
        num_workers: int = 8,
        **kwargs,
    ):
        """
        **Paper**: `Generative Pretraining from Pixels
        <https://cdn.openai.com/papers/Generative_Pretraining_from_Pixels_V2.pdf>`_
        [original paper `code <https://github.com/openai/image-gpt>`_].

        **Paper by:** Mark Chen, Alec Radford, Rewon Child, Jeff Wu, Heewoo Jun,
        Prafulla Dhariwal, David Luan, Ilya Sutskever

        **Implementation contributed by**:

            - `Teddy Koker <https://github.com/teddykoker>`_

        **Original repo with results and more implementation details**:

            - `https://github.com/teddykoker/image-gpt <https://github.com/teddykoker/image-gpt>`_

        **Example Results (Photo credits: Teddy Koker)**:

        .. image:: https://raw.githubusercontent.com/teddykoker/image-gpt/master/figures/mnist.png
            :width: 250
            :alt: credit-Teddy-Koker

        .. image:: https://raw.githubusercontent.com/teddykoker/image-gpt/master/figures/fmnist.png
            :width: 250
            :alt: credit-Teddy-Koker

        **Default arguments:**

        .. list-table:: Argument Defaults
            :widths: 50 25 25
            :header-rows: 1

            * - Argument
              - Default
              - iGPT-S (`Chen et al. <https://cdn.openai.com/papers/Generative_Pretraining_from_Pixels_V2.pdf>`_)
            * - `--embed_dim`
              - 16
              - 512
            * - `--heads`
              - 2
              - 8
            * - `--layers`
              - 2
              - 24
            * - `--pixels`
              - 28
              - 32
            * - `--vocab_size`
              - 16
              - 512
            * - `--num_classes`
              - 10
              - 10
            * - `--batch_size`
              - 64
              - 128
            * - `--learning_rate`
              - 0.01
              - 0.01
            * - `--steps`
              - 25000
              - 1000000

        Example::

            import pytorch_lightning as pl
            from pl_bolts.datamodules import MNISTDataModule
            from pl_bolts.models.vision import ImageGPT

            dm = MNISTDataModule('.')
            model = ImageGPT(dm)

            pl.Trainer(gpus=4).fit(model)

        As script:

        .. code-block:: bash

            cd pl_bolts/models/vision/image_gpt
            python igpt_module.py --learning_rate 1e-2 --batch_size 32 --gpus 4

        Args:
            datamodule: LightningDataModule
            embed_dim: the embedding dim
            heads: number of attention heads
            layers: number of layers
            pixels: number of input pixels
            vocab_size: vocab size
            num_classes: number of classes in the input
            classify: true if should classify
            batch_size: the batch size
            learning_rate: learning rate
            steps: number of steps for cosine annealing
            data_dir: where to store data
            num_workers: num_data workers
        """
        super().__init__()
        self.save_hyperparameters()

        # default to FashionMNIST if no datamodule is given
        if datamodule is None:
            datamodule = FashionMNISTDataModule(
                self.hparams.data_dir, num_workers=self.hparams.num_workers
            )
        self.hparams.pixels = datamodule.size(1)
        self.hparams.num_classes = datamodule.num_classes
        self.datamodule = datamodule

        self.gpt = GPT2(
            embed_dim=self.hparams.embed_dim,
            heads=self.hparams.heads,
            layers=self.hparams.layers,
            num_positions=self.hparams.pixels * self.hparams.pixels,
            vocab_size=self.hparams.vocab_size,
            num_classes=self.hparams.num_classes,
        )

        self.criterion = nn.CrossEntropyLoss()
parser.add_argument("--classify", action="store_true", default=False) parser.add_argument("--batch_size", type=int, default=64) parser.add_argument("--learning_rate", type=float, default=1e-2) parser.add_argument("--steps", type=int, default=25_000) return parser # todo: covert to CLI func and add test if __name__ == "__main__": from argparse import ArgumentParser parser = ArgumentParser() # trainer args parser = pl.Trainer.add_argparse_args(parser) # model args parser = ImageGPT.add_model_specific_args(parser) args = parser.parse_args() if args.dataset == "fashion_mnist": datamodule = FashionMNISTDataModule.from_argparse_args(args) elif args.dataset == "imagenet128": datamodule = ImagenetDataModule.from_argparse_args(args) model = ImageGPT(**args.__dict__, datamodule=datamodule) trainer = pl.Trainer.from_argparse_args(args) trainer.fit(model)