Example #1
def main(args):
    # seed Python, NumPy, and Torch (CPU and CUDA) RNGs for reproducibility
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)

    if args.from_pretrained is not None:
        model = Summarizer.load_from_checkpoint(args.from_pretrained, args)
    else:
        model = Summarizer(args)

    # 'nlp' is the former name of the Hugging Face 'datasets' package
    model.hf_datasets = nlp.load_dataset('scientific_papers', 'arxiv')

    logger = pl_loggers.CometLogger(save_dir='logs/')

    # keep the five best checkpoints, ranked by average validation loss
    checkpoint_callback = ModelCheckpoint(
        filepath=os.path.join(args.save_dir, args.save_prefix, "checkpoints"),
        save_top_k=5,
        verbose=True,
        monitor='avg_val_loss',
        mode='min',
        period=-1,
        prefix='',
    )

    print(args)

    args.dataset_size = 203037  # hard-coded train-set size, needed to compute the LR scheduler's step count

    trainer = pl.Trainer(
        gpus=args.gpus,
        distributed_backend='ddp' if torch.cuda.is_available() else None,
        track_grad_norm=-1,
        max_epochs=args.epochs if not args.debug else 100,
        max_steps=None if not args.debug else 1,
        replace_sampler_ddp=False,
        accumulate_grad_batches=args.grad_accum,
        val_check_interval=args.val_every if not args.debug else 1,
        num_sanity_val_steps=2 if not args.debug else 0,
        check_val_every_n_epoch=1,
        val_percent_check=args.val_percent_check,
        test_percent_check=args.val_percent_check,
        logger=logger,
        checkpoint_callback=checkpoint_callback if not args.disable_checkpointing else False,
        show_progress_bar=not args.no_progress_bar,
        use_amp=not args.fp32,
        amp_level='O2',
        resume_from_checkpoint=args.resume_ckpt,
    )
    if not args.test:
        trainer.fit(model)
    trainer.test(model)
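For reference, a minimal sketch of a command-line entry point that could drive `main(args)` above. The flag names are inferred from the attributes the function reads; the defaults are assumptions, not the original script's definitions:

import argparse

if __name__ == "__main__":
    # hypothetical CLI wiring inferred from the attributes main() accesses
    parser = argparse.ArgumentParser(description="summarization fine-tuning")
    parser.add_argument("--seed", type=int, default=1234)
    parser.add_argument("--from_pretrained", type=str, default=None)
    parser.add_argument("--resume_ckpt", type=str, default=None)
    parser.add_argument("--save_dir", type=str, default="runs")
    parser.add_argument("--save_prefix", type=str, default="arxiv")
    parser.add_argument("--gpus", type=int, default=1)
    parser.add_argument("--epochs", type=int, default=5)
    parser.add_argument("--grad_accum", type=int, default=1)
    parser.add_argument("--val_every", type=float, default=0.5)
    parser.add_argument("--val_percent_check", type=float, default=1.0)
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--test", action="store_true")
    parser.add_argument("--fp32", action="store_true")
    parser.add_argument("--no_progress_bar", action="store_true")
    parser.add_argument("--disable_checkpointing", action="store_true")
    main(parser.parse_args())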
Example #2
def main(args, model_name: str, reproducible: bool, comet: bool, wandb: bool):
    if reproducible:
        seed_everything(42)
        args.deterministic = True
        args.benchmark = False  # cuDNN benchmarking selects algorithms nondeterministically, so disable it

    if args.default_root_dir is None:
        args.default_root_dir = 'results'

    if comet:
        from pytorch_lightning.loggers import CometLogger
        comet_logger = CometLogger(
            api_key=os.environ.get('COMET_API_KEY'),
            workspace=os.environ.get('COMET_WORKSPACE'),  # Optional
            project_name=os.environ.get('COMET_PROJECT_NAME'),  # Optional
            experiment_name=model_name  # Optional
        )
        args.logger = comet_logger
    if wandb:
        from pytorch_lightning.loggers import WandbLogger
        wandb_logger = WandbLogger(project=os.environ.get('WANDB_PROJECT_NAME'), log_model=True, sync_step=True)
        args.logger = wandb_logger

    # Save best model
    model_checkpoint = ModelCheckpoint(
        filename=model_name + '_{epoch}',
        save_top_k=1,
        monitor='val_iou',
        mode='max',
    )
    args.checkpoint_callback = model_checkpoint

    data = TwoDomainDM(dataPath=args.dataPath, augment=args.augment, batch_size=args.batch_size, num_workers=8)
    model = RightLaneSTModule(lr=args.learningRate, lrRatio=args.lrRatio, decay=args.decay, num_cls=4)

    # Parse all trainer options available from the command line
    trainer = Trainer.from_argparse_args(args)
    trainer.fit(model, datamodule=data)

    # Reload best model
    model = RightLaneSTModule.load_from_checkpoint(model_checkpoint.best_model_path, dataPath=args.dataPath, num_cls=4)

    # Upload weights
    if comet:
        comet_logger.experiment.log_model(model_name + '_weights', model_checkpoint.best_model_path)

    # Perform testing
    trainer.test(model, datamodule=data)
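As a usage sketch, the caller presumably builds `args` with `Trainer.add_argparse_args` (otherwise `Trainer.from_argparse_args(args)` would have nothing to read) and forwards the remaining options explicitly. The flag set and defaults below are assumptions:

if __name__ == "__main__":
    from argparse import ArgumentParser
    from pytorch_lightning import Trainer

    parser = ArgumentParser()
    parser.add_argument("--model_name", type=str, default="rightlane_st")  # hypothetical defaults
    parser.add_argument("--reproducible", action="store_true")
    parser.add_argument("--comet", action="store_true")
    parser.add_argument("--wandb", action="store_true")
    parser.add_argument("--dataPath", type=str, required=True)
    parser.add_argument("--augment", action="store_true")
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--learningRate", type=float, default=1e-3)
    parser.add_argument("--lrRatio", type=float, default=10.0)
    parser.add_argument("--decay", type=float, default=1e-4)
    parser = Trainer.add_argparse_args(parser)  # adds gpus, max_epochs, default_root_dir, ...
    args = parser.parse_args()
    main(args, args.model_name, args.reproducible, args.comet, args.wandb)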
Example #3
def main(args=None):
    pl.seed_everything(52)

    parser = ArgumentParser()
    dm_cls = NeutronDataLoader

    # keep the three best checkpoints by "Valid Loss", plus the most recent one
    checkpoint_callback = ModelCheckpoint(monitor="Valid Loss", save_last=True, save_top_k=3, mode='min')

    script_args, _ = parser.parse_known_args(args)
    parser = dm_cls.add_argparse_args(parser)
    parser = MyModel.add_model_specific_args(parser)
    parser = pl.Trainer.add_argparse_args(parser)
    args = parser.parse_args(args)

    comet_logger = pl_loggers.CometLogger(save_dir="comet_logs", experiment_name=EXPERIMENT_NAME,
                                          project_name="HIDA", offline=True)
    dm = dm_cls.from_argparse_args(args)
    model = MyModel(**vars(args))
    trainer = pl.Trainer.from_argparse_args(args, logger=comet_logger, checkpoint_callback=checkpoint_callback)
    trainer.fit(model, dm)
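`add_argparse_args` / `add_model_specific_args` is the standard Lightning pattern in which each component contributes its own flags to one shared parser. A minimal sketch of what `MyModel.add_model_specific_args` might look like; the hyperparameter names are assumptions:

class MyModel(pl.LightningModule):
    @staticmethod
    def add_model_specific_args(parent_parser):
        # extend the shared parser with this model's own hyperparameters
        parser = ArgumentParser(parents=[parent_parser], add_help=False)
        parser.add_argument("--hidden_dim", type=int, default=128)  # hypothetical
        parser.add_argument("--learning_rate", type=float, default=1e-3)
        return parser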
Example #4
def main(args=None):
    pl.seed_everything(52)

    parser = ArgumentParser()
    dm_cls = NeutronDataLoader

    script_args, _ = parser.parse_known_args(args)
    parser = dm_cls.add_argparse_args(parser)
    parser = MyModel.add_model_specific_args(parser)
    parser = pl.Trainer.add_argparse_args(parser)
    args = parser.parse_args(args)

    comet_logger = pl_loggers.CometLogger(save_dir="comet_logs",
                                          experiment_name=EXPERIMENT_NAME,
                                          project_name="HIDA",
                                          offline=True)

    dm = dm_cls.from_argparse_args(args)
    dm.setup()  # prepare the data splits so test_dataloader() below is populated
    model = MyModel.load_from_checkpoint(CHECKPOINT, **vars(args))

    trainer = pl.Trainer.from_argparse_args(args, logger=comet_logger)
    trainer.test(model, test_dataloaders=dm.test_dataloader())
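Because the logger is created with `offline=True`, the run is written as an archive under `save_dir` rather than streamed to Comet's servers; such archives can be uploaded afterwards with Comet's CLI (`comet upload <archive>.zip`). `EXPERIMENT_NAME` and `CHECKPOINT` are module-level constants the snippet assumes; a hypothetical definition:

# hypothetical module-level constants referenced by main() above
EXPERIMENT_NAME = "neutron-eval"
CHECKPOINT = "comet_logs/epoch=42.ckpt"  # path to a checkpoint produced by Example #3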
Example #5
def main():
    # NOTE: the original snippet starts mid-definition; the opening of the
    # `config` dict is reconstructed here so the example parses
    config = dict({
        'lr': 1e-3,
        'num_warmup_steps': 0,
        'batch_size': 64,
        'accumulate_grad_batches': 1,
        'bptt': 140
    })

    if path.exists('.comet.config'):
        import configparser
        comet_config = configparser.ConfigParser()

        comet_config.read('.comet.config')

        logger = loggers.CometLogger(
            api_key=comet_config['comet']['api_key'],
            project_name="lstm-chatbot-test",
            workspace="luungoc2005"
        )

        for k, v in config.items():
            logger.experiment.log_parameter(k, v)
    else:
        logger = loggers.TensorBoardLogger('lightning_logs/')  # save_dir is a required argument

    checkpoint_callback = ModelCheckpoint(filepath='./checkpoints/')

    model = LanguageModelTrainer(config)

    trainer = Trainer(
        gradient_clip_val=.5,
        gpus=1,
        # NOTE: the original snippet is truncated here; wiring in the logger and
        # checkpoint callback defined above and calling fit() is a natural completion
        logger=logger,
        checkpoint_callback=checkpoint_callback,
    )
    trainer.fit(model)
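The `.comet.config` file checked above is a plain INI file read with `configparser`; this branch only requires an `api_key` entry in a `[comet]` section (`YOUR_API_KEY` is a placeholder):

[comet]
api_key = YOUR_API_KEY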