コード例 #1
0
def test_wandb_logger(wandb):
    """Verify that basic functionality of wandb logger works.
    Wandb doesn't work well with pytest so we have to mock it out here."""
    # `wandb` is a mocked module, so every wandb.init() returns the same mock run.
    logger = WandbLogger(anonymous=True, offline=True)

    # Logging without an explicit step forwards step=None to wandb.
    logger.log_metrics({'acc': 1.0})
    wandb.init().log.assert_called_once_with({'acc': 1.0}, step=None)

    # An explicit step is passed straight through.
    wandb.init().log.reset_mock()
    logger.log_metrics({'acc': 1.0}, step=3)
    wandb.init().log.assert_called_once_with({'acc': 1.0}, step=3)

    # continue training on same W&B run: after finalize, new steps are offset
    # by the run's last step (3), so logging step=3 arrives as step=6.
    wandb.init().step = 3
    logger.finalize('success')
    logger.log_metrics({'acc': 1.0}, step=3)
    wandb.init().log.assert_called_with({'acc': 1.0}, step=6)

    # Hyperparams are flattened (nested dicts become 'a/b' keys) and None is
    # stringified before being pushed into the run config.
    logger.log_hyperparams({'test': None, 'nested': {'a': 1}, 'b': [2, 3, 4]})
    wandb.init().config.update.assert_called_once_with(
        {
            'test': 'None',
            'nested/a': 1,
            'b': [2, 3, 4]
        },
        allow_val_change=True,
    )

    # watch() should delegate to wandb's watch with keyword arguments.
    logger.watch('model', 'log', 10)
    wandb.init().watch.assert_called_once_with('model', log='log', log_freq=10)

    # Name/version are derived from the underlying wandb run.
    assert logger.name == wandb.init().project_name()
    assert logger.version == wandb.init().id
コード例 #2
0
ファイル: run.py プロジェクト: toyai/neuro_models
def main(cfg: DictConfig = None):
    """Build, train and (optionally) test an EfficientNet from a Hydra config."""
    log.info("==> Training Configs:\n%s", OmegaConf.to_yaml(cfg))

    # Compound-scaling parameters for the requested EfficientNet variant.
    width, _, img_size, dropout_p, _, _ = compound_params(cfg.name)

    preprocess = T.Compose(
        [
            T.Resize(size=(img_size, img_size)),
            T.ToTensor(),
            T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ]
    )

    if cfg.pretrained:
        # Start from pretrained weights, freeze the backbone, and attach a
        # fresh classification head sized for this dataset.
        network = EfficientNet(
            name=cfg.name,
            num_classes=cfg.num_classes,
        ).from_pretrained(name=cfg.name)
        for parameter in network.parameters():
            parameter.requires_grad = False

        final_out_channels = round_filters(1280, 8, width)
        network.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(1),
            nn.Dropout(dropout_p),
            nn.Linear(final_out_channels, cfg.num_classes),
        )
    else:
        network = EfficientNet(name=cfg.name, num_classes=cfg.num_classes)

    gym = EfficientNetGym(network, cfg)
    dm = instantiate(
        cfg.dm,
        **{"train_transforms_conf": preprocess, "test_transforms_conf": preprocess},
    )

    # Dump the network architecture and a layer summary as markdown files.
    with open(f"{cfg.name}.md", "w") as f:
        f.write(f"## {cfg.name}\n```py\n")
        f.write(str(network))
        f.write("\n```")

    with open(f"{cfg.name}-summary.md", "w") as f:
        f.write(f"## {cfg.name}-summary\n```py\n")
        f.write(str(ModelSummary(gym, "full")))
        f.write("\n```")

    if cfg.logger:
        logger_ = WandbLogger(
            name=f"{cfg.optim}",
            project=cfg.name,
        )
        logger_.watch(network, "all")
    else:
        # `True` tells Lightning to fall back to its default logger.
        logger_ = True

    if cfg.ckpt:
        ckpt = ModelCheckpoint("ckpt/{epoch}", prefix="-" + cfg.name)
    else:
        ckpt = False
    trainer = Trainer(**cfg.pl, logger=logger_, checkpoint_callback=ckpt)
    trainer.fit(gym, datamodule=dm)
    if cfg.test:
        trainer.test(datamodule=dm)
コード例 #3
0
ファイル: train.py プロジェクト: borisdayma/lightning-kitti
def main(config):
    """Train the KITTI segmentation model with a W&B logger."""
    # Lightning model
    model = SegModel(config)

    # Data pipeline
    kitti_data = KittiDataModule(config)

    # W&B logger; also log the model topology.
    wandb_logger = WandbLogger()
    wandb_logger.watch(model.net)

    # Trainer: all available GPUs, gradient accumulation from config.
    trainer = pl.Trainer(
        gpus=-1,
        logger=wandb_logger,
        max_epochs=config.epochs,
        accumulate_grad_batches=config.grad_batches,
    )

    # Start training.
    trainer.fit(model, kitti_data)
コード例 #4
0
ファイル: mnist.py プロジェクト: Akhilez/vision_lab
def train():
    """Train an MNIST classifier and log the run (with model upload) to W&B."""
    # Optimization hyper-parameters.
    hp = {
        "epochs": 10,
        "lr_initial": 0.001,
        "lr_decay_every": 30,
        "lr_decay_by": 0.3,
    }

    # Data / run configuration.
    config = {
        "data_path": "../data",
        "val_split": 0.05,
        "batch_size": 64,
        "manual_seed": 2,
        "output_path": "./output",
        "model_save_frequency": 5,
        "dataloader_num_workers": 0,
    }

    dataset = MnistDataset(**config)
    model = MnistModel(**hp, **config)
    logger = WandbLogger(project="classification_test", log_model=True)
    trainer = pl.Trainer(
        gpus=0,
        max_epochs=hp["epochs"],
        default_root_dir=config["output_path"],
        logger=logger,
    )
    # Track gradients/parameters of the model in W&B.
    logger.watch(model)

    trainer.fit(model, datamodule=dataset)
コード例 #5
0
ファイル: train.py プロジェクト: sankovalev/goznak
def main(args) -> None:
    """Run model training."""
    config = load_cfg(args.config)
    printer = pprint.PrettyPrinter(indent=2)
    printer.pprint(config)

    model = BaselineLearner(config)

    # Optional W&B logging; `False` disables logging entirely.
    if args.use_logger:
        logger = WandbLogger(name=config.name)
        logger.watch(model.net)
    else:
        logger = False

    checkpoint = ModelCheckpoint(monitor='valid_loss',
                                 dirpath=config.sources.ckpt_path,
                                 filename=config.name)
    trainer = pl.Trainer(
        gpus=args.gpus,
        logger=logger,
        callbacks=[checkpoint],
        max_epochs=config.training.epochs,
        distributed_backend=args.distributed_backend,
        precision=16 if args.use_amp else 32,
    )

    trainer.fit(model)
    print('Model training completed!')
コード例 #6
0
def main(hparams):
    """Train the segmentation model with W&B logging and no checkpointing."""
    # Lightning model
    model = SegModel(hparams)

    # W&B logger; also log the model topology.
    wandb_logger = WandbLogger()
    wandb_logger.watch(model.net)

    # Trainer with checkpointing disabled.
    trainer = pl.Trainer(gpus=hparams.gpus,
                         logger=wandb_logger,
                         max_epochs=hparams.epochs,
                         accumulate_grad_batches=hparams.grad_batches,
                         checkpoint_callback=False)

    # Start training.
    trainer.fit(model)
コード例 #7
0
ファイル: train.py プロジェクト: oleges1/kws
def train(config):
    """Train the attention-based keyword-spotting model described by `config`."""
    fix_seeds(seed=config.train.seed)

    # CRNN feature encoder; every hyper-parameter falls back to a default.
    encoder = CRNNEncoder(
        in_channels=config.model.get('in_channels', 42),
        hidden_size=config.model.get('hidden_size', 16),
        dropout=config.model.get('dropout', 0.1),
        cnn_layers=config.model.get('cnn_layers', 2),
        rnn_layers=config.model.get('rnn_layers', 2),
        kernel_size=config.model.get('kernel_size', 9),
    )
    net = AttentionNet(
        encoder,
        hidden_size=config.model.get('hidden_size', 16),
        num_classes=config.model.get('num_classes', 3),
    )
    lightning_model = KWSModel(
        net,
        lr=config.train.get('lr', 4e-5),
        in_channels=config.model.get('in_channels', 42),
        batch_size=config.train.get('batch_size', 32),
    )

    # W&B logging: hyper-parameters plus gradients/weights of the network.
    logger = WandbLogger(
        name=config.train.get('experiment_name', 'final_run'),
        project='kws-attention',
        log_model=True,
    )
    logger.log_hyperparams(config)
    logger.watch(net, log='all', log_freq=100)

    trainer = pl.Trainer(
        max_epochs=config.train.get('max_epochs', 15),
        logger=logger,
        gpus=config.train.get('gpus', 1),
    )
    trainer.fit(lightning_model)
コード例 #8
0
def main(hparams: Namespace):
    """Train the segmentation model, optionally logging to W&B."""
    # Lightning model
    model = SegModel(**vars(hparams))

    # Logger: a WandbLogger when requested, otherwise `False` (no logging).
    if hparams.log_wandb:
        logger = WandbLogger()
        logger.watch(model.net)  # optional: log model topology
    else:
        logger = False

    # Trainer
    trainer = pl.Trainer(
        gpus=hparams.gpus,
        logger=logger,
        max_epochs=hparams.epochs,
        accumulate_grad_batches=hparams.grad_batches,
        distributed_backend=hparams.distributed_backend,
        precision=16 if hparams.use_amp else 32,
    )

    # Start training.
    trainer.fit(model)
コード例 #9
0
def main():
    """Load the default YAML config, start a W&B run, build the model and train.

    Reads ``default_config.yaml``, registers it with ``wandb.init`` so sweeps
    can override values, then instantiates the model class named in the config
    and fits it with a ``WandbLogger``.
    """
    print("Running main")
    print(time.ctime())

    default_config_path = "default_config.yaml"

    with open(default_config_path) as file:
        default_configs = yaml.load(file, Loader=yaml.FullLoader)

    wandb.init(config=default_configs, project=default_configs["project"])
    # Materialize wandb.config once; the original re-converted it with dict()
    # on every access.
    config = dict(wandb.config)

    if "random_seed" in config:
        set_random_seed(config["random_seed"])

    print("Initialising model")
    print(time.ctime())
    # SECURITY: eval() executes an arbitrary expression taken from the config
    # file. Only run with trusted configs; a {name: class} registry would be
    # a safer replacement.
    model_name = eval(config["model"])
    model = model_name(config)
    logger = WandbLogger(save_dir=default_configs["artifacts"])
    logger.watch(model, log="all")

    if default_configs["gpus"] == 1:
        # Single-GPU: no DDP plugin needed.
        trainer = Trainer(
            gpus=1, max_epochs=default_configs["max_epochs"], logger=logger
        )  # , strategy=CustomDDPPlugin(find_unused_parameters=False))
    else:
        trainer = Trainer(
            gpus=default_configs["gpus"],
            max_epochs=default_configs["max_epochs"],
            logger=logger,
            strategy=CustomDDPPlugin(find_unused_parameters=False),
        )

    trainer.fit(model)
コード例 #10
0
def main(hparams, network):
    """Instantiate `network` with `hparams` and train it with W&B logging."""
    model = network(hparams)
    print(model.hparams)

    project_folder = 'audio_emotion_team'
    run_logger = WandbLogger(name='lflb_dropout_rnn',
                             project=project_folder,
                             entity='thesis',
                             offline=False)

    # Stop once val_loss has not improved for 20 epochs.
    stopper = EarlyStopping(monitor='val_loss',
                            min_delta=0.00,
                            patience=20,
                            verbose=False,
                            mode='min')

    # Mostly-default trainer; a few options intentionally left disabled:
    # weights_summary='full', profiler=True, log_gpu_memory='all'.
    trainer = Trainer(
        max_nb_epochs=hparams.max_nb_epochs,
        gpus=hparams.gpus,
        nb_gpu_nodes=hparams.nodes,
        logger=run_logger,
        early_stop_callback=stopper,
        benchmark=True,
    )

    # Touch .experiment to create the underlying wandb run, then watch.
    run_logger.experiment
    run_logger.watch(model)

    trainer.fit(model)
コード例 #11
0
def main(hparams: Namespace):
    """Train the segmentation model with a trainer built from argparse flags."""
    # Lightning model
    model = SegModel(**vars(hparams))

    # Logger: W&B when enabled, otherwise `False` (logging disabled).
    if hparams.log_wandb:
        logger = WandbLogger()
        logger.watch(model.net)  # optional: log model topology
    else:
        logger = False

    # Trainer configured entirely from the parsed CLI arguments.
    trainer = pl.Trainer.from_argparse_args(hparams)

    # Start training.
    trainer.fit(model)
コード例 #12
0
def train_classifier(logging=False, train=True):
    """Build the LAVA evaluation classifier and optionally train it.

    When `train` is False the constructed (untrained) model is returned.
    When `logging` is True the run is tracked with W&B.
    """
    hparams = {
        'gpus': [1],
        'max_epochs': 25,
        'num_classes': 700,
        'feature_dimension': 512,
        'model_dimension': 1024,
        'pretrained_text': False,
        'num_modalities': 1,
        'batch_size': 32,
        'learning_rate': 1e-3,
        'model_path':
        "/home/sgurram/Projects/aai/aai/experimental/sgurram/lava/src/wandb/run-20210626_215155-yqwe58z7/files/lava/yqwe58z7/checkpoints/epoch=6-step=12529.ckpt",
        'model_descriptor': 'lava timesformer 1/3 kinetics data, unshuffled',
        'accumulate_grad_batches': 2,
        'overfit_batches': 0,
        'type_modalities': 'av',
        'modality_fusion': 'concat',
        'loss_funtions': ['cross_entropy'],
        'metrics': None,
        'optimizer': 'adam',
        'scheduler': 'n/a',
        'profiler': 'simple',
        'default_root_dir': '/home/sgurram/Desktop/video_lava_classifer',
    }

    model = EvalLightning(
        num_classes=hparams['num_classes'],
        feature_dimension=hparams['feature_dimension'],
        model_dimension=hparams['model_dimension'],
        num_modalities=hparams['num_modalities'],
        batch_size=hparams['batch_size'],
        learning_rate=hparams['learning_rate'],
        model_path=hparams['model_path'],
        model=LAVALightning,
        pretrained_text=hparams['pretrained_text'],
    )

    # Optional W&B tracking of hyper-parameters and gradients.
    wandb_logger = None
    if logging:
        wandb_logger = WandbLogger(name='run', project='lava')
        wandb_logger.log_hyperparams(hparams)
        wandb_logger.watch(model, log='gradients', log_freq=10)

    if not train:
        return model

    trainer = pl.Trainer(
        default_root_dir=hparams['default_root_dir'],
        gpus=hparams['gpus'],
        max_epochs=hparams['max_epochs'],
        accumulate_grad_batches=hparams['accumulate_grad_batches'],
        overfit_batches=hparams['overfit_batches'],
        logger=wandb_logger,
        profiler=hparams['profiler'])

    trainer.fit(model)
コード例 #13
0
def train(dataset_name: str,
          model_name: str,
          expt_dir: str,
          data_folder: str,
          num_workers: int = 0,
          is_test: bool = False,
          resume_from_checkpoint: str = None):
    """Train a code2seq model with W&B logging, checkpointing and early stopping.

    Args:
        dataset_name: dataset label, used in the W&B project name.
        model_name: which model to build; only "code2seq" is supported here.
        expt_dir: directory for checkpoints and the final 'Latest.ckpt'.
        data_folder: folder containing 'vocabulary.pkl' and the dataset.
        num_workers: dataloader worker count.
        is_test: if True, use the smaller test configuration.
        resume_from_checkpoint: optional checkpoint path to resume from.

    Raises:
        ValueError: if `model_name` is not a supported model.
    """
    seed_everything(SEED)
    dataset_main_folder = data_folder
    vocab = Vocabulary.load(join(dataset_main_folder, "vocabulary.pkl"))

    if model_name == "code2seq":
        config_function = get_code2seq_test_config if is_test else get_code2seq_default_config
        config = config_function(dataset_main_folder)
        model = Code2Seq(config, vocab, num_workers)
        # NOTE(review): .half() converts the whole model to fp16, yet the
        # Trainer below is not configured for mixed precision — confirm intended.
        model.half()
    #elif model_name == "code2class":
    #	config_function = get_code2class_test_config if is_test else get_code2class_default_config
    #	config = config_function(dataset_main_folder)
    #	model = Code2Class(config, vocab, num_workers)
    else:
        raise ValueError(f"Model {model_name} is not supported")

    # define logger
    wandb_logger = WandbLogger(project=f"{model_name}-{dataset_name}",
                               log_model=True,
                               offline=True)
    wandb_logger.watch(model)
    # define model checkpoint callback (keep the 3 best checkpoints)
    model_checkpoint_callback = ModelCheckpoint(
        filepath=join(expt_dir, "{epoch:02d}-{val_loss:.4f}"),
        period=config.hyperparams.save_every_epoch,
        save_top_k=3,
    )
    # define early stopping callback
    early_stopping_callback = EarlyStopping(
        patience=config.hyperparams.patience, verbose=True, mode="min")
    # use gpu if it exists
    gpu = 1 if torch.cuda.is_available() else None
    # define learning rate logger
    lr_logger = LearningRateLogger()
    trainer = Trainer(
        max_epochs=20,
        gradient_clip_val=config.hyperparams.clip_norm,
        deterministic=True,
        check_val_every_n_epoch=config.hyperparams.val_every_epoch,
        row_log_interval=config.hyperparams.log_every_epoch,
        logger=wandb_logger,
        checkpoint_callback=model_checkpoint_callback,
        early_stop_callback=early_stopping_callback,
        resume_from_checkpoint=resume_from_checkpoint,
        gpus=gpu,
        callbacks=[lr_logger],
        reload_dataloaders_every_epoch=True,
    )
    trainer.fit(model)
    # Always persist the final state alongside the top-k checkpoints.
    trainer.save_checkpoint(join(expt_dir, 'Latest.ckpt'))

    trainer.test()
コード例 #14
0
def test_wandb_logger_init(wandb, recwarn):
    """Verify that basic functionality of wandb logger works.
    Wandb doesn't work well with pytest so we have to mock it out here.

    ``wandb`` is a mocked module (every ``wandb.init()`` returns the same mock
    run); ``recwarn`` collects warnings so the previous-step warning can be
    asserted on.
    """

    # test wandb.init called when there is no W&B run
    wandb.run = None
    logger = WandbLogger()
    logger.log_metrics({'acc': 1.0})
    wandb.init.assert_called_once()
    wandb.init().log.assert_called_once_with({'acc': 1.0}, step=None)

    # mock wandb step
    wandb.init().step = 0

    # test wandb.init not called if there is a W&B run
    wandb.init().log.reset_mock()
    wandb.init.reset_mock()
    wandb.run = wandb.init()
    logger = WandbLogger()
    logger.log_metrics({'acc': 1.0}, step=3)
    wandb.init.assert_called_once()
    wandb.init().log.assert_called_once_with({'acc': 1.0}, step=3)

    # continue training on same W&B run and offset step
    # (after finalize, new steps are shifted by the run's last step: 3 -> 6)
    wandb.init().step = 3
    logger.finalize('success')
    logger.log_metrics({'acc': 1.0}, step=3)
    wandb.init().log.assert_called_with({'acc': 1.0}, step=6)

    # log hyper parameters
    # (nested dicts are flattened to 'a/b' keys and None is stringified)
    logger.log_hyperparams({'test': None, 'nested': {'a': 1}, 'b': [2, 3, 4]})
    wandb.init().config.update.assert_called_once_with(
        {
            'test': 'None',
            'nested/a': 1,
            'b': [2, 3, 4]
        },
        allow_val_change=True,
    )

    # watch a model
    logger.watch('model', 'log', 10)
    wandb.init().watch.assert_called_once_with('model', log='log', log_freq=10)

    # verify warning for logging at a previous step
    assert 'Trying to log at a previous step' not in get_warnings(recwarn)
    # current step from wandb should be 6 (last logged step)
    logger.experiment.step = 6
    # logging at step 2 should raise a warning (step_offset is still 3)
    logger.log_metrics({'acc': 1.0}, step=2)
    assert 'Trying to log at a previous step' in get_warnings(recwarn)
    # logging again at step 2 should not display again the same warning
    logger.log_metrics({'acc': 1.0}, step=2)
    assert 'Trying to log at a previous step' not in get_warnings(recwarn)

    assert logger.name == wandb.init().project_name()
    assert logger.version == wandb.init().id
コード例 #15
0
def test_wandb_logger_init(wandb):
    """Verify that basic functionality of wandb logger works.

    Wandb doesn't work well with pytest so we have to mock it out here.
    ``wandb`` is a mocked module: every ``wandb.init()`` call returns the
    same mock run, so call/argument assertions can be made against it.
    """

    # test wandb.init called when there is no W&B run
    wandb.run = None
    logger = WandbLogger(
        name="test_name", save_dir="test_save_dir", version="test_id", project="test_project", resume="never"
    )
    logger.log_metrics({"acc": 1.0})
    # logger kwargs are remapped for wandb.init: name->name, save_dir->dir, version->id
    wandb.init.assert_called_once_with(
        name="test_name", dir="test_save_dir", id="test_id", project="test_project", resume="never", anonymous=None
    )
    wandb.init().log.assert_called_once_with({"acc": 1.0})

    # test wandb.init and setting logger experiment externally
    wandb.run = None
    run = wandb.init()
    logger = WandbLogger(experiment=run)
    assert logger.experiment

    # test wandb.init not called if there is a W&B run
    wandb.init().log.reset_mock()
    wandb.init.reset_mock()
    wandb.run = wandb.init()
    logger = WandbLogger()

    # verify default resume value
    assert logger._wandb_init["resume"] == "allow"

    with pytest.warns(UserWarning, match="There is a wandb run already in progress"):
        _ = logger.experiment

    logger.log_metrics({"acc": 1.0}, step=3)
    wandb.init.assert_called_once()
    # the step is logged as an extra "trainer/global_step" metric
    wandb.init().log.assert_called_once_with({"acc": 1.0, "trainer/global_step": 3})

    # continue training on same W&B run and offset step
    logger.finalize("success")
    logger.log_metrics({"acc": 1.0}, step=6)
    wandb.init().log.assert_called_with({"acc": 1.0, "trainer/global_step": 6})

    # log hyper parameters
    # (nested dicts are flattened to "a/b" keys and None is stringified)
    logger.log_hyperparams({"test": None, "nested": {"a": 1}, "b": [2, 3, 4]})
    wandb.init().config.update.assert_called_once_with(
        {"test": "None", "nested/a": 1, "b": [2, 3, 4]}, allow_val_change=True
    )

    # watch a model (the trailing False is log_graph)
    logger.watch("model", "log", 10, False)
    wandb.init().watch.assert_called_once_with("model", log="log", log_freq=10, log_graph=False)

    assert logger.name == wandb.init().project_name()
    assert logger.version == wandb.init().id
コード例 #16
0
def main(args):
    """Train an implicit matrix-factorization model on Movielens."""
    # Dataset / datamodule.
    datamodule = MovielensDataModule(
        args.data_dir,
        args.filename,
        args.split,
        args.threshold,
        args.negatives,
        args.batch_size,
        args.num_workers,
    )

    # The model needs the dataset dimensions.
    args.num_items = datamodule.dataset.num_items
    args.num_users = datamodule.dataset.num_users

    # Model and logger setup.
    model = ImplicitMatrixFactorization(hparams=args)
    if th.cuda.is_available() and args.gpus > 0:
        model.cuda()

    wandb_logger = WandbLogger(project="torch-factorization-models")
    wandb_logger.watch(model, log="all", log_freq=100)

    if args.early_stopping:
        args.early_stopping = EarlyStopping(monitor="tuning_loss")

    # Trainer built from argparse flags plus a few explicit overrides.
    trainer = Trainer.from_argparse_args(
        args,
        check_val_every_n_epoch=1,
        logger=wandb_logger,
        early_stop_callback=args.early_stopping,
    )

    if args.use_lr_finder:
        # Run the LR range test instead of a full fit, and show the plot.
        datamodule.setup()
        lr_finder = trainer.lr_find(
            model,
            train_dataloader=datamodule.train_dataloader(),
            val_dataloaders=[datamodule.val_dataloader()],
            early_stop_threshold=None,
            min_lr=1e-6,
            max_lr=5e-1,
        )
        lr_finder.plot(suggest=True)
        plt.show(block=True)
    else:
        trainer.fit(model, datamodule)

        # Persist the trained weights into the W&B run directory.
        th.save(model.state_dict(),
                Path(wandb_logger.experiment.dir) / "model.pt")
コード例 #17
0
def train(split, band_type):
    # Model init
    model = Densenet()
    #"/content/drive/Shared drives/EEG_Aditya/data/EEG3DTIME_3SPLIT.pt"
    train_dataset, validation_dataset, test_dataset = model.datasets(
        "/content/drive/Shared drives/EEG_Aditya/data/EEG3DTIME_3SPLIT.pt",
        split, band_type, [45, 21])

    train_dataloader, validation_dataloader, test_dataloader = model.dataloaders(
        train_dataset, validation_dataset, test_dataset, batch_size=256)
    # Logging
    model.model_tags.append(split)
    model.model_tags.append(band_type)
    model.model_tags.append("train:" + str(len(train_dataset)))
    model.model_tags.append("validation:" + str(len(validation_dataset)))
    model.model_tags.append("test:" + str(len(test_dataset)))
    model.model_tags.append("seed:" + str(model.seed))

    wandb_logger = WandbLogger(
        name=model.model_name,
        tags=model.model_tags,
        project="eeg-connectome-analysis",
        save_dir="/content/drive/Shared drives/EEG_Aditya/model-results/wandb",
        log_model=True)
    wandb_logger.watch(model, log='gradients', log_freq=100)

    # Checkpoints
    val_loss_cp = pl.callbacks.ModelCheckpoint(monitor='validation-loss')

    trainer = pl.Trainer(max_epochs=1000,
                         gpus=1,
                         logger=wandb_logger,
                         precision=16,
                         fast_dev_run=False,
                         auto_lr_find=True,
                         auto_scale_batch_size=True,
                         log_every_n_steps=1,
                         checkpoint_callback=val_loss_cp)
    trainer.fit(model, train_dataloader, validation_dataloader)
    print("Done training.")

    print("Testing model on last epoch.")
    model_path = val_loss_cp.best_model_path
    model_path = model_path[:model_path.rfind('/')] + "lastModel.ckpt"
    trainer.save_checkpoint(model_path)

    print(
        f"Testing model with best validation loss\t{val_loss_cp.best_model_score}."
    )
    model = model.load_from_checkpoint(val_loss_cp.best_model_path)
    results = trainer.test(model, test_dataloader)

    if results[0]["test-accuracy"] < 0.675:
        train(split, band_type)

    print("Done testing.")
コード例 #18
0
ファイル: train.py プロジェクト: MiaoDexingz/code2seq-1
def train(config: DictConfig):
    """Train one of the known code2seq-family models selected by ``config.name``.

    Builds the model and data module via the registered factory, then runs a
    full fit + test with W&B logging, per-epoch checkpointing (uploaded to the
    run), early stopping and learning-rate monitoring.

    Raises:
        ValueError: if ``config.name`` is not a known model.
    """
    filter_warnings()
    print_config(config)
    seed_everything(config.seed)

    known_models = {"code2seq": get_code2seq, "code2class": get_code2class, "typed-code2seq": get_typed_code2seq}
    if config.name not in known_models:
        # BUG FIX: the original only printed a message here and then crashed
        # with a KeyError on the lookup below; fail fast with a clear error.
        raise ValueError(f"Unknown model: {config.name}, try one of {known_models.keys()}")

    vocabulary = Vocabulary.load_vocabulary(join(config.data_folder, config.dataset.name, config.vocabulary_name))
    model, data_module = known_models[config.name](config, vocabulary)

    # define logger
    wandb_logger = WandbLogger(
        project=f"{config.name}-{config.dataset.name}", log_model=True, offline=config.log_offline
    )
    wandb_logger.watch(model)
    # define model checkpoint callback (save into the W&B run directory)
    checkpoint_callback = ModelCheckpoint(
        dirpath=wandb_logger.experiment.dir,
        filename="{epoch:02d}-{val_loss:.4f}",
        period=config.save_every_epoch,
        save_top_k=-1,
    )
    upload_checkpoint_callback = UploadCheckpointCallback(wandb_logger.experiment.dir)
    # define early stopping callback
    early_stopping_callback = EarlyStopping(
        patience=config.hyper_parameters.patience, monitor="val_loss", verbose=True, mode="min"
    )
    # define callback for printing intermediate result
    print_epoch_result_callback = PrintEpochResultCallback("train", "val")
    # use gpu if it exists
    gpu = 1 if torch.cuda.is_available() else None
    # define learning rate logger
    lr_logger = LearningRateMonitor("step")
    trainer = Trainer(
        max_epochs=config.hyper_parameters.n_epochs,
        gradient_clip_val=config.hyper_parameters.clip_norm,
        deterministic=True,
        check_val_every_n_epoch=config.val_every_epoch,
        log_every_n_steps=config.log_every_epoch,
        logger=wandb_logger,
        gpus=gpu,
        progress_bar_refresh_rate=config.progress_bar_refresh_rate,
        callbacks=[
            lr_logger,
            early_stopping_callback,
            checkpoint_callback,
            upload_checkpoint_callback,
            print_epoch_result_callback,
        ],
        resume_from_checkpoint=config.resume_from_checkpoint,
    )

    trainer.fit(model=model, datamodule=data_module)
    trainer.test()
コード例 #19
0
def test_wandb_logger_init(wandb):
    """Verify that basic functionality of wandb logger works.
    Wandb doesn't work well with pytest so we have to mock it out here.

    ``wandb`` is a mocked module: every ``wandb.init()`` call returns the
    same mock run, so call/argument assertions can be made against it.
    """

    # test wandb.init called when there is no W&B run
    wandb.run = None
    logger = WandbLogger(
        name='test_name', save_dir='test_save_dir', version='test_id', project='test_project', resume='never'
    )
    logger.log_metrics({'acc': 1.0})
    # logger kwargs are remapped for wandb.init: name->name, save_dir->dir, version->id
    wandb.init.assert_called_once_with(
        name='test_name', dir='test_save_dir', id='test_id', project='test_project', resume='never', anonymous=None
    )
    wandb.init().log.assert_called_once_with({'acc': 1.0})

    # test wandb.init and setting logger experiment externally
    wandb.run = None
    run = wandb.init()
    logger = WandbLogger(experiment=run)
    assert logger.experiment

    # test wandb.init not called if there is a W&B run
    wandb.init().log.reset_mock()
    wandb.init.reset_mock()
    wandb.run = wandb.init()
    logger = WandbLogger()
    # verify default resume value
    assert logger._wandb_init['resume'] == 'allow'
    logger.log_metrics({'acc': 1.0}, step=3)
    wandb.init.assert_called_once()
    # the step is logged as an extra 'trainer/global_step' metric
    wandb.init().log.assert_called_once_with({'acc': 1.0, 'trainer/global_step': 3})

    # continue training on same W&B run and offset step
    logger.finalize('success')
    logger.log_metrics({'acc': 1.0}, step=6)
    wandb.init().log.assert_called_with({'acc': 1.0, 'trainer/global_step': 6})

    # log hyper parameters
    # (nested dicts are flattened to 'a/b' keys and None is stringified)
    logger.log_hyperparams({'test': None, 'nested': {'a': 1}, 'b': [2, 3, 4]})
    wandb.init().config.update.assert_called_once_with(
        {
            'test': 'None',
            'nested/a': 1,
            'b': [2, 3, 4]
        },
        allow_val_change=True,
    )

    # watch a model
    logger.watch('model', 'log', 10)
    wandb.init().watch.assert_called_once_with('model', log='log', log_freq=10)

    assert logger.name == wandb.init().project_name()
    assert logger.version == wandb.init().id
コード例 #20
0
def main():
    """CLI entry point: load the config (or checkpoint hparams), build and fit."""
    print("Running main")
    print(time.ctime())

    args = parse_args()

    with open(args.config) as file:
        print(f"Using config file: {args.config}")
        default_configs = yaml.load(file, Loader=yaml.FullLoader)

    # A checkpoint's stored hyper-parameters override the YAML config.
    if args.checkpoint is not None:
        default_configs = torch.load(args.checkpoint)["hyper_parameters"]

    # Seed selection: an explicit CLI flag wins over the config file.
    if args.random_seed is not None:
        set_random_seed(args.random_seed)
        default_configs["random_seed"] = args.random_seed
    elif "random_seed" in default_configs.keys():
        set_random_seed(default_configs["random_seed"])

    print("Initialising model")
    print(time.ctime())
    model_name = eval(default_configs["model"])
    model = model_name(default_configs)

    checkpoint_callback = ModelCheckpoint(monitor="tot_auc",
                                          mode="max",
                                          save_top_k=2,
                                          save_last=True)

    logger = WandbLogger(
        project=default_configs["project"],
        save_dir=default_configs["artifacts"],
    )
    logger.watch(model, log="all")

    # Root dir preference: explicit flag > SLURM job id > framework default.
    if args.root_dir is not None:
        default_root_dir = os.path.join(".", args.root_dir)
    elif "SLURM_JOB_ID" in os.environ:
        default_root_dir = os.path.join(".", os.environ["SLURM_JOB_ID"])
    else:
        default_root_dir = None

    trainer = Trainer(gpus=default_configs["gpus"],
                      num_nodes=default_configs["nodes"],
                      max_epochs=default_configs["max_epochs"],
                      logger=logger,
                      strategy=CustomDDPPlugin(find_unused_parameters=False),
                      callbacks=[checkpoint_callback],
                      default_root_dir=default_root_dir)
    trainer.fit(model, ckpt_path=args.checkpoint)
コード例 #21
0
ファイル: PL_Main.py プロジェクト: PascalHbr/Bachelorarbeit
def main(arg):
    """Train the PLModel on two GPUs with DDP and W&B logging."""
    seed_everything(42)
    model = PLModel(arg)

    logger = WandbLogger(project="Bachelorarbeit", name=arg.name)
    logger.watch(model)
    logger.log_hyperparams(arg)

    trainer = Trainer(gpus=2,
                      logger=logger,
                      distributed_backend='ddp',
                      deterministic=True,
                      auto_select_gpus=True,
                      num_sanity_val_steps=0)
    trainer.fit(model)
コード例 #22
0
def experiment(args):
    """Train and evaluate the QA model, logging everything to W&B."""
    utils.seed_everything(seed=args.seed)
    qa_model = models.QAModel(hparams=args)
    train_dl, valid_dl, test_dl = data.prepare_data(args)

    # Offline mode during fast dev runs so nothing is uploaded.
    logger = WandbLogger(project='qa',
                         entity='nlp',
                         tags=args.tags,
                         offline=args.fast_dev_run)
    logger.watch(qa_model, log='all')
    args.logger = logger

    trainer = pl.Trainer.from_argparse_args(args)
    trainer.fit(qa_model, train_dataloader=train_dl, val_dataloaders=valid_dl)
    trainer.test(qa_model, test_dataloaders=test_dl)
コード例 #23
0
def main(hparams):
    """Train ColorNet for video colorization with early stopping and W&B."""
    if hparams.supress_logs:
        # Silence wandb's own console output.
        import logging
        logging.getLogger("wandb").setLevel(logging.ERROR)

    colornet = model.ColorNet()
    wandb_logger = WandbLogger(project='video-colorization', tags=["colornet"])
    wandb_logger.watch(colornet, log_freq=hparams.log_frequency)

    early_stopping = EarlyStopping('val_loss', patience=hparams.patience)
    checkpoint_callback = ModelCheckpoint(filepath='checkpoints/checkpoint_{epoch:02d}-{val_loss:.2f}')
    trainer = pl.Trainer(max_epochs=hparams.epoch, gpus=hparams.gpus, logger=wandb_logger,
                         early_stop_callback=early_stopping, checkpoint_callback=checkpoint_callback)

    trainer.fit(colornet)
コード例 #24
0
def main():
    """Train the toy GNN node-embedding model from a YAML config file."""
    config_path = "../lightning_modules/GNNEmbedding/train_toy_gnn.yaml"
    with open(config_path) as stream:
        hparams = yaml.load(stream, Loader=yaml.FullLoader)

    embedding_model = AttentionNodeEmbedding(hparams)

    logger = WandbLogger(project="End2End-ToyNodeEmbedding")
    logger.watch(embedding_model)

    trainer = Trainer(
        gpus=1,
        max_epochs=hparams["max_epochs"],
        logger=logger,
        num_sanity_val_steps=0,
        accumulate_grad_batches=1,
    )

    trainer.fit(embedding_model)
コード例 #25
0
ファイル: train.py プロジェクト: maximzubkov/opt-project
def train(model_name: str,
          n_cr: int,
          num_workers: int = 0,
          is_test: bool = False,
          resume_from_checkpoint: str = None):
    """Train a GAN variant, then run the test loop.

    :param model_name: "improved_gan" or "default_gan"
    :param n_cr: critic-ratio value forwarded to the config factory
    :param num_workers: dataloader worker count
    :param is_test: use the lightweight test config and log offline
    :param resume_from_checkpoint: optional checkpoint path to resume from
    :raises ValueError: if ``model_name`` is not a supported variant
    """
    seed_everything(SEED)

    # The two original branches were identical except for the `improved`
    # flag, so they are collapsed into a single path (same error message
    # for unsupported names, raised before any config is built).
    if model_name not in ("improved_gan", "default_gan"):
        raise ValueError(f"Model {model_name} is not supported")
    config_function = get_gan_test_config if is_test else get_gan_default_config
    config = config_function(n_cr)
    model = GAN(config, num_workers, improved=(model_name == "improved_gan"))

    # define logger
    wandb_logger = WandbLogger(project="GAN", log_model=True, offline=is_test)
    wandb_logger.watch(model, log="all")
    # define model checkpoint callback: keep the 3 best checkpoints by
    # val_loss inside the W&B run directory so they are uploaded with the run
    model_checkpoint_callback = ModelCheckpoint(
        filepath=join(wandb.run.dir, "{epoch:02d}-{val_loss:.4f}"),
        period=config.save_every_epoch,
        save_top_k=3,
    )
    # use gpu if it exists
    gpu = 1 if torch.cuda.is_available() else None
    # define learning rate logger
    lr_logger = LearningRateLogger()
    trainer = Trainer(
        max_epochs=config.n_epochs,
        deterministic=True,
        check_val_every_n_epoch=config.val_every_epoch,
        row_log_interval=config.log_every_epoch,
        logger=wandb_logger,
        checkpoint_callback=model_checkpoint_callback,
        resume_from_checkpoint=resume_from_checkpoint,
        gpus=gpu,
        callbacks=[lr_logger],
        reload_dataloaders_every_epoch=True,
    )

    trainer.fit(model)

    trainer.test()
コード例 #26
0
def test_wandb_logger_init(wandb, recwarn):
    """Verify that basic functionality of wandb logger works.

    Wandb doesn't work well with pytest so we have to mock it out here.
    Note: ``wandb`` is a mock fixture, so every ``wandb.init()`` call
    (including those inside the assertions below) returns the same mock
    run object and increments the ``wandb.init`` call count.
    """

    # test wandb.init called when there is no W&B run
    wandb.run = None
    logger = WandbLogger()
    logger.log_metrics({'acc': 1.0})
    wandb.init.assert_called_once()
    wandb.init().log.assert_called_once_with({'acc': 1.0})

    # test wandb.init not called if there is a W&B run
    wandb.init().log.reset_mock()
    wandb.init.reset_mock()
    # simulate an already-attached run; the line below is the single
    # wandb.init() call that assert_called_once() checks for
    wandb.run = wandb.init()
    logger = WandbLogger()
    logger.log_metrics({'acc': 1.0}, step=3)
    wandb.init.assert_called_once()
    wandb.init().log.assert_called_once_with({
        'acc': 1.0,
        'trainer/global_step': 3
    })

    # continue training on same W&B run and offset step
    logger.finalize('success')
    logger.log_metrics({'acc': 1.0}, step=6)
    wandb.init().log.assert_called_with({'acc': 1.0, 'trainer/global_step': 6})

    # log hyper parameters
    # None is stringified and nested dicts are flattened with '/' separators
    logger.log_hyperparams({'test': None, 'nested': {'a': 1}, 'b': [2, 3, 4]})
    wandb.init().config.update.assert_called_once_with(
        {
            'test': 'None',
            'nested/a': 1,
            'b': [2, 3, 4]
        },
        allow_val_change=True,
    )

    # watch a model
    logger.watch('model', 'log', 10)
    wandb.init().watch.assert_called_once_with('model', log='log', log_freq=10)

    assert logger.name == wandb.init().project_name()
    assert logger.version == wandb.init().id
コード例 #27
0
def main(hparams):
    """
    Main testing routine specific for this project: load a checkpointed
    model, set up TensorBoard/W&B logging, and run the test loop.

    :param hparams: Namespace containing configuration values
    :type hparams: Namespace
    """

    # ------------------------
    # 1 INIT MODEL
    # ------------------------

    model = get_model(hparams)
    model.load_state_dict(torch.load(hparams.checkpoint_file)["state_dict"])
    model.eval()

    name = "-".join([hparams.model, hparams.out, "-test"])

    # ------------------------
    # LOGGING SETUP
    # ------------------------

    tb_logger = TensorBoardLogger(save_dir="logs/tb_logs/", name=name)
    tb_logger.experiment.add_graph(model, model.data[0][0].unsqueeze(0))

    run_label = hparams.comment if hparams.comment else time.ctime()
    wandb_logger = WandbLogger(
        name=run_label,
        project=name,
        save_dir="logs",
    )
    wandb_logger.watch(model, log="all", log_freq=200)
    wandb_logger.log_hyperparams(model.hparams)

    # Snapshot the source tree into the W&B run dir for reproducibility.
    for pattern in ["*.py", "dataloader/*.py", "model/*.py"]:
        for source_file in glob(pattern):
            shutil.copy(source_file, wandb.run.dir)

    trainer = pl.Trainer(gpus=hparams.gpus,
                         logger=[wandb_logger])  # , tb_logger],

    # ------------------------
    # 3 START TESTING
    # ------------------------

    trainer.test(model)
コード例 #28
0
ファイル: cmds.py プロジェクト: avilay/kaggle-projects
def train(cfg):
    """
    Trains the classifier.

    :param cfg: run configuration (name, dataroot, runroot, hparams,
        logger selection, train/val split fraction)
    :raises ValueError: if ``cfg.logger`` is neither "wandb" nor "csv"
    """
    if cfg.name == "auto":
        # Generate a memorable random run name (e.g. "misty-sunset-1234").
        cfg.name = Haikunator().haikunate()
    train_csv = Path(cfg.dataroot) / cfg.train_csv
    logger.info(f"Starting run {cfg.name}")
    model = HiggsClassifier(hp=cfg.hparams.model)
    data = HiggsDataModule(
        trainfile=train_csv,
        trainset_prop=cfg.train_val_split_frac,
        hp=cfg.hparams.trainer,
    )
    data.prepare()
    logger.info(
        f"Train set size: {data.trainsize}, Validation set size: {data.valsize}"
    )
    os.makedirs(cfg.runroot, exist_ok=True)

    if cfg.logger == "wandb":
        ml_logger = WandbLogger(
            project="higgs",
            name=cfg.name,
            save_dir=cfg.runroot,
            log_model="all",
            id=cfg.name,
        )
        ml_logger.watch(model, log="all")
    elif cfg.logger == "csv":
        ml_logger = CSVLogger(save_dir=cfg.runroot, name="higgs", version=cfg.name)
    else:
        # BUG FIX: previously an unrecognized logger left `ml_logger`
        # unbound and crashed later with a NameError; fail fast instead.
        raise ValueError(f"Unknown logger {cfg.logger!r}; expected 'wandb' or 'csv'")

    checkpoint = ModelCheckpoint(monitor="val_loss", mode="min")
    start = datetime.now()
    trainer = Trainer(
        default_root_dir=cfg.runroot,
        max_epochs=cfg.hparams.trainer.n_epochs,
        logger=ml_logger,
        callbacks=[checkpoint],
    )
    trainer.fit(model, data)
    end = datetime.now()
    logger.info(f"Took {end - start} to finish training.")
コード例 #29
0
def main():
    """Build a model from a YAML config and train it with DDP and W&B."""
    print("Running main")
    print(time.ctime())

    args = parse_args()

    with open(args.config) as file:
        default_configs = yaml.load(file, Loader=yaml.FullLoader)

    print("Initialising model")
    print(time.ctime())
    # NOTE(review): eval() on a config-supplied string executes arbitrary
    # code; acceptable only because the YAML config is trusted local input.
    model_class = eval(default_configs["model"])
    model = model_class(default_configs)

    # Keep the 2 best checkpoints by val_loss plus the most recent one.
    checkpoint_callback = ModelCheckpoint(
        monitor="val_loss", mode="min", save_top_k=2, save_last=True
    )

    logger = WandbLogger(
        project=default_configs["project"],
        save_dir=default_configs["artifacts"],
    )
    logger.watch(model, log="all")

    # Resolve the trainer's root dir: explicit CLI arg wins, then the
    # SLURM job id (so cluster runs get separate dirs), then the default.
    if args.root_dir is not None:
        default_root_dir = os.path.join(".", args.root_dir)
    elif "SLURM_JOB_ID" in os.environ:
        default_root_dir = os.path.join(".", os.environ["SLURM_JOB_ID"])
    else:
        default_root_dir = None

    trainer = Trainer(
        gpus=default_configs["gpus"],
        max_epochs=default_configs["max_epochs"],
        logger=logger,
        strategy="ddp",
        num_sanity_val_steps=0,
        callbacks=[checkpoint_callback],
        default_root_dir=default_root_dir
    )
    trainer.fit(model)
コード例 #30
0
def main(hparams, network):
    """Train an audio-emotion network, then evaluate and upload the best checkpoint.

    :param hparams: hyperparameter namespace (max_nb_epochs, gpus, nodes, ...)
    :param network: LightningModule class to instantiate and train
    """
    # init module
    model = network(hparams)
    project_folder = 'audio_emotion_team'
    wandb_logger = WandbLogger(name='lflb_dropout_rnn',
                               project=project_folder,
                               entity='thesis',
                               offline=False)

    early_stop_callback = EarlyStopping(monitor='val_loss',
                                        min_delta=0.00,
                                        patience=20,
                                        verbose=False,
                                        mode='min')

    # most basic trainer, uses good defaults
    trainer = Trainer(max_nb_epochs=hparams.max_nb_epochs,
                      gpus=hparams.gpus,
                      nb_gpu_nodes=hparams.nodes,
                      logger=wandb_logger,
                      weights_summary='full',
                      early_stop_callback=early_stop_callback,
                      profiler=True,
                      benchmark=True,
                      log_gpu_memory='all')

    wandb_logger.experiment.config.update(
        {'dataset': 'IEMOCAP_SPECT_GS_8s_512h_2048n'})
    wandb_logger.watch(model)

    trainer.fit(model)

    # load best model: checkpoints live under <project>/version_<run id>.
    # Paths are built with os.path.join instead of string '+' concatenation.
    checkpoints_dir = os.path.join(
        project_folder, 'version_' + wandb_logger.experiment.id, 'checkpoints')
    model_file = os.listdir(checkpoints_dir)[0]
    checkpoint_path = os.path.join(checkpoints_dir, model_file)

    # eval and upload best model
    model = network.load_from_checkpoint(checkpoint_path)
    report(model, wandb_logger)
    copyfile(checkpoint_path,
             os.path.join(wandb_logger.experiment.dir, 'model.ckpt'))
    wandb_logger.experiment.save('model.ckpt')