def test_legacy_functions(self, neptune, neptune_file_mock, warnings_mock):
    logger = NeptuneLogger(api_key="test", project="project")

    # test deprecated functions which will be shut down in pytorch-lightning 1.7.0
    attr_mock = logger.run.__getitem__
    attr_mock.reset_mock()
    fake_image = {}
    logger.log_metric("metric", 42)
    logger.log_text("text", "some string")
    logger.log_image("image_obj", fake_image)
    logger.log_image("image_str", "img/path")
    logger.log_artifact("artifact", "some/path")

    assert attr_mock.call_count == 5
    assert warnings_mock.warn.call_count == 5
    attr_mock.assert_has_calls(
        [
            call("training/metric"),
            call().log(42, step=None),
            call("training/text"),
            call().log("some string", step=None),
            call("training/image_obj"),
            call().log(fake_image, step=None),
            call("training/image_str"),
            call().log(neptune_file_mock(), step=None),
            call("training/artifacts/artifact"),
            call().log("some/path"),
        ]
    )

    # test Exception-raising functions
    self._assert_legacy_usage(logger.set_property)
    self._assert_legacy_usage(logger.append_tags)

def test_neptune_leave_open_experiment_after_fit(tmpdir):
    """Verify that the Neptune experiment is closed after fit unless close_after_fit=False."""
    tutils.reset_seed()

    hparams = tutils.get_hparams()
    model = LightningTestModel(hparams)

    def _run_training(logger):
        logger._experiment = MagicMock()
        trainer_options = dict(
            default_save_path=tmpdir,
            max_epochs=1,
            train_percent_check=0.05,
            logger=logger,
        )
        trainer = Trainer(**trainer_options)
        trainer.fit(model)
        return logger

    logger_close_after_fit = _run_training(NeptuneLogger(offline_mode=True))
    assert logger_close_after_fit._experiment.stop.call_count == 1

    logger_open_after_fit = _run_training(
        NeptuneLogger(offline_mode=True, close_after_fit=False))
    assert logger_open_after_fit._experiment.stop.call_count == 0

def _get_logger_with_mocks(**kwargs):
    logger = NeptuneLogger(**kwargs)
    run_instance_mock = MagicMock()
    logger._run_instance = run_instance_mock
    logger._run_instance.__getitem__.return_value.fetch.return_value = "exp-name"
    run_attr_mock = MagicMock()
    logger._run_instance.__getitem__.return_value = run_attr_mock
    return logger, run_instance_mock, run_attr_mock

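# A minimal usage sketch for the helper above (a hypothetical test, assuming
# the same patched `neptune` fixture as the surrounding tests and the logger's
# default "training/" metric prefix):
def test_log_metrics_with_mocked_run():
    logger, run_instance_mock, run_attr_mock = _get_logger_with_mocks(
        api_key="test", project="project")
    logger.log_metrics({"acc": 0.9})
    run_instance_mock.__getitem__.assert_any_call("training/acc")
    run_attr_mock.log.assert_called_once_with(0.9)
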
def test_save_dir(self, neptune):
    # given
    logger = NeptuneLogger(api_key="test", project="project")

    # expect
    self.assertEqual(logger.save_dir, os.path.join(os.getcwd(), ".neptune"))

def main():
    args = get_args()

    with open(args.config_path) as f:
        hparams = yaml.load(f, Loader=yaml.SafeLoader)
    hparams["data_path"] = args.data_path

    pipeline = SegmentDocs(hparams)

    logger = NeptuneLogger(
        api_key=os.environ["NEPTUNE_API_TOKEN"],
        project_name="ternaus/documentsegmentation",
        experiment_name=hparams["experiment_name"],  # Optional
        tags=["pytorch-lightning", "mlp"],  # Optional
        upload_source_files=[],
    )

    Path(hparams["checkpoint_callback"]["filepath"]).mkdir(exist_ok=True, parents=True)

    trainer = object_from_dict(
        hparams["trainer"],
        checkpoint_callback=object_from_dict(hparams["checkpoint_callback"]),
        logger=logger,
    )

    trainer.fit(pipeline)

def main(args):
    seed_everything(args.seed)

    logger = NeptuneLogger(
        api_key=os.environ.get("NEPTUNE_API_TOKEN"),
        project_name="wibbn/predictive-maintenance",
        params=vars(args),
        experiment_name="lstm_logs",
    )

    trainer = Trainer(
        max_epochs=args.max_epochs,
        logger=logger,
        gpus=0,
        progress_bar_refresh_rate=2,
    )

    model = LSTM(
        n_features=args.n_features,
        hidden_size=args.hidden_size,
        seq_len=args.seq_len,
        out_seq_len=args.out_seq_len,
        batch_size=args.batch_size,
        criterion=args.criterion,
        num_layers=args.num_layers,
        dropout=args.dropout,
        learning_rate=args.learning_rate,
    )

    dm = TelemetryDataModule(
        path=args.telemetry_path,
        seq_len=args.seq_len,
        out_seq_len=args.out_seq_len,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
    )

    trainer.fit(model, dm)
    trainer.test(model, datamodule=dm)

    model.save_hyperparameters()
    trainer.save_checkpoint(args.checkpoint_path)

def test_neptune_online(neptune):
    logger = NeptuneLogger(api_key='test', project_name='project')

    neptune.init.assert_called_once_with(api_token='test', project_qualified_name='project')

    assert logger.name == neptune.create_experiment().name
    assert logger.version == neptune.create_experiment().id

def set_up_neptune(project_name='debug', experiment_name='debug', params=None,
                   tags=None, close_after_fit=False, **kwargs):
    """
    Set up a Neptune logger using the API token from the environment.

    :param project_name: qualified Neptune project name
    :param experiment_name: display name of the experiment
    :param params: hyperparameters to log
    :param tags: tags to attach to the experiment
    :param close_after_fit: whether to stop the experiment when fit ends
    :param kwargs: extra keyword arguments passed through to NeptuneLogger
    :return: a configured NeptuneLogger
    """
    if "NEPTUNE_API_TOKEN" not in os.environ:
        raise EnvironmentError(
            'Please set environment variable `NEPTUNE_API_TOKEN`.')
    neptune_logger = NeptuneLogger(
        api_key=os.environ["NEPTUNE_API_TOKEN"],
        project_name=project_name,
        experiment_name=experiment_name,
        params=params or {},
        tags=tags or [],
        close_after_fit=close_after_fit,
        **kwargs)
    return neptune_logger

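# Example call for the helper above, with hypothetical project and experiment
# names; assumes NEPTUNE_API_TOKEN is already exported in the environment:
logger = set_up_neptune(
    project_name='my-workspace/my-project',
    experiment_name='baseline',
    params={'lr': 1e-3, 'batch_size': 32},
    tags=['baseline', 'pytorch-lightning'],
)
trainer = Trainer(max_epochs=1, logger=logger)
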
def test__get_full_model_names_from_exp_structure(self):
    # given:
    input_dict = {
        "foo": {
            "bar": {
                "lvl1_1": {
                    "lvl2": {
                        "lvl3_1": "some non important value",
                        "lvl3_2": "some non important value",
                    }
                },
                "lvl1_2": "some non important value",
            },
            "other_non_important": {"val100": 100},
        },
        "other_non_important": {"val42": 42},
    }
    expected_keys = {"lvl1_1/lvl2/lvl3_1", "lvl1_1/lvl2/lvl3_2", "lvl1_2"}

    # expect:
    self.assertEqual(
        NeptuneLogger._get_full_model_names_from_exp_structure(input_dict, "foo/bar"),
        expected_keys)

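# A plausible sketch of what the helper under test presumably does (an
# assumption, not the real implementation): descend to the subtree addressed
# by the "/"-separated prefix, then collect every leaf path below it.
def _full_model_names_sketch(structure, prefix):
    node = structure
    for part in prefix.split("/"):
        node = node[part]

    def leaves(subtree, path=""):
        for key, value in subtree.items():
            joined = f"{path}/{key}" if path else key
            if isinstance(value, dict):
                yield from leaves(value, joined)
            else:
                yield joined  # non-dict values are treated as model entries

    return set(leaves(node))
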
def test_neptune_offline(neptune):
    logger = NeptuneLogger(offline_mode=True)

    neptune.Session.assert_not_called()

    _ = logger.experiment

    neptune.Session.assert_called_once_with(backend=neptune.OfflineBackend())
    assert logger.experiment == neptune.Session().get_project().create_experiment()

def get_logger():
    with open('../neptune_api.txt') as f:
        api_key = f.read().strip()
    neptune_logger = NeptuneLogger(
        api_key=api_key,
        project_name='xxx/cass-leaf',
        experiment_name='xxxxxxx',
        params=dict(vars(args)),
    )
    return neptune_logger

def test_online_with_custom_run(self, neptune):
    created_run = Run()
    logger = NeptuneLogger(run=created_run)

    self.assertEqual(logger._run_instance, created_run)
    self.assertEqual(logger.version, "TEST-42")
    self.assertEqual(neptune.init.call_count, 0)

def get_callback(self, integration_name: str) -> Any:
    if not self.disabled:
        if integration_name == 'pytorch-lightning':
            return NeptuneLogger(
                api_key=self.api_token,
                project_name=self.project_name,
                close_after_fit=False,
                experiment_id=self.exp.id)
    return None

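# Hypothetical usage of get_callback above; `tracker` stands in for an
# instance of the enclosing class and is not a real object from this codebase:
def build_trainer(tracker):
    pl_logger = tracker.get_callback('pytorch-lightning')
    return Trainer(logger=pl_logger) if pl_logger is not None else Trainer()
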
def test_neptune_leave_open_experiment_after_fit(tmpdir):
    """Verify that the Neptune experiment is closed after fit unless close_after_fit=False."""
    model = EvalModelTemplate()

    def _run_training(logger):
        logger._experiment = MagicMock()
        trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, train_percent_check=0.05, logger=logger)
        trainer.fit(model)
        return logger

    logger_close_after_fit = _run_training(NeptuneLogger(offline_mode=True))
    assert logger_close_after_fit._experiment.stop.call_count == 1

    logger_open_after_fit = _run_training(
        NeptuneLogger(offline_mode=True, close_after_fit=False))
    assert logger_open_after_fit._experiment.stop.call_count == 0

def test_neptune_existing_experiment(neptune):
    logger = NeptuneLogger(experiment_id='TEST-123')
    neptune.Session.with_default_backend().get_project().get_experiments.assert_called_once_with(id='TEST-123')

    experiment = logger.experiment
    assert logger.experiment_name == experiment.get_system_properties()['name']
    assert logger.params == experiment.get_parameters()
    assert logger.properties == experiment.get_properties()
    assert logger.tags == experiment.get_tags()

def test_neptune_pickling(self, neptune):
    unpickleable_run = Run()
    logger = NeptuneLogger(run=unpickleable_run)
    self.assertEqual(0, neptune.init.call_count)

    pickled_logger = pickle.dumps(logger)
    unpickled = pickle.loads(pickled_logger)

    neptune.init.assert_called_once_with(name="Test name", run="TEST-42")
    self.assertIsNotNone(unpickled.experiment)

def get_neptune_logger(model: PretrainedModelBase, *,
                       user_name: str,
                       project_name: str,
                       experiment_name: str,
                       description: str):
    from pytorch_lightning.loggers import NeptuneLogger
    neptune = NeptuneLogger(
        api_key=os.environ["NEPTUNE_API_TOKEN"],
        project_name=f'{user_name}/{project_name}',
        experiment_name=experiment_name,
        description=description,
        tags=[experiment_name],
        params=get_all_hyperparams(model))
    return neptune

def build_logger(config: Dict):
    if config['trainer']['logger'] == 'neptune':
        logger = NeptuneLogger(
            api_key=os.environ['NEPTUNE_API_TOKEN'],
            project_name=os.environ['NEPTUNE_PROJECT_NAME'],
            params=config['runner'],
        )
    else:
        raise ValueError('Logger {} unknown'.format(config['trainer']['logger']))
    return logger

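# A hypothetical config dict illustrating the shape build_logger expects;
# the keys are inferred from the lookups above, the values are made up:
config = {
    'trainer': {'logger': 'neptune'},
    'runner': {'lr': 1e-3, 'batch_size': 32},
}
logger = build_logger(config)
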
def test_neptune_online(neptune):
    logger = NeptuneLogger(api_key='test', project_name='project')
    created_experiment = neptune.Session.with_default_backend().get_project().create_experiment()

    # It's important to check if the internal variable _experiment was initialized in __init__.
    # Calling logger.experiment would cause a side-effect of initializing _experiment,
    # if it wasn't already initialized.
    assert logger._experiment == created_experiment
    assert logger.name == created_experiment.name
    assert logger.version == created_experiment.id

def test_neptune_online(self, neptune):
    logger = NeptuneLogger(api_key="test", project="project")
    created_run_mock = logger.run

    self.assertEqual(logger._run_instance, created_run_mock)
    self.assertEqual(logger.name, "Run test name")
    self.assertEqual(logger.version, "TEST-1")
    self.assertEqual(neptune.init.call_count, 1)
    self.assertEqual(created_run_mock.__getitem__.call_count, 2)
    self.assertEqual(created_run_mock.__setitem__.call_count, 1)
    created_run_mock.__getitem__.assert_has_calls([call("sys/id"), call("sys/name")], any_order=True)
    created_run_mock.__setitem__.assert_called_once_with(
        "source_code/integrations/pytorch-lightning", __version__)

def training_loop(train, valid, save_path, pl_module, callbacks, n_epochs,
                  checkpoint_callback, use_neptune=False, resume=True,
                  limit_train_batches=2, neptune_tags="", neptune_name=""):
    """
    Largely model/application agnostic training code.

    Trains with proper resuming.
    """
    # Copy the gin configs used, for reference, to the save folder
    os.system("rm " + os.path.join(save_path, "*gin"))
    for gin_config in sys.argv[2].split(";"):
        os.system("cp {} {}/base_config.gin".format(gin_config, save_path))
    with open(os.path.join(save_path, "config.gin"), "w") as f:
        f.write(gin.operative_config_str())

    hparams = parse_gin_config(os.path.join(save_path, 'config.gin'))
    if 'train.callbacks' in hparams:
        del hparams['train.callbacks']
    # TODO: What is a less messy way to pass hparams? This is only so that
    # logging is aware of the hyperparameters.
    pl_module._set_hparams(hparams)
    pl_module._hparams_initial = copy.deepcopy(hparams)

    loggers = []
    loggers.append(pl_loggers.CSVLogger(save_path))
    if use_neptune:
        from pytorch_lightning.loggers import NeptuneLogger
        loggers.append(
            NeptuneLogger(
                api_key=NEPTUNE_TOKEN,
                project_name=NEPTUNE_USER + "/" + NEPTUNE_PROJECT,
                experiment_name=neptune_name if len(neptune_name) else os.path.basename(save_path),
                tags=neptune_tags.split(',') if len(neptune_tags) else None,
            ))

    callbacks += [MetaSaver(), Heartbeat(), LearningRateMonitor()]

    trainer = pl.Trainer(
        default_root_dir=save_path,
        limit_train_batches=limit_train_batches,
        max_epochs=n_epochs,
        logger=loggers,
        callbacks=callbacks,
        log_every_n_steps=1,
        checkpoint_callback=checkpoint_callback,
        resume_from_checkpoint=os.path.join(save_path, 'last.ckpt')
        if resume and os.path.exists(os.path.join(save_path, 'last.ckpt'))
        else None)

    trainer.fit(pl_module, train, valid)

    return trainer

def test_neptune_leave_open_experiment_after_fit(neptune, tmpdir):
    """Verify that the Neptune experiment is closed after fit unless close_after_fit=False."""
    model = BoringModel()

    def _run_training(logger):
        logger._experiment = MagicMock()
        trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, limit_train_batches=0.05, logger=logger)
        assert trainer.log_dir is None
        trainer.fit(model)
        assert trainer.log_dir is None
        return logger

    logger_close_after_fit = _run_training(NeptuneLogger(offline_mode=True))
    assert logger_close_after_fit._experiment.stop.call_count == 1

    logger_open_after_fit = _run_training(
        NeptuneLogger(offline_mode=True, close_after_fit=False))
    assert logger_open_after_fit._experiment.stop.call_count == 0

def test_neptune_pickle(tmpdir):
    """Verify that pickling trainer with neptune logger works."""
    tutils.reset_seed()

    logger = NeptuneLogger(offline_mode=True)

    trainer_options = dict(default_root_dir=tmpdir, max_epochs=1, logger=logger)

    trainer = Trainer(**trainer_options)
    pkl_bytes = pickle.dumps(trainer)
    trainer2 = pickle.loads(pkl_bytes)
    trainer2.logger.log_metrics({'acc': 1.0})

def test__get_full_model_name(self):
    # given:
    SimpleCheckpoint = namedtuple("SimpleCheckpoint", ["dirpath"])
    test_input_data = [
        ("key.ext", "foo/bar/key.ext", SimpleCheckpoint(dirpath="foo/bar")),
        ("key/in/parts.ext", "foo/bar/key/in/parts.ext", SimpleCheckpoint(dirpath="foo/bar")),
    ]

    # expect:
    for expected_model_name, *key_and_path in test_input_data:
        self.assertEqual(NeptuneLogger._get_full_model_name(*key_and_path), expected_model_name)

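# A plausible sketch of the static helper under test (an assumption based on
# the test data above): strip the checkpoint callback's dirpath prefix and
# return the remainder of the path as the model name.
def _get_full_model_name_sketch(model_path, checkpoint_callback):
    expected_prefix = f"{checkpoint_callback.dirpath}/"
    if not model_path.startswith(expected_prefix):
        raise ValueError(f"{model_path} was expected to start with {expected_prefix}")
    return model_path[len(expected_prefix):]
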
def pytorch_run(environment):
    # given
    PARAMS = {
        "max_epochs": 3,
        "save_top_k": 2,
        "learning_rate": 0.005,
        "decay_factor": 0.99,
        "batch_size": 64,
        "linear": 64,
    }

    # and
    run = neptune.init(name="Integration pytorch-lightning", project=environment.project)

    # and
    model_checkpoint = ModelCheckpoint(
        dirpath="my_model/checkpoints/",
        filename="{epoch:02d}-{val/loss/dataloader_idx_1:.2f}",
        save_weights_only=True,
        save_top_k=PARAMS["save_top_k"],
        save_last=True,
        monitor="val/loss/dataloader_idx_1",
        every_n_epochs=1,
    )

    neptune_logger = NeptuneLogger(run=run, prefix="custom_prefix")

    # and (Subject)
    trainer = pl.Trainer(
        max_epochs=PARAMS["max_epochs"],
        log_every_n_steps=10,
        logger=neptune_logger,
        track_grad_norm=2,
        callbacks=[model_checkpoint],
    )
    model = LitModel(
        linear=PARAMS["linear"],
        learning_rate=PARAMS["learning_rate"],
        decay_factor=PARAMS["decay_factor"],
        neptune_logger=neptune_logger,
    )
    data_module = MNISTDataModule(
        normalization_vector=((0.1307,), (0.3081,)),
        batch_size=PARAMS["batch_size"],
    )

    # then
    trainer.fit(model, datamodule=data_module)
    trainer.test(model, datamodule=data_module)
    run.sync()

    yield run

def test_neptune_logger(tmpdir):
    """Verify that basic functionality of neptune logger works."""
    tutils.reset_seed()

    hparams = tutils.get_default_hparams()
    model = LightningTestModel(hparams)
    logger = NeptuneLogger(offline_mode=True)

    trainer_options = dict(default_root_dir=tmpdir, max_epochs=1, train_percent_check=0.05, logger=logger)

    trainer = Trainer(**trainer_options)
    result = trainer.fit(model)
    assert result == 1, 'Training failed'

def test_neptune_pickle(tmpdir):
    """Verify that pickling trainer with neptune logger works."""
    tutils.reset_seed()

    logger = NeptuneLogger(offline_mode=True)

    trainer_options = dict(default_save_path=tmpdir, max_epochs=1, logger=logger)

    trainer = Trainer(**trainer_options)
    pkl_bytes = pickle.dumps(trainer)
    trainer2 = pickle.loads(pkl_bytes)
    trainer2.logger.log_metrics({"acc": 1.0})

def test_online_with_wrong_kwargs(self, neptune):
    """Test combinations of kwargs passed together with the `run` kwarg,
    which makes some of the other parameters unavailable in init."""
    with self.assertRaises(ValueError):
        NeptuneLogger(run="some string")

    with self.assertRaises(ValueError):
        NeptuneLogger(run=Run(), project="redundant project")

    with self.assertRaises(ValueError):
        NeptuneLogger(run=Run(), api_key="redundant api key")

    with self.assertRaises(ValueError):
        NeptuneLogger(run=Run(), name="redundant api name")

    with self.assertRaises(ValueError):
        NeptuneLogger(run=Run(), foo="random **kwarg")

    # this should work
    NeptuneLogger(run=Run())
    NeptuneLogger(project="foo")
    NeptuneLogger(foo="bar")

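# A sketch of the validation those assertions exercise (an assumption: once an
# already-initialized Run is passed, any other constructor argument is
# rejected; without `run`, extra kwargs are simply forwarded to neptune.init):
def _verify_run_kwargs_sketch(run=None, **neptune_init_kwargs):
    if run is not None and not isinstance(run, Run):
        raise ValueError("`run` must be a `neptune.new.Run` object")
    if run is not None and neptune_init_kwargs:
        raise ValueError(
            "When an already initialized run is provided,"
            " no other neptune.init() arguments can be passed")
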
def perform_training(
    trial_info: TrialInfo,
    training_data=None,
    model=None,
    logger_tags: Optional[List[str]] = None,
):
    if model is None:
        model = EfficientNet(trial_info=trial_info)
    if training_data is None:
        training_data = StanfordCarsDataModule(
            batch_size=trial_info.batch_size,
            in_channels=trial_info.in_channels,
            image_size=model.image_size)

    neptune_logger = NeptuneLogger(
        project_name="matkalinowski/sandbox",
        experiment_name=str(trial_info),
        tags=logger_tags)

    early_stop_callback = pl.callbacks.early_stopping.EarlyStopping(
        min_delta=1e-3, patience=10)
    checkpoint_callback = ModelCheckpoint(
        filepath=str(trial_info.output_folder))
    callback = StanfordCarsDatasetCallback(trial_info)
    lrl = LearningRateLogger()

    trainer = pl.Trainer(
        max_epochs=trial_info.epochs,
        gpus=1,
        # fast_dev_run=True,
        logger=neptune_logger,
        callbacks=[callback, lrl],
        checkpoint_callback=checkpoint_callback,
        early_stop_callback=early_stop_callback)
    trainer.fit(model, datamodule=training_data)

def main(args: Namespace) -> None:
    if args.seed is not None:
        pl.seed_everything(args.seed)

    if args.accelerator == 'ddp':
        # When using a single GPU per process and per DistributedDataParallel,
        # we need to divide the batch size ourselves based on the total number
        # of GPUs we have
        args.batch_size = int(args.batch_size / max(1, args.gpus))
        args.workers = int(args.workers / max(1, args.gpus))

    model = ImageNetLightningModelForVit(**vars(args))

    kwargs = {}
    if args.neptune:
        kwargs = dict(
            logger=NeptuneLogger(
                project_name="ivan.prado/vit-sandbox",
                params={
                    k: v
                    for k, v in vars(args).items()
                    if isinstance(v, (type(None), int, float, str))
                }))

    lr_monitor = LearningRateMonitor(logging_interval='step')
    trainer = pl.Trainer.from_argparse_args(args, callbacks=[lr_monitor], **kwargs)

    if args.auto_lr_find:
        trainer.tune(model)

    if args.evaluate:
        trainer.test(model)
    else:
        trainer.fit(model)