def test_log_catalog(self, tmp_path):
    """Check that ``log_catalog`` writes a dataset record to the journal file.

    A ``Journal`` is created with ``project_path`` set to ``tmp_path`` and
    ``log_catalog`` is called once. The test expects exactly one
    ``journal_*`` file under ``tmp_path``, reads its first two lines as JSON
    and verifies the fields of the second one.
    """
    record_data = {"run_id": "fake_id", "project_path": str(tmp_path)}
    journal = Journal(record_data)
    journal.log_catalog("fake_data", "fake_operation", "fake_version")

    file_path = list(tmp_path.glob("journal_*"))
    # Check the count before indexing so that a missing journal file is
    # reported as an assertion failure rather than an IndexError.
    assert len(file_path) == 1
    assert journal.run_id in str(file_path[0])

    with file_path[0].open() as log_file:
        # First line is read as the context record, second as the catalog one.
        context_log = json.loads(log_file.readline())
        catalog_log = json.loads(log_file.readline())

    assert catalog_log["type"] == "DatasetJournalRecord"
    assert catalog_log["name"] == "fake_data"
    assert catalog_log["operation"] == "fake_operation"
    assert catalog_log["version"] == "fake_version"
    # Both records must refer to the same run.
    assert catalog_log["run_id"] == context_log["run_id"]
def test_invalid_context_record(self, tmp_path, caplog):
    """Check that a context which cannot be recorded is reported via logging.

    The context carries a lambda under the ``"blah"`` key; after building the
    ``Journal`` the first captured log message must contain
    ``"Unable to record"``.
    """
    context = {"run_id": "fake_id", "project_path": str(tmp_path)}
    # presumably the lambda is what makes the record unserialisable -- the
    # Journal implementation is not visible from this test.
    context["blah"] = lambda x: x

    Journal(context)

    _, _, first_message = caplog.record_tuples[0]
    assert "Unable to record" in first_message
def test_context_record(self, tmp_path):
    """Check the context record written when a ``Journal`` is created.

    Exactly one ``journal_*`` file must appear under ``tmp_path``; its
    content is parsed as a single JSON document and its fields are verified.
    """
    journal = Journal({"run_id": "fake_id", "project_path": str(tmp_path)})

    journal_files = list(tmp_path.glob("journal_*"))
    assert len(journal_files) == 1
    journal_file = journal_files[0]
    assert journal.run_id in str(journal_file)

    record = json.loads(journal_file.read_text())
    assert record["type"] == "ContextJournalRecord"
    assert record["project_path"] == str(tmp_path)
    # NOTE(review): the literal "git_sha" value presumably comes from a
    # fixture/mock defined outside this block -- confirm.
    assert record["git_sha"] == "git_sha"
    assert "run_id" in record
def test_from_sane_config_versioned(self, sane_config, dummy_dataframe):
    """Round-trip a versioned data set through a catalog built from config.

    The ``boats`` entry is marked as versioned, the catalog is created with
    the same generated timestamp as load and save version, and the test then
    checks that the journal is attached, that the saved file lands in the
    versioned location, and that loading returns an equal frame.
    """
    sane_config["catalog"]["boats"]["versioned"] = True
    timestamp = generate_timestamp()
    run_journal = Journal({"run_id": "fake-id", "project_path": "fake-path"})

    catalog = DataCatalog.from_config(
        **sane_config,
        load_versions={"boats": timestamp},
        save_version=timestamp,
        journal=run_journal,
    )
    assert catalog._journal == run_journal  # pylint: disable=protected-access

    catalog.save("boats", dummy_dataframe)

    # Expected layout: <filepath>/<version>/<file name>
    base_path = Path(sane_config["catalog"]["boats"]["filepath"])
    versioned_file = base_path / timestamp / base_path.name
    assert versioned_file.is_file()

    assert_frame_equal(catalog.load("boats"), dummy_dataframe)
def test_deprecation_warning(self, tmp_path):
    """Check that constructing a ``Journal`` emits a ``DeprecationWarning``."""
    project_path = str(tmp_path)
    context = {"run_id": "fake_id", "project_path": project_path}

    with pytest.warns(DeprecationWarning):
        Journal(context)