def test_no_input_seq(self, is_async, branchless_no_input_pipeline):
    """Run the no-input pipeline sequentially against an empty catalog.

    The run is expected to return exactly one output, keyed ``"E"``.
    """
    runner = SequentialRunner(is_async=is_async)
    result = runner.run(branchless_no_input_pipeline, DataCatalog())

    assert "E" in result
    assert len(result) == 1
def dummy_catalog():
    """Build a catalog with two ``MemoryDataSet`` entries and one ``CSVDataSet``."""
    entries = {
        "raw_data": MemoryDataSet(),
        "data": MemoryDataSet(),
        "model": CSVDataSet("fake/path/to/file.csv"),
    }
    return DataCatalog(entries)
def memory_catalog():
    """Build a catalog of two ``MemoryDataSet`` entries.

    ``ds1`` is constructed from a dict and ``ds2`` from a list of integers.
    """
    return DataCatalog(
        {
            "ds1": MemoryDataSet({"data": 42}),
            "ds2": MemoryDataSet([1, 2, 3, 4, 5]),
        }
    )
def test_confirms(self, mocker, pipeline, is_async):
    """Check that a sequential run calls ``confirm()`` once on ``ds1``.

    ``ds1`` is registered as a mock so the call can be inspected afterwards.
    """
    mocked_data_set = mocker.Mock()
    catalog_with_mock = DataCatalog(data_sets={"ds1": mocked_data_set})

    runner = SequentialRunner(is_async=is_async)
    runner.run(pipeline, catalog_with_mock)

    mocked_data_set.confirm.assert_called_once_with()
def test_node_returning_none(self, is_async, saving_none_pipeline):
    """Expect a ``DataSetError`` when the sequential run tries to save ``None``."""
    expected_message = "Saving `None` to a `DataSet` is not allowed"

    with pytest.raises(DataSetError, match=expected_message):
        # The runner is built inside the context, as in the one-liner form.
        runner = SequentialRunner(is_async=is_async)
        runner.run(saving_none_pipeline, DataCatalog())
def test_unsatisfied_inputs(self, is_async, unfinished_outputs_pipeline):
    """Expect a ``ValueError`` when the catalog lacks the pipeline's inputs.

    The catalog is empty, so ds1, ds2 and ds3 were not specified.
    """
    with pytest.raises(ValueError, match=r"not found in the DataCatalog"):
        runner = SequentialRunner(is_async=is_async)
        runner.run(unfinished_outputs_pipeline, DataCatalog())
def catalog():
    """Return a ``DataCatalog`` constructed with no arguments."""
    empty_catalog = DataCatalog()
    return empty_catalog
def test_no_data_sets(self, is_async, branchless_pipeline):
    """Run with a catalog that has no data sets, only a value for ``ds1``.

    The catalog gets an empty mapping first and ``{"ds1": 42}`` as its second
    positional argument (presumably the feed dict -- confirm against the
    ``DataCatalog`` signature). The run must return ``ds3`` equal to 42.
    """
    fed_catalog = DataCatalog({}, {"ds1": 42})

    runner = SequentialRunner(is_async=is_async)
    result = runner.run(branchless_pipeline, fed_catalog)

    assert "ds3" in result
    assert result["ds3"] == 42
def multi_catalog():
    """Build a catalog with a CSV entry (``abc``) and a Parquet entry (``xyz``)."""
    data_sets = {
        "abc": CSVLocalDataSet(filepath="abc.csv"),
        "xyz": ParquetLocalDataSet(filepath="xyz.parq"),
    }
    return DataCatalog(data_sets)
def test_save_to_unregistered(self, dummy_dataframe):
    """Saving under a name missing from the catalog raises ``DataSetNotFoundError``."""
    empty_catalog = DataCatalog(data_sets={})

    with pytest.raises(
        DataSetNotFoundError,
        match=r"DataSet 'test' not found in the catalog",
    ):
        empty_catalog.save("test", dummy_dataframe)
def test_load_from_unregistered(self):
    """Loading a name missing from the catalog raises ``DataSetNotFoundError``."""
    empty_catalog = DataCatalog(data_sets={})

    with pytest.raises(
        DataSetNotFoundError,
        match=r"DataSet 'test' not found in the catalog",
    ):
        empty_catalog.load("test")
def data_catalog(data_set):
    """Wrap ``data_set`` in a ``DataCatalog`` under the name ``"test"``."""
    registered = {"test": data_set}
    return DataCatalog(data_sets=registered)
def test_node_returning_none(self, is_async):
    """Expect a ``DataSetError`` when a parallel run tries to save ``None``.

    The pipeline passes "A" through ``identity`` into "B", then feeds "B" to
    ``return_none`` whose result would be saved as "C".
    """
    nodes = [
        node(identity, "A", "B"),
        node(return_none, "B", "C"),
    ]
    pipeline = Pipeline(nodes)
    catalog = DataCatalog({"A": MemoryDataSet("42")})
    expected_message = "Saving `None` to a `DataSet` is not allowed"

    with pytest.raises(DataSetError, match=expected_message):
        # The runner is built inside the context, as in the one-liner form.
        runner = ParallelRunner(is_async=is_async)
        runner.run(pipeline, catalog)