Ejemplo n.º 1
0
 def test_no_input_seq(self, is_async, branchless_no_input_pipeline):
     """Run the pipeline sequentially on a DataCatalog built with no arguments.

     The run must return exactly one output, and it must be keyed "E".
     """
     runner = SequentialRunner(is_async=is_async)
     result = runner.run(branchless_no_input_pipeline, DataCatalog())
     assert "E" in result
     assert len(result) == 1
Ejemplo n.º 2
0
def dummy_catalog():
    """Return a DataCatalog holding two MemoryDataSets and one CSVDataSet.

    "raw_data" and "data" are MemoryDataSets created with no arguments;
    "model" is a CSVDataSet pointing at a placeholder path.
    """
    entries = {
        "raw_data": MemoryDataSet(),
        "data": MemoryDataSet(),
        "model": CSVDataSet("fake/path/to/file.csv"),
    }
    return DataCatalog(entries)
Ejemplo n.º 3
0
def memory_catalog():
    """Return a DataCatalog with two MemoryDataSets.

    "ds1" wraps the dict {"data": 42}; "ds2" wraps the list [1, 2, 3, 4, 5].
    """
    return DataCatalog(
        {
            "ds1": MemoryDataSet({"data": 42}),
            "ds2": MemoryDataSet([1, 2, 3, 4, 5]),
        }
    )
Ejemplo n.º 4
0
 def test_confirms(self, mocker, pipeline, is_async):
     """Check that `confirm` is invoked once on the dataset registered as "ds1".

     A mock stands in for the dataset; after the sequential run its `confirm`
     attribute must have been called exactly once, with no arguments.
     """
     mocked_dataset = mocker.Mock()
     mocked_catalog = DataCatalog(data_sets={"ds1": mocked_dataset})
     runner = SequentialRunner(is_async=is_async)
     runner.run(pipeline, mocked_catalog)
     mocked_dataset.confirm.assert_called_once_with()
Ejemplo n.º 5
0
 def test_node_returning_none(self, is_async, saving_none_pipeline):
     """Expect DataSetError from a sequential run of `saving_none_pipeline`.

     The error message must match the "Saving `None` ..." pattern below.
     """
     expected_message = "Saving `None` to a `DataSet` is not allowed"
     with pytest.raises(DataSetError, match=expected_message):
         SequentialRunner(is_async=is_async).run(
             saving_none_pipeline, DataCatalog()
         )
Ejemplo n.º 6
0
 def test_unsatisfied_inputs(self, is_async, unfinished_outputs_pipeline):
     """Expect ValueError when the pipeline runs on a no-argument DataCatalog.

     ds1, ds2 and ds3 were not specified, so the run must fail with a
     message matching "not found in the DataCatalog".
     """
     missing_inputs_pattern = r"not found in the DataCatalog"
     with pytest.raises(ValueError, match=missing_inputs_pattern):
         SequentialRunner(is_async=is_async).run(
             unfinished_outputs_pipeline, DataCatalog()
         )
Ejemplo n.º 7
0
def catalog():
    """Return a new DataCatalog constructed with no arguments."""
    fresh_catalog = DataCatalog()
    return fresh_catalog
Ejemplo n.º 8
0
 def test_no_data_sets(self, is_async, branchless_pipeline):
     """Run the pipeline on a catalog with no datasets, seeded with ds1 = 42.

     The catalog gets an empty dict as its first argument and {"ds1": 42}
     as its second (presumably the feed dict -- confirm against DataCatalog).
     The run must return "ds3" with the value 42.
     """
     seeded_catalog = DataCatalog({}, {"ds1": 42})
     runner = SequentialRunner(is_async=is_async)
     result = runner.run(branchless_pipeline, seeded_catalog)
     assert "ds3" in result
     assert result["ds3"] == 42
Ejemplo n.º 9
0
def multi_catalog():
    """Return a DataCatalog with one CSV and one Parquet local dataset.

    "abc" is a CSVLocalDataSet at "abc.csv"; "xyz" is a ParquetLocalDataSet
    at "xyz.parq".
    """
    return DataCatalog(
        {
            "abc": CSVLocalDataSet(filepath="abc.csv"),
            "xyz": ParquetLocalDataSet(filepath="xyz.parq"),
        }
    )
Ejemplo n.º 10
0
 def test_save_to_unregistered(self, dummy_dataframe):
     """Saving under a name absent from the catalog raises DataSetNotFoundError."""
     expected_message = r"DataSet 'test' not found in the catalog"
     empty_catalog = DataCatalog(data_sets={})
     with pytest.raises(DataSetNotFoundError, match=expected_message):
         empty_catalog.save("test", dummy_dataframe)
Ejemplo n.º 11
0
 def test_load_from_unregistered(self):
     """Loading a name absent from the catalog raises DataSetNotFoundError."""
     expected_message = r"DataSet 'test' not found in the catalog"
     empty_catalog = DataCatalog(data_sets={})
     with pytest.raises(DataSetNotFoundError, match=expected_message):
         empty_catalog.load("test")
Ejemplo n.º 12
0
def data_catalog(data_set):
    """Return a DataCatalog that registers `data_set` under the name "test"."""
    registered = {"test": data_set}
    return DataCatalog(data_sets=registered)
Ejemplo n.º 13
0
 def test_node_returning_none(self, is_async):
     """Expect DataSetError when a node's output is saved as None in parallel.

     The pipeline chains `identity` (A -> B) into `return_none` (B -> C), with
     "A" supplied as a MemoryDataSet holding the string "42".
     """
     nodes = [node(identity, "A", "B"), node(return_none, "B", "C")]
     two_step_pipeline = Pipeline(nodes)
     input_catalog = DataCatalog({"A": MemoryDataSet("42")})
     expected_message = "Saving `None` to a `DataSet` is not allowed"
     with pytest.raises(DataSetError, match=expected_message):
         ParallelRunner(is_async=is_async).run(two_step_pipeline, input_catalog)