def test_release_at_earliest_opportunity(self):
    """Check that a dataset is released before the next dataset is loaded.

    A three-node chain (source -> identity -> sink) is run through
    ``ThreadRunner`` against a catalog whose datasets share one event
    list. The assertion pins the exact order of the recorded
    ``("load", name)`` / ``("release", name)`` tuples.
    """
    events = []
    data_catalog = DataCatalog(
        {
            "first": LoggingDataSet(events, "first"),
            "second": LoggingDataSet(events, "second"),
        }
    )
    chain = [
        node(source, None, "first"),
        node(identity, "first", "second"),
        node(sink, "second", None),
    ]

    ThreadRunner().run(Pipeline(chain), data_catalog)

    # "first" has a single consumer, so its release must be recorded
    # before "second" is loaded by the final node.
    expected_events = [
        ("load", "first"),
        ("release", "first"),
        ("load", "second"),
        ("release", "second"),
    ]
    assert events == expected_events
def test_count_multiple_loads(self):
    """Check that a dataset with two consumers is released only once.

    One source node feeds ``"dataset"`` to two sink nodes ("bob" and
    "fred"). After running the pipeline through ``ThreadRunner``, the
    shared event list must show both loads followed by a single release.
    """
    events = []
    data_catalog = DataCatalog({"dataset": LoggingDataSet(events, "dataset")})
    fan_out = [
        node(source, None, "dataset"),
        node(sink, "dataset", None, name="bob"),
        node(sink, "dataset", None, name="fred"),
    ]

    ThreadRunner().run(Pipeline(fan_out), data_catalog)

    # The release must be recorded only after both consumers have loaded.
    expected_events = [
        ("load", "dataset"),
        ("load", "dataset"),
        ("release", "dataset"),
    ]
    assert events == expected_events