Example #1
0
    def test_release_at_earliest_opportunity(self):
        """Check that "first" is released before "second" is loaded.

        A three-node chain (source -> identity -> sink) is run with logging
        datasets; the shared log must show each dataset being released right
        after its load, not at the end of the run.
        """
        shared_manager = ParallelRunnerManager()
        shared_manager.start()
        events = shared_manager.list()

        chain = [
            node(source, None, "first"),
            node(identity, "first", "second"),
            node(sink, "second", None),
        ]
        pipeline = Pipeline(chain)
        datasets = {
            name: shared_manager.LoggingDataSet(events, name)
            for name in ("first", "second")
        }
        catalog = DataCatalog(datasets)

        runner = ParallelRunner()
        runner.run(pipeline, catalog)

        # "release first" has to show up ahead of "load second"
        expected_events = [
            ("load", "first"),
            ("release", "first"),
            ("load", "second"),
            ("release", "second"),
        ]
        assert list(events) == expected_events
Example #2
0
    def test_release_transcoded(self):
        """Check that both transcoded entries ("ds@save", "ds@load") are released."""
        shared_manager = ParallelRunnerManager()
        shared_manager.start()
        events = shared_manager.list()

        producer = node(source, None, "ds@save")
        consumer = node(sink, "ds@load", None)
        pipeline = Pipeline([producer, consumer])

        # NOTE(review): these datasets are built with LoggingDataSet directly,
        # not through the manager (manager.LoggingDataSet) -- confirm that this
        # is intentional.
        datasets = {
            "ds@save": LoggingDataSet(events, "save"),
            "ds@load": LoggingDataSet(events, "load"),
        }
        catalog = DataCatalog(datasets)

        runner = ParallelRunner()
        runner.run(pipeline, catalog)

        # a release must be logged for the save side as well as the load side
        expected_events = [
            ("release", "save"),
            ("load", "load"),
            ("release", "load"),
        ]
        assert list(events) == expected_events
Example #3
0
    def test_dont_release_inputs_and_outputs(self):
        """Check that only the intermediate dataset "middle" gets released.

        "in" is the pipeline's free input and "out" its final output; neither
        may appear in a "release" entry of the shared log.
        """
        shared_manager = ParallelRunnerManager()
        shared_manager.start()
        events = shared_manager.list()

        steps = [
            node(identity, "in", "middle"),
            node(identity, "middle", "out"),
        ]
        pipeline = Pipeline(steps)

        datasets = {
            "in": shared_manager.LoggingDataSet(events, "in", "stuff"),
            "middle": shared_manager.LoggingDataSet(events, "middle"),
            "out": shared_manager.LoggingDataSet(events, "out"),
        }
        catalog = DataCatalog(datasets)

        runner = ParallelRunner()
        runner.run(pipeline, catalog)

        # no ("release", "in") and no ("release", "out") are expected here
        expected_events = [
            ("load", "in"),
            ("load", "middle"),
            ("release", "middle"),
        ]
        assert list(events) == expected_events
Example #4
0
    def test_count_multiple_loads(self):
        """Check that a dataset with two consumers is released only after both loads."""
        shared_manager = ParallelRunnerManager()
        shared_manager.start()
        events = shared_manager.list()

        producer = node(source, None, "dataset")
        consumers = [
            node(sink, "dataset", None, name=consumer_name)
            for consumer_name in ("bob", "fred")
        ]
        pipeline = Pipeline([producer, *consumers])

        dataset = shared_manager.LoggingDataSet(events, "dataset")
        catalog = DataCatalog({"dataset": dataset})

        runner = ParallelRunner()
        runner.run(pipeline, catalog)

        # the single release must come after both of the loads
        expected_events = [
            ("load", "dataset"),
            ("load", "dataset"),
            ("release", "dataset"),
        ]
        assert list(events) == expected_events