Example #1
0
    def test_count_multiple_loads(self):
        """Check that a dataset read by two nodes is released once, after both loads."""
        runner_manager = ParallelRunnerManager()
        runner_manager.start()
        events = runner_manager.list()

        # one producer feeding two independent consumers of the same dataset
        nodes = [
            node(source, None, "dataset"),
            node(sink, "dataset", None, name="bob"),
            node(sink, "dataset", None, name="fred"),
        ]
        pipeline = Pipeline(nodes)
        logging_dataset = runner_manager.LoggingDataSet(events, "dataset")
        catalog = DataCatalog({"dataset": logging_dataset})
        ParallelRunner().run(pipeline, catalog)

        # the single release must only happen after both of the loads
        expected_events = [
            ("load", "dataset"),
            ("load", "dataset"),
            ("release", "dataset"),
        ]
        assert list(events) == expected_events
Example #2
0
    def test_release_at_earliest_opportunity(self):
        """Check that "first" is released before "second" is loaded."""
        runner_manager = ParallelRunnerManager()
        runner_manager.start()
        shared_log = Manager().dict(log=[])

        # linear chain: source -> "first" -> identity -> "second" -> sink
        steps = [
            node(source, None, "first"),
            node(identity, "first", "second"),
            node(sink, "second", None),
        ]
        pipeline = Pipeline(steps)
        datasets = {
            name: runner_manager.LoggingDataSet(shared_log, name)
            for name in ("first", "second")
        }
        catalog = DataCatalog(datasets)
        ParallelRunner().run(pipeline, catalog)

        # "release first" has to show up ahead of "load second"
        expected_events = [
            ("load", "first"),
            ("release", "first"),
            ("load", "second"),
            ("release", "second"),
        ]
        assert shared_log["log"] == expected_events
Example #3
0
        self.log.append(("load", self.name))
        return self.value

    def _save(self, data: Any) -> None:
        """Store ``data`` on the instance as its current value.

        Nothing is written to the log here; only ``_load`` and ``_release``
        record events.
        """
        self.value = data

    def _release(self) -> None:
        self.log.append(("release", self.name))
        self.value = None

    def _describe(self) -> Dict[str, Any]:
        """Return an empty dictionary; nothing is reported for this dataset."""
        return {}


# Register LoggingDataSet with ParallelRunnerManager under the type id
# "LoggingDataSet". The registration is skipped on Windows.
if not sys.platform.startswith("win"):
    ParallelRunnerManager.register(  # pylint: disable=no-member
        "LoggingDataSet", LoggingDataSet)


@pytest.mark.skipif(sys.platform.startswith("win"),
                    reason="Due to bug in parallel runner")
@pytest.mark.parametrize("is_async", [False, True])
class TestParallelRunnerRelease:
    def test_dont_release_inputs_and_outputs(self, is_async):
        runner = ParallelRunner(is_async=is_async)
        log = runner._manager.list()

        pipeline = Pipeline(
            [node(identity, "in", "middle"),
             node(identity, "middle", "out")])
        # pylint: disable=no-member
        catalog = DataCatalog({
Example #4
0
    def _load(self) -> Any:
        self.log["log"] += [("load", self.name)]
        return self.value

    def _save(self, data: Any) -> None:
        """Store ``data`` on the instance as its current value.

        Nothing is written to the log here; only ``_load`` and ``_release``
        record events.
        """
        self.value = data

    def _release(self) -> None:
        self.log["log"] += [("release", self.name)]
        self.value = None

    def _describe(self) -> Dict[str, Any]:
        """Return an empty dictionary; nothing is reported for this dataset."""
        return {}


# Register LoggingDataSet with ParallelRunnerManager so that the tests can
# create instances through ``manager.LoggingDataSet(...)``.
ParallelRunnerManager.register("LoggingDataSet", LoggingDataSet)


class TestParallelRunnerRelease:
    def test_dont_release_inputs_and_outputs(self):
        manager = ParallelRunnerManager()
        manager.start()
        log = Manager().dict(log=[])

        pipeline = Pipeline(
            [node(identity, "in", "middle"),
             node(identity, "middle", "out")])
        catalog = DataCatalog({
            "in": manager.LoggingDataSet(log, "in", "stuff"),
            "middle": manager.LoggingDataSet(log, "middle"),
            "out": manager.LoggingDataSet(log, "out"),