Beispiel #1
0
    def test_managed_stream(self):
        """Check that a managed stream with nothing left to process is empty.

        ``dr1t1`` is logged as an OUTPUT of ``node_source`` and as an INPUT of
        ``node1``; the stream of ``node_source`` blocks filtered to those
        unprocessed by ``node1`` is then expected to raise ``StopIteration``
        on the first ``next()``.
        """
        source_log = SnapLog(
            graph_id=self.graph.hash,
            node_key=self.node_source.key,
            snap_key=self.node_source.snap.key,
            runtime_url="test",
        )
        source_output = DataBlockLog(
            snap_log=source_log,
            data_block=self.dr1t1,
            direction=Direction.OUTPUT,
        )
        consumer_log = SnapLog(
            graph_id=self.graph.hash,
            node_key=self.node1.key,
            snap_key=self.node1.snap.key,
            runtime_url="test",
        )
        consumer_input = DataBlockLog(
            snap_log=consumer_log,
            data_block=self.dr1t1,
            direction=Direction.INPUT,
        )
        self.env.md_api.add_all(
            [source_log, source_output, consumer_log, consumer_input]
        )

        unprocessed = stream(nodes=self.node_source).filter_unprocessed(self.node1)

        ctx = make_test_run_context()
        with ctx.env.md_api.begin():
            dbs = ManagedDataBlockStream(ctx, stream_builder=unprocessed)
            # On the passing path next() raises before the comparison runs,
            # so the `is None` check is only reached if nothing is raised.
            with pytest.raises(StopIteration):
                assert next(dbs) is None
    def test_stream_unprocessed_ineligible_already_input(self):
        """A block already logged as INPUT to ``node1`` is not unprocessed.

        ``dr1t1`` is recorded as an OUTPUT of ``node_source`` and as an INPUT
        of ``node1``, so the unprocessed-for-``node1`` query must be empty.
        """
        source_log = PipeLog(
            graph_id=self.graph.hash,
            node_key=self.node_source.key,
            pipe_key=self.node_source.pipe.key,
            runtime_url="test",
        )
        source_output = DataBlockLog(
            pipe_log=source_log,
            data_block=self.dr1t1,
            direction=Direction.OUTPUT,
        )
        consumer_log = PipeLog(
            graph_id=self.graph.hash,
            node_key=self.node1.key,
            pipe_key=self.node1.pipe.key,
            runtime_url="test",
        )
        consumer_input = DataBlockLog(
            pipe_log=consumer_log,
            data_block=self.dr1t1,
            direction=Direction.INPUT,
        )
        self.sess.add_all(
            [source_log, source_output, consumer_log, consumer_input]
        )

        unprocessed = StreamBuilder(nodes=self.node_source).filter_unprocessed(
            self.node1
        )
        first_block = unprocessed.get_query(self.ctx, self.sess).first()
        assert first_block is None
Beispiel #3
0
    def test_stream_unprocessed_ineligible_already_input(self):
        """A block already logged as INPUT to ``node1`` is not unprocessed.

        ``dr1t1`` is recorded as an OUTPUT of ``node_source`` and as an INPUT
        of ``node1``, so the unprocessed-for-``node1`` query must yield no row.
        """
        source_log = SnapLog(
            graph_id=self.graph.hash,
            node_key=self.node_source.key,
            snap_key=self.node_source.snap.key,
            runtime_url="test",
        )
        source_output = DataBlockLog(
            snap_log=source_log,
            data_block=self.dr1t1,
            direction=Direction.OUTPUT,
        )
        consumer_log = SnapLog(
            graph_id=self.graph.hash,
            node_key=self.node1.key,
            snap_key=self.node1.snap.key,
            runtime_url="test",
        )
        consumer_input = DataBlockLog(
            snap_log=consumer_log,
            data_block=self.dr1t1,
            direction=Direction.INPUT,
        )
        self.env.md_api.add_all(
            [source_log, source_output, consumer_log, consumer_input]
        )

        unprocessed = stream(nodes=self.node_source).filter_unprocessed(self.node1)
        found = unprocessed.get_query_result(self.env).scalar_one_or_none()
        assert found is None
 def log(self, block: DataBlockMetadata, direction: Direction):
     """Add a ``DataBlockLog`` entry for ``block`` to the metadata session.

     The entry is tied to ``self.pipe_log``, carries the given ``direction``
     and is stamped with ``utcnow()`` as its processing time.
     """
     self.metadata_session.add(
         DataBlockLog(  # type: ignore
             pipe_log=self.pipe_log,
             data_block=block,
             direction=direction,
             processed_at=utcnow(),
         )
     )
    def test_operators(self):
        """Exercise stream operators: a custom one, composition, and kwargs.

        Two blocks (``dr1t1`` and ``dr2t1``) are logged as OUTPUTs of
        ``node_source``. A counting operator defined inline tallies how many
        blocks pass through it in each scenario.
        """
        source_log = PipeLog(
            graph_id=self.graph.hash,
            node_key=self.node_source.key,
            pipe_key=self.node_source.pipe.key,
            runtime_url="test",
        )
        first_output = DataBlockLog(
            pipe_log=source_log,
            data_block=self.dr1t1,
            direction=Direction.OUTPUT,
        )
        second_output = DataBlockLog(
            pipe_log=source_log,
            data_block=self.dr2t1,
            direction=Direction.OUTPUT,
        )
        self.sess.add_all([source_log, first_output, second_output])

        self._cnt = 0

        @operator
        def count(stream: DataBlockStream) -> DataBlockStream:
            # Pass each block through unchanged while tallying it on the test.
            for db in stream:
                self._cnt += 1
                yield db

        builder = StreamBuilder(nodes=self.node_source)
        block_total = builder.get_query(self.ctx, self.sess).count()
        assert block_total == 2

        # Plain custom operator: draining the stream should see every block.
        counted = count(builder).as_managed_stream(self.ctx, self.sess)
        list(counted)
        assert self._cnt == block_total

        # Test composed operators
        self._cnt = 0
        latest_counted = count(latest(builder)).as_managed_stream(
            self.ctx, self.sess
        )
        list(latest_counted)
        assert self._cnt == 1

        # Test kwargs
        self._cnt = 0
        rejected = filter(builder, function=lambda db: False)
        list(count(rejected).as_managed_stream(self.ctx, self.sess))
        assert self._cnt == 0
    def test_stream_unprocessed_ineligible_already_output(self):
        """
        A block that a node has already OUTPUT is not offered to that node as
        input by default, _even if the node never took it as input_. The
        exception is a self reference (`this`), enabled here through
        `allow_cycle=True`. The default guards against infinite loops.
        """
        source_log = PipeLog(
            graph_id=self.graph.hash,
            node_key=self.node_source.key,
            pipe_key=self.node_source.pipe.key,
            runtime_url="test",
        )
        source_output = DataBlockLog(
            pipe_log=source_log,
            data_block=self.dr1t1,
            direction=Direction.OUTPUT,
        )
        node1_log = PipeLog(
            graph_id=self.graph.hash,
            node_key=self.node1.key,
            pipe_key=self.node1.pipe.key,
            runtime_url="test",
        )
        node1_output = DataBlockLog(
            pipe_log=node1_log,
            data_block=self.dr1t1,
            direction=Direction.OUTPUT,
        )
        self.sess.add_all([source_log, source_output, node1_log, node1_output])

        base = StreamBuilder(nodes=self.node_source)

        # Default: the block node1 already output is filtered away.
        without_cycle = base.filter_unprocessed(self.node1)
        blocked = without_cycle.get_query(self.ctx, self.sess).first()
        assert blocked is None

        # But ok with self reference
        with_cycle = base.filter_unprocessed(self.node1, allow_cycle=True)
        allowed = with_cycle.get_query(self.ctx, self.sess).first()
        assert allowed == self.dr1t1
Beispiel #7
0
    def test_stream_unprocessed_ineligible_already_output(self):
        """
        A block that a node has already OUTPUT is not offered to that node as
        input by default, _even if the node never took it as input_. The
        exception is a self reference (`this`), enabled here through
        `allow_cycle=True`. The default guards against infinite loops.
        """
        source_log = SnapLog(
            graph_id=self.graph.hash,
            node_key=self.node_source.key,
            snap_key=self.node_source.snap.key,
            runtime_url="test",
        )
        source_output = DataBlockLog(
            snap_log=source_log,
            data_block=self.dr1t1,
            direction=Direction.OUTPUT,
        )
        node1_log = SnapLog(
            graph_id=self.graph.hash,
            node_key=self.node1.key,
            snap_key=self.node1.snap.key,
            runtime_url="test",
        )
        node1_output = DataBlockLog(
            snap_log=node1_log,
            data_block=self.dr1t1,
            direction=Direction.OUTPUT,
        )
        self.env.md_api.add_all(
            [source_log, source_output, node1_log, node1_output]
        )

        base = stream(nodes=self.node_source)

        # Default: the block node1 already output is filtered away.
        without_cycle = base.filter_unprocessed(self.node1)
        blocked = without_cycle.get_query_result(self.env).scalar_one_or_none()
        assert blocked is None

        # But ok with self reference
        with_cycle = base.filter_unprocessed(self.node1, allow_cycle=True)
        allowed = with_cycle.get_query_result(self.env).scalar_one_or_none()
        assert allowed == self.dr1t1
Beispiel #8
0
    def test_stream_unprocessed_eligible(self):
        """A block output by ``node_source`` with no ``node1`` log is eligible.

        Only an OUTPUT log for ``dr1t1`` on ``node_source`` is recorded, so the
        unprocessed-for-``node1`` query is expected to return ``dr1t1``.
        """
        source_log = DataFunctionLog(
            graph_id=self.graph.hash,
            node_key=self.node_source.key,
            function_key=self.node_source.function.key,
            runtime_url="test",
        )
        source_output = DataBlockLog(
            function_log=source_log,
            data_block=self.dr1t1,
            direction=Direction.OUTPUT,
        )
        self.env.md_api.add_all([source_log, source_output])

        unprocessed = stream(nodes=self.node_source).filter_unprocessed(self.node1)
        found = unprocessed.get_query_result(self.env).scalar_one_or_none()
        assert found == self.dr1t1
Beispiel #9
0
 def ensure_log(self, block: DataBlockMetadata, direction: Direction,
                name: str):
     """Record a ``DataBlockLog`` for ``block`` unless a matching one exists.

     A log matches when it shares this function log's id, the stream
     ``name``, the block's id and the ``direction``. If the lookup returns a
     truthy row nothing is added; otherwise a new entry stamped with
     ``utcnow()`` is added through ``self.metadata_api``.
     """
     existing = self.metadata_api.execute(
         select(DataBlockLog).filter_by(
             function_log_id=self.function_log.id,
             stream_name=name,
             data_block_id=block.id,
             direction=direction,
         )
     ).scalar_one_or_none()
     if existing:
         return
     # Ids are read again here rather than reused from the lookup above.
     self.metadata_api.add(
         DataBlockLog(  # type: ignore
             function_log_id=self.function_log.id,
             stream_name=name,
             data_block_id=block.id,
             direction=direction,
             processed_at=utcnow(),
         )
     )
    def test_stream_unprocessed_eligible_schema(self):
        """Filtering a stream by schema keeps or drops the eligible block.

        With ``dr1t1`` logged as an OUTPUT of ``node_source``, a stream built
        with ``schema="TestSchema1"`` returns it while ``"TestSchema2"``
        returns nothing (presumably because the fixture block uses
        TestSchema1 -- the fixture is not visible here).
        """
        source_log = PipeLog(
            graph_id=self.graph.hash,
            node_key=self.node_source.key,
            pipe_key=self.node_source.pipe.key,
            runtime_url="test",
        )
        source_output = DataBlockLog(
            pipe_log=source_log,
            data_block=self.dr1t1,
            direction=Direction.OUTPUT,
        )
        self.sess.add_all([source_log, source_output])

        matching = StreamBuilder(nodes=self.node_source, schema="TestSchema1")
        matching = matching.filter_unprocessed(self.node1)
        hit = matching.get_query(self.ctx, self.sess).first()
        assert hit == self.dr1t1

        other = StreamBuilder(nodes=self.node_source, schema="TestSchema2")
        other = other.filter_unprocessed(self.node1)
        miss = other.get_query(self.ctx, self.sess).first()
        assert miss is None
Beispiel #11
0
    def test_stream_unprocessed_eligible_schema(self):
        """Filtering a stream by schema keeps or drops the eligible block.

        With ``dr1t1`` logged as an OUTPUT of ``node_source``, a stream built
        with ``schema="TestSchema1"`` returns it while ``"TestSchema2"``
        returns nothing (presumably because the fixture block uses
        TestSchema1 -- the fixture is not visible here).
        """
        source_log = SnapLog(
            graph_id=self.graph.hash,
            node_key=self.node_source.key,
            snap_key=self.node_source.snap.key,
            runtime_url="test",
        )
        source_output = DataBlockLog(
            snap_log=source_log,
            data_block=self.dr1t1,
            direction=Direction.OUTPUT,
        )
        self.env.md_api.add_all([source_log, source_output])

        matching = stream(nodes=self.node_source, schema="TestSchema1")
        matching = matching.filter_unprocessed(self.node1)
        hit = matching.get_query_result(self.env).scalar_one_or_none()
        assert hit == self.dr1t1

        other = stream(nodes=self.node_source, schema="TestSchema2")
        other = other.filter_unprocessed(self.node1)
        miss = other.get_query_result(self.env).scalar_one_or_none()
        assert miss is None