Example #1
    def through(self, channel: Union[str, ChannelT]) -> StreamT:
        """Forward values to in this stream to channel.

        Send messages received on this stream to another channel,
        and return a new stream that consumes from that channel.

        Notes:
            The messages are forwarded after any processors have been
            applied.

        Example:
            .. sourcecode:: python

                topic = app.topic('foo')

                @app.agent(topic)
                async def mytask(stream):
                    async for value in stream.through(app.topic('bar')):
                        # value was first received in topic 'foo',
                        # then forwarded and consumed from topic 'bar'
                        print(value)
        """
        if self._finalized:
            # If the agent restarts we reuse the same stream object,
            # which has already applied stream.through(): iteration
            # sets the finalized flag, so this call becomes a no-op.
            return self
        if self.concurrency_index is not None:
            raise ImproperlyConfigured(
                'Agent with concurrency>1 cannot use stream.through!')
        # mypy workaround: bind the narrowed channel type to a new name.
        if isinstance(channel, str):
            channelchannel = cast(ChannelT, self.derive_topic(channel))
        else:
            channelchannel = channel

        channel_it = aiter(channelchannel)
        if self._next is not None:
            raise ImproperlyConfigured(
                'Stream is already using group_by/through')
        through = self._chain(channel=channel_it)

        async def forward(value: T) -> T:
            event = self.current_event
            return await maybe_forward(event, channelchannel)

        self.add_processor(forward)
        self._enable_passive(cast(ChannelT, channel_it), declare=True)
        return through
Example #2
    async def join(
        self,
        values: Union[AsyncIterable[V], Iterable[V]],
        key: K = None,
        reply_to: ReplyToArg = None,
    ) -> List[Any]:  # pragma: no cover
        """RPC map operation on a list of values.

        A join returns the results in order, and only returns once
        all values have been processed.
        """
        return await self.kvjoin(
            ((key, value) async for value in aiter(values)),
            reply_to=reply_to,
        )
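
A minimal usage sketch of join() (the `double` agent and its reply behaviour are assumptions for illustration, not part of the excerpt): all replies arrive as a single ordered list.

    # Hypothetical agent that replies with value * 2.
    @app.agent(topic)
    async def double(stream):
        async for value in stream:
            yield value * 2

    # From another coroutine, e.g. an @app.task:
    results = await double.join([1, 2, 3])
    assert results == [2, 4, 6]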
Example #3
    async def map(
        self,
        values: Union[AsyncIterable, Iterable],
        key: K = None,
        reply_to: ReplyToArg = None,
    ) -> AsyncIterator:  # pragma: no cover
        """RPC map operation on a list of values.

        A map operation iterates over results as they arrive.
        See :meth:`join` and :meth:`kvjoin` if you want them in order.
        """
        # Map takes only values, but can provide one key that is used for all.
        async for value in self.kvmap(((key, v) async for v in aiter(values)),
                                      reply_to):
            yield value
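
Unlike join(), map() yields each reply as soon as it arrives, so ordering is not guaranteed. A sketch reusing the hypothetical `double` agent from the previous note:

    async for reply in double.map([1, 2, 3]):
        print(reply)  # replies stream in as they complete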
Example #4
    async def _prepare_actor(self, aref: ActorRefT, beacon: NodeT) -> ActorRefT:
        coro: Any
        if isinstance(aref, Awaitable):
            # agent does not yield
            coro = aref
            if self._sinks:
                raise ImproperlyConfigured("Agent must yield to use sinks")
        else:
            # agent yields and is an AsyncIterator so we have to consume it.
            coro = self._slurp(aref, aiter(aref))
        task = asyncio.Task(self._execute_actor(coro, aref), loop=self.loop)
        task._beacon = beacon  # type: ignore
        aref.actor_task = task
        self._actors.add(aref)
        return aref
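
The isinstance(aref, Awaitable) check distinguishes the two shapes an agent coroutine may take. A hedged sketch of both (topic names are placeholders):

    @app.agent(app.topic('plain'))
    async def consumer(stream):
        # Plain coroutine: never yields, so sinks are not allowed.
        async for value in stream:
            print(value)

    @app.agent(app.topic('words'), sink=[app.topic('out')])
    async def producer(stream):
        # Async generator: yields values, which _slurp() consumes
        # and forwards to sinks and reply destinations.
        async for value in stream:
            yield value.upper()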
Example #5
    async def test_slurp__headers(self, *, agent, app):
        agent.use_reply_headers = True
        aref = agent(index=None, active_partitions=None)
        stream = aref.stream.get_active_stream()
        agent._delegate_to_sinks = AsyncMock(name='_delegate_to_sinks')
        agent._reply = AsyncMock(name='_reply')

        def on_delegate(value):
            raise StopAsyncIteration()

        word = Word('word')
        message1 = Mock(name='message1', autospec=Message)
        headers1 = message1.headers = {
            'Faust-Ag-ReplyTo': 'reply_to',
            'Faust-Ag-CorrelationId': 'correlation_id',
        }
        message2 = Mock(name='message2', autospec=Message)
        headers2 = message2.headers = {}
        event1 = Event(app, None, word, headers1, message1)
        event2 = Event(app, 'key', 'bar', headers2, message2)
        values = [
            (event1, word, True),
            (event1, word, False),
            (event2, 'bar', True),
        ]

        class AIT:
            async def __aiter__(self):
                for event, value, set_cur_event in values:
                    if set_cur_event:
                        stream.current_event = event
                    else:
                        stream.current_event = None
                    yield value

        it = aiter(AIT())
        await agent._slurp(aref, it)

        agent._reply.assert_called_once_with(None, word, 'reply_to',
                                             'correlation_id')
        agent._delegate_to_sinks.coro.assert_has_calls([
            call(word),
            call('bar'),
        ])
Example #6
    def stream(self,
               channel: Union[AsyncIterable, Iterable],
               beacon: NodeT = None,
               **kwargs: Any) -> StreamT:
        """Create new stream from channel/topic/iterable/async iterable.

        Arguments:
            channel: Iterable to stream over (async or non-async).

            kwargs: See :class:`Stream`.

        Returns:
            faust.Stream:
                to iterate over events in the stream.
        """
        return self.conf.Stream(
            app=self,
            channel=aiter(channel) if channel is not None else None,
            beacon=beacon or self.beacon,
            **kwargs)
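
A short sketch of creating a stream directly from the app, outside an agent (the broker URL and topic name are placeholders):

    import faust

    app = faust.App('example-app', broker='kafka://localhost:9092')
    withdrawals = app.topic('withdrawals')

    @app.task
    async def consume():
        # app.stream() wraps the topic with aiter() as shown above.
        async for event in app.stream(withdrawals):
            print(event)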
Example #7
    async def _barrier_send(
            self, barrier: BarrierState,
            items: Union[AsyncIterable[Tuple[K, V]], Iterable[Tuple[K, V]]],
            reply_to: ReplyToArg) -> AsyncIterator[str]:  # pragma: no cover
        # map: send many tasks to agents
        # while trying to pop incoming results off.
        async for key, value in aiter(items):
            correlation_id = str(uuid4())
            p = await self.ask_nowait(
                key=key,
                value=value,
                reply_to=reply_to,
                correlation_id=correlation_id)
            # add reply promise to the barrier
            barrier.add(p)

            # the ReplyConsumer will call the barrier whenever a new
            # result comes in.
            app = cast(App, self.app)
            await app.maybe_start_client()
            await app._reply_consumer.add(p.correlation_id, barrier)

            yield correlation_id
Example #8
    def group_by(self,
                 key: GroupByKeyArg,
                 *,
                 name: str = None,
                 topic: TopicT = None,
                 partitions: int = None) -> StreamT:
        """Create new stream that repartitions the stream using a new key.

        Arguments:
            key: The key argument decides how the new key is generated,
                it can be a field descriptor, a callable, or an async
                callable.

                Note: The ``name`` argument must be provided if the key
                    argument is a callable.

            name: Suffix to use for repartitioned topics.
                This argument is required if `key` is a callable.

        Examples:
            Using a field descriptor to use a field in the event as the new
            key:

            .. sourcecode:: python

                s = withdrawals_topic.stream()
                # values in this stream are of type Withdrawal
                async for event in s.group_by(Withdrawal.account_id):
                    ...

            Using an async callable to extract a new key:

            .. sourcecode:: python

                s = withdrawals_topic.stream()

                async def get_key(withdrawal):
                    return await aiohttp.get(
                        f'http://e.com/resolve_account/{withdrawal.account_id}')

                async for event in s.group_by(get_key):
                    ...

            Using a regular callable to extract a new key:

            .. sourcecode:: python

                s = withdrawals_topic.stream()

                def get_key(withdrawal):
                    return withdrawal.account_id.upper()

                async for event in s.group_by(get_key):
                    ...
        """
        if self._finalized:
            # see note in self.through()
            return self
        channel: ChannelT
        if self.concurrency_index is not None:
            raise ImproperlyConfigured(
                'Agent with concurrency>1 cannot use stream.group_by!')
        if not name:
            if isinstance(key, FieldDescriptorT):
                name = cast(FieldDescriptorT, key).ident
            else:
                raise TypeError(
                    'group_by with callback must set name=topic_suffix')
        if topic is not None:
            channel = topic
        else:
            suffix = '-' + self.app.conf.id + '-' + name + '-repartition'
            p = partitions if partitions else self.app.conf.topic_partitions
            channel = cast(ChannelT, self.channel).derive(suffix=suffix,
                                                          partitions=p,
                                                          internal=True)
        format_key = self._format_key

        channel_it = aiter(channel)
        if self._next is not None:
            raise ImproperlyConfigured('Stream already uses group_by/through')
        grouped = self._chain(channel=channel_it)

        async def repartition(value: T) -> T:
            event = self.current_event
            if event is None:
                raise RuntimeError(
                    'Cannot repartition stream with non-topic channel')
            new_key = await format_key(key, value)
            await event.forward(channel, key=new_key)
            return value

        self.add_processor(repartition)
        self._enable_passive(cast(ChannelT, channel_it), declare=True)
        return grouped
Example #9
    async def take(self, max_: int,
                   within: Seconds) -> AsyncIterable[Sequence[T_co]]:
        """Buffer n values at a time and yield a list of buffered values.

        Arguments:
            within: Timeout for when we give up waiting for another value,
                and process the values we have.
                Warning: If there's no timeout (i.e. `timeout=None`),
                the agent is likely to stall and block buffered events for an
                unreasonable length of time(!).
        """
        buffer: List[T_co] = []
        events: List[EventT] = []
        buffer_add = buffer.append
        event_add = events.append
        buffer_size = buffer.__len__
        buffer_full = asyncio.Event(loop=self.loop)
        buffer_consumed = asyncio.Event(loop=self.loop)
        timeout = want_seconds(within) if within else None
        stream_enable_acks: bool = self.enable_acks

        buffer_consuming: Optional[asyncio.Future] = None

        channel_it = aiter(self.channel)

        # We add this processor to populate the buffer, and the stream
        # is passively consumed in the background (enable_passive below).
        async def add_to_buffer(value: T) -> T:
            # buffer_consuming is set when consuming buffer after timeout.
            nonlocal buffer_consuming
            if buffer_consuming is not None:
                try:
                    await buffer_consuming
                finally:
                    buffer_consuming = None
            buffer_add(cast(T_co, value))
            event = self.current_event
            if event is not None:
                event_add(event)
            if buffer_size() >= max_:
                # signal that the buffer is full and should be emptied.
                buffer_full.set()
                # strict wait for buffer to be consumed after buffer full.
                # (if max_ is 1000, we are not allowed to return 1001 values.)
                buffer_consumed.clear()
                await self.wait(buffer_consumed)
            return value

        # Disable automatic acks so that this method can ack events
        # manually, only after the user has consumed them.
        self.enable_acks = False

        self.add_processor(add_to_buffer)
        self._enable_passive(cast(ChannelT, channel_it))
        try:
            while not self.should_stop:
                # wait until buffer full, or timeout
                await self.wait_for_stopped(buffer_full, timeout=timeout)
                if buffer:
                    # make sure the background thread does not add new items
                    # to the buffer while we read.
                    buffer_consuming = self.loop.create_future()
                    try:
                        yield list(buffer)
                    finally:
                        buffer.clear()
                        for event in events:
                            await self.ack(event)
                        events.clear()
                        # allow writing to buffer again
                        notify(buffer_consuming)
                        buffer_full.clear()
                        buffer_consumed.set()

        finally:
            # Restore last behaviour of "enable_acks"
            self.enable_acks = stream_enable_acks
            self._processors.remove(add_to_buffer)
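
Typical usage inside an agent, as a hedged sketch (topic name and sizes are placeholders): a batch is yielded when 100 values have buffered, or when 10 seconds elapse, whichever comes first.

    @app.agent(app.topic('metrics'))
    async def batcher(stream):
        async for batch in stream.take(100, within=10.0):
            print(f'processing {len(batch)} values')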
Example #10
async def test_aiter__AsyncIterable():
    it = aiter(AIT())
    assert await anext(it) == 0
    assert await anext(it) == 1
    assert await anext(it) == 2
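
The AIT fixture is not shown in this excerpt; a definition consistent with the assertions would be an async iterable (it defines __aiter__ but is not itself an iterator) that yields 0, 1, 2:

    class AIT:
        async def __aiter__(self):
            for i in range(3):
                yield i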
Example #11
def test_aiter__not_an_iterator():
    with pytest.raises(TypeError):
        aiter(object())
Example #12
    def __aiter__(self) -> AsyncIterator:
        return aiter(self._stream)
Example #13
    async def noack_take(self, max_: int,
                         within: Seconds) -> AsyncIterable[Sequence[T_co]]:
        """
         Buffer n values at a time and yield a list of buffered values.
        :param max_: Max number of messages to receive. When more than this
             number of messages are received within the specified number of
             seconds then we flush the buffer immediately.
        :param within: Timeout for when we give up waiting for another value,
             and process the values we have.
             Warning: If there's no timeout (i.e. `timeout=None`),
             the agent is likely to stall and block buffered events for an
             unreasonable length of time(!).
        """
        buffer: List[T_co] = []
        events: List[EventT] = []
        buffer_add = buffer.append
        event_add = events.append
        buffer_size = buffer.__len__
        buffer_full = asyncio.Event()
        buffer_consumed = asyncio.Event()
        timeout = want_seconds(within) if within else None
        stream_enable_acks: bool = self.enable_acks

        buffer_consuming: Optional[asyncio.Future] = None

        channel_it = aiter(self.channel)

        # We add this processor to populate the buffer, and the stream
        # is passively consumed in the background (enable_passive below).
        async def add_to_buffer(value: T) -> T:
            try:
                # buffer_consuming is set when consuming buffer
                # after timeout.
                nonlocal buffer_consuming
                if buffer_consuming is not None:
                    try:
                        await buffer_consuming
                    finally:
                        buffer_consuming = None

                # We want to save events instead of values to allow for manual ack
                event = self.current_event
                buffer_add(cast(T_co, event))
                if event is None:
                    raise RuntimeError(
                        "Take buffer found current_event is None")

                event_add(event)
                if buffer_size() >= max_:
                    # signal that the buffer is full and should be emptied.
                    buffer_full.set()
                    # strict wait for buffer to be consumed after buffer
                    # full.
                    # If max is 1000, we are not allowed to return 1001
                    # values.
                    buffer_consumed.clear()
                    await self.wait(buffer_consumed)
            except CancelledError:  # pragma: no cover
                raise
            except Exception as exc:
                self.log.exception("Error adding to take buffer: %r", exc)
                await self.crash(exc)
            return value

        # Disable automatic acks; in this variant the caller is
        # responsible for acking events after consuming them.
        self.enable_acks = False

        self.add_processor(add_to_buffer)
        self._enable_passive(cast(ChannelT, channel_it))
        try:
            while not self.should_stop:
                # wait until buffer full, or timeout
                await self.wait_for_stopped(buffer_full, timeout=timeout)
                if buffer:
                    # make sure background thread does not add new items to
                    # buffer while we read.
                    buffer_consuming = self.loop.create_future()
                    try:
                        yield list(buffer)
                    finally:
                        buffer.clear()
                        # code change: We want to manually ack
                        # for event in events:
                        #     await self.ack(event)
                        events.clear()
                        # allow writing to buffer again
                        notify(buffer_consuming)
                        buffer_full.clear()
                        buffer_consumed.set()
                else:  # pragma: no cover
                    pass
            else:  # pragma: no cover
                pass

        finally:
            # Restore last behaviour of "enable_acks"
            self.enable_acks = stream_enable_acks
            self._processors.remove(add_to_buffer)
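
A hedged usage sketch of noack_take() (the topic name and the process() helper are hypothetical): note that in this variant the buffer holds events rather than plain values, so the caller unpacks event.value and acks each event explicitly.

    @app.agent(app.topic('orders'))
    async def batcher(stream):
        async for events in stream.noack_take(100, within=10.0):
            for event in events:
                process(event.value)
                await stream.ack(event)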