def through(self, channel: Union[str, ChannelT]) -> StreamT:
    """Forward values in this stream to another channel.

    Send messages received on this stream to another channel,
    and return a new stream that consumes from that channel.

    Notes:
        The messages are forwarded after any processors have been
        applied.

    Example:
        .. sourcecode:: python

            topic = app.topic('foo')

            @app.agent(topic)
            async def mytask(stream):
                async for value in stream.through(app.topic('bar')):
                    # value was first received in topic 'foo',
                    # then forwarded and consumed from topic 'bar'
                    print(value)
    """
    if self._finalized:
        # If the agent is restarted we reuse the same stream object,
        # which has already applied stream.through().
        # The finalized flag is set on iteration,
        # so this through() becomes a no-op.
        return self
    if self.concurrency_index is not None:
        raise ImproperlyConfigured(
            'Agent with concurrency>1 cannot use stream.through!')
    # second variable name to satisfy mypy
    if isinstance(channel, str):
        channelchannel = cast(ChannelT, self.derive_topic(channel))
    else:
        channelchannel = channel

    channel_it = aiter(channelchannel)
    if self._next is not None:
        raise ImproperlyConfigured(
            'Stream is already using group_by/through')
    through = self._chain(channel=channel_it)

    async def forward(value: T) -> T:
        event = self.current_event
        return await maybe_forward(event, channelchannel)

    self.add_processor(forward)
    self._enable_passive(cast(ChannelT, channel_it), declare=True)
    return through
async def join(
        self,
        values: Union[AsyncIterable[V], Iterable[V]],
        key: K = None,
        reply_to: ReplyToArg = None) -> List[Any]:  # pragma: no cover
    """RPC map operation on a list of values.

    A join returns the results in order, and only returns once
    all values have been processed.
    """
    return await self.kvjoin(
        ((key, value) async for value in aiter(values)),
        reply_to=reply_to,
    )
async def map(
        self,
        values: Union[AsyncIterable, Iterable],
        key: K = None,
        reply_to: ReplyToArg = None) -> AsyncIterator:  # pragma: no cover
    """RPC map operation on a list of values.

    A map operation iterates over results as they arrive.
    See :meth:`join` and :meth:`kvjoin` if you want them in order.
    """
    # Map takes only values, but can provide one key that is used for all.
    async for value in self.kvmap(
            ((key, v) async for v in aiter(values)), reply_to):
        yield value
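# Usage sketch for map()/join() (a minimal illustration, not part of the
# source above; the app, broker URL and the agent `mul` are assumptions).
# map() yields replies as they arrive; join() gathers them in input order.
import faust

app = faust.App('rpc-example', broker='kafka://localhost:9092')


@app.agent(app.topic('mul'))
async def mul(stream):
    async for value in stream:
        # yielding from an agent produces the RPC reply value
        yield value * 2


async def caller() -> None:
    # replies arrive in completion order
    async for reply in mul.map([1, 2, 3]):
        print(reply)
    # replies are collected and returned in input order
    print(await mul.join([1, 2, 3]))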
async def _prepare_actor(self, aref: ActorRefT,
                         beacon: NodeT) -> ActorRefT:
    coro: Any
    if isinstance(aref, Awaitable):
        # agent does not yield
        coro = aref
        if self._sinks:
            raise ImproperlyConfigured("Agent must yield to use sinks")
    else:
        # agent yields and is an AsyncIterator so we have to consume it.
        coro = self._slurp(aref, aiter(aref))
    task = asyncio.Task(self._execute_actor(coro, aref), loop=self.loop)
    task._beacon = beacon  # type: ignore
    aref.actor_task = task
    self._actors.add(aref)
    return aref
async def test_slurp__headers(self, *, agent, app):
    agent.use_reply_headers = True
    aref = agent(index=None, active_partitions=None)
    stream = aref.stream.get_active_stream()
    agent._delegate_to_sinks = AsyncMock(name='_delegate_to_sinks')
    agent._reply = AsyncMock(name='_reply')

    def on_delegate(value):
        raise StopAsyncIteration()

    word = Word('word')
    message1 = Mock(name='message1', autospec=Message)
    headers1 = message1.headers = {
        'Faust-Ag-ReplyTo': 'reply_to',
        'Faust-Ag-CorrelationId': 'correlation_id',
    }
    message2 = Mock(name='message2', autospec=Message)
    headers2 = message2.headers = {}
    event1 = Event(app, None, word, headers1, message1)
    event2 = Event(app, 'key', 'bar', headers2, message2)
    values = [
        (event1, word, True),
        (event1, word, False),
        (event2, 'bar', True),
    ]

    class AIT:
        async def __aiter__(self):
            for event, value, set_cur_event in values:
                if set_cur_event:
                    stream.current_event = event
                else:
                    stream.current_event = None
                yield value

    it = aiter(AIT())
    await agent._slurp(aref, it)
    agent._reply.assert_called_once_with(
        None, word, 'reply_to', 'correlation_id')
    agent._delegate_to_sinks.coro.assert_has_calls([
        call(word),
        call('bar'),
    ])
def stream(self,
           channel: Union[AsyncIterable, Iterable],
           beacon: NodeT = None,
           **kwargs: Any) -> StreamT:
    """Create new stream from channel/topic/iterable/async iterable.

    Arguments:
        channel: Iterable to stream over (async or non-async).
        kwargs: See :class:`Stream`.

    Returns:
        faust.Stream:
            to iterate over events in the stream.
    """
    return self.conf.Stream(
        app=self,
        channel=aiter(channel) if channel is not None else None,
        beacon=beacon or self.beacon,
        **kwargs)
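# Usage sketch for app.stream() (illustrative; the app and the topic name
# 'orders' are assumptions). Because the channel argument is passed through
# aiter(), both topics/channels and plain async iterables can be streamed.
import faust

app = faust.App('stream-example', broker='kafka://localhost:9092')
orders_topic = app.topic('orders')


async def count_up(n: int):
    for i in range(n):
        yield i


@app.task
async def consume_orders() -> None:
    # stream over a topic
    async for order in app.stream(orders_topic):
        print(order)


@app.task
async def consume_iterable() -> None:
    # stream over a plain async iterable
    async for i in app.stream(count_up(3)):
        print(i)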
async def _barrier_send(
        self,
        barrier: BarrierState,
        items: Union[AsyncIterable[Tuple[K, V]], Iterable[Tuple[K, V]]],
        reply_to: ReplyToArg) -> AsyncIterator[str]:  # pragma: no cover
    # map: send many tasks to agents
    # while trying to pop incoming results off.
    async for key, value in aiter(items):
        correlation_id = str(uuid4())
        p = await self.ask_nowait(
            key=key,
            value=value,
            reply_to=reply_to,
            correlation_id=correlation_id)
        # add reply promise to the barrier
        barrier.add(p)

        # the ReplyConsumer will call the barrier whenever a new
        # result comes in.
        app = cast(App, self.app)
        await app.maybe_start_client()
        await app._reply_consumer.add(p.correlation_id, barrier)

        yield correlation_id
def group_by(self,
             key: GroupByKeyArg,
             *,
             name: str = None,
             topic: TopicT = None,
             partitions: int = None) -> StreamT:
    """Create new stream that repartitions the stream using a new key.

    Arguments:
        key: The key argument decides how the new key is generated;
            it can be a field descriptor, a callable, or an async
            callable.

            Note: The ``name`` argument must be provided if the key
                argument is a callable.

        name: Suffix to use for repartitioned topics.
            This argument is required if `key` is a callable.

    Examples:
        Using a field descriptor to use a field in the event as the new
        key:

        .. sourcecode:: python

            s = withdrawals_topic.stream()
            # values in this stream are of type Withdrawal
            async for event in s.group_by(Withdrawal.account_id):
                ...

        Using an async callable to extract a new key:

        .. sourcecode:: python

            s = withdrawals_topic.stream()

            async def get_key(withdrawal):
                return await aiohttp.get(
                    f'http://e.com/resolve_account/{withdrawal.account_id}')

            async for event in s.group_by(get_key):
                ...

        Using a regular callable to extract a new key:

        .. sourcecode:: python

            s = withdrawals_topic.stream()

            def get_key(withdrawal):
                return withdrawal.account_id.upper()

            async for event in s.group_by(get_key):
                ...
    """
    if self._finalized:
        # see note in self.through()
        return self
    channel: ChannelT
    if self.concurrency_index is not None:
        raise ImproperlyConfigured(
            'Agent with concurrency>1 cannot use stream.group_by!')
    if not name:
        if isinstance(key, FieldDescriptorT):
            name = cast(FieldDescriptorT, key).ident
        else:
            raise TypeError(
                'group_by with callback must set name=topic_suffix')
    if topic is not None:
        channel = topic
    else:
        suffix = '-' + self.app.conf.id + '-' + name + '-repartition'
        p = partitions if partitions else self.app.conf.topic_partitions
        channel = cast(ChannelT, self.channel).derive(
            suffix=suffix, partitions=p, internal=True)
    format_key = self._format_key

    channel_it = aiter(channel)
    if self._next is not None:
        raise ImproperlyConfigured('Stream already uses group_by/through')
    grouped = self._chain(channel=channel_it)

    async def repartition(value: T) -> T:
        event = self.current_event
        if event is None:
            raise RuntimeError(
                'Cannot repartition stream with non-topic channel')
        new_key = await format_key(key, value)
        await event.forward(channel, key=new_key)
        return value

    self.add_processor(repartition)
    self._enable_passive(cast(ChannelT, channel_it), declare=True)
    return grouped
async def take(self, max_: int,
               within: Seconds) -> AsyncIterable[Sequence[T_co]]:
    """Buffer n values at a time and yield a list of buffered values.

    Arguments:
        max_: Maximum number of values to buffer before flushing.
        within: Timeout for when we give up waiting for another value,
            and process the values we have.
            Warning: If there's no timeout (i.e. ``within=None``),
            the agent is likely to stall and block buffered events for
            an unreasonable length of time(!).
    """
    buffer: List[T_co] = []
    events: List[EventT] = []
    buffer_add = buffer.append
    event_add = events.append
    buffer_size = buffer.__len__
    buffer_full = asyncio.Event(loop=self.loop)
    buffer_consumed = asyncio.Event(loop=self.loop)
    timeout = want_seconds(within) if within else None
    stream_enable_acks: bool = self.enable_acks

    buffer_consuming: Optional[asyncio.Future] = None

    channel_it = aiter(self.channel)

    # We add this processor to populate the buffer, and the stream
    # is passively consumed in the background (enable_passive below).
    async def add_to_buffer(value: T) -> T:
        # buffer_consuming is set when consuming buffer after timeout.
        nonlocal buffer_consuming
        if buffer_consuming is not None:
            try:
                await buffer_consuming
            finally:
                buffer_consuming = None
        buffer_add(cast(T_co, value))
        event = self.current_event
        if event is not None:
            event_add(event)
        if buffer_size() >= max_:
            # signal that the buffer is full and should be emptied.
            buffer_full.set()
            # strict wait for buffer to be consumed after buffer full.
            # (if max_ is 1000, we are not allowed to return 1001 values.)
            buffer_consumed.clear()
            await self.wait(buffer_consumed)
        return value

    # Disable automatic acks so that this method acks events manually,
    # only after they have been consumed by the user.
    self.enable_acks = False

    self.add_processor(add_to_buffer)
    self._enable_passive(cast(ChannelT, channel_it))
    try:
        while not self.should_stop:
            # wait until buffer full, or timeout
            await self.wait_for_stopped(buffer_full, timeout=timeout)
            if buffer:
                # make sure the background task does not add new items to
                # the buffer while we read.
                buffer_consuming = self.loop.create_future()
                try:
                    yield list(buffer)
                finally:
                    buffer.clear()
                    for event in events:
                        await self.ack(event)
                    events.clear()
                    # allow writing to buffer again
                    notify(buffer_consuming)
                    buffer_full.clear()
                    buffer_consumed.set()
    finally:
        # Restore previous behaviour of "enable_acks"
        self.enable_acks = stream_enable_acks
        self._processors.remove(add_to_buffer)
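# Usage sketch for Stream.take() (illustrative; the app, topic and agent
# names are assumptions). Batches of up to 100 values are yielded when the
# buffer fills up or when 10 seconds pass, whichever happens first.
import faust

app = faust.App('take-example', broker='kafka://localhost:9092')
events_topic = app.topic('events')


@app.agent(events_topic)
async def batch_processor(stream):
    async for batch in stream.take(100, within=10):
        print(f'processing {len(batch)} buffered values')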
async def test_aiter__AsyncIterable():
    it = aiter(AIT())
    assert await anext(it) == 0
    assert await anext(it) == 1
    assert await anext(it) == 2
def test_aiter__not_an_iterator():
    with pytest.raises(TypeError):
        aiter(object())
def __aiter__(self) -> AsyncIterator:
    return aiter(self._stream)
async def noack_take(self, max_: int,
                     within: Seconds) -> AsyncIterable[Sequence[T_co]]:
    """Buffer n values at a time and yield a list of buffered values.

    :param max_: Max number of messages to receive. When more than this
        number of messages are received within the specified number of
        seconds then we flush the buffer immediately.
    :param within: Timeout for when we give up waiting for another value,
        and process the values we have.
        Warning: If there's no timeout (i.e. ``within=None``),
        the agent is likely to stall and block buffered events for an
        unreasonable length of time(!).
    """
    buffer: List[T_co] = []
    events: List[EventT] = []
    buffer_add = buffer.append
    event_add = events.append
    buffer_size = buffer.__len__
    buffer_full = asyncio.Event()
    buffer_consumed = asyncio.Event()
    timeout = want_seconds(within) if within else None
    stream_enable_acks: bool = self.enable_acks

    buffer_consuming: Optional[asyncio.Future] = None

    channel_it = aiter(self.channel)

    # We add this processor to populate the buffer, and the stream
    # is passively consumed in the background (enable_passive below).
    async def add_to_buffer(value: T) -> T:
        try:
            # buffer_consuming is set when consuming buffer after timeout.
            nonlocal buffer_consuming
            if buffer_consuming is not None:
                try:
                    await buffer_consuming
                finally:
                    buffer_consuming = None
            # We buffer events instead of values to allow for manual ack.
            event = self.current_event
            buffer_add(cast(T_co, event))
            if event is None:
                raise RuntimeError(
                    "Take buffer found current_event is None")
            event_add(event)
            if buffer_size() >= max_:
                # signal that the buffer is full and should be emptied.
                buffer_full.set()
                # strict wait for buffer to be consumed after buffer full.
                # (if max_ is 1000, we are not allowed to return 1001
                # values.)
                buffer_consumed.clear()
                await self.wait(buffer_consumed)
        except CancelledError:  # pragma: no cover
            raise
        except Exception as exc:
            self.log.exception("Error adding to take buffer: %r", exc)
            await self.crash(exc)
        return value

    # Disable automatic acks so that events are acked manually,
    # only after they have been consumed by the user.
    self.enable_acks = False

    self.add_processor(add_to_buffer)
    self._enable_passive(cast(ChannelT, channel_it))
    try:
        while not self.should_stop:
            # wait until buffer full, or timeout
            await self.wait_for_stopped(buffer_full, timeout=timeout)
            if buffer:
                # make sure the background task does not add new items to
                # the buffer while we read.
                buffer_consuming = self.loop.create_future()
                try:
                    yield list(buffer)
                finally:
                    buffer.clear()
                    # code change: we want to ack manually, so unlike
                    # take() the events are not acked here:
                    # for event in events:
                    #     await self.ack(event)
                    events.clear()
                    # allow writing to buffer again
                    notify(buffer_consuming)
                    buffer_full.clear()
                    buffer_consumed.set()
            else:  # pragma: no cover
                pass
        else:  # pragma: no cover
            pass
    finally:
        # Restore previous behaviour of "enable_acks"
        self.enable_acks = stream_enable_acks
        self._processors.remove(add_to_buffer)
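# Usage sketch for noack_take() (illustrative; the app, topic and agent
# names, and the `process` helper are assumptions). Unlike take(), the
# yielded batches contain the events themselves and nothing is acked
# automatically, so each event is acked only after it has been handled.
import faust

app = faust.App('noack-take-example', broker='kafka://localhost:9092')
events_topic = app.topic('events')


def process(value) -> None:
    ...  # placeholder for real processing


@app.agent(events_topic)
async def batch_processor(stream):
    async for batch in stream.noack_take(100, within=10):
        for event in batch:
            process(event.value)
            await stream.ack(event)  # ack manually once handled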