Example #1
    async def _chunk(self, actions, chunk_size, params):
        futures = []
        async with stream.chunks(actions, chunk_size).stream() as chunks:
            async for chunk in chunks:
                logger.debug('Elasticsearch bulk chunk size: %d' % len(chunk))
                for future in [future for future in futures if future.done()]:
                    # yield the results of futures that have already completed
                    yield future.result()
                    # drop the future from the list once its result has been yielded
                    futures.remove(future)

                logger.debug('Elasticsearch async helper bulk semaphore value: %d' % self.semaphore._value)
                await self.semaphore.acquire()
                future = async_helpers.bulk(client=self.es,
                                            actions=chunk,
                                            chunk_size=self.chunk_size,
                                            max_retries=3,
                                            initial_backoff=0.3,
                                            max_backoff=3,
                                            params=params,
                                            semaphore=self.semaphore)
                futures.append(asyncio.ensure_future(future))

            # wait for all remaining futures to complete and yield their results
            if futures:
                done, _ = await asyncio.wait(futures)
                for future in done:
                    yield future.result()
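
The snippet above combines aiostream's chunking with an asyncio.Semaphore so that only a bounded number of bulk requests are in flight at any time, while results are yielded as soon as they become available. A minimal, self-contained sketch of that pattern, using placeholder produce() and process_chunk() helpers instead of the Elasticsearch-specific pieces:

import asyncio
from aiostream import stream

async def produce(n):
    # placeholder for the `actions` async iterable
    for i in range(n):
        yield i

async def process_chunk(chunk, semaphore):
    try:
        await asyncio.sleep(0.1)  # placeholder for the bulk request
        return len(chunk)
    finally:
        semaphore.release()  # mirrors the semaphore handed to the bulk helper above

async def chunked_fanout(source, chunk_size, max_concurrency=3):
    semaphore = asyncio.Semaphore(max_concurrency)
    tasks = []
    async with stream.chunks(source, chunk_size).stream() as chunks:
        async for chunk in chunks:
            # acquire before scheduling so chunk consumption pauses once
            # `max_concurrency` chunks are already in flight
            await semaphore.acquire()
            tasks.append(asyncio.ensure_future(process_chunk(chunk, semaphore)))
    for finished in asyncio.as_completed(tasks):
        yield await finished

async def main():
    async for result in chunked_fanout(produce(10), 4):
        print(result)

asyncio.run(main())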
Example #2
    async def _f(self, pop_paths, cursor, batch_size):
        # read documents from the cursor in batches of `batch_size`
        async with stream.chunks(cursor, batch_size).stream() as chunks:
            async for chunk in chunks:
                l_docs = chunk
                # resolve every requested population path for the whole batch
                for pop_path in pop_paths:
                    l_docs = await self._populate(l_docs, pop_path)
                # yield the populated documents one by one
                for doc in l_docs:
                    yield doc
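
Both examples lean on the same aiostream idiom: stream.chunks(...) wraps any (async) iterable, and .stream() yields an async context-managed generator of lists. A standalone demonstration with a throwaway numbers() source:

import asyncio
from aiostream import stream

async def numbers():
    for i in range(10):
        yield i

async def main():
    # group the async source into lists of at most 4 items
    async with stream.chunks(numbers(), 4).stream() as chunks:
        async for chunk in chunks:
            print(chunk)  # [0, 1, 2, 3], [4, 5, 6, 7], [8, 9]

asyncio.run(main())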
Example #3
    async def run_until_complete(
        self,
        iterable: InputSequenceType,
        n_total: Optional[int] = None,
    ) -> Any:
        try:
            self._n_total = n_total or len(iterable)  # type: ignore
        except Exception:
            # the iterable may not support len(); the total is then derived at the end
            pass

        assert self._start_time is None  # can't reuse Job instances
        self._start_time = time.time()

        try:
            chunk_stream = stream.chunks(stream.iterate(iterable),
                                         self.chunk_size)

            async with self.mapper as mapper_url, chunk_stream.stream() as chunk_gen:
                request_gen = (self._request(mapper_url, chunk) async for chunk in chunk_gen)
                async for response in utils.limited_as_completed_from_async_coro_gen(
                    request_gen, self.mapper.n_mappers
                ):
                    response_tuple = await response
                    self._reduce_chunk(*response_tuple)

            if self._n_total is None:
                self._n_total = self._n_successful + self._n_failed
            else:
                assert self._n_total == self._n_successful + self._n_failed

            self.reducer.finish()
            return self.result
        finally:
            self._end_time = time.time()

            if self.owns_session:
                await self.session.close()
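
utils.limited_as_completed_from_async_coro_gen is project-specific and not shown here. Judging from how it is used above (each yielded item is awaited for its result), a helper of this kind typically keeps at most n coroutines from an async generator running at once and yields the finished tasks; a rough sketch under that assumption, not the actual implementation:

import asyncio

async def limited_as_completed(coro_gen, limit):
    # keep at most `limit` coroutines in flight; yield each finished task,
    # which the caller then awaits to obtain its result
    pending = set()
    async for coro in coro_gen:
        pending.add(asyncio.ensure_future(coro))
        if len(pending) >= limit:
            done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
            for task in done:
                yield task
    while pending:
        done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
        for task in done:
            yield task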
Example #4
async def merge_graph_process(
    db: GraphDB,
    event_sender: AnalyticsEventSender,
    args: Namespace,
    content: AsyncGenerator[Union[bytes, Json], None],
    max_wait: timedelta,
    maybe_batch: Optional[str],
) -> GraphUpdate:
    change_id = maybe_batch if maybe_batch else uuid_str()
    write = Queue()  # type: ignore
    read = Queue()  # type: ignore
    # the child process reads from our write queue and vice versa
    updater = DbUpdaterProcess(write, read, args)
    stale = timedelta(seconds=5).total_seconds()  # consider communication dead after this amount of time
    deadline = utc() + max_wait
    dead_adjusted = False

    async def send_to_child(pa: ProcessAction) -> bool:
        alive = updater.is_alive()
        if alive:
            await run_async(write.put, pa, True, stale)
        return alive

    def read_results() -> Task:  # type: ignore # pypy
        async def read_forever() -> GraphUpdate:
            nonlocal deadline
            nonlocal dead_adjusted
            while utc() < deadline:
                # After exit of updater: adjust the deadline once
                if not updater.is_alive() and not dead_adjusted:
                    log.debug("Import process done or dead. Adjust deadline.")
                    deadline = utc() + timedelta(seconds=30)
                    dead_adjusted = True
                try:
                    action = await run_async(read.get, True, stale)
                    if isinstance(action, EmitAnalyticsEvent):
                        await event_sender.capture(action.event)
                    elif isinstance(action, Result):
                        return action.get_value()
                except Empty:
                    # empty is fine
                    pass
            raise ImportAborted(
                f"Import process died. (ExitCode: {updater.exitcode})")

        return asyncio.create_task(read_forever())

    task: Optional[Task] = None  # type: ignore # pypy
    result: Optional[GraphUpdate] = None
    try:
        # other libraries might have tampered with the start method in the meantime
        reset_process_start_method()
        updater.start()
        task = read_results()  # concurrently read result queue
        chunked: Stream = stream.chunks(content, BatchSize)
        async with chunked.stream() as streamer:  # pylint: disable=no-member
            async for lines in streamer:
                if not await send_to_child(ReadElement(lines)):
                    # in case the child is dead, we should stop
                    break
        await send_to_child(
            MergeGraph(db.name, change_id, maybe_batch is not None))
        result = cast(GraphUpdate, await task)  # wait for final result
        return result
    finally:
        if task is not None and not task.done():
            task.cancel()
        if not result:
            # make sure the change is aborted in case a transaction is still open
            log.info(f"Abort update manually: {change_id}")
            await db.abort_update(change_id)
        await send_to_child(PoisonPill())
        await run_async(updater.join, stale)
        if updater.is_alive():
            log.warning(
                f"Process is still alive after poison pill. Terminate process {updater.pid}"
            )
            with suppress(Exception):
                updater.terminate()
            await asyncio.sleep(3)
        if updater.is_alive():
            log.warning(
                f"Process is still alive after terminate. Kill process {updater.pid}"
            )
            with suppress(Exception):
                updater.kill()
            await asyncio.sleep(3)
        if not updater.is_alive():
            with suppress(Exception):
                updater.close()
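
run_async is also project-specific. Given how it is used above (wrapping blocking multiprocessing Queue.put/Queue.get calls and updater.join), it presumably pushes the blocking call onto a thread so the event loop keeps running; a minimal sketch under that assumption:

import asyncio
from typing import Any, Callable

async def run_async(fn: Callable[..., Any], *args: Any) -> Any:
    # execute the blocking callable in the default thread pool executor
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, fn, *args)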