# Assumed imports for these worker variants; load, grouper_it,
# wait_for_group, final_results, bound_process, tt and WorkerException
# are local helpers expected from the surrounding package.
import asyncio
import logging
from concurrent import futures
from copy import deepcopy

from aiohttp import ClientSession
from celery import chain, chord, group


def work(data, info):
    """Celery variant: split ``data`` into chunks and dispatch them as
    grouped celery tasks, optionally blocking on each batch."""
    celery_chunk_size = info.get('celery_chunk_size', 80)
    celery_max_workers = info.get('celery_max_workers', 4)
    celery_sleep = info.get('celery_sleep')
    queue = info.get('queue', 'celery')
    sync_callback = info.get('sync_callback')
    final_callback = info.get('final_callback')
    dummy = load(info.get('dummy'))

    splitted_data = list(grouper_it(data, celery_chunk_size))

    results_list = []
    if sync_callback:
        # Synchronous mode: dispatch one batch of groups at a time and
        # wait for each group to finish before starting the next.
        for splitted_chunked in grouper_it(splitted_data, celery_max_workers):
            tasks = parallel_chunked(splitted_chunked, info)
            results = tasks.apply_async()
            wait_for_group(results, celery_sleep, sync_callback)
            results_list.append(results)
        results = final_results(results_list)
        sync_callback = load(sync_callback)
        return sync_callback(results)

    # Asynchronous mode: chain the batches, joining each group with a
    # dummy task so the chain proceeds group by group.
    tasks_list = []
    for splitted_chunked in grouper_it(splitted_data, celery_max_workers):
        tasks = parallel_chunked(splitted_chunked, info)
        if len(tasks) == 1:
            # chord([A], B) can be optimized as A | B -- Issue #3323
            tasks_list.append(tasks | dummy.si().set(queue=queue))
        else:
            tasks_list.append(chord(tasks, dummy.si().set(queue=queue)))
    if final_callback:
        final_callback = load(final_callback)
        task_to_run = chain(tasks_list) | final_callback.si(
            data, info).set(queue=queue)
    else:
        task_to_run = chain(tasks_list)
    results = task_to_run.apply_async()
    results_list.append(results)
    return results_list
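# Illustrative only: a minimal sketch of the info mapping the celery
# variant consumes. All dotted task paths below are hypothetical
# placeholders, not part of this module.
def _example_celery_usage(data):
    info = {
        'celery_worker': 'myapp.tasks.crawl_chunk',  # run as func.si(chunk, info)
        'dummy': 'myapp.tasks.noop',       # joining task for each chord
        'queue': 'crawl',
        'chunk_size': 20,                  # items per signature (parallel_chunked)
        'celery_chunk_size': 80,           # items per outer chunk
        'celery_max_workers': 4,           # outer chunks grouped per chain step
        'final_callback': 'myapp.tasks.collect',  # optional: called with (data, info)
        # 'sync_callback': 'myapp.tasks.merge',   # optional: blocking path instead
    }
    return work(data, info)  # async path: a list with one AsyncResult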
def work(data, info):
    """Process-pool variant: fan chunks of ``data`` out to worker processes."""
    log = logging.getLogger(info.get('log', 'worker'))
    results = []
    chunk_size = info.get('chunk_size', 20)
    max_workers = info.get('max_workers', 4)
    try:
        func_str = info.get('worker')
        func = load(func_str)
    except Exception as exc:
        log.error('dynamic worker func invalid! %s' % exc)
        return results
    Executor = futures.ProcessPoolExecutor
    backup_info = deepcopy(info)
    with Executor(max_workers=max_workers) as executor:
        future_to_data = {}
        for index, data_chunked in enumerate(grouper_it(data, chunk_size)):
            log.debug('process worker chunk %d processing.' % index)
            info = deepcopy(backup_info)
            info['index'] = index
            future_to_data[executor.submit(
                func, data_chunked, info)] = data_chunked
        for future in futures.as_completed(future_to_data):
            data = future_to_data[future]
            try:
                result = future.result()
            except Exception as exc:
                tt(exc)
                log.critical('exception caught! %s %s' % (exc, type(exc)))
                result = WorkerException('%s -- %r' % (type(exc), exc))
            results.append(result)
    return results
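# Illustrative only: driving the process-pool variant. The worker must be
# importable by dotted path and picklable; 'myapp.workers.square_chunk'
# (accepting (chunk, info)) is a hypothetical placeholder.
def _example_process_pool_usage():
    info = {
        'worker': 'myapp.workers.square_chunk',
        'chunk_size': 20,   # 100 items -> 5 chunks -> 5 futures
        'max_workers': 4,
    }
    # failed chunks come back as WorkerException instances in the list
    return work(range(100), info)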
def parallel_chunked(data, info):
    """Build one celery ``group`` of chunked worker signatures."""
    func_str = info.get('celery_worker')
    queue = info.get('queue', 'celery')
    func = load(func_str)
    tasks = []
    callback = info.get('each_callback')
    if callback:
        callback = load(callback)
    for d in data:
        for chunked_data in grouper_it(d, info.get('chunk_size', 20)):
            if callback:
                # the result passed to the callback arrives wrapped as [result]
                sig = func.si(chunked_data, info).set(
                    queue=queue) | callback.s().set(queue=queue)
            else:
                sig = func.si(chunked_data, info).set(queue=queue)
            tasks.append(sig)
    # removed so the group's results are returned directly:
    # callback = info.get('group_callback')
    # if callback:
    #     callback = load(callback)
    #     return group(tasks) | callback.s()
    return group(tasks)
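# Illustrative only: parallel_chunked() flattens an iterable of outer
# chunks into one celery group of per-chunk signatures. With two outer
# chunks of 40 items and chunk_size=20 the group holds four signatures;
# 'myapp.tasks.crawl_chunk' is a hypothetical task path.
def _example_parallel_chunked():
    info = {
        'celery_worker': 'myapp.tasks.crawl_chunk',
        'queue': 'crawl',
        'chunk_size': 20,
    }
    outer_chunks = [range(40), range(40, 80)]
    return parallel_chunked(outer_chunks, info)  # group of 4 signatures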
async def _work(data, info):
    """Coroutine variant: schedule chunk workers as asyncio tasks, bounded
    by a semaphore and sharing one aiohttp session."""
    log = logging.getLogger('worker')
    chunk_size = info.get('chunk_size', 20)
    # max_workers = info.get('max_workers', 4)
    max_sem = info.get('max_semaphore', len(data))
    try:
        func_str = info.get('worker')
        func = load(func_str)
    except Exception as exc:
        log.error('dynamic worker func invalid! %s' % exc)
        return None
    tasks = []
    semaphore = asyncio.Semaphore(max_sem)
    async with ClientSession() as session:
        backup_info = deepcopy(info)
        response = None
        for index, data_chunked in enumerate(grouper_it(data, chunk_size)):
            info = deepcopy(backup_info)
            info['index'] = index
            info['session'] = session
            info['semaphore'] = semaphore
            log.debug('coroutine worker chunk %d processing.' % index)
            task = asyncio.ensure_future(
                bound_process(func, data_chunked, info))
            tasks.append(task)
        try:
            # exceptions surface here, when the gathered tasks are awaited
            response = await asyncio.gather(*tasks, return_exceptions=False)
        except Exception as exc:
            tt(exc)
            log.critical('exception caught! %s -- %r' % (exc, data))
            # response = WorkerException('%s -- %r' % (type(exc), exc))
        return response
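# Illustrative only: running the coroutine variant from synchronous code.
# 'myapp.aio.fetch_chunk' is a hypothetical coroutine worker; bound_process
# is assumed to acquire info['semaphore'] around each func call.
def _example_coroutine_usage(urls):
    info = {
        'worker': 'myapp.aio.fetch_chunk',
        'chunk_size': 20,
        'max_semaphore': 10,  # at most 10 chunk coroutines in flight
    }
    loop = asyncio.get_event_loop()
    return loop.run_until_complete(_work(urls, info))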
def work(data, info):
    """Simple in-process variant: run the worker sequentially per chunk."""
    log = logging.getLogger(info.get('log', 'worker'))
    results = []
    chunk_size = info.get('chunk_size', 20)
    # max_workers = info.get('max_workers', 4)
    try:
        func_str = info.get('worker')
        func = load(func_str)
    except Exception as exc:
        log.error('dynamic worker func invalid! %s' % exc)
        return results
    backup_info = deepcopy(info)
    for index, data_chunked in enumerate(grouper_it(data, chunk_size)):
        log.debug('simple worker chunk %d processing.' % index)
        info = deepcopy(backup_info)
        info['index'] = index
        try:
            result = func(data_chunked, info)
        except Exception as exc:
            tt(exc)
            log.critical('exception caught! %s %r' % (type(exc), exc))
            result = WorkerException('%s -- %r' % (type(exc), exc))
        results.append(result)
    return results
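# Illustrative only: the sequential variant needs no pool or broker, so a
# dotted path to any importable (chunk, info) worker is enough;
# 'myapp.workers.square_chunk' is again a hypothetical placeholder.
def _example_simple_usage():
    info = {'worker': 'myapp.workers.square_chunk', 'chunk_size': 20}
    return work(range(100), info)  # one result (or WorkerException) per chunk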