def split_crawl_tasks(tasks, concurrency):
    """
    Reorganize tasks according to the tasks max concurrency value.

    :param tasks: sub-tasks to execute, can be either a list of tasks
      or a list of list of tasks
    :param int concurrency: Maximum number of tasks that might be
      executed in parallel.

    :return: list of list of tasks.
    :raises Exception: if the first item of ``tasks`` is a list but
      another item is not.
    """
    # Emptiness is checked with plain truthiness rather than ``any(tasks)``:
    # ``any`` is False when every item is falsy, so an input made only of
    # empty lists (e.g. ``[[], []]``) was mistaken for a flat list and got
    # wrapped a second time.
    if tasks and isinstance(tasks[0], list):
        # Already grouped: only validate that every group is a list.
        for seq in tasks:
            if not isinstance(seq, list):
                raise Exception("Expected a list of tasks")
        return tasks

    if concurrency > 1:
        # ceil(len / concurrency) items per chain; ``max(1, ...)`` keeps the
        # chunk size valid when ``tasks`` is empty (the ceil is then 0).
        # NOTE(review): assumes ``chunks`` yields consecutive groups of at
        # most the requested size -- confirm against its definition.
        chain_size = int(ceil(float(len(tasks)) / concurrency))
        return list(chunks(iter(tasks), max(1, chain_size)))

    # No parallelism requested: everything goes in a single chain.
    return [tasks]
def split_crawl_tasks(tasks, concurrency):
    """
    Reorganize tasks according to the tasks max concurrency value.

    :param tasks: sub-tasks to execute, can be either a list of tasks
      or a list of list of tasks
    :param int concurrency: Maximum number of tasks that might be
      executed in parallel.

    :return: list of list of tasks.
    :raises Exception: if the first item of ``tasks`` is a list but
      another item is not.
    """
    # Use truthiness, not ``any(tasks)``, to detect a non-empty input:
    # ``any`` returns False when all items are falsy, which made inputs
    # consisting solely of empty lists (``[[]]``, ``[[], []]``) fall through
    # to the flat-list branch and come back nested one level too deep.
    already_grouped = bool(tasks) and isinstance(tasks[0], list)

    if already_grouped:
        # Every item must be a list once the first one is.
        for seq in tasks:
            if not isinstance(seq, list):
                raise Exception("Expected a list of tasks")
        return tasks

    if concurrency <= 1:
        # Sequential execution: a single chain holding all the tasks.
        return [tasks]

    # ceil(len / concurrency) items per chain; the ``max`` guards against a
    # zero chunk size when ``tasks`` is empty.
    # NOTE(review): assumes ``chunks`` yields consecutive groups of at most
    # the requested size -- confirm against its definition.
    chain_size = int(ceil(float(len(tasks)) / concurrency))
    return list(chunks(iter(tasks), max(1, chain_size)))
def __delete_es_docs(self, body, es, index, doc_type):
    """
    Delete the documents returned by a scan of ``body``, 500 ids at a time.

    :param body: query passed to ``scan`` as its ``query`` argument
    :param es: client object handed to ``scan`` as first argument
    :param index: value forwarded to ``scan`` as ``index``
    :param doc_type: value forwarded to ``scan`` as ``doc_type``
    """
    # Only the ``_id`` field is requested since nothing else is read below.
    scan_kwargs = {
        'query': body,
        'index': index,
        'doc_type': doc_type,
        'fields': ['_id'],
    }
    # Routing is forwarded only when one is configured on the instance.
    if self.__routing:
        scan_kwargs['routing'] = self.__routing

    hits = scan(es, **scan_kwargs)
    # NOTE(review): ``chunks`` presumably groups the hits in batches of at
    # most 500 -- confirm against its definition.
    for batch in chunks(hits, 500):
        doc_ids = [hit['_id'] for hit in batch]
        self.delete_cards_by_id(doc_ids)
def __delete_es_docs(self, body, es, index, doc_type):
    """
    Scan for documents matching ``body`` and delete them by id in batches.

    The scan is restricted to the ``_id`` field and each batch of ids is
    passed to ``self.delete_cards_by_id``.

    :param body: query passed to ``scan`` as its ``query`` argument
    :param es: client object handed to ``scan`` as first argument
    :param index: value forwarded to ``scan`` as ``index``
    :param doc_type: value forwarded to ``scan`` as ``doc_type``
    """
    params = {'query': body, 'index': index}
    params['doc_type'] = doc_type
    params['fields'] = ['_id']

    # The routing key is added only when the instance defines a truthy one.
    if self.__routing:
        params['routing'] = self.__routing

    # NOTE(review): batch size of 500 is the second argument to ``chunks``;
    # its exact grouping semantics are defined outside this block.
    for group in chunks(scan(es, **params), 500):
        self.delete_cards_by_id([doc['_id'] for doc in group])