Ejemplo n.º 1
0
def split_crawl_tasks(tasks, concurrency):
    """ Reorganize tasks according to the tasks max concurrency value.

    :param tasks:
      sub-tasks to execute, can be either a flat list of tasks or a list
      of lists of tasks. A list whose first element is a list is treated
      as already grouped and is returned unchanged after validation.
    :param int concurrency:
      Maximum number of tasks that might be executed in parallel.

    :return:
      list of list of tasks.

    :raise Exception:
      if ``tasks`` starts with a list but also contains an element that
      is not a list.
    """
    # Test the outer list itself, not ``any(tasks)``: the latter looks at
    # the truthiness of the elements, so an already-grouped input made
    # only of empty sub-lists (``[[], []]``) would be mistaken for a flat
    # list and wrapped or chunked a second time.
    if tasks and isinstance(tasks[0], list):
        for seq in tasks:
            if not isinstance(seq, list):
                raise Exception("Expected a list of tasks")
        return tasks

    if concurrency > 1:
        # Chunk size is ceil(len(tasks) / concurrency), never below 1 so
        # that an empty ``tasks`` does not request zero-sized chunks.
        chain_size = int(ceil(float(len(tasks)) / concurrency))
        # NOTE(review): ``chunks`` is defined outside this block;
        # presumably it yields successive groups of at most
        # ``chain_size`` items from the iterator -- confirm.
        return list(chunks(iter(tasks), max(1, chain_size)))

    # No parallelism requested: everything runs as one sequence.
    return [tasks]
Ejemplo n.º 2
0
def split_crawl_tasks(tasks, concurrency):
    """ Reorganize tasks according to the tasks max concurrency value.

    :param tasks:
      sub-tasks to execute, can be either a flat list of tasks or a list
      of lists of tasks. A list whose first element is a list is treated
      as already grouped and is returned unchanged after validation.
    :param int concurrency:
      Maximum number of tasks that might be executed in parallel.

    :return:
      list of list of tasks.

    :raise Exception:
      if ``tasks`` starts with a list but also contains an element that
      is not a list.
    """
    # Emptiness of the outer list is what matters here. ``any(tasks)``
    # checks element truthiness instead, so a grouped input consisting
    # solely of empty sub-lists (``[[], []]``) would be treated as flat
    # and nested one level too deep.
    already_grouped = bool(tasks) and isinstance(tasks[0], list)
    if already_grouped:
        for seq in tasks:
            if not isinstance(seq, list):
                raise Exception("Expected a list of tasks")
        return tasks

    if concurrency > 1:
        # ceil(len(tasks) / concurrency), clamped to at least 1 so an
        # empty ``tasks`` never asks for zero-sized chunks.
        chain_size = max(1, int(ceil(float(len(tasks)) / concurrency)))
        # NOTE(review): ``chunks`` comes from outside this block;
        # presumably it yields successive groups of at most
        # ``chain_size`` items from the iterator -- confirm.
        return list(chunks(iter(tasks), chain_size))

    # Sequential execution: a single group holding every task.
    return [tasks]
Ejemplo n.º 3
0
 def __delete_es_docs(self, body, es, index, doc_type):
     """Delete the cards matching ``body``, in batches.

     Runs ``scan`` with the given query, requesting only the ``_id``
     field, and hands the collected ids to ``delete_cards_by_id`` one
     batch at a time.

     :param body: query passed to ``scan`` as ``query``.
     :param es: client object passed to ``scan`` as first argument.
     :param index: value passed to ``scan`` as ``index``.
     :param doc_type: value passed to ``scan`` as ``doc_type``.
     """
     scan_kwargs = {
         'query': body,
         'index': index,
         'doc_type': doc_type,
         'fields': ['_id'],
     }
     # Routing is only forwarded when one is configured on the instance.
     if self.__routing:
         scan_kwargs['routing'] = self.__routing

     hits = scan(es, **scan_kwargs)
     # NOTE(review): ``chunks`` is defined outside this block; presumably
     # it groups the hits into batches of up to 500 items -- confirm.
     for batch in chunks(hits, 500):
         self.delete_cards_by_id([hit['_id'] for hit in batch])
Ejemplo n.º 4
0
 def __delete_es_docs(self, body, es, index, doc_type):
     """Remove, batch by batch, the cards returned by a scan of ``body``.

     Only the ``_id`` field of each hit is requested; the ids of every
     batch are passed to ``delete_cards_by_id``.

     :param body: query forwarded to ``scan`` as ``query``.
     :param es: client object given to ``scan`` as first argument.
     :param index: forwarded to ``scan`` as ``index``.
     :param doc_type: forwarded to ``scan`` as ``doc_type``.
     """
     params = {'query': body, 'index': index, 'doc_type': doc_type}
     params['fields'] = ['_id']
     # Add the routing value only when the instance has a truthy one.
     if self.__routing:
         params['routing'] = self.__routing

     # NOTE(review): ``chunks`` lives outside this block; presumably it
     # yields groups of at most 500 hits -- confirm.
     for group in chunks(scan(es, **params), 500):
         doc_ids = []
         for hit in group:
             doc_ids.append(hit['_id'])
         self.delete_cards_by_id(doc_ids)