def propagate_all():
  """Re-evaluate propagation for all objects."""
  with utils.benchmark("Run propagate_all"):
    from ggrc_workflows.models.hooks import workflow

    with utils.benchmark("Get non propagated acl ids"):
      # Root-level ACL entries only (parent_id is NULL); propagated children
      # are derived from these.
      root_acl_query = db.session.query(
          all_models.AccessControlList.object_type,
          all_models.AccessControlList.id,
      ).filter(
          all_models.AccessControlList.parent_id.is_(None),
      )
      workflow_ids = []
      regular_ids = []
      for obj_type, acl_id in root_acl_query:
        # Workflow ACLs are propagated through the workflow hooks instead of
        # the generic propagation machinery.
        target = workflow_ids if obj_type == "Workflow" else regular_ids
        target.append(acl_id)

    with utils.benchmark("Propagate normal acl entries"):
      total = len(regular_ids)
      done = 0
      for chunk in utils.list_chunks(regular_ids):
        done += len(chunk)
        logger.info("Propagating ACL entries: %s/%s", done, total)
        # Remove stale propagated rows before rebuilding them.
        _delete_propagated_acls(chunk)
        flask.g.new_acl_ids = chunk
        flask.g.new_relationship_ids = set()
        flask.g.deleted_objects = set()
        propagate()

    with utils.benchmark("Propagate WF related acl entries"):
      total = len(workflow_ids)
      done = 0
      for chunk in utils.list_chunks(workflow_ids):
        done += len(chunk)
        logger.info(
            "Propagating WF ACL entries: %s/%s", done, total
        )
        _delete_propagated_acls(chunk)
        flask.g.new_wf_acls = set(chunk)
        flask.g.new_wf_comment_ct_ids = set()
        flask.g.deleted_wf_objects = set()
        workflow.handle_acl_changes()
def propagate_all():
  """Re-evaluate propagation for all objects."""
  with utils.benchmark("Clear ACL memcache"):
    clear_permission_cache()
  with utils.benchmark("Run propagate_all"):
    with utils.benchmark("Add missing acl entries"):
      _add_missing_acl_entries()
    with utils.benchmark("Get non propagated acl ids"):
      # Only root entries (parent_id is NULL) drive propagation.
      root_acl_query = db.session.query(
          all_models.AccessControlList.id,
      ).filter(
          all_models.AccessControlList.parent_id.is_(None),
      )
      root_acl_ids = [row.id for row in root_acl_query]
    with utils.benchmark("Propagate normal acl entries"):
      total = len(root_acl_ids)
      handled = 0
      for chunk in utils.list_chunks(root_acl_ids, chunk_size=50):
        handled += len(chunk)
        logger.info("Propagating ACL entries: %s/%s", handled, total)
        # Drop stale propagated rows for this chunk before rebuilding.
        _delete_propagated_acls(chunk)
        flask.g.new_acl_ids = chunk
        flask.g.new_relationship_ids = set()
        flask.g.user_ids = set()
        flask.g.deleted_objects = set()
        propagate(full_propagate=True)
def propagate_all():
  """Re-evaluate propagation for all objects."""
  with utils.benchmark("Run propagate_all"):
    with utils.benchmark("Add missing acl entries"):
      _add_missing_acl_entries()
    with utils.benchmark("Get non propagated acl ids"):
      # Fetch ids of all root ACL entries (those without a parent).
      top_level_acls = db.session.query(
          all_models.AccessControlList.id,
      ).filter(
          all_models.AccessControlList.parent_id.is_(None),
      )
      top_level_ids = [entry.id for entry in top_level_acls]
    with utils.benchmark("Propagate normal acl entries"):
      total_ids = len(top_level_ids)
      seen_ids = 0
      for id_chunk in utils.list_chunks(top_level_ids, chunk_size=50):
        seen_ids += len(id_chunk)
        logger.info("Propagating ACL entries: %s/%s", seen_ids, total_ids)
        # Clear previously propagated rows for the chunk, then rebuild.
        _delete_propagated_acls(id_chunk)
        flask.g.new_acl_ids = id_chunk
        flask.g.new_relationship_ids = set()
        flask.g.deleted_objects = set()
        propagate()
def do_reindex(with_reindex_snapshots=False):
  """Update the full text search index.

  Args:
    with_reindex_snapshots: when True, Snapshot records are also rebuilt
        through the dedicated snapshot indexer.
  """
  indexer = get_indexer()
  indexed_models = {
      m.__name__: m for m in all_models.all_models
      if issubclass(m, mixin.Indexed) and m.REQUIRED_GLOBAL_REINDEX
  }
  # Warm lookup caches so per-record indexing does not query per reference.
  people_query = db.session.query(all_models.Person.id,
                                  all_models.Person.name,
                                  all_models.Person.email)
  indexer.cache["people_map"] = {p.id: (p.name, p.email)
                                for p in people_query}
  indexer.cache["ac_role_map"] = dict(db.session.query(
      all_models.AccessControlRole.id,
      all_models.AccessControlRole.name,
  ))
  for model_name in sorted(indexed_models.keys()):
    logger.info("Updating index for: %s", model_name)
    with benchmark("Create records for %s" % model_name):
      model = indexed_models[model_name]
      # Fetch only the id column instead of `model.query`: loading full ORM
      # objects just to read their ids wastes memory on large tables
      # (matches the id-only query used by the other reindex variant).
      ids = [id_[0] for id_ in db.session.query(model.id)]
      ids_count = len(ids)
      handled_ids = 0
      for ids_chunk in utils.list_chunks(ids, chunk_size=REINDEX_CHUNK_SIZE):
        handled_ids += len(ids_chunk)
        logger.info("%s: %s / %s", model.__name__, handled_ids, ids_count)
        model.bulk_record_update_for(ids_chunk)
        # Commit per chunk to keep transactions small.
        db.session.commit()
  if with_reindex_snapshots:
    logger.info("Updating index for: %s", "Snapshot")
    with benchmark("Create records for %s" % "Snapshot"):
      snapshot_indexer.reindex()
  indexer.invalidate_cache()
def do_reindex():
  """Update the full text search index.

  Rebuilds fulltext records for every model that requires global reindex,
  committing after each processed chunk.
  """
  indexer = get_indexer()
  indexed_models = {
      m.__name__: m for m in all_models.all_models
      if issubclass(m, mixin.Indexed) and m.REQUIRED_GLOBAL_REINDEX
  }
  # Warm lookup caches so record building does not query per reference.
  people_query = db.session.query(all_models.Person.id,
                                  all_models.Person.name,
                                  all_models.Person.email)
  indexer.cache["people_map"] = {p.id: (p.name, p.email)
                                for p in people_query}
  indexer.cache["ac_role_map"] = dict(db.session.query(
      all_models.AccessControlRole.id,
      all_models.AccessControlRole.name,
  ))
  for model_name in sorted(indexed_models.keys()):
    logger.info("Updating index for: %s", model_name)
    with benchmark("Create records for %s" % model_name):
      model = indexed_models[model_name]
      # Fetch only the id column instead of `model.query`: loading full ORM
      # objects just to read their ids wastes memory on large tables.
      ids = [id_[0] for id_ in db.session.query(model.id)]
      ids_count = len(ids)
      handled_ids = 0
      for ids_chunk in utils.list_chunks(ids):
        handled_ids += len(ids_chunk)
        logger.info("%s: %s / %s", model_name, handled_ids, ids_count)
        model.bulk_record_update_for(ids_chunk)
        # Commit per chunk to keep transactions small.
        db.session.commit()
  indexer.invalidate_cache()
def row_converters_from_ids(self):
  """ Generate a row converter object for every csv row """
  # Nothing to export: block is ignored or no object ids were selected.
  if self.ignore or not self.object_ids:
    return
  self.row_converters = []  # NOTE(review): reset but never appended to here
  index = 0
  # Process ids in fixed-size pools to bound peak memory during export.
  for ids_pool in list_chunks(self.object_ids, self.ROW_CHUNK_SIZE):
    # sqlalchemy caches all queries and it takes a lot of memory.
    # This line clears query cache.
    _app_ctx_stack.top.sqlalchemy_queries = []
    # Stream results so the whole pool is not materialized at once.
    objects = self.object_class.eager_query().filter(
        self.object_class.id.in_(ids_pool)).execution_options(
        stream_results=True)
    for obj in objects:
      # `index` is the running csv row number across all pools.
      yield RowConverter(self, self.object_class, obj=obj,
                         headers=self.headers, index=index)
      index += 1
    # Clear all objects from session (it helps to avoid memory leak)
    # NOTE(review): `del obj` only unbinds the loop variable; it does not
    # remove the object from the session. Confirm whether
    # db.session.expunge_all() was the actual intent here.
    for obj in db.session:
      del obj
def propagate_all():
  """Re-evaluate propagation for all objects."""
  with utils.benchmark("Run propagate_all"):
    from ggrc_workflows.models.hooks import workflow

    with utils.benchmark("Get non propagated acl ids"):
      # Root ACL entries only; propagated rows hang off these parents.
      root_acls = db.session.query(
          all_models.AccessControlList.object_type,
          all_models.AccessControlList.id,
      ).filter(all_models.AccessControlList.parent_id.is_(None), )
      plain_ids = []
      workflow_acl_ids = []
      for acl_type, acl_id in root_acls:
        if acl_type == "Workflow":
          workflow_acl_ids.append(acl_id)
        else:
          plain_ids.append(acl_id)

    def _run_in_chunks(acl_id_list, log_msg, propagate_chunk):
      """Delete stale propagated rows and re-propagate chunk by chunk."""
      total = len(acl_id_list)
      seen = 0
      for chunk in utils.list_chunks(acl_id_list):
        seen += len(chunk)
        logger.info(log_msg, seen, total)
        _delete_propagated_acls(chunk)
        propagate_chunk(chunk)

    def _propagate_plain(chunk):
      """Propagate one chunk of non-workflow root ACL ids."""
      flask.g.new_acl_ids = chunk
      flask.g.new_relationship_ids = set()
      flask.g.deleted_objects = set()
      propagate()

    def _propagate_workflow(chunk):
      """Propagate one chunk of Workflow root ACL ids via wf hooks."""
      flask.g.new_wf_acls = set(chunk)
      flask.g.new_wf_comment_ct_ids = set()
      flask.g.deleted_wf_objects = set()
      workflow.handle_acl_changes()

    with utils.benchmark("Propagate normal acl entries"):
      _run_in_chunks(plain_ids, "Propagating ACL entries: %s/%s",
                     _propagate_plain)
    with utils.benchmark("Propagate WF related acl entries"):
      _run_in_chunks(workflow_acl_ids, "Propagating WF ACL entries: %s/%s",
                     _propagate_workflow)
def update_ft_records(model_ids_to_reindex, chunk_size):
  """Update fulltext records in DB

  Args:
    model_ids_to_reindex: dict mapping model name -> set of object ids whose
        fulltext records should be rebuilt. The dict is drained (entries are
        popped) as models are processed.
    chunk_size: number of ids handled per bulk_record_update_for call.
  """
  with benchmark("indexing. expire objects in session"):
    # Expire stale in-session instances about to be reindexed so the rebuild
    # reads fresh database state.
    for obj in db.session:
      if (isinstance(obj, mixin.Indexed) and
          obj.id in model_ids_to_reindex.get(obj.type, set())):
        db.session.expire(obj)
  with benchmark("indexing. update ft records in db"):
    # Iterate over a snapshot of the keys: the dict is mutated via pop()
    # inside the loop, and popping while iterating a live keys view raises
    # RuntimeError on Python 3.
    for model_name in list(model_ids_to_reindex):
      ids = model_ids_to_reindex.pop(model_name)
      chunk_list = utils.list_chunks(list(ids), chunk_size=chunk_size)
      for ids_chunk in chunk_list:
        get_model(model_name).bulk_record_update_for(ids_chunk)
def push_ft_records(self):
  """Function that clear and push new full text records in DB."""
  with benchmark("push ft records into DB"):
    self.warmup()
    # Expire stale session instances that are about to be reindexed so the
    # rebuild reads fresh database state.
    for obj in db.session:
      if not isinstance(obj, mixin.Indexed):
        continue
      if obj.id in self.model_ids_to_reindex.get(obj.type, set()):
        db.session.expire(obj)
    # Iterate over a snapshot of the keys: the dict is drained via pop()
    # inside the loop, and popping while iterating a live keys view raises
    # RuntimeError on Python 3.
    for model_name in list(self.model_ids_to_reindex):
      ids = self.model_ids_to_reindex.pop(model_name)
      chunk_list = utils.list_chunks(list(ids), chunk_size=self.CHUNK_SIZE)
      for ids_chunk in chunk_list:
        get_model(model_name).bulk_record_update_for(ids_chunk)
def do_reindex(with_reindex_snapshots=False, delete=False):
  """Update the full text search index.

  Args:
    with_reindex_snapshots: when True also rebuild Snapshot records through
        the dedicated snapshot indexer.
    delete: NOTE(review): currently has no effect -- the delete branch below
        is an empty placeholder. Confirm whether record deletion was meant
        to happen here.
  """
  indexer = fulltext.get_indexer()
  indexed_models = {
      m.__name__: m for m in models.all_models.all_models
      if issubclass(m, mixin.Indexed) and m.REQUIRED_GLOBAL_REINDEX
  }
  # Warm person and role lookup caches so per-record indexing does not hit
  # the DB for every reference.
  people_query = db.session.query(models.all_models.Person.id,
                                  models.all_models.Person.name,
                                  models.all_models.Person.email)
  indexer.cache["people_map"] = {
      p.id: (p.name, p.email) for p in people_query
  }
  indexer.cache["ac_role_map"] = dict(
      db.session.query(
          models.all_models.AccessControlRole.id,
          models.all_models.AccessControlRole.name,
      ))
  _remove_dead_reindex_objects(indexed_models)
  for model_name in sorted(indexed_models.keys()):
    if delete:
      # NOTE(review): empty branch -- the benchmark context wraps nothing.
      with benchmark("Deleting records for %s" % model_name):
        pass
    logger.info("Updating index for: %s", model_name)
    with benchmark("Create records for %s" % model_name):
      model = indexed_models[model_name]
      # Only the id column is fetched to keep memory usage low.
      ids = [id_[0] for id_ in db.session.query(model.id)]
      ids_count = len(ids)
      handled_ids = 0
      ids_chunks = ggrc_utils.list_chunks(ids, chunk_size=REINDEX_CHUNK_SIZE)
      for ids_chunk in ids_chunks:
        handled_ids += len(ids_chunk)
        logger.info("%s: %s / %s", model.__name__, handled_ids, ids_count)
        model.bulk_record_update_for(ids_chunk)
        # Commit per chunk to keep transactions small.
        db.session.plain_commit()
  if with_reindex_snapshots:
    logger.info("Updating index for: %s", "Snapshot")
    with benchmark("Create records for %s" % "Snapshot"):
      snapshot_indexer.reindex()
  indexer.invalidate_cache()
def compute_attributes(revision_ids):
  """Recompute computed attribute values for the given revisions.

  Args:
    revision_ids: list of revision ids to process, or the literal string
        "all_latest" to recompute from every object's latest revision.
  """
  with benchmark("Compute attributes"):
    if revision_ids == "all_latest":
      with benchmark("Get all latest revisions ids"):
        revision_ids = get_all_latest_revisions_ids()
    if not revision_ids:
      return
    total = len(revision_ids)
    processed = 0
    for chunk in utils.list_chunks(revision_ids, chunk_size=CA_CHUNK_SIZE):
      processed += len(chunk)
      logger.info("Revision: %s/%s", processed, total)
      recompute_attrs_for_revisions(chunk)
def row_converters_from_ids(self):
  """Yield an export row converter for every selected object."""
  # Nothing to export: block is ignored or no object ids were selected.
  if self.ignore or not self.object_ids:
    return
  self.row_converters = []
  for id_batch in list_chunks(self.object_ids, self.ROW_CHUNK_SIZE):
    # sqlalchemy caches all queries and it takes a lot of memory.
    # This line clears query cache.
    _app_ctx_stack.top.sqlalchemy_queries = []
    # Stream results so the whole batch is not materialized at once.
    batch_query = self.object_class.eager_query().filter(
        self.object_class.id.in_(id_batch)
    ).execution_options(stream_results=True)
    for exported_obj in batch_query:
      yield base_row.ExportRowConverter(self, self.object_class,
                                        obj=exported_obj,
                                        headers=self.headers)
    # Clear all objects from session (it helps to avoid memory leak)
    for exported_obj in db.session:
      del exported_obj
def insert_select_acls(select_statement):
  """Insert acl records from the select statement

  Args:
    select_statement: sql statement that contains the following columns
      ac_role_id,
      object_id,
      object_type,
      created_at,
      modified_by_id,
      updated_at,
      parent_id,
      parent_id_nn,
      base_id,
  """
  acl_table = all_models.AccessControlList.__table__
  inserter = acl_table.insert().prefix_with("IGNORE")
  to_insert = db.session.execute(select_statement).fetchall()
  if to_insert:
    # TODO: investigate whether the select above sets locks on any tables
    db.session.plain_commit()

  column_names = (
      'ac_role_id',
      'object_id',
      'object_type',
      'created_at',
      'modified_by_id',
      'updated_at',
      'parent_id',
      'parent_id_nn',
      'base_id',
  )

  def to_dict(row):
    """Match selected and inserted columns."""
    return dict(zip(column_names, row))

  # Insert in chunks of 10000 rows; each chunk is retried until it succeeds,
  # subject to a shared budget of PROPAGATION_RETRIES OperationalErrors for
  # the whole statement, after which the last error is re-raised.
  failures = 0
  for chunk in utils.list_chunks(to_insert, chunk_size=10000):
    while True:
      try:
        db.session.execute(inserter, [to_dict(row) for row in chunk])
        db.session.plain_commit()
      except sa.exc.OperationalError as error:
        failures += 1
        if failures == PROPAGATION_RETRIES:
          logger.critical(
              "ACL propagation failed with %d retries on statement: \n %s",
              failures,
              select_statement,
          )
          raise
        logger.exception(error)
      else:
        break
def insert_select_acls(select_statement):
  """Insert acl records from the select statement

  Args:
    select_statement: sql statement that contains the following columns
      ac_role_id,
      object_id,
      object_type,
      created_at,
      modified_by_id,
      updated_at,
      parent_id,
      parent_id_nn,
      base_id,
  """
  acl_table = all_models.AccessControlList.__table__
  # INSERT IGNORE: rows hitting unique constraints are silently skipped, so
  # re-inserting already-propagated ACL entries is a no-op.
  inserter = acl_table.insert().prefix_with("IGNORE")
  to_insert = db.session.execute(select_statement).fetchall()
  if to_insert:
    # TODO: investigate whether the select above sets locks on any tables
    db.session.plain_commit()

  def to_dict(record):
    """Match selected and inserted columns."""
    return dict(
        zip(
            [
                'ac_role_id',
                'object_id',
                'object_type',
                'created_at',
                'modified_by_id',
                'updated_at',
                'parent_id',
                'parent_id_nn',
                'base_id',
            ],
            record,
        ),
    )

  # process to_insert in chunks, retry failed inserts, allow maximum of
  # PROPAGATION_RETRIES total retries
  # NOTE(review): the failure budget is shared across all chunks -- each
  # chunk is retried until it succeeds or the shared budget of
  # PROPAGATION_RETRIES OperationalErrors is exhausted (then we re-raise).
  failures = 0
  for chunk in utils.list_chunks(to_insert, chunk_size=10000):
    inserted_successfully = False
    while not inserted_successfully:
      try:
        db.session.execute(
            inserter,
            [to_dict(record) for record in chunk],
        )
        db.session.plain_commit()
      except sa.exc.OperationalError as error:
        failures += 1
        if failures == PROPAGATION_RETRIES:
          logger.critical(
              "ACL propagation failed with %d retries on statement: \n %s",
              failures,
              select_statement,
          )
          raise
        logger.exception(error)
      else:
        inserted_successfully = True