def propagate_all():
  """Re-evaluate propagation for all objects."""
  with utils.benchmark("Run propagate_all"):
    from ggrc_workflows.models.hooks import workflow

    with utils.benchmark("Get non propagated acl ids"):
      # Root-level ACL entries only (parent_id is NULL); propagated children
      # are derived from these.
      root_acl_query = db.session.query(
          all_models.AccessControlList.object_type,
          all_models.AccessControlList.id,
      ).filter(
          all_models.AccessControlList.parent_id.is_(None),
      )
      workflow_ids = []
      regular_ids = []
      for obj_type, acl_id in root_acl_query:
        # Workflow ACLs are propagated through the workflow hooks instead of
        # the generic propagation machinery.
        target = workflow_ids if obj_type == "Workflow" else regular_ids
        target.append(acl_id)

    with utils.benchmark("Propagate normal acl entries"):
      total = len(regular_ids)
      done = 0
      for chunk in utils.list_chunks(regular_ids):
        done += len(chunk)
        logger.info("Propagating ACL entries: %s/%s", done, total)
        # Remove stale propagated rows before rebuilding them.
        _delete_propagated_acls(chunk)
        flask.g.new_acl_ids = chunk
        flask.g.new_relationship_ids = set()
        flask.g.deleted_objects = set()
        propagate()

    with utils.benchmark("Propagate WF related acl entries"):
      total = len(workflow_ids)
      done = 0
      for chunk in utils.list_chunks(workflow_ids):
        done += len(chunk)
        logger.info(
            "Propagating WF ACL entries: %s/%s", done, total
        )
        _delete_propagated_acls(chunk)
        flask.g.new_wf_acls = set(chunk)
        flask.g.new_wf_comment_ct_ids = set()
        flask.g.deleted_wf_objects = set()
        workflow.handle_acl_changes()
def propagate_all():
  """Re-evaluate propagation for all objects."""
  with utils.benchmark("Clear ACL memcache"):
    clear_permission_cache()
  with utils.benchmark("Run propagate_all"):
    with utils.benchmark("Add missing acl entries"):
      _add_missing_acl_entries()
    with utils.benchmark("Get non propagated acl ids"):
      # Only root entries (parent_id is NULL) drive propagation.
      root_acl_query = db.session.query(
          all_models.AccessControlList.id,
      ).filter(
          all_models.AccessControlList.parent_id.is_(None),
      )
      root_acl_ids = [row.id for row in root_acl_query]
    with utils.benchmark("Propagate normal acl entries"):
      total = len(root_acl_ids)
      handled = 0
      for chunk in utils.list_chunks(root_acl_ids, chunk_size=50):
        handled += len(chunk)
        logger.info("Propagating ACL entries: %s/%s", handled, total)
        # Drop stale propagated rows for this chunk before rebuilding.
        _delete_propagated_acls(chunk)
        flask.g.new_acl_ids = chunk
        flask.g.new_relationship_ids = set()
        flask.g.user_ids = set()
        flask.g.deleted_objects = set()
        propagate(full_propagate=True)
def propagate_all():
  """Re-evaluate propagation for all objects."""
  with utils.benchmark("Run propagate_all"):
    with utils.benchmark("Add missing acl entries"):
      _add_missing_acl_entries()
    with utils.benchmark("Get non propagated acl ids"):
      # Fetch ids of all root ACL entries (those without a parent).
      top_level_acls = db.session.query(
          all_models.AccessControlList.id,
      ).filter(
          all_models.AccessControlList.parent_id.is_(None),
      )
      top_level_ids = [entry.id for entry in top_level_acls]
    with utils.benchmark("Propagate normal acl entries"):
      total_ids = len(top_level_ids)
      seen_ids = 0
      for id_chunk in utils.list_chunks(top_level_ids, chunk_size=50):
        seen_ids += len(id_chunk)
        logger.info("Propagating ACL entries: %s/%s", seen_ids, total_ids)
        # Clear previously propagated rows for the chunk, then rebuild.
        _delete_propagated_acls(id_chunk)
        flask.g.new_acl_ids = id_chunk
        flask.g.new_relationship_ids = set()
        flask.g.deleted_objects = set()
        propagate()
def do_reindex(with_reindex_snapshots=False):
  """Update the full text search index.

  Args:
    with_reindex_snapshots: when True, Snapshot records are also rebuilt
        through the dedicated snapshot indexer.
  """
  indexer = get_indexer()
  indexed_models = {
      m.__name__: m for m in all_models.all_models
      if issubclass(m, mixin.Indexed) and m.REQUIRED_GLOBAL_REINDEX
  }
  # Warm lookup caches so per-record indexing does not query per reference.
  people_query = db.session.query(all_models.Person.id,
                                  all_models.Person.name,
                                  all_models.Person.email)
  indexer.cache["people_map"] = {p.id: (p.name, p.email)
                                for p in people_query}
  indexer.cache["ac_role_map"] = dict(db.session.query(
      all_models.AccessControlRole.id,
      all_models.AccessControlRole.name,
  ))
  for model_name in sorted(indexed_models.keys()):
    logger.info("Updating index for: %s", model_name)
    with benchmark("Create records for %s" % model_name):
      model = indexed_models[model_name]
      # Fetch only the id column instead of `model.query`: loading full ORM
      # objects just to read their ids wastes memory on large tables
      # (matches the id-only query used by the other reindex variant).
      ids = [id_[0] for id_ in db.session.query(model.id)]
      ids_count = len(ids)
      handled_ids = 0
      for ids_chunk in utils.list_chunks(ids, chunk_size=REINDEX_CHUNK_SIZE):
        handled_ids += len(ids_chunk)
        logger.info("%s: %s / %s", model.__name__, handled_ids, ids_count)
        model.bulk_record_update_for(ids_chunk)
        # Commit per chunk to keep transactions small.
        db.session.commit()
  if with_reindex_snapshots:
    logger.info("Updating index for: %s", "Snapshot")
    with benchmark("Create records for %s" % "Snapshot"):
      snapshot_indexer.reindex()
  indexer.invalidate_cache()
def do_reindex():
  """Update the full text search index.

  Rebuilds fulltext records for every model that requires global reindex,
  committing after each processed chunk.
  """
  indexer = get_indexer()
  indexed_models = {
      m.__name__: m for m in all_models.all_models
      if issubclass(m, mixin.Indexed) and m.REQUIRED_GLOBAL_REINDEX
  }
  # Warm lookup caches so record building does not query per reference.
  people_query = db.session.query(all_models.Person.id,
                                  all_models.Person.name,
                                  all_models.Person.email)
  indexer.cache["people_map"] = {p.id: (p.name, p.email)
                                for p in people_query}
  indexer.cache["ac_role_map"] = dict(db.session.query(
      all_models.AccessControlRole.id,
      all_models.AccessControlRole.name,
  ))
  for model_name in sorted(indexed_models.keys()):
    logger.info("Updating index for: %s", model_name)
    with benchmark("Create records for %s" % model_name):
      model = indexed_models[model_name]
      # Fetch only the id column instead of `model.query`: loading full ORM
      # objects just to read their ids wastes memory on large tables.
      ids = [id_[0] for id_ in db.session.query(model.id)]
      ids_count = len(ids)
      handled_ids = 0
      for ids_chunk in utils.list_chunks(ids):
        handled_ids += len(ids_chunk)
        logger.info("%s: %s / %s", model_name, handled_ids, ids_count)
        model.bulk_record_update_for(ids_chunk)
        # Commit per chunk to keep transactions small.
        db.session.commit()
  indexer.invalidate_cache()
def row_converters_from_ids(self):
  """ Generate a row converter object for every csv row """
  # Nothing to export: block is ignored or no object ids were selected.
  if self.ignore or not self.object_ids:
    return
  self.row_converters = []  # NOTE(review): reset but never appended to here
  index = 0
  # Process ids in fixed-size pools to bound peak memory during export.
  for ids_pool in list_chunks(self.object_ids, self.ROW_CHUNK_SIZE):
    # sqlalchemy caches all queries and it takes a lot of memory.
    # This line clears query cache.
    _app_ctx_stack.top.sqlalchemy_queries = []
    # Stream results so the whole pool is not materialized at once.
    objects = self.object_class.eager_query().filter(
        self.object_class.id.in_(ids_pool)).execution_options(
        stream_results=True)
    for obj in objects:
      # `index` is the running csv row number across all pools.
      yield RowConverter(self, self.object_class, obj=obj,
                         headers=self.headers, index=index)
      index += 1
    # Clear all objects from session (it helps to avoid memory leak)
    # NOTE(review): `del obj` only unbinds the loop variable; it does not
    # remove the object from the session. Confirm whether
    # db.session.expunge_all() was the actual intent here.
    for obj in db.session:
      del obj
def propagate_all():
  """Re-evaluate propagation for all objects."""
  with utils.benchmark("Run propagate_all"):
    from ggrc_workflows.models.hooks import workflow

    with utils.benchmark("Get non propagated acl ids"):
      # Root ACL entries only; propagated rows hang off these parents.
      root_acls = db.session.query(
          all_models.AccessControlList.object_type,
          all_models.AccessControlList.id,
      ).filter(all_models.AccessControlList.parent_id.is_(None), )
      plain_ids = []
      workflow_acl_ids = []
      for acl_type, acl_id in root_acls:
        if acl_type == "Workflow":
          workflow_acl_ids.append(acl_id)
        else:
          plain_ids.append(acl_id)

    def _run_in_chunks(acl_id_list, log_msg, propagate_chunk):
      """Delete stale propagated rows and re-propagate chunk by chunk."""
      total = len(acl_id_list)
      seen = 0
      for chunk in utils.list_chunks(acl_id_list):
        seen += len(chunk)
        logger.info(log_msg, seen, total)
        _delete_propagated_acls(chunk)
        propagate_chunk(chunk)

    def _propagate_plain(chunk):
      """Propagate one chunk of non-workflow root ACL ids."""
      flask.g.new_acl_ids = chunk
      flask.g.new_relationship_ids = set()
      flask.g.deleted_objects = set()
      propagate()

    def _propagate_workflow(chunk):
      """Propagate one chunk of Workflow root ACL ids via wf hooks."""
      flask.g.new_wf_acls = set(chunk)
      flask.g.new_wf_comment_ct_ids = set()
      flask.g.deleted_wf_objects = set()
      workflow.handle_acl_changes()

    with utils.benchmark("Propagate normal acl entries"):
      _run_in_chunks(plain_ids, "Propagating ACL entries: %s/%s",
                     _propagate_plain)
    with utils.benchmark("Propagate WF related acl entries"):
      _run_in_chunks(workflow_acl_ids, "Propagating WF ACL entries: %s/%s",
                     _propagate_workflow)
def update_ft_records(model_ids_to_reindex, chunk_size):
  """Update fulltext records in DB

  Args:
    model_ids_to_reindex: dict mapping model name -> set of object ids whose
        fulltext records should be rebuilt. The dict is drained (entries are
        popped) as models are processed.
    chunk_size: number of ids handled per bulk_record_update_for call.
  """
  with benchmark("indexing. expire objects in session"):
    # Expire stale in-session instances about to be reindexed so the rebuild
    # reads fresh database state.
    for obj in db.session:
      if (isinstance(obj, mixin.Indexed) and
          obj.id in model_ids_to_reindex.get(obj.type, set())):
        db.session.expire(obj)
  with benchmark("indexing. update ft records in db"):
    # Iterate over a snapshot of the keys: the dict is mutated via pop()
    # inside the loop, and popping while iterating a live keys view raises
    # RuntimeError on Python 3.
    for model_name in list(model_ids_to_reindex):
      ids = model_ids_to_reindex.pop(model_name)
      chunk_list = utils.list_chunks(list(ids), chunk_size=chunk_size)
      for ids_chunk in chunk_list:
        get_model(model_name).bulk_record_update_for(ids_chunk)
def push_ft_records(self):
  """Function that clear and push new full text records in DB."""
  with benchmark("push ft records into DB"):
    self.warmup()
    # Expire stale session instances that are about to be reindexed so the
    # rebuild reads fresh database state.
    for obj in db.session:
      if not isinstance(obj, mixin.Indexed):
        continue
      if obj.id in self.model_ids_to_reindex.get(obj.type, set()):
        db.session.expire(obj)
    # Iterate over a snapshot of the keys: the dict is drained via pop()
    # inside the loop, and popping while iterating a live keys view raises
    # RuntimeError on Python 3.
    for model_name in list(self.model_ids_to_reindex):
      ids = self.model_ids_to_reindex.pop(model_name)
      chunk_list = utils.list_chunks(list(ids), chunk_size=self.CHUNK_SIZE)
      for ids_chunk in chunk_list:
        get_model(model_name).bulk_record_update_for(ids_chunk)
def do_reindex(with_reindex_snapshots=False, delete=False):
  """Update the full text search index.

  Args:
    with_reindex_snapshots: when True also rebuild Snapshot records through
        the dedicated snapshot indexer.
    delete: NOTE(review): currently has no effect -- the delete branch below
        is an empty placeholder. Confirm whether record deletion was meant
        to happen here.
  """
  indexer = fulltext.get_indexer()
  indexed_models = {
      m.__name__: m for m in models.all_models.all_models
      if issubclass(m, mixin.Indexed) and m.REQUIRED_GLOBAL_REINDEX
  }
  # Warm person and role lookup caches so per-record indexing does not hit
  # the DB for every reference.
  people_query = db.session.query(models.all_models.Person.id,
                                  models.all_models.Person.name,
                                  models.all_models.Person.email)
  indexer.cache["people_map"] = {
      p.id: (p.name, p.email) for p in people_query
  }
  indexer.cache["ac_role_map"] = dict(
      db.session.query(
          models.all_models.AccessControlRole.id,
          models.all_models.AccessControlRole.name,
      ))
  _remove_dead_reindex_objects(indexed_models)
  for model_name in sorted(indexed_models.keys()):
    if delete:
      # NOTE(review): empty branch -- the benchmark context wraps nothing.
      with benchmark("Deleting records for %s" % model_name):
        pass
    logger.info("Updating index for: %s", model_name)
    with benchmark("Create records for %s" % model_name):
      model = indexed_models[model_name]
      # Only the id column is fetched to keep memory usage low.
      ids = [id_[0] for id_ in db.session.query(model.id)]
      ids_count = len(ids)
      handled_ids = 0
      ids_chunks = ggrc_utils.list_chunks(ids, chunk_size=REINDEX_CHUNK_SIZE)
      for ids_chunk in ids_chunks:
        handled_ids += len(ids_chunk)
        logger.info("%s: %s / %s", model.__name__, handled_ids, ids_count)
        model.bulk_record_update_for(ids_chunk)
        # Commit per chunk to keep transactions small.
        db.session.plain_commit()
  if with_reindex_snapshots:
    logger.info("Updating index for: %s", "Snapshot")
    with benchmark("Create records for %s" % "Snapshot"):
      snapshot_indexer.reindex()
  indexer.invalidate_cache()
def compute_attributes(revision_ids):
  """Recompute computed attribute values for the given revisions.

  Args:
    revision_ids: list of revision ids to process, or the literal string
        "all_latest" to recompute from every object's latest revision.
  """
  with benchmark("Compute attributes"):
    if revision_ids == "all_latest":
      with benchmark("Get all latest revisions ids"):
        revision_ids = get_all_latest_revisions_ids()
    if not revision_ids:
      return
    total = len(revision_ids)
    processed = 0
    for chunk in utils.list_chunks(revision_ids, chunk_size=CA_CHUNK_SIZE):
      processed += len(chunk)
      logger.info("Revision: %s/%s", processed, total)
      recompute_attrs_for_revisions(chunk)
def row_converters_from_ids(self):
  """Yield an export row converter for every selected object."""
  # Nothing to export: block is ignored or no object ids were selected.
  if self.ignore or not self.object_ids:
    return
  self.row_converters = []
  for id_batch in list_chunks(self.object_ids, self.ROW_CHUNK_SIZE):
    # sqlalchemy caches all queries and it takes a lot of memory.
    # This line clears query cache.
    _app_ctx_stack.top.sqlalchemy_queries = []
    # Stream results so the whole batch is not materialized at once.
    batch_query = self.object_class.eager_query().filter(
        self.object_class.id.in_(id_batch)
    ).execution_options(stream_results=True)
    for exported_obj in batch_query:
      yield base_row.ExportRowConverter(self, self.object_class,
                                        obj=exported_obj,
                                        headers=self.headers)
    # Clear all objects from session (it helps to avoid memory leak)
    for exported_obj in db.session:
      del exported_obj
def insert_select_acls(select_statement):
  """Insert acl records from the select statement

  Args:
    select_statement: sql statement that contains the following columns
      ac_role_id,
      object_id,
      object_type,
      created_at,
      modified_by_id,
      updated_at,
      parent_id,
      parent_id_nn,
      base_id,
  """
  acl_table = all_models.AccessControlList.__table__
  inserter = acl_table.insert().prefix_with("IGNORE")
  to_insert = db.session.execute(select_statement).fetchall()
  if to_insert:
    # TODO: investigate whether the select above sets locks on any tables
    db.session.plain_commit()

  column_names = (
      'ac_role_id',
      'object_id',
      'object_type',
      'created_at',
      'modified_by_id',
      'updated_at',
      'parent_id',
      'parent_id_nn',
      'base_id',
  )

  def to_dict(row):
    """Match selected and inserted columns."""
    return dict(zip(column_names, row))

  # Insert in chunks of 10000 rows; each chunk is retried until it succeeds,
  # subject to a shared budget of PROPAGATION_RETRIES OperationalErrors for
  # the whole statement, after which the last error is re-raised.
  failures = 0
  for chunk in utils.list_chunks(to_insert, chunk_size=10000):
    while True:
      try:
        db.session.execute(inserter, [to_dict(row) for row in chunk])
        db.session.plain_commit()
      except sa.exc.OperationalError as error:
        failures += 1
        if failures == PROPAGATION_RETRIES:
          logger.critical(
              "ACL propagation failed with %d retries on statement: \n %s",
              failures,
              select_statement,
          )
          raise
        logger.exception(error)
      else:
        break
def insert_select_acls(select_statement):
  """Insert acl records from the select statement

  Args:
    select_statement: sql statement that contains the following columns
      ac_role_id,
      object_id,
      object_type,
      created_at,
      modified_by_id,
      updated_at,
      parent_id,
      parent_id_nn,
      base_id,
  """
  acl_table = all_models.AccessControlList.__table__
  # INSERT IGNORE: rows hitting unique constraints are silently skipped, so
  # re-inserting already-propagated ACL entries is a no-op.
  inserter = acl_table.insert().prefix_with("IGNORE")
  to_insert = db.session.execute(select_statement).fetchall()
  if to_insert:
    # TODO: investigate whether the select above sets locks on any tables
    db.session.plain_commit()

  def to_dict(record):
    """Match selected and inserted columns."""
    return dict(
        zip(
            [
                'ac_role_id',
                'object_id',
                'object_type',
                'created_at',
                'modified_by_id',
                'updated_at',
                'parent_id',
                'parent_id_nn',
                'base_id',
            ],
            record,
        ),
    )

  # process to_insert in chunks, retry failed inserts, allow maximum of
  # PROPAGATION_RETRIES total retries
  # NOTE(review): the failure budget is shared across all chunks -- each
  # chunk is retried until it succeeds or the shared budget of
  # PROPAGATION_RETRIES OperationalErrors is exhausted (then we re-raise).
  failures = 0
  for chunk in utils.list_chunks(to_insert, chunk_size=10000):
    inserted_successfully = False
    while not inserted_successfully:
      try:
        db.session.execute(
            inserter,
            [to_dict(record) for record in chunk],
        )
        db.session.plain_commit()
      except sa.exc.OperationalError as error:
        failures += 1
        if failures == PROPAGATION_RETRIES:
          logger.critical(
              "ACL propagation failed with %d retries on statement: \n %s",
              failures,
              select_statement,
          )
          raise
        logger.exception(error)
      else:
        inserted_successfully = True