def do_reindex():
  """Update the full text search index.

  Rebuilds fulltext records for every indexed model, then reindexes
  snapshots and finally invalidates the indexer cache.
  """
  indexer = get_indexer()
  indexed_models = get_indexed_model_names()
  # Pre-cache person id -> (name, email) so record building does not have
  # to look people up one by one.
  people = db.session.query(all_models.Person.id,
                            all_models.Person.name,
                            all_models.Person.email)
  indexer.cache["people_map"] = {p.id: (p.name, p.email) for p in people}
  for model in sorted(indexed_models):
    # pylint: disable=protected-access
    logger.info("Updating index for: %s", model)
    with benchmark("Create records for %s" % model):
      # NOTE: `model` is rebound here from the model name to the model class.
      model = get_model(model)
      mapper_class = model._sa_class_manager.mapper.base_mapper.class_
      if issubclass(model, mixin.Indexed):
        # Indexed models rebuild their own records in bulk, by id chunk.
        for query_chunk in generate_query_chunks(
            db.session.query(model.id)):
          model.bulk_record_update_for([i.id for i in query_chunk])
          db.session.commit()
      else:
        logger.warning(
            "Try to index model that not inherited from Indexed mixin: %s",
            model.__name__)
        # Fallback path: wipe the model's records and recreate them one
        # instance at a time, with the "<Model>_complete" column group
        # undeferred to avoid per-attribute lazy loads.
        indexer.delete_records_by_type(model.__name__)
        query = model.query.options(
            db.undefer_group(mapper_class.__name__ + '_complete'),
        )
        for query_chunk in generate_query_chunks(query):
          for instance in query_chunk:
            indexer.create_record(indexer.fts_record_for(instance), False)
          db.session.commit()
  reindex_snapshots()
  indexer.invalidate_cache()
def do_reindex():
  """Update the full text search index."""
  indexer = get_indexer()
  indexer.delete_all_records(False)
  # Model base classes and non searchable objects must not be indexed.
  skipped = {
      all_models.Directive,
      all_models.Option,
      all_models.SystemOrProcess,
      all_models.Role,
  }
  models_to_index = {
      candidate for candidate in all_models.all_models
      if model_is_indexed(candidate) and candidate not in skipped
  }
  for model in models_to_index:
    # pylint: disable=protected-access
    base_class = model._sa_class_manager.mapper.base_mapper.class_
    query = model.query.options(
        db.undefer_group(base_class.__name__ + '_complete'),
    )
    for chunk in generate_query_chunks(query):
      for obj in chunk:
        indexer.create_record(fts_record_for(obj), False)
      db.session.commit()
  reindex_snapshots()
def do_reindex():
  """Update the full text search index."""
  indexer = get_indexer()
  # Only Indexed models flagged for global reindex take part.
  models_by_name = {
      model.__name__: model
      for model in all_models.all_models
      if issubclass(model, mixin.Indexed) and model.REQUIRED_GLOBAL_REINDEX
  }
  person_rows = db.session.query(all_models.Person.id,
                                 all_models.Person.name,
                                 all_models.Person.email)
  indexer.cache["people_map"] = {
      row.id: (row.name, row.email) for row in person_rows
  }
  indexer.cache["ac_role_map"] = dict(db.session.query(
      all_models.AccessControlRole.id,
      all_models.AccessControlRole.name,
  ))
  for name in sorted(models_by_name):
    logger.info("Updating index for: %s", name)
    with benchmark("Create records for %s" % name):
      model = models_by_name[name]
      for chunk in generate_query_chunks(db.session.query(model.id)):
        model.bulk_record_update_for([row.id for row in chunk])
        db.session.commit()
  logger.info("Updating index for: %s", "Snapshot")
  with benchmark("Create records for %s" % "Snapshot"):
    reindex_snapshots()
  indexer.invalidate_cache()
def do_reindex():
  """Update the full text search index."""
  indexer = get_indexer()
  # Collect Indexed models that opted into the global reindex.
  reindexable = {}
  for candidate in all_models.all_models:
    if issubclass(candidate, mixin.Indexed) and candidate.REQUIRED_GLOBAL_REINDEX:
      reindexable[candidate.__name__] = candidate
  people_rows = db.session.query(all_models.Person.id,
                                 all_models.Person.name,
                                 all_models.Person.email)
  indexer.cache["people_map"] = {
      row.id: (row.name, row.email) for row in people_rows
  }
  indexer.cache["ac_role_map"] = dict(db.session.query(
      all_models.AccessControlRole.id,
      all_models.AccessControlRole.name,
  ))
  for model_name in sorted(reindexable):
    logger.info("Updating index for: %s", model_name)
    with benchmark("Create records for %s" % model_name):
      model = reindexable[model_name]
      id_query = db.session.query(model.id)
      for chunk in generate_query_chunks(id_query):
        model.bulk_record_update_for([row.id for row in chunk])
        db.session.commit()
  logger.info("Updating index for: %s", "Snapshot")
  with benchmark("Create records for %s" % "Snapshot"):
    reindex_snapshots()
  indexer.invalidate_cache()
  start_compute_attributes("all_latest")
def update_cad_related_objects(task):
  """Update CAD related objects"""
  event_id = task.parameters.get("event_id")
  model_name = task.parameters.get("model_name")
  need_revisions = task.parameters.get("need_revisions")
  modified_by_id = task.parameters.get("modified_by_id")
  event = models.all_models.Event.query.filter_by(id=event_id).first()
  cad = models.all_models.CustomAttributeDefinition.query.filter_by(
      id=event.resource_id).first()
  model = models.get_model(model_name)
  # Full objects are needed only when revisions must be touched.
  if need_revisions:
    query = db.session.query(model)
  else:
    query = db.session.query(model.id)
  total = query.count()
  done = 0
  for chunk in ggrc_utils.generate_query_chunks(query):
    done += chunk.count()
    logger.info("Updating CAD related objects: %s/%s", done, total)
    if need_revisions:
      for obj in chunk:
        obj.updated_at = datetime.datetime.utcnow()
        obj.modified_by_id = modified_by_id
    else:
      model.bulk_record_update_for([obj_id for obj_id, in chunk])
    log_event.log_event(db.session, cad, event=event)
    db.session.commit()
  return app.make_response(("success", 200, [("Content-Type", "text/html")]))
def do_reindex():
  """Update the full text search index."""
  indexer = get_indexer()
  indexer.delete_all_records(False)
  # Remove model base classes and non searchable objects from the run.
  excluded = {
      all_models.Directive,
      all_models.Option,
      all_models.SystemOrProcess,
      all_models.Role,
  }
  targets = set(filter(model_is_indexed, all_models.all_models)) - excluded
  for model in targets:
    # pylint: disable=protected-access
    mapper_class = model._sa_class_manager.mapper.base_mapper.class_
    complete_group = mapper_class.__name__ + '_complete'
    query = model.query.options(
        db.undefer_group(complete_group),
    )
    for chunk in generate_query_chunks(query):
      for instance in chunk:
        indexer.create_record(fts_record_for(instance), False)
      db.session.commit()
  reindex_snapshots()
def update_cad_related_objects(task):
  """Update CAD related objects"""
  get_param = task.parameters.get
  need_revisions = get_param("need_revisions")
  modified_by_id = get_param("modified_by_id")
  event = models.all_models.Event.query.filter_by(
      id=get_param("event_id")
  ).first()
  cad = models.all_models.CustomAttributeDefinition.query.filter_by(
      id=event.resource_id
  ).first()
  model = models.get_model(get_param("model_name"))
  query = db.session.query(model if need_revisions else model.id)
  objects_count = query.count()
  handled_objects = 0
  for chunk in ggrc_utils.generate_query_chunks(query):
    handled_objects += chunk.count()
    logger.info(
        "Updating CAD related objects: %s/%s", handled_objects, objects_count
    )
    if need_revisions:
      # Touch each object so a new revision gets generated for it.
      for obj in chunk:
        obj.updated_at = datetime.datetime.utcnow()
        obj.modified_by_id = modified_by_id
    else:
      model.bulk_record_update_for([obj_id for obj_id, in chunk])
    log_event.log_event(db.session, cad, event=event)
    db.session.commit()
  return app.make_response(("success", 200, [("Content-Type", "text/html")]))
def do_reindex():
  """Update the full text search index."""
  indexer = get_indexer()
  with benchmark('Delete all records'):
    indexer.delete_all_records(False)
  model_names = get_indexed_model_names()
  # Stash person id -> (name, email) on flask.g for record builders.
  person_rows = db.session.query(all_models.Person.id,
                                 all_models.Person.name,
                                 all_models.Person.email)
  g.people_map = {row.id: (row.name, row.email) for row in person_rows}
  for name in sorted(model_names):
    # pylint: disable=protected-access
    logger.info("Updating index for: %s", name)
    with benchmark("Create records for %s" % name):
      model = get_model(name)
      base_class = model._sa_class_manager.mapper.base_mapper.class_
      query = model.query.options(
          db.undefer_group(base_class.__name__ + '_complete'),
      )
      for chunk in generate_query_chunks(query):
        for instance in chunk:
          indexer.create_record(fts_record_for(instance), False)
        db.session.commit()
  reindex_snapshots()
  delattr(g, "people_map")
def update_cad_related_objects(task):
  """Update objects related to a custom attribute definition.

  Args:
    task: background task whose parameters carry "event_id", "model_name",
        "need_revisions" and "modified_by_id".

  Returns:
    Flask response with a plain "success" payload.
  """
  event = models.all_models.Event.query.filter_by(
      id=task.parameters.get("event_id")
  ).first()
  model = models.get_model(task.parameters.get("model_name"))
  # External custom attributes are stored in a separate definition model.
  if issubclass(model, models.mixins.ExternalCustomAttributable):
    cad_model = models.all_models.ExternalCustomAttributeDefinition
  else:
    cad_model = models.all_models.CustomAttributeDefinition
  cad = cad_model.query.filter_by(id=event.resource_id).first()
  # Full objects are only needed when revisions must be generated.
  query = db.session.query(model if task.parameters.get("need_revisions")
                           else model.id)
  if event.action == "PUT":
    refresh_program_cads_title(cad)
  # Count on the database side: len(query.all()) would load every row into
  # memory just to count them.
  objects_count = query.count()
  handled_objects = 0
  for chunk in ggrc_utils.generate_query_chunks(query):
    chunk_objects = chunk.all()
    handled_objects += len(chunk_objects)
    logger.info(
        "Updating CAD related objects: %s/%s", handled_objects, objects_count
    )
    if task.parameters.get("need_revisions"):
      # Touch each object so a new revision is created for it.
      for obj in chunk_objects:
        obj.updated_at = datetime.datetime.utcnow()
        obj.modified_by_id = task.parameters.get("modified_by_id")
    else:
      model.bulk_record_update_for([obj_id for obj_id, in chunk_objects])
    log_event.log_event(db.session, cad, event=event)
    db.session.commit()
  return app.make_response(("success", 200, [("Content-Type", "text/html")]))
def reindex():
  """Reindex all snapshots."""
  snapshot_keys = db.session.query(
      models.Snapshot.parent_type,
      models.Snapshot.parent_id,
      models.Snapshot.child_type,
      models.Snapshot.child_id,
  )
  for chunk in generate_query_chunks(snapshot_keys):
    reindex_pairs({Pair.from_4tuple(row) for row in chunk})
    db.session.commit()
def reindex():
  """Reindex all snapshots."""
  key_query = db.session.query(
      models.Snapshot.parent_type,
      models.Snapshot.parent_id,
      models.Snapshot.child_type,
      models.Snapshot.child_id,
  )
  for rows in generate_query_chunks(key_query):
    pair_set = set(Pair.from_4tuple(row) for row in rows)
    reindex_pairs(pair_set)
    db.session.commit()
def _generate_events(self):
  """Generates Calendar Events."""
  # Map task id -> ids of CalendarEvents already related to the task.
  task_mappings = utils.get_related_mapping(
      left=all_models.CycleTaskGroupObjectTask,
      right=all_models.CalendarEvent
  )
  # Eager-load exactly the cycle/workflow/ACL columns that event generation
  # reads, so the chunk loop below avoids per-task lazy loads.
  columns = all_models.CycleTaskGroupObjectTask.query.options(
      orm.joinedload("cycle").load_only(
          "workflow_id", "is_current", "is_verification_needed"
      ),
      orm.joinedload("cycle").joinedload("workflow").load_only(
          "unit", "recurrences", "next_cycle_start_date",
      ),
      orm.subqueryload(
          "_access_control_list"
      ).joinedload(
          "ac_role"
      ).undefer_group(
          "AccessControlRole_complete"
      ),
      orm.subqueryload(
          "_access_control_list"
      ).joinedload(
          "access_control_people"
      ).joinedload(
          "person"
      ).undefer_group(
          "Person_complete"
      ),
      load_only(
          all_models.CycleTaskGroupObjectTask.id,
          all_models.CycleTaskGroupObjectTask.end_date,
          all_models.CycleTaskGroupObjectTask.status,
          all_models.CycleTaskGroupObjectTask.title,
          all_models.CycleTaskGroupObjectTask.verified_date,
      ),
  ).order_by(all_models.CycleTaskGroupObjectTask.end_date)
  all_count = columns.count()
  handled = 0
  # Query is already ordered by end_date above, hence needs_ordering=False.
  for query_chunk in generate_query_chunks(
      columns, chunk_size=self.chunk_size, needs_ordering=False
  ):
    handled += query_chunk.count()
    logger.info("Cycle task processed: %s/%s", handled, all_count)
    for task in query_chunk:
      # Tasks with no mapped events get an empty set.
      events = task_mappings[task.id] if task.id in task_mappings else set()
      self._generate_events_for_task(task, events_ids=events)
      db.session.flush()
def reindex_snapshots(snapshot_ids):
  """Reindex selected snapshots"""
  if not snapshot_ids:
    return
  key_columns = db.session.query(
      models.Snapshot.parent_type,
      models.Snapshot.parent_id,
      models.Snapshot.child_type,
      models.Snapshot.child_id,
  ).filter(models.Snapshot.id.in_(snapshot_ids))
  for chunk in generate_query_chunks(key_columns):
    reindex_pairs({Pair.from_4tuple(row) for row in chunk})
    db.session.commit()
def _get_revisions_by_type(resource_type):
  """Yield every revision with the given resource type.

  Args:
    resource_type: Resource type of the revisions to fetch.

  Yields:
    Revision objects matching resource_type, chunk by chunk.
  """
  revisions = models.Revision.query.filter(
      models.Revision.resource_type == resource_type)
  for chunk in ggrc_utils.generate_query_chunks(revisions):
    for revision in chunk.all():
      yield revision
def reindex_snapshots(snapshot_ids):
  """Reindex selected snapshots"""
  if not snapshot_ids:
    return
  selected = db.session.query(
      models.Snapshot.parent_type,
      models.Snapshot.parent_id,
      models.Snapshot.child_type,
      models.Snapshot.child_id,
  ).filter(models.Snapshot.id.in_(snapshot_ids))
  for rows in generate_query_chunks(selected):
    pair_set = set(Pair.from_4tuple(row) for row in rows)
    reindex_pairs(pair_set)
    db.session.commit()
def reindex():
  """Reindex all snapshots."""
  key_query = db.session.query(
      models.Snapshot.parent_type,
      models.Snapshot.parent_id,
      models.Snapshot.child_type,
      models.Snapshot.child_id,
  )
  total = key_query.count()
  processed = 0
  for chunk in generate_query_chunks(key_query):
    processed += chunk.count()
    logger.info("Snapshot: %s/%s", processed, total)
    reindex_pairs({Pair.from_4tuple(row) for row in chunk})
    db.session.commit()
def reindex():
  """Reindex all snapshots."""
  snapshot_keys = db.session.query(
      models.Snapshot.parent_type,
      models.Snapshot.parent_id,
      models.Snapshot.child_type,
      models.Snapshot.child_id,
  )
  all_count = snapshot_keys.count()
  handled = 0
  for rows in generate_query_chunks(snapshot_keys):
    handled += rows.count()
    logger.info("Snapshot: %s/%s", handled, all_count)
    pair_set = set(Pair.from_4tuple(row) for row in rows)
    reindex_pairs(pair_set)
    db.session.commit()
def sync_cycle_tasks_events(self):
  """Generates Calendar Events descriptions.

  Walks all CalendarEvents in due-date order and, per event, creates,
  updates or deletes it depending on its sync state and whether it still
  has related cycle tasks.
  """
  with benchmark("Sync of calendar events."):
    # Load only the attendee/profile columns the sync needs to avoid
    # lazy loads inside the loop.
    events = all_models.CalendarEvent.query.options(
        orm.joinedload("attendee").load_only(
            "email",
        ),
        orm.joinedload("attendee").joinedload("profile").load_only(
            "send_calendar_events",
        ),
        load_only(
            all_models.CalendarEvent.id,
            all_models.CalendarEvent.external_event_id,
            all_models.CalendarEvent.title,
            all_models.CalendarEvent.description,
            all_models.CalendarEvent.attendee_id,
            all_models.CalendarEvent.due_date,
            all_models.CalendarEvent.last_synced_at,
        )
    ).order_by(all_models.CalendarEvent.due_date)
    # Map event id -> related cycle task ids.
    event_mappings = utils.get_related_mapping(
        left=all_models.CalendarEvent,
        right=all_models.CycleTaskGroupObjectTask
    )
    all_count = events.count()
    handled = 0
    # Query is pre-ordered by due_date, hence needs_ordering=False.
    for query_chunk in generate_query_chunks(
        events, chunk_size=self.chunk_size, needs_ordering=False
    ):
      chunk_objects = query_chunk.all()
      handled += len(chunk_objects)
      logger.info("Sync of calendar events: %s/%s", handled, all_count)
      for event in chunk_objects:
        if not event.needs_sync:
          continue
        if event.id not in event_mappings or not event_mappings[event.id]:
          # No related tasks left: drop the row if it was never synced,
          # otherwise delete through the sync path.
          if not event.is_synced:
            db.session.delete(event)
          else:
            self._delete_event(event)
          continue
        if not event.is_synced:
          self._create_event(event)
          continue
        self._update_event(event)
      db.session.commit()
def generate_daily_notifications():
  """Generate daily notifications data in chunks."""
  # Daily-runner notifications that are due and either unsent or repeating.
  pending = db.session.query(Notification).options(
      joinedload("notification_type")
  ).filter(
      (Notification.runner == Notification.RUNNER_DAILY) &
      (Notification.send_on <= datetime.today()) &
      ((Notification.sent_at.is_(None)) | (Notification.repeating == true()))
  ).order_by(Notification.repeating, Notification.id)
  total = pending.count()
  processed = 0
  for chunk in generate_query_chunks(
      pending,
      chunk_size=settings.DAILY_DIGEST_BATCH_SIZE,
      needs_ordering=False
  ):
    processed += chunk.count()
    logger.info("Processing notifications: %s/%s", processed, total)
    yield chunk, get_notification_data(chunk)
def generate_daily_notifications():
  """Generate daily notifications data in chunks."""
  is_due = Notification.send_on <= datetime.today()
  is_daily = Notification.runner == Notification.RUNNER_DAILY
  is_pending = (Notification.sent_at.is_(None)) | \
      (Notification.repeating == true())
  notifications = db.session.query(Notification).options(
      joinedload("notification_type")
  ).filter(
      is_daily & is_due & is_pending
  ).order_by(Notification.repeating, Notification.id)
  all_count = notifications.count()
  handled = 0
  for data_chunk in generate_query_chunks(
      notifications,
      chunk_size=settings.DAILY_DIGEST_BATCH_SIZE,
      needs_ordering=False
  ):
    handled += data_chunk.count()
    logger.info("Processing notifications: %s/%s", handled, all_count)
    yield data_chunk, get_notification_data(data_chunk, with_related=False)
def _generate_event_descriptions(self):
  """Generates CalendarEvents descriptions."""
  related_tasks = utils.get_related_mapping(
      left=all_models.CalendarEvent,
      right=all_models.CycleTaskGroupObjectTask)
  event_query = db.session.query(all_models.CalendarEvent).options(
      load_only(
          all_models.CalendarEvent.id,
          all_models.CalendarEvent.description,
      ))
  for chunk in generate_query_chunks(event_query,
                                     chunk_size=self.chunk_size):
    for event in chunk:
      # Events with no related tasks keep their description untouched.
      if event.id in related_tasks:
        self._generate_description_for_event(
            event,
            task_ids=related_tasks[event.id],
        )
def _generate_events(self):
  """Generates Calendar Events."""
  # Map task id -> ids of CalendarEvents already related to the task.
  task_mappings = utils.get_related_mapping(
      left=all_models.CycleTaskGroupObjectTask,
      right=all_models.CalendarEvent)
  # Eager-load the cycle/workflow/ACL columns used during generation so the
  # per-task loop below does not trigger lazy loads.
  columns = all_models.CycleTaskGroupObjectTask.query.options(
      orm.joinedload("cycle").load_only("workflow_id",
                                        "is_current",
                                        "is_verification_needed"),
      orm.joinedload("cycle").joinedload("workflow").load_only(
          "unit", "recurrences", "next_cycle_start_date",
      ),
      orm.subqueryload("_access_control_list").joinedload(
          "ac_role").undefer_group("AccessControlRole_complete"),
      orm.subqueryload("_access_control_list").joinedload(
          "access_control_people").joinedload("person").undefer_group(
          "Person_complete"),
      load_only(
          all_models.CycleTaskGroupObjectTask.id,
          all_models.CycleTaskGroupObjectTask.end_date,
          all_models.CycleTaskGroupObjectTask.status,
          all_models.CycleTaskGroupObjectTask.title,
          all_models.CycleTaskGroupObjectTask.verified_date,
      ),
  ).order_by(all_models.CycleTaskGroupObjectTask.end_date)
  all_count = columns.count()
  handled = 0
  # Query is already ordered by end_date, hence needs_ordering=False.
  for query_chunk in generate_query_chunks(columns,
                                           chunk_size=self.chunk_size,
                                           needs_ordering=False):
    handled += query_chunk.count()
    logger.info("Cycle task processed: %s/%s", handled, all_count)
    for task in query_chunk:
      # Tasks with no mapped events get an empty set.
      events = task_mappings[
          task.id] if task.id in task_mappings else set()
      self._generate_events_for_task(task, events_ids=events)
      db.session.flush()
def _generate_event_descriptions(self):
  """Generates CalendarEvents descriptions."""
  mapping = utils.get_related_mapping(
      left=all_models.CalendarEvent,
      right=all_models.CycleTaskGroupObjectTask
  )
  events = db.session.query(all_models.CalendarEvent).options(
      load_only(
          all_models.CalendarEvent.id,
          all_models.CalendarEvent.description,
      )
  )
  for chunk in generate_query_chunks(
      events, chunk_size=self.chunk_size
  ):
    for event in chunk:
      # Skip events that have no related cycle tasks mapped.
      if event.id not in mapping:
        continue
      task_ids = mapping[event.id]
      self._generate_description_for_event(event, task_ids=task_ids)