def reindex(parents=None):
    """Reindex all snapshots or limit to a subset of certain parents.

    Args:
        parents: An iterable of parents for which to reindex their scopes.
            Every item must expose ``type`` and ``id`` attributes. A falsy
            value (``None`` or an empty collection) selects every snapshot.

    Returns:
        Set of parent-child pairs that were selected for reindexing.
    """
    query = db.session.query(
        models.Snapshot.parent_type,
        models.Snapshot.parent_id,
        models.Snapshot.child_type,
        models.Snapshot.child_id,
    )
    if parents:
        parent_keys = {(obj.type, obj.id) for obj in parents}
        query = query.filter(
            tuple_(
                models.Snapshot.parent_type,
                models.Snapshot.parent_id,
            ).in_(parent_keys))

    pairs = {Pair.from_4tuple(row) for row in query}

    # An empty pair set means no snapshot matched. Do not forward it:
    # reindex_pairs only narrows its query when it is given a non-empty
    # collection, so an empty one must never be read as "reindex everything".
    if pairs:
        reindex_pairs(pairs)
    return pairs
def clone_scope(base_parent, new_parent, event):
    """Create exact copy of parent object scope.

    Every snapshot of ``base_parent`` is mirrored under ``new_parent`` with
    the same child and the same revision id.

    Args:
        base_parent: Old parent object
        new_parent: New parent object
        event: Event that triggered scope cloning
    """
    with benchmark("clone_scope.clone audit scope"):
        base_rows = db.session.query(
            models.Snapshot.child_type,
            models.Snapshot.child_id,
            models.Snapshot.revision_id,
        ).filter(
            models.Snapshot.parent_type == base_parent.type,
            models.Snapshot.parent_id == base_parent.id,
        )

        # Re-key every source snapshot under the new parent while keeping
        # the revision it currently points to.
        revision_by_pair = {}
        for child_type, child_id, revision_id in base_rows:
            cloned_pair = Pair.from_4tuple(
                (new_parent.type, new_parent.id, child_type, child_id))
            revision_by_pair[cloned_pair] = revision_id

        parent = Stub(new_parent.type, new_parent.id)
        children = set(pair.child for pair in revision_by_pair)

        generator = SnapshotGenerator(dry_run=False)
        generator.add_family(parent, children)
        generator.create(event, revision_by_pair)
def clone_scope(base_parent, new_parent, event):
    """Create exact copy of parent object scope.

    Reads the (child, revision) rows stored for ``base_parent`` and asks a
    non-dry-run ``SnapshotGenerator`` to create the same children, at the
    same revisions, for ``new_parent``.

    Args:
        base_parent: Old parent object
        new_parent: New parent object
        event: Event that triggered scope cloning
    """
    with benchmark("clone_scope.clone audit scope"):
        belongs_to_base = (
            models.Snapshot.parent_type == base_parent.type,
            models.Snapshot.parent_id == base_parent.id,
        )
        source_rows = db.session.query(
            models.Snapshot.child_type,
            models.Snapshot.child_id,
            models.Snapshot.revision_id,
        ).filter(*belongs_to_base)

        # Pair (under the new parent) -> revision id of the source snapshot.
        snapshot_revisions = dict(
            (
                Pair.from_4tuple(
                    (new_parent.type, new_parent.id, ctype, cid)),
                revid,
            )
            for ctype, cid, revid in source_rows
        )

        parent = Stub(new_parent.type, new_parent.id)
        scope_children = set()
        for pair in snapshot_revisions:
            scope_children.add(pair.child)

        generator = SnapshotGenerator(dry_run=False)
        generator.add_family(parent, scope_children)
        generator.create(event, snapshot_revisions)
def reindex():
    """Reindex all snapshots.

    The snapshot key columns are read chunk by chunk; each chunk is turned
    into a set of pairs, reindexed, and committed before the next chunk.
    """
    snapshot_keys = db.session.query(
        models.Snapshot.parent_type,
        models.Snapshot.parent_id,
        models.Snapshot.child_type,
        models.Snapshot.child_id,
    )
    for chunk in generate_query_chunks(snapshot_keys):
        chunk_pairs = set(Pair.from_4tuple(row) for row in chunk)
        reindex_pairs(chunk_pairs)
        db.session.commit()
def reindex_snapshots(snapshot_ids):
    """Reindex selected snapshots.

    Args:
        snapshot_ids: Collection of Snapshot ids to reindex. Nothing is done
            when it is empty or None.
    """
    if not snapshot_ids:
        return

    selected_keys = db.session.query(
        models.Snapshot.parent_type,
        models.Snapshot.parent_id,
        models.Snapshot.child_type,
        models.Snapshot.child_id,
    ).filter(
        models.Snapshot.id.in_(snapshot_ids)
    )

    # Work chunk by chunk and commit after every chunk.
    for chunk in generate_query_chunks(selected_keys):
        chunk_pairs = set(Pair.from_4tuple(row) for row in chunk)
        reindex_pairs(chunk_pairs)
        db.session.commit()
def reindex():
    """Reindex all snapshots.

    Progress is logged as "<handled so far>/<total>" before each chunk is
    processed; every chunk is committed separately.
    """
    snapshot_keys = db.session.query(
        models.Snapshot.parent_type,
        models.Snapshot.parent_id,
        models.Snapshot.child_type,
        models.Snapshot.child_id,
    )
    total = snapshot_keys.count()
    processed = 0
    for chunk in generate_query_chunks(snapshot_keys):
        # The running counter is advanced by the chunk's own row count.
        processed += chunk.count()
        logger.info("Snapshot: %s/%s", processed, total)
        chunk_pairs = set(Pair.from_4tuple(row) for row in chunk)
        reindex_pairs(chunk_pairs)
        db.session.commit()
def analyze(self):
    """Analyze which snapshots need to be updated and which created.

    Returns:
        A ``(for_create, for_update)`` tuple of pair sets. ``for_update`` is
        every pair already stored for ``self.parents``; ``for_create`` is
        every pair described by ``self.snapshots`` that is not stored yet.
    """
    parent_key = tuple_(
        models.Snapshot.parent_type, models.Snapshot.parent_id
    )
    stored_rows = set(db.session.query(
        models.Snapshot.parent_type,
        models.Snapshot.parent_id,
        models.Snapshot.child_type,
        models.Snapshot.child_id,
    ).filter(parent_key.in_(self.parents)))

    existing_scope = set(Pair.from_4tuple(row) for row in stored_rows)

    # Everything the generator was asked to cover, as parent-child pairs.
    full_scope = set()
    for parent, children in self.snapshots.items():
        for child in children:
            full_scope.add(Pair(parent, child))

    return full_scope - existing_scope, existing_scope
def _update(self, for_update, event, revisions, _filter):
    """Update parent objects' snapshots and create revisions for them.

    Existing snapshots whose stored revision differs from the latest
    revision are re-pointed to the latest one, and a "modified" revision
    entry is written for each of them.

    Args:
        for_update: Collection of pairs whose snapshots should be refreshed
        event: A ggrc.models.Event instance
        revisions: A set of tuples of pairs with revisions to which it should
            either create or update a snapshot of that particular audit
        _filter: Callable that should return True if it should be updated;
            a falsy value disables filtering
    Returns:
        OperationResponse. Its payload is an empty set when no snapshot
        needed a change, otherwise the (filtered) ``for_update`` collection.
    """
    # pylint: disable=too-many-locals
    with benchmark("Snapshot._update"):
        user_id = get_current_user_id()
        missed_keys = set()
        modified_snapshot_keys = set()
        snapshot_cache = {}
        data_payload_update = []
        response_data = {}

        # A dry run is allowed to go without an event; id 0 stands in.
        event_id = 0 if (self.dry_run and event is None) else event.id

        with benchmark("Snapshot._update.filter"):
            if _filter:
                for_update = {pair for pair in for_update if _filter(pair)}

        with benchmark("Snapshot._update.get existing snapshots"):
            existing_snapshots = db.session.query(
                models.Snapshot.id,
                models.Snapshot.revision_id,
                models.Snapshot.parent_type,
                models.Snapshot.parent_id,
                models.Snapshot.child_type,
                models.Snapshot.child_id,
            ).filter(
                tuple_(
                    models.Snapshot.parent_type,
                    models.Snapshot.parent_id,
                    models.Snapshot.child_type,
                    models.Snapshot.child_id,
                ).in_({pair.to_4tuple() for pair in for_update})
            )

            # pair -> (snapshot id, revision id currently stored)
            for row in existing_snapshots:
                snapshot_cache[Pair.from_4tuple(row[2:])] = (row[0], row[1])

        with benchmark("Snapshot._update.retrieve latest revisions"):
            revision_id_cache = get_revisions(
                for_update,
                filters=[
                    models.Revision.action.in_(["created", "modified"])
                ],
                revisions=revisions)

        response_data["revisions"] = {
            "old": {pair: cached[1]
                    for pair, cached in snapshot_cache.items()},
            "new": revision_id_cache
        }

        with benchmark("Snapshot._update.build snapshot payload"):
            for key in for_update:
                if key not in revision_id_cache:
                    missed_keys.add(key)
                    continue
                # NOTE(review): raises KeyError when a pair has a revision
                # but no existing snapshot row - confirm callers only pass
                # pairs that already have snapshots.
                sid, rev_id = snapshot_cache[key]
                latest_rev = revision_id_cache[key]
                if rev_id == latest_rev:
                    continue
                modified_snapshot_keys.add(key)
                data_payload_update.append({
                    "_id": sid,
                    "_revision_id": latest_rev,
                    "_modified_by_id": user_id
                })

        if missed_keys:
            logger.warning(
                "Tried to update snapshots for the following objects but "
                "found no revisions: %s", missed_keys)

        if not modified_snapshot_keys:
            return OperationResponse("update", True, set(), response_data)

        with benchmark("Snapshot._update.write snapshots to database"):
            update_sql = models.Snapshot.__table__.update().where(
                models.Snapshot.id == bindparam("_id")).values(
                    revision_id=bindparam("_revision_id"),
                    modified_by_id=bindparam("_modified_by_id"))
            self._execute(update_sql, data_payload_update)

        with benchmark("Snapshot._update.retrieve inserted snapshots"):
            snapshots = get_snapshots(modified_snapshot_keys)

        with benchmark(
                "Snapshot._update.create snapshots revision payload"):
            revision_payload = [
                create_snapshot_revision_dict(
                    "modified",
                    event_id,
                    snapshot,
                    user_id,
                    self.context_cache[
                        Stub(snapshot.parent_type, snapshot.parent_id)])
                for snapshot in snapshots
            ]

        with benchmark("Insert Snapshot entries into Revision"):
            self._execute(
                models.Revision.__table__.insert(), revision_payload)
        return OperationResponse("update", True, for_update, response_data)
def reindex_pairs(pairs):  # noqa # pylint:disable=too-many-branches
    """Reindex selected snapshots.

    For every snapshot matching ``pairs`` the searchable attributes of its
    revision are turned into fulltext records. The old records of those
    snapshots are deleted and the new ones inserted.

    Args:
        pairs: A list of parent-child pairs that uniquely represent snapshot
            object whose properties should be reindexed. Nothing is done
            when it is empty.
    """
    # pylint: disable=too-many-locals
    if not pairs:
        return

    cad_dict = _get_custom_attribute_dict()
    snapshot_columns, revision_columns = _get_columns()

    pairs_filter = tuple_(
        models.Snapshot.parent_type,
        models.Snapshot.parent_id,
        models.Snapshot.child_type,
        models.Snapshot.child_id,
    ).in_({pair.to_4tuple() for pair in pairs})
    snapshot_query = snapshot_columns.filter(pairs_filter)

    # pair -> [snapshot id, context id, revision id]
    snapshots = dict()
    for _id, ctx_id, ptype, pid, ctype, cid, revid in snapshot_query:
        pair = Pair.from_4tuple((ptype, pid, ctype, cid))
        snapshots[pair] = [_id, ctx_id, revid]

    revision_ids = {revid for _, _, revid in snapshots.values()}
    revision_query = revision_columns.filter(
        models.Revision.id.in_(revision_ids)
    )

    # revision id -> searchable attributes extracted from its content
    revisions = dict()
    for _id, _type, content in revision_query:
        revisions[_id] = get_searchable_attributes(
            CLASS_PROPERTIES[_type], cad_dict, content)

    snapshot_ids = set()
    search_payload = list()
    for pair, (snapshot_id, ctx_id, revision_id) in snapshots.items():
        snapshot_ids.add(snapshot_id)

        # Work on a copy: several snapshots may point at the same revision
        # and the per-snapshot edits below must not leak between them.
        properties = revisions[revision_id].copy()
        properties.update({
            "parent": _get_parent_property(pair),
            "child": _get_child_property(pair),
            "child_type": pair.child.type,
            "child_id": pair.child.id
        })

        # Flatten "assignees" into one property per role.
        # NOTE(review): a later person with the same role overwrites the
        # earlier one - confirm that is intended.
        assignees = properties.pop("assignees", None)
        if assignees:
            for person, roles in assignees:
                if person:
                    for role in roles:
                        properties[role] = [person]

        for prop, val in properties.items():
            if not prop or val is None:
                continue
            # record stub
            rec = {
                "key": snapshot_id,
                "type": "Snapshot",
                "context_id": ctx_id,
                "tags": _get_tag(pair),
                "property": prop,
                "subproperty": "",
                "content": val,
            }
            if isinstance(val, dict) and "title" in val:
                # Option
                rec["content"] = val["title"]
                search_payload.append(rec)
            elif isinstance(val, dict) and val.get("type") == "Person":
                search_payload.extend(get_person_data(rec, val))
                search_payload.extend(get_person_sort_subprop(rec, [val]))
            elif isinstance(val, list) and all(
                    isinstance(p, dict) and p.get("type") == "Person"
                    for p in val):
                # The isinstance guard keeps lists of plain values (which
                # have no .get) from raising; they fall through to the
                # branches below instead.
                for person in val:
                    search_payload.extend(get_person_data(rec, person))
                search_payload.extend(get_person_sort_subprop(rec, val))
            elif isinstance(val, (bool, int, long)):
                rec["content"] = unicode(val)
                search_payload.append(rec)
            elif isinstance(rec["content"], basestring):
                search_payload.append(rec)
            else:
                logger.warning(u"Unsupported value for %s #%s in %s %s: %r",
                               rec["type"], rec["key"], rec["property"],
                               rec["subproperty"], rec["content"])

    delete_records(snapshot_ids)
    insert_records(search_payload)
def _update(self, for_update, event, revisions, _filter):
    """Refresh existing snapshots to their latest revisions.

    Snapshots whose stored revision id differs from the latest known one are
    updated in bulk, and one "modified" revision entry is inserted for each
    updated snapshot.

    Args:
        for_update: Collection of pairs whose snapshots should be refreshed
        event: A ggrc.models.Event instance
        revisions: A set of tuples of pairs with revisions to which it should
            either create or update a snapshot of that particular audit
        _filter: Callable that should return True if it should be updated;
            when falsy, no filtering is applied
    Returns:
        OperationResponse carrying an empty set if nothing had to change,
        otherwise the (filtered) ``for_update`` collection.
    """
    # pylint: disable=too-many-locals
    with benchmark("Snapshot._update"):
        user_id = get_current_user_id()
        missed_keys = set()
        modified_snapshot_keys = set()
        data_payload_update = []
        revision_payload = []
        response_data = {}

        # Only a dry run may omit the event; its id is then reported as 0.
        if event is None and self.dry_run:
            event_id = 0
        else:
            event_id = event.id

        with benchmark("Snapshot._update.filter"):
            if _filter:
                for_update = set(
                    candidate for candidate in for_update
                    if _filter(candidate))

        with benchmark("Snapshot._update.get existing snapshots"):
            pair_columns = tuple_(
                models.Snapshot.parent_type,
                models.Snapshot.parent_id,
                models.Snapshot.child_type,
                models.Snapshot.child_id
            )
            requested = {pair.to_4tuple() for pair in for_update}
            existing_snapshots = db.session.query(
                models.Snapshot.id,
                models.Snapshot.revision_id,
                models.Snapshot.parent_type,
                models.Snapshot.parent_id,
                models.Snapshot.child_type,
                models.Snapshot.child_id,
            ).filter(pair_columns.in_(requested))

            # Maps each pair to (snapshot id, currently stored revision id).
            snapshot_cache = {
                Pair.from_4tuple(esnap[2:]): (esnap[0], esnap[1])
                for esnap in existing_snapshots
            }

        with benchmark("Snapshot._update.retrieve latest revisions"):
            action_filter = models.Revision.action.in_(
                ["created", "modified"])
            revision_id_cache = get_revisions(
                for_update,
                filters=[action_filter],
                revisions=revisions)

        old_revisions = {}
        for pair, values in snapshot_cache.items():
            old_revisions[pair] = values[1]
        response_data["revisions"] = {
            "old": old_revisions,
            "new": revision_id_cache
        }

        with benchmark("Snapshot._update.build snapshot payload"):
            for key in for_update:
                if key in revision_id_cache:
                    # NOTE(review): assumes every pair with a revision also
                    # has an existing snapshot row; otherwise this lookup
                    # raises KeyError - confirm against callers.
                    sid, stored_rev = snapshot_cache[key]
                    latest_rev = revision_id_cache[key]
                    if stored_rev != latest_rev:
                        modified_snapshot_keys.add(key)
                        data_payload_update.append({
                            "_id": sid,
                            "_revision_id": latest_rev,
                            "_modified_by_id": user_id
                        })
                else:
                    missed_keys.add(key)

        if missed_keys:
            logger.warning(
                "Tried to update snapshots for the following objects but "
                "found no revisions: %s", missed_keys)

        if not modified_snapshot_keys:
            return OperationResponse("update", True, set(), response_data)

        with benchmark("Snapshot._update.write snapshots to database"):
            snapshot_table = models.Snapshot.__table__
            update_sql = snapshot_table.update().where(
                models.Snapshot.id == bindparam("_id")
            ).values(
                revision_id=bindparam("_revision_id"),
                modified_by_id=bindparam("_modified_by_id")
            )
            self._execute(update_sql, data_payload_update)

        with benchmark("Snapshot._update.retrieve inserted snapshots"):
            snapshots = get_snapshots(modified_snapshot_keys)

        with benchmark("Snapshot._update.create snapshots revision payload"):
            for snapshot in snapshots:
                parent = Stub(snapshot.parent_type, snapshot.parent_id)
                revision_payload.append(
                    create_snapshot_revision_dict(
                        "modified", event_id, snapshot, user_id,
                        self.context_cache[parent]))

        with benchmark("Insert Snapshot entries into Revision"):
            self._execute(models.Revision.__table__.insert(), revision_payload)
        return OperationResponse("update", True, for_update, response_data)
def reindex_pairs(pairs):
    """Reindex selected snapshots.

    The searchable attributes of each selected snapshot's revision are
    converted into fulltext records; existing records of those snapshots are
    deleted and the fresh ones inserted.

    Args:
        pairs: A list of parent-child pairs that uniquely represent snapshot
            object whose properties should be reindexed. When it is empty,
            no filter is applied to the snapshot query.
    """
    # pylint: disable=too-many-locals
    object_properties, cad_list = _get_model_properties()
    snapshot_columns, revision_columns = _get_columns()

    snapshot_query = snapshot_columns
    if pairs:
        pairs_filter = tuple_(
            models.Snapshot.parent_type,
            models.Snapshot.parent_id,
            models.Snapshot.child_type,
            models.Snapshot.child_id,
        ).in_({pair.to_4tuple() for pair in pairs})
        snapshot_query = snapshot_columns.filter(pairs_filter)

    # pair -> [snapshot id, context id, revision id]
    snapshots = dict()
    for _id, ctx_id, ptype, pid, ctype, cid, revid in snapshot_query:
        pair = Pair.from_4tuple((ptype, pid, ctype, cid))
        snapshots[pair] = [_id, ctx_id, revid]

    revision_ids = {revid for _, _, revid in snapshots.values()}
    revision_query = revision_columns.filter(
        models.Revision.id.in_(revision_ids)
    )

    # revision id -> searchable attributes extracted from its content
    revisions = dict()
    for _id, _type, content in revision_query:
        revisions[_id] = get_searchable_attributes(
            object_properties[_type], cad_list, content)

    snapshot_ids = set()
    search_payload = list()
    for pair, (snapshot_id, ctx_id, revision_id) in snapshots.items():
        snapshot_ids.add(snapshot_id)

        # Copy before adding the pair-specific keys so that snapshots
        # sharing one revision do not write into the same cached dict.
        properties = revisions[revision_id].copy()
        properties.update({
            "parent": _get_parent_property(pair),
            "child": _get_child_property(pair),
            "child_type": pair.child.type,
            "child_id": pair.child.id
        })

        # Properties with an empty name or a falsy value are not indexed.
        for prop, val in properties.items():
            if prop and val:
                search_payload.append({
                    "key": snapshot_id,
                    "type": "Snapshot",
                    "context_id": ctx_id,
                    "tags": _get_tag(pair),
                    "property": prop,
                    "content": val,
                })

    delete_records(snapshot_ids)
    insert_records(search_payload)