def _ensure_program_relationships(objects):
    """Ensure that snapshotted object is related to audit program.

    This function is made to handle multiple snapshots for a single audit.

    Args:
        objects: list of snapshot objects with child_id and child_type set.
    """
    pairs = [(o.child_type, o.child_id) for o in objects]
    assert len({o.parent.id for o in objects}) == 1  # fail on multiple audits
    program = ("Program", objects[0].parent.program_id)
    rel = relationship.Relationship
    columns = db.session.query(
        rel.destination_type,
        rel.destination_id,
        rel.source_type,
        rel.source_id,
    )
    query = columns.filter(
        tuple_(rel.destination_type, rel.destination_id) == program,
        tuple_(rel.source_type, rel.source_id).in_(pairs)
    ).union(
        columns.filter(
            tuple_(rel.source_type, rel.source_id) == program,
            tuple_(rel.destination_type, rel.destination_id).in_(pairs)
        )
    )
    existing_pairs = set(sum([
        [(r.destination_type, r.destination_id),
         (r.source_type, r.source_id)]
        for r in query
    ], []))  # build a set of all found type-id pairs
    missing_pairs = set(pairs).difference(existing_pairs)
    _insert_program_relationships(program, missing_pairs)
def get_relationships(relationships):
    """Retrieve relationships.

    Args:
        relationships: iterable of (source_type, source_id,
            destination_type, destination_id) tuples.
    """
    with benchmark("snapshotter.helpers.get_relationships"):
        if relationships:
            relationship_columns = db.session.query(
                models.Relationship.id,
                models.Relationship.modified_by_id,
                models.Relationship.created_at,
                models.Relationship.updated_at,
                models.Relationship.source_type,
                models.Relationship.source_id,
                models.Relationship.destination_type,
                models.Relationship.destination_id,
                models.Relationship.context_id,
            )
            return relationship_columns.filter(
                tuple_(
                    models.Relationship.source_type,
                    models.Relationship.source_id,
                    models.Relationship.destination_type,
                    models.Relationship.destination_id,
                ).in_(relationships)
            ).union(
                relationship_columns.filter(
                    tuple_(
                        models.Relationship.destination_type,
                        models.Relationship.destination_id,
                        models.Relationship.source_type,
                        models.Relationship.source_id,
                    ).in_(relationships)
                )
            )
        else:
            return set()
def _populate_cache(self, stubs):
    """Fetch all mappings for objects in stubs, cache them in self.cache."""
    # Union is here to convince MySQL to use two separate indices and
    # merge the results. Just using `or` results in a full-table scan.
    # The manual column list avoids loading the full object, which would
    # also try to load related objects.
    cols = db.session.query(
        Relationship.source_type, Relationship.source_id,
        Relationship.destination_type, Relationship.destination_id)
    relationships = cols.filter(
        tuple_(Relationship.source_type, Relationship.source_id).in_(
            [(s.type, s.id) for s in stubs]
        )
    ).union_all(
        cols.filter(
            tuple_(Relationship.destination_type,
                   Relationship.destination_id).in_(
                [(s.type, s.id) for s in stubs]))
    ).all()
    for (src_type, src_id, dst_type, dst_id) in relationships:
        src = Stub(src_type, src_id)
        dst = Stub(dst_type, dst_id)
        # Only store a neighbor if we queried for it; this way we know
        # we'll be storing the complete neighborhood by the end of the loop.
        if src in stubs:
            self.cache[src].add(dst)
        if dst in stubs:
            self.cache[dst].add(src)
def _ensure_program_relationships(snapshots):
    """Ensure that snapshotted object is related to audit program.

    This function is made to handle multiple snapshots for a single audit.

    Args:
        snapshots: list of snapshot objects with child_id, child_type and
            parent set.
    """
    # Assert that every parent is an Audit, as the code relies on the
    # program_id field.
    assert {s.parent_type for s in snapshots} == {"Audit"}
    rel = relationship.Relationship
    program_children = {}
    for obj in snapshots:
        program_children.setdefault(obj.parent.program, set()).add(
            (obj.child_type, obj.child_id)
        )
    for program, children_set in program_children.items():
        query = db.session.query(
            rel.destination_type, rel.destination_id
        ).filter(
            and_(
                rel.source_type == "Program",
                rel.source_id == program.id,
                tuple_(rel.destination_type,
                       rel.destination_id).in_(children_set)
            )
        ).union_all(
            db.session.query(
                rel.source_type, rel.source_id
            ).filter(
                and_(
                    rel.destination_type == "Program",
                    rel.destination_id == program.id,
                    tuple_(rel.source_type, rel.source_id).in_(children_set)
                )
            )
        )
        children_set.difference_update(query.all())
        child_objects = {}
        type_ids = {}
        for child in children_set:
            type_ids.setdefault(child[0], set()).add(child[1])
        for child_type, ids in type_ids.items():
            child_model = inflector.get_model(child_type)
            query = child_model.query.filter(child_model.id.in_(ids))
            for child in query:
                child_objects[(child.type, child.id)] = child
        for child in children_set:
            if child in child_objects:
                db.session.add(
                    relationship.Relationship(
                        source=program,
                        destination=child_objects[child],
                    )
                )
def _ensure_program_relationships(objects):
    """Ensure that snapshotted object is related to audit program.

    This function is made to handle multiple snapshots for a single audit.

    Args:
        objects: list of snapshot objects with child_id, child_type and
            parent set.
    """
    # Assert that every parent is an Audit, as the code relies on the
    # program_id field.
    assert {o.parent_type for o in objects} == {"Audit"}

    relationship_stub = collections.namedtuple(
        "RelationshipStub",
        ["source_id", "source_type", "destination_id", "destination_type"],
    )

    required_relationships = set(
        relationship_stub(o.parent.program_id, "Program",
                          o.child_id, o.child_type)
        for o in objects
    )

    if not required_relationships:
        # nothing to create
        return

    rel = relationship.Relationship
    columns = db.session.query(
        rel.source_id, rel.source_type,
        rel.destination_id, rel.destination_type,
    )

    existing_mappings = columns.filter(
        tuple_(rel.source_id, rel.source_type,
               rel.destination_id, rel.destination_type)
        .in_(required_relationships)
    )
    existing_mappings_reverse = columns.filter(
        tuple_(rel.destination_id, rel.destination_type,
               rel.source_id, rel.source_type)
        .in_(required_relationships)
    )

    required_relationships -= set(
        relationship_stub(row.source_id, row.source_type,
                          row.destination_id, row.destination_type)
        for row in existing_mappings
    )
    required_relationships -= set(
        relationship_stub(row.destination_id, row.destination_type,
                          row.source_id, row.source_type)
        for row in existing_mappings_reverse
    )

    _insert_program_relationships(required_relationships)
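# The three variants above share one idiom: check relationship existence in
# both directions with a composite-key IN, and use UNION rather than OR so
# the database can hit one index per branch. A minimal standalone sketch of
# what that compiles to (table and pair values here are illustrative, not
# from any of the projects above; SQLAlchemy 1.x-style select()):
from sqlalchemy import column, select, table, tuple_

relationships = table(
    "relationships",
    column("source_type"), column("source_id"),
    column("destination_type"), column("destination_id"),
)
pairs = [("Control", 1), ("Objective", 2)]
stmt = select([relationships.c.source_type,
               relationships.c.source_id]).where(
    tuple_(relationships.c.source_type,
           relationships.c.source_id).in_(pairs)
).union(
    select([relationships.c.destination_type,
            relationships.c.destination_id]).where(
        tuple_(relationships.c.destination_type,
               relationships.c.destination_id).in_(pairs))
)
# Renders roughly:
#   SELECT ... WHERE (source_type, source_id) IN ((?, ?), (?, ?))
#   UNION SELECT ... WHERE (destination_type, destination_id) IN (...)
print(stmt)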
def get_records(_audit, _snapshots):
    """Get Record objects related to provided audit and snapshots."""
    return db.session.query(Record).filter(
        tuple_(Record.type, Record.key, Record.property,
               Record.content).in_({
            ("Snapshot", s.id, "parent", "Audit-{}".format(_audit.id))
            for s in _snapshots
        }))
def yield_records_to_be_deleted(prod_conn, table_name,
                                records_marked_for_deletion, batch_size=100):
    """Yield records to be marked as deleted from the pre-prod table.

    This method yields records marked for deletion ('W') from the pre-prod
    database table.

    :param prod_conn: connection object to prod database
    :param table_name: name of the table as string
    :param records_marked_for_deletion: records from pre-prod marked for
        deletion (rec_status: 'W')
    :param batch_size: batch size to yield results
    :return: yields records from prod in batches of batch_size
    """
    table = prod_conn.get_table(table_name)
    natural_keys = get_natural_key_columns(table)
    columns_to_select = [
        table.c[column_name]
        for column_name in get_columns_names_to_pick_for_delete(table)
    ]
    key_columns = [table.columns[key] for key in natural_keys]
    key_values = [[row[key] for key in natural_keys]
                  for row in records_marked_for_deletion]
    query = select(columns_to_select, from_obj=table).where(
        and_(table.c.rec_status == Constants.STATUS_CURRENT,
             tuple_(*key_columns).in_(key_values)))
    result = prod_conn.execute(query, stream_results=True)
    rows = result.fetchmany(batch_size)
    while len(rows) > 0:
        yield rows
        rows = result.fetchmany(batch_size)
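# The fetchmany() loop above is a generic pattern for streaming a large
# result set in fixed-size chunks; a small sketch of the same loop factored
# out (`result` is any DBAPI/SQLAlchemy result object supporting fetchmany):
def fetch_in_batches(result, batch_size=100):
    """Yield non-empty batches of rows until the result is exhausted."""
    while True:
        rows = result.fetchmany(batch_size)
        if not rows:
            break
        yield rows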
def get_snapshots(objects=None, ids=None):
    with benchmark("snapshotter.helpers.get_snapshots"):
        if objects and ids:
            raise Exception(
                "Insert only iterable of (parent, child) tuples or set of IDs")
        columns = db.session.query(
            models.Snapshot.id,
            models.Snapshot.context_id,
            models.Snapshot.created_at,
            models.Snapshot.updated_at,
            models.Snapshot.parent_type,
            models.Snapshot.parent_id,
            models.Snapshot.child_type,
            models.Snapshot.child_id,
            models.Snapshot.revision_id,
            models.Snapshot.modified_by_id,
        )
        if objects:
            return columns.filter(
                tuple_(
                    models.Snapshot.parent_type,
                    models.Snapshot.parent_id,
                    models.Snapshot.child_type,
                    models.Snapshot.child_id,
                ).in_({(parent.type, parent.id, child.type, child.id)
                       for parent, child in objects}))
        if ids:
            return columns.filter(models.Snapshot.id.in_(ids))
        return set()
def _set_latest_revisions(objects):
    """Set latest revision_id for given child_type.

    Args:
        objects: list of snapshot objects with child_id and child_type set.
    """
    pairs = [(o.child_type, o.child_id) for o in objects]
    query = db.session.query(
        func.max(revision.Revision.id, name="id", identifier="id"),
        revision.Revision.resource_type,
        revision.Revision.resource_id,
    ).filter(
        tuple_(
            revision.Revision.resource_type,
            revision.Revision.resource_id,
        ).in_(pairs)
    ).group_by(
        revision.Revision.resource_type,
        revision.Revision.resource_id,
    )
    id_map = {(r_type, r_id): id_ for id_, r_type, r_id in query}
    for o in objects:
        o.revision_id = id_map.get((o.child_type, o.child_id))
        if o.revision_id is None:
            raise exceptions.InternalServerError(errors.MISSING_REVISION)
def reindex(parents=None):
    """Reindex all snapshots or limit to a subset of certain parents.

    Args:
        parents: An iterable of parents for which to reindex their scopes.

    Returns:
        Set of parent-child pairs that were reindexed.
    """
    columns = db.session.query(
        models.Snapshot.parent_type,
        models.Snapshot.parent_id,
        models.Snapshot.child_type,
        models.Snapshot.child_id,
    )
    query = columns
    if parents:
        _parents = {(obj.type, obj.id) for obj in parents}
        query = query.filter(
            tuple_(
                models.Snapshot.parent_type,
                models.Snapshot.parent_id,
            ).in_(_parents))
    pairs = {Pair.from_4tuple(p) for p in query}
    reindex_pairs(pairs)
    return pairs
def test_acl_propagation_on_unmap(self):
    """Test if ACLs are deleted correctly when the object is unmapped."""
    issue = self.objects['issue']
    db.session.delete(
        issue.related_sources[0])  # Delete audit relationship
    db.session.commit()

    # Check if issue propagated roles were deleted:
    acl_count = all_models.AccessControlList.query.filter(
        all_models.AccessControlList.object_type == "Issue",
        all_models.AccessControlList.object_id == issue.id,
    ).count()
    self.assertEqual(acl_count, 1)

    # Check if comment/document propagated roles were deleted:
    acl_count = all_models.AccessControlList.query.filter(
        tuple_(all_models.AccessControlList.object_id,
               all_models.AccessControlList.object_type).in_(
            ((self.objects['issue_comment'].id, "Comment"),
             (self.objects['issue_document'].id, "Document")))).count()
    # NOTE: The result should actually be 2 here, but because the Admin role
    # does not propagate permissions to comment/document, those permissions
    # are missing.
    self.assertEqual(acl_count, 0)
def drawstillage(self, h):
    sl = td.s.query(StockAnnotation)\
             .join(StockItem)\
             .outerjoin(StockLine)\
             .filter(tuple_(StockAnnotation.text, StockAnnotation.time).in_(
                 select([StockAnnotation.text,
                         func.max(StockAnnotation.time)],
                        StockAnnotation.atype == 'location')
                 .group_by(StockAnnotation.text)))\
             .filter(StockItem.finished == None)\
             .order_by(StockLine.name != null(), StockAnnotation.time)\
             .options(joinedload('stockitem'))\
             .options(joinedload('stockitem.stocktype'))\
             .options(joinedload('stockitem.stockline'))\
             .all()
    if not sl:
        return self.drawlines(h)
    f = ui.tableformatter('pl l c L c lp')
    header = f("Loc", "Racked", "StockID", "Name", "BB", "Line")
    ml = [f(a.text, a.time.date().strftime("%d %b"), a.stockid,
            a.stockitem.stocktype.format(),
            a.stockitem.bestbefore or "",
            a.stockitem.stockline.name if a.stockitem.stockline else "")
          for a in sl]
    ml.insert(0, header)
    y = 0
    for l in ml:
        for line in l.display(self.w):
            self.win.addstr(y, 0, line)
            y = y + 1
        if y >= h:
            break
class LedgerEntryScanner(TableScanner):
    """Garbage-collects stale ledger entries."""

    table = LedgerEntry.__table__
    columns = [
        LedgerEntry.creditor_id, LedgerEntry.debtor_id,
        LedgerEntry.entry_id, LedgerEntry.added_at
    ]
    pk = tuple_(table.c.creditor_id, table.c.debtor_id, table.c.entry_id)

    def __init__(self):
        super().__init__()
        self.retention_interval = timedelta(
            days=current_app.config['APP_LEDGER_RETENTION_DAYS'])

    @property
    def blocks_per_query(self) -> int:
        return int(
            current_app.config['APP_LEDGER_ENTRIES_SCAN_BLOCKS_PER_QUERY'])

    @property
    def target_beat_duration(self) -> int:
        return int(
            current_app.config['APP_LEDGER_ENTRIES_SCAN_BEAT_MILLISECS'])

    @atomic
    def process_rows(self, rows):
        cutoff_ts = datetime.now(tz=timezone.utc) - self.retention_interval
        pks_to_delete = [(row[0], row[1], row[2])
                         for row in rows if row[3] < cutoff_ts]
        if pks_to_delete:
            db.session.execute(self.table.delete().where(
                self.pk.in_(pks_to_delete)))
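# Storing the composite primary key as a reusable tuple_() expression, as the
# scanner above does, makes batch deletes by key straightforward. A runnable
# sketch with an invented two-column key (SQLite 3.15+ is assumed, since
# row-value IN needs it):
from sqlalchemy import Column, Integer, MetaData, Table, create_engine, tuple_

metadata = MetaData()
entries = Table(
    "entries", metadata,
    Column("creditor_id", Integer, primary_key=True),
    Column("entry_id", Integer, primary_key=True),
)
pk = tuple_(entries.c.creditor_id, entries.c.entry_id)

engine = create_engine("sqlite://")
metadata.create_all(engine)
with engine.begin() as conn:
    conn.execute(entries.insert(), [
        {"creditor_id": 1, "entry_id": 1},
        {"creditor_id": 1, "entry_id": 2},
        {"creditor_id": 2, "entry_id": 1},
    ])
    # Delete two of the three rows by their full composite key.
    conn.execute(entries.delete().where(pk.in_([(1, 1), (2, 1)])))
    print(conn.execute(entries.select()).fetchall())  # [(1, 2)]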
def get_acl_payload(snapshots):
    """Get ACL payload for newly created snapshots."""
    acl_payload = []
    parents = set(
        (snapshot.parent_id, snapshot.parent_type) for snapshot in snapshots)
    ac_roles = db.session.query(
        all_models.AccessControlRole.id,
        all_models.AccessControlRole.name
    ).filter(
        all_models.AccessControlRole.name.in_(
            ("Auditors", "Audit Captains",
             "Auditors Snapshot Mapped", "Audit Captains Mapped")))
    ac_roles = {name: id_ for id_, name in ac_roles}
    parent_roles = db.session.query(
        all_models.AccessControlList.id,
        all_models.AccessControlList.person_id,
        all_models.AccessControlList.ac_role_id
    ).filter(
        all_models.AccessControlList.ac_role_id.in_(
            (ac_roles["Auditors"], ac_roles["Audit Captains"])),
        tuple_(all_models.AccessControlList.object_id,
               all_models.AccessControlList.object_type).in_(parents))
    child_roles = {
        ac_roles["Auditors"]: ac_roles["Auditors Snapshot Mapped"],
        ac_roles["Audit Captains"]: ac_roles["Audit Captains Mapped"],
    }
    for parent_id, person_id, ac_role_id in parent_roles:
        for snapshot in snapshots:
            acl_payload.append({
                "object_id": snapshot.id,
                "object_type": "Snapshot",
                "ac_role_id": child_roles[ac_role_id],
                "parent_id": parent_id,
                "person_id": person_id,
            })
    return acl_payload
def equal_in(cls, other: Iterable) -> BinaryExpression:
    equal_columns = cls.equal_tuple(cls)
    nullable_columns = [
        column.is_(None) for column in equal_columns
        if getattr(column, 'nullable', None)
    ]
    return sql_expression.or_(
        sql_expression.tuple_(*equal_columns).in_(other),
        *nullable_columns)
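# Why the IS NULL escape hatch above matters: under SQL three-valued logic,
# (a, b) IN (...) evaluates to NULL rather than TRUE whenever b is NULL, so
# such rows can never match the IN alone. A small illustration with invented
# names:
from sqlalchemy import column, or_, select, table, tuple_

t = table("t", column("a"), column("b"))
pairs = [(1, "x"), (2, "y")]
strict = tuple_(t.c.a, t.c.b).in_(pairs)
lenient = or_(strict, t.c.b.is_(None))  # also admit rows where b is unset
print(select([t]).where(lenient))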
def related(self, obj):
    if obj in self.cache:
        return self.cache[obj]
    # Pre-fetch the neighborhood for every enqueued object since we're going
    # to need those results in a few steps. This drastically reduces the
    # number of queries.
    stubs = {s for rel in self.queue for s in rel}
    stubs.add(obj)
    # Union is here to convince MySQL to use two separate indices and
    # merge the results. Just using `or` results in a full-table scan.
    # The manual column list avoids loading the full object, which would
    # also try to load related objects.
    cols = db.session.query(
        Relationship.source_type, Relationship.source_id,
        Relationship.destination_type, Relationship.destination_id)
    relationships = cols.filter(
        tuple_(Relationship.source_type, Relationship.source_id).in_(
            [(s.type, s.id) for s in stubs]
        )
    ).union_all(
        cols.filter(
            tuple_(Relationship.destination_type,
                   Relationship.destination_id).in_(
                [(s.type, s.id) for s in stubs]))
    ).all()
    batch_requests = collections.defaultdict(set)
    for (src_type, src_id, dst_type, dst_id) in relationships:
        src = Stub(src_type, src_id)
        dst = Stub(dst_type, dst_id)
        # Only store a neighbor if we queried for it; this way we know
        # we'll be storing the complete neighborhood by the end of the loop.
        batch_requests[src_type].add(src_id)
        batch_requests[dst_type].add(dst_id)
        if src in stubs:
            self.cache[src].add(dst)
        if dst in stubs:
            self.cache[dst].add(src)
    for type_, ids in batch_requests.iteritems():
        model = getattr(models.all_models, type_)
        instances = model.query.filter(model.id.in_(ids))
        for instance in instances:
            self.instance_cache[Stub(type_, instance.id)] = instance
    return self.cache[obj]
def pubroot(request, info, session):
    date = datetime.date.today()
    # If it's the early hours of the morning, it's more useful for us
    # to consider it still to be yesterday.
    if datetime.datetime.now().hour < 4:
        date = date - datetime.timedelta(1)
    thisweek_start = date - datetime.timedelta(date.weekday())
    thisweek_end = thisweek_start + datetime.timedelta(6)
    lastweek_start = thisweek_start - datetime.timedelta(7)
    lastweek_end = thisweek_end - datetime.timedelta(7)
    weekbefore_start = lastweek_start - datetime.timedelta(7)
    weekbefore_end = lastweek_end - datetime.timedelta(7)

    weeks = [
        ("Current week", thisweek_start, thisweek_end,
         business_totals(session, thisweek_start, thisweek_end)),
        ("Last week", lastweek_start, lastweek_end,
         business_totals(session, lastweek_start, lastweek_end)),
        ("The week before last", weekbefore_start, weekbefore_end,
         business_totals(session, weekbefore_start, weekbefore_end)),
    ]

    currentsession = Session.current(session)

    barsummary = (
        session.query(StockLine)
        .filter(StockLine.location == "Bar")
        .order_by(StockLine.dept_id, StockLine.name)
        .options(joinedload_all("stockonsale.stocktype.unit"))
        .options(undefer_group("qtys"))
        .all()
    )

    stillage = (
        session.query(StockAnnotation)
        .join(StockItem)
        .outerjoin(StockLine)
        .filter(
            tuple_(StockAnnotation.text, StockAnnotation.time).in_(
                select(
                    [StockAnnotation.text, func.max(StockAnnotation.time)],
                    StockAnnotation.atype == "location",
                ).group_by(StockAnnotation.text)
            )
        )
        .filter(StockItem.finished == None)
        .order_by(StockLine.name != null(), StockAnnotation.time)
        .options(joinedload_all("stockitem.stocktype.unit"))
        .options(joinedload_all("stockitem.stockline"))
        .options(undefer_group("qtys"))
        .all()
    )

    return (
        "index.html",
        {"currentsession": currentsession,
         "barsummary": barsummary,
         "stillage": stillage,
         "weeks": weeks},
    )
def delete_records(snapshot_ids):
    """Delete all records for some snapshots.

    Args:
        snapshot_ids: An iterable with snapshot IDs whose full text records
            should be deleted.
    """
    to_delete = {("Snapshot", _id) for _id in snapshot_ids}
    db.session.query(Record).filter(
        tuple_(Record.type, Record.key).in_(to_delete)
    ).delete(synchronize_session=False)
    db.session.commit()
def switch(self, betID):
    # Return redirect('/bet/' + betID + '/')
    user = User()
    if user:
        RUser = user[0]
        c.user = user[1]
        RMatch = db.Session.query(db.Matches).filter(
            db.Matches.id == betID).first()
        if RMatch:
            RBet = db.Session.query(db.Bets).filter(
                and_(db.Bets.match == RMatch.id,
                     db.Bets.user == RUser.id)).first()
            if RBet.team == RMatch.team1:
                RBetsTotal1 = db.Session.query(db.BetsTotal).filter(
                    and_(db.BetsTotal.match == RMatch.id,
                         db.BetsTotal.team == RMatch.team1)).first()
                RBetsTotal2 = db.Session.query(db.BetsTotal).filter(
                    and_(db.BetsTotal.match == RMatch.id,
                         db.BetsTotal.team == RMatch.team2)).first()
                RBet.team = RMatch.team2
            else:
                RBetsTotal1 = db.Session.query(db.BetsTotal).filter(
                    and_(db.BetsTotal.match == RMatch.id,
                         db.BetsTotal.team == RMatch.team2)).first()
                RBetsTotal2 = db.Session.query(db.BetsTotal).filter(
                    and_(db.BetsTotal.match == RMatch.id,
                         db.BetsTotal.team == RMatch.team1)).first()
                RBet.team = RMatch.team1

            RBetsTotal1.value -= RBet.value
            RBetsTotal2.value += RBet.value

            keys = []
            totalGroups1 = defaultdict(Counter)
            for group in RBetsTotal1.groups:
                totalGroups1[group[0]][group[1]] = group[2]
                keys.append((group[0], group[1]))

            totalGroups2 = defaultdict(Counter)
            for group in RBetsTotal2.groups:
                totalGroups2[group[0]][group[1]] = group[2]
                keys.append((group[0], group[1]))

            # Convert PostgreSQL's multidimensional array to dictionary
            usersGroups = defaultdict(Counter)
            for group in RBet.groups:
                totalGroups1[group[0]][group[1]] -= group[2]
                totalGroups2[group[0]][group[1]] += group[2]

            orderedGroups1 = []
            orderedGroups2 = []
            orderedItems = db.Session.query(db.Items).filter(
                tuple_(db.Items.defindex, db.Items.quality).in_(keys)
            ).order_by(
                db.Items.type, db.Items.quality, db.Items.value.desc()
            ).all()
            for orderedItem in orderedItems:
                defindex = orderedItem.defindex
                quality = orderedItem.quality
                if quality in totalGroups1[defindex]:
                    orderedGroups1.append(
                        [defindex, quality, totalGroups1[defindex][quality]])
                if quality in totalGroups2[defindex]:
                    orderedGroups2.append(
                        [defindex, quality, totalGroups2[defindex][quality]])

            RBetsTotal1.groups = orderedGroups1
            RBetsTotal2.groups = orderedGroups2
            db.Session.commit()
            return redirect('/bet/' + betID + '/')
    return redirect('/')
def get_revisions(pairs, revisions, filters=None):
    """Retrieve revision ids for pairs.

    If a revisions dictionary is provided, it will validate that the selected
    revision exists in the object's revision history.

    Args:
        pairs: set([(parent_1, child_1), (parent_2, child_2), ...])
        revisions: dict({(parent, child): revision_id, ...})
        filters: predicate
    """
    with benchmark("snapshotter.helpers.get_revisions"):
        revision_id_cache = dict()
        if pairs:
            with benchmark("get_revisions.create caches"):
                child_stubs = {pair.child for pair in pairs}
                with benchmark("get_revisions.create child -> parents cache"):
                    parents_cache = collections.defaultdict(set)
                    for parent, child in pairs:
                        parents_cache[child].add(parent)
            with benchmark("get_revisions.retrieve revisions"):
                query = db.session.query(
                    models.Revision.id,
                    models.Revision.resource_type,
                    models.Revision.resource_id,
                ).filter(
                    tuple_(
                        models.Revision.resource_type,
                        models.Revision.resource_id,
                    ).in_(child_stubs)
                ).order_by(models.Revision.id.desc())
                if filters:
                    for _filter in filters:
                        query = query.filter(_filter)
            with benchmark("get_revisions.create revision_id cache"):
                for revid, restype, resid in query:
                    child = Stub(restype, resid)
                    for parent in parents_cache[child]:
                        key = Pair(parent, child)
                        if key in revisions:
                            if revid == revisions[key]:
                                revision_id_cache[key] = revid
                            else:
                                logger.warning(
                                    "Specified revision for object %s but "
                                    "couldn't find the revision '%s' in "
                                    "object history", key, revisions[key])
                        else:
                            if key not in revision_id_cache:
                                revision_id_cache[key] = revid
        return revision_id_cache
def query_tiles(sess, layer, chunk, refresh_cutoff, refresh_cutoff_missing):
    """see existing_tiles; query the set of tiles 'chunk' to see which
    already exist. 'cutoff's are timestamps instead of intervals now"""
    q = sess.query(mt.Tile).filter_by(layer=layer).filter(
        tuple_(mt.Tile.z, mt.Tile.x, mt.Tile.y).in_(list(chunk)))

    def cutoff_criteria():
        if refresh_cutoff is not None:
            yield and_(mt.Tile.uuid != null_digest(),
                       mt.Tile.fetched_on > refresh_cutoff)
        if refresh_cutoff_missing is not None:
            yield and_(mt.Tile.uuid == null_digest(),
                       mt.Tile.fetched_on > refresh_cutoff_missing)

    coc = list(cutoff_criteria())
    if coc:
        q = q.filter(or_(*coc))
    return set((layer, t.z, t.x, t.y) for t in q)
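# The query above is one instance of a common batching idiom: probe which
# composite keys already exist, then diff against the candidate set. A
# generic sketch (invented schema, no real connection):
from sqlalchemy import column, select, table, tuple_

tiles = table("tiles", column("z"), column("x"), column("y"))
chunk = {(3, 1, 2), (3, 1, 3), (4, 0, 0)}
stmt = select([tiles.c.z, tiles.c.x, tiles.c.y]).where(
    tuple_(tiles.c.z, tiles.c.x, tiles.c.y).in_(list(chunk)))
# With a real connection:
#   existing = set(conn.execute(stmt))
#   missing = chunk - existing
print(stmt)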
def _get_latest_object_revisions(objects):
    """Get latest revisions of given objects."""
    object_tuples = [(o.id, o.type) for o in objects]
    revisions = Revision.query.filter(
        Revision.id.in_(
            db.session.query(func.max(Revision.id)).filter(
                tuple_(
                    Revision.resource_id,
                    Revision.resource_type,
                ).in_(object_tuples)
            ).group_by(
                Revision.resource_type,
                Revision.resource_id,
            )
        )
    )
    return revisions
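# The nested query above is the classic "latest row per group" recipe: group
# revisions by their logical key, take MAX(id) per group, then select the
# rows carrying those ids. A generic sketch with invented table names:
from sqlalchemy import column, func, select, table, tuple_

revs = table("revisions",
             column("id"), column("resource_id"), column("resource_type"))
wanted = [(1, "Control"), (5, "Objective")]
latest_ids = select([func.max(revs.c.id)]).where(
    tuple_(revs.c.resource_id, revs.c.resource_type).in_(wanted)
).group_by(revs.c.resource_type, revs.c.resource_id)
stmt = select([revs]).where(revs.c.id.in_(latest_ids))
print(stmt)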
def _get_existing_markets(self, markets: List[str]) -> List[str]:
    def get_market_pk(market):
        pair = sorted(market.split("_"))
        return (pair[0], pair[1], self.exchange.id)

    market_pks = [get_market_pk(market) for market in markets]
    results = self.session.query(models.ExchangeMarket) \
        .filter(tuple_(models.ExchangeMarket.first_coin_id,
                       models.ExchangeMarket.second_coin_id,
                       models.ExchangeMarket.exchange_id).in_(market_pks)) \
        .order_by(models.ExchangeMarket.volume_usd.desc()) \
        .limit(self.max_markets) \
        .all()
    return [result.original_name for result in results]
def _fetch_neighborhood(self, parent_object, objects):
    """Fetch relationships for objects and parent."""
    with benchmark("Snapshot._fetch_object_neighborhood"):
        query_pairs = set()
        for obj in objects:
            for snd_obj in self.rules.rules[parent_object.type]["snd"]:
                query_pairs.add((obj.type, obj.id, snd_obj))

        columns = db.session.query(
            models.Relationship.source_type,
            models.Relationship.source_id,
            models.Relationship.destination_type,
            models.Relationship.destination_id)
        relationships = columns.filter(
            tuple_(
                models.Relationship.destination_type,
                models.Relationship.destination_id,
                models.Relationship.source_type,
            ).in_(query_pairs)
        ).union(
            columns.filter(
                tuple_(
                    models.Relationship.source_type,
                    models.Relationship.source_id,
                    models.Relationship.destination_type,
                ).in_(query_pairs)))

        neighborhood = set()
        for (stype, sid, dtype, did) in relationships:
            source = Stub(stype, sid)
            destination = Stub(dtype, did)
            if source in objects:
                neighborhood.add(destination)
            else:
                neighborhood.add(source)
        return neighborhood
def deactivate_old_records(dest_connector, dest_table, natural_keys, batch):
    """Deactivate old records in the destination table based on matching
    records from the batch, using the natural key combination.

    :param dest_connector: destination connection
    :param dest_table: name of the table to be migrated
    :param natural_keys: natural key combination for dest_table
    :param batch: batch of records to be verified
    """
    key_columns = [dest_table.columns[key] for key in natural_keys]
    key_values = [[row[key] for key in natural_keys] for row in batch]
    # Update prod rec_status to inactive for records matching the natural
    # keys of the records in the current batch.
    update_query = dest_table.update(
        and_(dest_table.c.rec_status == 'C',
             tuple_(*key_columns).in_(key_values))
    ).values(rec_status='I', to_date=time.strftime("%Y%m%d"))
    dest_connector.execute(update_query)
def _prepare_cache(self, translatable):
    """Bulk load translations required to translate a translatable
    'structure'."""
    translatables = self._collect_translatables(translatable)
    if not translatables:
        return {}
    pks = [(t.context, t.message_id) for t in translatables]
    pk_filter = tuple_(self.model.context, self.model.message_id).in_(pks)
    translations = self.session.query(self.model).filter(
        self.model.language == self.language).filter(pk_filter).values(
        self.model.context, self.model.message_id, self.model.value)
    cache = {(t[0], t[1]): t[2] for t in translations}
    return cache
class CommittedTransferScanner(TableScanner):
    """Garbage-collects stale committed transfers."""

    table = CommittedTransfer.__table__
    columns = [
        CommittedTransfer.creditor_id,
        CommittedTransfer.debtor_id,
        CommittedTransfer.creation_date,
        CommittedTransfer.transfer_number,
        CommittedTransfer.committed_at,
    ]
    pk = tuple_(
        table.c.creditor_id,
        table.c.debtor_id,
        table.c.creation_date,
        table.c.transfer_number,
    )

    def __init__(self):
        super().__init__()
        self.retention_interval = timedelta(
            days=current_app.config['APP_MAX_TRANSFER_DELAY_DAYS']) + max(
            timedelta(days=current_app.config['APP_LOG_RETENTION_DAYS']),
            timedelta(days=current_app.config['APP_LEDGER_RETENTION_DAYS']),
        )

    @property
    def blocks_per_query(self) -> int:
        return int(current_app.
                   config['APP_COMMITTED_TRANSFERS_SCAN_BLOCKS_PER_QUERY'])

    @property
    def target_beat_duration(self) -> int:
        return int(
            current_app.config['APP_COMMITTED_TRANSFERS_SCAN_BEAT_MILLISECS'])

    @atomic
    def process_rows(self, rows):
        cutoff_ts = datetime.now(tz=timezone.utc) - self.retention_interval
        pks_to_delete = [(row[0], row[1], row[2], row[3])
                         for row in rows if row[4] < cutoff_ts]
        if pks_to_delete:
            db.session.execute(self.table.delete().where(
                self.pk.in_(pks_to_delete)))
def get_revisions(pairs, revisions, filters=None):
    """Retrieve revision ids for pairs.

    Args:
        pairs: set([(parent_1, child_1), (parent_2, child_2), ...])
        revisions: dict({(parent, child): revision_id, ...})
        filters: predicate
    """
    with benchmark("snapshotter.helpers.get_revisions"):
        revision_id_cache = dict()
        if pairs:
            with benchmark("get_revisions.create caches"):
                child_stubs = {pair.child for pair in pairs}
                with benchmark("get_revisions.create child -> parents cache"):
                    parents_cache = collections.defaultdict(set)
                    for parent, child in pairs:
                        parents_cache[child].add(parent)
            with benchmark("get_revisions.retrieve revisions"):
                query = db.session.query(
                    models.Revision.id,
                    models.Revision.resource_type,
                    models.Revision.resource_id,
                ).filter(
                    tuple_(models.Revision.resource_type,
                           models.Revision.resource_id).in_(child_stubs)
                ).order_by(models.Revision.id.desc())
                if filters:
                    for _filter in filters:
                        query = query.filter(_filter)
            with benchmark("get_revisions.create revision_id cache"):
                for revid, restype, resid in query:
                    child = Stub(restype, resid)
                    for parent in parents_cache[child]:
                        key = Pair(parent, child)
                        if key in revisions:
                            if revid == revisions[key]:
                                revision_id_cache[key] = revid
                        else:
                            if key not in revision_id_cache:
                                revision_id_cache[key] = revid
        return revision_id_cache
def update_movies(self):
    # Python 2 code: lines read from the file are byte strings, hence the
    # explicit decode.
    with open(project_path + '/data/movies', 'r') as movies:
        while True:
            batch = (line.decode('utf-8').strip().split('(')
                     for _, line in zip(range(100), movies)
                     if line.strip() != '')
            batch = [('('.join(parts[:-1]).strip(), int(parts[-1][:-1]))
                     for parts in batch]
            if batch == []:
                break
            batch = set(batch)
            existing = self.session.query(Movie.title, Movie.year)\
                .filter(tuple_(Movie.title, Movie.year).in_(batch)).all()
            new_ = batch.difference(existing)
            new_ = [Movie(title=title, year=year, meta={})
                    for title, year in new_]
            self.session.add_all(new_)
            self.session.commit()
    self.redirect('/')
def _do_delete(*, query, orm_model, skip_archive, session):
    import re
    from datetime import datetime

    print("Performing Delete...")
    # Using bulk delete: create a new table and copy the rows there.
    timestamp_str = re.sub(r'[^\d]', '', datetime.utcnow().isoformat())[:14]
    target_table_name = f'_airflow_deleted__{orm_model.name}__{timestamp_str}'
    print(f"Moving data to table {target_table_name}")
    stmt = CreateTableAs(target_table_name, query.selectable)
    logger.debug("ctas query:\n%s", stmt.compile())
    session.execute(stmt)
    session.commit()

    # Delete the rows from the old table.
    metadata = reflect_tables([orm_model.name, target_table_name], session)
    source_table = metadata.tables[orm_model.name]
    target_table = metadata.tables[target_table_name]
    logger.debug("rows moved; purging from %s", source_table.name)
    bind = session.get_bind()
    dialect_name = bind.dialect.name
    if dialect_name == 'sqlite':
        pk_cols = source_table.primary_key.columns
        delete = source_table.delete().where(
            tuple_(*pk_cols).in_(
                session.query(*[
                    target_table.c[x.name]
                    for x in source_table.primary_key.columns
                ]).subquery()))
    else:
        delete = source_table.delete().where(
            and_(col == target_table.c[col.name]
                 for col in source_table.primary_key.columns))
    logger.debug("delete statement:\n%s", delete.compile())
    session.execute(delete)
    session.commit()
    if skip_archive:
        target_table.drop()
        session.commit()
    print("Finished Performing Delete")
def analyze(self):
    """Analyze which snapshots need to be updated and which created."""
    query = set(db.session.query(
        models.Snapshot.parent_type,
        models.Snapshot.parent_id,
        models.Snapshot.child_type,
        models.Snapshot.child_id,
    ).filter(tuple_(
        models.Snapshot.parent_type,
        models.Snapshot.parent_id,
    ).in_(self.parents)))

    existing_scope = {Pair.from_4tuple(fields) for fields in query}
    full_scope = {Pair(parent, child)
                  for parent, children in self.snapshots.items()
                  for child in children}

    for_update = existing_scope
    for_create = full_scope - existing_scope
    return for_create, for_update
def test_acl_propagation_on_unmap(self):
    """Test if ACLs are deleted correctly when the object is unmapped."""
    issue = self.objects['issue']
    db.session.delete(issue.related_sources[0])  # Delete audit relationship
    db.session.commit()

    # Check if issue propagated roles were deleted:
    acl_count = all_models.AccessControlList.query.filter(
        all_models.AccessControlList.object_type == "Issue",
        all_models.AccessControlList.object_id == issue.id,
    ).count()
    self.assertEqual(acl_count, 1)

    # Check if comment/document propagated roles were deleted:
    acl_count = all_models.AccessControlList.query.filter(
        tuple_(all_models.AccessControlList.object_id,
               all_models.AccessControlList.object_type).in_(
            ((self.objects['issue_comment'].id, "Comment"),
             (self.objects['issue_document'].id, "Document")))
    ).count()
    self.assertEqual(acl_count, 2)
def _set_revisions(cls, objects):
    """Set latest revision_id for given child_type.

    Args:
        objects: list of snapshot objects with child_id and child_type set.
    """
    pairs = [(o.child_type, o.child_id) for o in objects]
    query = db.session.query(
        func.max(revision.Revision.id, name="id", identifier="id"),
        revision.Revision.resource_type,
        revision.Revision.resource_id,
    ).filter(
        tuple_(
            revision.Revision.resource_type,
            revision.Revision.resource_id,
        ).in_(pairs)
    ).group_by(
        revision.Revision.resource_type,
        revision.Revision.resource_id,
    )
    id_map = {(r_type, r_id): id_ for id_, r_type, r_id in query}
    for o in objects:
        o.revision_id = id_map.get((o.child_type, o.child_id))
async def list_services_access_rights(
    self,
    key_versions: List[Tuple[str, str]],
    product_name: Optional[str] = None,
) -> Dict[Tuple[str, str], List[ServiceAccessRightsAtDB]]:
    """Batch version of get_service_access_rights."""
    service_to_access_rights = defaultdict(list)
    # Build the optional product filter separately; inlining the Python
    # conditional inside where() lets the ternary swallow the key_versions
    # filter whenever product_name is None.
    query = sa.select([services_access_rights]).where(
        tuple_(services_access_rights.c.key,
               services_access_rights.c.version).in_(key_versions)
    )
    if product_name:
        query = query.where(
            services_access_rights.c.product_name == product_name)
    async with self.db_engine.acquire() as conn:
        async for row in conn.execute(query):
            service_to_access_rights[
                (
                    row[services_access_rights.c.key],
                    row[services_access_rights.c.version],
                )
            ].append(ServiceAccessRightsAtDB(**row))
    return service_to_access_rights
def drawstillage(self, h):
    sl = (
        td.s.query(StockAnnotation)
        .join(StockItem)
        .outerjoin(StockLine)
        .filter(
            tuple_(StockAnnotation.text, StockAnnotation.time).in_(
                select(
                    [StockAnnotation.text, func.max(StockAnnotation.time)],
                    StockAnnotation.atype == "location",
                ).group_by(StockAnnotation.text)
            )
        )
        .filter(StockItem.finished == None)
        .order_by(StockLine.name != null(), StockAnnotation.time)
        .options(joinedload("stockitem"))
        .options(joinedload("stockitem.stocktype"))
        .options(joinedload("stockitem.stockline"))
        .all()
    )
    f = ui.tableformatter("pl l c L lp")
    header = f("Loc", "Racked", "StockID", "Name", "Line")
    ml = [
        f(
            a.text,
            a.time.date().strftime("%d %b"),
            a.stockid,
            a.stockitem.stocktype.format(),
            a.stockitem.stockline.name if a.stockitem.stockline else "",
        )
        for a in sl
    ]
    ml.insert(0, header)
    y = 0
    for l in ml:
        for line in l.display(self.w):
            self.addstr(y, 0, line)
            y = y + 1
        if y >= h:
            break
def get_revisions_query(child_stubs, revisions, filters=None):
    """Return revisions query for sent params."""
    queries = []
    if revisions:
        queries.append(get_revision_query_for(
            models.Revision.id.in_(revisions.values()),
            filters,
        ))
    if child_stubs:
        queries.append(get_revision_query_for(
            tuple_(
                models.Revision.resource_type,
                models.Revision.resource_id,
            ).in_(child_stubs),
            filters,
        ))
    if not queries:
        queries.append(get_revision_query_for(True, filters))
    if len(queries) == 1:
        return queries[0]
    return queries[0].union_all(*queries[1:])
def test_create_indexing(self):
    """Test that creating objects results in a full index."""
    self._check_csv_response(self._import_file("snapshotter_create.csv"), {})

    control = db.session.query(models.Control).filter(
        models.Control.slug == "control-3").one()
    access_group = db.session.query(models.AccessGroup).filter(
        models.AccessGroup.slug == "ag-2").one()
    objective = db.session.query(models.Objective).filter(
        models.Objective.slug == "obj-1").one()
    process = db.session.query(models.Process).filter(
        models.Process.slug == "proc-2").one()

    custom_attribute_defs = self.create_custom_attribute_definitions()
    custom_attribute_values = [
        {
            "custom_attribute": custom_attribute_defs["control"],
            "attributable": control,
            "attribute_value": "control value 1",
        },
        {
            "custom_attribute": custom_attribute_defs["objective"],
            "attributable": objective,
            "attribute_value": "objective value 1",
        },
        {
            "custom_attribute": custom_attribute_defs["process"],
            "attributable": process,
            "attribute_value": "07/12/2016",
        },
        {
            "custom_attribute": custom_attribute_defs["access_group"],
            "attributable": access_group,
            "attribute_value": "access_group text value 1",
        },
    ]
    for value in custom_attribute_values:
        factories.CustomAttributeValueFactory(**value)

    # Adding custom attribute values via the factory doesn't create
    # revisions, so we modify all the objects via import, which saves the
    # full object state in the revisions table (including custom attribute
    # values).
    self._check_csv_response(self._import_file("snapshotter_update.csv"), {})

    program = db.session.query(models.Program).filter(
        models.Program.slug == "Prog-13211").one()

    self.create_audit(program)

    audit = db.session.query(models.Audit).filter(
        models.Audit.title.like("%Snapshotable audit%")).first()

    snapshots = db.session.query(models.Snapshot).all()

    records = db.session.query(Record).filter(
        tuple_(
            Record.type,
            Record.key,
            Record.property,
            Record.content
        ).in_(
            {("Snapshot", s.id, "parent", "Audit-{}".format(audit.id))
             for s in snapshots}
        ))

    self.assertEqual(records.count(), 57)

    # At this point all objects are no longer in the session and we have to
    # manually refresh them from the database.
    control = db.session.query(models.Control).filter(
        models.Control.slug == "control-3").one()
    access_group = db.session.query(models.AccessGroup).filter(
        models.AccessGroup.slug == "ag-2").one()
    objective = db.session.query(models.Objective).filter(
        models.Objective.slug == "obj-1").one()
    process = db.session.query(models.Process).filter(
        models.Process.slug == "proc-2").one()

    custom_attributes = [
        (control, "control text field 1", "control value 1"),
        (objective, "objective rich field 1", "objective value 1"),
        (process, "process date field 1", "07/12/2016"),
        (access_group, "access group text field 2",
         "access_group text value 1"),
    ]

    for obj, definition, value in custom_attributes:
        snapshot = db.session.query(models.Snapshot).filter(
            models.Snapshot.child_type == obj.type,
            models.Snapshot.child_id == obj.id,
        ).one()
        _cav = db.session.query(Record).filter(
            Record.key == snapshot.id,
            Record.type == "Snapshot",
            Record.property == definition,
            Record.content == value)
        _title = db.session.query(Record).filter(
            Record.key == snapshot.id,
            Record.type == "Snapshot",
            Record.property == "title",
            Record.content == obj.title)
        _desc = db.session.query(Record).filter(
            Record.key == snapshot.id,
            Record.type == "Snapshot",
            Record.property == "description",
            Record.content == obj.description)
        self.assertEqual(_cav.count(), 1)
        self.assertEqual(_title.count(), 1)
        self.assertEqual(_desc.count(), 1)
def test_pickle_operators_two(self):
    clause = tuple_(1, 2, 3)
    eq_(str(clause), str(util.pickle.loads(util.pickle.dumps(clause))))
def test_update_indexing(self):
    """Test that updating objects results in a full index."""
    self._check_csv_response(self._import_file("snapshotter_create.csv"), {})

    access_group = db.session.query(models.AccessGroup).filter(
        models.AccessGroup.title == "ag-2").one()
    objective = db.session.query(models.Objective).filter(
        models.Objective.title == "obj-1").one()

    custom_attribute_defs = self.create_custom_attribute_definitions()
    custom_attribute_values = [
        {
            "custom_attribute": custom_attribute_defs["objective"],
            "attributable": objective,
            "attribute_value": "objective value 1",
        },
        {
            "custom_attribute": custom_attribute_defs["access_group"],
            "attributable": access_group,
            "attribute_value": "access_group text value 1",
        },
    ]
    for value in custom_attribute_values:
        factories.CustomAttributeValueFactory(**value)

    # Adding custom attribute values via the factory doesn't create
    # revisions, so we modify all the objects via import, which saves the
    # full object state in the revisions table (including custom attribute
    # values).
    self._check_csv_response(self._import_file("snapshotter_update.csv"), {})

    program = db.session.query(models.Program).filter(
        models.Program.slug == "Prog-13211").one()

    self.create_audit(program)

    objective_cav = db.session.query(models.CustomAttributeValue).filter(
        models.CustomAttributeValue.attribute_value ==
        "objective value 1").one()
    access_group_cav = db.session.query(models.CustomAttributeValue).filter(
        models.CustomAttributeValue.attribute_value ==
        "access_group text value 1").one()

    cavs = [
        (objective_cav, "objective CA value edited after initial index"),
        (access_group_cav,
         "access_group CA value edited after initial index"),
    ]

    # This is not a correct way to update CAVs, but a PUT request on a CAV
    # doesn't work, and attaching the full custom attribute signature
    # (custom_attribute_definitions, custom_attribute_values and
    # custom_attributes) is out of scope for this.
    for obj, val in cavs:
        obj.attribute_value = val
        db.session.add(obj)
    db.session.commit()

    access_group = db.session.query(models.AccessGroup).filter(
        models.AccessGroup.slug == "ag-2").one()
    objective = db.session.query(models.Objective).filter(
        models.Objective.slug == "obj-1").one()

    obj_edits = [
        (objective, "objective title edited after initial index"),
        (access_group, "access group title edited after initial index"),
    ]

    for obj, title in obj_edits:
        obj = self.refresh_object(obj)
        self.api.modify_object(obj, {"title": title})

    audit = db.session.query(models.Audit).filter(
        models.Audit.title.like("%Snapshotable audit%")).one()

    # Initiate update operation
    self.api.modify_object(audit, {
        "snapshots": {
            "operation": "upsert"
        }
    })

    snapshots = db.session.query(models.Snapshot).all()

    records = db.session.query(Record).filter(
        tuple_(
            Record.type,
            Record.key,
            Record.property,
            Record.content
        ).in_(
            {("Snapshot", s.id, "parent", "Audit-{}".format(s.parent_id))
             for s in snapshots}
        ))

    self.assertEqual(records.count(), 57)

    custom_attributes = [
        (objective, "objective title edited after initial index",
         "objective rich field 1",
         "objective CA value edited after initial index"),
        (access_group, "access group title edited after initial index",
         "access group text field 2",
         "access_group CA value edited after initial index"),
    ]

    for obj, title, definition, value in custom_attributes:
        obj = self.refresh_object(obj)
        snapshot = db.session.query(models.Snapshot).filter(
            models.Snapshot.child_type == obj.type,
            models.Snapshot.child_id == obj.id,
        ).one()
        _cav = db.session.query(Record).filter(
            Record.key == snapshot.id,
            Record.type == "Snapshot",
            Record.property == definition,
            Record.content == value)
        _title = db.session.query(Record).filter(
            Record.key == snapshot.id,
            Record.type == "Snapshot",
            Record.property == "title",
            Record.content == title)
        _desc = db.session.query(Record).filter(
            Record.key == snapshot.id,
            Record.type == "Snapshot",
            Record.property == "description",
            Record.content == obj.description)
        self.assertEqual(_cav.count(), 1)
        self.assertEqual(_title.count(), 1)
        self.assertEqual(_desc.count(), 1)
def size(cls):
    return tuple_(cls.width, cls.height)
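# The bare accessor above is presumably the expression half of a hybrid
# property (the decorator isn't shown in the snippet). A guess at the full
# pattern, with an invented Box model, so the tuple can be used in
# class-level queries (SQLite 3.15+ assumed for row-value IN):
from sqlalchemy import Column, Integer, create_engine, tuple_
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy.orm import Session

Base = declarative_base()

class Box(Base):
    __tablename__ = "boxes"
    id = Column(Integer, primary_key=True)
    width = Column(Integer)
    height = Column(Integer)

    @hybrid_property
    def size(self):
        return (self.width, self.height)  # instance side: a plain tuple

    @size.expression
    def size(cls):
        return tuple_(cls.width, cls.height)  # class side: SQL row value

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
session = Session(engine)
session.add_all([Box(width=2, height=3), Box(width=4, height=5)])
session.commit()
print(session.query(Box).filter(Box.size.in_([(2, 3)])).count())  # 1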
def reindex_pairs(pairs):
    """Reindex selected snapshots.

    Args:
        pairs: A list of parent-child pairs that uniquely represent snapshot
            objects whose properties should be reindexed.
    """
    if not pairs:
        return
    snapshots = dict()
    options = get_options()
    snapshot_query = models.Snapshot.query.filter(
        tuple_(
            models.Snapshot.parent_type,
            models.Snapshot.parent_id,
            models.Snapshot.child_type,
            models.Snapshot.child_id,
        ).in_(
            {pair.to_4tuple() for pair in pairs}
        )
    ).options(
        orm.subqueryload("revision").load_only(
            "id",
            "resource_type",
            "resource_id",
            "_content",
        ),
        orm.load_only(
            "id",
            "parent_type",
            "parent_id",
            "child_type",
            "child_id",
            "revision_id",
        )
    )
    cad_dict = _get_custom_attribute_dict()
    for snapshot in snapshot_query:
        revision = snapshot.revision
        snapshots[snapshot.id] = {
            "id": snapshot.id,
            "parent_type": snapshot.parent_type,
            "parent_id": snapshot.parent_id,
            "child_type": snapshot.child_type,
            "child_id": snapshot.child_id,
            "revision": get_searchable_attributes(
                CLASS_PROPERTIES[revision.resource_type],
                cad_dict[revision.resource_type],
                revision.content)
        }
    search_payload = []
    for snapshot in snapshots.values():
        for prop, val in get_properties(snapshot).items():
            search_payload.extend(
                get_record_value(
                    prop, val,
                    {
                        "key": snapshot["id"],
                        "type": "Snapshot",
                        "tags": TAG_TMPL.format(**snapshot),
                        "subproperty": "",
                    },
                    options
                )
            )
    delete_records(snapshots.keys())
    insert_records(search_payload)
def _update(self, for_update, event, revisions, _filter):
    """Update (or create) parent objects' snapshots and create revisions
    for them.

    Args:
        event: A ggrc.models.Event instance
        revisions: A set of tuples of pairs with revisions to which it
            should either create or update a snapshot of that particular
            audit
        _filter: Callable that should return True if it should be updated

    Returns:
        OperationResponse
    """
    # pylint: disable=too-many-locals
    with benchmark("Snapshot._update"):
        user_id = get_current_user_id()
        missed_keys = set()
        snapshot_cache = dict()
        modified_snapshot_keys = set()
        data_payload_update = list()
        revision_payload = list()
        response_data = dict()

        if self.dry_run and event is None:
            event_id = 0
        else:
            event_id = event.id

        with benchmark("Snapshot._update.filter"):
            if _filter:
                for_update = {elem for elem in for_update if _filter(elem)}

        with benchmark("Snapshot._update.get existing snapshots"):
            existing_snapshots = db.session.query(
                models.Snapshot.id,
                models.Snapshot.revision_id,
                models.Snapshot.parent_type,
                models.Snapshot.parent_id,
                models.Snapshot.child_type,
                models.Snapshot.child_id,
            ).filter(tuple_(
                models.Snapshot.parent_type,
                models.Snapshot.parent_id,
                models.Snapshot.child_type,
                models.Snapshot.child_id
            ).in_({pair.to_4tuple() for pair in for_update}))

            for esnap in existing_snapshots:
                sid, rev_id, pair_tuple = esnap[0], esnap[1], esnap[2:]
                pair = Pair.from_4tuple(pair_tuple)
                snapshot_cache[pair] = (sid, rev_id)

        with benchmark("Snapshot._update.retrieve latest revisions"):
            revision_id_cache = get_revisions(
                for_update,
                filters=[models.Revision.action.in_(
                    ["created", "modified"])],
                revisions=revisions)

        response_data["revisions"] = {
            "old": {pair: values[1]
                    for pair, values in snapshot_cache.items()},
            "new": revision_id_cache
        }

        with benchmark("Snapshot._update.build snapshot payload"):
            for key in for_update:
                if key in revision_id_cache:
                    sid, rev_id = snapshot_cache[key]
                    latest_rev = revision_id_cache[key]
                    if rev_id != latest_rev:
                        modified_snapshot_keys.add(key)
                        data_payload_update += [{
                            "_id": sid,
                            "_revision_id": latest_rev,
                            "_modified_by_id": user_id
                        }]
                else:
                    missed_keys.add(key)

        if missed_keys:
            logger.warning(
                "Tried to update snapshots for the following objects but "
                "found no revisions: %s", missed_keys)

        if not modified_snapshot_keys:
            return OperationResponse("update", True, set(), response_data)

        with benchmark("Snapshot._update.write snapshots to database"):
            update_sql = models.Snapshot.__table__.update().where(
                models.Snapshot.id == bindparam("_id")).values(
                revision_id=bindparam("_revision_id"),
                modified_by_id=bindparam("_modified_by_id"))
            self._execute(update_sql, data_payload_update)

        with benchmark("Snapshot._update.retrieve inserted snapshots"):
            snapshots = get_snapshots(modified_snapshot_keys)

        with benchmark("Snapshot._update.create snapshots revision payload"):
            for snapshot in snapshots:
                parent = Stub(snapshot.parent_type, snapshot.parent_id)
                context_id = self.context_cache[parent]
                data = create_snapshot_revision_dict(
                    "modified", event_id, snapshot, user_id, context_id)
                revision_payload += [data]

        with benchmark("Insert Snapshot entries into Revision"):
            self._execute(models.Revision.__table__.insert(),
                          revision_payload)

        return OperationResponse("update", True, for_update, response_data)
def reindex_pairs(pairs):
    """Reindex selected snapshots.

    Args:
        pairs: A list of parent-child pairs that uniquely represent snapshot
            objects whose properties should be reindexed.
    """
    # pylint: disable=too-many-locals
    snapshots = dict()
    revisions = dict()
    snap_to_sid_cache = dict()
    search_payload = list()

    object_properties, cad_list = _get_model_properties()
    snapshot_columns, revision_columns = _get_columns()

    snapshot_query = snapshot_columns
    if pairs:
        pairs_filter = tuple_(
            models.Snapshot.parent_type,
            models.Snapshot.parent_id,
            models.Snapshot.child_type,
            models.Snapshot.child_id,
        ).in_({pair.to_4tuple() for pair in pairs})
        snapshot_query = snapshot_columns.filter(pairs_filter)

    for _id, ctx_id, ptype, pid, ctype, cid, revid in snapshot_query:
        pair = Pair.from_4tuple((ptype, pid, ctype, cid))
        snapshots[pair] = [_id, ctx_id, revid]
        snap_to_sid_cache[pair] = _id

    revision_ids = {revid for _, _, revid in snapshots.values()}
    revision_query = revision_columns.filter(
        models.Revision.id.in_(revision_ids)
    )
    for _id, _type, content in revision_query:
        revisions[_id] = get_searchable_attributes(
            object_properties[_type], cad_list, content)

    snapshot_ids = set()
    for pair in snapshots:
        snapshot_id, ctx_id, revision_id = snapshots[pair]
        snapshot_ids.add(snapshot_id)
        properties = revisions[revision_id]
        properties.update({
            "parent": _get_parent_property(pair),
            "child": _get_child_property(pair),
            "child_type": pair.child.type,
            "child_id": pair.child.id
        })
        for prop, val in properties.items():
            if prop and val:
                data = {
                    "key": snapshot_id,
                    "type": "Snapshot",
                    "context_id": ctx_id,
                    "tags": _get_tag(pair),
                    "property": prop,
                    "content": val,
                }
                search_payload += [data]

    delete_records(snapshot_ids)
    insert_records(search_payload)