def format_transactions_after_pointer(namespace, pointer, db_session,
                                      result_limit, exclude_types=None,
                                      include_types=None, exclude_folders=True,
                                      exclude_metadata=True,
                                      exclude_account=True, expand=False):
    """
    Return a pair (deltas, new_pointer), where deltas is a list of change
    events, represented as dictionaries:
    {
      "object": <API object type, e.g. "thread">,
      "event": <"create", "modify", or "delete">,
      "id": <public id of the changed object>,
      "cursor": <public_id of the transaction>,
      "attributes": <API representation of the object; omitted for deletes>
    }

    and new_pointer is the integer id of the last included transaction.
    When there is nothing to report, ([], pointer) is returned, where
    pointer is the id of the last transaction examined (or the value passed
    in if no transactions were examined).

    Arguments
    ---------
    namespace: object exposing `id` and `public_id`
        The namespace for which to get changes.
    pointer: int
        Process transactions starting after this id.
    db_session: new_session
        database session
    result_limit: int
        Maximum number of results to return. (Because we may roll up multiple
        changes to the same object, fewer results can be returned.)
    exclude_types: iterable, optional
        If given, don't include transactions for these types of objects.
    include_types: iterable, optional
        If given, only include transactions for these types of objects.
        The caller's collection is never modified.
    exclude_folders: bool
        If True, suppress 'folder' and 'label' transactions.
    exclude_metadata: bool
        If True, suppress 'metadata' transactions, even when 'metadata' is
        listed in include_types.
    exclude_account: bool
        If True, suppress 'account' transactions.
    expand: bool
        Forwarded to the Thread/Message loading options and to `encode`.
    """
    exclude_types = set(exclude_types) if exclude_types else set()
    # Begin backwards-compatibility shim -- suppress new object types for now,
    # because clients may not be able to deal with them.
    if exclude_folders is True:
        exclude_types.update(('folder', 'label'))
    if exclude_account is True:
        exclude_types.add('account')
    # End backwards-compatibility shim.

    # Metadata is excluded by default, and can only be included by setting the
    # exclude_metadata flag to False. If listed in include_types, remove it.
    if exclude_metadata is True:
        exclude_types.add('metadata')
        if include_types is not None and 'metadata' in include_types:
            # Build a filtered copy instead of calling .remove(): that would
            # mutate the caller's collection and, for a list, only drop the
            # first occurrence.
            include_types = [t for t in include_types if t != 'metadata']

    last_trx = _get_last_trx_id_for_namespace(namespace.id, db_session)
    if last_trx == pointer:
        return ([], pointer)

    while True:
        # deleted_at condition included to allow this query to be satisfied via
        # the legacy index on (namespace_id, deleted_at) for performance.
        # Also need to explicitly specify the index hint because the query
        # planner is dumb as nails and otherwise would make this super slow for
        # some values of namespace_id and pointer.
        # TODO(emfree): Remove this hack and ensure that the right index (on
        # namespace_id only) exists.
        trx_query = (
            db_session.query(Transaction)
            .filter(Transaction.id > pointer,
                    Transaction.namespace_id == namespace.id,
                    Transaction.deleted_at.is_(None))
            .with_hint(Transaction, 'USE INDEX (namespace_id_deleted_at)'))

        # exclude_types is always a set at this point; only add the predicate
        # when there is something to exclude so we never emit `NOT IN ()`.
        if exclude_types:
            trx_query = trx_query.filter(
                ~Transaction.object_type.in_(exclude_types))

        if include_types is not None:
            trx_query = trx_query.filter(
                Transaction.object_type.in_(include_types))

        transactions = (trx_query.order_by(asc(Transaction.id))
                        .limit(result_limit).all())

        if not transactions:
            return ([], pointer)

        results = []

        # Group deltas by object type.
        trxs_by_obj_type = collections.defaultdict(list)
        for trx in transactions:
            trxs_by_obj_type[trx.object_type].append(trx)

        for obj_type, trxs in trxs_by_obj_type.items():
            # Build a dictionary mapping pairs (record_id, command) to
            # transaction. If successive modifies for a given record id appear
            # in the list of transactions, this will only keep the latest
            # one (which is what we want).
            latest_trxs = list({
                (trx.record_id, trx.command): trx
                for trx in sorted(trxs, key=lambda t: t.id)
            }.values())

            # Load all referenced not-deleted objects.
            ids_to_query = [trx.record_id for trx in latest_trxs
                            if trx.command != 'delete']

            object_cls = transaction_objects()[obj_type]

            if not ids_to_query:
                # Only deletes for this type: nothing to load, so skip the
                # round trip (and the empty IN predicate it would need).
                objects = {}
            elif object_cls == Account:
                # The base query for Account queries the /Namespace/ table
                # since the API-returned "`account`" is a `namespace`
                # under-the-hood.
                query = db_session.query(Namespace).join(Account).filter(
                    Account.id.in_(ids_to_query),
                    Namespace.id == namespace.id)

                # Key by /namespace.account_id/ --
                # namespace.id may not be equal to account.id
                # and trx.record_id == account.id for `account` trxs.
                objects = {obj.account_id: obj for obj in query}
            else:
                query = db_session.query(object_cls).filter(
                    object_cls.id.in_(ids_to_query),
                    object_cls.namespace_id == namespace.id)

                if object_cls == Thread:
                    query = query.options(*Thread.api_loading_options(expand))
                elif object_cls == Message:
                    query = query.options(
                        *Message.api_loading_options(expand))

                objects = {obj.id: obj for obj in query}

            for trx in latest_trxs:
                delta = {
                    'object': trx.object_type,
                    'event': EVENT_NAME_FOR_COMMAND[trx.command],
                    'id': trx.object_public_id,
                    'cursor': trx.public_id
                }
                if trx.command != 'delete':
                    obj = objects.get(trx.record_id)
                    if obj is None:
                        # The object no longer exists; drop this delta.
                        continue
                    repr_ = encode(
                        obj, namespace_public_id=namespace.public_id,
                        expand=expand)
                    delta['attributes'] = repr_

                results.append((trx.id, delta))

        if results:
            # Sort deltas by id of the underlying transactions. Sort on the
            # id only so the delta dicts are never compared with each other.
            results.sort(key=lambda pair: pair[0])
            deltas = [d for _, d in results]
            return (deltas, results[-1][0])
        else:
            # It's possible that none of the referenced objects exist any more,
            # meaning the result list is empty. In that case, keep traversing
            # the log until we get actual results or reach the end.
            pointer = transactions[-1].id
def format_transactions_after_pointer(namespace, pointer, db_session,
                                      result_limit, exclude_types=None,
                                      include_types=None, exclude_folders=True,
                                      expand=False):
    """
    Return a pair (deltas, new_pointer), where deltas is a list of change
    events, represented as dictionaries:
    {
      "object": <API object type, e.g. "thread">,
      "event": <"create", "modify", or "delete">,
      "id": <public id of the changed object>,
      "cursor": <public_id of the transaction>,
      "attributes": <API representation of the object; omitted for deletes>
    }

    and new_pointer is the integer id of the last included transaction.
    When there is nothing to report, ([], pointer) is returned, where
    pointer is the id of the last transaction examined (or the value passed
    in if no transactions were examined).

    Arguments
    ---------
    namespace: object exposing `id` and `public_id`
        The namespace for which to get changes.
    pointer: int
        Process transactions starting after this id.
    db_session: new_session
        database session
    result_limit: int
        Maximum number of results to return. (Because we may roll up multiple
        changes to the same object, fewer results can be returned.)
    exclude_types: iterable, optional
        If given, don't include transactions for these types of objects.
        'account' transactions are always excluded.
    include_types: iterable, optional
        If given, only include transactions for these types of objects.
    exclude_folders: bool
        If True, suppress 'folder' and 'label' transactions.
    expand: bool
        Forwarded to the Thread/Message loading options and to `encode`.
    """
    exclude_types = set(exclude_types) if exclude_types else set()
    # Begin backwards-compatibility shim -- suppress new object types for now,
    # because clients may not be able to deal with them.
    exclude_types.add('account')
    if exclude_folders is True:
        exclude_types.update(('folder', 'label'))
    # End backwards-compatibility shim.

    last_trx = _get_last_trx_id_for_namespace(namespace.id, db_session)
    if last_trx == pointer:
        return ([], pointer)

    while True:
        # deleted_at condition included to allow this query to be satisfied via
        # the legacy index on (namespace_id, deleted_at) for performance.
        # Also need to explicitly specify the index hint because the query
        # planner is dumb as nails and otherwise would make this super slow for
        # some values of namespace_id and pointer.
        # TODO(emfree): Remove this hack and ensure that the right index (on
        # namespace_id only) exists.
        trx_query = (
            db_session.query(Transaction)
            .filter(Transaction.id > pointer,
                    Transaction.namespace_id == namespace.id,
                    Transaction.deleted_at.is_(None))
            .with_hint(Transaction, 'USE INDEX (namespace_id_deleted_at)'))

        # exclude_types always contains at least 'account', so this filter is
        # applied unconditionally.
        trx_query = trx_query.filter(
            ~Transaction.object_type.in_(exclude_types))

        if include_types is not None:
            trx_query = trx_query.filter(
                Transaction.object_type.in_(include_types))

        transactions = (trx_query.order_by(asc(Transaction.id))
                        .limit(result_limit).all())

        if not transactions:
            return ([], pointer)

        results = []

        # Group deltas by object type.
        trxs_by_obj_type = collections.defaultdict(list)
        for trx in transactions:
            trxs_by_obj_type[trx.object_type].append(trx)

        for obj_type, trxs in trxs_by_obj_type.items():
            # Build a dictionary mapping pairs (record_id, command) to
            # transaction. If successive modifies for a given record id appear
            # in the list of transactions, this will only keep the latest
            # one (which is what we want).
            latest_trxs = list({
                (trx.record_id, trx.command): trx
                for trx in sorted(trxs, key=lambda t: t.id)
            }.values())

            # Load all referenced not-deleted objects.
            ids_to_query = [trx.record_id for trx in latest_trxs
                            if trx.command != 'delete']

            object_cls = transaction_objects()[obj_type]

            if ids_to_query:
                query = db_session.query(object_cls).filter(
                    object_cls.id.in_(ids_to_query),
                    object_cls.namespace_id == namespace.id)
                if object_cls == Thread:
                    query = query.options(*Thread.api_loading_options(expand))
                elif object_cls == Message:
                    query = query.options(
                        *Message.api_loading_options(expand))
                objects = {obj.id: obj for obj in query}
            else:
                # Only deletes for this type: nothing to load, so skip the
                # round trip (and the empty IN predicate it would need).
                objects = {}

            for trx in latest_trxs:
                delta = {
                    'object': trx.object_type,
                    'event': EVENT_NAME_FOR_COMMAND[trx.command],
                    'id': trx.object_public_id,
                    'cursor': trx.public_id
                }
                if trx.command != 'delete':
                    obj = objects.get(trx.record_id)
                    if obj is None:
                        # The object no longer exists; drop this delta.
                        continue
                    repr_ = encode(obj,
                                   namespace_public_id=namespace.public_id,
                                   expand=expand)
                    delta['attributes'] = repr_

                results.append((trx.id, delta))

        if results:
            # Sort deltas by id of the underlying transactions. Sort on the
            # id only so the delta dicts are never compared with each other.
            results.sort(key=lambda pair: pair[0])
            deltas = [d for _, d in results]
            return (deltas, results[-1][0])
        else:
            # It's possible that none of the referenced objects exist any more,
            # meaning the result list is empty. In that case, keep traversing
            # the log until we get actual results or reach the end.
            pointer = transactions[-1].id
def format_transactions_after_pointer(namespace, pointer, db_session,
                                      result_limit, exclude_types=None,
                                      include_types=None, exclude_folders=True,
                                      exclude_metadata=True,
                                      exclude_account=True, expand=False,
                                      is_n1=False):
    """
    Return a pair (deltas, new_pointer), where deltas is a list of change
    events, represented as dictionaries:
    {
      "object": <API object type, e.g. "thread">,
      "event": <"create", "modify", or "delete">,
      "id": <public id of the changed object>,
      "cursor": <public_id of the transaction>,
      "attributes": <API representation of the object; omitted for deletes>
    }

    and new_pointer is the integer id of the last included transaction.
    When there is nothing to report, ([], pointer) is returned, where
    pointer is the id of the last transaction examined (or the value passed
    in if no transactions were examined).

    Arguments
    ---------
    namespace: object exposing `id` and `public_id`
        The namespace for which to get changes.
    pointer: int
        Process transactions starting after this id.
    db_session: new_session
        database session
    result_limit: int
        Maximum number of results to return. (Because we may roll up multiple
        changes to the same object, fewer results can be returned.)
    exclude_types: iterable, optional
        If given, don't include transactions for these types of objects.
    include_types: iterable, optional
        If given, only include transactions for these types of objects.
        The caller's collection is never modified.
    exclude_folders: bool
        If True, suppress 'folder' and 'label' transactions.
    exclude_metadata: bool
        If True, suppress 'metadata' transactions, even when 'metadata' is
        listed in include_types.
    exclude_account: bool
        If True, suppress 'account' transactions.
    expand: bool
        Forwarded to the Thread/Message loading options and to `encode`.
    is_n1: bool
        Forwarded unchanged to `encode`.
    """
    exclude_types = set(exclude_types) if exclude_types else set()
    # Begin backwards-compatibility shim -- suppress new object types for now,
    # because clients may not be able to deal with them.
    if exclude_folders is True:
        exclude_types.update(('folder', 'label'))
    if exclude_account is True:
        exclude_types.add('account')
    # End backwards-compatibility shim.

    # Metadata is excluded by default, and can only be included by setting the
    # exclude_metadata flag to False. If listed in include_types, remove it.
    if exclude_metadata is True:
        exclude_types.add('metadata')
        if include_types is not None and 'metadata' in include_types:
            # Build a filtered copy instead of calling .remove(): that would
            # mutate the caller's collection and, for a list, only drop the
            # first occurrence.
            include_types = [t for t in include_types if t != 'metadata']

    last_trx = _get_last_trx_id_for_namespace(namespace.id, db_session)
    if last_trx == pointer:
        return ([], pointer)

    while True:
        trx_query = db_session.query(Transaction).filter(
            Transaction.id > pointer,
            Transaction.namespace_id == namespace.id)

        # exclude_types is always a set at this point; only add the predicate
        # when there is something to exclude so we never emit `NOT IN ()`.
        if exclude_types:
            trx_query = trx_query.filter(
                ~Transaction.object_type.in_(exclude_types))

        if include_types is not None:
            trx_query = trx_query.filter(
                Transaction.object_type.in_(include_types))

        transactions = (trx_query.order_by(asc(Transaction.id))
                        .limit(result_limit).all())

        if not transactions:
            return ([], pointer)

        results = []

        # Group deltas by object type.
        trxs_by_obj_type = collections.defaultdict(list)
        for trx in transactions:
            trxs_by_obj_type[trx.object_type].append(trx)

        for obj_type, trxs in trxs_by_obj_type.items():
            # Build a dictionary mapping pairs (record_id, command) to
            # transaction. If successive modifies for a given record id appear
            # in the list of transactions, this will only keep the latest
            # one (which is what we want).
            latest_trxs = list({
                (trx.record_id, trx.command): trx
                for trx in sorted(trxs, key=lambda t: t.id)
            }.values())

            # Load all referenced not-deleted objects.
            ids_to_query = [trx.record_id for trx in latest_trxs
                            if trx.command != 'delete']

            object_cls = transaction_objects()[obj_type]

            if not ids_to_query:
                # Only deletes for this type: nothing to load, so skip the
                # round trip (and the empty IN predicate it would need).
                objects = {}
            elif object_cls == Account:
                # The base query for Account queries the /Namespace/ table
                # since the API-returned "`account`" is a `namespace`
                # under-the-hood.
                query = db_session.query(Namespace).join(Account).filter(
                    Account.id.in_(ids_to_query),
                    Namespace.id == namespace.id)

                # Key by /namespace.account_id/ --
                # namespace.id may not be equal to account.id
                # and trx.record_id == account.id for `account` trxs.
                objects = {obj.account_id: obj for obj in query}
            else:
                query = db_session.query(object_cls).filter(
                    object_cls.id.in_(ids_to_query),
                    object_cls.namespace_id == namespace.id)

                if object_cls == Thread:
                    query = query.options(*Thread.api_loading_options(expand))
                elif object_cls == Message:
                    query = query.options(
                        *Message.api_loading_options(expand))

                objects = {obj.id: obj for obj in query}

            for trx in latest_trxs:
                delta = {
                    'object': trx.object_type,
                    'event': EVENT_NAME_FOR_COMMAND[trx.command],
                    'id': trx.object_public_id,
                    'cursor': trx.public_id
                }
                if trx.command != 'delete':
                    obj = objects.get(trx.record_id)
                    if obj is None:
                        # The object no longer exists; drop this delta.
                        continue
                    repr_ = encode(obj,
                                   namespace_public_id=namespace.public_id,
                                   expand=expand, is_n1=is_n1)
                    delta['attributes'] = repr_

                results.append((trx.id, delta))

        if results:
            # Sort deltas by id of the underlying transactions. Sort on the
            # id only so the delta dicts are never compared with each other.
            results.sort(key=lambda pair: pair[0])
            deltas = [d for _, d in results]
            return (deltas, results[-1][0])
        else:
            # It's possible that none of the referenced objects exist any more,
            # meaning the result list is empty. In that case, keep traversing
            # the log until we get actual results or reach the end.
            pointer = transactions[-1].id