Beispiel #1
0
def format_transactions_after_pointer(namespace, pointer, db_session,
                                      result_limit, exclude_types=None,
                                      include_types=None, exclude_folders=True,
                                      exclude_metadata=True,
                                      exclude_account=True, expand=False):
    """
    Return a pair (deltas, new_pointer) for transactions after `pointer`.

    deltas is a list of change events, ordered by the id of the underlying
    transaction and represented as dictionaries:
    {
      "object": <API object type, e.g. "thread">,
      "event": <"create", "modify", or "delete">,
      "id": <public id of the changed object>,
      "cursor": <public_id of the transaction>,
      "attributes": <API representation of the object; only present for
                     non-delete events>
    }

    new_pointer is the integer id of the last included transaction. When the
    log is exhausted without producing any delta, deltas is empty and
    new_pointer is the id of the last transaction examined (or the `pointer`
    that was passed in if nothing was examined).

    Arguments
    ---------
    namespace: object exposing `id` and `public_id`
        The namespace for which to get changes.
    pointer: int
        Process transactions starting after this id.
    db_session: new_session
        Database session used for every query issued here.
    result_limit: int
        Maximum number of transactions fetched per query. (Because we may
        roll up multiple changes to the same object, fewer results can be
        returned.)
    exclude_types: iterable, optional
        If given, don't include transactions for these types of objects.
        The caller's collection is copied, never modified.
    include_types: iterable, optional
        If given, only include transactions for these types of objects.
        'metadata' is always dropped from it. The caller's collection is
        copied, never modified.
    exclude_folders: bool, optional
        If True, 'folder' and 'label' transactions are excluded.
    exclude_metadata: bool, optional
        If True, 'metadata' transactions are excluded.
    exclude_account: bool, optional
        If True, 'account' transactions are excluded.
    expand: bool, optional
        Forwarded to Thread/Message `api_loading_options` and to `encode`.

    """
    # Always a fresh set, so the caller's exclude_types is left untouched.
    exclude_types = set(exclude_types) if exclude_types else set()
    # Begin backwards-compatibility shim -- suppress new object types for now,
    # because clients may not be able to deal with them.
    if exclude_folders is True:
        exclude_types.update(('folder', 'label'))
    if exclude_account is True:
        exclude_types.add('account')
    # End backwards-compatibility shim.

    # Metadata is excluded by default, and can only be included by setting the
    # exclude_metadata flag to False. If listed in include_types, remove it.
    if exclude_metadata is True:
        exclude_types.add('metadata')
    if include_types is not None:
        # Filter into a new list rather than calling .remove() on the
        # argument: the caller may reuse its collection across calls, or may
        # pass an immutable one (tuple, frozenset).
        include_types = [obj_type for obj_type in include_types
                         if obj_type != 'metadata']

    last_trx = _get_last_trx_id_for_namespace(namespace.id, db_session)
    if last_trx == pointer:
        return ([], pointer)

    while True:
        # deleted_at condition included to allow this query to be satisfied via
        # the legacy index on (namespace_id, deleted_at) for performance.
        # Also need to explicitly specify the index hint because the query
        # planner is dumb as nails and otherwise would make this super slow for
        # some values of namespace_id and pointer.
        # TODO(emfree): Remove this hack and ensure that the right index (on
        # namespace_id only) exists.
        transactions = db_session.query(Transaction). \
            filter(
                Transaction.id > pointer,
                Transaction.namespace_id == namespace.id,
                Transaction.deleted_at.is_(None)). \
            with_hint(Transaction, 'USE INDEX (namespace_id_deleted_at)')

        # exclude_types is always a set at this point, so the filter is
        # applied unconditionally.
        transactions = transactions.filter(
            ~Transaction.object_type.in_(exclude_types))

        if include_types is not None:
            transactions = transactions.filter(
                Transaction.object_type.in_(include_types))

        transactions = transactions. \
            order_by(asc(Transaction.id)).limit(result_limit).all()

        if not transactions:
            return ([], pointer)

        results = []

        # Group deltas by object type.
        trxs_by_obj_type = collections.defaultdict(list)
        for trx in transactions:
            trxs_by_obj_type[trx.object_type].append(trx)

        for obj_type, trxs in trxs_by_obj_type.items():
            # Build a dictionary mapping pairs (record_id, command) to
            # transaction. If successive modifies for a given record id appear
            # in the list of transactions, this will only keep the latest
            # one (which is what we want).
            latest_trxs = {(trx.record_id, trx.command): trx for trx in
                           sorted(trxs, key=lambda t: t.id)}.values()
            # Load all referenced not-deleted objects.
            ids_to_query = [trx.record_id for trx in latest_trxs
                            if trx.command != 'delete']

            object_cls = transaction_objects()[obj_type]

            if object_cls == Account:
                # The base query for Account queries the /Namespace/ table
                # since the API-returned "`account`" is a `namespace`
                # under-the-hood.
                query = db_session.query(Namespace).join(Account).filter(
                    Account.id.in_(ids_to_query),
                    Namespace.id == namespace.id)

                # Key by /namespace.account_id/ --
                # namespace.id may not be equal to account.id
                # and trx.record_id == account.id for `account` trxs.
                objects = {obj.account_id: obj for obj in query}
            else:
                query = db_session.query(object_cls).filter(
                    object_cls.id.in_(ids_to_query),
                    object_cls.namespace_id == namespace.id)

                if object_cls == Thread:
                    query = query.options(*Thread.api_loading_options(expand))
                elif object_cls == Message:
                    query = query.options(*Message.api_loading_options(expand))

                objects = {obj.id: obj for obj in query}

            for trx in latest_trxs:
                delta = {
                    'object': trx.object_type,
                    'event': EVENT_NAME_FOR_COMMAND[trx.command],
                    'id': trx.object_public_id,
                    'cursor': trx.public_id
                }
                if trx.command != 'delete':
                    obj = objects.get(trx.record_id)
                    if obj is None:
                        # The object no longer exists (or is not in this
                        # namespace); there is nothing to report for it.
                        continue
                    repr_ = encode(
                        obj, namespace_public_id=namespace.public_id,
                        expand=expand)
                    delta['attributes'] = repr_

                results.append((trx.id, delta))

        if results:
            # Sort deltas by id of the underlying transactions. Sort on the
            # id alone so the delta dicts themselves are never compared.
            results.sort(key=lambda pair: pair[0])
            deltas = [d for _, d in results]
            return (deltas, results[-1][0])
        else:
            # It's possible that none of the referenced objects exist any more,
            # meaning the result list is empty. In that case, keep traversing
            # the log until we get actual results or reach the end.
            pointer = transactions[-1].id
Beispiel #2
0
def format_transactions_after_pointer(namespace,
                                      pointer,
                                      db_session,
                                      result_limit,
                                      exclude_types=None,
                                      include_types=None,
                                      exclude_folders=True,
                                      expand=False):
    """
    Collect the change events recorded after `pointer` for a namespace.

    Returns a pair (deltas, new_pointer). deltas is a list of dictionaries,
    ordered by the id of the underlying transaction:
    {
      "object": <API object type, e.g. "thread">,
      "event": <"create", "modify", or "delete">,
      "id": <public id of the changed object>,
      "cursor": <public_id of the transaction>,
      "attributes": <API representation of the object; only present for
                     non-delete events>
    }

    new_pointer is the integer id of the last included transaction. If the
    log runs out before any delta is produced, deltas is empty and
    new_pointer is the id of the last transaction examined (or the `pointer`
    that was passed in if nothing was examined).

    Arguments
    ---------
    namespace: object exposing `id` and `public_id`
        The namespace for which to get changes.
    pointer: int
        Process transactions starting after this id.
    db_session: new_session
        Database session used for every query issued here.
    result_limit: int
        Maximum number of transactions fetched per query. (Because we may
        roll up multiple changes to the same object, fewer results can be
        returned.)
    exclude_types: iterable, optional
        If given, don't include transactions for these types of objects.
    include_types: iterable, optional
        If given, only include transactions for these types of objects.
    exclude_folders: bool, optional
        If True, 'folder' and 'label' transactions are excluded.
    expand: bool, optional
        Forwarded to Thread/Message `api_loading_options` and to `encode`.

    """
    excluded = set(exclude_types or ())
    # Backwards-compatibility shim: newer object types stay hidden for now
    # because clients may not be able to deal with them.
    excluded.add('account')
    if exclude_folders is True:
        excluded.update(('folder', 'label'))

    # Nothing new has been logged since `pointer`.
    if _get_last_trx_id_for_namespace(namespace.id, db_session) == pointer:
        return ([], pointer)

    while True:
        # The deleted_at condition lets the query be served by the legacy
        # (namespace_id, deleted_at) index, and the explicit hint forces the
        # planner to use it; without it the query can be very slow for some
        # values of namespace_id and pointer.
        # TODO(emfree): Remove this hack and ensure that the right index (on
        # namespace_id only) exists.
        trx_query = db_session.query(Transaction)
        trx_query = trx_query.filter(
            Transaction.id > pointer,
            Transaction.namespace_id == namespace.id,
            Transaction.deleted_at.is_(None))
        trx_query = trx_query.with_hint(
            Transaction, 'USE INDEX (namespace_id_deleted_at)')

        # `excluded` is always a set here, so this filter is always applied.
        trx_query = trx_query.filter(
            ~Transaction.object_type.in_(excluded))

        if include_types is not None:
            trx_query = trx_query.filter(
                Transaction.object_type.in_(include_types))

        batch = trx_query.order_by(
            asc(Transaction.id)).limit(result_limit).all()

        if not batch:
            return ([], pointer)

        # Bucket the fetched transactions by the type of object they touch.
        grouped = collections.defaultdict(list)
        for trx in batch:
            grouped[trx.object_type].append(trx)

        results = []
        for obj_type, type_trxs in grouped.items():
            # Walk the transactions in id order and keep one entry per
            # (record_id, command): a later transaction overwrites an earlier
            # one, so repeated modifies collapse to the most recent.
            latest_by_key = {}
            for trx in sorted(type_trxs, key=lambda t: t.id):
                latest_by_key[(trx.record_id, trx.command)] = trx
            latest_trxs = latest_by_key.values()

            # Only objects that were not deleted need to be loaded.
            live_ids = [
                trx.record_id for trx in latest_trxs if trx.command != 'delete'
            ]

            object_cls = transaction_objects()[obj_type]
            query = db_session.query(object_cls).filter(
                object_cls.id.in_(live_ids),
                object_cls.namespace_id == namespace.id)
            if object_cls == Thread:
                query = query.options(*Thread.api_loading_options(expand))
            elif object_cls == Message:
                query = query.options(*Message.api_loading_options(expand))
            objects = dict((obj.id, obj) for obj in query)

            for trx in latest_trxs:
                delta = {
                    'object': trx.object_type,
                    'event': EVENT_NAME_FOR_COMMAND[trx.command],
                    'id': trx.object_public_id,
                    'cursor': trx.public_id
                }
                if trx.command != 'delete':
                    obj = objects.get(trx.record_id)
                    if obj is None:
                        # The referenced object could not be loaded; skip it.
                        continue
                    delta['attributes'] = encode(
                        obj,
                        namespace_public_id=namespace.public_id,
                        expand=expand)

                results.append((trx.id, delta))

        if not results:
            # None of the referenced objects exist any more. Advance past
            # this batch and keep traversing the log until we get actual
            # results or reach the end.
            pointer = batch[-1].id
            continue

        # Order the deltas by the id of the underlying transactions.
        results.sort()
        deltas = [delta for _, delta in results]
        return (deltas, results[-1][0])
Beispiel #3
0
def format_transactions_after_pointer(namespace,
                                      pointer,
                                      db_session,
                                      result_limit,
                                      exclude_types=None,
                                      include_types=None,
                                      exclude_folders=True,
                                      exclude_metadata=True,
                                      exclude_account=True,
                                      expand=False,
                                      is_n1=False):
    """
    Return a pair (deltas, new_pointer) for transactions after `pointer`.

    deltas is a list of change events, ordered by the id of the underlying
    transaction and represented as dictionaries:
    {
      "object": <API object type, e.g. "thread">,
      "event": <"create", "modify", or "delete">,
      "id": <public id of the changed object>,
      "cursor": <public_id of the transaction>,
      "attributes": <API representation of the object; only present for
                     non-delete events>
    }

    new_pointer is the integer id of the last included transaction. When the
    log is exhausted without producing any delta, deltas is empty and
    new_pointer is the id of the last transaction examined (or the `pointer`
    that was passed in if nothing was examined).

    Arguments
    ---------
    namespace: object exposing `id` and `public_id`
        The namespace for which to get changes.
    pointer: int
        Process transactions starting after this id.
    db_session: new_session
        Database session used for every query issued here.
    result_limit: int
        Maximum number of transactions fetched per query. (Because we may
        roll up multiple changes to the same object, fewer results can be
        returned.)
    exclude_types: iterable, optional
        If given, don't include transactions for these types of objects.
        The caller's collection is copied, never modified.
    include_types: iterable, optional
        If given, only include transactions for these types of objects.
        'metadata' is always dropped from it. The caller's collection is
        copied, never modified.
    exclude_folders: bool, optional
        If True, 'folder' and 'label' transactions are excluded.
    exclude_metadata: bool, optional
        If True, 'metadata' transactions are excluded.
    exclude_account: bool, optional
        If True, 'account' transactions are excluded.
    expand: bool, optional
        Forwarded to Thread/Message `api_loading_options` and to `encode`.
    is_n1: bool, optional
        Forwarded unchanged to `encode`.

    """
    # Always a fresh set, so the caller's exclude_types is left untouched.
    exclude_types = set(exclude_types) if exclude_types else set()
    # Begin backwards-compatibility shim -- suppress new object types for now,
    # because clients may not be able to deal with them.
    if exclude_folders is True:
        exclude_types.update(('folder', 'label'))
    if exclude_account is True:
        exclude_types.add('account')
    # End backwards-compatibility shim.

    # Metadata is excluded by default, and can only be included by setting the
    # exclude_metadata flag to False. If listed in include_types, remove it.
    if exclude_metadata is True:
        exclude_types.add('metadata')
    if include_types is not None:
        # Filter into a new list rather than calling .remove() on the
        # argument: the caller may reuse its collection across calls, or may
        # pass an immutable one (tuple, frozenset).
        include_types = [obj_type for obj_type in include_types
                         if obj_type != 'metadata']

    last_trx = _get_last_trx_id_for_namespace(namespace.id, db_session)
    if last_trx == pointer:
        return ([], pointer)

    while True:
        transactions = db_session.query(Transaction). \
            filter(
                Transaction.id > pointer,
                Transaction.namespace_id == namespace.id)

        # exclude_types is always a set at this point, so the filter is
        # applied unconditionally.
        transactions = transactions.filter(
            ~Transaction.object_type.in_(exclude_types))

        if include_types is not None:
            transactions = transactions.filter(
                Transaction.object_type.in_(include_types))

        transactions = transactions. \
            order_by(asc(Transaction.id)).limit(result_limit).all()

        if not transactions:
            return ([], pointer)

        results = []

        # Group deltas by object type.
        trxs_by_obj_type = collections.defaultdict(list)
        for trx in transactions:
            trxs_by_obj_type[trx.object_type].append(trx)

        for obj_type, trxs in trxs_by_obj_type.items():
            # Build a dictionary mapping pairs (record_id, command) to
            # transaction. If successive modifies for a given record id appear
            # in the list of transactions, this will only keep the latest
            # one (which is what we want).
            latest_trxs = {(trx.record_id, trx.command): trx
                           for trx in sorted(trxs, key=lambda t: t.id)
                           }.values()
            # Load all referenced not-deleted objects.
            ids_to_query = [
                trx.record_id for trx in latest_trxs if trx.command != 'delete'
            ]

            object_cls = transaction_objects()[obj_type]

            if object_cls == Account:
                # The base query for Account queries the /Namespace/ table
                # since the API-returned "`account`" is a `namespace`
                # under-the-hood.
                query = db_session.query(Namespace).join(Account).filter(
                    Account.id.in_(ids_to_query), Namespace.id == namespace.id)

                # Key by /namespace.account_id/ --
                # namespace.id may not be equal to account.id
                # and trx.record_id == account.id for `account` trxs.
                objects = {obj.account_id: obj for obj in query}
            else:
                query = db_session.query(object_cls).filter(
                    object_cls.id.in_(ids_to_query),
                    object_cls.namespace_id == namespace.id)

                if object_cls == Thread:
                    query = query.options(*Thread.api_loading_options(expand))
                elif object_cls == Message:
                    query = query.options(*Message.api_loading_options(expand))

                objects = {obj.id: obj for obj in query}

            for trx in latest_trxs:
                delta = {
                    'object': trx.object_type,
                    'event': EVENT_NAME_FOR_COMMAND[trx.command],
                    'id': trx.object_public_id,
                    'cursor': trx.public_id
                }
                if trx.command != 'delete':
                    obj = objects.get(trx.record_id)
                    if obj is None:
                        # The object no longer exists (or is not in this
                        # namespace); there is nothing to report for it.
                        continue
                    repr_ = encode(obj,
                                   namespace_public_id=namespace.public_id,
                                   expand=expand,
                                   is_n1=is_n1)
                    delta['attributes'] = repr_

                results.append((trx.id, delta))

        if results:
            # Sort deltas by id of the underlying transactions. Sort on the
            # id alone so the delta dicts themselves are never compared.
            results.sort(key=lambda pair: pair[0])
            deltas = [d for _, d in results]
            return (deltas, results[-1][0])
        else:
            # It's possible that none of the referenced objects exist any more,
            # meaning the result list is empty. In that case, keep traversing
            # the log until we get actual results or reach the end.
            pointer = transactions[-1].id