Example #1
def index_messages(namespace_id, namespace_public_id, created_before=None):
    """ Index the messages of a namespace. """
    if created_before is not None:
        created_before = dateutil.parser.parse(created_before)

    indexed_count = 0
    search_engine = NamespaceSearchEngine(namespace_public_id)

    with session_scope() as db_session:
        query = db_session.query(Message).filter(
            Message.namespace_id == namespace_id)

        if created_before is not None:
            query = query.filter(Message.created_at <= created_before)

        query = query.options(joinedload(Message.parts).
                              load_only('content_disposition'))

        encoded = []
        for obj in safer_yield_per(query, Message.id, 0, CHUNK_SIZE):
            encoded_obj = encode(obj, namespace_public_id=namespace_public_id)
            index_obj = _process_attributes(encoded_obj)

            encoded.append(('index', index_obj))

    log.info('Going to index messages', namespace_id=namespace_id,
             namespace_public_id=namespace_public_id)

    indexed_count += search_engine.messages.bulk_index(encoded)

    log.info('Indexed messages', namespace_id=namespace_id,
             namespace_public_id=namespace_public_id,
             message_count=indexed_count)

    return indexed_count
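
Note on the shared helper: every example on this page iterates its query with
safer_yield_per(query, id_column, start_id, chunk_size), whose implementation
is not shown here. Below is a minimal sketch of what such a keyset-pagination
generator plausibly looks like; the name and signature mirror the calls above,
but the body is an assumption, not the project's actual code.

# Hypothetical sketch of safer_yield_per: fetch rows in batches of `count`,
# restarting each batch at the last seen id rather than using OFFSET, so
# long scans stay cheap and are not derailed by concurrent inserts.
def safer_yield_per(query, id_field, start_id, count):
    cur_id = start_id
    while True:
        results = (query.filter(id_field >= cur_id)
                        .order_by(id_field)
                        .limit(count)
                        .all())
        if not results:
            return
        for result in results:
            yield result
        # The next batch starts just past the highest id seen so far.
        cur_id = getattr(results[-1], id_field.key) + 1
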
Example #2
def fetch_corresponding_thread(db_session, namespace_id, message):
    """fetch a thread matching the corresponding message. Returns None if
       there's no matching thread."""
    # FIXME: for performance reasons, we make the assumption that a reply
    # to a message always has a similar subject. This is only
    # right 95% of the time.
    clean_subject = cleanup_subject(message.subject)
    threads = db_session.query(Thread).filter(
        Thread.namespace_id == namespace_id,
        Thread._cleaned_subject == clean_subject). \
        order_by(desc(Thread.id))

    for thread in safer_yield_per(threads, Thread.id, 0, 100):
        for match in thread.messages:
            # A lot of people BCC some address when sending mass
            # emails, so ignore BCC.
            match_bcc = match.bcc_addr if match.bcc_addr else []
            message_bcc = message.bcc_addr if message.bcc_addr else []

            match_emails = [
                t[1] for t in match.participants if t not in match_bcc
            ]
            message_emails = [
                t[1] for t in message.participants if t not in message_bcc
            ]

            # A conversation takes place between two or more persons.
            # Do this thread and the message have at least two
            # participants in common? If so, it's probably a related thread.
            match_participants_set = set(match_emails)
            message_participants_set = set(message_emails)

            if len(match_participants_set & message_participants_set) >= 2:
                # No need to loop through the rest of the messages
                # in the thread
                if len(thread.messages) >= MAX_THREAD_LENGTH:
                    break
                else:
                    return match.thread

            # handle the case where someone is self-sending an email.
            if not message.from_addr or not message.to_addr:
                return

            match_from = [t[1] for t in match.from_addr]
            match_to = [t[1] for t in match.to_addr]
            message_from = [t[1] for t in message.from_addr]
            message_to = [t[1] for t in message.to_addr]

            if (len(message_to) == 1 and message_from == message_to
                    and match_from == match_to and message_to == match_from):
                # Check that we're not over max thread length in this case.
                # No need to loop through the rest of the messages
                # in the thread.
                if len(thread.messages) >= MAX_THREAD_LENGTH:
                    break
                else:
                    return match.thread

    return
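
fetch_corresponding_thread depends on cleanup_subject to normalize subjects
before comparing them against Thread._cleaned_subject. That helper is not part
of these examples; the sketch below is a hypothetical illustration of the kind
of prefix stripping it presumably performs (the regex and behavior here are
assumptions).

import re

# Hypothetical sketch: strip leading "Re:"/"Fwd:" chains so that
# "Re: Re: Lunch" and "Lunch" compare equal. The real cleanup_subject may
# handle more cases (localized prefixes, mailing-list tags, etc.).
_PREFIX_RE = re.compile(r'^\s*((re|fw|fwd)\s*:\s*)+', re.IGNORECASE)

def cleanup_subject(subject):
    if subject is None:
        return ''
    return _PREFIX_RE.sub('', subject).strip()
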
Example #3
def index_messages(namespace, updated_since=None):
    """ Index the messages of a namespace. """
    namespace_id, namespace_public_id = namespace

    if updated_since is not None:
        updated_since = dateutil.parser.parse(updated_since)

    indexed_count = 0
    search_engine = NamespaceSearchEngine(namespace_public_id)

    with session_scope() as db_session:
        query = db_session.query(Message).filter(
            Message.namespace_id == namespace_id)

        if updated_since is not None:
            query = query.filter(Message.updated_at > updated_since)

        query = query.options(
            joinedload(Message.parts).load_only('content_disposition'))

        encoded = []
        for obj in safer_yield_per(query, Message.id, 0, CHUNK_SIZE):
            encoded_obj = encode(obj, namespace_public_id=namespace_public_id)
            encoded.append(('index', encoded_obj))

    indexed_count += search_engine.messages.bulk_index(encoded)

    log.info('Indexed messages',
             namespace_id=namespace_id,
             namespace_public_id=namespace_public_id,
             message_count=indexed_count)

    return indexed_count
Example #4
def index_messages(namespace, updated_since=None):
    """ Index the messages of a namespace. """
    namespace_id, namespace_public_id = namespace

    if updated_since is not None:
        updated_since = dateutil.parser.parse(updated_since)

    indexed_count = 0
    search_engine = NamespaceSearchEngine(namespace_public_id)

    with session_scope() as db_session:
        query = db_session.query(Message).filter(
            Message.namespace_id == namespace_id)

        if updated_since is not None:
            query = query.filter(Message.updated_at > updated_since)

        query = query.options(joinedload(Message.parts).
                              load_only('content_disposition'))

        encoded = []
        for obj in safer_yield_per(query, Message.id, 0, CHUNK_SIZE):
            encoded_obj = encode(obj, namespace_public_id=namespace_public_id)
            encoded.append(encoded_obj)

    indexed_count += search_engine.messages.bulk_index(encoded)

    log.info('Indexed messages', namespace_id=namespace_id,
             namespace_public_id=namespace_public_id,
             message_count=indexed_count)

    return indexed_count
Example #5
    def _process_log(self):
        """Scan the transaction log `self.chunk_size` entries at a time,
        publishing matching events to registered hooks."""
        with session_scope() as db_session:
            self.log.info('Scanning tx log from id: {}'.
                          format(self.minimum_id))
            unprocessed_txn_count = db_session.query(
                func.count(Transaction.id)).filter(
                Transaction.table_name == 'message',
                Transaction.id > self.minimum_id).scalar()
            if unprocessed_txn_count:
                self.log.debug('Total of {0} transactions to process'.
                               format(unprocessed_txn_count))

            max_tx_id, = db_session.query(func.max(Transaction.id)).one()
            if max_tx_id is None:
                max_tx_id = 0
            query = db_session.query(Transaction). \
                filter(Transaction.table_name == 'message',
                       Transaction.command == 'insert'). \
                order_by(asc(Transaction.id))
            for transaction in safer_yield_per(query, Transaction.id,
                                               self.minimum_id,
                                               self.chunk_size):
                namespace_id = transaction.namespace_id
                for worker in self.workers[namespace_id]:
                    if worker.match(transaction):
                        worker.enqueue(EventData(transaction))
                self.minimum_id = transaction.id + 1
            self.log.debug('Processed tx. setting min id to {0}'.
                           format(self.minimum_id))
Example #6
def index_namespace(namespace_id):
    """ Backfill function to index a namespace from current db data Not used
    for incremental indexing.

    """
    if not search_service_url or not doc_service_url:
        raise Exception("CloudSearch not configured; cannot index")
    else:
        search_client = ContactSearchClient(namespace_id)
        doc_service = get_doc_service()

        # Look up previously indexed data so we can delete any records which
        # have disappeared.
        previous_records = search_client.fetch_all_matching_ids()

        log.info("previous records",
                 total=len(previous_records),
                 ids=previous_records)

        indexed = 0
        current_records = set()
        docs = []
        with session_scope(namespace_id) as db_session:
            query = (db_session.query(Contact).options(
                joinedload("phone_numbers")).filter_by(
                    namespace_id=namespace_id))
            for contact in safer_yield_per(query, Contact.id, 0, 1000):
                log.info("indexing", contact_id=contact.id)
                current_records.add(long(contact.id))
                contact_object = cloudsearch_contact_repr(contact)
                docs.append({
                    "type": "add",
                    "id": contact.id,
                    "fields": contact_object
                })
                if len(docs) > DOC_UPLOAD_CHUNK_SIZE:
                    doc_service.upload_documents(
                        documents=json.dumps(docs),
                        contentType="application/json")
                    indexed += len(docs)
                    docs = []

        indexed += len(docs)

        # Deletes are small, so we can stick 'em on this batch.
        deleted_records = set(previous_records).difference(current_records)
        for id_ in deleted_records:
            log.info("deleting", contact_id=id_)
            docs.append({"type": "delete", "id": id_})

        if docs:
            doc_service.upload_documents(documents=json.dumps(docs),
                                         contentType="application/json")

        log.info(
            "namespace index complete",
            namespace_id=namespace_id,
            total_contacts_indexed=indexed,
            total_contacts_deleted=len(deleted_records),
        )
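
For reference, the docs list built above serializes to a CloudSearch-style
document batch: a JSON array of "add" and "delete" operations. A hedged
illustration of the payload shape handed to doc_service.upload_documents
(the keys inside "fields" come from cloudsearch_contact_repr and are assumed
here):

# Illustrative only: the structure json.dumps(docs) produces for
# upload_documents(documents=..., contentType="application/json").
example_batch = [
    {"type": "add", "id": 42,
     "fields": {"name": "Ada Lovelace", "email_address": "ada@example.com"}},
    {"type": "delete", "id": 17},
]
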
Example #7
    def _process_log(self):
        # TODO(emfree) handle the case that message/thread objects may have
        # been deleted in the interim.
        with session_scope() as db_session:
            query = db_session.query(ActionLog).filter(
                ActionLog.status == 'pending',
                ActionLog.retries < ACTION_MAX_NR_OF_RETRIES)

            if self._scheduled_actions:
                query = query.filter(
                    ~ActionLog.id.in_(self._scheduled_actions))
            query = query.order_by(asc(ActionLog.id))

            for log_entry in safer_yield_per(query, ActionLog.id, 0,
                                             self.chunk_size):
                action_function = ACTION_FUNCTION_MAP[log_entry.action]
                namespace = db_session.query(Namespace). \
                    get(log_entry.namespace_id)

                # Only actions on accounts associated with this sync-engine
                if namespace.account.sync_host != platform.node():
                    continue

                self._scheduled_actions.add(log_entry.id)
                self.log.info('delegating action',
                              action_id=log_entry.id,
                              msg=log_entry.action)
                semaphore = self.semaphore_map[(namespace.account_id,
                                                log_entry.action)]
                gevent.spawn(syncback_worker, semaphore, action_function,
                             log_entry.id, log_entry.record_id,
                             namespace.account_id, syncback_service=self,
                             extra_args=log_entry.extra_args)
Example #8
    def _process_log(self):
        # TODO(emfree) handle the case that message/thread objects may have
        # been deleted in the interim.
        with session_scope() as db_session:
            query = db_session.query(ActionLog).filter(~ActionLog.executed)
            if self._scheduled_actions:
                query = query.filter(
                    ~ActionLog.id.in_(self._scheduled_actions))
            query = query.order_by(asc(ActionLog.id))

            for log_entry in safer_yield_per(query, ActionLog.id, 0,
                                             self.chunk_size):
                action_function = ACTION_FUNCTION_MAP[log_entry.action]
                namespace = db_session.query(Namespace). \
                    get(log_entry.namespace_id)

                # Only actions on accounts associated with this sync-engine
                if namespace.account.sync_host != platform.node():
                    continue

                self._scheduled_actions.add(log_entry.id)
                self.log.info('delegating action',
                              action_id=log_entry.id,
                              msg=log_entry.action)
                semaphore = self.semaphore_map[(namespace.account_id,
                                                log_entry.action)]
                gevent.spawn(syncback_worker,
                             semaphore,
                             action_function,
                             log_entry.id,
                             log_entry.record_id,
                             namespace.account_id,
                             syncback_service=self,
                             extra_args=log_entry.extra_args)
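
The two syncback variants above throttle work through self.semaphore_map,
keyed by (account_id, action); its construction is not shown. A minimal sketch
of how such a map could be built with gevent, assuming a fixed per-key
concurrency limit (the limit and the defaultdict approach are assumptions):

from collections import defaultdict
from gevent.lock import BoundedSemaphore

# Hypothetical setup: at most CONCURRENCY greenlets may run the same action
# for the same account at once; each (account_id, action) pair lazily gets
# its own semaphore.
CONCURRENCY = 5
semaphore_map = defaultdict(lambda: BoundedSemaphore(CONCURRENCY))

# Usage then mirrors the worker spawn above:
#     semaphore = semaphore_map[(namespace.account_id, log_entry.action)]
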
Example #9
    def _process_log(self):
        """Scan the transaction log `self.chunk_size` entries at a time,
        publishing matching events to registered hooks."""
        with session_scope() as db_session:
            self.log.info('Scanning tx log from id: {}'.format(
                self.minimum_id))
            unprocessed_txn_count = db_session.query(
                func.count(Transaction.id)).filter(
                    Transaction.table_name == 'message',
                    Transaction.id > self.minimum_id).scalar()
            if unprocessed_txn_count:
                self.log.debug('Total of {0} transactions to process'.format(
                    unprocessed_txn_count))

            max_tx_id, = db_session.query(func.max(Transaction.id)).one()
            if max_tx_id is None:
                max_tx_id = 0
            query = db_session.query(Transaction). \
                filter(Transaction.table_name == 'message',
                       Transaction.command == 'insert'). \
                order_by(asc(Transaction.id))
            for transaction in safer_yield_per(query, Transaction.id,
                                               self.minimum_id,
                                               self.chunk_size):
                namespace_id = transaction.namespace_id
                for worker in self.workers[namespace_id]:
                    if worker.match(transaction):
                        worker.enqueue(EventData(transaction))
                self.minimum_id = transaction.id + 1
            self.log.debug('Processed tx. setting min id to {0}'.format(
                self.minimum_id))
Example #10
def fetch_corresponding_thread(db_session, namespace_id, message):
    """fetch a thread matching the corresponding message. Returns None if
       there's no matching thread."""
    # FIXME: for performance reasons, we make the assumption that a reply
    # to a message always has a similar subject. This is only
    # right 95% of the time.
    clean_subject = cleanup_subject(message.subject)
    threads = db_session.query(Thread).filter(
        Thread.namespace_id == namespace_id,
        Thread._cleaned_subject == clean_subject). \
        order_by(desc(Thread.id))

    for thread in safer_yield_per(threads, Thread.id, 0, 100):
        for match in thread.messages:
            # A lot of people BCC some address when sending mass
            # emails, so ignore BCC.
            match_bcc = match.bcc_addr if match.bcc_addr else []
            message_bcc = message.bcc_addr if message.bcc_addr else []

            match_emails = [t[1] for t in match.participants
                            if t not in match_bcc]
            message_emails = [t[1] for t in message.participants
                              if t not in message_bcc]

            # A conversation takes place between two or more persons.
            # Do this thread and the message have at least two
            # participants in common? If so, it's probably a related thread.
            match_participants_set = set(match_emails)
            message_participants_set = set(message_emails)

            if len(match_participants_set & message_participants_set) >= 2:
                # No need to loop through the rest of the messages
                # in the thread
                if len(thread.messages) >= MAX_THREAD_LENGTH:
                    break
                else:
                    return match.thread

            # handle the case where someone is self-sending an email.
            if not message.from_addr or not message.to_addr:
                return

            match_from = [t[1] for t in match.from_addr]
            match_to = [t[1] for t in match.to_addr]
            message_from = [t[1] for t in message.from_addr]
            message_to = [t[1] for t in message.to_addr]

            if (len(message_to) == 1 and message_from == message_to and
                    match_from == match_to and message_to == match_from):
                # Check that we're not over max thread length in this case.
                # No need to loop through the rest of the messages
                # in the thread.
                if len(thread.messages) >= MAX_THREAD_LENGTH:
                    break
                else:
                    return match.thread

    return
Example #11
def index_namespace(namespace_id):
    """ Backfill function to index a namespace from current db data Not used
    for incremental indexing.

    """
    if not search_service_url or not doc_service_url:
        raise Exception('CloudSearch not configured; cannot index')
    else:
        search_client = ContactSearchClient(namespace_id)
        doc_service = get_doc_service()

        # Look up previously indexed data so we can delete any records which
        # have disappeared.
        previous_records = search_client.fetch_all_matching_ids()

        log.info("previous records", total=len(previous_records),
                 ids=previous_records)

        indexed = 0
        current_records = set()
        docs = []
        with session_scope(namespace_id) as db_session:
            query = db_session.query(Contact).options(
                joinedload("phone_numbers")).filter_by(
                    namespace_id=namespace_id)
            for contact in safer_yield_per(query, Contact.id, 0, 1000):
                log.info("indexing", contact_id=contact.id)
                current_records.add(long(contact.id))
                contact_object = cloudsearch_contact_repr(contact)
                docs.append({'type': 'add', 'id': contact.id,
                             'fields': contact_object})
                if len(docs) > DOC_UPLOAD_CHUNK_SIZE:
                    doc_service.upload_documents(
                        documents=json.dumps(docs),
                        contentType='application/json')
                    indexed += len(docs)
                    docs = []

        indexed += len(docs)

        # Deletes are small, so we can stick 'em on this batch.
        deleted_records = set(previous_records).difference(current_records)
        for id_ in deleted_records:
            log.info("deleting", contact_id=id_)
            docs.append({'type': 'delete', 'id': id_})

        if docs:
            doc_service.upload_documents(
                documents=json.dumps(docs),
                contentType='application/json')

        log.info("namespace index complete",
                 namespace_id=namespace_id,
                 total_contacts_indexed=indexed,
                 total_contacts_deleted=len(deleted_records))
Example #12
def index_namespace(namespace_id):
    if not CLOUDSEARCH_DOMAIN:
        raise Exception('CloudSearch not configured; cannot index')
    else:
        search_client = ContactSearchClient(namespace_id)
        doc_service = get_doc_service()

        # Look up previously indexed data so we can delete any records which
        # have disappeared.
        #
        previous_records = search_client.fetch_all_matching_ids()

        log.info("previous records",
                 total=len(previous_records),
                 ids=previous_records)

        indexed = 0
        current_records = set()
        docs = []
        with session_scope() as db_session:
            query = db_session.query(Contact).options(
                joinedload("phone_numbers")).filter_by(
                    namespace_id=namespace_id)
            for contact in safer_yield_per(query, Contact.id, 0, 1000):
                log.info("indexing", contact_id=contact.id)
                current_records.add(long(contact.id))
                contact_object = cloudsearch_contact_repr(contact)
                docs.append({
                    'type': 'add',
                    'id': contact.id,
                    'fields': contact_object
                })
                if len(docs) > DOC_UPLOAD_CHUNK_SIZE:
                    doc_service.upload_documents(
                        documents=json.dumps(docs),
                        contentType='application/json')
                    indexed += len(docs)
                    docs = []

        indexed += len(docs)

        # Deletes are small, so we can stick 'em on this batch.
        deleted_records = set(previous_records).difference(current_records)
        for id_ in deleted_records:
            log.info("deleting", contact_id=id_)
            docs.append({'type': 'delete', 'id': id_})

        if docs:
            doc_service.upload_documents(documents=json.dumps(docs),
                                         contentType='application/json')

        log.info("namespace index complete",
                 total_contacts_indexed=indexed,
                 total_contacts_deleted=len(deleted_records))
Example #13
def index_threads(namespace_id, namespace_public_id, created_before=None):
    """ Index the threads of a namespace. """
    if created_before is not None:
        created_before = dateutil.parser.parse(created_before)

    indexed_count = 0
    search_engine = NamespaceSearchEngine(namespace_public_id,
                                          create_index=True)

    with session_scope() as db_session:
        query = db_session.query(Thread).filter(
            Thread.namespace_id == namespace_id)

        if created_before is not None:
            query = query.filter(Thread.created_at <= created_before)

        query = query.options(
            subqueryload(Thread.messages).load_only('public_id', 'is_draft',
                                                    'from_addr', 'to_addr',
                                                    'cc_addr', 'bcc_addr'),
            subqueryload('tagitems').joinedload('tag').load_only(
                'public_id', 'name'))

        encoded = []

        for obj in safer_yield_per(query, Thread.id, 0, CHUNK_SIZE):
            if len(encoded) >= INDEX_CHUNK_SIZE:
                indexed_count += search_engine.threads.bulk_index(encoded)
                encoded = []

            index_obj = encode(obj, namespace_public_id=namespace_public_id)
            encoded.append(('index', index_obj))

        if encoded:
            indexed_count += search_engine.threads.bulk_index(encoded)

    log.info('Indexed threads',
             namespace_id=namespace_id,
             namespace_public_id=namespace_public_id,
             thread_count=indexed_count)

    return indexed_count
Example #14
def upgrade():
    from inbox.models.session import session_scope
    from inbox.models import Namespace, Tag, Thread
    from inbox.sqlalchemy_ext.util import safer_yield_per
    from sqlalchemy import func
    from sqlalchemy.orm import joinedload
    with session_scope() as db_session:
        # Create the attachment tag
        for ns in db_session.query(Namespace):
            Tag.create_canonical_tags(ns, db_session)

        thread_count, = db_session.query(func.count(Thread.id)).one()
        q = db_session.query(Thread).options(joinedload(Thread.messages))
        processed_count = 0
        for thr in safer_yield_per(q, Thread.id, 1, thread_count):
            if any(m.attachments for m in thr.messages):
                attachment_tag = thr.namespace.tags['attachment']
                thr.apply_tag(attachment_tag)
            processed_count += 1
            print processed_count
Example #15
def index_threads(namespace_id, namespace_public_id, created_before=None):
    """ Index the threads of a namespace. """
    if created_before is not None:
        created_before = dateutil.parser.parse(created_before)

    indexed_count = 0
    search_engine = NamespaceSearchEngine(namespace_public_id,
                                          create_index=True)

    with session_scope() as db_session:
        query = db_session.query(Thread).filter(
            Thread.namespace_id == namespace_id)

        if created_before is not None:
            query = query.filter(Thread.created_at <= created_before)

        query = query.options(
            subqueryload(Thread.messages).
            load_only('public_id', 'is_draft', 'from_addr', 'to_addr',
                      'cc_addr', 'bcc_addr'),
            subqueryload('tagitems').joinedload('tag').
            load_only('public_id', 'name'))

        encoded = []

        for obj in safer_yield_per(query, Thread.id, 0, CHUNK_SIZE):
            if len(encoded) >= INDEX_CHUNK_SIZE:
                indexed_count += search_engine.threads.bulk_index(encoded)
                encoded = []

            index_obj = encode(obj, namespace_public_id=namespace_public_id)
            encoded.append(('index', index_obj))

        if encoded:
            indexed_count += search_engine.threads.bulk_index(encoded)

    log.info('Indexed threads', namespace_id=namespace_id,
             namespace_public_id=namespace_public_id,
             thread_count=indexed_count)

    return indexed_count
Example #16
    def _process_log(self):
        # TODO(emfree) handle the case that message/thread objects may have
        # been deleted in the interim.
        with session_scope() as db_session:
            query = db_session.query(ActionLog).filter(~ActionLog.executed)
            if self._scheduled_actions:
                query = query.filter(
                    ~ActionLog.id.in_(self._scheduled_actions))
            query = query.order_by(asc(ActionLog.id))

            for log_entry in safer_yield_per(query, ActionLog.id, 0,
                                             self.chunk_size):
                action_function = ACTION_FUNCTION_MAP[log_entry.action]
                namespace = db_session.query(Namespace). \
                    get(log_entry.namespace_id)
                self._scheduled_actions.add(log_entry.id)
                worker = SyncbackWorker(action_function, log_entry.id,
                                        log_entry.record_id,
                                        namespace.account_id,
                                        syncback_service=self)
                self.log.info('delegating action', action_id=log_entry.id)
                self.worker_pool.start(worker)
Example #17
    def _process_log(self):
        # TODO(emfree) handle the case that message/thread objects may have
        # been deleted in the interim.
        with session_scope() as db_session:
            query = db_session.query(ActionLog).filter(~ActionLog.executed)
            if self._scheduled_actions:
                query = query.filter(
                    ~ActionLog.id.in_(self._scheduled_actions))
            query = query.order_by(asc(ActionLog.id))

            for log_entry in safer_yield_per(query, ActionLog.id, 0,
                                             self.chunk_size):
                action_function = ACTION_FUNCTION_MAP[log_entry.action]
                namespace = db_session.query(Namespace). \
                    get(log_entry.namespace_id)
                self._scheduled_actions.add(log_entry.id)
                worker = SyncbackWorker(action_function, log_entry.id,
                                        log_entry.record_id,
                                        namespace.account_id,
                                        syncback_service=self)
                self.log.info('delegating action', action_id=log_entry.id)
                self.worker_pool.start(worker)
Example #18
def index_messages(namespace_id, namespace_public_id, created_before=None):
    """ Index the messages of a namespace. """
    if created_before is not None:
        created_before = dateutil.parser.parse(created_before)

    indexed_count = 0
    search_engine = NamespaceSearchEngine(namespace_public_id,
                                          create_index=True)

    with session_scope() as db_session:
        query = db_session.query(Message).filter(
            Message.namespace_id == namespace_id)

        if created_before is not None:
            query = query.filter(Message.created_at <= created_before)

        query = query.options(
            joinedload(Message.parts).load_only('content_disposition'))

        encoded = []
        for obj in safer_yield_per(query, Message.id, 0, CHUNK_SIZE):
            if len(encoded) >= INDEX_CHUNK_SIZE:
                indexed_count += search_engine.messages.bulk_index(encoded)
                encoded = []

            index_obj = encode(obj, namespace_public_id=namespace_public_id)
            encoded.append(('index', index_obj))

        if encoded:
            indexed_count += search_engine.messages.bulk_index(encoded)

    log.info('Indexed messages',
             namespace_id=namespace_id,
             namespace_public_id=namespace_public_id,
             message_count=indexed_count)

    return indexed_count
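
Unlike Example #1, which accumulates every encoded message before a single
bulk_index call, this variant flushes encoded to bulk_index every
INDEX_CHUNK_SIZE entries and once more at the end, bounding memory use on
large namespaces. The same flush-every-N pattern can be written as a generic
helper; a small sketch (name and placement assumed, not from the project):

# Hypothetical helper: group any iterable into lists of at most `size` items,
# yielding the final partial batch as well.
def batched(iterable, size):
    batch = []
    for item in iterable:
        batch.append(item)
        if len(batch) >= size:
            yield batch
            batch = []
    if batch:
        yield batch
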
Example #19
def get_entries_from_public_id(namespace_id, cursor_start, db_session,
                               result_limit):
    """Returns up to result_limit processed transaction log entries for the
    given namespace_id. Begins processing the log after the transaction with
    public_id equal to the cursor_start parameter.

    Arguments
    ---------
    namespace_id: int
    cursor_start: string
        The public_id of the transaction log entry after which to begin
        processing. Normally this should be the return value of a previous call
        to get_public_id_from_ts, or the value of 'cursor_end' from a previous
        call to this function.
    db_session: InboxSession
    result_limit: int
        The maximum number of deltas to return.

    Returns
    -------
    Dictionary with keys:
     - 'cursor_start'
     - 'deltas': list of serialized add/modify/delete deltas
     - (optional) 'cursor_end': the public_id of the last transaction log entry
       in the returned deltas, if available. This value can be passed as
       cursor_start in a subsequent call to this function to get the next page
       of results.

    Raises
    ------
    ValueError
        If cursor_start is invalid.
    """
    try:
        # Check that cursor_start can be a public id, and interpret the special
        # stamp value '0'.
        int_value = int(cursor_start, 36)
        if not int_value:
            internal_start_id = 0
        else:
            internal_start_id, = db_session.query(Transaction.id). \
                filter(Transaction.public_id == cursor_start,
                       Transaction.namespace_id == namespace_id).one()
    except (ValueError, NoResultFound):
        raise ValueError(
            'Invalid first_public_id parameter: {}'.format(cursor_start))
    query = db_session.query(Transaction). \
        order_by(asc(Transaction.id)). \
        filter(Transaction.namespace_id == namespace_id)

    deltas = []
    cursor_end = cursor_start
    for transaction in safer_yield_per(query, Transaction.id,
                                       internal_start_id + 1, result_limit):

        if should_publish_transaction(transaction, db_session):
            event = create_event(transaction)
            deltas.append(event)
            cursor_end = transaction.public_id
            if len(deltas) == result_limit:
                break

    result = {
        'cursor_start': cursor_start,
        'deltas': deltas,
        'cursor_end': cursor_end
    }

    return result
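
A hedged usage sketch of the cursor convention described in the docstring:
keep calling the function with the previous 'cursor_end' until a call returns
no deltas (the open db_session and the starting cursor, e.g. '0' or a value
from get_public_id_from_ts, are assumed):

# Illustrative paging loop over the transaction log, not project code.
def iter_all_deltas(namespace_id, db_session, start_cursor, page_size=100):
    cursor = start_cursor
    while True:
        result = get_entries_from_public_id(namespace_id, cursor,
                                            db_session, page_size)
        if not result['deltas']:
            return
        for delta in result['deltas']:
            yield delta
        cursor = result['cursor_end']
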
Example #20
def get_entries_from_public_id(namespace_id, events_start, db_session,
                               result_limit):
    """Returns up to result_limit processed transaction log entries for the
    given namespace_id. Begins processing the log after the transaction with
    public_id equal to the events_start parameter.

    Arguments
    ---------
    namespace_id: int
    events_start: string
        The public_id of the transaction log entry after which to begin
        processing. Normally this should be the return value of a previous call
        to get_public_id_from_ts, or the value of 'events_end' from a previous
        call to this function.
    db_session: InboxSession
    result_limit: int
        The maximum number of events to return.

    Returns
    -------
    Dictionary with keys:
     - 'events_start'
     - 'events': list of serialized add/modify/delete events
     - (optional) 'events_end': the public_id of the last transaction log entry
       in the returned events, if available. This value can be passed as
       events_start in a subsequent call to this function to get the next page
       of results.

    Raises
    ------
    ValueError
        If events_start is invalid.
    """
    try:
        # Check that events_start can be a public id, and interpret the special
        # stamp value '0'.
        int_value = int(events_start, 36)
        if not int_value:
            internal_start_id = 0
        else:
            internal_start_id, = db_session.query(Transaction.id). \
                filter(Transaction.public_id == events_start,
                       Transaction.namespace_id == namespace_id).one()
    except (ValueError, NoResultFound):
        raise ValueError('Invalid first_public_id parameter: {}'.
                         format(events_start))
    query = db_session.query(Transaction). \
        order_by(asc(Transaction.id)). \
        filter(Transaction.namespace_id == namespace_id)

    events = []
    events_end = events_start
    for transaction in safer_yield_per(query, Transaction.id,
                                       internal_start_id + 1,
                                       result_limit):

        if should_publish_transaction(transaction, db_session):
            event = create_event(transaction)
            events.append(event)
            events_end = transaction.public_id
            if len(events) == result_limit:
                break

    result = {
        'events_start': events_start,
        'events': events,
        'events_end': events_end
    }

    return result