Example #1
def test_publisher_failure():
    messages = []
    publisher = Publisher(messages.extend)

    with pytest.raises(NotImplementedError):
        with publisher.batch(batch_identifier, begin):
            raise NotImplementedError

    published_messages = map(reserialize, messages)

    assert get_operation(get_operation(published_messages[0])) == begin
    assert get_operation(get_operation(published_messages[1])) == rollback

    # Ensure it actually generates valid data.
    assert list(states.validate(published_messages))
    assert list(sequences.validate(published_messages))

    for i, message in enumerate(published_messages):
        assert message.header.publisher == publisher.id
        assert message.header.sequence == i

    # Write another message to ensure that the publisher can continue to be used.
    assert len(messages) == 2
    publisher.publish()
    assert len(messages) == 3
    assert messages[2].header.sequence == 2
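
This failure test pins down three behaviors: entering publisher.batch(...) publishes a begin operation, an exception escaping the block publishes a rollback and is re-raised (which is what pytest.raises observes), and sequence numbers keep increasing across the failed batch. Note these examples are Python 2 code (map returns a list here, and the relay below uses itertools.imap). What follows is a minimal sketch of that contract, with hypothetical Header/Message namedtuples standing in for pgshovel's protobuf envelopes; it is not the project's actual implementation.

import contextlib
import itertools
import uuid
from collections import namedtuple

Header = namedtuple('Header', 'publisher sequence')
Message = namedtuple('Message', 'header operation')


class Publisher(object):
    # Hypothetical reconstruction of the contract the tests exercise; the
    # real pgshovel Publisher emits protobuf messages, not namedtuples.
    def __init__(self, push):
        self.id = uuid.uuid1().bytes
        self.__push = push
        self.__sequence = itertools.count()  # numbering survives a failed batch

    def publish(self, *operations):
        self.__push([
            Message(Header(self.id, next(self.__sequence)), operation)
            for operation in operations
        ])

    @contextlib.contextmanager
    def batch(self, batch_identifier, begin):
        self.publish(('begin', begin))
        try:
            yield lambda mutation: self.publish(('mutation', mutation))
        except Exception:
            self.publish(('rollback', batch_identifier))
            raise  # re-raised, so pytest.raises in the test still sees it
        else:
            self.publish(('commit', batch_identifier))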
Example #2
def test_publisher():
    messages = []
    publisher = Publisher(messages.extend)

    with publisher.batch(batch_identifier, begin) as publish:
        publish(mutation)

    published_messages = map(reserialize, messages)

    assert get_oneof_value(
        get_oneof_value(published_messages[0], 'operation'),
        'operation'
    ) == begin
    assert get_oneof_value(
        get_oneof_value(published_messages[1], 'operation'),
        'operation'
    ) == mutation
    assert get_oneof_value(
        get_oneof_value(published_messages[2], 'operation'),
        'operation'
    ) == commit

    for i, message in enumerate(published_messages):
        assert message.header.publisher == publisher.id
        assert message.header.sequence == i

    # Ensure it actually generates valid data.
    state = None
    for offset, message in enumerate(published_messages):
        state = reserialize(validate_state(state, offset, message))
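
Example #2 makes the message structure explicit: each published message carries an 'operation' oneof whose value is a batch operation that itself has an 'operation' oneof holding the concrete begin/mutation/commit payload, hence the doubled get_oneof_value here (and the doubled get_operation in Examples #1 and #3). A plausible sketch of such a helper using the standard protobuf Python API (WhichOneof); pgshovel's actual helper may differ:

def get_oneof_value(message, name):
    # WhichOneof(name) is part of the standard protobuf Python API: it
    # returns the name of whichever field in the oneof group is currently
    # set, or None if the group is empty.
    field = message.WhichOneof(name)
    return getattr(message, field) if field is not None else None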
Example #3
def test_publisher():
    messages = []
    publisher = Publisher(messages.extend)

    with publisher.batch(batch_identifier, begin) as publish:
        publish(mutation)

    published_messages = map(reserialize, messages)

    assert get_operation(get_operation(published_messages[0])) == begin
    assert get_operation(get_operation(published_messages[1])) == mutation
    assert get_operation(get_operation(published_messages[2])) == commit

    for i, message in enumerate(published_messages):
        assert message.header.publisher == publisher.id
        assert message.header.sequence == i

    # Ensure it actually generates valid data.
    assert list(states.validate(published_messages))
    assert list(sequences.validate(published_messages))
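
The states.validate and sequences.validate calls are wrapped in list(...) presumably because they are generators that yield each message through as they check it, much like the explicit validate_state loop in Example #2; consuming them forces the validation to run. A hypothetical stand-in for the sequence check:

def validate_sequences(messages):
    # Hypothetical stand-in for pgshovel's sequences.validate: pass each
    # message through, asserting that per-publisher sequence numbers start
    # at 0 and increase by exactly one.
    expected = {}
    for message in messages:
        publisher = message.header.publisher
        assert message.header.sequence == expected.get(publisher, 0), \
            'out-of-order message from %r' % (publisher,)
        expected[publisher] = message.header.sequence + 1
        yield message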
Example #4
File: relay.py  Project: disqus/pgshovel
    def run(self):
        publisher = Publisher(self.stream.push)

        try:
            logger.debug('Started worker.')

            # TODO: this connection needs a timeout in case the lock cannot be
            # grabbed or the connection cannot be established, to avoid never
            # exiting.
            logger.info('Registering as queue consumer...')
            with self.database.connection() as connection, connection.cursor() as cursor:
                statement = "SELECT * FROM pgq.register_consumer(%s, %s)"
                cursor.execute(statement, (self.cluster.get_queue_name(self.set), self.consumer))
                (new,) = cursor.fetchone()
                logger.info('Registered as queue consumer: %s (%s registration).', self.consumer, 'new' if new else 'existing')
                connection.commit()

            logger.info('Ready to relay events.')
            while True:
                if self.__stop_requested.wait(0.01):
                    break

                # TODO: this needs a timeout as well
                # TODO: this probably should have a lock on consumption
                with self.database.connection() as connection:
                    # Check to see if there is a batch available to be relayed.
                    statement = "SELECT batch_id FROM pgq.next_batch_info(%s, %s)"
                    with connection.cursor() as cursor:
                        cursor.execute(statement, (self.cluster.get_queue_name(self.set), self.consumer,))
                        (batch_id,) = cursor.fetchone()
                        if batch_id is None:
                            connection.commit()
                            continue  # There is nothing to consume.

                    # Fetch the details of the batch.
                    with connection.cursor() as cursor:
                        cursor.execute(BATCH_INFO_STATEMENT, (batch_id,))
                        start_id, start_snapshot, start_timestamp, end_id, end_snapshot, end_timestamp = cursor.fetchone()

                    batch = BatchIdentifier(
                        id=batch_id,
                        node=self.database.id.bytes,
                    )

                    begin = BeginOperation(
                        start=Tick(
                            id=start_id,
                            snapshot=to_snapshot(start_snapshot),
                            timestamp=to_timestamp(start_timestamp),
                        ),
                        end=Tick(
                            id=end_id,
                            snapshot=to_snapshot(end_snapshot),
                            timestamp=to_timestamp(end_timestamp),
                        ),
                    )

                    with publisher.batch(batch, begin) as publish:
                        # Fetch the events for the batch. This uses a named cursor
                        # to avoid having to load the entire event block into
                        # memory at once.
                        with connection.cursor('events') as cursor:
                            statement = "SELECT ev_id, ev_data, extract(epoch from ev_time), ev_txid FROM pgq.get_batch_events(%s)"
                            cursor.execute(statement, (batch_id,))

                            # TODO: Publish these in chunks; the full ack + RTT is a performance killer.
                            for mutation in itertools.imap(to_mutation, cursor):
                                publish(mutation)

                        with connection.cursor() as cursor:
                            cursor.execute("SELECT * FROM pgq.finish_batch(%s)", (batch_id,))
                            (success,) = cursor.fetchone()

                        # XXX: Not sure why this could happen?
                        if not success:
                            raise RuntimeError('Could not close batch!')

                    # XXX: Since this commit happens outside of the batch
                    # block, downstream consumers need to be able to handle
                    # receiving the same transaction multiple times, probably
                    # by checking a metadata table before starting to apply a
                    # batch.
                    connection.commit()

                    logger.debug('Successfully relayed batch: %s.', FormattedBatchIdentifier(batch))

        except Exception as error:
            logger.exception('Caught exception in worker: %s', error)
            self.__result.set_exception(error)
        else:
            logger.debug('Stopped.')
            self.__result.set_result(None)
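
Because the relay commits the pgq batch only after the publisher batch has already been closed, a crash between those two steps causes a replay: the same batch is published again on restart, exactly as the XXX comment warns. One way a downstream consumer could implement the suggested metadata-table check is sketched below; it assumes PostgreSQL 9.5+ (for ON CONFLICT) and psycopg2, and apply_operation is a hypothetical placeholder, not part of pgshovel.

import psycopg2

APPLIED_BATCHES_DDL = """
    CREATE TABLE IF NOT EXISTS applied_batches (
        node bytea NOT NULL,
        batch_id bigint NOT NULL,
        PRIMARY KEY (node, batch_id)
    )
"""


def apply_operation(cursor, operation):
    # Placeholder: a real consumer would translate the mutation into DML
    # against the target table here.
    pass


def apply_batch(connection, batch, operations):
    # Record the batch identifier in the same transaction that applies its
    # operations: a replayed batch loses the primary-key race and is
    # skipped instead of being applied a second time.
    with connection.cursor() as cursor:
        cursor.execute(
            "INSERT INTO applied_batches (node, batch_id) VALUES (%s, %s) "
            "ON CONFLICT DO NOTHING RETURNING batch_id",
            (psycopg2.Binary(batch.node), batch.id),
        )
        if cursor.fetchone() is None:
            connection.rollback()
            return False  # already applied; drop the replay
        for operation in operations:
            apply_operation(cursor, operation)
    connection.commit()
    return True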
Example #5
    def run(self):
        publisher = Publisher(self.handler.push)

        try:
            logger.debug('Started worker.')

            # TODO: this connection needs a timeout in case the lock cannot be
            # grabbed or the connection cannot be established, to avoid never
            # exiting.
            logger.info('Registering as queue consumer...')
            with self.database.connection() as connection, connection.cursor() as cursor:
                statement = "SELECT * FROM pgq.register_consumer(%s, %s)"
                cursor.execute(statement, (self.cluster.get_queue_name(self.set), self.consumer))
                (new,) = cursor.fetchone()
                logger.info('Registered as queue consumer: %s (%s registration).', self.consumer, 'new' if new else 'existing')
                connection.commit()

            logger.info('Ready to relay events.')
            while True:
                if self.__stop_requested.wait(0.01):
                    break

                # TODO: this needs a timeout as well
                # TODO: this probably should have a lock on consumption
                with self.database.connection() as connection:
                    # Check to see if there is a batch available to be relayed.
                    statement = "SELECT batch_id FROM pgq.next_batch_info(%s, %s)"
                    with connection.cursor() as cursor:
                        cursor.execute(statement, (self.cluster.get_queue_name(self.set), self.consumer,))
                        (batch_id,) = cursor.fetchone()
                        if batch_id is None:
                            connection.commit()
                            continue  # There is nothing to consume.

                    # Fetch the details of the batch.
                    with connection.cursor() as cursor:
                        cursor.execute(BATCH_INFO_STATEMENT, (batch_id,))
                        start_id, start_snapshot, start_timestamp, end_id, end_snapshot, end_timestamp = cursor.fetchone()

                    batch = BatchIdentifier(
                        id=batch_id,
                        node=self.database.id.bytes,
                    )

                    begin = BeginOperation(
                        start=Tick(
                            id=start_id,
                            snapshot=to_snapshot(start_snapshot),
                            timestamp=to_timestamp(start_timestamp),
                        ),
                        end=Tick(
                            id=end_id,
                            snapshot=to_snapshot(end_snapshot),
                            timestamp=to_timestamp(end_timestamp),
                        ),
                    )

                    with publisher.batch(batch, begin) as publish:
                        # Fetch the events for the batch. This uses a named cursor
                        # to avoid having to load the entire event block into
                        # memory at once.
                        with connection.cursor('events') as cursor:
                            statement = "SELECT ev_id, ev_data, extract(epoch from ev_time), ev_txid FROM pgq.get_batch_events(%s)"
                            cursor.execute(statement, (batch_id,))

                            for mutation in itertools.imap(to_mutation, cursor):
                                publish(mutation)

                        with connection.cursor() as cursor:
                            cursor.execute("SELECT * FROM pgq.finish_batch(%s)", (batch_id,))
                            (success,) = cursor.fetchone()

                        # XXX: Not sure why this could happen?
                        if not success:
                            raise RuntimeError('Could not close batch!')

                    # XXX: Since this commit happens outside of the batch
                    # block, downstream consumers need to be able to handle
                    # receiving the same transaction multiple times, probably
                    # by checking a metadata table before starting to apply a
                    # batch.
                    connection.commit()

                    logger.debug('Successfully relayed batch %s.', batch)

        except Exception as error:
            logger.exception('Caught exception in worker: %s', error)
            self.__result.set_exception(error)
        else:
            logger.debug('Stopped.')
            self.__result.set_result(None)
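
Both run() methods assume the same scaffolding around them: a stop flag polled with self.__stop_requested.wait(0.01), and a future-like self.__result used to hand the outcome, or the exception, back to whoever started the worker. A minimal sketch of that scaffolding, assuming a threading.Thread subclass and concurrent.futures.Future; the actual pgshovel worker may use different primitives:

import threading
from concurrent.futures import Future


class Worker(threading.Thread):
    # Hypothetical scaffolding implied by the run() methods above; the
    # names mirror the attributes they reference.
    def __init__(self):
        super(Worker, self).__init__()
        self.__stop_requested = threading.Event()
        self.__result = Future()

    def stop_async(self):
        # run() polls the event every 10 ms and exits its loop once set.
        self.__stop_requested.set()

    def result(self, timeout=None):
        # Re-raises whatever run() passed to set_exception, or returns
        # None once run() finishes cleanly and calls set_result(None).
        return self.__result.result(timeout)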