Beispiel #1
0
def to_mutation(row):
    """Convert a queue event row into a ``MutationOperation``.

    ``row`` is unpacked as ``(id, payload, timestamp, transaction)``. The
    payload consists of a version prefix, a colon, and a pickled body; only
    version ``'0'`` is understood.

    Raises ``RuntimeError`` when the payload version is not ``'0'`` and
    ``AssertionError`` when the event carries neither an old nor a new state.
    """
    event_id, payload, timestamp, transaction = row

    version, payload = payload.split(':', 1)
    if version != '0':
        # Interpolate the version into the message: passing it as a second
        # argument to the exception would leave the '%s' unformatted.
        raise RuntimeError('Cannot parse payload version: %s' % (version,))

    # NOTE(review): ``pickle.loads`` can execute arbitrary code while
    # deserializing -- this is only safe if the payload source is trusted.
    # The trailing configuration version is unpacked but not used here.
    (schema, table), operation, primary_key_columns, (old, new), _configuration_version = pickle.loads(payload)

    # Only the states that are actually present are passed to the message.
    states = {}
    if old:
        states['old'] = row_converter.to_protobuf(old)

    if new:
        states['new'] = row_converter.to_protobuf(new)

    assert states, 'at least one state must be set'

    return MutationOperation(
        id=event_id,
        schema=schema,
        table=table,
        # The operation name is resolved to the attribute of the same name
        # on ``MutationOperation``.
        operation=getattr(MutationOperation, operation),
        identity_columns=primary_key_columns,
        timestamp=to_timestamp(timestamp),
        transaction=transaction,
        **states
    )
Beispiel #2
0
 def publish(self, **kwargs):
     """Build one ``Message`` from ``kwargs`` and hand it to the receiver.

     The message header carries this publisher's id, the next value from
     ``self.sequence`` and a timestamp for the current time. The receiver
     is called with a one-element tuple containing the message.
     """
     receiver = self.receiver
     header = Header(
         publisher=self.id,
         sequence=next(self.sequence),
         timestamp=to_timestamp(time.time()),
     )
     message = Message(header=header, **kwargs)
     receiver((message,))
Beispiel #3
0
def to_mutation(row):
    """Convert a queue event row into a ``MutationOperation``.

    ``row`` is unpacked as ``(id, payload, timestamp, transaction)``. The
    payload consists of a version prefix, a colon, and a pickled body; only
    version ``'0'`` is understood.

    Raises ``RuntimeError`` when the payload version is not ``'0'`` and
    ``AssertionError`` when the event carries neither an old nor a new state.
    """
    event_id, payload, timestamp, transaction = row

    version, payload = payload.split(':', 1)
    if version != '0':
        # Interpolate the version into the message: passing it as a second
        # argument to the exception would leave the '%s' unformatted.
        raise RuntimeError('Cannot parse payload version: %s' % (version,))

    # NOTE(review): ``pickle.loads`` can execute arbitrary code while
    # deserializing -- this is only safe if the payload source is trusted.
    # The trailing configuration version is unpacked but not used here.
    (schema, table), operation, primary_key_columns, (
        old, new), _configuration_version = pickle.loads(payload)

    # Only the states that are actually present are passed to the message.
    states = {}
    if old:
        states['old'] = row_converter.to_protobuf(old)

    if new:
        states['new'] = row_converter.to_protobuf(new)

    assert states, 'at least one state must be set'

    # The operation name is resolved to the attribute of the same name on
    # ``MutationOperation``.
    return MutationOperation(id=event_id,
                             schema=schema,
                             table=table,
                             operation=getattr(MutationOperation, operation),
                             identity_columns=primary_key_columns,
                             timestamp=to_timestamp(timestamp),
                             transaction=transaction,
                             **states)
Beispiel #4
0
 def publish(self, **kwargs):
     """Build one ``Message`` from ``kwargs`` and hand it to the receiver.

     The message header carries this publisher's id, the next value from
     ``self.sequence`` and a timestamp for the current time. The receiver
     is called with a one-element tuple containing the message.
     """
     receiver = self.receiver
     header = Header(
         publisher=self.id,
         sequence=next(self.sequence),
         timestamp=to_timestamp(time.time()),
     )
     message = Message(header=header, **kwargs)
     receiver((message,))
Beispiel #5
0
    def run(self):
        """Relay queue batches to the publisher until a stop is requested.

        Registers ``self.consumer`` on the queue, then loops: ask for the
        next batch, load its start/end tick details, publish every event of
        the batch as a mutation, finish the batch and commit.

        Nothing is returned. The outcome is reported through
        ``self.__result``: ``set_result(None)`` after a clean stop, or
        ``set_exception(error)`` if any ``Exception`` escapes the loop.
        """
        publisher = Publisher(self.stream.push)

        try:
            logger.debug('Started worker.')

            # TODO: this connection needs to timeout in case the lock cannot be
            # grabbed or the connection cannot be established to avoid never
            # exiting
            logger.info('Registering as queue consumer...')
            with self.database.connection() as connection, connection.cursor() as cursor:
                statement = "SELECT * FROM pgq.register_consumer(%s, %s)"
                cursor.execute(statement, (self.cluster.get_queue_name(self.set), self.consumer))
                # The single returned column is used as a flag: truthy is
                # logged as a "new" registration, falsy as "existing".
                (new,) = cursor.fetchone()
                logger.info('Registered as queue consumer: %s (%s registration).', self.consumer, 'new' if new else 'existing')
                connection.commit()

            logger.info('Ready to relay events.')
            while True:
                # Doubles as the poll interval: waits up to 0.01 on the stop
                # flag (presumably seconds on an Event-like object -- confirm)
                # and leaves the loop once it reports true.
                if self.__stop_requested.wait(0.01):
                    break

                # TODO: this needs a timeout as well
                # TODO: this probably should have a lock on consumption
                with self.database.connection() as connection:
                    # Check to see if there is a batch available to be relayed.
                    statement = "SELECT batch_id FROM pgq.next_batch_info(%s, %s)"
                    with connection.cursor() as cursor:
                        cursor.execute(statement, (self.cluster.get_queue_name(self.set), self.consumer,))
                        (batch_id,) = cursor.fetchone()
                        if batch_id is None:
                            connection.commit()
                            continue  #  There is nothing to consume.

                    # Fetch the details of the batch.
                    with connection.cursor() as cursor:
                        cursor.execute(BATCH_INFO_STATEMENT, (batch_id,))
                        start_id, start_snapshot, start_timestamp, end_id, end_snapshot, end_timestamp = cursor.fetchone()

                    batch = BatchIdentifier(
                        id=batch_id,
                        node=self.database.id.bytes,
                    )

                    # The begin operation carries the start and end ticks
                    # that bound this batch.
                    begin = BeginOperation(
                        start=Tick(
                            id=start_id,
                            snapshot=to_snapshot(start_snapshot),
                            timestamp=to_timestamp(start_timestamp),
                        ),
                        end=Tick(
                            id=end_id,
                            snapshot=to_snapshot(end_snapshot),
                            timestamp=to_timestamp(end_timestamp),
                        ),
                    )

                    with publisher.batch(batch, begin) as publish:
                        # Fetch the events for the batch. This uses a named cursor
                        # to avoid having to load the entire event block into
                        # memory at once.
                        with connection.cursor('events') as cursor:
                            statement = "SELECT ev_id, ev_data, extract(epoch from ev_time), ev_txid FROM pgq.get_batch_events(%s)"
                            cursor.execute(statement, (batch_id,))

                            # TODO: Publish these in chunks, the full ack + RTT is a performance killer
                            for mutation in itertools.imap(to_mutation, cursor):
                                publish(mutation)

                        # Mark the batch as finished while still inside the
                        # publisher's batch context.
                        with connection.cursor() as cursor:
                            cursor.execute("SELECT * FROM pgq.finish_batch(%s)", (batch_id,))
                            (success,) = cursor.fetchone()

                        # XXX: Not sure why this could happen?
                        if not success:
                            raise RuntimeError('Could not close batch!')

                    # XXX: Since this is outside of the batch block,
                    # downstream consumers need to be able to handle receiving
                    # the same transaction multiple times, probably by checking
                    # a metadata table before starting to apply a batch.
                    connection.commit()

                    logger.debug('Successfully relayed batch: %s.', FormattedBatchIdentifier(batch))

        except Exception as error:
            # Log with traceback and hand the failure to whoever is waiting
            # on the result instead of letting it propagate from here.
            logger.exception('Caught exception in worker: %s', error)
            self.__result.set_exception(error)
        else:
            logger.debug('Stopped.')
            self.__result.set_result(None)
Beispiel #6
0
def test_timetamp_conversion():
    """Check that a float epoch value converts to seconds plus nanos."""
    converted = to_timestamp(1438814328.940597)

    # The nanos differ from the literal's fractional digits due to floating
    # point arithmetic.
    expected = Timestamp(seconds=1438814328, nanos=940597057)

    assert converted == expected
Beispiel #7
0
    def run(self):
        """Relay queue batches to the publisher until a stop is requested.

        Registers ``self.consumer`` on the queue, then loops: ask for the
        next batch, load its start/end tick details, publish every event of
        the batch as a mutation, finish the batch and commit.

        Nothing is returned. The outcome is reported through
        ``self.__result``: ``set_result(None)`` after a clean stop, or
        ``set_exception(error)`` if any ``Exception`` escapes the loop.
        """
        publisher = Publisher(self.handler.push)

        try:
            logger.debug('Started worker.')

            # TODO: this connection needs to timeout in case the lock cannot be
            # grabbed or the connection cannot be established to avoid never
            # exiting
            logger.info('Registering as queue consumer...')
            with self.database.connection() as connection, connection.cursor(
            ) as cursor:
                statement = "SELECT * FROM pgq.register_consumer(%s, %s)"
                cursor.execute(
                    statement,
                    (self.cluster.get_queue_name(self.set), self.consumer))
                # The single returned column is used as a flag: truthy is
                # logged as a "new" registration, falsy as "existing".
                (new, ) = cursor.fetchone()
                logger.info(
                    'Registered as queue consumer: %s (%s registration).',
                    self.consumer, 'new' if new else 'existing')
                connection.commit()

            logger.info('Ready to relay events.')
            while True:
                # Doubles as the poll interval: waits up to 0.01 on the stop
                # flag (presumably seconds on an Event-like object -- confirm)
                # and leaves the loop once it reports true.
                if self.__stop_requested.wait(0.01):
                    break

                # TODO: this needs a timeout as well
                # TODO: this probably should have a lock on consumption
                with self.database.connection() as connection:
                    # Check to see if there is a batch available to be relayed.
                    statement = "SELECT batch_id FROM pgq.next_batch_info(%s, %s)"
                    with connection.cursor() as cursor:
                        cursor.execute(statement, (
                            self.cluster.get_queue_name(self.set),
                            self.consumer,
                        ))
                        (batch_id, ) = cursor.fetchone()
                        if batch_id is None:
                            connection.commit()
                            continue  #  There is nothing to consume.

                    # Fetch the details of the batch.
                    with connection.cursor() as cursor:
                        cursor.execute(BATCH_INFO_STATEMENT, (batch_id, ))
                        start_id, start_snapshot, start_timestamp, end_id, end_snapshot, end_timestamp = cursor.fetchone(
                        )

                    batch = BatchIdentifier(
                        id=batch_id,
                        node=self.database.id.bytes,
                    )

                    # The begin operation carries the start and end ticks
                    # that bound this batch.
                    begin = BeginOperation(
                        start=Tick(
                            id=start_id,
                            snapshot=to_snapshot(start_snapshot),
                            timestamp=to_timestamp(start_timestamp),
                        ),
                        end=Tick(
                            id=end_id,
                            snapshot=to_snapshot(end_snapshot),
                            timestamp=to_timestamp(end_timestamp),
                        ),
                    )

                    with publisher.batch(batch, begin) as publish:
                        # Fetch the events for the batch. This uses a named cursor
                        # to avoid having to load the entire event block into
                        # memory at once.
                        with connection.cursor('events') as cursor:
                            statement = "SELECT ev_id, ev_data, extract(epoch from ev_time), ev_txid FROM pgq.get_batch_events(%s)"
                            cursor.execute(statement, (batch_id, ))

                            for mutation in itertools.imap(
                                    to_mutation, cursor):
                                publish(mutation)

                        # Mark the batch as finished while still inside the
                        # publisher's batch context.
                        with connection.cursor() as cursor:
                            cursor.execute(
                                "SELECT * FROM pgq.finish_batch(%s)",
                                (batch_id, ))
                            (success, ) = cursor.fetchone()

                        # XXX: Not sure why this could happen?
                        if not success:
                            raise RuntimeError('Could not close batch!')

                    # XXX: Since this is outside of the batch block,
                    # downstream consumers need to be able to handle receiving
                    # the same transaction multiple times, probably by checking
                    # a metadata table before starting to apply a batch.
                    connection.commit()

                    logger.debug('Successfully relayed batch %s.', batch)

        except Exception as error:
            # Log with traceback and hand the failure to whoever is waiting
            # on the result instead of letting it propagate from here.
            logger.exception('Caught exception in worker: %s', error)
            self.__result.set_exception(error)
        else:
            logger.debug('Stopped.')
            self.__result.set_result(None)