Example #1
    def fetch(self):
        # TODO: Might need to use the znode version here just to be safe
        # to avoid any race conditions? What happens if the set configuration
        # is updated while this is starting?
        configuration, _ = fetch_set(self.cluster, self.set)
        database = ManagedDatabase(self.cluster, configuration.database.dsn)

        connection_lock = threading.Lock()

        with database.connection() as connection:
            with connection.cursor() as cursor:
                cursor.execute('SELECT txid_current_snapshot();')
                row = cursor.fetchone()
                snapshot = to_snapshot(row[0])

            # Stream the rows of the given table as protocol buffer messages,
            # using the shared lock to serialize access to the connection.
            def loader(table):
                with connection_lock, connection.cursor('records', cursor_factory=NamedTupleCursor) as cursor:
                    if table.columns:
                        columns = ', '.join(map(quote, table.columns))
                    else:
                        columns = '*'

                    statement = 'SELECT {columns} FROM {schema}.{name}'.format(
                        columns=columns,
                        schema=quote(table.schema),
                        name=quote(table.name),
                    )

                    cursor.execute(statement)
                    for row in cursor:
                        converted = row_converter.to_protobuf(row._asdict())
                        # XXX: This is necessary because of a bug in protocol buffer oneof.
                        yield type(converted).FromString(converted.SerializeToString())

            loaders = [(table, loader(table)) for table in configuration.tables]

            state = BootstrapState(
                node=database.id.bytes,
                snapshot=snapshot,
            )

            yield state, loaders

            connection.commit()
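
A plausible sketch of the quote identifier helper used by loader above (hypothetical; the real helper may differ), following PostgreSQL's rule of wrapping identifiers in double quotes and doubling any embedded quote characters:

def quote(identifier):
    # Hypothetical sketch: quote a PostgreSQL identifier by wrapping it in
    # double quotes and escaping embedded quotes by doubling them.
    return '"{}"'.format(identifier.replace('"', '""'))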
Example #2
    def run(self):
        publisher = Publisher(self.stream.push)

        try:
            logger.debug('Started worker.')

            # TODO: this connection needs to timeout in case the lock cannot be
            # grabbed or the connection cannot be established to avoid never
            # exiting
            logger.info('Registering as queue consumer...')
            with self.database.connection() as connection, connection.cursor() as cursor:
                statement = "SELECT * FROM pgq.register_consumer(%s, %s)"
                cursor.execute(statement, (self.cluster.get_queue_name(self.set), self.consumer))
                (new,) = cursor.fetchone()
                logger.info('Registered as queue consumer: %s (%s registration).', self.consumer, 'new' if new else 'existing')
                connection.commit()

            logger.info('Ready to relay events.')
            while True:
                if self.__stop_requested.wait(0.01):
                    break

                # TODO: this needs a timeout as well
                # TODO: this probably should have a lock on consumption
                with self.database.connection() as connection:
                    # Check to see if there is a batch available to be relayed.
                    statement = "SELECT batch_id FROM pgq.next_batch_info(%s, %s)"
                    with connection.cursor() as cursor:
                        cursor.execute(statement, (self.cluster.get_queue_name(self.set), self.consumer,))
                        (batch_id,) = cursor.fetchone()
                        if batch_id is None:
                            connection.commit()
                            continue  # There is nothing to consume.

                    # Fetch the details of the batch.
                    with connection.cursor() as cursor:
                        cursor.execute(BATCH_INFO_STATEMENT, (batch_id,))
                        start_id, start_snapshot, start_timestamp, end_id, end_snapshot, end_timestamp = cursor.fetchone()

                    batch = BatchIdentifier(
                        id=batch_id,
                        node=self.database.id.bytes,
                    )

                    begin = BeginOperation(
                        start=Tick(
                            id=start_id,
                            snapshot=to_snapshot(start_snapshot),
                            timestamp=to_timestamp(start_timestamp),
                        ),
                        end=Tick(
                            id=end_id,
                            snapshot=to_snapshot(end_snapshot),
                            timestamp=to_timestamp(end_timestamp),
                        ),
                    )

                    with publisher.batch(batch, begin) as publish:
                        # Fetch the events for the batch. This uses a named cursor
                        # to avoid having to load the entire event block into
                        # memory at once.
                        with connection.cursor('events') as cursor:
                            statement = "SELECT ev_id, ev_data, extract(epoch from ev_time), ev_txid FROM pgq.get_batch_events(%s)"
                            cursor.execute(statement, (batch_id,))

                            # TODO: Publish these in chunks, the full ack + RTT is a performance killer
                            for mutation in itertools.imap(to_mutation, cursor):
                                publish(mutation)

                        with connection.cursor() as cursor:
                            cursor.execute("SELECT * FROM pgq.finish_batch(%s)", (batch_id,))
                            (success,) = cursor.fetchone()

                        # XXX: Not sure why this could happen?
                        if not success:
                            raise RuntimeError('Could not close batch!')

                    # XXX: Since this commit happens outside of the batch block,
                    # downstream consumers need to be able to handle receiving
                    # the same transaction multiple times, probably by checking
                    # a metadata table before starting to apply a batch.
                    connection.commit()

                    logger.debug('Successfully relayed batch: %s.', FormattedBatchIdentifier(batch))

        except Exception as error:
            logger.exception('Caught exception in worker: %s', error)
            self.__result.set_exception(error)
        else:
            logger.debug('Stopped.')
            self.__result.set_result(None)
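
The chunking TODO in the event loop above notes that a full acknowledgement round trip per mutation is a performance killer. A minimal sketch of one way to batch the publishes, assuming a hypothetical publish_chunk callable that accepts a sequence of mutations per round trip:

import itertools

def chunked(iterable, size):
    # Yield lists of up to `size` items drawn from `iterable`.
    iterator = iter(iterable)
    while True:
        chunk = list(itertools.islice(iterator, size))
        if not chunk:
            return
        yield chunk

# Inside the batch block, instead of publishing one mutation at a time:
#     for chunk in chunked(itertools.imap(to_mutation, cursor), 500):
#         publish_chunk(chunk)  # hypothetical batched publish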
Example #3
def test_snapshot_conversion_in_progress():
    assert to_snapshot('1:10:2,3,4') == Snapshot(
        min=1,
        max=10,
        active=[2, 3, 4],
    )
Example #4
def test_snapshot_conversion():
    assert to_snapshot('1:10:') == Snapshot(
        min=1,
        max=10,
    )
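
Examples #3 and #4 together pin down the input format: the text form of PostgreSQL's txid_current_snapshot(), xmin:xmax:xip_list, where the xip list may be empty. A minimal sketch of a to_snapshot parser consistent with both tests, assuming Snapshot is a keyword-argument container whose active list defaults to empty:

def to_snapshot(value):
    # Parse 'xmin:xmax:xip1,xip2,...' as returned by txid_current_snapshot().
    min_txid, max_txid, active = value.split(':')
    if active:
        return Snapshot(
            min=int(min_txid),
            max=int(max_txid),
            active=[int(txid) for txid in active.split(',')],
        )
    return Snapshot(min=int(min_txid), max=int(max_txid))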
Example #5
    def run(self):
        publisher = Publisher(self.handler.push)

        try:
            logger.debug('Started worker.')

            # TODO: this connection needs to timeout in case the lock cannot be
            # grabbed or the connection cannot be established to avoid never
            # exiting
            logger.info('Registering as queue consumer...')
            with self.database.connection() as connection, connection.cursor() as cursor:
                statement = "SELECT * FROM pgq.register_consumer(%s, %s)"
                cursor.execute(statement, (self.cluster.get_queue_name(self.set), self.consumer))
                (new,) = cursor.fetchone()
                logger.info('Registered as queue consumer: %s (%s registration).', self.consumer, 'new' if new else 'existing')
                connection.commit()

            logger.info('Ready to relay events.')
            while True:
                if self.__stop_requested.wait(0.01):
                    break

                # TODO: this needs a timeout as well
                # TODO: this probably should have a lock on consumption
                with self.database.connection() as connection:
                    # Check to see if there is a batch available to be relayed.
                    statement = "SELECT batch_id FROM pgq.next_batch_info(%s, %s)"
                    with connection.cursor() as cursor:
                        cursor.execute(statement, (self.cluster.get_queue_name(self.set), self.consumer,))
                        (batch_id,) = cursor.fetchone()
                        if batch_id is None:
                            connection.commit()
                            continue  # There is nothing to consume.

                    # Fetch the details of the batch.
                    with connection.cursor() as cursor:
                        cursor.execute(BATCH_INFO_STATEMENT, (batch_id,))
                        start_id, start_snapshot, start_timestamp, end_id, end_snapshot, end_timestamp = cursor.fetchone()

                    batch = BatchIdentifier(
                        id=batch_id,
                        node=self.database.id.bytes,
                    )

                    begin = BeginOperation(
                        start=Tick(
                            id=start_id,
                            snapshot=to_snapshot(start_snapshot),
                            timestamp=to_timestamp(start_timestamp),
                        ),
                        end=Tick(
                            id=end_id,
                            snapshot=to_snapshot(end_snapshot),
                            timestamp=to_timestamp(end_timestamp),
                        ),
                    )

                    with publisher.batch(batch, begin) as publish:
                        # Fetch the events for the batch. This uses a named cursor
                        # to avoid having to load the entire event block into
                        # memory at once.
                        with connection.cursor('events') as cursor:
                            statement = "SELECT ev_id, ev_data, extract(epoch from ev_time), ev_txid FROM pgq.get_batch_events(%s)"
                            cursor.execute(statement, (batch_id,))

                            for mutation in itertools.imap(to_mutation, cursor):
                                publish(mutation)

                        with connection.cursor() as cursor:
                            cursor.execute("SELECT * FROM pgq.finish_batch(%s)", (batch_id,))
                            (success,) = cursor.fetchone()

                        # XXX: Not sure why this could happen?
                        if not success:
                            raise RuntimeError('Could not close batch!')

                    # XXX: Since this commit happens outside of the batch block,
                    # downstream consumers need to be able to handle receiving
                    # the same transaction multiple times, probably by checking
                    # a metadata table before starting to apply a batch.
                    connection.commit()

                    logger.debug('Successfully relayed batch %s.', batch)

        except Exception as error:
            logger.exception('Caught exception in worker: %s', error)
            self.__result.set_exception(error)
        else:
            logger.debug('Stopped.')
            self.__result.set_result(None)
Example #6
def source_transaction_snapshot(source_connection):
    # Yield the transaction snapshot that is current at this point in the
    # source transaction.
    with source_connection as conn, conn.cursor() as cursor:
        cursor.execute('SELECT txid_current_snapshot();')
        row = cursor.fetchone()
        yield to_snapshot(row[0])
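
The single yield suggests source_transaction_snapshot is meant to serve as a context manager; a usage sketch under that assumption (the contextlib.contextmanager decorator may simply be elided above):

import contextlib

source_transaction_snapshot = contextlib.contextmanager(source_transaction_snapshot)

with source_transaction_snapshot(source_connection) as snapshot:
    # The snapshot delimits which transactions were in flight when the
    # source transaction began.
    print(snapshot)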