def fetch(self):
    """Yield the bootstrap state and lazy per-table loaders for this set.

    Opens one connection to the set's database, records the current
    transaction snapshot, and yields a ``(state, loaders)`` pair, where
    ``loaders`` is a list of ``(table, iterator)`` tuples. Each iterator
    streams the rows of its table as protobuf messages when it is consumed.
    The connection is committed once the generator is resumed after the
    yield.

    NOTE(review): the single ``yield`` followed by a commit suggests this is
    wrapped as a context manager by a decorator outside this view -- confirm.
    """
    # TODO: Might need to use the znode version here just to be safe
    # to avoid any race conditions? What happens if the set configuration
    # is updated while this is starting?
    configuration, _ = fetch_set(self.cluster, self.set)
    database = ManagedDatabase(self.cluster, configuration.database.dsn)

    # Held for as long as a loader's 'records' cursor is open, so loaders
    # sharing the connection below are consumed one at a time.
    connection_lock = threading.Lock()

    with database.connection() as connection:
        with connection.cursor() as snapshot_cursor:
            snapshot_cursor.execute('SELECT txid_current_snapshot();')
            snapshot_row = snapshot_cursor.fetchone()
            snapshot = to_snapshot(snapshot_row[0])

        def loader(table):
            # Generator: nothing below runs until the first row is requested.
            with connection_lock, connection.cursor('records', cursor_factory=NamedTupleCursor) as records:
                columns = (
                    ', '.join(quote(column) for column in table.columns)
                    if table.columns
                    else '*'
                )
                statement = 'SELECT {columns} FROM {schema}.{name}'.format(
                    columns=columns,
                    schema=quote(table.schema),
                    name=quote(table.name),
                )
                records.execute(statement)

                for record in records:
                    converted = row_converter.to_protobuf(record._asdict())
                    # XXX: This is necessary because of a bug in protocol buffer oneof.
                    yield type(converted).FromString(converted.SerializeToString())

        loaders = [(table, loader(table)) for table in configuration.tables]
        state = BootstrapState(
            node=database.id.bytes,
            snapshot=snapshot,
        )

        yield state, loaders

        connection.commit()
def run(self):
    """Relay queue batches to the stream until a stop is requested.

    Registers ``self.consumer`` on the set's queue, then polls for batches.
    Every available batch is published (begin tick, each event as a
    mutation, end tick) through a ``Publisher`` built on
    ``self.stream.push``, closed with ``pgq.finish_batch`` and committed.

    The outcome is reported through ``self.__result``: ``None`` when the
    loop exits because a stop was requested, or the exception that ended it.
    """
    publisher = Publisher(self.stream.push)

    try:
        logger.debug('Started worker.')

        # TODO: this connection needs to timeout in case the lock cannot be
        # grabbed or the connection cannot be established to avoid never
        # exiting
        logger.info('Registering as queue consumer...')
        with self.database.connection() as conn:
            with conn.cursor() as cur:
                cur.execute(
                    "SELECT * FROM pgq.register_consumer(%s, %s)",
                    (self.cluster.get_queue_name(self.set), self.consumer),
                )
                (is_new,) = cur.fetchone()
                logger.info(
                    'Registered as queue consumer: %s (%s registration).',
                    self.consumer,
                    'new' if is_new else 'existing',
                )
                conn.commit()

        logger.info('Ready to relay events.')

        # The short wait doubles as the poll interval and the stop check.
        while not self.__stop_requested.wait(0.01):
            # TODO: this needs a timeout as well
            # TODO: this probably should have a lock on consumption
            with self.database.connection() as conn:
                # Check to see if there is a batch available to be relayed.
                with conn.cursor() as cur:
                    cur.execute(
                        "SELECT batch_id FROM pgq.next_batch_info(%s, %s)",
                        (self.cluster.get_queue_name(self.set), self.consumer,),
                    )
                    (batch_id,) = cur.fetchone()
                    if batch_id is None:
                        conn.commit()
                        continue  # There is nothing to consume.

                # Fetch the details of the batch.
                with conn.cursor() as cur:
                    cur.execute(BATCH_INFO_STATEMENT, (batch_id,))
                    details = cur.fetchone()
                    (start_id, start_snapshot, start_timestamp,
                     end_id, end_snapshot, end_timestamp) = details

                batch = BatchIdentifier(
                    id=batch_id,
                    node=self.database.id.bytes,
                )
                start_tick = Tick(
                    id=start_id,
                    snapshot=to_snapshot(start_snapshot),
                    timestamp=to_timestamp(start_timestamp),
                )
                end_tick = Tick(
                    id=end_id,
                    snapshot=to_snapshot(end_snapshot),
                    timestamp=to_timestamp(end_timestamp),
                )
                begin = BeginOperation(start=start_tick, end=end_tick)

                with publisher.batch(batch, begin) as publish:
                    # Fetch the events for the batch. This uses a named cursor
                    # to avoid having to load the entire event block into
                    # memory at once.
                    with conn.cursor('events') as events:
                        events.execute(
                            "SELECT ev_id, ev_data, extract(epoch from ev_time), ev_txid FROM pgq.get_batch_events(%s)",
                            (batch_id,),
                        )

                        # TODO: Publish these in chunks, the full ack + RTT is a performance killer
                        for event in events:
                            publish(to_mutation(event))

                    with conn.cursor() as cur:
                        cur.execute("SELECT * FROM pgq.finish_batch(%s)", (batch_id,))
                        (success,) = cur.fetchone()

                    # XXX: Not sure why this could happen?
                    if not success:
                        raise RuntimeError('Could not close batch!')

                # XXX: Since this is outside of the batch block, this
                # downstream consumers need to be able to handle receiving
                # the same transaction multiple times, probably by checking
                # a metadata table before starting to apply a batch.
                conn.commit()

                logger.debug('Successfully relayed batch: %s.', FormattedBatchIdentifier(batch))
    except Exception as error:
        logger.exception('Caught exception in worker: %s', error)
        self.__result.set_exception(error)
    else:
        logger.debug('Stopped.')
        self.__result.set_result(None)
def test_snapshot_conversion_in_progress():
    """A snapshot string that lists transaction ids fills in ``active``."""
    parsed = to_snapshot('1:10:2,3,4')
    expected = Snapshot(
        min=1,
        max=10,
        active=[2, 3, 4],
    )
    assert parsed == expected
def test_snapshot_conversion():
    """A snapshot string with an empty third field sets only ``min``/``max``."""
    parsed = to_snapshot('1:10:')
    expected = Snapshot(
        min=1,
        max=10,
    )
    assert parsed == expected
def run(self):
    """Relay queue batches to the handler until a stop is requested.

    After registering ``self.consumer`` on the set's queue, the worker polls
    for the next batch. Each batch found is published through a
    ``Publisher`` built on ``self.handler.push``, closed with
    ``pgq.finish_batch`` and then committed.

    ``self.__result`` receives ``None`` when the loop ends because a stop
    was requested, or the exception that terminated the worker.
    """
    publisher = Publisher(self.handler.push)

    try:
        logger.debug('Started worker.')

        # TODO: this connection needs to timeout in case the lock cannot be
        # grabbed or the connection cannot be established to avoid never
        # exiting
        logger.info('Registering as queue consumer...')
        with self.database.connection() as db:
            with db.cursor() as registration:
                register_sql = "SELECT * FROM pgq.register_consumer(%s, %s)"
                register_args = (self.cluster.get_queue_name(self.set), self.consumer)
                registration.execute(register_sql, register_args)
                (created,) = registration.fetchone()
                logger.info(
                    'Registered as queue consumer: %s (%s registration).',
                    self.consumer,
                    'new' if created else 'existing',
                )
                db.commit()

        logger.info('Ready to relay events.')

        # Each pass waits briefly on the stop flag before polling the queue.
        while not self.__stop_requested.wait(0.01):
            # TODO: this needs a timeout as well
            # TODO: this probably should have a lock on consumption
            with self.database.connection() as db:
                # Check to see if there is a batch available to be relayed.
                next_batch_sql = "SELECT batch_id FROM pgq.next_batch_info(%s, %s)"
                with db.cursor() as lookup:
                    lookup.execute(next_batch_sql, (
                        self.cluster.get_queue_name(self.set),
                        self.consumer,
                    ))
                    (batch_id,) = lookup.fetchone()
                    if batch_id is None:
                        db.commit()
                        continue  # There is nothing to consume.

                # Fetch the details of the batch.
                with db.cursor() as info:
                    info.execute(BATCH_INFO_STATEMENT, (batch_id,))
                    (start_id, start_snapshot, start_timestamp,
                     end_id, end_snapshot, end_timestamp) = info.fetchone()

                batch = BatchIdentifier(
                    id=batch_id,
                    node=self.database.id.bytes,
                )
                opening_tick = Tick(
                    id=start_id,
                    snapshot=to_snapshot(start_snapshot),
                    timestamp=to_timestamp(start_timestamp),
                )
                closing_tick = Tick(
                    id=end_id,
                    snapshot=to_snapshot(end_snapshot),
                    timestamp=to_timestamp(end_timestamp),
                )
                begin = BeginOperation(start=opening_tick, end=closing_tick)

                with publisher.batch(batch, begin) as publish:
                    # Fetch the events for the batch. This uses a named cursor
                    # to avoid having to load the entire event block into
                    # memory at once.
                    with db.cursor('events') as event_rows:
                        events_sql = "SELECT ev_id, ev_data, extract(epoch from ev_time), ev_txid FROM pgq.get_batch_events(%s)"
                        event_rows.execute(events_sql, (batch_id,))
                        for event_row in event_rows:
                            publish(to_mutation(event_row))

                    with db.cursor() as closer:
                        closer.execute("SELECT * FROM pgq.finish_batch(%s)", (batch_id,))
                        (success,) = closer.fetchone()

                    # XXX: Not sure why this could happen?
                    if not success:
                        raise RuntimeError('Could not close batch!')

                # XXX: Since this is outside of the batch block, this
                # downstream consumers need to be able to handle receiving
                # the same transaction multiple times, probably by checking
                # a metadata table before starting to apply a batch.
                db.commit()

                logger.debug('Successfully relayed batch %s.', batch)
    except Exception as error:
        logger.exception('Caught exception in worker: %s', error)
        self.__result.set_exception(error)
    else:
        logger.debug('Stopped.')
        self.__result.set_result(None)
def source_transaction_snapshot(source_connection):
    """Yield the current transaction snapshot of ``source_connection``.

    Enters the connection and a cursor as context managers, runs
    ``txid_current_snapshot()``, and yields the first column of the result
    converted by ``to_snapshot``. Both contexts stay open while the
    generator is suspended at the yield.

    NOTE(review): the single-yield shape suggests this is used as a context
    manager via a decorator outside this view -- confirm.
    """
    with source_connection as conn:
        with conn.cursor() as cursor:
            cursor.execute('SELECT txid_current_snapshot();')
            (raw_snapshot,) = cursor.fetchone()[:1]
            yield to_snapshot(raw_snapshot)