async def wrapper():
    async with LiftBridgeClient() as client:
        await client.create_stream(
            subject=name,
            name=name,
            partitions=n_partitions,
            minisr=minisr,
            replication_factor=replication_factor,
            retention_max_bytes=getattr(
                config.liftbridge, f"stream_{base_name}_retention_max_bytes", 0
            ),
            retention_max_age=getattr(
                config.liftbridge, f"stream_{base_name}_retention_max_age", 0
            ),
            segment_max_bytes=getattr(
                config.liftbridge, f"stream_{base_name}_segment_max_bytes", 0
            ),
            segment_max_age=getattr(
                config.liftbridge, f"stream_{base_name}_segment_max_age", 0
            ),
            auto_pause_time=getattr(
                config.liftbridge, f"stream_{base_name}_auto_pause_time", 0
            ),
            auto_pause_disable_if_subscribers=getattr(
                config.liftbridge,
                f"stream_{base_name}_auto_pause_disable_if_subscribers",
                False,
            ),
        )
def publish(
    self,
    value: bytes,
    stream: str,
    partition: Optional[int] = None,
    key: Optional[bytes] = None,
    headers: Optional[Dict[str, bytes]] = None,
):
    """
    Schedule a publish request for asynchronous delivery.

    :param value: Message payload
    :param stream: Target stream name
    :param partition: Optional explicit partition
    :param key: Optional message key
    :param headers: Optional message headers
    :return:
    """
    if not self.publish_queue:
        self._init_publisher()
    req = LiftBridgeClient.get_publish_request(
        value=value,
        stream=stream,
        partition=partition,
        key=key,
        headers=headers,
        auto_compress=bool(config.liftbridge.compression_method),
    )
    self.publish_queue.put(req)
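# Usage sketch (assumption): `svc` stands for a service instance exposing the
# publish() method above; the stream name, payload and header are illustrative.
# publish() only enqueues the request; the background publisher() coroutine
# shown further below drains the queue and performs the actual delivery.
svc.publish(
    value=b'{"event": "test"}',
    stream="events",
    headers={"X-Sender": b"demo"},
)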
async def create():
    async with LiftBridgeClient() as client:
        await client.create_stream(
            name=name,
            subject=subject,
            partitions=partitions,
            replication_factor=rf,
        )
async def wrap():
    async with LiftBridgeClient() as client:
        await client.publish(
            value=value,
            stream=stream,
            partition=partition,
            key=key,
            headers=headers,
            auto_compress=bool(config.liftbridge.compression_method),
        )
async def upload(table: str, data: List[bytes]):
    CHUNK = 1000
    n_parts = len(config.clickhouse.cluster_topology.split(","))
    async with LiftBridgeClient() as client:
        while data:
            # Publish up to CHUNK records per message,
            # spreading chunks randomly across shard partitions
            chunk, data = data[:CHUNK], data[CHUNK:]
            await client.publish(
                b"\n".join(chunk),
                stream=f"ch.{table}",
                partition=random.randint(0, n_parts - 1),
            )
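# Usage sketch (assumption): the table name and rows below are illustrative.
# upload() joins up to 1000 rows per message and publishes them to the
# "ch.<table>" stream on a randomly chosen shard partition.
rows = [b"row1\tvalue1", b"row2\tvalue2"]
await upload("raw_cpu", rows)  # publishes to stream "ch.raw_cpu"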
async def publisher(self):
    async with LiftBridgeClient() as client:
        while not self.publish_queue.to_shutdown:
            req = await self.publish_queue.get(timeout=1)
            if not req:
                continue  # Timeout or shutdown
            try:
                await client.publish_sync(req, wait_for_stream=True)
            except LiftbridgeError as e:
                self.logger.error("Failed to publish message: %s", e)
                self.logger.error("Retry message")
                await asyncio.sleep(1)
                # Return the message to the queue for retry
                self.publish_queue.put(req, fifo=False)
async def publisher():
    async with LiftBridgeClient() as client:
        payload = b" " * payload_size
        t0 = perf_counter()
        for _ in self.progress(range(num_messages), num_messages):
            await client.publish(payload, stream=name, wait_for_stream=wait_for_stream)
        dt = perf_counter() - t0
        self.print("%d messages sent in %.2fms" % (num_messages, dt * 1000))
        self.print(
            "%d msg/sec, %d bytes/sec"
            % (num_messages / dt, num_messages * payload_size / dt)
        )
async def process_stream(self, stream: str) -> None:
    self.logger.info("[%s] Subscribing", stream)
    table = stream[3:]  # Strip the "ch." prefix to get the table name
    channel = Channel(self, table)
    self.channels[table] = channel
    cursor_id = self.get_cursor_id()
    async with LiftBridgeClient() as client:
        async for msg in client.subscribe(
            stream=stream,
            partition=config.chwriter.shard_id,
            start_position=StartPosition.RESUME,
            cursor_id=cursor_id,
        ):
            await channel.feed(msg)
async def iter_ch_streams(self) -> AsyncIterable[str]:
    """
    Yield ClickHouse ("ch.*") stream names
    :return:
    """
    async with LiftBridgeClient() as client:
        while True:
            meta = await client.fetch_metadata()
            if meta.metadata:
                for stream_meta in meta.metadata:
                    if stream_meta.name.startswith("ch."):
                        yield stream_meta.name
                break
            # Cluster election in progress or cluster is misconfigured
            self.logger.info("Cluster has no active partitions. Waiting")
            await asyncio.sleep(1)
async def get_stream_partitions(self, stream: str) -> int:
    """
    Return the number of partitions for a given stream.

    :param stream: Stream name
    :return: Partition count
    """
    async with LiftBridgeClient() as client:
        while True:
            meta = await client.fetch_metadata()
            if meta.metadata:
                for stream_meta in meta.metadata:
                    if stream_meta.name == stream:
                        if stream_meta.partitions:
                            return len(stream_meta.partitions)
                        break
            # Cluster election in progress or cluster is misconfigured
            self.logger.info("Stream '%s' has no active partitions. Waiting", stream)
            await asyncio.sleep(1)
async def subscribe_stream(
    self,
    stream: str,
    partition: int,
    handler: Callable[[Message], Awaitable[None]],
    start_timestamp: Optional[float] = None,
    start_position: StartPosition = StartPosition.RESUME,
    cursor_id: Optional[str] = None,
    auto_set_cursor: bool = True,
) -> None:
    # @todo: Restart on failure
    self.logger.info("Subscribing %s:%s", stream, partition)
    cursor_id = cursor_id or self.name
    try:
        async with LiftBridgeClient() as client:
            self.active_subscribers += 1
            async for msg in client.subscribe(
                stream=stream,
                partition=partition,
                start_position=start_position,
                cursor_id=cursor_id,
                start_timestamp=start_timestamp,
            ):
                try:
                    await handler(msg)
                except Exception as e:
                    self.logger.error("Failed to process message: %s", e)
                if auto_set_cursor and cursor_id:
                    await client.set_cursor(
                        stream=stream,
                        partition=partition,
                        cursor_id=cursor_id,
                        offset=msg.offset,
                    )
                if self.subscriber_shutdown_waiter:
                    break
    finally:
        self.active_subscribers -= 1
        if self.subscriber_shutdown_waiter and not self.active_subscribers:
            self.subscriber_shutdown_waiter.set()
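# Usage sketch (assumption): `svc` stands for a service instance exposing
# subscribe_stream() above; the handler and stream name are illustrative.
async def on_message(msg: Message) -> None:
    # Exceptions raised here are caught and logged by subscribe_stream(),
    # so the subscription keeps running on the next message
    print(msg.offset, msg.value)

await svc.subscribe_stream("events", 0, on_message)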
async def subscribe():
    async with LiftBridgeClient() as client:
        async for msg in client.subscribe(
            stream=name,
            partition=partition,
            start_offset=start_offset,
            cursor_id=cursor or None,
            start_timestamp=start_ts,
        ):
            print(
                "# Subject: %s Partition: %s Offset: %s Timestamp: %s Key: %s Headers: %s"
                % (
                    msg.subject,
                    msg.partition,
                    msg.offset,
                    msg.timestamp,
                    msg.key,
                    msg.headers,
                )
            )
            print(msg.value)
async def batch_publisher():
    async with LiftBridgeClient() as client:
        payload = b" " * payload_size
        t0 = perf_counter()
        out = []
        n_acks = 0
        for _ in self.progress(range(num_messages), num_messages):
            out += [client.get_publish_request(payload, stream=name)]
            if len(out) == batch:
                async for _ in client.publish_async(out):
                    n_acks += 1
                out = []
        if out:
            async for _ in client.publish_async(out):
                n_acks += 1
            out = []
        dt = perf_counter() - t0
        self.print(
            "%d messages sent in %.2fms (%d acks)" % (num_messages, dt * 1000, n_acks)
        )
        self.print(
            "%d msg/sec, %d bytes/sec"
            % (num_messages / dt, num_messages * payload_size / dt)
        )
async def subscriber():
    async with LiftBridgeClient() as client:
        report_interval = 1.0
        t0 = perf_counter()
        total_msg = last_msg = 0
        total_size = last_size = 0
        async for msg in client.subscribe(name):
            total_msg += 1
            total_size += len(msg.value)
            t = perf_counter()
            dt = t - t0
            if dt >= report_interval:
                self.print(
                    "%d msg/sec, %d bytes/sec"
                    % ((total_msg - last_msg) / dt, (total_size - last_size) / dt)
                )
                t0 = t
                last_msg = total_msg
                last_size = total_size
                if cursor:
                    await client.set_cursor(
                        stream=name, partition=0, cursor_id=cursor, offset=msg.offset
                    )
async def fetch_cursor(self, stream, partition, name):
    async with LiftBridgeClient() as client:
        return await client.fetch_cursor(stream=stream, partition=partition, cursor_id=name)
async def flush_data(self):
    """
    Flush queued data to ClickHouse, then advance the stream cursor
    :return:
    """
    async with LiftBridgeClient() as client:
        cursor_id = self.get_cursor_id()
        partition_id = config.chwriter.shard_id
        while not self.stopping:
            ch = await self.flush_queue.get()
            n_records = ch.records
            while True:
                try:
                    self.logger.info("[%s] Sending %d records", ch.table, n_records)
                    t0 = perf_counter()
                    url = (
                        f"http://{self.ch_address}/?"
                        f"user={config.clickhouse.rw_user}&"
                        f"password={config.clickhouse.rw_password or ''}&"
                        f"database={config.clickhouse.db}&"
                        f"query={ch.q_sql}"
                    )
                    code, headers, body = await fetch(
                        url,
                        method="POST",
                        body=ch.get_data(),
                        user=config.clickhouse.rw_user,
                        password=config.clickhouse.rw_password or "",
                        content_encoding=config.clickhouse.encoding,
                    )
                    if code == 200:
                        self.logger.info(
                            "[%s] %d records sent in %.2fms",
                            ch.table,
                            n_records,
                            (perf_counter() - t0) * 1000,
                        )
                        metrics["records_written"] += n_records
                        break
                    elif code in self.CH_SUSPEND_ERRORS:
                        self.logger.info("[%s] Timed out: %s", ch.table, body)
                        metrics["error", ("type", "records_spool_timeouts")] += 1
                        await asyncio.sleep(1)
                        continue
                    else:
                        self.logger.info(
                            "[%s] Failed to write records: %s %s", ch.table, code, body
                        )
                        metrics["error", ("type", "records_spool_failed")] += 1
                        break
                except Exception as e:
                    self.logger.error(
                        "[%s] Failed to spool %d records due to unknown error: %s",
                        ch.table,
                        n_records,
                        e,
                    )
                    await asyncio.sleep(1)
                    continue
            # Set cursor
            await client.set_cursor(
                ch.stream,
                partition=partition_id,
                cursor_id=cursor_id,
                offset=ch.last_offset,
            )
            # Unfreeze channel
            ch.flush_complete()
async def set_cursor():
    async with LiftBridgeClient() as client:
        await client.set_cursor(stream=stream, partition=partition, cursor_id=name, offset=offset)
async def fetch_cursor():
    async with LiftBridgeClient() as client:
        cursor = await client.fetch_cursor(stream=stream, partition=partition, cursor_id=name)
        print(cursor)
async def wrap():
    async with LiftBridgeClient() as client:
        r = await client.fetch_metadata(MX_STREAM, wait_for_stream=True)
        for m in r.metadata:
            if m.name == MX_STREAM:
                return len(m.partitions)
async def wrap():
    async with LiftBridgeClient() as client:
        await client.publish(
            value=value, stream=stream, partition=partition, key=key, headers=headers
        )
async def get_meta() -> Metadata:
    async with LiftBridgeClient() as client:
        return await client.fetch_metadata()
async def wrapper():
    self.print("Altering stream %s" % name)
    async with LiftBridgeClient() as client:
        # Create a temporary stream with the same structure as the original one
        tmp_stream = "__tmp-%s" % name
        self.print("Creating temporary stream %s" % tmp_stream)
        await client.create_stream(
            subject=tmp_stream,
            name=tmp_stream,
            partitions=old_partitions,
            replication_factor=replication_factor,
        )
        # Copy all unread data to the temporary stream as-is
        for partition in range(old_partitions):
            self.print(
                "Copying partition %s:%s to %s:%s"
                % (name, partition, tmp_stream, partition)
            )
            n_msg[partition] = 0
            # Get the current newest offset
            p_meta = run_sync(functools.partial(get_partition_meta, name, partition))
            newest_offset = p_meta.newest_offset or 0
            # Fetch the cursor
            current_offset = await client.fetch_cursor(
                stream=name,
                partition=partition,
                cursor_id=self.CURSOR_STREAM[name.split(".")[0]],
            )
            if current_offset > newest_offset:
                # Fix the cursor if it was not set properly
                current_offset = newest_offset
            self.print(
                "Start copying from current offset: %s to newest offset: %s"
                % (current_offset, newest_offset)
            )
            if current_offset < newest_offset:
                async for msg in client.subscribe(
                    stream=name, partition=partition, start_offset=current_offset
                ):
                    await client.publish(
                        msg.value,
                        stream=tmp_stream,
                        partition=partition,
                    )
                    n_msg[partition] += 1
                    if msg.offset == newest_offset:
                        break
            if n_msg[partition]:
                self.print("  %d messages have been copied" % n_msg[partition])
            else:
                self.print("  nothing to copy")
        # Drop the original stream
        self.print("Dropping original stream %s" % name)
        await client.delete_stream(name)
        # Create a new stream with the required structure
        self.print("Creating stream %s" % name)
        await client.create_stream(
            subject=name,
            name=name,
            partitions=new_partitions,
            replication_factor=replication_factor,
        )
        # Copy data from the temporary stream to the new one
        for partition in range(old_partitions):
            # Re-route dropped partitions to partition 0
            dest_partition = partition if partition < new_partitions else 0
            self.print(
                "Restoring partition %s:%s to %s:%s"
                % (tmp_stream, partition, name, dest_partition)
            )
            n = n_msg[partition]
            if n > 0:
                async for msg in client.subscribe(
                    stream=tmp_stream,
                    partition=partition,
                    start_position=StartPosition.EARLIEST,
                ):
                    await client.publish(msg.value, stream=name, partition=dest_partition)
                    n -= 1
                    if not n:
                        break
                self.print("  %d messages restored" % n_msg[partition])
            else:
                self.print("  nothing to restore")
        # Drop the temporary stream
        self.print("Dropping temporary stream %s" % tmp_stream)
        await client.delete_stream(tmp_stream)
        # Done
        self.print("Stream %s has been altered" % name)
async def get_partition_meta(stream, partition):
    async with LiftBridgeClient() as client:
        return await client.fetch_partition_metadata(stream, partition)
async def delete():
    async with LiftBridgeClient() as client:
        await client.delete_stream(name)
async def wrapper():
    async with LiftBridgeClient() as client:
        await client.delete_stream(client.get_offset_stream(name))
        await client.delete_stream(name)