def __init__(self, topic_name: str, match: str = None):
    """Initialise producer state and create the underlying Kafka producer.

    :param topic_name: Name of the topic, stored on the instance.
    :param match: Optional match expression. When given it is compiled into
        a ``RuleTree``; otherwise no rule tree is kept.
    """
    self.queue_length = 100000
    # Half of the queue length. The former ``queue_length / 0.5`` evaluated
    # to 200000.0, i.e. twice ``queue_length`` and a float.
    # NOTE(review): assumes this limit is compared against the number of
    # pending messages to decide when to flush -- confirm against callers.
    self.internal_queue_length_limit = self.queue_length // 2
    self._config = Config.get_instance().create_confluent_config()
    # Runs before the remaining attributes are assigned, as before.
    self._setup_config()
    self.logger = logging.getLogger(__name__)
    self._topic_name = topic_name
    self._match = match
    self._producer = None
    if self._match is not None:
        self._rule_tree = RuleTree(match)
    else:
        self._rule_tree = None
    self.create_internal_producer()
def __init__(self, group_id: str, topic_name: str, last: bool, match: str = None, enable_auto_commit: bool = True):
    """Store the consumer settings, build its config and create the consumer.

    :param group_id: Consumer group id, stored on the instance.
    :param topic_name: Name of the topic, stored on the instance.
    :param last: Flag stored as ``_last``.
    :param match: Optional match expression; compiled into a ``RuleTree``
        when it is not ``None``.
    :param enable_auto_commit: Flag stored as ``_enable_auto_commit``.
    """
    # Plain state first; nothing here talks to Kafka yet.
    self._match = match
    self._last = last
    self._group_id = group_id
    self._consumer = None
    self._enable_auto_commit = enable_auto_commit
    self._config = {}
    self._rule_tree = RuleTree(match) if self._match is not None else None
    self._topic_name = topic_name
    self.writers: Dict[int, GenericWriter] = {}

    # Both hooks run last so they can rely on the attributes set above.
    self._setup_config()
    self.create_internal_consumer()
class AbstractProducer(ABC):
    """Base class wrapping a ``confluent_kafka.Producer``.

    Subclasses implement :meth:`produce`; this class builds the producer
    configuration, creates the producer and offers helpers to send single
    messages and to flush the local queue.
    """

    def __init__(self, topic_name: str, match: str = None):
        """Initialise producer state and create the underlying Kafka producer.

        :param topic_name: Name of the topic, stored on the instance.
        :param match: Optional match expression. When given it is compiled
            into a ``RuleTree`` that :meth:`produce_message` consults.
        """
        self.queue_length = 100000
        # Half of the queue capacity. The former ``queue_length / 0.5``
        # evaluated to 200000.0 -- twice the "queue.buffering.max.messages"
        # value set in _setup_config -- so the limit lay above the size the
        # local queue is configured to hold.
        # NOTE(review): assumes subclasses compare the number of pending
        # messages against this limit to decide when to flush -- confirm.
        self.internal_queue_length_limit = self.queue_length // 2
        self._config = Config.get_instance().create_confluent_config()
        # Runs before the remaining attributes are assigned, as before.
        self._setup_config()
        self.logger = logging.getLogger(__name__)
        self._topic_name = topic_name
        self._match = match
        self._producer = None
        if self._match is not None:
            self._rule_tree = RuleTree(match)
        else:
            self._rule_tree = None
        self.create_internal_producer()

    @abstractmethod
    def produce(self) -> int:
        """Produce messages; must be implemented by subclasses.

        :return: An integer defined by the implementation.
        """
        raise NotImplementedError()

    def _setup_config(self):
        """Add the delivery/error callbacks and the queue size to the config."""
        self._config.update({
            "on_delivery": raise_for_kafka_error,
            "error_cb": log_error,
            "queue.buffering.max.messages": str(self.queue_length),
        })

    def flush_all(self, message_prefix: str = None):
        """Flush until the producer reports no messages left.

        ``flush(1)`` is called repeatedly; after every attempt that still
        leaves messages behind, the remaining count is logged.

        :param message_prefix: Optional text put in front of each log line.
        """
        prefix = message_prefix or ""
        while True:
            left_messages = self._producer.flush(1)
            if left_messages == 0:
                break
            # Lazy %-formatting; the rendered text is the same as before.
            self.logger.info(
                "%sStill %s messages left, flushing...", prefix, left_messages)

    def create_internal_producer(self):
        """Create the ``confluent_kafka.Producer`` from the current config."""
        self._producer = confluent_kafka.Producer(self._config)

    def produce_message(self, topic_name: str, message: KafkaMessage):
        """Send ``message`` to ``topic_name`` unless the rule tree rejects it.

        Without a rule tree every message is sent.

        :param topic_name: Topic to produce to.
        :param message: Message whose key, value, partition and headers are
            passed on to the producer.
        """
        if self._rule_tree is None or self._rule_tree.evaluate(message):
            self._producer.produce(
                topic=topic_name,
                key=message.key,
                value=message.value,
                partition=message.partition,
                headers=message.headers,
            )
def yield_only_matching_messages(
    match_expr_or_rule_tree: Union[str, RuleTree]
) -> Callable[[MessageStream], MessageStream]:
    """Build a stream filter that keeps stream events and matching messages.

    :param match_expr_or_rule_tree: Either a ready ``RuleTree`` or an
        expression from which one is constructed.
    :return: A generator function that takes a message stream and yields
        every ``StreamEvent`` unchanged plus each other item for which the
        rule tree evaluates truthy.
    """
    rule_tree = (
        match_expr_or_rule_tree
        if isinstance(match_expr_or_rule_tree, RuleTree)
        else RuleTree(match_expr_or_rule_tree)
    )

    def _yield_only_matching_messages(message_stream: MessageStream) -> MessageStream:
        for item in message_stream:
            # Stream events bypass the rule tree; it is never evaluated on them.
            if isinstance(item, StreamEvent) or rule_tree.evaluate(item):
                yield item

    return _yield_only_matching_messages
def test4(self):
    """The left side works out to 651, so comparing it ``< 1`` must be falsy."""
    expression = "(320 + 5)*2-(1-2) < 1"
    outcome = RuleTree(expression).evaluate(message=None)
    self.assertFalse(outcome)
def test12(self):
    """``like`` with ``%`` on both sides of the pattern is expected to match.

    Presumably ``%`` is a wildcard, so ``%efi%`` matches inside ``prefixstr``.
    """
    expression = "prefixstr like %efi%"
    outcome = RuleTree(expression).evaluate(message=None)
    self.assertTrue(outcome)
def test13(self):
    """``notlike`` is expected to be falsy for the pattern ``pre%``.

    Presumably ``pre%`` matches ``prefixstr``, so the negated form fails.
    """
    expression = "prefixstr notlike pre%"
    outcome = RuleTree(expression).evaluate(message=None)
    self.assertFalse(outcome)
def test9(self):
    """``neg(...)`` around the false comparison ``5>9`` is expected truthy."""
    expression = "neg(5>9)"
    outcome = RuleTree(expression).evaluate(message=None)
    self.assertTrue(outcome)
def test11(self):
    """``like`` with the pattern ``%pre`` is expected to be falsy.

    Presumably ``%pre`` only matches text ending in ``pre``.
    """
    expression = "prefixstr like %pre"
    outcome = RuleTree(expression).evaluate(message=None)
    self.assertFalse(outcome)
def test6(self):
    """``system.timestamp`` is expected not to be before 2018-10-08.

    NOTE(review): with ``message=None`` the timestamp presumably resolves to
    the current time -- confirm against ``RuleTree``.
    """
    expression = "system.timestamp < 2018-10-08"
    outcome = RuleTree(expression).evaluate(message=None)
    self.assertFalse(outcome)
def test8(self):
    """An ``or`` of a false arithmetic comparison and a timestamp check.

    The left operand (651 < 1) is false, so the expected truthy result has
    to come from the ``system.timestamp > 2018-10-08`` operand.
    """
    expression = "(320 + 5)*2-(1-2) < 1 or system.timestamp > 2018-10-08"
    outcome = RuleTree(expression).evaluate(message=None)
    self.assertTrue(outcome)
class AbstractConsumer(ABC):
    """Base class wrapping a Kafka consumer.

    Subclasses implement :meth:`create_internal_consumer` and
    :meth:`consume`. This class builds the consumer configuration and offers
    helpers for assigning/subscribing, polling single messages, filtering
    them through an optional ``RuleTree`` and handing them to writers.
    """

    def __init__(self, group_id: str, topic_name: str, last: bool, match: Optional[str] = None, enable_auto_commit: bool = True):
        """Store the settings, build the config and create the consumer.

        :param group_id: Consumer group id, used as ``group.id``.
        :param topic_name: Name of the topic, stored on the instance.
        :param last: When true ``auto.offset.reset`` is ``"latest"``,
            otherwise ``"earliest"``.
        :param match: Optional match expression; compiled into a ``RuleTree``
            when it is not ``None``.
        :param enable_auto_commit: Value used for ``enable.auto.commit``.
        """
        self._match = match
        self._last = last
        self._group_id = group_id
        self._consumer = None
        self._enable_auto_commit = enable_auto_commit
        self._config = {}
        if self._match is not None:
            self._rule_tree = RuleTree(match)
        else:
            self._rule_tree = None
        self._topic_name = topic_name
        # Writers keyed by partition number; key -1 is the fallback writer
        # used by output_consumed.
        self.writers: Dict[int, GenericWriter] = {}
        self._setup_config()
        self.create_internal_consumer()

    def _setup_config(self):
        """Build the consumer configuration from the stored settings."""
        offset_reset = "latest" if self._last else "earliest"

        self._config = Config.get_instance().create_confluent_config()
        self._config.update({
            "group.id": self._group_id,
            "error_cb": log_error,
            # Auto-commit is configurable. Callers that must only commit once
            # they're sure a message got saved to the sink pass
            # enable_auto_commit=False and commit manually.
            "enable.auto.commit": self._enable_auto_commit,
            "enable.partition.eof": True,
            # Where to start reading when subscribing for the first time:
            # "earliest" unless the consumer was created with last=True.
            "default.topic.config": {
                "auto.offset.reset": offset_reset
            },
        })

    @abstractmethod
    def create_internal_consumer(self):
        """Create the underlying consumer; must be implemented by subclasses."""
        raise NotImplementedError()

    def assign_specific_partitions(self, topic_name: str, partitions: Optional[list] = None, offset: int = 0):
        """Assign the consumer to explicit partitions of ``topic_name``.

        :param topic_name: Topic to read from; also replaces the stored name.
        :param partitions: Partition numbers to assign. ``None`` means
            partition 0 only; an empty list results in an empty assignment.
        :param offset: Offset used for every assigned partition.
        """
        self._topic_name = topic_name
        partition_ids = partitions if partitions is not None else [0]
        topic_partitions = [
            TopicPartition(self._topic_name, partition=partition, offset=offset)
            for partition in partition_ids
        ]
        self._consumer.assign(topic_partitions)

    def subscribe(self, topics: List[str]) -> None:
        """Subscribe the underlying consumer to ``topics``."""
        self._consumer.subscribe(topics)

    def close(self) -> None:
        """Close the underlying consumer."""
        self._consumer.close()

    def commit(self, offsets: List[TopicPartition]):
        """Commit the given offsets on the underlying consumer."""
        self._consumer.commit(offsets=offsets)

    @abstractmethod
    def consume(self, **kwargs) -> int:
        """Consume messages; must be implemented by subclasses.

        :return: An integer defined by the implementation.
        """
        raise NotImplementedError()

    def output_consumed(self, message: Message):
        """
        Outputs the message to a destination determined by the implementation of the inheriting
        class.

        The writer registered for the message's partition is used; if there
        is none, the fallback writer stored under key ``-1`` is used.

        :param message: Message to output
        :return: This method returns no values
        :raises KeyError: If neither a partition writer nor the ``-1``
            fallback writer is registered.
        """
        writer = self.writers.get(message.partition())
        if writer is None:
            # Look up the fallback only when it is needed. Previously it was
            # evaluated eagerly and raised KeyError even though a writer for
            # the partition existed.
            writer = self.writers[-1]
        writer.write_message(message)

    def close_all_writers(self):
        """Close the file of every ``FileWriter`` that has one open."""
        for w in self.writers.values():
            if isinstance(w, FileWriter) and w.file is not None:
                w.file.close()

    def consume_single_message(self, timeout=30) -> Message:
        """Poll one message and pass it through ``raise_for_message``.

        :param timeout: Poll timeout handed to the consumer.
        :return: The polled message.
        """
        message = self._consumer.poll(timeout=timeout)
        raise_for_message(message)
        return message

    def consume_single_acceptable_message(self, timeout=30) -> Optional[Message]:
        """Poll until a message matches the rule tree or the time runs out.

        :param timeout: Total time budget in seconds across all polls.
        :return: The first matching message, or ``None`` if the budget was
            used up first (or was not positive to begin with).
        """
        total_time_remaining = timeout
        while total_time_remaining > 0:
            iteration_start = pendulum.now()
            # Poll only for what is left of the budget so that the overall
            # timeout is respected across iterations.
            message = self.consume_single_message(timeout=total_time_remaining)
            # total_seconds() keeps fractions; in_seconds() truncated to whole
            # seconds, so quick non-matching messages never reduced the budget.
            total_time_remaining -= (pendulum.now() - iteration_start).total_seconds()
            if self.consumed_message_matches(message):
                return message
        return None

    def consumed_message_matches(self, message: Message):
        """Return whether ``message`` passes the rule tree.

        Without a rule tree every message matches.
        """
        if self._rule_tree is None:
            return True
        return self._rule_tree.evaluate(message)
def test5(self):
    """The left side works out to 651, so comparing it ``> 1`` must be truthy."""
    expression = "(320 + 5)*2-(1-2) > 1"
    outcome = RuleTree(expression).evaluate(message=None)
    self.assertTrue(outcome)
def test3(self):
    """A pure arithmetic expression evaluates to its value: 36 - 0.5 = 35.5."""
    expression = "(5 + 7)*3-(1/2)"
    outcome = RuleTree(expression).evaluate(message=None)
    self.assertEqual(outcome, 35.5)
def test2(self):
    """``5 + 7`` is 12, so comparing it ``< 2`` must be falsy."""
    expression = "5 + 7<2"
    outcome = RuleTree(expression).evaluate(message=None)
    self.assertFalse(outcome)
def test1(self):
    """``5 + 7`` is 12, so comparing it ``> 2`` must be truthy."""
    expression = "5 + 7>2"
    outcome = RuleTree(expression).evaluate(message=None)
    self.assertTrue(outcome)
def test15(self):
    """``system.timestamp`` is expected to be after a full date-time literal.

    NOTE(review): with ``message=None`` the timestamp presumably resolves to
    the current time -- confirm against ``RuleTree``.
    """
    expression = "system.timestamp > 2019-08-08T20:21:22"
    outcome = RuleTree(expression).evaluate(message=None)
    self.assertTrue(outcome)
def test14(self):
    """``mod`` is applied after the addition: (315 + 1) mod 9 == 1.

    Binding ``mod`` tighter would give 315 + (1 mod 9) = 316 instead.
    """
    expression = "315 +1 mod 9"
    outcome = RuleTree(expression).evaluate(message=None)
    self.assertEqual(outcome, 1)