Ejemplo n.º 1
0
 def __init__(self, topic_name: str, match: str = None):
     """Prepare the producer configuration and create the internal producer.

     :param topic_name: Stored on the instance as ``_topic_name``.
     :param match: Optional match expression. When it is not ``None`` it is
         parsed into a ``RuleTree``; otherwise no rule tree is kept.
     """
     self.queue_length = 100000
     # NOTE(review): dividing by 0.5 doubles the value and produces a float
     # (200000.0) -- confirm this is intended rather than ``* 0.5``.
     self.internal_queue_length_limit = self.queue_length / 0.5

     # The configuration must exist before _setup_config() extends it.
     self._config = Config.get_instance().create_confluent_config()
     self._setup_config()

     self.logger = logging.getLogger(__name__)
     self._topic_name = topic_name
     self._match = match
     self._producer = None
     self._rule_tree = None if match is None else RuleTree(match)

     self.create_internal_producer()
Ejemplo n.º 2
0
 def __init__(self,
              group_id: str,
              topic_name: str,
              last: bool,
              match: str = None,
              enable_auto_commit: bool = True):
     """Store the consumer settings, build the config and create the consumer.

     :param group_id: Stored as ``_group_id``.
     :param topic_name: Stored as ``_topic_name``.
     :param last: Stored as ``_last``.
     :param match: Optional match expression. When it is not ``None`` it is
         parsed into a ``RuleTree``; otherwise no rule tree is kept.
     :param enable_auto_commit: Stored as ``_enable_auto_commit``.
     """
     self._match = match
     self._last = last
     self._group_id = group_id
     self._enable_auto_commit = enable_auto_commit
     self._consumer = None
     self._config = {}
     self._rule_tree = None if match is None else RuleTree(match)
     self._topic_name = topic_name
     self.writers: Dict[int, GenericWriter] = {}

     # Both hooks run last so that every attribute above is already in place.
     self._setup_config()
     self.create_internal_consumer()
Ejemplo n.º 3
0
class AbstractProducer(ABC):
    """Base class for producers wrapping a ``confluent_kafka.Producer``.

    Subclasses implement :meth:`produce`. When a match expression is given,
    :meth:`produce_message` only forwards messages for which the resulting
    ``RuleTree`` evaluates truthy.
    """

    def __init__(self, topic_name: str, match: str = None):
        """Prepare the producer configuration and create the internal producer.

        :param topic_name: Stored on the instance as ``_topic_name``.
        :param match: Optional match expression. When it is not ``None`` it
            is parsed into a ``RuleTree``; otherwise no rule tree is kept.
        """
        self.queue_length = 100000
        # NOTE(review): dividing by 0.5 doubles the value and produces a
        # float (200000.0) -- confirm this is intended rather than ``* 0.5``.
        self.internal_queue_length_limit = self.queue_length / 0.5

        # The configuration must exist before _setup_config() extends it.
        self._config = Config.get_instance().create_confluent_config()
        self._setup_config()

        self.logger = logging.getLogger(__name__)
        self._topic_name = topic_name
        self._match = match
        self._producer = None
        self._rule_tree = None if match is None else RuleTree(match)

        self.create_internal_producer()

    @abstractmethod
    def produce(self) -> int:
        """Produce messages; must be implemented by subclasses."""
        raise NotImplementedError()

    def _setup_config(self):
        """Add the delivery/error callbacks and the queue size to the config."""
        producer_settings = {
            "on_delivery": raise_for_kafka_error,
            "error_cb": log_error,
            # Passed as a string, derived from ``queue_length``.
            "queue.buffering.max.messages": str(self.queue_length),
        }
        self._config.update(producer_settings)

    def flush_all(self, message_prefix: str = None):
        """Flush the internal producer until it reports zero messages left.

        Each flush attempt is made with an argument of 1; after every attempt
        that leaves messages behind, a progress line is logged.

        :param message_prefix: Optional text put in front of each log line.
        """
        prefix = message_prefix or ""
        remaining = self._producer.flush(1)
        while remaining != 0:
            self.logger.info(
                prefix + f"Still {remaining} messages left, flushing...")
            remaining = self._producer.flush(1)

    def create_internal_producer(self):
        """Instantiate the ``confluent_kafka.Producer`` from ``_config``."""
        self._producer = confluent_kafka.Producer(self._config)

    def produce_message(self, topic_name: str, message: KafkaMessage):
        """Hand ``message`` to the internal producer if it passes the filter.

        :param topic_name: Topic the message is produced to.
        :param message: Message whose key, value, partition and headers are
            forwarded. It is dropped silently when a rule tree exists and
            does not evaluate truthy for it.
        """
        rejected = (self._rule_tree is not None
                    and not self._rule_tree.evaluate(message))
        if rejected:
            return
        self._producer.produce(
            topic=topic_name,
            key=message.key,
            value=message.value,
            partition=message.partition,
            headers=message.headers,
        )
Ejemplo n.º 4
0
def yield_only_matching_messages(
    match_expr_or_rule_tree: Union[str, RuleTree]
) -> Callable[[MessageStream], MessageStream]:
    """Build a stream filter from a match expression or a ready ``RuleTree``.

    :param match_expr_or_rule_tree: Either an existing ``RuleTree`` (used as
        is) or any other value, which is passed to ``RuleTree(...)``.
    :return: A generator function that takes a message stream and yields
        every ``StreamEvent`` unconditionally, plus every other item for
        which the rule tree evaluates truthy.
    """
    rule_tree = (match_expr_or_rule_tree
                 if isinstance(match_expr_or_rule_tree, RuleTree)
                 else RuleTree(match_expr_or_rule_tree))

    def _yield_only_matching_messages(message_stream: MessageStream) -> MessageStream:
        for item in message_stream:
            # Stream events bypass the rule tree; the short-circuit keeps
            # evaluate() from ever being called on them.
            if isinstance(item, StreamEvent) or rule_tree.evaluate(item):
                yield item

    return _yield_only_matching_messages
Ejemplo n.º 5
0
 def test4(self):
     """Expect ``(320 + 5)*2-(1-2) < 1`` to evaluate falsy without a message."""
     outcome = RuleTree("(320 + 5)*2-(1-2) < 1").evaluate(message=None)
     self.assertFalse(outcome)
Ejemplo n.º 6
0
 def test12(self):
     """Expect ``prefixstr like %efi%`` to evaluate truthy without a message."""
     outcome = RuleTree("prefixstr like %efi%").evaluate(message=None)
     self.assertTrue(outcome)
Ejemplo n.º 7
0
 def test13(self):
     """Expect ``prefixstr notlike pre%`` to evaluate falsy without a message."""
     outcome = RuleTree("prefixstr notlike pre%").evaluate(message=None)
     self.assertFalse(outcome)
Ejemplo n.º 8
0
 def test9(self):
     """Expect ``neg(5>9)`` to evaluate truthy without a message."""
     outcome = RuleTree("neg(5>9)").evaluate(message=None)
     self.assertTrue(outcome)
Ejemplo n.º 9
0
 def test11(self):
     """Expect ``prefixstr like %pre`` to evaluate falsy without a message."""
     outcome = RuleTree("prefixstr like %pre").evaluate(message=None)
     self.assertFalse(outcome)
Ejemplo n.º 10
0
 def test6(self):
     """Expect ``system.timestamp < 2018-10-08`` to evaluate falsy.

     NOTE(review): presumably ``system.timestamp`` resolves to the current
     time when no message is supplied -- confirm against ``RuleTree``.
     """
     outcome = RuleTree("system.timestamp < 2018-10-08").evaluate(message=None)
     self.assertFalse(outcome)
Ejemplo n.º 11
0
 def test8(self):
     """Expect an ``or`` of a numeric and a timestamp comparison to be truthy."""
     expression = "(320 + 5)*2-(1-2) < 1 or system.timestamp > 2018-10-08"
     outcome = RuleTree(expression).evaluate(message=None)
     self.assertTrue(outcome)
Ejemplo n.º 12
0
class AbstractConsumer(ABC):
    """Base class for consumers built around an internal Kafka consumer.

    Subclasses implement :meth:`create_internal_consumer` (presumably by
    assigning ``self._consumer``, which the other methods rely on) and
    :meth:`consume`. When a match expression is given, messages can be
    filtered through the resulting ``RuleTree``.
    """

    def __init__(self,
                 group_id: str,
                 topic_name: str,
                 last: bool,
                 match: str = None,
                 enable_auto_commit: bool = True):
        """Store the settings, build the config and create the consumer.

        :param group_id: Used as the ``group.id`` configuration value.
        :param topic_name: Stored as ``_topic_name``.
        :param last: When truthy the offset reset policy is ``latest``,
            otherwise ``earliest``.
        :param match: Optional match expression. When it is not ``None`` it
            is parsed into a ``RuleTree``; otherwise no rule tree is kept.
        :param enable_auto_commit: Used as ``enable.auto.commit``.
        """
        self._match = match
        self._last = last
        self._group_id = group_id
        self._consumer = None
        self._enable_auto_commit = enable_auto_commit
        self._config = {}
        if self._match is not None:
            self._rule_tree = RuleTree(match)
        else:
            self._rule_tree = None
        self._topic_name = topic_name
        # Writers keyed by partition number; key -1 holds the fallback writer
        # used by output_consumed().
        self.writers: Dict[int, GenericWriter] = {}
        self._setup_config()
        self.create_internal_consumer()

    def _setup_config(self):
        """Replace ``_config`` with the base config plus consumer settings."""
        offset_reset = "earliest"
        if self._last:
            offset_reset = "latest"
        self._config = Config.get_instance().create_confluent_config()
        self._config.update({
            "group.id": self._group_id,
            "error_cb": log_error,
            # Callers that disable auto-commit need to commit offsets
            # manually once they're sure the message got saved to the sink.
            "enable.auto.commit": self._enable_auto_commit,
            "enable.partition.eof": True,
            # We need this to start at the last committed offset instead of
            # the latest when subscribing for the first time.
            "default.topic.config": {
                "auto.offset.reset": offset_reset
            },
        })

    @abstractmethod
    def create_internal_consumer(self):
        """Create the internal consumer; must be implemented by subclasses."""
        raise NotImplementedError()

    def assign_specific_partitions(self,
                                   topic_name: str,
                                   partitions: list = None,
                                   offset: int = 0):
        """Assign the consumer to explicit partitions of ``topic_name``.

        Also overwrites ``_topic_name`` with ``topic_name``.

        :param topic_name: Topic whose partitions are assigned.
        :param partitions: Partition numbers to assign. ``None`` assigns only
            partition 0; an empty list assigns nothing.
        :param offset: Offset applied to every assigned partition.
        """
        self._topic_name = topic_name
        if partitions is None:
            partitions = [0]
        topic_partitions = [
            TopicPartition(self._topic_name,
                           partition=partition,
                           offset=offset) for partition in partitions
        ]
        self._consumer.assign(topic_partitions)

    def subscribe(self, topics: List[str]) -> None:
        """Subscribe the internal consumer to ``topics``."""
        self._consumer.subscribe(topics)

    def close(self) -> None:
        """Close the internal consumer."""
        self._consumer.close()

    def commit(self, offsets: List[TopicPartition]):
        """Commit ``offsets`` through the internal consumer."""
        self._consumer.commit(offsets=offsets)

    @abstractmethod
    def consume(self, **kwargs) -> int:
        """Consume messages; must be implemented by subclasses."""
        raise NotImplementedError()

    def output_consumed(self, message: Message):
        """
        Outputs the message to a destination determined by the implementation of the inheriting class.

        The writer registered for the message's partition is used; if there
        is none, the fallback writer stored under key ``-1`` is used.

        :param message: Message to output
        :return: This method returns no values
        :raises KeyError: If neither a partition-specific writer nor the
            ``-1`` fallback writer is registered.
        """
        partition = message.partition()
        # Only touch the fallback entry when it is actually needed, so a
        # missing -1 key does not break partitions that have their own writer.
        writer_key = partition if partition in self.writers else -1
        self.writers[writer_key].write_message(message)

    def close_all_writers(self):
        """Close the file of every ``FileWriter`` that has one open."""
        for w in self.writers.values():
            if isinstance(w, FileWriter) and w.file is not None:
                w.file.close()

    def consume_single_message(self, timeout=30) -> Message:
        """Poll one message and pass it through ``raise_for_message``.

        :param timeout: Timeout handed to the internal consumer's ``poll``.
        :return: The polled message.
        """
        message = self._consumer.poll(timeout=timeout)
        raise_for_message(message)
        return message

    def consume_single_acceptable_message(self,
                                          timeout=30) -> Optional[Message]:
        """Poll until a message passes the filter or the time budget is spent.

        Every poll is given only the time still left in the overall budget,
        so the total wait stays bounded by ``timeout`` (plus processing
        overhead) regardless of how many non-matching messages arrive.

        :param timeout: Overall time budget in seconds.
        :return: The first acceptable message, or ``None`` if none was found
            within the budget (including when ``timeout`` is not positive).
        """
        import time

        # A monotonic clock with sub-second resolution: whole-second
        # truncation would let fast, non-matching iterations run forever.
        deadline = time.monotonic() + timeout
        remaining = timeout
        while remaining > 0:
            message = self.consume_single_message(timeout=remaining)
            if self.consumed_message_matches(message):
                return message
            remaining = deadline - time.monotonic()
        return None

    def consumed_message_matches(self, message: Message):
        """Return the rule tree's verdict for ``message``.

        :return: ``True`` when no rule tree is configured, otherwise the
            result of evaluating the rule tree against ``message``.
        """
        if self._rule_tree is None:
            return True
        return self._rule_tree.evaluate(message)
Ejemplo n.º 13
0
 def test5(self):
     """Expect ``(320 + 5)*2-(1-2) > 1`` to evaluate truthy without a message."""
     outcome = RuleTree("(320 + 5)*2-(1-2) > 1").evaluate(message=None)
     self.assertTrue(outcome)
Ejemplo n.º 14
0
 def test3(self):
     """Expect the arithmetic expression ``(5 +  7)*3-(1/2)`` to yield 35.5."""
     outcome = RuleTree("(5 +  7)*3-(1/2)").evaluate(message=None)
     self.assertEqual(outcome, 35.5)
Ejemplo n.º 15
0
 def test2(self):
     """Expect ``5 +  7<2`` to evaluate falsy without a message."""
     outcome = RuleTree("5 +  7<2").evaluate(message=None)
     self.assertFalse(outcome)
Ejemplo n.º 16
0
 def test1(self):
     """Expect ``5 +  7>2`` to evaluate truthy without a message."""
     outcome = RuleTree("5 +  7>2").evaluate(message=None)
     self.assertTrue(outcome)
Ejemplo n.º 17
0
 def test15(self):
     """Expect ``system.timestamp > 2019-08-08T20:21:22`` to evaluate truthy.

     NOTE(review): presumably ``system.timestamp`` resolves to the current
     time when no message is supplied -- confirm against ``RuleTree``.
     """
     expression = "system.timestamp > 2019-08-08T20:21:22"
     outcome = RuleTree(expression).evaluate(message=None)
     self.assertTrue(outcome)
Ejemplo n.º 18
0
 def test14(self):
     """Expect ``315 +1 mod 9`` to yield 1.

     NOTE(review): the expected value implies the addition is applied before
     ``mod`` (316 mod 9 == 1) -- confirm against the ``RuleTree`` grammar.
     """
     outcome = RuleTree("315 +1 mod 9").evaluate(message=None)
     self.assertEqual(outcome, 1)