Ejemplo n.º 1
0
def test_transaction_api():
    """Exercise the transactional API error paths without a broker."""
    p = Producer({"transactional.id": "test"})

    def check_error(exc_info, code, retriable):
        # Common shape of every expected KafkaError in this test.
        err = exc_info.value.args[0]
        assert err.code() == code
        assert err.retriable() is retriable
        assert err.fatal() is False
        assert err.txn_requires_abort() is False

    # With no broker reachable, init_transactions() times out (retriable).
    with pytest.raises(KafkaException) as ex:
        p.init_transactions(0.5)
    check_error(ex, KafkaError._TIMED_OUT, True)

    # Any subsequent APIs will fail since init did not succeed.
    with pytest.raises(KafkaException) as ex:
        p.begin_transaction()
    check_error(ex, KafkaError._STATE, False)

    consumer = Consumer({"group.id": "testgroup"})
    group_metadata = consumer.consumer_group_metadata()
    consumer.close()

    with pytest.raises(KafkaException) as ex:
        p.send_offsets_to_transaction([TopicPartition("topic", 0, 123)],
                                      group_metadata)
    check_error(ex, KafkaError._STATE, False)

    with pytest.raises(KafkaException) as ex:
        p.commit_transaction(0.5)
    check_error(ex, KafkaError._STATE, False)

    with pytest.raises(KafkaException) as ex:
        p.abort_transaction(0.5)
    check_error(ex, KafkaError._STATE, False)
Ejemplo n.º 2
0
class KafkaProducerConfluent:
    """
    Kafka producer wrapper around ``confluent_kafka.Producer``.

    Supports optional transactions, optional auto-flush batching and
    round-robin spreading of keyed messages over a topic's partitions.
    Intended to be used as a context manager::

        with KafkaProducerConfluent(one_topic_name='t') as producer:
            producer.put_data(key='k', value={'a': 1})
    """

    def __init__(self,
                 hosts=None,
                 configuration=None,
                 use_tx=False,
                 one_topic_name=None,
                 auto_flush_size=0,
                 flush_is_bad=False):
        """
        :param hosts: bootstrap servers; overrides any value already present
            in ``configuration``
        :param configuration: librdkafka options, see
            https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        :param use_tx: produce inside a transaction (a random
            ``transactional.id`` is generated when ``configuration`` is None)
        :param one_topic_name: default topic for ``put_data`` /
            ``put_data_direct`` when no explicit topic is given
        :param auto_flush_size: flush after this many produced messages;
            0 disables auto-flushing
        :param flush_is_bad: when auto-flush is disabled, flush and retry on
            ``BufferError`` instead of propagating it to the caller
        """

        if configuration is None:
            self.configuration = {
                'client.id':
                default_cfg.DEFAULT_CONNECTION_OPTION_ADMIN['client_id'],
                'socket.timeout.ms':
                default_cfg.DEFAULT_BROKER_TIMEOUT_MS_OPERATIONS
            }

            if use_tx:
                # Every transactional producer needs its own unique id.
                self.configuration['transactional.id'] = str(uuid4())
        else:
            self.configuration = configuration

        if hosts:
            self.configuration['bootstrap.servers'] = hosts
        elif not self.configuration.get('bootstrap.servers'):
            self.configuration['bootstrap.servers'] = GeneralConfig.KAFKA_URL

        self.use_tx = use_tx
        # Per-topic round-robin state: name -> next index / partition list.
        self.topic_part_itr = None
        self.topic_parts = None
        self.one_topic_name = one_topic_name

        self.auto_flush = bool(auto_flush_size)
        self.auto_flush_size = auto_flush_size
        self.auto_flush_itr = 0
        self.flush_is_bad = flush_is_bad

    def __enter__(self):
        """Create the producer, cache partition info and open a transaction."""

        self.auto_flush_itr = 0
        self.producer = Producer(self.configuration)
        self.update_partition_settings(name_topic=self.one_topic_name)

        if self.use_tx:
            # Best effort: abort a possibly dangling transaction.  Before
            # init_transactions() this raises a state error, which is fine.
            try:
                self.producer.abort_transaction(
                    default_cfg.DEFAULT_TRANSACTION_TIMEOUT_SEC)
            except Exception:
                pass

            self.producer.init_transactions(
                default_cfg.DEFAULT_TRANSACTION_TIMEOUT_SEC)
            self.producer.begin_transaction()

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Commit (or abort, on error) the transaction, or flush the queue.

        :param exc_type: exception class raised inside the ``with`` block
        :param exc_val: exception instance
        :param exc_tb: traceback
        :return: None, so any exception from the block is propagated
        """

        self.auto_flush_itr = 0
        if self.use_tx:
            if exc_type:
                self.producer.abort_transaction()
            else:
                # commit_transaction() flushes pending messages internally.
                self.producer.commit_transaction(
                    default_cfg.DEFAULT_TRANSACTION_TIMEOUT_SEC)
        else:
            self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)

    def get_list_topics(self):
        """
        Fetch metadata for all topics.

        :return: dict of topic name -> metadata, or None on failure
        """
        try:
            return self.producer.list_topics().topics
        except Exception:
            return None

    def get_one_topic(self, name):
        """
        Fetch metadata for a single topic by name.

        :param name: topic name
        :return: dict of topic name -> metadata, or None on failure
        """
        try:
            return self.producer.list_topics(topic=name).topics
        except Exception:
            return None

    def update_partition_settings(self, name_topic=None):
        """
        Refresh the cached partition lists used for round-robin balancing.

        :param name_topic: refresh only this topic; None refreshes all topics
        :return: None
        """

        if self.topic_parts is None:
            self.topic_part_itr = {}
            self.topic_parts = {}

        if name_topic is None:
            topics = self.get_list_topics()
        else:
            if self.topic_parts.get(name_topic) is not None:
                self.topic_parts.pop(name_topic)

            topics = self.get_one_topic(name_topic)

        # The metadata getters return None on failure; keep the previously
        # cached state instead of crashing on ``.items()``.
        if not topics:
            return

        for name, topic_obj in topics.items():
            list_partitions = list(topic_obj.partitions)
            if len(list_partitions) <= 1:
                # Single-partition topics need no balancing.
                continue

            self.topic_parts[name] = list_partitions
            self.topic_part_itr[name] = 0

    def put_data(self,
                 key,
                 value,
                 topic=None,
                 callback=None,
                 partition=None,
                 poll_time=0):
        """
        Queue a message for delivery to the broker.

        The value is JSON-dumped immediately; topic and key are coerced
        to ``str``.

        :param key: message key; leave empty to let Kafka balance partitions
        :param value: message payload (serialized with ``jsd``)
        :param topic: topic name; defaults to ``self.one_topic_name``
        :param callback: delivery callback ``func(err, msg)``
        :param partition: explicit partition number; when omitted, keyed
            messages are spread over partitions round-robin
        :param poll_time: timeout passed to ``producer.poll``
        :return: None
        :raises AttributeError: when neither ``topic`` nor
            ``self.one_topic_name`` is set
        """

        dict_args = self._put_validation_and_transform(key=key,
                                                       value=value,
                                                       topic=topic,
                                                       callback=callback,
                                                       partition=partition)

        self._put_data_default(dict_args=dict_args, poll_time=poll_time)

    def _put_validation_and_transform(self,
                                      key,
                                      value,
                                      topic=None,
                                      callback=None,
                                      partition=None):
        """
        Build the keyword arguments for ``producer.produce``.

        :return: dict of produce() keyword arguments
        :raises AttributeError: when no topic name can be determined
        """

        if topic is None and self.one_topic_name is None:
            raise AttributeError('NEED TOPIC NAME!')

        if topic is None:
            topic = self.one_topic_name

        dict_args = {
            'topic': str(topic),
            'value': jsd(value),
        }

        if key:
            # BUGFIX: was ``dict_args['key']: str(key)`` — an annotation
            # statement with no effect, so the key was silently dropped.
            dict_args['key'] = str(key)

        if callback:
            dict_args['callback'] = callback

        if partition:
            # Explicit partition requested by the caller.
            dict_args['partition'] = partition
        else:
            # Spread keyed messages evenly over the topic's partitions.
            top_name = dict_args['topic']
            topic_parts = self.topic_parts.get(top_name)
            if topic_parts:
                current_position = self.topic_part_itr[top_name]

                if key:
                    # A pinned partition only matters when a key is present.
                    dict_args['partition'] = topic_parts[current_position]

                current_position += 1
                if current_position >= len(topic_parts):
                    current_position = 0

                self.topic_part_itr[top_name] = current_position

        return dict_args

    def _put_data_default(self, dict_args, poll_time=0):
        """
        Produce one message using the flushing strategy configured
        in ``__init__``.
        """

        if self.auto_flush:
            # Flush after every ``auto_flush_size`` messages.
            self.producer.produce(**dict_args)
            self.producer.poll(poll_time)

            self.auto_flush_itr += 1
            if self.auto_flush_itr >= self.auto_flush_size:
                self.auto_flush_itr = 0
                self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)
        elif self.flush_is_bad:
            # Flush only when the local queue overflows.
            try:
                self.producer.produce(**dict_args)
                self.producer.poll(poll_time)
            except BufferError:
                # BUGFIX: queue full — wait for the broker to drain it,
                # then retry once so the triggering message is not lost.
                self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)
                self.producer.produce(**dict_args)
                self.producer.poll(poll_time)
        else:
            # Plain produce; a full queue raises BufferError to the caller.
            self.producer.produce(**dict_args)
            self.producer.poll(poll_time)

    def put_data_direct(self,
                        key,
                        value,
                        topic=None,
                        callback=None,
                        partition=None):
        """
        Like ``put_data`` but without calling ``producer.poll``.

        :raises AttributeError: when no topic name can be determined
        """

        dict_args = self._put_validation_and_transform(key=key,
                                                       value=value,
                                                       topic=topic,
                                                       callback=callback,
                                                       partition=partition)

        if self.auto_flush:
            self.producer.produce(**dict_args)

            self.auto_flush_itr += 1
            if self.auto_flush_itr >= self.auto_flush_size:
                self.auto_flush_itr = 0
                self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)
        elif self.flush_is_bad:
            try:
                self.producer.produce(**dict_args)
            except BufferError:
                # BUGFIX: drain the queue, then retry the dropped message.
                self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)
                self.producer.produce(**dict_args)
        else:
            self.producer.produce(**dict_args)
Ejemplo n.º 3
0
    #     # from previous produce() calls.
    #     producer.poll(0)

    # NOTE(review): this fragment runs inside an enclosing function that is
    # not visible here; `producer`, `topic`, `acked`, `decision` and
    # `delivered_records` are presumably defined by it — verify in context.
    with open('bcsample.json') as f:
        # Parse the sample file; json.load returns the top-level JSON value
        # (iterated below, so it is expected to be a list of records).
        bcsample_data = json.load(f)

    for bc_data in bcsample_data:
        # record_key = "breadcrumb"
        # Choose a random number between 1 and 5 for each record’s key
        record_key = str(random.randint(1, 5))
        record_value = json.dumps(bc_data)
        print("Producing record key: {}".format(record_key))
        # One transaction per record: begin, produce, then commit or abort.
        producer.begin_transaction()
        producer.produce(topic, key=record_key,
                         value=record_value, on_delivery=acked)
        # p.poll() serves delivery reports (on_delivery)
        # from previous produce() calls.
        producer.poll(2000)

        # Choose True/False randomly with equal probability
        if decision(0.5):
            print("Commiting record key: {}".format(record_key))
            producer.commit_transaction()
        else:
            producer.abort_transaction()

    # Wait for any outstanding delivery reports before reporting totals.
    producer.flush()

    print("{} messages were produced to topic {}!".format(delivered_records, topic))
Ejemplo n.º 4
0
class KafkaProducerChannel(Channel):
    """
    Represents kafka producer channel for communication.

    Publishes messages to a single configured topic using a
    transactional, idempotent producer.
    """

    def __init__(self, **kwargs):
        """
        :param hosts: bootstrap servers for the Kafka cluster
        :param client_id: client id reported to the broker
        :param retry_counter: maximum connection attempts (default 5)
        """
        Channel.__init__(self)
        self._hosts = kwargs.get("hosts")
        self._client_id = kwargs.get("client_id")
        self._retry_counter = kwargs.get("retry_counter", 5)
        self._topic = None
        self._channel = None

    def get_topic(self):
        """Return the currently configured topic (None until set)."""
        return self._topic

    def set_topic(self, topic):
        """Set the target topic; falsy values are ignored."""
        if topic:
            self._topic = topic

    def init(self):
        """
        Initialize the object using configuration params passed.
        Establish connection with Kafka broker, retrying with exponential
        backoff (2**attempt seconds) for up to ``retry_counter`` attempts.

        :raises ConnectionEstError: when no connection could be established
        """
        self._channel = None
        retry_count = 0
        last_error = None
        try:
            while self._channel is None and int(
                    self._retry_counter) > retry_count:
                # BUGFIX: connect() raises on failure, so without this
                # per-attempt except the retry/backoff loop below could
                # never run — the first failure escaped straight to the
                # outer handler.
                try:
                    self.connect()
                except ConnectionEstError as conn_ex:
                    last_error = conn_ex
                    self._channel = None
                if self._channel is None:
                    Log.warn(f"message bus producer connection Failed. Retry Attempt: {retry_count+1}" \
                        f" in {2**retry_count} seconds")
                    time.sleep(2**retry_count)
                    retry_count += 1
                else:
                    Log.debug(f"message bus producer connection is Initialized."\
                    f"Attempts:{retry_count+1}")
            if self._channel is None:
                raise ConnectionEstError(
                    f"Unable to connect to message bus broker. {last_error}")
        except ConnectionEstError:
            raise
        except Exception as ex:
            Log.error(f"message bus producer initialization failed. {ex}")
            raise ConnectionEstError(
                f"Unable to connect to message bus broker. {ex}")

    def connect(self):
        """
        Initiate the connection with Kafka broker and open the
        necessary communication channel.

        :raises ConnectionEstError: when the producer cannot be created or
            transactions cannot be initialized
        """
        try:
            conf = {
                'bootstrap.servers': str(self._hosts),
                'request.required.acks': 'all',
                'max.in.flight.requests.per.connection': 1,
                'client.id': self._client_id,
                # BUGFIX: librdkafka config values must be strings; pass
                # the UUID as str rather than a uuid.UUID object.
                'transactional.id': str(uuid.uuid4()),
                'enable.idempotence': True
            }
            self._channel = Producer(conf)
            self._channel.init_transactions()
        except Exception as ex:
            Log.error(f"Unable to connect to message bus broker. {ex}")
            raise ConnectionEstError(
                f"Unable to connect to message bus broker. {ex}")

    @classmethod
    def disconnect(cls):
        # BUGFIX: message previously said 'recv not implemented'
        # (copy/paste slip).
        raise Exception('disconnect not implemented for Kafka producer Channel')

    @classmethod
    def recv(cls, message=None):
        raise Exception('recv not implemented for Kafka producer Channel')

    def channel(self):
        """Return the underlying confluent_kafka Producer (or None)."""
        return self._channel

    def send(self, message):
        """
        Publish the message to kafka broker topic inside a transaction.

        :param message: payload to publish
        :raises SendError: on non-retriable, non-abortable Kafka errors
        """
        try:
            if self._channel is not None:
                self._channel.begin_transaction()
                self._channel.produce(self._topic, message)
                self._channel.commit_transaction()
                Log.info(f"Message Published to Topic: {self._topic},"\
                    f"Msg Details: {message}")
        except KafkaException as e:
            if e.args[0].retriable():
                # Retriable error, try again.
                # NOTE(review): recursion is unbounded; a persistently
                # retriable error could exhaust the stack — confirm intent.
                self.send(message)
            elif e.args[0].txn_requires_abort():
                # Abort the current transaction and start over; in a full
                # read-process-write cycle the consumer would also need to
                # be rewound.
                self._channel.abort_transaction()
                self.send(message)
                #TODO
                #rewind_consumer_offsets...()
            else:
                # Treat all other errors as fatal.
                Log.error(
                    f"Failed to publish message to topic : {self._topic}. {e}")
                raise SendError(
                    f"Unable to send message to message bus broker. {e}")

    @classmethod
    def recv_file(cls, remote_file, local_file):
        raise Exception('recv_file not implemented for Kafka producer Channel')

    @classmethod
    def send_file(cls, local_file, remote_file):
        raise Exception('send_file not implemented for Kafka producer Channel')

    @classmethod
    def acknowledge(cls, delivery_tag=None):
        # BUGFIX: message previously said 'send_file not implemented'
        # (copy/paste slip).
        raise Exception('acknowledge not implemented for Kafka producer Channel')