Esempio n. 1
0
class ItemConsumer(Service):
    """
    Kafka consumer service.

    Reads messages from the topic given by ``--topic-in`` using the consumer
    group given by ``--consumer`` (falling back to the service ``name``).
    Auto-commit is disabled on the underlying consumer; offsets are committed
    explicitly through :meth:`commit`.
    """
    name = "item_consumer"

    def __init__(self):
        super(ItemConsumer, self).__init__()
        self.topic_in = self.args.topic_in
        self.consumer_id = self.args.consumer or self.name
        # Bind the attribute before attempting to connect so that stop()
        # (invoked by the errback below) can test it even when the
        # KafkaConsumer constructor raises.
        self.consumer = None
        with ExceptContext(Exception, errback=lambda *args: self.stop()):
            self.consumer = KafkaConsumer(
                self.topic_in,
                group_id=self.consumer_id,
                bootstrap_servers=self.settings.get("KAFKA_HOSTS").split(","),
                enable_auto_commit=False,
                consumer_timeout_ms=1000)

    def consume_errback(self, exception):
        """Log a failure reported by the asynchronous commit future."""
        self.logger.error("Consume feature failed: %s. " % str(exception))

    def consume_callback(self, value):
        """Log the result reported by the asynchronous commit future."""
        self.logger.debug("Consume feature success: %s. " % str(value))

    # NOTE(review): presumably ties this method to periodic self.commit()
    # calls -- confirm against the call_later implementation.
    @call_later("commit", interval=10, immediately=False)
    def consume(self,
                callback=lambda value: value,
                errback=lambda message: message):
        """
        Fetch one message and return the first truthy ``callback`` result.

        :param callback: called with the message value (bytes are decoded as
            UTF-8 first); a truthy return value is returned to the caller.
        :param errback: called with the current message when ``callback``
            returned a falsy value; its return value becomes the next message
            to process, and a falsy return ends the loop.
        :return: the truthy ``callback`` result, or ``None`` when the loop
            ends without one.

        NOTE: the default ``errback`` hands back the same message, so a
        ``callback`` that keeps returning a falsy value for it will loop
        without terminating. Pass an ``errback`` that returns ``None`` (or a
        different message) to avoid that.
        """
        with ExceptContext(errback=self.log_err):
            # StopIteration / OSError from polling are passed to an errback
            # that returns True (presumably suppressing them -- confirm
            # against ExceptContext).
            with ExceptContext((StopIteration, OSError),
                               errback=lambda *args: True):
                message = next(self.consumer)
                while message:
                    value = message.value
                    self.logger.debug(
                        'Consume message from %s, message is %s' %
                        (self.topic_in, value))
                    if isinstance(value, bytes):
                        value = value.decode("utf-8")
                    value = callback(value)
                    if value:
                        return value
                    message = errback(message)

    def commit(self):
        """Commit offsets asynchronously and log the outcome."""
        future = self.consumer.commit_async()
        future.add_callback(self.consume_callback)
        future.add_errback(self.consume_errback)

    def enrich_parser_arguments(self):
        """Register the ``--topic-in`` and ``--consumer`` CLI options."""
        super(ItemConsumer, self).enrich_parser_arguments()
        self.parser.add_argument("-ti", "--topic-in", help="Topic in. ")
        self.parser.add_argument("-c", "--consumer", help="consumer id. ")

    def stop(self, *args):
        """Stop the service and close the Kafka consumer if one was created."""
        # NOTE(review): ``args`` is forwarded as a single tuple, not unpacked;
        # kept as-is -- confirm what Service.stop expects.
        super(ItemConsumer, self).stop(args)
        # Closing is best effort: any exception is handed to an errback that
        # returns True.
        with ExceptContext(Exception, errback=lambda *args: True):
            if self.consumer is not None:
                self.consumer.close()
Esempio n. 2
0
class KafkaAdapter(BaseAdapter):
    """
    Adapter for communicating with Kafka brokers.

    With ``direction == 'input'`` a ``KafkaConsumer`` subscribed to ``topic``
    is created; for any other direction a ``KafkaProducer`` is created.
    """

    def __init__(self, direction, bootstrap_servers, topic, **kwargs):
        """
        :param direction: passed to ``BaseAdapter``; ``'input'`` selects the
            consumer side, anything else the producer side.
        :param bootstrap_servers: a list of ``host[:port]`` strings, or a
            single (optionally comma-separated) string.
        :param topic: topic to consume from / produce to.
        :param kwargs: extra options forwarded to the Kafka client. When both
            ``sasl_plain_username`` and ``sasl_plain_password`` are present,
            ``sasl_mechanism`` and ``security_protocol`` are set to
            ``PLAIN`` / ``SASL_SSL``.
        """
        super().__init__(direction)

        self.topic = topic
        self.bootstrap_servers = bootstrap_servers
        self.extra_opts = kwargs

        if 'sasl_plain_username' in self.extra_opts and 'sasl_plain_password' in self.extra_opts:
            self.extra_opts['sasl_mechanism'] = 'PLAIN'
            self.extra_opts['security_protocol'] = 'SASL_SSL'

        self.url = 'kafka://{}/{}'.format(
            self._first_server(self.bootstrap_servers), self.topic)

        self._connect()

    @staticmethod
    def _first_server(bootstrap_servers):
        """Return the first broker address from a list or a string."""
        # Indexing a plain string would yield only its first character, so
        # string input is split on commas before taking the first entry.
        if isinstance(bootstrap_servers, str):
            return bootstrap_servers.split(',')[0].strip()
        return bootstrap_servers[0]

    def _connect(self):
        """
        Create the consumer or producer for this adapter.

        :raises TuberIOError: on a ``KafkaError`` while connecting.
        :raises TuberException: on an ``SSLError`` while connecting.
        """
        try:
            if self.direction == 'input':
                self._consumer = KafkaConsumer(self.topic,
                                               bootstrap_servers=self.bootstrap_servers,
                                               **self.extra_opts)
            else:
                self._producer = KafkaProducer(bootstrap_servers=self.bootstrap_servers,
                                               retries=6,
                                               **self.extra_opts)
            TuberLogger.info('Connected to {}'.format(self.url))
        except KafkaError as e:
            raise TuberIOError('Unable to connect to {}. Reason: {}'.format(self.url, str(e))) from e
        except SSLError as e:
            raise TuberException('Error creating adapter {}: {}'.format(self.url, str(e))) from e

    def _send(self, message):
        """
        Send ``message.serialize()`` to the topic.

        :raises TuberIOError: on a ``KafkaError`` from the send or the wait.
        """
        try:
            record = self._producer.send(self.topic, message.serialize())
            # Wait (up to 10 seconds) on the returned future so a delivery
            # failure surfaces here; the metadata itself is not used.
            record.get(timeout=10)
        except KafkaError as e:
            raise TuberIOError('Unable to send message to {}. Reason: {}'.format(self.url, str(e))) from e

    def receive(self):
        """
        Fetch the next record and wrap its value in a ``Message``.

        Only ``KafkaError`` is translated; a ``StopIteration`` from the
        consumer is not intercepted here.

        :raises TuberIOError: on a ``KafkaError`` while receiving.
        """
        try:
            record = next(self._consumer)
            return Message(record.value)
        except KafkaError as e:
            raise TuberIOError('Error receiving from {}. Reason: {}'.format(self.url, str(e))) from e
Esempio n. 3
0
class KafkaInput(AbstractInput):
    """
    Input that builds pandas data frames from Kafka records.

    Configuration keys read here: ``kafka_topics_mapping`` (its keys are the
    topics to subscribe to; its values are used as data frame column names),
    ``kafka_input_mapper`` (defaults to ``ml4iiot.input.kafka.JsonInputMapper``)
    and ``kafka_server`` (defaults to ``localhost:9092``).
    """

    def __init__(self, config):
        super().__init__(config)

        # Created in init(); stays None until then.
        self.consumer = None
        self.topics_mapping = self.get_config('kafka_topics_mapping')
        self.input_mapper = instance_from_config(
            self.get_config(
                'kafka_input_mapper',
                default={'class': 'ml4iiot.input.kafka.JsonInputMapper'}))

    def init(self):
        """Create the Kafka consumer subscribed to every mapped topic."""
        super().init()

        self.consumer = KafkaConsumer(*self.topics_mapping.keys(),
                                      bootstrap_servers=[
                                          self.get_config(
                                              'kafka_server',
                                              default='localhost:9092')
                                      ],
                                      enable_auto_commit=True)

    def next_data_frame(self, batch_size: int = 1) -> DataFrame:
        """
        Read ``batch_size`` records and return them as one data frame.

        Each record is converted to a dict by the input mapper. The entry
        keyed by ``self.index_column`` is collected under that column; every
        other entry is collected under the column the record's topic maps to.
        Rows with a duplicate index value are dropped (first occurrence
        kept) before the index is set.

        :param batch_size: number of records to consume.
        :return: data frame indexed by ``self.index_column``.
        """
        pandas_dict = {}

        for _ in range(batch_size):
            record = next(self.consumer)
            record_dict = self.input_mapper.from_kafka_record_to_dict(record)

            for key, value in record_dict.items():
                if key == self.index_column:
                    column = self.index_column
                else:
                    column = self.topics_mapping[record.topic]
                append_value_to_dict_list(pandas_dict, column, value)

        data_frame = pd.DataFrame.from_dict(pandas_dict)
        # drop_duplicates returns a new frame; the result has to be rebound
        # or the de-duplication is silently lost.
        data_frame = data_frame.drop_duplicates(subset=self.index_column)
        data_frame.set_index(self.index_column, inplace=True)

        return data_frame

    def destroy(self) -> None:
        """Release the Kafka consumer, if init() ever created one."""
        super().destroy()

        if self.consumer is not None:
            self.consumer.close()
Esempio n. 4
0
class EasyKafkaConsumer:
    # Class-level default; __init__ rebinds a per-instance configuration dict.
    __config_dic = {}
    logger = EasyKafkaLog.logger()

    def __init__(self, config_or_yml_path):
        """
        Create the underlying KafkaConsumer from a configuration and
        subscribe it to the configured topics.

        :param config_or_yml_path: an EasyKafkaConfig, or a str (yml path)
            from which an EasyKafkaConfig is built
        :raises TypeError: if the argument is neither of the two
        """
        if isinstance(config_or_yml_path, EasyKafkaConfig):
            config = config_or_yml_path
        elif isinstance(config_or_yml_path, str):
            config = EasyKafkaConfig(config_or_yml_path)
        else:
            raise TypeError('config_or_yml_path: need str or EasyKafkaConfig')
        self.__config_dic = config.__dict__

        settings = self.__config_dic
        servers = settings['bootstrap_servers']
        group = settings['group_id']
        self.consumer = KafkaConsumer(bootstrap_servers=servers,
                                      group_id=group)

        topics = settings['topic_subscribe']
        self.consumer.subscribe(topics)
        started = 'consumer started[topic: {}, group_id: {}]'.format(
            topics, settings['group_id'])
        self.logger.info(started)

    def __iter__(self):
        # Iteration is delegated to the wrapped consumer object itself.
        return self.consumer

    def __next__(self):
        # Pull a single record from the wrapped consumer.
        return self.consumer.__next__()

    def subscribe(self, fn, thread=False):
        """
        Feed every received record to the callback ``fn(record)``.

        Example:
            def task(record):
                ...
            kafka_consumer.subscribe(task)

        :param fn: callable taking exactly one argument, the record
        :param thread: when True, each record is handled in a newly started
            thread; otherwise fn runs inline (the default)
        """
        mode = 'async' if thread else 'blocked'
        self.logger.info('consumer task started, mode={}'.format(mode))
        for record in self:
            received = 'received topic: {}, msg: {}'.format(
                record.topic, record.value)
            self.logger.info(received)
            if not thread:
                fn(record)
                continue
            worker = threading.Thread(target=fn, args=(record, ))
            worker.start()