def kafka_local_file(optlist=None, broker='', group='', topics='', argv=None):

    # Positional arguments (e.g. the remainder of a getopt parse) take precedence
    if argv:
        broker = argv[0]
        group = argv[1]
        topics = argv[2:]
    # Consumer configuration
    # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
    conf = {'bootstrap.servers': broker, 'group.id': group, 'session.timeout.ms': 6000,
            'default.topic.config': {'auto.offset.reset': 'smallest'}}

    # Check to see if -T option exists
    for opt in optlist or []:
        if opt[0] != '-T':
            continue
        try:
            intval = int(opt[1])
        except ValueError:
            sys.stderr.write("Invalid option value for -T: %s\n" % opt[1])
            sys.exit(1)

        if intval <= 0:
            sys.stderr.write("-T option value needs to be larger than zero: %s\n" % opt[1])
            sys.exit(1)

        conf['stats_cb'] = stats_cb
        conf['statistics.interval.ms'] = int(opt[1])

    # Create logger for consumer (logs will be emitted when poll() is called)
    logger = logging.getLogger('consumer')
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(asctime)-15s %(levelname)-8s %(message)s'))
    logger.addHandler(handler)

    # Create Consumer instance
    # Hint: try debug='fetch' to generate some log messages
    c = Consumer(conf, logger=logger)

    def print_assignment(consumer, partitions):
        print('Assignment:', partitions)
    # Subscribe to topics
    c.subscribe(topics, on_assign=print_assignment)
    # hdfs login
    #client = hdfs.Client('http://%s:50070' % (hdfshost))
    # client = InsecureClient('http://%s:50070' % (hdfshost),user='******')
    # Read messages from Kafka, print to stdout
    try:
        while True:
            logtime = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))

            msg = c.poll(timeout=1.0)

            if msg is None:
                continue

            if msg.error():
                # Error or event
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write('%s %s [%d] reached end at offset %d\n' %
                                     (logtime, msg.topic(), msg.partition(), msg.offset()))
                elif msg.error():
                    # Error
                    raise KafkaException(msg.error())
            else:
                msgstr = msg.value().decode('utf-8')
                msgdict = json.loads(msgstr, object_pairs_hook=OrderedDict)

                database = msgdict.get('database')
                table = msgdict.get('table')
                msgtype = msgdict.get('type')
                if msgtype == 'insert':
                    data = msgdict.get('data')
                    datalist = data.values()
                    datastr = ','.join('%s' % field for field in datalist)
                    hour = time.strftime('%Y-%m-%d-%H', time.localtime(time.time()))
                    localfile = '/mnt/var/%s.%s.%s.%s' % (database, table, msgtype, hour)
                    sys.stderr.write('%s %s [%d] at offset %d with value %s:\n' % (logtime, msg.topic(), msg.partition(), msg.offset(), msgstr))

                    with open(localfile, 'a') as writer:
                        writer.write(datastr + '\n')
                else:
                    sys.stderr.write('%s %s [%d] at offset %d with type %s:\n' % (logtime, msg.topic(), msg.partition(), msg.offset(), msgtype))

    except KeyboardInterrupt:
        sys.stderr.write('%% Aborted by user\n')

    # Close down consumer to commit final offsets.
    c.close()
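
The function above expects an optlist/argv pair produced by getopt and a stats_cb callback, none of which are shown on this page; a minimal sketch of that missing glue (the -T flag follows the behaviour described in the code, everything else is an assumption) could be:

import sys
import getopt

def stats_cb(stats_json_str):
    # librdkafka calls this every statistics.interval.ms with a JSON payload
    sys.stderr.write('KAFKA Stats: %s\n' % stats_json_str)

if __name__ == '__main__':
    optlist, argv = getopt.getopt(sys.argv[1:], 'T:')
    if len(argv) < 3:
        sys.stderr.write('Usage: %s [-T interval_ms] <broker> <group> <topic> [topic ..]\n' % sys.argv[0])
        sys.exit(1)
    kafka_local_file(optlist, argv=argv)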
Example 2
def print_assignment(consumer, partitions):
    print('Assignment:', partitions)


if __name__ == '__main__':

    consumer_conf = get_section_config('consumer')
    consume = Consumer(**consumer_conf)
    consume.subscribe(['example-topic-output'], on_assign=print_assignment)

    try:
        while True:
            msg = consume.poll(timeout=1.0)
            if msg is None:
                continue
            if msg.error():
                raise KafkaException(msg.error())
            else:
                # Proper message
                sys.stderr.write('%% %s [%d] at offset %d with key %s:\n' %
                                 (msg.topic(), msg.partition(), msg.offset(),
                                  str(msg.key())))
                print(msg.value())

    except KeyboardInterrupt:
        sys.stderr.write('%% Aborted by user\n')

    finally:
        # Close down consumer to commit final offsets.
        consume.close()
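
get_section_config is not shown in this example; a plausible sketch, assuming the consumer settings live in an INI-style file such as config.ini with a [consumer] section, is:

import configparser

def get_section_config(section, path='config.ini'):
    # Return one section of an INI file as a plain dict,
    # e.g. bootstrap.servers, group.id, auto.offset.reset, ...
    parser = configparser.ConfigParser()
    parser.read(path)
    return dict(parser[section])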
Example 3
def item_consume_red():
    props = {
        'bootstrap.servers': '10.1.1.133:9092',  # Where is the Kafka cluster? (replace with the cluster to connect to)
        'auto.offset.reset': 'latest',  # Start reading from the latest offset
        'enable.auto.commit': True,  # auto commit
        'session.timeout.ms': 6000,  # the consumer is considered dead if it loses contact with Kafka for more than 6000 ms
        'error_cb': error_cb  # callback invoked for client-level errors
    }
    # Step 2. Create a Kafka Consumer instance
    consumer = Consumer(props)
    # Step 3. Specify the topic name to subscribe to
    topicName1 = "items2"
    # topicName = "test"
    # Step 4. Subscribe the Consumer to the specified topic on the Kafka cluster
    consumer.subscribe([topicName1])
    # consumer.subscribe([topicName])
    # Step 5. Keep pulling incoming messages from Kafka  # on_assign=my_assign
    count = 0
    try:
        while True:
            # Ask Kafka for any new messages
            records = consumer.consume(num_messages=500, timeout=1.0)  # batch read
            if not records:  # consume() returns a (possibly empty) list, never None
                continue

            for record in records:
                # Check for errors
                if record is None:
                    continue
                if record.error():
                    # Error or event
                    if record.error().code() == KafkaError._PARTITION_EOF:
                        sys.stderr.write(
                            '%% {} [{}] reached end at offset {}\n'.
                            format(record.topic(), record.partition(),
                                   record.offset()))

                        # End of partition event
                    else:
                        # Error
                        raise KafkaException(record.error())
                else:
                    # ** business logic / message processing goes here **
                    # Extract the relevant metadata
                    topic = record.topic()
                    partition = record.partition()
                    offset = record.offset()
                    timestamp = record.timestamp()
                    # Extract msgKey and msgValue
                    msgKey = try_decode_utf8(record.key())
                    msgValue = try_decode_utf8(record.value())
                    msg_k_v = {msgKey: msgValue}
                    # Print the metadata plus the msgKey & msgValue
                    count += 1
                    print('{}-{}-{} : ({} , {})'.format(
                        topic, partition, offset, msgKey, msgValue))
                    return msg_k_v
    except KeyboardInterrupt as e:
        sys.stderr.write('Aborted by user\n')
    except Exception as e:
        sys.stderr.write(str(e))

    finally:
        # Step 6. Close the Consumer instance's connection
        consumer.close()
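
Several examples on this page rely on an error_cb hook and a try_decode_utf8 helper that are never shown; a minimal sketch of both, assuming errors should simply be written to stderr, is:

import sys

def error_cb(err):
    # Invoked by the client for global errors (e.g. all brokers down)
    sys.stderr.write('Kafka error: %s\n' % err)

def try_decode_utf8(data):
    # Keys and values arrive as bytes (or None); decode defensively
    if data is None:
        return None
    try:
        return data.decode('utf-8')
    except UnicodeDecodeError:
        return data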
Example 4
    def run(self):
        logger.debug("Starting snuba query subscriber")
        self.offsets.clear()

        conf = {
            "bootstrap.servers": self.bootstrap_servers,
            "group.id": self.group_id,
            "session.timeout.ms": 6000,
            "auto.offset.reset": self.initial_offset_reset,
            "enable.auto.commit": "false",
            "enable.auto.offset.store": "false",
            "enable.partition.eof": "false",
            "default.topic.config": {
                "auto.offset.reset": self.initial_offset_reset
            },
        }

        def on_assign(consumer, partitions):
            for partition in partitions:
                if partition.offset == OFFSET_INVALID:
                    updated_offset = None
                else:
                    updated_offset = partition.offset
                self.offsets[partition.partition] = updated_offset
            logger.info(
                "query-subscription-consumer.on_assign",
                extra={
                    "offsets": six.text_type(self.offsets),
                    "partitions": six.text_type(partitions),
                },
            )

        def on_revoke(consumer, partitions):
            partition_numbers = [
                partition.partition for partition in partitions
            ]
            self.commit_offsets(partition_numbers)
            for partition_number in partition_numbers:
                self.offsets.pop(partition_number, None)
            logger.info(
                "query-subscription-consumer.on_revoke",
                extra={
                    "offsets": six.text_type(self.offsets),
                    "partitions": six.text_type(partitions),
                },
            )

        self.consumer = Consumer(conf)
        self.consumer.subscribe([self.topic],
                                on_assign=on_assign,
                                on_revoke=on_revoke)

        try:
            i = 0
            while True:
                message = self.consumer.poll(0.1)
                if message is None:
                    continue

                error = message.error()
                if error is not None:
                    raise KafkaException(error)

                i = i + 1

                with sentry_sdk.start_transaction(
                        op="handle_message",
                        name="query_subscription_consumer_process_message",
                        sampled=True,
                ), metrics.timer("snuba_query_subscriber.handle_message"):
                    self.handle_message(message)

                # Track latest completed message here, for use in `shutdown` handler.
                self.offsets[message.partition()] = message.offset() + 1

                if i % self.commit_batch_size == 0:
                    logger.debug("Committing offsets")
                    self.commit_offsets()
        except KeyboardInterrupt:
            pass

        self.shutdown()
Example 5
 def on_rebalance(consumer, partitions):
     for p in partitions:
         if p.error:
             raise KafkaException(p.error)
     print("Kafka partitions rebalanced: {} / {}".format(
         consumer, partitions))
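
A rebalance callback like this is typically registered for both assignment and revocation, as in Example 13 further down; a minimal usage sketch, assuming a Consumer instance c and a placeholder topic name, is:

c.subscribe(['my-topic'], on_assign=on_rebalance, on_revoke=on_rebalance)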
Example 6
def consumer_json_kafka(name):

    props = {
        'bootstrap.servers': '10.120.28.129:9092',   # Where is the Kafka cluster? (replace with the cluster to connect to)
        'group.id': 'STUDENTID',                     # consumer group name (replace with your own student ID)
        'auto.offset.reset': 'earliest',             # Start reading from the earliest offset
        'session.timeout.ms': 6000,                  # the consumer is considered dead if it loses contact with Kafka for more than 6000 ms
        'error_cb': error_cb                         # callback invoked for client-level errors
    }
    # Step 2. Create a Kafka Consumer instance
    consumer = Consumer(props)
    # Step 3. Specify the topic name to subscribe to
    topicName = name
    # Step 4. Subscribe the Consumer to the specified topic on the Kafka cluster
    consumer.subscribe([topicName], on_assign=my_assign)
    # Step 5. Keep pulling incoming messages from Kafka
  
    list_data = []
    list_key = []
    msgValue=0
    try:
        while True:
            # Ask Kafka for any new messages

            records = consumer.consume(num_messages=500, timeout=1.0)  # batch read
            #time.sleep(3)
            if len(records) == 0:
                # stop once at least one message has been seen and the topic has drained
                if msgValue != 0:
                    break
                else:
                    continue
            for record in records:

                # Check for errors
                if record is None:
                    continue
                if record.error():
                    # Error or event
                    if record.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        sys.stderr.write('%% {} [{}] reached end at offset {}\n'.format(record.topic(),
                                                                                        record.partition(),
                                                                                        record.offset()))

                    else:
                        # Error
                        raise KafkaException(record.error())
                else:
                    # ** business logic / message processing goes here **
                    # Extract the relevant metadata
                    topic = record.topic()
                    partition = record.partition()
                    offset = record.offset()
                    timestamp = record.timestamp()
                    # Extract msgKey and msgValue
                    msgKey = try_decode_utf8(record.key())
                    msgValue = try_decode_utf8(record.value())
                    list_key.append(msgKey)
                    list_data.append(msgValue)
                    
                    #print(msgKey,msgValue)

    except KeyboardInterrupt as e:
        sys.stderr.write('Aborted by user\n')
    except Exception as e:
        sys.stderr.write(str(e))

    finally:
        # Step 6. Close the Consumer instance's connection
        consumer.close()
        return list_key,list_data
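
The my_assign callback passed to subscribe() above is not included; a plausible sketch that simply logs the assigned partitions, mirroring the print_assignment helpers elsewhere on this page, is:

def my_assign(consumer, partitions):
    # Log which partitions this consumer instance has been handed
    print('Assignment:', partitions)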
Example 7
def receiver(q, l, no_requests, w_q):

    """
    Kafka listener for incoming requests
    """
    l.acquire()
    print("%% Lock acquired")
    conf = {
    'bootstrap.servers':BROKER,
    'group.id':GROUP,
    'session.timeout.ms':6000,
    'default.topic.config':{'auto.offset.reset':'smallest'},
    }
    c = Consumer(conf)

    try:
        c.subscribe([TOPIC])
        tm_out = 5
        tm_cur = time.time()
        tm_tot = tm_cur + tm_out
        messages_received = 0
        l.release()
        print("%% Lock released")
        while True:

            msg = c.poll(timeout=1.0)

            if msg is None:
                tm_none = time.time()

                if tm_none > tm_tot:
                    print("%% Messages received: {}".format(messages_received))
                    print(
                    '%% No message received for {} seconds.'.format(tm_out)
                    )
                    break
                continue

            if msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    print(
                    '%% Reached end of topic {} [{}] at offset {}\n'.format(
                                    msg.topic(), msg.partition(), msg.offset())
                                    )
                    continue
                else:
                    raise KafkaException(msg.error())
            msg_load = json.loads(msg.value())

            if not msg_load.get('session_id') == session_id:
                print("Session ID mismatch")
                continue

            tm_msg = msg_load['sys_ts']
            tm_tot = tm_msg + tm_out
            messages_received += 1

            if messages_received == no_requests:
                print("%% Messages received: {}".format(messages_received))
                # c_q.put(('reciever', messages_received))
                break

            msg_auth_r = (1, msg_load)
            w_q.put(msg_auth_r)

    except KeyboardInterrupt:
        sys.stderr.write('%% Aborted by user\n')

    finally:
        print("%% Closing consumer \n")
        c.close()
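
This receiver depends on module-level BROKER, GROUP, TOPIC and session_id values defined elsewhere; a hypothetical sketch of that configuration (all values are placeholders, not taken from the original) is:

import uuid

BROKER = 'localhost:9092'       # placeholder broker address
GROUP = 'request-listeners'     # placeholder consumer group id
TOPIC = 'requests'              # placeholder topic name
session_id = str(uuid.uuid4())  # compared against the 'session_id' field of each message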
Example 8
 def on_delivery(self, err, msg):
     if err is not None:
         raise KafkaException(err)
     self.last_msg = msg
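
A delivery callback of this shape is attached per message at produce time; a minimal usage sketch with a confluent_kafka Producer, assuming handler is an instance of the class defining on_delivery above (broker, topic and payload are placeholders), is:

from confluent_kafka import Producer

producer = Producer({'bootstrap.servers': 'localhost:9092'})
producer.produce('my-topic', value=b'payload', on_delivery=handler.on_delivery)
producer.flush()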
Example 9
    async def get_message_from_kafka_cb(self, callback_method) -> None:
        """
        Get a specific message from the kafka_broker and invoke the callback_method automatically with the
        message body passed as an argument to the callback_method.

        :param callback_method: Takes a callback_method which is automatically called on successfully retrieving
                                a message from the KafkaBroker.
        """
        if self.consumer is None:
            logger.error("Kafka Consumer not initialized prior to this call")
            raise ValueError("ERROR - Consumer not initialized")

        if callback_method is None:
            logger.error(
                "No callback_method provided for handling of fetched message")
            raise ValueError("ERROR - callback_method not provided")

        loop = get_running_loop()
        topic_partition = None

        try:
            # This automatically sets the offset to the one provided by the user if it is not None
            topic_partition = TopicPartition(self.topic_name, self.partition,
                                             self.offset)

            self.consumer.assign([topic_partition])

            # polls for exactly one record - waits for a configurable max time (seconds)
            msg = await loop.run_in_executor(None, self.consumer.poll, 5.0)

            if msg is None:  # Handle timeout during poll
                msg = "Consumer error: timeout while polling message from Kafka"
                logger.error(msg)
                raise KafkaException(msg)
            if msg.error():
                error_msg = f"Consumer - error: {msg.error()}"
                logger.error(error_msg)
                if msg.error().code() == KafkaError.OFFSET_OUT_OF_RANGE:
                    raise KafkaMessageNotFoundError(
                        error_msg)  # throw a 404 at the controller

                raise KafkaException(error_msg)

            headers = msg.headers()
            message = None

            if headers is None:
                message = msg.value()
            # Re-evaluate later if we will need message segmentation or have a use case where the producer
            # will chunk messages and record them with the broker
            # else:
            #     message = combine_segments(msg.value(), self._generate_header_dictionary(msg.headers()))

            if message is not None:
                logger.trace(
                    f"Found message for topic_name - {self.topic_name}, partition - {self.partition} "
                    f"and offset - {self.offset}. Invoking callback_method - {callback_method}",
                )

                return await callback_method(message)
            else:
                _msg_not_found_error = (
                    "No message was found that could be fetched for "
                    f"topic_name: {self.topic_name}, partition: {self.partition}, offset: {self.offset}")

                logger.error(_msg_not_found_error)
                raise KafkaMessageNotFoundError(_msg_not_found_error)
        finally:
            self._close_consumer()
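
KafkaMessageNotFoundError is application-defined rather than part of confluent_kafka; a minimal sketch of its declaration (the 404 mapping mentioned in the comments is left to the calling controller) is:

class KafkaMessageNotFoundError(Exception):
    """Raised when no message exists at the requested topic/partition/offset."""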
Example 10
def kafka_fail_fast(err: Optional[Any], _msg: Any) -> None:
    if err is not None:
        print("Kafka producer delivery error: {}".format(err))
        print("Bailing out...")
        # TODO: should it be sys.exit(-1)?
        raise KafkaException(err)
Example 11
async def _confluent_consumer_loop(q: asyncio.Queue):
    global c
    if c is None:
        raise Exception
    await asyncio.sleep(
        1.0)  # wait here so that kafka has time to finish creating topics
    try:
        consumer.subscribe_to_all(c)
        _set_halted_to_zero()
        while running:
            msg = c.poll(0)
            if msg is None:
                await asyncio.sleep(1.0)
            elif msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    _LOGGER.warning("%s [%d] reached end at offset %d",
                                    msg.topic(), msg.partition(), msg.offset())
                else:
                    raise KafkaException(msg.error())
            else:
                contents = json.loads(
                    msg.value().decode("utf-8"))  # type: dict
                v = contents.get("version", "v1")

                ############################################################
                # Choose which handler table to use based on env variables #
                ############################################################
                if ConsumerModeEnum[
                        Configuration.
                        CONSUMER_MODE] == ConsumerModeEnum.investigator:
                    try:
                        func = _handler_lookup(msg.topic(), v)
                    except KeyError:
                        _LOGGER.warning(
                            "No handler for version %s of %s", v,
                            _get_class_from_topic_name(msg.topic()))
                        message_class = _get_class_from_topic_name(msg.topic())
                        missing_handler.labels(
                            base_topic_name=message_class.base_name,
                            message_version=v).set(1)
                        _message_failed(msg)
                elif ConsumerModeEnum[
                        Configuration.
                        CONSUMER_MODE] == ConsumerModeEnum.metrics:
                    try:
                        func = _handler_lookup(msg.topic(),
                                               v,
                                               table=metrics_handler_table,
                                               default=default_metric_handler)
                    except KeyError:
                        _LOGGER.warning(
                            "Could not find entry in metrics handler table for %s.",
                            msg.topic())
                #############################################################

                await q.put((func, msg))
            await asyncio.sleep(0)
    finally:
        c.close()
        for _ in range(Configuration.NUM_WORKERS):
            await q.put(None
                        )  # each worker can receive this value exactly once
Example 12
 def _fail_fast_produce(err: Any, msg: Any) -> None:
     if err is not None:
         print(f"Kafka producer delivery error: {err}", file=sys.stderr)
         raise KafkaException(err)
Example 13
    def run(self) -> None:
        def fail_fast(err: Any, _msg: Any) -> None:
            if err is not None:
                print("Kafka producer delivery error: {}".format(err))
                print("Bailing out...")
                # TODO: should it be sys.exit(-1)?
                raise KafkaException(err)

        def on_commit(err: Any, partitions: List[Any]) -> None:
            if err is not None:
                print("Kafka consumer commit error: {}".format(err))
                print("Bailing out...")
                # TODO: should it be sys.exit(-1)?
                raise KafkaException(err)
            for p in partitions:
                # check for partition-specific commit errors
                print(p)
                if p.error:
                    print("Kafka consumer commit error: {}".format(p.error))
                    print("Bailing out...")
                    # TODO: should it be sys.exit(-1)?
                    raise KafkaException(p.error)
            print("Kafka consumer commit successful")
            pass

        def on_rebalance(consumer: Consumer, partitions: List[Any]) -> None:
            for p in partitions:
                if p.error:
                    raise KafkaException(p.error)
            print("Kafka partitions rebalanced: {} / {}".format(
                consumer, partitions))

        consumer_conf = self.kafka_config.copy()
        consumer_conf.update({
            "group.id": self.consumer_group,
            "on_commit": fail_fast,
            # messages don't have offset marked as stored until pushed to
            # elastic, but we do auto-commit stored offsets to broker
            "enable.auto.commit": True,
            "enable.auto.offset.store": False,
            # user code timeout; if no poll after this long, assume user code
            # hung and rebalance (default: 5min)
            "max.poll.interval.ms": 180000,
            "default.topic.config": {
                "auto.offset.reset": "latest",
            },
        })
        consumer = Consumer(consumer_conf)

        producer_conf = self.kafka_config.copy()
        producer_conf.update({
            "delivery.report.only.error": True,
            "default.topic.config": {
                "request.required.acks": -1,  # all brokers must confirm
            },
        })
        producer = Producer(producer_conf)

        consumer.subscribe(
            [self.consume_topic],
            on_assign=on_rebalance,
            on_revoke=on_rebalance,
        )
        print("Kafka consuming {}".format(self.consume_topic))

        while True:
            msg = consumer.poll(self.poll_interval)
            if not msg:
                print("nothing new from kafka (poll_interval: {} sec)".format(
                    self.poll_interval))
                continue
            if msg.error():
                raise KafkaException(msg.error())

            cle = json.loads(msg.value().decode("utf-8"))
            # print(cle)
            print("processing changelog index {}".format(cle["index"]))
            release_ids = []
            new_release_ids = []
            file_ids = []
            fileset_ids = []
            webcapture_ids = []
            container_ids = []
            work_ids = []
            release_edits = cle["editgroup"]["edits"]["releases"]
            for re in release_edits:
                release_ids.append(re["ident"])
                # filter to direct release edits which are not updates
                if not re.get("prev_revision") and not re.get(
                        "redirect_ident"):
                    new_release_ids.append(re["ident"])
            file_edits = cle["editgroup"]["edits"]["files"]
            for e in file_edits:
                file_ids.append(e["ident"])
            fileset_edits = cle["editgroup"]["edits"]["filesets"]
            for e in fileset_edits:
                fileset_ids.append(e["ident"])
            webcapture_edits = cle["editgroup"]["edits"]["webcaptures"]
            for e in webcapture_edits:
                webcapture_ids.append(e["ident"])
            container_edits = cle["editgroup"]["edits"]["containers"]
            for e in container_edits:
                container_ids.append(e["ident"])
            work_edits = cle["editgroup"]["edits"]["works"]
            for e in work_edits:
                work_ids.append(e["ident"])

            # TODO: do these fetches in parallel using a thread pool?
            for ident in set(file_ids):
                file_entity = self.api.get_file(ident, expand=None)
                # update release when a file changes
                # TODO: also fetch old version of file and update any *removed*
                # release idents (and same for filesets, webcapture updates)
                release_ids.extend(file_entity.release_ids or [])
                file_dict = self.api.api_client.sanitize_for_serialization(
                    file_entity)
                producer.produce(
                    self.file_topic,
                    json.dumps(file_dict).encode("utf-8"),
                    key=ident.encode("utf-8"),
                    on_delivery=fail_fast,
                )

            # TODO: topic for fileset updates
            for ident in set(fileset_ids):
                fileset_entity = self.api.get_fileset(ident, expand=None)
                # update release when a fileset changes
                release_ids.extend(fileset_entity.release_ids or [])

            # TODO: topic for webcapture updates
            for ident in set(webcapture_ids):
                webcapture_entity = self.api.get_webcapture(ident, expand=None)
                # update release when a webcapture changes
                release_ids.extend(webcapture_entity.release_ids or [])

            for ident in set(container_ids):
                container = self.api.get_container(ident)
                container_dict = self.api.api_client.sanitize_for_serialization(
                    container)
                producer.produce(
                    self.container_topic,
                    json.dumps(container_dict).encode("utf-8"),
                    key=ident.encode("utf-8"),
                    on_delivery=fail_fast,
                )

            for ident in set(release_ids):
                release = self.api.get_release(
                    ident,
                    expand="files,filesets,webcaptures,container,creators")
                if release.work_id:
                    work_ids.append(release.work_id)
                release_dict = self.api.api_client.sanitize_for_serialization(
                    release)
                producer.produce(
                    self.release_topic,
                    json.dumps(release_dict).encode("utf-8"),
                    key=ident.encode("utf-8"),
                    on_delivery=fail_fast,
                )
                # for ingest requests, filter to "new" active releases with no matched files
                if release.ident in new_release_ids:
                    ir = release_ingest_request(
                        release, ingest_request_source="fatcat-changelog")
                    if ir and not release.files and self.want_live_ingest(
                            release, ir):
                        producer.produce(
                            self.ingest_file_request_topic,
                            json.dumps(ir).encode("utf-8"),
                            # key=None,
                            on_delivery=fail_fast,
                        )

            # send work updates (just ident and changelog metadata) to scholar for re-indexing
            for ident in set(work_ids):
                assert ident
                key = f"work_{ident}"
                work_ident_dict = dict(
                    key=key,
                    type="fatcat_work",
                    work_ident=ident,
                    updated=cle["timestamp"],
                    fatcat_changelog_index=cle["index"],
                )
                producer.produce(
                    self.work_ident_topic,
                    json.dumps(work_ident_dict).encode("utf-8"),
                    key=key.encode("utf-8"),
                    on_delivery=fail_fast,
                )

            producer.flush()
            # TODO: publish updated 'work' entities to a topic
            consumer.store_offsets(message=msg)
Example 14
def consumer(args, poll_timeout=3.0):
    """ Consumes packets from a Kafka topic. """

    # setup the signal handler
    signal.signal(signal.SIGINT, signal_handler)

    # where to start consuming messages from
    kafka_offset_options = {
        "begin": seek_to_begin,
        "end": seek_to_end,
        "stored": seek_to_stored
    }
    on_assign_cb = kafka_offset_options[args.kafka_offset]

    # connect to kafka
    logging.debug("Connecting to Kafka; %s", args.kafka_configs)
    kafka_consumer = Consumer(args.kafka_configs)
    kafka_consumer.subscribe([args.kafka_topic], on_assign=on_assign_cb)

    # if 'pretty-print' not set, write libpcap global header
    if args.pretty_print == 0:
        sys.stdout.write(global_header(args))
        sys.stdout.flush()

    try:
        pkts_in = 0
        while not finished.is_set() and (args.max_packets <= 0 or pkts_in < args.max_packets):

            # consume a message from kafka
            msg = kafka_consumer.poll(timeout=poll_timeout)
            if msg is None:
                # no message received
                continue

            elif msg.error():

                if msg.error().code() == KafkaError._PARTITION_EOF:
                    if args.pretty_print > 0:
                        print("Reached end of topic: topic=%s, partition=%d, offset=%s" % (
                            msg.topic(), msg.partition(), msg.offset()))
                else:
                    raise KafkaException(msg.error())

            else:
                pkts_in += 1
                logging.debug("Packet received: pkts_in=%d", pkts_in)

                if args.pretty_print == 0:

                    # write the packet header and packet

                    # AT:  We are just sending over the results of the scan -- a list of macs/rssi's -- where this code
                    # was dealing with network packet sniffers --
                    # msg.value() is assumed to be JSON-encoded scan results
                    sys.stdout.write(json.dumps(json.loads(msg.value()), indent=2))
                    # sys.stdout.write(packet_header(msg))
                    # sys.stdout.write(msg.value())
                    sys.stdout.flush()

                elif pkts_in % args.pretty_print == 0:

                    # pretty print
                    print('Packet[%s]: date=%s topic=%s partition=%s offset=%s len=%s' % (
                        pkts_in, to_date(unpack_ts(msg.key())), args.kafka_topic,
                        msg.partition(), msg.offset(), len(msg.value())))

    finally:
        sys.stdout.close()
        kafka_consumer.close()
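
The seek_to_begin/seek_to_end/seek_to_stored callbacks and the signal_handler/finished pair are defined outside this snippet; a minimal sketch using confluent_kafka's offset constants (an assumption about how the originals behave) is:

import threading
from confluent_kafka import OFFSET_BEGINNING, OFFSET_END, OFFSET_STORED

finished = threading.Event()

def signal_handler(signum, frame):
    # ask the consume loop to stop on SIGINT
    finished.set()

def _seek_to(offset):
    def on_assign(consumer, partitions):
        # rewrite every assigned partition's start offset, then assign
        for p in partitions:
            p.offset = offset
        consumer.assign(partitions)
    return on_assign

seek_to_begin = _seek_to(OFFSET_BEGINNING)
seek_to_end = _seek_to(OFFSET_END)
seek_to_stored = _seek_to(OFFSET_STORED)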
Example 15
def collect_image(topic: str, kafka_session: Consumer):
    """Collect an image from the respective image topic
    
    Arguments:
        broker {str} -- Kafka client
        topic {str} -- topic (ex. images)
    """
    def print_assignment(consumer, partitions):
        print('Assignment:', partitions)

    kafka_session.subscribe([topic], on_assign=print_assignment)

    while True:
        msg = kafka_session.poll(timeout=1.0)
        if msg is None:
            logs.info("No messages available within topic : %s", topic)
            continue
        if msg.error():
            if msg.error().code() == KafkaError._PARTITION_EOF:
                logs.info('%% %s [%d] reached end of offset %d' %
                          (msg.topic(), msg.partition(), msg.offset()))
            else:
                logs.debug("Kafka Exception : %s", msg.error())
                raise KafkaException(msg.error())
        else:
            # Well formed messaged
            logs.info(
                '%% %s [%d] at offset %d with key %s: ' %
                (msg.topic(), msg.partition(), msg.offset(), str(msg.key())))

            # image transform
            image_array, orig_image_array = image_transform(msg)

            prediction, class_weights, final_conv_layer = do_inference(
                ts_server="172.23.0.9", ts_port=8500, model_input=image_array)

            # create CAM
            get_output = K.function([tf.convert_to_tensor(image_array)], [
                tf.convert_to_tensor(final_conv_layer),
                tf.convert_to_tensor(prediction)
            ])
            [conv_outputs, predictions] = get_output([image_array[0]])
            conv_outputs = conv_outputs[0, :, :, :]

            # TODO: Receiving variable results across CAMs generated by this
            # method. Needs further investigation and comparison to original
            # CAM paper found here : http://cnnlocalization.csail.mit.edu/
            cam = np.zeros(dtype=np.float32, shape=(conv_outputs.shape[:2]))
            for i, w in enumerate(class_weights[0]):
                cam += w * conv_outputs[:, :, i]
            cam = cam - np.min(cam)
            cam /= np.max(cam)
            #h,w = orig_image_array.shape[:2]
            cam = cv2.resize(cam, orig_image_array.shape[:2])

            # TODO : Investigate why the cv2.resize() function transposes
            # the height and width of the orig_image_array
            #cam = cv2.resize(cam, (orig_image_array.shape[:2][1], orig_image_array.shape[:2][0]), interpolation=cv2.INTER_CUBIC)
            cam = np.uint8(255 * cam)
            heatmap = cv2.applyColorMap(cam, cv2.COLORMAP_JET)
            #heatmap[np.where(cam < 0.2)] = 0
            img = heatmap * 0.3 + orig_image_array

            logs.info("Class Activation Map (CAM) Created!")

            # This is complete hackery and will need to be replaced
            # I don't know why a numpy array (see `img` array above)
            # would be 25MB when all constituent arrays are ~ 7MB total.
            # Let alone when saving an image to disk the image is only 1MB total.
            cv2.imwrite("inflight_img.png", img)

            new_img = Image.open("inflight_img.png", mode='r')
            img_bytes = io.BytesIO()
            new_img.save(img_bytes, format='PNG')
            img_bytes = img_bytes.getvalue()
            message = marshall_message(img_bytes, prediction.tolist())
            os.remove("inflight_img.png")

            p = kafka_producer()
            p.poll(0)
            p.produce(results_kafka_topic,
                      value=message,
                      callback=kafka_delivery_report)
            p.flush()
Example 16
def memberin2():
    props = {
        'bootstrap.servers': '10.1.1.133:9092',  # Kafka cluster
        'group.id': 'peter',  # consumer group name
        'auto.offset.reset': 'latest',
        'enable.auto.commit': False,
        'error_cb': error_cb  # callback invoked for client-level errors
    }

    # Step 2. Create a Kafka Consumer instance
    consumer = Consumer(props)
    # Step 3. Specify the topic name to subscribe to
    topicName = 'logs'
    # Step 4. Subscribe the Consumer to the specified topic on the Kafka cluster
    consumer.subscribe([topicName])

    # Step 5. Keep pulling incoming messages from Kafka
    try:
        while True:
            records_pulled = False  # track whether any valid record was pulled

            # Ask Kafka for any new messages
            records = consumer.consume(num_messages=500, timeout=1.0)  # batch read
            if not records:
                continue

            for record in records:
                # Check for errors
                if record is None:
                    continue
                if record.error():
                    # Error or event
                    if record.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        sys.stderr.write(
                            '%% %s [%d] reached end at offset %d\n' %
                            (record.topic(), record.partition(),
                             record.offset()))
                    else:
                        # Error
                        raise KafkaException(record.error())
                else:
                    records_pulled = True

                    # ** business logic / message processing goes here **
                    # Extract the relevant metadata
                    # topic = record.topic()
                    # partition = record.partition()
                    # offset = record.offset()
                    # timestamp = record.timestamp()
                    # Extract msgKey and msgValue
                    msgKey = try_decode_utf8(record.key())
                    msgValue = try_decode_utf8(record.value())
                    sendmsg = {msgKey: msgValue}
                    qq = {}
                    # Print the metadata plus the msgKey & msgValue
                    # print('%s-%d-%d : (%s , %s)' % (topic, partition, offset, msgKey, msgValue))
                    # test_msg = ('%s : %s' % (msgKey,msgValue))
                    print(sendmsg)
                    print("Detected a member entering the store")
                    a = sendmsg['login'].split("'")
                    qq.setdefault("Name", a[5])
                    print(qq)
                    client = pymongo.MongoClient(
                        "mongodb+srv://peter:[email protected]/ceb101?retryWrites=true&w=majority"
                    )

                    mydb = client.wow
                    mycol = mydb['fit']
                    mycol.insert_many([qq])

                    consumer.close()

            # Synchronous commit
            # if records_pulled:
            #     offsets = consumer.commit(asynchronous=False)
            #     print_sync_commit_result(offsets)
    except KeyboardInterrupt as e:
        sys.stderr.write('Aborted by user\n')
    except Exception as e:
        sys.stderr.write(str(e))
    finally:
        print("慢慢逛")
Example 17
def update(consumer, df, df2, df3, df4, df5, df6, df7):
    i = 0
    while i < 50:
        i = i + 1
        msg = consumer.poll(timeout=1.0)
        if msg is None:
            continue
        if msg.error():
            # Error or event
            if msg.error().code() == KafkaError._PARTITION_EOF:
                # End of partition event
                sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
                                 (msg.topic(), msg.partition(), msg.offset()))
            elif msg.error():
                # Error
                raise KafkaException(msg.error())
        else:
            # Proper message
            sys.stderr.write(
                '%% %s [%d] at offset %d with key %s:\n' %
                (msg.topic(), msg.partition(), msg.offset(), str(msg.key())))
            print(msg.value())
            # parse the message
            val = msg.value()
            result = json.loads(val)
            #t = datetime.strptime(result['time'], "%H:%M:%S.%f")
            t = float(result['timestamp'])
            vINX = float(result['INX_perChange'])
            vMSFT = float(result['MSFT_perChange'])
            vBA = float(result['BA_perChange'])
            volMSFT = float(result['MSFT_vol'])

            # add to the dataframe
            df.loc[len(df)] = [t, vINX]
            df2.loc[len(df2)] = [t, vMSFT]
            df3.loc[len(df3)] = [t, vBA]
            df4.loc[len(df4)] = [t, vMSFT / vINX]
            df5.loc[len(df5)] = [t, vBA / vINX]
            if len(df6['value']) > 0:
                df6.loc[len(df6)] = [
                    t, (vMSFT - df6['value'].iloc[-1]) / DELTAT
                ]
                df7.loc[len(df7)] = [
                    t, (volMSFT - df7['value'].iloc[-1]) / DELTAT
                ]
                # df6.loc[len(df6)] = 1
                # df7.loc[len(df7)] = 1
            else:
                df6.loc[len(df6)] = 0
                df7.loc[len(df7)] = 0

            # Sliding window of the WINDOW_SIZE most recent values
            if len(df['value']) > WINDOW_SIZE:
                r.data_source.data['y'] = list(df['value'])[-WINDOW_SIZE:]
                r.data_source.data['x'] = range(len(list(
                    df['value'])))[-WINDOW_SIZE:]
                dots.data_source.data['y'] = list(df['value'])[-WINDOW_SIZE:]
                dots.data_source.data['x'] = range(len(list(
                    df['value'])))[-WINDOW_SIZE:]
            else:
                r.data_source.data['y'] = list(df['value'])
                r.data_source.data['x'] = range(len(list(df['value'])))
                dots.data_source.data['y'] = list(df['value'])
                dots.data_source.data['x'] = range(len(list(df['value'])))

            # Sliding window of the WINDOW_SIZE most recent values
            if len(df2['value']) > WINDOW_SIZE:
                r2.data_source.data['y'] = list(df2['value'])[-WINDOW_SIZE:]
                r2.data_source.data['x'] = range(len(list(
                    df2['value'])))[-WINDOW_SIZE:]
                dots2.data_source.data['y'] = list(df2['value'])[-WINDOW_SIZE:]
                dots2.data_source.data['x'] = range(len(list(
                    df2['value'])))[-WINDOW_SIZE:]
            else:
                r2.data_source.data['y'] = list(df2['value'])
                r2.data_source.data['x'] = range(len(list(df2['value'])))
                dots2.data_source.data['y'] = list(df2['value'])
                dots2.data_source.data['x'] = range(len(list(df2['value'])))

            # Sliding window of the WINDOW_SIZE most recent values
            if len(df3['value']) > WINDOW_SIZE:
                r3.data_source.data['y'] = list(df3['value'])[-WINDOW_SIZE:]
                r3.data_source.data['x'] = range(len(list(
                    df3['value'])))[-WINDOW_SIZE:]
                dots3.data_source.data['y'] = list(df3['value'])[-WINDOW_SIZE:]
                dots3.data_source.data['x'] = range(len(list(
                    df3['value'])))[-WINDOW_SIZE:]
            else:
                r3.data_source.data['y'] = list(df3['value'])
                r3.data_source.data['x'] = range(len(list(df3['value'])))
                dots3.data_source.data['y'] = list(df3['value'])
                dots3.data_source.data['x'] = range(len(list(df3['value'])))

            # Sliding window of the WINDOW_SIZE most recent values
            if len(df4['value']) > WINDOW_SIZE:
                r4.data_source.data['y'] = list(df4['value'])[-WINDOW_SIZE:]
                r4.data_source.data['x'] = range(len(list(
                    df4['value'])))[-WINDOW_SIZE:]
                dots4.data_source.data['y'] = list(df4['value'])[-WINDOW_SIZE:]
                dots4.data_source.data['x'] = range(len(list(
                    df4['value'])))[-WINDOW_SIZE:]
            else:
                r4.data_source.data['y'] = list(df4['value'])
                r4.data_source.data['x'] = range(len(list(df4['value'])))
                dots4.data_source.data['y'] = list(df4['value'])
                dots4.data_source.data['x'] = range(len(list(df4['value'])))

            # Sliding window of the WINDOW_SIZE most recent values
            if len(df5['value']) > WINDOW_SIZE:
                r5.data_source.data['y'] = list(df5['value'])[-WINDOW_SIZE:]
                r5.data_source.data['x'] = range(len(list(
                    df5['value'])))[-WINDOW_SIZE:]
                dots5.data_source.data['y'] = list(df5['value'])[-WINDOW_SIZE:]
                dots5.data_source.data['x'] = range(len(list(
                    df5['value'])))[-WINDOW_SIZE:]
            else:
                r5.data_source.data['y'] = list(df5['value'])
                r5.data_source.data['x'] = range(len(list(df5['value'])))
                dots5.data_source.data['y'] = list(df5['value'])
                dots5.data_source.data['x'] = range(len(list(df5['value'])))

            # Sliding window of the WINDOW_SIZE most recent values
            if len(df6['value']) > WINDOW_SIZE:
                r6.data_source.data['y'] = list(df6['value'])[-WINDOW_SIZE:]
                r6.data_source.data['x'] = range(len(list(
                    df6['value'])))[-WINDOW_SIZE:]
                dots6.data_source.data['y'] = list(df6['value'])[-WINDOW_SIZE:]
                dots6.data_source.data['x'] = range(len(list(
                    df6['value'])))[-WINDOW_SIZE:]
            else:
                r6.data_source.data['y'] = list(df6['value'])
                r6.data_source.data['x'] = range(len(list(df6['value'])))
                dots6.data_source.data['y'] = list(df6['value'])
                dots6.data_source.data['x'] = range(len(list(df6['value'])))

            # Sliding window of the WINDOW_SIZE most recent values
            if len(df7['value']) > WINDOW_SIZE:
                r7.data_source.data['y'] = list(df7['value'])[-WINDOW_SIZE:]
                r7.data_source.data['x'] = range(len(list(
                    df7['value'])))[-WINDOW_SIZE:]
                dots7.data_source.data['y'] = list(df7['value'])[-WINDOW_SIZE:]
                dots7.data_source.data['x'] = range(len(list(
                    df7['value'])))[-WINDOW_SIZE:]
            else:
                r7.data_source.data['y'] = list(df7['value'])
                r7.data_source.data['x'] = range(len(list(df7['value'])))
                dots7.data_source.data['y'] = list(df7['value'])
                dots7.data_source.data['x'] = range(len(list(df7['value'])))
Example 18
def get_trans():
    props = {
        'bootstrap.servers': '10.1.1.133:9092',  # Where is the Kafka cluster? (replace with the cluster to connect to)
        'group.id': 'peter',  # consumer group name (replace with your own student ID)
        'auto.offset.reset':
        'latest',  # whether to start from the partitions/offsets this consumer group has not yet read (earliest)
        'enable.auto.commit': False,  # whether to enable auto commit
        'error_cb': error_cb  # callback invoked for client-level errors
    }

    # Step 2. Create a Kafka Consumer instance
    consumer = Consumer(props)
    # Step 3. Specify the topic name to subscribe to
    topicName = 'transaction'
    # Step 4. Subscribe the Consumer to the specified topic on the Kafka cluster
    consumer.subscribe([topicName])

    # Step 5. Keep pulling incoming messages from Kafka
    try:
        while True:
            records_pulled = False  # track whether any valid record was pulled

            # Ask Kafka for any new messages
            records = consumer.consume(num_messages=500, timeout=1.0)  # batch read
            if not records:
                continue

            for record in records:
                # Check for errors
                if record is None:
                    continue
                if record.error():
                    # Error or event
                    if record.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        sys.stderr.write(
                            '%% %s [%d] reached end at offset %d\n' %
                            (record.topic(), record.partition(),
                             record.offset()))
                    else:
                        # Error
                        raise KafkaException(record.error())
                else:
                    records_pulled = True

                    # ** business logic / message processing goes here **
                    # Extract the relevant metadata
                    # topic = record.topic()
                    # partition = record.partition()
                    # offset = record.offset()
                    # timestamp = record.timestamp()
                    # Extract msgKey and msgValue
                    try:
                        msgKey2 = try_decode_utf8(record.key())
                        msgValue2 = try_decode_utf8(record.value())

                        sendmsg_trans = {msgKey2: msgValue2}
                        print(sendmsg_trans)
                        return sendmsg_trans

                    finally:
                        client = pymongo.MongoClient(
                            "mongodb+srv://peter:[email protected]/ceb101?retryWrites=true&w=majority"
                        )

                        mydb = client.wow
                        mycol = mydb['fit']
                        mycol.insert_many([sendmsg_trans])

                    # Print the metadata plus the msgKey & msgValue
                    # print('%s-%d-%d : (%s , %s)' % (topic, partition, offset, msgKey, msgValue))
                    # test1_msg = ("%s" , "%s" % (msgKey, msgValue)) #('%s : %s' % (msgKey,msgValue))

            # Synchronous commit
            # if records_pulled:
            #     offsets = consumer.commit(asynchronous=False)
            #     print_sync_commit_result(offsets)

    except KeyboardInterrupt as e:
        sys.stderr.write('Aborted by user\n')
    except Exception as e:
        sys.stderr.write(str(e))

    finally:
        consumer.commit(asynchronous=False)
        # Step 6. Close the Consumer instance's connection
        consumer.close()
Example 19
 def _error_cb(self, error):
     if error.fatal():
         raise KafkaException(error)
     logger.info("Received non-fatal kafka error: %s", error)
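
This _error_cb method is wired into the client through the error_cb configuration key; a minimal sketch of registering it inside the owning class (broker and group id are placeholders) is:

conf = {
    'bootstrap.servers': 'localhost:9092',  # placeholder
    'group.id': 'my-group',                 # placeholder
    'error_cb': self._error_cb,             # fatal errors raise, the rest are logged
}
self.consumer = Consumer(conf)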
Example 20
    def handle(self, *args, **kwargs):
        c = Consumer(**CONFIG)
        topic = django.conf.settings.CLOUDKARAFKA_TOPIC_GEONAMES
        c.subscribe([topic])
        logger.info('Subscribed to {0} topic \n'.format(topic))
        try:
            while True:
                msg = c.poll(timeout=1.0)
                if msg is None:
                    continue
                if msg.error():
                    # Error or event
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        sys.stderr.write(
                            '{0} [{1}] reached end at offset {2}\n'
                            ''.format(msg.topic(), msg.partition(),
                                      msg.offset()))
                    elif msg.error():
                        # Error
                        raise KafkaException(msg.error())
                else:
                    geoname_item = msg.value()
                    geoname_item = geoname_item.decode()
                    geoname_item = json.loads(geoname_item)
                    geoname_values = {}
                    for key, value in geoname_item.items():
                        if key == 'osm_id':
                            try:
                                osm_rel = \
                                    models.OpenStreetMap.objects.get(
                                        osm_id=value
                                    )
                                value = osm_rel.id
                            except models.OpenStreetMap.DoesNotExist:
                                value = None

                        if key == 'osm_id' and not value:
                            continue

                        geoname_values[key] = value

                    try:
                        geoname = models.GeoName.objects.get(
                            geoname_id=geoname_values['geoname_id'])
                        for attr, attr_val in geoname_values.items():
                            attr_val = attr_val or ''
                            if attr != 'geoname_id':
                                setattr(geoname, attr, attr_val)

                        geoname.save()
                    except models.GeoName.DoesNotExist:
                        models.GeoName.objects.create(**geoname_values)

                    sys.stderr.write('{0} [{1}] at offset {2}\n'
                                     ''.format(msg.topic(), msg.partition(),
                                               msg.offset()))
                    logger.info(geoname_item)

        except KeyboardInterrupt:
            logger.warning('Aborted by user\n')

        # Close down consumer to commit final offsets.
        c.close()
Example 21
 def fail_fast(err, msg):
     if err is not None:
         print("Kafka producer delivery error: {}".format(err), file=sys.stderr)
         print("Bailing out...", file=sys.stderr)
         # TODO: should it be sys.exit(-1)?
         raise KafkaException(err)
Example 22
    def run(self):
        logger.debug("Starting snuba query subscriber")
        self.offsets.clear()

        def on_assign(consumer, partitions):
            updated_partitions = []
            for partition in partitions:
                if self.resolve_partition_force_offset:
                    partition = self.resolve_partition_force_offset(partition)
                    updated_partitions.append(partition)

                if partition.offset == OFFSET_INVALID:
                    updated_offset = None
                else:
                    updated_offset = partition.offset
                self.offsets[partition.partition] = updated_offset
            if updated_partitions:
                self.consumer.assign(updated_partitions)
            logger.info(
                "query-subscription-consumer.on_assign",
                extra={
                    "offsets": six.text_type(self.offsets),
                    "partitions": six.text_type(partitions),
                },
            )

        def on_revoke(consumer, partitions):
            partition_numbers = [partition.partition for partition in partitions]
            self.commit_offsets(partition_numbers)
            for partition_number in partition_numbers:
                self.offsets.pop(partition_number, None)
            logger.info(
                "query-subscription-consumer.on_revoke",
                extra={
                    "offsets": six.text_type(self.offsets),
                    "partitions": six.text_type(partitions),
                },
            )

        self.consumer = Consumer(self.cluster_options)
        if settings.KAFKA_CONSUMER_AUTO_CREATE_TOPICS:
            # This is required for confluent-kafka>=1.5.0, otherwise the topics will
            # not be automatically created.
            admin_client = AdminClient(self.admin_cluster_options)
            wait_for_topics(admin_client, [self.topic])

        self.consumer.subscribe([self.topic], on_assign=on_assign, on_revoke=on_revoke)

        try:
            i = 0
            while True:
                message = self.consumer.poll(0.1)
                if message is None:
                    continue

                error = message.error()
                if error is not None:
                    raise KafkaException(error)

                i = i + 1

                with sentry_sdk.start_transaction(
                    op="handle_message",
                    name="query_subscription_consumer_process_message",
                    sampled=True,
                ), metrics.timer("snuba_query_subscriber.handle_message"):
                    self.handle_message(message)

                # Track latest completed message here, for use in `shutdown` handler.
                self.offsets[message.partition()] = message.offset() + 1

                if i % self.commit_batch_size == 0:
                    logger.debug("Committing offsets")
                    self.commit_offsets()
        except KeyboardInterrupt:
            pass

        self.shutdown()
Example n. 23
def on_delivery(err, msg):
    if err is not None:
        raise KafkaException(err)

def kafkaMysqlRedisLinebot():
    # Step 1. Configure the connection settings for the Kafka cluster
    # Consumer configuration
    # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
    props = {
        'bootstrap.servers': 'kafka:29092',  # Where is the Kafka cluster? (replace with the cluster to connect to)
        'group.id': 'iii',  # Consumer group name (replace with your own student ID)
        'auto.offset.reset': 'earliest',  # Start reading from the earliest offset
        'error_cb': error_cb  # Callback function that receives error events
    }

    # Step 2. Create a Kafka Consumer instance
    consumer = Consumer(props)
    # Step 3. Specify the topic name to subscribe to
    topicName = 'Shopping_list2'
    # Step 4. Subscribe the consumer to the topic on the Kafka cluster
    consumer.subscribe([topicName],
                       on_assign=print_assignment,
                       on_revoke=print_revoke)

    # Step 5. Keep pulling new messages from Kafka
    try:
        while True:
            # Ask Kafka to hand over any new messages
            records = consumer.consume(num_messages=500, timeout=1.0)  # batch read
            if records is None:
                continue

            for record in records:
                # Check for errors
                if record is None:
                    continue
                if record.error():
                    # Error or event
                    if record.error().code() == KafkaError._PARTITION_EOF:
                        print('')
                        # End of partition event
                        # sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
                        #                 (record.topic(), record.partition(), record.offset()))
                    else:
                        # Error
                        raise KafkaException(record.error())
                else:
                    # ** Business logic and message processing happens here **
                    # Extract the relevant metadata
                    topic = record.topic()
                    partition = record.partition()
                    offset = record.offset()
                    timestamp = record.timestamp()
                    # Extract msgKey and msgValue
                    msgKey = try_decode_utf8(record.key())
                    msgValue = try_decode_utf8(record.value())

                    if msgKey != 'end':
                        # After pulling the data from Kafka, reshape it so MySQL can be queried
                        msgValue2 = msgValue.split(' ')
                        # Look up the cart items' basic data in Redis and build the 'value' string
                        message = ''
                        for i in range(len(msgValue2)):
                            price = redis.get('{}'.format(msgValue2[i]))
                            price = int(price.decode('utf-8'))
                            message += '{}:{}:{}:{}'.format(
                                msgValue2[i], price, 1, price * 1)
                            if i < len(msgValue2) - 1:
                                message += ','
                        # Store the user's cart data in Redis (key: userID, value: cart data)
                        redis.set(msgKey, message)
                        redis.expire(msgKey, 600)
                        # Write the userID to a txt file so checkout knows which user to fetch from Redis
                        with open('trade_user.txt', 'w',
                                  encoding='utf-8') as f:
                            f.write(msgKey)
                    else:
                        # When the user leaves the checkout area, look up which user to fetch from Redis
                        with open('trade_user.txt', 'r',
                                  encoding='utf-8') as f:
                            userID = f.read()
                        # Convert the timestamp to a format MySQL can store
                        timestamp = timestamp[1] / 1000
                        timestamp = time.localtime(timestamp)
                        timestamp = time.strftime("%Y-%m-%d %H:%M:%S",
                                                  timestamp)
                        # Fetch and reshape the user's cart purchase info from Redis
                        content = redis.get('{}'.format(userID))
                        trade_info = content.decode('utf-8').split(',')
                        # Store the user's purchase info in MySQL
                        for i in trade_info:
                            sql = '''select productID from product where productName = '{}';'''.format(
                                i.split(':')[0])
                            cursor.execute(sql)
                            productID = cursor.fetchall()[0][0]
                            sql2 = '''INSERT INTO shoppinglist (userID,shoppingdate,productID,quantity) VALUE ('{}','{}','{}',1);'''.format(
                                userID, timestamp, productID)
                            cursor.execute(sql2)
                            conn.commit()
                        # After the transaction finishes, notify the member via an HTTP request that the transaction is complete
                        url = 'http://localhost:5000/thank/{}'.format(userID)
                        headers = {
                            'User-Agent':
                            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'
                        }
                        requests.get(url=url, headers=headers)

                    # Print the metadata along with msgKey & msgValue
                    print('%s-%d-%d : (%s , %s)' %
                          (topic, partition, offset, msgKey, msgValue))

    except KeyboardInterrupt as e:
        sys.stderr.write('Aborted by user\n')
    except Exception as e:
        sys.stderr.write(str(e))

    finally:
        # Step 6. Close the consumer instance's connection
        consumer.close()
        # MySQL
        cursor.close()
        conn.close()
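
The SQL above is built with str.format, which breaks if a product name contains a quote and is open to injection; a sketch of the same two statements using parameter binding, assuming a DB-API driver with the %s paramstyle (e.g. PyMySQL) behind the same cursor and conn objects:

for item in trade_info:
    product_name = item.split(':')[0]
    # Let the driver escape the values instead of interpolating them into the SQL text.
    cursor.execute('SELECT productID FROM product WHERE productName = %s;', (product_name,))
    productID = cursor.fetchall()[0][0]
    cursor.execute(
        'INSERT INTO shoppinglist (userID, shoppingdate, productID, quantity) VALUES (%s, %s, %s, 1);',
        (userID, timestamp, productID))
    conn.commit()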
Example n. 25
def ack(err, msg):
    if err:
        self._loop.call_soon_threadsafe(result.set_exception,
                                        KafkaException(err))
    else:
        self._loop.call_soon_threadsafe(result.set_result, msg)
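
ack references self._loop and result from an enclosing scope that is not shown; a plausible sketch of the asyncio-bridging pattern it implies (the AIOProducer class and its layout are assumptions, and a real version also needs something to call Producer.poll() so delivery callbacks actually fire):

import asyncio
from confluent_kafka import Producer, KafkaException

class AIOProducer:
    def __init__(self, config, loop=None):
        self._loop = loop or asyncio.get_event_loop()
        self._producer = Producer(config)

    def produce(self, topic, value):
        # The delivery callback runs on librdkafka's thread, so the outcome is
        # handed back to the event loop with call_soon_threadsafe.
        result = self._loop.create_future()

        def ack(err, msg):
            if err:
                self._loop.call_soon_threadsafe(result.set_exception, KafkaException(err))
            else:
                self._loop.call_soon_threadsafe(result.set_result, msg)

        self._producer.produce(topic, value, on_delivery=ack)
        return result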
Example n. 26
def delivery_callback(err, msg):
    if err:
        raise KafkaException(err)
    else:
        pass
Example n. 27
            if records is None:
                continue

            for record in records:
                # Check for errors
                if record is None:
                    continue
                if record.error():
                    # Detect whether we have reached the end of the partition
                    if record.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
                                         (record.topic(), record.partition(), record.offset()))
                    else:
                        # Error
                        raise KafkaException(record.error())
                else:
                    record_counter += 1
                    # ** Business logic and message processing happens here **
                    # Extract the relevant metadata
                    topic = record.topic()
                    partition = record.partition()
                    offset = record.offset()
                    timestamp = record.timestamp()
                    # Extract msgKey and msgValue
                    msgKey = try_decode_utf8(record.key())      # << this is the row_id
                    msgValue = try_decode_utf8(record.value())  # << this is the taxi trip data

                    taxidata = json.loads(msgValue)

                    # As the exercise requires, extract pickup_datetime below
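
The snippet is cut off before the extraction itself; a guess at the next step, assuming taxidata is a dict that carries a 'pickup_datetime' field as the comment suggests:

pickup_datetime = taxidata.get('pickup_datetime')  # hypothetical field name
print('%s-%d-%d : row %s picked up at %s' %
      (topic, partition, offset, msgKey, pickup_datetime))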
Example n. 28
    def poll(self,
             group_id,
             timeout=1,
             max_records=1,
             poll_attempts=10,
             only_value=True,
             auto_create_topics=True,
             decode_format=None,
             fail_on_deserialization=False):
        """Fetch and return messages from assigned topics / partitions as list.
        - ``timeout`` (int): Seconds spent waiting in poll if data is not available in the buffer.\n
        - ``max_records`` (int): maximum number of messages to get from poll. Default: 1.
        If 0, returns immediately with any records that are available currently in the buffer,
        else returns empty. Must not be negative. Default: `1`
        - ``poll_attempts`` (int): Attempts to consume messages and endless looping prevention.
        Sometimes the first messages are None or the topic could be empty. Default: `10`.
        - ``only_value`` (bool): Return only message.value(). Default: `True`.
        - ``decode_format`` (str) - If you need to decode data to specific format
            (See https://docs.python.org/3/library/codecs.html#standard-encodings). Default: None.
        - ``auto_create_topics`` (bool): Consumers no longer trigger auto creation of topics,
            will be removed in future release. If True then the error message UNKNOWN_TOPIC_OR_PART is ignored.
            Default: `True`.
        - ``fail_on_deserialization`` (bool): If True and message deserialization fails, will raise a SerializerError
            exception; on False will just stop the current poll and return the message so far. Default: `False`.            
        """

        messages = []
        while poll_attempts > 0:
            msg = None
            try:
                msg = self.consumers[group_id].poll(timeout=timeout)
            except SerializerError as err:
                error = 'Message deserialization failed for {}: {}'.format(
                    msg, err)
                if fail_on_deserialization:
                    raise SerializerError(error)
                else:
                    print(error)
                    break

            if msg is None:
                poll_attempts -= 1
                continue

            if msg.error():
                # Workaround for the deprecation of automatic topic creation:
                # ignore UNKNOWN_TOPIC_OR_PART while auto_create_topics is enabled.
                if msg.error().code() == KafkaError.UNKNOWN_TOPIC_OR_PART and auto_create_topics:
                    continue
                else:
                    raise KafkaException(msg.error())

            if only_value:
                messages.append(msg.value())
            else:
                messages.append(msg)

            if len(messages) == max_records:
                break

        if decode_format:
            messages = self._decode_data(data=messages,
                                         decode_format=decode_format)

        return messages
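
A short usage sketch for this keyword, assuming it lives on a library instance (lib here) whose self.consumers dict already holds a consumer subscribed under the given group id; the group id and decode format are placeholders:

# Hypothetical call: fetch up to 10 message values as UTF-8 strings.
msgs = lib.poll(group_id='my-group', timeout=2, max_records=10, decode_format='utf-8')
for value in msgs:
    print(value)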