コード例 #1
2
ファイル: commit_offset.py プロジェクト: goibibo/woof
def commit_offsets_in_kafka(broker, group_name, group_dict):
    cons = KafkaConsumer(bootstrap_servers=broker, group_id=group_name)
    for topic_name, topic_dict in group_dict.iteritems():
        for partition, offset in topic_dict.iteritems():
            logging.info(
                "Commiting {} {} to topic {} and partition number {}".format(
                    group_name, offset, topic_name, partition))
            tp = TopicPartition(topic_name, int(partition))
            cons.assign([tp])
            cons.seek(tp, int(offset))
            # commit it
            cons.commit()
            time.sleep(8)
    cons.close()
    time.sleep(1)
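A minimal usage sketch for the helper above; the broker address, group name, and the nested {topic: {partition: offset}} mapping are illustrative placeholders, not values from the source project:

if __name__ == '__main__':
    # hypothetical offsets to restore, keyed by topic and then partition
    offsets = {'orders': {'0': 42, '1': 17}}
    commit_offsets_in_kafka('localhost:9092', 'order-processors', offsets)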
コード例 #2
0
def Consumer():
    data = []
    start_time = timer()
    name = multiprocessing.current_process().name
    # print(name,'Starting')
    while True:
        print(name, 'Starting')
        consumer = KafkaConsumer('topic-weather-stations', group_id='consumer-weather-data',
                                 bootstrap_servers=['vm1:9092'], consumer_timeout_ms=15000,
                                 heartbeat_interval_ms=1000)
        consumer.zookeeper_connect = 'vm1:2181'
        try:
            for message in consumer:
                data.append(message.value)
                if len(data) > 15000:
                    insert_weather_stations(data, name)
                    data = []
                else:
                    continue
        finally:
            print(name, 'Exiting now')
            if len(data) > 0:
                insert_weather_stations(data, name)
                data = []

            sys.stdout.flush()
            consumer.close()
コード例 #3
0
def Consumer():
    data = []
    start_time = timer()
    name = multiprocessing.current_process().name
    while True:
        print(name, 'Starting')
        consumer = KafkaConsumer('topic-weather-data', group_id='consumer-weather-data',
                                 bootstrap_servers=['vm1:9092'], consumer_timeout_ms=14000,
                                 heartbeat_interval_ms=1000)
        consumer.zookeeper_connect = 'vm1:2181'
        try:
            for message in consumer:
                data.append(message.value)
                if len(data) > 5000:
                    insert_raw_data(data, name)
                    # collect_data(data)
                    data = []
                else:
                    continue
        finally:
            print(name, 'Exiting now', len(data))
            if len(data) > 0:
                try:
                    insert_raw_data(data, name)
                    # collect_data(data)
                    data = []
                except Exception as e:
                    print('Error due to ', e)
            sys.stdout.flush()
            print(name, 'Closing out', timer() - start_time)
            consumer.close()
コード例 #4
    def step(self):

        # Connect to Cassandra
        cluster = Cluster(['192.168.3.2'],
                          port= 9042)

        session = cluster.connect()

        # Link to kafka
        consumer = KafkaConsumer('qc-qualitative-persist',
                                 bootstrap_servers="192.168.3.5:9092")


        # Process observations
        for msg in consumer:
            split_msg = string.split(msg.value,"::")

            if(len(split_msg) == 9):

                session.execute(
                    """
                    INSERT INTO observation.observations_qc_qualitative (feature, procedure, observableproperty,
                    year, month, phenomenontimestart, qualifier, qualifiervalue, comment)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """,
                    (split_msg[0], split_msg[1], split_msg[2], int(split_msg[3]), int(split_msg[4]),
                     int(split_msg[5]), split_msg[6], split_msg[7], split_msg[8])
                )

        # Close link to kafka
        consumer.close()
        cluster.shutdown()
コード例 #5
    def step(self):

        # Connect to Cassandra
        cluster = Cluster(['192.168.3.2'],
                          port= 9042)

        session = cluster.connect()

        # Link to kafka
        consumer = KafkaConsumer('observation-persist',
                                 bootstrap_servers="192.168.3.5:9092")


        # Process observations
        for msg in consumer:
            split_msg = string.split(msg.value,"::")

            if(len(split_msg) == 16)    :

                session.execute(
                    """
                    INSERT INTO observation.observations_numeric (feature, procedure, observableproperty,
                    year, month, phenomenontimestart, phenomenontimeend, value, quality, accuracy, status,
                    processing, uncertml, comment, location, parameters)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """,
                    (split_msg[0],split_msg[1],split_msg[2],int(split_msg[3]),int(split_msg[4]),int(split_msg[5]),int(split_msg[6]),
                     float(split_msg[7]),split_msg[8],float(split_msg[9]),split_msg[10],split_msg[11],split_msg[12],
                     split_msg[13],split_msg[14],split_msg[15])
                )

        # Close link to kafka
        consumer.close()
        cluster.shutdown()
コード例 #6
0
    def dump_data(
            cls, topic=None, timeout=None, poll_timeout=None,
            enable_auto_commit=False):

        # TODO: remove this hack
        # HACK
        log.debug("Wait 5s to allow kafka node to be ready")
        time.sleep(5)

        topic = topic or cls.TOPIC
        endpoints = list(get_kafka_endpoints())
        log.debug("Connect to kafka as consumer - %s", endpoints)
        if not endpoints:
            raise RuntimeError("Kafka endpoints not defined")

        consumer = KafkaConsumer(
            topic,
            auto_offset_reset='earliest',
            enable_auto_commit=enable_auto_commit,
            value_deserializer=cls.SERIALIZER.loads,
            bootstrap_servers=endpoints,
            consumer_timeout_ms=timeout or -1,
        )

        # TODO use native kafka-python poll
        if poll_timeout:
            while True:
                yield list(data.value for data in consumer)
                time.sleep(poll_timeout / 1000.0)
        else:
            for data in consumer:
                yield data.value

        consumer.close()
コード例 #7
0
    def run(self):
        consumer = KafkaConsumer(bootstrap_servers='localhost:9092',auto_offset_reset='earliest',consumer_timeout_ms=1000)
        consumer.subscribe(['my-test-topic'])
        while not self.stop_event.is_set():
            for message in consumer:
                print(message)
                if self.stop_event.is_set():
                    break

        consumer.close()
コード例 #8
0
ファイル: consumer.py プロジェクト: venicegeo/fm-mvp
    def consume(self):
        consumer = None
        try:
            print "\nCONSUMER TOPICS = " + str(self.listener_topics)
            consumer = KafkaConsumer(*self.listener_topics,
                                     client_id=self.name,
                                     group_id='kafka',
                                     bootstrap_servers=self.connection_string,
                                     auto_offset_reset='smallest')
            self._set_alive(True)
        except Exception as e:
            print "A consumer couldn't be created."
            print e
        while is_running(self.name):
            for message in consumer.fetch_messages():
                asset_success = True
                message_success = True
                if not is_running(self.name):
                    break
                try:
                    try:
                        key = Key.objects.get(listener=Listener.objects.get(listener_topic=message.topic),
                                              listener_key=message.key)
                        feature_data = json.loads(message.value)
                        for asset_type in ['photos', 'videos', 'sounds']:
                            if feature_data.get('properties').get(asset_type):
                                import urllib2
                                urls = []
                                for index, value in enumerate(feature_data.get('properties').get(asset_type)):
                                    asset, created = write_asset(key, value, asset_type, feature_data.get('properties').get('{}_url'.format(asset_type))[index-1])
                                    if not asset:
                                        asset_success = False
                                    else:
                                        print "Asset {} was written.".format(value)
                                    urls += [asset.asset_data.url]
                                feature_data['properties']['{}_url'.format(asset_type)] = urls
                                print "URLS:" + str(urls)
                        if not write_message(key, json.dumps(feature_data)):
                            message_success = False
                        else:
                            print "Message {} was written.".format(feature_data.get('properties').get('city'))
                    except Exception as e:
                        if 'DoesNotExist' in str(e):
                            continue
                        else:
                            print e
                            message_success = False

                except KeyboardInterrupt:
                    break
                if message_success and asset_success:
                    consumer.task_done(message)
                    consumer.commit()
        consumer.close()
        self._set_alive(False)
コード例 #9
0
ファイル: consumer.py プロジェクト: zhouhang/Btrade
class KafkaConsumerServer(object):
    def __init__(self,topic,server):
        if type(server)!=list:
            server=[server]
        self._consumer= KafkaConsumer(topic,
                         bootstrap_servers=server,
                         group_id="consumer-group",
                         value_deserializer=lambda m: json.loads(m.decode('utf8')))
    def getConsumer(self):
        return self._consumer
    def close(self):
        self._consumer.close()
コード例 #10
0
ファイル: mlsql.py プロジェクト: Ji3jin/streamingpro
    def from_kafka(args, mgr):
        consumer = KafkaConsumer(kafka_param["topic"],
                                 group_id=kafka_param["group_id"],
                                 bootstrap_servers=kafka_param["bootstrap.servers"],
                                 auto_offset_reset="earliest",
                                 enable_auto_commit=False
                                 )

        max_records = args["max_records"]
        no_message_count = 0
        no_message_time = 5
        try:
            stop_count = 0
            fail_msg_count = 0
            while True:
                messages = consumer.poll(timeout_ms=1000, max_records=max_records)
                queue = mgr.get_queue("input")
                group_msgs_count = 0
                group_msgs = []
                for tp, records in messages.items():
                    for record in records:
                        try:
                            with io.BytesIO(record.value) as f:
                                msg_value = pickle.load(f)
                            if msg_value == "_stop_":
                                stop_count += 1
                            else:
                                group_msgs.append(msg_value)
                                group_msgs_count += 1
                        except:
                            fail_msg_count += 1
                            print("unpickle from kafka fail")
                            sys.stdout.flush()
                            pass
                if len(group_msgs) > 0:
                    no_message_count = 0
                    queue.put(group_msgs, block=True)

                if len(group_msgs) == 0 and no_message_count < 10:
                    time.sleep(no_message_time)
                    no_message_count += 1

                if (stop_count >= internal_system_param["stopFlagNum"] and group_msgs_count == 0) or (
                                no_message_count >= 10 and group_msgs_count == 0):
                    queue.put(["_stop_"], block=True)
                    print(
                        "no message from kafka, send _stop_ message. no_message_count={},stop_count={},stopFlagNum={}".format(
                            no_message_count, stop_count, internal_system_param["stopFlagNum"]))
                    sys.stdout.flush()
                    break
        finally:
            consumer.close()
コード例 #11
0
ファイル: KafkaAPI.py プロジェクト: shiladityasen/Locus
def poll(topic, offset=0, hostname=None, port_num=None, max_timeout=100):
    hostname, port_num = insure_host_port(hostname, port_num)
    server = hostname+':'+str(port_num)
    partition = 0  # NOTE: `partition` is not defined in this excerpt; 0 is assumed here
    topic_partition = TopicPartition(topic, partition)

    consumer = KafkaConsumer(bootstrap_servers=server, group_id=None)
    consumer.assign([topic_partition])
    consumer.seek(topic_partition, offset)
    msgs = consumer.poll(max_timeout).values()
    consumer.close()
    if len(msgs) > 0:
        return msgs[0]
    else:
        return {}
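A possible call to the poll helper above; the topic name and connection details are placeholders. It returns the first batch of records fetched within max_timeout, or an empty dict if nothing arrived:

records = poll('sensor-readings', offset=0, hostname='localhost', port_num=9092, max_timeout=500)
for record in records:
    print(record.offset, record.value)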
コード例 #12
0
ファイル: kafkabus.py プロジェクト: scrapinghub/frontera
class Consumer(BaseStreamConsumer):
    """
    Used in DB and SW worker. SW consumes per partition.
    """
    def __init__(self, location, enable_ssl, cert_path, topic, group, partition_id):
        self._location = location
        self._group = group
        self._topic = topic
        kwargs = _prepare_kafka_ssl_kwargs(cert_path) if enable_ssl else {}
        self._consumer = KafkaConsumer(
            bootstrap_servers=self._location,
            group_id=self._group,
            max_partition_fetch_bytes=10485760,
            consumer_timeout_ms=100,
            client_id="%s-%s" % (self._topic, str(partition_id) if partition_id is not None else "all"),
            request_timeout_ms=120 * 1000,
            heartbeat_interval_ms=10000,
            **kwargs
        )

        # explicitly causing consumer to bootstrap the cluster metadata
        self._consumer.topics()

        if partition_id is not None:
            self._partitions = [TopicPartition(self._topic, partition_id)]
            self._consumer.assign(self._partitions)
        else:
            self._partitions = [TopicPartition(self._topic, pid) for pid in self._consumer.partitions_for_topic(self._topic)]
            self._consumer.subscribe(topics=[self._topic])

    def get_messages(self, timeout=0.1, count=1):
        result = []
        while count > 0:
            try:
                m = next(self._consumer)
                result.append(m.value)
                count -= 1
            except StopIteration:
                break
        return result

    def get_offset(self, partition_id):
        for tp in self._partitions:
            if tp.partition == partition_id:
                return self._consumer.position(tp)
        raise KeyError("Can't find partition %d", partition_id)

    def close(self):
        self._consumer.commit()
        self._consumer.close()
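A rough instantiation of the Consumer class above, assuming SSL is disabled and using placeholder broker, topic, and group names:

consumer = Consumer(location='localhost:9092', enable_ssl=False, cert_path=None,
                    topic='frontier-todo', group='sw-group', partition_id=0)
print(consumer.get_messages(timeout=0.1, count=10))
print(consumer.get_offset(0))
consumer.close()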
コード例 #13
0
ファイル: trader.py プロジェクト: intrad/fooltrader
    def __consume_topic_with_func(self, topic, func):
        consumer = KafkaConsumer(topic,
                                 client_id='fooltrader',
                                 group_id=self.trader_id,
                                 value_deserializer=lambda m: json.loads(m.decode('utf8')),
                                 bootstrap_servers=[KAFKA_HOST])
        topic_partition = TopicPartition(topic=topic, partition=0)
        start_timestamp = int(self.start_date.timestamp())

        partition_map_offset_and_timestamp = consumer.offsets_for_times({topic_partition: start_timestamp})

        if partition_map_offset_and_timestamp:
            offset_and_timestamp = partition_map_offset_and_timestamp[topic_partition]

            if offset_and_timestamp:
                # partition  assigned after poll, and we could seek
                consumer.poll(5, 1)
                consumer.seek(topic_partition, offset_and_timestamp.offset)
                end_offset = consumer.end_offsets([topic_partition])[topic_partition]
                consuming_time = self.current_time
                for message in consumer:
                    message_time = pd.Timestamp(message.value['timestamp'])
                    # if an end date is set, stop once it is reached or kafka has no more data
                    if self.end_date and (message_time > self.end_date or message.offset + 1 == end_offset):
                        consumer.close()
                        break

                    # compare the received timestamp with the last consumed timestamp
                    time_delta = message_time.date() - consuming_time.date()

                    # to compute the end-of-day account accurately, wait until every level has finished that day's quotes
                    if time_delta.days >= 1:
                        self.barrier.wait()
                        self.account_service.save_account(self.current_time, trading_close=True)

                    getattr(self, func)(message.value)

                    consuming_time = message_time
                    # the current time follows the smallest level
                    if self.level_step.get(func) == self.step:
                        self.current_time = message_time

            else:
                consumer.poll(5, 1)
                consumer.seek(topic_partition, consumer.end_offsets([topic_partition])[topic_partition] - 1)
                message = consumer.poll(5000, 1)
                kafka_start_date = datetime.fromtimestamp(message[topic_partition][0].timestamp).strftime(
                    TIME_FORMAT_DAY)
                self.logger.warn("start:{} is after the last record:{}".format(self.start_date, kafka_start_date))
コード例 #14
0
def kafka_data_consumer(consumer_id):
    logger.info("Started metric consumer number " + consumer_id)
    (brokers, topic, filter_hosts, all_metrics_set) = getKafkaConfig()
    if agent_config_vars["clientId"] == "":
        consumer = KafkaConsumer(bootstrap_servers=brokers, auto_offset_reset='latest',
                                 consumer_timeout_ms=1000 * parameters['timeout'],
                                 group_id=agent_config_vars['groupId'])
    else:
        consumer = KafkaConsumer(bootstrap_servers=brokers, auto_offset_reset='latest',
                                 consumer_timeout_ms=1000 * parameters['timeout'],
                                 group_id=agent_config_vars['groupId'], client_id=agent_config_vars["clientId"])
    consumer.subscribe([topic])
    parseConsumerMessages(consumer, all_metrics_set, normalization_ids_map, filter_hosts)
    consumer.close()
    logger.info("Closed log consumer number " + consumer_id)
コード例 #15
0
ファイル: load_example.py プロジェクト: 0ste00/kafka-python
    def run(self):
        consumer = KafkaConsumer(bootstrap_servers='localhost:9092',
                                 auto_offset_reset='earliest')
        consumer.subscribe(['my-topic'])
        self.valid = 0
        self.invalid = 0

        for message in consumer:
            if len(message.value) == msg_size:
                self.valid += 1
            else:
                self.invalid += 1

            if consumer_stop.is_set():
                break

        consumer.close()
コード例 #16
0
def kafka_data_consumer(consumer_id):
    logger.info("Started log consumer number " + consumer_id)
    # Kafka consumer configuration
    (brokers, topic, filter_hosts) = get_kafka_config()
    if agentConfigVars["clientId"] == "":
        consumer = KafkaConsumer(bootstrap_servers=brokers,
                             auto_offset_reset='latest', consumer_timeout_ms=1000 * parameters['timeout'],
                             group_id=agentConfigVars['groupId'])
    else:
        logger.info(agentConfigVars["clientId"])
        consumer = KafkaConsumer(bootstrap_servers=brokers,
                                 auto_offset_reset='latest', consumer_timeout_ms=1000 * parameters['timeout'],
                                 group_id=agentConfigVars['groupId'], client_id = agentConfigVars["clientId"])
    consumer.subscribe([topic])
    parse_consumer_messages(consumer, filter_hosts)
    consumer.close()
    logger.info("Closed log consumer number " + consumer_id)
コード例 #17
0
ファイル: test_producer.py プロジェクト: kngenie/kafka-python
def test_end_to_end(kafka_broker, compression):

    if compression == 'lz4':
        # LZ4 requires 0.8.2
        if version() < (0, 8, 2):
            return
        # python-lz4 crashes on older versions of pypy
        elif platform.python_implementation() == 'PyPy':
            return

    connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)])
    producer = KafkaProducer(bootstrap_servers=connect_str,
                             retries=5,
                             max_block_ms=30000,
                             compression_type=compression,
                             value_serializer=str.encode)
    consumer = KafkaConsumer(bootstrap_servers=connect_str,
                             group_id=None,
                             consumer_timeout_ms=30000,
                             auto_offset_reset='earliest',
                             value_deserializer=bytes.decode)

    topic = random_string(5)

    messages = 100
    futures = []
    for i in range(messages):
        futures.append(producer.send(topic, 'msg %d' % i))
    ret = [f.get(timeout=30) for f in futures]
    assert len(ret) == messages
    producer.close()

    consumer.subscribe([topic])
    msgs = set()
    for i in range(messages):
        try:
            msgs.add(next(consumer).value)
        except StopIteration:
            break

    assert msgs == set(['msg %d' % i for i in range(messages)])
    consumer.close()
コード例 #18
0
    def get_all_messages(self):
        consumer = KafkaConsumer(group_id=self.group_id,
                                 bootstrap_servers=self.bootstrap_servers)
        tp = TopicPartition(self.kafka_topic, 0)

        # register to the topic
        consumer.assign([tp])

        # obtain the last offset value
        consumer.seek_to_end(tp)
        last_offset = consumer.position(tp)
        consumer.seek_to_beginning(tp)

        arr = []
        for message in consumer:
            arr.append(message.value.decode('utf-8'))
            if message.offset == last_offset - 1:
                break

        consumer.close()
        return arr
コード例 #19
0
    def debug(self, topic):
        c=KafkaConsumer(bootstrap_servers=kafka_hosts, client_id=self._client_id , group_id=None, api_version=(0,10))

        # assign/subscribe topic
        partitions=c.partitions_for_topic(topic)
        if not partitions: raise Exception("Topic "+topic+" not exist")
        c.assign([TopicPartition(topic,p) for p in partitions])

        # seek to beginning if needed
        c.seek_to_beginning()

        # fetch messages
        while True:
            partitions=c.poll(100)
            if partitions:
                for p in partitions:
                    for msg in partitions[p]:
                        yield msg.value.decode('utf-8')
            yield ""

        c.close()
コード例 #20
0
ファイル: inference_cache.py プロジェクト: pinpom/singa-auto
    def pop_queries_for_worker(self, worker_id: str,
                               batch_size: int) -> List[Query]:
        name = f'workers_{worker_id}_queries'

        query_consumer = KafkaConsumer(name,
                                       bootstrap_servers=self.connection_url,
                                       auto_offset_reset='earliest',
                                       group_id=QUERIES_QUEUE)

        partition = TopicPartition(name, 0)
        partitiondic = query_consumer.end_offsets([partition])
        offsetend = partitiondic.get(partition, None)
        if offsetend == 0:
            query_consumer.close()
            return []
        try:
            queries = []
            while True:
                record = next(query_consumer)
                queries.append(record.value)
                query_consumer.commit()
                if record.offset >= offsetend - 1 or len(
                        queries) == batch_size:
                    break

            queries = [pickle.loads(x) for x in queries]
            query_consumer.close()
            return queries
        except KafkaError:
            query_consumer.close()
            return []
コード例 #21
def main():
    try:
        #conenction to kafka and setting the parameters and arguments
        consumer = KafkaConsumer('data',
                                 group_id='my-group',
                                 consumer_timeout_ms=10000,
                                 fetch_max_wait_ms=100,
                                 bootstrap_servers=['35.180.144.76'])

        #fetching data from topic
        consumer.poll()
        #we are checking the results from select query so we if its the first time that we are running
        #the application. We need to know if it is the first time or not.
        if (len(check_offset_number()) == 0):
            consumer.seek_to_beginning()

        while True:
            #we are using while true so the program will run forever until we stop it.
            #we are using the insert_into_db function to pass the values into db
            for message in consumer:
                print(message.offset)
                data = eval(message.value)
                if (data["ad_type"] == "Free"):
                    insert_into_db(data["id"], data["customer_id"],
                                   data["created_at"], data["text"],
                                   data["ad_type"], None, None, None, None,
                                   message.offset)
                else:
                    insert_into_db(data["id"], data["customer_id"],
                                   data["created_at"], data["text"],
                                   data["ad_type"], data["price"],
                                   data["currency"], data["payment_type"],
                                   data["payment_cost"], message.offset)
            #we are using commit_async so we dont wait for the values to commited and then continue
            #in that way it will be much faster.
            consumer.commit_async()
    except Exception:
        print("There was an error")
    finally:
        consumer.close()
コード例 #22
0
def consume_save(display_topic, offset, db_info, event):
    # logger
    sav_logger = setup_logger('sav_log', 'logs/saver_log.log', logging.DEBUG)
    sav_logger.info("New saving starts")

    # init db connection
    db = pymysql.connect(db_info['host'], db_info['user'], db_info['passwd'],
                         db_info['db'])
    cursor = db.cursor()
    # consume predicted sentiment
    consumer = KafkaConsumer(display_topic,
                             auto_offset_reset=offset,
                             bootstrap_servers=['localhost:9092'],
                             api_version=(0, 10),
                             consumer_timeout_ms=1000)
    # consume one by one and save the result to db
    flag = True
    num_trials = 5
    while flag:
        if event.is_set():
            break
        for pred in consumer:
            if event.is_set():
                flag = False
                break
            pred = pred.value
            try:
                pred = int(pred.decode("utf-8"))
                # save sentiment
                query = "INSERT INTO PRED (pred) VALUES ({})".format(pred)
                cursor.execute(query)
                db.commit()
                sav_logger.debug('Successfully saved sentiment {}'.format(pred))
            except Exception as ex:
                sav_logger.debug('failed')
                sav_logger.error(ex)
                db.rollback()
        num_trials -= 1
    consumer.close()
    db.close()
コード例 #23
0
class ConsumeEvents():
    def __init__(self,
                 ip_topic_name,
                 ip_topic_partition,
                 ip_topic_group_id=None,
                 op_topic_name=None,
                 op_topic_group_id=None,
                 producer_name=None,
                 df_name=None,
                 consumer_name=None):

        self.ip_topic_name = ip_topic_name
        self.ip_topic_partition = ip_topic_partition
        self.op_topic_name = op_topic_name
        self.ip_topic_group_id = ip_topic_group_id
        self.op_topic_group_id = op_topic_group_id
        self.producer_name = producer_name
        #Dynamic consumer and dataframe to be used for consuming messages
        self.consumer_name = "consumer_" + self.ip_topic_name + "_" + str(
            self.ip_topic_partition)
        self.df_name = self.consumer_name + "_" + self.ip_topic_name + "_df"

    def consume_events_and_publish(self):
        """ Initiate consumer for reading messages from input topic"""
        try:
            self.consumer_name = KafkaConsumer(
                bootstrap_servers=bootstrap_servers_list,
                group_id=self.ip_topic_group_id,
                value_deserializer=lambda x: json.loads(x.decode('utf-8')))
        except:
            print("Error!!! Unable to initialize consumer")

        tp = TopicPartition(self.ip_topic_name, self.ip_topic_partition)
        """ assign partition to consumer"""
        self.consumer_name.assign([tp])
        """ obtain the last offset value"""
        self.consumer_name.seek_to_end(tp)
        lastOffset = self.consumer_name.position(tp)
        self.consumer_name.seek_to_beginning(tp)

        if lastOffset == 0:
            self.consumer_name.close()
            print("No messages to consume from partition: ",
                  self.ip_topic_partition)
        else:
            try:
                for message in self.consumer_name:
                    print("Offset:", message.offset)
                    print("Partition:", self.ip_topic_partition)
                    """ Apply Transformation to the incoming messages and publish them to output topic"""
                    df = self.df_name
                    df = pd.read_json(json.dumps(message.value))
                    print(len(df.index))
                    """ Consumer reached end of reading producer topic messages"""
                    if message.offset == lastOffset - 1:
                        break
            except:
                self.consumer_name.close()
            """ Close the consumer as soon as its completed reading messages from input topic"""
            self.consumer_name.close()
コード例 #24
0
class Opera_Kafka():
    def __init__(self, bootstrap_servers, topic, group_id):
        self.bootstrap_servers = bootstrap_servers
        self.producer = KafkaProducer(
            bootstrap_servers=[self.bootstrap_servers])
        self.consumer = KafkaConsumer(
            topic,
            group_id=group_id,
            bootstrap_servers=[self.bootstrap_servers],
            auto_offset_reset='latest')

    # def Producer(self):
    #     self.producer = KafkaProducer(bootstrap_servers=[self.bootstrap_servers])
    #
    #
    # def Consumer(self,topic,group_id):
    #     self.consumer = KafkaConsumer(topic,group_id=group_id,bootstrap_servers=[self.bootstrap_servers],auto_offset_reset='latest')

    def send_msg(self, topic, msg):
        try:
            self.producer.send(topic, value=msg)
        except KafkaError as e:
            print(e)
            self.producer.close(100)
        finally:
            pass

    def poll_persist_msg(self, topic_producer):
        try:
            # message=self.consumer.poll(timeout_ms=0)
            # print(message)
            for msg in self.consumer:
                # print(msg)
                f.write_to_file(str(msg) + '\n')
                self.send_msg(topic_producer, msg)
        except KafkaError as e:
            print(e)
            self.consumer.close()
        finally:
            pass
コード例 #25
0
class KafkaC(MQConsumer):

    @classmethod
    def new(cls, conf: MQConfig) -> 'KafkaC':
        c = cls(conf)
        return c

    def __init__(self, conf: MQConfig):
        self.topic = conf.topic
        self.kafka = KafkaConsumer(
                self.topic,
                bootstrap_servers=conf.bootstrap_servers,
                client_id=conf.client_id,
                group_id=conf.group_id,
                enable_auto_commit=False,
                auto_commit_interval_ms=conf.auto_commit_interval_ms,)

    def get_stream(self) -> Iterator:
        return self.kafka

    def close(self):
        self.kafka.close()
コード例 #26
0
class KafkaConsumerM3():
    def __init__(self, kafkaBroker, topic, pt, replay):
        self.kafkaBroker = kafkaBroker
        self.topic = topic
        self.partition = pt

        if replay == True:
            offsetReset = 'earliest'
        else:
            offsetReset = 'latest'

        self.consumer = KafkaConsumer(
            bootstrap_servers=self.kafkaBroker,
            auto_offset_reset=offsetReset,
            value_deserializer=lambda x: json.loads(x.decode('utf-8')))
        tp = TopicPartition(self.topic, self.partition)
        self.consumer.assign([tp])

    def GetData(self):
        import sys
        data = []
        numMsgs = 0

        try:
            while True:
                message = next(self.consumer)
                if message.value['Status'] == 'Start':
                    print(message.offset)
                data.append(message.value)
                numMsgs = numMsgs + 1

                #print(message.value)
                if message.value['Status'] == 'End':
                    print('Num Messages Received:{}'.format(numMsgs))
                    return data
        except KeyboardInterrupt:
            print("Closing Consumer")
            self.consumer.close()
            pass
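A sketch of how KafkaConsumerM3 might be used; broker, topic, and partition are placeholders, and GetData blocks until a message whose 'Status' field is 'End' arrives:

consumer = KafkaConsumerM3('localhost:9092', 'telemetry', 0, replay=True)
records = consumer.GetData()
print(len(records))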
コード例 #27
0
def run_consumer(topic_name):
    try:
        consumer = KafkaConsumer(topic_name, auto_offset_reset='latest',\
                bootstrap_servers=['10.168.0.2:9092'], api_version=(0, 10), consumer_timeout_ms=1000)
        
        while(True):
            for msg in consumer:
                with open('RTAL.log','a') as logf:
                    logf.write('{0}\n'.format(msg.value.decode('utf-8')))
        """
        while True:
            if not consumer:
                sleep(20)
            for msg in consumer:
                logf.write(msg.value.decode('utf-8'))
        """
    except KeyboardInterrupt:
        pass
    except Exception as e:
        print("Exception {0} occurred".format(e))
    finally:
        consumer.close()
コード例 #28
0
def consume_kafka_messages():
    print("consume_kafka_topics called")
    parsed_topic_name = 'test'
    # Notify if a recipe has more than 200 calories

    consumer = KafkaConsumer(parsed_topic_name,
                             auto_offset_reset='earliest',
                             enable_auto_commit=True,
                             bootstrap_servers=['localhost:9092'],
                             api_version=(0, 10),
                             consumer_timeout_ms=1000)
    for msg in consumer:
        # TODO: do some error handling here i.e. if the message is the right one or not.
        #print("loaded json is:", loaded_json)
        loaded_json = json.loads(msg.value)
        #decoded = json.loads(json_input)
        print(json.dumps(loaded_json, sort_keys=True, indent=4))
        #print("loaded json is:", loaded_json)
        print(loaded_json['ok'])
        parse_apartment_data(loaded_json['data'])

    consumer.close()
コード例 #29
0
def kafka_data_consumer(consumer_id):
    logger.info("Started log consumer number " + consumer_id)
    # Kafka consumer configuration
    (brokers, topic, filter_hosts) = get_kafka_config()
    if agentConfigVars["clientId"] == "":
        consumer = KafkaConsumer(bootstrap_servers=brokers,
                                 auto_offset_reset='latest',
                                 consumer_timeout_ms=1000 *
                                 parameters['timeout'],
                                 group_id=agentConfigVars['groupId'])
    else:
        logger.info(agentConfigVars["clientId"])
        consumer = KafkaConsumer(bootstrap_servers=brokers,
                                 auto_offset_reset='latest',
                                 consumer_timeout_ms=1000 *
                                 parameters['timeout'],
                                 group_id=agentConfigVars['groupId'],
                                 client_id=agentConfigVars["clientId"])
    consumer.subscribe([topic])
    parse_consumer_messages(consumer, filter_hosts)
    consumer.close()
    logger.info("Closed log consumer number " + consumer_id)
コード例 #30
0
    def consumeKafka():
        consumer = KafkaConsumer(bootstrap_servers='localhost:9092',
                                 auto_offset_reset='earliest',
                                 consumer_timeout_ms=1000)
        consumer.subscribe(['KeyEvent'])

        while True:
            for message in consumer:
                url = "/cat/events"
                try:
                    print(message.value)
                    msg = json.loads(message.value)
                    href = msg["href"]
                    if href:
                        url = href
                except Exception as e:
                    print(e)
                    traceback.print_exc()
                yield "id: %s\nevent: %s\ndata: %s\n\n" % (message.offset, url,
                                                           message.value)

        consumer.close()
コード例 #31
0
def python_kafka_consumer_performance(topic=topic):

    print("\n>>> Connect Kafka in {} by kafka-python as consumer". format(bootstrap_servers))

    consumer = KafkaConsumer(
        bootstrap_servers=bootstrap_servers,
        auto_offset_reset = 'earliest', # start from the earliest offset in the topic
        group_id = None # no consumer group, so no offset commits
    )
    msg_consumed_count = 0

    consumer_start = time.time()
    consumer.subscribe([topic])
    for msg in consumer:
        msg_consumed_count += 1

        if msg_consumed_count >= msg_count:
            break

    consumer_timing = time.time() - consumer_start
    consumer.close()
    return consumer_timing
コード例 #32
0
    def take_prediction_for_worker(self, worker_id: str,
                                   query_id: str) -> Union[Prediction, None]:
        name = f'workers_{worker_id}_{query_id}_prediction'

        prediction_consumer = KafkaConsumer(
            name,
            api_version=API_VERSION,
            bootstrap_servers=self.connection_url,
            auto_offset_reset='earliest',
            group_id=PREDICTIONS_QUEUE)
        prediction = None
        try:
            prediction = next(prediction_consumer).value
            prediction_consumer.commit()
            prediction = pickle.loads(prediction)
        except KafkaError:
            pass
        prediction_consumer.close()
        logger.info(
            f'Took prediction for query "{query_id}" from worker "{worker_id}"'
        )
        return prediction
コード例 #33
0
    def run(self):
        consumer = KafkaConsumer(bootstrap_servers=['localhost:9092'],
                                 auto_offset_reset='earliest',
                                 consumer_timeout_ms=1000)
        consumer.subscribe(['topic_ls3'])

        # with table.batch_writer(overwrite_by_pkeys=['id']) as batch:
        while not self.stop_event.is_set():
            for message in consumer:
                # print(message.topic, message.key.decode("utf-8"), umsgpack.unpackb(message.value))
                tags, text = umsgpack.unpackb(message.value)
                table.put_item(
                    Item={
                        'id': message.key.decode("utf-8"),
                        'hashtags': tags,
                        'text': text
                    }
                )
                if self.stop_event.is_set():
                    break

        consumer.close()
コード例 #34
0
ファイル: kafka_test.py プロジェクト: netdetpla/NDP-executor
class Kafka_consumer():

    def __init__(self, kafkahost, kafkaport, kafkatopic, groupid):
        self.kafkaHost = kafkahost
        self.kafkaPort = kafkaport
        self.kafkatopic = kafkatopic
        self.groupid = groupid
        self.consumer = KafkaConsumer(self.kafkatopic, group_id=self.groupid,
                                      bootstrap_servers='{kafka_host}:{kafka_port}'.format(
                                          kafka_host=self.kafkaHost,
                                          kafka_port=self.kafkaPort))

    def consume_data(self):
        try:
            for message in self.consumer:
                # print json.loads(message.value)
                yield message
        except KeyboardInterrupt as e:
            print(e)

    def close(self):
        self.consumer.close()
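A possible way to drive the Kafka_consumer class above; host, port, topic, and group id are placeholders:

c = Kafka_consumer('localhost', 9092, 'ndp-tasks', 'ndp-group')
for message in c.consume_data():
    print(message.offset, message.value)
c.close()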
コード例 #35
0
def check_for_new_messages(topic):
    consumer = KafkaConsumer(
        group_id='notary-service',
        bootstrap_servers=settings.KAFKA_SERVERS,
        key_deserializer=lambda m: json.loads(m.decode('ascii')),
        value_deserializer=lambda m: json.loads(m.decode('ascii')),
        auto_offset_reset='earliest',
        enable_auto_commit=True,
        consumer_timeout_ms=1000,
    )
    consumer.subscribe([topic])

    for message in consumer:
        # message value and key are raw bytes -- decode if necessary!
        # e.g., for unicode: `message.value.decode('utf-8')`
        print("%s:%d:%d: key=%s value=%s" %
              (message.topic, message.partition, message.offset, message.key,
               message.value))
        create_ns_message(message)

    consumer.close()
    pass
コード例 #36
0
class KafkaConsumerM2():
    def __init__(self, kafkaBroker, topic):
        self.kafkaBroker = kafkaBroker

        self.consumer = KafkaConsumer(
            bootstrap_servers=self.kafkaBroker,
            auto_offset_reset='latest',
            value_deserializer=lambda x: json.loads(x.decode('utf-8')))

        tp = TopicPartition(topic, 0)
        self.consumer.assign([tp])

    def WaitforSequenceStart(self):
        try:
            while True:
                message = next(self.consumer)
                return message.value

        except KeyboardInterrupt:
            print("Closing Message 2 Consumer")
            self.consumer.close()
            pass
コード例 #37
0
    def messages(self, topic, timeout=None):
        c = KafkaConsumer(topic,
                          bootstrap_servers=KAFKA_HOSTS,
                          client_id=self._client_id,
                          group_id=self._group,
                          api_version=(0, 10))

        partitions = c.partitions_for_topic(topic)
        if not partitions:
            raise Exception("Topic " + topic + " not exist")

        timeout1 = 100 if timeout is None else timeout
        while True:
            partitions = c.poll(timeout1)
            if partitions:
                for p in partitions:
                    for msg in partitions[p]:
                        yield msg.value.decode('utf-8')
            if timeout is not None:
                yield ""

        c.close()
コード例 #38
0
    def run(self):
        consumer = KafkaConsumer(bootstrap_servers='localhost:9092',
                                 auto_offset_reset='earliest',
                                 consumer_timeout_ms=1000)
        consumer.subscribe(['my-topic1'])

        connection = happybase.Connection(host='localhost', port=9090)
        connection.open()

        table = connection.table('my-topic11')
        count = 0
        while not self.stop_event.is_set():
            for message in consumer:
                count += 1
                table.put('row-key' + str(count), {'cf:col1': message.value})
                if self.stop_event.is_set():
                    break

        for key, data in table.scan():
            print(key, data)

        consumer.close()
コード例 #39
0
def describe_group(args, topic):
    """
    Get group descriptions. Important are the partitions and last committed
    offset.
    """
    global bootstrap
    out = ()

    consumer = KafkaConsumer(
        bootstrap_servers=bootstrap,
        group_id="backbeat-replication-group-{0}".format(args.destination),
        enable_auto_commit=False,
    )
    topics = consumer.topics()
    if not topic in topics:
        return False

    for part in consumer.partitions_for_topic(topic):
        tp = TopicPartition(topic, part)
        consumer.assign([tp])
        committed = consumer.committed(tp)
        consumer.seek_to_end(tp)
        last_offset = consumer.position(tp)
        try:
            out += (
                {
                    "topic": topic,
                    "partition": part,
                    "committed": committed,
                    "last_offset": last_offset,
                    "lag": (last_offset - committed),
                },
            )
        except TypeError:
            sys.stderr.write("bad/missing info on consumer group (doesn't exist?)\n")
            sys.exit(1)

    consumer.close(autocommit=False)
    return out
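One way describe_group might be invoked; the global bootstrap list and the argparse namespace are assumed to be set up elsewhere, and all values here are placeholders:

import argparse

bootstrap = ['localhost:9092']  # assumed module-level global read by describe_group
args = argparse.Namespace(destination='remote-site')
for entry in describe_group(args, 'backbeat-replication') or []:
    print('partition {partition}: committed={committed}, lag={lag}'.format(**entry))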
コード例 #40
0
    def run(self):
        consumer = KafkaConsumer(topicName,
                     bootstrap_servers=[ kafkaHost + ':9092'],
                     value_deserializer=lambda m: json.loads(m.decode('utf-8')))

        connection = happybase.Connection(host=hbaseHost, port=9090)
        connection.open()
        
        while not self.stop_event.is_set():
            for message in consumer:
                if message.topic in kafka_offset:
                    if kafka_offset[message.topic] == message.offset:
                        continue
                else: 
                    kafka_offset[message.topic] = message.offset
                    
                kafka_offset[message.topic] = message.offset
                data = json.loads(str(message.value).replace("\'", "\""))
          
                if 'table_name' in data:
                    table_name = data['table_name']
                else:
                    table_name = topicName
                    
                table = connection.table(table_name)
                
                if 'table_name' in data:  
                    b = table.batch()
                    
                    data_list = data['datalist']
                    for i in data_list:  
                        b.put(i['rowkey'], i['data'])
                    b.send()
                
                break
                if self.stop_event.is_set():
                    break
                    
        consumer.close()        
コード例 #41
0
ファイル: consumer.py プロジェクト: A00430396/Take_Home_test
    def run(self):
        consumer = KafkaConsumer(bootstrap_servers='kafka:9092',
                                 auto_offset_reset='latest',
                                 consumer_timeout_ms=1000)
        consumer.subscribe(['stock'])

        while not self.stop_event.is_set():
            for message in consumer:
                res = json.loads(message.value.decode())
                if res['name'] in self.data:
                    key = res['name']
                    value = self.data[key]
                    self.data[key] = ((value[0] * value[1] + res['price']) /
                                      (value[1] + 1), value[1] + 1)
                    print(self.data)
                else:
                    self.data[res['name']] = (res['price'], 1)

                if self.stop_event.is_set():
                    break

        consumer.close()
コード例 #42
0
def main():
    global NAME
    NAME = getLastFourOfLocalIP()
    KAFKA_CLIENT_ID = NAME
    KAFKA_GROUP_ID = NAME

    global applications

    consumer = KafkaConsumer(KAFKA_TOPIC,
                             bootstrap_servers=KAFKA_HOST,
                             client_id=KAFKA_CLIENT_ID,
                             group_id=KAFKA_GROUP_ID,
                             consumer_timeout_ms=KAFKA_CONSUMER_TIMEOUT)
    producer = KafkaProducer(
        bootstrap_servers=KAFKA_HOST,
        value_serializer=lambda v: json.dumps(v).encode(KAFKA_JSON_ENCODING),
        retries=KAFKA_SEND_RETRIES,
        retry_backoff_ms=KAFKA_RETRY_BACKOFF)

    # Listen For Deployment Command
    global isNotFirstRun
    global shouldContinue

    while (shouldContinue):
        for msg in consumer:
            if (shouldContinue and isNotFirstRun):
                shouldContinue = handleInboundMessage(msg)
        monitorApplications(producer)
        producer.send(KAFKA_TOPIC, buildStatusMessage())
        isNotFirstRun = True

    try:
        producer.close()
        consumer.close()
    except:
        print("Error shutting down clients")

    reboot()
コード例 #43
0
def python_kafka_consumer_performance(consumer_number):
    file = open("consmer_res" + str(consumer_number) + ".txt", "a")
    #  topic = TOPIC
    msg_count = 0
    print("in multip!")
    print(topic)
    file.write("\n{}".format(time.time()))
    # file.write(str(time.perf_counter()))
    consumer = KafkaConsumer(group_id='my-group',
                             auto_offset_reset='earliest',
                             bootstrap_servers=[kafka_server + ":9092"],
                             consumer_timeout_ms=20000,
                             max_partition_fetch_bytes=max_msg_size)

    msg_consumed_count = 0
    print("msg_count: {}".format(msg_count))
    consumer.subscribe([topic])
    consumer_start = time.time()

    for message in consumer:
        #    print("hejhej")
        # print("{}, msg nb: {}".format(consumer_number, msg_consumed_count))
        msg_consumed_count += 1
        file.write("\n{}".format(time.time()))
    #  img = cv2.imdecode(np.frombuffer(message.value, dtype=np.uint16), -1)
    #   fin2 = Image.fromarray(img)
    # if msg_consumed_count >= msg_count:
    #     break

    # consumer waits 2 sec before closing if there are no new messages
    consumer_timing = time.time() - consumer_start - 2

    print("{} consumer_time: {} msg_count: {}".format(consumer_number,
                                                      consumer_timing,
                                                      msg_consumed_count))
    consumer.close()
    return "done!"
コード例 #44
0
    def run(self):
        if hasattr(os, 'getppid'):  # only available on Unix
            print 'parent process:', os.getppid()
            procID = os.getppid()
        #Bootstraps an instance of a Kafka consumer.
        #Initializes the consumer and identifies the docker server.
        #kafka-spotify is listed in /etc/hosts with the ip of the container
        #Input:
        #  topic to subscribe to: 'test'
        #  Id to identify the consumer should be unique to the connection
        #  Servers kafka is advertising as
        #  Which message rule to subscribe to. 'earliest' will grab the earliest unprocessed message
        #  Timeout limit
        consumer = KafkaConsumer('test',
                                 client_id='python-consumer-%s' % (procID),
                                 bootstrap_servers=['kafka-spotify:9092'],
                                 auto_offset_reset='latest',
                                 consumer_timeout_ms=1000)

        #Alternative way to subscribe to a topic
        #consumer.subscribe(['test'])

        #loop until the thread is stopped by checking the stop event
        while not self.stop_event.is_set():
            #Loop through ConsumerRecord objects in the consumer object
            for message in consumer:
                #print the messages to the screen with a note of the thread/client ID
                #print("python-consumer-%s processed message:  %s" % (procID, message))
                #print the messages to the screen with a note of the thread/client ID,
                #current topic, message offset, and the message value decoded from bytes
                print("python-consumer-%s processed message: %s:%d: value=%s" %
                      (procID, message.topic, message.offset,
                       message.value.decode('utf-8')))
                #break out of the for loop if the thread was notified of closure
                if self.stop_event.is_set():
                    break

        #Close the TCP connection to kafka
        consumer.close()
コード例 #45
0
ファイル: runner.py プロジェクト: cicadatesting/cicada-2
def configure_consumer(topic: str, offset: str) -> KafkaMessage:
    bootstrap_servers = [
        server.strip() for server in getenv("RUNNER_SERVERS").split(",")
    ]
    key_encoding = getenv("RUNNER_KEYENCODING", "utf-8")
    value_encoding = getenv("RUNNER_VALUEENCODING", "utf-8")

    try:
        consumer = KafkaConsumer(
            topic,
            bootstrap_servers=bootstrap_servers,
            key_deserializer=lambda k: k.decode(key_encoding) if k else k,
            value_deserializer=lambda v: v.decode(value_encoding) if v else v,
            auto_offset_reset=offset,
            **extract_auth_parameters(),
        )
    except KafkaError as err:
        raise RuntimeError(f"Unable to create kafka consumer: {err}")

    try:
        yield consumer
    finally:
        consumer.close()
コード例 #46
0
def read_joined():

    consumer = KafkaConsumer(
        bootstrap_servers=
        'b-1.listings-pipeline-beta.jlg1k0.c1.kafka.us-east-1.amazonaws.com:9092,b-2.listings-pipeline-beta.jlg1k0.c1.kafka.us-east-1.amazonaws.com:9092,b-3.listings-pipeline-beta.jlg1k0.c1.kafka.us-east-1.amazonaws.com:9092,b-4.listings-pipeline-beta.jlg1k0.c1.kafka.us-east-1.amazonaws.com:9092,b-5.listings-pipeline-beta.jlg1k0.c1.kafka.us-east-1.amazonaws.com:9092,b-6.listings-pipeline-beta.jlg1k0.c1.kafka.us-east-1.amazonaws.com:9092',
        auto_offset_reset='earliest',
        consumer_timeout_ms=1000000)

    consumer.subscribe(['data_listings_joined_aspen_mls_rets_av_1'])

    data = []
    while True:
        for message in consumer:
            decoded = BytesIO(base64.b64decode(message.value))
            avro = fastavro.schemaless_reader(decoded, MESSAGE_SCHEMA)
            #json_msg = json.dumps(avro)
            #print("%s\n%s" % (avro['trace_id'], avro['payload']))
            print(avro)
            write_joined(avro)

    consumer.close()

    return data
コード例 #47
0
ファイル: consumer_server.py プロジェクト: Wolfgang90/dsn_sfc
class ConsumerServer(KafkaConsumer):
    def __init__(self, topic, **kwargs):
        super().__init__(**kwargs)
        self.topic = topic
        self.consumer = KafkaConsumer(bootstrap_servers="localhost:9092",
                                      request_timeout_ms=1000,
                                      auto_offset_reset="earliest",
                                      max_poll_records=10)
        self.consumer.subscribe(topics=self.topic)

    def consume_data(self):
        try:
            while True:
                for metadata, list_records in self.consumer.poll().items():
                    for record in list_records:
                        if record:
                            print(record.value)
                        else:
                            pass
            time.sleep(0.5)
        except:
            print("Error: Consumer is closed")
            self.consumer.close()
コード例 #48
0
ファイル: kafka_csv_consumer.py プロジェクト: iyersv/TestRepo
def Consumer():
    global data
    start_time = timer()
    consumer = KafkaConsumer('temp', group_id='consumer-temp', bootstrap_servers=['vm1:9092'],
                             consumer_timeout_ms=20000, heartbeat_interval_ms=1000)
    # consumer.subscribe('temp')
    consumer.zookeeper_connect = 'localhost:2181'
    try:
        for message in consumer:
            data.append(message.value)
            # time.sleep(3)
            if len(data) > 5000:
                kafka_insert_data.insert_vals(data)
                data = []
            else:
                continue
            # print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition, message.offset, message.key, message.value))
    finally:
        print('Exiting now')
        if len(data) > 0:
            kafka_insert_data.insert_vals(data)
            data = []

        # consumer.commit_async()
        consumer.close()
コード例 #49
0
ファイル: kafka_connector.py プロジェクト: intrad/fooltrader
def list_topics():
    try:
        consumer = KafkaConsumer(bootstrap_servers=[KAFKA_HOST])
        return consumer.topics()
    finally:
        consumer.close()
コード例 #50
0
ファイル: kafkadump.py プロジェクト: cjzswust/test
def main():
    # initial main parser setup
    parser = argparse.ArgumentParser(
        description='Kafka Dump: Scrapy Cluster Kafka topic dump utility for '
                    'debugging.', add_help=False)
    parser.add_argument('-h', '--help', action=ArgparseHelper,
                        help='show this help message and exit')

    subparsers = parser.add_subparsers(help='commands', dest='command')

    # args to use for all commands
    base_parser = argparse.ArgumentParser(add_help=False)
    base_parser.add_argument('-kh', '--kafka-host', action='store', required=False,
                        help="The override Kafka host")
    base_parser.add_argument('-s', '--settings', action='store', required=False,
                        help="The settings file to read from",
                        default="localsettings.py")
    base_parser.add_argument('-ll', '--log-level', action='store', required=False,
                        help="The log level", default=None,
                        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'])

    # list command
    list_parser = subparsers.add_parser('list', help='List all Kafka topics',
                                        parents=[base_parser])

    # dump command
    dump_parser = subparsers.add_parser('dump', help='Dump a Kafka topic',
                                        parents=[base_parser])
    dump_parser.add_argument('-t', '--topic', action='store', required=True,
                             help="The Kafka topic to read from")
    dump_parser.add_argument('-c', '--consumer', action='store',
                             required=False, default=None,
                             help="The Kafka consumer id to use")
    dump_parser.add_argument('-b', '--from-beginning', action='store_const',
                             required=False, const=True,
                             help="Read the topic from the beginning")
    dump_parser.add_argument('-nb', '--no-body', action='store_const',
                             required=False, const=True, default=False,
                             help="Do not include the raw html 'body' key in"
                             " the json dump of the topic")
    dump_parser.add_argument('-p', '--pretty', action='store_const',
                             required=False, const=True, default=False,
                             help="Pretty print the json objects consumed")
    dump_parser.add_argument('-d', '--decode-base64', action='store_const',
                             required=False, const=True, default=False,
                             help="Decode the base64 encoded raw html body")
    dump_parser.add_argument('-m', '--mongodb', action="store", help="Set mongodb to save webpages")

    args = vars(parser.parse_args())

    wrapper = SettingsWrapper()
    settings = wrapper.load(args['settings'])

    kafka_host = args['kafka_host'] if args['kafka_host'] else settings['KAFKA_HOSTS']
    log_level = args['log_level'] if args['log_level'] else settings['LOG_LEVEL']
    logger = LogFactory.get_instance(level=log_level, name='kafkadump')

    if args['command'] == 'list':
        try:
            logger.debug("Connecting to {0}...".format(kafka_host))
            kafka = SimpleClient(kafka_host)
            logger.info("Connected to {0}".format(kafka_host))
        except KafkaUnavailableError as ex:
            message = "An exception '{0}' occured. Arguments:\n{1!r}" \
                .format(type(ex).__name__, ex.args)
            logger.error(message)
            sys.exit(1)
        logger.debug('Running list command')
        print("Topics:")
        for topic in list(kafka.topic_partitions.keys()):
            print("-", topic)
        kafka.close()
        return 0
    elif args['command'] == 'dump':
        logger.debug('Running dump command')
        topic = args["topic"]
        consumer_id = args["consumer"]

        try:
            logger.debug("Getting Kafka consumer")

            offset = 'earliest' if args["from_beginning"] else 'latest'

            consumer = KafkaConsumer(
                topic,
                group_id=consumer_id,
                bootstrap_servers=kafka_host,
                consumer_timeout_ms=settings['KAFKA_CONSUMER_TIMEOUT'],
                auto_offset_reset=offset,
                auto_commit_interval_ms=settings['KAFKA_CONSUMER_COMMIT_INTERVAL_MS'],
                enable_auto_commit=settings['KAFKA_CONSUMER_AUTO_COMMIT_ENABLE'],
                max_partition_fetch_bytes=settings['KAFKA_CONSUMER_FETCH_MESSAGE_MAX_BYTES'])
        except NoBrokersAvailable as ex:
            logger.error('Unable to connect to Kafka')
            sys.exit(1)

        num_records = 0
        total_bytes = 0
        item = None

        while True:
            try:
                for message in consumer:
                    if message is None:
                        logger.debug("no message")
                        break
                    logger.debug("Received message")
                    val = message.value
                    try:
                        item = json.loads(val)
                        if args['decode_base64'] and 'body' in item:
                            item['body'] = str(base64.b64decode(item['body']))

                        if args['no_body'] and 'body' in item:
                            del item['body']
                    except BaseException as msg:
                        logger.info("Message is not a JSON object")
                        logger.info("base64 error: {}".format(msg))
                        item = val
                    body_bytes = len(item)

                    if args['pretty']:
                        print(json.dumps(item, indent=4))
                    else:
                        print(item)
                    num_records = num_records + 1
                    total_bytes = total_bytes + body_bytes
            except KeyboardInterrupt:
                logger.debug("Keyboard interrupt received")
                break
            except Exception:
                logger.error(traceback.format_exc())
                break

        total_mbs = old_div(float(total_bytes), (1024*1024))
        if item is not None:
            print("Last item:")
            print(json.dumps(item, indent=4))
        if num_records > 0:
            logger.info("Num Records: {n}, Total MBs: {m}, kb per message: {kb}"
                    .format(n=num_records, m=total_mbs,
                            kb=(float(total_bytes) / num_records / 1024)))
        else:
            logger.info("No records consumed")
            num_records = 0

        logger.info("Closing Kafka connection")
        try:
            consumer.close()
        except:
            # Exception is thrown when group_id is None.
            # See https://github.com/dpkp/kafka-python/issues/619
            pass
        return 0
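
A hedged sketch of how this entry point could be invoked; the topic name and settings file below are placeholders chosen for illustration, not values taken from the project.

# Hypothetical invocation of main() above; argument values are placeholders.
import sys

if __name__ == '__main__':
    sys.argv = ['kafkadump.py', 'dump',
                '-t', 'demo.crawled_firehose',   # topic to read
                '-s', 'localsettings.py',        # settings file for SettingsWrapper
                '--from-beginning', '--pretty']
    sys.exit(main())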
Code example #51
File: kafkabus.py Project: Preetwinder/frontera
class Consumer(BaseStreamConsumer):
    """
    Used in DB and SW worker. SW consumes per partition.
    """
    def __init__(self, location, topic, group, partition_id):
        self._location = location
        self._group = group
        self._topic = topic
        self._consumer = KafkaConsumer(
            bootstrap_servers=self._location,
            group_id=self._group,
            max_partition_fetch_bytes=10485760,
            consumer_timeout_ms=100,
            client_id="%s-%s" % (self._topic, str(partition_id) if partition_id is not None else "all"),
            request_timeout_ms=120 * 1000,
        )

        if partition_id is not None:
            self._partition_ids = [TopicPartition(self._topic, partition_id)]
            self._consumer.assign(self._partition_ids)
        else:
            self._partition_ids = [TopicPartition(self._topic, pid) for pid in self._consumer.partitions_for_topic(self._topic)]
            self._consumer.subscribe(topics=[self._topic])
            if self._consumer._use_consumer_group():
                self._consumer._coordinator.ensure_coordinator_known()
                self._consumer._coordinator.ensure_active_group()

        self._consumer._update_fetch_positions(self._partition_ids)
        self._start_looping_call()

    def _start_looping_call(self, interval=60):
        def errback(failure):
            logger.exception(failure.value)
            if failure.frames:
                logger.critical(str("").join(format_tb(failure.getTracebackObject())))
            self._poll_task.start(interval).addErrback(errback)

        self._poll_task = LoopingCall(self._poll_client)
        self._poll_task.start(interval).addErrback(errback)

    def _poll_client(self):
        self._consumer._client.poll()

    def get_messages(self, timeout=0.1, count=1):
        result = []
        while count > 0:
            try:
                m = next(self._consumer)
                result.append(m.value)
                count -= 1
            except StopIteration:
                break
        return result

    def get_offset(self, partition_id):
        for tp in self._partition_ids:
            if tp.partition == partition_id:
                return self._consumer.position(tp)
        raise KeyError("Can't find partition %d" % partition_id)

    def close(self):
        self._poll_task.stop()
        self._consumer.commit()
        # getting kafka client event loop running some more and execute commit
        tries = 3
        while tries:
            self.get_messages()
            sleep(2.0)
            tries -= 1
        self._consumer.close()
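
A minimal sketch of driving this consumer, assuming a running Twisted reactor and a local broker; the broker address, topic and group names are placeholders, not values from the source.

# Hypothetical usage of the Consumer class above; connection details are placeholders.
consumer = Consumer(location="localhost:9092", topic="frontier-done",
                    group="sw-worker", partition_id=0)  # None subscribes to all partitions
for value in consumer.get_messages(timeout=0.1, count=10):
    print(value)
print("next offset for partition 0:", consumer.get_offset(0))
consumer.close()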
Code example #52
File: messageHub.py Project: ibm-cds-labs/pixiedust
class MessagehubStreamingAdapter(StreamingDataAdapter):
    def __init__(self, topic, username, password, prod=True):
        # Create a new context using system defaults, disable all but TLS1.2
        context = ssl.create_default_context()
        context.options &= ssl.OP_NO_TLSv1
        context.options &= ssl.OP_NO_TLSv1_1
        conf = {
            'client_id': 'pixieapp.client.id',
            'group_id': 'pixieapp.group',
            'sasl_mechanism': 'PLAIN',
            'security_protocol': 'SASL_SSL',
            'ssl_context': context,
            "bootstrap_servers": [ "kafka0{}-{}.messagehub.services.us-south.bluemix.net:9093".format(i, "prod01" if prod else "stage1") for i in range(1,6)],
            "sasl_plain_username": username,
            "sasl_plain_password": password,
            "auto_offset_reset":"latest"
        }
        self.consumer = KafkaConsumer(**conf)
        self.consumer.subscribe([topic])
        self.schema = {}
        self.sampleDocCount = 0
        
    def close(self):
        self.consumer.unsubscribe()
        self.consumer.close() 
        
    def tryCast(self, value, t):
        def _innerTryCast(value, t):
            try:
                return t(value)
            except:
                return None

        if isinstance(t, tuple):
            for a in t:
                ret = _innerTryCast(value, a)
                if ret is not None:
                    return ret
            return None
        
        return _innerTryCast(value, t)
        
    def inferType(self, value):
        if isinstance(value, string_types):
            value = self.tryCast(value, integer_types) or self.tryCast(value, float) or value
        return "integer" if value.__class__==int else "float" if value.__class__ == float else "string"
        
    def inferSchema(self, eventJSON):
        if self.sampleDocCount > 20:
            return
        for key,value in iteritems(eventJSON):
            if not key in self.schema:
                self.schema[key] = self.inferType(value)
        self.sampleDocCount = self.sampleDocCount + 1 
    
    def doGetNextData(self):
        msgs = []
        msg = self.consumer.poll(1000, max_records=10)
        if msg is not None:
            for topicPartition,records in iteritems(msg):
                for record in records:
                    if record.value is not None:                    
                        jsonValue = json.loads(record.value.decode('utf-8'))
                        self.inferSchema(jsonValue)
                        msgs.append(jsonValue)
        return msgs
    
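
A hedged sketch of wiring the adapter up; the topic and credentials below are placeholders for a Message Hub / Event Streams service, not values from the source.

# Hypothetical usage of MessagehubStreamingAdapter; topic and credentials are placeholders.
adapter = MessagehubStreamingAdapter(topic="sensor-readings",
                                     username="<service-username>",
                                     password="<service-password>")
try:
    batch = adapter.doGetNextData()   # polls up to 10 records and returns a list of dicts
    print(len(batch), "events, inferred schema:", adapter.schema)
finally:
    adapter.close()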
Code example #53
File: online.py Project: cjzswust/test
class TestRedisMonitor(TestCase):

    maxDiff = None
    queue_key = "link:istresearch.com:queue"
    consumer = None

    def setUp(self):
        self.redis_monitor = RedisMonitor("localsettings.py")
        self.redis_monitor.settings = self.redis_monitor.wrapper.load("localsettings.py")
        self.redis_monitor.logger = MagicMock()
        self.redis_monitor.settings['KAFKA_TOPIC_PREFIX'] = "demo_test"
        self.redis_monitor.settings['STATS_TOTAL'] = False
        self.redis_monitor.settings['STATS_PLUGINS'] = False
        self.redis_monitor.settings['PLUGINS'] = {
            'plugins.info_monitor.InfoMonitor': None,
            'plugins.stop_monitor.StopMonitor': None,
            'plugins.expire_monitor.ExpireMonitor': None,
            'tests.online.CustomMonitor': 100,
        }
        self.redis_monitor.redis_conn = redis.Redis(
            host=self.redis_monitor.settings['REDIS_HOST'],
            port=self.redis_monitor.settings['REDIS_PORT'],
            db=self.redis_monitor.settings['REDIS_DB'])

        self.redis_monitor._load_plugins()
        self.redis_monitor.stats_dict = {}

        self.consumer = KafkaConsumer(
            "demo_test.outbound_firehose",
            bootstrap_servers=self.redis_monitor.settings['KAFKA_HOSTS'],
            group_id="demo-id",
            auto_commit_interval_ms=10,
            consumer_timeout_ms=5000,
            auto_offset_reset='earliest'
        )
        sleep(1)

    def test_process_item(self):
        # set the info flag
        key = "info-test:blah"
        value = "ABC1234"
        self.redis_monitor.redis_conn.set(key, value)

        # process the request
        plugin = list(self.redis_monitor.plugins_dict.items())[0][1]
        self.redis_monitor._process_plugin(plugin)

        # ensure the key is gone
        self.assertEquals(self.redis_monitor.redis_conn.get(key), None)
        self.redis_monitor.close()
        sleep(10)
        # now test the message was sent to kafka
        success = {
            u'info-test': "ABC1234",
            u"appid": u"someapp"
        }

        message_count = 0
        m = next(self.consumer)

        if m is None:
            pass
        else:
            the_dict = json.loads(m.value)
            self.assertEquals(success, the_dict)
            message_count += 1

        self.assertEquals(message_count, 1)

    def tearDown(self):
        # if for some reason the tests fail, we end up falling behind on
        # the consumer
        for m in self.consumer:
            pass
        self.consumer.close()
Code example #54
        # make sure image is jpeg
        if not is_jpg(image_data):
            print("Invalid panda {0}".format(msg.offset))
            continue

        # run tensorflow image recognition
        predictions, top_k = run_inference_on_image(image_data)

        # determine if there is a panda match, if so how good the match is (score should be > 0.5)
        for node_id in top_k:
            # giant panda = 169
            # red panda = 7
            # human_string = node_lookup.id_to_string(node_id)
            score = predictions[node_id]

            if node_id in [7, 169] and score > 0.5:
                # WE HAVE PANDA
                media['panda_node_id'] = str(node_id)

                producer.send("Panda_Image_Tweets", jsonpickle.encode(twete).encode('UTF-8'), str(twete.id).encode('UTF-8'))
                break

            # print('%i : %s (score = %.5f)' % (node_id, human_string, score))

    consumer.commit_async()

producer.close()
consumer.close()
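
The inline comments above describe the matching rule: ImageNet-style node ids 169 (giant panda) and 7 (red panda) with a score above 0.5. A small helper expressing that check is sketched below; the function name and signature are assumptions, not part of the project.

# Sketch only: encapsulates the matching rule from the comments above.
PANDA_NODE_IDS = {7, 169}   # red panda, giant panda

def best_panda_match(predictions, top_k, threshold=0.5):
    """Return (node_id, score) for the first panda prediction above threshold, else None."""
    for node_id in top_k:
        score = predictions[node_id]
        if node_id in PANDA_NODE_IDS and score > threshold:
            return node_id, score
    return None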

Code example #55
File: online.py Project: cjzswust/test
class TestLinkSpider(TestCase):

    example_feed = "{\"allowed_domains\":null,\"allow_regex\":null,\""\
        "crawlid\":\"abc12345\",\"url\":\"istresearch.com\",\"expires\":0,\""\
        "ts\":1461549923.7956631184,\"priority\":1,\"deny_regex\":null,\""\
        "cookie\":null,\"attrs\":null,\"appid\":\"test\",\"spiderid\":\""\
        "link\",\"useragent\":null,\"deny_extensions\":null,\"maxdepth\":0}"

    def setUp(self):
        self.settings = get_project_settings()
        self.settings.set('KAFKA_TOPIC_PREFIX', "demo_test")
        # set up redis
        self.redis_conn = redis.Redis(host=self.settings['REDIS_HOST'],
                                      port=self.settings['REDIS_PORT'],
                                      db=self.settings['REDIS_DB'])
        try:
            self.redis_conn.info()
        except ConnectionError:
            print("Could not connect to Redis")
            # plugin is essential to functionality
            sys.exit(1)

        # clear out older test keys if any
        keys = self.redis_conn.keys("test-spider:*")
        for key in keys:
            self.redis_conn.delete(key)

        # set up kafka to consume the potential result
        self.consumer = KafkaConsumer(
            "demo_test.crawled_firehose",
            bootstrap_servers=self.settings['KAFKA_HOSTS'],
            group_id="demo-id",
            auto_commit_interval_ms=10,
            consumer_timeout_ms=5000,
            auto_offset_reset='earliest'
        )
        time.sleep(1)

    def test_crawler_process(self):
        runner = CrawlerRunner(self.settings)
        d = runner.crawl(CustomSpider)
        d.addBoth(lambda _: reactor.stop())
        # add crawl to redis
        key = "test-spider:istresearch.com:queue"
        self.redis_conn.zadd(key, self.example_feed, -99)

        # run the spider, give 20 seconds to see the url, crawl it,
        # and send to kafka. Then we kill the reactor
        def thread_func():
            time.sleep(20)
            reactor.stop()

        thread = threading.Thread(target=thread_func)
        thread.start()
        reactor.run()

        message_count = 0
        m = next(self.consumer)

        if m is None:
            pass
        else:
            the_dict = json.loads(m.value)
            if the_dict is not None and the_dict['appid'] == 'test' \
                    and the_dict['crawlid'] == 'abc12345':
                message_count += 1

        self.assertEquals(message_count, 1)

    def tearDown(self):
        keys = self.redis_conn.keys('stats:crawler:*:test-spider:*')
        keys = keys + self.redis_conn.keys('test-spider:*')
        for key in keys:
            self.redis_conn.delete(key)

        # if for some reason the tests fail, we end up falling behind on
        # the consumer
        for m in self.consumer:
            pass
        self.consumer.close()
Code example #56
File: test_pillow.py Project: dimagi/commcare-hq
class ChangeFeedPillowTest(SimpleTestCase):
    # note: these tests require a valid kafka setup running

    def setUp(self):
        super(ChangeFeedPillowTest, self).setUp()
        self._fake_couch = FakeCouchDb()
        # use a 'real' db name here so that we don't cause other
        # tests down the line to fail.
        # Specifically KafkaChangeFeedTest.test_multiple_topics_with_partial_checkpoint
        self._fake_couch.dbname = 'test_commcarehq'
        self.consumer = KafkaConsumer(
            topics.CASE,
            bootstrap_servers=settings.KAFKA_BROKERS,
            consumer_timeout_ms=100,
            enable_auto_commit=False,
        )
        try:
            # This initializes the consumer to start listening from the latest offset
            next(self.consumer)
        except StopIteration:
            pass
        self.pillow = get_change_feed_pillow_for_db('fake-changefeed-pillow-id', self._fake_couch)

    def tearDown(self):
        self.consumer.close()
        super(ChangeFeedPillowTest, self).tearDown()

    def test_process_change(self):
        document = {
            'doc_type': 'CommCareCase',
            'type': 'mother',
            'domain': 'kafka-test-domain',
        }
        self.pillow.process_change(Change(id='test-id', sequence_id='3', document=document))

        message = next(self.consumer)
        change_meta = change_meta_from_kafka_message(message.value)
        self.assertEqual(SOURCE_COUCH, change_meta.data_source_type)
        self.assertEqual(self._fake_couch.dbname, change_meta.data_source_name)
        self.assertEqual('test-id', change_meta.document_id)
        self.assertEqual(document['doc_type'], change_meta.document_type)
        self.assertEqual(document['type'], change_meta.document_subtype)
        self.assertEqual(document['domain'], change_meta.domain)
        self.assertEqual(False, change_meta.is_deletion)

        with self.assertRaises(StopIteration):
            next(self.consumer)

    def test_process_change_with_unicode_domain(self):
        document = {
            'doc_type': 'CommCareCase',
            'type': 'mother',
            'domain': 'हिंदी',
        }
        self.pillow.process_change(Change(id='test-id', sequence_id='3', document=document))
        message = next(self.consumer)
        change_meta = change_meta_from_kafka_message(message.value)
        self.assertEqual(document['domain'], change_meta.domain)

    def test_no_domain(self):
        document = {
            'doc_type': 'CommCareCase',
            'type': 'mother',
            'domain': None,
        }
        self.pillow.process_change(Change(id='test-id', sequence_id='3', document=document))
        message = next(self.consumer)
        change_meta = change_meta_from_kafka_message(message.value)
        self.assertEqual(document['domain'], change_meta.domain)

    def test_publish_timestamp(self):
        document = {
            'doc_type': 'CommCareCase',
            'type': 'mother',
            'domain': None,
        }
        self.pillow.process_change(Change(id='test-id', sequence_id='3', document=document))
        message = next(self.consumer)
        change_meta = change_meta_from_kafka_message(message.value)
        self.assertLessEqual(change_meta.publish_timestamp, datetime.utcnow())