Example #1
def main():
    producer = KafkaProducer(
        bootstrap_servers=server,
        security_protocol='SSL',
        ssl_cafile='<CARoot>',
        ssl_certfile='<certificate>',
        ssl_keyfile='<key>',
        value_serializer=lambda v: dumps(v).encode("utf-8"))

    consumer = KafkaConsumer("my_topic",
                             bootstrap_servers=server,
                             security_protocol='SSL',
                             ssl_cafile='<CARoot>',
                             ssl_certfile='<certificate>',
                             ssl_keyfile='<key>',
                             value_deserializer=lambda v: loads(v))

    producer_metrics, consumer_metrics = producer.metrics(), consumer.metrics()
    pprint(producer_metrics)
    pprint(consumer_metrics)

    for msg in consumer:
        print(msg)
        producer.send("new_topic", msg.value)
        producer.flush()
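Example #1 leaves out its imports and the server value. A minimal assumed preamble (the broker address is a placeholder, not from the original) would look roughly like this:

from json import dumps, loads
from pprint import pprint

from kafka import KafkaConsumer, KafkaProducer

# assumed placeholder; point this at the cluster's SSL listener
server = "localhost:9093"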
Example #2
def main(kafka_broker, kafka_topic, file_path):
    """Parse the csv and send every line to the kafka topic"""
    producer = KafkaProducer(
        bootstrap_servers=kafka_broker,
        value_serializer=lambda x: json.dumps(x).encode("utf-8"))
    logger.info("Reading messages from csv")
    for item in read_data(file_path):
        producer.send(kafka_topic, item)

    logger.info("Sending all messages to Kafka")
    producer.close()
    pprint.pprint(producer.metrics())
Example #3
def kafka_producer_test():
    # https://github.com/dpkp/kafka-python#kafkaconsumer
    producer = KafkaProducer(
        bootstrap_servers=['172.26.210.149:9092', '172.26.210.150:9092', '172.26.210.151:9092'],
        value_serializer=lambda v: json.dumps(v).encode('utf-8')
    )

    future = producer.send(topic='service_test', value={'foo': 'bar'})
    result = future.get(timeout=60)
    print(result)

    metrics = producer.metrics()
    print(metrics)
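For any of the producers in these examples, producer.metrics() returns a nested dict keyed by metric group. A minimal sketch (not from the original; the group and metric names are the kafka-python defaults) for reading a single gauge:

metrics = producer.metrics()
# 'producer-metrics' is the group holding the producer-wide sensors
send_rate = metrics.get('producer-metrics', {}).get('record-send-rate')
print('record-send-rate:', send_rate)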
Example #4
def main(kafka_broker, kafka_topic, file_path):
    """Parse the csv and send every line to the kafka topic as a json dict of
    column headers and values"""
    producer = KafkaProducer(
        bootstrap_servers=kafka_broker,
        value_serializer=lambda x: json.dumps(x).encode("utf-8"))
    logger.info("Reading messages from csv")
    for item in read_data(file_path):
        # Important that both key and value are byte arrays
        producer.send(kafka_topic, value=item)

    logger.info("Sending all messages to Kafka")
    producer.flush()
    producer.close()
    pprint.pprint(producer.metrics())
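The read_data helper is not shown in these snippets. A plausible sketch (an assumption, not the original helper) that yields one dict of column headers to values per CSV row, matching the docstring above, could be:

import csv

def read_data(file_path):
    # Yield each CSV row as a dict; the producer's value_serializer
    # then turns each dict into a JSON-encoded message.
    with open(file_path, newline='') as fp:
        for row in csv.DictReader(fp):
            yield row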
Example #5
def process_SEC_rss(item):
    index_rss = 'http://www.sec.gov/Archives/edgar/monthly/xbrlrss-{}.xml'.format(
        item)
    producer = KafkaProducer(bootstrap_servers=kafka_url)
    # urllib2 is Python 2 only; under Python 3 use urllib.request
    rss_feed = urllib.request.urlopen(index_rss)
    index_data = rss_feed.read()
    rss_feed.close()

    index_doc = xmltodict.parse(index_data)
    item_list = index_doc['rss']['channel']['item']
    msg_count = 0
    for entry in item_list:
        formType = entry['edgar:xbrlFiling']['edgar:formType']
        filingInfo = entry['edgar:xbrlFiling']

        if (formType == '10-Q' or formType == '10-K'):
            newRow = {
                'companyName': get_value(filingInfo, 'edgar:companyName'),
                'guid': get_value(entry, 'guid'),
                'xml_filing': index_rss,
                'pubDate': get_value(entry, 'pubDate'),
                'formType': formType,
                'filingDate': get_value(filingInfo, 'edgar:filingDate'),
                'cikNumber': get_value(filingInfo, 'edgar:cikNumber'),
                'accessionNumber': get_value(filingInfo,
                                             'edgar:accessionNumber'),
                'fileNumber': get_value(filingInfo, 'edgar:fileNumber'),
                'filingInfo': get_value(filingInfo, 'edgar:period'),
                'fiscalYearEnd': get_value(filingInfo, 'edgar:fiscalYearEnd'),
            }
            #           cols = newRow.keys()
            #           vals = [newRow[x] for x in cols]
            #            vals_str_list = ["%s"] * len(vals)
            #            vals_str = ", ".join(vals_str_list)
            try:
                jsec = json.dumps(newRow)
                # encode to bytes since the producer has no value_serializer
                producer.send(topic_name, jsec.encode('utf-8'))
                producer.flush()

                msg_count = msg_count + 1

                print("Added {} sec filings".format(msg_count))
            except Exception as e:
                print("Exception encountered: {}".format(e))
    metrics = producer.metrics()
    print(metrics)
    producer.close()
Example #6
class Kafka_Producer(Kafka_Connection):
    def __init__(self, config):
        super(Kafka_Producer, self).__init__(config)

    def connect(self):
        self.__log.info("Kafka Producer Connecting to Kafka Topic")
        if self.security_protocol == "PLAINTEXT":
            self.broker = KafkaProducer(bootstrap_servers=json.loads(
                self.bootstrap_servers),
                                        retries=5)
            self.__log.info("Status of the connection is {0}".format(
                self.broker.metrics()))
        elif self.security_protocol == "SASL_SSL":
            self.__log.info("Connecting over sasl_ssl")
            # SSL
            # create a new context using system defaults, disable all but TLS1.2
            context = ssl.create_default_context()
            context.verify_mode = ssl.CERT_REQUIRED
            context.load_verify_locations(self.ssl_cafile)
            context.load_cert_chain(certfile=self.ssl_certfile,
                                    keyfile=self.ssl_keyfile)
            # OR the OP_NO_* flags in to disable TLSv1 and TLSv1.1
            context.options |= ssl.OP_NO_TLSv1
            context.options |= ssl.OP_NO_TLSv1_1

            self.broker = KafkaProducer(
                bootstrap_servers=json.loads(self.bootstrap_servers),
                # consumer_timeout_ms=self.consumer_timeout_ms,
                security_protocol=self.security_protocol,
                sasl_mechanism=self.sasl_mechanism,
                ssl_context=context,
                api_version=(0, 10))

    def send(self, msg):
        self.__log.info("Kafka Producer Send Function")
        # bytes has no .format() in Python 3; encode the message instead
        self.broker.send(self.outbound_topic, str(msg).encode("utf-8"))
        return 1

    def close(self):
        self.broker.close()
Example #7
    def run(self):
        producer = KafkaProducer(bootstrap_servers='localhost:9092',
                                 acks=1,
                                 linger_ms=100,
                                 compression_type="gzip",
                                 buffer_memory=64 * 1024 * 1024)

        totalNum = 0
#        while not self.stop_event.is_set():
        start = time.time()
        for i in range(self.event_range[0], self.event_range[1]):
            #producer.send('my-topic', b"event id="+str(i))
            producer.send('my-topic', MY_MSG)
            totalNum += 1
            if totalNum % NO_OF_EACHROUND == 0:
                end = time.time()
                print("Producer,%d,%d" % (NO_OF_EACHROUND, (end-start)*1000))
                start = time.time()
                #time.sleep(1)
        print "producer metrics######,", producer.metrics()
        producer.close()
Example #8
    if s >= 200 and s < 400:
        u2 = u
        urls_traversed = [
            r.url for r in req.history
            if r.status_code >= 300 and r.status_code < 400
        ]
        if len(urls_traversed) > 0:
            u2 = urls_traversed[-1]

        parts = urlparse(u2)
        # only successful contacts are placed into the canonical_url_queue, with the
        # recommended url to use based on whether http redirects are issued
        uuid_bytes = uuid.uuid1().bytes
        producer.send('canonical_sites',
                      key=uuid_bytes,
                      value={
                          'original_url': u,
                          'canonical_url': u2,
                          'canonical_domain': parts.hostname,
                          'when': when.strftime("%d-%b-%Y %H:%M:%S%z"),
                          'http_status': s
                      })
        # ensure thug processes this url ie. the url is scanned with a view to sub-pages being scanned
        producer.send('4thug', {'url': u2})
    else:
        producer.send('failed_url_queue', {'url': u, 'http_status': s})

producer.flush()
print(json.dumps(producer.metrics(), sort_keys=True, indent=4))
Example #9
class KafkaClient(object):
    def __init__(self, bootstrap_servers, topic, group_id=None):
        if group_id is not None:
            self.group_id = group_id
            self.allow_hotreload = True
        else:
            self.group_id = 'kafka_topic_dumper_{}'.format(uuid4())
            self.allow_hotreload = False
        self.bootstrap_servers = bootstrap_servers.split(",")
        self.topic = topic
        self.consumer = None
        self.producer = None
        self.timeout_in_sec = 60
        self.dump_state_topic = 'kafka-topic-dumper'
        self.s3_path = 'kafka-topic-dumper-data/'
        self.s3_client = None

    def _get_consumer(self):
        if self.consumer is not None:
            return
        try:
            logger.info('Starting consumer')
            self.consumer = KafkaConsumer(
                bootstrap_servers=self.bootstrap_servers,
                group_id=self.group_id,
                enable_auto_commit=True)
        except Exception as err:
            msg = 'Can not create KafkaConsumer instance. Reason=<{}>'
            logger.exception(msg.format(err))
            raise err

    def _get_s3_client(self):
        if self.s3_client is None:
            self.s3_client = boto3.client('s3')
        return self.s3_client

    def _get_producer(self):
        if self.producer is not None:
            return
        try:
            logger.info('Starting producer')
            self.producer = KafkaProducer(
                bootstrap_servers=self.bootstrap_servers,
                key_serializer=bytes_serializer,
                value_serializer=bytes_serializer)
        except Exception as err:
            msg = 'Can not create KafkaProducer instance. Reason=<{}>'
            logger.exception(msg.format(err))
            raise err

    def open(self):
        self._get_consumer()
        self._get_producer()

    def _close_consumer(self):
        logger.info("Closing consumer")
        self.consumer.close()
        self.consumer = None

    def _close_producer(self):
        logger.info("Closing producer")
        self.producer.flush()
        logger.debug('Statistics {}'.format(self.producer.metrics()))
        self.producer.close()
        self.producer = None

    def close(self):
        self._close_consumer()
        self._close_producer()

    def _get_partitions(self, topic):
        partitions = self.consumer.partitions_for_topic(topic) or []

        count = 0
        while not partitions and count < 500000:
            self.consumer.subscribe(topic)
            partitions = self.consumer.partitions_for_topic(topic) or []
            count += 1  # bound the number of retries
            sleep(0.1)

        msg = "Got the following partitions=<{}> for topic=<{}>"
        logger.info(msg.format(partitions, topic))

        topic_partitions = list(
            map(lambda p: TopicPartition(topic, p), partitions))
        msg = "Got the following topic partitions=<{}>"
        logger.info(msg.format(topic_partitions))
        return topic_partitions

    def _get_offsets(self, topic=None):
        if topic is None:
            topic = self.topic
        topic_partitions = self._get_partitions(topic=topic)
        beginning_offsets = (self.consumer.beginning_offsets(topic_partitions)
                             or {})
        msg = "Got the following beginning offsets=<{}>"
        logger.info(msg.format(beginning_offsets))

        committed_offsets = {}
        msg = "Partition=<{}> has the current offset=<{}> for <{}>"
        for tp in topic_partitions:
            offset = self.consumer.committed(tp)
            committed_offsets[tp] = offset
            logger.debug(msg.format(tp, offset, self.group_id))

        end_offsets = self.consumer.end_offsets(topic_partitions) or {}
        msg = "Got the following end offsets=<{}>"
        logger.info(msg.format(end_offsets))

        return beginning_offsets, committed_offsets, end_offsets

    def _calculate_offsets(self, beginning_offsets, end_offsets,
                           num_messages_to_consume):
        perfect_displacement = ceil(num_messages_to_consume /
                                    max(len(beginning_offsets), 1))
        offsets = {}
        num_messages_available = 0

        for tp, offset in beginning_offsets.items():
            offsets[tp] = max(beginning_offsets[tp],
                              end_offsets[tp] - perfect_displacement)
            num_messages_available += end_offsets[tp] - offsets[tp]

        return offsets, num_messages_available

    def _set_offsets(self, offsets):
        offset_and_metadata = {
            tp: OffsetAndMetadata(offset, b'')
            for tp, offset in offsets.items()
        }

        msg = "Generated the following offsets=<{}>"
        logger.debug(msg.format(offset_and_metadata))

        self.consumer.commit(offset_and_metadata)

    def _get_messages(self, num_messages_to_consume):
        messages = []
        while len(messages) < num_messages_to_consume:
            record = next(self.consumer)
            line = (record.key, record.value)
            messages.append(line)
        self.consumer.commit()

        return messages

    def _write_messages_to_file(self, messages, local_path):
        df = pd.DataFrame(messages)
        table = pa.Table.from_pandas(df)
        pq.write_table(table, local_path, compression='gzip')

    def _send_dump_file(self, local_path, bucket_name, dump_id):
        file_name = path.basename(local_path)
        s3_path = path.join(self.s3_path, dump_id, file_name)

        logger.info('Sending file <{}> to s3'.format(file_name))
        s3_client = self._get_s3_client()
        s3_client.upload_file(local_path,
                              bucket_name,
                              s3_path,
                              ExtraArgs={'ACL': 'private'},
                              Callback=ProgressPercentage(local_path))

        logger.debug('Deleting file <{}>'.format(file_name))
        remove(local_path)

    def _get_transformer_class(self, transformer_id):
        [module_name, class_name] = transformer_id.split(":")

        module = __import__(module_name, globals(), locals(), [class_name], 0)
        cl = getattr(module, class_name)

        return cl()

    def get_messages(self, num_messages_to_consume, max_package_size_in_msgs,
                     local_dir, bucket_name, dry_run, dump_id):

        # set offsets
        msg = ('Will ask kafka for <{}> messages ' +
               'and save them in files of <{}> messages each')
        logger.debug(
            msg.format(num_messages_to_consume, max_package_size_in_msgs))

        beginning_offsets, committed_offsets, end_offsets = self._get_offsets()

        offsets, num_messages_available = self._calculate_offsets(
            beginning_offsets=beginning_offsets,
            end_offsets=end_offsets,
            num_messages_to_consume=num_messages_to_consume)

        self._set_offsets(offsets)

        # get messages
        self.consumer.subscribe(topics=[self.topic])

        msg = 'Trying to dump <{}> messages'
        logger.info(msg.format(num_messages_available))

        remaining_messages = num_messages_available
        num_dumped_messages = 0

        dump_dir = path.join(local_dir, dump_id)
        makedirs(dump_dir, exist_ok=True)
        logger.debug('Dump directory <{}> created'.format(dump_dir))

        while remaining_messages > 0:
            batch_size = min(remaining_messages, max_package_size_in_msgs)
            logger.debug('Fetching batch with size=<{}>'.format(batch_size))

            file_name = '{}-{:015d}.parquet'.format(dump_id,
                                                    num_dumped_messages)

            local_path = path.join(local_dir, dump_id, file_name)

            messages = self._get_messages(num_messages_to_consume=batch_size)
            self._write_messages_to_file(messages=messages,
                                         local_path=local_path)
            if not dry_run:
                self._send_dump_file(local_path=local_path,
                                     bucket_name=bucket_name,
                                     dump_id=dump_id)
            remaining_messages -= batch_size
            num_dumped_messages += batch_size

        logger.info('Dump done!')

    def find_latest_dump_id(self, bucket_name):
        paginator = self._get_s3_client().get_paginator('list_objects_v2')

        prefix = self.s3_path.rstrip('/') + '/'

        response_iterator = paginator.paginate(Bucket=bucket_name,
                                               Prefix=prefix,
                                               Delimiter='/')

        def strip(r):
            return r['Prefix'][len(prefix):].rstrip('/')

        prefixes = []
        for response in response_iterator:
            prefixes.extend(map(strip, response['CommonPrefixes']))

        dump_id = max(prefixes)
        logger.debug('Prefix chosen was <{}>'.format(dump_id))

        return dump_id

    def _get_file_names(self, bucket_name, dump_id):
        paginator = self._get_s3_client().get_paginator('list_objects_v2')
        dump_path = path.join(self.s3_path, dump_id) + '/'

        response_iterator = paginator.paginate(Bucket=bucket_name,
                                               Prefix=dump_path)
        file_names = []
        for response in response_iterator:
            if response['KeyCount'] > 0:
                file_names.extend(
                    (f['Key'], f['Size']) for f in response['Contents'])
        file_names.sort()

        if not file_names:
            msg = 'Cannot find files for dump id <{}>'
            logger.error(msg.format(dump_id))
            raise Exception('EmptyS3Response')

        return file_names

    def _gen_state(self, dump_id, transformer_id):
        _, _, end_offsets = self._get_offsets()

        if not end_offsets:
            msg = 'Can not find offsets for topic <{}>'
            raise Exception(msg.format(self.topic))

        state_offsets = {}

        for partition, offset in end_offsets.items():
            state_offsets[partition.partition] = offset

        state = {
            'dump_id': dump_id,
            'topic_name': self.topic,
            'offsets': state_offsets,
            'dump_date': int(time.time()),
            'transformer_id': transformer_id
        }

        return state

    def _save_state(self, state):
        future = self.producer.send(topic=self.dump_state_topic,
                                    key=self.topic,
                                    value=json.dumps(state))
        future.get(timeout=self.timeout_in_sec)
        logger.info('State saved')

    def _get_last_state_message(self):
        beginning_offsets, _, end_offsets = (self._get_offsets(
            topic=self.dump_state_topic))

        if beginning_offsets:
            offsets, num_messages_available = self._calculate_offsets(
                beginning_offsets=beginning_offsets,
                end_offsets=end_offsets,
                num_messages_to_consume=1)
            self._set_offsets(offsets)
            self.consumer.subscribe(self.dump_state_topic)
            messages = [
                json.loads(m.decode())
                for k, m in self._get_messages(num_messages_available)
            ]
            if messages:
                last_state_message = max(messages,
                                         key=lambda m: m['dump_date'])
                return last_state_message

        return None

    def _get_state(self, dump_id, transformer_id):
        if self.allow_hotreload:
            state_message = self._get_last_state_message()
            if (state_message and
                    state_message['topic_name'] == self.topic and
                    state_message['dump_id'] == dump_id and
                    'transformer_id' in state_message and
                    state_message['transformer_id'] == transformer_id):
                return state_message['offsets']
        return None

    def _reset_offsets(self, dump_offsets):
        logger.info('Messages already uploaded. Just resetting offsets')
        partitions = self._get_partitions(self.topic)
        offsets = {}

        for partition in partitions:
            offsets[partition] = dump_offsets[str(partition.partition)]

        logger.debug('Will reset offsets to <{}>'.format(offsets))

        self._set_offsets(offsets)

    def _load_dump(self, bucket_name, dump_id, download_dir, files,
                   transformer_instance):
        s3_client = self._get_s3_client()

        transformer_id = transformer_instance.get_id()

        state = self._gen_state(dump_id, transformer_id)

        current_file_number = 0
        msg = "Loading messages from file {}/{} to kafka"
        for file_name, file_size in files:
            current_file_number += 1
            tmp_name = '{}.tmp'.format(path.basename(file_name))
            file_path = path.join(download_dir, tmp_name)
            s3_client.download_file(Bucket=bucket_name,
                                    Filename=file_path,
                                    Key=file_name,
                                    Callback=ProgressPercentage(
                                        tmp_name, file_size))
            logger.info(msg.format(current_file_number, len(files)))
            try:
                table = pq.read_table(file_path)
                df = table.to_pandas()
                for raw_row in df.itertuples():
                    for row in transformer_instance.transform(raw_row):
                        self.producer.send(self.topic,
                                           key=row[1],
                                           value=row[2])
                logger.debug('File <{}> reloaded to kafka'.format(file_path))
                self.producer.flush(self.timeout_in_sec)
            finally:
                remove(file_path)

        self._save_state(state)

    def reload_kafka_server(self, bucket_name, local_dir, dump_id,
                            transformer_class):
        transformer_instance = self._get_transformer_class(transformer_class)
        msg = 'Using class=<{}> to transform events before production'
        logger.info(msg.format(type(transformer_instance)))

        transformer_id = transformer_instance.get_id()
        dump_offsets = self._get_state(dump_id, transformer_id)

        if dump_offsets:
            self._reset_offsets(dump_offsets=dump_offsets)
        else:
            files = self._get_file_names(bucket_name=bucket_name,
                                         dump_id=dump_id)
            self._load_dump(bucket_name=bucket_name,
                            dump_id=dump_id,
                            download_dir=local_dir,
                            files=files,
                            transformer_instance=transformer_instance)

        logger.info('Reload done!')

    def __enter__(self):
        self.open()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
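A possible usage sketch for the KafkaClient class above (not part of the original; the broker address, topic, bucket and dump id are placeholder values):

def backup_topic():
    # KafkaClient implements __enter__/__exit__, so open() and close()
    # are handled by the with-block.
    with KafkaClient(bootstrap_servers='localhost:9092', topic='my_topic') as client:
        client.get_messages(num_messages_to_consume=1000,
                            max_package_size_in_msgs=500,
                            local_dir='/tmp',
                            bucket_name='my-backup-bucket',
                            dry_run=True,  # skip the S3 upload
                            dump_id='dump-0001')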
Example #10
		otherReader = csv.DictReader(unzip_file)
		out = json.dumps( [ row for row in otherReader ], sort_keys=True, indent=4).encode('utf-8')  
		parsed = json.loads(out)

		for line in parsed:
			producer.send(topic, value=line)
			total_lines = total_lines + 1
except Exception as exc:
	print("failed to send data to kafka server: {}".format(exc))


end_folder = time.time()
total_time = end_folder-start_folder
lines_per_second = total_lines / total_time

metrics = producer.metrics()
metrics_file = open("benchmark.txt", "a")

print('Record send rate per second: {metric}'.format(metric=metrics['producer-topic-metrics.{topic}'.format(topic=topic)]['record-send-rate']))
print('Incoming byte rate: {metric}'.format(metric=metrics['producer-node-metrics.node-0']['incoming-byte-rate']))
print('Total records sent per second: {metric}'.format(metric=metrics['producer-topic-metrics.{topic}'.format(topic=topic)]['record-send-rate']))
print('{total_lines} lines parsed and sent to kafka in {total_time}'.format(total_time=total_time, total_lines=total_lines))
print('{lines_per_second} lines per second'.format(lines_per_second=lines_per_second))

try:
	metrics_file = open("benchmark.txt", "a")
	metrics_file.write('Record send rate per second: {metric}\n'.format(metric=metrics['producer-topic-metrics.{topic}'.format(topic=topic)]['record-send-rate']))
	metrics_file.write('Incoming byte rate: {metric}\n'.format(metric=metrics['producer-node-metrics.node-0']['incoming-byte-rate']))
	metrics_file.write('Total records sent per second: {metric}\n'.format(metric=metrics['producer-topic-metrics.{topic}'.format(topic=topic)]['record-send-rate']))
	metrics_file.write('{total_lines} lines parsed and sent to kafka in {total_time}\n'.format(total_time=total_time, total_lines=total_lines))
	metrics_file.write('{lines_per_second} lines per second\n'.format(lines_per_second=lines_per_second))
Example #11
class Producer:
    """
    封装kafka-python KafkaProducer
    """
    def __init__(self):
        pass

    def __enter__(self):
        self.cfg = Config().cfg
        self.producer = KafkaProducer(
            bootstrap_servers=self.cfg["serList"],
            # api_version=self.cfg["apiVersion"],
            api_version_auto_timeout_ms=self.cfg["autoVersionTimeout"],
            security_protocol=self.cfg["protocol"],
            sasl_mechanism=self.cfg["mechanism"],
            sasl_kerberos_service_name=self.cfg["kerverosSerName"],
        )
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.producer.close()

    def flush(self):
        """
        调用此方法会使得所有缓存记录变成立即可发送状态.(一般用于send之后, 需要刷新)
        :return:
        """
        try:
            self.producer.flush(timeout=TIME_OUT)
        except KafkaTimeoutError:
            log.tag_error(KafkaInfo.KafkaProducer,
                          "Flush buffered record failed, TimeOut")
            raise ActionError(KafkaErr.FlushFailed)

    def metrics(self):
        """
        获取producer的性能记录(包含各个kafka broker)
        :return:
        """
        performance = self.producer.metrics()
        return performance

    def partition_set_get(self, topic_name: str):
        """
        获取topic的所有分区
        :param topic_name:
        :return: set
        """
        return self.producer.partitions_for(topic_name)

    def send_message(self, topic_name: str, value: bytes, key: str):
        """
        Producer产生数据
        :param topic_name: topic where the message will be published
        :param value: message value
        :param key: key to associate with the message
        :return:
        """
        try:
            result = self.producer.send(topic_name,
                                        value=value,
                                        key=key.encode("utf-8")).add_errback(
                                            self.send_err,
                                            topic=topic_name,
                                            value=value,
                                            key=key)
        except KafkaTimeoutError:
            log.tag_warn(
                KafkaInfo.KafkaProducer,
                "Kafka send data timeout, topic: %s, key: %s, msg: %s" %
                (topic_name, key, value.decode("utf-8")))
            raise ActionError(KafkaErr.SendDataFailed)
        return result

    @staticmethod
    def send_err(topic: str, value: bytes, key: str):
        """
        producer send data failed callback function
        :param topic:
        :param value:
        :param key:
        :return:
        :return:
        """
        log.tag_error(
            KafkaInfo.KafkaProducer, "Kafka send data failed, topic: %s, "
            "key: %s msg: %s" % (topic, key, value.decode("utf-8")))
        raise ActionError(KafkaErr.SendDataFailed)
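A possible usage sketch for the Producer wrapper above (my own; it assumes Config().cfg supplies the keys referenced in __enter__, and the topic name is a placeholder):

def publish_one(payload: bytes):
    # __enter__ builds the underlying KafkaProducer, __exit__ closes it
    with Producer() as p:
        print("partitions:", p.partition_set_get("my_topic"))
        p.send_message("my_topic", payload, key="example-key")
        p.flush()
        print(p.metrics())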
Example #12
from kafka import KafkaProducer
producer = KafkaProducer(bootstrap_servers='localhost:9092')

while True:
    print("\n\nType \"quit\" to exit")
    print("Enter message to be sent:")
    msg = input()
    if msg == "quit":
        print("Exiting")
        break
    producer.send('Hello-everyone', msg.encode('utf-8'))
    print("Sending msg \"{}\"".format(msg))

    metric = producer.metrics(raw=False)
    print(metric)
    print('\nThe Producer metrics : ')
    print('Request size max : ')
    print(metric['producer-metrics']['request-size-max'])
    print('\nKafka-metrics-count : ')
    print(metric['kafka-metrics-count'])

    print(metric.keys())

print("Message sent!")
Example #13
def dataPub():

    cnt = 0

    producer = KafkaProducer(bootstrap_servers='10.4.10.239:9092')  # connect to Kafka
    devNum = len(producer.partitions_for('testyg'))

    print(producer.metrics())

    st = time.time()
    for i in range(30):
        print(i)
        # if i%2 == 0:
        img = Image.open(pwd + 'imgs/test.jpg')
        # else:
        #     img = Image.open('../imgs/army.jpg')

        # img = clintInf.transformData(img)

        img = pickle.dumps(img)
        print(img.__len__())
        # producer.send('byzantine', str(msg[i]).encode('utf-8'))  # topic to send to is test
        # producer.send('result', str(i).encode())
        producer.send('testyg', img, str(i).encode())
        producer.flush()
        # time.sleep(1)
    print('end')
    log.logSend("INFO " + localhost + " publish 30 msgs!")

    colDevs = []
    for msg in consumer:
        print(cnt)
        # print(msg)
        # if cnt == 0:
        # st = time.time()
        cnt += 1
        if colDevs.count(msg.key.decode()) == 0:
            colDevs.append(msg.key.decode())
        if cnt == 30:
            # ed =time.time()
            # print(ed-st)
            ed = time.time()
            consumer.close()
            break  # stop once all 30 results are in
    costTime = ed - st

    log.logSend("INFO " + localhost + "'s data handling done, took " +
                str(costTime) + " s")

    taskInfo = {
        'name': 'dataOffload',
        'type': 'classification',
        'startDevice': localhost,
        'dataNum': 30,
        'devNum': devNum,
        'colDevs': colDevs,
        'latency': ed - st
    }
    js = json.dumps(taskInfo)

    conn.lpush("kafkaTasks", js)

    producer.close()
Example #14
import pickle
import redis
import json
from MyConsumer import *

consumer = KafkaConsumer('result',
                         group_id="test_group_1",
                         bootstrap_servers=['10.4.10.239:9092'])
cnt = 0

producer = KafkaProducer(bootstrap_servers='10.4.10.239:9092')  # connect to Kafka
devNum = len(producer.partitions_for('testyg'))

print(devNum)

print(producer.metrics())
clintInf = ClientInf(0, 18)
# msg = [1,2,3,4]
st = time.time()
for i in range(30):
    print(i)
    # if i%2 == 0:
    img = Image.open('../imgs/test.jpg')
    # else:
    #     img = Image.open('../imgs/army.jpg')

    # img = clintInf.transformData(img)

    img = pickle.dumps(img)
    print(img.__len__())
    # producer.send('byzantine', str(msg[i]).encode('utf-8'))  # topic to send to is test
Example #15
from kafka import KafkaProducer
from kafka.errors import KafkaError
import json
import sys

servid=sys.argv[1]
print('arg',servid)

producer = KafkaProducer(bootstrap_servers=['kafka:port'],
                         security_protocol="SSL",
                         ssl_cafile="ca.pem",
                         ssl_certfile="service.cert",
                         ssl_keyfile="service.key",
                         value_serializer=lambda m: json.dumps(m).encode('ascii'))

data = producer.send('shrtest', {'ServiceId': servid,
                                 'details': {'status': 'active',
                                             'stdate': '12-01-2019',
                                             'enddate': '01-01-9999'}})
producer.metrics()
producer.flush()
try:
    record_metadata = data.get(timeout=1000)
    print('response Partition', record_metadata.partition)
    print('response offset', record_metadata.offset)
except KafkaError:
    log.exception("Exception while sending to Kafka")
Example #16
    for f in foreFiles:
        with open(f, 'r') as fp:
            foreDat = json.load(fp)

        for city, dat in foreDat.items():
            topic = f"wf-{city}"
            outdat = mapForeWeatherFlat(dat)
            if outdat is not None:
                if outdat['dt'] != 'NA':
                    producer.send(topic, outdat)
                    print(f"forecast weather city={city}", "time=",
                          datetime.fromtimestamp(outdat['dt']))
                else:
                    print(f"no dat reported for city={city}...skipping")
            else:
                print(f"Could not report for city={city}...skipping")

else:
    # implementation for the api source is currently missing
    pw_log('processing via api source is currently not implemented')
    pass

adm.close()
consumer.close()

producer.flush()

pi_log("kafkaProducerMetrics:")
pi_log("performance:" + str(json.dumps(producer.metrics(), indent=4)))
pi_log("successful completion")
producer.close()