class KafkaPipeline(object):

    @classmethod
    def from_crawler(cls, crawler):
        settings = crawler.settings
        name = crawler.spider.name
        return cls(name, **settings)

    def __init__(self, name, **kwargs):
        self.kafkaproducer = KafkaProducer(
            bootstrap_servers=kwargs.get('KAFKA_SERVERS'),
            key_serializer=lambda m: m.encode('utf-8'),
            value_serializer=lambda m: json.dumps(m).encode('utf-8'))
        self.partitions = list(
            self.kafkaproducer.partitions_for(name))  # get all partitions

    def process_item(self, item, spider):
        """
        :param item:
        :param spider:
        :return: item (the data is written to Kafka, keyed per table)
        """
        new_item = {key: value for key, value in item.items()}
        new_item['ctime'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        try:
            self.kafkaproducer.send(topic=spider.name,
                                    partition=choice(self.partitions),
                                    key=item.tablename,
                                    value=new_item).get(timeout=10)
            logger.info(f"write succeeded <= topic:{spider.name} key:{item.tablename}")
        except Exception as e:
            logger.error(
                f"write failed <= topic:{spider.name} key:{item.tablename} error:{e}")
        return item
def produce_to_bruce(schema, args, config):
    topic = config['kafka']['topic']
    if args.partition_count:
        partition_count = args.partition_count
    else:
        print('fetch partition info for topic ' + topic)
        producer = KafkaProducer(bootstrap_servers=config['kafka']['brokers'])
        partition_count = 1 + max(producer.partitions_for(topic))
        producer.close()

    socket = bruce.open_bruce_socket()

    # batching socket send
    buff = []

    def flush_buff():
        for msg in buff:
            socket.sendto(msg, '/var/run/bruce/bruce.socket')
        del buff[:]

    def f_produce(topic, partition, key, value):
        # flush first when the buffer is full, then append, so no message is dropped
        if len(buff) >= 1000:
            flush_buff()
        buff.append(bruce.create_msg(partition, topic, bytes(key), bytes(value)))

    try:
        bootstrap(f_produce, partition_count, schema, args.database, args.table, config)
        flush_buff()
    except KeyboardInterrupt:
        sys.exit(1)
    finally:
        socket.close()
def kafka_check(self, server, topic, test_data):
    producer = KafkaProducer(bootstrap_servers=server)
    partitions = producer.partitions_for(topic)
    self.logger.info('host:' + server + ' topic:' + topic +
                     ' available partitions:' + str(partitions))
    for partition in partitions:
        try:
            info = producer.send(topic=topic,
                                 value=test_data.encode('utf-8'),
                                 partition=partition).get(timeout=5)
            self.logger.info(
                'host:{}, topic:{}, partition:{}, offset:{}'.format(
                    server, topic, partition, info.offset))
        except Exception:
            self.logger.error(
                'could not send message to host:{}, topic:{}, partition:{}.'
                .format(server, topic, partition))
def send_bytes_to_kafka():
    producer = KafkaProducer(bootstrap_servers=broker_address)
    print(producer.partitions_for(topic))
    for i in range(1, 100):
        print(i)
        producer.send(topic, value=b'msg %d' % i).get(timeout=30)
        time.sleep(1)
def main():
    producer = KafkaProducer(
        bootstrap_servers="worker2.hengan.shop:9092",
        value_serializer=lambda m: json.dumps(m).encode('utf-8'))
    for i in range(1):
        ack = producer.send('foobar2', {"name": "a" + str(i), "age": i + 10})
        metadata = ack.get()
        print(metadata.topic)
        print(metadata.partition)
    producer.flush()
    print(producer.partitions_for('foobar2'))
def generate_kafka_producer_consumer(config):
    topic = config['kafka']['topic']
    kafka_producer = KafkaProducer(bootstrap_servers=config['kafka']['brokers'])
    partition_count = 1 + max(kafka_producer.partitions_for(topic))

    def consume(key, value):
        database = key['database']
        key_str = json.dumps(key, separators=(',', ':'))
        value_str = json.dumps(value, separators=(',', ':'))
        partition = abs(java_string_hashcode(database) % partition_count)
        kafka_producer.send(topic, key=key_str, value=value_str, partition=partition)

    return consume
def get_partitions_list(topic):
    """Get the partitions for a topic.

    Arguments:
        topic - name of the topic
    Return:
        partitions_list - set of the topic's partition ids
    """
    producer = KafkaProducer(bootstrap_servers='localhost:9092')
    partitions_list = producer.partitions_for(topic)
    return partitions_list
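A minimal usage sketch for the helper above: `partitions_for` returns a set of partition ids, so the highest id plus one gives the partition count that several of the other examples derive with `1 + max(...)`. The topic name "my-topic" is only an illustrative placeholder, and the broker address is the one assumed in the snippet.

# Hypothetical usage of get_partitions_list(); assumes a broker on localhost:9092
# and an existing topic named "my-topic".
partitions = get_partitions_list('my-topic')   # e.g. {0, 1, 2}
if partitions:
    partition_count = 1 + max(partitions)      # same trick as the bootstrap examples above
    print('topic has %d partitions: %s' % (partition_count, sorted(partitions)))
else:
    print('no partition metadata returned')    # defensive guard, e.g. topic missing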
def produce_to_kafka(schema, args, config):
    topic = config['kafka']['topic']
    producer = KafkaProducer(bootstrap_servers=config['kafka']['brokers'])

    def f_produce(topic, partition, key, value):
        producer.send(topic, key=key, value=value, partition=partition)

    partition_count = 1 + max(producer.partitions_for(topic))
    try:
        bootstrap(f_produce, partition_count, schema, args.database, args.table, config)
    except KeyboardInterrupt:
        sys.exit(1)
    producer.flush()
    producer.close()
def generate_stream(**kwargs):
    # set up the producer
    producer = KafkaProducer(bootstrap_servers=['kafka:9092'],
                             value_serializer=lambda x: dumps(x).encode('utf-8'))

    # load the stream sample file
    stream_sample = pickle.load(open(os.getcwd() + kwargs['path_stream_sample'], "rb"))

    # the stream sample holds 20000 observations; 200 of them are selected at random
    rand = random.sample(range(0, 20000), 200)
    x_new = stream_sample[0]
    y_new = stream_sample[1]

    logging.info('Partitions: %s', producer.partitions_for('TopicA'))

    for i in rand:
        json_comb = encode_to_json(x_new[i], y_new[i])  # pick observation and encode to JSON
        producer.send('TopicA', value=json_comb)        # send encoded observation to Kafka topic
        logging.info("Sent number: {}".format(y_new[i]))
        sleep(1)

    producer.close()
def generate_stream(**kwargs):
    # We create a Kafka producer
    producer = KafkaProducer(
        bootstrap_servers=['kafka:9092'],
        value_serializer=lambda x: dumps(x).encode('utf-8'))

    # Create some sample data; for demonstration purposes we just take some samples
    # from the initial training data. This could be your continuous flow of incoming data.
    movements_stream_input = pd.read_csv(
        "../../../data/SmartMovementExport.csv")

    # From the whole input set, take random indices for 500 new training examples
    rand = random.sample(range(0, len(movements_stream_input)), 500)

    logging.info('We now stream some data over Kafka. Partitions: %s',
                 producer.partitions_for('MovementsTopic'))

    for i in rand:
        json_stream_data = encode_as_json(movements_stream_input.iloc[i])  # select row i
        producer.send('MovementsTopic', value=json_stream_data)
        sleep(1)

    producer.close()
def submit_add_topic():
    """Handle submit on the add_topic page: read the user's input and create the topic."""
    try:
        message = "<h3>Result</h3>"
        topic = request.form.get("input_topic")
        producer = KafkaProducer(bootstrap_servers=tmpbootstrap_servers)
        par = producer.partitions_for(topic)
        producer.flush()
        consumer2.close()
        message = message + "<h4>Topic added successfully!</h4>"
        message = message + "topic name: " + topic + "<BR>"
    except Exception as e:
        message = message + "<B>Failed to add topic: " + topic + "</B><BR>Detail:"
        message = message + str(e)
    message = message + "<hr><a href='index'>Back to index</a><BR>"
    message = message + "<a href='help'>help</a>"
    return message
class Producer:
    """
    Wrapper around kafka-python's KafkaProducer.
    """

    def __init__(self):
        pass

    def __enter__(self):
        self.cfg = Config().cfg
        self.producer = KafkaProducer(
            bootstrap_servers=self.cfg["serList"],
            # api_version=self.cfg["apiVersion"],
            api_version_auto_timeout_ms=self.cfg["autoVersionTimeout"],
            security_protocol=self.cfg["protocol"],
            sasl_mechanism=self.cfg["mechanism"],
            sasl_kerberos_service_name=self.cfg["kerverosSerName"],
        )
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.producer.close()

    def flush(self):
        """
        Make all buffered records immediately available to send
        (usually called after send() to push them out).
        :return:
        """
        try:
            self.producer.flush(timeout=TIME_OUT)
        except KafkaTimeoutError:
            log.tag_error(KafkaInfo.KafkaProducer,
                          "Flush buffered record failed, TimeOut")
            raise ActionError(KafkaErr.FlushFailed)

    def metrics(self):
        """
        Get the producer's performance metrics (covering each Kafka broker).
        :return:
        """
        performance = self.producer.metrics()
        return performance

    def partition_set_get(self, topic_name: str):
        """
        Get all partitions of a topic.
        :param topic_name:
        :return: set
        """
        return self.producer.partitions_for(topic_name)

    def send_message(self, topic_name: str, value: bytes, key: str):
        """
        Publish data as the producer.
        :param topic_name: topic where the message will be published
        :param value: message value
        :param key: key to associate with the message
        :return:
        """
        try:
            result = self.producer.send(
                topic_name, value=value, key=key.encode("utf-8")).add_errback(
                    self.send_err, topic=topic_name, value=value, key=key)
        except KafkaTimeoutError:
            log.tag_warn(
                KafkaInfo.KafkaProducer,
                "Kafka send data timeout, topic: %s, key: %s, msg: %s"
                % (topic_name, key, value.decode("utf-8")))
            raise ActionError(KafkaErr.SendDataFailed)
        return result

    @staticmethod
    def send_err(topic: str, value: bytes, key: str):
        """
        Callback invoked when the producer fails to send data.
        :param topic:
        :param value:
        :param key:
        :return:
        """
        log.tag_error(
            KafkaInfo.KafkaProducer,
            "Kafka send data failed, topic: %s, "
            "key: %s msg: %s" % (topic, key, value.decode("utf-8")))
        raise ActionError(KafkaErr.SendDataFailed)
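A short usage sketch for the wrapper above, assuming the surrounding project supplies Config, TIME_OUT, log and the error classes it references; the topic name, key and payload below are illustrative placeholders only. Because the class implements __enter__/__exit__, it is meant to be used as a context manager so the producer is always closed.

# Hypothetical usage of the Producer wrapper (context-manager style);
# "demo-topic", "demo-key" and the payload are placeholders.
with Producer() as p:
    print(p.partition_set_get("demo-topic"))                  # set of partition ids
    p.send_message("demo-topic", b'{"hello": "world"}', key="demo-key")
    p.flush()                                                  # push buffered records out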
class ProducerClient(TestAdapterLib.Adapter): @doc_public def __init__ (self, parent, name=None, bootstrap_servers=None, debug=False, agentSupport=False, agent=None, shared=False, verbose=True, logEventSent=True, logEventReceived=True): """ KAFKA Producer client Adapter. Mapping of kafka-python KafkaProducer @param parent: parent testcase @type parent: testcase @bootstrap_servers: Kafka broker used to boostrap at connect call (list of ip address port ) @type bootstrap_servers: List @param agent: agent to use when this mode is activated @type agent: string/None @param name: adapter name used with from origin/to destination (default=None) @type name: string/none @param debug: active debug mode (default=False) @type debug: boolean @param shared: shared adapter (default=False) @type shared: boolean """ TestAdapterLib.Adapter.__init__(self, name = __NAME__, parent = parent, debug=debug, realname=name, agentSupport=agentSupport, agent=agent, shared=shared, caller=TestAdapterLib.caller(), agentType=AGENT_TYPE_EXPECTED) self.parent = parent self.codecX2D = Xml2Dict.Xml2Dict() self.codecD2X = Dict2Xml.Dict2Xml(coding = None) self.logEventSent = logEventSent self.logEventReceived = logEventReceived self.parent = parent self.cfg = {} if agent is not None: self.cfg['agent'] = agent self.cfg['agent-name'] = agent['name'] self.cfg['agent-support'] = agentSupport self.TIMER_ALIVE_AGT = TestAdapterLib.Timer(parent=self, duration=20, name="keepalive-agent", callback=self.aliveAgent, logEvent=False, enabled=True) self.__checkConfig() # initialize the agent with no data if agent is not None: if self.cfg['agent-support']: self.prepareAgent(data={'shared': shared}) if self.agentIsReady(timeout=30) is None: raise Exception("Agent %s is not ready" % self.cfg['agent-name'] ) self.TIMER_ALIVE_AGT.start() self.bootstrap_servers = bootstrap_servers if debug: self.__getKafkaClientLogger() def __checkConfig(self): """ """ self.debug("config: %s" % self.cfg) self.warning('Agent used Name=%s Type=%s' % (self.cfg['agent']['name'], self.cfg['agent']['type']) ) def encapsule(self, *input_layers): """ Encapsule layers in template message """ if self.cfg['agent-support']: layer_agent= TestTemplatesLib.TemplateLayer('AGENT') layer_agent.addKey(name='name', data=self.cfg['agent']['name'] ) layer_agent.addKey(name='type', data=self.cfg['agent']['type'] ) tpl = TestTemplatesLib.TemplateMessage() if self.cfg['agent-support']: tpl.addLayer(layer=layer_agent) for layer in input_layers: tpl.addLayer(layer=layer) return tpl def onReset(self): """ Called automaticly on reset adapter """ # stop timer self.TIMER_ALIVE_AGT.stop() # cleanup remote agent self.resetAgent() def receivedNotifyFromAgent(self, data): """ Function to reimplement """ if 'cmd' in data: if data['cmd'] == AGENT_INITIALIZED: tpl = TestTemplatesLib.TemplateMessage() layer = TestTemplatesLib.TemplateLayer('AGENT') layer.addKey("ready", True) layer.addKey(name='name', data=self.cfg['agent']['name'] ) layer.addKey(name='type', data=self.cfg['agent']['type'] ) tpl.addLayer(layer= layer) self.logRecvEvent( shortEvt = "Agent Is Ready" , tplEvt = tpl ) elif data['cmd'] == "producer_{0}".format(CONNECT): self.__kafka_connected = True tpl = templates.kafka_ops(method=CONNECT,bootstrap_servers=self.bootstrap_servers) self.logRecvEvent( shortEvt = "connected", tplEvt = self.encapsule(self.producerTpl ,tpl)) elif data['cmd'] == "producer_{0}".format(SEND): record_metadata = data['result'] self.__kafka_send = True rec = { "Topic":record_metadata[0], "Partition": record_metadata[1] 
, "Offset":record_metadata[3] , "Timestamp": record_metadata[4] , "Checksum": record_metadata[5], "Serialized_key_size": record_metadata[6], "Serialized_value_size": record_metadata[7]} tpl = templates.kafka_ops(method=SEND, more=rec) self.logRecvEvent( shortEvt = "sended", tplEvt = self.encapsule(self.producerTpl ,tpl)) elif data['cmd'] =="producer_{0}".format(FLUSH) : tpl = templates.kafka_ops(method=FLUSH) self.logRecvEvent( shortEvt = "flushed", tplEvt = self.encapsule(self.producerTpl ,tpl)) elif data['cmd'] =="producer_{0}".format(PARTITIONS_FOR) : partitions = data['result'] tpl = templates.kafka_ops(method=PARTITIONS_FOR, partitions=partitions) self.logRecvEvent( shortEvt = "partitions_for", tplEvt = self.encapsule(self.producerTpl ,tpl)) elif data['cmd'] == "producer_{0}".format(CLOSE): tpl = templates.kafka_ops(method=CLOSE) self.logRecvEvent( shortEvt = "closed", tplEvt = self.encapsule(self.producerTpl ,tpl)) else: self.warning( 'Notify received from agent: %s' % data ) def receivedErrorFromAgent(self, data): """ Function to reimplement """ if "cmd" in data: if data['cmd'] in [ CONNECT, CLOSE, SEND, FLUSH,PARTITIONS_FOR ]: tpl = self.encapsule(self.producerTpl, templates.response_err(msg=data['err-msg'], method=data['cmd'] )) self.logRecvEvent( shortEvt = "response error", tplEvt = tpl ) else: self.error("unknown command received: %s" % data["cmd"]) else: self.error( 'Generic error: %s' % data ) def receivedDataFromAgent(self, data): """ Function to reimplement """ self.warning( 'Data received from agent: %s' % data ) def prepareAgent(self, data): """ Prepare agent """ self.parent.sendReadyToAgent(adapterId=self.getAdapterId(), agentName=self.cfg['agent-name'], agentData=data) def initAgent(self, data): """ Init agent """ self.parent.sendInitToAgent(adapterId=self.getAdapterId(), agentName=self.cfg['agent-name'], agentData=data) def resetAgent(self): """ Reset agent """ self.parent.sendResetToAgent(adapterId=self.getAdapterId(), agentName=self.cfg['agent-name'], agentData='') def aliveAgent(self): """ Keep alive agent """ self.parent.sendAliveToAgent(adapterId=self.getAdapterId(), agentName=self.cfg['agent-name'], agentData='') self.TIMER_ALIVE_AGT.restart() def sendInitToAgent(self, data): """ """ self.parent.sendInitToAgent(adapterId=self.getAdapterId(), agentName=self.cfg['agent-name'], agentData=data) def sendNotifyToAgent(self, data): """ """ self.parent.sendNotifyToAgent(adapterId=self.getAdapterId(), agentName=self.cfg['agent-name'], agentData=data) def sendResetToAgent(self, data): """ """ self.parent.sendResetToAgent(adapterId=self.getAdapterId(), agentName=self.cfg['agent-name'], agentData=data) def agentIsReady(self, timeout=1.0): """ Waits to receive "agent ready" event until the end of the timeout @param timeout: time max to wait to receive event in second (default=1s) @type timeout: float @return: an event matching with the template or None otherwise @rtype: templatemessage """ tpl = TestTemplatesLib.TemplateMessage() layer = TestTemplatesLib.TemplateLayer('AGENT') layer.addKey("ready", True) layer.addKey(name='name', data=self.cfg['agent']['name'] ) layer.addKey(name='type', data=self.cfg['agent']['type'] ) tpl.addLayer(layer= layer) evt = self.received( expected = tpl, timeout = timeout ) return evt def __getKafkaClientLogger(self): logger = logging.getLogger('kafka') logger.addHandler(logging.StreamHandler(sys.stdout)) logger.setLevel(logging.DEBUG) @doc_public def connect(self, **kargs ): """ Instantiate the KafkaProducer and Fetch Kafka Cluster Metadata 
@param kargs: keyword arguments from KafkaProducer class: @type kargs: keyword """ if 'bootstrap_servers' in kargs: bootstrap_servers = kargs.pop('bootstrap_servers') else: bootstrap_servers=self.bootstrap_servers # Log start connexion event self.producerTpl = templates.kafka_connect(api=PRODUCER,bootstrap_servers=bootstrap_servers, **kargs) tpl = templates.kafka_ops(method=CONNECT,bootstrap_servers=bootstrap_servers, **kargs) self.logSentEvent( shortEvt = "connection", tplEvt = self.encapsule(self.producerTpl,tpl)) self.__kafka_connected = False # Agent mode if self.cfg['agent-support']: remote_cfg = { 'cmd': "producer_{0}".format(CONNECT), 'bootstrap_servers': bootstrap_servers, 'kargs': kargs } self.sendNotifyToAgent(data=remote_cfg) else: try: self.producer = KafkaProducer(bootstrap_servers=bootstrap_servers, **kargs ) tpl = templates.kafka_ops(method=CONNECT,bootstrap_servers=bootstrap_servers, **kargs) self.logRecvEvent( shortEvt = "connected", tplEvt = self.encapsule(self.producerTpl,tpl)) except KafkaError as e: tpl = self.encapsule(self.producerTpl, templates.response_err(msg=e, method=CONNECT )) self.logRecvEvent( shortEvt = "response error", tplEvt = tpl ) @doc_public def send(self, topic, **kargs): """ Publish a message to a topic. @topic (str): topic where the message will be published @value (optional): message value as bytes. @partition (int, optional): optionally specify a partition. If not set, the partition will be selected using the configured 'partitioner'. @key (optional): a key to associate with the message. Can be used to determine which partition to send the message to. @timestamp_ms (int, optional): epoch milliseconds (from Jan 1 1970 UTC) to use as the message timestamp. Defaults to current time. """ tpl = templates.kafka_ops(method=SEND, **kargs) self.logSentEvent( shortEvt = "req send", tplEvt = self.encapsule(self.producerTpl ,tpl)) # Timeout for record metadata retreving if "timeout" in kargs: timeout = kargs.pop("timeout") else: timeout=2 if self.cfg['agent-support']: remote_cfg = { 'cmd': "producer_{0}".format(SEND), 'topic': topic, 'timeout': timeout, 'kargs': kargs } self.sendNotifyToAgent(data=remote_cfg) else: try: future = self.producer.send(topic,**kargs) record_metadata=future.get(timeout=timeout) rec = { "Topic":record_metadata[0], "Partition": record_metadata[1] , "Offset":record_metadata[3] , "Timestamp": record_metadata[4] , "Checksum": record_metadata[5], "Serialized_key_size": record_metadata[6], "Serialized_value_size": record_metadata[7]} tpl = templates.kafka_ops(method=SEND, more=rec) self.logRecvEvent( shortEvt = "resp send", tplEvt = self.encapsule(self.producerTpl,tpl)) except KafkaError as e: tpl = self.encapsule(self.producerTpl, templates.response_err(msg=e, method=SEND )) self.logRecvEvent( shortEvt = "response error", tplEvt = tpl ) @doc_public def partitions_for(self, topic): """ All fonction documentation available on http://kafka-python.readthedocs.io. 
""" tpl = templates.kafka_ops(method=PARTITIONS_FOR, topic=topic) self.logSentEvent( shortEvt = "req partitions_for", tplEvt = self.encapsule(self.producerTpl ,tpl)) if self.cfg['agent-support']: remote_cfg = { 'cmd': "producer_{0}".format(PARTITIONS_FOR), 'topic': topic } self.sendNotifyToAgent(data=remote_cfg) else: try: partitions = self.producer.partitions_for(topic) tpl = templates.kafka_ops(method=PARTITIONS_FOR,topic=topic, partitions=partitions) self.logRecvEvent( shortEvt = "resp partitions_for", tplEvt = self.encapsule(self.producerTpl,tpl)) except KafkaError as e: tpl = self.encapsule(self.producerTpl, templates.response_err(msg=e, method=PARTITIONS_FOR )) self.logRecvEvent( shortEvt = "response error", tplEvt = tpl ) @doc_public def flush(self, timeout=None): """ All fonction documentation available on http://kafka-python.readthedocs.io. """ tpl = templates.kafka_ops(method=FLUSH, timeout=timeout) self.logSentEvent( shortEvt = "req flush", tplEvt = self.encapsule(self.producerTpl,tpl)) if self.cfg['agent-support']: remote_cfg = { 'cmd': "producer_{0}".format(FLUSH), 'timeout': timeout } self.sendNotifyToAgent(data=remote_cfg) else: try: self.producer.flush(timeout) tpl = templates.kafka_ops(method=FLUSH) self.logRecvEvent( shortEvt = "resp flush", tplEvt = self.encapsule(self.producerTpl,tpl)) except KafkaError as e: tpl = self.encapsule(self.producerTpl, templates.response_err(msg=e, method=FLUSH )) self.logRecvEvent( shortEvt = "response error", tplEvt = tpl ) @doc_public def close(self, timeout=None): """ All fonction documentation available on http://kafka-python.readthedocs.io. """ tpl = templates.kafka_ops(method=CLOSE, timeout=timeout) self.logSentEvent( shortEvt = "req close", tplEvt = self.encapsule(self.producerTpl,tpl)) if self.cfg['agent-support']: remote_cfg = { 'cmd': "producer_{0}".format(CLOSE), 'timeout': timeout } self.sendNotifyToAgent(data=remote_cfg) else: try: self.producer.close(timeout=timeout) tpl = templates.kafka_ops(method=CLOSE,timeout=timeout) self.logRecvEvent( shortEvt = "closed", tplEvt = self.encapsule(self.producerTpl,tpl)) except KafkaError as e: tpl = self.encapsule(self.producerTpl, templates.response_err(msg=e, method=CLOSE )) self.logRecvEvent( shortEvt = "response error", tplEvt = tpl ) @doc_public def isSend(self, timeout=2, record=None): """ Wait to receive response from "send" request and match returned RecordMetadata until the end of the timeout. 
@param timeout: time max to wait to receive event in second (default=2s) @type timeout: float @param offset: Optional RecordMetadata that we expect to be assigned to consumer @type offset: RecordMetadata """ TestAdapterLib.check_timeout(caller=TestAdapterLib.caller(), timeout=timeout) if record == None: record = { "Topic":TestOperatorsLib.Any(), "Partition": TestOperatorsLib.Any(), "Offset":TestOperatorsLib.Any() , "Timestamp":TestOperatorsLib.Any() , "Checksum": TestOperatorsLib.Any(), "Serialized_key_size":TestOperatorsLib.Any(), "Serialized_value_size": TestOperatorsLib.Any()} expected = templates.kafka_ops(method=SEND, more=record) # try to match the template evt = self.received( expected=self.encapsule( self.producerTpl ,expected ), timeout=timeout ) return evt @doc_public def isConnect(self, timeout=2): """ Wait to receive response from "connect" request until the end of the timeout @param timeout: time max to wait to receive event in second (default=2s) @type timeout: float """ TestAdapterLib.check_timeout(caller=TestAdapterLib.caller(), timeout=timeout) # construct the expected template expected = templates.kafka_ops(method=CONNECT, bootstrap_servers=self.bootstrap_servers) # try to match the template evt = self.received( expected=self.encapsule( self.producerTpl ,expected), timeout=timeout ) return evt @doc_public def isFlush(self, timeout=2): """ Wait to receive response from "flush" request until the end of the timeout @param timeout: time max to wait to receive event in second (default=2s) @type timeout: float """ TestAdapterLib.check_timeout(caller=TestAdapterLib.caller(), timeout=timeout) # construct the expected template expected = templates.kafka_ops(method=FLUSH) # try to match the template evt = self.received( expected=self.encapsule( self.producerTpl ,expected), timeout=timeout ) return evt @doc_public def isClose(self, timeout=2): """ Wait to receive response from "close" request until the end of the timeout @param timeout: time max to wait to receive event in second (default=2s) @type timeout: float """ TestAdapterLib.check_timeout(caller=TestAdapterLib.caller(), timeout=timeout) # construct the expected template expected = templates.kafka_ops(method=CLOSE) # try to match the template evt = self.received( expected=self.encapsule( self.producerTpl ,expected), timeout=timeout ) return evt @doc_public def isPartitions_for(self, timeout=2,partitions=None): """ Wait to receive response from "partitions_for" request and match returned Topics until the end of the timeout. @param timeout: time max to wait to receive event in second (default=2s) @type timeout: float @param offset: Optional list that we expect to be view by producer @type offset: list of of Topics """ TestAdapterLib.check_timeout(caller=TestAdapterLib.caller(), timeout=timeout) if partitions == None: partitions= { "partitions":TestOperatorsLib.Any()} expected = templates.kafka_ops(method=PARTITIONS_FOR,more=partitions) # try to match the template evt = self.received( expected=self.encapsule( self.producerTpl ,expected), timeout=timeout ) return evt
    logger.error(traceback.format_exc())
    sys.exit(msg)

logger.info('KafkaProducer successfully initialized')

for i in range(len(conf.target_websites)):
    logger.info(
        "Checking availability of {0} website every {1} seconds".format(
            conf.target_websites[i], str(conf.delta_times_availability_check_sec[i])))
    logger.info("Pattern to match - {0}".format(
        conf.patterns_expected_to_be_found[i]))

logger.info("Sending info to topic {0}".format(conf.website_checker_topic))

# check whether the number of partitions is configured properly
enough_partitions = True
partitions = producer.partitions_for(conf.website_checker_topic)
if len(partitions) != len(conf.target_websites):
    logger.warning("Increase number of partitions for topic {0}. "
                   "Number of partitions (current = {1}) needs to be equal to the number of "
                   "target_websites specified in settings.ini = {2}"
                   .format(conf.website_checker_topic, len(partitions), len(conf.target_websites)))
    enough_partitions = False

threads = list()
for i in range(len(conf.target_websites)):
    partitions_assigned = i if enough_partitions else 0
    thread = Thread(target=website_check,
                    args=(producer, logger, partitions_assigned, conf.website_checker_topic,
                          conf.target_websites[i], conf.patterns_expected_to_be_found[i],
                          conf.delta_times_availability_check_sec[i],))
    thread.start()
    threads.append(thread)

for t in threads:
def error(exception):
    # handle exception
    print('> message unsent with exception:', exception)


producer = KafkaProducer(
    acks=1,
    # acks='all',
    retries=5,
    # compression_type='lz4',  # just use the default compression
    request_timeout_ms=60000,
    bootstrap_servers=bootstrap_servers,
    value_serializer=lambda m: json.dumps(m).encode('ascii'))

partitions = list(producer.partitions_for(topic))
sendingCount = len(partitions) * 1000
for i in range(sendingCount):
    partition = i % len(partitions)
    message['partition'] = partition
    print('> send message to %s with partition %d. %s'
          % (topic, partition, message['data']['url']))
    # produce asynchronously with callbacks
    producer.send(topic=topic,
                  value=message,
                  partition=partition,
                  key=str.encode(message['id'])).add_callback(success).add_errback(error)
def run(self): """Publish video frames as json objects, timestamped, marked with camera number. Source: self.video_path: URL for streaming video self.kwargs["use_cv2"]: use raw cv2 streaming, set to false to use smart fast streaming --> not every frame is sent. Publishes: A dict {"frame": string(base64encodedarray), "dtype": obj.dtype.str, "shape": obj.shape, "timestamp": time.time(), "camera": camera, "frame_num": frame_num} """ if self.rr_distribute: partitioner = RoundRobinPartitioner(partitions=[ TopicPartition(topic=self.frame_topic, partition=i) for i in range(self.topic_partitions) ]) else: partitioner = Murmur2Partitioner(partitions=[ TopicPartition(topic=self.frame_topic, partition=i) for i in range(self.topic_partitions) ]) # Producer object, set desired partitioner frame_producer = KafkaProducer( bootstrap_servers=[params.KAFKA_BROKER], key_serializer=lambda key: str(key).encode(), value_serializer=lambda value: json.dumps(value).encode(), partitioner=partitioner, max_request_size=134217728) print("[CAM {}] URL: {}, SET PARTITIONS FOR FRAME TOPIC: {}".format( self.camera_num, self.video_path, frame_producer.partitions_for(self.frame_topic))) # Use either option if self.use_cv2: # video = cv2.VideoCapture(self.video_path) # Here we use sampler to read all videos from a folder self.sampler.add_video(self.video_path) else: video = VideoStream(self.video_path).start() # Track frame number frame_num = 0 start_time = time.time() print("[CAM {}] START TIME {}: ".format(self.camera_num, start_time)) while True: if self.use_cv2: success, image, self.location = self.sampler.read() if not success: if self.verbose: print("[CAM {}] URL: {}, END FRAME: {}".format( self.name, self.video_path, frame_num)) break else: image = video.read() if image is None: if self.verbose: print("[CAM {}] URL: {}, END FRAME: {}".format( self.name, self.video_path, frame_num)) break # Attach metadata to frame, transform into JSON message = self.transform(frame=image, frame_num=frame_num, location=self.location, object_key=self.object_key, camera=self.camera_num, verbose=self.verbose) self.sizecnt += 1 if time.time() - self.timer > self.report_range: acc = self.sizecnt #if self.verbose: print("[Cam {}]Minute {} send out size {}".format( self.camera_num, int(self.timer - self.zerotime) // self.report_range, acc)) self.sizecnt = 0 self.timer = time.time() # Callback function def on_send_success(record_metadata): print(record_metadata.topic) print(record_metadata.partition) print(record_metadata.offset) def on_send_error(excp): print(excp) # log.error('I am an errback', exc_info=excp) # Partition to be sent to part = frame_num % self.topic_partitions # Logging # Publish to specific partition if self.verbose: print("\r[PRODUCER][Cam {}] FRAME: {} TO PARTITION: {}".format( message["camera"], frame_num, part)) frame_producer.send( self.frame_topic, key="{}_{}".format(self.camera_num, frame_num), value=message).add_callback(on_send_success).add_errback( on_send_error) else: frame_producer.send(self.frame_topic, key="{}_{}".format(self.camera_num, frame_num), value=message) # if frame_num % 1000 == 0: frame_producer.flush() frame_num += 1 if self.use_cv2: self.sampler.release() else: video.stop() if self.verbose: print("[CAM {}] FINISHED. STREAM TIME {}: ".format( self.camera_num, time.time() - start_time)) return True if frame_num > 0 else False
setOffset(database, offset)
logger.info(database + '------------->' + str(offset))
mongo_db = mongo_con.get_database(database)
# stream = mongo_db.watch(full_document='updateLookup',
#                         start_at_operation_time=bson.timestamp.Timestamp(int(offset), 1))

# filter collections: drop the tables on the blacklist
blacklist = getBlackList(database)
if blacklist:
    stream = mongo_db.watch([{'$match': {'ns.coll': {'$nin': blacklist}}}],
                            full_document='updateLookup',
                            start_at_operation_time=bson.timestamp.Timestamp(int(offset), 1))
else:
    stream = mongo_db.watch(full_document='updateLookup',
                            start_at_operation_time=bson.timestamp.Timestamp(int(offset), 1))

producer = KafkaProducer(bootstrap_servers=hosts_producer_arr)
partition = producer.partitions_for(topic)
numPartitions = len(partition)
logger.info('***************** start sending data *****************')

for change in stream:
    msg = bytes(dumps(change, ensure_ascii=False), encoding='utf8')
    jsondata = str(msg, 'utf-8')
    text = json.loads(jsondata)
    tb = text['ns']['db'] + '.' + text['ns']['coll']
    i = abs(getHashCode(tb)) % numPartitions
    if 'fullDocument' in text and text['fullDocument'] is not None:
        msg_data = {}
        full_doc = text['fullDocument']
        # lowercase the keys inside fullDocument
        doc = key2lower(full_doc)
        if "_id" in doc and type(doc["_id"]) == dict and "$oid" in doc["_id"]:
            doc["_id"] = doc["_id"]["$oid"]
class BaseMintSearch(object):
    logger = logging.getLogger()
    logger.setLevel(logging.WARNING)  # master switch for the log level

    path = '../log'
    folder = os.path.exists(path)
    if not folder:  # create the log folder if it does not exist yet
        os.makedirs(path)
        print('created ' + path)
    else:
        print('log folder already exists')

    log_name = path + '/log.log'
    logfile = log_name
    file_handler = logging.FileHandler(logfile, mode='a+')
    file_handler.setLevel(logging.ERROR)  # log level written to the file

    # define the handler's output format
    formatter = logging.Formatter(
        "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
    )
    file_handler.setFormatter(formatter)
    # attach the handler to the logger
    logger.addHandler(file_handler)

    # if output to the terminal is also needed, define a StreamHandler
    print_handler = logging.StreamHandler()  # print to the screen
    print_handler.setFormatter(formatter)    # format shown on the screen
    logger.addHandler(print_handler)

    def __init__(self, url_pattern, server='localhost:9092', topic='test'):
        self.num = 0
        self.url_pattrn = url_pattern
        self.logger = BaseMintSearch.logger
        self.server = server
        self.topic = topic
        self.producer = KafkaProducer(bootstrap_servers=self.server)
        self.partition = set()
        self.kafka_check('kafka_check')

    def __del__(self):
        if self.producer is not None:
            self.producer.close(5)

    def request(self, flow: HTTPFlow):
        self.num = self.num + 1
        ctx.log.info("We've seen %d flows" % self.num)

    def response(self, flow: HTTPFlow):
        pass

    def kafka_check(self, test_data):
        self.producer = KafkaProducer(bootstrap_servers=self.server)
        self.partition = self.producer.partitions_for(self.topic)
        print('host:' + self.server + ' topic:' + self.topic +
              ' available partitions: ' + str(self.partition))
        for partition in self.partition:
            info = self.producer.send(topic=self.topic,
                                      value=test_data.encode('utf-8'),
                                      partition=partition).get(timeout=5)
            print('partition: {}, offset: {}'.format(partition, info.offset))
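A brief usage sketch, assuming the class above is meant to run as a mitmproxy addon (it implements request/response hooks and uses HTTPFlow and ctx from mitmproxy); the URL pattern, broker address and topic below are illustrative placeholders, and a reachable Kafka broker is assumed since the constructor immediately runs kafka_check().

# Hypothetical addon registration for mitmproxy; run with e.g.
#   mitmdump -s this_script.py
addons = [
    BaseMintSearch(url_pattern=r'.*example\.com.*',
                   server='localhost:9092',
                   topic='test'),
]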
# d = pd.DataFrame(a)
# d = d.append({'a': 0, 'c': 2}, ignore_index=True)
# print(d)

import json
import socket

import pandas as pd
from kafka import KafkaProducer, KafkaConsumer
from kafka.errors import KafkaError

bootstrap_servers = '172.16.100.31:9092,172.16.100.29:9092,172.16.100.30:9092'
producer = KafkaProducer(bootstrap_servers=bootstrap_servers.split(','),
                         api_version=(0, 10),
                         retries=5)
topic_name = 'Topic_Live_Heartbeat_Msg'
partitions = producer.partitions_for(topic_name)

d = pd.read_csv('live_data.csv').astype('int32').to_dict('records')
for i in d[:1]:
    future = producer.send(topic_name,
                           value=json.dumps(i).encode('utf-8'),
                           key='test'.encode('utf-8'))
    # producer.flush()
    future.get()

# consumer_id = 'Gid_Real_Time_Live_Heartbeat_Msg'
# consumer = KafkaConsumer(bootstrap_servers=bootstrap_servers,
#                          group_id=consumer_id,
#                          api_version=(0, 10))

# class Kafka(object):
#     def __init__(self, broker):
access_secret = "L0ygY1KY0NxePWhfok70PDGJCklVvjM6nMOTeIwh3gLBqBo"

# creating an OAuthHandler instance
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
# The access token is the “key” for opening the Twitter API treasure box
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)

# Creating a Kafka Producer instance
myproducer = KafkaProducer(bootstrap_servers='sandbox.hortonworks.com:6667',
                           acks=1, retries=1)
print("List of Partitions for topic 'ktwitter': " +
      str(myproducer.partitions_for('ktwitter')))

with open('twitterhandle.txt', 'r+') as f:
    for handle in f:
        print("Getting Tweets from " + str(handle).strip())
        alltweets = []
        tweets = api.user_timeline(screen_name=handle, count=10)
        alltweets.extend(tweets)
        for x in range(len(alltweets)):
            # print(alltweets[x].id, alltweets[x].lang, alltweets[x].source,
            #       alltweets[x].retweeted, type(alltweets[x].text),
            #       alltweets[x].text.encode('utf-8'))
            try:
                future = myproducer.send(
                    'ktwitter',
                    value=alltweets[x].text.encode('utf-8'))
# -*- coding:utf8 -*-
"""
@author: [email protected]
@date: Mon, Nov 28
@time: 13:25:55 GMT+8
"""
from kafka import KafkaProducer

topic = 'dnshj'
bootstrap_servers = "10.0.0.156:9092,10.0.0.156:9093,10.0.0.156:9094,10.0.0.156:9095,10.0.0.156:9096"
producer = KafkaProducer(bootstrap_servers=bootstrap_servers)
print(producer.partitions_for(topic))

# for i in range(1, 100):
#     producer.send(topic, 'some_message_byddddddds %s' % i)
def run(self): """Publish video frames as json objects, timestamped, marked with camera number. Source: self.video_path: URL for streaming video self.kwargs["use_cv2"]: use raw cv2 streaming, set to false to use smart fast streaming --> not every frame is sent. Publishes: A dict {"frame": string(base64encodedarray), "dtype": obj.dtype.str, "shape": obj.shape, "timestamp": time.time(), "camera": camera, "frame_num": frame_num} """ if self.rr_distribute: partitioner = RoundRobinPartitioner(partitions= [TopicPartition(topic=self.frame_topic, partition=i) for i in range(self.topic_partitions)]) else: partitioner = Murmur2Partitioner(partitions= [TopicPartition(topic=self.frame_topic, partition=i) for i in range(self.topic_partitions)]) # Producer object, set desired partitioner frame_producer = KafkaProducer(bootstrap_servers=["kafka1-kafka-brokers:9092"], key_serializer=lambda key: str(key).encode(), value_serializer=lambda value: json.dumps(value).encode(), partitioner=partitioner) print("[CAM {}] URL: {}, SET PARTITIONS FOR FRAME TOPIC: {}".format(self.camera_num, self.video_path, frame_producer.partitions_for( self.frame_topic))) # Use either option video = cv2.VideoCapture(self.video_path) if self.use_cv2 else FileVideoStream(self.video_path).start() #video.set(cv2.CAP_PROP_FPS,30) # Track frame number frame_num = 0 start_time = time.time() print("[CAM {}] START TIME {}: ".format(self.camera_num, start_time)) # Read URL, Transform, Publish while True: # using raw cv2, frame by frame if self.use_cv2: success, image = video.read() # check if the file has read if not success: if self.verbose: print("[CAM {}] URL: {}, END FRAME: {}".format(self.name, self.video_path, frame_num)) break # using smart, only unique frames, skips frames, faster fps else: image = video.read() # check if the file has read if image is None: if self.verbose: print("[CAM {}] URL: {}, END FRAME: {}".format(self.name, self.video_path, frame_num)) break # Attach metadata to frame, transform into JSON message = self.transform(frame=image, frame_num=frame_num, object_key=self.object_key, camera=self.camera_num, verbose=self.verbose) # Partition to be sent to part = frame_num % self.topic_partitions # Logging if self.verbose: print("\r[PRODUCER][Cam {}] FRAME: {} TO PARTITION: {}".format(message["camera"], frame_num, part)) # Publish to specific partition frame_producer.send(self.frame_topic, key="{}-{}".format(self.camera_num, frame_num), value=message) # if frame_num % 1000 == 0: frame_producer.flush() frame_num += 1 # clear the capture if self.use_cv2: video.release() else: video.stop() if self.verbose: print("[CAM {}] FINISHED. STREAM TIME {}: ".format(self.camera_num, time.time() - start_time)) return True if frame_num > 0 else False
class Kafka(GenericTool.Tool): def __init__(self, controllerIp, controllerPort, toolName, toolDesc, defaultTool, supportProxy=0, proxyIp=None, proxyPort=None, sslSupport=True): """ Kafka agent @param controllerIp: controller ip/host @type controllerIp: string @param controllerPort: controller port @type controllerPort: integer @param toolName: agent name @type toolName: string @param toolDesc: agent description @type toolDesc: string @param defaultTool: True if the agent is started by the server, False otherwise @type defaultTool: boolean """ GenericTool.Tool.__init__(self, controllerIp, controllerPort, toolName, toolDesc, defaultTool, supportProxy=supportProxy, proxyIp=proxyIp, proxyPort=proxyPort, sslSupport=sslSupport) self.__type__ = __TYPE__ self.__mutex__ = threading.RLock() def getType(self): """ Returns agent type @return: agent type @rtype: string """ return self.__type__ def onCleanup(self): """ Cleanup all In this function, you can stop your program """ pass def initAfterRegistration(self): """ Called on successful registration In this function, you can start your program automatically. """ self.onToolLogWarningCalled("Starting dummy agent") self.onToolLogWarningCalled("Dummy agent started") self.onPluginStarted() def pluginStarting(self): """ Function to reimplement """ pass def onPluginStarted(self): """ Function to reimplement """ pass def pluginStopped(self): """ Function to reimplement """ pass def onResetAgentCalled(self): """ Function to reimplement """ pass def onToolLogWarningCalled(self, msg): """ Logs warning on main application @param msg: warning message @type msg: string """ pass def onToolLogErrorCalled(self, msg): """ Logs error on main application @param msg: error message @type msg: string """ pass def onToolLogSuccessCalled(self, msg): """ Logs success on main application @param msg: error message @type msg: string """ pass def onAgentAlive(self, client, tid, request): """ Called on keepalive received from test server {'task-id': 'xx', 'from': 'tester', 'destination-agent': 'xxxxx', 'source-adapter': 'xx', 'script-name': 'xxxx', 'script_id': 'xxx', 'data': 'xxx', 'event': 'agent-init', 'test-id': 'xxx'} @param client: server address ip/port @type client: tuple @param tid: transaction id @type tid: integer @param request: request received from the server @type request: dict """ pass def onAgentInit(self, client, tid, request): """ Called on init received from test server {'task-id': 'xx', 'from': 'tester', 'destination-agent': 'xxxxx', 'source-adapter': 'xx', 'script-name': 'xxxx', 'script_id': 'xxx', 'data': 'xxx', 'event': 'agent-init', 'test-id': 'xxx'} @param client: server address ip/port @type client: tuple @param tid: transaction id @type tid: integer @param request: request received from the server @type request: dict """ self.onToolLogWarningCalled(msg="init called: %s" % request['data']) self.sendNotify(request=request, data="notify sent") def onAgentReset(self, client, tid, request): """ Called on reset received from test server {'task-id': 'xx', 'from': 'tester', 'destination-agent': 'xxxxx', 'source-adapter': 'xx', 'script-name': 'xxxx', 'script_id': 'xxx', 'data': 'xxx', 'event': 'agent-init', 'test-id': 'xxx'} or {'event': 'agent-reset', 'source-adapter': '1', 'script_id': '7_3_0'} @param client: server address ip/port @type client: tuple @param tid: transaction id @type tid: integer @param request: request received from the server @type request: dict """ if 'data' in request: self.onToolLogWarningCalled(msg="reset called: %s" % request['data']) 
else: self.onToolLogWarningCalled(msg="reset called") def onAgentNotify(self, client, tid, request): """ Called on notify received from test server and dispatch it {'task-id': 'xx', 'from': 'tester', 'destination-agent': 'xxxxx', 'source-adapter': 'xx', 'script-name': 'xxxx', 'script_id': 'xxx', 'data': 'xxx', 'event': 'agent-init', 'test-id': 'xxx'} @param client: server address ip/port @type client: tuple @param tid: transaction id @type tid: integer @param request: request received from the server @type request: dict """ self.__mutex__.acquire() self.onToolLogWarningCalled(msg="notify received: %s" % request['data']) if request['uuid'] in self.context(): if request['source-adapter'] in self.context()[request['uuid']]: ctx_test = self.context()[request['uuid']][request['source-adapter']] self.execAction(request) else: self.error("Adapter context does not exists TestUuid=%s AdapterId=%s" % (request['uuid'], request['source-adapter'] ) ) else: self.error("Test context does not exits TestUuid=%s" % request['uuid']) self.__mutex__.release() def execAction(self, request): """ Execute action """ currentTest = self.context()[request['uuid']][request['source-adapter']] self.onToolLogWarningCalled( "<< Starting Command=%s TestId=%s AdapterId=%s" % (request['data']['cmd'], request['script_id'], request['source-adapter']) ) try: cmd = request['data']['cmd'] data = request['data'] # connect if cmd == 'producer_connect': # init kargs=data['kargs'] try: self.producer = KafkaProducer(bootstrap_servers=data['bootstrap_servers'], **kargs ) self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'connected' } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'producer_send': kargs=data['kargs'] try: future = self.producer.send(data['topic'], **kargs) record_metadata=future.get(timeout=data['timeout']) self.sendNotify(request=request, data={ "cmd": cmd , 'result': record_metadata } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'producer_flush': try: self.producer.flush(data['timeout']) self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'flushed' }) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'producer_partitions_for': try: partitions = self.producer.partitions_for(data['topic']) self.sendNotify(request=request, data={ "cmd": cmd , 'result': partitions }) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'producer_close': try: self.producer.close(int(data['timeout'])) self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'closed' }) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_connect': kargs=data['kargs'] try: if not data['topics']: self.consumer = KafkaConsumer(bootstrap_servers=data['bootstrap_servers'], **kargs) else: self.consumer = KafkaConsumer(data['topics'][0], bootstrap_servers=data['bootstrap_servers'], **kargs) self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'connected' }) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_consume': try: for msg in self.consumer : self.sendNotify(request=request, data={ "cmd": cmd , 'result': msg } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_close': try: self.consumer.close(data['autocommit']) 
self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'closed' } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_assign': try: self.consumer.assign(data['partitions']) self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'assigned' } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_assignment': try: topicpartitions = self.consumer.assignment() self.sendNotify(request=request, data={ "cmd": cmd , 'topicpartitions': topicpartitions } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_beginning_offsets': try: offsets = self.consumer.beginning_offsets(data['partitions']) self.sendNotify(request=request, data={ "cmd": cmd , 'offsets': offsets } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_commit': try: self.consumer.commit(data['offsets']) self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'committed' } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_commit_async': try: future = self.consumer.commit_async(offsets=data['offsets'],callback=data['callback']) self.sendNotify(request=request, data={ "cmd": cmd , 'future': future } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_committed': try: offsets = self.consumer.committed(data['topicpartition']) self.sendNotify(request=request, data={ "cmd": cmd , 'offsets': offsets } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_end_offsets': try: partitions = self.consumer.end_offsets(data['partitions']) self.sendNotify(request=request, data={ "cmd": cmd , 'partitions': partitions } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_highwater': try: offset = self.consumer.highwater(data['partition']) self.sendNotify(request=request, data={ "cmd": cmd , 'offset': offset } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_offsets_for_times': try: offsets = self.consumer.offsets_for_times(data['timestamps']) self.sendNotify(request=request, data={ "cmd": cmd , 'offsets': offsets } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_partitions_for_topic': try: partitions = self.consumer.partitions_for_topic(data['topic']) self.sendNotify(request=request, data={ "cmd": cmd , 'partitions': partitions } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_pause': try: self.consumer.pause(data['partitions']) self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'success' } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_paused': try: partitions=self.consumer.paused() self.sendNotify(request=request, data={ "cmd": cmd , 'partitions': partitions } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_poll': try: records = self.consumer.poll(timeout_ms=data['timeout_ms'], max_records=data['max_records']) self.sendNotify(request=request, data={ "cmd": cmd , 'records': records } ) except KafkaError as e: 
self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_position': try: offset = self.consumer.position(data['topicpartition']) self.sendNotify(request=request, data={ "cmd": cmd , 'offset': offset } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_resume': try: self.consumer.resume(data['partitions']) self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'success' } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_seek': try: self.consumer.seek(data['partition'],data['offset']) self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'success' } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_seek_to_beginning': try: self.consumer.seek_to_beginning(*data['partitions']) self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'success' } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_seek_to_end': try: self.consumer.seek_to_end(*data['partitions']) self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'success' } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_subscribe': try: self.consumer.subscribe(topics=data['topics'], pattern=data['pattern'], listener=data['listener']) self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'success' } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_subscription': try: topics=self.consumer.subscription() self.sendNotify(request=request, data={ "cmd": cmd , 'topics': topics } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_topics': try: topics = self.consumer.topics() self.sendNotify(request=request, data={ "cmd": cmd , 'topics': topics } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) elif cmd == 'consumer_unsubscribe': try: self.consumer.unsubscribe() self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'success' } ) except KafkaError as e: self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} ) # unknown command else: raise Exception('cmd not supported: %s' % request['data']['cmd'] ) except Exception as e: self.error( 'unable to run command: %s' % str(e) ) self.sendError( request , data="unable to run command") self.onToolLogWarningCalled( "<< Terminated Command=%s TestId=%s AdapterId=%s" % (request['data']['cmd'], request['script-id'], request['source-adapter']) )
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Time    : 2020/5/27 14:24
# @Author  : way
# @Site    :
# @Describe:

import json
from random import choice

from kafka import KafkaProducer

servers = [
    '172.16.122.17:9092',
]
producer = KafkaProducer(
    bootstrap_servers=servers,
    key_serializer=lambda m: m.encode('utf-8'),
    value_serializer=lambda m: json.dumps(m).encode('utf-8'))

topic = 'test'
key = 'hi'
value = 'way'
partitions = producer.partitions_for(topic)  # get all partitions and spread writes evenly across them
for i in range(10000):
    producer.send(topic=topic,
                  partition=choice(list(partitions)),
                  key=key,
                  value=value).get(timeout=10)
def dataPub():
    cnt = 0
    producer = KafkaProducer(bootstrap_servers='10.4.10.239:9092')  # connect to Kafka
    devNum = len(producer.partitions_for('testyg'))
    print(producer.metrics())
    st = time.time()
    for i in range(30):
        print(i)
        # if i % 2 == 0:
        img = Image.open(pwd + 'imgs/test.jpg')
        # else:
        #     img = Image.open('../imgs/army.jpg')
        # img = clintInf.transformData(img)
        img = pickle.dumps(img)
        print(len(img))
        # producer.send('byzantine', str(msg[i]).encode('utf-8'))
        # producer.send('result', str(i).encode())
        producer.send('testyg', img, str(i).encode())
        producer.flush()
        # time.sleep(1)
    print('end')
    log.logSend("INFO " + localhost + " published 30 msgs!")

    colDevs = []
    for msg in consumer:
        print(cnt)
        # print(msg)
        # if cnt == 0:
        #     st = time.time()
        cnt += 1
        if colDevs.count(msg.key.decode()) == 0:
            colDevs.append(msg.key.decode())
        if cnt == 30:
            # ed = time.time()
            # print(ed - st)
            ed = time.time()
            consumer.close()
            costTime = ed - st
            log.logSend("INFO " + localhost + "'s data handling done, cost time " + str(costTime))
            taskInfo = {
                'name': 'dataOffload',
                'type': 'classification',
                'startDevice': localhost,
                'dataNum': 30,
                'devNum': devNum,
                'colDevs': colDevs,
                'latency': ed - st
            }
            js = json.dumps(taskInfo)
            conn.lpush("kafkaTasks", js)
    producer.close()
conf = setting.kafka_setting
print(conf)

context = ssl.create_default_context()
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
context.verify_mode = ssl.CERT_REQUIRED
# context.check_hostname = True
context.load_verify_locations("ca-cert")

producer = KafkaProducer(bootstrap_servers=conf['bootstrap_servers'],
                         sasl_mechanism="PLAIN",
                         ssl_context=context,
                         security_protocol='SASL_SSL',
                         api_version=(0, 10),
                         retries=5,
                         sasl_plain_username=conf['sasl_plain_username'],
                         sasl_plain_password=conf['sasl_plain_password'])

partitions = producer.partitions_for(conf['topic_name'])
print('Partitions of the topic: %s' % partitions)

try:
    future = producer.send(conf['topic_name'], b'hello aliyun-kafka!')
    future.get()
    print('send message succeed.')
except KafkaError as e:
    print('send message failed.')
    print(e)
from kafka import KafkaProducer, KafkaConsumer
import time
from PIL import Image
import pickle
import redis
import json

from MyConsumer import *

consumer = KafkaConsumer('result',
                         group_id="test_group_1",
                         bootstrap_servers=['10.4.10.239:9092'])
cnt = 0
producer = KafkaProducer(bootstrap_servers='10.4.10.239:9092')  # connect to Kafka
devNum = len(producer.partitions_for('testyg'))
print(devNum)
print(producer.metrics())
clintInf = ClientInf(0, 18)
# msg = [1, 2, 3, 4]
st = time.time()
for i in range(30):
    print(i)
    # if i % 2 == 0:
    img = Image.open('../imgs/test.jpg')
    # else:
    #     img = Image.open('../imgs/army.jpg')
    # img = clintInf.transformData(img)