Example #1
class KafkaPipeline(object):
    @classmethod
    def from_crawler(cls, crawler):
        settings = crawler.settings
        name = crawler.spider.name
        return cls(name, **settings)

    def __init__(self, name, **kwargs):
        self.kafkaproducer = KafkaProducer(
            bootstrap_servers=kwargs.get('KAFKA_SERVERS'),
            key_serializer=lambda m: m.encode('utf-8'),
            value_serializer=lambda m: json.dumps(m).encode('utf-8'))
        self.partitions = list(
            self.kafkaproducer.partitions_for(name))  # get all partitions

    def process_item(self, item, spider):
        """
        :param item:
        :param spider:
        :return: item (data is routed to per-table keys and written to Kafka)
        """
        new_item = {key: value for key, value in item.items()}
        new_item['ctime'] = time.strftime("%Y-%m-%d %H:%M:%S",
                                          time.localtime())
        try:
            self.kafkaproducer.send(topic=spider.name,
                                    partition=choice(self.partitions),
                                    key=item.tablename,
                                    value=new_item).get(timeout=10)
            logger.info(f"入库成功 <= 主题:{spider.name} key名:{item.tablename}")
        except Exception as e:
            logger.error(
                f"入库失败 <= 主题:{spider.name} key名:{item.tablename} 错误原因:{e}")
        return item
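To wire Example #1 into a Scrapy project, the pipeline also has to be enabled in the project settings; a minimal sketch, assuming a hypothetical module path and broker list:

# Hypothetical settings.py excerpt for the pipeline above
ITEM_PIPELINES = {
    "myproject.pipelines.KafkaPipeline": 300,   # placeholder module path
}
KAFKA_SERVERS = ["localhost:9092"]              # placeholder broker list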
Example #2
def produce_to_bruce(schema, args, config):
    topic = config['kafka']['topic']

    if args.partition_count:
        partition_count = args.partition_count
    else:
        print('fetch partition info for topic ' + topic)
        producer = KafkaProducer(bootstrap_servers = config['kafka']['brokers'])
        partition_count = 1 + max(producer.partitions_for(topic))
        producer.close()

    socket = bruce.open_bruce_socket()

    # batching socket send
    buff = []

    def flush_buff():
        for msg in buff:
            socket.sendto(msg, '/var/run/bruce/bruce.socket')
        del buff[:]

    def f_produce(topic, partition, key, value):
        # flush the batch once it reaches 1000 queued messages, then queue the current one
        if len(buff) >= 1000:
            flush_buff()
        buff.append(bruce.create_msg(partition, topic, bytes(key), bytes(value)))

    try:
        bootstrap(f_produce, partition_count, schema, args.database, args.table, config)
        flush_buff()
    except KeyboardInterrupt:
        sys.exit(1)
    finally:
        socket.close()
Example #3
    def kafka_check(self, server, topic, test_data):
        producer = KafkaProducer(bootstrap_servers=server)
        partitions = producer.partitions_for(topic)
        self.logger.info('host:' + server + ' topic:' + topic +
                         ' available partitions: ' + str(partitions))
        for partition in partitions:
            try:
                info = producer.send(topic=topic,
                                     value=test_data.encode('utf-8'),
                                     partition=partition).get(5)
                self.logger.info(
                    'current host: {}, current topic: {}, current partition: {}, offset: {}'.format(
                        server, topic, partition, info.offset))
            except Exception:
                self.logger.error(
                    'could not send message to host:{}, topic:{}, partition:{}.'
                    .format(server, topic, partition))
Example #4
def send_bytes_to_kafka():
    producer = KafkaProducer(bootstrap_servers=broker_address)
    print(producer.partitions_for(topic))

    for i in range(1, 100):
        print(i)
        producer.send(topic, value=b'msg %d' % i).get(30)
        time.sleep(1)
Example #5
def main():
    producer = KafkaProducer(
        bootstrap_servers="worker2.hengan.shop:9092",
        value_serializer=lambda m: json.dumps(m).encode('utf-8'))
    for i in range(1):
        ack = producer.send('foobar2', {"name": "a" + str(i), "age": i + 10})
        metadata = ack.get()
        print(metadata.topic)
        print(metadata.partition)
    producer.flush()
    print(producer.partitions_for('foobar2'))
Example #6
def generate_kafka_producer_consumer(config):
    topic = config['kafka']['topic']
    kafka_producer = KafkaProducer(bootstrap_servers=config['kafka']['brokers'])
    partition_count = 1 + max(kafka_producer.partitions_for(topic))

    def consume(key, value):
        database = key['database']
        key_str = json.dumps(key, separators=(',',':'))
        value_str = json.dumps(value, separators=(',',':'))
        partition = abs(java_string_hashcode(database) % partition_count)
        kafka_producer.send(topic, key=key_str, value=value_str, partition=partition)

    return consume
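Examples #6 and #9 pick the partition with a java_string_hashcode helper that is not shown; a minimal sketch, assuming the helper simply mirrors Java's String.hashCode():

def java_string_hashcode(s):
    """Hypothetical helper reproducing Java's String.hashCode() for a Python string."""
    h = 0
    for ch in s:
        h = (31 * h + ord(ch)) & 0xFFFFFFFF  # keep the running value within 32 bits
    # reinterpret as a signed 32-bit integer, as Java does
    return h - 0x100000000 if h >= 0x80000000 else h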
Example #7
def get_partitions_list(topic):
    """Get the list of the partitions for topic

    Arguments:
    topic - name of the topic
    
    Return:
    partitions_list - the set of partition ids for the topic

    """
    producer = KafkaProducer(bootstrap_servers='localhost:9092')
    partitions_list = producer.partitions_for(topic)
    return partitions_list
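KafkaProducer.partitions_for() returns a set of partition ids rather than a list, so a hedged usage sketch of Example #7 (the topic name is a placeholder) might look like:

# 'my-topic' is a placeholder; sort the returned set if a stable order is needed
partitions = sorted(get_partitions_list('my-topic'))
print(partitions)  # e.g. [0, 1, 2]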
Example #8
def produce_to_kafka(schema, args, config):
    topic = config['kafka']['topic']
    producer = KafkaProducer(bootstrap_servers = config['kafka']['brokers'])

    def f_produce(topic, partition, key, value):
        producer.send(topic, key = key, value = value, partition = partition)

    partition_count = 1 + max(producer.partitions_for(topic))
    try:
        bootstrap(f_produce, partition_count, schema, args.database, args.table, config)
    except KeyboardInterrupt:
        sys.exit(1)
    producer.flush()
    producer.close()
Example #9
def generate_kafka_producer_consumer(config):
    topic = config['kafka']['topic']
    kafka_producer = KafkaProducer(
        bootstrap_servers=config['kafka']['brokers'])
    partition_count = 1 + max(kafka_producer.partitions_for(topic))

    def consume(key, value):
        database = key['database']
        key_str = json.dumps(key, separators=(',', ':'))
        value_str = json.dumps(value, separators=(',', ':'))
        partition = abs(java_string_hashcode(database) % partition_count)
        kafka_producer.send(topic,
                            key=key_str,
                            value=value_str,
                            partition=partition)

    return consume
Example #10
def generate_stream(**kwargs):

    # set up the producer
    producer = KafkaProducer(bootstrap_servers=['kafka:9092'],
                             value_serializer=lambda x: dumps(x).encode('utf-8'))

    # load the stream sample file
    stream_sample = pickle.load(open(os.getcwd() + kwargs['path_stream_sample'], "rb"))

    # the stream sample contains 20000 observations; 200 of them are selected at random
    rand = random.sample(range(0, 20000), 200)

    x_new = stream_sample[0]
    y_new = stream_sample[1]

    logging.info('Partitions: %s', producer.partitions_for('TopicA'))

    for i in rand:
        json_comb = encode_to_json(x_new[i], y_new[i])  # pick an observation and encode it to JSON
        producer.send('TopicA', value=json_comb)        # send the encoded observation to the Kafka topic
        logging.info("Sent number: {}".format(y_new[i]))
        sleep(1)

    producer.close()
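Example #10 calls an encode_to_json helper that is not shown; a minimal sketch, assuming it just packs one observation and its label into a JSON-serializable dict:

def encode_to_json(x, y):
    """Hypothetical helper: pack a single observation and its label for json.dumps."""
    features = x.tolist() if hasattr(x, "tolist") else list(x)
    return {"features": features, "label": int(y)}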
Example #11
def generate_stream(**kwargs):
    # We create a Kafka producer
    producer = KafkaProducer(
        bootstrap_servers=['kafka:9092'],
        value_serializer=lambda x: dumps(x).encode('utf-8'))

    # Create some sample data; for demonstration purposes we just take some samples from the initial training data.
    # This could be your continuous flow of incoming data.
    movements_stream_input = pd.read_csv(
        "../../../data/SmartMovementExport.csv")
    # From the whole input set, take random index for 500 new training examples
    rand = random.sample(range(0, len(movements_stream_input)), 500)
    logging.info('Streaming some data over Kafka now; partitions: %s',
                 producer.partitions_for('MovementsTopic'))

    for i in rand:
        json_stream_data = encode_as_json(movements_stream_input.iloc[i])
        producer.send('MovementsTopic', value=json_stream_data)
        sleep(1)

    producer.close()
Example #12
def submit_add_topic():
    """在add_topic頁面按下submit後的動作,取得使用者填寫的資料並建立topic"""
    try:
        message = "<h3>Result</h3>"
        topic = request.form.get("input_topic")

        producer = KafkaProducer(bootstrap_servers=tmpbootstrap_servers)
        par = producer.partitions_for(topic)
        producer.flush()

        consumer2.close()
        message = message + "<h4>Success to add topic!</B></h4>"
        message = message + "topic name: " + topic + "<BR>"

    except Exception as e:
        message = message + "<B>Fail to add topic : " + topic + "<B><BR>Detail:"
        message = message + str(e)

        message = message + "<hr><a href='index'>Back to index</a><BR>"
    message = message + "<a href='help'>help</a>"

    return message
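Example #12 relies on the partitions_for() metadata request to trigger broker-side topic auto-creation, which only works when the broker allows it; a hedged alternative sketch that creates the topic explicitly with kafka-python's admin client (partition and replication counts are placeholders):

from kafka.admin import KafkaAdminClient, NewTopic

# tmpbootstrap_servers and topic are the same names used in Example #12
admin = KafkaAdminClient(bootstrap_servers=tmpbootstrap_servers)
admin.create_topics([NewTopic(name=topic, num_partitions=1, replication_factor=1)])
admin.close()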
Example #13
class Producer:
    """
    Wrapper around the kafka-python KafkaProducer
    """
    def __init__(self):
        pass

    def __enter__(self):
        self.cfg = Config().cfg
        self.producer = KafkaProducer(
            bootstrap_servers=self.cfg["serList"],
            # api_version=self.cfg["apiVersion"],
            api_version_auto_timeout_ms=self.cfg["autoVersionTimeout"],
            security_protocol=self.cfg["protocol"],
            sasl_mechanism=self.cfg["mechanism"],
            sasl_kerberos_service_name=self.cfg["kerverosSerName"],
        )
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.producer.close()

    def flush(self):
        """
        Make all buffered records immediately ready to send (usually called after send when a flush is needed).
        :return:
        """
        try:
            self.producer.flush(timeout=TIME_OUT)
        except KafkaTimeoutError:
            log.tag_error(KafkaInfo.KafkaProducer,
                          "Flush buffered record failed, TimeOut")
            raise ActionError(KafkaErr.FlushFailed)

    def metrics(self):
        """
        Get the producer's performance metrics (covering each Kafka broker).
        :return:
        """
        performance = self.producer.metrics()
        return performance

    def partition_set_get(self, topic_name: str):
        """
        Get all partitions of the topic.
        :param topic_name:
        :return: set
        """
        return self.producer.partitions_for(topic_name)

    def send_message(self, topic_name: str, value: bytes, key: str):
        """
        Publish data with the producer.
        :param topic_name: topic where the message will be published
        :param value: message value
        :param key: key to associate with the message
        :return:
        """
        try:
            result = self.producer.send(topic_name,
                                        value=value,
                                        key=key.encode("utf-8")).add_errback(
                                            self.send_err,
                                            topic=topic_name,
                                            value=value,
                                            key=key)
        except KafkaTimeoutError:
            log.tag_warn(
                KafkaInfo.KafkaProducer,
                "Kafka send data timeout, topic: %s, key: %s, msg: %s" %
                (topic_name, key, value.decode("utf-8")))
            raise ActionError(KafkaErr.SendDataFailed)
        return result

    @staticmethod
    def send_err(topic: str, value: bytes, key: str):
        """
        producer send data failed callback function
        :param topic:
        :param value:
        :param key:
        :return:
        """
        log.tag_error(
            KafkaInfo.KafkaProducer, "Kafka send data failed, topic: %s, "
            "key: %s msg: %s" % (topic, key, value.decode("utf-8")))
        raise ActionError(KafkaErr.SendDataFailed)
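A short hedged usage sketch of the Producer wrapper above; the context manager builds the underlying KafkaProducer from Config(), and the topic name, key, and payload below are placeholders:

with Producer() as p:
    print(p.partition_set_get("demo-topic"))              # set of partition ids
    p.send_message("demo-topic", value=b'{"hello": 1}', key="demo-key")
    p.flush()                                              # push buffered records out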
Example #14
class ProducerClient(TestAdapterLib.Adapter):
	@doc_public
	def __init__ (self, parent, name=None, bootstrap_servers=None, 
											debug=False, agentSupport=False, agent=None, 
											shared=False, verbose=True, 
											logEventSent=True, logEventReceived=True):
		"""
		KAFKA Producer client Adapter. Mapping of kafka-python KafkaProducer
		
		@param parent: parent testcase
		@type parent: testcase
		
		@param bootstrap_servers: Kafka brokers used to bootstrap at connect call (list of ip address:port)
		@type bootstrap_servers: List
		
		@param agent: agent to use when this mode is activated
		@type agent: string/None
		
		@param name: adapter name used with from origin/to destination (default=None)
		@type name: string/none
		
		@param debug: active debug mode (default=False)
		@type debug:	boolean
		
		@param shared: shared adapter (default=False)
		@type shared:	boolean
		"""
		TestAdapterLib.Adapter.__init__(self, name = __NAME__, parent = parent, 
																									debug=debug, realname=name,
																									agentSupport=agentSupport, 
																									agent=agent, shared=shared,
																									caller=TestAdapterLib.caller(),
																									agentType=AGENT_TYPE_EXPECTED)
		self.parent = parent
		self.codecX2D = Xml2Dict.Xml2Dict()
		self.codecD2X = Dict2Xml.Dict2Xml(coding = None)
		self.logEventSent = logEventSent
		self.logEventReceived = logEventReceived
		self.parent = parent
		self.cfg = {}
		if agent is not None:
			self.cfg['agent'] = agent
			self.cfg['agent-name'] = agent['name']
		self.cfg['agent-support'] = agentSupport
		
		self.TIMER_ALIVE_AGT = TestAdapterLib.Timer(parent=self, duration=20, 
																																			name="keepalive-agent", 
																																			callback=self.aliveAgent,
																																			logEvent=False, enabled=True)
		self.__checkConfig()
		
		# initialize the agent with no data
		if agent is not None:
			if self.cfg['agent-support']:
				self.prepareAgent(data={'shared': shared})
				if self.agentIsReady(timeout=30) is None: 
					raise Exception("Agent %s is not ready" % self.cfg['agent-name'] )
				self.TIMER_ALIVE_AGT.start()

		self.bootstrap_servers = bootstrap_servers

		if debug:
			self.__getKafkaClientLogger()

	def __checkConfig(self):
		"""
		"""
		self.debug("config: %s" % self.cfg)		
		self.warning('Agent used Name=%s Type=%s' % (self.cfg['agent']['name'], 
																																										self.cfg['agent']['type']) ) 
	
	
	def encapsule(self, *input_layers):
		"""
		Encapsule layers in template message
		"""
		if self.cfg['agent-support']:
			layer_agent= TestTemplatesLib.TemplateLayer('AGENT')
			layer_agent.addKey(name='name', data=self.cfg['agent']['name'] )
			layer_agent.addKey(name='type', data=self.cfg['agent']['type'] )

		tpl = TestTemplatesLib.TemplateMessage()

		if self.cfg['agent-support']:
			tpl.addLayer(layer=layer_agent)
		for layer in input_layers:
			tpl.addLayer(layer=layer)

		return tpl
		
		
	def onReset(self):
		"""
		Called automatically on adapter reset
		"""
		# stop timer
		self.TIMER_ALIVE_AGT.stop()
		
		# cleanup remote agent
		self.resetAgent()

	def receivedNotifyFromAgent(self, data):
		"""
		Function to reimplement
		"""
		if 'cmd' in data:
			if data['cmd'] == AGENT_INITIALIZED:
				tpl = TestTemplatesLib.TemplateMessage()
				layer = TestTemplatesLib.TemplateLayer('AGENT')
				layer.addKey("ready", True)
				layer.addKey(name='name', data=self.cfg['agent']['name'] )
				layer.addKey(name='type', data=self.cfg['agent']['type'] )
				tpl.addLayer(layer= layer)
				self.logRecvEvent( shortEvt = "Agent Is Ready" , tplEvt = tpl )

			elif data['cmd'] == "producer_{0}".format(CONNECT):
				self.__kafka_connected = True
				tpl = templates.kafka_ops(method=CONNECT,bootstrap_servers=self.bootstrap_servers)
				self.logRecvEvent( shortEvt = "connected", tplEvt = self.encapsule(self.producerTpl ,tpl))

			elif data['cmd'] == "producer_{0}".format(SEND):
				record_metadata = data['result']
				self.__kafka_send = True
				rec = { "Topic":record_metadata[0], 
										"Partition": record_metadata[1] , 
										"Offset":record_metadata[3] , 
										"Timestamp": record_metadata[4] ,
										"Checksum": record_metadata[5], 
										"Serialized_key_size": record_metadata[6], 
										"Serialized_value_size": record_metadata[7]}
				tpl = templates.kafka_ops(method=SEND, more=rec)
				self.logRecvEvent( shortEvt = "sended", tplEvt =  self.encapsule(self.producerTpl ,tpl))

			elif data['cmd'] =="producer_{0}".format(FLUSH) :
				tpl = templates.kafka_ops(method=FLUSH)
				self.logRecvEvent( shortEvt = "flushed", tplEvt =  self.encapsule(self.producerTpl ,tpl))	

			elif data['cmd'] =="producer_{0}".format(PARTITIONS_FOR) :
				partitions = data['result']
				tpl = templates.kafka_ops(method=PARTITIONS_FOR, partitions=partitions)
				self.logRecvEvent( shortEvt = "partitions_for", tplEvt =  self.encapsule(self.producerTpl ,tpl))				
			elif data['cmd'] == "producer_{0}".format(CLOSE):
				tpl = templates.kafka_ops(method=CLOSE)
				self.logRecvEvent( shortEvt = "closed", tplEvt =  self.encapsule(self.producerTpl ,tpl))			
		else:
			self.warning( 'Notify received from agent: %s' % data )

	def receivedErrorFromAgent(self, data):
		"""
		Function to reimplement
		"""
		if "cmd" in data:
			if data['cmd'] in [ CONNECT, CLOSE, SEND, FLUSH,PARTITIONS_FOR	]:
				tpl = self.encapsule(self.producerTpl, templates.response_err(msg=data['err-msg'], method=data['cmd'] ))
				self.logRecvEvent( shortEvt = "response error", tplEvt = tpl )
				
			else:
				self.error("unknown command received: %s" % data["cmd"])
				
		else:
			self.error( 'Generic error: %s' % data )

	def receivedDataFromAgent(self, data):
		"""
		Function to reimplement
		"""
		self.warning( 'Data received from agent: %s' % data )
		
	def prepareAgent(self, data):
		"""
		Prepare agent
		"""
		self.parent.sendReadyToAgent(adapterId=self.getAdapterId(), 
																								agentName=self.cfg['agent-name'], 
																								agentData=data)
																										
	def initAgent(self, data):
		"""
		Init agent
		"""
		self.parent.sendInitToAgent(adapterId=self.getAdapterId(), 
																						agentName=self.cfg['agent-name'], 
																						agentData=data)
																								
	def resetAgent(self):
		"""
		Reset agent
		"""
		self.parent.sendResetToAgent(adapterId=self.getAdapterId(), 
																								agentName=self.cfg['agent-name'], 
																								agentData='')
																										
	def aliveAgent(self):
		"""
		Keep alive agent
		"""
		self.parent.sendAliveToAgent(adapterId=self.getAdapterId(), 
																							agentName=self.cfg['agent-name'], 
																							agentData='')
		self.TIMER_ALIVE_AGT.restart()

	def sendInitToAgent(self, data):
		"""
		"""
		self.parent.sendInitToAgent(adapterId=self.getAdapterId(), 
																						agentName=self.cfg['agent-name'], 
																						agentData=data)
																								
	def sendNotifyToAgent(self, data):
		"""
		"""
		self.parent.sendNotifyToAgent(adapterId=self.getAdapterId(), 
																								agentName=self.cfg['agent-name'], 
																								agentData=data)

	def sendResetToAgent(self, data):
		"""
		"""
		self.parent.sendResetToAgent(adapterId=self.getAdapterId(), 
																							agentName=self.cfg['agent-name'], 
																							agentData=data)

	def agentIsReady(self, timeout=1.0):
		"""
		Waits to receive "agent ready" event until the end of the timeout
		
		@param timeout: time max to wait to receive event in second (default=1s)
		@type timeout: float	
		
		@return: an event matching with the template or None otherwise
		@rtype: templatemessage		
		"""
		tpl = TestTemplatesLib.TemplateMessage()
		layer = TestTemplatesLib.TemplateLayer('AGENT')
		layer.addKey("ready", True)
		layer.addKey(name='name', data=self.cfg['agent']['name'] )
		layer.addKey(name='type', data=self.cfg['agent']['type'] )
		tpl.addLayer(layer= layer)
		evt = self.received( expected = tpl, timeout = timeout )
		return evt
		
	def __getKafkaClientLogger(self):

		logger = logging.getLogger('kafka')
		logger.addHandler(logging.StreamHandler(sys.stdout))
		logger.setLevel(logging.DEBUG)

	@doc_public
	def connect(self, **kargs ):
		"""
		Instantiate the KafkaProducer and Fetch Kafka Cluster Metadata

		@param kargs: keyword arguments from KafkaProducer class: 
		@type kargs: keyword 
		
		"""
		if 'bootstrap_servers' in kargs:
			bootstrap_servers = kargs.pop('bootstrap_servers')
		else:
			bootstrap_servers=self.bootstrap_servers

		# Log connection start event
		self.producerTpl = templates.kafka_connect(api=PRODUCER,bootstrap_servers=bootstrap_servers, **kargs)
		tpl = templates.kafka_ops(method=CONNECT,bootstrap_servers=bootstrap_servers, **kargs)
		self.logSentEvent( shortEvt = "connection", tplEvt = self.encapsule(self.producerTpl,tpl))

		self.__kafka_connected = False

		# Agent mode
		if self.cfg['agent-support']:
			remote_cfg = {
							'cmd': "producer_{0}".format(CONNECT),
							'bootstrap_servers': bootstrap_servers,
							'kargs': kargs
						}
			self.sendNotifyToAgent(data=remote_cfg)
				
		else:
			try:
				self.producer = KafkaProducer(bootstrap_servers=bootstrap_servers, **kargs )
				tpl = templates.kafka_ops(method=CONNECT,bootstrap_servers=bootstrap_servers, **kargs)
				self.logRecvEvent( shortEvt = "connected", tplEvt = self.encapsule(self.producerTpl,tpl))
			except KafkaError  as e:
				tpl = self.encapsule(self.producerTpl,  templates.response_err(msg=e, method=CONNECT ))
				self.logRecvEvent( shortEvt = "response error", tplEvt = tpl )

	@doc_public			
	def send(self, topic, **kargs):
		"""
		Publish a message to a topic.

		@topic (str): topic where the message will be published    
		
		@value (optional): message value as bytes.
		
		@partition (int, optional): optionally specify a partition. If not set, the partition will be selected using the configured 'partitioner'.
		
		@key (optional): a key to associate with the message. Can be used to determine which partition to send the message to. 
		
		@timestamp_ms (int, optional): epoch milliseconds (from Jan 1 1970 UTC) to use as the message timestamp. Defaults to current time.
		"""		
		tpl = templates.kafka_ops(method=SEND, **kargs)
		self.logSentEvent( shortEvt = "req send", tplEvt = self.encapsule(self.producerTpl ,tpl))

		# Timeout for retrieving the record metadata
		if "timeout" in kargs:
			timeout = kargs.pop("timeout")
		else:
			timeout=2
		if self.cfg['agent-support']:
			remote_cfg = {
							'cmd': "producer_{0}".format(SEND),
							'topic': topic,
							'timeout': timeout,
							'kargs': kargs
						}
			self.sendNotifyToAgent(data=remote_cfg)
		else:
			try:
				future = self.producer.send(topic,**kargs)
				record_metadata=future.get(timeout=timeout)

				rec = { "Topic":record_metadata[0], 
										"Partition": record_metadata[1] , 
										"Offset":record_metadata[3] , 
										"Timestamp": record_metadata[4] ,
										"Checksum": record_metadata[5], 
										"Serialized_key_size": record_metadata[6], 
										"Serialized_value_size": record_metadata[7]}
				tpl = templates.kafka_ops(method=SEND, more=rec)
				self.logRecvEvent( shortEvt = "resp send", tplEvt =  self.encapsule(self.producerTpl,tpl))
			except KafkaError  as e:
				tpl = self.encapsule(self.producerTpl,  templates.response_err(msg=e, method=SEND ))
				self.logRecvEvent( shortEvt = "response error", tplEvt = tpl )

	@doc_public	
	def partitions_for(self, topic):
		"""
		Full function documentation is available at http://kafka-python.readthedocs.io.
		"""		
		tpl = templates.kafka_ops(method=PARTITIONS_FOR, topic=topic)
		self.logSentEvent( shortEvt = "req partitions_for", tplEvt = self.encapsule(self.producerTpl ,tpl))

		if self.cfg['agent-support']:
			remote_cfg = {
							'cmd': "producer_{0}".format(PARTITIONS_FOR),
							'topic': topic
						}
			self.sendNotifyToAgent(data=remote_cfg)
		else:
			try:
				partitions = self.producer.partitions_for(topic)	
				tpl = templates.kafka_ops(method=PARTITIONS_FOR,topic=topic, partitions=partitions)
				self.logRecvEvent( shortEvt = "resp partitions_for", tplEvt =  self.encapsule(self.producerTpl,tpl))	
			except KafkaError  as e:
				tpl = self.encapsule(self.producerTpl,  templates.response_err(msg=e, method=PARTITIONS_FOR ))
				self.logRecvEvent( shortEvt = "response error", tplEvt = tpl )				

	@doc_public
	def flush(self, timeout=None):
		"""
		Full function documentation is available at http://kafka-python.readthedocs.io.
		"""		
		tpl = templates.kafka_ops(method=FLUSH, timeout=timeout)
		self.logSentEvent( shortEvt = "req flush", tplEvt = self.encapsule(self.producerTpl,tpl))	

		if self.cfg['agent-support']:
			remote_cfg = {
							'cmd': "producer_{0}".format(FLUSH),
							'timeout': timeout
						}
			self.sendNotifyToAgent(data=remote_cfg)
		else:
			try:
				self.producer.flush(timeout)	
				tpl = templates.kafka_ops(method=FLUSH)
				self.logRecvEvent( shortEvt = "resp flush", tplEvt =  self.encapsule(self.producerTpl,tpl))	
			except KafkaError  as e:
				tpl = self.encapsule(self.producerTpl,  templates.response_err(msg=e, method=FLUSH ))
				self.logRecvEvent( shortEvt = "response error", tplEvt = tpl )

	@doc_public
	def close(self, timeout=None):
		"""
		Full function documentation is available at http://kafka-python.readthedocs.io.
		"""		
		tpl = templates.kafka_ops(method=CLOSE, timeout=timeout)
		self.logSentEvent( shortEvt = "req close", tplEvt = self.encapsule(self.producerTpl,tpl))	

		if self.cfg['agent-support']:
			remote_cfg = {
							'cmd': "producer_{0}".format(CLOSE),
							'timeout': timeout
						}
			self.sendNotifyToAgent(data=remote_cfg)
		else:
			try:
				self.producer.close(timeout=timeout)
				tpl = templates.kafka_ops(method=CLOSE,timeout=timeout)
				self.logRecvEvent( shortEvt = "closed", tplEvt =  self.encapsule(self.producerTpl,tpl))	
			except KafkaError  as e:
				tpl = self.encapsule(self.producerTpl,  templates.response_err(msg=e, method=CLOSE ))
				self.logRecvEvent( shortEvt = "response error", tplEvt = tpl )
				
	@doc_public
	def isSend(self, timeout=2, record=None):
		"""
		Wait to receive response from "send" request and match returned RecordMetadata  until the end of the timeout.

		@param timeout: time max to wait to receive event in second (default=2s)
		
		@type timeout: float	
		
		@param record: Optional RecordMetadata that we expect the send to return
		
		@type record:  RecordMetadata
		"""
		TestAdapterLib.check_timeout(caller=TestAdapterLib.caller(), timeout=timeout)
		
		if record == None:
			record = { "Topic":TestOperatorsLib.Any(), 
											"Partition": TestOperatorsLib.Any(), 
											"Offset":TestOperatorsLib.Any() , 
											"Timestamp":TestOperatorsLib.Any() ,
											"Checksum": TestOperatorsLib.Any(), 
											"Serialized_key_size":TestOperatorsLib.Any(),
											"Serialized_value_size": TestOperatorsLib.Any()}
		expected = templates.kafka_ops(method=SEND, more=record)
		# try to match the template 
		evt = self.received( expected=self.encapsule( self.producerTpl ,expected ), timeout=timeout )
		return evt

	@doc_public		
	def isConnect(self, timeout=2):
		"""
		Wait to receive response from "connect" request until the end of the timeout

		@param timeout: time max to wait to receive event in second (default=2s)
		@type timeout: float		
		"""
		TestAdapterLib.check_timeout(caller=TestAdapterLib.caller(), timeout=timeout)
		
		# construct the expected template
		expected = templates.kafka_ops(method=CONNECT, bootstrap_servers=self.bootstrap_servers)
		# try to match the template 
		evt = self.received( expected=self.encapsule( self.producerTpl ,expected), timeout=timeout )
		return evt	

	@doc_public
	def isFlush(self, timeout=2):
		"""
		Wait to receive response from "flush" request until the end of the timeout

		@param timeout: time max to wait to receive event in second (default=2s)
		@type timeout: float		
		"""
		TestAdapterLib.check_timeout(caller=TestAdapterLib.caller(), timeout=timeout)
		
		# construct the expected template
		expected = templates.kafka_ops(method=FLUSH)
		# try to match the template 
		evt = self.received( expected=self.encapsule( self.producerTpl ,expected), timeout=timeout )
		return evt		
	
	@doc_public    
	def isClose(self, timeout=2):
		"""
		Wait to receive response from "close" request until the end of the timeout

		@param timeout: time max to wait to receive event in second (default=2s)
		@type timeout: float		
		"""
		TestAdapterLib.check_timeout(caller=TestAdapterLib.caller(), timeout=timeout)
		
		# construct the expected template
		expected = templates.kafka_ops(method=CLOSE)
		# try to match the template 
		evt = self.received( expected=self.encapsule( self.producerTpl ,expected), timeout=timeout )
		return evt		
		
	@doc_public
	def isPartitions_for(self, timeout=2,partitions=None):
		"""
		Wait to receive response from "partitions_for" request and match returned Topics until the end of the timeout.

		@param timeout: time max to wait to receive event in second (default=2s)
		@type timeout: float		

		@param partitions: Optional set of partitions that we expect the producer to see
		@type partitions: set of partitions
		"""
		TestAdapterLib.check_timeout(caller=TestAdapterLib.caller(), timeout=timeout)

		if partitions == None:
			partitions= { "partitions":TestOperatorsLib.Any()}
		expected = templates.kafka_ops(method=PARTITIONS_FOR,more=partitions)
		# try to match the template 
		evt = self.received( expected=self.encapsule( self.producerTpl ,expected), timeout=timeout )
		return evt	
Example #15
        logger.error(traceback.format_exc())
        sys.exit(msg)

    logger.info('KafkaProducer successfully initialized')
    for i in range(len(conf.target_websites)):
        logger.info(
            "Checking availability of {0} website every {1} seconds".format(
                conf.target_websites[i],
                str(conf.delta_times_availability_check_sec[i])))
        logger.info("Pattern to match - {0}".format(
            conf.patterns_expected_to_be_found[i]))
    logger.info("Sending info to topic {0}".format(conf.website_checker_topic))

    # check whether the number of partitions is configured properly
    enough_partitions = True
    partitions = producer.partitions_for(conf.website_checker_topic)
    if (len(partitions) != len(conf.target_websites)):
        logger.warning("Increase number of partitions for topic {0} . \
         Number of partitions (current = {1}) needs to be equal to the number of target_websites specified in settings.ini = {2}" \
         .format(conf.website_checker_topic,len(partitions),len(conf.target_websites) ))
        enough_partitions = False

    threads = list()
    for i in range(len(conf.target_websites)):
        partitions_assigned = i if enough_partitions else 0
        thread = Thread(target = website_check, args = (producer, logger, partitions_assigned, conf.website_checker_topic, conf.target_websites[i], \
                                                            conf.patterns_expected_to_be_found[i], conf.delta_times_availability_check_sec[i],))
        thread.start()
        threads.append(thread)

    for t in threads:
Example #16
def error(exception):
    # handle exception
    print('> message unsent with exception:', exception)


producer = KafkaProducer(
    acks=1,
    # acks='all',
    retries=5,
    # compression_type='lz4',  # left unset, so kafka-python's default (no compression) is used
    request_timeout_ms=60000,
    bootstrap_servers=bootstrap_servers,
    value_serializer=lambda m: json.dumps(m).encode('ascii'))

partitions = list(producer.partitions_for(topic))
sendingCount = len(partitions) * 1000

for i in range(sendingCount):
    partition = i % len(partitions)
    message['partition'] = partition

    print('> send message to %s with partition %d. %s' %
          (topic, partition, message['data']['url']))

    # produce asynchronously with callbacks
    producer.send(topic=topic,
                  value=message,
                  partition=partition,
                  key=str.encode(
                      message['id'])).add_callback(success).add_errback(error)
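Example #16 registers a success callback that is not shown in the snippet; a minimal sketch, assuming it only logs the RecordMetadata returned once the broker acknowledges the send:

def success(record_metadata):
    # record_metadata is the kafka-python RecordMetadata for the acknowledged message
    print('> message sent to %s [partition %d] at offset %d' % (
        record_metadata.topic, record_metadata.partition, record_metadata.offset))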
Example #17
    def run(self):
        """Publish video frames as json objects, timestamped, marked with camera number.
        Source:
            self.video_path: URL for streaming video
            self.kwargs["use_cv2"]: use raw cv2 streaming, set to false to use smart fast streaming --> not every frame is sent.
        Publishes:
            A dict {"frame": string(base64encodedarray), "dtype": obj.dtype.str, "shape": obj.shape,
                    "timestamp": time.time(), "camera": camera, "frame_num": frame_num}
        """
        if self.rr_distribute:
            partitioner = RoundRobinPartitioner(partitions=[
                TopicPartition(topic=self.frame_topic, partition=i)
                for i in range(self.topic_partitions)
            ])
        else:
            partitioner = Murmur2Partitioner(partitions=[
                TopicPartition(topic=self.frame_topic, partition=i)
                for i in range(self.topic_partitions)
            ])

        # Producer object, set desired partitioner
        frame_producer = KafkaProducer(
            bootstrap_servers=[params.KAFKA_BROKER],
            key_serializer=lambda key: str(key).encode(),
            value_serializer=lambda value: json.dumps(value).encode(),
            partitioner=partitioner,
            max_request_size=134217728)

        print("[CAM {}] URL: {}, SET PARTITIONS FOR FRAME TOPIC: {}".format(
            self.camera_num, self.video_path,
            frame_producer.partitions_for(self.frame_topic)))
        # Use either option
        if self.use_cv2:
            # video = cv2.VideoCapture(self.video_path)
            # Here we use sampler to read all videos from a folder
            self.sampler.add_video(self.video_path)
        else:
            video = VideoStream(self.video_path).start()

        # Track frame number
        frame_num = 0
        start_time = time.time()
        print("[CAM {}] START TIME {}: ".format(self.camera_num, start_time))

        while True:
            if self.use_cv2:
                success, image, self.location = self.sampler.read()
                if not success:
                    if self.verbose:
                        print("[CAM {}] URL: {}, END FRAME: {}".format(
                            self.name, self.video_path, frame_num))
                    break
            else:
                image = video.read()
                if image is None:
                    if self.verbose:
                        print("[CAM {}] URL: {}, END FRAME: {}".format(
                            self.name, self.video_path, frame_num))
                    break
            # Attach metadata to frame, transform into JSON
            message = self.transform(frame=image,
                                     frame_num=frame_num,
                                     location=self.location,
                                     object_key=self.object_key,
                                     camera=self.camera_num,
                                     verbose=self.verbose)
            self.sizecnt += 1
            if time.time() - self.timer > self.report_range:
                acc = self.sizecnt
                #if self.verbose:
                print("[Cam {}]Minute {} send out size {}".format(
                    self.camera_num,
                    int(self.timer - self.zerotime) // self.report_range, acc))
                self.sizecnt = 0
                self.timer = time.time()

            # Callback function
            def on_send_success(record_metadata):
                print(record_metadata.topic)
                print(record_metadata.partition)
                print(record_metadata.offset)

            def on_send_error(excp):
                print(excp)
                # log.error('I am an errback', exc_info=excp)

            #  Partition to be sent to
            part = frame_num % self.topic_partitions
            # Logging
            # Publish to specific partition
            if self.verbose:
                print("\r[PRODUCER][Cam {}] FRAME: {} TO PARTITION: {}".format(
                    message["camera"], frame_num, part))
                frame_producer.send(
                    self.frame_topic,
                    key="{}_{}".format(self.camera_num, frame_num),
                    value=message).add_callback(on_send_success).add_errback(
                        on_send_error)
            else:
                frame_producer.send(self.frame_topic,
                                    key="{}_{}".format(self.camera_num,
                                                       frame_num),
                                    value=message)

            # if frame_num % 1000 == 0:
            frame_producer.flush()

            frame_num += 1

        if self.use_cv2:
            self.sampler.release()
        else:
            video.stop()

        if self.verbose:
            print("[CAM {}] FINISHED. STREAM TIME {}: ".format(
                self.camera_num,
                time.time() - start_time))

        return True if frame_num > 0 else False
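The self.transform call in Example #17 is not shown; a minimal sketch of a payload builder matching the dict described in the docstring (the helper name and the exact base64 encoding are assumptions):

import base64
import time

def transform_frame(frame, frame_num, camera):
    """Hypothetical helper: serialize one numpy frame into the documented JSON payload."""
    return {
        "frame": base64.b64encode(frame.tobytes()).decode("utf-8"),
        "dtype": frame.dtype.str,
        "shape": frame.shape,
        "timestamp": time.time(),
        "camera": camera,
        "frame_num": frame_num,
    }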
Example #18
            setOffset(database,offset)

        logger.info(database+'------------->'+str(offset))
        mongo_db = mongo_con.get_database(database)
        # stream = mongo_db.watch(full_document = 'updateLookup',start_at_operation_time=bson.timestamp.Timestamp(int(offset),1))
        ## filter collections: drop tables that are on the blacklist
        blacklist=getBlackList(database)
        if blacklist:
            stream = mongo_db.watch([{'$match': {'ns.coll': {'$nin':blacklist}}}],
                                full_document = 'updateLookup',start_at_operation_time=bson.timestamp.Timestamp(int(offset),1))
        else:
            stream = mongo_db.watch(full_document='updateLookup',
                                    start_at_operation_time=bson.timestamp.Timestamp(int(offset), 1))

        producer = KafkaProducer(bootstrap_servers = hosts_producer_arr)
        partition = producer.partitions_for(topic)
        numPartitions = len(partition)

        logger.info('***************** start sending data *****************')
        for change in stream:
            msg =bytes(dumps(change,ensure_ascii=False),encoding='utf8')
            jsondata = str(msg,'utf-8')
            text = json.loads(jsondata)
            tb = text['ns']['db']+'.'+text['ns']['coll']
            i = abs(getHashCode(tb)) %numPartitions
            if 'fullDocument' in text and text['fullDocument']!=None:
                msg_data = {}
                full_doc = text['fullDocument']  # convert the keys inside fullDocument to lowercase
                doc = key2lower(full_doc)
                if "_id" in doc and type(doc["_id"]) == dict and "$oid" in doc["_id"]:
                    doc["_id"] = doc["_id"]["$oid"]
Example #19
class BaseMintSearch(object):
    logger = logging.getLogger()
    logger.setLevel(logging.WARNING)  # log level switch

    path = '../log'
    folder = os.path.exists(path)
    if not folder:  # create the log folder if it does not already exist
        os.makedirs(path)
        print('created ' + path)
    else:
        print('folder already exists')

    log_name = path + '/log.log'
    logfile = log_name
    file_handler = logging.FileHandler(logfile, mode='a+')
    file_handler.setLevel(logging.ERROR)  # log level for output to the file

    # define the handler's output format
    formatter = logging.Formatter(
        "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
    )
    file_handler.setFormatter(formatter)

    # add the handler to the logger
    logger.addHandler(file_handler)

    # to also print to the terminal, define a StreamHandler
    print_handler = logging.StreamHandler()  # output to the screen
    print_handler.setFormatter(formatter)  # format used for on-screen output
    logger.addHandler(print_handler)

    def __init__(self, url_pattern, server='localhost:9092', topic='test'):
        self.num = 0
        self.url_pattrn = url_pattern
        self.logger = BaseMintSearch.logger
        self.server = server
        self.topic = topic
        self.producer = KafkaProducer(bootstrap_servers=self.server)
        self.partition = set()
        self.kafka_check('kafka_check')
        pass

    def __del__(self):
        if self.producer is not None:
            self.producer.close(5)

    def request(self, flow: HTTPFlow):
        self.num = self.num + 1
        ctx.log.info("Weve seen %d flows" % self.num)

    def response(self, flow: HTTPFlow):
        pass

    def kafka_check(self, test_data):
        self.producer = KafkaProducer(bootstrap_servers=self.server)
        self.partition = self.producer.partitions_for(self.topic)
        print('host:' + self.server + ' topic:' + self.topic + ' available partitions: ' +
              str(self.partition))
        for partition in self.partition:
            info = self.producer.send(topic=self.topic,
                                      value=test_data.encode('utf-8'),
                                      partition=partition).get(5)
            print('current partition: {}, offset: {}'.format(partition, info.offset))
Example #20
# d = pd.DataFrame(a)
# d = d.append({'a': 0, 'c': 2}, ignore_index=True)
# print(d)

import json
import socket

import pandas as pd
from kafka import KafkaProducer, KafkaConsumer
from kafka.errors import KafkaError

bootstrap_servers = '172.16.100.31:9092,172.16.100.29:9092,172.16.100.30:9092'
producer = KafkaProducer(bootstrap_servers=bootstrap_servers.split(','),
                         api_version=(0, 10),
                         retries=5)
topic_name = 'Topic_Live_Heartbeat_Msg'

partitions = producer.partitions_for(topic_name)
d = pd.read_csv('live_data.csv').astype('int32').to_dict('records')
for i in d[:1]:
    future = producer.send(topic_name,
                           value=json.dumps(i).encode('utf-8'),
                           key='test'.encode('utf-8'))
    # producer.flush()
    future.get()

# consumer_id = 'Gid_Real_Time_Live_Heartbeat_Msg'
# consumer = KafkaConsumer(bootstrap_servers=bootstrap_servers,
#                          group_id=consumer_id,
#                          api_version=(0, 10))

# class Kafka(object):
#     def __init__(self, broker):
Example #21
access_secret = "L0ygY1KY0NxePWhfok70PDGJCklVvjM6nMOTeIwh3gLBqBo"

#creating an OAuthHandler instance
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)

#The access token is the “key” for opening the Twitter API treasure box
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)

#Creating a Kafka Producer Instance
myproducer = KafkaProducer(bootstrap_servers='sandbox.hortonworks.com:6667',
                           acks=1,
                           retries=1)

print("List of Partitions for topic 'ktwitter': " +
      str(myproducer.partitions_for('ktwitter')))

with open('twitterhandle.txt', 'r+') as f:
    for handle in f:

        print("Getting Tweets from " + str(handle).strip())

        alltweets = []
        tweets = api.user_timeline(screen_name=handle, count=10)
        alltweets.extend(tweets)

        for x in range(len(alltweets)):
            #print(alltweets[x].id, alltweets[x].lang,alltweets[x].source,alltweets[x].retweeted,type(alltweets[x].text),alltweets[x].text.encode('utf-8'))
            try:
                future = myproducer.send(
                    'ktwitter', value=alltweets[x].text.encode('utf-8'))
Example #22
# -*- coding:utf8 -*-
"""
@author: [email protected]

@date: Mon, Nov 28

@time: 13:25:55 GMT+8
"""
from kafka import KafkaProducer

topic = 'dnshj'

bootstrap_servers = "10.0.0.156:9092,10.0.0.156:9093,10.0.0.156:9094,10.0.0.156:9095,10.0.0.156:9096"

producer = KafkaProducer(bootstrap_servers=bootstrap_servers)

print(producer.partitions_for(topic))

# for i in range(1, 100):
#     producer.send(topic, 'some_message_byddddddds %s' % i)
Example #23
    def run(self):
        """Publish video frames as json objects, timestamped, marked with camera number.

        Source:
            self.video_path: URL for streaming video
            self.kwargs["use_cv2"]: use raw cv2 streaming, set to false to use smart fast streaming --> not every frame is sent.
        Publishes:
            A dict {"frame": string(base64encodedarray), "dtype": obj.dtype.str, "shape": obj.shape,
                    "timestamp": time.time(), "camera": camera, "frame_num": frame_num}
        """

        if self.rr_distribute:
            partitioner = RoundRobinPartitioner(partitions=
                                                [TopicPartition(topic=self.frame_topic, partition=i)
                                                 for i in range(self.topic_partitions)])

        else:

            partitioner = Murmur2Partitioner(partitions=
                                             [TopicPartition(topic=self.frame_topic, partition=i)
                                              for i in range(self.topic_partitions)])

        # Producer object, set desired partitioner
        frame_producer = KafkaProducer(bootstrap_servers=["kafka1-kafka-brokers:9092"],
                                       key_serializer=lambda key: str(key).encode(),
                                       value_serializer=lambda value: json.dumps(value).encode(),
                                       partitioner=partitioner)

        print("[CAM {}] URL: {}, SET PARTITIONS FOR FRAME TOPIC: {}".format(self.camera_num,
                                                                            self.video_path,
                                                                            frame_producer.partitions_for(
                                                                                self.frame_topic)))
        # Use either option
        video = cv2.VideoCapture(self.video_path) if self.use_cv2 else FileVideoStream(self.video_path).start()
        #video.set(cv2.CAP_PROP_FPS,30)

        # Track frame number
        frame_num = 0
        start_time = time.time()
        print("[CAM {}] START TIME {}: ".format(self.camera_num, start_time))

        # Read URL, Transform, Publish
        while True:

            # using raw cv2, frame by frame
            if self.use_cv2:
                success, image = video.read()
                # check if the file has read
                if not success:
                    if self.verbose:
                        print("[CAM {}] URL: {}, END FRAME: {}".format(self.name,
                                                                       self.video_path,
                                                                       frame_num))
                    break

            # using smart, only unique frames, skips frames, faster fps
            else: 
                image = video.read()
                # check if the file has read
                if image is None:
                    if self.verbose:
                        print("[CAM {}] URL: {}, END FRAME: {}".format(self.name,
                                                                       self.video_path,
                                                                       frame_num))
                    break

            # Attach metadata to frame, transform into JSON
            message = self.transform(frame=image,
                                     frame_num=frame_num,
                                     object_key=self.object_key,
                                     camera=self.camera_num,
                                     verbose=self.verbose)

            # Partition to be sent to
            part = frame_num % self.topic_partitions
            # Logging
            if self.verbose:
                print("\r[PRODUCER][Cam {}] FRAME: {} TO PARTITION: {}".format(message["camera"],
                                                                               frame_num, part))
            # Publish to specific partition
            frame_producer.send(self.frame_topic, key="{}-{}".format(self.camera_num, frame_num), value=message)

            # if frame_num % 1000 == 0:
            frame_producer.flush()

            frame_num += 1

        # clear the capture
        if self.use_cv2:
            video.release()
        else:
            video.stop()

        if self.verbose:
            print("[CAM {}] FINISHED. STREAM TIME {}: ".format(self.camera_num, time.time() - start_time))

        return True if frame_num > 0 else False
Example #24
class Kafka(GenericTool.Tool):
    def __init__(self, controllerIp, controllerPort, toolName, toolDesc, defaultTool, supportProxy=0,
                        proxyIp=None, proxyPort=None, sslSupport=True):
        """
        Kafka agent

        @param controllerIp: controller ip/host
        @type controllerIp: string

        @param controllerPort: controller port
        @type controllerPort: integer

        @param toolName: agent name
        @type toolName: string

        @param toolDesc: agent description
        @type toolDesc: string

        @param defaultTool: True if the agent is started by the server, False otherwise
        @type defaultTool: boolean
        """
        GenericTool.Tool.__init__(self, controllerIp, controllerPort, toolName, toolDesc, defaultTool, 
                                    supportProxy=supportProxy, proxyIp=proxyIp, 
                                    proxyPort=proxyPort, sslSupport=sslSupport)
        self.__type__ = __TYPE__
        self.__mutex__ = threading.RLock()


    def getType(self):
        """
        Returns agent type

        @return: agent type
        @rtype: string
        """
        return self.__type__

    def onCleanup(self):
        """
        Cleanup all
        In this function, you can stop your program
        """
        pass
        
    def initAfterRegistration(self):
        """
        Called on successful registration
        In this function, you can start your program automatically.
        """
        self.onToolLogWarningCalled("Starting dummy agent")
        self.onToolLogWarningCalled("Dummy agent started")
        self.onPluginStarted()
    
    def pluginStarting(self):
        """
        Function to reimplement
        """
        pass
        
    def onPluginStarted(self):
        """
        Function to reimplement
        """
        pass

    def pluginStopped(self):
        """
        Function to reimplement
        """
        pass

    def onResetAgentCalled(self):
        """
        Function to reimplement
        """
        pass
        
    def onToolLogWarningCalled(self, msg):
        """
        Logs warning on main application

        @param msg: warning message
        @type msg: string
        """
        pass

    def onToolLogErrorCalled(self, msg):
        """
        Logs error on main application

        @param msg: error message
        @type msg: string
        """
        pass

    def onToolLogSuccessCalled(self, msg):
        """
        Logs success on main application

        @param msg: error message
        @type msg: string
        """
        pass
    
    def onAgentAlive(self, client, tid, request):
        """
        Called on keepalive received from test server
        {'task-id': 'xx', 'from': 'tester', 'destination-agent': 'xxxxx', 'source-adapter': 'xx', 
        'script-name': 'xxxx', 'script_id': 'xxx', 'data': 'xxx', 'event': 'agent-init', 'test-id': 'xxx'}

        @param client: server address ip/port
        @type client: tuple

        @param tid: transaction id
        @type tid: integer

        @param request: request received from the server
        @type request: dict
        """
        pass
        
    def onAgentInit(self, client, tid, request):
        """
        Called on init received from test server
        {'task-id': 'xx', 'from': 'tester', 'destination-agent': 'xxxxx', 'source-adapter': 'xx', 
        'script-name': 'xxxx', 'script_id': 'xxx', 'data': 'xxx', 'event': 'agent-init', 'test-id': 'xxx'}

        @param client: server address ip/port
        @type client: tuple

        @param tid: transaction id
        @type tid: integer

        @param request: request received from the server
        @type request: dict
        """
        self.onToolLogWarningCalled(msg="init called: %s" % request['data'])
        self.sendNotify(request=request, data="notify sent")

    def onAgentReset(self, client, tid, request):
        """
        Called on reset received from test server
        {'task-id': 'xx', 'from': 'tester', 'destination-agent': 'xxxxx', 'source-adapter': 'xx', 
        'script-name': 'xxxx', 'script_id': 'xxx', 'data': 'xxx', 'event': 'agent-init', 'test-id': 'xxx'}
        or 
        {'event': 'agent-reset', 'source-adapter': '1', 'script_id': '7_3_0'}
        
        @param client: server address ip/port
        @type client: tuple

        @param tid: transaction id
        @type tid: integer

        @param request: request received from the server
        @type request: dict
        """
        if 'data' in request:
            self.onToolLogWarningCalled(msg="reset called: %s" % request['data'])
        else:
            self.onToolLogWarningCalled(msg="reset called")
            
    def onAgentNotify(self, client, tid, request):
        """
        Called on notify received from test server and dispatch it
        {'task-id': 'xx', 'from': 'tester', 'destination-agent': 'xxxxx', 'source-adapter': 'xx', 
        'script-name': 'xxxx', 'script_id': 'xxx', 'data': 'xxx', 'event': 'agent-init', 'test-id': 'xxx'}

        @param client: server address ip/port
        @type client: tuple

        @param tid: transaction id
        @type tid: integer

        @param request: request received from the server
        @type request: dict
        """
        self.__mutex__.acquire()
        self.onToolLogWarningCalled(msg="notify received: %s" % request['data'])

        if request['uuid'] in self.context():
            if request['source-adapter'] in self.context()[request['uuid']]:
                ctx_test = self.context()[request['uuid']][request['source-adapter']]
                self.execAction(request)
            else:
                self.error("Adapter context does not exists TestUuid=%s AdapterId=%s" % (request['uuid'], 
                                                                                         request['source-adapter'] ) )
        else:
            self.error("Test context does not exits TestUuid=%s" % request['uuid'])
        self.__mutex__.release()

    def execAction(self, request):
        """
        Execute action
        """
        currentTest = self.context()[request['uuid']][request['source-adapter']]

        self.onToolLogWarningCalled( "<< Starting Command=%s TestId=%s AdapterId=%s" % (request['data']['cmd'],
                                                                                        request['script_id'], 
                                                                                        request['source-adapter']) )
        try:
            cmd = request['data']['cmd']
            data = request['data']
            # connect
            if cmd == 'producer_connect':
                # init 
                kargs=data['kargs']
                try:
                    self.producer = KafkaProducer(bootstrap_servers=data['bootstrap_servers'], **kargs )
                    self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'connected' } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )	
                    
            elif cmd == 'producer_send':
                kargs=data['kargs']
                try:
                    future = self.producer.send(data['topic'], **kargs)
                    record_metadata=future.get(timeout=data['timeout'])
                    self.sendNotify(request=request, data={ "cmd": cmd , 'result': record_metadata } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'producer_flush':
                try:
                    self.producer.flush(data['timeout'])
                    self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'flushed' })
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'producer_partitions_for':
                try:
                    partitions = self.producer.partitions_for(data['topic'])
                    self.sendNotify(request=request, data={ "cmd": cmd , 'result': partitions })
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'producer_close':
                try:
                    self.producer.close(int(data['timeout']))
                    self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'closed' })
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_connect':
                kargs=data['kargs']
                try:
                    if not data['topics']:
                        self.consumer = KafkaConsumer(bootstrap_servers=data['bootstrap_servers'], **kargs)
                    else:
                        self.consumer = KafkaConsumer(data['topics'][0], bootstrap_servers=data['bootstrap_servers'], **kargs)
                    self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'connected' })
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_consume':
                try:
                    for msg in self.consumer :
                        self.sendNotify(request=request, data={ "cmd": cmd , 'result': msg } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_close':
                try:
                    self.consumer.close(data['autocommit'])
                    self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'closed' } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_assign':
                try:
                    self.consumer.assign(data['partitions'])
                    self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'assigned' } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_assignment':
                try:
                    topicpartitions = self.consumer.assignment()
                    self.sendNotify(request=request, data={ "cmd": cmd , 'topicpartitions': topicpartitions } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_beginning_offsets':
                try:
                    offsets = self.consumer.beginning_offsets(data['partitions'])
                    self.sendNotify(request=request, data={ "cmd": cmd , 'offsets': offsets } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_commit':
                try:
                    self.consumer.commit(data['offsets'])
                    self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'committed' } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_commit_async':
                try:
                    future = self.consumer.commit_async(offsets=data['offsets'],callback=data['callback'])
                    self.sendNotify(request=request, data={ "cmd": cmd , 'future': future } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_committed':
                try:
                    offsets = self.consumer.committed(data['topicpartition'])
                    self.sendNotify(request=request, data={ "cmd": cmd , 'offsets': offsets } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_end_offsets':
                try:
                    partitions = self.consumer.end_offsets(data['partitions'])
                    self.sendNotify(request=request, data={ "cmd": cmd , 'partitions': partitions } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_highwater':
                try:
                    offset = self.consumer.highwater(data['partition'])
                    self.sendNotify(request=request, data={ "cmd": cmd , 'offset': offset } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_offsets_for_times':
                try:
                    offsets = self.consumer.offsets_for_times(data['timestamps'])
                    self.sendNotify(request=request, data={ "cmd": cmd , 'offsets': offsets } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_partitions_for_topic':
                try:
                    partitions = self.consumer.partitions_for_topic(data['topic'])
                    self.sendNotify(request=request, data={ "cmd": cmd , 'partitions': partitions } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_pause':
                try:
                    self.consumer.pause(data['partitions'])
                    self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'success' } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_paused':
                try:
                    partitions=self.consumer.paused()
                    self.sendNotify(request=request, data={ "cmd": cmd , 'partitions': partitions } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_poll':
                try:
                    records = self.consumer.poll(timeout_ms=data['timeout_ms'], max_records=data['max_records'])
                    self.sendNotify(request=request, data={ "cmd": cmd , 'records': records } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_position':
                try:
                    offset = self.consumer.position(data['topicpartition'])
                    self.sendNotify(request=request, data={ "cmd": cmd , 'offset': offset } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_resume':
                try:
                    self.consumer.resume(data['partitions'])
                    self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'success' } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_seek':
                try:
                    self.consumer.seek(data['partition'],data['offset'])
                    self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'success' } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_seek_to_beginning':
                try:
                    self.consumer.seek_to_beginning(*data['partitions'])
                    self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'success' } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_seek_to_end':
                try:
                    self.consumer.seek_to_end(*data['partitions'])
                    self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'success' } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_subscribe':
                try:
                    self.consumer.subscribe(topics=data['topics'], pattern=data['pattern'], listener=data['listener'])
                    self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'success' } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_subscription':
                try:
                    topics=self.consumer.subscription()
                    self.sendNotify(request=request, data={ "cmd": cmd , 'topics': topics } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_topics':
                try:
                    topics = self.consumer.topics()
                    self.sendNotify(request=request, data={ "cmd": cmd , 'topics': topics } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            elif cmd == 'consumer_unsubscribe':
                try:
                    self.consumer.unsubscribe()
                    self.sendNotify(request=request, data={ "cmd": cmd , 'result': 'success' } )
                except KafkaError  as e:
                    self.sendError( request , data={"cmd": cmd , "err-msg": str(e)} )
                    
            # unknown command
            else:
                raise Exception('cmd not supported: %s' % request['data']['cmd'] )
        except Exception as e:
            self.error( 'unable to run command: %s' % str(e) )
            self.sendError( request , data="unable to run command")

        self.onToolLogWarningCalled( "<< Terminated Command=%s TestId=%s AdapterId=%s" % (request['data']['cmd'],
                                                                                          request['script-id'], 
                                                                                          request['source-adapter']) )
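
For reference, a minimal sketch of the notify payloads this dispatcher handles, using the field names from the docstring and dispatcher above; the uuid, adapter id and broker address are placeholder values, and the test context is assumed to have been registered by the framework before dispatch:

# hypothetical payloads; field names follow the notify docstring and the
# dispatcher above, values are placeholders
connect_request = {
    'uuid': 'test-uuid-1', 'source-adapter': 'ADP1', 'script_id': '42',
    'data': {'cmd': 'producer_connect',
             'bootstrap_servers': ['127.0.0.1:9092'],
             'kargs': {'acks': 1}}
}
send_request = {
    'uuid': 'test-uuid-1', 'source-adapter': 'ADP1', 'script_id': '42',
    'data': {'cmd': 'producer_send', 'topic': 'test', 'timeout': 10,
             'kargs': {'value': b'hello'}}
}
# agent.execAction(connect_request)   # would connect the producer
# agent.execAction(send_request)      # would send and notify back the record metadata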
Example #25
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Time : 2020/5/27 14:24
# @Author : way
# @Site :
# @Describe:

import json
from kafka import KafkaProducer
from random import choice

servers = [
    '172.16.122.17:9092',
]
producer = KafkaProducer(
    bootstrap_servers=servers,
    key_serializer=lambda m: m.encode('utf-8'),
    value_serializer=lambda m: json.dumps(m).encode('utf-8'))
topic = 'test'
key = 'hi'
value = 'way'
partitions = producer.partitions_for(topic)  # get all partitions so writes can be spread evenly across them
for i in range(10000):
    producer.send(topic=topic,
                  partition=choice(list(partitions)),
                  key=key,
                  value=value).get(timeout=10)
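
Calling .get(timeout=10) on every future makes each send a blocking round trip. A minimal non-blocking variant of the same loop, assuming the producer, topic, key, value and partitions defined above, is sketched here using the future's callback hooks:

# hypothetical callbacks; add_callback/add_errback are part of the kafka-python
# future API, so sends no longer block per message
def on_send_success(record_metadata):
    print(record_metadata.topic, record_metadata.partition, record_metadata.offset)

def on_send_error(exc):
    print('send failed:', exc)

for i in range(10000):
    producer.send(topic=topic,
                  partition=choice(list(partitions)),
                  key=key,
                  value=value).add_callback(on_send_success).add_errback(on_send_error)
producer.flush()  # block once at the end until all buffered records are delivered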
Example #26
0
def dataPub():

    cnt = 0

    producer = KafkaProducer(bootstrap_servers='10.4.10.239:9092')  # connect to Kafka
    devNum = len(producer.partitions_for('testyg'))

    print(producer.metrics())

    st = time.time()
    for i in range(30):
        print(i)
        # if i%2 == 0:
        img = Image.open(pwd + 'imgs/test.jpg')
        # else:
        #     img = Image.open('../imgs/army.jpg')

        # img = clintInf.transformData(img)

        img = pickle.dumps(img)
        print(img.__len__())
        # producer.send('byzantine', str(msg[i]).encode('utf-8'))  # topic to send to is 'test'
        # producer.send('result', str(i).encode())
        producer.send('testyg', img, str(i).encode())
        producer.flush()
        # time.sleep(1)
    print('end')
    log.logSend("INFO " + localhost + " publish 30 msgs!")

    colDevs = []
    for msg in consumer:
        print(cnt)
        # print(msg)
        # if cnt == 0:
        # st = time.time()
        cnt += 1
        if colDevs.count(msg.key.decode()) == 0:
            colDevs.append(msg.key.decode())
        if cnt == 30:
            # ed =time.time()
            # print(ed-st)
            ed = time.time()
            consumer.close()
    costTime = ed - st

    log.logSend("INFO " + localhost + "'s data handling done, cost time " +
                str(costTime))

    taskInfo = {
        'name': 'dataOffload',
        'type': 'classification',
        'startDevice': localhost,
        'dataNum': 30,
        'devNum': devNum,
        'colDevs': colDevs,
        'latency': ed - st
    }
    js = json.dumps(taskInfo)

    conn.lpush("kafkaTasks", js)

    producer.close()
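
dataPub() iterates over a module-level consumer that is not shown in this snippet; a minimal sketch of how it could be created, mirroring the 'result' consumer set up at module level in Example #28 below:

from kafka import KafkaConsumer

# consumer of the 'result' topic that dataPub() reads acknowledgements from;
# group id and broker address mirror Example #28
consumer = KafkaConsumer('result',
                         group_id="test_group_1",
                         bootstrap_servers=['10.4.10.239:9092'])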
Example #27
0
# -*- coding: utf-8 -*-
# Python 2 example: produce to an SASL_SSL-secured Kafka cluster
import ssl

from kafka import KafkaProducer
from kafka.errors import KafkaError

import setting

conf = setting.kafka_setting

print conf

# build an SSL context that verifies the broker certificate against "ca-cert"
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
context.verify_mode = ssl.CERT_REQUIRED
# context.check_hostname = True
context.load_verify_locations("ca-cert")

producer = KafkaProducer(bootstrap_servers=conf['bootstrap_servers'],
                        sasl_mechanism="PLAIN",
                        ssl_context=context,
                        security_protocol='SASL_SSL',
                        api_version = (0,10),
                        retries=5,
                        sasl_plain_username=conf['sasl_plain_username'],
                        sasl_plain_password=conf['sasl_plain_password'])

partitions = producer.partitions_for(conf['topic_name'])
print 'Partitions for topic: %s' % partitions

try:
    future = producer.send(conf['topic_name'], 'hello aliyun-kafka!')
    future.get()
    print 'send message succeed.'
except KafkaError, e:
    print 'send message failed.'
    print e
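
A consumer against the same secured cluster can reuse the SSL context and SASL credentials built above. A minimal sketch, assuming the same conf dict; the group id is a placeholder and not part of the original setting module:

from kafka import KafkaConsumer

# hypothetical consumer mirroring the producer's SASL_SSL settings; the group id
# below is a placeholder value
consumer = KafkaConsumer(conf['topic_name'],
                         group_id='demo-group',
                         bootstrap_servers=conf['bootstrap_servers'],
                         sasl_mechanism="PLAIN",
                         ssl_context=context,
                         security_protocol='SASL_SSL',
                         api_version=(0, 10),
                         sasl_plain_username=conf['sasl_plain_username'],
                         sasl_plain_password=conf['sasl_plain_password'])
for message in consumer:
    print(message.value)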
Example #28
0
from kafka import KafkaProducer, KafkaConsumer
import time
from PIL import Image
import pickle
import redis
import json
from MyConsumer import *

consumer = KafkaConsumer('result',
                         group_id="test_group_1",
                         bootstrap_servers=['10.4.10.239:9092'])
cnt = 0

producer = KafkaProducer(bootstrap_servers='10.4.10.239:9092')  # connect to Kafka
devNum = len(producer.partitions_for('testyg'))

print(devNum)

print(producer.metrics())
clintInf = ClientInf(0, 18)
# msg = [1,2,3,4]
st = time.time()
for i in range(30):
    print(i)
    # if i%2 == 0:
    img = Image.open('../imgs/test.jpg')
    # else:
    #     img = Image.open('../imgs/army.jpg')

    # img = clintInf.transformData(img)