Example #1
class SimpleProducer(BaseStreamProducer):
    def __init__(self, location, enable_ssl, cert_path, topic, compression, **kwargs):
        self._location = location
        self._topic = topic
        self._compression = compression
        self._create(enable_ssl, cert_path, **kwargs)

    def _create(self, enable_ssl, cert_path, **kwargs):
        max_request_size = kwargs.pop('max_request_size', DEFAULT_MAX_REQUEST_SIZE)
        kwargs.update(_prepare_kafka_ssl_kwargs(cert_path) if enable_ssl else {})
        self._producer = KafkaProducer(bootstrap_servers=self._location,
                                       retries=5,
                                       compression_type=self._compression,
                                       max_request_size=max_request_size,
                                       **kwargs)

    def send(self, key, *messages):
        for msg in messages:
            self._producer.send(self._topic, value=msg)

    def flush(self):
        self._producer.flush()

    def close(self):
        self._producer.close()
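A minimal usage sketch for the class above, assuming DEFAULT_MAX_REQUEST_SIZE and _prepare_kafka_ssl_kwargs are defined in the same module; the broker address and topic are placeholders:

# Hypothetical usage of SimpleProducer; connection details are illustrative only
producer = SimpleProducer(location='localhost:9092',
                          enable_ssl=False,
                          cert_path=None,
                          topic='frontier-events',
                          compression='gzip')
producer.send(None, b'first message', b'second message')  # the key argument is ignored by this implementation
producer.flush()
producer.close()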
Example #2
def create_reservation(request):
    content = {'success': False}
    if request.method != 'POST':
        content['result'] = "Invalid request method. Expected POST."
    else:
        # AUTHENTICATE USER (get customer ID)
        authenticator = request.POST.get('authenticator')
        if not authenticator:
            content['result'] = "User not authenticated: no authenticator provided."
            return JsonResponse(content)
        r = get_user(authenticator)
        if r['success']:
            # call function to put a new listing into model
            url = settings.MODELS_LAYER_URL + "api/reservations/create/"
            dt = json.loads(request.POST['reservation_details'])
            params = dt
            # print(r['user']['id'])
            params['customer'] = r['user']['id']
            content = requests.post(url, params).json()
            if content['success']:
                # add listing into kafka
                reservation_info = content['reservation']
                # reservation_info = json.load(content['reservation'])
                producer = KafkaProducer(bootstrap_servers='kafka:9092')
                new_listing = reservation_info
                producer.send('new-listings-topic', json.dumps(new_listing).encode('utf-8'))

            else:
                # failed to add it to the database
                return JsonResponse(content)
        else:
            content['result'] = "User not authenticated."
    print(content)
    return JsonResponse(content)
Example #3
    def run(self):
        producer = KafkaProducer(bootstrap_servers='localhost:9092')

        while True:
            producer.send('my-topic', b"test")
            producer.send('my-topic', b"\xc2Hola, mundo!")
            time.sleep(1)
Example #4
def create_restaurant(request):
    content = {"success": False}
    if request.method != "POST":
        content["result"] = "GET Request Received. Expected POST."
    else:
        request_url = settings.MODELS_LAYER_URL + "api/restaurants/create/"
        response = requests.post(request_url, data=request.POST)  # POST.dict() or POST?
        r = json.loads(response.content.decode('utf-8'))
        if r['success']:
            # reservation_info = json.load(content['reservation'])
            producer = KafkaProducer(bootstrap_servers='kafka:9092')
            new_listing = request.POST.dict()  # copy: request.POST itself is an immutable QueryDict
            new_listing['restaurant_id'] = r['user']['id']
            producer.send('new-restaurant-topic', json.dumps(new_listing).encode('utf-8'))

        if r['success']:
            url = settings.MODELS_LAYER_URL + "api/auth/authenticator/create/"
            data = json.dumps(r['user'])
            r = requests.post(url, data={'user': data, 'username': request.POST['username'],
                                         'password': request.POST['password']}).json()

            if r['success']:
                content['success'] = True
                content['auth'] = r['auth']
            else:
                content['result'] = 'Models layer failed: ' + r['result']
        else:
            content['result'] = "Models layer failed: " + r['result']

    return JsonResponse(content)
Example #5
def test_end_to_end(kafka_broker):
    connect_str = 'localhost:' + str(kafka_broker.port)
    producer = KafkaProducer(bootstrap_servers=connect_str,
                             max_block_ms=10000,
                             value_serializer=str.encode)
    consumer = KafkaConsumer(bootstrap_servers=connect_str,
                             group_id=None,
                             consumer_timeout_ms=10000,
                             auto_offset_reset='earliest',
                             value_deserializer=bytes.decode)

    topic = random_string(5)

    for i in range(1000):
        producer.send(topic, 'msg %d' % i)
    producer.flush()
    producer.close()

    consumer.subscribe([topic])
    msgs = set()
    for i in range(1000):
        try:
            msgs.add(next(consumer).value)
        except StopIteration:
            break

    assert msgs == set(['msg %d' % i for i in range(1000)])
Example #6
class KafkaMessageSender(object):

    def __init__(self, config_source):

        self.config_source = config_source
        # config_source = "config/producer_config.yml"

        # load configuration parameters
        config = yaml_loader(self.config_source)

        # initialize parameters
        self.topics = config['topics']
        self.port = config['port']

        self.current_topic = self.topics[0]

        self.producer = KafkaProducer(bootstrap_servers=[self.port])

    def send_message(self, messages):
        for message in messages:
            # self.producer.send(self.current_topic, value=message.strip('[]').splitlines()[0])
            print(message.strip('[]'))
            self.producer.send(self.current_topic, value=message.strip('[]').encode('utf-8'))

            # block until all async messages are sent
            self.producer.flush()
Example #7
    def run(self):
        producer = KafkaProducer(bootstrap_servers='localhost:9092')

        while True:
            producer.send('my-topic', b"test for hw08-solution02")
            producer.send('my-topic', b"\you are good ,done!")
            time.sleep(1)
Example #8
def stream_generator():
    rediscon = redis.StrictRedis(host='ec2-52-40-47-83.us-west-2.compute.amazonaws.com', port=6379, db=0, password='')
    producer = KafkaProducer(bootstrap_servers=["52.41.140.111:9092", "52.41.90.5:9092", "52.41.120.152:9092"])
    # redis returns bytes, so cast the 'active' flag to int before comparing
    res = int(rediscon.get('active') or 0)
    tp = random.randrange(900000, 1800001)
    st = int(round(time.time() * 1000))
    diff = 0
    while True:
        if res == 1 and diff == 0:
            tp = random.randrange(900000, 1800001)
            st = int(round(time.time() * 1000))

        if res == 1:
            diff = int(round(time.time() * 1000)) - st
            st1 = 0  # steps
            st2 = 0
            u1 = 0  # user_id
            u2 = 1
            now = datetime.datetime.now() - datetime.timedelta(hours=7)
            hr1 = random.randrange(60, 200)  # heart_rate
            hr2 = random.randrange(60, 200)
            if diff % 1000 == 0:
                st1 = random.randrange(0, 3)
                st2 = random.randrange(0, 3)
                print('-------------------' + str(diff) + '-----------------------')
            data1 = str(now) + "," + str(u1) + "," + str(st1) + "," + str(hr1)
            data2 = str(now) + "," + str(u2) + "," + str(st2) + "," + str(hr2)
            producer.send('stream_test', data1.encode('utf-8'))
            producer.send('stream_test', data2.encode('utf-8'))
            print('*')
            if diff == tp:
                rediscon.set('active', 0)
                res = int(rediscon.get('active') or 0)
                diff = 0
        res = int(rediscon.get('active') or 0)
Example #9
def kafka_producer_call():
    kafka_producer = KafkaProducer(bootstrap_servers=KAFKA_SERVER)
    for i in range(NB_MESSAGES):
        word = "yay"
        kafka_producer.send(KAFKA_TOPIC, word.encode('utf-8'))  # the value must be bytes (no serializer configured)
    kafka_producer.flush()
    return 1
Example #10
class SimpleKafkaProducer:
    def __init__(self):
        self.producer = KafkaProducer(bootstrap_servers=kafka_bootstrap_servers)

    def send_message(self, topic, msg, key=None):
        # print("# sending msg: ", key, msg)
        self.producer.send(topic, msg, key)
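A short sketch of how this wrapper might be used; kafka_bootstrap_servers is expected at module level, and since no serializers are configured the value (and optional key) must already be bytes:

# Hypothetical usage; the bootstrap address and topic are placeholders
kafka_bootstrap_servers = 'localhost:9092'
sender = SimpleKafkaProducer()
sender.send_message('events', b'{"hello": "world"}', key=b'event-1')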
Example #11
def create_listing_exp_api(request):
	auth = request.POST.get('auth')
	user_id = requests.get('http://modelsul:8000/api/v1/get_userid_auth/' + auth).json()
	user_id1 = user_id['resp']['user_id']

	title = request.POST.get('title', 'default')
	category = request.POST.get('category', 'default')
	subcategory = request.POST.get('subcategory', 'default')
	summary = request.POST.get('summary', 'default')
	price = request.POST.get('price', 'default')
	#Needs to verify that the person is authorized
	auth = request.POST.get('auth', 'default')

	post = requests.post('http://modelsul:8000/api/v1/create_post/', data={"user_id": user_id1,
																		"title": title,
																		"category": category,
																		"subcategory": subcategory,
																		"summary": summary,
																		"price":price})
	if  not post.json()['ok']:
		return JsonResponse({})
	else:
		producer = KafkaProducer(bootstrap_servers='kafka:9092')
		some_new_listing = {'title': title, 'description': summary, 'id':post.json()['resp']['id']}
		producer.send('new-listings-topic', json.dumps(some_new_listing).encode('utf-8'))
		return JsonResponse(post.json())
Example #12
class SensorHatLogger:

    """
    Logs the hostname, time (unixtime), temperature, humidity, and pressure to Kafka in JSON format. The data is
    generated by a Raspberry Pi with a Sense Hat: https://www.raspberrypi.org/products/sense-hat/
    
    This captures a reading approximately every 10 seconds.

    TODO: https://github.com/initialstate/wunderground-sensehat/wiki/Part-3.-Sense-HAT-Temperature-Correction
    
    """

    def __init__(self):
        self.producer = KafkaProducer(bootstrap_servers='hdp01.woolford.io:6667')
        self.sense = SenseHat()
        self.sensor_record = dict()

    def read_values_from_sensor(self):
        self.sensor_record['host'] = socket.gethostname()
        self.sensor_record['timestamp'] = int(time.time())
        self.sensor_record['temperature'] = self.sense.get_temperature()
        self.sensor_record['humidity'] = self.sense.get_humidity()
        self.sensor_record['pressure'] = self.sense.get_pressure()

    def send_record_to_kafka(self):
        sensor_record_json = json.dumps(self.sensor_record)
        self.producer.send("temperature_humidity_json", sensor_record_json)

    def run(self):
        self.read_values_from_sensor()
        self.send_record_to_kafka()
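A minimal sketch of a driver for this logger, matching the roughly ten-second cadence mentioned in the docstring; the scheduling loop is an assumption, since the class itself only performs a single read per run():

# Hypothetical driver loop for SensorHatLogger
if __name__ == '__main__':
    logger = SensorHatLogger()
    while True:
        logger.run()          # read the sensors and publish one JSON record
        time.sleep(10)        # ~10 s between readings, per the docstring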
Example #13
class Results:
    def GET(self):
        self.producer = KafkaProducer(bootstrap_servers='localhost:9092')
        self.goodtopic= 'goodtopic'
        self.badtopic= 'badtopic'
        self.spamtopic ='spamtopic'
        self.stop=set(nltk.corpus.stopwords.words('english'))
        self.stop.update(['http','https','rt'])
        self.db=pymongo.MongoClient()
        fp = open('json.txt','w')
        web.header('Access-Control-Allow-Origin',      '*')
        web.header('Access-Control-Allow-Credentials', 'true')
        web.header('Content-Type', 'application/json')
        user_data= web.input(id={})
        data = user_data.id
        data = json.loads(str(data))
        for line in data:
            texto = ''
            tokens = nltk.word_tokenize(data[line]['tweet'])
            for w in tokens:
                w = w.lower()
                w = w.encode('utf-8')
                if w.isalpha() and w not in self.stop:
                    texto=texto + ' ' + w
            texto = texto.encode('utf-8')
            if(data[line]['answer']=='Good'):
                self.db.LEARNING.goodlearning.update({"type":'contagem'},{"$inc": {'count':1}},upsert=True)
                self.producer.send(self.goodtopic,texto)
            if(data[line]['answer']=='Bad'):
                self.db.LEARNING.badlearning.update({"type":'contagem'},{"$inc": {'count':1}},upsert=True)
                self.producer.send(self.badtopic,texto)
            #if(data[line]['answer']=='Spam'):
                #self.producer.send(self.spamtopic,texto)
        return 'algo'
Example #14
    def run(self):
        producer = KafkaProducer(bootstrap_servers='172.16.218.128:10021')

        while True:
            producer.send("test", "msg")
            # producer.send("test", "abc")
            time.sleep(1)
Example #15
    def run(self, run_time):
        """
        Send checkresults to Kafka Topic
        """
        logging.debug("Establishing passive handler: Kafka")
        super(Handler, self).run()
        itemlist = []
        for check in self.checks:
            if check.needs_to_run():
                item = self.do_check(check)
                item.check_time = run_time
                check.set_next_run(run_time)
                item.hostname = self.get_kafka_hostname(item)
                itemlist.append(item)

        if len(itemlist) > 0:
            try:
                logging.info('Connect to Kafka Server')
                producer = KafkaProducer(bootstrap_servers=['{}'.format(self.str_kafakhosts)], client_id=self.str_client_id)
            except KafkaError:
                logging.warning(
                    'Problem connecting to Kafka server: {} with topic: {} and client name {}'.format(self.str_kafakhosts,
                                                                                                       self.str_topic,
                                                                                                       self.str_client_id))
            for item in itemlist:
                producer.send(self.str_topic, key=str(item.hostname), value=json.dumps(self.format_for_kafka(self, item)))

            producer.flush()
Example #16
    def run(self):
        producer = KafkaProducer(**KAFKA_PRODUCER_CONFIG)
        while True:
            producer.send('python-madrid', b"FOO")
            producer.send('python-madrid', b"BAR")
            producer.send('python-madrid', b"BAZ")
            time.sleep(5)
Example #17
def sendSingleMsg2Kafka(msg):
    if not msg:
        return
    producer = KafkaProducer(bootstrap_servers='10.128.184.167:9092')
    producer.send('topic_lpr', msg.encode('utf8'))
    producer.flush()
    producer.close(timeout=5)
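An illustrative call is sketched below; the payload string is a made-up placeholder. Note that this helper creates, flushes, and closes a fresh KafkaProducer on every call, which is simple but pays connection and metadata overhead per message; a long-lived, module-level producer is the usual alternative when messages are frequent.

# Hypothetical one-off call to sendSingleMsg2Kafka
sendSingleMsg2Kafka('{"plate": "ABC-1234", "ts": 1590000000}')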
Example #18
class KafkaProducerCountBolt(BasicBolt):
    numWindowChunks = 5
    emitFrequencyInSeconds = 10
    windowLengthInSeconds = numWindowChunks * emitFrequencyInSeconds

    def __init__(self):
        super(KafkaProducerCountBolt, self).__init__(script=__file__)
        self.counter = SlidingWindowCounter(5)
        

    def initialize(self, conf, context):
        self.producer = KafkaProducer(bootstrap_servers='localhost:9092')
        self.topic = 'spamwordcounttopic'
        
    @classmethod
    def declareOutputFields(cls):
        return ['word', 'count']
            
    def process(self, tup):       
        if tup.is_tick_tuple():
            self.emitCurrentWindowCounts()
        else:
            self.counter.incrementCount(tup.values[0])

    def emitCurrentWindowCounts(self):
        counts = self.counter.getCountsThenAdvanceWindow()
        for k, v in counts.iteritems():
            word2 = k.encode('utf-8')+ ' '+ str(v)
            self.producer.send(self.topic,word2)
            storm.emit([k, v])

    def getComponentConfiguration(self):
        return {"topology.tick.tuple.freq.secs":300}
Example #19
class Producer(object):

    def __init__(self):
        #self.client = SimpleClient(addr)
        #self.producer = KeyedProducer(self.client)
        self.producer = KafkaProducer(bootstrap_servers=["50.112.40.243", "52.25.13.29", "50.112.22.187", "52.24.80.162"],
                                      value_serializer=lambda v: json.dumps(v).encode('utf-8'),
                                      acks=0,
                                      linger_ms=500)

    def jsonITEM(self, itemList):
        strout = '{'
        strout = strout + '"location":'
        strout = strout + '"' + itemList[0] + '"' + ','
        strout = strout + '"item":'
        strout = strout + '"' + str(itemList[1]) + '"' + ','
        strout = strout + '"time":'
        strout = strout + str(itemList[2]) + ','
        strout = strout + '"Producer":'
        strout = strout + str(itemList[3])
        strout = strout + '}'
        return strout

    def produce_msgs(self):
        msg_cnt = 0

        while True:
            lItem = getItemScanned()
            message_info = {"location": lItem[0], "item": lItem[1], "time": lItem[2], "storeid": random.randint(0, NUM_USERS - 1)}
            self.producer.send('price', message_info)
            print(message_info)
            time.sleep(.05)
            msg_cnt += 1
Example #20
def main():
    """
    A generic Kafka producer for use as a Cylc event handler.

    USAGE:
       cylc_kafka_producer.py <HOST:PORT> <TOPIC> key1=val1 key2=val2 ...
    serializes {key1: val1, key2: val2, ...} to TOPIC at Kafka on HOST:PORT.

    This is generic in that a JSON message schema is defined by the received
    command line keyword arguments. To enforce compliance to a particular
    schema, copy and modify as needed.

    Can be partnered with the generic cylc_kafka_consumer external trigger
    function, for triggering downstream suites.

    """

    if 'help' in sys.argv[1]:
        print(cleandoc(main.__doc__))
        sys.exit(0)

    # TODO exception handling for bad inputs etc.
    kafka_server = sys.argv[1]
    kafka_topic = sys.argv[2]
    # Construct a message dict from kwargs.
    dmsg = dict([k.split('=') for k in sys.argv[3:]])

    producer = KafkaProducer(
        bootstrap_servers=kafka_server,
        value_serializer=lambda msg: json.dumps(msg).encode('utf-8'))

    producer.send(kafka_topic, dmsg)
    producer.flush()
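Following the USAGE line in the docstring, a hypothetical invocation could be:

    cylc_kafka_producer.py localhost:9092 suite-events suite=my_suite event=succeeded

which would serialize {"suite": "my_suite", "event": "succeeded"} as JSON and publish it to the suite-events topic.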
Example #21
class TwitterListener(tweepy.StreamListener):
    def __init__(self,stop,user):
        self.producer = KafkaProducer(bootstrap_servers='localhost:9092')
        self.instanttopic = 'instanttopic'
        self.user = str(user)
        self.numstop = int(stop)

    def on_data(self, data):
        fil = open("meu.txt","a")
        stop=set(nltk.corpus.stopwords.words('english'))
        stop.update(['http','https','rt'])
        tweet = json.loads(data)
        if 'text' in tweet:
            texto =tweet['text'].encode('utf-8','ignore')
            self.numstop -=1
            texto = self.user+'-'+texto
            self.producer.send(self.instanttopic,texto)
            saveTweet('pos',tweet,self.user)
            saveLocation('pos',tweet,self.user)

            vs = vaderSentiment(str(texto))
            contagemneg= vs['neg']
            contagempos= vs['pos']
            contagemspam=vs['neu']
            filo= open("vader.txt",'a')
            if self.numstop == 0:
                return False
        return True
Example #22
class sinktask(object):

    def __init__(self, kafka_URI, topic_str):
        self.producer = KafkaProducer(bootstrap_servers=kafka_URI)
        self.topic = topic_str

    def execute(self, data):
        self.producer.send(self.topic, bytes(data))
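A minimal sketch of wiring this sink up; the broker URI and topic name are placeholders. Because execute() calls bytes(data), the payload should already be bytes-like (a plain str would need encoding first):

# Hypothetical usage of sinktask with placeholder connection details
sink = sinktask('localhost:9092', 'sink-topic')
sink.execute(b'payload bytes')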
Example #23
    def run(self):
        producer = KafkaProducer(bootstrap_servers='localhost:9092')
        self.sent = 0

        while not producer_stop.is_set():
            producer.send('my-topic', self.big_msg)
            self.sent += 1
        producer.flush()
Example #24
def submit_kafka_job(job, type):
    producer = KafkaProducer(bootstrap_servers='kafka:9092')
    if type == CREATE:
        kafka_queue = 'create-ride-topic'
    elif type == UPDATE:
        kafka_queue = 'update-ride-topic'
    else:
        kafka_queue = 'delete-ride-topic'
    producer.send(kafka_queue, json.dumps(job).encode('utf-8'))
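An illustrative call; the job dict is a placeholder and CREATE/UPDATE are assumed to be constants defined alongside this function:

# Hypothetical call to submit_kafka_job
submit_kafka_job({"ride_id": 42, "driver": "alice"}, CREATE)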
Example #25
    def run(self):
        producer = KafkaProducer(bootstrap_servers='localhost:9092')

        while not self.stop_event.is_set():
            producer.send('my-topic', b"test")
            producer.send('my-topic', b"\xc2Hola, mundo!")
            time.sleep(1)

        producer.close()
Example #26
    def run(self):
        print("producer")
        producer = KafkaProducer(bootstrap_servers='kafka:9092')
        print("producer... ok")

        while True:
            producer.send('my-topic', b"test")
            producer.send('my-topic', b"\xc2Hola, mundo!")
            time.sleep(1)
Example #27
def getstarted():
    name = request.form['userName']
    print(request.form['temperature'])
    payload = {"sensorID": str(name), "time": datetime.datetime.now().strftime('%a %b %d %Y %H:%M:%S'), "temperature": str(request.form['temperature']), "flag": "false"}
    print(payload)
    producer = KafkaProducer(value_serializer=lambda m: json.dumps(m).encode('ascii'))
    producer.send('test', payload)
    print("Generated...")
    return json.dumps(payload)
Example #28
class KafkaSender():

    def __init__(self):
        self.client=KafkaClient(hosts)
        #self.producer = SimpleProducer(self.client,batch_send=batch_send,batch_send_every_n=batch_send_every_n)
        self.producer=KafkaProducer(bootstrap_servers=hosts)
        self.client.ensure_topic_exists(topic)
    def send_messages(self,msg):
        self.producer.send(topic,msg)
Example #29
class KafkaBeerPipeline(object):
    def __init__(self):
        self.producer = KafkaProducer(bootstrap_servers=['localhost:9092'])
        #serializer = MessageSerializer(client)
    def process_item(self, item, spider):
        client = SchemaRegistryClient(url='http://localhost:8081')
        schema_id, avro_schema, schema_version = client.get_latest_schema('beerscraper')
        serializer = MessageSerializer(client)
        encoded = serializer.encode_record_with_schema('beer',avro_schema,item.__dict__['_values'])
        self.producer.send('beer',encoded)
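As a sketch of how such a pipeline is usually enabled, a Scrapy project would register it under ITEM_PIPELINES in settings.py; the module path below is an assumption about where the class lives:

# Hypothetical Scrapy settings.py entry enabling the Kafka pipeline
ITEM_PIPELINES = {
    'beerscraper.pipelines.KafkaBeerPipeline': 300,
}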
Example #30
    def run(self):
        producer = KafkaProducer(bootstrap_servers='localhost:9092')

        while not self.stop_event.is_set():
            print("Sending message from: " + str(threading.get_ident()))
            producer.send('my-topic', b"test")
            producer.send('my-topic', b"\xc2Hola, mundo!")
            time.sleep(0.2)

        producer.close()
Example #31
    print("*"*30)
    print(KAFKA_BROKER_URL)
    producer = KafkaProducer(
        bootstrap_servers= KAFKA_BROKER_URL, #KAFKA_BROKER_URL,
        #Encode all values as JSON
        value_serializer = lambda value: json.dumps(value).encode(),
        api_version=(0, 10, 2)
    )
    sock = socket.socket(socket.AF_INET, # Internet
                     socket.SOCK_DGRAM) # UDP
    sock.bind((ip, port))
    while True:
        data, addr = sock.recvfrom(1024)
        data_conv = json.loads(data)
        #transaction: dict = create_random_transaction()
        producer.send(TRANSACTIONS_TOPIC, value = data_conv)
        #DEBUG
        print(data_conv)
        #sleep(SLEEP_TIME)

"""
# Define the port on which you want to connect
ip = '172.21.0.16'
#ip = '172.21.0.3'
#port = 58793
port = 6900

import socket
import json

#UDP_IP = "127.0.0.1"
Example #32
class SchemaBackup:
    def __init__(self, config_path: str, backup_path: str, topic_option: Optional[str] = None) -> None:
        with open(config_path) as handler:
            self.config = read_config(handler)

        self.backup_location = backup_path
        self.topic_name = topic_option or self.config["topic_name"]
        self.log = logging.getLogger("SchemaBackup")
        self.consumer = None
        self.producer = None
        self.admin_client = None
        self.timeout_ms = 1000

    def init_consumer(self):
        self.consumer = KafkaConsumer(
            self.topic_name,
            enable_auto_commit=False,
            bootstrap_servers=self.config["bootstrap_uri"],
            client_id=self.config["client_id"],
            security_protocol=self.config["security_protocol"],
            ssl_cafile=self.config["ssl_cafile"],
            ssl_certfile=self.config["ssl_certfile"],
            ssl_keyfile=self.config["ssl_keyfile"],
            auto_offset_reset="earliest",
            metadata_max_age_ms=self.config["metadata_max_age_ms"],
            kafka_client=KarapaceKafkaClient,
        )

    def init_producer(self):
        self.producer = KafkaProducer(
            bootstrap_servers=self.config["bootstrap_uri"],
            security_protocol=self.config["security_protocol"],
            ssl_cafile=self.config["ssl_cafile"],
            ssl_certfile=self.config["ssl_certfile"],
            ssl_keyfile=self.config["ssl_keyfile"],
            kafka_client=KarapaceKafkaClient,
        )

    def init_admin_client(self):
        start_time = time.monotonic()
        wait_time = constants.MINUTE
        while True:
            if time.monotonic() - start_time > wait_time:
                raise Timeout(f"Timeout ({wait_time}) on creating admin client")

            try:
                self.admin_client = KafkaAdminClient(
                    api_version_auto_timeout_ms=constants.API_VERSION_AUTO_TIMEOUT_MS,
                    bootstrap_servers=self.config["bootstrap_uri"],
                    client_id=self.config["client_id"],
                    security_protocol=self.config["security_protocol"],
                    ssl_cafile=self.config["ssl_cafile"],
                    ssl_certfile=self.config["ssl_certfile"],
                    ssl_keyfile=self.config["ssl_keyfile"],
                    kafka_client=KarapaceKafkaClient,
                )
                break
            except (NodeNotReadyError, NoBrokersAvailable, AssertionError):
                self.log.warning("No Brokers available yet, retrying init_admin_client()")
            except:  # pylint: disable=bare-except
                self.log.exception("Failed to initialize admin client, retrying init_admin_client()")

            time.sleep(2.0)

    def _create_schema_topic_if_needed(self):
        if self.topic_name != self.config["topic_name"]:
            self.log.info("Topic name overridden, not creating a topic with schema configuration")
            return

        self.init_admin_client()

        start_time = time.monotonic()
        wait_time = constants.MINUTE
        while True:
            if time.monotonic() - start_time > wait_time:
                raise Timeout(f"Timeout ({wait_time}) on creating admin client")

            schema_topic = KafkaSchemaReader.get_new_schema_topic(self.config)
            try:
                self.log.info("Creating schema topic: %r", schema_topic)
                self.admin_client.create_topics([schema_topic], timeout_ms=constants.TOPIC_CREATION_TIMEOUT_MS)
                self.log.info("Topic: %r created successfully", self.config["topic_name"])
                break
            except TopicAlreadyExistsError:
                self.log.info("Topic: %r already exists", self.config["topic_name"])
                break
            except:  # pylint: disable=bare-except
                self.log.exception(
                    "Failed to create topic: %r, retrying _create_schema_topic_if_needed()", self.config["topic_name"]
                )
                time.sleep(5)

    def close(self):
        self.log.info("Closing schema backup reader")
        if self.consumer:
            self.consumer.close()
            self.consumer = None
        if self.producer:
            self.producer.close()
            self.producer = None
        if self.admin_client:
            self.admin_client.close()
            self.admin_client = None

    def request_backup(self):
        if not self.consumer:
            self.init_consumer()
        self.log.info("Starting schema backup read for topic: %r", self.topic_name)

        values = []
        topic_fully_consumed = False

        while not topic_fully_consumed:

            raw_msg = self.consumer.poll(timeout_ms=self.timeout_ms)
            topic_fully_consumed = len(raw_msg) == 0

            for _, messages in raw_msg.items():
                for message in messages:
                    key = message.key.decode("utf8")
                    try:
                        key = json.loads(key)
                    except json.JSONDecodeError:
                        self.log.debug("Invalid JSON in message.key: %r, value: %r", message.key, message.value)
                    value = None
                    if message.value:
                        value = message.value.decode("utf8")
                        try:
                            value = json.loads(value)
                        except json.JSONDecodeError:
                            self.log.debug("Invalid JSON in message.value: %r, key: %r", message.value, message.key)
                    values.append((key, value))

        ser = json.dumps(values)
        if self.backup_location:
            with open(self.backup_location, "w") as fp:
                fp.write(ser)
                self.log.info("Schema backup written to %r", self.backup_location)
        else:
            print(ser)
            self.log.info("Schema backup written to stdout")
        self.close()

    def restore_backup(self):
        if not os.path.exists(self.backup_location):
            raise BackupError("Backup location doesn't exist")

        self._create_schema_topic_if_needed()

        if not self.producer:
            self.init_producer()
        self.log.info("Starting backup restore for topic: %r", self.topic_name)

        values = None
        with open(self.backup_location, "r") as fp:
            raw_msg = fp.read()
            values = json.loads(raw_msg)
        if not values:
            raise BackupError("Nothing to restore in %s" % self.backup_location)

        for item in values:
            key = encode_value(item[0])
            value = encode_value(item[1])
            future = self.producer.send(self.topic_name, key=key, value=value)
            self.producer.flush(timeout=self.timeout_ms)
            msg = future.get(self.timeout_ms)
            self.log.debug("Sent kafka msg key: %r, value: %r, offset: %r", key, value, msg.offset)
        self.close()
Example #33

if __name__ == '__main__':

    schema = json.load(open('avro/CheckoutEvent.avsc'))
    # schema = json.load(open('avro/sample.avsc'))

    producer = KafkaProducer(bootstrap_servers=KAFKA_BROKERS)

    # records = [
    #     {'station': 'blabla', 'temp': 5, 'time': 1433269388},
    #     {'station': 'hello', 'temp': 22, 'time': 1433270389},
    #     {'station': 'world', 'temp': -11, 'time': 1433273379},
    #     {'station': 'coca-cola', 'temp': 111, 'time': 1433275478},
    # ]

    records = []
    for line in open("sample_producer_data.json"):
        records.append(json.loads(line))

    # for _ in range(10000):
    #     record = random.choice(records)
    #     print(f"Send: {record}")
    #     producer.send(TOPIC, serialize(schema, record))
    #     time.sleep(2)

    for record in records:
        print(f"Send: {record}")
        producer.send(TOPIC, serialize(schema, record))
        time.sleep(1)
Example #34
        print(timestamp_to_date(time_stamp))
        # JSON payload
        data = {
            "id": i,
            "mac": "44-C3-49-5A-E5-93",
            "brand": "Huawei Technologies Co.Ltd",
            "cache_ssid": "",
            "capture_time": time_stamp,
            "terminal_fieldstrenth": -5,
            "identification_type": "",
            "certificate_code": "",
            "ssid_position": "",
            "access_ap_mac": "68-DB-54-E0-A0-62",
            "access_ap_channel": "",
            "access_ap_encryption_type": "99",
            "x_coordinate": 0.0,
            "y_coordinate": 0.0,
            "netbar_wacode": "",
            "collection_equipmentid": "210102000000181",
            "collection_equipment_longitude": 123.418713,
            "collection_equipment_latitude": 41.7719617,
            "lng_lat": "124.418713::41.7719617"
        }
        #value = bytearray(data, 'utf-8')
        # Send the data to the specified topic, with a 10-second timeout
        producer.send('big_data_kj', value=data).get(timeout=10)
        # Send a string made up of two English letters every 0.1 seconds
        time.sleep(30)
        i += 1

    producer.close()
Example #35
class Kafka:
    def __init__(self):
        self.logger = logging.getLogger(__name__)

        # producer set to None by default unless global config sets to active
        self.producer = None

        self.logger.debug(globalConfig.configDescriptor['supportedFeatures'])
        if 'asynchronousTransitionResponse' in globalConfig.configDescriptor[
                'supportedFeatures']:
            self.logger.debug('FOUND IT')

        #get the address of the kafka servers if it is set to active in config.yaml and create a producer
        if globalConfig.configDescriptor["supportedFeatures"][
                "asynchronousTransitionResponse"] == True:
            self.logger.debug('trying to configure Kafka')
            try:
                self.logger.debug(
                    'kafka is set to Active - trying to create kafka producer on '
                    + globalConfig.configDescriptor['properties']
                    ['responseKafkaConnectionUrl'])
                self.producer = KafkaProducer(bootstrap_servers=globalConfig.
                                              configDescriptor['properties']
                                              ['responseKafkaConnectionUrl'])
            except Exception as e:
                #self.logger.error(e.__class__.__name__)
                self.logger.error('could not connect to kafka server at ' +
                                  globalConfig.configDescriptor['properties']
                                  ['responseKafkaConnectionUrl'] +
                                  ' no messages will be published')
                self.producer = None

        else:
            self.logger.debug(
                'kafka not set to active - no messages will be published')

    def sendLifecycleEvent(self, msg):
        self.logger.debug('sending message to kafka ' + str(msg))

        # if have a valid producer then send a kafka message, otherwise do nothing
        if self.producer != None:
            self.logger.debug('have valid producer')

            topic = globalConfig.configDescriptor['properties'][
                'responseKafkaTopicName']

            self.logger.debug("sending transition event to Kafka topic " +
                              topic)

            future = self.producer.send(
                topic,
                json.JSONEncoder().encode(msg).encode('utf-8'))

            try:
                record_metadata = future.get(timeout=10)
            except KafkaError:
                self.logger.exception('failed to publish lifecycle event to Kafka')
        else:
            self.logger.debug('no valid kafka producer found')

    def sendMetric(self):
        self.logger.debug("TODO____----- implment sending metric to kafka")
Example #36
from kafka import KafkaProducer
from kafka.errors import KafkaError

producer = KafkaProducer(bootstrap_servers=['kafka:9092'])

topic = "test"

print(producer)
# Asynchronous by default
future = producer.send(topic=topic, key=b'foo', value=b'megyez')
print(future)
try:
    record_metadata = future.get(timeout=60)
    # Successful result returns assigned partition and offset
    print(record_metadata.topic)
    print(record_metadata.partition)
    print(record_metadata.offset)
except KafkaError as e:
    print(e)


Example #37
    # Establish the producer for each function call, cannot be global...?
    producer = KafkaProducer(
        bootstrap_servers=['192.168.0.62:31090',
                           '192.168.0.62:31091', '192.168.0.62:31092'])
    # bootstrap_servers=['127.0.0.1:9092'])

    evbody = {}
    value = event.value
    if value is not None and value != "":
        evbody = json.loads(value)
    # print("evbody:", evbody)

    try:
        wrapped(evbody, producer)
    except Exception as err:
        try:
            response = {
                "event_uuid": evbody["event_uuid"],
                "code": 500,
                "error": str(err),
                "stacktrace": traceback.format_exc()
            }

            new_event = bytearray(json.dumps(response), encoding='utf-8')
            producer.send('automation_v1_response', key=b'event',
                          value=new_event).get(timeout=30)
            producer.flush(timeout=5)
        except Exception as err:
            print(str(err))
            print(traceback.format_exc())
Example #38
# Imports needed by this fragment; MsgGenTimeLimit is expected to be defined earlier in the original script.
import random
import time
from datetime import datetime

from kafka import KafkaProducer

MsgGenSleepTime = 0.5  # time in seconds to prevent very fast msg generations
msgKey = 100

topicName = 'TestTopic1'
producer = KafkaProducer(bootstrap_servers='localhost:9092')
LoopNumber = 1
MaxLoops = 2
EndLoopSleepTime = 15  # time in seconds

while LoopNumber < (MaxLoops + 1):
    print('STARTING loop # %d ----------' % (LoopNumber))
    loopStartTime = time.time()
    while (time.time() - loopStartTime) < MsgGenTimeLimit:
        msgToSend = ''
        for i in range(0, 3):
            msgToSend = msgToSend + chr(random.randint(97, 122))
        msgToSend = msgToSend + str(msgKey)
        producer.send(topicName,
                      key=str(msgKey).encode('utf-8'),
                      value=msgToSend.encode('utf-8'))
        print('Msg number sent to Kafka::: ', msgToSend)
        msgKey = msgKey + 1
        time.sleep(MsgGenSleepTime)
    #--------- sleep for a bit
    print('STARTING SLEEP for %d secs loop # %d ----------' %
          (EndLoopSleepTime, LoopNumber))
    time.sleep(EndLoopSleepTime)
    print('FINISHED loop # %d ----------' % (LoopNumber))
    LoopNumber = LoopNumber + 1

print('\nProducer program END time:', datetime.now().strftime("%c"))
Example #39
import datetime
import json
import random
import time
from random import randint

from kafka import KafkaProducer
from kafka.errors import KafkaError
from kafka.future import log


producer = KafkaProducer(bootstrap_servers='localhost:9092', value_serializer=lambda v: json.dumps(v).encode('utf-8'))
users = ["1", "2", "3", "4", "5"]
# future = producer.send('topic1', {"test": "test"})
# try:
#     record_metadata = future.get(timeout=20)
# except KafkaError:
#     log.exeption()
#     pass
#
# print(record_metadata.topic)
# print(record_metadata.partition)
# print(record_metadata.offset)
while True:
    # producer.send('topic1', {"timestamp": str(datetime.datetime.utcnow().strftime('%B %d %Y - %H:%M:%S')),
    #                          "user": str(random.choice(users))})
    producer.send('topic1', {"timestamp": str(datetime.datetime.utcnow().strftime('%B %d %Y - %H:%M:%S')),
                             "user": "******"})
    time.sleep(2)
    producer.send('topic1', {"timestamp": str(datetime.datetime.utcnow().strftime('%B %d %Y - %H:%M:%S')),
                             "user": "******"})
    time.sleep(2)
    producer.send('topic1', {"timestamp": str(datetime.datetime.utcnow().strftime('%B %d %Y - %H:%M:%S')),
                             "user": "******"})
    time.sleep(4)
Example #40
logging.info("Starting producer")
producer = KafkaProducer(
    bootstrap_servers=bootstrap_server,
    value_serializer=lambda m: json.dumps(m).encode('ascii'))

with open(data_path, 'r') as f:
    r = csv.DictReader(f)
    line = next(r)
    # longitude, datetime, iddevice, latitude, speed, id
    try:
        current_dt = datetime.strptime(line['datetime'],
                                       '%Y-%m-%d %H:%M:%S.%f')
    except ValueError:
        current_dt = datetime.strptime(line['datetime'], '%Y-%m-%d %H:%M:%S')

    for line in r:
        try:
            event_dt = datetime.strptime(line['datetime'],
                                         '%Y-%m-%d %H:%M:%S.%f')
        except ValueError:
            event_dt = datetime.strptime(line['datetime'], '%Y-%m-%d %H:%M:%S')

        logging.info("Next event time: " + str(event_dt))
        logging.info((event_dt - current_dt).seconds / time_step)
        time.sleep((event_dt - current_dt).seconds / time_step)

        logging.info("Generating event: " + str(line))
        ev = producer.send(topic, line)
        current_dt = event_dt
Example #41
                                  yaml_config['public_password']),
                            verify=True)

    content_api = response.content
    content_json_loaded = json.loads(content_api)
    real_content = content_json_loaded['hits']['hits']
    parsed_content = [content['_source'] for content in real_content]

    broker = yaml_config['broker_url']
    topic = yaml_config['topic_name']
    producer = KafkaProducer(
        bootstrap_servers=[broker],
        value_serializer=lambda x: json.dumps(x).encode('utf-8'))

    for row in parsed_content:
        row['date_collected_data'] = now
        producer.send(topic, value=row)
        time.sleep(0.5)

    brokers = [yaml_config['broker_url']]
    topic = yaml_config['topic_name']
    consumer = KafkaConsumer(
        topic,
        bootstrap_servers=brokers,
        auto_offset_reset='earliest',
        value_deserializer=lambda m: json.loads(m.decode('utf-8')))
    consumer.subscribe(yaml_config['topic_name'])

    for message in consumer:
        print(message.offset, message.value)
Example #42
from time import sleep
from json import dumps
import json
from kafka import KafkaProducer
import requests

producer = KafkaProducer(bootstrap_servers=[
    'localhost:9096'
])  #(bootstrap_servers=['sandbox-hdp.hortonworks.com:6667'])

url = "https://coronavirus-monitor.p.rapidapi.com/coronavirus/cases_by_country.php"
headers = {
    'x-rapidapi-host': "coronavirus-monitor.p.rapidapi.com",
    'x-rapidapi-key': "3cc6f15daamsh01d728b65f1b4e0p13c7a2jsna981b69014c8"
}
r = requests.request("GET", url, headers=headers)
data_dict = r.json()

for i in data_dict['countries_stat']:
    producer.send('bigdata', json.dumps(i).encode('utf-8'))
    sleep(1)
    print(i)

#spark-submit producerAPI.py
Example #43
import csv
import time
from kafka import KafkaProducer

# Instantiate a KafkaProducer used to publish messages to Kafka
producer = KafkaProducer(bootstrap_servers='localhost:9092')
# Open the data file
csvfile = open("../data/log_result.csv", "r", encoding='utf-8')
# Create a reader for the csv file
reader = csv.reader(csvfile)

for line in reader:
    sex = line[10]  # the gender field of each log record
    if sex == 'gender':
        continue  # skip the header row
    time.sleep(0.1)  # send one row every 0.1 seconds
    # Send the data to the 'sex' topic
    producer.send('sex', sex.encode('utf8'))
Example #44
    def parse(self):
        cur = self.conn.cursor()
        cou = self.conn.cursor()
        try:
            # import pdb;pdb.set_trace()
            count = "select count(*)from bukalapak_url where status_feed = ''"
            sql = "select product_url from bukalapak_url where status_feed = ''"
            cur.execute(sql)
            cou.execute(count)
            results = cur.fetchall()
            b = cou.fetchall()
            terus = str(b).replace(",", "").replace("'", "").replace(
                "(", "").replace(")",
                                 "").replace("[",
                                             "").replace("]",
                                                         "").replace("L", "")
            print(terus)
            terus = int(terus)
            print(terus)
            count = 0
            for ulang in range(0, terus):
                try:
                    print(ulang)
                    count += 1
                    a = results[ulang]
                    url = str(a).replace(",", "").replace("'", "").replace(
                        "(", "").replace(")", "")
                    print "========================"
                    print url
                    self.driver.get(url)
                    response = TextResponse(url=url,
                                            body=self.driver.page_source,
                                            encoding='utf-8')
                    # if count == 4:
                    # import pdb;pdb.set_trace()
                    penjual_url = MySQLdb.escape_string(
                        response.xpath(
                            '//*[contains(@id,"mod-product-detail")]/aside/div/article/div[2]/h5/a/@href'
                        ).extract_first())
                    # self.driver.save_screenshot('SCEEN1.png')
                    status_feed = "done"
                    sql = "UPDATE bukalapak_url SET status_feed = '{}' WHERE product_url = '{}'".format(
                        status_feed, url)
                    cur.execute(sql)
                    self.conn.commit()
                    url = penjual_url.replace(
                        "?dtm_source=product_detail&dtm_section=sidebar&dtm_campaign=default",
                        "/feedback?feedback_as=as_seller&filter_by=all")
                    print "========================="
                    print url
                    self.driver.get(url)
                    for halaman in range(1, 500):
                        for loop in range(1, 21):
                            response = TextResponse(
                                url=url,
                                body=self.driver.page_source,
                                encoding='utf-8')
                            nama = response.xpath(
                                '/html/body/div[1]/section/div/div[2]/section/ul/li['
                                + str(loop) +
                                ']/article/div[1]/div/div[2]/div[1]/a/text()'
                            ).extract_first()
                            if nama == None:
                                try:
                                    nama = response.xpath(
                                        '// *[contains(@id,"reskinned_page")]/section/div/div[2]/section[1]/div/ul/li['
                                        + str(loop) +
                                        ']/article/div[2]/div/a/text()'
                                    ).extract_first()
                                    nama = nama.encode('utf-8')
                                except:
                                    nama = "Tanpa Nama"

                            pesan = response.xpath(
                                '/html/body/div[1]/section/div/div[2]/section/ul/li['
                                + str(loop) +
                                ']/article/div[2]/div/blockquote/p/text()'
                            ).extract_first()
                            tanggal = response.xpath(
                                '/html/body/div[1]/section/div/div[2]/section/ul/li['
                                + str(loop) +
                                ']/article/div[1]/div/div[2]/div[3]/time/text()'
                            ).extract_first()
                            try:
                                tanggal = tanggal.replace("\n", "")
                            except:
                                try:
                                    tanggal = response.xpath(
                                        '*[contains(@id,"reskinned_page")] / section / div / div[2] / section[1] / div / ul / li['
                                        + str(loop) +
                                        '] / article / div[2] / div / time/text()'
                                    ).extract_first()
                                    tanggal = tanggal.encode('utf-8').replace(
                                        "\n", "")
                                except:
                                    try:
                                        tanggal = response.xpath(
                                            '// *[ @ id = "feedback_list"] / ul / li['
                                            + str(loop) +
                                            '] / article / div[2] / div / time/text()'
                                        ).extract_first()
                                        tanggal = tanggal.encode(
                                            'utf-8').replace("\n", "")
                                    except:
                                        try:
                                            tanggal = response.xpath(
                                                '//*[@id="reskinned_page"]/section/div/div[2]/section[1]/div/ul/li['
                                                + str(loop) +
                                                ']/article/div[2]/div/time/text()'
                                            ).extract_first()
                                            tanggal = tanggal.encode(
                                                'utf-8').replace("\n", "")
                                        except:
                                            tanggal = None
                            try:
                                tanggal = tanggal.encode('utf-8').replace(
                                    "\n", "")
                            except:
                                pass
                            if pesan == None:
                                try:
                                    pesan = response.xpath(
                                        '// *[ contains(@ id , "reskinned_page")] / section / div / div[2] / section[1] / div[2] / ul / li['
                                        + str(loop) +
                                        '] / article / div[2] / p/text()'
                                    ).extract_first()
                                    pesan = pesan.encode('utf-8')
                                except:
                                    break
                            if pesan == None:
                                try:
                                    pesan = response.xpath(
                                        '// *[contains( @ id , "reskinned_page")] / section / div / div[2] / section[1] / div / ul / li['
                                        + str(loop) +
                                        '] / article / div[2] / p/text()'
                                    ).extract_first()
                                    pesan = pesan.encode('utf-8')
                                except:
                                    break
                                # // *[ @ id = "reskinned_page"] / section / div / div[2] / section[1] / div / ul / li[
                                #     1] / article / div[2] / p
                            else:
                                akhir = json.dumps({
                                    'type': 'bukalapak_feedback',
                                    'penjual_url': penjual_url,
                                    'nama': nama,
                                    'pesan': pesan,
                                    'tanggal': tanggal,
                                })
                                try:
                                    for kaf in range(0, 20):
                                        try:
                                            prod = KafkaProducer(
                                                bootstrap_servers=setting.
                                                broker)
                                            prod.send(setting.kafka_topic,
                                                      b"{}".format(akhir))
                                            print "=================================================="
                                            print "SUKSES SEND TO KAFKA"
                                            print "=================================================="
                                            print akhir

                                            kaf = 1
                                        except:
                                            pass
                                        if kaf == 1:
                                            break
                                except Exception, e:
                                    print e
                        time.sleep(5)
                        try:
                            coy = url + "&page=" + str(halaman + 1)
                            self.driver.get(coy)
                        except:
                            pass
                        if pesan == None:
                            break
                        # try:
                        #     self.driver.find_element_by_xpath('//a[@class="next_page"]').click()
                        #     time.sleep(3)
                        # except:
                        #     break

                except Exception, e:
                    print e
                    # prod.send('tokopedia_penjual', b"{}".format(url))
                    # prod.send('tokopedia_feedback', b"{}".format(url))
        except Exception, e:
            print e
Example #45
# coding:utf-8
import os

from kafka import KafkaProducer


def utf8len(s):
    return len(s.encode('utf-8'))


origin_str = "887xxx_"
print(utf8len(origin_str))
sss = origin_str * 262144
# print(sss)
print(utf8len(sss))
# KafkaProducer()
SERVERS = ["192.168.3.22:9094"]
producer = KafkaProducer(bootstrap_servers=SERVERS, max_request_size=5242880)

# msg = json.dumps(msg_dict)
msg = sss.encode(encoding="utf-8")
partition = None

future = producer.send("test", msg, partition=partition)

print("done")
Example #46
class Producer(AbstractProducer):
    def __init__(self, *args, **kwargs):
        """
        The producer is thread safe and sharing a single producer instance across
        threads will generally be faster than having multiple instances.

        sample configuration file:
        ::

            configs = {

                'future_timeout': 5,
                'close_timeout': 5,
                'client_config': {
                    'bootstrap_servers': '172.16.15.227:9092',
                    'client_id': '_'.join([TOPIC, 'producer_client']),
                    'acks': 1,
                    'retries': 1,
                    'batch_size': 16384,
                    'linger_ms': 5,
                    'buffer_memory': 33554432,
                    'connections_max_idle_ms': 9 * 60 * 1000,
                    'max_block_ms': 60000,
                    'max_request_size': 1048576,
                    'metadata_max_age_ms': 300000,
                    'retry_backoff_ms': 100,
                    'request_timeout_ms': 30000,
                    'max_in_flight_requests_per_connection': 5,
                }
            }


        """
        assert kwargs, 'unrecognized keyword arguments'
        self.configs = kwargs
        self.topic = self.configs.get('topic')
        assert self.configs.get('client_config'), 'unrecognized client_config'

        self._success_callback = kwargs.get('success_callback')
        self._failure_callback = kwargs.get('failure_callback')

        try:
            self.producer_client = KafkaProducer(
                **self.configs.get('client_config'))
        except KafkaError as e:
            raise e

    def pre_send(self, *args, **kwargs):
        pass

    def send(self, message):
        """ sends the message to specified topic """
        try:
            meta_data = self.send_sync(topic=self.topic,
                                       key=None,
                                       value=message,
                                       partition=None,
                                       timestamp_ms=None)
            return meta_data
        except (KafkaTimeoutError, KafkaError) as exc:
            raise exc

    def _send(self,
              topic=None,
              key=None,
              value=None,
              partition=None,
              timestamp_ms=None):
        """
        publish the message to topic synchronously

        :param topic: topic where the message will be published
        :type topic: str
        :param key: a key to associate with the message. Can be used to determine which partition to send the message to. If partition is None (and producer’s partitioner config is left as default), then messages with the same key will be delivered to the same partition (but if key is None, partition is chosen randomly). Must be type bytes, or be serializable to bytes via configured key_serializer
        (optional, default: None)
        :type key: bytes, optional
        :param value: message value. Must be type bytes, or be serializable to bytes via configured value_serializer. If value is None, key is required and message acts as a ‘delete’.
        :type value: bytes, optional
        :param partition: optionally specify a partition. If not set, the partition will be selected using the configured ‘partitioner’
        :type partition: int, optional
        :param timestamp_ms: epoch milliseconds (from Jan 1 1970 UTC) to use as the message timestamp. Defaults to current time
        :type timestamp_ms: int, optional
        :return: resolves to RecordMetadata
        :rtype:  FutureRecordMetadata
        :raises KafkaError: if unable to send
        :raises KafkaTimeoutError: if timeout has occurred
        """

        try:
            future = self.producer_client.send(topic=topic,
                                               key=key,
                                               value=value,
                                               partition=partition,
                                               timestamp_ms=timestamp_ms)
            return future
        except (KafkaError, KafkaTimeoutError) as e:
            logger.error(e, exc_info=True)
            raise e

    def send_sync(self,
                  topic=None,
                  key=None,
                  value=None,
                  partition=None,
                  timestamp_ms=None):
        """
        publish the message to topic synchronously and return meta_data or if it fails to send,
        it will raise an exception

        :param topic: topic where the message will be published
        :type topic: str
        :param key: a key to associate with the message. Can be used to determine which partition to send the message to. If partition is None (and producer’s partitioner config is left as default), then messages with the same key will be delivered to the same partition (but if key is None, partition is chosen randomly). Must be type bytes, or be serializable to bytes via configured key_serializer
        :type key: int, optional
        :param value: message value. Must be type bytes, or be serializable to bytes via configured value_serializer. If value is None, key is required and message acts as a ‘delete’
        :type value: optional, byte
        :param partition: optionally specify a partition. If not set, the partition will be selected using the configured ‘partitioner’
        :type partition: int, optional
        :param timestamp_ms: epoch milliseconds (from Jan 1 1970 UTC) to use as the message timestamp. Defaults to current time
        :type timestamp_ms: int, optional
        :return: resolves to RecordMetadata
        :rtype:  FutureRecordMetadata
        :raises KafkaError: if unable to send
        :raises KafkaTimeoutError: if timeout has occurred
        """
        try:
            future = self._send(topic=topic,
                                key=key,
                                value=value,
                                partition=partition,
                                timestamp_ms=timestamp_ms)
            meta_data = future.get(
                timeout=self.configs.get('future_timeout') or 3)
            logger.info('sent to outbound topic {}, partition {}, offset {}'.format(
                meta_data.topic, meta_data.partition, meta_data.offset))
            return meta_data
        except (KafkaTimeoutError, KafkaError) as e:
            logger.error(e, exc_info=True)
            raise e

    def send_async(self,
                   topic=None,
                   key=None,
                   value=None,
                   partition=None,
                   timestamp_ms=None):
        """
        publish the message to kafka_client asynchronously. if success and failure callback handlers exists,
        it will add to the FutureRecordMetadata

        :param topic: topic where the message will be published
        :type topic: str
        :param key: a key to associate with the message. Can be used to determine which partition to send the message to. If partition is None (and producer’s partitioner config is left as default), then messages with the same key will be delivered to the same partition (but if key is None, partition is chosen randomly). Must be type bytes, or be serializable to bytes via configured key_serializer
        :type key: int, optional
        :param value: message value. Must be type bytes, or be serializable to bytes via configured value_serializer. If value is None, key is required and message acts as a ‘delete’
        :type value: byte, optional
        :param partition: optionally specify a partition. If not set, the partition will be selected using the configured ‘partitioner’
        :type partition: int, optional
        :param timestamp_ms: epoch milliseconds (from Jan 1 1970 UTC) to use as the message timestamp. Defaults to current time
        :type timestamp_ms: int, optional
        :return: resolves to RecordMetadata
        :rtype:  FutureRecordMetadata
        :raises KafkaError: if unable to send
        :raises KafkaTimeoutError: if timeout has occurred
        """

        future = self._send(topic=topic,
                            key=key,
                            value=value,
                            partition=partition,
                            timestamp_ms=timestamp_ms)
        if self._success_callback and self._failure_callback:
            future.add_callback(self._success_callback)
            # attach the failure handler as an errback so it only fires on errors
            future.add_errback(self._failure_callback)
        return future

    def post_send(self, *args, **kwargs):
        pass

    def flush(self, timeout=None):
        """
        Invoking this method makes all buffered records immediately available to send
        (even if linger_ms is greater than 0) and blocks on the completion of the requests
        associated with these records. The post-condition of flush() is that any previously sent
        record will have completed (e.g. Future.is_done() == True). A request is considered
        completed when either it is successfully acknowledged according to the ‘acks’ configuration
        for the producer, or it results in an error.
        Other threads can continue sending messages while one thread is blocked waiting for a flush call to complete; however, no guarantee is made about the completion of messages sent after the flush call begins.

        :param timeout: timeout in seconds to wait for completion
        :type timeout: float, optional
        :raises KafkaError: if unable to send
        :raises KafkaTimeoutError: if timeout has occurred
        """
        try:
            self.producer_client.flush(timeout=timeout)
        except (KafkaTimeoutError, KafkaError) as e:
            raise e

    def serialize_message(self, message, *args, **kwargs):
        """ serialize the message """
        pass

    def close(self):
        """ close the producer"""
        try:
            logger.info('closing producer...')
            self.producer_client.close(
                timeout=self.configs.get('close_timeout') or 5)
        except KafkaError as e:
            raise e
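A minimal usage sketch for the wrapper above, assuming the class is named StreamProducer and is constructed with the topic, optional callbacks, and a client_config dict as described in the docstring; the class name, broker address, and callback signatures are illustrative assumptions, not taken from the example.

# Hedged usage sketch; 'StreamProducer' and 'localhost:9092' are assumptions.
def on_success(record_metadata):
    print('delivered to', record_metadata.topic, record_metadata.partition, record_metadata.offset)

def on_failure(exc):
    print('send failed:', exc)

producer = StreamProducer(
    topic='outbound-topic',
    future_timeout=3,
    close_timeout=5,
    success_callback=on_success,
    failure_callback=on_failure,
    client_config={'bootstrap_servers': 'localhost:9092'},
)

producer.send(b'hello')                      # synchronous, returns RecordMetadata
producer.send_async(topic='outbound-topic',  # asynchronous, callbacks fire on completion
                    value=b'hello again')
producer.flush(timeout=10)
producer.close()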
Example #47
0
from time import sleep
from json import dumps
from kafka import KafkaProducer

# Create a Kafka Producer
producer = KafkaProducer(bootstrap_servers=['localhost:9092'],
                         value_serializer=lambda x: dumps(x).encode('utf-8'))

# Send numbers from 0 to 99
for number in range(100):
    producer.send('numbers', value={'number': number})
    sleep(0.025)

# Flush buffered messages so none are lost when the script exits
producer.flush()
Example #48
0
from kafka import KafkaProducer
import login as pbbuf
import time
import fsp_proto_pb2 as fsp_pb
producer = KafkaProducer(
    bootstrap_servers='192.168.7.60:9092,192.168.7.61:9092,192.168.7.62:9092')
# cpbuf=pbbuf.ClientConnected(87,"ablert_test")
# producer.send('ablert_test',cpbuf)

# here, the topic is what the downstream consumer reads from
for i in range(401, 445):
    cpbuf = pbbuf.ClientConnected(i)
    producer.send('albert_test', cpbuf)
    time.sleep(1)
    # cpt = fsp_pb.ClientConnected()
    # cpt.ParseFromString(cpbuf[5])
    # print cpt.client_id
    # print cpt.app_id
    # print cpt.client_name

    print(i)
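The login helper above is not shown, so as a hedged sketch, under the assumption that fsp_proto_pb2.ClientConnected carries the client_id and client_name fields referenced in the commented-out parsing code, an explicit protobuf serialization before producing would look roughly like this:

# Sketch only: serialize the protobuf message to bytes before sending.
# Field names and the single broker address are assumptions based on the example above.
from kafka import KafkaProducer
import fsp_proto_pb2 as fsp_pb

producer = KafkaProducer(bootstrap_servers='192.168.7.60:9092')

msg = fsp_pb.ClientConnected()
msg.client_id = 401
msg.client_name = 'albert_test'

producer.send('albert_test', msg.SerializeToString())  # protobuf -> bytes payload
producer.flush()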
Example #49
0
                logger.info("Sending back to gateway")
                # Publish data to gateway
                # UserID, TradeID, SID, updated user values
                # We publish twice, once for buyer, once for seller
                bData = {
                    "Type": "TradeComplete",
                    "User": data["User"],
                    "TradeID": data["TradeID"],
                    "SID": buyUserData["SID"],
                    "actual_cash": buyUserData["actual_cash"],
                    "potential_coins_owned":
                    buyUserData["potential_coins_owned"],
                    "actual_coins_owned": buyUserData["actual_coins_owned"],
                    "potential_cash": buyUserData["potential_cash"]
                }
                producer.send("MTG", str(json.dumps(bData)))

                sData = {
                    "Type": "TradeComplete",
                    "User": tradeItem["User"],
                    "TradeID": tradeItem["TradeID"],
                    "SID": sellUserData["SID"],
                    "actual_cash": sellUserData["actual_cash"],
                    "potential_cash": sellUserData["potential_cash"],
                    "actual_coins_owned": sellUserData["actual_coins_owned"],
                    "potential_coins_owned":
                    sellUserData["potential_coins_owned"]
                }
                producer.send("MTG", str(json.dumps(sData)))

            else:
Example #50
0
"""
Read the local data file and write the data to the interface (Kafka topic)
"""
import pandas as pd
import numpy as np
from kafka import KafkaProducer
import time
import json

producer = KafkaProducer(bootstrap_servers='localhost:9092',
                         key_serializer=lambda x: x.encode('utf-8'),
                         value_serializer=lambda x: x.encode('utf-8'))

data = pd.read_csv('aapl-trading-hour.csv')
data.rename(columns={data.columns[0]: 'timestamp'}, inplace=True)
# data.set_index('timestamp',inplace=True)

if __name__ == "__main__":
    while True:
        i = 0
        for k, v in data.iterrows():
            producer.send('test', key=str(i), value=v.to_json())
            i += 1
            time.sleep(1)
Example #51
0
class PoibaiduPipeline(object):
	
	overwrite_today = ""
	crawled_dir = ""
	csv_file_path = None

	kafka_producer = None
	to_kafka = None
	kafka_topic = ""
		
	def init_self_attributes(self, spider):
		self.overwrite_today = datetime.datetime.now().strftime("%Y%m%d")
		if 1 > len( self.crawled_dir ):
			self.crawled_dir = spider.settings.get( name="CRAWLED_DIR", default = "" )
		if self.csv_file_path is None or 1 > len( self.csv_file_path ):
			self.csv_file_path = os.path.join( self.crawled_dir, f"{spider.name}{self.overwrite_today}.csv" )
		if self.to_kafka is None:
			self.to_kafka = spider.settings.get( name="PIPELINE_TO_KAFKA", default = False )
		if 1 > len( self.kafka_topic ):
			self.kafka_topic = spider.name if hasattr( spider, "name" ) else ""
		if "entrobus28" == socket.gethostname():
			from kafka import KafkaProducer
			# for importing into hbase using Kafka
			if self.kafka_producer is None:
				self.kafka_producer = KafkaProducer(bootstrap_servers = "entrobus32:9092")
	
	def get_items_and_keys(self, item=None, excluded_list=[]):
		item_list = []
		all_keys = []
		if item is None:
			return all_keys, item_list
		for index, one in enumerate( item ):
			if one not in excluded_list:
				all_keys.append( one )
				if 0 == len(item[one]):
					item_list.append("")
				elif 1 == len(item[one]):
					item_list.append( item[one][0] )
				else:
					item_list.append( item[one] )
		return all_keys, item_list

	def process_item(self, item, spider):
		self.init_self_attributes( spider )

		page_type = ""
		for index, one in enumerate( item ):
			if "page_type" == one and 1 == len(item["page_type"]):
				page_type = str( item["page_type"][0] )
				break
		excluded_list = ["page_type", ]
		all_keys1, item_list1 = self.get_items_and_keys( item = item, excluded_list = excluded_list )
		index = -1
		content_dict = {}
		if "content" in all_keys1 and "json" == page_type:
			index = all_keys1.index( "content" )
			if -1 < index and index < len( item_list1 ):
				# NOTE: eval() on crawled content is risky; json.loads() would be safer if the field is valid JSON
				content_dict = eval(item_list1[index])
				item_list1.remove( item_list1[index] )
				all_keys1.remove( "content" )

				keys = []
				items = []
				for key, value in content_dict.items():
					keys.append( key )
					items.append( value )
				all_keys = keys + all_keys1
				item_list = items + item_list1
				self.append_row( spider = spider, all_keys = all_keys, item_list = item_list )
				if self.to_kafka and "entrobus28" == socket.gethostname():
					self.pipeline_to_kafka( spider = spider, all_keys = all_keys, item_list = item_list  )
		elif "json" == page_type:
			spider.logger.error( f"no content in all_keys1 ({all_keys1}) in Method process_item of Class QqhousePipeline. Exception = {ex}" )

		return item

	def append_row(self, spider = None, all_keys = [], item_list = [] ):
		try:
			if self.csv_file_path is None or 1 > len( self.csv_file_path ):
				spider.logger.error( f"missing filename qqhouse{self.overwrite_today}.csv or CRAWLED_DIR ({self.crawled_dir}) setting" )
			else:
				new_file = False
				if not os.path.isfile( self.csv_file_path ):
					new_file = True
				with open( self.csv_file_path, "a", encoding="utf-8", newline="") as f:
					writer = csv.writer(f)
					if new_file:
						writer.writerow( all_keys )
					writer.writerow( item_list )
		except Exception as ex:
			spider.logger.error( f"cannot write into file {self.csv_file_path}; content is: {item_list}; Exception = {ex}" )

	def pipeline_to_kafka( self, spider = None, all_keys = [], item_list = []  ):
		if 1 > len( self.kafka_topic ):
			spider.logger.error( f"Inside Method {sys._getframe().f_code.co_name} of Class {self.__class__.__name__}, Exception: None == spider.name" )
			return False
		if 1 > len( all_keys ) or 1 > len( item_list ):
			spider.logger.error( f"Inside Method {sys._getframe().f_code.co_name} of Class {self.__class__.__name__}, Exception: empty keys or empty values ({all_keys} or {item_list})" )
			return False
		if self.kafka_producer is None:
			spider.logger.error( f"Inside Method {sys._getframe().f_code.co_name} of Class {self.__class__.__name__}, Exception: self.kafka_producer is None" )
			return False

		self.kafka_producer.send(self.kafka_topic, bytes( json.dumps( dict(zip( all_keys, item_list )) ), encoding="utf-8" ), timestamp_ms = int(time.time()*1000) )
		return True

	def close_spider(self, spider = None):
		now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
		if spider is not None:
			spider.logger.info( f"Inside Method {sys._getframe().f_code.co_name} of Class {self.__class__.__name__}, spider closes at {now}" )
Example #52
0
"""
# Links

Start Zookeeper and Kafka server on mac
https://medium.com/@Ankitthakur/apache-kafka-installation-on-mac-using-homebrew-a367cdefd273

Produce a topic and read it
https://towardsdatascience.com/kafka-python-explained-in-10-lines-of-code-800e3e07dad1

"""
import json
from time import sleep
from kafka import KafkaProducer

topic = "numtest"
producer = KafkaProducer(bootstrap_servers=["localhost:9092"], value_serializer=lambda x: json.dumps(x).encode("utf-8"))

# Creating a stream of integers
for x in range(1000):
    data = {"number": x}
    producer.send(topic, value=data)
    sleep(5)
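The second link above also covers reading the topic back; a minimal consumer sketch for the numtest topic, assuming the same local broker (the group id and offset-reset policy are illustrative choices, not from the example), would be roughly:

# Hedged sketch: consume the numtest messages produced above.
import json
from kafka import KafkaConsumer

consumer = KafkaConsumer(
    "numtest",
    bootstrap_servers=["localhost:9092"],
    auto_offset_reset="earliest",
    group_id="numtest-readers",
    value_deserializer=lambda m: json.loads(m.decode("utf-8")),
)

for record in consumer:
    print(record.value)  # e.g. {'number': 0}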
Example #53
0
class get_stock():
    #set the three arguments, stock symbol, kafka broker location, kafka topic
    def __init__(self, code, server, topic):
        self.__code = code
        self.__server = server
        self.__topic = topic
        self.__producer = KafkaProducer(bootstrap_servers=self.__server)

    #reassign new values to variables
    def set_code(self, code):
        self.__code = code

    def set_server(self, server):
        self.__server = server

    def set_topic(self, topic):
        self.__topic = topic

    # initialize the logger
    def logger(self):
        logging.basicConfig()
        logger = logging.getLogger()
        logger.setLevel(logging.DEBUG)
        return logger

    # print the log to the console and also store it in a log file
    def print_log(self, msg):
        logger = self.logger()
        f = logging.FileHandler(
            "/Users/AmberWang/stock/Big-Data/log/kafka.log")
        f.setLevel(logging.DEBUG)
        formatter = logging.Formatter("%(asctime)-15s %(message)s")
        f.setFormatter(formatter)
        logger.addHandler(f)
        logger.debug(msg)

    #check whether the stock is valid or not
    def getQuotes(self, code):
        try:
            #   msg=json.dumps(googlefinance.getQuotes(code))
            base = 'https://finance.google.com/finance?q='
            param = code
            suffix = '&output=json'
            url = base + param + suffix
            response = requests.get(url)
            if (response.status_code == 200):
                fin_data = json.loads(
                    response.content[6:-2].decode('unicode_escape'))
                send_data = {}
                send_data['Symbol'] = code
                send_data['company'] = fin_data['t']
                send_data['LastTradePrice'] = fin_data['op']
                msg = json.dumps(send_data)
                return msg
        except HTTPError:
            self.logger().error("Please enter correct stock code!")

    #send the message from kafka producer
    def send(self, topic, code):
        msg = self.getQuotes(code)
        print(msg)
        # value must be bytes (no value_serializer configured) and timestamp_ms must be integer milliseconds
        self.__producer.send(topic=topic,
                             value=msg.encode('utf-8'),
                             timestamp_ms=int(time.time() * 1000))
        self.print_log(msg)

    #execute the send message job
    def deliver(self):
        schedule.every(1).second.do(self.send, self.__topic, self.__code)
        while True:
            schedule.run_pending()
            time.sleep(1)

    # close kafka producer
    def shut_down(self):
        self.logger().debug('exiting the program')
        self.__producer.flush(10)
        self.__producer.close()
        self.logger().debug('kafka producer has been closed')
Example #54
0
   role=recordlist[5]
   duration=recordlist[6]
   txbytes=recordlist[7]
   rxbytes=recordlist[8]
   txaudiokbitrate=recordlist[9]
   rxaudiokbitrate=recordlist[10]
   txvideokbitrate=recordlist[11]
   rxvideokbitrate=recordlist[12]
   cpuappusage=recordlist[13]
   cputotalusage=recordlist[14]
   deviceskunum=recordlist[15]
   dstype='ods_meeting_running_stats_r'
   result=dstype+','+url+','+id+','+uid+','+createtime+','+apptype+','+clientappid+','+devicetype+','+deviceinfo+','+version+','+meetingid+','+role+','+duration+','+txbytes+','+rxbytes+','+txaudiokbitrate+','+rxaudiokbitrate+','+txvideokbitrate+','+rxvideokbitrate+','+cpuappusage+','+cputotalusage+','+deviceskunum
   #print(result)
   result=result.encode('utf-8')
   producer.send('pro_meeting_running_stats_R2P3',result)
 elif datatype=='1':
   width=recordlist[5]
   height=recordlist[6]
   framerate=recordlist[7]
   bitrate=recordlist[8]
   dstype='ods_meeting_local_video_stats_r'
   result=dstype+','+url+','+id+','+uid+','+createtime+','+apptype+','+clientappid+','+devicetype+','+deviceinfo+','+version+','+meetingid+','+width+','+height+','+framerate+','+bitrate
   #print(result)
   result=result.encode('utf-8')
   producer.send('pro_meeting_local_video_stats_R2P3',result)
 elif datatype=='2':
   width=recordlist[5]
   height=recordlist[6]
   bitrate=recordlist[7]
   framerate=recordlist[8]
Example #55
0
            win = -1
            if 'winner' in match_js:
                for team in match_js['teams']:
                    if team['winner'] == True:
                        win = int(team['teamId']) / 100
            if match_js[
                    'mapId'] == 11 and 'timeline' in match_js and 'frames' in match_js[
                        'timeline']:
                for frame in match_js['timeline']['frames']:
                    if 'events' in frame:
                        for event in frame['events']:
                            if event['eventType'] == 'CHAMPION_KILL':
                                #print event
                                pos = event['position']
                                x = int(pos['x'])
                                y = int(pos['y'])
                                kill_team = 1 if event['killerId'] <= 5 else 2
                                win_kill = 1 if kill_team == win else 0
                                event_key = key + "_" + str(
                                    win_kill) + "_" + str(kill_team)
                                producer.send(
                                    'match-event',
                                    json.dumps({
                                        'key': event_key,
                                        'x': x,
                                        'y': y
                                    }))
        producer.flush()
        time.sleep(3)
    time.sleep(60)
Example #56
0
# produce keyed messages to enable hashed partitioning
#producer.send('my-topic', key=b'foo', value=b'bar')

# encode objects via msgpack
#producer = KafkaProducer(value_serializer=msgpack.dumps)
#producer.send('msgpack-topic', {'key': 'value'})

# produce json messages
#producer = KafkaProducer(value_serializer=lambda m: json.dumps(m).encode('ascii'))
#producer.send('json-topic', {'key': 'value'})

import json
import time
from kafka import KafkaProducer

# the producer is commented out above; assuming a local broker and raw bytes values,
# since the loop below encodes each message to utf-8 itself
producer = KafkaProducer(bootstrap_servers='localhost:9092')

with open('/tmp/extract_tweets_5000000.json', 'r') as f:
    data = json.load(f)
# produce asynchronously
for i in range(50000):
    print("Step {}".format(i))
    time.sleep(5)
    for j in range(100):
        msg = json.dumps(data[i * 100 + j])
        print("Message : {}".format(msg))
        producer.send('my-topic', msg.encode("utf-8"))

# block until all async messages are sent
producer.flush()
print("DONE")
# configure multiple retries
# producer = KafkaProducer(retries=5)
Example #57
0
print('======================PRODUCER_CONFIG================================')



#try:
# iterate over the records fetched from Kafka
for message in consumer:
  r=message.value.decode('utf-8')
  rlist=json.loads(r)
  for k in range(0,len(rlist)):
    reslist_split=rlist[k].replace('["','').replace('"]','').split(',')
    createtime=str(int(time.time()* 1000))
    apptype=reslist_split[0]
    clientappid=reslist_split[1]
    devicetype=reslist_split[2]
    deviceinfo=reslist_split[3]
    version=reslist_split[4]
    url=reslist_split[5]
    if url.startswith('http'):
      urls=url.split('cn/')
      id=urls[1]
      data=ods_meeting_url_r(id,createtime,apptype,clientappid,devicetype,deviceinfo,version,url)
      res=json.dumps(convert_to_dict(data),separators=(',',':')).encode('utf-8')
      producer.send('pro_meeting_url_R2P3',res)

#except Exception as e:
  #print(e)
#finally:
  #cursor.close()
  #conn.close()
  #consumer.close()
Example #58
0
from kafka import KafkaProducer
import json
import random
from time import sleep
from datetime import datetime

# Create an instance of the Kafka producer
producer = KafkaProducer(bootstrap_servers='192.168.99.100:29092',
                         value_serializer=lambda v: str(v).encode('utf-8'))

# Call the producer.send method with a producer-record
print("Ctrl+c to Stop")
while True:
    producer.send('kafka-python-topic', random.randint(1, 999))
Example #59
0
def api_v1_create_flow():
    if request.method == 'POST':
        producer = KafkaProducer(bootstrap_servers=bootstrap_servers)
        content = json.loads('{}'.format(request.data))
        flowID = assign_flow_id()

        if content['src_switch'] == content['dst_switch']:
            allflows = api_v1_topology_get_one_switch_flows(
                content['src_switch'], content['src_port'], content['src_vlan'],
                content['dst_switch'], content['dst_port'], content['dst_vlan'],
                content['bandwidth'], flowID)
        else:
            transitVlanForward = assign_transit_vlan()
            forwardFlows = api_v1_topology_get_path(
                content['src_switch'], content['src_port'], content['src_vlan'],
                content['dst_switch'], content['dst_port'], content['dst_vlan'],
                content['bandwidth'], transitVlanForward, flowID)

            transitVlanReturn = assign_transit_vlan()
            reverseFlows = api_v1_topology_get_path(
                content['dst_switch'], content['dst_port'], content['dst_vlan'],
                content['src_switch'], content['src_port'], content['src_vlan'],
                content['bandwidth'], transitVlanReturn, flowID)

            allflows = [forwardFlows, reverseFlows]

            if not forwardFlows or not reverseFlows:
                response = {"result": "failed", "message": "unable to find valid path in the network"}
                return json.dumps(response)

        # NOTE: forwardFlows/reverseFlows are only assigned in the else branch above,
        # so the single-switch path would raise a NameError here
        forwardFlowSwitches = [str(f.switch_id) for f in forwardFlows]
        reverseFlowSwitches = [str(f.switch_id) for f in reverseFlows]


        for flows in allflows:
            for flow in flows:
                message = Message()
                message.data = flow
                message.type = "COMMAND"
                message.timestamp = 42
                kafkamessage = message.toJSON().encode('utf-8')
                print('topic: {}, message: {}'.format(topic, kafkamessage))
                messageresult = producer.send(topic, kafkamessage)
                result = messageresult.get(timeout=5)

        a_switchNode = neo4j_connect.find_one('switch', property_key='name', property_value='{}'.format(content['src_switch']))
        b_switchNode = neo4j_connect.find_one('switch', property_key='name', property_value='{}'.format(content['dst_switch']))

        if not a_switchNode or not b_switchNode:
            return '{"result": "failed"}'

        pathQuery = "MATCH (u:switch {{name:'{}'}}), (r:switch {{name:'{}'}}) MERGE (u)-[:flow {{flowid:'{}', src_port: '{}', dst_port: '{}', src_switch: '{}', dst_switch: '{}', flowpath: {}}}]->(r)"

        pathForwardQuery = pathQuery.format(a_switchNode['name'], b_switchNode['name'], flowID, content['src_port'], content['dst_port'], content['src_switch'], content['dst_switch'], str(forwardFlowSwitches))
        pathReverseQuery = pathQuery.format(b_switchNode['name'], a_switchNode['name'], flowID, content['dst_port'], content['src_port'], content['dst_switch'], content['src_switch'], str(reverseFlowSwitches))

        neo4j_connect.run(pathForwardQuery)
        neo4j_connect.run(pathReverseQuery)

        response = {"result": "sucessful", "flowID": flowID}
        return json.dumps(response)
Example #60
0
from time import sleep
from json import dumps
from kafka import KafkaProducer
import random

# Also show the original data
# Make a Kafka producer
producer = KafkaProducer(bootstrap_servers=['localhost:9092'],
                         value_serializer=lambda x: dumps(x).encode('utf-8'))

# Generate data every sec
for e in range(100):
    data = {'data': random.randrange(0, 100)}
    producer.send('kafka_6', value=data)
    sleep(1)