def fetchFrom():
    in_kafka = KafkaClient(IN_KAFKA_HOST)
    consumer = SimpleConsumer(in_kafka, 'trending', CONSUMER_TOPIC,
                              max_buffer_size=20 * 1024 * 1024)
    out_kafka = KafkaClient(OUT_KAFKA_HOST)
    producer = SimpleProducer(out_kafka)
    for msg in consumer:
        record = json.loads(msg.message.value)
        if 'tags' in record and '_trends' in record['tags']:
            try:
                producer.send_messages("trends", msg.message.value)
                print(str(time.strftime("%c")) + " pump url " + record['inlink'].encode('utf-8'))
            except MessageSizeTooLargeError as err:
                logging.warning(err)
            continue
        if 'metadata' in record:
            print(record['metadata'])
        if 'metadata' in record and 'tags' in record['metadata'] and '_channels' in record['metadata']['tags']:
            try:
                producer.send_messages("channels", msg.message.value)
                print(str(time.strftime("%c")) + " pump url " + record['inlink'].encode('utf-8'))
            except MessageSizeTooLargeError as err:
                logging.warning(err)
            continue
    in_kafka.close()
    out_kafka.close()
def main():
    ## Pass the kafka_url, e.g. `192.168.1.110:9092`
    kafka_url = sys.argv[1]

    ## Register to read messages from the "rousseau" list
    consumer = KafkaConsumer('rousseau', group_id='my_group',
                             bootstrap_servers=[kafka_url])

    ## Register to send to the rousseau-chain channel
    kafka = KafkaClient(kafka_url)
    producer = SimpleProducer(kafka)

    # Initialize a chain backed by 2 disk files
    c = chain(diskHashList("fentries.dat"), diskHashList("fnodes.dat"))

    ## The main event loop
    for message in consumer:
        # message value is raw byte string -- decode if necessary!
        # e.g., for unicode: `message.value.decode('utf-8')`
        print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                             message.offset, message.key,
                                             message.value))
        seq = c.add(message.value)
        response = "%s|%s|%s" % (seq, hexlify(c.head()), message.value)
        print(response)

        # Note that the application is responsible for encoding messages to type bytes
        producer.send_messages(b'rousseau-chain', response)
class Tail2kafka(object):
    def __init__(self, host, port, topic, logfile):
        self.host = host
        self.port = port
        self.topic = topic
        self.logfile = logfile
        self.create_kafka_producer()

    def create_kafka_producer(self):
        kafka = KafkaClient(self.host + ":" + self.port)
        self.producer = SimpleProducer(kafka)

    def log_lines_generator(self):
        # Follow the log file from its current end, surviving rotations (-F)
        cmd = ['tail', '-n', '0', '-F', self.logfile]
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=None)
        while True:
            line = process.stdout.readline().strip()
            yield line

    def begin_to_tail(self):
        try:
            for line in self.log_lines_generator():
                self.producer.send_messages(self.topic, line)
        except KeyboardInterrupt:
            pass
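# A minimal usage sketch for Tail2kafka (the host, port, topic and logfile values are
# illustrative assumptions, not taken from the original source):
#
#   tailer = Tail2kafka("localhost", "9092", "app-logs", "/var/log/app.log")
#   tailer.begin_to_tail()  # blocks, forwarding each new log line to Kafka until interrupted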
def report(timestamp, vin, data):
    """ Log the location record """
    conf = get_settings()
    kafka = None
    logger.info('Kafka MQ Server: Report Request: Time: %s, VIN: %s, Data: %s.',
                timestamp, vin, data)

    payload = {}
    payload['timestamp'] = timestamp
    payload['vin'] = vin
    payload['data'] = data

    # Connect to Kafka Message Queue Server
    try:
        kafka = KafkaClient(conf['TRACKING_MQ_URL'])
    except:
        logger.error("%s: Kafka Message Queue Server unavailable:", conf['TRACKING_MQ_URL'])
        kafka = None
        return False

    producer = SimpleProducer(kafka)
    producer.send_messages(conf['TRACKING_MQ_TOPIC'], json.dumps(payload))
    logger.info("%s: Report data published to message queue.", conf['TRACKING_MQ_URL'])
    return True
class TweeterStreamListener(tweepy.StreamListener):
    """ A class to read the twitter stream and push it to Kafka"""

    def __init__(self, api):
        self.api = api
        # call the parent class constructor (the original named tweepy.StreamListener
        # here, which skipped StreamListener's own __init__)
        super(TweeterStreamListener, self).__init__(api)
        client = KafkaClient("localhost:9092")
        self.producer = SimpleProducer(client, async=True,
                                       batch_send_every_n=1000,
                                       batch_send_every_t=10)

    def on_status(self, status):
        """ This method is called whenever new data arrives from the live stream.
        We asynchronously push this data to the kafka queue"""
        msg = status.text.encode('utf-8')
        # print(msg)
        try:
            self.producer.send_messages(b'twitterstream', msg)
        except Exception as e:
            print(e)
            return False
        return True

    def on_error(self, status_code):
        print(status_code)
        print("Error received in kafka producer")
        return True  # Don't kill the stream

    def on_timeout(self):
        return True  # Don't kill the stream
class listener(tweepy.StreamListener):
    def __init__(self):
        client = KafkaClient("localhost:9092")
        try:
            self.producer = SimpleProducer(client, async=True,
                                           batch_send_every_n=1000,
                                           batch_send_every_t=10)
            print('Initialised')
        except Exception as e:
            print('failed:', str(e))

    def on_data(self, data):
        try:
            jsondata = json.loads(data)
            print(jsondata)
            self.producer.send_messages('trumpstream', str(data))
            db.trumpdb.insert(jsondata)  # `db` is a Mongo handle assumed to be defined elsewhere
            return True
        except TypeError as e:
            print('TypeError:', str(e))
            time.sleep(5)

    def on_error(self, status):
        print(status)
class KafkaMessageAdapterPreHourly(MessageAdapter):

    adapter_impl = None

    def __init__(self):
        client_for_writing = KafkaClient(cfg.CONF.messaging.brokers)
        self.producer = SimpleProducer(client_for_writing)
        self.topic = cfg.CONF.messaging.topic_pre_hourly

    @staticmethod
    def init():
        # object to keep track of offsets
        KafkaMessageAdapterPreHourly.adapter_impl = simport.load(
            cfg.CONF.messaging.adapter_pre_hourly)()

    def do_send_metric(self, metric):
        self.producer.send_messages(
            self.topic,
            json.dumps(metric, separators=(',', ':')))
        return

    @staticmethod
    def send_metric(metric):
        if not KafkaMessageAdapterPreHourly.adapter_impl:
            KafkaMessageAdapterPreHourly.init()
        KafkaMessageAdapterPreHourly.adapter_impl.do_send_metric(metric)
def create_note(request):
    if request.method != "POST":
        return _error_response(request, "must make POST request")
    if "authenticator" not in request.POST or "title" not in request.POST or "details" not in request.POST:
        return _error_response(request, "missing fields")

    values = {
        "authenticator": request.POST["authenticator"],
        "title": request.POST["title"],
        "details": request.POST["details"],
    }
    data = urllib.parse.urlencode(values).encode("utf-8")
    req = urllib.request.Request("http://models:8000/api/v1/note/create", data=data, method="POST")
    resp_json = urllib.request.urlopen(req).read().decode("utf-8")
    resp = json.loads(resp_json)

    if resp["ok"] is True:
        kafka = KafkaClient("kafka:9092")
        producer = SimpleProducer(kafka)
        note_new_listing = {
            "title": request.POST["title"],
            "details": request.POST["details"],
            "id": resp["resp"]["id"],
        }
        producer.send_messages(b"note-listings-topic", json.dumps(note_new_listing).encode("utf-8"))
        # es_add is a temporary helper that adds the listing to ES directly without going through kafka
        es_add_note_listing(request, resp["resp"]["id"], resp["resp"]["username"])
        return _success_response(request, resp["resp"])
    else:
        return _error_response(request, resp["error"])
class DmsKafkaClient(object):
    def __init__(self):
        config = ServiceContext().getConfigService()
        broker_list = config.get("Message", "kafka_producer")
        kafka = KafkaClient(broker_list)
        self.producer = SimpleProducer(kafka)
        self.zabbix_alert = config.get("Message", "zabbix_alert_topic")

    def sendPackageTimeout(self, accountId):
        message = {
            "accountId": accountId,
            "host": None,
            "item": None,
            "severity": "ERROR",
            "description": "account %s workflow timeout" % accountId
        }
        all = {
            "timestamp": 1,
            "src": "rundeck",
            "host_ip": "10.74.113.101",
            "rawdata": json.dumps(message)
        }
        schema = avro.schema.parse(avro_schema)
        writer = avro.io.DatumWriter(schema)
        bytes_writer = io.BytesIO()
        encoder = avro.io.BinaryEncoder(bytes_writer)
        writer.write(all, encoder)
        try:
            self.producer.send_messages(b"%s" % self.zabbix_alert, bytes_writer.getvalue())
            logger.info("send to zabbix sa successfully")
        except:
            logger.error("error occurred when sending package timeout message to zabbix alert topic")
def takePicture():
    imagePath = '/tmp/image.jpg'
    try:
        os.remove(imagePath)
    except OSError:
        pass
    subprocess.call(
        "chdkptp -ec -e\"rec\" -e\"rs %s\"" % (imagePath[:-4]),
        shell=True
    )
    if not os.path.isfile(imagePath):
        logging.warning("Error while taking picture")
        return

    with open(imagePath, "rb") as imageFile:
        imageEncoded = base64.b64encode(imageFile.read())

    upload = {
        'id': str(uuid.uuid4()),
        'picture': imageEncoded,
        'takenTime': int(time.time()),
        'ride': 'cam2',
    }
    data = json.dumps(upload)
    logging.info("Message size %d" % len(data))

    kafka = KafkaClient(kafka_server)
    producer = SimpleProducer(kafka)
    producer.send_messages(b'pictures', data)
class KafkaMetricSender(MetricSender):
    def __init__(self, config):
        super(KafkaMetricSender, self).__init__(config)
        kafka_config = config["output"]["kafka"]
        # default topic
        # self.topic = kafka_config["topic"].encode('utf-8')
        # producer
        self.broker_list = kafka_config["brokerList"]
        self.kafka_client = None
        self.kafka_producer = None

    def open(self):
        self.kafka_client = KafkaClient(self.broker_list, timeout=59)
        self.kafka_producer = SimpleProducer(
            self.kafka_client,
            batch_send=True,
            batch_send_every_n=500,
            batch_send_every_t=30
        )

    def send(self, msg, topic):
        self.kafka_producer.send_messages(topic, json.dumps(msg))

    def close(self):
        if self.kafka_producer is not None:
            self.kafka_producer.stop()
        if self.kafka_client is not None:
            self.kafka_client.close()
def get(address=u'', lat=0, lon=0, radius=0):
    # streetAddress = '1600 Pennsylvania Ave, Washington, DC'
    client_id = '8728ec7ee9424eb4aae9d45107ee6481'
    resolvedAddress = lambda: None  # lightweight namespace object; attributes are attached below
    if len(address) > 0:
        resolvedAddress.__dict__ = getAddressLatLon(address)
        lat = float(resolvedAddress.lat)
        lon = float(resolvedAddress.lon)
        radius = 1000
    now = datetime.now()
    sixHoursEarlier = now - timedelta(hours=1)  # note: despite the name, this is one hour earlier
    instagramReturnTuple = doInstagramMagic(resolvedAddress, client_id, sixHoursEarlier, now, lat, lon, radius)
    instagramJson = instagramReturnTuple[0]
    resolvedAddress.imageCount = instagramReturnTuple[1]

    topic = 'qpr.geogram'
    host = 'k01.istresearch.com'
    port = 9092
    kafka = KafkaClient("%s:%i" % (host, port))
    producer = SimpleProducer(kafka)
    message = json.dumps({"message": instagramJson})
    producer.send_messages(topic, message)

    return json.dumps(resolvedAddress.__dict__)
def fetchFrom():
    in_kafka = KafkaClient('172.31.10.154:9092')
    consumer = SimpleConsumer(in_kafka, 'fetcher', 'cpp.pages',
                              max_buffer_size=20 * 1024 * 1024)
    out_kafka = KafkaClient("172.31.1.70:9092")
    producer = SimpleProducer(out_kafka)
    for msg in consumer:
        page = json.loads(msg.message.value)
        if 'retweet' in page['meta']:
            print("remove twitter page")
            continue
        output = {}
        output['inlink'] = ''
        output['level'] = 1
        output['url'] = page['url']
        output['fts'] = page['ts_fetch']
        output['content'] = page['content']
        try:
            producer.send_messages("process", json.dumps(output))
            print(str(time.time()) + " pump url " + output['url'].encode('utf-8'))
        except MessageSizeTooLargeError as err:
            logging.warning(err)
    in_kafka.close()
    out_kafka.close()
def genData(topic):
    producer = SimpleProducer(kafka, async=True)
    with open(source_file) as f:
        for line in f:
            print(line)
            jd = json.dumps(line)  # note: jd is computed but never sent; the raw line is published
            producer.send_messages(topic, line.encode('utf-8'))
def run(self, topic, message, hosts=None):
    """
    Simple round-robin synchronous producer to send one message to one topic.

    :param hosts: Kafka hostname(s) to connect in host:port format.
                  Comma-separated for several hosts.
    :type hosts: ``str``

    :param topic: Kafka Topic to publish the message on.
    :type topic: ``str``

    :param message: The message to publish.
    :type message: ``str``

    :returns: Response data: `topic`, target `partition` where message was sent,
              `offset` number and `error` code (hopefully 0).
    :rtype: ``dict``
    """
    if hosts:
        _hosts = hosts
    elif self.config.get('hosts', None):
        _hosts = self.config['hosts']
    else:
        raise ValueError("Need to define 'hosts' in either action or in config")

    # set default for empty value
    _client_id = self.config.get('client_id') or self.DEFAULT_CLIENT_ID

    client = KafkaClient(_hosts, client_id=_client_id)
    client.ensure_topic_exists(topic)
    producer = SimpleProducer(client)
    result = producer.send_messages(topic, kafka_bytestring(message))

    if result[0]:
        return result[0].__dict__
def send_kafka_msg(iters):
    # TODO: Add try/except statements for the kafka connection
    kafka = KafkaClient(kafka_host)
    producer = SimpleProducer(kafka)
    for key, val in iters:
        msg = combine_count_json(key, val)
        producer.send_messages(str(topic).encode("utf-8"), str(msg).encode("utf-8"))
    kafka.close()
def genData(topic):
    producer = SimpleProducer(kafka, async=False)
    while True:
        with open(source_file) as f:
            for line in f:
                # SimpleProducer has no `send` method; use send_messages
                producer.send_messages(topic, line)
        # the `with` block closes the file handle; the original additionally called
        # close() on `source_file` (the file name), which would fail
def test_produce__new_topic_fails_with_reasonable_error(self):
    new_topic = 'new_topic_{guid}'.format(guid=str(uuid.uuid4())).encode('utf-8')
    producer = SimpleProducer(self.client, random_start=False)

    # At first it doesn't exist
    with self.assertRaises((UnknownTopicOrPartitionError, LeaderNotAvailableError)):
        producer.send_messages(new_topic, self.msg("one"))
def test_producer_random_order(self):
    producer = SimpleProducer(self.client, random_start=True)
    resp1 = producer.send_messages(self.topic, self.msg("one"), self.msg("two"))
    resp2 = producer.send_messages(self.topic, self.msg("three"))
    resp3 = producer.send_messages(self.topic, self.msg("four"), self.msg("five"))

    self.assertEqual(resp1[0].partition, resp3[0].partition)
    self.assertNotEqual(resp1[0].partition, resp2[0].partition)
def genData(self):
    with open(self.source_file) as f:
        reader = csv.DictReader(f)
        crimeLocations = list(reader)

    kafka_cluster = self.config['kafka_cluster']
    print("kafka_cluster is:" + kafka_cluster + " done")
    kafka_client = KafkaClient(kafka_cluster)
    kafka_producer = SimpleProducer(kafka_client)  # To send messages synchronously

    # kafkaSimple = KafkaClient('52.10.17.219:9092')
    # producerSimple = SimpleProducer(kafkaSimple, async=True)
    # geolocator = Nominatim()

    count = 0
    while True:  # while (count < 5):
        for loc in crimeLocations:
            userID = loc["userID"]
            userName = loc["userName"]
            '''
            #date_rptd = loc["date_rptd"]
            date_rptd = str(datetime.datetime.now().month) + "/" + str(datetime.datetime.now().day) + "/" + str(datetime.datetime.now().year);
            #time_rptd = loc["time_rptd"]
            time_rptd = str(datetime.datetime.now().hour).zfill(2) + str(datetime.datetime.now().minute).zfill(2);
            #dateTemp = datetime.datetime.strptime(date_rptd_raw, '%m/%d/%y').strftime('%Y-%m-%d')
            locationObj = "";
            #timestamp
            '''
            latitude = float(loc['latitude'])
            longitude = float(loc['longitude'])

            msg = {}
            msg['userID'] = userID
            msg['userName'] = userName
            location = {'latitude': latitude, 'longitude': longitude}
            msg['location'] = location

            # time.sleep(10)
            kafka_producer.send_messages(self.topic, json.dumps(msg))
            # time.sleep(10)
            # producerSimple.send_messages(self.topic, json.dumps(msg))
            # producerSimple.send_messages('crimeLocation1', 'tajmessage1')
            print("sending location update for user %s" % userID)
        count += 1
        print("+++++++++++++FINISH ROUND %d+++++++++++++++++" % count)
class KafkaProducer:
    def __init__(self):
        kafkahandle = KafkaClient("localhost:9092")
        self.producer = SimpleProducer(kafkahandle)

    def kafka_producer(self, topicname='harish_t', message=None):
        # the original used `message=time.time()` as the default, which is evaluated
        # once at definition time; compute the timestamp per call instead
        if message is None:
            message = time.time()
        _msg = str(message)  # converting to string explicitly since kafka expects string
        self.producer.send_messages(topicname, _msg)
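# A minimal usage sketch for the KafkaProducer wrapper above (the topic name is the
# method's own default; the message value is an illustrative assumption):
#
#   kp = KafkaProducer()
#   kp.kafka_producer('harish_t', 'hello from producer')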
def timeline_producer(twitter_account, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.user_timeline(twitter_account, count)
    for text in text_list:
        kafka_producer.send_messages("twitter", text)
    kafka.close()
    return
def query_text_producer(text, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.search(text, count)
    for text in text_list:
        kafka_producer.send_messages("twitter", text)
    kafka.close()
    return
def favorite_list_producer(id, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.favorite_list(id, count)
    for text in text_list:
        kafka_producer.send_messages("twitter", text)
    kafka.close()
    return
class KafkaSender(LogSender):
    def __init__(self, config, msg_buffer, stats):
        LogSender.__init__(self, config=config, msg_buffer=msg_buffer, stats=stats,
                           max_send_interval=config.get("max_send_interval", 0.3))
        self.config = config
        self.msg_buffer = msg_buffer
        self.stats = stats
        self.kafka = None
        self.kafka_producer = None
        # normalize the topic to bytes (the original only assigned the topic when the
        # configured value was not already bytes)
        topic = self.config["kafka_topic"]
        if not isinstance(topic, bytes):
            topic = topic.encode("utf8")
        self.topic = topic

    def _init_kafka(self):
        self.log.info("Initializing Kafka client, address: %r", self.config["kafka_address"])
        while self.running:
            try:
                if self.kafka_producer:
                    self.kafka_producer.stop()
                if self.kafka:
                    self.kafka.close()

                self.kafka = KafkaClient(  # pylint: disable=unexpected-keyword-arg
                    self.config["kafka_address"],
                    ssl=self.config.get("ssl", False),
                    certfile=self.config.get("certfile"),
                    keyfile=self.config.get("keyfile"),
                    ca=self.config.get("ca")
                )
                self.kafka_producer = SimpleProducer(self.kafka, codec=CODEC_SNAPPY if snappy else CODEC_NONE)
                self.log.info("Initialized Kafka Client, address: %r", self.config["kafka_address"])
                break
            except KAFKA_CONN_ERRORS as ex:
                self.log.warning("Retriable error during Kafka initialization: %s: %s, sleeping",
                                 ex.__class__.__name__, ex)
                self.kafka = None
                self.kafka_producer = None
                time.sleep(5.0)

    def send_messages(self, message_batch):
        if not self.kafka:
            self._init_kafka()
        try:
            self.kafka_producer.send_messages(self.topic, *message_batch)
            return True
        except KAFKA_CONN_ERRORS as ex:
            self.log.info("Kafka retriable error during send: %s: %s, waiting",
                          ex.__class__.__name__, ex)
            time.sleep(0.5)
            self._init_kafka()
        except Exception as ex:  # pylint: disable=broad-except
            self.log.exception("Unexpected exception during send to kafka")
            self.stats.unexpected_exception(ex=ex, where="sender", tags={"app": "journalpump"})
            time.sleep(5.0)
            self._init_kafka()
def test_acks_none(self):
    start_offset0 = self.current_offset(self.topic, 0)

    producer = SimpleProducer(self.client, req_acks=SimpleProducer.ACK_NOT_REQUIRED)
    resp = producer.send_messages(self.topic, self.msg("one"))
    self.assertEqual(len(resp), 0)

    self.assert_fetch_offset(0, start_offset0, [self.msg("one")])
    producer.stop()
def query_location_producer(lat, lng, radius, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.area_search(lat, lng, radius, count)
    for text in text_list:
        kafka_producer.send_messages("twitter", text)
    kafka.close()
    return
class TwitterStreamListener(StreamListener):
    def __init__(self, api=None):
        # connect to the kafka broker
        # need to handle error
        self.topic = "tweet"
        self.kafka = KafkaClient("localhost:9092")
        self.producer = SimpleProducer(self.kafka)

    def on_data(self, data):
        if 'in_reply_to_status' in data:
            self.on_status(data)
        return True

    def on_status(self, data):
        tweet = json.loads(data)
        text = tweet.get('text', ' ')
        coord = tweet.get('coordinates', None)
        created_at = tweet.get('created_at', " ")
        id = tweet.get('id', ' ')
        lang = tweet.get('lang', ' ')
        user = tweet.get('user', "user")
        timestamp = tweet.get('timestamp_ms', " ")
        timestamp = arrow.get(timestamp)
        text = re.sub(r'\W+', ' ', text)
        lon, lat = "", ""
        print(tweet.keys())
        if coord:
            lon = coord['coordinates'][0]
            lat = coord['coordinates'][1]
        tweet_csv = "{id}, {created_at}, {timestamp},{lang}, {lon}, {lat},{text},0".format(
            id=id, created_at=created_at, timestamp=timestamp, lang=lang,
            lon=lon, lat=lat, text=text)
        if lang == 'en':
            print(tweet_csv)
            self.producer.send_messages(self.topic, tweet_csv)
        else:
            print("not english")
            print(tweet_csv)
        return

    def on_limit(self, track):
        sys.stderr.write(track + "\n")
        return

    def on_error(self, status_code):
        sys.stderr.write('Error: ' + str(status_code) + "\n")
        return False

    def on_timeout(self):
        sys.stderr.write("Timeout, sleeping for 60 seconds...\n")
        time.sleep(60)
        return
class MessageService:
    def __init__(self, kafkaBroker, kafkaTopic):
        self.broker = kafkaBroker
        self.topic = kafkaTopic
        self.client = KafkaClient(self.broker)
        self.producer = SimpleProducer(self.client)

    def sendMessage(self, message):
        self.producer.send_messages(self.topic, message)
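# A minimal usage sketch for MessageService (the broker address and topic are assumptions):
#
#   svc = MessageService("localhost:9092", "events")
#   svc.sendMessage(b"ping")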
def test_async_simple_producer(self):
    start_offset0 = self.current_offset(self.topic, 0)

    producer = SimpleProducer(self.client, async=True, random_start=False)
    resp = producer.send_messages(self.topic, self.msg("one"))
    self.assertEqual(len(resp), 0)

    self.assert_fetch_offset(0, start_offset0, [self.msg("one")])

    producer.stop()
def init_kafka():
    global producer_tag, consumer_tag

    url = tsbconfig.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    producer_tag = SimpleProducer(kafka)
    consumer_tag = KafkaConsumer("aggregator_v2", group_id="keyword_extract",
                                 bootstrap_servers=[url],
                                 auto_offset_reset='smallest')
class StdOutListener(StreamListener):
    def __init__(self):
        self.kafka = KafkaClient("localhost:9092")
        self.producer = SimpleProducer(self.kafka)

    def on_data(self, data):
        try:
            curr_tweet = json.loads(data)
            data = json.dumps(curr_tweet)
            self.producer.send_messages('tweets', data.encode('utf-8'))
            print("Successfully sent message to kafka")
        except BaseException as e:
            print("Error on_data %s" % str(e))
        return True

    def on_error(self, status):
        print(status)
def on_status(self, status):
    # Tweets will need to be filtered; by default Twitter pulls ALL tweets mentioning the username you're tracking
    if fromCreator(status):  # filters tweets related to an account so only the original tweets trigger a response
        print('Tweet Filtered!')
        try:
            userid = status.user.id
            user_id = str(userid).replace("'", '"')
            print(str(userid))
            tweetid = str(status.id)
            tweet_id = str(tweetid).replace("'", '"')
            print(tweetid)
            name = status.user.screen_name  # pulls username of tweeter
            print('@', name, 'tweeted', status.text)  # prints tweet to terminal
            date = datetime.now()
            chain_id = str(self.chain_id)
            topic = self.topic
            kafka = KafkaClient("localhost:9092")
            # SimpleProducer takes no value_serializer argument (the original passed one);
            # the message is encoded to bytes explicitly below
            producer = SimpleProducer(kafka)
            producer.send_messages(str(topic), tweet_id.encode('utf-8'))
            print('Sending Tweet to Mempool!')
            print('Received at Mempool!')
        except BaseException as e:
            print("Error on_data %s" % str(e))
    return True

def on_error(self, status_code):
    print('Encountered error with status code:', status_code, file=sys.stderr)
    return True  # Don't kill the stream
    print("Stream restarted")  # unreachable: placed after the return in the original

def on_timeout(self):
    print('Timeout...', file=sys.stderr)
    return True  # Don't kill the stream
    print("Stream restarted")  # unreachable: placed after the return in the original
def kafka_init(parser_name, group_name):
    url = config.get_kafka_config()
    kafka = KafkaClient(url)
    kafka_producer = SimpleProducer(kafka)
    kafka_consumer = KafkaConsumer(parser_name, group_id=group_name,
                                   metadata_broker_list=[url],
                                   auto_offset_reset='smallest')
    return kafka_producer, kafka_consumer
def form():
    form = FeatureForm()
    Datas = {}
    if form.is_submitted():
        Datas['member_id'] = form.member_id.data
        Datas['bc_open_to_buy'] = form.bc_open_to_buy.data
        Datas['total_il_high_credit_limit'] = form.totalCreditLimit.data
        Datas['dti'] = form.dti.data
        Datas['annual_inc'] = form.annual_inc.data
        Datas['bc_util'] = form.bc_util.data
        Datas['int_rate'] = form.int_rate.data
        Datas['installment'] = form.installment.data
        Datas['term'] = form.term.data
        Datas['loan_amnt'] = form.loan_amnt.data
        Datas['fund_rate'] = form.fund_rate.data
        Datas['funded_amnt'] = form.funded_amnt.data
        Datas['grade'] = form.grade.data

        data = {
            "member_id": str(Datas['member_id']),
            "bc_open_to_buy": str(Datas['bc_open_to_buy']),
            "total_il_high_credit_limit": str(Datas['total_il_high_credit_limit']),
            "dti": str(Datas['dti']),
            "annual_inc": str(Datas['annual_inc']),
            "bc_util": str(Datas['bc_util']),
            "int_rate": str(Datas['int_rate']),
            "term": str(Datas['term']),
            "loan_amnt": str(Datas['loan_amnt']),
            "fund_rate": str(Datas['fund_rate']),
            "funded_amnt": str(Datas['funded_amnt']),
            "grade": str(Datas['grade'])
        }
        # for line in r.iter_lines():
        kafka = KafkaClient('localhost:9092')
        producer = SimpleProducer(kafka)
        producer.send_messages('fintech-lendingclub', json.dumps(data))
        return "success"
    return render_template('form.html', form=form)
def initKafka():
    global kafkaProducer
    global kafkaConsumer

    url = config.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    kafkaProducer = SimpleProducer(kafka)
    kafkaConsumer = KafkaConsumer("parser_v2", group_id="beian",
                                  metadata_broker_list=[url],
                                  auto_offset_reset='smallest')
def init_kafka(): global kafkaConsumer global kafkaProducer (url) = config.get_kafka_config() kafka = KafkaClient(url) # HashedPartitioner is default kafkaProducer = SimpleProducer(kafka) kafkaConsumer = KafkaConsumer("validator_company", group_id="bamy2", bootstrap_servers=[url], auto_offset_reset='smallest')
class SimpleProducer(BaseStreamProducer):
    def __init__(self, connection, topic):
        self._connection = connection
        self._topic = topic
        self._create()

    def _create(self):
        self._producer = KafkaSimpleProducer(self._connection, codec=CODEC_SNAPPY)

    def send(self, key, *messages):
        self._producer.send_messages(self._topic, *messages)

    def flush(self):
        self._producer.stop()
        del self._producer
        self._create()

    def get_offset(self, partition_id):
        # Kafka has its own offset management
        raise KeyError
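# A minimal usage sketch for this SimpleProducer wrapper (the `connection` argument is
# whatever KafkaClient-like object the surrounding code passes in; the topic and
# payloads are illustrative assumptions):
#
#   producer = SimpleProducer(connection, b"frontier-topic")
#   producer.send(None, b"message one", b"message two")  # the key argument is ignored by this implementation
#   producer.flush()  # stops and recreates the underlying producer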
def producer(self):
    if self._producer is None and not self._has_error:
        if self.kafka is not None:
            self._producer = SimpleProducer(
                self._kafka,
                async_send=False,
                req_acks=SimpleProducer.ACK_AFTER_LOCAL_WRITE,
                sync_fail_on_error=True
            )
        else:
            # if self.kafka is None then we should be in an error state
            assert self._has_error
    return self._producer
def simple_producer():
    '''simple producer'''
    from kafka import SimpleProducer, KafkaClient

    # To send messages synchronously
    kafka = KafkaClient(KAFKA_SERVER)
    producer = SimpleProducer(kafka)

    # Note that the application is responsible for encoding messages to type bytes
    producer.send_messages('topic', b'some message')
    producer.send_messages('topic', b'this method', b'is variadic')

    # Send unicode message ("How are you?")
    producer.send_messages('topic', u'你怎么样?'.encode('utf-8'))
def init_kafka(index):
    global producer_search, consumer_search

    url = tsbconfig.get_kafka_config()
    kafka = KafkaClient(url)
    producer_search = SimpleProducer(kafka)
    consumer_search = KafkaConsumer("keyword_v2", group_id="create search%s index" % index,
                                    bootstrap_servers=[url],
                                    auto_offset_reset='smallest')
def init_kafka(): global kafkaConsumer global kafkaProducer (url) = config.get_kafka_config() kafka = KafkaClient(url) # HashedPartitioner is default kafkaProducer = SimpleProducer(kafka) kafkaConsumer = KafkaConsumer("gongshang_detect", group_id="testgoshang", bootstrap_servers=[url], auto_offset_reset='smallest')
def __init__(self):
    from kafka import SimpleProducer, KafkaClient
    from kafka.common import LeaderNotAvailableError

    self.kafka_client = KafkaClient(config.KAFKA_SERVER)
    self.kafka = SimpleProducer(self.kafka_client)

    schema_int_src = pkg_resources.resource_string("pypro.snmp", "pypro_snmp_int.avsc").decode('utf-8')
    schema_float_src = pkg_resources.resource_string("pypro.snmp", "pypro_snmp_float.avsc").decode('utf-8')
    schema_str_src = pkg_resources.resource_string("pypro.snmp", "pypro_snmp_str.avsc").decode('utf-8')
    self.schema_int = avro.schema.Parse(schema_int_src)
    self.schema_float = avro.schema.Parse(schema_float_src)
    self.schema_str = avro.schema.Parse(schema_str_src)

    for oid in config.SNMP_OIDS:
        # self.indices is assumed to be initialized elsewhere (e.g. as a class attribute)
        self.indices[oid._name()] = 0

    try:
        # empty msg to ensure topic is created
        self.kafka.send_messages(config.KAFKA_TOPIC, (0).to_bytes(1, byteorder='big'))
    except LeaderNotAvailableError:
        time.sleep(1)
def init_kafka():
    global kafkaConsumer, kafkaProducer

    url = config.get_kafka_config()
    # HashedPartitioner is default
    kafkaConsumer = KafkaConsumer("user_log", group_id="visit_stat",
                                  bootstrap_servers=[url],
                                  auto_offset_reset='smallest',
                                  enable_auto_commit=True)
    kafka = KafkaClient(url)
    kafkaProducer = SimpleProducer(kafka)
def init_kafka():
    global producer_coldcall
    global consumer_coldcall

    url = tsbconfig.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    producer_coldcall = SimpleProducer(kafka)
    consumer_coldcall = KafkaConsumer("coldcall", group_id="coldcall incremental",
                                      metadata_broker_list=[url],
                                      auto_offset_reset='smallest')
def __init__(self, host, topic, consumer_id, settings):
    self.host = host
    self.topic = topic
    self.consumer_id = consumer_id or "Aria2Dispatcher"
    # presumably strips a trailing ".py" so the settings path can be imported as a module
    self.settings = importlib.import_module(settings[:-3])
    self.kafka_client = KafkaClient(self.settings.KAFKA_HOSTS)
    self.producer = SimpleProducer(self.kafka_client)
    self.topic_prefix = self.settings.KAFKA_TOPIC_PREFIX
    self.topic_list = []
    self.aria2_clients = []
    for x in self.settings.ARIA2_ADDRESSES:
        rpc_uri = "ws://%s/jsonrpc" % x
        try:
            aria2_connection = create_connection(rpc_uri)
            self.aria2_clients.append({
                'rpc_uri': rpc_uri,
                'ws': aria2_connection
            })
        except:
            logger.error('create aria2_connection error!')
            raise
def init_kafka(index):
    global consumer

    url = tsbconfig.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    # NOTE: this SimpleProducer is bound to `consumer` and then immediately
    # overwritten by the KafkaConsumer below, so the producer is discarded
    consumer = SimpleProducer(kafka)
    consumer = KafkaConsumer("keyword_v2", group_id="create search%s index" % index,
                             bootstrap_servers=[url],
                             auto_offset_reset='smallest')
def init_kafka():
    global consumer_strack, producer_strack

    url = tsbconfig.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    producer_strack = SimpleProducer(kafka)
    consumer_strack = KafkaConsumer("track_message", group_id="funding_track",
                                    bootstrap_servers=[url],
                                    auto_offset_reset='smallest')
def __init__(self, topic, kafkaConfig: AbstractKafkaConfig):
    self.topic = topic
    self.kafkaConfig = kafkaConfig
    kafka = KafkaClient(kafkaConfig.getKafkaBrokerIp())
    self.producer = SimpleProducer(kafka)
    self.client = SchemaRegistryClient(url=kafkaConfig.getSchemaRegistryUrl())
    self.serializer = MessageSerializer(self.client, False)
    if topic is not None:
        self.schemaName = kafkaConfig.getSchemaByTopicName(self.topic)
        self.schema_id, self.avro_schema, self.schema_version = self.client.get_latest_schema(self.schemaName)
class KafkaHandler(logging.Handler):
    def __init__(self, settings):
        self.settings = settings
        self.client = KafkaClient(settings.get("KAFKA_HOSTS"))
        self.producer = SimpleProducer(self.client)
        # wrap send_messages with the failed-payload retry decorator
        self.producer.send_messages = failedpayloads_wrapper(
            settings.get("KAFKA_RETRY_TIME", 5))(self.producer.send_messages)
        super(KafkaHandler, self).__init__()

    def emit(self, record):
        self.client.ensure_topic_exists(self.settings.get("TOPIC"))
        buf = self.formatter.format(record)
        if hasattr(buf, "encode"):
            buf = buf.encode(sys.getdefaultencoding())
        self.producer.send_messages(self.settings.get("TOPIC"), buf)

    def close(self):
        self.acquire()
        super(KafkaHandler, self).close()
        self.client.close()
        self.release()
def init_kafka(): global kafkaConsumer global kafkaProducer (url) = config.get_kafka_config() # HashedPartitioner is default kafkaConsumer = KafkaConsumer("track_message_v2", group_id="push_hot_news", bootstrap_servers=[url], auto_offset_reset='smallest', enable_auto_commit=False) kafka = KafkaClient(url) kafkaProducer = SimpleProducer(kafka)
def publish_to_kafka_broker(self, metric_name, kafka_topic, value, tags, debug=True):
    """
    Generate a payload and publish the data to the kafka broker.
    """
    now = datetime.datetime.utcnow()
    timestamp = now.strftime('%Y-%m-%dT%H:%M:%S.000Z')

    metric = {}
    metric['apikey'] = self.kafka_apikey
    metric['tenant_id'] = self.kafka_tenant_id
    # host should be passed by the caller. statsd might not be
    # running on the same host as the caller.
    # metric['host'] = socket.gethostname()
    metric['name'] = metric_name
    metric['value'] = value
    metric['@version'] = '1'
    metric['@timestamp'] = timestamp
    for tag in tags:
        metric[tag] = tags[tag]

    if debug:
        msg = "DEBUG-ON: Kafka metrics to be sent: %s" % (metric)
        pystats_log.print_msg(msg)
    else:
        kafka = KafkaClient(self.kafka_broker)
        try:
            producer = SimpleProducer(kafka)
            result = producer.send_messages(kafka_topic, json.dumps(metric))
            msg = "Kafka Metrics Pushed: [%s] [%s]" % (metric, str(result))
            pystats_log.print_msg(msg)
        except socket.gaierror as gaierror:
            msg = "Publish metric [%s] failed. [%s]" % (metric, str(gaierror))
            pystats_log.print_msg(msg)
def trigger():
    kafka = KafkaClient('localhost:9092')
    producer = SimpleProducer(kafka)
    r = requests.get(
        "http://fintech.dataapplab.com:33334/api/v1.0/FinTech/streamingdata")
    print(r)
    producer.send_messages('fintech-lendingclub', r.content)
    data = {
        "bc_open_to_buy": 0,
        "total_il_high_credit_limit": 0,
        "dti": 2009,
        "annual_inc": 12000,
        "bc_util": 0,
        "int_rate": 10.08,
        "term": 36,
        "loan_amnt": 3500,
        "fund_rate": 0,
        "funded_amnt": 3500,
    }
    # for line in r.iter_lines():
    producer.send_messages('fintech-lendingclub', json.dumps(data))
    return "success"
    # print(type(line))
    # kafka.close()  # unreachable: placed after the return in the original
def get_weather(sc):
    # To send messages synchronously
    kafka = KafkaClient('localhost:9092')
    producer = SimpleProducer(kafka)

    countriesArray = ["Singapore", "Chicago", "Madrid", "Beijing"]
    for country in countriesArray:
        # Call Weather API to get forecasts
        response = urllib2.urlopen(
            'http://api.openweathermap.org/data/2.5/weather?q=' + country +
            '&appid=' + WEATHER_API_APPID)
        data = json.load(response)

        countryDataDict = {}
        countryDataDict["city"] = data["name"]
        countryDataDict["country"] = data["sys"]["country"]
        countryDataDict["timestamp"] = data["dt"]
        countryDataDict["wind_speed"] = data["wind"]["speed"]
        countryDataDict["visibility"] = data["visibility"]
        countryDataDict["weather"] = data["weather"]
        countryDataDict["main"] = data["main"]

        # Need to convert dict to bytes before sending to kafka
        bytesData = json.dumps(countryDataDict)
        producer.send_messages(
            b'weather',
            b'Weather data for ' + country + ' at ' + str(data["dt"]))
        producer.send_messages(b'weather', bytesData)
        print("Weather data for " + country + " sent to Kafka..")

    sc.enter(300, 1, get_weather, (sc, ))
def main(): client = KafkaClient("localhost:9092") producer = SimpleProducer(client) last_hour = datetime.now() - timedelta(hours=1) print(last_hour) delim = '$$$$' time.sleep(20) # Dow Jones Industrial Average 30 stocks removed 3M stocks = [ 'American Express', 'Apple', 'Boeing', 'Caterpillar', 'Chevron', 'Cisco', 'Coca-Cola', 'Walt Disney', 'DowDuPont', 'ExxonMobil', 'General Electric', 'Goldman Sachs', 'The Home Depot', 'IBM', 'Intel', 'Johnson & Johnson', 'JPMorgan Chase', 'McDonald\'s', 'Merck', 'Microsoft', 'Nike', 'Pfizer', 'Procter & Gamble', 'Travelers Companies', 'United Technologies', 'UnitedHealth', 'Verizon', 'Visa', 'Wal-Mart' ] for stock in stocks: time.sleep(7) print('Stock being analyzed : ' + stock) #stock = stock.replace(" ", "%20") #stock = stock.replace("\'", "%2527") url = 'https://newsapi.org/v2/everything?q=' + stock + '&apiKey=9714e1d74fb64495aaafdb54d4cdd0bc' response = requests.get(url) json_res = response.json() for post in json_res["articles"]: date_time = post["publishedAt"] #if parse(date_time).date() == last_hour.date() and parse(date_time).time() > last_hour.time(): data = stock + delim + date_time if post["description"] is not None: data += delim + post["description"] else: continue if post["content"] is not None: data += " " + post["content"] msg = data.encode('utf-8') producer.send_messages(b'newsstream', msg)
def send_kafka():
    kafka = KafkaClient('localhost:9092')
    producer = SimpleProducer(kafka)
    while True:
        producer.send_messages("data", b'data data data')
        producer.send_messages("weights", b'8.46,1.74,6.08,4.25,1.92')
def main():
    # check for --version or -V
    if args.version:
        print("Ask [email protected]")

    if args.run:
        topic = args.run.split('/')[0]
        msg = bytes('RUN ' + str(args.run.split('/')[1]), 'utf8')
        kafka = KafkaClient(':'.join([ipAddress, str(portKafka)]))
        producer = SimpleProducer(kafka)
        kafka.ensure_topic_exists(topic)
        try:
            print_response(producer.send_messages(topic, msg))
        except LeaderNotAvailableError:
            time.sleep(1)
            print_response(producer.send_messages(topic, msg))
        kafka.close()
def track_updates(db, producer_comps, cid, updates):
    if dbutil.get_company_active(db, cid) != 'Y':
        return
    updates = [update for update in updates if __track_comps_match(db, update)]
    if len(updates) < 1:
        return
    # producer_comps.send_messages("track_message", json.dumps({'id': cid, 'type': 'comps', 'comps': updates}))
    comments = ','.join([dbutil.get_company_name(db, c) for c in updates])
    # track_msg reads: "<company> discovered <N> potential competitors: <names>"
    track_msg = u'%s发现了%s个潜在的竞争对手: %s' % (dbutil.get_company_name(db, cid), len(updates), comments)
    cmsg_id = dbutil.update_company_message(
        db, cid, track_msg, 6001, 60,
        ','.join([str(update) for update in updates]), comments=comments)
    if cmsg_id:
        try:
            producer_comps.send_messages(
                "track_message_v2",
                json.dumps({
                    'id': cmsg_id,
                    'type': 'company_message',
                    'action': 'create'
                }))
        except FailedPayloadsError:
            # reconnect and retry once
            url = tsbconfig.get_kafka_config()
            kafka = KafkaClient(url)
            producer_comps = SimpleProducer(kafka)
            producer_comps.send_messages(
                "track_message_v2",
                json.dumps({
                    'id': cmsg_id,
                    'type': 'company_message',
                    'action': 'create'
                }))
class Producer():
    def __init__(self, hosts, batch_send=False, batch_send_every_n=20, async=True):
        self.hosts = hosts
        self.client = KafkaClient(self.hosts)
        self.batch_send = batch_send
        self.batch_send_every_n = batch_send_every_n
        self.producer = SimpleProducer(self.client,
                                       batch_send=batch_send,
                                       batch_send_every_n=batch_send_every_n)
class HeartBeat:
    def __init__(self, qinfo):
        self.topic = qinfo['kafka_topic']
        self.client = KafkaClient(qinfo['kafka_broke'])
        self.producer = SimpleProducer(self.client, codec=CODEC_SNAPPY)

    def send(self, name, num=1):
        data = {
            "name": name,
            "num": num,
            "time": int(time.time())
        }
        print("***************send********************")
        data_str = json.dumps(data)
        self.producer.send_messages(self.topic, data_str)

    def close(self):
        self.client.close()
        self.producer.stop()
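# A minimal usage sketch for HeartBeat (the qinfo keys mirror those read in __init__;
# the broker address is an assumption):
#
#   hb = HeartBeat({'kafka_topic': 'heartbeat', 'kafka_broke': 'localhost:9092'})
#   hb.send('worker-1', num=3)
#   hb.close()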