def send_data_2_kafka(datas):
    '''Send data to the Kafka parsing queue.'''
    KAFKABROKER = ['120.27.241.54', '120.27.243.224', '114.55.226.34']
    PARTNUM = 5
    TOPICNAME = 'test01'
    # TOPICNAME = 'logtest3'
    client = KafkaClient(hosts=KAFKABROKER, timeout=30)
    producer = SimpleProducer(client, async=False)
    curcount = len(datas) // PARTNUM
    print(curcount)
    print(len(datas))
    # Send the data in PARTNUM slices; the last slice also picks up the remainder.
    for i in range(0, PARTNUM):
        start = i * curcount
        if i != PARTNUM - 1:
            end = (i + 1) * curcount
            curdata = datas[start:end]
            producer.send_messages(TOPICNAME, *curdata)
        else:
            curdata = datas[start:]
            producer.send_messages(TOPICNAME, *curdata)
    print(datetime.datetime.now())
    producer.stop()
    client.close()
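# A minimal sketch (not part of the original snippet) of the same chunked send
# using the newer KafkaProducer API, for environments where SimpleProducer's
# `async` keyword is rejected (it became a reserved word in Python 3.7). The
# broker and topic names are reused from above; the ':9092' ports are assumptions.
from kafka import KafkaProducer

def send_data_2_kafka_modern(datas):
    producer = KafkaProducer(
        bootstrap_servers=['120.27.241.54:9092', '120.27.243.224:9092',
                           '114.55.226.34:9092'])
    for record in datas:
        # records must already be bytes, or configure a value_serializer
        producer.send('test01', record)
    producer.flush()  # block until all buffered records are delivered
    producer.close()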
def kafka_servers_available(servers):
    try:
        client = KafkaClient(bootstrap_servers=servers)
        client.close()
    except Exception:
        return False
    return True
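# Hypothetical usage of kafka_servers_available; the broker address is
# illustrative only.
if kafka_servers_available(['localhost:9092']):
    print('Kafka brokers reachable')
else:
    print('Kafka brokers unavailable; skipping pipeline setup')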
def get_all_kafka_connections(cluster_type, client_id, **kwargs):
    """Get a kafka connection for each available kafka cluster at Yelp.

    :param cluster_type: kafka cluster type (ex. 'scribe' or 'standard').
    :type cluster_type: string
    :param client_id: client_id to be used to connect to kafka.
    :type client_id: string
    :param kwargs: parameters to pass along when creating the KafkaClient instance.
    :returns: list (cluster_name, KafkaClient)
    :raises DiscoveryError: :py:class:`yelp_kafka.error.DiscoveryError` upon
        failure connecting to a cluster.

    .. note:: This function creates a KafkaClient for each cluster in a region
       and tries to connect to it. If a cluster is not available it fails and
       closes all the previous connections.
    """
    clusters = get_all_clusters(cluster_type, client_id)
    connected_clusters = []
    for cluster in clusters:
        try:
            client = KafkaClient(cluster.broker_list, client_id=client_id, **kwargs)
            connected_clusters.append((cluster.name, client))
        except Exception:
            log.exception(
                "Connection to kafka cluster %s using broker list %s failed",
                cluster.name,
                cluster.broker_list
            )
            for _, client in connected_clusters:
                client.close()
            raise DiscoveryError("Failed to connect to cluster {0}".format(
                cluster.name))
    return connected_clusters
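# A usage sketch under assumed inputs ('standard' cluster type and an
# illustrative client_id): iterate over the connected clusters, then release
# every client when done.
connections = get_all_kafka_connections('standard', client_id='my-service')
try:
    for cluster_name, kafka_client in connections:
        print('connected to cluster %s' % cluster_name)
finally:
    for _, kafka_client in connections:
        kafka_client.close()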
def assert_message_count(self, topic, check_count, timeout=10,
                         partitions=None, at_least=False):
    hosts = ','.join(['%s:%d' % (broker.host, broker.port)
                      for broker in self.brokers])
    client = KafkaClient(hosts)
    consumer = SimpleConsumer(client, None, topic,
                              partitions=partitions,
                              auto_commit=False,
                              iter_timeout=timeout)

    started_at = time.time()
    pending = consumer.pending(partitions)

    # Keep checking if it isn't immediately correct, subject to timeout
    while pending < check_count and (time.time() - started_at < timeout):
        pending = consumer.pending(partitions)
        time.sleep(0.5)

    consumer.stop()
    client.close()

    if pending < check_count:
        self.fail('Too few pending messages: found %d, expected %d' %
                  (pending, check_count))
    elif pending > check_count and not at_least:
        self.fail('Too many pending messages: found %d, expected %d' %
                  (pending, check_count))
    return True
class KafkaMetricSender(MetricSender):
    def __init__(self, config):
        super(KafkaMetricSender, self).__init__(config)
        kafka_config = config["output"]["kafka"]
        # default topic
        # self.topic = kafka_config["topic"].encode('utf-8')
        # producer
        self.broker_list = kafka_config["brokerList"]
        self.kafka_client = None
        self.kafka_producer = None

    def open(self):
        self.kafka_client = KafkaClient(self.broker_list, timeout=59)
        self.kafka_producer = SimpleProducer(
            self.kafka_client,
            batch_send=True,
            batch_send_every_n=500,
            batch_send_every_t=30
        )

    def send(self, msg, topic):
        self.kafka_producer.send_messages(topic, json.dumps(msg))

    def close(self):
        if self.kafka_producer is not None:
            self.kafka_producer.stop()
        if self.kafka_client is not None:
            self.kafka_client.close()
class Client:
    """Wraps the kafka-python KafkaClient."""

    def __init__(self):
        pass

    def __enter__(self):
        self.cfg = Config().cfg
        # These keyword arguments belong to the modern KafkaClient, so it is
        # used here rather than the legacy SimpleClient.
        self.client = KafkaClient(
            bootstrap_servers=self.cfg["serList"],
            # api_version=self.cfg["apiVersion"],
            api_version_auto_timeout_ms=self.cfg["autoVersionTimeout"],
            security_protocol=self.cfg["protocol"],
            sasl_mechanism=self.cfg["mechanism"],
            sasl_kerberos_service_name=self.cfg["kerverosSerName"]
        )
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.client.close()

    def add_topic(self, topic_name: str):
        """Start tracking the given topic in the client's metadata.

        Minimal completion of a truncated method; KafkaClient.add_topic is
        the matching client call.
        """
        self.client.add_topic(topic_name)
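# Usage sketch for the wrapper above: __enter__ opens the connection from
# Config().cfg and __exit__ closes it, so a `with` block is the intended
# calling convention. The topic name is illustrative.
with Client() as wrapped:
    wrapped.add_topic('example-topic')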
def assert_message_count(self, topic, check_count, timeout=10,
                         partitions=None):
    hosts = ','.join(['%s:%d' % (broker.host, broker.port)
                      for broker in self.brokers])

    client = KafkaClient(hosts)
    group = random_string(10)
    consumer = SimpleConsumer(client, group, topic,
                              partitions=partitions,
                              auto_commit=False,
                              iter_timeout=timeout)

    started_at = time.time()
    pending = consumer.pending(partitions)

    # Keep checking if it isn't immediately correct, subject to timeout
    while pending != check_count and (time.time() - started_at < timeout):
        pending = consumer.pending(partitions)

    consumer.stop()
    client.close()

    self.assertEqual(pending, check_count)
def kafkaProducer(self):
    client = KafkaClient(hosts=["%s:%s" % (IP, PORT)], timeout=30)
    # 'async' is the legacy SimpleProducer keyword (renamed 'async_send' in
    # later kafka-python releases, where 'async' became a reserved word).
    producer = SimpleProducer(client, async=False)
    self.timeCount = int(time.time() * 1000)
    while True:
        self.result.RecResult.Id = str(uuid.uuid4())
        self.result.RecResult.Meta.Timestamp = int(time.time() * 1000)
        self.result.RecResult.Image.Id = str(uuid.uuid4())
        # print self.result.RecResult.Image.Id
        # print self.result
        data = self.result.SerializeToString()
        producer.send_messages(kafkaTopic, data)
        print("send count:%s" % self.sendCount)
        if self.sendCount == sendNumPerSencond:
            # One batch is complete: report the elapsed time and, if the batch
            # took less than a second, sleep off the remainder to throttle.
            now_time = int(time.time() * 1000)
            elapse = now_time - self.timeCount
            print("elapse:%sms" % elapse)
            if elapse <= 1000:
                sleep_time = (1000 - elapse) * 1.0 / 1000
                print("sleep times:%ss" % sleep_time)
                time.sleep(sleep_time)
            self.timeCount = int(time.time() * 1000)
            self.sendCount = 0
            print("send All:%s" % self.sendAll)
            self.sendAll = self.sendAll + 1
            break
        self.sendCount = self.sendCount + 1
    producer.stop()
    client.close()
def get_files():
    kafka = KafkaClient("129.16.125.231:9092")
    producer = SimpleProducer(kafka)
    topic = 'test'
    base_dir = ('/mnt/volume/fromAl/Data_20151215 HepG2 LNP size exp live cell '
                '24h_20151215_110422/AssayPlate_NUNC_#165305-1/')
    for root, dirs, files in os.walk(base_dir):
        if not files:
            print("files is empty")
        else:
            print("In else")
            print("root: ", root)
            print("dirs: ", dirs)
            print("files[0]: ", files[0])
            if not dirs:
                print("dirs is empty")
            # else:
            print(base_dir + files[0])
            for i in range(len(files)):
                img = cv2.imread(base_dir + files[i])
                ret, jpeg = cv2.imencode('.png', img)
                producer.send_messages(topic, jpeg.tobytes())
    kafka.close()
def fetchFrom():
    in_kafka = KafkaClient(IN_KAFKA_HOST)
    consumer = SimpleConsumer(in_kafka, 'trending', CONSUMER_TOPIC,
                              max_buffer_size=20*1024*1024)
    out_kafka = KafkaClient(OUT_KAFKA_HOST)
    producer = SimpleProducer(out_kafka)

    for msg in consumer:
        record = json.loads(msg.message.value)
        if 'tags' in record and '_trends' in record['tags']:
            try:
                producer.send_messages("trends", msg.message.value)
                print(str(time.strftime("%c")) + " pump url " + record['inlink'].encode('utf-8'))
            except MessageSizeTooLargeError as err:
                logging.warning(err)
                continue
        if 'metadata' in record:
            print(record['metadata'])
        if 'metadata' in record and 'tags' in record['metadata'] and '_channels' in record['metadata']['tags']:
            try:
                producer.send_messages("channels", msg.message.value)
                print(str(time.strftime("%c")) + " pump url " + record['inlink'].encode('utf-8'))
            except MessageSizeTooLargeError as err:
                logging.warning(err)
                continue

    in_kafka.close()
    out_kafka.close()
def fetchFrom():
    in_kafka = KafkaClient('172.31.10.154:9092')
    consumer = SimpleConsumer(in_kafka, 'fetcher', 'cpp.pages',
                              max_buffer_size=20*1024*1024)
    out_kafka = KafkaClient("172.31.1.70:9092")
    producer = SimpleProducer(out_kafka)

    for msg in consumer:
        page = json.loads(msg.message.value)
        if 'retweet' in page['meta']:
            print("remove twitter page")
            continue
        output = {}
        output['inlink'] = ''
        output['level'] = 1
        output['url'] = page['url']
        output['fts'] = page['ts_fetch']
        output['content'] = page['content']
        try:
            producer.send_messages("process", json.dumps(output))
            print(str(time.time()) + " pump url " + output['url'].encode('utf-8'))
        except MessageSizeTooLargeError as err:
            logging.warning(err)

    in_kafka.close()
    out_kafka.close()
def ensure_topic_existed(topic):
    try:
        server_info = get_server_kafka()
        client = KafkaClient(server_info)
        client.ensure_topic_exists(topic)
        client.close()
    except ValueError as err:
        print(err)
def send_kafka_msg(iters):
    # TODO: Add try/catch statements for kafka connection
    kafka = KafkaClient(kafka_host)
    producer = SimpleProducer(kafka)
    for key, val in iters:
        msg = combine_count_json(key, val)
        producer.send_messages(str(topic).encode("utf-8"), str(msg).encode("utf-8"))
    kafka.close()
def query_text_producer(text, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.search(text, count)
    for text in text_list:
        kafka_producer.send_messages("twitter", text)
    kafka.close()
    return
def sendMsgToKafka(obj, msg):
    client = KafkaClient("c9t26359.itcs.hpecorp.net:9092")
    producer = SimpleProducer(client)
    producer.send_messages(obj, msg)
    #response = producer.send_messages(obj, msg)
    #print(response)
    client.close()
def consume(kafka_host):
    kafka = KafkaClient(kafka_host)
    consumer = SimpleConsumer(kafka, 'fetcher', cfg['kafka']['pages'])
    producer = SimpleProducer(kafka)
    consumer.max_buffer_size = 20*1024*1024
    for msg in consumer:
        page = json.loads(msg.message.value)
        process(page, producer)
    kafka.close()
def query_location_producer(lat, lng, radius, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.area_search(lat, lng, radius, count)
    for text in text_list:
        kafka_producer.send_messages("twitter", text)
    kafka.close()
    return
def favorite_list_producer(id, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.favorite_list(id, count)
    for text in text_list:
        kafka_producer.send_messages("twitter", text)
    kafka.close()
    return
def timeline_producer(twitter_account, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.user_timeline(twitter_account, count)
    for text in text_list:
        kafka_producer.send_messages("twitter", text)
    kafka.close()
    return
def main():
    kafka = KafkaClient('localhost:9092')
    producer = SimpleProducer(kafka)
    topic = 'test'
    msg = b'Hello World'
    print_response(producer.send_messages(topic, msg))
    kafka.close()
def sendMsgToKafka(self, topic, msg):
    kafka = KafkaClient("c9t26359.itcs.hpecorp.net:9092")
    producer = SimpleProducer(kafka)
    producer.send_messages(topic, msg)
    #response = producer.send_messages(topic, msg)
    #print(response)
    kafka.close()
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    bytes_topic = None
    zk = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10))
            self.topic = topic
            self.bytes_topic = topic.encode('utf-8')

        if self.create_client:
            self.client = KafkaClient('%s:%d' % (self.server.host, self.server.port))

        self.client.ensure_topic_exists(self.topic)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        try:
            offsets, = self.client.send_offset_request([
                OffsetRequest(kafka_bytestring(topic), partition, -1, 1)
            ])
        except Exception:
            # XXX: We've seen some UnknownErrors here and can't debug w/o server logs
            self.zk.child.dump_logs()
            self.server.child.dump_logs()
            raise
        else:
            return offsets.offsets[0]

    def msgs(self, iterable):
        return [self.msg(x) for x in iterable]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))
        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')
def setup_capture_new_messages_consumer(topic):
    """Seeks to the tail of the topic, then yields a consumer positioned to
    read any messages produced from that point on.
    """
    kafka = KafkaClient(get_config().cluster_config.broker_list)
    group = str('data_pipeline_clientlib_test')
    consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=_ONE_MEGABYTE)
    consumer.seek(0, 2)  # seek to tail, 0 is the offset, and 2 is the tail

    yield consumer

    kafka.close()
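# Usage sketch: assuming the generator above is wrapped with
# contextlib.contextmanager (the decorator is not shown in the snippet), the
# tail consumer can be used so the client is closed even if the body raises.
# The topic name and message counts are illustrative.
from contextlib import contextmanager

capture_new_messages = contextmanager(setup_capture_new_messages_consumer)

with capture_new_messages('example.topic') as consumer:
    for message in consumer.get_messages(count=10, block=True, timeout=5):
        print(message.message.value)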
class Producer(object):
    def __init__(self, broker_ip_port):
        self.kafka = KafkaClient(broker_ip_port)
        self.producer = SimpleProducer(self.kafka)

    def send_message(self):
        response = self.producer.send_messages("HEY", "Hello World",
                                               "Kafka Deployment Worked!")
        return [("Error ", response[0].error), response]

    def close(self):
        self.kafka.close()
def producer():
    global Flags
    t.sleep(100)
    url = '192.168.56.1:9092'
    kafka = KafkaClient(bootstrap_servers=url)
    producer = KafkaProducer(bootstrap_servers=["192.168.56.1:9092"])
    counter = 1
    while counter:
        url = requests.get(
            'https://download.open.fda.gov/device/enforcement/device-enforcement-0001-of-0001.json.zip'
        )
        data = None
        with zipfile.ZipFile(BytesIO(url.content), "r") as z:
            for filename in z.namelist():
                print(filename)
                with z.open(filename) as f:
                    data = f.read()
                    data = json.loads(data)

        data = pd.DataFrame(data['results'])
        data = data.replace({np.NaN: None})
        data = data.drop('openfda', 1)
        data.insert(0, 'ID', range(0, len(data)))
        data['ID'] = data['ID'].astype('str')

        # upload to S3
        conn = boto.connect_s3('AKIAQ4UBQVDV3RRYSD5Y',
                               'x5K/PgZwoDY8O/N+QVg99Lm2TnnkgB4mp981wGy4')
        bucket = conn.get_bucket('device-enforcement')
        upload = Key(bucket)
        csv_buffer = StringIO()
        data.to_csv(csv_buffer, index=False)
        csv_buffer.seek(0)
        filename = 'dataset.csv'
        upload.key = filename
        upload.set_contents_from_string(csv_buffer.getvalue())

        # start consumer
        t2 = threading.Thread(target=consumer)
        t2.start()
        t3 = threading.Thread(target=Analytics1)
        t3.start()

        for index, rows in data.iterrows():
            x = rows.to_dict()
            producer.send("devices", json.dumps(x).encode('utf-8'))
        Flags = True
        t.sleep(3600)
    kafka.close()
def main():
    kafka = KafkaClient("104.154.244.37:9092")
    producer = SimpleProducer(kafka)
    msg = []
    topic = b'test'
    try:
        with open("/root/kafkaprojects/Iris.csv") as f:
            for msg in f:
                # send each line of the file as its own message
                print_response(producer.send_messages(topic, msg.encode('utf-8')))
    except LeaderNotAvailableError:
        # https://github.com/mumrah/kafka-python/issues/249
        time.sleep(1)
        print_response(producer.send_messages(topic, msg.encode('utf-8')))
    kafka.close()
def old_connect(self, message):
    kafka = KafkaClient(kafka_server + ":9092")
    self.producer = SimpleProducer(kafka)
    self.topic = topic
    try:
        self.producer.send_messages(self.topic, message)
    except LeaderNotAvailableError:
        # https://github.com/mumrah/kafka-python/issues/249
        time.sleep(1)
        KafkaStreamTarget.print_response(
            self.producer.send_messages(self.topic, message))
    kafka.close()
def main():
    kafka = KafkaClient('localhost:9092')
    producer = SimpleProducer(kafka)

    topic = b'topic_test_cluster'
    msg = b'Hello World, Hello Kafka'

    try:
        print_response(producer.send_messages(topic, msg))
    except LeaderNotAvailableError:
        time.sleep(1)
        print_response(producer.send_messages(topic, msg))

    kafka.close()
def post_data():
    from kafka import KafkaClient
    from kafka import SimpleProducer
    kafka = KafkaClient(app.config['KAFKA_SERVER'])
    producer = SimpleProducer(kafka)
    if not request.json:
        resp = 'null post data'
    else:
        resp = producer.send_messages(app.config['KAFKA_TOPIC'], str(request.json))
        if resp:
            resp = {'error': resp[0].error, 'offset': resp[0].offset}
    kafka.close()
    return jsonify(resp)
class Producer(KeyedProducer):
    def __init__(self, hosts, client_id, timeout):
        self._client = KafkaClient(hosts, client_id=client_id, timeout=timeout)
        super(Producer, self).__init__(self._client)

    def close(self):
        try:
            self._client.close()
        except Exception:
            pass
class KafkaBase(Base):
    """ A block defining common Kafka functionality.

    Properties:
        host (str): location of the database
        port (int): open port served by database
        topic (str): topic name
    """
    host = StringProperty(title='Host', default='[[KAFKA_HOST]]')
    port = IntProperty(title='Port', default=9092)
    topic = StringProperty(title='Topic', default="", allow_none=False)

    def __init__(self):
        super().__init__()
        self._kafka = None
        self._encoded_topic = None

    def configure(self, context):
        super().configure(context)
        if not len(self.topic()):
            raise ValueError("Topic cannot be empty")
        self._connect()

    def stop(self):
        self._disconnect()
        super().stop()

    def _connect(self):
        self._kafka = KafkaClient("{0}:{1}".format(self.host(), self.port()))
        self._encoded_topic = self.topic()

        # ensuring topic is valid
        try:
            self._kafka.ensure_topic_exists(self._encoded_topic)
        except Exception:
            self.logger.exception("Topic: {0} does not exist"
                                  .format(self.topic()))
            raise

    def _disconnect(self):
        if self._kafka:
            self._kafka.close()
            self._kafka = None

    @property
    def connected(self):
        return self._kafka
def main():
    kafka = KafkaClient("localhost:9092")
    producer = SimpleProducer(kafka)
    topic = b'test'
    msg = b'Hello World from Me!'
    try:
        print_response(producer.send_messages(topic, msg))
    except LeaderNotAvailableError:
        # https://github.com/mumrah/kafka-python/issues/249
        time.sleep(1)
        print_response(producer.send_messages(topic, msg))
    kafka.close()
def pullData():
    topic = sys.argv[1]
    kafka = KafkaClient('localhost:9092')
    producer = SimpleProducer(kafka)
    #url = 'http://countdown.api.tfl.gov.uk/interfaces/ura/instant_V1'
    url = sys.argv[2]
    r = requests.get(url, stream=True)
    for line in r.iter_lines():
        producer.send_messages(topic, line)
        print(line)
    kafka.close()
class Consumer(object):
    def __init__(self, topic):
        self.kafka = KafkaClient("localhost:9092")
        self.consumer = SimpleConsumer(self.kafka, "1", topic)

    @classmethod
    def make_queue(cls):
        return Queue(4096)

    def run(self, q):
        try:
            for i in self.consumer:
                q.put(json.loads(i.message.value), True)
        except Exception:
            self.kafka.close()
class RTIService(object):
    def __init__(self):
        self._brokers = APP_CONFIG["rti_kafka"]["brokers"]
        self._partitions = APP_CONFIG["rti_kafka"]["partitions"]
        self._topic = APP_CONFIG["rti_kafka"]["topic"]
        self._kafka = KafkaClient(self._brokers)
        self.producer = None

    def close(self):
        self._kafka.close()

    def connect(self):
        if not self.producer:
            self.producer = RTIProducer(self._kafka)

    def emit_event(self, adgroupId, campaignId):
        adDocs = []
        conn = getConnectWithDB(dbconfig_xadcms, "xadcms_pool")
        if adgroupId != "0":
            print("a level")
            document = make_ad_document(campaignId, adgroupId, conn)
            adDocs.append(document)
        # campaign level
        elif campaignId != "0":
            # get adgroupIds for this campaign
            adgroupIdRows = getInfoFromDB(campaignId, sql_campaign_adgroups, conn)
            print("c level")
            print(adgroupIdRows)
            for row in adgroupIdRows:
                document = make_ad_document(campaignId, row["adgroupId"], conn)
                adDocs.append(document)
        campaignDoc = make_campaign_document(adDocs)
        closeDB(conn)
        envelope = make_document_envelope(1, campaignDoc)
        # message = make_enigma_envelope(self._topic, document)
        message = make_enigma_envelope(self._topic, envelope)
        totalPartitions = self._partitions
        partition = randint(0, totalPartitions - 1)
        try:
            response = self.producer.send_messages(self._topic, partition, message)
            print(response)
        except Exception as e:
            print(e)
            return
def produce_messages(self):
    """ Produce sample messages """
    # TODO: Support different kafka port
    kafka = KafkaClient(self.config.kafka_host)
    total_messages = self.batches * self.batch_size
    messages_batch = [create_message(random.choice(self.sample_messages))
                      for _ in range(self.batch_size)]
    for i in range(self.batches):
        req = ProduceRequest(topic=self.config.kafka_topic, partition=0,
                             messages=messages_batch)
        resps = kafka.send_produce_request(payloads=[req], fail_on_error=True)
        sent_messages = (i + 1) * self.batch_size
        logging.info('Created {} out of {} sample messages'.format(
            sent_messages, total_messages))
    kafka.close()
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:],
                               random_string(10).decode('utf-8'))
            self.topic = topic.encode('utf-8')

        if self.create_client:
            self.client = KafkaClient('%s:%d' % (self.server.host, self.server.port))

        self.client.ensure_topic_exists(self.topic)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        offsets, = self.client.send_offset_request(
            [OffsetRequest(topic, partition, -1, 1)])
        return offsets.offsets[0]

    def msgs(self, iterable):
        return [self.msg(x) for x in iterable]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))
        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')
def topic_security(ip):
    """Ensures our topic exists.

    If we're the first one online it won't exist; this will not be needed
    once we configure topics in the kafka configuration.

    This will open a connection, create the topic, then close the connection.

    **Issues**:

    - The Port is hardcoded

    :param ip: The IP of our Kafka Box
    :type ip: str
    """
    kafka = KafkaClient("%s:9092" % (ip))
    kafka.ensure_topic_exists(TOPIC)
    kafka.close()
def low_level():
    '''low level'''
    from kafka import KafkaClient, create_message
    from kafka.protocol import KafkaProtocol
    from kafka.common import ProduceRequest

    kafka = KafkaClient(KAFKA_SERVER)

    req = ProduceRequest(topic=b'topic1', partition=1,
                         messages=[create_message(b'some message')])
    resps = kafka.send_produce_request(payloads=[req], fail_on_error=True)
    kafka.close()

    print(resps[0].topic)      # b'topic1'
    print(resps[0].partition)  # 1
    print(resps[0].error)      # 0 (hopefully)
    print(resps[0].offset)     # offset of the first message sent in this request
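# A rough modern equivalent of the low-level request above (an assumption,
# not part of the original snippet): KafkaProducer handles framing and
# retries, and the returned future exposes the same topic/partition/offset
# metadata.
from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers=KAFKA_SERVER)
future = producer.send('topic1', b'some message', partition=1)
metadata = future.get(timeout=10)  # raises on delivery failure
print(metadata.topic, metadata.partition, metadata.offset)
producer.close()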
def kafkaTasks(self, addr, topic, tasks):
    try:
        from kafka import SimpleProducer, KafkaClient, KeyedProducer
    except ImportError:
        logger.error("kafka-python is not installed")
        raise Exception("kafka-python is not installed")
    kafka_client = None
    try:
        kafka_client = KafkaClient(addr)
        producer = KeyedProducer(kafka_client)
        for task in tasks:
            #self.producer.send_messages(self.warehouse, task.id, json.dumps(task, default=object2dict))
            producer.send_messages(topic, self.manager.name, cPickle.dumps(task))
    finally:
        if kafka_client:
            kafka_client.close()
def main():
    """ Usage: dump_to_mongodb dump <topic> --host=<host> [--consumer=<consumer>] """
    args = docopt(main.__doc__)
    host = args["--host"]
    print("=> Connecting to {0}...".format(host))
    logger.info("=> Connecting to {0}...".format(host))
    kafka = KafkaClient(host)
    print("=> Connected.")
    logger.info("=> Connected.")
    if args["dump"]:
        topic = args["<topic>"]
        consumer_id = args["--consumer"] or "dump_to_mongodb"
        consumer = SimpleConsumer(
            kafka,
            consumer_id,
            topic,
            buffer_size=1024 * 200,       # 200kb
            fetch_size_bytes=1024 * 200,  # 200kb
            max_buffer_size=None          # eliminate big message errors
        )
        consumer.seek(0, 1)
        while True:
            try:
                message = consumer.get_message()
                if message is None:
                    time.sleep(1)
                    continue
                val = message.message.value
                logger.info("message.message.value== %s " % val)
                print('val==', val)
                try:
                    item = json.loads(val)
                except ValueError:
                    continue
                if 'meta' in item and 'collection_name' in item['meta']:
                    _insert_item_to_monggodb(item)
            except Exception:
                traceback.print_exc()
                break
    kafka.close()
    return 0
def Producer():
    #producer = KafkaProducer(bootstrap_servers='deepc04.acis.ufl.edu:9092')
    #producer.send('test', b"testingt123456")
    kafka = KafkaClient("deepc04.acis.ufl.edu:9092")
    producer = SimpleProducer(kafka)
    topic = b'test'
    msg = b'Hello World from Me/Rajendra!'
    try:
        print_response(producer.send_messages(topic, msg))
    except LeaderNotAvailableError:
        # https://github.com/mumrah/kafka-python/issues/249
        time.sleep(1)
        print_response(producer.send_messages(topic, msg))
    kafka.close()
def fetchFrom(kafka_host):
    kafka = KafkaClient(kafka_host)
    consumer = SimpleConsumer(kafka, 'fetcher', 'toppage.pages')
    producer = SimpleProducer(kafka)
    for msg in consumer:
        page = json.loads(msg.message.value)
        output = {}
        output['inlink'] = ''
        output['level'] = 1
        output['url'] = page['orig_url']
        output['fts'] = page['ts_fetch']
        output['content'] = page['content']
        try:
            producer.send_messages("seeds", json.dumps(output))
        except MessageSizeTooLargeError as err:
            logging.warning(err)
    kafka.close()
def main():
    kafka = KafkaClient("mykafkaslave.example.com:6667")
    producer = SimpleProducer(kafka)
    topic = b'testkafka'
    cnt = 0
    while cnt < 10000:
        cnt = cnt + 1
        msg = b'Hello World - 1 - ' + ("%d" % cnt).encode('utf-8')
        try:
            print_response(cnt, producer.send_messages(topic, msg))
        except LeaderNotAvailableError:
            print_response(cnt, producer.send_messages(topic, msg))
    kafka.close()
def setUp(self):
    logging.basicConfig()
    handler = logging.StreamHandler(stdout)
    logger = logging.getLogger("kafka")
    logger.setLevel(logging.INFO)
    logger.addHandler(handler)

    self.logger = logging.getLogger("tester")
    self.logger.debug("setup started")

    kafka_location = "127.0.0.1:9092"
    client = KafkaClient(kafka_location)
    client.ensure_topic_exists("frontier-todo")
    client.ensure_topic_exists("frontier-done")
    client.ensure_topic_exists("frontier-score")
    client.close()

    settings = Settings()
    settings.set('KAFKA_LOCATION', kafka_location)
    settings.set('SPIDER_FEED_PARTITIONS', 1)
    settings.set('SPIDER_LOG_PARTITIONS', 1)
    settings.set('QUEUE_HOSTNAME_PARTITIONING', True)
    self.messagebus = KafkaMessageBus(settings)

    spiderlog = self.messagebus.spider_log()
    # sw
    self.sw_sl_c = KafkaConsumerPolling(spiderlog.consumer(partition_id=0, type=b'sw'))
    scoring_log = self.messagebus.scoring_log()
    self.sw_us_p = scoring_log.producer()

    # db
    self.db_sl_c = KafkaConsumerPolling(spiderlog.consumer(partition_id=None, type=b'db'))
    self.db_us_c = KafkaConsumerPolling(scoring_log.consumer())
    spider_feed = self.messagebus.spider_feed()
    self.db_sf_p = spider_feed.producer()

    # spider
    self.sp_sl_p = spiderlog.producer()
    self.sp_sf_c = KafkaConsumerPolling(spider_feed.consumer(partition_id=0))

    self.logger.debug("init is done")
def main():
    kafka = KafkaClient("192.168.33.10:9092")
    producer = SimpleProducer(kafka)
    topic = b'test_test'
    # msg = "{'resource':'student','action':'added','id':'db2987','object':{'last':'burge','first':'dustin','courses':['COMS W4115','COMS W4170','COMS E6111'],'uni':'db2987'}}"
    #msg = "{resource:student,action:added,id:db2987,object:{last:burge,first:dustin,courses:[COMS W4115,COMS W4170,COMS E6111],uni:db2987}}"
    msgAdded = {"resource": "student", "action": "added", "id": "db2987",
                "object": {"last": "burge", "first": "dustin",
                           "courses": ["COMSW4115", "COMSW4170", "COMSE6111"],
                           "uni": "db2987"}}
    msgRemoved = {"resource": "student", "action": "removed", "id": "db2987"}
    #msgModified =
    msg = msgRemoved
    try:
        print_response(producer.send_messages(topic, json.dumps(msg)))
    except LeaderNotAvailableError:
        # https://github.com/mumrah/kafka-python/issues/249
        time.sleep(1)
        print_response(producer.send_messages(topic, json.dumps(msg)))
    kafka.close()