class Producer(object):
    def __init__(self, addr):
        self.producer = KafkaProducer(
            bootstrap_servers=addr,
            value_serializer=lambda v: json.dumps(v).encode())

    def produce_msgs(self, source_symbol):
        # Column names for the pipe-delimited contribution records.
        headers = ['CMTE_ID', 'AMNDT_IND', 'RPT_TP', 'TRANSACTION_PGI', 'IMAGE_NUM',
                   'TRANSACTION_TP', 'ENTITY_TP', 'NAME', 'CITY', 'STATE', 'ZIP_CODE',
                   'EMPLOYER', 'OCCUPATION', 'TRANSACTION_DT', 'TRANSACTION_AMT',
                   'OTHER_ID', 'TRAN_ID', 'FILE_NUM', 'MEMO_CD', 'MEMO_TEXT', 'SUB_ID']
        with open('/home/ubuntu/2016/by_date/itcont_2016_10151005_20150726.txt') as f:
            reader = csv.reader(f, delimiter='|')
            for row in reader:
                row = {h: x for h, x in zip(headers, row)}
                print(row)
                self.producer.send('data', row)
        self.producer.flush()
class KafkaLoggingHandler(logging.Handler):
    producer = None

    def __init__(self, hosts_list, topic, kafka_api_version):
        logging.Handler.__init__(self)
        self.kafka_topic_name = topic
        self.producer = KafkaProducer(bootstrap_servers=hosts_list,
                                      api_version=kafka_api_version)

    def emit(self, record):
        # Drop records emitted by the kafka library itself to avoid infinite recursion.
        if record.name == 'kafka':
            return
        try:
            msg = self.format(record)
            if isinstance(msg, str):
                msg = msg.encode("utf-8")
            self.producer.send(self.kafka_topic_name, msg)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception:
            self.handleError(record)

    def close(self):
        if self.producer is not None:
            self.producer.close()
        logging.Handler.close(self)
class Producer(object):
    def __init__(self, addr):
        self.producer = KafkaProducer(bootstrap_servers=addr)

    def produce_msgs(self, source_symbol):
        msg_cnt = 0
        packaged_record = ""
        record_size = 0
        total_records = 0
        count = 0
        start_time = time.time()
        while True:
            dt = datetime.datetime.now().strftime('%d-%m-%Y %H:%M:%S')
            user_event = Event(normal_distrib(user_agent_list), fake.ipv4(),
                               normal_distrib(user_id_list), dt,
                               normal_distrib(list(range(1, 1000))),
                               random_choice(event_sample))
            data = json.dumps(user_event.__dict__)
            total_records += 1
            count += 1
            # Package multiple records into a single message up to the byte limit.
            if record_size < 100000:
                record_size += get_byte_size(data)
                packaged_record += data + '\n'
            else:
                self.producer.send('web_event', packaged_record.encode('utf-8'))
                record_size = get_byte_size(data)
                packaged_record = data + '\n'
            if count % 100000 == 0:
                print("Records sent: {0}, Rate: {1}".format(
                    total_records, total_records / (time.time() - start_time)))
def my_function(file_key):
    '''This is a function that will run within the DAG execution'''
    producer = KafkaProducer(bootstrap_servers='10.0.0.24', linger_ms=4000)
    msg_cnt = 0
    tz = pytz.timezone('America/Los_Angeles')
    init_time = datetime.now(tz)
    fs = 360  # sampling rate of the signal file, in Hz
    while True:
        s3 = boto3.client('s3')
        obj = s3.get_object(Bucket="bhaudata", Key="101_signals.txt")
        for line in obj['Body'].iter_lines():
            message_info = None
            try:
                linesplit = line.decode()
                str_fmt = "{},{},{}"
                # Derive a wall-clock timestamp from the sample index and sampling rate.
                timestamp = init_time + timedelta(seconds=round((msg_cnt / fs), 3))
                y = timestamp.strftime("%H:%M:%S.%f")
                y = y[:-3]
                message_info = str_fmt.format(file_key, y, linesplit)
            except Exception as e:
                print("error formatting message: %s" % e)
            try:
                msg = str.encode(message_info)
            except Exception:
                msg = None
            if msg is not None:
                producer.send("ecg-topic2", msg)
                msg_cnt += 1
        break
def _init_kafka(self):
    self.producer = KafkaProducer(
        **KAFKA_CONNECTION_CONFIG,
        # With retries, max_in_flight should always be 1 to ensure ordering of batches!
        max_in_flight_requests_per_connection=1,
        retries=3)
    self.logger.info("Initialised Kafka connection")
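KAFKA_CONNECTION_CONFIG is defined elsewhere in that project; a minimal sketch of what such a dict might contain, with placeholder values that are not taken from the original code:

# Hypothetical connection settings unpacked into KafkaProducer above;
# the broker address and client_id are placeholders.
KAFKA_CONNECTION_CONFIG = {
    "bootstrap_servers": ["localhost:9092"],
    "client_id": "example-producer",
    "acks": "all",
}

Keeping connection settings in a single dict like this makes it easy to share them between producers and consumers.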
class Producer(): def __init__(self): self.producer = KafkaProducer(bootstrap_servers=["52.41.44.90:9092","52.36.206.57:9092","52.40.205.225:9092"],acks=0,linger_ms=500) def produce_msgs(self,msg_list): while True: index = random.randrange(0,999) json_msg =json.dumps(msg_list[index]).encode('utf-8') self.producer.send(topic, json_msg)
class AppKafkaProducer():
    def __init__(self):
        # Initialize Kafka.
        self.producer = KafkaProducer(bootstrap_servers=config.KAFKA_SERVER)

    def sendMessage(self, msg):
        # KafkaProducer expects bytes, so encode string payloads first.
        if isinstance(msg, str):
            msg = msg.encode('utf-8')
        self.producer.send(config.TOPIC, msg)
class MyKafkaProducer(object):
    """
    class that implements Kafka producers that ingest data from S3 bucket
    """

    def __init__(self, kafka_configfile, schema_file, s3_configfile):
        """
        class constructor that initializes the instance according to the
        configurations of the S3 bucket and Kafka
        :type kafka_configfile: str  path to kafka config file
        :type schema_file     : str  path to schema file
        :type s3_configfile   : str  path to S3 config file
        """
        self.kafka_config = helpers.parse_config(kafka_configfile)
        self.schema = helpers.parse_config(schema_file)
        self.s3_config = helpers.parse_config(s3_configfile)
        self.producer = KafkaProducer(
            bootstrap_servers=self.kafka_config["BROKERS_IP"])

    def get_key(self, msg):
        """
        produces key for message to Kafka topic
        :type msg: dict  message for which to generate the key
        :rtype   : bytes key for the message
        """
        msgwithkey = helpers.add_block_fields(msg)
        if msgwithkey is None:
            return
        x, y = msgwithkey["block_lonid"], msgwithkey["block_latid"]
        return str((x * 137 + y) % 77703).encode()

    def produce_msgs(self):
        """
        produces messages and sends them to topic
        """
        msg_cnt = 0
        while True:
            s3 = boto3.client('s3')
            obj = s3.get_object(Bucket=self.s3_config["BUCKET"],
                                Key="{}/{}".format(self.s3_config["FOLDER"],
                                                   self.s3_config["STREAMING_FILE"]))
            for line in lazyreader.lazyread(obj['Body'], delimiter='\n'):
                message_info = line.strip()
                msg = helpers.map_schema(message_info, self.schema)
                if msg is not None:
                    self.producer.send(self.kafka_config["TOPIC"],
                                       value=json.dumps(msg).encode(),
                                       key=self.get_key(msg))
                    msg_cnt += 1
                time.sleep(0.001)
class Producer(object):
    """
    Class to ingest ecg signal data and send it to a kafka topic through a kafka producer.
    """

    def __init__(self, ip_addr, kafka_config_infile, s3bucket_config_infile):
        if not os.path.exists('./tmp'):
            os.makedirs('./tmp')
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s %(levelname)s %(message)s',
                            filename='./tmp/kafka_producer.log',
                            filemode='w')
        self.logger = logging.getLogger('py4j')
        # self.kafka_config = helpers.parse_config(kafka_config_infile)
        # self.s3bucket_config = helpers.parse_config(s3bucket_config_infile)
        self.producer = KafkaProducer(bootstrap_servers=ip_addr, linger_ms=4000)

    def produce_ecg_signal_msgs(self, file_key):
        """
        Produces messages and sends them to topic.
        """
        msg_cnt = 0
        tz = pytz.timezone('America/Los_Angeles')
        init_time = datetime.now(tz)
        fs = 360  # signal sampling rate in Hz
        while True:
            s3 = boto3.client('s3')
            obj = s3.get_object(Bucket="testsmalldata",
                                Key="%s_signals.txt" % file_key)
            for line in obj['Body'].iter_lines():
                message_info = None
                try:
                    linesplit = line.decode().split(',')
                    str_fmt = "{},{},{}"
                    # Timestamp each sample relative to the start time using the sampling rate.
                    timestamp = init_time + timedelta(seconds=round((msg_cnt / fs), 3))
                    y = timestamp.strftime("%H:%M:%S.%f")
                    y = y[:-3]
                    message_info = str_fmt.format(file_key, y, linesplit[1])
                except Exception as e:
                    self.logger.error('fxn produce_ecg_signal_msgs error %s' % e)
                try:
                    msg = str.encode(message_info)
                except Exception as e:
                    msg = None
                    self.logger.debug('empty message %s' % e)
                if msg is not None:
                    self.producer.send("ecg-topic", msg)
                    msg_cnt += 1
                print(message_info)
                time.sleep(0.001)
            break
class Producer(): def __init__(self): self.producer = KafkaProducer(bootstrap_servers=["52.41.44.90:9092","52.36.206.57:9092","52.40.205.225:9092"],acks=0,linger_ms=500) def produce_msgs(self,msg_list): while True: index = random.randrange(0,999) info = person_pb2.PersonInfo() serialize_protobuf (info.user.add(), msg_list[index]) _msg = user.SerializeToString() self.producer.send(topic, _msg)
def __init__(self, kafka_configfile, s3_configfile):
    """
    class constructor that initializes the instance according to the
    configurations of the S3 bucket and Kafka
    :type kafka_configfile: str  path to kafka config file
    :type s3_configfile   : str  path to S3 config file
    """
    self.kafka_config = helper.parse_config(kafka_configfile)
    self.s3_config = helper.parse_config(s3_configfile)
    self.producer = KafkaProducer(
        bootstrap_servers=self.kafka_config["BROKERS_IP"])
class Producer():
    def __init__(self):
        self.producer = KafkaProducer(bootstrap_servers='localhost:9092')

    def produce_msgs(self, source):
        for drop in source:
            if 'text' in drop:
                message = json.dumps(drop)
                # Messages must be bytes when no value_serializer is configured.
                self.producer.send('Twitter-Stream', message.encode('utf-8'))
                print(message)
                self.producer.send('message-size', str(len(message)).encode('utf-8'))
def main():
    s3 = boto3.resource('s3')
    bucket = s3.Bucket('nyc-tlc')
    # Iterates through all the objects, doing the pagination for you. Each obj
    # is an ObjectSummary, so it doesn't contain the body. You'll need to call
    # get to get the whole body.
    kafka_params = config('kafka')
    dataset_params = config('dataset')
    producer = KafkaProducer(bootstrap_servers=kafka_params['broker'])
    for obj in bucket.objects.all():
        key = obj.key
        if dataset_params['rider'] not in key:
            continue
        # building absolute file name
        file_name = 's3://nyc-tlc/' + key
        # skipping header
        firstline = True
        # processing the file
        for line in smart_open(file_name):
            print(line.decode('utf8'))
            if firstline:
                # skip first line
                firstline = False
                continue
            line_split = line.decode('utf8').split(",")
            print(line_split)
            if len(line_split) < 20:
                # skipping rows with too few columns
                continue
            if line_split[5] == '0' or line_split[6] == '0' or line_split[7] == '0' or line_split[8] == '0':
                continue
            else:
                start_point = (float(line_split[5]), float(line_split[6]))
                end_point = (float(line_split[7]), float(line_split[8]))
                print(start_point, end_point)
                trip_id = 'ride:' + str(datetime.now()) + ":" + str(random.randint(1, 1000))
                # formatting the message
                str_fmt = "{};{};{};{};{};{}"
                message_info = str_fmt.format(trip_id,
                                              start_point[0], start_point[1],
                                              end_point[0], end_point[1],
                                              "In Progress")
                print(message_info)
                producer.send(kafka_params['rider_topic'], message_info.encode('utf8'))
def __init__(self, ip_addr, kafka_config_infile, s3bucket_config_infile):
    if not os.path.exists('./tmp'):
        os.makedirs('./tmp')
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(levelname)s %(message)s',
                        filename='./tmp/kafka_producer.log',
                        filemode='w')
    self.logger = logging.getLogger('py4j')
    self.kafka_config = helpers.parse_config(kafka_config_infile)
    self.s3bucket_config = helpers.parse_config(s3bucket_config_infile)
    self.producer = KafkaProducer(bootstrap_servers=ip_addr)
def my_producer(path='../01_data/occupancy_data.csv', topic='test', low=0.5, high=1.5, limit=0):
    producer = KafkaProducer(bootstrap_servers='localhost:9092')
    rand = random.uniform(float(low), float(high))
    f = open(path, 'rt')
    for idx, line in enumerate(f):
        if idx == limit and limit != 0:
            break
        producer.send(topic, bytes(line, 'utf8'))
        sleep(rand)
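A possible way to invoke my_producer, assuming a local broker and the default CSV path from the signature above; the limit value is just for illustration:

if __name__ == "__main__":
    # Stream the first 100 lines of the CSV to the 'test' topic, pausing a
    # randomly chosen (but fixed per call) delay of 0.5 to 1.5 seconds between messages.
    my_producer(path='../01_data/occupancy_data.csv', topic='test',
                low=0.5, high=1.5, limit=100)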
class Producer(object):
    def __init__(self, addr):
        self.producer = KafkaProducer(bootstrap_servers=addr)

    def produce_msgs(self):
        with bz2.open(args.path, 'rt', encoding='utf-8') as f:
            content = f.readlines()
            content = [x.strip() for x in content]
            while True:
                for tweet in content:
                    tweet_dict = json.loads(tweet)
                    if 'text' in tweet_dict and tweet_dict['lang'] == 'en':
                        self.producer.send('twitter', tweet.encode('utf-8'))
def main():
    producer = KafkaProducer(bootstrap_servers=config.KAFKA_SERVERS,
                             value_serializer=lambda v: json.dumps(v).encode())
    headers = [
        'CMTE_ID', 'AMNDT_IND', 'RPT_TP', 'TRANSACTION_PGI', 'IMAGE_NUM',
        'TRANSACTION_TP', 'ENTITY_TP', 'NAME', 'CITY', 'STATE', 'ZIP_CODE',
        'EMPLOYER', 'OCCUPATION', 'TRANSACTION_DT', 'TRANSACTION_AMT',
        'OTHER_ID', 'TRAN_ID', 'FILE_NUM', 'MEMO_CD', 'MEMO_TEXT', 'SUB_ID'
    ]
    with open('/home/ubuntu/2016/by_date/itcont_2016_10151005_20150726.txt') as f:
        data = f.readlines()
    data = [x.split("|") for x in data]
    for row in data:
        row = {h: x for h, x in zip(headers, row)}
        producer.send('data', row)
    producer.flush()
def init(self, args):
    print("Initialization of Kafka Python driver w/ args=%s" % args)
    try:
        self.hosts = args['hosts']
        self.topic = args['topic']
    except KeyError:
        print("Missing `hosts` or `topic` option...")
        return False
    # optional `programs` parameter to filter out messages
    if 'programs' in args:
        self.programs = parse_str_list(args['programs'])
        print("Found programs to filter against %s" % args['programs'])
    self.kafka_producer = KafkaProducer(bootstrap_servers=self.hosts)
    return True
class Producer(object):
    """
    Class to ingest ecg signal data and send it to a kafka topic through a kafka producer.
    """

    def __init__(self, ip_addr, kafka_config_infile, s3bucket_config_infile):
        if not os.path.exists('./tmp'):
            os.makedirs('./tmp')
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s %(levelname)s %(message)s',
                            filename='./tmp/kafka_producer.log',
                            filemode='w')
        self.logger = logging.getLogger('py4j')
        self.kafka_config = helpers.parse_config(kafka_config_infile)
        self.s3bucket_config = helpers.parse_config(s3bucket_config_infile)
        self.producer = KafkaProducer(bootstrap_servers=ip_addr)

    def produce_ecg_signal_msgs(self, file_key):
        """
        Produces messages and sends them to topic.
        """
        msg_cnt = 0
        while True:
            s3 = boto3.client('s3')
            obj = s3.get_object(Bucket=self.s3bucket_config['bucket'],
                                Key="%s_signals.txt" % file_key)
            for line in obj['Body'].iter_lines():
                message_info = None
                try:
                    linesplit = line.decode().split(',')
                    str_fmt = "{},{},{},{},{}"
                    message_info = str_fmt.format(
                        file_key,
                        datetime.now(pytz.timezone('US/Eastern')),
                        linesplit[1], linesplit[2], linesplit[3])
                except Exception as e:
                    self.logger.error('fxn produce_ecg_signal_msgs error %s' % e)
                try:
                    msg = str.encode(message_info)
                except Exception as e:
                    msg = None
                    self.logger.debug('empty message %s' % e)
                if msg is not None:
                    self.producer.send(self.kafka_config['topic'], msg)
                    msg_cnt += 1
                print(message_info)
                time.sleep(0.001)
def _get_producer(self, bootstrap_servers: List[str]) -> Optional[KafkaProducer]:
    for bootstrap_server in bootstrap_servers:
        # Reuse a cached producer when one already exists for this address.
        if bootstrap_server in self.producers:
            return self.producers[bootstrap_server]
        if bootstrap_server == DEFAULT_FLAG:
            return None
        if bootstrap_server in self.fail_pass:
            continue
        try:
            brokers = get_brokers(bootstrap_server)
            self.logger.debug(f"brokers from {bootstrap_server} {brokers}")
        except Exception as e:
            self.logger.warning(f"can not get brokers {bootstrap_server} {e}")
            self.fail_pass[bootstrap_server] = 0
            continue
        for broker in brokers:
            if broker in self.producers:
                producer = self.producers[broker]
                self.producers.update(dict.fromkeys(brokers, producer))
                self.producers[bootstrap_server] = producer
                return producer
        try:
            producer = KafkaProducer(bootstrap_servers=brokers, **self.configs)
            self.producers.update(dict.fromkeys(brokers, producer))
            self.producers[bootstrap_server] = producer
            return producer
        except Exception as e:
            self.logger.warning(f"can not init producer {bootstrap_server} {e}")
class Producer(object):
    def __init__(self, addr):
        self.producer = KafkaProducer(bootstrap_servers=addr)

    def produce_msgs(self, source_symbol):
        while True:
            # generate random values for the IP and URL, and get the current time for the timestamp
            ip_field = numpy.random.choice(ips)
            url_field = WEBSITE_NAME + "page" + str(numpy.random.randint(1, 1001))
            time_field = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            # produce to the topic indicated in TOPIC_NAME
            str_fmt = "{};{};{};{}"
            message_info = str_fmt.format(source_symbol, time_field, ip_field, url_field)
            self.producer.send(TOPIC_NAME, message_info.encode('utf-8'))
class Producer(object):
    def __init__(self, addr):
        self.producer = KafkaProducer(bootstrap_servers=addr)

    def produce_msgs(self, source_symbol):
        price_field = random.randint(800, 1400)
        msg_cnt = 0
        while True:
            time_field = datetime.now().strftime("%Y%m%d %H%M%S")
            # random walk on the price, plus a random trade volume
            price_field += random.randint(-10, 10) / 10.0
            volume_field = random.randint(1, 1000)
            str_fmt = "{};{};{};{}"
            message_info = str_fmt.format(source_symbol, time_field, price_field, volume_field)
            print(message_info)
            self.producer.send('price_data_part4', message_info.encode('utf-8'))
            msg_cnt += 1
class Producer(object):
    def __init__(self, addr):
        self.producer = KafkaProducer(bootstrap_servers=addr)

    def produce_msgs(self, source_symbol, file_name):
        msg_cnt = 0
        with open(file_name) as f:
            for line in f:
                time_field = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
                str_fmt = "{};{};{}"
                message_info = str_fmt.format(source_symbol, time_field, line)
                print(message_info)
                self.producer.send('transactions1', message_info.encode('utf-8'))
                print("{} transactions sent. Sending next one ...".format(msg_cnt))
                msg_cnt += 1
def __init__(self, kafkaHost, kafkaPort, tcpHost, tcpPort, group_id, topic, logTopic, interval):
    self.kafkaHost = kafkaHost
    self.kafkaPort = kafkaPort
    self.tcpHost = tcpHost
    self.tcpPort = tcpPort
    self.group_id = group_id
    self.topic = topic
    self.logTopic = logTopic
    self.interval = int(interval)
    self.consumer = KafkaConsumer(
        topic,
        bootstrap_servers=["{}:{}".format(kafkaHost, kafkaPort)],
        group_id=group_id,
        enable_auto_commit=False)
    self.producer = KafkaProducer(
        bootstrap_servers=["{}:{}".format(kafkaHost, kafkaPort)])
    self.tcpWriter = None
def store_probe_results(site_id, status_code, regex_results):
    global g_config_kafka
    # I'll hardcode that SSL is required.
    producer = KafkaProducer(
        bootstrap_servers=[g_config_kafka['bootstrap_server']],
        security_protocol="SSL",
        ssl_cafile=g_config_kafka['ssl_cafile'],
        ssl_keyfile=g_config_kafka['ssl_keyfile'],
        ssl_certfile=g_config_kafka['ssl_certfile'])
    # Since both sides of this program are trusted I can send the raw dictionary
    # and decode it on the other end:
    message = {
        'site_id': site_id,
        'status_code': status_code,
        'regex_results': regex_results
    }
    ack = producer.send(g_config_kafka['topic_name'],
                        json.dumps(message).encode('utf-8'))
    # send() is asynchronous; flush so the record is delivered before the
    # per-call producer goes out of scope.
    producer.flush()
    return True
def __init__(self, kafka_host, kafka_port, tcp_host, tcp_port, topic, log_topic):
    self.kafka_host = kafka_host
    self.kafka_port = kafka_port
    self.tcp_host = tcp_host
    self.tcp_port = tcp_port
    self.topic = topic
    self.log_topic = log_topic
    self.consumer = KafkaConsumer(
        topic,
        bootstrap_servers=["{}:{}".format(kafka_host, kafka_port)],
        enable_auto_commit=False,
        max_poll_records=1024 * 1024,
        max_partition_fetch_bytes=1024 * 1024 * 100)
    self.producer = KafkaProducer(
        bootstrap_servers=["{}:{}".format(kafka_host, kafka_port)])
    self.connections = {}
    self.sample_end_time = self.get_end_time(time())
    self.lastPolled = []
class EventsProducer(Destination):
    """
    Kafka Producer
    Sets Destination of API Ingestion to Kafka Cluster
    """

    def __init__(self, addr, topic="git-events"):
        """Initializes with Broker Address and Topic Name"""
        self.producer = KafkaProducer(
            bootstrap_servers=addr,
            value_serializer=lambda m: json.dumps(m).encode('ascii'),
            api_version=(0, 1, 0))
        self.topic = topic

    # PRODUCE TO TOPIC
    def move_to_dest(self, filename, datestring):
        """Sends Local File to Kafka Topic"""
        with open(filename, 'r') as file:
            for line in file:
                d = json.loads(line)
                # .get() blocks until the broker acknowledges each record.
                self.producer.send(self.topic, d).get()
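A sketch of how EventsProducer might be driven, assuming a newline-delimited JSON file of events on disk; the broker address, file name, and date string below are illustrative, not from the original project:

# Illustrative usage only; broker address, file path, and date are placeholders.
events = EventsProducer(addr="localhost:9092", topic="git-events")
events.move_to_dest("events_2020-01-01.json", "2020-01-01")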
def connect_kafka_producer():
    producer = None
    try:
        producer = KafkaProducer(acks=0,
                                 compression_type='gzip',
                                 bootstrap_servers=['localhost:9092'],
                                 api_version=(0, 10))
    except Exception as ex:
        print("Error: ", ex)
    return producer
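A short usage sketch for connect_kafka_producer; the topic name and payload are made up for illustration:

# Illustrative usage; topic and payload are placeholders.
producer = connect_kafka_producer()
if producer is not None:
    producer.send('example-topic', b'hello from connect_kafka_producer')
    producer.flush()   # make sure the asynchronous send reaches the broker
    producer.close()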
def on_status(self, status):
    print(status.text, "\n")
    msg = status.text.encode('utf-8')
    producer = KafkaProducer(bootstrap_servers='0.0.0.0:9092')
    try:
        producer.send('twitterstream', msg)
    except Exception as err:
        print(err)
        return False
    return True
class KafkaLoggingHandler(logging.Handler):
    def __init__(self, hosts_list, topic, **kwargs):
        logging.Handler.__init__(self)
        self.kafka_topic_name = topic
        self.producer = KafkaProducer(bootstrap_servers=hosts_list)

    def emit(self, record):
        # drop kafka logging to avoid infinite recursion
        if record.name == 'kafka':
            return
        try:
            # use default formatting
            msg = self.format(record)
            msg = str.encode(msg)
            self.producer.send(self.kafka_topic_name, msg)
            self.producer.flush()
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception:
            self.handleError(record)

    def close(self):
        if self.producer is not None:
            self.producer.close()
        logging.Handler.close(self)
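One way to wire the handler above into the standard logging setup; the broker list, topic, and logger name are placeholders:

# Illustrative wiring of KafkaLoggingHandler into the logging module;
# broker list, topic, and logger name are placeholders.
import logging

kafka_handler = KafkaLoggingHandler(["localhost:9092"], "app-logs")
kafka_handler.setLevel(logging.INFO)
kafka_handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))

logger = logging.getLogger("my_app")
logger.addHandler(kafka_handler)
logger.info("application started")  # this record is formatted and sent to Kafka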
def __init__(self): self.producer = KafkaProducer(bootstrap_servers=["52.41.44.90:9092","52.36.206.57:9092","52.40.205.225:9092"],acks=0,linger_ms=500)
def __init__(self):
    self.producer = KafkaProducer(bootstrap_servers='localhost:9092')
# coding=utf-8
import logging

from kafka.producer import KafkaProducer

if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    producer = KafkaProducer(bootstrap_servers=["192.168.120.90:9092"])
    producer.send("wangybnet", b"Hello, World!")
    # send() is asynchronous; flush before exiting so the message is actually delivered.
    producer.flush()
from kafka.producer import KafkaProducer
import ConfigParser
import socket

if __name__ == "__main__":
    config = ConfigParser.ConfigParser()
    config.read('configuration.cfg')
    urlKafkaProducer = config.get('StreamingProperties', 'URLKafkaProducer')
    topicName = config.get('StreamingProperties', 'TopicName')

    # Pick the streaming file depending on whether we run locally or on the VM.
    virtualMachine = 'local'
    if socket.gethostname() == 'ubuntu':
        virtualMachine = socket.gethostname()
    if virtualMachine == 'local':
        fileName = config.get('StreamingProperties', 'StreamingFileLocal')
    else:
        fileName = config.get('StreamingProperties', 'StreamingFileVirtual')

    producer = KafkaProducer(bootstrap_servers=urlKafkaProducer)
    infile = open(fileName, 'r')
    for line in infile:
        producer.send(topicName, line)
        #time.sleep(0.000000001)
    producer.flush()
    infile.close()
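The script above reads its settings from configuration.cfg; a sketch of what that file might contain, with every value being a placeholder rather than something taken from the original project:

[StreamingProperties]
URLKafkaProducer = localhost:9092
TopicName = streaming-topic
StreamingFileLocal = ./data/stream_sample.txt
StreamingFileVirtual = /home/ubuntu/data/stream_sample.txt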