Example no. 1
class Producer(object):
    def __init__(self, addr):
        self.producer = KafkaProducer(
            bootstrap_servers=addr,
            value_serializer=lambda v: json.dumps(v).encode())

    def produce_msgs(self, source_symbol):
        headers = [
            'CMTE_ID', 'AMNDT_IND', 'RPT_TP', 'TRANSACTION_PGI', 'IMAGE_NUM',
            'TRANSACTION_TP', 'ENTITY_TP', 'NAME', 'CITY', 'STATE', 'ZIP_CODE',
            'EMPLOYER', 'OCCUPATION', 'TRANSACTION_DT', 'TRANSACTION_AMT',
            'OTHER_ID', 'TRAN_ID', 'FILE_NUM', 'MEMO_CD', 'MEMO_TEXT', 'SUB_ID'
        ]

        with open('/home/ubuntu/2016/by_date/itcont_2016_10151005_20150726.txt') as f:
            # Each row of the pipe-delimited FEC file becomes a dict keyed by the headers.
            reader = csv.reader(f, delimiter='|')
            for row in reader:
                row = {h: x for h, x in zip(headers, row)}
                print(row)
                self.producer.send('data', row)

        # Block until every buffered record has been transmitted.
        self.producer.flush()
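A hypothetical driver for the class above (the broker address is a placeholder; the snippet itself assumes csv, json, and KafkaProducer are imported):

import csv
import json
from kafka import KafkaProducer

if __name__ == "__main__":
    producer = Producer("localhost:9092")  # placeholder broker address
    producer.produce_msgs("fec")           # source_symbol is unused above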
Example no. 2
class KafkaLoggingHandler(logging.Handler):
    producer = None

    def __init__(self, hosts_list, topic, kafka_api_version):
        logging.Handler.__init__(self)

        self.kafka_topic_name = topic
        self.producer = KafkaProducer(bootstrap_servers=hosts_list,
                                      api_version=kafka_api_version)

    def emit(self, record):
        # Drop Kafka's own log records to avoid infinite recursion.
        if record.name == 'kafka':
            return
        try:
            msg = self.format(record)
            if isinstance(msg, str):
                msg = msg.encode("utf-8")

            self.producer.send(self.kafka_topic_name, msg)
        except (KeyboardInterrupt, SystemExit):
            raise
        except:
            self.handleError(record)

    def close(self):
        if self.producer is not None:
            self.producer.close()
        logging.Handler.close(self)
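A minimal sketch of wiring this handler into the standard logging module (broker list, topic, and api_version are placeholders):

import logging
from kafka import KafkaProducer  # needed by the handler above

logger = logging.getLogger("my_app")
logger.setLevel(logging.INFO)

handler = KafkaLoggingHandler(["localhost:9092"], "app-logs", (0, 10))
handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
logger.addHandler(handler)

logger.info("service started")  # also produced to the 'app-logs' topic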
Example no. 3
class Producer(object):
    def __init__(self, addr):
        self.producer = KafkaProducer(bootstrap_servers=addr)

    def produce_msgs(self, source_symbol):
        msg_cnt = 0
        packaged_record = ""
        record_size = 0
        total_records = 0
        count = 0
        start_time = time.time()  # needed by the rate printout below
        while True:
            dt = datetime.datetime.now().strftime('%d-%m-%Y %H:%M:%S')

            user_event = Event(normal_distrib(user_agent_list), fake.ipv4(),
                               normal_distrib(user_id_list), dt,
                               normal_distrib(list(range(1, 1000))),
                               random_choice(event_sample))
            data = json.dumps(user_event.__dict__)
            total_records += 1
            count += 1
            # Package multiple records into a single record up to the byte limit
            if record_size < 100000:
                record_size += get_byte_size(data)
                packaged_record += data + '\n'
            else:
                self.producer.send('web_event', packaged_record.encode('utf-8'))
                record_size = get_byte_size(data)
                packaged_record = data + '\n'
            if count % 100000 == 0:
                print("Records sent: {0}, Rate: {1}".format(
                    total_records, total_records / (time.time() - start_time)))
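get_byte_size (like Event, normal_distrib, and the sample lists) is defined elsewhere in the original project; a plausible stand-in, assuming the 100000-byte cap above is meant in encoded bytes:

def get_byte_size(s):
    # size of the string once UTF-8 encoded
    return len(s.encode('utf-8'))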
Example no. 4
def my_function(file_key):
    '''This is a function that will run within the DAG execution'''
    producer = KafkaProducer(bootstrap_servers='10.0.0.24', linger_ms=4000)
    msg_cnt = 0
    tz = pytz.timezone('America/Los_Angeles')
    init_time = datetime.now(tz)
    fs = 360  # sampling rate in samples/second, used to space the timestamps
    while True:
        s3 = boto3.client('s3')
        obj = s3.get_object(Bucket="bhaudata",
                            Key="101_signals.txt")
        for line in obj['Body'].iter_lines():
            message_info = None
            try:
                linesplit = line.decode()
                str_fmt = "{},{},{}"
                timestamp = init_time + timedelta(seconds=round(msg_cnt / fs, 3))
                y = timestamp.strftime("%H:%M:%S.%f")
                y = y[:-3]
                message_info = str_fmt.format(file_key,
                                              y,
                                              linesplit
                                              )
            except Exception as e:
                print("failed to format line: %s" % e)
            try:
                msg = str.encode(message_info)
            except Exception as e:
                msg = None  # nothing valid to send for this line
            if msg is not None:
                producer.send("ecg-topic2", msg)
                msg_cnt += 1
        break
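The docstring says this function runs within a DAG; a hypothetical Airflow 2.x wiring (DAG id, schedule, and task id are illustrative, not from the original project):

from datetime import datetime
from airflow import DAG
from airflow.operators.python import PythonOperator

with DAG(dag_id="ecg_producer",           # illustrative name
         start_date=datetime(2021, 1, 1),
         schedule_interval=None) as dag:
    produce = PythonOperator(task_id="produce_ecg",
                             python_callable=my_function,
                             op_args=["101"])  # file_key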
Example no. 5
 def _init_kafka(self):
     self.producer = KafkaProducer(
         **KAFKA_CONNECTION_CONFIG,
         max_in_flight_requests_per_connection=1,
         # With retries, max_in_flight should always be 1 to ensure ordering of batches!
         retries=3)
     self.logger.info("Initialised Kafka connection")
class Producer():
    def __init__(self):
        self.producer = KafkaProducer(
            bootstrap_servers=["52.41.44.90:9092", "52.36.206.57:9092",
                               "52.40.205.225:9092"],
            acks=0,
            linger_ms=500)

    def produce_msgs(self, msg_list):
        # 'topic' is assumed to be defined elsewhere in the original module
        while True:
            index = random.randrange(0, 999)
            json_msg = json.dumps(msg_list[index]).encode('utf-8')
            self.producer.send(topic, json_msg)
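acks=0 means fire-and-forget: the producer never waits for broker acknowledgements, so messages can be lost silently, while linger_ms=500 batches sends for up to half a second. If delivery matters more than raw throughput, a safer configuration might look like this (values illustrative):

safe_producer = KafkaProducer(
    bootstrap_servers=["52.41.44.90:9092", "52.36.206.57:9092",
                       "52.40.205.225:9092"],
    acks='all',                               # wait for all in-sync replicas
    retries=3,
    max_in_flight_requests_per_connection=1,  # preserve ordering with retries
    linger_ms=500)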
Example no. 7
class AppKafkaProducer():
    def __init__(self):

        # Initialize Kafka.
        self.producer = KafkaProducer(bootstrap_servers=config.KAFKA_SERVER)

    def sendMessage(self, msg):
        # encode explicitly: bytes(msg) fails on Python 3 strings without an encoding
        self.producer.send(config.TOPIC, msg.encode('utf-8'))
Example no. 8
class MyKafkaProducer(object):
    """
    class that implements Kafka producers that ingest data from S3 bucket
    """
    def __init__(self, kafka_configfile, schema_file, s3_configfile):
        """
        class constructor that initializes the instance according to the configurations
        of the S3 bucket and Kafka
        :type kafka_configfile: str     path to kafka config file
        :type schema_file     : str     path to schema file
        :type s3_configfile   : str     path to S3 config file
        """
        self.kafka_config = helpers.parse_config(kafka_configfile)
        self.schema = helpers.parse_config(schema_file)
        self.s3_config = helpers.parse_config(s3_configfile)

        self.producer = KafkaProducer(
            bootstrap_servers=self.kafka_config["BROKERS_IP"])

    def get_key(self, msg):
        """
        produces key for message to Kafka topic
        :type msg: dict     message for which to generate the key
        :rtype   : bytes    encoded key for the message
        """
        msgwithkey = helpers.add_block_fields(msg)
        if msgwithkey is None:
            return
        x, y = msgwithkey["block_lonid"], msgwithkey["block_latid"]
        return str((x * 137 + y) % 77703).encode()

    def produce_msgs(self):
        """
        produces messages and sends them to topic
        """
        msg_cnt = 0

        while True:

            s3 = boto3.client('s3')
            obj = s3.get_object(Bucket=self.s3_config["BUCKET"],
                                Key="{}/{}".format(
                                    self.s3_config["FOLDER"],
                                    self.s3_config["STREAMING_FILE"]))

            for line in lazyreader.lazyread(obj['Body'], delimiter='\n'):

                message_info = line.strip()
                msg = helpers.map_schema(message_info, self.schema)

                if msg is not None:
                    self.producer.send(self.kafka_config["TOPIC"],
                                       value=json.dumps(msg).encode(),
                                       key=self.get_key(msg))
                    msg_cnt += 1

                time.sleep(0.001)
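helpers.parse_config is used throughout these examples but never shown; a minimal stand-in, assuming JSON config files (the real helper may read YAML or INI instead):

import json

def parse_config(path):
    # load a config file into a plain dict, e.g. {"BROKERS_IP": [...], "TOPIC": "..."}
    with open(path) as f:
        return json.load(f)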
class Producer(object):
    """
    Class to ingest ecg signal data and send them to kafka topic through kafka producer.
    """
    def __init__(self, ip_addr, kafka_config_infile, s3bucket_config_infile):
        if not os.path.exists('./tmp'):
            os.makedirs('./tmp')
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s %(levelname)s %(message)s',
                            filename='./tmp/kafka_producer.log',
                            filemode='w')
        self.logger = logging.getLogger('py4j')

        # kafka_config_infile and s3bucket_config_infile are accepted but unused here
        self.producer = KafkaProducer(bootstrap_servers=ip_addr,
                                      linger_ms=4000)

    def produce_ecg_signal_msgs(self, file_key):
        """
        Produces messages and sends them to topic.
        """
        msg_cnt = 0
        tz = pytz.timezone('America/Los_Angeles')
        init_time = datetime.now(tz)
        fs = 360
        while True:

            s3 = boto3.client('s3')
            obj = s3.get_object(Bucket="testsmalldata",
                                Key="%s_signals.txt" % file_key)
            for line in obj['Body'].iter_lines():
                message_info = None
                try:
                    linesplit = line.decode().split(',')  # split into fields; [1] below expects a list
                    str_fmt = "{},{},{}"
                    timestamp = init_time + timedelta(
                        seconds=round((msg_cnt / fs), 3))
                    y = timestamp.strftime("%H:%M:%S.%f")
                    y = y[:-3]

                    message_info = str_fmt.format(file_key, y, linesplit[1])
                except Exception as e:
                    self.logger.error('fxn produce_ecg_signal_msgs error %s' %
                                      e)
                try:
                    msg = str.encode(message_info)
                except Exception as e:
                    msg = None
                    self.logger.debug('empty message %s' % e)
                if msg is not None:
                    self.producer.send("ecg-topic", msg)
                    msg_cnt += 1
                print(message_info)
                time.sleep(0.001)
            break
Example no. 10
class Producer():
    def __init__(self):
        self.producer = KafkaProducer(
            bootstrap_servers=["52.41.44.90:9092", "52.36.206.57:9092",
                               "52.40.205.225:9092"],
            acks=0,
            linger_ms=500)

    def produce_msgs(self, msg_list):
        while True:
            index = random.randrange(0, 999)
            info = person_pb2.PersonInfo()
            serialize_protobuf(info.user.add(), msg_list[index])
            # serialize the populated PersonInfo message to bytes
            _msg = info.SerializeToString()
            self.producer.send(topic, _msg)
Example no. 11
 def __init__(self, kafka_configfile, s3_configfile):
     """
     class constructor that initializes the instance according to the configurations
     of the S3 bucket and Kafka
     :type kafka_configfile: str     path to kafka config file
     :type s3_configfile   : str     path to S3 config file
     """
     self.kafka_config = helper.parse_config(kafka_configfile)
     self.s3_config = helper.parse_config(s3_configfile)
     self.producer = KafkaProducer(
         bootstrap_servers=self.kafka_config["BROKERS_IP"])
class Producer():

    def __init__(self):
        self.producer = KafkaProducer(bootstrap_servers='localhost:9092')

    def produce_msgs(self, source):
        for drop in source:
            if 'text' in drop:
                message = json.dumps(drop)
                self.producer.send('Twitter-Stream', message.encode('utf-8'))
                print(message)
                self.producer.send('message-size', str(len(message)).encode())
Example no. 13
class Producer():
    def __init__(self):
        self.producer = KafkaProducer(bootstrap_servers='localhost:9092')

    def produce_msgs(self, source):
        for drop in source:
            if 'text' in drop:
                message = json.dumps(drop)
                self.producer.send('Twitter-Stream', message.encode('utf-8'))
                print(message)
                self.producer.send('message-size', str(len(message)).encode())
Example no. 14
def main():
    s3 = boto3.resource('s3')
    bucket = s3.Bucket('nyc-tlc')
    # Iterates through all the objects, doing the pagination for you. Each obj
    # is an ObjectSummary, so it doesn't contain the body. You'll need to call
    # get to get the whole body.

    kafka_params = config('kafka')
    dataset_params = config('dataset')
    producer = KafkaProducer(bootstrap_servers=kafka_params['broker'])

    for obj in bucket.objects.all():
        key = obj.key
        if dataset_params['rider'] not in key:
            continue
        # building absolute file name
        file_name = 's3://nyc-tlc/' + key
        # skip the header row of each file
        firstline = True

        #processing the file
        for line in smart_open(file_name):

            print(line.decode('utf8'))
            if firstline:  # skip first line
                firstline = False
                continue


            line_split = line.decode('utf8').split(",")
            print(line_split)
            if len(line_split) < 20:  # skip malformed rows with too few columns
                continue
            if line_split[5] == '0' or line_split[6] == '0' or line_split[7] == '0' or line_split[8] == '0':
                continue
            else:
                start_point = (float(line_split[5]),float(line_split[6]))
                end_point = (float(line_split[7]), float(line_split[8]))
                print(start_point, end_point)

                trip_id = 'ride:' + str(datetime.now()) + ":" + str(random.randint(1, 1000))
                #formatting the message
                str_fmt = "{};{};{};{};{};{}"
                message_info = str_fmt.format(trip_id,
                                              start_point[0],
                                              start_point[1],
                                              end_point[0],
                                              end_point[1],
                                              "In Progress"
                                              )

                print(message_info)
                producer.send(kafka_params['rider_topic'], message_info.encode('utf8'))

    # flush buffered messages before main() returns
    producer.flush()
Example no. 15
    def __init__(self, ip_addr, kafka_config_infile, s3bucket_config_infile):
        if not os.path.exists('./tmp'):
            os.makedirs('./tmp')
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s %(levelname)s %(message)s',
                            filename='./tmp/kafka_producer.log',
                            filemode='w')
        self.logger = logging.getLogger('py4j')

        self.kafka_config = helpers.parse_config(kafka_config_infile)
        self.s3bucket_config = helpers.parse_config(s3bucket_config_infile)
        self.producer = KafkaProducer(bootstrap_servers=ip_addr)
Example no. 16
class Producer():
    def __init__(self):
        self.producer = KafkaProducer(bootstrap_servers=[
            "52.41.44.90:9092", "52.36.206.57:9092", "52.40.205.225:9092"
        ],
                                      acks=0,
                                      linger_ms=500)

    def produce_msgs(self, msg_list):
        while True:
            index = random.randrange(0, 999)
            json_msg = json.dumps(msg_list[index]).encode('utf-8')
            self.producer.send(topic, json_msg)
def my_producer(path='../01_data/occupancy_data.csv',
                topic='test',
                low=0.5,
                high=1.5,
                limit=0):
    producer = KafkaProducer(bootstrap_servers='localhost:9092')
    # a single random inter-message delay, drawn once and reused between sends
    rand = random.uniform(float(low), float(high))
    with open(path, 'rt') as f:
        for idx, line in enumerate(f):
            if idx == limit and limit != 0:
                break
            producer.send(topic, bytes(line, 'utf8'))
            sleep(rand)
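A hypothetical call, streaming the first 100 rows of the CSV to a local broker with a delay drawn from [0.2, 0.8) seconds:

my_producer(path='../01_data/occupancy_data.csv', topic='occupancy-test',
            low=0.2, high=0.8, limit=100)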
class Producer(object):
    def __init__(self, addr):
        self.producer = KafkaProducer(bootstrap_servers=addr)

    def produce_msgs(self):
        with bz2.open(args.path, 'rt', encoding='utf-8') as f:
            content = f.readlines()
            content = [x.strip() for x in content] 

        while True:
            for tweet in content:
                tweet_dict = json.loads(tweet)
                if 'text' in tweet_dict and tweet_dict['lang'] == 'en':
                    self.producer.send('twitter', tweet.encode('utf-8'))
Example no. 19
def main():

    producer = KafkaProducer(bootstrap_servers=config.KAFKA_SERVERS,
                             value_serializer=lambda v: json.dumps(v).encode())

    headers = [
        'CMTE_ID', 'AMNDT_IND', 'RPT_TP', 'TRANSACTION_PGI', 'IMAGE_NUM',
        'TRANSACTION_TP', 'ENTITY_TP', 'NAME', 'CITY', 'STATE', 'ZIP_CODE',
        'EMPLOYER', 'OCCUPATION', 'TRANSACTION_DT', 'TRANSACTION_AMT',
        'OTHER_ID', 'TRAN_ID', 'FILE_NUM', 'MEMO_CD', 'MEMO_TEXT', 'SUB_ID'
    ]

    with open('/home/ubuntu/2016/by_date/itcont_2016_10151005_20150726.txt') as f:
        data = f.readlines()

    data = [x.split("|") for x in data]

    for row in data:
        row = {h: x for h, x in zip(headers, row)}
        producer.send('data', row)

    # Block until every buffered record has been transmitted.
    producer.flush()
Example no. 20
 def init(self, args):
     print("Initialization of Kafka Python driver w/ args=%s" % args)
     try:
         self.hosts = args['hosts']
         self.topic = args['topic']
     except KeyError:
         print("Missing `hosts` or `topic` option...")
         return False
     # optional `programs` parameter to filter out messages
     if 'programs' in args:
         self.programs = parse_str_list(args['programs'])
         print("Found programs to filter against %s" % args['programs'])
     self.kafka_producer = KafkaProducer(bootstrap_servers=self.hosts)
     return True
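parse_str_list is external to this snippet; a plausible stand-in that turns a comma-separated option string into a list (an assumption, not the original helper):

def parse_str_list(value):
    # "nginx, sshd" -> ["nginx", "sshd"]
    return [item.strip() for item in value.split(',') if item.strip()]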
Example no. 21
class Producer(object):
    """
    Class to ingest ecg signal data and send them to kafka topic through kafka producer.
    """
    def __init__(self, ip_addr, kafka_config_infile, s3bucket_config_infile):
        if not os.path.exists('./tmp'):
            os.makedirs('./tmp')
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s %(levelname)s %(message)s',
                            filename='./tmp/kafka_producer.log',
                            filemode='w')
        self.logger = logging.getLogger('py4j')

        self.kafka_config = helpers.parse_config(kafka_config_infile)
        self.s3bucket_config = helpers.parse_config(s3bucket_config_infile)
        self.producer = KafkaProducer(bootstrap_servers=ip_addr)

    def produce_ecg_signal_msgs(self, file_key):
        """
        Produces messages and sends them to topic.
        """
        msg_cnt = 0

        while True:

            s3 = boto3.client('s3')
            obj = s3.get_object(Bucket=self.s3bucket_config['bucket'],
                                Key="%s_signals.txt" % file_key)
            for line in obj['Body'].iter_lines():
                message_info = None
                try:
                    linesplit = line.decode().split(',')
                    str_fmt = "{},{},{},{},{}"
                    message_info = str_fmt.format(
                        file_key, datetime.now(pytz.timezone('US/Eastern')),
                        linesplit[1], linesplit[2], linesplit[3])
                except Exception as e:
                    self.logger.error('fxn produce_ecg_signal_msgs error %s' %
                                      e)
                try:
                    msg = str.encode(message_info)
                except Exception as e:
                    msg = None
                    self.logger.debug('empty message %s' % e)
                if msg is not None:
                    self.producer.send(self.kafka_config['topic'], msg)
                    msg_cnt += 1
                print(message_info)
                time.sleep(0.001)
 def _get_producer(self,
                   bootstrap_servers: List[str]) -> Optional[KafkaProducer]:
     for bootstrap_server in bootstrap_servers:
         if bootstrap_server in self.producers:
             return self.producers[bootstrap_server]
         if bootstrap_server == DEFAULT_FLAG:
             return None
         if bootstrap_server in self.fail_pass:
             continue
         try:
             brokers = get_brokers(bootstrap_server)
             self.logger.debug(f"brokers from {bootstrap_server} {brokers}")
         except Exception as e:
             self.logger.warning(
                 f"can not get brokers {bootstrap_server} {e}")
             self.fail_pass[bootstrap_server] = 0
             continue
         for broker in brokers:
             if broker in self.producers:
                 producer = self.producers[broker]
                 self.producers.update(dict.fromkeys(brokers, producer))
                 self.producers[bootstrap_server] = producer
                 return producer
         try:
             producer = KafkaProducer(bootstrap_servers=brokers,
                                      **self.configs)
             self.producers.update(dict.fromkeys(brokers, producer))
             self.producers[bootstrap_server] = producer
             return producer
         except Exception as e:
             self.logger.warning(
                 f"can not init producer {bootstrap_server} {e}")
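get_brokers is also external to this snippet; judging from how it is used, it should return every broker address reachable behind a bootstrap server, so producers can be cached per cluster. A trivial stand-in (purely illustrative; a real version would query cluster metadata):

from typing import List

def get_brokers(bootstrap_server: str) -> List[str]:
    # placeholder: return the bootstrap address itself instead of
    # resolving the full broker list from cluster metadata
    return [bootstrap_server]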
Example no. 23
class Producer(object):
    def __init__(self, addr):
        self.producer = KafkaProducer(bootstrap_servers=addr)

    def produce_msgs(self, source_symbol):
        while True:
            # generate random values for the IP and URL, and get the current time for the timestamp
            ip_field = numpy.random.choice(ips)
            url_field = WEBSITE_NAME + "page" + str(
                numpy.random.randint(1, 1001))
            time_field = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

            # produce to the topic indicated in TOPIC_NAME
            str_fmt = "{};{};{};{}"
            message_info = str_fmt.format(source_symbol, time_field, ip_field,
                                          url_field)
            self.producer.send(TOPIC_NAME, message_info.encode('utf-8'))
class Producer(object):
    def __init__(self, addr):
        self.producer = KafkaProducer(bootstrap_servers=addr)

    def produce_msgs(self, source_symbol):
        price_field = random.randint(800, 1400)
        msg_cnt = 0
        while True:
            time_field = datetime.now().strftime("%Y%m%d %H%M%S")
            price_field += random.randint(-10, 10) / 10.0
            volume_field = random.randint(1, 1000)
            str_fmt = "{};{};{};{}"
            message_info = str_fmt.format(source_symbol, time_field,
                                          price_field, volume_field)
            print(message_info)
            self.producer.send('price_data_part4', message_info.encode('utf-8'))
            msg_cnt += 1
Example no. 25
class Producer(object):
    def __init__(self, addr):
        self.producer = KafkaProducer(bootstrap_servers=addr)

    def produce_msgs(self, source_symbol, file_name):
        msg_cnt = 0

        with open(file_name) as f:
            for line in f:
                time_field = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
                str_fmt = "{};{};{}"
                message_info = str_fmt.format(source_symbol, time_field, line)
                print(message_info)
                self.producer.send('transactions1', message_info.encode('utf-8'))
                print("{} transactions sent. Sending next one ...".format(msg_cnt))
                msg_cnt += 1
Example no. 26
 def __init__(self, kafkaHost, kafkaPort, tcpHost, tcpPort, group_id, topic,
              logTopic, interval):
     self.kafkaHost = kafkaHost
     self.kafkaPort = kafkaPort
     self.tcpHost = tcpHost
     self.tcpPort = tcpPort
     self.group_id = group_id
     self.topic = topic
     self.logTopic = logTopic
     self.interval = int(interval)
     self.consumer = KafkaConsumer(
         topic,
         bootstrap_servers=["{}:{}".format(kafkaHost, kafkaPort)],
         group_id=group_id,
         enable_auto_commit=False)
     self.producer = KafkaProducer(
         bootstrap_servers=["{}:{}".format(kafkaHost, kafkaPort)])
     self.tcpWriter = None
Example no. 27
def store_probe_results(site_id, status_code, regex_results):
    global g_config_kafka
    # I'll hardcode that SSL is required.
    producer = KafkaProducer(
        bootstrap_servers=[g_config_kafka['bootstrap_server']],
        security_protocol="SSL",
        ssl_cafile=g_config_kafka['ssl_cafile'],
        ssl_keyfile=g_config_kafka['ssl_keyfile'],
        ssl_certfile=g_config_kafka['ssl_certfile'])
    # Since both sides of this program are trusted, I can send the raw
    # dictionary (JSON-encoded) and decode it on the other end.

    message = {
        'site_id': site_id,
        'status_code': status_code,
        'regex_results': regex_results
    }
    ack = producer.send(g_config_kafka['topic_name'],
                        json.dumps(message).encode('utf-8'))
    return True
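send() returns a FutureRecordMetadata, and the function above returns True without waiting on it, so delivery is not actually confirmed. A sketch of blocking on the future with kafka-python's public API:

from kafka.errors import KafkaError

try:
    record_metadata = ack.get(timeout=10)  # block until acked or timed out
    print(record_metadata.topic, record_metadata.partition, record_metadata.offset)
except KafkaError as err:
    print("delivery failed:", err)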
Example no. 28
 def __init__(self, kafka_host, kafka_port, tcp_host, tcp_port, topic,
              log_topic):
     self.kafka_host = kafka_host
     self.kafka_port = kafka_port
     self.tcp_host = tcp_host
     self.tcp_port = tcp_port
     self.topic = topic
     self.log_topic = log_topic
     self.consumer = KafkaConsumer(
         topic,
         bootstrap_servers=["{}:{}".format(kafka_host, kafka_port)],
         enable_auto_commit=False,
         max_poll_records=1024 * 1024,
         max_partition_fetch_bytes=1024 * 1024 * 100)
     self.producer = KafkaProducer(
         bootstrap_servers=["{}:{}".format(kafka_host, kafka_port)])
     self.connections = {}
     self.sample_end_time = self.get_end_time(time())
     self.lastPolled = []
Example no. 29
class EventsProducer(Destination):
    """
    Kafka Producer
    Sets Destination of API Ingestion to Kafka Cluster
    """
    def __init__(self, addr, topic="git-events"):
        """Initializes with Broker Address and Topic Name"""
        self.producer = KafkaProducer(bootstrap_servers=addr,
                                      value_serializer=lambda m: json.dumps(m).encode('ascii'),
                                      api_version=(0, 1, 0))
        self.topic = topic

    ######## PRODUCE TO TOPIC
    def move_to_dest(self, filename, datestring):
        """Sends Local File to Kafka Topic"""
        with open(filename, 'r') as file:
            for line in file:
                d = json.loads(line)
                self.producer.send(self.topic, d).get()
Example no. 30
def connect_kafka_producer():
    producer = None
    
    try:
        producer = KafkaProducer(acks=0, compression_type='gzip', 
                                 bootstrap_servers=['localhost:9092'], 
                                 api_version=(0,10))
        
    except Exception as ex:
        print("Error: ", ex)
        
    return producer
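A short usage sketch for the helper above (topic name and payload are placeholders):

producer = connect_kafka_producer()
if producer is not None:
    producer.send('test-topic', b'hello, kafka')
    producer.flush()  # make sure the gzip-compressed batch is actually sent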
Example no. 31
    def on_status(self, status):
        print(status.text, "\n")

        msg = status.text.encode('utf-8')

        # Note: creating a new producer for every status is expensive;
        # normally a single producer would be created once and reused.
        producer = KafkaProducer(bootstrap_servers='0.0.0.0:9092')
        try:
            producer.send('twitterstream', msg)
        except Exception as err:
            print(err)
            return False
        return True
Example no. 32
class KafkaLoggingHandler(logging.Handler):
    def __init__(self, hosts_list, topic, **kwargs):
        logging.Handler.__init__(self)

        self.kafka_topic_name = topic
        self.producer = KafkaProducer(bootstrap_servers=hosts_list)

    def emit(self, record):
        # drop kafka logging to avoid infinite recursion
        if record.name == 'kafka':
            return
        try:
            # use default formatting
            msg = self.format(record)
            msg = str.encode(msg)

            self.producer.send(self.kafka_topic_name, msg)
            self.producer.flush()
        except (KeyboardInterrupt, SystemExit):
            raise
        except:
            self.handleError(record)

    def close(self):
        if self.producer is not None:
            self.producer.close()
        logging.Handler.close(self)
 def __init__(self):
     self.producer = KafkaProducer(
         bootstrap_servers=["52.41.44.90:9092", "52.36.206.57:9092",
                            "52.40.205.225:9092"],
         acks=0,
         linger_ms=500)
 def __init__(self):
     self.producer = KafkaProducer(bootstrap_servers='localhost:9092')
Example no. 35
# coding=utf-8

import logging

from kafka.producer import KafkaProducer


if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)

    producer = KafkaProducer(bootstrap_servers=["192.168.120.90:9092"])
    producer.send("wangybnet", b"Hello, World!")
    # flush before the interpreter exits, or the buffered message may never be sent
    producer.flush()
Example no. 36
from kafka.producer import KafkaProducer

import configparser
import socket

if __name__ == "__main__":

    config = configparser.ConfigParser()
    config.read('configuration.cfg')

    urlKafkaProducer = config.get('StreamingProperties', 'URLKafkaProducer')
    topicName = config.get('StreamingProperties', 'TopicName')

    virtualMachine = 'local'
    if socket.gethostname() == 'ubuntu':
        virtualMachine = socket.gethostname()

    if virtualMachine == 'local':
        fileName = config.get('StreamingProperties', 'StreamingFileLocal')

    else:
        fileName = config.get('StreamingProperties', 'StreamingFileVirtual')

    producer = KafkaProducer(bootstrap_servers=urlKafkaProducer)

    with open(fileName, 'r') as infile:
        for line in infile:
            # send() needs bytes on Python 3, so encode each line
            producer.send(topicName, line.encode('utf-8'))

    producer.flush()