def sim_msg_stream(self, sleep_time=0.25):
        """Send an endless stream of simulated messages to Kafka topic "messages".

        Each iteration picks a random county/state pair from
        ``self.county_state_list``, builds a payload with
        ``IngUt.create_json_message`` and publishes it through
        ``self.producer``. The loop has no exit condition, so the call only
        ends when an exception (e.g. ``KeyboardInterrupt``) propagates out.

        Args:
            sleep_time: float number of seconds to pause after each message;
                0 disables the pause entirely.

        Returns:
            None. In practice the method never returns normally.
        """
        msg_cnt = 0

        while True:
            # Progress/debug output. The parenthesized single-argument form
            # prints the same text on Python 2 and is valid on Python 3,
            # where the bare print statement is a syntax error.
            print(len(self.county_state_list))
            county, state = IngUt.select_random_county(self.county_state_list)

            # First six fields of the local time:
            # [year, month, day, hour, minute, second].
            timestamp = list(time.localtime()[0:6])

            # creator_id and sender_id are drawn independently, so they may
            # coincide; message_id is the running counter starting at 0.
            message_info = IngUt.create_json_message(
                county=county,
                state=state,
                rank=0,
                timestamp=timestamp,
                creator_id=random.randint(0, NUM_USERS - 1),
                sender_id=random.randint(0, NUM_USERS - 1),
                message_id=msg_cnt,
                message=fake.text())

            self.producer.send_messages('messages', message_info)
            print(timestamp)

            # Skip the call entirely for a zero delay (send as fast as possible).
            if sleep_time != 0:
                time.sleep(sleep_time)

            msg_cnt += 1
# Example no. 2
# 0
__author__ = 'aouyang1'

import time
from datetime import datetime
import IngestionUtilities as IngUt
from faker import Factory

# Faker instance; created here but not referenced by the statements below.
fake = Factory.create()

# Sizing parameters handed to the message generator.
NUM_USERS = 1000000
NUM_MESSAGES = 375000
NUM_REPLICATIONS = 75

# Not referenced by the statements below.
hadoop_remote_path = "/user/PuppyPlaydate/history/"

county_state_list = IngUt.parse_county_list("county_list.txt")

# Bounds of the date window: midnight 2013-01-01 to midnight 2015-02-04,
# first as struct_time values, then as local-time epoch seconds.
start_dtt = datetime(2013, 1, 1).timetuple()
end_dtt = datetime(2015, 2, 4).timetuple()
start_ts, end_ts = time.mktime(start_dtt), time.mktime(end_dtt)

# Runs at import time: generate the random message history for the window.
IngUt.gen_random_messages(
    county_state_list,
    start_ts,
    end_ts,
    reps=NUM_REPLICATIONS,
    num_messages=NUM_MESSAGES,
    num_users=NUM_USERS,
    date_model=IngUt.gen_random_date)

 def __init__(self, addr, county_list_path='ingestion/county_list.txt'):
     """Initialize the Producer for the Kafka broker at the given address.

     Args:
         addr: address of the Kafka broker, passed straight to KafkaClient.
         county_list_path: path of the county list file handed to
             IngUt.parse_county_list. Defaults to the previously
             hard-coded 'ingestion/county_list.txt'; being relative, it
             presumably resolves against the current working directory,
             so pass an explicit path when running from elsewhere.
     """
     self.client = KafkaClient(addr)
     self.producer = SimpleProducer(self.client)
     self.county_state_list = IngUt.parse_county_list(county_list_path)