class KafkaMetricSender(MetricSender):
    def __init__(self, config):
        super(KafkaMetricSender, self).__init__(config)
        kafka_config = config["output"]["kafka"]
        # default topic
        # self.topic = kafka_config["topic"].encode('utf-8')
        # producer
        self.broker_list = kafka_config["brokerList"]
        self.kafka_client = None
        self.kafka_producer = None

    def open(self):
        self.kafka_client = KafkaClient(self.broker_list, timeout=59)
        self.kafka_producer = SimpleProducer(
            self.kafka_client, batch_send=True, batch_send_every_n=500, batch_send_every_t=30
        )

    def send(self, msg, topic):
        self.kafka_producer.send_messages(topic, json.dumps(msg))

    def close(self):
        if self.kafka_producer is not None:
            self.kafka_producer.stop()
        if self.kafka_client is not None:
            self.kafka_client.close()
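A hypothetical usage sketch for the sender above: the config shape follows the constructor, the broker address, topic name and metric payload are placeholders, and the MetricSender base class is assumed to accept the same dict.

config = {"output": {"kafka": {"brokerList": "localhost:9092"}}}  # assumed shape
sender = KafkaMetricSender(config)
sender.open()
try:
    # send() JSON-encodes the dict and publishes it to the given topic
    sender.send({"metric": "cpu.load", "value": 0.42}, "metrics")
finally:
    sender.close()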
Example #2
    def test_simple_producer(self):
        start_offset0 = self.current_offset(self.topic, 0)
        start_offset1 = self.current_offset(self.topic, 1)
        producer = SimpleProducer(self.client)

        # Goes to first partition, randomly.
        resp = producer.send_messages(self.topic, self.msg("one"),
                                      self.msg("two"))
        self.assert_produce_response(resp, start_offset0)

        # Goes to the next partition, randomly.
        resp = producer.send_messages(self.topic, self.msg("three"))
        self.assert_produce_response(resp, start_offset1)

        self.assert_fetch_offset(
            0, start_offset0,
            [self.msg("one"), self.msg("two")])
        self.assert_fetch_offset(1, start_offset1, [self.msg("three")])

        # Goes back to the first partition because there are only two partitions
        resp = producer.send_messages(self.topic, self.msg("four"),
                                      self.msg("five"))
        self.assert_produce_response(resp, start_offset0 + 2)
        self.assert_fetch_offset(0, start_offset0, [
            self.msg("one"),
            self.msg("two"),
            self.msg("four"),
            self.msg("five")
        ])

        producer.stop()
Example #3
class KafkaMetricSender(MetricSender):
    def __init__(self, config):
        super(KafkaMetricSender, self).__init__(config)
        kafka_config = config["output"]["kafka"]
        # default topic
        # self.topic = kafka_config["topic"].encode('utf-8')
        # producer
        self.broker_list = kafka_config["brokerList"]
        self.kafka_client = None
        self.kafka_producer = None

    def open(self):
        self.kafka_client = KafkaClient(self.broker_list, timeout=59)
        self.kafka_producer = SimpleProducer(self.kafka_client,
                                             batch_send=True,
                                             batch_send_every_n=500,
                                             batch_send_every_t=30)

    def send(self, msg, topic):
        self.kafka_producer.send_messages(topic, json.dumps(msg))

    def close(self):
        if self.kafka_producer is not None:
            self.kafka_producer.stop()
        if self.kafka_client is not None:
            self.kafka_client.close()
    def test_simple_producer(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [
            self.current_offset(self.topic, p) for p in partitions
        ]

        producer = SimpleProducer(self.client, random_start=False)

        # Goes to first partition, randomly.
        resp = producer.send_messages(self.topic, self.msg("one"),
                                      self.msg("two"))
        self.assert_produce_response(resp, start_offsets[0])

        # Goes to the next partition, randomly.
        resp = producer.send_messages(self.topic, self.msg("three"))
        self.assert_produce_response(resp, start_offsets[1])

        self.assert_fetch_offset(
            partitions[0], start_offsets[0],
            [self.msg("one"), self.msg("two")])
        self.assert_fetch_offset(partitions[1], start_offsets[1],
                                 [self.msg("three")])

        # Goes back to the first partition because there are only two partitions
        resp = producer.send_messages(self.topic, self.msg("four"),
                                      self.msg("five"))
        self.assert_produce_response(resp, start_offsets[0] + 2)
        self.assert_fetch_offset(partitions[0], start_offsets[0], [
            self.msg("one"),
            self.msg("two"),
            self.msg("four"),
            self.msg("five")
        ])

        producer.stop()
Example #5
    def test_batched_simple_producer__triggers_by_message(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [
            self.current_offset(self.topic, p) for p in partitions
        ]

        # Configure batch producer
        batch_messages = 5
        batch_interval = 5
        producer = SimpleProducer(self.client,
                                  batch_send=True,
                                  batch_send_every_n=batch_messages,
                                  batch_send_every_t=batch_interval,
                                  random_start=False)

        # Send 4 messages -- should not trigger a batch
        resp = producer.send_messages(
            self.topic,
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # It hasn't sent yet
        self.assert_fetch_offset(partitions[0], start_offsets[0], [])
        self.assert_fetch_offset(partitions[1], start_offsets[1], [])

        # send 3 more messages -- should trigger batch on first 5
        resp = producer.send_messages(
            self.topic,
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # send_messages groups all *msgs in a single call to the same partition
        # so we should see all messages from the first call in one partition
        self.assert_fetch_offset(partitions[0], start_offsets[0], [
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        ])

        # Because we are batching every 5 messages, we should only see one
        self.assert_fetch_offset(partitions[1], start_offsets[1], [
            self.msg("five"),
        ])

        producer.stop()
    def test_batched_simple_producer__triggers_by_time(self):
        self.skipTest("Flakey test -- should be refactored or removed")
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [
            self.current_offset(self.topic, p) for p in partitions
        ]

        batch_interval = 5
        producer = SimpleProducer(self.client,
                                  async_send=True,
                                  batch_send_every_n=100,
                                  batch_send_every_t=batch_interval,
                                  random_start=False)

        # Send 4 messages and do a fetch
        resp = producer.send_messages(
            self.topic,
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # It hasn't sent yet
        self.assert_fetch_offset(partitions[0], start_offsets[0], [])
        self.assert_fetch_offset(partitions[1], start_offsets[1], [])

        resp = producer.send_messages(
            self.topic,
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # Wait the timeout out
        time.sleep(batch_interval)

        self.assert_fetch_offset(partitions[0], start_offsets[0], [
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        ])

        self.assert_fetch_offset(partitions[1], start_offsets[1], [
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
        ])

        producer.stop()
    def test_acks_none(self):
        start_offset0 = self.current_offset(self.topic, 0)

        producer = SimpleProducer(self.client, req_acks=SimpleProducer.ACK_NOT_REQUIRED)
        resp = producer.send_messages(self.topic, self.msg("one"))
        self.assertEqual(len(resp), 0)

        self.assert_fetch_offset(0, start_offset0, [ self.msg("one") ])
        producer.stop()
    def test_acks_local_write(self):
        start_offset0 = self.current_offset(self.topic, 0)

        producer = SimpleProducer(self.client, req_acks=SimpleProducer.ACK_AFTER_LOCAL_WRITE)
        resp = producer.send_messages(self.topic, self.msg("one"))

        self.assert_produce_response(resp, start_offset0)
        self.assert_fetch_offset(0, start_offset0, [ self.msg("one") ])

        producer.stop()
    def test_async_simple_producer(self):
        start_offset0 = self.current_offset(self.topic, 0)

        producer = SimpleProducer(self.client, async_send=True, random_start=False)
        resp = producer.send_messages(self.topic, self.msg("one"))
        self.assertEqual(len(resp), 0)

        self.assert_fetch_offset(0, start_offset0, [ self.msg("one") ])

        producer.stop()
Example #10
    def test_acks_none(self):
        start_offset0 = self.current_offset(self.topic, 0)

        producer = SimpleProducer(self.client,
                                  req_acks=SimpleProducer.ACK_NOT_REQUIRED)
        resp = producer.send_messages(self.topic, self.msg("one"))
        self.assertEqual(len(resp), 0)

        self.assert_fetch_offset(0, start_offset0, [self.msg("one")])
        producer.stop()
    def test_batched_simple_producer__triggers_by_time(self):
        self.skipTest("Flakey test -- should be refactored or removed")
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [self.current_offset(self.topic, p) for p in partitions]

        batch_interval = 5
        producer = SimpleProducer(
            self.client,
            async_send=True,
            batch_send_every_n=100,
            batch_send_every_t=batch_interval,
            random_start=False)

        # Send 4 messages and do a fetch
        resp = producer.send_messages(
            self.topic,
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # It hasn't sent yet
        self.assert_fetch_offset(partitions[0], start_offsets[0], [])
        self.assert_fetch_offset(partitions[1], start_offsets[1], [])

        resp = producer.send_messages(self.topic,
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # Wait the timeout out
        time.sleep(batch_interval)

        self.assert_fetch_offset(partitions[0], start_offsets[0], [
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        ])

        self.assert_fetch_offset(partitions[1], start_offsets[1], [
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
        ])

        producer.stop()
Example #12
def main():
    """Run a consumer.

    Two environment variables are expected:

    * CONFIG_URI: A PasteDeploy URI pointing at the configuration for the
      application.
    * QUEUE: The name of the queue to consume (currently one of "events" or
      "errors").

    """
    config_uri = os.environ["CONFIG_URI"]
    config = paste.deploy.loadwsgi.appconfig(config_uri)

    logging.config.fileConfig(config["__file__"])

    queue_name = os.environ["QUEUE"]
    queue = MessageQueue("/" + queue_name,
                         max_messages=MAXIMUM_QUEUE_LENGTH,
                         max_message_size=MAXIMUM_EVENT_SIZE)

    metrics_client = baseplate.make_metrics_client(config)

    topic_name = config["topic." + queue_name]

    producer_options = {
        "codec": CODEC_GZIP,
        "batch_send_every_n": 20,
        "batch_send_every_t": 0.01,  # 10 milliseconds
    }

    while True:
        try:
            kafka_client = KafkaClient(config["kafka_brokers"])
            kafka_producer = SimpleProducer(kafka_client, **producer_options)
        except KafkaError as exc:
            _LOG.warning("could not connect: %s", exc)
            metrics_client.counter("injector.connection_error").increment()
            time.sleep(_RETRY_DELAY)
            continue

        while True:
            message = queue.get()
            for retry in itertools.count():
                try:
                    kafka_producer.send_messages(topic_name, message)
                except KafkaError as exc:
                    _LOG.warning("failed to send message: %s", exc)
                    metrics_client.counter("injector.error").increment()
                    time.sleep(_RETRY_DELAY)
                else:
                    metrics_client.counter("collected.injector").increment()
                    break
        kafka_producer.stop()
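A hypothetical way to launch the consumer above; the PasteDeploy URI and queue name are placeholders, and main() then loops forever pulling from the queue.

import os

os.environ["CONFIG_URI"] = "config:/etc/injector/production.ini"  # placeholder PasteDeploy URI
os.environ["QUEUE"] = "events"                                    # or "errors"
main()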
    def test_batched_simple_producer__triggers_by_message(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [self.current_offset(self.topic, p) for p in partitions]

        # Configure batch producer
        batch_messages = 5
        batch_interval = 5
        producer = SimpleProducer(
            self.client,
            batch_send=True,
            batch_send_every_n=batch_messages,
            batch_send_every_t=batch_interval,
            random_start=False)

        # Send 4 messages -- should not trigger a batch
        resp = producer.send_messages(self.topic,
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # It hasn't sent yet
        self.assert_fetch_offset(partitions[0], start_offsets[0], [])
        self.assert_fetch_offset(partitions[1], start_offsets[1], [])

        # send 3 more messages -- should trigger batch on first 5
        resp = producer.send_messages(self.topic,
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # send_messages groups all *msgs in a single call to the same partition
        # so we should see all messages from the first call in one partition
        self.assert_fetch_offset(partitions[0], start_offsets[0], [
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        ])

        # Because we are batching every 5 messages, we should only see one
        self.assert_fetch_offset(partitions[1], start_offsets[1], [
            self.msg("five"),
        ])

        producer.stop()
Example #14
    def test_acks_local_write(self):
        start_offset0 = self.current_offset(self.topic, 0)

        producer = SimpleProducer(
            self.client, req_acks=SimpleProducer.ACK_AFTER_LOCAL_WRITE)
        resp = producer.send_messages(self.topic, self.msg("one"))

        self.assert_produce_response(resp, start_offset0)
        self.assert_fetch_offset(0, start_offset0, [self.msg("one")])

        producer.stop()
Example #15
    def test_acks_cluster_commit(self):
        start_offset0 = self.current_offset(self.topic, 0)

        producer = SimpleProducer(
            self.client, req_acks=SimpleProducer.ACK_AFTER_CLUSTER_COMMIT)

        resp = producer.send_messages(self.topic, self.msg("one"))
        self.assert_produce_response(resp, start_offset0)
        self.assert_fetch_offset(0, start_offset0, [self.msg("one")])

        producer.stop()
Example #16
    def test_async_simple_producer(self):
        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
        start_offset = self.current_offset(self.topic, partition)

        producer = SimpleProducer(self.client, async_send=True, random_start=False)
        resp = producer.send_messages(self.topic, self.msg("one"))
        self.assertEqual(len(resp), 0)

        # flush messages
        producer.stop()

        self.assert_fetch_offset(partition, start_offset, [ self.msg("one") ])
    def test_async_simple_producer(self):
        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
        start_offset = self.current_offset(self.topic, partition)

        producer = SimpleProducer(self.client, async_send=True, random_start=False)
        resp = producer.send_messages(self.topic, self.msg("one"))
        self.assertEqual(len(resp), 0)

        # flush messages
        producer.stop()

        self.assert_fetch_offset(partition, start_offset, [ self.msg("one") ])
def outputPrediction(iter):
    #print "output prediction: len = %d, host = %s, pid = %d, tid = %d" % (len(inputs), socket.gethostname(), os.getpid(), thread.get_ident())
    producer = None
    for activity in iter:
        if producer is None:
            producer = SimpleProducer(KafkaClient(kafkaHost))

        print "[%s] Prediction - user=%s, activity=%s" % (datetime.now(), activity.user_id, activity.activity_label)
        producer.send_messages(topicOut, activity.serialize())

    if producer is not None:
        producer.stop()
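A hypothetical Spark hook-up for outputPrediction above; predictions is assumed to be an RDD of activity objects exposing user_id, activity_label and serialize(), and kafkaHost / topicOut are module-level globals from the snippet.

# run the producer once per partition so each executor opens a single Kafka connection
predictions.foreachPartition(outputPrediction)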
Example #19
def main():
    """Run a consumer.

    Two environment variables are expected:

    * CONFIG_URI: A PasteDeploy URI pointing at the configuration for the
      application.
    * QUEUE: The name of the queue to consume (currently one of "events" or
      "errors").

    """
    config_uri = os.environ["CONFIG_URI"]
    config = paste.deploy.loadwsgi.appconfig(config_uri)

    logging.config.fileConfig(config["__file__"])

    queue_name = os.environ["QUEUE"]
    queue = MessageQueue("/" + queue_name,
        max_messages=MAXIMUM_QUEUE_LENGTH, max_message_size=MAXIMUM_EVENT_SIZE)

    metrics_client = baseplate.make_metrics_client(config)

    topic_name = config["topic." + queue_name]

    producer_options = {
        "codec": CODEC_GZIP,
        "batch_send_every_n": 20,
        "batch_send_every_t": 0.01,  # 10 milliseconds
    }

    while True:
        try:
            kafka_client = KafkaClient(config["kafka_brokers"])
            kafka_producer = SimpleProducer(kafka_client, **producer_options)
        except KafkaError as exc:
            _LOG.warning("could not connect: %s", exc)
            metrics_client.counter("injector.connection_error").increment()
            time.sleep(_RETRY_DELAY)
            continue

        while True:
            message = queue.get()
            for retry in itertools.count():
                try:
                    kafka_producer.send_messages(topic_name, message)
                except KafkaError as exc:
                    _LOG.warning("failed to send message: %s", exc)
                    metrics_client.counter("injector.error").increment()
                    time.sleep(_RETRY_DELAY)
                else:
                    metrics_client.counter("collected.injector").increment()
                    break
        kafka_producer.stop()
    def test_acks_cluster_commit(self):
        start_offset0 = self.current_offset(self.topic, 0)

        producer = SimpleProducer(
            self.client,
            req_acks=SimpleProducer.ACK_AFTER_CLUSTER_COMMIT)

        resp = producer.send_messages(self.topic, self.msg("one"))
        self.assert_produce_response(resp, start_offset0)
        self.assert_fetch_offset(0, start_offset0, [ self.msg("one") ])

        producer.stop()
Example #21
    def test_batched_simple_producer__triggers_by_time(self):
        start_offset0 = self.current_offset(self.topic, 0)
        start_offset1 = self.current_offset(self.topic, 1)

        producer = SimpleProducer(self.client,
                                  batch_send=True,
                                  batch_send_every_n=100,
                                  batch_send_every_t=5,
                                  random_start=False)

        # Send 4 messages and do a fetch
        resp = producer.send_messages(
            self.topic,
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # It hasn't sent yet
        self.assert_fetch_offset(0, start_offset0, [])
        self.assert_fetch_offset(1, start_offset1, [])

        resp = producer.send_messages(
            self.topic,
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # Wait the timeout out
        time.sleep(5)

        self.assert_fetch_offset(0, start_offset0, [
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        ])

        self.assert_fetch_offset(1, start_offset1, [
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
        ])

        producer.stop()
class WeatherProducer(object):

    def __init__(self, ip_address, topic='weather', port='9092'):
        self.topic = topic  
        self.kafka = KafkaClient(ip_address + ':' + port)
        self.producer = SimpleProducer(self.kafka)

    def publish_weather_data(self):
        metro = MetroDataset()
        while True:
            current_time = datetime.datetime.now().strftime('%d/%m/%y %H:%M')
            date_time = current_time.split(" ")
            time_now = date_time[1]
            time_list = time_now.split(":")
            minutes = time_list[1]
            interval_flag = int(minutes)/30.0
            if interval_flag == 0.0 or interval_flag == 1.0:
                try:
                    response = self.producer.send_messages(self.topic,
                                                json.dumps(metro.publish_data()))
                except LeaderNotAvailableError:
                    time.sleep(1)
                    response = self.producer.send_messages(self.topic,
                                                json.dumps(metro.publish_data()))
                print response
                time.sleep(70)
        self.producer.stop()


    def publish_to_file(self):
        metro = MetroDataset()
        data = {}
        while True:
            try:
                current_time = datetime.datetime.now().strftime('%d/%m/%y %H:%M')
                date_time = current_time.split(" ")
                time_now = date_time[1]
                time_list = time_now.split(":")
                minutes = time_list[1]
                interval_flag = int(minutes)/30.0     
                if interval_flag == 0.0 or interval_flag == 1.0:
                    metro_data = metro.publish_data() 
                    if data.has_key(metro_data['date']):
                        if not data[metro_data['date']].has_key(metro_data['time']):
                            data[metro_data['date']].update({metro_data['time']: metro_data['data']})
                    else:
                        data[metro_data['date']] = {metro_data['time']: metro_data['data']}
                print data
            except:
                with open('data.json', 'w') as outfile:
                    json.dump(data, outfile)
                sys.exit()
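A hypothetical driver for WeatherProducer above; the broker address is a placeholder.

producer = WeatherProducer('localhost')
# blocks indefinitely, publishing a reading roughly every half hour
producer.publish_weather_data()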
    def test_batched_simple_producer__triggers_by_time(self):
        start_offset0 = self.current_offset(self.topic, 0)
        start_offset1 = self.current_offset(self.topic, 1)

        producer = SimpleProducer(self.client,
            batch_send=True,
            batch_send_every_n=100,
            batch_send_every_t=5,
            random_start=False)

        # Send 4 messages and do a fetch
        resp = producer.send_messages(self.topic,
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # It hasn't sent yet
        self.assert_fetch_offset(0, start_offset0, [])
        self.assert_fetch_offset(1, start_offset1, [])

        resp = producer.send_messages(self.topic,
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # Wait the timeout out
        time.sleep(5)

        self.assert_fetch_offset(0, start_offset0, [
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        ])

        self.assert_fetch_offset(1, start_offset1, [
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
        ])

        producer.stop()
Example #24
def partitionsLoadBalance():
    '''
       Send the specified message to the kafka topic "test", 5000 times in this case
    '''
    k1 = KafkaClient('localhost:9092')
    producer = SimpleProducer(k1)

    for i in range(200):
        msg = 'This is %dst test' % (i)
        msg1 = ''
        for j in range(2000):
            msg1 = ':'.join([msg1, msg])
        producer.send_messages(b'test', msg1)
    producer.stop()
def partitionsLoadBalance():
    """
       发送指定信息到kafka-topic(test),此时为5000次 
    """
    k1 = KafkaClient("localhost:9092")
    producer = SimpleProducer(k1)

    for i in range(200):
        msg = "This is %dst test" % (i)
        msg1 = ""
        for j in range(2000):
            msg1 = ":".join([msg1, msg])
        producer.send_messages(b"test", msg1)
    producer.stop()
    def test_async_simple_producer(self):
        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
        start_offset = self.current_offset(self.topic, partition)

        producer = SimpleProducer(self.client, async_send=True, random_start=False)
        resp = producer.send_messages(self.topic, self.msg("one"))
        self.assertEqual(len(resp), 0)

        # wait for the server to report a new highwatermark
        while self.current_offset(self.topic, partition) == start_offset:
          time.sleep(0.1)

        self.assert_fetch_offset(partition, start_offset, [ self.msg("one") ])

        producer.stop()
class EnergyProducer(object):

    def __init__(self, ip_address, topic='energy', port='9092'):
        self.topic = topic  
        self.kafka = KafkaClient(ip_address + ':' + port)
        self.producer = SimpleProducer(self.kafka)

    def replay_energy_data(self,WORKING_DIR=PRODUCER_DIR, start=10 ,end=11, year=2015):
        dataset = EnergyDataset()
        for num_month in range(start, end + 1):
            month = datetime.date(1900, num_month, 1).strftime('%b')
            print month
            data = dataset.replay(PRODUCER_DIR, month.lower() ,year)
            response = self.producer.send_messages(self.topic,json.dumps(data))
            print response
        self.producer.stop()
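A hypothetical replay call for EnergyProducer above; the broker address is a placeholder and PRODUCER_DIR is assumed to be defined in the snippet's module.

producer = EnergyProducer('localhost')
# replays the October and November 2015 datasets to the 'energy' topic
producer.replay_energy_data(start=10, end=11, year=2015)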
Example #28
class SimpleProducer(BaseStreamProducer):
    def __init__(self, connection, topic):
        self._connection = connection
        self._topic = topic
        self._create()

    def _create(self):
        self._producer = KafkaSimpleProducer(self._connection, codec=CODEC_SNAPPY)

    def send(self, key, *messages):
        self._producer.send_messages(self._topic, *messages)

    def flush(self):
        self._producer.stop()
        del self._producer
        self._create()

    def get_offset(self, partition_id):
        # Kafka has it's own offset management
        raise KeyError
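A minimal sketch of how the stream-producer wrapper above might be driven; the broker address and topic are placeholders, and because the wrapper uses codec=CODEC_SNAPPY the python-snappy package is assumed to be installed.

connection = KafkaClient("localhost:9092")
stream = SimpleProducer(connection, b"frontier-stream")
stream.send(None, b"msg-1", b"msg-2")  # the key argument is ignored by this wrapper
stream.flush()                         # stops and recreates the underlying producer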
    def test_simple_producer(self):
        start_offset0 = self.current_offset(self.topic, 0)
        start_offset1 = self.current_offset(self.topic, 1)
        producer = SimpleProducer(self.client, random_start=False)

        # Goes to first partition, randomly.
        resp = producer.send_messages(self.topic, self.msg("one"), self.msg("two"))
        self.assert_produce_response(resp, start_offset0)

        # Goes to the next partition, randomly.
        resp = producer.send_messages(self.topic, self.msg("three"))
        self.assert_produce_response(resp, start_offset1)

        self.assert_fetch_offset(0, start_offset0, [ self.msg("one"), self.msg("two") ])
        self.assert_fetch_offset(1, start_offset1, [ self.msg("three") ])

        # Goes back to the first partition because there are only two partitions
        resp = producer.send_messages(self.topic, self.msg("four"), self.msg("five"))
        self.assert_produce_response(resp, start_offset0+2)
        self.assert_fetch_offset(0, start_offset0, [ self.msg("one"), self.msg("two"), self.msg("four"), self.msg("five") ])

        producer.stop()
Example #31
class HeartBeat:

    def __init__(self,qinfo):

        self.topic = qinfo['kafka_topic']
        self.client = KafkaClient(qinfo['kafka_broke'])
        self.producer = SimpleProducer(self.client, codec=CODEC_SNAPPY)


    def send(self,name,num=1):
        data = {
            "name":name,
            "num":num,
            "time":int(time.time())
        }
        print "***************send********************"
        data_str = json.dumps(data)
        self.producer.send_messages(self.topic, data_str)

    def close(self):
        self.client.close()
        self.producer.stop()
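A hypothetical use of HeartBeat above; the broker address and topic are placeholders (note that the snippet's own config key is spelled 'kafka_broke').

hb = HeartBeat({"kafka_topic": "heartbeats", "kafka_broke": "localhost:9092"})
hb.send("collector-1", num=3)  # publishes {"name", "num", "time"} as JSON
hb.close()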
    def test_simple_producer(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [self.current_offset(self.topic, p) for p in partitions]

        producer = SimpleProducer(self.client, random_start=False)

        # Goes to first partition, randomly.
        resp = producer.send_messages(self.topic, self.msg("one"), self.msg("two"))
        self.assert_produce_response(resp, start_offsets[0])

        # Goes to the next partition, randomly.
        resp = producer.send_messages(self.topic, self.msg("three"))
        self.assert_produce_response(resp, start_offsets[1])

        self.assert_fetch_offset(partitions[0], start_offsets[0], [ self.msg("one"), self.msg("two") ])
        self.assert_fetch_offset(partitions[1], start_offsets[1], [ self.msg("three") ])

        # Goes back to the first partition because there are only two partitions
        resp = producer.send_messages(self.topic, self.msg("four"), self.msg("five"))
        self.assert_produce_response(resp, start_offsets[0]+2)
        self.assert_fetch_offset(partitions[0], start_offsets[0], [ self.msg("one"), self.msg("two"), self.msg("four"), self.msg("five") ])

        producer.stop()
Example #33
def simpleTest():
    # Test the connection using the Kafka server at bamboo:9192
    try:
        kafka = KafkaClient('bamboo:9192')
    except KafkaUnavailableError as msg:
        print("KafkaUnavailableError:", msg)
        sys.exit(-1)
    except Exception as msg:
        print("Exception:", msg)
        sys.exit(-1)
    producer = SimpleProducer(kafka)
    print(kafka.topics)
    '''
    Note that the application is responsible
    for encoding messages to type bytes
    '''
    if 'JOB_TEST_1' in kafka.topics:
        producer.send_messages(b'JOB_TEST_1', b'some message')
        producer.send_messages(b'JOB_TEST_1', b'this method', b'is variadic')

        # Send unicode message
        producer.send_messages(b'JOB_TEST_1', '你怎么样?'.encode('utf-8'))
        producer.stop()
Example #34
    def upload_file_to_kafka(self, topic, file_path, **kwargs):
        """
        Utility function to upload contents of file to a given kafka topic
        :param topic: Kafka topic to which the file will be uploaded
        :param file_path: Absolute path of the file to be uploaded
        :param kwargs: append - If True, the file content is uploaded to the existing topic;
            if the topic is not present, a new one is created.
            If False and the topic is not present, a new topic is created; if the topic is
            already present, an error is returned. Defaults to False.
        :return: True if content was uploaded else false
        """
        append = kwargs.get('append', False)
        result = False
        producer = None
        try:
            if not append:
                # Check if topic is already present
                if self.kafka.has_metadata_for_topic(topic):
                    print 'Error - Kafka topic : ' + topic + ' already present and append is : ' + str(append)
                    return False

            # In case of append is True and topic already present/not present
            # and append is False and topic already not present
            if self._ensure_kafka_topic_exists(topic):
                producer = SimpleProducer(self.kafka, batch_send=True,
                                          batch_send_every_n=20)
                with open(file_path, 'rU') as fh:
                    for line in fh:
                        producer.send_messages(topic, line.strip())
                result = True

        except:
            print 'Error - uploading file : ' + file_path + ' to topic : ' + topic
        finally:
            if producer:
                producer.stop()
        return result
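A hypothetical call to the helper above; the enclosing object (here called client), the topic and the file path are placeholders.

ok = client.upload_file_to_kafka("ingest-topic", "/tmp/events.txt", append=True)
if not ok:
    raise RuntimeError("upload to Kafka failed")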
Example #35
class KafkaLogger:
    def __init__(self):
        from kafka import SimpleProducer, KafkaClient
        from kafka.common import LeaderNotAvailableError
        self.kafka_client = KafkaClient(config.KAFKA_SERVER)
        self.kafka = SimpleProducer(self.kafka_client)

        self.head = HeadBuilder("db", "type", "tom", config.DB_NAME)
        try:
            self.kafka.send_messages(config.KAFKA_TOPIC, b"creating topic")
        except LeaderNotAvailableError:
            time.sleep(1)

    def close(self):
        self.kafka.stop(0)
        self.kafka_client.close()

    def start(self):
        pass

    def commit(self):
        pass

    def session_info(self):
        body = bb.session_info()
        now = int(time.time() * 1000)
        header = self.head.create('info', config.TOM, now)
        msg = '{"header": ' + header + ', "body":' + body + '}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))

    def cpu_sys(self, epoch, user_count, system_count, idle_count, percent):
        "Logs CPU metrics at system level"
        body = bb.cpu_sys(epoch, user_count, system_count, idle_count, percent)
        header = self.head.create('system_cpu', config.TOM, epoch)
        msg = '{"header": ' + header + ', "body":' + body + '}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))
#        if config.PRINT_CONSOLE: print(reply)

    def cpu_proc(self, epoch, pid, priority, ctx_count, n_threads, cpu_user,
                 cpu_system, percent, pname):
        "Logs CPU metrics at process level"
        epoch *= 1000  #this converts it into milliseconds
        body = bb.cpu_proc(epoch, pid, priority, ctx_count, n_threads,
                           cpu_user, cpu_system, percent, pname)
        header = self.head.create('process_cpu', config.TOM, epoch)
        msg = '{"header": ' + header + ', "body":' + body + '}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))
#        if config.PRINT_CONSOLE: print(reply)

    def mem_sys(self, epoch, available, percent, used, free, swap_total,
                swap_used, swap_free, swap_in, swap_out, swap_percent):
        "Logs memory metrics at system level"
        epoch *= 1000  #this converts it into milliseconds
        body = bb.mem_sys(epoch, available, percent, used, free, swap_total,
                          swap_used, swap_free, swap_in, swap_out,
                          swap_percent)
        header = self.head.create('system_memory', config.TOM, epoch)
        msg = '{"header": ' + header + ', "body":' + body + '}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))
#        if config.PRINT_CONSOLE: print(reply)

    def mem_proc(self, epoch, pid, rss, vms, percent, pname):
        "Logs memory metrics at process level"
        epoch *= 1000  #this converts it into milliseconds
        body = bb.mem_proc(epoch, pid, rss, vms, percent, pname)
        header = self.head.create('process_memory', config.TOM, epoch)
        msg = '{"header": ' + header + ', "body":' + body + '}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))
#        if config.PRINT_CONSOLE: print(reply)

    def io_sys(self, epoch, bytes_sent, bytes_recv, packets_sent, packets_recv,
               errin, errout, dropin, dropout):
        "Print a line to console and to a file"
        epoch *= 1000  #this converts it into milliseconds
        body = bb.io_sys(epoch, bytes_sent, bytes_recv, packets_sent,
                         packets_recv, errin, errout, dropin, dropout)
        header = self.head.create('system_io', config.TOM, epoch)
        msg = '{"header": ' + header + ', "body":' + body + '}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))
#        if config.PRINT_CONSOLE: print(reply)

    def proc_error(self, epoch, pid, name):
        "Print a line to console and to a file"
        epoch *= 1000  #this converts it into milliseconds
        body = bb.proc_error(epoch, pid, name)
        header = self.head.create('event', config.TOM, epoch)
        msg = '{"header": ' + header + ', "body":' + body + '}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))
#        if config.PRINT_CONSOLE: print(reply)

    def proc_info(self, epoch, pid, name):
        "Print a line to console and to a file"
        epoch *= 1000  #this converts it into milliseconds
        body = bb.proc_info(epoch, pid, name)
        header = self.head.create('process_info', config.TOM, epoch)
        msg = '{"header": ' + header + ', "body":' + body + '}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))


#        if config.PRINT_CONSOLE: print(reply)

# from kafka import SimpleProducer, KafkaClient
#
# # To send messages synchronously
# kafka = KafkaClient("192.168.2.79:9092")
# producer = SimpleProducer(kafka)
#
# # Note that the application is responsible for encoding messages to type str
# producer.send_messages("my-topic", b"some message")
# producer.send_messages("my-topic", b"this method", b"is variadic")
#
# # Send unicode message
# producer.send_messages("my-topic", u'你怎么样?'.encode('utf-8'))
#
# # To send messages asynchronously
# # WARNING: current implementation does not guarantee message delivery on failure!
# # messages can get dropped! Use at your own risk! Or help us improve with a PR!
# producer = SimpleProducer(kafka, async=True)
# producer.send_messages("my-topic", b"async message")
#
# # To wait for acknowledgements
# # ACK_AFTER_LOCAL_WRITE : server will wait till the data is written to
# #                         a local log before sending response
# # ACK_AFTER_CLUSTER_COMMIT : server will block until the message is committed
# #                            by all in sync replicas before sending a response
# producer = SimpleProducer(kafka, async=False,
#                           req_acks=SimpleProducer.ACK_AFTER_LOCAL_WRITE,
#                           ack_timeout=2000)
#
# response = producer.send_messages("my-topic", b"another message")
#
# if response:
#     print(response[0].error)
#     print(response[0].offset)
#
# # To send messages in batch. You can use any of the available
# # producers for doing this. The following producer will collect
# # messages in batch and send them to Kafka after 20 messages are
# # collected or every 60 seconds
# # Notes:
# # * If the producer dies before the messages are sent, there will be losses
# # * Call producer.stop() to send the messages and cleanup
# producer = SimpleProducer(kafka, batch_send=True,
#                           batch_send_every_n=20,
#                           batch_send_every_t=60)
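A minimal, uncommented condensation of the commented walkthrough above, assuming a local broker at a placeholder address; only calls that already appear in that block are used.

from kafka import SimpleProducer, KafkaClient

kafka = KafkaClient("localhost:9092")  # placeholder broker address

# synchronous producer that waits for the leader's local write before returning
producer = SimpleProducer(kafka,
                          req_acks=SimpleProducer.ACK_AFTER_LOCAL_WRITE,
                          ack_timeout=2000)
response = producer.send_messages("my-topic", b"another message")
if response:
    print(response[0].error)
    print(response[0].offset)

producer.stop()
kafka.close()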
    def test_batched_simple_producer__triggers_by_message(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [self.current_offset(self.topic, p) for p in partitions]

        # Configure batch producer
        batch_messages = 5
        batch_interval = 5
        producer = SimpleProducer(
            self.client,
            async_send=True,
            batch_send_every_n=batch_messages,
            batch_send_every_t=batch_interval,
            random_start=False)

        # Send 4 messages -- should not trigger a batch
        resp = producer.send_messages(
            self.topic,
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # It hasn't sent yet
        self.assert_fetch_offset(partitions[0], start_offsets[0], [])
        self.assert_fetch_offset(partitions[1], start_offsets[1], [])

        # send 3 more messages -- should trigger batch on first 5
        resp = producer.send_messages(
            self.topic,
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # Wait until producer has pulled all messages from internal queue
        # this should signal that the first batch was sent, and the producer
        # is now waiting for enough messages to batch again (or a timeout)
        timeout = 5
        start = time.time()
        while not producer.queue.empty():
            if time.time() - start > timeout:
                self.fail('timeout waiting for producer queue to empty')
            time.sleep(0.1)

        # send_messages groups all *msgs in a single call to the same partition
        # so we should see all messages from the first call in one partition
        self.assert_fetch_offset(partitions[0], start_offsets[0], [
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        ])

        # Because we are batching every 5 messages, we should only see one
        self.assert_fetch_offset(partitions[1], start_offsets[1], [
            self.msg("five"),
        ])

        producer.stop()
            print()
        self.counter += 1
        if self.counter == self.max_count:
            return False
        return True

    def on_error(self, status):
        print(status)
        return False


if __name__ == '__main__':

    parser = argparse.ArgumentParser(description='Streaming tweets into kafka')
    parser.add_argument('--verbose', type=bool, default=False)
    parser.add_argument('--count', type=int, default=10000)
    args = parser.parse_args()
    # connecting to the kafka server
    kafka_client = SimpleClient("localhost:9092")
    # creating a producer to this server
    kafka_producer = SimpleProducer(kafka_client)
    # creating a standard output listener of tweets
    listener = StdOutListener(producer=kafka_producer, count=args.count, verbose=args.verbose)
    # connecting to the twitter api
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    # streaming tweets from Trump
    stream = Stream(auth, listener)
    stream.filter(track=["trump"])
    kafka_producer.stop()
class KafkaMetricSender(MetricSender):
    start_time = time.time()
    end_time = time.time()

    def __init__(self, config):
        super(KafkaMetricSender, self).__init__(config)
        kafka_config = config["output"]["kafka"]
        # default topic
        self.default_topic = None
        if kafka_config.has_key("default_topic"):
            self.default_topic = kafka_config["default_topic"].encode('utf-8')
        self.component_topic_mapping = {}
        if kafka_config.has_key("component_topic_mapping"):
            self.component_topic_mapping = kafka_config["component_topic_mapping"]

        if not self.default_topic and not bool(self.component_topic_mapping):
            raise Exception("both kafka config 'topic' and 'component_topic_mapping' are empty")

        # producer
        self.broker_list = kafka_config["broker_list"]
        self.kafka_client = None
        self.kafka_producer = None
        self.debug_enabled = False
        self.sent_count = 0
        if kafka_config.has_key("debug"):
            self.debug_enabled = bool(kafka_config["debug"])
            logging.info("Overrode output.kafka.debug: " + str(self.debug_enabled))

    def get_topic_id(self, msg):
        if msg.has_key("component"):
            component = msg["component"]
            if self.component_topic_mapping.has_key(component):
                return self.component_topic_mapping[component]
            else:
                return self.default_topic
        else:
            if not self.default_topic:
                raise Exception("no default topic found for unknown-component msg: " + str(msg))
            return self.default_topic

    def open(self):
        logging.info("Opening kafka connection for producer")
        self.kafka_client = KafkaClient(self.broker_list, timeout=50)
        self.kafka_producer = SimpleProducer(self.kafka_client, batch_send=False, batch_send_every_n=500,
                                             batch_send_every_t=30)
        self.start_time = time.time()

    def send(self, msg):
        if self.debug_enabled:
            logging.info("Send message: " + str(msg))
        self.sent_count += 1
        self.kafka_producer.send_messages(self.get_topic_id(msg), json.dumps(msg))

    def close(self):
        logging.info("Closing kafka connection and producer")
        if self.kafka_producer is not None:
            self.kafka_producer.stop()
        if self.kafka_client is not None:
            self.kafka_client.close()

        self.end_time = time.time()
        logging.info("Totally sent " + str(self.sent_count) + " metric events in "+str(self.end_time - self.start_time)+" sec")
Example #39
class KafkaMetricSender(MetricSender):
    def __init__(self, config):
        super(KafkaMetricSender, self).__init__(config)
        kafka_config = config["output"]["kafka"]
        # default topic
        self.default_topic = None
        if kafka_config.has_key("default_topic"):
            self.default_topic = kafka_config["default_topic"].encode('utf-8')
        self.component_topic_mapping = {}
        if kafka_config.has_key("component_topic_mapping"):
            self.component_topic_mapping = kafka_config[
                "component_topic_mapping"]

        if not self.default_topic and not bool(self.component_topic_mapping):
            raise Exception(
                "both kafka config 'topic' and 'component_topic_mapping' are empty"
            )

        # producer
        self.broker_list = kafka_config["broker_list"]
        self.kafka_client = None
        self.kafka_producer = None
        self.debug_enabled = False
        self.sent_count = 0
        if kafka_config.has_key("debug"):
            self.debug_enabled = bool(kafka_config["debug"])
            logging.info("Overrode output.kafka.debug: " +
                         str(self.debug_enabled))

    def get_topic_id(self, msg):
        if msg.has_key("component"):
            component = msg["component"]
            if self.component_topic_mapping.has_key(component):
                return self.component_topic_mapping[component]
            else:
                return self.default_topic
        else:
            if not self.default_topic:
                raise Exception(
                    "no default topic found for unknown-component msg: " +
                    str(msg))
            return self.default_topic

    def open(self):
        self.kafka_client = KafkaClient(self.broker_list, timeout=59)
        self.kafka_producer = SimpleProducer(self.kafka_client,
                                             batch_send=True,
                                             batch_send_every_n=500,
                                             batch_send_every_t=30)

    def send(self, msg):
        if self.debug_enabled:
            logging.info("Send message: " + str(msg))
        self.sent_count += 1
        self.kafka_producer.send_messages(self.get_topic_id(msg),
                                          json.dumps(msg))

    def close(self):
        logging.info("Totally sent " + str(self.sent_count) + " metric events")
        if self.kafka_producer is not None:
            self.kafka_producer.stop()
        if self.kafka_client is not None:
            self.kafka_client.close()
    def test_simple_producer_new_topic(self):
        producer = SimpleProducer(self.client)
        resp = producer.send_messages('new_topic', self.msg('foobar'))
        self.assert_produce_response(resp, 0)
        producer.stop()
class KafkaDriver:

    def __init__(self, driver_args, event_loop):
        self.logger = logging.getLogger('KafkaDriver') # possible TODO: get logger from invoker
        self.logger.setLevel(logging.INFO)
        console_log_handler = logging.StreamHandler(sys.stdout)
        self.logger.addHandler(console_log_handler)

        self.logger.info("KafkaDriver initialized; driver_args=%s" % (driver_args))
        self.event_loop = event_loop
        if driver_args == "":
            kafka_server_addr = "localhost:9092"
        else:
            kafka_server_addr = driver_args
        client_id = "KafkaDriver-%d-%d" % (time.time(), os.getpid()) # generate a unique client ID so that Kafka doesn't confuse us with a different instance
        self.kafka = KafkaClient(kafka_server_addr, client_id=client_id)

        self.queue_name = None
        ## APPEND direction
        self.get_message_stream = None
        # how frequently to add check for messages and (space permitting) to add them to the GET message stream, in seconds
        self.MESSAGE_CHECK_FREQ = 0.010
        # how many message we have sent from various queues
        self.get_message_count = 0
        self.producer = None
        ## GET direction
        self.consumer = None
        self.get_message_count = 0
        self.MAX_KAFKA_REQ_BATCH_MSGS = 200 # most number of messages that we will request from Kafka at a time

    ######## APPEND direction ########

    # called to tell driver of a new stream of appends than are going to come in; these should go to the end of the named queue
    def prepare_for_append_stream(self, queue_name):
        self.logger.info("KafkaDriver prepare_for_append_stream got: queue_name=%s" % (queue_name))
        self.queue_name = str(queue_name)
        self.producer = SimpleProducer(
            self.kafka,
            async_send=True,
            req_acks=SimpleProducer.ACK_AFTER_LOCAL_WRITE,
            ack_timeout=5000,
            batch_send=True,
            batch_send_every_n= 100,
            batch_send_every_t=1000,
            random_start=True
        )

    def append(self, payload, ttl):
        ttl = int(ttl)
        self.logger.debug("KafkaDriver append got: ttl=%d, payload='%s'" % (ttl, payload))
        try:
            self.producer.send_messages(self.queue_name,payload)
        except UnknownTopicOrPartitionError:
            self.logger.warn("Kafka reports unknown topic or invalid partition number: " + str(sys.exc_info()))
            return 500
        except:
            self.logger.warn("Got exception from kafka-python SimpleProducer:" + str(sys.exc_info()))
            return 500

        # if random.uniform(0,1) < self.FRACTION_MSGS_TO_FAKE_APPEND_ERROR:
        #     self.logger.debug("faking error")
        #     return 400
        return 100

    def cancel_append_stream(self):
        self.logger.info("KafkaDriver cancel_append_stream got called")
        self.producer.stop()
        self.producer = None
        self.queue_name = None

    ######## GET direction ########

    # called to tell driver that a new stream of messages is needed for return to a client.   message_stream_queue is an instance of MessageStream to use to put messages the driver has available as a response to this request.  Other arguments have same meaning as in the Marconi API.
    def init_get_stream(self, get_message_stream, queue_name_spec, starting_marker, echo_requested, include_claimed):
        self.logger.info("KafkaDriver prepare_to_get_messages got: queue_name=%s, echo_requested=%s, include_claimed=%s, starting_marker=%s" % (queue_name_spec,str(echo_requested),str(include_claimed),starting_marker))
        self.logger.info("warning: KafkaDriver ignores echo_requested and include_claimed in GET requests")
        self.consume_group = "cg1" # default consume group
        if len(starting_marker) > 0:
            self.consume_group = starting_marker
        self.logger.info("consume group="+self.consume_group)

        # if the queue name ends with "/<n>", we interpret <n> as the partition to read from
        if "/" in queue_name_spec:
            queue_name, partition_part = queue_name_spec.split("/", 1)
            partition = int(partition_part)
            self.logger.info("limiting topic %s to partition %d" % (queue_name, partition))
        else:
            queue_name = queue_name_spec
            partition = None

        self.get_message_stream = get_message_stream
        self.queue_name = str(queue_name)
        self.consumer = SimpleConsumer(
            client=self.kafka,
            group=self.consume_group,
            topic=self.queue_name,
            partitions=[partition] if partition is not None else None,
            auto_commit=False, # it seems we cannot do any kind of commit when using kafka-python 0.9.1 with Kafka versions before 0.8.1 because kafka-python will send an OffsetFetchRequest (request type 9) or OffsetCommitRequest (request type 8) which is not supported
            fetch_size_bytes=self.MAX_KAFKA_REQ_BATCH_MSGS*4096, # in Marconi, messages can be up to 4k
            iter_timeout=None,
        )
        self.logger.debug("KafkaDriver: seeking to head of %s" % (self.queue_name))
        self.consumer.seek(0,0) # seek to head of topic; TODO: should get starting position from starting_marker param

        self.periodically_check_for_new_messages() # kick of periodic attainment of new messages (space permitting)

    def periodically_check_for_new_messages(self):
        #self.logger.debug("KafkaDriver.periodically_check_for_new_messages()")
        if self.get_message_stream is not None: # still providing messages
            self.check_for_new_messages()
            # TODO: call call_soon() rather than call_later() if we got some messages and there is still space available in the MessageStream
            self.new_msg_check_callback = self.event_loop.call_later(self.MESSAGE_CHECK_FREQ, self.periodically_check_for_new_messages) # schedules self to run again after MESSAGE_CHECK_FREQ seconds

    def check_for_new_messages(self):
        self.logger.debug("KafkaDriver.check_for_new_messages (start): space_used=%d, amount_of_space_avail=%d" % (self.get_message_stream.space_used(), self.get_message_stream.amount_of_space_avail()))
        max_number_of_messages = self.get_message_stream.amount_of_space_avail()
        if max_number_of_messages == 0:
            return # no space left to add message, so don't look for any

        # now try to get up to max_number_of_messages messages from the topic, but in a non-blocking manner
        messages = self.consumer.get_messages(count=max_number_of_messages, block=False)
        self.logger.debug("got %d messages from Kafka" % (len(messages)))
        assert len(messages) <= max_number_of_messages

        #add the messages to message stream
        for message_and_offset in messages:
            self.get_message_count += 1
            offset_str = "%016x" % (message_and_offset.offset) # make offset into 16 hex chars
            # construct a new message and add it to stream
            self.get_message_stream.add_message(
                payload = str(message_and_offset.message.value),
                marker = offset_str, # TODO: this is supposed to be a value usable as a start_marker, but it does not encode the partition, so it is not unique
                id = offset_str,
                ttl = (2**31)-1, # we don't store the original TTL so (for now at least) just send max signed 32 bit int
                age = 0,
            )

        #self.logger.debug("KafkaDriver.check_for_new_messages (end): space_used=%d, amount_of_space_avail=%d" % (self.get_message_stream.space_used(), self.get_message_stream.amount_of_space_avail()))

    # Called to let the driver know that no more messages are needed for the previously requested stream of messages and that it should free any associated resources.
    def cancel_get_stream(self):
        self.new_msg_check_callback.cancel() # cancel call to periodically_check_for_new_messages()
        self.consumer.stop()
        self.consumer = None
        self.get_message_stream = None
        self.queue_name = None
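The "/<n>" suffix convention used in init_get_stream maps a queue name onto a single Kafka partition. A minimal standalone sketch of that parsing (the function name and examples are illustrative, not part of the driver):

def split_queue_spec(queue_name_spec):
    # illustrative helper: "orders/2" -> ("orders", 2); "orders" -> ("orders", None)
    queue_name, _, partition_part = queue_name_spec.partition("/")
    partition = int(partition_part) if partition_part else None
    return queue_name, partition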
Example #42
0
class KafkaJsonLogger:
    indices = {}

    def __init__(self):
        from kafka import SimpleProducer, KafkaClient
        from kafka.common import LeaderNotAvailableError
        self.kafka_client = KafkaClient(config.KAFKA_SERVER)
        self.kafka = SimpleProducer(self.kafka_client)

        for oid in config.SNMP_OIDS:
            self.indices[oid._name()] = 0

        self.head = HeadBuilder("db", "type", "tom", config.DB_NAME)
        try:
            # the first send auto-creates the topic; give the broker a moment if the leader is not yet elected
            self.kafka.send_messages(config.KAFKA_TOPIC, b"creating topic")
        except LeaderNotAvailableError:
            time.sleep(1)

    def close(self):
        self.kafka.stop(0)
        self.kafka_client.close()

    def start(self, epoch):
        head = self.head.create('info', config.TOM, epoch)
        body = '{"description" : "started ('+ config.SESSION_NAME + ')"'+', "value" : '+str(config.SESSION_ID)+'}'
        msg = '{"header": '+ head + ', "body":'+ body+'}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))
        if config.PRINT_CONSOLE: print(msg)

    def stop(self, epoch):
        head = self.head.create('info', config.TOM, epoch)
        body = '{"description" : "stopped ('+ config.SESSION_NAME + ')"'+', "value" : '+str(config.SESSION_ID)+'}'
        msg = '{"header": '+ head + ', "body":'+ body+'}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))
        if config.PRINT_CONSOLE: print(msg)

    def value(self, epoch, oid, name, value):
#        name = oid._name()
        name = name.replace(' ', '_')
        index = self.indices[name]
        index += 1
        self.indices[name] = index
        is_error = False
        str_value = str(value)
        if oid.numeric and not utils.is_number(str_value):
            is_error = True
        head = self.head.create(name, oid.target_name, epoch)
        body = '{"target" : "' + str(oid.target()) + '", ' + \
               '"oid" : "' + str(oid.oid_id)
        if is_error:
            body += '", "error" : "' + str_value + '"}'
        else:
            body += '", "value" : ' + str_value + '}'
        msg = '{"header": '+ head + ', "body":'+ body+'}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))
        if config.PRINT_CONSOLE: print(msg)

    def error(self, epoch, description):
        head = self.head.create('info', config.TOM, epoch)
        body = '{"error" : "'+ description + '"}'
        msg = '{"header": '+ head + ', "body":'+ body+'}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))
        if config.PRINT_CONSOLE: print(msg)
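KafkaJsonLogger assembles its JSON payloads by string concatenation, which breaks as soon as a description or string value contains a quote. A minimal sketch of an equivalent, safer encoding with json.dumps (the header is shown as a plain dict purely for illustration; the real class builds it with HeadBuilder):

import json

def build_message(header_dict, description, value):
    # json.dumps takes care of quoting and escaping
    body = {"description": description, "value": value}
    return json.dumps({"header": header_dict, "body": body}).encode("utf8")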
Example #43
0
    argument_parser.add_argument('--hosts', type=str, default='localhost:9092')
    argument_parser.add_argument('--topic', type=str, default='test')
    arguments = argument_parser.parse_args()

    # getting the text to publish
    text_to_publish = arguments.message

    # getting the hosts for Kafka
    kafka_hosts = arguments.hosts

    # getting the topic to publish in
    topic_to_publish_in = arguments.topic

    # instantiating a Kafka Client
    kafka_client = SimpleClient(hosts=kafka_hosts)

    # instantiating a Kafka Producer
    kafka_producer = SimpleProducer(kafka_client)

    # printing what we are doing
    print('publishing "{}" \nto topic "{}" on broker(s) "{}"'.format(
        text_to_publish, topic_to_publish_in,
        '", "'.join(kafka_hosts.split(','))))

    # publishing the message
    kafka_producer.send_messages(topic_to_publish_in,
                                 text_to_publish.encode('utf-8'))

    # stopping the kafka producer
    kafka_producer.stop()
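To check that the published message actually reached the broker, the same old-style API can read it back with a SimpleConsumer. A minimal sketch assuming the defaults above (broker localhost:9092, topic "test"):

from kafka import SimpleClient, SimpleConsumer

client = SimpleClient('localhost:9092')
consumer = SimpleConsumer(client, group='verify', topic='test',
                          auto_commit=False, iter_timeout=5)
consumer.seek(0, 0)  # rewind to the start of the topic
for message_and_offset in consumer:  # iteration stops after iter_timeout seconds with no messages
    print(message_and_offset.message.value)
consumer.stop()
client.close()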
Example #44
0
class KafkaAvroLogger:
    indices = {}

    def __init__(self):
        from kafka import SimpleProducer, KafkaClient
        from kafka.common import LeaderNotAvailableError
        self.kafka_client = KafkaClient(config.KAFKA_SERVER)
        self.kafka = SimpleProducer(self.kafka_client)
        schema_int_src = pkg_resources.resource_string("pypro.snmp", "pypro_snmp_int.avsc").decode('utf-8')
        schema_float_src = pkg_resources.resource_string("pypro.snmp", "pypro_snmp_float.avsc").decode('utf-8')
        schema_str_src = pkg_resources.resource_string("pypro.snmp", "pypro_snmp_str.avsc").decode('utf-8')
        self.schema_int = avro.schema.Parse(schema_int_src)
        self.schema_float = avro.schema.Parse(schema_float_src)
        self.schema_str = avro.schema.Parse(schema_str_src)

        for oid in config.SNMP_OIDS:
            self.indices[oid._name()] = 0

        try:
            #empty msg to ensure topic is created
            self.kafka.send_messages(config.KAFKA_TOPIC, (0).to_bytes(1, byteorder='big'))
        except LeaderNotAvailableError:
            time.sleep(1)

    def close(self):
        self.kafka.stop(0)
        self.kafka_client.close()

    def start(self, epoch):
        if config.PRINT_CONSOLE: print('starting session logging to kafka with avro')

    def stop(self, epoch):
        if config.PRINT_CONSOLE: print('stopping session logging to kafka with avro')

    def value(self, epoch, oid, name, value):
        name = oid._name()
        index = self.indices[name]
        index += 1
        self.indices[name] = index
        str_value = str(value)
        if oid.is_numeric() and not utils.is_number(str_value):
            self.error(epoch, "Invalid number, received:"+str_value)
            return

        value_int = None
        value_float = None
        value_str = None
        if oid.is_int(): value_int = int(value)
        elif oid.is_float(): value_float = float(value)
        else: value_str = str(value)

        writer = None
        id = None
        value = None
        if value_int is not None:
            writer = avro.io.DatumWriter(self.schema_int)
            id = config.AVRO_SCHEMA_INT_ID
            value = value_int
        if value_float is not None:
            writer = avro.io.DatumWriter(self.schema_float)
            id = config.AVRO_SCHEMA_FLOAT_ID
            value = value_float
        if value_str is not None:
            writer = avro.io.DatumWriter(self.schema_str)
            id = config.AVRO_SCHEMA_STR_ID
            value = value_str
        bytes_writer = io.BytesIO()
        id_bytes = (id).to_bytes(1, 'big')
        bytes_writer.write(id_bytes)
        encoder = avro.io.BinaryEncoder(bytes_writer)
        writer.write({"header": {"type": oid.oid_name, "tom": oid.target_name, "address": oid.ip, "oid": oid.oid_name, "time":epoch},
                       "body": {"value": value}
                       }, encoder)
        raw_bytes = bytes_writer.getvalue()
        self.kafka.send_messages(config.KAFKA_TOPIC, raw_bytes)
        if config.PRINT_CONSOLE: print(str(raw_bytes))

    def error(self, epoch, description):
        pass
Example #45
0
class KafkaPythonClientSimple(PythonClient):
    def __init__(self,topic=topic_name, consumerGroup="perftest", kafkaHost=kafka_host, zookeeperHost=zookeeper_host):
        self.config["topic"] = topic
        self.config["kafkaHost"] = kafkaHost
        self.config["zookeeperHost"] = zookeeperHost
        self.config["consumerGroup"] = consumerGroup
        self.client = SimpleClient(self.config["kafkaHost"])
        super(KafkaPythonClientSimple, self).__init__()

    def createProducer(self, kafkaSync):
        self.config["kafkaSync"] = kafkaSync
        if self.config["kafkaSync"] == True:
            self.producer = SimpleProducer(self.client, async=False)
        else:
            print "ENOIMPL: async not impl. for kafka-python-simple"

    def createConsumer(self):
        self.consumer = SimpleConsumer(self.client,
            topic=self.config["topic"],
            group=self.config["consumerGroup"],
            auto_commit= True,
            max_buffer_size=3000000,
            iter_timeout=5)

    def produce(self, num_msg=20000):
        self.msgCount = num_msg
        for x in range(self.msgCount):
            self.prtProgress(x, 10000)
            self.producer.send_messages(self.config["topic"], self.msg)
        if self.msgCount > 10000:  # finish the progress-dot line if any dots were printed
            sys.stdout.write('\n')

    def consume(self, num_msg=0):
        count = 0
        while True:
            message = self.consumer.get_message(block=False, timeout=1)
            # don't iterate with "for message in self.consumer:" instead of this while loop - it is much slower!
            if message is None:
#               print "consume, msg is None"
                break
            if len(message) == 0:
#               print "consume, len(msg) is 0"
                break
            count += 1
            self.prtProgress(count, 10000)
        sys.stdout.write('\n')
        if num_msg >  0:
            if count != num_msg:
                print "ERROR: KafkaPythonClientSimple.consume: # of messages not as expected, read: {}, expected: {}".format(count, num_msg)
        return count

    def startProducer(self):
        pass

    def stopProducer(self):
        self.beforeFlushTimer(self.timeDict['producer'])
        self.producer.stop()

    def stopConsumer(self): pass

    def initCount(self):
        self.consume(0)

    def finalize(self): pass
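A minimal usage sketch for the perf-test client above (it assumes the PythonClient base class and the module-level defaults kafka_host/zookeeper_host, none of which are shown in this excerpt):

client = KafkaPythonClientSimple(topic='perftest-topic')
client.createProducer(kafkaSync=True)   # only the synchronous path is implemented here
client.createConsumer()
client.produce(num_msg=20000)           # prints progress dots while sending
client.stopProducer()                   # flushes and stops the producer
count = client.consume(num_msg=20000)   # non-blocking polling loop; returns the number of messages read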
Example #46
0
    def test_simple_producer_new_topic(self):
        producer = SimpleProducer(self.client)
        resp = producer.send_messages('new_topic', self.msg('foobar'))
        self.assert_produce_response(resp, 0)
        producer.stop()
Example #47
0

filepath = args.file
topic = args.topic


producer = get_kafka_producer(args.broker, args.async)
# method that sends messages to given topic
send_message = lambda msg: producer.send_messages(topic, msg)


read_lines = 0
read_chars = 0

print "starting"
for l in read_all_from_file_or_dict(filepath, args.lines):
    read_lines += 1
    read_chars += len(l)
    responses = send_message(l)

if read_lines < args.lines:
    print "Not enough lines in file"

print "stopping"
producer.stop()
print "stopped"
print "Read", read_lines, "lines"
print "Read", read_chars, "chars"


Example #48
0
    def test_batched_simple_producer__triggers_by_message(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [self.current_offset(self.topic, p) for p in partitions]

        # Configure batch producer
        batch_messages = 5
        batch_interval = 5
        producer = SimpleProducer(
            self.client,
            async_send=True,
            batch_send_every_n=batch_messages,
            batch_send_every_t=batch_interval,
            random_start=False)

        # Send 4 messages -- should not trigger a batch
        resp = producer.send_messages(
            self.topic,
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # It hasn't sent yet
        self.assert_fetch_offset(partitions[0], start_offsets[0], [])
        self.assert_fetch_offset(partitions[1], start_offsets[1], [])

        # send 3 more messages -- should trigger batch on first 5
        resp = producer.send_messages(
            self.topic,
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # Wait until producer has pulled all messages from internal queue
        # this should signal that the first batch was sent, and the producer
        # is now waiting for enough messages to batch again (or a timeout)
        timeout = 5
        start = time.time()
        while not producer.queue.empty():
            if time.time() - start > timeout:
                self.fail('timeout waiting for producer queue to empty')
            time.sleep(0.1)

        # send_messages groups all *msgs from a single call into the same partition,
        # so we should see all messages from the first call in one partition
        self.assert_fetch_offset(partitions[0], start_offsets[0], [
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        ])

        # Because we batch every 5 messages, only "five" from the second call has been flushed to the second partition so far
        self.assert_fetch_offset(partitions[1], start_offsets[1], [
            self.msg("five"),
        ])

        producer.stop()
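The batching knobs exercised by this test can be used the same way in application code. A minimal sketch (the broker address and topic name are placeholders):

from kafka import SimpleClient, SimpleProducer

client = SimpleClient('localhost:9092')  # placeholder broker address
producer = SimpleProducer(
    client,
    async_send=True,            # send from a background thread
    batch_send_every_n=500,     # flush after 500 queued messages...
    batch_send_every_t=30,      # ...or after 30 seconds, whichever comes first
)
producer.send_messages('events', b'payload-1', b'payload-2')  # returns immediately; no acks in async mode
producer.stop()   # flush anything still queued and stop the background thread
client.close()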