Example #1
    def test_send_broker_unaware_request_fail(self):
        'Tests that call fails when all hosts are unavailable'

        mocked_conns = {
            ('kafka01', 9092): MagicMock(),
            ('kafka02', 9092): MagicMock()
        }

        # inject KafkaConnection side effects
        mocked_conns[('kafka01', 9092)].send.side_effect = RuntimeError("kafka01 went away (unittest)")
        mocked_conns[('kafka02', 9092)].send.side_effect = RuntimeError("Kafka02 went away (unittest)")

        def mock_get_conn(host, port):
            return mocked_conns[(host, port)]

        # patch to avoid making requests before we want it
        with patch.object(KafkaClient, 'load_metadata_for_topics'):
            with patch.object(KafkaClient, '_get_conn', side_effect=mock_get_conn):
                client = KafkaClient(hosts=['kafka01:9092', 'kafka02:9092'])

                req = KafkaProtocol.encode_metadata_request(b'client', 0)
                with self.assertRaises(KafkaUnavailableError):
                    client._send_broker_unaware_request(payloads=['fake request'],
                                                        encoder_fn=MagicMock(return_value='fake encoded message'),
                                                        decoder_fn=lambda x: x)

                for key, conn in six.iteritems(mocked_conns):
                    conn.send.assert_called_with(ANY, 'fake encoded message')
    def test_send_broker_unaware_request(self):
        'Tests that call works when at least one of the hosts is available'

        mocked_conns = {
            ('kafka01', 9092): MagicMock(),
            ('kafka02', 9092): MagicMock(),
            ('kafka03', 9092): MagicMock()
        }
        # inject KafkaConnection side effects
        mocked_conns[('kafka01', 9092)].send.side_effect = RuntimeError("kafka01 went away (unittest)")
        mocked_conns[('kafka02', 9092)].recv.return_value = 'valid response'
        mocked_conns[('kafka03', 9092)].send.side_effect = RuntimeError("kafka03 went away (unittest)")

        def mock_get_conn(host, port):
            return mocked_conns[(host, port)]

        # patch to avoid making requests before we want it
        with patch.object(KafkaClient, 'load_metadata_for_topics'), \
                patch.object(KafkaClient, '_get_conn', side_effect=mock_get_conn):

            client = KafkaClient(hosts='kafka01:9092,kafka02:9092')

            resp = client._send_broker_unaware_request(1, 'fake request')

            self.assertEqual('valid response', resp)
            mocked_conns[('kafka02', 9092)].recv.assert_called_with(1)
Example #3
    def test_send_produce_request_raises_when_noleader(self, protocol, conn):
        "Send producer request raises LeaderNotAvailableError if leader is not available"

        conn.recv.return_value = 'response'  # anything but None

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567),
            BrokerMetadata(1, 'broker_2', 5678)
        ]

        topics = [
            TopicMetadata('topic_noleader', NO_ERROR, [
                PartitionMetadata('topic_noleader', 0, -1, [], [],
                                  NO_LEADER),
                PartitionMetadata('topic_noleader', 1, -1, [], [],
                                  NO_LEADER),
            ]),
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)

        client = KafkaClient(hosts=['broker_1:4567'])

        requests = [ProduceRequest(
            "topic_noleader", 0,
            [create_message("a"), create_message("b")])]

        with self.assertRaises(LeaderNotAvailableError):
            client.send_produce_request(requests)
Example #4
    def test_has_metadata_for_topic(self, protocol, conn):

        conn.recv.return_value = 'response'  # anything but None

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567),
            BrokerMetadata(1, 'broker_2', 5678)
        ]

        topics = [
            TopicMetadata(b'topic_still_creating', NO_LEADER, []),
            TopicMetadata(b'topic_doesnt_exist', UNKNOWN_TOPIC_OR_PARTITION, []),
            TopicMetadata(b'topic_noleaders', NO_ERROR, [
                PartitionMetadata(b'topic_noleaders', 0, -1, [], [], NO_LEADER),
                PartitionMetadata(b'topic_noleaders', 1, -1, [], [], NO_LEADER),
            ]),
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)

        client = KafkaClient(hosts=['broker_1:4567'])

        # Topics with no partitions return False
        self.assertFalse(client.has_metadata_for_topic('topic_still_creating'))
        self.assertFalse(client.has_metadata_for_topic('topic_doesnt_exist'))

        # Topic with partition metadata, but no leaders return True
        self.assertTrue(client.has_metadata_for_topic('topic_noleaders'))
Example #5
    def test_get_leader_for_partitions_reloads_metadata(self, protocol, conn):
        "Get leader for partitions reload metadata if it is not available"

        conn.recv.return_value = 'response'  # anything but None

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567),
            BrokerMetadata(1, 'broker_2', 5678)
        ]

        topics = [
            TopicMetadata('topic_no_partitions', NO_LEADER, [])
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)

        client = KafkaClient(hosts=['broker_1:4567'])

        # topic metadata is loaded but empty
        self.assertDictEqual({}, client.topics_to_brokers)

        topics = [
            TopicMetadata('topic_one_partition', NO_ERROR, [
                PartitionMetadata('topic_one_partition', 0, 0, [0, 1], [0, 1], NO_ERROR)
            ])
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)

        # calling _get_leader_for_partition (from any broker aware request)
        # will try loading metadata again for the same topic
        leader = client._get_leader_for_partition('topic_one_partition', 0)

        self.assertEqual(brokers[0], leader)
        self.assertDictEqual({
            TopicAndPartition('topic_one_partition', 0): brokers[0]},
            client.topics_to_brokers)
Example #6
    def test_get_leader_returns_none_when_noleader(self, protocol, conn):
        "Getting leader for partitions returns None when the partiion has no leader"

        conn.recv.return_value = 'response'  # anything but None

        brokers = {}
        brokers[0] = BrokerMetadata(0, 'broker_1', 4567)
        brokers[1] = BrokerMetadata(1, 'broker_2', 5678)

        topics = {}
        topics['topic_noleader'] = {
            0: PartitionMetadata('topic_noleader', 0, -1, [], []),
            1: PartitionMetadata('topic_noleader', 1, -1, [], [])
        }
        protocol.decode_metadata_response.return_value = (brokers, topics)

        client = KafkaClient(hosts=['broker_1:4567'])
        self.assertDictEqual(
            {
                TopicAndPartition('topic_noleader', 0): None,
                TopicAndPartition('topic_noleader', 1): None
            },
            client.topics_to_brokers)
        self.assertIsNone(client._get_leader_for_partition('topic_noleader', 0))
        self.assertIsNone(client._get_leader_for_partition('topic_noleader', 1))

        topics['topic_noleader'] = {
            0: PartitionMetadata('topic_noleader', 0, 0, [0, 1], [0, 1]),
            1: PartitionMetadata('topic_noleader', 1, 1, [1, 0], [1, 0])
        }
        protocol.decode_metadata_response.return_value = (brokers, topics)
        self.assertEqual(brokers[0], client._get_leader_for_partition('topic_noleader', 0))
        self.assertEqual(brokers[1], client._get_leader_for_partition('topic_noleader', 1))
Example #7
    def test_send_broker_unaware_request_fail(self):
        'Tests that call fails when all hosts are unavailable'

        mocked_conns = {
            ('kafka01', 9092): MagicMock(),
            ('kafka02', 9092): MagicMock()
        }

        # inject KafkaConnection side effects
        mocked_conns[('kafka01', 9092)].send.side_effect = RuntimeError("kafka01 went away (unittest)")
        mocked_conns[('kafka02', 9092)].send.side_effect = RuntimeError("Kafka02 went away (unittest)")

        def mock_get_conn(host, port):
            return mocked_conns[(host, port)]

        # patch to avoid making requests before we want it
        with patch.object(KafkaClient, 'load_metadata_for_topics'):
            with patch.object(KafkaClient, '_get_conn', side_effect=mock_get_conn):
                client = KafkaClient(hosts=['kafka01:9092', 'kafka02:9092'])

                with self.assertRaises(KafkaUnavailableError):
                    client._send_broker_unaware_request(1, 'fake request')

                for key, conn in compat.dict_items(mocked_conns):
                    conn.send.assert_called_with(1, 'fake request')
class KafkaMetricSender(MetricSender):
    def __init__(self, config):
        super(KafkaMetricSender, self).__init__(config)
        kafka_config = config["output"]["kafka"]
        # default topic
        # self.topic = kafka_config["topic"].encode('utf-8')
        # producer
        self.broker_list = kafka_config["brokerList"]
        self.kafka_client = None
        self.kafka_producer = None

    def open(self):
        self.kafka_client = KafkaClient(self.broker_list, timeout=59)
        self.kafka_producer = SimpleProducer(
            self.kafka_client, batch_send=True, batch_send_every_n=500, batch_send_every_t=30
        )

    def send(self, msg, topic):
        self.kafka_producer.send_messages(topic, json.dumps(msg))

    def close(self):
        if self.kafka_producer is not None:
            self.kafka_producer.stop()
        if self.kafka_client is not None:
            self.kafka_client.close()
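
A minimal usage sketch for the sender above (assuming the MetricSender base class accepts this config layout; the broker address, topic, and payload are placeholders):

sender = KafkaMetricSender({"output": {"kafka": {"brokerList": "localhost:9092"}}})
sender.open()
sender.send({"metric": "cpu.load", "value": 0.42}, "metrics_topic")
sender.close()
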
    def assert_message_count(self, topic, check_count, timeout=10,
                             partitions=None, at_least=False):
        hosts = ','.join(['%s:%d' % (broker.host, broker.port)
                          for broker in self.brokers])

        client = KafkaClient(hosts)
        consumer = SimpleConsumer(client, None, topic,
                                  partitions=partitions,
                                  auto_commit=False,
                                  iter_timeout=timeout)

        started_at = time.time()
        pending = consumer.pending(partitions)

        # Keep checking if it isn't immediately correct, subject to timeout
        while pending < check_count and (time.time() - started_at < timeout):
            pending = consumer.pending(partitions)
            time.sleep(0.5)

        consumer.stop()
        client.close()

        if pending < check_count:
            self.fail('Too few pending messages: found %d, expected %d' %
                      (pending, check_count))
        elif pending > check_count and not at_least:
            self.fail('Too many pending messages: found %d, expected %d' %
                      (pending, check_count))
        return True
    def _get_offsets_from_kafka(brokers,
                                topic,
                                offset_time):
        """get dict representing kafka
        offsets.
        """
        # get client
        client = KafkaClient(brokers)

        # get partitions for a topic
        partitions = client.topic_partitions[topic]

        # https://cwiki.apache.org/confluence/display/KAFKA/
        # A+Guide+To+The+Kafka+Protocol#
        # AGuideToTheKafkaProtocol-OffsetRequest
        MAX_OFFSETS = 1
        offset_requests = [OffsetRequestPayload(topic,
                                                part_name,
                                                offset_time,
                                                MAX_OFFSETS) for part_name
                           in partitions.keys()]

        offsets_responses = client.send_offset_request(offset_requests)

        offset_dict = {}
        for response in offsets_responses:
            key = "_".join((response.topic,
                            str(response.partition)))
            offset_dict[key] = response

        return offset_dict
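
A hedged usage sketch for the helper above: in the Kafka offset API an offset_time of -1 requests the latest offsets and -2 the earliest; the broker string and topic are placeholders, and calling the function directly assumes it is not wrapped as a method in the original module.

latest = _get_offsets_from_kafka("localhost:9092", "my_topic", -1)    # -1 -> latest offsets
earliest = _get_offsets_from_kafka("localhost:9092", "my_topic", -2)  # -2 -> earliest offsets

# per-partition backlog between earliest and latest
backlog = {key: latest[key].offsets[0] - earliest[key].offsets[0] for key in latest}
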
Example #11
    def run(self, topic, message, hosts=None):
        """
        Simple round-robin synchronous producer to send one message to one topic.

        :param hosts: Kafka hostname(s) to connect in host:port format.
                      Comma-separated for several hosts.
        :type hosts: ``str``
        :param topic: Kafka Topic to publish the message on.
        :type topic: ``str``
        :param message: The message to publish.
        :type message: ``str``

        :returns: Response data: `topic`, target `partition` where message was sent,
                  `offset` number and `error` code (hopefully 0).
        :rtype: ``dict``
        """

        if hosts:
            _hosts = hosts
        elif self.config.get('hosts', None):
            _hosts = self.config['hosts']
        else:
            raise ValueError("Need to define 'hosts' in either action or in config")

        # set default for empty value
        _client_id = self.config.get('client_id') or self.DEFAULT_CLIENT_ID

        client = KafkaClient(_hosts, client_id=_client_id)
        client.ensure_topic_exists(topic)
        producer = SimpleProducer(client)
        result = producer.send_messages(topic, kafka_bytestring(message))

        if result[0]:
            return result[0].__dict__
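
A hedged invocation sketch: only run() is shown above, so the action class name and its constructor below are placeholders, as are the broker, topic, and message values.

action = ProduceMessageAction(config={'hosts': 'localhost:9092'})   # placeholder class/ctor
result = action.run(topic='my-topic', message='hello world')
# result resembles: {'topic': 'my-topic', 'partition': 0, 'error': 0, 'offset': 42}
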
Example #12
 def test_correlation_rollover(self):
     with patch.object(KafkaClient, 'load_metadata_for_topics'):
         big_num = 2**31 - 3
         client = KafkaClient(hosts=[], correlation_id=big_num)
         self.assertEqual(big_num + 1, client._next_id())
         self.assertEqual(big_num + 2, client._next_id())
         self.assertEqual(0, client._next_id())
 def send_kafka_msg(iters):
     # TODO: Add try/catch statements for kafka connection
     kafka = KafkaClient(kafka_host)
     producer = SimpleProducer(kafka)
     for key, val in iters:
         msg = combine_count_json(key, val)
         producer.send_messages(str(topic).encode("utf-8"), str(msg).encode("utf-8"))
     kafka.close()
def query_location_producer(lat, lng, radius, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.area_search(lat, lng, radius, count)
    for text in text_list:
        kafka_producer.send_messages("twitter",text)
    kafka.close()
    return
def query_text_producer(text, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.search(text, count)
    for text in text_list:
        kafka_producer.send_messages("twitter",text)
    kafka.close()
    return
def favorite_list_producer(id, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.favorite_list(id, count)
    for text in text_list:
        kafka_producer.send_messages("twitter",text)
    kafka.close()
    return
Example #17
def consume(kafka_host):
    kafka = KafkaClient(kafka_host)
    consumer = SimpleConsumer(kafka, 'fetcher', cfg['kafka']['pages'])
    producer = SimpleProducer(kafka)
    consumer.max_buffer_size=20*1024*1024
    for msg in consumer:
        page = json.loads(msg.message.value)
        process(page, producer)
    kafka.close()
def timeline_producer(twitter_account, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.user_timeline(twitter_account, count)
    for text in text_list:
        kafka_producer.send_messages("twitter",text)
    kafka.close()
    return
Example #19
class KafkaSender():

    def __init__(self):
        self.client=KafkaClient(hosts)
        #self.producer = SimpleProducer(self.client,batch_send=batch_send,batch_send_every_n=batch_send_every_n)
        self.producer=KafkaProducer(bootstrap_servers=hosts)
        self.client.ensure_topic_exists(topic)
    def send_messages(self,msg):
        self.producer.send(topic,msg)
Example #20
def checker( ):
    client = KafkaClient(kafka2)
    topic = "twittercontent"
    partitions = client.topic_partitions[topic]
    offset_requests = [OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()]

    # OffsetRequestPayloads are answered by send_offset_request (the group id
    # "cpp_service_customers_50000" would only apply to an offset *fetch* request)
    offsets_responses = client.send_offset_request(offset_requests)

    for r in offsets_responses:
        print "partition = %s, offset = %s"%(r.partition, r.offsets[0])
Example #21
    def test_ensure_topic_exists(self, decode_metadata_response, conn):

        conn.recv.return_value = 'response'  # anything but None

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567),
            BrokerMetadata(1, 'broker_2', 5678)
        ]

        topics = [
            TopicMetadata(b'topic_still_creating', NO_LEADER, []),
            TopicMetadata(b'topic_doesnt_exist', UNKNOWN_TOPIC_OR_PARTITION, []),
            TopicMetadata(b'topic_noleaders', NO_ERROR, [
                PartitionMetadata(b'topic_noleaders', 0, -1, [], [], NO_LEADER),
                PartitionMetadata(b'topic_noleaders', 1, -1, [], [], NO_LEADER),
            ]),
        ]
        decode_metadata_response.return_value = MetadataResponse(brokers, topics)

        client = KafkaClient(hosts=['broker_1:4567'])

        with self.assertRaises(UnknownTopicOrPartitionError):
            client.ensure_topic_exists('topic_doesnt_exist', timeout=1)

        with self.assertRaises(KafkaTimeoutError):
            client.ensure_topic_exists('topic_still_creating', timeout=1)

        # This should not raise
        client.ensure_topic_exists('topic_noleaders', timeout=1)
        client.ensure_topic_exists(b'topic_noleaders', timeout=1)
Example #22
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    bytes_topic = None
    zk = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10))
            self.topic = topic
            self.bytes_topic = topic.encode('utf-8')

        if self.create_client:
            self.client = KafkaClient('%s:%d' % (self.server.host, self.server.port))

        self.client.ensure_topic_exists(self.topic)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        try:
            offsets, = self.client.send_offset_request([ OffsetRequest(kafka_bytestring(topic), partition, -1, 1) ])
        except:
            # XXX: We've seen some UnknownErrors here and cant debug w/o server logs
            self.zk.child.dump_logs()
            self.server.child.dump_logs()
            raise
        else:
            return offsets.offsets[0]

    def msgs(self, iterable):
        return [ self.msg(x) for x in iterable ]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')
class Producer(object):

    def __init__(self, broker_ip_port):
        self.kafka = KafkaClient(broker_ip_port)
        self.producer = SimpleProducer(self.kafka)

    def send_message(self):
        response = self.producer.send_messages("HEY", "Hello World", "Kafka Deployment Worked!")
        return [("Error ", response[0].error), response]

    def close(self):
        self.kafka.close()
Example #24
def setup_capture_new_messages_consumer(topic):
    """Seeks to the tail of the topic then returns a function that can
    consume messages from that point.
    """
    kafka = KafkaClient(get_config().cluster_config.broker_list)
    group = str('data_pipeline_clientlib_test')
    consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=_ONE_MEGABYTE)
    consumer.seek(0, 2)  # seek to tail, 0 is the offset, and 2 is the tail

    yield consumer

    kafka.close()
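
Because the function above is a generator that yields the consumer and closes the client afterwards, it is presumably consumed through a context manager; the @contextmanager wrapping, topic name, and publish call below are assumptions.

from contextlib import contextmanager

capture_new_messages = contextmanager(setup_capture_new_messages_consumer)

with capture_new_messages('my.topic') as consumer:
    publish_test_messages()   # placeholder for the code under test
    messages = consumer.get_messages(count=10, timeout=5)
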
Example #25
class Producer(KeyedProducer):
    """

    """
    def __init__(self, hosts, client_id, timeout):
        self._client = KafkaClient(hosts, client_id=client_id, timeout=timeout)
        super(Producer, self).__init__(self._client)

    def close(self):
        try:
            self._client.close()
        except:
            pass
Example #26
def post_data():
    from kafka import KafkaClient
    from kafka import SimpleProducer

    kafka = KafkaClient(app.config['KAFKA_SERVER'])
    producer = SimpleProducer(kafka)
    if not request.json:
        resp = 'null post data'
    else:
        resp = producer.send_messages(app.config['KAFKA_TOPIC'], str(request.json))
        if resp:
            resp = {'error':resp[0].error,'offset':resp[0].offset}
    kafka.close()
    return jsonify(resp)
Example #27
class KafkaBase(Base):

    """ A block defining common Kafka functionality.
    Properties:
        host (str): location of the database
        port (int): open port served by database
        topic (str): topic name
    """
    host = StringProperty(title='Host', default='[[KAFKA_HOST]]')
    port = IntProperty(title='Port', default=9092)
    topic = StringProperty(title='Topic', default="", allow_none=False)

    def __init__(self):
        super().__init__()
        self._kafka = None
        self._encoded_topic = None

    def configure(self, context):
        super().configure(context)

        if not len(self.topic()):
            raise ValueError("Topic cannot be empty")

        self._connect()

    def stop(self):
        self._disconnect()
        super().stop()

    def _connect(self):
        self._kafka = KafkaClient("{0}:{1}".format(self.host(), self.port()))
        self._encoded_topic = self.topic()

        # ensuring topic is valid
        try:
            self._kafka.ensure_topic_exists(self._encoded_topic)
        except Exception:
            self.logger.exception("Topic: {0} does not exist"
                                  .format(self.topic()))
            raise

    def _disconnect(self):
        if self._kafka:
            self._kafka.close()
            self._kafka = None

    @property
    def connected(self):
        return self._kafka
Example #28
def main():
    kafka = KafkaClient("localhost:9092")
    producer = SimpleProducer(kafka)

    topic = b'test'
    msg = b'Hello World from Me!'

    try:
        print_response(producer.send_messages(topic, msg))
    except LeaderNotAvailableError:
        # https://github.com/mumrah/kafka-python/issues/249
        time.sleep(1)
        print_response(producer.send_messages(topic, msg))

    kafka.close()
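
The retry above works around the LeaderNotAvailableError that can occur while a topic is still being auto-created. A small, hedged generalization (names are illustrative; time and LeaderNotAvailableError are assumed imported as in the example):

def send_with_retry(producer, topic, msg, retries=3, delay=1.0):
    for attempt in range(retries):
        try:
            return producer.send_messages(topic, msg)
        except LeaderNotAvailableError:
            if attempt == retries - 1:
                raise
            time.sleep(delay)   # give the broker time to elect a leader
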
Example #29
    def test_send_produce_request_raises_when_topic_unknown(self, protocol, conn):

        conn.recv.return_value = "response"  # anything but None

        brokers = [BrokerMetadata(0, "broker_1", 4567), BrokerMetadata(1, "broker_2", 5678)]

        topics = [TopicMetadata("topic_doesnt_exist", UNKNOWN_TOPIC_OR_PARTITION, [])]
        protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)

        client = KafkaClient(hosts=["broker_1:4567"])

        requests = [ProduceRequest("topic_doesnt_exist", 0, [create_message("a"), create_message("b")])]

        with self.assertRaises(UnknownTopicOrPartitionError):
            client.send_produce_request(requests)
Example #30
class Consumer(object):
    def __init__(self, topic):
        self.kafka = KafkaClient("localhost:9092")
        self.consumer = SimpleConsumer(self.kafka, "1", topic)

    @classmethod
    def make_queue(cls):
        return Queue(4096)

    def run(self, q):
        try:
            for i in self.consumer:
                q.put(json.loads(i.message.value), True)
        except Exception as e:
            self.kafka.close()
    def _run(self):
        pcount = 0
        pause = False
        while True:
            try:
                if pause:
                    gevent.sleep(2)
                    pause = False
                self._logger.error("New KafkaClient %s" % self._topic)
                self._kfk = KafkaClient(self._brokers,
                                        "kc-" + self._topic,
                                        timeout=5)
                self._failed = False
                try:
                    consumer = SimpleConsumer(self._kfk, self._group, self._topic,\
                            buffer_size = 4096*4*4, max_buffer_size=4096*32*4)
                except Exception as ex:
                    template = "Consumer Failure {0} occured. Arguments:\n{1!r}"
                    messag = template.format(type(ex).__name__, ex.args)
                    self._logger.error("Error: %s trace %s" % \
                        (messag, traceback.format_exc()))
                    self._failed = True
                    raise RuntimeError(messag)

                self._logger.error("Starting %s" % self._topic)

                # Find the offset of the last message that has been queued
                consumer.seek(-1, 2)
                try:
                    mi = consumer.get_message(timeout=0.1)
                    consumer.commit()
                except common.OffsetOutOfRangeError:
                    mi = None
                #import pdb; pdb.set_trace()
                self._logger.info("Last Queued for %s is %s" % \
                                  (self._topic,str(mi)))

                # start reading from last previously processed message
                if mi != None:
                    consumer.seek(-1, 1)
                else:
                    consumer.seek(0, 0)

                if self._limit:
                    raise gevent.GreenletExit

                while True:
                    try:
                        mlist = consumer.get_messages(10, timeout=0.5)
                        if not self.msg_handler(mlist):
                            raise gevent.GreenletExit
                        pcount += len(mlist)
                    except TypeError as ex:
                        self._logger.error("Type Error: %s trace %s" % \
                                (str(ex.args), traceback.format_exc()))
                        gevent.sleep(0.1)
                    except common.FailedPayloadsError as ex:
                        self._logger.error("Payload Error: %s" % str(ex.args))
                        gevent.sleep(0.1)
            except gevent.GreenletExit:
                break
            except AssertionError as ex:
                self._partoffset = ex
                break
            except Exception as ex:
                template = "An exception of type {0} occured. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.error("%s : traceback %s" % \
                                  (messag, traceback.format_exc()))
                self.stop_partition()
                self._failed = True
                pause = True
                if hasattr(ex, 'errno'):
                    # This is an unrecoverable error
                    if ex.errno == errno.EMFILE:
                        raise SystemExit(1)

        self._logger.error("Stopping %s pcount %d" % (self._topic, pcount))
        partdb = self.stop_partition()
        return self._partoffset, partdb
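
For reference, a short hedged sketch of the SimpleConsumer.seek(offset, whence) calls used above (old kafka-python API): whence 0 is absolute from the head, 1 is relative to the current position, 2 is relative to the tail.

consumer.seek(0, 0)    # rewind to the beginning of the assigned partitions
consumer.seek(-1, 2)   # position one message before the tail
consumer.seek(-1, 1)   # step back one message from the current position
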
Example #32
class KafkaBaseMonitor(BaseMonitor):
    '''
    Base monitor for handling outbound Kafka results
    '''
    def setup(self, settings):
        '''
        Setup the handler

        @param settings: The loaded settings file
        '''
        @MethodTimer.timeout(settings['KAFKA_CONN_TIMEOUT'], False)
        def _hidden_setup():
            try:
                # set up kafka
                self.kafka_conn = KafkaClient(settings['KAFKA_HOSTS'])
                self.producer = SimpleProducer(self.kafka_conn)
                self.topic_prefix = settings['KAFKA_TOPIC_PREFIX']
            except KafkaUnavailableError as ex:
                message = "An exception '{0}' occured while setting up kafka. "\
                    "Arguments:\n{1!r}".format(type(ex).__name__, ex.args)
                self.logger.error(message)
                return False
            return True

        ret_val = _hidden_setup()
        self.use_appid_topics = settings['KAFKA_APPID_TOPICS']

        if ret_val:
            self.logger.debug(
                "Successfully connected to Kafka in {name}".format(
                    name=self.__class__.__name__))
        else:
            self.logger.error(
                "Failed to set up Kafka Connection in {name} "
                "within timeout".format(name=self.__class__.__name__))
            # this is essential to running the redis monitor
            sys.exit(1)

    def _send_to_kafka(self, master):
        '''
        Sends the message back to Kafka
        @param master: the final dict to send
        @returns: True if successfully sent to kafka
        '''
        appid_topic = "{prefix}.outbound_{appid}".format(
            prefix=self.topic_prefix, appid=master['appid'])
        firehose_topic = "{prefix}.outbound_firehose".format(
            prefix=self.topic_prefix)
        try:
            self.kafka_conn.ensure_topic_exists(firehose_topic)
            # dont want logger in outbound kafka message
            dump = json.dumps(master)
            if self.use_appid_topics:
                self.kafka_conn.ensure_topic_exists(appid_topic)
                self.producer.send_messages(appid_topic, dump)
            self.producer.send_messages(firehose_topic, dump)

            return True
        except Exception as ex:
            message = "An exception '{0}' occured while sending a message " \
                "to kafka. Arguments:\n{1!r}" \
                .format(type(ex).__name__, ex.args)
            self.logger.error(message)

        return False
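
A hedged sketch of how a concrete monitor might use _send_to_kafka(); the handle() hook, its arguments, and every dict key except the required 'appid' are assumptions rather than part of the API shown above.

class ExampleMonitor(KafkaBaseMonitor):

    def handle(self, key, value):
        master = {'appid': 'testapp', 'key': key, 'body': value}
        if not self._send_to_kafka(master):
            self.logger.error("Failed to forward result to Kafka")
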
Example #33
 def __init__(self):
     self.client = KafkaClient("localhost", 9092)
     self.producers = {}
app = Flask(__name__)

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.info('Starting Monitor Server Python')

# load json schema
with open('schemas/atmosphere_tma-m_schema.json') as f:
    tma_m_schema = json.load(f)
logger.debug('Schema loaded %s', tma_m_schema)

validator = Draft4Validator(tma_m_schema)
logger.info('Validator initialized %s', validator)

#Connect Kafka client to Kafka pod
kafka = KafkaClient('kafka-0.kafka-hs.default.svc.cluster.local:9093')

# Incialize producer structure to send messages
producer = SimpleProducer(kafka)


@app.route('/monitor', methods=['POST'])
def process_message():
    # load json file
    input = request.get_json(force=True)
    logger.info('Processing Request %s', input)

    return validate_schema(input)


@app.route('/monitor_demo', methods=['POST'])
Example #35
class Aria2Dispatcher:
    def __init__(self, host, topic, consumer_id, settings):
        self.host = host
        self.topic = topic
        self.consumer_id = consumer_id or "Aria2Dispatcher"
        self.settings = importlib.import_module(settings[:-3])
        self.kafka_client = KafkaClient(self.settings.KAFKA_HOSTS)
        self.producer = SimpleProducer(self.kafka_client)
        self.topic_prefix = self.settings.KAFKA_TOPIC_PREFIX
        self.topic_list = []
        self.aria2_clients = []
        for x in self.settings.ARIA2_ADDRESSES:
            rpc_uri = "ws://%s/jsonrpc" % x
            try:
                aria2_connection = create_connection(rpc_uri)
                self.aria2_clients.append({
                    'rpc_uri': rpc_uri,
                    'ws': aria2_connection
                })
            except:
                logger.error('create aria2_connection error!')
                raise

    def _process_item(self, item, aria2_client_index):

        prefix = self.topic_prefix
        crawled_firehose_images_topic = "{prefix}.crawled_firehose_images".format(
            prefix=prefix)

        if 'updates' in item['meta']['collection_name']:
            message = json.dumps(item)
            print("in.....   if 'updates' in item['meta']['collection_name']:")
            print('collection_name::', item['meta']['collection_name'])
        else:
            self._process_item_images(item, aria2_client_index)
            try:
                if 'images' in item and len(item['images']) > 0:
                    message = json.dumps(item)
                else:
                    message = 'no images.'
            except:
                message = 'json failed to parse'
                logger.error(message)

        self._check_topic(crawled_firehose_images_topic)
        self.producer.send_messages(crawled_firehose_images_topic, message)
        logger.info("send message to kafka topic:: %s " %
                    crawled_firehose_images_topic)
        logger.info("message= %s" % message)

    def _process_item_images(self, item, aria2_client_index):
        image_urls = item["image_urls"]
        if len(image_urls) > 0:
            req_methods = []
            images = []
            for url in image_urls:
                filename, file_ext = splitext(basename(urlparse(url).path))
                if len(file_ext) == 0:
                    file_ext = ".jpg"

                out_file_name_base = sha1(url)
                out_file_name = "%s%s" % (out_file_name_base, file_ext)
                dir_name = '%s/%s/%s/%s/%s' % (
                    self.settings.IMAGES_STORE, item['meta']['spiderid'],
                    out_file_name_base[:3], out_file_name_base[3:6],
                    out_file_name_base[6:])

                options = dict(dir=dir_name, out=out_file_name)
                if not exists(dir_name + '/' + out_file_name):
                    req_methods.append({
                        "methodName": "aria2.addUri",
                        "params": [[url], options]
                    })

                images.append({
                    'url': url,
                    'path': "%s/%s" % (dir_name, out_file_name),
                    'aria2': {
                        'rpc_uri':
                        self.aria2_clients[aria2_client_index]['rpc_uri']
                    }
                })

            req = {
                "jsonrpc": 2,
                "id": str(uuid.uuid1()),
                "method": "system.multicall",
                "params": [req_methods]
            }
            jsonreq = json.dumps(req)

            try:
                self.aria2_clients[aria2_client_index]['ws'].send(jsonreq)
                resp = self.aria2_clients[aria2_client_index]['ws'].recv()
                ws_resp = json.loads(resp)
                print('resp:', resp)
                logger.info('resp:: %s ' % resp)
                for image, gid in zip(images,
                                      map(lambda x: x[0], ws_resp['result'])):
                    image['aria2']['gid'] = gid

            except Exception as err:
                print('error::', err)
                logger.error(err)

            item['images'] = images

    def _check_topic(self, topic_name):
        if topic_name not in self.topic_list:
            self.kafka_client.ensure_topic_exists(topic_name)
            self.topic_list.append(topic_name)

    def dispatch(self):
        consumer = SimpleConsumer(
            self.kafka_client,
            self.consumer_id,
            self.topic,
            buffer_size=1024 * 100,  # 100kb
            fetch_size_bytes=1024 * 100,  # 100kb
            max_buffer_size=None  # eliminate big message errors
        )
        consumer.seek(0, 1)
        i = 0
        while True:
            try:
                message = consumer.get_message()
                if message is None:
                    print datetime.datetime.now().strftime(
                        "%Y-%m-%d %H:%M:%S"), ' message is None:'
                    logger.info('message is None.')
                    time.sleep(1)
                    continue
                val = message.message.value
                try:
                    item = json.loads(val)
                    i += 1
                    self._process_item(item, i % len(self.aria2_clients))
                except:
                    print("error heppened in loads val to process : %s" % val)
                    logger.error("error heppened in loads val to process: %s" %
                                 val)
                    continue
            except:
                traceback.print_exc()
                break

        self.kafka_client.close()
        return 0
Example #36
 def startConnection(self):
     idPlusPort = self.IP + ":" + str(self.port)
     kafka = KafkaClient(idPlusPort)
     self.producer = SimpleProducer(kafka, async=True)
import json, sys, os
from kafka import SimpleProducer, KafkaClient
import json, pika

sample = {'imageMatch': 'true', 'fileName': 'thilagaTest'}
kafka = KafkaClient('10.6.4.36:9092')
producer = SimpleProducer(kafka)

data = json.dumps(sample)
producer.send_messages(b'geoAttendance', data)

credentials = pika.PlainCredentials('guest', 'guest')
parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials)
connection = pika.BlockingConnection(parameters)

channel = connection.channel()
channel.queue_declare(queue='hello')
channel.basic_publish(exchange='', routing_key='hello', body=data)
print(" [x] Sent 'done.....!!!'")
connection.close()
Example #38
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from kafka import SimpleProducer, KafkaClient

access_token = "1072837803908784129-Pv2y0HZlUcJVyHmePC5KtLIVM0ZHz6"
access_token_secret =  "ImJh7Q2oMhHoHyWsln2BjMFWouYQCliPik9BkE1wymk6H"
consumer_key =  "WFRKqgnMYAIXB1NkOhvttsNMY"
consumer_secret =  "X4zj67WfzYt0paiqOpukFW7wb4wWeazttwW98GNmvEyLW9tvrA"

class StdOutListener(StreamListener):
    def on_data(self, data):
        producer.send_messages("trump", data.encode('utf-8'))
        print (data)
        return True
    def on_error(self, status):
        print (status)

kafka = KafkaClient("localhost:9092")
producer = SimpleProducer(kafka)
l = StdOutListener()
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
stream = Stream(auth, l)
stream.filter(track="trump")

/home/ubuntu/Documents/ghezloo/kafka-spark-cassandra/test.py
from kafka import SimpleConsumer, SimpleClient
from kafka import KafkaConsumer
from kafka import KafkaClient

group_name = "my-group"
topic_name = "fast-messages"

kafka = KafkaClient('127.0.0.1:9092')
consumer = SimpleConsumer(kafka, group_name, topic_name)

print "Created consumer for group: [%s] and topic: [%s]" % (group_name,
                                                            topic_name)
print "Waiting for messages..."

for msg in consumer:
    print msg
Example #40
 def getProducer(self):
     if self.producer is None:
         kafka = KafkaClient(kafka_server)
         self.producer = SimpleProducer(kafka)
     return self.producer
Example #41
data= {"srDetails": {"betaType": "PythonTesting", "build": "PythonTesting", "caseId": "9999-9999-P-9988", "ccEngineer": "PythonTesting", "ccList": "PythonTesting", "contractId": "PythonTesting", "contractStatus": "PythonTesting", "country": "PythonTesting", "courtesyDescription": "PythonTesting", "courtesykey": "PythonTesting", "criticalIssue": "PythonTesting", "criticalOutage": "PythonTesting", "customerCaseNumber": "PythonTesting", "cve": "PythonTesting", "cvss": "PythonTesting", "description": "PythonTesting", "employeeEmail": "PythonTesting", "employeeId": "PythonTesting", "endDate": "PythonTesting", "entitledSerialNumber": "PythonTesting", "entitlementChecked": "PythonTesting", "entitlementServiceLevel": "PythonTesting", "entitlementSource": "PythonTesting", "escalation": "PythonTesting", "escalationLevelDescription": "PythonTesting", "escalationLevelKey": "PythonTesting", "escalationkey": "PythonTesting", "externallyReported": "PythonTesting", "followupMethod": "PythonTesting", "followupMethodKey": "PythonTesting", "internalUse": "PythonTesting", "jsaAdvisoryBoard": "PythonTesting", "jtac": "PythonTesting", "knowledgeArticle": "PythonTesting", "numberOfSystemsAffected": "PythonTesting", "numberOfUsersAffected": "PythonTesting", "ouatgeCauseDescription": "PythonTesting", "outageCauseKey": "PythonTesting", "outageDescription": "PythonTesting", "outageImpactDescription": "PythonTesting", "outageImpactKey": "PythonTesting", "outageInfoAvailable": "PythonTesting", "outageKey": "PythonTesting", "outageTypeDescription": "PythonTesting", "outageTypeKey": "PythonTesting", "outsourcer": "PythonTesting", "overideOutage": "PythonTesting", "partnerFunction": [{"partnerFunctionKey": "00000001", "partnerFunctionName": "Sold-To Party", "partnerId": "100000151", "partnerName": "CENTURYLINK, INC"}, {"partnerFunctionName":"Employee Responsible","partnerFunctionKey":"00000014","partnerId":"0000018961","partnerName":"Vidhya Sadasivam"}], "platform": "PythonTesting", "previousOwnerSkill": "PythonTesting", "previousTeam": "PythonTesting", "priority": "PythonTesting", "priorityKey": "PythonTesting", "processType": "PythonTesting", "processTypeDescription": "PythonTesting", "productId": "PythonTesting", "productSeries": "PythonTesting", "raFa": "PythonTesting", "reason": "PythonTesting", "release": "PythonTesting", "reporterDetails": "PythonTesting", "routerName": "PythonTesting", "secVulnerability": "PythonTesting", "serialNumber": "PythonTesting", "serviceProduct": "PythonTesting", "severity": "PythonTesting", "severityKey": "PythonTesting", "sirtBundle": "PythonTesting", "sku": "PythonTesting", "smeContact": "PythonTesting", "software": "PythonTesting", "specialRelease": "PythonTesting", "srCategory1": "PythonTesting", "srCategory2": "PythonTesting", "srCategory3": "PythonTesting", "srCategory4": "PythonTesting", "srReqDate": [{"dateStamp": "PythonTesting", "dateType": "PythonTesting", "duration": "PythonTesting", "timeUnit": "PythonTesting"}, {"dateStamp": "PythonTesting", "dateType": "PythonTesting", "duration": "PythonTesting", "timeUnit": "PythonTesting"}, {"dateStamp": "PythonTesting", "dateType": "PythonTesting", "duration": "PythonTesting", "timeUnit": "PythonTesting"}, {"dateStamp": "PythonTesting", "dateType": "PythonTesting", "duration": "PythonTesting", "timeUnit": "PythonTesting"}, {"dateStamp": "PythonTesting", "dateType": "PythonTesting", "duration": "PythonTesting", "timeUnit": "PythonTesting"}, {"dateStamp": "PythonTesting", "dateType": "PythonTesting", "duration": "PythonTesting", 
"timeUnit": "PythonTesting"}, {"dateStamp": "PythonTesting", "dateType": "PythonTesting", "duration": "PythonTesting", "timeUnit": "PythonTesting"}, {"dateStamp": "PythonTesting", "dateType": "PythonTesting", "duration": "PythonTesting", "timeUnit": "PythonTesting"}, {"dateStamp": "PythonTesting", "dateType": "PythonTesting", "duration": "PythonTesting", "timeUnit": "PythonTesting"}], "startDate": "PythonTesting", "status": "PythonTesting", "statusKey": "PythonTesting", "technicalCategory1": "PythonTesting", "technicalCategory2": "PythonTesting", "technicalCategory3": "PythonTesting", "technicalCategory4": "PythonTesting", "temperature": "PythonTesting", "theaterDescription": "PythonTesting", "theaterKey": "PythonTesting", "top5": "PythonTesting", "totalOutageTime": "PythonTesting", "urgency": "PythonTesting", "urgencyKey": "PythonTesting", "version": "PythonTesting", "viaDescription": "PythonTesting", "viaKey": "PythonTesting", "warrantyEndDate": "PythonTesting", "yearRoundSupport": "PythonTesting", "zzQ1": "PythonTesting", "zzQ10": "PythonTesting", "zzQ2": "PythonTesting", "zzQ3": "PythonTesting", "zzQ4": "PythonTesting", "zzQ5": "PythonTesting", "zzQ6": "PythonTesting", "zzQ7": "PythonTesting", "zzQ8": "PythonTesting", "zzQ9": "PythonTesting"}}

att = dict(srAttachements={"caseId": "2015-1004-T-0021",
                            "attachment": {"createdBy": "CMUSER", "dateCreated": "Mon+Oct+12+18%3A44%3A20+UTC+2015", "fileType": "",
                                           "path": "/archive/attachments/OETCLR/2015/10/04/T/20151012184420", "private": "",
                                           "sequenceNumber": "0002141498", "size": 19, "title": "Closed_JSA_Cases.xlsx",
                                           "uploadedBy": "*****@*****.**", "zDate": 20151012, "zTime": 114422}})

att2 = dict(srAttachements={"caseId": "9999-9999-P-9993",
                           "attachment": {"sequenceNumber": "0000026175", "title": "DK7444.txt", "zTime": 73411,
                                          "fileType": "", "private": "", "dateCreated": "20110525 073411",
                                          "createdBy": "", "path": "/archive/attachments/PCLR/2011/05/25/0334/1306334051497",
                                          "zDate": 20110525, "uploadedBy": "", "size": 8}})

# To send messages synchronously
kafka = KafkaClient('172.22.147.232:9092,172.22.147.242:9092,172.22.147.243:9092')
producer = SimpleProducer(kafka)

# Note that the application is responsible for encoding messages to type bytes
print producer.send_messages(b'SAPEvent', json.dumps(data))
time.sleep(1)
print "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
try:
    document = data
    row = []
    utils = Utils()
    row = utils.validate_sr_details( document['srDetails'], row )
except Exception as ex:
    print ex
    print(traceback.format_exc())
print "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
def get_lock(process_name):
    global lock_socket
    lock_socket = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
    try:
        lock_socket.bind('\0' + process_name)
        print 'Lock acquired'
    except socket.error:
        print 'Process already running. Exiting..'
        sys.exit()
get_lock('twitter streaming')



logging.basicConfig()


kafka = KafkaClient("localhost:9092")

tempfile_path = None
tempfile = None
batch_counter = 0
timestamp = None

# def get_topics(zookeeper_hosts, topic_regex):
#     """Uses shell zookeeper-client to read Kafka topics matching topic_regex from ZooKeeper."""
#     command        = "/usr/bin/zookeeper-client -server %s ls /brokers/topics | tail -n 1 | tr '[],' '   '" % ','.join(zookeeper_hosts)
#     topics         = os.popen(command).read().strip().split()
#     matched_topics = [ topic for topic in topics if re.match(topic_regex, topic) ]
#     return matched_topics

def standardized_timestamp(frequency, dt=None):
    '''
            self._logger.info("%s" % messag)
            return False
        else:
            self._callback(self._partno, chg)
        return True


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(message)s')

    workers = {}
    brokers = "localhost:9092,localhost:9093,localhost:9094"
    group = "workers"

    kafka = KafkaClient(brokers, str(os.getpid()))
    cons = SimpleConsumer(kafka, group, "ctrl")
    cons.provide_partition_info()
    print "Starting control"
    end_ready = False
    while end_ready == False:
        try:
            while True:
                part, mmm = cons.get_message(timeout=None)
                mm = mmm.message
                print "Consumed ctrl " + str(mm)
                if mm.value == "start":
                    if workers.has_key(mm.key):
                        print "Dup partition %s" % mm.key
                        raise ValueError
                    else:
Example #44
def init_kafka():
    global kafkaProducer
    (url) = config.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    kafkaProducer = SimpleProducer(kafka)
Example #45
def main():
    # initial main parser setup
    parser = argparse.ArgumentParser(
        description='Kafka Dump: Scrapy Cluster Kafka topic dump utility for '
        'debugging.',
        add_help=False)
    parser.add_argument('-h',
                        '--help',
                        action=ArgparseHelper,
                        help='show this help message and exit')

    subparsers = parser.add_subparsers(help='commands', dest='command')

    # args to use for all commands
    base_parser = argparse.ArgumentParser(add_help=False)
    base_parser.add_argument('-kh',
                             '--kafka-host',
                             action='store',
                             required=False,
                             help="The override Kafka host")
    base_parser.add_argument('-s',
                             '--settings',
                             action='store',
                             required=False,
                             help="The settings file to read from",
                             default="localsettings.py")
    base_parser.add_argument(
        '-ll',
        '--log-level',
        action='store',
        required=False,
        help="The log level",
        default=None,
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'])

    # list command
    list_parser = subparsers.add_parser('list',
                                        help='List all Kafka topics',
                                        parents=[base_parser])

    # dump command
    dump_parser = subparsers.add_parser('dump',
                                        help='Dump a Kafka topic',
                                        parents=[base_parser])
    dump_parser.add_argument('-t',
                             '--topic',
                             action='store',
                             required=True,
                             help="The Kafka topic to read from")
    dump_parser.add_argument('-c',
                             '--consumer',
                             action='store',
                             required=False,
                             default=None,
                             help="The Kafka consumer id to use")
    dump_parser.add_argument('-b',
                             '--from-beginning',
                             action='store_const',
                             required=False,
                             const=True,
                             help="Read the topic from the beginning")
    dump_parser.add_argument('-nb',
                             '--no-body',
                             action='store_const',
                             required=False,
                             const=True,
                             default=False,
                             help="Do not include the raw html 'body' key in"
                             " the json dump of the topic")
    dump_parser.add_argument('-p',
                             '--pretty',
                             action='store_const',
                             required=False,
                             const=True,
                             default=False,
                             help="Pretty print the json objects consumed")
    dump_parser.add_argument('-d',
                             '--decode-base64',
                             action='store_const',
                             required=False,
                             const=True,
                             default=False,
                             help="Decode the base64 encoded raw html body")

    args = vars(parser.parse_args())

    wrapper = SettingsWrapper()
    settings = wrapper.load(args['settings'])

    kafka_host = args['kafka_host'] if args['kafka_host'] else settings[
        'KAFKA_HOSTS']
    log_level = args['log_level'] if args['log_level'] else settings[
        'LOG_LEVEL']
    logger = LogFactory.get_instance(level=log_level, name='kafkadump')

    if args['command'] == 'list':
        try:
            logger.debug("Connecting to {0}...".format(kafka_host))
            kafka = KafkaClient(kafka_host)
            logger.info("Connected to {0}".format(kafka_host))
        except KafkaUnavailableError as ex:
            message = "An exception '{0}' occured. Arguments:\n{1!r}" \
                .format(type(ex).__name__, ex.args)
            logger.error(message)
            sys.exit(1)
        logger.debug('Running list command')
        print("Topics:")
        for topic in list(kafka.topic_partitions.keys()):
            print("-", topic)
        kafka.close()
        return 0
    elif args['command'] == 'dump':
        logger.debug('Running dump command')
        topic = args["topic"]
        consumer_id = args["consumer"]

        try:
            logger.debug("Getting Kafka consumer")

            offset = 'earliest' if args["from_beginning"] else 'latest'

            consumer = KafkaConsumer(
                topic,
                group_id=consumer_id,
                bootstrap_servers=kafka_host,
                consumer_timeout_ms=settings['KAFKA_CONSUMER_TIMEOUT'],
                auto_offset_reset=offset,
                auto_commit_interval_ms=settings[
                    'KAFKA_CONSUMER_COMMIT_INTERVAL_MS'],
                enable_auto_commit=settings[
                    'KAFKA_CONSUMER_AUTO_COMMIT_ENABLE'],
                max_partition_fetch_bytes=settings[
                    'KAFKA_CONSUMER_FETCH_MESSAGE_MAX_BYTES'])
        except NoBrokersAvailable as ex:
            logger.error('Unable to connect to Kafka')
            sys.exit(1)

        num_records = 0
        total_bytes = 0
        item = None
        file1 = open("myfile.txt", "w")  # collect dumped message payloads

        while True:
            try:
                for message in consumer:
                    if message is None:
                        logger.debug("no message")
                        break
                    logger.debug("Received message")
                    val = message.value
                    try:
                        item = json.loads(val)
                        if args['decode_base64'] and 'body' in item:
                            item['body'] = base64.b64decode(item['body'])

                        if args['no_body'] and 'body' in item:
                            del item['body']
                    except ValueError:
                        logger.info("Message is not a JSON object")
                        item = val
                    body_bytes = len(item)

                    if args['pretty']:
                        print(json.dumps(item, indent=4))
                    else:
                        print(item)
                        file1.write(str(item))
                    num_records = num_records + 1
                    total_bytes = total_bytes + body_bytes
            except KeyboardInterrupt:
                logger.debug("Keyboard interrupt received")
                break
            except:
                logger.error(traceback.format_exc())
                break

        total_mbs = old_div(float(total_bytes), (1024 * 1024))
        if item is not None:
            print("Last item:")
            print(json.dumps(item, indent=4))
        if num_records > 0:
            logger.info(
                "Num Records: {n}, Total MBs: {m}, kb per message: {kb}".
                format(n=num_records,
                       m=total_mbs,
                       kb=(float(total_bytes) / num_records / 1024)))
        else:
            logger.info("No records consumed")
            num_records = 0

        logger.info("Closing Kafka connection")
        try:
            consumer.close()
        except:
            # Exception is thrown when group_id is None.
            # See https://github.com/dpkp/kafka-python/issues/619
            pass
        return 0
Example #46
 def __init__(self):
     client = KafkaClient("localhost:9092")
     self.producer = SimpleProducer(client, async = True,
                       batch_send_every_n = 1000,
                       batch_send_every_t = 10)
Example #47
class EpidataStreamingContext:
    def __init__(self,
                 sc=None,
                 ssc=None,
                 sql_ctx=None,
                 topics=None,
                 brokers=None,
                 cassandra_conf=None,
                 measurement_class=None):
        self._sc = sc
        self._sql_ctx = sql_ctx
        self._topics = topics
        self._ssc = ssc
        self._brokers = brokers
        self._cassandra_conf = cassandra_conf
        self._measurement_class = measurement_class

        # set up Schema
        self._sensor_measurement_schema = SensorMeasurement.get_schema()
        self._sensor_measurement_stats_schema = SensorMeasurement.get_stats_schema()
        self._automated_test_schema = AutomatedTest.get_schema()
        self._automated_test_stats_schema = AutomatedTest.get_stats_schema()

        self._kafka_producer = KafkaProducer(bootstrap_servers=self._brokers)
        self._client = KafkaClient(self._brokers)

    def run_stream(self, ops, clean_up=True):

        self._client.ensure_topic_exists(self._topics)
        kvs = KafkaUtils.createDirectStream(
            self._ssc, [self._topics], {"metadata.broker.list": self._brokers})

        if self._measurement_class == "sensor_measurement":
            rows = kvs.map(SensorMeasurement.to_row)
        elif self._measurement_class == "automated_test":
            rows = kvs.map(AutomatedTest.to_row)

        def process(time, rdd):
            if not rdd.isEmpty():

                rdd_df = self._sql_ctx.createDataFrame(rdd)

                # convert to a pandas dataframe
                panda_df = ConvertUtils.convert_to_pandas_dataframe_model(
                    rdd_df, clean_up)

                # perform all transformation and save it to cassandra
                for op in ops:

                    # try:
                    # apply transformation
                    output_df = op.apply(panda_df, self._sql_ctx)

                    if not output_df.empty:

                        if op.datastore() == "cassandra":

                            # clean up unnecessary column
                            output_df = ConvertUtils.convert_meas_value(
                                output_df, op.destination())

                            # convert it back to spark data frame
                            spark_output_df = self._sql_ctx.createDataFrame(
                                output_df, self._get_schema(op.destination()))

                            # convert to db model to save to cassandra
                            output_df_db = self._convert_to_db_model(
                                spark_output_df, op.destination())

                            # save to cassandra
                            output_df_db.write.format(
                                "org.apache.spark.sql.cassandra"
                            ).mode('append').options(
                                table=op.destination(),
                                keyspace=self._cassandra_conf['keyspace'],
                                user=self._cassandra_conf['user'],
                                password=self._cassandra_conf['password']
                            ).save()

                        elif op.datastore() == "kafka":

                            output_df_kafka = output_df

                            for i in output_df_kafka.index:
                                row_json = output_df_kafka.loc[i].to_json()

                                # push to kafka
                                self._kafka_producer.send(
                                    op.destination(), row_json)

                            # Flush the Kafka producer
                            self._kafka_producer.flush()

                    # except BaseException:
                    #     print("Failed transformation: " + op.destination())

        rows.foreachRDD(process)

    def _start(self):
        self._ssc.start()
        self._ssc.awaitTermination()

    def _get_schema(self, destination):
        if destination == "measurements_summary":
            if self._measurement_class == "sensor_measurement":
                return self._sensor_measurement_stats_schema
            elif self._measurement_class == "automated_test":
                return self._automated_test_stats_schema
        else:
            if self._measurement_class == "sensor_measurement":
                return self._sensor_measurement_schema
            elif self._measurement_class == "automated_test":
                return self._automated_test_schema

    def _convert_to_db_model(self, input_df, dest):
        if self._measurement_class == "sensor_measurement":
            return SensorMeasurement.convert_to_db_model(input_df, dest)
        elif self._measurement_class == "automated_test":
            return AutomatedTest.convert_to_db_model(input_df, dest)
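A hypothetical wiring of this class, assuming a local Spark installation with the spark-streaming-kafka package available and a broker on localhost:9092; the topic name, keyspace and credentials below are placeholders, and the ops list would hold the project's own transformation objects, which are not shown here:

from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.streaming import StreamingContext

sc = SparkContext(appName="epidata-streaming")        # placeholder app name
ssc = StreamingContext(sc, 5)                         # 5-second micro-batches
context = EpidataStreamingContext(
    sc=sc,
    ssc=ssc,
    sql_ctx=SQLContext(sc),
    topics="measurements",                            # placeholder topic
    brokers="localhost:9092",                         # placeholder broker list
    cassandra_conf={"keyspace": "epidata",
                    "user": "cassandra",
                    "password": "cassandra"},         # placeholder credentials
    measurement_class="sensor_measurement")
context.run_stream(ops=[])                            # ops: project-specific transformations
context._start()                                      # blocks until the stream terminates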
def start_kafka(zk_client_port, broker_listen_port, broker_id=0):
    if not os.path.exists(kafka_bdir):
        output, _ = call_command_("mkdir " + kafka_bdir)
    kafka_download = 'wget -P ' + kafka_bdir + ' http://download.nextag.com/apache/kafka/0.8.1.1/kafka_2.9.2-0.8.1.1.tgz'

    if not os.path.exists(kafka_bdir + '/kafka_2.9.2-0.8.1.1.tgz'):
        process = subprocess.Popen(kafka_download.split(' '))
        process.wait()
        if process.returncode != 0:
            return False

    basefile = 'kafka_2.9.2-0.8.1.1'
    kafkabase = "/tmp/kafka.%s.%d/" % (os.getenv('USER',
                                                 'None'), broker_listen_port)
    confdir = kafkabase + basefile + "/config/"
    output, _ = call_command_("rm -rf " + kafkabase)
    output, _ = call_command_("mkdir " + kafkabase)

    logging.info('Installing kafka in ' + kafkabase)
    os.system("cat " + kafka_bdir + '/kafka_2.9.2-0.8.1.1.tgz' +
              " | tar -xpzf - -C " + kafkabase)

    logging.info('kafka Port %d' % broker_listen_port)

    #Replace the brokerid and port # in the config file
    replace_string_(
        confdir + "server.properties",
        [("broker.id=0", "broker.id=" + str(broker_id)),
         ("port=9092", "port=" + str(broker_listen_port)),
         ("zookeeper.connect=localhost:2181",
          "zookeeper.connect=localhost:%d" % zk_client_port),
         ("log.dirs=/tmp/kafka-logs", "log.dirs=" + kafkabase + "logs")])

    replace_string_(kafkabase + basefile + "/bin/kafka-server-stop.sh",
                    [("grep -v grep", "grep %s | grep -v grep" % kafkabase)])
    replace_string_(kafkabase + basefile + "/bin/kafka-server-stop.sh",
                    [("SIGINT", "SIGKILL")])
    replace_string_(kafkabase + basefile + "/bin/kafka-server-stop.sh",
                    [("#!/bin/sh", "#!/bin/sh -x")])
    output, _ = call_command_("chmod +x " + kafkabase + basefile +
                              "/bin/kafka-server-stop.sh")

    # Extra options for JMX : -Djava.net.preferIPv4Stack=true -Djava.rmi.server.hostname=xx.xx.xx.xx
    output, _ = call_command_(kafkabase + basefile +
                              "/bin/kafka-server-start.sh -daemon " +
                              kafkabase + basefile +
                              "/config/server.properties")

    count = 0
    start_wait = int(os.getenv('CONTRIAL_ANALYTICS_TEST_MAX_START_WAIT_TIME', 15))
    while count < start_wait:
        try:
            logging.info('Trying to connect...')
            kk = KafkaClient("localhost:%d" % broker_listen_port)
        except Exception:
            count += 1
            time.sleep(1)
        else:
            return True

    logging.info("Kafka client cannot connect. Kafka logfile below:")
    with open(kafkabase + basefile + "/logs/kafkaServer.out", 'r') as fin:
        logging.info(fin.read())
    return False
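A hypothetical invocation, assuming ZooKeeper is already listening on the given client port and that the helpers referenced above (kafka_bdir, call_command_, replace_string_) are defined elsewhere in the test harness:

# Bring up a single test broker on port 9092 against a local ZooKeeper on 2181.
if start_kafka(zk_client_port=2181, broker_listen_port=9092, broker_id=0):
    logging.info("test broker is up")
else:
    logging.error("test broker failed to start")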
Beispiel #49
0
    def check(self, instance):
        consumer_groups = self.read_config(instance, 'consumer_groups',
                                           cast=self._validate_consumer_groups)
        zk_connect_str = self.read_config(instance, 'zk_connect_str')
        kafka_host_ports = self.read_config(instance, 'kafka_connect_str')

        # Construct the Zookeeper path pattern
        zk_prefix = instance.get('zk_prefix', '')
        zk_path_tmpl = zk_prefix + '/consumers/%s/offsets/%s/%s'

        # Connect to Zookeeper
        zk_conn = KazooClient(zk_connect_str, timeout=self.zk_timeout)
        zk_conn.start()

        try:
            # Query Zookeeper for consumer offsets
            consumer_offsets = {}
            topics = defaultdict(set)
            for consumer_group, topic_partitions in consumer_groups.iteritems():
                for topic, partitions in topic_partitions.iteritems():
                    # Remember the topic partitions that we've seen so that we can
                    # look up their broker offsets later
                    topics[topic].update(set(partitions))
                    for partition in partitions:
                        zk_path = zk_path_tmpl % (consumer_group, topic, partition)
                        try:
                            consumer_offset = int(zk_conn.get(zk_path)[0])
                            key = (consumer_group, topic, partition)
                            consumer_offsets[key] = consumer_offset
                        except NoNodeError:
                            self.log.warning('No zookeeper node at %s' % zk_path)
                        except Exception:
                            self.log.exception('Could not read consumer offset from %s' % zk_path)
        finally:
            try:
                zk_conn.stop()
                zk_conn.close()
            except Exception:
                self.log.exception('Error cleaning up Zookeeper connection')

        # Connect to Kafka
        kafka_conn = KafkaClient(kafka_host_ports, timeout=self.kafka_timeout)

        try:
            # Query Kafka for the broker offsets
            broker_offsets = {}
            for topic, partitions in topics.items():
                offset_responses = kafka_conn.send_offset_request([
                    OffsetRequest(topic, p, -1, 1) for p in partitions])

                for resp in offset_responses:
                    broker_offsets[(resp.topic, resp.partition)] = resp.offsets[0]
        finally:
            try:
                kafka_conn.close()
            except Exception:
                self.log.exception('Error cleaning up Kafka connection')

        # Report the broker data
        for (topic, partition), broker_offset in broker_offsets.items():
            broker_tags = ['topic:%s' % topic, 'partition:%s' % partition]
            self.gauge('kafka.broker_offset', broker_offset, tags=broker_tags)

        # Report the consumer offsets and lag
        for (consumer_group, topic, partition), consumer_offset in consumer_offsets.items():

            # Get the broker offset
            broker_offset = broker_offsets.get((topic, partition))

            # Report the consumer offset and lag
            tags = ['topic:%s' % topic, 'partition:%s' % partition,
                    'consumer_group:%s' % consumer_group]
            self.gauge('kafka.consumer_offset', consumer_offset, tags=tags)
            self.gauge('kafka.consumer_lag', broker_offset - consumer_offset,
                       tags=tags)
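The kafka.consumer_lag gauge reported above is simply the gap between the broker's latest offset and the group's committed offset; a toy illustration with made-up numbers:

# Toy numbers, for illustration only.
broker_offset = 1200    # latest offset on the partition (from send_offset_request)
consumer_offset = 1150  # offset the group last committed to ZooKeeper
lag = broker_offset - consumer_offset   # 50 messages not yet consumed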
from kafka import KafkaClient, SimpleProducer, SimpleConsumer

# To send messages synchronously
kafka = KafkaClient("cloud.soumet.com:9092")
producer = SimpleProducer(kafka)

# Note that the application is responsible for encoding messages to type str
#producer.send_messages("bitcoin", "some message")

consumer = SimpleConsumer(kafka,
                          "consumer",
                          "bitcoin_exchange_tmp",
                          max_buffer_size=1310720000)
for message in consumer:
    # message is raw byte string -- decode if necessary!
    # e.g., for unicode: `message.decode('utf-8')`
    print(message)

#kafka.close()
Beispiel #51
0
    def __init__(self, kafkaBroker, kafkaTopic):
        self.broker = kafkaBroker
        self.topic = kafkaTopic
        self.client = KafkaClient(self.broker)
        self.producer = SimpleProducer(self.client)
Beispiel #52
0
###################################################
# My own access tokens
####################################################
ACCESS_TOKEN = '28778811-sw3jVlgjtS14kvquuo765rjaIYvCE0iMpTsDXdiRs'
ACCESS_SECRET = 'HBGjT0uixYSC6PXvyewvBuFmHv4FYtU6UmsDG98khY'
CONSUMER_KEY = '2VsrZwlSbtToGYbpHe42hmB36'
CONSUMER_SECRET = 'vuXhfCmMVMwecUzV3hwK8vvkGWZnAM5wtEDvzMMenq6rH8yFqe'

my_auth = requests_oauthlib.OAuth1(CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN,
                                   ACCESS_SECRET)

####################################################
# Kafka Producer
####################################################
twitter_topic = "twitter_topic"
client = KafkaClient("10.128.0.2:9092")
producer = SimpleProducer(client)


def get_tweets():
    print(
        "#########################get_tweets called################################"
    )
    url = 'https://stream.twitter.com/1.1/statuses/filter.json'
    #query_data = [('language', 'en'), ('locations', '-130,-20,100,50'),('track','#')]
    #query_data = [('language', 'en'), ('locations', '-3.7834,40.3735,-3.6233,40.4702'),('track','#')]
    query_data = [('language', 'en'),
                  ('locations', '-3.7834,40.3735,-3.6233,40.4702'),
                  ('track', 'Madrid')]
    query_url = url + '?' + '&'.join(
        [str(t[0]) + '=' + str(t[1]) for t in query_data])
Beispiel #53
0
from kafka import KafkaClient, SimpleProducer, SimpleConsumer
kafka = KafkaClient("localhost:9092")  # connect to the Kafka broker port, not ZooKeeper's 2181
producer = SimpleProducer(kafka)
producer.send_messages("test1", b"Hello world!")
Beispiel #54
0
    def __init__(self, api):
        self.api = api
        super(StreamListener, self).__init__()
        client = KafkaClient("localhost:9092")
        self.producer = KafkaProducer(bootstrap_servers="localhost:9092",
                                      value_serializer=lambda m: json.dumps(m).encode('utf-8'))
import argparse
import json
import time

import streaming_generator
from kafka import SimpleProducer, KafkaClient


def timed_call(fn, calls_per_second, *args, **kwargs):
    start = time.time()
    fn(*args, **kwargs)
    fn_time = time.time() - start
    sleep_duration = max(0, (1.0 - calls_per_second * fn_time) / calls_per_second)
    print sleep_duration
    while True:
        fn(*args, **kwargs)
        time.sleep(sleep_duration)


def send_message(producer, topic):
    message_raw = streaming_generator.gen_random_message()
    producer.send_messages(topic, json.dumps({'user_id': message_raw[0],
                                              'activity': message_raw[1]}))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--messages', default=1000, type=int)
    parser.add_argument('-z', '--host', default="127.0.0.1:9092")
    parser.add_argument('-t', '--topic', default='messages')
    args = parser.parse_args()
    kafka = KafkaClient(args.host)
    producer = SimpleProducer(kafka)
    timed_call(send_message, args.messages, producer, args.topic)
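send_message() indexes the generated record as a pair, so streaming_generator.gen_random_message() is assumed to return something shaped like (user_id, activity); a hypothetical stand-in for local testing:

import random

# Hypothetical stand-in for streaming_generator.gen_random_message(); it only
# mimics the (user_id, activity) tuple shape that send_message() relies on.
def gen_random_message():
    return (random.randint(1, 1000), random.choice(['login', 'click', 'purchase']))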
from kafka import SimpleProducer, KafkaClient
from kafka.common import LeaderNotAvailableError
import sys
import csv
import json
import time
import datetime
import re

# To send messages synchronously
#kafka = KafkaClient('slc08use.us.oracle.com:9092')
kafka = KafkaClient('localhost:9092')
producer = SimpleProducer(kafka)

topic = 'test0730'
table = 'taxi_200K'

lines = [x.strip() for x in open(table + '.csv').readlines()]

print 'create external table', table, '('
columns =  lines[0].split('|')
print ',\n'.join(['  ' + col + ' string' for col in columns])
print ')'
print 'ROW FORMAT DELIMITED FIELDS TERMINATED BY "|"'
print "location '/user/jiezhen/camus/topics/test0730/daily/2015/08/04';"
for line in lines[1:]:
  if line:
    try:
      producer.send_messages(bytes(topic), line)
    except LeaderNotAvailableError:
      print "pausing to allow Kafka time to create topic"
      time.sleep(1)
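For reference, the loop above assumes taxi_200K.csv is pipe-delimited with a header row that becomes the Hive column list; the column names below are invented placeholders, not the real schema:

# trip_id|pickup_datetime|dropoff_datetime|fare_amount     <- header row (lines[0])
# 1|2015-08-04 10:01:00|2015-08-04 10:14:00|12.50          <- data rows sent to Kafka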
Beispiel #57
0
    def __init__(self):
        self._brokers = APP_CONFIG["rti_kafka"]["brokers"]
        self._partitions = APP_CONFIG["rti_kafka"]["partitions"]
        self._topic = APP_CONFIG["rti_kafka"]["topic"]
        self._kafka = KafkaClient(self._brokers)
        self.producer = None
Beispiel #58
0
from kafka import KafkaProducer, KafkaConsumer
from kafka import KafkaClient, SimpleConsumer
import time
import sys
from app import app
from flask import Flask, render_template, request, redirect, Response
import random, json

SEND_TIME = None
print("At top of function \n\n")
RECEIVE_TIME = None

bs = ['54.218.73.149:9092','50.112.197.74:9092','34.222.135.111:9092']
PRODUCER = KafkaProducer(bootstrap_servers=bs)
CLIENT = KafkaClient(bs)



@app.route('/')
def home():
    return render_template('setuser.html')


@app.route('/<user>')
def serve_user(user):
    consumer = SimpleConsumer(CLIENT, 'testing', 'user{}_sess{}'.format(user,user))
    msg = None
    msg = consumer.get_message()
    RECEIVE_TIME = time.time()
    color='yellow'
Beispiel #59
0
from kafka import SimpleProducer, KafkaClient
from kafka import KafkaConsumer

import logging

import sys

logging.basicConfig(
    format=
    '%(asctime)s.%(msecs)s:%(name)s:%(thread)d:%(levelname)s:%(process)d:%(message)s',
    level=logging.WARNING)

server = "ec2-54-171-154-70.eu-west-1.compute.amazonaws.com"

kafka = KafkaClient('%s:9092' % server)
producer = SimpleProducer(kafka)

# To consume messages
consumer = KafkaConsumer('rousseau',
                         group_id='my_group',
                         bootstrap_servers=['%s:9092' % server])

# Note that the application is responsible for encoding messages to type bytes
producer.send_messages(b'rousseau', sys.argv[1].encode('utf-8'))

for message in consumer:
    #    # message value is raw byte string -- decode if necessary!
    #    # e.g., for unicode: `message.value.decode('utf-8')`
    print("%s" % message.value)
    break
Beispiel #60
0
def start_kafka(zk_client_port, broker_listen_port, broker_id=0):
    if not os.path.exists(kafka_bdir):
        output, _ = call_command_("mkdir " + kafka_bdir)
    kafka_download = 'wget -nv --tries=3 -c -O ' + kafka_bdir + kafka_dl + \
        ' https://github.com/Juniper/contrail-third-party-cache/blob/master/kafka' + \
        kafka_dl + '?raw=true'
    if not os.path.exists(kafka_bdir + kafka_dl):
        process = subprocess.Popen(kafka_download.split(' '))
        process.wait()
        if process.returncode != 0:
            return False

    basefile = kafka_version
    kafkabase = "/tmp/kafka.%s.%d/" % (os.getenv('USER',
                                                 'None'), broker_listen_port)
    confdir = kafkabase + basefile + "/config/"
    output, _ = call_command_("rm -rf " + kafkabase)
    output, _ = call_command_("mkdir " + kafkabase)

    logging.info('Check zookeeper in %d' % zk_client_port)
    zk = KazooClient(hosts='127.0.0.1:' + str(zk_client_port), timeout=60.0)
    try:
        zk.start()
        zk.delete("/brokers", recursive=True)
        zk.delete("/consumers", recursive=True)
        zk.delete("/controller", recursive=True)
    except Exception:
        logging.info("Zookeeper client cannot connect")
        zk.stop()
        return False
    zk.stop()
    logging.info('Installing kafka in ' + kafkabase)
    x = os.system("cat " + kafka_bdir + kafka_dl + " | tar -xpzf - -C " +
                  kafkabase)
    if x != 0:
        logging.error("Cannot install kafka")
        return False

    logging.info('kafka Port %d' % broker_listen_port)

    replace_string_(confdir + "server.properties",
                    [("#listeners=PLAINTEXT://:9092",
                      "listeners=PLAINTEXT://:" + str(broker_listen_port))])

    #Replace the brokerid and port # in the config file
    replace_string_(
        confdir + "server.properties",
        [("broker.id=0", "broker.id=" + str(broker_id)),
         ("zookeeper.connect=localhost:2181",
          "zookeeper.connect=localhost:%d" % zk_client_port),
         ("log.dirs=/tmp/kafka-logs", "log.dirs=" + kafkabase + "logs")])

    replace_string_(kafkabase + basefile + "/bin/kafka-server-stop.sh",
                    [("grep -v grep", "grep %s | grep -v grep" % kafkabase)])
    replace_string_(kafkabase + basefile + "/bin/kafka-server-stop.sh",
                    [("SIGINT", "SIGKILL")])
    replace_string_(kafkabase + basefile + "/bin/kafka-server-stop.sh",
                    [("#!/bin/sh", "#!/bin/sh -x")])
    output, _ = call_command_("chmod +x " + kafkabase + basefile +
                              "/bin/kafka-server-stop.sh")

    # Extra options for JMX : -Djava.net.preferIPv4Stack=true -Djava.rmi.server.hostname=xx.xx.xx.xx
    output, _ = call_command_(kafkabase + basefile +
                              "/bin/kafka-server-start.sh -daemon " +
                              kafkabase + basefile +
                              "/config/server.properties")

    count = 0
    start_wait = int(os.getenv('CONTRIAL_ANALYTICS_TEST_MAX_START_WAIT_TIME', 15))
    while count < start_wait:
        try:
            logging.info('Trying to connect...')
            kk = KafkaClient("localhost:%d" % broker_listen_port)
        except Exception:
            count += 1
            time.sleep(1)
        else:
            return True

    logging.info("Kafka client cannot connect. Kafka logfile below:")
    with open(kafkabase + basefile + "/logs/kafkaServer.out", 'r') as fin:
        logging.info(fin.read())
    return False