Example #1
import datetime
from kafka import KafkaClient, SimpleProducer

def send_data_2_kafka(datas):
    '''
        Send the given records to the Kafka parsing queue.
    '''
    KAFKABROKER = ['120.27.241.54', '120.27.243.224', '114.55.226.34']
    PARTNUM = 5
    TOPICNAME = 'test01'
    # TOPICNAME='logtest3'
    client = KafkaClient(hosts=KAFKABROKER, timeout=30)

    # Synchronous send is the default; the old `async` kwarg is a reserved word in Python 3.7+.
    producer = SimpleProducer(client)

    curcount = len(datas) // PARTNUM
    print(curcount)
    print(len(datas))
    for i in range(0, PARTNUM):
        start = i * curcount
        if i != PARTNUM - 1:
            end = (i + 1) * curcount
            curdata = datas[start:end]
            producer.send_messages(TOPICNAME, *curdata)
        else:
            curdata = datas[start:]
            producer.send_messages(TOPICNAME, *curdata)
        print(datetime.datetime.now())

    producer.stop()
    client.close()
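A minimal usage sketch for the function above, assuming the hard-coded brokers are reachable and that payloads are already bytes (the sample records are illustrative):

sample = [('record-%d' % i).encode('utf-8') for i in range(10)]
# send_data_2_kafka splits the list into PARTNUM slices and sends each slice.
send_data_2_kafka(sample)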
Example #2
def kafka_servers_available(servers):
    try:
        client = KafkaClient(bootstrap_servers=servers)
        client.close()
    except Exception:
        return False
    return True
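A short usage sketch (the broker address is illustrative):

if not kafka_servers_available(['localhost:9092']):
    # Fail fast rather than letting later producers/consumers hang.
    raise RuntimeError('no Kafka broker reachable; check the server list')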
Example #3
def get_all_kafka_connections(cluster_type, client_id, **kwargs):
    """Get a kafka connection for each available kafka cluster at Yelp.

    :param cluster_type: kafka cluster type (ex.'scribe' or 'standard').
    :type cluster_type: string
    :param client_id: client_id to be used to connect to kafka.
    :type client_id: string
    :param kwargs: parameters to pass along when creating the KafkaClient instance.
    :returns: list (cluster_name, KafkaClient)
    :raises DiscoveryError: :py:class:`yelp_kafka.error.DiscoveryError` upon failure connecting to a cluster.

    .. note:: This function creates a KafkaClient for each cluster in a region and tries to connect to it. If a cluster is not available it fails and closes all the previous connections.
    """

    clusters = get_all_clusters(cluster_type, client_id)
    connected_clusters = []
    for cluster in clusters:
        try:
            client = KafkaClient(cluster.broker_list, client_id=client_id, **kwargs)
            connected_clusters.append((cluster.name, client))
        except Exception:
            log.exception(
                "Connection to kafka cluster %s using broker list %s failed",
                cluster.name,
                cluster.broker_list
            )
            for _, client in connected_clusters:
                client.close()
            raise DiscoveryError("Failed to connect to cluster {0}".format(
                cluster.name))
    return connected_clusters
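A hedged usage sketch, assuming `get_all_kafka_connections` and its yelp_kafka dependencies are importable and a 'standard' cluster type exists (the client_id is illustrative):

connections = get_all_kafka_connections('standard', client_id='my-service')
try:
    for cluster_name, kafka_client in connections:
        print('connected to cluster', cluster_name)
finally:
    # The caller owns the returned clients and must close them.
    for _, kafka_client in connections:
        kafka_client.close()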
Example #4
    def assert_message_count(self, topic, check_count, timeout=10,
                             partitions=None, at_least=False):
        hosts = ','.join(['%s:%d' % (broker.host, broker.port)
                          for broker in self.brokers])

        client = KafkaClient(hosts)
        consumer = SimpleConsumer(client, None, topic,
                                  partitions=partitions,
                                  auto_commit=False,
                                  iter_timeout=timeout)

        started_at = time.time()
        pending = consumer.pending(partitions)

        # Keep checking if it isn't immediately correct, subject to timeout
        while pending < check_count and (time.time() - started_at < timeout):
            pending = consumer.pending(partitions)
            time.sleep(0.5)

        consumer.stop()
        client.close()

        if pending < check_count:
            self.fail('Too few pending messages: found %d, expected %d' %
                      (pending, check_count))
        elif pending > check_count and not at_least:
            self.fail('Too many pending messages: found %d, expected %d' %
                      (pending, check_count))
        return True
Example #5
class KafkaMetricSender(MetricSender):
    def __init__(self, config):
        super(KafkaMetricSender, self).__init__(config)
        kafka_config = config["output"]["kafka"]
        # default topic
        # self.topic = kafka_config["topic"].encode('utf-8')
        # producer
        self.broker_list = kafka_config["brokerList"]
        self.kafka_client = None
        self.kafka_producer = None

    def open(self):
        self.kafka_client = KafkaClient(self.broker_list, timeout=59)
        self.kafka_producer = SimpleProducer(
            self.kafka_client, batch_send=True, batch_send_every_n=500, batch_send_every_t=30
        )

    def send(self, msg, topic):
        self.kafka_producer.send_messages(topic, json.dumps(msg))

    def close(self):
        if self.kafka_producer is not None:
            self.kafka_producer.stop()
        if self.kafka_client is not None:
            self.kafka_client.close()
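A lifecycle sketch for the sender above, assuming the MetricSender base class accepts the same config dict (broker address and topic name are illustrative):

config = {'output': {'kafka': {'brokerList': 'localhost:9092'}}}
sender = KafkaMetricSender(config)
sender.open()
try:
    sender.send({'metric': 'cpu.load', 'value': 0.42}, 'metrics')
finally:
    # close() stops the batching producer and closes the client.
    sender.close()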
Example #6
class Client:
    """
    封装kafka-python KafkaClient,
    """

    def __init__(self):
        pass

    def __enter__(self):
        self.cfg = Config().cfg
        # These keyword arguments belong to kafka.KafkaClient (the modern client);
        # SimpleClient does not accept them.
        self.client = KafkaClient(
            bootstrap_servers=self.cfg["serList"],
            # api_version=self.cfg["apiVersion"],
            api_version_auto_timeout_ms=self.cfg["autoVersionTimeout"],
            security_protocol=self.cfg["protocol"],
            sasl_mechanism=self.cfg["mechanism"],
            sasl_kerberos_service_name=self.cfg["kerverosSerName"]
        )
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.client.close()

    def add_topic(self, topic_name: str):
        """
Example #7
    def assert_message_count(self,
                             topic,
                             check_count,
                             timeout=10,
                             partitions=None):
        hosts = ','.join(
            ['%s:%d' % (broker.host, broker.port) for broker in self.brokers])

        client = KafkaClient(hosts)
        group = random_string(10)
        consumer = SimpleConsumer(client,
                                  group,
                                  topic,
                                  partitions=partitions,
                                  auto_commit=False,
                                  iter_timeout=timeout)

        started_at = time.time()
        pending = consumer.pending(partitions)

        # Keep checking if it isn't immediately correct, subject to timeout
        while pending != check_count and (time.time() - started_at < timeout):
            pending = consumer.pending(partitions)
            time.sleep(0.5)  # avoid a busy-wait while polling
        consumer.stop()
        client.close()

        self.assertEqual(pending, check_count)
Example #9
    def kafkaProducer(self):
        client = KafkaClient(hosts=["%s:%s" % (IP, PORT)], timeout=30)
        # Synchronous send; the old `async` kwarg is a reserved word in Python 3.7+.
        producer = SimpleProducer(client)
        self.timeCount = int(time.time() * 1000)
        while True:
            self.result.RecResult.Id = str(uuid.uuid4())
            self.result.RecResult.Meta.Timestamp = int(time.time() * 1000)
            self.result.RecResult.Image.Id = str(uuid.uuid4())
            data = self.result.SerializeToString()
            producer.send_messages(kafkaTopic, data)
            print("send count:%s" % self.sendCount)
            if self.sendCount == sendNumPerSencond:
                now_time = int(time.time() * 1000)
                elapse = now_time - self.timeCount
                print("elapse:%sms" % elapse)
                if elapse <= 1000:
                    sleep_time = (1000 - elapse) * 1.0 / 1000
                    print("sleep times:%ss" % sleep_time)
                    time.sleep(sleep_time)
                    self.timeCount = int(time.time() * 1000)
                    self.sendCount = 0
                    print("send All:%s" % self.sendAll)
                    self.sendAll = self.sendAll + 1
                    break
            self.sendCount = self.sendCount + 1
            break
        producer.stop()
        client.close()
Example #10
def get_files():
    kafka = KafkaClient("129.16.125.231:9092")
    producer = SimpleProducer(kafka)
    topic = 'test'

    base_dir = ('/mnt/volume/fromAl/Data_20151215 HepG2 LNP size exp live cell '
                '24h_20151215_110422/AssayPlate_NUNC_#165305-1/')
    for root, dirs, files in os.walk(base_dir):
        if not files:
            print("files is empty")
        else:
            print("In else")
            print("root: ", root)
            print("dirs: ", dirs)
            print("files[0]: ", files[0])
            if not dirs:
                print("dirs is empty")
            print(base_dir + files[0])
            for i in range(len(files)):
                img = cv2.imread(base_dir + files[i])
                ret, jpeg = cv2.imencode('.png', img)
                producer.send_messages(topic, jpeg.tobytes())
    # Close the client once after the walk; the original closed it inside the
    # loop, killing the connection after the first directory.
    kafka.close()
Example #11
def fetchFrom():
    in_kafka = KafkaClient(IN_KAFKA_HOST)
    consumer = SimpleConsumer(in_kafka, 'trending', CONSUMER_TOPIC, max_buffer_size=20*1024*1024)
    out_kafka = KafkaClient(OUT_KAFKA_HOST)
    producer = SimpleProducer(out_kafka)

    for msg in consumer:
        record = json.loads(msg.message.value)
        if 'tags' in record and '_trends' in record['tags']:
            try:
                producer.send_messages("trends", msg.message.value)
                print(str(time.strftime("%c")) + " pump url " + record['inlink'].encode('utf-8'))
            except MessageSizeTooLargeError as err:
                logging.warning(err)
            continue
        if 'metadata' in record:
            print(record['metadata'])
        if 'metadata' in record and 'tags' in record['metadata'] and '_channels' in record['metadata']['tags']:
            try:
                producer.send_messages("channels", msg.message.value)
                print(str(time.strftime("%c")) + " pump url " + record['inlink'].encode('utf-8'))
            except MessageSizeTooLargeError as err:
                logging.warning(err)
            continue
    in_kafka.close()
    out_kafka.close()
Example #13
def fetchFrom():
    in_kafka = KafkaClient('172.31.10.154:9092')
    consumer = SimpleConsumer(in_kafka, 'fetcher', 'cpp.pages', max_buffer_size=20*1024*1024)
    out_kafka = KafkaClient("172.31.1.70:9092")
    producer = SimpleProducer(out_kafka)

    for msg in consumer:
        page = json.loads(msg.message.value)
        if 'retweet' in page['meta']:
            print("remove twitter page")
            continue
        output = {}
        output['inlink'] = ''
        output['level'] = 1
        output['url'] = page['url']
        output['fts'] = page['ts_fetch']
        output['content'] = page['content']
        try:
            producer.send_messages("process", json.dumps(output))
            print(str(time.time()) + " pump url " + output['url'].encode('utf-8'))
        except MessageSizeTooLargeError as err:
            logging.warning(err)

    in_kafka.close()
    out_kafka.close()
Example #14
def ensure_topic_existed(topic):
    try:
        server_info = get_server_kafka()
        client = KafkaClient(server_info)
        client.ensure_topic_exists(topic)
        client.close()
    except ValueError as err:
        print(err)
Example #16
def query_text_producer(text, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.search(text, count)
    for text in text_list:
        kafka_producer.send_messages("twitter",text)
    kafka.close()
    return
Example #17
def sendMsgToKafka(obj, msg):
    client = KafkaClient("c9t26359.itcs.hpecorp.net:9092")
    producer = SimpleProducer(client)
    producer.send_messages(obj, msg)
    client.close()
Example #18
def consume(kafka_host):
    kafka = KafkaClient(kafka_host)
    consumer = SimpleConsumer(kafka, 'fetcher', cfg['kafka']['pages'])
    producer = SimpleProducer(kafka)
    consumer.max_buffer_size = 20 * 1024 * 1024
    for msg in consumer:
        page = json.loads(msg.message.value)
        process(page, producer)
    kafka.close()
Example #19
def query_location_producer(lat, lng, radius, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.area_search(lat, lng, radius, count)
    for text in text_list:
        kafka_producer.send_messages("twitter",text)
    kafka.close()
    return
Example #20
def favorite_list_producer(id, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.favorite_list(id, count)
    for text in text_list:
        kafka_producer.send_messages("twitter",text)
    kafka.close()
    return
Example #21
def timeline_producer(twitter_account, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.user_timeline(twitter_account, count)
    for text in text_list:
        kafka_producer.send_messages("twitter",text)
    kafka.close()
    return
Example #22
def send_kafka_msg(iters):
    # TODO: Add try/catch statements for kafka connection
    kafka = KafkaClient(kafka_host)
    producer = SimpleProducer(kafka)
    for key, val in iters:
        msg = combine_count_json(key, val)
        producer.send_messages(
            str(topic).encode('utf-8'),
            str(msg).encode('utf-8'))
    kafka.close()
Example #23
def main():
    kafka = KafkaClient('localhost:9092')
    producer = SimpleProducer(kafka)

    topic = 'test'
    msg = b'Hello World'

    print_response(producer.send_messages(topic, msg))

    kafka.close()
Example #24
    def sendMsgToKafka(self, topic, msg):
        kafka = KafkaClient("c9t26359.itcs.hpecorp.net:9092")
        producer = SimpleProducer(kafka)
        producer.send_messages(topic, msg)
        kafka.close()
Example #25
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    bytes_topic = None
    zk = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10))
            self.topic = topic
            self.bytes_topic = topic.encode('utf-8')

        if self.create_client:
            self.client = KafkaClient('%s:%d' % (self.server.host, self.server.port))

        self.client.ensure_topic_exists(self.topic)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        try:
            offsets, = self.client.send_offset_request([OffsetRequest(kafka_bytestring(topic), partition, -1, 1)])
        except Exception:
            # XXX: We've seen some UnknownErrors here and can't debug w/o server logs
            self.zk.child.dump_logs()
            self.server.child.dump_logs()
            raise
        else:
            return offsets.offsets[0]

    def msgs(self, iterable):
        return [self.msg(x) for x in iterable]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')
Example #26
def setup_capture_new_messages_consumer(topic):
    """Seeks to the tail of the topic then returns a function that can
    consume messages from that point.
    """
    kafka = KafkaClient(get_config().cluster_config.broker_list)
    group = str('data_pipeline_clientlib_test')
    consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=_ONE_MEGABYTE)
    consumer.seek(0, 2)  # seek to tail, 0 is the offset, and 2 is the tail

    yield consumer

    kafka.close()
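Since the function above is a generator, one natural way to drive it is as a context manager; a hedged sketch (the decorator call and topic name are assumptions, as the original likely relies on a fixture framework):

from contextlib import contextmanager

with contextmanager(setup_capture_new_messages_consumer)('my-topic') as consumer:
    # Only messages produced after the seek-to-tail are visible here.
    for message in consumer:
        print(message.message.value)
        break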
Example #27
class Producer(object):

    def __init__(self, broker_ip_port):
        self.kafka = KafkaClient(broker_ip_port)
        self.producer = SimpleProducer(self.kafka)

    def send_message(self):
        response = self.producer.send_messages("HEY", "Hello World", "Kafka Deployment Worked!")
        return [("Error ", response[0].error), response]

    def close(self):
        self.kafka.close()
Example #28
def producer():
    global Flags
    t.sleep(100)
    url = '192.168.56.1:9092'
    kafka = KafkaClient(bootstrap_servers=url)
    producer = KafkaProducer(bootstrap_servers=["192.168.56.1:9092"])
    counter = 1
    while counter:

        # Use a distinct name for the HTTP response so it doesn't shadow the broker address.
        resp = requests.get(
            'https://download.open.fda.gov/device/enforcement/device-enforcement-0001-of-0001.json.zip'
        )

        data = None
        with zipfile.ZipFile(BytesIO(resp.content), "r") as z:
            for filename in z.namelist():
                print(filename)
                with z.open(filename) as f:
                    data = f.read()
                    data = json.loads(data)

        data = pd.DataFrame(data['results'])
        data = data.replace({np.NaN: None})
        data = data.drop('openfda', axis=1)
        data.insert(0, 'ID', range(0, len(data)))
        data['ID'] = data['ID'].astype('str')
        # upload to S3 (credentials redacted; supply your own key pair or use an IAM role)
        conn = boto.connect_s3('<AWS_ACCESS_KEY_ID>',
                               '<AWS_SECRET_ACCESS_KEY>')
        bucket = conn.get_bucket('device-enforcement')
        upload = Key(bucket)

        csv_buffer = StringIO()
        data.to_csv(csv_buffer, index=False)
        csv_buffer.seek(0)
        filename = 'dataset.csv'
        upload.key = filename
        upload.set_contents_from_string(csv_buffer.getvalue())

        # start consumer
        t2 = threading.Thread(target=consumer)
        t2.start()
        t3 = threading.Thread(target=Analytics1)
        t3.start()
        for index, rows in data.iterrows():
            x = rows.to_dict()
            producer.send("devices", json.dumps(x).encode('utf-8'))
            Flags = True

        t.sleep(3600)

    kafka.close()
Example #29
def main():
    kafka = KafkaClient("104.154.244.37:9092")
    producer = SimpleProducer(kafka)
    msg = ''
    topic = b'test'
    try:
        with open("/root/kafkaprojects/Iris.csv") as f:
            for msg in f:
                # Send each line's bytes; the original sent the literal b'msg' every time.
                print_response(producer.send_messages(topic, msg.encode('utf-8')))
    except LeaderNotAvailableError:
        # https://github.com/mumrah/kafka-python/issues/249
        time.sleep(1)
        print_response(producer.send_messages(topic, msg.encode('utf-8')))
    kafka.close()
Example #30
    def old_connect(self, message):
        kafka = KafkaClient(kafka_server + ":9092")
        self.producer = SimpleProducer(kafka)
        self.topic = topic

        try:
            self.producer.send_messages(self.topic, message)
        except LeaderNotAvailableError:
            # https://github.com/mumrah/kafka-python/issues/249
            time.sleep(1)
            KafkaStreamTarget.print_response(
                self.producer.send_messages(self.topic, message))

        kafka.close()
Example #31
def main():
    kafka = KafkaClient('localhost:9092')
    producer = SimpleProducer(kafka)

    topic = b'topic_test_cluster'
    msg = b'Hello World, Hello Kafka'

    try:
        print_response(producer.send_messages(topic, msg))
    except LeaderNotAvailableError:
        time.sleep(1)
        print_response(producer.send_messages(topic, msg))

    kafka.close()
Example #32
File: rest.py Project: yuex/iperf
def post_data():
    from kafka import KafkaClient
    from kafka import SimpleProducer

    kafka = KafkaClient(app.config['KAFKA_SERVER'])
    producer = SimpleProducer(kafka)
    if not request.json:
        resp = 'null post data'
    else:
        resp = producer.send_messages(app.config['KAFKA_TOPIC'], str(request.json))
        if resp:
            resp = {'error': resp[0].error, 'offset': resp[0].offset}
    kafka.close()
    return jsonify(resp)
Example #33
class Producer(KeyedProducer):
    """
    KeyedProducer that owns the underlying KafkaClient connection.
    """
    def __init__(self, hosts, client_id, timeout):
        # The original created a throwaway client for 'localhost:9092' and
        # immediately overwrote it, leaking a connection; build it once.
        self._client = KafkaClient(hosts, client_id=client_id, timeout=timeout)
        super(Producer, self).__init__(self._client)

    def close(self):
        try:
            self._client.close()
        except Exception:
            pass
Example #34
class KafkaBase(Base):

    """ A block defining common Kafka functionality.
    Properties:
        host (str): location of the database
        port (int): open port served by database
        topic (str): topic name
    """
    host = StringProperty(title='Host', default='[[KAFKA_HOST]]')
    port = IntProperty(title='Port', default=9092)
    topic = StringProperty(title='Topic', default="", allow_none=False)

    def __init__(self):
        super().__init__()
        self._kafka = None
        self._encoded_topic = None

    def configure(self, context):
        super().configure(context)

        if not len(self.topic()):
            raise ValueError("Topic cannot be empty")

        self._connect()

    def stop(self):
        self._disconnect()
        super().stop()

    def _connect(self):
        self._kafka = KafkaClient("{0}:{1}".format(self.host(), self.port()))
        self._encoded_topic = self.topic()

        # ensuring topic is valid
        try:
            self._kafka.ensure_topic_exists(self._encoded_topic)
        except Exception:
            self.logger.exception("Topic: {0} does not exist"
                                  .format(self.topic()))
            raise

    def _disconnect(self):
        if self._kafka:
            self._kafka.close()
            self._kafka = None

    @property
    def connected(self):
        return self._kafka is not None
Example #35
def main():
    kafka = KafkaClient("localhost:9092")
    producer = SimpleProducer(kafka)

    topic = b'test'
    msg = b'Hello World from Me!'

    try:
        print_response(producer.send_messages(topic, msg))
    except LeaderNotAvailableError:
        # https://github.com/mumrah/kafka-python/issues/249
        time.sleep(1)
        print_response(producer.send_messages(topic, msg))

    kafka.close()
Example #37
def pullData():
    topic = sys.argv[1]
    kafka = KafkaClient('localhost:9092')

    producer = SimpleProducer(kafka)

    #url= 'http://countdown.api.tfl.gov.uk/interfaces/ura/instant_V1'
    url = sys.argv[2]
    r = requests.get(url, stream=True)

    for line in r.iter_lines():
        producer.send_messages(topic, line)
        print(line)

    kafka.close()
Example #38
class Consumer(object):
    def __init__(self, topic):
        self.kafka = KafkaClient("localhost:9092")
        self.consumer = SimpleConsumer(self.kafka, "1", topic)

    @classmethod
    def make_queue(cls):
        return Queue(4096)

    def run(self, q):
        try:
            for i in self.consumer:
                q.put(json.loads(i.message.value), True)
        except Exception:
            self.kafka.close()
Example #39
class RTIService(object):
    def __init__(self):
        self._brokers = APP_CONFIG["rti_kafka"]["brokers"]
        self._partitions = APP_CONFIG["rti_kafka"]["partitions"]
        self._topic = APP_CONFIG["rti_kafka"]["topic"]
        self._kafka = KafkaClient(self._brokers)
        self.producer = None

    def close(self):
        self._kafka.close()

    def connect(self):
        if not self.producer:
            self.producer = RTIProducer(self._kafka)

    def emit_event(self, adgroupId, campaignId):
        adDocs = []
        conn = getConnectWithDB(dbconfig_xadcms, "xadcms_pool")
        if adgroupId != "0":
            print "a level"
            document = make_ad_document(campaignId, adgroupId, conn)
            adDocs.append(document)

        # campaign level
        elif campaignId != "0":
            # get adgroupIds for this campaign
            adgroupIdRows = getInfoFromDB(campaignId, sql_campaign_adgroups,
                                          conn)
            print("c level")
            print(adgroupIdRows)
            for row in adgroupIdRows:
                document = make_ad_document(campaignId, row["adgroupId"], conn)
                adDocs.append(document)
        campaignDoc = make_campaign_document(adDocs)
        closeDB(conn)
        envelope = make_document_envelope(1, campaignDoc)
        #        message = make_enigma_envelope(self._topic, document)
        message = make_enigma_envelope(self._topic, envelope)
        totalPartitions = self._partitions
        partition = randint(0, totalPartitions - 1)

        try:
            response = self.producer.send_messages(self._topic, partition,
                                                   message)
            print(response)
        except Exception as e:
            # Print before returning; the original returned first, so the error was never shown.
            print(e)
            return
Example #40
    def produce_messages(self):
        """
        Produce sample messages
        """
        # TODO: Support different kafka port
        kafka = KafkaClient(self.config.kafka_host)

        total_messages = self.batches * self.batch_size
        messages_batch = [create_message(random.choice(self.sample_messages)) for _ in range(self.batch_size)]

        for i in range(self.batches):
            req = ProduceRequest(topic=self.config.kafka_topic, partition=0, messages=messages_batch)
            resps = kafka.send_produce_request(payloads=[req], fail_on_error=True)
            sent_messages = (i + 1) * self.batch_size
            logging.info('Created {} out of {} sample messages'.format(sent_messages, total_messages))
        kafka.close()
Example #41
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:],
                               random_string(10).decode('utf-8'))
            self.topic = topic.encode('utf-8')

        if self.create_client:
            self.client = KafkaClient('%s:%d' %
                                      (self.server.host, self.server.port))

        self.client.ensure_topic_exists(self.topic)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        offsets, = self.client.send_offset_request(
            [OffsetRequest(topic, partition, -1, 1)])
        return offsets.offsets[0]

    def msgs(self, iterable):
        return [self.msg(x) for x in iterable]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')
Example #43
def topic_security(ip):
    """Ensures our topic exists

    If we're the first one online it won't exist, this will not be needed once
    we configure topics in the kafka configuration

    This will open a connection, create the topic, then close the connection

    **Issues**:
        - The Port is hardcoded

    :param ip: The IP of our Kafka Box
    :type ip: str
    """
    kafka = KafkaClient("%s:9092" % (ip))
    kafka.ensure_topic_exists(TOPIC)
    kafka.close()
Example #44
def low_level():
    '''low level'''
    from kafka import KafkaClient, create_message
    from kafka.protocol import KafkaProtocol
    from kafka.common import ProduceRequest

    kafka = KafkaClient(KAFKA_SERVER)

    req = ProduceRequest(topic=b'topic1', partition=1,
                         messages=[create_message(b'some message')])
    resps = kafka.send_produce_request(payloads=[req], fail_on_error=True)
    kafka.close()

    print(resps[0].topic)      # b'topic1'
    print(resps[0].partition)  # 1
    print(resps[0].error)      # 0 (hopefully)
    print(resps[0].offset)     # offset of the first message sent in this request
Example #45
    def kafkaTasks(self, addr, topic, tasks):
        try:
            from kafka import SimpleProducer, KafkaClient, KeyedProducer
        except ImportError:
            logger.error("kafka-python is not installed")
            raise Exception("kafka-python is not installed")
        kafka_client = None
        try:
            kafka_client = KafkaClient(addr)
            producer = KeyedProducer(kafka_client)

            for task in tasks:
                # cPickle is Python 2 only; on Python 3 use the `pickle` module instead.
                producer.send_messages(topic, self.manager.name, cPickle.dumps(task))
        finally:
            if kafka_client:
                kafka_client.close()
Example #46
def main():
    """
    Usage:
        dump_to_mongodb dump <topic> --host=<host> [--consumer=<consumer>]
    """
    args = docopt(main.__doc__)
    host = args["--host"]

    print "=> Connecting to {0}...".format(host)
    logger.info("=> Connecting to {0}...".format(host))
    kafka = KafkaClient(host)
    print "=> Connected."
    logger.info("=> Connected.")
    if args["dump"]:
        topic = args["<topic>"]
        consumer_id = args["--consumer"] or "dump_to_mongodb"
        consumer = SimpleConsumer(
            kafka,
            consumer_id,
            topic,
            buffer_size=1024 * 200,  # 200 KB
            fetch_size_bytes=1024 * 200,  # 200 KB
            max_buffer_size=None  # eliminate big message errors
        )
        consumer.seek(0, 1)
        while True:
            try:
                message = consumer.get_message()
                if message is None:
                    time.sleep(1)
                    continue
                val = message.message.value
                logger.info("message.message.value== %s " % val)
                print('val==', val)
                try:
                    item = json.loads(val)
                except ValueError:
                    continue
                if 'meta' in item and 'collection_name' in item['meta']:
                    _insert_item_to_monggodb(item)
            except Exception:
                traceback.print_exc()
                break
        kafka.close()
        return 0
Example #47
def Producer():
    #producer = KafkaProducer(bootstrap_servers='deepc04.acis.ufl.edu:9092')
    #producer.send('test', b"testingt123456")
    
    kafka = KafkaClient("deepc04.acis.ufl.edu:9092")
    producer = SimpleProducer(kafka)

    topic = b'test'
    msg = b'Hello World from Me/Rajendra!'

    try:
        print_response(producer.send_messages(topic, msg))
    except LeaderNotAvailableError:
        # https://github.com/mumrah/kafka-python/issues/249
        time.sleep(1)
        print_response(producer.send_messages(topic, msg))

    kafka.close()
Example #50
def fetchFrom(kafka_host):
    kafka = KafkaClient(kafka_host)
    consumer = SimpleConsumer(kafka, 'fetcher', 'toppage.pages')
    producer = SimpleProducer(kafka)

    for msg in consumer:
        page = json.loads(msg.message.value)
        output = {}
        output['inlink'] = ''
        output['level'] = 1
        output['url'] = page['orig_url']
        output['fts'] = page['ts_fetch']
        output['content'] = page['content']
        try:
            producer.send_messages("seeds", json.dumps(output))
        except MessageSizeTooLargeError as err:
            logging.warning(err)

    kafka.close()
Example #52
def main():
    kafka = KafkaClient("mykafkaslave.example.com:6667")

    producer = SimpleProducer(kafka)

    topic = b'testkafka'

    cnt = 0
    while cnt < 10000:
        cnt = cnt + 1
        # Build a bytes payload; concatenating str onto bytes raises TypeError in Python 3.
        msg = b'Hello World - 1 - ' + str(cnt).encode('utf-8')

        try:
            print_response(cnt, producer.send_messages(topic, msg))
        except LeaderNotAvailableError:
            print_response(cnt, producer.send_messages(topic, msg))

    kafka.close()
Example #53
    def setUp(self):
        logging.basicConfig()
        handler = logging.StreamHandler(stdout)
        logger = logging.getLogger("kafka")
        logger.setLevel(logging.INFO)
        logger.addHandler(handler)

        self.logger = logging.getLogger("tester")
        self.logger.debug("setup started")
        kafka_location = "127.0.0.1:9092"
        client = KafkaClient(kafka_location)
        client.ensure_topic_exists("frontier-todo")
        client.ensure_topic_exists("frontier-done")
        client.ensure_topic_exists("frontier-score")
        client.close()

        settings = Settings()
        settings.set('KAFKA_LOCATION', kafka_location)
        settings.set('SPIDER_FEED_PARTITIONS', 1)
        settings.set('SPIDER_LOG_PARTITIONS', 1)
        settings.set('QUEUE_HOSTNAME_PARTITIONING', True)
        self.messagebus = KafkaMessageBus(settings)
        spiderlog = self.messagebus.spider_log()

        # sw
        self.sw_sl_c = KafkaConsumerPolling(spiderlog.consumer(partition_id=0, type=b'sw'))

        scoring_log = self.messagebus.scoring_log()
        self.sw_us_p = scoring_log.producer()

        # db
        self.db_sl_c = KafkaConsumerPolling(spiderlog.consumer(partition_id=None, type=b'db'))
        self.db_us_c = KafkaConsumerPolling(scoring_log.consumer())

        spider_feed = self.messagebus.spider_feed()
        self.db_sf_p = spider_feed.producer()

        # spider
        self.sp_sl_p = spiderlog.producer()
        self.sp_sf_c = KafkaConsumerPolling(spider_feed.consumer(partition_id=0))
        self.logger.debug("init is done")
Example #54
def main():
    kafka = KafkaClient("192.168.33.10:9092")
    producer = SimpleProducer(kafka)
 
    topic = b'test_test'
    # msg = "{'resource':'student','action':'added','id':'db2987','object':{'last':'burge','first':'dustin','courses':['COMS W4115','COMS W4170','COMS E6111'],'uni':'db2987'}}"
    #msg = "{resource:student,action:added,id:db2987,object:{last:burge,first:dustin,courses:[COMS W4115,COMS W4170,COMS E6111],uni:db2987}}"
    msgAdded = {"resource":"student","action":"added","id":"db2987","object":{"last":"burge","first":"dustin","courses":["COMSW4115","COMSW4170","COMSE6111"],"uni":"db2987"}}
    msgRemoved = {"resource":"student","action":"removed","id":"db2987"}
    #msgModified = 

    msg = msgRemoved

    try:
        print_response(producer.send_messages(topic, json.dumps(msg)))
    except LeaderNotAvailableError:
        # https://github.com/mumrah/kafka-python/issues/249
        time.sleep(1)
        print_response(producer.send_messages(topic, json.dumps(msg)))
 
    kafka.close()