Example no. 1
    def _get_kafka_client(self, instance):
        kafka_conn_str = self._read_config(instance, 'kafka_connect_str')
        if not kafka_conn_str:
            raise BadKafkaConsumerConfiguration('Bad instance configuration')

        instance_key = self._get_instance_key(instance)
        if instance_key not in self.kafka_clients:
            conf_security_protocol = instance.get('security_protocol',
                                                  'PLAINTEXT')
            if conf_security_protocol == 'SSL':
                conf_ssl_cafile = instance.get('ssl_cafile')
                conf_ssl_check_hostname = instance.get('ssl_check_hostname',
                                                       True)
                conf_ssl_certfile = instance.get('ssl_certfile')
                conf_ssl_keyfile = instance.get('ssl_keyfile')
                conf_ssl_password = instance.get('ssl_password')
                cli = KafkaClient(bootstrap_servers=kafka_conn_str,
                                  client_id='dd-agent',
                                  security_protocol=conf_security_protocol,
                                  ssl_cafile=conf_ssl_cafile,
                                  ssl_check_hostname=conf_ssl_check_hostname,
                                  ssl_certfile=conf_ssl_certfile,
                                  ssl_keyfile=conf_ssl_keyfile,
                                  ssl_password=conf_ssl_password)
                self.kafka_clients[instance_key] = cli
            else:
                cli = KafkaClient(bootstrap_servers=kafka_conn_str,
                                  client_id='dd-agent')
                self.kafka_clients[instance_key] = cli

        return self.kafka_clients[instance_key]
Example no. 2
    def __init__(self, topic, producer_type=ProducerType.SIMPLE,\
            host_port="127.0.0.1:9092", **producer_opts):

        self.topic = topic
        self.host_port = host_port
        if producer_type == ProducerType.SIMPLE:
            self.producer = SimpleProducer(KafkaClient(host_port),\
                    **producer_opts)
        else:
            self.producer = KeyedProducer(KafkaClient(host_port),\
                    **producer_opts)
Example no. 3
 def configure_input_queue(self):
     """
     configures the input queue that other services can use to schedule an event to be delivered
     """
     client = KafkaClient(hosts=self.kafka_hosts)
     client.ensure_topic_exists(self.input_topic)
     indexed_consumer = IndexedConsumer(self.input_topic, self.kafka_hosts)
     queue_consumer = KafkaConsumer(self.input_topic,
                                    bootstrap_servers=self.kafka_hosts,
                                    group_id=CONSUMER_GROUP)
     queue_producer = SimpleProducer(KafkaClient(hosts=self.kafka_hosts))
     self.queues.append(
         InputQueue(queue_consumer, indexed_consumer, queue_producer,
                    self.number_of_queues))
Example no. 4
 def forwarder(self):
     client = KafkaClient(hosts(self.server_list, self.kafka_port))
     client.ensure_topic_exists(self.topic_name)
     producer = SimpleProducer(client, batch_send=False)
     print producer
     for i in xrange(1, 100):
         with open(self.csvfile, 'r') as FR:
             fields = next(FR).strip().split('\t')
             print fields
             for cnc_log in FR:
                 values = cnc_log.strip().split('\t')
                 zipped = dict(zip(fields, values))
                 zipped['lower_bound'] = float(zipped['lower_bound'])
                 zipped['upper_bound'] = float(zipped['upper_bound'])
                 zipped['temperature'] = float(zipped['temperature'])
                 zipped['no'] = int(zipped['no'])
                 print json.dumps(zipped, sort_keys=True, indent=4)
                 # prob = 0.8
                 # y = lambda x, prob: '<span style="background-color:#bd362f; color:white">FAIL</span>' if randint(0,x) > x*prob  else 'PASS'
                 # cnc_log = (datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')+"\t"+y(10,0.8)+'\t'+cnc_log.strip()).split('\t')
                 # zipped = dict(zip(fields,cnc_log))
                 # node = zipped
                 sleep_sec = random.uniform(0, 3) * 5
                 time.sleep(sleep_sec)
                 producer.send_messages(self.topic_name, json.dumps(zipped))
Example no. 5
 def _create_kafka_client(self):
     kafka_conn_str = self.instance.get('kafka_connect_str')
     if not isinstance(kafka_conn_str, (string_types, list)):
         raise ConfigurationError(
             'kafka_connect_str should be string or list of strings')
     return KafkaClient(
         bootstrap_servers=kafka_conn_str,
         client_id='dd-agent',
         request_timeout_ms=self.init_config.get(
             'kafka_timeout', DEFAULT_KAFKA_TIMEOUT) * 1000,
         # if `kafka_client_api_version` is not set, then kafka-python automatically probes the cluster for broker
         # version during the bootstrapping process. Note that probing randomly picks a broker to probe, so in a
         # mixed-version cluster probing returns a non-deterministic result.
         api_version=self.instance.get('kafka_client_api_version'),
         # While we check for SSL params, if not present they will default to the kafka-python values for plaintext
         # connections
         security_protocol=self.instance.get('security_protocol',
                                             'PLAINTEXT'),
         sasl_mechanism=self.instance.get('sasl_mechanism'),
         sasl_plain_username=self.instance.get('sasl_plain_username'),
         sasl_plain_password=self.instance.get('sasl_plain_password'),
         sasl_kerberos_service_name=self.instance.get(
             'sasl_kerberos_service_name', 'kafka'),
         sasl_kerberos_domain_name=self.instance.get(
             'sasl_kerberos_domain_name'),
         ssl_cafile=self.instance.get('ssl_cafile'),
         ssl_check_hostname=self.instance.get('ssl_check_hostname', True),
         ssl_certfile=self.instance.get('ssl_certfile'),
         ssl_keyfile=self.instance.get('ssl_keyfile'),
         ssl_crlfile=self.instance.get('ssl_crlfile'),
         ssl_password=self.instance.get('ssl_password'),
     )
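For orientation, here is a minimal sketch of an instance configuration exercising the keys read above; every value is a placeholder, and only kafka_connect_str is required (the remaining keys fall back to the kafka-python defaults). Note that Example no. 24 below converts a dotted kafka_client_api_version string into a tuple before handing it to KafkaClient, so the expected format of that key depends on how the check parses it.

# Hypothetical instance dict; key names mirror the instance.get() calls above.
instance = {
    'kafka_connect_str': 'localhost:9092',    # or a list of bootstrap servers
    'security_protocol': 'SASL_SSL',
    'sasl_mechanism': 'PLAIN',
    'sasl_plain_username': 'agent',
    'sasl_plain_password': 'secret',
    'ssl_cafile': '/path/to/ca.pem',           # placeholder path
    'ssl_check_hostname': True,
}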
Example no. 6
    def __init__(self, config):

        host = config.get("epc", "kafka.host").strip('"').strip("'")
        port = config.get("epc", "kafka.port")

        client = KafkaClient('%s:%s' % (host, port))
        self.producer = SimpleProducer(client, async=False)
Example no. 7
 def configure_internal_queues(self):
     """
     configures the internal queues used hold references to events in the input queue
     """
     for i in range(self.number_of_queues):
         client = KafkaClient(hosts=self.kafka_hosts)
         queue_name = SCHEDULER_QUEUE_FORMAT.format(2**i)
         client.ensure_topic_exists(queue_name)
         indexed_consumer = IndexedConsumer(self.input_topic,
                                            self.kafka_hosts)
         queue_consumer = KafkaConsumer(
             queue_name,
             bootstrap_servers=self.kafka_hosts,
             group_id=queue_name,
             consumer_timeout_ms=2000,
             auto_commit_enable=False,
         )
         queue_producer = SimpleProducer(client)
         queue_duration = 2**i
         self.queues.append(
             InternalQueue(
                 queue_consumer,
                 indexed_consumer,
                 queue_producer,
                 self.number_of_queues,
                 queue_duration,
             ))
Example no. 8
    def run(self):
        #client = KafkaClient("localhost:9092")
        client = KafkaClient("kafka_host:9092")
        producer = SimpleProducer(client)

        while True:
            try:
                messages = []
                for i in xrange(1, 10):
                    messageStr = SelfGeneratedMessage().asJson()
                    logger.debug('Generated message: %s', messageStr)
                    messages.append(messageStr)

                producer.send_messages('test', *messages)
                # producer.send_messages('test', '{"publisher": "publisher-id", "time": "2015-11-03 15:03:30.352", "readings": [ 1, 1,1,1,1,1,1,1,1,1,1,1,4,3,3,3,32,2,1,1,1,1]}')

                time.sleep(1)
            except LeaderNotAvailableError as e:
                logging.exception('LeaderNotAvailableError')
                time.sleep(10)
            except KafkaUnavailableError as e:
                logging.exception('KafkaUnavailableError')
                time.sleep(30)
            except ConnectionError as e:
                logging.exception('ConnectionError')
                time.sleep(60)
            except KafkaError as e:
                logging.exception('KafkaError')
                time.sleep(60)
            except Exception as e:
                logging.exception('Exception')
                time.sleep(60)
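SelfGeneratedMessage is not defined in this snippet. A minimal sketch consistent with the commented-out sample payload above (field names come from that comment; the class itself is an assumption) could look like this:

import json
import random
from datetime import datetime

class SelfGeneratedMessage(object):
    """Hypothetical stand-in that produces a payload shaped like the commented-out sample."""
    def asJson(self):
        return json.dumps({
            'publisher': 'publisher-id',
            'time': datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3],
            'readings': [random.randint(0, 5) for _ in range(22)],
        })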
Example no. 9
def train(numIters):
    global users
    try:
        mcl = pm.MongoClient('10.137.168.196:27017')
        userColl = mcl.DataSet['PMLUsers']
        users = {
            user['userId']: user['partitionId']
            for user in userColl.find()
        }
        mcl.close()
        kafka = KafkaClient('mozo.cloudapp.net:9092', timeout=None)
        producer = UserProducer(kafka,
                                kafkaTopic,
                                users,
                                async=False,
                                req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE,
                                ack_timeout=200)
        for userId, partitionId in users.iteritems():
            if userId in UoI.keys():
                for i in range(numIters):
                    #print "iteration " + str(i)
                    encodedMessage = simplejson.dumps({
                        'turtleId': turtleId,
                        'userId': userId,
                        'operation': 'train_one'
                    })
                    print i, producer.send(userId, encodedMessage)
    finally:
        producer.stop()
        kafka.close()
Example no. 10
def add_users():
    global users
    
    mcl = pm.MongoClient('10.137.172.201:27017')
    kafka = KafkaClient(kafkaHost, timeout=None)
    producer = UserProducer(kafka, kafkaTopic, users, partitions, async=False,
                            req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE,
                            ack_timeout=200)
    coll = mcl.DataSet['PMLExpression']
    
    for ent in coll.find(None, {'_id':True, 'userId':True}, timeout=False):
        follower = ent['userId']
        if follower not in users:
            encodedMessage = simplejson.dumps({'turtleName':turtleName,
                                               'user':'******',
                                               'follower':follower,
                                               'operation':'add_user'})
            print producer.send(follower, encodedMessage)
    
    userColl = mcl.DataSet['PMLUsers']
    if users:
        for userId, partitionId in users.iteritems():            
            u = userColl.find_one({'userId':userId}, {'userId':userId}, timeout=False)
            if not u:
                userColl.insert({'userId':userId, 'partitionId':partitionId});
Example no. 11
def main():
    kafka = KafkaClient("localhost:9092")
    print("Consumer established connection to kafka")
    consumer = SimpleConsumer(kafka, "my-group", "test")
    for message in consumer:
        # This will wait and print messages as they become available
        print(message)
Example no. 12
    def _detect_consumers(self):
        """ Using zookeeper and a kafka connection find the consumers, associated topics and partitions.
        """
        try:
            # The kafka api provides no way to discover existing consumer groups so a query to
            # zookeeper must be made. This is unfortunately fragile as kafka is moving away from
            # zookeeper. Tested with kafka 0.8.1.1
            from kafka.client import KafkaClient
            kafka_connect_str = self._find_kafka_connection()
            kafka = KafkaClient(kafka_connect_str)

            # {'consumer_group_name': { 'topic1': [ 0, 1, 2] # partitions }}
            consumers = {}
            # Find consumers and topics
            for consumer in self._ls_zookeeper('/consumers'):
                consumers[consumer] = dict(
                    (topic, kafka.topic_partitions[topic])
                    for topic in self._ls_zookeeper('/consumers/%s/offsets' %
                                                    consumer))

            log.info("\tInstalling kafka_consumer plugin.")
            self.config['kafka_consumer'] = {
                'init_config':
                None,
                'instances': [{
                    'kafka_connect_str': kafka_connect_str,
                    'full_output': True,
                    'consumer_groups': dict(consumers)
                }]
            }
        except Exception:
            log.error('Error Detecting Kafka consumers/topics/partitions')
Example no. 13
    def __init__(self):
        comlog.init_logger("./../log/vdata.log")
        self.ub_conf = UbConfig("./../conf/log_tail.conf")
        self._conf_info = self.ub_conf.get_conf_info()
        self._file_path = self._conf_info["[LOG_FILE_CONF]"]["file_path"]
        self._file_name_pattern = self._conf_info["[LOG_FILE_CONF]"][
            "file_name_pattern"]
        self._log_max_length = int(
            self._conf_info["[LOG_FILE_CONF]"]["log_max_length"])
        self._batch_flush_counter = int(
            self._conf_info["[LOG_FILE_CONF]"]["batch_flush_counter"])
        self._topic_name = self._conf_info["[KAFKA]"]["topic_name"]
        self._interval_time = self._conf_info["[TIME_INTERVAL]"]["interval"]

        self.init_data_file()
        self.s = sched.scheduler(time.time, time.sleep)
        if self._conf_info["[KAFKA]"].has_key('broker_list'):
            self.broker_list = self._conf_info["[KAFKA]"]["broker_list"]
        elif self._conf_info["[KAFKA]"].has_key('zookeeper'):
            self.broker_list = ','.join(
                self.get_broker_list(self._conf_info["[KAFKA]"]['zookeeper']))
        else:
            raise ValueError, " zookeeper and broker_list are both null in config file"
        self.client = KafkaClient(self.broker_list)
        self.producer = SimpleProducer(self.client)

        #tags
        self.tags = {}
        for key in self._conf_info["[TAGS]"]:
            self.tags[key] = self._conf_info["[TAGS]"][key]
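get_broker_list is referenced above but not shown. One way to resolve broker addresses from the /brokers/ids registry in zookeeper, sketched here with the kazoo library (this implementation is an assumption, not part of the original code):

import json
from kazoo.client import KazooClient

def get_broker_list(self, zookeeper_connect):
    """Return host:port strings for every broker registered under /brokers/ids (assumed helper)."""
    zk = KazooClient(hosts=zookeeper_connect)
    zk.start()
    try:
        brokers = []
        for broker_id in zk.get_children('/brokers/ids'):
            data, _stat = zk.get('/brokers/ids/%s' % broker_id)
            info = json.loads(data)
            brokers.append('%s:%s' % (info['host'], info['port']))
        return brokers
    finally:
        zk.stop()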
Example no. 14
def main():
    global freq_array
    client = KafkaClient('ip-172-31-28-55.ec2.internal:6667')
    producer = SimpleProducer(client)

    fft_size = 1000
    fs = 92
    freq_array = np.array((1 * fs / fft_size))
    for i in range(2, int(fft_size / 2)):
        freq_i = np.array((i * fs / fft_size))
        freq_array = np.vstack((freq_array, freq_i))

    with open('xfourmag.csv', 'rt') as f:
        print('opening csv')
        reader = csv.reader(f)
        row = next(reader)
        #global mags
        mags = np.array(row)
        for row in reader:
            #mags += row
            mags = np.vstack((mags, row))
    #print(mags)
    #print(freq_array)

    json_data = {
        'time': int(time.time()),
        'fft': np.hstack((freq_array[0:31], mags[0:31])).tolist(),
        'sensor_id': '1',
        'reading_type': '0'
    }
    print('sending data...')
    producer.send_messages('shm', (json.dumps(json_data)).encode('utf-8'))
    print('data sent! :)')
Example no. 15
def create_kafka_producer(kafka_host_port):
    try:
        client = KafkaClient(kafka_host_port)
        producer = SimpleProducer(client, async=True)
        return producer
    except Exception as excp:
        raise Exception("Failed to set up kafka producer: %s" % excp.message)
Example no. 16
    def __init__(self, config):

        super(KConsumer, self).__init__()
        host = config.get("epc", "kafka.host").strip('"').strip("'")
        port = config.get("epc", "kafka.port")

        client = KafkaClient('%s:%s' % (host, port))
        self.producer = SimpleProducer(client, async=False)

        topicsEvents = ['accountDownloadEnd', 'execDetailsEnd', 'updateAccountTime', 'deltaNeutralValidation', 'orderStatus',\
                  'updateAccountValue', 'historicalData', 'openOrderEnd', 'updatePortfolio', 'managedAccounts', 'contractDetailsEnd',\
                  'positionEnd', 'bondContractDetails', 'accountSummary', 'updateNewsBulletin', 'scannerParameters', \
                  'tickString', 'accountSummaryEnd', 'scannerDataEnd', 'commissionReport', 'error', 'tickGeneric', 'tickPrice', \
                  'nextValidId', 'openOrder', 'realtimeBar', 'contractDetails', 'execDetails', 'tickOptionComputation', \
                  'updateMktDepth', 'scannerData', 'currentTime', 'error_0', 'error_1', 'tickSnapshotEnd', 'tickSize', \
                  'receiveFA', 'connectionClosed', 'position', 'updateMktDepthL2', 'fundamentalData', 'tickEFP']


        self.consumer = KafkaConsumer( *[(v,0) for v in topicsEvents], \
                                       metadata_broker_list=['%s:%s' % (host, port)],\
                                       group_id = 'epc.group',\
                                       auto_commit_enable=True,\
                                       auto_commit_interval_ms=30 * 1000,\
                                       auto_offset_reset='largest') # discard old ones

        self.kwrapper = KWrapper(self.producer)
Example no. 17
def main():
    global options
    options = parse_options()
    logger.setLevel(logging.DEBUG if options.verbose else logging.INFO)
    logger.info(options)

    client = KafkaClient(bootstrap_servers=options.kafka_host)
    future = client.cluster.request_update()
    client.poll(future=future)
    producer = KafkaProducer(
        bootstrap_servers=options.kafka_host,
        value_serializer=lambda m: json.dumps(m).encode('ascii'))

    db = get_db_conn()
    for topic in options.topic:
        try:
            create_topic(topic, client)
            data = get_data(topic, db)
            for ele in data:
                print(topic, ele)
                producer.send(topic, ele).add_callback(
                    on_send_success).add_errback(on_send_error)
            producer.flush()
            time.sleep(0.001)
        except Exception as e:
            traceback.print_exc()
            logger.debug(e)
Example no. 18
    def run(self):
        client = KafkaClient(
            "10.206.216.13:19092,10.206.212.14:19092,10.206.209.25:19092")
        consumer = SimpleConsumer(client, "test-group", "guantest")

        for message in consumer:
            print(message.message.value)
Example no. 19
    def configure(self, **configs):
        """
        Configuration settings can be passed to constructor,
        otherwise defaults will be used:

        .. code:: python

            client_id='kafka.consumer.kafka',
            group_id=None,
            fetch_message_max_bytes=1024*1024,
            fetch_min_bytes=1,
            fetch_wait_max_ms=100,
            refresh_leader_backoff_ms=200,
            metadata_broker_list=None,
            socket_timeout_ms=30*1000,
            auto_offset_reset='largest',
            deserializer_class=lambda msg: msg,
            auto_commit_enable=False,
            auto_commit_interval_ms=60 * 1000,
            auto_commit_interval_messages=None,
            consumer_timeout_ms=-1

        Configuration parameters are described in more detail at
        http://kafka.apache.org/documentation.html#highlevelconsumerapi
        """
        self._config = {}
        for key in DEFAULT_CONSUMER_CONFIG:
            self._config[key] = configs.pop(key, DEFAULT_CONSUMER_CONFIG[key])

        if configs:
            raise KafkaConfigurationError('Unknown configuration key(s): ' +
                                          str(list(configs.keys())))

        # Handle str/bytes conversions
        for config_key in BYTES_CONFIGURATION_KEYS:
            if isinstance(self._config[config_key], six.string_types):
                logger.warning("Converting configuration key '%s' to bytes" %
                               config_key)
                self._config[config_key] = self._config[config_key].encode(
                    'utf-8')

        if self._config['auto_commit_enable']:
            if not self._config['group_id']:
                raise KafkaConfigurationError(
                    'KafkaConsumer configured to auto-commit without required consumer group (group_id)'
                )

        # Check auto-commit configuration
        if self._config['auto_commit_enable']:
            logger.info("Configuring consumer to auto-commit offsets")
            self._reset_auto_commit()

        if self._config['metadata_broker_list'] is None:
            raise KafkaConfigurationError('metadata_broker_list required to '
                                          'configure KafkaConsumer')

        self._client = KafkaClient(self._config['metadata_broker_list'],
                                   client_id=self._config['client_id'],
                                   timeout=(self._config['socket_timeout_ms'] /
                                            1000.0))
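A usage sketch passing a few of the documented settings through the constructor, as the docstring suggests (broker address, topic, and group name are placeholders; this assumes the legacy kafka-python KafkaConsumer that this configure() belongs to):

consumer = KafkaConsumer(
    'my-topic',                                  # placeholder topic
    metadata_broker_list=['localhost:9092'],     # required, per the check above
    group_id='my-group',                         # required when auto-commit is enabled
    auto_commit_enable=True,
    auto_commit_interval_ms=30 * 1000,
)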
Example no. 20
 def _create_kafka_client(self):
     kafka_conn_str = self.instance.get('kafka_connect_str')
     if not isinstance(kafka_conn_str, (string_types, list)):
         raise ConfigurationError(
             'kafka_connect_str should be string or list of strings')
     return KafkaClient(
         bootstrap_servers=kafka_conn_str,
         client_id='dd-agent',
         request_timeout_ms=self.init_config.get(
             'kafka_timeout', DEFAULT_KAFKA_TIMEOUT) * 1000,
         api_version=self.instance.get('kafka_client_api_version'),
         # While we check for SSL params, if not present they will default
         # to the kafka-python values for plaintext connections
         security_protocol=self.instance.get('security_protocol',
                                             'PLAINTEXT'),
         sasl_mechanism=self.instance.get('sasl_mechanism'),
         sasl_plain_username=self.instance.get('sasl_plain_username'),
         sasl_plain_password=self.instance.get('sasl_plain_password'),
         sasl_kerberos_service_name=self.instance.get(
             'sasl_kerberos_service_name', 'kafka'),
         sasl_kerberos_domain_name=self.instance.get(
             'sasl_kerberos_domain_name'),
         ssl_cafile=self.instance.get('ssl_cafile'),
         ssl_check_hostname=self.instance.get('ssl_check_hostname', True),
         ssl_certfile=self.instance.get('ssl_certfile'),
         ssl_keyfile=self.instance.get('ssl_keyfile'),
         ssl_crlfile=self.instance.get('ssl_crlfile'),
         ssl_password=self.instance.get('ssl_password'),
     )
Example no. 21
def add_data():
    global users
    global trainData
    checkUserPartitionMapping()
    mcl = pm.MongoClient('10.137.172.201:27017')        
    kafka = KafkaClient(kafkaHost, timeout=None)
    producer = UserProducer(kafka, kafkaTopic, users, partitions, async=False,
                      req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE,
                      ack_timeout=200)
    coll = mcl.DataSet['PMLExpression']

    for ent in coll.find(None, {'_id':True, 'userId':True}, timeout=False):
        entity = str(ent['_id'])
        user = ent['userId']
        if ent['_id'] in trainData[user]:
            encodedMessage = simplejson.dumps({'turtleName':turtleName,
                                               'user':user,
                                               'entity':entity,
                                               'operation':'add_data'})
            print producer.send(user, encodedMessage)
        
    for user, partitionId in users.iteritems():
        encodedMessage = simplejson.dumps({'turtleName':turtleName,
                                           'user':user,
                                           'operation':'save_turtle'})
        print producer.send(user, encodedMessage)
    mcl.close()
Example no. 22
    def __init__(self, settings):
        # dynamic import of settings file
        # remove the .py from the filename
        self.settings = importlib.import_module(settings[:-3])

        # only need kafka for both uses
        self.kafka_conn = KafkaClient(self.settings.KAFKA_HOSTS)
Example no. 23
    def setup_kafka(self, settings):
        """Setup redis connection and idle signal.

        This should be called after the spider has set its crawler object.

        :param settings: The current Scrapy settings being used
        :type settings: scrapy.settings.Settings
        """
        if not hasattr(self, 'topic') or not self.topic:
            self.topic = '%s-starturls' % self.name

        hosts = settings.get('SCRAPY_KAFKA_HOSTS', ['localhost:9092'])
        consumer_group = settings.get('SCRAPY_KAFKA_SPIDER_CONSUMER_GROUP',
                                      'scrapy-kafka')
        _kafka = KafkaClient(hosts)
        # wait at most 1sec for more messages. Otherwise continue
        self.consumer = SimpleConsumer(_kafka,
                                       consumer_group,
                                       self.topic,
                                       auto_commit=True,
                                       iter_timeout=1.0)
        # idle signal is called when the spider has no requests left,
        # that's when we will schedule new requests from kafka topic
        self.crawler.signals.connect(self.spider_idle,
                                     signal=signals.spider_idle)
        self.crawler.signals.connect(self.item_scraped,
                                     signal=signals.item_scraped)
        self.log("Reading URLs from kafka topic '%s'" % self.kafka_topic)
Example no. 24
 def _create_kafka_client(self):
     kafka_conn_str = self.instance.get('kafka_connect_str')
     if not isinstance(kafka_conn_str, (string_types, list)):
         raise ConfigurationError('kafka_connect_str should be string or list of strings')
     kafka_version = self.instance.get('kafka_client_api_version')
     if isinstance(kafka_version, str):
         kafka_version = tuple(map(int, kafka_version.split(".")))
     kafka_client = KafkaClient(
         bootstrap_servers=kafka_conn_str,
         client_id='dd-agent',
         request_timeout_ms=self.init_config.get('kafka_timeout', DEFAULT_KAFKA_TIMEOUT) * 1000,
         # if `kafka_client_api_version` is not set, then kafka-python automatically probes the cluster for broker
         # version during the bootstrapping process. Note that probing randomly picks a broker to probe, so in a
         # mixed-version cluster probing returns a non-deterministic result.
         api_version=kafka_version,
         # While we check for SSL params, if not present they will default to the kafka-python values for plaintext
         # connections
         security_protocol=self.instance.get('security_protocol', 'PLAINTEXT'),
         sasl_mechanism=self.instance.get('sasl_mechanism'),
         sasl_plain_username=self.instance.get('sasl_plain_username'),
         sasl_plain_password=self.instance.get('sasl_plain_password'),
         sasl_kerberos_service_name=self.instance.get('sasl_kerberos_service_name', 'kafka'),
         sasl_kerberos_domain_name=self.instance.get('sasl_kerberos_domain_name'),
         ssl_cafile=self.instance.get('ssl_cafile'),
         ssl_check_hostname=self.instance.get('ssl_check_hostname', True),
         ssl_certfile=self.instance.get('ssl_certfile'),
         ssl_keyfile=self.instance.get('ssl_keyfile'),
         ssl_crlfile=self.instance.get('ssl_crlfile'),
         ssl_password=self.instance.get('ssl_password'),
     )
     # Force initial population of the local cluster metadata cache
     kafka_client.poll(future=kafka_client.cluster.request_update())
     if kafka_client.cluster.topics(exclude_internal_topics=False) is None:
         raise RuntimeError("Local cluster metadata cache did not populate.")
     return kafka_client
Example no. 25
    def _get_kafka_client(self, instance):
        kafka_conn_str = instance.get('kafka_connect_str')
        if not isinstance(kafka_conn_str, (string_types, list)):
            raise BadKafkaConsumerConfiguration(
                'kafka_connect_str should be string or list of strings')

        instance_key = tuple(
            kafka_conn_str)  # cast to tuple in case it's a list
        if instance_key not in self.kafka_clients:
            # While we check for SSL params, if not present they will default
            # to the kafka-python values for plaintext connections
            cli = KafkaClient(
                bootstrap_servers=kafka_conn_str,
                client_id='dd-agent',
                security_protocol=instance.get('security_protocol',
                                               'PLAINTEXT'),
                sasl_mechanism=instance.get('sasl_mechanism'),
                sasl_plain_username=instance.get('sasl_plain_username'),
                sasl_plain_password=instance.get('sasl_plain_password'),
                sasl_kerberos_service_name=instance.get(
                    'sasl_kerberos_service_name', 'kafka'),
                sasl_kerberos_domain_name=instance.get(
                    'sasl_kerberos_domain_name'),
                ssl_cafile=instance.get('ssl_cafile'),
                ssl_check_hostname=instance.get('ssl_check_hostname', True),
                ssl_certfile=instance.get('ssl_certfile'),
                ssl_keyfile=instance.get('ssl_keyfile'),
                ssl_crlfile=instance.get('ssl_crlfile'),
                ssl_password=instance.get('ssl_password'))
            self.kafka_clients[instance_key] = cli

        return self.kafka_clients[instance_key]
Example no. 26
    def __init__(self, config):

        super(ExternalProcessComm, self).__init__()
        host = config.get("epc", "kafka.host").strip('"').strip("'")
        port = config.get("epc", "kafka.port")

        client = KafkaClient('%s:%s' % (host, port))
        self.producer = SimpleProducer(client, async=False)
        #sleep(1)

        print 'create EPC'

        # the kafkaConsumer will fail with a no topic error if the topic is not found in the broker
        # the next line uses the producer to produce the required topic which will create one
        # if it has not been created already

        [
            self.post_msg(v, 'init msg')
            for k, v in ExternalProcessComm.EPC_TOPICS.iteritems()
        ]
        self.consumer = KafkaConsumer( *[(v,0) for k,v in ExternalProcessComm.EPC_TOPICS.iteritems()], \
                                       metadata_broker_list=['%s:%s' % (host, port)],\
                                       group_id = 'epc.group',\
                                       auto_commit_enable=True,\
                                       auto_commit_interval_ms=30 * 1000,\
                                       auto_offset_reset='largest') # discard old ones
Example no. 27
def process(spouts):
    '''
    Returns a named tuple of type PartitionsSummary.
    '''
    results = []
    total_depth = 0
    total_delta = 0
    brokers = []
    for s in spouts:
        for p in s.partitions:
            try:
                k = KafkaClient(p['broker']['host'] + ':' +
                                str(p['broker']['port']))
            except socket.gaierror, e:
                raise ProcessorError('Failed to contact Kafka broker %s (%s)' %
                                     (p['broker']['host'], str(e)))
            earliest_off = OffsetRequest(str(p['topic']), p['partition'], -2,
                                         1)
            latest_off = OffsetRequest(str(p['topic']), p['partition'], -1, 1)
            earliest = k.send_offset_request([earliest_off])[0]
            latest = k.send_offset_request([latest_off])[0]
            current = p['offset']

            brokers.append(p['broker']['host'])
            total_depth = total_depth + (latest.offsets[0] -
                                         earliest.offsets[0])
            total_delta = total_delta + (latest.offsets[0] - current)

            results.append(
                PartitionState._make([
                    p['broker']['host'], p['topic'], p['partition'],
                    earliest.offsets[0], latest.offsets[0],
                    latest.offsets[0] - earliest.offsets[0], s.id, current,
                    latest.offsets[0] - current
                ]))
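PartitionState and PartitionsSummary are referenced but not defined in this snippet; a sketch of definitions consistent with the values passed to PartitionState._make() above (the field names are assumptions) might be:

from collections import namedtuple

# Field names are guesses inferred from the order of values passed to _make() above.
PartitionState = namedtuple('PartitionState', [
    'broker', 'topic', 'partition', 'earliest', 'latest',
    'depth', 'spout', 'current', 'delta',
])
PartitionsSummary = namedtuple('PartitionsSummary', [
    'total_depth', 'total_delta', 'num_partitions', 'num_brokers', 'partitions',
])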
Example no. 28
    def run(self):
        client = KafkaClient("vsu-01:9092")
        consumer = SimpleConsumer(client, "test-group", "my.price")

        for message in consumer:

            print(message)
Example no. 29
    def get_client_from_section(self, section, timeout=-1):
        """Obtain a KafkaClient from a config section.

        The config section must have a ``hosts`` and ``client_id`` option.
        An optional ``connect_timeout`` defines the connection timeout.

        ``timeout`` specifies how many seconds to retry attempting to connect
        to Kafka in case the initial connection failed. -1 indicates to not
        retry. This is useful when attempting to connect to a cluster that may
        still be coming online, for example.
        """
        hosts = self.c.get(section, 'hosts')
        client_id = self.c.get(section, 'client_id')
        connect_timeout = 60
        if self.c.has_option(section, 'connect_timeout'):
            connect_timeout = self.c.getint(section, 'connect_timeout')

        start = time.time()
        while True:
            try:
                return KafkaClient(hosts,
                                   client_id=client_id,
                                   timeout=connect_timeout)
            except KafkaUnavailableError:
                if timeout == -1:
                    raise

            if time.time() - start > timeout:
                raise Exception('timeout reached trying to connect to Kafka')

            time.sleep(0.1)
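A hedged sketch of the ini-style section this method reads, and of a call that retries the initial connection for up to 30 seconds (section name and all values are placeholders):

# Hypothetical ini-style section consumed by get_client_from_section():
#
#   [kafka]
#   hosts = localhost:9092
#   client_id = my-service
#   connect_timeout = 10
#
# `clients` stands for an instance of the containing class (an assumption).
client = clients.get_client_from_section('kafka', timeout=30)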
Example no. 30
 def forwarder(self):
     client = KafkaClient(hosts(self.server_list, self.kafka_port))
     client.ensure_topic_exists(self.topic_name)
     producer = SimpleProducer(client, batch_send=False)
     print producer
     no = 1
     for i in xrange(1,10000):
         with open(self.csvfile, 'r') as FR:
             first_line = next(FR)
             print first_line
             fields = first_line.lstrip().rstrip().split('\t')
             print fields
             for cnc_log in FR:
                 print cnc_log
                 values = cnc_log.strip().split('\t')
                 zipped = dict(zip(fields,values))
                 zipped['lower_bound'] = float(zipped['lower_bound'])
                 zipped['upper_bound'] = float(zipped['upper_bound'])
                 zipped['spindle'] = float(zipped['spindle'])
                 # zipped['no'] = int(zipped['no'])
                 zipped['no'] = no
                 zipped['tool_no'] = int(zipped['tool_no'])
                 # zipped['tool_no'] = i
                 print json.dumps(zipped,sort_keys=True,indent=4)
                 sleep_sec = 1
                 time.sleep(sleep_sec)
                 producer.send_messages(self.topic_name, json.dumps(zipped))
                 no = no +1