def _get_kafka_client(self, instance):
    kafka_conn_str = self._read_config(instance, 'kafka_connect_str')
    if not kafka_conn_str:
        raise BadKafkaConsumerConfiguration('Bad instance configuration')

    instance_key = self._get_instance_key(instance)
    if instance_key not in self.kafka_clients:
        conf_security_protocol = instance.get('security_protocol', 'PLAINTEXT')
        if conf_security_protocol == 'SSL':
            conf_ssl_cafile = instance.get('ssl_cafile')
            conf_ssl_check_hostname = instance.get('ssl_check_hostname', True)
            conf_ssl_certfile = instance.get('ssl_certfile')
            conf_ssl_keyfile = instance.get('ssl_keyfile')
            conf_ssl_password = instance.get('ssl_password')
            cli = KafkaClient(bootstrap_servers=kafka_conn_str,
                              client_id='dd-agent',
                              security_protocol=conf_security_protocol,
                              ssl_cafile=conf_ssl_cafile,
                              ssl_check_hostname=conf_ssl_check_hostname,
                              ssl_certfile=conf_ssl_certfile,
                              ssl_keyfile=conf_ssl_keyfile,
                              ssl_password=conf_ssl_password)
        else:
            cli = KafkaClient(bootstrap_servers=kafka_conn_str,
                              client_id='dd-agent')
        # Cache the client so repeated checks reuse the same connection
        self.kafka_clients[instance_key] = cli
    return self.kafka_clients[instance_key]
def __init__(self, topic, producer_type=ProducerType.SIMPLE,
             host_port="127.0.0.1:9092", **producer_opts):
    self.topic = topic
    self.host_port = host_port
    if producer_type == ProducerType.SIMPLE:
        self.producer = SimpleProducer(KafkaClient(host_port), **producer_opts)
    else:
        self.producer = KeyedProducer(KafkaClient(host_port), **producer_opts)
def configure_input_queue(self):
    """
    Configures the input queue that other services can use to schedule
    an event to be delivered.
    """
    client = KafkaClient(hosts=self.kafka_hosts)
    client.ensure_topic_exists(self.input_topic)
    indexed_consumer = IndexedConsumer(self.input_topic, self.kafka_hosts)
    queue_consumer = KafkaConsumer(self.input_topic,
                                   bootstrap_servers=self.kafka_hosts,
                                   group_id=CONSUMER_GROUP)
    queue_producer = SimpleProducer(KafkaClient(hosts=self.kafka_hosts))
    self.queues.append(
        InputQueue(queue_consumer, indexed_consumer, queue_producer,
                   self.number_of_queues))
def forwarder(self):
    client = KafkaClient(hosts(self.server_list, self.kafka_port))
    client.ensure_topic_exists(self.topic_name)
    producer = SimpleProducer(client, batch_send=False)
    print(producer)
    for i in range(1, 100):
        with open(self.csvfile, 'r') as FR:
            # First line of the TSV file is the header
            fields = next(FR).strip().split('\t')
            print(fields)
            for cnc_log in FR:
                values = cnc_log.strip().split('\t')
                zipped = dict(zip(fields, values))
                zipped['lower_bound'] = float(zipped['lower_bound'])
                zipped['upper_bound'] = float(zipped['upper_bound'])
                zipped['temperature'] = float(zipped['temperature'])
                zipped['no'] = int(zipped['no'])
                print(json.dumps(zipped, sort_keys=True, indent=4))
                # prob = 0.8
                # y = lambda x, prob: '<span style="background-color:#bd362f; color:white">FAIL</span>' if randint(0, x) > x * prob else 'PASS'
                # cnc_log = (datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f') + "\t" + y(10, 0.8) + '\t' + cnc_log.strip()).split('\t')
                # zipped = dict(zip(fields, cnc_log))
                # node = zipped
                sleep_sec = random.uniform(0, 3) * 5
                time.sleep(sleep_sec)
                # send_messages expects bytes on Python 3
                producer.send_messages(self.topic_name,
                                       json.dumps(zipped).encode('utf-8'))
def _create_kafka_client(self):
    kafka_conn_str = self.instance.get('kafka_connect_str')
    if not isinstance(kafka_conn_str, (string_types, list)):
        raise ConfigurationError('kafka_connect_str should be string or list of strings')
    return KafkaClient(
        bootstrap_servers=kafka_conn_str,
        client_id='dd-agent',
        request_timeout_ms=self.init_config.get('kafka_timeout', DEFAULT_KAFKA_TIMEOUT) * 1000,
        # if `kafka_client_api_version` is not set, then kafka-python automatically probes the cluster
        # for broker version during the bootstrapping process. Note that probing randomly picks a broker
        # to probe, so in a mixed-version cluster probing returns a non-deterministic result.
        api_version=self.instance.get('kafka_client_api_version'),
        # While we check for SSL params, if not present they will default to the kafka-python values
        # for plaintext connections
        security_protocol=self.instance.get('security_protocol', 'PLAINTEXT'),
        sasl_mechanism=self.instance.get('sasl_mechanism'),
        sasl_plain_username=self.instance.get('sasl_plain_username'),
        sasl_plain_password=self.instance.get('sasl_plain_password'),
        sasl_kerberos_service_name=self.instance.get('sasl_kerberos_service_name', 'kafka'),
        sasl_kerberos_domain_name=self.instance.get('sasl_kerberos_domain_name'),
        ssl_cafile=self.instance.get('ssl_cafile'),
        ssl_check_hostname=self.instance.get('ssl_check_hostname', True),
        ssl_certfile=self.instance.get('ssl_certfile'),
        ssl_keyfile=self.instance.get('ssl_keyfile'),
        ssl_crlfile=self.instance.get('ssl_crlfile'),
        ssl_password=self.instance.get('ssl_password'),
    )
def __init__(self, config):
    host = config.get("epc", "kafka.host").strip('"').strip("'")
    port = config.get("epc", "kafka.port")
    client = KafkaClient('%s:%s' % (host, port))
    # `async` became a reserved word in Python 3.7; kafka-python renamed the
    # SimpleProducer keyword to `async_send` accordingly.
    self.producer = SimpleProducer(client, async_send=False)
def configure_internal_queues(self):
    """
    Configures the internal queues used to hold references to events
    in the input queue.
    """
    for i in range(self.number_of_queues):
        client = KafkaClient(hosts=self.kafka_hosts)
        queue_name = SCHEDULER_QUEUE_FORMAT.format(2 ** i)
        client.ensure_topic_exists(queue_name)
        indexed_consumer = IndexedConsumer(self.input_topic, self.kafka_hosts)
        queue_consumer = KafkaConsumer(
            queue_name,
            bootstrap_servers=self.kafka_hosts,
            group_id=queue_name,
            consumer_timeout_ms=2000,
            auto_commit_enable=False,
        )
        queue_producer = SimpleProducer(client)
        queue_duration = 2 ** i
        self.queues.append(
            InternalQueue(
                queue_consumer,
                indexed_consumer,
                queue_producer,
                self.number_of_queues,
                queue_duration,
            ))
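# The 2**i durations above suggest delays are decomposed in binary: an event
# needing a delay of d time units is presumably routed through the queues whose
# powers of two sum to d, so N internal queues cover any delay up to 2**N - 1.
# A minimal sketch of that decomposition (illustrative only; this helper is an
# assumption, not part of the scheduler above):
def queues_for_delay(delay):
    """Return the queue durations whose sum equals `delay`."""
    return [1 << i for i in range(delay.bit_length()) if delay & (1 << i)]

assert queues_for_delay(13) == [1, 4, 8]  # 13 = 1 + 4 + 8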
def run(self):
    # client = KafkaClient("localhost:9092")
    client = KafkaClient("kafka_host:9092")
    producer = SimpleProducer(client)
    while True:
        try:
            messages = []
            for i in range(1, 10):
                messageStr = SelfGeneratedMessage().asJson()
                logger.debug('Generated message: %s', messageStr)
                messages.append(messageStr)
            producer.send_messages('test', *messages)
            # producer.send_messages('test', '{"publisher": "publisher-id", "time": "2015-11-03 15:03:30.352", "readings": [ 1, 1,1,1,1,1,1,1,1,1,1,1,4,3,3,3,32,2,1,1,1,1]}')
            time.sleep(1)
        except LeaderNotAvailableError:
            logging.exception('LeaderNotAvailableError')
            time.sleep(10)
        except KafkaUnavailableError:
            logging.exception('KafkaUnavailableError')
            time.sleep(30)
        except ConnectionError:
            logging.exception('ConnectionError')
            time.sleep(60)
        except KafkaError:
            logging.exception('KafkaError')
            time.sleep(60)
        except Exception:
            logging.exception('Exception')
            time.sleep(60)
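# For comparison, a minimal sketch of the same publish loop on the modern
# KafkaProducer API, which retries transient failures internally instead of
# the per-exception sleep ladder above. Broker address and payload are
# placeholders.
from kafka import KafkaProducer

producer = KafkaProducer(
    bootstrap_servers='localhost:9092',  # placeholder broker
    retries=5,                           # retry transient send failures
    retry_backoff_ms=500,                # pause between retries
)
producer.send('test', b'{"publisher": "publisher-id"}')
producer.flush()  # block until buffered messages are delivered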
def train(numIters):
    global users
    mcl = pm.MongoClient('10.137.168.196:27017')
    userColl = mcl.DataSet['PMLUsers']
    users = {user['userId']: user['partitionId'] for user in userColl.find()}
    mcl.close()

    kafka = KafkaClient('mozo.cloudapp.net:9092', timeout=None)
    # `async` is reserved in Python 3.7+; assumes UserProducer mirrors
    # kafka-python's rename to `async_send`.
    producer = UserProducer(kafka, kafkaTopic, users,
                            async_send=False,
                            req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE,
                            ack_timeout=200)
    try:
        for userId, partitionId in users.items():
            if userId in UoI:
                for i in range(numIters):
                    encodedMessage = simplejson.dumps({'turtleId': turtleId,
                                                       'userId': userId,
                                                       'operation': 'train_one'})
                    print(i, producer.send(userId, encodedMessage))
    finally:
        # Creating the producer before the try block guarantees it is bound
        # when this finally clause runs (the original could hit a NameError).
        producer.stop()
        kafka.close()
def add_users():
    global users
    mcl = pm.MongoClient('10.137.172.201:27017')
    kafka = KafkaClient(kafkaHost, timeout=None)
    # `async` is reserved in Python 3.7+; assumes UserProducer mirrors
    # kafka-python's rename to `async_send`.
    producer = UserProducer(kafka, kafkaTopic, users, partitions,
                            async_send=False,
                            req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE,
                            ack_timeout=200)
    coll = mcl.DataSet['PMLExpression']
    for ent in coll.find(None, {'_id': True, 'userId': True}, timeout=False):
        follower = ent['userId']
        if follower not in users:
            encodedMessage = simplejson.dumps({'turtleName': turtleName,
                                               'user': '******',
                                               'follower': follower,
                                               'operation': 'add_user'})
            print(producer.send(follower, encodedMessage))
    userColl = mcl.DataSet['PMLUsers']
    if users:
        for userId, partitionId in users.items():
            u = userColl.find_one({'userId': userId}, {'userId': userId}, timeout=False)
            if not u:
                userColl.insert({'userId': userId, 'partitionId': partitionId})
def main():
    kafka = KafkaClient("localhost:9092")
    print("Consumer established connection to kafka")
    consumer = SimpleConsumer(kafka, "my-group", "test")
    for message in consumer:
        # This will wait and print messages as they become available
        print(message)
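# The same loop on the modern KafkaConsumer API, which replaced SimpleConsumer
# (removed in kafka-python 2.0). A minimal sketch; broker address, group, and
# topic mirror the values above.
from kafka import KafkaConsumer

consumer = KafkaConsumer('test',
                         bootstrap_servers='localhost:9092',
                         group_id='my-group',
                         auto_offset_reset='earliest')
for message in consumer:  # blocks, yielding messages as they arrive
    print(message.value)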
def _detect_consumers(self):
    """
    Using zookeeper and a kafka connection, find the consumers and their
    associated topics and partitions.
    """
    try:
        # The kafka api provides no way to discover existing consumer groups, so a query
        # to zookeeper must be made. This is unfortunately fragile, as kafka is moving
        # away from zookeeper. Tested with kafka 0.8.1.1
        from kafka.client import KafkaClient
        kafka_connect_str = self._find_kafka_connection()
        kafka = KafkaClient(kafka_connect_str)

        # Find consumers and topics:
        # {'consumer_group_name': {'topic1': [0, 1, 2]}}  # partitions
        consumers = {}
        for consumer in self._ls_zookeeper('/consumers'):
            consumers[consumer] = dict(
                (topic, kafka.topic_partitions[topic])
                for topic in self._ls_zookeeper('/consumers/%s/offsets' % consumer))

        log.info("\tInstalling kafka_consumer plugin.")
        self.config['kafka_consumer'] = {
            'init_config': None,
            'instances': [{'kafka_connect_str': kafka_connect_str,
                           'full_output': True,
                           'consumer_groups': dict(consumers)}],
        }
    except Exception:
        log.error('Error Detecting Kafka consumers/topics/partitions')
def __init__(self):
    comlog.init_logger("./../log/vdata.log")
    self.ub_conf = UbConfig("./../conf/log_tail.conf")
    self._conf_info = self.ub_conf.get_conf_info()
    self._file_path = self._conf_info["[LOG_FILE_CONF]"]["file_path"]
    self._file_name_pattern = self._conf_info["[LOG_FILE_CONF]"]["file_name_pattern"]
    self._log_max_length = int(self._conf_info["[LOG_FILE_CONF]"]["log_max_length"])
    self._batch_flush_counter = int(self._conf_info["[LOG_FILE_CONF]"]["batch_flush_counter"])
    self._topic_name = self._conf_info["[KAFKA]"]["topic_name"]
    self._interval_time = self._conf_info["[TIME_INTERVAL]"]["interval"]
    self.init_data_file()
    self.s = sched.scheduler(time.time, time.sleep)

    # Prefer an explicit broker list; otherwise resolve brokers via zookeeper
    if 'broker_list' in self._conf_info["[KAFKA]"]:
        self.broker_list = self._conf_info["[KAFKA]"]["broker_list"]
    elif 'zookeeper' in self._conf_info["[KAFKA]"]:
        self.broker_list = ','.join(
            self.get_broker_list(self._conf_info["[KAFKA]"]['zookeeper']))
    else:
        raise ValueError("zookeeper and broker_list are both null in config file")

    self.client = KafkaClient(self.broker_list)
    self.producer = SimpleProducer(self.client)

    # tags
    self.tags = {}
    for key in self._conf_info["[TAGS]"]:
        self.tags[key] = self._conf_info["[TAGS]"][key]
def main():
    global freq_array
    client = KafkaClient('ip-172-31-28-55.ec2.internal:6667')
    producer = SimpleProducer(client)

    # Build the frequency axis: bin i corresponds to i * fs / fft_size Hz
    fft_size = 1000
    fs = 92
    freq_array = np.array((1 * fs / fft_size))
    for i in range(2, int(fft_size / 2)):
        freq_i = np.array((i * fs / fft_size))
        freq_array = np.vstack((freq_array, freq_i))

    with open('xfourmag.csv', 'rt') as f:
        print('opening csv')
        reader = csv.reader(f)
        row = next(reader)
        mags = np.array(row)
        for row in reader:
            mags = np.vstack((mags, row))

    json_data = {
        'time': int(time.time()),
        'fft': np.hstack((freq_array[0:31], mags[0:31])).tolist(),
        'sensor_id': '1',
        'reading_type': '0',
    }
    print('sending data...')
    producer.send_messages('shm', (json.dumps(json_data)).encode('utf-8'))
    print('data sent! :)')
def create_kafka_producer(kafka_host_port):
    try:
        client = KafkaClient(kafka_host_port)
        # `async` is reserved in Python 3.7+; kafka-python renamed the
        # SimpleProducer keyword to `async_send`.
        producer = SimpleProducer(client, async_send=True)
        return producer
    except Exception as excp:
        # Exception.message was removed in Python 3; format the exception itself
        raise Exception("Failed to set up kafka producer: %s" % excp)
def __init__(self, config):
    super(KConsumer, self).__init__()
    host = config.get("epc", "kafka.host").strip('"').strip("'")
    port = config.get("epc", "kafka.port")
    client = KafkaClient('%s:%s' % (host, port))
    # `async` is reserved in Python 3.7+; kafka-python renamed the
    # SimpleProducer keyword to `async_send`.
    self.producer = SimpleProducer(client, async_send=False)
    topicsEvents = [
        'accountDownloadEnd', 'execDetailsEnd', 'updateAccountTime',
        'deltaNeutralValidation', 'orderStatus', 'updateAccountValue',
        'historicalData', 'openOrderEnd', 'updatePortfolio', 'managedAccounts',
        'contractDetailsEnd', 'positionEnd', 'bondContractDetails',
        'accountSummary', 'updateNewsBulletin', 'scannerParameters',
        'tickString', 'accountSummaryEnd', 'scannerDataEnd', 'commissionReport',
        'error', 'tickGeneric', 'tickPrice', 'nextValidId', 'openOrder',
        'realtimeBar', 'contractDetails', 'execDetails', 'tickOptionComputation',
        'updateMktDepth', 'scannerData', 'currentTime', 'error_0', 'error_1',
        'tickSnapshotEnd', 'tickSize', 'receiveFA', 'connectionClosed',
        'position', 'updateMktDepthL2', 'fundamentalData', 'tickEFP']
    self.consumer = KafkaConsumer(
        *[(v, 0) for v in topicsEvents],
        metadata_broker_list=['%s:%s' % (host, port)],
        group_id='epc.group',
        auto_commit_enable=True,
        auto_commit_interval_ms=30 * 1000,
        auto_offset_reset='largest')  # discard old messages
    self.kwrapper = KWrapper(self.producer)
def main():
    global options
    options = parse_options()
    logger.setLevel(logging.DEBUG if options.verbose else logging.INFO)
    logger.info(options)

    client = KafkaClient(bootstrap_servers=options.kafka_host)
    # Force a metadata refresh so the topic checks below see the cluster state
    future = client.cluster.request_update()
    client.poll(future=future)

    producer = KafkaProducer(
        bootstrap_servers=options.kafka_host,
        value_serializer=lambda m: json.dumps(m).encode('ascii'))
    db = get_db_conn()
    for topic in options.topic:
        try:
            create_topic(topic, client)
            data = get_data(topic, db)
            for ele in data:
                print(topic, ele)
                producer.send(topic, ele).add_callback(
                    on_send_success).add_errback(on_send_error)
                producer.flush()
                time.sleep(0.001)
        except Exception as e:
            traceback.print_exc()
            logger.debug(e)
def run(self): client = KafkaClient( "10.206.216.13:19092,10.206.212.14:19092,10.206.209.25:19092") consumer = SimpleConsumer(client, "test-group", "guantest") for message in consumer: print(message.message.value)
def configure(self, **configs):
    """
    Configuration settings can be passed to constructor,
    otherwise defaults will be used:

    .. code:: python

        client_id='kafka.consumer.kafka',
        group_id=None,
        fetch_message_max_bytes=1024*1024,
        fetch_min_bytes=1,
        fetch_wait_max_ms=100,
        refresh_leader_backoff_ms=200,
        metadata_broker_list=None,
        socket_timeout_ms=30*1000,
        auto_offset_reset='largest',
        deserializer_class=lambda msg: msg,
        auto_commit_enable=False,
        auto_commit_interval_ms=60 * 1000,
        auto_commit_interval_messages=None,
        consumer_timeout_ms=-1

    Configuration parameters are described in more detail at
    http://kafka.apache.org/documentation.html#highlevelconsumerapi
    """
    self._config = {}
    for key in DEFAULT_CONSUMER_CONFIG:
        self._config[key] = configs.pop(key, DEFAULT_CONSUMER_CONFIG[key])

    if configs:
        raise KafkaConfigurationError('Unknown configuration key(s): ' +
                                      str(list(configs.keys())))

    # Handle str/bytes conversions
    for config_key in BYTES_CONFIGURATION_KEYS:
        if isinstance(self._config[config_key], six.string_types):
            logger.warning("Converting configuration key '%s' to bytes" %
                           config_key)
            self._config[config_key] = self._config[config_key].encode('utf-8')

    # Check auto-commit configuration
    if self._config['auto_commit_enable']:
        if not self._config['group_id']:
            raise KafkaConfigurationError(
                'KafkaConsumer configured to auto-commit without required '
                'consumer group (group_id)')
        logger.info("Configuring consumer to auto-commit offsets")
        self._reset_auto_commit()

    if self._config['metadata_broker_list'] is None:
        raise KafkaConfigurationError('metadata_broker_list required to '
                                      'configure KafkaConsumer')

    self._client = KafkaClient(self._config['metadata_broker_list'],
                               client_id=self._config['client_id'],
                               timeout=(self._config['socket_timeout_ms'] / 1000.0))
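# A hypothetical call site for the method above, overriding a few of the
# documented defaults; `consumer` stands in for an instance of the class.
# Only keys present in DEFAULT_CONSUMER_CONFIG are accepted, anything else
# raises KafkaConfigurationError. Broker address is a placeholder.
consumer.configure(
    metadata_broker_list=['localhost:9092'],  # required, no default
    group_id='my-group',
    auto_commit_enable=True,                  # requires group_id
    auto_offset_reset='smallest',
)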
def _create_kafka_client(self):
    kafka_conn_str = self.instance.get('kafka_connect_str')
    if not isinstance(kafka_conn_str, (string_types, list)):
        raise ConfigurationError('kafka_connect_str should be string or list of strings')
    return KafkaClient(
        bootstrap_servers=kafka_conn_str,
        client_id='dd-agent',
        request_timeout_ms=self.init_config.get('kafka_timeout', DEFAULT_KAFKA_TIMEOUT) * 1000,
        api_version=self.instance.get('kafka_client_api_version'),
        # While we check for SSL params, if not present they will default
        # to the kafka-python values for plaintext connections
        security_protocol=self.instance.get('security_protocol', 'PLAINTEXT'),
        sasl_mechanism=self.instance.get('sasl_mechanism'),
        sasl_plain_username=self.instance.get('sasl_plain_username'),
        sasl_plain_password=self.instance.get('sasl_plain_password'),
        sasl_kerberos_service_name=self.instance.get('sasl_kerberos_service_name', 'kafka'),
        sasl_kerberos_domain_name=self.instance.get('sasl_kerberos_domain_name'),
        ssl_cafile=self.instance.get('ssl_cafile'),
        ssl_check_hostname=self.instance.get('ssl_check_hostname', True),
        ssl_certfile=self.instance.get('ssl_certfile'),
        ssl_keyfile=self.instance.get('ssl_keyfile'),
        ssl_crlfile=self.instance.get('ssl_crlfile'),
        ssl_password=self.instance.get('ssl_password'),
    )
def add_data():
    global users
    global trainData
    checkUserPartitionMapping()
    mcl = pm.MongoClient('10.137.172.201:27017')
    kafka = KafkaClient(kafkaHost, timeout=None)
    # `async` is reserved in Python 3.7+; assumes UserProducer mirrors
    # kafka-python's rename to `async_send`.
    producer = UserProducer(kafka, kafkaTopic, users, partitions,
                            async_send=False,
                            req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE,
                            ack_timeout=200)
    coll = mcl.DataSet['PMLExpression']
    for ent in coll.find(None, {'_id': True, 'userId': True}, timeout=False):
        entity = str(ent['_id'])
        user = ent['userId']
        if ent['_id'] in trainData[user]:
            encodedMessage = simplejson.dumps({'turtleName': turtleName,
                                               'user': user,
                                               'entity': entity,
                                               'operation': 'add_data'})
            print(producer.send(user, encodedMessage))
    for user, partitionId in users.items():
        encodedMessage = simplejson.dumps({'turtleName': turtleName,
                                           'user': user,
                                           'operation': 'save_turtle'})
        print(producer.send(user, encodedMessage))
    mcl.close()
def __init__(self, settings):
    # dynamic import of the settings file (strip the .py from the filename)
    self.settings = importlib.import_module(settings[:-3])
    # only need kafka for both uses
    self.kafka_conn = KafkaClient(self.settings.KAFKA_HOSTS)
def setup_kafka(self, settings):
    """Set up the kafka connection and idle signal.

    This should be called after the spider has set its crawler object.

    :param settings: The current Scrapy settings being used
    :type settings: scrapy.settings.Settings
    """
    if not hasattr(self, 'topic') or not self.topic:
        self.topic = '%s-starturls' % self.name

    hosts = settings.get('SCRAPY_KAFKA_HOSTS', ['localhost:9092'])
    consumer_group = settings.get('SCRAPY_KAFKA_SPIDER_CONSUMER_GROUP',
                                  'scrapy-kafka')
    _kafka = KafkaClient(hosts)
    # wait at most 1sec for more messages. Otherwise continue
    self.consumer = SimpleConsumer(_kafka, consumer_group, self.topic,
                                   auto_commit=True, iter_timeout=1.0)

    # idle signal is called when the spider has no requests left,
    # that's when we will schedule new requests from kafka topic
    self.crawler.signals.connect(self.spider_idle, signal=signals.spider_idle)
    self.crawler.signals.connect(self.item_scraped, signal=signals.item_scraped)
    # log the attribute actually set above (the original referenced an
    # undefined self.kafka_topic)
    self.log("Reading URLs from kafka topic '%s'" % self.topic)
def _create_kafka_client(self):
    kafka_conn_str = self.instance.get('kafka_connect_str')
    if not isinstance(kafka_conn_str, (string_types, list)):
        raise ConfigurationError('kafka_connect_str should be string or list of strings')
    kafka_version = self.instance.get('kafka_client_api_version')
    if isinstance(kafka_version, str):
        kafka_version = tuple(map(int, kafka_version.split(".")))
    kafka_client = KafkaClient(
        bootstrap_servers=kafka_conn_str,
        client_id='dd-agent',
        request_timeout_ms=self.init_config.get('kafka_timeout', DEFAULT_KAFKA_TIMEOUT) * 1000,
        # if `kafka_client_api_version` is not set, then kafka-python automatically probes the cluster
        # for broker version during the bootstrapping process. Note that probing randomly picks a broker
        # to probe, so in a mixed-version cluster probing returns a non-deterministic result.
        api_version=kafka_version,
        # While we check for SSL params, if not present they will default to the kafka-python values
        # for plaintext connections
        security_protocol=self.instance.get('security_protocol', 'PLAINTEXT'),
        sasl_mechanism=self.instance.get('sasl_mechanism'),
        sasl_plain_username=self.instance.get('sasl_plain_username'),
        sasl_plain_password=self.instance.get('sasl_plain_password'),
        sasl_kerberos_service_name=self.instance.get('sasl_kerberos_service_name', 'kafka'),
        sasl_kerberos_domain_name=self.instance.get('sasl_kerberos_domain_name'),
        ssl_cafile=self.instance.get('ssl_cafile'),
        ssl_check_hostname=self.instance.get('ssl_check_hostname', True),
        ssl_certfile=self.instance.get('ssl_certfile'),
        ssl_keyfile=self.instance.get('ssl_keyfile'),
        ssl_crlfile=self.instance.get('ssl_crlfile'),
        ssl_password=self.instance.get('ssl_password'),
    )
    # Force initial population of the local cluster metadata cache
    kafka_client.poll(future=kafka_client.cluster.request_update())
    if kafka_client.cluster.topics(exclude_internal_topics=False) is None:
        raise RuntimeError("Local cluster metadata cache did not populate.")
    return kafka_client
def _get_kafka_client(self, instance):
    kafka_conn_str = instance.get('kafka_connect_str')
    if not isinstance(kafka_conn_str, (string_types, list)):
        raise BadKafkaConsumerConfiguration(
            'kafka_connect_str should be string or list of strings')
    instance_key = tuple(kafka_conn_str)  # cast to tuple in case it's a list
    if instance_key not in self.kafka_clients:
        # While we check for SSL params, if not present they will default
        # to the kafka-python values for plaintext connections
        cli = KafkaClient(
            bootstrap_servers=kafka_conn_str,
            client_id='dd-agent',
            security_protocol=instance.get('security_protocol', 'PLAINTEXT'),
            sasl_mechanism=instance.get('sasl_mechanism'),
            sasl_plain_username=instance.get('sasl_plain_username'),
            sasl_plain_password=instance.get('sasl_plain_password'),
            sasl_kerberos_service_name=instance.get('sasl_kerberos_service_name', 'kafka'),
            sasl_kerberos_domain_name=instance.get('sasl_kerberos_domain_name'),
            ssl_cafile=instance.get('ssl_cafile'),
            ssl_check_hostname=instance.get('ssl_check_hostname', True),
            ssl_certfile=instance.get('ssl_certfile'),
            ssl_keyfile=instance.get('ssl_keyfile'),
            ssl_crlfile=instance.get('ssl_crlfile'),
            ssl_password=instance.get('ssl_password'))
        self.kafka_clients[instance_key] = cli
    return self.kafka_clients[instance_key]
def __init__(self, config):
    super(ExternalProcessComm, self).__init__()
    host = config.get("epc", "kafka.host").strip('"').strip("'")
    port = config.get("epc", "kafka.port")
    client = KafkaClient('%s:%s' % (host, port))
    # `async` is reserved in Python 3.7+; kafka-python renamed the
    # SimpleProducer keyword to `async_send`.
    self.producer = SimpleProducer(client, async_send=False)
    print('create EPC')
    # the KafkaConsumer will fail with a no-topic error if the topic is not
    # found in the broker; the next lines use the producer to produce each
    # required topic, which creates it if it does not already exist
    [self.post_msg(v, 'init msg')
     for k, v in ExternalProcessComm.EPC_TOPICS.items()]
    self.consumer = KafkaConsumer(
        *[(v, 0) for k, v in ExternalProcessComm.EPC_TOPICS.items()],
        metadata_broker_list=['%s:%s' % (host, port)],
        group_id='epc.group',
        auto_commit_enable=True,
        auto_commit_interval_ms=30 * 1000,
        auto_offset_reset='largest')  # discard old messages
def process(spouts):
    '''
    Returns a named tuple of type PartitionsSummary.
    '''
    results = []
    total_depth = 0
    total_delta = 0
    brokers = []
    for s in spouts:
        for p in s.partitions:
            try:
                k = KafkaClient(p['broker']['host'] + ':' + str(p['broker']['port']))
            except socket.gaierror as e:
                raise ProcessorError('Failed to contact Kafka broker %s (%s)'
                                     % (p['broker']['host'], str(e)))
            # -2 requests the earliest available offset, -1 the latest
            earliest_off = OffsetRequest(str(p['topic']), p['partition'], -2, 1)
            latest_off = OffsetRequest(str(p['topic']), p['partition'], -1, 1)
            earliest = k.send_offset_request([earliest_off])[0]
            latest = k.send_offset_request([latest_off])[0]
            current = p['offset']

            brokers.append(p['broker']['host'])
            total_depth = total_depth + (latest.offsets[0] - earliest.offsets[0])
            total_delta = total_delta + (latest.offsets[0] - current)

            results.append(PartitionState._make([
                p['broker']['host'],
                p['topic'],
                p['partition'],
                earliest.offsets[0],
                latest.offsets[0],
                latest.offsets[0] - earliest.offsets[0],
                s.id,
                current,
                latest.offsets[0] - current]))
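# On the modern kafka-python API the same depth/lag arithmetic can be written
# with beginning_offsets()/end_offsets() instead of raw OffsetRequest calls.
# A minimal sketch; broker, topic, partition, and committed offset are
# placeholders.
from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer(bootstrap_servers='localhost:9092')  # placeholder
tp = TopicPartition('my-topic', 0)                            # placeholder

earliest = consumer.beginning_offsets([tp])[tp]  # same as the -2 request
latest = consumer.end_offsets([tp])[tp]          # same as the -1 request
current = 0                                      # the group's committed offset

depth = latest - earliest  # messages retained in the partition
lag = latest - current     # messages not yet consumed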
def run(self): client = KafkaClient("vsu-01:9092") consumer = SimpleConsumer(client, "test-group", "my.price") for message in consumer: print(message)
def get_client_from_section(self, section, timeout=-1):
    """Obtain a KafkaClient from a config section.

    The config section must have a ``hosts`` and ``client_id`` option.
    An optional ``connect_timeout`` defines the connection timeout.

    ``timeout`` specifies how many seconds to retry attempting to connect
    to Kafka in case the initial connection failed. -1 indicates to not
    retry. This is useful when attempting to connect to a cluster that
    may still be coming online, for example.
    """
    hosts = self.c.get(section, 'hosts')
    client_id = self.c.get(section, 'client_id')
    connect_timeout = 60
    if self.c.has_option(section, 'connect_timeout'):
        connect_timeout = self.c.getint(section, 'connect_timeout')

    start = time.time()
    while True:
        try:
            return KafkaClient(hosts, client_id=client_id,
                               timeout=connect_timeout)
        except KafkaUnavailableError:
            if timeout == -1:
                raise
            if time.time() - start > timeout:
                raise Exception('timeout reached trying to connect to Kafka')
            time.sleep(0.1)
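# A hypothetical config section this helper would accept; the option names
# mirror those read above, while the section name and hosts are placeholders.
import configparser

raw = """
[kafka]
hosts = kafka01:9092,kafka02:9092
client_id = my-service
connect_timeout = 30
"""
c = configparser.ConfigParser()
c.read_string(raw)
assert c.getint('kafka', 'connect_timeout') == 30

# Hypothetical call site: retry the initial connection for up to 2 minutes,
# useful while a cluster is still coming online.
# client = obj.get_client_from_section('kafka', timeout=120)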
def forwarder(self):
    client = KafkaClient(hosts(self.server_list, self.kafka_port))
    client.ensure_topic_exists(self.topic_name)
    producer = SimpleProducer(client, batch_send=False)
    print(producer)
    no = 1
    for i in range(1, 10000):
        with open(self.csvfile, 'r') as FR:
            # First line of the TSV file is the header
            first_line = next(FR)
            print(first_line)
            fields = first_line.strip().split('\t')
            print(fields)
            for cnc_log in FR:
                print(cnc_log)
                values = cnc_log.strip().split('\t')
                zipped = dict(zip(fields, values))
                zipped['lower_bound'] = float(zipped['lower_bound'])
                zipped['upper_bound'] = float(zipped['upper_bound'])
                zipped['spindle'] = float(zipped['spindle'])
                # zipped['no'] = int(zipped['no'])
                zipped['no'] = no
                zipped['tool_no'] = int(zipped['tool_no'])
                # zipped['tool_no'] = i
                print(json.dumps(zipped, sort_keys=True, indent=4))
                sleep_sec = 1
                time.sleep(sleep_sec)
                # send_messages expects bytes on Python 3
                producer.send_messages(self.topic_name,
                                       json.dumps(zipped).encode('utf-8'))
                no = no + 1