def __init__(self, *args, **kwargs):
    import kafka
    import zlib  # needed for decompress_fun below; imported alongside kafka

    super(KafkaRandomReader, self).__init__(*args, **kwargs)
    brokers = self.read_option('brokers')
    group = self.read_option('group')
    topic = self.read_option('topic')

    client = kafka.KafkaClient(map(bytes, brokers))

    # TODO: Remove these comments when next steps are decided.
    # If resume is set to true, then child should not load initial offsets
    # child_loads_initial_offsets = False if settings.get('RESUME') else True
    # self.consumer = kafka.MultiProcessConsumer(client, group, topic, num_procs=1,
    #     child_loads_initial_offsets=child_loads_initial_offsets,
    #     auto_commit=False)

    self.consumer = kafka.SimpleConsumer(client, group, topic,
                                         auto_commit=False)
    self.decompress_fun = zlib.decompress
    self.processor = self.create_processor()
    self.partitions = client.get_partition_ids_for_topic(topic)

    self.logger.info('KafkaRandomReader has been initiated. '
                     'Topic: {}. Group: {}'.format(
                         self.read_option('topic'),
                         self.read_option('group')))

    self.logger.info('Running random sampling')
    self._reservoir = self.fill_reservoir()
    self.logger.info('Random sampling completed, ready to process batches')
def _get_server(self):
    if not self.kafka_server:
        self.kafka_server = kafka.KafkaClient("%s:%s" % (self.host, self.port))
        self.kafka_consumer = kafka.KafkaConsumer(
            self.topic,
            bootstrap_servers=["%s:%s" % (self.host, self.port)])
def __init__(self, kafkahost, kafkaport, kafkatopics):
    kafkaaddress = '{0}:{1}'.format(kafkahost, kafkaport)
    self.__kafkaclient = kafka.KafkaClient(kafkaaddress)
    # Synchronous delivery is the default; the old async=False keyword is
    # dropped because `async` is a reserved word from Python 3.7 on.
    self.__kafkaproducer = kafka.SimpleProducer(self.__kafkaclient)
    self.__kafkatopics = kafkatopics
    self.__counter = 0
def kafka_send(kurl, temp_fpath, format, topic, queue=None):
    try:
        kafka_python_client = kafka_python.KafkaClient(kurl)
        kafka_python_client.ensure_topic_exists(topic)
        kafka = pykafka.KafkaClient(hosts=kurl)
        publish_topic_object = kafka.topics[topic]
        # the default partitioner is random_partitioner
        producer = publish_topic_object.get_producer()
        if format == 'csv':
            with open(temp_fpath, 'r') as fp:
                text = fp.read()
                producer.produce([text])
        elif format == 'graphite':
            with open(temp_fpath, 'r') as fp:
                for line in fp.readlines():
                    producer.produce([line])
        else:
            raise EmitterUnsupportedFormat('Unsupported format: %s' % format)
        queue and queue.put((True, None))
    except Exception as e:
        if queue:
            queue.put((False, e))
        else:
            raise
    finally:
        queue and queue.close()
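# A hedged note on the pykafka producer above: get_producer() returns an
# asynchronous producer by default, so queued messages are only guaranteed to
# be delivered once the producer is stopped. A minimal sketch (hypothetical
# helper name; same kurl/topic arguments as kafka_send, mirroring the
# list-style produce() call that the older pykafka releases used above):
import pykafka

def kafka_send_and_flush(kurl, payload, topic):
    client = pykafka.KafkaClient(hosts=kurl)
    # Leaving the with-block stops the producer, flushing queued messages.
    with client.topics[topic].get_producer() as producer:
        producer.produce([payload])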
def check_connection(self):
    try:
        client = kafka.KafkaClient(**self.connection_info)
    except Exception:
        return False
    else:
        client.close()
        return True
def connect(broker):
    kafka_client = kafka.KafkaClient(broker)
    producer = kafka.SimpleProducer(kafka_client)
    #producer = kafka.KafkaProducer(bootstrap_servers=broker)
    return producer
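# The commented-out line in connect() points at the current API: the
# KafkaClient/SimpleProducer pair was removed in kafka-python 2.0. A minimal
# sketch of the same helper against the modern KafkaProducer (connect_modern
# is a hypothetical name; `broker` is the same argument as above):
import kafka

def connect_modern(broker):
    # KafkaProducer manages its own connections; send() returns a future,
    # so .get() can be chained when synchronous delivery is needed, e.g.
    # connect_modern(broker).send('my-topic', b'payload').get(timeout=10)
    return kafka.KafkaProducer(bootstrap_servers=broker)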
def _get_server(self):
    if not self.kafka_server:
        self.kafka_server = kafka.KafkaClient(
            "%s:%s" % (self.host, self.port))
        #self.kafka_consumer = kafka.KafkaConsumer(self.topic, bootstrap_servers=["%s:%s" % (self.host, self.port)])
        self.kafka_consumer = kafka.KafkaConsumer(
            bootstrap_servers=["%s:%s" % (self.host, self.port)])
        # Qualified as kafka.TopicPartition so no separate import is needed.
        self.kafka_consumer.assign([kafka.TopicPartition(self.topic, 0)])
        self.kafka_consumer.seek_to_end()
def testStart(self):
    self.thread.start()
    time.sleep(15)
    self.producer.stop()

    message = kafka.SimpleConsumer(
        kafka.KafkaClient(self.url), "group1", self.topic).get_message()
    assert message is not None
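# SimpleConsumer.get_message() no longer exists in kafka-python 2.0; a hedged
# equivalent of the test's fetch using the modern KafkaConsumer (fetch_one is
# a hypothetical helper; url/topic mirror self.url/self.topic above):
import kafka

def fetch_one(url, topic, timeout_ms=15000):
    consumer = kafka.KafkaConsumer(
        topic,
        bootstrap_servers=url,
        group_id="group1",
        auto_offset_reset="earliest",
        consumer_timeout_ms=timeout_ms)  # stop iterating when idle this long
    # Iteration blocks until a record arrives or the timeout expires;
    # next(..., None) turns the resulting StopIteration into None.
    return next(iter(consumer), None)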
def _ensure_connection(self):
    if self.kafka_client:
        return
    try:
        self.kafka_client = kafka.KafkaClient(self.hostaddrs)
        self.producer = kafka.SimpleProducer(self.kafka_client)
    except KafkaError as e:
        LOG.exception(_LE("Kafka Connection is not available: %s"), e)
        self.kafka_client = None
def initialize(self):
    cluster = kafka.KafkaClient(leaders[0])
    self.consumer = kafka.SimpleConsumer(cluster, "default_group", "WikiTest",
                                         buffer_size=16384,
                                         max_buffer_size=(10 * 1024 * 1024))
    self.consumer.seek(0)
    self.counter = 0
def _is_kafka_service_running():
    """Checks whether the Kafka service is running or not"""
    kafka_running = True
    try:
        broker = KAFKA_BROKER
        kafka.KafkaClient(broker)
    except KafkaError:
        # Kafka service is not running.
        kafka_running = False
    return kafka_running
def _ensure_connection(self):
    if self._producer:
        return
    try:
        client = kafka.KafkaClient("%s:%s" % (self._host, self._port))
        self._producer = kafka.SimpleProducer(client)
    except Exception as e:
        LOG.exception(_LE("Failed to connect to Kafka service: %s"), e)
        raise messaging.DeliveryFailure('Kafka Client is not available, '
                                        'please restart Kafka client')
def ensure_topics_exist():
    all_topics = ["reddit", "twitter", "google-news", "hacker-news"]
    admin = kafka.admin.KafkaAdminClient(bootstrap_servers=[KAFKA_URL])
    client = kafka.KafkaClient([KAFKA_URL])
    existing_topics = client.topics
    topics = [
        kafka.admin.NewTopic(topic, 1, 1)
        for topic in all_topics
        if topic not in existing_topics
    ]
    admin.create_topics(topics)
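# The `.topics` attribute above belongs to the legacy client; a sketch of the
# same existence check using only current kafka-python APIs, where
# KafkaConsumer.topics() fetches the cluster's topic metadata (hypothetical
# function name; same KAFKA_URL assumed):
import kafka
import kafka.admin

def ensure_topics_exist_modern():
    all_topics = ["reddit", "twitter", "google-news", "hacker-news"]
    admin = kafka.admin.KafkaAdminClient(bootstrap_servers=[KAFKA_URL])
    existing_topics = kafka.KafkaConsumer(
        bootstrap_servers=[KAFKA_URL]).topics()
    topics = [
        kafka.admin.NewTopic(topic, num_partitions=1, replication_factor=1)
        for topic in all_topics
        if topic not in existing_topics
    ]
    if topics:  # skip the admin call when nothing is missing
        admin.create_topics(topics)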
def create_topic(self, topic):
    ''' Create topic '''
    client = kafka.KafkaClient(hosts=self.server + ':9092')
    res = client.ensure_topic_exists(topic)
    return res
def __init__(self, num_partitions, topic, *args, **kwargs):
    self.con = kafka.KafkaClient(KAFKAHOST)
    # Synchronous producer: False was the default for the old async flag,
    # and `async` is a reserved word from Python 3.7 on.
    self.client = kafka.SimpleProducer(self.con)
    self.topic = topic
    print("Set topic to %s" % self.topic)
    # Replaces a bare try/except around kwargs["bulksize"].
    self.bulksize = kwargs.get("bulksize", 1)
    self.msg_bulk = []
def initialize(self):
    cluster = kafka.KafkaClient(leaders[0])
    self.consumer = kafka.SimpleConsumer(cluster, "default_group", "WikiTest",
                                         buffer_size=8192,
                                         max_buffer_size=(10 * 1024 * 1024))
    self.consumer.seek(0)
    self.counter = 0
    log.debug("Starting Kafka Consumer")
def create_topic(topic):
    ''' Create topic '''
    import kafka
    client = kafka.KafkaClient(hosts=config.get('kafka_server') + ':' +
                               config.get('kafka_port'))
    res = client.ensure_topic_exists(topic)
    return res
def _publish_to_kafka_no_retries(self, url):
    if kafka_python is None or pykafka is None:
        raise ImportError('Please install kafka and pykafka')
    try:
        # Renamed from `list` to avoid shadowing the builtin.
        parts = url[len('kafka://'):].split('/')
        if len(parts) == 2:
            kurl = parts[0]
            topic = parts[1]
        else:
            raise Exception(
                'The kafka url provided does not seem to be valid: %s. '
                'It should be something like this: '
                'kafka://[ip|hostname]:[port]/[kafka_topic]. '
                'For example: kafka://1.1.1.1:1234/metrics' % url)

        h = NullHandler()
        logging.getLogger('kafka').addHandler(h)

        # XXX We should definitely create a long lasting kafka client
        kafka_python_client = kafka_python.KafkaClient(kurl)
        kafka_python_client.ensure_topic_exists(topic)
        kafka = pykafka.KafkaClient(hosts=kurl)
        publish_topic_object = kafka.topics[topic]
        # the default partitioner is random_partitioner
        producer = publish_topic_object.get_producer()

        if self.format == 'csv':
            with open(self.temp_fpath, 'r') as fp:
                text = fp.read()
            logger.debug(producer.produce([text]))
        elif self.format == 'graphite':
            with open(self.temp_fpath, 'r') as fp:
                for line in fp.readlines():
                    producer.produce([line])
        else:
            logger.debug('Could not send data because {0} is an unknown '
                         'format'.format(self.format))
            # A bare `raise` here had no active exception to re-raise;
            # raise the same error the csv/graphite emitter above uses.
            raise EmitterUnsupportedFormat('Unsupported format: %s'
                                           % self.format)
        kafka_python_client.close()
    except Exception as e:
        # kafka.close()
        logger.debug('Could not send data to {0}: {1}'.format(url, e))
        raise
def main():
    # Kafka information
    cluster = kafka.KafkaClient("localhost:9092")
    # Synchronous producer (False was the default for the old async flag,
    # which is a reserved word in Python 3.7+).
    prod = kafka.SimpleProducer(cluster)
    topic = "my-topic"

    # Node information. Currently hardcoded.
    # TODO user should be able to specify a graph as an input to the problem
    nodeCount = 200

    # Parameter generation information
    generationType = "sine"
    avgRate = 100000        # per second average
    sinePeriod = 20         # seconds
    sinePerturbation = 0.5  # scale factor

    # debugging
    count = 0

    to = time.time()
    print(to)
    if generationType == "sine":
        periodLocation = 0
        nextParam = nextTimeSineDemand(avgRate, sinePerturbation,
                                       sinePeriod, to)
        nT = nextTime(nextParam)
    else:
        nT = nextTime(1)

    while True:
        t = time.time()
        if t - to >= nT:
            to = t
            count = count + 1
            if count % 1000 == 0:
                print("mark")
            if generationType == "sine":
                nT = nextTime(nextParam)
                nextParam = nextTimeSineDemand(avgRate, sinePerturbation,
                                               sinePeriod, t)
                # print(nextParam)
            else:
                nT = nextTime(10000)
            n = random.randint(0, nodeCount - 1)
            prod.send_messages(topic,
                               *[str(n) + ' ' + str(int(round((to * 1000))))])
def _ensure_topics_exist(self):
    all_topics = ["reddit", "twitter", "google-news", "hacker-news"]
    try:
        admin = kafka.admin.KafkaAdminClient(
            bootstrap_servers=[self.KAFKA_URL])
        client = kafka.KafkaClient([self.KAFKA_URL])
        existing_topics = client.topics
        topics = [
            kafka.admin.NewTopic(topic, 1, 1)
            for topic in all_topics
            if topic not in existing_topics
        ]
        admin.create_topics(topics)
    except Exception as e:
        raise SinkNotAvailableError from e
def get_kafka_client(config):
    for i in range(3):
        try:
            return kafka.KafkaClient(config[c.kafka_brokers])
        except kafka.common.KafkaUnavailableError as e:
            last_exception = e
            logger.error(
                "Failed to create kafka client, this is usually due "
                "to all of the brokers died or invalid broker "
                "IP/ports, error=%s", traceback.format_exc())
            time.sleep(i + 1)
        except Exception as e:
            last_exception = e
            logger.error("Failed to create kafka client, error=%s",
                         traceback.format_exc())
            time.sleep(i + 1)
    raise last_exception
def run_kafka(args):
    client = kafka.KafkaClient(args.server)
    producer = kafka.SimpleProducer(client)
    # invalid message
    producer.send_messages(str('test'), json.dumps({}))
    # 3x valid messages (range replaces Python 2's xrange)
    for _ in range(3):
        producer.send_messages(str('test'), json.dumps(bs))

    hub = balog.consumers.ConsumerHub()
    hub.scan(sample)

    engine = balog.engines.KafkaEngine(hub=hub,
                                       kafka_server=args.server,
                                       group=args.group,
                                       topic=args.topic)
    engine.run()
def send_kafka(self, message):
    kafka_client = kafka.KafkaClient(self.config['kafka_addr'])
    server_topics = kafka_client.topic_partitions
    try:
        if not self.config['topic_name'] in server_topics:
            self.logger.info('no topic')
            admin_client = KafkaAdminClient(
                bootstrap_servers=self.config['kafka_addr'])
            # create_topics() expects NewTopic objects (from kafka.admin),
            # not a bare topic-name string.
            admin_client.create_topics([NewTopic(
                name=self.config['topic_name'],
                num_partitions=1,
                replication_factor=1)])
            self.logger.info('topic create')
        else:
            pass
    except Exception as e:
        self.logger.info('topic create error : ' + str(e))

    producer = KafkaProducer(
        bootstrap_servers=self.config['kafka_addr'],
        value_serializer=lambda v: json.dumps(v).encode('utf-8'))
    producer.send(self.config['topic_name'], message)
    producer.flush()
    # self.logger.info('message send')
    return
import collections  # needed for the defaultdict below

import aiohttp
import kafka

from order_book import OrderBook
from order_book import kafka_send
from unicorn_binance_websocket_api.unicorn_binance_websocket_api_manager import BinanceWebSocketApiManager


class BinanceOrderBook(OrderBook):

    def __init__(self, lastUpdateId=0):
        self.lastUpdateId = lastUpdateId
        super().__init__()


host = 'localhost:9092'
producer = kafka.KafkaProducer(bootstrap_servers=host)
kafka.KafkaClient(bootstrap_servers=host).add_topic('all')

exchange = 'binance.com'
manager = BinanceWebSocketApiManager(exchange=exchange)

with open('./trading_pairs/binance.pair', 'r') as f:
    pairs = [e.replace('\n', '') for e in f.readlines()]

local_book = collections.defaultdict(BinanceOrderBook)


# retrieve orderbook snapshot
async def get_snapshot(pair, session):
    async with session.get(
        f'https://www.binance.com/api/v1/depth?symbol={pair}&limit=100'
    ) as r:
import sys
import socket
import argparse
import io
from datetime import datetime
from collections import Counter

import kafka
import avro.schema
from avro.datafile import DataFileWriter
from avro.io import DatumWriter

kafka_endpoint = "ip-172-31-23-112:9092"
topics = ["test02"]
consumer_group = "test_kafka_consumer"

kafka_client = kafka.KafkaClient(kafka_endpoint)
topic = topics[0]
consumer = kafka.SimpleConsumer(kafka_client, consumer_group, topic)

# reeeeeewiiiiiiind
#consumer.seek(0, 0)


def dump_message(message):
    print("****")
    print(message)
    print("Message length: %s" % len(message))
    print("* Offset *")
    print(message[0])

# get the value back out of the kafka consumer's fetched message
kafkaIPandPort = os.environ.get("kafkaIPandPort")
intrinio_forex_key = os.environ.get("intrinio_forex_key")


# send each quote to kafka topic
def on_quote(quote, backlog):
    print("QUOTE: ", quote, "BACKLOG LENGTH: ", backlog)
    strQuote = json.dumps(quote)
    byteQuote = strQuote.encode('utf-8')
    # sending keyed messages are in this format
    # producer.send_messages(b'my-topic', b'key1', b'some message')
    prod.send_messages(topic, quote["code"].encode("utf-8"), byteQuote)


# kafka connection
cluster = kafka.KafkaClient(kafkaIPandPort)
prod = KeyedProducer(cluster)
topic = "forex_topic"

# intrinio connection
options = {
    'api_key': intrinio_forex_key,
    'provider': 'fxcm',
    'on_quote': on_quote
}

client = IntrinioRealtimeClient(options)
client.join(['fxcm:pair:EUR/USD', 'fxcm:pair:USD/JPY', 'fxcm:pair:GBP/USD',
             'fxcm:pair:USD/CHF', 'fxcm:pair:EUR/CHF', 'fxcm:pair:AUD/USD',
             'fxcm:pair:USD/CAD', 'fxcm:pair:NZD/USD', 'fxcm:pair:EUR/GBP',
             'fxcm:pair:EUR/JPY', 'fxcm:pair:GBP/JPY', 'fxcm:pair:CHF/JPY',
             'fxcm:pair:GBP/CHF', 'fxcm:pair:EUR/JPY', 'fxcm:pair:EUR/AUD',
             'fxcm:pair:EUR/CAD', 'fxcm:pair:AUD/CAD', 'fxcm:pair:AUD/JPY',
             'fxcm:pair:CAD/JPY', 'fxcm:pair:NZD/JPY', 'fxcm:pair:GBP/CAD',
             'fxcm:pair:GBP/NZD', 'fxcm:pair:GBP/AUD', 'fxcm:pair:AUD/NZD',
             'fxcm:pair:USD/SEK', 'fxcm:pair:EUR/SEK', 'fxcm:pair:EUR/NOK',
             'fxcm:pair:USD/NOK', 'fxcm:pair:USD/MXN', 'fxcm:pair:AUD/CHF',
             'fxcm:pair:EUR/NZD', 'fxcm:pair:USD/ZAR', 'fxcm:pair:ZAR/JPY',
             'fxcm:pair:USD/TRY', 'fxcm:pair:EUR/TRY', 'fxcm:pair:NZD/CHF',
             'fxcm:pair:CAD/CHF', 'fxcm:pair:NZD/CAD', 'fxcm:pair:TRY/JPY'])
client.connect()
client.keep_alive()
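# KeyedProducer was removed in kafka-python 2.0; a hedged sketch of the same
# keyed send with the modern KafkaProducer (on_quote_modern is a hypothetical
# variant, reusing kafkaIPandPort and the "forex_topic" topic from above):
prod_modern = kafka.KafkaProducer(bootstrap_servers=kafkaIPandPort)

def on_quote_modern(quote, backlog):
    # The modern client keys messages per send() call instead of per producer.
    prod_modern.send("forex_topic",
                     key=quote["code"].encode("utf-8"),
                     value=json.dumps(quote).encode("utf-8"))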
def _get_client(self):
    if not self.kafka_client:
        self.kafka_client = kafka.KafkaClient(
            "%s:%s" % (self.host, self.port))
        self.kafka_producer = kafka.SimpleProducer(self.kafka_client)
def __init__(self, hosts):
    """Create a client connected to the given Kafka hosts."""
    self.hosts = hosts
    self.client = kafka.KafkaClient(hosts=self.hosts)
class StdOutListener(StreamListener):

    def on_data(self, data):
        producer.send_messages(topic, data.encode('utf-8'))
        print("Tweet Sent")
        return True

    def on_error(self, status):
        print(status)


## Set access keys from Twitter Developer app info
with open('/home/n/opt/MindBender_BD/Misc/keys') as keys:
    twitter_keys = json.load(keys)

consumer_key = twitter_keys["twitter"]["consumer_key"]
consumer_secret = twitter_keys["twitter"]["consumer_secret"]
access_token = twitter_keys["twitter"]["access_token"]
access_secret = twitter_keys["twitter"]["access_secret"]

## Set authorizations
auth = tw.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
api = tw.API(auth, wait_on_rate_limit=True)

## Kafka stream listener (topic created in terminal)
topic = "tweets"
kafka = kf.KafkaClient("localhost:9099")
producer = kf.SimpleProducer(kafka)

listen = StdOutListener()
stream = tw.Stream(auth, listen)
stream.filter(track="bigdata")
words_set = words_all.split(" ")
words_dict = dict()
for x in words_set:
    ## Filter out blanks, single letters and simple sentence constructors
    ## (i.e. "a", "it", "I", "and", etc.)
    if len(x) > 3:
        words_dict[x] = words_all.count(x)

with open(keywords, "a") as f:
    for word in sorted(words_dict, key=words_dict.get, reverse=True)[:10]:
        f.write(word + "\n")

## Send via Kafka broker
kafka = kf.KafkaClient("localhost:9099,localhost:9092,localhost:9093")
producer = kf.SimpleProducer(kafka)

## Send top 10 words to kafka consumer in JSON, formatted {date, word, freq}
for word in sorted(words_dict, key=words_dict.get, reverse=True)[:10]:
    ## Named `record` rather than `json` so the json module isn't shadowed.
    record = ('{"date": "' + timestr + '", "word":"' + word +
              '", "frequency": "' + str(words_dict[word]) + '"}')
    producer.send_messages("reddit", bytes(record, 'utf-8'))


###### Additional potential PRAW usage ######
## Grab a submission (post) from reddit by its ID (from the URL or submission.id).
#submission = reddit.submission(id="k3u81d")
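# A hedged consumer-side sketch for the records produced above: each message
# is a UTF-8 JSON string on the "reddit" topic, so a value_deserializer can
# decode it back into a dict (broker list assumed to match the producer's;
# uses the modern KafkaConsumer rather than the legacy SimpleProducer API):
import json
import kafka as kf

consumer = kf.KafkaConsumer(
    "reddit",
    bootstrap_servers=["localhost:9099", "localhost:9092", "localhost:9093"],
    value_deserializer=lambda b: json.loads(b.decode("utf-8")))

for msg in consumer:
    print(msg.value["date"], msg.value["word"], msg.value["frequency"])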