Example #1
    def __init__(self):
        # localhost:9092 = default Kafka broker host and port
        self.client = pykafka.KafkaClient("localhost:9092")

        # Get a producer for the "twitter" topic
        self.producer = self.client.topics[bytes("twitter",
                                                 "ascii")].get_producer()
Example #2
def main():
    es = Elasticsearch(
        "https://db2cb7cbe8834bb1a48f960a437f461d.us-east-1.aws.found.io:9243",
        http_auth=(os.environ["ELASTIC_USERNAME"],
                   os.environ["ELASTIC_PASSWORD"]),
    )

    client = pykafka.KafkaClient("localhost:9092")
    topic = client.topics[bytes("TweetStreamSentiments", "ascii")]
    consumer = topic.get_simple_consumer(
        consumer_group="mygroup",
        auto_offset_reset=OffsetType.LATEST,
        reset_offset_on_start=True,
    )
    for msg in consumer:
        try:
            timestamp = datetime.strptime(
                msg.partition_key.decode().split(",")[1][1:-1],
                "%Y-%m-%d %H:%M:%S")
            eventTime = timestamp.strftime(
                "%Y-%m-%dT%H:%M:%S.%f%z"
            )  # Elasticsearch expects the timestamp in this format
            json_send_data = {
                "timestamp": eventTime,
                "sentiment_score": float(msg.value.decode()),
            }

            print(json_send_data)
            es.index(
                index="tweets_sentiment_ts",
                id=msg.partition_key.decode(),
                body=json_send_data,
            )
        except Exception as e:
            print("Exception: " + str(e))
Example #3
    def __init__(self):
        print("Publish data to topic: " + topic)
        self.client = pykafka.KafkaClient("localhost:9092")

        # Get a producer for the given topic
        self.producer = self.client.topics[bytes(topic,
                                                 "ascii")].get_producer()
Example #4
    def testCrawlContainerKafka(self):
        env = os.environ.copy()
        mypath = os.path.dirname(os.path.realpath(__file__))
        os.makedirs(self.tempd + '/out')

        # crawler itself needs to be root
        process = subprocess.Popen(
            [
                '/usr/bin/python', mypath + '/../../crawler/crawler.py',
                '--url', 'kafka://localhost:9092/test',
                '--features', 'os,process',
                '--crawlContainers', self.container['Id'],
                '--crawlmode', 'OUTCONTAINER',
                '--numprocesses', '1'
            ],
            env=env)
        stdout, stderr = process.communicate()
        assert process.returncode == 0

        print(stderr)
        print(stdout)

        kafka = pykafka.KafkaClient(hosts='localhost:9092')
        topic = kafka.topics['test']
        consumer = topic.get_simple_consumer()
        message = consumer.consume()
        assert '"cmd":"/bin/sleep 60"' in message.value
Example #5
    def __init__(self, topic):
        """Called when initialized.

        This will set up a Kafka client and a producer.
        """
        self.client = pykafka.KafkaClient('localhost:9092')
        self.producer = self.client.topics[topic].get_producer()
Example #6
 def connect(self):
     self.kafka_client = pykafka.KafkaClient(
         hosts=self.hosts,
         socket_timeout_ms=500,
         offsets_channel_socket_timeout_ms=10 * 500)
     self.kafka_topic = self.kafka_client.topics[self.topic]
     self.connected = True
Example #7
def kafka_send(kurl, temp_fpath, format, topic, queue=None):
    try:
        kafka_python_client = kafka_python.KafkaClient(kurl)
        kafka_python_client.ensure_topic_exists(topic)
        kafka = pykafka.KafkaClient(hosts=kurl)

        publish_topic_object = kafka.topics[topic]
        # the default partitioner is random_partitioner
        producer = publish_topic_object.get_producer()

        if format == 'csv':
            with open(temp_fpath, 'r') as fp:
                text = fp.read()
                producer.produce([text])
        elif format == 'graphite':
            with open(temp_fpath, 'r') as fp:
                for line in fp.readlines():
                    producer.produce([line])
        else:
            raise EmitterUnsupportedFormat('Unsupported format: %s' % format)

        queue and queue.put((True, None))
    except Exception as e:
        if queue:
            queue.put((False, e))
        else:
            raise
    finally:
        queue and queue.close()
Example #8
def main():
    parser = _get_arg_parser()
    args = parser.parse_args()
    if args.command:
        client = pykafka.KafkaClient(hosts=args.host)
        args.func(client, args)
    else:
        parser.print_help()
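
A hypothetical sketch of an argument parser that fits the pattern above (subcommands that set command and func on the parsed args); the "produce" subcommand and its helper are invented for illustration:

import argparse


def _produce_message(client, args):
    # Illustrative subcommand: publish a single message to a topic.
    topic = client.topics[args.topic.encode("utf-8")]
    with topic.get_producer() as producer:
        producer.produce(args.message.encode("utf-8"))


def _get_arg_parser():
    parser = argparse.ArgumentParser(description="Minimal pykafka CLI")
    parser.add_argument("--host", default="localhost:9092")
    subparsers = parser.add_subparsers(dest="command")
    produce_parser = subparsers.add_parser("produce")
    produce_parser.add_argument("topic")
    produce_parser.add_argument("message")
    produce_parser.set_defaults(func=_produce_message)
    return parser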
Example #9
def kafka_connect():
    try:
        client = pykafka.KafkaClient(hosts=settings.KAFKA_HOSTS)
    except pykafka.exceptions.NoBrokersAvailableError:
        log.info('Retrying Kafka connection. . .')
        time.sleep(3)
        return kafka_connect()
    return client
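
The recursion above retries forever and deepens the call stack on every attempt; a bounded, iterative variant (a sketch reusing the same settings, log, and time names from the snippet) could look like this:

def kafka_connect_with_limit(max_attempts=10):
    for attempt in range(1, max_attempts + 1):
        try:
            return pykafka.KafkaClient(hosts=settings.KAFKA_HOSTS)
        except pykafka.exceptions.NoBrokersAvailableError:
            log.info('Retrying Kafka connection (%d/%d). . .', attempt, max_attempts)
            time.sleep(3)
    raise RuntimeError('No Kafka brokers available after %d attempts' % max_attempts)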
Example #10
    def __init__(self):
        """Instantiate a pykafka client and a producer attached to the
        designated Kafka topic (as defined in config.py).
        """

        self.client = pykafka.KafkaClient(config.bootstrap_servers)
        self.producer = self.client.topics[bytes(
            config.twitter_kafka_topic_name,
            config.data_encoding)].get_producer()
Example #11
 def __init__(self, host, port):
     '''
     init a new KafkaClient using the given host and port
     @param host: host name
     @param port: server port
     '''
     self.Host = host
     self.Port = port
     self.Client = pykafka.KafkaClient("%s:%s" % (self.Host, self.Port))
     self.RunningThread = None
Example #12
 def __init__(self):
     ''' Creates a pykafka client using the bootstrap servers listed in
     config.py, and a Kafka producer attached to the twitter_kafka_topic_name
     topic, also defined in config.py. '''
     self.client = pykafka.KafkaClient(config.bootstrap_servers)
     self.producer = self.client.topics[bytes(
         config.twitter_kafka_topic_name,
         config.data_encoding)].get_producer()
Example #13
def do2():
    client = pykafka.KafkaClient(hosts='10.250.100.19:9092,'
                                       '10.250.100.20:9092')
    topic = client.topics['flume_test']
    with topic.get_producer() as producer:
        i = 0
        while True:
            print(i)
            producer.produce('simple producer %s' % i)
            i += 1
Example #14
	def __init__(self, profile: dict):
		sapsucker_log("Building KafkaHandler")
		self.profile = profile
		if 'broker_list' in self.profile['kafka']:
			broker_list = self.profile['kafka']['broker_list']
			hosts = ",".join(broker_list)
			sapsucker_log(hosts)
			self.client = pykafka.KafkaClient(hosts=hosts)
			self.producers = {}
		else:
			raise Exception("No broker_list branch under the kafka configuration.")
Example #15
 def __init__(self, hosts, topic, broker_version="0.8.2"):
     """Construct a Producer."""
     self.topic = topic
     self.client = pykafka.KafkaClient(hosts=','.join(hosts),
                                       socket_timeout_ms=10000,
                                       broker_version=broker_version)
     self.producer_topic = self.client.topics[self.topic]
     self.producer = self.producer_topic.get_producer(max_retries=3,
                                                      linger_ms=3000,
                                                      retry_backoff_ms=1000,
                                                      use_rdkafka=True)  # needs pykafka built with the librdkafka extension
Example #16
    def __init__(self):
        self.client = pykafka.KafkaClient("localhost:9092")
        self.kafka_topic = "TweetStreamListener"
        self.producer = self.client.topics[bytes(self.kafka_topic,
                                                 "ascii")].get_producer()

        self.es = Elasticsearch(
            "https://db2cb7cbe8834bb1a48f960a437f461d.us-east-1.aws.found.io:9243",
            http_auth=(os.environ["ELASTIC_USERNAME"],
                       os.environ["ELASTIC_PASSWORD"]),
        )
        print("Connecting...")
Example #17
    def _get_kafka_client(self):
        if self.config.ssl_keyfile:
            ssl_config = pykafka.connection.SslConfig(
                self.config.ssl_cafile,
                certfile=self.config.ssl_certfile,
                keyfile=self.config.ssl_keyfile,
            )
        else:
            ssl_config = None

        return pykafka.KafkaClient(hosts=self.config.broker,
                                   ssl_config=ssl_config)
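
The same idea as a standalone sketch, with placeholder certificate paths and broker address; SslConfig takes the CA bundle as its first argument, plus an optional client certificate and key:

import pykafka
from pykafka.connection import SslConfig

ssl_config = SslConfig("/path/to/ca.pem",
                       certfile="/path/to/client-cert.pem",
                       keyfile="/path/to/client-key.pem")
client = pykafka.KafkaClient(hosts="broker.example.com:9093", ssl_config=ssl_config)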
Example #18
    def _publish_to_kafka_no_retries(self, url):

        if kafka_python is None or pykafka is None:
            raise ImportError('Please install kafka-python and pykafka')

        try:
            list = url[len('kafka://'):].split('/')

            if len(list) == 2:
                kurl = list[0]
                topic = list[1]
            else:
                raise Exception(
                    'The kafka url provided does not seem to be valid: %s. '
                    'It should be something like this: '
                    'kafka://[ip|hostname]:[port]/[kafka_topic]. '
                    'For example: kafka://1.1.1.1:1234/metrics' % url)

            h = NullHandler()
            logging.getLogger('kafka').addHandler(h)

            # XXX We should definitely create a long lasting kafka client
            kafka_python_client = kafka_python.KafkaClient(kurl)
            kafka_python_client.ensure_topic_exists(topic)

            kafka = pykafka.KafkaClient(hosts=kurl)
            publish_topic_object = kafka.topics[topic]
            # the default partitioner is random_partitioner
            producer = publish_topic_object.get_producer()

            if self.format == 'csv':
                with open(self.temp_fpath, 'r') as fp:
                    text = fp.read()
                    logger.debug(producer.produce([text]))

            elif self.format == 'graphite':

                with open(self.temp_fpath, 'r') as fp:
                    for line in fp.readlines():
                        producer.produce([line])
            else:
                logger.debug('Could not send data because {0} is an unknown '
                             'format'.format(self.format))
                raise

            kafka_python_client.close()
        except Exception as e:

            # kafka.close()

            logger.debug('Could not send data to {0}: {1}'.format(url, e))
            raise
Example #19
def make_kafka_consumer(hosts, env, topic_suffix, group):
    topic_name = "fatcat-{}.{}".format(env, topic_suffix).encode('utf-8')
    client = pykafka.KafkaClient(hosts=hosts, broker_version="1.0.0")
    consume_topic = client.topics[topic_name]
    print("Consuming from kafka topic {}, group {}".format(topic_name, group))

    consumer = consume_topic.get_balanced_consumer(
        consumer_group=group.encode('utf-8'),
        managed=True,
        auto_commit_enable=True,
        auto_commit_interval_ms=30000,  # 30 seconds
        compacted_topic=True,
    )
    return consumer
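
A short consumption sketch using the helper above (host, environment, topic suffix, and consumer group are placeholder values); iterating the balanced consumer blocks until messages arrive, and msg.value is raw bytes:

consumer = make_kafka_consumer("localhost:9092", "dev", "changelog", "example-group")
for msg in consumer:
    if msg is not None:
        print(msg.offset, msg.value.decode("utf-8"))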
Example #20
def main():
    client = pykafka.KafkaClient(hosts='10.250.100.19:9092,'
                                       '10.250.100.20:9092')
    topic = client.topics['flume_test']
    '''producer = topic.get_sync_producer() 
    
    n = 0
    while True:
        producer.produce('simple producer %s' % n)
        n += 1
    '''
    with topic.get_sync_producer() as producer:
        for i in range(4):
            producer.produce('test message ' + str(i**2))
Example #21
    def run(self):

        # 1. start consumer (in managed/balanced fashion, with consumer group)
        # 2. for each thingie, do the work; if success publish to kafka; either
        #    way... print? log?
        # 3. repeat!

        print("Starting grobid-hbase-worker...")
        try:
            host = self.hbase_host
            hb_conn = happybase.Connection(host=host, transport="framed",
                protocol="compact")
        except Exception:
            raise Exception("Couldn't connect to HBase using host: {}".format(host))
        self.hb_table = hb_conn.table(self.hbase_table_name)
        print("HBase inserting into {}".format(self.hbase_table_name))

        kafka = pykafka.KafkaClient(hosts=self.kafka_hosts, broker_version="2.0.0")
        consume_topic = kafka.topics[self.consume_topic]

        sequential_failures = 0
        consumer = consume_topic.get_balanced_consumer(
            consumer_group=self.consumer_group,
            managed=True,
            auto_commit_enable=True,
            # needed to avoid MessageSet decode errors
            fetch_message_max_bytes=4*1024*1024,
            # LATEST: better to miss some processing than to waste time re-processing
            auto_offset_reset=pykafka.common.OffsetType.LATEST,
            compacted_topic=True)
        print("Kafka consuming {} in group {}".format(
            self.consume_topic,
            self.consumer_group))

        for msg in consumer:
            #print("got a line! ")
            grobid_output, status = self.do_work(msg.value.decode('utf-8'))
            if grobid_output:
                sequential_failures = 0
            else:
                sys.stderr.write("Failed to process GROBID extraction output: {}\n".format(status))
                sequential_failures += 1
                if sequential_failures > 20:
                    sys.stderr.write("too many failures in a row, bailing out\n")
                    sys.exit(-1)
Example #22
    def testCrawlContainerKafka2(self):
        emitters = EmittersManager(urls=['kafka://localhost:9092/test'])
        crawler = ContainersCrawler(
            features=['os', 'process'],
            user_list=self.container['Id'])
        worker = Worker(emitters=emitters, frequency=-1,
                        crawler=crawler)
        worker.iterate()
        kafka = pykafka.KafkaClient(hosts='localhost:9092')
        topic = kafka.topics['test']
        consumer = topic.get_simple_consumer()
        message = consumer.consume()
        assert '"cmd":"/bin/sleep 60"' in message.value

        for i in range(1, 5):
            worker.iterate()
            message = consumer.consume()
            assert '"cmd":"/bin/sleep 60"' in message.value
Example #23
def request_scheduled(requestScheduled):

    logger.info("Start of scheduled request")

    client = pykafka.KafkaClient(
        hosts='{}:{}'.format(kafka_server, kafka_port))
    topic = client.topics['{}'.format(kafka_topic)]
    producer = topic.get_sync_producer()
    msg = {
        "type": "requestScheduled",
        "datetime": datetime.datetime.now().strftime("%Y -%m-%dT%H:%M:%S"),
        "payload": requestScheduled
    }
    msg_str = json.dumps(msg)
    producer.produce(msg_str.encode('utf-8'))

    logger.info('Produced message in topic {}: {}'.format(topic, msg))
    """
Example #24
    def __init__(self, topic, group_id, **args):
        super(Consumer, self).__init__('kafka', **args)
        if not self.closed:
            if 'host' not in self.__dict__ or not self.host:
                raise TypeError(
                    'KAFKA: the host has not been set in config file or parameters.'
                )
            if 'zookeeper' not in self.__dict__ or not self.zookeeper:
                raise TypeError(
                    'KAFKA: the zookeeper has not been set in config file or parameters.'
                )

            self.__client = pykafka.KafkaClient(hosts=self.host,
                                                zookeeper_hosts=self.zookeeper)
            self.__consumer = self.__client.topics[
                topic.encode()].get_balanced_consumer(
                    consumer_group=group_id.encode(),
                    auto_commit_enable=True,
                    fetch_message_max_bytes=67108864)
Example #25
def produce_weird_messages(topic, partition, bootstrap_servers):
    producer = Producer({'bootstrap.servers': bootstrap_servers})
    for msg in weird_messages:
        if 'headers' in msg:
            producer.produce(topic,
                             msg['value'],
                             key=msg['key'],
                             headers=msg['headers'],
                             partition=partition)
        else:
            producer.produce(topic,
                             msg['value'],
                             key=msg['key'],
                             partition=partition)
    producer.flush()
    # pykafka for weird timestamps
    pykafkaClient = pykafka.KafkaClient(hosts=bootstrap_servers)
    pykafkaTopic = pykafkaClient.topics[topic]
    with pykafkaTopic.get_sync_producer() as pykafkaProducer:
        pykafkaProducer.produce(b"noneTimestamp")
Example #26
def mainpro (serverip, port):
	global hosts
	hosts = "%s:%s" % (serverip, port)
	global client
	client = pykafka.KafkaClient(hosts=hosts)
	global spclient
	spclient = kafka.SimpleClient(hosts=hosts)
	global admin
	admin = kafka.admin.client.KafkaAdminClient(bootstrap_servers=hosts)
	zkserver = getzkserver()
	global myzk
	myzk = zkconn(zkserver)
	global zkdir
	zkdir = "/"
	myoptions = options()
	topOptions = myoptions.keys()
	while True:
		if is_sigint_up:
			os._exit(0)
		try:
			comm = raw_input("kafka %s>" % (port))
			commlist = comm.split()
			if len(commlist) <= 0:
				continue
			if commlist[0] in topOptions:
				if commlist[0] == "help":
					print "[ help info ]"
					print toJson(myoptions)
				else:
					status = eval(commlist[0])(commlist)
					if status == False:
						print  "[ %s help info ]" % (commlist[0])
						print toJson(options(commlist[0]))
			else:
				print "Command error!"
		except:
			print "Execute error !"
			if debug:
				msg = traceback.format_exc()
				print msg
			os._exit(0)
Example #27
def send_tweets_to_spark(http_resp, tcp_connection):
    for line in http_resp.iter_lines():
        try:
            full_tweet = json.loads(line)
            tweet_text = full_tweet['text']
            print("Tweet Text: " + tweet_text)
            print("------------------------------------------")
            #tcp_connection.send(bytes("{}\n".format(tweet_text), "utf-8"))
            #tcp_connection.send(tweet_text)

            #####
            client = pykafka.KafkaClient("localhost:9092")
            print("Client finished")
            producer = client.topics[bytes("twitter", "ascii")].get_producer()
            producer.produce(bytes(tweet_text, "ascii"))
            print("Producer finished")

            #####

        except:
            e = sys.exc_info()[0]
            print("Error: %s" % e)
Example #28
    def connect_to_broker(self, broker, topic):
        kafka_python_client = kafka_python.SimpleClient(broker)
        kafka_python_client.ensure_topic_exists(topic)

        self.client = pykafka.KafkaClient(hosts=broker)
        self.producer = self.client.topics[topic].get_producer()
Example #29
 def __init__(self):
     self.client = pykafka.KafkaClient("localhost:9092")
     self.producer = self.client.topics[bytes('twitterstream_nl','ascii')].get_producer()
Example #30
            self.kafkaproducer.produce(bytes(json.dumps(message), "utf-8"))

        except BaseException as error:
            print(str(error))

        return True

    def on_error(self, status):

        print (status)
        return True
    

if __name__ == "__main__":

    topic = "iot_topic"

    kafka_client = pykafka.KafkaClient("localhost:9092")

    kafka_producer = kafka_client.topics[bytes(topic, "utf-8")].get_producer()

    l = StdOutListener(kafka_producer)
    
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)

    stream = Stream(auth, l)
    stream.filter(track=['iot'], languages=["en"])