def setUp(self):
    """Create the KSQL client fixture and seed ``exist_topic`` with one message.

    The seed message is only produced when ``check_kafka_available`` reports
    success for the broker address.
    """
    self.url = "http://ksql-server:8088"
    self.api_client = KSQLAPI(url=self.url, check_version=False)
    self.exist_topic = 'exist_topic'

    brokers = 'kafka:29092'
    if not check_kafka_available(brokers):
        # Nothing to seed when the availability check fails.
        return

    seeder = Producer({'bootstrap.servers': brokers})
    seeder.produce(self.exist_topic, "test_message")
    seeder.flush()
def setUp(self):
    """Create the KSQL client fixture and seed ``exist_topic`` with one message.

    Sets ``url``, ``api_client``, ``test_prefix``, ``exist_topic`` and
    ``bootstrap_servers`` on the test instance. The seed message is only
    produced when ``utils.check_kafka_available`` reports success.
    """
    self.url = "http://localhost:8088"
    self.api_client = KSQLAPI(url=self.url, check_version=False)
    self.test_prefix = "ksql_python_test"
    self.exist_topic = 'exist_topic'
    self.bootstrap_servers = 'localhost:29092'

    if not utils.check_kafka_available(self.bootstrap_servers):
        # Nothing to seed when the availability check fails.
        return

    seeder = Producer({'bootstrap.servers': self.bootstrap_servers})
    seeder.produce(self.exist_topic, "test_message")
    seeder.flush()
def test_ksql_show_tables_with_api_key(self):
    """``show tables;`` through a client built with api_key/secret returns an empty listing."""
    expected_response = [{
        "@type": "tables",
        "statementText": "show tables;",
        "tables": [],
        "warnings": [],
    }]
    authed_client = KSQLAPI(url=self.url,
                            check_version=False,
                            api_key='foo',
                            secret='bar')
    response = authed_client.ksql("show tables;")
    self.assertEqual(response, expected_response)
class Processor:
    """Base class for a KSQL pipeline driven through a ``KSQLAPI`` client.

    Subclasses provide ``create_stream_from_topic``, ``rename_rowkey`` and
    ``join``; ``start`` calls them in that order. The ``ksql`` and
    ``create_stream`` helpers forward to the client and log any ``KSQLError``
    instead of propagating it.
    """

    def __init__(self, url):
        """Create the ``KSQLAPI`` client for *url*."""
        self.client = KSQLAPI(url)

    def start(self):
        """Run the three pipeline steps in order."""
        for step_name in ("create_stream_from_topic", "rename_rowkey", "join"):
            getattr(self, step_name)()
        # self.query()

    def create_stream_from_topic(self):
        """Pipeline step; must be overridden by subclasses."""
        raise NotImplementedError(f"{self.__class__.__name__} must implement processor")

    def rename_rowkey(self):
        """Pipeline step; must be overridden by subclasses."""
        raise NotImplementedError(f"{self.__class__.__name__} must implement processor")

    def join(self):
        """Pipeline step; must be overridden by subclasses."""
        raise NotImplementedError(f"{self.__class__.__name__} must implement processor")

    def query(self):
        """Run a fixed ``select * from user`` query and log every item it yields."""
        offset_reset = {"ksql.streams.auto.offset.reset": "earliest"}
        rows = self.client.query(
            query_string="select * from user emit changes",
            stream_properties=offset_reset,
        )
        for row in rows:
            logger.info(row)

    def ksql(self, ksql_string, stream_properties=None):
        """Forward a statement to ``client.ksql``; a ``KSQLError`` is logged, not raised."""
        try:
            self.client.ksql(ksql_string=ksql_string, stream_properties=stream_properties)
        except KSQLError as err:
            logger.info(err)

    def create_stream(self, table_name, columns_type, topic, value_format="JSON"):
        """Forward to ``client.create_stream``; a ``KSQLError`` is logged, not raised."""
        stream_args = {
            "table_name": table_name,
            "columns_type": columns_type,
            "topic": topic,
            "value_format": value_format,
        }
        try:
            self.client.create_stream(**stream_args)
        except KSQLError as err:
            logger.info(err)
def __init__(self):
    """Connect to KSQL and build the Avro producer for topic ``test08``.

    Sets ``api_client``, ``topic``, ``bootstrap_servers``, ``key`` and
    ``producer`` on the instance.

    Raises:
        SystemExit: with code -1 (after printing a message) when
            ``utils.check_kafka_available`` reports failure for the broker.
    """
    url = "http://localhost:8088"
    self.api_client = KSQLAPI(url)
    self.topic = "test08"
    self.bootstrap_servers = "localhost:9092"

    if not utils.check_kafka_available(self.bootstrap_servers):
        print("Could not connect to Kafka")
        # `exit()` is injected by the `site` module for interactive use and is
        # missing under `python -S` / frozen builds; SystemExit is what it raises.
        raise SystemExit(-1)

    value_schema_str = """ { "type": "record", "namespace": "com.example", "name": "value", "fields": [ {"name":"LOCATION", "type":"string"}, {"name":"DATETIME", "type":"string"}, {"name":"SENTIMENT", "type":"string"}, {"name":"TEXT", "type":"string"} ] } """
    key_schema_str = """ { "type": "record", "namespace": "com.example", "name": "key", "fields": [ {"name":"LOCATION", "type":"string"}, {"name":"DATETIME", "type":"string"}, {"name":"SENTIMENT", "type":"string"}, {"name":"TEXT", "type":"string"} ] } """
    value_schema = avro.loads(value_schema_str)
    # NOTE(review): the key schema is parsed but never handed to the producer
    # (default_key_schema is None below) -- confirm whether that is intended.
    key_schema = avro.loads(key_schema_str)
    self.key = {
        "LOCATION": "LOCATION",
        "DATETIME": "DATETIME",
        "SENTIMENT": "SENTIMENT",
        "TEXT": "TEXT"
    }
    self.producer = AvroProducer(
        {
            'bootstrap.servers': self.bootstrap_servers,
            'on_delivery': delivery_report,
            'schema.registry.url': 'http://localhost:8081'
        },
        default_key_schema=None,
        default_value_schema=value_schema)
def __init__(self, user=None, security_enabled=False, ssl_cert_ca_verify=False):
    """Build the KSQL client from the configured ``KAFKA.KSQL_API_URL``.

    ``security_enabled`` and ``ssl_cert_ca_verify`` are accepted but not used
    by the code visible here.

    Raises:
        KSqlApiException: when the ``ksql`` package cannot be imported.
    """
    try:
        from ksql import KSQLAPI
    except ImportError:
        raise KSqlApiException('Module missing: pip install ksql')

    # An unset/empty setting yields an empty URL; otherwise slashes are
    # stripped from both ends of the configured value.
    if KAFKA.KSQL_API_URL.get():
        self._api_url = KAFKA.KSQL_API_URL.get().strip('/')
    else:
        self._api_url = ''

    self.user = user
    client = KSQLAPI(self._api_url)
    self.client = client
"""Send an endless stream of Faker-generated transactions to a Kafka topic.

The topic name is read from /app/config.yaml through ``confuse``.
"""
import confuse
from faker import Faker
from time import sleep
from json import dumps
from kafka import KafkaProducer
from ksql import KSQLAPI


def _to_json_bytes(payload):
    """Serialize *payload* to UTF-8 encoded JSON for the Kafka producer."""
    return dumps(payload).encode('utf-8')


client = KSQLAPI('http://ksqldb-server:8088')
producer = KafkaProducer(bootstrap_servers=['broker:29092'],
                         value_serializer=_to_json_bytes)
fake = Faker()

config = confuse.Configuration('mocker')
config.set_file('/app/config.yaml')
# NOTE(review): `rate` (and the imported `sleep`) are not used by the loop
# below as far as this excerpt shows -- confirm whether throttling is missing.
rate = config['transactions']['rate'].get(int)
topic = config['kafka']['topic'].get()

print("Start sending transactions")
while True:
    transaction = {
        'transaction_id': "RF" + str(fake.pyint(5)),
        'transaction_type': "transaction_" + str(fake.pyint(1)),
        'from_account': fake.iban(),
        'to_account': fake.iban(),
        'amount_cents': fake.pyint(),
        'created_at': fake.date_time().strftime("%Y/%m/%d, %H:%M:%S")
    }
    producer.send(topic, value=transaction)
def __init__(self, url):
    """Create a ``KSQLAPI`` client for *url* and keep it on ``self.client``."""
    ksql_client = KSQLAPI(url)
    self.client = ksql_client
def get_ksql_client():
    """Return a new ``KSQLAPI`` client built from the module-level ``KSQL_URL``."""
    return KSQLAPI(KSQL_URL)
from kafka import KafkaClient from ksql import KSQLAPI kafka_client = KafkaClient(hosts=['localhost:9092']) kafka_client.ensure_topic_exists('gas_prices') kafka_client.ensure_topic_exists('locations') client = KSQLAPI('http://localhost:8088') client.ksql("SET 'auto.offset.reset' = 'earliest';") # Drop existing streams client.ksql('DROP STREAM alerts;') client.ksql('DROP STREAM locations;') client.ksql('DROP STREAM gas_prices;') # Creates gas_prices as a stream client.ksql(''' CREATE STREAM gas_prices \ (stationid VARCHAR, lat DOUBLE, long DOUBLE, price DOUBLE, recordtime BIGINT, joinner INT) \ WITH (KAFKA_TOPIC='gas_prices', VALUE_FORMAT='JSON'); ''') # Creates the location stream client.ksql(''' CREATE STREAM locations \ (userid VARCHAR, lat DOUBLE, long DOUBLE, recordtime BIGINT, joinner INT) \ WITH (KAFKA_TOPIC='locations', VALUE_FORMAT='JSON'); ''') # Creates the alert stream using the gas_prices stream client.sql('''
def test_with_timeout(self):
    """The ``timeout`` constructor argument is readable as ``api_client.timeout``."""
    api_client = KSQLAPI(url='http://foo', timeout=10)
    # assertEquals was a deprecated alias and no longer exists in Python 3.12+.
    self.assertEqual(api_client.timeout, 10)
import logging
from ksql import KSQLAPI

# Debug-level logging for everything that follows.
logging.basicConfig(level=logging.DEBUG)

# Alternative endpoint kept for reference:
# client = KSQLAPI('http://ec2-52-41-32-196.us-west-2.compute.amazonaws.com:8088')
KSQL_SERVER_URL = 'http://10.0.0.13:8088'
client = KSQLAPI(KSQL_SERVER_URL)

# client.create_stream()
client.ksql('show tables')
def test_with_timeout(self):
    """The ``timeout`` constructor argument is readable as ``api_client.timeout``."""
    api_client = KSQLAPI(url=self.url, timeout=10, check_version=False)
    # assertEquals was a deprecated alias and no longer exists in Python 3.12+.
    self.assertEqual(api_client.timeout, 10)
class TestKSQLAPI(unittest.TestCase):
    """Test case for the client methods."""

    def setUp(self):
        # NOTE(review): the string literal below is never closed and runs straight
        # into what looks like the first test's @vcr.use_cassette decorator. The
        # text in between (rest of the URL, rest of setUp) appears to have been
        # replaced by "*****:*****" -- presumably a credential scrubber; restore
        # it from version control. The tests below read self.api_client,
        # self.exist_topic, self.test_prefix and self.bootstrap_servers, none of
        # which is assigned in the code visible here.
        self.url = "http://*****:*****@vcr.use_cassette('tests/vcr_cassettes/healthcheck.yml')

    def test_ksql_server_healthcheck(self):
        """GET <url>/status answers with HTTP 200."""
        res = requests.get(self.url + '/status')
        self.assertEqual(res.status_code, 200)

    @vcr.use_cassette('tests/vcr_cassettes/get_ksql_server.yml')
    def test_get_ksql_version_success(self):
        """get_ksql_version() equals ksql.__ksql_server_version__."""
        version = self.api_client.get_ksql_version()
        self.assertEqual(version, ksql.__ksql_server_version__)

    @vcr.use_cassette('tests/vcr_cassettes/get_properties.yml')
    def test_get_properties(self):
        """get_properties() is indexed by 'ksql.schema.registry.url'."""
        properties = self.api_client.get_properties()
        # NOTE(review): same damage as in setUp -- the expected URL and the end of
        # this call are missing, and the next test's decorator is fused into the
        # unterminated literal.
        self.assertEqual(properties['ksql.schema.registry.url'], "http://*****:*****@vcr.use_cassette('tests/vcr_cassettes/ksql_show_table.yml')

    def test_ksql_show_tables(self):
        """`show tables;` returns a single 'tables' entry with an empty list."""
        ksql_string = "show tables;"
        r = self.api_client.ksql(ksql_string)
        self.assertEqual(r, [{
            '@type': 'tables',
            'statementText': 'show tables;',
            'tables': []
        }])

    @vcr.use_cassette('tests/vcr_cassettes/ksql_show_table.yml')
    def test_ksql_show_tables_with_no_semicolon(self):
        """`show tables` without ';' is echoed back as 'show tables;'."""
        ksql_string = "show tables"
        r = self.api_client.ksql(ksql_string)
        self.assertEqual(r, [{
            '@type': 'tables',
            'statementText': 'show tables;',
            'tables': []
        }])

    @vcr.use_cassette('tests/vcr_cassettes/ksql_create_stream.yml')
    def test_ksql_create_stream(self):
        """A raw CREATE STREAM statement reports commandStatus SUCCESS."""
        topic = self.exist_topic
        stream_name = self.test_prefix + "test_ksql_create_stream"
        ksql_string = "CREATE STREAM {} (viewtime bigint, userid varchar, pageid varchar) \ WITH (kafka_topic='{}', value_format='DELIMITED');".format(
            stream_name, topic)
        r = self.api_client.ksql(ksql_string)
        self.assertEqual(r[0]['commandStatus']['status'], 'SUCCESS')

    # Skipped whenever the Kafka availability check fails for localhost:29092.
    @unittest.skipIf(not utils.check_kafka_available('localhost:29092'),
                     "vcrpy does not support streams yet")
    def test_ksql_create_stream_w_properties(self):
        """Create a stream with stream properties, produce a row, and read it back."""
        topic = self.exist_topic
        # NOTE(review): the prefixed name is overwritten on the very next line.
        stream_name = self.test_prefix + "test_ksql_create_stream"
        stream_name = "test_ksql_create_stream"
        ksql_string = "CREATE STREAM {} (ORDER_ID INT, TOTAL_AMOUNT DOUBLE, CUSTOMER_NAME VARCHAR) \ WITH (kafka_topic='{}', value_format='JSON');".format(
            stream_name, topic)
        streamProperties = {"ksql.streams.auto.offset.reset": "earliest"}
        # Only create the stream when it is not already listed.
        if 'TEST_KSQL_CREATE_STREAM' not in utils.get_all_streams(
                self.api_client):
            r = self.api_client.ksql(ksql_string,
                                     stream_properties=streamProperties)
            self.assertEqual(r[0]['commandStatus']['status'], 'SUCCESS')
        producer = Producer({'bootstrap.servers': self.bootstrap_servers})
        producer.produce(
            self.exist_topic,
            '''{"order_id":3,"total_amount":43,"customer_name":"Palo Alto"}''')
        producer.flush()
        print()
        chunks = self.api_client.query("select * from {}".format(stream_name),
                                       stream_properties=streamProperties,
                                       idle_timeout=10)
        # Drain the iterator so that `chunk` ends up holding the last item.
        # NOTE(review): if nothing is yielded, `chunk` is unbound below.
        for chunk in chunks:
            pass
        assert json.loads(chunk)['row']['columns'][-1] == 'Palo Alto'

    @vcr.use_cassette('tests/vcr_cassettes/bad_requests.yml')
    def test_bad_requests(self):
        """An invalid statement raises KSQLError with error_code 40000."""
        broken_ksql_string = "noi"
        with self.assertRaises(KSQLError) as e:
            r = self.api_client.ksql(broken_ksql_string)
        the_exception = e.exception
        self.assertEqual(the_exception.error_code, 40000)

    @vcr.use_cassette('tests/vcr_cassettes/ksql_create_stream_by_builder.yml')
    def test_ksql_create_stream_by_builder(self):
        """A statement produced by SQLBuilder.build reports commandStatus SUCCESS."""
        sql_type = 'create'
        table_type = 'stream'
        table_name = 'test_table'
        columns_type = ['viewtime bigint', 'userid varchar', 'pageid varchar']
        topic = self.exist_topic
        value_format = 'DELIMITED'
        utils.drop_stream(self.api_client, table_name)
        ksql_string = SQLBuilder.build(sql_type=sql_type,
                                       table_type=table_type,
                                       table_name=table_name,
                                       columns_type=columns_type,
                                       topic=topic,
                                       value_format=value_format)
        r = self.api_client.ksql(ksql_string)
        self.assertEqual(r[0]['commandStatus']['status'], 'SUCCESS')

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_by_builder_api.yml')
    def test_ksql_create_stream_by_builder_api(self):
        """create_stream() returns a truthy value for a new stream."""
        table_name = 'test_table'
        columns_type = ['viewtime bigint', 'userid varchar', 'pageid varchar']
        topic = self.exist_topic
        value_format = 'DELIMITED'
        utils.drop_stream(self.api_client, table_name)
        r = self.api_client.create_stream(table_name=table_name,
                                          columns_type=columns_type,
                                          topic=topic,
                                          value_format=value_format)
        self.assertTrue(r)

    @vcr.use_cassette('tests/vcr_cassettes/ksql_topic_already_registered.yml')
    def test_raise_create_error_topic_already_registered(self):
        """Calling create_stream() twice with the same arguments raises KSQLError."""
        table_name = 'foo_table'
        columns_type = ['name string', 'age bigint']
        topic = self.exist_topic
        value_format = 'DELIMITED'
        utils.drop_stream(self.api_client, table_name)
        r = self.api_client.create_stream(table_name=table_name,
                                          columns_type=columns_type,
                                          topic=topic,
                                          value_format=value_format)
        with self.assertRaises(KSQLError):
            r = self.api_client.create_stream(table_name=table_name,
                                              columns_type=columns_type,
                                              topic=topic,
                                              value_format=value_format)

    @vcr.use_cassette('tests/vcr_cassettes/raise_create_error_no_topic.yml')
    def test_raise_create_error_no_topic(self):
        """create_stream() on topic 'this_topic_is_not_exist' raises KSQLError."""
        table_name = 'foo_table'
        columns_type = ['name string', 'age bigint']
        topic = 'this_topic_is_not_exist'
        value_format = 'DELIMITED'
        with self.assertRaises(KSQLError):
            r = self.api_client.create_stream(table_name=table_name,
                                              columns_type=columns_type,
                                              topic=topic,
                                              value_format=value_format)

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_as_without_conditions.yml')
    def test_create_stream_as_without_conditions(self):
        """create_stream_as() without conditions returns a truthy value."""
        src_table = 'pageviews_original'
        columns_type = [
            'name string', 'age bigint', 'userid string', 'pageid bigint'
        ]
        topic = self.exist_topic
        table_name = 'create_stream_as_without_conditions'
        kafka_topic = 'create_stream_as_without_conditions'
        value_format = 'DELIMITED'
        select_columns = ['rowtime as logtime', '*']
        # Best-effort creation of the source stream; a KSQLError is ignored.
        try:
            r = self.api_client.create_stream(table_name=src_table,
                                              columns_type=columns_type,
                                              topic=topic,
                                              value_format=value_format)
        except KSQLError as e:
            pass
        r = self.api_client.create_stream_as(table_name=table_name,
                                             src_table=src_table,
                                             kafka_topic=kafka_topic,
                                             select_columns=select_columns,
                                             timestamp='logtime',
                                             value_format=value_format)
        self.assertTrue(r)

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_as_with_conditions_without_startwith.yml'
    )
    def test_create_stream_as_with_conditions_without_startwith(self):
        """create_stream_as() with the condition "userid = 'foo'" returns a truthy value."""
        src_table = 'pageviews_original'
        columns_type = [
            'name string', 'age bigint', 'userid string', 'pageid bigint'
        ]
        topic = self.exist_topic
        table_name = 'create_stream_as_with_conditions_without_startwith'
        kafka_topic = 'create_stream_as_with_conditions_without_startwith'
        value_format = 'DELIMITED'
        select_columns = ['rowtime as logtime', '*']
        conditions = "userid = 'foo'"
        # Best-effort creation of the source stream; a KSQLError is ignored.
        try:
            r = self.api_client.create_stream(table_name=src_table,
                                              columns_type=columns_type,
                                              topic=topic,
                                              value_format=value_format)
        except KSQLError as e:
            pass
        r = self.api_client.create_stream_as(table_name=table_name,
                                             src_table=src_table,
                                             kafka_topic=kafka_topic,
                                             select_columns=select_columns,
                                             timestamp='logtime',
                                             value_format=value_format,
                                             conditions=conditions)
        self.assertTrue(r)

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_as_with_conditions_with_startwith.yml'
    )
    def test_create_stream_as_with_conditions_with_startwith(self):
        """create_stream_as() with the condition "userid = 'foo_%'" returns a truthy value."""
        src_table = 'pageviews_original'
        columns_type = [
            'name string', 'age bigint', 'userid string', 'pageid bigint'
        ]
        topic = self.exist_topic
        table_name = 'create_stream_as_with_conditions_with_startwith'
        kafka_topic = 'create_stream_as_with_conditions_with_startwith'
        value_format = 'DELIMITED'
        select_columns = ['rowtime as logtime', '*']
        conditions = "userid = 'foo_%'"
        utils.drop_stream(self.api_client, src_table)
        utils.drop_stream(self.api_client, table_name)
        # Best-effort creation of the source stream; a KSQLError is ignored.
        try:
            r = self.api_client.create_stream(table_name=src_table,
                                              columns_type=columns_type,
                                              topic=topic,
                                              value_format=value_format)
        except KSQLError as e:
            pass
        r = self.api_client.create_stream_as(table_name=table_name,
                                             src_table=src_table,
                                             kafka_topic=kafka_topic,
                                             select_columns=select_columns,
                                             timestamp='logtime',
                                             value_format=value_format,
                                             conditions=conditions)
        self.assertTrue(r)

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_as_with_conditions_with_startwith_with_and.yml'
    )
    def test_create_stream_as_with_conditions_with_startwith_with_and(self):
        """create_stream_as() with a two-part `and` condition returns a truthy value."""
        src_table = 'pageviews_original'
        columns_type = [
            'name string', 'age bigint', 'userid string', 'pageid bigint'
        ]
        topic = self.exist_topic
        table_name = 'create_stream_as_with_conditions_with_startwith_with_and'
        kafka_topic = 'create_stream_as_with_conditions_with_startwith_with_and'
        value_format = 'DELIMITED'
        select_columns = ['rowtime as logtime', '*']
        conditions = "userid = 'foo_%' and age > 10"
        # Best-effort creation of the source stream; a KSQLError is ignored.
        try:
            r = self.api_client.create_stream(table_name=src_table,
                                              columns_type=columns_type,
                                              topic=topic,
                                              value_format=value_format)
        except KSQLError as e:
            pass
        r = self.api_client.create_stream_as(table_name=table_name,
                                             src_table=src_table,
                                             kafka_topic=kafka_topic,
                                             select_columns=select_columns,
                                             timestamp='logtime',
                                             value_format=value_format,
                                             conditions=conditions)
        self.assertTrue(r)

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_as_with_wrong_timestamp.yml')
    def test_ksql_create_stream_as_with_wrong_timestamp(self):
        """create_stream_as() with timestamp 'foo' raises KSQLError."""
        src_table = 'prebid_traffic_log_total_stream'
        columns_type = [
            'name string', 'age bigint', 'userid string', 'pageid bigint'
        ]
        topic = self.exist_topic
        table_name = 'prebid_traffic_log_valid_stream'
        kafka_topic = 'prebid_traffic_log_valid_topic'
        value_format = 'DELIMITED'
        select_columns = ['*']
        timestamp = 'foo'
        utils.drop_stream(self.api_client, src_table)
        utils.drop_stream(self.api_client, table_name)
        # NOTE(review): this handler only re-raises, so the try/except has no effect.
        try:
            r = self.api_client.create_stream(table_name=src_table,
                                              columns_type=columns_type,
                                              topic=topic,
                                              value_format=value_format)
        except KSQLError as e:
            raise
        with self.assertRaises(KSQLError):
            r = self.api_client.create_stream_as(table_name=table_name,
                                                 src_table=src_table,
                                                 kafka_topic=kafka_topic,
                                                 select_columns=select_columns,
                                                 timestamp=timestamp,
                                                 value_format=value_format)
class TestKSQLAPI(unittest.TestCase):
    """Test case for the client methods."""

    def setUp(self):
        # NOTE(review): the string literal below is never closed and runs straight
        # into what looks like the first test's @vcr.use_cassette decorator. The
        # text in between (rest of the URL, rest of setUp) appears to have been
        # replaced by "*****:*****" -- presumably a credential scrubber; restore
        # it from version control. The tests below read self.api_client,
        # self.exist_topic, self.test_prefix and self.bootstrap_servers, none of
        # which is assigned in the code visible here.
        self.url = "http://*****:*****@vcr.use_cassette("tests/vcr_cassettes/healthcheck.yml")

    def test_ksql_server_healthcheck(self):
        """GET <url>/status answers with HTTP 200."""
        res = requests.get(self.url + "/status")
        self.assertEqual(res.status_code, 200)

    @vcr.use_cassette("tests/vcr_cassettes/get_ksql_server.yml")
    def test_get_ksql_version_success(self):
        """get_ksql_version() equals ksql.__ksql_server_version__."""
        version = self.api_client.get_ksql_version()
        self.assertEqual(version, ksql.__ksql_server_version__)

    @vcr.use_cassette("tests/vcr_cassettes/get_properties.yml")
    def test_get_properties(self):
        """get_properties() contains an entry named 'ksql.schema.registry.url'."""
        properties = self.api_client.get_properties()
        # NOTE(review): `property` shadows the builtin of the same name.
        property = [
            i for i in properties if i["name"] == "ksql.schema.registry.url"
        ][0]
        # NOTE(review): same damage as in setUp -- the expected URL and the end of
        # this call are missing, and the next test's decorator is fused into the
        # unterminated literal.
        self.assertEqual(property.get("value"), "http://*****:*****@vcr.use_cassette("tests/vcr_cassettes/ksql_show_table_with_api_key.yml")

    def test_ksql_show_tables_with_api_key(self):
        """`show tables;` through a client built with api_key/secret returns an empty listing."""
        api_client = KSQLAPI(url=self.url,
                             check_version=False,
                             api_key='foo',
                             secret='bar')
        ksql_string = "show tables;"
        r = api_client.ksql(ksql_string)
        self.assertEqual(r, [{
            "@type": "tables",
            "statementText": "show tables;",
            "tables": [],
            "warnings": []
        }])

    @vcr.use_cassette("tests/vcr_cassettes/ksql_show_table.yml")
    def test_ksql_show_tables(self):
        """`show tables;` returns a single 'tables' entry with empty tables and warnings."""
        ksql_string = "show tables;"
        r = self.api_client.ksql(ksql_string)
        self.assertEqual(r, [{
            "@type": "tables",
            "statementText": "show tables;",
            "tables": [],
            "warnings": []
        }])

    @vcr.use_cassette("tests/vcr_cassettes/ksql_show_table.yml")
    def test_ksql_show_tables_with_no_semicolon(self):
        """`show tables` without ';' is echoed back as 'show tables;'."""
        ksql_string = "show tables"
        r = self.api_client.ksql(ksql_string)
        self.assertEqual(r, [{
            "@type": "tables",
            "statementText": "show tables;",
            "tables": [],
            "warnings": []
        }])

    @vcr.use_cassette("tests/vcr_cassettes/ksql_create_stream.yml")
    def test_ksql_create_stream(self):
        """A raw CREATE STREAM statement reports commandStatus SUCCESS."""
        topic = self.exist_topic
        stream_name = self.test_prefix + "test_ksql_create_stream"
        ksql_string = "CREATE STREAM {} (viewtime bigint, userid varchar, pageid varchar) \ WITH (kafka_topic='{}', value_format='DELIMITED');".format(
            stream_name, topic)
        r = self.api_client.ksql(ksql_string)
        self.assertEqual(r[0]["commandStatus"]["status"], "SUCCESS")

    # Skipped whenever the Kafka availability check fails for localhost:29092.
    @unittest.skipIf(not utils.check_kafka_available("localhost:29092"),
                     "vcrpy does not support streams yet")
    def test_ksql_create_stream_w_properties(self):
        """Create a stream with stream properties, produce a row, and read one chunk back."""
        topic = self.exist_topic
        stream_name = "TEST_KSQL_CREATE_STREAM"
        ksql_string = "CREATE STREAM {} (ORDER_ID INT, TOTAL_AMOUNT DOUBLE, CUSTOMER_NAME VARCHAR) \ WITH (kafka_topic='{}', value_format='JSON');".format(
            stream_name, topic)
        streamProperties = {"ksql.streams.auto.offset.reset": "earliest"}
        # Only create the stream when it is not already listed.
        if "TEST_KSQL_CREATE_STREAM" not in utils.get_all_streams(
                self.api_client):
            r = self.api_client.ksql(ksql_string,
                                     stream_properties=streamProperties)
            self.assertEqual(r[0]["commandStatus"]["status"], "SUCCESS")
        producer = Producer({"bootstrap.servers": self.bootstrap_servers})
        producer.produce(
            self.exist_topic,
            """{"order_id":3,"total_amount":43,"customer_name":"Palo Alto"}""")
        producer.flush()
        chunks = self.api_client.query(
            "select * from {} EMIT CHANGES".format(stream_name),
            stream_properties=streamProperties)
        # Only the first chunk is checked; the loop stops right after it.
        for chunk in chunks:
            self.assertTrue(chunk)
            break

    @vcr.use_cassette("tests/vcr_cassettes/bad_requests.yml")
    def test_bad_requests(self):
        """An invalid statement raises KSQLError with error_code 40001."""
        broken_ksql_string = "noi"
        with self.assertRaises(KSQLError) as e:
            self.api_client.ksql(broken_ksql_string)
        exception = e.exception
        self.assertEqual(exception.error_code, 40001)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_create_stream_by_builder.yml")
    def test_ksql_create_stream_by_builder(self):
        """A statement produced by SQLBuilder.build reports commandStatus SUCCESS."""
        sql_type = "create"
        table_type = "stream"
        table_name = "test_table"
        columns_type = ["viewtime bigint", "userid varchar", "pageid varchar"]
        topic = self.exist_topic
        value_format = "DELIMITED"
        utils.drop_stream(self.api_client, table_name)
        ksql_string = SQLBuilder.build(
            sql_type=sql_type,
            table_type=table_type,
            table_name=table_name,
            columns_type=columns_type,
            topic=topic,
            value_format=value_format,
        )
        r = self.api_client.ksql(ksql_string)
        self.assertEqual(r[0]["commandStatus"]["status"], "SUCCESS")

    @vcr.use_cassette(
        "tests/vcr_cassettes/ksql_create_stream_by_builder_api.yml")
    def test_ksql_create_stream_by_builder_api(self):
        """create_stream() returns a truthy value for a new stream."""
        table_name = "test_table"
        columns_type = ["viewtime bigint", "userid varchar", "pageid varchar"]
        topic = self.exist_topic
        value_format = "DELIMITED"
        utils.drop_stream(self.api_client, table_name)
        r = self.api_client.create_stream(table_name=table_name,
                                          columns_type=columns_type,
                                          topic=topic,
                                          value_format=value_format)
        self.assertTrue(r)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_topic_already_registered.yml")
    def test_raise_create_error_topic_already_registered(self):
        """Calling create_stream() twice with the same arguments raises KSQLError."""
        table_name = "foo_table"
        columns_type = ["name string", "age bigint"]
        topic = self.exist_topic
        value_format = "DELIMITED"
        utils.drop_stream(self.api_client, table_name)
        self.api_client.create_stream(table_name=table_name,
                                      columns_type=columns_type,
                                      topic=topic,
                                      value_format=value_format)
        with self.assertRaises(KSQLError):
            self.api_client.create_stream(table_name=table_name,
                                          columns_type=columns_type,
                                          topic=topic,
                                          value_format=value_format)

    @vcr.use_cassette("tests/vcr_cassettes/raise_create_error_no_topic.yml")
    def test_raise_create_error_no_topic(self):
        """create_stream() on topic 'this_topic_is_not_exist' raises KSQLError."""
        table_name = "foo_table"
        columns_type = ["name string", "age bigint"]
        topic = "this_topic_is_not_exist"
        value_format = "DELIMITED"
        with self.assertRaises(KSQLError):
            self.api_client.create_stream(table_name=table_name,
                                          columns_type=columns_type,
                                          topic=topic,
                                          value_format=value_format)

    @vcr.use_cassette(
        "tests/vcr_cassettes/ksql_create_stream_as_without_conditions.yml")
    def test_create_stream_as_without_conditions(self):
        """create_stream_as() without conditions returns a truthy value."""
        src_table = "pageviews_original"
        columns_type = [
            "name string", "age bigint", "userid string", "pageid bigint"
        ]
        topic = self.exist_topic
        table_name = "create_stream_as_without_conditions"
        kafka_topic = "create_stream_as_without_conditions"
        value_format = "DELIMITED"
        select_columns = ["rowtime as logtime", "*"]
        # Best-effort creation of the source stream; a KSQLError is ignored.
        try:
            r = self.api_client.create_stream(table_name=src_table,
                                              columns_type=columns_type,
                                              topic=topic,
                                              value_format=value_format)
        except KSQLError:
            pass
        r = self.api_client.create_stream_as(
            table_name=table_name,
            src_table=src_table,
            kafka_topic=kafka_topic,
            select_columns=select_columns,
            timestamp="logtime",
            value_format=value_format,
        )
        self.assertTrue(r)

    @vcr.use_cassette(
        "tests/vcr_cassettes/ksql_create_stream_as_with_conditions_without_startwith.yml"
    )
    def test_create_stream_as_with_conditions_without_startwith(self):
        """create_stream_as() with the condition "userid = 'foo'" returns a truthy value."""
        src_table = "pageviews_original"
        columns_type = [
            "name string", "age bigint", "userid string", "pageid bigint"
        ]
        topic = self.exist_topic
        table_name = "create_stream_as_with_conditions_without_startwith"
        kafka_topic = "create_stream_as_with_conditions_without_startwith"
        value_format = "DELIMITED"
        select_columns = ["rowtime as logtime", "*"]
        conditions = "userid = 'foo'"
        # Best-effort creation of the source stream; a KSQLError is ignored.
        try:
            r = self.api_client.create_stream(table_name=src_table,
                                              columns_type=columns_type,
                                              topic=topic,
                                              value_format=value_format)
        except KSQLError:
            pass
        r = self.api_client.create_stream_as(
            table_name=table_name,
            src_table=src_table,
            kafka_topic=kafka_topic,
            select_columns=select_columns,
            timestamp="logtime",
            value_format=value_format,
            conditions=conditions,
        )
        self.assertTrue(r)

    @vcr.use_cassette(
        "tests/vcr_cassettes/ksql_create_stream_as_with_conditions_with_startwith.yml"
    )
    def test_create_stream_as_with_conditions_with_startwith(self):
        """create_stream_as() with the condition "userid = 'foo_%'" returns a truthy value."""
        src_table = "pageviews_original"
        columns_type = [
            "name string", "age bigint", "userid string", "pageid bigint"
        ]
        topic = self.exist_topic
        table_name = "create_stream_as_with_conditions_with_startwith"
        kafka_topic = "create_stream_as_with_conditions_with_startwith"
        value_format = "DELIMITED"
        select_columns = ["rowtime as logtime", "*"]
        conditions = "userid = 'foo_%'"
        utils.drop_stream(self.api_client, src_table)
        utils.drop_stream(self.api_client, table_name)
        # Best-effort creation of the source stream; a KSQLError is ignored.
        try:
            r = self.api_client.create_stream(table_name=src_table,
                                              columns_type=columns_type,
                                              topic=topic,
                                              value_format=value_format)
        except KSQLError:
            pass
        r = self.api_client.create_stream_as(
            table_name=table_name,
            src_table=src_table,
            kafka_topic=kafka_topic,
            select_columns=select_columns,
            timestamp="logtime",
            value_format=value_format,
            conditions=conditions,
        )
        self.assertTrue(r)

    @vcr.use_cassette(
        "tests/vcr_cassettes/ksql_create_stream_as_with_conditions_with_startwith_with_and.yml"
    )
    def test_create_stream_as_with_conditions_with_startwith_with_and(self):
        """create_stream_as() with a two-part `and` condition returns a truthy value."""
        src_table = "pageviews_original"
        columns_type = [
            "name string", "age bigint", "userid string", "pageid bigint"
        ]
        topic = self.exist_topic
        table_name = "create_stream_as_with_conditions_with_startwith_with_and"
        kafka_topic = "create_stream_as_with_conditions_with_startwith_with_and"
        value_format = "DELIMITED"
        select_columns = ["rowtime as logtime", "*"]
        conditions = "userid = 'foo_%' and age > 10"
        # Best-effort creation of the source stream; a KSQLError is ignored.
        try:
            r = self.api_client.create_stream(table_name=src_table,
                                              columns_type=columns_type,
                                              topic=topic,
                                              value_format=value_format)
        except KSQLError:
            pass
        r = self.api_client.create_stream_as(
            table_name=table_name,
            src_table=src_table,
            kafka_topic=kafka_topic,
            select_columns=select_columns,
            timestamp="logtime",
            value_format=value_format,
            conditions=conditions,
        )
        self.assertTrue(r)

    @vcr.use_cassette(
        "tests/vcr_cassettes/ksql_create_stream_as_with_wrong_timestamp.yml")
    def test_ksql_create_stream_as_with_wrong_timestamp(self):
        """create_stream_as() with timestamp "foo" raises KSQLError."""
        src_table = "prebid_traffic_log_total_stream"
        columns_type = [
            "name string", "age bigint", "userid string", "pageid bigint"
        ]
        topic = self.exist_topic
        table_name = "prebid_traffic_log_valid_stream"
        kafka_topic = "prebid_traffic_log_valid_topic"
        value_format = "DELIMITED"
        select_columns = ["*"]
        timestamp = "foo"
        utils.drop_stream(self.api_client, src_table)
        utils.drop_stream(self.api_client, table_name)
        # Best-effort creation of the source stream; a KSQLError is ignored.
        try:
            self.api_client.create_stream(table_name=src_table,
                                          columns_type=columns_type,
                                          topic=topic,
                                          value_format=value_format)
        except KSQLError:
            pass
        with self.assertRaises(KSQLError):
            self.api_client.create_stream_as(
                table_name=table_name,
                src_table=src_table,
                kafka_topic=kafka_topic,
                select_columns=select_columns,
                timestamp=timestamp,
                value_format=value_format,
            )
import json from pyspark.sql import types as T from ksql import KSQLAPI from structured_stream import log_type_dict, datetime_convert client = KSQLAPI('http://localhost:8088') type_mapping = { T.StringType: "varchar", T.IntegerType: "int", datetime_convert: "bigint" } avro_type_mapping = { T.StringType: "string", T.IntegerType: "int", datetime_convert: "date" } base_schema = {"fields": [], "name": "", "namespace": "", "type": "record"} def create_stream(): for log_type in ["traffic", "threat"]: header = log_type_dict[log_type]["header"] indices = log_type_dict[log_type]["indices"] types = log_type_dict[log_type]["types"] topic = log_type_dict[log_type]["topic"] selected_header = [header[i] for i in indices] # Add backtick ` to prevent the `extraneous input '/'` error
class MYKSQLAPI():
    """KSQL client plus Avro producer for the ``test08`` topic."""

    def __init__(self):
        """Connect to KSQL and build the Avro producer.

        Sets ``api_client``, ``topic``, ``bootstrap_servers``, ``key`` and
        ``producer`` on the instance.

        Raises:
            SystemExit: with code -1 (after printing a message) when
                ``utils.check_kafka_available`` reports failure for the broker.
        """
        url = "http://localhost:8088"
        self.api_client = KSQLAPI(url)
        self.topic = "test08"
        self.bootstrap_servers = "localhost:9092"

        if not utils.check_kafka_available(self.bootstrap_servers):
            print("Could not connect to Kafka")
            # `exit()` is injected by the `site` module for interactive use and
            # is missing under `python -S` / frozen builds; SystemExit is what
            # it raises.
            raise SystemExit(-1)

        value_schema_str = """ { "type": "record", "namespace": "com.example", "name": "value", "fields": [ {"name":"LOCATION", "type":"string"}, {"name":"DATETIME", "type":"string"}, {"name":"SENTIMENT", "type":"string"}, {"name":"TEXT", "type":"string"} ] } """
        key_schema_str = """ { "type": "record", "namespace": "com.example", "name": "key", "fields": [ {"name":"LOCATION", "type":"string"}, {"name":"DATETIME", "type":"string"}, {"name":"SENTIMENT", "type":"string"}, {"name":"TEXT", "type":"string"} ] } """
        value_schema = avro.loads(value_schema_str)
        # NOTE(review): the key schema is parsed but never handed to the
        # producer (default_key_schema is None below) -- confirm intent.
        key_schema = avro.loads(key_schema_str)
        self.key = {
            "LOCATION": "LOCATION",
            "DATETIME": "DATETIME",
            "SENTIMENT": "SENTIMENT",
            "TEXT": "TEXT"
        }
        self.producer = AvroProducer(
            {
                'bootstrap.servers': self.bootstrap_servers,
                'on_delivery': delivery_report,
                'schema.registry.url': 'http://localhost:8081'
            },
            default_key_schema=None,
            default_value_schema=value_schema)

    def create_stream(self):
        """Issue the CREATE STREAM and CREATE SINK CONNECTOR statements for ``test08``."""
        self.api_client.ksql(
            "CREATE STREAM TEST08 (LOCATION STRING, DATETIME STRING, SENTIMENT STRING, TEXT STRING) WITH (KAFKA_TOPIC='test08', PARTITIONS=1, VALUE_FORMAT='JSON');"
        )
        self.api_client.ksql(
            "CREATE SINK CONNECTOR SINK_ELASTIC_TEST_08 WITH ('connector.class' = 'io.confluent.connect.elasticsearch.ElasticsearchSinkConnector','connection.url' = 'http://elasticsearch:9200','key.converter' = 'org.apache.kafka.connect.storage.StringConverter','type.name' = '_doc','topics' = 'test08','key.ignore' = 'true','behavior.on.null.values'='delete','schema.ignore' = 'false');"
        )

    def produce(self, message):
        """Hand *message* to the producer for ``self.topic`` with no key."""
        self.producer.produce(topic=self.topic, key=None, value=message)

    def flush(self):
        """Flush the underlying producer."""
        self.producer.flush()
from ksql import KSQLAPI
import pandas as pd
import json

client = KSQLAPI('http://localhost:8088')
client.ksql('show streams')
query = client.query('select * from passenger2 limit 10',
                     stream_properties={"auto.offset.reset": "earliest"})

# The query result is consumed by iteration, so a second pass over `query`
# would see nothing. Keep every chunk while printing it so the parsing step
# below works on the data that was actually received.
# NOTE(review): assumes client.query() returns a one-shot iterator -- confirm
# against the installed ksql client version.
chunks = []
for item in query:
    print(item)
    chunks.append(item)

records = [json.loads(r) for r in chunks]
# The last record is left out and the first two columns of each row dropped.
data = [r['row']['columns'][2:] for r in records[:-1]]
#data = r['row']['columns'][2] for r in records
df = pd.DataFrame(data=data)
df.head(5)
# print(cmd) insert_table_cmd = (""" CREATE TABLE IF NOT EXISTS updates ( update varchar(255) ); """) try: conn = pgdb.Connection(database='postgres', host='localhost', user='******', password='******') cur = conn.cursor() print('Connection successful!') client = KSQLAPI('http://127.0.0.1:59090/browser/') except: print('Connection unsuccessful!') print( "Type a SQL command below. \nWhen you are done typing your command, press ENTER on a blank line or type 'stop'.\nTo completely quite out of this interface, type 'quit all'." ) raw_input_lines = [] outer_loop = True while (outer_loop): print("> ") inner_loop = True while (inner_loop): raw_input = input() if (raw_input) and (raw_input != 'stop'): raw_input_lines.append(raw_input)
class TestKSQLAPI(unittest.TestCase):
    """Test case for the client methods."""

    # One JSON document with struct, map and array fields; shared by the
    # query-parsing tests.
    _NESTED_ROW_JSON = """{"order_id":3,"my_struct":{"a":1,"b":"bbb"}, "my_map":{"x":3, "y":4}, "my_array":[1,2,3], "total_amount":43,"customer_name":"Palo Alto"}"""

    def setUp(self):
        """Create the client and, when Kafka is reachable, seed the topic."""
        # NOTE(review): this fixture's text had been overwritten by
        # credential-style masking; confirm the values below against the
        # test environment.
        self.url = "http://localhost:8088"
        self.api_client = KSQLAPI(url=self.url, check_version=False)
        self.test_prefix = "ksql_python_test"
        self.exist_topic = 'exist_topic'
        self.bootstrap_servers = 'localhost:29092'
        if utils.check_kafka_available(self.bootstrap_servers):
            producer = Producer({'bootstrap.servers': self.bootstrap_servers})
            producer.produce(self.exist_topic, "test_message")
            producer.flush()

    # ------------------------------------------------------------------
    # Private helpers (not collected as tests).
    # ------------------------------------------------------------------
    def _create_stream_if_missing(self, stream_name, ksql_string, stream_properties):
        """Run *ksql_string* unless *stream_name* is already registered."""
        if stream_name not in utils.get_all_streams(self.api_client):
            r = self.api_client.ksql(ksql_string, stream_properties=stream_properties)
            self.assertEqual(r[0]["commandStatus"]["status"], "SUCCESS")

    def _prepare_nested_stream(self, topic, stream_name):
        """Produce the nested JSON row to *topic* and ensure *stream_name* exists.

        Returns the stream properties dict to pass to the follow-up query.
        """
        producer = Producer({"bootstrap.servers": self.bootstrap_servers})
        producer.produce(topic, self._NESTED_ROW_JSON)
        producer.flush()
        ksql_string = "CREATE STREAM {} (ORDER_ID INT, MY_STRUCT STRUCT<A INT, B VARCHAR>, MY_MAP MAP<VARCHAR, INT>, MY_ARRAY ARRAY<INT>, TOTAL_AMOUNT DOUBLE, CUSTOMER_NAME VARCHAR) \
                       WITH (kafka_topic='{}', value_format='JSON');".format(
            stream_name, topic
        )
        stream_properties = {"ksql.streams.auto.offset.reset": "earliest"}
        self._create_stream_if_missing(stream_name, ksql_string, stream_properties)
        return stream_properties

    def _assert_nested_row(self, row):
        """Check that *row* is the parsed form of ``_NESTED_ROW_JSON``."""
        self.assertEqual(row["ORDER_ID"], 3)
        self.assertEqual(row["MY_STRUCT"], {"A": 1, "B": "bbb"})
        self.assertEqual(row["MY_MAP"], {"x": 3, "y": 4})
        self.assertEqual(row["MY_ARRAY"], [1, 2, 3])
        self.assertEqual(row["TOTAL_AMOUNT"], 43)
        self.assertEqual(row["CUSTOMER_NAME"], "Palo Alto")

    def _ensure_source_stream(self, src_table, columns_type, topic, value_format):
        """Create the source stream, ignoring the error if creation fails."""
        try:
            self.api_client.create_stream(
                table_name=src_table, columns_type=columns_type, topic=topic, value_format=value_format
            )
        except KSQLError:
            # Deliberate best effort: the tests only need the source stream
            # to be present afterwards.
            pass

    # ------------------------------------------------------------------
    # Server metadata.
    # ------------------------------------------------------------------
    @vcr.use_cassette("tests/vcr_cassettes/healthcheck.yml")
    def test_ksql_server_healthcheck(self):
        """GET <url>/status answers with HTTP 200."""
        res = requests.get(self.url + "/status")
        self.assertEqual(res.status_code, 200)

    @vcr.use_cassette("tests/vcr_cassettes/get_ksql_server.yml")
    def test_get_ksql_version_success(self):
        """The reported server version matches the one the package targets."""
        version = self.api_client.get_ksql_version()
        self.assertEqual(version, ksql.__ksql_server_version__)

    @vcr.use_cassette("tests/vcr_cassettes/get_properties.yml")
    def test_get_properties(self):
        """The schema-registry URL appears among the server properties."""
        properties = self.api_client.get_properties()
        registry_property = [i for i in properties if i["name"] == "ksql.schema.registry.url"][0]
        # NOTE(review): the expected URL had been masked out of this file;
        # confirm it against the recorded get_properties cassette.
        self.assertEqual(registry_property.get("value"), "http://schema-registry:8081")

    # ------------------------------------------------------------------
    # Plain statements.
    # ------------------------------------------------------------------
    @vcr.use_cassette("tests/vcr_cassettes/ksql_show_table_with_api_key.yml")
    def test_ksql_show_tables_with_api_key(self):
        """`show tables;` works for a client built with api_key/secret."""
        api_client = KSQLAPI(url=self.url, check_version=False, api_key='foo', secret='bar')
        ksql_string = "show tables;"
        r = api_client.ksql(ksql_string)
        self.assertEqual(r, [{"@type": "tables", "statementText": "show tables;", "tables": [], "warnings": []}])

    @vcr.use_cassette("tests/vcr_cassettes/ksql_show_table.yml")
    def test_ksql_show_tables(self):
        """`show tables;` returns an empty table list."""
        ksql_string = "show tables;"
        r = self.api_client.ksql(ksql_string)
        self.assertEqual(r, [{"@type": "tables", "statementText": "show tables;", "tables": [], "warnings": []}])

    @vcr.use_cassette("tests/vcr_cassettes/ksql_show_table.yml")
    def test_ksql_show_tables_with_no_semicolon(self):
        """A statement sent without the trailing semicolon gives the same result."""
        ksql_string = "show tables"
        r = self.api_client.ksql(ksql_string)
        self.assertEqual(r, [{"@type": "tables", "statementText": "show tables;", "tables": [], "warnings": []}])

    @vcr.use_cassette("tests/vcr_cassettes/ksql_create_stream.yml")
    def test_ksql_create_stream(self):
        """A raw CREATE STREAM statement reports SUCCESS."""
        topic = self.exist_topic
        stream_name = self.test_prefix + "test_ksql_create_stream"
        ksql_string = "CREATE STREAM {} (viewtime bigint, userid varchar, pageid varchar) \
                       WITH (kafka_topic='{}', value_format='DELIMITED');".format(
            stream_name, topic
        )
        r = self.api_client.ksql(ksql_string)
        self.assertEqual(r[0]["commandStatus"]["status"], "SUCCESS")

    # ------------------------------------------------------------------
    # Live-server tests (skipped unless Kafka is reachable).
    # ------------------------------------------------------------------
    @unittest.skipIf(not utils.check_kafka_available("localhost:29092"), "vcrpy does not support streams yet")
    def test_ksql_create_stream_w_properties(self):
        """Create a stream with properties, then query it both ways."""
        topic = self.exist_topic
        stream_name = "TEST_KSQL_CREATE_STREAM"
        ksql_string = "CREATE STREAM {} (ORDER_ID INT, TOTAL_AMOUNT DOUBLE, CUSTOMER_NAME VARCHAR) \
                       WITH (kafka_topic='{}', value_format='JSON');".format(
            stream_name, topic
        )
        streamProperties = {"ksql.streams.auto.offset.reset": "earliest"}
        self._create_stream_if_missing(stream_name, ksql_string, streamProperties)

        producer = Producer({"bootstrap.servers": self.bootstrap_servers})
        producer.produce(self.exist_topic, """{"order_id":3,"total_amount":43,"customer_name":"Palo Alto"}""")
        producer.flush()

        # test legacy HTTP/1.1 request
        chunks = self.api_client.query(
            "select * from {} EMIT CHANGES".format(stream_name), stream_properties=streamProperties
        )
        header = next(chunks)
        self.assertEqual(header, """[{"header":{"queryId":"none","schema":"`ORDER_ID` INTEGER, `TOTAL_AMOUNT` DOUBLE, `CUSTOMER_NAME` STRING"}},\n""")
        for chunk in chunks:
            self.assertEqual(chunk, """{"row":{"columns":[3,43.0,"Palo Alto"]}},\n""")
            # The query never ends (EMIT CHANGES); one row is enough.
            break

        # test new HTTP/2 request
        chunks = self.api_client.query(
            "select * from {} EMIT CHANGES".format(stream_name), stream_properties=streamProperties, use_http2=True
        )
        header = next(chunks)
        header_obj = json.loads(header)
        self.assertEqual(header_obj["columnNames"], ['ORDER_ID', 'TOTAL_AMOUNT', 'CUSTOMER_NAME'])
        self.assertEqual(header_obj["columnTypes"], ['INTEGER', 'DOUBLE', 'STRING'])
        for chunk in chunks:
            chunk_obj = json.loads(chunk)
            self.assertEqual(chunk_obj, [3, 43.0, "Palo Alto"])
            break

    @unittest.skipIf(not utils.check_kafka_available("localhost:29092"), "vcrpy does not support HTTP/2")
    def test_ksql_close_query(self):
        """close_query() on query id "123" returns a falsy result."""
        result = self.api_client.close_query("123")
        self.assertFalse(result)

    @unittest.skipIf(not utils.check_kafka_available("localhost:29092"), "vcrpy does not support streams yet")
    def test_inserts_stream(self):
        """Every row sent through inserts_stream() is acknowledged with "ok"."""
        topic = self.exist_topic
        stream_name = "TEST_INSERTS_STREAM_STREAM"
        ksql_string = "CREATE STREAM {} (ORDER_ID INT, TOTAL_AMOUNT DOUBLE, CUSTOMER_NAME VARCHAR) \
                       WITH (kafka_topic='{}', value_format='JSON');".format(
            stream_name, topic
        )
        streamProperties = {"ksql.streams.auto.offset.reset": "earliest"}
        # Check for this test's own stream.  The existence check used to look
        # for "TEST_KSQL_CREATE_STREAM", so creation was skipped or repeated
        # depending on a different test's stream.
        self._create_stream_if_missing(stream_name, ksql_string, streamProperties)

        rows = [
            {"ORDER_ID": 1, "TOTAL_AMOUNT": 23.5, "CUSTOMER_NAME": "abc"},
            {"ORDER_ID": 2, "TOTAL_AMOUNT": 3.7, "CUSTOMER_NAME": "xyz"},
        ]
        results = self.api_client.inserts_stream(stream_name, rows)
        for result in results:
            self.assertEqual(result["status"], "ok")

    @unittest.skipIf(not utils.check_kafka_available("localhost:29092"), "vcrpy does not support streams yet")
    def test_ksql_parse_query_result_with_utils(self):
        """Raw chunks can be decoded with utils.parse_columns/process_row."""
        topic = "TEST_KSQL_PARSE_QUERY_RESULT_WITH_UTILS_TOPIC"
        stream_name = "TEST_KSQL_PARSE_QUERY_RESULT_WITH_UTILS_STREAM"
        streamProperties = self._prepare_nested_stream(topic, stream_name)

        chunks = self.api_client.query(
            "select * from {} EMIT CHANGES".format(stream_name), stream_properties=streamProperties
        )
        header = next(chunks)
        columns = utils.parse_columns(header)
        for chunk in chunks:
            row_obj = utils.process_row(chunk, columns)
            self._assert_nested_row(row_obj)
            break

    @unittest.skipIf(not utils.check_kafka_available("localhost:29092"), "vcrpy does not support streams yet")
    def test_ksql_parse_query_result(self):
        """query(return_objects=True) yields rows keyed by column name."""
        topic = "TEST_KSQL_PARSE_QUERY_RESULT_TOPIC"
        stream_name = "TEST_KSQL_PARSE_QUERY_RESULT_STREAM"
        streamProperties = self._prepare_nested_stream(topic, stream_name)

        chunks = self.api_client.query(
            "select * from {} EMIT CHANGES".format(stream_name),
            stream_properties=streamProperties,
            return_objects=True,
        )
        for chunk in chunks:
            self._assert_nested_row(chunk)
            break

    @unittest.skipIf(not utils.check_kafka_available("localhost:29092"), "vcrpy does not support streams yet")
    def test_ksql_parse_query_final_message(self):
        """A LIMIT 1 query can be iterated to the end without a break."""
        topic = "TEST_KSQL_PARSE_QUERY_FINAL_MESSAGE_TOPIC"
        stream_name = "TEST_KSQL_PARSE_QUERY_FINAL_MESSAGE_STREAM"
        streamProperties = self._prepare_nested_stream(topic, stream_name)

        chunks = self.api_client.query(
            "select * from {} EMIT CHANGES LIMIT 1".format(stream_name),
            stream_properties=streamProperties,
            return_objects=True,
        )
        for row_obj in chunks:
            self._assert_nested_row(row_obj)

    # ------------------------------------------------------------------
    # Error handling and the stream builder API.
    # ------------------------------------------------------------------
    @vcr.use_cassette("tests/vcr_cassettes/bad_requests.yml")
    def test_bad_requests(self):
        """An unparsable statement raises KSQLError with error code 40001."""
        broken_ksql_string = "noi"
        with self.assertRaises(KSQLError) as e:
            self.api_client.ksql(broken_ksql_string)
        exception = e.exception
        self.assertEqual(exception.error_code, 40001)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_create_stream_by_builder.yml")
    def test_ksql_create_stream_by_builder(self):
        """A statement produced by SQLBuilder.build() executes successfully."""
        sql_type = "create"
        table_type = "stream"
        table_name = "test_table"
        columns_type = ["viewtime bigint", "userid varchar", "pageid varchar"]
        topic = self.exist_topic
        value_format = "DELIMITED"

        utils.drop_stream(self.api_client, table_name)

        ksql_string = SQLBuilder.build(
            sql_type=sql_type,
            table_type=table_type,
            table_name=table_name,
            columns_type=columns_type,
            topic=topic,
            value_format=value_format,
        )
        r = self.api_client.ksql(ksql_string)
        self.assertEqual(r[0]["commandStatus"]["status"], "SUCCESS")

    @vcr.use_cassette("tests/vcr_cassettes/ksql_create_stream_by_builder_api.yml")
    def test_ksql_create_stream_by_builder_api(self):
        """create_stream() returns a truthy value on success."""
        table_name = "test_table"
        columns_type = ["viewtime bigint", "userid varchar", "pageid varchar"]
        topic = self.exist_topic
        value_format = "DELIMITED"

        utils.drop_stream(self.api_client, table_name)

        r = self.api_client.create_stream(
            table_name=table_name, columns_type=columns_type, topic=topic, value_format=value_format
        )
        self.assertTrue(r)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_topic_already_registered.yml")
    def test_raise_create_error_topic_already_registered(self):
        """Creating the same stream twice raises KSQLError the second time."""
        table_name = "foo_table"
        columns_type = ["name string", "age bigint"]
        topic = self.exist_topic
        value_format = "DELIMITED"

        utils.drop_stream(self.api_client, table_name)
        self.api_client.create_stream(
            table_name=table_name, columns_type=columns_type, topic=topic, value_format=value_format
        )

        with self.assertRaises(KSQLError):
            self.api_client.create_stream(
                table_name=table_name, columns_type=columns_type, topic=topic, value_format=value_format
            )

    @vcr.use_cassette("tests/vcr_cassettes/raise_create_error_no_topic.yml")
    def test_raise_create_error_no_topic(self):
        """create_stream() on the topic "this_topic_is_not_exist" raises KSQLError."""
        table_name = "foo_table"
        columns_type = ["name string", "age bigint"]
        topic = "this_topic_is_not_exist"
        value_format = "DELIMITED"

        with self.assertRaises(KSQLError):
            self.api_client.create_stream(
                table_name=table_name, columns_type=columns_type, topic=topic, value_format=value_format
            )

    # ------------------------------------------------------------------
    # CREATE STREAM ... AS SELECT.
    # ------------------------------------------------------------------
    @vcr.use_cassette("tests/vcr_cassettes/ksql_create_stream_as_without_conditions.yml")
    def test_create_stream_as_without_conditions(self):
        """create_stream_as() without a WHERE clause succeeds."""
        src_table = "pageviews_original"
        columns_type = ["name string", "age bigint", "userid string", "pageid bigint"]
        topic = self.exist_topic
        table_name = "create_stream_as_without_conditions"
        kafka_topic = "create_stream_as_without_conditions"
        value_format = "DELIMITED"
        select_columns = ["rowtime as logtime", "*"]

        self._ensure_source_stream(src_table, columns_type, topic, value_format)

        r = self.api_client.create_stream_as(
            table_name=table_name,
            src_table=src_table,
            kafka_topic=kafka_topic,
            select_columns=select_columns,
            timestamp="logtime",
            value_format=value_format,
        )
        self.assertTrue(r)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_create_stream_as_with_conditions_without_startwith.yml")
    def test_create_stream_as_with_conditions_without_startwith(self):
        """create_stream_as() with a plain equality condition succeeds."""
        src_table = "pageviews_original"
        columns_type = ["name string", "age bigint", "userid string", "pageid bigint"]
        topic = self.exist_topic
        table_name = "create_stream_as_with_conditions_without_startwith"
        kafka_topic = "create_stream_as_with_conditions_without_startwith"
        value_format = "DELIMITED"
        select_columns = ["rowtime as logtime", "*"]
        conditions = "userid = 'foo'"

        self._ensure_source_stream(src_table, columns_type, topic, value_format)

        r = self.api_client.create_stream_as(
            table_name=table_name,
            src_table=src_table,
            kafka_topic=kafka_topic,
            select_columns=select_columns,
            timestamp="logtime",
            value_format=value_format,
            conditions=conditions,
        )
        self.assertTrue(r)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_create_stream_as_with_conditions_with_startwith.yml")
    def test_create_stream_as_with_conditions_with_startwith(self):
        """create_stream_as() with a 'foo_%' pattern condition succeeds."""
        src_table = "pageviews_original"
        columns_type = ["name string", "age bigint", "userid string", "pageid bigint"]
        topic = self.exist_topic
        table_name = "create_stream_as_with_conditions_with_startwith"
        kafka_topic = "create_stream_as_with_conditions_with_startwith"
        value_format = "DELIMITED"
        select_columns = ["rowtime as logtime", "*"]
        conditions = "userid = 'foo_%'"

        utils.drop_stream(self.api_client, src_table)
        utils.drop_stream(self.api_client, table_name)

        self._ensure_source_stream(src_table, columns_type, topic, value_format)

        r = self.api_client.create_stream_as(
            table_name=table_name,
            src_table=src_table,
            kafka_topic=kafka_topic,
            select_columns=select_columns,
            timestamp="logtime",
            value_format=value_format,
            conditions=conditions,
        )
        self.assertTrue(r)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_create_stream_as_with_conditions_with_startwith_with_and.yml")
    def test_create_stream_as_with_conditions_with_startwith_with_and(self):
        """create_stream_as() with a pattern AND a comparison succeeds."""
        src_table = "pageviews_original"
        columns_type = ["name string", "age bigint", "userid string", "pageid bigint"]
        topic = self.exist_topic
        table_name = "create_stream_as_with_conditions_with_startwith_with_and"
        kafka_topic = "create_stream_as_with_conditions_with_startwith_with_and"
        value_format = "DELIMITED"
        select_columns = ["rowtime as logtime", "*"]
        conditions = "userid = 'foo_%' and age > 10"

        self._ensure_source_stream(src_table, columns_type, topic, value_format)

        r = self.api_client.create_stream_as(
            table_name=table_name,
            src_table=src_table,
            kafka_topic=kafka_topic,
            select_columns=select_columns,
            timestamp="logtime",
            value_format=value_format,
            conditions=conditions,
        )
        self.assertTrue(r)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_create_stream_as_with_wrong_timestamp.yml")
    def test_ksql_create_stream_as_with_wrong_timestamp(self):
        """create_stream_as() with timestamp="foo" raises KSQLError."""
        src_table = "prebid_traffic_log_total_stream"
        columns_type = ["name string", "age bigint", "userid string", "pageid bigint"]
        topic = self.exist_topic
        table_name = "prebid_traffic_log_valid_stream"
        kafka_topic = "prebid_traffic_log_valid_topic"
        value_format = "DELIMITED"
        select_columns = ["*"]
        timestamp = "foo"

        utils.drop_stream(self.api_client, src_table)
        utils.drop_stream(self.api_client, table_name)

        self._ensure_source_stream(src_table, columns_type, topic, value_format)

        with self.assertRaises(KSQLError):
            self.api_client.create_stream_as(
                table_name=table_name,
                src_table=src_table,
                kafka_topic=kafka_topic,
                select_columns=select_columns,
                timestamp=timestamp,
                value_format=value_format,
            )
from ksql import KSQLAPI

# Ask the KSQL server for its streams and print the raw response.
client = KSQLAPI('http://192.168.99.100:8088')
streams = client.ksql('show streams')
print(streams)
class TestKSQLAPI(unittest.TestCase):
    """Test case for the client methods."""

    def setUp(self):
        """Create the client and the topic name used by every test."""
        # NOTE(review): this fixture's text had been overwritten by
        # credential-style masking; the values below are assumed and must be
        # confirmed against the recorded cassettes.
        self.url = "http://ksql-server:8080"
        self.api_client = KSQLAPI(url=self.url)
        self.exist_topic = 'exist_topic'

    def _ensure_source_stream(self, src_table, columns_type, topic, value_format):
        """Create the source stream, ignoring the error if creation fails."""
        try:
            self.api_client.create_stream(table_name=src_table,
                                          columns_type=columns_type,
                                          topic=topic,
                                          value_format=value_format)
        except CreateError:
            # Deliberate best effort: the tests only need the source stream
            # to be present afterwards.
            pass

    @vcr.use_cassette('tests/vcr_cassettes/healthcheck.yml')
    def test_ksql_server_healthcheck(self):
        """GET on the server URL answers with HTTP 200."""
        res = requests.get(self.url)
        self.assertEqual(res.status_code, 200)

    @vcr.use_cassette('tests/vcr_cassettes/get_ksql_server.yml')
    def test_get_ksql_version_success(self):
        """The reported server version matches the one the package targets."""
        version = self.api_client.get_ksql_version()
        self.assertEqual(version, ksql.__ksql_server_version__)

    @vcr.use_cassette('tests/vcr_cassettes/ksql_show_table.yml')
    def test_ksql_show_tables(self):
        """`show tables;` returns an empty table list."""
        ksql_string = "show tables;"
        r = self.api_client.ksql(ksql_string)
        self.assertEqual(r, [{
            'tables': {
                'statementText': 'show tables;',
                'tables': []
            }
        }])

    @vcr.use_cassette('tests/vcr_cassettes/ksql_show_table.yml')
    def test_ksql_show_tables_with_no_semicolon(self):
        """A statement sent without the trailing semicolon gives the same result."""
        ksql_string = "show tables"
        r = self.api_client.ksql(ksql_string)
        self.assertEqual(r, [{
            'tables': {
                'statementText': 'show tables;',
                'tables': []
            }
        }])

    @vcr.use_cassette('tests/vcr_cassettes/ksql_create_stream.yml')
    def test_ksql_create_stream(self):
        """A raw CREATE STREAM statement reports SUCCESS."""
        topic = self.exist_topic
        ksql_string = "CREATE STREAM test_table (viewtime bigint, userid varchar, pageid varchar) \
                       WITH (kafka_topic='{}', value_format='DELIMITED');".format(
            topic)
        r = self.api_client.ksql(ksql_string)
        self.assertEqual(r[0]['currentStatus']['commandStatus']['status'],
                         'SUCCESS')

    @vcr.use_cassette('tests/vcr_cassettes/ksql_create_stream.yml')
    def test_ksql_create_stream_by_builder(self):
        """A statement produced by SQLBuilder.build() executes successfully."""
        sql_type = 'create'
        table_type = 'stream'
        table_name = 'test_table'
        columns_type = ['viewtime bigint', 'userid varchar', 'pageid varchar']
        topic = self.exist_topic
        value_format = 'DELIMITED'

        ksql_string = SQLBuilder.build(sql_type=sql_type,
                                       table_type=table_type,
                                       table_name=table_name,
                                       columns_type=columns_type,
                                       topic=topic,
                                       value_format=value_format)
        r = self.api_client.ksql(ksql_string)
        self.assertEqual(r[0]['currentStatus']['commandStatus']['status'],
                         'SUCCESS')

    @vcr.use_cassette('tests/vcr_cassettes/ksql_create_stream.yml')
    def test_ksql_create_stream_by_builder_api(self):
        """create_stream() returns a truthy value on success."""
        table_name = 'test_table'
        columns_type = ['viewtime bigint', 'userid varchar', 'pageid varchar']
        topic = self.exist_topic
        value_format = 'DELIMITED'

        r = self.api_client.create_stream(table_name=table_name,
                                          columns_type=columns_type,
                                          topic=topic,
                                          value_format=value_format)
        self.assertTrue(r)

    @vcr.use_cassette('tests/vcr_cassettes/ksql_topic_already_registered.yml')
    def test_raise_create_error_topic_already_registered(self):
        """Creating the same stream twice raises CreateError the second time."""
        table_name = 'foo_table'
        columns_type = ['name string', 'age bigint']
        topic = self.exist_topic
        value_format = 'DELIMITED'

        self.api_client.create_stream(table_name=table_name,
                                      columns_type=columns_type,
                                      topic=topic,
                                      value_format=value_format)

        with self.assertRaises(CreateError):
            self.api_client.create_stream(table_name=table_name,
                                          columns_type=columns_type,
                                          topic=topic,
                                          value_format=value_format)

    @vcr.use_cassette('tests/vcr_cassettes/raise_create_error_no_topic.yml')
    def test_raise_create_error_no_topic(self):
        """create_stream() on the topic 'this_topic_is_not_exist' raises CreateError."""
        table_name = 'foo_table'
        columns_type = ['name string', 'age bigint']
        topic = 'this_topic_is_not_exist'
        value_format = 'DELIMITED'

        with self.assertRaises(CreateError):
            self.api_client.create_stream(table_name=table_name,
                                          columns_type=columns_type,
                                          topic=topic,
                                          value_format=value_format)

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_as_without_conditions.yml')
    def test_create_stream_as_without_conditions(self):
        """create_stream_as() without a WHERE clause succeeds."""
        src_table = 'pageviews_original'
        columns_type = [
            'name string', 'age bigint', 'userid string', 'pageid bigint'
        ]
        topic = self.exist_topic
        table_name = 'create_stream_as_without_conditions'
        kafka_topic = 'create_stream_as_without_conditions'
        value_format = 'DELIMITED'
        select_columns = ['rowtime as logtime', '*']

        self._ensure_source_stream(src_table, columns_type, topic, value_format)

        r = self.api_client.create_stream_as(table_name=table_name,
                                             src_table=src_table,
                                             kafka_topic=kafka_topic,
                                             select_columns=select_columns,
                                             timestamp='logtime',
                                             value_format=value_format)
        self.assertTrue(r)

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_as_with_conditions_without_startwith.yml'
    )
    def test_create_stream_as_with_conditions_without_startwith(self):
        """create_stream_as() with a plain equality condition succeeds."""
        src_table = 'pageviews_original'
        columns_type = [
            'name string', 'age bigint', 'userid string', 'pageid bigint'
        ]
        topic = self.exist_topic
        table_name = 'create_stream_as_with_conditions_without_startwith'
        kafka_topic = 'create_stream_as_with_conditions_without_startwith'
        value_format = 'DELIMITED'
        select_columns = ['rowtime as logtime', '*']
        conditions = "userid = 'foo'"

        self._ensure_source_stream(src_table, columns_type, topic, value_format)

        r = self.api_client.create_stream_as(table_name=table_name,
                                             src_table=src_table,
                                             kafka_topic=kafka_topic,
                                             select_columns=select_columns,
                                             timestamp='logtime',
                                             value_format=value_format,
                                             conditions=conditions)
        self.assertTrue(r)

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_as_with_conditions_with_startwith.yml'
    )
    def test_create_stream_as_with_conditions_with_startwith(self):
        """create_stream_as() with a 'foo_%' pattern condition succeeds."""
        src_table = 'pageviews_original'
        columns_type = [
            'name string', 'age bigint', 'userid string', 'pageid bigint'
        ]
        topic = self.exist_topic
        table_name = 'create_stream_as_with_conditions_with_startwith'
        kafka_topic = 'create_stream_as_with_conditions_with_startwith'
        value_format = 'DELIMITED'
        select_columns = ['rowtime as logtime', '*']
        conditions = "userid = 'foo_%'"

        self._ensure_source_stream(src_table, columns_type, topic, value_format)

        r = self.api_client.create_stream_as(table_name=table_name,
                                             src_table=src_table,
                                             kafka_topic=kafka_topic,
                                             select_columns=select_columns,
                                             timestamp='logtime',
                                             value_format=value_format,
                                             conditions=conditions)
        self.assertTrue(r)

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_as_with_conditions_with_startwith_with_and.yml'
    )
    def test_create_stream_as_with_conditions_with_startwith_with_and(self):
        """create_stream_as() with a pattern AND a comparison succeeds."""
        src_table = 'pageviews_original'
        columns_type = [
            'name string', 'age bigint', 'userid string', 'pageid bigint'
        ]
        topic = self.exist_topic
        table_name = 'create_stream_as_with_conditions_with_startwith_with_and'
        kafka_topic = 'create_stream_as_with_conditions_with_startwith_with_and'
        value_format = 'DELIMITED'
        select_columns = ['rowtime as logtime', '*']
        conditions = "userid = 'foo_%' and age > 10"

        self._ensure_source_stream(src_table, columns_type, topic, value_format)

        r = self.api_client.create_stream_as(table_name=table_name,
                                             src_table=src_table,
                                             kafka_topic=kafka_topic,
                                             select_columns=select_columns,
                                             timestamp='logtime',
                                             value_format=value_format,
                                             conditions=conditions)
        self.assertTrue(r)

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_as_with_wrong_timestamp.yml')
    def test_ksql_create_stream_as_with_wrong_timestamp(self):
        """create_stream_as() with timestamp='foo' raises CreateError."""
        src_table = 'prebid_traffic_log_total_stream'
        columns_type = [
            'name string', 'age bigint', 'userid string', 'pageid bigint'
        ]
        topic = self.exist_topic
        table_name = 'prebid_traffic_log_valid_stream'
        kafka_topic = 'prebid_traffic_log_valid_topic'
        value_format = 'DELIMITED'
        select_columns = ['*']
        timestamp = 'foo'

        self._ensure_source_stream(src_table, columns_type, topic, value_format)

        with self.assertRaises(CreateError):
            self.api_client.create_stream_as(table_name=table_name,
                                             src_table=src_table,
                                             kafka_topic=kafka_topic,
                                             select_columns=select_columns,
                                             timestamp=timestamp,
                                             value_format=value_format)
def setUp(self):
    """Store the server URL, a client bound to it, and the test topic name."""
    server_url = "http://ksql-server:8080"
    self.url = server_url
    self.api_client = KSQLAPI(url=server_url)
    self.exist_topic = 'exist_topic'
from ksql import KSQLAPI

# Refer to https://pypi.org/project/ksql/

# Run a select against table1 and print each item of the result as it arrives.
client = KSQLAPI('http://localhost:8088')
query = client.query('select * from table1')
for chunk in query:
    print(chunk)
import json
import random
import string
import sys

from ksql import KSQLAPI


def _random_name(length=10):
    """Return *length* random uppercase ASCII letters."""
    return ''.join(
        random.choice(string.ascii_uppercase) for _ in range(length))


# Both positional arguments are required; fail with a usage message instead
# of an IndexError traceback when one is missing.
if len(sys.argv) < 3:
    sys.exit("usage: {} <ksql-endpoint> <topic>".format(sys.argv[0]))

endpoint = sys.argv[1]
topic = sys.argv[2]

client = KSQLAPI(endpoint)

## Create a stream from topic
stream_name_random = _random_name()
client.create_stream(table_name=stream_name_random,
                     columns_type=["number bigint", "category varchar"],
                     topic=topic,
                     value_format="json")

# Confirm the server now lists the stream that was just requested.
query_res = client.ksql("LIST STREAMS;")
streams_names = [stream['name'] for stream in query_res[0]['streams']]
if stream_name_random in streams_names:
    print("STREAM CREATED")
else:
    print("Stream not created")
    # sys.exit rather than the `exit` builtin, which only exists when the
    # `site` module is loaded.
    sys.exit(1)

# Create a materialized table from stream for the average number over categories
materialized_table_random_name = _random_name()