Esempio n. 1
0
 def setUp(self):
     """Build a KSQL client (version check off) and seed the test topic.

     One message is produced to ``exist_topic`` only when
     ``check_kafka_available`` reports the broker address as reachable.
     """
     self.url = "http://ksql-server:8088"
     self.api_client = KSQLAPI(url=self.url, check_version=False)
     self.exist_topic = 'exist_topic'
     kafka_address = 'kafka:29092'
     if not check_kafka_available(kafka_address):
         # Nothing to seed without a broker.
         return
     seeder = Producer({'bootstrap.servers': kafka_address})
     seeder.produce(self.exist_topic, "test_message")
     seeder.flush()
Esempio n. 2
0
 def setUp(self):
     """Build a KSQL client (version check off) and seed the test topic.

     Stores the server URL, name prefix, topic and broker address on the
     test case; one message is produced to ``exist_topic`` only when
     ``utils.check_kafka_available`` accepts the broker address.
     """
     self.url = "http://localhost:8088"
     self.api_client = KSQLAPI(url=self.url, check_version=False)
     self.test_prefix = "ksql_python_test"
     self.exist_topic = 'exist_topic'
     self.bootstrap_servers = 'localhost:29092'
     if not utils.check_kafka_available(self.bootstrap_servers):
         # Nothing to seed without a broker.
         return
     seeder = Producer({'bootstrap.servers': self.bootstrap_servers})
     seeder.produce(self.exist_topic, "test_message")
     seeder.flush()
Esempio n. 3
0
 def test_ksql_show_tables_with_api_key(self):
     """``show tables;`` through a client built with api_key/secret yields the empty listing."""
     expected = [{
         "@type": "tables",
         "statementText": "show tables;",
         "tables": [],
         "warnings": []
     }]
     keyed_client = KSQLAPI(url=self.url,
                            check_version=False,
                            api_key='foo',
                            secret='bar')
     response = keyed_client.ksql("show tables;")
     self.assertEqual(response, expected)
Esempio n. 4
0
class Processor:
    """Base class for pipelines that drive a KSQL server through ``KSQLAPI``.

    Subclasses provide ``create_stream_from_topic``, ``rename_rowkey`` and
    ``join``; ``start`` calls them in that order. The ``ksql`` and
    ``create_stream`` helpers log a ``KSQLError`` instead of raising it.
    """

    def __init__(self, url):
        """Create the KSQL client for the server at *url*."""
        self.client = KSQLAPI(url)

    def _missing_stage_error(self):
        """Build the error raised by every stage a subclass did not override."""
        return NotImplementedError(
            type(self).__name__ + " must implement processor")

    def start(self):
        """Run the three pipeline stages one after another."""
        self.create_stream_from_topic()
        self.rename_rowkey()
        self.join()
        # self.query()

    def create_stream_from_topic(self):
        """Stage 1; must be overridden."""
        raise self._missing_stage_error()

    def rename_rowkey(self):
        """Stage 2; must be overridden."""
        raise self._missing_stage_error()

    def join(self):
        """Stage 3; must be overridden."""
        raise self._missing_stage_error()

    def query(self):
        """Run a ``select * from user`` query and log every item it yields."""
        rows = self.client.query(
            query_string="select * from user emit changes",
            stream_properties={"ksql.streams.auto.offset.reset": "earliest"},
        )
        for row in rows:
            logger.info(row)

    def ksql(self, ksql_string, stream_properties=None):
        """Execute *ksql_string*; a ``KSQLError`` is logged at info level, not raised."""
        request = {
            "ksql_string": ksql_string,
            "stream_properties": stream_properties,
        }
        try:
            self.client.ksql(**request)
        except KSQLError as err:
            logger.info(err)

    def create_stream(self,
                      table_name,
                      columns_type,
                      topic,
                      value_format="JSON"):
        """Create a stream via the client; a ``KSQLError`` is logged at info level, not raised."""
        definition = {
            "table_name": table_name,
            "columns_type": columns_type,
            "topic": topic,
            "value_format": value_format,
        }
        try:
            self.client.create_stream(**definition)
        except KSQLError as err:
            logger.info(err)
Esempio n. 5
0
 def __init__(self):
     """Create the KSQL client and, when Kafka answers, an Avro producer.

     If ``utils.check_kafka_available`` rejects the broker address, a
     message is printed and the process exits with status -1.
     """
     self.api_client = KSQLAPI("http://localhost:8088")
     self.topic = "test08"
     self.bootstrap_servers = "localhost:9092"
     if utils.check_kafka_available(self.bootstrap_servers):
         value_schema_str = """ 
         { 
             "type": "record", 
             "namespace": "com.example", 
             "name": "value", 
             "fields": [ 
                 {"name":"LOCATION", "type":"string"}, 
                 {"name":"DATETIME", "type":"string"}, 
                 {"name":"SENTIMENT", "type":"string"}, 
                 {"name":"TEXT", "type":"string"} 
             ] 
         } 
         """
         key_schema_str = """ 
         { 
             "type": "record", 
             "namespace": "com.example", 
             "name": "key", 
             "fields": [ 
                 {"name":"LOCATION", "type":"string"}, 
                 {"name":"DATETIME", "type":"string"}, 
                 {"name":"SENTIMENT", "type":"string"}, 
                 {"name":"TEXT", "type":"string"} 
             ] 
         } 
         """
         parsed_value_schema = avro.loads(value_schema_str)
         # The key schema is still parsed, but the producer below is built
         # with default_key_schema=None, so the parsed result is not used.
         avro.loads(key_schema_str)
         self.key = dict(
             LOCATION="LOCATION",
             DATETIME="DATETIME",
             SENTIMENT="SENTIMENT",
             TEXT="TEXT",
         )
         producer_settings = {
             'bootstrap.servers': self.bootstrap_servers,
             'on_delivery': delivery_report,
             'schema.registry.url': 'http://localhost:8081'
         }
         self.producer = AvroProducer(
             producer_settings,
             default_key_schema=None,
             default_value_schema=parsed_value_schema)
     else:
         print("Could not connect to Kafka")
         exit(-1)
Esempio n. 6
0
  def __init__(self, user=None, security_enabled=False, ssl_cert_ca_verify=False):
    """Create the KSQL client for the URL configured in ``KAFKA.KSQL_API_URL``.

    The configured URL has leading/trailing slashes stripped; an unset value
    becomes the empty string. ``security_enabled`` and ``ssl_cert_ca_verify``
    are accepted but not read in the visible body.

    Raises:
      KSqlApiException: the ``ksql`` package cannot be imported.
    """
    try:
      from ksql import KSQLAPI
    except ImportError:
      raise KSqlApiException('Module missing: pip install ksql')

    if KAFKA.KSQL_API_URL.get():
      self._api_url = KAFKA.KSQL_API_URL.get().strip('/')
    else:
      self._api_url = ''

    self.user = user
    ksql_client = KSQLAPI(self._api_url)
    self.client = ksql_client
    client = ksql_client
Esempio n. 7
0
"""Load configuration from .yaml file."""
import confuse
from faker import Faker
from time import sleep
from json import dumps
from kafka import KafkaProducer
from ksql import KSQLAPI
# KSQL client for the ksqlDB server; the statements visible below never use it.
client = KSQLAPI('http://ksqldb-server:8088')

# Kafka producer whose values are JSON-serialized and encoded as UTF-8 bytes.
producer = KafkaProducer(bootstrap_servers=['broker:29092'],
                         value_serializer=lambda x: dumps(x).encode('utf-8'))

fake = Faker()
# Configuration registered under the name 'mocker', read from /app/config.yaml.
config = confuse.Configuration('mocker')
config.set_file('/app/config.yaml')

# `rate` is read as an int; `topic` is the Kafka topic the loop sends to.
rate = config['transactions']['rate'].get(int)
topic = config['kafka']['topic'].get()

print("Start sending transactions")
# Endless loop: each iteration sends one fake transaction record to `topic`.
# NOTE(review): neither `rate` nor the imported `sleep` is used in the visible
# loop body, so as shown the loop sends without pausing -- confirm whether a
# throttle was meant to follow the send.
while True:
    producer.send(topic,
                  value={
                      'transaction_id': "RF" + str(fake.pyint(5)),
                      'transaction_type': "transaction_" + str(fake.pyint(1)),
                      'from_account': fake.iban(),
                      'to_account': fake.iban(),
                      'amount_cents': fake.pyint(),
                      'created_at':
                      fake.date_time().strftime("%Y/%m/%d, %H:%M:%S")
                  })
Esempio n. 8
0
 def __init__(self, url):
     """Store a ``KSQLAPI`` client for *url* on ``self.client``."""
     ksql_client = KSQLAPI(url)
     self.client = ksql_client
Esempio n. 9
0
def get_ksql_client():
    """Return a new ``KSQLAPI`` client built from ``KSQL_URL``."""
    return KSQLAPI(KSQL_URL)
Esempio n. 10
0
from kafka import KafkaClient
from ksql import KSQLAPI

kafka_client = KafkaClient(hosts=['localhost:9092'])
for _required_topic in ('gas_prices', 'locations'):
    kafka_client.ensure_topic_exists(_required_topic)

client = KSQLAPI('http://localhost:8088')
client.ksql("SET 'auto.offset.reset' = 'earliest';")

# Drop existing streams, in this order: alerts, locations, gas_prices.
for _stale_stream in ('alerts', 'locations', 'gas_prices'):
    client.ksql('DROP STREAM {};'.format(_stale_stream))

# Stream over the gas_prices topic (JSON values).
_gas_prices_ddl = '''
    CREATE STREAM gas_prices \
    (stationid VARCHAR, lat DOUBLE, long DOUBLE, price DOUBLE, recordtime BIGINT, joinner INT) \
    WITH (KAFKA_TOPIC='gas_prices', VALUE_FORMAT='JSON');
'''
client.ksql(_gas_prices_ddl)

# Stream over the locations topic (JSON values).
_locations_ddl = '''
    CREATE STREAM locations \
    (userid VARCHAR, lat DOUBLE, long DOUBLE, recordtime BIGINT, joinner INT) \
    WITH (KAFKA_TOPIC='locations', VALUE_FORMAT='JSON');
'''
client.ksql(_locations_ddl)

# Creates the alert stream using the gas_prices stream
client.sql('''
Esempio n. 11
0
 def test_with_timeout(self):
     """A ``timeout`` given to the constructor is exposed as ``api_client.timeout``."""
     api_client = KSQLAPI(url='http://foo', timeout=10)
     # assertEqual, not assertEquals: the latter is a deprecated alias that
     # was removed from unittest in Python 3.12.
     self.assertEqual(api_client.timeout, 10)
Esempio n. 12
0
import logging
from ksql import KSQLAPI

# Configure the root logger at DEBUG level.
logging.basicConfig(level=logging.DEBUG)
#client = KSQLAPI('http://ec2-52-41-32-196.us-west-2.compute.amazonaws.com:8088')
# Client for the KSQL server at 10.0.0.13:8088.
client = KSQLAPI('http://10.0.0.13:8088')

#client.create_stream()

# Run a `show tables` statement; the returned value is discarded.
client.ksql('show tables')
#client = KSQLAPI('http://ec2-52-41-32-196.us-west-2.compute.amazonaws.com:8088')

Esempio n. 13
0
 def test_with_timeout(self):
     """A ``timeout`` given to the constructor is exposed as ``api_client.timeout``."""
     api_client = KSQLAPI(url=self.url, timeout=10, check_version=False)
     # assertEqual, not assertEquals: the latter is a deprecated alias that
     # was removed from unittest in Python 3.12.
     self.assertEqual(api_client.timeout, 10)
Esempio n. 14
0
class TestKSQLAPI(unittest.TestCase):
    """Test case for the client methods."""
    def setUp(self):
        self.url = "http://*****:*****@vcr.use_cassette('tests/vcr_cassettes/healthcheck.yml')
    def test_ksql_server_healthcheck(self):
        """GET ``<url>/status`` and expect HTTP 200."""
        status_url = self.url + '/status'
        response = requests.get(status_url)
        self.assertEqual(response.status_code, 200)

    @vcr.use_cassette('tests/vcr_cassettes/get_ksql_server.yml')
    def test_get_ksql_version_success(self):
        """The version reported by the client equals ``ksql.__ksql_server_version__``."""
        reported_version = self.api_client.get_ksql_version()
        expected_version = ksql.__ksql_server_version__
        self.assertEqual(reported_version, expected_version)

    @vcr.use_cassette('tests/vcr_cassettes/get_properties.yml')
    def test_get_properties(self):
        properties = self.api_client.get_properties()
        self.assertEqual(properties['ksql.schema.registry.url'],
                         "http://*****:*****@vcr.use_cassette('tests/vcr_cassettes/ksql_show_table.yml')
    def test_ksql_show_tables(self):
        """Run ``show tables;`` and expect a single entry with an empty table list."""
        expected = [{
            '@type': 'tables',
            'statementText': 'show tables;',
            'tables': []
        }]
        response = self.api_client.ksql("show tables;")
        self.assertEqual(response, expected)

    @vcr.use_cassette('tests/vcr_cassettes/ksql_show_table.yml')
    def test_ksql_show_tables_with_no_semicolon(self):
        """Send ``show tables`` without a semicolon; the expected reply echoes ``show tables;``."""
        expected = [{
            '@type': 'tables',
            'statementText': 'show tables;',
            'tables': []
        }]
        response = self.api_client.ksql("show tables")
        self.assertEqual(response, expected)

    @vcr.use_cassette('tests/vcr_cassettes/ksql_create_stream.yml')
    def test_ksql_create_stream(self):
        """A raw CREATE STREAM statement over the existing topic reports SUCCESS."""
        stream_name = self.test_prefix + "test_ksql_create_stream"
        # The continuation line's leading spaces are part of the statement text.
        ksql_string = "CREATE STREAM {} (viewtime bigint, userid varchar, pageid varchar) \
                       WITH (kafka_topic='{}', value_format='DELIMITED');".format(
            stream_name, self.exist_topic)
        response = self.api_client.ksql(ksql_string)
        command_status = response[0]['commandStatus']['status']
        self.assertEqual(command_status, 'SUCCESS')

    @unittest.skipIf(not utils.check_kafka_available('localhost:29092'),
                     "vcrpy does not support streams yet")
    def test_ksql_create_stream_w_properties(self):
        """Create a JSON stream with stream properties, produce one record, and query it back.

        The stream is created only when ``TEST_KSQL_CREATE_STREAM`` is not
        already listed by ``utils.get_all_streams``. Every chunk returned by
        the query must have ``'Palo Alto'`` as its last column.
        """
        topic = self.exist_topic
        # Unprefixed name: the existence check below looks for
        # TEST_KSQL_CREATE_STREAM, not a prefixed variant.
        stream_name = "test_ksql_create_stream"
        # The continuation line's leading spaces are part of the statement text.
        ksql_string = "CREATE STREAM {} (ORDER_ID INT, TOTAL_AMOUNT DOUBLE, CUSTOMER_NAME VARCHAR) \
                       WITH (kafka_topic='{}', value_format='JSON');".format(
            stream_name, topic)
        streamProperties = {"ksql.streams.auto.offset.reset": "earliest"}
        if 'TEST_KSQL_CREATE_STREAM' not in utils.get_all_streams(
                self.api_client):
            r = self.api_client.ksql(ksql_string,
                                     stream_properties=streamProperties)
            self.assertEqual(r[0]['commandStatus']['status'], 'SUCCESS')
        producer = Producer({'bootstrap.servers': self.bootstrap_servers})
        producer.produce(
            self.exist_topic,
            '''{"order_id":3,"total_amount":43,"customer_name":"Palo Alto"}''')
        producer.flush()
        chunks = self.api_client.query("select * from {}".format(stream_name),
                                       stream_properties=streamProperties,
                                       idle_timeout=10)
        for chunk in chunks:
            # A unittest assertion instead of a bare assert: it is not
            # stripped under `python -O` and reports both values on failure.
            self.assertEqual(
                json.loads(chunk)['row']['columns'][-1], 'Palo Alto')

    @vcr.use_cassette('tests/vcr_cassettes/bad_requests.yml')
    def test_bad_requests(self):
        """An invalid statement raises ``KSQLError`` carrying error code 40000."""
        with self.assertRaises(KSQLError) as raised:
            self.api_client.ksql("noi")
        self.assertEqual(raised.exception.error_code, 40000)

    @vcr.use_cassette('tests/vcr_cassettes/ksql_create_stream_by_builder.yml')
    def test_ksql_create_stream_by_builder(self):
        """A CREATE STREAM statement produced by ``SQLBuilder.build`` reports SUCCESS."""
        builder_args = {
            'sql_type': 'create',
            'table_type': 'stream',
            'table_name': 'test_table',
            'columns_type': ['viewtime bigint', 'userid varchar', 'pageid varchar'],
            'topic': self.exist_topic,
            'value_format': 'DELIMITED',
        }

        # drop_stream runs first, presumably so a leftover stream of the same
        # name cannot make the create fail.
        utils.drop_stream(self.api_client, builder_args['table_name'])

        statement = SQLBuilder.build(**builder_args)
        response = self.api_client.ksql(statement)
        self.assertEqual(response[0]['commandStatus']['status'], 'SUCCESS')

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_by_builder_api.yml')
    def test_ksql_create_stream_by_builder_api(self):
        """``create_stream`` on the client returns a truthy result."""
        stream_args = {
            'table_name': 'test_table',
            'columns_type': ['viewtime bigint', 'userid varchar', 'pageid varchar'],
            'topic': self.exist_topic,
            'value_format': 'DELIMITED',
        }

        # drop_stream runs first, presumably so a leftover stream of the same
        # name cannot make the create fail.
        utils.drop_stream(self.api_client, stream_args['table_name'])

        created = self.api_client.create_stream(**stream_args)

        self.assertTrue(created)

    @vcr.use_cassette('tests/vcr_cassettes/ksql_topic_already_registered.yml')
    def test_raise_create_error_topic_already_registered(self):
        """Creating the same stream twice raises ``KSQLError`` on the second call."""
        stream_args = {
            'table_name': 'foo_table',
            'columns_type': ['name string', 'age bigint'],
            'topic': self.exist_topic,
            'value_format': 'DELIMITED',
        }
        utils.drop_stream(self.api_client, stream_args['table_name'])

        # First creation is expected to go through ...
        self.api_client.create_stream(**stream_args)

        # ... and the identical second one must be rejected.
        with self.assertRaises(KSQLError):
            self.api_client.create_stream(**stream_args)

    @vcr.use_cassette('tests/vcr_cassettes/raise_create_error_no_topic.yml')
    def test_raise_create_error_no_topic(self):
        """``create_stream`` on topic ``this_topic_is_not_exist`` raises ``KSQLError``."""
        stream_args = {
            'table_name': 'foo_table',
            'columns_type': ['name string', 'age bigint'],
            'topic': 'this_topic_is_not_exist',
            'value_format': 'DELIMITED',
        }

        with self.assertRaises(KSQLError):
            self.api_client.create_stream(**stream_args)

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_as_without_conditions.yml')
    def test_create_stream_as_without_conditions(self):
        """``create_stream_as`` without conditions returns a truthy result."""
        src_table = 'pageviews_original'
        derived_name = 'create_stream_as_without_conditions'
        value_format = 'DELIMITED'

        try:
            self.api_client.create_stream(
                table_name=src_table,
                columns_type=[
                    'name string', 'age bigint', 'userid string', 'pageid bigint'
                ],
                topic=self.exist_topic,
                value_format=value_format)
        except KSQLError:
            # A KSQLError from the source-stream creation is ignored
            # (presumably the stream may already exist).
            pass

        created = self.api_client.create_stream_as(
            table_name=derived_name,
            src_table=src_table,
            kafka_topic=derived_name,
            select_columns=['rowtime as logtime', '*'],
            timestamp='logtime',
            value_format=value_format)
        self.assertTrue(created)

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_as_with_conditions_without_startwith.yml'
    )
    def test_create_stream_as_with_conditions_without_startwith(self):
        """``create_stream_as`` with the condition ``userid = 'foo'`` returns a truthy result."""
        src_table = 'pageviews_original'
        derived_name = 'create_stream_as_with_conditions_without_startwith'
        value_format = 'DELIMITED'

        try:
            self.api_client.create_stream(
                table_name=src_table,
                columns_type=[
                    'name string', 'age bigint', 'userid string', 'pageid bigint'
                ],
                topic=self.exist_topic,
                value_format=value_format)
        except KSQLError:
            # A KSQLError from the source-stream creation is ignored
            # (presumably the stream may already exist).
            pass

        created = self.api_client.create_stream_as(
            table_name=derived_name,
            src_table=src_table,
            kafka_topic=derived_name,
            select_columns=['rowtime as logtime', '*'],
            timestamp='logtime',
            value_format=value_format,
            conditions="userid = 'foo'")

        self.assertTrue(created)

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_as_with_conditions_with_startwith.yml'
    )
    def test_create_stream_as_with_conditions_with_startwith(self):
        """``create_stream_as`` with the condition ``userid = 'foo_%'`` returns a truthy result."""
        src_table = 'pageviews_original'
        derived_name = 'create_stream_as_with_conditions_with_startwith'
        value_format = 'DELIMITED'

        # Source stream first, then the derived one.
        utils.drop_stream(self.api_client, src_table)
        utils.drop_stream(self.api_client, derived_name)

        try:
            self.api_client.create_stream(
                table_name=src_table,
                columns_type=[
                    'name string', 'age bigint', 'userid string', 'pageid bigint'
                ],
                topic=self.exist_topic,
                value_format=value_format)
        except KSQLError:
            # A KSQLError from the source-stream creation is ignored.
            pass

        created = self.api_client.create_stream_as(
            table_name=derived_name,
            src_table=src_table,
            kafka_topic=derived_name,
            select_columns=['rowtime as logtime', '*'],
            timestamp='logtime',
            value_format=value_format,
            conditions="userid = 'foo_%'")

        self.assertTrue(created)

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_as_with_conditions_with_startwith_with_and.yml'
    )
    def test_create_stream_as_with_conditions_with_startwith_with_and(self):
        """``create_stream_as`` with ``userid = 'foo_%' and age > 10`` returns a truthy result."""
        src_table = 'pageviews_original'
        derived_name = 'create_stream_as_with_conditions_with_startwith_with_and'
        value_format = 'DELIMITED'

        try:
            self.api_client.create_stream(
                table_name=src_table,
                columns_type=[
                    'name string', 'age bigint', 'userid string', 'pageid bigint'
                ],
                topic=self.exist_topic,
                value_format=value_format)
        except KSQLError:
            # A KSQLError from the source-stream creation is ignored
            # (presumably the stream may already exist).
            pass

        created = self.api_client.create_stream_as(
            table_name=derived_name,
            src_table=src_table,
            kafka_topic=derived_name,
            select_columns=['rowtime as logtime', '*'],
            timestamp='logtime',
            value_format=value_format,
            conditions="userid = 'foo_%' and age > 10")

        self.assertTrue(created)

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_as_with_wrong_timestamp.yml')
    def test_ksql_create_stream_as_with_wrong_timestamp(self):
        """``create_stream_as`` with timestamp ``foo``, which is not a source column, raises ``KSQLError``."""
        src_table = 'prebid_traffic_log_total_stream'
        table_name = 'prebid_traffic_log_valid_stream'
        value_format = 'DELIMITED'

        # Source stream first, then the derived one.
        utils.drop_stream(self.api_client, src_table)
        utils.drop_stream(self.api_client, table_name)

        # A KSQLError raised here propagates and fails the test.
        self.api_client.create_stream(
            table_name=src_table,
            columns_type=[
                'name string', 'age bigint', 'userid string', 'pageid bigint'
            ],
            topic=self.exist_topic,
            value_format=value_format)

        with self.assertRaises(KSQLError):
            self.api_client.create_stream_as(
                table_name=table_name,
                src_table=src_table,
                kafka_topic='prebid_traffic_log_valid_topic',
                select_columns=['*'],
                timestamp='foo',
                value_format=value_format)
Esempio n. 15
0
class TestKSQLAPI(unittest.TestCase):
    """Test case for the client methods."""
    def setUp(self):
        self.url = "http://*****:*****@vcr.use_cassette("tests/vcr_cassettes/healthcheck.yml")
    def test_ksql_server_healthcheck(self):
        """GET ``<url>/status`` and expect HTTP 200."""
        status_url = self.url + "/status"
        response = requests.get(status_url)
        self.assertEqual(response.status_code, 200)

    @vcr.use_cassette("tests/vcr_cassettes/get_ksql_server.yml")
    def test_get_ksql_version_success(self):
        """The version reported by the client equals ``ksql.__ksql_server_version__``."""
        reported_version = self.api_client.get_ksql_version()
        expected_version = ksql.__ksql_server_version__
        self.assertEqual(reported_version, expected_version)

    @vcr.use_cassette("tests/vcr_cassettes/get_properties.yml")
    def test_get_properties(self):
        properties = self.api_client.get_properties()
        property = [
            i for i in properties if i["name"] == "ksql.schema.registry.url"
        ][0]
        self.assertEqual(property.get("value"), "http://*****:*****@vcr.use_cassette("tests/vcr_cassettes/ksql_show_table_with_api_key.yml")
    def test_ksql_show_tables_with_api_key(self):
        """``show tables;`` through a client built with api_key/secret yields the empty listing."""
        expected = [{
            "@type": "tables",
            "statementText": "show tables;",
            "tables": [],
            "warnings": []
        }]
        keyed_client = KSQLAPI(url=self.url,
                               check_version=False,
                               api_key='foo',
                               secret='bar')
        response = keyed_client.ksql("show tables;")
        self.assertEqual(response, expected)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_show_table.yml")
    def test_ksql_show_tables(self):
        """Run ``show tables;`` and expect one entry with empty tables and warnings."""
        expected = [{
            "@type": "tables",
            "statementText": "show tables;",
            "tables": [],
            "warnings": []
        }]
        response = self.api_client.ksql("show tables;")
        self.assertEqual(response, expected)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_show_table.yml")
    def test_ksql_show_tables_with_no_semicolon(self):
        """Send ``show tables`` without a semicolon; the expected reply echoes ``show tables;``."""
        expected = [{
            "@type": "tables",
            "statementText": "show tables;",
            "tables": [],
            "warnings": []
        }]
        response = self.api_client.ksql("show tables")
        self.assertEqual(response, expected)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_create_stream.yml")
    def test_ksql_create_stream(self):
        """A raw CREATE STREAM statement over the existing topic reports SUCCESS."""
        stream_name = self.test_prefix + "test_ksql_create_stream"
        # The continuation line's leading spaces are part of the statement text.
        ksql_string = "CREATE STREAM {} (viewtime bigint, userid varchar, pageid varchar) \
                       WITH (kafka_topic='{}', value_format='DELIMITED');".format(
            stream_name, self.exist_topic)
        response = self.api_client.ksql(ksql_string)
        command_status = response[0]["commandStatus"]["status"]
        self.assertEqual(command_status, "SUCCESS")

    @unittest.skipIf(not utils.check_kafka_available("localhost:29092"),
                     "vcrpy does not support streams yet")
    def test_ksql_create_stream_w_properties(self):
        """Create a JSON stream with stream properties, produce one record, and query it.

        The stream is created only when ``TEST_KSQL_CREATE_STREAM`` is not
        already listed by ``utils.get_all_streams``. Only the first chunk
        returned by the query is checked, and only for truthiness.
        """
        stream_name = "TEST_KSQL_CREATE_STREAM"
        # The continuation line's leading spaces are part of the statement text.
        ksql_string = "CREATE STREAM {} (ORDER_ID INT, TOTAL_AMOUNT DOUBLE, CUSTOMER_NAME VARCHAR) \
                       WITH (kafka_topic='{}', value_format='JSON');".format(
            stream_name, self.exist_topic)
        offset_reset = {"ksql.streams.auto.offset.reset": "earliest"}

        existing_streams = utils.get_all_streams(self.api_client)
        if "TEST_KSQL_CREATE_STREAM" not in existing_streams:
            created = self.api_client.ksql(ksql_string,
                                           stream_properties=offset_reset)
            self.assertEqual(created[0]["commandStatus"]["status"], "SUCCESS")

        publisher = Producer({"bootstrap.servers": self.bootstrap_servers})
        record = """{"order_id":3,"total_amount":43,"customer_name":"Palo Alto"}"""
        publisher.produce(self.exist_topic, record)
        publisher.flush()

        rows = self.api_client.query(
            "select * from {} EMIT CHANGES".format(stream_name),
            stream_properties=offset_reset)

        for row in rows:
            # Stop after the first chunk.
            self.assertTrue(row)
            break

    @vcr.use_cassette("tests/vcr_cassettes/bad_requests.yml")
    def test_bad_requests(self):
        """An invalid statement raises ``KSQLError`` carrying error code 40001."""
        with self.assertRaises(KSQLError) as raised:
            self.api_client.ksql("noi")

        self.assertEqual(raised.exception.error_code, 40001)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_create_stream_by_builder.yml")
    def test_ksql_create_stream_by_builder(self):
        """A CREATE STREAM statement produced by ``SQLBuilder.build`` reports SUCCESS."""
        builder_args = {
            "sql_type": "create",
            "table_type": "stream",
            "table_name": "test_table",
            "columns_type": ["viewtime bigint", "userid varchar", "pageid varchar"],
            "topic": self.exist_topic,
            "value_format": "DELIMITED",
        }

        # drop_stream runs first, presumably so a leftover stream of the same
        # name cannot make the create fail.
        utils.drop_stream(self.api_client, builder_args["table_name"])

        statement = SQLBuilder.build(**builder_args)

        response = self.api_client.ksql(statement)
        self.assertEqual(response[0]["commandStatus"]["status"], "SUCCESS")

    @vcr.use_cassette(
        "tests/vcr_cassettes/ksql_create_stream_by_builder_api.yml")
    def test_ksql_create_stream_by_builder_api(self):
        """``create_stream`` on the client returns a truthy result."""
        stream_args = {
            "table_name": "test_table",
            "columns_type": ["viewtime bigint", "userid varchar", "pageid varchar"],
            "topic": self.exist_topic,
            "value_format": "DELIMITED",
        }

        # drop_stream runs first, presumably so a leftover stream of the same
        # name cannot make the create fail.
        utils.drop_stream(self.api_client, stream_args["table_name"])

        created = self.api_client.create_stream(**stream_args)

        self.assertTrue(created)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_topic_already_registered.yml")
    def test_raise_create_error_topic_already_registered(self):
        """Creating the same stream twice raises ``KSQLError`` on the second call."""
        stream_args = {
            "table_name": "foo_table",
            "columns_type": ["name string", "age bigint"],
            "topic": self.exist_topic,
            "value_format": "DELIMITED",
        }
        utils.drop_stream(self.api_client, stream_args["table_name"])

        # First creation is expected to go through ...
        self.api_client.create_stream(**stream_args)

        # ... and the identical second one must be rejected.
        with self.assertRaises(KSQLError):
            self.api_client.create_stream(**stream_args)

    @vcr.use_cassette("tests/vcr_cassettes/raise_create_error_no_topic.yml")
    def test_raise_create_error_no_topic(self):
        """``create_stream`` on topic ``this_topic_is_not_exist`` raises ``KSQLError``."""
        stream_args = {
            "table_name": "foo_table",
            "columns_type": ["name string", "age bigint"],
            "topic": "this_topic_is_not_exist",
            "value_format": "DELIMITED",
        }

        with self.assertRaises(KSQLError):
            self.api_client.create_stream(**stream_args)

    @vcr.use_cassette(
        "tests/vcr_cassettes/ksql_create_stream_as_without_conditions.yml")
    def test_create_stream_as_without_conditions(self):
        """``create_stream_as`` without conditions returns a truthy result."""
        src_table = "pageviews_original"
        derived_name = "create_stream_as_without_conditions"
        value_format = "DELIMITED"

        try:
            self.api_client.create_stream(
                table_name=src_table,
                columns_type=[
                    "name string", "age bigint", "userid string", "pageid bigint"
                ],
                topic=self.exist_topic,
                value_format=value_format)
        except KSQLError:
            # A KSQLError from the source-stream creation is ignored
            # (presumably the stream may already exist).
            pass

        created = self.api_client.create_stream_as(
            table_name=derived_name,
            src_table=src_table,
            kafka_topic=derived_name,
            select_columns=["rowtime as logtime", "*"],
            timestamp="logtime",
            value_format=value_format,
        )
        self.assertTrue(created)

    @vcr.use_cassette(
        "tests/vcr_cassettes/ksql_create_stream_as_with_conditions_without_startwith.yml"
    )
    def test_create_stream_as_with_conditions_without_startwith(self):
        """``create_stream_as`` with the condition ``userid = 'foo'`` returns a truthy result."""
        src_table = "pageviews_original"
        derived_name = "create_stream_as_with_conditions_without_startwith"
        value_format = "DELIMITED"

        try:
            self.api_client.create_stream(
                table_name=src_table,
                columns_type=[
                    "name string", "age bigint", "userid string", "pageid bigint"
                ],
                topic=self.exist_topic,
                value_format=value_format)
        except KSQLError:
            # A KSQLError from the source-stream creation is ignored
            # (presumably the stream may already exist).
            pass

        created = self.api_client.create_stream_as(
            table_name=derived_name,
            src_table=src_table,
            kafka_topic=derived_name,
            select_columns=["rowtime as logtime", "*"],
            timestamp="logtime",
            value_format=value_format,
            conditions="userid = 'foo'",
        )

        self.assertTrue(created)

    @vcr.use_cassette(
        "tests/vcr_cassettes/ksql_create_stream_as_with_conditions_with_startwith.yml"
    )
    def test_create_stream_as_with_conditions_with_startwith(self):
        """Derive a stream filtered by the condition ``userid = 'foo_%'``.

        Both the source and the derived stream are dropped up front, then the
        source is re-created on a best-effort basis (``KSQLError`` swallowed).
        """
        source = "pageviews_original"
        derived = "create_stream_as_with_conditions_with_startwith"
        fmt = "DELIMITED"

        # Start from a clean slate: source first, then the derived stream.
        for stale in (source, derived):
            utils.drop_stream(self.api_client, stale)

        try:
            self.api_client.create_stream(
                table_name=source,
                columns_type=[
                    "name string", "age bigint", "userid string", "pageid bigint"
                ],
                topic=self.exist_topic,
                value_format=fmt,
            )
        except KSQLError:
            # Setup only; the assertion below is what this test checks.
            pass

        created = self.api_client.create_stream_as(
            table_name=derived,
            src_table=source,
            kafka_topic=derived,
            select_columns=["rowtime as logtime", "*"],
            timestamp="logtime",
            value_format=fmt,
            conditions="userid = 'foo_%'",
        )
        self.assertTrue(created)

    @vcr.use_cassette(
        "tests/vcr_cassettes/ksql_create_stream_as_with_conditions_with_startwith_with_and.yml"
    )
    def test_create_stream_as_with_conditions_with_startwith_with_and(self):
        """Derive a stream filtered by ``userid = 'foo_%' and age > 10``.

        The source stream is created first on a best-effort basis: any
        ``KSQLError`` raised by that setup call is swallowed.
        """
        source = "pageviews_original"
        derived = "create_stream_as_with_conditions_with_startwith_with_and"
        fmt = "DELIMITED"

        try:
            self.api_client.create_stream(
                table_name=source,
                columns_type=[
                    "name string", "age bigint", "userid string", "pageid bigint"
                ],
                topic=self.exist_topic,
                value_format=fmt,
            )
        except KSQLError:
            # Setup only; the assertion below is what this test checks.
            pass

        created = self.api_client.create_stream_as(
            table_name=derived,
            src_table=source,
            kafka_topic=derived,
            select_columns=["rowtime as logtime", "*"],
            timestamp="logtime",
            value_format=fmt,
            conditions="userid = 'foo_%' and age > 10",
        )
        self.assertTrue(created)

    @vcr.use_cassette(
        "tests/vcr_cassettes/ksql_create_stream_as_with_wrong_timestamp.yml")
    def test_ksql_create_stream_as_with_wrong_timestamp(self):
        """``create_stream_as`` raises ``KSQLError`` for the timestamp ``foo``.

        ``foo`` is not one of the columns declared for the source stream.
        Both streams are dropped first; the source stream is then created on
        a best-effort basis (``KSQLError`` swallowed).
        """
        source = "prebid_traffic_log_total_stream"
        derived = "prebid_traffic_log_valid_stream"
        fmt = "DELIMITED"

        for stale in (source, derived):
            utils.drop_stream(self.api_client, stale)

        try:
            self.api_client.create_stream(
                table_name=source,
                columns_type=[
                    "name string", "age bigint", "userid string", "pageid bigint"
                ],
                topic=self.exist_topic,
                value_format=fmt,
            )
        except KSQLError:
            # Setup only; the failure under test is the one asserted below.
            pass

        with self.assertRaises(KSQLError):
            self.api_client.create_stream_as(
                table_name=derived,
                src_table=source,
                kafka_topic="prebid_traffic_log_valid_topic",
                select_columns=["*"],
                timestamp="foo",
                value_format=fmt,
            )
Esempio n. 16
0
import json

from pyspark.sql import types as T
from ksql import KSQLAPI

from structured_stream import log_type_dict, datetime_convert

# Skeleton record schema; "fields", "name" and "namespace" start out empty.
base_schema = {
    "fields": [],
    "name": "",
    "namespace": "",
    "type": "record",
}

# Type name used for each column kind.
# NOTE(review): the target names look like KSQL column types - confirm.
type_mapping = {
    T.StringType: "varchar",
    T.IntegerType: "int",
    datetime_convert: "bigint",
}

# Same keys as ``type_mapping`` with a second set of type names.
# NOTE(review): presumably Avro field types - confirm that "date" is accepted.
avro_type_mapping = {
    T.StringType: "string",
    T.IntegerType: "int",
    datetime_convert: "date",
}

# Module-wide KSQL REST client.
client = KSQLAPI("http://localhost:8088")


def create_stream():
    """Collect the per-log-type metadata for the "traffic" and "threat" logs.

    NOTE(review): this excerpt ends right after the backtick comment, so the
    code that builds and issues the actual statement is not visible here.
    """
    for log_type in ["traffic", "threat"]:
        # Each ``log_type_dict`` entry is read for four keys: the header
        # names, the positions to keep, the column types and the topic.
        header = log_type_dict[log_type]["header"]
        indices = log_type_dict[log_type]["indices"]
        types = log_type_dict[log_type]["types"]
        topic = log_type_dict[log_type]["topic"]
        # Keep only the header names at the selected positions.
        selected_header = [header[i] for i in indices]

        # Add backtick ` to prevent the `extraneous input '/'` error
Esempio n. 17
0
class MYKSQLAPI():
    """Pair a KSQL REST client with an Avro producer for the ``test08`` topic.

    Attributes set by ``__init__``:
        api_client: ``KSQLAPI`` bound to ``http://localhost:8088``.
        topic: name of the topic that ``produce`` writes to.
        bootstrap_servers: Kafka bootstrap address.
        key: dict mapping each field name to itself (not used by this class).
        producer: ``AvroProducer`` configured with the value schema only.
    """

    def __init__(self):
        """Create the KSQL client and, when Kafka is reachable, the producer.

        Raises:
            SystemExit: with status ``-1`` when
                ``utils.check_kafka_available`` reports Kafka as unreachable.
        """
        url = "http://localhost:8088"
        self.api_client = KSQLAPI(url)
        self.topic = "test08"
        self.bootstrap_servers = "localhost:9092"
        if utils.check_kafka_available(self.bootstrap_servers):
            value_schema_str = """ 
            { 
                "type": "record", 
                "namespace": "com.example", 
                "name": "value", 
                "fields": [ 
                    {"name":"LOCATION", "type":"string"}, 
                    {"name":"DATETIME", "type":"string"}, 
                    {"name":"SENTIMENT", "type":"string"}, 
                    {"name":"TEXT", "type":"string"} 
                ] 
            } 
            """
            value_schema = avro.loads(value_schema_str)
            self.key = {
                "LOCATION": "LOCATION",
                "DATETIME": "DATETIME",
                "SENTIMENT": "SENTIMENT",
                "TEXT": "TEXT"
            }
            # Messages are produced with ``key=None``, so no key schema is
            # registered; the previously parsed key schema was never used.
            self.producer = AvroProducer(
                {
                    'bootstrap.servers': self.bootstrap_servers,
                    'on_delivery': delivery_report,
                    'schema.registry.url': 'http://localhost:8081'
                },
                default_key_schema=None,
                default_value_schema=value_schema)
        else:
            print("Could not connect to Kafka")
            # The ``exit`` builtin is injected by the ``site`` module and is
            # not always available; raising SystemExit directly is the same
            # exception without that dependency.
            raise SystemExit(-1)

    def create_stream(self):
        """Register the ``TEST08`` stream and its Elasticsearch sink connector.

        Both statements are sent through ``self.api_client.ksql``; the
        responses are discarded.
        """
        # NOTE(review): the stream declares VALUE_FORMAT='JSON' while
        # ``produce`` goes through an AvroProducer - confirm this is intended.
        self.api_client.ksql(
            "CREATE STREAM TEST08 (LOCATION STRING, DATETIME STRING, SENTIMENT STRING, TEXT STRING) WITH (KAFKA_TOPIC='test08', PARTITIONS=1, VALUE_FORMAT='JSON');"
        )
        self.api_client.ksql(
            "CREATE SINK CONNECTOR SINK_ELASTIC_TEST_08 WITH ('connector.class' = 'io.confluent.connect.elasticsearch.ElasticsearchSinkConnector','connection.url'  = 'http://elasticsearch:9200','key.converter'   = 'org.apache.kafka.connect.storage.StringConverter','type.name'       = '_doc','topics'          = 'test08','key.ignore'      = 'true','behavior.on.null.values'='delete','schema.ignore'   = 'false');"
        )

    def produce(self, message):
        """Queue ``message`` as a key-less value on ``self.topic``."""
        self.producer.produce(topic=self.topic, key=None, value=message)

    def flush(self):
        """Flush the underlying producer."""
        self.producer.flush()
Esempio n. 18
0
from ksql import KSQLAPI
import pandas as pd
import json


# Connect to the local KSQL server and issue a `show streams` statement
# (the response is not used).
client = KSQLAPI('http://localhost:8088')
client.ksql('show streams')

query = client.query('select * from passenger2 limit 10', stream_properties={"auto.offset.reset": "earliest"})

# ``query`` can only be iterated once.  Buffer the chunks so they can be both
# printed and parsed; previously the print loop exhausted the iterator and
# ``records`` was always empty.
chunks = list(query)

for item in chunks:
    print(item)

# NOTE(review): this assumes every chunk is a standalone JSON document with a
# 'row' -> 'columns' entry and that only the last chunk lacks one - confirm
# against the server's actual response framing.
records = [json.loads(r) for r in chunks]
# Drop the first two columns of each row and skip the final record.
data = [r['row']['columns'][2:] for r in records[:-1]]
df = pd.DataFrame(data=data)
# Only displays something in an interactive session; the result is discarded.
df.head(5)
Esempio n. 19
0
# print(cmd)

# DDL that creates the single-column ``updates`` table when it is missing.
insert_table_cmd = ("""
        CREATE TABLE IF NOT EXISTS updates (
			update varchar(255)
        );
        """)

# Open the PostgreSQL connection/cursor and build the KSQL client.
try:
    conn = pgdb.Connection(database='postgres',
                           host='localhost',
                           user='******',
                           password='******')
    cur = conn.cursor()
    print('Connection successful!')
    client = KSQLAPI('http://127.0.0.1:59090/browser/')
except Exception as exc:
    # A bare ``except:`` also trapped KeyboardInterrupt and SystemExit and
    # hid the reason for the failure; report the error instead.
    print('Connection unsuccessful!')
    print(exc)

# Usage banner (fixes the "quite" -> "quit" typo shown to the user).
print(
    "Type a SQL command below. \nWhen you are done typing your command, press ENTER on a blank line or type 'stop'.\nTo completely quit out of this interface, type 'quit all'."
)
# Lines typed by the user are accumulated here.
raw_input_lines = []
outer_loop = True
while (outer_loop):
    print("> ")
    inner_loop = True
    while (inner_loop):
        raw_input = input()
        # Keep every non-empty line other than the 'stop' sentinel.
        # NOTE(review): the excerpt ends here; the branches that clear
        # ``inner_loop`` / ``outer_loop`` are not visible.
        if (raw_input) and (raw_input != 'stop'):
            raw_input_lines.append(raw_input)
Esempio n. 20
0
class TestKSQLAPI(unittest.TestCase):
    """Test case for the client methods."""

    def setUp(self):
        self.url = "http://*****:*****@vcr.use_cassette("tests/vcr_cassettes/healthcheck.yml")
    def test_ksql_server_healthcheck(self):
        """A GET on the server's ``/status`` path answers with HTTP 200."""
        status_url = self.url + "/status"
        response = requests.get(status_url)
        self.assertEqual(response.status_code, 200)

    @vcr.use_cassette("tests/vcr_cassettes/get_ksql_server.yml")
    def test_get_ksql_version_success(self):
        """The reported server version equals ``ksql.__ksql_server_version__``."""
        self.assertEqual(
            self.api_client.get_ksql_version(),
            ksql.__ksql_server_version__,
        )

    @vcr.use_cassette("tests/vcr_cassettes/get_properties.yml")
    def test_get_properties(self):
        properties = self.api_client.get_properties()
        property = [i for i in properties if i["name"] == "ksql.schema.registry.url"][0]
        self.assertEqual(property.get("value"), "http://*****:*****@vcr.use_cassette("tests/vcr_cassettes/ksql_show_table_with_api_key.yml")
    def test_ksql_show_tables_with_api_key(self):
        """``show tables;`` through a client built with an API key and secret."""
        expected = [
            {
                "@type": "tables",
                "statementText": "show tables;",
                "tables": [],
                "warnings": [],
            }
        ]
        keyed_client = KSQLAPI(
            url=self.url,
            check_version=False,
            api_key='foo',
            secret='bar',
        )
        self.assertEqual(keyed_client.ksql("show tables;"), expected)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_show_table.yml")
    def test_ksql_show_tables(self):
        """``show tables;`` returns a single, empty ``tables`` entry."""
        expected = [
            {
                "@type": "tables",
                "statementText": "show tables;",
                "tables": [],
                "warnings": [],
            }
        ]
        self.assertEqual(self.api_client.ksql("show tables;"), expected)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_show_table.yml")
    def test_ksql_show_tables_with_no_semicolon(self):
        """``show tables`` without a semicolon is answered as ``show tables;``."""
        expected = [
            {
                "@type": "tables",
                "statementText": "show tables;",
                "tables": [],
                "warnings": [],
            }
        ]
        self.assertEqual(self.api_client.ksql("show tables"), expected)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_create_stream.yml")
    def test_ksql_create_stream(self):
        """A raw CREATE STREAM statement is acknowledged with ``SUCCESS``."""
        # The stream name is the shared test prefix plus this test's name.
        statement = "CREATE STREAM {} (viewtime bigint, userid varchar, pageid varchar) \
                       WITH (kafka_topic='{}', value_format='DELIMITED');".format(
            self.test_prefix + "test_ksql_create_stream", self.exist_topic
        )
        response = self.api_client.ksql(statement)
        command_status = response[0]["commandStatus"]
        self.assertEqual(command_status["status"], "SUCCESS")

    @unittest.skipIf(not utils.check_kafka_available("localhost:29092"), "vcrpy does not support streams yet")
    def test_ksql_create_stream_w_properties(self):
        """Create a JSON stream, publish one order and read it back twice.

        The same push query is run once with the default transport and once
        with ``use_http2=True``; each run checks the header and the first row.
        """
        stream_name = "TEST_KSQL_CREATE_STREAM"
        offset_props = {"ksql.streams.auto.offset.reset": "earliest"}
        create_sql = "CREATE STREAM {} (ORDER_ID INT, TOTAL_AMOUNT DOUBLE, CUSTOMER_NAME VARCHAR) \
                       WITH (kafka_topic='{}', value_format='JSON');".format(
            stream_name, self.exist_topic
        )

        # Create the stream only when the server does not list it yet.
        if stream_name not in utils.get_all_streams(self.api_client):
            created = self.api_client.ksql(create_sql, stream_properties=offset_props)
            self.assertEqual(created[0]["commandStatus"]["status"], "SUCCESS")

        publisher = Producer({"bootstrap.servers": self.bootstrap_servers})
        publisher.produce(self.exist_topic, """{"order_id":3,"total_amount":43,"customer_name":"Palo Alto"}""")
        publisher.flush()

        select_sql = "select * from {} EMIT CHANGES".format(stream_name)

        # test legacy HTTP/1.1 request
        chunks = self.api_client.query(select_sql, stream_properties=offset_props)
        self.assertEqual(
            next(chunks),
            """[{"header":{"queryId":"none","schema":"`ORDER_ID` INTEGER, `TOTAL_AMOUNT` DOUBLE, `CUSTOMER_NAME` STRING"}},\n""",
        )
        # Only the first data chunk is checked; the query itself never ends.
        for chunk in chunks:
            self.assertEqual(chunk, """{"row":{"columns":[3,43.0,"Palo Alto"]}},\n""")
            break

        # test new HTTP/2 request (the same name is rebound on purpose so the
        # previous iterator is released)
        chunks = self.api_client.query(
            select_sql, stream_properties=offset_props, use_http2=True
        )
        header_obj = json.loads(next(chunks))
        self.assertEqual(header_obj["columnNames"], ['ORDER_ID', 'TOTAL_AMOUNT', 'CUSTOMER_NAME'])
        self.assertEqual(header_obj["columnTypes"], ['INTEGER', 'DOUBLE', 'STRING'])

        for chunk in chunks:
            self.assertEqual(json.loads(chunk), [3, 43.0, "Palo Alto"])
            break

    @unittest.skipIf(not utils.check_kafka_available("localhost:29092"), "vcrpy does not support HTTP/2")
    def test_ksql_close_query(self):
        """Closing the query id ``"123"`` yields a falsy result."""
        self.assertFalse(self.api_client.close_query("123"))

    @unittest.skipIf(not utils.check_kafka_available("localhost:29092"), "vcrpy does not support streams yet")
    def test_inserts_stream(self):
        """Insert two rows into a JSON stream and expect ``"ok"`` for each.

        The target stream is created on demand when the server does not list
        it yet.
        """
        topic = self.exist_topic
        stream_name = "TEST_INSERTS_STREAM_STREAM"
        ksql_string = "CREATE STREAM {} (ORDER_ID INT, TOTAL_AMOUNT DOUBLE, CUSTOMER_NAME VARCHAR) \
        WITH (kafka_topic='{}', value_format='JSON');".format(
            stream_name, topic
        )

        streamProperties = {"ksql.streams.auto.offset.reset": "earliest"}

        # Guard on this test's own stream.  The check used to look for
        # "TEST_KSQL_CREATE_STREAM" (the stream of another test), so the
        # creation was skipped or repeated depending on unrelated state.
        if stream_name not in utils.get_all_streams(self.api_client):
            r = self.api_client.ksql(ksql_string, stream_properties=streamProperties)
            self.assertEqual(r[0]["commandStatus"]["status"], "SUCCESS")

        rows = [
            {"ORDER_ID": 1, "TOTAL_AMOUNT": 23.5, "CUSTOMER_NAME": "abc"},
            {"ORDER_ID": 2, "TOTAL_AMOUNT": 3.7, "CUSTOMER_NAME": "xyz"},
        ]

        results = self.api_client.inserts_stream(stream_name, rows)

        # One acknowledgement is checked per result entry.
        for result in results:
            self.assertEqual(result["status"], "ok")

    @unittest.skipIf(not utils.check_kafka_available("localhost:29092"), "vcrpy does not support streams yet")
    def test_ksql_parse_query_result_with_utils(self):
        """Parse a raw query row with ``utils.parse_columns`` / ``process_row``.

        One JSON message with struct, map and array fields is published, a
        matching stream is created if missing, and the first row of a push
        query is compared field by field.
        """
        topic = "TEST_KSQL_PARSE_QUERY_RESULT_WITH_UTILS_TOPIC"
        stream_name = "TEST_KSQL_PARSE_QUERY_RESULT_WITH_UTILS_STREAM"
        offset_props = {"ksql.streams.auto.offset.reset": "earliest"}
        # Checked in this order against the parsed row.
        expected_fields = (
            ("ORDER_ID", 3),
            ("MY_STRUCT", {"A": 1, "B": "bbb"}),
            ("MY_MAP", {"x": 3, "y": 4}),
            ("MY_ARRAY", [1, 2, 3]),
            ("TOTAL_AMOUNT", 43),
            ("CUSTOMER_NAME", "Palo Alto"),
        )

        publisher = Producer({"bootstrap.servers": self.bootstrap_servers})
        publisher.produce(topic, """{"order_id":3,"my_struct":{"a":1,"b":"bbb"}, "my_map":{"x":3, "y":4}, "my_array":[1,2,3], "total_amount":43,"customer_name":"Palo Alto"}""")
        publisher.flush()

        create_sql = "CREATE STREAM {} (ORDER_ID INT, MY_STRUCT STRUCT<A INT, B VARCHAR>, MY_MAP MAP<VARCHAR, INT>, MY_ARRAY ARRAY<INT>, TOTAL_AMOUNT DOUBLE, CUSTOMER_NAME VARCHAR) \
                       WITH (kafka_topic='{}', value_format='JSON');".format(
            stream_name, topic
        )

        if stream_name not in utils.get_all_streams(self.api_client):
            created = self.api_client.ksql(create_sql, stream_properties=offset_props)
            self.assertEqual(created[0]["commandStatus"]["status"], "SUCCESS")

        chunks = self.api_client.query(
            "select * from {} EMIT CHANGES".format(stream_name), stream_properties=offset_props
        )
        # The first chunk is the header that describes the columns.
        columns = utils.parse_columns(next(chunks))

        # Only the first data chunk is inspected.
        for chunk in chunks:
            row_obj = utils.process_row(chunk, columns)
            for column, wanted in expected_fields:
                self.assertEqual(row_obj[column], wanted)
            break

    @unittest.skipIf(not utils.check_kafka_available("localhost:29092"), "vcrpy does not support streams yet")
    def test_ksql_parse_query_result(self):
        """Query with ``return_objects=True`` and check the first row object.

        One JSON message with struct, map and array fields is published, a
        matching stream is created if missing, and the first object yielded by
        the push query is compared field by field.
        """
        topic = "TEST_KSQL_PARSE_QUERY_RESULT_TOPIC"
        stream_name = "TEST_KSQL_PARSE_QUERY_RESULT_STREAM"
        offset_props = {"ksql.streams.auto.offset.reset": "earliest"}
        # Checked in this order against the yielded row.
        expected_fields = (
            ("ORDER_ID", 3),
            ("MY_STRUCT", {"A": 1, "B": "bbb"}),
            ("MY_MAP", {"x": 3, "y": 4}),
            ("MY_ARRAY", [1, 2, 3]),
            ("TOTAL_AMOUNT", 43),
            ("CUSTOMER_NAME", "Palo Alto"),
        )

        publisher = Producer({"bootstrap.servers": self.bootstrap_servers})
        publisher.produce(topic, """{"order_id":3,"my_struct":{"a":1,"b":"bbb"}, "my_map":{"x":3, "y":4}, "my_array":[1,2,3], "total_amount":43,"customer_name":"Palo Alto"}""")
        publisher.flush()

        create_sql = "CREATE STREAM {} (ORDER_ID INT, MY_STRUCT STRUCT<A INT, B VARCHAR>, MY_MAP MAP<VARCHAR, INT>, MY_ARRAY ARRAY<INT>, TOTAL_AMOUNT DOUBLE, CUSTOMER_NAME VARCHAR) \
                       WITH (kafka_topic='{}', value_format='JSON');".format(
            stream_name, topic
        )

        if stream_name not in utils.get_all_streams(self.api_client):
            created = self.api_client.ksql(create_sql, stream_properties=offset_props)
            self.assertEqual(created[0]["commandStatus"]["status"], "SUCCESS")

        chunks = self.api_client.query(
            "select * from {} EMIT CHANGES".format(stream_name), stream_properties=offset_props, return_objects=True
        )

        # Only the first yielded object is inspected.
        for chunk in chunks:
            for column, wanted in expected_fields:
                self.assertEqual(chunk[column], wanted)
            break

    @unittest.skipIf(not utils.check_kafka_available("localhost:29092"), "vcrpy does not support streams yet")
    def test_ksql_parse_query_final_message(self):
        """Run a ``LIMIT 1`` object query and check every yielded row.

        Unlike the sibling tests there is no ``break``: the loop consumes the
        query to its end, so every object it yields must be the expected row.
        """
        topic = "TEST_KSQL_PARSE_QUERY_FINAL_MESSAGE_TOPIC"
        stream_name = "TEST_KSQL_PARSE_QUERY_FINAL_MESSAGE_STREAM"
        offset_props = {"ksql.streams.auto.offset.reset": "earliest"}
        # Checked in this order against each yielded row.
        expected_fields = (
            ("ORDER_ID", 3),
            ("MY_STRUCT", {"A": 1, "B": "bbb"}),
            ("MY_MAP", {"x": 3, "y": 4}),
            ("MY_ARRAY", [1, 2, 3]),
            ("TOTAL_AMOUNT", 43),
            ("CUSTOMER_NAME", "Palo Alto"),
        )

        publisher = Producer({"bootstrap.servers": self.bootstrap_servers})
        publisher.produce(topic, """{"order_id":3,"my_struct":{"a":1,"b":"bbb"}, "my_map":{"x":3, "y":4}, "my_array":[1,2,3], "total_amount":43,"customer_name":"Palo Alto"}""")
        publisher.flush()

        create_sql = "CREATE STREAM {} (ORDER_ID INT, MY_STRUCT STRUCT<A INT, B VARCHAR>, MY_MAP MAP<VARCHAR, INT>, MY_ARRAY ARRAY<INT>, TOTAL_AMOUNT DOUBLE, CUSTOMER_NAME VARCHAR) \
                       WITH (kafka_topic='{}', value_format='JSON');".format(
            stream_name, topic
        )

        if stream_name not in utils.get_all_streams(self.api_client):
            created = self.api_client.ksql(create_sql, stream_properties=offset_props)
            self.assertEqual(created[0]["commandStatus"]["status"], "SUCCESS")

        chunks = self.api_client.query(
            "select * from {} EMIT CHANGES LIMIT 1".format(stream_name), stream_properties=offset_props, return_objects=True
        )

        for row_obj in chunks:
            for column, wanted in expected_fields:
                self.assertEqual(row_obj[column], wanted)

    @vcr.use_cassette("tests/vcr_cassettes/bad_requests.yml")
    def test_bad_requests(self):
        """The statement ``noi`` raises ``KSQLError`` with error code 40001."""
        with self.assertRaises(KSQLError) as caught:
            self.api_client.ksql("noi")

        self.assertEqual(caught.exception.error_code, 40001)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_create_stream_by_builder.yml")
    def test_ksql_create_stream_by_builder(self):
        """A statement produced by ``SQLBuilder.build`` runs with ``SUCCESS``."""
        stream = "test_table"

        # Drop the stream first so the CREATE below starts clean.
        utils.drop_stream(self.api_client, stream)

        statement = SQLBuilder.build(
            sql_type="create",
            table_type="stream",
            table_name=stream,
            columns_type=["viewtime bigint", "userid varchar", "pageid varchar"],
            topic=self.exist_topic,
            value_format="DELIMITED",
        )

        response = self.api_client.ksql(statement)
        command_status = response[0]["commandStatus"]
        self.assertEqual(command_status["status"], "SUCCESS")

    @vcr.use_cassette("tests/vcr_cassettes/ksql_create_stream_by_builder_api.yml")
    def test_ksql_create_stream_by_builder_api(self):
        """``create_stream`` returns a truthy value for a freshly dropped name."""
        stream = "test_table"

        # Drop the stream first so the CREATE below starts clean.
        utils.drop_stream(self.api_client, stream)

        outcome = self.api_client.create_stream(
            table_name=stream,
            columns_type=["viewtime bigint", "userid varchar", "pageid varchar"],
            topic=self.exist_topic,
            value_format="DELIMITED",
        )

        self.assertTrue(outcome)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_topic_already_registered.yml")
    def test_raise_create_error_topic_already_registered(self):
        """Creating the same stream a second time raises ``KSQLError``."""
        stream_spec = dict(
            table_name="foo_table",
            columns_type=["name string", "age bigint"],
            topic=self.exist_topic,
            value_format="DELIMITED",
        )
        utils.drop_stream(self.api_client, stream_spec["table_name"])

        # The first creation is expected to go through ...
        self.api_client.create_stream(**stream_spec)

        # ... and the identical second one is expected to be rejected.
        with self.assertRaises(KSQLError):
            self.api_client.create_stream(**stream_spec)

    @vcr.use_cassette("tests/vcr_cassettes/raise_create_error_no_topic.yml")
    def test_raise_create_error_no_topic(self):
        """``create_stream`` on ``this_topic_is_not_exist`` raises ``KSQLError``."""
        with self.assertRaises(KSQLError):
            self.api_client.create_stream(
                table_name="foo_table",
                columns_type=["name string", "age bigint"],
                topic="this_topic_is_not_exist",
                value_format="DELIMITED",
            )

    @vcr.use_cassette("tests/vcr_cassettes/ksql_create_stream_as_without_conditions.yml")
    def test_create_stream_as_without_conditions(self):
        """Derive a stream from ``pageviews_original`` without a WHERE clause.

        The source stream is created first on a best-effort basis: any
        ``KSQLError`` raised by that setup call is swallowed.
        """
        source_stream = "pageviews_original"
        target = "create_stream_as_without_conditions"
        source_spec = dict(
            table_name=source_stream,
            columns_type=["name string", "age bigint", "userid string", "pageid bigint"],
            topic=self.exist_topic,
            value_format="DELIMITED",
        )

        try:
            self.api_client.create_stream(**source_spec)
        except KSQLError:
            # Setup only; the assertion below is what this test checks.
            pass

        # The derived stream and its backing topic share the same name.
        outcome = self.api_client.create_stream_as(
            table_name=target,
            src_table=source_stream,
            kafka_topic=target,
            select_columns=["rowtime as logtime", "*"],
            timestamp="logtime",
            value_format="DELIMITED",
        )
        self.assertTrue(outcome)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_create_stream_as_with_conditions_without_startwith.yml")
    def test_create_stream_as_with_conditions_without_startwith(self):
        """Derive a stream filtered by the plain equality ``userid = 'foo'``.

        The source stream is created first on a best-effort basis: any
        ``KSQLError`` raised by that setup call is swallowed.
        """
        source_stream = "pageviews_original"
        target = "create_stream_as_with_conditions_without_startwith"
        source_spec = dict(
            table_name=source_stream,
            columns_type=["name string", "age bigint", "userid string", "pageid bigint"],
            topic=self.exist_topic,
            value_format="DELIMITED",
        )

        try:
            self.api_client.create_stream(**source_spec)
        except KSQLError:
            # Setup only; the assertion below is what this test checks.
            pass

        outcome = self.api_client.create_stream_as(
            table_name=target,
            src_table=source_stream,
            kafka_topic=target,
            select_columns=["rowtime as logtime", "*"],
            timestamp="logtime",
            value_format="DELIMITED",
            conditions="userid = 'foo'",
        )

        self.assertTrue(outcome)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_create_stream_as_with_conditions_with_startwith.yml")
    def test_create_stream_as_with_conditions_with_startwith(self):
        """Derive a stream filtered by the condition ``userid = 'foo_%'``.

        Both the source and the derived stream are dropped up front, then the
        source is re-created on a best-effort basis (``KSQLError`` swallowed).
        """
        source_stream = "pageviews_original"
        target = "create_stream_as_with_conditions_with_startwith"
        source_spec = dict(
            table_name=source_stream,
            columns_type=["name string", "age bigint", "userid string", "pageid bigint"],
            topic=self.exist_topic,
            value_format="DELIMITED",
        )

        # Start from a clean slate: source first, then the derived stream.
        for stale in (source_stream, target):
            utils.drop_stream(self.api_client, stale)

        try:
            self.api_client.create_stream(**source_spec)
        except KSQLError:
            # Setup only; the assertion below is what this test checks.
            pass

        outcome = self.api_client.create_stream_as(
            table_name=target,
            src_table=source_stream,
            kafka_topic=target,
            select_columns=["rowtime as logtime", "*"],
            timestamp="logtime",
            value_format="DELIMITED",
            conditions="userid = 'foo_%'",
        )

        self.assertTrue(outcome)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_create_stream_as_with_conditions_with_startwith_with_and.yml")
    def test_create_stream_as_with_conditions_with_startwith_with_and(self):
        """Derive a stream filtered by ``userid = 'foo_%' and age > 10``.

        The source stream is created first on a best-effort basis: any
        ``KSQLError`` raised by that setup call is swallowed.
        """
        source_stream = "pageviews_original"
        target = "create_stream_as_with_conditions_with_startwith_with_and"
        source_spec = dict(
            table_name=source_stream,
            columns_type=["name string", "age bigint", "userid string", "pageid bigint"],
            topic=self.exist_topic,
            value_format="DELIMITED",
        )

        try:
            self.api_client.create_stream(**source_spec)
        except KSQLError:
            # Setup only; the assertion below is what this test checks.
            pass

        outcome = self.api_client.create_stream_as(
            table_name=target,
            src_table=source_stream,
            kafka_topic=target,
            select_columns=["rowtime as logtime", "*"],
            timestamp="logtime",
            value_format="DELIMITED",
            conditions="userid = 'foo_%' and age > 10",
        )

        self.assertTrue(outcome)

    @vcr.use_cassette("tests/vcr_cassettes/ksql_create_stream_as_with_wrong_timestamp.yml")
    def test_ksql_create_stream_as_with_wrong_timestamp(self):
        """``create_stream_as`` raises ``KSQLError`` for the timestamp ``foo``.

        ``foo`` is not one of the columns declared for the source stream.
        Both streams are dropped first; the source stream is then created on
        a best-effort basis (``KSQLError`` swallowed).
        """
        source_stream = "prebid_traffic_log_total_stream"
        target = "prebid_traffic_log_valid_stream"
        source_spec = dict(
            table_name=source_stream,
            columns_type=["name string", "age bigint", "userid string", "pageid bigint"],
            topic=self.exist_topic,
            value_format="DELIMITED",
        )

        for stale in (source_stream, target):
            utils.drop_stream(self.api_client, stale)

        try:
            self.api_client.create_stream(**source_spec)
        except KSQLError:
            # Setup only; the failure under test is the one asserted below.
            pass

        with self.assertRaises(KSQLError):
            self.api_client.create_stream_as(
                table_name=target,
                src_table=source_stream,
                kafka_topic="prebid_traffic_log_valid_topic",
                select_columns=["*"],
                timestamp="foo",
                value_format="DELIMITED",
            )
from ksql import KSQLAPI
# Build a client for the KSQL server at 192.168.99.100:8088.
client = KSQLAPI("http://192.168.99.100:8088")

# Issue `show streams` and print whatever the server answers.
s = client.ksql("show streams")
print(s)
Esempio n. 22
0
class TestKSQLAPI(unittest.TestCase):
    """Test case for the client methods."""
    def setUp(self):
        self.url = "http://*****:*****@vcr.use_cassette('tests/vcr_cassettes/healthcheck.yml')
    def test_ksql_server_healthcheck(self):
        """A plain GET on the server URL should answer with HTTP 200."""
        response = requests.get(self.url)
        status = response.status_code
        self.assertEqual(status, 200)

    @vcr.use_cassette('tests/vcr_cassettes/get_ksql_server.yml')
    def test_get_ksql_version_success(self):
        """The version reported by the client must equal ``ksql.__ksql_server_version__``."""
        reported = self.api_client.get_ksql_version()
        expected = ksql.__ksql_server_version__
        self.assertEqual(reported, expected)

    @vcr.use_cassette('tests/vcr_cassettes/ksql_show_table.yml')
    def test_ksql_show_tables(self):
        """``show tables;`` should come back as a single entry with an empty table list."""
        expected = [{
            'tables': {
                'statementText': 'show tables;',
                'tables': [],
            },
        }]
        response = self.api_client.ksql("show tables;")
        self.assertEqual(response, expected)

    @vcr.use_cassette('tests/vcr_cassettes/ksql_show_table.yml')
    def test_ksql_show_tables_with_no_semicolon(self):
        """Send ``show tables`` without the trailing semicolon.

        The expected response is the same as for the terminated statement; its
        ``statementText`` carries the semicolon.
        """
        expected = [{
            'tables': {
                'statementText': 'show tables;',
                'tables': [],
            },
        }]
        response = self.api_client.ksql("show tables")
        self.assertEqual(response, expected)

    @vcr.use_cassette('tests/vcr_cassettes/ksql_create_stream.yml')
    def test_ksql_create_stream(self):
        """Run a hand-written CREATE STREAM statement and expect a SUCCESS command status."""
        # The backslash continuation keeps the next line's leading spaces inside
        # the statement text, so the literal is left exactly as written.
        statement = "CREATE STREAM test_table (viewtime bigint, userid varchar, pageid varchar) \
                       WITH (kafka_topic='{}', value_format='DELIMITED');".format(
            self.exist_topic)
        response = self.api_client.ksql(statement)
        command_status = response[0]['currentStatus']['commandStatus']
        self.assertEqual(command_status['status'], 'SUCCESS')

    @vcr.use_cassette('tests/vcr_cassettes/ksql_create_stream.yml')
    def test_ksql_create_stream_by_builder(self):
        """Build a CREATE STREAM statement with ``SQLBuilder`` and expect SUCCESS when it runs."""
        build_args = {
            'sql_type': 'create',
            'table_type': 'stream',
            'table_name': 'test_table',
            'columns_type': ['viewtime bigint', 'userid varchar', 'pageid varchar'],
            'topic': self.exist_topic,
            'value_format': 'DELIMITED',
        }
        statement = SQLBuilder.build(**build_args)

        response = self.api_client.ksql(statement)
        command_status = response[0]['currentStatus']['commandStatus']
        self.assertEqual(command_status['status'], 'SUCCESS')

    @vcr.use_cassette('tests/vcr_cassettes/ksql_create_stream.yml')
    def test_ksql_create_stream_by_builder_api(self):
        """``create_stream`` should return a truthy result for ``test_table``."""
        stream_spec = {
            'table_name': 'test_table',
            'columns_type': ['viewtime bigint', 'userid varchar', 'pageid varchar'],
            'topic': self.exist_topic,
            'value_format': 'DELIMITED',
        }
        created = self.api_client.create_stream(**stream_spec)

        self.assertTrue(created)

    @vcr.use_cassette('tests/vcr_cassettes/ksql_topic_already_registered.yml')
    def test_raise_create_error_topic_already_registered(self):
        """Creating the same stream twice must raise ``CreateError`` on the second call."""
        table_name = 'foo_table'
        columns_type = ['name string', 'age bigint']
        topic = self.exist_topic
        value_format = 'DELIMITED'

        # First creation: the return value is not inspected, so it is not bound.
        # Any exception here fails the test before the assertion below is reached.
        self.api_client.create_stream(table_name=table_name,
                                      columns_type=columns_type,
                                      topic=topic,
                                      value_format=value_format)

        # Second, identical creation is the call under test.
        with self.assertRaises(CreateError):
            self.api_client.create_stream(table_name=table_name,
                                          columns_type=columns_type,
                                          topic=topic,
                                          value_format=value_format)

    @vcr.use_cassette('tests/vcr_cassettes/raise_create_error_no_topic.yml')
    def test_raise_create_error_no_topic(self):
        """``create_stream`` on the topic ``this_topic_is_not_exist`` must raise ``CreateError``."""
        table_name = 'foo_table'
        columns_type = ['name string', 'age bigint']
        topic = 'this_topic_is_not_exist'
        value_format = 'DELIMITED'

        # The call is expected to raise, so its result is never bound.
        with self.assertRaises(CreateError):
            self.api_client.create_stream(table_name=table_name,
                                          columns_type=columns_type,
                                          topic=topic,
                                          value_format=value_format)

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_as_without_conditions.yml')
    def test_create_stream_as_without_conditions(self):
        """Derive a stream from ``pageviews_original`` without passing ``conditions``.

        The source stream is created first on a best-effort basis; the test then
        asserts that ``create_stream_as`` returns a truthy result.
        """
        src_table = 'pageviews_original'
        columns_type = [
            'name string', 'age bigint', 'userid string', 'pageid bigint'
        ]
        topic = self.exist_topic

        table_name = 'create_stream_as_without_conditions'
        kafka_topic = 'create_stream_as_without_conditions'
        value_format = 'DELIMITED'
        select_columns = ['rowtime as logtime', '*']

        try:
            self.api_client.create_stream(table_name=src_table,
                                          columns_type=columns_type,
                                          topic=topic,
                                          value_format=value_format)
        except CreateError:
            # Deliberately ignored -- presumably the source stream may already
            # exist from an earlier test (TODO confirm).
            pass

        created = self.api_client.create_stream_as(
            table_name=table_name,
            src_table=src_table,
            kafka_topic=kafka_topic,
            select_columns=select_columns,
            timestamp='logtime',
            value_format=value_format)
        self.assertTrue(created)

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_as_with_conditions_without_startwith.yml'
    )
    def test_create_stream_as_with_conditions_without_startwith(self):
        """Derive a stream from ``pageviews_original`` with the condition ``userid = 'foo'``.

        The source stream is created first on a best-effort basis; the test then
        asserts that ``create_stream_as`` returns a truthy result.
        """
        src_table = 'pageviews_original'
        columns_type = [
            'name string', 'age bigint', 'userid string', 'pageid bigint'
        ]
        topic = self.exist_topic

        table_name = 'create_stream_as_with_conditions_without_startwith'
        kafka_topic = 'create_stream_as_with_conditions_without_startwith'
        value_format = 'DELIMITED'
        select_columns = ['rowtime as logtime', '*']
        conditions = "userid = 'foo'"

        try:
            self.api_client.create_stream(table_name=src_table,
                                          columns_type=columns_type,
                                          topic=topic,
                                          value_format=value_format)
        except CreateError:
            # Deliberately ignored -- presumably the source stream may already
            # exist from an earlier test (TODO confirm).
            pass

        created = self.api_client.create_stream_as(
            table_name=table_name,
            src_table=src_table,
            kafka_topic=kafka_topic,
            select_columns=select_columns,
            timestamp='logtime',
            value_format=value_format,
            conditions=conditions)

        self.assertTrue(created)

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_as_with_conditions_with_startwith.yml'
    )
    def test_create_stream_as_with_conditions_with_startwith(self):
        """Derive a stream from ``pageviews_original`` with the condition ``userid = 'foo_%'``.

        The source stream is created first on a best-effort basis; the test then
        asserts that ``create_stream_as`` returns a truthy result.
        """
        src_table = 'pageviews_original'
        columns_type = [
            'name string', 'age bigint', 'userid string', 'pageid bigint'
        ]
        topic = self.exist_topic

        table_name = 'create_stream_as_with_conditions_with_startwith'
        kafka_topic = 'create_stream_as_with_conditions_with_startwith'
        value_format = 'DELIMITED'
        select_columns = ['rowtime as logtime', '*']
        conditions = "userid = 'foo_%'"

        try:
            self.api_client.create_stream(table_name=src_table,
                                          columns_type=columns_type,
                                          topic=topic,
                                          value_format=value_format)
        except CreateError:
            # Deliberately ignored -- presumably the source stream may already
            # exist from an earlier test (TODO confirm).
            pass

        created = self.api_client.create_stream_as(
            table_name=table_name,
            src_table=src_table,
            kafka_topic=kafka_topic,
            select_columns=select_columns,
            timestamp='logtime',
            value_format=value_format,
            conditions=conditions)

        self.assertTrue(created)

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_as_with_conditions_with_startwith_with_and.yml'
    )
    def test_create_stream_as_with_conditions_with_startwith_with_and(self):
        """Derive a stream with the compound condition ``userid = 'foo_%' and age > 10``.

        The source stream is created first on a best-effort basis; the test then
        asserts that ``create_stream_as`` returns a truthy result.
        """
        src_table = 'pageviews_original'
        columns_type = [
            'name string', 'age bigint', 'userid string', 'pageid bigint'
        ]
        topic = self.exist_topic

        table_name = 'create_stream_as_with_conditions_with_startwith_with_and'
        kafka_topic = 'create_stream_as_with_conditions_with_startwith_with_and'
        value_format = 'DELIMITED'
        select_columns = ['rowtime as logtime', '*']
        conditions = "userid = 'foo_%' and age > 10"

        try:
            self.api_client.create_stream(table_name=src_table,
                                          columns_type=columns_type,
                                          topic=topic,
                                          value_format=value_format)
        except CreateError:
            # Deliberately ignored -- presumably the source stream may already
            # exist from an earlier test (TODO confirm).
            pass

        created = self.api_client.create_stream_as(
            table_name=table_name,
            src_table=src_table,
            kafka_topic=kafka_topic,
            select_columns=select_columns,
            timestamp='logtime',
            value_format=value_format,
            conditions=conditions)

        self.assertTrue(created)

    @vcr.use_cassette(
        'tests/vcr_cassettes/ksql_create_stream_as_with_wrong_timestamp.yml')
    def test_ksql_create_stream_as_with_wrong_timestamp(self):
        """Expect ``CreateError`` when ``create_stream_as`` is given the timestamp column 'foo'.

        'foo' is not one of the columns declared for the source stream below.
        """
        src_table = 'prebid_traffic_log_total_stream'
        columns_type = [
            'name string', 'age bigint', 'userid string', 'pageid bigint'
        ]
        topic = self.exist_topic

        table_name = 'prebid_traffic_log_valid_stream'
        kafka_topic = 'prebid_traffic_log_valid_topic'
        value_format = 'DELIMITED'
        select_columns = ['*']
        timestamp = 'foo'

        try:
            self.api_client.create_stream(table_name=src_table,
                                          columns_type=columns_type,
                                          topic=topic,
                                          value_format=value_format)
        except CreateError:
            # Deliberately ignored -- presumably the source stream may already
            # exist from an earlier test (TODO confirm).
            pass

        # The call is expected to raise, so its result is never bound.
        with self.assertRaises(CreateError):
            self.api_client.create_stream_as(table_name=table_name,
                                             src_table=src_table,
                                             kafka_topic=kafka_topic,
                                             select_columns=select_columns,
                                             timestamp=timestamp,
                                             value_format=value_format)
Esempio n. 23
0
 def setUp(self):
     """Store the server URL, build the API client for it, and record the topic name."""
     server_url = "http://ksql-server:8080"
     self.url = server_url
     self.api_client = KSQLAPI(url=server_url)
     self.exist_topic = 'exist_topic'
Esempio n. 24
0
from ksql import KSQLAPI
#  Refer to https://pypi.org/project/ksql/

# Build a KSQLAPI client for the local server.
client = KSQLAPI('http://localhost:8088')
# Issue the select statement; the result is consumed by iterating over it below.
query = client.query('select * from table1')
# Print every item the query result yields, in order.
for item in query: 
    print(item)


Esempio n. 25
0
from ksql import KSQLAPI
import sys
import string
import random
import json

# Command-line arguments: argv[1] is the KSQL endpoint passed to KSQLAPI,
# argv[2] is the topic the stream is created on. sys.argv entries are already
# strings, so no conversion is needed.
endpoint = sys.argv[1]
topic = sys.argv[2]

client = KSQLAPI(endpoint)

# Create a stream from the topic under a random 10-letter uppercase name.
stream_name_random = ''.join(
    random.choice(string.ascii_uppercase) for _ in range(10))
client.create_stream(table_name=stream_name_random,
                     columns_type=["number bigint", "category varchar"],
                     topic=topic,
                     value_format="json")

# List the streams and collect their names to verify the new one is present.
query_res = client.ksql("LIST STREAMS;")
streams_names = [stream['name'] for stream in query_res[0]['streams']]

if stream_name_random in streams_names:
    print("STREAM CREATED")
else:
    print("Stream not created")
    # Use sys.exit rather than the bare exit() helper: the latter is injected by
    # the `site` module for interactive use and is not guaranteed to exist.
    sys.exit(1)

# Create a materialized table from stream for the average number over categories
materialized_table_random_name = ''.join(
    random.choice(string.ascii_uppercase) for _ in range(10))