Exemple #1
0
class KSQLAPI(object):
    """Thin facade over ``SimplifiedAPI`` for the KSQL server at ``url``.

    The statement helpers (``ksql``, ``query``, ``create_*``) forward their
    arguments to the ``SimplifiedAPI`` instance stored in ``self.sa`` and
    return whatever it returns.
    """

    def __init__(self, url, max_retries=3, **kwargs):
        """Remember ``url`` and build the underlying ``SimplifiedAPI``.

        ``max_retries`` and any extra keyword arguments are passed through
        to ``SimplifiedAPI`` untouched.
        """
        self.url = url
        self.sa = SimplifiedAPI(url, max_retries=max_retries, **kwargs)

    def get_url(self):
        """Return the server URL given at construction time."""
        return self.url

    @property
    def timeout(self):
        """Timeout value as reported by the underlying ``SimplifiedAPI``."""
        # NOTE(review): ``get_timout`` (sic) is the name called on
        # SimplifiedAPI; confirm the spelling against that class before
        # "fixing" it here.
        return self.sa.get_timout()

    def get_ksql_version(self):
        """Return the ``version`` entry of the server's info document.

        Issues a GET on ``self.url`` and reads ``version`` from the
        ``'KSQL Server Info'`` object of the JSON body.

        Raises:
            ValueError: if the response status code is not 200.
        """
        r = requests.get(self.url)
        if r.status_code != 200:
            raise ValueError('Status Code: {}.\nMessage: {}'.format(
                r.status_code, r.content))
        info = r.json().get('KSQL Server Info')
        return info.get('version')

    def ksql(self, ksql_string):
        """Forward ``ksql_string`` to ``SimplifiedAPI.ksql`` and return its result."""
        return self.sa.ksql(ksql_string)

    def query(self, query_string, encoding='utf-8', chunk_size=128):
        """Forward a query to ``SimplifiedAPI.query`` and return its result.

        The result used to be discarded, so callers always received ``None``;
        it is now handed back to the caller.
        """
        return self.sa.query(query_string=query_string,
                             encoding=encoding,
                             chunk_size=chunk_size)

    def create_stream(self, table_name, columns_type, topic, value_format):
        """Forward to ``SimplifiedAPI.create_stream`` and return its result."""
        return self.sa.create_stream(table_name=table_name,
                                     columns_type=columns_type,
                                     topic=topic,
                                     value_format=value_format)

    def create_table(self, table_name, columns_type, topic, value_format):
        """Forward to ``SimplifiedAPI.create_table`` and return its result."""
        return self.sa.create_table(table_name=table_name,
                                    columns_type=columns_type,
                                    topic=topic,
                                    value_format=value_format)

    def create_stream_as(self,
                         table_name,
                         select_columns,
                         src_table,
                         kafka_topic=None,
                         value_format='DELIMITED',
                         conditions=None,
                         partition_by=None,
                         **kwargs):
        """Forward to ``SimplifiedAPI.create_stream_as`` and return its result.

        ``conditions`` defaults to an empty list. A fresh list is created on
        every call so that no list object is shared between invocations.
        Extra keyword arguments are passed through unchanged.
        """
        if conditions is None:
            # Preserve the historical "empty list" default without sharing
            # one mutable object across calls.
            conditions = []
        return self.sa.create_stream_as(table_name=table_name,
                                        select_columns=select_columns,
                                        src_table=src_table,
                                        kafka_topic=kafka_topic,
                                        value_format=value_format,
                                        conditions=conditions,
                                        partition_by=partition_by,
                                        **kwargs)
Exemple #2
0
class KSQLAPI(object):
    """Thin facade over ``SimplifiedAPI`` for the KSQL server at ``url``.

    Most methods forward their arguments to the ``SimplifiedAPI`` instance
    stored in ``self.sa`` and return its result; ``stream_to_pandas`` adds a
    convenience loader that collects query output into a pandas DataFrame.
    """

    def __init__(self, url, max_retries=3, check_version=True, **kwargs):
        """Remember ``url`` and build the underlying ``SimplifiedAPI``.

        ``max_retries`` and extra keyword arguments are passed through to
        ``SimplifiedAPI``. When ``check_version`` is exactly ``True`` the
        server's ``/info`` endpoint is requested once, so a non-200 answer
        surfaces as ``ValueError`` at construction time.
        """
        self.url = url
        self.sa = SimplifiedAPI(url, max_retries=max_retries, **kwargs)
        if check_version is True:
            self.get_ksql_version()

    def get_url(self):
        """Return the server URL given at construction time."""
        return self.url

    @property
    def timeout(self):
        """Timeout value as reported by the underlying ``SimplifiedAPI``."""
        # NOTE(review): ``get_timout`` (sic) is the name called on
        # SimplifiedAPI; confirm the spelling against that class before
        # "fixing" it here.
        return self.sa.get_timout()

    def get_ksql_version(self):
        """Return the ``version`` entry of the server's info document.

        Issues a GET on ``<url>/info`` and reads ``version`` from the
        ``'KsqlServerInfo'`` object of the JSON body.

        Raises:
            ValueError: if the response status code is not 200.
        """
        r = requests.get(self.url + "/info")
        if r.status_code != 200:
            raise ValueError('Status Code: {}.\nMessage: {}'.format(
                r.status_code, r.content))
        info = r.json().get('KsqlServerInfo')
        return info.get('version')

    def get_properties(self):
        """Run ``show properties;`` and return the ``properties`` entry of the first result."""
        properties = self.sa.ksql("show properties;")
        return properties[0]['properties']

    def ksql(self, ksql_string, stream_properties=None):
        """Forward to ``SimplifiedAPI.ksql`` and return its result."""
        return self.sa.ksql(ksql_string, stream_properties=stream_properties)

    def query(self,
              query_string,
              encoding='utf-8',
              chunk_size=128,
              stream_properties=None,
              idle_timeout=None):
        """Forward to ``SimplifiedAPI.query`` and return its result."""
        return self.sa.query(query_string=query_string,
                             encoding=encoding,
                             chunk_size=chunk_size,
                             stream_properties=stream_properties,
                             idle_timeout=idle_timeout)

    def create_stream(self,
                      table_name,
                      columns_type,
                      topic,
                      value_format='JSON'):
        """Forward to ``SimplifiedAPI.create_stream`` and return its result."""
        return self.sa.create_stream(table_name=table_name,
                                     columns_type=columns_type,
                                     topic=topic,
                                     value_format=value_format)

    def create_table(self, table_name, columns_type, topic, value_format, key,
                     **kwargs):
        """Forward to ``SimplifiedAPI.create_table`` and return its result."""
        return self.sa.create_table(table_name=table_name,
                                    columns_type=columns_type,
                                    topic=topic,
                                    value_format=value_format,
                                    key=key,
                                    **kwargs)

    def create_stream_as(self,
                         table_name,
                         select_columns,
                         src_table,
                         kafka_topic=None,
                         value_format='JSON',
                         conditions=None,
                         partition_by=None,
                         **kwargs):
        """Forward to ``SimplifiedAPI.create_stream_as`` and return its result.

        ``conditions`` defaults to an empty list. A fresh list is created on
        every call so that no list object is shared between invocations.
        Extra keyword arguments are passed through unchanged.
        """
        if conditions is None:
            # Preserve the historical "empty list" default without sharing
            # one mutable object across calls.
            conditions = []
        return self.sa.create_stream_as(table_name=table_name,
                                        select_columns=select_columns,
                                        src_table=src_table,
                                        kafka_topic=kafka_topic,
                                        value_format=value_format,
                                        conditions=conditions,
                                        partition_by=partition_by,
                                        **kwargs)

    @staticmethod
    def _build_stream_sql(stream, startDt, endDt, dtFormat, limit):
        """Assemble the ``SELECT * ... EMIT CHANGES`` text used by ``stream_to_pandas``."""
        # NOTE: values are interpolated verbatim (no quoting/escaping), so
        # callers must not pass untrusted text here.
        bounds = []
        if startDt:
            bounds.append("ROWTIME >= STRINGTOTIMESTAMP('{}', '{}')".format(
                startDt, dtFormat))
        if endDt:
            bounds.append("ROWTIME <= STRINGTOTIMESTAMP('{}', '{}')".format(
                endDt, dtFormat))

        sql = "SELECT * FROM " + stream
        if bounds:
            sql += "\nWHERE " + "\nAND ".join(bounds)
        sql += "\nEMIT CHANGES"
        if limit > 0:
            sql += '\nLIMIT ' + str(limit)
        return sql + ';'

    def stream_to_pandas(self,
                         stream,
                         startDt='',
                         endDt='',
                         dtFormat='yyyy-MM-dd HH:mm:ss',
                         limit=0,
                         timeout=None):
        """Run ``SELECT * FROM <stream>`` and collect the output in a DataFrame.

        Args:
            stream: name placed after ``FROM`` in the generated statement.
            startDt: lower ``ROWTIME`` bound; empty (or ``None``) means none.
            endDt: upper ``ROWTIME`` bound; empty (or ``None``) means none.
            dtFormat: format string handed to ``STRINGTOTIMESTAMP`` for both
                bounds.
            limit: when greater than 0, appended as a ``LIMIT`` clause.
            timeout: passed to ``query`` as ``idle_timeout``.

        Returns:
            A ``pandas.DataFrame`` whose columns are the backtick-quoted
            names found in the header record's schema. A ``ROWTIME`` column,
            when present, is converted from epoch milliseconds to datetimes.

        Progress and the generated statement are printed to stdout.
        Pressing Ctrl-C while records are being read stops the loop and
        returns what was collected so far.
        """
        sql = self._build_stream_sql(stream, startDt, endDt, dtFormat, limit)
        print('[KSQL]\n' + sql)

        properties = {}
        if startDt or endDt:
            # A time-bounded read needs to start from the beginning of the
            # topic rather than from the latest offset.
            properties['auto.offset.reset'] = 'earliest'

        result = self.query(sql,
                            stream_properties=properties,
                            idle_timeout=timeout)
        print('\nStart loading stream data...')

        count = 0
        header = []
        rows = []

        try:
            for record in result:
                r = json.loads(record)
                if 'header' in r:
                    header = re.findall(r'`(.*?)`', r['header']['schema'])
                elif 'row' in r:
                    rows.append(r['row']['columns'])
                count += 1
                if count % 1000 == 0:
                    print('Records: ' + str(count))
            print('Finished by LIMIT')
        except KeyboardInterrupt:
            print('Finished by Ctrl-C')

        df = pd.DataFrame(rows, columns=header)
        # An empty result (or a schema without ROWTIME) has no such column;
        # converting unconditionally used to raise KeyError.
        if 'ROWTIME' in df.columns:
            df['ROWTIME'] = pd.to_datetime(df['ROWTIME'], unit='ms')
        return df
Exemple #3
0
class KSQLAPI(object):
    """Thin facade over ``SimplifiedAPI`` for the KSQL server at ``url``.

    Every method forwards to the ``SimplifiedAPI`` instance stored in
    ``self.sa`` and returns its result.
    """

    def __init__(self, url, max_retries=3, check_version=True, **kwargs):
        """Remember ``url`` and build the underlying ``SimplifiedAPI``.

        Basic Authentication is supported: for now the ``api_key``/``secret``
        pair is accepted, modelled on the Confluent Cloud implementation, so
        it is enough to supply ``api_key`` and ``secret`` in ``kwargs``.

        ``max_retries`` and ``kwargs`` are passed through to
        ``SimplifiedAPI``. When ``check_version`` is exactly ``True`` the
        server's ``/info`` endpoint is requested once, so a non-200 answer
        surfaces as ``ValueError`` at construction time.
        """
        self.url = url
        self.sa = SimplifiedAPI(url, max_retries=max_retries, **kwargs)
        if check_version is True:
            self.get_ksql_version()

    def get_url(self):
        """Return the server URL given at construction time."""
        return self.url

    @property
    def timeout(self):
        """Timeout value as reported by the underlying ``SimplifiedAPI``."""
        # NOTE(review): ``get_timout`` (sic) is the name called on
        # SimplifiedAPI; confirm the spelling against that class before
        # "fixing" it here.
        return self.sa.get_timout()

    def get_ksql_version(self):
        """Return the ``version`` entry of the server's info document.

        Requests ``<url>/info`` through ``SimplifiedAPI.get_request`` and
        reads ``version`` from the ``'KsqlServerInfo'`` object of the JSON
        body.

        Raises:
            ValueError: if the response status code is not 200.
        """
        r = self.sa.get_request(self.url + "/info")
        if r.status_code != 200:
            raise ValueError(
                'Status Code: {}.\nMessage: {}'.format(
                    r.status_code, r.content))
        info = r.json().get('KsqlServerInfo')
        return info.get('version')

    def get_properties(self):
        """Run ``show properties;`` and return the ``properties`` entry of the first result."""
        properties = self.sa.ksql("show properties;")
        return properties[0]['properties']

    def ksql(self, ksql_string, stream_properties=None):
        """Forward to ``SimplifiedAPI.ksql`` and return its result."""
        return self.sa.ksql(ksql_string, stream_properties=stream_properties)

    def query(self,
              query_string,
              encoding='utf-8',
              chunk_size=128,
              stream_properties=None,
              idle_timeout=None):
        """Forward to ``SimplifiedAPI.query`` and return its result."""
        return self.sa.query(query_string=query_string,
                             encoding=encoding,
                             chunk_size=chunk_size,
                             stream_properties=stream_properties,
                             idle_timeout=idle_timeout)

    def create_stream(self,
                      table_name,
                      columns_type,
                      topic,
                      value_format='JSON'):
        """Forward to ``SimplifiedAPI.create_stream`` and return its result."""
        return self.sa.create_stream(table_name=table_name,
                                     columns_type=columns_type,
                                     topic=topic,
                                     value_format=value_format)

    def create_table(self, table_name, columns_type, topic, value_format, key,
                     **kwargs):
        """Forward to ``SimplifiedAPI.create_table`` and return its result."""
        return self.sa.create_table(table_name=table_name,
                                    columns_type=columns_type,
                                    topic=topic,
                                    value_format=value_format,
                                    key=key,
                                    **kwargs)

    def create_stream_as(self,
                         table_name,
                         select_columns,
                         src_table,
                         kafka_topic=None,
                         value_format='JSON',
                         conditions=None,
                         partition_by=None,
                         **kwargs):
        """Forward to ``SimplifiedAPI.create_stream_as`` and return its result.

        ``conditions`` defaults to an empty list. A fresh list is created on
        every call so that no list object is shared between invocations.
        Extra keyword arguments are passed through unchanged.
        """
        if conditions is None:
            # Preserve the historical "empty list" default without sharing
            # one mutable object across calls.
            conditions = []
        return self.sa.create_stream_as(table_name=table_name,
                                        select_columns=select_columns,
                                        src_table=src_table,
                                        kafka_topic=kafka_topic,
                                        value_format=value_format,
                                        conditions=conditions,
                                        partition_by=partition_by,
                                        **kwargs)