def test_json_spec(self):
    """Verify the schema of a table consumed with a JSON value spec.

    Subscribes to the ``orders`` topic with the key ignored and a JSON
    value spec of five typed columns, each mapped from a ``j``-prefixed
    JSON field. The resulting append table must have eight columns: the
    ones verified by ``_assert_common_cols`` followed by the five value
    columns, in declaration order, starting at index 3.
    """
    value_columns = (
        ('Symbol', dtypes.string),
        ('Side', dtypes.string),
        ('Price', dtypes.double),
        ('Qty', dtypes.int_),
        ('Tstamp', dtypes.DateTime),
    )
    json_field_to_column = {
        'jsymbol': 'Symbol',
        'jside': 'Side',
        'jprice': 'Price',
        'jqty': 'Qty',
        'jts': 'Tstamp'
    }

    table = ck.consume(
        {'bootstrap.servers': 'redpanda:29092'},
        'orders',
        key_spec=KeyValueSpec.IGNORE,
        # Pass a fresh list so the expectations below stay independent
        # of whatever the spec builder does with its argument.
        value_spec=ck.json_spec(list(value_columns),
                                mapping=json_field_to_column),
        table_type=TableType.append())

    columns = table.columns
    self.assertEqual(8, len(columns))
    self._assert_common_cols(columns)

    # Value columns follow the common columns, i.e. they start at index 3.
    for index, (expected_name, expected_type) in enumerate(value_columns, start=3):
        self.assertEqual(expected_name, columns[index].name)
        self.assertEqual(expected_type, columns[index].data_type)
def test_simple_spec(self):
    """Verify the schema of a table consumed with a simple value spec.

    Subscribes to the ``orders`` topic with the key ignored and a
    single ``Price`` column of type double as the value. The resulting
    table must have four columns: the ones verified by
    ``_assert_common_cols`` followed by ``Price`` at index 3.
    """
    table = ck.consume(
        {'bootstrap.servers': 'redpanda:29092'},
        'orders',
        key_spec=KeyValueSpec.IGNORE,
        value_spec=ck.simple_spec('Price', dtypes.double))

    columns = table.columns
    self.assertEqual(4, len(columns))
    self._assert_common_cols(columns)

    # The single value column comes right after the common columns.
    price_column = columns[3]
    self.assertEqual("Price", price_column.name)
    self.assertEqual(dtypes.double, price_column.data_type)
def test_avro_spec(self):
    """Check an Avro Kafka subscription creates the right table.

    First registers the ``share_price`` Avro record schema under the
    ``share_price_record`` subject of the schema registry at
    ``http://redpanda:8081`` (via ``curl``), then consumes the
    ``share_price`` topic three times, each in its own sub-test:

    * the schema as registered (version 1), no field mapping;
    * a mapping with ``mapped_only=True``, which keeps only the mapped
      fields (renamed);
    * a mapping without ``mapped_only``, which renames the mapped fields
      and keeps the others.

    In every case the value columns are expected to start at index 3,
    after the columns verified by ``_assert_common_cols``.
    """
    # Local imports keep this method self-contained; it does not rely on
    # these modules being imported at the top of the file.
    import json
    import subprocess

    avro_schema = {
        "type": "record",
        "namespace": "io.deephaven.examples",
        "name": "share_price",
        "fields": [
            {"name": "Symbol", "type": "string"},
            {"name": "Side", "type": "string"},
            {"name": "Qty", "type": "int"},
            {"name": "Price", "type": "double"},
        ],
    }
    # The registry takes the Avro schema as a JSON *string* embedded in
    # the request document, hence the double encoding. json.dumps does
    # the quoting that was previously done by hand with str.replace.
    payload = json.dumps({"schema": json.dumps(avro_schema)})

    # An argument list (no shell) means the payload needs no shell
    # quoting and cannot be altered by shell interpretation.
    registration = subprocess.run([
        'curl', '-X', 'POST',
        '-H', 'Content-type: application/vnd.schemaregistry.v1+json; artifactType=AVRO',
        '--data-binary', payload,
        'http://redpanda:8081/subjects/share_price_record/versions',
    ])
    self.assertEqual(0, registration.returncode)

    def consume_share_price(value_spec):
        # Subscribe to the share_price topic as an append table, ignoring keys.
        return ck.consume(
            {
                'bootstrap.servers': 'redpanda:29092',
                'schema.registry.url': 'http://redpanda:8081'
            },
            'share_price',
            key_spec=KeyValueSpec.IGNORE,
            value_spec=value_spec,
            table_type=TableType.append())

    def check_columns(table, expected_value_columns):
        # Assert the common columns plus the given (name, type) value columns.
        cols = table.columns
        self.assertEqual(3 + len(expected_value_columns), len(cols))
        self._assert_common_cols(cols)
        for index, (name, data_type) in enumerate(expected_value_columns, start=3):
            self.assertEqual(name, cols[index].name)
            self.assertEqual(data_type, cols[index].data_type)

    with self.subTest(msg='straight schema, no mapping'):
        t = consume_share_price(
            ck.avro_spec('share_price_record', schema_version='1'))
        check_columns(t, [
            ("Symbol", dtypes.string),
            ("Side", dtypes.string),
            ("Qty", dtypes.int32),
            ("Price", dtypes.double),
        ])

    with self.subTest(msg='mapping_only (filter out some schema fields)'):
        m = {'Symbol': 'Ticker', 'Price': 'Dollars'}
        t = consume_share_price(
            ck.avro_spec('share_price_record', mapping=m, mapped_only=True))
        check_columns(t, [
            ("Ticker", dtypes.string),
            ("Dollars", dtypes.double),
        ])

    with self.subTest(msg='mapping (rename some fields)'):
        m = {'Symbol': 'Ticker', 'Qty': 'Quantity'}
        t = consume_share_price(
            ck.avro_spec('share_price_record', mapping=m))
        check_columns(t, [
            ("Ticker", dtypes.string),
            ("Side", dtypes.string),
            ("Quantity", dtypes.int32),
            ("Price", dtypes.double),
        ])
purchases = make_cdc_table('purchases') consume_properties = { **kafka_base_properties, **{ 'deephaven.partition.column.name': '', 'deephaven.timestamp.column.name': '', 'deephaven.offset.column.name': '' } } pageviews = ck.consume(consume_properties, topic='pageviews', offsets=ck.ALL_PARTITIONS_SEEK_TO_BEGINNING, key_spec=KeyValueSpec.IGNORE, value_spec=ck.json_spec([('user_id', dh.int_), ('url', dh.string), ('channel', dh.string), ('received_at', dh.DateTime)]), table_type=TableType.Append) pageviews_stg = pageviews \ .update_view([ 'url_path = url.split(`/`)', 'pageview_type = url_path[1]', 'target_id = Long.parseLong(url_path[2])' ]).drop_columns('url_path') purchases_by_item = purchases.agg_by([ agg.sum_(['revenue = purchase_price']), agg.count_('orders'),