Ejemplo n.º 1
0
    def test_json_spec(self):
        """
        Verify the column layout of a table consumed from a JSON-valued topic.

        Subscribes to the 'orders' topic, ignoring the key and decoding the
        value with a five-field JSON spec whose JSON field names are mapped to
        column names. The resulting table must have eight columns: those
        checked by ``_assert_common_cols`` plus the five value columns at
        indices 3 through 7, in declaration order.
        """
        value_columns = [
            ('Symbol', dtypes.string),
            ('Side', dtypes.string),
            ('Price', dtypes.double),
            ('Qty', dtypes.int_),
            ('Tstamp', dtypes.DateTime),
        ]
        json_field_to_column = {
            'jsymbol': 'Symbol',
            'jside': 'Side',
            'jprice': 'Price',
            'jqty': 'Qty',
            'jts': 'Tstamp'
        }
        json_value_spec = ck.json_spec(value_columns,
                                       mapping=json_field_to_column)

        table = ck.consume(
            {'bootstrap.servers': 'redpanda:29092'},
            'orders',
            key_spec=KeyValueSpec.IGNORE,
            value_spec=json_value_spec,
            table_type=TableType.append(),
        )

        columns = table.columns
        self.assertEqual(8, len(columns))
        self._assert_common_cols(columns)

        # The value columns follow the leading columns, starting at index 3.
        for position, (expected_name, expected_type) in enumerate(
                value_columns, start=3):
            self.assertEqual(expected_name, columns[position].name)
            self.assertEqual(expected_type, columns[position].data_type)
Ejemplo n.º 2
0
    def test_simple_spec(self):
        """
        Verify the column layout of a table consumed with a simple value spec.

        Subscribes to the 'orders' topic, ignoring the key and reading the
        value as a single double column named 'Price'. The resulting table
        must have four columns: those checked by ``_assert_common_cols`` plus
        the 'Price' column at index 3.
        """
        price_spec = ck.simple_spec('Price', dtypes.double)
        table = ck.consume(
            {'bootstrap.servers': 'redpanda:29092'},
            'orders',
            key_spec=KeyValueSpec.IGNORE,
            value_spec=price_spec,
        )

        columns = table.columns
        self.assertEqual(4, len(columns))
        self._assert_common_cols(columns)

        # The single value column sits right after the leading columns.
        price_column = columns[3]
        self.assertEqual("Price", price_column.name)
        self.assertEqual(dtypes.double, price_column.data_type)
Ejemplo n.º 3
0
    def test_avro_spec(self):
        """
        Verify the column layout of tables consumed from an Avro-valued topic.

        Posts the 'share_price' record schema to the schema registry at
        redpanda:8081 using curl, asserts that the command exits with status
        0, and then subscribes to the 'share_price' topic three times:

        * with the schema as registered (version '1') and no mapping,
        * with a mapping and ``mapped_only=True``,
        * with a mapping that renames two of the fields.

        Each subscription is checked inside its own ``subTest``.
        """
        avro_schema = """
            { "type" : "record",
              "namespace" : "io.deephaven.examples",
              "name" : "share_price",
              "fields" : [
                { "name" : "Symbol", "type" : "string" },
                { "name" : "Side",   "type" : "string" },
                { "name" : "Qty",    "type" : "int"    },
                { "name" : "Price",  "type" : "double" }
              ]
            }
            """

        # The schema is embedded as a JSON string value, so it is flattened
        # onto one line and its double quotes are backslash-escaped.
        escaped_schema = avro_schema.replace('\n', ' ').replace('"', '\\"')
        registry_payload = '{ "schema" : "%s" }' % escaped_schema

        # Backslash-newline inside this (non-raw) literal is a line
        # continuation, so the shell receives the curl call as a single line.
        curl_template = """
            curl -X POST \
                -H 'Content-type: application/vnd.schemaregistry.v1+json; artifactType=AVRO' \
                --data-binary '%s' \
                http://redpanda:8081/subjects/share_price_record/versions
            """

        exit_status = os.system(curl_template % registry_payload)
        self.assertEqual(0, exit_status)

        def consume_share_price(avro_value_spec):
            # Subscribe to 'share_price' with the given value spec; a fresh
            # properties dict is built for every subscription.
            return ck.consume(
                {
                    'bootstrap.servers': 'redpanda:29092',
                    'schema.registry.url': 'http://redpanda:8081'
                },
                'share_price',
                key_spec=KeyValueSpec.IGNORE,
                value_spec=avro_value_spec,
                table_type=TableType.append())

        def check_columns(table, expected_value_columns):
            # Check the total column count, the leading columns, and then each
            # value column (name first, then type) starting at index 3.
            columns = table.columns
            self.assertEqual(3 + len(expected_value_columns), len(columns))
            self._assert_common_cols(columns)
            for position, (expected_name, expected_type) in enumerate(
                    expected_value_columns, start=3):
                self.assertEqual(expected_name, columns[position].name)
                self.assertEqual(expected_type, columns[position].data_type)

        with self.subTest(msg='straight schema, no mapping'):
            table = consume_share_price(
                ck.avro_spec('share_price_record', schema_version='1'))
            check_columns(table, [
                ("Symbol", dtypes.string),
                ("Side", dtypes.string),
                ("Qty", dtypes.int32),
                ("Price", dtypes.double),
            ])

        with self.subTest(msg='mapping_only (filter out some schema fields)'):
            field_to_column = {'Symbol': 'Ticker', 'Price': 'Dollars'}
            table = consume_share_price(
                ck.avro_spec('share_price_record',
                             mapping=field_to_column,
                             mapped_only=True))
            check_columns(table, [
                ("Ticker", dtypes.string),
                ("Dollars", dtypes.double),
            ])

        with self.subTest(msg='mapping (rename some fields)'):
            field_to_column = {'Symbol': 'Ticker', 'Qty': 'Quantity'}
            table = consume_share_price(
                ck.avro_spec('share_price_record', mapping=field_to_column))
            check_columns(table, [
                ("Ticker", dtypes.string),
                ("Side", dtypes.string),
                ("Quantity", dtypes.int32),
                ("Price", dtypes.double),
            ])
Ejemplo n.º 4
0
# Table produced by make_cdc_table for the 'purchases' source.
purchases = make_cdc_table('purchases')

# Base Kafka properties plus three Deephaven column-name settings, each set to
# the empty string.
# NOTE(review): presumably an empty name leaves that metadata column out of the
# consumed table -- confirm against the Deephaven Kafka consumer docs.
consume_properties = {
    **kafka_base_properties,
    'deephaven.partition.column.name': '',
    'deephaven.timestamp.column.name': '',
    'deephaven.offset.column.name': '',
}

# Append-type table over the 'pageviews' topic: keys are ignored, values are
# decoded as JSON with four declared columns, and every partition is read from
# the beginning.
pageviews = ck.consume(
    consume_properties,
    topic='pageviews',
    offsets=ck.ALL_PARTITIONS_SEEK_TO_BEGINNING,
    key_spec=KeyValueSpec.IGNORE,
    value_spec=ck.json_spec([
        ('user_id', dh.int_),
        ('url', dh.string),
        ('channel', dh.string),
        ('received_at', dh.DateTime),
    ]),
    table_type=TableType.Append,
)

# Split each url on '/', take element 1 as the page-view type and element 2
# (parsed as a long) as the target id, then drop the intermediate split column.
pageviews_stg = (
    pageviews
    .update_view([
        'url_path = url.split(`/`)',
        'pageview_type = url_path[1]',
        'target_id = Long.parseLong(url_path[2])',
    ])
    .drop_columns('url_path')
)

purchases_by_item = purchases.agg_by([
    agg.sum_(['revenue = purchase_price']),
    agg.count_('orders'),