Example #1
0
    def shared(self) -> Action:
        return TdAction(self.keyschema() + self.schema() + f"""
$ kafka-create-topic topic=sink-input partitions=16

$ kafka-ingest format=avro topic=sink-input key-format=avro key-schema=${{keyschema}} schema=${{schema}} repeat={self.n()}
{{"f1": ${{kafka-ingest.iteration}} }} {{"f2": ${{kafka-ingest.iteration}} }}
""")
Example #2
0
    def shared(self) -> Action:
        return TdAction(self.schema() + self.keyschema() + f"""
$ kafka-create-topic topic=kafka-parallel-ingestion partitions=4

$ kafka-ingest format=avro topic=kafka-parallel-ingestion key-format=avro key-schema=${{keyschema}} schema=${{schema}} repeat={self.n()} publish=true
{{"f1": ${{kafka-ingest.iteration}} }} {{"f2": ${{kafka-ingest.iteration}} }}
""")
Example #3
0
    def shared(self) -> Action:
        return TdAction(f"""
$ kafka-create-topic topic=kafka-envelope-none-bytes

$ kafka-ingest format=bytes topic=kafka-envelope-none-bytes repeat={self.n()}
12345678901234567890123456789012345678901234567890
""")
Example #4
0
    def init(self) -> List[Action]:
        return [
            self.view_ten(),
            TdAction(f"""
> CREATE MATERIALIZED VIEW v1 AS SELECT {self.unique_values()} AS f1, {self.unique_values()} AS f2 FROM {self.join()}
"""),
        ]
Example #5
0
 def shared(self) -> List[Action]:
     """Create the recovery topic, bulk-ingest records, then add EOF markers."""
     create_topic = TdAction(
         "$ kafka-create-topic topic=kafka-recovery-big partitions=8")
     # Ingest 10 ** SCALE records
     ingest_records = Kgen(
         topic="kafka-recovery-big",
         args=[
             "--keys=random",
             f"--num-records={self.n()}",
             "--values=bytes",
             "--max-message-size=32",
             "--min-message-size=32",
             "--key-min=256",
             f"--key-max={256 + self.n() ** 2}",
         ],
     )
     # Add 256 EOF markers with key values <= 256. This high number is chosen
     # so as to guarantee that there will be an EOF marker in each partition,
     # even if the number of partitions is increased in the future.
     eof_markers = Kgen(
         topic="kafka-recovery-big",
         args=[
             "--keys=sequential",
             "--num-records=256",
             "--values=bytes",
             "--min-message-size=32",
             "--max-message-size=32",
         ],
     )
     return [create_topic, ingest_records, eof_markers]
Example #6
0
    def shared(self) -> Action:
        return TdAction(self.schema() + f"""
$ kafka-create-topic topic=startup-time

$ kafka-ingest format=avro topic=startup-time schema=${{schema}} repeat=1
{{"f2": 1}}
""")
Example #7
0
    def init(self) -> Action:
        return TdAction(f"""
> DROP CONNECTION IF EXISTS s1_kafka_conn CASCADE
> DROP CONNECTION IF EXISTS s1_csr_conn CASCADE

> CREATE CONNECTION s1_kafka_conn
  FOR KAFKA BROKER '${{testdrive.kafka-addr}}'

> CREATE CONNECTION IF NOT EXISTS s1_csr_conn
FOR CONFLUENT SCHEMA REGISTRY
URL '${{testdrive.schema-registry-url}}';

> CREATE SOURCE s1
  FROM KAFKA CONNECTION s1_kafka_conn
  TOPIC 'testdrive-kafka-recovery-${{testdrive.seed}}'
  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION s1_csr_conn
  ENVELOPE UPSERT;

# Make sure we are fully caught up before continuing
> SELECT COUNT(*) FROM s1;
{self.n()}

# Give time for any background tasks (e.g. compaction) to settle down
> SELECT mz_internal.mz_sleep(10)
<null>
""")
Example #8
0
    def shared(self) -> Action:
        return TdAction(self.keyschema() + self.schema() + f"""
$ kafka-create-topic topic=upsert-unique partitions=16

$ kafka-ingest format=avro topic=upsert-unique key-format=avro key-schema=${{keyschema}} schema=${{schema}} publish=true repeat={self.n()}
{{"f1": ${{kafka-ingest.iteration}} }} {{"f2": ${{kafka-ingest.iteration}} }}
""")
Example #9
0
    def shared(self) -> Action:
        return TdAction(self.schema() + f"""
$ kafka-create-topic topic=kafka-raw

$ kafka-ingest format=avro topic=kafka-raw schema=${{schema}} repeat={self.n()}
{{"f2": 1}}
""")
Example #10
0
    def init(self) -> Action:
        create_tables = "\n".join(
            f"> CREATE TABLE t{i} (f1 INTEGER);\n> INSERT INTO t{i} DEFAULT VALUES;"
            for i in range(0, self.n()))
        create_sources = "\n".join(f"""
> CREATE MATERIALIZED SOURCE source{i}
  FROM KAFKA BROKER '${{testdrive.kafka-addr}}' TOPIC 'testdrive-startup-time-${{testdrive.seed}}'
  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY '${{testdrive.schema-registry-url}}'
  ENVELOPE NONE
""" for i in range(0, self.n()))
        join = " ".join(f"LEFT JOIN source{i} USING (f2)"
                        for i in range(1, (ceil(self.scale()))))

        create_views = "\n".join(
            f"> CREATE MATERIALIZED VIEW v{i} AS SELECT * FROM source{i} AS s {join} LIMIT {i+1}"
            for i in range(0, self.n()))

        create_sinks = "\n".join(f"""
> CREATE SINK sink{i} FROM source{i}
  INTO KAFKA BROKER '${{testdrive.kafka-addr}}' TOPIC 'testdrive-sink-output-${{testdrive.seed}}'
  KEY (f2)
  WITH (reuse_topic=true)
  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY '${{testdrive.schema-registry-url}}'
""" for i in range(0, self.n()))

        return TdAction(f"""
{create_tables}
{create_sources}
{create_views}
{create_sinks}
""")
Example #11
0
    def shared(self) -> Action:
        data = "a" * 512
        return TdAction(f"""
$ kafka-create-topic topic=kafka-envelope-none-bytes

$ kafka-ingest format=bytes topic=kafka-envelope-none-bytes repeat={self.n()}
{data}
""")
Example #12
0
    def init(self) -> List[Action]:
        return [
            self.table_ten(),
            TdAction(f"""
> CREATE TABLE t1 (f1 BIGINT);

> INSERT INTO t1 SELECT {self.unique_values()} FROM {self.join()}
"""),
        ]
Example #13
0
    def init(self) -> List[Action]:
        return [
            self.view_ten(),
            TdAction(f"""
> CREATE MATERIALIZED VIEW v1 AS SELECT {self.unique_values()} AS f1 FROM {self.join()};

> SELECT COUNT(*) = {self.n()} FROM v1;
true
"""),
        ]
Example #14
0
    def init(self) -> Action:
        return TdAction(f"""
> CREATE MATERIALIZED SOURCE source1
  FROM KAFKA BROKER '${{testdrive.kafka-addr}}' TOPIC 'testdrive-sink-input-${{testdrive.seed}}'
  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY '${{testdrive.schema-registry-url}}'
  ENVELOPE UPSERT;

> SELECT COUNT(*) FROM source1;
{self.n()}
""")
Example #15
0
    def shared(self) -> Action:
        return TdAction(f"""
$ postgres-execute connection=postgres://postgres:postgres@postgres
ALTER USER postgres WITH replication;
DROP SCHEMA IF EXISTS public CASCADE;
CREATE SCHEMA public;

DROP PUBLICATION IF EXISTS p1;
CREATE PUBLICATION p1 FOR ALL TABLES;
""")
Example #16
0
    def shared(self) -> Action:
        return TdAction(self.keyschema() + self.schema() + f"""
$ kafka-create-topic topic=kafka-upsert

$ kafka-ingest format=avro topic=kafka-upsert key-format=avro key-schema=${{keyschema}} schema=${{schema}} repeat={self.n()}
{{"f1": 1}} {{"f2": ${{kafka-ingest.iteration}} }}

$ kafka-ingest format=avro topic=kafka-upsert key-format=avro key-schema=${{keyschema}} schema=${{schema}}
{{"f1": 2}} {{"f2": 2}}
""")
Example #17
0
    def before(self) -> Action:
        # Due to exterme variability in the results, we have no option but to drop and re-create
        # the table prior to each measurement
        return TdAction(f"""
> DROP TABLE IF EXISTS t1;

> CREATE TABLE t1 (f1 BIGINT);

> INSERT INTO t1 SELECT * FROM generate_series(0, {self.n()})
""")
Example #18
0
    def init(self) -> List[Action]:
        return [
            self.table_ten(),
            TdAction(f"""
> CREATE MATERIALIZED VIEW v1 (f1, f2) AS SELECT {self.unique_values()} AS f1, 1 AS f2 FROM {self.join()}

> CREATE DEFAULT INDEX ON v1;

> SELECT COUNT(*) = {self.n()} FROM v1;
true
"""),
        ]
Example #19
0
    def init(self) -> List[Action]:
        return [
            self.table_ten(),
            TdAction(f"""
> CREATE TABLE t1 (f1 INTEGER, f2 INTEGER);
> INSERT INTO t1 (f1) SELECT {self.unique_values()} FROM {self.join()}

# Make sure the dataflow is fully hydrated
> SELECT 1 FROM t1 WHERE f1 = 0;
1
"""),
        ]
Example #20
0
    def init(self) -> Action:
        return TdAction("""
> CREATE SOURCE s1
  FROM KAFKA BROKER '${testdrive.kafka-addr}' TOPIC 'testdrive-kafka-recovery-big-${testdrive.seed}'
  FORMAT BYTES
  ENVELOPE UPSERT;

# Confirm that all the EOF markers generated above have been processed
> CREATE MATERIALIZED VIEW s1_is_complete AS SELECT COUNT(*) = 256 FROM s1 WHERE key0 <= '\\x00000000000000ff'

> SELECT * FROM s1_is_complete;
true
""")
Example #21
0
    def before(self) -> Action:
        return TdAction(f"""
> DROP SOURCE IF EXISTS s1;

$ postgres-execute connection=postgres://postgres:postgres@postgres
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (pk SERIAL PRIMARY KEY, f2 BIGINT);
ALTER TABLE t1 REPLICA IDENTITY FULL;

> CREATE SOURCE s1
  FROM POSTGRES CONNECTION 'host=postgres port=5432 user=postgres password=postgres sslmode=require dbname=postgres'
  PUBLICATION 'p1';
            """)
Example #22
0
    def before(self) -> Action:
        return TdAction(f"""
> DROP TABLE IF EXISTS ten CASCADE;

> CREATE TABLE ten (f1 INTEGER);

> INSERT INTO ten VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);

> CREATE MATERIALIZED VIEW v1 AS SELECT {self.unique_values()} FROM {self.join()}

> SELECT COUNT(*) = {self.n()} AS f1 FROM v1;
true
""")
Example #23
0
    def shared(self) -> Action:
        return TdAction(f"""
$ postgres-execute connection=postgres://postgres:postgres@postgres
ALTER USER postgres WITH replication;
DROP SCHEMA IF EXISTS public CASCADE;
CREATE SCHEMA public;

DROP PUBLICATION IF EXISTS mz_source;
CREATE PUBLICATION mz_source FOR ALL TABLES;

CREATE TABLE pk_table (pk BIGINT PRIMARY KEY, f2 BIGINT);
INSERT INTO pk_table SELECT x, x*2 FROM generate_series(1, {self.n()}) as x;
ALTER TABLE pk_table REPLICA IDENTITY FULL;
""")
Example #24
0
    def init(self) -> Action:
        return TdAction(f"""
> CREATE MATERIALIZED SOURCE s1
  FROM KAFKA BROKER '${{testdrive.kafka-addr}}' TOPIC 'testdrive-kafka-recovery-${{testdrive.seed}}'
  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY '${{testdrive.schema-registry-url}}'
  ENVELOPE UPSERT;

# Make sure we are fully caught up before continuing
> SELECT COUNT(*) FROM s1;
{self.n()}

# Give time for any background tasks (e.g. compaction) to settle down
> SELECT mz_internal.mz_sleep(10)
<null>
""")
Example #25
0
    def shared(self) -> List[Action]:
        return [
            TdAction(f"""
$ kafka-create-topic topic=kafka-scalability partitions=8
"""),
            Kgen(
                topic="kafka-scalability",
                args=[
                    "--keys=sequential",
                    f"--num-records={self.n()}",
                    "--values=bytes",
                    "--max-message-size=100",
                    "--min-message-size=100",
                ],
            ),
        ]
Example #26
0
    def init(self) -> Action:
        # Just to spice things up a bit, we perform individual
        # inserts here so that the rows are assigned separate timestamps
        inserts = "\n\n".join(f"> INSERT INTO ten VALUES ({i})"
                              for i in range(0, 10))

        return TdAction(f"""
> CREATE TABLE ten (f1 INTEGER);

> CREATE MATERIALIZED VIEW v1 AS SELECT {self.unique_values()} AS f1 FROM {self.join()};

{inserts}

> SELECT COUNT(*) = {self.n()} FROM v1;
true
""")
Example #27
0
    def init(self) -> Action:
        create_tables = "\n".join(
            f"> CREATE TABLE t{i} (f1 INTEGER);\n> INSERT INTO t{i} DEFAULT VALUES;"
            for i in range(0, self.n()))
        create_sources = "\n".join(f"""
> DROP CONNECTION IF EXISTS s1_kafka_conn CASCADE
> DROP CONNECTION IF EXISTS s1_csr_conn CASCADE

> CREATE CONNECTION s1_kafka_conn
  FOR KAFKA BROKER '${{testdrive.kafka-addr}}'

> CREATE CONNECTION IF NOT EXISTS s1_csr_conn
FOR CONFLUENT SCHEMA REGISTRY
URL '${{testdrive.schema-registry-url}}';

> CREATE SOURCE source{i}
  FROM KAFKA CONNECTION s1_kafka_conn
  TOPIC 'testdrive-startup-time-${{testdrive.seed}}'
  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION s1_kafka_conn
  ENVELOPE NONE
""" for i in range(0, self.n()))
        join = " ".join(f"LEFT JOIN source{i} USING (f2)"
                        for i in range(1, (ceil(self.scale()))))

        create_views = "\n".join(
            f"> CREATE MATERIALIZED VIEW v{i} AS SELECT * FROM source{i} AS s {join} LIMIT {i+1}"
            for i in range(0, self.n()))

        create_sinks = "\n".join(f"""
> CREATE SINK sink{i} FROM source{i}
  INTO KAFKA CONNECTION s1_kafka_conn TOPIC 'testdrive-sink-output-${{testdrive.seed}}'
  KEY (f2)
  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION s1_csr_conn
""" for i in range(0, self.n()))

        return TdAction(f"""
{create_tables}
{create_sources}
{create_views}
{create_sinks}
""")
Example #28
0
    def init(self) -> Action:
        return TdAction(f"""
> DROP CONNECTION IF EXISTS s1_kafka_conn CASCADE
> DROP CONNECTION IF EXISTS s1_csr_conn CASCADE

> CREATE CONNECTION s1_kafka_conn
  FOR KAFKA BROKER '${{testdrive.kafka-addr}}'

> CREATE CONNECTION s1_csr_conn
FOR CONFLUENT SCHEMA REGISTRY
URL '${{testdrive.schema-registry-url}}';

> CREATE SOURCE source1
  FROM KAFKA CONNECTION s1_kafka_conn
  TOPIC 'testdrive-sink-input-${{testdrive.seed}}'
  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION s1_csr_conn
  ENVELOPE UPSERT;

> SELECT COUNT(*) FROM source1;
{self.n()}
""")
Example #29
0
    def view_ten(self) -> TdAction:
        return TdAction("""
> CREATE VIEW ten (f1) AS (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9));
""")
Example #30
0
    def before(self) -> Action:
        return TdAction(f"""
> DROP SOURCE IF EXISTS mz_source_pgcdc;
            """)