def shared(self) -> Action:
    """Create the 16-partition sink-input topic and pre-ingest n() keyed Avro records."""
    ingest = f"""
$ kafka-create-topic topic=sink-input partitions=16

$ kafka-ingest format=avro topic=sink-input key-format=avro key-schema=${{keyschema}} schema=${{schema}} repeat={self.n()}
{{"f1": ${{kafka-ingest.iteration}} }} {{"f2": ${{kafka-ingest.iteration}} }}
"""
    return TdAction(self.keyschema() + self.schema() + ingest)
def shared(self) -> Action:
    """Create the 4-partition kafka-parallel-ingestion topic and publish n() keyed records."""
    ingest = f"""
$ kafka-create-topic topic=kafka-parallel-ingestion partitions=4

$ kafka-ingest format=avro topic=kafka-parallel-ingestion key-format=avro key-schema=${{keyschema}} schema=${{schema}} repeat={self.n()} publish=true
{{"f1": ${{kafka-ingest.iteration}} }} {{"f2": ${{kafka-ingest.iteration}} }}
"""
    return TdAction(self.schema() + self.keyschema() + ingest)
def shared(self) -> Action:
    """Create the kafka-envelope-none-bytes topic and ingest n() fixed 50-byte records."""
    script = f"""
$ kafka-create-topic topic=kafka-envelope-none-bytes

$ kafka-ingest format=bytes topic=kafka-envelope-none-bytes repeat={self.n()}
12345678901234567890123456789012345678901234567890
"""
    return TdAction(script)
def init(self) -> List[Action]:
    """Set up the ten-row helper view plus a two-column materialized view over the cross join."""
    create_v1 = TdAction(f"""
> CREATE MATERIALIZED VIEW v1 AS SELECT {self.unique_values()} AS f1, {self.unique_values()} AS f2 FROM {self.join()}
""")
    return [self.view_ten(), create_v1]
def shared(self) -> List[Action]:
    """Create the kafka-recovery-big topic and bulk-load it with kgen.

    Returns the topic-creation action, a bulk random-key load, and a final
    batch of low-key "EOF marker" records used later to detect completeness.
    """
    return [
        TdAction(
            "$ kafka-create-topic topic=kafka-recovery-big partitions=8"),
        # Ingest 10 ** SCALE records
        Kgen(
            topic="kafka-recovery-big",
            args=[
                "--keys=random",
                f"--num-records={self.n()}",
                "--values=bytes",
                "--max-message-size=32",
                "--min-message-size=32",
                "--key-min=256",
                # key space grows quadratically with n() to keep collisions rare
                f"--key-max={256+(self.n()**2)}",
            ],
        ),
        # Add 256 EOF markers with key values <= 256.
        # This high number is chosen as to guarantee that there will be an EOF marker
        # in each partition, even if the number of partitions is increased in the future.
        Kgen(
            topic="kafka-recovery-big",
            args=[
                "--keys=sequential",
                "--num-records=256",
                "--values=bytes",
                "--min-message-size=32",
                "--max-message-size=32",
            ],
        ),
    ]
def shared(self) -> Action:
    """Create the startup-time topic and ingest a single Avro record."""
    ingest = f"""
$ kafka-create-topic topic=startup-time

$ kafka-ingest format=avro topic=startup-time schema=${{schema}} repeat=1
{{"f2": 1}}
"""
    return TdAction(self.schema() + ingest)
def init(self) -> Action:
    """Recreate the Kafka/CSR connections and source s1, then wait until it is fully caught up."""
    script = f"""
> DROP CONNECTION IF EXISTS s1_kafka_conn CASCADE

> DROP CONNECTION IF EXISTS s1_csr_conn CASCADE

> CREATE CONNECTION s1_kafka_conn FOR KAFKA BROKER '${{testdrive.kafka-addr}}'

> CREATE CONNECTION IF NOT EXISTS s1_csr_conn FOR CONFLUENT SCHEMA REGISTRY URL '${{testdrive.schema-registry-url}}';

> CREATE SOURCE s1 FROM KAFKA CONNECTION s1_kafka_conn TOPIC 'testdrive-kafka-recovery-${{testdrive.seed}}' FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION s1_csr_conn ENVELOPE UPSERT;

# Make sure we are fully caught up before continuing
> SELECT COUNT(*) FROM s1;
{self.n()}

# Give time for any background tasks (e.g. compaction) to settle down
> SELECT mz_internal.mz_sleep(10)
<null>
"""
    return TdAction(script)
def shared(self) -> Action:
    """Create the 16-partition upsert-unique topic and publish n() records with unique keys."""
    ingest = f"""
$ kafka-create-topic topic=upsert-unique partitions=16

$ kafka-ingest format=avro topic=upsert-unique key-format=avro key-schema=${{keyschema}} schema=${{schema}} publish=true repeat={self.n()}
{{"f1": ${{kafka-ingest.iteration}} }} {{"f2": ${{kafka-ingest.iteration}} }}
"""
    return TdAction(self.keyschema() + self.schema() + ingest)
def shared(self) -> Action:
    """Create the kafka-raw topic and ingest n() identical value-only Avro records."""
    ingest = f"""
$ kafka-create-topic topic=kafka-raw

$ kafka-ingest format=avro topic=kafka-raw schema=${{schema}} repeat={self.n()}
{{"f2": 1}}
"""
    return TdAction(self.schema() + ingest)
def init(self) -> Action:
    """Create n() tables, sources, views and sinks whose combined startup cost is measured."""
    # One single-column table per object, each pre-seeded with one default row.
    create_tables = "\n".join(
        f"> CREATE TABLE t{i} (f1 INTEGER);\n> INSERT INTO t{i} DEFAULT VALUES;"
        for i in range(0, self.n()))
    # Every source reads the same pre-populated startup-time topic.
    create_sources = "\n".join(f"""
> CREATE MATERIALIZED SOURCE source{i} FROM KAFKA BROKER '${{testdrive.kafka-addr}}' TOPIC 'testdrive-startup-time-${{testdrive.seed}}' FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY '${{testdrive.schema-registry-url}}' ENVELOPE NONE
""" for i in range(0, self.n()))
    # A ceil(scale())-way LEFT JOIN chain shared by every view below.
    join = " ".join(f"LEFT JOIN source{i} USING (f2)"
                    for i in range(1, (ceil(self.scale()))))
    create_views = "\n".join(
        f"> CREATE MATERIALIZED VIEW v{i} AS SELECT * FROM source{i} AS s {join} LIMIT {i+1}"
        for i in range(0, self.n()))
    # reuse_topic=true lets all sinks write to the single shared output topic.
    create_sinks = "\n".join(f"""
> CREATE SINK sink{i} FROM source{i} INTO KAFKA BROKER '${{testdrive.kafka-addr}}' TOPIC 'testdrive-sink-output-${{testdrive.seed}}' KEY (f2) WITH (reuse_topic=true) FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY '${{testdrive.schema-registry-url}}'
""" for i in range(0, self.n()))
    return TdAction(f"""
{create_tables}
{create_sources}
{create_views}
{create_sinks}
""")
def shared(self) -> Action:
    """Create the kafka-envelope-none-bytes topic and ingest n() 512-byte records."""
    payload = "a" * 512
    script = f"""
$ kafka-create-topic topic=kafka-envelope-none-bytes

$ kafka-ingest format=bytes topic=kafka-envelope-none-bytes repeat={self.n()}
{payload}
"""
    return TdAction(script)
def init(self) -> List[Action]:
    """Create the ten-row helper table plus t1 populated with n() unique values."""
    seed_t1 = TdAction(f"""
> CREATE TABLE t1 (f1 BIGINT);

> INSERT INTO t1 SELECT {self.unique_values()} FROM {self.join()}
""")
    return [self.table_ten(), seed_t1]
def init(self) -> List[Action]:
    """Create the ten-row view plus materialized view v1, and verify its row count."""
    create_v1 = TdAction(f"""
> CREATE MATERIALIZED VIEW v1 AS SELECT {self.unique_values()} AS f1 FROM {self.join()};

> SELECT COUNT(*) = {self.n()} FROM v1;
true
""")
    return [self.view_ten(), create_v1]
def init(self) -> Action:
    """Create source1 over the sink-input topic and wait until all n() records are ingested."""
    script = f"""
> CREATE MATERIALIZED SOURCE source1 FROM KAFKA BROKER '${{testdrive.kafka-addr}}' TOPIC 'testdrive-sink-input-${{testdrive.seed}}' FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY '${{testdrive.schema-registry-url}}' ENVELOPE UPSERT;

> SELECT COUNT(*) FROM source1;
{self.n()}
"""
    return TdAction(script)
def shared(self) -> Action:
    """Prepare Postgres: enable replication, reset the public schema and (re)create publication p1."""
    # Plain string literal — the script contains no Python interpolation,
    # so the previous f-prefix was unnecessary (ruff F541).
    return TdAction("""
$ postgres-execute connection=postgres://postgres:postgres@postgres
ALTER USER postgres WITH replication;
DROP SCHEMA IF EXISTS public CASCADE;
CREATE SCHEMA public;
DROP PUBLICATION IF EXISTS p1;
CREATE PUBLICATION p1 FOR ALL TABLES;
""")
def shared(self) -> Action:
    """Create the kafka-upsert topic: n() updates for key 1, then a single record for key 2."""
    ingest = f"""
$ kafka-create-topic topic=kafka-upsert

$ kafka-ingest format=avro topic=kafka-upsert key-format=avro key-schema=${{keyschema}} schema=${{schema}} repeat={self.n()}
{{"f1": 1}} {{"f2": ${{kafka-ingest.iteration}} }}

$ kafka-ingest format=avro topic=kafka-upsert key-format=avro key-schema=${{keyschema}} schema=${{schema}}
{{"f1": 2}} {{"f2": 2}}
"""
    return TdAction(self.keyschema() + self.schema() + ingest)
def before(self) -> Action:
    """Reset t1 and repopulate it with n()+1 rows ahead of each measurement."""
    # Due to extreme variability in the results, we have no option but to drop and re-create
    # the table prior to each measurement
    return TdAction(f"""
> DROP TABLE IF EXISTS t1;

> CREATE TABLE t1 (f1 BIGINT);

> INSERT INTO t1 SELECT * FROM generate_series(0, {self.n()})
""")
def init(self) -> List[Action]:
    """Create the ten-row table, an indexed two-column view v1, and verify its row count."""
    setup_v1 = TdAction(f"""
> CREATE MATERIALIZED VIEW v1 (f1, f2) AS SELECT {self.unique_values()} AS f1, 1 AS f2 FROM {self.join()}

> CREATE DEFAULT INDEX ON v1;

> SELECT COUNT(*) = {self.n()} FROM v1;
true
""")
    return [self.table_ten(), setup_v1]
def init(self) -> List[Action]:
    """Create the ten-row table plus t1 seeded with unique f1 values, then probe for hydration."""
    seed_t1 = TdAction(f"""
> CREATE TABLE t1 (f1 INTEGER, f2 INTEGER);

> INSERT INTO t1 (f1) SELECT {self.unique_values()} FROM {self.join()}

# Make sure the dataflow is fully hydrated
> SELECT 1 FROM t1 WHERE f1 = 0;
1
""")
    return [self.table_ten(), seed_t1]
def init(self) -> Action:
    """Create source s1 over kafka-recovery-big and wait for all EOF markers to arrive."""
    script = """
> CREATE SOURCE s1 FROM KAFKA BROKER '${testdrive.kafka-addr}' TOPIC 'testdrive-kafka-recovery-big-${testdrive.seed}' FORMAT BYTES ENVELOPE UPSERT;

# Confirm that all the EOF markers generated above have been processed
> CREATE MATERIALIZED VIEW s1_is_complete AS SELECT COUNT(*) = 256 FROM s1 WHERE key0 <= '\\x00000000000000ff'

> SELECT * FROM s1_is_complete;
true
"""
    return TdAction(script)
def before(self) -> Action:
    """Reset the Postgres table t1 and recreate the CDC source s1 before each measurement."""
    # Plain string literal — nothing is interpolated here, so the previous
    # f-prefix was unnecessary (ruff F541).
    return TdAction("""
> DROP SOURCE IF EXISTS s1;

$ postgres-execute connection=postgres://postgres:postgres@postgres
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (pk SERIAL PRIMARY KEY, f2 BIGINT);
ALTER TABLE t1 REPLICA IDENTITY FULL;

> CREATE SOURCE s1 FROM POSTGRES CONNECTION 'host=postgres port=5432 user=postgres password=postgres sslmode=require dbname=postgres' PUBLICATION 'p1';
""")
def before(self) -> Action:
    """Rebuild the ten-row table and materialized view v1, verifying the expected row count."""
    script = f"""
> DROP TABLE IF EXISTS ten CASCADE;

> CREATE TABLE ten (f1 INTEGER);

> INSERT INTO ten VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);

> CREATE MATERIALIZED VIEW v1 AS SELECT {self.unique_values()} FROM {self.join()}

> SELECT COUNT(*) = {self.n()} AS f1 FROM v1;
true
"""
    return TdAction(script)
def shared(self) -> Action:
    """Prepare Postgres: publication mz_source plus pk_table pre-loaded with n() rows."""
    pg_setup = f"""
$ postgres-execute connection=postgres://postgres:postgres@postgres
ALTER USER postgres WITH replication;
DROP SCHEMA IF EXISTS public CASCADE;
CREATE SCHEMA public;
DROP PUBLICATION IF EXISTS mz_source;
CREATE PUBLICATION mz_source FOR ALL TABLES;
CREATE TABLE pk_table (pk BIGINT PRIMARY KEY, f2 BIGINT);
INSERT INTO pk_table SELECT x, x*2 FROM generate_series(1, {self.n()}) as x;
ALTER TABLE pk_table REPLICA IDENTITY FULL;
"""
    return TdAction(pg_setup)
def init(self) -> Action:
    """Create source s1 over kafka-recovery, wait for full ingestion, then let compaction settle."""
    script = f"""
> CREATE MATERIALIZED SOURCE s1 FROM KAFKA BROKER '${{testdrive.kafka-addr}}' TOPIC 'testdrive-kafka-recovery-${{testdrive.seed}}' FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY '${{testdrive.schema-registry-url}}' ENVELOPE UPSERT;

# Make sure we are fully caught up before continuing
> SELECT COUNT(*) FROM s1;
{self.n()}

# Give time for any background tasks (e.g. compaction) to settle down
> SELECT mz_internal.mz_sleep(10)
<null>
"""
    return TdAction(script)
def shared(self) -> List[Action]:
    """Create the 8-partition kafka-scalability topic and bulk-load n() records via kgen."""
    return [
        # Plain string literal — no interpolation happens in the topic-creation
        # script, so the previous f-prefix was unnecessary (ruff F541).
        TdAction("""
$ kafka-create-topic topic=kafka-scalability partitions=8
"""),
        Kgen(
            topic="kafka-scalability",
            args=[
                "--keys=sequential",
                f"--num-records={self.n()}",
                "--values=bytes",
                "--max-message-size=100",
                "--min-message-size=100",
            ],
        ),
    ]
def init(self) -> Action:
    """Create the ten table and v1, inserting the ten rows one at a time."""
    # Just to spice things up a bit, we perform individual
    # inserts here so that the rows are assigned separate timestamps
    inserts = "\n\n".join(f"> INSERT INTO ten VALUES ({v})" for v in range(0, 10))
    script = f"""
> CREATE TABLE ten (f1 INTEGER);

> CREATE MATERIALIZED VIEW v1 AS SELECT {self.unique_values()} AS f1 FROM {self.join()};

{inserts}

> SELECT COUNT(*) = {self.n()} FROM v1;
true
"""
    return TdAction(script)
def init(self) -> Action:
    """Create n() tables, sources, views and sinks (connection-based syntax) for startup measurement.

    Fixes two defects in the generated script:
    * the connection DDL used to run once per source, and each
      `DROP CONNECTION ... CASCADE` destroyed every source created in the
      previous iterations — the connections are now created exactly once;
    * the schema-registry clause of CREATE SOURCE referenced the Kafka
      connection (`s1_kafka_conn`) instead of the CSR connection.
    """
    # One single-column table per object, each pre-seeded with one default row.
    create_tables = "\n".join(
        f"> CREATE TABLE t{i} (f1 INTEGER);\n> INSERT INTO t{i} DEFAULT VALUES;"
        for i in range(0, self.n()))
    # Shared connections, created once up front (not inside the per-source loop).
    create_connections = f"""
> DROP CONNECTION IF EXISTS s1_kafka_conn CASCADE

> DROP CONNECTION IF EXISTS s1_csr_conn CASCADE

> CREATE CONNECTION s1_kafka_conn FOR KAFKA BROKER '${{testdrive.kafka-addr}}'

> CREATE CONNECTION IF NOT EXISTS s1_csr_conn FOR CONFLUENT SCHEMA REGISTRY URL '${{testdrive.schema-registry-url}}';
"""
    # Every source reads the same pre-populated startup-time topic; the CSR
    # clause must use the schema-registry connection, not the Kafka one.
    create_sources = "\n".join(f"""
> CREATE SOURCE source{i} FROM KAFKA CONNECTION s1_kafka_conn TOPIC 'testdrive-startup-time-${{testdrive.seed}}' FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION s1_csr_conn ENVELOPE NONE
""" for i in range(0, self.n()))
    # A ceil(scale())-way LEFT JOIN chain shared by every view below.
    join = " ".join(f"LEFT JOIN source{i} USING (f2)"
                    for i in range(1, (ceil(self.scale()))))
    create_views = "\n".join(
        f"> CREATE MATERIALIZED VIEW v{i} AS SELECT * FROM source{i} AS s {join} LIMIT {i+1}"
        for i in range(0, self.n()))
    create_sinks = "\n".join(f"""
> CREATE SINK sink{i} FROM source{i} INTO KAFKA CONNECTION s1_kafka_conn TOPIC 'testdrive-sink-output-${{testdrive.seed}}' KEY (f2) FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION s1_csr_conn
""" for i in range(0, self.n()))
    return TdAction(f"""
{create_tables}
{create_connections}
{create_sources}
{create_views}
{create_sinks}
""")
def init(self) -> Action:
    """Recreate the Kafka/CSR connections and source1, then wait for full ingestion."""
    script = f"""
> DROP CONNECTION IF EXISTS s1_kafka_conn CASCADE

> DROP CONNECTION IF EXISTS s1_csr_conn CASCADE

> CREATE CONNECTION s1_kafka_conn FOR KAFKA BROKER '${{testdrive.kafka-addr}}'

> CREATE CONNECTION s1_csr_conn FOR CONFLUENT SCHEMA REGISTRY URL '${{testdrive.schema-registry-url}}';

> CREATE SOURCE source1 FROM KAFKA CONNECTION s1_kafka_conn TOPIC 'testdrive-sink-input-${{testdrive.seed}}' FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION s1_csr_conn ENVELOPE UPSERT;

> SELECT COUNT(*) FROM source1;
{self.n()}
"""
    return TdAction(script)
def view_ten(self) -> TdAction:
    """Return an action that creates the ten-row helper view used by join-based scenarios."""
    ddl = """
> CREATE VIEW ten (f1) AS (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9));
"""
    return TdAction(ddl)
def before(self) -> Action:
    """Drop the Postgres CDC source left over from a previous measurement, if any."""
    # Plain string literal — nothing is interpolated here, so the previous
    # f-prefix was unnecessary (ruff F541).
    return TdAction("""
> DROP SOURCE IF EXISTS mz_source_pgcdc;
""")