Example #1
def populate(c: Composition) -> None:
    # Create some database objects
    c.testdrive(
        dedent("""
            > CREATE TABLE t1 (f1 INTEGER);
            > INSERT INTO t1 SELECT * FROM generate_series(1, 10);
            > CREATE MATERIALIZED VIEW v1 AS SELECT COUNT(*) AS c1 FROM t1;
            > CREATE TABLE ten (f1 INTEGER);
            > INSERT INTO ten SELECT * FROM generate_series(1, 10);
            > CREATE MATERIALIZED VIEW expensive AS SELECT (a1.f1 * 1) +
              (a2.f1 * 10) +
              (a3.f1 * 100) +
              (a4.f1 * 1000) +
              (a5.f1 * 10000) +
              (a6.f1 * 100000) +
              (a7.f1 * 1000000)
              FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4, ten AS a5, ten AS a6, ten AS a7;
            $ kafka-create-topic topic=source1
            $ kafka-ingest format=bytes topic=source1 repeat=1000000
            A${kafka-ingest.iteration}
            > CREATE SOURCE source1
              FROM KAFKA BROKER '${testdrive.kafka-addr}' TOPIC 'testdrive-source1-${testdrive.seed}'
              FORMAT BYTES
            > CREATE MATERIALIZED VIEW v2 AS SELECT COUNT(*) FROM source1
            """), )
Example #2
    def run(self, c: Composition) -> None:
        if not self.new_sink:
            return

        c.testdrive(
            dedent(f"""
                > CREATE CONNECTION IF NOT EXISTS {self.sink.name}_kafka_conn FOR KAFKA BROKER '${{testdrive.kafka-addr}}';
                > CREATE CONNECTION IF NOT EXISTS {self.sink.name}_csr_conn FOR CONFLUENT SCHEMA REGISTRY URL '${{testdrive.schema-registry-url}}';

                > CREATE SINK {self.sink.name} FROM {self.source_view.name}
                  INTO KAFKA CONNECTION {self.sink.name}_kafka_conn
                  TOPIC 'sink-{self.sink.name}'
                  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION {self.sink.name}_csr_conn;

                # Ingest the sink's output topic back in so that its contents can be validated

                > CREATE SOURCE {self.sink.name}_source
                  FROM KAFKA CONNECTION {self.sink.name}_kafka_conn
                  TOPIC 'sink-{self.sink.name}'
                  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION {self.sink.name}_csr_conn
                  ENVELOPE NONE

                # The sink-derived source has upsert semantics, so produce a "normal" ViewExists output
                # from the 'before' and the 'after'

                > CREATE MATERIALIZED VIEW {self.dest_view.name} AS
                  SELECT SUM(min)::int AS min, SUM(max)::int AS max, SUM(c1)::int AS c1, SUM(c2)::int AS c2 FROM (
                    SELECT (after).min, (after).max, (after).c1, (after).c2 FROM {self.sink.name}_source
                    UNION ALL
                    SELECT - (before).min, - (before).max, -(before).c1, -(before).c2 FROM {self.sink.name}_source
                  );
            """))
Example #3
def create_invalid_replica(c: Composition) -> None:
    c.testdrive(
        dedent(
            """
            > CREATE CLUSTER REPLICA cluster1.replica3 REMOTE ['no_such_host:2100']
            """
        )
    )
Example #4
def drop_create_replica(c: Composition) -> None:
    c.testdrive(
        dedent(
            """
            > DROP CLUSTER REPLICA cluster1.replica1
            > CREATE CLUSTER REPLICA cluster1.replica3 REMOTE ['computed_1_1:2100', 'computed_1_2:2100']
            """
        )
    )
Example #5
    def run(self, c: Composition) -> None:
        if self.new_source:
            c.testdrive(f"""
> CREATE MATERIALIZED SOURCE {self.source.name}
  FROM KAFKA BROKER '${{testdrive.kafka-addr}}'
  TOPIC 'testdrive-{self.topic.name}-${{testdrive.seed}}'
  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY '${{testdrive.schema-registry-url}}'
  ENVELOPE UPSERT
""")
Example #6
    def run(self, c: Composition) -> None:
        if self.new_topic:
            c.testdrive(f"""
$ kafka-create-topic topic={self.topic.name} partitions={self.topic.partitions}

{SCHEMA}

$ kafka-ingest format=avro key-format=avro topic={self.topic.name} schema=${{schema}} key-schema=${{keyschema}} repeat=1
{{"key": 0}} {{"f1": 0}}
""")
Example #7
    def run(self, c: Composition) -> None:
        watermarks = self.view.get_watermarks()
        view_min = watermarks.min
        view_max = watermarks.max

        if view_min <= view_max:
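            # Expect the view's MIN, MAX, COUNT and COUNT DISTINCT to reflect the current watermarks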
            c.testdrive(f"""
> SELECT * FROM {self.view.name} /* {view_min} {view_max} {(view_max-view_min)+1} {(view_max-view_min)+1} */ ;
{view_min} {view_max} {(view_max-view_min)+1} {(view_max-view_min)+1}
""")
Example #8
    def run(self, c: Composition) -> None:
        prev_high = self.topic.watermarks.high
        self.topic.watermarks.high = prev_high + self.delta
        assert self.topic.watermarks.high >= 0
        assert self.topic.watermarks.low >= 0
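        # Ingest self.delta additional records, starting just past the previous high watermark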
        c.testdrive(f"""
{SCHEMA}

$ kafka-ingest format=avro key-format=avro topic={self.topic.name} schema=${{schema}} key-schema=${{keyschema}} start-iteration={prev_high + 1} publish=true repeat={self.delta}
{{"f1": ${{kafka-ingest.iteration}}}} {{"f2": ${{kafka-ingest.iteration}}}}
""")
Example #9
    def run(self, c: Composition) -> None:
        prev_max = self.topic.watermarks.max
        self.topic.watermarks.max = prev_max + self.delta
        assert self.topic.watermarks.max >= 0
        assert self.topic.watermarks.min >= 0
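        # Append self.delta new keyed records after the previous maximum key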
        c.testdrive(f"""
{SCHEMA}

$ kafka-ingest format=avro key-format=avro topic={self.topic.name} schema=${{schema}} key-schema=${{keyschema}} start-iteration={prev_max + 1} repeat={self.delta}
{{"key": ${{kafka-ingest.iteration}}}} {{"f1": ${{kafka-ingest.iteration}}}}
""")
Example #10
    def run(self, c: Composition) -> None:
        froms: WatermarkedObjects = self.view.froms
        low = max([f.get_watermarks().low for f in froms])
        high = min([f.get_watermarks().high for f in froms])

        if low <= high:
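            # The aggregates should cover exactly the overlap of the inputs' watermark ranges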
            c.testdrive(
                f"""
> SELECT * FROM {self.view.name};
{low} {high} {(high-low)+1} {(high-low)+1}
"""
            )
Example #11
    def run(self, c: Composition) -> None:
        if self.new_source:
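            # Derive the envelope keyword (e.g. UPSERT) from the Envelope enum member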
            envelope = str(self.topic.envelope).split(".")[1]
            c.testdrive(f"""
> CREATE CONNECTION IF NOT EXISTS {self.source.name}_csr_conn FOR CONFLUENT SCHEMA REGISTRY URL '${{testdrive.schema-registry-url}}';

> CREATE SOURCE {self.source.name}
  FROM KAFKA BROKER '${{testdrive.kafka-addr}}'
  TOPIC 'testdrive-{self.topic.name}-${{testdrive.seed}}'
  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION {self.source.name}_csr_conn
  ENVELOPE {envelope}
""")
Example #12
def workflow_test_builtin_migration(c: Composition) -> None:
    """Exercise the builtin object migration code by upgrading between two versions
    that will have a migration triggered between them. Create a materialized view
    over the affected builtin object to confirm that the migration was successful.
    """

    c.down(destroy_volumes=True)
    with c.override(
            # Random commit before pg_roles was updated.
            Materialized(
                image=
                "materialize/materialized:devel-9efd269199b1510b3e8f90196cb4fa3072a548a1",
            ),
            Testdrive(default_timeout="15s",
                      no_reset=True,
                      consistent_seed=True),
    ):
        c.up("testdrive", persistent=True)
        c.up("materialized")
        c.wait_for_materialized()

        c.testdrive(input=dedent("""
        > CREATE VIEW v1 AS SELECT COUNT(*) FROM pg_roles;
        > SELECT * FROM v1;
        2
        ! SELECT DISTINCT rolconnlimit FROM pg_roles;
        contains:column "rolconnlimit" does not exist
    """))

        c.kill("materialized")

    with c.override(
            # This will stop working if we introduce a breaking change.
            Materialized(),
            Testdrive(default_timeout="15s",
                      no_reset=True,
                      consistent_seed=True),
    ):
        c.up("testdrive", persistent=True)
        c.up("materialized")
        c.wait_for_materialized()

        c.testdrive(input=dedent("""
        > SELECT * FROM v1;
        2
        # This column is new after the migration
        > SELECT DISTINCT rolconnlimit FROM pg_roles;
        -1
    """))
Example #13
    def run(self, c: Composition) -> None:
        prev_low = self.topic.watermarks.low
        self.topic.watermarks.low = min(prev_low + self.delta,
                                        self.topic.watermarks.high)
        assert self.topic.watermarks.high >= 0
        assert self.topic.watermarks.low >= 0
        actual_delta = self.topic.watermarks.low - prev_low

        if actual_delta > 0:
            c.testdrive(f"""
{SCHEMA}

$ kafka-ingest format=avro topic={self.topic.name} key-format=avro key-schema=${{keyschema}} schema=${{schema}} start-iteration={prev_low} publish=true repeat={actual_delta}
{{"f1": ${{kafka-ingest.iteration}}}}
""")
Example #14
def populate(c: Composition) -> None:
    # Create some database objects
    c.testdrive(
        """
> SET cluster=cluster1

> DROP TABLE IF EXISTS t1 CASCADE;

> CREATE TABLE t1 (f1 TEXT);

> INSERT INTO t1 VALUES (1), (2);

> CREATE VIEW v1 AS SELECT COUNT(*) AS c1 FROM t1;

> CREATE DEFAULT INDEX i1 IN CLUSTER cluster2 ON v1;
""", )
Example #15
    def run(self, c: Composition) -> None:
        if self.topic.envelope is Envelope.NONE:
            return

        prev_min = self.topic.watermarks.min
        self.topic.watermarks.min = min(prev_min + self.delta,
                                        self.topic.watermarks.max)
        assert self.topic.watermarks.max >= 0
        assert self.topic.watermarks.min >= 0
        actual_delta = self.topic.watermarks.min - prev_min

        if actual_delta > 0:
            c.testdrive(f"""
{SCHEMA}

$ kafka-ingest format=avro topic={self.topic.name} key-format=avro key-schema=${{keyschema}} schema=${{schema}} start-iteration={prev_min} repeat={actual_delta}
{{"key": ${{kafka-ingest.iteration}}}}
""")
Example #16
def validate(c: Composition) -> None:
    # Validate that the cluster continues to operate
    c.testdrive(
        dedent(
            """
            # Dataflows

            > SELECT * FROM v1;
            10

            # Existing sources
            $ kafka-ingest format=bytes topic=source1 repeat=1000000
            B${kafka-ingest.iteration}
            > SELECT * FROM v2;
            2000000

            # Existing tables
            > INSERT INTO t1 VALUES (20);
            > SELECT * FROM v1;
            11

            # New materialized views
            > CREATE MATERIALIZED VIEW v3 AS SELECT COUNT(*) AS c1 FROM t1;
            > SELECT * FROM v3;
            11

            # New tables
            > CREATE TABLE t2 (f1 INTEGER);
            > INSERT INTO t2 SELECT * FROM t1;
            > SELECT COUNT(*) FROM t2;
            11

            # New sources
            > CREATE MATERIALIZED SOURCE source2
              FROM KAFKA BROKER '${testdrive.kafka-addr}' TOPIC 'testdrive-source1-${testdrive.seed}'
              FORMAT BYTES
            > SELECT COUNT(*) FROM source2
            2000000
"""
        ),
    )
Example #17
    def run(self, c: Composition) -> None:
        if not self.new_view:
            return

        some_from = random.sample(self.view.froms, 1)[0]
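        # Aggregate over one randomly chosen input and join the remaining inputs on f1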
        outer_join = "\n  ".join(
            f"JOIN {f.name} USING (f1)" for f in self.view.froms[1:]
        )

        c.testdrive(
            f"""
> CREATE MATERIALIZED VIEW {self.view.name} AS
  SELECT
    MIN({some_from.name}.f1),
    MAX({some_from.name}.f1),
    COUNT({some_from.name}.f1) AS c1,
    COUNT(DISTINCT {some_from.name}.f1) AS c2
  FROM {self.view.froms[0].name}
  {outer_join};
"""
        )
Example #18
    def run(self, c: Composition) -> None:
        self.table.watermarks.low = min(self.table.watermarks.low + self.delta,
                                        self.table.watermarks.high)
        c.testdrive(
            f"> DELETE FROM {self.table.name} WHERE f1 < {self.table.watermarks.low};"
        )
Example #19
def validate(c: Composition) -> None:
    # Validate that cluster2 continues to operate
    c.testdrive(
        """
# Dataflows

$ set-regex match=\d{13} replacement=<TIMESTAMP>

> SET cluster=cluster2

> SELECT * FROM v1;
2

# Tables

> INSERT INTO t1 VALUES (3);

> SELECT * FROM t1;
1
2
3

# Introspection tables

> SHOW CLUSTERS LIKE 'cluster2'
cluster2

> SELECT name FROM mz_tables WHERE name = 't1';
t1

# DDL statements

> CREATE MATERIALIZED VIEW v2 AS SELECT COUNT(*) AS c1 FROM t1;

> SELECT * FROM v2;
3

> CREATE MATERIALIZED VIEW v1mat AS SELECT * FROM v1;

> CREATE INDEX i2 IN CLUSTER cluster2 ON t1 (f1);

> SELECT f1 FROM t1;
1
2
3

# Tables

> CREATE TABLE t2 (f1 INTEGER);

> INSERT INTO t2 VALUES (1);

> INSERT INTO t2 SELECT * FROM t2;

> SELECT * FROM t2;
1
1

# Sources

> CREATE CONNECTION IF NOT EXISTS kafka_conn FOR KAFKA BROKER '${testdrive.kafka-addr}';

> CREATE CONNECTION IF NOT EXISTS csr_conn
  FOR CONFLUENT SCHEMA REGISTRY
  URL '${testdrive.schema-registry-url}';

$ kafka-create-topic topic=source1 partitions=1
$ kafka-ingest format=bytes topic=source1
A

> CREATE SOURCE source1
  FROM KAFKA CONNECTION kafka_conn TOPIC 'testdrive-source1-${testdrive.seed}'
  FORMAT BYTES

> SELECT * FROM source1
A

# Sinks
> CREATE SINK sink1 FROM v1mat
  INTO KAFKA CONNECTION kafka_conn TOPIC 'sink1'
  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn

$ kafka-verify format=avro sink=materialize.public.sink1 sort-messages=true
{"before": null, "after": {"row":{"c1": 3}}, "transaction": {"id": "<TIMESTAMP>"}}
""", )
Example #20
0
def workflow_instance_size(c: Composition,
                           parser: WorkflowArgumentParser) -> None:
    """Create multiple clusters with multiple nodes and replicas each"""
    c.start_and_wait_for_tcp(
        services=["zookeeper", "kafka", "schema-registry"])

    parser.add_argument(
        "--workers",
        type=int,
        metavar="N",
        default=2,
        help="set the default number of workers",
    )

    parser.add_argument(
        "--clusters",
        type=int,
        metavar="N",
        default=16,
        help="set the number of clusters to create",
    )
    parser.add_argument(
        "--nodes",
        type=int,
        metavar="N",
        default=4,
        help="set the number of nodes per cluster",
    )
    parser.add_argument(
        "--replicas",
        type=int,
        metavar="N",
        default=4,
        help="set the number of replicas per cluster",
    )
    args = parser.parse_args()

    c.up("testdrive", persistent=True)
    c.up("materialized")
    c.wait_for_materialized()

    # Construct the required Computed instances and peer them into clusters
    computeds = []
    for cluster_id in range(0, args.clusters):
        for replica_id in range(0, args.replicas):
            nodes = []
            for node_id in range(0, args.nodes):
                node_name = f"computed_{cluster_id}_{replica_id}_{node_id}"
                nodes.append(node_name)

            for node_id in range(0, args.nodes):
                computeds.append(
                    Computed(name=nodes[node_id],
                             peers=nodes,
                             workers=args.workers))

    with c.override(*computeds):
        with c.override(Testdrive(seed=1, no_reset=True)):

            for n in computeds:
                c.up(n.name)

            # Create some input data
            c.testdrive(
                dedent("""
                    > CREATE TABLE ten (f1 INTEGER);
                    > INSERT INTO ten VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);

                    $ set schema={
                        "type" : "record",
                        "name" : "test",
                        "fields" : [
                            {"name":"f1", "type":"string"}
                        ]
                      }

                    $ kafka-create-topic topic=instance-size

                    $ kafka-ingest format=avro topic=instance-size schema=${schema} publish=true repeat=10000
                    {"f1": "fish"}
                    """))

            # Construct the required CREATE CLUSTER statements
            for cluster_id in range(0, args.clusters):
                replica_definitions = []
                for replica_id in range(0, args.replicas):
                    nodes = []
                    for node_id in range(0, args.nodes):
                        node_name = f"computed_{cluster_id}_{replica_id}_{node_id}"
                        nodes.append(node_name)

                    replica_name = f"replica_{cluster_id}_{replica_id}"

                    replica_definitions.append(f"{replica_name} (REMOTE [" +
                                               ", ".join(f"'{n}:2100'"
                                                         for n in nodes) +
                                               "])")

                c.sql(f"CREATE CLUSTER cluster_{cluster_id} REPLICAS (" +
                      ",".join(replica_definitions) + ")")

            # Construct some dataflows in each cluster
            for cluster_id in range(0, args.clusters):
                cluster_name = f"cluster_{cluster_id}"

                c.testdrive(
                    dedent(f"""
                         > SET cluster={cluster_name}

                         > CREATE DEFAULT INDEX ON ten;

                         > CREATE MATERIALIZED VIEW v_{cluster_name} AS
                           SELECT COUNT(*) AS c1 FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4;

                         > CREATE MATERIALIZED SOURCE s_{cluster_name}
                           FROM KAFKA BROKER '${{testdrive.kafka-addr}}' TOPIC
                           'testdrive-instance-size-${{testdrive.seed}}'
                           FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY '${{testdrive.schema-registry-url}}'
                           ENVELOPE NONE
                     """))

            # Validate that each individual cluster is operating properly
            for cluster_id in range(0, args.clusters):
                cluster_name = f"cluster_{cluster_id}"

                c.testdrive(
                    dedent(f"""
                         > SET cluster={cluster_name}

                         > SELECT c1 FROM v_{cluster_name};
                         10000

                         > SELECT COUNT(*) FROM s_{cluster_name}
                         10000
                     """))
Example #21
    def run(self, c: Composition) -> None:
        c.testdrive(f"""
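# The table's aggregates over f1 must agree with its recorded watermarks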
> SELECT MIN(f1), MAX(f1), COUNT(f1), COUNT(DISTINCT f1) FROM {self.table.name};
{self.table.watermarks.low} {self.table.watermarks.high} {(self.table.watermarks.high-self.table.watermarks.low)+1} {(self.table.watermarks.high-self.table.watermarks.low)+1}
""")
Example #22
    def run(self, c: Composition) -> None:
        prev_high = self.table.watermarks.high
        self.table.watermarks.high = prev_high + self.delta
        c.testdrive(
            f"> INSERT INTO {self.table.name} SELECT * FROM generate_series({prev_high + 1}, {self.table.watermarks.high});"
        )
Example #23
    def run(self, c: Composition) -> None:
        self.table.watermarks.shift(-self.delta)
        c.testdrive(f"> UPDATE {self.table.name} SET f1 = f1 - {self.delta};")
Example #24
    def run(self, c: Composition) -> None:
        self.table.watermarks.high = max(
            self.table.watermarks.high - self.delta, self.table.watermarks.low)
        c.testdrive(
            f"> DELETE FROM {self.table.name} WHERE f1 > {self.table.watermarks.high};"
        )
Example #25
    def run(self, c: Composition) -> None:
        if self.new_table:
            c.testdrive(f"""
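# Create the table and seed it with a single row at its current high watermark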
> CREATE TABLE {self.table.name} (f1 INTEGER);
> INSERT INTO {self.table.name} VALUES ({self.table.watermarks.high});
""")
Example #26
    def execute(self, c: Composition) -> None:
        c.testdrive(input=self.td_str)