def workflow_start_two_mzs(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Starts two Mz instances from different git tags for the purpose of
    manually running RQG comparison tests.
    """
    parser.add_argument(
        "--this-tag", help="Run Materialize with this git tag on port 6875"
    )
    parser.add_argument(
        "--other-tag", help="Run Materialize with this git tag on port 16875"
    )
    args = parser.parse_args()

    with c.override(
        Materialized(
            name="mz_this",
            image=f"materialize/materialized:{args.this_tag}" if args.this_tag else None,
            # Keep the mzdata, pgdata, etc. private to the container.
            volumes=[],
            allow_host_ports=True,
            ports=["6875:6875"],
        ),
        Materialized(
            name="mz_other",
            image=f"materialize/materialized:{args.other_tag}" if args.other_tag else None,
            volumes=[],
            allow_host_ports=True,
            ports=["16875:6875"],
        ),
    ):
        for mz in ["mz_this", "mz_other"]:
            c.up(mz)
            c.wait_for_materialized(service=mz)
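# A hypothetical invocation of the workflow above (the tags are illustrative
# placeholders, not values taken from this file):
#
#   ./mzcompose run start-two-mzs --this-tag=v0.26.0 --other-tag=v0.26.1
#
# After both instances report ready, they are reachable from the host on ports
# 6875 and 16875 for a manual RQG comparison run.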
def start_services(c: Composition, args: argparse.Namespace, instance: str) -> List[Service]:
    tag, options, nodes, workers = (
        (args.this_tag, args.this_options, args.this_nodes, args.this_workers)
        if instance == "this"
        else (args.other_tag, args.other_options, args.other_nodes, args.other_workers)
    )

    cluster_services: List[Service] = []

    if nodes:
        cluster_services.append(
            Materialized(
                image=f"materialize/materialized:{tag}" if tag else None,
            )
        )

        node_names = [f"computed_{n}" for n in range(0, nodes)]
        for node_id in range(0, nodes):
            cluster_services.append(
                Computed(
                    name=node_names[node_id],
                    workers=workers,
                    options=options,
                    peers=node_names,
                    image=f"materialize/computed:{tag}" if tag else None,
                )
            )
    else:
        cluster_services.append(
            Materialized(
                image=f"materialize/materialized:{tag}" if tag else None,
                workers=workers,
                options=options,
            )
        )

    with c.override(*cluster_services):
        print(f"The version of the '{instance.upper()}' Mz instance is:")
        c.run("materialized", "--version")

        # Single-binary legacy Mz instances only have port 6875 open,
        # so only check that port before proceeding.
        c.up("materialized")
        c.wait_for_materialized(port=6875)

        if nodes:
            print(f"Starting cluster for '{instance.upper()}' ...")
            c.up(*[f"computed_{n}" for n in range(0, nodes)])

            c.sql(
                "CREATE CLUSTER REPLICA default.feature_benchmark REMOTE ["
                + ",".join([f"'computed_{n}:2100'" for n in range(0, nodes)])
                + "];"
            )

            c.sql("DROP CLUSTER REPLICA default.default_replica")

        c.up("testdrive", persistent=True)

    return cluster_services
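# For illustration, with nodes=2 the SQL assembled above is:
#
#   CREATE CLUSTER REPLICA default.feature_benchmark
#       REMOTE ['computed_0:2100','computed_1:2100'];
#
# one remote address per computed_N service started by this function.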
def run_one_scenario(
    c: Composition, scenario: Type[Scenario], args: argparse.Namespace
) -> Comparator:
    name = scenario.__name__
    print(f"--- Now benchmarking {name} ...")
    comparator = make_comparator(name)
    common_seed = round(time.time())

    mzs = {
        "this": Materialized(
            image=f"materialize/materialized:{args.this_tag}" if args.this_tag else None,
            options=args.this_options,
        ),
        "other": Materialized(
            image=f"materialize/materialized:{args.other_tag}" if args.other_tag else None,
            options=args.other_options,
        ),
    }

    for mz_id, instance in enumerate(["this", "other"]):
        with c.override(mzs[instance]):
            print(f"The version of the '{instance.upper()}' Mz instance is:")
            c.run("materialized", "--version")

            c.start_and_wait_for_tcp(services=["materialized"])
            c.wait_for_materialized()

            executor = Docker(
                composition=c,
                seed=common_seed,
            )

            benchmark = Benchmark(
                mz_id=mz_id,
                scenario=scenario,
                scale=args.scale,
                executor=executor,
                filter=make_filter(args),
                termination_conditions=make_termination_conditions(args),
                aggregation=make_aggregation(),
            )

            outcome, iterations = benchmark.run()
            comparator.append(outcome)

            c.kill("materialized")
            c.rm("materialized", "testdrive-svc")
            c.rm_volumes("mzdata")

    return comparator
def workflow_test_builtin_migration(c: Composition) -> None:
    """Exercise the builtin object migration code by upgrading between two
    versions that will have a migration triggered between them. Create a
    materialized view over the affected builtin object to confirm that the
    migration was successful.
    """

    c.down(destroy_volumes=True)

    with c.override(
        # Random commit before pg_roles was updated.
        Materialized(
            image="materialize/materialized:devel-9efd269199b1510b3e8f90196cb4fa3072a548a1",
        ),
        Testdrive(default_timeout="15s", no_reset=True, consistent_seed=True),
    ):
        c.up("testdrive", persistent=True)
        c.up("materialized")
        c.wait_for_materialized()

        c.testdrive(
            input=dedent(
                """
                > CREATE VIEW v1 AS SELECT COUNT(*) FROM pg_roles;
                > SELECT * FROM v1;
                2
                ! SELECT DISTINCT rolconnlimit FROM pg_roles;
                contains:column "rolconnlimit" does not exist
                """
            )
        )

        c.kill("materialized")

    with c.override(
        # This will stop working if we introduce a breaking change.
        Materialized(),
        Testdrive(default_timeout="15s", no_reset=True, consistent_seed=True),
    ):
        c.up("testdrive", persistent=True)
        c.up("materialized")
        c.wait_for_materialized()

        c.testdrive(
            input=dedent(
                """
                > SELECT * FROM v1;
                2
                # This column is new after the migration.
                > SELECT DISTINCT rolconnlimit FROM pg_roles;
                -1
                """
            )
        )
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run the proxy tests."""
    parser.add_argument(
        "--redpanda",
        action="store_true",
        help="run against Redpanda instead of the Confluent Platform",
    )
    parser.add_argument(
        "--aws-region",
        help="run against the specified AWS region instead of localstack",
    )
    args = parser.parse_args()

    dependencies = ["squid"]
    if args.redpanda:
        dependencies += ["redpanda"]
    else:
        dependencies += ["zookeeper", "kafka", "schema-registry"]
    if not args.aws_region:
        dependencies += ["localstack"]
    c.start_and_wait_for_tcp(dependencies)

    aws_arg = (
        f"--aws-region={args.aws_region}"
        if args.aws_region
        else "--aws-endpoint=http://localstack:4566"
    )

    for test_case in test_cases:
        print(f"Running test case {test_case.name!r}")
        with c.override(Materialized(environment_extra=test_case.env)):
            c.up("materialized")
            c.wait_for_materialized("materialized")
            c.run("testdrive-svc", aws_arg, *test_case.files)
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Runs the dbt adapter test suite against Materialize in various configurations."""
    parser.add_argument(
        "filter", nargs="?", default="", help="limit to test cases matching filter"
    )
    args = parser.parse_args()

    for test_case in test_cases:
        if args.filter in test_case.name:
            print(f"> Running test case {test_case.name}")
            materialized = Materialized(
                options=test_case.materialized_options,
                image=test_case.materialized_image,
                depends_on=["test-certs"],
                volumes=["secrets:/secrets"],
            )
            with c.override(materialized):
                c.up("materialized")
                c.wait_for_tcp(host="materialized", port=6875)
                c.run(
                    "dbt-test",
                    "pytest",
                    "dbt-materialize/test",
                    env_extra=test_case.dbt_env,
                )
def workflow_disable_user_indexes(c: Composition) -> None:
    seed = round(time.time())

    c.start_and_wait_for_tcp(services=prerequisites)

    c.up("materialized")
    c.wait_for_materialized()

    c.run("testdrive-svc", f"--seed={seed}", "disable-user-indexes/before.td")

    c.kill("materialized")

    with c.override(Materialized(options=f"{mz_options} --disable-user-indexes")):
        c.up("materialized")
        c.wait_for_materialized()

        c.run("testdrive-svc", f"--seed={seed}", "disable-user-indexes/after.td")

        c.kill("materialized")

    c.rm("materialized", "testdrive-svc", destroy_volumes=True)
    c.rm_volumes("mzdata")
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run testdrive."""
    parser.add_argument(
        "--redpanda",
        action="store_true",
        help="run against Redpanda instead of the Confluent Platform",
    )
    parser.add_argument(
        "--aws-region",
        help="run against the specified AWS region instead of localstack",
    )
    parser.add_argument(
        "--workers",
        type=int,
        metavar="N",
        help="set the number of materialized dataflow workers",
    )
    parser.add_argument(
        "--persistent-user-tables",
        action="store_true",
        help="enable the --persistent-user-tables materialized option",
    )
    parser.add_argument(
        "files",
        nargs="*",
        default=["*.td", "esoteric/*.td"],
        help="run against the specified files",
    )
    args = parser.parse_args()

    if not args.redpanda and Arch.host() == Arch.AARCH64:
        ui.warn(
            "Running the Confluent Platform in Docker on ARM-based machines is "
            "nearly unusably slow. Consider using Redpanda instead (--redpanda) "
            "or running tests without mzcompose."
        )

    dependencies = ["materialized"]
    if args.redpanda:
        dependencies += ["redpanda"]
    else:
        dependencies += ["zookeeper", "kafka", "schema-registry"]

    materialized = Materialized(
        workers=args.workers,
        options=["--persistent-user-tables"] if args.persistent_user_tables else [],
    )

    testdrive = Testdrive(
        forward_buildkite_shard=True,
        entrypoint_extra=[f"--aws-region={args.aws_region}"]
        if args.aws_region
        else ["--aws-endpoint=http://localstack:4566"],
    )

    with c.override(materialized, testdrive):
        c.start_and_wait_for_tcp(services=dependencies)
        c.wait_for_materialized("materialized")
        c.run("testdrive-svc", *args.files)
        c.kill("materialized")
def test_upgrade_from_version(
    c: Composition, from_version: str, priors: List[str], filter: str, style: str = ""
) -> None:
    print(f"===>>> Testing upgrade from Materialize {from_version} to current_source.")

    version_glob = "{" + ",".join(["any_version", *priors, from_version]) + "}"
    print(">>> Version glob pattern: " + version_glob)

    c.rm("materialized", "testdrive-svc", stop=True)
    c.rm_volumes("mzdata", "tmp")

    if from_version != "current_source":
        mz_from = Materialized(
            image=f"materialize/materialized:{from_version}",
            options=" ".join(
                opt
                for start_version, opt in mz_options.items()
                if from_version[1:] >= start_version
            ),
            environment=[
                "SSL_KEY_PASSWORD=mzmzmz",
            ],
            volumes_extra=["secrets:/share/secrets"],
        )
        with c.override(mz_from):
            c.up("materialized")
    else:
        c.up("materialized")

    c.wait_for_materialized("materialized")

    temp_dir = f"--temp-dir=/share/tmp/upgrade-from-{from_version}"
    seed = f"--seed={random.getrandbits(32)}"
    c.run(
        "testdrive-svc",
        "--no-reset",
        f"--var=upgrade-from-version={from_version}",
        temp_dir,
        seed,
        f"create-{style}in-{version_glob}-{filter}.td",
    )

    c.kill("materialized")
    c.rm("materialized", "testdrive-svc")

    c.up("materialized")
    c.wait_for_materialized("materialized")

    c.run(
        "testdrive-svc",
        "--no-reset",
        f"--var=upgrade-from-version={from_version}",
        temp_dir,
        seed,
        "--validate-catalog=/share/mzdata/catalog",
        f"check-{style}from-{version_glob}-{filter}.td",
    )
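# For example, with from_version="v0.9.2", priors=["v0.9.0", "v0.9.1"],
# style="", and filter="kafka", the first c.run above executes
# create-in-{any_version,v0.9.0,v0.9.1,v0.9.2}-kafka.td, picking up every
# create-* file written for any of the listed versions.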
def workflow_compaction(c: Composition) -> None:
    with c.override(Materialized(options="--metrics-scraping-interval=1s")):
        c.up("materialized")
        c.wait_for_materialized()

        c.run("testdrive", "compaction/compaction.td")

        c.kill("materialized")

    c.rm("materialized", "testdrive", destroy_volumes=True)
    c.rm_volumes("mzdata", "pgdata")
def test_upgrade_from_version(
    c: Composition, from_version: str, priors: List[str], filter: str
) -> None:
    print(f"===>>> Testing upgrade from Materialize {from_version} to current_source.")

    version_glob = "|".join(["any_version", *priors, from_version])
    print(">>> Version glob pattern: " + version_glob)

    if from_version != "current_source":
        mz_from = Materialized(
            image=f"materialize/materialized:{from_version}",
            options=" ".join(
                opt
                for start_version, opt in mz_options.items()
                if from_version[1:] >= start_version
            ),
        )
        with c.override(mz_from):
            c.up("materialized")
    else:
        c.up("materialized")

    c.wait_for_materialized("materialized")

    temp_dir = f"--temp-dir=/share/tmp/upgrade-from-{from_version}"
    with patch.dict(os.environ, {"UPGRADE_FROM_VERSION": from_version}):
        c.run(
            "testdrive-svc",
            "--seed=1",
            "--no-reset",
            temp_dir,
            f"create-in-@({version_glob})-{filter}.td",
        )

    c.kill("materialized")
    c.rm("materialized", "testdrive-svc")

    c.up("materialized")
    c.wait_for_materialized("materialized")

    with patch.dict(os.environ, {"UPGRADE_FROM_VERSION": from_version}):
        c.run(
            "testdrive-svc",
            "--seed=1",
            "--no-reset",
            temp_dir,
            "--validate-catalog=/share/mzdata/catalog",
            f"check-from-@({version_glob})-{filter}.td",
        )

    c.kill("materialized")
    c.rm("materialized", "testdrive-svc")
    c.rm_volumes("mzdata", "tmp")
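# This variant builds an extglob alternation rather than a brace set: the same
# example inputs as in the brace-set variant above would yield
# create-in-@(any_version|v0.9.0|v0.9.1|v0.9.2)-kafka.td. Note also how
# mz_options is applied in both variants: an option is passed to the old
# materialized only when from_version (sans its leading "v") is at or above the
# version at which that option was introduced.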
def workflow_test_resource_limits(c: Composition) -> None:
    """Test resource limits in Materialize."""

    c.down(destroy_volumes=True)

    with c.override(
        Testdrive(),
        Materialized(),
    ):
        c.up("materialized")
        c.wait_for_materialized()

        c.run("testdrive", "resources/resource-limits.td")
def workflow_stash(c: Composition) -> None:
    c.rm(
        "testdrive",
        "materialized",
        stop=True,
        destroy_volumes=True,
    )
    c.rm_volumes("mzdata", "pgdata", force=True)

    materialized = Materialized(
        options=["--adapter-stash-url", "postgres://*****:*****@postgres"],
    )
    postgres = Postgres(image="postgres:14.4")

    with c.override(materialized, postgres):
        c.up("postgres")
        c.wait_for_postgres()
        c.start_and_wait_for_tcp(services=["materialized"])
        c.wait_for_materialized("materialized")

        c.sql("CREATE TABLE a (i INT)")

        c.stop("postgres")
        c.up("postgres")
        c.wait_for_postgres()

        c.sql("CREATE TABLE b (i INT)")

        c.rm("postgres", stop=True, destroy_volumes=True)
        c.up("postgres")
        c.wait_for_postgres()

        # Postgres cleared its database, so this should fail.
        try:
            c.sql("CREATE TABLE c (i INT)")
            raise Exception("expected unreachable")
        except Exception as e:
            # Depending on timing, either of these errors can occur. The stash
            # error comes from the stash complaining. The network error comes
            # from pg8000 complaining because materialize panic'd.
            if "stash error: postgres: db error" not in str(e) and "network error" not in str(e):
                raise e
def workflow_test_remote_storaged(c: Composition) -> None:
    """Test creating sources in a remote storaged process."""

    c.down(destroy_volumes=True)

    with c.override(
        Testdrive(default_timeout="15s", no_reset=True, consistent_seed=True),
        # Use a separate PostgreSQL service for persist rather than the one in
        # the `Materialized` service, so that crashing `environmentd` does not
        # also take down PostgreSQL.
        Postgres(),
        Materialized(
            options="--persist-consensus-url=postgres://postgres:postgres@postgres"
        ),
    ):
        dependencies = [
            "materialized",
            "postgres",
            "storaged",
            "redpanda",
        ]
        c.start_and_wait_for_tcp(services=dependencies)

        c.run("testdrive", "storaged/01-create-sources.td")

        c.kill("materialized")
        c.up("materialized")
        c.run("testdrive", "storaged/02-after-environmentd-restart.td")

        c.kill("storaged")
        c.run("testdrive", "storaged/03-while-storaged-down.td")

        c.up("storaged")
        c.run("testdrive", "storaged/04-after-storaged-restart.td")
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Runs the dbt adapter test suite against Materialize in various configurations."""
    parser.add_argument(
        "filter", nargs="?", default="", help="limit to test cases matching filter"
    )
    args = parser.parse_args()

    for test_case in test_cases:
        if args.filter in test_case.name:
            print(f"> Running test case {test_case.name}")
            materialized = Materialized(
                options=test_case.materialized_options,
                image=test_case.materialized_image,
                depends_on=["test-certs"],
                volumes_extra=["secrets:/secrets"],
            )
            with c.test_case(test_case.name):
                with c.override(materialized):
                    c.down()
                    c.start_and_wait_for_tcp(
                        services=["zookeeper", "kafka", "schema-registry"]
                    )
                    c.up("materialized")
                    c.wait_for_tcp(host="materialized", port=6875)
                    c.run(
                        "dbt-test",
                        "pytest",
                        "dbt-materialize/test",
                        env_extra={
                            "DBT_HOST": "materialized",
                            "KAFKA_ADDR": "kafka:9092",
                            "SCHEMA_REGISTRY_URL": "http://schema-registry:8081",
                            **test_case.dbt_env,
                        },
                    )
import os
import time

from materialize.mzcompose import Composition
from materialize.mzcompose.services import (
    Kafka,
    Materialized,
    SchemaRegistry,
    Testdrive,
    Zookeeper,
)

mz_options = "--persistent-user-tables --persistent-kafka-sources --disable-persistent-system-tables-test"

mz_default = Materialized(options=mz_options)

mz_logical_compaction_window_off = Materialized(
    # We need to use 1s and not 100ms here, as otherwise validate_timestamp_bindings()
    # dominates the CPU; see #10740.
    timestamp_frequency="1s",
    options=f"{mz_options} --logical-compaction-window=off",
)

# TODO: add back mz_logical_compaction_window_off in the line below.
# See: https://github.com/MaterializeInc/materialize/issues/10488
mz_configurations = [mz_default]

prerequisites = ["zookeeper", "kafka", "schema-registry"]

SERVICES = [
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

from materialize.mzcompose import Composition
from materialize.mzcompose.services import (
    Kafka,
    Materialized,
    SchemaRegistry,
    Testdrive,
    Zookeeper,
)

versioned_mz = [
    Materialized(
        name=f"materialized_{version}",
        image=f"materialize/materialized:{version}",
        hostname="materialized",
    )
    for version in ["v0.7.0", "v0.8.0"]
]

multiple_mz = [
    Materialized(
        name=f"materialized{i}",
        data_directory=f"/share/materialized{i}",
        port=6875 + i,
    )
    for i in [1, 2]
]

mz_with_options = [
    Materialized(name="mz_2_workers", hostname="materialized", options="--workers 2"),
    Materialized(
        name="mz_4_workers",
"SCHEMA_REGISTRY_AUTHENTICATION_METHOD=BASIC", "SCHEMA_REGISTRY_AUTHENTICATION_ROLES=user", "SCHEMA_REGISTRY_AUTHENTICATION_REALM=SchemaRegistry", "KAFKA_OPTS=-Djava.security.auth.login.config=/etc/schema-registry/sasl.jaas.config", "SCHEMA_REGISTRY_OPTS=-Djava.security.auth.login.config=/etc/schema-registry/sasl.jaas.config", ], volumes=[ "secrets:/etc/schema-registry/secrets", "./sasl.jaas.config:/etc/schema-registry/sasl.jaas.config", "./users.properties:/etc/schema-registry/users.properties", ], bootstrap_server_type="SASL_SSL", ), Materialized( environment_extra=[ "SASL_PASSWORD=sekurity", ], volumes_extra=["secrets:/share/secrets"], ), Testdrive( entrypoint=[ "bash", "-c", "cp /share/secrets/ca.crt /usr/local/share/ca-certificates/ca.crt && " "update-ca-certificates && " "testdrive " "--kafka-addr=kafka:9092 " "--kafka-option=security.protocol=SASL_SSL " "--kafka-option=sasl.mechanism=PLAIN " "--kafka-option=sasl.username=materialize " "--kafka-option=sasl.password=sekurity " "--schema-registry-url=https://materialize:sekurity@schema-registry:8081 "
def make_comparator(name: str) -> Comparator:
    return RelativeThresholdComparator(name, threshold=0.10)


default_timeout = "5m"

SERVICES = [
    Zookeeper(),
    Kafka(),
    SchemaRegistry(),
    # We are going to override this service definition during the actual benchmark;
    # we put "latest" here so that we avoid recompiling the current source unless
    # we will actually be benchmarking it.
    Materialized(image="materialize/materialized:latest"),
    Testdrive(
        validate_catalog=False,
        default_timeout=default_timeout,
    ),
]


def run_one_scenario(
    c: Composition, scenario: Scenario, args: argparse.Namespace
) -> Comparator:
    name = scenario.__name__
    print(f"Now benchmarking {name} ...")
    comparator = make_comparator(name)
    common_seed = round(time.time())
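# Note on make_comparator above: with threshold=0.10, a relative-threshold
# comparator flags a regression when THIS is more than 10% slower than OTHER,
# i.e. (a sketch of the intended semantics, not the framework's code):
#
#   this > other * (1 + 0.10)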
"name": "no_proxy", "env": [ "ALL_PROXY=http://localhost:1234", "NO_PROXY=schema-registry,amazonaws.com,localstack", ], "td": "testdrive/avro-registry.td testdrive/esoteric/s3.td", }, ] # Construct a dedicated Mz instance for each set of env variables under test for t in tests: t["mz"] = Materialized( name=f"materialized_{t['name']}", hostname="materialized", environment_extra=t["env"], ) mzs = [t["mz"] for t in tests] SERVICES = [ Zookeeper(), Kafka(), SchemaRegistry(), Squid(), Localstack(), *mzs, Testdrive(volumes_extra=["../testdrive:/workdir/testdrive"]), ]
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

from materialize.mzcompose import Composition, WorkflowArgumentParser
from materialize.mzcompose.services import Materialized, Postgres, TestCerts, Testdrive

SERVICES = [
    Materialized(volumes_extra=["secrets:/share/secrets"]),
    Testdrive(volumes_extra=["secrets:/share/secrets"]),
    TestCerts(),
    Postgres(),
]


def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    parser.add_argument(
        "filter",
        nargs="*",
        default=["*.td"],
        help="limit to only the files matching filter",
    )
    args = parser.parse_args()

    c.up("materialized", "test-certs", "testdrive-svc", "postgres")
    c.wait_for_materialized()
    @classmethod
    def body(cls) -> None:
        print(
            f"> CREATE MATERIALIZED VIEW v1 AS SELECT generate_series AS f1, generate_series AS f2 FROM (SELECT * FROM generate_series(1, {cls.COUNT}));"
        )
        print("> SELECT COUNT(*) FROM v1 AS a1 LEFT JOIN v1 AS a2 USING (f1);")
        print(f"{cls.COUNT}")


SERVICES = [
    Zookeeper(),
    Kafka(),
    SchemaRegistry(),
    Materialized(
        memory="8G",
        options="--persistent-user-tables --persistent-kafka-sources",
    ),
    Testdrive(),
]


def workflow_default(c: Composition) -> None:
    c.start_and_wait_for_tcp(
        services=["zookeeper", "kafka", "schema-registry", "materialized"]
    )

    with tempfile.NamedTemporaryFile(mode="w", dir=c.path) as tmp:
        with contextlib.redirect_stdout(tmp):
            [cls.generate() for cls in Generator.__subclasses__()]
            sys.stdout.flush()
        c.run("testdrive-svc", os.path.basename(tmp.name))
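# The stdout redirected into `tmp` above becomes a testdrive script. For
# COUNT=1000, the body() method shown here emits:
#
#   > CREATE MATERIALIZED VIEW v1 AS SELECT generate_series AS f1, generate_series AS f2 FROM (SELECT * FROM generate_series(1, 1000));
#   > SELECT COUNT(*) FROM v1 AS a1 LEFT JOIN v1 AS a2 USING (f1);
#   1000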
    @classmethod
    def body(cls) -> None:
        print(
            f"> CREATE MATERIALIZED VIEW v1 AS SELECT generate_series AS f1, generate_series AS f2 FROM (SELECT * FROM generate_series(1, {cls.COUNT}));"
        )
        print("> SELECT COUNT(*) FROM v1 AS a1 LEFT JOIN v1 AS a2 USING (f1);")
        print(f"{cls.COUNT}")


SERVICES = [
    Zookeeper(),
    Kafka(),
    SchemaRegistry(),
    Materialized(memory="8G"),
    Testdrive(default_timeout="60s"),
]


def run_test(c: Composition, args: argparse.Namespace) -> None:
    c.up("testdrive", persistent=True)

    scenarios = (
        [globals()[args.scenario]] if args.scenario else Generator.__subclasses__()
    )

    for scenario in scenarios:
        with tempfile.NamedTemporaryFile(mode="w", dir=c.path) as tmp:
            with contextlib.redirect_stdout(tmp):
                scenario.generate()
                sys.stdout.flush()
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    parser.add_argument(
        "--num-seconds",
        type=int,
        default=100,
        help="number of seconds to write records to Kafka",
    )
    parser.add_argument(
        "--records-per-second",
        type=int,
        default=10000,
        help="throughput of writes to maintain during testing",
    )
    parser.add_argument(
        "--num-keys",
        type=int,
        default=1000000000,
        help="number of distinct keys",
    )
    parser.add_argument(
        "--value-bytes",
        type=int,
        default=500,
        help="record payload size in bytes",
    )
    parser.add_argument(
        "--upsert",
        action="store_true",
        help="whether to use envelope UPSERT (True) or NONE (False)",
    )
    parser.add_argument(
        "--timeout-secs",
        type=int,
        default=120,
        help="timeout to send records to Kafka",
    )
    parser.add_argument(
        "--enable-persistence",
        action="store_true",
        help="whether or not to enable persistence on materialized",
    )
    parser.add_argument(
        "--s3-storage",
        type=str,
        default=None,
        help="enables s3 persist storage, pointed at the given subpath of our internal testing bucket",
    )
    parser.add_argument(
        "--workers",
        type=int,
        default=None,
        help="number of dataflow workers to use in materialized",
    )
    args = parser.parse_args()

    envelope = "NONE"
    if args.upsert:
        envelope = "UPSERT"

    options = []
    if args.enable_persistence:
        options = [
            "--persistent-user-tables",
            "--persistent-kafka-sources",
            "--disable-persistent-system-tables-test",
        ]

    if args.s3_storage == "":
        print("--s3-storage value must be non-empty", file=sys.stderr)
        sys.exit(1)
    elif args.s3_storage:
        if args.enable_persistence is not True:
            print(
                "cannot specify --s3-storage without --enable-persistence",
                file=sys.stderr,
            )
            sys.exit(1)
        options.extend(
            [
                "--persist-storage-enabled",
                f"--persist-storage=s3://mtlz-test-persist-1d-lifecycle-delete/{args.s3_storage}",
            ]
        )

    override = [
        Materialized(
            workers=args.workers,
            timestamp_frequency="1s",
            options=options,
        )
    ]

    with c.override(*override):
        c.start_and_wait_for_tcp(services=prerequisites)
        c.up("materialized")
        c.wait_for_materialized("materialized")

        c.run(
            "testdrive-svc",
            f"--var=envelope={envelope}",
            "setup.td",
        )

        start = time.monotonic()
        records_sent = 0
        total_records_to_send = args.records_per_second * args.num_seconds
        # Maximum observed delta between records sent by the benchmark and
        # ingested by Materialize.
        max_lag = 0
        last_reported_time = 0.0

        while True:
            elapsed = time.monotonic() - start
            records_ingested = query_materialize(c)
            lag = records_sent - records_ingested

            if lag > max_lag:
                max_lag = lag

            # Report our findings back once per second.
            if elapsed - last_reported_time > 1:
                print(
                    f"C> after {elapsed:.3f}s sent {records_sent} records, and ingested "
                    f"{records_ingested}. max observed lag {max_lag} records, most recent "
                    f"lag {lag} records"
                )
                last_reported_time = elapsed

            # Determine how many records we are scheduled to send, based on how
            # long the benchmark has been running and the desired QPS.
            records_scheduled = int(
                min(elapsed, args.num_seconds) * args.records_per_second
            )
            records_to_send = records_scheduled - records_sent

            if records_to_send > 0:
                send_records(
                    c,
                    num_records=records_to_send,
                    num_keys=args.num_keys,
                    value_bytes=args.value_bytes,
                    timeout_secs=args.timeout_secs,
                )
                records_sent = records_scheduled

            # Exit once we've sent all the records we need to send, and
            # confirmed that Materialize has ingested them.
            if records_sent == total_records_to_send == records_ingested:
                print(
                    f"C> Finished after {elapsed:.3f}s sent and ingested "
                    f"{records_sent} records. max observed lag {max_lag} records."
                )
                break
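# A standalone sketch of the pacing arithmetic in the loop above, using
# hypothetical numbers (the helper below is illustrative and not part of the
# benchmark):
def _records_to_send_sketch(
    elapsed: float, num_seconds: int, records_per_second: int, records_sent: int
) -> int:
    """Return how many records the loop above would hand to send_records."""
    records_scheduled = int(min(elapsed, num_seconds) * records_per_second)
    return max(records_scheduled - records_sent, 0)


# At elapsed=2.5s with a 10,000 rec/s target and 20,000 records already sent,
# the loop tops up the remaining 5,000 to stay on schedule; slow iterations
# therefore catch up automatically rather than drifting below the target rate.
assert _records_to_send_sketch(2.5, 100, 10000, 20000) == 5000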
    Testdrive,
    Zookeeper,
)

# All released Materialize versions, in order from most to least recent.
all_versions = util.known_materialize_versions()

# The `materialized` options that are valid only at or above a certain version.
mz_options = {Version.parse("0.9.2"): "--persistent-user-tables"}

SERVICES = [
    Zookeeper(),
    Kafka(),
    SchemaRegistry(),
    Postgres(),
    Materialized(options=" ".join(mz_options.values())),
    # N.B.: we need to use `validate_catalog=False` because testdrive uses HEAD
    # to load the catalog from disk but does *not* run migrations. There is no
    # guarantee that HEAD can load an old catalog without running migrations.
    #
    # When testdrive is targeting a HEAD materialized, we re-enable catalog
    # validation below by manually passing the `--validate-catalog` flag.
    #
    # Disabling catalog validation is preferable to using a versioned testdrive
    # because that would involve maintaining backwards compatibility for all
    # testdrive commands.
    Testdrive(validate_catalog=False),
]


def workflow_upgrade(c: Composition, parser: WorkflowArgumentParser) -> None:
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    parser.add_argument(
        "--num-seconds",
        type=int,
        default=100,
        help="number of seconds to write records to Kafka",
    )
    parser.add_argument(
        "--records-per-second",
        type=int,
        default=10000,
        help="throughput of writes to maintain during testing",
    )
    parser.add_argument(
        "--num-keys",
        type=int,
        default=1000000000,
        help="number of distinct keys",
    )
    parser.add_argument(
        "--value-bytes",
        type=int,
        default=500,
        help="record payload size in bytes",
    )
    parser.add_argument(
        "--timeout-secs",
        type=int,
        default=120,
        help="timeout to send records to Kafka",
    )
    parser.add_argument(
        "--blob-url",
        type=str,
        default=None,
        help="location where we store persistent data",
    )
    parser.add_argument(
        "--consensus-url",
        type=str,
        default=None,
        help="location where we store persistent data",
    )
    args = parser.parse_args()

    options = [
        "--persist-consensus-url",
        f"{args.consensus_url}",
        "--persist-blob-url",
        f"{args.blob_url}",
    ]

    override = [Materialized(options=options)]

    with c.override(*override):
        c.start_and_wait_for_tcp(services=prerequisites)
        c.up("materialized")
        c.wait_for_materialized("materialized")

        c.run(
            "testdrive",
            "setup.td",
        )

        start = time.monotonic()
        records_sent = 0
        total_records_to_send = args.records_per_second * args.num_seconds
        # Maximum observed delta between records sent by the benchmark and
        # ingested by Materialize.
        max_lag = 0
        last_reported_time = 0.0

        while True:
            elapsed = time.monotonic() - start
            records_ingested = query_materialize(c)
            lag = records_sent - records_ingested

            if lag > max_lag:
                max_lag = lag

            # Report our findings back once per second.
            if elapsed - last_reported_time > 1:
                print(
                    f"C> after {elapsed:.3f}s sent {records_sent} records, and ingested "
                    f"{records_ingested}. max observed lag {max_lag} records, most recent "
                    f"lag {lag} records"
                )
                last_reported_time = elapsed

            # Determine how many records we are scheduled to send, based on how
            # long the benchmark has been running and the desired QPS.
            records_scheduled = int(
                min(elapsed, args.num_seconds) * args.records_per_second
            )
            records_to_send = records_scheduled - records_sent

            if records_to_send > 0:
                send_records(
                    c,
                    num_records=records_to_send,
                    num_keys=args.num_keys,
                    value_bytes=args.value_bytes,
                    timeout_secs=args.timeout_secs,
                )
                records_sent = records_scheduled

            # Exit once we've sent all the records we need to send, and
            # confirmed that Materialize has ingested them.
            if records_sent == total_records_to_send == records_ingested:
                print(
                    f"C> Finished after {elapsed:.3f}s sent and ingested "
                    f"{records_sent} records. max observed lag {max_lag} records."
                )
                break
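# For illustration, invoking this workflow with hypothetical storage locations,
# e.g. --consensus-url=postgres://postgres:postgres@postgres and
# --blob-url=s3://some-bucket/some-path, starts materialized with:
#
#   --persist-consensus-url postgres://postgres:postgres@postgres \
#   --persist-blob-url s3://some-bucket/some-path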
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

from materialize.mzcompose import Composition
from materialize.mzcompose.services import Materialized, Postgres, Testdrive, Toxiproxy

SERVICES = [
    Materialized(),
    Postgres(),
    Toxiproxy(),
    Testdrive(no_reset=True, default_timeout="60s"),
]


def workflow_pg_cdc_resumption(c: Composition) -> None:
    """Test Postgres direct replication's failure handling by disrupting
    replication at various stages using Toxiproxy or service restarts.
    """
    initialize(c)

    for scenario in [
        disconnect_pg_during_snapshot,
        disconnect_pg_during_replication,
        restart_pg_during_snapshot,
)
from materialize.mzcompose.services import Testdrive as TestdriveService

SERVICES = [
    Postgres(name="postgres-backend"),
    Postgres(name="postgres-source"),
    Redpanda(auto_create_topics=True),
    Debezium(),
    # Started by some Scenarios, defined here only for the teardown.
    Computed(name="computed_1"),
    Materialized(
        options=" ".join(
            [
                "--persist-consensus-url=postgresql://postgres:postgres@postgres-backend:5432?options=--search_path=consensus",
                "--storage-stash-url=postgresql://postgres:postgres@postgres-backend:5432?options=--search_path=storage",
                "--adapter-stash-url=postgresql://postgres:postgres@postgres-backend:5432?options=--search_path=adapter",
            ]
        )
    ),
    TestdriveService(default_timeout="300s", no_reset=True, seed=1),
]


class ExecutionMode(Enum):
    ALLTOGETHER = "alltogether"
    ONEATATIME = "oneatatime"

    def __str__(self) -> str:
        return self.value
# == Services ==

# In buildkite this script is executed in a read-only directory, so we can't
# use cwd. Directories in `/tmp` end up empty inside of the running docker
# container, perhaps because of docker-in-docker and external permissions.
#
# This works in both local dev and buildkite.
LOCAL_DIR = (Path(__file__).parent / f"mzcompose-aws-config-{SEED}").resolve()
AWS_VOLUME = [f"{LOCAL_DIR}:/root/.aws"]

SERVICES = [
    Materialized(
        forward_aws_credentials=False,
        environment_extra=[f"AWS_EC2_METADATA_SERVICE_ENDPOINT={DISCARD}"],
        volumes_extra=AWS_VOLUME,
    ),
    Testdrive(
        materialize_url="postgres://materialize@materialized:6875",
        seed=SEED,
    ),
]

# Service overrides for specifying the external ID.
MZ_EID = Materialized(
    forward_aws_credentials=False,
    options=f"--aws-external-id={EXTERNAL_ID}",
    environment_extra=[f"AWS_EC2_METADATA_SERVICE_ENDPOINT={DISCARD}"],
    volumes_extra=AWS_VOLUME,
"SCHEMA_REGISTRY_SSL_CLIENT_AUTH=true", "SCHEMA_REGISTRY_AUTHENTICATION_METHOD=BASIC", "SCHEMA_REGISTRY_AUTHENTICATION_ROLES=user", "SCHEMA_REGISTRY_AUTHENTICATION_REALM=SchemaRegistry", "SCHEMA_REGISTRY_OPTS=-Djava.security.auth.login.config=/etc/schema-registry/jaas_config.conf", ], volumes=[ "secrets:/etc/schema-registry/secrets", "./jaas_config.conf:/etc/schema-registry/jaas_config.conf", "./users.properties:/etc/schema-registry/users.properties", ], bootstrap_server_type="SSL", ), Materialized( environment=[ "SSL_KEY_PASSWORD=mzmzmz", ], volumes_extra=["secrets:/share/secrets"], ), Testdrive( entrypoint=[ "bash", "-c", "cp /share/secrets/ca.crt /usr/local/share/ca-certificates/ca.crt && " "update-ca-certificates && " "testdrive " "--kafka-addr=kafka:9092 " "--schema-registry-url=https://schema-registry:8081 " "--materialized-url=postgres://materialize@materialized:6875 " "--cert=/share/secrets/producer.p12 " "--cert-password=mzmzmz " "--ccsr-password=sekurity "