def workflow_pg_snapshot_resumption(c: Composition) -> None:
    """Test that a PostgreSQL source can resume its initial snapshot.

    A failpoint makes storaged abort partway through the snapshot; after the
    failpoint is removed and storaged restarts, the data must still arrive.
    """
    c.down(destroy_volumes=True)

    with c.override(
        # Start postgres for the pg source
        Postgres(),
        Testdrive(no_reset=True),
        # The failpoint forces storaged to fail while taking the snapshot.
        Storaged(environment=["FAILPOINTS=pg_snapshot_failure=return"]),
    ):
        dependencies = [
            "materialized",
            "postgres",
            "storaged",
        ]
        c.start_and_wait_for_tcp(
            services=dependencies,
        )

        c.run("testdrive", "pg-snapshot-resumption/01-configure-postgres.td")
        c.run("testdrive", "pg-snapshot-resumption/02-create-sources.td")

        # storaged should crash
        c.run("testdrive", "pg-snapshot-resumption/03-while-storaged-down.td")

        print("Sleeping to ensure that storaged crashes")
        time.sleep(10)

        with c.override(
            # turn off the failpoint
            Storaged()
        ):
            c.start_and_wait_for_tcp(
                services=["storaged"],
            )
            c.run("testdrive", "pg-snapshot-resumption/04-verify-data.td")
def run_test(c: Composition, disruption: Disruption, id: int) -> None:
    """Run a single disruption scenario against a two-replica cluster.

    Brings up four computed processes arranged as two two-node replicas,
    creates `cluster1` over them, populates data, applies the disruption to
    replica1, validates the results, then tears everything down.

    NOTE(review): the parameter `id` shadows the builtin; it is only used as
    the testdrive seed.
    """
    print(f"+++ Running disruption scenario {disruption.name}")

    c.up("testdrive", persistent=True)

    # Two replicas, each consisting of two mutually-peered computed processes.
    nodes = [
        Computed(
            name="computed_1_1",
            peers=["computed_1_1", "computed_1_2"],
        ),
        Computed(
            name="computed_1_2",
            peers=["computed_1_1", "computed_1_2"],
        ),
        Computed(
            name="computed_2_1",
            peers=["computed_2_1", "computed_2_2"],
        ),
        Computed(
            name="computed_2_2",
            peers=["computed_2_1", "computed_2_2"],
        ),
    ]

    with c.override(*nodes):
        c.up("materialized", *[n.name for n in nodes])
        c.wait_for_materialized()

        c.sql(
            """
            CREATE CLUSTER cluster1 REPLICAS (
                replica1 (REMOTE ['computed_1_1:2100', 'computed_1_2:2100']),
                replica2 (REMOTE ['computed_2_1:2100', 'computed_2_2:2100'])
            )
            """
        )

        with c.override(
            Testdrive(
                validate_data_dir=False,
                no_reset=True,
                materialize_params={"cluster": "cluster1"},
                seed=id,
            )
        ):
            populate(c)

            # Disrupt replica1 by some means
            disruption.disruption(c)

            validate(c)

        # Full teardown so the next scenario starts from scratch.
        cleanup_list = ["materialized", "testdrive", *[n.name for n in nodes]]
        c.kill(*cleanup_list)
        c.rm(*cleanup_list, destroy_volumes=True)
        c.rm_volumes("mzdata", "pgdata")
def workflow_test_builtin_migration(c: Composition) -> None:
    """Exercise the builtin object migration code by upgrading between two versions
    that will have a migration triggered between them. Create a materialized view
    over the affected builtin object to confirm that the migration was successful.
    """
    c.down(destroy_volumes=True)
    with c.override(
        # Random commit before pg_roles was updated.
        Materialized(
            image="materialize/materialized:devel-9efd269199b1510b3e8f90196cb4fa3072a548a1",
        ),
        Testdrive(default_timeout="15s", no_reset=True, consistent_seed=True),
    ):
        c.up("testdrive", persistent=True)
        c.up("materialized")
        c.wait_for_materialized()

        # On the old version, pg_roles does not yet have rolconnlimit.
        c.testdrive(
            input=dedent(
                """
        > CREATE VIEW v1 AS SELECT COUNT(*) FROM pg_roles;
        > SELECT * FROM v1;
        2
        ! SELECT DISTINCT rolconnlimit FROM pg_roles;
        contains:column "rolconnlimit" does not exist
    """
            )
        )

        c.kill("materialized")

    with c.override(
        # This will stop working if we introduce a breaking change.
        Materialized(),
        Testdrive(default_timeout="15s", no_reset=True, consistent_seed=True),
    ):
        c.up("testdrive", persistent=True)
        c.up("materialized")
        c.wait_for_materialized()

        # The view created pre-migration must survive, and the migrated
        # pg_roles must now expose the new column.
        c.testdrive(
            input=dedent(
                """
       > SELECT * FROM v1;
       2
       # This column is new after the migration
       > SELECT DISTINCT rolconnlimit FROM pg_roles;
       -1
   """
            )
        )
def workflow_start_two_mzs(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Starts two Mz instances from different git tags for the purpose of
    manually running RQG comparison tests.
    """
    parser.add_argument("--this-tag", help="Run Materialize with this git tag on port 6875")

    parser.add_argument("--other-tag", help="Run Materialize with this git tag on port 16875")
    args = parser.parse_args()

    # One (service name, git tag, host port mapping) row per instance.
    instance_rows = [
        ("mz_this", args.this_tag, "6875:6875"),
        ("mz_other", args.other_tag, "16875:6875"),
    ]

    # volumes=[] keeps mzdata, pgdata, etc. private to each container so the
    # two instances cannot interfere with one another.
    overrides = [
        Materialized(
            name=service_name,
            image=f"materialize/materialized:{tag}" if tag else None,
            volumes=[],
            allow_host_ports=True,
            ports=[port_mapping],
        )
        for service_name, tag, port_mapping in instance_rows
    ]

    with c.override(*overrides):
        for service_name, _tag, _ports in instance_rows:
            c.up(service_name)
            c.wait_for_materialized(service=service_name)
def workflow_disable_user_indexes(c: Composition) -> None:
    """Ingest data, restart Materialize with --disable-user-indexes, and run
    the follow-up checks against the same testdrive seed."""
    # Reusing one time-based seed ties before.td and after.td together.
    td_seed = round(time.time())

    c.start_and_wait_for_tcp(services=prerequisites)

    c.up("materialized")
    c.wait_for_materialized()
    c.run("testdrive-svc", f"--seed={td_seed}", "disable-user-indexes/before.td")
    c.kill("materialized")

    # Bring Materialize back with user indexes disabled and verify.
    with c.override(Materialized(options=f"{mz_options} --disable-user-indexes")):
        c.up("materialized")
        c.wait_for_materialized()

        c.run("testdrive-svc", f"--seed={td_seed}", "disable-user-indexes/after.td")

        c.kill("materialized")
        c.rm("materialized", "testdrive-svc", destroy_volumes=True)
        c.rm_volumes("mzdata")
def workflow_test_upsert(c: Composition) -> None:
    """Test creating upsert sources and continuing to ingest them after a restart."""
    with c.override(
        Testdrive(default_timeout="30s", no_reset=True, consistent_seed=True),
    ):
        c.down(destroy_volumes=True)
        dependencies = [
            "materialized",
            "zookeeper",
            "kafka",
            "schema-registry",
        ]
        c.start_and_wait_for_tcp(
            services=dependencies,
        )

        c.run("testdrive", "upsert/01-create-sources.td")

        # Sleep to make sure the errors have made it to persist.
        # This isn't necessary for correctness,
        # as we should be able to crash at any point and re-start.
        # But if we don't sleep here, then we might be ingesting the errored
        # records in the new process, and so we won't actually be testing
        # the ability to retract error values that make it to persist.
        print("Sleeping for ten seconds")
        time.sleep(10)
        # Kill the storaged process running inside the materialized container;
        # the next testdrive file checks ingestion resumes after its restart.
        c.exec("materialized", "bash", "-c", "kill -9 `pidof storaged`")
        c.run("testdrive", "upsert/02-after-storaged-restart.td")
def workflow_default(c: Composition) -> None:
    """Run the Kafka testdrive files against every supported Confluent
    Platform version, tearing the stack down between versions."""
    c.start_and_wait_for_tcp(services=["localstack"])
    for version in CONFLUENT_PLATFORM_VERSIONS:
        print(f"==> Testing Confluent Platform {version}")
        # Pin Zookeeper, Kafka, and the Schema Registry to the version under test.
        confluent_platform_services = [
            Zookeeper(tag=version),
            Kafka(tag=version),
            SchemaRegistry(tag=version),
        ]
        with c.override(*confluent_platform_services):
            c.start_and_wait_for_tcp(
                services=["zookeeper", "kafka", "schema-registry", "materialized"]
            )
            c.wait_for_materialized()
            c.run("testdrive", "kafka-matrix.td", "testdrive/kafka-*.td")
            # Remove containers and volumes so the next version starts clean.
            c.kill(
                "zookeeper",
                "kafka",
                "schema-registry",
                "materialized",
            )
            c.rm(
                "zookeeper",
                "kafka",
                "schema-registry",
                "materialized",
                "testdrive",
                destroy_volumes=True,
            )
            c.rm_volumes("mzdata", "pgdata", force=True)
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Runs the dbt adapter test suite against Materialize in various configurations."""
    parser.add_argument(
        "filter", nargs="?", default="", help="limit to test cases matching filter"
    )
    args = parser.parse_args()
    for test_case in test_cases:
        # The default empty filter is a substring of every name, i.e. "run all".
        if args.filter in test_case.name:
            print(f"> Running test case {test_case.name}")
            materialized = Materialized(
                options=test_case.materialized_options,
                image=test_case.materialized_image,
                depends_on=["test-certs"],
                volumes=["secrets:/secrets"],
            )
            with c.override(materialized):
                c.up("materialized")
                c.wait_for_tcp(host="materialized", port=6875)
                c.run(
                    "dbt-test",
                    "pytest",
                    "dbt-materialize/test",
                    env_extra=test_case.dbt_env,
                )
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run the proxy tests."""
    parser.add_argument(
        "--redpanda",
        action="store_true",
        help="run against Redpanda instead of the Confluent Platform",
    )
    parser.add_argument(
        "--aws-region",
        help="run against the specified AWS region instead of localstack",
    )
    args = parser.parse_args()

    dependencies = ["squid"]
    if args.redpanda:
        dependencies += ["redpanda"]
    else:
        dependencies += ["zookeeper", "kafka", "schema-registry"]
    # localstack stands in for AWS only when no real region was requested.
    if not args.aws_region:
        dependencies += ["localstack"]
    c.start_and_wait_for_tcp(dependencies)

    aws_arg = (
        f"--aws-region={args.aws_region}"
        if args.aws_region
        else "--aws-endpoint=http://localstack:4566"
    )

    for test_case in test_cases:
        print(f"Running test case {test_case.name!r}")
        # Each test case contributes its own proxy environment variables.
        with c.override(Materialized(environment_extra=test_case.env)):
            c.up("materialized")
            c.wait_for_materialized("materialized")
            c.run("testdrive-svc", aws_arg, *test_case.files)
def run_one_scenario(
    c: Composition, scenario: Type[Scenario], args: argparse.Namespace
) -> Comparator:
    """Benchmark one scenario on both the 'this' and 'other' instances and
    return a comparator holding both outcomes."""
    name = scenario.__name__
    print(f"--- Now benchmarking {name} ...")
    comparator = make_comparator(name)
    # One shared seed so both instances operate on identical data.
    common_seed = round(time.time())

    for mz_id, instance in enumerate(["this", "other"]):
        cluster_services = start_services(c, args, instance)

        with c.override(*cluster_services):
            executor = Docker(
                composition=c,
                seed=common_seed,
            )

            benchmark = Benchmark(
                mz_id=mz_id,
                scenario=scenario,
                scale=args.scale,
                executor=executor,
                filter=make_filter(args),
                termination_conditions=make_termination_conditions(args),
                aggregation=make_aggregation(),
            )

            # The iteration count is unused here; only the outcome is kept.
            outcome, iterations = benchmark.run()
            comparator.append(outcome)

            stop_services(c, cluster_services)

    return comparator
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run testdrive."""
    parser.add_argument(
        "--redpanda",
        action="store_true",
        help="run against Redpanda instead of the Confluent Platform",
    )
    parser.add_argument(
        "--aws-region",
        help="run against the specified AWS region instead of localstack",
    )
    parser.add_argument(
        "--workers",
        type=int,
        metavar="N",
        help="set the number of materialized dataflow workers",
    )
    parser.add_argument(
        "--persistent-user-tables",
        action="store_true",
        help="enable the --persistent-user-tables materialized option",
    )
    parser.add_argument(
        "files",
        nargs="*",
        default=["*.td", "esoteric/*.td"],
        help="run against the specified files",
    )
    args = parser.parse_args()

    # The Confluent Platform images are x86-only and emulate very poorly.
    if not args.redpanda and Arch.host() == Arch.AARCH64:
        ui.warn(
            "Running the Confluent Platform in Docker on ARM-based machines is "
            "nearly unusably slow. Consider using Redpanda instead (--redpanda) "
            "or running tests without mzcompose."
        )

    dependencies = ["materialized"]
    if args.redpanda:
        dependencies += ["redpanda"]
    else:
        dependencies += ["zookeeper", "kafka", "schema-registry"]

    materialized = Materialized(
        workers=args.workers,
        options=["--persistent-user-tables"] if args.persistent_user_tables else [],
    )

    # Point testdrive at a real AWS region, or at localstack by default.
    testdrive = Testdrive(
        forward_buildkite_shard=True,
        entrypoint_extra=[f"--aws-region={args.aws_region}"]
        if args.aws_region
        else ["--aws-endpoint=http://localstack:4566"],
    )

    with c.override(materialized, testdrive):
        c.start_and_wait_for_tcp(services=dependencies)
        c.wait_for_materialized("materialized")
        c.run("testdrive-svc", *args.files)
        c.kill("materialized")
def workflow_persistence(c: Composition) -> None:
    """Run the full persistence suite once per Materialized configuration."""
    sub_workflows = (
        workflow_kafka_sources,
        workflow_user_tables,
        workflow_failpoints,
        workflow_disable_user_indexes,
    )
    for mz_config in [mz_default, mz_logical_compaction_window_off]:
        with c.override(mz_config):
            for sub_workflow in sub_workflows:
                sub_workflow(c)
def workflow_default(c: Composition) -> None:
    """Run every sub-workflow once per Materialized configuration."""
    sub_workflows = (
        workflow_kafka_sources,
        workflow_user_tables,
        workflow_disable_user_indexes,
        workflow_compaction,
    )
    for mz_config in mz_configurations:
        with c.override(mz_config):
            for sub_workflow in sub_workflows:
                sub_workflow(c)
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run testdrive."""
    parser.add_argument(
        "--redpanda",
        action="store_true",
        help="run against Redpanda instead of the Confluent Platform",
    )
    parser.add_argument(
        "--aws-region",
        help="run against the specified AWS region instead of localstack",
    )
    parser.add_argument(
        "--kafka-default-partitions",
        type=int,
        metavar="N",
        help="set the default number of kafka partitions per topic",
    )
    parser.add_argument(
        "files",
        nargs="*",
        default=["*.td"],
        help="run against the specified files",
    )
    args = parser.parse_args()

    # The Confluent Platform images are x86-only and emulate very poorly.
    if not args.redpanda and Arch.host() == Arch.AARCH64:
        ui.warn(
            "Running the Confluent Platform in Docker on ARM-based machines is "
            "nearly unusably slow. Consider using Redpanda instead (--redpanda) "
            "or running tests without mzcompose."
        )

    dependencies = ["materialized"]
    if args.redpanda:
        dependencies += ["redpanda"]
    else:
        dependencies += ["zookeeper", "kafka", "schema-registry"]
    # localstack stands in for AWS only when no real region was requested.
    if args.aws_region is None:
        dependencies += ["localstack"]

    testdrive = Testdrive(
        forward_buildkite_shard=True,
        kafka_default_partitions=args.kafka_default_partitions,
        aws_region=args.aws_region,
        validate_postgres_stash=True,
    )

    with c.override(testdrive):
        c.start_and_wait_for_tcp(services=dependencies)
        c.wait_for_materialized("materialized")
        try:
            junit_report = ci_util.junit_report_filename(c.name)
            c.run("testdrive", f"--junit-report={junit_report}", *args.files)
        finally:
            # Upload the report even when testdrive fails so CI annotates it.
            ci_util.upload_junit_report(
                "testdrive", Path(__file__).parent / junit_report
            )
def test_upgrade_from_version(
    c: Composition, from_version: str, priors: List[str], filter: str, style: str = ""
) -> None:
    """Upgrade a `from_version` deployment to the current source and verify it.

    Runs the `create-…` testdrive files matching the version glob on the old
    version, restarts onto the current binary, then runs the matching
    `check-…` files with catalog validation. `style` selects file-name
    variants (e.g. "ssl-"). NOTE(review): `filter` shadows the builtin.
    """
    print(f"===>>> Testing upgrade from Materialize {from_version} to current_source.")

    # Shell brace glob matching any prior version's create/check files.
    version_glob = "{" + ",".join(["any_version", *priors, from_version]) + "}"
    print(">>> Version glob pattern: " + version_glob)

    c.rm("materialized", "testdrive-svc", stop=True)
    c.rm_volumes("mzdata", "tmp")

    if from_version != "current_source":
        mz_from = Materialized(
            image=f"materialize/materialized:{from_version}",
            options=" ".join(
                opt
                for start_version, opt in mz_options.items()
                # NOTE(review): lexicographic string comparison of versions
                # (strips the leading "v"); may misorder e.g. 0.10 vs 0.9 —
                # confirm the option start versions never hit that case.
                if from_version[1:] >= start_version
            ),
            environment=[
                "SSL_KEY_PASSWORD=mzmzmz",
            ],
            volumes_extra=["secrets:/share/secrets"],
        )
        with c.override(mz_from):
            c.up("materialized")
    else:
        c.up("materialized")

    c.wait_for_materialized("materialized")

    temp_dir = f"--temp-dir=/share/tmp/upgrade-from-{from_version}"
    seed = f"--seed={random.getrandbits(32)}"
    c.run(
        "testdrive-svc",
        "--no-reset",
        f"--var=upgrade-from-version={from_version}",
        temp_dir,
        seed,
        f"create-{style}in-{version_glob}-{filter}.td",
    )

    # Restart on the current source build (the override has expired).
    c.kill("materialized")
    c.rm("materialized", "testdrive-svc")

    c.up("materialized")
    c.wait_for_materialized("materialized")

    c.run(
        "testdrive-svc",
        "--no-reset",
        f"--var=upgrade-from-version={from_version}",
        temp_dir,
        seed,
        "--validate-catalog=/share/mzdata/catalog",
        f"check-{style}from-{version_glob}-{filter}.td",
    )
def workflow_cluster(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run all the limits tests against a multi-node, multi-replica cluster"""
    parser.add_argument(
        "--scenario", metavar="SCENARIO", type=str, help="Scenario to run."
    )
    parser.add_argument(
        "--workers",
        type=int,
        metavar="N",
        default=2,
        help="set the default number of workers",
    )
    args = parser.parse_args()

    c.start_and_wait_for_tcp(services=["zookeeper", "kafka", "schema-registry"])
    c.up("materialized")
    c.wait_for_materialized()

    # Four computed processes: two replicas of two mutually-peered nodes each.
    nodes = [
        Computed(
            name="computed_1_1",
            workers=args.workers,
            peers=["computed_1_1", "computed_1_2"],
        ),
        Computed(
            name="computed_1_2",
            workers=args.workers,
            peers=["computed_1_1", "computed_1_2"],
        ),
        Computed(
            name="computed_2_1",
            workers=args.workers,
            peers=["computed_2_1", "computed_2_2"],
        ),
        Computed(
            name="computed_2_2",
            workers=args.workers,
            peers=["computed_2_1", "computed_2_2"],
        ),
    ]
    with c.override(*nodes):
        c.up(*[n.name for n in nodes])

        c.sql(
            """
            CREATE CLUSTER cluster1 REPLICAS (
                replica1 (REMOTE ['computed_1_1:2100', 'computed_1_2:2100']),
                replica2 (REMOTE ['computed_2_1:2100', 'computed_2_2:2100'])
            )
            """
        )

        run_test(c, args)
def start_services(
    c: Composition, args: argparse.Namespace, instance: str
) -> List[Service]:
    """Start the Mz services (plus optional computed cluster) for one side of
    the benchmark and return the overrides so the caller can stop them later.

    `instance` is "this" or "other" and selects the corresponding tag,
    options, node count, and worker count from `args`.
    """
    tag, options, nodes, workers = (
        (args.this_tag, args.this_options, args.this_nodes, args.this_workers)
        if instance == "this"
        else (args.other_tag, args.other_options, args.other_nodes, args.other_workers)
    )

    cluster_services: List[Service] = []

    if nodes:
        # Clustered mode: options/workers go to the computed processes.
        cluster_services.append(
            Materialized(
                image=f"materialize/materialized:{tag}" if tag else None,
            )
        )

        node_names = [f"computed_{n}" for n in range(0, nodes)]
        for node_id in range(0, nodes):
            cluster_services.append(
                Computed(
                    name=node_names[node_id],
                    workers=workers,
                    options=options,
                    peers=node_names,
                    image=f"materialize/computed:{tag}" if tag else None,
                )
            )
    else:
        # Single-binary mode: options/workers go directly to Materialized.
        cluster_services.append(
            Materialized(
                image=f"materialize/materialized:{tag}" if tag else None,
                workers=workers,
                options=options,
            )
        )

    with c.override(*cluster_services):
        print(f"The version of the '{instance.upper()}' Mz instance is:")
        c.run("materialized", "--version")

        # Single-binary legacy Mz instances only have port 6875 open
        # so only check that port before proceeding
        c.up("materialized")
        c.wait_for_materialized(port=6875)

        if nodes:
            print(f"Starting cluster for '{instance.upper()}' ...")
            c.up(*[f"computed_{n}" for n in range(0, nodes)])

            c.sql(
                "CREATE CLUSTER REPLICA default.feature_benchmark REMOTE ["
                + ",".join([f"'computed_{n}:2100'" for n in range(0, nodes)])
                + "];"
            )

            c.sql("DROP CLUSTER REPLICA default.default_replica")

        c.up("testdrive", persistent=True)

    return cluster_services
def run_one_scenario(
    c: Composition, scenario: Type[Scenario], args: argparse.Namespace
) -> Comparator:
    """Benchmark one scenario on the 'this' and 'other' Materialize images and
    return a comparator containing both outcomes."""
    name = scenario.__name__
    print(f"--- Now benchmarking {name} ...")
    comparator = make_comparator(name)
    # One shared seed so both instances benchmark identical data.
    common_seed = round(time.time())

    mzs = {
        "this": Materialized(
            image=f"materialize/materialized:{args.this_tag}"
            if args.this_tag
            else None,
            options=args.this_options,
        ),
        "other": Materialized(
            image=f"materialize/materialized:{args.other_tag}"
            if args.other_tag
            else None,
            options=args.other_options,
        ),
    }

    for mz_id, instance in enumerate(["this", "other"]):
        with c.override(mzs[instance]):
            print(f"The version of the '{instance.upper()}' Mz instance is:")
            c.run("materialized", "--version")

            c.start_and_wait_for_tcp(services=["materialized"])
            c.wait_for_materialized()

            executor = Docker(
                composition=c,
                seed=common_seed,
            )

            benchmark = Benchmark(
                mz_id=mz_id,
                scenario=scenario,
                scale=args.scale,
                executor=executor,
                filter=make_filter(args),
                termination_conditions=make_termination_conditions(args),
                aggregation=make_aggregation(),
            )

            # The iteration count is unused here; only the outcome is kept.
            outcome, iterations = benchmark.run()
            comparator.append(outcome)

            # Full teardown so the other instance starts from scratch.
            c.kill("materialized")
            c.rm("materialized", "testdrive-svc")
            c.rm_volumes("mzdata")

    return comparator
def workflow_compaction(c: Composition) -> None:
    """Run the compaction testdrive file under the fast-metrics configuration."""
    mz_service = "materialized"
    with c.override(mz_fast_metrics):
        c.up(mz_service)
        c.wait_for_materialized()

        c.run("testdrive-svc", "compaction/compaction.td")

        # Tear down and drop the data volume so later workflows start clean.
        c.kill(mz_service)
        c.rm(mz_service, "testdrive-svc", destroy_volumes=True)
        c.rm_volumes("mzdata")
def workflow_compaction(c: Composition) -> None:
    """Run the compaction testdrive file against a Materialize configured
    with an aggressive (1s) metrics scraping interval."""
    # Fix: the option was previously an f-string with no placeholders (F541);
    # a plain string literal is equivalent and intentional.
    with c.override(Materialized(options="--metrics-scraping-interval=1s")):
        c.up("materialized")
        c.wait_for_materialized()

        c.run("testdrive", "compaction/compaction.td")

        # Tear down and drop the volumes so later workflows start clean.
        c.kill("materialized")
        c.rm("materialized", "testdrive", destroy_volumes=True)
        c.rm_volumes("mzdata", "pgdata")
def test_upgrade_from_version(
    c: Composition, from_version: str, priors: List[str], filter: str
) -> None:
    """Upgrade a `from_version` deployment to the current source and verify it.

    Runs the matching `create-in-…` testdrive files on the old version,
    restarts onto the current binary, then runs the matching `check-from-…`
    files with catalog validation enabled.

    NOTE(review): `filter` shadows the builtin.
    """
    print(f"===>>> Testing upgrade from Materialize {from_version} to current_source.")

    # bash extglob alternation: @(a|b|c) matches any one alternative.
    version_glob = "|".join(["any_version", *priors, from_version])
    print(">>> Version glob pattern: " + version_glob)

    if from_version != "current_source":
        mz_from = Materialized(
            image=f"materialize/materialized:{from_version}",
            options=" ".join(
                opt
                for start_version, opt in mz_options.items()
                # NOTE(review): lexicographic string comparison of versions
                # (strips the leading "v"); confirm intended ordering.
                if from_version[1:] >= start_version
            ),
        )
        with c.override(mz_from):
            c.up("materialized")
    else:
        c.up("materialized")

    c.wait_for_materialized("materialized")

    temp_dir = f"--temp-dir=/share/tmp/upgrade-from-{from_version}"
    with patch.dict(os.environ, {"UPGRADE_FROM_VERSION": from_version}):
        c.run(
            "testdrive-svc",
            "--seed=1",
            "--no-reset",
            temp_dir,
            f"create-in-@({version_glob})-{filter}.td",
        )

    # Restart on the current source build (the override has expired).
    c.kill("materialized")
    c.rm("materialized", "testdrive-svc")

    c.up("materialized")
    c.wait_for_materialized("materialized")

    with patch.dict(os.environ, {"UPGRADE_FROM_VERSION": from_version}):
        c.run(
            "testdrive-svc",
            "--seed=1",
            "--no-reset",
            temp_dir,
            # Fix: the flag and the file glob were previously fused into a
            # single argv entry ("--validate-catalog=... check-from-....td"),
            # so testdrive saw one bogus argument instead of a flag plus a
            # file pattern. Pass them separately, as the sibling
            # test_upgrade_from_version implementation does.
            "--validate-catalog=/share/mzdata/catalog",
            f"check-from-@({version_glob})-{filter}.td",
        )

    c.kill("materialized")
    c.rm("materialized", "testdrive-svc")
    c.rm_volumes("mzdata", "tmp")
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Test upgrades from various versions."""
    parser.add_argument(
        "--min-version",
        metavar="VERSION",
        type=Version.parse,
        default=Version.parse("0.8.0"),
        help="the minimum version to test from",
    )
    parser.add_argument(
        "--most-recent",
        metavar="N",
        type=int,
        help="limit testing to the N most recent versions",
    )
    parser.add_argument(
        "--tests",
        choices=["all", "non-ssl", "ssl"],
        default="all",
        help="limit testing to certain scenarios",
    )
    parser.add_argument(
        "filter", nargs="?", default="*", help="limit to only the files matching filter"
    )
    args = parser.parse_args()

    # Newest-first list of versions at or above the requested minimum.
    tested_versions = [v for v in all_versions if v >= args.min_version]
    if args.most_recent is not None:
        tested_versions = tested_versions[: args.most_recent]
    tested_versions.reverse()

    c.start_and_wait_for_tcp(
        services=["zookeeper", "kafka", "schema-registry", "postgres"]
    )

    if args.tests in ["all", "non-ssl"]:
        for version in tested_versions:
            # Every strictly earlier version may have contributed objects.
            priors = [f"v{v}" for v in all_versions if v < version]
            test_upgrade_from_version(c, f"v{version}", priors, filter=args.filter)

        # Also test a same-version "upgrade" of the current source build.
        test_upgrade_from_version(c, "current_source", priors=["*"], filter=args.filter)

    if args.tests in ["all", "ssl"]:
        # Re-run the upgrade matrix with SSL-enabled Kafka/Schema Registry.
        kafka, schema_registry, testdrive = ssl_services()
        with c.override(kafka, schema_registry, testdrive):
            c.start_and_wait_for_tcp(services=["kafka", "schema-registry"])
            for version in tested_versions:
                priors = [f"v{v}" for v in all_versions if v < version]
                test_upgrade_from_version(
                    c, f"v{version}", priors, filter=args.filter, style="ssl-"
                )
def workflow_test_resource_limits(c: Composition) -> None:
    """Test resource limits in Materialize."""
    # Start from a clean slate so earlier workflows cannot leak state in.
    c.down(destroy_volumes=True)

    overrides = [
        Testdrive(),
        Materialized(),
    ]
    with c.override(*overrides):
        c.up("materialized")
        c.wait_for_materialized()

        c.run("testdrive", "resources/resource-limits.td")
def workflow_failpoints(c: Composition) -> None:
    """Trigger every known failpoint with each failure action, once per
    Materialized configuration."""
    failpoints = (
        "fileblob_set_sync",
        "fileblob_delete_before",
        "fileblob_delete_after",
        "insert_timestamp_bindings_before",
        "insert_timestamp_bindings_after",
    )
    actions = ("return", "panic", "sleep(1000)")

    c.start_and_wait_for_tcp(services=prerequisites)

    for mz_config in mz_configurations:
        with c.override(mz_config):
            for failpoint in failpoints:
                for action in actions:
                    run_one_failpoint(c, failpoint, action)
def workflow_stash(c: Composition) -> None:
    """Verify adapter-stash behavior across Postgres restarts.

    A plain restart preserves the stash, so Materialize keeps working; wiping
    the Postgres volume loses the stash, so the next statement must fail with
    a stash (or, depending on timing, network) error.
    """
    c.rm(
        "testdrive",
        "materialized",
        stop=True,
        destroy_volumes=True,
    )
    c.rm_volumes("mzdata", "pgdata", force=True)

    materialized = Materialized(
        options=["--adapter-stash-url", "postgres://*****:*****@postgres"],
    )
    postgres = Postgres(image="postgres:14.4")

    with c.override(materialized, postgres):
        c.up("postgres")
        c.wait_for_postgres()
        c.start_and_wait_for_tcp(services=["materialized"])
        c.wait_for_materialized("materialized")

        c.sql("CREATE TABLE a (i INT)")

        # A stop/start keeps the Postgres volume; the stash survives.
        c.stop("postgres")
        c.up("postgres")
        c.wait_for_postgres()

        c.sql("CREATE TABLE b (i INT)")

        # Destroying the volume wipes the stash database entirely.
        c.rm("postgres", stop=True, destroy_volumes=True)
        c.up("postgres")
        c.wait_for_postgres()

        # Postgres cleared its database, so this should fail.
        try:
            c.sql("CREATE TABLE c (i INT)")
            # Note: this sentinel is re-raised below since it matches neither
            # expected error message.
            raise Exception("expected unreachable")
        except Exception as e:
            # Depending on timing, either of these errors can occur. The stash error comes
            # from the stash complaining. The network error comes from pg8000 complaining
            # because materialize panic'd.
            if "stash error: postgres: db error" not in str(
                e
            ) and "network error" not in str(e):
                raise e
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Runs the dbt adapter test suite against Materialize in various configurations."""
    parser.add_argument(
        "filter", nargs="?", default="", help="limit to test cases matching filter"
    )
    args = parser.parse_args()
    for test_case in test_cases:
        # The default empty filter is a substring of every name, i.e. "run all".
        if args.filter in test_case.name:
            print(f"> Running test case {test_case.name}")
            materialized = Materialized(
                options=test_case.materialized_options,
                image=test_case.materialized_image,
                depends_on=["test-certs"],
                volumes_extra=["secrets:/secrets"],
            )
            with c.test_case(test_case.name):
                with c.override(materialized):
                    # Restart the whole stack per test case for isolation.
                    c.down()
                    c.start_and_wait_for_tcp(
                        services=["zookeeper", "kafka", "schema-registry"]
                    )
                    c.up("materialized")
                    c.wait_for_tcp(host="materialized", port=6875)
                    c.run(
                        "dbt-test",
                        "pytest",
                        "dbt-materialize/test",
                        env_extra={
                            "DBT_HOST": "materialized",
                            "KAFKA_ADDR": "kafka:9092",
                            "SCHEMA_REGISTRY_URL": "http://schema-registry:8081",
                            **test_case.dbt_env,
                        },
                    )
def workflow_test_remote_storaged(c: Composition) -> None:
    """Test creating sources in a remote storaged process."""
    c.down(destroy_volumes=True)

    with c.override(
        Testdrive(default_timeout="15s", no_reset=True, consistent_seed=True),
        # Use a separate PostgreSQL service for persist rather than the one in
        # the `Materialized` service, so that crashing `environmentd` does not
        # also take down PostgreSQL.
        Postgres(),
        Materialized(
            options="--persist-consensus-url=postgres://postgres:postgres@postgres"
        ),
    ):
        dependencies = [
            "materialized",
            "postgres",
            "storaged",
            "redpanda",
        ]
        c.start_and_wait_for_tcp(
            services=dependencies,
        )

        c.run("testdrive", "storaged/01-create-sources.td")

        # Sources must survive an environmentd restart.
        c.kill("materialized")
        c.up("materialized")
        c.run("testdrive", "storaged/02-after-environmentd-restart.td")

        # Check behavior while storaged is down, then after it returns.
        c.kill("storaged")
        c.run("testdrive", "storaged/03-while-storaged-down.td")

        c.up("storaged")
        c.run("testdrive", "storaged/04-after-storaged-restart.td")
def workflow_default(c: Composition) -> None:
    "Test that materialize can use a multitude of auth schemes to connect to AWS"
    LOCAL_DIR.mkdir()

    session = boto3.Session()
    sts: STSClient = session.client("sts")
    iam: IAMClient = session.client("iam")

    identity = sts.get_caller_identity()
    current_user = identity["Arn"]

    aws_region = session.region_name

    # Roles created here are torn down in the `finally` block below.
    created_roles: List[CreatedRole] = []
    try:
        # Three roles: assumable, denied, and assumable-only-with-external-id.
        allowed = create_role(iam, "Allow", current_user, created_roles)
        denied = create_role(iam, "Deny", current_user, created_roles)
        requires_eid = create_role(
            iam, "Allow", current_user, created_roles, external_id=EXTERNAL_ID
        )

        profile_contents = gen_profile_text(
            session, allowed.arn, requires_eid.arn, denied.arn
        )

        # IAM is eventually consistent; wait until the role is assumable.
        wait_for_role(sts, allowed.arn)

        td_args = [
            f"--aws-region={aws_region}",
            f"--var=allowed-role-arn={allowed.arn}",
            f"--var=denied-role-arn={denied.arn}",
            f"--var=role-requires-eid={requires_eid.arn}",
        ]

        # == Run core tests ==

        c.up("materialized")
        write_aws_config(LOCAL_DIR, profile_contents)

        c.wait_for_materialized("materialized")
        c.run(
            "testdrive",
            *td_args,
            "test.td",
        )
        c.run(
            "testdrive",
            *td_args,
            # no reset because the next test wants to validate behavior with
            # the previous catalog
            "--no-reset",
            "test-externalid-missing.td",
        )

        # == Tests that restarting materialized without a profile doesn't bork mz ==

        print("+++ Test Restarts with and without profile files")

        # Historically, a missing aws config file would cause all SQL
        # commands to hang entirely after a restart, this no longer happens
        # but this step restarts to catch it if it comes back.
        c.stop("materialized")

        rm_aws_config(LOCAL_DIR)

        c.up("materialized")
        c.run(
            "testdrive",
            "--no-reset",
            "test-restart-no-creds.td",
        )

        # now test that with added credentials things can be done
        write_aws_config(LOCAL_DIR, profile_contents)
        c.run("testdrive", *td_args, "test-restart-with-creds.td")

        # == Test that requires --aws-external-id has been supplied ==

        print("+++ Test AWS External IDs")

        c.stop("materialized")
        c.rm("materialized")

        with c.override(MZ_EID):
            c.up("materialized")
            c.wait_for_materialized("materialized")
            write_aws_config(LOCAL_DIR, profile_contents)

            c.run("testdrive", *td_args, "test-externalid-present.td")
    finally:
        # Best-effort cleanup: delete every role (and its inline policy)
        # created above, then report a single error if anything failed.
        errored = False

        for role in created_roles:
            try:
                iam.delete_role_policy(RoleName=role.name, PolicyName=role.policy_name)
            except Exception as e:
                errored = True
                print(
                    f"> Unable to delete role policy {role.name}/{role.policy_name}: {e}"
                )
            try:
                iam.delete_role(RoleName=role.name)
                print(f"> Deleted IAM role {role.name}")
            except Exception as e:
                errored = True
                print(f"> Unable to delete role {role.name}: {e}")

        rm_aws_config(LOCAL_DIR)
        LOCAL_DIR.rmdir()

        if errored:
            raise UIError("Unable to completely clean up AWS resources")
def workflow_instance_size(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Create multiple clusters with multiple nodes and replicas each"""
    c.start_and_wait_for_tcp(services=["zookeeper", "kafka", "schema-registry"])

    parser.add_argument(
        "--workers",
        type=int,
        metavar="N",
        default=2,
        help="set the default number of workers",
    )

    parser.add_argument(
        "--clusters",
        type=int,
        metavar="N",
        default=16,
        help="set the number of clusters to create",
    )
    parser.add_argument(
        "--nodes",
        type=int,
        metavar="N",
        default=4,
        help="set the number of nodes per cluster",
    )
    parser.add_argument(
        "--replicas",
        type=int,
        metavar="N",
        default=4,
        help="set the number of replicas per cluster",
    )
    args = parser.parse_args()

    c.up("testdrive", persistent=True)
    c.up("materialized")
    c.wait_for_materialized()

    # Construct the required Computed instances and peer them into clusters
    computeds = []
    for cluster_id in range(0, args.clusters):
        for replica_id in range(0, args.replicas):
            nodes = []
            for node_id in range(0, args.nodes):
                node_name = f"computed_{cluster_id}_{replica_id}_{node_id}"
                nodes.append(node_name)

            for node_id in range(0, args.nodes):
                computeds.append(
                    Computed(name=nodes[node_id], peers=nodes, workers=args.workers)
                )

    with c.override(*computeds):
        with c.override(Testdrive(seed=1, no_reset=True)):
            for n in computeds:
                c.up(n.name)

            # Create some input data
            c.testdrive(
                dedent(
                    """
                    > CREATE TABLE ten (f1 INTEGER);
                    > INSERT INTO ten VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
                    $ set schema={ "type" : "record", "name" : "test", "fields" : [ {"name":"f1", "type":"string"} ] }
                    $ kafka-create-topic topic=instance-size
                    $ kafka-ingest format=avro topic=instance-size schema=${schema} publish=true repeat=10000
                    {"f1": "fish"}
                    """
                )
            )

            # Construct the required CREATE CLUSTER statements
            for cluster_id in range(0, args.clusters):
                replica_definitions = []
                for replica_id in range(0, args.replicas):
                    # Re-derive the node names for this replica.
                    nodes = []
                    for node_id in range(0, args.nodes):
                        node_name = f"computed_{cluster_id}_{replica_id}_{node_id}"
                        nodes.append(node_name)

                    replica_name = f"replica_{cluster_id}_{replica_id}"
                    replica_definitions.append(
                        f"{replica_name} (REMOTE ["
                        + ", ".join(f"'{n}:2100'" for n in nodes)
                        + "])"
                    )

                c.sql(
                    f"CREATE CLUSTER cluster_{cluster_id} REPLICAS ("
                    + ",".join(replica_definitions)
                    + ")"
                )

            # Construct some dataflows in each cluster
            for cluster_id in range(0, args.clusters):
                cluster_name = f"cluster_{cluster_id}"

                c.testdrive(
                    dedent(
                        f"""
                        > SET cluster={cluster_name}
                        > CREATE DEFAULT INDEX ON ten;
                        > CREATE MATERIALIZED VIEW v_{cluster_name} AS SELECT COUNT(*) AS c1 FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4;
                        > CREATE MATERIALIZED SOURCE s_{cluster_name} FROM KAFKA BROKER '${{testdrive.kafka-addr}}' TOPIC 'testdrive-instance-size-${{testdrive.seed}}' FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY '${{testdrive.schema-registry-url}}' ENVELOPE NONE
                        """
                    )
                )

            # Validate that each individual cluster is operating properly
            for cluster_id in range(0, args.clusters):
                cluster_name = f"cluster_{cluster_id}"

                c.testdrive(
                    dedent(
                        f"""
                        > SET cluster={cluster_name}
                        > SELECT c1 FROM v_{cluster_name};
                        10000
                        > SELECT COUNT(*) FROM s_{cluster_name}
                        10000
                        """
                    )
                )
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Benchmark persistent ingestion: write records to Kafka at a fixed rate
    for --num-seconds and track how far Materialize's ingestion lags behind."""
    parser.add_argument(
        "--num-seconds",
        type=int,
        default=100,
        help="number of seconds to write records to Kafka",
    )
    parser.add_argument(
        "--records-per-second",
        type=int,
        default=10000,
        help="throughput of writes to maintain during testing",
    )
    parser.add_argument(
        "--num-keys", type=int, default=1000000000, help="number of distinct keys"
    )
    parser.add_argument(
        "--value-bytes", type=int, default=500, help="record payload size in bytes"
    )
    parser.add_argument(
        "--timeout-secs", type=int, default=120, help="timeout to send records to Kafka"
    )
    parser.add_argument(
        "--blob-url",
        type=str,
        default=None,
        help="location where we store persistent data",
    )
    parser.add_argument(
        "--consensus-url",
        type=str,
        default=None,
        help="location where we store persistent data",
    )
    args = parser.parse_args()

    options = [
        "--persist-consensus-url",
        f"{args.consensus_url}",
        "--persist-blob-url",
        f"{args.blob_url}",
    ]

    override = [Materialized(options=options)]

    with c.override(*override):
        c.start_and_wait_for_tcp(services=prerequisites)

        c.up("materialized")
        c.wait_for_materialized("materialized")

        c.run(
            "testdrive",
            "setup.td",
        )

        start = time.monotonic()
        records_sent = 0
        total_records_to_send = args.records_per_second * args.num_seconds
        # Maximum observed delta between records sent by the benchmark and ingested by
        # Materialize.
        max_lag = 0
        last_reported_time = 0.0

        while True:
            elapsed = time.monotonic() - start

            records_ingested = query_materialize(c)

            lag = records_sent - records_ingested

            if lag > max_lag:
                max_lag = lag

            # Report our findings back once per second.
            if elapsed - last_reported_time > 1:
                print(
                    f"C> after {elapsed:.3f}s sent {records_sent} records, and ingested {records_ingested}. max observed lag {max_lag} records, most recent lag {lag} records"
                )
                last_reported_time = elapsed

            # Determine how many records we are scheduled to send, based on how long
            # the benchmark has been running and the desired QPS.
            records_scheduled = int(
                min(elapsed, args.num_seconds) * args.records_per_second
            )
            records_to_send = records_scheduled - records_sent

            if records_to_send > 0:
                send_records(
                    c,
                    num_records=records_to_send,
                    num_keys=args.num_keys,
                    value_bytes=args.value_bytes,
                    timeout_secs=args.timeout_secs,
                )
                records_sent = records_scheduled

            # Exit once we've sent all the records we need to send, and confirmed that
            # Materialize has ingested them.
            if records_sent == total_records_to_send == records_ingested:
                print(
                    f"C> Finished after {elapsed:.3f}s sent and ingested {records_sent} records. max observed lag {max_lag} records."
                )
                break