Example #1
0
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run each entry of ``test_cases`` through testdrive.

    Starts the ``squid`` service together with the selected message broker
    (and ``localstack`` unless ``--aws-region`` is given), then, for every
    test case, restarts ``materialized`` with that case's extra environment
    and runs the case's files.
    """
    parser.add_argument(
        "--redpanda",
        action="store_true",
        help="run against Redpanda instead of the Confluent Platform",
    )
    parser.add_argument(
        "--aws-region",
        help="run against the specified AWS region instead of localstack",
    )
    options = parser.parse_args()

    if options.redpanda:
        broker_services = ["redpanda"]
    else:
        broker_services = ["zookeeper", "kafka", "schema-registry"]
    services = ["squid", *broker_services]

    # Without an explicit region the tests talk to localstack, which then has
    # to be started alongside the other services.
    if options.aws_region:
        aws_flag = f"--aws-region={options.aws_region}"
    else:
        services.append("localstack")
        aws_flag = "--aws-endpoint=http://localstack:4566"

    c.start_and_wait_for_tcp(services)

    for case in test_cases:
        print(f"Running test case {case.name!r}")
        mz_override = Materialized(environment_extra=case.env)
        with c.override(mz_override):
            c.up("materialized")
            c.wait_for_materialized("materialized")
            c.run("testdrive-svc", aws_flag, *case.files)
Example #2
0
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Generate and run a randomized test for the requested scenario.

    ``--scenario`` (required) names a class that is looked up in this
    module's globals; an unknown name raises ``KeyError``. ``--seed`` seeds
    the global ``random`` generator before the test is generated.
    """
    parser.add_argument(
        "--scenario",
        metavar="SCENARIO",
        type=str,
        help="Scenario to run",
        required=True,
    )
    parser.add_argument(
        "--seed", metavar="N", type=int, help="Random seed", default=1
    )
    options = parser.parse_args()

    # Resolve the scenario by name before any service is started.
    scenario_cls = globals()[options.scenario]

    kafka_stack = ["zookeeper", "kafka", "schema-registry"]
    c.start_and_wait_for_tcp(services=kafka_stack)
    c.up("testdrive", persistent=True)

    random.seed(options.seed)

    print("Generating test...")
    generated = Test(scenario=scenario_cls(), max_actions=500)
    print("Running test...")
    generated.run(c)
Example #3
0
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run the dbt adapter tests once per matching entry of ``test_cases``.

    The optional positional ``filter`` is matched as a substring of each test
    case name; the default empty string matches every case.
    """
    parser.add_argument(
        "filter", nargs="?", default="", help="limit to test cases matching filter"
    )
    options = parser.parse_args()

    for case in test_cases:
        if options.filter not in case.name:
            continue

        print(f"> Running test case {case.name}")
        mz_override = Materialized(
            options=case.materialized_options,
            image=case.materialized_image,
            depends_on=["test-certs"],
            volumes=["secrets:/secrets"],
        )

        with c.override(mz_override):
            c.up("materialized")
            c.wait_for_tcp(host="materialized", port=6875)
            c.run(
                "dbt-test",
                "pytest",
                "dbt-materialize/test",
                env_extra=case.dbt_env,
            )
Example #4
0
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run testdrive scripts before and after killing and restarting materialized.

    ``before-restart.td`` runs first; ``materialized`` is then killed and
    brought back up, and ``after-restart.td`` runs with ``--no-reset`` and a
    different Kafka consumer group id. Both runs share the same ``--seed``.
    """
    parser.add_argument(
        "--seed",
        help="an alternate seed to use to avoid clashing with existing topics",
        type=int,
        default=1,
    )
    options = parser.parse_args()
    seed_flag = f"--seed={options.seed}"

    services = ["zookeeper", "kafka", "schema-registry", "materialized"]
    c.start_and_wait_for_tcp(services=services)

    c.run(
        "testdrive",
        seed_flag,
        "--kafka-option=group.id=group1",
        "before-restart.td",
    )

    # Restart materialized between the two testdrive runs.
    c.kill("materialized")
    c.up("materialized")
    c.wait_for_materialized()

    c.run(
        "testdrive",
        seed_flag,
        "--no-reset",
        "--kafka-option=group.id=group2",
        "after-restart.td",
    )
Example #5
0
def workflow_start_two_mzs(c: Composition,
                           parser: WorkflowArgumentParser) -> None:
    """Start two Materialize instances, optionally pinned to different git tags.

    ``mz_this`` is published on host port 6875 and ``mz_other`` on host port
    16875, so the two can be compared manually (e.g. with RQG). When a tag is
    not supplied, the corresponding ``image`` override is ``None``.
    """
    parser.add_argument("--this-tag",
                        help="Run Materialize with this git tag on port 6875")
    parser.add_argument("--other-tag",
                        help="Run Materialize with this git tag on port 16875")
    options = parser.parse_args()

    # (service name, requested git tag, host:container port mapping)
    instances = [
        ("mz_this", options.this_tag, "6875:6875"),
        ("mz_other", options.other_tag, "16875:6875"),
    ]

    overrides = [
        Materialized(
            name=service,
            image=f"materialize/materialized:{tag}" if tag else None,
            # Keep the mzdata, pgdata, etc. private to the container
            volumes=[],
            allow_host_ports=True,
            ports=[port_mapping],
        )
        for service, tag, port_mapping in instances
    ]

    with c.override(*overrides):
        for service, _tag, _ports in instances:
            c.up(service)
            c.wait_for_materialized(service=service)
Example #6
0
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run upgrade scenarios with a set of checks.

    ``--scenario`` and ``--check`` each name a class looked up in this
    module's globals. When omitted, every direct subclass of ``Scenario``
    (respectively ``Check``) is used.
    """
    c.silent = True

    parser.add_argument("--scenario",
                        metavar="SCENARIO",
                        type=str,
                        help="Scenario to run.")
    parser.add_argument("--check",
                        metavar="CHECK",
                        type=str,
                        help="Check to run.")
    options = parser.parse_args()

    c.up("testdrive", persistent=True)

    #    c.start_and_wait_for_tcp(
    #        services=["zookeeper", "kafka", "schema-registry", "postgres"]
    #    )

    if options.scenario:
        scenario_classes = [globals()[options.scenario]]
    else:
        scenario_classes = Scenario.__subclasses__()

    if options.check:
        check_classes = [globals()[options.check]]
    else:
        check_classes = Check.__subclasses__()

    for scenario_cls in scenario_classes:
        print(f"Testing upgrade scenario {scenario_cls}")
        scenario_cls(checks=check_classes).run(c)
Example #7
0
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run testdrive files against a configurable materialized instance.

    Command-line flags select the message broker, the AWS region or
    localstack endpoint handed to testdrive, the number of materialized
    workers, and whether ``--persistent-user-tables`` is enabled.
    ``materialized`` is killed once the testdrive run returns.
    """
    parser.add_argument(
        "--redpanda",
        action="store_true",
        help="run against Redpanda instead of the Confluent Platform",
    )
    parser.add_argument(
        "--aws-region",
        help="run against the specified AWS region instead of localstack",
    )
    parser.add_argument(
        "--workers",
        type=int,
        metavar="N",
        help="set the number of materialized dataflow workers",
    )
    parser.add_argument(
        "--persistent-user-tables",
        action="store_true",
        help="enable the --persistent-user-tables materialized option",
    )
    parser.add_argument(
        "files",
        nargs="*",
        default=["*.td", "esoteric/*.td"],
        help="run against the specified files",
    )
    options = parser.parse_args()

    if not options.redpanda and Arch.host() == Arch.AARCH64:
        ui.warn(
            "Running the Confluent Platform in Docker on ARM-based machines is "
            "nearly unusably slow. Consider using Redpanda instead (--redpanda) "
            "or running tests without mzcompose.")

    if options.redpanda:
        broker_services = ["redpanda"]
    else:
        broker_services = ["zookeeper", "kafka", "schema-registry"]
    services = ["materialized", *broker_services]

    if options.persistent_user_tables:
        mz_flags = ["--persistent-user-tables"]
    else:
        mz_flags = []
    mz_override = Materialized(workers=options.workers, options=mz_flags)

    # NOTE(review): the localstack endpoint is used below but "localstack" is
    # not among the services started here - presumably it is brought up as a
    # dependency of the testdrive service; confirm against the compose file.
    if options.aws_region:
        aws_flags = [f"--aws-region={options.aws_region}"]
    else:
        aws_flags = ["--aws-endpoint=http://localstack:4566"]
    td_override = Testdrive(
        forward_buildkite_shard=True,
        entrypoint_extra=aws_flags,
    )

    with c.override(mz_override, td_override):
        c.start_and_wait_for_tcp(services=services)
        c.wait_for_materialized("materialized")
        c.run("testdrive-svc", *options.files)
        c.kill("materialized")
Example #8
0
def workflow_cluster(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run the limits tests against a cluster of two replicas with two nodes each.

    Four ``Computed`` services are started (``computed_<replica>_<node>``),
    each peered with the other node of its replica, and ``cluster1`` is
    created on top of them before ``run_test`` is invoked.
    """
    parser.add_argument("--scenario",
                        metavar="SCENARIO",
                        type=str,
                        help="Scenario to run.")
    parser.add_argument(
        "--workers",
        type=int,
        metavar="N",
        default=2,
        help="set the default number of workers",
    )
    options = parser.parse_args()

    kafka_stack = ["zookeeper", "kafka", "schema-registry"]
    c.start_and_wait_for_tcp(services=kafka_stack)

    c.up("materialized")
    c.wait_for_materialized()

    # One inner list per replica; every node is peered with its whole replica.
    replica_groups = [
        ["computed_1_1", "computed_1_2"],
        ["computed_2_1", "computed_2_2"],
    ]
    nodes = [
        Computed(name=node_name, workers=options.workers, peers=list(group))
        for group in replica_groups
        for node_name in group
    ]

    with c.override(*nodes):
        c.up(*(node.name for node in nodes))

        c.sql("""
            CREATE CLUSTER cluster1 REPLICAS (
                replica1 (REMOTE ['computed_1_1:2100', 'computed_1_2:2100']),
                replica2 (REMOTE ['computed_2_1:2100', 'computed_2_2:2100'])
            )
        """)

        run_test(c, options)
Example #9
0
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run the selected testdrive files once materialized and postgres are up.

    The positional ``filter`` arguments are forwarded verbatim to the
    ``testdrive-svc`` service and default to ``*.td``.
    """
    parser.add_argument(
        "filter",
        nargs="*",
        default=["*.td"],
        help="limit to only the files matching filter",
    )
    options = parser.parse_args()

    services = ("materialized", "test-certs", "testdrive-svc", "postgres")
    c.up(*services)

    c.wait_for_materialized()
    c.wait_for_postgres()

    c.run("testdrive-svc", *options.filter)
Example #10
0
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Start the Kafka stack and materialized, then hand over to ``run_test``.

    The parsed arguments (including the optional ``--scenario``) are passed
    to ``run_test`` unchanged.
    """
    parser.add_argument(
        "--scenario", metavar="SCENARIO", type=str, help="Scenario to run."
    )
    options = parser.parse_args()

    kafka_stack = ["zookeeper", "kafka", "schema-registry"]
    c.start_and_wait_for_tcp(services=kafka_stack)

    c.up("materialized")
    c.wait_for_materialized()

    run_test(c, options)
Example #11
0
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run testdrive and upload the resulting JUnit report.

    Command-line flags select the message broker, the AWS region (localstack
    is started when no region is given), and the default number of Kafka
    partitions. The JUnit report is uploaded whether or not the testdrive run
    succeeds.
    """
    parser.add_argument(
        "--redpanda",
        action="store_true",
        help="run against Redpanda instead of the Confluent Platform",
    )
    parser.add_argument(
        "--aws-region",
        help="run against the specified AWS region instead of localstack",
    )
    parser.add_argument(
        "--kafka-default-partitions",
        type=int,
        metavar="N",
        help="set the default number of kafka partitions per topic",
    )
    parser.add_argument(
        "files",
        nargs="*",
        default=["*.td"],
        help="run against the specified files",
    )
    args = parser.parse_args()

    if not args.redpanda and Arch.host() == Arch.AARCH64:
        ui.warn(
            "Running the Confluent Platform in Docker on ARM-based machines is "
            "nearly unusably slow. Consider using Redpanda instead (--redpanda) "
            "or running tests without mzcompose."
        )

    dependencies = ["materialized"]
    if args.redpanda:
        dependencies += ["redpanda"]
    else:
        dependencies += ["zookeeper", "kafka", "schema-registry"]

    if args.aws_region is None:
        dependencies += ["localstack"]

    testdrive = Testdrive(
        forward_buildkite_shard=True,
        kafka_default_partitions=args.kafka_default_partitions,
        aws_region=args.aws_region,
        validate_postgres_stash=True,
    )

    with c.override(testdrive):
        c.start_and_wait_for_tcp(services=dependencies)
        c.wait_for_materialized("materialized")

        # Compute the report name *before* entering the try block: if this
        # call fails, the finally clause must not run with `junit_report`
        # unbound, which would raise a second, misleading error.
        junit_report = ci_util.junit_report_filename(c.name)
        try:
            c.run("testdrive", f"--junit-report={junit_report}", *args.files)
        finally:
            # Upload even when testdrive fails so the failure is reported.
            ci_util.upload_junit_report(
                "testdrive", Path(__file__).parent / junit_report
            )
Example #12
0
def workflow_replay(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Replay an archive through the ``mbta-demo`` service in the background.

    Arguments:
        --config-file: path of the routes configuration CSV passed to the
            demo (defaults to the weekend frequent-routes config).
        ARCHIVE-PATH: positional path of the archive to replay.
    """
    parser.add_argument(
        "--config-file",
        default="/workdir/examples/all-frequent-routes-config-weekend.csv",
    )
    parser.add_argument("archive_path", metavar="ARCHIVE-PATH")
    args = parser.parse_args()

    start_everything(c)
    c.run(
        "mbta-demo",
        "replay",
        args.config_file,
        args.archive_path,
        # Broker address; was misspelled "kakfa:9092", which does not match
        # the "kafka" host used by the live-data workflow.
        "kafka:9092",
        detach=True,
    )
Example #13
0
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run testdrive against an SSL-enabled Confluent Platform.

    The positional ``files`` arguments are forwarded to ``testdrive-svc`` and
    default to ``*.td``.
    """
    parser.add_argument(
        "files",
        nargs="*",
        default=["*.td"],
        help="run against the specified files",
    )
    options = parser.parse_args()

    services = ["zookeeper", "kafka", "schema-registry", "materialized"]
    c.start_and_wait_for_tcp(services=services)

    c.run("testdrive-svc", *options.files)
Example #14
0
def workflow_test_cluster(c: Composition,
                          parser: WorkflowArgumentParser) -> None:
    """Run the same testdrive files against a changing compute cluster.

    The files are run four times: with a single two-node replica, after a
    second replica is added, after one node of the first replica is killed,
    and after the first replica is dropped.
    """
    parser.add_argument(
        "glob",
        nargs="*",
        default=["smoke/*.td"],
        help="run against the specified files",
    )
    options = parser.parse_args()
    td_files = options.glob

    c.down(destroy_volumes=True)
    base_services = ["zookeeper", "kafka", "schema-registry", "localstack"]
    c.start_and_wait_for_tcp(services=base_services)
    c.up("materialized")
    c.wait_for_materialized()

    # Step 1: a remote cluster backed by computed_1 and computed_2.
    for node in ("computed_1", "computed_2"):
        c.up(node)
    c.sql("DROP CLUSTER IF EXISTS cluster1 CASCADE;")
    c.sql(
        "CREATE CLUSTER cluster1 REPLICAS (replica1 (REMOTE ['computed_1:2100', 'computed_2:2100']));"
    )
    c.run("testdrive", *td_files)

    # Step 2: add a second replica backed by computed_3 and computed_4.
    for node in ("computed_3", "computed_4"):
        c.up(node)
    c.sql(
        "CREATE CLUSTER REPLICA cluster1.replica2 REMOTE ['computed_3:2100', 'computed_4:2100']"
    )
    c.run("testdrive", *td_files)

    # Step 3: lose one node of the first replica.
    c.kill("computed_1")
    c.run("testdrive", *td_files)

    # Step 4: drop the first replica entirely so only replica 2 remains.
    c.sql("DROP CLUSTER REPLICA cluster1.replica1")
    c.run("testdrive", *td_files)
Example #15
0
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Test upgrading from a range of released versions.

    Versions from ``all_versions`` at or above ``--min-version`` are tested,
    optionally truncated to the first ``--most-recent`` entries, and visited
    in reverse order. ``--tests`` limits the run to the non-SSL or SSL
    variant; the non-SSL variant additionally tests ``current_source``.
    """
    parser.add_argument(
        "--min-version",
        metavar="VERSION",
        type=Version.parse,
        default=Version.parse("0.8.0"),
        help="the minimum version to test from",
    )
    parser.add_argument(
        "--most-recent",
        metavar="N",
        type=int,
        help="limit testing to the N most recent versions",
    )
    parser.add_argument(
        "--tests",
        choices=["all", "non-ssl", "ssl"],
        default="all",
        help="limit testing to certain scenarios",
    )
    parser.add_argument(
        "filter", nargs="?", default="*", help="limit to only the files matching filter"
    )
    options = parser.parse_args()

    def priors_of(version):
        # Tags of every known version strictly older than `version`.
        return [f"v{older}" for older in all_versions if older < version]

    candidates = [v for v in all_versions if v >= options.min_version]
    if options.most_recent is not None:
        candidates = candidates[: options.most_recent]
    candidates.reverse()

    c.start_and_wait_for_tcp(
        services=["zookeeper", "kafka", "schema-registry", "postgres"]
    )

    run_plain = options.tests in ("all", "non-ssl")
    run_ssl = options.tests in ("all", "ssl")

    if run_plain:
        for version in candidates:
            test_upgrade_from_version(
                c, f"v{version}", priors_of(version), filter=options.filter
            )

        test_upgrade_from_version(
            c, "current_source", priors=["*"], filter=options.filter
        )

    if run_ssl:
        kafka, schema_registry, testdrive = ssl_services()
        with c.override(kafka, schema_registry, testdrive):
            c.start_and_wait_for_tcp(services=["kafka", "schema-registry"])
            for version in candidates:
                test_upgrade_from_version(
                    c,
                    f"v{version}",
                    priors_of(version),
                    filter=options.filter,
                    style="ssl-",
                )
Example #16
0
def workflow_demo(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run the billing demo after bringing up the Kafka stack.

    ``--message-count`` and ``--partitions`` are forwarded to the demo;
    ``--check-sink`` is forwarded only when set.
    """
    parser.add_argument("--message-count", type=int, default=1000)
    parser.add_argument("--partitions", type=int, default=1)
    parser.add_argument("--check-sink", action="store_true")
    options = parser.parse_args()

    kafka_stack = ["zookeeper", "kafka", "schema-registry"]
    c.start_and_wait_for_tcp(services=kafka_stack)

    demo_flags = [
        "--materialized-host=materialized",
        "--kafka-host=kafka",
        "--schema-registry-url=http://schema-registry:8081",
        "--csv-file-name=/share/billing-demo/data/prices.csv",
        "--create-topic",
        "--replication-factor=1",
        f"--message-count={options.message_count}",
        f"--partitions={options.partitions}",
    ]
    if options.check_sink:
        demo_flags.append("--check-sink")

    c.run("billing-demo", *demo_flags)
Example #17
0
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run the billing demo against materialized and the selected broker.

    ``--redpanda`` swaps the Confluent Platform services for Redpanda;
    ``--message-count``, ``--partitions`` and ``--check-sink`` are forwarded
    to the demo.
    """
    parser.add_argument("--message-count", type=int, default=1000)
    parser.add_argument("--partitions", type=int, default=1)
    parser.add_argument("--check-sink", action="store_true")
    parser.add_argument(
        "--redpanda",
        action="store_true",
        help="run against Redpanda instead of the Confluent Platform",
    )
    options = parser.parse_args()

    if options.redpanda:
        broker_services = ["redpanda"]
    else:
        broker_services = ["zookeeper", "kafka", "schema-registry"]
    c.start_and_wait_for_tcp(services=["materialized", *broker_services])

    demo_flags = [
        "--materialized-host=materialized",
        "--kafka-host=kafka",
        "--schema-registry-url=http://schema-registry:8081",
        "--create-topic",
        "--replication-factor=1",
        f"--message-count={options.message_count}",
        f"--partitions={options.partitions}",
    ]
    if options.check_sink:
        demo_flags.append("--check-sink")

    c.run("billing-demo", *demo_flags)
Example #18
0
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run the dbt adapter tests once per matching entry of ``test_cases``.

    Each matching case tears the composition down, restarts the Kafka stack
    and materialized with the case's options, and runs pytest with the
    case's environment layered over the default connection settings.
    """
    parser.add_argument("filter",
                        nargs="?",
                        default="",
                        help="limit to test cases matching filter")
    options = parser.parse_args()

    for case in test_cases:
        if options.filter not in case.name:
            continue

        print(f"> Running test case {case.name}")
        mz_override = Materialized(
            options=case.materialized_options,
            image=case.materialized_image,
            depends_on=["test-certs"],
            volumes_extra=["secrets:/secrets"],
        )

        with c.test_case(case.name), c.override(mz_override):
            c.down()
            c.start_and_wait_for_tcp(
                services=["zookeeper", "kafka", "schema-registry"])
            c.up("materialized")
            c.wait_for_tcp(host="materialized", port=6875)

            # Case-specific variables take precedence over the defaults.
            dbt_env = {
                "DBT_HOST": "materialized",
                "KAFKA_ADDR": "kafka:9092",
                "SCHEMA_REGISTRY_URL": "http://schema-registry:8081",
            }
            dbt_env.update(case.dbt_env)

            c.run(
                "dbt-test",
                "pytest",
                "dbt-materialize/test",
                env_extra=dbt_env,
            )
Example #19
0
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run testdrive with certificate material passed in as ``--var`` values.

    Five files are read from ``/secrets`` in the ``test-certs`` service and
    handed to testdrive as ``ssl-*`` variables, followed by the positional
    ``filter`` arguments (default ``*.td``).
    """
    parser.add_argument(
        "filter",
        nargs="*",
        default=["*.td"],
        help="limit to only the files matching filter",
    )
    options = parser.parse_args()

    def read_secret(filename):
        # Captured stdout of `cat /secrets/<filename>` in test-certs.
        return c.run(
            "test-certs", "cat", f"/secrets/{filename}", capture=True
        ).stdout

    # (testdrive variable name, file under /secrets), read in this order.
    secret_files = [
        ("ssl-ca", "ca.crt"),
        ("ssl-cert", "certuser.crt"),
        ("ssl-key", "certuser.key"),
        ("ssl-wrong-cert", "postgres.crt"),
        ("ssl-wrong-key", "postgres.key"),
    ]
    var_flags = [
        f"--var={var_name}={read_secret(filename)}"
        for var_name, filename in secret_files
    ]

    c.up("materialized", "test-certs", "testdrive", "postgres")
    c.wait_for_materialized()
    c.wait_for_postgres()

    c.run("testdrive", *var_flags, *options.filter)
Example #20
0
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run scenarios with checks, either all together or one check at a time.

    ``--scenario`` and each ``--check`` name a class looked up in this
    module's globals; when omitted, every direct subclass of ``Scenario``
    (respectively ``Check``) is used. ``--execution-mode`` decides whether a
    scenario receives all checks in one run or is re-run once per check, with
    ``setup``/``teardown`` around every run.
    """
    # c.silent = True

    parser.add_argument(
        "--scenario", metavar="SCENARIO", type=str, help="Scenario to run."
    )
    parser.add_argument(
        "--check", metavar="CHECK", type=str, action="append", help="Check(s) to run."
    )
    parser.add_argument(
        "--execution-mode",
        type=ExecutionMode,
        choices=list(ExecutionMode),
        default=ExecutionMode.ALLTOGETHER,
    )
    options = parser.parse_args()

    if options.scenario:
        scenario_classes = [globals()[options.scenario]]
    else:
        scenario_classes = Scenario.__subclasses__()

    if options.check:
        check_classes = [globals()[check_name] for check_name in options.check]
    else:
        check_classes = Check.__subclasses__()

    def run_once(scenario_cls, selected_checks):
        # One full setup / run / teardown cycle.
        setup(c)
        scenario_cls(checks=selected_checks).run(c)
        teardown(c)

    mode = options.execution_mode
    for scenario_cls in scenario_classes:
        print(f"Testing scenario {scenario_cls}...")
        if mode is ExecutionMode.ALLTOGETHER:
            run_once(scenario_cls, check_classes)
        elif mode is ExecutionMode.ONEATATIME:
            for check_cls in check_classes:
                print(f"Running individual check {check_cls}, scenario {scenario_cls}")
                run_once(scenario_cls, [check_cls])
        else:
            assert False
Example #21
0
def workflow_start_live_data(c: Composition,
                             parser: WorkflowArgumentParser) -> None:
    """Start the ``mbta-demo`` service in live-data mode in the background.

    The positional ``API-KEY`` is forwarded to the demo, and
    ``--archive-at-shutdown`` is passed on as ``"1"`` or ``"0"``.
    """
    parser.add_argument(
        "--config-file",
        default="/workdir/examples/all-frequent-routes-config-weekend.csv",
    )
    parser.add_argument("--archive-at-shutdown", action="store_true")
    parser.add_argument("api_key", metavar="API-KEY")
    options = parser.parse_args()

    if options.archive_at_shutdown:
        archive_flag = "1"
    else:
        archive_flag = "0"

    start_everything(c)
    demo_args = [
        "start_docker",
        options.config_file,
        "kafka:9092",
        options.api_key,
        archive_flag,
    ]
    c.run("mbta-demo", *demo_args, detach=True)
Example #22
0
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Write records to Kafka at a fixed rate and track ingestion lag.

    Records are sent at ``--records-per-second`` for ``--num-seconds``
    seconds while materialized is polled for the number of records it has
    ingested. Progress is printed roughly once per second, and the workflow
    returns once every record has been both sent and ingested.

    Exits with status 1 when ``--s3-storage`` is empty or is given without
    ``--enable-persistence``.
    """
    parser.add_argument(
        "--num-seconds",
        type=int,
        default=100,
        help="number of seconds to write records to Kafka",
    )
    parser.add_argument(
        "--records-per-second",
        type=int,
        default=10000,
        help="throughput of writes to maintain during testing",
    )
    parser.add_argument("--num-keys",
                        type=int,
                        default=1000000000,
                        help="number of distinct keys")
    parser.add_argument("--value-bytes",
                        type=int,
                        default=500,
                        help="record payload size in bytes")
    parser.add_argument(
        "--upsert",
        action="store_true",
        help="whether to use envelope UPSERT (True) or NONE (False)",
    )
    parser.add_argument("--timeout-secs",
                        type=int,
                        default=120,
                        help="timeout to send records to Kafka")
    parser.add_argument(
        "--enable-persistence",
        action="store_true",
        help="whether or not to enable persistence on materialized",
    )
    parser.add_argument(
        "--s3-storage",
        type=str,
        default=None,
        help=
        "enables s3 persist storage, pointed at the given subpath of our internal testing bucket",
    )
    parser.add_argument(
        "--workers",
        type=int,
        default=None,
        help="number of dataflow workers to use in materialized",
    )
    args = parser.parse_args()

    envelope = "UPSERT" if args.upsert else "NONE"

    options = []
    if args.enable_persistence:
        options = [
            "--persistent-user-tables",
            "--persistent-kafka-sources",
            "--disable-persistent-system-tables-test",
        ]

    # Validate --s3-storage before any service is started.
    if args.s3_storage == "":
        print("--s3-storage value must be non-empty", file=sys.stderr)
        sys.exit(1)
    elif args.s3_storage:
        if not args.enable_persistence:
            print(
                "cannot specify --s3-storage without --enable-persistence",
                file=sys.stderr,
            )
            sys.exit(1)
        options.extend([
            "--persist-storage-enabled",
            f"--persist-storage=s3://mtlz-test-persist-1d-lifecycle-delete/{args.s3_storage}",
        ])

    override = [
        Materialized(
            workers=args.workers,
            timestamp_frequency="1s",
            options=options,
        )
    ]

    with c.override(*override):
        c.start_and_wait_for_tcp(services=prerequisites)

        c.up("materialized")
        c.wait_for_materialized("materialized")

        c.run(
            "testdrive-svc",
            f"--var=envelope={envelope}",
            "setup.td",
        )

        start = time.monotonic()
        records_sent = 0
        total_records_to_send = args.records_per_second * args.num_seconds
        # Maximum observed delta between records sent by the benchmark and ingested by
        # Materialize.
        max_lag = 0
        last_reported_time = 0.0

        while True:
            elapsed = time.monotonic() - start
            records_ingested = query_materialize(c)

            lag = records_sent - records_ingested
            max_lag = max(max_lag, lag)

            # Report our findings back once per second.
            if elapsed - last_reported_time > 1:
                print(
                    f"C> after {elapsed:.3f}s sent {records_sent} records, and ingested {records_ingested}. max observed lag {max_lag} records, most recent lag {lag} records"
                )
                last_reported_time = elapsed

            # Determine how many records we are scheduled to send, based on how long
            # the benchmark has been running and the desired QPS. Capping the
            # elapsed time at num_seconds caps the schedule at the total.
            records_scheduled = int(
                min(elapsed, args.num_seconds) * args.records_per_second)
            records_to_send = records_scheduled - records_sent

            if records_to_send > 0:
                send_records(
                    c,
                    num_records=records_to_send,
                    num_keys=args.num_keys,
                    value_bytes=args.value_bytes,
                    timeout_secs=args.timeout_secs,
                )
                records_sent = records_scheduled

            # Exit once we've sent all the records we need to send, and confirmed that
            # Materialize has ingested them.
            if records_sent == total_records_to_send == records_ingested:
                print(
                    f"C> Finished after {elapsed:.3f}s sent and ingested {records_sent} records. max observed lag {max_lag} records."
                )
                break
Example #23
0
def workflow_instance_size(c: Composition,
                           parser: WorkflowArgumentParser) -> None:
    """Create multiple clusters with multiple nodes and replicas each.

    ``--clusters`` clusters are created, each with ``--replicas`` replicas of
    ``--nodes`` ``Computed`` nodes running ``--workers`` workers. A table, a
    materialized view and a Kafka source are then created in every cluster,
    and each cluster is checked to return the expected row counts.
    """
    parser.add_argument(
        "--workers",
        type=int,
        metavar="N",
        default=2,
        help="set the default number of workers",
    )

    parser.add_argument(
        "--clusters",
        type=int,
        metavar="N",
        default=16,
        help="set the number of clusters to create",
    )
    parser.add_argument(
        "--nodes",
        type=int,
        metavar="N",
        default=4,
        help="set the number of nodes per cluster",
    )
    parser.add_argument(
        "--replicas",
        type=int,
        metavar="N",
        default=4,
        help="set the number of replicas per cluster",
    )
    args = parser.parse_args()

    def node_names(cluster_id: int, replica_id: int) -> list:
        # Service names of all nodes that make up one replica of one cluster.
        return [
            f"computed_{cluster_id}_{replica_id}_{node_id}"
            for node_id in range(args.nodes)
        ]

    # Start services only after the command line has been parsed, so that
    # --help or an invalid flag does not bring up containers first.
    c.start_and_wait_for_tcp(
        services=["zookeeper", "kafka", "schema-registry"])

    c.up("testdrive", persistent=True)
    c.up("materialized")
    c.wait_for_materialized()

    # Construct the required Computed instances and peer them into clusters
    computeds = []
    for cluster_id in range(args.clusters):
        for replica_id in range(args.replicas):
            nodes = node_names(cluster_id, replica_id)
            computeds.extend(
                Computed(name=node, peers=nodes, workers=args.workers)
                for node in nodes)

    with c.override(*computeds):
        with c.override(Testdrive(seed=1, no_reset=True)):

            for n in computeds:
                c.up(n.name)

            # Create some input data
            c.testdrive(
                dedent("""
                    > CREATE TABLE ten (f1 INTEGER);
                    > INSERT INTO ten VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);

                    $ set schema={
                        "type" : "record",
                        "name" : "test",
                        "fields" : [
                            {"name":"f1", "type":"string"}
                        ]
                      }

                    $ kafka-create-topic topic=instance-size

                    $ kafka-ingest format=avro topic=instance-size schema=${schema} publish=true repeat=10000
                    {"f1": "fish"}
                    """))

            # Construct the required CREATE CLUSTER statements
            for cluster_id in range(args.clusters):
                replica_definitions = []
                for replica_id in range(args.replicas):
                    remotes = ", ".join(
                        f"'{n}:2100'"
                        for n in node_names(cluster_id, replica_id))
                    replica_definitions.append(
                        f"replica_{cluster_id}_{replica_id} (REMOTE [{remotes}])")

                c.sql(f"CREATE CLUSTER cluster_{cluster_id} REPLICAS (" +
                      ",".join(replica_definitions) + ")")

            # Construct some dataflows in each cluster
            for cluster_id in range(args.clusters):
                cluster_name = f"cluster_{cluster_id}"

                c.testdrive(
                    dedent(f"""
                         > SET cluster={cluster_name}

                         > CREATE DEFAULT INDEX ON ten;

                         > CREATE MATERIALIZED VIEW v_{cluster_name} AS
                           SELECT COUNT(*) AS c1 FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4;

                         > CREATE MATERIALIZED SOURCE s_{cluster_name}
                           FROM KAFKA BROKER '${{testdrive.kafka-addr}}' TOPIC
                           'testdrive-instance-size-${{testdrive.seed}}'
                           FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY '${{testdrive.schema-registry-url}}'
                           ENVELOPE NONE
                     """))

            # Validate that each individual cluster is operating properly
            for cluster_id in range(args.clusters):
                cluster_name = f"cluster_{cluster_id}"

                c.testdrive(
                    dedent(f"""
                         > SET cluster={cluster_name}

                         > SELECT c1 FROM v_{cluster_name};
                         10000

                         > SELECT COUNT(*) FROM s_{cluster_name}
                         10000
                     """))
Example #24
0
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Feature benchmark framework.

    Registers the benchmark command-line flags, selects the scenarios to run
    starting from ``--root-scenario``, starts the required services and then
    runs every selected scenario through ``run_one_scenario``. Scenarios whose
    comparison reports a regression are run again in the next cycle, for at
    most ``--max-retries`` cycles. If any scenario is still flagged after the
    last cycle, an error is printed and the process exits with status 1.
    """

    c.silent = True

    parser.add_argument(
        "--redpanda",
        action="store_true",
        help="run against Redpanda instead of the Confluent Platform",
    )

    # String flags whose default is read from the environment:
    # (flag, metavar, environment variable, help text).
    env_backed_flags = (
        (
            "--this-tag",
            "TAG",
            "THIS_TAG",
            "'This' Materialize container tag to benchmark. If not provided, the current source will be used.",
        ),
        (
            "--this-options",
            "OPTIONS",
            "THIS_OPTIONS",
            "Options to pass to the 'This' instance.",
        ),
        (
            "--other-tag",
            "TAG",
            "OTHER_TAG",
            "'Other' Materialize container tag to benchmark. If not provided, the current source will be used.",
        ),
        (
            "--other-options",
            "OPTIONS",
            "OTHER_OPTIONS",
            "Options to pass to the 'Other' instance.",
        ),
    )
    for flag, metavar, env_var, help_text in env_backed_flags:
        parser.add_argument(
            flag,
            metavar=metavar,
            type=str,
            default=os.getenv(env_var, None),
            help=help_text,
        )

    parser.add_argument(
        "--root-scenario",
        "--scenario",
        metavar="SCENARIO",
        type=str,
        default="Scenario",
        help="Scenario or scenario family to benchmark. See scenarios.py for available scenarios.",
    )

    parser.add_argument(
        "--scale",
        metavar="+N | -N | N",
        type=str,
        default=None,
        help="Absolute or relative scale to apply.",
    )

    # Integer flags: (flag, default, help text).
    integer_flags = (
        ("--max-measurements", 99, "Limit the number of measurements to N."),
        (
            "--max-retries",
            3,
            "Retry any potential performance regressions up to N times.",
        ),
        ("--this-nodes", None, "Start a cluster with that many nodes for 'THIS'"),
        ("--other-nodes", None, "Start a cluster with that many nodes for 'OTHER'"),
        ("--this-workers", None, "Number of workers to use for 'THIS'"),
        ("--other-workers", None, "Number of workers to use for 'OTHER'"),
    )
    for flag, default, help_text in integer_flags:
        parser.add_argument(
            flag,
            metavar="N",
            type=int,
            default=default,
            help=help_text,
        )

    args = parser.parse_args()

    banner = f"""
this_tag: {args.this_tag}
this_options: {args.this_options}

other_tag: {args.other_tag}
other_options: {args.other_options}

root_scenario: {args.root_scenario}"""
    print(banner)

    # Select the scenarios to run. The root scenario is looked up by name in
    # this module's globals. Each direct subclass contributes its own direct
    # subclasses, or itself when it has none; a root without subclasses is
    # run on its own. A dict is used so that each scenario appears once while
    # keeping discovery order.
    root = globals()[args.root_scenario]
    pending = {}
    for child in root.__subclasses__():
        for leaf in child.__subclasses__() or [child]:
            pending[leaf] = 1
    if not pending:
        pending[root] = 1

    if args.redpanda:
        brokers = ["redpanda"]
    else:
        brokers = ["zookeeper", "kafka", "schema-registry"]
    c.start_and_wait_for_tcp(services=["postgres", *brokers])

    for cycle_no in range(1, args.max_retries + 1):
        names = ", ".join(candidate.__name__ for candidate in pending)
        print(f"Cycle {cycle_no} with scenarios: {names}")

        report = Report()

        # Iterate over a snapshot: entries are removed from `pending` as
        # scenarios come back without a regression.
        for candidate in tuple(pending):
            comparison = run_one_scenario(c, candidate, args)
            report.append(comparison)

            if not comparison.is_regression():
                pending.pop(candidate)

            print(f"+++ Benchmark Report for cycle {cycle_no}:")
            report.dump()

        if not pending:
            break

    if pending:
        offenders = ", ".join(candidate.__name__ for candidate in pending)
        print(f"ERROR: The following scenarios have regressions: {offenders}")
        sys.exit(1)
Example #25
0
def workflow_mzcloud(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run the benchmark scenarios against an mzcloud deployment.

    Requires the composition to preserve ports and both ``--mzcloud-url`` and
    ``--external-addr`` to be given. Kafka is started with its advertised
    listener set to the external address, then every selected scenario
    (optionally narrowed by ``--test-filter``) is benchmarked once per cycle
    for ``--max-retries`` cycles, dumping the report after each scenario.
    """
    # Make sure Kafka is externally accessible on a predictable port.
    assert c.preserve_ports, (
        "`--preserve-ports` must be specified (BEFORE the `run` command)"
    )

    # Connection flags: (flag, help text).
    connection_flags = (
        (
            "--mzcloud-url",
            "The postgres connection url to the mzcloud deployment to benchmark.",
        ),
        (
            "--external-addr",
            "Kafka and Schema Registry are started by mzcompose and exposed on the public interface. This is the IP address or hostname that is accessible by the mzcloud instance, usually the public IP of your machine.",
        ),
    )
    for flag, help_text in connection_flags:
        parser.add_argument(flag, type=str, help=help_text)

    parser.add_argument(
        "--root-scenario",
        "--scenario",
        metavar="SCENARIO",
        type=str,
        default="Scenario",
        help="Scenario or scenario family to benchmark. See scenarios.py for available scenarios.",
    )

    parser.add_argument(
        "--scale",
        metavar="+N | -N | N",
        type=str,
        default=None,
        help="Absolute or relative scale to apply.",
    )

    # Integer flags: (flag, default, help text).
    integer_flags = (
        ("--max-measurements", 99, "Limit the number of measurements to N."),
        (
            "--max-retries",
            2,
            "Retry any potential performance regressions up to N times.",
        ),
    )
    for flag, default, help_text in integer_flags:
        parser.add_argument(
            flag,
            metavar="N",
            type=int,
            default=default,
            help=help_text,
        )

    parser.add_argument(
        "--test-filter",
        type=str,
        help="Filter scenario names by this string (case insensitive).",
    )

    args = parser.parse_args()

    assert args.mzcloud_url
    assert args.external_addr

    banner = f"""
mzcloud url: {args.mzcloud_url}
external addr: {args.external_addr}

root_scenario: {args.root_scenario}"""
    print(banner)

    # Advertise Kafka on the externally reachable address.
    kafka_override = KafkaService(
        extra_environment=[
            f"KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://{args.external_addr}:9092"
        ]
    )

    with c.override(kafka_override):
        c.start_and_wait_for_tcp(
            services=["zookeeper", "kafka", "schema-registry"]
        )
        c.up("testdrive", persistent=True)

        # Select the scenarios to run. The root scenario is looked up by name
        # in this module's globals. Each direct subclass contributes its own
        # direct subclasses, or itself when it has none; a root without
        # subclasses is run on its own.
        root = globals()[args.root_scenario]
        selected = {}
        for child in root.__subclasses__():
            for leaf in child.__subclasses__() or [child]:
                selected[leaf] = 1
        if not selected:
            selected[root] = 1

        for cycle_no in range(1, args.max_retries + 1):
            names = ", ".join(candidate.__name__ for candidate in selected)
            print(f"Cycle {cycle_no} with scenarios: {names}")

            report = SingleReport()

            for candidate in list(selected):
                name = candidate.__name__

                # Case-insensitive substring match; no filter keeps everything.
                keep = (
                    not args.test_filter
                    or args.test_filter.lower() in name.lower()
                )
                if not keep:
                    continue

                print(f"--- Now benchmarking {name} ...")
                comparator = SuccessComparator(name, threshold=0)

                # Seed derived from the current wall-clock time.
                executor = MzCloud(
                    composition=c,
                    mzcloud_url=args.mzcloud_url,
                    seed=round(time.time()),
                    external_addr=args.external_addr,
                )
                executor.Reset()

                benchmark = Benchmark(
                    mz_id=0,
                    scenario=candidate,
                    scale=args.scale,
                    executor=executor,
                    filter=make_filter(args),
                    termination_conditions=make_termination_conditions(args),
                    aggregation=make_aggregation(),
                )

                # The second element of the result is not used here.
                outcome, _ = benchmark.run()
                comparator.append(outcome)
                report.append(comparator)

                print(f"+++ Benchmark Report for cycle {cycle_no}:")
                report.dump()
Example #26
0
def workflow_feature_benchmark(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Feature benchmark framework.

    Registers the tag/options flags for the 'This' and 'Other' instances,
    selects the scenarios to run starting from ``--root-scenario``, runs each
    one once through ``run_one_scenario`` and dumps the accumulated report
    after every scenario. The process always exits: with status 1 if any
    comparison reported a regression, otherwise with status 0.
    """

    # String flags whose default is read from the environment:
    # (flag, metavar, environment variable, help text).
    env_backed_flags = (
        (
            "--this-tag",
            "TAG",
            "THIS_TAG",
            "'This' Materialize container tag to benchmark. If not provided, the current source will be used.",
        ),
        (
            "--this-options",
            "OPTIONS",
            "THIS_OPTIONS",
            "Options to pass to the 'This' instance.",
        ),
        (
            "--other-tag",
            "TAG",
            "OTHER_TAG",
            "'Other' Materialize container tag to benchmark. If not provided, the current source will be used.",
        ),
        (
            "--other-options",
            "OPTIONS",
            "OTHER_OPTIONS",
            "Options to pass to the 'Other' instance.",
        ),
    )
    for flag, metavar, env_var, help_text in env_backed_flags:
        parser.add_argument(
            flag,
            metavar=metavar,
            type=str,
            default=os.getenv(env_var, None),
            help=help_text,
        )

    parser.add_argument(
        "--root-scenario",
        metavar="SCENARIO",
        type=str,
        default="Scenario",
        help="Scenario or scenario family to benchmark. See scenarios.py for available scenarios.",
    )

    args = parser.parse_args()

    banner = f"""
this_tag: {args.this_tag}
this_options: {args.this_options}

other_tag: {args.other_tag}
other_options: {args.other_options}

root_scenario: {args.root_scenario}"""
    print(banner)

    # Select the scenarios to run. The root scenario is looked up by name in
    # this module's globals. Each direct subclass contributes its own direct
    # subclasses, or itself when it has none; a root without subclasses is
    # run on its own.
    root = globals()[args.root_scenario]
    scenarios = []
    for child in root.__subclasses__():
        scenarios.extend(child.__subclasses__() or [child])
    if not scenarios:
        scenarios.append(root)

    names = ", ".join(candidate.__name__ for candidate in scenarios)
    print(f"scenarios: {names}")

    c.start_and_wait_for_tcp(services=["zookeeper", "kafka", "schema-registry"])

    report = Report()
    exit_code = 0

    for candidate in scenarios:
        comparison = run_one_scenario(c, candidate, args)
        report.append(comparison)

        if comparison.is_regression():
            exit_code = 1

        # Dump the cumulative report after every scenario.
        report.dump()

    sys.exit(exit_code)
Example #27
0
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run CH-benCHmark without any load on Materialize

    Brings up Materialize, MySQL and Debezium, generates the initial data,
    registers the MySQL connector with Debezium and then starts the load
    generator. Command-line arguments that this workflow does not recognize
    are forwarded to the load generator. Unless ``--wait`` is given, the load
    generator is started detached.
    """

    # Parse arguments; anything unrecognized is kept for the load generator.
    parser.add_argument(
        "--wait",
        action="store_true",
        help="wait for the load generator to exit",
    )
    args, passthrough_args = parser.parse_known_args()

    config_flag = "--config-file-path=/etc/chbenchmark/mz-default-mysql.cfg"

    # Start Materialize.
    c.up("materialized")
    c.wait_for_materialized()

    # Start MySQL and Debezium, then wait for both ports to accept connections.
    c.up("mysql", "debezium")
    for host, port in (("mysql", 3306), ("debezium", 8083)):
        c.wait_for_tcp(host=host, port=port)

    # Generate initial data.
    c.run("chbench", "gen", config_flag, "--warehouses=1")

    # Register the MySQL connector with Debezium.
    connector_config = {
        "connector.class": "io.debezium.connector.mysql.MySqlConnector",
        "database.hostname": "mysql",
        "database.port": "3306",
        "database.user": "******",
        "database.password": "******",
        "database.server.name": "debezium",
        "database.server.id": "1234",
        "database.history.kafka.bootstrap.servers": "kafka:9092",
        "database.history.kafka.topic": "mysql-history",
        "database.allowPublicKeyRetrieval": "true",
        "time.precision.mode": "connect",
    }
    connectors_url = f"http://localhost:{c.default_port('debezium')}/connectors"
    response = requests.post(
        connectors_url,
        json={"name": "mysql-connector", "config": connector_config},
    )

    # Don't error if the connector already exists.
    connector_exists = response.status_code == requests.codes.conflict
    if not connector_exists:
        response.raise_for_status()

    # Run load generator.
    load_generator_args = [
        "chbench",
        "run",
        config_flag,
        "--dsn=mysql",
        "--gen-dir=/var/lib/mysql-files",
        "--analytic-threads=0",
        "--transactional-threads=1",
        "--run-seconds=86400",
        "--mz-sources",
    ]
    c.run(*load_generator_args, *passthrough_args, detach=not args.wait)
Example #28
0
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Send records at a fixed rate and track how far Materialize lags behind.

    Starts the prerequisite services and Materialize (configured with the
    given persist URLs), runs ``setup.td`` and then loops: each iteration
    reads the ingested record count via ``query_materialize``, sends however
    many records are due according to ``--records-per-second`` and the
    elapsed time, and prints a progress line roughly once per second. The
    loop ends once all ``records-per-second * num-seconds`` records have been
    sent and the ingested count equals that total.
    """
    # Integer flags: (flag, default, help text).
    integer_flags = (
        ("--num-seconds", 100, "number of seconds to write records to Kafka"),
        (
            "--records-per-second",
            10000,
            "throughput of writes to maintain during testing",
        ),
        ("--num-keys", 1000000000, "number of distinct keys"),
        ("--value-bytes", 500, "record payload size in bytes"),
        ("--timeout-secs", 120, "timeout to send records to Kafka"),
    )
    for flag, default, help_text in integer_flags:
        parser.add_argument(flag, type=int, default=default, help=help_text)

    for flag in ("--blob-url", "--consensus-url"):
        parser.add_argument(
            flag,
            type=str,
            default=None,
            help="location where we store persistent data",
        )

    args = parser.parse_args()

    # The URLs are formatted into strings as-is, so an unset flag is passed
    # on as the text "None".
    persist_options = [
        "--persist-consensus-url",
        f"{args.consensus_url}",
        "--persist-blob-url",
        f"{args.blob_url}",
    ]

    with c.override(Materialized(options=persist_options)):
        c.start_and_wait_for_tcp(services=prerequisites)

        c.up("materialized")
        c.wait_for_materialized("materialized")

        c.run("testdrive", "setup.td")

        started_at = time.monotonic()
        sent = 0
        target = args.records_per_second * args.num_seconds
        # Maximum observed delta between records sent by the benchmark and ingested by
        # Materialize.
        worst_lag = 0
        last_report_at = 0.0

        while True:
            elapsed = time.monotonic() - started_at
            ingested = query_materialize(c)

            lag = sent - ingested
            worst_lag = max(worst_lag, lag)

            # Report our findings back once per second.
            if elapsed - last_report_at > 1:
                print(
                    f"C> after {elapsed:.3f}s sent {sent} records, and ingested {ingested}. max observed lag {worst_lag} records, most recent lag {lag} records"
                )
                last_report_at = elapsed

            # Determine how many records we are scheduled to send, based on how long
            # the benchmark has been running and the desired QPS. Elapsed time is
            # capped at the configured duration so the schedule never exceeds
            # the target.
            scheduled = int(
                min(elapsed, args.num_seconds) * args.records_per_second
            )
            due = scheduled - sent

            if due > 0:
                send_records(
                    c,
                    num_records=due,
                    num_keys=args.num_keys,
                    value_bytes=args.value_bytes,
                    timeout_secs=args.timeout_secs,
                )
                sent = scheduled

            # Exit once we've sent all the records we need to send, and confirmed that
            # Materialize has ingested them.
            if sent == target and target == ingested:
                print(
                    f"C> Finished after {elapsed:.3f}s sent and ingested {sent} records. max observed lag {worst_lag} records."
                )
                break