def test_scaling_performance(region, scheduler, os, instance, pcluster_config_reader, clusters_factory, request):
    """The test runs benchmarks for the scaling logic."""
    benchmarks_max_time = request.config.getoption("benchmarks_max_time")

    benchmark_params = {
        "region": region,
        "scheduler": scheduler,
        "os": os,
        "instance": instance,
        "scaling_target": request.config.getoption("benchmarks_target_capacity"),
        "scaledown_idletime": 2,
        "job_duration": 60,
    }

    cluster_config = pcluster_config_reader(
        scaledown_idletime=benchmark_params["scaledown_idletime"],
        scaling_target=benchmark_params["scaling_target"],
    )
    cluster = clusters_factory(cluster_config)
    remote_command_executor = RemoteCommandExecutor(cluster)
    scheduler_commands = get_scheduler_commands(scheduler, remote_command_executor)

    logging.info("Starting benchmark with following parameters: %s", benchmark_params)
    start_time = datetime.datetime.utcnow()
    # Submit a single wide job that requires scaling_target nodes to trigger the full scale-up.
    kwargs = {"nodes": benchmark_params["scaling_target"]}
    result = scheduler_commands.submit_command("sleep {0}".format(benchmark_params["job_duration"]), **kwargs)
    scheduler_commands.assert_job_submitted(result.stdout)

    # Monitor the compute fleet until it scales up to the target and back down after scaledown_idletime.
    compute_nodes_time_series, timestamps, end_time = publish_compute_nodes_metric(
        scheduler_commands,
        max_monitoring_time=minutes(benchmarks_max_time),
        region=region,
        cluster_name=cluster.cfn_name,
    )

    logging.info("Benchmark completed. Producing outputs and performing assertions.")
    benchmark_params["total_time"] = "{0}seconds".format(int((end_time - start_time).total_seconds()))
    produce_benchmark_metrics_report(
        benchmark_params,
        region,
        cluster.cfn_name,
        start_time.replace(tzinfo=datetime.timezone.utc).isoformat(),
        end_time.replace(tzinfo=datetime.timezone.utc).isoformat(),
        benchmark_params["scaling_target"],
        request,
    )
    # The cluster must reach the scaling target and then scale back down to zero.
    assert_that(max(compute_nodes_time_series)).is_equal_to(benchmark_params["scaling_target"])
    assert_that(compute_nodes_time_series[-1]).is_equal_to(0)
    assert_no_errors_in_logs(remote_command_executor, scheduler)

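# ---------------------------------------------------------------------------------------------
# publish_compute_nodes_metric and produce_benchmark_metrics_report come from the shared
# benchmark utilities and are not shown in this section. The function below is only a hedged,
# illustrative sketch of what publish_compute_nodes_metric is assumed to do: poll the scheduler
# for the compute node count, push each sample to CloudWatch as a custom metric, and return the
# collected time series together with the end time. The metric namespace, the
# compute_nodes_count() call, the 1-minute sampling interval, and the assumption that minutes()
# returns milliseconds are all choices made for this sketch, not the real implementation.
# ---------------------------------------------------------------------------------------------
def _publish_compute_nodes_metric_sketch(scheduler_commands, max_monitoring_time, region, cluster_name):
    import time

    import boto3  # assumes boto3 is available, as in the rest of the integration tests

    cloudwatch = boto3.client("cloudwatch", region_name=region)
    compute_nodes_time_series = []
    timestamps = []
    # max_monitoring_time is assumed to be milliseconds, matching the minutes() helper used above.
    deadline = datetime.datetime.utcnow() + datetime.timedelta(milliseconds=max_monitoring_time)
    while datetime.datetime.utcnow() < deadline:
        now = datetime.datetime.utcnow()
        compute_nodes_count = scheduler_commands.compute_nodes_count()  # assumed scheduler API
        compute_nodes_time_series.append(compute_nodes_count)
        timestamps.append(now)
        cloudwatch.put_metric_data(
            Namespace="ParallelCluster/Benchmarking",  # hypothetical namespace
            MetricData=[
                {
                    "MetricName": "ComputeNodesCount",
                    "Dimensions": [{"Name": "ClusterName", "Value": cluster_name}],
                    "Timestamp": now,
                    "Value": float(compute_nodes_count),
                    "Unit": "Count",
                }
            ],
        )
        # Stop monitoring once the cluster has scaled up and then back down to zero.
        if max(compute_nodes_time_series) > 0 and compute_nodes_time_series[-1] == 0:
            break
        time.sleep(60)
    end_time = datetime.datetime.utcnow()
    return compute_nodes_time_series, timestamps, end_time
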
def test_scheduler_performance(region, scheduler, os, instance, pcluster_config_reader, clusters_factory, request):
    """The test runs a stress test to verify scheduler behaviour with many submitted jobs."""
    benchmarks_max_time = request.config.getoption("benchmarks_max_time")
    instance_slots = get_instance_vcpus(region, instance)

    benchmark_params = {
        "region": region,
        "scheduler": scheduler,
        "os": os,
        "instance": instance,
        "scaling_target": request.config.getoption("benchmarks_target_capacity"),
        "scaledown_idletime": 2,
        "job_duration": 60,
        "jobs_to_submit": 2 * instance_slots * request.config.getoption("benchmarks_target_capacity"),
    }

    cluster_config = pcluster_config_reader(
        scaledown_idletime=benchmark_params["scaledown_idletime"],
        scaling_target=benchmark_params["scaling_target"],
    )
    cluster = clusters_factory(cluster_config)
    remote_command_executor = RemoteCommandExecutor(cluster)
    scheduler_commands = get_scheduler_commands(scheduler, remote_command_executor)
    enable_asg_metrics(region, cluster)

    logging.info("Starting benchmark with following parameters: %s", benchmark_params)
    start_time = datetime.datetime.utcnow()
    # Flood the scheduler with jobs_to_submit jobs to stress the scheduling and scaling logic.
    _submit_jobs(benchmark_params, scheduler_commands, instance_slots, cluster)

    # Monitor the compute fleet until it scales up to the target and back down after scaledown_idletime.
    compute_nodes_time_series, timestamps, end_time = publish_compute_nodes_metric(
        scheduler_commands,
        max_monitoring_time=minutes(benchmarks_max_time),
        region=region,
        cluster_name=cluster.cfn_name,
    )

    logging.info("Benchmark completed. Producing outputs and performing assertions.")
    benchmark_params["total_time"] = "{0}seconds".format(int((end_time - start_time).total_seconds()))
    produce_benchmark_metrics_report(
        benchmark_params,
        region,
        cluster.cfn_name,
        cluster.asg,
        start_time.replace(tzinfo=datetime.timezone.utc).isoformat(),
        end_time.replace(tzinfo=datetime.timezone.utc).isoformat(),
        benchmark_params["scaling_target"],
        request,
    )
    # The cluster must reach the scaling target, scale back down to zero,
    # and all submitted jobs must have completed without errors in the daemon logs.
    assert_that(max(compute_nodes_time_series)).is_equal_to(benchmark_params["scaling_target"])
    assert_that(compute_nodes_time_series[-1]).is_equal_to(0)
    _assert_jobs_completed(remote_command_executor, benchmark_params["jobs_to_submit"])
    assert_no_errors_in_logs(remote_command_executor, ["/var/log/sqswatcher", "/var/log/jobwatcher"])

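# ---------------------------------------------------------------------------------------------
# _submit_jobs and _assert_jobs_completed are private helpers defined elsewhere in the test
# module. The two functions below are hedged sketches provided only to make the flow above
# easier to follow: the first submits one wide job (to force the full scale-up) plus a burst of
# single-slot sleep jobs, the second counts completed jobs via Slurm accounting. The job shapes,
# the sacct-based check, and the assumption of a Slurm scheduler are choices made for these
# sketches, not the real implementation.
# ---------------------------------------------------------------------------------------------
def _submit_jobs_sketch(benchmark_params, scheduler_commands, instance_slots, cluster):
    # One wide job asking for scaling_target nodes guarantees the cluster reaches the target.
    kwargs = {"nodes": benchmark_params["scaling_target"]}
    result = scheduler_commands.submit_command(
        "sleep {0}".format(benchmark_params["job_duration"]), **kwargs
    )
    scheduler_commands.assert_job_submitted(result.stdout)
    # The remaining jobs are single-slot sleep jobs that stress the scheduler queue.
    for _ in range(benchmark_params["jobs_to_submit"] - 1):
        result = scheduler_commands.submit_command("sleep {0}".format(benchmark_params["job_duration"]))
        scheduler_commands.assert_job_submitted(result.stdout)


def _assert_jobs_completed_sketch(remote_command_executor, expected_completed_jobs):
    # Assumes a Slurm cluster: count jobs reported as COMPLETED by the accounting database.
    result = remote_command_executor.run_remote_command(
        "sacct -X --noheader --state COMPLETED --format JobID"
    )
    completed_jobs = len([line for line in result.stdout.splitlines() if line.strip()])
    assert_that(completed_jobs).is_equal_to(expected_completed_jobs)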