def test_mxnet_with_default_profiler_config_and_profiler_rule(
    sagemaker_session,
    mxnet_training_latest_version,
    mxnet_training_latest_py_version,
    cpu_instance_type,
):
    """Check the profiler settings reported for an MXNet job with no profiler arguments.

    The estimator is created without any profiler-related arguments. After
    the training job is started, the job description is expected to contain:

    * a ``ProfilerConfig`` equal to the request dict of
      ``ProfilerConfig(s3_output_path=mx.output_path,
      system_monitor_interval_millis=500)``,
    * ``ProfilingStatus == "Enabled"``,
    * a first profiler rule named ``ProfilerReport-<digits>`` that uses the
      rule container image for the session's region and invokes
      ``ProfilerReport``.

    Finally, calling ``enable_default_profiling`` on this estimator must
    raise ``ValueError`` with the "already enabled" message.
    """
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, "mxnet_mnist", "mnist_gluon.py")
        data_path = os.path.join(DATA_DIR, "mxnet_mnist")

        mx = MXNet(
            entry_point=script_path,
            role="SageMakerRole",
            framework_version=mxnet_training_latest_version,
            py_version=mxnet_training_latest_py_version,
            instance_count=1,
            instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
        )

        train_input = mx.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"),
            key_prefix="integ-test-data/mxnet_mnist/train",
        )
        test_input = mx.sagemaker_session.upload_data(
            path=os.path.join(data_path, "test"),
            key_prefix="integ-test-data/mxnet_mnist/test",
        )

        training_job_name = unique_name_from_base("test-profiler-mxnet-training")
        # fit() is called with wait=False; the checks below only inspect the
        # description of the job that was just started.
        mx.fit(
            inputs={"train": train_input, "test": test_input},
            job_name=training_job_name,
            wait=False,
        )

        job_description = mx.latest_training_job.describe()
        expected_profiler_config = ProfilerConfig(
            s3_output_path=mx.output_path,
            system_monitor_interval_millis=500,
        )._to_request_dict()
        assert job_description["ProfilerConfig"] == expected_profiler_config
        assert job_description.get("ProfilingStatus") == "Enabled"

        profiler_rule_configuration = job_description.get("ProfilerRuleConfigurations")[0]
        assert re.match(
            r"ProfilerReport-\d*",
            profiler_rule_configuration["RuleConfigurationName"],
        )
        assert profiler_rule_configuration["RuleEvaluatorImage"] == get_rule_container_image_uri(
            mx.sagemaker_session.boto_region_name
        )
        assert profiler_rule_configuration["RuleParameters"] == {
            "rule_to_invoke": "ProfilerReport"
        }

        with pytest.raises(ValueError) as error:
            mx.enable_default_profiling()
        # Inspect the raised exception itself (error.value). str() of the
        # ExceptionInfo wrapper is a debugging representation whose format
        # depends on the pytest version, not the exception message.
        assert "Debugger monitoring is already enabled." in str(error.value)
def test_mxnet_with_disable_profiler_then_enable_default_profiling(
    sagemaker_session,
    mxnet_training_latest_version,
    mxnet_training_latest_py_version,
    cpu_instance_type,
):
    """Start an MXNet job with ``disable_profiler=True``, then enable default profiling.

    Right after the job is started its description must carry no
    ``ProfilerConfig`` and no ``ProfilerRuleConfigurations``, and report
    ``ProfilingStatus == "Disabled"``. After
    ``_wait_until_training_can_be_updated`` returns,
    ``enable_default_profiling`` is called and the refreshed description
    must report the estimator's ``output_path`` as the profiler's
    ``S3OutputPath``.
    """
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        mnist_dir = os.path.join(DATA_DIR, "mxnet_mnist")

        estimator = MXNet(
            entry_point=os.path.join(DATA_DIR, "mxnet_mnist", "mnist_gluon.py"),
            role="SageMakerRole",
            framework_version=mxnet_training_latest_version,
            py_version=mxnet_training_latest_py_version,
            instance_count=1,
            instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
            disable_profiler=True,
        )

        # Upload the train channel first, then the test channel.
        session = estimator.sagemaker_session
        channels = {
            "train": session.upload_data(
                path=os.path.join(mnist_dir, "train"),
                key_prefix="integ-test-data/mxnet_mnist/train",
            ),
            "test": session.upload_data(
                path=os.path.join(mnist_dir, "test"),
                key_prefix="integ-test-data/mxnet_mnist/test",
            ),
        }

        job_name = unique_name_from_base("test-profiler-mxnet-training")
        estimator.fit(inputs=channels, job_name=job_name, wait=False)

        # With the profiler disabled, no profiler settings should be reported.
        initial_description = estimator.latest_training_job.describe()
        for absent_key in ("ProfilerConfig", "ProfilerRuleConfigurations"):
            assert initial_description.get(absent_key) is None
        assert initial_description.get("ProfilingStatus") == "Disabled"

        # Wait on the helper before changing the job's profiler settings.
        _wait_until_training_can_be_updated(
            sagemaker_session.sagemaker_client,
            job_name,
        )
        estimator.enable_default_profiling()

        updated_description = estimator.latest_training_job.describe()
        profiler_config = updated_description["ProfilerConfig"]
        assert profiler_config["S3OutputPath"] == estimator.output_path