def __init__(self, state_id, tuner, job_name, data, wait_for_completion=True, tags=None, **kwargs):
    """Create a Task state that starts a SageMaker hyperparameter tuning job.

    Args:
        state_id (str): State name; must be at most 128 unicode characters
            and unique within the scope of the whole state machine.
        tuner (sagemaker.tuner.HyperparameterTuner): The tuner to use in the
            TuningStep.
        job_name (str or Placeholder): Tuning job name. Prefer an
            :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder so
            the value can be supplied dynamically on each execution.
        data: Training data, in any of the forms accepted by the ``fit()``
            method of the tuner's associated estimator:

            * (str) - S3 location where training data is saved.
            * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) -
              channel names mapped to strings or
              :func:`~sagemaker.inputs.TrainingInput` objects, for multiple
              training channels.
            * (sagemaker.inputs.TrainingInput) - channel configuration for an
              S3 data source with additional dataset information.
            * (sagemaker.amazon.amazon_estimator.RecordSet) - serialized
              Amazon :class:`Record` objects stored in S3, for Amazon-provided
              algorithms.
            * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - one
              RecordSet per training channel.
        wait_for_completion (bool, optional): When True, the Task state waits
            for the tuning job to complete before the workflow proceeds to
            the next step; when False, the job is submitted and the workflow
            moves on immediately. (default: True)
        tags (list[dict], optional): `List of tags
            <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_
            to associate with the resource.
    """
    # The ".sync" service-integration ARN makes Step Functions wait for the
    # tuning job to finish; the plain ARN is fire-and-forget.
    if wait_for_completion:
        kwargs[Field.Resource.value] = 'arn:aws:states:::sagemaker:createHyperParameterTuningJob.sync'
    else:
        kwargs[Field.Resource.value] = 'arn:aws:states:::sagemaker:createHyperParameterTuningJob'

    request_parameters = tuning_config(tuner=tuner, inputs=data, job_name=job_name).copy()

    if job_name is not None:
        request_parameters['HyperParameterTuningJobName'] = job_name

    # tuning_config() may emit an Airflow-specific S3Operations section; it is
    # not part of the CreateHyperParameterTuningJob request, so strip it.
    if 'S3Operations' in request_parameters:
        del request_parameters['S3Operations']

    if tags:
        request_parameters['Tags'] = tags_dict_to_kv_list(tags)

    kwargs[Field.Parameters.value] = request_parameters
    super(TuningStep, self).__init__(state_id, **kwargs)
# create estimator fm_estimator = Estimator(image_name=container, role=role, sagemaker_session=sagemaker.session.Session(sess), **config["train_model"]["estimator_config"]) # train_config specifies SageMaker training configuration train_config = training_config(estimator=fm_estimator, inputs=config["train_model"]["inputs"]) # create tuner fm_tuner = HyperparameterTuner(estimator=fm_estimator, **config["tune_model"]["tuner_config"]) # create tuning config tuner_config = tuning_config(tuner=fm_tuner, inputs=config["tune_model"]["inputs"]) # create transform config transform_config = transform_config_from_estimator( estimator=fm_estimator, task_id="model_tuning" if hpo_enabled else "model_training", task_type="tuning" if hpo_enabled else "training", **config["batch_transform"]["transform_config"]) # ============================================================================= # define airflow DAG and tasks # ============================================================================= # define airflow DAG args = {'owner': 'airflow', 'start_date': airflow.utils.dates.days_ago(2)}
def __init__(self, state_id, tuner, job_name, data, wait_for_completion=True, tags=None, **kwargs):
    """Create a Task state that launches a SageMaker hyperparameter tuning job.

    Args:
        state_id (str): State name; must be at most 128 unicode characters
            and unique within the scope of the whole state machine.
        tuner (sagemaker.tuner.HyperparameterTuner): The tuner to use in the
            TuningStep.
        job_name (str or Placeholder): Tuning job name. Prefer an
            :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder so
            the value can be supplied dynamically on each execution.
        data: Training data, in any of the forms accepted by the ``fit()``
            method of the tuner's associated estimator:

            * (str) - S3 location where training data is saved.
            * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) -
              channel names mapped to strings or
              :func:`~sagemaker.inputs.TrainingInput` objects, for multiple
              training channels.
            * (sagemaker.inputs.TrainingInput) - channel configuration for an
              S3 data source with additional dataset information.
            * (sagemaker.amazon.amazon_estimator.RecordSet) - serialized
              Amazon :class:`Record` objects stored in S3, for Amazon-provided
              algorithms.
            * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - one
              RecordSet per training channel.
        wait_for_completion (bool, optional): When True, the Task state waits
            for the tuning job to complete before the workflow proceeds to
            the next step; when False, the job is submitted and the workflow
            moves on immediately. (default: True)
        tags (list[dict] or Placeholder, optional): `List of tags
            <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_
            to associate with the resource.
        parameters (dict, optional): Merged with the other arguments to form
            the request payload for SageMaker `CreateHyperParameterTuningJob
            <https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateHyperParameterTuningJob.html>`_.
            Use it to override values generated from the other arguments or to
            set any field dynamically with `Placeholders
            <https://aws-step-functions-data-science-sdk.readthedocs.io/en/stable/placeholders.html?highlight=placeholder#stepfunctions.inputs.Placeholder>`_.
    """
    # Build the service-integration ARN. With WaitForCompletion the ARN is
    # e.g. arn:aws:states:::sagemaker:createHyperParameterTuningJob.sync;
    # otherwise arn:aws:states:::sagemaker:createHyperParameterTuningJob.
    arn_args = [SAGEMAKER_SERVICE_NAME, SageMakerApi.CreateHyperParameterTuningJob]
    if wait_for_completion:
        arn_args.append(IntegrationPattern.WaitForCompletion)
    kwargs[Field.Resource.value] = get_service_integration_arn(*arn_args)

    request_payload = tuning_config(tuner=tuner, inputs=data, job_name=job_name).copy()

    if job_name is not None:
        request_payload['HyperParameterTuningJobName'] = job_name

    # tuning_config() may emit an Airflow-specific S3Operations section; it is
    # not part of the CreateHyperParameterTuningJob request, so strip it.
    request_payload.pop('S3Operations', None)

    if tags:
        if isinstance(tags, Placeholder):
            request_payload['Tags'] = tags
        else:
            request_payload['Tags'] = tags_dict_to_kv_list(tags)

    # Caller-supplied `parameters` take precedence over generated values.
    caller_overrides = kwargs.get(Field.Parameters.value)
    if isinstance(caller_overrides, dict):
        merge_dicts(request_payload, caller_overrides)

    kwargs[Field.Parameters.value] = request_payload
    super(TuningStep, self).__init__(state_id, **kwargs)
def test_framework_tuning_config(sagemaker_session):
    """Verify airflow.tuning_config renders the full CreateHyperParameterTuningJob
    request for a framework (MXNet) estimator, passing Airflow template strings
    ("{{ ... }}") through untouched."""
    # Estimator whose user-supplied fields are Airflow templates rendered at
    # DAG execution time.
    mxnet_estimator = mxnet.MXNet(entry_point="{{ entry_point }}",
                                  source_dir="{{ source_dir }}",
                                  py_version='py3',
                                  framework_version='1.3.0',
                                  role="{{ role }}",
                                  train_instance_count=1,
                                  train_instance_type='ml.m4.xlarge',
                                  sagemaker_session=sagemaker_session,
                                  base_job_name="{{ base_job_name }}",
                                  hyperparameters={'batch_size': 100})

    # One range of each supported parameter type.
    hyperparameter_ranges = {
        'optimizer': tuner.CategoricalParameter(['sgd', 'Adam']),
        'learning_rate': tuner.ContinuousParameter(0.01, 0.2),
        'num_epoch': tuner.IntegerParameter(10, 50)
    }

    objective_metric_name = 'Validation-accuracy'
    metric_definitions = [{
        'Name': 'Validation-accuracy',
        'Regex': 'Validation-accuracy=([0-9\\.]+)'
    }]

    # max_jobs/max_parallel_jobs are templates too, so the rendered config
    # must keep them as strings.
    mxnet_tuner = tuner.HyperparameterTuner(
        estimator=mxnet_estimator,
        objective_metric_name=objective_metric_name,
        hyperparameter_ranges=hyperparameter_ranges,
        metric_definitions=metric_definitions,
        strategy='Bayesian',
        objective_type='Maximize',
        max_jobs="{{ max_job }}",
        max_parallel_jobs="{{ max_parallel_job }}",
        tags=[{
            '{{ key }}': '{{ value }}'
        }],
        base_tuning_job_name="{{ base_job_name }}")

    data = "{{ training_data }}"

    config = airflow.tuning_config(mxnet_tuner, data)

    # Expected request body, including the Airflow-only S3Operations section
    # used to upload the source directory.
    expected_config = {
        'HyperParameterTuningJobName':
            "{{ base_job_name }}-{{ execution_date.strftime('%y%m%d-%H%M') }}",
        'HyperParameterTuningJobConfig': {
            'Strategy': 'Bayesian',
            'HyperParameterTuningJobObjective': {
                'Type': 'Maximize',
                'MetricName': 'Validation-accuracy'
            },
            'ResourceLimits': {
                'MaxNumberOfTrainingJobs': '{{ max_job }}',
                'MaxParallelTrainingJobs': '{{ max_parallel_job }}'
            },
            'ParameterRanges': {
                'ContinuousParameterRanges': [{
                    'Name': 'learning_rate',
                    'MinValue': '0.01',
                    'MaxValue': '0.2'
                }],
                'CategoricalParameterRanges': [{
                    'Name': 'optimizer',
                    # Categorical values are JSON-encoded, hence the extra quotes.
                    'Values': ['"sgd"', '"Adam"']
                }],
                'IntegerParameterRanges': [{
                    'Name': 'num_epoch',
                    'MinValue': '10',
                    'MaxValue': '50'
                }]
            }
        },
        'TrainingJobDefinition': {
            'AlgorithmSpecification': {
                'TrainingImage':
                    '520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:1.3.0-cpu-py3',
                'TrainingInputMode': 'File',
                'MetricDefinitions': [{
                    'Name': 'Validation-accuracy',
                    'Regex': 'Validation-accuracy=([0-9\\.]+)'
                }]
            },
            'OutputDataConfig': {
                'S3OutputPath': 's3://output/'
            },
            'StoppingCondition': {
                'MaxRuntimeInSeconds': 86400
            },
            'ResourceConfig': {
                'InstanceCount': 1,
                'InstanceType': 'ml.m4.xlarge',
                'VolumeSizeInGB': 30
            },
            'RoleArn': '{{ role }}',
            'InputDataConfig': [{
                'DataSource': {
                    'S3DataSource': {
                        'S3DataDistributionType': 'FullyReplicated',
                        'S3DataType': 'S3Prefix',
                        'S3Uri': '{{ training_data }}'
                    }
                },
                'ChannelName': 'training'
            }],
            'StaticHyperParameters': {
                'batch_size': '100',
                'sagemaker_submit_directory':
                    '"s3://output/{{ base_job_name }}'
                    '-{{ execution_date.strftime(\'%Y-%m-%d-%H-%M-%S\') }}'
                    '/source/sourcedir.tar.gz"',
                'sagemaker_program': '"{{ entry_point }}"',
                'sagemaker_enable_cloudwatch_metrics': 'false',
                'sagemaker_container_log_level': '20',
                'sagemaker_job_name':
                    '"{{ base_job_name }}-'
                    '{{ execution_date.strftime(\'%Y-%m-%d-%H-%M-%S\') }}"',
                'sagemaker_region': '"us-west-2"'
            }
        },
        'Tags': [{
            '{{ key }}': '{{ value }}'
        }],
        'S3Operations': {
            'S3Upload': [{
                'Path': '{{ source_dir }}',
                'Bucket': 'output',
                'Key': "{{ base_job_name }}-"
                       "{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}/source/sourcedir.tar.gz",
                'Tar': True
            }]
        }
    }

    assert config == expected_config
def test_framework_tuning_config(sagemaker_session):
    """Verify airflow.tuning_config renders the full CreateHyperParameterTuningJob
    request for a framework (MXNet) estimator, passing Airflow template strings
    ("{{ ... }}") through untouched and timestamping generated names with
    TIME_STAMP."""
    # Estimator whose user-supplied fields are Airflow templates rendered at
    # DAG execution time.
    mxnet_estimator = mxnet.MXNet(
        entry_point="{{ entry_point }}",
        source_dir="{{ source_dir }}",
        py_version="py3",
        framework_version="1.3.0",
        role="{{ role }}",
        train_instance_count=1,
        train_instance_type="ml.m4.xlarge",
        sagemaker_session=sagemaker_session,
        base_job_name="{{ base_job_name }}",
        hyperparameters={"batch_size": 100},
    )

    # One range of each supported parameter type.
    hyperparameter_ranges = {
        "optimizer": tuner.CategoricalParameter(["sgd", "Adam"]),
        "learning_rate": tuner.ContinuousParameter(0.01, 0.2),
        "num_epoch": tuner.IntegerParameter(10, 50),
    }

    objective_metric_name = "Validation-accuracy"
    metric_definitions = [{
        "Name": "Validation-accuracy",
        "Regex": "Validation-accuracy=([0-9\\.]+)"
    }]

    # max_jobs/max_parallel_jobs are templates too, so the rendered config
    # must keep them as strings.
    mxnet_tuner = tuner.HyperparameterTuner(
        estimator=mxnet_estimator,
        objective_metric_name=objective_metric_name,
        hyperparameter_ranges=hyperparameter_ranges,
        metric_definitions=metric_definitions,
        strategy="Bayesian",
        objective_type="Maximize",
        max_jobs="{{ max_job }}",
        max_parallel_jobs="{{ max_parallel_job }}",
        tags=[{
            "{{ key }}": "{{ value }}"
        }],
        base_tuning_job_name="{{ base_job_name }}",
    )

    data = "{{ training_data }}"

    config = airflow.tuning_config(mxnet_tuner, data)

    # Expected request body, including the Airflow-only S3Operations section
    # used to upload the source directory.
    expected_config = {
        "HyperParameterTuningJobName": "{{ base_job_name }}-%s" % TIME_STAMP,
        "HyperParameterTuningJobConfig": {
            "Strategy": "Bayesian",
            "HyperParameterTuningJobObjective": {
                "Type": "Maximize",
                "MetricName": "Validation-accuracy",
            },
            "ResourceLimits": {
                "MaxNumberOfTrainingJobs": "{{ max_job }}",
                "MaxParallelTrainingJobs": "{{ max_parallel_job }}",
            },
            "ParameterRanges": {
                "ContinuousParameterRanges": [{
                    "Name": "learning_rate",
                    "MinValue": "0.01",
                    "MaxValue": "0.2",
                    "ScalingType": "Auto",
                }],
                "CategoricalParameterRanges": [{
                    "Name": "optimizer",
                    # Categorical values are JSON-encoded, hence the extra quotes.
                    "Values": ['"sgd"', '"Adam"']
                }],
                "IntegerParameterRanges": [{
                    "Name": "num_epoch",
                    "MinValue": "10",
                    "MaxValue": "50",
                    "ScalingType": "Auto"
                }],
            },
        },
        "TrainingJobDefinition": {
            "AlgorithmSpecification": {
                "TrainingImage":
                    "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:1.3.0-cpu-py3",
                "TrainingInputMode": "File",
                "MetricDefinitions": [{
                    "Name": "Validation-accuracy",
                    "Regex": "Validation-accuracy=([0-9\\.]+)"
                }],
            },
            "OutputDataConfig": {
                "S3OutputPath": "s3://output/"
            },
            "StoppingCondition": {
                "MaxRuntimeInSeconds": 86400
            },
            "ResourceConfig": {
                "InstanceCount": 1,
                "InstanceType": "ml.m4.xlarge",
                "VolumeSizeInGB": 30,
            },
            "RoleArn": "{{ role }}",
            "InputDataConfig": [{
                "DataSource": {
                    "S3DataSource": {
                        "S3DataDistributionType": "FullyReplicated",
                        "S3DataType": "S3Prefix",
                        "S3Uri": "{{ training_data }}",
                    }
                },
                "ChannelName": "training",
            }],
            "StaticHyperParameters": {
                "batch_size": "100",
                "sagemaker_submit_directory":
                    '"s3://output/{{ base_job_name }}-%s/source/sourcedir.tar.gz"' % TIME_STAMP,
                "sagemaker_program": '"{{ entry_point }}"',
                "sagemaker_enable_cloudwatch_metrics": "false",
                "sagemaker_container_log_level": "20",
                "sagemaker_job_name": '"{{ base_job_name }}-%s"' % TIME_STAMP,
                "sagemaker_region": '"us-west-2"',
            },
        },
        "Tags": [{
            "{{ key }}": "{{ value }}"
        }],
        "S3Operations": {
            "S3Upload": [{
                "Path": "{{ source_dir }}",
                "Bucket": "output",
                "Key": "{{ base_job_name }}-%s/source/sourcedir.tar.gz" % TIME_STAMP,
                "Tar": True,
            }]
        },
    }

    assert config == expected_config