Example #1
def prepare_framework_container_def(model, instance_type, s3_operations):
    """Prepare the framework model container information. Specify related S3 operations for Airflow to perform.
    (Upload `source_dir`)

    Args:
        model (sagemaker.model.FrameworkModel): The framework model
        instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
        s3_operations (dict): The dict to specify S3 operations (upload `source_dir`).

    Returns:
        dict: The container information of this framework model.
    """
    deploy_image = model.image
    if not deploy_image:
        region_name = model.sagemaker_session.boto_session.region_name
        deploy_image = fw_utils.create_image_uri(region_name,
                                                 model.__framework_name__,
                                                 instance_type,
                                                 model.framework_version,
                                                 model.py_version)

    base_name = utils.base_name_from_image(deploy_image)
    model.name = model.name or utils.airflow_name_from_base(base_name)

    bucket = model.bucket or model.sagemaker_session._default_bucket
    script = os.path.basename(model.entry_point)
    key = '{}/source/sourcedir.tar.gz'.format(model.name)

    if model.source_dir and model.source_dir.lower().startswith('s3://'):
        model.uploaded_code = fw_utils.UploadedCode(s3_prefix=model.source_dir,
                                                    script_name=script)
    else:
        code_dir = 's3://{}/{}'.format(bucket, key)
        model.uploaded_code = fw_utils.UploadedCode(s3_prefix=code_dir,
                                                    script_name=script)
        s3_operations['S3Upload'] = [{
            'Path': model.source_dir or script,
            'Bucket': bucket,
            'Key': key,
            'Tar': True
        }]

    deploy_env = dict(model.env)
    deploy_env.update(model._framework_env_vars())

    try:
        if model.model_server_workers:
            deploy_env[
                sagemaker.model.MODEL_SERVER_WORKERS_PARAM_NAME.upper()] = str(
                    model.model_server_workers)
    except AttributeError:
        # This applies to a FrameworkModel that is not a SageMaker Deep Learning Framework Model
        pass

    return sagemaker.container_def(deploy_image, model.model_data, deploy_env)
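A minimal usage sketch for the helper above; `tf_model` is a hypothetical, already-constructed framework model (e.g. a TensorFlow model with `entry_point` and `source_dir` set) and the instance type is illustrative:

# Sketch only: `tf_model` is an assumed FrameworkModel instance, not defined above.
s3_operations = {}
container_def = prepare_framework_container_def(tf_model, 'ml.c4.xlarge', s3_operations)
# container_def -> {'Image': ..., 'ModelDataUrl': ..., 'Environment': ...}
# s3_operations -> {'S3Upload': [...]} when source_dir still needs to be uploaded to S3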
Example #2
def model_config(instance_type, model, role=None, image=None):
    """Export Airflow model config from a SageMaker model

    Args:
        instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'
        model (sagemaker.model.FrameworkModel): The SageMaker model to export Airflow config from
        role (str): The ``ExecutionRoleArn`` IAM Role ARN for the model
        image (str): A container image to use for deploying the model

    Returns:
        dict: Model config that can be directly used by SageMakerModelOperator in Airflow. It can also be part
        of the config used by SageMakerEndpointOperator and SageMakerTransformOperator in Airflow.
    """
    s3_operations = {}
    model.image = image or model.image

    if isinstance(model, sagemaker.model.FrameworkModel):
        container_def = prepare_framework_container_def(
            model, instance_type, s3_operations)
    else:
        container_def = model.prepare_container_def(instance_type)
        base_name = utils.base_name_from_image(container_def['Image'])
        model.name = model.name or utils.airflow_name_from_base(base_name)

    primary_container = session._expand_container_def(container_def)

    config = {
        'ModelName': model.name,
        'PrimaryContainer': primary_container,
        'ExecutionRoleArn': role or model.role
    }

    if model.vpc_config:
        config['VpcConfig'] = model.vpc_config

    if s3_operations:
        config['S3Operations'] = s3_operations

    return config
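A hedged usage sketch; `my_model` and the role ARN are placeholders, and the operator import path assumes Airflow 1.10's contrib operators:

from airflow.contrib.operators.sagemaker_model_operator import SageMakerModelOperator

# `my_model` is an assumed sagemaker.model.Model / FrameworkModel instance.
config = model_config('ml.m4.xlarge', my_model, role='arn:aws:iam::123456789012:role/SageMakerRole')
create_model_op = SageMakerModelOperator(task_id='sagemaker_create_model', config=config)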
Example #3
def training_base_config(estimator,
                         inputs=None,
                         job_name=None,
                         mini_batch_size=None):
    """Export Airflow base training config from an estimator

    Args:
        estimator (sagemaker.estimator.EstimatorBase):
            The estimator to export training config from. Can be a BYO estimator,
            Framework estimator or Amazon algorithm estimator.
        inputs: Information about the training data. Please refer to the ``fit()`` method of
                the associated estimator, as this can take any of the following forms:

            * (str) - The S3 location where training data is saved.
            * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple channels for
                training data, you can specify a dict mapping channel names
                to strings or :func:`~sagemaker.session.s3_input` objects.
            * (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can provide
                additional information about the training dataset. See :func:`sagemaker.session.s3_input`
                for full details.
            * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
                Amazon :class:~`Record` objects serialized and stored in S3.
                For use with an estimator for an Amazon algorithm.
            * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of
                :class:~`sagemaker.amazon.amazon_estimator.RecordSet` objects, where each instance is
                a different channel of training data.

        job_name (str): Specify a training job name if needed.
        mini_batch_size (int): Specify this argument only when estimator is a built-in estimator of an
            Amazon algorithm. For other estimators, batch size should be specified in the estimator.

    Returns:
        dict: Training config that can be directly used by SageMakerTrainingOperator in Airflow.
    """
    default_bucket = estimator.sagemaker_session.default_bucket()
    s3_operations = {}

    if job_name is not None:
        estimator._current_job_name = job_name
    else:
        base_name = estimator.base_job_name or utils.base_name_from_image(
            estimator.train_image())
        estimator._current_job_name = utils.airflow_name_from_base(base_name)

    if estimator.output_path is None:
        estimator.output_path = 's3://{}/'.format(default_bucket)

    if isinstance(estimator, sagemaker.estimator.Framework):
        prepare_framework(estimator, s3_operations)

    elif isinstance(estimator, amazon_estimator.AmazonAlgorithmEstimatorBase):
        prepare_amazon_algorithm_estimator(estimator, inputs, mini_batch_size)

    job_config = job._Job._load_config(inputs,
                                       estimator,
                                       expand_role=False,
                                       validate_uri=False)

    train_config = {
        'AlgorithmSpecification': {
            'TrainingImage': estimator.train_image(),
            'TrainingInputMode': estimator.input_mode
        },
        'OutputDataConfig': job_config['output_config'],
        'StoppingCondition': job_config['stop_condition'],
        'ResourceConfig': job_config['resource_config'],
        'RoleArn': job_config['role'],
    }

    if job_config['input_config'] is not None:
        train_config['InputDataConfig'] = job_config['input_config']

    if job_config['vpc_config'] is not None:
        train_config['VpcConfig'] = job_config['vpc_config']

    # Initialize up front so the check below cannot raise a NameError when the
    # estimator exposes no hyperparameters.
    hyperparameters = None
    if estimator.hyperparameters() is not None:
        hyperparameters = {
            str(k): str(v)
            for (k, v) in estimator.hyperparameters().items()
        }

    if hyperparameters and len(hyperparameters) > 0:
        train_config['HyperParameters'] = hyperparameters

    if s3_operations:
        train_config['S3Operations'] = s3_operations

    return train_config
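A short sketch of how the base config is typically consumed; `xgb` stands in for any configured estimator and is not defined above:

base_config = training_base_config(xgb, inputs='s3://my-bucket/train')
# Unlike training_config() in Example #6, the base config carries no 'TrainingJobName'
# or 'Tags'; tuning_config() below embeds it as the 'TrainingJobDefinition' of a tuning job.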
Example #4
def tuning_config(tuner, inputs, job_name=None):
    """Export Airflow tuning config from an estimator

    Args:
        tuner (sagemaker.tuner.HyperparameterTuner): The tuner to export tuning config from.
        inputs: Information about the training data. Please refer to the ``fit()`` method of
                the associated estimator in the tuner, as this can take any of the following forms:

            * (str) - The S3 location where training data is saved.
            * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple channels for
                training data, you can specify a dict mapping channel names
                to strings or :func:`~sagemaker.session.s3_input` objects.
            * (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can provide
                additional information about the training dataset. See :func:`sagemaker.session.s3_input`
                for full details.
            * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
                Amazon :class:~`Record` objects serialized and stored in S3.
                For use with an estimator for an Amazon algorithm.
            * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of
                :class:~`sagemaker.amazon.amazon_estimator.RecordSet` objects, where each instance is
                a different channel of training data.

        job_name (str): Specify a tuning job name if needed.

    Returns:
        dict: Tuning config that can be directly used by SageMakerTuningOperator in Airflow.
    """
    train_config = training_base_config(tuner.estimator, inputs)
    hyperparameters = train_config.pop('HyperParameters', None)
    s3_operations = train_config.pop('S3Operations', None)

    if hyperparameters and len(hyperparameters) > 0:
        tuner.static_hyperparameters = \
            {utils.to_str(k): utils.to_str(v) for (k, v) in hyperparameters.items()}

    if job_name is not None:
        tuner._current_job_name = job_name
    else:
        base_name = tuner.base_tuning_job_name or utils.base_name_from_image(
            tuner.estimator.train_image())
        tuner._current_job_name = utils.airflow_name_from_base(
            base_name, tuner.TUNING_JOB_NAME_MAX_LENGTH, True)

    for hyperparameter_name in tuner._hyperparameter_ranges.keys():
        tuner.static_hyperparameters.pop(hyperparameter_name, None)

    train_config['StaticHyperParameters'] = tuner.static_hyperparameters

    tune_config = {
        'HyperParameterTuningJobName': tuner._current_job_name,
        'HyperParameterTuningJobConfig': {
            'Strategy': tuner.strategy,
            'HyperParameterTuningJobObjective': {
                'Type': tuner.objective_type,
                'MetricName': tuner.objective_metric_name,
            },
            'ResourceLimits': {
                'MaxNumberOfTrainingJobs': tuner.max_jobs,
                'MaxParallelTrainingJobs': tuner.max_parallel_jobs,
            },
            'ParameterRanges': tuner.hyperparameter_ranges(),
        },
        'TrainingJobDefinition': train_config
    }

    if tuner.metric_definitions is not None:
        tune_config['TrainingJobDefinition']['AlgorithmSpecification']['MetricDefinitions'] = \
            tuner.metric_definitions

    if tuner.tags is not None:
        tune_config['Tags'] = tuner.tags

    if s3_operations is not None:
        tune_config['S3Operations'] = s3_operations

    return tune_config
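A hedged usage sketch; `my_tuner` is a placeholder HyperparameterTuner and the import path assumes Airflow 1.10's contrib operators:

from airflow.contrib.operators.sagemaker_tuning_operator import SageMakerTuningOperator

tune_dict = tuning_config(my_tuner, inputs='s3://my-bucket/train')
tuning_op = SageMakerTuningOperator(task_id='sagemaker_tuning', config=tune_dict)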
Example #5
def transform_config(transformer,
                     data,
                     data_type='S3Prefix',
                     content_type=None,
                     compression_type=None,
                     split_type=None,
                     job_name=None):
    """Export Airflow transform config from a SageMaker transformer

    Args:
        transformer (sagemaker.transformer.Transformer): The SageMaker transformer to export Airflow
            config from.
        data (str): Input data location in S3.
        data_type (str): What the S3 location defines (default: 'S3Prefix'). Valid values:

            * 'S3Prefix' - the S3 URI defines a key name prefix. All objects with this prefix will be used as
                inputs for the transform job.
            * 'ManifestFile' - the S3 URI points to a single manifest file listing each S3 object to use as
                an input for the transform job.

        content_type (str): MIME type of the input data (default: None).
        compression_type (str): Compression type of the input data, if compressed (default: None).
            Valid values: 'Gzip', None.
        split_type (str): The record delimiter for the input object (default: 'None').
            Valid values: 'None', 'Line', and 'RecordIO'.
        job_name (str): job name (default: None). If not specified, one will be generated.

    Returns:
        dict: Transform config that can be directly used by SageMakerTransformOperator in Airflow.
    """
    if job_name is not None:
        transformer._current_job_name = job_name
    else:
        base_name = transformer.base_transform_job_name
        transformer._current_job_name = utils.airflow_name_from_base(base_name) \
            if base_name is not None else transformer.model_name

    if transformer.output_path is None:
        transformer.output_path = 's3://{}/{}'.format(
            transformer.sagemaker_session.default_bucket(),
            transformer._current_job_name)

    job_config = sagemaker.transformer._TransformJob._load_config(
        data, data_type, content_type, compression_type, split_type,
        transformer)

    config = {
        'TransformJobName': transformer._current_job_name,
        'ModelName': transformer.model_name,
        'TransformInput': job_config['input_config'],
        'TransformOutput': job_config['output_config'],
        'TransformResources': job_config['resource_config'],
    }

    if transformer.strategy is not None:
        config['BatchStrategy'] = transformer.strategy

    if transformer.max_concurrent_transforms is not None:
        config[
            'MaxConcurrentTransforms'] = transformer.max_concurrent_transforms

    if transformer.max_payload is not None:
        config['MaxPayloadInMB'] = transformer.max_payload

    if transformer.env is not None:
        config['Environment'] = transformer.env

    if transformer.tags is not None:
        config['Tags'] = transformer.tags

    return config
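A hedged usage sketch; `my_transformer` is a placeholder Transformer, the data locations and content type are illustrative, and the import path assumes Airflow 1.10's contrib operators:

from airflow.contrib.operators.sagemaker_transform_operator import SageMakerTransformOperator

transform_dict = transform_config(my_transformer, data='s3://my-bucket/batch-input', content_type='text/csv')
transform_op = SageMakerTransformOperator(task_id='sagemaker_batch_transform', config=transform_dict)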
Example #6
def training_config(
        estimator,
        inputs=None,
        job_name=None
):  # noqa: C901 - suppress complexity warning for this method
    """Export Airflow training config from an estimator

    Args:
        estimator (sagemaker.estimator.EstimatorBase):
            The estimator to export training config from. Can be a BYO estimator,
            Framework estimator or Amazon algorithm estimator.
        inputs (str, dict, single or list of sagemaker.amazon.amazon_estimator.RecordSet):
            The training data.
        job_name (str): Specify a training job name if needed.

    Returns:
        A dict of training config that can be directly used by SageMakerTrainingOperator
            in Airflow.
    """
    default_bucket = estimator.sagemaker_session.default_bucket()
    s3_operations = {}

    if job_name is not None:
        estimator._current_job_name = job_name
    else:
        base_name = estimator.base_job_name or utils.base_name_from_image(
            estimator.train_image())
        estimator._current_job_name = utils.airflow_name_from_base(base_name)

    if estimator.output_path is None:
        estimator.output_path = 's3://{}/'.format(default_bucket)

    if isinstance(estimator, sagemaker.estimator.Framework):
        prepare_framework(estimator, s3_operations)

    elif isinstance(estimator, amazon_estimator.AmazonAlgorithmEstimatorBase):
        prepare_amazon_algorithm_estimator(estimator, inputs)

    job_config = job._Job._load_config(inputs,
                                       estimator,
                                       expand_role=False,
                                       validate_uri=False)

    train_config = {
        'AlgorithmSpecification': {
            'TrainingImage': estimator.train_image(),
            'TrainingInputMode': estimator.input_mode
        },
        'OutputDataConfig': job_config['output_config'],
        'TrainingJobName': estimator._current_job_name,
        'StoppingCondition': job_config['stop_condition'],
        'ResourceConfig': job_config['resource_config'],
        'RoleArn': job_config['role'],
    }

    if job_config['input_config'] is not None:
        train_config['InputDataConfig'] = job_config['input_config']

    if job_config['vpc_config'] is not None:
        train_config['VpcConfig'] = job_config['vpc_config']

    # Initialize up front so the check below cannot raise a NameError when the
    # estimator exposes no hyperparameters.
    hyperparameters = None
    if estimator.hyperparameters() is not None:
        hyperparameters = {
            str(k): str(v)
            for (k, v) in estimator.hyperparameters().items()
        }

    if hyperparameters and len(hyperparameters) > 0:
        train_config['HyperParameters'] = hyperparameters

    if estimator.tags is not None:
        train_config['Tags'] = estimator.tags

    if s3_operations:
        train_config['S3Operations'] = s3_operations

    return train_config
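A hedged usage sketch; `my_estimator` is a placeholder estimator and the import path assumes Airflow 1.10's contrib operators:

from airflow.contrib.operators.sagemaker_training_operator import SageMakerTrainingOperator

train_dict = training_config(my_estimator, inputs='s3://my-bucket/train')
training_op = SageMakerTrainingOperator(task_id='sagemaker_training', config=train_dict)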