# Imports assumed by this section; they mirror the sagemaker v1.x module layout.
import os

import sagemaker
from sagemaker import fw_utils, job, session, utils
from sagemaker.amazon import amazon_estimator


def prepare_framework_container_def(model, instance_type, s3_operations):
    """Prepare the framework model container information. Specify related S3
    operations for Airflow to perform (upload ``source_dir``).

    Args:
        model (sagemaker.model.FrameworkModel): The framework model to deploy.
        instance_type (str): The EC2 instance type to deploy this Model to.
            For example, 'ml.p2.xlarge'.
        s3_operations (dict): The dict to specify S3 operations (upload ``source_dir``).

    Returns:
        dict: The container information of this framework model.
    """
    deploy_image = model.image
    if not deploy_image:
        region_name = model.sagemaker_session.boto_session.region_name
        deploy_image = fw_utils.create_image_uri(
            region_name, model.__framework_name__, instance_type,
            model.framework_version, model.py_version)

    base_name = utils.base_name_from_image(deploy_image)
    model.name = model.name or utils.airflow_name_from_base(base_name)

    bucket = model.bucket or model.sagemaker_session._default_bucket
    script = os.path.basename(model.entry_point)
    key = '{}/source/sourcedir.tar.gz'.format(model.name)

    if model.source_dir and model.source_dir.lower().startswith('s3://'):
        model.uploaded_code = fw_utils.UploadedCode(s3_prefix=model.source_dir,
                                                    script_name=script)
    else:
        code_dir = 's3://{}/{}'.format(bucket, key)
        model.uploaded_code = fw_utils.UploadedCode(s3_prefix=code_dir,
                                                    script_name=script)
        s3_operations['S3Upload'] = [{
            'Path': model.source_dir or script,
            'Bucket': bucket,
            'Key': key,
            'Tar': True
        }]

    deploy_env = dict(model.env)
    deploy_env.update(model._framework_env_vars())

    try:
        if model.model_server_workers:
            deploy_env[sagemaker.model.MODEL_SERVER_WORKERS_PARAM_NAME.upper()] = \
                str(model.model_server_workers)
    except AttributeError:
        # This applies to a FrameworkModel that is not a SageMaker Deep Learning
        # Framework Model (those do not define `model_server_workers`).
        pass

    return sagemaker.container_def(deploy_image, model.model_data, deploy_env)
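# A minimal sketch of how this helper behaves (normally `model_config` below
# calls it for you). The MXNetModel arguments are illustrative assumptions,
# not values taken from this module.
#
#     from sagemaker.mxnet import MXNetModel
#
#     mxnet_model = MXNetModel(model_data='s3://my-bucket/model.tar.gz',
#                              role='SageMakerRole',
#                              entry_point='inference.py',
#                              framework_version='1.3.0')
#     s3_operations = {}
#     container_def = prepare_framework_container_def(
#         mxnet_model, 'ml.c4.xlarge', s3_operations)
#     # container_def -> {'Image': ..., 'ModelDataUrl': ..., 'Environment': {...}}
#     # s3_operations -> {'S3Upload': [...]} when `source_dir` must be uploaded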
def model_config(instance_type, model, role=None, image=None):
    """Export Airflow model config from a SageMaker model

    Args:
        instance_type (str): The EC2 instance type to deploy this Model to.
            For example, 'ml.p2.xlarge'.
        model (sagemaker.model.Model): The SageMaker model to export the
            Airflow config from.
        role (str): The ``ExecutionRoleArn`` IAM Role ARN for the model.
        image (str): A container image to use for deploying the model.

    Returns:
        dict: Model config that can be directly used by SageMakerModelOperator
            in Airflow. It can also be part of the config used by
            SageMakerEndpointOperator and SageMakerTransformOperator in Airflow.
    """
    s3_operations = {}
    model.image = image or model.image

    if isinstance(model, sagemaker.model.FrameworkModel):
        container_def = prepare_framework_container_def(model, instance_type,
                                                        s3_operations)
    else:
        container_def = model.prepare_container_def(instance_type)
        base_name = utils.base_name_from_image(container_def['Image'])
        model.name = model.name or utils.airflow_name_from_base(base_name)

    primary_container = session._expand_container_def(container_def)

    config = {
        'ModelName': model.name,
        'PrimaryContainer': primary_container,
        'ExecutionRoleArn': role or model.role
    }

    if model.vpc_config:
        config['VpcConfig'] = model.vpc_config

    if s3_operations:
        config['S3Operations'] = s3_operations

    return config
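# A minimal usage sketch (assumed names): export a model config and hand it to
# Airflow's SageMakerModelOperator. `mxnet_model` is the hypothetical model
# from the sketch above; the task id, DAG, and role ARN are illustrative.
#
#     from airflow.contrib.operators.sagemaker_model_operator import SageMakerModelOperator
#
#     config = model_config('ml.c4.xlarge', mxnet_model,
#                           role='arn:aws:iam::012345678901:role/SageMakerRole')
#     create_model = SageMakerModelOperator(task_id='create_model',
#                                           config=config,
#                                           dag=dag)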
def training_base_config(estimator, inputs=None, job_name=None, mini_batch_size=None):
    """Export Airflow base training config from an estimator

    Args:
        estimator (sagemaker.estimator.EstimatorBase): The estimator to export
            training config from. Can be a BYO estimator, Framework estimator or
            Amazon algorithm estimator.
        inputs: Information about the training data. Please refer to the ``fit()``
            method of the associated estimator, as this can take any of the
            following forms:

            * (str) - The S3 location where training data is saved.
            * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using
              multiple channels for training data, you can specify a dict mapping
              channel names to strings or :func:`~sagemaker.session.s3_input` objects.
            * (sagemaker.session.s3_input) - Channel configuration for S3 data sources
              that can provide additional information about the training dataset.
              See :func:`sagemaker.session.s3_input` for full details.
            * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of Amazon
              :class:`~Record` objects serialized and stored in S3. For use with an
              estimator for an Amazon algorithm.
            * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of
              :class:`~sagemaker.amazon.amazon_estimator.RecordSet` objects, where
              each instance is a different channel of training data.

        job_name (str): Specify a training job name if needed.
        mini_batch_size (int): Specify this argument only when estimator is a
            built-in estimator of an Amazon algorithm. For other estimators,
            batch size should be specified in the estimator.

    Returns:
        dict: A base training config shared by ``training_config`` and
            ``tuning_config``. Note that it does not include ``TrainingJobName``;
            ``training_config`` adds it.
    """
    default_bucket = estimator.sagemaker_session.default_bucket()
    s3_operations = {}

    if job_name is not None:
        estimator._current_job_name = job_name
    else:
        base_name = estimator.base_job_name or utils.base_name_from_image(
            estimator.train_image())
        estimator._current_job_name = utils.airflow_name_from_base(base_name)

    if estimator.output_path is None:
        estimator.output_path = 's3://{}/'.format(default_bucket)

    if isinstance(estimator, sagemaker.estimator.Framework):
        prepare_framework(estimator, s3_operations)
    elif isinstance(estimator, amazon_estimator.AmazonAlgorithmEstimatorBase):
        prepare_amazon_algorithm_estimator(estimator, inputs, mini_batch_size)

    job_config = job._Job._load_config(inputs, estimator, expand_role=False,
                                       validate_uri=False)

    train_config = {
        'AlgorithmSpecification': {
            'TrainingImage': estimator.train_image(),
            'TrainingInputMode': estimator.input_mode
        },
        'OutputDataConfig': job_config['output_config'],
        'StoppingCondition': job_config['stop_condition'],
        'ResourceConfig': job_config['resource_config'],
        'RoleArn': job_config['role'],
    }

    if job_config['input_config'] is not None:
        train_config['InputDataConfig'] = job_config['input_config']

    if job_config['vpc_config'] is not None:
        train_config['VpcConfig'] = job_config['vpc_config']

    if estimator.hyperparameters() is not None:
        hyperparameters = {
            str(k): str(v)
            for (k, v) in estimator.hyperparameters().items()
        }
        if hyperparameters and len(hyperparameters) > 0:
            train_config['HyperParameters'] = hyperparameters

    if s3_operations:
        train_config['S3Operations'] = s3_operations

    return train_config
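# Sketch of the structure returned above (keys follow the CreateTrainingJob
# API; the values depend on the estimator and inputs):
#
#     {
#         'AlgorithmSpecification': {'TrainingImage': ..., 'TrainingInputMode': 'File'},
#         'OutputDataConfig': {...},
#         'StoppingCondition': {...},
#         'ResourceConfig': {...},
#         'RoleArn': ...,
#         'InputDataConfig': [...],    # present when inputs are given
#         'HyperParameters': {...},    # present when the estimator defines any
#     }
#
# There is deliberately no 'TrainingJobName' key here; `training_config` below
# adds it, and `tuning_config` nests this dict under 'TrainingJobDefinition'.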
def tuning_config(tuner, inputs, job_name=None):
    """Export Airflow tuning config from a HyperparameterTuner

    Args:
        tuner (sagemaker.tuner.HyperparameterTuner): The tuner to export tuning
            config from.
        inputs: Information about the training data. Please refer to the ``fit()``
            method of the associated estimator in the tuner, as this can take any
            of the following forms:

            * (str) - The S3 location where training data is saved.
            * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using
              multiple channels for training data, you can specify a dict mapping
              channel names to strings or :func:`~sagemaker.session.s3_input` objects.
            * (sagemaker.session.s3_input) - Channel configuration for S3 data sources
              that can provide additional information about the training dataset.
              See :func:`sagemaker.session.s3_input` for full details.
            * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of Amazon
              :class:`~Record` objects serialized and stored in S3. For use with an
              estimator for an Amazon algorithm.
            * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of
              :class:`~sagemaker.amazon.amazon_estimator.RecordSet` objects, where
              each instance is a different channel of training data.

        job_name (str): Specify a tuning job name if needed.

    Returns:
        dict: Tuning config that can be directly used by SageMakerTuningOperator
            in Airflow.
    """
    train_config = training_base_config(tuner.estimator, inputs)
    hyperparameters = train_config.pop('HyperParameters', None)
    s3_operations = train_config.pop('S3Operations', None)

    if hyperparameters and len(hyperparameters) > 0:
        tuner.static_hyperparameters = \
            {utils.to_str(k): utils.to_str(v) for (k, v) in hyperparameters.items()}

    if job_name is not None:
        tuner._current_job_name = job_name
    else:
        base_name = tuner.base_tuning_job_name or utils.base_name_from_image(
            tuner.estimator.train_image())
        tuner._current_job_name = utils.airflow_name_from_base(
            base_name, tuner.TUNING_JOB_NAME_MAX_LENGTH, True)

    for hyperparameter_name in tuner._hyperparameter_ranges.keys():
        tuner.static_hyperparameters.pop(hyperparameter_name, None)

    train_config['StaticHyperParameters'] = tuner.static_hyperparameters

    tune_config = {
        'HyperParameterTuningJobName': tuner._current_job_name,
        'HyperParameterTuningJobConfig': {
            'Strategy': tuner.strategy,
            'HyperParameterTuningJobObjective': {
                'Type': tuner.objective_type,
                'MetricName': tuner.objective_metric_name,
            },
            'ResourceLimits': {
                'MaxNumberOfTrainingJobs': tuner.max_jobs,
                'MaxParallelTrainingJobs': tuner.max_parallel_jobs,
            },
            'ParameterRanges': tuner.hyperparameter_ranges(),
        },
        'TrainingJobDefinition': train_config
    }

    if tuner.metric_definitions is not None:
        tune_config['TrainingJobDefinition']['AlgorithmSpecification']['MetricDefinitions'] = \
            tuner.metric_definitions

    if tuner.tags is not None:
        tune_config['Tags'] = tuner.tags

    if s3_operations is not None:
        tune_config['S3Operations'] = s3_operations

    return tune_config
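# A minimal usage sketch (assumed names): build a tuner over a hypothetical
# `xgb_estimator` and export its Airflow config. The metric name, objective
# type, and parameter range are illustrative assumptions.
#
#     from sagemaker.tuner import HyperparameterTuner, ContinuousParameter
#
#     tuner = HyperparameterTuner(estimator=xgb_estimator,
#                                 objective_metric_name='validation:rmse',
#                                 objective_type='Minimize',
#                                 hyperparameter_ranges={'eta': ContinuousParameter(0.05, 0.5)},
#                                 max_jobs=10,
#                                 max_parallel_jobs=2)
#     tune_config = tuning_config(tuner, inputs='s3://my-bucket/training-data')
#     # pass `tune_config` to airflow.contrib's SageMakerTuningOperator via `config=`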
def transform_config(transformer, data, data_type='S3Prefix', content_type=None,
                     compression_type=None, split_type=None, job_name=None):
    """Export Airflow transform config from a SageMaker transformer

    Args:
        transformer (sagemaker.transformer.Transformer): The SageMaker
            transformer to export Airflow config from.
        data (str): Input data location in S3.
        data_type (str): What the S3 location defines (default: 'S3Prefix').
            Valid values:

            * 'S3Prefix' - the S3 URI defines a key name prefix. All objects with
              this prefix will be used as inputs for the transform job.
            * 'ManifestFile' - the S3 URI points to a single manifest file listing
              each S3 object to use as an input for the transform job.

        content_type (str): MIME type of the input data (default: None).
        compression_type (str): Compression type of the input data, if compressed
            (default: None). Valid values: 'Gzip', None.
        split_type (str): The record delimiter for the input object
            (default: None). Valid values: 'None', 'Line', and 'RecordIO'.
        job_name (str): Job name (default: None). If not specified, one will
            be generated.

    Returns:
        dict: Transform config that can be directly used by
            SageMakerTransformOperator in Airflow.
    """
    if job_name is not None:
        transformer._current_job_name = job_name
    else:
        base_name = transformer.base_transform_job_name
        transformer._current_job_name = utils.airflow_name_from_base(base_name) \
            if base_name is not None else transformer.model_name

    if transformer.output_path is None:
        transformer.output_path = 's3://{}/{}'.format(
            transformer.sagemaker_session.default_bucket(),
            transformer._current_job_name)

    job_config = sagemaker.transformer._TransformJob._load_config(
        data, data_type, content_type, compression_type, split_type, transformer)

    config = {
        'TransformJobName': transformer._current_job_name,
        'ModelName': transformer.model_name,
        'TransformInput': job_config['input_config'],
        'TransformOutput': job_config['output_config'],
        'TransformResources': job_config['resource_config'],
    }

    if transformer.strategy is not None:
        config['BatchStrategy'] = transformer.strategy

    if transformer.max_concurrent_transforms is not None:
        config['MaxConcurrentTransforms'] = transformer.max_concurrent_transforms

    if transformer.max_payload is not None:
        config['MaxPayloadInMB'] = transformer.max_payload

    if transformer.env is not None:
        config['Environment'] = transformer.env

    if transformer.tags is not None:
        config['Tags'] = transformer.tags

    return config
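# A minimal usage sketch (assumed names): export a transform config for a
# hypothetical Transformer over CSV data in S3. The model name, bucket, and
# instance settings are illustrative.
#
#     from sagemaker.transformer import Transformer
#
#     xfm = Transformer(model_name='my-model',
#                       instance_count=1,
#                       instance_type='ml.c4.xlarge')
#     xfm_config = transform_config(xfm,
#                                   data='s3://my-bucket/batch-input',
#                                   content_type='text/csv',
#                                   split_type='Line')
#     # pass `xfm_config` to airflow.contrib's SageMakerTransformOperator via `config=`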
def training_config(estimator, inputs=None, job_name=None):
    """Export Airflow training config from an estimator

    Args:
        estimator (sagemaker.estimator.EstimatorBase): The estimator to export
            training config from. Can be a BYO estimator, Framework estimator or
            Amazon algorithm estimator.
        inputs (str, dict, single or list of sagemaker.amazon.amazon_estimator.RecordSet):
            The training data.
        job_name (str): Specify a training job name if needed.

    Returns:
        dict: Training config that can be directly used by
            SageMakerTrainingOperator in Airflow.
    """
    # Build the shared base config, then add the pieces specific to a
    # standalone training job. This replaces a near-verbatim copy of
    # `training_base_config` with a call to it; behavior is unchanged.
    train_config = training_base_config(estimator, inputs, job_name)

    train_config['TrainingJobName'] = estimator._current_job_name

    if estimator.tags is not None:
        train_config['Tags'] = estimator.tags

    return train_config
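# A minimal end-to-end sketch (assumed names): export a training config from a
# hypothetical TensorFlow estimator and run it with Airflow's
# SageMakerTrainingOperator. The DAG setup and all argument values are
# illustrative assumptions, not part of this module.
#
#     import airflow
#     from airflow import DAG
#     from airflow.contrib.operators.sagemaker_training_operator import SageMakerTrainingOperator
#     from sagemaker.tensorflow import TensorFlow
#
#     tf_estimator = TensorFlow(entry_point='train.py',
#                               role='SageMakerRole',
#                               train_instance_count=1,
#                               train_instance_type='ml.c4.xlarge',
#                               framework_version='1.12.0')
#     train_config = training_config(estimator=tf_estimator,
#                                    inputs='s3://my-bucket/training-data')
#
#     dag = DAG('sagemaker_training',
#               default_args={'start_date': airflow.utils.dates.days_ago(1)},
#               schedule_interval='@once')
#     train_op = SageMakerTrainingOperator(task_id='train',
#                                          config=train_config,
#                                          wait_for_completion=True,
#                                          dag=dag)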