def _test_mnist_deploy(sagemaker_session, instance_type):
    """Deploy the packaged MNIST Chainer model and verify a batch prediction.

    The model archive is uploaded through ``sagemaker_session.upload_data``,
    wrapped in a ``ChainerModel`` and deployed on a single instance. A batch of
    all-zero ``float32`` inputs is then sent to the predictor and the test
    asserts that one output is returned per input.

    Args:
        sagemaker_session: Session object used for the upload, the model and
            the endpoint cleanup helper.
        instance_type: Instance type forwarded to ``ChainerModel.deploy``.
    """
    model_path = 'test/resources/mnist/model.tar.gz'
    script_path = 'test/resources/mnist/mnist.py'
    endpoint_name = sagemaker.utils.unique_name_from_base('sagemaker-chainer-test')

    model_data = sagemaker_session.upload_data(
        path=model_path,
        key_prefix='sagemaker-chainer/models',
    )

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=30):
        chainer = ChainerModel(
            model_data=model_data,
            role='SageMakerRole',
            entry_point=script_path,
            sagemaker_session=sagemaker_session,
        )
        # Deploy under the generated name: the enclosing helper is keyed on
        # ``endpoint_name``, so an endpoint created under any other
        # (auto-generated) name would not be the one it acts on.
        predictor = chainer.deploy(
            initial_instance_count=1,
            instance_type=instance_type,
            endpoint_name=endpoint_name,
        )

        batch_size = 100
        data = np.zeros(shape=(batch_size, 1, 28, 28), dtype='float32')
        output = predictor.predict(data)
        assert len(output) == batch_size
def test_deploy_model(chainer_training_job, sagemaker_session):
    """Deploy the artifacts of an existing training job and check predictions.

    The S3 location of the model artifacts is read from the training job
    description, wrapped in a ``ChainerModel`` and deployed to an
    ``ml.m4.xlarge`` endpoint, which is then exercised by
    ``_predict_and_assert``.
    """
    name_template = 'test-chainer-deploy-model-{}'
    endpoint_name = name_template.format(sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        sm_client = sagemaker_session.sagemaker_client
        job_description = sm_client.describe_training_job(
            TrainingJobName=chainer_training_job
        )
        artifacts_uri = job_description['ModelArtifacts']['S3ModelArtifacts']
        entry_script = os.path.join(DATA_DIR, 'chainer_mnist', 'mnist.py')

        chainer_model = ChainerModel(
            artifacts_uri,
            'SageMakerRole',
            entry_point=entry_script,
            sagemaker_session=sagemaker_session,
        )
        endpoint = chainer_model.deploy(
            1,
            "ml.m4.xlarge",
            endpoint_name=endpoint_name,
        )
        _predict_and_assert(endpoint)
def test_deploy_model(chainer_local_training_job, sagemaker_local_session):
    """Deploy a locally trained Chainer model in local mode and check predictions.

    The endpoint is always deleted afterwards, whether or not the prediction
    check succeeds.
    """
    entry_script = os.path.join(DATA_DIR, "chainer_mnist", "mnist.py")
    local_model = ChainerModel(
        chainer_local_training_job.model_data,
        "SageMakerRole",
        entry_point=entry_script,
        sagemaker_session=sagemaker_local_session,
    )

    endpoint = local_model.deploy(1, "local")
    try:
        _predict_and_assert(endpoint)
    finally:
        # Tear the local endpoint down even when the assertion fails.
        endpoint.delete_endpoint()
def create_model(self, model_server_workers=None):
    """Build a deployable ``ChainerModel`` from this estimator's settings.

    Args:
        model_server_workers (int): Optional. Number of worker processes for
            the inference server. When None, the server uses one worker per
            vCPU.

    Returns:
        sagemaker.chainer.model.ChainerModel: The configured model object. See
            :func:`~sagemaker.chainer.model.ChainerModel` for full details.
    """
    # Positional arguments are read first, in the order they are passed.
    model_data = self.model_data
    role = self.role
    entry_point = self.entry_point

    # Remaining estimator settings are forwarded as keyword arguments.
    model_options = {
        "source_dir": self._model_source_dir(),
        "enable_cloudwatch_metrics": self.enable_cloudwatch_metrics,
        "name": self._current_job_name,
        "container_log_level": self.container_log_level,
        "code_location": self.code_location,
        "py_version": self.py_version,
        "framework_version": self.framework_version,
        "model_server_workers": model_server_workers,
        "image": self.image_name,
        "sagemaker_session": self.sagemaker_session,
    }
    return ChainerModel(model_data, role, entry_point, **model_options)
def batch_inference(session, client, model_name, setting, pytorch):
    """Run a batch transform job for ``model_name`` using a YAML settings file.

    If a model called ``model_name`` is already registered, a ``Transformer``
    is built directly from the ``deploy`` section of the settings. Otherwise a
    new model is created from the ``model`` section (``PyTorchModel`` when
    ``pytorch`` is truthy, ``ChainerModel`` otherwise) and its transformer is
    used. In both cases the job is started with the ``transform`` section.

    Args:
        session: Object passed to ``sagemaker.Session`` as ``boto_session``.
        client: Object passed to ``sagemaker.Session`` as ``sagemaker_client``.
        model_name (str): Name of the model to reuse or register.
        setting (str): Path to the YAML settings file; it must provide the
            ``deploy`` and ``transform`` sections, plus ``model`` when the
            model is not registered yet.
        pytorch (bool): Selects ``PyTorchModel`` instead of ``ChainerModel``
            when a new model has to be created.
    """
    sagemaker_session = sagemaker.Session(boto_session=session, sagemaker_client=client)

    # Read the settings inside a context manager so the file handle is closed.
    # NOTE(review): this used to be ``yaml.load(open(setting))``; calling
    # ``yaml.load`` without a Loader is rejected by PyYAML >= 6 and can build
    # arbitrary Python objects on older versions. ``safe_load`` only accepts
    # plain YAML types, which is all a kwargs-style settings file needs.
    with open(setting) as settings_file:
        conf = yaml.safe_load(settings_file)

    # check the target model exists
    if _model_exists(client, model_name):
        logger.info('use the registered model.')
        deploy_args = conf['deploy']
        deploy_args['model_name'] = model_name
        deploy_args['base_transform_job_name'] = model_name
        deploy_args['sagemaker_session'] = sagemaker_session
        transformer = Transformer(**deploy_args)
    else:
        # [TODO] updating case (delete and create).
        # Basically, models have dependencies on multiple endpoints and inference jobs,
        # so it is not easy to delete it.
        logger.info('register the new model.')
        model_args = conf['model']
        model_args['sagemaker_session'] = sagemaker_session
        model_args['name'] = model_name
        if pytorch:
            model = PyTorchModel(**model_args)
        else:
            model = ChainerModel(**model_args)

        deploy_args = conf['deploy']
        transformer = model.transformer(**deploy_args)  # register model

    transform_args = conf['transform']
    # use default job_name (model_name + datetime.now())
    transformer.transform(**transform_args)
def create_model(self, model_server_workers=None, role=None,
                 vpc_config_override=VPC_CONFIG_DEFAULT, entry_point=None,
                 source_dir=None, dependencies=None, **kwargs):
    """Build a deployable ``ChainerModel`` from this estimator's settings.

    Every explicit argument falls back to the corresponding value of the
    estimator when it is not supplied.

    Args:
        model_server_workers (int): Optional. Number of worker processes for
            the inference server. When None, the server uses one worker per
            vCPU.
        role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``,
            also used during transform jobs. Defaults to the estimator's role.
        vpc_config_override (dict[str, list[str]]): Optional override for the
            VpcConfig set on the model. Default: use subnets and security
            groups from this Estimator.
            * 'Subnets' (list[str]): List of subnet ids.
            * 'SecurityGroupIds' (list[str]): List of security group ids.
        entry_point (str): Path (absolute or relative) to the local Python
            source file used as the entry point. If ``source_dir`` is given,
            ``entry_point`` must point to a file at the root of
            ``source_dir``. Defaults to the training entry point.
        source_dir (str): Path (absolute or relative) to a directory with any
            other serving source code dependencies aside from the entry point
            file. Defaults to the model source directory from training.
        dependencies (list[str]): Paths to directories (absolute or relative)
            with additional libraries to export to the container. Defaults to
            the dependencies from training. This is not supported with
            "local code" in Local Mode.
        **kwargs: Additional kwargs passed to the ChainerModel constructor.

    Returns:
        sagemaker.chainer.model.ChainerModel: The configured model object. See
            :func:`~sagemaker.chainer.model.ChainerModel` for full details.
    """
    # Resolve the model name, then default the image URI to the estimator's.
    requested_name = kwargs.get("name")
    kwargs["name"] = self._get_or_create_name(requested_name)
    if "image_uri" not in kwargs:
        kwargs["image_uri"] = self.image_uri

    # Resolve caller overrides against the estimator's own values, in the
    # same order the constructor arguments are passed.
    model_data = self.model_data
    resolved_role = role or self.role
    resolved_entry_point = entry_point or self._model_entry_point()
    resolved_source_dir = source_dir or self._model_source_dir()

    return ChainerModel(
        model_data,
        resolved_role,
        resolved_entry_point,
        source_dir=resolved_source_dir,
        container_log_level=self.container_log_level,
        code_location=self.code_location,
        py_version=self.py_version,
        framework_version=self.framework_version,
        model_server_workers=model_server_workers,
        sagemaker_session=self.sagemaker_session,
        vpc_config=self.get_vpc_config(vpc_config_override),
        dependencies=(dependencies or self.dependencies),
        **kwargs
    )
def create_model(self, model_server_workers=None, role=None):
    """Build a deployable ``ChainerModel`` from this estimator's settings.

    Args:
        model_server_workers (int): Optional. Number of worker processes for
            the inference server. When None, the server uses one worker per
            vCPU.
        role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``,
            also used during transform jobs. Defaults to the estimator's role.

    Returns:
        sagemaker.chainer.model.ChainerModel: The configured model object. See
            :func:`~sagemaker.chainer.model.ChainerModel` for full details.
    """
    # A falsy ``role`` falls back to the estimator's own role.
    effective_role = role or self.role
    model_data = self.model_data
    entry_point = self.entry_point

    # Remaining estimator settings are forwarded as keyword arguments.
    model_options = {
        "source_dir": self._model_source_dir(),
        "enable_cloudwatch_metrics": self.enable_cloudwatch_metrics,
        "name": self._current_job_name,
        "container_log_level": self.container_log_level,
        "code_location": self.code_location,
        "py_version": self.py_version,
        "framework_version": self.framework_version,
        "model_server_workers": model_server_workers,
        "image": self.image_name,
        "sagemaker_session": self.sagemaker_session,
    }
    return ChainerModel(model_data, effective_role, entry_point, **model_options)
def create_model(self, model_server_workers=None, role=None,
                 vpc_config_override=VPC_CONFIG_DEFAULT):
    """Build a deployable ``ChainerModel`` from this estimator's settings.

    Args:
        model_server_workers (int): Optional. Number of worker processes for
            the inference server. When None, the server uses one worker per
            vCPU.
        role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``,
            also used during transform jobs. Defaults to the estimator's role.
        vpc_config_override (dict[str, list[str]]): Optional override for the
            VpcConfig set on the model. Default: use subnets and security
            groups from this Estimator.
            * 'Subnets' (list[str]): List of subnet ids.
            * 'SecurityGroupIds' (list[str]): List of security group ids.

    Returns:
        sagemaker.chainer.model.ChainerModel: The configured model object. See
            :func:`~sagemaker.chainer.model.ChainerModel` for full details.
    """
    # A falsy ``role`` falls back to the estimator's own role.
    effective_role = role or self.role
    model_data = self.model_data
    entry_point = self.entry_point

    # Remaining estimator settings are forwarded as keyword arguments.
    model_options = {
        "source_dir": self._model_source_dir(),
        "enable_cloudwatch_metrics": self.enable_cloudwatch_metrics,
        "name": self._current_job_name,
        "container_log_level": self.container_log_level,
        "code_location": self.code_location,
        "py_version": self.py_version,
        "framework_version": self.framework_version,
        "model_server_workers": model_server_workers,
        "image": self.image_name,
        "sagemaker_session": self.sagemaker_session,
        "vpc_config": self.get_vpc_config(vpc_config_override),
    }
    return ChainerModel(model_data, effective_role, entry_point, **model_options)
def test_deploy_model(chainer_training_job, sagemaker_session):
    """Deploy the artifacts of an existing training job and check predictions.

    The S3 location of the model artifacts is read from the training job
    description, wrapped in a ``ChainerModel`` and deployed to an
    ``ml.m4.xlarge`` endpoint, which is then exercised by
    ``_predict_and_assert``.
    """
    endpoint_name = unique_name_from_base("test-chainer-deploy-model")

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        sm_client = sagemaker_session.sagemaker_client
        job_description = sm_client.describe_training_job(
            TrainingJobName=chainer_training_job
        )
        artifacts_uri = job_description["ModelArtifacts"]["S3ModelArtifacts"]
        entry_script = os.path.join(DATA_DIR, "chainer_mnist", "mnist.py")

        chainer_model = ChainerModel(
            artifacts_uri,
            "SageMakerRole",
            entry_point=entry_script,
            sagemaker_session=sagemaker_session,
        )
        endpoint = chainer_model.deploy(
            1,
            "ml.m4.xlarge",
            endpoint_name=endpoint_name,
        )
        _predict_and_assert(endpoint)
def deploy_endpoint(session, client, endpoint_name, setting, pytorch):
    """Create a model from a YAML settings file and deploy it to an endpoint.

    The model is built from the ``model`` section of the settings
    (``PyTorchModel`` when ``pytorch`` is truthy, ``ChainerModel`` otherwise)
    and named ``<endpoint_name>-model-<yymmddHHMM>``. It is then deployed with
    the ``deploy`` section, with ``endpoint_name`` added to those arguments.

    Args:
        session: Object passed to ``sagemaker.Session`` as ``boto_session``.
        client: Object passed to ``sagemaker.Session`` as ``sagemaker_client``.
        endpoint_name (str): Name of the endpoint to deploy to; also used as
            the prefix of the generated model name.
        setting (str): Path to the YAML settings file providing the ``model``
            and ``deploy`` sections.
        pytorch (bool): Selects ``PyTorchModel`` instead of ``ChainerModel``.
    """
    sagemaker_session = sagemaker.Session(
        boto_session=session, sagemaker_client=client)

    # Read the settings inside a context manager so the file handle is closed.
    # NOTE(review): this used to be ``yaml.load(open(setting))``; calling
    # ``yaml.load`` without a Loader is rejected by PyYAML >= 6 and can build
    # arbitrary Python objects on older versions. ``safe_load`` only accepts
    # plain YAML types, which is all a kwargs-style settings file needs.
    with open(setting) as settings_file:
        conf = yaml.safe_load(settings_file)

    model_args = conf['model']
    model_args['sagemaker_session'] = sagemaker_session
    # Timestamp suffix (minute resolution) distinguishes successive models
    # created for the same endpoint.
    model_args['name'] = endpoint_name + '-model-' + dt.now().strftime('%y%m%d%H%M')
    if pytorch:
        model = PyTorchModel(**model_args)
    else:
        model = ChainerModel(**model_args)

    deploy_args = conf['deploy']
    deploy_args['endpoint_name'] = endpoint_name
    model.deploy(**deploy_args)