Example #1
import sagemaker
from sagemaker.local import LocalSession
from sagemaker.pytorch import PyTorchModel


def main():
    sagemaker_session = LocalSession()
    sagemaker_session.config = {'local': {'local_code': True}}

    # For local mode, a dummy role is sufficient
    role = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001'

    print('Deploying local mode endpoint')
    print(
        'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.'
    )

    pytorch_model = PyTorchModel(model_data='./model/model.tar.gz',
                                 role=role,
                                 framework_version="1.7.1",
                                 source_dir="code",
                                 py_version="py3",
                                 entry_point="inference.py",
                                 sagemaker_session=sagemaker_session)

    predictor = pytorch_model.deploy(initial_instance_count=1,
                                     instance_type='local')

    predictor.serializer = sagemaker.serializers.JSONSerializer()
    predictor.deserializer = sagemaker.deserializers.JSONDeserializer()

    # Hebrew for "I love working at Amazon"
    result = predictor.predict("אני אוהב לעבוד באמזון")
    print('result: {}'.format(result))

    predictor.delete_endpoint()
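
The inference.py entry point referenced above is not shown on this page. As a rough sketch only, a handler script for the SageMaker PyTorch serving container implements the model_fn/input_fn/predict_fn/output_fn hooks; the artifact file name and JSON handling below are assumptions, not code from the original example.

import json
import os

import torch


def model_fn(model_dir):
    # Load the artifact unpacked from model.tar.gz (file name assumed).
    model = torch.jit.load(os.path.join(model_dir, 'model.pt'), map_location='cpu')
    model.eval()
    return model


def input_fn(request_body, content_type='application/json'):
    # Pairs with the JSONSerializer attached to the predictor above.
    return json.loads(request_body)


def predict_fn(input_data, model):
    # Model-specific; a text model would tokenize input_data here instead.
    with torch.no_grad():
        return model(torch.as_tensor(input_data))


def output_fn(prediction, accept='application/json'):
    # Tensors are not JSON-serializable, so convert first.
    if hasattr(prediction, 'tolist'):
        prediction = prediction.tolist()
    return json.dumps(prediction)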
Example #2
def test_deploy_packed_model_with_entry_point_name(
    sagemaker_session,
    cpu_instance_type,
    pytorch_inference_latest_version,
    pytorch_inference_latest_py_version,
):
    endpoint_name = "test-pytorch-deploy-model-{}".format(sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model_data = sagemaker_session.upload_data(path=PACKED_MODEL)
        model = PyTorchModel(
            model_data,
            "SageMakerRole",
            entry_point="mnist.py",
            framework_version=pytorch_inference_latest_version,
            py_version=pytorch_inference_latest_py_version,
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)

        batch_size = 100
        data = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
Example #3
def test_deploy_model_with_accelerator(
    sagemaker_session,
    cpu_instance_type,
    pytorch_eia_latest_version,
    pytorch_eia_latest_py_version,
):
    endpoint_name = "test-pytorch-deploy-eia-{}".format(sagemaker_timestamp())
    model_data = sagemaker_session.upload_data(path=EIA_MODEL)
    pytorch = PyTorchModel(
        model_data,
        "SageMakerRole",
        entry_point=EIA_SCRIPT,
        framework_version=pytorch_eia_latest_version,
        py_version=pytorch_eia_latest_py_version,
        sagemaker_session=sagemaker_session,
    )
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        predictor = pytorch.deploy(
            initial_instance_count=1,
            instance_type=cpu_instance_type,
            accelerator_type="ml.eia1.medium",
            endpoint_name=endpoint_name,
        )

        batch_size = 100
        data = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
Example #4
import os

import boto3
import sagemaker
from sagemaker.pytorch import PyTorchModel


def run_as_local_main():
    args = parse_infer_args()
    sm_boto3 = boto3.client('sagemaker')
    sess = sagemaker.Session()
    region = sess.boto_session.region_name
    model_url = args.model_file
    model = PyTorchModel(model_data=model_url,
                         source_dir=os.path.abspath(os.path.dirname(__file__)),
                         role=get_sm_execution_role(ON_SAGEMAKER_NOTEBOOK,
                                                    region),
                         framework_version='1.0.0',
                         entry_point='inference.py')

    infer_mode = args.infer_mode
    if infer_mode == 'bt':
        env = {'MODEL_SERVER_TIMEOUT': '120'}
        transformer = model.transformer(
            instance_count=1,
            instance_type='ml.c5.xlarge',
            output_path=args.output_dir,
            max_payload=99,
            env=env,
            max_concurrent_transforms=1,
            tags=[{
                "Key": "Project",
                "Value": "SM Example"
            }],
        )
        transformer.transform(args.input_file, content_type="text/csv")
        transformer.wait()
    elif infer_mode == 'ep':
        model.deploy(instance_type='ml.c5.xlarge', initial_instance_count=1)
    else:
        raise ValueError(f'Unknown inference mode: {infer_mode}')
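
parse_infer_args and get_sm_execution_role are helpers elided from this snippet. Below is a minimal argparse reconstruction of the interface the code above appears to rely on; the flag names are guesses, and only the attribute names (model_file, infer_mode, input_file, output_dir) are confirmed by the usages.

import argparse


def parse_infer_args():
    # Hypothetical reconstruction; only the attributes used above are defined.
    parser = argparse.ArgumentParser()
    parser.add_argument('--model-file', dest='model_file', required=True,
                        help='S3 URI of the model.tar.gz artifact')
    parser.add_argument('--infer-mode', dest='infer_mode', choices=['bt', 'ep'],
                        default='ep', help="'bt' = batch transform, 'ep' = endpoint")
    parser.add_argument('--input-file', dest='input_file',
                        help='S3 URI of the batch transform input')
    parser.add_argument('--output-dir', dest='output_dir',
                        help='S3 URI for batch transform output')
    return parser.parse_args()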
Example #5
def test_deploy_model(
    pytorch_training_job,
    sagemaker_session,
    cpu_instance_type,
    pytorch_inference_latest_version,
    pytorch_inference_latest_py_version,
):
    endpoint_name = "test-pytorch-deploy-model-{}".format(sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        desc = sagemaker_session.sagemaker_client.describe_training_job(
            TrainingJobName=pytorch_training_job
        )
        model_data = desc["ModelArtifacts"]["S3ModelArtifacts"]
        model = PyTorchModel(
            model_data,
            "SageMakerRole",
            entry_point=MNIST_SCRIPT,
            framework_version=pytorch_inference_latest_version,
            py_version=pytorch_inference_latest_py_version,
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)

        batch_size = 100
        data = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
Example #6
def batch_inference(session, client, model_name, setting, pytorch):
    sagemaker_session = sagemaker.Session(boto_session=session,
                                          sagemaker_client=client)

    with open(setting) as f:
        conf = yaml.safe_load(f)

    # check the target model exists
    if _model_exists(client, model_name):
        logger.info('use the registered model.')
        deploy_args = conf['deploy']
        deploy_args['model_name'] = model_name
        deploy_args['base_transform_job_name'] = model_name
        deploy_args['sagemaker_session'] = sagemaker_session

        transformer = Transformer(**deploy_args)

    else:
        # [TODO] Handle the update case (delete and re-create).
        # Models typically have dependencies on multiple endpoints and
        # inference jobs, so deleting them is not straightforward.
        logger.info('register the new model.')
        model_args = conf['model']
        model_args['sagemaker_session'] = sagemaker_session
        model_args['name'] = model_name
        if pytorch:
            model = PyTorchModel(**model_args)
        else:
            model = ChainerModel(**model_args)

        deploy_args = conf['deploy']
        transformer = model.transformer(**deploy_args)  # register model

    transform_args = conf['transform']
    # use default job_name (model_name + datetime.now())
    transformer.transform(**transform_args)
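
The layout of the setting file is only implied by the lookups above. After yaml.safe_load it is expected to produce a dict roughly like the sketch below; the top-level 'model', 'deploy', and 'transform' keys are confirmed by the code, while every value shown is an illustrative guess.

# Hypothetical parsed contents of the setting YAML. The inner kwargs feed
# PyTorchModel/ChainerModel, Transformer/model.transformer, and
# transformer.transform respectively.
conf = {
    'model': {
        'model_data': 's3://my-bucket/model/model.tar.gz',  # assumption
        'role': 'SageMakerRole',                            # assumption
        'entry_point': 'inference.py',                      # assumption
    },
    'deploy': {
        'instance_count': 1,               # names assumed
        'instance_type': 'ml.c5.xlarge',
    },
    'transform': {
        'data': 's3://my-bucket/input/',   # names assumed
        'content_type': 'text/csv',
    },
}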
Example #7
def test_compile_and_deploy_model_with_neo(
    sagemaker_session,
    neo_pytorch_cpu_instance_type,
    neo_pytorch_latest_version,
    neo_pytorch_latest_py_version,
    neo_pytorch_target_device,
    neo_pytorch_compilation_job_name,
):
    endpoint_name = "test-neo-pytorch-deploy-model-{}".format(
        sagemaker_timestamp())

    model_data = sagemaker_session.upload_data(path=NEO_MODEL)
    bucket = sagemaker_session.default_bucket()
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = PyTorchModel(
            model_data=model_data,
            predictor_cls=Predictor,
            role="SageMakerRole",
            entry_point=NEO_SCRIPT,
            source_dir=NEO_CODE_DIR,
            framework_version=neo_pytorch_latest_version,
            py_version=neo_pytorch_latest_py_version,
            sagemaker_session=sagemaker_session,
            env={"MMS_DEFAULT_RESPONSE_TIMEOUT": "500"},
        )
        data_shape = '{"input0":[1,3,224,224]}'
        compiled_model_path = "s3://{}/{}/output".format(
            bucket, neo_pytorch_compilation_job_name)
        compiled_model = model.compile(
            target_instance_family=neo_pytorch_target_device,
            input_shape=data_shape,
            job_name=neo_pytorch_compilation_job_name,
            role="SageMakerRole",
            framework="pytorch",
            framework_version=neo_pytorch_latest_version,
            output_path=compiled_model_path,
        )

        # Load names for ImageNet classes
        object_categories = {}
        with open(NEO_IMAGENET_CLASSES, "r") as f:
            for line in f:
                if line.strip():
                    key, val = line.strip().split(":")
                    object_categories[key] = val

        with open(NEO_INFERENCE_IMAGE, "rb") as f:
            payload = f.read()
            payload = bytearray(payload)

        predictor = compiled_model.deploy(1,
                                          neo_pytorch_cpu_instance_type,
                                          endpoint_name=endpoint_name)
        response = predictor.predict(payload)
        result = json.loads(response.decode())

        assert "tiger cat" in object_categories[str(np.argmax(result))]
        assert compiled_model.framework_version == neo_pytorch_latest_version
Example #8
def test_deploy_model(pytorch_training_job, sagemaker_session):
    endpoint_name = 'test-pytorch-deploy-model-{}'.format(sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        desc = sagemaker_session.sagemaker_client.describe_training_job(TrainingJobName=pytorch_training_job)
        model_data = desc['ModelArtifacts']['S3ModelArtifacts']
        model = PyTorchModel(model_data, 'SageMakerRole', entry_point=MNIST_SCRIPT, sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)

        batch_size = 100
        data = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
Example #9
    def create_model(
        self,
        model_server_workers=None,
        role=None,
        vpc_config_override=VPC_CONFIG_DEFAULT,
        entry_point=None,
        source_dir=None,
        dependencies=None,
        **kwargs
    ):
        """Create a SageMaker ``PyTorchModel`` object that can be deployed to an
        ``Endpoint``.

        Args:
            model_server_workers (int): Optional. The number of worker processes
                used by the inference server. If None, server will use one
                worker per vCPU.
            role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``,
                which is also used during transform jobs. If not specified, the
                role from the Estimator will be used.
            vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on
                the model. Default: use subnets and security groups from this Estimator.
                * 'Subnets' (list[str]): List of subnet ids.
                * 'SecurityGroupIds' (list[str]): List of security group ids.
            entry_point (str): Path (absolute or relative) to the local Python source file which
                should be executed as the entry point for inference. If not specified, the
                training entry point is used.
            source_dir (str): Path (absolute or relative) to a directory with any other serving
                source code dependencies aside from the entry point file.
                If not specified, the model source directory from training is used.
            dependencies (list[str]): A list of paths to directories (absolute or relative) with
                any additional libraries that will be exported to the container.
                If not specified, the dependencies from training are used.
            **kwargs: Additional kwargs passed to the :class:`~sagemaker.pytorch.model.PyTorchModel`
                constructor.

        Returns:
            sagemaker.pytorch.model.PyTorchModel: A SageMaker ``PyTorchModel``
            object. See :func:`~sagemaker.pytorch.model.PyTorchModel` for full details.
        """
        if "image" not in kwargs:
            kwargs["image"] = self.image_name

        return PyTorchModel(
            self.model_data,
            role or self.role,
            entry_point or self.entry_point,
            source_dir=(source_dir or self._model_source_dir()),
            enable_cloudwatch_metrics=self.enable_cloudwatch_metrics,
            name=self._current_job_name,
            container_log_level=self.container_log_level,
            code_location=self.code_location,
            py_version=self.py_version,
            framework_version=self.framework_version,
            model_server_workers=model_server_workers,
            sagemaker_session=self.sagemaker_session,
            vpc_config=self.get_vpc_config(vpc_config_override),
            dependencies=(dependencies or self.dependencies),
            **kwargs
        )
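
As a usage sketch (the fitted estimator and the argument values below are assumptions, not part of this example), create_model is typically called on a trained PyTorch estimator and the returned model deployed directly:

# Hypothetical usage; assumes `estimator` is a fitted sagemaker.pytorch.PyTorch
# estimator with a completed training job.
model = estimator.create_model(model_server_workers=2,
                               entry_point='inference.py',
                               source_dir='serving_code')
predictor = model.deploy(initial_instance_count=1, instance_type='ml.m5.xlarge')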
Example #10
def test_deploy_model(pytorch_training_job, sagemaker_session):
    endpoint_name = 'test-pytorch-deploy-model-{}'.format(sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        desc = sagemaker_session.sagemaker_client.describe_training_job(
            TrainingJobName=pytorch_training_job)
        model_data = desc['ModelArtifacts']['S3ModelArtifacts']
        model = PyTorchModel(model_data, 'SageMakerRole', entry_point=MNIST_SCRIPT,
                             sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)

        batch_size = 100
        data = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
Example #11
    def create_model(self, model_server_workers=None):
        """Create a SageMaker ``PyTorchModel`` object that can be deployed to an ``Endpoint``.

        Args:
            model_server_workers (int): Optional. The number of worker processes used by the inference server.
                If None, server will use one worker per vCPU.

        Returns:
            sagemaker.pytorch.model.PyTorchModel: A SageMaker ``PyTorchModel`` object.
                See :func:`~sagemaker.pytorch.model.PyTorchModel` for full details.
        """
        return PyTorchModel(
            self.model_data,
            self.role,
            self.entry_point,
            source_dir=self._model_source_dir(),
            enable_cloudwatch_metrics=self.enable_cloudwatch_metrics,
            name=self._current_job_name,
            container_log_level=self.container_log_level,
            code_location=self.code_location,
            py_version=self.py_version,
            framework_version=self.framework_version,
            image=self.image_name,
            model_server_workers=model_server_workers,
            sagemaker_session=self.sagemaker_session)
Example #12
def deploy_model_into_sagemaker(estimator, role, server_source_dir: str,
                                endpoint_name: str, aws_account_id: str,
                                aws_region: str, device: str, debug: bool):
    instance_type, image_version = __get_instance_info(device=device,
                                                       debug=debug,
                                                       mode="inference")
    image_url_inference = "{}.dkr.ecr.{}.amazonaws.com/youyakuman:{}".format(
        aws_account_id, aws_region, image_version)
    p_model = PyTorchModel(model_data=estimator.model_data,
                           image=image_url_inference,
                           role=role,
                           framework_version=estimator.framework_version,
                           entry_point=estimator.entry_point,
                           source_dir=server_source_dir)
    predictor = p_model.deploy(initial_instance_count=1,
                               instance_type=instance_type,
                               endpoint_name=endpoint_name)
    return predictor
Example #13
def deploy_endpoint(session, client, endpoint_name, setting, pytorch):
    sagemaker_session = sagemaker.Session(
        boto_session=session,
        sagemaker_client=client)

    with open(setting) as f:
        conf = yaml.safe_load(f)

    model_args = conf['model']
    model_args['sagemaker_session'] = sagemaker_session
    model_args['name'] = endpoint_name + '-model-' + dt.now().strftime('%y%m%d%H%M')
    if pytorch:
        model = PyTorchModel(**model_args)
    else:
        model = ChainerModel(**model_args)

    deploy_args = conf['deploy']
    deploy_args['endpoint_name'] = endpoint_name
    model.deploy(**deploy_args)
Example #14
def test_jumpstart_catboost_image_uri(patched_get_model_specs, session):

    patched_get_model_specs.side_effect = get_prototype_model_spec

    model_id, model_version = "catboost-classification-model", "*"
    instance_type = "ml.p2.xlarge"
    region = "us-west-2"

    model_specs = accessors.JumpStartModelsAccessor.get_model_specs(
        region, model_id, model_version)

    # inference
    uri = image_uris.retrieve(
        framework=None,
        region=region,
        image_scope="inference",
        model_id=model_id,
        model_version=model_version,
        instance_type=instance_type,
    )

    framework_class_uri = PyTorchModel(
        role="mock_role",
        model_data="mock_data",
        entry_point="mock_entry_point",
        framework_version=model_specs.hosting_ecr_specs.framework_version,
        py_version=model_specs.hosting_ecr_specs.py_version,
        sagemaker_session=session,
    ).serving_image_uri(region, instance_type)

    assert uri == framework_class_uri
    assert uri == "763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference:1.9.0-gpu-py38"

    # training
    uri = image_uris.retrieve(
        framework=None,
        region=region,
        image_scope="training",
        model_id=model_id,
        model_version=model_version,
        instance_type=instance_type,
    )

    framework_class_uri = PyTorch(
        role="mock_role",
        entry_point="mock_entry_point",
        framework_version=model_specs.training_ecr_specs.framework_version,
        py_version=model_specs.training_ecr_specs.py_version,
        instance_type=instance_type,
        instance_count=1,
        sagemaker_session=session,
    ).training_image_uri(region=region)

    assert uri == framework_class_uri
    assert uri == "763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training:1.9.0-gpu-py38"
Example #15
    def create_model(self, model_server_workers=None, role=None):
        """Create a SageMaker ``PyTorchModel`` object that can be deployed to an ``Endpoint``.

        Args:
            role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, which is also used during
                transform jobs. If not specified, the role from the Estimator will be used.
            model_server_workers (int): Optional. The number of worker processes used by the inference server.
                If None, server will use one worker per vCPU.

        Returns:
            sagemaker.pytorch.model.PyTorchModel: A SageMaker ``PyTorchModel`` object.
                See :func:`~sagemaker.pytorch.model.PyTorchModel` for full details.
        """
        role = role or self.role
        return PyTorchModel(
            self.model_data,
            role,
            self.entry_point,
            source_dir=self._model_source_dir(),
            enable_cloudwatch_metrics=self.enable_cloudwatch_metrics,
            name=self._current_job_name,
            container_log_level=self.container_log_level,
            code_location=self.code_location,
            py_version=self.py_version,
            framework_version=self.framework_version,
            image=self.image_name,
            model_server_workers=model_server_workers,
            sagemaker_session=self.sagemaker_session,
        )
Example #16
    def create_model(self,
                     model_server_workers=None,
                     role=None,
                     vpc_config_override=VPC_CONFIG_DEFAULT):
        """Create a SageMaker ``PyTorchModel`` object that can be deployed to an ``Endpoint``.

        Args:
            role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, which is also used during
                transform jobs. If not specified, the role from the Estimator will be used.
            model_server_workers (int): Optional. The number of worker processes used by the inference server.
                If None, server will use one worker per vCPU.
            vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the model.
                Default: use subnets and security groups from this Estimator.
                * 'Subnets' (list[str]): List of subnet ids.
                * 'SecurityGroupIds' (list[str]): List of security group ids.

        Returns:
            sagemaker.pytorch.model.PyTorchModel: A SageMaker ``PyTorchModel`` object.
                See :func:`~sagemaker.pytorch.model.PyTorchModel` for full details.
        """
        role = role or self.role
        return PyTorchModel(
            self.model_data,
            role,
            self.entry_point,
            source_dir=self._model_source_dir(),
            enable_cloudwatch_metrics=self.enable_cloudwatch_metrics,
            name=self._current_job_name,
            container_log_level=self.container_log_level,
            code_location=self.code_location,
            py_version=self.py_version,
            framework_version=self.framework_version,
            image=self.image_name,
            model_server_workers=model_server_workers,
            sagemaker_session=self.sagemaker_session,
            vpc_config=self.get_vpc_config(vpc_config_override),
            dependencies=self.dependencies,
        )