def test_deploy_with_display_name(self, deploy_model_mock, sync):
    """Deploy with an explicit display name and check the recorded request.

    Builds an ``Endpoint`` from ``_TEST_ENDPOINT_NAME`` and a ``Model`` from
    ``_TEST_ID``, deploys the model with
    ``deployed_model_display_name=_TEST_DISPLAY_NAME``, and asserts that
    ``deploy_model_mock`` was called exactly once with a ``DeployedModel``
    carrying that display name, automatic resources of one replica (min and
    max), and a traffic split of ``{"0": 100}``.

    Args:
        deploy_model_mock: Mock whose single recorded call is verified.
        sync: Forwarded to ``deploy``; when falsy, ``wait()`` is called on
            the endpoint before the assertion.
    """
    endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
    model = models.Model(_TEST_ID)

    endpoint.deploy(
        model=model,
        deployed_model_display_name=_TEST_DISPLAY_NAME,
        sync=sync,
    )
    if not sync:
        # Non-blocking deploy: block here until it has finished.
        endpoint.wait()

    expected_deployed_model = gca_endpoint.DeployedModel(
        automatic_resources=gca_machine_resources.AutomaticResources(
            min_replica_count=1,
            max_replica_count=1,
        ),
        model=model.resource_name,
        display_name=_TEST_DISPLAY_NAME,
    )
    expected_call = {
        "endpoint": endpoint.resource_name,
        "deployed_model": expected_deployed_model,
        "traffic_split": {"0": 100},
        "metadata": (),
    }
    deploy_model_mock.assert_called_once_with(**expected_call)
def test_deploy_with_traffic_percent(self, deploy_model_mock, sync):
    """Deploy with ``traffic_percentage=70`` onto an endpoint that has traffic.

    ``EndpointServiceClient.get_endpoint`` is patched to return an endpoint
    whose traffic split is ``{"model1": 100}``. After deploying with
    ``traffic_percentage=70`` the test asserts that ``deploy_model_mock`` was
    called exactly once with the traffic split ``{"model1": 30, "0": 70}``
    and a ``DeployedModel`` with no display name and one-replica automatic
    resources.

    Args:
        deploy_model_mock: Mock whose single recorded call is verified.
        sync: Forwarded to ``deploy``; when falsy, ``wait()`` is called on
            the endpoint before the assertion.
    """
    get_endpoint_patch = mock.patch.object(
        endpoint_service_client.EndpointServiceClient, "get_endpoint"
    )
    with get_endpoint_patch as get_endpoint_mock:
        # The endpoint reported by the service already routes all traffic
        # to "model1".
        get_endpoint_mock.return_value = gca_endpoint.Endpoint(
            display_name=_TEST_DISPLAY_NAME,
            name=_TEST_ENDPOINT_NAME,
            traffic_split={"model1": 100},
        )

        endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
        model = models.Model(_TEST_ID)

        endpoint.deploy(model=model, traffic_percentage=70, sync=sync)
        if not sync:
            endpoint.wait()

        expected_deployed_model = gca_endpoint.DeployedModel(
            automatic_resources=gca_machine_resources.AutomaticResources(
                min_replica_count=1,
                max_replica_count=1,
            ),
            model=model.resource_name,
            display_name=None,
        )
        deploy_model_mock.assert_called_once_with(
            endpoint=endpoint.resource_name,
            deployed_model=expected_deployed_model,
            traffic_split={"model1": 30, "0": 70},
            metadata=(),
        )
def test_deploy_no_endpoint_dedicated_resources(self, deploy_model_mock, sync):
    """Deploy from the model itself (no endpoint argument) on dedicated hardware.

    Calls ``Model.deploy`` with a machine type, accelerator type and
    accelerator count, takes the returned endpoint, and asserts that
    ``deploy_model_mock`` was called exactly once with a ``DeployedModel``
    whose ``dedicated_resources`` hold the matching ``MachineSpec`` with one
    replica (min and max), no display name, and a traffic split of
    ``{"0": 100}``.

    Args:
        deploy_model_mock: Mock whose single recorded call is verified.
        sync: Forwarded to ``deploy``; when falsy, ``wait()`` is called on
            the returned endpoint before the assertion.
    """
    model = models.Model(_TEST_ID)

    endpoint = model.deploy(
        machine_type=_TEST_MACHINE_TYPE,
        accelerator_type=_TEST_ACCELERATOR_TYPE,
        accelerator_count=_TEST_ACCELERATOR_COUNT,
        sync=sync,
    )
    if not sync:
        endpoint.wait()

    deployed_model_expectation = gca_endpoint.DeployedModel(
        dedicated_resources=gca_machine_resources.DedicatedResources(
            machine_spec=gca_machine_resources.MachineSpec(
                machine_type=_TEST_MACHINE_TYPE,
                accelerator_type=_TEST_ACCELERATOR_TYPE,
                accelerator_count=_TEST_ACCELERATOR_COUNT,
            ),
            min_replica_count=1,
            max_replica_count=1,
        ),
        model=model.resource_name,
        display_name=None,
    )
    deploy_model_mock.assert_called_once_with(
        endpoint=endpoint.resource_name,
        deployed_model=deployed_model_expectation,
        traffic_split={"0": 100},
        metadata=(),
    )
def deploy_model_mock():
    """Patch ``EndpointServiceClient.deploy_model`` and yield the patched mock.

    While the patch is active, ``deploy_model`` returns a ``Mock`` specced on
    ``ga_operation.Operation`` whose ``result()`` gives a
    ``DeployModelResponse`` wrapping a ``DeployedModel`` built from
    ``_TEST_MODEL_NAME`` and ``_TEST_DISPLAY_NAME``.

    NOTE(review): presumably registered as a pytest fixture by a decorator
    outside this view -- confirm.

    Yields:
        The mock that replaces ``EndpointServiceClient.deploy_model``.
    """
    with mock.patch.object(
        endpoint_service_client.EndpointServiceClient, "deploy_model"
    ) as patched_deploy_model:
        # Stand-in for the long-running operation returned by the service.
        operation = mock.Mock(ga_operation.Operation)
        operation.result.return_value = gca_endpoint_service.DeployModelResponse(
            deployed_model=gca_endpoint.DeployedModel(
                model=_TEST_MODEL_NAME,
                display_name=_TEST_DISPLAY_NAME,
            ),
        )
        patched_deploy_model.return_value = operation
        yield patched_deploy_model
def test_deploy_with_max_replica_count(self, deploy_model_mock, sync):
    """Deploy with ``max_replica_count=2`` and check the automatic resources.

    Initialises ``aiplatform`` with ``_TEST_PROJECT`` / ``_TEST_LOCATION``,
    deploys a model to an endpoint with ``max_replica_count=2``, and asserts
    that ``deploy_model_mock`` was called exactly once with a
    ``DeployedModel`` whose automatic resources have a minimum of one and a
    maximum of two replicas, no display name, and a traffic split of
    ``{"0": 100}``.

    Args:
        deploy_model_mock: Mock whose single recorded call is verified.
        sync: Forwarded to ``deploy``; when falsy, ``wait()`` is called on
            the endpoint before the assertion.
    """
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)

    endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
    model = models.Model(_TEST_ID)

    endpoint.deploy(model=model, max_replica_count=2, sync=sync)
    if not sync:
        endpoint.wait()

    expected_deployed_model = gca_endpoint.DeployedModel(
        automatic_resources=gca_machine_resources.AutomaticResources(
            min_replica_count=1,
            max_replica_count=2,
        ),
        model=model.resource_name,
        display_name=None,
    )
    expected_call = {
        "endpoint": endpoint.resource_name,
        "deployed_model": expected_deployed_model,
        "traffic_split": {"0": 100},
        "metadata": (),
    }
    deploy_model_mock.assert_called_once_with(**expected_call)
def test_deploy_with_dedicated_resources(self, deploy_model_mock, sync):
    """Deploy to an endpoint on dedicated hardware with a service account.

    Initialises ``aiplatform`` with ``_TEST_PROJECT`` / ``_TEST_LOCATION``,
    deploys a model to an endpoint with a machine type, accelerator type,
    accelerator count and service account, and asserts that
    ``deploy_model_mock`` was called exactly once with a ``DeployedModel``
    holding the matching ``DedicatedResources`` (one replica min and max),
    the service account, no display name, and a traffic split of
    ``{"0": 100}``.

    Args:
        deploy_model_mock: Mock whose single recorded call is verified.
        sync: Forwarded to ``deploy``; when falsy, ``wait()`` is called on
            the endpoint before the assertion.
    """
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)

    endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
    model = models.Model(_TEST_ID)

    deploy_kwargs = {
        "model": model,
        "machine_type": _TEST_MACHINE_TYPE,
        "accelerator_type": _TEST_ACCELERATOR_TYPE,
        "accelerator_count": _TEST_ACCELERATOR_COUNT,
        "service_account": _TEST_SERVICE_ACCOUNT,
        "sync": sync,
    }
    endpoint.deploy(**deploy_kwargs)
    if not sync:
        endpoint.wait()

    deployed_model_expectation = gca_endpoint.DeployedModel(
        dedicated_resources=gca_machine_resources.DedicatedResources(
            machine_spec=gca_machine_resources.MachineSpec(
                machine_type=_TEST_MACHINE_TYPE,
                accelerator_type=_TEST_ACCELERATOR_TYPE,
                accelerator_count=_TEST_ACCELERATOR_COUNT,
            ),
            min_replica_count=1,
            max_replica_count=1,
        ),
        model=model.resource_name,
        display_name=None,
        service_account=_TEST_SERVICE_ACCOUNT,
    )
    deploy_model_mock.assert_called_once_with(
        endpoint=endpoint.resource_name,
        deployed_model=deployed_model_expectation,
        traffic_split={"0": 100},
        metadata=(),
    )
) _TEST_ENDPOINT_NAME_ALT_LOCATION = ( f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION_2}/endpoints/{_TEST_ID}" ) _TEST_PARENT = f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}" _TEST_MODEL_NAME = ( f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}/models/{_TEST_ID}") _TEST_MODEL_ID = "1028944691210842416" _TEST_PREDICTION = [[1.0, 2.0, 3.0], [3.0, 3.0, 1.0]] _TEST_INSTANCES = [[1.0, 2.0, 3.0], [1.0, 3.0, 4.0]] _TEST_CREDENTIALS = mock.Mock(spec=auth_credentials.AnonymousCredentials()) _TEST_SERVICE_ACCOUNT = "*****@*****.**" _TEST_DEPLOYED_MODELS = [ gca_endpoint.DeployedModel(id=_TEST_ID, display_name=_TEST_DISPLAY_NAME), gca_endpoint.DeployedModel(id=_TEST_ID_2, display_name=_TEST_DISPLAY_NAME_2), ] _TEST_MACHINE_TYPE = "n1-standard-32" _TEST_ACCELERATOR_TYPE = "NVIDIA_TESLA_P100" _TEST_ACCELERATOR_COUNT = 2 _TEST_EXPLANATIONS = [ gca_prediction_service_v1beta1.explanation.Explanation(attributions=[]) ] _TEST_ATTRIBUTIONS = [ gca_prediction_service_v1beta1.explanation.Attribution( baseline_output_value=1.0,