def test_predict(self, get_endpoint_mock, predict_client_predict_mock):
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)

    test_endpoint = models.Endpoint(_TEST_ID)
    test_prediction = test_endpoint.predict(
        instances=_TEST_INSTANCES, parameters={"param": 3.0}
    )
    true_prediction = models.Prediction(
        predictions=_TEST_PREDICTION, deployed_model_id=_TEST_ID
    )

    assert true_prediction == test_prediction
    predict_client_predict_mock.assert_called_once_with(
        endpoint=_TEST_ENDPOINT_NAME,
        instances=_TEST_INSTANCES,
        parameters={"param": 3.0},
    )
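# NOTE: The mock fixtures these tests receive (get_endpoint_mock,
# predict_client_predict_mock, deploy_model_mock, and so on) are defined
# elsewhere in this module. As a rough sketch of the pattern only, assuming
# the module-level test constants and client imports, a prediction mock might
# be built like this (hypothetical; the real fixture may differ):
#
# @pytest.fixture
# def predict_client_predict_mock():
#     with mock.patch.object(
#         prediction_service_client.PredictionServiceClient, "predict"
#     ) as predict_mock:
#         predict_mock.return_value = gca_prediction_service.PredictResponse(
#             deployed_model_id=_TEST_ID
#         )
#         predict_mock.return_value.predictions.extend(_TEST_PREDICTION)
#         yield predict_mock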
def test_constructor_with_custom_credentials(self, create_endpoint_client_mock):
    creds = auth_credentials.AnonymousCredentials()

    models.Endpoint(_TEST_ENDPOINT_NAME, credentials=creds)
    create_endpoint_client_mock.assert_has_calls(
        [
            mock.call(
                client_class=utils.EndpointClientWithOverride,
                credentials=creds,
                location_override=_TEST_LOCATION,
            ),
            mock.call(
                client_class=utils.PredictionClientWithOverride,
                credentials=creds,
                location_override=_TEST_LOCATION,
                prediction_client=True,
            ),
        ]
    )
def test_deploy_with_explanations(self, deploy_model_with_explanations_mock, sync):
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
    test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
    test_model = models.Model(_TEST_ID)
    test_endpoint.deploy(
        model=test_model,
        machine_type=_TEST_MACHINE_TYPE,
        accelerator_type=_TEST_ACCELERATOR_TYPE,
        accelerator_count=_TEST_ACCELERATOR_COUNT,
        explanation_metadata=_TEST_EXPLANATION_METADATA,
        explanation_parameters=_TEST_EXPLANATION_PARAMETERS,
        sync=sync,
    )

    if not sync:
        test_endpoint.wait()

    expected_machine_spec = gca_machine_resources_v1beta1.MachineSpec(
        machine_type=_TEST_MACHINE_TYPE,
        accelerator_type=_TEST_ACCELERATOR_TYPE,
        accelerator_count=_TEST_ACCELERATOR_COUNT,
    )
    expected_dedicated_resources = gca_machine_resources_v1beta1.DedicatedResources(
        machine_spec=expected_machine_spec,
        min_replica_count=1,
        max_replica_count=1,
    )
    expected_deployed_model = gca_endpoint_v1beta1.DeployedModel(
        dedicated_resources=expected_dedicated_resources,
        model=test_model.resource_name,
        display_name=None,
        explanation_spec=gca_endpoint_v1beta1.explanation.ExplanationSpec(
            metadata=_TEST_EXPLANATION_METADATA,
            parameters=_TEST_EXPLANATION_PARAMETERS,
        ),
    )
    deploy_model_with_explanations_mock.assert_called_once_with(
        endpoint=test_endpoint.resource_name,
        deployed_model=expected_deployed_model,
        traffic_split={"0": 100},
        metadata=(),
    )
def test_constructor(self, create_endpoint_client_mock):
    aiplatform.init(
        project=_TEST_PROJECT,
        location=_TEST_LOCATION,
        credentials=_TEST_CREDENTIALS,
    )
    models.Endpoint(_TEST_ENDPOINT_NAME)
    create_endpoint_client_mock.assert_has_calls(
        [
            mock.call(
                client_class=utils.EndpointClientWithOverride,
                credentials=initializer.global_config.credentials,
                location_override=_TEST_LOCATION,
            ),
            mock.call(
                client_class=utils.PredictionClientWithOverride,
                credentials=None,
                location_override=_TEST_LOCATION,
                prediction_client=True,
            ),
        ]
    )
def test_undeploy(self, undeploy_model_mock, sync):
    with mock.patch.object(
        endpoint_service_client.EndpointServiceClient, "get_endpoint"
    ) as get_endpoint_mock:
        get_endpoint_mock.return_value = gca_endpoint.Endpoint(
            display_name=_TEST_DISPLAY_NAME,
            name=_TEST_ENDPOINT_NAME,
            traffic_split={"model1": 100},
        )
        test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
        assert dict(test_endpoint.traffic_split) == {"model1": 100}
        test_endpoint.undeploy("model1", sync=sync)

        if not sync:
            test_endpoint.wait()

        # Undeploying the only deployed model leaves an empty traffic split.
        undeploy_model_mock.assert_called_once_with(
            endpoint=test_endpoint.resource_name,
            deployed_model_id="model1",
            traffic_split={},
            metadata=(),
        )
def test_deploy_with_max_replica_count(self, deploy_model_mock, sync):
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
    test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
    test_model = models.Model(_TEST_ID)
    test_endpoint.deploy(model=test_model, max_replica_count=2, sync=sync)

    if not sync:
        test_endpoint.wait()

    automatic_resources = gca_machine_resources.AutomaticResources(
        min_replica_count=1,
        max_replica_count=2,
    )
    deployed_model = gca_endpoint.DeployedModel(
        automatic_resources=automatic_resources,
        model=test_model.resource_name,
        display_name=None,
    )
    deploy_model_mock.assert_called_once_with(
        endpoint=test_endpoint.resource_name,
        deployed_model=deployed_model,
        traffic_split={"0": 100},
        metadata=(),
    )
def test_deploy_with_traffic_split(self, deploy_model_mock, sync):
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
    with mock.patch.object(
        endpoint_service_client.EndpointServiceClient, "get_endpoint"
    ) as get_endpoint_mock:
        get_endpoint_mock.return_value = gca_endpoint.Endpoint(
            display_name=_TEST_DISPLAY_NAME,
            name=_TEST_ENDPOINT_NAME,
            traffic_split={"model1": 100},
        )
        test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
        test_model = models.Model(_TEST_ID)
        test_endpoint.deploy(
            model=test_model,
            traffic_split={"model1": 30, "0": 70},
            sync=sync,
        )

        if not sync:
            test_endpoint.wait()

        automatic_resources = gca_machine_resources.AutomaticResources(
            min_replica_count=1,
            max_replica_count=1,
        )
        deployed_model = gca_endpoint.DeployedModel(
            automatic_resources=automatic_resources,
            model=test_model.resource_name,
            display_name=None,
        )
        deploy_model_mock.assert_called_once_with(
            endpoint=test_endpoint.resource_name,
            deployed_model=deployed_model,
            traffic_split={"model1": 30, "0": 70},
            metadata=(),
        )
def test_deploy_with_dedicated_resources(self, deploy_model_mock, sync):
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
    test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
    test_model = models.Model(_TEST_ID)
    test_endpoint.deploy(
        model=test_model,
        machine_type=_TEST_MACHINE_TYPE,
        accelerator_type=_TEST_ACCELERATOR_TYPE,
        accelerator_count=_TEST_ACCELERATOR_COUNT,
        service_account=_TEST_SERVICE_ACCOUNT,
        sync=sync,
    )

    if not sync:
        test_endpoint.wait()

    expected_machine_spec = gca_machine_resources.MachineSpec(
        machine_type=_TEST_MACHINE_TYPE,
        accelerator_type=_TEST_ACCELERATOR_TYPE,
        accelerator_count=_TEST_ACCELERATOR_COUNT,
    )
    expected_dedicated_resources = gca_machine_resources.DedicatedResources(
        machine_spec=expected_machine_spec,
        min_replica_count=1,
        max_replica_count=1,
    )
    expected_deployed_model = gca_endpoint.DeployedModel(
        dedicated_resources=expected_dedicated_resources,
        model=test_model.resource_name,
        display_name=None,
        service_account=_TEST_SERVICE_ACCOUNT,
    )
    deploy_model_mock.assert_called_once_with(
        endpoint=test_endpoint.resource_name,
        deployed_model=expected_deployed_model,
        traffic_split={"0": 100},
        metadata=(),
    )
def test_undeploy_raises_error_on_zero_leftover_traffic(self, sync):
    """Undeploying a model that holds 100% of the traffic on an Endpoint with
    multiple deployed models, without supplying an updated traffic_split,
    should raise an informative error."""

    traffic_remaining = _TEST_TRAFFIC_SPLIT[_TEST_ID_2]

    assert traffic_remaining == 100  # Confirm this model has all traffic
    assert sum(_TEST_TRAFFIC_SPLIT.values()) == 100  # Mock traffic sums to 100%

    with pytest.raises(ValueError) as e:
        test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
        test_endpoint.undeploy(
            deployed_model_id=_TEST_ID_2,
            sync=sync,
        )

    assert e.match(
        f"Undeploying deployed model '{_TEST_ID_2}' would leave the remaining "
        f"traffic split at 0%."
    )
def test_explain(self, get_endpoint_mock, predict_client_explain_mock):
    test_endpoint = models.Endpoint(_TEST_ID)
    test_prediction = test_endpoint.explain(
        instances=_TEST_INSTANCES,
        parameters={"param": 3.0},
        deployed_model_id=_TEST_MODEL_ID,
    )
    expected_explanations = _TEST_EXPLANATIONS
    expected_explanations[0].attributions.extend(_TEST_ATTRIBUTIONS)
    expected_prediction = models.Prediction(
        predictions=_TEST_PREDICTION,
        deployed_model_id=_TEST_ID,
        explanations=expected_explanations,
    )

    assert expected_prediction == test_prediction
    predict_client_explain_mock.assert_called_once_with(
        endpoint=_TEST_ENDPOINT_NAME,
        instances=_TEST_INSTANCES,
        parameters={"param": 3.0},
        deployed_model_id=_TEST_MODEL_ID,
    )
def test_constructor_with_custom_project(self, get_endpoint_mock):
    models.Endpoint(endpoint_name=_TEST_ID, project=_TEST_PROJECT_2)
    test_endpoint_resource_name = endpoint_service_client.EndpointServiceClient.endpoint_path(
        _TEST_PROJECT_2, _TEST_LOCATION, _TEST_ID
    )
    get_endpoint_mock.assert_called_with(name=test_endpoint_resource_name)
def test_deploy_raise_error_traffic_split(self, sync):
    with pytest.raises(ValueError):
        aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
        test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
        test_model = models.Model(_TEST_ID)
        test_endpoint.deploy(model=test_model, traffic_split={"a": 99}, sync=sync)
def test_deploy_raise_error_max_replica(self, sync):
    with pytest.raises(ValueError):
        aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
        test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
        test_model = models.Model(_TEST_ID)
        test_endpoint.deploy(model=test_model, max_replica_count=-2, sync=sync)
def test_constructor_with_endpoint_name(self, get_endpoint_mock):
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
    models.Endpoint(_TEST_ENDPOINT_NAME)
    get_endpoint_mock.assert_called_with(name=_TEST_ENDPOINT_NAME)
def test_lazy_constructor_with_endpoint_name(self, get_endpoint_mock):
    ep = models.Endpoint(_TEST_ENDPOINT_NAME)
    # Constructing from a full resource name should defer the getter call.
    assert ep._gca_resource.name == _TEST_ENDPOINT_NAME
    assert ep._skipped_getter_call()
    assert not get_endpoint_mock.called
def test_undeploy_raise_error_traffic_split_total(self, sync):
    with pytest.raises(ValueError):
        test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
        test_endpoint.undeploy(
            deployed_model_id="model1",
            traffic_split={"model2": 99},
            sync=sync,
        )
def test_constructor_with_custom_location(self, get_endpoint_mock):
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
    models.Endpoint(endpoint_name=_TEST_ID, location=_TEST_LOCATION_2)
    test_endpoint_resource_name = endpoint_service_client.EndpointServiceClient.endpoint_path(
        _TEST_PROJECT, _TEST_LOCATION_2, _TEST_ID
    )
    get_endpoint_mock.assert_called_with(name=test_endpoint_resource_name)