def test_init_aiplatform_with_encryption_key_name_and_create_default_metadata_store(
    self, create_default_metadata_store_mock
):
    aiplatform.init(
        project=_TEST_PROJECT,
        encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME,
    )
    my_metadata_store = metadata_store._MetadataStore._create(
        encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME,
    )

    expected_metadata_store = GapicMetadataStore(
        encryption_spec=_TEST_ENCRYPTION_SPEC,
    )
    create_default_metadata_store_mock.assert_called_once_with(
        parent=_TEST_PARENT,
        metadata_store_id=_TEST_DEFAULT_ID,
        metadata_store=expected_metadata_store,
    )

    expected_metadata_store.name = _TEST_DEFAULT_NAME
    assert my_metadata_store._gca_resource == expected_metadata_store

def test_deploy_with_dedicated_resources(self, deploy_model_mock, sync):
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
    test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
    test_model = models.Model(_TEST_ID)
    test_endpoint.deploy(
        model=test_model,
        machine_type=_TEST_MACHINE_TYPE,
        accelerator_type=_TEST_ACCELERATOR_TYPE,
        accelerator_count=_TEST_ACCELERATOR_COUNT,
        sync=sync,
    )

    if not sync:
        test_endpoint.wait()

    expected_machine_spec = gca_machine_resources.MachineSpec(
        machine_type=_TEST_MACHINE_TYPE,
        accelerator_type=_TEST_ACCELERATOR_TYPE,
        accelerator_count=_TEST_ACCELERATOR_COUNT,
    )
    expected_dedicated_resources = gca_machine_resources.DedicatedResources(
        machine_spec=expected_machine_spec,
        min_replica_count=1,
        max_replica_count=1,
    )
    expected_deployed_model = gca_endpoint.DeployedModel(
        dedicated_resources=expected_dedicated_resources,
        model=test_model.resource_name,
        display_name=None,
    )
    deploy_model_mock.assert_called_once_with(
        endpoint=test_endpoint.resource_name,
        deployed_model=expected_deployed_model,
        traffic_split={"0": 100},
        metadata=(),
    )

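# A hedged usage sketch of the flow exercised above, outside the mocks:
# deploying an existing model to an existing endpoint with dedicated GPU
# resources. The project, region, endpoint ID, model ID, and machine shapes
# below are placeholders, not values from this test suite.
from google.cloud import aiplatform

aiplatform.init(project="my-project", location="us-central1")

endpoint = aiplatform.Endpoint("1234567890")  # placeholder endpoint ID
model = aiplatform.Model("0987654321")  # placeholder model ID

endpoint.deploy(
    model=model,
    machine_type="n1-standard-4",
    accelerator_type="NVIDIA_TESLA_T4",
    accelerator_count=1,
    min_replica_count=1,
    max_replica_count=1,
)
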
def test_list_artifacts(self, list_artifacts_mock):
    aiplatform.init(project=_TEST_PROJECT)

    filter = "test-filter"
    artifact_list = artifact._Artifact.list(
        filter=filter, metadata_store_id=_TEST_METADATA_STORE
    )

    expected_artifact = GapicArtifact(
        name=_TEST_ARTIFACT_NAME,
        schema_title=_TEST_SCHEMA_TITLE,
        schema_version=_TEST_SCHEMA_VERSION,
        display_name=_TEST_DISPLAY_NAME,
        description=_TEST_DESCRIPTION,
        metadata=_TEST_METADATA,
    )

    list_artifacts_mock.assert_called_once_with(
        request=ListArtifactsRequest(
            parent=_TEST_PARENT,
            filter=filter,
        )
    )
    assert len(artifact_list) == 2
    assert artifact_list[0]._gca_resource == expected_artifact
    assert artifact_list[1]._gca_resource == expected_artifact

def test_run_called_twice_raises(self, mock_dataset_text, sync):
    aiplatform.init(project=_TEST_PROJECT)

    job = training_jobs.AutoMLTextTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        prediction_type="classification",
        multi_label=True,
    )

    job.run(
        dataset=mock_dataset_text,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING,
        validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION,
        test_fraction_split=_TEST_FRACTION_SPLIT_TEST,
        sync=sync,
    )

    with pytest.raises(RuntimeError):
        job.run(
            dataset=mock_dataset_text,
            model_display_name=_TEST_MODEL_DISPLAY_NAME,
            sync=sync,
        )

def test_deploy(self, deploy_model_mock, sync):
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
    test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
    test_model = models.Model(_TEST_ID)
    test_endpoint.deploy(test_model, sync=sync)

    if not sync:
        test_endpoint.wait()

    automatic_resources = gca_machine_resources.AutomaticResources(
        min_replica_count=1,
        max_replica_count=1,
    )
    deployed_model = gca_endpoint.DeployedModel(
        automatic_resources=automatic_resources,
        model=test_model.resource_name,
        display_name=None,
    )
    deploy_model_mock.assert_called_once_with(
        endpoint=test_endpoint.resource_name,
        deployed_model=deployed_model,
        traffic_split={"0": 100},
        metadata=(),
    )

def test_list_executions(self, list_executions_mock):
    aiplatform.init(project=_TEST_PROJECT)

    filter = "test-filter"
    execution_list = execution._Execution.list(
        filter=filter, metadata_store_id=_TEST_METADATA_STORE
    )

    expected_execution = GapicExecution(
        name=_TEST_EXECUTION_NAME,
        schema_title=_TEST_SCHEMA_TITLE,
        schema_version=_TEST_SCHEMA_VERSION,
        display_name=_TEST_DISPLAY_NAME,
        description=_TEST_DESCRIPTION,
        metadata=_TEST_METADATA,
    )

    list_executions_mock.assert_called_once_with(
        request=ListExecutionsRequest(
            parent=_TEST_PARENT,
            filter=filter,
        )
    )
    assert len(execution_list) == 2
    assert execution_list[0]._gca_resource == expected_execution
    assert execution_list[1]._gca_resource == expected_execution

def batch_serve_features_to_bq_sample(
    project: str,
    location: str,
    featurestore_name: str,
    bq_destination_output_uri: str,
    read_instances_uri: str,
    sync: bool = True,
):
    aiplatform.init(project=project, location=location)

    fs = aiplatform.featurestore.Featurestore(featurestore_name=featurestore_name)

    SERVING_FEATURE_IDS = {
        "users": ["age", "gender", "liked_genres"],
        "movies": ["title", "average_rating", "genres"],
    }

    fs.batch_serve_to_bq(
        bq_destination_output_uri=bq_destination_output_uri,
        serving_feature_ids=SERVING_FEATURE_IDS,
        read_instances_uri=read_instances_uri,
        sync=sync,
    )

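# Hedged invocation sketch for the sample above. All values are placeholders
# and assume a project with an existing Featurestore containing "users" and
# "movies" entity types.
batch_serve_features_to_bq_sample(
    project="my-project",
    location="us-central1",
    featurestore_name="my_featurestore",
    bq_destination_output_uri="bq://my-project.my_dataset.served_features",
    read_instances_uri="bq://my-project.my_dataset.read_instances",
)
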
def create_training_pipeline_image_classification_sample(
    project: str,
    display_name: str,
    dataset_id: int,
    location: str = "us-central1",
    model_display_name: str = None,
    training_fraction_split: float = 0.8,
    validation_fraction_split: float = 0.1,
    test_fraction_split: float = 0.1,
    budget_milli_node_hours: int = 8000,
    disable_early_stopping: bool = False,
    sync: bool = True,
):
    aiplatform.init(project=project, location=location)

    job = aiplatform.AutoMLImageTrainingJob(display_name=display_name)

    my_image_ds = aiplatform.ImageDataset(dataset_id)

    model = job.run(
        dataset=my_image_ds,
        model_display_name=model_display_name,
        training_fraction_split=training_fraction_split,
        validation_fraction_split=validation_fraction_split,
        test_fraction_split=test_fraction_split,
        budget_milli_node_hours=budget_milli_node_hours,
        disable_early_stopping=disable_early_stopping,
        sync=sync,
    )

    model.wait()

    print(model.display_name)
    print(model.resource_name)
    print(model.uri)
    return model

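# Hedged invocation sketch for the sample above; the project and dataset ID
# are placeholders for an existing Vertex AI image dataset.
model = create_training_pipeline_image_classification_sample(
    project="my-project",
    display_name="flowers-classifier",
    dataset_id="1234567890123456789",  # placeholder dataset ID
)
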
def test_create_dataset(self, create_dataset_mock, sync):
    aiplatform.init(
        project=_TEST_PROJECT, encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME
    )

    my_dataset = datasets.VideoDataset.create(
        display_name=_TEST_DISPLAY_NAME,
        sync=sync,
    )

    if not sync:
        my_dataset.wait()

    expected_dataset = gca_dataset.Dataset(
        display_name=_TEST_DISPLAY_NAME,
        metadata_schema_uri=_TEST_METADATA_SCHEMA_URI_VIDEO,
        metadata=_TEST_NONTABULAR_DATASET_METADATA,
        encryption_spec=_TEST_ENCRYPTION_SPEC,
    )
    create_dataset_mock.assert_called_once_with(
        parent=_TEST_PARENT,
        dataset=expected_dataset,
        metadata=_TEST_REQUEST_METADATA,
    )

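# Hedged usage sketch: creating a video dataset with real source data rather
# than a mock. The bucket path and display name are placeholders; the import
# schema constant is assumed from aiplatform.schema.
from google.cloud import aiplatform

ds = aiplatform.VideoDataset.create(
    display_name="my-videos",  # placeholder
    gcs_source="gs://my-bucket/videos/import.jsonl",  # placeholder
    import_schema_uri=aiplatform.schema.dataset.ioformat.video.classification,
)
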
def test_create_tensorboard_with_default_encryption_key(
    self, create_tensorboard_mock
):
    aiplatform.init(
        project=_TEST_PROJECT,
        encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME,
    )

    tensorboard.Tensorboard.create(
        display_name=_TEST_DISPLAY_NAME,
        create_request_timeout=None,
    )

    expected_tensorboard = gca_tensorboard.Tensorboard(
        display_name=_TEST_DISPLAY_NAME,
        encryption_spec=_TEST_ENCRYPTION_SPEC,
    )

    create_tensorboard_mock.assert_called_once_with(
        parent=_TEST_PARENT,
        tensorboard=expected_tensorboard,
        metadata=_TEST_REQUEST_METADATA,
        timeout=None,
    )

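# Hedged usage sketch: creating a CMEK-protected Tensorboard outside the test.
# The project and Cloud KMS key name are placeholders.
from google.cloud import aiplatform

aiplatform.init(
    project="my-project",
    location="us-central1",
    encryption_spec_key_name=(
        "projects/my-project/locations/us-central1/keyRings/my-ring/cryptoKeys/my-key"
    ),
)
tb = aiplatform.Tensorboard.create(display_name="my-tensorboard")
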
def test_create_tensorboard_experiment(
    self, create_tensorboard_experiment_mock, get_tensorboard_experiment_mock
):
    aiplatform.init(project=_TEST_PROJECT)

    tensorboard.TensorboardExperiment.create(
        tensorboard_experiment_id=_TEST_TENSORBOARD_EXPERIMENT_ID,
        tensorboard_name=_TEST_NAME,
        display_name=_TEST_DISPLAY_NAME,
    )

    expected_tensorboard_experiment = gca_tensorboard_experiment.TensorboardExperiment(
        display_name=_TEST_DISPLAY_NAME,
    )

    create_tensorboard_experiment_mock.assert_called_once_with(
        parent=_TEST_NAME,
        tensorboard_experiment=expected_tensorboard_experiment,
        tensorboard_experiment_id=_TEST_TENSORBOARD_EXPERIMENT_ID,
        metadata=_TEST_REQUEST_METADATA,
    )
    get_tensorboard_experiment_mock.assert_called_once_with(
        name=_TEST_TENSORBOARD_EXPERIMENT_NAME, retry=base._DEFAULT_RETRY
    )

def test_init_aiplatform_with_encryption_key_name_and_create_endpoint(
    self, create_endpoint_mock, sync
):
    aiplatform.init(
        project=_TEST_PROJECT,
        location=_TEST_LOCATION,
        encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME,
    )
    my_endpoint = models.Endpoint.create(display_name=_TEST_DISPLAY_NAME, sync=sync)

    if not sync:
        my_endpoint.wait()

    expected_endpoint = gca_endpoint.Endpoint(
        display_name=_TEST_DISPLAY_NAME, encryption_spec=_TEST_ENCRYPTION_SPEC
    )
    create_endpoint_mock.assert_called_once_with(
        parent=_TEST_PARENT,
        endpoint=expected_endpoint,
        metadata=(),
    )

    expected_endpoint.name = _TEST_ENDPOINT_NAME
    assert my_endpoint._gca_resource == expected_endpoint

def test_create_from_local_script(
    self, get_custom_job_mock, create_custom_job_mock, sync
):
    aiplatform.init(
        project=_TEST_PROJECT,
        location=_TEST_LOCATION,
        staging_bucket=_TEST_STAGING_BUCKET,
        encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
    )

    # configuration on this is tested in test_training_jobs.py
    job = aiplatform.CustomJob.from_local_script(
        display_name=_TEST_DISPLAY_NAME,
        script_path=test_training_jobs._TEST_LOCAL_SCRIPT_FILE_NAME,
        container_uri=_TEST_TRAINING_CONTAINER_IMAGE,
        base_output_dir=_TEST_BASE_OUTPUT_DIR,
        labels=_TEST_LABELS,
    )

    job.run(sync=sync)

    job.wait()

    assert job._gca_resource.state == gca_job_state_compat.JobState.JOB_STATE_SUCCEEDED

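# Hedged usage sketch of CustomJob.from_local_script outside the mocks. The
# script path, container image, and staging bucket are placeholders.
from google.cloud import aiplatform

job = aiplatform.CustomJob.from_local_script(
    display_name="my-custom-job",
    script_path="task.py",  # placeholder local training script
    container_uri="us-docker.pkg.dev/vertex-ai/training/tf-cpu.2-8:latest",  # placeholder image
    staging_bucket="gs://my-staging-bucket",  # placeholder
)
job.run()
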
def test_init_experiment_reset(self):
    aiplatform.init(
        project=_TEST_PROJECT, location=_TEST_LOCATION, experiment=_TEST_EXPERIMENT
    )
    aiplatform.start_run(_TEST_RUN)

    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)

    assert metadata.metadata_service.experiment_name == _TEST_EXPERIMENT
    assert metadata.metadata_service.run_name == _TEST_RUN

    aiplatform.init(project=_TEST_OTHER_PROJECT, location=_TEST_LOCATION)

    assert metadata.metadata_service.experiment_name is None
    assert metadata.metadata_service.run_name is None

class BQToVertexAIDataset(VertexAIWorker):
    """Worker to export a BigQuery table to a Vertex AI dataset."""

    PARAMS = [
        ('bq_project_id', 'string', True, '', 'BQ Project ID'),
        ('bq_dataset_id', 'string', True, '', 'BQ Dataset ID'),
        ('bq_table_id', 'string', True, '', 'BQ Table ID'),
        ('bq_dataset_location', 'string', True, '', 'BQ Dataset Location'),
        ('vertex_ai_dataset_name', 'string', False, '', 'Vertex AI Dataset Name'),
        ('clean_up', 'boolean', True, True, 'Clean Up'),
    ]

    def _execute(self):
        # Initialize the Vertex AI SDK when the worker runs, not at import time.
        aiplatform.init()
        project_id = self._params['bq_project_id']
        dataset_id = self._params['bq_dataset_id']
        table_id = self._params['bq_table_id']
        if not self._params['vertex_ai_dataset_name']:
            display_name = f'{project_id}.{dataset_id}.{table_id}'
        else:
            display_name = self._params['vertex_ai_dataset_name']
        if self._params['clean_up']:
            try:
                datasets = aiplatform.TabularDataset.list(
                    filter=f'display_name={display_name}', order_by='create_time')
                # Delete all but the most recently created dataset with this
                # display name.
                for dataset in datasets[:-1]:
                    dataset.delete()
                    self.log_info(f'Deleted dataset: {dataset.resource_name}.')
            except Exception as e:
                self.log_info(f'Exception: {e}')
        dataset = aiplatform.TabularDataset.create(
            display_name=display_name,
            bq_source=f'bq://{project_id}.{dataset_id}.{table_id}')
        dataset.wait()
        self.log_info(f'Dataset created: {dataset.resource_name}')
        self.log_info('Finished successfully')

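# Hedged standalone sketch of the worker's core flow, without the worker
# framework: export a BigQuery table to a Vertex AI tabular dataset. The
# project, dataset, and table IDs are placeholders.
from google.cloud import aiplatform

aiplatform.init(project="my-project", location="us-central1")
dataset = aiplatform.TabularDataset.create(
    display_name="my-project.my_dataset.my_table",
    bq_source="bq://my-project.my_dataset.my_table",
)
dataset.wait()
print(dataset.resource_name)
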
def test_splits_filter(
    self,
    mock_pipeline_service_create,
    mock_pipeline_service_get,
    mock_dataset_image,
    mock_model_service_get,
    mock_model,
    sync,
):
    """Initialize aiplatform with an encryption key name, create and run an
    AutoML Image Classification training job, and verify calls and return
    value."""
    aiplatform.init(
        project=_TEST_PROJECT,
        encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
    )

    job = training_jobs.AutoMLImageTrainingJob(
        display_name=_TEST_DISPLAY_NAME, base_model=mock_model
    )

    model_from_job = job.run(
        dataset=mock_dataset_image,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        training_filter_split=_TEST_FILTER_SPLIT_TRAINING,
        validation_filter_split=_TEST_FILTER_SPLIT_VALIDATION,
        test_filter_split=_TEST_FILTER_SPLIT_TEST,
        budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
        disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING,
        sync=sync,
    )

    if not sync:
        model_from_job.wait()

    true_filter_split = gca_training_pipeline.FilterSplit(
        training_filter=_TEST_FILTER_SPLIT_TRAINING,
        validation_filter=_TEST_FILTER_SPLIT_VALIDATION,
        test_filter=_TEST_FILTER_SPLIT_TEST,
    )

    true_managed_model = gca_model.Model(
        display_name=_TEST_MODEL_DISPLAY_NAME,
        description=mock_model._gca_resource.description,
        encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
    )

    true_input_data_config = gca_training_pipeline.InputDataConfig(
        filter_split=true_filter_split,
        dataset_id=mock_dataset_image.name,
    )

    true_training_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        training_task_definition=schema.training_job.definition.automl_image_classification,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_BASE_MODEL,
        model_to_upload=true_managed_model,
        input_data_config=true_input_data_config,
        encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=true_training_pipeline,
    )

def test_submit_call_pipeline_service_pipeline_job_create_legacy(
    self,
    mock_pipeline_service_create,
    mock_pipeline_service_get,
    job_spec_json,
    mock_load_json,
):
    aiplatform.init(
        project=_TEST_PROJECT,
        staging_bucket=_TEST_GCS_BUCKET_NAME,
        location=_TEST_LOCATION,
        credentials=_TEST_CREDENTIALS,
    )

    job = pipeline_jobs.PipelineJob(
        display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME,
        template_path=_TEST_TEMPLATE_PATH,
        job_id=_TEST_PIPELINE_JOB_ID,
        parameter_values=_TEST_PIPELINE_PARAMETER_VALUES_LEGACY,
        enable_caching=True,
    )

    job.submit(service_account=_TEST_SERVICE_ACCOUNT, network=_TEST_NETWORK)

    expected_runtime_config_dict = {
        "parameters": {"string_param": {"stringValue": "hello"}},
        "gcsOutputDirectory": _TEST_GCS_BUCKET_NAME,
    }
    runtime_config = gca_pipeline_job_v1.PipelineJob.RuntimeConfig()._pb
    json_format.ParseDict(expected_runtime_config_dict, runtime_config)

    pipeline_spec = job_spec_json.get("pipelineSpec") or job_spec_json

    # Construct expected request
    expected_gapic_pipeline_job = gca_pipeline_job_v1.PipelineJob(
        display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME,
        pipeline_spec={
            "components": {},
            "pipelineInfo": pipeline_spec["pipelineInfo"],
            "root": pipeline_spec["root"],
            "schemaVersion": "2.0.0",
        },
        runtime_config=runtime_config,
        service_account=_TEST_SERVICE_ACCOUNT,
        network=_TEST_NETWORK,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=_TEST_PARENT,
        pipeline_job=expected_gapic_pipeline_job,
        pipeline_job_id=_TEST_PIPELINE_JOB_ID,
    )

    assert not mock_pipeline_service_get.called

    job.wait()

    mock_pipeline_service_get.assert_called_with(
        name=_TEST_PIPELINE_JOB_NAME, retry=base._DEFAULT_RETRY
    )

    assert job._gca_resource == make_pipeline_job(
        gca_pipeline_state_v1.PipelineState.PIPELINE_STATE_SUCCEEDED
    )

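# Hedged usage sketch of PipelineJob.submit outside the mocks. The template
# path, parameter values, service account, and VPC network are placeholders.
from google.cloud import aiplatform

pipeline_job = aiplatform.PipelineJob(
    display_name="my-pipeline",
    template_path="gs://my-bucket/pipeline_spec.json",  # placeholder compiled spec
    parameter_values={"string_param": "hello"},
    enable_caching=True,
)
pipeline_job.submit(
    service_account="runner@my-project.iam.gserviceaccount.com",  # placeholder
    network="projects/12345/global/networks/my-vpc",  # placeholder
)
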
def test_splits_filter(
    self,
    mock_pipeline_service_create,
    mock_pipeline_service_get,
    mock_dataset_text,
    mock_model_service_get,
    mock_model,
    sync,
):
    """Initialize aiplatform with an encryption key name, create and run an
    AutoML Text Classification training job, and verify calls and return
    value."""
    aiplatform.init(
        project=_TEST_PROJECT,
        encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
    )

    job = training_jobs.AutoMLTextTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        prediction_type=_TEST_PREDICTION_TYPE_CLASSIFICATION,
        multi_label=_TEST_CLASSIFICATION_MULTILABEL,
    )

    model_from_job = job.run(
        dataset=mock_dataset_text,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        training_filter_split=_TEST_FILTER_SPLIT_TRAINING,
        validation_filter_split=_TEST_FILTER_SPLIT_VALIDATION,
        test_filter_split=_TEST_FILTER_SPLIT_TEST,
        sync=sync,
        create_request_timeout=None,
    )

    if not sync:
        model_from_job.wait()

    true_filter_split = gca_training_pipeline.FilterSplit(
        training_filter=_TEST_FILTER_SPLIT_TRAINING,
        validation_filter=_TEST_FILTER_SPLIT_VALIDATION,
        test_filter=_TEST_FILTER_SPLIT_TEST,
    )

    true_managed_model = gca_model.Model(
        display_name=_TEST_MODEL_DISPLAY_NAME,
        description=mock_model._gca_resource.description,
        encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
    )

    true_input_data_config = gca_training_pipeline.InputDataConfig(
        filter_split=true_filter_split,
        dataset_id=mock_dataset_text.name,
    )

    true_training_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        training_task_definition=schema.training_job.definition.automl_text_classification,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS_CLASSIFICATION,
        model_to_upload=true_managed_model,
        input_data_config=true_input_data_config,
        encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=true_training_pipeline,
        timeout=None,
    )

def test_run_call_pipeline_service_create_sentiment(
    self,
    mock_pipeline_service_create,
    mock_pipeline_service_get,
    mock_dataset_text,
    mock_model_service_get,
    sync,
):
    """Create and run an AutoML Text Sentiment training job, verify calls and return value"""
    aiplatform.init(project=_TEST_PROJECT)

    job = training_jobs.AutoMLTextTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        labels=_TEST_LABELS,
        prediction_type=_TEST_PREDICTION_TYPE_SENTIMENT,
        sentiment_max=10,
    )

    model_from_job = job.run(
        dataset=mock_dataset_text,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        model_labels=_TEST_MODEL_LABELS,
        training_filter_split=_TEST_FILTER_SPLIT_TRAINING,
        validation_filter_split=_TEST_FILTER_SPLIT_VALIDATION,
        test_filter_split=_TEST_FILTER_SPLIT_TEST,
        sync=sync,
        create_request_timeout=None,
    )

    if not sync:
        model_from_job.wait()

    true_filter_split = gca_training_pipeline.FilterSplit(
        training_filter=_TEST_FILTER_SPLIT_TRAINING,
        validation_filter=_TEST_FILTER_SPLIT_VALIDATION,
        test_filter=_TEST_FILTER_SPLIT_TEST,
    )

    true_managed_model = gca_model.Model(
        display_name=_TEST_MODEL_DISPLAY_NAME, labels=_TEST_MODEL_LABELS
    )

    true_input_data_config = gca_training_pipeline.InputDataConfig(
        filter_split=true_filter_split,
        dataset_id=mock_dataset_text.name,
    )

    true_training_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        labels=_TEST_LABELS,
        training_task_definition=schema.training_job.definition.automl_text_sentiment,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS_SENTIMENT,
        model_to_upload=true_managed_model,
        input_data_config=true_input_data_config,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=true_training_pipeline,
        timeout=None,
    )

    mock_model_service_get.assert_called_once_with(
        name=_TEST_MODEL_NAME, retry=base._DEFAULT_RETRY
    )
    assert job._gca_resource is mock_pipeline_service_get.return_value
    assert model_from_job._gca_resource is mock_model_service_get.return_value
    assert job.get_model()._gca_resource is mock_model_service_get.return_value
    assert not job.has_failed
    assert job.state == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED

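# Hedged usage sketch of an AutoML text sentiment job outside the mocks. The
# dataset resource name and display names are placeholders; sentiment_max
# mirrors the value used in the test above.
from google.cloud import aiplatform

sentiment_job = aiplatform.AutoMLTextTrainingJob(
    display_name="reviews-sentiment",
    prediction_type="sentiment",
    sentiment_max=10,
)
text_ds = aiplatform.TextDataset(
    "projects/my-project/locations/us-central1/datasets/123"  # placeholder
)
sentiment_model = sentiment_job.run(
    dataset=text_ds,
    model_display_name="reviews-sentiment-model",
)
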
def test_init_artifact_with_id(self, get_artifact_mock):
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
    artifact._Artifact(
        resource_name=_TEST_ARTIFACT_ID, metadata_store_id=_TEST_METADATA_STORE
    )
    get_artifact_mock.assert_called_once_with(
        name=_TEST_ARTIFACT_NAME, retry=base._DEFAULT_RETRY
    )

def test_init_artifact(self, get_artifact_mock):
    aiplatform.init(project=_TEST_PROJECT)
    artifact._Artifact(resource_name=_TEST_ARTIFACT_NAME)
    get_artifact_mock.assert_called_once_with(
        name=_TEST_ARTIFACT_NAME, retry=base._DEFAULT_RETRY
    )

def test_init_execution_with_id(self, get_execution_mock):
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
    execution._Execution(
        resource_name=_TEST_EXECUTION_ID, metadata_store_id=_TEST_METADATA_STORE
    )
    get_execution_mock.assert_called_once_with(
        name=_TEST_EXECUTION_NAME, retry=base._DEFAULT_RETRY
    )

def test_init_execution(self, get_execution_mock):
    aiplatform.init(project=_TEST_PROJECT)
    execution._Execution(resource_name=_TEST_EXECUTION_NAME)
    get_execution_mock.assert_called_once_with(
        name=_TEST_EXECUTION_NAME, retry=base._DEFAULT_RETRY
    )

def test_init_context(self, get_context_mock):
    aiplatform.init(project=_TEST_PROJECT)
    context._Context(resource_name=_TEST_CONTEXT_NAME)
    get_context_mock.assert_called_once_with(
        name=_TEST_CONTEXT_NAME, retry=base._DEFAULT_RETRY
    )

def test_run_called_twice_raises(
    self,
    mock_dataset_time_series,
    sync,
):
    aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME)

    job = AutoMLForecastingTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME,
        column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS,
    )

    job.run(
        dataset=mock_dataset_time_series,
        target_column=_TEST_TRAINING_TARGET_COLUMN,
        time_column=_TEST_TRAINING_TIME_COLUMN,
        time_series_identifier_column=_TEST_TRAINING_TIME_SERIES_IDENTIFIER_COLUMN,
        unavailable_at_forecast_columns=_TEST_TRAINING_UNAVAILABLE_AT_FORECAST_COLUMNS,
        available_at_forecast_columns=_TEST_TRAINING_AVAILABLE_AT_FORECAST_COLUMNS,
        forecast_horizon=_TEST_TRAINING_FORECAST_HORIZON,
        data_granularity_unit=_TEST_TRAINING_DATA_GRANULARITY_UNIT,
        data_granularity_count=_TEST_TRAINING_DATA_GRANULARITY_COUNT,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        weight_column=_TEST_TRAINING_WEIGHT_COLUMN,
        time_series_attribute_columns=_TEST_TRAINING_TIME_SERIES_ATTRIBUTE_COLUMNS,
        context_window=_TEST_TRAINING_CONTEXT_WINDOW,
        budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
        export_evaluated_data_items=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS,
        export_evaluated_data_items_bigquery_destination_uri=(
            _TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI
        ),
        export_evaluated_data_items_override_destination=(
            _TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION
        ),
        quantiles=_TEST_TRAINING_QUANTILES,
        validation_options=_TEST_TRAINING_VALIDATION_OPTIONS,
        sync=sync,
    )

    with pytest.raises(RuntimeError):
        job.run(
            dataset=mock_dataset_time_series,
            target_column=_TEST_TRAINING_TARGET_COLUMN,
            time_column=_TEST_TRAINING_TIME_COLUMN,
            time_series_identifier_column=_TEST_TRAINING_TIME_SERIES_IDENTIFIER_COLUMN,
            unavailable_at_forecast_columns=_TEST_TRAINING_UNAVAILABLE_AT_FORECAST_COLUMNS,
            available_at_forecast_columns=_TEST_TRAINING_AVAILABLE_AT_FORECAST_COLUMNS,
            forecast_horizon=_TEST_TRAINING_FORECAST_HORIZON,
            data_granularity_unit=_TEST_TRAINING_DATA_GRANULARITY_UNIT,
            data_granularity_count=_TEST_TRAINING_DATA_GRANULARITY_COUNT,
            model_display_name=_TEST_MODEL_DISPLAY_NAME,
            weight_column=_TEST_TRAINING_WEIGHT_COLUMN,
            time_series_attribute_columns=_TEST_TRAINING_TIME_SERIES_ATTRIBUTE_COLUMNS,
            context_window=_TEST_TRAINING_CONTEXT_WINDOW,
            budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
            export_evaluated_data_items=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS,
            export_evaluated_data_items_bigquery_destination_uri=(
                _TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI
            ),
            export_evaluated_data_items_override_destination=(
                _TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION
            ),
            quantiles=_TEST_TRAINING_QUANTILES,
            validation_options=_TEST_TRAINING_VALIDATION_OPTIONS,
            sync=sync,
        )

def test_run_call_pipeline_if_no_model_display_name(
    self,
    mock_pipeline_service_create,
    mock_dataset_time_series,
    mock_model_service_get,
    sync,
):
    aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME)

    job = AutoMLForecastingTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME,
        column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS,
    )

    model_from_job = job.run(
        dataset=mock_dataset_time_series,
        target_column=_TEST_TRAINING_TARGET_COLUMN,
        time_column=_TEST_TRAINING_TIME_COLUMN,
        time_series_identifier_column=_TEST_TRAINING_TIME_SERIES_IDENTIFIER_COLUMN,
        unavailable_at_forecast_columns=_TEST_TRAINING_UNAVAILABLE_AT_FORECAST_COLUMNS,
        available_at_forecast_columns=_TEST_TRAINING_AVAILABLE_AT_FORECAST_COLUMNS,
        forecast_horizon=_TEST_TRAINING_FORECAST_HORIZON,
        data_granularity_unit=_TEST_TRAINING_DATA_GRANULARITY_UNIT,
        data_granularity_count=_TEST_TRAINING_DATA_GRANULARITY_COUNT,
        weight_column=_TEST_TRAINING_WEIGHT_COLUMN,
        time_series_attribute_columns=_TEST_TRAINING_TIME_SERIES_ATTRIBUTE_COLUMNS,
        context_window=_TEST_TRAINING_CONTEXT_WINDOW,
        budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
        export_evaluated_data_items=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS,
        export_evaluated_data_items_bigquery_destination_uri=(
            _TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI
        ),
        export_evaluated_data_items_override_destination=(
            _TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION
        ),
        quantiles=_TEST_TRAINING_QUANTILES,
        validation_options=_TEST_TRAINING_VALIDATION_OPTIONS,
        sync=sync,
    )

    if not sync:
        model_from_job.wait()

    true_fraction_split = gca_training_pipeline.FractionSplit(
        training_fraction=_TEST_TRAINING_FRACTION_SPLIT,
        validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT,
        test_fraction=_TEST_TEST_FRACTION_SPLIT,
    )

    # Test that the model display name defaults to the job display name.
    true_managed_model = gca_model.Model(display_name=_TEST_DISPLAY_NAME)

    true_input_data_config = gca_training_pipeline.InputDataConfig(
        fraction_split=true_fraction_split,
        dataset_id=mock_dataset_time_series.name,
    )

    true_training_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        training_task_definition=schema.training_job.definition.automl_forecasting,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS,
        model_to_upload=true_managed_model,
        input_data_config=true_input_data_config,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=true_training_pipeline,
    )

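# Hedged usage sketch of the core forecasting arguments exercised above, with
# placeholder column names and a placeholder time-series dataset.
from google.cloud import aiplatform

ts_ds = aiplatform.TimeSeriesDataset(
    "projects/my-project/locations/us-central1/datasets/456"  # placeholder
)
forecasting_job = aiplatform.AutoMLForecastingTrainingJob(
    display_name="demand-forecast",
    optimization_objective="minimize-rmse",
)
forecast_model = forecasting_job.run(
    dataset=ts_ds,
    target_column="sales",
    time_column="date",
    time_series_identifier_column="store_id",
    unavailable_at_forecast_columns=["sales"],
    available_at_forecast_columns=["date"],
    forecast_horizon=30,
    data_granularity_unit="day",
    data_granularity_count=1,
)
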
def test_create_hyperparameter_tuning_job_with_enable_web_access(
    self,
    create_hyperparameter_tuning_job_mock_with_enable_web_access,
    get_hyperparameter_tuning_job_mock_with_enable_web_access,
    sync,
    caplog,
):
    caplog.set_level(logging.INFO)

    aiplatform.init(
        project=_TEST_PROJECT,
        location=_TEST_LOCATION,
        staging_bucket=_TEST_STAGING_BUCKET,
        encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
    )

    custom_job = aiplatform.CustomJob(
        display_name=test_custom_job._TEST_DISPLAY_NAME,
        worker_pool_specs=test_custom_job._TEST_WORKER_POOL_SPEC,
        base_output_dir=test_custom_job._TEST_BASE_OUTPUT_DIR,
    )

    job = aiplatform.HyperparameterTuningJob(
        display_name=_TEST_DISPLAY_NAME,
        custom_job=custom_job,
        metric_spec={_TEST_METRIC_SPEC_KEY: _TEST_METRIC_SPEC_VALUE},
        parameter_spec={
            "lr": hpt.DoubleParameterSpec(min=0.001, max=0.1, scale="log"),
            "units": hpt.IntegerParameterSpec(min=4, max=1028, scale="linear"),
            "activation": hpt.CategoricalParameterSpec(
                values=["relu", "sigmoid", "elu", "selu", "tanh"]
            ),
            "batch_size": hpt.DiscreteParameterSpec(values=[16, 32], scale="linear"),
        },
        parallel_trial_count=_TEST_PARALLEL_TRIAL_COUNT,
        max_trial_count=_TEST_MAX_TRIAL_COUNT,
        max_failed_trial_count=_TEST_MAX_FAILED_TRIAL_COUNT,
        search_algorithm=_TEST_SEARCH_ALGORITHM,
        measurement_selection=_TEST_MEASUREMENT_SELECTION,
        labels=_TEST_LABELS,
    )

    job.run(
        service_account=_TEST_SERVICE_ACCOUNT,
        network=_TEST_NETWORK,
        timeout=_TEST_TIMEOUT,
        restart_job_on_worker_restart=_TEST_RESTART_JOB_ON_WORKER_RESTART,
        enable_web_access=test_custom_job._TEST_ENABLE_WEB_ACCESS,
        sync=sync,
    )

    job.wait()

    assert "workerpool0-0" in caplog.text

    expected_hyperparameter_tuning_job = (
        _get_hyperparameter_tuning_job_proto_with_enable_web_access()
    )

    create_hyperparameter_tuning_job_mock_with_enable_web_access.assert_called_once_with(
        parent=_TEST_PARENT,
        hyperparameter_tuning_job=expected_hyperparameter_tuning_job,
    )

    assert job.state == gca_job_state_compat.JobState.JOB_STATE_SUCCEEDED
    assert job.network == _TEST_NETWORK
    assert job.trials == []

    caplog.clear()

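# Hedged minimal sketch of a hyperparameter tuning setup mirroring the test.
# The staging bucket, container image, and metric name are placeholders.
from google.cloud import aiplatform
from google.cloud.aiplatform import hyperparameter_tuning as hpt

aiplatform.init(
    project="my-project",
    location="us-central1",
    staging_bucket="gs://my-staging-bucket",  # placeholder
)

base_job = aiplatform.CustomJob(
    display_name="hpt-base-job",
    worker_pool_specs=[{
        "machine_spec": {"machine_type": "n1-standard-4"},
        "replica_count": 1,
        "container_spec": {"image_uri": "gcr.io/my-project/trainer:latest"},  # placeholder
    }],
)

hpt_job = aiplatform.HyperparameterTuningJob(
    display_name="my-hpt-job",
    custom_job=base_job,
    metric_spec={"val_accuracy": "maximize"},  # placeholder metric reported by the trainer
    parameter_spec={
        "lr": hpt.DoubleParameterSpec(min=0.001, max=0.1, scale="log"),
    },
    max_trial_count=8,
    parallel_trial_count=2,
)
hpt_job.run()
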
def setup_method(self):
    reload(initializer)
    reload(aiplatform)
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)

def test_batch_predict_with_all_args(
    self, create_batch_prediction_job_with_explanations_mock, sync
):
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
    creds = auth_credentials.AnonymousCredentials()

    batch_prediction_job = jobs.BatchPredictionJob.create(
        model_name=_TEST_MODEL_NAME,
        job_display_name=_TEST_BATCH_PREDICTION_JOB_DISPLAY_NAME,
        gcs_source=_TEST_BATCH_PREDICTION_GCS_SOURCE,
        gcs_destination_prefix=_TEST_BATCH_PREDICTION_GCS_DEST_PREFIX,
        predictions_format="csv",
        model_parameters={},
        machine_type=_TEST_MACHINE_TYPE,
        accelerator_type=_TEST_ACCELERATOR_TYPE,
        accelerator_count=_TEST_ACCELERATOR_COUNT,
        starting_replica_count=_TEST_STARTING_REPLICA_COUNT,
        max_replica_count=_TEST_MAX_REPLICA_COUNT,
        generate_explanation=True,
        explanation_metadata=_TEST_EXPLANATION_METADATA,
        explanation_parameters=_TEST_EXPLANATION_PARAMETERS,
        labels=_TEST_LABEL,
        credentials=creds,
        sync=sync,
        create_request_timeout=None,
    )

    batch_prediction_job.wait_for_resource_creation()

    batch_prediction_job.wait()

    # Construct expected request
    expected_gapic_batch_prediction_job = gca_batch_prediction_job_compat.BatchPredictionJob(
        display_name=_TEST_BATCH_PREDICTION_JOB_DISPLAY_NAME,
        model=_TEST_MODEL_NAME,
        input_config=gca_batch_prediction_job_compat.BatchPredictionJob.InputConfig(
            instances_format="jsonl",
            gcs_source=gca_io_compat.GcsSource(
                uris=[_TEST_BATCH_PREDICTION_GCS_SOURCE]
            ),
        ),
        output_config=gca_batch_prediction_job_compat.BatchPredictionJob.OutputConfig(
            gcs_destination=gca_io_compat.GcsDestination(
                output_uri_prefix=_TEST_BATCH_PREDICTION_GCS_DEST_PREFIX
            ),
            predictions_format="csv",
        ),
        dedicated_resources=gca_machine_resources_compat.BatchDedicatedResources(
            machine_spec=gca_machine_resources_compat.MachineSpec(
                machine_type=_TEST_MACHINE_TYPE,
                accelerator_type=_TEST_ACCELERATOR_TYPE,
                accelerator_count=_TEST_ACCELERATOR_COUNT,
            ),
            starting_replica_count=_TEST_STARTING_REPLICA_COUNT,
            max_replica_count=_TEST_MAX_REPLICA_COUNT,
        ),
        generate_explanation=True,
        explanation_spec=gca_explanation_compat.ExplanationSpec(
            metadata=_TEST_EXPLANATION_METADATA,
            parameters=_TEST_EXPLANATION_PARAMETERS,
        ),
        labels=_TEST_LABEL,
    )

    create_batch_prediction_job_with_explanations_mock.assert_called_once_with(
        parent=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
        batch_prediction_job=expected_gapic_batch_prediction_job,
        timeout=None,
    )

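# Hedged usage sketch of a batch prediction request like the one asserted
# above, with placeholder model and GCS paths.
from google.cloud import aiplatform

bp_job = aiplatform.BatchPredictionJob.create(
    model_name="projects/my-project/locations/us-central1/models/123",  # placeholder
    job_display_name="my-batch-predict",
    gcs_source="gs://my-bucket/instances.jsonl",  # placeholder
    gcs_destination_prefix="gs://my-bucket/predictions",  # placeholder
    machine_type="n1-standard-4",
    starting_replica_count=1,
    max_replica_count=2,
)
bp_job.wait()
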
def test_run_hyperparameter_tuning_job_with_fail_at_creation(self):
    aiplatform.init(
        project=_TEST_PROJECT,
        location=_TEST_LOCATION,
        staging_bucket=_TEST_STAGING_BUCKET,
        encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
    )

    custom_job = aiplatform.CustomJob(
        display_name=test_custom_job._TEST_DISPLAY_NAME,
        worker_pool_specs=test_custom_job._TEST_WORKER_POOL_SPEC,
        base_output_dir=test_custom_job._TEST_BASE_OUTPUT_DIR,
    )

    job = aiplatform.HyperparameterTuningJob(
        display_name=_TEST_DISPLAY_NAME,
        custom_job=custom_job,
        metric_spec={_TEST_METRIC_SPEC_KEY: _TEST_METRIC_SPEC_VALUE},
        parameter_spec={
            "lr": hpt.DoubleParameterSpec(min=0.001, max=0.1, scale="log"),
            "units": hpt.IntegerParameterSpec(min=4, max=1028, scale="linear"),
            "activation": hpt.CategoricalParameterSpec(
                values=["relu", "sigmoid", "elu", "selu", "tanh"]
            ),
            "batch_size": hpt.DiscreteParameterSpec(values=[16, 32], scale="linear"),
        },
        parallel_trial_count=_TEST_PARALLEL_TRIAL_COUNT,
        max_trial_count=_TEST_MAX_TRIAL_COUNT,
        max_failed_trial_count=_TEST_MAX_FAILED_TRIAL_COUNT,
        search_algorithm=_TEST_SEARCH_ALGORITHM,
        measurement_selection=_TEST_MEASUREMENT_SELECTION,
    )

    job.run(
        service_account=_TEST_SERVICE_ACCOUNT,
        network=_TEST_NETWORK,
        timeout=_TEST_TIMEOUT,
        restart_job_on_worker_restart=_TEST_RESTART_JOB_ON_WORKER_RESTART,
        sync=False,
    )

    with pytest.raises(RuntimeError) as e:
        job.wait_for_resource_creation()
    assert e.match("Mock fail")

    with pytest.raises(RuntimeError) as e:
        job.resource_name
    assert e.match(
        "HyperparameterTuningJob resource has not been created. "
        "Resource failed with: Mock fail"
    )

    with pytest.raises(RuntimeError) as e:
        job.network
    assert e.match(
        "HyperparameterTuningJob resource has not been created. "
        "Resource failed with: Mock fail"
    )

    with pytest.raises(RuntimeError) as e:
        job.trials
    assert e.match(
        "HyperparameterTuningJob resource has not been created. "
        "Resource failed with: Mock fail"
    )