def test_run_with_experiments(
    self,
    mock_pipeline_service_create,
    mock_dataset_time_series,
    mock_model_service_get,
    sync,
):
    """Run an AutoML forecasting job through the private
    ``_run_with_experiments`` entry point and verify that the created
    training pipeline carries the additional-experiments task inputs.

    Args:
        mock_pipeline_service_create: Mock for the pipeline-create RPC.
        mock_dataset_time_series: Mock time-series dataset fixture.
        mock_model_service_get: Mock for the model-get RPC.
        sync: Parametrized flag; when False the job runs asynchronously
            and we must wait on the returned model.
    """
    aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME)

    job = AutoMLForecastingTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME,
        column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS,
    )

    # No model_display_name is passed: the uploaded model should default
    # to the job display name (asserted below via true_managed_model).
    model_from_job = job._run_with_experiments(
        dataset=mock_dataset_time_series,
        target_column=_TEST_TRAINING_TARGET_COLUMN,
        time_column=_TEST_TRAINING_TIME_COLUMN,
        time_series_identifier_column=_TEST_TRAINING_TIME_SERIES_IDENTIFIER_COLUMN,
        unavailable_at_forecast_columns=_TEST_TRAINING_UNAVAILABLE_AT_FORECAST_COLUMNS,
        available_at_forecast_columns=_TEST_TRAINING_AVAILABLE_AT_FORECAST_COLUMNS,
        forecast_horizon=_TEST_TRAINING_FORECAST_HORIZON,
        data_granularity_unit=_TEST_TRAINING_DATA_GRANULARITY_UNIT,
        data_granularity_count=_TEST_TRAINING_DATA_GRANULARITY_COUNT,
        weight_column=_TEST_TRAINING_WEIGHT_COLUMN,
        time_series_attribute_columns=_TEST_TRAINING_TIME_SERIES_ATTRIBUTE_COLUMNS,
        context_window=_TEST_TRAINING_CONTEXT_WINDOW,
        budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
        export_evaluated_data_items=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS,
        export_evaluated_data_items_bigquery_destination_uri=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI,
        export_evaluated_data_items_override_destination=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION,
        quantiles=_TEST_TRAINING_QUANTILES,
        validation_options=_TEST_TRAINING_VALIDATION_OPTIONS,
        sync=sync,
        additional_experiments=_TEST_ADDITIONAL_EXPERIMENTS,
    )

    if not sync:
        model_from_job.wait()

    # Test that if defaults to the job display name
    true_managed_model = gca_model.Model(display_name=_TEST_DISPLAY_NAME)

    true_input_data_config = gca_training_pipeline.InputDataConfig(
        dataset_id=mock_dataset_time_series.name,
    )

    true_training_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        training_task_definition=schema.training_job.definition.automl_forecasting,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_ADDITIONAL_EXPERIMENTS,
        model_to_upload=true_managed_model,
        input_data_config=true_input_data_config,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=true_training_pipeline,
    )
def test_splits_default(
    self,
    mock_pipeline_service_create,
    mock_pipeline_service_get,
    mock_dataset_text,
    mock_model_service_get,
    mock_model,
    sync,
):
    """Run an AutoML text classification job without any split arguments
    and verify the pipeline-create request omits split configuration
    (leaving the service defaults) while applying the encryption key set
    via ``aiplatform.init``.
    """
    aiplatform.init(
        project=_TEST_PROJECT,
        encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
    )

    text_job = training_jobs.AutoMLTextTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        prediction_type=_TEST_PREDICTION_TYPE_CLASSIFICATION,
        multi_label=_TEST_CLASSIFICATION_MULTILABEL,
    )

    trained_model = text_job.run(
        dataset=mock_dataset_text,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        sync=sync,
        create_request_timeout=None,
    )
    if not sync:
        trained_model.wait()

    # Expected model carries the init-level encryption spec.
    expected_model = gca_model.Model(
        display_name=_TEST_MODEL_DISPLAY_NAME,
        description=mock_model._gca_resource.description,
        encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
    )

    # No fraction/filter split: only the dataset id is populated.
    expected_input_config = gca_training_pipeline.InputDataConfig(
        dataset_id=mock_dataset_text.name,
    )

    expected_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        training_task_definition=schema.training_job.definition.automl_text_classification,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS_CLASSIFICATION,
        model_to_upload=expected_model,
        input_data_config=expected_input_config,
        encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=expected_pipeline,
        timeout=None,
    )
def test_splits_default(
    self,
    mock_pipeline_service_create,
    mock_pipeline_service_get,
    mock_dataset_image,
    mock_model_service_get,
    mock_model,
    sync,
):
    """
    Initiate aiplatform with encryption key name.
    Create and run an AutoML Video Classification training job, verify calls and return value
    """
    # NOTE(review): this variant actually exercises an *image* job with a
    # base model; the docstring mentions video — likely copied. Confirm.
    aiplatform.init(
        project=_TEST_PROJECT,
        encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
    )

    job = training_jobs.AutoMLImageTrainingJob(
        display_name=_TEST_DISPLAY_NAME, base_model=mock_model)

    model_from_job = job.run(
        dataset=mock_dataset_image,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
        disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING,
        sync=sync,
        create_request_timeout=None,
    )

    if not sync:
        model_from_job.wait()

    # Uploaded model inherits description from the base model and the
    # init-level encryption spec.
    true_managed_model = gca_model.Model(
        display_name=_TEST_MODEL_DISPLAY_NAME,
        description=mock_model._gca_resource.description,
        encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
    )

    # No split arguments were passed, so only the dataset id is set.
    true_input_data_config = gca_training_pipeline.InputDataConfig(
        dataset_id=mock_dataset_image.name,
    )

    true_training_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        training_task_definition=schema.training_job.definition.
        automl_image_classification,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_BASE_MODEL,
        model_to_upload=true_managed_model,
        input_data_config=true_input_data_config,
        encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=true_training_pipeline,
        timeout=None,
    )
def test_run_call_pipeline_if_no_model_display_name(
    self,
    mock_pipeline_service_create,
    mock_dataset_image,
    mock_model_service_get,
    sync,
):
    """Verify that when no ``model_display_name`` is given, the uploaded
    model defaults to the job display name, and that per-job pipeline and
    model encryption keys are applied to the pipeline-create request.

    Args:
        mock_pipeline_service_create: Mock for the pipeline-create RPC.
        mock_dataset_image: Mock image dataset fixture.
        mock_model_service_get: Mock for the model-get RPC.
        sync: Parametrized flag toggling synchronous/asynchronous run.
    """
    aiplatform.init(project=_TEST_PROJECT)

    job = training_jobs.AutoMLImageTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        training_encryption_spec_key_name=_TEST_PIPELINE_ENCRYPTION_KEY_NAME,
        model_encryption_spec_key_name=_TEST_MODEL_ENCRYPTION_KEY_NAME,
    )

    model_from_job = job.run(
        dataset=mock_dataset_image,
        training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING,
        validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION,
        test_fraction_split=_TEST_FRACTION_SPLIT_TEST,
        budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
        disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING,
        # Fix: previously the parametrized `sync` fixture was ignored, so
        # the asynchronous code path was never exercised; pass it through
        # like the sibling tests do.
        sync=sync,
    )

    if not sync:
        model_from_job.wait()

    true_fraction_split = gca_training_pipeline.FractionSplit(
        training_fraction=_TEST_FRACTION_SPLIT_TRAINING,
        validation_fraction=_TEST_FRACTION_SPLIT_VALIDATION,
        test_fraction=_TEST_FRACTION_SPLIT_TEST,
    )

    # Test that if defaults to the job display name
    true_managed_model = gca_model.Model(
        display_name=_TEST_DISPLAY_NAME,
        encryption_spec=_TEST_MODEL_ENCRYPTION_SPEC)

    true_input_data_config = gca_training_pipeline.InputDataConfig(
        fraction_split=true_fraction_split,
        dataset_id=mock_dataset_image.name,
    )

    true_training_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        training_task_definition=schema.training_job.definition.
        automl_image_classification,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS,
        model_to_upload=true_managed_model,
        input_data_config=true_input_data_config,
        encryption_spec=_TEST_PIPELINE_ENCRYPTION_SPEC,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=true_training_pipeline,
    )
def test_run_call_pipeline_if_no_model_display_name_nor_model_labels(
    self,
    mock_pipeline_service_create,
    mock_dataset_video,
    mock_model_service_get,
    sync,
):
    """Verify that when neither ``model_display_name`` nor ``model_labels``
    are given, the uploaded model defaults to the job's display name and
    labels.

    Args:
        mock_pipeline_service_create: Mock for the pipeline-create RPC.
        mock_dataset_video: Mock video dataset fixture.
        mock_model_service_get: Mock for the model-get RPC.
        sync: Parametrized flag toggling synchronous/asynchronous run.
    """
    aiplatform.init(project=_TEST_PROJECT)

    job = training_jobs.AutoMLVideoTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        labels=_TEST_LABELS,
        prediction_type=_TEST_PREDICTION_TYPE_VCN,
        model_type=_TEST_MODEL_TYPE_CLOUD,
    )

    model_from_job = job.run(
        dataset=mock_dataset_video,
        training_fraction_split=_TEST_ALTERNATE_FRACTION_SPLIT_TRAINING,
        test_fraction_split=_TEST_ALTERNATE_FRACTION_SPLIT_TEST,
        # Fix: previously the parametrized `sync` fixture was ignored, so
        # the asynchronous code path was never exercised; pass it through
        # like the sibling tests do.
        sync=sync,
        create_request_timeout=None,
    )

    if not sync:
        model_from_job.wait()

    # NOTE(review): validation_fraction is expected even though the run
    # call does not pass one — presumably filled in by the SDK default;
    # confirm against AutoMLVideoTrainingJob.run.
    true_fraction_split = gca_training_pipeline.FractionSplit(
        training_fraction=_TEST_ALTERNATE_FRACTION_SPLIT_TRAINING,
        validation_fraction=_TEST_FRACTION_SPLIT_VALIDATION,
        test_fraction=_TEST_ALTERNATE_FRACTION_SPLIT_TEST,
    )

    # Test that if defaults to the job display name
    true_managed_model = gca_model.Model(
        display_name=_TEST_DISPLAY_NAME,
        labels=_TEST_LABELS,
    )

    true_input_data_config = gca_training_pipeline.InputDataConfig(
        fraction_split=true_fraction_split,
        dataset_id=mock_dataset_video.name,
    )

    true_training_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        labels=_TEST_LABELS,
        training_task_definition=schema.training_job.definition.
        automl_video_classification,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS,
        model_to_upload=true_managed_model,
        input_data_config=true_input_data_config,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=true_training_pipeline,
        timeout=None,
    )
def test_run_call_pipeline_if_no_model_display_name(
    self,
    mock_pipeline_service_create,
    mock_dataset_text,
    mock_model_service_get,
    mock_model,
    sync,
):
    """Omit ``model_display_name`` on an AutoML text classification run
    and verify the uploaded model falls back to the job display name.
    """
    aiplatform.init(project=_TEST_PROJECT)

    text_job = training_jobs.AutoMLTextTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        prediction_type="classification",
        multi_label=True,
    )

    trained_model = text_job.run(
        dataset=mock_dataset_text,
        training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING,
        validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION,
        test_fraction_split=_TEST_FRACTION_SPLIT_TEST,
        model_display_name=None,  # Omit model_display_name
        sync=sync,
    )
    if not sync:
        trained_model.wait()

    expected_split = gca_training_pipeline.FractionSplit(
        training_fraction=_TEST_FRACTION_SPLIT_TRAINING,
        validation_fraction=_TEST_FRACTION_SPLIT_VALIDATION,
        test_fraction=_TEST_FRACTION_SPLIT_TEST,
    )

    # With no model_display_name, the job display name is used instead.
    expected_model = gca_model.Model(display_name=_TEST_DISPLAY_NAME)

    expected_input_config = gca_training_pipeline.InputDataConfig(
        fraction_split=expected_split,
        dataset_id=mock_dataset_text.name,
    )

    expected_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        training_task_definition=schema.training_job.definition.automl_text_classification,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS_CLASSIFICATION,
        model_to_upload=expected_model,
        input_data_config=expected_input_config,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=expected_pipeline,
    )
def test_run_call_pipeline_service_create(
    self,
    mock_pipeline_service_create,
    mock_pipeline_service_get,
    mock_dataset_image,
    mock_model_service_get,
    mock_model,
    sync,
):
    """Create and run an AutoML ICN training job, verify calls and return value"""
    aiplatform.init(
        project=_TEST_PROJECT,
        encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
    )

    job = training_jobs.AutoMLImageTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        base_model=mock_model,
        labels=_TEST_LABELS,
    )

    model_from_job = job.run(
        dataset=mock_dataset_image,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        model_labels=_TEST_MODEL_LABELS,
        training_filter_split=_TEST_FILTER_SPLIT_TRAINING,
        validation_filter_split=_TEST_FILTER_SPLIT_VALIDATION,
        test_filter_split=_TEST_FILTER_SPLIT_TEST,
        budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
        disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING,
        sync=sync,
    )

    if not sync:
        model_from_job.wait()

    true_filter_split = gca_training_pipeline.FilterSplit(
        training_filter=_TEST_FILTER_SPLIT_TRAINING,
        validation_filter=_TEST_FILTER_SPLIT_VALIDATION,
        test_filter=_TEST_FILTER_SPLIT_TEST,
    )

    # Explicit model labels override the job labels; the description is
    # inherited from the base model and encryption from aiplatform.init.
    true_managed_model = gca_model.Model(
        display_name=_TEST_MODEL_DISPLAY_NAME,
        labels=mock_model._gca_resource.labels,
        description=mock_model._gca_resource.description,
        encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
    )

    true_input_data_config = gca_training_pipeline.InputDataConfig(
        filter_split=true_filter_split,
        dataset_id=mock_dataset_image.name,
    )

    true_training_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        labels=_TEST_LABELS,
        training_task_definition=schema.training_job.definition.automl_image_classification,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_BASE_MODEL,
        model_to_upload=true_managed_model,
        input_data_config=true_input_data_config,
        encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=true_training_pipeline,
    )

    mock_model_service_get.assert_called_once_with(
        name=_TEST_MODEL_NAME, retry=base._DEFAULT_RETRY
    )

    # The job and the returned model should be backed by the mocked
    # service responses, and the completed job reports success.
    assert job._gca_resource is mock_pipeline_service_get.return_value
    assert model_from_job._gca_resource is mock_model_service_get.return_value
    assert job.get_model()._gca_resource is mock_model_service_get.return_value
    assert not job.has_failed
    assert job.state == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED
def test_run_call_pipeline_service_create_sentiment(
    self,
    mock_pipeline_service_create,
    mock_pipeline_service_get,
    mock_dataset_text,
    mock_model_service_get,
    sync,
):
    """Create and run an AutoML Text Sentiment training job, verify calls and return value"""
    aiplatform.init(project=_TEST_PROJECT)

    job = training_jobs.AutoMLTextTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        labels=_TEST_LABELS,
        prediction_type=_TEST_PREDICTION_TYPE_SENTIMENT,
        sentiment_max=10,
    )

    model_from_job = job.run(
        dataset=mock_dataset_text,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        model_labels=_TEST_MODEL_LABELS,
        training_filter_split=_TEST_FILTER_SPLIT_TRAINING,
        validation_filter_split=_TEST_FILTER_SPLIT_VALIDATION,
        test_filter_split=_TEST_FILTER_SPLIT_TEST,
        sync=sync,
        create_request_timeout=None,
    )

    if not sync:
        model_from_job.wait()

    true_filter_split = gca_training_pipeline.FilterSplit(
        training_filter=_TEST_FILTER_SPLIT_TRAINING,
        validation_filter=_TEST_FILTER_SPLIT_VALIDATION,
        test_filter=_TEST_FILTER_SPLIT_TEST,
    )

    true_managed_model = gca_model.Model(
        display_name=_TEST_MODEL_DISPLAY_NAME, labels=_TEST_MODEL_LABELS)

    true_input_data_config = gca_training_pipeline.InputDataConfig(
        filter_split=true_filter_split,
        dataset_id=mock_dataset_text.name,
    )

    true_training_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        labels=_TEST_LABELS,
        training_task_definition=schema.training_job.definition.
        automl_text_sentiment,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS_SENTIMENT,
        model_to_upload=true_managed_model,
        input_data_config=true_input_data_config,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=true_training_pipeline,
        timeout=None,
    )

    mock_model_service_get.assert_called_once_with(
        name=_TEST_MODEL_NAME, retry=base._DEFAULT_RETRY)

    # Returned objects are backed by the mocked service responses and the
    # job completed successfully.
    assert job._gca_resource is mock_pipeline_service_get.return_value
    assert model_from_job._gca_resource is mock_model_service_get.return_value
    assert job.get_model(
    )._gca_resource is mock_model_service_get.return_value
    assert not job.has_failed
    assert job.state == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED
def test_run_call_pipeline_service_create_classification_with_timeout(
    self,
    mock_pipeline_service_create,
    mock_pipeline_service_get,
    mock_dataset_text,
    mock_model_service_get,
    sync,
):
    """Create and run an AutoML Text Classification training job, verify calls and return value"""
    aiplatform.init(project=_TEST_PROJECT)

    job = training_jobs.AutoMLTextTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        labels=_TEST_LABELS,
        prediction_type=_TEST_PREDICTION_TYPE_CLASSIFICATION,
        multi_label=_TEST_CLASSIFICATION_MULTILABEL,
        training_encryption_spec_key_name=_TEST_PIPELINE_ENCRYPTION_KEY_NAME,
        model_encryption_spec_key_name=_TEST_MODEL_ENCRYPTION_KEY_NAME,
    )

    # The explicit create_request_timeout must be forwarded to the
    # pipeline-create RPC (asserted below as timeout=180.0).
    model_from_job = job.run(
        dataset=mock_dataset_text,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        model_labels=_TEST_MODEL_LABELS,
        training_filter_split=_TEST_FILTER_SPLIT_TRAINING,
        validation_filter_split=_TEST_FILTER_SPLIT_VALIDATION,
        test_filter_split=_TEST_FILTER_SPLIT_TEST,
        sync=sync,
        create_request_timeout=180.0,
    )

    if not sync:
        model_from_job.wait()

    true_filter_split = gca_training_pipeline.FilterSplit(
        training_filter=_TEST_FILTER_SPLIT_TRAINING,
        validation_filter=_TEST_FILTER_SPLIT_VALIDATION,
        test_filter=_TEST_FILTER_SPLIT_TEST,
    )

    true_managed_model = gca_model.Model(
        display_name=_TEST_MODEL_DISPLAY_NAME,
        labels=_TEST_MODEL_LABELS,
        encryption_spec=_TEST_MODEL_ENCRYPTION_SPEC,
    )

    true_input_data_config = gca_training_pipeline.InputDataConfig(
        filter_split=true_filter_split,
        dataset_id=mock_dataset_text.name,
    )

    true_training_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        labels=_TEST_LABELS,
        training_task_definition=schema.training_job.definition.
        automl_text_classification,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS_CLASSIFICATION,
        model_to_upload=true_managed_model,
        input_data_config=true_input_data_config,
        encryption_spec=_TEST_PIPELINE_ENCRYPTION_SPEC,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=true_training_pipeline,
        timeout=180.0,
    )
def test_run_call_pipeline_service_create(
    self,
    mock_pipeline_service_create,
    mock_pipeline_service_get,
    mock_dataset_time_series,
    mock_model_service_get,
    sync,
):
    """Run an AutoML forecasting job with a predefined split column and
    verify the full pipeline-create request plus post-run state.
    """
    aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME)

    job = AutoMLForecastingTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME,
        column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS,
    )

    model_from_job = job.run(
        dataset=mock_dataset_time_series,
        target_column=_TEST_TRAINING_TARGET_COLUMN,
        time_column=_TEST_TRAINING_TIME_COLUMN,
        time_series_identifier_column=_TEST_TRAINING_TIME_SERIES_IDENTIFIER_COLUMN,
        unavailable_at_forecast_columns=_TEST_TRAINING_UNAVAILABLE_AT_FORECAST_COLUMNS,
        available_at_forecast_columns=_TEST_TRAINING_AVAILABLE_AT_FORECAST_COLUMNS,
        forecast_horizon=_TEST_TRAINING_FORECAST_HORIZON,
        data_granularity_unit=_TEST_TRAINING_DATA_GRANULARITY_UNIT,
        data_granularity_count=_TEST_TRAINING_DATA_GRANULARITY_COUNT,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME,
        weight_column=_TEST_TRAINING_WEIGHT_COLUMN,
        time_series_attribute_columns=_TEST_TRAINING_TIME_SERIES_ATTRIBUTE_COLUMNS,
        context_window=_TEST_TRAINING_CONTEXT_WINDOW,
        budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
        export_evaluated_data_items=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS,
        export_evaluated_data_items_bigquery_destination_uri=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI,
        export_evaluated_data_items_override_destination=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION,
        quantiles=_TEST_TRAINING_QUANTILES,
        validation_options=_TEST_TRAINING_VALIDATION_OPTIONS,
        sync=sync,
    )

    if not sync:
        model_from_job.wait()

    # NOTE(review): fraction values are expected even though the run call
    # passes no fraction splits — presumably SDK defaults; confirm against
    # AutoMLForecastingTrainingJob.run.
    true_fraction_split = gca_training_pipeline.FractionSplit(
        training_fraction=_TEST_TRAINING_FRACTION_SPLIT,
        validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT,
        test_fraction=_TEST_TEST_FRACTION_SPLIT,
    )

    true_managed_model = gca_model.Model(
        display_name=_TEST_MODEL_DISPLAY_NAME)

    true_input_data_config = gca_training_pipeline.InputDataConfig(
        fraction_split=true_fraction_split,
        predefined_split=gca_training_pipeline.PredefinedSplit(
            key=_TEST_PREDEFINED_SPLIT_COLUMN_NAME),
        dataset_id=mock_dataset_time_series.name,
    )

    true_training_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        training_task_definition=schema.training_job.definition.
        automl_forecasting,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS,
        model_to_upload=true_managed_model,
        input_data_config=true_input_data_config,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=true_training_pipeline,
    )

    # Returned objects are backed by the mocked service responses and the
    # job completed successfully.
    assert job._gca_resource is mock_pipeline_service_get.return_value

    mock_model_service_get.assert_called_once_with(name=_TEST_MODEL_NAME)

    assert model_from_job._gca_resource is mock_model_service_get.return_value

    assert job.get_model(
    )._gca_resource is mock_model_service_get.return_value

    assert not job.has_failed

    assert job.state == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED
def test_splits_default(
    self,
    mock_pipeline_service_create,
    mock_pipeline_service_get,
    mock_dataset_time_series,
    mock_model_service_get,
    sync,
):
    """
    Initiate aiplatform with encryption key name.
    Create and run an AutoML Video Classification training job, verify calls and return value
    """
    # NOTE(review): this variant actually exercises a *forecasting* job;
    # the docstring mentions video — likely copied. Confirm.
    aiplatform.init(
        project=_TEST_PROJECT,
        encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
    )

    job = AutoMLForecastingTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME,
        column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS,
    )

    model_from_job = job.run(
        dataset=mock_dataset_time_series,
        target_column=_TEST_TRAINING_TARGET_COLUMN,
        time_column=_TEST_TRAINING_TIME_COLUMN,
        time_series_identifier_column=_TEST_TRAINING_TIME_SERIES_IDENTIFIER_COLUMN,
        unavailable_at_forecast_columns=_TEST_TRAINING_UNAVAILABLE_AT_FORECAST_COLUMNS,
        available_at_forecast_columns=_TEST_TRAINING_AVAILABLE_AT_FORECAST_COLUMNS,
        forecast_horizon=_TEST_TRAINING_FORECAST_HORIZON,
        data_granularity_unit=_TEST_TRAINING_DATA_GRANULARITY_UNIT,
        data_granularity_count=_TEST_TRAINING_DATA_GRANULARITY_COUNT,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        weight_column=_TEST_TRAINING_WEIGHT_COLUMN,
        time_series_attribute_columns=_TEST_TRAINING_TIME_SERIES_ATTRIBUTE_COLUMNS,
        context_window=_TEST_TRAINING_CONTEXT_WINDOW,
        budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
        export_evaluated_data_items=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS,
        export_evaluated_data_items_bigquery_destination_uri=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI,
        export_evaluated_data_items_override_destination=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION,
        quantiles=_TEST_TRAINING_QUANTILES,
        validation_options=_TEST_TRAINING_VALIDATION_OPTIONS,
        sync=sync,
        create_request_timeout=None,
    )

    if not sync:
        model_from_job.wait()

    true_managed_model = gca_model.Model(
        display_name=_TEST_MODEL_DISPLAY_NAME,
        encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
    )

    # No split arguments were passed, so only the dataset id is set.
    true_input_data_config = gca_training_pipeline.InputDataConfig(
        dataset_id=mock_dataset_time_series.name,
    )

    true_training_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        training_task_definition=schema.training_job.definition.
        automl_forecasting,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS,
        model_to_upload=true_managed_model,
        input_data_config=true_input_data_config,
        encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=true_training_pipeline,
        timeout=None,
    )
def test_dataset_create_to_model_predict(
    self,
    create_dataset_mock,  # noqa: F811
    import_data_mock,  # noqa: F811
    predict_client_predict_mock,  # noqa: F811
    mock_python_package_to_gcs,  # noqa: F811
    mock_pipeline_service_create,  # noqa: F811
    mock_model_service_get,  # noqa: F811
    mock_pipeline_service_get,  # noqa: F811
    sync,
):
    """End-to-end flow with encryption: create an image dataset, import
    data, train a custom job on it, deploy the model to an endpoint, and
    predict — then verify every mocked service call along the way.
    """
    aiplatform.init(
        project=test_datasets._TEST_PROJECT,
        staging_bucket=test_training_jobs._TEST_BUCKET_NAME,
        credentials=test_training_jobs._TEST_CREDENTIALS,
    )

    my_dataset = aiplatform.ImageDataset.create(
        display_name=test_datasets._TEST_DISPLAY_NAME,
        encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME,
        sync=sync,
    )

    my_dataset.import_data(
        gcs_source=test_datasets._TEST_SOURCE_URI_GCS,
        import_schema_uri=test_datasets._TEST_IMPORT_SCHEMA_URI,
        data_item_labels=test_datasets._TEST_DATA_LABEL_ITEMS,
        sync=sync,
    )

    job = aiplatform.CustomTrainingJob(
        display_name=test_training_jobs._TEST_DISPLAY_NAME,
        script_path=test_training_jobs._TEST_LOCAL_SCRIPT_FILE_NAME,
        container_uri=test_training_jobs._TEST_TRAINING_CONTAINER_IMAGE,
        model_serving_container_image_uri=test_training_jobs.
        _TEST_SERVING_CONTAINER_IMAGE,
        model_serving_container_predict_route=test_training_jobs.
        _TEST_SERVING_CONTAINER_PREDICTION_ROUTE,
        model_serving_container_health_route=test_training_jobs.
        _TEST_SERVING_CONTAINER_HEALTH_ROUTE,
    )

    model_from_job = job.run(
        dataset=my_dataset,
        base_output_dir=test_training_jobs._TEST_BASE_OUTPUT_DIR,
        args=test_training_jobs._TEST_RUN_ARGS,
        replica_count=1,
        machine_type=test_training_jobs._TEST_MACHINE_TYPE,
        accelerator_type=test_training_jobs._TEST_ACCELERATOR_TYPE,
        accelerator_count=test_training_jobs._TEST_ACCELERATOR_COUNT,
        model_display_name=test_training_jobs._TEST_MODEL_DISPLAY_NAME,
        training_fraction_split=test_training_jobs.
        _TEST_TRAINING_FRACTION_SPLIT,
        validation_fraction_split=test_training_jobs.
        _TEST_VALIDATION_FRACTION_SPLIT,
        test_fraction_split=test_training_jobs._TEST_TEST_FRACTION_SPLIT,
        sync=sync,
    )

    created_endpoint = models.Endpoint.create(
        display_name=test_endpoints._TEST_DISPLAY_NAME,
        encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME,
        sync=sync,
    )

    # Deploying to an auto-created endpoint returns the endpoint;
    # deploying to an existing endpoint returns None.
    my_endpoint = model_from_job.deploy(
        encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME, sync=sync)

    endpoint_deploy_return = created_endpoint.deploy(model_from_job, sync=sync)

    assert endpoint_deploy_return is None

    if not sync:
        my_endpoint.wait()
        created_endpoint.wait()

    test_prediction = created_endpoint.predict(instances=[[1.0, 2.0, 3.0],
                                                          [1.0, 3.0, 4.0]],
                                               parameters={"param": 3.0})

    true_prediction = models.Prediction(
        predictions=test_endpoints._TEST_PREDICTION,
        deployed_model_id=test_endpoints._TEST_ID,
    )

    assert true_prediction == test_prediction

    predict_client_predict_mock.assert_called_once_with(
        endpoint=test_endpoints._TEST_ENDPOINT_NAME,
        instances=[[1.0, 2.0, 3.0], [1.0, 3.0, 4.0]],
        parameters={"param": 3.0},
    )

    # --- Dataset creation / import expectations ---
    expected_dataset = gca_dataset.Dataset(
        display_name=test_datasets._TEST_DISPLAY_NAME,
        metadata_schema_uri=test_datasets.
        _TEST_METADATA_SCHEMA_URI_NONTABULAR,
        metadata=test_datasets._TEST_NONTABULAR_DATASET_METADATA,
        encryption_spec=_TEST_ENCRYPTION_SPEC,
    )

    expected_import_config = gca_dataset.ImportDataConfig(
        gcs_source=gca_io.GcsSource(
            uris=[test_datasets._TEST_SOURCE_URI_GCS]),
        import_schema_uri=test_datasets._TEST_IMPORT_SCHEMA_URI,
        data_item_labels=test_datasets._TEST_DATA_LABEL_ITEMS,
    )

    create_dataset_mock.assert_called_once_with(
        parent=test_datasets._TEST_PARENT,
        dataset=expected_dataset,
        metadata=test_datasets._TEST_REQUEST_METADATA,
    )

    import_data_mock.assert_called_once_with(
        name=test_datasets._TEST_NAME,
        import_configs=[expected_import_config])

    # Name is assigned by the service, so set it before comparing.
    expected_dataset.name = test_datasets._TEST_NAME
    assert my_dataset._gca_resource == expected_dataset

    # --- Training-script packaging expectation ---
    mock_python_package_to_gcs.assert_called_once_with(
        gcs_staging_dir=test_training_jobs._TEST_BUCKET_NAME,
        project=test_training_jobs._TEST_PROJECT,
        credentials=initializer.global_config.credentials,
    )

    # --- Training pipeline expectations ---
    true_args = test_training_jobs._TEST_RUN_ARGS

    true_worker_pool_spec = {
        "replica_count": test_training_jobs._TEST_REPLICA_COUNT,
        "machine_spec": {
            "machine_type": test_training_jobs._TEST_MACHINE_TYPE,
            "accelerator_type": test_training_jobs._TEST_ACCELERATOR_TYPE,
            "accelerator_count": test_training_jobs._TEST_ACCELERATOR_COUNT,
        },
        "python_package_spec": {
            "executor_image_uri":
                test_training_jobs._TEST_TRAINING_CONTAINER_IMAGE,
            "python_module":
                source_utils._TrainingScriptPythonPackager.module_name,
            "package_uris":
                [test_training_jobs._TEST_OUTPUT_PYTHON_PACKAGE_PATH],
            "args": true_args,
        },
    }

    true_fraction_split = gca_training_pipeline.FractionSplit(
        training_fraction=test_training_jobs._TEST_TRAINING_FRACTION_SPLIT,
        validation_fraction=test_training_jobs.
        _TEST_VALIDATION_FRACTION_SPLIT,
        test_fraction=test_training_jobs._TEST_TEST_FRACTION_SPLIT,
    )

    true_container_spec = gca_model.ModelContainerSpec(
        image_uri=test_training_jobs._TEST_SERVING_CONTAINER_IMAGE,
        predict_route=test_training_jobs.
        _TEST_SERVING_CONTAINER_PREDICTION_ROUTE,
        health_route=test_training_jobs.
        _TEST_SERVING_CONTAINER_HEALTH_ROUTE,
    )

    true_managed_model = gca_model.Model(
        display_name=test_training_jobs._TEST_MODEL_DISPLAY_NAME,
        container_spec=true_container_spec,
    )

    true_input_data_config = gca_training_pipeline.InputDataConfig(
        fraction_split=true_fraction_split,
        dataset_id=my_dataset.name,
        gcs_destination=gca_io.GcsDestination(
            output_uri_prefix=test_training_jobs._TEST_BASE_OUTPUT_DIR),
    )

    true_training_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=test_training_jobs._TEST_DISPLAY_NAME,
        training_task_definition=schema.training_job.definition.
        custom_task,
        training_task_inputs=json_format.ParseDict(
            {
                "worker_pool_specs": [true_worker_pool_spec],
                "base_output_directory": {
                    "output_uri_prefix":
                        test_training_jobs._TEST_BASE_OUTPUT_DIR
                },
            },
            struct_pb2.Value(),
        ),
        model_to_upload=true_managed_model,
        input_data_config=true_input_data_config,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=true_training_pipeline,
    )

    # Returned objects are backed by the mocked service responses and the
    # job completed successfully.
    assert job._gca_resource is mock_pipeline_service_get.return_value

    mock_model_service_get.assert_called_once_with(
        name=test_training_jobs._TEST_MODEL_NAME)

    assert model_from_job._gca_resource is mock_model_service_get.return_value

    assert job.get_model(
    )._gca_resource is mock_model_service_get.return_value

    assert not job.has_failed

    assert job.state == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED
def test_init_aiplatform_with_encryption_key_name_and_create_training_job(
    self,
    mock_pipeline_service_create,
    mock_pipeline_service_get,
    mock_dataset_video,
    mock_model_service_get,
    mock_model,
    sync,
):
    """
    Initiate aiplatform with encryption key name.
    Create and run an AutoML Video Classification training job, verify calls and return value
    """
    aiplatform.init(
        project=_TEST_PROJECT,
        encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
    )

    video_job = training_jobs.AutoMLVideoTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        prediction_type=_TEST_PREDICTION_TYPE_VCN,
        model_type=_TEST_MODEL_TYPE_CLOUD,
    )

    trained_model = video_job.run(
        dataset=mock_dataset_video,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        sync=sync,
        create_request_timeout=None,
    )
    if not sync:
        trained_model.wait()

    # Both the pipeline and the uploaded model inherit the encryption
    # spec that was set on aiplatform.init.
    expected_model = gca_model.Model(
        display_name=_TEST_MODEL_DISPLAY_NAME,
        description=mock_model._gca_resource.description,
        encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
    )
    expected_input_config = gca_training_pipeline.InputDataConfig(
        dataset_id=mock_dataset_video.name,
    )
    expected_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        training_task_definition=schema.training_job.definition.automl_video_classification,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS,
        model_to_upload=expected_model,
        input_data_config=expected_input_config,
        encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=expected_pipeline,
        timeout=None,
    )
    mock_model_service_get.assert_called_once_with(
        name=_TEST_MODEL_NAME, retry=base._DEFAULT_RETRY)

    # The job and returned model must be backed by the mocked responses,
    # and the finished job reports success.
    assert video_job._gca_resource is mock_pipeline_service_get.return_value
    assert trained_model._gca_resource is mock_model_service_get.return_value
    assert video_job.get_model()._gca_resource is mock_model_service_get.return_value
    assert not video_job.has_failed
    assert video_job.state == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED
def test_run_call_pipeline_service_create(
    self,
    mock_pipeline_service_create,
    mock_pipeline_service_get,
    mock_dataset_video,
    mock_model_service_get,
    mock_model,
    sync,
):
    """Create and run an AutoML video training job with explicit labels,
    per-resource CMEK keys, and filter splits; verify the pipeline create /
    model get calls and the returned model resource.
    """
    aiplatform.init(project=_TEST_PROJECT)

    job = training_jobs.AutoMLVideoTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        labels=_TEST_LABELS,
        prediction_type=_TEST_PREDICTION_TYPE_VCN,
        model_type=_TEST_MODEL_TYPE_CLOUD,
        training_encryption_spec_key_name=_TEST_PIPELINE_ENCRYPTION_KEY_NAME,
        model_encryption_spec_key_name=_TEST_MODEL_ENCRYPTION_KEY_NAME,
    )

    returned_model = job.run(
        dataset=mock_dataset_video,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        model_labels=_TEST_MODEL_LABELS,
        training_filter_split=_TEST_FILTER_SPLIT_TRAINING,
        test_filter_split=_TEST_FILTER_SPLIT_TEST,
        sync=sync,
    )

    if not sync:
        returned_model.wait()

    expected_filter_split = gca_training_pipeline.FilterSplit(
        training_filter=_TEST_FILTER_SPLIT_TRAINING,
        validation_filter=_TEST_FILTER_SPLIT_VALIDATION,
        test_filter=_TEST_FILTER_SPLIT_TEST,
    )
    # The pipeline and model each carry their own encryption spec here,
    # distinct from any init()-level default.
    expected_model = gca_model.Model(
        display_name=_TEST_MODEL_DISPLAY_NAME,
        labels=_TEST_MODEL_LABELS,
        description=mock_model._gca_resource.description,
        encryption_spec=_TEST_MODEL_ENCRYPTION_SPEC,
    )
    expected_input_data_config = gca_training_pipeline.InputDataConfig(
        filter_split=expected_filter_split,
        dataset_id=mock_dataset_video.name,
    )
    expected_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        labels=_TEST_LABELS,
        training_task_definition=(
            schema.training_job.definition.automl_video_classification
        ),
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS,
        model_to_upload=expected_model,
        input_data_config=expected_input_data_config,
        encryption_spec=_TEST_PIPELINE_ENCRYPTION_SPEC,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=expected_pipeline,
    )
    mock_model_service_get.assert_called_once_with(
        name=_TEST_MODEL_NAME, retry=base._DEFAULT_RETRY
    )

    assert job._gca_resource is mock_pipeline_service_get.return_value
    assert returned_model._gca_resource is mock_model_service_get.return_value
    assert job.get_model()._gca_resource is mock_model_service_get.return_value
    assert not job.has_failed
    assert job.state == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED
def test_splits_filter(
    self,
    mock_pipeline_service_create,
    mock_pipeline_service_get,
    mock_dataset_video,
    mock_model_service_get,
    mock_model,
    sync,
):
    """Run an AutoML video training job with training/test filter splits and
    an init()-level CMEK key; verify the pipeline create request carries the
    expected FilterSplit and encryption spec.
    """
    aiplatform.init(
        project=_TEST_PROJECT,
        encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
    )

    job = training_jobs.AutoMLVideoTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        prediction_type=_TEST_PREDICTION_TYPE_VCN,
        model_type=_TEST_MODEL_TYPE_CLOUD,
    )

    returned_model = job.run(
        dataset=mock_dataset_video,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        training_filter_split=_TEST_FILTER_SPLIT_TRAINING,
        test_filter_split=_TEST_FILTER_SPLIT_TEST,
        sync=sync,
    )

    if not sync:
        returned_model.wait()

    # Only training/test filters are passed to run(); the validation filter
    # in the expected split is what the SDK fills in.
    expected_filter_split = gca_training_pipeline.FilterSplit(
        training_filter=_TEST_FILTER_SPLIT_TRAINING,
        validation_filter=_TEST_FILTER_SPLIT_VALIDATION,
        test_filter=_TEST_FILTER_SPLIT_TEST,
    )
    expected_model = gca_model.Model(
        display_name=_TEST_MODEL_DISPLAY_NAME,
        description=mock_model._gca_resource.description,
        encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
    )
    expected_input_data_config = gca_training_pipeline.InputDataConfig(
        filter_split=expected_filter_split,
        dataset_id=mock_dataset_video.name,
    )
    expected_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        training_task_definition=(
            schema.training_job.definition.automl_video_classification
        ),
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS,
        model_to_upload=expected_model,
        input_data_config=expected_input_data_config,
        encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=expected_pipeline,
    )
def test_run_call_pipeline_if_no_model_display_name(
    self,
    mock_pipeline_service_create,
    mock_dataset_tabular,
    mock_model_service_get,
    sync,
):
    """Run an AutoML Tabular training job without a model_display_name and
    verify the created pipeline's uploaded model falls back to the job's
    display name.
    """
    aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME)

    job = training_jobs.AutoMLTabularTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME,
        optimization_prediction_type=_TEST_TRAINING_OPTIMIZATION_PREDICTION_TYPE,
        column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS,
        optimization_objective_recall_value=None,
        optimization_objective_precision_value=None,
        training_encryption_spec_key_name=_TEST_PIPELINE_ENCRYPTION_KEY_NAME,
        model_encryption_spec_key_name=_TEST_MODEL_ENCRYPTION_KEY_NAME,
    )

    # Fix: the test is parametrized on `sync` and later calls
    # model_from_job.wait() when not sync, but `sync` was never forwarded to
    # run() — unlike every sibling test in this file — so the async code
    # path was never exercised. Forward it.
    model_from_job = job.run(
        dataset=mock_dataset_tabular,
        target_column=_TEST_TRAINING_TARGET_COLUMN,
        training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT,
        validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT,
        test_fraction_split=_TEST_TEST_FRACTION_SPLIT,
        weight_column=_TEST_TRAINING_WEIGHT_COLUMN,
        budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
        disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING,
        sync=sync,
    )

    if not sync:
        model_from_job.wait()

    true_fraction_split = gca_training_pipeline.FractionSplit(
        training_fraction=_TEST_TRAINING_FRACTION_SPLIT,
        validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT,
        test_fraction=_TEST_TEST_FRACTION_SPLIT,
    )

    # Test that the model display name defaults to the job display name.
    true_managed_model = gca_model.Model(
        display_name=_TEST_DISPLAY_NAME,
        encryption_spec=_TEST_MODEL_ENCRYPTION_SPEC,
    )
    true_input_data_config = gca_training_pipeline.InputDataConfig(
        fraction_split=true_fraction_split,
        dataset_id=mock_dataset_tabular.name,
    )
    true_training_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        training_task_definition=schema.training_job.definition.automl_tabular,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS,
        model_to_upload=true_managed_model,
        input_data_config=true_input_data_config,
        encryption_spec=_TEST_PIPELINE_ENCRYPTION_SPEC,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=true_training_pipeline,
    )