def test_run_called_twice_raises(
    self,
    mock_dataset_time_series,
    sync,
):
    """Calling ``run()`` a second time on the same job must raise RuntimeError."""
    aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME)

    job = AutoMLForecastingTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME,
        column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS,
    )

    # Both invocations use identical arguments; only the second may fail,
    # and it must fail because the job has already been started.
    run_kwargs = dict(
        dataset=mock_dataset_time_series,
        target_column=_TEST_TRAINING_TARGET_COLUMN,
        time_column=_TEST_TRAINING_TIME_COLUMN,
        time_series_identifier_column=_TEST_TRAINING_TIME_SERIES_IDENTIFIER_COLUMN,
        unavailable_at_forecast_columns=_TEST_TRAINING_UNAVAILABLE_AT_FORECAST_COLUMNS,
        available_at_forecast_columns=_TEST_TRAINING_AVAILABLE_AT_FORECAST_COLUMNS,
        forecast_horizon=_TEST_TRAINING_FORECAST_HORIZON,
        data_granularity_unit=_TEST_TRAINING_DATA_GRANULARITY_UNIT,
        data_granularity_count=_TEST_TRAINING_DATA_GRANULARITY_COUNT,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        weight_column=_TEST_TRAINING_WEIGHT_COLUMN,
        time_series_attribute_columns=_TEST_TRAINING_TIME_SERIES_ATTRIBUTE_COLUMNS,
        context_window=_TEST_TRAINING_CONTEXT_WINDOW,
        budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
        export_evaluated_data_items=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS,
        export_evaluated_data_items_bigquery_destination_uri=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI,
        export_evaluated_data_items_override_destination=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION,
        quantiles=_TEST_TRAINING_QUANTILES,
        validation_options=_TEST_TRAINING_VALIDATION_OPTIONS,
        sync=sync,
    )

    job.run(**run_kwargs)

    with pytest.raises(RuntimeError):
        job.run(**run_kwargs)
def test_run_call_pipeline_if_no_model_display_name(
    self,
    mock_pipeline_service_create,
    mock_dataset_time_series,
    mock_model_service_get,
    sync,
):
    """When ``model_display_name`` is omitted, the job's display name is reused for the model."""
    aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME)

    job = AutoMLForecastingTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME,
        column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS,
    )

    # Note: no model_display_name passed — that is the behavior under test.
    returned_model = job.run(
        dataset=mock_dataset_time_series,
        target_column=_TEST_TRAINING_TARGET_COLUMN,
        time_column=_TEST_TRAINING_TIME_COLUMN,
        time_series_identifier_column=_TEST_TRAINING_TIME_SERIES_IDENTIFIER_COLUMN,
        unavailable_at_forecast_columns=_TEST_TRAINING_UNAVAILABLE_AT_FORECAST_COLUMNS,
        available_at_forecast_columns=_TEST_TRAINING_AVAILABLE_AT_FORECAST_COLUMNS,
        forecast_horizon=_TEST_TRAINING_FORECAST_HORIZON,
        data_granularity_unit=_TEST_TRAINING_DATA_GRANULARITY_UNIT,
        data_granularity_count=_TEST_TRAINING_DATA_GRANULARITY_COUNT,
        weight_column=_TEST_TRAINING_WEIGHT_COLUMN,
        time_series_attribute_columns=_TEST_TRAINING_TIME_SERIES_ATTRIBUTE_COLUMNS,
        context_window=_TEST_TRAINING_CONTEXT_WINDOW,
        budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
        export_evaluated_data_items=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS,
        export_evaluated_data_items_bigquery_destination_uri=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI,
        export_evaluated_data_items_override_destination=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION,
        quantiles=_TEST_TRAINING_QUANTILES,
        validation_options=_TEST_TRAINING_VALIDATION_OPTIONS,
        sync=sync,
    )

    if not sync:
        returned_model.wait()

    expected_fraction_split = gca_training_pipeline.FractionSplit(
        training_fraction=_TEST_TRAINING_FRACTION_SPLIT,
        validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT,
        test_fraction=_TEST_TEST_FRACTION_SPLIT,
    )

    # The uploaded model should default to the job's display name.
    expected_model = gca_model.Model(display_name=_TEST_DISPLAY_NAME)

    expected_input_data_config = gca_training_pipeline.InputDataConfig(
        fraction_split=expected_fraction_split,
        dataset_id=mock_dataset_time_series.name,
    )

    expected_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        training_task_definition=schema.training_job.definition.automl_forecasting,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS,
        model_to_upload=expected_model,
        input_data_config=expected_input_data_config,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=expected_pipeline,
    )
def test_splits_predefined(
    self,
    mock_pipeline_service_create,
    mock_pipeline_service_get,
    mock_dataset_time_series,
    mock_model_service_get,
    sync,
):
    """Initialize aiplatform with an encryption key name, create and run an
    AutoML Forecasting training job with a predefined data split, and verify
    the pipeline-service call and its encryption settings.

    Note: the original docstring said "Video Classification" — a copy-paste
    error; this test exercises AutoMLForecastingTrainingJob.
    """
    aiplatform.init(
        project=_TEST_PROJECT,
        encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
    )

    job = AutoMLForecastingTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME,
        column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS,
    )

    model_from_job = job.run(
        dataset=mock_dataset_time_series,
        predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME,
        target_column=_TEST_TRAINING_TARGET_COLUMN,
        time_column=_TEST_TRAINING_TIME_COLUMN,
        time_series_identifier_column=_TEST_TRAINING_TIME_SERIES_IDENTIFIER_COLUMN,
        unavailable_at_forecast_columns=_TEST_TRAINING_UNAVAILABLE_AT_FORECAST_COLUMNS,
        available_at_forecast_columns=_TEST_TRAINING_AVAILABLE_AT_FORECAST_COLUMNS,
        forecast_horizon=_TEST_TRAINING_FORECAST_HORIZON,
        data_granularity_unit=_TEST_TRAINING_DATA_GRANULARITY_UNIT,
        data_granularity_count=_TEST_TRAINING_DATA_GRANULARITY_COUNT,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        weight_column=_TEST_TRAINING_WEIGHT_COLUMN,
        time_series_attribute_columns=_TEST_TRAINING_TIME_SERIES_ATTRIBUTE_COLUMNS,
        context_window=_TEST_TRAINING_CONTEXT_WINDOW,
        budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
        export_evaluated_data_items=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS,
        export_evaluated_data_items_bigquery_destination_uri=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI,
        export_evaluated_data_items_override_destination=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION,
        quantiles=_TEST_TRAINING_QUANTILES,
        validation_options=_TEST_TRAINING_VALIDATION_OPTIONS,
        sync=sync,
        create_request_timeout=None,
    )

    if not sync:
        model_from_job.wait()

    # The expected split must mirror the column name passed to run() above.
    # (Fixed: the assertion previously used _TEST_SPLIT_PREDEFINED_COLUMN_NAME,
    # a different constant from the one passed to run(), inconsistent with
    # test_run_call_pipeline_service_create.)
    true_split = gca_training_pipeline.PredefinedSplit(
        key=_TEST_PREDEFINED_SPLIT_COLUMN_NAME
    )

    true_managed_model = gca_model.Model(
        display_name=_TEST_MODEL_DISPLAY_NAME,
        encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
    )

    true_input_data_config = gca_training_pipeline.InputDataConfig(
        predefined_split=true_split,
        dataset_id=mock_dataset_time_series.name,
    )

    true_training_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        training_task_definition=schema.training_job.definition.automl_forecasting,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS,
        model_to_upload=true_managed_model,
        input_data_config=true_input_data_config,
        encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=true_training_pipeline,
        timeout=None,
    )
def test_run_call_pipeline_service_create(
    self,
    mock_pipeline_service_create,
    mock_pipeline_service_get,
    mock_dataset_time_series,
    mock_model_service_get,
    sync,
):
    """End-to-end check of run(): verifies the TrainingPipeline sent to the
    pipeline service (labels, task inputs with additional experiments,
    predefined split), the model upload, and the job's terminal state.
    """
    aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME)

    job = AutoMLForecastingTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME,
        column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS,
        labels=_TEST_LABELS,
    )

    returned_model = job.run(
        dataset=mock_dataset_time_series,
        target_column=_TEST_TRAINING_TARGET_COLUMN,
        time_column=_TEST_TRAINING_TIME_COLUMN,
        time_series_identifier_column=_TEST_TRAINING_TIME_SERIES_IDENTIFIER_COLUMN,
        unavailable_at_forecast_columns=_TEST_TRAINING_UNAVAILABLE_AT_FORECAST_COLUMNS,
        available_at_forecast_columns=_TEST_TRAINING_AVAILABLE_AT_FORECAST_COLUMNS,
        forecast_horizon=_TEST_TRAINING_FORECAST_HORIZON,
        data_granularity_unit=_TEST_TRAINING_DATA_GRANULARITY_UNIT,
        data_granularity_count=_TEST_TRAINING_DATA_GRANULARITY_COUNT,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        model_labels=_TEST_MODEL_LABELS,
        predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME,
        weight_column=_TEST_TRAINING_WEIGHT_COLUMN,
        time_series_attribute_columns=_TEST_TRAINING_TIME_SERIES_ATTRIBUTE_COLUMNS,
        context_window=_TEST_TRAINING_CONTEXT_WINDOW,
        budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
        export_evaluated_data_items=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS,
        export_evaluated_data_items_bigquery_destination_uri=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI,
        export_evaluated_data_items_override_destination=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION,
        quantiles=_TEST_TRAINING_QUANTILES,
        validation_options=_TEST_TRAINING_VALIDATION_OPTIONS,
        additional_experiments=_TEST_ADDITIONAL_EXPERIMENTS,
        sync=sync,
        create_request_timeout=None,
    )

    if not sync:
        returned_model.wait()

    expected_model = gca_model.Model(
        display_name=_TEST_MODEL_DISPLAY_NAME,
        labels=_TEST_MODEL_LABELS,
    )

    expected_input_data_config = gca_training_pipeline.InputDataConfig(
        predefined_split=gca_training_pipeline.PredefinedSplit(
            key=_TEST_PREDEFINED_SPLIT_COLUMN_NAME
        ),
        dataset_id=mock_dataset_time_series.name,
    )

    expected_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        labels=_TEST_LABELS,
        training_task_definition=schema.training_job.definition.automl_forecasting,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_ADDITIONAL_EXPERIMENTS,
        model_to_upload=expected_model,
        input_data_config=expected_input_data_config,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=expected_pipeline,
        timeout=None,
    )

    # The job should track the pipeline resource and surface the uploaded model.
    assert job._gca_resource is mock_pipeline_service_get.return_value

    mock_model_service_get.assert_called_once_with(
        name=_TEST_MODEL_NAME, retry=base._DEFAULT_RETRY
    )

    assert returned_model._gca_resource is mock_model_service_get.return_value
    assert job.get_model()._gca_resource is mock_model_service_get.return_value
    assert not job.has_failed
    assert job.state == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED