Example #1
    def execute(self, context: "Context"):
        self.hook = AutoMLHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )
        model = self.hook.create_auto_ml_image_training_job(
            project_id=self.project_id,
            region=self.region,
            display_name=self.display_name,
            dataset=datasets.ImageDataset(dataset_name=self.dataset_id),
            prediction_type=self.prediction_type,
            multi_label=self.multi_label,
            model_type=self.model_type,
            base_model=self.base_model,
            labels=self.labels,
            training_encryption_spec_key_name=self.training_encryption_spec_key_name,
            model_encryption_spec_key_name=self.model_encryption_spec_key_name,
            training_fraction_split=self.training_fraction_split,
            validation_fraction_split=self.validation_fraction_split,
            test_fraction_split=self.test_fraction_split,
            training_filter_split=self.training_filter_split,
            validation_filter_split=self.validation_filter_split,
            test_filter_split=self.test_filter_split,
            budget_milli_node_hours=self.budget_milli_node_hours,
            model_display_name=self.model_display_name,
            model_labels=self.model_labels,
            disable_early_stopping=self.disable_early_stopping,
            sync=self.sync,
        )

        result = Model.to_dict(model)
        model_id = self.hook.extract_model_id(result)
        VertexAIModelLink.persist(context=context, task_instance=self, model_id=model_id)
        return result
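Because execute() returns the serialized model, the dict is also pushed to XCom automatically, so downstream tasks can read the created model. A minimal, hypothetical downstream sketch (the task ids and helper below are illustrative assumptions, not part of the source):

from airflow.operators.python import PythonOperator

def log_created_model(ti):
    # execute() returns Model.to_dict(model), which Airflow pushes to XCom under "return_value";
    # the "name" field holds the full Vertex AI model resource name.
    model = ti.xcom_pull(task_ids="create_auto_ml_image_training_job")  # assumed upstream task_id
    print("Created model:", model["name"])

log_model = PythonOperator(task_id="log_created_model", python_callable=log_created_model)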
Example #2
    def execute(self, context: "Context"):
        self.hook = AutoMLHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )
        model = self.hook.create_auto_ml_video_training_job(
            project_id=self.project_id,
            region=self.region,
            display_name=self.display_name,
            dataset=datasets.VideoDataset(dataset_name=self.dataset_id),
            prediction_type=self.prediction_type,
            model_type=self.model_type,
            labels=self.labels,
            training_encryption_spec_key_name=self.training_encryption_spec_key_name,
            model_encryption_spec_key_name=self.model_encryption_spec_key_name,
            training_fraction_split=self.training_fraction_split,
            test_fraction_split=self.test_fraction_split,
            training_filter_split=self.training_filter_split,
            test_filter_split=self.test_filter_split,
            model_display_name=self.model_display_name,
            model_labels=self.model_labels,
            sync=self.sync,
        )

        result = Model.to_dict(model)
        model_id = self.hook.extract_model_id(result)
        VertexAIModelLink.persist(context=context, task_instance=self, model_id=model_id)
        return result
Example #3
class CreateAutoMLVideoTrainingJobOperator(AutoMLTrainingJobBaseOperator):
    """Create Auto ML Video Training job"""

    template_fields = [
        'region',
        'impersonation_chain',
    ]
    operator_extra_links = (VertexAIModelLink(),)

    def __init__(
        self,
        *,
        dataset_id: str,
        prediction_type: str = "classification",
        model_type: str = "CLOUD",
        training_filter_split: Optional[str] = None,
        test_filter_split: Optional[str] = None,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.dataset_id = dataset_id
        self.prediction_type = prediction_type
        self.model_type = model_type
        self.training_filter_split = training_filter_split
        self.test_filter_split = test_filter_split

    def execute(self, context: "Context"):
        self.hook = AutoMLHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )
        model = self.hook.create_auto_ml_video_training_job(
            project_id=self.project_id,
            region=self.region,
            display_name=self.display_name,
            dataset=datasets.VideoDataset(dataset_name=self.dataset_id),
            prediction_type=self.prediction_type,
            model_type=self.model_type,
            labels=self.labels,
            training_encryption_spec_key_name=self.training_encryption_spec_key_name,
            model_encryption_spec_key_name=self.model_encryption_spec_key_name,
            training_fraction_split=self.training_fraction_split,
            test_fraction_split=self.test_fraction_split,
            training_filter_split=self.training_filter_split,
            test_filter_split=self.test_filter_split,
            model_display_name=self.model_display_name,
            model_labels=self.model_labels,
            sync=self.sync,
        )

        result = Model.to_dict(model)
        model_id = self.hook.extract_model_id(result)
        VertexAIModelLink.persist(context=context, task_instance=self, model_id=model_id)
        return result
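A minimal usage sketch for this operator inside a DAG (project, region, dataset ID, and dates are placeholders; only parameters visible in the class above are set):

from datetime import datetime

from airflow import DAG

with DAG("vertex_ai_automl_video", start_date=datetime(2022, 1, 1), schedule_interval=None) as dag:
    create_video_training_job = CreateAutoMLVideoTrainingJobOperator(
        task_id="create_auto_ml_video_training_job",
        project_id="my-project",            # placeholder
        region="us-central1",               # placeholder
        display_name="video-training-job",  # placeholder
        dataset_id="1234567890",            # placeholder Vertex AI dataset ID
        prediction_type="classification",
        model_type="CLOUD",
    )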
Example #4
    def execute(self, context: "Context"):
        self.hook = AutoMLHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )
        model = self.hook.create_auto_ml_forecasting_training_job(
            project_id=self.project_id,
            region=self.region,
            display_name=self.display_name,
            dataset=datasets.TimeSeriesDataset(dataset_name=self.dataset_id),
            target_column=self.target_column,
            time_column=self.time_column,
            time_series_identifier_column=self.time_series_identifier_column,
            unavailable_at_forecast_columns=self.unavailable_at_forecast_columns,
            available_at_forecast_columns=self.available_at_forecast_columns,
            forecast_horizon=self.forecast_horizon,
            data_granularity_unit=self.data_granularity_unit,
            data_granularity_count=self.data_granularity_count,
            optimization_objective=self.optimization_objective,
            column_specs=self.column_specs,
            column_transformations=self.column_transformations,
            labels=self.labels,
            training_encryption_spec_key_name=self.training_encryption_spec_key_name,
            model_encryption_spec_key_name=self.model_encryption_spec_key_name,
            training_fraction_split=self.training_fraction_split,
            validation_fraction_split=self.validation_fraction_split,
            test_fraction_split=self.test_fraction_split,
            predefined_split_column_name=self.predefined_split_column_name,
            weight_column=self.weight_column,
            time_series_attribute_columns=self.time_series_attribute_columns,
            context_window=self.context_window,
            export_evaluated_data_items=self.export_evaluated_data_items,
            export_evaluated_data_items_bigquery_destination_uri=(
                self.export_evaluated_data_items_bigquery_destination_uri),
            export_evaluated_data_items_override_destination=(
                self.export_evaluated_data_items_override_destination),
            quantiles=self.quantiles,
            validation_options=self.validation_options,
            budget_milli_node_hours=self.budget_milli_node_hours,
            model_display_name=self.model_display_name,
            model_labels=self.model_labels,
            sync=self.sync,
        )

        result = Model.to_dict(model)
        model_id = self.hook.extract_model_id(result)
        VertexAIModelLink.persist(context=context, task_instance=self, model_id=model_id)
        return result
Example #5
    def execute(self, context: "Context"):
        self.hook = AutoMLHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )
        model = self.hook.create_auto_ml_tabular_training_job(
            project_id=self.project_id,
            region=self.region,
            display_name=self.display_name,
            dataset=datasets.TabularDataset(dataset_name=self.dataset_id),
            target_column=self.target_column,
            optimization_prediction_type=self.optimization_prediction_type,
            optimization_objective=self.optimization_objective,
            column_specs=self.column_specs,
            column_transformations=self.column_transformations,
            optimization_objective_recall_value=self.optimization_objective_recall_value,
            optimization_objective_precision_value=self.optimization_objective_precision_value,
            labels=self.labels,
            training_encryption_spec_key_name=self.training_encryption_spec_key_name,
            model_encryption_spec_key_name=self.model_encryption_spec_key_name,
            training_fraction_split=self.training_fraction_split,
            validation_fraction_split=self.validation_fraction_split,
            test_fraction_split=self.test_fraction_split,
            predefined_split_column_name=self.predefined_split_column_name,
            timestamp_split_column_name=self.timestamp_split_column_name,
            weight_column=self.weight_column,
            budget_milli_node_hours=self.budget_milli_node_hours,
            model_display_name=self.model_display_name,
            model_labels=self.model_labels,
            disable_early_stopping=self.disable_early_stopping,
            export_evaluated_data_items=self.export_evaluated_data_items,
            export_evaluated_data_items_bigquery_destination_uri=(
                self.export_evaluated_data_items_bigquery_destination_uri),
            export_evaluated_data_items_override_destination=(
                self.export_evaluated_data_items_override_destination),
            sync=self.sync,
        )

        result = Model.to_dict(model)
        model_id = self.hook.extract_model_id(result)
        VertexAIModelLink.persist(context=context, task_instance=self, model_id=model_id)
        return result
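The column_specs argument maps tabular column names to Vertex AI transformation types. A hedged illustration (the column names are invented and the type strings follow google-cloud-aiplatform SDK conventions, so verify them against your dataset schema):

# Assumed illustration of the column_specs value passed through to create_auto_ml_tabular_training_job.
column_specs = {
    "age": "numeric",
    "country": "categorical",
    "signup_ts": "timestamp",
    "comments": "text",
    "score": "auto",  # let Vertex AI pick the transformation
}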
Example #6
    def execute(self, context: "Context"):
        hook = AutoMLHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )
        results = hook.list_training_pipelines(
            region=self.region,
            project_id=self.project_id,
            page_size=self.page_size,
            page_token=self.page_token,
            filter=self.filter,
            read_mask=self.read_mask,
            retry=self.retry,
            timeout=self.timeout,
            metadata=self.metadata,
        )
        VertexAITrainingPipelinesLink.persist(context=context, task_instance=self)
        return [TrainingPipeline.to_dict(result) for result in results]
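A hedged usage sketch, assuming the enclosing class is ListAutoMLTrainingJobOperator as in the Airflow Google provider; the listed pipelines are returned (and pushed to XCom) as dicts, and all values are placeholders:

list_training_pipelines = ListAutoMLTrainingJobOperator(
    task_id="list_auto_ml_training_pipelines",
    project_id="my-project",  # placeholder
    region="us-central1",     # placeholder
)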
Example #7
    def execute(self, context: "Context"):
        hook = AutoMLHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )
        try:
            self.log.info("Deleting Auto ML training pipeline: %s", self.training_pipeline)
            training_pipeline_operation = hook.delete_training_pipeline(
                training_pipeline=self.training_pipeline,
                region=self.region,
                project_id=self.project_id,
                retry=self.retry,
                timeout=self.timeout,
                metadata=self.metadata,
            )
            hook.wait_for_operation(timeout=self.timeout, operation=training_pipeline_operation)
            self.log.info("Training pipeline was deleted.")
        except NotFound:
            self.log.info("The Training Pipeline ID %s does not exist.", self.training_pipeline)
Example #8
class CreateAutoMLForecastingTrainingJobOperator(AutoMLTrainingJobBaseOperator):
    """Create AutoML Forecasting Training job"""

    template_fields = [
        'region',
        'impersonation_chain',
    ]
    operator_extra_links = (VertexAIModelLink(),)

    def __init__(
        self,
        *,
        dataset_id: str,
        target_column: str,
        time_column: str,
        time_series_identifier_column: str,
        unavailable_at_forecast_columns: List[str],
        available_at_forecast_columns: List[str],
        forecast_horizon: int,
        data_granularity_unit: str,
        data_granularity_count: int,
        optimization_objective: Optional[str] = None,
        column_specs: Optional[Dict[str, str]] = None,
        column_transformations: Optional[List[Dict[str, Dict[str, str]]]] = None,
        validation_fraction_split: Optional[float] = None,
        predefined_split_column_name: Optional[str] = None,
        weight_column: Optional[str] = None,
        time_series_attribute_columns: Optional[List[str]] = None,
        context_window: Optional[int] = None,
        export_evaluated_data_items: bool = False,
        export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None,
        export_evaluated_data_items_override_destination: bool = False,
        quantiles: Optional[List[float]] = None,
        validation_options: Optional[str] = None,
        budget_milli_node_hours: int = 1000,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.dataset_id = dataset_id
        self.target_column = target_column
        self.time_column = time_column
        self.time_series_identifier_column = time_series_identifier_column
        self.unavailable_at_forecast_columns = unavailable_at_forecast_columns
        self.available_at_forecast_columns = available_at_forecast_columns
        self.forecast_horizon = forecast_horizon
        self.data_granularity_unit = data_granularity_unit
        self.data_granularity_count = data_granularity_count
        self.optimization_objective = optimization_objective
        self.column_specs = column_specs
        self.column_transformations = column_transformations
        self.validation_fraction_split = validation_fraction_split
        self.predefined_split_column_name = predefined_split_column_name
        self.weight_column = weight_column
        self.time_series_attribute_columns = time_series_attribute_columns
        self.context_window = context_window
        self.export_evaluated_data_items = export_evaluated_data_items
        self.export_evaluated_data_items_bigquery_destination_uri = (
            export_evaluated_data_items_bigquery_destination_uri
        )
        self.export_evaluated_data_items_override_destination = (
            export_evaluated_data_items_override_destination
        )
        self.quantiles = quantiles
        self.validation_options = validation_options
        self.budget_milli_node_hours = budget_milli_node_hours

    def execute(self, context: "Context"):
        self.hook = AutoMLHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )
        model = self.hook.create_auto_ml_forecasting_training_job(
            project_id=self.project_id,
            region=self.region,
            display_name=self.display_name,
            dataset=datasets.TimeSeriesDataset(dataset_name=self.dataset_id),
            target_column=self.target_column,
            time_column=self.time_column,
            time_series_identifier_column=self.time_series_identifier_column,
            unavailable_at_forecast_columns=self.unavailable_at_forecast_columns,
            available_at_forecast_columns=self.available_at_forecast_columns,
            forecast_horizon=self.forecast_horizon,
            data_granularity_unit=self.data_granularity_unit,
            data_granularity_count=self.data_granularity_count,
            optimization_objective=self.optimization_objective,
            column_specs=self.column_specs,
            column_transformations=self.column_transformations,
            labels=self.labels,
            training_encryption_spec_key_name=self.training_encryption_spec_key_name,
            model_encryption_spec_key_name=self.model_encryption_spec_key_name,
            training_fraction_split=self.training_fraction_split,
            validation_fraction_split=self.validation_fraction_split,
            test_fraction_split=self.test_fraction_split,
            predefined_split_column_name=self.predefined_split_column_name,
            weight_column=self.weight_column,
            time_series_attribute_columns=self.time_series_attribute_columns,
            context_window=self.context_window,
            export_evaluated_data_items=self.export_evaluated_data_items,
            export_evaluated_data_items_bigquery_destination_uri=(
                self.export_evaluated_data_items_bigquery_destination_uri
            ),
            export_evaluated_data_items_override_destination=(
                self.export_evaluated_data_items_override_destination
            ),
            quantiles=self.quantiles,
            validation_options=self.validation_options,
            budget_milli_node_hours=self.budget_milli_node_hours,
            model_display_name=self.model_display_name,
            model_labels=self.model_labels,
            sync=self.sync,
        )

        result = Model.to_dict(model)
        model_id = self.hook.extract_model_id(result)
        VertexAIModelLink.persist(context=context, task_instance=self, model_id=model_id)
        return result
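A minimal usage sketch covering the required forecasting arguments from the constructor above (project, region, dataset ID, and column names are placeholders tied to a hypothetical time-series dataset):

create_forecasting_job = CreateAutoMLForecastingTrainingJobOperator(
    task_id="create_auto_ml_forecasting_training_job",
    project_id="my-project",                  # placeholder
    region="us-central1",                     # placeholder
    display_name="forecasting-training-job",  # placeholder
    dataset_id="1234567890",                  # placeholder Vertex AI dataset ID
    target_column="sales",                    # placeholder column names below
    time_column="date",
    time_series_identifier_column="store_id",
    unavailable_at_forecast_columns=["sales"],
    available_at_forecast_columns=["date", "promo"],
    forecast_horizon=30,
    data_granularity_unit="day",
    data_granularity_count=1,
)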
Example #9
class CreateAutoMLTabularTrainingJobOperator(AutoMLTrainingJobBaseOperator):
    """Create Auto ML Tabular Training job"""

    template_fields = [
        'region',
        'impersonation_chain',
    ]
    operator_extra_links = (VertexAIModelLink(),)

    def __init__(
        self,
        *,
        dataset_id: str,
        target_column: str,
        optimization_prediction_type: str,
        optimization_objective: Optional[str] = None,
        column_specs: Optional[Dict[str, str]] = None,
        column_transformations: Optional[List[Dict[str, Dict[str, str]]]] = None,
        optimization_objective_recall_value: Optional[float] = None,
        optimization_objective_precision_value: Optional[float] = None,
        validation_fraction_split: Optional[float] = None,
        predefined_split_column_name: Optional[str] = None,
        timestamp_split_column_name: Optional[str] = None,
        weight_column: Optional[str] = None,
        budget_milli_node_hours: int = 1000,
        disable_early_stopping: bool = False,
        export_evaluated_data_items: bool = False,
        export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None,
        export_evaluated_data_items_override_destination: bool = False,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.dataset_id = dataset_id
        self.target_column = target_column
        self.optimization_prediction_type = optimization_prediction_type
        self.optimization_objective = optimization_objective
        self.column_specs = column_specs
        self.column_transformations = column_transformations
        self.optimization_objective_recall_value = optimization_objective_recall_value
        self.optimization_objective_precision_value = optimization_objective_precision_value
        self.validation_fraction_split = validation_fraction_split
        self.predefined_split_column_name = predefined_split_column_name
        self.timestamp_split_column_name = timestamp_split_column_name
        self.weight_column = weight_column
        self.budget_milli_node_hours = budget_milli_node_hours
        self.disable_early_stopping = disable_early_stopping
        self.export_evaluated_data_items = export_evaluated_data_items
        self.export_evaluated_data_items_bigquery_destination_uri = (
            export_evaluated_data_items_bigquery_destination_uri
        )
        self.export_evaluated_data_items_override_destination = (
            export_evaluated_data_items_override_destination
        )

    def execute(self, context: "Context"):
        self.hook = AutoMLHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )
        model = self.hook.create_auto_ml_tabular_training_job(
            project_id=self.project_id,
            region=self.region,
            display_name=self.display_name,
            dataset=datasets.TabularDataset(dataset_name=self.dataset_id),
            target_column=self.target_column,
            optimization_prediction_type=self.optimization_prediction_type,
            optimization_objective=self.optimization_objective,
            column_specs=self.column_specs,
            column_transformations=self.column_transformations,
            optimization_objective_recall_value=self.optimization_objective_recall_value,
            optimization_objective_precision_value=self.optimization_objective_precision_value,
            labels=self.labels,
            training_encryption_spec_key_name=self.training_encryption_spec_key_name,
            model_encryption_spec_key_name=self.model_encryption_spec_key_name,
            training_fraction_split=self.training_fraction_split,
            validation_fraction_split=self.validation_fraction_split,
            test_fraction_split=self.test_fraction_split,
            predefined_split_column_name=self.predefined_split_column_name,
            timestamp_split_column_name=self.timestamp_split_column_name,
            weight_column=self.weight_column,
            budget_milli_node_hours=self.budget_milli_node_hours,
            model_display_name=self.model_display_name,
            model_labels=self.model_labels,
            disable_early_stopping=self.disable_early_stopping,
            export_evaluated_data_items=self.export_evaluated_data_items,
            export_evaluated_data_items_bigquery_destination_uri=(
                self.export_evaluated_data_items_bigquery_destination_uri
            ),
            export_evaluated_data_items_override_destination=(
                self.export_evaluated_data_items_override_destination
            ),
            sync=self.sync,
        )

        result = Model.to_dict(model)
        model_id = self.hook.extract_model_id(result)
        VertexAIModelLink.persist(context=context, task_instance=self, model_id=model_id)
        return result
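A minimal usage sketch using only the constructor's required arguments plus the common base-operator fields (all values are placeholders):

create_tabular_job = CreateAutoMLTabularTrainingJobOperator(
    task_id="create_auto_ml_tabular_training_job",
    project_id="my-project",              # placeholder
    region="us-central1",                 # placeholder
    display_name="tabular-training-job",  # placeholder
    dataset_id="1234567890",              # placeholder Vertex AI dataset ID
    target_column="churned",              # placeholder target column
    optimization_prediction_type="classification",
    budget_milli_node_hours=1000,
)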
Example #10
class CreateAutoMLImageTrainingJobOperator(AutoMLTrainingJobBaseOperator):
    """Create Auto ML Image Training job"""

    template_fields = [
        'region',
        'impersonation_chain',
    ]
    operator_extra_links = (VertexAIModelLink(),)

    def __init__(
        self,
        *,
        dataset_id: str,
        prediction_type: str = "classification",
        multi_label: bool = False,
        model_type: str = "CLOUD",
        base_model: Optional[Model] = None,
        validation_fraction_split: Optional[float] = None,
        training_filter_split: Optional[str] = None,
        validation_filter_split: Optional[str] = None,
        test_filter_split: Optional[str] = None,
        budget_milli_node_hours: Optional[int] = None,
        disable_early_stopping: bool = False,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.dataset_id = dataset_id
        self.prediction_type = prediction_type
        self.multi_label = multi_label
        self.model_type = model_type
        self.base_model = base_model
        self.validation_fraction_split = validation_fraction_split
        self.training_filter_split = training_filter_split
        self.validation_filter_split = validation_filter_split
        self.test_filter_split = test_filter_split
        self.budget_milli_node_hours = budget_milli_node_hours
        self.disable_early_stopping = disable_early_stopping

    def execute(self, context: "Context"):
        self.hook = AutoMLHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )
        model = self.hook.create_auto_ml_image_training_job(
            project_id=self.project_id,
            region=self.region,
            display_name=self.display_name,
            dataset=datasets.ImageDataset(dataset_name=self.dataset_id),
            prediction_type=self.prediction_type,
            multi_label=self.multi_label,
            model_type=self.model_type,
            base_model=self.base_model,
            labels=self.labels,
            training_encryption_spec_key_name=self.training_encryption_spec_key_name,
            model_encryption_spec_key_name=self.model_encryption_spec_key_name,
            training_fraction_split=self.training_fraction_split,
            validation_fraction_split=self.validation_fraction_split,
            test_fraction_split=self.test_fraction_split,
            training_filter_split=self.training_filter_split,
            validation_filter_split=self.validation_filter_split,
            test_filter_split=self.test_filter_split,
            budget_milli_node_hours=self.budget_milli_node_hours,
            model_display_name=self.model_display_name,
            model_labels=self.model_labels,
            disable_early_stopping=self.disable_early_stopping,
            sync=self.sync,
        )

        result = Model.to_dict(model)
        model_id = self.hook.extract_model_id(result)
        VertexAIModelLink.persist(context=context, task_instance=self, model_id=model_id)
        return result
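A minimal usage sketch for the image operator (all values are placeholders; arguments not listed keep the defaults shown in the constructor above):

create_image_job = CreateAutoMLImageTrainingJobOperator(
    task_id="create_auto_ml_image_training_job",
    project_id="my-project",            # placeholder
    region="us-central1",               # placeholder
    display_name="image-training-job",  # placeholder
    dataset_id="1234567890",            # placeholder Vertex AI dataset ID
    prediction_type="classification",
    multi_label=False,
    model_type="CLOUD",
    budget_milli_node_hours=8000,       # optional; illustrative value
)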