def __init__(self, *args,
             monitoring_bq_conn_id: str,
             dag_name: str,
             days_to_live: int,
             monitoring_dataset: str,
             monitoring_table: str,
             **kwargs) -> None:
    """Initializes the MonitoringCleanupOperator.

    Builds a MonitoringHook bound to the given monitoring table; the operator
    uses it to remove rows older than `days_to_live`.

    Note: every parameter below is a required keyword argument — the signature
    defines no defaults, so the previous "Optional; Default is ..." claims for
    `monitoring_bq_conn_id` and `days_to_live` did not match the code.

    Args:
      *args: arguments for the operator.
      monitoring_bq_conn_id: BigQuery connection ID for the monitoring table
        (e.g. 'bigquery_default').
      dag_name: The name of the DAG running the cleanup operator.
      days_to_live: The number of days data can live before being removed.
      monitoring_dataset: Dataset id of the monitoring table.
      monitoring_table: Table name of the monitoring table.
      **kwargs: Other arguments to pass through to the operator or hooks.
    """
    super().__init__(*args, **kwargs)
    self.days_to_live = days_to_live
    self.monitoring_hook = monitoring_hook_lib.MonitoringHook(
        bq_conn_id=monitoring_bq_conn_id,
        dag_name=dag_name,
        monitoring_dataset=monitoring_dataset,
        monitoring_table=monitoring_table)
# Example 2
    def test_init_handles_bigquery_create_empty_table_errors(self):
        """Hook construction surfaces table-creation failures.

        When the monitoring table is missing and `create_empty_table` raises
        an AirflowException, `MonitoringHook.__init__` must raise
        `errors.MonitoringDatabaseError`.
        """
        self.mock_cursor_obj.create_empty_table.side_effect = (
            exceptions.AirflowException())

        # Patch `table_exists` with a context manager so the class attribute
        # is restored after the test. The original code assigned a MagicMock
        # directly to the class, leaking the patch into every later test.
        with mock.patch.object(monitoring_hook.MonitoringHook, 'table_exists',
                               return_value=False):
            with self.assertRaises(errors.MonitoringDatabaseError):
                monitoring_hook.MonitoringHook(
                    bq_conn_id='test_conn',
                    monitoring_dataset=self.dataset_id,
                    monitoring_table=self.table_id)
# Example 3
    def test_init_handles_bigquery_create_empty_dataset_errors(self):
        """Hook construction fails cleanly when the dataset cannot be made."""
        # Simulate a dataset that neither exists nor can be created.
        self.mock_cursor_obj.get_dataset.side_effect = (
            exceptions.AirflowException())
        self.mock_cursor_obj.create_empty_dataset.side_effect = (
            exceptions.AirflowException())

        with self.assertRaises(errors.MonitoringDatabaseError):
            monitoring_hook.MonitoringHook(
                bq_conn_id='test_conn',
                monitoring_dataset=self.dataset_id,
                monitoring_table=self.table_id)
    def __init__(self,
                 *args,
                 input_hook: hook_factory.InputHookType,
                 output_hook: hook_factory.OutputHookType,
                 dag_name: str,
                 monitoring_dataset: str = '',
                 monitoring_table: str = '',
                 monitoring_bq_conn_id: str = '',
                 return_report: bool = False,
                 enable_monitoring: bool = True,
                 is_retry: bool = False,
                 **kwargs) -> None:
        """Initiates the DataConnectorOperator.

        Args:
          *args: arguments for the operator.
          input_hook: The type of the input hook.
          output_hook: The type of the output hook.
          dag_name: The ID of the current running dag.
          monitoring_dataset: Dataset id of the monitoring table.
          monitoring_table: Table name of the monitoring table.
          monitoring_bq_conn_id: BigQuery connection ID for the monitoring
            table.
          return_report: Indicates whether to return a run report or not.
          enable_monitoring: If enabled, data transfer monitoring log will be
            stored in Storage to allow for retry of failed events.
          is_retry: If true, the operator will draw failed events from the
            monitoring log and will send them to the output hook.
          **kwargs: Other arguments to pass through to the operator or hooks.

        Raises:
          errors.MonitoringValueError: If monitoring is enabled but any of
            the monitoring parameters is missing or empty.
        """
        super().__init__(*args, **kwargs)

        self.dag_name = dag_name
        self.input_hook = hook_factory.get_input_hook(input_hook, **kwargs)
        self.output_hook = hook_factory.get_output_hook(output_hook, **kwargs)
        self.return_report = return_report
        self.enable_monitoring = enable_monitoring
        self.is_retry = is_retry

        # Monitoring requires all three connection parameters to be set.
        monitoring_params = (monitoring_dataset, monitoring_table,
                             monitoring_bq_conn_id)
        if enable_monitoring and not all(monitoring_params):
            raise errors.MonitoringValueError(
                msg=('Missing or empty monitoring parameters although '
                     'monitoring is enabled.'),
                error_num=(
                    errors.ErrorNameIDMap.MONITORING_HOOK_INVALID_VARIABLES))

        self.monitor = monitoring.MonitoringHook(
            bq_conn_id=monitoring_bq_conn_id,
            enable_monitoring=enable_monitoring,
            dag_name=dag_name,
            monitoring_dataset=monitoring_dataset,
            monitoring_table=monitoring_table,
            location=self.input_hook.get_location())
# Example 5
    def test_init(self):
        """An existing table is reused: dataset is probed, table checked."""
        # Dataset lookup raises, exercising the dataset-creation path.
        self.mock_cursor_obj.get_dataset.side_effect = (
            exceptions.AirflowException())
        monitoring_hook.MonitoringHook.table_exists = mock.MagicMock(
            return_value=True)

        monitoring_hook.MonitoringHook(
            bq_conn_id='test_conn',
            monitoring_dataset=self.dataset_id,
            monitoring_table=self.table_id)

        self.mock_cursor_obj.get_dataset.assert_called_with(
            project_id=self.project_id, dataset_id=self.dataset_id)
        monitoring_hook.MonitoringHook.table_exists.assert_called_with(
            project_id=self.project_id,
            dataset_id=self.dataset_id,
            table_id=self.table_id)
# Example 6
    def test_init_create_monitoring_dataset_and_table(self):
        """Missing dataset and table are both created with expected args."""
        # Dataset lookup fails and the table does not exist, so the hook
        # should create both.
        self.mock_cursor_obj.get_dataset.side_effect = (
            exceptions.AirflowException())
        monitoring_hook.MonitoringHook.table_exists = mock.MagicMock(
            return_value=False)

        monitoring_hook.MonitoringHook(
            bq_conn_id='test_conn',
            monitoring_dataset=self.dataset_id,
            monitoring_table=self.table_id)

        self.mock_cursor_obj.create_empty_table.assert_called_with(
            project_id=self.project_id,
            dataset_id=self.dataset_id,
            table_id=self.table_id,
            schema_fields=monitoring_hook._LOG_SCHEMA_FIELDS)
        self.mock_cursor_obj.create_empty_dataset.assert_called_with(
            project_id=self.project_id, dataset_id=self.dataset_id)
        self.mock_cursor_obj.get_dataset.assert_called_with(
            project_id=self.project_id, dataset_id=self.dataset_id)
        monitoring_hook.MonitoringHook.table_exists.assert_called_with(
            project_id=self.project_id,
            dataset_id=self.dataset_id,
            table_id=self.table_id)
# Example 7
    def setUp(self):
        """Builds fixture rows and replaces all BigQuery access with mocks."""
        super().setUp()
        self.dag_name = 'dag'
        self.project_id = 'test_project'
        self.dataset_id = 'test_dataset'
        self.table_id = 'test_table'
        self.conn_id = 'test_conn'

        def _expected_row(type_id, position='', info=''):
            # Every expected log row shares dag name, timestamp and location.
            return {
                'dag_name': self.dag_name,
                'timestamp': '20201103180000',
                'type_id': type_id,
                'location': 'https://input/resource',
                'position': position,
                'info': info,
            }

        self.expected_run_row = _expected_row(
            monitoring_hook.MonitoringEntityMap.RUN.value)
        self.expected_blob_row = _expected_row(
            monitoring_hook.MonitoringEntityMap.BLOB.value,
            position='3000', info='1500')
        self.expected_event_row = _expected_row(
            50, position='60', info=json.dumps({'a': 1}))
        self.expected_retry_row = _expected_row(
            monitoring_hook.MonitoringEntityMap.RETRY.value)

        # Mock connection/cursor pair standing in for the BigQuery client.
        self.mock_cursor_obj = mock.MagicMock()
        self.mock_cursor_obj.project_id = self.project_id
        self.mock_cursor_obj.create_empty_table = mock.MagicMock()
        self.mock_cursor_obj.create_empty_dataset = mock.MagicMock()
        self.mock_cursor_obj.insert_all = mock.MagicMock()
        self.mock_conn_obj = mock.MagicMock()
        self.mock_conn_obj.cursor = mock.MagicMock(
            return_value=self.mock_cursor_obj)

        # Save the originals so they can be restored later — presumably in a
        # tearDown not visible in this chunk; confirm before relying on it.
        self.original_get_conn = monitoring_hook.MonitoringHook.get_conn
        monitoring_hook.MonitoringHook.get_conn = mock.MagicMock(
            return_value=self.mock_conn_obj)
        self.original_bigquery_hook_init = bigquery_hook.BigQueryHook.__init__
        bigquery_hook.BigQueryHook.__init__ = mock.MagicMock()

        # Skip dataset/table creation while building the hook under test.
        patch_target = (
            'google3.third_party.gps_building_blocks.tcrm.hooks.monitoring_hook.'
            'MonitoringHook._create_monitoring_dataset_and_table_if_not_exist')
        with mock.patch(patch_target):
            self.hook = monitoring_hook.MonitoringHook(
                bq_conn_id=self.conn_id,
                monitoring_dataset=self.dataset_id,
                monitoring_table=self.table_id)
            self.hook.get_conn = mock.MagicMock(
                return_value=self.mock_conn_obj)