def create_task(
        self,
        main_dag: models.DAG = None,
        is_retry: bool = False
    ) -> data_connector_operator.DataConnectorOperator:
        """Builds the operator task that connects Cloud Storage to Analytics.

        The returned operator is configured with the GOOGLE_CLOUD_STORAGE input
        hook and the GOOGLE_ANALYTICS output hook.

        Args:
          main_dag: The dag that the task attaches to.
          is_retry: Whether or not the operator should include a retry task.

        Returns:
          DataConnectorOperator.
        """
        task_id = self.get_task_id('gcs_to_ga', is_retry)

        # Reporting and monitoring settings are taken from this instance.
        monitoring_kwargs = {
            'return_report': self.dag_enable_run_report,
            'enable_monitoring': self.dag_enable_monitoring,
            'monitoring_dataset': self.monitoring_dataset,
            'monitoring_table': self.monitoring_table,
            'monitoring_bq_conn_id': self.monitoring_bq_conn_id,
        }

        # Input-side settings are looked up through models.Variable; the
        # content type is upper-cased before it is handed to the operator.
        content_type = models.Variable.get('gcs_content_type',
                                           _GCS_CONTENT_TYPE)
        source_kwargs = {
            'gcs_bucket': models.Variable.get('gcs_bucket_name', ''),
            'gcs_prefix': models.Variable.get('gcs_bucket_prefix', ''),
            'gcs_content_type': content_type.upper(),
        }

        # Output-side settings for Google Analytics.
        destination_kwargs = {
            'ga_tracking_id': models.Variable.get('ga_tracking_id', ''),
            'ga_base_params': _GA_BASE_PARAMS,
        }

        return data_connector_operator.DataConnectorOperator(
            dag_name=_DAG_NAME,
            task_id=task_id,
            input_hook=hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
            output_hook=hook_factory.OutputHookType.GOOGLE_ANALYTICS,
            is_retry=is_retry,
            dag=main_dag,
            **monitoring_kwargs,
            **source_kwargs,
            **destination_kwargs)
Ejemplo n.º 2
0
    def create_task(
        self,
        main_dag: models.DAG = None,
        is_retry: bool = False
    ) -> data_connector_operator.DataConnectorOperator:
        """Builds the operator task that connects BigQuery to Ads conversions.

        The returned operator is configured with the BIG_QUERY input hook and
        the GOOGLE_ADS_OFFLINE_CONVERSIONS output hook.

        Args:
          main_dag: The dag that the task attaches to.
          is_retry: Whether or not the operator should include a retry task.

        Returns:
          DataConnectorOperator.
        """
        task_id = self.get_task_id('bq_to_ads_oc', is_retry)
        source_hook = hook_factory.InputHookType.BIG_QUERY
        destination_hook = (
            hook_factory.OutputHookType.GOOGLE_ADS_OFFLINE_CONVERSIONS)

        # Reporting and monitoring settings are taken from this instance.
        monitoring_kwargs = {
            'return_report': self.dag_enable_run_report,
            'enable_monitoring': self.dag_enable_monitoring,
            'monitoring_dataset': self.monitoring_dataset,
            'monitoring_table': self.monitoring_table,
            'monitoring_bq_conn_id': self.monitoring_bq_conn_id,
        }

        # Input-side (BigQuery) settings, looked up through models.Variable.
        source_kwargs = {
            'bq_conn_id': _BQ_CONN_ID,
            'bq_dataset_id': models.Variable.get('bq_dataset_id', ''),
            'bq_table_id': models.Variable.get('bq_table_id', ''),
        }

        # Output-side (Google Ads) settings.
        destination_kwargs = {
            'ads_credentials': models.Variable.get('ads_credentials', ''),
        }

        return data_connector_operator.DataConnectorOperator(
            dag_name=_DAG_NAME,
            task_id=task_id,
            input_hook=source_hook,
            output_hook=destination_hook,
            is_retry=is_retry,
            dag=main_dag,
            **monitoring_kwargs,
            **source_kwargs,
            **destination_kwargs)
Ejemplo n.º 3
0
    def create_task(
        self,
        main_dag: models.DAG = None,
        is_retry: bool = False
    ) -> data_connector_operator.DataConnectorOperator:
        """Builds the operator task that connects Cloud Storage to CM.

        The returned operator is configured with the GOOGLE_CLOUD_STORAGE input
        hook and the GOOGLE_CAMPAIGN_MANAGER_OFFLINE_CONVERSIONS output hook.

        Args:
          main_dag: The dag that the task attaches to.
          is_retry: Whether or not the operator should include a retry task.

        Returns:
          DataConnectorOperator.
        """
        task_id = self.get_task_id('gcs_to_cm', is_retry)
        source_hook = hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE
        destination_hook = (
            hook_factory.OutputHookType
            .GOOGLE_CAMPAIGN_MANAGER_OFFLINE_CONVERSIONS)

        # Reporting and monitoring settings are taken from this instance.
        monitoring_kwargs = {
            'return_report': self.dag_enable_run_report,
            'enable_monitoring': self.dag_enable_monitoring,
            'monitoring_dataset': self.monitoring_dataset,
            'monitoring_table': self.monitoring_table,
            'monitoring_bq_conn_id': self.monitoring_bq_conn_id,
        }

        # Input-side settings are looked up through models.Variable; the
        # content type is upper-cased before it is handed to the operator.
        content_type = models.Variable.get('gcs_content_type',
                                           _GCS_CONTENT_TYPE)
        source_kwargs = {
            'gcs_bucket': models.Variable.get('gcs_bucket_name', ''),
            'gcs_prefix': models.Variable.get('gcs_bucket_prefix', ''),
            'gcs_content_type': content_type.upper(),
        }

        # Output-side (Campaign Manager) settings.
        destination_kwargs = {
            'cm_service_account': models.Variable.get(
                'cm_service_account', ''),
            'cm_profile_id': models.Variable.get('cm_profile_id', ''),
        }

        return data_connector_operator.DataConnectorOperator(
            dag_name=_DAG_NAME,
            task_id=task_id,
            input_hook=source_hook,
            output_hook=destination_hook,
            is_retry=is_retry,
            dag=main_dag,
            **monitoring_kwargs,
            **source_kwargs,
            **destination_kwargs)
Ejemplo n.º 4
0
  def create_task(self, main_dag: models.DAG = None, is_retry: bool = False
                 ) -> data_connector_operator.DataConnectorOperator:
    """Builds the operator task that connects BigQuery to Ads Customer Match.

    The returned operator is configured with the BIG_QUERY input hook and the
    GOOGLE_ADS_CUSTOMER_MATCH output hook.

    Args:
      main_dag: The dag that the task attaches to.
      is_retry: Whether or not the operator should include a retry task.

    Returns:
      DataConnectorOperator.
    """
    task_id = self.get_task_id('bq_to_ads_cm', is_retry)

    # Reporting and monitoring settings are taken from this instance.
    monitoring_kwargs = {
        'return_report': self.dag_enable_run_report,
        'enable_monitoring': self.dag_enable_monitoring,
        'monitoring_dataset': self.monitoring_dataset,
        'monitoring_table': self.monitoring_table,
        'monitoring_bq_conn_id': self.monitoring_bq_conn_id,
    }

    # Input-side (BigQuery) settings, looked up through models.Variable.
    source_kwargs = {
        'bq_conn_id': _BQ_CONN_ID,
        'bq_dataset_id': models.Variable.get('bq_dataset_id', ''),
        'bq_table_id': models.Variable.get('bq_table_id', ''),
    }

    # Output-side (Customer Match) settings.
    # NOTE(review): the fallbacks for 'ads_cm_create_list' (True) and
    # 'ads_cm_membership_lifespan' are not strings. If Variable.get hands back
    # stored values as strings, a stored 'False' would be truthy -- confirm
    # how the operator/hook parses these two values.
    destination_kwargs = {
        'ads_credentials': models.Variable.get('ads_credentials', ''),
        'ads_upload_key_type': models.Variable.get('ads_upload_key_type', ''),
        'ads_cm_app_id': models.Variable.get('ads_cm_app_id', None),
        'ads_cm_create_list': models.Variable.get('ads_cm_create_list', True),
        'ads_cm_membership_lifespan': models.Variable.get(
            'ads_cm_membership_lifespan', _ADS_MEMBERSHIP_LIFESPAN_DAYS),
        'ads_cm_user_list_name': models.Variable.get(
            'ads_cm_user_list_name', ''),
    }

    return data_connector_operator.DataConnectorOperator(
        dag_name=_DAG_NAME,
        task_id=task_id,
        input_hook=hook_factory.InputHookType.BIG_QUERY,
        output_hook=hook_factory.OutputHookType.GOOGLE_ADS_CUSTOMER_MATCH,
        is_retry=is_retry,
        dag=main_dag,
        **monitoring_kwargs,
        **source_kwargs,
        **destination_kwargs)
Ejemplo n.º 5
0
  def create_task(self, main_dag: models.DAG = None, is_retry: bool = False
                 ) -> data_connector_operator.DataConnectorOperator:
    """Builds the operator task that connects BigQuery to Google Analytics.

    The returned operator is configured with the BIG_QUERY input hook and the
    GOOGLE_ANALYTICS output hook, and is named after this instance's dag_name.

    Args:
      main_dag: The dag that the task attaches to.
      is_retry: Whether or not the operator should include a retry task.

    Returns:
      DataConnectorOperator.
    """
    task_id = self.get_task_id('bq_to_ga', is_retry)

    # Reporting and monitoring settings are taken from this instance.
    monitoring_kwargs = {
        'return_report': self.dag_enable_run_report,
        'enable_monitoring': self.dag_enable_monitoring,
        'monitoring_dataset': self.monitoring_dataset,
        'monitoring_table': self.monitoring_table,
        'monitoring_bq_conn_id': self.monitoring_bq_conn_id,
    }

    # Input-side (BigQuery) settings, looked up through models.Variable.
    source_kwargs = {
        'bq_conn_id': _BQ_CONN_ID,
        'bq_dataset_id': models.Variable.get('bq_dataset_id', ''),
        'bq_table_id': models.Variable.get('bq_table_id', ''),
    }

    # Output-side settings for Google Analytics.
    destination_kwargs = {
        'ga_tracking_id': models.Variable.get('ga_tracking_id', ''),
        'ga_base_params': _GA_BASE_PARAMS,
    }

    return data_connector_operator.DataConnectorOperator(
        dag_name=self.dag_name,
        task_id=task_id,
        input_hook=hook_factory.InputHookType.BIG_QUERY,
        output_hook=hook_factory.OutputHookType.GOOGLE_ANALYTICS,
        is_retry=is_retry,
        dag=main_dag,
        **monitoring_kwargs,
        **source_kwargs,
        **destination_kwargs)
Ejemplo n.º 6
0
 def test_execute_monitoring_bad_values(self):
   """Constructing with an empty monitoring_dataset raises an error.

   The operator is built with return_report=True and monitoring_dataset='';
   construction is expected to raise errors.MonitoringValueError.
   """
   operator_cls = data_connector_operator.DataConnectorOperator
   invalid_kwargs = {
       'dag_name': 'dag_name',
       'input_hook': hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
       'output_hook': hook_factory.OutputHookType.GOOGLE_ANALYTICS,
       'return_report': True,
       'monitoring_dataset': '',  # The deliberately invalid value.
   }

   with self.assertRaises(errors.MonitoringValueError):
     operator_cls(**invalid_kwargs, **self.test_operator_kwargs)
Ejemplo n.º 7
0
 def test_execute_monitoring_use_default_bq_conn_id(self):
   """Checks the bq_conn_id the monitoring hook is constructed with.

   MonitoringHook is patched (autospec) for the duration of the test, an
   operator is constructed, and the patched class must have been called
   with bq_conn_id='bigquery_default'.
   """
   # NOTE(review): an explicit monitoring_bq_conn_id is passed below although
   # the test name and the assertion refer to the default connection id --
   # confirm against the operator that this is the intended scenario.
   hook_path = ('google3.third_party.gps_building_blocks.tcrm.hooks.'
                'monitoring_hook.MonitoringHook')
   operator_cls = data_connector_operator.DataConnectorOperator
   monitoring_kwargs = {
       'dag_name': 'dag_name',
       'input_hook': hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
       'output_hook': hook_factory.OutputHookType.GOOGLE_ANALYTICS,
       'return_report': True,
       'monitoring_dataset': 'test_dataset',
       'monitoring_table': 'test_table',
       'monitoring_bq_conn_id': 'test_monitoring_bq_conn_id',
   }

   with mock.patch(hook_path, autospec=True) as patched_hook_cls:
     operator_cls(**monitoring_kwargs, **self.test_operator_kwargs)
     patched_hook_cls.assert_called_with(bq_conn_id='bigquery_default')
Ejemplo n.º 8
0
  def setUp(self):
    """Installs test doubles and builds the operators and sample data.

    Order matters: the hook factory, GoogleCloudBaseHook.__init__ and
    MonitoringHook are all replaced before any DataConnectorOperator is
    constructed.
    """
    super().setUp()
    self.addCleanup(mock.patch.stopall)

    self.test_operator_kwargs = dict(
        task_id='test_task_id',
        tcrm_gcs_to_ga_schedule='@once',
        ga_tracking_id='UA-12345-67',
        ga_base_params={'v': '1'},
        gcs_bucket='test_bucket',
        gcs_prefix='test_dataset',
        gcs_content_type='JSON')

    # Patchers started here are stopped by the stopall cleanup above.
    input_patcher = mock.patch.object(
        hook_factory, 'get_input_hook', autospec=True)
    self.mock_hook_factory_input = input_patcher.start()
    output_patcher = mock.patch.object(
        hook_factory, 'get_output_hook', autospec=True)
    self.mock_hook_factory_output = output_patcher.start()

    # Replaced by direct assignment; the original is kept on self.
    base_hook_cls = gcp_api_base_hook.GoogleCloudBaseHook
    self.original_gcp_hook_init = base_hook_cls.__init__
    base_hook_cls.__init__ = mock.MagicMock()

    # MonitoringHook becomes a mock whose call result is the mock itself, so
    # the same object stands in for both the class and its instances.
    self.original_monitoring_hook = monitoring_hook.MonitoringHook
    hook_double = mock.MagicMock()
    self.mock_monitoring_hook = hook_double
    monitoring_hook.MonitoringHook = hook_double
    hook_double.return_value = hook_double
    self.mock_generator = mock.MagicMock()
    hook_double.generate_processed_blobs_ranges = mock.MagicMock()
    hook_double.generate_processed_blobs_ranges.return_value = (
        self.mock_generator)

    # Arguments common to the three operators built below.
    operator_cls = data_connector_operator.DataConnectorOperator
    shared_kwargs = dict(
        dag_name='dag_name',
        input_hook=hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
        output_hook=hook_factory.OutputHookType.GOOGLE_ANALYTICS,
        monitoring_dataset='test_dataset',
        monitoring_table='test_table',
        monitoring_bq_conn_id='test_monitoring_bq_conn_id',
        **self.test_operator_kwargs)

    # Run report requested; enable_monitoring left unspecified.
    self.dc_operator = operator_cls(return_report=True, **shared_kwargs)

    # Run report requested; monitoring explicitly switched off.
    self.dc_operator_disable_monitoring = operator_cls(
        return_report=True, enable_monitoring=False, **shared_kwargs)

    # Neither return_report nor enable_monitoring specified.
    self.dc_operator_no_report = operator_cls(**shared_kwargs)

    self.event = dict(
        cid='12345.67890',
        ec='ClientID',
        ea='PredictedPayer',
        el='20190423',
        ev=1,
        z='1558517072202080')
    # The blob carries the same event object twice.
    self.blob = blob.Blob(events=[self.event, self.event], location='blob')