def create_task(self, main_dag: models.DAG = None, is_retry: bool = False
               ) -> data_connector_operator.DataConnectorOperator:
  """Creates the GCS -> Google Analytics data connector task.

  Builds a DataConnectorOperator wired with a Cloud Storage input hook and
  a Google Analytics output hook, pulling its runtime configuration
  (bucket, prefix, content type, tracking id) from Airflow Variables.

  NOTE(review): `main_dag` defaults to None, so the annotation is
  implicitly Optional[models.DAG]; tighten once `typing.Optional` is
  confirmed to be imported in this module.

  Args:
    main_dag: The dag that the task attaches to.
    is_retry: Whether or not the operator should include a retry task.

  Returns:
    An initialized DataConnectorOperator.
  """
  return data_connector_operator.DataConnectorOperator(
      dag_name=_DAG_NAME,
      # Task id is derived from the base name plus the retry flag, so the
      # main and retry tasks get distinct ids.
      task_id=self.get_task_id('gcs_to_ga', is_retry),
      input_hook=hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
      output_hook=hook_factory.OutputHookType.GOOGLE_ANALYTICS,
      is_retry=is_retry,
      return_report=self.dag_enable_run_report,
      enable_monitoring=self.dag_enable_monitoring,
      monitoring_dataset=self.monitoring_dataset,
      monitoring_table=self.monitoring_table,
      monitoring_bq_conn_id=self.monitoring_bq_conn_id,
      gcs_bucket=models.Variable.get('gcs_bucket_name', ''),
      gcs_prefix=models.Variable.get('gcs_bucket_prefix', ''),
      # Content type is normalized to upper case (default _GCS_CONTENT_TYPE).
      gcs_content_type=models.Variable.get('gcs_content_type',
                                           _GCS_CONTENT_TYPE).upper(),
      ga_tracking_id=models.Variable.get('ga_tracking_id', ''),
      ga_base_params=_GA_BASE_PARAMS,
      dag=main_dag)
def create_task(self, main_dag: Optional[dag.DAG] = None, is_retry: bool = False
               ) -> data_connector_operator.DataConnectorOperator:
  """Creates the BigQuery -> Google Analytics data connector task.

  Args:
    main_dag: The dag that the task attaches to.
    is_retry: Whether or not the operator should include a retry task.

  Returns:
    An initialized DataConnectorOperator.
  """
  get_var = variable.Variable.get
  # Collect the operator configuration in one place, then construct.
  operator_kwargs = {
      'dag_name': self.dag_name,
      'task_id': self.get_task_id('bq_to_ga', is_retry),
      'input_hook': hook_factory.InputHookType.BIG_QUERY,
      'output_hook': hook_factory.OutputHookType.GOOGLE_ANALYTICS,
      'is_retry': is_retry,
      'return_report': self.dag_enable_run_report,
      'enable_monitoring': self.dag_enable_monitoring,
      'monitoring_dataset': self.monitoring_dataset,
      'monitoring_table': self.monitoring_table,
      'monitoring_bq_conn_id': self.monitoring_bq_conn_id,
      'bq_conn_id': _BQ_CONN_ID,
      'bq_dataset_id': get_var('bq_dataset_id', ''),
      'bq_table_id': get_var('bq_table_id', ''),
      'ga_tracking_id': get_var('ga_tracking_id', ''),
      'ga_base_params': _GA_BASE_PARAMS,
      'dag': main_dag,
  }
  return data_connector_operator.DataConnectorOperator(**operator_kwargs)
def create_task( self, main_dag: models.DAG = None, is_retry: bool = False
    ) -> data_connector_operator.DataConnectorOperator:
  """Creates the BigQuery -> Google Ads offline conversions task.

  Builds a DataConnectorOperator that reads rows from BigQuery and uploads
  them as Google Ads offline conversions, taking dataset/table/credentials
  from Airflow Variables.

  NOTE(review): `main_dag` defaults to None, so the annotation is
  implicitly Optional[models.DAG]; tighten once `typing.Optional` is
  confirmed to be imported in this module.

  Args:
    main_dag: The dag that the task attaches to.
    is_retry: Whether or not the operator should include a retry task.

  Returns:
    An initialized DataConnectorOperator.
  """
  return data_connector_operator.DataConnectorOperator(
      dag_name=_DAG_NAME,
      # Retry tasks get a distinct task id derived from the retry flag.
      task_id=self.get_task_id('bq_to_ads_oc', is_retry),
      input_hook=hook_factory.InputHookType.BIG_QUERY,
      output_hook=hook_factory.OutputHookType.
      GOOGLE_ADS_OFFLINE_CONVERSIONS,
      is_retry=is_retry,
      return_report=self.dag_enable_run_report,
      enable_monitoring=self.dag_enable_monitoring,
      monitoring_dataset=self.monitoring_dataset,
      monitoring_table=self.monitoring_table,
      monitoring_bq_conn_id=self.monitoring_bq_conn_id,
      bq_conn_id=_BQ_CONN_ID,
      bq_dataset_id=models.Variable.get('bq_dataset_id', ''),
      bq_table_id=models.Variable.get('bq_table_id', ''),
      ads_credentials=models.Variable.get('ads_credentials', ''),
      dag=main_dag)
def create_task(self, main_dag: models.DAG = None, is_retry: bool = False
               ) -> data_connector_operator.DataConnectorOperator:
  """Creates the GCS -> Google Ads Customer Match data connector task.

  Builds a DataConnectorOperator that reads events from Cloud Storage and
  uploads them to a Google Ads Customer Match user list; all upload
  settings come from Airflow Variables.

  NOTE(review): `main_dag` defaults to None, so the annotation is
  implicitly Optional[models.DAG]; tighten once `typing.Optional` is
  confirmed to be imported in this module.

  Args:
    main_dag: The dag that the task attaches to.
    is_retry: Whether or not the operator should include a retry task.

  Returns:
    An initialized DataConnectorOperator.
  """
  return data_connector_operator.DataConnectorOperator(
      dag_name=_DAG_NAME,
      task_id=self.get_task_id('gcs_to_ads_cm', is_retry),
      input_hook=hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
      output_hook=hook_factory.OutputHookType.GOOGLE_ADS_CUSTOMER_MATCH,
      is_retry=is_retry,
      return_report=self.dag_enable_run_report,
      enable_monitoring=self.dag_enable_monitoring,
      monitoring_dataset=self.monitoring_dataset,
      monitoring_table=self.monitoring_table,
      monitoring_bq_conn_id=self.monitoring_bq_conn_id,
      gcs_bucket=models.Variable.get('gcs_bucket_name', ''),
      # Content type is normalized to upper case (default _GCS_CONTENT_TYPE).
      gcs_content_type=models.Variable.get('gcs_content_type',
                                           _GCS_CONTENT_TYPE).upper(),
      gcs_prefix=models.Variable.get('gcs_bucket_prefix', ''),
      ads_credentials=models.Variable.get('ads_credentials', ''),
      ads_upload_key_type=models.Variable.get('ads_upload_key_type', ''),
      # NOTE(review): the None/True/_ADS_MEMBERSHIP_LIFESPAN_DAYS defaults
      # below only apply when the Variable is unset; when set, Variable.get
      # returns a string — verify downstream parsing handles that.
      ads_cm_app_id=models.Variable.get('ads_cm_app_id', None),
      ads_cm_create_list=models.Variable.get('ads_cm_create_list', True),
      ads_cm_membership_lifespan=models.Variable.get(
          'ads_cm_membership_lifespan', _ADS_MEMBERSHIP_LIFESPAN_DAYS),
      ads_cm_user_list_name=models.Variable.get('ads_cm_user_list_name', ''),
      dag=main_dag)
def create_task(self, main_dag: Optional[dag.DAG] = None, is_retry: bool = False
               ) -> data_connector_operator.DataConnectorOperator:
  """Creates the GCS -> Google Ads offline conversions task.

  Args:
    main_dag: The dag that the task attaches to.
    is_retry: Whether or not the operator should include a retry task.

  Returns:
    An initialized DataConnectorOperator.
  """
  # Resolve the runtime configuration from Airflow Variables up front.
  bucket_name = variable.Variable.get('gcs_bucket_name', '')
  content_type = variable.Variable.get('gcs_content_type',
                                       _GCS_CONTENT_TYPE).upper()
  bucket_prefix = variable.Variable.get('gcs_bucket_prefix', '')
  ads_creds = variable.Variable.get('ads_credentials', '')

  return data_connector_operator.DataConnectorOperator(
      dag_name=_DAG_NAME,
      task_id=self.get_task_id('gcs_to_ads_oc', is_retry),
      input_hook=hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
      output_hook=(
          hook_factory.OutputHookType.GOOGLE_ADS_OFFLINE_CONVERSIONS),
      is_retry=is_retry,
      return_report=self.dag_enable_run_report,
      enable_monitoring=self.dag_enable_monitoring,
      monitoring_dataset=self.monitoring_dataset,
      monitoring_table=self.monitoring_table,
      monitoring_bq_conn_id=self.monitoring_bq_conn_id,
      gcs_bucket=bucket_name,
      gcs_content_type=content_type,
      gcs_prefix=bucket_prefix,
      ads_credentials=ads_creds,
      dag=main_dag)
def create_task( self, main_dag: Optional[dag.DAG] = None,
    is_retry: bool = False) -> data_connector_operator.DataConnectorOperator:
  """Creates the BigQuery -> Campaign Manager offline conversions task.

  Args:
    main_dag: The dag that the task attaches to.
    is_retry: Whether or not the operator should include a retry task.

  Returns:
    An initialized DataConnectorOperator.
  """
  get_var = variable.Variable.get
  cm_output_hook = (hook_factory.OutputHookType
                    .GOOGLE_CAMPAIGN_MANAGER_OFFLINE_CONVERSIONS)
  # Collect the operator configuration in one place, then construct.
  operator_kwargs = {
      'dag_name': _DAG_NAME,
      'task_id': self.get_task_id('bq_to_cm', is_retry),
      'input_hook': hook_factory.InputHookType.BIG_QUERY,
      'output_hook': cm_output_hook,
      'is_retry': is_retry,
      'return_report': self.dag_enable_run_report,
      'enable_monitoring': self.dag_enable_monitoring,
      'monitoring_dataset': self.monitoring_dataset,
      'monitoring_table': self.monitoring_table,
      'monitoring_bq_conn_id': self.monitoring_bq_conn_id,
      'bq_conn_id': _BQ_CONN_ID,
      'bq_dataset_id': get_var('bq_dataset_id', ''),
      'bq_table_id': get_var('bq_table_id', ''),
      'cm_service_account': get_var('cm_service_account', ''),
      'cm_profile_id': get_var('cm_profile_id', ''),
      'dag': main_dag,
  }
  return data_connector_operator.DataConnectorOperator(**operator_kwargs)
def create_task(self, main_dag: Optional[dag.DAG] = None, is_retry: bool = False
               ) -> data_connector_operator.DataConnectorOperator:
  """Creates the BigQuery -> Google Analytics 4 data connector task.

  Args:
    main_dag: The dag that the task attaches to.
    is_retry: Whether or not the operator should include a retry task.

  Returns:
    An initialized DataConnectorOperator.
  """
  get_var = variable.Variable.get
  # Collect the operator configuration in one place, then construct.
  operator_kwargs = {
      'dag_name': self.dag_name,
      'task_id': self.get_task_id('bq_to_ga4', is_retry),
      'input_hook': hook_factory.InputHookType.BIG_QUERY,
      'output_hook': hook_factory.OutputHookType.GOOGLE_ANALYTICS_4,
      'is_retry': is_retry,
      'return_report': self.dag_enable_run_report,
      'enable_monitoring': self.dag_enable_monitoring,
      'monitoring_dataset': self.monitoring_dataset,
      'monitoring_table': self.monitoring_table,
      'monitoring_bq_conn_id': self.monitoring_bq_conn_id,
      'bq_conn_id': _BQ_CONN_ID,
      'bq_dataset_id': get_var('bq_dataset_id', ''),
      'bq_table_id': get_var('bq_table_id', ''),
      'api_secret': get_var('api_secret', ''),
      'payload_type': get_var('payload_type', ''),
      'measurement_id': get_var('measurement_id', ''),
      'firebase_app_id': get_var('firebase_app_id', ''),
      'dag': main_dag,
  }
  return data_connector_operator.DataConnectorOperator(**operator_kwargs)
def create_task( self, main_dag: models.DAG = None, is_retry: bool = False
    ) -> data_connector_operator.DataConnectorOperator:
  """Creates the GCS -> Campaign Manager offline conversions task.

  Builds a DataConnectorOperator that reads events from Cloud Storage and
  uploads them as Campaign Manager offline conversions, taking its
  configuration from Airflow Variables.

  NOTE(review): `main_dag` defaults to None, so the annotation is
  implicitly Optional[models.DAG]; tighten once `typing.Optional` is
  confirmed to be imported in this module.

  Args:
    main_dag: The dag that the task attaches to.
    is_retry: Whether or not the operator should include a retry task.

  Returns:
    An initialized DataConnectorOperator.
  """
  return data_connector_operator.DataConnectorOperator(
      dag_name=_DAG_NAME,
      # Retry tasks get a distinct task id derived from the retry flag.
      task_id=self.get_task_id('gcs_to_cm', is_retry),
      input_hook=hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
      output_hook=(hook_factory.OutputHookType.
                   GOOGLE_CAMPAIGN_MANAGER_OFFLINE_CONVERSIONS),
      is_retry=is_retry,
      return_report=self.dag_enable_run_report,
      enable_monitoring=self.dag_enable_monitoring,
      monitoring_dataset=self.monitoring_dataset,
      monitoring_table=self.monitoring_table,
      monitoring_bq_conn_id=self.monitoring_bq_conn_id,
      gcs_bucket=models.Variable.get('gcs_bucket_name', ''),
      gcs_prefix=models.Variable.get('gcs_bucket_prefix', ''),
      # Content type is normalized to upper case (default _GCS_CONTENT_TYPE).
      gcs_content_type=models.Variable.get('gcs_content_type',
                                           _GCS_CONTENT_TYPE).upper(),
      cm_service_account=models.Variable.get('cm_service_account', ''),
      cm_profile_id=models.Variable.get('cm_profile_id', ''),
      dag=main_dag)
def test_execute_monitoring_bad_values(self):
  """An empty monitoring dataset must raise MonitoringValueError."""
  operator_kwargs = dict(
      dag_name='dag_name',
      input_hook=hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
      output_hook=hook_factory.OutputHookType.GOOGLE_ANALYTICS,
      return_report=True,
      monitoring_dataset='',
      **self.test_operator_kwargs)
  with self.assertRaises(errors.MonitoringValueError):
    data_connector_operator.DataConnectorOperator(**operator_kwargs)
def test_execute_monitoring_use_default_bq_conn_id(self):
  """MonitoringHook is constructed with the default 'bigquery_default'
  connection even when a custom monitoring_bq_conn_id is supplied."""
  patch_target = ('plugins.pipeline_plugins.hooks.'
                  'monitoring_hook.MonitoringHook')
  with mock.patch(patch_target, autospec=True) as hook_mock:
    operator_kwargs = dict(
        dag_name='dag_name',
        input_hook=hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
        output_hook=hook_factory.OutputHookType.GOOGLE_ANALYTICS,
        return_report=True,
        monitoring_dataset='test_dataset',
        monitoring_table='test_table',
        monitoring_bq_conn_id='test_monitoring_bq_conn_id',
        **self.test_operator_kwargs)
    data_connector_operator.DataConnectorOperator(**operator_kwargs)
    hook_mock.assert_called_with(bq_conn_id='bigquery_default')
def test_execute_monitoring_use_default_bq_conn_id(self):
  """Checks the arguments the operator passes to MonitoringHook.

  NOTE(review): despite the name, this asserts that the *supplied*
  'test_monitoring_bq_conn_id' — not a default — reaches MonitoringHook;
  consider renaming the test. It relies on monitoring_hook.MonitoringHook
  being a mock (presumably patched in setUp) so that assert_called_with
  is available on the class.
  """
  data_connector_operator.DataConnectorOperator(
      dag_name='dag_name',
      input_hook=hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
      output_hook=hook_factory.OutputHookType.GOOGLE_ANALYTICS,
      return_report=True,
      monitoring_dataset='test_dataset',
      monitoring_table='test_table',
      monitoring_bq_conn_id='test_monitoring_bq_conn_id',
      **self.test_operator_kwargs)
  monitoring_hook.MonitoringHook.assert_called_with(
      bq_conn_id='test_monitoring_bq_conn_id',
      dag_name='dag_name',
      enable_monitoring=True,
      location=mock.ANY,
      monitoring_dataset='test_dataset',
      monitoring_table='test_table')
def create_task(self, main_dag: Optional[dag.DAG] = None, is_retry: bool = False
               ) -> data_connector_operator.DataConnectorOperator:
  """Creates the BigQuery -> Google Ads Customer Match task.

  Args:
    main_dag: The dag that the task attaches to.
    is_retry: Whether or not the operator should include a retry task.

  Returns:
    An initialized DataConnectorOperator.
  """
  get_var = variable.Variable.get
  # Collect the operator configuration in one place, then construct.
  # The None/True/_ADS_MEMBERSHIP_LIFESPAN_DAYS values are Variable
  # fallbacks used only when the corresponding Variable is unset.
  operator_kwargs = {
      'dag_name': _DAG_NAME,
      'task_id': self.get_task_id('bq_to_ads_cm', is_retry),
      'input_hook': hook_factory.InputHookType.BIG_QUERY,
      'output_hook': hook_factory.OutputHookType.GOOGLE_ADS_CUSTOMER_MATCH,
      'is_retry': is_retry,
      'return_report': self.dag_enable_run_report,
      'enable_monitoring': self.dag_enable_monitoring,
      'monitoring_dataset': self.monitoring_dataset,
      'monitoring_table': self.monitoring_table,
      'monitoring_bq_conn_id': self.monitoring_bq_conn_id,
      'bq_conn_id': _BQ_CONN_ID,
      'bq_dataset_id': get_var('bq_dataset_id', ''),
      'bq_table_id': get_var('bq_table_id', ''),
      'ads_credentials': get_var('ads_credentials', ''),
      'ads_upload_key_type': get_var('ads_upload_key_type', ''),
      'ads_cm_app_id': get_var('ads_cm_app_id', None),
      'ads_cm_create_list': get_var('ads_cm_create_list', True),
      'ads_cm_membership_lifespan': get_var(
          'ads_cm_membership_lifespan', _ADS_MEMBERSHIP_LIFESPAN_DAYS),
      'ads_cm_user_list_name': get_var('ads_cm_user_list_name', ''),
      'dag': main_dag,
  }
  return data_connector_operator.DataConnectorOperator(**operator_kwargs)
def setUp(self):
  """Installs hook mocks and builds the operators under test.

  Statement order matters here: the hook-factory and MonitoringHook
  patches must be started before any DataConnectorOperator is
  constructed, since construction triggers hook creation.
  """
  super().setUp()
  # Stops every mock.patch(...).start() below when the test finishes.
  self.addCleanup(mock.patch.stopall)
  # Baseline keyword arguments shared by all operators built below.
  self.test_operator_kwargs = {
      'task_id': 'test_task_id',
      'tcrm_gcs_to_ga_schedule': '@once',
      'ga_tracking_id': 'UA-12345-67',
      'ga_base_params': {
          'v': '1'
      },
      'gcs_bucket': 'test_bucket',
      'gcs_prefix': 'test_dataset',
      'gcs_content_type': 'JSON',
  }
  self.mock_hook_factory_input = mock.patch.object(
      hook_factory, 'get_input_hook', autospec=True).start()
  self.mock_hook_factory_output = mock.patch.object(
      hook_factory, 'get_output_hook', autospec=True).start()
  # NOTE(review): this direct assignment is NOT undone by
  # mock.patch.stopall; the saved original suggests a tearDown elsewhere
  # restores it from self.original_gcp_hook_init — confirm.
  self.original_gcp_hook_init = gcp_api_base_hook.GoogleCloudBaseHook.__init__
  gcp_api_base_hook.GoogleCloudBaseHook.__init__ = mock.MagicMock()
  self.mock_monitoring_hook = mock.patch.object(monitoring_hook,
                                                'MonitoringHook',
                                                autospec=True).start()
  # Convenience handle on the mocked hook's blob-range generator.
  self.mock_generator = (self.mock_monitoring_hook.return_value.
                         generate_processed_blobs_ranges.return_value)
  # Operator with monitoring enabled (default) and run report requested.
  self.dc_operator = data_connector_operator.DataConnectorOperator(
      dag_name='dag_name',
      input_hook=hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
      output_hook=hook_factory.OutputHookType.GOOGLE_ANALYTICS,
      return_report=True,
      monitoring_dataset='test_dataset',
      monitoring_table='test_table',
      monitoring_bq_conn_id='test_monitoring_bq_conn_id',
      **self.test_operator_kwargs)
  # Same operator but with monitoring explicitly disabled.
  self.dc_operator_disable_monitoring = (
      data_connector_operator.DataConnectorOperator(
          dag_name='dag_name',
          input_hook=hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
          output_hook=hook_factory.OutputHookType.GOOGLE_ANALYTICS,
          return_report=True,
          enable_monitoring=False,
          monitoring_dataset='test_dataset',
          monitoring_table='test_table',
          monitoring_bq_conn_id='test_monitoring_bq_conn_id',
          **self.test_operator_kwargs))
  # Operator without return_report (uses the class default).
  # NOTE(review): the closing paren after the class name (instead of after
  # the call) looks unintentional, though it is behaviorally identical.
  self.dc_operator_no_report = (
      data_connector_operator.DataConnectorOperator)(
          dag_name='dag_name',
          input_hook=hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
          output_hook=hook_factory.OutputHookType.GOOGLE_ANALYTICS,
          monitoring_dataset='test_dataset',
          monitoring_table='test_table',
          monitoring_bq_conn_id='test_monitoring_bq_conn_id',
          **self.test_operator_kwargs)
  # A sample Google Analytics hit used as blob payload in the tests.
  self.event = {
      'cid': '12345.67890',
      'ec': 'ClientID',
      'ea': 'PredictedPayer',
      'el': '20190423',
      'ev': 1,
      'z': '1558517072202080'
  }
  self.blob = blob.Blob(events=([self.event] * 2), location='blob')