Example #1
0
    def _ensure_payload_is_dict(self, event: Dict[str, Any]) -> None:
        """Ensures the event contains key of payload and the value of payload is dict.

       When the event is loaded from BigQuery, the value of payload is string,
       whereas the value of payload is dict when event is loaded from Google
       Storage because the raw data is stored as JSON files.

    Args:
      event: Event to ensure.

    Returns:
      The same event as input that is ensured.
    """
        if 'payload' not in event:
            raise errors.DataOutConnectorValueError(
                error_num=errors.ErrorNameIDMap.
                GA4_HOOK_ERROR_MISSING_PAYLOAD_IN_EVENT)

        payload = event['payload']

        # Payload is from Google Cloud Storage.
        if isinstance(payload, dict):
            return

        try:
            # Payload is coming from BQ and must be converted to a dictionary
            event['payload'] = json.loads(payload)
        except json.decoder.JSONDecodeError:
            raise errors.DataOutConnectorValueError(
                error_num=errors.ErrorNameIDMap.
                GA4_HOOK_ERROR_INVALID_JSON_STRUCTURE)
Example #2
0
    def _parse_validate_result(self, event: Dict[str, Any],
                               response: requests.Response) -> None:
        """Parses the response returned from the debug API.

        The response body contains a JSON object describing the validation
        result. For example:
        {
          "validationMessages": [
            {
              "fieldPath": "timestamp_micros",
              "description": "Measurement timestamp_micros has timestamp....",
              "validationCode": "VALUE_INVALID"
            }]
        }

        fieldPath indicates which part of the payload JSON holds the invalid
        value. When fieldPath is absent, the field name can be found in
        description instead, so both are compared against _ERROR_TYPES.

        Args:
          event: The event that contains the index and the payload.
          response: The HTTP response from the debug API.

        Raises:
          DataOutConnectorValueError: If the HTTP status is not 200, the body
            is not valid JSON, or the API returned a validation message.
        """
        if response.status_code >= 500:
            raise errors.DataOutConnectorValueError(
                error_num=errors.ErrorNameIDMap.
                RETRIABLE_GA4_HOOK_ERROR_HTTP_ERROR)
        if response.status_code != 200:
            raise errors.DataOutConnectorValueError(
                error_num=errors.ErrorNameIDMap.
                NON_RETRIABLE_ERROR_EVENT_NOT_SENT)

        try:
            validation_result = response.json()
        except ValueError as error:
            # ValueError is the common base of the JSON decode errors, so this
            # does not depend on which JSON decoder produced the exception.
            # The enum member itself is passed (not its .value), matching the
            # other raises in this method.
            raise errors.DataOutConnectorValueError(
                error_num=errors.ErrorNameIDMap.
                RETRIABLE_GA4_HOOK_ERROR_HTTP_ERROR) from error

        # Payload is valid: validation messages are only returned if there is a
        # problem with the payload, so a missing or empty list means success.
        validation_messages = validation_result.get('validationMessages')
        if not validation_messages:
            return

        # The validation API only ever returns one message.
        message = validation_messages[0]
        # fieldPath may be absent (see docstring); fall back to description.
        field_path = message.get('fieldPath')
        description = message.get('description') or ''

        for property_name, error_num in _ERROR_TYPES.items():
            if field_path == property_name or property_name in description:
                raise errors.DataOutConnectorValueError(error_num=error_num)

        # Prevent from losing error message if it is undefined due to API change.
        logging.error('id: %s, fieldPath: %s, description: %s',
                      event.get('id'), field_path, description)
        raise errors.DataOutConnectorValueError(
            error_num=errors.ErrorNameIDMap.GA4_HOOK_ERROR_INVALID_VALUES)
Example #3
0
    def _send_validate_request(self,
                               payload: Dict[str, Any],
                               timeout: float = 30.0) -> requests.Response:
        """Sends the GA4 payload to the debug API for data validating.

        By adding the key-value pair
        (validationBehavior: ENFORCE_RECOMMENDATIONS), the API will check the
        payload thoroughly. This is recommended because the Measurement
        Protocol API won't check the data and fails silently, so you might not
        know what happened to your data.

        Args:
          payload: The JSON payload of the GA4 event. It is copied (shallowly)
            before the extra key is added, so the caller's dict is not
            modified.
          timeout: Seconds to wait for the debug API before giving up. Without
            a timeout the request could block indefinitely.

        Returns:
          The response from the debug API.

        Raises:
          DataOutConnectorValueError: If the connection fails or the request
            times out; reported with the retriable HTTP error number.
        """
        validating_payload = dict(payload)
        validating_payload['validationBehavior'] = 'ENFORCE_RECOMMENDATIONS'
        try:
            response = requests.post(self.validate_url,
                                     json=validating_payload,
                                     timeout=timeout)
        except (requests.ConnectionError, requests.Timeout) as error:
            # Both are transient network conditions, so report them as
            # retriable.
            raise errors.DataOutConnectorValueError(
                error_num=errors.ErrorNameIDMap.
                RETRIABLE_GA4_HOOK_ERROR_HTTP_ERROR) from error

        return response
Example #4
0
    def test_ads_cm_hook_send_events_create_new_list_is_false(self):
        """Verifies create_user_list is not called when create_list=False."""
        creating_hook = self.create_ads_cm_hook(create_list=True)
        lookup_failure = errors.DataOutConnectorValueError()
        creating_hook.get_user_list_id.side_effect = lookup_failure
        events_blob = blob.Blob(
            events=[self.contact_info_event_email], location='')

        # NOTE(review): the side_effect above is configured on the first hook,
        # but send_events runs on this second hook. Unless create_ads_cm_hook
        # shares its mocks between hooks, the lookup failure is never
        # exercised here - confirm the intended setup.
        non_creating_hook = self.create_ads_cm_hook(create_list=False)
        non_creating_hook.send_events(events_blob)

        non_creating_hook.create_user_list.assert_not_called()
Example #5
0
    def _validate_init_params(self, user_list_name: str,
                              membership_lifespan: int) -> None:
        """Validate user_list_name and membership_lifespan parameters.

    Args:
      user_list_name: The name of the user list to add members to.
      membership_lifespan: Number of days a user's cookie stays.

    Raises:
      DataOutConnectorValueError if user_list_name is null or
      membership_lifespan is negative or bigger than 10000.
    """
        if not user_list_name:
            raise errors.DataOutConnectorValueError(
                'User list name is empty.',
                errors.ErrorNameIDMap.ADS_CM_HOOK_ERROR_EMPTY_USER_LIST_NAME)
        if membership_lifespan < 0 or membership_lifespan > 10000:
            raise errors.DataOutConnectorValueError(
                'Membership lifespan is not between 0 and 10,000.', errors.
                ErrorNameIDMap.ADS_CM_HOOK_ERROR_INVALID_MEMBERSHIP_LIFESPAN)
Example #6
0
    def test_ads_cm_hook_send_events_create_new_list(self):
        """Verifies send_events succeeds when the user list lookup fails.

        The hook is built with create_list=True and get_user_list_id is set
        up to raise DataOutConnectorValueError.
        """
        hook = self.create_ads_cm_hook(create_list=True)
        lookup_failure = errors.DataOutConnectorValueError()
        hook.get_user_list_id.side_effect = lookup_failure
        events_blob = blob.Blob(
            events=[self.contact_info_event_email], location='')

        result_blob = hook.send_events(events_blob)

        # No event may be reported as failed, and both the lookup and the
        # member upload must have happened exactly once.
        self.assertListEqual([], result_blob.failed_events)
        hook.get_user_list_id.assert_called_once()
        hook.add_members_to_user_list.assert_called_once()
Example #7
0
    def get_user_list_id(self, user_list_name: Text) -> int:
        """Looks up the ID of the CRM-based user list with the given name.

        Queries the AdWords user list service for lists whose Name equals
        user_list_name and whose ListType equals CRM_BASED, and returns the ID
        of the first entry of the result.

        Args:
          user_list_name: The name of the user list to get the ID for.

        Returns:
          ID of the first matching user list.

        Raises:
          DataOutConnectorAuthenticationError: If the service call raises
            GoogleAdsServerFault, GoogleAdsValueError or RefreshError.
          DataOutConnectorValueError: If the result holds no entries, i.e. the
            list with the given name doesn't exist.
        """
        name_predicate = {
            'field': 'Name',
            'operator': 'EQUALS',
            'values': user_list_name
        }
        crm_type_predicate = {
            'field': 'ListType',
            'operator': 'EQUALS',
            'values': 'CRM_BASED'
        }
        selector = {
            'fields': ['Name', 'Id'],
            'predicates': [name_predicate, crm_type_predicate],
        }
        user_list_service = self._get_service(
            ServiceType.ADWORDS_USER_LIST_SERVICE)

        try:
            page = user_list_service.get(selector)
        except (googleads_errors.GoogleAdsServerFault,
                googleads_errors.GoogleAdsValueError,
                google_auth_exceptions.RefreshError) as error:
            raise errors.DataOutConnectorAuthenticationError(
                error=error,
                msg='Failed to get user list ID due to authentication error.',
                error_num=(errors.ErrorNameIDMap.
                           RETRIABLE_ERROR_OUTPUT_AUTHENTICATION_FAILED))

        # Guard clause: no 'entries' key or an empty entries list means the
        # list was not found.
        if 'entries' not in page or not len(page['entries']):
            raise errors.DataOutConnectorValueError(
                msg="""Failed to get user list ID. List doesn't exist""",
                error_num=errors.ErrorNameIDMap.
                ADS_HOOK_ERROR_FAIL_TO_GET_USER_LIST_ID)

        return page['entries'][0]['id']
Example #8
0
  def _get_developer_token(self) -> str:
    """Reads the developer token from the connection's password field.

    Returns:
      The password stored on the connection identified by http_conn_id, used
      as the developer token of Google Ads API.

    Raises:
      DataOutConnectorValueError: If the connection is not available or if its
        password is missing.
    """
    connection_id = self.http_conn_id
    connection = self.get_connection(connection_id)

    if not connection:
      message = 'Cannot get connection {id}.'.format(id=connection_id)
      raise errors.DataOutConnectorValueError(
          message,
          errors.ErrorNameIDMap
          .RETRIABLE_ADS_UAC_HOOK_ERROR_FAIL_TO_GET_AIRFLOW_CONNECTION)

    dev_token = connection.password
    if dev_token:
      return dev_token

    message = (
        'Missing dev token. Please check connection {id} and its password.'
        .format(id=connection_id))
    raise errors.DataOutConnectorValueError(
        message,
        errors.ErrorNameIDMap.RETRIABLE_ADS_UAC_HOOK_ERROR_MISSING_DEV_TOKEN)
Example #9
0
    def _validate_required_fields(self, event: Dict[str, Any]) -> None:
        """Validates that all required fields are present and well formed.

        Checks, in order: every RequiredFields value is a key of the event,
        conversionName has 1 to 100 characters, conversionTime matches
        _RE_STRING_DATE_TIME, conversionValue is not negative, and
        googleClickId has 1 to 512 characters. Values of an unexpected type
        (for example a number where a string is expected) are reported as
        invalid instead of surfacing as TypeError.

        Args:
          event: Offline Conversion JSON event.

        Raises:
          DataOutConnectorValueError: If any violation is found.
        """

        def _length_is_between_1_and(value: Any, max_length: int) -> bool:
            # Values without len() (e.g. numbers) are invalid, not a crash.
            try:
                return bool(value) and len(value) <= max_length
            except TypeError:
                return False

        required_field_names = [field.value for field in RequiredFields]
        if not all(name in event for name in required_field_names):
            raise errors.DataOutConnectorValueError(
                'Event is missing at least one mandatory field(s)'
                f' {required_field_names}', errors.
                ErrorNameIDMap.ADS_OC_HOOK_ERROR_MISSING_MANDATORY_FIELDS)

        if not _length_is_between_1_and(event['conversionName'], 100):
            # An empty name is rejected too, so the message states both
            # bounds.
            raise errors.DataOutConnectorValueError(
                'Length of conversionName should be between 1 and 100.',
                errors.ErrorNameIDMap.
                ADS_OC_HOOK_ERROR_INVALID_LENGTH_OF_CONVERSION_NAME)

        try:
            time_is_valid = bool(
                re.match(_RE_STRING_DATE_TIME, event['conversionTime']))
        except TypeError:
            # re.match raises TypeError for non-string input.
            time_is_valid = False
        if not time_is_valid:
            raise errors.DataOutConnectorValueError(
                'conversionTime should be formatted: yyyymmdd hhmmss [tz]',
                errors.ErrorNameIDMap.
                ADS_OC_HOOK_ERROR_INVALID_FORMAT_OF_CONVERSION_TIME)

        try:
            value_is_negative = event['conversionValue'] < 0
        except TypeError:
            # Not comparable with a number (None, str, ...): treat as invalid.
            value_is_negative = True
        if value_is_negative:
            raise errors.DataOutConnectorValueError(
                'conversionValue should be greater than or equal to 0.',
                errors.ErrorNameIDMap.
                ADS_OC_HOOK_ERROR_INVALID_CONVERSION_VALUE)

        if not _length_is_between_1_and(event['googleClickId'], 512):
            raise errors.DataOutConnectorValueError(
                'Length of googleClickId should be between 1 and 512.',
                errors.ErrorNameIDMap.
                ADS_OC_HOOK_ERROR_INVALID_LENGTH_OF_GOOGLE_CLICK_ID)
Example #10
0
    def _validate_and_set_upload_key_type(
            self, upload_key_type: str, app_id: str) -> ads_hook.UploadKeyType:
        """Resolves upload_key_type to its enum member and checks app_id.

        Args:
          upload_key_type: Name of the upload key type. Refer to
            ads_hook.UploadKeyType for more information.
          app_id: An ID required for creating a new list if upload_key_type is
            MOBILE_ADVERTISING_ID.

        Returns:
          The ads_hook.UploadKeyType member named by upload_key_type.

        Raises:
          DataOutConnectorValueError: In the following scenarios:
            - upload_key_type is not a member name of ads_hook.UploadKeyType.
            - app_id is not specified when create_list is True and
              upload_key_type is MOBILE_ADVERTISING_ID.
        """
        try:
            key_type = ads_hook.UploadKeyType[upload_key_type]
        except KeyError:
            raise errors.DataOutConnectorValueError(
                'Invalid upload key type. See ads_hook.UploadKeyType for details',
                errors.ErrorNameIDMap.ADS_CM_HOOK_ERROR_INVALID_UPLOAD_KEY_TYPE
            )

        is_mobile_advertising_id = (
            key_type == ads_hook.UploadKeyType.MOBILE_ADVERTISING_ID)
        if is_mobile_advertising_id and self.create_list and not app_id:
            raise errors.DataOutConnectorValueError(
                'app_id needs to be specified for '
                'MOBILE_ADVERTISING_ID when create_list is True.',
                errors.ErrorNameIDMap.ADS_CM_HOOK_ERROR_MISSING_APPID)

        return key_type
Example #11
0
    def _get_service(
            self,
            service_type: ServiceType,
            enable_partial_failure: bool = False) -> common.GoogleSoapService:
        """Builds an AdWords client and returns the requested service.

        Partial failure detailed explanation:
        https://developers.google.com/adwords/api/docs/guides/partial-failure

        Args:
          service_type: AdWords service to create a service client for. See all
            available services in ServiceType.
          enable_partial_failure: Value assigned to the client's
            partial_failure flag, which lets valid operations be committed
            while failed ones return errors.

        Returns:
          AdWords service object.

        Raises:
          DataOutConnectorAuthenticationError: If the client cannot be loaded
            from the YAML document.
          DataOutConnectorValueError: If the service can't be created.
        """
        credentials_hint = (
            'Please check the credentials in the yml doc, it should contains'
            ' a top level key named adwords and 5 sub key-value'
            ' pairs named client_customer_id, developer_token, client_id,'
            ' client_secret and refresh_token.')

        try:
            client = adwords.AdWordsClient.LoadFromString(self.yaml_doc)
            client.partial_failure = enable_partial_failure
        except googleads_errors.GoogleAdsValueError as error:
            raise errors.DataOutConnectorAuthenticationError(
                error=error,
                msg=credentials_hint,
                error_num=(errors.ErrorNameIDMap.
                           RETRIABLE_ERROR_OUTPUT_AUTHENTICATION_FAILED))

        try:
            return client.GetService(service_type.value, self.api_version)
        except googleads_errors.GoogleAdsValueError as error:
            raise errors.DataOutConnectorValueError(
                error=error,
                msg='Couldn\'t get service from Google Adwords API',
                error_num=errors.ErrorNameIDMap.
                RETRIABLE_ADS_HOOK_ERROR_UNAVAILABLE_ADS_SERVICE)
Example #12
0
    def _validate_tracking_id(self, tracking_id: str) -> None:
        """Checks that the tracking ID matches _GA_TRACKING_ID_REGEX.

        The tracking id must comply with the pattern 'UA-XXXXX-Y' before the
        send_hit function can proceed.

        Args:
          tracking_id: GA's property or tracking ID for GA to identify hits.

        Raises:
          DataOutConnectorValueError: If the tracking id format is invalid.
        """
        tracking_id_match = re.match(_GA_TRACKING_ID_REGEX, tracking_id)
        if tracking_id_match is not None:
            return

        raise errors.DataOutConnectorValueError(
            'Invalid Tracking ID Format. The expected format is `UA-XXXXX-Y`.',
            errors.ErrorNameIDMap.GA_HOOK_ERROR_INVALID_TRACKING_ID_FORMAT)
Example #13
0
def _validate_sha256_pattern(field_data: str) -> None:
    """Validates that field_data matches the sha256 digest string pattern.

    The correct pattern is '^[A-Fa-f0-9]{64}$'.
    Note: None (or any other non-string value) is an invalid sha256 value.

    The whole string must match. With re.match a '$' anchor also matches just
    before a trailing newline, which would let a digest followed by a newline
    through; re.fullmatch rejects it.

    Args:
      field_data: A field data which is a part of member data entity of Google
        Adwords API.

    Raises:
      DataOutConnectorValueError: If field_data is not a string or does not
        match the pattern.
    """
    # The isinstance guard keeps re from raising TypeError on non-string input.
    is_valid_digest = (
        isinstance(field_data, str) and
        re.fullmatch(_SHA256_DIGEST_PATTERN, field_data) is not None)
    if not is_valid_digest:
        raise errors.DataOutConnectorValueError(
            'None or string is not in SHA256 format.', errors.ErrorNameIDMap.
            ADS_CM_HOOK_ERROR_PAYLOAD_FIELD_VIOLATES_SHA256_FORMAT)
Example #14
0
def _format_mobile_advertising_event(event: Dict[Any, Any]) -> Dict[Any, Any]:
    """Format a mobile_advertising_event event.

  Args:
    event: A raw mobile_advertising_event event.

  Returns:
    A formatted mobile_advertising_event event.

  Raises:
    DataOutConnectorValueError if mobileId field doesn't exist in the event.
  """
    if 'mobileId' not in event:
        raise errors.DataOutConnectorValueError(
            'mobileId field doesn\'t exist in the event.',
            errors.ErrorNameIDMap.ADS_CM_HOOK_ERROR_MISSING_MOBILEID_IN_EVENT)
    member = {'mobileId': event['mobileId']}
    return member
Example #15
0
def _format_crm_id_event(event: Dict[Any, Any]) -> Dict[Any, Any]:
    """Format a crm_id event.

  Args:
    event: A raw crm_id event.

  Returns:
    A formatted crm_id event.

  Raises:
    DataOutConnectorValueError if userId is not exist in the event.
  """
    if 'userId' not in event:
        raise errors.DataOutConnectorValueError(
            """userId doesn't exist in crm_id event.""", errors.ErrorNameIDMap.
            ADS_CM_HOOK_ERROR_MISSING_USERID_IN_CRMID_EVENT)
    member = {'userId': event['userId']}
    return member
Example #16
0
    def send_events(self, blb: blob.Blob) -> blob.Blob:
        """Sends Customer Match events to Google AdWords API.

        Events are validated first and the valid ones are uploaded batch by
        batch. The user list ID is resolved when the first batch is handled;
        if the lookup fails and create_list is set, a new list is created.
        Events that failed validation, and every event of a batch whose upload
        was unsuccessful, are recorded on the blob as failed events.

        Args:
          blb: A blob containing Customer Match data to send.

        Returns:
          The same blob, updated with any failing events.

        Raises:
          DataOutConnectorValueError: If the user list with the given name
            doesn't exist and create_list is false.
        """
        valid_events, failures = self._validate_and_prepare_events_to_send(
            blb.events)
        list_id = None

        for batch in self._batch_generator(valid_events):
            if not list_id:
                try:
                    list_id = self.get_user_list_id(self.user_list_name)
                except errors.DataOutConnectorValueError:
                    if not self.create_list:
                        raise errors.DataOutConnectorValueError(
                            'user_list_name does NOT exist (create_list = False).'
                        )
                    list_id = self.create_user_list(
                        self.user_list_name, self.upload_key_type,
                        self.membership_lifespan, self.app_id)

            # Each batch entry carries the event index at [0] and the member
            # data at [1].
            members = [entry[1] for entry in batch]
            try:
                self.add_members_to_user_list(list_id, members)
            except errors.DataOutConnectorSendUnsuccessfulError as error:
                # The whole batch failed: mark every event in it.
                failures.extend(
                    (entry[0], error.error_num) for entry in batch)

        for failure in failures:
            event_index = failure[0]
            blb.append_failed_event(event_index + blb.position,
                                    blb.events[event_index],
                                    failure[1].value)

        return blb
Example #17
0
def _format_contact_info_event(event: Dict[Any, Any]) -> Dict[Any, Any]:
    """Builds the member dict for a contact_info event.

    Args:
      event: A raw contact_info event.

    Returns:
      A dict with the hashedEmail and/or hashedPhoneNumber of the event, plus
      an addressInfo entry when address information is available.

    Raises:
      DataOutConnectorValueError: For the following scenarios:
        - Neither hashedEmail nor hashedPhoneNumber exists in the payload.
        - A hashed field does not meet the SHA256 format.
    """
    member = {}

    # Email is handled before phone number; a field that is absent or None is
    # skipped.
    for hashed_key in ('hashedEmail', 'hashedPhoneNumber'):
        hashed_value = event.get(hashed_key)
        if hashed_value is None:
            continue
        _validate_sha256_pattern(hashed_value)
        member[hashed_key] = hashed_value

    # Only the two keys above can be present at this point, so an empty dict
    # means neither contact field was supplied.
    if not member:
        raise errors.DataOutConnectorValueError(
            'Data must contain either a valid hashed email or phone number.',
            errors.ErrorNameIDMap.
            ADS_CM_HOOK_ERROR_INVALID_EMAIL_AND_PHONE_NUMBER)

    if not _is_address_info_available(event):
        return member

    first_name_hash = event['hashedFirstName']
    _validate_sha256_pattern(first_name_hash)
    last_name_hash = event['hashedLastName']
    _validate_sha256_pattern(last_name_hash)
    member['addressInfo'] = {
        'hashedFirstName': first_name_hash,
        'hashedLastName': last_name_hash,
        'countryCode': event['countryCode'],
        'zipCode': event['zipCode'],
    }
    return member
Example #18
0
    def _validate_uid_or_cid(self, cid: Optional[str],
                             uid: Optional[str]) -> None:
        """Validates uid or cid.

    Each payload must include cid (client id) or uid (user id) in it; this
    function verifies either uid or cid are set.

    Args:
      cid: Client id to check.
      uid: User id to check.

    Raises:
      DataOutConnectorValueError: If input parameter didn't cover either cid or
      uid.
    """
        if not cid and not uid:
            raise errors.DataOutConnectorValueError(
                'Hit must have cid or uid.',
                error_num=errors.ErrorNameIDMap.
                GA_HOOK_ERROR_MISSING_CID_OR_UID)
Example #19
0
  def _validate_app_conversion_payload(self, payload: Dict[str, Any]) -> None:
    """Validates payload sent to UAC.

    Checks, in order: every key of _REQUIRED_FIELDS has a non-None value,
    app_event_type is one of the AppEventType values, app_event_name is only
    set together with the 'custom' event type, rdid is a string matching
    _RDID_REGEX, id_type is one of the IdType values, and lat equals 0 or 1.

    Args:
      payload: The payload to be validated before sending to Google Ads UAC.

    Raises:
      DataOutConnectorValueError: If some value is missing or in wrong format.
    """

    for key in _REQUIRED_FIELDS:
      if payload.get(key) is None:
        raise errors.DataOutConnectorValueError(
            """Missing {key} in payload.""".format(key=key),
            errors.ErrorNameIDMap.ADS_UAC_HOOK_ERROR_MISSING_MANDATORY_FIELDS)

    if payload.get('app_event_type') not in [item.value
                                             for item in AppEventType]:
      raise errors.DataOutConnectorValueError(
          """Unsupported app event type in
          payload. Example: 'first_open', 'session_start', 'in_app_purchase',
          'view_item_list', 'view_item', 'view_search_results',
          'add_to_cart', 'ecommerce_purchase', 'custom'.""",
          errors.ErrorNameIDMap.ADS_UAC_HOOK_ERROR_UNSUPPORTED_APP_EVENT_TYPE)

    if (payload.get('app_event_name') and
        payload.get('app_event_type') != 'custom'):
      raise errors.DataOutConnectorValueError(
          """App event type must be 'custom' when app event name exists.""",
          errors.ErrorNameIDMap.ADS_UAC_HOOK_ERROR_WRONG_APP_EVENT_TYPE)

    raw_device_id = payload.get('rdid')
    # A non-string rdid (None, a number, ...) would make the regex match raise
    # TypeError; report it as a wrongly formatted device id instead.
    if (not isinstance(raw_device_id, str) or
        not _RDID_REGEX.match(raw_device_id)):
      raise errors.DataOutConnectorValueError(
          """Wrong raw device id format in
          payload. Should be compatible with RFC4122.""",
          errors.ErrorNameIDMap.ADS_UAC_HOOK_ERROR_WRONG_RAW_DEVICE_ID_FORMAT)

    if payload.get('id_type') not in [item.value for item in IdType]:
      raise errors.DataOutConnectorValueError(
          """Wrong raw device id type in
          payload. Example: 'advertisingid', 'idfa'.""",
          errors.ErrorNameIDMap.ADS_UAC_HOOK_ERROR_WRONG_RAW_DEVICE_ID_TYPE)

    if payload.get('lat') != 0 and payload.get('lat') != 1:
      raise errors.DataOutConnectorValueError(
          """Wrong limit-ad-tracking status in payload. Example: 0, 1.""",
          errors.ErrorNameIDMap.ADS_UAC_HOOK_ERROR_WRONG_LAT_STATUS)