def sync_report(stream_name, stream_metadata, sdk_client):
    customer_id = sdk_client.client_customer_id
    stream_schema, _ = create_schema_for_report(stream_name, sdk_client)
    stream_schema = add_synthetic_keys_to_stream_schema(stream_schema)

    xml_attribute_list = get_fields_to_sync(stream_schema, stream_metadata)

    primary_keys = metadata.get(stream_metadata, (), 'tap-adwords.report-key-properties') or []
    LOGGER.info("{} primary keys are {}".format(stream_name, primary_keys))

    write_schema(stream_name, stream_schema, primary_keys, bookmark_properties=['day'])

    # Map each selected XML attribute to its AdWords report field name.
    field_list = []
    for field in xml_attribute_list:
        field_list.append(stream_metadata[('properties', field)]['adwords.fieldName'])

    check_selected_fields(stream_name, field_list, sdk_client)

    # If an attribution window sync is interrupted, start where it left off
    start_date = get_attribution_window_bookmark(customer_id, stream_name)
    if start_date is None:
        start_date = apply_conversion_window(
            get_start_for_stream(customer_id, stream_name))

    # Some reports cannot be requested more than 90 days back.
    if stream_name in REPORTS_WITH_90_DAY_MAX:
        cutoff = utils.now() + relativedelta(days=-90)
        if start_date < cutoff:
            start_date = cutoff

    LOGGER.info('Selected fields: %s', field_list)

    # Sync one day at a time, bookmarking progress after each day so an
    # interrupted run can resume inside the attribution window.
    while start_date <= get_end_date():
        sync_report_for_day(stream_name, stream_schema, sdk_client,
                            start_date, field_list)
        start_date = start_date + relativedelta(days=1)
        bookmarks.write_bookmark(STATE,
                                 state_key_name(customer_id, stream_name),
                                 'last_attribution_window_date',
                                 utils.strftime(start_date))
        singer.write_state(STATE)

    bookmarks.clear_bookmark(STATE,
                             state_key_name(customer_id, stream_name),
                             'last_attribution_window_date')
    singer.write_state(STATE)
    LOGGER.info("Done syncing the %s report for customer_id %s",
                stream_name, customer_id)
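
# A minimal sketch of the bookmark helpers the function above relies on --
# an illustration, not the tap's confirmed internals. The singer-python calls
# (bookmarks.get_bookmark, utils.strptime_to_utc) are real; the key layout in
# state_key_name and both helper bodies are assumptions. STATE is the tap's
# module-level state dict, as used above.
from singer import bookmarks, utils

def state_key_name(customer_id, stream_name):
    # Namespace one bookmark per (customer, stream) pair (assumed format).
    return "{}_{}".format(customer_id, stream_name)

def get_attribution_window_bookmark(customer_id, stream_name):
    # Resume point of an interrupted attribution-window sync, or None if the
    # previous run finished and cleared the bookmark.
    value = bookmarks.get_bookmark(STATE,
                                   state_key_name(customer_id, stream_name),
                                   'last_attribution_window_date')
    return utils.strptime_to_utc(value) if value is not None else None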
def sync_statistics_report(config, state, stream, sdk_client, token):
    """Sync a stream which is backed by the Criteo Statistics endpoint."""
    advertiser_ids = config.get("advertiser_ids", "")
    mdata = metadata.to_map(stream.metadata)

    stream = add_synthetic_keys_to_stream_schema(stream)
    field_list = get_field_list(stream)

    primary_keys = []
    LOGGER.info("{} primary keys are {}".format(stream.stream, primary_keys))
    singer.write_schema(
        stream.stream,
        stream.schema.to_dict(),
        primary_keys,
        bookmark_properties=["Day"],
    )

    # If an attribution window sync is interrupted, start where it left off
    start_date = get_attribution_window_bookmark(state, advertiser_ids, stream.stream)
    if start_date is None:
        start_date = apply_conversion_window(
            config,
            get_start_for_stream(config, state, advertiser_ids, stream.stream),
        )

    # According to Criteo's documentation the StatisticsApi only supports
    # between one and three dimensions and at least one metric.
    report_dimensions = [
        field
        for field in field_list
        if metadata.get(mdata, ("properties", field), "tap-criteo.behaviour")
        == "dimension"
    ]
    LOGGER.info("Selected dimensions: %s", report_dimensions)
    if not 0 <= len(report_dimensions) <= 3:
        raise ValueError(
            "%s stream only supports up to 3 selected dimensions" % stream.stream
        )
    report_metrics = [
        field
        for field in field_list
        if metadata.get(mdata, ("properties", field), "tap-criteo.behaviour")
        == "metric"
    ]
    LOGGER.info("Selected metrics: %s", report_metrics)
    if not len(report_metrics) >= 1:
        raise ValueError(
            "%s stream must have at least 1 selected metric" % stream.stream
        )

    while start_date <= get_end_date(config):
        token = refresh_auth_token(sdk_client, token)
        sync_statistics_for_day(
            config,
            state,
            stream,
            sdk_client,
            token,
            start_date,
            report_metrics,
            report_dimensions,
        )
        start_date = start_date + relativedelta(days=1)
        bookmarks.write_bookmark(
            state,
            state_key_name(advertiser_ids, stream.stream),
            "last_attribution_window_date",
            utils.strftime(start_date),
        )
        singer.write_state(state)

    bookmarks.clear_bookmark(
        state,
        state_key_name(advertiser_ids, stream.stream),
        "last_attribution_window_date",
    )
    singer.write_state(state)
    LOGGER.info(
        "Done syncing the %s report for advertiser_ids %s",
        stream.stream,
        advertiser_ids,
    )
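
# Standalone illustration of the "tap-criteo.behaviour" metadata lookup that
# drives the dimension/metric split above. The field names and schema here are
# invented for the example; only the singer.metadata API is real.
from singer import metadata

raw_metadata = [
    {"breadcrumb": ["properties", "Day"],
     "metadata": {"tap-criteo.behaviour": "dimension"}},
    {"breadcrumb": ["properties", "Clicks"],
     "metadata": {"tap-criteo.behaviour": "metric"}},
]
mdata = metadata.to_map(raw_metadata)

# Fields classified as dimensions feed report_dimensions; metrics feed
# report_metrics.
assert metadata.get(mdata, ("properties", "Day"), "tap-criteo.behaviour") == "dimension"
assert metadata.get(mdata, ("properties", "Clicks"), "tap-criteo.behaviour") == "metric"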
def sync_report(stream_name, stream_metadata, sdk_client):
    report_window_days = CONFIG.get("MAX_REPORT_TIME_WINDOW", 365)
    is_incremental = False
    if metadata.get(stream_metadata, (), "replication-method") == "INCREMENTAL":
        is_incremental = True

    customer_id = sdk_client.client_customer_id
    stream_schema, _ = create_schema_for_report(stream_name, sdk_client)
    stream_schema = add_synthetic_keys_to_stream_schema(stream_schema)

    xml_attribute_list = get_fields_to_sync(stream_schema, stream_metadata)

    primary_keys = metadata.get(stream_metadata, (), 'tap-adwords.report-key-properties') or []
    LOGGER.info("{} primary keys are {}".format(stream_name, primary_keys))

    write_schema(stream_name, stream_schema, primary_keys, bookmark_properties=['day'])

    field_list = []
    for field in xml_attribute_list:
        field_list.append(stream_metadata[('properties', field)]['adwords.fieldName'])

    check_selected_fields(stream_name, field_list, sdk_client)

    # If an attribution window sync is interrupted, start where it left off;
    # the bookmarked day has already been synced, so advance one day past it.
    start_date = get_attribution_window_bookmark(customer_id, stream_name)
    if start_date is not None:
        start_date = start_date + relativedelta(days=1)
    if start_date is None:
        start_date = apply_conversion_window(
            get_start_for_stream(customer_id, stream_name))

    if stream_name in REPORTS_WITH_90_DAY_MAX:
        cutoff = utils.now() + relativedelta(days=-90)
        if start_date < cutoff:
            start_date = cutoff
            LOGGER.warning(
                "report only supports up to 90 days, will start at {}".format(
                    start_date))
    start_date = start_date.replace(hour=0, minute=0, second=0, microsecond=0)

    LOGGER.info('Selected fields: %s', field_list)

    # Cap the end date at yesterday, the last full day.
    max_end_date = utils.now() - relativedelta(days=1)
    required_end_date = get_end_date()
    report_end_date = min(max_end_date, required_end_date)
    report_end_date = report_end_date.replace(hour=23, minute=59, second=59,
                                              microsecond=0)

    next_start_date = start_date
    is_single_day_report = stream_name in REPORTS_REQUIRING_DAILY_REPORTS
    start_plus_window = next_start_date
    if not is_single_day_report:
        start_plus_window += relativedelta(days=report_window_days)
    end_date = min(start_plus_window, report_end_date)

    # Walk the date range in report_window_days-sized chunks (single days for
    # reports that must be requested one day at a time), bookmarking after
    # each chunk so an interrupted run can resume.
    while next_start_date <= report_end_date:
        singer.log_info("syncing %s for %s - %s", stream_name,
                        next_start_date.strftime("%Y-%m-%d"),
                        end_date.strftime("%Y-%m-%d"))
        actual_end_date = min(end_date, report_end_date)
        sync_report_for_day(stream_name, stream_schema, sdk_client,
                            next_start_date, field_list, actual_end_date)
        next_start_date = end_date + relativedelta(days=1)
        start_plus_window = next_start_date
        if not is_single_day_report:
            start_plus_window += relativedelta(days=report_window_days)
        end_date = start_plus_window
        bookmarks.write_bookmark(STATE,
                                 state_key_name(customer_id, stream_name),
                                 'last_attribution_window_date',
                                 actual_end_date.strftime(utils.DATETIME_FMT))
        singer.write_state(STATE)

    # Full-table streams re-sync the whole window every run, so drop the
    # resume bookmark once the sync completes.
    if not is_incremental:
        bookmarks.clear_bookmark(STATE,
                                 state_key_name(customer_id, stream_name),
                                 'last_attribution_window_date')
        singer.write_state(STATE)
    LOGGER.info("Done syncing the %s report for customer_id %s",
                stream_name, customer_id)
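
# To make the window arithmetic above concrete: a self-contained sketch (the
# name and setup are illustrative, not part of the tap) that yields the
# inclusive (start, end) chunks the while-loop walks, stepping
# report_window_days at a time, or one day at a time for reports that must be
# requested daily.
from datetime import datetime
from dateutil.relativedelta import relativedelta

def report_windows(start_date, report_end_date, report_window_days,
                   is_single_day_report):
    next_start = start_date
    while next_start <= report_end_date:
        window_end = next_start
        if not is_single_day_report:
            window_end += relativedelta(days=report_window_days)
        yield next_start, min(window_end, report_end_date)
        next_start = window_end + relativedelta(days=1)

# Example: a ten-day range chunked into windows spanning four extra days each,
# printing 2020-01-01 - 2020-01-05, then 2020-01-06 - 2020-01-10.
for chunk_start, chunk_end in report_windows(datetime(2020, 1, 1),
                                             datetime(2020, 1, 10), 4, False):
    print(chunk_start.date(), "-", chunk_end.date())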