def write_report(report, dataset, table):
  """Convert a dynamic costs report file into rows and load them into BigQuery.

  Args:
    report: report file handle as returned by the DCM report download.
    dataset: unused; destination dataset is read from project.task['out'].
    table: destination BigQuery table name, also used in log messages.

  Raises:
    ValueError: if the report produced no rows at all (never ran).
  """
  # turn report file into cleaned rows
  row_iter = report_clean(report_to_rows(report))

  if not row_iter:
    if project.verbose:
      print("DYNAMIC COSTS REPORT NOT READY:", table)
    return

  if project.verbose:
    print("DYNAMIC COSTS WRITTEN:", table)

  # pull DCM schema automatically from the header row
  try:
    schema = report_schema(next(row_iter))
  except StopIteration:
    # report is empty
    raise ValueError("REPORT DID NOT RUN")

  # write remaining rows to bigquery (header already consumed, so skip_rows=0)
  rows_to_table(
      project.task['out']["auth"],
      project.id,
      project.task['out']["dataset"],
      table,
      row_iter,
      schema,
      0
  )
def dcm_replicate_download(account, name):
  """Download a DCM report by name for one account and write it via put_rows.

  Args:
    account: DCM account id the report belongs to.
    name: report name used to locate the report file.
  """
  filename, report = report_file(project.task['auth'], account, None, name)

  if not report:
    return

  if project.verbose:
    print('DCM FILE', filename)

  # clean up the report
  rows = report_clean(report_to_rows(report))

  # if bigquery, remove header and determine schema
  if 'bigquery' in project.task['out']:
    bq_out = project.task['out']['bigquery']
    bq_out['table'] = table_name_sanitize(name)
    bq_out['schema'] = report_schema(next(rows))
    bq_out['skip_rows'] = 0

  # write rows using standard out block in json ( allows customization across all scripts )
  if rows:
    put_rows(project.task['auth'], project.task['out'], rows)
def dcm():
  """Process the DCM report task: delete, run, create, build, and/or download.

  All configuration comes from project.task; each phase is optional and
  triggered by the presence of its key ('delete', 'report_run_only',
  'type' (deprecated), 'body', 'out').
  """
  if project.verbose:
    print('DCM')

  # stores existing report json
  report = None

  # check if report is to be deleted
  if project.task.get('delete', False):
    if project.verbose:
      print(
          'DCM DELETE',
          project.task['report'].get('name', None)
          or project.task['report'].get('body', {}).get('name', None)
          or project.task['report'].get('report_id', None)
      )
    report_delete(
        project.task['auth'],
        project.task['report']['account'],
        project.task['report'].get('report_id', None),
        project.task['report'].get('name', None)
        or project.task['report'].get('body', {}).get('name', None),
    )

  # check if report is to be run
  if project.task.get('report_run_only', False):
    if project.verbose:
      print(
          'DCM REPORT RUN',
          project.task['report'].get('name', None)
          or project.task['report'].get('report_id', None)
      )
    report_run(
        project.task['auth'],
        project.task['report']['account'],
        project.task['report'].get('report_id', None),
        project.task['report'].get('name', None),
    )

  # check if report is to be created - DEPRECATED
  if 'type' in project.task['report']:
    if project.verbose:
      print('DCM CREATE')
    report = report_create(
        project.task['auth'],
        project.task['report']['account'],
        project.task['report']['name'],
        project.task['report']
    )

  # check if report is to be created
  if 'body' in project.task['report']:
    if project.verbose:
      print('DCM BUILD', project.task['report']['body']['name'])

    # filters can be passed using special get_rows handler, allows reading values from sheets etc...
    if 'filters' in project.task['report']:
      for f, d in project.task['report']['filters'].items():
        for v in get_rows(project.task['auth'], d):
          # accounts are specified in a unique part of the report json
          # BUGFIX: was `f in 'accountId'`, a substring test that also
          # matched keys like 'Id' or '' — use exact equality instead.
          if f == 'accountId':
            project.task['report']['body']['accountId'] = v
          # activities are specified in a unique part of the report json
          # BUGFIX: was `f in 'dfa:activity'` (substring test) — use equality.
          elif f == 'dfa:activity':
            project.task['report']['body']['reachCriteria']['activities'].setdefault(
                'filters', []).append({
                    "kind": "dfareporting#dimensionValue",
                    "dimensionName": f,
                    "id": v
                })
          # all other filters go in the same place
          else:
            project.task['report']['body']['criteria'].setdefault(
                'dimensionFilters', []).append({
                    "kind": "dfareporting#dimensionValue",
                    "dimensionName": f,
                    "id": v,
                    "matchType": "EXACT"
                })

    report = report_build(
        project.task['auth'],
        project.task['report']['body'].get('accountId')
        or project.task['report']['account'],
        project.task['report']['body']
    )

  # moving a report
  if 'out' in project.task:
    filename, report = report_file(
        project.task['auth'],
        project.task['report']['account'],
        project.task['report'].get('report_id', None),
        project.task['report'].get('name', None)
        or project.task['report'].get('body', {}).get('name', None),
        project.task['report'].get('timeout', 10),
    )

    if report:
      if project.verbose:
        print('DCM FILE', filename)

      # clean up the report
      rows = report_to_rows(report)
      rows = report_clean(rows)

      # if bigquery, remove header and determine schema
      if 'bigquery' in project.task['out']:
        project.task['out']['bigquery']['schema'] = report_schema(next(rows))
        project.task['out']['bigquery']['skip_rows'] = 0

      # write rows using standard out block in json ( allows customization across all scripts )
      if rows:
        put_rows(project.task['auth'], project.task['out'], rows)
def run_floodlight_reports(project):
  """Create one Floodlight report per config id, load each into BigQuery,
  then union all staging tables into CM_FLOODLIGHT_OUTPUT_TABLE.

  Args:
    project: the project object carrying task configuration
      ('floodlightConfigIds', 'reportPrefix', 'account', 'dataset', 'timeout').
  """
  if project.verbose:
    print('Creating Floodlight reports')

  body = {
      "kind": "dfareporting#report",
      "name": '',  # this is updated below based on Floodlight Config ID
      "format": "CSV",
      "type": "FLOODLIGHT",
      "floodlightCriteria": {
          "dateRange": {
              "kind": "dfareporting#dateRange",
              "relativeDateRange": "LAST_60_DAYS"
          },
          "floodlightConfigId": {
              "kind": "dfareporting#dimensionValue",
              "dimensionName": "dfa:floodlightConfigId",
              "value": 0,  # updated below and replaced with Floodlight Config ID
              "matchType": "EXACT"
          },
          "reportProperties": {
              "includeUnattributedIPConversions": False,
              "includeUnattributedCookieConversions": True
          },
          "dimensions": [{
              "kind": "dfareporting#sortedDimension",
              "name": "dfa:site"
          }, {
              "kind": "dfareporting#sortedDimension",
              "name": "dfa:floodlightAttributionType"
          }, {
              "kind": "dfareporting#sortedDimension",
              "name": "dfa:interactionType"
          }, {
              "kind": "dfareporting#sortedDimension",
              "name": "dfa:pathType"
          }, {
              "kind": "dfareporting#sortedDimension",
              "name": "dfa:browserPlatform"
          }, {
              "kind": "dfareporting#sortedDimension",
              "name": "dfa:platformType"
          }, {
              "kind": "dfareporting#sortedDimension",
              "name": "dfa:week"
          }, {
              "kind": "dfareporting#sortedDimension",
              "name": "dfa:placementId"
          }, {
              "kind": "dfareporting#sortedDimension",
              "name": "dfa:floodlightConfigId"
          }],
          "metricNames": [
              "dfa:activityClickThroughConversions",
              "dfa:activityViewThroughConversions",
              "dfa:totalConversions",
              "dfa:totalConversionsRevenue"
          ]
      },
      "schedule": {
          "active": True,
          "repeats": "WEEKLY",
          "every": 1,
          "repeatsOnWeekDays": ["Sunday"]
      },
      "delivery": {
          "emailOwner": False
      }
  }

  reports = []
  # BUGFIX: default to an empty list — the old default of None crashed
  # the for-loop when the key was missing from the task.
  floodlightConfigs = project.task.get('floodlightConfigIds') or []

  for configId in floodlightConfigs:
    body['name'] = project.task.get('reportPrefix', '') + "_" + str(configId)
    body['floodlightCriteria']['floodlightConfigId']['value'] = configId
    report = report_build('user', project.task['account'], body)
    reports.append(report['id'])

  if project.verbose:
    print('Finished creating Floodlight reports - moving to BQ')

  queries = []

  for createdReportId in reports:
    filename, report = report_file(
        'user',
        project.task['account'],
        createdReportId,
        None,
        project.task.get('timeout', 10),
    )

    if report:
      if project.verbose:
        print('Floodlight config report ', filename)

      # clean up the report
      rows = report_to_rows(report)
      rows = report_clean(rows)

      # determine schema from the header row
      schema = report_schema(next(rows))

      out_block = {}
      out_block['bigquery'] = {}
      out_block['bigquery']['dataset'] = project.task['dataset']
      out_block['bigquery']['schema'] = schema
      out_block['bigquery']['skip_rows'] = 0
      out_block['bigquery']['table'] = 'z_Floodlight_CM_Report_' + str(
          createdReportId)

      # write rows using standard out block in json ( allows customization across all scripts )
      if rows:
        put_rows('service', out_block, rows)

      queries.append('SELECT * FROM `{0}.{1}.{2}`'.format(
          project.id, out_block['bigquery']['dataset'],
          out_block['bigquery']['table']))

  if project.verbose:
    print('Moved reports to BQ tables - starting join')

  # BUGFIX: only run the union when there is at least one staged table;
  # joining an empty list produced an empty (invalid) SQL statement.
  if queries:
    finalQuery = ' UNION ALL '.join(queries)

    query_to_table(
        'service',
        project.id,
        project.task['dataset'],
        CM_FLOODLIGHT_OUTPUT_TABLE,
        finalQuery,
        legacy=False)

  if project.verbose:
    print('Finished with Floodlight Config reports')
def dcm():
  """Handle the DCM report task: optional delete, run, build, and download.

  Reads everything from project.task; each phase fires only when its key
  is present ('delete', 'report_run_only', 'body', 'out').
  """
  task = project.task
  report_conf = task['report']

  if project.verbose:
    print('DCM')

  # holds the existing report json, if any
  report = None

  # delete an existing report first when requested
  if task.get('delete', False):
    if project.verbose:
      label = (report_conf.get('name', None)
               or report_conf.get('body', {}).get('name', None)
               or report_conf.get('report_id', None))
      print('DCM DELETE', label)
    report_delete(
        task['auth'],
        report_conf['account'],
        report_conf.get('report_id', None),
        report_conf.get('name', None)
        or report_conf.get('body', {}).get('name', None),
    )

  # trigger a report run only, when requested
  if task.get('report_run_only', False):
    if project.verbose:
      print('DCM REPORT RUN',
            report_conf.get('name', None)
            or report_conf.get('report_id', None))
    report_run(
        task['auth'],
        report_conf['account'],
        report_conf.get('report_id', None),
        report_conf.get('name', None),
    )

  # build the report from a body definition, when one is supplied
  if 'body' in report_conf:
    if project.verbose:
      print('DCM BUILD', report_conf['body']['name'])

    # filters may come from get_rows handlers (sheets etc.) via report_filter
    if 'filters' in report_conf:
      report_conf['body'] = report_filter(
          task['auth'],
          report_conf['body'],
          report_conf['filters']
      )

    report = report_build(
        task['auth'],
        report_conf['body'].get('accountId') or report_conf['account'],
        report_conf['body']
    )

  # moving a report
  if 'out' in task:
    filename, report = report_file(
        task['auth'],
        report_conf['account'],
        report_conf.get('report_id', None),
        report_conf.get('name', None)
        or report_conf.get('body', {}).get('name', None),
        report_conf.get('timeout', 10),
    )

    if report:
      if project.verbose:
        print('DCM FILE', filename)

      # clean up the report
      rows = report_clean(report_to_rows(report))

      # if bigquery, remove header and determine schema
      if 'bigquery' in task['out']:
        task['out']['bigquery']['schema'] = report_schema(next(rows))
        task['out']['bigquery']['skip_rows'] = 0

      # write rows using standard out block in json ( allows customization across all scripts )
      if rows:
        put_rows(task['auth'], task['out'], rows)
def main():
  """Command line helper to debug CM reports: list, fetch, files, sample, schema."""
  parser = argparse.ArgumentParser(
      formatter_class=argparse.RawDescriptionHelpFormatter,
      description=textwrap.dedent("""\
      Command line to help debug CM reports and build reporting tools.

      Examples:
        To get list of reports: python helper.py --account [id] --list -u [user credentials path]
        To get report: python helper.py --account [id] --report [id] -u [user credentials path]
        To get report files: python helper.py --account [id] --files [id] -u [user credentials path]
        To get report sample: python helper.py --account [id] --sample [id] -u [user credentials path]
        To get report schema: python helper.py --account [id] --schema [id] -u [user credentials path]
      """))

  parser.add_argument(
      '--account', help='Account ID to use to pull the report.', default=None)
  parser.add_argument(
      '--report', help='Report ID to pull JSON definition.', default=None)
  # BUGFIX: help text typo 'achema' corrected to 'schema'
  parser.add_argument(
      '--schema', help='Report ID to pull schema definition.', default=None)
  parser.add_argument(
      '--sample', help='Report ID to pull sample data.', default=None)
  parser.add_argument(
      '--files', help='Report ID to pull file list.', default=None)
  parser.add_argument('--list', help='List reports.', action='store_true')

  # initialize project
  project.from_commandline(parser=parser, arguments=('-u', '-c', '-s', '-v'))
  auth = 'service' if project.args.service else 'user'

  # superusers must also pass accountId on every call
  is_superuser, profile = get_profile_for_api(auth, project.args.account)
  kwargs = {
      'profileId': profile,
      'accountId': project.args.account
  } if is_superuser else {
      'profileId': profile
  }

  # get report json
  if project.args.report:
    kwargs['reportId'] = project.args.report
    report = API_DCM(
        auth, internal=is_superuser).reports().get(**kwargs).execute()
    print(json.dumps(report, indent=2, sort_keys=True))

  # get report files
  elif project.args.files:
    kwargs['reportId'] = project.args.files
    for rf in API_DCM(
        auth, internal=is_superuser).reports().files().list(**kwargs).execute():
      print(json.dumps(rf, indent=2, sort_keys=True))

  # get schema (derived from the downloaded report's header row)
  elif project.args.schema:
    filename, report = report_file(auth, project.args.account,
                                   project.args.schema, None, 10)
    rows = report_to_rows(report)
    rows = report_clean(rows)
    print(json.dumps(report_schema(next(rows)), indent=2, sort_keys=True))

  # get sample (first 20 typed rows)
  elif project.args.sample:
    filename, report = report_file(auth, project.args.account,
                                   project.args.sample, None, 10)
    rows = report_to_rows(report)
    rows = report_clean(rows)
    rows = rows_to_type(rows)
    for r in rows_print(rows, row_min=0, row_max=20):
      pass

  # get list
  else:
    for report in API_DCM(
        auth, internal=is_superuser).reports().list(**kwargs).execute():
      print(json.dumps(report, indent=2, sort_keys=True))
print json.dumps(report, indent=2, sort_keys=True) # get report files elif project.args.files: kwargs['reportId'] = project.args.files for report_file in API_DCM(auth).reports().files().list( **kwargs).execute(): print json.dumps(report_file, indent=2, sort_keys=True) # get schema elif project.args.schema: filename, report = report_file(auth, project.args.account, project.args.schema, None, 10) rows = report_to_rows(report) rows = report_clean(rows) print json.dumps(report_schema(rows.next()), indent=2, sort_keys=True) # get sample elif project.args.sample: filename, report = report_file(auth, project.args.account, project.args.sample, None, 10) rows = report_to_rows(report) rows = report_clean(rows) rows = rows_to_type(rows) for r in rows_print(rows, row_min=0, row_max=20): pass # get list else: for report in API_DCM(auth).reports().list(**kwargs).execute(): print json.dumps(report, indent=2, sort_keys=True)
def create_and_move_cm_browser_report(project):
  """Build the CM browser report filtered by advertiser ids and load the
  downloaded file into the dirty BigQuery staging table.

  Args:
    project: project object; task must provide 'cm_browser_report_name',
      'advertiser_ids' (comma separated string), 'account', 'dataset',
      'auth', and optionally 'timeout'.
  """
  browser_report_body = {
      "kind": "dfareporting#report",
      "name": project.task['cm_browser_report_name'],
      "fileName": project.task['cm_browser_report_name'],
      "format": "CSV",
      "type": "STANDARD",
      "criteria": {
          "dateRange": {
              "kind": "dfareporting#dateRange",
              "relativeDateRange": "LAST_24_MONTHS"
          },
          "dimensions": [{
              "kind": "dfareporting#sortedDimension",
              "name": "dfa:campaign"
          }, {
              "kind": "dfareporting#sortedDimension",
              "name": "dfa:campaignId"
          }, {
              "kind": "dfareporting#sortedDimension",
              "name": "dfa:site"
          }, {
              "kind": "dfareporting#sortedDimension",
              "name": "dfa:advertiser"
          }, {
              "kind": "dfareporting#sortedDimension",
              "name": "dfa:advertiserId"
          }, {
              "kind": "dfareporting#sortedDimension",
              "name": "dfa:browserPlatform"
          }, {
              "kind": "dfareporting#sortedDimension",
              "name": "dfa:platformType"
          }, {
              "kind": "dfareporting#sortedDimension",
              "name": "dfa:month"
          }, {
              "kind": "dfareporting#sortedDimension",
              "name": "dfa:week"
          }],
          "metricNames": [
              "dfa:impressions",
              "dfa:clicks",
              "dfa:totalConversions",
              "dfa:activityViewThroughConversions",
              "dfa:activityClickThroughConversions"
          ],
          "dimensionFilters": []
      },
      "schedule": {
          "active": False,
          "repeats": "DAILY",
          "every": 1,
          "startDate": "2019-09-10",
          "expirationDate": "2029-12-09"
      },
      "delivery": {
          "emailOwner": False
      }
  }

  # Remove any duplicate entries from the advertiser ids
  # BUGFIX: the old code only split and never deduplicated; dict.fromkeys
  # dedupes while preserving the original order.
  advertiser_ids = list(
      dict.fromkeys(project.task['advertiser_ids'].split(',')))

  # Update body with all the advertiser filters
  for advertiser in advertiser_ids:
    if advertiser:  # skip empty strings from stray commas
      browser_report_body["criteria"]["dimensionFilters"].append({
          "kind": "dfareporting#dimensionValue",
          "dimensionName": "dfa:advertiser",
          "id": advertiser,
          "matchType": "EXACT"
      })

  # Create report
  report = report_build('user', project.task['account'], browser_report_body)

  # moving a report
  filename, report = report_file(
      'user',
      project.task['account'],
      None,
      project.task['cm_browser_report_name'],
      project.task.get('timeout', 60),
  )

  if report:
    if project.verbose:
      print('DCM FILE: ' + filename)

    # clean up the report
    rows = report_to_rows(report)
    rows = report_clean(rows)

    # consume the header row to determine schema (use next(), not __next__())
    schema = report_schema(next(rows))

    bigquery_out = {}
    bigquery_out["bigquery"] = {
        "dataset": project.task["dataset"],  # TODO: update to read from project
        "table": CM_BROWSER_REPORT_DIRTY_TABLE,
        "is_incremental_load": False,
        "datastudio": True,
        "schema": schema,
        "skip_rows": 0
    }

    # write rows using standard out block in json ( allows customization across all scripts )
    if rows:
      put_rows(project.task['auth'], bigquery_out, rows)