def census_write(query, table):
  if project.verbose:
    print('%s: %s' % (table, query))

  if project.task['to']['type'] == 'table':
    query_to_table(
      project.task['auth'],
      project.id,
      project.task['to']['dataset'],
      table,
      query,
      legacy=False)

  elif project.task['to']['type'] == 'view':
    query_to_view(
      project.task['auth'],
      project.id,
      project.task['to']['dataset'],
      table,
      query,
      legacy=False)
def census_write(config, task, query, table):
  if config.verbose:
    print('%s: %s' % (table, query))

  if task['to']['type'] == 'table':
    query_to_table(
      config,
      task['auth'],
      config.project,
      task['to']['dataset'],
      table,
      query,
      legacy=False)

  elif task['to']['type'] == 'view':
    query_to_view(
      config,
      task['auth'],
      config.project,
      task['to']['dataset'],
      table,
      query,
      legacy=False)
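# Hedged illustration, not part of the original module: the shape of the task
# dict that census_write() reads above. Field names mirror the lookups in the
# function; the literal values are placeholders only.
EXAMPLE_CENSUS_WRITE_TASK = {
    'auth': 'service',
    'to': {
        'type': 'table',  # 'table' routes to query_to_table, 'view' to query_to_view
        'dataset': 'example_dataset'
    }
}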
def bigquery():

  if 'function' in project.task:
    query = None

    if project.task['function'] == 'pearson_significance_test':
      query = pearson_significance_test()

    if query:
      run_query(project.task['auth'], project.id, query, False,
                project.task['to']['dataset'])

  elif 'run' in project.task and 'query' in project.task.get('run', {}):
    if project.verbose:
      print('QUERY', project.task['run']['query'])

    run_query(
      project.task['auth'],
      project.id,
      query_parameters(project.task['run']['query'],
                       project.task['run'].get('parameters')),
      project.task['run'].get('legacy', True),
    )

  elif 'values' in project.task['from']:
    rows = get_rows(project.task['auth'], project.task['from'])

    rows_to_table(project.task['to'].get('auth', project.task['auth']),
                  project.id, project.task['to']['dataset'],
                  project.task['to']['table'], rows,
                  project.task.get('schema', []), 0)

  elif 'query' in project.task['from']:

    if 'table' in project.task['to']:
      if project.verbose:
        print('QUERY TO TABLE', project.task['to']['table'])

      query_to_table(
        project.task['auth'],
        project.id,
        project.task['to']['dataset'],
        project.task['to']['table'],
        query_parameters(project.task['from']['query'],
                         project.task['from'].get('parameters')),
        disposition=project.task['write_disposition']
          if 'write_disposition' in project.task else 'WRITE_TRUNCATE',
        legacy=project.task['from'].get(
          'legacy', project.task['from'].get(
            'useLegacySql', True)),  # DEPRECATED: useLegacySql
        target_project_id=project.task['to'].get('project_id', project.id))

    elif 'sheet' in project.task['to']:
      if project.verbose:
        print('QUERY TO SHEET', project.task['to']['sheet'])

      rows = query_to_rows(
        project.task['auth'],
        project.id,
        project.task['from']['dataset'],
        query_parameters(project.task['from']['query'],
                         project.task['from'].get('parameters')),
        legacy=project.task['from'].get('legacy', True))

      # makes sure types are correct in sheet
      rows = rows_to_type(rows)

      sheets_clear(project.task['to'].get('auth', project.task['auth']),
                   project.task['to']['sheet'], project.task['to']['tab'],
                   project.task['to'].get('range', 'A2'))
      sheets_write(project.task['to'].get('auth', project.task['auth']),
                   project.task['to']['sheet'], project.task['to']['tab'],
                   project.task['to'].get('range', 'A2'), rows)

    elif 'sftp' in project.task['to']:
      rows = query_to_rows(
        project.task['auth'],
        project.id,
        project.task['from']['dataset'],
        query_parameters(project.task['from']['query'],
                         project.task['from'].get('parameters')),
        legacy=project.task['from'].get('use_legacy_sql', True))

      if rows:
        if project.verbose:
          print('QUERY TO SFTP')
        put_rows(project.task['auth'], project.task['to'], rows)

    else:
      if project.verbose:
        print('QUERY TO VIEW', project.task['to']['view'])

      query_to_view(
        project.task['auth'],
        project.id,
        project.task['to']['dataset'],
        project.task['to']['view'],
        query_parameters(project.task['from']['query'],
                         project.task['from'].get('parameters')),
        project.task['from'].get(
          'legacy', project.task['from'].get(
            'useLegacySql', True)),  # DEPRECATED: useLegacySql
        project.task['to'].get('replace', False))

  else:
    if project.verbose:
      print('STORAGE TO TABLE', project.task['to']['table'])

    storage_to_table(
      project.task['auth'], project.id, project.task['to']['dataset'],
      project.task['to']['table'],
      project.task['from']['bucket'] + ':' + project.task['from']['path'],
      project.task.get('schema', []), project.task.get('skip_rows', 1),
      project.task.get('structure', 'CSV'),
      project.task.get('disposition', 'WRITE_TRUNCATE'))
def bigquery():

  if 'run' in project.task and 'query' in project.task.get('run', {}):
    if project.verbose:
      print("QUERY", project.task['run']['query'])

    run_query(
      project.task['auth'],
      project.id,
      project.task['run']['query'],
      project.task['run'].get('legacy', True),
      #project.task['run'].get('billing_project_id', None)
    )

  elif 'values' in project.task['from']:
    rows = get_rows(project.task['auth'], project.task['from'])

    rows_to_table(project.task['to'].get('auth', project.task['auth']),
                  project.id, project.task['to']['dataset'],
                  project.task['to']['table'], rows,
                  project.task.get('schema', []), 0)

  elif 'query' in project.task['from']:

    if 'table' in project.task['to']:
      if project.verbose:
        print("QUERY TO TABLE", project.task['to']['table'])

      if 'pre_process_query' in project.task['to']:
        print('executing statement')
        execute_statement(project.task['auth'],
                          project.id,
                          project.task['to']['dataset'],
                          project.task['to']['pre_process_query'],
                          use_legacy_sql=project.task['from'].get(
                            'legacy', project.task['from'].get(
                              'useLegacySql', True)))

      query_to_table(
        project.task['auth'],
        project.id,
        project.task['to']['dataset'],
        project.task['to']['table'],
        query_parameters(project.task['from']['query'],
                         project.task['from'].get('parameters')),
        disposition=project.task['write_disposition']
          if 'write_disposition' in project.task else 'WRITE_TRUNCATE',
        legacy=project.task['from'].get(
          'legacy', project.task['from'].get(
            'useLegacySql', True)),  # DEPRECATED: useLegacySql
        target_project_id=project.task['to'].get('project_id', project.id))

    # NOT USED SO RIPPING IT OUT
    # Mauriciod: Yes, it is used, look at project/mauriciod/target_winrate.json
    elif 'storage' in project.task['to']:
      if project.verbose:
        print("QUERY TO STORAGE", project.task['to']['storage'])

      local_file_name = '/tmp/%s' % str(uuid.uuid1())
      rows = query_to_rows(project.task['auth'], project.id,
                           project.task['from']['dataset'],
                           project.task['from']['query'])

      # csv.writer requires a text-mode file handle in Python 3
      f = open(local_file_name, 'w', newline='')
      writer = csv.writer(f)
      writer.writerows(rows)
      f.close()

      f = open(local_file_name, 'rb')
      object_put(project.task['auth'], project.task['to']['storage'], f)
      f.close()

      os.remove(local_file_name)

    elif 'sheet' in project.task['to']:
      if project.verbose:
        print("QUERY TO SHEET", project.task['to']['sheet'])

      rows = query_to_rows(project.task['auth'], project.id,
                           project.task['from']['dataset'],
                           project.task['from']['query'],
                           legacy=project.task['from'].get('legacy', True))

      # makes sure types are correct in sheet
      rows = rows_to_type(rows)

      sheets_clear(project.task['auth'], project.task['to']['sheet'],
                   project.task['to']['tab'],
                   project.task['to'].get('range', 'A2'))
      sheets_write(project.task['auth'], project.task['to']['sheet'],
                   project.task['to']['tab'],
                   project.task['to'].get('range', 'A2'), rows)

    elif 'sftp' in project.task['to']:
      rows = query_to_rows(project.task['auth'], project.id,
                           project.task['from']['dataset'],
                           project.task['from']['query'],
                           legacy=project.task['from'].get('use_legacy_sql', True))

      if rows:
        if project.verbose:
          print("QUERY TO SFTP")
        put_rows(project.task['auth'], project.task['to'], rows)

    else:
      if project.verbose:
        print("QUERY TO VIEW", project.task['to']['view'])

      query_to_view(
        project.task['auth'],
        project.id,
        project.task['to']['dataset'],
        project.task['to']['view'],
        query_parameters(project.task['from']['query'],
                         project.task['from'].get('parameters')),
        project.task['from'].get(
          'legacy', project.task['from'].get(
            'useLegacySql', True)),  # DEPRECATED: useLegacySql
        project.task['to'].get('replace', False))

  else:
    if project.verbose:
      print("STORAGE TO TABLE", project.task['to']['table'])

    storage_to_table(
      project.task['auth'], project.id, project.task['to']['dataset'],
      project.task['to']['table'],
      project.task['from']['bucket'] + ':' + project.task['from']['path'],
      project.task.get('schema', []), project.task.get('skip_rows', 1),
      project.task.get('structure', 'CSV'),
      project.task.get('disposition', 'WRITE_TRUNCATE'))
def bigquery_query():
  """Execute a query and write results to table.

  TODO: Replace with get_rows and put_rows combination.

  See:
    scripts/bigquery_query.json
    scripts/bigquery_storage.json
    scripts/bigquery_to_sheet.json
    scripts/bigquery_view.json
  """

  if 'table' in project.task['to']:
    if project.verbose:
      print('QUERY TO TABLE', project.task['to']['table'])

    query_to_table(
      project.task['auth'],
      project.id,
      project.task['to']['dataset'],
      project.task['to']['table'],
      query_parameters(project.task['from']['query'],
                       project.task['from'].get('parameters')),
      disposition=project.task['write_disposition']
        if 'write_disposition' in project.task else 'WRITE_TRUNCATE',
      legacy=project.task['from'].get(
        'legacy', project.task['from'].get('useLegacySql', True)),  # DEPRECATED: useLegacySql
      target_project_id=project.task['to'].get('project_id', project.id))

  elif 'sheet' in project.task['to']:
    if project.verbose:
      print('QUERY TO SHEET', project.task['to']['sheet'])

    rows = query_to_rows(project.task['auth'], project.id,
                         project.task['from']['dataset'],
                         query_parameters(project.task['from']['query'],
                                          project.task['from'].get('parameters')),
                         legacy=project.task['from'].get('legacy', True))

    # makes sure types are correct in sheet
    rows = rows_to_type(rows)

    sheets_clear(project.task['to'].get('auth', project.task['auth']),
                 project.task['to']['sheet'], project.task['to']['tab'],
                 project.task['to'].get('range', 'A2'))
    sheets_write(project.task['to'].get('auth', project.task['auth']),
                 project.task['to']['sheet'], project.task['to']['tab'],
                 project.task['to'].get('range', 'A2'), rows)

  elif 'sftp' in project.task['to']:
    if project.verbose:
      print('QUERY TO SFTP')

    rows = query_to_rows(
      project.task['auth'],
      project.id,
      project.task['from']['dataset'],
      query_parameters(project.task['from']['query'],
                       project.task['from'].get('parameters')),
      legacy=project.task['from'].get('use_legacy_sql', True))

    if rows:
      put_rows(project.task['auth'], project.task['to'], rows)

  else:
    if project.verbose:
      print('QUERY TO VIEW', project.task['to']['view'])

    query_to_view(
      project.task['auth'],
      project.id,
      project.task['to']['dataset'],
      project.task['to']['view'],
      query_parameters(project.task['from']['query'],
                       project.task['from'].get('parameters')),
      project.task['from'].get(
        'legacy', project.task['from'].get('useLegacySql', True)),  # DEPRECATED: useLegacySql
      project.task['to'].get('replace', False))
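# Hedged illustration, not part of the original module: one possible
# query-to-table task for bigquery_query() above. Keys mirror the lookups in
# the function; dataset, table, and query values are placeholders (the real
# shapes live in the scripts/*.json recipes referenced in the docstring).
EXAMPLE_BIGQUERY_QUERY_TASK = {
    'auth': 'service',
    'from': {
        'query': 'SELECT * FROM `example_dataset.example_source`',
        'legacy': False
    },
    'to': {
        'dataset': 'example_dataset',
        'table': 'example_destination'
    },
    'write_disposition': 'WRITE_TRUNCATE'
}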
def run_floodlight_reports(project):
  if project.verbose:
    print('Creating Floodlight reports')

  body = {
    "kind": "dfareporting#report",
    "name": '',  # this is updated below based on Floodlight Config ID
    "format": "CSV",
    "type": "FLOODLIGHT",
    "floodlightCriteria": {
      "dateRange": {
        "kind": "dfareporting#dateRange",
        "relativeDateRange": "LAST_60_DAYS"
      },
      "floodlightConfigId": {
        "kind": "dfareporting#dimensionValue",
        "dimensionName": "dfa:floodlightConfigId",
        "value": 0,  # updated below and replaced with Floodlight Config ID
        "matchType": "EXACT"
      },
      "reportProperties": {
        "includeUnattributedIPConversions": False,
        "includeUnattributedCookieConversions": True
      },
      "dimensions": [
        { "kind": "dfareporting#sortedDimension", "name": "dfa:site" },
        { "kind": "dfareporting#sortedDimension", "name": "dfa:floodlightAttributionType" },
        { "kind": "dfareporting#sortedDimension", "name": "dfa:interactionType" },
        { "kind": "dfareporting#sortedDimension", "name": "dfa:pathType" },
        { "kind": "dfareporting#sortedDimension", "name": "dfa:browserPlatform" },
        { "kind": "dfareporting#sortedDimension", "name": "dfa:platformType" },
        { "kind": "dfareporting#sortedDimension", "name": "dfa:week" },
        { "kind": "dfareporting#sortedDimension", "name": "dfa:placementId" },
        { "kind": "dfareporting#sortedDimension", "name": "dfa:floodlightConfigId" }
      ],
      "metricNames": [
        "dfa:activityClickThroughConversions",
        "dfa:activityViewThroughConversions",
        "dfa:totalConversions",
        "dfa:totalConversionsRevenue"
      ]
    },
    "schedule": {
      "active": True,
      "repeats": "WEEKLY",
      "every": 1,
      "repeatsOnWeekDays": ["Sunday"]
    },
    "delivery": {
      "emailOwner": False
    }
  }

  reports = []
  floodlightConfigs = project.task.get('floodlightConfigIds', None)

  for configId in floodlightConfigs:
    body['name'] = project.task.get('reportPrefix', '') + "_" + str(configId)
    body['floodlightCriteria']['floodlightConfigId']['value'] = configId
    report = report_build('user', project.task['account'], body)
    reports.append(report['id'])

  if project.verbose:
    print('Finished creating Floodlight reports - moving to BQ')

  queries = []

  for createdReportId in reports:
    filename, report = report_file(
      'user',
      project.task['account'],
      createdReportId,
      None,
      project.task.get('timeout', 10),
    )

    if report:
      if project.verbose:
        print('Floodlight config report ', filename)

      # clean up the report
      rows = report_to_rows(report)
      rows = report_clean(rows)

      # determine schema
      schema = report_schema(next(rows))

      out_block = {}
      out_block['bigquery'] = {}
      out_block['bigquery']['dataset'] = project.task['dataset']
      out_block['bigquery']['schema'] = schema
      out_block['bigquery']['skip_rows'] = 0
      out_block['bigquery']['table'] = 'z_Floodlight_CM_Report_' + str(createdReportId)

      # write rows using standard out block in json ( allows customization across all scripts )
      if rows:
        put_rows('service', out_block, rows)
        queries.append('SELECT * FROM `{0}.{1}.{2}`'.format(
          project.id,
          out_block['bigquery']['dataset'],
          out_block['bigquery']['table']))

  if project.verbose:
    print('Moved reports to BQ tables - starting join')

  finalQuery = ' UNION ALL '.join(queries)

  query_to_table('service',
                 project.id,
                 project.task['dataset'],
                 CM_FLOODLIGHT_OUTPUT_TABLE,
                 finalQuery,
                 legacy=False)

  if project.verbose:
    print('Finished with Floodlight Config reports')
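# Hedged illustration, not part of the original module: the task fields that
# run_floodlight_reports() reads above. The account and Floodlight config IDs
# are placeholders, not real values.
EXAMPLE_FLOODLIGHT_TASK = {
    'account': 1234567,
    'floodlightConfigIds': [1111111, 2222222],
    'reportPrefix': 'Floodlight_Monitor',
    'dataset': 'example_dataset',
    'timeout': 10
}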
def create_dv360_segments(project):
  a1_notation = 'A:M'

  schema = [
    { "type": "STRING", "name": "Advertiser", "mode": "NULLABLE" },
    { "type": "INTEGER", "name": "Advertiser_Id", "mode": "NULLABLE" },
    { "type": "STRING", "name": "Campaign", "mode": "NULLABLE" },
    { "type": "INTEGER", "name": "Campaign_Id", "mode": "NULLABLE" },
    { "type": "STRING", "name": "Insertion_Order", "mode": "NULLABLE" },
    { "type": "INTEGER", "name": "Insertion_Order_Id", "mode": "NULLABLE" },
    { "type": "STRING", "name": "Line_Item", "mode": "NULLABLE" },
    { "type": "INTEGER", "name": "Line_Item_Id", "mode": "NULLABLE" },
    { "type": "STRING", "name": "Line_Item_Type", "mode": "NULLABLE" },
    { "type": "INTEGER", "name": "Impressions", "mode": "NULLABLE" },
    { "type": "STRING", "name": "Segment1", "mode": "NULLABLE" },
    { "type": "STRING", "name": "Segment2", "mode": "NULLABLE" },
    { "type": "STRING", "name": "Segment3", "mode": "NULLABLE" }
  ]

  sheet_rows = sheets_read('user',
                           project.task['sheet'],
                           'DV3 Segments',
                           a1_notation,
                           retries=10)

  if not sheet_rows:
    sheet_rows = []

  print('DV360 SEGMENT SHEET TABLE WRITE')

  rows_to_table(
    auth='service',
    project_id=project.id,
    dataset_id=project.task['dataset'],
    table_id=DV360_CUSTOM_SEGMENTS_SHEET_TABLE,
    rows=sheet_rows,
    schema=schema,
    skip_rows=1,
    disposition='WRITE_TRUNCATE')

  # Run Query
  path = os.path.join(os.path.dirname(__file__),
                      SQL_DIRECTORY + DV360_CUSTOM_SEGMENTS_FILENAME)

  query = ''
  with open(path, 'r') as file:
    data = file.read().replace('\n', ' ')
    query = data.replace("{{project_id}}", project.id).replace(
      "{{dataset}}", project.task['dataset'])

  print('DV360 CUSTOM SEGMENT TABLE')

  query_to_table('service',
                 project.id,
                 project.task['dataset'],
                 DV360_CUSTOM_SEGMENTS_TABLE,
                 query,
                 legacy=False)

  # Move Table back to sheets
  query = 'SELECT * from `' + project.id + '.' + project.task[
      'dataset'] + '.' + DV360_CUSTOM_SEGMENTS_TABLE + '`'

  rows = query_to_rows('service',
                       project.id,
                       project.task['dataset'],
                       query,
                       legacy=False)

  # makes sure types are correct in sheet
  a1_notation = a1_notation[:1] + '2' + a1_notation[1:]
  rows = rows_to_type(rows)

  sheets_clear('user', project.task['sheet'], 'DV3 Segments', a1_notation)
  sheets_write('user', project.task['sheet'], 'DV3 Segments', a1_notation, rows)
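# Hedged illustration, not part of the original module: the task fields that
# create_dv360_segments() reads above. The sheet reference and dataset name
# are placeholders.
EXAMPLE_DV360_SEGMENTS_TASK = {
    'sheet': 'https://docs.google.com/spreadsheets/d/EXAMPLE_SHEET_ID/',
    'dataset': 'example_dataset'
}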
def sdf():
  if project.verbose:
    print('SDF TO TABLE', project.task['out']['bigquery']['table'])

  # Set is time partition and write disposition
  is_time_partition = project.task['out']['bigquery'].get('is_time_partition', False)
  disposition = 'WRITE_TRUNCATE'
  if is_time_partition:
    disposition = 'WRITE_APPEND'

  # Read Filter Ids
  filter_id_rows = list(get_rows(project.task['auth'], project.task['read']['filter_ids']))
  filter_ids = [
    filter_id_rows[i:i + FILTER_ID_CHUNK_SIZE]
    for i in range(0, len(filter_id_rows), FILTER_ID_CHUNK_SIZE)
  ]

  # Loop through requested file types
  for file_type in project.task['file_types']:
    current_filter_id_iteration = 0
    i = 0
    table_names = []

    # Create the destination table
    destination_table = '%s_%s' % (project.task['out']['bigquery']['table'],
                                   file_type.lower())
    create_table_if_not_exist(
      project.task['auth'],
      project.id,
      project.task['out']['bigquery']['dataset'],
      destination_table,
      is_time_partition)

    # Request 5 filter ids at a time so the API doesn't timeout
    for partial_filter_ids in filter_ids:
      rows = sdf_read(project.task['auth'],
                      [file_type],
                      project.task['filter_type'],
                      partial_filter_ids,
                      project.task.get('version', '3.1'))

      if rows:
        schema = _sdf_schema(next(rows))
        table_suffix = '%s_%s' % (current_filter_id_iteration, file_type.lower())
        table_name = '%s%s' % (project.task['out']['bigquery']['table'], table_suffix)
        filename = '%s_%s.csv' % (file_type, project.date)

        # Check to see if the table exists, if not create it
        create_table_if_not_exist(
          project.task['auth'],
          project.id,
          project.task['out']['bigquery']['dataset'],
          table_name)

        if 'bigquery' in project.task['out']:
          project.task['out']['bigquery']['schema'] = schema
          project.task['out']['bigquery']['skip_rows'] = 0

        put_rows(project.task['auth'], project.task['out'], filename, rows,
                 variant=table_suffix)

        table_names.append(table_name)
        current_filter_id_iteration = current_filter_id_iteration + 1

    query = _construct_combine_query(
      file_type,
      table_names,
      project.id,
      project.task['out']['bigquery']['dataset'],
      destination_table)

    query_to_table(
      project.task['auth'],
      project.id,
      project.task['out']['bigquery']['dataset'],
      destination_table,
      query,
      disposition=disposition,
      legacy=False)

    # Delete all the temporary tables that were created
    for table_name in table_names:
      drop_table(
        project.task['auth'],
        project.id,
        project.task['out']['bigquery']['dataset'],
        table_name)
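# Hedged illustration, not part of the original module: one possible task for
# sdf() above. Keys mirror the lookups in the function; the filter IDs, file
# types, dataset, and table prefix are placeholders, and the 'read' block is
# assumed to be any source accepted by get_rows().
EXAMPLE_SDF_TASK = {
    'auth': 'user',
    'version': '3.1',
    'file_types': ['INSERTION_ORDER', 'LINE_ITEM'],
    'filter_type': 'ADVERTISER_ID',
    'read': {
        'filter_ids': {
            'values': [1234567]
        }
    },
    'out': {
        'bigquery': {
            'dataset': 'example_dataset',
            'table': 'SDF_',
            'is_time_partition': False
        }
    }
}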