def log_get(job_id=None, timezone='America/Los_Angeles'):
  try:
    project.initialize(_service=UI_SERVICE)

    if job_id:
      query = JOB_STATUS_QUERY % job_id
      row = next(query_to_rows('service', UI_PROJECT, UI_LOG_DATASET, query, row_max=1, legacy=False))
      return {
        'id': row[0],
        'status': row[1],
        'time_stamp': time_local(row[2], timezone),
        'time_ago': time_ago(row[3]),
      } if row else {}

    else:
      query = JOBS_STATUS_QUERY
      rows = query_to_rows('service', UI_PROJECT, UI_LOG_DATASET, query, legacy=False)
      return dict([(
        row[0],
        {
          'id': row[0],
          'status': row[1],
          'time_stamp': time_local(row[2], timezone),
          'time_ago': time_ago(row[3]),
        }
      ) for row in rows])

  except Exception as e:
    print(str(e))
    return {}
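# Usage sketch for log_get above ( values are hypothetical; assumes UI_PROJECT,
# UI_LOG_DATASET, and the status query constants are configured ). Passing a
# job id returns a single status dict; omitting it returns a dict keyed by job id.
status = log_get(job_id='job_123', timezone='America/New_York')
# e.g. {'id': 'job_123', 'status': 'RUNNING', 'time_stamp': ..., 'time_ago': ...}
statuses = log_get()
# e.g. {'job_123': {...}, 'job_456': {...}}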
def bigquery():
  # check schema if given ( check independent of values )
  if 'schema' in project.task['bigquery']:
    schema = table_to_schema(
      project.task['auth'],
      project.id,
      project.task['bigquery']['dataset'],
      project.task['bigquery']['table']
    )
    object_compare(schema['fields'], project.task['bigquery']['schema'])

  # if query given check it
  if 'query' in project.task['bigquery']:
    rows = query_to_rows(
      project.task['auth'],
      project.id,
      project.task['bigquery']['dataset'],
      project.task['bigquery']['query']
    )
    object_compare(sorted(rows), sorted(project.task['bigquery']['values']))

  # simple table check ( unless query given )
  elif 'values' in project.task['bigquery']:
    rows = table_to_rows(
      project.task['auth'],
      project.id,
      project.task['bigquery']['dataset'],
      project.task['bigquery']['table']
    )
    object_compare(sorted(rows), sorted(project.task['bigquery']['values']))
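# A minimal sketch of the task block the test above reads ( keys inferred from
# the code; values are hypothetical ). 'schema' is checked independently of
# values, and 'query' takes precedence over the plain 'values' table check.
project.task = {
  'bigquery': {
    'auth': 'service',
    'dataset': 'Test_Dataset',
    'table': 'Test_Table',
    'schema': [{'name': 'Column_1', 'type': 'STRING', 'mode': 'NULLABLE'}],
    'query': 'SELECT Column_1 FROM Test_Table',
    'values': [['value_1'], ['value_2']]
  }
}
bigquery()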
def dcm_log(config, task):
  if config.verbose:
    print('DCM LOG')

  accounts = list(get_rows(config, 'user', task['accounts']))

  # determine start log date
  if table_exists(config, task['out']['auth'], task['out']['project'],
                  task['out']['dataset'], CHANGELOGS_TABLE):
    start = next(query_to_rows(
      config,
      task['out']['auth'],
      task['out']['project'],
      task['out']['dataset'],
      'SELECT FORMAT_TIMESTAMP("%%Y-%%m-%%dT%%H:%%M:%%S-00:00", MAX(changeTime), "UTC") FROM `%s`' % CHANGELOGS_TABLE,
      1,
      False
    ))[0]
    disposition = 'WRITE_APPEND'

  else:
    start = (datetime.utcnow() - timedelta(days=int(task['days']))).strftime('%Y-%m-%dT%H:%M:%S-00:00')
    disposition = 'WRITE_TRUNCATE'

  # load new logs
  rows = get_changelogs(config, task, accounts, start)

  if rows:
    rows_to_table(config, task['out']['auth'], task['out']['project'],
                  task['out']['dataset'], CHANGELOGS_TABLE, rows,
                  CHANGELOGS_SCHEMA, 0, disposition)
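# Hypothetical task block for dcm_log, with keys inferred from the reads above:
# 'accounts' feeds get_rows, 'days' bounds the first backfill, and 'out' names
# the BigQuery destination that CHANGELOGS_TABLE is written to.
task = {
  'accounts': {'values': [1234567]},
  'days': 7,
  'out': {'auth': 'service', 'project': 'my-project', 'dataset': 'CM_ChangeLogs'}
}
dcm_log(config, task)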
def anonymize_query():
  if project.verbose:
    print('ANONYMIZE QUERY', project.task['bigquery']['from']['query'])

  schema = query_to_schema(
    project.task['auth'],
    project.task['bigquery']['from']['project'],
    project.task['bigquery']['from']['dataset'],
    project.task['bigquery']['from']['query'],
    legacy=project.task['bigquery']['from'].get('legacy', False),
  )

  rows = query_to_rows(
    project.task['auth'],
    project.task['bigquery']['from']['project'],
    project.task['bigquery']['from']['dataset'],
    project.task['bigquery']['from']['query'],
    legacy=project.task['bigquery']['from'].get('legacy', False),
    as_object=True
  )

  rows = anonymize_rows(rows, project.task['bigquery']['to'].get('columns', []))

  json_to_table(
    project.task['auth'],
    project.task['bigquery']['to']['project'],
    project.task['bigquery']['to']['dataset'],
    project.task['bigquery']['to']['table'],
    rows,
    schema,
    disposition='WRITE_TRUNCATE'
  )
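# Hypothetical from/to block for anonymize_query above: the query result is
# copied into the destination table with the listed 'columns' anonymized.
project.task = {
  'auth': 'service',
  'bigquery': {
    'from': {'project': 'source-project', 'dataset': 'Source', 'query': 'SELECT * FROM People', 'legacy': False},
    'to': {'project': 'destination-project', 'dataset': 'Anonymized', 'table': 'People', 'columns': ['email', 'name']}
  }
}
anonymize_query()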
def _process_filters(partners, advertisers, filters, project_id, dataset_id, auth='user'):
  structures = []

  for p in (partners or []):
    structures.append({'type': 'FILTER_PARTNER', 'value': int(p)})

  for a in (advertisers or []):
    structures.append({'type': 'FILTER_ADVERTISER', 'value': int(a)})

  for f in (filters or []):
    if isinstance(f['value'], str) and f['value'].startswith('SELECT '):
      items = query_to_rows(auth, project_id, dataset_id, f['value'])

      filtered = False
      for item in items:
        if item and len(item) != 0:
          filtered = True
          structures.append({'type': f['type'], 'value': item[0]})
        else:
          break

      if not filtered:
        raise Exception('Select filter did not return any values: %s' % f['value'])

    else:
      structures.append({'type': f['type'], 'value': f['value']})

  return structures
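# Usage sketch ( hypothetical ids, project, and dataset ). A filter whose value
# starts with 'SELECT ' is expanded by running it against BigQuery and taking
# the first column of each row; any other value is passed through as a literal.
structures = _process_filters(
  partners=[1234],
  advertisers=[5678],
  filters=[{'type': 'FILTER_MEDIA_PLAN', 'value': 'SELECT plan_id FROM my_dataset.plans'}],
  project_id='my-project',
  dataset_id='my_dataset'
)
# -> [{'type': 'FILTER_PARTNER', 'value': 1234},
#     {'type': 'FILTER_ADVERTISER', 'value': 5678},
#     {'type': 'FILTER_MEDIA_PLAN', 'value': ...}, ...]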
def get_solutions():
  if project.verbose:
    print('GETTING SCORES')

  for solution in project.task['solutions']:
    scores = []

    if 'sheet' in solution:
      scores = sheets_read(project.task['auth'], solution['sheet']['url'],
                           solution['sheet']['tab'], solution['sheet']['range'])
    elif 'bigquery' in solution:
      scores = query_to_rows(project.task['auth'], project.id,
                             solution['bigquery']['dataset'],
                             solution['bigquery']['query'])

    # for easy lookup use dictionary
    solution['scores'] = {}
    for score in scores:
      solution['scores'].setdefault(str(score[0]), [])
      solution['scores'][str(score[0])].append({
        'variant_id': str(score[1]),
        'variant': score[2],
        'score': float(score[3])
      })

    if project.verbose:
      print('GOT SCORES:', len(solution['scores']))

  return project.task['solutions']
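# Shape sketch for the scores read above ( hypothetical values ): each row is
# [solution_id, variant_id, variant, score] and is regrouped per solution.
scores = [['1', '10', 'A', '0.9'], ['1', '11', 'B', '0.7']]
# after get_solutions, solution['scores'] == {
#   '1': [{'variant_id': '10', 'variant': 'A', 'score': 0.9},
#         {'variant_id': '11', 'variant': 'B', 'score': 0.7}]
# }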
def create_cm_site_segmentation(config, task):
  # Read sheet to bq table
  sheet_rows = sheets_read(config, task['auth_sheets'], task['sheet'],
                           'CM_Site_Segments', 'A:C', retries=10)

  if not sheet_rows:
    sheet_rows = []

  schema = [
    {"type": "STRING", "name": "Site_Dcm", "mode": "NULLABLE"},
    {"type": "INTEGER", "name": "Impressions", "mode": "NULLABLE"},
    {"type": "STRING", "name": "Site_Type", "mode": "NULLABLE"}
  ]

  rows_to_table(
    config,
    auth=task['auth_bq'],
    project_id=config.project,
    dataset_id=task['dataset'],
    table_id=CM_SITE_SEGMENTATION_SHEET_TABLE,
    rows=sheet_rows,
    schema=schema,
    skip_rows=1,
    disposition='WRITE_TRUNCATE'
  )

  # Get Site_Type from the sheet
  run_query_from_file(config, task,
                      Queries.cm_site_segmentation.replace('{{dataset}}', task['dataset']),
                      CM_SITE_SEGMENTATION_TABLE)

  # Move Table back to sheets
  query = 'SELECT * from `' + config.project + '.' + task['dataset'] + '.' + CM_SITE_SEGMENTATION_TABLE + '`'
  rows = query_to_rows(config, task['auth_bq'], config.project, task['dataset'], query, legacy=False)

  # makes sure types are correct in sheet
  rows = rows_to_type(rows)

  sheets_clear(config, task['auth_sheets'], task['sheet'], 'CM_Site_Segments', 'A2:C')
  sheets_write(config, task['auth_sheets'], task['sheet'], 'CM_Site_Segments', 'A2:C', rows)
def conversions_download():
  if project.verbose:
    print('CONVERSION DOWNLOAD')

  # pull from bigquery if specified
  if 'bigquery' in project.task:
    if project.verbose:
      print('READING BIGQUERY')
    rows = query_to_rows(
      project.task['auth'],
      project.id,
      project.task['bigquery']['dataset'],
      'SELECT * FROM %s' % project.task['bigquery']['table'],
      legacy=project.task['bigquery'].get('legacy', True))
    for row in rows:
      yield row

  # pull from sheets if specified
  if 'sheets' in project.task:
    if project.verbose:
      print('READING SHEET')
    rows = sheets_read(project.task['auth'], project.task['sheets']['url'],
                       project.task['sheets']['tab'],
                       project.task['sheets']['range'])
    for row in rows:
      yield row

  # pull from csv if specified
  if 'csv' in project.task:
    if project.verbose:
      print('READING CSV FILE')
    with io.open(project.task['csv']['file']) as f:
      for row in csv_to_rows(f):
        if row[0] not in CSV_HEADERS:
          yield row
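# Sketch of the three source blocks conversions_download understands ( values
# are hypothetical; any one block is enough, and each present source yields
# its rows in turn ).
project.task = {
  'auth': 'user',
  'bigquery': {'dataset': 'Conversions', 'table': 'Offline_Conversions', 'legacy': False},
  'sheets': {'url': 'https://docs.google.com/spreadsheets/d/SHEET_ID', 'tab': 'Conversions', 'range': 'A2:E'},
  'csv': {'file': '/tmp/conversions.csv'}
}
for row in conversions_download():
  print(row)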
def get_impacts():
  if project.verbose:
    print('GETTING IMPACTS')

  impacts = []

  if 'sheet' in project.task['impacts']:
    impacts = sheets_read(
      project.task['auth'],
      project.task['impacts']['sheet']['url'],
      project.task['impacts']['sheet']['tab'],
      project.task['impacts']['sheet']['range']
    )
  elif 'bigquery' in project.task['impacts']:
    impacts = query_to_rows(
      project.task['auth'],
      project.id,
      project.task['impacts']['bigquery']['dataset'],
      project.task['impacts']['bigquery']['query']
    )

  # for easy lookup use dictionary
  impacts = dict([(str(i[0]), float(i[1])) for i in impacts])

  if project.verbose:
    print('GOT IMPACTS:', len(impacts))

  return impacts
def create_cm_site_segmentation(project):
  # Read sheet to bq table
  sheet_rows = sheets_read('user', project.task['sheet'], 'CM_Site_Segments',
                           'A:C', retries=10)

  if not sheet_rows:
    sheet_rows = []

  schema = [
    {"type": "STRING", "name": "Site_Dcm", "mode": "NULLABLE"},
    {"type": "INTEGER", "name": "Impressions", "mode": "NULLABLE"},
    {"type": "STRING", "name": "Site_Type", "mode": "NULLABLE"}
  ]

  rows_to_table(
    auth='service',
    project_id=project.id,
    dataset_id=project.task['dataset'],
    table_id=CM_SITE_SEGMENTATION_SHEET_TABLE,
    rows=sheet_rows,
    schema=schema,
    skip_rows=1,
    disposition='WRITE_TRUNCATE')

  # Get Site_Type from the sheet
  run_query_from_file(
    os.path.join(os.path.dirname(__file__),
                 SQL_DIRECTORY + CM_SITE_SEGMENTATION_FILENAME),
    project.id, project.task['dataset'], CM_SITE_SEGMENTATION_TABLE)

  # Move Table back to sheets
  query = 'SELECT * from `' + project.id + '.' + project.task['dataset'] + '.' + CM_SITE_SEGMENTATION_TABLE + '`'
  rows = query_to_rows('service', project.id, project.task['dataset'], query,
                       legacy=False)

  # makes sure types are correct in sheet
  rows = rows_to_type(rows)

  sheets_clear('user', project.task['sheet'], 'CM_Site_Segments', 'A2:C')
  sheets_write('user', project.task['sheet'], 'CM_Site_Segments', 'A2:C', rows)
def create_dv360_segments(config, task):
  a1_notation = 'A:N'

  schema = [
    {"type": "STRING", "name": "Advertiser", "mode": "NULLABLE"},
    {"type": "INTEGER", "name": "Advertiser_Id", "mode": "NULLABLE"},
    {"type": "STRING", "name": "Campaign", "mode": "NULLABLE"},
    {"type": "INTEGER", "name": "Campaign_Id", "mode": "NULLABLE"},
    {"type": "STRING", "name": "Insertion_Order", "mode": "NULLABLE"},
    {"type": "INTEGER", "name": "Insertion_Order_Id", "mode": "NULLABLE"},
    {"type": "STRING", "name": "Line_Item", "mode": "NULLABLE"},
    {"type": "INTEGER", "name": "Line_Item_Id", "mode": "NULLABLE"},
    {"type": "STRING", "name": "Line_Item_Type", "mode": "NULLABLE"},
    {"type": "INTEGER", "name": "Impressions", "mode": "NULLABLE"},
    {"type": "STRING", "name": "SegmentAutoGen", "mode": "NULLABLE"},
    {"type": "STRING", "name": "Segment1", "mode": "NULLABLE"},
    {"type": "STRING", "name": "Segment2", "mode": "NULLABLE"},
    {"type": "STRING", "name": "Segment3", "mode": "NULLABLE"}
  ]

  sheet_rows = sheets_read(config, task['auth_sheets'], task['sheet'],
                           'DV3 Segments', a1_notation, retries=10)

  if not sheet_rows:
    sheet_rows = []

  print('DV360 SEGMENT SHEET TABLE WRITE')

  rows_to_table(
    config,
    auth=task['auth_bq'],
    project_id=config.project,
    dataset_id=task['dataset'],
    table_id=DV360_CUSTOM_SEGMENTS_SHEET_TABLE,
    rows=sheet_rows,
    schema=schema,
    skip_rows=1,
    disposition='WRITE_TRUNCATE'
  )

  # Run Query
  if config.verbose:
    print('RUN DV360 Custom Segments Query')

  run_query_from_file(config, task,
                      Queries.dv360_custom_segments.replace('{{dataset}}', task['dataset']),
                      DV360_CUSTOM_SEGMENTS_TABLE)

  # Move Table back to sheets
  query = 'SELECT * from `' + config.project + '.' + task['dataset'] + '.' + DV360_CUSTOM_SEGMENTS_TABLE + '`'
  rows = query_to_rows(config, task['auth_bq'], config.project, task['dataset'], query, legacy=False)

  # makes sure types are correct in sheet
  a1_notation = a1_notation[:1] + '2' + a1_notation[1:]
  rows = rows_to_type(rows)
  sheets_clear(config, task['auth_sheets'], task['sheet'], 'DV3 Segments', a1_notation)
  sheets_write(config, task['auth_sheets'], task['sheet'], 'DV3 Segments', a1_notation, rows)
def get_owners():
  if project.verbose:
    print('GETTING OWNERS')

  owners = []

  if 'sheet' in project.task['owners']:
    owners = sheets_read(project.task['auth'],
                         project.task['owners']['sheet']['url'],
                         project.task['owners']['sheet']['tab'],
                         project.task['owners']['sheet']['range'])
  elif 'bigquery' in project.task['owners']:
    owners = query_to_rows(project.task['auth'], project.id,
                           project.task['owners']['bigquery']['dataset'],
                           project.task['owners']['bigquery']['query'])

  # group account owners by email, create easy lookup sets for ids
  owners_grouped = {}
  for owner in owners:
    try:
      owners_grouped.setdefault(owner[2], {
        'Account Name': owner[0],
        'Account Owner': owner[1],
        'Account Email': owner[2],
        'DCM Network ID': [],
        'DBM Partner ID': [],
        'DS Account ID': [],
        'Studio Account ID': [],
      })

      if len(owner) > 3 and owner[3]:
        owners_grouped[owner[2]]['DCM Network ID'].append(str(owner[3]))
      if len(owner) > 4 and owner[4]:
        owners_grouped[owner[2]]['DBM Partner ID'].append(str(owner[4]))
      if len(owner) > 5 and owner[5]:
        owners_grouped[owner[2]]['DS Account ID'].append(str(owner[5]))
      if len(owner) > 6 and owner[6]:
        owners_grouped[owner[2]]['Studio Account ID'].append(str(owner[6]))

    except IndexError:
      print('ERROR:', owner)

  if project.verbose:
    print('GOT OWNERS:', len(owners))

  return owners_grouped.values()
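# Shape sketch for get_owners above ( hypothetical values ): rows arrive as
# [Account Name, Account Owner, Account Email, DCM, DBM, DS, Studio] and are
# grouped by email, appending each non-empty trailing id to its product list.
owners = [
  ['Acme', 'Jane', 'jane@example.com', '123', '', '55', ''],
  ['Acme', 'Jane', 'jane@example.com', '456', '77', '', '']
]
# -> one grouped entry: {'Account Email': 'jane@example.com',
#    'DCM Network ID': ['123', '456'], 'DBM Partner ID': ['77'],
#    'DS Account ID': ['55'], 'Studio Account ID': []}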
def bigquery():
  print('TEST: bigquery')

  # check schema if given ( check independent of values )
  if 'schema' in project.task['bigquery']:
    schema = table_to_schema(project.task['auth'], project.id,
                             project.task['bigquery']['dataset'],
                             project.task['bigquery']['table'])
    deltas = schema_compare(project.task['bigquery']['schema'], schema, path='')

    if deltas:
      print('\nFAILED *******************************************************\n')
      for delta in deltas.values():
        print('%(path)s: %(error)s ( %(expected)s - %(actual)s)' % delta)
      print('\n**************************************************************\n')
      test_failed()
    else:
      test_passed()

  # if query given check it
  if 'query' in project.task['bigquery']:
    rows = query_to_rows(
      project.task['auth'],
      project.id,
      project.task['bigquery']['dataset'],
      query_parameters(project.task['bigquery']['query'],
                       project.task['bigquery'].get('parameters')),
      legacy=project.task['bigquery'].get('legacy', True))

    object_compare(sorted(rows), sorted(project.task['bigquery']['values']))

  # simple table check ( unless query given )
  elif 'values' in project.task['bigquery']:
    rows = table_to_rows(project.task['auth'], project.id,
                         project.task['bigquery']['dataset'],
                         project.task['bigquery']['table'])
    object_compare(sorted(rows), sorted(project.task['bigquery']['values']))
def create_cm_site_segmentation(project):
  # Read sheet to bq table
  sheet_rows = sheets_read(
    'user', project.task['sheet'], 'CM_Site_Segments', 'A:C', retries=10)

  if not sheet_rows:
    sheet_rows = []

  schema = [
    {'type': 'STRING', 'name': 'Site_Dcm', 'mode': 'NULLABLE'},
    {'type': 'INTEGER', 'name': 'Impressions', 'mode': 'NULLABLE'},
    {'type': 'STRING', 'name': 'Site_Type', 'mode': 'NULLABLE'}
  ]

  rows_to_table(
    auth='service',
    project_id=project.id,
    dataset_id=project.task['dataset'],
    table_id=CM_SITE_SEGMENTATION_SHEET_TABLE,
    rows=sheet_rows,
    schema=schema,
    skip_rows=1,
    disposition='WRITE_TRUNCATE')

  # Get Site_Type from the sheet
  run_query_from_file(Queries.cm_site_segmentation, CM_SITE_SEGMENTATION_TABLE)

  # Move Table back to sheets
  query = 'SELECT * from `' + project.id + '.' + project.task['dataset'] + '.' + CM_SITE_SEGMENTATION_TABLE + '`'
  rows = query_to_rows(
    'service', project.id, project.task['dataset'], query, legacy=False)

  # makes sure types are correct in sheet
  rows = rows_to_type(rows)

  sheets_clear('user', project.task['sheet'], 'CM_Site_Segments', 'A2:C')
  sheets_write('user', project.task['sheet'], 'CM_Site_Segments', 'A2:C', rows)
def create_dv360_segments(project):
  a1_notation = 'A:M'

  schema = [
    {"type": "STRING", "name": "Advertiser", "mode": "NULLABLE"},
    {"type": "INTEGER", "name": "Advertiser_Id", "mode": "NULLABLE"},
    {"type": "STRING", "name": "Campaign", "mode": "NULLABLE"},
    {"type": "INTEGER", "name": "Campaign_Id", "mode": "NULLABLE"},
    {"type": "STRING", "name": "Insertion_Order", "mode": "NULLABLE"},
    {"type": "INTEGER", "name": "Insertion_Order_Id", "mode": "NULLABLE"},
    {"type": "STRING", "name": "Line_Item", "mode": "NULLABLE"},
    {"type": "INTEGER", "name": "Line_Item_Id", "mode": "NULLABLE"},
    {"type": "STRING", "name": "Line_Item_Type", "mode": "NULLABLE"},
    {"type": "INTEGER", "name": "Impressions", "mode": "NULLABLE"},
    {"type": "STRING", "name": "Segment1", "mode": "NULLABLE"},
    {"type": "STRING", "name": "Segment2", "mode": "NULLABLE"},
    {"type": "STRING", "name": "Segment3", "mode": "NULLABLE"}
  ]

  sheet_rows = sheets_read('user', project.task['sheet'], 'DV3 Segments',
                           a1_notation, retries=10)

  if not sheet_rows:
    sheet_rows = []

  print('DV360 SEGMENT SHEET TABLE WRITE')

  rows_to_table(
    auth='service',
    project_id=project.id,
    dataset_id=project.task['dataset'],
    table_id=DV360_CUSTOM_SEGMENTS_SHEET_TABLE,
    rows=sheet_rows,
    schema=schema,
    skip_rows=1,
    disposition='WRITE_TRUNCATE')

  # Run Query
  path = os.path.join(os.path.dirname(__file__),
                      SQL_DIRECTORY + DV360_CUSTOM_SEGMENTS_FILENAME)
  query = ''
  with open(path, 'r') as file:
    data = file.read().replace('\n', ' ')
    query = data.replace("{{project_id}}", project.id).replace(
      "{{dataset}}", project.task['dataset'])

  print('DV360 CUSTOM SEGMENT TABLE')

  query_to_table('service', project.id, project.task['dataset'],
                 DV360_CUSTOM_SEGMENTS_TABLE, query, legacy=False)

  # Move Table back to sheets
  query = 'SELECT * from `' + project.id + '.' + project.task['dataset'] + '.' + DV360_CUSTOM_SEGMENTS_TABLE + '`'
  rows = query_to_rows('service', project.id, project.task['dataset'], query,
                       legacy=False)

  # makes sure types are correct in sheet
  a1_notation = a1_notation[:1] + '2' + a1_notation[1:]
  rows = rows_to_type(rows)
  sheets_clear('user', project.task['sheet'], 'DV3 Segments', a1_notation)
  sheets_write('user', project.task['sheet'], 'DV3 Segments', a1_notation, rows)
def handle(self, *args, **kwargs):
  impact = []  # { 'day': DATE, 'deployment':INT, 'account': INT, 'product': STRING, 'recipe': STRING, 'user': STRING }
  missing = {}
  id_max = 0

  project.initialize(_service=settings.UI_SERVICE, _verbose=True)

  if table_exists('service', 'google.com:starthinker', 'dashboard', 'ST_Scripts'):
    id_max = next(
      query_to_rows('service', 'google.com:starthinker', 'dashboard',
                    'SELECT MAX(Deployment) FROM ST_Scripts', legacy=False))[0]

  for recipe in Recipe.objects.filter(id__gt=id_max).order_by('id')[:kwargs['recipes']]:
    project.initialize(_user=recipe.account.get_credentials_path(),
                       _service=settings.UI_SERVICE, _verbose=True)

    values = recipe.get_values()
    for v in values:
      if v['tag'] in ('dcm_to_bigquery', 'dcm_to_sheets', 'dcm_to_storage',
                      'dcm_run', 'conversion_upload_from_bigquery',
                      'conversion_upload_from_sheets'):
        impact.append({
          'day': recipe.birthday,
          'deployment': recipe.id,
          'account': v['values'].get('account'),
          'script': v['tag'],
          'product': 'dcm',
          'user': recipe.account.email.replace('@google.com', '')
        })

      elif v['tag'] in ('dbm_to_bigquery', 'dbm_to_sheets', 'dbm_to_storage'):
        for partner in account_from_dbm_report(
            v['values'].get('dbm_report_id'), v['values'].get('dbm_report_name')):
          impact.append({
            'day': recipe.birthday,
            'deployment': recipe.id,
            'account': partner,
            'script': v['tag'],
            'product': 'dbm',
            'user': recipe.account.email.replace('@google.com', '')
          })

      elif v['tag'] in ('dt',):
        impact.append({
          'day': recipe.birthday,
          'deployment': recipe.id,
          'account': account_from_dt(v['values']),
          'script': v['tag'],
          'product': 'dcm',
          'user': recipe.account.email.replace('@google.com', '')
        })

      elif v['tag'] == 'barnacle':
        for account in v['values']['accounts']:
          impact.append({
            'day': recipe.birthday,
            'deployment': recipe.id,
            'account': account,
            'script': v['tag'],
            'product': 'dcm',
            'user': recipe.account.email.replace('@google.com', '')
          })

      elif v['tag'] in ('entity',):
        for partner in v['values']['partners']:
          impact.append({
            'day': recipe.birthday,
            'deployment': recipe.id,
            'account': partner,
            'script': v['tag'],
            'product': 'dbm',
            'user': recipe.account.email.replace('@google.com', '')
          })

      elif v['tag'] == 'itp':
        impact.append({
          'day': recipe.birthday,
          'deployment': recipe.id,
          'account': v['values']['dcm_account'],
          'script': v['tag'],
          'product': 'dcm',
          'user': recipe.account.email.replace('@google.com', '')
        })
        impact.append({
          'day': recipe.birthday,
          'deployment': recipe.id,
          'account': v['values']['dbm_partner'],
          'script': v['tag'],
          'product': 'dbm',
          'user': recipe.account.email.replace('@google.com', '')
        })

      elif v['tag'] == 'itp_audit':
        impact.append({
          'day': recipe.birthday,
          'deployment': recipe.id,
          'account': v['values']['cm_account_id'],
          'script': v['tag'],
          'product': 'dcm',
          'user': recipe.account.email.replace('@google.com', '')
        })
        for partner in account_from_dbm_report(
            None, v['values'].get('dv360_report_name')):
          impact.append({
            'day': recipe.birthday,
            'deployment': recipe.id,
            'account': partner,
            'script': v['tag'],
            'product': 'dbm',
            'user': recipe.account.email.replace('@google.com', '')
          })

      else:
        impact.append({
          'day': recipe.birthday,
          'deployment': recipe.id,
          'account': None,
          'script': v['tag'],
          'product': None,
          'user': recipe.account.email.replace('@google.com', '')
        })
        missing.setdefault(v['tag'], 0)
        missing[v['tag']] += 1

  if impact:
    if kwargs['test']:
      print(impact)
    else:
      print('WRITING TO ST_Scripts')
      rows_to_table(
        'service', 'google.com:starthinker', 'dashboard', 'ST_Scripts',
        [(i['day'], i['deployment'], i['user'], i['product'], i['script'],
          i['account']) for i in impact],
        schema=[
          {'mode': 'REQUIRED', 'name': 'Day', 'type': 'DATE'},
          {'mode': 'REQUIRED', 'name': 'Deployment', 'type': 'INTEGER'},
          {'mode': 'REQUIRED', 'name': 'User', 'type': 'STRING'},
          {'mode': 'NULLABLE', 'name': 'Product', 'type': 'STRING'},
          {'mode': 'NULLABLE', 'name': 'Recipe', 'type': 'STRING'},
          {'mode': 'NULLABLE', 'name': 'Account', 'type': 'INTEGER'},
        ],
        skip_rows=0,
        disposition='WRITE_TRUNCATE' if id_max == 0 else 'WRITE_APPEND',
        wait=True)

    print('MISSING', missing)
    print('Coverage:', (len(impact) * 100) / (len(missing) + len(impact)))

  else:
    print('No recipes newer than:', id_max)
def videos_from_bigquery(config, task):
  rows = query_to_rows(config, task['auth'],
                       task['bigquery'].get('project', config.project),
                       task['bigquery']['dataset'],
                       'SELECT * FROM %s;' % task['bigquery']['table'])
  return rows_to_videos(rows)
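# Usage sketch ( hypothetical dataset and table ). The generated query selects
# every column, so the table schema must match what rows_to_videos expects.
videos = videos_from_bigquery(config, {
  'auth': 'service',
  'bigquery': {'dataset': 'Video_Assets', 'table': 'Video_List'}
})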
def create_dv360_segments(project):
  a1_notation = 'A:M'

  schema = [
    {'type': 'STRING', 'name': 'Advertiser', 'mode': 'NULLABLE'},
    {'type': 'INTEGER', 'name': 'Advertiser_Id', 'mode': 'NULLABLE'},
    {'type': 'STRING', 'name': 'Campaign', 'mode': 'NULLABLE'},
    {'type': 'INTEGER', 'name': 'Campaign_Id', 'mode': 'NULLABLE'},
    {'type': 'STRING', 'name': 'Insertion_Order', 'mode': 'NULLABLE'},
    {'type': 'INTEGER', 'name': 'Insertion_Order_Id', 'mode': 'NULLABLE'},
    {'type': 'STRING', 'name': 'Line_Item', 'mode': 'NULLABLE'},
    {'type': 'INTEGER', 'name': 'Line_Item_Id', 'mode': 'NULLABLE'},
    {'type': 'STRING', 'name': 'Line_Item_Type', 'mode': 'NULLABLE'},
    {'type': 'INTEGER', 'name': 'Impressions', 'mode': 'NULLABLE'},
    {'type': 'STRING', 'name': 'Segment1', 'mode': 'NULLABLE'},
    {'type': 'STRING', 'name': 'Segment2', 'mode': 'NULLABLE'},
    {'type': 'STRING', 'name': 'Segment3', 'mode': 'NULLABLE'}
  ]

  sheet_rows = sheets_read(
    'user', project.task['sheet'], 'DV3 Segments', a1_notation, retries=10)

  if not sheet_rows:
    sheet_rows = []

  print('DV360 SEGMENT SHEET TABLE WRITE')

  rows_to_table(
    auth='service',
    project_id=project.id,
    dataset_id=project.task['dataset'],
    table_id=DV360_CUSTOM_SEGMENTS_SHEET_TABLE,
    rows=sheet_rows,
    schema=schema,
    skip_rows=1,
    disposition='WRITE_TRUNCATE')

  # Run Query
  if project.verbose:
    print('RUN DV360 Custom Segments Query')

  run_query_from_file(Queries.dv360_custom_segments, DV360_CUSTOM_SEGMENTS_TABLE)

  # Move Table back to sheets
  query = 'SELECT * from `' + project.id + '.' + project.task['dataset'] + '.' + DV360_CUSTOM_SEGMENTS_TABLE + '`'
  rows = query_to_rows(
    'service', project.id, project.task['dataset'], query, legacy=False)

  # makes sure types are correct in sheet
  a1_notation = a1_notation[:1] + '2' + a1_notation[1:]
  rows = rows_to_type(rows)
  sheets_clear('user', project.task['sheet'], 'DV3 Segments', a1_notation)
  sheets_write('user', project.task['sheet'], 'DV3 Segments', a1_notation, rows)
def bigquery():

  if 'function' in project.task:
    query = None

    if project.task['function'] == 'pearson_significance_test':
      query = pearson_significance_test()

    if query:
      run_query(project.task['auth'], project.id, query, False,
                project.task['to']['dataset'])

  elif 'run' in project.task and 'query' in project.task.get('run', {}):
    if project.verbose:
      print('QUERY', project.task['run']['query'])
    run_query(
      project.task['auth'],
      project.id,
      query_parameters(project.task['run']['query'],
                       project.task['run'].get('parameters')),
      project.task['run'].get('legacy', True),
    )

  elif 'values' in project.task['from']:
    rows = get_rows(project.task['auth'], project.task['from'])
    rows_to_table(
      project.task['to'].get('auth', project.task['auth']),
      project.id,
      project.task['to']['dataset'],
      project.task['to']['table'],
      rows,
      project.task.get('schema', []),
      0)

  elif 'query' in project.task['from']:

    if 'table' in project.task['to']:
      if project.verbose:
        print('QUERY TO TABLE', project.task['to']['table'])
      query_to_table(
        project.task['auth'],
        project.id,
        project.task['to']['dataset'],
        project.task['to']['table'],
        query_parameters(project.task['from']['query'],
                         project.task['from'].get('parameters')),
        disposition=project.task['write_disposition']
          if 'write_disposition' in project.task else 'WRITE_TRUNCATE',
        legacy=project.task['from'].get(
          'legacy', project.task['from'].get('useLegacySql', True)),  # DEPRECATED: useLegacySql
        target_project_id=project.task['to'].get('project_id', project.id))

    elif 'sheet' in project.task['to']:
      if project.verbose:
        print('QUERY TO SHEET', project.task['to']['sheet'])
      rows = query_to_rows(
        project.task['auth'],
        project.id,
        project.task['from']['dataset'],
        query_parameters(project.task['from']['query'],
                         project.task['from'].get('parameters')),
        legacy=project.task['from'].get('legacy', True))

      # makes sure types are correct in sheet
      rows = rows_to_type(rows)

      sheets_clear(
        project.task['to'].get('auth', project.task['auth']),
        project.task['to']['sheet'],
        project.task['to']['tab'],
        project.task['to'].get('range', 'A2'))
      sheets_write(
        project.task['to'].get('auth', project.task['auth']),
        project.task['to']['sheet'],
        project.task['to']['tab'],
        project.task['to'].get('range', 'A2'),
        rows)

    elif 'sftp' in project.task['to']:
      rows = query_to_rows(
        project.task['auth'],
        project.id,
        project.task['from']['dataset'],
        query_parameters(project.task['from']['query'],
                         project.task['from'].get('parameters')),
        legacy=project.task['from'].get('use_legacy_sql', True))

      if rows:
        if project.verbose:
          print('QUERY TO SFTP')
        put_rows(project.task['auth'], project.task['to'], rows)

    else:
      if project.verbose:
        print('QUERY TO VIEW', project.task['to']['view'])
      query_to_view(
        project.task['auth'],
        project.id,
        project.task['to']['dataset'],
        project.task['to']['view'],
        query_parameters(project.task['from']['query'],
                         project.task['from'].get('parameters')),
        project.task['from'].get(
          'legacy', project.task['from'].get('useLegacySql', True)),  # DEPRECATED: useLegacySql
        project.task['to'].get('replace', False))

  else:
    if project.verbose:
      print('STORAGE TO TABLE', project.task['to']['table'])
    storage_to_table(
      project.task['auth'],
      project.id,
      project.task['to']['dataset'],
      project.task['to']['table'],
      project.task['from']['bucket'] + ':' + project.task['from']['path'],
      project.task.get('schema', []),
      project.task.get('skip_rows', 1),
      project.task.get('structure', 'CSV'),
      project.task.get('disposition', 'WRITE_TRUNCATE'))
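# One hypothetical routing example for the handler above: a 'from.query' plus
# 'to.table' pair takes the query_to_table branch, defaulting to WRITE_TRUNCATE
# when no 'write_disposition' is set.
project.task = {
  'auth': 'service',
  'from': {'query': 'SELECT * FROM `my-project.Source.Table`', 'legacy': False},
  'to': {'dataset': 'Destination', 'table': 'Table_Copy'}
}
bigquery()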
def bigquery():
  if 'run' in project.task and 'query' in project.task.get('run', {}):
    if project.verbose:
      print("QUERY", project.task['run']['query'])
    run_query(
      project.task['auth'],
      project.id,
      project.task['run']['query'],
      project.task['run'].get('legacy', True),
      #project.task['run'].get('billing_project_id', None)
    )

  elif 'values' in project.task['from']:
    rows = get_rows(project.task['auth'], project.task['from'])
    rows_to_table(
      project.task['to'].get('auth', project.task['auth']),
      project.id,
      project.task['to']['dataset'],
      project.task['to']['table'],
      rows,
      project.task.get('schema', []),
      0)

  elif 'query' in project.task['from']:

    if 'table' in project.task['to']:
      if project.verbose:
        print("QUERY TO TABLE", project.task['to']['table'])

      if 'pre_process_query' in project.task['to']:
        print('executing statement')
        execute_statement(
          project.task['auth'],
          project.id,
          project.task['to']['dataset'],
          project.task['to']['pre_process_query'],
          use_legacy_sql=project.task['from'].get(
            'legacy', project.task['from'].get('useLegacySql', True)))

      query_to_table(
        project.task['auth'],
        project.id,
        project.task['to']['dataset'],
        project.task['to']['table'],
        query_parameters(project.task['from']['query'],
                         project.task['from'].get('parameters')),
        disposition=project.task['write_disposition']
          if 'write_disposition' in project.task else 'WRITE_TRUNCATE',
        legacy=project.task['from'].get(
          'legacy', project.task['from'].get('useLegacySql', True)),  # DEPRECATED: useLegacySql
        target_project_id=project.task['to'].get('project_id', project.id))

    # NOT USED SO RIPPING IT OUT
    # Mauriciod: Yes, it is used, look at project/mauriciod/target_winrate.json
    elif 'storage' in project.task['to']:
      if project.verbose:
        print("QUERY TO STORAGE", project.task['to']['storage'])

      local_file_name = '/tmp/%s' % str(uuid.uuid1())
      rows = query_to_rows(project.task['auth'], project.id,
                           project.task['from']['dataset'],
                           project.task['from']['query'])

      # write rows to a local csv in text mode with no extra newlines
      # ( required for csv.writer under Python 3 ), then upload it
      with open(local_file_name, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(rows)

      with open(local_file_name, 'rb') as f:
        object_put(project.task['auth'], project.task['to']['storage'], f)

      os.remove(local_file_name)

    elif 'sheet' in project.task['to']:
      if project.verbose:
        print("QUERY TO SHEET", project.task['to']['sheet'])

      rows = query_to_rows(project.task['auth'], project.id,
                           project.task['from']['dataset'],
                           project.task['from']['query'],
                           legacy=project.task['from'].get('legacy', True))

      # makes sure types are correct in sheet
      rows = rows_to_type(rows)

      sheets_clear(project.task['auth'], project.task['to']['sheet'],
                   project.task['to']['tab'],
                   project.task['to'].get('range', 'A2'))
      sheets_write(project.task['auth'], project.task['to']['sheet'],
                   project.task['to']['tab'],
                   project.task['to'].get('range', 'A2'), rows)

    elif 'sftp' in project.task['to']:
      rows = query_to_rows(project.task['auth'], project.id,
                           project.task['from']['dataset'],
                           project.task['from']['query'],
                           legacy=project.task['from'].get('use_legacy_sql', True))

      if rows:
        if project.verbose:
          print("QUERY TO SFTP")
        put_rows(project.task['auth'], project.task['to'], rows)

    else:
      if project.verbose:
        print("QUERY TO VIEW", project.task['to']['view'])
      query_to_view(
        project.task['auth'],
        project.id,
        project.task['to']['dataset'],
        project.task['to']['view'],
        query_parameters(project.task['from']['query'],
                         project.task['from'].get('parameters')),
        project.task['from'].get(
          'legacy', project.task['from'].get('useLegacySql', True)),  # DEPRECATED: useLegacySql
        project.task['to'].get('replace', False))

  else:
    if project.verbose:
      print("STORAGE TO TABLE", project.task['to']['table'])
    storage_to_table(
      project.task['auth'],
      project.id,
      project.task['to']['dataset'],
      project.task['to']['table'],
      project.task['from']['bucket'] + ':' + project.task['from']['path'],
      project.task.get('schema', []),
      project.task.get('skip_rows', 1),
      project.task.get('structure', 'CSV'),
      project.task.get('disposition', 'WRITE_TRUNCATE'))
def get_rows(auth, source):
  """Processes standard read JSON block for dynamic loading of data.

  Allows us to quickly pull a column or columns of data and use it as an input
  into a script. For example pull a list of ids from bigquery and act on each one.

  - When pulling a single column specify single_cell = True. Returns list AKA values.
  - When pulling multiple columns specify single_cell = False. Returns list of lists AKA rows.
  - Values are always given as a list ( single_cell will trigger necessary wrapping ).
  - Values, bigquery, sheet are optional; if multiple are given, the result is one continuous iterator.
  - Extensible, add a handler to define a new source ( be kind, update the documentation json ).

  Include the following JSON in a recipe, then in the run.py handler when
  encountering that block pass it to this function and use the returned results.

    from utils.data import get_rows

    var_json = {
      "in":{
        "single_cell":[ boolean ],
        "values": [ integer list ],
        "bigquery":{
          "dataset": [ string ],
          "query": [ string ],
          "legacy":[ boolean ]
        },
        "bigquery":{
          "dataset": [ string ],
          "table": [ string ],
        },
        "sheet":{
          "sheet":[ string - full URL, suggest using share link ],
          "tab":[ string ],
          "range":[ string - A1:A notation ]
        }
      }
    }

    values = get_rows('user', var_json)

  Or you can use it directly with project singleton.

    from util.project import project
    from utils.data import get_rows

    @project.from_parameters
    def something():
      values = get_rows(project.task['auth'], project.task['in'])

    if __name__ == "__main__":
      something()

  Args:
    auth: (string) The type of authentication to use, user or service.
    source: (json) A json block resembling var_json described above.

  Returns:
    If single_cell is False: Returns a list of row values [[v1], [v2], ... ]
    If single_cell is True: Returns a list of values [v1, v2, ...]
  """

  # if handler points to list, concatenate all the values from various sources into one list
  if isinstance(source, list):
    for s in source:
      for r in get_rows(auth, s):
        yield r

  # if handler is an endpoint, fetch data
  else:
    if 'values' in source:
      if isinstance(source['values'], list):
        for value in source['values']:
          yield value
      else:
        yield source['values']

    if 'sheet' in source:
      rows = sheets_read(
        auth,
        source['sheet']['sheet'],
        source['sheet']['tab'],
        source['sheet']['range'],
      )

      for row in rows:
        yield row[0] if source.get('single_cell', False) else row

    if 'bigquery' in source:
      rows = []

      if 'table' in source['bigquery']:
        rows = table_to_rows(
          source['bigquery'].get('auth', auth),
          project.id,
          source['bigquery']['dataset'],
          source['bigquery']['table'],
          as_object=source['bigquery'].get('as_object', False))
      else:
        rows = query_to_rows(
          source['bigquery'].get('auth', auth),
          project.id,
          source['bigquery']['dataset'],
          query_parameters(source['bigquery']['query'],
                           source['bigquery'].get('parameters', {})),
          legacy=source['bigquery'].get('legacy', False),
          as_object=source['bigquery'].get('as_object', False))

      for row in rows:
        yield row[0] if source.get('single_cell', False) else row
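# Sketch of the list form mentioned in the docstring above ( hypothetical
# values ): multiple source blocks concatenate into one continuous iterator.
ids = list(get_rows('user', [
  {'values': [1, 2, 3]},
  {'single_cell': True,
   'bigquery': {'dataset': 'My_Dataset', 'query': 'SELECT id FROM My_Table'}}
]))
# -> [1, 2, 3, <ids from the query>]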
def bigquery_query():
  """Execute a query and write results to table.

  TODO: Replace with get_rows and put_rows combination.

  See:
    scripts/bigquery_query.json
    scripts/bigquery_storage.json
    scripts/bigquery_to_sheet.json
    scripts/bigquery_view.json
  """

  if 'table' in project.task['to']:
    if project.verbose:
      print('QUERY TO TABLE', project.task['to']['table'])

    query_to_table(
      project.task['auth'],
      project.id,
      project.task['to']['dataset'],
      project.task['to']['table'],
      query_parameters(project.task['from']['query'],
                       project.task['from'].get('parameters')),
      disposition=project.task['write_disposition']
        if 'write_disposition' in project.task else 'WRITE_TRUNCATE',
      legacy=project.task['from'].get(
        'legacy', project.task['from'].get('useLegacySql', True)),  # DEPRECATED: useLegacySql
      target_project_id=project.task['to'].get('project_id', project.id))

  elif 'sheet' in project.task['to']:
    if project.verbose:
      print('QUERY TO SHEET', project.task['to']['sheet'])

    rows = query_to_rows(
      project.task['auth'],
      project.id,
      project.task['from']['dataset'],
      query_parameters(project.task['from']['query'],
                       project.task['from'].get('parameters')),
      legacy=project.task['from'].get('legacy', True))

    # makes sure types are correct in sheet
    rows = rows_to_type(rows)

    sheets_clear(
      project.task['to'].get('auth', project.task['auth']),
      project.task['to']['sheet'],
      project.task['to']['tab'],
      project.task['to'].get('range', 'A2'))
    sheets_write(
      project.task['to'].get('auth', project.task['auth']),
      project.task['to']['sheet'],
      project.task['to']['tab'],
      project.task['to'].get('range', 'A2'),
      rows)

  elif 'sftp' in project.task['to']:
    if project.verbose:
      print('QUERY TO SFTP')

    rows = query_to_rows(
      project.task['auth'],
      project.id,
      project.task['from']['dataset'],
      query_parameters(project.task['from']['query'],
                       project.task['from'].get('parameters')),
      legacy=project.task['from'].get('use_legacy_sql', True))

    if rows:
      put_rows(project.task['auth'], project.task['to'], rows)

  else:
    if project.verbose:
      print('QUERY TO VIEW', project.task['to']['view'])

    query_to_view(
      project.task['auth'],
      project.id,
      project.task['to']['dataset'],
      project.task['to']['view'],
      query_parameters(project.task['from']['query'],
                       project.task['from'].get('parameters')),
      project.task['from'].get(
        'legacy', project.task['from'].get('useLegacySql', True)),  # DEPRECATED: useLegacySql
      project.task['to'].get('replace', False))
def videos_from_bigquery(bigquery):
  rows = query_to_rows(project.task['auth'],
                       bigquery.get('project', project.id),
                       bigquery['dataset'],
                       'SELECT * FROM %s;' % bigquery['table'])
  return rows_to_videos(rows)