Example #1
def census_write(query, table):

  if project.verbose:
    print('%s: %s' % (table, query))

  if project.task['to']['type'] == 'table':
    query_to_table(
        project.task['auth'],
        project.id,
        project.task['to']['dataset'],
        table,
        query,
        legacy=False)

  elif project.task['to']['type'] == 'view':
    query_to_view(
        project.task['auth'],
        project.id,
        project.task['to']['dataset'],
        table,
        query,
        legacy=False)
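For context, a minimal sketch of the task configuration census_write reads; only the key names ('auth', 'to', 'type', 'dataset') come from the lookups above, and every value here is an assumption:

# Hypothetical task; 'type' routes the query to a table or a view.
project.task = {
    'auth': 'service',
    'to': {
        'type': 'table',  # 'view' would call query_to_view() instead
        'dataset': 'census_dataset',  # assumed dataset name
    },
}
census_write('SELECT 1 AS test', 'census_table')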
Example #2
def census_write(config, task, query, table):

    if config.verbose:
        print('%s: %s' % (table, query))

    if task['to']['type'] == 'table':
        query_to_table(config,
                       task['auth'],
                       config.project,
                       task['to']['dataset'],
                       table,
                       query,
                       legacy=False)

    elif task['to']['type'] == 'view':
        query_to_view(config,
                      task['auth'],
                      config.project,
                      task['to']['dataset'],
                      table,
                      query,
                      legacy=False)
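Note the refactor relative to Example #1: the global project object is replaced by explicit config and task parameters, which makes the function easier to test in isolation. A hedged usage sketch, with every value assumed:

# Hypothetical config/task; field names mirror the lookups above
# (config.verbose, config.project, task['auth'], task['to']).
task = {
    'auth': 'service',
    'to': {'type': 'view', 'dataset': 'census_dataset'},
}
census_write(config, task, 'SELECT 1 AS test', 'census_view')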
Example #3
def bigquery():

    if 'function' in project.task:
        query = None

        if project.task['function'] == 'pearson_significance_test':
            query = pearson_significance_test()

        if query:
            run_query(project.task['auth'], project.id, query, False,
                      project.task['to']['dataset'])

    elif 'run' in project.task and 'query' in project.task.get('run', {}):
        if project.verbose:
            print('QUERY', project.task['run']['query'])
        run_query(
            project.task['auth'],
            project.id,
            query_parameters(project.task['run']['query'],
                             project.task['run'].get('parameters')),
            project.task['run'].get('legacy', True),
        )

    elif 'values' in project.task['from']:
        rows = get_rows(project.task['auth'], project.task['from'])

        rows_to_table(project.task['to'].get('auth', project.task['auth']),
                      project.id, project.task['to']['dataset'],
                      project.task['to']['table'], rows,
                      project.task.get('schema', []), 0)

    elif 'query' in project.task['from']:

        if 'table' in project.task['to']:
            if project.verbose:
                print('QUERY TO TABLE', project.task['to']['table'])

            query_to_table(
                project.task['auth'],
                project.id,
                project.task['to']['dataset'],
                project.task['to']['table'],
                query_parameters(project.task['from']['query'],
                                 project.task['from'].get('parameters')),
                disposition=project.task['write_disposition']
                if 'write_disposition' in project.task else 'WRITE_TRUNCATE',
                legacy=project.task['from'].get(
                    'legacy', project.task['from'].get(
                        'useLegacySql', True)),  # DEPRECATED: useLegacySql
                target_project_id=project.task['to'].get(
                    'project_id', project.id))

        elif 'sheet' in project.task['to']:
            if project.verbose:
                print('QUERY TO SHEET', project.task['to']['sheet'])
            rows = query_to_rows(
                project.task['auth'],
                project.id,
                project.task['from']['dataset'],
                query_parameters(project.task['from']['query'],
                                 project.task['from'].get('parameters')),
                legacy=project.task['from'].get('legacy', True))

            # ensure types are correct in the sheet
            rows = rows_to_type(rows)

            sheets_clear(project.task['to'].get('auth', project.task['auth']),
                         project.task['to']['sheet'],
                         project.task['to']['tab'],
                         project.task['to'].get('range', 'A2'))
            sheets_write(project.task['to'].get('auth', project.task['auth']),
                         project.task['to']['sheet'],
                         project.task['to']['tab'],
                         project.task['to'].get('range', 'A2'), rows)

        elif 'sftp' in project.task['to']:
            rows = query_to_rows(
                project.task['auth'],
                project.id,
                project.task['from']['dataset'],
                query_parameters(project.task['from']['query'],
                                 project.task['from'].get('parameters')),
                legacy=project.task['from'].get('use_legacy_sql', True))

            if rows:
                if project.verbose:
                    print('QUERY TO SFTP')
                put_rows(project.task['auth'], project.task['to'], rows)

        else:
            if project.verbose:
                print('QUERY TO VIEW', project.task['to']['view'])
            query_to_view(
                project.task['auth'],
                project.id,
                project.task['to']['dataset'],
                project.task['to']['view'],
                query_parameters(project.task['from']['query'],
                                 project.task['from'].get('parameters')),
                project.task['from'].get(
                    'legacy', project.task['from'].get(
                        'useLegacySql', True)),  # DEPRECATED: useLegacySql
                project.task['to'].get('replace', False))

    else:
        if project.verbose:
            print('STORAGE TO TABLE', project.task['to']['table'])
        storage_to_table(
            project.task['auth'], project.id, project.task['to']['dataset'],
            project.task['to']['table'], project.task['from']['bucket'] + ':' +
            project.task['from']['path'], project.task.get('schema', []),
            project.task.get('skip_rows', 1),
            project.task.get('structure', 'CSV'),
            project.task.get('disposition', 'WRITE_TRUNCATE'))
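The dispatcher above routes on which keys the task contains. A sketch of one plausible task, with key names taken from the lookups above and all values assumed, that would take the query-to-table branch:

# Hypothetical recipe task: 'query' in 'from' plus 'table' in 'to'
# selects query_to_table() with the given write disposition.
project.task = {
    'auth': 'service',
    'from': {
        'query': 'SELECT * FROM `source_dataset.source_table`',  # assumed
        'legacy': False,
    },
    'to': {
        'dataset': 'target_dataset',  # assumed
        'table': 'target_table',      # assumed
    },
    'write_disposition': 'WRITE_TRUNCATE',
}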
Example #4
def bigquery():

    if 'run' in project.task and 'query' in project.task.get('run', {}):
        if project.verbose: print("QUERY", project.task['run']['query'])
        run_query(
            project.task['auth'],
            project.id,
            project.task['run']['query'],
            project.task['run'].get('legacy', True),
            #project.task['run'].get('billing_project_id', None)
        )

    elif 'values' in project.task['from']:
        rows = get_rows(project.task['auth'], project.task['from'])

        rows_to_table(project.task['to'].get('auth', project.task['auth']),
                      project.id, project.task['to']['dataset'],
                      project.task['to']['table'], rows,
                      project.task.get('schema', []), 0)

    elif 'query' in project.task['from']:
        if 'table' in project.task['to']:
            if project.verbose:
                print("QUERY TO TABLE", project.task['to']['table'])

            if 'pre_process_query' in project.task['to']:
                print('executing statement')
                execute_statement(project.task['auth'],
                                  project.id,
                                  project.task['to']['dataset'],
                                  project.task['to']['pre_process_query'],
                                  use_legacy_sql=project.task['from'].get(
                                      'legacy', project.task['from'].get(
                                          'useLegacySql', True)))
            query_to_table(
                project.task['auth'],
                project.id,
                project.task['to']['dataset'],
                project.task['to']['table'],
                query_parameters(project.task['from']['query'],
                                 project.task['from'].get('parameters')),
                disposition=project.task['write_disposition']
                if 'write_disposition' in project.task else 'WRITE_TRUNCATE',
                legacy=project.task['from'].get(
                    'legacy', project.task['from'].get(
                        'useLegacySql', True)),  # DEPRECATED: useLegacySql
                target_project_id=project.task['to'].get(
                    'project_id', project.id))
        # NOT USED SO RIPPING IT OUT
        # Mauriciod: Yes, it is used, look at project/mauriciod/target_winrate.json
        elif 'storage' in project.task['to']:
            if project.verbose:
                print("QUERY TO STORAGE", project.task['to']['storage'])
            local_file_name = '/tmp/%s' % str(uuid.uuid1())
            rows = query_to_rows(project.task['auth'], project.id,
                                 project.task['from']['dataset'],
                                 project.task['from']['query'])

            # write rows to a temporary local CSV, then upload it to storage
            with open(local_file_name, 'w', newline='') as f:
                writer = csv.writer(f)
                writer.writerows(rows)

            with open(local_file_name, 'rb') as f:
                object_put(project.task['auth'], project.task['to']['storage'], f)

            os.remove(local_file_name)
        elif 'sheet' in project.task['to']:
            if project.verbose:
                print("QUERY TO SHEET", project.task['to']['sheet'])
            rows = query_to_rows(project.task['auth'],
                                 project.id,
                                 project.task['from']['dataset'],
                                 project.task['from']['query'],
                                 legacy=project.task['from'].get(
                                     'legacy', True))

            # ensure types are correct in the sheet
            rows = rows_to_type(rows)

            sheets_clear(project.task['auth'], project.task['to']['sheet'],
                         project.task['to']['tab'],
                         project.task['to'].get('range', 'A2'))
            sheets_write(project.task['auth'], project.task['to']['sheet'],
                         project.task['to']['tab'],
                         project.task['to'].get('range', 'A2'), rows)
        elif 'sftp' in project.task['to']:
            rows = query_to_rows(project.task['auth'],
                                 project.id,
                                 project.task['from']['dataset'],
                                 project.task['from']['query'],
                                 legacy=project.task['from'].get(
                                     'use_legacy_sql', True))

            if rows:
                if project.verbose: print("QUERY TO SFTP")
                put_rows(project.task['auth'], project.task['to'], rows)
        else:
            if project.verbose:
                print("QUERY TO VIEW", project.task['to']['view'])
            query_to_view(
                project.task['auth'],
                project.id,
                project.task['to']['dataset'],
                project.task['to']['view'],
                query_parameters(project.task['from']['query'],
                                 project.task['from'].get('parameters')),
                project.task['from'].get(
                    'legacy', project.task['from'].get(
                        'useLegacySql', True)),  # DEPRECATED: useLegacySql
                project.task['to'].get('replace', False))
    else:
        if project.verbose:
            print("STORAGE TO TABLE", project.task['to']['table'])
        storage_to_table(
            project.task['auth'], project.id, project.task['to']['dataset'],
            project.task['to']['table'], project.task['from']['bucket'] + ':' +
            project.task['from']['path'], project.task.get('schema', []),
            project.task.get('skip_rows', 1),
            project.task.get('structure', 'CSV'),
            project.task.get('disposition', 'WRITE_TRUNCATE'))
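Example #4 differs from Example #3 mainly in the optional pre-processing statement and the query-to-storage branch. A sketch of a 'to' block, with all values assumed, that would trigger execute_statement() before the main query runs:

# Hypothetical 'to' block; 'pre_process_query' runs first via
# execute_statement(), then query_to_table() writes the result.
project.task['to'] = {
    'dataset': 'target_dataset',  # assumed
    'table': 'target_table',      # assumed
    'pre_process_query': 'DELETE FROM `target_dataset.target_table` WHERE TRUE',  # assumed
}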
Example #5
def bigquery_query():
    """Execute a query and write results to table.

  TODO: Replace with get_rows and put_rows combination.

  See: scripts/bigquery_query.json
       scripts/bigquery_storage.json
       scripts/bigquery_to_sheet.json
       scripts/bigquery_view.json
  """

    if 'table' in project.task['to']:
        if project.verbose:
            print('QUERY TO TABLE', project.task['to']['table'])

        query_to_table(
            project.task['auth'],
            project.id,
            project.task['to']['dataset'],
            project.task['to']['table'],
            query_parameters(project.task['from']['query'],
                             project.task['from'].get('parameters')),
            disposition=project.task['write_disposition']
            if 'write_disposition' in project.task else 'WRITE_TRUNCATE',
            legacy=project.task['from'].get(
                'legacy',
                project.task['from'].get('useLegacySql',
                                         True)),  # DEPRECATED: useLegacySql
            target_project_id=project.task['to'].get('project_id', project.id))

    elif 'sheet' in project.task['to']:
        if project.verbose:
            print('QUERY TO SHEET', project.task['to']['sheet'])

        rows = query_to_rows(project.task['auth'],
                             project.id,
                             project.task['from']['dataset'],
                             query_parameters(
                                 project.task['from']['query'],
                                 project.task['from'].get('parameters')),
                             legacy=project.task['from'].get('legacy', True))

        # ensure types are correct in the sheet
        rows = rows_to_type(rows)

        sheets_clear(project.task['to'].get('auth', project.task['auth']),
                     project.task['to']['sheet'], project.task['to']['tab'],
                     project.task['to'].get('range', 'A2'))
        sheets_write(project.task['to'].get('auth', project.task['auth']),
                     project.task['to']['sheet'], project.task['to']['tab'],
                     project.task['to'].get('range', 'A2'), rows)

    elif 'sftp' in project.task['to']:
        if project.verbose:
            print('QUERY TO SFTP')

        rows = query_to_rows(
            project.task['auth'],
            project.id,
            project.task['from']['dataset'],
            query_parameters(project.task['from']['query'],
                             project.task['from'].get('parameters')),
            legacy=project.task['from'].get('use_legacy_sql', True))

        if rows:
            put_rows(project.task['auth'], project.task['to'], rows)

    else:
        if project.verbose:
            print('QUERY TO VIEW', project.task['to']['view'])

        query_to_view(
            project.task['auth'],
            project.id,
            project.task['to']['dataset'],
            project.task['to']['view'],
            query_parameters(project.task['from']['query'],
                             project.task['from'].get('parameters')),
            project.task['from'].get(
                'legacy',
                project.task['from'].get('useLegacySql',
                                         True)),  # DEPRECATED: useLegacySql
            project.task['to'].get('replace', False))
Example #6
def run_floodlight_reports(project):
    if project.verbose:
        print('Creating Floodlight reports')

    body = {
        "kind": "dfareporting#report",
        "name": '',  # this is updated below based on Floodlight Config ID
        "format": "CSV",
        "type": "FLOODLIGHT",
        "floodlightCriteria": {
            "dateRange": {
                "kind": "dfareporting#dateRange",
                "relativeDateRange": "LAST_60_DAYS"
            },
            "floodlightConfigId": {
                "kind": "dfareporting#dimensionValue",
                "dimensionName": "dfa:floodlightConfigId",
                "value":
                0,  # updated below and replaced with Floodlight Config ID
                "matchType": "EXACT"
            },
            "reportProperties": {
                "includeUnattributedIPConversions": False,
                "includeUnattributedCookieConversions": True
            },
            "dimensions": [{
                "kind": "dfareporting#sortedDimension",
                "name": "dfa:site"
            }, {
                "kind": "dfareporting#sortedDimension",
                "name": "dfa:floodlightAttributionType"
            }, {
                "kind": "dfareporting#sortedDimension",
                "name": "dfa:interactionType"
            }, {
                "kind": "dfareporting#sortedDimension",
                "name": "dfa:pathType"
            }, {
                "kind": "dfareporting#sortedDimension",
                "name": "dfa:browserPlatform"
            }, {
                "kind": "dfareporting#sortedDimension",
                "name": "dfa:platformType"
            }, {
                "kind": "dfareporting#sortedDimension",
                "name": "dfa:week"
            }, {
                "kind": "dfareporting#sortedDimension",
                "name": "dfa:placementId"
            }, {
                "kind": "dfareporting#sortedDimension",
                "name": "dfa:floodlightConfigId"
            }],
            "metricNames": [
                "dfa:activityClickThroughConversions",
                "dfa:activityViewThroughConversions", "dfa:totalConversions",
                "dfa:totalConversionsRevenue"
            ]
        },
        "schedule": {
            "active": True,
            "repeats": "WEEKLY",
            "every": 1,
            "repeatsOnWeekDays": ["Sunday"]
        },
        "delivery": {
            "emailOwner": False
        }
    }

    reports = []
    floodlightConfigs = project.task.get('floodlightConfigIds', [])

    for configId in floodlightConfigs:
        body['name'] = project.task.get('reportPrefix',
                                        '') + "_" + str(configId)
        body['floodlightCriteria']['floodlightConfigId']['value'] = configId
        report = report_build('user', project.task['account'], body)
        reports.append(report['id'])

    if project.verbose:
        print('Finished creating Floodlight reports - moving to BQ')

    queries = []

    for createdReportId in reports:
        filename, report = report_file(
            'user',
            project.task['account'],
            createdReportId,
            None,
            project.task.get('timeout', 10),
        )

        if report:
            if project.verbose:
                print('Floodlight config report ', filename)

            # clean up the report
            rows = report_to_rows(report)
            rows = report_clean(rows)

            # determine schema
            schema = report_schema(next(rows))

            out_block = {
                'bigquery': {
                    'dataset': project.task['dataset'],
                    'schema': schema,
                    'skip_rows': 0,
                    'table': 'z_Floodlight_CM_Report_' + str(createdReportId),
                }
            }

            # write rows using the standard out block in json (allows customization across all scripts)
            if rows:
                put_rows('service', out_block, rows)
            queries.append('SELECT * FROM `{0}.{1}.{2}`'.format(
                project.id, out_block['bigquery']['dataset'],
                out_block['bigquery']['table']))

    if project.verbose:
        print('Moved reports to BQ tables - starting join')
    finalQuery = ' UNION ALL '.join(queries)

    query_to_table('service',
                   project.id,
                   project.task['dataset'],
                   CM_FLOODLIGHT_OUTPUT_TABLE,
                   finalQuery,
                   legacy=False)

    if project.verbose:
        print('Finished with Floodlight Config reports')
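For reference, a sketch of the task fields run_floodlight_reports reads: one report is built per Floodlight config id, each CSV is loaded into its own BigQuery table, and the tables are stitched together with UNION ALL into CM_FLOODLIGHT_OUTPUT_TABLE. Key names come from the lookups above; all values are assumptions:

# Hypothetical task for run_floodlight_reports(project).
project.task = {
    'account': '12345',                   # assumed CM account id
    'dataset': 'floodlight_dataset',      # assumed BigQuery dataset
    'reportPrefix': 'floodlight_report',  # assumed report name prefix
    'floodlightConfigIds': [111, 222],    # assumed config ids
    'timeout': 10,
}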
Example #7
def create_dv360_segments(project):
    a1_notation = 'A:M'
    schema = [{
        "type": "STRING",
        "name": "Advertiser",
        "mode": "NULLABLE"
    }, {
        "type": "INTEGER",
        "name": "Advertiser_Id",
        "mode": "NULLABLE"
    }, {
        "type": "STRING",
        "name": "Campaign",
        "mode": "NULLABLE"
    }, {
        "type": "INTEGER",
        "name": "Campaign_Id",
        "mode": "NULLABLE"
    }, {
        "type": "STRING",
        "name": "Insertion_Order",
        "mode": "NULLABLE"
    }, {
        "type": "INTEGER",
        "name": "Insertion_Order_Id",
        "mode": "NULLABLE"
    }, {
        "type": "STRING",
        "name": "Line_Item",
        "mode": "NULLABLE"
    }, {
        "type": "INTEGER",
        "name": "Line_Item_Id",
        "mode": "NULLABLE"
    }, {
        "type": "STRING",
        "name": "Line_Item_Type",
        "mode": "NULLABLE"
    }, {
        "type": "INTEGER",
        "name": "Impressions",
        "mode": "NULLABLE"
    }, {
        "type": "STRING",
        "name": "Segment1",
        "mode": "NULLABLE"
    }, {
        "type": "STRING",
        "name": "Segment2",
        "mode": "NULLABLE"
    }, {
        "type": "STRING",
        "name": "Segment3",
        "mode": "NULLABLE"
    }]

    sheet_rows = sheets_read('user',
                             project.task['sheet'],
                             'DV3 Segments',
                             a1_notation,
                             retries=10)

    if not sheet_rows:
        sheet_rows = []

    print('DV360 SEGMENT SHEET TABLE WRITE')

    rows_to_table(auth='service',
                  project_id=project.id,
                  dataset_id=project.task['dataset'],
                  table_id=DV360_CUSTOM_SEGMENTS_SHEET_TABLE,
                  rows=sheet_rows,
                  schema=schema,
                  skip_rows=1,
                  disposition='WRITE_TRUNCATE')

    # Run Query
    path = os.path.join(os.path.dirname(__file__),
                        SQL_DIRECTORY + DV360_CUSTOM_SEGMENTS_FILENAME)
    query = ''
    with open(path, 'r') as file:
        data = file.read().replace('\n', ' ')
        query = data.replace("{{project_id}}",
                             project.id).replace("{{dataset}}",
                                                 project.task['dataset'])

    print('DV360 CUSTOM SEGMENT TABLE')

    query_to_table('service',
                   project.id,
                   project.task['dataset'],
                   DV360_CUSTOM_SEGMENTS_TABLE,
                   query,
                   legacy=False)

    # Move the table back to the sheet
    query = 'SELECT * FROM `{0}.{1}.{2}`'.format(
        project.id, project.task['dataset'], DV360_CUSTOM_SEGMENTS_TABLE)
    rows = query_to_rows('service',
                         project.id,
                         project.task['dataset'],
                         query,
                         legacy=False)

    # ensure types are correct in the sheet
    a1_notation = a1_notation[:1] + '2' + a1_notation[1:]
    rows = rows_to_type(rows)
    sheets_clear('user', project.task['sheet'], 'DV3 Segments', a1_notation)
    sheets_write('user', project.task['sheet'], 'DV3 Segments', a1_notation,
                 rows)
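One detail worth calling out: the expression a1_notation[:1] + '2' + a1_notation[1:] rewrites the range so the header row survives the round trip back to the sheet. A self-contained illustration:

# 'A:M' -> 'A2:M': start writing at row 2 so row 1 (headers) is preserved
a1_notation = 'A:M'
a1_notation = a1_notation[:1] + '2' + a1_notation[1:]
assert a1_notation == 'A2:M'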
Example #8
def sdf():
  if project.verbose:
    print('SDF TO TABLE', project.task['out']['bigquery']['table'])
  
  # Determine time partitioning and write disposition
  is_time_partition = project.task['out']['bigquery'].get('is_time_partition', False)
  disposition = 'WRITE_TRUNCATE'
  if is_time_partition:
    disposition = 'WRITE_APPEND'

  # Read Filter Ids
  filter_id_rows = list(get_rows(project.task['auth'], project.task['read']['filter_ids']))
  filter_ids = [filter_id_rows[i:i + FILTER_ID_CHUNK_SIZE] for i in range(0, len(filter_id_rows), FILTER_ID_CHUNK_SIZE)]
  # Loop through requested file types
  for file_type in project.task['file_types']:
    current_filter_id_iteration = 0
    table_names = []

    # Create the destination table
    destination_table = '%s_%s' % (project.task['out']['bigquery']['table'], file_type.lower())
    create_table_if_not_exist(
      project.task['auth'],
      project.id,
      project.task['out']['bigquery']['dataset'],
      destination_table,
      is_time_partition)

    # Request 5 filter ids at a time so the API doesn't time out
    for partial_filter_ids in filter_ids:
      rows = sdf_read(project.task['auth'], [file_type], project.task['filter_type'], partial_filter_ids, project.task.get('version', '3.1'))

      if rows:
        schema = _sdf_schema(next(rows))
        table_suffix = '%s_%s' % (current_filter_id_iteration, file_type.lower())
        table_name = '%s%s' % (project.task['out']['bigquery']['table'], table_suffix)
        filename = '%s_%s.csv' % (file_type, project.date)
        # Check whether the table exists; create it if not
        create_table_if_not_exist(
          project.task['auth'],
          project.id,
          project.task['out']['bigquery']['dataset'],
          table_name)

        if 'bigquery' in project.task['out']:
          project.task['out']['bigquery']['schema'] = schema
          project.task['out']['bigquery']['skip_rows'] = 0

        put_rows(project.task['auth'], 
          project.task['out'], 
          filename, 
          rows, 
          variant=table_suffix)

        table_names.append(table_name)

      current_filter_id_iteration += 1

    query = _construct_combine_query(
      file_type,
      table_names,
      project.id,
      project.task['out']['bigquery']['dataset'],
      destination_table)

    query_to_table(project.task['auth'], 
      project.id, 
      project.task['out']['bigquery']['dataset'], 
      destination_table, 
      query, 
      disposition=disposition,
      legacy=False)

    # Delete all the temporary tables that were created
    for table_name in table_names:
      drop_table(project.task['auth'], 
        project.id, 
        project.task['out']['bigquery']['dataset'], 
        table_name)
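The filter-id slicing near the top of sdf() is the standard list-chunking idiom. A self-contained sketch, with the chunk size assumed to be 5 to match the comment in the loop (the real FILTER_ID_CHUNK_SIZE is defined elsewhere):

FILTER_ID_CHUNK_SIZE = 5  # assumed value

def chunk(items, size):
    # consecutive slices of at most `size` elements each
    return [items[i:i + size] for i in range(0, len(items), size)]

print(chunk(list(range(12)), FILTER_ID_CHUNK_SIZE))
# [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 11]]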