Example #1
0
def sov_create_reports():
    """Create share-of-voice DBM reports for peers ( sheet ) and client ( JSON ).

    Copies the template tab into the working sheet, reads peer account
    rows, enforces a 5+ peer minimum for anonymity, then creates one
    report per cohort via sov_report.

    Returns:
      Tuple ( client_name, peer_name ) later used to fetch the reports.

    Raises:
      Exception: if fewer than 5 peer rows exist in the sheet.
    """
    # Python 3 print function ( was a Python 2 print statement )
    if project.verbose:
        print('CLIENT:', project.task['dataset'])

    # make sure tab exists in sheet
    sheets_tab_copy(project.task['auth'],
                    project.task['sheet']['template']['url'],
                    project.task['sheet']['template']['tab'],
                    project.task['sheet']['url'],
                    project.task['sheet']['template']['tab'])

    # read peers from sheet
    rows = sheets_read(project.task['auth'], project.task['sheet']['url'],
                       project.task['sheet']['template']['tab'],
                       project.task['sheet']['range'])

    # CHECK: If minimum number of peers met ( prevents creation of reports )
    if len(rows) < 5:
        raise Exception(
            'Peer sheet needs 5+ DBM entries to ensure anonymity, there are %d!'
            % len(rows))

    # create a report for the peers ( given in sheet ), make partner_id:advertiser_id
    peer_name = sov_report([('%s:%s' % (r[0], r[1])) if len(r) == 2 else r[0]
                            for r in rows], 'Peer')

    # create a report for the client ( given in JSON )
    client_name = sov_report(project.task['dbm_accounts'], 'Client')

    # names are used to fetch the report
    return client_name, peer_name
Example #2
0
def get_solutions():
    """Load solution scores from sheet or BigQuery and index them by key.

    For each entry in project.task['solutions'], reads rows of
    ( key, variant_id, variant, score ) from its 'sheet' or 'bigquery'
    source and attaches them as solution['scores'], a dict keyed by
    str(key) for easy lookup.

    Returns:
      The ( mutated ) list project.task['solutions'].
    """
    # Python 3 print function ( was a Python 2 print statement )
    if project.verbose:
        print('GETTING SCORES')

    for solution in project.task['solutions']:
        scores = []

        if 'sheet' in solution:
            scores = sheets_read(project.task['auth'],
                                 solution['sheet']['url'],
                                 solution['sheet']['tab'],
                                 solution['sheet']['range'])
        elif 'bigquery' in solution:
            scores = query_to_rows(project.task['auth'], project.id,
                                   solution['bigquery']['dataset'],
                                   solution['bigquery']['query'])

        # for easy lookup use dictionary; setdefault returns the list so
        # the append chains in one statement
        solution['scores'] = {}
        for score in scores:
            solution['scores'].setdefault(str(score[0]), []).append({
                'variant_id': str(score[1]),
                'variant': score[2],
                'score': float(score[3])
            })

        if project.verbose:
            print('GOT SCORES:', len(solution['scores']))

    return project.task['solutions']
Example #3
0
def create_cm_site_segmentation(config, task):
  """Round-trip the CM_Site_Segments sheet tab through BigQuery.

  Writes the sheet rows to a staging table, derives Site_Type via the
  canned query, then writes the typed results back below the header row.
  """
  # Read sheet to bq table; a falsy read is treated as an empty table
  sheet_rows = sheets_read(config, task['auth_sheets'], task['sheet'], 'CM_Site_Segments', 'A:C', retries=10) or []

  # staging table schema: ( type, name ) pairs, all NULLABLE
  schema = [
    {"type": column_type, "name": column_name, "mode": "NULLABLE"}
    for column_type, column_name in (
      ("STRING", "Site_Dcm"),
      ("INTEGER", "Impressions"),
      ("STRING", "Site_Type"),
    )
  ]

  rows_to_table(
    config,
    auth=task['auth_bq'],
    project_id=config.project,
    dataset_id=task['dataset'],
    table_id=CM_SITE_SEGMENTATION_SHEET_TABLE,
    rows=sheet_rows,
    schema=schema,
    skip_rows=1,
    disposition='WRITE_TRUNCATE'
  )

  # Get Site_Type from the sheet
  run_query_from_file(config, task, Queries.cm_site_segmentation.replace('{{dataset}}', task['dataset']), CM_SITE_SEGMENTATION_TABLE)

  # Move Table back to sheets
  query = 'SELECT * from `{}.{}.{}`'.format(config.project, task['dataset'], CM_SITE_SEGMENTATION_TABLE)
  typed_rows = rows_to_type(
    query_to_rows(config, task['auth_bq'], config.project, task['dataset'], query, legacy=False)
  )

  # makes sure types are correct in sheet
  sheets_clear(config, task['auth_sheets'], task['sheet'], 'CM_Site_Segments', 'A2:C')
  sheets_write(config, task['auth_sheets'], task['sheet'], 'CM_Site_Segments', 'A2:C', typed_rows)
Example #4
0
def conversions_download():
  """Yield conversion rows from whichever sources the task specifies.

  Checks project.task for 'bigquery', 'sheets', and 'csv' blocks in turn;
  every source that is present is read and its rows are yielded in order.
  """
  task = project.task

  if project.verbose:
    print('CONVERSION DOWNLOAD')

  # pull from bigquery if specified
  if 'bigquery' in task:
    if project.verbose:
      print('READING BIGQUERY')
    yield from query_to_rows(
        task['auth'],
        project.id,
        task['bigquery']['dataset'],
        'SELECT * FROM %s' % task['bigquery']['table'],
        legacy=task['bigquery'].get('legacy', True))

  # pull from sheets if specified
  if 'sheets' in task:
    if project.verbose:
      print('READING SHEET')
    yield from sheets_read(task['auth'], task['sheets']['url'],
                           task['sheets']['tab'], task['sheets']['range'])

  # pull from csv if specified
  if 'csv' in task:
    if project.verbose:
      print('READING CSV FILE')
    with io.open(task['csv']['file']) as csv_file:
      # skip header rows, keep everything else
      yield from (row for row in csv_to_rows(csv_file)
                  if row[0] not in CSV_HEADERS)
Example #5
0
def get_impacts():
  """Read ( key, impact ) pairs from sheet or BigQuery into a dict.

  Source is chosen from project.task['impacts']: a 'sheet' block wins,
  otherwise a 'bigquery' block is used; with neither, the result is {}.

  Returns:
    Dict mapping str(key) -> float(impact).
  """
  if project.verbose: print('GETTING IMPACTS')

  impacts = []

  if 'sheet' in project.task['impacts']:
    impacts = sheets_read(
      project.task['auth'],
      project.task['impacts']['sheet']['url'],
      project.task['impacts']['sheet']['tab'],
      project.task['impacts']['sheet']['range']
    )
  elif 'bigquery' in project.task['impacts']:
    impacts = query_to_rows(
      project.task['auth'],
      project.id,
      project.task['impacts']['bigquery']['dataset'],
      project.task['impacts']['bigquery']['query']
    )

  # for easy lookup use dictionary ( comprehension instead of dict([...]) )
  impacts = {str(i[0]): float(i[1]) for i in impacts}

  if project.verbose: print('GOT IMPACTS:', len(impacts))

  return impacts
Example #6
0
def sheets():
    """Read the configured sheet range and compare it to the expected values."""

    sheet = project.task['sheets']
    cells = sheets_read(project.task['auth'], sheet['url'], sheet['tab'],
                        sheet['range'])

    # cast cell strings to native types before comparing
    object_compare(list(rows_to_type(cells)), sheet['values'])
Example #7
0
def sheets(config, task):
    """Read the configured sheet range and compare it to the expected values."""
    print('TEST: sheets')

    spec = task['sheets']
    cells = sheets_read(config, task['auth'], spec['sheet'],
                        spec['tab'], spec['range'])

    # cast cell strings to native types before comparing
    object_compare(list(rows_to_type(cells)), spec['values'])
Example #8
0
def floodlight_monitor() -> None:
    """ The task handler.  See module description.

  Reads floodlight triggers from the sheet, creates a report per
  floodlight, analyzes each report for outliers, and emails alerts
  grouped by address.

  Args:
    Everything is passed using project.task.

  Returns:
    Nothing.
  """

    if project.verbose:
        print('FLOODLIGHT MONITOR')

    # make sure tab exists in sheet ( deprecated, use sheet task instead )
    if 'template' in project.task['sheet']:
        sheets_tab_copy(project.task['auth'],
                        project.task['sheet']['template']['sheet'],
                        project.task['sheet']['template']['tab'],
                        project.task['sheet']['sheet'],
                        project.task['sheet']['tab'])

    # read peers from sheet
    triggers = sheets_read(project.task['auth'],
                           project.task['sheet']['sheet'],
                           project.task['sheet']['tab'],
                           project.task['sheet']['range'])

    if project.verbose and len(triggers) == 0:
        print('FLOODLIGHT MONITOR: No floodlight ids specified in sheet.')

    alerts = {}
    day = None

    # create reports first in parallel
    for trigger in triggers:
        trigger.append(floodlight_report(trigger[TRIGGER_ID]))

    # download data from all reports
    for trigger in triggers:

        # get report rows for each floodlight
        rows = floodlight_rows(trigger[TRIGGER_REPORT])

        # calculate outliers
        last_day, rows = floodlight_analysis(rows)

        if last_day:
            # find last day report ran
            day = last_day if day is None else max(day, last_day)

            # group alerts by email
            alerts.setdefault(trigger[TRIGGER_EMAIL], [])
            alerts[trigger[TRIGGER_EMAIL]].extend(rows)

    if alerts:
        floodlight_email(day, alerts)
Example #9
0
def mapping():
    """Build a labeling view from a mapping sheet.

    Copies the mapping tab from template if needed, reads
    dimension / tag / column / keyword rows, then generates one CASE
    statement per dimension and writes the result as a BigQuery view
    over the input table.
    """
    # Python 3 print function ( was a Python 2 print statement )
    if project.verbose:
        print('MAPPING')

    # create the sheet from template if it does not exist
    sheets_tab_copy(project.task['auth'], TEMPLATE_SHEET, TEMPLATE_TAB,
                    project.task['sheet'], project.task['tab'])

    # move if specified
    dimensions = {}
    defaults = {}
    rows = sheets_read(project.task['auth'], project.task['sheet'],
                       project.task['tab'], 'A1:D')

    # if rows don't exist, query is still created without mapping ( allows blank maps )
    if rows:
        # sanitize mapping
        # 0 = Dimension, 1 = Tag, 2 = Column, 3 = Keyword
        for row in rows[1:]:
            if project.verbose:
                print('ROW: ', row)
            # sanitize row; list comprehension ( not map ) so len() works
            # under Python 3 where map returns an iterator
            row = [RE_SQLINJECT.sub('', c.strip()) for c in row]
            if len(row) == 2:  # default
                defaults.setdefault(row[0], row[1])
            else:  # tag
                dimensions.setdefault(row[0], {})  # dimension
                dimensions[row[0]].setdefault(row[1], {})  # tag
                dimensions[row[0]][row[1]].setdefault(row[2], [])  # column
                dimensions[row[0]][row[1]][row[2]].extend(
                    [k.strip() for k in row[3].split(',') if k])  # keywords

    # construct query
    query = 'SELECT\n  *,\n'
    for dimension, tags in dimensions.items():
        query += '  CASE\n'
        for tag, columns in tags.items():
            query += '    WHEN '
            for column, keywords in columns.items():
                for count, keyword in enumerate(keywords):
                    if count != 0: query += 'OR '
                    query += '%s CONTAINS "%s" ' % (column, keyword)
            query += 'THEN "%s"\n' % tag
        query += '    ELSE "%s"\n  END AS %s,\n' % (defaults.get(
            dimension, ''), dimension)
    query += 'FROM [%s.%s]' % (project.task['in']['dataset'],
                               project.task['in']['table'])

    if project.verbose:
        print('QUERY: ', query)

    # write to view
    query_to_view(project.task['out']['auth'],
                  project.id,
                  project.task['out']['dataset'],
                  project.task['out']['view'],
                  query,
                  replace=True)
Example #10
0
def create_cm_site_segmentation(project):
    """Round-trip the CM_Site_Segments sheet tab through BigQuery.

    Stages the sheet rows into a table, derives Site_Type via the SQL
    file, then writes the typed results back below the header row.
    """
    # Read sheet to bq table; a falsy read is treated as an empty table
    sheet_rows = sheets_read('user',
                             project.task['sheet'],
                             'CM_Site_Segments',
                             'A:C',
                             retries=10) or []

    # staging schema built from ( type, name ) pairs, all NULLABLE
    schema = [
        {"type": column_type, "name": column_name, "mode": "NULLABLE"}
        for column_type, column_name in (("STRING", "Site_Dcm"),
                                         ("INTEGER", "Impressions"),
                                         ("STRING", "Site_Type"))
    ]

    rows_to_table(auth='service',
                  project_id=project.id,
                  dataset_id=project.task['dataset'],
                  table_id=CM_SITE_SEGMENTATION_SHEET_TABLE,
                  rows=sheet_rows,
                  schema=schema,
                  skip_rows=1,
                  disposition='WRITE_TRUNCATE')

    # Get Site_Type from the sheet
    run_query_from_file(
        os.path.join(os.path.dirname(__file__),
                     SQL_DIRECTORY + CM_SITE_SEGMENTATION_FILENAME),
        project.id, project.task['dataset'], CM_SITE_SEGMENTATION_TABLE)

    # Move Table back to sheets
    query = 'SELECT * from `{}.{}.{}`'.format(project.id,
                                              project.task['dataset'],
                                              CM_SITE_SEGMENTATION_TABLE)
    table_rows = query_to_rows('service',
                               project.id,
                               project.task['dataset'],
                               query,
                               legacy=False)

    # makes sure types are correct in sheet
    table_rows = rows_to_type(table_rows)
    sheets_clear('user', project.task['sheet'], 'CM_Site_Segments', 'A2:C')
    sheets_write('user', project.task['sheet'], 'CM_Site_Segments', 'A2:C',
                 table_rows)
Example #11
0
    def load(self):
        """Loads configs from Bulkdozer feed and applies values to object properties.

    Reads cell B3 of the 'Store' tab and, when a value is present,
    applies it to self.mode. Does nothing when no trix_id is configured.
    """
        if not self.trix_id:
            return

        cells = sheets_read(self.auth, self.trix_id, 'Store', 'B3', retries=0)
        if cells:
            self.mode = cells[0][0]
Example #12
0
def create_dv360_segments(config, task):
  """Round-trip the DV3 Segments sheet tab through BigQuery.

  Stages the sheet rows into a table, runs the custom segments query,
  and writes the typed results back to the sheet starting at row 2.
  """
  a1_notation = 'A:N'

  # staging schema built from ( type, name ) pairs, all NULLABLE
  schema = [
      {"type": column_type, "name": column_name, "mode": "NULLABLE"}
      for column_type, column_name in (
          ("STRING", "Advertiser"),
          ("INTEGER", "Advertiser_Id"),
          ("STRING", "Campaign"),
          ("INTEGER", "Campaign_Id"),
          ("STRING", "Insertion_Order"),
          ("INTEGER", "Insertion_Order_Id"),
          ("STRING", "Line_Item"),
          ("INTEGER", "Line_Item_Id"),
          ("STRING", "Line_Item_Type"),
          ("INTEGER", "Impressions"),
          ("STRING", "SegmentAutoGen"),
          ("STRING", "Segment1"),
          ("STRING", "Segment2"),
          ("STRING", "Segment3"),
      )
  ]

  # a falsy read is treated as an empty table
  sheet_rows = sheets_read(config, task['auth_sheets'], task['sheet'], 'DV3 Segments', a1_notation, retries=10) or []

  print('DV360 SEGMENT SHEET TABLE WRITE')

  rows_to_table(
    config,
    auth=task['auth_bq'],
    project_id=config.project,
    dataset_id=task['dataset'],
    table_id=DV360_CUSTOM_SEGMENTS_SHEET_TABLE,
    rows=sheet_rows,
    schema=schema,
    skip_rows=1,
    disposition='WRITE_TRUNCATE'
    )

  # Run Query
  if config.verbose:
    print('RUN DV360 Custom Segments Query')
  run_query_from_file(config, task, Queries.dv360_custom_segments.replace('{{dataset}}', task['dataset']), DV360_CUSTOM_SEGMENTS_TABLE)

  # Move Table back to sheets
  query = 'SELECT * from `{}.{}.{}`'.format(config.project, task['dataset'], DV360_CUSTOM_SEGMENTS_TABLE)
  table_rows = query_to_rows(config, task['auth_bq'], config.project, task['dataset'], query, legacy=False)

  # makes sure types are correct in sheet; shift the range below the header
  a1_notation = a1_notation[0] + '2' + a1_notation[1:]
  table_rows = rows_to_type(table_rows)
  sheets_clear(config, task['auth_sheets'], task['sheet'], 'DV3 Segments', a1_notation)
  sheets_write(config, task['auth_sheets'], task['sheet'], 'DV3 Segments', a1_notation, table_rows)
Example #13
0
def floodlight_monitor():
    """Analyze floodlight reports and email outlier alerts grouped by address."""
    if project.verbose:
        print('FLOODLIGHT MONITOR')

    sheet = project.task['sheet']

    # make sure tab exists in sheet ( deprecated, use sheet task instead )
    if 'template' in sheet:
        sheets_tab_copy(project.task['auth'],
                        sheet['template']['sheet'],
                        sheet['template']['tab'],
                        sheet['sheet'],
                        sheet['tab'])

    # read peers from sheet
    triggers = sheets_read(project.task['auth'], sheet['sheet'],
                           sheet['tab'], sheet['range'])
    # 0 - Floodlight Id
    # 1 - email
    # 2 - dcm report id ( added by this script )
    # 3 - status, added by the script ( LOW, NORMAL, HIGH )

    if project.verbose and not triggers:
        print('FLOODLIGHT MONITOR: No floodlight ids specified in sheet.')

    alerts = {}
    day = None

    # create reports first in parallel
    for trigger in triggers:
        trigger.append(floodlight_report(trigger[0]))

    # download data from all reports
    for trigger in triggers:

        # get report rows for each floodlight
        report_rows = floodlight_rows(trigger[2])

        # calculate outliers
        last_day, report_rows = floodlight_analysis(report_rows)

        if last_day:
            # track the most recent day any report covered
            if day is None:
                day = last_day
            else:
                day = max(day, last_day)

            # group alerts by email
            alerts.setdefault(trigger[1], []).extend(report_rows)

    if alerts:
        floodlight_email(day, alerts)
Example #14
0
def get_owners():
    """Read account owners from sheet or BigQuery, grouped by email.

    Each owner row is ( name, owner, email, dcm, dbm, ds, studio ).
    Rows sharing an email are merged: each platform id is collected into
    a list under its column key. Rows too short to index are reported
    and skipped.

    Returns:
      Iterable of grouped owner dictionaries ( dict_values ).
    """
    # Python 3 print function ( was a Python 2 print statement )
    if project.verbose:
        print('GETTING OWNERS')

    owners = []

    if 'sheet' in project.task['owners']:
        owners = sheets_read(project.task['auth'],
                             project.task['owners']['sheet']['url'],
                             project.task['owners']['sheet']['tab'],
                             project.task['owners']['sheet']['range'])
    elif 'bigquery' in project.task['owners']:
        owners = query_to_rows(project.task['auth'], project.id,
                               project.task['owners']['bigquery']['dataset'],
                               project.task['owners']['bigquery']['query'])

    # optional id columns and the keys they are collected under
    id_columns = (
        (3, 'DCM Network ID'),
        (4, 'DBM Partner ID'),
        (5, 'DS Account ID'),
        (6, 'Studio Account ID'),
    )

    # group account owners by email, create easy lookup sets for ids
    owners_grouped = {}
    for owner in owners:
        try:
            group = owners_grouped.setdefault(
                owner[2], {
                    'Account Name': owner[0],
                    'Account Owner': owner[1],
                    'Account Email': owner[2],
                    'DCM Network ID': [],
                    'DBM Partner ID': [],
                    'DS Account ID': [],
                    'Studio Account ID': [],
                })

            # collect each id the row provides ( data-driven instead of
            # four copy-pasted if blocks )
            for column, key in id_columns:
                if len(owner) > column and owner[column]:
                    group[key].append(str(owner[column]))

        except IndexError:
            print('ERROR:', owner)

    if project.verbose:
        print('GOT OWNERS:', len(owners))

    return owners_grouped.values()
Example #15
0
    def load_id_map(self):
        """Loads the ID map from the Bulkdozer feed into the object.

    Reads row A1:Z1 of the 'Store' tab; the cells are concatenated into
    one JSON document and parsed into self._id_map. When the row is
    empty the map is reset to {}. No-op when trix_id is not configured.
    """
        if self.trix_id:
            data = sheets_read(self.auth, self.trix_id, 'Store', 'A1:Z1')
            if data and data[0]:
                # join the JSON fragments in one pass ( ''.join instead of
                # quadratic += concatenation in a loop )
                self._id_map = json.loads(''.join(data[0]))
            else:
                self._id_map = {}
Example #16
0
def dcm_replicate_accounts():
    """Return ( account, advertiser ) tuples read from the input sheet.

    Reads the sheet named in project.task['in'] and pivots its columns
    into rows, dropping the first cell of each row.

    Returns:
      Pivoted rows when a 'sheet' input is configured, otherwise None
      ( implicit ).
    """
    if project.verbose:
        print('DCM REPLICATE ACCOUNTS')

    # read inputs ( the unused 'accounts' accumulator was removed )
    if 'sheet' in project.task['in']:
        rows = sheets_read(
            project.task['auth'],
            project.task['in']['sheet'],
            project.task['in']['tab'],
        )
        # rotate rows so account, advertiser tuple is returned
        return pivot_column_to_row([row[1:] for row in rows])
Example #17
0
  def _get_feed(self):
    """Fetches the feed based on initialization parameters.

    Returns:
      List of lists that represents the rows and columns of the feed. If the
      feed isn't found returns a list with an empty list.
    """
    # candidate tab names for this feed; empty when the feed is unknown
    candidates = self._feed_name_tab_map.get(self.feed_name, ())

    for tab_name in candidates:
      # match the candidate against the spreadsheet's actual tab titles
      for sheet in self.spreadsheet['sheets']:
        if sheet['properties']['title'] == tab_name:
          self.tab_name = tab_name
          return sheets_read(self.auth, self.trix_id, tab_name, self.trix_range)

    return [[]]
Example #18
0
def floodlight_monitor():
    """Analyze floodlight reports and email outlier alerts grouped by address."""
    if project.verbose:
        print("FLOODLIGHT MONITOR")

    sheet = project.task['sheet']

    # make sure tab exists in sheet ( deprecated, use sheet task instead )
    if 'template' in sheet:
        sheets_tab_copy(project.task['auth'],
                        sheet['template']['sheet'],
                        sheet['template']['tab'],
                        sheet['sheet'],
                        sheet['tab'])

    # read triggers from sheet
    # 0 - Floodlight Id
    # 1 - email
    triggers = sheets_read(project.task['auth'], sheet['sheet'],
                           sheet['tab'], sheet['range'])

    if project.verbose and not triggers:
        print("FLOODLIGHT MONITOR: No floodlight ids specified in sheet.")

    alerts = {}
    day = None

    for trigger in triggers:

        # get report data for each floodlight, cleaned and typed
        report = floodlight_report(trigger[0])
        report_rows = rows_to_type(
            rows_header_trim(report_clean(report_to_rows(report))), column=6)

        # calculate outliers
        last_day, report_rows = floodlight_analysis(report_rows)

        if last_day:
            # track the most recent day any report covered
            if day is None:
                day = last_day
            else:
                day = max(day, last_day)

            # group alerts by email
            alerts.setdefault(trigger[1], []).extend(report_rows)

    if alerts:
        floodlight_email(day, alerts)
Example #19
0
def create_cm_site_segmentation(project):
  # Read sheet to bq table
  sheet_rows = sheets_read(
      'user', project.task['sheet'], 'CM_Site_Segments', 'A:C', retries=10)
  if not sheet_rows:
    sheet_rows = []

  schema = [{
      'type': 'STRING',
      'name': 'Site_Dcm',
      'mode': 'NULLABLE'
  }, {
      'type': 'INTEGER',
      'name': 'Impressions',
      'mode': 'NULLABLE'
  }, {
      'type': 'STRING',
      'name': 'Site_Type',
      'mode': 'NULLABLE'
  }]

  rows_to_table(
      auth='service',
      project_id=project.id,
      dataset_id=project.task['dataset'],
      table_id=CM_SITE_SEGMENTATION_SHEET_TABLE,
      rows=sheet_rows,
      schema=schema,
      skip_rows=1,
      disposition='WRITE_TRUNCATE')

  # Get Site_Type from the sheet
  run_query_from_file(Queries.cm_site_segmentation, CM_SITE_SEGMENTATION_TABLE)

  # Move Table back to sheets
  query = 'SELECT * from `' + project.id + '.' + project.task[
      'dataset'] + '.' + CM_SITE_SEGMENTATION_TABLE + '`'
  rows = query_to_rows(
      'service', project.id, project.task['dataset'], query, legacy=False)

  # makes sure types are correct in sheet
  rows = rows_to_type(rows)
  sheets_clear('user', project.task['sheet'], 'CM_Site_Segments', 'A2:C')
  sheets_write('user', project.task['sheet'], 'CM_Site_Segments', 'A2:C', rows)
Example #20
0
def videos_from_sheets(sheets):
    """Read video rows from the given sheet tab and convert them to videos."""
    return rows_to_videos(
        sheets_read(project.task['auth'], sheets['sheet'], sheets['tab'],
                    'A3:Y'))
Example #21
0
def videos_from_sheet(sheet):
  """Read video rows from the given sheet URL and convert them to videos."""
  sheet_rows = sheets_read(project.task['auth'], sheet['url'], sheet['tab'],
                           "A3:Y")
  return rows_to_videos(sheet_rows)
Example #22
0
def dynamic_costs():
    """Build dynamic-costs reports from a configuration sheet.

    Copies the configuration tab from template, reads its key / value
    pairs, then creates combos, main, and ( optionally ) shadow reports
    and combines them into a single view.
    """

    # make sure tab exists in sheet
    sheets_tab_copy(project.task['auth'],
                    project.task['sheet']['template']['url'],
                    project.task['sheet']['template']['tab'],
                    project.task['sheet']['url'], project.task['sheet']['tab'])

    # read configuration from sheet
    inputs = sheets_read(project.task['auth'], project.task['sheet']['url'],
                         project.task['sheet']['tab'],
                         project.task['sheet']['range'])

    # convert inputs into dictionary, padding value-less keys with ""
    def expand_list(lst):
        if len(lst) == 1: return (lst[0], "")
        elif len(lst) == 2: return lst

    inputs = [expand_list(row) for row in inputs]
    inputs = dict(inputs)

    # Python 3 print function ( was a Python 2 print statement )
    if project.verbose:
        print('DYNAMIC COSTS PARAMETERS', inputs)

    # BUGFIX: the original tested `'Main Advertiser ID' in inputs` and so
    # bailed out precisely when the key WAS present; must be `not in`.
    if 'Main Advertiser ID' not in inputs or not inputs['Main Advertiser ID']:
        print("Configuration sheet not filled out.")
        return

    # allows each advertiser to run multiple reports ( somewhat collision avoidance )
    unique_name = inputs['Dynamic Profile ID']

    # check if using wrapped tags
    shadow = inputs['Shadow Advertiser ID'] and inputs['Shadow Campaign ID']

    # parse date range
    if inputs['Relative Date Range'] == 'CUSTOM':
        date_range = {
            "kind": "dfareporting#dateRange",
            "startDate": str(inputs['Start Date']),
            "endDate": str(inputs['End Date']),
        }
    else:
        date_range = {
            "kind": "dfareporting#dateRange",
            "relativeDateRange": str(inputs['Relative Date Range'])
        }

    combos_table = report_combos(unique_name, date_range,
                                 inputs['Main Advertiser ID'],
                                 inputs['Main Campaign ID'],
                                 inputs['Dynamic Profile ID'])

    main_table = report_main(unique_name, date_range,
                             inputs['Main Advertiser ID'],
                             inputs['Main Campaign ID'], shadow)

    if shadow:
        shadow_table = report_shadow(unique_name, date_range,
                                     inputs['Shadow Advertiser ID'],
                                     inputs['Shadow Campaign ID'])
    else:
        shadow_table = None

    view_combine(unique_name, combos_table, main_table, shadow_table)
Example #23
0
def create_dv360_segments(project):
  """Round-trip the DV3 Segments sheet tab through BigQuery.

  Stages the sheet rows into a table, runs the custom segments query,
  and writes the typed results back to the sheet starting at row 2.
  """
  a1_notation = 'A:M'

  # staging schema built from ( type, name ) pairs, all NULLABLE
  schema = [
      {'type': column_type, 'name': column_name, 'mode': 'NULLABLE'}
      for column_type, column_name in (
          ('STRING', 'Advertiser'),
          ('INTEGER', 'Advertiser_Id'),
          ('STRING', 'Campaign'),
          ('INTEGER', 'Campaign_Id'),
          ('STRING', 'Insertion_Order'),
          ('INTEGER', 'Insertion_Order_Id'),
          ('STRING', 'Line_Item'),
          ('INTEGER', 'Line_Item_Id'),
          ('STRING', 'Line_Item_Type'),
          ('INTEGER', 'Impressions'),
          ('STRING', 'Segment1'),
          ('STRING', 'Segment2'),
          ('STRING', 'Segment3'),
      )
  ]

  # a falsy read is treated as an empty table
  sheet_rows = sheets_read(
      'user', project.task['sheet'], 'DV3 Segments', a1_notation,
      retries=10) or []

  print('DV360 SEGMENT SHEET TABLE WRITE')

  rows_to_table(
      auth='service',
      project_id=project.id,
      dataset_id=project.task['dataset'],
      table_id=DV360_CUSTOM_SEGMENTS_SHEET_TABLE,
      rows=sheet_rows,
      schema=schema,
      skip_rows=1,
      disposition='WRITE_TRUNCATE')

  # Run Query
  if project.verbose:
    print('RUN DV360 Custom Segments Query')
  run_query_from_file(Queries.dv360_custom_segments, DV360_CUSTOM_SEGMENTS_TABLE)

  # Move Table back to sheets
  query = 'SELECT * from `{}.{}.{}`'.format(
      project.id, project.task['dataset'], DV360_CUSTOM_SEGMENTS_TABLE)
  table_rows = query_to_rows(
      'service', project.id, project.task['dataset'], query, legacy=False)

  # makes sure types are correct in sheet; shift range below the header
  a1_notation = a1_notation[0] + '2' + a1_notation[1:]
  table_rows = rows_to_type(table_rows)
  sheets_clear('user', project.task['sheet'], 'DV3 Segments', a1_notation)
  sheets_write('user', project.task['sheet'], 'DV3 Segments', a1_notation,
               table_rows)
Example #24
0
def get_rows(auth, source):
    """Processes standard read JSON block for dynamic loading of data.

  Allows us to quickly pull a column or columns of data from and use it as an
  input
  into a script. For example pull a list of ids from bigquery and act on each
  one.

  - When pulling a single column specify single_cell = True. Returns list AKA
  values.
  - When pulling a multiple columns specify single_cell = False. Returns list of
  lists AKA rows.
  - Values are always given as a list ( single_cell will trigger necessary
  wrapping ).
  - Values, bigquery, sheet are optional, if multiple given result is one
  continous iterator.
  - Extensible, add a handler to define a new source ( be kind update the
  documentation json ).

  Include the following JSON in a recipe, then in the run.py handler when
  encountering that block pass it to this function and use the returned results.

    from utils.data import get_rows

    var_json = {
      "in":{
        "single_cell":[ boolean ],
        "values": [ integer list ],
        "bigquery":{
          "dataset": [ string ],
          "query": [ string ],
          "legacy":[ boolean ]
        },
        "bigquery":{
          "dataset": [ string ],
          "table": [ string ],
        },
        "sheet":{
          "sheet":[ string - full URL, suggest using share link ],
          "tab":[ string ],
          "range":[ string - A1:A notation ]
        }
      }
    }

    values = get_rows('user', var_json)

  Or you can use it directly with project singleton.

    from util.project import project
    from utils.data import get_rows

    @project.from_parameters
    def something():
      values = get_rows(project.task['auth'], project.task['in'])

    if __name__ == "__main__":
      something()

  Args:
    auth: (string) The type of authentication to use, user or service.
    source: (json) A json block resembling var_json described above.

  Returns:
    If single_cell is False: Returns a list of row values [[v1], [v2], ... ]
    If single_cell is True: Returns a list of values [v1, v2, ...]
"""

    # if handler points to list, concatenate all the values from various sources into one list
    if isinstance(source, list):
        for s in source:
            for r in get_rows(auth, s):
                yield r

    # if handler is an endpoint, fetch data
    else:
        if 'values' in source:
            if isinstance(source['values'], list):
                for value in source['values']:
                    yield value
            else:
                yield source['values']

        if 'sheet' in source:
            rows = sheets_read(
                project.task['auth'],
                source['sheet']['sheet'],
                source['sheet']['tab'],
                source['sheet']['range'],
            )

            for row in rows:
                yield row[0] if source.get('single_cell', False) else row

        if 'bigquery' in source:

            rows = []

            if 'table' in source['bigquery']:
                rows = table_to_rows(source['bigquery'].get('auth', auth),
                                     project.id,
                                     source['bigquery']['dataset'],
                                     source['bigquery']['table'],
                                     as_object=source['bigquery'].get(
                                         'as_object', False))

            else:
                rows = query_to_rows(
                    source['bigquery'].get('auth', auth),
                    project.id,
                    source['bigquery']['dataset'],
                    query_parameters(source['bigquery']['query'],
                                     source['bigquery'].get('parameters', {})),
                    legacy=source['bigquery'].get('legacy', False),
                    as_object=source['bigquery'].get('as_object', False))

            for row in rows:
                yield row[0] if source.get('single_cell', False) else row
Example #25
0
def sheets():
    """Process the 'sheets' task: manage a sheet tab and optionally move its data.

    Reads configuration from project.task. In order: clears the tab range,
    deletes the tab, creates or copies the tab, writes rows from a 'write'
    source, and finally copies the tab contents into a BigQuery table when
    'out' is specified.
    """
    # print() used as a function for Python 3 compatibility
    if project.verbose:
        print('SHEETS')

    # clear if specified
    if project.task.get('clear', False):
        sheets_clear(project.task['auth'], project.task['sheet'],
                     project.task['tab'], project.task['range'])

    # delete if specified ( after clear to prevent errors in case both are given )
    if project.task.get('delete', False):
        sheets_tab_delete(project.task['auth'], project.task['sheet'],
                          project.task['tab'])

    # create or copy if specified
    if 'template' in project.task:
        sheets_tab_copy(project.task['auth'],
                        project.task['template']['sheet'],
                        project.task['template']['tab'], project.task['sheet'],
                        project.task['tab'])
    else:
        sheets_tab_create(project.task['auth'], project.task['sheet'],
                          project.task['tab'])

    # write data if specified
    if 'write' in project.task:
        rows = get_rows(project.task['auth'], project.task['write'])
        sheets_write(project.task['auth'], project.task['sheet'],
                     project.task['tab'], project.task['range'], rows)

    # move to BigQuery if specified
    if 'out' in project.task:
        rows = sheets_read(project.task['auth'], project.task['sheet'],
                           project.task['tab'], project.task['range'])

        if rows:
            schema = None

            # RECOMMENDED: define schema in json
            if project.task['out']['bigquery'].get('schema'):
                if project.verbose:
                    print('SHEETS SCHEMA DEFINED')
                schema = project.task['out']['bigquery']['schema']
            # NOT RECOMMENDED: determine schema if missing
            else:
                if project.verbose:
                    print('SHEETS SCHEMA DETECT ( Note Recommended - Define Schema In JSON )')
                # cast rows to types ( for schema detection )
                rows = rows_to_type(rows)
                rows, schema = get_schema(rows,
                                          project.task.get('header', False),
                                          infer_type=project.task.get(
                                              'infer_type', True))

            # write to table ( not using put because no use cases for other destinations )
            rows_to_table(
                auth=project.task['out'].get('auth', project.task['auth']),
                project_id=project.id,
                dataset_id=project.task['out']['bigquery']['dataset'],
                table_id=project.task['out']['bigquery']['table'],
                rows=rows,
                schema=schema,
                skip_rows=1 if project.task.get('header', False) else 0,
                disposition=project.task['out']['bigquery'].get(
                    'disposition', 'WRITE_TRUNCATE'))

        else:
            print('SHEET EMPTY')
Example #26
0
def create_dv360_segments(project):
    """Sync the 'DV3 Segments' sheet tab through BigQuery and back.

    Reads segment rows from the sheet, loads them into a staging BigQuery
    table, runs the custom segments query against that table, then writes
    the query result back to the same sheet tab with correct cell types.
    """
    a1_notation = 'A:M'

    # ( type, name ) for each sheet column; every field is NULLABLE.
    columns = [
        ('STRING', 'Advertiser'),
        ('INTEGER', 'Advertiser_Id'),
        ('STRING', 'Campaign'),
        ('INTEGER', 'Campaign_Id'),
        ('STRING', 'Insertion_Order'),
        ('INTEGER', 'Insertion_Order_Id'),
        ('STRING', 'Line_Item'),
        ('INTEGER', 'Line_Item_Id'),
        ('STRING', 'Line_Item_Type'),
        ('INTEGER', 'Impressions'),
        ('STRING', 'Segment1'),
        ('STRING', 'Segment2'),
        ('STRING', 'Segment3'),
    ]
    schema = [{
        "type": column_type,
        "name": column_name,
        "mode": "NULLABLE"
    } for column_type, column_name in columns]

    # read the sheet; treat a missing / empty read as no rows
    sheet_rows = sheets_read('user',
                             project.task['sheet'],
                             'DV3 Segments',
                             a1_notation,
                             retries=10) or []

    print('DV360 SEGMENT SHEET TABLE WRITE')

    rows_to_table(auth='service',
                  project_id=project.id,
                  dataset_id=project.task['dataset'],
                  table_id=DV360_CUSTOM_SEGMENTS_SHEET_TABLE,
                  rows=sheet_rows,
                  schema=schema,
                  skip_rows=1,
                  disposition='WRITE_TRUNCATE')

    # Run Query: load the SQL template and substitute project / dataset
    sql_path = os.path.join(os.path.dirname(__file__),
                            SQL_DIRECTORY + DV360_CUSTOM_SEGMENTS_FILENAME)
    with open(sql_path, 'r') as sql_file:
        template = sql_file.read().replace('\n', ' ')
    query = template.replace('{{project_id}}', project.id).replace(
        '{{dataset}}', project.task['dataset'])

    print('DV360 CUSTOM SEGMENT TABLE')

    query_to_table('service',
                   project.id,
                   project.task['dataset'],
                   DV360_CUSTOM_SEGMENTS_TABLE,
                   query,
                   legacy=False)

    # Move Table back to sheets
    select_query = 'SELECT * from `%s.%s.%s`' % (
        project.id, project.task['dataset'], DV360_CUSTOM_SEGMENTS_TABLE)
    segment_rows = query_to_rows('service',
                                 project.id,
                                 project.task['dataset'],
                                 select_query,
                                 legacy=False)

    # makes sure types are correct in sheet; write below the header row ( A2:M )
    a1_notation = a1_notation[:1] + '2' + a1_notation[1:]
    segment_rows = rows_to_type(segment_rows)
    sheets_clear('user', project.task['sheet'], 'DV3 Segments', a1_notation)
    sheets_write('user', project.task['sheet'], 'DV3 Segments', a1_notation,
                 segment_rows)
Example #27
0
def sheets():
    """Process the 'sheets' task: manage a sheet tab and optionally move its data.

    Reads configuration from project.task. In order: deletes the tab,
    creates it (optionally from a template), clears the range, writes or
    appends rows, and finally copies the tab contents into a BigQuery table
    when 'out' is specified. Skips silently when sheet or tab is missing.
    """
    if project.verbose:
        print('SHEETS')

    # if sheet or tab is missing, don't do anything
    if not project.task.get('sheet') or not project.task.get('tab'):
        if project.verbose:
            print('Missing Sheet and/or Tab, skipping task.')
        return

    # delete if specified, will delete sheet if no more tabs remain
    if project.task.get('delete', False):
        sheets_tab_delete(project.task['auth'], project.task['sheet'],
                          project.task['tab'])

    # create a sheet and tab if specified, if template
    if 'template' in project.task:
        sheets_create(
            project.task['auth'],
            project.task['sheet'],
            project.task['tab'],
            project.task['template'].get('sheet'),
            project.task['template'].get('tab'),
        )

    # clear if specified
    if project.task.get('clear', False):
        sheets_clear(project.task['auth'], project.task['sheet'],
                     project.task['tab'], project.task.get('range', 'A1'))

    # write data if specified ( 'range' defaults to A1, consistent with clear / out )
    if 'write' in project.task:
        rows = get_rows(project.task['auth'], project.task['write'])
        sheets_write(project.task['auth'],
                     project.task['sheet'],
                     project.task['tab'],
                     project.task.get('range', 'A1'),
                     rows,
                     append=False)

    # append data if specified
    if 'append' in project.task:
        rows = get_rows(project.task['auth'], project.task['append'])
        sheets_write(project.task['auth'],
                     project.task['sheet'],
                     project.task['tab'],
                     project.task.get('range', 'A1'),
                     rows,
                     append=True)

    # move data to BigQuery if specified
    if 'out' in project.task:
        rows = sheets_read(project.task['auth'],
                           project.task['sheet'], project.task['tab'],
                           project.task.get('range', 'A1'))

        if rows:
            schema = None

            # RECOMMENDED: define schema in json
            if project.task['out']['bigquery'].get('schema'):
                if project.verbose:
                    print('SHEETS SCHEMA DEFINED')
                schema = project.task['out']['bigquery']['schema']

            # NOT RECOMMENDED: determine schema if missing
            else:
                if project.verbose:
                    print(
                        'SHEETS SCHEMA DETECT ( Note Recommended - Define Schema In JSON )'
                    )
                # cast rows to types ( for schema detection )
                rows = rows_to_type(rows)
                rows, schema = get_schema(rows,
                                          project.task.get('header', False),
                                          infer_type=project.task.get(
                                              'infer_type', True))

            # write to table ( not using put because no use cases for other destinations )
            rows_to_table(
                auth=project.task['out'].get('auth', project.task['auth']),
                project_id=project.id,
                dataset_id=project.task['out']['bigquery']['dataset'],
                table_id=project.task['out']['bigquery']['table'],
                rows=rows,
                schema=schema,
                skip_rows=1 if project.task.get('header', False) else 0,
                disposition=project.task['out']['bigquery'].get(
                    'disposition', 'WRITE_TRUNCATE'))

        else:
            print('SHEET EMPTY')
Example #28
0
def videos_from_sheets(config, task):
    """Read video rows from the configured sheet tab and convert them to videos.

    Rows are read starting at A3 through column Y, then handed to
    rows_to_videos for conversion.
    """
    return rows_to_videos(
        sheets_read(config, task['auth'], task['sheets']['sheet'],
                    task['sheets']['tab'], 'A3:Y'))