Exemple #1
0
def get_solutions():
  if project.verbose: print 'GETTING SCORES'

  for solution in project.task['solutions']:
    scores = []

    if 'sheet' in solution:
      scores = sheets_read(
        project.task['auth'],
        solution['sheet']['url'],
        solution['sheet']['tab'],
        solution['sheet']['range']
      )
    elif 'bigquery' in solution:
      scores = query_to_rows(
        project.task['auth'],
        project.id,
        solution['bigquery']['dataset'],
        solution['bigquery']['query']
      )

    # for easy lookup use dictionary
    solution['scores'] = {}
    for score in scores:
      solution['scores'].setdefault(str(score[0]), [])
      solution['scores'][str(score[0])].append({ 'variant_id':str(score[1]), 'variant':score[2], 'score':float(score[3]) })

    if project.verbose: print 'GOT SCORES:', len(solution['scores'])

  return project.task['solutions']
Exemple #2
0
def conversions_download():
  if project.verbose: print 'CONVERSION DOWNLOAD'

  # pull from bigquery if specified
  if 'bigquery' in project.task:
    if project.verbose: print 'READING BIGQUERY'
    rows = query_to_rows(
      project.task['auth'],
      project.id,
      project.task['bigquery']['dataset'],
      'SELECT * FROM %s' % project.task['bigquery']['table'],
      legacy=project.task['bigquery'].get('legacy', True)
    )
    for row in rows: yield row

  # pull from sheets if specified
  if 'sheets' in project.task:
    if project.verbose: print 'READING SHEET'
    rows = sheets_read(
      project.task['auth'], 
      project.task['sheets']['url'], 
      project.task['sheets']['tab'], 
      project.task['sheets']['range']
    )
    for row in rows: yield row

  # pull from csv if specified
  if 'csv' in project.task:
    if project.verbose: print 'READING CSV FILE'
    with io.open(project.task['csv']['file']) as f:
      for row in csv_to_rows(f):
        if row[0] not in CSV_HEADERS: yield row
Exemple #3
0
def get_impacts():
  if project.verbose: print 'GETTING IMPACTS'

  impacts = []

  if 'sheet' in project.task['impacts']:
    impacts = sheets_read(
      project.task['auth'],
      project.task['impacts']['sheet']['url'],
      project.task['impacts']['sheet']['tab'],
      project.task['impacts']['sheet']['range']
    )
  elif 'bigquery' in project.task['impacts']:
    impacts = query_to_rows(
      project.task['auth'],
      project.id,
      project.task['impacts']['bigquery']['dataset'],
      project.task['impacts']['bigquery']['query']
    )

  # for easy lookup use dictionary
  impacts = dict([(str(i[0]), float(i[1])) for i in impacts])

  if project.verbose: print 'GOT IMPACTS:', len(impacts)

  return impacts
Exemple #4
0
def sheets():
    """Read a configured sheet range, cast values, and compare to expected.

    Delegates the comparison to object_compare against the expected values
    stored in project.task['sheets']['values'].
    """
    sheet = project.task['sheets']

    rows = sheets_read(
        project.task['auth'],
        sheet['url'],
        sheet['tab'],
        sheet['range']
    )

    # cast cells to python types before comparing against expected values
    object_compare(list(rows_to_type(rows)), sheet['values'])
Exemple #5
0
def mapping():
    if project.verbose: print 'MAPPING'

    # create the sheet from template if it does not exist
    sheets_tab_copy(project.task['auth'], TEMPLATE_SHEET, TEMPLATE_TAB,
                    project.task['sheet'], project.task['tab'])

    # move if specified
    dimensions = {}
    defaults = {}
    rows = sheets_read(project.task['auth'], project.task['sheet'],
                       project.task['tab'], 'A1:D')

    # if rows don't exist, query is still created without mapping ( allows blank maps )
    if rows:
        # sanitize mapping
        # 0 = Dimension, 1 = Tag, 2 = Column, 3 = Keyword
        for row in rows[1:]:
            if project.verbose: print 'ROW: ', row
            # sanitize row
            row = map(lambda c: RE_SQLINJECT.sub('', c.strip()), row)
            if len(row) == 2:  # default
                defaults.setdefault(row[0], row[1])
            else:  # tag
                dimensions.setdefault(row[0], {})  # dimension
                dimensions[row[0]].setdefault(row[1], {})
                dimensions[row[0]].setdefault(row[1], {})  # tag
                dimensions[row[0]][row[1]].setdefault(row[2], [])  # column
                dimensions[row[0]][row[1]][row[2]].extend(
                    [k.strip() for k in row[3].split(',') if k])  # keywords

    # construct query
    query = 'SELECT\n  *,\n'
    for dimension, tags in dimensions.items():
        query += '  CASE\n'
        for tag, columns in tags.items():
            query += '    WHEN '
            for column, keywords in columns.items():
                for count, keyword in enumerate(keywords):
                    if count != 0: query += 'OR '
                    query += '%s CONTAINS "%s" ' % (column, keyword)
            query += 'THEN "%s"\n' % tag
        query += '    ELSE "%s"\n  END AS %s,\n' % (defaults.get(
            dimension, ''), dimension)
    query += 'FROM [%s.%s]' % (project.task['in']['dataset'],
                               project.task['in']['table'])

    if project.verbose: print 'QUERY: ', query

    # write to view
    query_to_view(project.task['out']['auth'],
                  project.id,
                  project.task['out']['dataset'],
                  project.task['out']['view'],
                  query,
                  replace=True)
Exemple #6
0
    def load(self):
        """Loads configs from Bulkdozer feed and applies values to object properties.

    """
        if not self.trix_id:
            return

        # the run mode is stored in cell B3 of the Store tab
        data = sheets_read(self.auth, self.trix_id, 'Store', 'B3', retries=0)
        if data:
            self.mode = data[0][0]
Exemple #7
0
  def load_id_map(self):
    """Loads the ID map from the Bulkdozer feed into the object.

    Reads row 1 of the 'Store' tab; the JSON payload may be split across
    multiple cells, so cells are joined before parsing.  Falls back to an
    empty map when the row is blank.
    """
    if self.trix_id:
      data = sheets_read(self.auth, self.trix_id, 'Store', 'A1:Z1')
      if data and data[0]:
        # join avoids quadratic string concatenation across cells
        self._id_map = json.loads(''.join(data[0]))
      else:
        self._id_map = {}
Exemple #8
0
def get_owners():
  if project.verbose: print 'GETTING OWNERS'

  owners = []

  if 'sheet' in project.task['owners']:
    owners = sheets_read(
      project.task['auth'],
      project.task['owners']['sheet']['url'],
      project.task['owners']['sheet']['tab'],
      project.task['owners']['sheet']['range']
    )
  elif 'bigquery' in project.task['owners']:
    owners = query_to_rows(
      project.task['auth'],
      project.id,
      project.task['owners']['bigquery']['dataset'],
      project.task['owners']['bigquery']['query']
    )

  # group account owners by email, create easy lookup sets for ids
  owners_grouped = {}
  for owner in owners:
    try:

      owners_grouped.setdefault(owner[2], { 
        'Account Name':owner[0],
        'Account Owner':owner[1], 
        'Account Email':owner[2],
        'DCM Network ID':[],
        'DBM Partner ID':[],
        'DS Account ID':[],
        'Studio Account ID':[],
      })

      if len(owner) > 3 and owner[3]: owners_grouped[owner[2]]['DCM Network ID'].append(str(owner[3]))
      if len(owner) > 4 and owner[4]: owners_grouped[owner[2]]['DBM Partner ID'].append(str(owner[4]))
      if len(owner) > 5 and owner[5]: owners_grouped[owner[2]]['DS Account ID'].append(str(owner[5]))
      if len(owner) > 6 and owner[6]: owners_grouped[owner[2]]['Studio Account ID'].append(str(owner[6]))
      
    except IndexError: 
      print 'ERROR:', owner
      pass

  if project.verbose: print 'GOT OWNERS:', len(owners)

  return owners_grouped.values()
Exemple #9
0
    def _get_feed(self):
        """Fetches the feed based on initialization parameters.

    Returns:
      List of lists that represents the rows and columns of the feed. If the
      feed isn't found returns a list with an empty list.
    """
        # scan candidate tab names in order and read the first one present
        for candidate in self._feed_name_tab_map.get(self.feed_name, []):
            for sheet in self.spreadsheet['sheets']:
                if sheet['properties']['title'] == candidate:
                    self.tab_name = candidate
                    return sheets_read(self.auth, self.trix_id, candidate,
                                       self.trix_range)

        return [[]]
Exemple #10
0
def floodlight_monitor():
  if project.verbose: print "FLOODLIGHT MONITOR"

  # make sure tab exists in sheet
  sheets_tab_copy(
    project.task['auth'],
    project.task['sheet']['template']['url'],
    project.task['sheet']['template']['tab'],
    project.task['sheet']['url'],
    project.task['sheet']['tab'])

  # read peers from sheet
  triggers = sheets_read(
    project.task['auth'],
    project.task['sheet']['url'],
    project.task['sheet']['tab'],
    project.task['sheet']['range']
  )
  # 0 - Floodlight Id
  # 1 - email

  if project.verbose and len(triggers) == 0: print "FLOODLIGHT MONITOR: No floodlight ids specified in sheet."

  alerts = {}
  day = None

  for trigger in triggers:

    # get report data for each floodlight
    report = floodlight_report(trigger[0])
    rows = report_to_rows(report)
    rows = report_clean(rows)
    rows = rows_header_trim(rows)
    rows = rows_to_type(rows, column=6)
 
    # calculate outliers
    last_day, rows = floodlight_analysis(rows)

    # find last day report ran
    day = last_day if day is None else max(day, last_day)

    # group alerts by email
    alerts.setdefault(trigger[1], [])
    alerts[trigger[1]].extend(rows)

  floodlight_email(day, alerts)
Exemple #11
0
def dynamic_costs():

    # make sure tab exists in sheet
    sheets_tab_copy(project.task['auth'],
                    project.task['sheet']['template']['url'],
                    project.task['sheet']['template']['tab'],
                    project.task['sheet']['url'], project.task['sheet']['tab'])

    # read configuration from sheet
    inputs = sheets_read(project.task['auth'], project.task['sheet']['url'],
                         project.task['sheet']['tab'],
                         project.task['sheet']['range'])

    # convert inputs into dictionary
    def expand_list(lst):
        if len(lst) == 1: return (lst[0], "")
        elif len(lst) == 2: return lst

    inputs = [expand_list(row) for row in inputs]
    inputs = dict(inputs)

    if project.verbose: print "DYNAMIC COSTS PARAMETERS", inputs

    if not inputs['Main Advertiser ID']:
        print "Configuration sheet not filled out."
        return

    # allows each advertiser to run multiple reports ( somewhat collision avoidance )
    unique_name = inputs['Dynamic Profile ID']

    # check if using wrapped tags
    shadow = inputs['Shadow Advertiser ID'] and inputs['Shadow Campaign ID']

    # parse date range
    if inputs['Relative Date Range'] == 'CUSTOM':
        date_range = {
            "kind": "dfareporting#dateRange",
            "startDate": str(inputs['Start Date']),
            "endDate": str(inputs['End Date']),
        }
    else:
        date_range = {
            "kind": "dfareporting#dateRange",
            "relativeDateRange": str(inputs['Relative Date Range'])
        }

    combos_table = report_combos(unique_name, date_range,
                                 inputs['Main Advertiser ID'],
                                 inputs['Main Campaign ID'],
                                 inputs['Dynamic Profile ID'])

    main_table = report_main(unique_name, date_range,
                             inputs['Main Advertiser ID'],
                             inputs['Main Campaign ID'], shadow)

    if shadow:
        shadow_table = report_shadow(unique_name, date_range,
                                     inputs['Shadow Advertiser ID'],
                                     inputs['Shadow Campaign ID'])
    else:
        shadow_table = None

    view_combine(unique_name, combos_table, main_table, shadow_table)
Exemple #12
0
def sheets():
    """Task handler: clear, delete, create/copy, write, and export a sheet.

    Driven entirely by flags in project.task; operations run in a fixed
    order ( clear before delete to prevent errors when both are given,
    create/copy before write, read/export last ).  The 'out' branch copies
    the tab contents into a BigQuery table, detecting a schema only when
    one is not provided in the recipe.
    """
    if project.verbose: print 'SHEETS'

    # clear if specified
    if project.task.get('clear', False):
        sheets_clear(project.task['auth'], project.task['sheet'],
                     project.task['tab'], project.task['range'])

    # delete if specified ( after clear to prevent errors in case both are given )
    if project.task.get('delete', False):
        sheets_tab_delete(project.task['auth'], project.task['sheet'],
                          project.task['tab'])

    # create or copy if specified
    if 'template' in project.task:
        sheets_tab_copy(project.task['auth'],
                        project.task['template']['sheet'],
                        project.task['template']['tab'], project.task['sheet'],
                        project.task['tab'])
    else:
        sheets_tab_create(project.task['auth'], project.task['sheet'],
                          project.task['tab'])

    # write data if specified ( rows pulled via the standard get_rows block )
    if 'write' in project.task:
        rows = get_rows(project.task['auth'], project.task['write'])
        sheets_write(project.task['auth'], project.task['sheet'],
                     project.task['tab'], project.task['range'], rows)

    # move if specified: export the tab contents to BigQuery
    if 'out' in project.task:
        rows = sheets_read(project.task['auth'], project.task['sheet'],
                           project.task['tab'], project.task['range'])

        if rows:
            schema = None

            # RECOMMENDED: define schema in json
            if project.task['out']['bigquery'].get('schema'):
                if project.verbose: print 'SHEETS SCHEMA DEFINED'
                schema = project.task['out']['bigquery']['schema']
            # NOT RECOMMENDED: determine schema if missing
            else:
                if project.verbose:
                    print 'SHEETS SCHEMA DETECT ( Note Recommended - Define Schema In JSON )'
                # cast rows to types ( for schema detection )
                rows = rows_to_type(rows)
                rows, schema = get_schema(rows,
                                          project.task.get('header', False),
                                          infer_type=project.task.get(
                                              'infer_type', True))

            # write to table ( not using put because no use cases for other destinations )
            rows_to_table(
                auth=project.task['auth'],
                project_id=project.id,
                dataset_id=project.task['out']['bigquery']['dataset'],
                table_id=project.task['out']['bigquery']['table'],
                rows=rows,
                schema=schema,
                skip_rows=1 if project.task.get('header', False) else 0,
                disposition=project.task['out']['bigquery'].get(
                    'disposition', 'WRITE_TRUNCATE'))

        else:
            print 'SHEET EMPTY'
Exemple #13
0
def get_rows(auth, source):
  """Processes standard read JSON block for dynamic loading of data.

  Allows us to quickly pull a column or columns of data from and use it as an
  input into a script. For example pull a list of ids from bigquery and act
  on each one.

  - When pulling a single column specify single_cell = True. Returns list AKA values.
  - When pulling a multiple columns specify single_cell = False. Returns list of lists AKA rows.
  - Values are always given as a list ( single_cell will trigger necessary wrapping ).
  - Values, bigquery, sheet are optional, if multiple given result is one continous iterator.
  - Extensible, add a handler to define a new source ( be kind update the documentation json ).

  Include the following JSON in a recipe, then in the run.py handler when
  encountering that block pass it to this function and use the returned results.

    from utils.data import get_rows

    var_json = {
      "in":{
        "single_cell":[ boolean ],
        "values": [ integer list ],
        "bigquery":{
          "dataset": [ string ],
          "table": [ string ],
          "columns":[ integer list ],
          "legacy":[ boolean ]
        },
        "sheet":{
          "url":[ string - full URL, suggest using share link ],
          "tab":[ string ],
          "range":[ string - A1:A notation ]
        }
      }
    }

    values = get_rows('user', var_json)

  Args:
    auth: (string) The type of authentication to use, user or service.
    source: (json) A json block resembling var_json described above.

  Returns:
    If single_cell is False: Returns a list of row values [[v1], [v2], ... ]
    If single_cell is True: Returns a list of values [v1, v2, ...]
  """

  # if handler points to list, concatenate all the values from various sources into one list
  if isinstance(source, list):
    for s in source:
      for r in get_rows(auth, s):
        yield r

  # if handler is an endpoint, fetch data
  else:
    if 'values' in source:
      for value in source['values']:
        yield value if source.get('single_cell', False) else [value]

    if 'sheet' in source:
      # use the auth argument ( with optional per-source override ), matching
      # the bigquery branch; previously this ignored auth and read
      # project.task['auth'] directly, contradicting the documented contract
      rows = sheets_read(
        source['sheet'].get('auth', auth),
        source['sheet']['url'],
        source['sheet']['tab'],
        source['sheet']['range']
      )
      for row in rows:
        yield row[0] if source.get('single_cell', False) else row

    if 'bigquery' in source:
      rows = query_to_rows(
        source['bigquery'].get('auth', auth),
        project.id,
        source['bigquery']['dataset'],
        source['bigquery']['query'],
        legacy=source['bigquery'].get('legacy', False)
      )
      for row in rows:
        yield row[0] if source.get('single_cell', False) else row