def get_solutions(): if project.verbose: print 'GETTING SCORES' for solution in project.task['solutions']: scores = [] if 'sheet' in solution: scores = sheets_read( project.task['auth'], solution['sheet']['url'], solution['sheet']['tab'], solution['sheet']['range'] ) elif 'bigquery' in solution: scores = query_to_rows( project.task['auth'], project.id, solution['bigquery']['dataset'], solution['bigquery']['query'] ) # for easy lookup use dictionary solution['scores'] = {} for score in scores: solution['scores'].setdefault(str(score[0]), []) solution['scores'][str(score[0])].append({ 'variant_id':str(score[1]), 'variant':score[2], 'score':float(score[3]) }) if project.verbose: print 'GOT SCORES:', len(solution['scores']) return project.task['solutions']
def conversions_download(): if project.verbose: print 'CONVERSION DOWNLOAD' # pull from bigquery if specified if 'bigquery' in project.task: if project.verbose: print 'READING BIGQUERY' rows = query_to_rows( project.task['auth'], project.id, project.task['bigquery']['dataset'], 'SELECT * FROM %s' % project.task['bigquery']['table'], legacy=project.task['bigquery'].get('legacy', True) ) for row in rows: yield row # pull from sheets if specified if 'sheets' in project.task: if project.verbose: print 'READING SHEET' rows = sheets_read( project.task['auth'], project.task['sheets']['url'], project.task['sheets']['tab'], project.task['sheets']['range'] ) for row in rows: yield row # pull from csv if specified if 'csv' in project.task: if project.verbose: print 'READING CSV FILE' with io.open(project.task['csv']['file']) as f: for row in csv_to_rows(f): if row[0] not in CSV_HEADERS: yield row
def get_impacts(): if project.verbose: print 'GETTING IMPACTS' impacts = [] if 'sheet' in project.task['impacts']: impacts = sheets_read( project.task['auth'], project.task['impacts']['sheet']['url'], project.task['impacts']['sheet']['tab'], project.task['impacts']['sheet']['range'] ) elif 'bigquery' in project.task['impacts']: impacts = query_to_rows( project.task['auth'], project.id, project.task['impacts']['bigquery']['dataset'], project.task['impacts']['bigquery']['query'] ) # for easy lookup use dictionary impacts = dict([(str(i[0]), float(i[1])) for i in impacts]) if project.verbose: print 'GOT IMPACTS:', len(impacts) return impacts
def sheets():
  """Verify sheet contents match the expected values listed in the recipe."""
  sheet = project.task['sheets']
  raw = sheets_read(project.task['auth'], sheet['url'], sheet['tab'], sheet['range'])
  object_compare(list(rows_to_type(raw)), sheet['values'])
def mapping(): if project.verbose: print 'MAPPING' # create the sheet from template if it does not exist sheets_tab_copy(project.task['auth'], TEMPLATE_SHEET, TEMPLATE_TAB, project.task['sheet'], project.task['tab']) # move if specified dimensions = {} defaults = {} rows = sheets_read(project.task['auth'], project.task['sheet'], project.task['tab'], 'A1:D') # if rows don't exist, query is still created without mapping ( allows blank maps ) if rows: # sanitize mapping # 0 = Dimension, 1 = Tag, 2 = Column, 3 = Keyword for row in rows[1:]: if project.verbose: print 'ROW: ', row # sanitize row row = map(lambda c: RE_SQLINJECT.sub('', c.strip()), row) if len(row) == 2: # default defaults.setdefault(row[0], row[1]) else: # tag dimensions.setdefault(row[0], {}) # dimension dimensions[row[0]].setdefault(row[1], {}) dimensions[row[0]].setdefault(row[1], {}) # tag dimensions[row[0]][row[1]].setdefault(row[2], []) # column dimensions[row[0]][row[1]][row[2]].extend( [k.strip() for k in row[3].split(',') if k]) # keywords # construct query query = 'SELECT\n *,\n' for dimension, tags in dimensions.items(): query += ' CASE\n' for tag, columns in tags.items(): query += ' WHEN ' for column, keywords in columns.items(): for count, keyword in enumerate(keywords): if count != 0: query += 'OR ' query += '%s CONTAINS "%s" ' % (column, keyword) query += 'THEN "%s"\n' % tag query += ' ELSE "%s"\n END AS %s,\n' % (defaults.get( dimension, ''), dimension) query += 'FROM [%s.%s]' % (project.task['in']['dataset'], project.task['in']['table']) if project.verbose: print 'QUERY: ', query # write to view query_to_view(project.task['out']['auth'], project.id, project.task['out']['dataset'], project.task['out']['view'], query, replace=True)
def load(self):
  """Loads configs from Bulkdozer feed and applies values to object properties."""
  # nothing to load without a feed id
  if not self.trix_id:
    return

  data = sheets_read(self.auth, self.trix_id, 'Store', 'B3', retries=0)
  if data:
    self.mode = data[0][0]
def load_id_map(self):
  """Loads the ID map from the Bulkdozer feed into the object."""
  if self.trix_id:
    data = sheets_read(self.auth, self.trix_id, 'Store', 'A1:Z1')

    if data and data[0]:
      # the JSON map may be split across several cells; stitch it back together
      self._id_map = json.loads(''.join(data[0]))
    else:
      self._id_map = {}
def get_owners(): if project.verbose: print 'GETTING OWNERS' owners = [] if 'sheet' in project.task['owners']: owners = sheets_read( project.task['auth'], project.task['owners']['sheet']['url'], project.task['owners']['sheet']['tab'], project.task['owners']['sheet']['range'] ) elif 'bigquery' in project.task['owners']: owners = query_to_rows( project.task['auth'], project.id, project.task['owners']['bigquery']['dataset'], project.task['owners']['bigquery']['query'] ) # group account owners by email, create easy lookup sets for ids owners_grouped = {} for owner in owners: try: owners_grouped.setdefault(owner[2], { 'Account Name':owner[0], 'Account Owner':owner[1], 'Account Email':owner[2], 'DCM Network ID':[], 'DBM Partner ID':[], 'DS Account ID':[], 'Studio Account ID':[], }) if len(owner) > 3 and owner[3]: owners_grouped[owner[2]]['DCM Network ID'].append(str(owner[3])) if len(owner) > 4 and owner[4]: owners_grouped[owner[2]]['DBM Partner ID'].append(str(owner[4])) if len(owner) > 5 and owner[5]: owners_grouped[owner[2]]['DS Account ID'].append(str(owner[5])) if len(owner) > 6 and owner[6]: owners_grouped[owner[2]]['Studio Account ID'].append(str(owner[6])) except IndexError: print 'ERROR:', owner pass if project.verbose: print 'GOT OWNERS:', len(owners) return owners_grouped.values()
def _get_feed(self): """Fetches the feed based on initialization parameters. Returns: List of lists that represents the rows and columns of the feed. If the feed isn't found returns a list with an empty list. """ if self.feed_name in self._feed_name_tab_map: for tab_name in self._feed_name_tab_map[self.feed_name]: for sheet in self.spreadsheet['sheets']: if sheet['properties']['title'] == tab_name: self.tab_name = tab_name return sheets_read(self.auth, self.trix_id, tab_name, self.trix_range) return [[]]
def floodlight_monitor(): if project.verbose: print "FLOODLIGHT MONITOR" # make sure tab exists in sheet sheets_tab_copy( project.task['auth'], project.task['sheet']['template']['url'], project.task['sheet']['template']['tab'], project.task['sheet']['url'], project.task['sheet']['tab']) # read peers from sheet triggers = sheets_read( project.task['auth'], project.task['sheet']['url'], project.task['sheet']['tab'], project.task['sheet']['range'] ) # 0 - Floodlight Id # 1 - email if project.verbose and len(triggers) == 0: print "FLOODLIGHT MONITOR: No floodlight ids specified in sheet." alerts = {} day = None for trigger in triggers: # get report data for each floodlight report = floodlight_report(trigger[0]) rows = report_to_rows(report) rows = report_clean(rows) rows = rows_header_trim(rows) rows = rows_to_type(rows, column=6) # calculate outliers last_day, rows = floodlight_analysis(rows) # find last day report ran day = last_day if day is None else max(day, last_day) # group alerts by email alerts.setdefault(trigger[1], []) alerts[trigger[1]].extend(rows) floodlight_email(day, alerts)
def dynamic_costs(): # make sure tab exists in sheet sheets_tab_copy(project.task['auth'], project.task['sheet']['template']['url'], project.task['sheet']['template']['tab'], project.task['sheet']['url'], project.task['sheet']['tab']) # read configuration from sheet inputs = sheets_read(project.task['auth'], project.task['sheet']['url'], project.task['sheet']['tab'], project.task['sheet']['range']) # convert inputs into dictionary def expand_list(lst): if len(lst) == 1: return (lst[0], "") elif len(lst) == 2: return lst inputs = [expand_list(row) for row in inputs] inputs = dict(inputs) if project.verbose: print "DYNAMIC COSTS PARAMETERS", inputs if not inputs['Main Advertiser ID']: print "Configuration sheet not filled out." return # allows each advertiser to run multiple reports ( somewhat collision avoidance ) unique_name = inputs['Dynamic Profile ID'] # check if using wrapped tags shadow = inputs['Shadow Advertiser ID'] and inputs['Shadow Campaign ID'] # parse date range if inputs['Relative Date Range'] == 'CUSTOM': date_range = { "kind": "dfareporting#dateRange", "startDate": str(inputs['Start Date']), "endDate": str(inputs['End Date']), } else: date_range = { "kind": "dfareporting#dateRange", "relativeDateRange": str(inputs['Relative Date Range']) } combos_table = report_combos(unique_name, date_range, inputs['Main Advertiser ID'], inputs['Main Campaign ID'], inputs['Dynamic Profile ID']) main_table = report_main(unique_name, date_range, inputs['Main Advertiser ID'], inputs['Main Campaign ID'], shadow) if shadow: shadow_table = report_shadow(unique_name, date_range, inputs['Shadow Advertiser ID'], inputs['Shadow Campaign ID']) else: shadow_table = None view_combine(unique_name, combos_table, main_table, shadow_table)
def sheets():
  """Task handler: clear / delete / create / write a sheet tab, then optionally move it to BigQuery.

  Driven entirely by project.task keys:
    clear / delete  - booleans, clean up the tab first
    template        - copy a template tab instead of creating a blank one
    write           - data source block passed to get_rows, written to the tab
    out.bigquery    - if present, read the tab back and load it into a table
  """
  if project.verbose: print 'SHEETS'

  # clear if specified
  if project.task.get('clear', False):
    sheets_clear(project.task['auth'], project.task['sheet'], project.task['tab'], project.task['range'])

  # delete if specified ( after clear to prevent errors in case both are given )
  if project.task.get('delete', False):
    sheets_tab_delete(project.task['auth'], project.task['sheet'], project.task['tab'])

  # create or copy if specified
  if 'template' in project.task:
    sheets_tab_copy(project.task['auth'], project.task['template']['sheet'], project.task['template']['tab'], project.task['sheet'], project.task['tab'])
  else:
    sheets_tab_create(project.task['auth'], project.task['sheet'], project.task['tab'])

  # write data if specified
  if 'write' in project.task:
    rows = get_rows(project.task['auth'], project.task['write'])
    sheets_write(project.task['auth'], project.task['sheet'], project.task['tab'], project.task['range'], rows)

  # move if specified
  if 'out' in project.task:
    rows = sheets_read(project.task['auth'], project.task['sheet'], project.task['tab'], project.task['range'])

    if rows:
      schema = None

      # RECOMMENDED: define schema in json
      if project.task['out']['bigquery'].get('schema'):
        if project.verbose: print 'SHEETS SCHEMA DEFINED'
        schema = project.task['out']['bigquery']['schema']

      # NOT RECOMMENDED: determine schema if missing
      else:
        if project.verbose: print 'SHEETS SCHEMA DETECT ( Note Recommended - Define Schema In JSON )'
        # cast rows to types ( for schema detection )
        rows = rows_to_type(rows)
        rows, schema = get_schema(rows, project.task.get('header', False), infer_type=project.task.get(
            'infer_type', True))

      # write to table ( not using put because no use cases for other destinations )
      rows_to_table(
          auth=project.task['auth'],
          project_id=project.id,
          dataset_id=project.task['out']['bigquery']['dataset'],
          table_id=project.task['out']['bigquery']['table'],
          rows=rows,
          schema=schema,
          skip_rows=1 if project.task.get('header', False) else 0,
          disposition=project.task['out']['bigquery'].get(
              'disposition', 'WRITE_TRUNCATE'))

    else:
      print 'SHEET EMPTY'
def get_rows(auth, source):
  """Processes standard read JSON block for dynamic loading of data.

  Allows us to quickly pull a column or columns of data from and use it as
  an input into a script. For example pull a list of ids from bigquery and
  act on each one.

  - When pulling a single column specify single_cell = True. Returns list AKA values.
  - When pulling a multiple columns specify single_cell = False. Returns list of lists AKA rows.
  - Values are always given as a list ( single_cell will trigger necessary wrapping ).
  - Values, bigquery, sheet are optional, if multiple given result is one continous iterator.
  - Extensible, add a handler to define a new source ( be kind update the documentation json ).

  Include the following JSON in a recipe, then in the run.py handler when
  encountering that block pass it to this function and use the returned results.

    from utils.data import get_rows

    var_json = {
      "in":{
        "single_cell":[ boolean ],
        "values": [ integer list ],
        "bigquery":{
          "dataset": [ string ],
          "table": [ string ],
          "columns":[ integer list ],
          "legacy":[ boolean ]
        },
        "sheet":{
          "url":[ string - full URL, suggest using share link ],
          "tab":[ string ],
          "range":[ string - A1:A notation ]
        }
      }
    }

    values = get_rows('user', var_json)

  Args:
    auth: (string) The type of authentication to use, user or service.
    source: (json) A json block resembling var_json described above.

  Returns:
    If single_cell is False: Returns a list of row values [[v1], [v2], ... ]
    If single_cell is True: Returns a list of values [v1, v2, ...]
  """

  # if handler points to list, concatenate all the values from various sources into one list
  if isinstance(source, list):
    for s in source:
      for r in get_rows(auth, s):
        yield r

  # if handler is an endpoint, fetch data
  else:
    if 'values' in source:
      for value in source['values']:
        yield value if source.get('single_cell', False) else [value]

    if 'sheet' in source:
      # BUGFIX: previously passed project.task['auth'] here, silently
      # ignoring the auth argument; use the caller-supplied auth like the
      # bigquery branch and the docstring describe
      rows = sheets_read(
        auth,
        source['sheet']['url'],
        source['sheet']['tab'],
        source['sheet']['range']
      )
      for row in rows:
        yield row[0] if source.get('single_cell', False) else row

    if 'bigquery' in source:
      rows = query_to_rows(
        source['bigquery'].get('auth', auth),
        project.id,
        source['bigquery']['dataset'],
        source['bigquery']['query'],
        legacy=source['bigquery'].get('legacy', False)
      )
      for row in rows:
        yield row[0] if source.get('single_cell', False) else row