def delete_with_implicit_rowid(table_id, values):
    # get the row id
    row_id = values['rowid']
    # delete the row
    query = _SQL.delete(table_id, row_id=row_id)
    sleep = 1
    for attempt in range(10):
        try:
            service = oauth2_two_legged.get_service(API_CLIENT, VERSION, OAUTH_SCOPE)
            logging.debug("Trying to delete row from slave %s" % table_id)
            query_result = service.query().sql(sql=query).execute()
        except (errors.HttpError, apiproxy_errors.DeadlineExceededError, httplib.HTTPException) as e:
            time.sleep(sleep)  # pause to avoid "Rate Limit Exceeded" error
            logging.warning("Sleeping %d seconds because of HttpError trying to delete row from slave %s (%s)" % (sleep, table_id, e))
            sleep = sleep * 2
        else:
            break  # no error caught
    else:
        logging.critical("Retried 10 times deleting row from slave %s" % table_id)
        raise  # attempts exhausted
    if 'error' not in query_result:
        logging.info("Deleted in %s %s" % (table_id, json.dumps(values)))
    else:
        logging.error("Delete in %s failed on %s with %s" % (table_id, values, query_result['error']))
    return

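# Illustrative usage only (not part of the original module): delete_with_implicit_rowid
# expects a dict that contains the 'rowid' of the row to delete, e.g. a row previously
# returned by select() or select_nth(). The table id below is a made-up placeholder.
#
#   row = select_nth('1aBcD_exampleTableId', condition="WHERE 'event slug' = 'spring-concert'")[0]
#   delete_with_implicit_rowid('1aBcD_exampleTableId', row)
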
def insert_go(table_id):
    if table_id in _inserts and _inserts[table_id]:
        csv = list_of_dicts_to_csv(table_id, _inserts[table_id])
        media_body = MediaIoBaseUpload(fd=csv, mimetype='application/octet-stream')
        sleep = 1
        for attempt in range(10):
            try:
                service = oauth2_two_legged.get_service(API_CLIENT, VERSION, OAUTH_SCOPE)
                logging.debug("Trying to insert %d rows in slave %s" % (len(_inserts[table_id]), table_id))
                csv.seek(0)  # rewind the StringIO object
                logging.debug("First row: %s" % csv.readline())
                result = service.table().importRows(tableId=table_id, media_body=media_body).execute()
            except (errors.HttpError, apiproxy_errors.DeadlineExceededError, httplib.HTTPException) as e:
                time.sleep(sleep)  # pause to avoid "Rate Limit Exceeded" error
                logging.warning("Sleeping %d seconds because of HttpError trying to insert %d rows in slave %s (%s)" % (sleep, len(_inserts[table_id]), table_id, e))
                sleep = sleep * 2
            else:
                break  # no error caught
        else:
            logging.critical("Retried 10 times inserting %d rows in slave %s" % (len(_inserts[table_id]), table_id))
            raise  # attempts exhausted
        if 'error' not in result:
            logging.info("Inserted %d rows in %s" % (len(_inserts[table_id]), table_id))
            _inserts[table_id] = []
        else:
            logging.error("Insert of %d rows in %s failed with %s" % (len(_inserts[table_id]), table_id, result['error']))
    return

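# Illustrative usage only (hypothetical table id and row values, not taken from the
# original module): rows are buffered as dicts in the module-level _inserts mapping
# and then flushed to the Fusion Table with a single importRows call by insert_go().
#
#   _inserts.setdefault('1aBcD_exampleTableId', []).append({'event slug': 'spring-concert', 'sequence': 1})
#   insert_go('1aBcD_exampleTableId')
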
def select_nth(table_id, cols=None, condition=None, n=1):
    if not cols:
        cols = table_cols(table_id)
    # make sure you return the rowid, that's useful for later 'updates'
    if 'rowid' not in cols:
        cols.append('rowid')
    query = _SQL.select(table_id, cols, condition) + ' OFFSET %d LIMIT 1' % n
    sleep = 1
    for attempt in range(10):
        try:
            service = oauth2_two_legged.get_service(API_CLIENT, VERSION, OAUTH_SCOPE)
            logging.debug("Trying to select nth row in %s" % table_id)
            query_result = service.query().sqlGet(sql=query).execute()
        except (errors.HttpError, apiproxy_errors.DeadlineExceededError, httplib.HTTPException) as e:
            time.sleep(sleep)  # pause to avoid "Rate Limit Exceeded" error
            logging.warning("Sleeping %d seconds because of HttpError trying to select nth row in %s (%s)" % (sleep, table_id, e))
            sleep = sleep * 2
        else:
            break  # no error caught
    else:
        logging.critical("Retried 10 times selecting nth row in %s" % table_id)
        raise  # attempts exhausted
    rows = fusion_table_query_result_as_list_of_dict(query_result)
    for row in rows:
        # this is an intermediate fix for data entered before the sequence field was added to slave tables
        if 'sequence' in row and row['sequence'] == 'NaN':
            row['sequence'] = 1
    return rows

def select(table_id, cols=None, condition=None, filter_obsolete_rows=True):
    """
    condition can contain GROUP BY and LIMIT clauses, but there must be at least one WHERE clause!
    filter_obsolete_rows: only has an effect on slave table queries (detected via the 'datetime slug' field)
    filter_repeating_rows: only has an effect on slave table queries, returning the next upcoming (or ongoing)
        event (currently disabled; see the commented-out block at the end of this function)
    """
    if not cols:
        cols = table_cols(table_id)
    # make sure you return the rowid, that's useful for later 'updates'
    if 'rowid' not in cols and 'GROUP BY' not in condition:
        cols.append('rowid')
    query = _SQL.select(table_id, cols, condition)
    sleep = 1
    urlfetch.set_default_fetch_deadline(60)
    for attempt in range(10):
        try:
            service = oauth2_two_legged.get_service(API_CLIENT, VERSION, OAUTH_SCOPE)
            logging.debug("Trying to select rows in %s" % table_id)
            query_result = service.query().sqlGet(sql=query).execute()
        except (errors.HttpError, apiproxy_errors.DeadlineExceededError, httplib.HTTPException) as e:
            time.sleep(sleep)  # pause to avoid "Rate Limit Exceeded" error
            logging.warning("Sleeping %d seconds because of HttpError trying to select rows in %s (%s)" % (sleep, table_id, e))
            sleep = sleep * 2
        else:
            break  # no error caught
    else:
        logging.critical("Retried 10 times selecting rows in %s" % table_id)
        raise  # attempts exhausted
    rows = fusion_table_query_result_as_list_of_dict(query_result)
    for row in rows:
        # this is an intermediate fix for data entered before the sequence field was added to slave tables
        if 'sequence' in row and row['sequence'] == 'NaN':
            row['sequence'] = 1
    # for each event only return the row(s) with the highest 'sequence'
    if filter_obsolete_rows and rows and 'datetime slug' in rows[0]:
        # for each event slug, find the maximum sequence
        maximum_sequence = {}
        for row in rows:
            event_slug = row['event slug']
            sequence = row['sequence']
            if event_slug not in maximum_sequence or maximum_sequence[event_slug] < sequence:
                maximum_sequence[event_slug] = sequence
        # drop the rows with a sequence lower than the maximum sequence for that event slug
        rows[:] = [row for row in rows if row['sequence'] == maximum_sequence[row['event slug']]]
    # # for each event only return the row(s) with the earliest TWO 'datetime slug' values
    # if filter_repeating_rows and rows and 'datetime slug' in rows[0]:
    #     # for each event slug, find the TWO minimum datetime values
    #     maximum_datetime = {}
    #     maximum2_datetime = {}
    #     for row in rows:
    #         event_slug = row['event slug']
    #         datetime = row['datetime slug']
    #         if event_slug not in maximum_datetime or maximum_datetime[event_slug] > datetime:
    #             maximum_datetime[event_slug] = datetime
    #         elif event_slug not in maximum2_datetime or maximum2_datetime[event_slug] > datetime:
    #             maximum2_datetime[event_slug] = datetime
    #     # keep only the rows with the two lowest datetime values for that event slug
    #     rows[:] = [row for row in rows if row['datetime slug'] in (maximum_datetime[row['event slug']], maximum2_datetime[row['event slug']])]
    return rows

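# Illustrative usage only (the table id, column names and condition below are made up,
# not taken from the original module): the condition passed to select() must contain at
# least one WHERE clause, and with filter_obsolete_rows=True only the row(s) with the
# highest 'sequence' per 'event slug' are returned for slave tables.
#
#   rows = select('1aBcD_exampleTableId',
#                 cols=['event slug', 'sequence', 'datetime slug'],
#                 condition="WHERE 'event slug' = 'spring-concert'")
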
def table_cols(table_id):
    # returns a list of column names, cached in a global variable
    global _table_cols
    if table_id not in _table_cols:
        query = _SQL.select(table_id) + ' LIMIT 1'
        sleep = 1
        for attempt in range(10):
            try:
                service = oauth2_two_legged.get_service(API_CLIENT, VERSION, OAUTH_SCOPE)
                logging.debug("Trying to read column names in %s" % table_id)
                query_result = service.query().sqlGet(sql=query).execute()
            except (errors.HttpError, apiproxy_errors.DeadlineExceededError, httplib.HTTPException) as e:
                time.sleep(sleep)  # pause to avoid "Rate Limit Exceeded" error
                logging.warning("Sleeping %d seconds because of HttpError trying to read column names in %s (%s)" % (sleep, table_id, e))
                sleep = sleep * 2
            else:
                break  # no error caught
        else:
            logging.critical("Retried 10 times reading column names in %s" % table_id)
            raise  # attempts exhausted
        _table_cols[table_id] = query_result['columns']
        logging.info("Read column names in %s: %s" % (table_id, ','.join(_table_cols[table_id])))
    return list(_table_cols[table_id])  # return a copy, so callers can modify it without touching the cached global

def count(table_id, condition=None):
    """
    condition can contain GROUP BY and LIMIT clauses, but there must be at least one WHERE clause!
    """
    query = _SQL.count(table_id, condition)
    sleep = 1
    for attempt in range(10):
        try:
            service = oauth2_two_legged.get_service(API_CLIENT, VERSION, OAUTH_SCOPE)
            logging.debug("Trying to count rows in %s" % table_id)
            query_result = service.query().sqlGet(sql=query).execute()
        except (errors.HttpError, apiproxy_errors.DeadlineExceededError, httplib.HTTPException) as e:
            time.sleep(sleep)  # pause to avoid "Rate Limit Exceeded" error
            logging.warning("Sleeping %d seconds because of HttpError trying to count rows in %s (%s)" % (sleep, table_id, e))
            sleep = sleep * 2
        else:
            break  # no error caught
    else:
        logging.critical("Retried 10 times counting rows in %s" % table_id)
        raise  # attempts exhausted
    rows = fusion_table_query_result_as_list_of_dict(query_result)
    count = int(rows[0]['count()'])
    return count

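# A minimal sketch, not part of the original module, of how the retry-with-exponential-
# backoff loop repeated in every function above could be factored out. The helper name
# `_execute_with_retries` and the `build_request` callable are hypothetical; the real
# code inlines this pattern in each function.
def _execute_with_retries(description, build_request, attempts=10):
    sleep = 1
    for attempt in range(attempts):
        try:
            service = oauth2_two_legged.get_service(API_CLIENT, VERSION, OAUTH_SCOPE)
            logging.debug("Trying to %s" % description)
            return build_request(service).execute()
        except (errors.HttpError, apiproxy_errors.DeadlineExceededError, httplib.HTTPException) as e:
            time.sleep(sleep)  # pause to avoid "Rate Limit Exceeded" errors
            logging.warning("Sleeping %d seconds because of HttpError trying to %s (%s)" % (sleep, description, e))
            sleep = sleep * 2
    logging.critical("Retried %d times trying to %s" % (attempts, description))
    raise  # re-raise the last caught exception (Python 2 semantics of a bare raise)

# For example, count() could then fetch its result as (hypothetical):
#   query_result = _execute_with_retries("count rows in %s" % table_id,
#                                        lambda service: service.query().sqlGet(sql=query))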