예제 #1
0
def delete_with_implicit_rowid(table_id, values):
    """Delete a single row, identified by values['rowid'], from a slave table.

    Retries up to 10 times with exponential backoff on transient API errors
    (rate limits, deadlines, HTTP hiccups).

    Args:
        table_id: id of the fusion (slave) table to delete from.
        values: dict of row values; must contain the 'rowid' key.

    Raises:
        The last caught API error when all 10 attempts fail.
    """
    # get row id
    row_id = values['rowid']
    # delete row
    query = _SQL.delete(table_id, row_id=row_id)
    sleep = 1
    last_error = None
    for attempt in range(10):
        try:
            service = oauth2_two_legged.get_service(API_CLIENT, VERSION, OAUTH_SCOPE)
            logging.debug("Trying to delete row from slave %s" % table_id)
            query_result = service.query().sql(sql=query).execute()
        except (errors.HttpError, apiproxy_errors.DeadlineExceededError, httplib.HTTPException) as e:
            last_error = e
            time.sleep(sleep)  # pause to avoid "Rate Limit Exceeded" error
            logging.warning("Sleeping %d seconds because of HttpError trying to delete row from slave %s (%s)" % (sleep, table_id, e))
            sleep = sleep * 2
        else:
            break  # no error caught
    else:
        logging.critical("Retried 10 times deleting row from slave %s" % table_id)
        # BUG FIX: a bare `raise` here has no active exception (the for-else
        # clause runs outside the except block), which itself raises a
        # RuntimeError/TypeError; re-raise the last caught API error instead.
        raise last_error  # attempts exhausted
    if 'error' not in query_result:
        logging.info("Deleted in %s %s" % (table_id, json.dumps(values)))
    else:
        logging.error("Deleted in %s failed on %s with %s" % (table_id, values, query_result['error']))
    return
예제 #2
0
def insert_go(table_id):
    """Flush the buffered rows for table_id (from the module-level _inserts
    dict) to the fusion table via a CSV importRows upload.

    On success the buffer for table_id is cleared. Retries up to 10 times
    with exponential backoff on transient API errors.

    Args:
        table_id: id of the fusion (slave) table to insert into.

    Raises:
        The last caught API error when all 10 attempts fail.
    """
    if table_id in _inserts and _inserts[table_id]:
        csv = list_of_dicts_to_csv(table_id, _inserts[table_id])
        media_body = MediaIoBaseUpload(fd=csv, mimetype='application/octet-stream')
        sleep = 1
        last_error = None
        for attempt in range(10):
            try:
                service = oauth2_two_legged.get_service(API_CLIENT, VERSION, OAUTH_SCOPE)
                logging.debug("Trying to insert %d rows in slave %s" % (len(_inserts[table_id]), table_id))
                csv.seek(0)  # rewind the StringIO object
                logging.debug("First row: %s" % csv.readline())
                result = service.table().importRows(tableId=table_id, media_body=media_body).execute()
            except (errors.HttpError, apiproxy_errors.DeadlineExceededError, httplib.HTTPException) as e:
                last_error = e
                time.sleep(sleep)  # pause to avoid "Rate Limit Exceeded" error
                logging.warning("Sleeping %d seconds because of HttpError trying to insert %d rows in slave %s (%s)" % (sleep, len(_inserts[table_id]), table_id, e))
                sleep = sleep * 2
            else:
                break  # no error caught
        else:
            logging.critical("Retried 10 times inserting %d rows in slave %s" % (len(_inserts[table_id]), table_id))
            # BUG FIX: a bare `raise` outside an except block raises a
            # RuntimeError/TypeError; re-raise the last caught API error.
            raise last_error  # attempts exhausted
        if 'error' not in result:
            logging.info("Inserted %d rows in %s" % (len(_inserts[table_id]), table_id))
            _inserts[table_id] = []  # success: clear the buffer
        else:
            logging.error("Insert of %d rows in %s failed with %s" % (len(_inserts[table_id]), table_id, result['error']))
    return
예제 #3
0
def select_nth(table_id, cols=None, condition=None, n=1):
    """Select the single row at offset n (OFFSET n LIMIT 1) from a table.

    Retries up to 10 times with exponential backoff on transient API errors.

    Args:
        table_id: id of the fusion table to query.
        cols: list of column names; defaults to all columns of the table.
        condition: optional SQL condition string appended by _SQL.select.
        n: zero-based row offset to fetch.

    Returns:
        A list of row dicts (length 0 or 1).

    Raises:
        The last caught API error when all 10 attempts fail.
    """
    if not cols:
        cols = table_cols(table_id)
        # make sure you return the rowid, that's useful for later 'updates'
    if 'rowid' not in cols:
        cols.append('rowid')
    query = _SQL.select(table_id, cols, condition) + ' OFFSET %d LIMIT 1' % n
    sleep = 1
    last_error = None
    for attempt in range(10):
        try:
            service = oauth2_two_legged.get_service(API_CLIENT, VERSION, OAUTH_SCOPE)
            logging.debug("Trying to select nth row in %s" % table_id)
            query_result = service.query().sqlGet(sql=query).execute()
        except (errors.HttpError, apiproxy_errors.DeadlineExceededError, httplib.HTTPException) as e:
            last_error = e
            time.sleep(sleep)  # pause to avoid "Rate Limit Exceeded" error
            logging.warning("Sleeping %d seconds because of HttpError trying to select nth row in %s (%s)" % (sleep, table_id, e))
            sleep = sleep * 2
        else:
            break  # no error caught
    else:
        logging.critical("Retried 10 times selecting nth row in %s" % table_id)
        # BUG FIX: a bare `raise` outside an except block raises a
        # RuntimeError/TypeError; re-raise the last caught API error.
        raise last_error  # attempts exhausted
    rows = fusion_table_query_result_as_list_of_dict(query_result)
    for row in rows:  # this is an intermediate fix for data entered before sequence field was added to slave tables
        if 'sequence' in row and row['sequence'] == 'NaN':
            row['sequence'] = 1
    return rows
예제 #4
0
def select(table_id, cols=None, condition=None, filter_obsolete_rows=True):
    """Select rows from a fusion table, optionally filtering obsolete rows.

    condition can contain GROUP BY and LIMIT statements, but there must be at
    least one WHERE statement!!

    Retries up to 10 times with exponential backoff on transient API errors.

    Args:
        table_id: id of the fusion table to query.
        cols: list of column names; defaults to all columns of the table.
        condition: optional SQL condition string appended by _SQL.select.
        filter_obsolete_rows: only has effect on slave table queries (detected
            by testing on the 'datetime slug' field); for each event, keeps
            only the row(s) with the highest 'sequence'.

    Returns:
        A list of row dicts.

    Raises:
        The last caught API error when all 10 attempts fail.
    """
    if not cols:
        cols = table_cols(table_id)
    # make sure you return the rowid, that's useful for later 'updates'
    # BUG FIX: guard against condition=None before substring-testing it;
    # 'GROUP BY' in None would raise a TypeError.
    if 'rowid' not in cols and (not condition or 'GROUP BY' not in condition):
        cols.append('rowid')
    query = _SQL.select(table_id, cols, condition)
    sleep = 1
    last_error = None
    urlfetch.set_default_fetch_deadline(60)
    for attempt in range(10):
        try:
            service = oauth2_two_legged.get_service(API_CLIENT, VERSION, OAUTH_SCOPE)
            logging.debug("Trying to select rows in %s" % table_id)
            query_result = service.query().sqlGet(sql=query).execute()
        except (errors.HttpError, apiproxy_errors.DeadlineExceededError, httplib.HTTPException) as e:
            last_error = e
            time.sleep(sleep)  # pause to avoid "Rate Limit Exceeded" error
            logging.warning("Sleeping %d seconds because of HttpError trying to select rows in %s (%s)" % (sleep, table_id, e))
            sleep = sleep * 2
        else:
            break  # no error caught
    else:
        logging.critical("Retried 10 times selecting rows in %s" % table_id)
        # BUG FIX: a bare `raise` outside an except block raises a
        # RuntimeError/TypeError; re-raise the last caught API error.
        raise last_error  # attempts exhausted
    rows = fusion_table_query_result_as_list_of_dict(query_result)
    for row in rows:  # this is an intermediate fix for data entered before sequence field was added to slave tables
        if 'sequence' in row and row['sequence'] == 'NaN':
            row['sequence'] = 1
    # for each event only return the row(s) with the highest 'sequence'
    if filter_obsolete_rows and rows and 'datetime slug' in rows[0]:
        # for each event slug, find the maximum sequence
        maximum_sequence = {}
        for row in rows:
            event_slug = row['event slug']
            sequence = row['sequence']
            if event_slug not in maximum_sequence or maximum_sequence[event_slug] < sequence:
                maximum_sequence[event_slug] = sequence
        # filter the rows with sequence lower than the maximum sequence for that event slug
        rows[:] = [row for row in rows if row['sequence'] == maximum_sequence[row['event slug']]]
    return rows
예제 #5
0
def table_cols(table_id):
    """Return the list of column names for a fusion table.

    Results are memoized in the module-level _table_cols dict, so the API is
    hit only once per table. Retries up to 10 times with exponential backoff
    on transient API errors.

    Args:
        table_id: id of the fusion table to inspect.

    Returns:
        A fresh list (a copy) of column names, safe for the caller to mutate.

    Raises:
        The last caught API error when all 10 attempts fail.
    """
    global _table_cols
    if table_id not in _table_cols:
        query = _SQL.select(table_id) + ' LIMIT 1'
        sleep = 1
        last_error = None
        for attempt in range(10):
            try:
                service = oauth2_two_legged.get_service(API_CLIENT, VERSION, OAUTH_SCOPE)
                logging.debug("Trying to read column names in %s" % table_id)
                query_result = service.query().sqlGet(sql=query).execute()
            except (errors.HttpError, apiproxy_errors.DeadlineExceededError, httplib.HTTPException) as e:
                last_error = e
                time.sleep(sleep)  # pause to avoid "Rate Limit Exceeded" error
                logging.warning("Sleeping %d seconds because of HttpError trying to read column names in %s (%s)" % (sleep, table_id, e))
                sleep = sleep * 2
            else:
                break  # no error caught
        else:
            logging.critical("Retried 10 times reading column names in %s" % table_id)
            # BUG FIX: a bare `raise` outside an except block raises a
            # RuntimeError/TypeError; re-raise the last caught API error.
            raise last_error  # attempts exhausted
        _table_cols[table_id] = query_result['columns']
        logging.info("Read column names in %s: %s" % (table_id, ','.join(_table_cols[table_id])))
    return list(_table_cols[table_id])  # clone it! if they're gonna modify it, the global var is safe
예제 #6
0
def count(table_id, condition=None):
    """Return the number of rows in a fusion table matching condition.

    condition can contain GROUP BY and LIMIT statements, but there must be at
    least one WHERE statement!!

    Retries up to 10 times with exponential backoff on transient API errors.

    Args:
        table_id: id of the fusion table to count rows in.
        condition: optional SQL condition string passed to _SQL.count.

    Returns:
        The row count as an int.

    Raises:
        The last caught API error when all 10 attempts fail.
    """
    query = _SQL.count(table_id, condition)
    sleep = 1
    last_error = None
    for attempt in range(10):
        try:
            service = oauth2_two_legged.get_service(API_CLIENT, VERSION, OAUTH_SCOPE)
            logging.debug("Trying to count rows in %s" % table_id)
            query_result = service.query().sqlGet(sql=query).execute()
        except (errors.HttpError, apiproxy_errors.DeadlineExceededError, httplib.HTTPException) as e:
            last_error = e
            time.sleep(sleep)  # pause to avoid "Rate Limit Exceeded" error
            logging.warning("Sleeping %d seconds because of HttpError trying to count rows in %s (%s)" % (sleep, table_id, e))
            sleep = sleep * 2
        else:
            break  # no error caught
    else:
        logging.critical("Retried 10 times counting rows in %s" % table_id)
        # BUG FIX: a bare `raise` outside an except block raises a
        # RuntimeError/TypeError; re-raise the last caught API error.
        raise last_error  # attempts exhausted
    rows = fusion_table_query_result_as_list_of_dict(query_result)
    # renamed from `count` to avoid shadowing this function's own name
    row_count = int(rows[0]['count()'])
    return row_count