Example #1
import logging

# NOTE: "du" below is assumed to be the project's data-utilities module
# (it provides parse_row); its import is omitted in this snippet.
def get_row(table_config, pool, row_key, **kwargs):
    '''
    Retrieve a single row from a table.

    table_config = configuration object for the table to read from.
    pool = bigtable connection pool.
    row_key = key of the row to get.
    kwargs = additional keyword arguments passed directly to the row lookup.
    '''
    table_id = table_config['bigtable_table_name']
    row_key = row_key.encode('utf-8')

    logging.info("querying: %s", table_id)
    logging.info("row_key: %s", row_key)
    row = {}
    # Hack: retry up to 10 times to ride out transient connection failures.
    for attempt in range(10):
        try:
            with pool.connection(timeout=5) as connection:
                connection.open()
                table = connection.table(table_id)

                data = table.row(row_key, **kwargs)
                row = du.parse_row(data, table_config.columns)
        # TODO: catch a more specific exception type.
        except Exception as err:  #pylint: disable=W0703
            logging.warning("Failed query attempt %s", str(attempt))
            logging.warning(err)
        else:
            break
    else:
        row = {}
    return row
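
A minimal calling sketch follows. It assumes a happybase-style ConnectionPool and a config object that supports both item access (for 'bigtable_table_name') and a .columns attribute, matching how get_row uses it; the table name, host, row key, and column names are placeholders, not values from the original project.

import happybase  # assumption: a Thrift/HBase-style client; the project may use a Bigtable-specific pool

class TableConfig(dict):
    '''Hypothetical stand-in for the project's table config object.'''
    def __init__(self, table_name, columns):
        super().__init__(bigtable_table_name=table_name)
        self.columns = columns

pool = happybase.ConnectionPool(3, host='localhost')  # pool size, then connection kwargs
config = TableConfig('example_table',
                     {'client_city': {'name': 'client_city', 'type': 'string'}})

# Extra kwargs flow straight through to table.row(); columns restricts the cells returned.
row = get_row(config, pool, 'example_row_key', columns=[b'meta:client_city'])
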
Example #3
def test_parse_data():
    '''
    test parse_data
    '''

    config = {
        'client_city': {
            'name': 'client_city',
            'type': 'string'
        },
        'median_download': {
            'type': 'double'
        }
    }
    data = {
        'data:median_download': b'@:\xadQ\x83\xbe\x02O',  # 8 bytes encoding a double
        'meta:client_city': b'New York'
    }

    result = du.parse_row(data, config)

    assert len(result.keys()) > 1

    assert 'data' in result
    assert 'meta' in result
    assert 'median_download' in result['data']

    assert isinstance(result['data']['median_download'], float)
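
The fixture above implies the shape of du.parse_row: raw keys of the form 'family:qualifier' are split into nested family/qualifier dicts, string cells are decoded, and double cells are unpacked from 8-byte big-endian values (which is what the fixture bytes appear to encode). A rough sketch of that behaviour, under those assumptions and not the project's actual implementation, could look like:

import struct

def parse_row_sketch(data, config):
    '''Illustrative only: mirrors what test_parse_data expects from du.parse_row.'''
    parsed = {}
    for raw_key, raw_value in data.items():
        key = raw_key.decode('utf-8') if isinstance(raw_key, bytes) else raw_key
        family, qualifier = key.split(':', 1)
        column = config.get(qualifier, {})
        if column.get('type') == 'double':
            # assumes doubles were stored as 8-byte big-endian IEEE 754 values
            parsed.setdefault(family, {})[qualifier] = struct.unpack('>d', raw_value)[0]
        else:
            value = raw_value.decode('utf-8') if isinstance(raw_value, bytes) else raw_value
            parsed.setdefault(family, {})[qualifier] = value
    return parsed
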
import logging  # assumed module-level import; the snippet omits its imports

def scan_table(table_config, pool, prefix="", start_key="", end_key="",
               **kwargs):
    '''
    Abstracts a table scan: acquires a connection from the pool and
    automatically retries failed scans.
    Additional named arguments are passed to the scan call.

    table_config = configuration object for the table to scan.
    pool = bigtable connection pool.
    prefix = restrict the scan to row keys starting with this value.
    start_key = alternative to prefix; start_key & end_key scan a
        range of row keys.
    end_key = alternative to prefix; start_key & end_key scan a
        range of row keys.
    kwargs = additional keyword arguments passed directly to the scan operation.
    '''
    table_id = table_config['bigtable_table_name']

    # build table query parameters.
    # if prefix is present, use that.
    # else, use start / end key
    params = {}
    if prefix:
        params = {"row_prefix": prefix.encode('utf-8')}
    elif start_key:
        params = {
            "row_start": start_key.encode('utf-8'),
            "row_stop": end_key.encode('utf-8')
        }

    params.update(kwargs)

    logging.info("querying: %s", table_id)
    logging.info("start_key: %s", start_key)
    logging.info("end_key: %s", end_key)
    logging.info("prefix: %s", prefix)
    logging.info("params %s", str(params))

    results = []

    # Hack: retry up to 10 times to ride out transient connection failures.
    for attempt in range(10):
        try:
            with pool.connection(timeout=5) as connection:
                connection.open()
                table = connection.table(table_id)

                for _, data in table.scan(**params):
                    results.append(du.parse_row(data, table_config.columns))
        # TODO: catch a more specific exception type.
        except Exception as err:  #pylint: disable=W0703
            logging.warning("Failed query attempt %s", str(attempt))
            logging.warning(err)
        else:
            break
    else:
        results = []
    logging.info("result size %s", str(len(results)))
    return results
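
Usage mirrors get_row. A hedged sketch, reusing the hypothetical config and pool objects from the earlier example (key values and the extra limit/columns kwargs are illustrative, assuming a happybase-style scan):

# Prefix scan: every row whose key starts with the given value.
rows = scan_table(config, pool, prefix='example_prefix', limit=100)

# Range scan: start_key maps to row_start and end_key to row_stop
# (row_stop is exclusive in happybase-style scans).
rows = scan_table(config, pool, start_key='example_a', end_key='example_b',
                  columns=['data'])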