def test_row_set__eq__len_row_keys_differ():
    """Row sets holding different numbers of row keys compare unequal."""
    from google.cloud.bigtable.row_set import RowSet

    key_a = b"row_key1"
    key_b = b"row_key1"

    left = RowSet()
    right = RowSet()
    left.add_row_key(key_a)
    left.add_row_key(key_b)
    right.add_row_key(key_b)

    assert not (left == right)
def test_row_set__eq__row_keys_differ():
    """Row sets whose key lists differ in content compare unequal."""
    from google.cloud.bigtable.row_set import RowSet

    left = RowSet()
    right = RowSet()

    for key in (b"row_key1", b"row_key2", b"row_key3"):
        left.add_row_key(key)
    for key in (b"row_key1", b"row_key2", b"row_key4"):
        right.add_row_key(key)

    assert not (left == right)
def test_row_set__eq__len_row_ranges_differ():
    """Row sets holding different numbers of row ranges compare unequal."""
    from google.cloud.bigtable.row_set import RowRange
    from google.cloud.bigtable.row_set import RowSet

    range_a = RowRange(b"row_key4", b"row_key9")
    range_b = RowRange(b"row_key4", b"row_key9")

    left = RowSet()
    right = RowSet()
    left.add_row_range(range_a)
    left.add_row_range(range_b)
    right.add_row_range(range_b)

    assert not (left == right)
def query_builder():
    """Build a RowSet covering a short, recent time window per pair/exchange.

    Row keys are assumed to follow the ``PAIR#EXCHANGE#UNIX_TS`` layout,
    so a contiguous time window maps onto a contiguous key range.
    """
    # NOTE(review): ``all_pairs`` is defined but unused here — kept to
    # preserve the original; presumably documentation of the full universe.
    all_pairs = [
        "LTC/USD", "BTC/GBP", "XLM/USD", "BTC/JPY", "BTC/EUR", "XRP/USD",
        "XRP/BTC", "XMR/USD", "XLM/BTC", "BTC/USD", "LTC/BTC", "XRP/EUR",
        "XMR/BTC", "XTZ/BTC"
    ]
    pairs = ["BTC/USD"]
    exchanges = [
        "bitfinex", "bitStamp", "poloniex", "gemini", "hitBTC", "okCoin"
    ]

    def _key(pair, exchange, moment):
        # Key format must match the writer's: PAIR#EXCHANGE#UNIX_TS.
        return "{}#{}#{}".format(
            pair, exchange, int(time.mktime(moment.timetuple())))

    rowset = RowSet()
    for pair in pairs:
        for exchange in exchanges:
            startkey = _key(pair, exchange,
                            datetime.now() - timedelta(seconds=3))
            endkey = _key(pair, exchange,
                          datetime.now() + timedelta(seconds=1))
            rowset.add_row_range(RowRange(start_key=startkey, end_key=endkey))
    return rowset
def test_row_set_add_row_key():
    """``add_row_key`` appends keys to ``row_keys`` in insertion order.

    Bug fix: the original assertion was
    ``assert ["row_key1" == "row_key2"], row_set.row_keys`` — it asserts a
    non-empty list literal (``[False]``, always truthy) and never checks
    ``row_set.row_keys`` at all; the row keys were only the failure message.
    """
    from google.cloud.bigtable.row_set import RowSet

    row_set = RowSet()
    row_set.add_row_key("row_key1")
    row_set.add_row_key("row_key2")

    assert row_set.row_keys == ["row_key1", "row_key2"]
def test_add_row_range_by_prefix_from_keys(self):
    """Only rows whose keys start with the prefix ``"row"`` are read back."""
    row_keys = [
        b"row_key_1",
        b"row_key_2",
        b"row_key_3",
        b"row_key_4",
        b"sample_row_key_1",
        b"sample_row_key_2",
    ]

    rows = []
    for key in row_keys:
        row = self._table.row(key)
        row.set_cell(COLUMN_FAMILY_ID1, COL_NAME1, CELL_VAL1)
        rows.append(row)
        self.rows_to_delete.append(row)
    self._table.mutate_rows(rows)

    row_set = RowSet()
    row_set.add_row_range_with_prefix("row")
    read_rows = self._table.yield_rows(row_set=row_set)

    expected_row_keys = [
        b"row_key_1",
        b"row_key_2",
        b"row_key_3",
        b"row_key_4",
    ]
    found_row_keys = [r.row_key for r in read_rows]
    self.assertEqual(found_row_keys, expected_row_keys)
def _get_row_set_from_rows(rows):
    """Build a ``RowSet`` containing one row key per entry in *rows*."""
    row_set = RowSet()
    for key in rows:
        row_set.add_row_key(key)
    return row_set
def test_yield_rows_with_row_set(self):
    """``yield_rows`` honors a RowSet mixing a key range and a single key."""
    row_keys = [
        b'row_key_1', b'row_key_2', b'row_key_3', b'row_key_4',
        b'row_key_5', b'row_key_6', b'row_key_7', b'row_key_8',
        b'row_key_9',
    ]

    rows = []
    for key in row_keys:
        row = self._table.row(key)
        row.set_cell(COLUMN_FAMILY_ID1, COL_NAME1, CELL_VAL1)
        rows.append(row)
        self.rows_to_delete.append(row)
    self._table.mutate_rows(rows)

    row_set = RowSet()
    row_set.add_row_range(
        RowRange(start_key=b'row_key_3', end_key=b'row_key_7'))
    row_set.add_row_key(b'row_key_1')

    found_row_keys = [
        row.row_key for row in self._table.yield_rows(row_set=row_set)
    ]
    # Range end key is exclusive, so row_key_7 is absent.
    expected_row_keys = [
        b'row_key_1', b'row_key_3', b'row_key_4', b'row_key_5',
        b'row_key_6',
    ]
    self.assertEqual(found_row_keys, expected_row_keys)
def get_many(self, keys: Sequence[str]) -> Iterator[Tuple[str, bytes]]:
    """Yield ``(key, decoded_value)`` for every stored row among *keys*."""
    row_set = RowSet()
    for key in keys:
        row_set.add_row_key(key)

    table = self._get_table()
    for row in table.read_rows(row_set=row_set):
        yield row.row_key.decode("utf-8"), self.__decode_row(row)
def read_row(self, row_key, filter_=None):
    """Read a single row from this table.

    For example:

    .. literalinclude:: snippets_table.py
        :start-after: [START bigtable_read_row]
        :end-before: [END bigtable_read_row]

    :type row_key: bytes
    :param row_key: The key of the row to read from.

    :type filter_: :class:`.RowFilter`
    :param filter_: (Optional) The filter to apply to the contents of the
                    row. If unset, returns the entire row.

    :rtype: :class:`.PartialRowData`, :data:`NoneType <types.NoneType>`
    :returns: The contents of the row if any chunks were returned in the
              response, otherwise :data:`None`.
    :raises: :class:`ValueError <exceptions.ValueError>` if a commit row
             chunk is never encountered.
    """
    row_set = RowSet()
    row_set.add_row_key(row_key)
    rows_iter = iter(self.read_rows(filter_=filter_, row_set=row_set))
    first = next(rows_iter, None)
    # A single-key row set must never match more than one row.
    if next(rows_iter, None) is not None:
        raise ValueError("More than one row was returned.")
    return first
def test_table_read_rows_w_row_set(data_table, rows_to_delete):
    """``read_rows`` returns the union of an explicit key and a key range."""
    from google.cloud.bigtable.row_set import RowSet
    from google.cloud.bigtable.row_set import RowRange

    row_keys = [
        b"row_key_1", b"row_key_2", b"row_key_3", b"row_key_4",
        b"row_key_5", b"row_key_6", b"row_key_7", b"row_key_8",
        b"row_key_9",
    ]
    _populate_table(data_table, rows_to_delete, row_keys)

    row_set = RowSet()
    row_set.add_row_range(
        RowRange(start_key=b"row_key_3", end_key=b"row_key_7"))
    row_set.add_row_key(b"row_key_1")

    found_row_keys = [
        row.row_key for row in data_table.read_rows(row_set=row_set)
    ]
    # Keys 1 and 3..6: the range end is exclusive and key 2 was not requested.
    expected_row_keys = [
        key for key in row_keys[:6] if not key.endswith(b"_2")
    ]
    assert found_row_keys == expected_row_keys
def get_first_row(self, start_key, column_families=None, end_key=None):
    """Return ``(row_key, row_dict)`` for the first row at/after *start_key*.

    Scans forward from *start_key* (inclusive). When *end_key* is omitted,
    the first row returned by the scan must still have *start_key* as a
    prefix of its key; otherwise ``None`` is returned (implicitly). The
    row payload is limited to one cell per column via CellsColumnLimitFilter.

    Improvements over the original: removed commented-out dead code and the
    unused ``i`` loop counter; family-filter construction uses a
    comprehension. Behavior is unchanged.

    :param start_key: str key (or key prefix) to start scanning from.
    :param column_families: optional iterable of column-family names used
        to restrict the returned cells.
    :param end_key: optional exclusive scan end key.
    :returns: ``(row_key, row_dict)`` tuple, or ``None`` when no row matches.
    """
    filters = [CellsColumnLimitFilter(1)]
    if column_families is not None:
        family_filters = [FamilyNameRegexFilter(c) for c in column_families]
        if len(family_filters) == 1:
            filters.append(family_filters[0])
        elif len(family_filters) > 1:
            filters.append(RowFilterUnion(family_filters))

    if len(filters) > 1:
        filter_ = RowFilterChain(filters=filters)
    else:
        filter_ = filters[0]

    row_set = RowSet()
    row_set.add_row_range_from_keys(start_key=start_key,
                                    start_inclusive=True,
                                    end_key=end_key)

    for rowdata in self._low_level.read_rows(filter_=filter_,
                                             row_set=row_set):
        rk = rowdata.row_key.decode("utf-8")
        # Without an end key, treat start_key as a required prefix.
        if end_key is None and not rk.startswith(start_key):
            break
        return (rk, self.partial_row_to_dict(rowdata))
def test_row_set__ne__same_value():
    """``!=`` is False for row sets with identical keys and ranges."""
    from google.cloud.bigtable.row_set import RowRange
    from google.cloud.bigtable.row_set import RowSet

    left = RowSet()
    right = RowSet()
    left.add_row_key(b"row_key1")
    right.add_row_key(b"row_key1")
    left.add_row_range(RowRange(b"row_key4", b"row_key9"))
    right.add_row_range(RowRange(b"row_key4", b"row_key9"))

    assert not (left != right)
def test_row_set__eq__():
    """``==`` is True for row sets with identical keys and ranges."""
    from google.cloud.bigtable.row_set import RowRange
    from google.cloud.bigtable.row_set import RowSet

    left = RowSet()
    right = RowSet()
    left.add_row_key(b"row_key1")
    right.add_row_key(b"row_key1")
    left.add_row_range(RowRange(b"row_key4", b"row_key9"))
    right.add_row_range(RowRange(b"row_key4", b"row_key9"))

    assert left == right
def _get_row_range_with_row_keys(row_index):
    """Return a RowSet holding keys ``beam_key0000001 .. beam_key<row_index>``."""
    row_set = RowSet()
    for n in range(1, row_index + 1):
        key = "beam_key%s" % ('{0:07}'.format(n))
        row_set.add_row_key(key)
    return row_set
def _get_bytes_multi(self, id_list):
    """Fetch several rows at once; ids with no stored row map to ``None``."""
    result = {}
    row_set = RowSet()
    for key in id_list:
        row_set.add_row_key(key)
        result[key] = None
    for row in self.connection.read_rows(row_set=row_set):
        result[row.row_key.decode("utf-8")] = self.decode_row(row)
    return result
def query(self, host, dc, region, t, limit=1, window=60):
    """Read up to *limit* rows for *host* within ``[t - window, t]`` seconds."""
    end_ts = int(t)
    start_ts = end_ts - window
    start_key = rowkey(host, dc, region, start_ts)
    end_key = rowkey(host, dc, region, end_ts)

    row_set = RowSet()
    row_set.add_row_range(RowRange(start_key, end_key))
    return self.table.read_rows(
        limit=limit,
        filter_=row_filters.CellsColumnLimitFilter(1),
        row_set=row_set)
def test_row_set_add_row_range():
    """``add_row_range`` appends ranges in insertion order."""
    from google.cloud.bigtable.row_set import RowRange
    from google.cloud.bigtable.row_set import RowSet

    first = RowRange(b"row_key1", b"row_key9")
    second = RowRange(b"row_key21", b"row_key29")

    row_set = RowSet()
    row_set.add_row_range(first)
    row_set.add_row_range(second)

    assert [first, second] == row_set.row_ranges
def test_row_set_add_row_range_from_keys():
    """``add_row_range_from_keys`` stores the given end key on the new range."""
    from google.cloud.bigtable.row_set import RowSet

    row_set = RowSet()
    row_set.add_row_range_from_keys(
        start_key=b"row_key1",
        end_key=b"row_key9",
        start_inclusive=False,
        end_inclusive=True,
    )

    assert row_set.row_ranges[0].end_key == b"row_key9"
def read_row_range(project_id, instance_id, table_id):
    """Print every row whose key falls inside a fixed ``phone#...`` range."""
    client = bigtable.Client(project=project_id, admin=True)
    table = client.instance(instance_id).table(table_id)

    row_set = RowSet()
    row_set.add_row_range_from_keys(
        start_key=b"phone#4c410523#20190501",
        end_key=b"phone#4c410523#201906201")

    for row in table.read_rows(row_set=row_set):
        print_row(row)
def row_generator(self, row_keys=None, start_key=None, end_key=None,
                  column_families=None, check_prefix=None):
    """Yield ``(row_key, row_dict)`` pairs for a key list or a key range.

    Exactly one of two read modes is used:
      * ``row_keys`` given: read those specific keys;
      * ``start_key`` given: read the inclusive range ``[start_key, end_key]``
        (``end_key`` or ``check_prefix`` is then required).

    When ``check_prefix`` is set, iteration stops at the first returned key
    that does not start with that prefix.

    Raises ``ValueError`` when neither ``row_keys`` nor ``start_key`` is
    given, or when ``start_key`` comes without ``end_key``/``check_prefix``.
    """
    if row_keys is None and start_key is None:
        raise ValueError("use row_keys or start_key parameter")
    if start_key is not None and (end_key is None and check_prefix is None):
        raise ValueError(
            "use start_key together with end_key or check_prefix")

    # At most one cell per column; optionally restrict to column families.
    filters = [CellsColumnLimitFilter(1)]
    if column_families is not None:
        c_filters = []
        for c in column_families:
            c_filters.append(FamilyNameRegexFilter(c))
        if len(c_filters) == 1:
            filters.append(c_filters[0])
        elif len(c_filters) > 1:
            filters.append(RowFilterUnion(c_filters))
    if len(filters) > 1:
        filter_ = RowFilterChain(filters=filters)
    else:
        filter_ = filters[0]

    row_set = RowSet()
    if row_keys:
        for r in row_keys:
            row_set.add_row_key(r)
    else:
        row_set.add_row_range_from_keys(start_key=start_key,
                                        end_key=end_key,
                                        start_inclusive=True,
                                        end_inclusive=True)

    generator = self._low_level.read_rows(filter_=filter_, row_set=row_set)
    # ``i`` tracks the position in ``row_keys`` so that a None result can be
    # reported as ``(requested_key, {})``.
    # NOTE(review): this assumes read_rows yields results in the same order
    # and count as ``row_keys`` — confirm against the client's contract.
    i = -1
    for rowdata in generator:
        i += 1
        if rowdata is None:
            if row_keys:
                yield (row_keys[i], {})
            continue
        rk = rowdata.row_key.decode("utf-8")
        if check_prefix:
            if not rk.startswith(check_prefix):
                break
        curr_row_dict = self.partial_row_to_ordered_dict(rowdata)
        yield (rk, curr_row_dict)
def get_many(self, keys: Sequence[str]) -> Iterator[Tuple[str, bytes]]:
    """Yield ``(key, value)`` for each requested row that still has a value.

    Bigtable does not return empty rows, but ``__decode_row`` may return
    ``None`` for a row whose payload has outlived its TTL, so such rows
    are skipped here as well.
    """
    row_set = RowSet()
    for key in keys:
        row_set.add_row_key(key)

    for row in self._get_table().read_rows(row_set=row_set):
        value = self.__decode_row(row)
        if value is None:
            continue
        yield row.row_key.decode("utf-8"), value
def get(self):
    """HTTP GET handler: read the configured row keys and return them as a
    JSON array, mapping Bigtable column qualifiers back to friendly names
    via the module-level ``bt_mapping_dict``.

    Always returns HTTP 200 with a JSON body (empty array on failure).
    """
    bt_array = []
    try:
        table = instance.table(bt_table_name)

        # Restrict the read to the module-level list of row keys.
        row_set = RowSet()
        for row_key in row_keys:
            row_set.add_row_key(row_key)

        # One qualifier-regex filter per mapped Bigtable column.
        colFilters = []
        for name, bt_name in bt_mapping_dict.items():
            colFilters.append(
                row_filters.ColumnQualifierRegexFilter(bt_name))

        print("before read_rows...")
        rows = table.read_rows(
            row_set=row_set,
            filter_=row_filters.RowFilterChain(filters=[
                row_filters.CellsColumnLimitFilter(1),
                row_filters.RowFilterUnion(filters=colFilters)
            ]),
            retry=bigtable.table.DEFAULT_RETRY_READ_ROWS.with_deadline(
                60.0))
        print("after read_rows...")

        for row in rows:
            print("Reading data for {}:".format(
                row.row_key.decode('utf-8')))
            # One output dict per column family of each row.
            for cf, cols in sorted(row.cells.items()):
                bt_dict = {}
                bt_dict['id'] = row.row_key.decode('utf-8')
                key = None
                # using BT mapping to return data
                for col, cells in sorted(cols.items()):
                    for cell in cells:
                        # Reverse-lookup the friendly name for this column.
                        # NOTE(review): ``key`` is reset once per family,
                        # not per column — confirm that carrying the last
                        # match across columns is intended.
                        for name, bt_name in bt_mapping_dict.items():
                            if col.decode('utf-8') == bt_name:
                                key = name
                                break
                        if key is not None:
                            bt_dict[key] = cell.value.decode('utf-8')
                bt_array.append(bt_dict)
    except BaseException as error:
        # NOTE(review): ``BaseException`` also swallows KeyboardInterrupt /
        # SystemExit; ``Exception`` is almost certainly what was meant.
        logging.error(
            'An exception occurred - DemoBigTableGet::get(): {}'.format(
                error))
    print(bt_array)
    return json.dumps(bt_array), 200, {'ContentType': 'application/json'}
def read_prefix(project_id, instance_id, table_id):
    """Print every row whose key starts with the ``"phone#"`` prefix."""
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    prefix = "phone#"
    # Smallest key greater than every prefixed key: bump the prefix's
    # final character by one code point.
    end_key = prefix[:-1] + chr(ord(prefix[-1]) + 1)

    row_set = RowSet()
    row_set.add_row_range_from_keys(
        prefix.encode("utf-8"), end_key.encode("utf-8"))

    for row in table.read_rows(row_set=row_set):
        print_row(row)
def get_multi(self, id_list):
    """Fetch several rows; ids with no stored row remain mapped to ``None``."""
    if len(id_list) == 1:
        only = id_list[0]
        return {only: self.get(only)}

    rv = {}
    row_set = RowSet()
    for key in id_list:
        row_set.add_row_key(key)
        rv[key] = None
    for row in self.connection.read_rows(row_set=row_set):
        # NOTE(review): indexes by raw ``row.row_key`` (bytes); if id_list
        # holds str ids these entries will not replace the ``None``
        # placeholders — confirm intended (compare _get_bytes_multi, which
        # decodes the key first).
        rv[row.row_key] = self.decode_row(row)
    return rv
def bigtable_read_data(request):
    """HTTP handler: list ``os_build`` for every row under the phone# prefix."""
    instance = client.instance(request.headers.get("instance_id"))
    table = instance.table(request.headers.get("table_id"))

    prefix = 'phone#'
    # End of the prefix range: last prefix character bumped by one.
    end_key = prefix[:-1] + chr(ord(prefix[-1]) + 1)

    row_set = RowSet()
    row_set.add_row_range_from_keys(prefix.encode("utf-8"),
                                    end_key.encode("utf-8"))

    outputs = []
    for row in table.read_rows(row_set=row_set):
        outputs.append('Rowkey: {}, os_build: {}'.format(
            row.row_key.decode('utf-8'),
            row.cells["stats_summary"][
                "os_build".encode('utf-8')][0].value.decode('utf-8')))
    return '\n'.join(outputs)
def test_row_set__update_message_request():
    """``_update_message_request`` copies keys and ranges into the request pb."""
    from google.cloud._helpers import _to_bytes
    from google.cloud.bigtable.row_set import RowRange
    from google.cloud.bigtable.row_set import RowSet

    table_name = "table_name"
    row_range1 = RowRange(b"row_key21", b"row_key29")

    row_set = RowSet()
    row_set.add_row_key("row_key1")
    row_set.add_row_range(row_range1)

    request = _ReadRowsRequestPB(table_name=table_name)
    row_set._update_message_request(request)

    expected_request = _ReadRowsRequestPB(table_name=table_name)
    expected_request.rows.row_keys.append(_to_bytes("row_key1"))
    expected_request.rows.row_ranges.append(row_range1.get_range_kwargs())

    assert request == expected_request
def get_multi(self, id_list):
    """Fetch rows for *id_list*, serving from the cache where possible.

    Ids absent from both cache and Bigtable remain mapped to ``None``.
    Freshly read rows are written back to the cache before returning.
    """
    if len(id_list) == 1:
        return {id_list[0]: self.get(id_list[0])}

    cache_items = self._get_cache_items(id_list)
    if len(cache_items) == len(id_list):
        return cache_items

    uncached_ids = [key for key in id_list if key not in cache_items]

    rv = {}
    row_set = RowSet()
    for key in uncached_ids:
        row_set.add_row_key(key)
        rv[key] = None
    for row in self.connection.read_rows(row_set=row_set):
        # NOTE(review): indexes by raw ``row.row_key`` (bytes); if ids are
        # str these entries will not replace the ``None`` placeholders —
        # confirm intended.
        rv[row.row_key] = self.decode_row(row)

    self._set_cache_items(rv)
    rv.update(cache_items)
    return rv
def test_rowset_add_row_range_w_pfx(data_table, rows_to_delete):
    """``add_row_range_with_prefix`` limits reads to keys with the prefix."""
    from google.cloud.bigtable.row_set import RowSet

    row_keys = [
        b"row_key_1",
        b"row_key_2",
        b"row_key_3",
        b"row_key_4",
        b"sample_row_key_1",
        b"sample_row_key_2",
    ]
    _populate_table(data_table, rows_to_delete, row_keys)

    row_set = RowSet()
    row_set.add_row_range_with_prefix("row")
    found_rows = data_table.read_rows(row_set=row_set)

    found_row_keys = [row.row_key for row in found_rows]
    expected_row_keys = [
        key for key in row_keys if key.startswith(b"row")
    ]
    assert found_row_keys == expected_row_keys
def test_yield_rows_with_row_set(self):
    """``yield_rows`` honors a RowSet mixing a key range and a single key."""
    row_keys = [
        b"row_key_1", b"row_key_2", b"row_key_3", b"row_key_4",
        b"row_key_5", b"row_key_6", b"row_key_7", b"row_key_8",
        b"row_key_9",
    ]

    rows = []
    for key in row_keys:
        row = self._table.row(key)
        row.set_cell(COLUMN_FAMILY_ID1, COL_NAME1, CELL_VAL1)
        rows.append(row)
        self.rows_to_delete.append(row)
    self._table.mutate_rows(rows)

    row_set = RowSet()
    row_set.add_row_range(
        RowRange(start_key=b"row_key_3", end_key=b"row_key_7"))
    row_set.add_row_key(b"row_key_1")

    found_row_keys = [
        row.row_key for row in self._table.yield_rows(row_set=row_set)
    ]
    # Range end key is exclusive, so row_key_7 is absent.
    expected_row_keys = [
        b"row_key_1", b"row_key_3", b"row_key_4", b"row_key_5",
        b"row_key_6",
    ]
    self.assertEqual(found_row_keys, expected_row_keys)