def test__ne__same_value(self):
    """Two row sets built from equal keys and ranges must not compare unequal."""
    built = []
    for _ in range(2):
        row_set = self._make_one()
        row_set.add_row_key(b"row_key1")
        row_set.add_row_range(RowRange(b"row_key4", b"row_key9"))
        built.append(row_set)

    first, second = built
    self.assertFalse(first != second)
def query_builder():
    """Build a ``RowSet`` covering a short, recent time window per exchange.

    For every (pair, exchange) combination one row range is added whose keys
    have the form ``"<pair>#<exchange>#<epoch seconds>"``. The range starts
    three seconds before the current local time and ends one second after it.
    Only the ``BTC/USD`` pair is queried.

    :rtype: RowSet
    :returns: The row set holding one range per (pair, exchange) combination.
    """
    pairs = ["BTC/USD"]
    exchanges = [
        "bitfinex",
        "bitStamp",
        "poloniex",
        "gemini",
        "hitBTC",
        "okCoin",
    ]

    # Sample the clock once so every range describes the same window; reading
    # it again for each key could shift a bound by a second part-way through.
    now = datetime.now()
    window_start = int(time.mktime((now - timedelta(seconds=3)).timetuple()))
    window_end = int(time.mktime((now + timedelta(seconds=1)).timetuple()))

    rowset = RowSet()
    for pair in pairs:
        for exchange in exchanges:
            rowset.add_row_range(
                RowRange(
                    start_key="{}#{}#{}".format(pair, exchange, window_start),
                    end_key="{}#{}#{}".format(pair, exchange, window_end),
                )
            )
    return rowset
def test_yield_rows_with_row_set(self):
    """Yield rows for a row set made of one range plus one single key.

    Nine rows are written; the row set asks for the range
    ``row_key_3``..``row_key_7`` and the key ``row_key_1``. The keys read
    back are compared as a set, so ordering is not checked here.
    """
    row_keys = [
        b'row_key_1',
        b'row_key_2',
        b'row_key_3',
        b'row_key_4',
        b'row_key_5',
        b'row_key_6',
        b'row_key_7',
        b'row_key_8',
        b'row_key_9',
    ]

    pending = []
    for key in row_keys:
        entry = self._table.row(key)
        entry.set_cell(COLUMN_FAMILY_ID1, COL_NAME1, CELL_VAL1)
        pending.append(entry)
        # Registered for cleanup as soon as the row object exists.
        self.rows_to_delete.append(entry)
    self._table.mutate_rows(pending)

    row_set = RowSet()
    row_set.add_row_range(
        RowRange(start_key=b'row_key_3', end_key=b'row_key_7'))
    row_set.add_row_key(b'row_key_1')

    read_rows = self._table.yield_rows(row_set=row_set)

    expected_row_keys = {
        b'row_key_1',
        b'row_key_3',
        b'row_key_4',
        b'row_key_5',
        b'row_key_6',
    }
    found_row_keys = {row.row_key for row in read_rows}
    self.assertEqual(found_row_keys, expected_row_keys)
def test_table_read_rows_w_row_set(data_table, rows_to_delete):
    """Read rows through a row set holding one range and one extra key.

    The expected keys are the first six populated keys minus the one ending
    in ``_2``, compared in order.
    """
    from google.cloud.bigtable.row_set import RowRange
    from google.cloud.bigtable.row_set import RowSet

    row_keys = [
        b"row_key_1",
        b"row_key_2",
        b"row_key_3",
        b"row_key_4",
        b"row_key_5",
        b"row_key_6",
        b"row_key_7",
        b"row_key_8",
        b"row_key_9",
    ]
    _populate_table(data_table, rows_to_delete, row_keys)

    row_set = RowSet()
    row_set.add_row_range(
        RowRange(start_key=b"row_key_3", end_key=b"row_key_7")
    )
    row_set.add_row_key(b"row_key_1")

    found_row_keys = [
        row.row_key for row in data_table.read_rows(row_set=row_set)
    ]
    expected_row_keys = [
        key for key in row_keys[:6] if not key.endswith(b"_2")
    ]
    assert found_row_keys == expected_row_keys
def test_row_range___eq__type_differ():
    """A ``RowRange`` must compare unequal to an object of another type."""
    from google.cloud.bigtable.row_set import RowRange

    row_range = RowRange(b"row_key1", b"row_key9", True, False)
    unrelated = object()

    assert row_range != unrelated
def add_row_range_from_keys(
    self, start_key=None, end_key=None, start_inclusive=True, end_inclusive=False
):
    """Build a ``RowRange`` from its parts and add it to this row set.

    :param start_key: (Optional) Start key passed to ``RowRange``.
    :param end_key: (Optional) End key passed to ``RowRange``.
    :param start_inclusive: (Optional) Start-inclusive flag passed to
                            ``RowRange``. Defaults to ``True``.
    :param end_inclusive: (Optional) End-inclusive flag passed to
                          ``RowRange``. Defaults to ``False``.
    """
    self.add_row_range(
        RowRange(start_key, end_key, start_inclusive, end_inclusive)
    )
def test__filter_row_ranges_all_ranges_already_read_open_closed(self):
    """No range should remain when every range ends before the last scanned key."""
    last_scanned_key = b"row_key54"
    already_read = (
        RowRange(b"row_key21", b"row_key29", False, True),
        RowRange(b"row_key31", b"row_key39"),
        RowRange(b"row_key41", b"row_key49", False, True),
    )

    request = _ReadRowsRequestPB(table_name=self.table_name)
    for row_range in already_read:
        request.rows.row_ranges.add(**row_range.get_range_kwargs())

    request_manager = self._make_one(request, last_scanned_key, 2)
    request_manager.new_message = _ReadRowsRequestPB(table_name=self.table_name)

    self.assertEqual(request_manager._filter_row_ranges(), [])
def test_row_range_get_range_kwargs_open_closed():
    """An exclusive-start, inclusive-end range maps to open/closed kwargs."""
    from google.cloud.bigtable.row_set import RowRange

    start_key, end_key = b"row_key1", b"row_key9"
    row_range = RowRange(start_key, end_key, False, True)

    wanted = {"start_key_open": start_key, "end_key_closed": end_key}
    produced = row_range.get_range_kwargs()

    assert wanted == produced
def test_row_set__eq__():
    """Row sets holding equal keys and equal ranges compare equal."""
    from google.cloud.bigtable.row_set import RowRange
    from google.cloud.bigtable.row_set import RowSet

    def _build_row_set():
        # Each call creates fresh, independent objects with the same values.
        row_set = RowSet()
        row_set.add_row_key(b"row_key1")
        row_set.add_row_range(RowRange(b"row_key4", b"row_key9"))
        return row_set

    left = _build_row_set()
    right = _build_row_set()

    assert left == right
def test_row_set__ne__same_value():
    """``!=`` must be false for row sets holding equal keys and ranges."""
    from google.cloud.bigtable.row_set import RowRange
    from google.cloud.bigtable.row_set import RowSet

    row_sets = []
    for _ in range(2):
        candidate = RowSet()
        candidate.add_row_key(b"row_key1")
        candidate.add_row_range(RowRange(b"row_key4", b"row_key9"))
        row_sets.append(candidate)

    left, right = row_sets
    assert not (left != right)
def test_row_set__eq__row_ranges_differ():
    """Row sets are unequal when one of them holds an additional range."""
    from google.cloud.bigtable.row_set import RowRange
    from google.cloud.bigtable.row_set import RowSet

    shared_ranges = [
        RowRange(b"row_key4", b"row_key9"),
        RowRange(b"row_key14", b"row_key19"),
    ]
    extra_range = RowRange(b"row_key24", b"row_key29")

    bigger = RowSet()
    smaller = RowSet()
    for row_range in shared_ranges + [extra_range]:
        bigger.add_row_range(row_range)
    for row_range in shared_ranges:
        smaller.add_row_range(row_range)

    assert not (bigger == smaller)
def split(self, desired_bundle_size, start_position=None, stop_position=None):
    ''' Split the source into bundles, using the row set when one is configured.

    This is a generator. Three paths exist:

    * No explicit positions and ``beam_options['row_set']`` is set: every
      range in ``self.row_set_overlap.row_ranges`` is passed to
      ``split_range_size`` and its results are yielded.
    * No explicit positions and no row set: the sample row keys are walked,
      accumulating the byte offset deltas; whenever the accumulated size
      reaches ``desired_bundle_size`` the span from the previous cut to the
      current sample key is passed to ``range_split_fraction``.
    * Explicit start and/or stop position: a single ``RowRange`` built from
      the positions is passed to ``split_range_size``.

    :param desired_bundle_size: the desired size (in bytes) of the bundles
      returned.
    :param start_position: if specified, the given position must be used as
      the starting position of the first bundle. ``None`` is treated as
      ``b''``.
    :param stop_position: if specified, the given position must be used as
      the ending position of the last bundle. ``None`` is treated as ``b''``.

    Returns:
      an iterator over whatever ``split_range_size`` /
      ``range_split_fraction`` yield (presumably 'SourceBundle' objects --
      confirm against those helpers).
    '''
    # Normalise missing positions to the empty key so the checks below only
    # have to deal with bytes.
    if start_position is None:
        start_position = b''
    if stop_position is None:
        stop_position = b''

    if start_position == b'' and stop_position == b'':
        if self.beam_options['row_set'] is not None:
            for row_range in self.row_set_overlap.row_ranges:
                for row_split in self.split_range_size(
                        desired_bundle_size,
                        self.get_sample_row_keys(),
                        row_range):
                    yield row_split
        else:
            addition_size = 0  # bytes accumulated since the last cut
            last_offset = 0  # offset_bytes of the previously visited sample
            current_size = 0
            start_key = b''
            end_key = b''
            for sample_row_key in self.get_sample_row_keys():
                current_size = sample_row_key.offset_bytes - last_offset
                addition_size += current_size
                if addition_size >= desired_bundle_size:
                    end_key = sample_row_key.row_key
                    for fraction in self.range_split_fraction(
                            addition_size,
                            desired_bundle_size,
                            start_key,
                            end_key):
                        yield fraction
                    # The next span starts where this one ended.
                    start_key = sample_row_key.row_key
                    addition_size = 0
                # NOTE(review): this block arrived with its line structure
                # flattened; ``last_offset`` is assumed to advance on every
                # sample (not only after a cut) -- confirm against the
                # original file.
                last_offset = sample_row_key.offset_bytes
            # NOTE(review): bytes accumulated after the last cut are not
            # emitted by this loop; whether that can drop a trailing span
            # depends on what ``get_sample_row_keys`` returns -- confirm.
    # NOTE(review): both positions were replaced by b'' above when they were
    # None, so this condition is always true once it is reached.
    elif start_position is not None or stop_position is not None:
        row_range = RowRange(start_position, stop_position)
        for row_split in self.split_range_size(desired_bundle_size,
                                               self.get_sample_row_keys(),
                                               row_range):
            yield row_split
def test_row_range_constructor():
    """A two-argument ``RowRange`` keeps its keys and the default flags."""
    from google.cloud.bigtable.row_set import RowRange

    bounds = ("row_key1", "row_key9")
    row_range = RowRange(*bounds)

    assert bounds[0] == row_range.start_key
    assert bounds[1] == row_range.end_key
    # Defaults checked here: start inclusive, end exclusive.
    assert row_range.start_inclusive
    assert not row_range.end_inclusive
def query(self, host, dc, region, t, limit=1, window=60):
    """Read rows for one host whose keys fall in a window ending at ``t``.

    The row range runs from the key built for ``int(t) - window`` to the key
    built for ``int(t)``; both keys come from ``rowkey``.

    :param host: Host component passed to ``rowkey``.
    :param dc: Data-centre component passed to ``rowkey``.
    :param region: Region component passed to ``rowkey``.
    :param t: End of the window; converted with ``int``.
    :param limit: Row limit forwarded to ``read_rows``. Defaults to 1.
    :param window: Width of the window, subtracted from ``int(t)``.
                   Defaults to 60.
    :returns: Whatever ``self.table.read_rows`` returns.
    """
    window_start = int(t) - window
    window_end = int(t)
    first_key = rowkey(host, dc, region, window_start)
    last_key = rowkey(host, dc, region, window_end)

    row_set = RowSet()
    row_set.add_row_range(RowRange(first_key, last_key))

    cell_limit_filter = row_filters.CellsColumnLimitFilter(1)
    return self.table.read_rows(
        limit=limit,
        filter_=cell_limit_filter,
        row_set=row_set,
    )
def test_build_updated_request_full_table(self):
    """A request without ranges is rebuilt with one range after the last key.

    The expected request carries an empty filter and a single range whose
    open start key is the last scanned key.
    """
    from google.cloud.bigtable_v2.types import RowRange

    last_scanned_key = b"row_key14"
    request_manager = self._make_one(
        _ReadRowsRequestPB(table_name=self.table_name), last_scanned_key, 2
    )

    result = request_manager.build_updated_request()

    expected_result = _ReadRowsRequestPB(table_name=self.table_name, filter={})
    expected_result.rows.row_ranges.append(
        RowRange(start_key_open=last_scanned_key)
    )
    self.assertEqual(expected_result, result)
def test_build_updated_request_no_end_key(self):
    """Rebuild a request whose only range has a start key and no end key.

    The expected request keeps the filter, carries ``rows_limit=6`` (the
    original asked for 8) and holds one range opening after the last
    scanned key.
    """
    from google.cloud.bigtable.row_filters import RowSampleFilter
    from google.cloud.bigtable_v2.types import RowRange

    sample_filter = RowSampleFilter(0.33)
    last_scanned_key = b"row_key25"

    request = _ReadRowsRequestPB(
        filter=sample_filter.to_pb(),
        rows_limit=8,
        table_name=self.table_name,
    )
    request.rows.row_ranges.append(RowRange(start_key_closed=b"row_key20"))

    request_manager = self._make_one(request, last_scanned_key, 2)
    result = request_manager.build_updated_request()

    expected_result = _ReadRowsRequestPB(
        table_name=self.table_name,
        filter=sample_filter.to_pb(),
        rows_limit=6,
    )
    expected_result.rows.row_ranges.append(
        RowRange(start_key_open=last_scanned_key)
    )
    self.assertEqual(expected_result, result)
def test__update_message_request(self):
    """``_update_message_request`` copies the key and the range into the request."""
    table_name = 'table_name'
    row_key = "row_key1"
    row_range = RowRange(b"row_key21", b"row_key29")

    row_set = self._make_one()
    row_set.add_row_key(row_key)
    row_set.add_row_range(row_range)

    request = _ReadRowsRequestPB(table_name=table_name)
    row_set._update_message_request(request)

    expected_request = _ReadRowsRequestPB(table_name=table_name)
    expected_request.rows.row_keys.append(_to_bytes(row_key))
    expected_request.rows.row_ranges.add(**row_range.get_range_kwargs())

    self.assertEqual(request, expected_request)
def add_row_range(self, row_range):
    """Add ``row_range``, merging it into the first stored range it overlaps.

    The stored ranges are scanned in order. If ``row_range`` overlaps one of
    them (ranges that merely touch count as overlapping), that entry is
    replaced by a new ``RowRange`` spanning both, and the scan stops. If
    nothing overlaps, ``row_range`` itself is appended.

    A key of ``None`` is treated as unbounded on that side, so open-ended
    ranges can be compared without raising ``TypeError``.

    NOTE: a merged range is built from the two keys only, so it gets
    ``RowRange``'s default inclusivity flags rather than the flags of the
    ranges it replaces. Only the first overlapping entry is merged; the
    result is not re-checked against the remaining stored ranges.

    :param row_range: The range to add; must expose ``start_key`` and
                      ``end_key``.
    """

    def _not_after(start, end):
        # An open start precedes every end; an open end follows every start.
        return start is None or end is None or start <= end

    new_start = row_range.start_key
    new_end = row_range.end_key

    for index, stored in enumerate(self.row_ranges):
        overlaps = _not_after(new_start, stored.end_key) and _not_after(
            stored.start_key, new_end
        )
        if not overlaps:
            continue

        # An unbounded side on either range keeps the merged side unbounded.
        if new_start is None or stored.start_key is None:
            merged_start = None
        else:
            merged_start = min(new_start, stored.start_key)
        if new_end is None or stored.end_key is None:
            merged_end = None
        else:
            merged_end = max(new_end, stored.end_key)

        self.row_ranges[index] = RowRange(merged_start, merged_end)
        break
    else:
        # The loop finished without merging anything.
        self.row_ranges.append(row_range)
def test_row_set__update_message_request():
    """``_update_message_request`` copies the key and the range into the request."""
    from google.cloud._helpers import _to_bytes
    from google.cloud.bigtable.row_set import RowRange
    from google.cloud.bigtable.row_set import RowSet

    table_name = "table_name"
    row_key = "row_key1"
    row_range = RowRange(b"row_key21", b"row_key29")

    row_set = RowSet()
    row_set.add_row_key(row_key)
    row_set.add_row_range(row_range)

    request = _ReadRowsRequestPB(table_name=table_name)
    row_set._update_message_request(request)

    expected_request = _ReadRowsRequestPB(table_name=table_name)
    expected_request.rows.row_keys.append(_to_bytes(row_key))
    expected_request.rows.row_ranges.append(row_range.get_range_kwargs())

    assert request == expected_request
def test_yield_rows_with_row_set(self):
    """Yield rows for a row set made of one range plus one single key.

    Nine rows are written; the row set asks for the range
    ``row_key_3``..``row_key_7`` and the key ``row_key_1``. The keys read
    back are compared as an ordered list.
    """
    row_keys = [
        b"row_key_1",
        b"row_key_2",
        b"row_key_3",
        b"row_key_4",
        b"row_key_5",
        b"row_key_6",
        b"row_key_7",
        b"row_key_8",
        b"row_key_9",
    ]

    pending = []
    for key in row_keys:
        entry = self._table.row(key)
        entry.set_cell(COLUMN_FAMILY_ID1, COL_NAME1, CELL_VAL1)
        pending.append(entry)
        # Registered for cleanup as soon as the row object exists.
        self.rows_to_delete.append(entry)
    self._table.mutate_rows(pending)

    row_set = RowSet()
    row_set.add_row_range(
        RowRange(start_key=b"row_key_3", end_key=b"row_key_7"))
    row_set.add_row_key(b"row_key_1")

    found_row_keys = [
        row.row_key for row in self._table.yield_rows(row_set=row_set)
    ]
    expected_row_keys = [
        b"row_key_1",
        b"row_key_3",
        b"row_key_4",
        b"row_key_5",
        b"row_key_6",
    ]
    self.assertEqual(found_row_keys, expected_row_keys)
def _create_row_request(
    table_name,
    start_key=None,
    end_key=None,
    filter_=None,
    limit=None,
    end_inclusive=False,
    app_profile_id=None,
    row_set=None,
):
    """Build the ``ReadRowsRequest`` used to read rows from a table.

    :type table_name: str
    :param table_name: The name of the table to read from.

    :type start_key: bytes
    :param start_key: (Optional) First key of the range to read. When only
                      ``end_key`` is given the range still gets built, with
                      this bound left as ``None``.

    :type end_key: bytes
    :param end_key: (Optional) Last key of the range to read; whether it is
                    part of the range is controlled by ``end_inclusive``.

    :type filter_: :class:`.RowFilter`
    :param filter_: (Optional) Filter to apply; its ``to_pb()`` result is
                    stored on the request. Omitted from the request if unset.

    :type limit: int
    :param limit: (Optional) Stored as ``rows_limit`` on the request.
                  Omitted from the request if unset.

    :type end_inclusive: bool
    :param end_inclusive: (Optional) Whether ``end_key`` is considered
                          inclusive. The default is False (exclusive).

    :type app_profile_id: str
    :param app_profile_id: (Optional) The unique name of the AppProfile.

    :type row_set: :class:`row_set.RowSet`
    :param row_set: (Optional) The row set containing multiple row keys and
                    row_ranges. Mutually exclusive with ``start_key`` /
                    ``end_key``.

    :rtype: :class:`data_messages_v2_pb2.ReadRowsRequest`
    :returns: The ``ReadRowsRequest`` protobuf corresponding to the inputs.

    :raises: :class:`ValueError <exceptions.ValueError>` if both
             ``row_set`` and one of ``start_key`` or ``end_key`` are set
    """
    has_key_range = start_key is not None or end_key is not None
    if has_key_range and row_set is not None:
        raise ValueError("Row range and row set cannot be " "set simultaneously")

    request_kwargs = {"table_name": table_name}
    if filter_ is not None:
        request_kwargs["filter"] = filter_.to_pb()
    # Remaining optional fields are copied through only when supplied.
    for field_name, value in (
        ("rows_limit", limit),
        ("app_profile_id", app_profile_id),
    ):
        if value is not None:
            request_kwargs[field_name] = value

    message = data_messages_v2_pb2.ReadRowsRequest(**request_kwargs)

    if has_key_range:
        # A bare key range is expressed as a row set holding a single range.
        row_set = RowSet()
        row_set.add_row_range(
            RowRange(start_key, end_key, end_inclusive=end_inclusive)
        )

    if row_set is not None:
        row_set._update_message_request(message)

    return message
# NOTE(review): the three statements below are the tail of a function whose
# ``def`` line is not part of this span; they are kept exactly as found.
start_key = min(element.start_key, other.start_key)
end_key = max(element.end_key, other.end_key)
return RowRange(start_key, end_key)


def is_overlaped(element, other):
    """Merge two ranges when they strictly overlap.

    Returns a one-element list holding ``overlap_it(element, other)`` when
    one range starts strictly before the other and ends strictly after the
    other's start key; otherwise returns ``[element, other]`` unchanged.

    NOTE(review): both comparisons are strict, so two ranges with the same
    start key, or ranges that only touch, are reported as not overlapping.
    """
    if (element.start_key < other.start_key and element.end_key > other.start_key) or \
            (other.start_key < element.start_key and other.end_key > element.start_key):
        return [overlap_it(element, other)]
    else:
        return [element, other]


def list_overlap(ranges):
    """Print every element of ``ranges``; no overlap handling happens here."""
    for element in ranges:
        print(element)


# Ad-hoc sample data and two manual checks of ``is_overlaped``.
# NOTE(review): the line structure of this span was flattened; these
# statements are assumed to sit at module level rather than inside
# ``list_overlap`` -- confirm against the original file.
lista = [
    RowRange(b'beam_key0038',b'beam_key004'),
    RowRange(b'beam_key0010', b'beam_key0011'),
    RowRange(b'beam_key0037',b'beam_key0039'),
    RowRange(b'beam_key0020', b'beam_key0025'),
    RowRange(b'beam_key0035',b'beam_key0036'),
    RowRange(b'beam_key0023', b'beam_key0024'),
]
check_list = is_overlaped(lista[4], lista[5])
print(check_list)
check_list = is_overlaped(lista[0], lista[1])
print(check_list)
def test_bigtable_add_row_add_row_range_add_row_range_from_keys():
    """Exercise the three ``RowSet`` documentation snippets against a table.

    Nine rows are written, then each snippet (single key, explicit
    ``RowRange``, range from keys) builds a row set, the table is read with
    it and the keys found are compared with the expected ones. The table is
    truncated at the end.

    The ``[START ...]`` / ``[END ...]`` comments delimit the snippets; the
    code between them, including the repeated imports and client setup, is
    left untouched so each snippet stays self-contained.
    """
    row_keys = [
        b"row_key_1",
        b"row_key_2",
        b"row_key_3",
        b"row_key_4",
        b"row_key_5",
        b"row_key_6",
        b"row_key_7",
        b"row_key_8",
        b"row_key_9",
    ]

    # Seed the table with one cell per row key.
    rows = []
    for row_key in row_keys:
        row = Config.TABLE.row(row_key)
        row.set_cell(COLUMN_FAMILY_ID, COL_NAME1, CELL_VAL1)
        rows.append(row)
    Config.TABLE.mutate_rows(rows)

    # [START bigtable_add_row_key]
    from google.cloud.bigtable import Client
    from google.cloud.bigtable.row_set import RowSet

    client = Client(admin=True)
    instance = client.instance(INSTANCE_ID)
    table = instance.table(TABLE_ID)

    row_set = RowSet()
    row_set.add_row_key(b"row_key_5")
    # [END bigtable_add_row_key]

    # A single key is expected to return exactly that row.
    read_rows = table.read_rows(row_set=row_set)
    expected_row_keys = [b"row_key_5"]
    found_row_keys = [row.row_key for row in read_rows]
    assert found_row_keys == expected_row_keys

    # [START bigtable_add_row_range]
    from google.cloud.bigtable import Client
    from google.cloud.bigtable.row_set import RowSet
    from google.cloud.bigtable.row_set import RowRange

    client = Client(admin=True)
    instance = client.instance(INSTANCE_ID)
    table = instance.table(TABLE_ID)

    row_set = RowSet()
    row_set.add_row_range(RowRange(start_key=b"row_key_3", end_key=b"row_key_7"))
    # [END bigtable_add_row_range]

    # The expected keys stop at row_key_6: the end key is not included.
    read_rows = table.read_rows(row_set=row_set)
    expected_row_keys = [b"row_key_3", b"row_key_4", b"row_key_5", b"row_key_6"]
    found_row_keys = [row.row_key for row in read_rows]
    assert found_row_keys == expected_row_keys

    # [START bigtable_row_range_from_keys]
    from google.cloud.bigtable import Client
    from google.cloud.bigtable.row_set import RowSet

    client = Client(admin=True)
    instance = client.instance(INSTANCE_ID)
    table = instance.table(TABLE_ID)

    row_set = RowSet()
    row_set.add_row_range_from_keys(start_key=b"row_key_3", end_key=b"row_key_7")
    # [END bigtable_row_range_from_keys]

    # Same expectation as the explicit RowRange snippet above.
    read_rows = table.read_rows(row_set=row_set)
    expected_row_keys = [b"row_key_3", b"row_key_4", b"row_key_5", b"row_key_6"]
    found_row_keys = [row.row_key for row in read_rows]
    assert found_row_keys == expected_row_keys

    # Remove the rows written at the top of the test.
    table.truncate(timeout=200)
def overlap_it(element, other):
    """Return a ``RowRange`` spanning both ``element`` and ``other``.

    The result runs from the smaller of the two start keys to the larger of
    the two end keys; it is built from the keys only.
    """
    return RowRange(
        min(element.start_key, other.start_key),
        max(element.end_key, other.end_key),
    )