def __init__(self):
    try:
        # pass in environment variables
        self.project_id = os.environ["GCLOUD_PROJECT_NAME"]
        self.instance_id = os.environ["BIGTABLE_CLUSTER"]
        self.table_id = os.environ["TABLE_NAME"]
        self.cloud_region = os.environ["CLOUD_REGION"]
        self.iot_registry = os.environ["IOT_REGISTRY"]
        self.row_filter_count = int(os.environ["ROW_FILTER"])
        self.key_ring_id = os.environ["KEY_RING_ID"]
        self.crypto_key_id = os.environ["CRYPTO_KEY_ID"]
        self.__service_account_ciphertext = base64.b64decode(
            os.environ["GOOGLE_APP_CREDENTIALS"])

        # setup bigtable variables
        self.row_filter = row_filters.CellsColumnLimitFilter(
            self.row_filter_count)
        self.bigtable_client = bigtable.Client(project=self.project_id,
                                               admin=True)
        self.column = "device-temp".encode()
        self.column_family_id = "device-family"
        self.instance = self.bigtable_client.instance(self.instance_id)
        self.table = self.instance.table(self.table_id)

        # error handling messages
        self.index_error_message = ("Refresh browser until live data starts "
                                    "flowing through and infrastructure is "
                                    "deployed! Exiting application now.")
        self.type_error_message = (
            "Ensure GOOGLE_APP_CREDENTIALS env var is base64 decoded ciphertext"
        )
    except KeyError as e:
        print(
            f"Make sure this variable is defined in the application env vars: {str(e)}"
        )
        sys.exit()
def Resolve(self, subject, attribute, token=None):
    """Retrieve the latest value set for a subject's attribute.

    Args:
      subject: The subject URN.
      attribute: The attribute.
      token: The security token used in this call.

    Returns:
      A (string, timestamp in microseconds) stored in the bigtable cell, or
      (None, 0).

    Raises:
      AccessError: if anything goes wrong.
    """
    subject = utils.SmartStr(subject)
    attribute = utils.SmartStr(attribute)

    family, column = self.GetFamilyColumn(attribute)
    col_filter = row_filters.ColumnRangeFilter(
        family, start_column=column, end_column=column)

    # Most recent
    latest_filter = row_filters.CellsColumnLimitFilter(1)

    row_filter = row_filters.RowFilterChain(
        filters=[col_filter, latest_filter])
    row_data = self.table.read_row(subject, filter_=row_filter)

    if row_data:
        for cell in row_data.cells[family][column]:
            return self.Decode(attribute, cell.value), \
                self.DatetimeToMicroseconds(cell.timestamp)

    return None, 0
def _Acquire(self, lease_time):
    now = int(time.time() * 1e6)
    expires = int((time.time() + lease_time) * 1e6)

    # Only latest value
    latest_value = row_filters.CellsColumnLimitFilter(1)

    # Match any lease time value > now, which means someone else holds a lock.
    # We can't store these as ints, encode to str.
    current_lease = row_filters.ValueRangeFilter(
        start_value=utils.SmartStr(now), inclusive_start=False)

    # aff4:lease
    family, column = self.store.GetFamilyColumn(self.store.LEASE_ATTRIBUTE)
    col_filter = row_filters.ColumnRangeFilter(
        family, start_column=column, end_column=column)

    # Note filter chains are evaluated in order, so there are performance
    # considerations with which filter to apply first.
    filter_chain = row_filters.RowFilterChain(
        [col_filter, current_lease, latest_value])
    mutate_row = self.store.table.row(self.subject, filter_=filter_chain)

    # state=False means no lease or it's expired; in this case take the lock.
    mutate_row.set_cell(family, column, utils.SmartStr(expires), state=False)

    # Check in review: I think we want to retry the RPC here? Or should we just
    # raise like we can't get the lock?
    existing_lock = self.store.CallWithRetry(mutate_row.commit, "write")
    if existing_lock:
        raise ExistingLock("Row %s locked." % self.subject)

    # We have the lock
    self.expires = expires
    self.locked = True
def __init__(self, device_data):
    # pass through environment vars
    self.project_id = os.environ["GCLOUD_PROJECT_NAME"]
    self.instance_id = os.environ["BIGTABLE_CLUSTER"]
    self.table_id = os.environ["TABLE_NAME"]
    self.row_filter_count = int(os.environ["ROW_FILTER"])

    # setup table config
    self.client = bigtable.Client(project=self.project_id, admin=True)
    self.instance = self.client.instance(self.instance_id)
    self.column = "device-temp".encode()
    self.column_family_id = "device-family"
    self.row_filter = row_filters.CellsColumnLimitFilter(
        self.row_filter_count)

    # setup row value config
    self.device_data = literal_eval(device_data)

    # reverse the timestamp as that is the most common query
    # https://cloud.google.com/bigtable/docs/schema-design-time-series#reverse_timestamps_only_when_necessary
    self.row_key = "device#{}#{}".format(
        self.device_data["device"],
        (sys.maxsize - self.device_data["timestamp"])).encode()

    # convert to string as bigtable can't accept float types
    # https://streamsets.com/documentation/datacollector/latest/help/datacollector/UserGuide/Destinations/Bigtable.html
    self.value = str(self.device_data["temperature"])
def cbt_get_global_trajectory_buffer(cbt_table):
    row_filter = row_filters.CellsColumnLimitFilter(1)
    row = cbt_table.read_row('global_traj_buff'.encode())
    if row is not None:
        return np.flip(
            np.frombuffer(row.cells['global']['traj_buff'.encode()][0].value,
                          dtype=np.int32),
            axis=0)
    else:
        print("Table is empty.")
        exit()
def get_random_poem(self):
    random_n = random.randint(0, 499)
    column = 'poem'.encode()
    key = ('poem' + str(random_n)).encode()
    row_filter = row_filters.CellsColumnLimitFilter(1)
    row = self.table.read_row(key, row_filter)
    cell = row.cells[self.column_family_id][column][0]
    return cell.value.decode('utf-8')
def filter_limit_cells_per_col(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(filter_=row_filters.CellsColumnLimitFilter(2))
    for row in rows:
        print_row(row)
def filter_composing_chain(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(filter_=row_filters.RowFilterChain(
        filters=[row_filters.CellsColumnLimitFilter(1),
                 row_filters.FamilyNameRegexFilter("cell_plan")]))
    for row in rows:
        print_row(row)
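# --- Illustrative only: the two filter snippets above call a print_row helper
# that is not defined in this corpus. The sketch below is an assumption of what
# such a helper might look like (it treats cell values as UTF-8 text), and the
# project/instance/table IDs in the example calls are placeholders.
def print_row(row):
    # Print the row key, then every cell grouped by column family and column.
    print("Reading data for {}:".format(row.row_key.decode("utf-8")))
    for column_family_id, columns in sorted(row.cells.items()):
        print("Column Family {}".format(column_family_id))
        for column, cells in sorted(columns.items()):
            for cell in cells:
                print("\t{}: {} @{}".format(column.decode("utf-8"),
                                            cell.value.decode("utf-8"),
                                            cell.timestamp))


if __name__ == "__main__":
    filter_limit_cells_per_col("my-project", "my-instance", "my-table")
    filter_composing_chain("my-project", "my-instance", "my-table")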
def query(self, host, dc, region, t, limit=1, window=60):
    t0 = int(t) - window
    t1 = int(t)
    start_key = rowkey(host, dc, region, t0)
    end_key = rowkey(host, dc, region, t1)
    row_set = RowSet()
    row_set.add_row_range(RowRange(start_key, end_key))
    return self.table.read_rows(
        limit=limit,
        filter_=row_filters.CellsColumnLimitFilter(1),
        row_set=row_set)
def _TimestampToFilter(self, timestamp):
    if timestamp == data_store.DataStore.ALL_TIMESTAMPS:
        return None

    if timestamp is None or timestamp == data_store.DataStore.NEWEST_TIMESTAMP:
        # Latest value only
        return row_filters.CellsColumnLimitFilter(1)

    if isinstance(timestamp, tuple):
        return row_filters.TimestampRangeFilter(
            self._TimestampRangeFromTuple(timestamp))

    raise data_store.Error("Invalid timestamp specification: %s." % timestamp)
def get(self):
    bt_array = []
    try:
        table = instance.table(bt_table_name)
        row_set = RowSet()
        for row_key in row_keys:
            row_set.add_row_key(row_key)

        colFilters = []
        for name, bt_name in bt_mapping_dict.items():
            colFilters.append(row_filters.ColumnQualifierRegexFilter(bt_name))

        print("before read_rows...")
        rows = table.read_rows(
            row_set=row_set,
            filter_=row_filters.RowFilterChain(filters=[
                row_filters.CellsColumnLimitFilter(1),
                row_filters.RowFilterUnion(filters=colFilters)
            ]),
            retry=bigtable.table.DEFAULT_RETRY_READ_ROWS.with_deadline(60.0))
        print("after read_rows...")

        for row in rows:
            print("Reading data for {}:".format(row.row_key.decode('utf-8')))
            for cf, cols in sorted(row.cells.items()):
                bt_dict = {}
                bt_dict['id'] = row.row_key.decode('utf-8')
                key = None
                # using BT mapping to return data
                for col, cells in sorted(cols.items()):
                    for cell in cells:
                        for name, bt_name in bt_mapping_dict.items():
                            if col.decode('utf-8') == bt_name:
                                key = name
                                break
                        if key is not None:
                            bt_dict[key] = cell.value.decode('utf-8')
                bt_array.append(bt_dict)
    except BaseException as error:
        logging.error(
            'An exception occurred - DemoBigTableGet::get(): {}'.format(error))

    print(bt_array)
    return json.dumps(bt_array), 200, {'ContentType': 'application/json'}
def get_client_profile(self, client_id):
    """This fetches a single client record out of GCP BigTable."""
    try:
        row_key = client_id.encode()
        table = self._instance.table(self._table_id)
        row_filter = row_filters.CellsColumnLimitFilter(1)
        row = table.read_row(row_key, row_filter)
        cell = row.cells[self._column_family_id][self._column_name][0]
        jdata = json.loads(zlib.decompress(cell.value).decode("utf-8"))
        return jdata
    except Exception:
        logger = self._ctx[IMozLogging].get_logger("taar")
        logger.warning(f"Error loading client profile for {client_id}")
        return None
def read_from_bt(request):
    bt_client = bigtable.Client(project=BT_PROJECT, admin=True)
    instance = bt_client.instance(BT_INSTANCE)
    table = instance.table(BT_TABLE)

    max_versions_rule = column_family.MaxVersionsGCRule(2)
    column_families = {COLUMN_FAMILY_ID: max_versions_rule}
    if not table.exists():
        table.create(column_families=column_families)

    bt_row_filter = row_filters.CellsColumnLimitFilter(1)
    bt_row_key = request['receipt_id']
    bt_row = table.read_row(bt_row_key.encode('utf-8'), bt_row_filter)
    return bt_row
def cbt_global_iterator(cbt_table):
    row_filter = row_filters.CellsColumnLimitFilter(1)
    gi_row = cbt_table.read_row('global_iterator'.encode())
    if gi_row is not None:
        global_i = gi_row.cells['global']['i'.encode()][0].value
        global_i = struct.unpack('i', global_i)[0]
    else:
        gi_row = cbt_table.row('global_iterator'.encode())
        gi_row.set_cell(column_family_id='global',
                        column='i'.encode(),
                        value=struct.pack('i', 0),
                        timestamp=datetime.datetime.utcnow())
        cbt_table.mutate_rows([gi_row])
        global_i = 0
    return global_i
def main(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)

    print('Creating the {} table.'.format(table_id))
    table = instance.table(table_id)

    print('Creating column family cf1 with Max Version GC rule...')
    max_versions_rule = column_family.MaxVersionsGCRule(2)
    column_family_id = 'cf1'
    column_families = {column_family_id: max_versions_rule}
    if not table.exists():
        table.create(column_families=column_families)
    else:
        print("Table {} already exists.".format(table_id))

    print('Writing some greetings to the table.')
    greetings = ['Hello World!', 'Hello Cloud Bigtable!', 'Hello Python!']
    rows = []
    column = 'greeting'.encode()
    for i, value in enumerate(greetings):
        row_key = 'greeting{}'.format(i).encode()
        row = table.row(row_key)
        row.set_cell(column_family_id,
                     column,
                     value,
                     timestamp=datetime.datetime.utcnow())
        rows.append(row)
    table.mutate_rows(rows)

    row_filter = row_filters.CellsColumnLimitFilter(1)

    print('Getting a single greeting by row key.')
    key = 'greeting0'.encode()
    row = table.read_row(key, row_filter)
    cell = row.cells[column_family_id][column][0]
    print(cell.value.decode('utf-8'))

    print('Scanning for all greetings:')
    partial_rows = table.read_rows(filter_=row_filter)
    for row in partial_rows:
        cell = row.cells[column_family_id][column][0]
        print(cell.value.decode('utf-8'))

    print('Deleting the {} table.'.format(table_id))
    table.delete()
def cbt_global_trajectory_buffer(cbt_table, local_traj_buff, global_traj_buff_size):
    row_filter = row_filters.CellsColumnLimitFilter(1)
    old_row = cbt_table.read_row('global_traj_buff'.encode())
    if old_row is not None:
        global_traj_buff = np.frombuffer(
            old_row.cells['global']['traj_buff'.encode()][0].value,
            dtype=np.int32)
        global_traj_buff = np.append(global_traj_buff, local_traj_buff)
        update_size = local_traj_buff.shape[0] - (
            global_traj_buff_size - global_traj_buff.shape[0])
        if update_size > 0:
            global_traj_buff = global_traj_buff[update_size:]
    else:
        global_traj_buff = local_traj_buff
    new_row = cbt_table.row('global_traj_buff'.encode())
    new_row.set_cell(column_family_id='global',
                     column='traj_buff'.encode(),
                     value=global_traj_buff.tobytes(),
                     timestamp=datetime.datetime.utcnow())
    cbt_table.mutate_rows([new_row])
def get_data(sensortype):
    client = bigtable.Client(project='sensorray', admin=True)
    instance = client.instance('instance')
    table = instance.table('table')
    row_filter = row_filters.CellsColumnLimitFilter(1)

    print("Getting 250 most recent records for " + sensortype)
    slist = []
    for i in range(0, 10):
        for j in range(0, 25):
            pod = str(i).zfill(2) + str(j).zfill(2)
            key = pod.encode()
            row = table.read_row(key, row_filter)
            cell = row.cells['sensor'][sensortype.encode()][0]
            slist.append(int.from_bytes(cell.value, 'big'))
    slist = np.array(slist).reshape(10, 25)
    return slist
def setUp(self):
    self.project_id = 'aerospike2bt'
    self.instance_id = 'bookshelf'
    self.table_id = 'books'
    self.client = bigtable.Client(project=self.project_id, admin=True)
    self.instance = self.client.instance(self.instance_id)
    self.table = self.instance.table(self.table_id)
    self.cf = 'info'
    self.title = 'title'.encode()
    self.author = 'author'.encode()
    self.published_date = 'published_date'.encode()
    self.image_url = 'image_url'.encode()
    self.description = 'description'.encode()
    self.created_by = 'created_by'.encode()
    self.created_by_id = 'created_by_id'.encode()
    self.row_filter = row_filters.CellsColumnLimitFilter(1)
    self.test_data = {
        'id': '123456789',
        'title': 'test_title',
        'author': 'test_author',
        'publishedDate': 'test_published_date',
        'imageUrl': 'test_image_url',
        'description': 'test_description',
        'createdBy': 'test_created_by',
        'createdById': 'test_created_by_id'
    }
    self.test_data_updated = {
        'id': '123456789',
        'title': 'updated_title',
        'author': 'test_author',
        'publishedDate': 'test_published_date',
        'imageUrl': 'test_image_url',
        'description': 'test_description',
        'createdBy': 'test_created_by',
        'createdById': 'test_created_by_id'
    }
    self.row_key = (self.test_data['createdById'] + '_' +
                    self.test_data['id']).encode()
def ScanAttributes(self,
                   subject_prefix,
                   attributes,
                   after_urn=None,
                   max_records=None,
                   token=None,
                   relaxed_order=False):
    subject_prefix = self._CleanSubjectPrefix(subject_prefix)
    after_urn = self._CleanAfterURN(after_urn, subject_prefix)
    # Turn subject prefix into an actual regex
    subject_prefix += ".*"

    self.security_manager.CheckDataStoreAccess(token, [subject_prefix], "rq")

    subject_filter = row_filters.RowKeyRegexFilter(
        utils.SmartStr(subject_prefix))
    latest_value = row_filters.CellsColumnLimitFilter(1)
    attribute_filters = self._GetAttributeFilterUnion(attributes)
    # Subject AND (attr1 OR attr2) AND latest_value
    query_filter = row_filters.RowFilterChain(
        [subject_filter, attribute_filters, latest_value])

    # The API results include the start row; we want to exclude it, so append
    # a null to do so.
    if after_urn is not None:
        after_urn += "\x00"

    rows_data = self.CallWithRetry(
        self.table.read_rows,
        "read",
        start_key=after_urn,
        limit=max_records,
        filter_=query_filter)

    # Ideally we should be able to stream and yield, but it seems we can't:
    # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/1812
    self.CallWithRetry(rows_data.consume_all, "read")

    results = []
    if rows_data.rows:
        for subject, row_data in rows_data.rows.iteritems():
            subject_results = self._ReOrderRowResults(row_data)
            results.append((subject, subject_results))

    return sorted(results, key=lambda x: x[0])
def cbt_global_iterator(cbt_table):
    """Fetches and sets the global iterator from bigtable.

    cbt_table -- bigtable object (default none)
    """
    row_filter = row_filters.CellsColumnLimitFilter(1)
    gi_row = cbt_table.read_row('collection_global_iterator'.encode())
    if gi_row is not None:
        global_i = gi_row.cells['global']['i'.encode()][0].value
        global_i = struct.unpack('i', global_i)[0] + 1
    else:
        global_i = 0
    gi_row = cbt_table.row('collection_global_iterator'.encode())
    gi_row.set_cell(column_family_id='global',
                    column='i'.encode(),
                    value=struct.pack('i', global_i),
                    timestamp=datetime.datetime.utcnow())
    cbt_table.mutate_rows([gi_row])
    return global_i
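# --- Illustrative only: a sketch of how a caller might use the
# cbt_global_iterator above to key new rows with a monotonically increasing
# index. It assumes cbt_table is an already-opened google.cloud.bigtable table
# whose 'global' column family exists; the 'trajectory_{i}' key format and the
# 'data' column are hypothetical, not part of the original corpus.
def write_next_trajectory(cbt_table, payload_bytes):
    global_i = cbt_global_iterator(cbt_table)
    row = cbt_table.row('trajectory_{}'.format(global_i).encode())
    row.set_cell(column_family_id='global',
                 column='data'.encode(),
                 value=payload_bytes,
                 timestamp=datetime.datetime.utcnow())
    cbt_table.mutate_rows([row])
    return global_i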
scoped_credentials = credentials.with_scopes(
    ['https://www.googleapis.com/auth/cloud-platform'])

project_id = ''
instance_id = 'accidentdb'
table_id = 'accident'

client = bigtable.Client(project=project_id, admin=True,
                         credentials=credentials)
instance = client.instance(instance_id)
row_filter = row_filters.CellsColumnLimitFilter(1)
table = instance.table(table_id)
column_family_id = 'location'
partial_rows = table.read_rows(filter_=row_filter)
lc = []

print('Scanning for all rows:')
for row in partial_rows:
    loc = row.cells[column_family_id]
    lng = loc[b'Longitude'][0]
    lt = loc[b'Latitude'][0]
    valy = struct.unpack('>d', lng.value)[0]
    valx = struct.unpack('>d', lt.value)[0]
    lc.append([valx, valy])
def main(project_id, instance_id, table_id):
    # [START bigtable_hw_connect]
    # The client must be created with admin=True because it will create a
    # table.
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    # [END bigtable_hw_connect]

    # [START bigtable_hw_create_table]
    print('Creating the {} table.'.format(table_id))
    table = instance.table(table_id)

    print('Creating column family cf1 with Max Version GC rule...')
    # Create a column family with GC policy: most recent N versions
    # Define the GC policy to retain only the most recent 2 versions
    max_versions_rule = column_family.MaxVersionsGCRule(2)
    column_family_id = 'cf1'
    column_families = {column_family_id: max_versions_rule}
    if not table.exists():
        table.create(column_families=column_families)
    else:
        print("Table {} already exists.".format(table_id))
    # [END bigtable_hw_create_table]

    # [START bigtable_hw_write_rows]
    print('Writing some greetings to the table.')
    greetings = ['Hello World!', 'Hello Cloud Bigtable!', 'Hello Python!']
    rows = []
    column = 'greeting'.encode()
    for i, value in enumerate(greetings):
        # Note: This example uses sequential numeric IDs for simplicity,
        # but this can result in poor performance in a production
        # application. Since rows are stored in sorted order by key,
        # sequential keys can result in poor distribution of operations
        # across nodes.
        #
        # For more information about how to design a Bigtable schema for
        # the best performance, see the documentation:
        #
        # https://cloud.google.com/bigtable/docs/schema-design
        row_key = 'greeting{}'.format(i).encode()
        row = table.direct_row(row_key)
        row.set_cell(column_family_id,
                     column,
                     value,
                     timestamp=datetime.datetime.utcnow())
        rows.append(row)
    table.mutate_rows(rows)
    # [END bigtable_hw_write_rows]

    # [START bigtable_hw_create_filter]
    # Create a filter to only retrieve the most recent version of the cell
    # for each column across the entire row.
    row_filter = row_filters.CellsColumnLimitFilter(1)
    # [END bigtable_hw_create_filter]

    # [START bigtable_hw_get_with_filter]
    print('Getting a single greeting by row key.')
    key = 'greeting0'.encode()
    row = table.read_row(key, row_filter)
    cell = row.cells[column_family_id][column][0]
    print(cell.value.decode('utf-8'))
    # [END bigtable_hw_get_with_filter]

    # [START bigtable_hw_scan_with_filter]
    print('Scanning for all greetings:')
    partial_rows = table.read_rows(filter_=row_filter)
    for row in partial_rows:
        cell = row.cells[column_family_id][column][0]
        print(cell.value.decode('utf-8'))
    # [END bigtable_hw_scan_with_filter]

    # [START bigtable_hw_delete_table]
    print('Deleting the {} table.'.format(table_id))
    table.delete()
def bigtable_func(project_id, instance_id, table_id):
    from google.cloud import bigtable
    from google.cloud.bigtable import column_family
    from google.cloud.bigtable import row_filters
    from google.auth.credentials import AnonymousCredentials

    os.environ["BIGTABLE_EMULATOR_HOST"] = "localhost:8086"

    # [START bigtable_hw_connect]
    # The client must be created with admin=True because it will create a
    # table.
    client = bigtable.Client(project=project_id, admin=True,
                             credentials=AnonymousCredentials())
    instance = client.instance(instance_id)
    # [END bigtable_hw_connect]

    # [START bigtable_hw_create_table]
    print(f"Creating the {table_id} table.")
    table = instance.table(table_id)

    print("Creating column family cf1 with Max Version GC rule...")
    # Create a column family with GC policy: most recent N versions
    # Define the GC policy to retain only the most recent 2 versions
    max_versions_rule = column_family.MaxVersionsGCRule(2)
    column_family_id = "cf1"
    column_families = {column_family_id: max_versions_rule}
    if not table.exists():
        table.create(column_families=column_families)
    else:
        print(f"Table {table_id} already exists.")
    # [END bigtable_hw_create_table]

    # [START bigtable_hw_write_rows]
    print("Writing some greetings to the table.")
    greetings = ["Hello World!", "Hello Cloud Bigtable!", "Hello Python!"]
    rows = []
    column = b"greeting"
    for i, value in enumerate(greetings):
        # Note: This example uses sequential numeric IDs for simplicity,
        # but this can result in poor performance in a production
        # application. Since rows are stored in sorted order by key,
        # sequential keys can result in poor distribution of operations
        # across nodes.
        #
        # For more information about how to design a Bigtable schema for
        # the best performance, see the documentation:
        #
        # https://cloud.google.com/bigtable/docs/schema-design
        row_key = f"greeting{i}".encode()
        row = table.direct_row(row_key)
        row.set_cell(column_family_id,
                     column,
                     value,
                     timestamp=datetime.datetime.utcnow())
        rows.append(row)
    table.mutate_rows(rows)
    # [END bigtable_hw_write_rows]

    # [START bigtable_hw_create_filter]
    # Create a filter to only retrieve the most recent version of the cell
    # for each column across the entire row.
    row_filter = row_filters.CellsColumnLimitFilter(1)
    # [END bigtable_hw_create_filter]

    # [START bigtable_hw_get_with_filter]
    print("Getting a single greeting by row key.")
    key = b"greeting0"
    row = table.read_row(key, row_filter)
    cell = row.cells[column_family_id][column][0]
    print(cell.value.decode("utf-8"))
    # [END bigtable_hw_get_with_filter]

    # [START bigtable_hw_scan_with_filter]
    print("Scanning for all greetings:")
    partial_rows = table.read_rows(filter_=row_filter)
    for row in partial_rows:
        cell = row.cells[column_family_id][column][0]
        print(cell.value.decode("utf-8"))
    # [END bigtable_hw_scan_with_filter]

    # [START bigtable_hw_delete_table]
    print(f"Deleting the {table_id} table.")
    table.delete()
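# --- Illustrative only: a minimal way to run bigtable_func above against the
# local emulator. It assumes `os` and `datetime` are already imported at module
# level (the function body requires both) and that the Cloud Bigtable emulator
# is listening on localhost:8086 (e.g. started with
# `gcloud beta emulators bigtable start`). The IDs are placeholders; nothing
# reaches a real project because the function points the client at the emulator.
if __name__ == "__main__":
    bigtable_func("test-project", "test-instance", "hello-bigtable")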