def __init__(self, client=None, instance_id='pychunkedgraph', project_id="neuromancer-seung-import"):
    """Bind this object to a Bigtable client and instance.

    Args:
        client: optional pre-built ``bigtable.Client``; when omitted a new
            admin client is created for ``project_id``.
        instance_id: Bigtable instance to connect to.
        project_id: GCP project, used only when a client must be created.
    """
    # Reuse the caller's client when provided, otherwise build an admin one.
    self._client = client if client is not None else bigtable.Client(project=project_id, admin=True)

    # NOTE(review): reads `self.client` (not `self._client`) — assumes the
    # enclosing class exposes a `client` property; confirm it exists.
    self._instance = self.client.instance(instance_id)

    # Cache of table handles, keyed lazily elsewhere.
    self._loaded_tables = {}
def delete(project_id, instance_id, table_id):
    """Delete ``table_id`` from the given Bigtable instance if present.

    Args:
        project_id: GCP project id.
        instance_id: Bigtable instance id.
        table_id: table to delete.
    """
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)

    print('Checking the {} table exists.'.format(table_id))
    table = instance.table(table_id)
    # Fix: the original tested `table in instance.list_tables()`, which
    # compares client-side Table objects and is unreliable; ask the
    # service directly instead.
    if table.exists():
        print('Table found. Deleting the {} table.'.format(table_id))
        table.delete()
    else:
        print('Table {} not found'.format(table_id))
def dispose_of():
    """Fixture-style generator: yield a registrar for test instance ids,
    then delete every registered instance that still exists on teardown."""
    pending = []

    def disposal(instance):
        # Record the id for deletion after the test body finishes.
        pending.append(instance)

    yield disposal

    # Teardown: best-effort cleanup of every registered instance.
    client = bigtable.Client(project=PROJECT, admin=True)
    for instance_id in pending:
        instance = client.instance(instance_id)
        if instance.exists():
            instance.delete()
def main(project_id, instance_id):
    """List every table in a Bigtable instance via the HappyBase shim.

    Args:
        project_id: GCP project id.
        instance_id: Bigtable instance whose tables are listed.
    """
    # [START connecting_to_bigtable]
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    connection = happybase.Connection(instance=instance)
    # [END connecting_to_bigtable]
    try:
        table_list = connection.tables()
        # Fix: corrected 'clister' -> 'cluster' in the user-facing message.
        print('Listing tables in Bigtable cluster: ', instance_id)
        for table in table_list:
            print(table)
    finally:
        # Always release the connection, even if listing raises.
        connection.close()
def main(argv):
    """Populate module-level Bigtable handles from positional CLI args.

    Expected argv layout: [project, instance, table, column family].
    """
    global client, instance, table, family

    # Unpack the four positional arguments.
    project_name = argv[0]
    bigtable_instance = argv[1]
    bigtable_table = argv[2]
    family = argv[3]

    # Build the shared admin client and the handles derived from it.
    client = bigtable.Client(project=project_name, admin=True)
    instance = client.instance(bigtable_instance)
    table = instance.table(bigtable_table)
def main(project_id='example-big-table-209918', instance_id='main-bigtable', table_id='test_table'):
    """Main func, does what the module docstring says.

    Generalized: the project, instance, and table ids — previously
    hard-coded — are now parameters whose defaults are the original
    values, so a bare ``main()`` call behaves exactly as before.
    """
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)

    print("Creating the {} table.".format(table_id))
    table = instance.table(table_id)
    table.create()

    column_family_id = 'cf1'
    cf1 = table.column_family(column_family_id)
    cf1.create()

    print("Writing some greetings to the table.")
    column_id = 'greeting'.encode('utf-8')
    greetings = ['Hello, World!', 'Hello, Cloud Bigtable', 'Hello, Python']
    for i, value in enumerate(greetings):
        # i is used to build a unique, sorted row key per greeting.
        row_key = 'greeting{}'.format(i)
        row = table.row(row_key)
        row.set_cell(column_family_id, column_id, value.encode('utf-8'))
        row.commit()

    print('Getting a single greeting by row key.')
    key = 'greeting0'
    row = table.read_row(key.encode('utf-8'))
    value = row.cells[column_family_id][column_id][0].value
    print('\t{}: {}'.format(key, value.decode('utf-8')))

    print('Scanning for all greetings:')
    partial_rows = table.read_rows()
    partial_rows.consume_all()
    for row_key, row in partial_rows.rows.items():
        key = row_key.decode('utf-8')
        cell = row.cells[column_family_id][column_id][0]
        value = cell.value.decode('utf-8')
        print('\t{}: {}'.format(key, value))

    print('Deleting the {} table.'.format(table_id))
    table.delete()
def main(project_id, instance_id, table_name):
    """Create a table through HappyBase and write a few sample words."""
    # [START connecting_to_bigtable]
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    connection = happybase.Connection(instance=instance)
    # [END connecting_to_bigtable]
    try:
        # [START creating_a_table]
        print('Creating the {} table.'.format(table_name))
        column_family_name = 'cf1'
        # Default column-family options are sufficient for this sample.
        connection.create_table(table_name, {column_family_name: dict()})
        # [END creating_a_table]

        # [START writing_rows]
        print('Writing some words to table: ', table_name)
        table = connection.table(table_name)
        column_name = '{fam}:words'.format(fam=column_family_name)
        print('column_name ', column_name)

        # Hard-coded sample data; real code would generate these words.
        words = ['IamWods1', 'SheIsWord2', 'HeIsWord3']
        # NOTE: sequential row keys can distribute writes poorly across
        # nodes for large lists — see
        # https://cloud.google.com/bigtable/docs/schema-design
        for i, value in enumerate(words):
            print('Writing word to the table.', i, value)
            table.put('words{}'.format(i), {column_name: value})
        # [END writing_rows]
    finally:
        connection.close()
def table_id():
    """Fixture: create a populated test table, yield its id, then drop it."""
    client = bigtable.Client(project=PROJECT, admin=True)
    instance = client.instance(BIGTABLE_INSTANCE)

    # Random suffix so concurrent runs don't collide; clear any leftover.
    table_id = TABLE_ID_PREFIX.format(str(uuid.uuid4())[:16])
    table = instance.table(table_id)
    if table.exists():
        table.delete()
    table.create(column_families={'stats_summary': None, 'cell_plan': None})

    ts = datetime.datetime(2019, 5, 1)
    ts_minus_hr = ts - datetime.timedelta(hours=1)

    keys = [
        "phone#4c410523#20190501",
        "phone#4c410523#20190502",
        "phone#4c410523#20190505",
        "phone#5c10102#20190501",
        "phone#5c10102#20190502",
    ]
    rows = [table.direct_row(key) for key in keys]

    # (row index, family, qualifier, value, timestamp) — one tuple per cell.
    cells = [
        (0, "stats_summary", "connected_cell", 1, ts),
        (0, "stats_summary", "connected_wifi", 1, ts),
        (0, "stats_summary", "os_build", "PQ2A.190405.003", ts),
        (0, "cell_plan", "data_plan_01gb", "true", ts_minus_hr),
        (0, "cell_plan", "data_plan_01gb", "false", ts),
        (0, "cell_plan", "data_plan_05gb", "true", ts),
        (1, "stats_summary", "connected_cell", 1, ts),
        (1, "stats_summary", "connected_wifi", 1, ts),
        (1, "stats_summary", "os_build", "PQ2A.190405.004", ts),
        (1, "cell_plan", "data_plan_05gb", "true", ts),
        (2, "stats_summary", "connected_cell", 0, ts),
        (2, "stats_summary", "connected_wifi", 1, ts),
        (2, "stats_summary", "os_build", "PQ2A.190406.000", ts),
        (2, "cell_plan", "data_plan_05gb", "true", ts),
        (3, "stats_summary", "connected_cell", 1, ts),
        (3, "stats_summary", "connected_wifi", 1, ts),
        (3, "stats_summary", "os_build", "PQ2A.190401.002", ts),
        (3, "cell_plan", "data_plan_10gb", "true", ts),
        (4, "stats_summary", "connected_cell", 1, ts),
        (4, "stats_summary", "connected_wifi", 0, ts),
        (4, "stats_summary", "os_build", "PQ2A.190406.000", ts),
        (4, "cell_plan", "data_plan_10gb", "true", ts),
    ]
    for idx, family, qualifier, value, when in cells:
        rows[idx].set_cell(family, qualifier, value, when)

    table.mutate_rows(rows)

    yield table_id

    table.delete()
def _get_table(self, admin: bool = False) -> Table:
    """Return a Table handle for this store's configured table.

    Args:
        admin: when True, build a fresh admin-capable client and table on
            every call (never cached). When False, return the lazily
            initialized, cached non-admin table.

    Returns:
        Table: the table handle (cached for the non-admin path).
    """
    if admin is True:
        # Admin handles are intentionally not cached — a new admin client
        # is constructed on every admin request.
        return (bigtable.Client(project=self.project, admin=True, **self.client_options).instance(
            self.instance).table(self.table_name))
    try:
        # Fast check for an existing table
        return self.__table
    except AttributeError:
        # If missing, we acquire our lock to initialize a new one
        with self.__table_lock:
            # It's possible that the lock was blocked waiting on someone
            # else who already initialized, so we first check again to make
            # sure this isn't the case.
            try:
                table = self.__table
            except AttributeError:
                # Non-admin client for the data path; stored so every later
                # call takes the fast path above.
                table = self.__table = (bigtable.Client(
                    project=self.project, **self.client_options).instance(
                        self.instance).table(self.table_name))
            return table
def read_prefix(project_id, instance_id, table_id):
    """Print every row whose key begins with the "phone#" prefix."""
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    prefix = "phone#"
    # The half-open range [prefix, end_key) covers exactly the prefix:
    # bump the last character one code point to form the exclusive end.
    end_key = prefix[:-1] + chr(ord(prefix[-1]) + 1)

    row_set = RowSet()
    row_set.add_row_range_from_keys(prefix.encode("utf-8"), end_key.encode("utf-8"))

    for row in table.read_rows(row_set=row_set):
        print_row(row)
def main(project_id, instance_id, table_id):
    """Create a table, write sample data entries, and read them back.

    Args:
        project_id: GCP project id.
        instance_id: Bigtable instance id.
        table_id: table to create and populate.
    """
    # Establish a connection to the big table.
    # specify admin as True for any admin related operations including 'create'.
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)

    print('Creating the {} table.'.format(table_id))
    table = instance.table(table_id)
    # Since Bigtable tables are sparse, it's okay to create empty columns.
    column_family_id = 'c1'
    # Fix: the original unconditionally called table.create() and
    # c1.create(), which raises AlreadyExists on a re-run. Guard creation
    # (matching the other samples in this file) and create the column
    # family together with the table.
    if not table.exists():
        table.create(column_families={column_family_id: None})
    else:
        print("Table {} already exists.".format(table_id))

    print('Writing data to the table.')
    column_id = 'data'.encode('utf-8')
    data = [
        'Data entry 1',
        'Data entry random',
        'Data entry 2',
        'Data entry is not ordered :)',
    ]
    # A simple sequential index is used as the row key — not optimal for
    # key distribution; timestamps could be used for better ordering.
    for i, value in enumerate(data):
        row_key = 'data{}'.format(i)
        row = table.row(row_key)
        row.set_cell(column_family_id, column_id, value.encode('utf-8'))
        row.commit()

    # Retrieving single data
    print('Getting a data entry by row key.')
    key = 'data0'
    row = table.read_row(key.encode('utf-8'))
    value = row.cells[column_family_id][column_id][0].value
    print('\t{}: {}'.format(key, value.decode('utf-8')))

    # Retrieving entire data
    print('Scanning for all data entries:')
    scan_rows = table.read_rows()
    scan_rows.consume_all()
    for row_key, row in scan_rows.rows.items():
        key = row_key.decode('utf-8')
        cell = row.cells[column_family_id][column_id][0]
        value = cell.value.decode('utf-8')
        print('\t{}: {}'.format(key, value))
def filter_composing_condition(project_id, instance_id, table_id):
    """Label each row by whether its data_plan_10gb column equals "true"."""
    client = bigtable.Client(project=project_id, admin=True)
    table = client.instance(instance_id).table(table_id)

    # Predicate: the qualifier matches AND its value matches.
    predicate = row_filters.RowFilterChain(filters=[
        row_filters.ColumnQualifierRegexFilter("data_plan_10gb"),
        row_filters.ValueRegexFilter("true"),
    ])
    # Rows satisfying the predicate get one label, all others the other.
    conditional = row_filters.ConditionalRowFilter(
        base_filter=predicate,
        true_filter=row_filters.ApplyLabelFilter(label="passed-filter"),
        false_filter=row_filters.ApplyLabelFilter(label="filtered-out"),
    )
    for row in table.read_rows(filter_=conditional):
        print_row(row)
def read_row_ranges(project_id, instance_id, table_id):
    """Print rows from two explicit key ranges (one range per phone)."""
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    # NOTE(review): the nine-digit end keys ("...#201906201") look like a
    # typo for "...#20190601"; harmless for the sample data's key range,
    # but worth confirming. Kept byte-for-byte to preserve behavior.
    key_ranges = [
        (b"phone#4c410523#20190501", b"phone#4c410523#201906201"),
        (b"phone#5c10102#20190501", b"phone#5c10102#201906201"),
    ]
    row_set = RowSet()
    for start, end in key_ranges:
        row_set.add_row_range_from_keys(start_key=start, end_key=end)

    for row in table.read_rows(row_set=row_set):
        print_row(row)
def __init__(self, project_name, instance_name, table_name):
    """Constructor.

    Args:
        project_name: string name of GCP project having table.
        instance_name: string name of CBT instance in project.
        table_name: string name of CBT table in instance.
    """
    spec = BigtableSpec(project_name, instance_name, table_name)
    self.btspec = spec

    # Admin-capable handle used by the google-cloud-bigtable client.
    admin_client = bigtable.Client(spec.project, admin=True)
    self.bt_table = admin_client.instance(spec.instance).table(spec.table)

    # Parallel handle for TensorFlow's Bigtable integration.
    self.tf_table = tf.contrib.cloud.BigtableClient(
        spec.project, spec.instance).table(spec.table)
def preclean():
    """In case any test instances weren't cleared out in a previous run.

    Deletes any test instances that were created over an hour ago. Newer
    instances may be being used by a concurrent test run.
    """
    client = bigtable.Client(project=PROJECT, admin=True)
    for instance in client.list_instances()[0]:
        if not instance.instance_id.startswith("instanceadmin-"):
            continue
        # The trailing id segment encodes the creation unix timestamp.
        created = int(instance.instance_id.split("-")[-1])
        if time.time() - created > 3600:
            warnings.warn(
                f"Deleting leftover test instance: {instance.instance_id}")
            instance.delete()
def write_increment(project_id, instance_id, table_id):
    """Atomically decrement one row's connected_wifi counter by 1."""
    client = bigtable.Client(project=project_id, admin=True)
    table = client.instance(instance_id).table(table_id)

    row_key = "phone#4c410523#20190501"
    # append=True yields a row supporting server-side read-modify-write.
    row = table.row(row_key, append=True)
    # A negative delta decrements the stored counter.
    row.increment_cell_value("stats_summary", "connected_wifi", -1)
    row.commit()

    print('Successfully updated row {}.'.format(row_key))
def __init__(self, project_name, instance_name, table_name):
    """Constructor.

    Args:
        project_name: string name of GCP project having table.
        instance_name: string name of CBT instance in project.
        table_name: string name of CBT table in instance.
    """
    # Fix: the original had a stray trailing comma
    # (`self.project_name = project_name,`), which silently stored a
    # 1-tuple instead of the string.
    self.project_name = project_name
    self.instance_name = instance_name
    self.table_name = table_name
    self.bt_table = bigtable.Client(project_name).instance(
        instance_name).table(table_name)
    self.tf_table = tf.contrib.cloud.BigtableClient(
        project_name, instance_name).table(table_name)
def create_bigtable_table(region="us-west1-a", instance_id="test-instance",
                          table_name="test-table", column_family="cf1"):
    """
    Cloud Bigtable table (and instance) creation.
    Check the full docstring for details!
    Setting display_name to the same as the instance_id by default.
    Assuming non-existent instance!
    """
    client = bigtable.Client(admin=True)

    # Create the instance first; the table lives inside it.
    instance = client.instance(instance_id, region, display_name=instance_id)
    instance.create()

    table = instance.table(table_name)
    table.create()

    # Named `family` so we don't shadow the `column_family` parameter.
    family = table.column_family(column_family)
    family.create()
def createTable(project_id, instance_id, table_id, column_family_id):
    """Create ``table_id`` with one column family keeping 2 cell versions.

    Args:
        project_id / instance_id / table_id: Bigtable coordinates.
        column_family_id: name of the column family to create.

    Returns:
        The Table handle, whether newly created or pre-existing.
    """
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    # Fix: the message hard-coded 'cf1' regardless of the actual
    # column_family_id argument.
    print('Creating column family {} with Max Version GC rule...'.format(column_family_id))
    # Create a column family with GC policy : most recent N versions
    # Define the GC policy to retain only the most recent 2 versions
    max_versions_rule = column_family.MaxVersionsGCRule(2)
    column_families = {column_family_id: max_versions_rule}

    if not table.exists():
        table.create(column_families=column_families)
    else:
        print("Table {} already exists.".format(table_id))
    return table
def read_from_bt(request):
    """Fetch the Bigtable row keyed by the request's receipt_id.

    Creates the table (keep-2-versions GC policy) if it does not exist.
    """
    bt_client = bigtable.Client(project=BT_PROJECT, admin=True)
    table = bt_client.instance(BT_INSTANCE).table(BT_TABLE)

    # Lazily create the table on first use.
    if not table.exists():
        table.create(column_families={
            COLUMN_FAMILY_ID: column_family.MaxVersionsGCRule(2),
        })

    # Only the newest cell of each column is needed.
    newest_only = row_filters.CellsColumnLimitFilter(1)
    key = request['receipt_id'].encode('utf-8')
    return table.read_row(key, newest_only)
def main(project_id, instance_id, table_id):
    """Hello-world walkthrough: create, write, read, scan, then delete."""
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)

    print('Creating the {} table.'.format(table_id))
    table = instance.table(table_id)

    print('Creating column family cf1 with Max Version GC rule...')
    column_family_id = 'cf1'
    # Retain at most the two most recent versions of every cell.
    if not table.exists():
        table.create(column_families={
            column_family_id: column_family.MaxVersionsGCRule(2),
        })
    else:
        print("Table {} already exists.".format(table_id))

    print('Writing some greetings to the table.')
    column = 'greeting'.encode()
    rows = []
    for i, value in enumerate(['Hello World!', 'Hello Cloud Bigtable!', 'Hello Python!']):
        row = table.row('greeting{}'.format(i).encode())
        row.set_cell(column_family_id, column, value,
                     timestamp=datetime.datetime.utcnow())
        rows.append(row)
    # One batched RPC for all greetings.
    table.mutate_rows(rows)

    # Only the most recent cell version is wanted on reads.
    row_filter = row_filters.CellsColumnLimitFilter(1)

    print('Getting a single greeting by row key.')
    row = table.read_row('greeting0'.encode(), row_filter)
    print(row.cells[column_family_id][column][0].value.decode('utf-8'))

    print('Scanning for all greetings:')
    for row in table.read_rows(filter_=row_filter):
        print(row.cells[column_family_id][column][0].value.decode('utf-8'))

    print('Deleting the {} table.'.format(table_id))
    table.delete()
def table():
    """Fixture: create a randomly-named table with one seeded row, yield
    its id, then drop the table on teardown."""
    table_id = TABLE_ID_FORMAT.format(random.randrange(TABLE_ID_RANGE))
    client = bigtable.Client(project=PROJECT, admin=True)
    instance = client.instance(BIGTABLE_INSTANCE)

    table = instance.table(table_id)
    column_family_id = 'cf1'
    table.create(column_families={column_family_id: None})

    # Seed one cell so reads have something to find.
    row = table.direct_row("r1")
    row.set_cell(column_family_id, "c1", "test-value")
    row.commit()

    yield table_id

    table.delete()
def main(project_id, instance_id, table_name):
    """Delete a table via HappyBase, listing tables before and after."""
    # [START connecting_to_bigtable]
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    connection = happybase.Connection(instance=instance)
    # [END connecting_to_bigtable]

    print('Listing tables in Bigtable cluster: ', instance_id, ' before deleting')
    list_tables(connection)

    # [START deleting_a_table]
    print('Deleting the {} table.'.format(table_name))
    connection.delete_table(table_name)
    # [END deleting_a_table]

    print('Listing tables in Bigtable cluster: ', instance_id, ' after deleting')
    list_tables(connection)

    connection.close()
def main(project_id, instance_id, table_id):
    """Create a tag-message table, populate it, scan a key range, drop it."""
    # Connection to Bigtable
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)

    # Create table & column family
    print('Creating the {} table.'.format(table_id))
    table = instance.table(table_id)
    users_column_family_id = 'users'
    uf = table.column_family(users_column_family_id)
    message_column_family_id = 'post'
    mf = table.column_family(message_column_family_id)
    table.create(column_families=(mf, uf))

    # Populate: key = tagged-hash prefix + tagger-hash prefix + centiseconds.
    # NOTE(review): hashlib.md5 requires bytes — assumes generate_messages()
    # yields bytes (or this is Python 2 code); confirm.
    for tagged, tagger, msg in generate_messages():
        row_key = (hashlib.md5(tagged).hexdigest()[:8]
                   + hashlib.md5(tagger).hexdigest()[:8]
                   + str(int(time.time() * 100)))
        row = table.row(row_key)
        print(row_key, tagged, tagger, msg)
        row.set_cell(users_column_family_id, 'tagged', tagged)
        row.set_cell(users_column_family_id, 'tagger', tagger)
        row.set_cell(message_column_family_id, 'msg', msg)
        row.commit()

    # Search: read back the last-written row.
    row = table.read_row(row_key)
    print('{}'.format(deep_to_dict(row)))

    print('Scanning for Superman:')
    rows = table.read_rows('84d95', '84d97')
    rows.consume_all()
    for row_key, row in rows.rows.items():
        users_data = row.cells[users_column_family_id]
        print('\t{} -> {}'.format(users_data['tagger'][0].value,
                                  users_data['tagged'][0].value))
    print("Tot.: {}".format(len(rows.rows.keys())))

    # clean
    print('Deleting the {} table.'.format(table_id))
    table.delete()
def initialize(self):
    """Create the table for frame metadata and its single column family."""
    # We need an admin client to create a table
    client = bigtable.Client(project=self.project_id, admin=True)
    instance = client.instance(self.instance_id)

    # Create table
    # Fix: the original referenced an undefined local `table_id` here,
    # which raised NameError at runtime; use the instance attribute.
    print('Creating the {} table.'.format(self.table_id))
    table = instance.table(self.table_id)
    table.create()

    # Create column family
    self.column_family_id = 'cf1'
    cf1 = table.column_family(self.column_family_id)
    cf1.create()
def __init__(self, ctx, project_id, instance_id, table_id):
    """Store connection coordinates and the GC policy for a profile store.

    Args:
        ctx: application context, stored as-is.
        project_id / instance_id / table_id: Bigtable coordinates.
    """
    self._ctx = ctx
    self._project_id = project_id
    self._instance_id = instance_id
    self._table_id = table_id
    self._column_family_id = "profile"
    self._column_name = "payload".encode()

    # Union of rules: a cell is garbage-collected once it is older than
    # 90 days or is no longer the most recent version.
    self._gc_rule = column_family.GCRuleUnion(rules=[
        column_family.MaxAgeGCRule(datetime.timedelta(days=90)),
        column_family.MaxVersionsGCRule(1),
    ])

    # Data-path only: admin rights are deliberately not requested here.
    self._client = bigtable.Client(project=project_id, admin=False)
    self._instance = self._client.instance(self._instance_id)
def start_bundle(self):
    """Beam DoFn hook: open the Bigtable table, creating it if missing.

    Imports are local because Beam serializes the DoFn to workers.
    """
    from google.cloud import bigtable
    from google.cloud.bigtable import column_family
    try:
        self.client = bigtable.Client(project=self.project_id, admin=True)
        self.instance = self.client.instance(self.instance_id)
        self.table = self.instance.table(self.table_id)

        # Keep at most the two most recent versions of each cell.
        max_versions_rule = column_family.MaxVersionsGCRule(2)
        column_family_id = 'cf1'
        column_families = {column_family_id: max_versions_rule}
        if not self.table.exists():
            self.table.create(column_families=column_families)
        else:
            logging.info("Table {} already exists.".format(self.table_id))
    except Exception:
        # Fix: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrow to Exception and re-raise as before.
        logging.error("Failed to start bundle")
        raise
def _delete_rows(args):
    """Delete the given row keys from the given Bigtable.

    The args are (BigtableSpec, row_keys), but are passed as a single
    argument in order to work with multiprocessing.Pool.map. This is also
    the reason why this is a top-level function instead of a method.
    """
    btspec, row_keys = args
    bt_table = bigtable.Client(btspec.project).instance(
        btspec.instance).table(btspec.table)

    # Mark each row for full deletion, then send one batched mutation.
    rows = []
    for key in row_keys:
        row = bt_table.row(key)
        row.delete()
        rows.append(row)
    bt_table.mutate_rows(rows)
    return row_keys
def write(project_id, instance_id, table_id, schema_file, data):
    """Write rows described by a JSON payload, encoding values per schema.

    Args:
        project_id / instance_id / table_id: Bigtable coordinates.
        schema_file: file-like JSON mapping column families/columns to a
            type name ('long', 'double', 'list_double', or string).
        data: file-like JSON with a 'rows' list of rowkey/columns payloads.
    """

    def _declared_type(schema, family_id, column_id):
        # Resolve the schema's type string for family:column.
        schema_family = next(
            (x for x in schema['column_families'] if x['name'] == family_id),
            None)
        schema_column = next(
            (x for x in schema_family['columns'] if x['key'] == column_id),
            None)
        return schema_column['type']

    def _encode(schema_type, value):
        # Big-endian binary encodings; unknown types are stored as UTF-8.
        if schema_type == 'long':
            return struct.pack(">q", value)
        if schema_type == 'double':
            return struct.pack(">d", value)
        if schema_type == 'list_double':
            return struct.pack(f'>{len(value)}d', *value)
        return value.encode('utf-8')

    client = bigtable.Client(project=project_id, admin=True)
    table = client.instance(instance_id).table(table_id)

    schema = json.loads(schema_file.read())
    body = json.loads(data.read())

    for row_body in body['rows']:
        row_key = row_body['rowkey']
        row = table.direct_row(row_key)
        for col in row_body['columns']:
            family_id, column_id = col['key'].split(':')
            # Honor an explicit per-cell timestamp; default to "now".
            if 'timestamp' in col:
                timestamp = datetime.datetime.utcfromtimestamp(
                    col['timestamp'])
            else:
                timestamp = datetime.datetime.utcnow()
            cell_value = _encode(
                _declared_type(schema, family_id, column_id), col['value'])
            row.set_cell(family_id, column_id, cell_value,
                         timestamp=timestamp)
        row.commit()
        print('Successfully wrote row {}.'.format(row_key))
def get_data(sensortype):
    """Return a 10x25 array of the latest readings for one sensor type."""
    client = bigtable.Client(project='sensorray', admin=True)
    table = client.instance('instance').table('table')
    # Only the newest cell version per column is read.
    newest_only = row_filters.CellsColumnLimitFilter(1)

    print("Getting 250 most recent records for " + sensortype)
    qualifier = sensortype.encode()
    readings = []
    for i in range(10):
        for j in range(25):
            # Row key is zero-padded "iijj" pod coordinates.
            key = (str(i).zfill(2) + str(j).zfill(2)).encode()
            row = table.read_row(key, newest_only)
            cell = row.cells['sensor'][qualifier][0]
            readings.append(int.from_bytes(cell.value, 'big'))
    return np.array(readings).reshape(10, 25)