def test_bigtable_create_family_gc_nested():
    """Create column family ``cf5`` with a nested GC rule, check it, delete it.

    The rule is a union whose second member is an intersection; the
    assertions look for the intersection part in the text form of the
    column family's protobuf.
    """
    # [START bigtable_create_family_gc_nested]
    from google.cloud.bigtable import Client
    from google.cloud.bigtable import column_family

    table = Client(admin=True).instance(INSTANCE_ID).table(TABLE_ID)

    # Nested GC policy. A cell is dropped when it is
    #   beyond the 10 most recent versions
    #   OR
    #   (older than 5 days AND beyond the 2 most recent versions).
    beyond_ten_versions = column_family.MaxVersionsGCRule(10)
    five_days = datetime.timedelta(days=5)
    old_and_superseded = column_family.GCRuleIntersection(
        [
            column_family.MaxAgeGCRule(five_days),
            column_family.MaxVersionsGCRule(2),
        ]
    )
    nested_rule = column_family.GCRuleUnion(
        [beyond_ten_versions, old_and_superseded]
    )

    column_family_obj = table.column_family("cf5", nested_rule)
    column_family_obj.create()
    # [END bigtable_create_family_gc_nested]

    # 5 days == 432000 seconds in the rendered protobuf.
    rendered = str(column_family_obj.to_pb())
    expected_fragments = (
        "intersection",
        "max_num_versions: 2",
        "max_age",
        "seconds: 432000",
    )
    for fragment in expected_fragments:
        assert fragment in rendered

    column_family_obj.delete()
def create_table_in_bigtable(self):
    """Create the Bigtable table with its ``profile`` column family if missing.

    Connects with an admin client to ``self.GCP_PROJECT`` /
    ``self.BIGTABLE_INSTANCE_ID`` and, when ``self.BIGTABLE_TABLE_ID`` does
    not exist yet, creates it with a single column family named
    ``profile``. Nothing is created when the table already exists.
    """
    from datetime import timedelta

    from google.cloud import bigtable
    from google.cloud.bigtable import column_family

    # Report the Bigtable table id: that is the table checked and created
    # below.
    print(
        "Checking if we need to create the {} table.".format(
            self.BIGTABLE_TABLE_ID
        )
    )
    client = bigtable.Client(project=self.GCP_PROJECT, admin=True)
    instance = client.instance(self.BIGTABLE_INSTANCE_ID)
    table = instance.table(self.BIGTABLE_TABLE_ID)

    print("Creating column family `profile`")

    # GC policy: a cell is collected once it is older than 90 days OR is no
    # longer the most recent version (union of the two rules).
    max_age_rule = column_family.MaxAgeGCRule(timedelta(days=90))
    max_versions_rule = column_family.MaxVersionsGCRule(1)
    gc_rule = column_family.GCRuleUnion(
        rules=[max_age_rule, max_versions_rule]
    )

    # Note that this ties out to the configuration in
    # taar.profile_fetcher::BigTableProfileController
    column_family_id = "profile"
    column_families = {column_family_id: gc_rule}

    if not table.exists():
        table.create(column_families=column_families)
        print(f"Created {column_family_id}")
def create_column_family(self, column_family_name, table_name, max_age=None,
                         nr_max_versions=None, gc_rule_union=None):
    """Create a column family and add it to an existing table.

    Garbage collection rules can be attached to the column family.

    Args:
        column_family_name (str): id of the column family to create.
        table_name (str): id of the table that receives the column family.
        max_age (int): the time to live in days. A falsy value (``None``
            or ``0``) means no max-age rule.
        nr_max_versions (int): the number of versions that should be kept.
            A falsy value (``None`` or ``0``) means no max-versions rule.
        gc_rule_union (bool or None): if both max_age and nr_max_versions
            are specified, then this parameter should be a bool. If True,
            then the max age and the max versions rules are unified, if
            False, then the intersection of the rules is used.

    Returns:
        google.cloud.bigtable.column_family.ColumnFamily: the column family
        that was created.

    Raises:
        Conflict: both ``max_age`` and ``nr_max_versions`` are given but
            ``gc_rule_union`` is ``None``.
        NotFound: the table ``table_name`` does not exist.
    """
    if max_age and nr_max_versions:
        # Both rules are specified, so the caller must also say how to
        # combine them. Check this before building any rule objects.
        if gc_rule_union is None:
            raise Conflict(
                "If max_age and nr_max_versions are both specified, then gc_rule_union cannot be None."
            )
        max_age_rule = bt_column_family.MaxAgeGCRule(
            dt.timedelta(days=max_age))
        max_versions_rule = bt_column_family.MaxVersionsGCRule(
            nr_max_versions)
        if gc_rule_union:
            gc_rule = bt_column_family.GCRuleUnion(
                rules=[max_age_rule, max_versions_rule])
        else:
            gc_rule = bt_column_family.GCRuleIntersection(
                rules=[max_age_rule, max_versions_rule])
    elif max_age:
        # Only max age is specified.
        gc_rule = bt_column_family.MaxAgeGCRule(dt.timedelta(days=max_age))
    elif nr_max_versions:
        # Only max number of versions is specified.
        gc_rule = bt_column_family.MaxVersionsGCRule(nr_max_versions)
    else:
        # No rule is specified.
        gc_rule = None

    table = self.instance.table(table_name)
    if not table.exists():
        raise NotFound(
            "Table name '{}' does not exist.".format(table_name))

    logging.info("Creating column family '%s' in table '%s'.",
                 column_family_name, table_name)
    column_family = bt_column_family.ColumnFamily(column_family_name,
                                                  table, gc_rule)
    column_family.create()
    # Hand the created object back, as the documented contract promises.
    return column_family
def __init__(self, ctx, project_id, instance_id, table_id):
    """Store connection identifiers and open a non-admin Bigtable client.

    Args:
        ctx: context object, stored as-is on ``self._ctx``.
        project_id: project passed to ``bigtable.Client``.
        instance_id: Bigtable instance to bind ``self._instance`` to.
        table_id: table identifier, stored for later use.
    """
    self._ctx = ctx
    self._project_id = project_id
    self._instance_id = instance_id
    self._table_id = table_id

    # Fixed cell location: column family "profile", column b"payload".
    self._column_family_id = "profile"
    self._column_name = b"payload"

    # GC policy: a cell is collected once it is older than 90 days OR is no
    # longer the most recent version.
    ninety_days = datetime.timedelta(days=90)
    self._gc_rule = column_family.GCRuleUnion(
        rules=[
            column_family.MaxAgeGCRule(ninety_days),
            column_family.MaxVersionsGCRule(1),
        ]
    )

    # admin=False: this client is not created for admin operations.
    self._client = bigtable.Client(project=project_id, admin=False)
    self._instance = self._client.instance(instance_id)
def test_bigtable_create_family_gc_union():
    """Create column family ``cf3`` with a union GC rule, check it, delete it.

    The assertions look for both member rules in the text form of the
    column family's protobuf.
    """
    # [START bigtable_create_family_gc_union]
    from google.cloud.bigtable import Client
    from google.cloud.bigtable import column_family

    table = Client(admin=True).instance(INSTANCE_ID).table(TABLE_ID)

    # Union GC policy: a cell is dropped when it is beyond the 2 most
    # recent versions OR older than 5 days.
    two_versions = column_family.MaxVersionsGCRule(2)
    five_days_old = column_family.MaxAgeGCRule(datetime.timedelta(days=5))
    union_rule = column_family.GCRuleUnion([two_versions, five_days_old])

    column_family_obj = table.column_family("cf3", union_rule)
    column_family_obj.create()
    # [END bigtable_create_family_gc_union]

    # 5 days == 432000 seconds in the rendered protobuf.
    rendered = str(column_family_obj.to_pb())
    expected_fragments = (
        "union",
        "max_age",
        "seconds: 432000",
        "max_num_versions: 2",
    )
    for fragment in expected_fragments:
        assert fragment in rendered

    column_family_obj.delete()
def run_table_operations(project_id, instance_id, table_id): ''' Create a Bigtable table and perform basic table operations :type project_id: str :param project_id: Project id of the client. :type instance_id: str :param instance_id: Instance of the client. :type table_id: str :param table_id: Table id to create table. ''' client = bigtable.Client(project=project_id, admin=True) instance = client.instance(instance_id) table = instance.table(table_id) # Check whether table exists in an instance. # Create table if it does not exists. print 'Checking if table {} exists...'.format(table_id) if table.exists(): print 'Table {} already exists.'.format(table_id) else: print 'Creating the {} table.'.format(table_id) table.create() print 'Created table {}.'.format(table_id) # [START bigtable_list_tables] tables = instance.list_tables() print 'Listing tables in current project...' if tables != []: for tbl in tables: print tbl.table_id else: print 'No table exists in current project...' # [END bigtable_list_tables] # [START bigtable_create_family_gc_max_age] print 'Creating column family cf1 with with MaxAge GC Rule...' # Create a column family with GC policy : maximum age # where age = current time minus cell timestamp # Define the GC rule to retain data with max age of 5 days max_age_rule = column_family.MaxAgeGCRule(datetime.timedelta(days=5)) column_family1 = table.column_family('cf1', max_age_rule) column_family1.create() print 'Created column family cf1 with MaxAge GC Rule.' # [END bigtable_create_family_gc_max_age] # [START bigtable_create_family_gc_max_versions] print 'Creating column family cf2 with max versions GC rule...' 
# Create a column family with GC policy : most recent N versions # where 1 = most recent version # Define the GC policy to retain only the most recent 2 versions max_versions_rule = column_family.MaxVersionsGCRule(2) column_family2 = table.column_family('cf2', max_versions_rule) column_family2.create() print 'Created column family cf2 with Max Versions GC Rule.' # [END bigtable_create_family_gc_max_versions] # [START bigtable_create_family_gc_union] print 'Creating column family cf3 with union GC rule...' # Create a column family with GC policy to drop data that matches # at least one condition. # Define a GC rule to drop cells older than 5 days or not the # most recent version union_rule = column_family.GCRuleUnion([ column_family.MaxAgeGCRule(datetime.timedelta(days=5)), column_family.MaxVersionsGCRule(2) ]) column_family3 = table.column_family('cf3', union_rule) column_family3.create() print 'Created column family cf3 with Union GC rule' # [END bigtable_create_family_gc_union] # [START bigtable_create_family_gc_intersection] print 'Creating column family cf4 with Intersection GC rule...' # Create a column family with GC policy to drop data that matches # all conditions # GC rule: Drop cells older than 5 days AND older than the most # recent 2 versions intersection_rule = column_family.GCRuleIntersection([ column_family.MaxAgeGCRule(datetime.timedelta(days=5)), column_family.MaxVersionsGCRule(2) ]) column_family4 = table.column_family('cf4', intersection_rule) column_family4.create() print 'Created column family cf4 with Intersection GC rule.' # [END bigtable_create_family_gc_intersection] # [START bigtable_create_family_gc_nested] print 'Creating column family cf5 with a Nested GC rule...' # Create a column family with nested GC policies. 
# Create a nested GC rule: # Drop cells that are either older than the 10 recent versions # OR # Drop cells that are older than a month AND older than the # 2 recent versions rule1 = column_family.MaxVersionsGCRule(10) rule2 = column_family.GCRuleIntersection([ column_family.MaxAgeGCRule(datetime.timedelta(days=30)), column_family.MaxVersionsGCRule(2) ]) nested_rule = column_family.GCRuleUnion([rule1, rule2]) column_family5 = table.column_family('cf5', nested_rule) column_family5.create() print 'Created column family cf5 with a Nested GC rule.' # [END bigtable_create_family_gc_nested] # [START bigtable_list_column_families] print 'Printing Column Family and GC Rule for all column families...' column_families = table.list_column_families() for column_family_name, gc_rule in sorted(column_families.items()): print 'Column Family:', column_family_name print 'GC Rule:' print gc_rule.to_pb() # Sample output: # Column Family: cf4 # GC Rule: # gc_rule { # intersection { # rules { # max_age { # seconds: 432000 # } # } # rules { # max_num_versions: 2 # } # } # } # [END bigtable_list_column_families] print 'Print column family cf1 GC rule before update...' print 'Column Family: cf1' print column_family1.to_pb() # [START bigtable_update_gc_rule] print 'Updating column family cf1 GC rule...' # Update the column family cf1 to update the GC rule column_family1 = table.column_family('cf1', column_family.MaxVersionsGCRule(1)) column_family1.update() print 'Updated column family cf1 GC rule\n' # [END bigtable_update_gc_rule] print 'Print column family cf1 GC rule after update...' print 'Column Family: cf1' print column_family1.to_pb() # [START bigtable_delete_family] print 'Delete a column family cf2...' # Delete a column family column_family2.delete() print 'Column family cf2 deleted successfully.' # [END bigtable_delete_family] print 'execute command "python tableadmin.py delete [project_id] \
def run_table_operations(project_id, instance_id, table_id):
    """Create a Bigtable table and perform basic operations on it.

    Walks through the table-admin samples in order: list tables, create
    column families cf1..cf5 with different garbage-collection (GC) rules,
    list the column families, update the GC rule of cf1, and delete cf2.
    The table itself is left in place.

    :type project_id: str
    :param project_id: Project id of the client.

    :type instance_id: str
    :param instance_id: Instance of the client.

    :type table_id: str
    :param table_id: Table id to create table.
    """
    # The client returned by create_table is not needed here.
    _client, instance, table = create_table(project_id, instance_id, table_id)

    # [START bigtable_list_tables]
    tables = instance.list_tables()
    print("Listing tables in current project...")
    if tables:
        for tbl in tables:
            print(tbl.table_id)
    else:
        print("No table exists in current project...")
    # [END bigtable_list_tables]

    # [START bigtable_create_family_gc_max_age]
    print("Creating column family cf1 with with MaxAge GC Rule...")
    # Create a column family with GC policy : maximum age
    # where age = current time minus cell timestamp

    # Define the GC rule to retain data with max age of 5 days
    max_age_rule = column_family.MaxAgeGCRule(datetime.timedelta(days=5))

    column_family1 = table.column_family("cf1", max_age_rule)
    column_family1.create()
    print("Created column family cf1 with MaxAge GC Rule.")
    # [END bigtable_create_family_gc_max_age]

    # [START bigtable_create_family_gc_max_versions]
    print("Creating column family cf2 with max versions GC rule...")
    # Create a column family with GC policy : most recent N versions
    # where 1 = most recent version

    # Define the GC policy to retain only the most recent 2 versions
    max_versions_rule = column_family.MaxVersionsGCRule(2)

    column_family2 = table.column_family("cf2", max_versions_rule)
    column_family2.create()
    print("Created column family cf2 with Max Versions GC Rule.")
    # [END bigtable_create_family_gc_max_versions]

    # [START bigtable_create_family_gc_union]
    print("Creating column family cf3 with union GC rule...")
    # Create a column family with GC policy to drop data that matches
    # at least one condition.
    # Define a GC rule to drop cells older than 5 days or beyond the
    # 2 most recent versions
    union_rule = column_family.GCRuleUnion([
        column_family.MaxAgeGCRule(datetime.timedelta(days=5)),
        column_family.MaxVersionsGCRule(2),
    ])

    column_family3 = table.column_family("cf3", union_rule)
    column_family3.create()
    print("Created column family cf3 with Union GC rule")
    # [END bigtable_create_family_gc_union]

    # [START bigtable_create_family_gc_intersection]
    print("Creating column family cf4 with Intersection GC rule...")
    # Create a column family with GC policy to drop data that matches
    # all conditions
    # GC rule: Drop cells older than 5 days AND older than the most
    # recent 2 versions
    intersection_rule = column_family.GCRuleIntersection([
        column_family.MaxAgeGCRule(datetime.timedelta(days=5)),
        column_family.MaxVersionsGCRule(2),
    ])

    column_family4 = table.column_family("cf4", intersection_rule)
    column_family4.create()
    print("Created column family cf4 with Intersection GC rule.")
    # [END bigtable_create_family_gc_intersection]

    # [START bigtable_create_family_gc_nested]
    print("Creating column family cf5 with a Nested GC rule...")
    # Create a column family with nested GC policies.
    # Create a nested GC rule:
    # Drop cells that are either older than the 10 recent versions
    # OR
    # Drop cells that are older than a month AND older than the
    # 2 recent versions
    rule1 = column_family.MaxVersionsGCRule(10)
    rule2 = column_family.GCRuleIntersection([
        column_family.MaxAgeGCRule(datetime.timedelta(days=30)),
        column_family.MaxVersionsGCRule(2),
    ])

    nested_rule = column_family.GCRuleUnion([rule1, rule2])

    column_family5 = table.column_family("cf5", nested_rule)
    column_family5.create()
    print("Created column family cf5 with a Nested GC rule.")
    # [END bigtable_create_family_gc_nested]

    # [START bigtable_list_column_families]
    print("Printing Column Family and GC Rule for all column families...")
    column_families = table.list_column_families()
    for column_family_name, gc_rule in sorted(column_families.items()):
        print("Column Family:", column_family_name)
        print("GC Rule:")
        print(gc_rule.to_pb())
        # Sample output:
        #         Column Family: cf4
        #         GC Rule:
        #         gc_rule {
        #           intersection {
        #             rules {
        #               max_age {
        #                 seconds: 432000
        #               }
        #             }
        #             rules {
        #               max_num_versions: 2
        #             }
        #           }
        #         }
    # [END bigtable_list_column_families]

    print("Print column family cf1 GC rule before update...")
    print("Column Family: cf1")
    print(column_family1.to_pb())

    # [START bigtable_update_gc_rule]
    print("Updating column family cf1 GC rule...")
    # Update the column family cf1 to update the GC rule
    column_family1 = table.column_family(
        "cf1", column_family.MaxVersionsGCRule(1)
    )
    column_family1.update()
    print("Updated column family cf1 GC rule\n")
    # [END bigtable_update_gc_rule]

    print("Print column family cf1 GC rule after update...")
    print("Column Family: cf1")
    print(column_family1.to_pb())

    # [START bigtable_delete_family]
    print("Delete a column family cf2...")
    # Delete a column family
    column_family2.delete()
    print("Column family cf2 deleted successfully.")
    # [END bigtable_delete_family]

    # Adjacent string literals instead of a backslash continuation inside
    # the literal, so no source whitespace ends up in the printed command.
    print(
        'execute command "python tableadmin.py delete [project_id] '
        '[instance_id] --table [tableName]" to delete the table.'
    )