def publicize(self, mode="all"):
    """Change the IAM configuration to make the BigQuery dataset public.

    Args:
        mode (str): Which dataset to publicize [prod|staging|all].
    """
    for m in self._loop_modes(mode):
        dataset = m["client"].get_dataset(m["id"])
        entries = dataset.access_entries
        entries.extend(
            [
                bigquery.AccessEntry(
                    role="roles/bigquery.dataViewer",
                    entity_type="iamMember",
                    entity_id="allUsers",
                ),
                bigquery.AccessEntry(
                    role="roles/bigquery.metadataViewer",
                    entity_type="iamMember",
                    entity_id="allUsers",
                ),
                bigquery.AccessEntry(
                    role="roles/bigquery.user",
                    entity_type="iamMember",
                    entity_id="allUsers",
                ),
            ]
        )
        dataset.access_entries = entries
        m["client"].update_dataset(dataset, ["access_entries"])
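# Companion sketch (not part of the snippet above): the inverse operation,
# removing the allUsers grants added by publicize(). Assumes the same
# hypothetical self._loop_modes() helper and google.cloud.bigquery client.
def unpublicize(self, mode="all"):
    """Remove the public allUsers grants added by publicize()."""
    for m in self._loop_modes(mode):
        dataset = m["client"].get_dataset(m["id"])
        # Keep every entry that is not an allUsers IAM-member grant.
        dataset.access_entries = [
            entry
            for entry in dataset.access_entries
            if not (entry.entity_type == "iamMember" and entry.entity_id == "allUsers")
        ]
        m["client"].update_dataset(dataset, ["access_entries"])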
def publish_udf(raw_udf, client, project_id, gcs_bucket, gcs_path, known_udfs):
    """Publish a specific UDF to BigQuery."""
    # Create a new dataset for the UDF if necessary.
    dataset = client.create_dataset(raw_udf.dataset, exists_ok=True)

    # Set permissions for the dataset: readable by all authenticated users.
    entry = bigquery.AccessEntry("READER", "specialGroup", "allAuthenticatedUsers")
    entries = list(dataset.access_entries)
    entries.append(entry)
    dataset.access_entries = entries
    dataset = client.update_dataset(dataset, ["access_entries"])

    # Transform temporary UDFs to persistent UDFs and publish them.
    for definition in raw_udf.definitions:
        # Within a standard SQL function, references to other entities require
        # explicit project IDs. Strings are immutable, so the result of
        # replace() must be reassigned.
        for udf in known_udfs:
            definition = definition.replace(udf, f"`{project_id}`.{udf}")

        # Adjust paths for dependencies stored in GCS.
        query = OPTIONS_LIB_RE.sub(
            fr'library = "gs://{gcs_bucket}/{gcs_path}\1"', definition
        )

        client.query(query).result()
def update_dataset_access(dataset_id):
    # [START bigquery_update_dataset_access]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set dataset_id to the ID of the dataset to fetch.
    # dataset_id = 'your-project.your_dataset'

    dataset = client.get_dataset(dataset_id)  # Make an API request.

    entry = bigquery.AccessEntry(
        role="READER",
        entity_type="userByEmail",
        entity_id="*****@*****.**",
    )

    entries = list(dataset.access_entries)
    entries.append(entry)
    dataset.access_entries = entries

    dataset = client.update_dataset(dataset, ["access_entries"])  # Make an API request.

    full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id)
    print(
        "Updated dataset '{}' with modified user permissions.".format(full_dataset_id)
    )
    # [END bigquery_update_dataset_access]
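# Verification sketch for update_dataset_access() above; the dataset ID is a
# placeholder. After the update, the new READER entry should appear in the
# dataset's ACL.
from google.cloud import bigquery

client = bigquery.Client()
dataset = client.get_dataset("your-project.your_dataset")  # placeholder ID
for entry in dataset.access_entries:
    print(entry.role, entry.entity_type, entry.entity_id)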
def test_update_dataset_multiple_properties(client, to_delete):
    """Update a dataset's metadata."""
    DATASET_ID = 'update_dataset_multiple_properties_%d' % (_millis(),)
    dataset = bigquery.Dataset(client.dataset(DATASET_ID))
    dataset.description = ORIGINAL_DESCRIPTION
    dataset = client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START update_dataset_multiple_properties]
    assert dataset.description == ORIGINAL_DESCRIPTION
    assert dataset.default_table_expiration_ms is None
    entry = bigquery.AccessEntry(
        role='READER', entity_type='domain', entity_id='example.com')
    assert entry not in dataset.access_entries

    ONE_DAY_MS = 24 * 60 * 60 * 1000  # In milliseconds.

    dataset.description = UPDATED_DESCRIPTION
    dataset.default_table_expiration_ms = ONE_DAY_MS
    entries = list(dataset.access_entries)
    entries.append(entry)
    dataset.access_entries = entries

    dataset = client.update_dataset(
        dataset,
        ['description', 'default_table_expiration_ms', 'access_entries']
    )  # API request

    assert dataset.description == UPDATED_DESCRIPTION
    assert dataset.default_table_expiration_ms == ONE_DAY_MS
    assert entry in dataset.access_entries
    # [END update_dataset_multiple_properties]
def bq_update_access_to_dataset(dataset_id, role, entity_type, entity_id):
    '''
    Update access control for a BigQuery dataset.

    USAGE:
        bq_update_access_to_dataset('ztest1', 'READER', 'userByEmail', '*****@*****.**')

    Required permissions:
        bigquery.dataOwner
        bigquery.admin
    '''
    try:
        client = bigquery.Client()
        dataset = client.get_dataset(client.dataset(dataset_id))

        entry = bigquery.AccessEntry(
            role=role,                # 'READER', 'WRITER', 'OWNER'
            entity_type=entity_type,  # 'userByEmail', 'groupByEmail', 'domain', 'specialGroup', 'view'
            entity_id=entity_id,      # User or resource to grant access to
        )

        assert entry not in dataset.access_entries
        entries = list(dataset.access_entries)
        entries.append(entry)
        dataset.access_entries = entries

        dataset = client.update_dataset(dataset, ['access_entries'])  # API request
        assert entry in dataset.access_entries
    except Exception as e:
        print('[ ERROR ] {}'.format(e))
def share_dataset_all_users_read(bigquery_client, full_dataset_name):
    # Workaround: older client-library versions validate entity_type against
    # AccessEntry.ENTITY_TYPES and reject "iamMember"; extend the allowed
    # types so the entry below passes validation.
    bigquery.AccessEntry.ENTITY_TYPES = [
        "userByEmail", "groupByEmail", "domain", "specialGroup", "view", "iamMember"
    ]

    role = 'READER'
    entity_type = 'iamMember'
    entity_id = 'allUsers'

    dataset = bigquery_client.get_dataset(full_dataset_name)
    entries = list(dataset.access_entries)

    is_shared = False
    for entry in entries:
        if entry.role == role and entry.entity_type == entity_type and entry.entity_id == entity_id:
            is_shared = True

    if not is_shared:
        entry = bigquery.AccessEntry(
            role=role,
            entity_type=entity_type,
            entity_id=entity_id,
        )
        entries.append(entry)
        dataset.access_entries = entries
        dataset = bigquery_client.update_dataset(dataset, ["access_entries"])
        logging.info('Updated dataset permissions')
    else:
        logging.info('The dataset is already shared')
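# Equivalent duplicate check with any(); a sketch that mirrors the loop in
# share_dataset_all_users_read() above, assuming the same role, entity_type,
# entity_id, and entries variables are in scope.
is_shared = any(
    entry.role == role
    and entry.entity_type == entity_type
    and entry.entity_id == entity_id
    for entry in entries
)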
def authorize(self, shared_dataset_id, shared_view_id, source_dataset_id):
    client = self.get_client()
    shared_dataset = bigquery.Dataset(client.dataset(shared_dataset_id))
    source_dataset = bigquery.Dataset(client.dataset(source_dataset_id))
    view = bigquery.Table(shared_dataset.table(shared_view_id))

    access_entries = source_dataset.access_entries
    access_entry = bigquery.AccessEntry(None, 'view', view.reference.to_api_repr())
    access_entries.append(access_entry)
    source_dataset.access_entries = access_entries

    client.update_dataset(source_dataset, ['access_entries'])
def publish_routine(
    raw_routine, client, project_id, gcs_bucket, gcs_path, known_udfs, is_public
):
    """Publish a specific routine to BigQuery."""
    if is_public:
        # Create a new dataset for the routine if necessary.
        dataset = client.create_dataset(raw_routine.dataset, exists_ok=True)

        # Set permissions for the dataset: readable by all authenticated users.
        entry = bigquery.AccessEntry("READER", "specialGroup", "allAuthenticatedUsers")
        entries = list(dataset.access_entries)
        entries.append(entry)
        dataset.access_entries = entries
        dataset = client.update_dataset(dataset, ["access_entries"])

    # Transform temporary UDFs to persistent UDFs and publish them.
    for definition in raw_routine.definitions:
        # Within a standard SQL function, references to other entities require
        # explicit project IDs.
        for udf in set(known_udfs):
            # Ensure UDF definitions are not replaced twice, as would be the
            # case for `mozfun`.stats.mode_last and
            # `mozfun`.stats.mode_last_retain_nulls, since one name is a
            # substring of the other.
            definition = definition.replace(f"`{project_id}`.{udf}", udf)
            definition = definition.replace(f"{project_id}.{udf}", udf)
            definition = definition.replace(udf, f"`{project_id}`.{udf}")

        # Adjust paths for dependencies stored in GCS.
        query = OPTIONS_LIB_RE.sub(
            fr'library = "gs://{gcs_bucket}/{gcs_path}\1"', definition
        )

        # Add UDF descriptions.
        if raw_routine.filepath not in SKIP and not raw_routine.is_stored_procedure:
            # Descriptions need to be escaped, since quotation marks and other
            # characters, such as \x01, would otherwise make the query invalid.
            escaped_description = json.dumps(str(raw_routine.description))
            query = OPTIONS_RE.sub(
                f"OPTIONS(description={escaped_description},", query)

            if "OPTIONS(" not in query and query[-1] == ";":
                query = query[:-1] + f"OPTIONS(description={escaped_description});"

        print(f"Publish {raw_routine.name}")
        client.query(query).result()
def test_update_dataset_access(client, to_delete):
    """Update a dataset's metadata."""
    DATASET_ID = 'update_dataset_access_{}'.format(_millis())
    dataset = bigquery.Dataset(client.dataset(DATASET_ID))
    dataset = client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_update_dataset_access]
    entry = bigquery.AccessEntry(
        role='READER', entity_type='userByEmail', entity_id='*****@*****.**')
    assert entry not in dataset.access_entries
    entries = list(dataset.access_entries)
    entries.append(entry)
    dataset.access_entries = entries

    dataset = client.update_dataset(dataset, ['access_entries'])  # API request

    assert entry in dataset.access_entries
    # [END bigquery_update_dataset_access]
def add_bq_iam_member(project, db_name):
    client = bigquery.Client()
    name = project + "." + db_name
    dataset = client.get_dataset(name)

    entry = bigquery.AccessEntry(
        role="OWNER",
        entity_type="userByEmail",
        entity_id="*****@*****.**",
    )

    entries = list(dataset.access_entries)
    entries.append(entry)
    dataset.access_entries = entries

    dataset = client.update_dataset(dataset, ["access_entries"])

    full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id)
    print(
        "Updated dataset '{}' with modified user permissions.".format(full_dataset_id)
    )
def _share_views(views_project, views_dataset, origin_project, origin_dataset, tables):
    client_views_project = bigquery.Client(views_project)
    client_origin_project = bigquery.Client(origin_project)

    views_dataset_obj = bigquery.Dataset(
        client_views_project.dataset(views_dataset))
    origin_dataset_obj = bigquery.Dataset(
        client_origin_project.dataset(origin_dataset))

    access_entries = origin_dataset_obj.access_entries
    for table in tables:
        view = bigquery.Table(views_dataset_obj.table(table))
        access_entries.append(
            bigquery.AccessEntry(None, 'view', view.reference.to_api_repr()))

    origin_dataset_obj.access_entries = access_entries
    origin_dataset_obj = client_views_project.update_dataset(
        origin_dataset_obj, ['access_entries'])
def grant_access(override_values={}):
    # [START bigquery_grant_view_access]
    from google.cloud import bigquery

    client = bigquery.Client()

    # To use a view, the analyst requires ACLs to both the view and the source
    # table. Create an authorized view to allow an analyst to use a view
    # without direct access permissions to the source table.
    view_dataset_id = "my-project.my_view_dataset"
    # [END bigquery_grant_view_access]
    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    view_dataset_id = override_values.get("view_dataset_id", view_dataset_id)
    # [START bigquery_grant_view_access]
    # Make an API request to get the view dataset ACLs.
    view_dataset = client.get_dataset(view_dataset_id)

    analyst_group_email = "*****@*****.**"
    # [END bigquery_grant_view_access]
    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    analyst_group_email = override_values.get("analyst_group_email", analyst_group_email)
    # [START bigquery_grant_view_access]
    access_entries = view_dataset.access_entries
    access_entries.append(
        bigquery.AccessEntry("READER", "groupByEmail", analyst_group_email)
    )
    view_dataset.access_entries = access_entries

    # Make an API request to update the ACLs property of the view dataset.
    view_dataset = client.update_dataset(view_dataset, ["access_entries"])

    print(f"Access to view: {view_dataset.access_entries}")

    # Group members of "*****@*****.**" now have access to the view,
    # but they require access to the source table to use it. To remove this
    # restriction, authorize the view to access the source dataset.
    source_dataset_id = "my-project.my_source_dataset"
    # [END bigquery_grant_view_access]
    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    source_dataset_id = override_values.get("source_dataset_id", source_dataset_id)
    # [START bigquery_grant_view_access]
    # Make an API request to get the source dataset ACLs.
    source_dataset = client.get_dataset(source_dataset_id)

    view_reference = {
        "projectId": "my-project",
        "datasetId": "my_view_dataset",
        "tableId": "my_authorized_view",
    }
    # [END bigquery_grant_view_access]
    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    view_reference = override_values.get("view_reference", view_reference)
    # [START bigquery_grant_view_access]
    access_entries = source_dataset.access_entries
    access_entries.append(bigquery.AccessEntry(None, "view", view_reference))
    source_dataset.access_entries = access_entries

    # Make an API request to update the ACLs property of the source dataset.
    source_dataset = client.update_dataset(source_dataset, ["access_entries"])

    print(f"Access to source: {source_dataset.access_entries}")
    # [END bigquery_grant_view_access]
    return view_dataset, source_dataset
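# Revocation sketch for grant_access() above: filter the analyst group back
# out of the view dataset's ACL and update again. Assumes the same client,
# view_dataset_id, and analyst_group_email placeholders used above.
view_dataset = client.get_dataset(view_dataset_id)
view_dataset.access_entries = [
    entry
    for entry in view_dataset.access_entries
    if entry.entity_id != analyst_group_email
]
view_dataset = client.update_dataset(view_dataset, ["access_entries"])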
from google.cloud import bigquery

# Construct a BigQuery client object.
client = bigquery.Client()

# TODO(developer): Set dataset_id to the ID of the dataset to fetch.
# dataset_id = 'your-project.your_dataset'

dataset = client.get_dataset(dataset_id)  # Make an API request.

entry = bigquery.AccessEntry(
    role="READER",
    entity_type="userByEmail",
    entity_id="*****@*****.**",
)

entries = list(dataset.access_entries)
entries.append(entry)
dataset.access_entries = entries

dataset = client.update_dataset(dataset, ["access_entries"])  # Make an API request.

full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id)
print(
    "Updated dataset '{}' with modified user permissions.".format(full_dataset_id)
)
# Create the read-only dataset if it does not already exist.
m = re.search('(.*)@.*', group_email)
shared_dataset_id = m.group(1).replace('.', '_')
shared_dataset = bigquery.Dataset(client.dataset(shared_dataset_id + '_ro'))
shared_dataset.location = 'EU'
try:
    shared_dataset = client.create_dataset(shared_dataset)  # API request
except Exception as e:
    logging.error(str(e))

# Authorize the group to access the shared read-only dataset.
try:
    access_entries = shared_dataset.access_entries
    access_entries.append(
        bigquery.AccessEntry('READER', 'groupByEmail', group_email))
    shared_dataset.access_entries = access_entries
    shared_dataset = client.update_dataset(
        shared_dataset, ['access_entries'])  # API request
except Exception as e:
    logging.error(str(e))

# Create the shared view in the new dataset. First delete the view if it
# already exists.
try:
    table = client.get_table(shared_dataset.table(shared_view_id))
    if table.view_query is not None:
        client.delete_table(shared_dataset.table(shared_view_id))
except Exception as e:
    logging.error(str(e))

view = bigquery.Table(shared_dataset.table(shared_view_id))
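# In newer google-cloud-bigquery releases, the "delete if exists" probe above
# can be collapsed into one call; a sketch, assuming a client version that
# supports the not_found_ok flag and the same shared_dataset/shared_view_id.
client.delete_table(shared_dataset.table(shared_view_id), not_found_ok=True)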
csv_reader = reader(read_obj)
next(csv_reader, None)  # Skip the headers.

# Iterate over each row in the CSV using the reader object.
for row in csv_reader:
    # The row variable is a list that represents a row in the CSV.
    dataset_id = row[1].strip()
    emailid = row[2].strip()
    role = row[3].strip()
    print(emailid)
    print(dataset_id)
    print(role)

    dataset = client.get_dataset(dataset_id)  # Make an API request.

    entry = bigquery.AccessEntry(
        role=role,  # e.g. 'READER'
        entity_type="userByEmail",
        entity_id=emailid,  # e.g. "*****@*****.**"
    )

    entries = list(dataset.access_entries)
    entries.append(entry)
    dataset.access_entries = entries

    dataset = client.update_dataset(
        dataset, ["access_entries"])  # Make an API request.

    full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id)
    print("Updated dataset '{}' with modified user permissions.".format(
        full_dataset_id))
def run_authorized_view_tutorial(override_values={}):
    # Note to user: This is a group email for testing purposes. Replace with
    # your own group email address when running this code.
    analyst_group_email = "*****@*****.**"

    # [START bigquery_authorized_view_tutorial]
    # Create a source dataset
    # [START bigquery_avt_create_source_dataset]
    from google.cloud import bigquery

    client = bigquery.Client()
    source_dataset_id = "github_source_data"
    source_dataset_id_full = "{}.{}".format(client.project, source_dataset_id)
    # [END bigquery_authorized_view_tutorial]
    # [END bigquery_avt_create_source_dataset]
    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    source_dataset_id = override_values.get("source_dataset_id", source_dataset_id)
    source_dataset_id_full = "{}.{}".format(client.project, source_dataset_id)
    # [START bigquery_authorized_view_tutorial]
    # [START bigquery_avt_create_source_dataset]

    source_dataset = bigquery.Dataset(source_dataset_id_full)
    # Specify the geographic location where the dataset should reside.
    source_dataset.location = "US"
    source_dataset = client.create_dataset(source_dataset)  # API request
    # [END bigquery_avt_create_source_dataset]

    # Populate a source table
    # [START bigquery_avt_create_source_table]
    source_table_id = "github_contributors"
    job_config = bigquery.QueryJobConfig()
    job_config.destination = source_dataset.table(source_table_id)
    sql = """
        SELECT commit, author, committer, repo_name
        FROM `bigquery-public-data.github_repos.commits`
        LIMIT 1000
    """
    query_job = client.query(
        sql,
        # Location must match that of the dataset(s) referenced in the query
        # and of the destination table.
        location="US",
        job_config=job_config,
    )  # API request - starts the query

    query_job.result()  # Waits for the query to finish
    # [END bigquery_avt_create_source_table]

    # Create a separate dataset to store your view
    # [START bigquery_avt_create_shared_dataset]
    shared_dataset_id = "shared_views"
    shared_dataset_id_full = "{}.{}".format(client.project, shared_dataset_id)
    # [END bigquery_authorized_view_tutorial]
    # [END bigquery_avt_create_shared_dataset]
    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    shared_dataset_id = override_values.get("shared_dataset_id", shared_dataset_id)
    shared_dataset_id_full = "{}.{}".format(client.project, shared_dataset_id)
    # [START bigquery_authorized_view_tutorial]
    # [START bigquery_avt_create_shared_dataset]

    shared_dataset = bigquery.Dataset(shared_dataset_id_full)
    shared_dataset.location = "US"
    shared_dataset = client.create_dataset(shared_dataset)  # API request
    # [END bigquery_avt_create_shared_dataset]

    # Create the view in the new dataset
    # [START bigquery_avt_create_view]
    shared_view_id = "github_analyst_view"
    view = bigquery.Table(shared_dataset.table(shared_view_id))
    sql_template = """
        SELECT commit, author.name as author,
        committer.name as committer, repo_name
        FROM `{}.{}.{}`
    """
    view.view_query = sql_template.format(
        client.project, source_dataset_id, source_table_id)
    view = client.create_table(view)  # API request
    # [END bigquery_avt_create_view]

    # Assign access controls to the dataset containing the view
    # [START bigquery_avt_shared_dataset_access]
    # analyst_group_email = '*****@*****.**'
    access_entries = shared_dataset.access_entries
    access_entries.append(
        bigquery.AccessEntry("READER", "groupByEmail", analyst_group_email)
    )
    shared_dataset.access_entries = access_entries
    shared_dataset = client.update_dataset(
        shared_dataset, ["access_entries"])  # API request
    # [END bigquery_avt_shared_dataset_access]

    # Authorize the view to access the source dataset
    # [START bigquery_avt_source_dataset_access]
    access_entries = source_dataset.access_entries
    access_entries.append(
        bigquery.AccessEntry(None, "view", view.reference.to_api_repr())
    )
    source_dataset.access_entries = access_entries
    source_dataset = client.update_dataset(
        source_dataset, ["access_entries"])  # API request
    # [END bigquery_avt_source_dataset_access]
    # [END bigquery_authorized_view_tutorial]
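# Cleanup sketch for the tutorial above; deletes the datasets it created.
# Assumes the same client, source_dataset, and shared_dataset objects.
# delete_contents also removes the source table and the view.
client.delete_dataset(source_dataset, delete_contents=True, not_found_ok=True)
client.delete_dataset(shared_dataset, delete_contents=True, not_found_ok=True)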
def test_manage_views(client, to_delete):
    project = client.project
    source_dataset_id = "source_dataset_{}".format(_millis())
    source_dataset_ref = client.dataset(source_dataset_id)
    source_dataset = bigquery.Dataset(source_dataset_ref)
    source_dataset = client.create_dataset(source_dataset)
    to_delete.append(source_dataset)

    job_config = bigquery.LoadJobConfig()
    job_config.schema = [
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
    ]
    job_config.skip_leading_rows = 1
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
    source_table_id = "us_states"
    load_job = client.load_table_from_uri(
        uri, source_dataset.table(source_table_id), job_config=job_config
    )
    load_job.result()

    shared_dataset_id = "shared_dataset_{}".format(_millis())
    shared_dataset_ref = client.dataset(shared_dataset_id)
    shared_dataset = bigquery.Dataset(shared_dataset_ref)
    shared_dataset = client.create_dataset(shared_dataset)
    to_delete.append(shared_dataset)

    # [START bigquery_create_view]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # project = 'my-project'
    # source_dataset_id = 'my_source_dataset'
    # source_table_id = 'us_states'
    # shared_dataset_ref = client.dataset('my_shared_dataset')

    # This example shows how to create a shared view of a source table of
    # US States. The source table contains all 50 states, while the view will
    # contain only states with names starting with 'W'.
    view_ref = shared_dataset_ref.table("my_shared_view")
    view = bigquery.Table(view_ref)
    sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "W%"'
    view.view_query = sql_template.format(project, source_dataset_id, source_table_id)
    view = client.create_table(view)  # API request

    print("Successfully created view at {}".format(view.full_table_id))
    # [END bigquery_create_view]

    # [START bigquery_update_view_query]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # project = 'my-project'
    # source_dataset_id = 'my_source_dataset'
    # source_table_id = 'us_states'
    # shared_dataset_ref = client.dataset('my_shared_dataset')

    # This example shows how to update a shared view of a source table of
    # US States. The view's query will be updated to contain only states with
    # names starting with 'M'.
    view_ref = shared_dataset_ref.table("my_shared_view")
    view = bigquery.Table(view_ref)
    sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "M%"'
    view.view_query = sql_template.format(project, source_dataset_id, source_table_id)
    view = client.update_table(view, ["view_query"])  # API request
    # [END bigquery_update_view_query]

    # [START bigquery_get_view]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # shared_dataset_id = 'my_shared_dataset'

    view_ref = client.dataset(shared_dataset_id).table("my_shared_view")
    view = client.get_table(view_ref)  # API Request

    # Display view properties
    print("View at {}".format(view.full_table_id))
    print("View Query:\n{}".format(view.view_query))
    # [END bigquery_get_view]

    assert view.view_query is not None

    analyst_group_email = "*****@*****.**"
    # [START bigquery_grant_view_access]
    # from google.cloud import bigquery
    # client = bigquery.Client()

    # Assign access controls to the dataset containing the view
    # shared_dataset_id = 'my_shared_dataset'
    # analyst_group_email = '*****@*****.**'
    shared_dataset = client.get_dataset(
        client.dataset(shared_dataset_id)
    )  # API request
    access_entries = shared_dataset.access_entries
    access_entries.append(
        bigquery.AccessEntry("READER", "groupByEmail", analyst_group_email)
    )
    shared_dataset.access_entries = access_entries
    shared_dataset = client.update_dataset(
        shared_dataset, ["access_entries"]
    )  # API request

    # Authorize the view to access the source dataset
    # project = 'my-project'
    # source_dataset_id = 'my_source_dataset'
    source_dataset = client.get_dataset(
        client.dataset(source_dataset_id)
    )  # API request
    view_reference = {
        "projectId": project,
        "datasetId": shared_dataset_id,
        "tableId": "my_shared_view",
    }
    access_entries = source_dataset.access_entries
    access_entries.append(bigquery.AccessEntry(None, "view", view_reference))
    source_dataset.access_entries = access_entries
    source_dataset = client.update_dataset(
        source_dataset, ["access_entries"]
    )  # API request
    # [END bigquery_grant_view_access]
from google.cloud import bigquery

project_id = "PROJECT_ID"
dataset_id = "DATASET_NAME"
group_name = "*****@*****.**"
role = "READER"

client = bigquery.Client(project=project_id)
dataset_info = client.get_dataset(client.dataset(dataset_id))

access_entries = dataset_info.access_entries
access_entries.append(
    bigquery.AccessEntry(role, "groupByEmail", group_name)
)
dataset_info.access_entries = access_entries

dataset_info = client.update_dataset(dataset_info, ['access_entries'])
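# Verification sketch: print the ACL after the update to confirm the new
# groupByEmail entry is present (assumes the dataset_info object from above).
for entry in dataset_info.access_entries:
    print(entry.role, entry.entity_type, entry.entity_id)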
def execute(self, data):
    # Setup
    client = self.client
    bigquery = self.bigquery
    datetime = self.datetime
    pytz = self.pytz
    time = self.time

    name = data.get("titleName") if data.get("titleName") else "My_Target_Table"
    emails = data.get("emails") if data.get("emails") else ["*****@*****.**"]
    query = data.get("query")
    # Fall back to the sample CSV when no source URL is provided.
    source_url = data.get("sourceURL") if data.get(
        "sourceURL") else "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
    table = ""

    # Create a dataset first if needed.
    dataset_main = self.make_dataset()
    table_id = "{}.{}".format(dataset_main[0], name)

    # Create a table if needed.
    table = self.make_table(table_id, "load")

    # Create a view.
    if (self.env.get("create_view")):
        try:
            view_id = "{}.{}".format(dataset_main[1], "My_View")
            source_id = table_id
            view = bigquery.Table(view_id)
            view.view_query = query or f"SELECT name, post_abbr FROM `{source_id}` WHERE name LIKE 'W%'"
            # Make an API request to create the view.
            view = client.create_table(view)
            return f"Created {view.table_type}: {str(view.reference)}"
        except BaseException as e:
            print('my custom error\n')
            print(e.__class__.__name__)
            print('\n')
            print(e)
            return 'an error occurred, check the output from the backend'

    # Create an authorized view.
    elif (self.env.get("create_authorized_view")):
        try:
            view_dataset_id = dataset_main[1]
            view_dataset = client.get_dataset(view_dataset_id)
            access_entries = view_dataset.access_entries
            for email in emails:
                access_entries.append(
                    bigquery.AccessEntry("READER", "userByEmail", email))
            view_dataset.access_entries = access_entries
            view_dataset = client.update_dataset(view_dataset, ["access_entries"])

            source_dataset_id = dataset_main[0]
            source_dataset = client.get_dataset(source_dataset_id)
            view_reference = {
                "projectId": client.project,
                "datasetId": self.dataset_names[0],
                "tableId": name,
            }
            access_entries = source_dataset.access_entries
            access_entries.append(
                bigquery.AccessEntry(None, "view", view_reference))
            source_dataset.access_entries = access_entries
            # Make an API request to update the ACLs property of the source dataset.
            source_dataset = client.update_dataset(source_dataset, ["access_entries"])

            emailList = ""
            for email in emails:
                emailList += email + ", "
            return f"""
            Access to view: {emailList}, and the view has access to the
            source table, which means whoever has access can use the view
            """
        except BaseException as e:
            print('my custom error\n')
            print(e.__class__.__name__)
            print('\n')
            print(e)
            return 'an error occurred, check the output from the backend'

    # List views.
    elif (self.env.get("list_views")):
        try:
            dataset_id = dataset_main[0]
            for view in ["view_1", "view_2", "view_3"]:
                view_id = "{}.{}".format(dataset_main[0], view)
                source_id = table_id
                view = bigquery.Table(view_id)
                view.view_query = query or f"SELECT name, post_abbr FROM `{source_id}` WHERE name LIKE 'W%'"
                # Make an API request to create the view.
                view = client.create_table(view)

            tables = client.list_tables(dataset_id)  # Make an API request.
            print("Tables contained in '{}':".format(dataset_id))
            views = ""
            for table in tables:
                if (table.table_type == "VIEW"):
                    views += table.table_id + " ,"
            return "List of views in the dataset {}".format(views)
        except BaseException as e:
            print('my custom error\n')
            print(e.__class__.__name__)
            print('\n')
            print(e)
            return 'an error occurred, check the output from the backend'

    # Update the view query.
    elif (self.env.get("update_views")):
        try:
            view_id = "{}.{}".format(dataset_main[1], "My_View")
            source_id = table_id
            view = bigquery.Table(view_id)
            view.view_query = f"SELECT name FROM `{source_id}` WHERE name LIKE 'M%'"
            view = client.update_table(view, ["view_query"])
            return f"Updated {view.table_type}: {str(view.reference)}"
        except BaseException as e:
            print('my custom error\n')
            print(e.__class__.__name__)
            print('\n')
            print(e)
            return 'an error occurred, check the output from the backend'

    return "Check the backend env dictionary: you did not set it, so the backend did not do anything"
def execute(self, name):
    # Create a dataset.
    if (self.env.get("create")):
        dataset_id = self.make_dataset_id(name)
        dataset_init = bigquery.Dataset(dataset_id)
        dataset_init.location = "US"  # Multi-region
        try:
            if (name == ""):
                raise IndexError
            dataset = client.create_dataset(dataset_init, timeout=30)
            return "Created dataset {}.{}".format(client.project, dataset.dataset_id)
        except IndexError:
            return "Please input a name"
        except BaseException as e:
            print("\nlook here\n")
            print(e)
            return "Dataset already exists, choose another name"

    # Copy a dataset.
    elif (self.env.get("copy")):
        transfer_client = bigquery_datatransfer.DataTransferServiceClient()

        # Create a source dataset.
        source_project_id = client.project
        source_dataset_id = "my_source_dataset"
        dataset_id = self.make_dataset_id(source_dataset_id)
        dataset_init = bigquery.Dataset(dataset_id)
        try:
            dataset = client.create_dataset(dataset_init, timeout=30)
        except BaseException as e:
            print("")  # No-op: the source dataset already exists.

        try:
            destination_project_id = client.project
            destination_dataset_id = name
            if (name == ""):
                raise IndexError
            transfer_config = bigquery_datatransfer.TransferConfig(
                destination_dataset_id=destination_dataset_id,
                display_name="Your Dataset Copy Name",
                data_source_id="cross_region_copy",
                params={
                    "source_project_id": source_project_id,
                    "source_dataset_id": source_dataset_id,
                },
                # schedule="from ",
            )
            transfer_config = transfer_client.create_transfer_config(
                parent=transfer_client.common_project_path(destination_project_id),
                transfer_config=transfer_config,
            )
            return "Created transfer config: {}".format(transfer_config.name)
        except BaseException as e:
            print("\nlook here\n")
            print(e)
            return "Was not able to make a copy, maybe check the terminal where the python server is running"

    # Access control.
    elif (self.env.get("access_control")):
        try:
            dataset_id = self.make_dataset_id(name)
            dataset = client.get_dataset(dataset_id)
            entry = bigquery.AccessEntry(
                role="READER",
                entity_type="userByEmail",
                entity_id="*****@*****.**",
            )
            entries = list(dataset.access_entries)
            entries.append(entry)
            dataset.access_entries = entries
            dataset = client.update_dataset(dataset, ["access_entries"])
            full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id)
            return "Updated dataset '{}' with modified user permissions.".format(
                full_dataset_id)
        except BaseException as e:
            print("\nlook here\n")
            print(e)
            return "An error occurred"

    # List all datasets.
    elif (self.env.get("list")):
        try:
            datasets = list(client.list_datasets())
            project = client.project
            value = "datasets in project {}".format(project)
            if datasets:
                for dataset in datasets:
                    value += "\n{}".format(dataset.dataset_id)
                return value
            else:
                return "{} project does not contain any datasets.".format(project)
        except BaseException as e:
            print("\nlook here\n")
            print(e)
            return "An error occurred"

    # Metadata.
    elif (self.env.get("metadata")):
        value = "Metadata:\n"
        try:
            # Get the dataset friendly name.
            dataset_id = self.make_dataset_id(name)
            dataset = client.get_dataset(dataset_id)
            full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id)
            value += "{}: {}\n".format("Friendly Name", dataset.friendly_name)

            # More properties.
            value += "{}: {}\n".format("Description", dataset.description)
            value += "Labels:\n"
            labels = dataset.labels
            if labels:
                # Use a distinct loop variable so the "value" accumulator is
                # not shadowed.
                for label, label_value in labels.items():
                    value += "\n{}: {}".format(label, label_value)
            else:
                value += "\tDataset has no labels defined.\n"

            # View tables.
            value += "Tables:"
            tables = list(client.list_tables(dataset))
            if tables:
                for table in tables:
                    value += "\n{}:".format(table.table_id)
            else:
                value += "\tDataset does not have tables.\n"

            return value
        except BaseException as e:
            print("\nlook here\n")
            print(e)
            return "An error occurred"

    # Update dataset.
    elif (self.env.get("update")):
        value = ""
        try:
            # Update the dataset description.
            dataset_id = self.make_dataset_id(name)
            dataset = client.get_dataset(dataset_id)  # Make an API request.
            print(dataset)
            dataset.description = "Updated description."
            dataset = client.update_dataset(
                dataset, ["description"])  # Make an API request.
            full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id)
            value += "Updated dataset '{}' with description '{}'.\n".format(
                full_dataset_id, dataset.description)

            # Update the default table expiration.
            dataset.default_table_expiration_ms = 24 * 60 * 60 * 1000  # 24 hours in milliseconds.
            dataset = client.update_dataset(
                dataset, ["default_table_expiration_ms"])
            value += "Updated dataset '{}' with table expiration '{}'.\n".format(
                full_dataset_id, dataset.default_table_expiration_ms)

            # Update the default partition expiration.
            dataset.default_partition_expiration_ms = 24 * 60 * 60 * 1000  # 24 hours in milliseconds.
            dataset = client.update_dataset(
                dataset, ["default_partition_expiration_ms"])
            value += "Updated dataset '{}' with table partition expiration '{}'.\n".format(
                full_dataset_id, dataset.default_partition_expiration_ms)

            return value
        except IndexError:
            return "Provide the dataset name"
        except BaseException as e:
            print("\nlook here\n")
            print(e)
            return "An error occurred"

    # Delete dataset.
    elif (self.env.get("delete")):
        try:
            dataset_id = self.make_dataset_id(name)
            client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=False)
            return "Deleted dataset '{}'.".format(dataset_id)
        except IndexError:
            return "Provide the dataset name"
        except BaseException as e:
            print("\nlook here\n")
            print(e.__class__.__name__)
            if (e.__class__.__name__ == "NotFound"):
                return "Could not find dataset with name {} to delete".format(dataset_id)
            return "An error occurred"
def update_datasets(**kwargs):
    """
    Updates datasets in BigQuery.

    :param bq_client: Target BigQuery client (type: google.cloud.bigquery.client.Client)
    :param project_id: GCP project ID (type: str)
    :param datasets: List of datasets (type: list)
    :param properties: Dataset properties (type: dict)
    :return dataset_updation_flag: {0: SUCCESS, 1: FAIL} (type: int)
    """
    bq_client = kwargs.get("bq_client")
    project_id = kwargs.get("project_id")
    datasets = kwargs.get("datasets", [])
    properties = kwargs.get("properties", {})
    dataset_updation_flag = 0
    try:
        for dataset_id in datasets:
            dataset_id = project_id + "." + dataset_id
            try:
                dataset = bq_client.get_dataset(dataset_id)
                for k1, v1 in properties.items():
                    if v1:
                        if k1 == "description":
                            dataset.description = v1
                            dataset = bq_client.update_dataset(dataset, ["description"])
                        elif k1 == "default_table_expiration_ms":
                            dataset.default_table_expiration_ms = v1
                            dataset = bq_client.update_dataset(
                                dataset, ["default_table_expiration_ms"]
                            )
                        elif k1 == "labels":
                            dataset.labels = v1
                            dataset = bq_client.update_dataset(dataset, ["labels"])
                        elif k1 == "access_controls":
                            v1_vals = v1.values()
                            # Skip if any access-control value is missing or blank.
                            if None in v1_vals or True in [
                                str(elem).isspace() for elem in v1_vals
                            ]:
                                pass
                            else:
                                entry = bigquery.AccessEntry(
                                    role=properties["access_controls"]["role"],
                                    entity_type=properties["access_controls"]["entity_type"],
                                    entity_id=properties["access_controls"]["entity_id"],
                                )
                                entries = list(dataset.access_entries)
                                entries.append(entry)
                                dataset.access_entries = entries
                                dataset = bq_client.update_dataset(
                                    dataset, ["access_entries"]
                                )
            except NotFound:
                print("Dataset {} does not exist.".format(dataset_id))
                dataset_updation_flag = dataset_updation_flag + 1
    except Exception as e:
        print("Exception occurred: {}".format(e))
        dataset_updation_flag = dataset_updation_flag + 1
    finally:
        return dataset_updation_flag
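# Example invocation sketch for update_datasets() above; every value below is
# a placeholder, not taken from the original snippet.
from google.cloud import bigquery

flag = update_datasets(
    bq_client=bigquery.Client(),
    project_id="my-project",
    datasets=["my_dataset"],
    properties={
        "description": "Reporting data, refreshed nightly",
        "default_table_expiration_ms": 24 * 60 * 60 * 1000,  # 24 hours
        "labels": {"env": "dev"},
        "access_controls": {
            "role": "READER",
            "entity_type": "domain",
            "entity_id": "example.com",
        },
    },
)
print("0 means success:", flag)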
def run_authorized_view_tutorial():
    # Note to user: This is a group email for testing purposes. Replace with
    # your own group email address when running this code.
    analyst_group_email = '*****@*****.**'

    # [START bigquery_authorized_view_tutorial]
    # Create a source dataset
    # [START bigquery_avt_create_source_dataset]
    from google.cloud import bigquery

    client = bigquery.Client()
    source_dataset_id = 'github_source_data'

    source_dataset = bigquery.Dataset(client.dataset(source_dataset_id))
    # Specify the geographic location where the dataset should reside.
    source_dataset.location = 'US'
    source_dataset = client.create_dataset(source_dataset)  # API request
    # [END bigquery_avt_create_source_dataset]

    # Populate a source table
    # [START bigquery_avt_create_source_table]
    source_table_id = 'github_contributors'
    job_config = bigquery.QueryJobConfig()
    job_config.destination = source_dataset.table(source_table_id)
    sql = """
        SELECT commit, author, committer, repo_name
        FROM `bigquery-public-data.github_repos.commits`
        LIMIT 1000
    """
    query_job = client.query(
        sql,
        # Location must match that of the dataset(s) referenced in the query
        # and of the destination table.
        location='US',
        job_config=job_config)  # API request - starts the query

    query_job.result()  # Waits for the query to finish
    # [END bigquery_avt_create_source_table]

    # Create a separate dataset to store your view
    # [START bigquery_avt_create_shared_dataset]
    shared_dataset_id = 'shared_views'
    shared_dataset = bigquery.Dataset(client.dataset(shared_dataset_id))
    shared_dataset.location = 'US'
    shared_dataset = client.create_dataset(shared_dataset)  # API request
    # [END bigquery_avt_create_shared_dataset]

    # Create the view in the new dataset
    # [START bigquery_avt_create_view]
    shared_view_id = 'github_analyst_view'
    view = bigquery.Table(shared_dataset.table(shared_view_id))
    sql_template = """
        SELECT commit, author.name as author,
        committer.name as committer, repo_name
        FROM `{}.{}.{}`
    """
    view.view_query = sql_template.format(
        client.project, source_dataset_id, source_table_id)
    view = client.create_table(view)  # API request
    # [END bigquery_avt_create_view]

    # Assign access controls to the dataset containing the view
    # [START bigquery_avt_shared_dataset_access]
    # analyst_group_email = '*****@*****.**'
    access_entries = shared_dataset.access_entries
    access_entries.append(
        bigquery.AccessEntry('READER', 'groupByEmail', analyst_group_email)
    )
    shared_dataset.access_entries = access_entries
    shared_dataset = client.update_dataset(
        shared_dataset, ['access_entries'])  # API request
    # [END bigquery_avt_shared_dataset_access]

    # Authorize the view to access the source dataset
    # [START bigquery_avt_source_dataset_access]
    access_entries = source_dataset.access_entries
    access_entries.append(
        bigquery.AccessEntry(None, 'view', view.reference.to_api_repr())
    )
    source_dataset.access_entries = access_entries
    source_dataset = client.update_dataset(
        source_dataset, ['access_entries'])  # API request
    # [END bigquery_avt_source_dataset_access]
    # [END bigquery_authorized_view_tutorial]

    return (source_dataset, shared_dataset)