def run_notification(transfer_config_name, pubsub_topic):
    orig_transfer_config_name = transfer_config_name
    orig_pubsub_topic = pubsub_topic
    # [START bigquerydatatransfer_run_notification]
    transfer_config_name = "projects/1234/locations/us/transferConfigs/abcd"
    pubsub_topic = "projects/PROJECT-ID/topics/TOPIC-ID"
    # [END bigquerydatatransfer_run_notification]
    transfer_config_name = orig_transfer_config_name
    pubsub_topic = orig_pubsub_topic

    # [START bigquerydatatransfer_run_notification]
    from google.cloud import bigquery_datatransfer
    from google.protobuf import field_mask_pb2

    transfer_client = bigquery_datatransfer.DataTransferServiceClient()

    transfer_config = bigquery_datatransfer.TransferConfig(name=transfer_config_name)
    transfer_config.notification_pubsub_topic = pubsub_topic
    update_mask = field_mask_pb2.FieldMask(paths=["notification_pubsub_topic"])

    transfer_config = transfer_client.update_transfer_config(
        {"transfer_config": transfer_config, "update_mask": update_mask}
    )

    print(f"Updated config: '{transfer_config.name}'")
    print(f"Notification Pub/Sub topic: '{transfer_config.notification_pubsub_topic}'")
    # [END bigquerydatatransfer_run_notification]

    # Return the config name for testing purposes, so that it can be deleted.
    return transfer_config
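# --- Illustrative usage sketch (not part of the original samples). ---
# A minimal, hedged example of calling run_notification() above and then
# cleaning up with delete_transfer_config (the same cleanup call used later
# in this file). The transfer config name and Pub/Sub topic below are
# hypothetical placeholders; replace them with real resource names.
from google.cloud import bigquery_datatransfer

if __name__ == "__main__":
    config = run_notification(
        transfer_config_name="projects/PROJECT-ID/locations/us/transferConfigs/CONFIG-ID",
        pubsub_topic="projects/PROJECT-ID/topics/TOPIC-ID",
    )

    # Remove the config afterwards so the sample leaves nothing behind.
    transfer_client = bigquery_datatransfer.DataTransferServiceClient()
    transfer_client.delete_transfer_config(name=config.name)
    print(f"Deleted transfer config: {config.name}")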
def update_credentials_with_service_account(override_values={}):
    # [START bigquerydatatransfer_update_credentials]
    from google.cloud import bigquery_datatransfer
    from google.protobuf import field_mask_pb2

    transfer_client = bigquery_datatransfer.DataTransferServiceClient()

    service_account_name = "*****@*****.**"
    transfer_config_name = "projects/1234/locations/us/transferConfigs/abcd"
    # [END bigquerydatatransfer_update_credentials]
    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    service_account_name = override_values.get("service_account_name", service_account_name)
    transfer_config_name = override_values.get("transfer_config_name", transfer_config_name)

    # [START bigquerydatatransfer_update_credentials]
    transfer_config = bigquery_datatransfer.TransferConfig(name=transfer_config_name)

    transfer_config = transfer_client.update_transfer_config(
        {
            "transfer_config": transfer_config,
            "update_mask": field_mask_pb2.FieldMask(paths=["service_account_name"]),
            "service_account_name": service_account_name,
        }
    )

    print("Updated config: '{}'".format(transfer_config.name))
    # [END bigquerydatatransfer_update_credentials]

    # Return the config name for testing purposes, so that it can be deleted.
    return transfer_config
def scheduled_query(transfer_client, dataset_id, table_id, query_string, schedule, name):
    from google.cloud import bigquery_datatransfer

    # The project where the query job runs is the same as the project
    # containing the destination dataset.
    parent = transfer_client.common_project_path("zapp-case-study")

    transfer_config = bigquery_datatransfer.TransferConfig(
        destination_dataset_id=dataset_id,
        display_name=name,
        data_source_id="scheduled_query",
        params={
            "query": query_string,
            "destination_table_name_template": table_id,
            "write_disposition": "WRITE_APPEND",
            "partitioning_field": "",
        },
        schedule=schedule,
    )

    transfer_config = transfer_client.create_transfer_config(
        bigquery_datatransfer.CreateTransferConfigRequest(
            parent=parent,
            transfer_config=transfer_config,
        )
    )

    print("Created scheduled query '{}'".format(transfer_config.name))
def update_config(override_values={}):
    # [START bigquerydatatransfer_update_config]
    from google.cloud import bigquery_datatransfer
    from google.protobuf import field_mask_pb2

    transfer_client = bigquery_datatransfer.DataTransferServiceClient()

    transfer_config_name = "projects/1234/locations/us/transferConfigs/abcd"
    new_display_name = "My Transfer Config"
    # [END bigquerydatatransfer_update_config]
    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    new_display_name = override_values.get("new_display_name", new_display_name)
    transfer_config_name = override_values.get("transfer_config_name", transfer_config_name)

    # [START bigquerydatatransfer_update_config]
    transfer_config = bigquery_datatransfer.TransferConfig(name=transfer_config_name)
    transfer_config.display_name = new_display_name

    transfer_config = transfer_client.update_transfer_config(
        {
            "transfer_config": transfer_config,
            "update_mask": field_mask_pb2.FieldMask(paths=["display_name"]),
        }
    )

    print(f"Updated config: '{transfer_config.name}'")
    print(f"New display name: '{transfer_config.display_name}'")
    # [END bigquerydatatransfer_update_config]

    # Return the config name for testing purposes, so that it can be deleted.
    return transfer_config
def scheduled_query(self):
    from google.cloud import bigquery_datatransfer

    transfer_client = bigquery_datatransfer.DataTransferServiceClient()

    # The project where the query job runs is the same as the project
    # containing the destination dataset.
    project_id = "your-project-id"
    dataset_id = "your_dataset_id"

    # This service account will be used to execute the scheduled queries. Omit
    # this request parameter to run the query as the user with the credentials
    # associated with this client.
    service_account_name = "*****@*****.**"

    # Use standard SQL syntax for the query.
    query_string = """
    SELECT CURRENT_TIMESTAMP() as current_time,
    @run_time as intended_run_time,
    @run_date as intended_run_date,
    17 as some_integer
    """

    parent = transfer_client.common_project_path(project_id)

    transfer_config = bigquery_datatransfer.TransferConfig(
        destination_dataset_id=dataset_id,
        display_name="Your Scheduled Query Name",
        data_source_id="scheduled_query",
        params={
            "query": query_string,
            "destination_table_name_template": "your_table_{run_date}",
            "write_disposition": "WRITE_TRUNCATE",
            "partitioning_field": "",
        },
        schedule="every 24 hours",
    )

    transfer_config = transfer_client.create_transfer_config(
        bigquery_datatransfer.CreateTransferConfigRequest(
            parent=parent,
            transfer_config=transfer_config,
            service_account_name=service_account_name,
        )
    )

    print("Created scheduled query '{}'".format(transfer_config.name))
def copy_dataset(override_values={}):
    # [START bigquerydatatransfer_copy_dataset]
    from google.cloud import bigquery_datatransfer

    transfer_client = bigquery_datatransfer.DataTransferServiceClient()

    destination_project_id = "my-destination-project"
    destination_dataset_id = "my_destination_dataset"
    source_project_id = "my-source-project"
    source_dataset_id = "my_source_dataset"
    # [END bigquerydatatransfer_copy_dataset]
    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    destination_project_id = override_values.get("destination_project_id", destination_project_id)
    destination_dataset_id = override_values.get("destination_dataset_id", destination_dataset_id)
    source_project_id = override_values.get("source_project_id", source_project_id)
    source_dataset_id = override_values.get("source_dataset_id", source_dataset_id)

    # [START bigquerydatatransfer_copy_dataset]
    transfer_config = bigquery_datatransfer.TransferConfig(
        destination_dataset_id=destination_dataset_id,
        display_name="Your Dataset Copy Name",
        data_source_id="cross_region_copy",
        params={
            "source_project_id": source_project_id,
            "source_dataset_id": source_dataset_id,
        },
        schedule="every 24 hours",
    )
    transfer_config = transfer_client.create_transfer_config(
        parent=transfer_client.common_project_path(destination_project_id),
        transfer_config=transfer_config,
    )
    print(f"Created transfer config: {transfer_config.name}")
    # [END bigquerydatatransfer_copy_dataset]
    return transfer_config
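# --- Illustrative verification sketch (not part of the original samples). ---
# A hedged example of checking on the dataset copy created by copy_dataset()
# above: list the runs recorded under the new transfer config and print their
# states. This assumes the client library's list_transfer_runs method, which
# does not appear in the original samples; the override values shown in the
# commented-out usage are hypothetical.
from google.cloud import bigquery_datatransfer


def print_copy_runs(transfer_config_name):
    transfer_client = bigquery_datatransfer.DataTransferServiceClient()
    # Runs are listed under the transfer config resource name.
    for run in transfer_client.list_transfer_runs(parent=transfer_config_name):
        print(f"Run {run.name}: state={run.state.name}, run_time={run.run_time}")


# Example (hypothetical values):
# config = copy_dataset({"destination_project_id": "my-destination-project"})
# print_copy_runs(config.name)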
def sample_create_transfer_config(project_id, dataset_id, authorization_code=""): # [START bigquerydatatransfer_create_scheduled_query] from google.cloud import bigquery_datatransfer client = bigquery_datatransfer.DataTransferServiceClient() # TODO(developer): Set the project_id to the project that contains the # destination dataset. # project_id = "your-project-id" # TODO(developer): Set the destination dataset. The authorized user must # have owner permissions on the dataset. # dataset_id = "your_dataset_id" # TODO(developer): The first time you run this sample, set the # authorization code to a value from the URL: # https://www.gstatic.com/bigquerydatatransfer/oauthz/auth?client_id=433065040935-hav5fqnc9p9cht3rqneus9115ias2kn1.apps.googleusercontent.com&scope=https://www.googleapis.com/auth/bigquery%20https://www.googleapis.com/auth/drive&redirect_uri=urn:ietf:wg:oauth:2.0:oob # # authorization_code = "_4/ABCD-EFGHIJKLMNOP-QRSTUVWXYZ" # # You can use an empty string for authorization_code in subsequent runs of # this code sample with the same credentials. # # authorization_code = "" # Use standard SQL syntax for the query. query_string = """ SELECT CURRENT_TIMESTAMP() as current_time, @run_time as intended_run_time, @run_date as intended_run_date, 17 as some_integer """ parent = f"projects/{project_id}" transfer_config = bigquery_datatransfer.TransferConfig( destination_dataset_id=dataset_id, display_name="Your Scheduled Query Name", data_source_id="scheduled_query", params={ "query": query_string, "destination_table_name_template": "your_table_{run_date}", "write_disposition": "WRITE_TRUNCATE", "partitioning_field": "", }, schedule="every 24 hours", ) response = client.create_transfer_config( request={ "parent": parent, "transfer_config": transfer_config, "authorization_code": authorization_code, }) print("Created scheduled query '{}'".format(response.name)) # [END bigquerydatatransfer_create_scheduled_query] # Return the config name for testing purposes, so that it can be deleted. return response.name
def create_scheduled_query(override_values={}):
    # [START bigquerydatatransfer_create_scheduled_query]
    # [START bigquerydatatransfer_create_scheduled_query_with_service_account]
    from google.cloud import bigquery_datatransfer

    transfer_client = bigquery_datatransfer.DataTransferServiceClient()

    # The project where the query job runs is the same as the project
    # containing the destination dataset.
    project_id = "your-project-id"
    dataset_id = "your_dataset_id"

    # This service account will be used to execute the scheduled queries. Omit
    # this request parameter to run the query as the user with the credentials
    # associated with this client.
    service_account_name = "*****@*****.**"
    # [END bigquerydatatransfer_create_scheduled_query_with_service_account]
    # [END bigquerydatatransfer_create_scheduled_query]
    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    project_id = override_values.get("project_id", project_id)
    dataset_id = override_values.get("dataset_id", dataset_id)
    service_account_name = override_values.get("service_account_name", service_account_name)

    # [START bigquerydatatransfer_create_scheduled_query]
    # [START bigquerydatatransfer_create_scheduled_query_with_service_account]
    # Use standard SQL syntax for the query.
    query_string = """
    SELECT CURRENT_TIMESTAMP() as current_time,
    @run_time as intended_run_time,
    @run_date as intended_run_date,
    17 as some_integer
    """

    parent = transfer_client.common_project_path(project_id)

    transfer_config = bigquery_datatransfer.TransferConfig(
        destination_dataset_id=dataset_id,
        display_name="Your Scheduled Query Name",
        data_source_id="scheduled_query",
        params={
            "query": query_string,
            "destination_table_name_template": "your_table_{run_date}",
            "write_disposition": "WRITE_TRUNCATE",
            "partitioning_field": "",
        },
        schedule="every 24 hours",
    )

    transfer_config = transfer_client.create_transfer_config(
        bigquery_datatransfer.CreateTransferConfigRequest(
            parent=parent,
            transfer_config=transfer_config,
            service_account_name=service_account_name,
        )
    )

    print("Created scheduled query '{}'".format(transfer_config.name))
    # [END bigquerydatatransfer_create_scheduled_query_with_service_account]
    # [END bigquerydatatransfer_create_scheduled_query]

    # Return the config name for testing purposes, so that it can be deleted.
    return transfer_config
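# --- Illustrative follow-up sketch (not part of the original samples). ---
# A hedged example of triggering the scheduled query created above right away
# by scheduling backfill runs, following the same schedule_transfer_runs
# pattern used later in this file. Scheduled queries only support daily runs,
# so the window is truncated to midnight UTC. The config name passed in is
# assumed to come from create_scheduled_query(); the example values in the
# commented-out usage are hypothetical.
import datetime

from google.cloud import bigquery_datatransfer


def backfill_last_two_days(transfer_config_name):
    transfer_client = bigquery_datatransfer.DataTransferServiceClient()

    now = datetime.datetime.now(datetime.timezone.utc)
    start_time = (now - datetime.timedelta(days=2)).replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    end_time = now.replace(hour=0, minute=0, second=0, microsecond=0)

    response = transfer_client.schedule_transfer_runs(
        parent=transfer_config_name,
        start_time=start_time,
        end_time=end_time,
    )
    for run in response.runs:
        print(f"Backfill {run.run_time} scheduled as run {run.name}")


# Example (hypothetical):
# config = create_scheduled_query({"project_id": "your-project-id"})
# backfill_last_two_days(config.name)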
def execute(self, name):
    # create a dataset
    if self.env.get("create"):
        dataset_id = self.make_dataset_id(name)
        dataset_init = bigquery.Dataset(dataset_id)
        dataset_init.location = "US"  # multi-region
        try:
            if name == "":
                raise IndexError
            dataset = client.create_dataset(dataset_init, timeout=30)
            return "Created dataset {}.{}".format(client.project, dataset.dataset_id)
        except IndexError:
            return "Please input a name"
        except BaseException as e:
            print("\nlook here\n")
            print(e)
            return "Dataset already exists; choose another name"
    #
    # copy a dataset
    elif self.env.get("copy"):
        transfer_client = bigquery_datatransfer.DataTransferServiceClient()
        # create a source dataset
        source_project_id = client.project
        source_dataset_id = "my_source_dataset"
        dataset_id = self.make_dataset_id(source_dataset_id)
        dataset_init = bigquery.Dataset(dataset_id)
        try:
            dataset = client.create_dataset(dataset_init, timeout=30)
        except BaseException as e:
            print("")  # no-op: the source dataset may already exist
        #
        try:
            destination_project_id = client.project
            destination_dataset_id = name
            if name == "":
                raise IndexError
            transfer_config = bigquery_datatransfer.TransferConfig(
                destination_dataset_id=destination_dataset_id,
                display_name="Your Dataset Copy Name",
                data_source_id="cross_region_copy",
                params={
                    "source_project_id": source_project_id,
                    "source_dataset_id": source_dataset_id,
                },
                # schedule="from ",
            )
            transfer_config = transfer_client.create_transfer_config(
                parent=transfer_client.common_project_path(destination_project_id),
                transfer_config=transfer_config,
            )
            return "Created transfer config: {}".format(transfer_config.name)
        except BaseException as e:
            print("\nlook here\n")
            print(e)
            return "Was not able to make a copy; maybe check the terminal where the Python server is running"
    #
    # access control
    elif self.env.get("access_control"):
        try:
            dataset_id = self.make_dataset_id(name)
            dataset = client.get_dataset(dataset_id)
            entry = bigquery.AccessEntry(
                role="READER",
                entity_type="userByEmail",
                entity_id="*****@*****.**",
            )
            entries = list(dataset.access_entries)
            entries.append(entry)
            dataset.access_entries = entries
            dataset = client.update_dataset(dataset, ["access_entries"])
            full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id)
            return "Updated dataset '{}' with modified user permissions.".format(full_dataset_id)
        except BaseException as e:
            print("\nlook here\n")
            print(e)
            return "An error occurred"
    #
    # list all datasets
    elif self.env.get("list"):
        try:
            datasets = list(client.list_datasets())
            project = client.project
            value = "datasets in project {}".format(project)
            if datasets:
                for dataset in datasets:
                    value += "\n{}".format(dataset.dataset_id)
                return value
            else:
                return "{} project does not contain any datasets.".format(project)
        except BaseException as e:
            print("\nlook here\n")
            print(e)
            return "An error occurred"
    #
    # metadata
    elif self.env.get("metadata"):
        value = "Metadata:\n"
        try:
            # get the dataset friendly name
            dataset_id = self.make_dataset_id(name)
            dataset = client.get_dataset(dataset_id)
            full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id)
            value += "{}: {}\n".format("Friendly Name", dataset.friendly_name)
            #
            # more properties
            value += "{}: {}\n".format("Description", dataset.description)
            value += "Labels:\n"
            labels = dataset.labels
            if labels:
                # Use a distinct loop variable so the accumulator `value` is not shadowed.
                for label, label_value in labels.items():
                    value += "\n{}: {}".format(label, label_value)
            else:
                value += "\tDataset has no labels defined.\n"
            #
            # View tables
            value += "Tables:"
            tables = list(client.list_tables(dataset))
            if tables:
                for table in tables:
                    value += "\n{}:".format(table.table_id)
            else:
                value += "\tDataset does not have tables.\n"
            #
            return value
        except BaseException as e:
            print("\nlook here\n")
            print(e)
            return "An error occurred"
    #
    # update dataset
    elif self.env.get("update"):
        value = ""
        try:
            # update dataset description
            dataset_id = self.make_dataset_id(name)
            dataset = client.get_dataset(dataset_id)  # Make an API request.
            print(dataset)
            dataset.description = "Updated description."
            dataset = client.update_dataset(dataset, ["description"])  # Make an API request.
            full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id)
            value += "Updated dataset '{}' with description '{}'.\n".format(
                full_dataset_id, dataset.description)
            #
            # update default table expiration
            dataset.default_table_expiration_ms = 24 * 60 * 60 * 1000  # 24 hours, in milliseconds
            dataset = client.update_dataset(dataset, ["default_table_expiration_ms"])
            value += "Updated dataset '{}' with table expiration '{}'.\n".format(
                full_dataset_id, dataset.default_table_expiration_ms)
            #
            # update default partition expiration
            dataset.default_partition_expiration_ms = 24 * 60 * 60 * 1000  # 24 hours, in milliseconds
            dataset = client.update_dataset(dataset, ["default_partition_expiration_ms"])
            value += "Updated dataset '{}' with table partition expiration '{}'.\n".format(
                full_dataset_id, dataset.default_partition_expiration_ms)
            #
            return value
        except IndexError:
            return "Provide the dataset name"
        except BaseException as e:
            print("\nlook here\n")
            print(e)
            return "An error occurred"
    #
    # delete dataset
    elif self.env.get("delete"):
        try:
            dataset_id = self.make_dataset_id(name)
            client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=False)
            return "Deleted dataset '{}'.".format(dataset_id)
        except IndexError:
            return "Provide the dataset name"
        except BaseException as e:
            print("\nlook here\n")
            print(e.__class__.__name__)
            if e.__class__.__name__ == "NotFound":
                return "Could not find dataset with name {} to delete".format(dataset_id)
            return "An error occurred"
from google.cloud import bigquery_datatransfer

client = bigquery_datatransfer.DataTransferServiceClient()

# TODO: Replace variables before attempting
project = ''
source_dataset_id = ''
destination_dataset_id = ''
display_name = 'Python Data Transfer final'

parent = f"projects/{project}"

transfer_config = bigquery_datatransfer.TransferConfig(
    destination_dataset_id=destination_dataset_id,
    display_name=display_name,
    data_source_id="cross_region_copy",
    params={
        "source_dataset_id": source_dataset_id,
        "source_project_id": project,
        "overwrite_destination_table": "true",
    },
)

response = client.create_transfer_config(
    request={
        "parent": parent,
        "transfer_config": transfer_config,
        # "authorization_code": authorization_code,
    }
)

print("Transfer created: '{}'".format(response.name))
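# --- Illustrative verification sketch (not part of the original samples). ---
# A hedged continuation of the copy snippet above: list the transfer configs
# in the project to confirm the new cross-region copy config exists, using the
# same list_transfer_configs call that appears later in this file. `client`
# and `project` are assumed to be the variables defined above.
configs = client.list_transfer_configs(parent=f"projects/{project}")
for config in configs:
    print(f"{config.name}: {config.display_name} ({config.data_source_id})")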
from google.cloud import bigquery_datatransfer

# TODO(developer): Set project_id, dataset_id, and service_account_name
# before running this snippet.
transfer_client = bigquery_datatransfer.DataTransferServiceClient()

query_string = """
SELECT CURRENT_TIMESTAMP() as current_time,
@run_time as intended_run_time,
@run_date as intended_run_date,
17 as some_integer
"""

parent = transfer_client.common_project_path(project_id)

transfer_config = bigquery_datatransfer.TransferConfig(
    destination_dataset_id=dataset_id,
    display_name="Your Scheduled Query Name",
    data_source_id="scheduled_query",
    params={
        "query": query_string,
        "destination_table_name_template": "mascots{20210115}",
        "write_disposition": "WRITE_TRUNCATE",
        "partitioning_field": "",
    },
    schedule="every 24 hours",
)

transfer_config = transfer_client.create_transfer_config(
    bigquery_datatransfer.CreateTransferConfigRequest(
        parent=parent,
        transfer_config=transfer_config,
        service_account_name=service_account_name,
    )
)

print("Created scheduled query '{}'".format(transfer_config.name))
def execute(self, data):
    # setup
    import json  # used to serialize query results below

    client = self.client
    bigquery = self.bigquery
    bigquery_datatransfer = self.bigquery_datatransfer
    datetime = self.datetime
    pytz = self.pytz
    time = self.time
    name = data.get("titleName")
    emails = data.get("emails")
    query = data.get("query")
    table = ""
    #
    # create a dataset first if needed
    dataset_main = self.make_dataset()
    table_id = "{}.{}".format(dataset_main, name)
    #
    # create a table if needed
    table = self.make_table(table_id)
    #
    # an interactive query
    if self.env.get("interactive"):
        try:
            """
            query =
                SELECT name, SUM(number) as total_people
                FROM `bigquery-public-data.usa_names.usa_1910_2013`
                WHERE state = 'TX'
                GROUP BY name, state
                ORDER BY total_people DESC
                LIMIT 20
            """
            query_job = client.query(query)  # Make an API request.
            return json.dumps({
                "schema": [{"field": x} for x in ["Name", "Count"]],
                "data": [
                    # Row values can be accessed by field name or index.
                    {"Name": row[0], "Count": row["total_people"]}
                    for row in query_job
                ],
            })
        except BaseException as e:
            print('my custom error\n')
            print(e.__class__.__name__)
            print('\n')
            print(e)
            return 'an error occurred, check the output from the backend'
    #
    # a batch query
    elif self.env.get("batch"):
        try:
            job_config = bigquery.QueryJobConfig(
                # Run at batch priority, which won't count toward concurrent rate limit.
                priority=bigquery.QueryPriority.BATCH
            )
            """
            query =
                SELECT corpus
                FROM `bigquery-public-data.samples.shakespeare`
                GROUP BY corpus;
            """
            # Start the query, passing in the extra configuration.
            query_job = client.query(query, job_config=job_config)  # Make an API request.
            query_job = client.get_job(
                query_job.job_id, location=query_job.location
            )  # Make an API request.
            return "Job {} is currently in state {}".format(query_job.job_id, query_job.state)
        except BaseException as e:
            print('my custom error\n')
            print(e.__class__.__name__)
            print('\n')
            print(e)
            return 'an error occurred, check the output from the backend'
    #
    # a dry run
    elif self.env.get("dry_run"):
        try:
            """
            query =
                SELECT name, COUNT(*) as name_count
                FROM `bigquery-public-data.usa_names.usa_1910_2013`
                WHERE state = 'WA'
                GROUP BY name
            """
            job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
            # Start the query, passing in the extra configuration.
            query_job = client.query(query, job_config=job_config)  # Make an API request.
            # A dry run query completes immediately.
            return "This query will process {} megabytes.".format(
                query_job.total_bytes_processed / 1000000)
        except BaseException as e:
            print('my custom error\n')
            print(e.__class__.__name__)
            print('\n')
            print(e)
            return 'an error occurred, check the output from the backend'
    #
    # write to a permanent table
    elif self.env.get("permanent_table"):
        try:
            """
            query =
                SELECT corpus
                FROM `bigquery-public-data.samples.shakespeare`
                GROUP BY corpus;
            """
            job_config = bigquery.QueryJobConfig(destination=table_id)
            # Start the query, passing in the extra configuration.
            query_job = client.query(query, job_config=job_config)  # Make an API request.
            # For queries that return massive results using legacy SQL:
            # job_config = bigquery.QueryJobConfig(
            #     allow_large_results=True, destination=table_id, use_legacy_sql=True
            # )
            query_job.result()  # Wait for the job to complete.
            return "Query results loaded to the table {}".format(table_id)
        except BaseException as e:
            print('my custom error\n')
            print(e.__class__.__name__)
            print('\n')
            print(e)
            return 'an error occurred, check the output from the backend'
    #
    # query caching
    elif self.env.get("cache_enable/disable"):
        try:
            """
            query =
                SELECT corpus
                FROM `bigquery-public-data.samples.shakespeare`
                GROUP BY corpus;
            """
            job_config = bigquery.QueryJobConfig(
                use_query_cache=False  # to disable
                # createDisposition="CREATE_NEVER"  # to enable
            )
            query_job = client.query(query, job_config=job_config)  # Make an API request.
            query_job.result()
            return "was this query cached? {}".format(query_job.cache_hit)
        except BaseException as e:
            print('my custom error\n')
            print(e.__class__.__name__)
            print('\n')
            print(e)
            return 'an error occurred, check the output from the backend'
    #
    # regular parameterized
    elif self.env.get("parameterized"):
        try:
            """
            query =
                SELECT word, word_count
                FROM `bigquery-public-data.samples.shakespeare`
                WHERE corpus = @corpus
                AND word_count >= @min_word_count
                ORDER BY word_count DESC;
            """
            job_config = bigquery.QueryJobConfig(
                query_parameters=[
                    bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet"),
                    bigquery.ScalarQueryParameter("min_word_count", "INT64", 250),
                ]
            )
            query_job = client.query(query, job_config=job_config)  # Make an API request.
            return json.dumps({
                "schema": [{"field": x} for x in ["word", "word_count"]],
                "data": [
                    # Row values can be accessed by field name or index.
                    {"word": row["word"], "word_count": row["word_count"]}
                    for row in query_job
                ],
            })
        except BaseException as e:
            print('my custom error\n')
            print(e.__class__.__name__)
            print('\n')
            print(e)
            return 'an error occurred, check the output from the backend'
    #
    # array parameterized
    elif self.env.get("array_parameterized"):
        try:
            schema = ["name", "count"]
            """
            query =
                SELECT name, sum(number) as count
                FROM `bigquery-public-data.usa_names.usa_1910_2013`
                WHERE gender = @gender
                AND state IN UNNEST(@states)
                GROUP BY name
                ORDER BY count DESC
                LIMIT 10;
            """
            job_config = bigquery.QueryJobConfig(
                query_parameters=[
                    bigquery.ScalarQueryParameter("gender", "STRING", "M"),
                    bigquery.ArrayQueryParameter("states", "STRING", ["WA", "WI", "WV", "WY"]),
                ]
            )
            query_job = client.query(query, job_config=job_config)  # Make an API request.
            return json.dumps({
                "schema": [{"field": x} for x in schema],
                "data": [
                    # Row values can be accessed by field name or index.
                    {schema[0]: row[schema[0]], schema[1]: row[schema[1]]}
                    for row in query_job
                ],
            })
        except BaseException as e:
            print('my custom error\n')
            print(e.__class__.__name__)
            print('\n')
            print(e)
            return 'an error occurred, check the output from the backend'
    #
    # timestamp parameters
    elif self.env.get("timestamp_parameterized"):
        try:
            schema = ["date", "f0_"]
            """
            query =
                SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);
            """
            job_config = bigquery.QueryJobConfig(
                query_parameters=[
                    bigquery.ScalarQueryParameter(
                        "ts_value",
                        "TIMESTAMP",
                        datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC),
                    )
                ]
            )
            query_job = client.query(query, job_config=job_config)  # Make an API request.
            return json.dumps(
                {
                    "schema": [{"field": x} for x in schema],
                    "data": [
                        # Row values can be accessed by field name or index.
                        {
                            "date": row[0].strftime("%m/%d/%Y, %H:%M:%S"),
                            schema[1]: row[schema[1]],
                        }
                        for row in query_job
                    ],
                },
                default=str,
            )
        except BaseException as e:
            print('my custom error\n')
            print(e.__class__.__name__)
            print('\n')
            print(e)
            return 'an error occurred, check the output from the backend'
    #
    # struct parameters
    elif self.env.get("struct_parameterized"):
        try:
            schema = ["s"]
            """
            query =
                SELECT @struct_value AS s;
            """
            job_config = bigquery.QueryJobConfig(
                query_parameters=[
                    bigquery.StructQueryParameter(
                        "struct_value",
                        bigquery.ScalarQueryParameter("x", "INT64", 1),
                        bigquery.ScalarQueryParameter("y", "STRING", "foo"),
                    )
                ]
            )
            query_job = client.query(query, job_config=job_config)  # Make an API request.
            return json.dumps({
                "schema": [{"field": x} for x in schema],
                "data": [
                    # Row values can be accessed by field name or index.
                    {schema[0]: str(row[schema[0]])}
                    for row in query_job
                ],
            })
        except BaseException as e:
            print('my custom error\n')
            print(e.__class__.__name__)
            print('\n')
            print(e)
            return 'an error occurred, check the output from the backend'
    #
    # wildcard
    elif self.env.get("mutliple_by_wildcard"):
        try:
            job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
            # Replace the asterisk with a number or change the table suffixes
            # to see the amount of data used.
            """
            query =
                SELECT max, ROUND((max-32)*5/9,1) celsius, mo, da, year
                FROM `bigquery-public-data.noaa_gsod.gsod194*`
                WHERE max != 9999.9  # code for missing data
                AND (
                    _TABLE_SUFFIX = '0'
                    OR _TABLE_SUFFIX = '4'
                )
                ORDER BY max DESC
            """
            # Start the query, passing in the extra configuration.
            query_job = client.query(query, job_config=job_config)  # Make an API request.
            # A dry run query completes immediately.
            return "This query will process {} megabytes.".format(
                query_job.total_bytes_processed / 1000000)
        except BaseException as e:
            print('my custom error\n')
            print(e.__class__.__name__)
            print('\n')
            print(e)
            return 'an error occurred, check the output from the backend'
    #
    # scheduled query
    # using bigquery datatransfer, the names are not joined as much
    elif self.env.get("scheduled"):
        try:
            # Use standard SQL syntax for the query.
            """
            query =
                SELECT CURRENT_TIMESTAMP() as current_time,
                @run_time as intended_run_time,
                @run_date as intended_run_date,
                17 as some_integer
            """
            transfer_client = bigquery_datatransfer.DataTransferServiceClient()
            project_id = client.project
            dataset_id = self.dataset_names[0]
            # This service account will be used to execute the scheduled queries. Omit
            # this request parameter to run the query as the user with the credentials
            # associated with this client. Remember roles and permissions.
            # service_account_name = "*****@*****.**"
            parent = transfer_client.common_project_path(project_id)
            transfer_config = bigquery_datatransfer.TransferConfig(
                destination_dataset_id=dataset_id,
                display_name="Your Scheduled Query Name",
                data_source_id="scheduled_query",
                params={
                    "query": query,
                    "destination_table_name_template": "your_table_{run_date}",
                    "write_disposition": "WRITE_APPEND",
                    "partitioning_field": "",
                },
                schedule="every 24 hours",
            )
            transfer_config = transfer_client.create_transfer_config(
                bigquery_datatransfer.CreateTransferConfigRequest(
                    parent=parent,
                    transfer_config=transfer_config,
                    # service_account_name=service_account_name,
                )
            )
            return "Created scheduled query '{}'".format(transfer_config.name)
        except BaseException as e:
            print('my custom error\n')
            print(e.__class__.__name__)
            print('\n')
            print(e)
            return 'an error occurred, check the output from the backend'
    #
    # check schedule status
    # you can see them in Scheduled Queries
    elif self.env.get("scheduled_status"):
        try:
            transfer_client = bigquery_datatransfer.DataTransferServiceClient()
            project_id = client.project
            parent = transfer_client.common_project_path(project_id)
            configs = transfer_client.list_transfer_configs(parent=parent)
            result = "Got the following configs:"
            for config in configs:
                result += f"\n\tID: {config.name}, Schedule: {config.schedule}"
            return result
        except BaseException as e:
            print('my custom error\n')
            print(e.__class__.__name__)
            print('\n')
            print(e)
            return 'an error occurred, check the output from the backend'
    #
    # manual runs
    # used to immediately execute a query
    elif self.env.get("manual_run"):
        try:
            transfer_client = bigquery_datatransfer.DataTransferServiceClient()
            transfer_config_name = query
            now = datetime.datetime.now(datetime.timezone.utc)
            start_time = now - datetime.timedelta(days=5)
            end_time = now - datetime.timedelta(days=2)
            # Some data sources, such as scheduled_query, only support daily runs.
            # Truncate start_time and end_time to midnight (00:00 UTC).
            start_time = datetime.datetime(
                start_time.year, start_time.month, start_time.day,
                tzinfo=datetime.timezone.utc
            )
            end_time = datetime.datetime(
                end_time.year, end_time.month, end_time.day,
                tzinfo=datetime.timezone.utc
            )
            response = transfer_client.schedule_transfer_runs(
                parent=transfer_config_name,
                start_time=start_time,
                end_time=end_time,
            )
            result = "Started transfer runs:"
            for run in response.runs:
                result += f"\n\tbackfill: {run.run_time} run: {run.name}"
            return result
        except BaseException as e:
            print('my custom error\n')
            print(e.__class__.__name__)
            print('\n')
            print(e)
            return 'an error occurred, check the output from the backend'
    #
    # delete scheduled query
    elif self.env.get("scheduled_delete"):
        try:
            import google.api_core.exceptions

            transfer_client = bigquery_datatransfer.DataTransferServiceClient()
            transfer_config_name = query
            transfer_client.delete_transfer_config(name=transfer_config_name)
            return f"Deleted transfer config: {transfer_config_name}"
        except google.api_core.exceptions.NotFound:
            return "Transfer config not found."
        except BaseException as e:
            print('my custom error\n')
            print(e.__class__.__name__)
            print('\n')
            print(e)
            return 'an error occurred, check the output from the backend'
    #
    return "Check the backend env dictionary; it wasn't set, so the backend didn't do anything"