import uuid

import google.cloud.bigquery
from google.cloud import bigquery


def copy_table(dataset_name, table_name, new_table_name, project=None):
    """Copies a table.

    If no project is specified, then the currently active project is used.
    """
    bigquery_client = bigquery.Client(project=project)
    dataset = bigquery_client.dataset(dataset_name)
    table = dataset.table(table_name)

    # This sample shows the destination table in the same dataset and project,
    # however, it's possible to copy across datasets and projects. You can
    # also copy multiple source tables into a single destination table by
    # providing additional arguments to `copy_table`.
    destination_table = dataset.table(new_table_name)

    # Create a job to copy the table to the destination table.
    job_id = str(uuid.uuid4())
    job = bigquery_client.copy_table(job_id, destination_table, table)

    # Create the table if it doesn't exist.
    job.create_disposition = (
        google.cloud.bigquery.job.CreateDisposition.CREATE_IF_NEEDED)

    job.begin()  # Start the job.
    print('Waiting for job to finish...')
    job.result()

    print('Table {} copied to {}.'.format(table_name, new_table_name))
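# A minimal usage sketch for the copy_table sample above, assuming default
# application credentials are configured. The dataset and table names below
# are placeholders, not values from the original sample.
if __name__ == '__main__':
    copy_table(
        dataset_name='my_dataset',          # hypothetical dataset
        table_name='source_table',          # hypothetical source table
        new_table_name='source_table_copy', # hypothetical destination table
        project=None,                       # falls back to the active project
    )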
def execute(
    self,
    query: str,
    destination_table: Optional[str] = None,
    write_disposition: Optional[
        google.cloud.bigquery.job.WriteDisposition] = None,
) -> None:
    dataset = google.cloud.bigquery.dataset.DatasetReference.from_string(
        self.dataset,
        default_project=self.project,
    )

    kwargs = {}
    if destination_table:
        kwargs["destination"] = dataset.table(destination_table)
        kwargs["write_disposition"] = (
            google.cloud.bigquery.job.WriteDisposition.WRITE_TRUNCATE
        )

    if write_disposition:
        kwargs["write_disposition"] = write_disposition

    config = google.cloud.bigquery.job.QueryJobConfig(
        default_dataset=dataset, **kwargs)
    job = self.client.query(query, config)

    # block on result
    job.result(max_results=1)

    if destination_table:
        # add a label with the current timestamp to the table
        self.add_labels_to_table(
            destination_table,
            {"last_updated": self._current_timestamp_label()},
        )
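# A hypothetical call to the execute() method above, assuming `runner` is an
# instance of the surrounding class with `client`, `project`, and `dataset`
# already configured. Passing WRITE_APPEND overrides the WRITE_TRUNCATE
# default that execute() sets when a destination table is given.
runner.execute(
    "SELECT 1 AS n",
    destination_table="scratch_results",  # hypothetical table name
    write_disposition=google.cloud.bigquery.job.WriteDisposition.WRITE_APPEND,
)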
def test_result_invokes_begin(self):
    begun_resource = self._make_resource()
    done_resource = copy.deepcopy(begun_resource)
    done_resource["status"] = {"state": "DONE"}
    connection = _make_connection(begun_resource, done_resource)
    client = _make_client(self.PROJECT)
    client._connection = connection

    job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client)
    job.result()

    self.assertEqual(len(connection.api_request.call_args_list), 2)
    begin_request, reload_request = connection.api_request.call_args_list
    self.assertEqual(begin_request[1]["method"], "POST")
    self.assertEqual(reload_request[1]["method"], "GET")
import uuid

import google.cloud.bigquery


def copyTable(projectFrom, projectTo, datasetFrom, datasetTo, tableName):
    """Copies a table between datasets, keeping the same table name."""
    table_source = datasetFrom.table(tableName)
    table_destination = datasetTo.table(tableName)

    job_id = str(uuid.uuid4())
    job = projectFrom.copy_table(job_id, table_destination, table_source)

    # Create the destination table if it doesn't exist.
    job.create_disposition = (
        google.cloud.bigquery.job.CreateDisposition.CREATE_IF_NEEDED)

    job.begin()  # Start the job.
    print('Waiting for job to finish...')
    job.result()

    print('Table {} copied to {}.'.format(tableName, tableName))
import logging

import google.cloud.exceptions


def run_sql(sql_script_path: str) -> tuple:
    """Runs a sql load script and returns the result rows and job errors.

    Args:
        sql_script_path: Path to the sql script file.

    Returns:
        A tuple of (rows, errors): the list of result rows (empty if the job
        failed) and the job's errors, if any.
    """
    client = _get_bq_client()
    with open(sql_script_path, 'r') as f:
        raw_query = f.read()
    query = _populate_table_names(raw_query)
    job = client.query(query)
    try:
        rows = job.result()
    except google.cloud.exceptions.GoogleCloudError as e:
        logging.error(e)
        return list(), job.errors
    return list(rows), job.errors
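# A brief usage sketch for run_sql(). The script path is a placeholder, and
# the helpers it relies on (_get_bq_client, _populate_table_names) are assumed
# to be defined elsewhere in the module.
rows, errors = run_sql('sql/load_events.sql')  # hypothetical script path
if errors:
    logging.warning('Load script finished with errors: %s', errors)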
def test_result(self):
    client = _make_client(project=self.PROJECT)
    resource = self._make_resource(ended=True)
    job = self._get_target_class().from_api_repr(resource, client)

    result = job.result()

    self.assertIs(result, job)
def execute(self, query: str, destination_table: Optional[str] = None) -> None:
    dataset = google.cloud.bigquery.dataset.DatasetReference.from_string(
        self.dataset,
        default_project=self.project,
    )

    kwargs = {}
    if destination_table:
        kwargs["destination"] = dataset.table(destination_table)
        kwargs["write_disposition"] = (
            google.cloud.bigquery.job.WriteDisposition.WRITE_TRUNCATE
        )

    config = google.cloud.bigquery.job.QueryJobConfig(
        default_dataset=dataset, **kwargs)
    job = self.client.query(query, config)

    # block on result
    job.result(max_results=1)
import logging

import google.cloud.exceptions
from google.cloud import bigquery


def _delete_rows(client: bigquery.Client, table_id: str) -> None:
    """Delete all rows in a table."""
    query = f'DELETE FROM `{table_id}` WHERE TRUE'
    job = client.query(query)
    try:
        job.result()
    except google.cloud.exceptions.GoogleCloudError as e:
        logging.error(e)
        raise
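# A hedged example of calling _delete_rows(), assuming default application
# credentials. The project, dataset, and table in the fully qualified table ID
# are placeholders.
client = bigquery.Client()
_delete_rows(client, 'my-project.my_dataset.my_table')  # hypothetical table ID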
def test_result_w_retry_wo_state(self):
    begun_job_resource = _make_job_resource(
        job_id=self.JOB_ID, project_id=self.PROJECT, location="EU", started=True
    )
    done_job_resource = _make_job_resource(
        job_id=self.JOB_ID,
        project_id=self.PROJECT,
        location="EU",
        started=True,
        ended=True,
    )
    conn = _make_connection(
        exceptions.NotFound("not normally retriable"),
        begun_job_resource,
        exceptions.NotFound("not normally retriable"),
        done_job_resource,
    )
    client = _make_client(project=self.PROJECT, connection=conn)
    job = self._make_one(
        self._job_reference(self.JOB_ID, self.PROJECT, "EU"), client)
    custom_predicate = mock.Mock()
    custom_predicate.return_value = True
    custom_retry = google.api_core.retry.Retry(
        predicate=custom_predicate,
        initial=0.001,
        maximum=0.001,
        deadline=0.1,
    )

    self.assertIs(job.result(retry=custom_retry), job)

    begin_call = mock.call(
        method="POST",
        path=f"/projects/{self.PROJECT}/jobs",
        data={
            "jobReference": {
                "jobId": self.JOB_ID,
                "projectId": self.PROJECT,
                "location": "EU",
            }
        },
        timeout=None,
    )
    reload_call = mock.call(
        method="GET",
        path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}",
        query_params={"location": "EU"},
        timeout=None,
    )
    conn.api_request.assert_has_calls(
        [begin_call, begin_call, reload_call, reload_call])
def test_result_explicit_w_state(self):
    conn = _make_connection()
    client = _make_client(project=self.PROJECT, connection=conn)
    job = self._make_one(self.JOB_ID, client)
    # Use _set_properties() instead of directly modifying _properties so
    # that the result state is set properly.
    job_resource = job._properties
    job_resource["status"] = {"state": "DONE"}
    job._set_properties(job_resource)
    timeout = 1

    self.assertIs(job.result(timeout=timeout), job)

    conn.api_request.assert_not_called()
def test_result_default_wo_state(self):
    begun_job_resource = _make_job_resource(
        job_id=self.JOB_ID, project_id=self.PROJECT, location="US", started=True
    )
    done_job_resource = _make_job_resource(
        job_id=self.JOB_ID,
        project_id=self.PROJECT,
        location="US",
        started=True,
        ended=True,
    )
    conn = _make_connection(
        _make_retriable_exception(),
        begun_job_resource,
        _make_retriable_exception(),
        done_job_resource,
    )
    client = _make_client(project=self.PROJECT, connection=conn)
    job = self._make_one(self.JOB_ID, client)

    self.assertIs(job.result(), job)

    begin_call = mock.call(
        method="POST",
        path=f"/projects/{self.PROJECT}/jobs",
        data={
            "jobReference": {
                "jobId": self.JOB_ID,
                "projectId": self.PROJECT,
            }
        },
        timeout=None,
    )
    reload_call = mock.call(
        method="GET",
        path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}",
        query_params={"location": "US"},
        timeout=None,
    )
    conn.api_request.assert_has_calls(
        [begin_call, begin_call, reload_call, reload_call])