import logging

from google.cloud.bigquery.job import LoadTableFromStorageJob
from google.cloud.storage import Blob

# BigQueryer, PostBigQueryer, CloudStorager, current_time_ms and wait_for_job
# are project-local helpers defined elsewhere in this package.
logger = logging.getLogger(__name__)


def txfr_blob(filename: str,
              bq: BigQueryer = PostBigQueryer(),
              cs: CloudStorager = CloudStorager()):
    """
    Uploads the file to Cloud Storage and loads it into BigQuery.
    This would probably be better as a shell script.

    :param filename: path of the local Avro file to load
    :param bq: BigQuery wrapper
    :param cs: Cloud Storage wrapper
    :return:
    """
    tm = current_time_ms()  # pain in the ass to get nanotime in python apparently
    objname = 'api-update-blob-{}'.format(tm)
    blob = Blob(objname, cs.get_cloud_storage_bucket())
    logger.info("Uploading file (this will take a long time)... ")
    blob.upload_from_filename(filename)

    # change this to change table
    table = bq.get_bigquery_table()
    uri = 'gs://' + cs.bucket + '/' + objname
    logger.info("Loading file to BQ...")
    # insert into tmp table
    # tmptable = bq.client.dataset('forums').table(objname)
    job = LoadTableFromStorageJob('api-job-{}'.format(tm), table, [uri],
                                  client=bq.client)
    job.write_disposition = 'WRITE_APPEND'
    job.source_format = 'AVRO'
    job.begin()
    wait_for_job(job)

    logger.info("Cleaning up...")
    blob.delete(cs.client)
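For orientation, a minimal sketch of calling txfr_blob from a script entry point. The Avro path and the logging configuration are illustrative assumptions, and the default PostBigQueryer()/CloudStorager() arguments expect Google Cloud credentials to be available in the environment.

import logging

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    # Placeholder path: point it at the Avro file produced upstream.
    txfr_blob('/tmp/api-update.avro')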
def load_table_from_storage(self, job_name, destination, *source_uris):
    """Construct a job for loading data into a table from CloudStorage.

    See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load

    :type job_name: str
    :param job_name: Name of the job.

    :type destination: :class:`google.cloud.bigquery.table.Table`
    :param destination: Table into which data is to be loaded.

    :type source_uris: sequence of string
    :param source_uris: URIs of data files to be loaded; in format
                        ``gs://<bucket_name>/<object_name_or_glob>``.

    :rtype: :class:`google.cloud.bigquery.job.LoadTableFromStorageJob`
    :returns: a new ``LoadTableFromStorageJob`` instance
    """
    return LoadTableFromStorageJob(job_name, destination, source_uris,
                                   client=self)
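Since this helper only wraps the LoadTableFromStorageJob constructor, the load step in txfr_blob above could go through the client instead of building the job by hand. A minimal sketch, assuming the google.cloud.bigquery client API shown here; the project, dataset, table, and bucket/object names are illustrative assumptions and credentials come from the environment.

from google.cloud import bigquery

client = bigquery.Client(project='my-project')
table = client.dataset('forums').table('posts')
# Same effect as LoadTableFromStorageJob(..., client=client) in txfr_blob.
job = client.load_table_from_storage('api-job-example', table,
                                     'gs://my-bucket/api-update-blob-example')
job.write_disposition = 'WRITE_APPEND'
job.source_format = 'AVRO'
job.begin()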
def job_from_resource(self, resource):
    """Detect correct job type from resource and instantiate.

    :type resource: dict
    :param resource: one job resource from API response

    :rtype: One of:
            :class:`google.cloud.bigquery.job.LoadTableFromStorageJob`,
            :class:`google.cloud.bigquery.job.CopyJob`,
            :class:`google.cloud.bigquery.job.ExtractTableToStorageJob`,
            :class:`google.cloud.bigquery.job.QueryJob`,
            :class:`google.cloud.bigquery.job.RunSyncQueryJob`
    :returns: the job instance, constructed via the resource
    """
    config = resource['configuration']
    if 'load' in config:
        return LoadTableFromStorageJob.from_api_repr(resource, self)
    elif 'copy' in config:
        return CopyJob.from_api_repr(resource, self)
    elif 'extract' in config:
        return ExtractTableToStorageJob.from_api_repr(resource, self)
    elif 'query' in config:
        return QueryJob.from_api_repr(resource, self)
    raise ValueError('Cannot parse job resource')
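To see the dispatch in action, a minimal sketch of feeding a trimmed jobs API resource back through job_from_resource; because the configuration carries a 'load' section, it comes back as a LoadTableFromStorageJob. Every identifier in the resource below is an illustrative assumption, not taken from a real API response.

from google.cloud import bigquery
from google.cloud.bigquery.job import LoadTableFromStorageJob

client = bigquery.Client(project='my-project')

# Trimmed v2 jobs resource; values are placeholders for illustration only.
resource = {
    'jobReference': {'projectId': 'my-project', 'jobId': 'api-job-example'},
    'configuration': {
        'load': {
            'sourceUris': ['gs://my-bucket/api-update-blob-example'],
            'destinationTable': {
                'projectId': 'my-project',
                'datasetId': 'forums',
                'tableId': 'posts',
            },
        },
    },
}

job = client.job_from_resource(resource)
assert isinstance(job, LoadTableFromStorageJob)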