def test_extract_table(client, to_delete):
    """System test: extract a BigQuery table to a GCS bucket and check its contents.

    Creates a throwaway dataset and table, inserts ROWS, extracts the table to a
    CSV blob in a freshly created bucket, and asserts a known name appears in the
    downloaded output. Cleanup is delegated to the ``to_delete`` fixture; blobs
    and tables are prepended so they are removed before their containers.
    """
    dataset_id = 'export_data_dataset_{}'.format(_millis())
    dataset = bigquery.Dataset(client.dataset(dataset_id))
    client.create_dataset(dataset)
    to_delete.append(dataset)

    table_ref = dataset.table('person_ages')
    table = client.create_table(bigquery.Table(table_ref, schema=SCHEMA))
    # Prepend so the table is deleted before its parent dataset.
    to_delete.insert(0, table)
    client.create_rows(table, ROWS)

    bucket_name = 'extract_person_ages_job_{}'.format(_millis())
    # [START extract_table]
    from google.cloud.storage import Client as StorageClient

    gcs_client = StorageClient()
    bucket = gcs_client.create_bucket(bucket_name)  # API request
    destination_blob_name = 'person_ages_out.csv'
    destination = bucket.blob(destination_blob_name)

    destination_uri = 'gs://{}/{}'.format(bucket_name, destination_blob_name)
    extract_job = client.extract_table(table_ref, destination_uri)  # API request
    extract_job.result(timeout=100)  # Waits for job to complete.

    got = destination.download_as_string().decode('utf-8')  # API request
    assert 'Bharney Rhubble' in got
    # [END extract_table]
    to_delete.append(bucket)
    # Prepend so the blob is deleted before its parent bucket.
    to_delete.insert(0, destination)
def __init__(
    self,
    application_credentials: Optional[Union[str, os.PathLike]] = None,
    credentials: Optional["Credentials"] = None,
    project: Optional[str] = None,
    storage_client: Optional["StorageClient"] = None,
    local_cache_dir: Optional[Union[str, os.PathLike]] = None,
):
    """Set up a [`Storage Client`](https://googleapis.dev/python/storage/latest/client.html).

    Supported authentication methods, in increasing order of priority
    (a method later in this list wins over an earlier one):

    - Environment variable `"GOOGLE_APPLICATION_CREDENTIALS"` pointing at a
      JSON credentials file for a Google service account. See
      [Authenticating as a Service Account](https://cloud.google.com/docs/authentication/production).
    - File path to a JSON credentials file for a Google service account.
    - OAuth2 Credentials object and a project name.
    - Instantiated and already authenticated `Storage Client`.

    If no method is provided, the client is created as anonymous and can only
    access public buckets.

    Args:
        application_credentials (Optional[Union[str, os.PathLike]]): Path to Google service account
            credentials file.
        credentials (Optional[Credentials]): The OAuth2 Credentials to use for this client. See
            documentation for [`StorageClient`](
            https://googleapis.dev/python/storage/latest/client.html).
        project (Optional[str]): The project which the client acts on behalf of. See documentation
            for [`StorageClient`](https://googleapis.dev/python/storage/latest/client.html).
        storage_client (Optional[StorageClient]): Instantiated [`StorageClient`](
            https://googleapis.dev/python/storage/latest/client.html).
        local_cache_dir (Optional[Union[str, os.PathLike]]): Path to directory to use as cache
            for downloaded files. If None, will use a temporary directory.
    """
    # The env var is only a fallback for an explicitly passed credentials path.
    if application_credentials is None:
        application_credentials = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")

    # Highest-priority method that was supplied wins.
    if storage_client is not None:
        self.client = storage_client
    elif credentials is not None:
        self.client = StorageClient(credentials=credentials, project=project)
    elif application_credentials is not None:
        self.client = StorageClient.from_service_account_json(application_credentials)
    else:
        self.client = StorageClient.create_anonymous_client()

    super().__init__(local_cache_dir=local_cache_dir)
def _uploadFile(credentials: dict, bucket_name: str, key: str, file_name: str) -> None:
    """
    Uploads a file to cloud storage.

    :param credentials: The Google cloud storage service credentials retrieved
                        from the Kubernetes secret.
    :param bucket_name: The name of the bucket.
    :param key: The key to save the file in the cloud storage.
    :param file_name: The local file that will be uploaded.
    """
    # Build an authenticated client from the raw service-account info.
    service_credentials = ServiceCredentials.from_service_account_info(credentials)
    gcs_client = StorageClient(service_credentials.project_id, service_credentials)
    blob = gcs_client.bucket(bucket_name).blob(key)
    blob.upload_from_filename(file_name)
    logging.info("Backup uploaded to gcs://%s/%s", bucket_name, key)
def _write_csv_to_storage(bucket_name, blob_name, header_row, data_rows):
    """Create a GCS bucket and upload a CSV blob built from the given rows.

    Returns the ``(bucket, blob)`` pair so the caller can clean them up.
    """
    import csv
    from google.cloud._testing import _NamedTemporaryFile
    from google.cloud.storage import Client as StorageClient

    gcs = StorageClient()
    # In the **very** rare case the bucket name is reserved, this
    # fails with a ConnectionError.
    bucket = gcs.create_bucket(bucket_name)
    blob = bucket.blob(blob_name)

    with _NamedTemporaryFile() as tmp:
        # Write the CSV to disk first, then stream it up with the right type.
        with open(tmp.name, 'w') as csv_write:
            csv_writer = csv.writer(csv_write)
            csv_writer.writerow(header_row)
            csv_writer.writerows(data_rows)
        with open(tmp.name, 'rb') as csv_read:
            blob.upload_from_file(csv_read, content_type='text/csv')

    return bucket, blob
def _downloadFile(credentials: dict, bucket_name: str, key: str, file_name: str) -> str:
    """
    Downloads a file from cloud storage.

    :param credentials: The Google cloud storage service credentials retrieved
                        from the Kubernetes secret.
    :param bucket_name: The name of the bucket.
    :param key: The key to download the file from the cloud storage.
    :param file_name: The file that will be downloaded.
    :return: The location of the downloaded file.
    """
    # Build an authenticated client from the raw service-account info.
    service_credentials = ServiceCredentials.from_service_account_info(credentials)
    gcs_client = StorageClient(service_credentials.project_id, service_credentials)
    bucket = gcs_client.get_bucket(bucket_name)
    logging.info("Going to download gcs://%s/%s", bucket_name, key)
    bucket.blob(key).download_to_filename(file_name)
    logging.info("Backup gcs://%s/%s downloaded to %s", bucket_name, key, file_name)
    return file_name
def test_extract_table(client, to_delete):
    """System test: load JSON rows from memory, extract the table to GCS, verify.

    Loads ROWS into a fresh table via an in-memory newline-delimited JSON file,
    extracts the table to a CSV blob in a newly created bucket, and asserts a
    known name appears in the downloaded output. Cleanup is delegated to the
    ``to_delete`` fixture (children prepended so they go before containers).
    """
    dataset_id = 'export_data_dataset_{}'.format(_millis())
    dataset = bigquery.Dataset(client.dataset(dataset_id))
    client.create_dataset(dataset)
    to_delete.append(dataset)

    table_ref = dataset.table('person_ages')
    records = [{'full_name': name, 'age': age} for name, age in ROWS]
    json_lines = [json.dumps(record) for record in records]
    payload = six.StringIO('\n'.join(json_lines) + '\n')

    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = 'WRITE_TRUNCATE'
    job_config.source_format = 'NEWLINE_DELIMITED_JSON'
    job_config.schema = SCHEMA
    to_delete.insert(0, table_ref)

    # Load a table using a local JSON file from memory.
    client.load_table_from_file(payload, table_ref, job_config=job_config).result()

    bucket_name = 'extract_person_ages_job_{}'.format(_millis())
    # [START extract_table]
    from google.cloud.storage import Client as StorageClient

    gcs_client = StorageClient()
    bucket = gcs_client.create_bucket(bucket_name)  # API request
    destination_blob_name = 'person_ages_out.csv'
    destination = bucket.blob(destination_blob_name)

    destination_uri = 'gs://{}/{}'.format(bucket_name, destination_blob_name)
    extract_job = client.extract_table(table_ref, destination_uri)  # API request
    extract_job.result(timeout=100)  # Waits for job to complete.

    got = destination.download_as_string().decode('utf-8')  # API request
    assert 'Bharney Rhubble' in got
    # [END extract_table]
    to_delete.append(bucket)
    to_delete.insert(0, destination)
def _lastBackupFile(credentials: dict, bucket_name: str, key: str) -> str:
    """
    Gets the name of the last backup file in the bucket.

    :param credentials: The Google cloud storage service credentials retrieved
                        from the Kubernetes secret.
    :param bucket_name: The name of the bucket.
    :param key: The prefix of the backups.
    :return: The location of the last backup file (prefix stripped), or None
             if no backup file exists under the given prefix.
    """
    service_credentials = ServiceCredentials.from_service_account_info(credentials)
    gcs_client = StorageClient(service_credentials.project_id, service_credentials)
    bucket = gcs_client.get_bucket(bucket_name)

    # Scan all blobs under the prefix, keeping the most recently created one.
    last_blob = None
    for blob in bucket.list_blobs(prefix=key):
        logging.info("Found backup file '%s' in bucket '%s'", blob.name, bucket_name)
        if last_blob is None or blob.time_created > last_blob.time_created:
            last_blob = blob

    # Bug fix: the log statement previously dereferenced `last_blob.name`
    # unconditionally, raising AttributeError when no blobs matched even
    # though the return expression already handled the None case.
    if last_blob is None:
        logging.info("No backup file found with prefix '%s'", key)
        return None

    backup_name = last_blob.name.replace(key, "")
    logging.info("Returning backup file %s", backup_name)
    return backup_name
def __init__(self, ctx, config=None):
    """Initialize the base task, then bind the configured bucket to a GCS handle.

    Args:
        ctx: Context object forwarded unchanged to the base class.
        config: Optional configuration forwarded unchanged to the base class.
    """
    super().__init__(ctx, config)
    # NOTE(review): assumes super().__init__ sets `self.bucket` to a bucket
    # *name* (from config); it is replaced here in-place with a Bucket object
    # backed by a default-credential StorageClient — confirm against the base
    # class before relying on `self.bucket` being a string elsewhere.
    self.bucket = Bucket(StorageClient(), self.bucket)
def test_load_table_from_storage_then_dump_table(self):
    """Upload a CSV to GCS, load it into a BigQuery table, and verify the rows."""
    import csv
    import tempfile
    from google.cloud.storage import Client as StorageClient

    local_id = unique_resource_id()
    BUCKET_NAME = 'bq_load_test' + local_id
    BLOB_NAME = 'person_ages.csv'
    GS_URL = 'gs://%s/%s' % (BUCKET_NAME, BLOB_NAME)
    ROWS = [
        ('Phred Phlyntstone', 32),
        ('Bharney Rhubble', 33),
        ('Wylma Phlyntstone', 29),
        ('Bhettye Rhubble', 27),
    ]
    TABLE_NAME = 'test_table'

    storage_client = StorageClient()
    # In the **very** rare case the bucket name is reserved, this
    # fails with a ConnectionError.
    bucket = storage_client.create_bucket(BUCKET_NAME)
    self.to_delete.append(bucket)
    blob = bucket.blob(BLOB_NAME)

    # Stage the CSV in a temp file, then upload from the rewound handle.
    with tempfile.TemporaryFile(mode='w+') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(('Full Name', 'Age'))
        csv_writer.writerows(ROWS)
        blob.upload_from_file(csv_file, rewind=True, content_type='text/csv')
    self.to_delete.insert(0, blob)

    dataset = Config.CLIENT.dataset(_make_dataset_name('load_gcs_then_dump'))
    retry_403(dataset.create)()
    self.to_delete.append(dataset)

    full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED')
    age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
    table = dataset.table(TABLE_NAME, schema=[full_name, age])
    table.create()
    self.to_delete.insert(0, table)

    job = Config.CLIENT.load_table_from_storage(
        'bq_load_storage_test_' + local_id, table, GS_URL)
    job.create_disposition = 'CREATE_NEVER'
    job.skip_leading_rows = 1
    job.source_format = 'CSV'
    job.write_disposition = 'WRITE_EMPTY'
    job.begin()

    def _job_done(instance):
        return instance.state in ('DONE', 'done')

    # Allow for 90 seconds of "warm up" before rows visible.  See:
    # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
    # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
    retry = RetryInstanceState(_job_done, max_tries=8)
    retry(job.reload)()

    rows, _, _ = table.fetch_data()
    by_age = operator.itemgetter(1)
    self.assertEqual(sorted(rows, key=by_age), sorted(ROWS, key=by_age))
def test_load_table_from_storage_w_autodetect_schema(self):
    """Load a CSV from GCS with schema autodetection and verify schema and rows."""
    from google.cloud._testing import _NamedTemporaryFile
    from google.cloud.storage import Client as StorageClient
    from google.cloud.bigquery import SchemaField

    local_id = unique_resource_id()
    bucket_name = 'bq_load_test' + local_id
    blob_name = 'person_ages.csv'
    gs_url = 'gs://{}/{}'.format(bucket_name, blob_name)
    rows = [
        ('Phred Phlyntstone', 32),
        ('Bharney Rhubble', 33),
        ('Wylma Phlyntstone', 29),
        ('Bhettye Rhubble', 27),
    ] * 100  # BigQuery internally uses the first 100 rows to detect schema
    table_name = 'test_table'

    storage_client = StorageClient()
    # In the **very** rare case the bucket name is reserved, this
    # fails with a ConnectionError.
    bucket = storage_client.create_bucket(bucket_name)
    self.to_delete.append(bucket)
    blob = bucket.blob(blob_name)

    with _NamedTemporaryFile() as temp:
        with open(temp.name, 'w') as csv_write:
            writer = csv.writer(csv_write)
            writer.writerow(('Full Name', 'Age'))
            writer.writerows(rows)
        with open(temp.name, 'rb') as csv_read:
            blob.upload_from_file(csv_read, content_type='text/csv')
    self.to_delete.insert(0, blob)

    dataset = Config.CLIENT.dataset(_make_dataset_name('load_gcs_then_dump'))
    retry_403(dataset.create)()
    self.to_delete.append(dataset)
    table = dataset.table(table_name)
    self.to_delete.insert(0, table)

    job = Config.CLIENT.load_table_from_storage(
        'bq_load_storage_test_' + local_id, table, gs_url)
    job.autodetect = True
    job.begin()

    # Bug fix: `_job_done` was referenced below but never defined in this
    # test (it was local to a sibling test), raising NameError at runtime.
    def _job_done(instance):
        return instance.state in ('DONE', 'done')

    # Allow for 90 seconds of "warm up" before rows visible. See:
    # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
    # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
    retry = RetryInstanceState(_job_done, max_tries=8)
    retry(job.reload)()

    table.reload()
    field_name = SchemaField(u'Full_Name', u'string', u'NULLABLE', None, ())
    field_age = SchemaField(u'Age', u'integer', u'NULLABLE', None, ())
    self.assertEqual(table.schema, [field_name, field_age])

    actual_rows = self._fetch_single_page(table)
    by_age = operator.itemgetter(1)
    self.assertEqual(sorted(actual_rows, key=by_age), sorted(rows, key=by_age))
from time import time from abc import ABCMeta, abstractmethod from io import BytesIO from random import random from orjson import dumps, OPT_SORT_KEYS from google.cloud.storage import Client as StorageClient from PIL import Image from matplotlib import pyplot as plt from matplotlib import ticker as tkr import matplotlib.transforms as mtransforms from lark import Tree, Token, Transformer, v_args from assets import static_storage storage_client = StorageClient() plt.switch_backend("Agg") plt.ion() plt.rcParams["font.family"] = "DejaVu Sans" plt.rcParams['figure.figsize'] = (8, 6) plt.rcParams["figure.dpi"] = 200.0 plt.rcParams['savefig.facecolor'] = "#131722" class AbstractProvider(object): __metaclass__ = ABCMeta bucket = storage_client.get_bucket("nlc-bot-36685.appspot.com") stableCoinTickers = [ "USD", "USDT", "USDC", "DAI", "HUSD", "TUSD", "PAX", "USDK", "USDN", "BUSD", "GUSD", "USDS"
from google.cloud.firestore import Client as FirestoreClient from google.cloud.storage import Client as StorageClient from joblib import Memory from PIL import Image from requests import Session from requests.exceptions import HTTPError app = Flask(__name__) firestore = FirestoreClient() memory = Memory(tempfile.gettempdir(), verbose=0) requests = Session() storage = StorageClient() bucket = storage.get_bucket(os.environ["BUCKET"]) PROFILE_URL = os.environ["PROFILE_URL"] GAMES_URL = os.environ["GAMES_URL"] MEDIA_URL = os.environ["MEDIA_URL"] NO_CONTENT = "", http.HTTPStatus.NO_CONTENT CAPSULE_WIDTH = 184 CAPSULE_HEIGHT = 69 def get_steam_id(uid): logging.info(f"fetching steam_id of the user {uid}") try:
def __init__(self):
    """Create the Google Cloud Storage client used by this instance.

    Uses the google-cloud-storage library's default credential discovery.
    """
    self.client = StorageClient()