def storage_copy_table(ctx, dataset_id, table_id, source_bucket_name, dst_bucket_name, mode):
    """CLI command: copy a table's blobs between storage buckets.

    Builds a Storage handle from the shared click context and delegates
    the copy to Storage.copy_table.
    """
    storage = Storage(dataset_id, table_id, **ctx.obj)
    storage.copy_table(
        source_bucket_name=source_bucket_name,
        destination_bucket_name=dst_bucket_name,
        mode=mode,
    )
def append(self, filepath, partitions=None, if_exists="replace", **upload_args):
    """Appends new data to existing BigQuery table.

    As long as the data has the same schema. It appends the data in the
    filepath to the existing table.

    Args:
        filepath (str or pathlib.PosixPath): Where to find the file that
            you want to upload to create a table with
        partitions (str, pathlib.PosixPath, dict): Optional.
            Hive structured partition as a string or dict

            * str : `<key>=<value>/<key2>=<value2>`
            * dict: `dict(key=value, key2=value2)`
        if_exists (str): Optional.
            What to do if data with same name exists in storage

            * 'raise' : Raises Conflict exception
            * 'replace' : Replace table
            * 'pass' : Do nothing
    """
    # Guard clause: appending only makes sense against an existing staging table.
    if not self.table_exists("staging"):
        raise BaseDosDadosException(
            "You cannot append to a table that does not exist")
    Storage(self.dataset_id, self.table_id, **self.main_vars).upload(
        filepath,
        mode="staging",
        partitions=partitions,
        if_exists=if_exists,
        **upload_args,
    )
def storage_delete_table(ctx, dataset_id, table_id, mode, not_found_ok, bucket_name):
    """CLI command: delete a table's data from a storage bucket and report success."""
    Storage(dataset_id, table_id, **ctx.obj).delete_table(
        mode=mode,
        not_found_ok=not_found_ok,
        bucket_name=bucket_name,
    )
    # Green confirmation message for the user.
    message = click.style(
        f"Data was deleted from bucket `{bucket_name}`",
        fg="green",
    )
    click.echo(message)
def download_storage(ctx, dataset_id, table_id, filename, savepath, partitions, mode, if_not_exists):
    """CLI command: download table data from storage to a local path."""
    storage = Storage(dataset_id, table_id, **ctx.obj)
    storage.download(filename, savepath, partitions, mode, if_not_exists)
    # Green confirmation message for the user.
    message = click.style(
        f"Data was downloaded to `{savepath}`",
        fg="green",
    )
    click.echo(message)
def init_storage(ctx, bucket_name, replace, very_sure):
    """CLI command: initialize (create) a storage bucket."""
    # TODO: Create config file to store bucket_name, etc...
    # Drop the context's bucket_name so the explicit argument wins.
    ctx.obj.pop("bucket_name")
    storage = Storage(bucket_name=bucket_name, **ctx.obj)
    storage.init(replace=replace, very_sure=very_sure)
    message = click.style(
        f"Bucket `{bucket_name}` was created",
        fg="green",
    )
    click.echo(message)
def upload_storage(ctx, dataset_id, table_id, filepath, mode, partitions, if_exists):
    """CLI command: upload a local file to the table's storage folder."""
    # Drop the context's bucket_name so Storage uses its default bucket.
    ctx.obj.pop("bucket_name")
    storage = Storage(dataset_id, table_id, **ctx.obj)
    blob_name = storage.upload(
        filepath=filepath,
        mode=mode,
        partitions=partitions,
        if_exists=if_exists,
    )
    message = click.style(
        f"Data was added to `{blob_name}`",
        fg="green",
    )
    click.echo(message)
def create(
    self,
    path=None,
    job_config_params=None,
    force_dataset=True,
    if_table_exists="raise",
    if_storage_data_exists="raise",
    if_table_config_exists="raise",
    source_format="csv",
    columns_config_url=None,
):
    """Creates BigQuery table at staging dataset.

    If you add a path, it automatically saves the data in the storage,
    creates a datasets folder and BigQuery location, besides creating the
    table and its configuration files.

    The new table should be located at `<dataset_id>_staging.<table_id>` in
    BigQuery.

    It looks for data saved in Storage at
    `<bucket_name>/staging/<dataset_id>/<table_id>/*` and builds the table.

    It currently supports the types:

    - Comma Delimited CSV

    Data can also be partitioned following the hive partitioning scheme
    `<key1>=<value1>/<key2>=<value2>` - for instance, `year=2012/country=BR`.
    The partition is automatically detected by searching for `partitions` on
    the `table_config.yaml`.

    Args:
        path (str or pathlib.PosixPath): Where to find the file that you want to
            upload to create a table with
        job_config_params (dict): Optional.
            Job configuration params from bigquery
        if_table_exists (str): Optional.
            What to do if table exists

            * 'raise' : Raises Conflict exception
            * 'replace' : Replace table
            * 'pass' : Do nothing
        force_dataset (bool): Creates `<dataset_id>` folder and BigQuery Dataset
            if it doesn't exists.
        if_table_config_exists (str): Optional.
            What to do if config files already exist

            * 'raise': Raises FileExistError
            * 'replace': Replace with blank template
            * 'pass': Do nothing
        if_storage_data_exists (str): Optional.
            What to do if data already exists on your bucket:

            * 'raise' : Raises Conflict exception
            * 'replace' : Replace table
            * 'pass' : Do nothing
        source_format (str): Optional.
            Data source format. Only 'csv' is supported. Defaults to 'csv'.
        columns_config_url (str): google sheets URL.
            The URL must be in the format
            https://docs.google.com/spreadsheets/d/<table_key>/edit#gid=<table_gid>.
            The sheet must contain the column name: "coluna" and column
            description: "descricao"

    Raises:
        BaseDosDadosException: If ``path`` is None and no staging data exists
            in Storage for this table.
        FileExistsError: If the staging table already exists and
            ``if_table_exists`` is 'raise'.
    """
    if path is None:
        # Look if table data already exists at Storage
        data = self.client["storage_staging"].list_blobs(
            self.bucket_name,
            prefix=f"staging/{self.dataset_id}/{self.table_id}",
        )

        # BUGFIX: list_blobs returns an iterator, which is always truthy;
        # materialize it so the emptiness check can actually fire.
        # Raise: Cannot create table without external data
        if not list(data):
            raise BaseDosDadosException(
                "You must provide a path for uploading data")

    # Add data to storage
    if isinstance(
        path,
        (
            str,
            Path,
        ),
    ):
        Storage(self.dataset_id, self.table_id, **self.main_vars).upload(
            path, mode="staging", if_exists=if_storage_data_exists)

    # Create Dataset if it doesn't exist
    if force_dataset:
        dataset_obj = Dataset(self.dataset_id, **self.main_vars)

        try:
            dataset_obj.init()
        except FileExistsError:
            # Dataset folder already initialized locally; keep going.
            pass

        dataset_obj.create(if_exists="pass")

    self.init(
        data_sample_path=path,
        if_folder_exists="replace",
        if_table_config_exists=if_table_config_exists,
        columns_config_url=columns_config_url,
    )

    table = bigquery.Table(self.table_full_name["staging"])
    table.external_data_configuration = Datatype(
        self, source_format, "staging",
        partitioned=self._is_partitioned()).external_config

    # Lookup if table already exists
    table_ref = None
    try:
        table_ref = self.client["bigquery_staging"].get_table(
            self.table_full_name["staging"])
    except google.api_core.exceptions.NotFound:
        # Table does not exist yet; proceed to create it.
        pass

    if isinstance(table_ref, google.cloud.bigquery.table.Table):
        if if_table_exists == "pass":
            return None
        if if_table_exists == "raise":
            raise FileExistsError(
                "Table already exists, choose replace if you want to overwrite it"
            )

    # 'replace' drops the staging table before recreating it below.
    if if_table_exists == "replace":
        self.delete(mode="staging")

    self.client["bigquery_staging"].create_table(table)