def update_dataset(ctx, dataset_id, mode):
    Dataset(dataset_id=dataset_id, **ctx.obj).update(mode=mode)

    click.echo(
        click.style(
            mode_text(mode, "updated", dataset_id),
            fg="green",
        )
    )
def init_dataset(ctx, dataset_id, replace):
    d = Dataset(dataset_id=dataset_id, **ctx.obj).init(replace=replace)

    click.echo(
        click.style(
            f"Dataset `{dataset_id}` folder and metadata were created at {d.metadata_path}",
            fg="green",
        )
    )
def publicize_dataset(ctx, dataset_id):
    Dataset(dataset_id=dataset_id, **ctx.obj).publicize()

    click.echo(
        click.style(
            f"Dataset `{dataset_id}` became public!",
            fg="green",
        )
    )
def create_dataset(ctx, dataset_id, mode, if_exists):
    Dataset(dataset_id=dataset_id, **ctx.obj).create(mode=mode, if_exists=if_exists)

    click.echo(
        click.style(
            mode_text(mode, "created", dataset_id),
            fg="green",
        )
    )
def delete_dataset(ctx, dataset_id, mode):
    if click.confirm(f"Are you sure you want to delete `{dataset_id}`?"):
        Dataset(dataset_id=dataset_id, **ctx.obj).delete(mode=mode)

        click.echo(
            click.style(
                mode_text(mode, "deleted", dataset_id),
                fg="green",
            )
        )
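# A minimal sketch of driving the same `Dataset` API that the handlers above
# wrap, without going through click. The import path and the dataset_id value
# are assumptions for illustration only; in the handlers, extra configuration
# is passed in through `**ctx.obj`, which is omitted here.
from basedosdados import Dataset


def example_dataset_flow():
    dataset = Dataset(dataset_id="example_dataset_id")

    # Same sequence the commands above expose one step at a time:
    dataset.init(replace=False)        # create the local folder and metadata files
    dataset.create(if_exists="pass")   # create the BigQuery dataset
    dataset.publicize()                # open read access to the dataset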
def create(
    self,
    path=None,
    job_config_params=None,
    force_dataset=True,
    if_table_exists="raise",
    if_storage_data_exists="raise",
    if_table_config_exists="raise",
    source_format="csv",
    columns_config_url=None,
):
    """Creates a BigQuery table in the staging dataset.

    If you add a path, it automatically saves the data in Storage, creates
    the dataset folder and BigQuery location, besides creating the table
    and its configuration files.

    The new table should be located at `<dataset_id>_staging.<table_id>` in
    BigQuery.

    It looks for data saved in Storage at
    `<bucket_name>/staging/<dataset_id>/<table_id>/*` and builds the table.

    It currently supports the types:

    - Comma Delimited CSV

    Data can also be partitioned following the hive partitioning scheme
    `<key1>=<value1>/<key2>=<value2>` - for instance, `year=2012/country=BR`.
    The partition is automatically detected by searching for `partitions`
    in the `table_config.yaml`.

    Args:
        path (str or pathlib.PosixPath): Where to find the file that you want
            to upload to create a table with
        job_config_params (dict): Optional.
            Job configuration params from bigquery
        if_table_exists (str): Optional.
            What to do if the table exists

            * 'raise' : Raises Conflict exception
            * 'replace' : Replace table
            * 'pass' : Do nothing
        force_dataset (bool): Creates `<dataset_id>` folder and BigQuery
            Dataset if it doesn't exist.
        if_table_config_exists (str): Optional.
            What to do if config files already exist

            * 'raise': Raises FileExistsError
            * 'replace': Replace with blank template
            * 'pass': Do nothing
        if_storage_data_exists (str): Optional.
            What to do if data already exists on your bucket:

            * 'raise' : Raises Conflict exception
            * 'replace' : Replace table
            * 'pass' : Do nothing
        source_format (str): Optional.
            Data source format. Only 'csv' is supported. Defaults to 'csv'.
        columns_config_url (str): google sheets URL.
            The URL must be in the format
            https://docs.google.com/spreadsheets/d/<table_key>/edit#gid=<table_gid>.
            The sheet must contain the column name: "coluna" and the column
            description: "descricao".
    """

    if path is None:
        # Look if table data already exists at Storage
        data = self.client["storage_staging"].list_blobs(
            self.bucket_name, prefix=f"staging/{self.dataset_id}/{self.table_id}"
        )

        # Raise: Cannot create table without external data
        if not data:
            raise BaseDosDadosException(
                "You must provide a path for uploading data"
            )

    # Add data to storage
    if isinstance(
        path,
        (
            str,
            Path,
        ),
    ):
        Storage(self.dataset_id, self.table_id, **self.main_vars).upload(
            path, mode="staging", if_exists=if_storage_data_exists
        )

    # Create Dataset if it doesn't exist
    if force_dataset:
        dataset_obj = Dataset(self.dataset_id, **self.main_vars)

        try:
            dataset_obj.init()
        except FileExistsError:
            pass

        dataset_obj.create(if_exists="pass")

    self.init(
        data_sample_path=path,
        if_folder_exists="replace",
        if_table_config_exists=if_table_config_exists,
        columns_config_url=columns_config_url,
    )

    table = bigquery.Table(self.table_full_name["staging"])

    table.external_data_configuration = Datatype(
        self, source_format, "staging", partitioned=self._is_partitioned()
    ).external_config

    # Look up if the table already exists
    table_ref = None
    try:
        table_ref = self.client["bigquery_staging"].get_table(
            self.table_full_name["staging"]
        )
    except google.api_core.exceptions.NotFound:
        pass

    if isinstance(table_ref, google.cloud.bigquery.table.Table):
        if if_table_exists == "pass":
            return None
        elif if_table_exists == "raise":
            raise FileExistsError(
                "Table already exists, choose replace if you want to overwrite it"
            )

    if if_table_exists == "replace":
        self.delete(mode="staging")

    self.client["bigquery_staging"].create_table(table)
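# A minimal usage sketch for the `create` method above, assuming the class that
# owns it is exposed as `Table` at the package root. The dataset/table ids and
# the local CSV path are placeholders, not real project values.
from basedosdados import Table


def example_table_create():
    table = Table(dataset_id="example_dataset", table_id="example_table")

    # Uploads the CSV to <bucket_name>/staging/<dataset_id>/<table_id>/*,
    # writes the table config files, and creates the external table at
    # <dataset_id>_staging.<table_id>, as described in the docstring above.
    table.create(
        path="data/example_table.csv",
        if_table_exists="replace",
        if_storage_data_exists="replace",
        if_table_config_exists="pass",
    )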