def sync_bucket(
    source_bucket_name,
    dataset_id,
    table_id,
    destination_bucket_name,
    backup_bucket_name,
    mode="staging",
):
    """Copies proposed data between storage buckets.

    Creates a backup of the old data, then deletes it and copies the new data
    into the destination bucket. Every step works on the same
    ``{mode}/{dataset_id}/{table_id}/`` folder.

    Args:
        source_bucket_name (str):
            The bucket name from which to copy data.
        dataset_id (str):
            Dataset id available in basedosdados. It should always come with
            table_id.
        table_id (str):
            Table id available in basedosdados.dataset_id. It should always
            come with dataset_id.
        destination_bucket_name (str):
            The bucket name which data will be copied to.
            If None, defaults to the bucket initialized when instantiating the
            Storage object (check it with the Storage.bucket property).
        backup_bucket_name (str):
            The bucket name for where backup data will be stored.
        mode (str): Optional.
            Folder of which dataset to update.

    Raises:
        ValueError:
            If there are no files corresponding to the given dataset_id and
            table_id on the source bucket.
    """

    ref = Storage(dataset_id=dataset_id, table_id=table_id)
    storage_client = ref.client["storage_staging"]

    prefix = f"{mode}/{dataset_id}/{table_id}/"

    source_blobs = storage_client.bucket(source_bucket_name).list_blobs(
        prefix=prefix
    )

    # any() stops at the first blob, so the listing is never fully materialized
    # just to find out whether it is empty.
    if not any(True for _ in source_blobs):
        raise ValueError("No objects found on the source bucket")

    # Look for old data in the bucket that is actually going to be replaced.
    # Without an explicit name, fall back to the Storage object's own bucket.
    # NOTE(review): a None destination_bucket_name is forwarded unchanged to
    # copy_table/delete_table below - presumably they apply the same default;
    # confirm against Storage.
    if destination_bucket_name is None:
        destination_bucket = ref.bucket
    else:
        destination_bucket = storage_client.bucket(destination_bucket_name)

    # MAKE A BACKUP OF OLD DATA
    if any(True for _ in destination_bucket.list_blobs(prefix=prefix)):
        print(
            "\n########################################### COPY BACKUP ###########################################\n"
        )
        ref.copy_table(
            source_bucket_name=destination_bucket_name,
            destination_bucket_name=backup_bucket_name,
            mode=mode,
        )

        print(
            "\n########################################## DELETE OLD DATA ##########################################\n"
        )
        # DELETE OLD DATA FROM PROD
        ref.delete_table(
            not_found_ok=True,
            mode=mode,
            bucket_name=destination_bucket_name,
        )

    print(
        "\n########################################### COPY NEW DATA ###########################################\n"
    )
    # COPIES DATA TO DESTINATION
    ref.copy_table(
        source_bucket_name=source_bucket_name,
        destination_bucket_name=destination_bucket_name,
        mode=mode,
    )
def sync_bucket(
    source_bucket_name,
    dataset_id,
    table_id,
    destination_bucket_name,
    backup_bucket_name,
    mode="staging",
):
    """Copies proposed data between storage buckets.

    Creates a backup of the old data, then deletes it and copies the new data
    into the destination bucket. Any data already sitting under the same
    prefix in the backup bucket is deleted before the new backup is written.

    Args:
        source_bucket_name (str):
            The bucket name from which to copy data.
        dataset_id (str):
            Dataset id available in basedosdados. It should always come with
            table_id.
        table_id (str):
            Table id available in basedosdados.dataset_id. It should always
            come with dataset_id.
        destination_bucket_name (str):
            The bucket name which data will be copied to.
            If None, defaults to the bucket initialized when instantiating the
            Storage object (check it with the Storage.bucket property).
        backup_bucket_name (str):
            The bucket name for where backup data will be stored.
        mode (str): Optional.
            Folder of which dataset to update.
            [raw|staging|header|auxiliary_files|architecture]

    Raises:
        ValueError:
            If there are no files corresponding to the given dataset_id and
            table_id on the source bucket.
    """

    ref = Storage(dataset_id=dataset_id, table_id=table_id)
    storage_client = ref.client["storage_staging"]

    prefix = f"{mode}/{dataset_id}/{table_id}/"

    source_blobs = storage_client.bucket(source_bucket_name).list_blobs(
        prefix=prefix
    )

    # any() stops at the first blob, so the listing is never fully materialized
    # just to find out whether it is empty.
    if not any(True for _ in source_blobs):
        raise ValueError(
            f"No objects found on the source bucket {source_bucket_name}.{prefix}"
        )

    # Look for old data in the same bucket the delete/copy steps below act on.
    # Without an explicit name, fall back to the Storage object's own bucket.
    # NOTE(review): a None destination_bucket_name is forwarded unchanged to
    # copy_table/delete_table below - presumably they apply the same default;
    # confirm against Storage.
    if destination_bucket_name is None:
        destination_bucket = ref.bucket
    else:
        destination_bucket = storage_client.bucket(destination_bucket_name)

    # MAKE A BACKUP OF OLD DATA
    if any(True for _ in destination_bucket.list_blobs(prefix=prefix)):
        backup_blobs = storage_client.bucket(backup_bucket_name).list_blobs(
            prefix=prefix
        )

        # Clear the previous backup first so the backup bucket ends up holding
        # only the data being replaced now.
        if any(True for _ in backup_blobs):
            tprint(f"{mode.upper()}: DELETE BACKUP DATA")
            ref.delete_table(
                not_found_ok=True, mode=mode, bucket_name=backup_bucket_name
            )

        tprint(f"{mode.upper()}: BACKUP OLD DATA")
        ref.copy_table(
            source_bucket_name=destination_bucket_name,
            destination_bucket_name=backup_bucket_name,
            mode=mode,
        )

        tprint(f"{mode.upper()}: DELETE OLD DATA")
        # DELETE OLD DATA FROM PROD
        ref.delete_table(
            not_found_ok=True, mode=mode, bucket_name=destination_bucket_name
        )

    tprint(f"{mode.upper()}: TRANSFER NEW DATA")
    # COPIES DATA TO DESTINATION
    ref.copy_table(
        source_bucket_name=source_bucket_name,
        destination_bucket_name=destination_bucket_name,
        mode=mode,
    )