# Shared imports for the snippets below. DATASET_ID, TABLE_ID, TABLE_FILES and
# helpers such as table_exists, check_files, sync_bucket, load_configs,
# replace_project_id_publish_sql, tprint and save_header_files are defined
# elsewhere in the repository.
import shutil
import sys
import traceback
from pathlib import Path

import pytest

import basedosdados as bd
from basedosdados import Dataset, Storage
from basedosdados.exceptions import BaseDosDadosException


def test_create_storage_data_exist_table_config_exist(
    table, metadatadir, data_path, sample_data
):
    shutil.rmtree(metadatadir / DATASET_ID / TABLE_ID, ignore_errors=True)

    Dataset(dataset_id=DATASET_ID, metadata_path=metadatadir).create(if_exists="pass")

    Storage(dataset_id=DATASET_ID, table_id=TABLE_ID, metadata_path=metadatadir).upload(
        data_path, mode="staging", if_exists="replace"
    )

    table.init(
        data_sample_path=data_path,
        if_folder_exists="replace",
        if_table_config_exists="replace",
    )

    for file in TABLE_FILES:
        shutil.copy(sample_data / file, table.table_folder / file)

    table.delete(mode="all")

    table.create(
        data_path,
        if_storage_data_exists="pass",
        if_table_config_exists="pass",
    )

    assert table_exists(table, "staging")

def test_create(table, metadatadir):
    shutil.rmtree(Path(metadatadir) / DATASET_ID / TABLE_ID, ignore_errors=True)

    Dataset(dataset_id=DATASET_ID, metadata_path=metadatadir).create(if_exists="pass")

    Storage(dataset_id=DATASET_ID, table_id=TABLE_ID, metadata_path=metadatadir).upload(
        "tests/sample_data/municipios.csv", mode="staging", if_exists="replace"
    )

    table.init(data_sample_path="tests/sample_data/municipios.csv", if_exists="replace")

    table.delete(mode="all")

    table.create()
    assert table_exists(table, mode="staging")

    table.create(if_exists="replace")
    assert table_exists(table, mode="staging")

    table.create("tests/sample_data/municipios.csv", if_exists="replace")

def test_init(table, metadatadir):
    # remove folder
    shutil.rmtree(Path(metadatadir) / DATASET_ID / TABLE_ID, ignore_errors=True)

    Dataset(dataset_id=DATASET_ID, metadata_path=metadatadir).init(replace=True)

    table.init()

    folder = Path(metadatadir) / DATASET_ID / TABLE_ID
    check_files(folder)

    # both the default and the explicit "raise" option must fail when the
    # folder already exists
    with pytest.raises(FileExistsError):
        table.init()
    with pytest.raises(FileExistsError):
        table.init(if_exists="raise")

    table.init(if_exists="replace")
    check_files(folder)

    table.init(if_exists="pass")
    check_files(folder)

    table.init(if_exists="replace", data_sample_path="tests/sample_data/municipios.csv")
    check_files(folder)

    with pytest.raises(NotImplementedError):
        table.init(
            if_exists="replace", data_sample_path="tests/sample_data/municipios.json"
        )

def push_table_to_bq(
    dataset_id,
    table_id,
    source_bucket_name="basedosdados-dev",
    destination_bucket_name="basedosdados",
    backup_bucket_name="basedosdados-backup",
):
    # Copy proposed data between storage buckets: create a backup of the old
    # data, then delete it and copy the new data into the destination bucket.
    modes = ["staging", "raw", "auxiliary_files", "architecture", "header"]
    for mode in modes:
        try:
            sync_bucket(
                source_bucket_name=source_bucket_name,
                dataset_id=dataset_id,
                table_id=table_id,
                destination_bucket_name=destination_bucket_name,
                backup_bucket_name=backup_bucket_name,
                mode=mode,
            )
            tprint()
        except Exception as error:
            tprint(f"DATA ERROR ON {mode}.{dataset_id}.{table_id}")
            traceback.print_exc(file=sys.stderr)
            tprint()

    # load the table_config.yaml to get the metadata IDs
    table_config, configs_path = load_configs(dataset_id, table_id)

    # adjust the correct project ID in the publish sql
    replace_project_id_publish_sql(configs_path, dataset_id, table_id)

    # create a Table object for the selected dataset and table IDs
    tb = bd.Table(dataset_id=dataset_id, table_id=table_id)

    # delete the table from staging and prod if it exists
    tb.delete("all")

    # create the staging table in BigQuery
    tb.create(
        path=None,
        if_table_exists="replace",
        if_storage_data_exists="pass",
        if_table_config_exists="pass",
    )

    # publish the table in prod BigQuery
    tb.publish(if_exists="replace")

    # update the table description
    tb.update("prod")

    # update the dataset description
    Dataset(dataset_id).update(mode="prod")

    # save the table header files in storage
    save_header_files(dataset_id, table_id)

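# For context, a minimal sketch of how push_table_to_bq might be invoked as a
# CI entry point. The environment-variable names below are illustrative
# assumptions, not part of the original script.
import os

if __name__ == "__main__":
    push_table_to_bq(
        dataset_id=os.environ["DATASET_ID"],  # hypothetical variable name
        table_id=os.environ["TABLE_ID"],      # hypothetical variable name
    )
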
def test_create_no_path_error(table, metadatadir, data_path, sample_data):
    shutil.rmtree(metadatadir / DATASET_ID / TABLE_ID, ignore_errors=True)

    Dataset(dataset_id=DATASET_ID, metadata_path=metadatadir).create(if_exists="pass")

    with pytest.raises(BaseDosDadosException):
        table.create(if_storage_data_exists="replace")

    with pytest.raises(BaseDosDadosException):
        table.create(if_table_config_exists="replace")

def test_init(
    table,
    metadatadir,
    folder,
    data_path,
):
    # remove folder
    shutil.rmtree(metadatadir / DATASET_ID / TABLE_ID, ignore_errors=True)

    Dataset(dataset_id=DATASET_ID, metadata_path=metadatadir).init(replace=True)

    table.init(data_sample_path=data_path)

    check_files(folder)

def test_publish(table, metadatadir):
    Dataset(dataset_id=DATASET_ID, metadata_path=metadatadir).create(if_exists="replace")

    table.create("tests/sample_data/municipios.csv", if_exists="replace")

    shutil.copy(
        "tests/sample_data/table/table_config.yaml",
        Path(metadatadir) / "pytest" / "pytest" / "table_config.yaml",
    )
    shutil.copy(
        "tests/sample_data/table/publish.sql",
        Path(metadatadir) / "pytest" / "pytest" / "publish.sql",
    )

    table.publish(if_exists="replace")

    assert table_exists(table, "prod")

def push_table_to_bq(
    dataset_id,
    table_id,
    source_bucket_name="basedosdados-dev",
    destination_bucket_name="basedosdados",
    backup_bucket_name="basedosdados-staging",
):
    # Copy proposed data between storage buckets: create a backup of the old
    # data, then delete it and copy the new data into the destination bucket.
    sync_bucket(
        source_bucket_name,
        dataset_id,
        table_id,
        destination_bucket_name,
        backup_bucket_name,
    )

    # load the table_config.yaml to get the metadata IDs
    table_config, configs_path = load_configs(dataset_id, table_id)

    # adjust the correct project ID in the publish sql
    replace_project_id_publish_sql(configs_path, dataset_id, table_id)

    # create a Table object for the selected dataset and table IDs
    tb = bd.Table(table_id=table_id, dataset_id=dataset_id)

    # delete the table from staging and prod if it exists
    tb.delete("all")

    # create the staging table in BigQuery
    tb.create(
        path=None,
        if_table_exists="replace",
        if_storage_data_exists="pass",
        if_table_config_exists="pass",
    )

    # publish the table in prod BigQuery
    tb.publish(if_exists="replace")

    # update the table description
    tb.update("prod")

    # update the dataset description
    Dataset(dataset_id).update("prod")

@pytest.fixture
def dataset(metadatadir):
    # pytest fixture providing the Dataset under test
    return Dataset(dataset_id=DATASET_ID, metadata_path=metadatadir)
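
# A minimal usage sketch, assuming the fixture above is registered with
# pytest; the test name and its if_exists choice are illustrative, not part
# of the original suite.
def test_dataset_create_if_exists_pass(dataset):
    dataset.create(if_exists="pass")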