Example #1
def test_create_storage_data_exist_table_config_exist(table, metadatadir,
                                                      data_path, sample_data):

    shutil.rmtree(metadatadir / DATASET_ID / TABLE_ID, ignore_errors=True)

    Dataset(dataset_id=DATASET_ID,
            metadata_path=metadatadir).create(if_exists="pass")

    Storage(dataset_id=DATASET_ID,
            table_id=TABLE_ID,
            metadata_path=metadatadir).upload(data_path,
                                              mode="staging",
                                              if_exists="replace")

    table.init(
        data_sample_path=data_path,
        if_folder_exists="replace",
        if_table_config_exists="replace",
    )

    for file in TABLE_FILES:
        shutil.copy(sample_data / file, table.table_folder / file)

    table.delete(mode="all")

    table.create(
        data_path,
        if_storage_data_exists="pass",
        if_table_config_exists="pass",
    )
    assert table_exists(table, "staging")
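The assertions above depend on a table_exists helper from the test suite. Below is a minimal sketch of such a helper, assuming the staging tables live in a <DATASET_ID>_staging BigQuery dataset as in the basedosdados convention (hypothetical; the project's actual helper may differ):

from google.api_core.exceptions import NotFound
from google.cloud import bigquery


def table_exists(table, mode="staging"):
    # Hypothetical helper: report whether the test table is present in the
    # given environment ("staging" or "prod") by asking BigQuery for it.
    client = bigquery.Client()
    dataset = DATASET_ID if mode == "prod" else f"{DATASET_ID}_staging"
    try:
        client.get_table(f"{dataset}.{TABLE_ID}")
        return True
    except NotFound:
        return False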
Example #2
def test_create(table, metadatadir):

    shutil.rmtree(Path(metadatadir) / DATASET_ID / TABLE_ID,
                  ignore_errors=True)

    Dataset(dataset_id=DATASET_ID,
            metadata_path=metadatadir).create(if_exists="pass")

    Storage(dataset_id=DATASET_ID,
            table_id=TABLE_ID,
            metadata_path=metadatadir).upload(
                "tests/sample_data/municipios.csv",
                mode="staging",
                if_exists="replace")

    table.init(data_sample_path="tests/sample_data/municipios.csv",
               if_exists="replace")

    table.delete(mode="all")

    table.create()

    assert table_exists(table, mode="staging")

    table.create(if_exists="replace")

    assert table_exists(table, mode="staging")

    table.create("tests/sample_data/municipios.csv", if_exists="replace")
Example #3
def test_init(table, metadatadir):

    # remove folder
    shutil.rmtree(Path(metadatadir) / DATASET_ID / TABLE_ID,
                  ignore_errors=True)

    Dataset(dataset_id=DATASET_ID,
            metadata_path=metadatadir).init(replace=True)

    table.init()

    folder = Path(metadatadir) / DATASET_ID / TABLE_ID

    check_files(folder)

    # each call must be in its own pytest.raises block; a second statement
    # after the first raise would never execute
    with pytest.raises(FileExistsError):
        table.init()

    with pytest.raises(FileExistsError):
        table.init(if_exists="raise")

    table.init(if_exists="replace")

    check_files(folder)

    table.init(if_exists="pass")

    check_files(folder)

    table.init(if_exists="replace",
               data_sample_path="tests/sample_data/municipios.csv")

    check_files(folder)

    with pytest.raises(NotImplementedError):
        table.init(if_exists="replace",
                   data_sample_path="tests/sample_data/municipios.json")
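test_init validates the generated metadata folder with a check_files helper. A plausible sketch, assuming the folder should contain the table_config.yaml and publish.sql files that Example #7 copies around (hypothetical; the real helper may check more files):

def check_files(folder):
    # Hypothetical assertion helper: the metadata files Table.init()
    # is expected to generate must exist in the table folder.
    for file in ["table_config.yaml", "publish.sql"]:
        assert (folder / file).exists()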
Example #4
def push_table_to_bq(
    dataset_id,
    table_id,
    source_bucket_name="basedosdados-dev",
    destination_bucket_name="basedosdados",
    backup_bucket_name="basedosdados-backup",
):
    # copy proposed data between the storage buckets
    # create a backup of the old data, then delete it and copy the new data into the destination bucket
    modes = ["staging", "raw", "auxiliary_files", "architecture", "header"]

    for mode in modes:
        try:
            sync_bucket(
                source_bucket_name=source_bucket_name,
                dataset_id=dataset_id,
                table_id=table_id,
                destination_bucket_name=destination_bucket_name,
                backup_bucket_name=backup_bucket_name,
                mode=mode,
            )
            tprint()
        except Exception:
            tprint(f"DATA ERROR ON {mode}.{dataset_id}.{table_id}")
            traceback.print_exc(file=sys.stderr)
            tprint()

    # load the table_config.yaml to get the metadata IDs
    table_config, configs_path = load_configs(dataset_id, table_id)
    # adjust the correct project ID in publish sql
    replace_project_id_publish_sql(configs_path, dataset_id, table_id)
    # create table object of selected table and dataset ID
    tb = bd.Table(dataset_id=dataset_id, table_id=table_id)

    # delete table from staging and prod if exists
    tb.delete("all")

    # create the staging table in bigquery
    tb.create(
        path=None,
        if_table_exists="replace",
        if_storage_data_exists="pass",
        if_table_config_exists="pass",
    )

    # publish the table in prod bigquery
    tb.publish(if_exists="replace")

    # update the table description
    tb.update("prod")

    # update the dataset description
    Dataset(dataset_id).update(mode="prod")

    # save the table header in storage
    save_header_files(dataset_id, table_id)
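A usage sketch for push_table_to_bq; the dataset and table IDs below are placeholders, not real basedosdados tables:

if __name__ == "__main__":
    # Promote a table from the dev bucket to production with the
    # default bucket names (placeholder IDs).
    push_table_to_bq(
        dataset_id="example_dataset",
        table_id="example_table",
    )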
Example #5
def test_create_no_path_error(table, metadatadir, data_path, sample_data):

    shutil.rmtree(metadatadir / DATASET_ID / TABLE_ID, ignore_errors=True)

    Dataset(dataset_id=DATASET_ID, metadata_path=metadatadir).create(if_exists="pass")

    with pytest.raises(BaseDosDadosException):
        table.create(if_storage_data_exists="replace")

    with pytest.raises(BaseDosDadosException):
        table.create(if_table_config_exists="replace")
Example #6
def test_init(
    table,
    metadatadir,
    folder,
    data_path,
):

    # remove folder
    shutil.rmtree(metadatadir / DATASET_ID / TABLE_ID, ignore_errors=True)

    Dataset(dataset_id=DATASET_ID, metadata_path=metadatadir).init(replace=True)

    table.init(data_sample_path=data_path)

    check_files(folder)
Example #7
def test_publish(table, metadatadir):

    Dataset(dataset_id=DATASET_ID,
            metadata_path=metadatadir).create(if_exists="replace")

    table.create("tests/sample_data/municipios.csv", if_exists="replace")

    shutil.copy(
        "tests/sample_data/table/table_config.yaml",
        Path(metadatadir) / "pytest" / "pytest" / "table_config.yaml",
    )
    shutil.copy(
        "tests/sample_data/table/publish.sql",
        Path(metadatadir) / "pytest" / "pytest" / "publish.sql",
    )

    table.publish(if_exists="replace")

    assert table_exists(table, "prod")
Example #8
def push_table_to_bq(
    dataset_id,
    table_id,
    source_bucket_name="basedosdados-dev",
    destination_bucket_name="basedosdados",
    backup_bucket_name="basedosdados-staging",
):

    ### Copy proposed data between the storage buckets.
    ### Create a backup of the old data, then delete it and copy the new data into the destination bucket.
    sync_bucket(
        source_bucket_name,
        dataset_id,
        table_id,
        destination_bucket_name,
        backup_bucket_name,
    )

    ### load the table_config.yaml to get the metadata IDs
    table_config, configs_path = load_configs(dataset_id, table_id)
    ### adjust the correct project ID in publish sql
    replace_project_id_publish_sql(configs_path, dataset_id, table_id)

    ### create Table object of selected table and dataset ID
    tb = bd.Table(table_id=table_id, dataset_id=dataset_id)

    ### delete table from staging and prod if exists
    tb.delete("all")

    ### create the staging table in bigquery
    tb.create(
        path=None,
        if_table_exists="replace",
        if_storage_data_exists="pass",
        if_table_config_exists="pass",
    )
    ### publish the table in prod bigquery
    tb.publish(if_exists="replace")
    ### update the table description
    tb.update("prod")
    ### update the dataset description
    Dataset(dataset_id).update("prod")
Example #9
def dataset(metadatadir):
    return Dataset(dataset_id=DATASET_ID, metadata_path=metadatadir)
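Example #9 shows the dataset fixture these tests build on. A companion sketch of the metadatadir and table fixtures, assuming DATASET_ID and TABLE_ID are both "pytest" as the paths in Example #7 suggest (hypothetical; the real conftest.py may differ):

from pathlib import Path

import pytest
from basedosdados import Table


@pytest.fixture
def metadatadir(tmpdir_factory):
    # Isolated metadata directory for the test run (hypothetical).
    return Path(tmpdir_factory.mktemp("metadata"))


@pytest.fixture
def table(metadatadir):
    # Table object pointing at the pytest/pytest test table (hypothetical).
    return Table(dataset_id=DATASET_ID,
                 table_id=TABLE_ID,
                 metadata_path=metadatadir)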