Example #1
import basedosdados as bd


def upload_to_bd(tipo, filepath):
    # pick the target table according to the kind of microdata
    if tipo == "estabelecimentos":
        tb = bd.Table(table_id="microdados_estabelecimentos",
                      dataset_id="br_me_caged")
    else:
        tb = bd.Table(table_id="microdados_movimentacoes",
                      dataset_id="br_me_caged")

    # append the new file to the existing table
    tb.append(filepath, if_exists="replace")
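
A minimal usage sketch (the file path is hypothetical, and `Table.append` assumes the table has already been created in staging):

# hypothetical path to a CSV produced by an earlier extraction step
upload_to_bd("estabelecimentos", "/tmp/caged/microdados_estabelecimentos.csv")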
Example #2
File: solids.py Project: RJ-SMTR/maestro
from pathlib import Path

import basedosdados as bd


def create_table_bq(
    context,
    file_path,
    table_config="replace",
    publish_config="pass",
    table_id=None,
    dataset_id=None,
):
    # fall back to the IDs configured in the Dagster resources
    if not table_id:
        table_id = context.resources.basedosdados_config["table_id"]
    if not dataset_id:
        dataset_id = context.resources.basedosdados_config["dataset_id"]

    tb = bd.Table(dataset_id=dataset_id, table_id=table_id)
    # trim the path to the directory named after the table, so the whole
    # (possibly partitioned) folder is uploaded
    _file_path = file_path.split(table_id)[0] + table_id
    context.log.debug(_file_path)
    context.log.debug(table_id)

    tb.create(
        path=Path(_file_path),
        if_table_exists="replace",
        if_storage_data_exists="replace",
        if_table_config_exists=table_config,
    )

    tb.publish(if_exists=publish_config)
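
One line worth unpacking: `file_path.split(table_id)[0] + table_id` keeps everything up to and including the directory named after the table. A small illustration with hypothetical paths:

file_path = "/data/br_me_caged/microdados_caged/ano=2021/mes=01/data.csv"
table_id = "microdados_caged"
print(file_path.split(table_id)[0] + table_id)
# /data/br_me_caged/microdados_caged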
Example #3
import basedosdados as bd


def get_filtered_download_dict(tipo,
                               download_dict,
                               bucket_name="basedosdados"):
    def get_year_month(b):
        # blob names look like .../ano=YYYY/mes=MM/...
        ano = b.split("ano=")[1].split("/")[0]
        mes = b.split("mes=")[1].split("/")[0]
        return f"{ano}/{mes}/"

    tb = bd.Table(dataset_id="br_me_caged", table_id=f"microdados_{tipo}")
    blobs = list(tb.client["storage_staging"].bucket(bucket_name).list_blobs(
        prefix=f"staging/{tb.dataset_id}/{tb.table_id}/"))

    # deduplicate into the set of year/month partitions already in storage
    blobs = list({get_year_month(b.name) for b in blobs})

    # drop partitions that are already uploaded from "check_download"
    check_download_pop = [
        d for d in download_dict[tipo]["check_download"] if d in blobs
    ]
    for year_month in check_download_pop:
        download_dict[tipo]["check_download"].pop(year_month, None)

    # clear "must_download" only when every partition is already in storage
    must_download_pop = [
        d for d in download_dict[tipo]["must_download"] if d not in blobs
    ]
    if not must_download_pop:
        download_dict[tipo]["must_download"] = {}

    return download_dict
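
The shape of `download_dict` is not shown in the snippet; from the way it is indexed, something like the following hypothetical structure would work, with keys matching `get_year_month`'s `"{ano}/{mes}/"` format:

# hypothetical input: partition keys mapping to download metadata
download_dict = {
    "estabelecimentos": {
        "check_download": {"2021/01/": "...", "2021/02/": "..."},
        "must_download": {"2021/03/": "..."},
    }
}
filtered = get_filtered_download_dict("estabelecimentos", download_dict)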
Example #4
import sys
import traceback

import basedosdados as bd
from basedosdados import Dataset

# sync_bucket, load_configs, replace_project_id_publish_sql, tprint and
# save_header_files come from the surrounding project.


def push_table_to_bq(
    dataset_id,
    table_id,
    source_bucket_name="basedosdados-dev",
    destination_bucket_name="basedosdados",
    backup_bucket_name="basedosdados-backup",
):
    # copy the proposed data between storage buckets:
    # create a backup of the old data, then delete it and copy the new data
    # into the destination bucket
    modes = ["staging", "raw", "auxiliary_files", "architecture", "header"]

    for mode in modes:
        try:
            sync_bucket(
                source_bucket_name=source_bucket_name,
                dataset_id=dataset_id,
                table_id=table_id,
                destination_bucket_name=destination_bucket_name,
                backup_bucket_name=backup_bucket_name,
                mode=mode,
            )
            tprint()
        except Exception as error:
            tprint(f"DATA ERROR ON {mode}.{dataset_id}.{table_id}")
            traceback.print_exc(file=sys.stderr)
            tprint()

    # load the table_config.yaml to get the metadata IDs
    table_config, configs_path = load_configs(dataset_id, table_id)
    # adjust the correct project ID in publish sql
    replace_project_id_publish_sql(configs_path, dataset_id, table_id)
    # create table object of selected table and dataset ID
    tb = bd.Table(dataset_id=dataset_id, table_id=table_id)

    # delete table from staging and prod if exists
    tb.delete("all")

    # create the staging table in bigquery
    tb.create(
        path=None,
        if_table_exists="replace",
        if_storage_data_exists="pass",
        if_table_config_exists="pass",
    )

    # publish the table in prod bigquery
    tb.publish(if_exists="replace")

    # updates the table description
    tb.update("prod")

    # updates the dataset description
    Dataset(dataset_id).update(mode="prod")

    # save the table header in storage
    save_header_files(dataset_id, table_id)
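
A hypothetical invocation, promoting a table from the dev bucket to production with the default bucket names:

push_table_to_bq(
    dataset_id="br_me_caged",
    table_id="microdados_estabelecimentos",
)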
Example #5
File: main.py Project: avila/mais
import basedosdados as bd
from basedosdados import Dataset

# sync_bucket, load_configs and replace_project_id_publish_sql come from
# the surrounding project.


def push_table_to_bq(
    dataset_id,
    table_id,
    source_bucket_name="basedosdados-dev",
    destination_bucket_name="basedosdados",
    backup_bucket_name="basedosdados-staging",
):
    ### Copies the proposed data between storage buckets:
    ### creates a backup of the old data, then deletes it and copies
    ### the new data into the destination bucket.
    sync_bucket(
        source_bucket_name,
        dataset_id,
        table_id,
        destination_bucket_name,
        backup_bucket_name,
    )

    ### load the table_config.yaml to get the metadata IDs
    table_config, configs_path = load_configs(dataset_id, table_id)
    ### adjust the correct project ID in publish sql
    replace_project_id_publish_sql(configs_path, dataset_id, table_id)

    ### create Table object of selected table and dataset ID
    tb = bd.Table(dataset_id=dataset_id, table_id=table_id)

    ### delete table from staging and prod if exists
    tb.delete("all")

    ### create the staging table in bigquery
    tb.create(
        path=None,
        if_table_exists="replace",
        if_storage_data_exists="pass",
        if_table_config_exists="pass",
    )
    ### publish the table in prod bigquery
    tb.publish(if_exists="replace")
    ### updates the table description
    tb.update("prod")
    ### updates the dataset description
    Dataset(dataset_id).update("prod")
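
This variant of Example #4 makes a single `sync_bucket` call instead of looping over storage modes with per-mode error handling, defaults the backup bucket to `basedosdados-staging` rather than `basedosdados-backup`, and does not save header files afterwards.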
Example #6
File: solids.py Project: RJ-SMTR/maestro
from pathlib import Path

import basedosdados as bd


def create_table_bq_v2(context,
                       file_path,
                       table_config="replace",
                       publish_config="pass",
                       table_id=None):
    # fall back to the IDs configured in the Dagster resources
    if not table_id:
        table_id = context.resources.basedosdados_config["table_id"]
    dataset_id = context.resources.basedosdados_config["dataset_id"]

    context.log.debug(f"Filepath: {file_path}")

    tb = bd.Table(dataset_id=dataset_id, table_id=table_id)
    tb.create(
        path=Path(file_path),
        if_table_exists="replace",
        if_storage_data_exists="replace",
        if_table_config_exists=table_config,
    )

    tb.publish(if_exists=publish_config)

    # delete the local file now that it has been uploaded
    Path(file_path).unlink(missing_ok=True)
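
Both Dagster solids above read their IDs from a `basedosdados_config` resource; a minimal sketch of what that resource could supply (keys inferred from the code, values hypothetical):

basedosdados_config = {
    "table_id": "microdados_caged",
    "dataset_id": "br_me_caged",
}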
Example #7
    df.rename(columns = rename_m, inplace = True)                  

    return df 

indice          = icv(codes[0], drop_m, rename_m)
variacao_mensal = icv(codes[1], drop_m, rename_m)
variacao_anual  = icv(codes[2], drop_a, rename_a)

icv_mes = pd.merge(indice, variacao_mensal, 
                   how      = 'left', 
                   left_on  = ['ano', 'mes'], 
                   right_on = ['ano', 'mes'])

icv_mes.to_csv(path_output        + 'mes/mes.csv', index=False, encoding = 'utf-8', na_rep = '')
variacao_anual.to_csv(path_output + 'ano/ano.csv', index=False, encoding = 'utf-8', na_rep = '')
print(icv_mes.head())
print(variacao_anual.head())
#------------------------------------------------------------------------------#
# Bigquery
#------------------------------------------------------------------------------#

table_ids = ['ano', 'mes']


# create both tables in BigQuery, replacing any existing data and config
for table_id in table_ids:
    tb = bd.Table(dataset_id = 'br_sp_saopaulo_dieese_icv', table_id = table_id)
    tb.create(
        path = path_dados + 'output/{}'.format(table_id),
        if_storage_data_exists = 'replace',
        if_table_config_exists = 'replace',
        if_table_exists        = 'replace')
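
Note that `path` here points at a directory (`output/ano`, `output/mes`), so each `tb.create` call picks up the CSV written by the corresponding `to_csv` call above; this assumes `path_dados + 'output/'` and `path_output` refer to the same folder.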