Esempio n. 1
def create_or_append_table(context, dataset_id, table_id, path):
    """Upload the data at ``path`` to the staging table, then ensure it is published.

    Creates the staging table from ``path`` when it does not exist yet,
    otherwise appends the files at ``path`` to it. Afterwards the table is
    published to prod if it has not been published before.

    Args:
        context: execution context providing ``context.log`` for logging.
        dataset_id: target BigQuery dataset identifier.
        table_id: target BigQuery table identifier.
        path: local path of the data to upload.
    """
    table = Table(table_id=table_id, dataset_id=dataset_id)

    if table.table_exists('staging'):
        context.log.info("Table already exists in STAGING, appending to it...")
        # 10 MiB upload chunks with a generous timeout for large files.
        table.append(
            filepath=path,
            if_exists="replace",
            timeout=600,
            chunk_size=1024 * 1024 * 10,
        )
        context.log.info("Appended to table on STAGING successfully.")
    else:
        context.log.info("Table does not exist in STAGING, creating table...")
        table.create(
            path=path,
            if_table_exists="pass",
            if_storage_data_exists="replace",
            if_table_config_exists="pass",
        )
        context.log.info("Table created in STAGING")

    if table.table_exists("prod"):
        context.log.info("Table already published in PROD.")
    else:
        context.log.info("Table does not exist in PROD, publishing...")
        table.publish(if_exists="pass")
        context.log.info("Published table in PROD successfully.")
Esempio n. 2
def upload_logs_to_bq(context, timestamp, error):
    """Record one capture-run log row (timestamp, success flag, error) in BigQuery.

    Writes a single-row CSV into a date-partitioned local directory, uploads it
    to the ``<table_id>_logs`` staging table (creating the table on first run,
    appending afterwards), publishes the table to prod if needed, and finally
    removes the local files.

    Args:
        context: execution context providing ``context.resources`` and logging.
        timestamp: ISO-8601 capture timestamp; also used as the local work dir.
        error: the captured error, or ``None`` when the run succeeded.
    """
    dataset_id = context.resources.basedosdados_config['dataset_id']
    table_id = context.resources.basedosdados_config['table_id'] + "_logs"

    filepath = Path(
        f"{timestamp}/{table_id}/data={pendulum.parse(timestamp).date()}/{table_id}_{timestamp}.csv")
    # create partition directory
    filepath.parent.mkdir(exist_ok=True, parents=True)
    # create dataframe to be uploaded
    df = pd.DataFrame(
        {"timestamp_captura": [pd.to_datetime(timestamp)], "sucesso": [
            error is None], "erro": [error]}
    )
    # save local
    df.to_csv(filepath, index=False)
    # BD Table object
    tb = Table(table_id, dataset_id)
    # FIX: the original if/elif/else chain dropped the new log row whenever
    # the staging table existed but prod did not (it only published), and it
    # never published on the run that created the table. Use the same
    # create-or-append + publish pattern as the other upload helpers here.
    if not tb.table_exists("staging"):
        tb.create(
            path=f"{timestamp}/{table_id}",
            if_table_exists="replace",
            if_storage_data_exists="replace",
            if_table_config_exists="pass",
        )
    else:
        tb.append(filepath=f"{timestamp}/{table_id}", if_exists='replace')
    if not tb.table_exists("prod"):
        tb.publish(if_exists="replace")

    # delete local file
    shutil.rmtree(f"{timestamp}")
Esempio n. 3
def upload_to_bq(context, paths):
    """Create-or-append each table in ``paths`` on staging and publish it to prod.

    Args:
        context: execution context providing ``context.resources`` and logging.
        paths: mapping of table_id -> a path two levels inside that table's
            upload directory (the upload dir is ``path.parent.parent``).

    Returns:
        The common parent directory of the per-table upload directories
        (taken from the last entry processed).

    Raises:
        ValueError: if ``paths`` is empty. (The original code left ``tb_dir``
        unbound in that case and died with an opaque ``NameError``.)
    """
    if not paths:
        raise ValueError("`paths` must contain at least one table_id -> path entry")

    # Hoist the config lookup out of the loop — it is loop-invariant.
    dataset_id = context.resources.basedosdados_config["dataset_id"]

    for key, path in paths.items():
        context.log.info("#" * 80)
        context.log.info(f"KEY = {key}")
        tb = Table(
            key,
            dataset_id,
        )
        tb_dir = path.parent.parent
        context.log.info(f"tb_dir = {tb_dir}")

        if not tb.table_exists("staging"):
            context.log.info(
                "Table does not exist in STAGING, creating table...")
            tb.create(
                path=tb_dir,
                if_table_exists="pass",
                if_storage_data_exists="replace",
                if_table_config_exists="pass",
            )
            context.log.info("Table created in STAGING")
        else:
            context.log.info(
                "Table already exists in STAGING, appending to it...")
            # 10 MiB upload chunks with a generous timeout for large files.
            tb.append(filepath=tb_dir,
                      if_exists="replace",
                      timeout=600,
                      chunk_size=1024 * 1024 * 10)
            context.log.info("Appended to table on STAGING successfully.")

        if not tb.table_exists("prod"):
            context.log.info("Table does not exist in PROD, publishing...")
            tb.publish(if_exists="pass")
            context.log.info("Published table in PROD successfully.")
        else:
            context.log.info("Table already published in PROD.")
    context.log.info(f"Returning -> {tb_dir.parent}")

    return tb_dir.parent