Example #1
0
def bq_upload(context, filepath, raw_filepath=None, partitions=None):
    """Upload a treated file (and optionally its raw counterpart) to storage.

    Args:
        context: Pipeline context providing ``resources.basedosdados_config``
            (a mapping with ``dataset_id`` and ``table_id`` keys) and a logger.
        filepath: Path to the treated file to create/append to the table.
        raw_filepath: Optional path to the raw source file; when given it is
            uploaded with ``mode='raw'`` before the treated data is handled.
        partitions: Optional partition path fragment (presumably something
            like ``"data=2021-01-01/"`` — TODO confirm it is a string, since
            it is passed to ``str.split`` below).
    """
    table_id = context.resources.basedosdados_config['table_id']
    dataset_id = context.resources.basedosdados_config['dataset_id']
    context.log.info(f"""
    Received inputs:
    raw_filepath = {raw_filepath}, type = {type(raw_filepath)}
    treated_filepath = {filepath}, type = {type(filepath)}
    dataset_id = {dataset_id}, type = {type(dataset_id)}
    table_id = {table_id}, type = {type(table_id)}
    partitions = {partitions}, type = {type(partitions)}
    """)
    # Upload raw to storage. Fixed the log message: it repeated the bucket
    # name without a prefix; mode='raw' uploads land under the raw/ prefix
    # (mirroring the staging/ prefix logged by the staging upload example).
    if raw_filepath:
        st = Storage(table_id=table_id, dataset_id=dataset_id)
        context.log.info(
            f"Uploading raw file: {raw_filepath} to bucket {st.bucket_name} "
            f"at {st.bucket_name}/raw/{dataset_id}/{table_id}"
        )
        st.upload(path=raw_filepath,
                  partitions=partitions,
                  mode='raw',
                  if_exists='replace')

    # Creates and publishes the table if it does not exist, appends otherwise.
    if partitions:
        # If the table is partitioned, upload the parent directory in which
        # the partition subdirectories live: everything before the partition
        # fragment in the treated filepath.
        tb_dir = filepath.split(partitions)[0]
        create_or_append_table(context, dataset_id, table_id, tb_dir)
    else:
        create_or_append_table(context, dataset_id, table_id, filepath)

    # Delete local files now that both uploads have succeeded.
    context.log.info(f"Deleting local files: {raw_filepath}, {filepath}")
    cleanup_local(filepath, raw_filepath)
Example #2
0
def upload(context, filename):
    """Push ``filename`` to the staging area of GCS, replacing any existing object.

    Reads ``dataset_id``/``table_id`` from ``resources.basedosdados_config`` on
    the given context, uploads via ``Storage``, and returns ``filename``
    unchanged so downstream steps can chain on it.
    """
    config = context.resources.basedosdados_config
    dataset_id = config["dataset_id"]
    table_id = config["table_id"]

    storage = Storage(dataset_id, table_id)

    # Build the destination string first, then log it (same emitted message).
    destination = f"{storage.bucket_name}/staging/{dataset_id}/{table_id}"
    context.log.info(f"Uploading (unknown) to GCS at:{destination}")

    storage.upload(path=filename, mode="staging", if_exists="replace")

    return filename