from datetime import datetime
from pathlib import Path

import basedosdados as bd


def upload_to_raw(tipo, save_raw_path):
    """Upload a raw CAGED file to storage for the given table type."""
    if tipo == "estabelecimentos":
        st = bd.Storage(table_id="microdados_estabelecimentos", dataset_id="br_me_caged")
    else:
        st = bd.Storage(table_id="microdados_movimentacoes", dataset_id="br_me_caged")
    st.upload(path=save_raw_path, mode="raw", if_exists="replace")
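# Usage sketch (the local file path below is hypothetical; `tipo` selects
# between the two br_me_caged tables, falling back to "movimentacoes" for
# any value other than "estabelecimentos"):
#
#     upload_to_raw("estabelecimentos", "/tmp/caged_estabelecimentos.csv")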
def fn_upload_file_to_storage(
    context, file_path, partitions=None, mode="raw", table_id=None, dataset_id=None
):
    """Upload a file to storage."""
    # If no table_id/dataset_id is given, fall back to the resource configuration
    if not table_id:
        table_id = context.resources.basedosdados_config["table_id"]
    if not dataset_id:
        dataset_id = context.resources.basedosdados_config["dataset_id"]
    st = bd.Storage(table_id=table_id, dataset_id=dataset_id)
    context.log.debug(f"Table ID: {table_id}, Dataset ID: {dataset_id}")
    context.log.debug(
        f"Uploading file {file_path} to mode {mode} with partitions {partitions}"
    )
    st.upload(path=file_path, mode=mode, partitions=partitions, if_exists="replace")
    return True
def append_to_bigquery(
    context,
    file_paths,
    partitions,
    modes=("raw", "staging"),  # immutable default avoids the mutable-default pitfall
    table_id=None,
    dataset_id=None,
):
    """Upload one file per mode to storage, deleting each local copy afterwards."""
    if not table_id:
        table_id = context.resources.basedosdados_config["table_id"]
    if not dataset_id:
        dataset_id = context.resources.basedosdados_config["dataset_id"]
    context.log.info(f"Table ID: {table_id} / Dataset ID: {dataset_id}")
    st = bd.Storage(dataset_id=dataset_id, table_id=table_id)
    for idx, mode in enumerate(modes):
        context.log.info(
            f"Uploading file {file_paths[idx]} to mode {mode} with partitions {partitions}"
        )
        st.upload(file_paths[idx], partitions=partitions, mode=mode, if_exists="replace")
        # remove the local file once it has been uploaded
        Path(file_paths[idx]).unlink(missing_ok=True)
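# Usage sketch (runs inside a Dagster solid, where `context` is provided;
# the file paths and partition string below are hypothetical). Note that
# `file_paths` must be ordered to match `modes`: file_paths[0] is uploaded
# in "raw" mode and file_paths[1] in "staging" mode.
#
#     append_to_bigquery(
#         context,
#         file_paths=["/tmp/data_raw.csv", "/tmp/data_staging.csv"],
#         partitions="data_versao=20210101",
#     )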
def download_gtfs_from_storage(context):
    """Download the GTFS zip that matches the scheduled run date from staging."""
    bucket = (
        bd.Storage(
            context.solid_config["dataset_id"], context.solid_config["table_id"]
        )
        .client["storage_staging"]
        .bucket("rj-smtr-staging")
    )
    prefix = context.solid_config["storage_path"]
    blobs = [(blob.name, blob) for blob in bucket.list_blobs(prefix=prefix)]
    # Blob names embed the version as "<key>=<YYYYMMDD>/"; collect the distinct dates
    gtfs_versions = list(
        {
            datetime.strptime(blob[0].split("=")[1].split("/")[0], "%Y%m%d").date()
            for blob in blobs
        }
    )
    gtfs_partition = build_gtfs_version_name(
        gtfs_versions, context.resources.schedule_run_date["date"]
    )
    blob_obj = [blob[1] for blob in blobs if (prefix + gtfs_partition) in blob[0]]
    Path("tmp_data").mkdir(exist_ok=True)
    gtfs_path = f"tmp_data/{gtfs_partition}.zip"
    blob_obj[0].download_to_filename(filename=gtfs_path)
    return gtfs_path
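# `build_gtfs_version_name` is an external helper not shown here. From the
# call sites above, it is assumed to select, among `gtfs_versions`, the
# version applicable to the scheduled run date and to return the matching
# partition segment of the blob path (the "<key>=YYYYMMDD" part), which is
# then used both to filter the blobs and to name the downloaded zip.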
def save_header_files(dataset_id, table_id):
    """Save a small sample of the table to storage as its header file."""
    query = f"""
    SELECT * FROM `basedosdados.{dataset_id}.{table_id}` LIMIT 20
    """
    df = bd.read_sql(query, billing_project_id="basedosdados", from_file=True)
    df.to_csv("header.csv", index=False, encoding="utf-8")
    st = bd.Storage(dataset_id=dataset_id, table_id=table_id)
    st.upload("header.csv", mode="header", if_exists="replace")
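# Usage sketch, assuming valid GCP credentials for the basedosdados billing
# project; the dataset/table pair is the one used in upload_to_raw above:
#
#     save_header_files(dataset_id="br_me_caged", table_id="microdados_estabelecimentos")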
def append_to_bigquery_v2(context, file_path, partitions, mode, table_id=None):
    """Upload a single file to storage, then delete the local copy."""
    if not table_id:
        table_id = context.resources.basedosdados_config["table_id"]
    dataset_id = context.resources.basedosdados_config["dataset_id"]
    bd.Storage(dataset_id=dataset_id, table_id=table_id).upload(
        file_path, partitions=partitions, mode=mode, if_exists="replace"
    )
    # delete the local file after upload
    Path(file_path).unlink(missing_ok=True)
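# Unlike append_to_bigquery above, this variant takes a single file/mode pair
# per call and always derives dataset_id from the resource configuration.
# Usage sketch (inside a Dagster solid; path and partition are hypothetical):
#
#     append_to_bigquery_v2(
#         context,
#         file_path="/tmp/data_staging.csv",
#         partitions="data_versao=20210101",
#         mode="staging",
#     )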