Example #1
def zip_update_tables(path=None, csv=True):
    """check local json, update zips of outdated tables

    Grabs the json files in path, checks them against the metadata on
    StatsCan and grabs updated tables where there have been changes
    There isn't actually a "last modified date" part to the metadata
    What I'm doing is comparing the latest reference period. Almost all
    data changes will at least include incremental releases, so this should
    capture what I want

    Parameters
    ----------
    path: str, default: None
        Where to look for tables to update
    csv: boolean, default: True
        Download updates in CSV format if True, SDMX if False

    Returns
    -------
    update_table_list: list
        list of the tables that were updated

    """
    local_jsons = list_zipped_tables(path=path)
    tables = [j["productId"] for j in local_jsons]
    remote_jsons = get_cube_metadata(tables)
    update_table_list = []
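    # "cubeEndDate" is the latest reference period; a mismatch means
    # StatsCan has newer data than the local copy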
    for local, remote in zip(local_jsons, remote_jsons):
        if local["cubeEndDate"] != remote["cubeEndDate"]:
            update_table_list.append(local["productId"])
    download_tables(update_table_list, path, csv=csv)
    return update_table_list
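
A minimal usage sketch, assuming the zips and their json metadata were
previously saved with download_tables; "stats_can_data" is a hypothetical
directory name:

# "stats_can_data" is a hypothetical directory of previously downloaded tables
updated = zip_update_tables(path="stats_can_data", csv=True)
print(f"{len(updated)} table(s) refreshed: {updated}")
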
Example #2
def h5_update_tables(h5file="stats_can.h5", path=None, tables=None):
    """update any stats_can tables contained in an h5 file

    Parameters
    ----------
    h5file: str, default: stats_can.h5
        Name of the h5 file storing the tables
    path: str or path object, default: None (current working directory)
        Path to the h5 file
    tables: str or list of str, optional, default: None
        If included, only updates the subset of tables that are both
        already in the file and in the tables parameter

    Returns
    -------
    update_table_list: list
        list of the tables that were updated
    """
    if tables:
        local_jsons = metadata_from_h5(tables, h5file=h5file, path=path)
    else:
        if path:
            h5 = os.path.join(path, h5file)
        else:
            h5 = h5file
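        # No table subset given: read every stored metadata blob back out
        # (metadata datasets are stored under keys prefixed with "json")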
        with h5py.File(h5, "r") as f:
            keys = [key for key in f.keys() if key.startswith("json")]
            local_jsons = [json.loads(f[key][()]) for key in keys]
    tables = [j["productId"] for j in local_jsons]
    remote_jsons = get_cube_metadata(tables)
    update_table_list = []
    for local, remote in zip(local_jsons, remote_jsons):
        if local["cubeEndDate"] != remote["cubeEndDate"]:
            update_table_list.append(local["productId"])
    tables_to_h5(update_table_list, h5file=h5file, path=path)
    return update_table_list
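
A minimal usage sketch, assuming the h5 file was previously populated (e.g.
with tables_to_h5); the directory and table id are hypothetical:

# Refresh a single table already stored in data/stats_can.h5 (names hypothetical)
updated = h5_update_tables(h5file="stats_can.h5", path="data", tables=["12100001"])
print("Updated:", updated)
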
Example #3
def download_tables(tables, path=None, csv=True):
    """Download a json file and zip of data for a list of tables to path.

    Parameters
    ----------
    tables: list of str
        tables to be downloaded
    path: str or path object, default: None (current directory)
        Where to download the table and json
    csv: boolean, default: True
        Download in CSV format if True, otherwise SDMX

    Returns
    -------
    downloaded: list
        list of tables that were downloaded
    """
    path = pathlib.Path(path) if path else pathlib.Path()
    metas = get_cube_metadata(tables)
    for meta in metas:
        product_id = meta["productId"]
        zip_url = get_full_table_download(product_id, csv=csv)
        zip_file_name = product_id + ("-eng.zip" if csv else ".zip")
        json_file_name = product_id + ".json"
        zip_file = path / zip_file_name
        json_file = path / json_file_name

        # Thanks http://evanhahn.com/python-requests-library-useragent/
        response = requests.get(zip_url,
                                stream=True,
                                headers={"user-agent": None})

        progress_bar = tqdm(
            desc=zip_file_name,
            total=int(response.headers.get("content-length", 0)),
            unit="B",
            unit_scale=True,
        )

        # Thanks https://bit.ly/2sPYPYw
        with open(json_file, "w") as outfile:
            json.dump(meta, outfile)
        with open(zip_file, "wb") as handle:
            for chunk in response.iter_content(chunk_size=512):
                if chunk:  # filter out keep-alive new chunks
                    handle.write(chunk)
                    progress_bar.update(len(chunk))
        progress_bar.close()
    return [meta["productId"] for meta in metas]
Example #4
def download_tables(tables, path=None, csv=True):
    """Download a json file and zip of data for a list of tables to path

    Parameters
    ----------
    tables: list of str
        tables to be downloaded
    path: str, default: None (current directory)
        Where to download the table and json
    csv: boolean, default: True
        Download in CSV format if True, otherwise SDMX

    Returns
    -------
    downloaded: list
        list of tables that were downloaded
    """
    metas = get_cube_metadata(tables)
    for meta in metas:
        product_id = meta["productId"]
        zip_url = get_full_table_download(product_id, csv=csv)
        if csv:
            zip_file = product_id + "-eng.zip"
        else:
            zip_file = product_id + ".zip"
        json_file = product_id + ".json"
        if path:
            zip_file = os.path.join(path, zip_file)
            json_file = os.path.join(path, json_file)
        # Thanks http://evanhahn.com/python-requests-library-useragent/
        response = requests.get(zip_url,
                                stream=True,
                                headers={"user-agent": None})
        # Thanks https://bit.ly/2sPYPYw
        with open(json_file, "w") as outfile:
            json.dump(meta, outfile)
        with open(zip_file, "wb") as handle:
            for chunk in response.iter_content(chunk_size=512):
                if chunk:  # filter out keep-alive new chunks
                    handle.write(chunk)
    downloaded = [meta["productId"] for meta in metas]
    return downloaded
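
A minimal sketch of inspecting what either version of download_tables leaves
on disk, based on the file names built above; the table id is hypothetical and
the zip contents are not specified by the code:

import json
import zipfile

product_id = "12100001"  # hypothetical table id
# The json file holds the cube metadata dumped by download_tables
with open(product_id + ".json") as f:
    meta = json.load(f)
print(meta["productId"], meta.get("cubeEndDate"))
# List whatever the CSV zip contains (contents assumed, not verified here)
with zipfile.ZipFile(product_id + "-eng.zip") as z:
    print(z.namelist())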