Example #1
def timing():
    # Pick the mirror to benchmark against (S3 kept as an alternative):
    # baseurl = CML_BASEURL_S3
    baseurl = CML_BASEURL_CDS
    index = PerUrlIndex(
        f"{baseurl}/test-data/input/indexed-urls/large_grib_1.grb", )

    # Non-blocked range methods, kept for reference but not benchmarked here:
    # sizes = ["sharp(1,1)", "auto", "cluster"]
    sizes = []
    for r in range(11, 24):  # block sizes from 2 KiB (2**11) to 8 MiB (2**23)
        sizes.append(f"blocked({2 ** r})")

    report = {}
    for request in [
            dict(param="r"),
            dict(param="r", time="1000"),
            dict(date="19970101"),
            dict(param="r", time="1000", date="19970101"),
    ]:
        times = []
        for n in sizes:
            try:
                elapsed = retrieve_and_check(index,
                                             request,
                                             range_method=n,
                                             force=True)
            except Exception as e:
                print(e)
                # Record failures as a tuple too: mixing plain ints with the
                # (elapsed, n) tuples below would make sorted(times) raise a
                # TypeError in Python 3.
                times.append((-1.0, n))
                continue
            # Map non-blocked method labels to numeric sentinels so the
            # (elapsed, n) tuples stay comparable when sorted.
            if n is None:
                n = 0
            if n == "auto":
                n = -1
            if n == "cluster":
                n = 1
            if n == "sharp":
                n = -2
            times.append((round(elapsed * 10) / 10.0, n))

        report[tuple(request.items())] = request, sorted(times)

    for k, v in report.items():
        print(k)
        print(v)
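
Note: retrieve_and_check is a helper defined elsewhere in the test suite and not shown in these examples. The sketch below is a hypothetical stand-in, assuming load_source forwards a range_method option to the indexed-urls source (as the calls above suggest); it is an illustration, not the real helper.

import time

from climetlab import load_source
from climetlab.indexing import PerUrlIndex


def retrieve_and_time(index, request, range_method=None):
    # Hypothetical helper: time one retrieval through the indexed-urls
    # source. Assumes load_source accepts range_method for this source.
    start = time.time()
    ds = load_source("indexed-urls", index, request, range_method=range_method)
    assert len(ds) > 0  # basic sanity check: at least one field retrieved
    return time.time() - start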
Example #2
def test_grib_index_eumetnet():
    from climetlab import load_source
    from climetlab.indexing import PerUrlIndex

    request = {
        "param": "2ti",
        "date": "20171228",
        "step":
        ["0-24", "24-48", "48-72", "72-96", "96-120", "120-144", "144-168"],
        # Parameters passed to the filename mangling
        "url":
        "https://storage.ecmwf.europeanweather.cloud/eumetnet-postprocessing-benchmark-training-dataset/",
        "month": "12",
        "year": "2017",
    }
    PATTERN = "{url}data/fcs/efi/" "EU_forecast_efi_params_{year}-{month}_0.grb"
    ds = load_source("indexed-urls", PerUrlIndex(PATTERN), request)
    xds = ds.to_xarray()
    print(xds)
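
Note: PerUrlIndex presumably expands the pattern with the matching request keys ("url", "year", "month"), while the remaining keys ("param", "date", "step") select fields inside the indexed file. A plain str.format illustration of the single URL this request would target:

PATTERN = "{url}data/fcs/efi/EU_forecast_efi_params_{year}-{month}_0.grb"
BASE = (
    "https://storage.ecmwf.europeanweather.cloud/"
    "eumetnet-postprocessing-benchmark-training-dataset/"
)
# Prints the URL the index would build for year=2017, month=12:
print(PATTERN.format(url=BASE, year="2017", month="12"))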
Example #3
def test_per_url_index(baseurl):
    index = PerUrlIndex(
        f"{baseurl}/test-data/input/indexed-urls/large_grib_1.grb", )
    request = dict(param="r", time="1000", date="19970101")
    retrieve_and_check(index, request)
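
Note: test_per_url_index takes baseurl as an argument, so in the real suite it is presumably parametrized over the available mirrors. A hypothetical pytest parametrization along those lines (the actual fixture or decorator may differ):

import pytest

# CML_BASEURL_S3 and CML_BASEURL_CDS are the module constants used above.
@pytest.mark.parametrize("baseurl", [CML_BASEURL_S3, CML_BASEURL_CDS])
def test_per_url_index(baseurl):
    ...  # body as in Example #3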
Example #4
def test_per_url_index_2():
    baseurl = CML_BASEURL_S3
    index = PerUrlIndex(f"{baseurl}/test-data/big.grib", )
    request = dict(param="cin", date="20211125", step="6", number=["1", "3"])
    retrieve_and_check(index, request)
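
Note: list-valued request keys such as number=["1", "3"] presumably expand to one GRIB field per value. A hypothetical follow-up check inside such a test, assuming the source supports len() and to_xarray() as in Example #2:

from climetlab import load_source

ds = load_source("indexed-urls", index, request)  # index/request as above
assert len(ds) == 2  # assumption: one field per requested ensemble number
print(ds.to_xarray())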
Example #5
def benchmark():
    collect_statistics(True)

    baseurls = [
        CML_BASEURL_S3,
        CML_BASEURL_CDS,
        # CML_BASEURL_GET,
    ]

    requests = [
        {"param": "r", "time": "1000", "step": "0"},
        {"param": "r", "time": "1000"},
        {"param": "r", "time": ["1100", "1200", "1300", "1400"]},
        {
            "param": ["r", "z"],
            "time": ["0200", "1000", "1800", "2300"],
            "levelist": ["500", "850"],
        },
        {"param": ["r", "z"], "levelist": ["500", "850"]},
        {"param": "r"},
        # {"param": ["r", "z"]},
        {"param": ["r", "z", "t"]},
        # {},
    ]

    methods = get_methods_list()

    # Uncomment to run a reduced benchmark:
    # requests = [requests[2]]
    # methods = [methods[0]]
    # requests = requests[::2]
    # methods = methods[::2]
    # baseurls = [baseurls[0]]
    failed = []
    successful = 0
    import tqdm

    for request in tqdm.tqdm(requests):
        for range_method in tqdm.tqdm(methods):
            for baseurl in baseurls:
                index = PerUrlIndex(
                    f"{baseurl}/test-data/input/indexed-urls/large_grib_1.grb",
                )
                try:
                    retrieve_and_check(
                        index,
                        request,
                        range_method,
                        force=True,
                    )
                    successful += 1
                except Exception as e:
                    failed.append((index, request, range_method))
                    print("FAILED for ", index, request, range_method)
                    print(e)

    stats = retrieve_statistics()

    run_id = get_run_id()

    logfiles = []

    path = f"climetlab_benchmark{run_id}.json"
    logfiles.append(path)
    stats.write_to_json(path)
    print(f"BENCHMARK FINISHED. Raw logs saved in {path}")

    df = stats.to_pandas()

    df["server"] = df["url"].apply(url_to_server)
    df["speed"] = df["total"] / df["elapsed"] / (1024 * 1024)  # MB/s
    df["method"] = df["full_method"].apply(radix)

    df = df.rename(
        columns=dict(
            size_parts="size_requested",
            size_blocks="size_downloaded",
        )
    )
    df["size_ratio"] = df["size_downloaded"] / df["size_requested"]

    path = f"climetlab_benchmark{run_id}.csv"
    df.to_csv(path)
    # df.to_csv("climetlab_benchmark.csv")
    logfiles.append(path)

    print(f"Benchmark finished ({successfull} successfull, {len(failed)} failed).")
    print(
        "All data in the log files are anonymous."
        "Only the log file names contain personal data (machine name, IP, etc.)."
    )
    for f in logfiles:
        print(f"Log file: {f}")