Exemple #1
0
def get_obs_frequency():
    """Return the observation map for a single weather type as JSON.

    The JSON request body must provide ``labels``, ``thrWT``, ``path``,
    ``code``, ``mode`` and ``cheaper``. The thresholds in ``thrWT`` are paired
    with ``labels``, split into alternating low/high series, and used to build
    a ``WeatherType`` that is evaluated against the point data loaded from
    ``path``. The value returned by ``WeatherType.plot_maps`` is sent back
    through ``jsonify``.
    """
    body = request.get_json()
    labels = body["labels"]
    raw_thresholds = body["thrWT"]
    path = sanitize_path(body["path"])
    code = body["code"]
    mode = body["mode"]
    cheaper = body["cheaper"]

    loader = load_point_data_by_path(path, cheaper=cheaper)

    # Even positions hold the low thresholds, odd positions the high ones.
    threshold_values = list(map(float, raw_thresholds))
    thresholds = pandas.Series(dict(zip(labels, threshold_values)))
    thrL = thresholds.iloc[::2]
    thrH = thresholds.iloc[1::2]

    wt = WeatherType(
        thrL=thrL,
        thrH=thrH,
        thrL_labels=labels[::2],
        thrH_labels=labels[1::2],
    )

    # Only the dataframe half of the evaluation result is needed here.
    df, _ = wt.evaluate(
        loader.error_type.name, "LonOBS", "LatOBS", "OBS", loader=loader
    )

    return jsonify(wt.plot_maps(df, code, mode.lower()))
Exemple #2
0
def get_wt_histogram():
    """Return the error histogram of a single weather type as JSON.

    The JSON request body must provide ``labels``, ``thrWT``, ``path``,
    ``yLim``, ``bins``, ``numBins`` and ``cheaper``. The response is a JSON
    object whose ``histogram`` key holds whatever ``WeatherType.plot``
    returns for the evaluated error column.
    """
    body = request.get_json()
    labels = body["labels"]
    raw_thresholds = body["thrWT"]
    path = sanitize_path(body["path"])
    y_lim = body["yLim"]
    raw_bins = body["bins"]
    num_bins = body["numBins"]
    cheaper = body["cheaper"]

    loader = load_point_data_by_path(path, cheaper=cheaper)

    # Even positions hold the low thresholds, odd positions the high ones.
    threshold_values = list(map(float, raw_thresholds))
    thresholds = pandas.Series(dict(zip(labels, threshold_values)))
    thrL = thresholds.iloc[::2]
    thrH = thresholds.iloc[1::2]

    bins = list(map(float, raw_bins))

    wt = WeatherType(
        thrL=thrL,
        thrH=thrH,
        thrL_labels=labels[::2],
        thrH_labels=labels[1::2],
    )

    df, title_tokens = wt.evaluate(loader.error_type.name, loader=loader)
    title = wrap_title(title=title_tokens, chunk_size=6)

    error = df[loader.error_type.name]
    histogram = wt.plot(
        error, bins, title, y_lim=int(y_lim), num_bins=int(num_bins)
    )

    return jsonify({"histogram": histogram})
Exemple #3
0
def get_pdt_metadata():
    """Return the ``metadata`` of the point data at the requested path as JSON.

    The JSON request body must provide ``path``.
    """
    pdt_path = sanitize_path(request.get_json()["path"])
    metadata = load_point_data_by_path(pdt_path).metadata
    serialized = json.dumps(metadata)

    return Response(serialized, mimetype="application/json")
Exemple #4
0
def save_wt_histograms():
    """Render one histogram image per weather type into a directory.

    The JSON request body must provide ``labels``, ``thrGridOut``, ``path``,
    ``yLim``, ``destinationDir``, ``bins``, ``numBins`` and ``cheaper``.
    Each row of ``thrGridOut`` starts with the weather type code, followed by
    the threshold values that line up with ``labels``. For every row a
    ``WT_<code>.png`` path inside the destination directory is passed to
    ``WeatherType.plot`` as ``out_path``.

    Returns a JSON object ``{"status": "success"}``.
    """
    body = request.get_json()
    labels = body["labels"]
    grid = body["thrGridOut"]
    path = sanitize_path(body["path"])
    y_lim = body["yLim"]
    destination = body["destinationDir"]
    raw_bins = body["bins"]
    num_bins = body["numBins"]
    cheaper = body["cheaper"]
    destination = sanitize_path(destination)

    loader = load_point_data_by_path(path, cheaper=cheaper)

    # Drop the leading weather type code of each row; the rest are thresholds.
    threshold_rows = [[float(cell) for cell in row[1:]] for row in grid]
    thresholds = pandas.DataFrame.from_records(threshold_rows, columns=labels)

    bins = list(map(float, raw_bins))

    # Even columns hold the low thresholds, odd columns the high ones.
    low_thresholds = thresholds.iloc[:, ::2]
    high_thresholds = thresholds.iloc[:, 1::2]

    # The dataframe has exactly one row per grid row, so positions line up.
    for position, grid_row in enumerate(grid):
        wt = WeatherType(
            thrL=low_thresholds.iloc[position],
            thrH=high_thresholds.iloc[position],
            thrL_labels=labels[::2],
            thrH_labels=labels[1::2],
        )

        evaluated, title_tokens = wt.evaluate(
            loader.error_type.name, loader=loader
        )
        title = wrap_title(title=title_tokens, chunk_size=6)
        error = evaluated[loader.error_type.name]

        wt_code = grid_row[0]
        target = os.path.join(destination, f"WT_{wt_code}.png")
        wt.plot(
            error,
            bins,
            title,
            y_lim=int(y_lim),
            num_bins=int(num_bins),
            out_path=target,
        )

    return jsonify({"status": "success"})
Exemple #5
0
def get_obs_metadata():
    """Return the units read from the first ``.geo`` file under a directory.

    The JSON request body must provide ``path``. The directory is searched
    recursively for ``*.geo`` files and the first match is handed to
    ``geopoints_loader.read_units``.

    Returns a JSON object ``{"units": <value>}``. ``units`` is ``null`` when
    no ``.geo`` file exists under the path or when ``read_units`` raises
    ``ValueError``.
    """
    payload = request.get_json()
    path = Path(sanitize_path(payload["path"]))

    # Supplying a default keeps an empty directory from raising StopIteration
    # out of the view; it is reported the same way as unreadable units.
    first_geo_file = next(path.glob("**/*.geo"), None)

    units = None
    if first_geo_file is not None:
        try:
            units = geopoints_loader.read_units(first_geo_file)
        except ValueError:
            units = None

    return Response(json.dumps({"units": units}), mimetype="application/json")
Exemple #6
0
def get_predictors():
    """List the predictor codes found under the requested directory.

    The JSON request body must provide ``path``. Every sub-directory whose
    name does not start with ``.`` counts as a predictor code. The codes are
    returned as a JSON array.
    """
    root = sanitize_path(request.get_json()["path"])

    codes = []
    for entry in os.listdir(root):
        if not os.path.isdir(os.path.join(root, entry)):
            continue
        if entry.startswith("."):
            continue
        codes.append(entry)

    # Warm up the LRU cache used for fetching units; results are discarded.
    for code in codes:
        get_metadata(os.path.join(root, code))

    return Response(json.dumps(codes), mimetype="application/json")
Exemple #7
0
def get_error_rep():
    """Return the result of ``DecisionTree.cal_rep_error`` as CSV text.

    The JSON request body must provide ``labels``, ``matrix``, ``path``,
    ``numCols``, ``cheaper`` and ``ranges``. ``matrix`` rows are converted to
    floats and split into alternating low/high threshold columns. The CSV
    text is wrapped with ``jsonify``.
    """
    body = request.get_json()
    labels = body["labels"]
    raw_matrix = body["matrix"]
    path = sanitize_path(body["path"])
    numCols = body["numCols"]
    cheaper = body["cheaper"]
    ranges = body["ranges"]

    numeric_rows = [list(map(float, row)) for row in raw_matrix]
    thresholds = pandas.DataFrame.from_records(numeric_rows, columns=labels)

    # Even columns hold the low thresholds, odd columns the high ones.
    thrL = thresholds.iloc[:, ::2]
    thrH = thresholds.iloc[:, 1::2]

    loader = load_point_data_by_path(path, cheaper=cheaper)

    tree = DecisionTree(threshold_low=thrL, threshold_high=thrH, ranges=ranges)
    rep = tree.cal_rep_error(loader, nBin=int(numCols))

    # Serialize through an in-memory text buffer.
    buffer = StringIO()
    rep.to_csv(buffer)
    csv_text = buffer.getvalue()

    return jsonify(csv_text)
Exemple #8
0
def get_pdt_statistics():
    """Return ``postprocessing_svc.get_pdt_statistics`` for a path as JSON.

    The JSON request body must provide ``path``.
    """
    pdt_path = sanitize_path(request.get_json()["path"])
    statistics = postprocessing_svc.get_pdt_statistics(pdt_path)
    serialized = json.dumps(statistics)

    return Response(serialized, mimetype="application/json")
Exemple #9
0
def get_breakpoints_suggestions():
    """Suggest breakpoints for one predictor within a weather type.

    The JSON request body must provide ``labels``, ``thrWT``, ``path``,
    ``predictor``, ``numBreakpoints`` and ``cheaper``. ``lowerBound`` and
    ``upperBound`` are optional: a missing, ``null`` or empty-string bound
    means "unbounded" (``-inf`` / ``inf``); any other value is converted with
    ``float``, so a bound of ``0`` is honoured.

    Returns a JSON object with:
      * ``records`` - rows of the formatted breakpoint table,
      * ``figure``  - the value returned by ``plot_ks_stats``,
      * ``count``   - the size value returned by ``ks_test_engine``.
    """
    payload = request.get_json()

    labels, thrWT, path, predictor, num_bp, lower_bound, upper_bound, cheaper = (
        payload["labels"],
        payload["thrWT"],
        sanitize_path(payload["path"]),
        payload["predictor"],
        int(payload["numBreakpoints"]),
        payload.get("lowerBound"),
        payload.get("upperBound"),
        payload["cheaper"],
    )

    loader = load_point_data_by_path(path, cheaper=cheaper)

    # Even positions hold the low thresholds, odd positions the high ones.
    thrWT = [float(cell) for cell in thrWT]
    series = pandas.Series(dict(zip(labels, thrWT)))
    thrL, thrH = series.iloc[::2], series.iloc[1::2]

    # Only a missing/blank bound means "unbounded". A plain truthiness test
    # would also throw away a legitimate numeric bound of 0.
    lower_bound = -inf if lower_bound in (None, "") else float(lower_bound)
    upper_bound = inf if upper_bound in (None, "") else float(upper_bound)

    wt = WeatherType(
        thrL=thrL, thrH=thrH, thrL_labels=labels[::2], thrH_labels=labels[1::2]
    )

    df, title_tokens = wt.evaluate(loader.error_type.name, predictor, loader=loader)

    # Replace every title token mentioning the predictor with the range that
    # is actually being analysed.
    title_tokens = [
        f"({lower_bound} <= {predictor} < {upper_bound})"
        if predictor in token
        else token
        for token in title_tokens
    ]
    title_ks = wrap_title(title_tokens, chunk_size=4)

    df_breakpoints, df_size = ks_test_engine(
        df=df,
        predictor_name=predictor,
        error_name=loader.error_type.name,
        breakpoints_num=num_bp,
        lower_bound=lower_bound,
        upper_bound=upper_bound,
    )

    # The plot is built from the raw statistics, before they are formatted
    # for the response.
    plot = plot_ks_stats(
        df=df_breakpoints,
        node=title_ks + f"\n\nNo. of points: {df_size}",
        predictor=predictor,
        unit=loader.units["predictors"][predictor],
    )
    df_breakpoints = format_ks_stats(df_breakpoints)

    return Response(
        json.dumps(
            {
                "records": df_breakpoints.to_dict("records"),
                "figure": plot,
                "count": df_size,
            }
        ),
        mimetype="application/json",
    )
Exemple #10
0
def get_predictor_units():
    """Return the result of ``get_metadata`` for the requested path as JSON.

    The JSON request body must provide ``path``.
    """
    target = sanitize_path(request.get_json()["path"])
    serialized = json.dumps(get_metadata(target))

    return Response(serialized, mimetype="application/json")
Exemple #11
0
def save_operation():
    """Write the artefacts selected by ``payload["mode"]`` to ``outPath``.

    Handled modes are ``"breakpoints"``, ``"mf"``, ``"wt"``, ``"bias"`` and
    ``"all"``. In a single mode ``outPath`` itself is used as the destination
    of that artefact. In ``"all"`` mode a directory named
    ``<family><accumulation>h<datasetName>_<version>`` (the ``<accumulation>h``
    part is omitted when accumulation is falsy) is created under ``outPath``
    and every artefact is written inside it:

      * ``BP.csv``                - the ``breakpointsCSV`` text, verbatim
      * ``<error type name>.csv`` - result of ``DecisionTree.cal_rep_error``
      * ``WTs/WT_<code>.png``     - one ``WeatherType.plot`` output per row
      * ``Bias.csv``              - one ``(WT Code, Bias)`` row per grid row
      * ``README.txt``            - family, version and a timestamp
      * ``PDT.<ext>``             - ``loader.clone`` of the non-excluded columns

    The keys ``labels``, ``matrix``, ``fieldRanges``, ``pdtPath``, ``mfcols``,
    ``cheaper``, ``mode`` and ``outPath`` are read unconditionally; the other
    keys are only read by the sections that need them.

    Returns an empty JSON object.
    """
    payload = request.get_json()

    # Read up front for every mode, so each of these keys must be present.
    labels = payload["labels"]
    matrix = payload["matrix"]
    ranges = payload["fieldRanges"]
    pdt_path = sanitize_path(payload["pdtPath"])
    mf_cols = payload["mfcols"]
    cheaper = payload["cheaper"]
    mode = payload["mode"]
    output_path = Path(sanitize_path(payload["outPath"]))

    if mode == "all":
        # "all" writes into a dedicated sub-directory of outPath.
        version = payload["version"]
        family = payload["family"]
        accumulation = payload["accumulation"]
        accumulation = f"{accumulation}h" if accumulation else ""
        dataset_name = payload["datasetName"]

        output_path = output_path / f"{family}{accumulation}{dataset_name}_{version}"

        os.makedirs(output_path, exist_ok=True)

    if mode in ["breakpoints", "all"]:
        # The breakpoints CSV is supplied ready-made by the client.
        csv = payload["breakpointsCSV"]
        path = output_path
        if mode == "all":
            path = path / "BP.csv"

        with open(path, "w") as f:
            f.write(csv)

    if mode in ["mf", "all"]:
        matrix = [[float(cell) for cell in row] for row in matrix]
        df = pandas.DataFrame.from_records(matrix, columns=labels)
        # Even columns hold the low thresholds, odd columns the high ones.
        thrL, thrH = df.iloc[:, ::2], df.iloc[:, 1::2]
        loader = load_point_data_by_path(pdt_path, cheaper=cheaper)

        dt = DecisionTree(threshold_low=thrL, threshold_high=thrH, ranges=ranges)
        rep = dt.cal_rep_error(loader, nBin=int(mf_cols))

        path = output_path
        if mode == "all":
            path = path / f"{loader.error_type.name}.csv"

        with open(path, "w") as f:
            # Column headers are the 1-based bin numbers "1".."mf_cols".
            rep.to_csv(
                f,
                header=[str(i + 1) for i in range(int(mf_cols))],
                index_label="WT Code",
            )

    if mode in ["wt", "all"]:
        ylim = payload["yLim"]
        bins = payload["bins"]
        num_bins = payload["numBins"]
        thrGridOut = payload["thrGridOut"]

        # Each grid row starts with the weather type code; the remaining cells
        # are the thresholds matching ``labels``.
        matrix = [[float(cell) for cell in row[1:]] for row in thrGridOut]
        df = pandas.DataFrame.from_records(matrix, columns=labels)

        loader = load_point_data_by_path(pdt_path, cheaper=cheaper)
        bins = [float(each) for each in bins]

        thrL_out, thrH_out = df.iloc[:, ::2], df.iloc[:, 1::2]

        path = output_path
        if mode == "all":
            path = path / "WTs"

            os.makedirs(path, exist_ok=True)

        # One histogram image per weather type (per grid row).
        for idx in range(len(thrL_out)):
            thrL = thrL_out.iloc[idx]
            thrH = thrH_out.iloc[idx]
            wt = WeatherType(
                thrL=thrL, thrH=thrH, thrL_labels=labels[::2], thrH_labels=labels[1::2]
            )

            dataframe, title_tokens = wt.evaluate(loader.error_type.name, loader=loader)
            title = wrap_title(title=title_tokens, chunk_size=6)
            error = dataframe[loader.error_type.name]

            wt_code = thrGridOut[idx][0]
            wt.plot(
                error,
                bins,
                title,
                y_lim=int(ylim),
                num_bins=int(num_bins),
                out_path=os.path.join(path, f"WT_{wt_code}.png"),
            )

    if mode in ["bias", "all"]:
        thrGridOut = payload["thrGridOut"]
        bins = payload["bins"]
        num_bins = payload["numBins"]
        bins = [float(each) for each in bins]

        # Same grid layout as the "wt" section: code first, thresholds after.
        matrix = [[float(cell) for cell in row[1:]] for row in thrGridOut]
        df = pandas.DataFrame.from_records(matrix, columns=labels)

        loader = load_point_data_by_path(pdt_path, cheaper=cheaper)

        thrL_out, thrH_out = df.iloc[:, ::2], df.iloc[:, 1::2]

        path = output_path
        if mode == "all":
            path = path / "Bias.csv"

        # Accumulates (weather type code, formatted bias) tuples.
        csv = []
        for idx in range(len(thrL_out)):
            thrL = thrL_out.iloc[idx]
            thrH = thrH_out.iloc[idx]
            wt = WeatherType(
                thrL=thrL, thrH=thrH, thrL_labels=labels[::2], thrH_labels=labels[1::2]
            )

            dataframe, title_tokens = wt.evaluate(loader.error_type.name, loader=loader)
            error = dataframe[loader.error_type.name]
            discretized_error = wt.discretize_error(error=error, num_bins=int(num_bins))

            # Bias is computed between the first and the last bin edge.
            bias = loader.error_type.bias(
                error=discretized_error, low=bins[0], high=bins[-1]
            )
            # Stored as text with two decimals.
            bias = f"{bias:.2f}"

            wt_code = thrGridOut[idx][0]
            csv += [(wt_code, bias)]

        pandas.DataFrame.from_records(csv, columns=["WT Code", "Bias"]).to_csv(
            path, index=False
        )

    if mode == "all":
        family = payload["family"]
        version = payload["version"]

        # Here accumulation is formatted for humans, unlike the directory name.
        accumulation = payload["accumulation"]
        accumulation = f", {accumulation}-hourly" if accumulation else ""

        with open(output_path / "README.txt", "w") as f:
            text = dedent(
                f"""
                ecPoint-{family}{accumulation}
                Version: {version}
                Timestamp: {datetime.now()}
                """
            )

            # Drop the blank line left by the opening triple quote.
            f.write(text.lstrip())

        loader = load_point_data_by_path(pdt_path, cheaper=cheaper)

        # Anything that is not a .parquet file is exported with the "ascii"
        # extension.
        if pdt_path.endswith(".ascii"):
            ext = "ascii"
        elif pdt_path.endswith(".parquet"):
            ext = "parquet"
        else:
            ext = "ascii"

        # Copy the point data next to the other artefacts, minus the columns
        # listed in ``excludePredictors``.
        exclude_cols = payload["excludePredictors"]
        cols = [col for col in loader.columns if col not in exclude_cols]

        loader.clone(*cols, path=output_path / f"PDT.{ext}")

    return Response(json.dumps({}), mimetype="application/json")