Exemple #1
0
def test_good_ascii_file_clone(tmp_path):
    """Clone every column except ``TP`` and ``CAPE`` and compare the result.

    The clone is written under ``tmp_path``, decoded again, and must equal
    the source dataframe with the two excluded columns dropped.
    """
    dropped = ["TP", "CAPE"]
    source = ASCIIDecoder(path=TEST_DATA_DIR / "good_ascii_file.ascii")
    target = tmp_path / "good_ascii_file.ascii"

    kept = [name for name in source.columns if name not in dropped]
    source.clone(*kept, path=target)

    actual = ASCIIDecoder(path=target).dataframe
    expected = source.dataframe.drop(dropped, axis=1)
    assert_frame_equal(actual, expected)
Exemple #2
0
def test_good_ascii_file():
    """The decoded ``WSPD`` column is close to the five reference values."""
    expected_wspd = [6.163900, 4.319410, 6.537020, 8.513560, 8.770020]

    decoder = ASCIIDecoder(path=TEST_DATA_DIR / "good_ascii_file.ascii")

    assert numpy.allclose(decoder.dataframe["WSPD"], expected_wspd)
Exemple #3
0
def get_obs_frequency():
    """Return the ``plot_maps`` output of one weather type as JSON.

    Reads ``labels``, ``thrWT``, ``path``, ``code`` and ``mode`` from the
    request JSON, evaluates a ``WeatherType`` on the table decoded from
    ``path`` and passes the evaluated matrix, ``code`` and the lower-cased
    ``mode`` to ``WeatherType.plot_maps``.
    """
    body = request.get_json()
    labels = body["labels"]
    raw_thresholds = body["thrWT"]
    path = body["path"]
    code = body["code"]
    mode = body["mode"]

    table = ASCIIDecoder(path=path).dataframe

    # Thresholds are keyed by label; even positions feed thrL, odd ones thrH.
    values = list(map(float, raw_thresholds))
    thresholds = pandas.Series(dict(zip(labels, values)))
    low = thresholds.iloc[::2]
    high = thresholds.iloc[1::2]

    weather_type = WeatherType(
        thrL=low,
        thrH=high,
        thrL_labels=labels[::2],
        thrH_labels=labels[1::2],
    )

    # Only the matrix returned by evaluate() is used by this handler.
    _, evaluated, _ = weather_type.evaluate(table)

    return jsonify(weather_type.plot_maps(evaluated, code, mode.lower()))
Exemple #4
0
def get_error_rep():
    """Return the output of ``DecisionTree.cal_rep_error`` as CSV text in JSON.

    Reads ``labels``, ``matrix``, ``path`` and ``numCols`` from the request
    JSON. The matrix is converted to floats and split column-wise: even
    columns are passed as ``thrL_out``, odd columns as ``thrH_out``.
    """
    body = request.get_json()
    labels = body["labels"]
    rows = body["matrix"]
    path = body["path"]
    num_cols = body["numCols"]

    numeric_rows = [list(map(float, row)) for row in rows]
    thresholds = pandas.DataFrame.from_records(numeric_rows, columns=labels)
    low = thresholds.iloc[:, ::2]
    high = thresholds.iloc[:, 1::2]

    table = ASCIIDecoder(path=path).dataframe
    rep = DecisionTree.cal_rep_error(
        table,
        thrL_out=low,
        thrH_out=high,
        nBin=int(num_cols),
    )

    # Serialise the array as comma-separated text through an in-memory buffer.
    buffer = StringIO()
    np.savetxt(buffer, rep, delimiter=",")
    return jsonify(buffer.getvalue())
Exemple #5
0
def get_wt_histogram():
    """Return the ``WeatherType.plot`` output for one weather type as JSON.

    Reads ``labels``, ``thrWT``, ``path``, ``yLim`` and ``bins`` from the
    request JSON, evaluates the weather type on the table decoded from
    ``path`` and responds with ``{"histogram": <plot output>}``.
    """
    body = request.get_json()
    labels = body["labels"]
    raw_thresholds = body["thrWT"]
    path = body["path"]
    y_lim = body["yLim"]
    raw_bins = body["bins"]

    table = ASCIIDecoder(path=path).dataframe

    # Thresholds are keyed by label; even positions feed thrL, odd ones thrH.
    values = list(map(float, raw_thresholds))
    thresholds = pandas.Series(dict(zip(labels, values)))
    low = thresholds.iloc[::2]
    high = thresholds.iloc[1::2]

    bin_edges = list(map(float, raw_bins))

    weather_type = WeatherType(
        thrL=low,
        thrH=high,
        thrL_labels=labels[::2],
        thrH_labels=labels[1::2],
    )
    error, _, title = weather_type.evaluate(table)
    histogram = weather_type.plot(error, bin_edges, title, int(y_lim))

    return jsonify({"histogram": histogram})
Exemple #6
0
def test_good_parquet_file():
    """Round-trip an ASCII table through the Parquet writer and reader.

    Checks that the metadata added before writing is read back unchanged,
    that the frame read back has a smaller deep memory footprint than the
    source frame, and that both frames are equal when dtype and categorical
    differences are ignored.
    """
    expected_metadata = {"header": "foo", "footer": "bar"}
    source = ASCIIDecoder(path=TEST_DATA_DIR / "good_parquet.ascii").dataframe

    with NamedTemporaryFile() as tmp:
        writer = ParquetPointDataTableWriter(tmp.name)
        for key, value in expected_metadata.items():
            writer.add_metadata(key, value)
        writer.append(source.copy())
        writer.close()

        # Read everything needed while the temporary file still exists.
        reader = ParquetPointDataTableReader(tmp.name)
        metadata = reader.metadata
        restored = reader.dataframe

    assert metadata == expected_metadata

    source_bytes = source.memory_usage(deep=True).sum()
    restored_bytes = restored.memory_usage(deep=True).sum()
    assert source_bytes > restored_bytes

    assert_frame_equal(restored,
                       source,
                       check_dtype=False,
                       check_categorical=False)
Exemple #7
0
def test_good_parquet_file_clone(tmp_path):
    """Clone a Parquet table without ``tp_acc`` and ``cape_wa`` and compare.

    The source table is written to a temporary Parquet file, cloned into
    ``tmp_path`` with the two columns left out, and the clone must equal the
    source frame with those columns dropped (dtype and categorical
    differences are ignored).
    """
    dropped = ["tp_acc", "cape_wa"]
    source = ASCIIDecoder(path=TEST_DATA_DIR / "good_parquet.ascii").dataframe

    with NamedTemporaryFile() as tmp:
        writer = ParquetPointDataTableWriter(tmp.name)
        writer.add_columns_chunk(source.copy())
        writer.close()

        reader = ParquetPointDataTableReader(tmp.name)
        target = tmp_path / "good_parquet.parquet"
        kept = [name for name in reader.columns if name not in dropped]
        reader.clone(*kept, path=target)

        clone = ParquetPointDataTableReader(target)
        actual = clone.dataframe
        expected = source.drop(dropped, axis=1)

        assert_frame_equal(actual,
                           expected,
                           check_dtype=False,
                           check_categorical=False)
Exemple #8
0
def load_point_data_by_path(path: str,
                            cheaper: bool = False) -> BasePointDataReader:
    """Build the point-data reader that matches the extension of ``path``.

    ``.ascii`` and ``.csv`` paths are opened with ``ASCIIDecoder``;
    ``.parquet`` paths with ``ParquetPointDataTableReader``. ``cheaper`` is
    forwarded unchanged to whichever reader is constructed. A line naming
    the loaded reader is printed before it is returned.

    Raises:
        ValueError: if ``path`` ends with none of the supported extensions.
    """
    # NOTE(review): function-scope imports, presumably to avoid an import
    # cycle -- confirm before moving them to module level.
    from core.loaders.ascii import ASCIIDecoder
    from core.loaders.parquet import ParquetPointDataTableReader

    if path.endswith((".ascii", ".csv")):
        reader_cls = ASCIIDecoder
    elif path.endswith(".parquet"):
        reader_cls = ParquetPointDataTableReader
    else:
        raise ValueError(f"invalid file extension: {path}")

    loader = reader_cls(path=path, cheaper=cheaper)
    print(f"Loaded point data table: {loader}")
    return loader
Exemple #9
0
def get_breakpoints_suggestions():
    """Suggest breakpoints for one predictor of a weather type.

    Reads ``labels``, ``thrWT``, ``path``, ``predictor``, ``minNumCases`` and
    ``numSubSamples`` from the request JSON, evaluates the weather type on
    the table decoded from ``path``, orders the errors by the chosen
    predictor and passes both to ``KolmogorovSmirnovEngine().run`` together
    with the candidate positions.

    Returns:
        A JSON response of the form ``{"breakpoints": ...}``.

    Raises:
        ValueError: if ``numSubSamples`` is smaller than 1.
    """
    payload = request.get_json()

    # ``minNumCases`` is parsed (so a missing or non-integer value still
    # fails the request) but is not used further in this function.
    labels, thrWT, path, predictor, minNumCases, numSubSamples = (
        payload["labels"],
        payload["thrWT"],
        payload["path"],
        payload["predictor"],
        int(payload["minNumCases"]),
        int(payload["numSubSamples"]),
    )

    # Fail early with a clear message instead of a ZeroDivisionError (0) or
    # a silently empty set of candidate positions (negative values).
    if numSubSamples < 1:
        raise ValueError(
            f"numSubSamples must be a positive integer, got {numSubSamples}")

    predictor_matrix = ASCIIDecoder(path=path).dataframe

    thrWT = [float(cell) for cell in thrWT]
    series = pandas.Series(dict(zip(labels, thrWT)))
    # Even positions feed thrL, odd positions feed thrH.
    thrL, thrH = series.iloc[::2], series.iloc[1::2]

    wt = WeatherType(thrL=thrL,
                     thrH=thrH,
                     thrL_labels=labels[::2],
                     thrH_labels=labels[1::2])

    error, predictor_matrix, _ = wt.evaluate(predictor_matrix)
    predictor = predictor_matrix[predictor]
    error = np.asarray(error)

    # Reorder the errors by ascending predictor value, then sort the
    # predictor itself into a new Series (the evaluated matrix is untouched).
    sort_indices = predictor.argsort()
    error = error[sort_indices]
    predictor = predictor.sort_values()

    n_rows = len(predictor)
    # When numSubSamples exceeds the number of rows the integer division
    # yields 0, and range() rejects a zero step; use every row instead.
    step = max(1, n_rows // numSubSamples)

    PosAll = pandas.Series(range(n_rows))
    PosBP = pandas.Series(list(range(0, n_rows, step)) + [n_rows])
    breakpoints = KolmogorovSmirnovEngine().run(predictor, error, PosAll,
                                                PosBP)

    return Response(json.dumps({"breakpoints": breakpoints}),
                    mimetype="application/json")
Exemple #10
0
def test_alfa_units():
    """The units decoded from ``ecmwf/alfa.ascii`` match the reference map."""
    expected_predictors = {
        "TP": "mm",
        "CP": "mm",
        "CPR": "-",
        "CAPE": "J kg**-1",
        "WSPD": "m s**-1",
        "SR24H": "W m**-2",
        "LST": "Hours (0 to 24)",
    }
    expected_units = {
        "predictand": {"tp": "m"},
        "predictors": expected_predictors,
        "observations": {"tp": "mm"},
    }

    decoder = ASCIIDecoder(path=TEST_DATA_DIR / "ecmwf" / "alfa.ascii")

    assert decoder.units == expected_units
Exemple #11
0
def save_wt_histograms():
    """Plot one histogram per threshold row into the destination directory.

    Reads ``labels``, ``thrGridOut``, ``path``, ``yLim``, ``destinationDir``
    and ``bins`` from the request JSON. The first cell of every
    ``thrGridOut`` row is used as the weather-type code in the output file
    name (``WT_<code>``); the remaining cells are the thresholds, with even
    columns passed as thrL and odd columns as thrH.

    Returns:
        A JSON response ``{"status": "success"}``.
    """
    body = request.get_json()
    labels = body["labels"]
    grid = body["thrGridOut"]
    path = body["path"]
    y_lim = body["yLim"]
    destination = body["destinationDir"]
    raw_bins = body["bins"]

    table = ASCIIDecoder(path=path).dataframe

    numeric_rows = [list(map(float, row[1:])) for row in grid]
    thresholds = pandas.DataFrame.from_records(numeric_rows, columns=labels)

    bin_edges = list(map(float, raw_bins))

    low_rows = thresholds.iloc[:, ::2]
    high_rows = thresholds.iloc[:, 1::2]

    # Pair each frame row position with the grid row it was built from.
    for position, grid_row in zip(range(len(low_rows)), grid):
        weather_type = WeatherType(
            thrL=low_rows.iloc[position],
            thrH=high_rows.iloc[position],
            thrL_labels=labels[::2],
            thrH_labels=labels[1::2],
        )

        error, _, title = weather_type.evaluate(table)

        wt_code = grid_row[0]
        target = os.path.join(destination, f"WT_{wt_code}")
        weather_type.plot(error,
                          bin_edges,
                          title,
                          y_lim=int(y_lim),
                          out_path=target)

    return jsonify({"status": "success"})
Exemple #12
0
def get_fields_from_ascii_table():
    """Describe the table at the requested path as a JSON response.

    The response lists the columns that are not in the fixed set of excluded
    names, the minimum, maximum and number of values of the error column
    (``FER`` when present, otherwise ``FE``), the name of that column, and
    ``WeatherType.DEFAULT_FER_BINS`` for ``FER`` or an empty list for ``FE``.
    """
    path = request.get_json()["path"]
    df = ASCIIDecoder(path=path).dataframe

    excluded = {
        "BaseDate",
        "BaseTime",
        "StepF",
        "Step",
        "DateOBS",
        "TimeOBS",
        "LatOBS",
        "LonOBS",
        "OBS",
        "Predictand",
        "FER",
        "FE",
    }
    fields = set(df.columns) - excluded

    if "FER" in df.columns:
        error = "FER"
    else:
        error = "FE"

    error_values = df[error]
    summary = {
        "fields": list(fields),
        "minValue": min(error_values),
        "maxValue": max(error_values),
        "count": len(error_values),
        "error": error,
        "bins": WeatherType.DEFAULT_FER_BINS if error == "FER" else [],
    }

    return Response(json.dumps(summary), mimetype="application/json")