def test_good_ascii_file_clone(tmp_path):
    """Cloning an ASCII table with a column subset drops the excluded columns."""
    source_path = TEST_DATA_DIR / "good_ascii_file.ascii"
    decoder = ASCIIDecoder(path=source_path)
    dropped = ["TP", "CAPE"]
    target_path = tmp_path / "good_ascii_file.ascii"
    kept = [name for name in decoder.columns if name not in dropped]
    decoder.clone(*kept, path=target_path)
    clone = ASCIIDecoder(path=target_path)
    assert_frame_equal(clone.dataframe, decoder.dataframe.drop(dropped, axis=1))
def test_good_ascii_file():
    """A well-formed ASCII file decodes to the expected WSPD column values."""
    decoder = ASCIIDecoder(path=TEST_DATA_DIR / "good_ascii_file.ascii")
    expected = [6.163900, 4.319410, 6.537020, 8.513560, 8.770020]
    assert numpy.allclose(decoder.dataframe["WSPD"], expected)
def get_obs_frequency():
    """Flask handler: evaluate one weather type and return its observation-frequency map."""
    payload = request.get_json()
    labels = payload["labels"]
    thresholds = [float(cell) for cell in payload["thrWT"]]
    path = payload["path"]
    code = payload["code"]
    mode = payload["mode"]

    predictor_matrix = ASCIIDecoder(path=path).dataframe
    series = pandas.Series(dict(zip(labels, thresholds)))
    # Even positions carry the low thresholds, odd positions the high ones.
    thrL, thrH = series.iloc[::2], series.iloc[1::2]
    wt = WeatherType(
        thrL=thrL,
        thrH=thrH,
        thrL_labels=labels[::2],
        thrH_labels=labels[1::2],
    )
    error, predictor_matrix, _ = wt.evaluate(predictor_matrix)
    cv_map = wt.plot_maps(predictor_matrix, code, mode.lower())
    return jsonify(cv_map)
def get_error_rep():
    """Flask handler: compute the representative-error table and return it as CSV text."""
    payload = request.get_json()
    labels = payload["labels"]
    raw_matrix = payload["matrix"]
    path = payload["path"]
    num_cols = payload["numCols"]

    rows = [[float(cell) for cell in row] for row in raw_matrix]
    df = pandas.DataFrame.from_records(rows, columns=labels)
    # Alternating columns: low thresholds in even slots, high thresholds in odd slots.
    thrL, thrH = df.iloc[:, ::2], df.iloc[:, 1::2]
    predictor_matrix = ASCIIDecoder(path=path).dataframe
    rep = DecisionTree.cal_rep_error(
        predictor_matrix, thrL_out=thrL, thrH_out=thrH, nBin=int(num_cols)
    )
    buffer = StringIO()
    np.savetxt(buffer, rep, delimiter=",")
    return jsonify(buffer.getvalue())
def get_wt_histogram():
    """Flask handler: render the error histogram for a single weather type."""
    payload = request.get_json()
    labels = payload["labels"]
    thresholds = [float(cell) for cell in payload["thrWT"]]
    path = payload["path"]
    y_lim = payload["yLim"]
    bins = [float(each) for each in payload["bins"]]

    predictor_matrix = ASCIIDecoder(path=path).dataframe
    series = pandas.Series(dict(zip(labels, thresholds)))
    # Even positions carry the low thresholds, odd positions the high ones.
    thrL, thrH = series.iloc[::2], series.iloc[1::2]
    wt = WeatherType(
        thrL=thrL,
        thrH=thrH,
        thrL_labels=labels[::2],
        thrH_labels=labels[1::2],
    )
    error, _, title = wt.evaluate(predictor_matrix)
    plot = wt.plot(error, bins, title, int(y_lim))
    return jsonify({"histogram": plot})
def test_good_parquet_file():
    """Round-trip a dataframe through the Parquet writer/reader, keeping metadata intact."""
    df = ASCIIDecoder(path=TEST_DATA_DIR / "good_parquet.ascii").dataframe
    with NamedTemporaryFile() as f:
        writer = ParquetPointDataTableWriter(f.name)
        writer.add_metadata("header", "foo")
        writer.add_metadata("footer", "bar")
        writer.append(df.copy())
        writer.close()

        reader = ParquetPointDataTableReader(f.name)
        metadata = reader.metadata
        round_tripped = reader.dataframe

        assert metadata == {"header": "foo", "footer": "bar"}
        # The re-read frame should occupy less memory than the original.
        assert df.memory_usage(deep=True).sum() > round_tripped.memory_usage(deep=True).sum()
        assert_frame_equal(round_tripped, df, check_dtype=False, check_categorical=False)
def test_good_parquet_file_clone(tmp_path):
    """Cloning a Parquet table with a column subset drops the excluded columns."""
    df = ASCIIDecoder(path=TEST_DATA_DIR / "good_parquet.ascii").dataframe
    with NamedTemporaryFile() as f:
        writer = ParquetPointDataTableWriter(f.name)
        writer.add_columns_chunk(df.copy())
        writer.close()

        reader = ParquetPointDataTableReader(f.name)
        dropped = ["tp_acc", "cape_wa"]
        target_path = tmp_path / "good_parquet.parquet"
        kept = [name for name in reader.columns if name not in dropped]
        reader.clone(*kept, path=target_path)

        clone = ParquetPointDataTableReader(target_path)
        assert_frame_equal(
            clone.dataframe,
            df.drop(dropped, axis=1),
            check_dtype=False,
            check_categorical=False,
        )
def load_point_data_by_path(path: str, cheaper: bool = False) -> BasePointDataReader:
    """Select a point-data reader implementation based on the file extension.

    :param path: location of the point data table on disk.
    :param cheaper: forwarded to the reader (presumably a reduced-memory mode — confirm).
    :raises ValueError: if the extension is neither ASCII/CSV nor Parquet.
    """
    # Imported lazily so importing this module does not pull in both loaders.
    from core.loaders.ascii import ASCIIDecoder
    from core.loaders.parquet import ParquetPointDataTableReader

    if path.endswith((".ascii", ".csv")):
        loader = ASCIIDecoder(path=path, cheaper=cheaper)
    elif path.endswith(".parquet"):
        loader = ParquetPointDataTableReader(path=path, cheaper=cheaper)
    else:
        raise ValueError(f"invalid file extension: {path}")
    print(f"Loaded point data table: {loader}")
    return loader
def get_breakpoints_suggestions():
    """Flask handler: suggest breakpoints for a predictor via a Kolmogorov-Smirnov scan."""
    payload = request.get_json()
    labels = payload["labels"]
    thresholds = [float(cell) for cell in payload["thrWT"]]
    path = payload["path"]
    predictor_name = payload["predictor"]
    minNumCases = int(payload["minNumCases"])
    numSubSamples = int(payload["numSubSamples"])

    predictor_matrix = ASCIIDecoder(path=path).dataframe
    series = pandas.Series(dict(zip(labels, thresholds)))
    # Even positions carry the low thresholds, odd positions the high ones.
    thrL, thrH = series.iloc[::2], series.iloc[1::2]
    wt = WeatherType(
        thrL=thrL,
        thrH=thrH,
        thrL_labels=labels[::2],
        thrH_labels=labels[1::2],
    )
    error, predictor_matrix, title = wt.evaluate(predictor_matrix)

    predictor = predictor_matrix[predictor_name]
    # Reorder the errors so they line up with the predictor sorted ascending.
    error = np.asarray(error)[predictor.argsort()]
    predictor = predictor[::]
    predictor.sort_values(inplace=True)

    PosAll = pandas.Series(range(len(predictor)))
    # Evenly spaced candidate positions, always including the final index.
    step = len(predictor) // numSubSamples
    PosBP = pandas.Series(list(range(0, len(predictor), step)) + [len(predictor)])
    breakpoints = KolmogorovSmirnovEngine().run(predictor, error, PosAll, PosBP)
    return Response(json.dumps({"breakpoints": breakpoints}), mimetype="application/json")
def test_alfa_units():
    """Units parsed from the ECMWF alfa sample file match the expected mapping."""
    decoder = ASCIIDecoder(path=TEST_DATA_DIR / "ecmwf" / "alfa.ascii")
    expected_units = {
        "predictand": {"tp": "m"},
        "predictors": {
            "TP": "mm",
            "CP": "mm",
            "CPR": "-",
            "CAPE": "J kg**-1",
            "WSPD": "m s**-1",
            "SR24H": "W m**-2",
            "LST": "Hours (0 to 24)",
        },
        "observations": {"tp": "mm"},
    }
    assert decoder.units == expected_units
def save_wt_histograms():
    """Flask handler: render and save one error histogram per weather type to disk."""
    payload = request.get_json()
    labels = payload["labels"]
    thrGridOut = payload["thrGridOut"]
    path = payload["path"]
    y_lim = payload["yLim"]
    destination = payload["destinationDir"]
    bins = [float(each) for each in payload["bins"]]

    predictor_matrix = ASCIIDecoder(path=path).dataframe
    # The first cell of each grid row is the weather-type code; the rest are thresholds.
    rows = [[float(cell) for cell in row[1:]] for row in thrGridOut]
    df = pandas.DataFrame.from_records(rows, columns=labels)
    thrL_out, thrH_out = df.iloc[:, ::2], df.iloc[:, 1::2]

    for idx in range(len(thrL_out)):
        wt = WeatherType(
            thrL=thrL_out.iloc[idx],
            thrH=thrH_out.iloc[idx],
            thrL_labels=labels[::2],
            thrH_labels=labels[1::2],
        )
        error, _, title = wt.evaluate(predictor_matrix)
        wt_code = thrGridOut[idx][0]
        wt.plot(
            error,
            bins,
            title,
            y_lim=int(y_lim),
            out_path=os.path.join(destination, f"WT_{wt_code}"),
        )

    return jsonify({"status": "success"})
def get_fields_from_ascii_table():
    """Flask handler: list predictor fields and error-column stats for an ASCII table."""
    payload = request.get_json()
    df = ASCIIDecoder(path=payload["path"]).dataframe

    # Anything that is not bookkeeping/observation metadata counts as a predictor field.
    reserved = {
        "BaseDate",
        "BaseTime",
        "StepF",
        "Step",
        "DateOBS",
        "TimeOBS",
        "LatOBS",
        "LonOBS",
        "OBS",
        "Predictand",
        "FER",
        "FE",
    }
    fields = set(df.columns) - reserved
    # Prefer the FER column when present, otherwise fall back to FE.
    error = "FER" if "FER" in df.columns else "FE"
    body = {
        "fields": list(fields),
        "minValue": min(df[error]),
        "maxValue": max(df[error]),
        "count": len(df[error]),
        "error": error,
        "bins": WeatherType.DEFAULT_FER_BINS if error == "FER" else [],
    }
    return Response(json.dumps(body), mimetype="application/json")