def get_pdt_statistics(path: str) -> dict:
    """Summarise a point-data table: per-field statistics plus error-column info."""
    loader = load_point_data_by_path(path)
    fields = loader.predictors
    error_name = loader.error_type.name

    def summarise(field: str) -> dict:
        # Descriptive statistics rendered as strings for display.
        series = loader.select(field)
        return {
            "name": field,
            "min": f"{series.min():.2f}",
            "max": f"{series.max():.2f}",
            "mean": f"{series.mean():.2f}",
            "median": f"{series.median():.2f}",
            "count": f"{int(series.count())}",
        }

    summary = [summarise(field) for field in fields + [error_name]]
    # The error column's row count doubles as the dataset's point count.
    error_count = next(
        item["count"] for item in summary if item["name"] == error_name
    )
    default_bins = (
        WeatherType.DEFAULT_FER_BINS if loader.error_type == ErrorType.FER else []
    )
    return {
        "fields": fields,
        "summary": summary,
        "units": loader.units,
        "count": error_count,
        "error": error_name,
        "bins": default_bins,
    }
def get_wt_histogram():
    """Render the error histogram for the weather type defined by the request thresholds."""
    payload = request.get_json()
    labels = payload["labels"]
    path = sanitize_path(payload["path"])
    y_lim = payload["yLim"]
    num_bins = payload["numBins"]
    cheaper = payload["cheaper"]

    loader = load_point_data_by_path(path, cheaper=cheaper)
    thresholds = pandas.Series(
        dict(zip(labels, (float(cell) for cell in payload["thrWT"])))
    )
    # Even positions hold the low thresholds, odd positions the high ones.
    wt = WeatherType(
        thrL=thresholds.iloc[::2],
        thrH=thresholds.iloc[1::2],
        thrL_labels=labels[::2],
        thrH_labels=labels[1::2],
    )
    df, title_tokens = wt.evaluate(loader.error_type.name, loader=loader)
    title = wrap_title(title=title_tokens, chunk_size=6)
    error = df[loader.error_type.name]
    bins = [float(each) for each in payload["bins"]]
    plot = wt.plot(error, bins, title, y_lim=int(y_lim), num_bins=int(num_bins))
    return jsonify({"histogram": plot})
def get_obs_frequency():
    """Plot the observation-frequency map for one weather type."""
    payload = request.get_json()
    labels = payload["labels"]
    path = sanitize_path(payload["path"])
    code = payload["code"]
    mode = payload["mode"]

    loader = load_point_data_by_path(path, cheaper=payload["cheaper"])
    thresholds = pandas.Series(
        dict(zip(labels, (float(cell) for cell in payload["thrWT"])))
    )
    # Even positions hold the low thresholds, odd positions the high ones.
    wt = WeatherType(
        thrL=thresholds.iloc[::2],
        thrH=thresholds.iloc[1::2],
        thrL_labels=labels[::2],
        thrH_labels=labels[1::2],
    )
    # Include the observation coordinates so the result can be mapped.
    df, _ = wt.evaluate(
        loader.error_type.name, "LonOBS", "LatOBS", "OBS", loader=loader
    )
    cv_map = wt.plot_maps(df, code, mode.lower())
    return jsonify(cv_map)
def get_pdt_metadata():
    """Return the point-data table's metadata as a raw JSON response."""
    payload = request.get_json()
    pdt_path = sanitize_path(payload["path"])
    metadata = load_point_data_by_path(pdt_path).metadata
    return Response(json.dumps(metadata), mimetype="application/json")
def save_wt_histograms():
    """Write one error-histogram PNG per weather type into the destination directory."""
    payload = request.get_json()
    labels = payload["labels"]
    grid = payload["thrGridOut"]
    path = sanitize_path(payload["path"])
    y_lim = int(payload["yLim"])
    destination = sanitize_path(payload["destinationDir"])
    bins = [float(each) for each in payload["bins"]]
    num_bins = int(payload["numBins"])

    loader = load_point_data_by_path(path, cheaper=payload["cheaper"])
    # Column 0 of each grid row is the weather-type code; the rest are thresholds.
    rows = [[float(cell) for cell in row[1:]] for row in grid]
    thresholds = pandas.DataFrame.from_records(rows, columns=labels)
    low, high = thresholds.iloc[:, ::2], thresholds.iloc[:, 1::2]
    for idx in range(len(low)):
        wt = WeatherType(
            thrL=low.iloc[idx],
            thrH=high.iloc[idx],
            thrL_labels=labels[::2],
            thrH_labels=labels[1::2],
        )
        dataframe, title_tokens = wt.evaluate(loader.error_type.name, loader=loader)
        title = wrap_title(title=title_tokens, chunk_size=6)
        error = dataframe[loader.error_type.name]
        wt_code = grid[idx][0]
        wt.plot(
            error,
            bins,
            title,
            y_lim=y_lim,
            num_bins=num_bins,
            out_path=os.path.join(destination, f"WT_{wt_code}.png"),
        )
    return jsonify({"status": "success"})
def test_alfa(client, alfa_cassette, alfa_loader, fmt, tmp_path):
    """End-to-end: a computation run must produce a loadable PDT equal to the ASCII reference."""
    output = tmp_path / f"pdt.{fmt.lower()}"
    body = alfa_cassette(output_path=str(output), fmt=fmt)
    response = client.post("/computations/start", json=body)
    assert response.status_code == 200

    got = load_point_data_by_path(path=str(output))
    assert got.error_type == ErrorType.FER
    want = alfa_loader(fmt="ASCII")
    assert got.columns == want.columns
    # dtype/categorical differences between on-disk formats are tolerated.
    assert_frame_equal(
        got.dataframe,
        want.dataframe,
        check_dtype=False,
        check_categorical=False,
    )
def get_error_rep():
    """Compute the decision tree's representative-error table and return it as CSV text."""
    payload = request.get_json()
    labels = payload["labels"]
    path = sanitize_path(payload["path"])
    num_cols = payload["numCols"]
    ranges = payload["ranges"]

    rows = [[float(cell) for cell in row] for row in payload["matrix"]]
    df = pandas.DataFrame.from_records(rows, columns=labels)
    # Alternating columns: low thresholds at even indices, high at odd.
    thrL, thrH = df.iloc[:, ::2], df.iloc[:, 1::2]
    loader = load_point_data_by_path(path, cheaper=payload["cheaper"])
    dt = DecisionTree(threshold_low=thrL, threshold_high=thrH, ranges=ranges)
    rep = dt.cal_rep_error(loader, nBin=int(num_cols))
    buffer = StringIO()
    rep.to_csv(buffer)
    return jsonify(buffer.getvalue())
def f(fmt: Literal["ASCII", "PARQUET"] = "ASCII"):
    """Load the bundled 'alfa' ASCII point-data table fixture.

    Bug fix: the original signature was ``def f(fmt=Literal["ASCII", "PARQUET"])``,
    which assigns the ``typing.Literal`` construct itself as the *default value*
    instead of annotating the parameter. ``fmt`` is unused by the body (the
    fixture always loads the ASCII file), so giving it a proper annotation and
    a string default is behavior-preserving for all callers.
    """
    return load_point_data_by_path(path=str(TEST_DATA_DIR / "ecmwf" / "alfa.ascii"))
def get_breakpoints_suggestions():
    """Suggest breakpoints for one predictor via a Kolmogorov-Smirnov scan.

    Evaluates the weather type defined by the request's low/high thresholds,
    restricts the chosen predictor to [lower_bound, upper_bound), runs the
    KS test engine, and returns the candidate breakpoints, a plot, and the
    sample size as a JSON response.
    """
    payload = request.get_json()
    labels, thrWT, path, predictor, num_bp, lower_bound, upper_bound, cheaper = (
        payload["labels"],
        payload["thrWT"],
        sanitize_path(payload["path"]),
        payload["predictor"],
        int(payload["numBreakpoints"]),
        payload.get("lowerBound"),  # optional: absent means unbounded below
        payload.get("upperBound"),  # optional: absent means unbounded above
        payload["cheaper"],
    )
    loader = load_point_data_by_path(path, cheaper=cheaper)
    thrWT = [float(cell) for cell in thrWT]
    series = pandas.Series(dict(zip(labels, thrWT)))
    # Even positions are low thresholds, odd positions are high thresholds.
    thrL, thrH = series.iloc[::2], series.iloc[1::2]
    # NOTE(review): truthiness also maps "" (and a literal 0) to +/-inf —
    # presumably intentional for empty form fields; confirm against the UI.
    lower_bound = float(lower_bound) if lower_bound else -inf
    upper_bound = float(upper_bound) if upper_bound else inf
    wt = WeatherType(
        thrL=thrL, thrH=thrH, thrL_labels=labels[::2], thrH_labels=labels[1::2]
    )
    df, title_tokens = wt.evaluate(loader.error_type.name, predictor, loader=loader)
    # Swap any title token mentioning this predictor for the bounded-range form.
    title_tokens = [
        f"({lower_bound} <= {predictor} < {upper_bound})"
        if predictor in token
        else token
        for token in title_tokens
    ]
    title_ks = wrap_title(title_tokens, chunk_size=4)
    df_breakpoints, df_size = ks_test_engine(
        df=df,
        predictor_name=predictor,
        error_name=loader.error_type.name,
        breakpoints_num=num_bp,
        lower_bound=lower_bound,
        upper_bound=upper_bound,
    )
    plot = plot_ks_stats(
        df=df_breakpoints,
        node=title_ks + f"\n\nNo. of points: {df_size}",
        predictor=predictor,
        unit=loader.units["predictors"][predictor],
    )
    df_breakpoints = format_ks_stats(df_breakpoints)
    return Response(
        json.dumps(
            {
                "records": df_breakpoints.to_dict("records"),
                "figure": plot,
                "count": df_size,
            }
        ),
        mimetype="application/json",
    )
def save_operation():
    """Persist calibration artifacts (breakpoints CSV, mapping functions,
    weather-type histograms, bias table) individually or as a full bundle.

    ``mode`` selects what to save: "breakpoints", "mf", "wt", "bias" write a
    single artifact to ``outPath``; "all" creates a versioned output folder
    and writes every artifact plus a README and a cloned PDT into it.
    Returns an empty JSON object on success.
    """
    payload = request.get_json()
    labels = payload["labels"]
    matrix = payload["matrix"]
    ranges = payload["fieldRanges"]
    pdt_path = sanitize_path(payload["pdtPath"])
    mf_cols = payload["mfcols"]
    cheaper = payload["cheaper"]
    mode = payload["mode"]
    output_path = Path(sanitize_path(payload["outPath"]))
    if mode == "all":
        # Full export: build a versioned directory name from the metadata.
        version = payload["version"]
        family = payload["family"]
        accumulation = payload["accumulation"]
        accumulation = f"{accumulation}h" if accumulation else ""
        dataset_name = payload["datasetName"]
        output_path = output_path / f"{family}{accumulation}{dataset_name}_{version}"
        os.makedirs(output_path, exist_ok=True)
    if mode in ["breakpoints", "all"]:
        # Breakpoints arrive pre-rendered as CSV text; write verbatim.
        csv = payload["breakpointsCSV"]
        path = output_path
        if mode == "all":
            path = path / "BP.csv"
        with open(path, "w") as f:
            f.write(csv)
    if mode in ["mf", "all"]:
        # Mapping functions: representative errors per weather type.
        matrix = [[float(cell) for cell in row] for row in matrix]
        df = pandas.DataFrame.from_records(matrix, columns=labels)
        # Alternating columns: low thresholds at even indices, high at odd.
        thrL, thrH = df.iloc[:, ::2], df.iloc[:, 1::2]
        loader = load_point_data_by_path(pdt_path, cheaper=cheaper)
        dt = DecisionTree(threshold_low=thrL, threshold_high=thrH, ranges=ranges)
        rep = dt.cal_rep_error(loader, nBin=int(mf_cols))
        path = output_path
        if mode == "all":
            path = path / f"{loader.error_type.name}.csv"
        with open(path, "w") as f:
            rep.to_csv(
                f,
                header=[str(i + 1) for i in range(int(mf_cols))],
                index_label="WT Code",
            )
    if mode in ["wt", "all"]:
        # One histogram PNG per weather type.
        ylim = payload["yLim"]
        bins = payload["bins"]
        num_bins = payload["numBins"]
        thrGridOut = payload["thrGridOut"]
        # Column 0 of each grid row is the weather-type code; drop it.
        matrix = [[float(cell) for cell in row[1:]] for row in thrGridOut]
        df = pandas.DataFrame.from_records(matrix, columns=labels)
        loader = load_point_data_by_path(pdt_path, cheaper=cheaper)
        bins = [float(each) for each in bins]
        thrL_out, thrH_out = df.iloc[:, ::2], df.iloc[:, 1::2]
        path = output_path
        if mode == "all":
            path = path / "WTs"
            os.makedirs(path, exist_ok=True)
        for idx in range(len(thrL_out)):
            thrL = thrL_out.iloc[idx]
            thrH = thrH_out.iloc[idx]
            wt = WeatherType(
                thrL=thrL, thrH=thrH, thrL_labels=labels[::2], thrH_labels=labels[1::2]
            )
            dataframe, title_tokens = wt.evaluate(loader.error_type.name, loader=loader)
            title = wrap_title(title=title_tokens, chunk_size=6)
            error = dataframe[loader.error_type.name]
            wt_code = thrGridOut[idx][0]
            wt.plot(
                error,
                bins,
                title,
                y_lim=int(ylim),
                num_bins=int(num_bins),
                out_path=os.path.join(path, f"WT_{wt_code}.png"),
            )
    if mode in ["bias", "all"]:
        # Bias table: one discretized-error bias value per weather type.
        thrGridOut = payload["thrGridOut"]
        bins = payload["bins"]
        num_bins = payload["numBins"]
        bins = [float(each) for each in bins]
        matrix = [[float(cell) for cell in row[1:]] for row in thrGridOut]
        df = pandas.DataFrame.from_records(matrix, columns=labels)
        loader = load_point_data_by_path(pdt_path, cheaper=cheaper)
        thrL_out, thrH_out = df.iloc[:, ::2], df.iloc[:, 1::2]
        path = output_path
        if mode == "all":
            path = path / "Bias.csv"
        csv = []
        for idx in range(len(thrL_out)):
            thrL = thrL_out.iloc[idx]
            thrH = thrH_out.iloc[idx]
            wt = WeatherType(
                thrL=thrL, thrH=thrH, thrL_labels=labels[::2], thrH_labels=labels[1::2]
            )
            dataframe, title_tokens = wt.evaluate(loader.error_type.name, loader=loader)
            error = dataframe[loader.error_type.name]
            discretized_error = wt.discretize_error(error=error, num_bins=int(num_bins))
            # Bias is computed over the full bin range [bins[0], bins[-1]].
            bias = loader.error_type.bias(
                error=discretized_error, low=bins[0], high=bins[-1]
            )
            bias = f"{bias:.2f}"
            wt_code = thrGridOut[idx][0]
            csv += [(wt_code, bias)]
        pandas.DataFrame.from_records(csv, columns=["WT Code", "Bias"]).to_csv(
            path, index=False
        )
    if mode == "all":
        # Finish the bundle: README with provenance, then a cloned PDT with
        # the excluded predictors dropped.
        family = payload["family"]
        version = payload["version"]
        accumulation = payload["accumulation"]
        accumulation = f", {accumulation}-hourly" if accumulation else ""
        with open(output_path / "README.txt", "w") as f:
            text = dedent(
                f"""
                ecPoint-{family}{accumulation}

                Version: {version}

                Timestamp: {datetime.now()}
                """
            )
            f.write(text.lstrip())
        loader = load_point_data_by_path(pdt_path, cheaper=cheaper)
        # NOTE(review): the else-branch also yields "ascii", so unknown
        # extensions silently fall back to ASCII output — confirm intended.
        if pdt_path.endswith(".ascii"):
            ext = "ascii"
        elif pdt_path.endswith(".parquet"):
            ext = "parquet"
        else:
            ext = "ascii"
        exclude_cols = payload["excludePredictors"]
        cols = [col for col in loader.columns if col not in exclude_cols]
        loader.clone(*cols, path=output_path / f"PDT.{ext}")
    return Response(json.dumps({}), mimetype="application/json")