def get_obs_frequency():
    """Render observation-frequency maps for a single weather type.

    Reads the WT thresholds, PDT path, colour code and mode from the JSON
    payload and returns the rendered map via ``jsonify``.
    """
    payload = request.get_json()
    labels = payload["labels"]
    pdt_path = sanitize_path(payload["path"])
    code = payload["code"]
    mode = payload["mode"]
    cheaper = payload["cheaper"]
    thresholds = [float(cell) for cell in payload["thrWT"]]

    loader = load_point_data_by_path(pdt_path, cheaper=cheaper)
    series = pandas.Series(dict(zip(labels, thresholds)))
    # Alternating entries: even positions = low thresholds, odd = high.
    wt = WeatherType(
        thrL=series.iloc[::2],
        thrH=series.iloc[1::2],
        thrL_labels=labels[::2],
        thrH_labels=labels[1::2],
    )
    df, _ = wt.evaluate(
        loader.error_type.name, "LonOBS", "LatOBS", "OBS", loader=loader
    )
    cv_map = wt.plot_maps(df, code, mode.lower())
    return jsonify(cv_map)
def get_wt_histogram():
    """Build the error histogram plot for a single weather type.

    Returns ``{"histogram": <plot>}`` where the plot is produced by
    ``WeatherType.plot`` from the evaluated error column.
    """
    payload = request.get_json()
    labels = payload["labels"]
    pdt_path = sanitize_path(payload["path"])
    y_lim = int(payload["yLim"])
    num_bins = int(payload["numBins"])
    cheaper = payload["cheaper"]
    thresholds = [float(cell) for cell in payload["thrWT"]]
    bin_edges = [float(each) for each in payload["bins"]]

    loader = load_point_data_by_path(pdt_path, cheaper=cheaper)
    series = pandas.Series(dict(zip(labels, thresholds)))
    # Alternating entries: even positions = low thresholds, odd = high.
    wt = WeatherType(
        thrL=series.iloc[::2],
        thrH=series.iloc[1::2],
        thrL_labels=labels[::2],
        thrH_labels=labels[1::2],
    )
    df, title_tokens = wt.evaluate(loader.error_type.name, loader=loader)
    title = wrap_title(title=title_tokens, chunk_size=6)
    error = df[loader.error_type.name]
    plot = wt.plot(error, bin_edges, title, y_lim=y_lim, num_bins=num_bins)
    return jsonify({"histogram": plot})
def get_pdt_metadata():
    """Return the metadata of the point data table at the requested path."""
    payload = request.get_json()
    loader = load_point_data_by_path(sanitize_path(payload["path"]))
    return Response(json.dumps(loader.metadata), mimetype="application/json")
def save_wt_histograms():
    """Render and save one histogram PNG per weather type.

    Each row of ``thrGridOut`` is ``[wt_code, thr..., thr...]``; a plot named
    ``WT_<code>.png`` is written into the sanitized destination directory.
    """
    payload = request.get_json()
    labels = payload["labels"]
    thr_grid = payload["thrGridOut"]
    pdt_path = sanitize_path(payload["path"])
    y_lim = int(payload["yLim"])
    destination = sanitize_path(payload["destinationDir"])
    bin_edges = [float(each) for each in payload["bins"]]
    num_bins = int(payload["numBins"])
    cheaper = payload["cheaper"]

    loader = load_point_data_by_path(pdt_path, cheaper=cheaper)
    # Drop the leading WT code from each row; remaining cells are thresholds.
    records = [[float(cell) for cell in row[1:]] for row in thr_grid]
    grid = pandas.DataFrame.from_records(records, columns=labels)
    thrL_out = grid.iloc[:, ::2]
    thrH_out = grid.iloc[:, 1::2]

    for idx, row in enumerate(thr_grid):
        wt = WeatherType(
            thrL=thrL_out.iloc[idx],
            thrH=thrH_out.iloc[idx],
            thrL_labels=labels[::2],
            thrH_labels=labels[1::2],
        )
        dataframe, title_tokens = wt.evaluate(loader.error_type.name, loader=loader)
        title = wrap_title(title=title_tokens, chunk_size=6)
        error = dataframe[loader.error_type.name]
        wt_code = row[0]
        wt.plot(
            error,
            bin_edges,
            title,
            y_lim=y_lim,
            num_bins=num_bins,
            out_path=os.path.join(destination, f"WT_{wt_code}.png"),
        )
    return jsonify({"status": "success"})
def get_obs_metadata():
    """Return observation units read from the first ``*.geo`` file under the path.

    Responds with ``{"units": null}`` when the units cannot be parsed — or
    when no ``.geo`` file exists at all — instead of crashing.
    """
    payload = request.get_json()
    path = Path(sanitize_path(payload["path"]))
    # BUGFIX: next() with a default guards against directories containing no
    # .geo files, which previously raised an unhandled StopIteration (a 500).
    first_geo_file = next(path.glob("**/*.geo"), None)
    units = None
    if first_geo_file is not None:
        try:
            units = geopoints_loader.read_units(first_geo_file)
        except ValueError:
            # Unparseable units header: fall back to null, as before.
            units = None
    return Response(json.dumps({"units": units}), mimetype="application/json")
def get_predictors():
    """List predictor code directories under the given path.

    Hidden entries (leading dot) and plain files are skipped; each code's
    metadata is fetched once to pre-warm the LRU cache before responding.
    """
    payload = request.get_json()
    base = sanitize_path(payload["path"])
    codes = []
    for entry in os.listdir(base):
        if entry.startswith("."):
            continue
        if os.path.isdir(os.path.join(base, entry)):
            codes.append(entry)
    # Warming up the LRU cache for fetching units
    for code in codes:
        get_metadata(os.path.join(base, code))
    return Response(json.dumps(codes), mimetype="application/json")
def get_error_rep():
    """Return the representative-error table as CSV text (JSON-wrapped)."""
    payload = request.get_json()
    labels = payload["labels"]
    pdt_path = sanitize_path(payload["path"])
    num_cols = payload["numCols"]
    cheaper = payload["cheaper"]
    ranges = payload["ranges"]

    rows = [[float(cell) for cell in row] for row in payload["matrix"]]
    grid = pandas.DataFrame.from_records(rows, columns=labels)
    # Alternating columns: even = low thresholds, odd = high thresholds.
    thrL = grid.iloc[:, ::2]
    thrH = grid.iloc[:, 1::2]

    loader = load_point_data_by_path(pdt_path, cheaper=cheaper)
    dt = DecisionTree(threshold_low=thrL, threshold_high=thrH, ranges=ranges)
    rep = dt.cal_rep_error(loader, nBin=int(num_cols))

    buffer = StringIO()
    rep.to_csv(buffer)
    return jsonify(buffer.getvalue())
def get_pdt_statistics():
    """Return summary statistics for the PDT at the requested path."""
    payload = request.get_json()
    stats = postprocessing_svc.get_pdt_statistics(sanitize_path(payload["path"]))
    return Response(json.dumps(stats), mimetype="application/json")
def get_breakpoints_suggestions():
    """Suggest predictor breakpoints via the Kolmogorov-Smirnov test.

    Returns the breakpoint records, a rendered KS plot, and the number of
    points falling inside the (optional) predictor bounds.
    """
    payload = request.get_json()
    labels = payload["labels"]
    thrWT = [float(cell) for cell in payload["thrWT"]]
    path = sanitize_path(payload["path"])
    predictor = payload["predictor"]
    num_bp = int(payload["numBreakpoints"])
    lower_bound = payload.get("lowerBound")
    upper_bound = payload.get("upperBound")
    cheaper = payload["cheaper"]

    loader = load_point_data_by_path(path, cheaper=cheaper)
    series = pandas.Series(dict(zip(labels, thrWT)))
    thrL, thrH = series.iloc[::2], series.iloc[1::2]
    # BUGFIX: only a missing/blank bound means "unbounded". The previous
    # truthiness test (`if lower_bound`) also discarded a legitimate numeric
    # bound of 0, silently replacing it with -inf/+inf.
    lower_bound = -inf if lower_bound in (None, "") else float(lower_bound)
    upper_bound = inf if upper_bound in (None, "") else float(upper_bound)
    wt = WeatherType(
        thrL=thrL, thrH=thrH, thrL_labels=labels[::2], thrH_labels=labels[1::2]
    )
    df, title_tokens = wt.evaluate(loader.error_type.name, predictor, loader=loader)
    # Replace the predictor's threshold token with the explicit bounds used.
    title_tokens = [
        f"({lower_bound} <= {predictor} < {upper_bound})" if predictor in token else token
        for token in title_tokens
    ]
    title_ks = wrap_title(title_tokens, chunk_size=4)
    df_breakpoints, df_size = ks_test_engine(
        df=df,
        predictor_name=predictor,
        error_name=loader.error_type.name,
        breakpoints_num=num_bp,
        lower_bound=lower_bound,
        upper_bound=upper_bound,
    )
    plot = plot_ks_stats(
        df=df_breakpoints,
        node=title_ks + f"\n\nNo. of points: {df_size}",
        predictor=predictor,
        unit=loader.units["predictors"][predictor],
    )
    df_breakpoints = format_ks_stats(df_breakpoints)
    return Response(
        json.dumps(
            {
                "records": df_breakpoints.to_dict("records"),
                "figure": plot,
                "count": df_size,
            }
        ),
        mimetype="application/json",
    )
def get_predictor_units():
    """Return predictor metadata (including units) for the given path."""
    payload = request.get_json()
    metadata = get_metadata(sanitize_path(payload["path"]))
    return Response(json.dumps(metadata), mimetype="application/json")
def save_operation():
    """Persist the requested artefacts of a construction session.

    ``mode`` selects what is written:
      - "breakpoints": the raw breakpoints CSV,
      - "mf":   the representative (mapping-function) error table,
      - "wt":   one histogram PNG per weather type,
      - "bias": a per-WT bias CSV,
      - "all":  every artefact above plus README.txt and a cloned PDT,
                inside a freshly created versioned output directory.

    When mode != "all", ``outPath`` is used directly as the destination
    path for the single artefact being saved.
    """
    payload = request.get_json()
    labels = payload["labels"]
    matrix = payload["matrix"]
    ranges = payload["fieldRanges"]
    pdt_path = sanitize_path(payload["pdtPath"])
    mf_cols = payload["mfcols"]
    cheaper = payload["cheaper"]
    mode = payload["mode"]
    output_path = Path(sanitize_path(payload["outPath"]))

    if mode == "all":
        # Build a versioned directory name like "<family><Nh><dataset>_<version>".
        version = payload["version"]
        family = payload["family"]
        accumulation = payload["accumulation"]
        accumulation = f"{accumulation}h" if accumulation else ""
        dataset_name = payload["datasetName"]
        output_path = output_path / f"{family}{accumulation}{dataset_name}_{version}"
        os.makedirs(output_path, exist_ok=True)

    if mode in ["breakpoints", "all"]:
        # The client sends the breakpoints table already serialized as CSV.
        csv = payload["breakpointsCSV"]
        path = output_path
        if mode == "all":
            path = path / "BP.csv"
        with open(path, "w") as f:
            f.write(csv)

    if mode in ["mf", "all"]:
        matrix = [[float(cell) for cell in row] for row in matrix]
        df = pandas.DataFrame.from_records(matrix, columns=labels)
        # Alternating columns: even = low thresholds, odd = high thresholds.
        thrL, thrH = df.iloc[:, ::2], df.iloc[:, 1::2]
        loader = load_point_data_by_path(pdt_path, cheaper=cheaper)
        dt = DecisionTree(threshold_low=thrL, threshold_high=thrH, ranges=ranges)
        rep = dt.cal_rep_error(loader, nBin=int(mf_cols))
        path = output_path
        if mode == "all":
            path = path / f"{loader.error_type.name}.csv"
        with open(path, "w") as f:
            rep.to_csv(
                f,
                header=[str(i + 1) for i in range(int(mf_cols))],
                index_label="WT Code",
            )

    if mode in ["wt", "all"]:
        ylim = payload["yLim"]
        bins = payload["bins"]
        num_bins = payload["numBins"]
        thrGridOut = payload["thrGridOut"]
        # First cell of each grid row is the WT code; the rest are thresholds.
        matrix = [[float(cell) for cell in row[1:]] for row in thrGridOut]
        df = pandas.DataFrame.from_records(matrix, columns=labels)
        loader = load_point_data_by_path(pdt_path, cheaper=cheaper)
        bins = [float(each) for each in bins]
        thrL_out, thrH_out = df.iloc[:, ::2], df.iloc[:, 1::2]
        path = output_path
        if mode == "all":
            path = path / "WTs"
        # NOTE(review): placement of makedirs relative to the `if` above is
        # ambiguous in the flattened source; creating the target dir in both
        # modes is harmless and keeps "wt" mode working on a fresh path.
        os.makedirs(path, exist_ok=True)
        for idx in range(len(thrL_out)):
            thrL = thrL_out.iloc[idx]
            thrH = thrH_out.iloc[idx]
            wt = WeatherType(
                thrL=thrL, thrH=thrH, thrL_labels=labels[::2], thrH_labels=labels[1::2]
            )
            dataframe, title_tokens = wt.evaluate(loader.error_type.name, loader=loader)
            title = wrap_title(title=title_tokens, chunk_size=6)
            error = dataframe[loader.error_type.name]
            wt_code = thrGridOut[idx][0]
            wt.plot(
                error,
                bins,
                title,
                y_lim=int(ylim),
                num_bins=int(num_bins),
                out_path=os.path.join(path, f"WT_{wt_code}.png"),
            )

    if mode in ["bias", "all"]:
        thrGridOut = payload["thrGridOut"]
        bins = payload["bins"]
        num_bins = payload["numBins"]
        bins = [float(each) for each in bins]
        matrix = [[float(cell) for cell in row[1:]] for row in thrGridOut]
        df = pandas.DataFrame.from_records(matrix, columns=labels)
        loader = load_point_data_by_path(pdt_path, cheaper=cheaper)
        thrL_out, thrH_out = df.iloc[:, ::2], df.iloc[:, 1::2]
        path = output_path
        if mode == "all":
            path = path / "Bias.csv"
        csv = []  # accumulated (wt_code, bias) rows
        for idx in range(len(thrL_out)):
            thrL = thrL_out.iloc[idx]
            thrH = thrH_out.iloc[idx]
            wt = WeatherType(
                thrL=thrL, thrH=thrH, thrL_labels=labels[::2], thrH_labels=labels[1::2]
            )
            dataframe, title_tokens = wt.evaluate(loader.error_type.name, loader=loader)
            error = dataframe[loader.error_type.name]
            # Bias is computed on the discretized error, clipped to bin range.
            discretized_error = wt.discretize_error(error=error, num_bins=int(num_bins))
            bias = loader.error_type.bias(
                error=discretized_error, low=bins[0], high=bins[-1]
            )
            bias = f"{bias:.2f}"
            wt_code = thrGridOut[idx][0]
            csv += [(wt_code, bias)]
        pandas.DataFrame.from_records(csv, columns=["WT Code", "Bias"]).to_csv(
            path, index=False
        )

    if mode == "all":
        family = payload["family"]
        version = payload["version"]
        accumulation = payload["accumulation"]
        accumulation = f", {accumulation}-hourly" if accumulation else ""
        # NOTE(review): the exact line layout of this README literal was lost
        # when the source was flattened — reconstructed as dedented blocks;
        # confirm against the original repository formatting.
        with open(output_path / "README.txt", "w") as f:
            text = dedent(
                f"""
                ecPoint-{family}{accumulation}

                Version: {version}

                Timestamp: {datetime.now()}
                """
            )
            f.write(text.lstrip())
        loader = load_point_data_by_path(pdt_path, cheaper=cheaper)
        # Preserve the source PDT's on-disk format; default to ascii.
        if pdt_path.endswith(".ascii"):
            ext = "ascii"
        elif pdt_path.endswith(".parquet"):
            ext = "parquet"
        else:
            ext = "ascii"
        exclude_cols = payload["excludePredictors"]
        cols = [col for col in loader.columns if col not in exclude_cols]
        # Clone the PDT without the excluded predictor columns.
        loader.clone(*cols, path=output_path / f"PDT.{ext}")
    return Response(json.dumps({}), mimetype="application/json")