import argparse
from itertools import product
from typing import List

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from more_itertools import always_iterable

# Helpers such as parse_arguments, seaborn_setup, load_qrels, load_run and
# TrecEval are assumed to be defined elsewhere in this package.


def main() -> None:
    args = parse_arguments()
    seaborn_setup()
    annotation_xy = []
    dfs = []
    for idx, one in enumerate(args.input):
        dftmp = pd.read_csv(one, names=["Value"], sep=args.sep, header=None)
        # Ensure the label is a plain string.
        args.names[idx] = f"{args.names[idx]}"
        if args.remove_tail is not None:
            # Drop the upper tail beyond the requested percentile.
            threshold = np.percentile(dftmp["Value"], args.remove_tail)
            dftmp = dftmp[dftmp["Value"] < threshold]
        # np.histogram returns (counts, bin_edges).
        height, width = np.histogram(dftmp["Value"], bins=args.bins)
        for q in args.annotate:
            # Find the bar containing the q-th percentile value so the
            # annotation can be anchored at the top of that bar.
            value = np.percentile(dftmp["Value"], q)
            loc = ((width - value) > 0).nonzero()[0][0] - 1
            annotation_xy.append((value, height[loc], q))
        dfs.append(dftmp)
    df = pd.concat(dfs, names=["Sys"], keys=args.names)

    fig, axes = plt.subplots(1, 1, figsize=(args.width, args.height))
    ax = axes
    sns.histplot(
        x="Value",
        data=df,
        hue="Sys",
        hue_order=args.names,
        palette=args.palette,
        ax=ax,
        kde=False,
        element="step",
        stat=args.stat,
        common_norm=False,
        bins=args.bins,
    )
    if args.x_log_scale:
        ax.set_xscale("log")
    if args.y_log_scale:
        ax.set_yscale("log")
    for x, y, v in annotation_xy:
        ax.annotate(
            f"{v}%: {x:.0f}",
            (x, y),
            ha="center",
            va="center",
            xytext=(0, 50),
            textcoords="offset points",
            size="xx-small",
            arrowprops=dict(arrowstyle="-|>", color="black", connectionstyle="arc3"),
        )
    ax.get_legend().set_title("")
    ax.set_xlabel(args.xlabel)

    if isinstance(args.output, str):
        fig.tight_layout()
        fig.savefig(args.output)
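# Illustrative sketch (not part of the original script): how the percentile
# annotations above are positioned.  np.histogram returns (counts, bin_edges);
# the first edge larger than the percentile value marks the bar that contains
# it, so the annotation is anchored at that bar's height.  Data are made up.
def _demo_percentile_annotation() -> None:
    rng = np.random.default_rng(0)
    values = rng.exponential(scale=100.0, size=10_000)
    counts, edges = np.histogram(values, bins=50)
    for q in (50, 95, 99):
        value = np.percentile(values, q)
        # Index of the bar whose right edge is the first one above `value`.
        loc = ((edges - value) > 0).nonzero()[0][0] - 1
        print(f"{q}th percentile = {value:.1f}, bar height = {counts[loc]}")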
def main() -> None:
    args = parse_arguments()
    df = load_qrels(args.qrels, session=args.session, min_rel=args.min_rel)
    seaborn_setup()
    if not args.width:
        args.width = 20
    if not args.height:
        args.height = 10

    fig, axes = plt.subplots(1, 1, figsize=(args.width, args.height))
    ax = axes
    sns.countplot(x="Rel", data=df, ax=ax, palette="deep")
    for p in ax.patches:
        ax.annotate(
            f"{p.get_height():.0f}",
            (p.get_x() + p.get_width() / 2.0, p.get_height() + 0.2),
            ha="center",
            va="center",
            xytext=(0, 10),
            textcoords="offset points",
        )
    ax.set_ylabel("Count")
    ax.set_xticklabels(ax.get_xticklabels())
    if args.session:
        ax.set_xlabel("Session ID")
    else:
        ax.set_xlabel("Query ID")

    fig.tight_layout()
    fig.savefig(args.save)
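# Illustrative sketch (not part of the original script): labelling each bar of
# a countplot with its height by iterating over ax.patches, as done above.
# The category values below are invented.
def _demo_countplot_labels() -> None:
    demo = pd.DataFrame({"Rel": ["q1", "q1", "q2", "q3", "q3", "q3"]})
    fig, ax = plt.subplots(figsize=(4, 3))
    sns.countplot(x="Rel", data=demo, ax=ax)
    for p in ax.patches:
        ax.annotate(
            f"{p.get_height():.0f}",
            (p.get_x() + p.get_width() / 2.0, p.get_height()),
            ha="center",
            va="bottom",
        )
    fig.tight_layout()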
def main() -> None:
    args = parse_arguments()
    seaborn_setup()
    dfs = [load_run(x) for x in args.run]
    if args.qrels:
        qrels = load_qrels(args.qrels, args.binarize, args.min_rel)
        # Unjudged documents get relevance 0.
        dfs = [x.merge(qrels, how="left").fillna(0) for x in dfs]
    else:
        for df in dfs:
            df.loc[:, "Rel"] = "All"

    fig, axes = plt.subplots(1, 1, figsize=(args.width, args.height))
    ax = axes
    df = pd.concat(dfs, names=["Sys"], keys=args.names).reset_index()
    if args.qrels:
        # The merged relevance column is numeric only when qrels were provided.
        df.loc[:, "Rel"] = df.loc[:, "Rel"].astype(int)
    uniq_rel = sorted(df["Rel"].unique())
    if uniq_rel == [0, 1]:
        df.loc[df["Rel"] == 1, "Rel"] = "Relevant"
        df.loc[df["Rel"] == 0, "Rel"] = "Non-relevant"
        hue_order = [
            f"{y} {x}" for x, y in product(["Non-relevant", "Relevant"], args.names)
        ]
    else:
        df.loc[:, "Rel"] = "Rel=" + df.loc[:, "Rel"].astype(str)
        hue_order = [f"{y} Rel={x}" for x, y in product(uniq_rel, args.names)]
    # One hue level per (system, relevance) combination.
    df.loc[:, "SysRel"] = df["Sys"] + " " + df["Rel"].astype(str)
    sns.histplot(
        x="Score",
        data=df,
        hue="SysRel",
        hue_order=hue_order,
        palette=args.palette,
        ax=ax,
        kde=False,
        element="step",
        stat="density",
        common_norm=False,
        bins=args.bins,
    )
    ax.get_legend().set_title("")

    if isinstance(args.save, str):
        fig.tight_layout()
        fig.savefig(args.save)
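# Illustrative sketch (not part of the original script): pd.concat with `keys`
# adds a "Sys" index level naming the source run, and itertools.product builds
# a hue order with one entry per (system, relevance) pair.  Frames are made up.
def _demo_sys_rel_hue() -> None:
    run_a = pd.DataFrame({"Score": [0.9, 0.2], "Rel": [1, 0]})
    run_b = pd.DataFrame({"Score": [0.8, 0.1], "Rel": [1, 0]})
    names = ["SysA", "SysB"]
    df = pd.concat([run_a, run_b], names=["Sys"], keys=names).reset_index()
    df["Rel"] = df["Rel"].map({1: "Relevant", 0: "Non-relevant"})
    df["SysRel"] = df["Sys"] + " " + df["Rel"]
    hue_order = [f"{y} {x}" for x, y in product(["Non-relevant", "Relevant"], names)]
    print(df[["Sys", "Rel", "SysRel"]])
    print(hue_order)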
def main() -> None:
    args = parse_arguments()
    seaborn_setup()
    evals = [TrecEval(x) for x in args.evals]

    fig, ax = plt.subplots(1, 1, figsize=(30, 15))
    table = []
    for eval_ in evals:
        # Iterating a TrecEval yields (metric, qid, value) records.
        table.extend(list(eval_))
    data = pd.DataFrame(data=table, columns=["Metric", "Qid", "Value"])
    sns.boxplot(x="Qid", y="Value", hue="Metric", data=data, ax=ax)
    ax.tick_params(axis="x", rotation=45)
    if args.no_xticks:
        ax.set_xticks([])

    if args.show:
        plt.show()
    if isinstance(args.save, str):
        fig.tight_layout()
        fig.savefig(args.save)
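# Illustrative sketch (not part of the original script): the boxplot above
# expects long-format records.  The (Metric, Qid, Value) tuples below stand in
# for whatever iterating a TrecEval object yields; the numbers are invented.
def _demo_eval_boxplot() -> None:
    table = [
        ("map", "q1", 0.31), ("map", "q2", 0.52),
        ("ndcg", "q1", 0.44), ("ndcg", "q2", 0.61),
    ]
    data = pd.DataFrame(data=table, columns=["Metric", "Qid", "Value"])
    fig, ax = plt.subplots(figsize=(4, 3))
    sns.boxplot(x="Qid", y="Value", hue="Metric", data=data, ax=ax)
    fig.tight_layout()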
def prepare_eval(args: argparse.Namespace) -> List[pd.DataFrame]:
    seaborn_setup()
    if hasattr(args, "seed"):
        np.random.seed(args.seed)
    dfs = [TrecEval(x).to_frame() for x in always_iterable(args.eval)]
    args.names = (
        args.names.split(",") if args.names else [f"Sys{i}" for i in range(len(dfs))]
    )
    assert len(args.names) == len(args.eval)
    if args.metric:
        dfs = [x.loc[:, args.metric] for x in dfs]
    if hasattr(args, "sample") and args.sample is not None:
        # Sample queries from the first run, then align the remaining runs on
        # the same query ids so every system is evaluated on the same subset.
        if args.sample >= 1:
            dfs[0] = dfs[0].sample(n=int(args.sample), random_state=args.seed)
        else:
            dfs[0] = dfs[0].sample(frac=args.sample, random_state=args.seed)
        dfs = [x.loc[dfs[0].index] for x in dfs]
    # Keep metric columns in a consistent order across systems.
    sorted_metrics = sorted(dfs[0].columns)
    dfs = [df[sorted_metrics] for df in dfs]
    return dfs
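# Illustrative sketch (not part of prepare_eval): sampling query ids from the
# first frame and reindexing the others with .loc keeps every system evaluated
# on the same subset of queries.  Frames and metric values are invented.
def _demo_paired_sampling() -> None:
    idx = [f"q{i}" for i in range(6)]
    sys_a = pd.DataFrame({"map": np.linspace(0.1, 0.6, 6)}, index=idx)
    sys_b = pd.DataFrame({"map": np.linspace(0.2, 0.7, 6)}, index=idx)
    dfs = [sys_a, sys_b]
    dfs[0] = dfs[0].sample(frac=0.5, random_state=42)
    dfs = [x.loc[dfs[0].index] for x in dfs]
    # Both frames now share the same sampled query ids, in the same order.
    assert list(dfs[0].index) == list(dfs[1].index)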