Exemple #1
0
def main() -> None:
    """Plot per-system histograms of a value column and annotate percentiles.

    One single-column file is read per entry of ``args.input``.  When
    ``args.remove_tail`` is set, values at or above that percentile are
    dropped.  All systems are drawn as step histograms on one axis, and each
    percentile in ``args.annotate`` is marked with an arrow.  The figure is
    written to ``args.output`` when that is a string.
    """
    args = parse_arguments()
    seaborn_setup()

    annotation_xy = []
    dfs = []
    for idx, one in enumerate(args.input):
        dftmp = pd.read_csv(one, names=["Value"], sep=args.sep, header=None)

        # Force the legend label to be a string.
        args.names[idx] = f"{args.names[idx]}"
        if args.remove_tail is not None:
            threshold = np.percentile(dftmp["Value"], args.remove_tail)
            dftmp = dftmp[dftmp["Value"] < threshold]

        counts, edges = np.histogram(dftmp["Value"], bins=args.bins)
        # NOTE(review): ``counts`` are raw bin counts; when ``args.stat`` is
        # not a count-based statistic the arrow height may not match the
        # plotted bar -- confirm the intended behaviour.
        for q in args.annotate or []:
            value = np.percentile(dftmp["Value"], q)
            # Index of the bin containing ``value``: the position of the first
            # edge strictly greater than ``value``, minus one.  When no edge is
            # greater (``value`` is the maximum, e.g. q == 100) the value
            # belongs to the last bin, whose right edge is inclusive, so the
            # index is clamped instead of raising IndexError.
            loc = int(np.searchsorted(edges, value, side="right")) - 1
            loc = min(max(loc, 0), len(counts) - 1)
            annotation_xy.append((value, counts[loc], q))

        dfs.append(dftmp)
    df = pd.concat(dfs, names=["Sys"], keys=args.names)
    fig, ax = plt.subplots(1, 1, figsize=(args.width, args.height))
    sns.histplot(
        x="Value",
        data=df,
        hue="Sys",
        hue_order=args.names,
        palette=args.palette,
        ax=ax,
        kde=False,
        element="step",
        stat=args.stat,
        common_norm=False,
        bins=args.bins,
    )
    if args.x_log_scale:
        ax.set_xscale("log")
    if args.y_log_scale:
        ax.set_yscale("log")

    for x, y, v in annotation_xy:
        ax.annotate(
            f"{v}%: {x:.0f}",
            (x, y),
            ha="center",
            va="center",
            xytext=(0, 50),
            textcoords="offset points",
            size="xx-small",
            arrowprops=dict(arrowstyle="-|>",
                            color="black",
                            connectionstyle="arc3"),
        )
    ax.get_legend().set_title("")
    ax.set_xlabel(args.xlabel)
    if isinstance(args.output, str):
        fig.tight_layout()
        fig.savefig(args.output)
Exemple #2
0
def main() -> None:
    """Draw a count plot of the ``Rel`` column of a qrels file and save it.

    The qrels are loaded with ``load_qrels`` using ``args.qrels``,
    ``args.session`` and ``args.min_rel``.  Every bar is labelled with its
    height and the figure is written to ``args.save``.
    """
    args = parse_arguments()
    qrels = load_qrels(args.qrels, session=args.session, min_rel=args.min_rel)

    seaborn_setup()

    # Fall back to a 20x10 figure when no size was requested.
    args.width = args.width or 20
    args.height = args.height or 10

    fig, ax = plt.subplots(1, 1, figsize=(args.width, args.height))
    sns.countplot(x="Rel", data=qrels, ax=ax, palette="deep")

    # Write each bar's height just above its top, centred horizontally.
    for bar in ax.patches:
        bar_height = bar.get_height()
        bar_center = bar.get_x() + bar.get_width() / 2.0
        ax.annotate(
            f"{bar_height:.0f}",
            (bar_center, bar_height + 0.2),
            ha="center",
            va="center",
            xytext=(0, 10),
            textcoords="offset points",
        )

    ax.set_ylabel("Count")
    ax.set_xticklabels(ax.get_xticklabels())
    ax.set_xlabel("Session ID" if args.session else "Query ID")

    fig.tight_layout()
    fig.savefig(args.save)
Exemple #3
0
def main() -> None:
    """Plot density histograms of run scores, split by system and relevance.

    Each run in ``args.run`` is loaded with ``load_run``.  When ``args.qrels``
    is given, the runs are left-joined with the qrels (missing values filled
    with 0) and scores are grouped per relevance label.  Otherwise every row
    is put in a single ``"All"`` group.  The figure is written to
    ``args.save`` when that is a string.
    """
    args = parse_arguments()
    seaborn_setup()
    dfs = [load_run(x) for x in args.run]
    has_qrels = bool(args.qrels)
    if has_qrels:
        qrels = load_qrels(args.qrels, args.binarize, args.min_rel)
        dfs = [x.merge(qrels, how="left").fillna(0) for x in dfs]
    else:
        for run in dfs:
            run.loc[:, "Rel"] = "All"

    fig, ax = plt.subplots(1, 1, figsize=(args.width, args.height))

    df = pd.concat(dfs, names=["Sys"], keys=args.names).reset_index()
    if has_qrels:
        # Only judged runs carry numeric labels; casting the "All" placeholder
        # to int would raise ValueError, so the cast stays in this branch.
        # Replace the column rather than writing through ``.loc`` so the new
        # dtype actually takes effect.
        df["Rel"] = df["Rel"].astype(int)
        uniq_rel = sorted(df["Rel"].unique())
        if uniq_rel == [0, 1]:
            df["Rel"] = df["Rel"].map({0: "Non-relevant", 1: "Relevant"})
            rel_labels = ["Non-relevant", "Relevant"]
        else:
            df["Rel"] = "Rel=" + df["Rel"].astype(str)
            rel_labels = [f"Rel={x}" for x in uniq_rel]
    else:
        rel_labels = ["All"]
    # Legend order: all systems for the first label, then the next label, ...
    hue_order = [f"{name} {rel}" for rel, name in product(rel_labels, args.names)]
    df["SysRel"] = df["Sys"] + " " + df["Rel"].astype(str)
    sns.histplot(
        x="Score",
        data=df,
        hue="SysRel",
        hue_order=hue_order,
        palette=args.palette,
        ax=ax,
        kde=False,
        element="step",
        stat="density",
        common_norm=False,
        bins=args.bins,
    )
    ax.get_legend().set_title("")
    if isinstance(args.save, str):
        fig.tight_layout()
        fig.savefig(args.save)
Exemple #4
0
def main() -> None:
    """Draw a box plot of metric values per query from evaluation files.

    Every file in ``args.evals`` is wrapped in ``TrecEval``; the rows it
    yields are collected into one frame with columns ``Metric``, ``Qid`` and
    ``Value``.  The plot is shown when ``args.show`` is set and saved to
    ``args.save`` when that is a string.
    """
    args = parse_arguments()
    seaborn_setup()
    evaluations = [TrecEval(path) for path in args.evals]
    fig, ax = plt.subplots(1, 1, figsize=(30, 15))

    # Flatten the rows of all evaluations into a single table.
    rows = [row for evaluation in evaluations for row in evaluation]
    frame = pd.DataFrame(data=rows, columns=["Metric", "Qid", "Value"])

    sns.boxplot(x="Qid", y="Value", hue="Metric", data=frame, ax=ax)
    ax.tick_params(axis="x", rotation=45)
    if args.no_xticks:
        ax.set_xticks([])

    if args.show:
        plt.show()

    if isinstance(args.save, str):
        fig.tight_layout()
        fig.savefig(args.save)
Exemple #5
0
def prepare_eval(args: argparse.Namespace) -> List[pd.DataFrame]:
    """Load the evaluation files in ``args.eval`` as column-aligned frames.

    Side effects: calls ``seaborn_setup()``, seeds NumPy's global RNG when
    ``args`` has a ``seed`` attribute, and replaces ``args.names`` with a list
    of system names (split from a comma-separated string, or ``Sys0``,
    ``Sys1``, ... when no names were given).

    Args:
        args: Parsed command-line namespace.  ``eval``, ``names`` and
            ``metric`` are read; ``seed`` and ``sample`` are optional.

    Returns:
        One frame per evaluation file.  When ``args.sample`` is set, all
        frames are restricted to the rows sampled from the first one.  The
        columns of every frame are sorted by name.

    Raises:
        AssertionError: If the number of names differs from the number of
            evaluation files.
    """
    seaborn_setup()
    # ``seed`` is optional; read it once so the sampling below does not fail
    # with AttributeError when the namespace has ``sample`` but no ``seed``.
    seed = getattr(args, "seed", None)
    if hasattr(args, "seed"):
        np.random.seed(seed)
    dfs = [TrecEval(x).to_frame() for x in always_iterable(args.eval)]

    if not args.names:
        args.names = [f"Sys{i}" for i in range(len(dfs))]
    elif isinstance(args.names, str):
        args.names = args.names.split(",")
    # Compare against the number of loaded frames: ``args.eval`` may be a
    # single path, in which case ``len(args.eval)`` would be its string length.
    assert len(args.names) == len(dfs), (
        f"got {len(args.names)} names for {len(dfs)} evaluation files"
    )

    if args.metric:
        # NOTE(review): assumes ``args.metric`` is a list of column names so
        # each selection stays a DataFrame -- confirm against the parser.
        dfs = [x.loc[:, args.metric] for x in dfs]

    sample = getattr(args, "sample", None)
    if sample is not None:
        # Values >= 1 are an absolute row count, smaller values a fraction.
        if sample >= 1:
            sampled = dfs[0].sample(n=int(sample), random_state=seed)
        else:
            sampled = dfs[0].sample(frac=sample, random_state=seed)
        # Keep the same rows in every frame so the systems stay comparable.
        dfs = [sampled] + [x.loc[sampled.index] for x in dfs[1:]]

    sorted_metrics = sorted(dfs[0].columns)
    dfs = [df[sorted_metrics] for df in dfs]
    return dfs