Example #1
# plot_all() and plot_both() are defined elsewhere in this script; only the
# plotting entry points are shown in this excerpt.
def plot_runs(rnn_runs, feedforward_runs):
    feedforward_runs = group(feedforward_runs, ["layer_sizes"])
    rnn_runs = group(rnn_runs, ["tuple.mode"])
    plot_all("rnn", rnn_runs)
    plot_all("feedforward", feedforward_runs)
    print("********************************** all **********************************")
    plot_both(feedforward_runs["layer_sizes_2000,2000,2000,2000"],
              rnn_runs["tuple.mode_together"])


def plot(runs, baseline, fname: str):
    groups = {"runs": runs}

    stats = calc_stat(
        groups,
        lambda k: (k.startswith("analysis_results/") and "/accuracy/" in k
                   and "/train/" not in k))["runs"]
    baseline_stats = calc_stat(
        group(baseline, ["scan.train_split"]),
        lambda k: k.startswith("validation/") and "/accuracy/" in k)
    for k in stats:
        print(k)

    print("Baseline groups", baseline_stats.keys())

    # NOTE: "plots", "refs" and "names" are module-level mappings defined
    # outside of this excerpt.
    means = {k: stats[v].get().mean for k, v in plots.items()}
    std = {k: stats[v].get().std for k, v in plots.items()}

    # validation/jump/accuracy/total
    for k, v in refs.items():
        print("----------------================---------------------")
        print(baseline_stats[f"scan.train_split_{v}"])

    ref_stats = {
        k: baseline_stats[f"scan.train_split_{v}"][f"validation/{v}/accuracy/total"].get()
        for k, v in refs.items()
    }

    ref_means = {k: v.mean for k, v in ref_stats.items()}
    ref_std = {k: v.std for k, v in ref_stats.items()}

    fig = plt.figure(figsize=[3, 1.5])

    plt.bar([2.25 * x for x in range(len(names))],
            [ref_means[n] * 100 for n in names],
            yerr=[ref_std[n] * 100 for n in names],
            align='center')
    plt.bar([2.25 * x + 1 for x in range(len(names))],
            [means[n] * 100 for n in names],
            yerr=[std[n] * 100 for n in names],
            align='center')
    plt.xticks([2.25 * x + 0.5 for x in range(len(names))], names)
    plt.ylabel("Test accuracy [\\%]")
    # plt.legend(["Before", "After"])

    fig.savefig(fname, bbox_inches='tight')
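
# A minimal, self-contained sketch of the paired-bar layout used in plot()
# above: one set of bars at 2.25 * x, the other at 2.25 * x + 1, tick labels
# centered between each pair. All data below is synthetic.
import matplotlib.pyplot as plt

names = ["a", "b", "c"]      # hypothetical category names
before = [95.0, 60.0, 20.0]  # hypothetical baseline accuracies [%]
after = [97.0, 90.0, 80.0]   # hypothetical final accuracies [%]

fig = plt.figure(figsize=[3, 1.5])
plt.bar([2.25 * x for x in range(len(names))], before, align='center')
plt.bar([2.25 * x + 1 for x in range(len(names))], after, align='center')
plt.xticks([2.25 * x + 0.5 for x in range(len(names))], names)
plt.ylabel("Test accuracy [%]")
fig.savefig("paired_bars_sketch.pdf", bbox_inches='tight')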
Example #3
plt.rcParams['text.latex.preamble'] = [
    r'\usepackage[cm]{sfmath}'  # assumed content; the start of this assignment was cut off
]  # Force sans-serif math mode (for axes labels)
plt.rcParams['font.family'] = 'sans-serif'  # ... for regular text
plt.rcParams['font.sans-serif'] = (
    'Helvetica, Avant Garde, Computer Modern Sans serif')  # Choose a nice font here
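
# The "\%" axis labels used by these scripts and the LaTeX preamble above only
# take effect when matplotlib's LaTeX rendering is on; the switch is not shown
# in this excerpt, so the following line is an assumption:
plt.rcParams['text.usetex'] = True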

TEST = False

api = wandb.Api()

runs = lib.get_runs(["addmul_feedforward_big", "addmul_rnn"])
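
# lib.get_runs() is project-specific and not shown in these excerpts. A rough,
# assumed sketch of such a helper on top of the public wandb API; the
# "entity/project" path is a placeholder:
def get_runs_sketch(group_names):
    return [r for r in wandb.Api().runs("entity/project")
            if r.group in group_names]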

BASE_DIR = "out/addmul_confusion_plot/download"
shutil.rmtree(BASE_DIR, ignore_errors=True)
os.makedirs(BASE_DIR, exist_ok=True)

runs = group(runs, ['layer_sizes', "task"])
print(runs.keys())


def draw_confusion(means: np.ndarray, std: np.ndarray):
    print("MEAN", means)
    figure = plt.figure(figsize=[2.5, 0.5])

    ax = plt.gca()
    im = plt.imshow(means,
                    interpolation='nearest',
                    cmap=plt.cm.viridis,
                    aspect='auto',
                    vmin=0,
                    vmax=100)
    x_marks = ["$+$", "$*$", "none"]
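
# draw_confusion() is cut off above. A sketch of how such a heatmap could be
# finished (tick labels and a colorbar sized to the axes via
# make_axes_locatable, which a later script imports); everything below is an
# assumption, not the original continuation.
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable


def draw_confusion_sketch(means: np.ndarray, fname: str):
    fig = plt.figure(figsize=[2.5, 0.5])
    ax = plt.gca()
    im = plt.imshow(means, interpolation='nearest', cmap=plt.cm.viridis,
                    aspect='auto', vmin=0, vmax=100)
    marks = ["$+$", "$*$", "none"]
    plt.xticks(range(means.shape[1]), marks[:means.shape[1]])
    plt.yticks(range(means.shape[0]), marks[:means.shape[0]])
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.05)
    plt.colorbar(im, cax=cax)
    fig.savefig(fname, bbox_inches='tight')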
Example #4
#!/usr/bin/env python3
import lib
from lib import StatTracker
from lib.common import group
import os

import matplotlib.pyplot as plt

runs = lib.get_runs([
    "addmul_feedforward", "addmul_feedforward_big", "addmul_feedforward_huge"
])

os.makedirs("out", exist_ok=True)

runs = group(runs, ["layer_sizes"])
print(runs)
all_stats = {}

for grp, grp_runs in runs.items():
    print("----------------------------------- ", grp)
    for run in grp_runs:
        tsum = 0
        ssum = 0
        # print(stats)
        for k, v in run.summary.items():
            kparts = k.split("/")
            if kparts[-1] != "n_1" or "/all/" in k or not k.startswith(
                    "mask_stat/"):
                continue

            print(k, v)
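
# The filter above keeps one summary key per layer/task pair. A small,
# hypothetical helper with the same condition; the example keys are made up,
# since the key layout is only hinted at by this script:
def is_shared_count_key(k: str) -> bool:
    return (k.startswith("mask_stat/") and "/all/" not in k
            and k.split("/")[-1] == "n_1")


assert is_shared_count_key("mask_stat/layers_0/add/n_1")
assert not is_shared_count_key("mask_stat/layers_0/all/n_1")
assert not is_shared_count_key("mask_stat/layers_0/add/n_0")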
#!/usr/bin/env python3

import lib
from lib import StatTracker
from lib.common import group
import os

import matplotlib.pyplot as plt

runs = lib.get_runs(["addmul_feedforward_big", "addmul_rnn"])

runs = group(runs, ["layer_sizes", "task"])

all_stats = {}

for grp, rn in runs.items():
    if grp not in all_stats:
        all_stats[grp] = {}

    stats = all_stats[grp]

    for r in rn:
        for k, v in r.summary.items():
            if not k.startswith("mask_stat/") or "/n_" not in k:
                continue

            if k not in stats:
                stats[k] = StatTracker()

            stats[k].add(v)
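
# lib.StatTracker's implementation is not shown in these excerpts. A minimal
# stand-in matching how it is used here (.add() values, .get() returning an
# object with .mean and .std); the real class may well differ:
from types import SimpleNamespace

import numpy as np


class StatTrackerSketch:
    def __init__(self):
        self.values = []

    def add(self, v):
        self.values.append(np.asarray(v, dtype=np.float64))

    def get(self):
        data = np.stack(self.values)
        return SimpleNamespace(mean=data.mean(axis=0), std=data.std(axis=0))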
    print(f"Downloading run {i}, {r.name}, {r.id}")
    run_dir = os.path.join(WEIGHTS_DIR, r.name, r.id)
    if os.path.isdir(run_dir):
        continue

    for f in tqdm(r.files()):
        if "export/stage_final_masks/stage_0" not in f.name:
            continue

        dl_name = os.path.join(run_dir, f.name)
        os.makedirs(os.path.dirname(dl_name), exist_ok=True)
        f.download(root=run_dir, replace=True)

from collections import OrderedDict
from typing import Dict

import numpy as np

from lib import StatTracker

N_POINTS = 500

runs = group(runs, ["task", 'layer_sizes', "tuple.mode"])

trackers: Dict[str, StatTracker] = {}
trackers_all: Dict[str, StatTracker] = {}


def add_tracker(trackers, name, data):
    if name not in trackers:
        trackers[name] = StatTracker()

    hist, _ = np.histogram(data, N_POINTS, [0, 1])
    trackers[name].add(hist)
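
# Hypothetical usage of add_tracker(): each call turns one run's values into an
# N_POINTS-bin histogram over [0, 1], and the StatTracker then averages these
# histograms across runs. The data below is synthetic.
rng = np.random.default_rng(0)
for _ in range(5):
    add_tracker(trackers, "example_group", rng.random(1000))
mean_histogram = trackers["example_group"].get().mean  # averaged histogram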


human_readable_names = OrderedDict()
# The human_readable_names[...] assignments are truncated in this excerpt.
#!/usr/bin/env python3

import lib
from lib import StatTracker
from lib.common import group
import os
from mpl_toolkits.axes_grid1 import make_axes_locatable

import matplotlib.pyplot as plt

runs = lib.get_runs(["addmul_ff_alpha_analysis"])
runs = group(runs, ["mask_loss_weight"])

sharing_stats = {}
accuracy_stats = {}

for grp, grp_runs in runs.items():
    print("----------------------------------- ", grp)
    for run in grp_runs:
        print("RUN ID", run.id)
        tsum = 0
        ssum = 0
        # print(stats)
        for k, v in run.summary.items():
            kparts = k.split("/")
            if kparts[-1] != "n_1" or "/all/" in k or not k.startswith(
                    "mask_stat/"):
                continue

            print(k, v)
Example #8
    # NOTE: fig, rnn_stats, plots and names are defined earlier in this
    # function, which is truncated in this excerpt.
    for t in range(2):
        this_rnn_stats = [rnn_stats[f"{plots[n]}{t}"].get() for n in names]
        means_rnn = [s.mean * 100 for s in this_rnn_stats]
        std_rnn = [s.std * 100 for s in this_rnn_stats]
        plt.bar([5.5 * r + 1 + t * 2.5 for r in range(len(names))],
                means_rnn,
                yerr=std_rnn,
                align='center')

    plt.xticks([5.5 * r + 1.75 for r in range(len(names))], names)
    plt.ylabel("Accuracy [\\%]")

    # plt.legend(["F1", "F2", "R1", "R2"], bbox_to_anchor=(1.1, 1.05))

    fname = f"{BASE_DIR}/tuple_performance.pdf"
    fig.axes[0].yaxis.set_label_coords(-0.12, 0.4)
    fig.savefig(fname, bbox_inches='tight', pad_inches=0.01)


rnn_runs = lib.get_runs(["tuple_rnn"])
feedforward_runs = lib.get_runs(["tuple_feedforward_big"])

feedforward_runs = group(feedforward_runs, ["layer_sizes"])
rnn_runs = group(rnn_runs, ["tuple.mode"])
plot_all("rnn", rnn_runs)
plot_all("feedforward", feedforward_runs)

plot_both(feedforward_runs["layer_sizes_2000,2000,2000,2000"],
          rnn_runs["tuple.mode_together"])
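
# lib.common.group() is not shown in these excerpts. Judging from the keys used
# above ("layer_sizes_2000,2000,2000,2000", "tuple.mode_together"), it buckets
# runs by their config values; a rough, assumed sketch:
def group_sketch(runs, keys):
    res = {}
    for run in runs:
        label = "_".join(f"{k}_{run.config[k]}" for k in keys)
        res.setdefault(label, []).append(run)
    return res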
Example #9
import os
from dataclasses import dataclass
from typing import Dict, Union

import numpy as np
import torch
import matplotlib.pyplot as plt

import lib
from lib.common import group


def do_plot(runs, prefix):
    runs = group(runs, ["layer_sizes", "task"])

    all_stats = {}

    download_dir = f"{prefix}/weights"

    @dataclass
    class Similarity:
        iou: Union[float, lib.StatTracker, lib.Stat]
        subsetness: Union[float, lib.StatTracker, lib.Stat]

    def calc_stats(run: str) -> Dict[str, Similarity]:
        base_dir = os.path.join(download_dir, run, "export/stage_final_masks")
        dir1 = f"{base_dir}/stage_1/"
        dir2 = f"{base_dir}/stage_2/"

        res = {}

        for f in os.listdir(dir1):
            assert f.endswith(".pth")
            # the masks loaded here behave like numpy arrays (see .astype
            # below), so ">" yields boolean arrays
            m1 = (torch.load(os.path.join(dir1, f)) > 0)
            m2 = (torch.load(os.path.join(dir2, f)) > 0)

            n_min = min(m1.astype(np.int64).sum(), m2.astype(np.int64).sum())

            intersect = (m1 & m2).astype(np.int64).sum()
            union = (m1 | m2).astype(np.int64).sum()

            res[f[:-4]] = Similarity(intersect / union, intersect / n_min)

        return res

    for grp, rn in runs.items():
        if grp not in all_stats:
            all_stats[grp] = {}

        stats = all_stats[grp]

        for run in rn:
            for f in run.files(per_page=10000):
                if (not f.name.startswith("export")
                        or "/stage_final_masks" not in f.name):
                    continue

                fname = os.path.join(download_dir, run.id, f.name)
                if not os.path.isfile(fname):
                    print(fname)
                    target_dir = os.path.dirname(fname)

                    os.makedirs(target_dir, exist_ok=True)

                    print(f"Run {run.id}: downloading {fname}...")
                    f.download(root=os.path.join(download_dir, run.id), replace=True)

            for name, val in calc_stats(run.id).items():
                if name not in stats:
                    stats[name] = Similarity(lib.StatTracker(), lib.StatTracker())

                stats[name].iou.add(val.iou)
                stats[name].subsetness.add(val.subsetness)

        for v in stats.values():
            v.iou = v.iou.get()
            v.subsetness = v.subsetness.get()


    def friendly_name(name: str) -> str:
        if name.startswith("mask_"):
            name = name[5:]

        if name.endswith("_weight"):
            name = name[:-7]

        name = name.replace("_weight_", "_")
        name = name.replace("_cells_", "_")

        lparts = name.split("_")
        if lparts[0] == "layers" and lparts[1].isdecimal():
            name = f"layer {int(lparts[1])+1}"

        if name in ["output_projection", "layer 5"]:
            name = "output"

        return name.replace("_", "\\_")

    for grp, stats in all_stats.items():
        print("-------------------- GROUP --------", grp)
        print(stats.keys())

        fig = plt.figure(figsize=[4.5, 1.4])

        keys = list(sorted(stats.keys()))
        if keys[0].startswith("lstm_cells"):
            # flip each adjacent pair of sorted keys so the per-layer LSTM
            # mask files appear in the intended display order
            for i in range(1, len(keys), 2):
                keys[i], keys[i - 1] = keys[i - 1], keys[i]


        # print([friendly_name(k) for k in keys])
        names = [friendly_name(k) for k in keys]

        legend = ["IoU", "IoMin"]

        plt.bar([2.25 * x for x in range(len(names))],
                [stats[n].iou.mean for n in keys],
                yerr=[stats[n].iou.std for n in keys],
                align='center')
        plt.bar([2.25 * x + 1 for x in range(len(names))],
                [stats[n].subsetness.mean for n in keys],
                yerr=[stats[n].subsetness.std for n in keys],
                align='center')


        plt.xticks([2.25 * x + 0.5 for x in range(len(names))], names)
        plt.ylabel("Proportion")
        plt.ylim(0, 1)
        plt.legend(legend)

        f = f"{prefix}/{grp}.pdf"
        os.makedirs(os.path.dirname(f), exist_ok=True)
        fig.savefig(f, bbox_inches='tight')
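
# Tiny worked example of the two similarity measures computed in calc_stats()
# above: IoU = |A & B| / |A | B|, and "subsetness" (plotted as "IoMin")
# = |A & B| / min(|A|, |B|), which is 1.0 whenever one mask contains the other.
import numpy as np

a = np.array([True, True, True, False])
b = np.array([True, True, False, False])
inter = (a & b).sum()          # 2
union = (a | b).sum()          # 3
n_min = min(a.sum(), b.sum())  # 2
print(inter / union, inter / n_min)  # -> 0.666..., 1.0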