Example #1
0
def test_imed():
    """Check the IMED metric matrix against an explicit Gaussian convolution.

    Two things are verified on a pair of random 5x5 images:

    1. Applying the metric matrix to the flattened difference image gives
       the same result as convolving the difference with a Gaussian kernel.
    2. ``imed_metric`` equals the sum over the difference image multiplied
       element-wise with its convolved version.
    """
    np.random.seed(0)

    first = np.random.normal(size=[5, 5])
    second = np.random.normal(size=[5, 5])
    delta = first - second

    # Reference: smooth the difference image with an explicit convolution.
    smoothed = convolve2d(
        delta, gauss_kernel([5, 5], sigma=0.5), mode="same")

    # Candidate: apply the metric matrix to the flattened difference image.
    G = metric_matrix(delta.shape, sigma=0.5)
    projected = G.dot(delta.reshape(-1)).reshape(delta.shape)

    # For visual debugging, imshow `smoothed` next to `projected`.
    assert np.allclose(smoothed, projected)

    actual = imed_metric(first[None, :, :], second[None, :, :], G=G)
    expected = (delta * smoothed).sum()

    assert np.allclose(actual, expected)
Example #2
0
def esn_perf(outdir, animate=False):
    """Average the ESN and trivial-prediction IMED over all prediction files.

    Every ``pred_data_idx*.nc`` file found in `outdir` contributes one row:
    the stored ``imed`` variable (ESN error) and the IMED of a "trivial"
    prediction that repeats the first label frame for the whole sequence.

    Parameters
    ----------
    outdir : pathlib.Path
        Directory that is globbed for ``pred_data_idx*.nc`` files.
    animate : bool
        If true, animate labels vs. outputs of the last file that was read.

    Returns
    -------
    (esn_mean, trivial_mean)
        Both errors averaged over the files (axis 0), one value per
        prediction step.

    Raises
    ------
    IndexError
        If no prediction file is found in `outdir`.
    """
    pred_data_nc_files = list(outdir.glob("pred_data_idx*.nc"))
    nr_files = len(pred_data_nc_files)

    # The first file defines the prediction length and image shape.
    with nc.Dataset(pred_data_nc_files[0], "r") as src:
        img_shape = src["outputs"].shape[1:]
        pred_length = src["outputs"].shape[0]

    # Build the metric matrix once and reuse it for every file. It was
    # previously reset to None right after being computed, which threw the
    # work away and handed `G=None` to `imed_metric` on every iteration.
    G = metric_matrix(img_shape)

    esn_error = np.empty([nr_files, pred_length])
    trivial_error = np.empty([nr_files, pred_length])

    for i, pred_data_nc in tqdm(enumerate(pred_data_nc_files), total=nr_files):
        with nc.Dataset(pred_data_nc, 'r') as src:

            esn_error[i] = src["imed"][:]

            example_lbls = src["labels"][:]
            example_pred = src["outputs"][:]
            # Trivial prediction: the first label frame repeated in time.
            triv_pred = np.tile(example_lbls[0], (example_lbls.shape[0], 1, 1))
            t1 = time.time()
            im = imed_metric(triv_pred, example_lbls, G=G)
            t2 = time.time()
            trivial_error[i] = im
            t3 = time.time()
            tqdm.write(f"imed: {t2-t1}")
            tqdm.write(f"asgn: {t3-t2}")

    if animate:
        # Keep a reference to the animation until `plt.show()` returns;
        # otherwise it may be garbage collected before it is displayed.
        anim = animate_double_imshow(example_lbls, example_pred)
        plt.show()

    return esn_error.mean(axis=0), trivial_error.mean(axis=0)
Example #3
0
def read_imed(pred_data_ncfiles,
              only_first_n=100,
              read_cycle_pred=True,
              return_cycle_pred=False):
    """Read ESN IMED and labels from prediction files; compute cycle IMED.

    Parameters
    ----------
    pred_data_ncfiles : sequence of pathlib.Path
        Prediction files; each path must contain ``"pred_data"``. They are
        sorted with ``sort_filenames`` before being read.
    only_first_n : int or None
        Only the first `only_first_n` sorted files are read.
    read_cycle_pred : bool
        If true, load ``cycle_pred_data_idx{idx}.npy`` from the directory
        of each prediction file and compute its IMED against the labels.
    return_cycle_pred : bool
        If true, additionally return the cycle prediction of the last file
        (``None`` when no cycle prediction was read).

    Returns
    -------
    tuple
        ``(esn_imed, cycle_imed, labels)`` as arrays, plus the last cycle
        prediction when `return_cycle_pred` is set.

    Raises
    ------
    ValueError
        If `read_cycle_pred` is set and a cycle prediction file is missing.
    """
    import numpy as np
    from torsk.scripts.pred_perf import sort_filenames
    from torsk.imed import imed_metric
    from tqdm import tqdm
    metric_log_idx = 25
    labels = []
    esn_imed, cycle_imed = [], []
    # Bound up front so that `return_cycle_pred=True` cannot hit an unbound
    # local when no cycle prediction is read (or the file list is empty).
    cpred = None
    pred_data_ncfiles, indices = sort_filenames(pred_data_ncfiles,
                                                return_indices=True)
    pred_data_ncfiles = pred_data_ncfiles[:only_first_n]
    indices = indices[:only_first_n]

    # read prediction files
    for ii, (idx,
             pred_data_nc) in tqdm(enumerate(zip(indices, pred_data_ncfiles)),
                                   total=len(pred_data_ncfiles)):
        assert "pred_data" in pred_data_nc.as_posix()

        with nc.Dataset(pred_data_nc, "r") as src:

            esn_imed.append(src["imed"][:])
            tqdm.write(f"{pred_data_nc.name}: IMED at step {metric_log_idx}: "
                       f"{esn_imed[ii][metric_log_idx]}")

            labels.append(src["labels"][:])

        if not read_cycle_pred:
            continue

        cycle_pred_file = pred_data_nc.parent / f"cycle_pred_data_idx{idx}.npy"
        if not cycle_pred_file.exists():
            raise ValueError(f"{cycle_pred_file} does not exist. "
                             "Cannot compute cycle prediction. "
                             "Create it with `torsk cycle-predict`")
        # Truncate the cycle prediction to the length of the label sequences.
        cpred = np.load(cycle_pred_file)[:labels[0].shape[0]]
        cycle_imed.append(imed_metric(cpred, labels[-1]))

    if return_cycle_pred:
        return np.array(esn_imed), np.array(cycle_imed), np.array(
            labels), cpred
    return np.array(esn_imed), np.array(cycle_imed), np.array(labels)
Example #4
0
def dump_prediction(fname, outputs, labels, states, attrs=None):
    """Write a prediction and its error metrics to a netCDF file.

    The file gets the variables ``outputs``, ``labels``, ``states``,
    ``imed`` and ``eucd``; the two metrics are computed from `outputs`
    and `labels` with ``imed_metric`` and ``eucd_metric``.

    Parameters
    ----------
    fname : str or pathlib.Path
        Destination file. Missing parent directories are created.
    outputs : np.ndarray
        Predicted frames; its first three axes define the ``pred_length``,
        ``image_height`` and ``image_width`` dimensions.
    labels : np.ndarray
        Ground-truth frames, stored with the same dimensions as `outputs`.
    states : np.ndarray
        Stored as ``[pred_length, hidden_size]``; ``hidden_size`` is taken
        from ``states.shape[1]``.
    attrs : dict, optional
        Global attributes set on the dataset via ``setncatts``.

    Raises
    ------
    ValueError
        If `outputs` is not a numpy array.
    """
    if not isinstance(outputs, np.ndarray):
        # Non-numpy (tensor) inputs are rejected on purpose. The conversion
        # code that used to follow this raise was unreachable and has been
        # removed.
        raise ValueError("Check that this acutally works...")

    fname = pathlib.Path(fname)
    # `exist_ok=True` avoids the race between an existence check and mkdir.
    fname.parent.mkdir(parents=True, exist_ok=True)

    image_dims = ["pred_length", "image_height", "image_width"]

    with nc.Dataset(fname, "w") as dst:

        dst.createDimension("pred_length", outputs.shape[0])
        dst.createDimension("image_height", outputs.shape[1])
        dst.createDimension("image_width", outputs.shape[2])
        dst.createDimension("hidden_size", states.shape[1])

        dst.createVariable("outputs", float, image_dims)
        dst.createVariable("labels", float, image_dims)
        dst.createVariable("states", float, ["pred_length", "hidden_size"])
        dst.createVariable("imed", float, ["pred_length"])
        dst.createVariable("eucd", float, ["pred_length"])

        if attrs is not None:
            dst.setncatts(attrs)

        dst["outputs"][:] = outputs
        dst["labels"][:] = labels
        dst["states"][:] = states
        dst["imed"][:] = imed_metric(outputs, labels)
        dst["eucd"][:] = eucd_metric(outputs, labels)
Example #5
0
def cli(pred_data_ncfiles, outfile, show, valid_pred_length, large_window,
        small_window, pred_plot_step, prob_normality, mackey):
    """Plot prediction errors and sliding normality scores.

    The figure has one "Error" panel with IMED curves of selected ESN
    predictions, and two "Normality" panels showing the sliding score of
    the ESN error and of the cycle-prediction error. With `mackey` set, an
    extra top panel shows the Mackey sequence and its anomaly.

    Parameters
    ----------
    pred_data_ncfiles : sequence of pathlib.Path
        ESN prediction files, sorted with ``sort_filenames``. For every file
        a ``cycle_pred_data_idx{idx}.npy`` is loaded from the same directory,
        and ``idx{first_index}-params.json`` is read from the directory of
        the first file.
    outfile : path or None
        If not None, the figure is saved there.
    show : bool
        Show the figure interactively; otherwise it is closed.
    valid_pred_length : int
        Number of leading prediction steps used from each file.
    large_window, small_window
        Forwarded to ``sliding_score``.
    pred_plot_step : int
        Error curves are only drawn for files whose index is divisible by
        this value.
    prob_normality : float
        Threshold drawn as a dotted line and compared with the scores.
    mackey : bool
        Add the Mackey sequence panel on top.
    """
    from itertools import cycle
    from tqdm import tqdm
    import numpy as np
    import netCDF4 as nc
    import matplotlib.pyplot as plt
    import seaborn as sns

    from torsk.scripts.pred_perf import sort_filenames
    from torsk.data.utils import mackey_anomaly_sequence, normalize
    from torsk.imed import imed_metric
    from torsk.anomaly import sliding_score
    from torsk import Params

    sns.set_style("whitegrid")
    sns.set_context("notebook")
    # Hard-coded switch: every `kuro` branch below is currently disabled.
    kuro = False
    if kuro:
        kuro_start = 0
        kuro_step = 5

    # With the Mackey panel on top, all other panels move down by one.
    if mackey:
        ax_offset = 1
    else:
        ax_offset = 0

    pred_data_ncfiles, indices = sort_filenames(pred_data_ncfiles,
                                                return_indices=True)
    params = Params(json_path=pred_data_ncfiles[0].parent /
                    f"idx{indices[0]}-params.json")

    nr_plots = 4 if mackey else 3
    figsize = (8, 6) if mackey else (8, 5)
    fig, ax = plt.subplots(nr_plots, 1, sharex=True, figsize=figsize)
    ax[ax_offset].set_ylabel("Error")

    # Cycle endlessly through ten colors sampled from the "inferno" colormap.
    cmap = plt.get_cmap("inferno")
    colors = cycle([cmap(i) for i in np.linspace(0, 1, 10)])

    # One mean error per prediction file, for the ESN and the cycle prediction.
    imed_error, cycle_error = [], []
    for pred_data_nc, idx in tqdm(zip(pred_data_ncfiles, indices),
                                  total=len(indices)):
        tqdm.write(pred_data_nc.as_posix())

        with nc.Dataset(pred_data_nc, "r") as src:

            pred_imed = src["imed"][:valid_pred_length]
            labels = src["labels"][:valid_pred_length]
            imed_error.append(pred_imed.mean(axis=0))

            # Only every `pred_plot_step`-th index gets its error curve drawn.
            if idx % pred_plot_step == 0:
                if kuro:
                    start = (idx +
                             params.train_length) * kuro_step + kuro_start
                    stop = start + kuro_step * valid_pred_length
                    x = np.arange(start, stop, kuro_step)
                else:
                    x = np.arange(idx, idx + valid_pred_length)
                ax[ax_offset].plot(x, pred_imed, color=next(colors))

        # The cycle prediction is expected next to the ESN prediction file;
        # `labels` was sliced out of the dataset before it was closed.
        cycle_data_nc = pred_data_nc.parent / f"cycle_pred_data_idx{idx}.npy"
        cpred = np.load(cycle_data_nc)[:valid_pred_length]
        cycle_imed = imed_metric(cpred, labels)
        cycle_error.append(cycle_imed.mean(axis=0))

    imed_error = np.array(imed_error)
    cycle_error = np.array(cycle_error)

    # NOTE(review): `sliding_score` is defined in torsk.anomaly; the meaning
    # of its four return values is not visible here. Only the first one is
    # plotted below; the others are referenced in the commented-out plots.
    imed_score, lw_mu, lw_std, sw_mu = sliding_score(imed_error,
                                                     small_window=small_window,
                                                     large_window=large_window)
    cycle_score, _, _, _ = sliding_score(cycle_error,
                                         small_window=small_window,
                                         large_window=large_window)

    # Scores are plotted at the end of each prediction window, i.e. at the
    # start index shifted by the prediction length.
    indices = np.array(indices)
    if kuro:
        indices = (indices + params.train_length) * kuro_step + kuro_start
        shifted_indices = indices + valid_pred_length * kuro_step
    else:
        shifted_indices = indices + valid_pred_length

    if mackey:
        mackey_seq, anomaly = mackey_anomaly_sequence(
            N=indices[-1] + params.train_length,
            anomaly_start=params.anomaly_start,
            anomaly_step=params.anomaly_step)
        mackey_seq = normalize(mackey_seq)

        # The first `train_length` samples are cut off before plotting.
        length = anomaly[params.train_length:].shape[0]
        ax[0].plot(mackey_seq[params.train_length:],
                   label="x-Component",
                   color="black")
        ax[0].fill_between(np.arange(length),
                           np.zeros(length),
                           anomaly[params.train_length:],
                           color="grey",
                           alpha=0.5,
                           label="True Anomaly")
        ax[0].legend(loc="lower left")

    # Common x-range: first start index up to the end of the last window.
    plot_start = indices[0]
    plot_end = indices[-1]
    if kuro:
        plot_end += valid_pred_length * kuro_step
        ax[-1].set_xlabel("Time [days]")
    else:
        plot_end += valid_pred_length

    plot_indices = shifted_indices
    ones = np.ones_like(plot_indices)

    # ESN panel: threshold line, score curve, and a band filled between 1
    # and the boolean mask `imed_score > prob_normality`.
    ax[ax_offset + 1].plot([plot_start, plot_end],
                           [prob_normality, prob_normality],
                           ":",
                           label=rf"$\Sigma={prob_normality}$",
                           color="black")
    ax[ax_offset + 1].plot(plot_indices,
                           imed_score,
                           "-",
                           label="ESN",
                           color="C0")
    ax[ax_offset + 1].fill_between(plot_indices,
                                   ones,
                                   imed_score > prob_normality,
                                   label="Detected Anomaly",
                                   alpha=0.5,
                                   color="C0")
    ax[ax_offset + 1].set_ylim(1e-3, 1.)
    ax[ax_offset + 1].set_ylabel("Normality")

    # Cycle-prediction panel, built the same way as the ESN panel.
    ax[ax_offset + 2].plot([plot_start, plot_end],
                           [prob_normality, prob_normality],
                           ":",
                           label=rf"$\Sigma={prob_normality}$",
                           color="black")
    ax[ax_offset + 2].plot(plot_indices,
                           cycle_score,
                           "-.",
                           label="Cycle",
                           color="C1")
    ax[ax_offset + 2].fill_between(plot_indices,
                                   ones,
                                   cycle_score > prob_normality,
                                   label="Detected Anomaly",
                                   alpha=0.5,
                                   color="C1")
    ax[ax_offset + 2].set_ylim(1e-3, 1.)
    ax[ax_offset + 2].set_ylabel("Normality")

    # ax[ax_offset+2].plot(plot_indices, imed_error, label=r"error", color="black")
    # ax[ax_offset+2].plot(plot_indices, lw_mu, label=r"$\mu_m$")
    # ax[ax_offset+2].plot(plot_indices, lw_std, label=r"$\sigma_m$")
    # ax[ax_offset+2].plot(plot_indices, sw_mu, label=r"$\mu_n$")

    # Label the panels A, B, C(, D) in a rounded white box.
    bbox = {
        "boxstyle": "round",
        "pad": 0.3,
        "fc": "white",
        "ec": "gray",
        "lw": 1
    }
    for a, l in zip(ax, 'ABCD'):
        a.annotate(l, xy=(0.05, 0.8), xycoords='axes fraction', bbox=bbox)

    ax[ax_offset + 1].set_yscale("log")
    ax[ax_offset + 1].legend(loc="lower left")
    ax[ax_offset + 2].set_yscale("log")
    ax[ax_offset + 2].legend(loc="lower left")

    for a in ax:
        if kuro:
            a.set_xticks(np.arange(0, indices[-1], 365))
        a.set_xlim(plot_start, plot_end)

    plt.tight_layout()

    if outfile is not None:
        plt.savefig(outfile, transparent=True)
    if show:
        plt.show()
    else:
        plt.close()
# Script fragment: train an ESN on an image sequence and print one IMED value.
# NOTE(review): `params`, `logger`, `np`, `pathlib`, `torsk`,
# `resample2d_sequence` and `imed_metric` are defined or imported earlier in
# the script, outside this excerpt.
logger.info(params)

# Select the dataset and model implementations that match the backend.
if params.backend == "numpy":
    logger.info("Running with NUMPY backend")
    from torsk.data.numpy_dataset import NumpyImageDataset as ImageDataset
    from torsk.models.numpy_esn import NumpyESN as ESN
else:
    logger.info("Running with TORCH backend")
    from torsk.data.torch_dataset import TorchImageDataset as ImageDataset
    from torsk.models.torch_esn import TorchESN as ESN

# NOTE(review): hard-coded, machine-specific input path.
npypath = pathlib.Path(
    "/home/niklas/erda_save/Ocean/esn/Kuro_SSH_5daymean.npy")
# Crop axes 1 and 2 to the index range 90:190.
images = np.load(npypath)[:, 90:190, 90:190]
# Values above 10000 are zeroed -- presumably fill values; TODO confirm.
images[images > 10000.] = 0.
images = resample2d_sequence(images, params.input_shape)
dataset = ImageDataset(images, params, scale_images=True)

logger.info("Building model ...")
model = ESN(params)

logger.info("Training + predicting ...")
# NOTE(review): the third argument is a hard-coded, machine-specific path;
# presumably the output directory -- confirm against `train_predict_esn`.
model, outputs, pred_labels = torsk.train_predict_esn(
    model,
    dataset,
    "/home/niklas/erda_save/kuro_conv_5daymean100x100",
    steps=1,
    step_length=1)

# Print the IMED entry at index 25.
print(imed_metric(outputs, pred_labels)[25])
Example #7
0
def trivial_imed(labels):
    """Return the IMED between `labels` and a constant baseline prediction.

    The baseline repeats the first frame, ``labels[0]``, once for every
    entry along the first axis of `labels`.
    """
    n_steps = labels.shape[0]
    frozen_first_frame = np.tile(labels[0], [n_steps, 1, 1])
    return imed_metric(labels, frozen_first_frame)
Example #8
0
def cli(pred_data_ncfiles, save_video, outfile, show, ylogscale,
        metric_log_idx, xlim, plot_label, only_first_n, sns_context,
        lstm_pred_path):
    """Create animations and averaged performance plot of prediction files.
    The ESN `pred_data_ncfiles` must be named like: pred_data_idx0.nc
    The cycle prediction files are assumed to be in the same directory with
    names like: cycle_pred_data_idx0.npy as created with `torsk cycle-predict`
    The four-panel animation (`show`, or `save_video` set to "gif") also
    needs `lstm_pred_path`; a ValueError is raised if it is missing.
    """
    sns.set_style("whitegrid")
    sns.set_context(sns_context)

    # The quad animation always displays the LSTM prediction. Fail early
    # with a clear message instead of an unbound-local error in the loop.
    needs_lstm = show or save_video == "gif"
    if needs_lstm and lstm_pred_path is None:
        raise ValueError("`lstm_pred_path` is required to animate "
                         "predictions (`show` or `save_video` == 'gif').")

    # The LSTM prediction does not depend on the file, so load it only once.
    lstm_pred = None
    if lstm_pred_path is not None:
        lstm_pred = np.load(lstm_pred_path)

    labels = []
    esn_imed, cycle_imed = [], []
    pred_data_ncfiles, indices = sort_filenames(pred_data_ncfiles,
                                                return_indices=True)

    if only_first_n is not None:
        pred_data_ncfiles = pred_data_ncfiles[:only_first_n]
        indices = indices[:only_first_n]

    # read prediction files and animate
    for ii, (idx,
             pred_data_nc) in tqdm(enumerate(zip(indices, pred_data_ncfiles)),
                                   total=len(pred_data_ncfiles)):
        assert "pred_data" in pred_data_nc.as_posix()

        with nc.Dataset(pred_data_nc, "r") as src:

            esn_imed.append(src["imed"][:])
            tqdm.write(f"{pred_data_nc.name}: IMED at step {metric_log_idx}: "
                       f"{esn_imed[ii][metric_log_idx]}")

            labels.append(src["labels"][:])
            outputs = src["outputs"][:]

        cycle_pred_file = pred_data_nc.parent / f"cycle_pred_data_idx{idx}.npy"
        if not cycle_pred_file.exists():
            raise ValueError(f"{cycle_pred_file} does not exist. "
                             "Cannot compute cycle prediction. "
                             "Create it with `torsk cycle-predict`")
        # Truncate the cycle prediction to the length of the label sequences.
        cpred = np.load(cycle_pred_file)[:labels[0].shape[0]]
        cycle_imed.append(imed_metric(cpred, labels[-1]))

        # japan = np.load("/home/niklas/Downloads/japan.npy")
        # japan = np.tile(japan, [cpred.shape[0], 1, 1])
        # cpred = np.ma.masked_array(cpred, mask=japan)[:, ::-1]
        # labels[ii] = np.ma.masked_array(labels[ii], mask=japan)[:, ::-1]
        # outputs = np.ma.masked_array(outputs, mask=japan)[:, ::-1]

        if save_video is not None:
            videofile = pred_data_nc.with_suffix(f".{save_video}").as_posix()
            if save_video == "gif":
                anim = animate_quad_imshow(
                    labels[ii],
                    outputs,
                    lstm_pred,
                    cpred,
                    axes_labels=["Truth", "ESN", "LSTM", "Cycle"])
                anim.save(videofile, writer="imagemagick")
            else:
                # Plain videos show truth and ESN output joined along axis 1.
                frames = np.concatenate([labels[ii], outputs], axis=1)
                write_video(videofile, frames)

        if show:
            # Keep a reference to the animation until `plt.show()` returns.
            anim = animate_quad_imshow(
                labels[ii],
                outputs,
                lstm_pred,
                cpred,
                axes_labels=["Truth", "ESN", "LSTM", "Cycle"])
            plt.show()

    # plot performance
    labels = np.array(labels)
    esn_imed, cycle_imed = np.array(esn_imed), np.array(cycle_imed)

    fig, ax = imed_plot(esn_imed, cycle_imed, labels)
    if xlim is not None:
        ax.set_xlim(0, xlim)
    if plot_label is not None:
        bbox = {
            "boxstyle": "round",
            "pad": 0.3,
            "fc": "white",
            "ec": "gray",
            "lw": 2
        }
        ax.annotate(plot_label,
                    xy=(0.05, 0.9),
                    xycoords='axes fraction',
                    bbox=bbox)
    plt.tight_layout()
    if ylogscale:
        ax.set_yscale("log")
    if outfile is not None:
        plt.savefig(outfile, transparent=True)
    if show:
        plt.show()
    else:
        plt.close()