Beispiel #1
0
def ot_for_diffs(diffs):
    """Optimal-transport (earth mover) loss for a pairwise-difference matrix.

    Solves the EMD problem on a numpy copy of ``diffs`` with uniform
    marginals, suppresses spurious tiny coupling weights from the solver,
    and returns the coupling-weighted sum of ``diffs`` as a torch scalar.
    """
    coupling = ot.emd([], [], var_to_np(diffs))
    # The solver sometimes emits weird low values; anything below half of
    # the uniform per-entry weight (0.5 / n_entries) is treated as noise.
    # 0.5 (rather than 1.0) errs on the side of keeping entries so nothing
    # real is removed accidentally.
    min_weight = 0.5 / diffs.numel()
    coupling = coupling * (coupling >= min_weight)
    coupling = np_to_var(coupling, dtype=np.float32, device=diffs.device)
    return th.sum(coupling * diffs)
Beispiel #2
0
def to_signal_target(train_inputs, test_inputs):
    """Convert per-class input tensors into SignalAndTarget train/valid sets.

    Parameters
    ----------
    train_inputs, test_inputs : sequence of tensors
        One tensor per class; the class label is the tensor's position.

    Returns
    -------
    (SignalAndTarget, SignalAndTarget)
        Train and validation (test) sets with float32 data and int64 labels.
    """
    sets = []
    for inputs in (train_inputs, test_inputs):
        X = np.concatenate([var_to_np(ins) for ins in inputs]).astype(
            np.float32
        )
        # Label each example with the index of its class tensor.
        y = np.concatenate(
            [np.ones(len(ins)) * i_class for i_class, ins in enumerate(inputs)]
        ).astype(np.int64)
        # Append directly instead of binding to a local named `set`,
        # which shadowed the builtin in the original.
        sets.append(SignalAndTarget(X, y))
    train_set, valid_set = sets
    return train_set, valid_set
Beispiel #3
0
def compute_clf_accs(clf, feature_model, train_inputs, test_inputs,):
    """Classifier accuracy on train/test inputs passed through feature_model.

    Returns a dict with keys ``'train_clf_acc'`` and ``'test_clf_acc'``.
    """
    results = {}
    for setname, set_inputs in (("Train", train_inputs), ("Test", test_inputs)):
        features = [feature_model(ins) for ins in set_inputs]
        scores_per_class = [clf(feat) for feat in features]

        predicted_labels = [np.argmax(var_to_np(scores), axis=1)
                            for scores in scores_per_class]

        # Ground truth: each example's label is the index of its class tensor.
        true_labels = np.concatenate(
            [np.ones(len(set_inputs[i_cls])) * i_cls
             for i_cls in range(len(train_inputs))])

        acc = np.mean(true_labels == np.concatenate(predicted_labels))
        results['{:s}_clf_acc'.format(setname.lower())] = acc
    return results
Beispiel #4
0
def get_matched_samples(samples_a, samples_b):
    """For each sample in ``samples_a``, collect its OT-matched samples in ``samples_b``.

    Euclidean pairwise distances are fed to ``ot.emd`` (uniform marginals);
    each row of the resulting coupling picks the matched b-samples.
    Returns a stacked tensor of matches, or a zero tensor if some row has
    no match at all.
    """
    assert len(samples_a) <= len(samples_b)
    # Distances only guide the matching; no gradient is needed here.
    with th.no_grad():
        deltas = samples_a.unsqueeze(1) - samples_b.unsqueeze(0)
        dists = th.sqrt(th.clamp(th.sum(deltas * deltas, dim=2), min=1e-6))
    coupling = ot.emd([], [], var_to_np(dists))
    # Zero out spurious tiny coupling weights from the solver.
    coupling = coupling * (coupling >= (0.5 / (dists.numel())))
    del dists

    matched_b = []
    for i_row in range(coupling.shape[0]):
        match_inds = np.flatnonzero(coupling[i_row])
        if len(match_inds) == 0:
            # dunno why that happens
            return th.zeros(1, device=samples_a.device)
        matched_b.append(samples_b[match_inds])
    return th.stack(matched_b, dim=0)
Beispiel #5
0
def plot_outs(feature_model, train_inputs, test_inputs, class_dist):
    """Scatter-plot the first two encoding dims per class and report accuracy.

    For the "Train" and "Test" sets, class predictions are computed two
    ways -- from ``class_dist`` (learned per-class mean/std) and from
    diagonal Gaussians fitted to the training-data encodings -- then
    plotted with sigma ellipses, class means, and mis-classified points
    marked with black crosses.
    NOTE(review): the legend labels "Right"/"Rest" are hard-coded, so this
    appears to assume exactly two classes -- confirm against callers.
    """

    # Compute dist for mean/std of encodings
    # (diagonal Gaussians fitted to the actual training encodings per class)
    data_cls_dists = []
    for i_class in range(len(train_inputs)):
        this_class_outs = feature_model(train_inputs[i_class])[:, :2]
        data_cls_dists.append(
            th.distributions.MultivariateNormal(th.mean(this_class_outs,
                                                        dim=0),
                                                covariance_matrix=th.diag(
                                                    th.std(this_class_outs,
                                                           dim=0)**2)))
    for setname, set_inputs in (("Train", train_inputs), ("Test",
                                                          test_inputs)):

        outs = [feature_model(ins) for ins in set_inputs]
        # Only the first two encoding dimensions are classified/plotted.
        c_outs = [o[:, :2] for o in outs]

        c_outs_all = th.cat(c_outs)

        # Gaussians built from the learned class distribution (first 2 dims).
        cls_dists = []
        for i_class in range(len(c_outs)):
            mean, std = class_dist.get_mean_std(i_class)
            cls_dists.append(
                th.distributions.MultivariateNormal(mean[:2],
                                                    covariance_matrix=th.diag(
                                                        std[:2]**2)))

        # Per class tensor: log-prob of each sample under every class Gaussian.
        preds_per_class = [
            th.stack([
                cls_dists[i_cls].log_prob(c_out)
                for i_cls in range(len(cls_dists))
            ],
                     dim=-1) for c_out in c_outs
        ]

        # Predicted class = argmax of log-probs.
        pred_labels_per_class = [
            np.argmax(var_to_np(preds), axis=1) for preds in preds_per_class
        ]

        # Ground truth: label of each example is the index of its class tensor.
        labels = np.concatenate([
            np.ones(len(set_inputs[i_cls])) * i_cls
            for i_cls in range(len(train_inputs))
        ])

        acc = np.mean(labels == np.concatenate(pred_labels_per_class))

        # Same predictions, but under the data-fitted Gaussians.
        data_preds_per_class = [
            th.stack([
                data_cls_dists[i_cls].log_prob(c_out)
                for i_cls in range(len(cls_dists))
            ],
                     dim=-1) for c_out in c_outs
        ]
        data_pred_labels_per_class = [
            np.argmax(var_to_np(data_preds), axis=1)
            for data_preds in data_preds_per_class
        ]
        data_acc = np.mean(
            labels == np.concatenate(data_pred_labels_per_class))

        print("{:s} Accuracy: {:.1f}%".format(setname, acc * 100))
        fig = plt.figure(figsize=(5, 5))
        ax = plt.gca()
        for i_class in range(len(c_outs)):
            #if i_class == 0:
            #    continue
            o = var_to_np(c_outs[i_class]).squeeze()
            # Mark samples whose predicted class differs from their true class.
            incorrect_pred_mask = pred_labels_per_class[i_class] != i_class
            plt.scatter(o[:, 0],
                        o[:, 1],
                        s=20,
                        alpha=0.75,
                        label=["Right", "Rest"][i_class])
            assert len(incorrect_pred_mask) == len(o)
            plt.scatter(o[incorrect_pred_mask, 0],
                        o[incorrect_pred_mask, 1],
                        marker='x',
                        color='black',
                        alpha=1,
                        s=5)
            means, stds = class_dist.get_mean_std(i_class)
            means = var_to_np(means)[:2]
            stds = var_to_np(stds)[:2]
            # Draw confidence ellipses at several sigma levels.
            for sigma in [0.5, 1, 2, 3]:
                ellipse = Ellipse(means, stds[0] * sigma, stds[1] * sigma)
                ax.add_artist(ellipse)
                ellipse.set_edgecolor(seaborn.color_palette()[i_class])
                ellipse.set_facecolor("None")
        # Second pass: mark the empirical per-class means as triangles.
        for i_class in range(len(c_outs)):
            o = var_to_np(c_outs[i_class]).squeeze()
            plt.scatter(np.mean(o[:, 0]),
                        np.mean(o[:, 1]),
                        color=seaborn.color_palette()[i_class + 2],
                        s=80,
                        marker="^",
                        label=["Right Mean", "Rest Mean"][i_class])

        plt.title("{:6s} Accuracy:        {:.1f}%\n"
                  "From data mean/std: {:.1f}%".format(setname, acc * 100,
                                                       data_acc * 100))
        plt.legend(bbox_to_anchor=(1, 1, 0, 0))
        display_close(fig)
    return
Beispiel #6
0
def plot_outs(feature_model_a, train_inputs, test_inputs, class_dist):
    """Scatter-plot the first two encoding dims per class and report accuracy.

    Variant that classifies all samples jointly (``c_outs_all``) rather
    than per class tensor. Accuracies come both from ``class_dist`` and
    from Gaussians fitted to the training-data encodings.
    NOTE(review): covariance here is ``th.diag(std)`` -- the std itself,
    not ``std**2`` as in the sibling functions; confirm whether variance
    was intended.
    """
    # Compute dist for mean/std of encodings
    # (diagonal Gaussians fitted to the actual training encodings per class)
    data_cls_dists = []
    for i_class in range(len(train_inputs)):
        this_class_outs = feature_model_a(train_inputs[i_class])[:, :2]
        data_cls_dists.append(
            th.distributions.MultivariateNormal(th.mean(this_class_outs,
                                                        dim=0),
                                                covariance_matrix=th.diag(
                                                    th.std(this_class_outs,
                                                           dim=0))))
    for setname, set_inputs in (("Train", train_inputs), ("Test",
                                                          test_inputs)):

        outs = [feature_model_a(ins) for ins in set_inputs]
        # Only the first two encoding dimensions are classified/plotted.
        c_outs = [o[:, :2] for o in outs]

        c_outs_all = th.cat(c_outs)

        # Gaussians built from the learned class distribution (first 2 dims).
        cls_dists = []
        for i_class in range(len(c_outs)):
            mean, std = class_dist.get_mean_std(i_class)
            cls_dists.append(
                th.distributions.MultivariateNormal(mean[:2],
                                                    covariance_matrix=th.diag(
                                                        std[:2])))

        # Log-prob of every sample (all classes concatenated) under every
        # class Gaussian; shape (n_samples, n_classes).
        preds = th.stack([
            cls_dists[i_cls].log_prob(c_outs_all)
            for i_cls in range(len(cls_dists))
        ],
                         dim=-1)

        pred_labels = np.argmax(var_to_np(preds), axis=1)

        # Ground truth: label of each example is the index of its class
        # tensor; order matches the concatenation order of c_outs_all.
        labels = np.concatenate([
            np.ones(len(set_inputs[i_cls])) * i_cls
            for i_cls in range(len(train_inputs))
        ])

        acc = np.mean(labels == pred_labels)

        # Same predictions, but under the data-fitted Gaussians.
        data_preds = th.stack([
            data_cls_dists[i_cls].log_prob(c_outs_all)
            for i_cls in range(len(cls_dists))
        ],
                              dim=-1)
        data_pred_labels = np.argmax(var_to_np(data_preds), axis=1)
        data_acc = np.mean(labels == data_pred_labels)

        print("{:s} Accuracy: {:.2f}%".format(setname, acc * 100))
        fig = plt.figure(figsize=(5, 5))
        ax = plt.gca()
        for i_class in range(len(c_outs)):
            o = var_to_np(c_outs[i_class]).squeeze()
            plt.scatter(o[:, 0], o[:, 1], s=20, alpha=0.75)
            means, stds = class_dist.get_mean_std(i_class)
            means = var_to_np(means)[:2]
            stds = var_to_np(stds)[:2]
            # Draw confidence ellipses at several sigma levels.
            for sigma in [0.5, 1, 2, 3]:
                ellipse = Ellipse(means, stds[0] * sigma, stds[1] * sigma)
                ax.add_artist(ellipse)
                ellipse.set_edgecolor(seaborn.color_palette()[i_class])
                ellipse.set_facecolor("None")
        # Second pass: mark the empirical per-class means as triangles.
        for i_class in range(len(c_outs)):
            o = var_to_np(c_outs[i_class]).squeeze()
            plt.scatter(np.mean(o[:, 0]),
                        np.mean(o[:, 1]),
                        color=seaborn.color_palette()[i_class + 2],
                        s=80,
                        marker="^")

        plt.title("{:6s} Accuracy:        {:.2f}%\n"
                  "From data mean/std: {:.2f}%".format(setname, acc * 100,
                                                       data_acc * 100))
        plt.legend(("Right", "Rest", "Right Mean", "Rest Mean"))
        display_close(fig)
Beispiel #7
0
def compute_accs(feature_model, train_inputs, test_inputs, class_dist):
    """Compute train/test accuracies under learned and data-fitted Gaussians.

    Runs without gradients. For each set, classifies encodings by maximum
    log-probability under (a) diagonal Gaussians built from
    ``class_dist.get_mean_std`` and (b) diagonal Gaussians fitted to the
    training-data encodings.

    Returns
    -------
    dict with keys ``train_acc``, ``test_acc``, ``train_data_acc``,
    ``test_data_acc``.
    """
    with th.no_grad():
        # Compute dist for mean/std of encodings
        data_cls_dists = []
        # Restrict to the encoding dims the class distribution covers,
        # if it declares them; otherwise use all dims of its mean.
        if hasattr(class_dist, 'i_class_inds'):
            i_class_inds = class_dist.i_class_inds
        else:
            i_class_inds = list(range(len(class_dist.get_mean_std(0)[0])))
        for i_class in range(len(train_inputs)):
            this_class_outs = feature_model(train_inputs[i_class])[
                :, i_class_inds
            ]
            # Diagonal Gaussian fitted to this class's training encodings.
            data_cls_dists.append(
                th.distributions.MultivariateNormal(
                    th.mean(this_class_outs, dim=0),
                    covariance_matrix=th.diag(th.std(this_class_outs, dim=0) ** 2),
                )
            )
        results = {}
        for setname, set_inputs in (("Train", train_inputs), ("Test", test_inputs)):
            outs = [feature_model(ins) for ins in set_inputs]
            c_outs = [o[:, i_class_inds] for o in outs]

            # Gaussians built from the learned class distribution.
            cls_dists = []
            for i_class in range(len(c_outs)):
                mean, std = class_dist.get_mean_std(i_class)
                cls_dists.append(
                    th.distributions.MultivariateNormal(
                        mean[i_class_inds],
                        covariance_matrix=th.diag(std[i_class_inds] ** 2),
                    )
                )

            # Per class tensor: log-prob of each sample under every class.
            preds_per_class = [
                th.stack(
                    [
                        cls_dists[i_cls].log_prob(c_out)
                        for i_cls in range(len(cls_dists))
                    ],
                    dim=-1,
                )
                for c_out in c_outs
            ]

            # Predicted class = argmax of log-probs.
            pred_labels_per_class = [
                np.argmax(var_to_np(preds), axis=1) for preds in preds_per_class
            ]

            # Ground truth: label of each example is its class tensor's index.
            labels = np.concatenate(
                [
                    np.ones(len(set_inputs[i_cls])) * i_cls
                    for i_cls in range(len(train_inputs))
                ]
            )

            acc = np.mean(labels == np.concatenate(pred_labels_per_class))

            # Same predictions, but under the data-fitted Gaussians.
            data_preds_per_class = [
                th.stack(
                    [
                        data_cls_dists[i_cls].log_prob(c_out)
                        for i_cls in range(len(cls_dists))
                    ],
                    dim=-1,
                )
                for c_out in c_outs
            ]
            data_pred_labels_per_class = [
                np.argmax(var_to_np(data_preds), axis=1)
                for data_preds in data_preds_per_class
            ]
            data_acc = np.mean(labels == np.concatenate(data_pred_labels_per_class))
            results["{:s}_acc".format(setname.lower())] = acc
            results["{:s}_data_acc".format(setname.lower())] = data_acc
    return results