Example #1
import numpy as np
import ot
from scipy.stats import wasserstein_distance


def test_emd_1d_emd2_1d_with_weights():
    # check that emd_1d and emd2_1d give the same loss as the generic emd solver
    n = 20
    m = 30
    rng = np.random.RandomState(0)
    u = rng.randn(n, 1)
    v = rng.randn(m, 1)

    w_u = rng.uniform(0., 1., n)
    w_u = w_u / w_u.sum()

    w_v = rng.uniform(0., 1., m)
    w_v = w_v / w_v.sum()

    M = ot.dist(u, v, metric='sqeuclidean')

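    # reference loss from the generic emd solver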
    G, log = ot.emd(w_u, w_v, M, log=True)
    wass = log["cost"]
    G_1d, log = ot.emd_1d(u, v, w_u, w_v, metric='sqeuclidean', log=True)
    wass1d = log["cost"]
    wass1d_emd2 = ot.emd2_1d(u, v, w_u, w_v, metric='sqeuclidean', log=False)
    wass1d_euc = ot.emd2_1d(u, v, w_u, w_v, metric='euclidean', log=False)

    # check loss is similar
    np.testing.assert_allclose(wass, wass1d)
    np.testing.assert_allclose(wass, wass1d_emd2)

    # check loss is similar to scipy's implementation for Euclidean metric
    wass_sp = wasserstein_distance(u.reshape((-1, )), v.reshape((-1, )), w_u,
                                   w_v)
    np.testing.assert_allclose(wass_sp, wass1d_euc)

    # check constraints
    np.testing.assert_allclose(w_u, G.sum(1))
    np.testing.assert_allclose(w_v, G.sum(0))
Example #2
import numpy as np
import ot
import tensorflow as tf


def test_emd1d_device_tf():
    nx = ot.backend.TensorflowBackend()
    rng = np.random.RandomState(0)
    n = 10
    x = np.linspace(0, 5, n)
    rho_u = np.abs(rng.randn(n))
    rho_u /= rho_u.sum()
    rho_v = np.abs(rng.randn(n))
    rho_v /= rho_v.sum()

    # Check that everything stays on the CPU
    with tf.device("/CPU:0"):
        xb, rho_ub, rho_vb = nx.from_numpy(x, rho_u, rho_v)
        emd = ot.emd_1d(xb, xb, rho_ub, rho_vb)
        emd2 = ot.emd2_1d(xb, xb, rho_ub, rho_vb)
        nx.assert_same_dtype_device(xb, emd)
        nx.assert_same_dtype_device(xb, emd2)

    if len(tf.config.list_physical_devices('GPU')) > 0:
        # Check that everything happens on the GPU
        xb, rho_ub, rho_vb = nx.from_numpy(x, rho_u, rho_v)
        emd = ot.emd_1d(xb, xb, rho_ub, rho_vb)
        emd2 = ot.emd2_1d(xb, xb, rho_ub, rho_vb)
        nx.assert_same_dtype_device(xb, emd)
        nx.assert_same_dtype_device(xb, emd2)
        assert nx.dtype_device(emd)[1].startswith("GPU")
Example #3
import numpy as np
import ot


# `samples`, `sinkhorn_em_algorithm_single` and `em_algorithm_single` are
# assumed to be defined in the enclosing module.
def unique_local_minimum(means_star, alpha, algo):
    weights = np.array([alpha, 1 - alpha])
    for mu_0 in np.linspace(-10 * means_star, 10 * means_star, 10):
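        # sweep over a grid of initial means around the true means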
        #mu_0 = np.array([mu_0, -mu_0])
        if algo == "sEM":
            means_em, weights_sinkhorn, seq, weights_list = sinkhorn_em_algorithm_single(
                samples=samples,
                mu0=mu_0,
                sigma=1,
                log_theta0=np.log(weights).reshape(2, 1),
                n_iter=10,
                n_iter_sinkhorn=20)
            thetaseq = [
                np.array(weights).reshape(2, 1) for i in range(len(seq))
            ]

        else:
            means_em, thetaseq, seq, weights_list = em_algorithm_single(
                samples=samples,
                mu0=mu_0,
                sigma=1,
                log_theta0=np.log(weights).reshape(2, 1),
                n_iter=10,
                update_theta=(algo == "oEM"))

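        # 1d Wasserstein error between the estimated and true means; a large
        # error means this initialization reached a spurious local minimum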
        error = ot.emd2_1d(means_em[:, 0], means_star[:, 0],
                           thetaseq[-1].reshape(-1), weights.reshape(-1))
        if error > 1:
            return False
    return True
Example #4
    def __call__(
        self,
        x: dict,
        x_0: dict,
        t: int = None,
        par: dict = None,
    ) -> float:
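        """Approximate the sliced Wasserstein distance between the summary
        statistics of the samples `x` and the observed data `x_0`."""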
        # compute summary statistics, shape (n, dim), (n0, dim)
        s, s0 = self.sumstat(x), self.sumstat(x_0)
        n, n0 = s.shape[0], s0.shape[0]

        dim, dim0 = s.shape[1], s0.shape[1]
        if dim != dim0:
            raise ValueError(f"Sumstat dimensions do not match: {dim}!={dim0}")

        # unit sphere samples for Monte-Carlo approximation,
        #  shape (n_proj, dim)
        sphere_samples = uniform_unit_sphere_samples(
            n_proj=self.n_proj,
            dim=dim,
            seed=self.seed,
        )

        # 1d linear projections, shape (n_proj, {n, n0})
        s_projs = np.dot(sphere_samples, s.T)
        s0_projs = np.dot(sphere_samples, s0.T)

        # weights (could also be passed/learned?)
        w, w0 = np.ones((n, )) / n, np.ones((n0, )) / n0

        # approximate integral over sphere via Monte-Carlo samples
        cost = 0.0
        for s_proj, s0_proj in zip(s_projs, s0_projs):
            # optimal 1d earth mover's distance; computable in O(n log n)
            # by simply sorting the projected samples
            cost += ot.emd2_1d(
                x_a=s_proj,
                x_b=s0_proj,
                a=w,
                b=w0,
                metric=self.metric,
                p=self.p,
                log=False,
                **self.emd_1d_args,
            )
        cost /= self.n_proj

        # take root to match Wasserstein distance definition
        if self.p < np.inf:
            cost = cost**(1 / self.p)

        return cost
Example #5
import numpy as np
import ot


def test_1d_sliced_equals_emd():
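    # for 1d data every projection yields the same transport problem, so the
    # squared sliced distance must equal the squared Wasserstein distance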
    n = 100
    m = 120
    rng = np.random.RandomState(0)

    x = rng.randn(n, 1)
    a = rng.uniform(0, 1, n)
    a /= a.sum()
    y = rng.randn(m, 1)
    u = ot.utils.unif(m)
    res = ot.sliced_wasserstein_distance(x, y, a, u, 10, seed=42)
    expected = ot.emd2_1d(x.squeeze(), y.squeeze(), a, u)
    np.testing.assert_almost_equal(res**2, expected)
Example #6
import numpy as np
import ot
import pytest
from scipy.stats import wasserstein_distance


def test_emd_1d_emd2_1d():
    # check that emd_1d and emd2_1d give the same loss as the generic emd solver
    n = 20
    m = 30
    rng = np.random.RandomState(0)
    u = rng.randn(n, 1)
    v = rng.randn(m, 1)

    M = ot.dist(u, v, metric='sqeuclidean')

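    # empty weight lists make the solvers assume uniform weights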
    G, log = ot.emd([], [], M, log=True)
    wass = log["cost"]
    G_1d, log = ot.emd_1d(u, v, [], [], metric='sqeuclidean', log=True)
    wass1d = log["cost"]
    wass1d_emd2 = ot.emd2_1d(u, v, [], [], metric='sqeuclidean', log=False)
    wass1d_euc = ot.emd2_1d(u, v, [], [], metric='euclidean', log=False)

    # check loss is similar
    np.testing.assert_allclose(wass, wass1d)
    np.testing.assert_allclose(wass, wass1d_emd2)

    # check loss is similar to scipy's implementation for Euclidean metric
    wass_sp = wasserstein_distance(u.reshape((-1, )), v.reshape((-1, )))
    np.testing.assert_allclose(wass_sp, wass1d_euc)

    # check constraints
    np.testing.assert_allclose(np.ones((n, )) / n, G.sum(1))
    np.testing.assert_allclose(np.ones((m, )) / m, G.sum(0))

    # check G is similar
    np.testing.assert_allclose(G, G_1d)

    # check AssertionError is raised if called on non 1d arrays
    u = rng.randn(n, 2)
    v = rng.randn(m, 2)
    with pytest.raises(AssertionError):
        ot.emd_1d(u, v, [], [])
Example #7
import numpy as np
import ot


def test_emd1d_type_devices(nx):
    # `nx` is a pytest fixture providing each available POT backend
    rng = np.random.RandomState(0)

    n = 10
    x = np.linspace(0, 5, n)
    rho_u = np.abs(rng.randn(n))
    rho_u /= rho_u.sum()
    rho_v = np.abs(rng.randn(n))
    rho_v /= rho_v.sum()

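    # solve once per dtype/device combination exposed by the backend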
    for tp in nx.__type_list__:
        print(nx.dtype_device(tp))

        xb, rho_ub, rho_vb = nx.from_numpy(x, rho_u, rho_v, type_as=tp)

        emd = ot.emd_1d(xb, xb, rho_ub, rho_vb)
        emd2 = ot.emd2_1d(xb, xb, rho_ub, rho_vb)

        nx.assert_same_dtype_device(xb, emd)
        nx.assert_same_dtype_device(xb, emd2)
Example #8
import celluloid
import matplotlib.pyplot as plt
import numpy as np
import ot

# `samples`, `weights`, `means`, `error` and `sinkhorn_em_algorithm_single`
# are defined earlier in this script
means_em, weights_sinkhorn, seq, weights_list = sinkhorn_em_algorithm_single(
    samples=samples,
    mu0=np.array([np.array([-2, 0]), np.array([2, 0])]),
    sigma=1,
    log_theta0=np.array([np.log(weights[0]),
                         np.log(weights[1])]).reshape(2, 1),
    n_iter=1,
    n_iter_sinkhorn=50)
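# the weights are kept fixed, so replicate them once per EM iterate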
thetaseq = [np.array(weights).reshape(2, 1) for i in range(len(seq))]

print(means_em)
print(means)
print(error(means_em, means, weights))
print(weights)
print(
    ot.emd2_1d(means_em[:, 0], means[:, 0], thetaseq[-1].reshape(-1),
               weights.reshape(-1)))

animate = False

if animate:

    fig = plt.figure()
    camera = celluloid.Camera(fig)
    cm = "viridis"

    # initial setup
    proba_mode = (weights_list[0][0] /
                  (weights_list[0][0] + weights_list[0][1]))
    plt.scatter(x=samples[:, 0],
                y=samples[:, 1],
                c=proba_mode,
                cmap=cm)  # assumed closing of the truncated call, using the colormap defined above
Example #9
import matplotlib.pyplot as plt
import numpy as np
import ot
import torch
from scipy.stats import pearsonr
from torch.autograd import Variable

# `dataloader`, `encoder1`, `sinkhorn1`, `opt`, `FloatTensor` and the lists
# `wass1`, `sink1`, `euc` are defined earlier in this script
cor = 0
for i, (imgs, labels) in enumerate(dataloader):
    real_imgs = Variable(imgs.type(FloatTensor))
    batch_size = real_imgs.shape[0]
    encodings = encoder1(real_imgs)
    src = np.random.randint(0, batch_size, size=opt.num_paths)
    dest = np.random.randint(0, batch_size, size=opt.num_paths)
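    # Sinkhorn distances between the sampled image pairs; self-pairs are zero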
    sink_dist1 = sinkhorn1(real_imgs[src], real_imgs[dest]).to("cuda")
    sink_dist1[src == dest] = 0
    # sink_dist2 = sinkhorn2(real_imgs[src],real_imgs[dest]).to("cuda")
    # sink_dist2 [src == dest] = 0
    # exact 1d Wasserstein distance for each distinct pair
    for j in range(opt.num_paths):
        if src[j] != dest[j]:
            wass1.append(
                ot.emd2_1d(real_imgs[src[j]].cpu().numpy(),
                           real_imgs[dest[j]].cpu().numpy(),
                           metric='euclidean'))
    euc_dist = torch.norm(encodings[src] - encodings[dest], p=2, dim=1)
    sink1.extend(sink_dist1[src != dest].cpu().detach().numpy())
    # sink2.extend(sink_dist2[src != dest].cpu().detach().numpy())
    euc.extend(euc_dist[src != dest].cpu().detach().numpy())
    # corr, _ = pearsonr(sink_dist.cpu().detach().numpy(),euc_dist.cpu().detach().numpy())
    # cor += corr

corr1, _ = pearsonr(sink1, euc)
# corr2, _ = pearsonr(sink2,euc)
corr2, _ = pearsonr(wass1, euc)
print(corr1, corr2)
plt.figure()
plt.scatter(sink1, euc)
plt.xlabel("Sinkhorn Distance")