Example 1
def OT_sinkhorn(Xl, Xr, lambd=1e-2):
    # loss matrix: pairwise squared Euclidean distances, rescaled to [0, 1]
    C = ot.dist(Xl, Xr)
    M = C / C.max()

    n = len(Xl)
    m = len(Xr)
    print(n, m)

    a, b = np.ones((n, )) / n, np.ones((m, )) / m

    Gs = ot.sinkhorn(a, b, M, lambd)

    plt.figure(5)
    plt.imshow(Gs, interpolation='nearest')
    plt.title('OT matrix sinkhorn')

    plt.figure(6)
    ot.plot.plot2D_samples_mat(Xl[:, :2], Xr[:, :2], Gs, color=[.5, .5, 1])
    plt.plot(Xl[:, 0], Xl[:, 1], '+b', label='Source samples')
    plt.plot(Xr[:, 0], Xr[:, 1], 'xr', label='Target samples')
    plt.legend(loc=0)
    plt.title('OT matrix Sinkhorn with samples')

    return Gs
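
A minimal way to exercise OT_sinkhorn, assuming the usual POT/NumPy/Matplotlib imports that this gallery omits:

import numpy as np
import matplotlib.pyplot as plt
import ot
import ot.plot

# two small toy point clouds; any (n, d) and (m, d) arrays work
rng = np.random.RandomState(0)
Xl = rng.randn(20, 2)
Xr = rng.randn(30, 2) + np.array([4.0, 4.0])

Gs = OT_sinkhorn(Xl, Xr, lambd=1e-2)  # coupling of shape (20, 30)
plt.show()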
Example 2
def OT_scores_sinkhorn(Xl, Xr, scoresl, scoresr, mu=0.5, lambd=1e-2):
    # loss matrix
    C = ot.dist(Xl, Xr) + mu * ot.dist(np.expand_dims(scoresl, axis=1),
                                       np.expand_dims(scoresr, axis=1))
    M = C / C.max()

    n = len(Xl)
    m = len(Xr)
    print(n, m)

    # the scores act as marginal weights; normalize each to sum to 1, as
    # ot.sinkhorn expects histograms of equal total mass
    a, b = scoresl / scoresl.sum(), scoresr / scoresr.sum()

    Gs = ot.sinkhorn(a, b, M, lambd)

    plt.figure(5)
    plt.imshow(Gs, interpolation='nearest')
    plt.title('OT matrix sinkhorn')

    plt.figure(6)
    ot.plot.plot2D_samples_mat(Xl[:, :2], Xr[:, :2], Gs, color=[.5, .5, 1])
    plt.plot(Xl[:, 0], Xl[:, 1], '+b', label='Source samples')
    plt.plot(Xr[:, 0], Xr[:, 1], 'xr', label='Target samples')
    plt.legend(loc=0)
    plt.title('OT matrix Sinkhorn with samples')

    return Gs
Example 3
def OT_scores_emd(Xl, Xr, scoresl, scoresr, mu=0.5):
    # loss matrix
    print(scoresl)
    C = ot.dist(Xl, Xr) + mu * ot.dist(np.expand_dims(scoresl, axis=1),
                                       np.expand_dims(scoresr, axis=1))
    M = C / C.max()

    n = len(Xl)
    m = len(Xr)
    print(n, m)

    # the scores act as marginal weights; ot.emd requires both marginals to
    # have the same total mass, so normalize each to 1
    a, b = scoresl / scoresl.sum(), scoresr / scoresr.sum()

    G0 = ot.emd(a, b, M)

    plt.figure(3)
    plt.imshow(G0, interpolation='nearest')
    plt.title('OT matrix G0')

    plt.figure(4)
    ot.plot.plot2D_samples_mat(Xl[:, :2], Xr[:, :2], G0, c=[.5, .5, 1])
    plt.plot(Xl[:, 0], Xl[:, 1], '+b', label='Source samples')
    plt.plot(Xr[:, 0], Xr[:, 1], 'xr', label='Target samples')
    plt.legend(loc=0)
    plt.title('OT matrix with samples')

    return G0
Example 4
def get_sim(x, sim, **kwargs):
    if sim == 'gauss':
        try:
            rbfparam = kwargs['rbfparam']
        except KeyError:
            rbfparam = 1 / (2 * (np.mean(ot.dist(x, x, 'sqeuclidean')) ** 2))
        S = rbf_kernel(x, x, rbfparam)
    elif sim == 'gaussthr':
        try:
            rbfparam = kwargs['rbfparam']
        except KeyError:
            rbfparam = 1 / (2 * (np.mean(ot.dist(x, x, 'sqeuclidean')) ** 2))
        try:
            thrg = kwargs['thrg']
        except KeyError:
            thrg = .5
        S = np.float64(rbf_kernel(x, x, rbfparam) > thrg)
    elif sim == 'knn':
        try:
            num_neighbors = kwargs['nn']
        except KeyError:
            # sim='knn' expects the number of neighbors `nn`; default to 3
            num_neighbors = 3
        S = kn_graph(x, num_neighbors).toarray()
        S = (S + S.T) / 2
    return S
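
A quick sanity check of the three similarity modes (a sketch; it assumes `rbf_kernel` is `sklearn.metrics.pairwise.rbf_kernel` and `kn_graph` is `sklearn.neighbors.kneighbors_graph`, which is what the calls above suggest):

import numpy as np
import ot
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.neighbors import kneighbors_graph as kn_graph

x = np.random.RandomState(0).randn(10, 3)

S_gauss = get_sim(x, 'gauss')             # dense RBF kernel
S_thr = get_sim(x, 'gaussthr', thrg=0.3)  # thresholded 0/1 kernel
S_knn = get_sim(x, 'knn', nn=2)           # symmetrized k-NN adjacency
print(S_gauss.shape, S_thr.max(), S_knn.sum())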
Example 5
def test_lazy_empirical_sinkhorn():
    # test sinkhorn
    n = 10
    a = ot.unif(n)
    b = ot.unif(n)
    numIterMax = 1000

    X_s = np.reshape(np.arange(n), (n, 1))
    X_t = np.reshape(np.arange(0, n), (n, 1))
    M = ot.dist(X_s, X_t)
    M_m = ot.dist(X_s, X_t, metric='minkowski')

    f, g = ot.bregman.empirical_sinkhorn(X_s,
                                         X_t,
                                         1,
                                         numIterMax=numIterMax,
                                         isLazy=True,
                                         batchSize=(1, 3),
                                         verbose=True)
    G_sqe = np.exp(f[:, None] + g[None, :] - M / 1)
    sinkhorn_sqe = ot.sinkhorn(a, b, M, 1)

    f, g, log_es = ot.bregman.empirical_sinkhorn(X_s,
                                                 X_t,
                                                 0.1,
                                                 numIterMax=numIterMax,
                                                 isLazy=True,
                                                 batchSize=1,
                                                 log=True)
    G_log = np.exp(f[:, None] + g[None, :] - M / 0.1)
    sinkhorn_log, log_s = ot.sinkhorn(a, b, M, 0.1, log=True)

    f, g = ot.bregman.empirical_sinkhorn(X_s,
                                         X_t,
                                         1,
                                         metric='minkowski',
                                         numIterMax=numIterMax,
                                         isLazy=True,
                                         batchSize=1)
    G_m = np.exp(f[:, None] + g[None, :] - M_m / 1)
    sinkhorn_m = ot.sinkhorn(a, b, M_m, 1)

    loss_emp_sinkhorn, log = ot.bregman.empirical_sinkhorn2(
        X_s, X_t, 1, numIterMax=numIterMax, isLazy=True, batchSize=1, log=True)
    loss_sinkhorn = ot.sinkhorn2(a, b, M, 1)

    # check constraints
    np.testing.assert_allclose(sinkhorn_sqe.sum(1), G_sqe.sum(1),
                               atol=1e-05)  # metric sqeuclidean
    np.testing.assert_allclose(sinkhorn_sqe.sum(0), G_sqe.sum(0),
                               atol=1e-05)  # metric sqeuclidean
    np.testing.assert_allclose(sinkhorn_log.sum(1), G_log.sum(1),
                               atol=1e-05)  # log
    np.testing.assert_allclose(sinkhorn_log.sum(0), G_log.sum(0),
                               atol=1e-05)  # log
    np.testing.assert_allclose(sinkhorn_m.sum(1), G_m.sum(1),
                               atol=1e-05)  # metric euclidean
    np.testing.assert_allclose(sinkhorn_m.sum(0), G_m.sum(0),
                               atol=1e-05)  # metric euclidean
    np.testing.assert_allclose(loss_emp_sinkhorn, loss_sinkhorn, atol=1e-05)
Example 6
def incremental_bary_map_emd(xs, xt, a, b, m1, m2, k):
    '''
    Compute the incomplete minibatch barycenter mapping
    between source and target distributions
    (faster for small batch sizes).

    Parameters
    ----------
    - xs : ndarray(ns, d)
        source data
    - xt : ndarray(nt, d)
        target data
    - a : ndarray(ns)
        source distribution weights
    - b : ndarray(nt)
        target distribution weights
    - m1 : int
        source batch size
    - m2 : int
        target batch size
    - k : int
        number of batch couples

    Returns
    -------
    - new_xs : ndarray(ns, d)
        Transported source measure
    - new_xt : ndarray(nt, d)
        Transported target measure
    '''
    new_xs = np.zeros(xs.shape)
    new_xt = np.zeros(xt.shape)
    Ns = np.shape(xs)[0]
    Nt = np.shape(xt)[0]

    if m1 < 101:
        for i in range(k):
            #Test mini batch
            sub_xs, sub_weights_a, id_a = small_mini_batch(xs, a, m1, Ns)
            sub_xt, sub_weights_b, id_b = small_mini_batch(xt, b, m2, Nt)

            sub_M = ot.dist(sub_xs, sub_xt, "sqeuclidean").copy()
            G0 = ot.emd(sub_weights_a, sub_weights_b, sub_M)

            new_xs[id_a] += G0.dot(xt[id_b])
            new_xt[id_b] += G0.T.dot(xs[id_a])

    else:
        for i in range(k):
            #Test mini batch
            sub_xs, sub_weights_a, id_a = mini_batch(xs, a, m1, Ns)
            sub_xt, sub_weights_b, id_b = mini_batch(xt, b, m2, Nt)

            sub_M = ot.dist(sub_xs, sub_xt, "sqeuclidean").copy()
            G0 = ot.emd(sub_weights_a, sub_weights_b, sub_M)

            new_xs[id_a] += G0.dot(xt[id_b])
            new_xt[id_b] += G0.T.dot(xs[id_a])

    return 1. / k * Ns * new_xs, 1. / k * Nt * new_xt
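
The samplers `small_mini_batch` and `mini_batch` are not shown in this excerpt. A minimal sketch of what such a helper could look like (an assumption: it draws m indices, gathers the points, and renormalizes the weights so ot.emd sees marginals of equal mass):

import numpy as np

def mini_batch(x, w, m, N):
    # hypothetical sampler: m points drawn from x, weights renormalized to 1
    idx = np.random.choice(N, size=m, replace=False)
    sub_w = w[idx] / w[idx].sum()
    return x[idx], sub_w, idx

small_mini_batch = mini_batch  # the small-batch variant is assumed analogous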
Example 7
    def graph_d(self, graph1, graph2):
        """ Compute the Wasserstein distance between two graphs. Uniform weights are used.        
        Parameters
        ----------
        graph1 : a Graph object
        graph2 : a Graph object
        Returns
        -------
        The Wasserstein distance between the features of graph1 and graph2
        """

        nodes1 = graph1.nodes()
        nodes2 = graph2.nodes()
        t1masses = np.ones(len(nodes1)) / len(nodes1)
        t2masses = np.ones(len(nodes2)) / len(nodes2)
        x1 = self.reshaper(graph1.all_matrix_attr())
        x2 = self.reshaper(graph2.all_matrix_attr())

        if self.features_metric == 'dirac':
            f = lambda x, y: x != y
            M = ot.dist(x1, x2, metric=f)
        else:
            M = ot.dist(x1, x2, metric=self.features_metric)
        if np.max(M) != 0:
            M = M / np.max(M)
        self.M = M

        transp = ot.emd(t1masses, t2masses, M)
        self.transp = transp

        return np.sum(transp * M)
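
The returned value np.sum(transp * M) is the Frobenius inner product <T, M>, i.e. the cost of the optimal plan. POT can return that number directly via ot.emd2; a small equivalence check (not part of the original class):

import numpy as np
import ot

x1 = np.random.RandomState(0).randn(5, 3)
x2 = np.random.RandomState(1).randn(7, 3)
M = ot.dist(x1, x2)
M = M / M.max()
a, b = np.ones(5) / 5, np.ones(7) / 7

# emd2 returns <T, M> without materializing the plan separately
assert np.isclose(ot.emd2(a, b, M), np.sum(ot.emd(a, b, M) * M))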
Example 8
def test_gromov_entropic_barycenter():

    ns = 50
    nt = 60

    Xs, ys = ot.datasets.get_data_classif('3gauss', ns)
    Xt, yt = ot.datasets.get_data_classif('3gauss2', nt)

    C1 = ot.dist(Xs)
    C2 = ot.dist(Xt)

    n_samples = 3
    Cb = ot.gromov.entropic_gromov_barycenters(
        n_samples, [C1, C2], [ot.unif(ns), ot.unif(nt)],
        ot.unif(n_samples), [.5, .5],
        'square_loss',
        1e-3,
        max_iter=100,
        tol=1e-3)
    np.testing.assert_allclose(Cb.shape, (n_samples, n_samples))

    Cb2 = ot.gromov.entropic_gromov_barycenters(
        n_samples, [C1, C2], [ot.unif(ns), ot.unif(nt)],
        ot.unif(n_samples), [.5, .5],
        'kl_loss',
        1e-3,
        max_iter=100,
        tol=1e-3)
    np.testing.assert_allclose(Cb2.shape, (n_samples, n_samples))
Example 9
def test_gromov_entropic_barycenter():
    ns = 20
    nt = 30

    Xs, ys = ot.datasets.make_data_classif('3gauss', ns, random_state=42)
    Xt, yt = ot.datasets.make_data_classif('3gauss2', nt, random_state=42)

    C1 = ot.dist(Xs)
    C2 = ot.dist(Xt)

    n_samples = 2
    Cb = ot.gromov.entropic_gromov_barycenters(
        n_samples, [C1, C2], [ot.unif(ns), ot.unif(nt)],
        ot.unif(n_samples), [.5, .5],
        'square_loss',
        1e-3,
        max_iter=50,
        tol=1e-5,
        verbose=True)
    np.testing.assert_allclose(Cb.shape, (n_samples, n_samples))

    Cb2 = ot.gromov.entropic_gromov_barycenters(
        n_samples, [C1, C2], [ot.unif(ns), ot.unif(nt)],
        ot.unif(n_samples), [.5, .5],
        'kl_loss',
        1e-3,
        max_iter=100,
        tol=1e-3)
    np.testing.assert_allclose(Cb2.shape, (n_samples, n_samples))
Example 10
def test_gromov():
    n_samples = 50  # nb samples

    mu_s = np.array([0, 0])
    cov_s = np.array([[1, 0], [0, 1]])

    xs = ot.datasets.get_2D_samples_gauss(n_samples, mu_s, cov_s)

    xt = xs[::-1].copy()

    p = ot.unif(n_samples)
    q = ot.unif(n_samples)

    C1 = ot.dist(xs, xs)
    C2 = ot.dist(xt, xt)

    C1 /= C1.max()
    C2 /= C2.max()

    G = ot.gromov_wasserstein(C1, C2, p, q, 'square_loss', epsilon=5e-4)

    # check constraints
    np.testing.assert_allclose(p, G.sum(1),
                               atol=1e-04)  # cf convergence gromov
    np.testing.assert_allclose(q, G.sum(0),
                               atol=1e-04)  # cf convergence gromov
Example 11
def test_fgw_barycenter():
    np.random.seed(42)

    ns = 50
    nt = 60

    Xs, ys = ot.datasets.make_data_classif('3gauss', ns, random_state=42)
    Xt, yt = ot.datasets.make_data_classif('3gauss2', nt, random_state=42)

    ys = np.random.randn(Xs.shape[0], 2)
    yt = np.random.randn(Xt.shape[0], 2)

    C1 = ot.dist(Xs)
    C2 = ot.dist(Xt)

    n_samples = 3
    X, C = ot.gromov.fgw_barycenters(n_samples, [ys, yt], [C1, C2],
                                     [ot.unif(ns), ot.unif(nt)], [.5, .5],
                                     0.5,
                                     fixed_structure=False,
                                     fixed_features=False,
                                     p=ot.unif(n_samples),
                                     loss_fun='square_loss',
                                     max_iter=100,
                                     tol=1e-3)
    np.testing.assert_allclose(C.shape, (n_samples, n_samples))
    np.testing.assert_allclose(X.shape, (n_samples, ys.shape[1]))

    xalea = np.random.randn(n_samples, 2)
    init_C = ot.dist(xalea, xalea)

    X, C = ot.gromov.fgw_barycenters(n_samples, [ys, yt], [C1, C2],
                                     ps=[ot.unif(ns), ot.unif(nt)],
                                     lambdas=[.5, .5],
                                     alpha=0.5,
                                     fixed_structure=True,
                                     init_C=init_C,
                                     fixed_features=False,
                                     p=ot.unif(n_samples),
                                     loss_fun='square_loss',
                                     max_iter=100,
                                     tol=1e-3)
    np.testing.assert_allclose(C.shape, (n_samples, n_samples))
    np.testing.assert_allclose(X.shape, (n_samples, ys.shape[1]))

    init_X = np.random.randn(n_samples, ys.shape[1])

    X, C, log = ot.gromov.fgw_barycenters(
        n_samples, [ys, yt], [C1, C2], [ot.unif(ns), ot.unif(nt)], [.5, .5],
        0.5,
        fixed_structure=False,
        fixed_features=True,
        init_X=init_X,
        p=ot.unif(n_samples),
        loss_fun='square_loss',
        max_iter=100,
        tol=1e-3,
        log=True)
    np.testing.assert_allclose(C.shape, (n_samples, n_samples))
    np.testing.assert_allclose(X.shape, (n_samples, ys.shape[1]))
Example 12
def _compute_copula_ot_dependence(empirical: np.array, target: np.array,
                                  forget: np.array, n_obs: int) -> float:
    """
    Calculates optimal copula transport dependence measure.

    :param empirical: (np.array) Empirical copula.
    :param target: (np.array) Target copula.
    :param forget: (np.array) Forget copula.
    :param n_obs: (int) Number of observations.
    :return: (float) Optimal copula transport dependence.
    """

    # Uniform distribution on samples
    t_measure, f_measure, e_measure = (np.ones(
        (n_obs, )) / n_obs, np.ones((n_obs, )) / n_obs, np.ones(
            (n_obs, )) / n_obs)

    # Compute the ground distance matrix between locations
    gdist_e2t = ot.dist(empirical, target)
    gdist_e2f = ot.dist(empirical, forget)

    # Compute the optimal transport matrix
    e2t_ot = ot.emd(t_measure, e_measure, gdist_e2t)
    e2f_ot = ot.emd(f_measure, e_measure, gdist_e2f)

    # Compute the optimal transport distance:
    # <optimal transport matrix, ground distance matrix>_F
    e2t_dist = np.trace(np.dot(np.transpose(e2t_ot), gdist_e2t))
    e2f_dist = np.trace(np.dot(np.transpose(e2f_ot), gdist_e2f))

    # Compute the copula ot dependence measure
    ot_measure = 1 - e2t_dist / (e2f_dist + e2t_dist)

    return ot_measure
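
The two trace expressions above compute Frobenius inner products <plan, cost>; np.sum(plan * cost) gives the same value without forming an intermediate n x n matrix product. A small check of that identity:

import numpy as np

rng = np.random.RandomState(0)
plan, cost = rng.rand(4, 4), rng.rand(4, 4)

# trace(plan^T cost) equals the elementwise sum of plan * cost
assert np.isclose(np.trace(np.dot(np.transpose(plan), cost)),
                  np.sum(plan * cost))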
Example 13
def test_gromov_barycenter():

    ns = 50
    nt = 60

    Xs, ys = ot.datasets.get_data_classif('3gauss', ns)
    Xt, yt = ot.datasets.get_data_classif('3gauss2', nt)

    C1 = ot.dist(Xs)
    C2 = ot.dist(Xt)

    n_samples = 3
    Cb = ot.gromov.gromov_barycenters(n_samples, [C1, C2],
                                      [ot.unif(ns), ot.unif(nt)],
                                      ot.unif(n_samples), [.5, .5],
                                      'square_loss',  # 5e-4,
                                      max_iter=100, tol=1e-3)
    np.testing.assert_allclose(Cb.shape, (n_samples, n_samples))

    Cb2 = ot.gromov.gromov_barycenters(n_samples, [C1, C2],
                                       [ot.unif(ns), ot.unif(nt)],
                                       ot.unif(n_samples), [.5, .5],
                                       'kl_loss',  # 5e-4,
                                       max_iter=100, tol=1e-3)
    np.testing.assert_allclose(Cb2.shape, (n_samples, n_samples))
Example 14
def test_empirical_sinkhorn_divergence():
    # Test sinkhorn divergence
    n = 10
    a = ot.unif(n)
    b = ot.unif(n)
    X_s = np.reshape(np.arange(n), (n, 1))
    X_t = np.reshape(np.arange(0, n * 2, 2), (n, 1))
    M = ot.dist(X_s, X_t)
    M_s = ot.dist(X_s, X_s)
    M_t = ot.dist(X_t, X_t)

    emp_sinkhorn_div = ot.bregman.empirical_sinkhorn_divergence(X_s, X_t, 1)
    sinkhorn_div = (ot.sinkhorn2(a, b, M, 1) -
                    1 / 2 * ot.sinkhorn2(a, a, M_s, 1) -
                    1 / 2 * ot.sinkhorn2(b, b, M_t, 1))

    emp_sinkhorn_div_log, log_es = ot.bregman.empirical_sinkhorn_divergence(
        X_s, X_t, 1, log=True)
    sink_div_log_ab, log_s_ab = ot.sinkhorn2(a, b, M, 1, log=True)
    sink_div_log_a, log_s_a = ot.sinkhorn2(a, a, M_s, 1, log=True)
    sink_div_log_b, log_s_b = ot.sinkhorn2(b, b, M_t, 1, log=True)
    sink_div_log = sink_div_log_ab - 1 / 2 * (sink_div_log_a + sink_div_log_b)

    # check constraints
    np.testing.assert_allclose(emp_sinkhorn_div, sinkhorn_div,
                               atol=1e-05)  # cf conv emp sinkhorn
    np.testing.assert_allclose(emp_sinkhorn_div_log, sink_div_log,
                               atol=1e-05)  # cf conv emp sinkhorn
Example 15
    def forward_seq(self, x_train, x_test):
        N = self.Nmasses
        (Pl_train, P_train) = particleApproximation_v0(x_train, N)
        (Pl_test, P_test) = particleApproximation_v0(x_test, N)

        Pl_tem = 0
        for a in range(2):  # originally x_train.shape[0]
            t = Pl_train[a]
            Pl_tem = Pl_tem + t
        Pl_tem = Pl_tem / 2  # originally x_train.shape[0]
        P_tem = np.ones((N, )) / float(N)

        #Pl_tem_vec=np.reshape(Pl_tem,(Pl_tem.shape[0]*Pl_tem.shape[1],),order='F')

        V = list()
        M = x_train.shape[0]
        for ind in range(M):
            Ni = Pl_train[ind].shape[0]
            C = ot.dist(Pl_train[ind], Pl_tem)
            b = P_tem  # b=np.ones((N,))/float(N)
            a = P_train[ind]  # a=np.ones((Ni,))/float(Ni)
            p = ot.emd(a, b, C)  # exact linear program

            #V.append(np.matmul((N*p).T,Pl_train[ind])-Pl_tem)
            V.append(np.matmul((N * p).T, Pl_train[ind]) +
                     Pl_tem)  # already giving transport displacement?

        V = np.asarray(V)

        x_train_hat = np.zeros((len(V), V[0].shape[0] * V[0].shape[1]))
        for a in range(len(V)):
            x_train_hat[a, :] = np.reshape(V[a],
                                           (V[0].shape[0] * V[0].shape[1], ),
                                           order='F')

        V = list()
        M = x_test.shape[0]
        for ind in range(M):
            Ni = Pl_test[ind].shape[0]
            C = ot.dist(Pl_test[ind], Pl_tem)
            b = P_tem  # b=np.ones((N,))/float(N)
            a = P_test[ind]  # a=np.ones((Ni,))/float(Ni)
            p = ot.emd(a, b, C)  # exact linear program

            #V.append(np.matmul((N*p).T,Pl_test[ind])-Pl_tem)
            V.append(np.matmul((N * p).T, Pl_test[ind]) + Pl_tem)

        V = np.asarray(V)

        x_test_hat = np.zeros((len(V), V[0].shape[0] * V[0].shape[1]))
        for a in range(len(V)):
            x_test_hat[a, :] = np.reshape(V[a],
                                          (V[0].shape[0] * V[0].shape[1], ),
                                          order='F')

        return x_train_hat, x_test_hat, Pl_tem, P_tem
Example 16
    def graph_d(self, graph1, graph2):
        """ Compute the Fused Gromov-Wasserstein distance between two graphs. Uniform weights are used.
        Parameters
        ----------
        graph1 : a Graph object
        graph2 : a Graph object
        Returns
        -------
        The Fused Gromov-Wasserstein distance between the features of graph1 and graph2
        """
        gofeature = True
        nodes1 = graph1.nodes()
        nodes2 = graph2.nodes()
        startstruct = time.time()
        C1 = graph1.distance_matrix(method=self.method)
        C2 = graph2.distance_matrix(method=self.method)
        end2 = time.time()
        t1masses = np.ones(len(nodes1)) / len(nodes1)
        t2masses = np.ones(len(nodes2)) / len(nodes2)
        try:
            x1 = self.reshaper(graph1.all_matrix_attr())
            x2 = self.reshaper(graph2.all_matrix_attr())
        except NoAttrMatrix:
            x1 = None
            x2 = None
            gofeature = False
        if gofeature:
            if self.features_metric == 'dirac':

                def f(x, y):
                    return x != y

                M = ot.dist(x1, x2, metric=f)
            elif self.features_metric == 'hamming_dist':  # see experimental setup in the original paper

                def f(x, y):
                    return hamming_dist(x, y)

                M = ot.dist(x1, x2, metric=f)
            else:
                M = ot.dist(x1, x2, metric=self.features_metric)
            self.M = M
        else:
            M = np.zeros((C1.shape[0], C2.shape[0]))

        startdist = time.time()
        transpwgw, log = self.calc_fgw(M, C1, C2, t1masses, t2masses)
        enddist = time.time()
        log['struct_time'] = (end2 - startstruct)
        log['dist_time'] = (enddist - startdist)
        self.transp = transpwgw
        self.log = log

        return log['loss'][::-1][0]
Example 17
def test_dual_sgd_sinkhorn():
    # test all dual algorithms
    n = 10
    reg = 1
    nb_iter = 15000
    batch_size = 10
    rng = np.random.RandomState(0)

    # Test uniform
    x = rng.randn(n, 2)
    u = ot.utils.unif(n)
    M = ot.dist(x, x)

    G_sgd = ot.stochastic.solve_dual_entropic(u,
                                              u,
                                              M,
                                              reg,
                                              batch_size,
                                              numItermax=nb_iter)

    G_sinkhorn = ot.sinkhorn(u, u, M, reg)

    # check constraints
    np.testing.assert_allclose(G_sgd.sum(1), G_sinkhorn.sum(1), atol=1e-03)
    np.testing.assert_allclose(G_sgd.sum(0), G_sinkhorn.sum(0), atol=1e-03)
    np.testing.assert_allclose(G_sgd, G_sinkhorn,
                               atol=1e-03)  # cf convergence sgd

    # Test gaussian
    n = 30
    reg = 1
    batch_size = 30

    a = ot.datasets.make_1D_gauss(n, 15, 5)  # m= mean, s= std
    b = ot.datasets.make_1D_gauss(n, 15, 5)
    X_source = np.arange(n, dtype=np.float64)
    Y_target = np.arange(n, dtype=np.float64)
    M = ot.dist(X_source.reshape((n, 1)), Y_target.reshape((n, 1)))
    M /= M.max()

    G_sgd = ot.stochastic.solve_dual_entropic(a,
                                              b,
                                              M,
                                              reg,
                                              batch_size,
                                              numItermax=nb_iter)

    G_sinkhorn = ot.sinkhorn(a, b, M, reg)

    # check constraints
    np.testing.assert_allclose(G_sgd.sum(1), G_sinkhorn.sum(1), atol=1e-03)
    np.testing.assert_allclose(G_sgd.sum(0), G_sinkhorn.sum(0), atol=1e-03)
    np.testing.assert_allclose(G_sgd, G_sinkhorn,
                               atol=1e-03)  # cf convergence sgd
Example 18
def barycenter_free(b1,
                    b2,
                    xs1,
                    xs2,
                    w1,
                    w2,
                    entr_reg,
                    k,
                    tol=1e-5,
                    max_iter=100,
                    verbose=False):

    d = xs1.shape[1]
    ys = np.random.normal(size=(k, d))
    cost_old = float("inf")
    its = 0
    converged = False

    while not converged and its < max_iter:
        its += 1
        if verbose:
            print("Barycenter points iteration: {}".format(its))

        if its > 1:
            ys = (w1 * np.matmul(gamma1, xs1) +
                  w2 * np.matmul(gamma2, xs2)) / (np.dot(
                      (w1 * np.sum(gamma1, axis=1) +
                       w2 * np.sum(gamma2, axis=1)).reshape(
                           (k, 1)), np.ones((1, d))))

        M1 = ot.dist(ys, xs1)
        M2 = ot.dist(ys, xs2)
        (gamma1, gamma2) = barycenter_bregman(b1,
                                              b2,
                                              M1,
                                              M2,
                                              w1,
                                              w2,
                                              entr_reg,
                                              max_iter=500)
        cost = (w1 * np.sum(gamma1 * M1) + w2 * np.sum(gamma2 * M2)
                ) + entr_reg * (w1 * stats.entropy(gamma1.reshape(
                    (-1, 1))) + w2 * stats.entropy(gamma2.reshape((-1, 1))))
        # TODO: Calculate the entropy safely
        # cost = w1 * np.sum(gamma1 * M1) + w2 * np.sum(gamma2 * M2)

        err = abs(cost - cost_old) / max(abs(cost), 1e-12)
        cost_old = cost.copy()
        if verbose:
            print("Relative change in points iteration: {}".format(err))
        if err < tol:
            converged = True

    return (ys, np.dot(gamma1, np.ones(gamma1.shape[1])), cost, gamma1, gamma2)
Example 19
def test_entropic_gromov():
    n_samples = 50  # nb samples

    mu_s = np.array([0, 0])
    cov_s = np.array([[1, 0], [0, 1]])

    xs = ot.datasets.make_2D_samples_gauss(n_samples,
                                           mu_s,
                                           cov_s,
                                           random_state=42)

    xt = xs[::-1].copy()

    p = ot.unif(n_samples)
    q = ot.unif(n_samples)

    C1 = ot.dist(xs, xs)
    C2 = ot.dist(xt, xt)

    C1 /= C1.max()
    C2 /= C2.max()

    G = ot.gromov.entropic_gromov_wasserstein(C1,
                                              C2,
                                              p,
                                              q,
                                              'square_loss',
                                              epsilon=5e-4,
                                              verbose=True)

    # check constraints
    np.testing.assert_allclose(p, G.sum(1),
                               atol=1e-04)  # cf convergence gromov
    np.testing.assert_allclose(q, G.sum(0),
                               atol=1e-04)  # cf convergence gromov

    gw, log = ot.gromov.entropic_gromov_wasserstein2(C1,
                                                     C2,
                                                     p,
                                                     q,
                                                     'kl_loss',
                                                     epsilon=1e-2,
                                                     log=True)

    G = log['T']

    np.testing.assert_allclose(gw, 0, atol=1e-1, rtol=1e-1)

    # check constraints
    np.testing.assert_allclose(p, G.sum(1),
                               atol=1e-04)  # cf convergence gromov
    np.testing.assert_allclose(q, G.sum(0),
                               atol=1e-04)  # cf convergence gromov
Example 20
def test_gromov():
    n_samples = 50  # nb samples

    mu_s = np.array([0, 0])
    cov_s = np.array([[1, 0], [0, 1]])

    xs = ot.datasets.make_2D_samples_gauss(n_samples,
                                           mu_s,
                                           cov_s,
                                           random_state=4)

    xt = xs[::-1].copy()

    p = ot.unif(n_samples)
    q = ot.unif(n_samples)

    C1 = ot.dist(xs, xs)
    C2 = ot.dist(xt, xt)

    C1 /= C1.max()
    C2 /= C2.max()

    G = ot.gromov.gromov_wasserstein(C1, C2, p, q, 'square_loss', verbose=True)

    # check constraints
    np.testing.assert_allclose(p, G.sum(1),
                               atol=1e-04)  # cf convergence gromov
    np.testing.assert_allclose(q, G.sum(0),
                               atol=1e-04)  # cf convergence gromov

    Id = (1 / (1.0 * n_samples)) * np.eye(n_samples, n_samples)

    np.testing.assert_allclose(G, np.flipud(Id), atol=1e-04)

    gw, log = ot.gromov.gromov_wasserstein2(C1, C2, p, q, 'kl_loss', log=True)

    gw_val = ot.gromov.gromov_wasserstein2(C1, C2, p, q, 'kl_loss', log=False)

    G = log['T']

    np.testing.assert_allclose(gw, 0, atol=1e-1, rtol=1e-1)

    np.testing.assert_allclose(gw, gw_val, atol=1e-1,
                               rtol=1e-1)  # cf log=False

    # check constraints
    np.testing.assert_allclose(p, G.sum(1),
                               atol=1e-04)  # cf convergence gromov
    np.testing.assert_allclose(q, G.sum(0),
                               atol=1e-04)  # cf convergence gromov
Example 21
def calculate_gw_cpu(Xs, Xt, numItermax=500):
    st = time.time()
    C1 = ot.dist(Xs, Xs, 'sqeuclidean')
    C2 = ot.dist(Xt, Xt, 'sqeuclidean')
    p = np.ones(Xs.shape[0]) / Xs.shape[0]
    q = np.ones(Xt.shape[0]) / Xt.shape[0]
    T, log = gw(C1, C2, p, q, 'square_loss', log=True, numItermax=numItermax)
    d_gw = log['loss'][::-1][0]
    converge_gw_pot = abs(log['loss'][::-1][0] - log['loss'][::-1][1]) <= 1e-5

    ed = time.time()
    time_gw = ed - st

    return d_gw, time_gw, converge_gw_pot
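
The name `gw` is not defined in this excerpt; it is presumably an alias for ot.gromov.gromov_wasserstein (an assumption; the numItermax keyword matches the older POT conjugate-gradient solver). Under that assumption, a toy call looks like:

import time
import numpy as np
import ot
from ot.gromov import gromov_wasserstein as gw  # assumed alias

rng = np.random.RandomState(0)
Xs = rng.randn(30, 2)
Xt = rng.randn(40, 3)  # GW compares intra-domain distances, so dims may differ

d_gw, time_gw, converged = calculate_gw_cpu(Xs, Xt, numItermax=200)
print(d_gw, time_gw, converged)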
Example 22
def test_fgw():

    n_samples = 50  # nb samples

    mu_s = np.array([0, 0])
    cov_s = np.array([[1, 0], [0, 1]])

    xs = ot.datasets.make_2D_samples_gauss(n_samples, mu_s, cov_s, random_state=42)

    xt = xs[::-1].copy()

    ys = np.random.randn(xs.shape[0], 2)
    yt = ys[::-1].copy()

    p = ot.unif(n_samples)
    q = ot.unif(n_samples)

    C1 = ot.dist(xs, xs)
    C2 = ot.dist(xt, xt)

    C1 /= C1.max()
    C2 /= C2.max()

    M = ot.dist(ys, yt)
    M /= M.max()

    G, log = ot.gromov.fused_gromov_wasserstein(M, C1, C2, p, q, 'square_loss', alpha=0.5, log=True)

    # check constraints
    np.testing.assert_allclose(
        p, G.sum(1), atol=1e-04)  # cf convergence fgw
    np.testing.assert_allclose(
        q, G.sum(0), atol=1e-04)  # cf convergence fgw

    Id = (1 / (1.0 * n_samples)) * np.eye(n_samples, n_samples)

    np.testing.assert_allclose(
        G, np.flipud(Id), atol=1e-04)  # cf convergence gromov

    fgw, log = ot.gromov.fused_gromov_wasserstein2(M, C1, C2, p, q, 'square_loss', alpha=0.5, log=True)

    G = log['T']

    np.testing.assert_allclose(fgw, 0, atol=1e-1, rtol=1e-1)

    # check constraints
    np.testing.assert_allclose(
        p, G.sum(1), atol=1e-04)  # cf convergence gromov
    np.testing.assert_allclose(
        q, G.sum(0), atol=1e-04)  # cf convergence gromov
Example 23
def test_generalized_conditional_gradient():

    n_bins = 100  # nb bins
    np.random.seed(0)
    # bin positions
    x = np.arange(n_bins, dtype=np.float64)

    # Gaussian distributions
    a = ot.datasets.get_1D_gauss(n_bins, m=20, s=5)  # m= mean, s= std
    b = ot.datasets.get_1D_gauss(n_bins, m=60, s=10)

    # loss matrix
    M = ot.dist(x.reshape((n_bins, 1)), x.reshape((n_bins, 1)))
    M /= M.max()

    def f(G):
        return 0.5 * np.sum(G**2)

    def df(G):
        return G

    reg1 = 1e-3
    reg2 = 1e-1

    G, log = ot.optim.gcg(a, b, M, reg1, reg2, f, df, verbose=True, log=True)

    np.testing.assert_allclose(a, G.sum(1), atol=1e-05)
    np.testing.assert_allclose(b, G.sum(0), atol=1e-05)
Example 24
def test_sinkhorn_empty():
    # test sinkhorn
    n = 100
    rng = np.random.RandomState(0)

    x = rng.randn(n, 2)
    u = ot.utils.unif(n)

    M = ot.dist(x, x)

    G, log = ot.sinkhorn([], [], M, 1, stopThr=1e-10, verbose=True, log=True)
    # check constraints
    np.testing.assert_allclose(u, G.sum(1), atol=1e-05)
    np.testing.assert_allclose(u, G.sum(0), atol=1e-05)

    G, log = ot.sinkhorn([], [], M, 1, stopThr=1e-10,
                         method='sinkhorn_stabilized', verbose=True, log=True)
    # check constraints
    np.testing.assert_allclose(u, G.sum(1), atol=1e-05)
    np.testing.assert_allclose(u, G.sum(0), atol=1e-05)

    G, log = ot.sinkhorn(
        [], [], M, 1, stopThr=1e-10, method='sinkhorn_epsilon_scaling',
        verbose=True, log=True)
    # check constraints
    np.testing.assert_allclose(u, G.sum(1), atol=1e-05)
    np.testing.assert_allclose(u, G.sum(0), atol=1e-05)
Example 25
def test_sinkhorn_variants():
    # test sinkhorn
    n = 100
    rng = np.random.RandomState(0)

    x = rng.randn(n, 2)
    u = ot.utils.unif(n)

    M = ot.dist(x, x)

    G0 = ot.sinkhorn(u, u, M, 1, method='sinkhorn', stopThr=1e-10)
    Gs = ot.sinkhorn(u, u, M, 1, method='sinkhorn_stabilized', stopThr=1e-10)
    Ges = ot.sinkhorn(u,
                      u,
                      M,
                      1,
                      method='sinkhorn_epsilon_scaling',
                      stopThr=1e-10)
    Gerr = ot.sinkhorn(u, u, M, 1, method='do_not_exists', stopThr=1e-10)
    G_green = ot.sinkhorn(u, u, M, 1, method='greenkhorn', stopThr=1e-10)

    # check values
    np.testing.assert_allclose(G0, Gs, atol=1e-05)
    np.testing.assert_allclose(G0, Ges, atol=1e-05)
    np.testing.assert_allclose(G0, Gerr)
    np.testing.assert_allclose(G0, G_green, atol=1e-5)
    print(G0, G_green)
Example 26
def test_warnings():
    n = 100  # nb bins
    m = 100  # nb bins

    mean1 = 30
    mean2 = 50

    # bin positions
    x = np.arange(n, dtype=np.float64)
    y = np.arange(m, dtype=np.float64)

    # Gaussian distributions
    a = gauss(n, m=mean1, s=5)  # m= mean, s= std

    b = gauss(m, m=mean2, s=10)

    # loss matrix
    M = ot.dist(x.reshape((-1, 1)), y.reshape((-1, 1)))**(1. / 2)

    print('Computing {} EMD '.format(1))
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        print('Computing {} EMD '.format(1))
        ot.emd(a, b, M, numItermax=1)
        assert "numItermax" in str(w[-1].message)
Example 27
def double_wasserstein1(X_train_smote):

    n, m, r, _ = X_train_smote.shape

    # uniform measures at points clouds of card m
    a2 = np.ones(m) / m
    b2 = np.ones(m) / m

    # uniform measures at points of card r
    a1 = np.ones(r) / r
    b1 = np.ones(r) / r

    # 1st level distance matrix of size m x m
    M1 = np.zeros((m, m))

    # M1 loop
    for i in range(m):
        for j in range(i + 1, m):

            # pairwise squared Euclidean distances as the ground metric
            M0_ij = ot.dist(X_train_smote[0, i],
                            X_train_smote[1, j],
                            metric="sqeuclidean")

            # 2-Wasserstein distance btw point clouds, take square root
            M1[i, j] = ot.emd2(a1, b1, M0_ij)**0.5

    # 1st level symmetrize
    M1 = M1 + M1.T
    np.fill_diagonal(M1, 1e9)

    # 1-Wasserstein distance btw collections of point clouds
    W1 = ot.emd2(a2, b2, M1)

    return W1
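
double_wasserstein1 nests two OT problems: an inner 2-Wasserstein distance between point clouds of r points (ot.emd2 on squared Euclidean costs, then a square root) and an outer 1-Wasserstein over the resulting m x m matrix M1. A toy call, assuming the expected (n, m, r, d) layout with n >= 2 since the loop indexes X_train_smote[0] and X_train_smote[1]:

import numpy as np
import ot

rng = np.random.RandomState(0)
# 2 collections, 4 point clouds each, 5 points per cloud, dimension 3
X = rng.randn(2, 4, 5, 3)
print(double_wasserstein1(X))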
Example 28
def test_emd_emd2_devices_tf():
    if not tf:
        return
    nx = ot.backend.TensorflowBackend()

    n_samples = 100
    n_features = 2
    rng = np.random.RandomState(0)
    x = rng.randn(n_samples, n_features)
    y = rng.randn(n_samples, n_features)
    a = ot.utils.unif(n_samples)
    M = ot.dist(x, y)

    # Check that everything stays on the CPU
    with tf.device("/CPU:0"):
        ab, Mb = nx.from_numpy(a, M)
        Gb = ot.emd(ab, ab, Mb)
        w = ot.emd2(ab, ab, Mb)
        nx.assert_same_dtype_device(Mb, Gb)
        nx.assert_same_dtype_device(Mb, w)

    if len(tf.config.list_physical_devices('GPU')) > 0:
        # Check that everything happens on the GPU
        ab, Mb = nx.from_numpy(a, M)
        Gb = ot.emd(ab, ab, Mb)
        w = ot.emd2(ab, ab, Mb)
        nx.assert_same_dtype_device(Mb, Gb)
        nx.assert_same_dtype_device(Mb, w)
        assert nx.dtype_device(Gb)[1].startswith("GPU")
Example 29
def test_conditional_gradient(nx):

    n_bins = 100  # nb bins
    np.random.seed(0)
    # bin positions
    x = np.arange(n_bins, dtype=np.float64)

    # Gaussian distributions
    a = ot.datasets.make_1D_gauss(n_bins, m=20, s=5)  # m= mean, s= std
    b = ot.datasets.make_1D_gauss(n_bins, m=60, s=10)

    # loss matrix
    M = ot.dist(x.reshape((n_bins, 1)), x.reshape((n_bins, 1)))
    M /= M.max()

    def f(G):
        return 0.5 * np.sum(G**2)

    def df(G):
        return G

    def fb(G):
        return 0.5 * nx.sum(G ** 2)

    ab, bb, Mb = nx.from_numpy(a, b, M)

    reg = 1e-1

    G, log = ot.optim.cg(a, b, M, reg, f, df, verbose=True, log=True)
    Gb, log = ot.optim.cg(ab, bb, Mb, reg, fb, df, verbose=True, log=True)
    Gb = nx.to_numpy(Gb)

    np.testing.assert_allclose(Gb, G)
    np.testing.assert_allclose(a, Gb.sum(1))
    np.testing.assert_allclose(b, Gb.sum(0))
Example 30
def test_gpu_sinkhorn_lpl1():

    rng = np.random.RandomState(0)

    for n_samples in [50, 100, 500]:
        print(n_samples)
        a = rng.rand(n_samples // 4, 100)
        labels_a = np.random.randint(10, size=(n_samples // 4))
        b = rng.rand(n_samples, 100)

        wa = ot.unif(n_samples // 4)
        wb = ot.unif(n_samples)

        M = ot.dist(a.copy(), b.copy())
        M2 = ot.gpu.dist(a.copy(), b.copy(), to_numpy=False)

        reg = 1

        G = ot.da.sinkhorn_lpl1_mm(wa, labels_a, wb, M, reg)
        G1 = ot.gpu.da.sinkhorn_lpl1_mm(wa, labels_a, wb, M, reg)

        np.testing.assert_allclose(G1, G, rtol=1e-10)

        ot.gpu.da.sinkhorn_lpl1_mm(wa,
                                   labels_a,
                                   wb,
                                   M2,
                                   reg,
                                   to_numpy=False,
                                   log=True)
Example 31
def test_gpu_sinkhorn():

    rng = np.random.RandomState(0)

    for n_samples in [50, 100, 500, 1000]:
        a = rng.rand(n_samples // 4, 100)
        b = rng.rand(n_samples, 100)

        wa = ot.unif(n_samples // 4)
        wb = ot.unif(n_samples)

        wb2 = np.random.rand(n_samples, 20)
        wb2 /= wb2.sum(0, keepdims=True)

        M = ot.dist(a.copy(), b.copy())
        M2 = ot.gpu.dist(a.copy(), b.copy(), to_numpy=False)

        reg = 1

        G = ot.sinkhorn(wa, wb, M, reg)
        G1 = ot.gpu.sinkhorn(wa, wb, M, reg)

        np.testing.assert_allclose(G1, G, rtol=1e-10)

        # run all on gpu
        ot.gpu.sinkhorn(wa, wb, M2, reg, to_numpy=False, log=True)

        # run sinkhorn for multiple targets
        ot.gpu.sinkhorn(wa, wb2, M2, reg, to_numpy=False, log=True)
Example 32
def test_gpu_dist():

    rng = np.random.RandomState(0)

    for n_samples in [50, 100, 500, 1000]:
        print(n_samples)
        a = rng.rand(n_samples // 4, 100)
        b = rng.rand(n_samples, 100)

        M = ot.dist(a.copy(), b.copy())
        M2 = ot.gpu.dist(a.copy(), b.copy())

        np.testing.assert_allclose(M, M2, rtol=1e-10)

        M2 = ot.gpu.dist(a.copy(),
                         b.copy(),
                         metric='euclidean',
                         to_numpy=False)

        # check raise not implemented wrong metric
        with pytest.raises(NotImplementedError):
            M2 = ot.gpu.dist(a.copy(),
                             b.copy(),
                             metric='cityblock',
                             to_numpy=False)
Example 33
 def makeTransportPlan(self):
     if self.source_data and self.target_data:
         if self.source_data_size == self.target_data_size:
             loss_matrix = ot.dist(self.source_data, self.target_data)
             loss_matrix = loss_matrix / loss_matrix.max()
             if not self.source_weight:
                 self.source_weight = np.ones(
                     (self.source_data_size, )) / self.source_data_size
                 print(
                     "The source weights are initialized to uniform. To use "
                     "custom weights, please load them first."
                 )
             if not self.target_weight:
                 self.target_weight = np.ones(
                     (self.target_data_size, )) / self.target_data_size
                 print(
                     "The target weights are initialized to uniform. To use "
                     "custom weights, please load them first."
                 )
             transport = ot.emd(self.source_weight,
                                self.target_weight,
                                loss_matrix,
                                log=True)
             print("Transport Plan Complete")
             print("The cost is: {}".format(transport[1]['cost']))
             self.transport_plan = transport[0]
             return transport
         else:
             print(
                 "Optimal Transport Plan not complete due to mismatch in Source and Target Size."
             )
             return
     else:
         print(
             "Optimal Transport Plan not complete. Please add Source & Target data and rerun."
         )
         return
Example 34
def test_dist():

    n = 100

    x = np.random.randn(n, 2)

    D = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            D[i, j] = np.sum(np.square(x[i, :] - x[j, :]))

    D2 = ot.dist(x, x)
    D3 = ot.dist(x)

    # dist should return squared Euclidean distances by default
    np.testing.assert_allclose(D, D2)
    np.testing.assert_allclose(D, D3)
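
ot.dist defaults to metric='sqeuclidean', which is what the test above verifies; other scipy-style metrics can be requested explicitly. For instance:

import numpy as np
import ot

x = np.random.RandomState(0).randn(4, 2)
D_sq = ot.dist(x, x)                      # squared Euclidean (default)
D_eu = ot.dist(x, x, metric='euclidean')  # plain Euclidean
np.testing.assert_allclose(D_sq, D_eu ** 2)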
Example 35
def test_entropic_gromov():
    n_samples = 50  # nb samples

    mu_s = np.array([0, 0])
    cov_s = np.array([[1, 0], [0, 1]])

    xs = ot.datasets.get_2D_samples_gauss(n_samples, mu_s, cov_s)

    xt = xs[::-1].copy()

    p = ot.unif(n_samples)
    q = ot.unif(n_samples)

    C1 = ot.dist(xs, xs)
    C2 = ot.dist(xt, xt)

    C1 /= C1.max()
    C2 /= C2.max()

    G = ot.gromov.entropic_gromov_wasserstein(
        C1, C2, p, q, 'square_loss', epsilon=5e-4)

    # check constraints
    np.testing.assert_allclose(
        p, G.sum(1), atol=1e-04)  # cf convergence gromov
    np.testing.assert_allclose(
        q, G.sum(0), atol=1e-04)  # cf convergence gromov

    gw, log = ot.gromov.entropic_gromov_wasserstein2(
        C1, C2, p, q, 'kl_loss', epsilon=1e-2, log=True)

    G = log['T']

    np.testing.assert_allclose(gw, 0, atol=1e-1, rtol=1e-1)

    # check constraints
    np.testing.assert_allclose(
        p, G.sum(1), atol=1e-04)  # cf convergence gromov
    np.testing.assert_allclose(
        q, G.sum(0), atol=1e-04)  # cf convergence gromov
Example 36
def test_emd2_multi():
    n = 1000  # nb bins

    # bin positions
    x = np.arange(n, dtype=np.float64)

    # Gaussian distributions
    a = gauss(n, m=20, s=5)  # m= mean, s= std

    ls = np.arange(20, 1000, 20)
    nb = len(ls)
    b = np.zeros((n, nb))
    for i in range(nb):
        b[:, i] = gauss(n, m=ls[i], s=10)

    # loss matrix
    M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))
    # M/=M.max()

    print('Computing {} EMD '.format(nb))

    # emd loss 1 proc
    ot.tic()
    emd1 = ot.emd2(a, b, M, 1)
    ot.toc('1 proc : {} s')

    # emd loss multipro proc
    ot.tic()
    emdn = ot.emd2(a, b, M)
    ot.toc('multi proc : {} s')

    np.testing.assert_allclose(emd1, emdn)

    # emd loss multipro proc with log
    ot.tic()
    emdn = ot.emd2(a, b, M, log=True, return_matrix=True)
    ot.toc('multi proc : {} s')

    for i in range(len(emdn)):
        emd = emdn[i]
        log = emd[1]
        cost = emd[0]
        check_duality_gap(a, b[:, i], M, log['G'], log['u'], log['v'], cost)
        emdn[i] = cost

    emdn = np.array(emdn)
    np.testing.assert_allclose(emd1, emdn)
Example 37
def test_sinkhorn():
    # test sinkhorn
    n = 100
    rng = np.random.RandomState(0)

    x = rng.randn(n, 2)
    u = ot.utils.unif(n)

    M = ot.dist(x, x)

    G = ot.sinkhorn(u, u, M, 1, stopThr=1e-10)

    # check constraints
    np.testing.assert_allclose(
        u, G.sum(1), atol=1e-05)  # cf convergence sinkhorn
    np.testing.assert_allclose(
        u, G.sum(0), atol=1e-05)  # cf convergence sinkhorn
Example 38
def test_plot1D_mat():

    import ot
    import ot.plot

    n_bins = 100  # nb bins

    # bin positions
    x = np.arange(n_bins, dtype=np.float64)

    # Gaussian distributions
    a = ot.datasets.get_1D_gauss(n_bins, m=20, s=5)  # m= mean, s= std
    b = ot.datasets.get_1D_gauss(n_bins, m=60, s=10)

    # loss matrix
    M = ot.dist(x.reshape((n_bins, 1)), x.reshape((n_bins, 1)))
    M /= M.max()

    ot.plot.plot1D_mat(a, b, M, 'Cost matrix M')
Example 39
def test_sinkhorn_variants():
    # test sinkhorn
    n = 100
    rng = np.random.RandomState(0)

    x = rng.randn(n, 2)
    u = ot.utils.unif(n)

    M = ot.dist(x, x)

    G0 = ot.sinkhorn(u, u, M, 1, method='sinkhorn', stopThr=1e-10)
    Gs = ot.sinkhorn(u, u, M, 1, method='sinkhorn_stabilized', stopThr=1e-10)
    Ges = ot.sinkhorn(
        u, u, M, 1, method='sinkhorn_epsilon_scaling', stopThr=1e-10)
    Gerr = ot.sinkhorn(u, u, M, 1, method='do_not_exists', stopThr=1e-10)

    # check values
    np.testing.assert_allclose(G0, Gs, atol=1e-05)
    np.testing.assert_allclose(G0, Ges, atol=1e-05)
    np.testing.assert_allclose(G0, Gerr)
Example 40
def test_emd_empty():
    # test emd and emd2 for simple identity
    n = 100
    rng = np.random.RandomState(0)

    x = rng.randn(n, 2)
    u = ot.utils.unif(n)

    M = ot.dist(x, x)

    G = ot.emd([], [], M)

    # check G is identity
    np.testing.assert_allclose(G, np.eye(n) / n)
    # check constraints
    np.testing.assert_allclose(u, G.sum(1))  # cf convergence sinkhorn
    np.testing.assert_allclose(u, G.sum(0))  # cf convergence sinkhorn

    w = ot.emd2([], [], M)
    # check loss=0
    np.testing.assert_allclose(w, 0)
Example 41
def test_dual_variables():
    n = 5000  # nb bins
    m = 6000  # nb bins

    mean1 = 1000
    mean2 = 1100

    # bin positions
    x = np.arange(n, dtype=np.float64)
    y = np.arange(m, dtype=np.float64)

    # Gaussian distributions
    a = gauss(n, m=mean1, s=5)  # m= mean, s= std

    b = gauss(m, m=mean2, s=10)

    # loss matrix
    M = ot.dist(x.reshape((-1, 1)), y.reshape((-1, 1))) ** (1. / 2)

    print('Computing {} EMD '.format(1))

    # emd loss 1 proc
    ot.tic()
    G, log = ot.emd(a, b, M, log=True)
    ot.toc('1 proc : {} s')

    ot.tic()
    G2 = ot.emd(b, a, np.ascontiguousarray(M.T))
    ot.toc('1 proc : {} s')

    cost1 = (G * M).sum()
    # Check symmetry
    np.testing.assert_array_almost_equal(cost1, (M * G2.T).sum())
    # Check with closed-form solution for gaussians
    np.testing.assert_almost_equal(cost1, np.abs(mean1 - mean2))

    # Check that both cost computations are equivalent
    np.testing.assert_almost_equal(cost1, log['cost'])
    check_duality_gap(a, b, M, G, log['u'], log['v'], log['cost'])
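
check_duality_gap is a test helper that is not shown here. By LP duality for exact OT, the primal cost <G, M> equals the dual objective a.u + b.v at the optimum, so a plausible sketch of the helper (hypothetical, not the actual POT test code) is:

import numpy as np

def check_duality_gap(a, b, M, G, u, v, cost):
    # hypothetical helper: zero duality gap and primal consistency
    dual_value = np.vdot(a, u) + np.vdot(b, v)
    np.testing.assert_almost_equal(cost, dual_value)
    np.testing.assert_almost_equal(cost, np.sum(G * M))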
Example 42
def test_warnings():
    n = 100  # nb bins
    m = 100  # nb bins

    mean1 = 30
    mean2 = 50

    # bin positions
    x = np.arange(n, dtype=np.float64)
    y = np.arange(m, dtype=np.float64)

    # Gaussian distributions
    a = gauss(n, m=mean1, s=5)  # m= mean, s= std

    b = gauss(m, m=mean2, s=10)

    # loss matrix
    M = ot.dist(x.reshape((-1, 1)), y.reshape((-1, 1))) ** (1. / 2)

    print('Computing {} EMD '.format(1))
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        print('Computing {} EMD '.format(1))
        ot.emd(a, b, M, numItermax=1)
        assert "numItermax" in str(w[-1].message)
        assert len(w) == 1
        a[0] = 100
        print('Computing {} EMD '.format(2))
        ot.emd(a, b, M)
        assert "infeasible" in str(w[-1].message)
        assert len(w) == 2
        a[0] = -1
        print('Computing {} EMD '.format(2))
        ot.emd(a, b, M)
        assert "infeasible" in str(w[-1].message)
        assert len(w) == 3
Example 43
# Generate data
# -------------

#%% parameters

n = 100  # nb bins

# bin positions
x = np.arange(n, dtype=np.float64)

# Gaussian distributions
a = ot.datasets.get_1D_gauss(n, m=20, s=5)  # m= mean, s= std
b = ot.datasets.get_1D_gauss(n, m=60, s=10)

# loss matrix
M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))
M /= M.max()

##############################################################################
# Solve EMD
# ---------

#%% EMD

G0 = ot.emd(a, b, M)

pl.figure(3, figsize=(5, 5))
ot.plot.plot1D_mat(a, b, G0, 'OT matrix G0')

##############################################################################
# Solve EMD with Frobenius norm regularization
Example 44
##############################################################################
# Dataset 1 : uniform sampling
# ----------------------------

n = 20  # nb samples
xs = np.zeros((n, 2))
xs[:, 0] = np.arange(n) + 1
xs[:, 1] = (np.arange(n) + 1) * -0.001  # to make it strictly convex...

xt = np.zeros((n, 2))
xt[:, 1] = np.arange(n) + 1

a, b = ot.unif(n), ot.unif(n)  # uniform distribution on samples

# loss matrix
M1 = ot.dist(xs, xt, metric='euclidean')
M1 /= M1.max()

# loss matrix
M2 = ot.dist(xs, xt, metric='sqeuclidean')
M2 /= M2.max()

# loss matrix
Mp = np.sqrt(ot.dist(xs, xt, metric='euclidean'))
Mp /= Mp.max()

# Data
pl.figure(1, figsize=(7, 3))
pl.clf()
pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')
pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples')
Example 45
n = 50  # nb samples

mu_s = np.array([0, 0])
cov_s = np.array([[1, 0], [0, 1]])

mu_t = np.array([4, 4])
cov_t = np.array([[1, -.8], [-.8, 1]])

xs = ot.datasets.get_2D_samples_gauss(n, mu_s, cov_s)
xt = ot.datasets.get_2D_samples_gauss(n, mu_t, cov_t)

a, b = np.ones((n,)) / n, np.ones((n,)) / n  # uniform distribution on samples

# loss matrix
M = ot.dist(xs, xt)
M /= M.max()

##############################################################################
# Plot data
# ---------

#%% plot samples

pl.figure(1)
pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')
pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples')
pl.legend(loc=0)
pl.title('Source and target distributions')

pl.figure(2)