Example #1
import numpy as np
import ot
import tensorflow as tf


def test_emd1d_device_tf():
    nx = ot.backend.TensorflowBackend()
    rng = np.random.RandomState(0)
    n = 10
    x = np.linspace(0, 5, n)
    rho_u = np.abs(rng.randn(n))
    rho_u /= rho_u.sum()
    rho_v = np.abs(rng.randn(n))
    rho_v /= rho_v.sum()

    # Check that everything stays on the CPU
    with tf.device("/CPU:0"):
        xb, rho_ub, rho_vb = nx.from_numpy(x, rho_u, rho_v)
        emd = ot.emd_1d(xb, xb, rho_ub, rho_vb)
        emd2 = ot.emd2_1d(xb, xb, rho_ub, rho_vb)
        nx.assert_same_dtype_device(xb, emd)
        nx.assert_same_dtype_device(xb, emd2)

    if len(tf.config.list_physical_devices('GPU')) > 0:
        # Check that everything happens on the GPU: TensorFlow places new
        # tensors on the GPU by default when one is available
        xb, rho_ub, rho_vb = nx.from_numpy(x, rho_u, rho_v)
        emd = ot.emd_1d(xb, xb, rho_ub, rho_vb)
        emd2 = ot.emd2_1d(xb, xb, rho_ub, rho_vb)
        nx.assert_same_dtype_device(xb, emd)
        nx.assert_same_dtype_device(xb, emd2)
        assert nx.dtype_device(emd)[1].startswith("GPU")
Example #2
    def get_paths(self, n=5000, n_steps=3):
        # Only 3 steps are supported at this time.
        assert n_steps == 3
        np.random.seed(42)
        self.r1, self.r2, self.r3 = (0.5, 0.1, 0.1)
        labels = np.repeat([0, 2], n)

        data = np.abs(np.random.randn(n * 2) * 0.5 / np.pi)
        data[labels == 2] = 1 - data[labels == 2]
        # print(data)

        # McCann interpolant / barycenter interpolation
        import ot

        gamma = ot.emd_1d(data[labels == 0], data[labels == 2])
        ninterp = 5000
        i05 = interpolate_with_ot(
            data[labels == 0][:, np.newaxis],
            data[labels == 2][:, np.newaxis],
            gamma,
            0.5,
            ninterp,
        )
        # data = data.reshape(-1, 2)
        data = np.stack([data[labels == 0],
                         i05.flatten(), data[labels == 2]],
                        axis=-1)

        theta = data * np.pi  # map positions to angles along the circle

        r = (1 + np.random.randn(n) * self.r2)[:, None, None]

        x2d = np.stack([np.cos(theta), np.sin(theta)], axis=-1) * r
        return x2d
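The helper interpolate_with_ot is not shown in these snippets; below is a minimal sketch of the McCann (displacement) interpolation step it presumably performs, given a plan from ot.emd_1d with uniform weights.

import numpy as np
import ot

rng = np.random.RandomState(0)
xs = np.sort(rng.rand(100))    # source samples at t = 0
xt = np.sort(rng.rand(100))    # target samples at t = 1
gamma = ot.emd_1d(xs, xt)      # OT plan with uniform weights

# each pair (i, j) carrying mass gamma[i, j] contributes the point
# (1 - t) * xs[i] + t * xt[j] to the interpolated measure at time t
t = 0.5
i, j = np.nonzero(gamma > 1e-12)
x_mid = (1 - t) * xs[i] + t * xt[j]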
Example #3
import numpy as np
import ot
from scipy.stats import wasserstein_distance


def test_emd_1d_emd2_1d_with_weights():
    # test that emd_1d gives similar results to emd
    n = 20
    m = 30
    rng = np.random.RandomState(0)
    u = rng.randn(n, 1)
    v = rng.randn(m, 1)

    w_u = rng.uniform(0., 1., n)
    w_u = w_u / w_u.sum()

    w_v = rng.uniform(0., 1., m)
    w_v = w_v / w_v.sum()

    M = ot.dist(u, v, metric='sqeuclidean')

    G, log = ot.emd(w_u, w_v, M, log=True)
    wass = log["cost"]
    G_1d, log = ot.emd_1d(u, v, w_u, w_v, metric='sqeuclidean', log=True)
    wass1d = log["cost"]
    wass1d_emd2 = ot.emd2_1d(u, v, w_u, w_v, metric='sqeuclidean', log=False)
    wass1d_euc = ot.emd2_1d(u, v, w_u, w_v, metric='euclidean', log=False)

    # check loss is similar
    np.testing.assert_allclose(wass, wass1d)
    np.testing.assert_allclose(wass, wass1d_emd2)

    # check loss is similar to scipy's implementation for Euclidean metric
    wass_sp = wasserstein_distance(u.reshape((-1, )), v.reshape((-1, )), w_u,
                                   w_v)
    np.testing.assert_allclose(wass_sp, wass1d_euc)

    # check constraints
    np.testing.assert_allclose(w_u, G.sum(1))
    np.testing.assert_allclose(w_v, G.sum(0))
Example #4
    def __init__(self):
        np.random.seed(42)
        n = 5000
        self.r1, self.r2, self.r3 = (0.5, 0.1, 0.1)
        self.labels = np.repeat([0, 2], n)

        data = np.abs(np.random.randn(n * 2) * 0.5 / np.pi)
        data[self.labels == 2] = 1 - data[self.labels == 2]
        # print(data)

        # McCann interpolant / barycenter interpolation
        import ot

        gamma = ot.emd_1d(data[self.labels == 0], data[self.labels == 2])
        ninterp = 5000
        i05 = interpolate_with_ot(
            data[self.labels == 0][:, np.newaxis],
            data[self.labels == 2][:, np.newaxis],
            gamma,
            0.5,
            ninterp,
        )
        data = np.concatenate([data, i05.flatten()])
        self.labels = np.concatenate([self.labels, np.ones(n)])
        theta = data * np.pi  # map positions to angles along the circle

        r = (1 + np.random.randn(*theta.shape) * self.r2)[:, None]
        r = np.repeat(r, 2, axis=1)
        x2d = np.array([np.cos(theta), np.sin(theta)]).T * r

        ##########################
        # ONLY CHANGE FROM ABOVE #
        # rand() > 1.0 is always False, so this mask disables the flip below
        mask = np.random.rand(x2d.shape[0]) > 1.0
        ##########################

        mask *= x2d[:, 0] < 0
        x2d[mask] = [[0, 2]] + [[1, -1]] * x2d[mask]  # reflect across y = 1

        # x2d[self.labels == 1] -= [0.7, 0.0]
        # x2d[x2d[:, 1] < 0] *= [1, -1]
        self.data = x2d
        self.ncells = self.data.shape[0]

        next2d = np.array([np.cos(theta + 0.3), np.sin(theta + 0.3)]).T * r
        next2d[mask] = [[0, 2]] + [[1, -1]] * next2d[mask]
        # next2d += np.random.randn(*next2d.shape) * self.r3
        self.velocity = next2d - x2d

        # Mask out timepoint zero
        mask = self.labels != 0
        self.labels = self.labels[mask]
        self.labels -= 1
        self.data = self.data[mask]
        self.velocity = self.velocity[mask]
        self.ncells = self.labels.shape[0]
Example #5
def find_submatching_locally_linear(Dist1, Dist2, coup1, coup2, i, j):
    """Match the subgraphs anchored at nodes i and j by solving a 1D OT
    problem between their distance profiles to the anchors."""
    # restrict to the nodes that carry mass in each anchor column
    subgraph_i = find_support(coup1[:, i])
    p_i = coup1[:, i][subgraph_i] / np.sum(coup1[:, i][subgraph_i])

    subgraph_j = find_support(coup2[:, j])
    p_j = coup2[:, j][subgraph_j] / np.sum(coup2[:, j][subgraph_j])

    # 1D samples: distances from each supported node to its anchor
    x_i = list(Dist1[i, :][subgraph_i].reshape(len(subgraph_i),))
    x_j = list(Dist2[j, :][subgraph_j].reshape(len(subgraph_j),))

    coup_sub_ij = ot.emd_1d(x_i, x_j, p_i, p_j, p=2)

    return coup_sub_ij
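A toy invocation sketch with assumed inputs; find_support is not shown in the snippet, so a plausible stand-in returning the indices that carry mass is defined here.

import numpy as np
import ot

def find_support(col, tol=1e-10):
    # assumed stand-in for the missing helper: indices with nonzero mass
    return np.flatnonzero(col > tol)

rng = np.random.RandomState(0)
Dist1, Dist2 = rng.rand(6, 6), rng.rand(5, 5)
coup1 = np.zeros((6, 3)); coup1[[0, 2, 4], 0] = [0.2, 0.1, 0.1]
coup2 = np.zeros((5, 3)); coup2[[1, 3], 0] = [0.3, 0.2]

sub = find_submatching_locally_linear(Dist1, Dist2, coup1, coup2, 0, 0)
print(sub.shape)  # (3, 2) local plan between the two anchored subgraphs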
Example #6
import numpy as np
import pytest
import ot
from scipy.stats import wasserstein_distance


def test_emd_1d_emd2_1d():
    # test that emd_1d gives similar results to emd
    n = 20
    m = 30
    rng = np.random.RandomState(0)
    u = rng.randn(n, 1)
    v = rng.randn(m, 1)

    M = ot.dist(u, v, metric='sqeuclidean')

    G, log = ot.emd([], [], M, log=True)
    wass = log["cost"]
    G_1d, log = ot.emd_1d(u, v, [], [], metric='sqeuclidean', log=True)
    wass1d = log["cost"]
    wass1d_emd2 = ot.emd2_1d(u, v, [], [], metric='sqeuclidean', log=False)
    wass1d_euc = ot.emd2_1d(u, v, [], [], metric='euclidean', log=False)

    # check loss is similar
    np.testing.assert_allclose(wass, wass1d)
    np.testing.assert_allclose(wass, wass1d_emd2)

    # check loss is similar to scipy's implementation for Euclidean metric
    wass_sp = wasserstein_distance(u.reshape((-1, )), v.reshape((-1, )))
    np.testing.assert_allclose(wass_sp, wass1d_euc)

    # check constraints
    np.testing.assert_allclose(np.ones((n, )) / n, G.sum(1))
    np.testing.assert_allclose(np.ones((m, )) / m, G.sum(0))

    # check G is similar
    np.testing.assert_allclose(G, G_1d)

    # check an AssertionError is raised if called on non-1d arrays
    u = rng.randn(n, 2)
    v = rng.randn(m, 2)
    with pytest.raises(AssertionError):
        ot.emd_1d(u, v, [], [])
Example #7
import numpy as np
import ot


def test_emd1d_type_devices(nx):
    rng = np.random.RandomState(0)

    n = 10
    x = np.linspace(0, 5, n)
    rho_u = np.abs(rng.randn(n))
    rho_u /= rho_u.sum()
    rho_v = np.abs(rng.randn(n))
    rho_v /= rho_v.sum()

    for tp in nx.__type_list__:
        print(nx.dtype_device(tp))

        xb, rho_ub, rho_vb = nx.from_numpy(x, rho_u, rho_v, type_as=tp)

        emd = ot.emd_1d(xb, xb, rho_ub, rho_vb)
        emd2 = ot.emd2_1d(xb, xb, rho_ub, rho_vb)

        nx.assert_same_dtype_device(xb, emd)
        nx.assert_same_dtype_device(xb, emd2)
Example #8
def find_submatching_locally_linear_ts(dists1, dists2, coup1, coup2, i_enum, j_enum):
    """
    Compute a locally linear matching assuming tall, skinny distance matrices.

    Parameters:
    dists1, dists2 : tall skinny ndarrays
    coup1, coup2 : tall skinny csr matrices
    i_enum, j_enum : anchor node indices in the tall skinny representation
    """
    # restrict to the nodes that carry mass in each anchor column
    subgraph_i = coup1[:, i_enum].nonzero()[0]
    subgraph_j = coup2[:, j_enum].nonzero()[0]

    p_i = coup1[subgraph_i, i_enum].toarray()
    p_i /= np.sum(p_i)
    p_j = coup2[subgraph_j, j_enum].toarray()
    p_j /= np.sum(p_j)

    # 1D samples: distances from each supported node to its anchor
    x_i = dists1[subgraph_i, i_enum]
    x_j = dists2[subgraph_j, j_enum]

    coup_sub_ij = ot.emd_1d(x_i, x_j, p_i.reshape(-1), p_j.reshape(-1), p=2)

    return coup_sub_ij
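A toy invocation sketch under the shapes described in the docstring (assumed single-anchor-column csr couplings and ndarray distance columns):

import numpy as np
import ot
from scipy.sparse import csr_matrix

rng = np.random.RandomState(0)
dists1, dists2 = rng.rand(6, 1), rng.rand(5, 1)   # tall skinny distances
c1 = np.zeros((6, 1)); c1[[0, 2, 4], 0] = [0.2, 0.1, 0.1]
c2 = np.zeros((5, 1)); c2[[1, 3], 0] = [0.3, 0.2]
coup1, coup2 = csr_matrix(c1), csr_matrix(c2)

sub = find_submatching_locally_linear_ts(dists1, dists2, coup1, coup2, 0, 0)
print(sub.shape)  # (3, 2) local plan, as in the dense variant above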
Example #9
    def get_paths(self, n=5000, n_steps=3):
        # Only 3 steps are supported at this time.
        assert n_steps == 3
        np.random.seed(42)
        self.r1, self.r2, self.r3 = (0.5, 0.1, 0.1)
        labels = np.repeat([0, 2], n)

        data = np.abs(np.random.randn(n * 2) * 0.5 / np.pi)
        data[labels == 2] = 1 - data[labels == 2]
        # print(data)

        # McCann interpolant / barycenter interpolation
        import ot

        gamma = ot.emd_1d(data[labels == 0], data[labels == 2])
        ninterp = 5000
        i05 = interpolate_with_ot(
            data[labels == 0][:, np.newaxis],
            data[labels == 2][:, np.newaxis],
            gamma,
            0.5,
            ninterp,
        )
        # data = data.reshape(-1, 2)
        data = np.stack([data[labels == 0],
                         i05.flatten(), data[labels == 2]],
                        axis=-1)

        theta = data * np.pi  # map positions to angles along the circle

        r = (1 + np.random.randn(n) * self.r2)[:, None, None]

        x2d = np.stack([np.cos(theta), np.sin(theta)], axis=-1) * r
        # mask = (r > 1.0)
        # TODO these reference paths could be improved to include better routing
        # along the manifold. Right now they are calculated in 1d and simply
        # lifted into 2d along the same radius. Trouble comes when a branch of
        # the tree gets flipped over y=1, which gives the opposite of the
        # expected radii. Furthermore, 2d transport is no longer the same as 1d
        # when there is Gaussian noise along the manifold.
        #
        # Right now the paths are good enough for our purposes, and making them
        # better will only improve how TrajectoryNet looks.
        """
        import optimal_transport.emd as emd
        _, log = emd.earth_mover_distance(x2d[:,0], x2d[:,1], return_matrix=True)
        print(np.where(log['G'] > 1e-8))
        path = np.stack([x2d[:,0], x2d[np.where(log['G'] > 1e-8)[1],1]])
        path = np.swapaxes(path, 0,1)
        import matplotlib.pyplot as plt
        #plt.hist(log['G'].flatten())
        fig, axes = plt.subplots(1,2,figsize=(20,10))

        for p in path[:1000]:
            axes[0].plot(p[:,0], p[:,1])
        for p in x2d[:1000,:2]:
            axes[1].plot(p[:,0], p[:,1])
        plt.show()
        exit()
        """
        # randomly flip half of the points on the left (x < 0) across y = 1
        mask = np.random.rand(*x2d.shape[:2]) > 0.5
        mask *= x2d[:, :, 0] < 0
        x2d[mask] = [[0, 2]] + [[1, -1]] * x2d[mask]
        x2d = x2d.reshape(n, n_steps, 2)
        return x2d
Example #10
pl.plot(x, b, 'r', label='Target distribution')
pl.legend()

#%% plot distributions and loss matrix

pl.figure(2, figsize=(5, 5))
ot.plot.plot1D_mat(a, b, M, 'Cost matrix M')

##############################################################################
# Solve EMD
# ---------

#%% EMD

# use fast 1D solver
G0 = ot.emd_1d(x, x, a, b)

# Equivalent to
# G0 = ot.emd(a, b, M)

pl.figure(3, figsize=(5, 5))
ot.plot.plot1D_mat(a, b, G0, 'OT matrix G0')
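
# Added sanity check (a sketch, assuming numpy was imported as np earlier in
# this example): the fast 1D solver and the generic solver should agree here.
np.testing.assert_allclose(G0, ot.emd(a, b, M))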

##############################################################################
# Solve Sinkhorn
# --------------

#%% Sinkhorn

lambd = 1e-3
Gs = ot.sinkhorn(a, b, M, lambd, verbose=True)
Example #11
import time

import numpy as np
import ot
from scipy.sparse import coo_matrix
from sklearn.metrics import pairwise_distances


def compressed_fgw(dists1, dists2, fdists1, fdists2,
                   membership1, membership2,
                   features1, features2, p1, p2,
                   node_subset_idx1, node_subset_idx2,
                   dists_subset1, dists_subset2,
                   p_subset1, p_subset2, alpha=0.5, beta=0.5,
                   verbose=False, return_dense=True):

    """
    Compressed FGW on partitioned data structures
    -----------
    Parameters:
    dists1,dists2,fdists1,fdists2,membership1,membership2 : |nodes| x |node_subset| csr matrices
    features1,features2 : |nodes| x |features| ndarrays
    p1,p2 : |nodes| x 1 ndarrays
    node_subset_idx1, node_subset_idx2 : |node_subset| lists
    dists_subset1, dists_subset2 : |node_subset| x |node_subset| ndarrays
    p_subset1, p_subset2 : |node_subset| x 1 ndarrays
    -----------
    Returns:
    full_coup : |nodes| x |nodes| csr matrix
    """


    M_compressed = pairwise_distances(features1[node_subset_idx1, :], features2[node_subset_idx2, :])
    # Match compressed graphs
    start = time.time()
    if verbose:
        print('Matching Compressed Graphs...')
    coup_compressed = ot.gromov.fused_gromov_wasserstein(M_compressed,
                                                         dists_subset1, dists_subset2,
                                                         p_subset1, p_subset2, alpha=alpha)

    if verbose:
        print('Time for Matching Compressed:', time.time() - start)

    # Find submatchings and create full coupling
    if verbose:
        print('Matching Subgraphs and Constructing Coupling...')
    full_coup = coo_matrix((dists1.shape[0], dists2.shape[0]))

    matching_time = 0
    matching_and_expanding_time = 0
    num_local_matches = 0

    for (i_enum, i) in enumerate(node_subset_idx1):
        subgraph_i = membership1[:, i_enum].nonzero()[0]  # get indices anchored to i
        for (j_enum, j) in enumerate(node_subset_idx2):
            start = time.time()
            w_ij = coup_compressed[i_enum, j_enum]
            if w_ij > 1e-10:
                num_local_matches += 1
                subgraph_j = membership2[:, j_enum].nonzero()[0]  # get indices anchored to j
                p_i = (p1[subgraph_i] / np.sum(p1[subgraph_i])).reshape(-1)
                p_j = (p2[subgraph_j] / np.sum(p2[subgraph_j])).reshape(-1)
                # Compute submatching based on graph distances
                if beta > 0:
                    coup_sub_dist_ij = ot.emd_1d(dists1[subgraph_i, i_enum].toarray(),
                                                 dists2[subgraph_j, j_enum].toarray(),
                                                 p_i, p_j, p=2)
                else:
                    coup_sub_dist_ij = np.zeros([len(subgraph_i), len(subgraph_j)])
                # Compute submatching based on node features
                if beta < 1:
                    coup_sub_features_ij = ot.emd_1d(fdists1[subgraph_i, i_enum].toarray(),
                                                     fdists2[subgraph_j, j_enum].toarray(),
                                                     p_i, p_j, p=2)
                else:
                    coup_sub_features_ij = np.zeros([len(subgraph_i), len(subgraph_j)])
                # Take weighted average
                coup_sub_ij = (1 - beta) * coup_sub_features_ij + beta * coup_sub_dist_ij
                matching_time += time.time() - start

                # Expand to correct size
                idx = np.argwhere(coup_sub_ij > 1e-10)
                idx_i = idx.T[0]
                idx_j = idx.T[1]
                row = np.array(subgraph_i)[idx_i]
                col = np.array(subgraph_j)[idx_j]
                data = w_ij * coup_sub_ij[idx_i, idx_j]
                expanded_coup_sub_ij = coo_matrix((data, (row, col)),
                                                  shape=(full_coup.shape[0], full_coup.shape[1]))
                # Update full coupling
                full_coup += expanded_coup_sub_ij
                matching_and_expanding_time += time.time() - start


    if verbose:
        print('Total Time for', num_local_matches, 'local matches:')
        print('Local matching:', matching_time)
        print('Local Matching Plus Expansion:', matching_and_expanding_time)

    if return_dense:
        return full_coup.toarray()
    else:
        return full_coup
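A toy end-to-end invocation sketch under assumed shapes: two copies of the same 6-node graph with 2 anchors each. Flat 1D weight vectors are passed where the docstring says |nodes| x 1; the function handles both the same way.

import numpy as np
from scipy.sparse import csr_matrix
from sklearn.metrics import pairwise_distances

rng = np.random.RandomState(0)
n, k = 6, 2
node_subset_idx = [0, 3]                      # anchor nodes
features = rng.rand(n, 3)

# each node is anchored to exactly one of the two anchors
membership = csr_matrix(np.repeat(np.eye(k), n // k, axis=0))
dists = csr_matrix(rng.rand(n, k))            # node-to-anchor graph distances
fdists = csr_matrix(pairwise_distances(features, features[node_subset_idx]))
p = np.full(n, 1.0 / n)
dists_subset = pairwise_distances(features[node_subset_idx])
p_subset = np.full(k, 1.0 / k)

coup = compressed_fgw(dists, dists, fdists, fdists,
                      membership, membership,
                      features, features, p, p,
                      node_subset_idx, node_subset_idx,
                      dists_subset, dists_subset,
                      p_subset, p_subset)
print(coup.shape)  # (6, 6) dense coupling between the two node sets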