def test_emd1d_device_tf():
    """Check that the 1D EMD solvers return results on the inputs' device.

    The CPU case always runs; the GPU case runs only when TensorFlow reports
    at least one physical GPU.
    """
    nx = ot.backend.TensorflowBackend()
    rng = np.random.RandomState(0)
    n = 10
    x = np.linspace(0, 5, n)

    # Two random histograms, each normalised to unit mass (rho_u drawn first).
    rho_u = np.abs(rng.randn(n))
    rho_u /= rho_u.sum()
    rho_v = np.abs(rng.randn(n))
    rho_v /= rho_v.sum()

    def _solve_and_check():
        # Convert to backend tensors, run both solvers and verify that the
        # outputs share dtype and device with the inputs.
        xb, rho_ub, rho_vb = nx.from_numpy(x, rho_u, rho_v)
        plan = ot.emd_1d(xb, xb, rho_ub, rho_vb)
        cost = ot.emd2_1d(xb, xb, rho_ub, rho_vb)
        nx.assert_same_dtype_device(xb, plan)
        nx.assert_same_dtype_device(xb, cost)
        return plan

    # Check that everything stays on the CPU
    with tf.device("/CPU:0"):
        _solve_and_check()

    if tf.config.list_physical_devices('GPU'):
        # Check that everything happens on the GPU
        plan = _solve_and_check()
        assert nx.dtype_device(plan)[1].startswith("GPU")
def get_paths(self, n=5000, n_steps=3):
    """Build reference paths of three points each, lifted onto a noisy circle.

    Start and end angles are sampled in 1D, coupled with ``ot.emd_1d``, and a
    midpoint sample is drawn with ``interpolate_with_ot`` at fraction 0.5.
    The three angle columns are then mapped to 2D points with one random
    radius per path.

    Parameters
    ----------
    n : int
        Number of paths to generate.
    n_steps : int
        Number of points per path. Only 3 is supported.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, 3, 2)``: path index, step index, (x, y).

    Notes
    -----
    Reseeds the global NumPy RNG with 42 and sets ``self.r1``, ``self.r2``
    and ``self.r3``.
    """
    # Only 3 steps are supported at this time.
    assert n_steps == 3
    np.random.seed(42)
    self.r1, self.r2, self.r3 = (0.5, 0.1, 0.1)
    labels = np.repeat([0, 2], n)
    data = np.abs(np.random.randn(n * 2) * 0.5 / np.pi)
    data[labels == 2] = 1 - data[labels == 2]

    # McCann interpolant / barycenter interpolation
    import ot

    start_pts = data[labels == 0]
    end_pts = data[labels == 2]
    gamma = ot.emd_1d(start_pts, end_pts)
    # The interpolated sample is stacked next to the n start and n end
    # points below, so it must contain exactly n entries. This was a
    # hard-coded 5000, which only worked for the default n.
    # NOTE(review): assumes the last argument of interpolate_with_ot is the
    # number of samples to draw -- confirm against its definition.
    ninterp = n
    i05 = interpolate_with_ot(
        start_pts[:, np.newaxis],
        end_pts[:, np.newaxis],
        gamma,
        0.5,
        ninterp,
    )
    data = np.stack([start_pts, i05.flatten(), end_pts], axis=-1)
    theta = data * np.pi  # transform to along the circle
    # One radius per path, broadcast over the steps and the (x, y) axis.
    r = (1 + np.random.randn(n) * self.r2)[:, None, None]
    x2d = np.stack([np.cos(theta), np.sin(theta)], axis=-1) * r
    return x2d
def test_emd_1d_emd2_1d_with_weights():
    """Compare the weighted 1D EMD solvers with ``ot.emd`` and SciPy.

    Besides comparing the costs, the marginal constraints are checked on both
    the ``ot.emd`` plan and the ``ot.emd_1d`` plan.
    """
    # test emd1d gives similar results as emd
    n = 20
    m = 30
    rng = np.random.RandomState(0)
    u = rng.randn(n, 1)
    v = rng.randn(m, 1)

    # Random positive weights, normalised to unit mass.
    w_u = rng.uniform(0., 1., n)
    w_u = w_u / w_u.sum()

    w_v = rng.uniform(0., 1., m)
    w_v = w_v / w_v.sum()

    M = ot.dist(u, v, metric='sqeuclidean')

    G, log_emd = ot.emd(w_u, w_v, M, log=True)
    wass = log_emd["cost"]
    G_1d, log_1d = ot.emd_1d(u, v, w_u, w_v, metric='sqeuclidean', log=True)
    wass1d = log_1d["cost"]
    wass1d_emd2 = ot.emd2_1d(u, v, w_u, w_v, metric='sqeuclidean', log=False)
    wass1d_euc = ot.emd2_1d(u, v, w_u, w_v, metric='euclidean', log=False)

    # check loss is similar
    np.testing.assert_allclose(wass, wass1d)
    np.testing.assert_allclose(wass, wass1d_emd2)

    # check loss is similar to scipy's implementation for Euclidean metric
    wass_sp = wasserstein_distance(u.reshape((-1, )), v.reshape((-1, )), w_u, w_v)
    np.testing.assert_allclose(wass_sp, wass1d_euc)

    # check constraints on the generic solver's plan
    np.testing.assert_allclose(w_u, G.sum(1))
    np.testing.assert_allclose(w_v, G.sum(0))

    # check constraints on the 1D solver's plan as well; it was previously
    # computed but never verified
    np.testing.assert_allclose(w_u, G_1d.sum(1))
    np.testing.assert_allclose(w_v, G_1d.sum(0))
def __init__(self):
    """Generate the dataset: points on a noisy circle with velocities.

    Sets ``self.r1``, ``self.r2``, ``self.r3``, ``self.labels``,
    ``self.data``, ``self.velocity`` and ``self.ncells``. The global NumPy
    RNG is reseeded with 42. Points whose original label is 0 are removed at
    the end and the remaining labels are shifted down by one.
    """
    np.random.seed(42)
    n = 5000
    self.r1, self.r2, self.r3 = (0.5, 0.1, 0.1)
    self.labels = np.repeat([0, 2], n)
    is_start = self.labels == 0
    is_end = self.labels == 2

    data = np.abs(np.random.randn(n * 2) * 0.5 / np.pi)
    data[is_end] = 1 - data[is_end]

    # McCann interpolant / barycenter interpolation
    import ot

    start_pts = data[is_start]
    end_pts = data[is_end]
    gamma = ot.emd_1d(start_pts, end_pts)
    ninterp = 5000
    i05 = interpolate_with_ot(
        start_pts[:, np.newaxis],
        end_pts[:, np.newaxis],
        gamma,
        0.5,
        ninterp,
    )
    # Append the interpolated points and give them label 1.
    data = np.concatenate([data, i05.flatten()])
    self.labels = np.concatenate([self.labels, np.ones(n)])

    theta = data * np.pi  # transform to along the circle
    # One noisy radius per point, duplicated for the x and y columns.
    radius = np.repeat(
        (1 + np.random.randn(*theta.shape) * self.r2)[:, None], 2, axis=1
    )
    x2d = np.array([np.cos(theta), np.sin(theta)]).T * radius

    def _flip(points):
        # Map (x, y) -> (x, 2 - y).
        return [[0, 2]] + [[1, -1]] * points

    ##########################
    # ONLY CHANGE FROM ABOVE #
    # np.random.rand draws from [0, 1), so this comparison selects nothing;
    # the draw itself is kept so the RNG stream is consumed as before.
    flip_mask = np.random.rand(x2d.shape[0]) > 1.0
    ##########################
    flip_mask *= x2d[:, 0] < 0
    x2d[flip_mask] = _flip(x2d[flip_mask])

    # Velocity is the displacement obtained by advancing every angle by 0.3.
    next2d = np.array([np.cos(theta + 0.3), np.sin(theta + 0.3)]).T * radius
    next2d[flip_mask] = _flip(next2d[flip_mask])
    velocity = next2d - x2d

    # Mask out timepoint zero
    keep = self.labels != 0
    self.labels = self.labels[keep]
    self.labels -= 1
    self.data = x2d[keep]
    self.velocity = velocity[keep]
    self.ncells = self.labels.shape[0]
def find_submatching_locally_linear(Dist1,Dist2,coup1,coup2,i,j):
    """Match the supports of two coupling columns with a 1D EMD.

    Column ``i`` of ``coup1`` and column ``j`` of ``coup2`` are restricted to
    the indices returned by ``find_support`` and normalised to sum to one.
    Positions are taken from row ``i`` of ``Dist1`` and row ``j`` of
    ``Dist2`` at those same indices.

    Returns the coupling produced by ``ot.emd_1d``.
    """
    column_i = coup1[:, i]
    subgraph_i = find_support(column_i)
    mass_i = column_i[subgraph_i]
    p_i = mass_i / np.sum(mass_i)

    column_j = coup2[:, j]
    subgraph_j = find_support(column_j)
    mass_j = column_j[subgraph_j]
    p_j = mass_j / np.sum(mass_j)

    # Flatten the selected distances into plain lists of positions.
    x_i = list(Dist1[i, :][subgraph_i].reshape(len(subgraph_i),))
    x_j = list(Dist2[j, :][subgraph_j].reshape(len(subgraph_j),))

    return ot.emd_1d(x_i, x_j, p_i, p_j, p=2)
def test_emd_1d_emd2_1d():
    """Compare the 1D EMD solvers with ``ot.emd`` and SciPy for empty weights.

    Empty weight lists are passed to every solver; the expected marginals are
    uniform (``1 / n`` and ``1 / m``). The test also checks that ``emd_1d``
    raises ``AssertionError`` for inputs with two columns.
    """
    # test emd1d gives similar results as emd
    n = 20
    m = 30
    rng = np.random.RandomState(0)
    u = rng.randn(n, 1)
    v = rng.randn(m, 1)

    M = ot.dist(u, v, metric='sqeuclidean')

    G, log = ot.emd([], [], M, log=True)
    wass = log["cost"]
    G_1d, log = ot.emd_1d(u, v, [], [], metric='sqeuclidean', log=True)
    wass1d = log["cost"]
    wass1d_emd2 = ot.emd2_1d(u, v, [], [], metric='sqeuclidean', log=False)
    wass1d_euc = ot.emd2_1d(u, v, [], [], metric='euclidean', log=False)

    # check loss is similar
    np.testing.assert_allclose(wass, wass1d)
    np.testing.assert_allclose(wass, wass1d_emd2)

    # check loss is similar to scipy's implementation for Euclidean metric
    wass_sp = wasserstein_distance(u.reshape((-1, )), v.reshape((-1, )))
    np.testing.assert_allclose(wass_sp, wass1d_euc)

    # check constraints
    np.testing.assert_allclose(np.ones((n, )) / n, G.sum(1))
    np.testing.assert_allclose(np.ones((m, )) / m, G.sum(0))

    # check G is similar
    np.testing.assert_allclose(G, G_1d)

    # check AssertionError is raised if called on non 1d arrays
    # (drawn from the seeded generator so the test never touches the
    # unseeded global RNG)
    u = rng.randn(n, 2)
    v = rng.randn(m, 2)
    with pytest.raises(AssertionError):
        ot.emd_1d(u, v, [], [])
def test_emd1d_type_devices(nx):
    """Check 1D EMD outputs keep dtype and device for every backend type.

    For each entry of ``nx.__type_list__`` the inputs are converted with
    ``type_as`` and both solvers' outputs are compared with the input tensor.
    """
    rng = np.random.RandomState(0)
    n = 10
    x = np.linspace(0, 5, n)

    # Two random histograms, each normalised to unit mass (rho_u drawn first).
    rho_u = np.abs(rng.randn(n))
    rho_u /= rho_u.sum()
    rho_v = np.abs(rng.randn(n))
    rho_v /= rho_v.sum()

    for tp in nx.__type_list__:
        print(nx.dtype_device(tp))

        tensors = nx.from_numpy(x, rho_u, rho_v, type_as=tp)
        xb, rho_ub, rho_vb = tensors

        plan = ot.emd_1d(xb, xb, rho_ub, rho_vb)
        cost = ot.emd2_1d(xb, xb, rho_ub, rho_vb)

        for result in (plan, cost):
            nx.assert_same_dtype_device(xb, result)
def find_submatching_locally_linear_ts(dists1,dists2,coup1,coup2,i_enum,j_enum):
    """
    Compute locally linear matching assuming tall, skinny distance matrices

    Parameters:
    dists1, dists2 : tall skinny ndarrays
    coup1, coup2 : tall skinny csr matrices
    i_enum, j_enum : anchor node indices in tall skinny representation

    Returns:
    coupling produced by ot.emd_1d between the nonzero rows of the two
    selected coupling columns
    """

    def _support_and_weights(coup, col):
        # Rows with a nonzero entry in the column, plus their entries
        # normalised in place to sum to one.
        rows = coup[:, col].nonzero()[0]
        weights = coup[rows, col].toarray()
        weights /= np.sum(weights)
        return rows, weights

    subgraph_i, p_i = _support_and_weights(coup1, i_enum)
    subgraph_j, p_j = _support_and_weights(coup2, j_enum)

    # Positions: distances to the anchor, restricted to each support.
    x_i = dists1[subgraph_i, i_enum]
    x_j = dists2[subgraph_j, j_enum]

    return ot.emd_1d(x_i, x_j, p_i.reshape(-1), p_j.reshape(-1), p=2)
def get_paths(self, n=5000, n_steps=3):
    """Build reference paths of three points each, lifted onto a noisy circle.

    Start and end angles are sampled in 1D, coupled with ``ot.emd_1d``, and a
    midpoint sample is drawn with ``interpolate_with_ot`` at fraction 0.5.
    The three angle columns are mapped to 2D points with one random radius
    per path. Points with negative x are then mirrored, (x, y) -> (x, 2 - y),
    each with probability 0.5.

    Parameters
    ----------
    n : int
        Number of paths to generate.
    n_steps : int
        Number of points per path. Only 3 is supported.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, n_steps, 2)``.

    Notes
    -----
    Reseeds the global NumPy RNG with 42 and sets ``self.r1``, ``self.r2``
    and ``self.r3``.
    """
    # Only 3 steps are supported at this time.
    assert n_steps == 3
    np.random.seed(42)
    self.r1, self.r2, self.r3 = (0.5, 0.1, 0.1)
    labels = np.repeat([0, 2], n)
    data = np.abs(np.random.randn(n * 2) * 0.5 / np.pi)
    data[labels == 2] = 1 - data[labels == 2]

    # McCann interpolant / barycenter interpolation
    import ot

    start_pts = data[labels == 0]
    end_pts = data[labels == 2]
    gamma = ot.emd_1d(start_pts, end_pts)
    # The interpolated sample is stacked next to the n start and n end
    # points below, so it must contain exactly n entries. This was a
    # hard-coded 5000, which only worked for the default n.
    # NOTE(review): assumes the last argument of interpolate_with_ot is the
    # number of samples to draw -- confirm against its definition.
    ninterp = n
    i05 = interpolate_with_ot(
        start_pts[:, np.newaxis],
        end_pts[:, np.newaxis],
        gamma,
        0.5,
        ninterp,
    )
    data = np.stack([start_pts, i05.flatten(), end_pts], axis=-1)
    theta = data * np.pi  # transform to along the circle
    # One radius per path, broadcast over the steps and the (x, y) axis.
    r = (1 + np.random.randn(n) * self.r2)[:, None, None]
    x2d = np.stack([np.cos(theta), np.sin(theta)], axis=-1) * r

    # TODO: these reference paths could be improved to include better routing
    # along the manifold. Right now they are calculated in 1D and are just
    # lifted into 2D along the same radius. Trouble comes when the branch of
    # the tree gets flipped over y=1: this gives the opposite of the expected
    # radii. Furthermore, 2D transport is no longer the same as 1D transport
    # when there is Gaussian noise along the manifold.
    #
    # Right now they are good enough for our purposes, and making them better
    # will only improve how TrajectoryNet looks.

    # Mirror a random half of the points that have negative x.
    mask = np.random.rand(*x2d.shape[:2]) > 0.5
    mask *= x2d[:, :, 0] < 0
    x2d[mask] = [[0, 2]] + [[1, -1]] * x2d[mask]
    x2d = x2d.reshape(n, n_steps, 2)
    return x2d
# Overlay the target histogram b over the support x, then show the legend.
pl.plot(x, b, 'r', label='Target distribution')
pl.legend()

#%% plot distributions and loss matrix

# Figure 2: the cost matrix M shown together with the marginals a and b.
pl.figure(2, figsize=(5, 5))
ot.plot.plot1D_mat(a, b, M, 'Cost matrix M')

##############################################################################
# Solve EMD
# ---------

#%% EMD

# use fast 1D solver
G0 = ot.emd_1d(x, x, a, b)

# Equivalent to
# G0 = ot.emd(a, b, M)

# Figure 3: the transport plan G0 shown together with the marginals.
pl.figure(3, figsize=(5, 5))
ot.plot.plot1D_mat(a, b, G0, 'OT matrix G0')

##############################################################################
# Solve Sinkhorn
# --------------

#%% Sinkhorn

# lambd is passed as the fourth positional argument of ot.sinkhorn
# (presumably the entropic regularization weight -- confirm against POT docs).
lambd = 1e-3
Gs = ot.sinkhorn(a, b, M, lambd, verbose=True)
def compressed_fgw(dists1,dists2,fdists1,fdists2, membership1,membership2, features1,features2,p1,p2, node_subset_idx1,node_subset_idx2, dists_subset1,dists_subset2, p_subset1,p_subset2, alpha=0.5,beta=0.5,verbose = False, return_dense = True):
    """
    Compressed FGW on partitioned data structures

    The node subsets are first matched with fused Gromov-Wasserstein. Every
    pair of subset nodes whose coupling weight exceeds 1e-10 is then refined:
    the nodes anchored to each of them are matched with 1D EMD, once on graph
    distances and once on feature distances, and the blend of the two local
    couplings is scaled by the pair's weight and added to the full coupling.

    -----------
    Parameters:
    dists1,dists2,fdists1,fdists2,membership1,membership2 : |nodes| x |node_subset| csr matrices
    features1,features2 : |nodes| x |features| ndarrays
    p1,p2 : |nodes| x 1 ndarrays
    node_subset_idx1, node_subset_idx2 : |node_subset| lists
    dists_subset1, dists_subset2 : |node_subset| x |node_subset| ndarrays
    p_subset1, p_subset2 : |node_subset| x 1 ndarrays
    alpha : float, forwarded to ot.gromov.fused_gromov_wasserstein
    beta : float, weight of the distance-based local coupling; the
           feature-based local coupling gets weight 1 - beta. The distance
           matching is skipped when beta <= 0 and the feature matching is
           skipped when beta >= 1.
    verbose : bool, print progress and timing information
    return_dense : bool, convert the result to a dense ndarray
    -----------
    Returns:
    full_coup : |nodes| x |nodes| ndarray if return_dense is True,
                otherwise a scipy sparse matrix
    """

    M_compressed = pairwise_distances(features1[node_subset_idx1,:],features2[node_subset_idx2,:])

    # Match compressed graphs
    start = time.time()
    if verbose:
        print('Matching Compressed Graphs...')
    coup_compressed = ot.gromov.fused_gromov_wasserstein(M_compressed, dists_subset1, dists_subset2, p_subset1, p_subset2, alpha = alpha)
    if verbose:
        print('Time for Matching Compressed:', time.time() - start)

    # Find submatchings and create full coupling
    if verbose:
        print('Matching Subgraphs and Constructing Coupling...')
    full_coup = coo_matrix((dists1.shape[0], dists2.shape[0]))

    matching_time = 0
    matching_and_expanding_time = 0
    num_local_matches = 0

    for i_enum, _ in enumerate(node_subset_idx1):
        subgraph_i = membership1[:,i_enum].nonzero()[0] # get indices anchored to i

        for j_enum, _ in enumerate(node_subset_idx2):
            start = time.time()
            w_ij = coup_compressed[i_enum,j_enum]

            # Written as "not >" so that a NaN weight is skipped, exactly as
            # the original "if w_ij > 1e-10" did.
            if not w_ij > 1e-10:
                continue

            num_local_matches += 1
            subgraph_j = membership2[:,j_enum].nonzero()[0] # get indices anchored to j

            # Restrict the node weights to each subgraph and renormalise.
            p_i = (p1[subgraph_i]/np.sum(p1[subgraph_i])).reshape(-1)
            p_j = (p2[subgraph_j]/np.sum(p2[subgraph_j])).reshape(-1)

            # Compute submatching based on graph distances
            if beta > 0:
                coup_sub_dist_ij = ot.emd_1d(dists1[subgraph_i,i_enum].toarray(),
                                             dists2[subgraph_j,j_enum].toarray(),
                                             p_i,p_j,
                                             p=2)
            else:
                coup_sub_dist_ij = np.zeros([len(subgraph_i),len(subgraph_j)])

            # Compute submatching based on node features
            if beta < 1:
                coup_sub_features_ij = ot.emd_1d(fdists1[subgraph_i,i_enum].toarray(),
                                                 fdists2[subgraph_j,j_enum].toarray(),
                                                 p_i,p_j,
                                                 p=2)
            else:
                coup_sub_features_ij = np.zeros([len(subgraph_i),len(subgraph_j)])

            # Take weighted average
            coup_sub_ij = (1-beta)*coup_sub_features_ij + beta*coup_sub_dist_ij

            matching_time += time.time()-start

            # Expand to correct size: keep only the significant entries and
            # translate local indices to global node indices. np.nonzero
            # yields the same row-major order as np.argwhere, and fancy
            # indexing replaces the former per-entry Python loop.
            idx_i, idx_j = np.nonzero(coup_sub_ij > 1e-10)
            row = np.asarray(subgraph_i)[idx_i]
            col = np.asarray(subgraph_j)[idx_j]
            data = w_ij*coup_sub_ij[idx_i, idx_j]
            expanded_coup_sub_ij = coo_matrix((data, (row,col)), shape=(full_coup.shape[0], full_coup.shape[1]))

            # Update full coupling
            full_coup += expanded_coup_sub_ij

            matching_and_expanding_time += time.time()-start

    if verbose:
        print('Total Time for',num_local_matches,'local matches:')
        print('Local matching:', matching_time)
        print('Local Matching Plus Expansion:', matching_and_expanding_time)

    if return_dense:
        return full_coup.toarray()
    else:
        return full_coup