def test_Roll2D(par):
    """Dot-test and comparison with PyLops for Roll operator on 2d signal.

    For each direction of a ``(par['ny'], par['nx'])`` signal, builds the
    ``dRoll`` and ``Roll`` operators with a shift of -2, runs the dot-test
    on ``dRoll`` and checks that both operators give the same output when
    applied to an input that ramps along that direction.
    """
    np.random.seed(10)
    ny, nx = par['ny'], par['nx']
    nflat = ny * nx

    # x['0'] ramps along the first axis, x['1'] along the second one.
    # par['imag'] scales a second copy of the same ramp that is added on top
    # (presumably 0 for real and 1j for complex runs - TODO confirm).
    x = {}
    x['0'] = da.outer(np.arange(ny), da.ones(nx)) + \
        par['imag'] * np.outer(da.arange(ny), da.ones(nx))
    x['1'] = da.outer(da.ones(ny), da.arange(nx)) + \
        par['imag'] * np.outer(da.ones(ny), da.arange(nx))

    # `direction` instead of `dir` so the builtin is not shadowed.
    for direction in [0, 1]:
        dRop = dRoll(nflat, dims=(ny, nx), dir=direction, shift=-2,
                     dtype=par['dtype'])
        Rop = Roll(nflat, dims=(ny, nx), dir=direction, shift=-2,
                   dtype=par['dtype'])
        assert dottest(dRop, nflat, nflat, chunks=(nflat, nflat))

        dy = dRop * x[str(direction)].ravel()
        y = Rop * x[str(direction)].compute().ravel()
        assert_array_equal(dy, y)
def test_inner(shape1, shape2):
    """Check that ``da.outer`` matches ``np.outer`` for both argument orders.

    Two random arrays with values in [-1, 1) and shapes ``shape1`` and
    ``shape2`` are wrapped as dask arrays (chunk size 3) and their outer
    products are compared against the NumPy results.
    """
    # NOTE(review): despite its name this test exercises `outer`, and an
    # identical `test_inner` is defined again right after this one (the later
    # definition shadows this one) - consider renaming/removing one of them.

    # Seed the generator (rather than drawing from it) so that the inputs
    # are reproducible from run to run.
    np.random.seed(1337)

    x = 2 * np.random.random(shape1) - 1
    y = 2 * np.random.random(shape2) - 1

    a = da.from_array(x, chunks=3)
    b = da.from_array(y, chunks=3)

    assert_eq(np.outer(x, y), da.outer(a, b))
    assert_eq(np.outer(y, x), da.outer(b, a))
def test_inner(shape1, shape2):
    """Check that ``da.outer`` matches ``np.outer`` for both argument orders.

    Two random arrays with values in [-1, 1) and shapes ``shape1`` and
    ``shape2`` are wrapped as dask arrays (chunk size 3) and their outer
    products are compared against the NumPy results.
    """
    # NOTE(review): this redefinition shadows an identical `test_inner`
    # defined just before it, and despite its name the test exercises
    # `outer` - consider renaming/removing one of the two definitions.

    # Seed the generator (rather than drawing from it) so that the inputs
    # are reproducible from run to run.
    np.random.seed(1337)

    x = 2 * np.random.random(shape1) - 1
    y = 2 * np.random.random(shape2) - 1

    a = da.from_array(x, chunks=3)
    b = da.from_array(y, chunks=3)

    assert_eq(np.outer(x, y), da.outer(a, b))
    assert_eq(np.outer(y, x), da.outer(b, a))
def mvn_random_DASK(mean, cov, N, dim, *, epsilon=0.0001, seed=10):
    """Draw ``N`` multivariate-normal samples as a dask array.

    Samples are built as ``mean + A @ z`` where ``A`` is the lower Cholesky
    factor of ``cov + epsilon * I`` and ``z`` holds standard-normal draws.

    Parameters
    ----------
    mean : array_like
        Mean vector (its length must match ``dim`` for the final sum to
        line up).
    cov : array_like
        Covariance matrix; ``epsilon * da.eye(dim)`` is added to it, so it
        is expected to be ``(dim, dim)``.
    N : int
        Number of samples to draw.
    dim : int
        Dimensionality of each sample.
    epsilon : float, optional
        Value added to the diagonal of ``cov`` before the Cholesky
        factorisation. Defaults to ``0.0001``.
    seed : int or None, optional
        Seed passed to ``da.random.seed`` before sampling. The default
        (``10``) keeps the historical behaviour of returning the same draws
        on every call; pass ``None`` to leave dask's random state untouched.

    Returns
    -------
    dask array
        Array of shape ``(dim, N)`` with one sample per column.
    """
    if seed is not None:
        da.random.seed(seed)

    # Diagonal jitter, presumably to keep the factorisation from failing on
    # (near-)singular covariance matrices.
    A = da.linalg.cholesky(cov + epsilon * da.eye(dim), lower=True)
    z = da.random.standard_normal(size=(N, dim))

    # outer(ones(N), mean).T repeats `mean` as N columns -> (dim, N).
    x = da.outer(da.ones((N, )), mean).transpose() + da.dot(A, z.transpose())
    return x
def test_blockwise_cull(flat):
    """Culling to one chunk keeps Blockwise layers lazy and correctly keyed.

    ``flat`` selects the array under test: a simple element-wise expression
    when true, an outer product followed by a transpose otherwise. The graph
    is culled down to chunk ``(1, 1)`` and every layer of the culled graph is
    checked.
    """
    if flat:
        # Simple "flat" mapping between input and output indices
        x = da.from_array(np.arange(40).reshape((4, 10)), (2, 4)) + 100
    else:
        # Complex mapping between input and output indices
        # (outer product and transpose)
        lhs = da.from_array(np.arange(10).reshape((10, )), (4, ))
        rhs = da.from_array(np.arange(10).reshape((10, )), (4, ))
        x = da.outer(lhs, rhs).transpose()

    # Blockwise culling must produce the right output keys without
    # materializing the full graph.
    graph = x.__dask_graph__()
    select = (1, 1)  # a single chunk
    culled = graph.cull({(x._name, *select)})

    blockwise_cls = dask.blockwise.Blockwise
    for name, layer in culled.layers.items():
        original_is_blockwise = isinstance(graph.layers[name], blockwise_cls)
        if isinstance(layer, blockwise_cls):
            assert original_is_blockwise
            # Laziness is checked both before and after asking for the
            # output keys, so that the query itself is covered too.
            assert not layer.is_materialized()
            out_keys = layer.get_output_keys()
            assert out_keys == {(layer.output, *select)}
            assert not layer.is_materialized()
        else:
            # The original layer shouldn't be Blockwise if the culled
            # one isn't.
            assert not original_is_blockwise
def _center_x(self, x, dx, transpose: bool = False) -> da.core.Array:
    """Center a matrix product instead of centering the matrix itself.

    Let A be an (n by p) matrix with non-zero column means U of shape (p,).
    A column-centered matrix could be formed explicitly as B = A - 1'U,
    where 1 is a vector of ones and 1'U is an (n by p) outer product.
    Building B is wasteful when only its product with some matrix is
    needed, so this method applies the correction to the product directly:
    it returns ``Ax - Ux``, where ``Ax`` is passed in as `x` and the
    original operand is passed in as `dx`.

    Parameters
    ----------
    x : array_like
        Usually the product Ax (or A'x) that needs to be centered.
    dx : array_like
        Usually the original x before it was multiplied by A.
    transpose : bool
        Whether the product is A'x rather than Ax; adjusts the dimensions
        of the correction term.

    Returns
    -------
    x_centered : array_like
    """
    center = self._array_moment.center_vector

    if not transpose:
        return x - center.dot(dx)

    # Computes mu1'x_k_h
    correction = da.squeeze(da.outer(center, dx.sum(axis=0)))
    return x - correction
def coclustering(Z, nclusters_row, nclusters_col, errobj, niters, epsilon,
                 col_clusters_init=None, row_clusters_init=None,
                 run_on_worker=False):
    """
    Run the co-clustering, Dask implementation

    Rows and columns are alternately re-assigned to the closest cluster
    until the change in the error value drops below `errobj` or `niters`
    iterations have been performed.

    :param Z: m x n data matrix
    :param nclusters_row: num row clusters
    :param nclusters_col: number of column clusters
    :param errobj: convergence threshold for the objective function
    :param niters: maximum number of iterations
    :param epsilon: numerical parameter, avoids zero arguments in log
    :param col_clusters_init: initial column cluster assignment
    :param row_clusters_init: initial row cluster assignment
    :param run_on_worker: whether the function is submitted to a Dask worker
    :return: has converged, number of iterations performed. final row and
        column clustering, error value
    """
    # Local import: only needed to test whether debug logging is enabled.
    import logging

    client = get_client()

    Z = da.array(Z) if not isinstance(Z, da.Array) else Z

    m, n = Z.shape
    row_chunks, col_chunks = Z.chunksize

    row_clusters = da.array(row_clusters_init) \
        if row_clusters_init is not None \
        else _initialize_clusters(m, nclusters_row, chunks=row_chunks)
    col_clusters = da.array(col_clusters_init) \
        if col_clusters_init is not None \
        else _initialize_clusters(n, nclusters_col, chunks=col_chunks)
    R = _setup_cluster_matrix(nclusters_row, row_clusters)
    C = _setup_cluster_matrix(nclusters_col, col_clusters)

    # Start with e != old_e so that a first iteration is never "converged".
    e, old_e = 2 * errobj, 0
    s = 0
    converged = False

    Gavg = Z.mean()

    while not converged and s < niters:
        logger.debug(f'Iteration # {s} ..')
        # Calculate cluster based averages
        # nel_clusters is a matrix with the number of elements per co-cluster
        # originally computed as: da.dot(da.dot(R.T, da.ones((m, n))), C)
        nel_row_clusters = da.bincount(row_clusters, minlength=nclusters_row)
        nel_col_clusters = da.bincount(col_clusters, minlength=nclusters_col)
        # The counts below require two blocking computations; only pay for
        # them when the message is actually going to be emitted.
        # NOTE(review): assumes `logger` is a standard `logging.Logger`.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug('num of populated clusters: row {}, col {}'.format(
                da.sum(nel_row_clusters > 0).compute(),
                da.sum(nel_col_clusters > 0).compute()))
        nel_clusters = da.outer(nel_row_clusters, nel_col_clusters)
        # The epsilon terms make an empty co-cluster (zero sum, zero count)
        # fall back to the global average instead of dividing by zero.
        CoCavg = (da.matmul(da.matmul(R.T, Z), C) + Gavg * epsilon) / \
            (nel_clusters + epsilon)

        # Calculate distance based on row approximation
        d_row = _distance(Z, da.matmul(C, CoCavg.T), epsilon)

        # Assign to best row cluster
        row_clusters = da.argmin(d_row, axis=1)
        R = _setup_cluster_matrix(nclusters_row, row_clusters)

        # Calculate distance based on column approximation
        d_col = _distance(Z.T, da.matmul(R, CoCavg), epsilon)

        # Assign to best column cluster
        col_clusters = da.argmin(d_col, axis=1)
        C = _setup_cluster_matrix(nclusters_col, col_clusters)

        # Error value (actually just the column components really)
        old_e = e
        minvals = da.min(d_col, axis=1)
        # power 1 divergence, power 2 euclidean
        e = da.sum(da.power(minvals, 1))

        # Persist the updated state so the next iteration starts from
        # materialized results.
        row_clusters, R, col_clusters, C, e = client.persist(
            [row_clusters, R, col_clusters, C, e])
        if run_on_worker:
            # this is workaround for e.compute() for a function that runs
            # on a worker with multiple threads
            # https://github.com/dask/distributed/issues/3827
            e = client.compute(e)
            secede()
            e = e.result()
            rejoin()
        else:
            e = e.compute()

        logger.debug(f'Error = {e:+.15e}, dE = {e - old_e:+.15e}')
        converged = abs(e - old_e) < errobj
        s = s + 1

    if converged:
        logger.debug(f'Coclustering converged in {s} iterations')
    else:
        logger.debug(f'Coclustering not converged in {s} iterations')

    return converged, s, row_clusters, col_clusters, e