Example #1
0
def test_Roll2D(par):
    """Dot-test and comparison with PyLops for Roll operator on 2d signal

    For each of the two axes of a ``(ny, nx)`` signal, a ``dRoll`` and a
    ``Roll`` operator are built with ``shift=-2``. The ``dRoll`` operator
    must pass the dot-test, and applying both operators to the same
    flattened signal must give equal results.

    Parameters
    ----------
    par : dict
        Test parameters; the keys read here are ``'ny'``, ``'nx'``,
        ``'imag'`` and ``'dtype'``.
    """
    np.random.seed(10)
    ny, nx = par['ny'], par['nx']
    nmodel = ny * nx

    # One input signal per rolled axis, keyed by the axis number as a string:
    # x['0'] varies along the first axis, x['1'] along the second one.
    x = {}
    x['0'] = da.outer(np.arange(ny), da.ones(nx)) + \
             par['imag'] * np.outer(da.arange(ny),
                                    da.ones(nx))
    x['1'] = da.outer(da.ones(ny), da.arange(nx)) + \
             par['imag'] * np.outer(da.ones(ny),
                                    da.arange(nx))

    # ``axis`` rather than ``dir`` so the builtin ``dir`` is not shadowed;
    # the operators still receive it through their ``dir`` keyword.
    for axis in [0, 1]:
        dRop = dRoll(nmodel,
                     dims=(ny, nx),
                     dir=axis,
                     shift=-2,
                     dtype=par['dtype'])
        Rop = Roll(nmodel,
                   dims=(ny, nx),
                   dir=axis,
                   shift=-2,
                   dtype=par['dtype'])
        assert dottest(dRop,
                       nmodel,
                       nmodel,
                       chunks=(nmodel, nmodel))
        dy = dRop * x[str(axis)].ravel()
        # The reference operator is fed the materialised (computed) signal.
        y = Rop * x[str(axis)].compute().ravel()
        assert_array_equal(dy, y)
Example #2
0
def test_inner(shape1, shape2):
    """Check that ``da.outer`` matches ``np.outer`` for both argument orders.

    NOTE(review): despite its name this test exercises the *outer* product;
    the name is kept unchanged so existing test selection keeps working.

    Parameters
    ----------
    shape1, shape2 : int or tuple of int
        Shapes of the two random operands, passed to ``np.random.random``.
    """
    # Seed the generator so the random operands are reproducible. The
    # previous ``np.random.random(1337)`` call only drew and discarded 1337
    # samples and left the generator unseeded.
    np.random.seed(1337)

    # Uniform samples rescaled from [0, 1) to [-1, 1).
    x = 2 * np.random.random(shape1) - 1
    y = 2 * np.random.random(shape2) - 1

    a = da.from_array(x, chunks=3)
    b = da.from_array(y, chunks=3)

    assert_eq(np.outer(x, y), da.outer(a, b))
    assert_eq(np.outer(y, x), da.outer(b, a))
Example #3
0
def test_inner(shape1, shape2):
    """Compare ``da.outer`` against ``np.outer`` on random operands.

    Both argument orders are checked. NOTE(review): the function is named
    ``test_inner`` but verifies the *outer* product; the name is left as is
    to avoid changing how the test is collected.

    Parameters
    ----------
    shape1, shape2 : int or tuple of int
        Shapes handed to ``np.random.random`` to build the two operands.
    """
    # Fix the seed for reproducible inputs; calling
    # ``np.random.random(1337)`` here would merely generate 1337 unused
    # samples without seeding anything.
    np.random.seed(1337)

    # Map the uniform [0, 1) samples onto [-1, 1).
    x = 2 * np.random.random(shape1) - 1
    y = 2 * np.random.random(shape2) - 1

    a = da.from_array(x, chunks=3)
    b = da.from_array(y, chunks=3)

    assert_eq(np.outer(x, y), da.outer(a, b))
    assert_eq(np.outer(y, x), da.outer(b, a))
Example #4
0
def mvn_random_DASK(mean, cov, N, dim):
    """Generate multivariate-normal samples with Dask via a Cholesky factor.

    The global Dask random state is re-seeded with 10 on every call. A small
    multiple of the identity is added to ``cov`` before the factorisation.

    Parameters
    ----------
    mean : array_like
        Mean vector (presumably of length ``dim`` - confirm with callers).
    cov : array_like
        Covariance matrix (presumably ``dim`` x ``dim`` - confirm).
    N : int
        Number of samples to draw.
    dim : int
        Dimension of each sample.

    Returns
    -------
    dask array
        ``mean + L z`` with one sample per column, where ``L`` is the lower
        Cholesky factor and ``z`` holds standard-normal draws.
    """
    da.random.seed(10)

    # Diagonal jitter added to the covariance before factorising it.
    jitter = 0.0001
    regularised_cov = cov + jitter * da.eye(dim)
    chol_lower = da.linalg.cholesky(regularised_cov, lower=True)

    standard_draws = da.random.standard_normal(size=(N, dim))

    # Repeat the mean once per sample, then flip so samples run along columns.
    mean_block = da.outer(da.ones((N, )), mean).transpose()
    correlated = da.dot(chol_lower, standard_draws.transpose())
    return mean_block + correlated
Example #5
0
def test_blockwise_cull(flat):
    """Cull a graph down to one chunk and inspect its Blockwise layers.

    Parameters
    ----------
    flat : bool
        When truthy, the array is built with a simple element-wise
        operation; otherwise it is built from an outer product followed by
        a transpose.
    """
    if not flat:
        # Complex mapping between input and output
        # indices (outer product and transpose)
        lhs = da.from_array(np.arange(10).reshape((10, )), (4, ))
        rhs = da.from_array(np.arange(10).reshape((10, )), (4, ))
        x = da.outer(lhs, rhs).transpose()
    else:
        # Simple "flat" mapping between input and
        # output indices
        x = da.from_array(np.arange(40).reshape((4, 10)), (2, 4)) + 100

    # Check that blockwise culling results in correct
    # output keys and that full graph is not materialized
    graph = x.__dask_graph__()
    chunk_index = (1, 1)  # Select a single chunk
    wanted_keys = {(x._name, *chunk_index)}
    culled_graph = graph.cull(wanted_keys)

    for layer_name, culled_layer in culled_graph.layers.items():
        original_layer = graph.layers[layer_name]
        if isinstance(culled_layer, dask.blockwise.Blockwise):
            assert isinstance(original_layer, dask.blockwise.Blockwise)
            # Asking for the output keys must not materialize the layer,
            # hence the check both before and after the call.
            assert not culled_layer.is_materialized()
            produced_keys = culled_layer.get_output_keys()
            assert produced_keys == {(culled_layer.output, *chunk_index)}
            assert not culled_layer.is_materialized()
        else:
            # The original layer shouldn't be Blockwise if the new one isn't
            assert not isinstance(original_layer, dask.blockwise.Blockwise)
Example #6
0
    def _center_x(self, x, dx, transpose: bool = False) -> da.core.Array:
        """Apply centering to an already computed matrix product.

        Given a matrix A of shape (n, p) with non-zero column means U of
        shape (p,), a column-centered matrix could be built explicitly as
        ``B = A - 1'U`` (``1'U`` being an (n, p) outer product). When only
        the product of B with some x is required, that construction is
        wasteful, so this method instead subtracts the mean contribution
        from the product that was already computed::

            Ax - Ux
             ^    ^- computed here from ``dx``
             |
             passed in as ``x``

        Parameters
        ----------
        x : array_like
            Usually the product Ax (or A'x) that needs to be centered.
        dx : array_like
            Usually the original x, before it was multiplied by A.
        transpose : bool
            Whether ``x`` comes from A'x rather than Ax; selects how the
            correction term is formed.

        Returns
        -------
        x_centered : array_like
            ``x`` minus the correction term built from the stored center
            vector and ``dx``.
        """
        center = self._array_moment.center_vector

        if not transpose:
            return x - center.dot(dx)

        # Computes mu1'x_k_h: outer product of the center vector with the
        # sums of dx along axis 0, with length-one axes squeezed out.
        dx_axis0_sums = dx.sum(axis=0)
        correction = da.squeeze(da.outer(center, dx_axis0_sums))
        return x - correction
Example #7
0
def coclustering(Z,
                 nclusters_row,
                 nclusters_col,
                 errobj,
                 niters,
                 epsilon,
                 col_clusters_init=None,
                 row_clusters_init=None,
                 run_on_worker=False):
    """
    Run the co-clustering, Dask implementation

    Row and column cluster assignments are updated alternately until the
    change in the error value between two consecutive iterations drops
    below ``errobj`` or ``niters`` iterations have been performed.

    :param Z: m x n data matrix
    :param nclusters_row: number of row clusters
    :param nclusters_col: number of column clusters
    :param errobj: convergence threshold for the objective function
    :param niters: maximum number of iterations
    :param epsilon: numerical parameter, avoids zero arguments in log
    :param col_clusters_init: initial column cluster assignment
    :param row_clusters_init: initial row cluster assignment
    :param run_on_worker: whether the function is submitted to a Dask worker
    :return: tuple with: has converged, number of iterations performed,
    final row clustering, final column clustering, error value
    """
    client = get_client()

    Z = da.array(Z) if not isinstance(Z, da.Array) else Z

    m, n = Z.shape
    row_chunks, col_chunks = Z.chunksize

    # Use the provided initial assignments when given, otherwise generate
    # them with chunking that follows the chunking of Z.
    row_clusters = da.array(row_clusters_init) \
        if row_clusters_init is not None \
        else _initialize_clusters(m, nclusters_row, chunks=row_chunks)
    col_clusters = da.array(col_clusters_init) \
        if col_clusters_init is not None \
        else _initialize_clusters(n, nclusters_col, chunks=col_chunks)
    R = _setup_cluster_matrix(nclusters_row, row_clusters)
    C = _setup_cluster_matrix(nclusters_col, col_clusters)

    # Start with |e - old_e| = 2 * errobj so that the first comparison
    # cannot be mistaken for convergence.
    e, old_e = 2 * errobj, 0
    s = 0
    converged = False

    Gavg = Z.mean()

    # Logical ``and`` (not bitwise ``&``): both operands are plain truth
    # values and the iteration-count check can be short-circuited.
    while not converged and s < niters:
        logger.debug(f'Iteration # {s} ..')
        # Calculate cluster based averages
        # nel_clusters is a matrix with the number of elements per co-cluster
        # originally computed as:  da.dot(da.dot(R.T, da.ones((m, n))), C)
        nel_row_clusters = da.bincount(row_clusters, minlength=nclusters_row)
        nel_col_clusters = da.bincount(col_clusters, minlength=nclusters_col)
        # NOTE: the two ``compute()`` calls below are evaluated eagerly,
        # whatever the configured log level is.
        logger.debug('num of populated clusters: row {}, col {}'.format(
            da.sum(nel_row_clusters > 0).compute(),
            da.sum(nel_col_clusters > 0).compute()))
        nel_clusters = da.outer(nel_row_clusters, nel_col_clusters)
        CoCavg = (da.matmul(da.matmul(R.T, Z), C) + Gavg * epsilon) / \
                 (nel_clusters + epsilon)

        # Calculate distance based on row approximation
        d_row = _distance(Z, da.matmul(C, CoCavg.T), epsilon)
        # Assign to best row cluster
        row_clusters = da.argmin(d_row, axis=1)
        R = _setup_cluster_matrix(nclusters_row, row_clusters)

        # Calculate distance based on column approximation
        d_col = _distance(Z.T, da.matmul(R, CoCavg), epsilon)
        # Assign to best column cluster
        col_clusters = da.argmin(d_col, axis=1)
        C = _setup_cluster_matrix(nclusters_col, col_clusters)

        # Error value (actually just the column components really)
        old_e = e
        minvals = da.min(d_col, axis=1)
        # power 1 divergence, power 2 euclidean
        e = da.sum(da.power(minvals, 1))
        # Persist the updated state so that the next iteration builds on
        # these results.
        row_clusters, R, col_clusters, C, e = client.persist(
            [row_clusters, R, col_clusters, C, e])
        if run_on_worker:
            # this is workaround for e.compute() for a function that runs
            # on a worker with multiple threads
            # https://github.com/dask/distributed/issues/3827
            e = client.compute(e)
            secede()
            e = e.result()
            rejoin()
        else:
            e = e.compute()
        logger.debug(f'Error = {e:+.15e}, dE = {e - old_e:+.15e}')
        converged = abs(e - old_e) < errobj
        s = s + 1
    if converged:
        logger.debug(f'Coclustering converged in {s} iterations')
    else:
        logger.debug(f'Coclustering not converged in {s} iterations')
    return converged, s, row_clusters, col_clusters, e