Example #1
    def _rmatvec(self, x):
        # apply forward fft
        x = da.reshape(x, self.dimsd)
        y = np.sqrt(1. / self.nt) * da.fft.rfft(x, n=self.nt, axis=0)
        y = y.astype(self.cdtype)
        y = y[:self.nfmax]

        # apply batched matrix mult
        y = y.rechunk((self.G.chunks[0], self.nr, self.nv))
        if self.saveGt:
            if self.conj:
                y = y.conj()
            y = da.matmul(self.GT, y)
            if self.conj:
                y = y.conj()
        else:
            if self.conj:
                y = da.matmul(y.transpose(0, 2, 1), self.G).transpose(0, 2, 1)
            else:
                y = da.matmul(y.transpose(0, 2, 1).conj(),
                              self.G).transpose(0, 2, 1).conj()
        if not self.prescaled:
            y *= self.dr * self.dt * np.sqrt(self.nt)

        # apply inverse fft
        y = da.pad(y, ((0, self.nfft - self.nfmax), (0, 0), (0, 0)),
                   mode='constant')
        y = y.rechunk(self.dimsdf)
        y = np.sqrt(self.nt) * da.fft.irfft(y, n=self.nt, axis=0)
        if self.twosided:
            y = da.fft.fftshift(y, axes=0)
        y = y.astype(self.dtype)
        y = da.real(y)
        return y.ravel()
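The pattern above (forward FFT, a batched per-frequency matrix multiply, inverse FFT) can be sketched standalone; every shape and name below is an assumption for illustration, not taken from the class:

import numpy as np
import dask.array as da

nt, nr, nv, nfmax = 64, 10, 3, 20
G = da.random.random((nfmax, nr, nr), chunks=(5, nr, nr))  # one operator per frequency
x = da.random.random((nt, nr, nv), chunks=(nt, nr, nv))

X = da.fft.rfft(x, n=nt, axis=0)[:nfmax]            # to the frequency domain
X = X.rechunk((G.chunks[0], nr, nv))                # align batch chunks with G
Y = da.matmul(G, X)                                 # batched per-frequency matmul
Y = da.pad(Y, ((0, nt // 2 + 1 - nfmax), (0, 0), (0, 0)), mode='constant')
Y = Y.rechunk((nt // 2 + 1, nr, nv))                # FFT axis must be a single chunk
y = da.fft.irfft(Y, n=nt, axis=0).real              # back to the time domain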
Example #2
def _rmatvec(self, x):
    x = da.squeeze(x.reshape(self.nsl, self.nx, self.nz))
    if self.chunks[0] is not None:
        x = x.rechunk(self.chunks[0])
    if self.nz == 1:
        x = x[..., np.newaxis]
    if hasattr(self, 'GT'):
        y = da.matmul(self.GT, x)
    else:
        if self.transposeG:
            y = da.matmul(self.G.transpose((0, 2, 1)).conj(), x)
        else:
            y = da.matmul(x.transpose(0, 2, 1).conj(),
                          self.G).transpose(0, 2, 1).conj()
    return y.ravel()
Example #3
def power_dask(data, x_init):
    # power iteration for the dominant eigenvector of data @ data.T
    A = da.matmul(data, da.transpose(data))
    A = A.persist()  # materialize A once so every iteration reuses it
    T = 150          # number of power-iteration steps
    y = x_init
    for t in range(T):
        v = da.matmul(A, y)
        y = v / da.linalg.norm(v)
    return y.compute()
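A minimal usage sketch for power_dask (the shapes and chunking are assumptions):

import numpy as np
import dask.array as da

data = da.random.random((1000, 200), chunks=(500, 200))
x0 = np.random.random(1000)
eigvec = power_dask(data, x0)  # approximate dominant eigenvector of data @ data.T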
Example #4
def _matvec(self, x):
    x = da.squeeze(x.reshape(self.nsl, self.ny, self.nz))
    if self.chunks[0] is not None:
        x = x.rechunk(self.chunks[0])
    if self.nz == 1:
        x = x[..., np.newaxis]
    y = da.matmul(self.G, x)
    return y.ravel()
Example #5
def test_optimize_blockwise_duplicate_dependency(optimize_graph):
    # Two blockwise operations in a row with duplicate name
    # (See: https://github.com/dask/dask/issues/8535)
    xx = da.from_array(np.array([[1, 1], [2, 2]]), chunks=1)
    xx = xx * 2
    z = da.matmul(xx, xx)

    # Compare to known answer
    result = z.compute(optimize_graph=optimize_graph)
    assert_eq(result, np.array([[12, 12], [24, 24]]))
Example #6
def test_matmul(x_shape, y_shape):
    np.random.seed(3732)

    x = np.random.random(x_shape)[()]
    y = np.random.random(y_shape)[()]

    a = da.from_array(x, chunks=tuple((i // 2) for i in x.shape))
    b = da.from_array(y, chunks=tuple((i // 2) for i in y.shape))

    expected = None
    try:
        expected = np.matmul(x, y)
    except ValueError:
        pass

    for d1, d2 in itertools.product([a, x], [b, y]):
        if x.ndim == 0 or y.ndim == 0:
            with pytest.raises(ValueError):
                da.matmul(d1, d2)
        else:
            assert_eq(expected, da.matmul(d1, d2))
Example #7
def test_matmul():
    x = np.random.random((5, 5))
    y = np.random.random((5, 2))

    a = da.from_array(x, chunks=(1, 5))
    b = da.from_array(y, chunks=(5, 1))

    assert_eq(np.matmul(x, y), da.matmul(a, b))
    assert_eq(np.matmul(a, y), da.matmul(x, b))
    assert_eq(np.matmul(x, b), da.matmul(a, y))

    list_vec = list(range(1, 6))
    assert_eq(np.matmul(x, list_vec), da.matmul(a, list_vec))
    assert_eq(np.matmul(list_vec, y), da.matmul(list_vec, b))

    z = np.random.random((5, 5, 5))
    c = da.from_array(z, chunks=(1, 5, 1))
    with pytest.raises(NotImplementedError):
        da.matmul(a, z)

    assert_eq(np.matmul(z, x), da.matmul(c, a))
Example #8
cph.fit(pheno[[T_name, event_name] + covname], T_name, event_col=event_name)
# res_surv = cph.compute_residuals(pheno[[T_name, event_name] + covname], 'deviance').sort_index()['deviance']
res_surv = cph.compute_residuals(pheno[[T_name, event_name] + covname], 'martingale').sort_index()['martingale']

# This is the most memory-intensive part; it may need to change for biobank-scale data.

if args.apr_flag == 'N':
  logger.info('Calculating Null covariance matrix\n')
  mat = cph.predict_cumulative_hazard(pheno)
  P = np.diff(mat, axis=0)
  for isubj in range(P.shape[1]):
    idx = np.abs(mat.index - pheno[T_name][isubj]).argmin()
    P[idx:, isubj] = 0
  V = da.diag(np.array(pheno[event_name] - res_surv)) - da.dot(P.transpose(),P)
  X = np.array(pheno[covname])
  C = V - da.matmul(
      da.matmul(da.matmul(V, X),
                da.linalg.inv(da.matmul(da.matmul(X.transpose(), V), X))),
      da.matmul(X.transpose(), V))
else:
  logger.info('Using first order approximations for testing statistics\n')

# auto chunk to reduce the query time
chunk_array = [bim.i.values[i:i + chunk_size] for i in range(0, len(bim.i.values), chunk_size)]
nchunk = len(chunk_array)
chunk_ind = 1

#################################### Create HDF temporary file #########
# The idea is to use I/O to prevent memory overload
# Initialize the temporary files
tmp_f = outpath + '.tmp.hdf5'
try:
    os.remove(tmp_f)
except OSError:
    pass
Example #9
h5read = tables.open_file('knockoff-data.h5', mode='r')
h5regression = tables.open_file('regression-data.h5', mode='r')
X = da.from_array(h5read.root.X)
pdim = X.shape[1]
Xtilde = da.from_array(h5read.root.Xtilde)
Y = da.from_array(h5regression.root.Y)
keepcols_svd = list(h5read.root.keepcols)
xcolnames_pdim = []
for k in xinfo['xcolnames']:
    if xinfo['xcolnames'][k] in keepcols_svd:
        xcolnames_pdim.append(k)

with Profiler() as prof, ResourceProfiler() as rprof, CacheProfiler() as cprof:
    Xaug = da.hstack([X, Xtilde])
    betahat_aug = da.linalg.solve(da.matmul(Xaug.T, Xaug),
                                  da.matmul(Xaug.T, Y)).compute()
    Wstat = [
        abs(betahat_aug[i]) - abs(betahat_aug[i + pdim]) for i in range(pdim)
    ]
    threshold = knockoff_threshold(Wstat, FDR, offset=1)
    sel = [Wstat[j] >= threshold for j in range(pdim)]
    Xdrop = X[:, sel]
    betahat_final = da.linalg.solve(da.matmul(Xdrop.T, Xdrop),
                                    da.matmul(Xdrop.T, Y)).compute()
    colnames_final = [i for i, j in zip(xcolnames_pdim, sel) if j]
    colnames_dropped = [i for i, j in zip(xcolnames_pdim, sel) if not j]
    print("desired FDR: ")
    print(FDR)
    print("\nKnockoff drops these columns:\n")
    print(colnames_dropped)
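Solving the normal equations as above squares the condition number of the design matrix; the same least-squares fit can be written with da.linalg.lstsq. A sketch, not part of the original script (it assumes Y is a vector or single column, and rechunks because the TSQR-based lstsq needs the matrix chunked along rows only):

Xaug_tall = Xaug.rechunk((Xaug.chunks[0], Xaug.shape[1]))
betahat_aug, residuals, rank, s = da.linalg.lstsq(Xaug_tall, Y)
betahat_aug = betahat_aug.compute()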
Example #10
import dask.array as da
from dask.dot import dot_graph

image_1 = da.zeros((5, 5), chunks=(5, 5))
image_2 = da.ones((5, 5), chunks=(5, 5))
dot_graph(image_1.dask)

# In[15]:

image_4 = (image_1 - 10) + (image_2 * 50)
dot_graph(image_4.dask)

# In[16]:

image_5 = da.matmul(image_1, image_4)
dot_graph(image_5.dask)

# ## Image Processing
# The initial examples used very simple images. Here we can see how the same ideas look on real imaging problems.

# In[17]:

import dask.array as da
from dask.dot import dot_graph
import numpy as np
from skimage.io import imread
import matplotlib.pyplot as plt
from skimage.util import montage as montage2d

# for showing results
Example #11
def coclustering(Z,
                 nclusters_row,
                 nclusters_col,
                 errobj,
                 niters,
                 epsilon,
                 col_clusters_init=None,
                 row_clusters_init=None,
                 run_on_worker=False):
    """
    Run the co-clustering, Dask implementation

    :param Z: m x n data matrix
    :param nclusters_row: num row clusters
    :param nclusters_col: number of column clusters
    :param errobj: convergence threshold for the objective function
    :param niters: maximum number of iterations
    :param epsilon: numerical parameter, avoids zero arguments in log
    :param row_clusters_init: initial row cluster assignment
    :param col_clusters_init: initial column cluster assignment
    :param run_on_worker: whether the function is submitted to a Dask worker
    :return: has converged, number of iterations performed. final row and
    column clustering, error value
    """
    client = get_client()

    Z = da.array(Z) if not isinstance(Z, da.Array) else Z

    [m, n] = Z.shape
    row_chunks, col_chunks = Z.chunksize

    row_clusters = da.array(row_clusters_init) \
        if row_clusters_init is not None \
        else _initialize_clusters(m, nclusters_row, chunks=row_chunks)
    col_clusters = da.array(col_clusters_init) \
        if col_clusters_init is not None \
        else _initialize_clusters(n, nclusters_col, chunks=col_chunks)
    R = _setup_cluster_matrix(nclusters_row, row_clusters)
    C = _setup_cluster_matrix(nclusters_col, col_clusters)

    e, old_e = 2 * errobj, 0
    s = 0
    converged = False

    Gavg = Z.mean()

    while not converged and s < niters:
        logger.debug(f'Iteration # {s} ..')
        # Calculate cluster based averages
        # nel_clusters is a matrix with the number of elements per co-cluster
        # originally computed as:  da.dot(da.dot(R.T, da.ones((m, n))), C)
        nel_row_clusters = da.bincount(row_clusters, minlength=nclusters_row)
        nel_col_clusters = da.bincount(col_clusters, minlength=nclusters_col)
        logger.debug('num of populated clusters: row {}, col {}'.format(
            da.sum(nel_row_clusters > 0).compute(),
            da.sum(nel_col_clusters > 0).compute()))
        nel_clusters = da.outer(nel_row_clusters, nel_col_clusters)
        CoCavg = (da.matmul(da.matmul(R.T, Z), C) + Gavg * epsilon) / \
                 (nel_clusters + epsilon)

        # Calculate distance based on row approximation
        d_row = _distance(Z, da.matmul(C, CoCavg.T), epsilon)
        # Assign to best row cluster
        row_clusters = da.argmin(d_row, axis=1)
        R = _setup_cluster_matrix(nclusters_row, row_clusters)

        # Calculate distance based on column approximation
        d_col = _distance(Z.T, da.matmul(R, CoCavg), epsilon)
        # Assign to best column cluster
        col_clusters = da.argmin(d_col, axis=1)
        C = _setup_cluster_matrix(nclusters_col, col_clusters)

        # Error value (computed from the column distances only)
        old_e = e
        minvals = da.min(d_col, axis=1)
        # power 1 divergence, power 2 euclidean
        e = da.sum(da.power(minvals, 1))
        row_clusters, R, col_clusters, C, e = client.persist(
            [row_clusters, R, col_clusters, C, e])
        if run_on_worker:
            # this is a workaround for calling e.compute() from a function
            # that runs on a worker with multiple threads
            # https://github.com/dask/distributed/issues/3827
            e = client.compute(e)
            secede()
            e = e.result()
            rejoin()
        else:
            e = e.compute()
        logger.debug(f'Error = {e:+.15e}, dE = {e - old_e:+.15e}')
        converged = abs(e - old_e) < errobj
        s = s + 1
    if converged:
        logger.debug(f'Coclustering converged in {s} iterations')
    else:
        logger.debug(f'Coclustering did not converge in {s} iterations')
    return converged, s, row_clusters, col_clusters, e
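A minimal driver for coclustering (a sketch: the matrix size, chunking, and cluster counts are assumptions, and a dask.distributed client must be running because the function calls get_client()):

import dask.array as da
from dask.distributed import Client

client = Client()  # local cluster; coclustering() picks it up via get_client()
Z = da.random.random((200, 100), chunks=(100, 50))
converged, niters, row_cl, col_cl, err = coclustering(
    Z, nclusters_row=4, nclusters_col=3,
    errobj=1e-6, niters=50, epsilon=1e-8)
print(converged, niters, err)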
Example #12
def _distance(Z, Y, epsilon):
    """ Distance function """
    Y = Y + epsilon
    # The first term below is equal to one row of: da.dot(da.ones(m, n), Y)
    # with Z.shape = (m, n) and Y.shape = (n, k)
    return Y.sum(axis=0, keepdims=True) - da.matmul(Z, da.log(Y))
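A toy check of the comment's identity (the shapes are made up): the first term equals one row of da.dot(da.ones((m, n)), Y), i.e. the column sums of Y.

import numpy as np
import dask.array as da

m, n, k = 4, 3, 2
Z = da.from_array(np.random.rand(m, n), chunks=(2, 3))
Y = da.from_array(np.random.rand(n, k), chunks=(3, 2))
colsums = Y.sum(axis=0, keepdims=True)       # shape (1, k)
one_row = da.dot(da.ones((m, n)), Y)[:1]     # any single row of the full product
assert np.allclose(colsums.compute(), one_row.compute())
d = _distance(Z, Y, epsilon=1e-8)            # distance matrix, shape (m, k)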
Example #13
import datetime

import dask.array as da
from dask.distributed import Client

K = 1 << 10
mx = 64 * K
cx = 4 * K

name = 'matrixmul_64k_x_4k'

client = Client('10.255.23.115:8786', name=name)

x = da.random.random((mx, mx), chunks=(cx, cx))
y = da.random.random((mx, mx), chunks=(cx, cx))
z = da.matmul(x, y)

# Start the computation.

start = datetime.datetime.now()
results = z.compute(scheduler='distributed')
end = datetime.datetime.now()

print(f'Matrix multiplication is done in {end - start}')

z.visualize(filename=f'{name}.png')
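Beyond wall-clock timing, the distributed scheduler can record a task-level profile of the same computation (a sketch; performance_report ships with dask.distributed):

from dask.distributed import performance_report

with performance_report(filename=f'{name}-report.html'):
    da.matmul(x, y).compute(scheduler='distributed')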
Example #14
    X = np.random.random((batch_size, 3**2 * input_channels, input_size**2))
    kernels_mean = np.random.random((total_kernels, 3**2 * input_channels))
    cov_list = [
        random_cov(3**2 * input_channels) for number in range(total_kernels)
    ]
    kernels_cov = np.stack(cov_list)

    X = da.from_array(X)
    kernels_mean = da.from_array(kernels_mean)
    kernels_cov = da.from_array(kernels_cov)

    batch_out = []
    for i in range(batch_size):
        kernel_out = []
        for j in range(total_kernels):
            mean = da.matmul(kernels_mean[j, :], X[i, :, :])
            cov = da.matmul(da.transpose(X[i, :, :]),
                            da.matmul(kernels_cov[j, :, :], X[i, :, :]))
            z = mvn_random_DASK(mean, cov, total_samples, input_size**2)
            g = relu(z)
            mean_g = da.mean(g, axis=1)
            kernel_out.append(mean_g)
        kernels_out = da.stack(kernel_out, axis=0)
        batch_out.append(kernels_out)
    batches_out = da.stack(batch_out, axis=0)
    print('task graph complete')
    mean_g.visualize(rankdir="LR",
                     filename="task_graph_mean_g.pdf",
                     cmap='viridis')
    kernels_out.visualize(rankdir="LR", filename="task_graph_conv_out.pdf")
Example #15
    # validate =True
    validate = False
    if validate:
        numpy_validation_list = va.single_mean_single_covariance_validator(
            X.compute(), kernels_mean.compute(), kernels_cov.compute(),
            batch_size, total_kernels, input_size)

    times = []  # list for storing execution times
    cluster = 'localhost:8001'  # address of compute cluster
    with Client(cluster) as client:  # connect to the compute cluster
        for n in range(itrs):  # itrs runs
            start = time.time()  # save start time
            batch_out = []  # create list for batch output
            for i in range(batch_size):  # for each image
                kernel_out = []  # create list for kernel outputs
                mean = da.matmul(kernels_mean,
                                 X[i, :, :])  # compute all kernel means
                for j in range(total_kernels):  # for each kernel
                    cov = da.matmul(
                        da.transpose(X[i, :, :]),  # compute covariance
                        da.matmul(kernels_cov[j, :, :], X[i, :, :]))
                    z = mvn_random_DASK(
                        mean[j, :], cov, total_samples,
                        input_size**2)  # sample from transformed distribution
                    g = relu(z)  # pass samples through relu
                    mean_g = da.mean(
                        g, axis=1)  # compute ensemble mean from samples
                    kernel_out.append(
                        mean_g)  # add ensemble mean to kernel outputs list
                kernels_out = da.stack(kernel_out,
                                       axis=0)  # stack all kernel outputs
                batch_out.append(
                    kernels_out)  # collect the stacked kernel outputs for this image
Example #16
import dask.array as da
from dask.dot import dot_graph

image_1 = da.zeros((1024, 1024), chunks=(512, 512))
image_2 = da.ones((1024, 1024), chunks=(512, 512))
dot_graph(image_1.dask)

# In[36]:

image_4 = (image_1 - 10) + (image_2 * 50)
dot_graph(image_4.dask)

# In[37]:

image_5 = da.matmul(image_1, image_2)
dot_graph(image_5.dask)

# In[38]:

image_6 = (da.matmul(image_1, image_2) + image_1) * image_2
dot_graph(image_6.dask)

# In[39]:

import dask_ndfilters as da_ndfilt

image_7 = da_ndfilt.convolve(image_6, image_1)
dot_graph(image_7.dask)

# # Deep Learning
Example #17
import pandas as pd

import dask.array as da
import sys
import os

#fluidity_fp  = '/mnt/c/Users/julia/fluidity/fluidity-master/python'
DA_project_fp = '/mnt/c/Users/julia/Documents/Imperial/DA_project'
#sys.path.append(fluidity_fp)
sys.path.append(DA_project_fp)

import vtktools
ug1 = vtktools.vtu(DA_project_fp + '/data/LSBU_c_0.vtu')
ug2 = vtktools.vtu(DA_project_fp + '/data/LSBU_0.vtu')

print(ug1.GetFieldNames())
print(ug2.GetFieldNames())

#read the velocity field and copy it into an array named p
p = ug2.GetVectorField('Velocity')

#create a dask array
p = da.from_array(p, chunks=[5000, 63])
n = len(p)
print(n)

#create background matrix
Background = da.matmul(p, p.T)
print(Background[0, 0].compute())  # compute one element without materializing the full n x n matrix
Example #18
def cov_mult(conv_matrix, cov_matrix):
    conv_matrix = da.transpose(conv_matrix)
    return da.matmul(da.matmul(conv_matrix, cov_matrix),
                     da.transpose(conv_matrix))
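A usage sketch for cov_mult (all shapes are assumptions): with conv_matrix of shape (n, k) and cov_matrix of shape (n, n), it returns the propagated covariance conv_matrix.T @ cov_matrix @ conv_matrix of shape (k, k).

import dask.array as da

A = da.random.random((4, 3), chunks=(4, 3))  # hypothetical linear map, shape (n, k)
S = da.random.random((4, 4), chunks=(4, 4))
S = da.matmul(S, S.T)                        # make S a symmetric PSD covariance
C = cov_mult(A, S)                           # A.T @ S @ A
print(C.compute().shape)                     # (3, 3)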
Example #19
    for i in range(itr1):
        x = 1000 * i + 1000
        y = 1000 * i + 1000
        z = 1000 * i + 1000

        A = np.random.random((x, y))
        B = np.random.random((y, z))

        Adask = da.from_array(A, chunks='auto')
        Bdask = da.from_array(B, chunks='auto')

        start = time.time()
        for j in range(itr2):
            results = []  # reset per repetition so each run stacks fresh column products
            for k in range(z):
                results.append(da.matmul(Adask, Bdask[:, k]))
            results_stacked = da.stack(results)
            result = results_stacked.compute()
        execution_time_dask_dot.append((time.time() - start) / itr2)

        start = time.time()
        for j in range(itr2):
            C = da.matmul(Adask, Bdask)
            result2 = C.compute()
        execution_time_dask_matmul.append((time.time() - start) / itr2)

    #%%
    data_set = {
        'Operation': [
            "matrix-vector",
            "matrix-vector",
     
#%% Matmul test
itr = 1

A = np.random.random((x, y))
B = np.random.random((y, z))

start = time.time()
for i in range(itr):
    C = np.matmul(A, B)
execution_time_np_matmul = (time.time() - start) / itr

Adask = da.from_array(A, chunks='auto')
Bdask = da.from_array(B, chunks='auto')
start = time.time()
for i in range(itr):
    C = da.matmul(Adask, Bdask)
    result = C.compute()
execution_time_dask_matmul = (time.time() - start) / itr
#%% Cholesky test
with threadpool_limits(limits=1, user_api='blas'):
    x = 10000

    itr = 1

    A = np.random.random((x, x))
    A = np.matmul(A, A.transpose())
    start = time.time()
    for i in range(itr):
        B = np.linalg.cholesky(A)
    execution_time_np_cholesky = (time.time() - start) / itr

def convolution_mean_DASK(X, mu_W, batch_size, input_kernels):
    # X: (batch, d, s), mu_W: (kernels, d); fills a NumPy output row by row
    mu_z = np.empty((batch_size, input_kernels, X.shape[2]))
    for i in range(batch_size):
        for j in range(input_kernels):
            # assigning a dask expression into a NumPy slice forces it to compute here
            mu_z[i, j, :] = da.matmul(mu_W[j, :], X[i, :, :])
    return mu_z
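Because np.matmul-style broadcasting also works on dask arrays, the double loop above can collapse into one lazy call; a sketch (the batched variant's name is hypothetical):

def convolution_mean_dask_batched(X, mu_W):
    # mu_W (kernels, d) broadcasts against X (batch, d, s) -> (batch, kernels, s)
    return da.matmul(mu_W, X)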