Example 1
    def better_rec(self, w, model, s=1, weights=1, damp_z=1):
        """Quick switch to allow reconstruction at unknown scale
        returns a,r and scale"""
        from numpy.core.umath_tests import matrix_multiply
        proj = matrix_multiply(self.cam[np.newaxis], model)
        proj[:, :2] = (proj[:, :2] * s + w * weights) / (s + weights)
        proj[:, 2] *= damp_z
        out = matrix_multiply(self.cam.T[np.newaxis], proj)
        return out
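Note: numpy.core.umath_tests is an internal NumPy module whose import has emitted a DeprecationWarning for years; np.matmul (the @ operator) provides the same broadcasting batched product. A minimal sketch of the equivalence, using stand-in shapes for self.cam and model:

# Sketch (not from the original source): np.matmul broadcasts over leading
# dimensions exactly like the deprecated umath_tests.matrix_multiply.
import numpy as np

cam = np.random.randn(3, 3)        # stand-in for self.cam
model = np.random.randn(5, 3, 10)  # a stack of five 3x10 models

proj = np.matmul(cam[np.newaxis], model)   # same result as matrix_multiply
assert proj.shape == (5, 3, 10)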
Example 2
def _map_params_to_P_zero(params, params_type, initial, params_slice, filler,
                          boo, cholesky_of_P_zero,
                          square_root_filters):
    """Map parameters from params to P_zero."""
    # write params in filler
    filler[:] = 0
    filler[boo] = params[params_slice]

    # transform the filler
    if params_type == 'short' or cholesky_of_P_zero is True:
        if square_root_filters is False:
            # make chol_t to not chol
            filler = matrix_multiply(
                np.transpose(filler, axes=(0, 2, 1)), filler)
    else:
        # make not_chol to not_chol (covariance matrices are symmetric, so
        # only half of the off-diagonal elements have to be estimated. Here the
        # lower triangle is filled with the transpose of the upper triangle.)
        for i in range(len(filler)):
            filler[i] += (filler[i] - np.diag(np.diagonal(filler[i]))).T

        if square_root_filters is True:
            # make not_chol to chol_t
            filler = np.transpose(cholesky(filler), axes=(0, 2, 1))

    if square_root_filters is False:
        initial[:] = filler
    else:
        initial[:, :, 1:, 1:] = filler
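The matrix_multiply call above rebuilds each covariance matrix as C^T C from a stack of transposed Cholesky factors. A minimal sketch of that round trip, with assumed shapes rather than the original data:

# Sketch of the chol_t -> covariance round trip: transpose(chol_t) @ chol_t
# equals L @ L^T, i.e. the original covariance.
import numpy as np

cov_in = np.array([np.eye(3) + 0.1] * 4)      # stack of SPD matrices
L = np.linalg.cholesky(cov_in)                # lower Cholesky factors
chol_t = np.transpose(L, axes=(0, 2, 1))      # transposed (upper) factors
cov_out = np.matmul(np.transpose(chol_t, axes=(0, 2, 1)), chol_t)
assert np.allclose(cov_in, cov_out)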
Example 3
    def test_gufunc_new_axis(self):

        @guvectorize([void(float64[:, :], float64[:, :], float64[:, :])],
                     '(m,n),(n,p)->(m,p)',
                     target='cuda')
        def matmulcore(A, B, C):
            m, n = A.shape
            n, p = B.shape
            for i in range(m):
                for j in range(p):
                    C[i, j] = 0
                    for k in range(n):
                        C[i, j] += A[i, k] * B[k, j]

        gufunc = matmulcore

        X = np.random.randn(10, 3, 3)
        Y = np.random.randn(3, 3)

        gold = ut.matrix_multiply(X, Y)

        res1 = gufunc(X, Y)
        np.testing.assert_allclose(gold, res1)

        res2 = gufunc(X, np.tile(Y, (10, 1, 1)))
        np.testing.assert_allclose(gold, res2)
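The same kernel also compiles for the CPU target when no CUDA device is available, which makes the broadcast behaviour easy to check locally. A self-contained sketch, with np.matmul standing in for ut.matrix_multiply as the gold value (assumes numba is installed):

# CPU-target variant of the kernel above (sketch, not the original test).
import numpy as np
from numba import guvectorize, void, float64

@guvectorize([void(float64[:, :], float64[:, :], float64[:, :])],
             '(m,n),(n,p)->(m,p)', target='cpu')
def matmulcore_cpu(A, B, C):
    m, n = A.shape
    n, p = B.shape
    for i in range(m):
        for j in range(p):
            C[i, j] = 0
            for k in range(n):
                C[i, j] += A[i, k] * B[k, j]

X = np.random.randn(10, 3, 3)
Y = np.random.randn(3, 3)
np.testing.assert_allclose(matmulcore_cpu(X, Y), np.matmul(X, Y))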
Example 4
    def test_gufunc_auto_transfer(self):

        @guvectorize([void(float32[:, :], float32[:, :], float32[:, :])],
                     '(m,n),(n,p)->(m,p)',
                     target='cuda')
        def matmulcore(A, B, C):
            m, n = A.shape
            n, p = B.shape
            for i in range(m):
                for j in range(p):
                    C[i, j] = 0
                    for k in range(n):
                        C[i, j] += A[i, k] * B[k, j]

        gufunc = matmulcore
        gufunc.max_blocksize = 512

        matrix_ct = 2
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2,
                                                                   4)
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4,
                                                                   5)

        dB = cuda.to_device(B)

        C = gufunc(A, dB).copy_to_host()
        Gold = ut.matrix_multiply(A, B)
        self.assertTrue(np.allclose(C, Gold))
Example 5
    def test_gufunc(self):

        @guvectorize([void(float32[:, :], float32[:, :], float32[:, :])],
                     '(m,n),(n,p)->(m,p)',
                     target='cuda')
        def matmulcore(A, B, C):
            m, n = A.shape
            n, p = B.shape
            for i in range(m):
                for j in range(p):
                    C[i, j] = 0
                    for k in range(n):
                        C[i, j] += A[i, k] * B[k, j]

        gufunc = matmulcore
        gufunc.max_blocksize = 512

        matrix_ct = 1001 # an odd number to test thread/block division in CUDA
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2,
                                                                   4)
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4,
                                                                   5)

        C = gufunc(A, B)
        Gold = ut.matrix_multiply(A, B)
        self.assertTrue(np.allclose(C, Gold))
Example 6
    def test_gufunc_stream(self):
        #cuda.driver.flush_pending_free()
        matrix_ct = 1001 # an odd number to test thread/block division in CUDA
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2,
                                                                   4)
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4,
                                                                   5)

        ts = time()
        stream = cuda.stream()
        dA = cuda.to_device(A, stream)
        dB = cuda.to_device(B, stream)

        dC = cuda.device_array(shape=(1001, 2, 5), dtype=A.dtype, stream=stream)
        dC = gufunc(dA, dB, out=dC, stream=stream)
        C = dC.copy_to_host(stream=stream)
        stream.synchronize()

        tcuda = time() - ts

        ts = time()
        Gold = ut.matrix_multiply(A, B)
        tcpu = time() - ts

        stream_speedups.append(tcpu / tcuda)

        self.assertTrue(np.allclose(C, Gold))
Example 7
    def get_gradient_by_agent(self, beta, data, depm):
        nobs, alts, nvars = data.shape
        self.upc_sequence.compute_utilities(data, beta, self.resources)
        p = self.upc_sequence.compute_probabilities(self.resources)
        d = depm - p
        ## WAS: g0 = (d[..., newaxis] * data).sum(axis=1)
        g = matrix_multiply(d[:, newaxis, :], data)
        g = squeeze(g)
        return g
Example 8
    def check_matmul_gufunc(self, gufunc):
        matrix_ct = 1001
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2, 4)
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4, 5)

        C = gufunc(A, B)
        Gold = ut.matrix_multiply(A, B)

        self.assertTrue(np.allclose(C, Gold))
Example 9
    def check_matmul_gufunc(self, gufunc):
        matrix_ct = 1001
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2, 4)
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4, 5)

        C = gufunc(A, B)
        Gold = ut.matrix_multiply(A, B)

        np.testing.assert_allclose(C, Gold, rtol=1e-5, atol=1e-8)
Example 10
    def build_and_rot_model(a, e, s0, r):
        """
        Build model and rotate according to the identified rotation matrix
        """
        from numpy.core.umath_tests import matrix_multiply

        r2 = Prob3dPose.upgrade_r(r.T).transpose((0, 2, 1))
        mod = Prob3dPose.build_model(a, e, s0)
        mod = matrix_multiply(r2, mod)
        return mod
Example 11
def transform_points_with_homography(H, _xys):
    """
    Args:
        H (ndarray[float64_t, ndim=2]):  homography/perspective matrix
        _xys (ndarray[ndim=2]): (N x 2) array
    """
    xyz  = add_homogenous_coordinate(_xys)
    xyz_t = matrix_multiply(H, xyz)
    xy_t  = remove_homogenous_coordinate(xyz_t)
    return xy_t
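The two helpers are not shown in this snippet; plausible reconstructions (names and shapes inferred from the matrix_multiply call, which needs xyz as a 3xN array) might look like this:

# Hypothetical versions of the helpers used above (assumed from context).
import numpy as np

def add_homogenous_coordinate(_xys):
    # (N, 2) -> (3, N): transpose and append a row of ones so H @ xyz works
    return np.vstack([_xys.T, np.ones(len(_xys))])

def remove_homogenous_coordinate(xyz_t):
    # (3, N) -> (N, 2): divide through by the homogeneous coordinate
    return (xyz_t[:2] / xyz_t[2]).T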
Example 12
    def test_gufunc_hidim(self):

        gufunc = _get_matmulcore_gufunc(max_blocksize=512)

        matrix_ct = 100  # an odd number to test thread/block division in CUDA
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(4, 25, 2, 4)
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(4, 25, 4, 5)

        C = gufunc(A, B)
        Gold = ut.matrix_multiply(A, B)
        self.assertTrue(np.allclose(C, Gold))
Example 13
def x_to_y(matFn, date, vecs, reverse=False):
    vecs = np.asarray(vecs)
    assert vecs.ndim == 2 
    assert vecs.shape[1] == 3
        
    et = date2es(date)
    mat = matFn(et)
    if reverse:
        mat = mat.T
    vecsOut = matrix_multiply(mat, vecs[...,np.newaxis]).reshape(vecs.shape)
    return vecsOut
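The matrix_multiply(mat, vecs[..., np.newaxis]) pattern treats every row of vecs as a column vector. A sketch of three equivalent formulations, with stand-in data:

# Sketch: three equivalent ways to rotate a stack of row vectors.
import numpy as np

mat = np.linalg.qr(np.random.randn(3, 3))[0]   # stand-in rotation matrix
vecs = np.random.randn(7, 3)

out1 = np.matmul(mat, vecs[..., np.newaxis]).reshape(vecs.shape)
out2 = np.einsum('ij,nj->ni', mat, vecs)       # same contraction, no reshape
out3 = vecs @ mat.T                            # classic row-vector form
assert np.allclose(out1, out2) and np.allclose(out2, out3)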
Example 14
    def check_matmul_gufunc(self, gufunc):
        matrix_ct = 1001
        A = np.arange(matrix_ct * 2 * 4,
                      dtype=np.float32).reshape(matrix_ct, 2, 4)
        B = np.arange(matrix_ct * 4 * 5,
                      dtype=np.float32).reshape(matrix_ct, 4, 5)

        C = gufunc(A, B)
        Gold = ut.matrix_multiply(A, B)

        np.testing.assert_allclose(C, Gold, rtol=1e-5, atol=1e-8)
Example 15
    def check_matmul_gufunc(self, gufunc):
        matrix_ct = 1001
        A = np.arange(matrix_ct * 2 * 4,
                      dtype=np.float32).reshape(matrix_ct, 2, 4)
        B = np.arange(matrix_ct * 4 * 5,
                      dtype=np.float32).reshape(matrix_ct, 4, 5)

        C = gufunc(A, B)
        Gold = ut.matrix_multiply(A, B)

        self.assertTrue(np.allclose(C, Gold))
Example 16
    def compare_matrix_multiply_results(self, tp):
        d1 = np.array(rand(2, 3, 4), dtype=tp)
        d2 = np.array(rand(2, 3, 4), dtype=tp)
        msg = "matrix multiply on type %s" % d1.dtype.name

        def permute_n(n):
            if n == 1:
                return ([0], )
            ret = ()
            base = permute_n(n - 1)
            for perm in base:
                for i in range(n):
                    new = perm + [n - 1]
                    new[n - 1] = new[i]
                    new[i] = n - 1
                    ret += (new, )
            return ret

        def slice_n(n):
            if n == 0:
                return ((), )
            ret = ()
            base = slice_n(n - 1)
            for sl in base:
                ret += (sl + (slice(None), ), )
                ret += (sl + (slice(0, 1), ), )
            return ret

        def broadcastable(s1, s2):
            return s1 == s2 or s1 == 1 or s2 == 1

        permute_3 = permute_n(3)
        slice_3 = slice_n(3) + ((slice(None, None, -1), ) * 3, )

        ref = True
        for p1 in permute_3:
            for p2 in permute_3:
                for s1 in slice_3:
                    for s2 in slice_3:
                        a1 = d1.transpose(p1)[s1]
                        a2 = d2.transpose(p2)[s2]
                        ref = ref and a1.base is not None
                        ref = ref and a2.base is not None
                        if broadcastable(a1.shape[-1], a2.shape[-2]) and \
                           broadcastable(a1.shape[0], a2.shape[0]):
                            assert_array_almost_equal(
                                umt.matrix_multiply(a1, a2),
                                np.sum(a2[..., np.newaxis].swapaxes(-3, -1) *
                                       a1[..., np.newaxis, :],
                                       axis=-1),
                                err_msg=msg + ' %s %s' %
                                (str(a1.shape), str(a2.shape)))

        assert_equal(ref, True, err_msg="reference check")
Example 17
def x_to_y(matFn, date, vecs, reverse=False):
    vecs = np.asarray(vecs)
    assert vecs.ndim == 2
    assert vecs.shape[1] == 3

    et = date2es(date)
    mat = matFn(et)
    if reverse:
        mat = mat.T
    vecsOut = matrix_multiply(mat, vecs[..., np.newaxis]).reshape(vecs.shape)
    return vecsOut
Example 18
    def test_gufunc_adjust_blocksize(self):
        matrix_ct = 1001 # an odd number to test thread/block division in CUDA
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2,
                                                                   4)
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4,
                                                                   5)

        gufunc.max_blocksize = 32
        C = gufunc(A, B)
        Gold = ut.matrix_multiply(A, B)
        self.assertTrue(np.allclose(C, Gold))
Example 19
def calculate_wij(X, mu, pi):
    mu_p = np.reshape(mu, [-1, 1, 3])
    sub = X[:, None, :, :] - mu_p[None, :, :, :]
    temp = np.reshape(sub, [sub.shape[0] * sub.shape[1], 1, -1])
    cov = np.exp(-(1 / 2.) * matrix_multiply(temp, temp.transpose([0, 2, 1])))
    cov = np.reshape(cov, [sub.shape[0], sub.shape[1]])

    numerator = cov * pi
    denominator = np.sum(cov * pi, axis=1)
    wij = numerator / denominator[:, None]
    return wij, cov
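The batched (1, d) x (d, 1) product above only ever produces the squared norm of each difference vector; a sketch of that identity:

# Sketch: matrix_multiply(temp, temp^T) on (M, 1, d) stacks is a squared norm.
import numpy as np

temp = np.random.randn(6, 1, 3)
q1 = np.matmul(temp, temp.transpose([0, 2, 1]))[:, 0, 0]
q2 = (temp[:, 0, :] ** 2).sum(axis=1)
assert np.allclose(q1, q2)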
Example 20
    def compare_matrix_multiply_results(self, tp):
        d1 = np.array(np.random.rand(2, 3, 4), dtype=tp)
        d2 = np.array(np.random.rand(2, 3, 4), dtype=tp)
        msg = "matrix multiply on type %s" % d1.dtype.name

        def permute_n(n):
            if n == 1:
                return ([0],)
            ret = ()
            base = permute_n(n-1)
            for perm in base:
                for i in range(n):
                    new = perm + [n-1]
                    new[n-1] = new[i]
                    new[i] = n-1
                    ret += (new,)
            return ret

        def slice_n(n):
            if n == 0:
                return ((),)
            ret = ()
            base = slice_n(n-1)
            for sl in base:
                ret += (sl+(slice(None),),)
                ret += (sl+(slice(0, 1),),)
            return ret

        def broadcastable(s1, s2):
            return s1 == s2 or s1 == 1 or s2 == 1

        permute_3 = permute_n(3)
        slice_3 = slice_n(3) + ((slice(None, None, -1),)*3,)

        ref = True
        for p1 in permute_3:
            for p2 in permute_3:
                for s1 in slice_3:
                    for s2 in slice_3:
                        a1 = d1.transpose(p1)[s1]
                        a2 = d2.transpose(p2)[s2]
                        ref = ref and a1.base is not None
                        ref = ref and a2.base is not None
                        if (a1.shape[-1] == a2.shape[-2] and
                                broadcastable(a1.shape[0], a2.shape[0])):
                            assert_array_almost_equal(
                                umt.matrix_multiply(a1, a2),
                                np.sum(a2[..., np.newaxis].swapaxes(-3, -1) *
                                       a1[..., np.newaxis,:], axis=-1),
                                err_msg=msg + ' %s %s' % (str(a1.shape),
                                                          str(a2.shape)))

        assert_equal(ref, True, err_msg="reference check")
Example 21
    def average(self):

        if len(self.shape) == 1:

            import numpy.core.umath_tests as ut
            system = ut.matrix_multiply(self.qs[:, :, np.newaxis], self.qs[:, np.newaxis, :]).sum(axis=0)
            w, v = np.linalg.eigh(system)
            qiT_dot_qref = (self.qs[:, :, np.newaxis] * v[np.newaxis, :, :]).sum(axis=1)
            return Quaternions(v[:, np.argmin((1. - qiT_dot_qref ** 2).sum(axis=0))])

        else:

            raise NotImplementedError('Cannot average multi-dimensional Quaternions')
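The accumulated system matrix is the sum of the outer products q q^T over all quaternions; the same accumulation written with einsum, as a sketch (qs assumed to be an (N, 4) array):

# Sketch of the accumulation step above.
import numpy as np

qs = np.random.randn(5, 4)
qs /= np.linalg.norm(qs, axis=1, keepdims=True)

system1 = np.matmul(qs[:, :, np.newaxis], qs[:, np.newaxis, :]).sum(axis=0)
system2 = np.einsum('ni,nj->ij', qs, qs)   # sum of outer products q q^T
assert np.allclose(system1, system2)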
Example 22
    def test_gufunc(self):
        gufunc = GUVectorize(matmulcore, '(m,n),(n,p)->(m,p)', target='cpu')
        gufunc.add(argtypes=[float32[:, :], float32[:, :], float32[:, :]])
        gufunc = gufunc.build_ufunc()

        matrix_ct = 1001 # an odd number to test thread/block division in CUDA
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2, 4)
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4, 5)

        C = gufunc(A, B)
        Gold = ut.matrix_multiply(A, B)

        self.assertTrue(np.allclose(C, Gold))
Example 23
    def test_gufunc_small(self):

        gufunc = _get_matmulcore_gufunc(max_blocksize=512)

        matrix_ct = 2
        A = np.arange(matrix_ct * 2 * 4,
                      dtype=np.float32).reshape(matrix_ct, 2, 4)
        B = np.arange(matrix_ct * 4 * 5,
                      dtype=np.float32).reshape(matrix_ct, 4, 5)

        C = gufunc(A, B)
        Gold = ut.matrix_multiply(A, B)
        self.assertTrue(np.allclose(C, Gold))
Example 24
    def average(self):

        if len(self.shape) == 1:

            import numpy.core.umath_tests as ut
            system = ut.matrix_multiply(self.qs[:, :, np.newaxis], self.qs[:, np.newaxis, :]).sum(axis=0)
            w, v = np.linalg.eigh(system)
            qiT_dot_qref = (self.qs[:, :, np.newaxis] * v[np.newaxis, :, :]).sum(axis=1)
            return Quaternions(v[:, np.argmin((1. - qiT_dot_qref ** 2).sum(axis=0))])

        else:

            raise NotImplementedError('Cannot average multi-dimensional Quaternions')
Example 25
    def backward(self, downstream_gradient):
        probs, = self.saved_tensors
        n_shape = probs.shape[1]

        jacobian = probs[..., :, np.newaxis] * (np.eye(n_shape) - probs[..., np.newaxis, :])

        # Downstream gradient is 2d, jacobian is 3d, and we need to perform
        # matrix-vector multiplication jacobian[i] * dL[i]. Since the jacobian
        # is symmetric, we can omit the transpose of the jacobian
        product = matrix_multiply(jacobian, downstream_gradient[..., np.newaxis])
        # matrix_multiply returns a 3d tensor, however we need a 2d matrix
        product = product.squeeze()

        return product
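The batched Jacobian-vector product in backward can also be written as a single einsum, which avoids the newaxis/squeeze pair; a sketch with stand-in probabilities:

# Sketch: einsum form of the per-sample Jacobian-vector product above.
import numpy as np

probs = np.random.rand(8, 5)
probs /= probs.sum(axis=1, keepdims=True)
grad = np.random.randn(8, 5)

jac = probs[..., :, np.newaxis] * (np.eye(5) - probs[..., np.newaxis, :])
p1 = np.matmul(jac, grad[..., np.newaxis]).squeeze(-1)
p2 = np.einsum('nij,nj->ni', jac, grad)
assert np.allclose(p1, p2)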
Example 26
def gmm(k, xs, tol=1e-6, max_iter=200):
    """Vectorized version of GMM. Faster than above but still rough."""

    n, p = xs.shape

    mus, z = initialization.kmeanspp(k, xs, ret='both')
    pis = np.array([len(np.where(z == i)[0]) / n for i in np.unique(z)])
    sigmas = np.array([np.eye(p)] * k)

    ll_old = 0
    for i in range(max_iter):
        exp_A = []
        exp_B = []
        ll_new = 0

        # E-step, ws are responsibilities
        ws = np.zeros((k, n))
        for j in range(k):
            ws[j, :] = pis[j] * multivariate_normal(mus[j], sigmas[j]).pdf(xs)
        ws /= ws.sum(0)

        # M-step
        pis = ws.sum(axis=1)
        pis /= n

        mus = np.dot(ws, xs)
        mus /= ws.sum(1)[:, None]

        sigmas = np.zeros((k, p, p))
        for j in range(k):
            ys = xs - mus[j, :]
            sigmas[j] = (ws[j, :, None, None] *
                         matrix_multiply(ys[:, :, None], ys[:, None, :])).sum(axis=0)
        sigmas /= ws.sum(axis=1)[:, None, None]

        # update complete log likelihood
        ll_new = 0
        for pi, mu, sigma in zip(pis, mus, sigmas):
            ll_new += pi * multivariate_normal(mu, sigma).pdf(xs)
        ll_new = np.log(ll_new).sum()

        # convergence test
        if np.abs(ll_new - ll_old) < tol:
            break
        ll_old = ll_new

    z = ws.T
    labels = np.argmax(z, axis=1)

    return labels
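The covariance update inside the M-step sums responsibility-weighted outer products; the loop body is equivalent to one einsum per component, sketched here:

# Sketch: the weighted outer-product sum from the M-step as an einsum.
import numpy as np

ys = np.random.randn(100, 3)   # samples centered on one component mean
w = np.random.rand(100)        # responsibilities of that component

s1 = (w[:, None, None] * np.matmul(ys[:, :, None], ys[:, None, :])).sum(axis=0)
s2 = np.einsum('n,ni,nj->ij', w, ys, ys)
assert np.allclose(s1, s2)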
Example 27
    def test_gufunc_new_axis(self):

        gufunc = _get_matmulcore_gufunc(dtype=float64)

        X = np.random.randn(10, 3, 3)
        Y = np.random.randn(3, 3)

        gold = ut.matrix_multiply(X, Y)

        res1 = gufunc(X, Y)
        np.testing.assert_allclose(gold, res1)

        res2 = gufunc(X, np.tile(Y, (10, 1, 1)))
        np.testing.assert_allclose(gold, res2)
Example 28
def gmm(k, xs, tol=1e-6, max_iter=200):
    """Vectorized version of GMM. Faster than above but still rough."""
    
    n, p = xs.shape
    
    mus, z = initialization.kmeanspp(k, xs, ret='both')
    pis = np.array([len(np.where(z==i)[0])/n for i in np.unique(z)])
    sigmas = np.array([np.eye(p)]*k)

    ll_old = 0
    for i in range(max_iter):
        exp_A = []
        exp_B = []
        ll_new = 0

        # E-step, ws are responsibilities
        ws = np.zeros((k, n))
        for j in range(k):
            ws[j, :] = pis[j]*multivariate_normal(mus[j], sigmas[j]).pdf(xs)
        ws /= ws.sum(0)
            
        # M-step
        pis = ws.sum(axis=1)
        pis /= n

        mus = np.dot(ws, xs)
        mus /= ws.sum(1)[:, None]

        sigmas = np.zeros((k, p, p))
        for j in range(k):
            ys = xs - mus[j, :]
            sigmas[j] = (ws[j, :, None, None] *
                         matrix_multiply(ys[:, :, None], ys[:, None, :])).sum(axis=0)
        sigmas /= ws.sum(axis=1)[:, None, None]

        # update complete log likelihood
        ll_new = 0
        for pi, mu, sigma in zip(pis, mus, sigmas):
            ll_new += pi*multivariate_normal(mu, sigma).pdf(xs)
        ll_new = np.log(ll_new).sum()

        # convergence test
        if np.abs(ll_new - ll_old) < tol:
            break
        ll_old = ll_new

    z = ws.T
    labels = np.argmax(z, axis=1)

    return labels
Example 29
    def test_gufunc(self):
        gufunc = GUVectorize(matmulcore, '(m,n),(n,p)->(m,p)',
                             target=self.target)
        gufunc.add((float32[:, :], float32[:, :], float32[:, :]))
        gufunc = gufunc.build_ufunc()

        matrix_ct = 1001
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2, 4)
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4, 5)

        C = gufunc(A, B)
        Gold = ut.matrix_multiply(A, B)

        self.assertTrue(np.allclose(C, Gold))
Example 30
    def flux_down(self, fluxDownTop, emission=None):
        '''Compute downwelling radiative flux at interfaces between layers.

        Inputs:
            fluxDownTop: flux down at top
            emission: emission from atmospheric levels (N)
                defaults to zero if not given
        Returns:
            vector of downwelling radiative flux between levels (N+1)
            element 0 is the flux down to the surface.'''
        if emission is None:
            emission = np.zeros_like(self.absorptivity)
        E = np.concatenate((emission, np.atleast_1d(fluxDownTop)), axis=-1)
        #  dot product (matrix multiplication) along last axes
        return np.squeeze(matrix_multiply(self.Tdown, E[..., np.newaxis]))
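The final line is a matrix-vector product along the trailing axes; with stand-in shapes for self.Tdown and E it can be sketched as an einsum:

# Sketch: the trailing-axes matrix-vector product, with assumed shapes.
import numpy as np

T = np.random.rand(4, 6, 6)    # stand-in for self.Tdown
E = np.random.rand(4, 6)

f1 = np.squeeze(np.matmul(T, E[..., np.newaxis]))
f2 = np.einsum('...ij,...j->...i', T, E)
assert np.allclose(f1, f2)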
Example 31
    def test_gufunc_auto_transfer(self):

        gufunc = _get_matmulcore_gufunc(max_blocksize=512)

        matrix_ct = 2
        A = np.arange(matrix_ct * 2 * 4,
                      dtype=np.float32).reshape(matrix_ct, 2, 4)
        B = np.arange(matrix_ct * 4 * 5,
                      dtype=np.float32).reshape(matrix_ct, 4, 5)

        dB = cuda.to_device(B)

        C = gufunc(A, dB).copy_to_host()
        Gold = ut.matrix_multiply(A, B)
        self.assertTrue(np.allclose(C, Gold))
Example 32
    def test_cpu_guvectorize(self):
        target = 'cpu'

        gufunc = guvectorize([void(float32[:,:], float32[:,:], float32[:,:])],
                             '(m,n),(n,p)->(m,p)',
                             target=target)(matmulcore)

        matrix_ct = 1001 # an odd number to test thread/block division in CUDA
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2, 4)
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4, 5)

        C = gufunc(A, B)
        Gold = ut.matrix_multiply(A, B)

        self.assertTrue(np.allclose(C, Gold))
Example 33
    def flux_down(self, fluxDownTop, emission=None):
        '''Compute downwelling radiative flux at interfaces between layers.

        Inputs:
            fluxDownTop: flux down at top
            emission: emission from atmospheric levels (N)
                defaults to zero if not given
        Returns:
            vector of downwelling radiative flux between levels (N+1)
            element 0 is the flux down to the surface.'''
        if emission is None:
            emission = np.zeros_like(self.absorptivity)
        E = np.concatenate((emission, np.atleast_1d(fluxDownTop)), axis=-1)
        #  dot product (matrix multiplication) along last axes
        return np.squeeze(matrix_multiply(self.Tdown, E[..., np.newaxis]))
Example 34
    def flux_up(self, fluxUpBottom, emission=None):
        '''Compute upwelling radiative flux at interfaces between layers.

        Inputs:
            fluxUpBottom: flux up from bottom
            emission: emission from atmospheric levels (N)
                defaults to zero if not given
        Returns:
            vector of upwelling radiative flux between levels (N+1)
            element N is the flux up to space.'''
        if emission is None:
            emission = np.zeros_like(self.absorptivity)
        E = np.concatenate((np.atleast_1d(fluxUpBottom), emission), axis=-1)
        #  dot product (matrix multiplication) along last axes
        return np.squeeze(matrix_multiply(self.Tup, E[..., np.newaxis]))
Example 35
    def test_gufunc(self):
        gufunc = GUVectorize(matmulcore, '(m,n),(n,p)->(m,p)', target='cpu')
        gufunc.add(argtypes=[float32[:, :], float32[:, :], float32[:, :]])
        gufunc = gufunc.build_ufunc()

        matrix_ct = 1001  # an odd number to test thread/block division in CUDA
        A = np.arange(matrix_ct * 2 * 4,
                      dtype=np.float32).reshape(matrix_ct, 2, 4)
        B = np.arange(matrix_ct * 4 * 5,
                      dtype=np.float32).reshape(matrix_ct, 4, 5)

        C = gufunc(A, B)
        Gold = ut.matrix_multiply(A, B)

        self.assertTrue(np.allclose(C, Gold))
Example 36
    def flux_up(self, fluxUpBottom, emission=None):
        '''Compute upwelling radiative flux at interfaces between layers.

        Inputs:
            fluxUpBottom: flux up from bottom
            emission: emission from atmospheric levels (N)
                defaults to zero if not given
        Returns:
            vector of upwelling radiative flux between levels (N+1)
            element N is the flux up to space.'''
        if emission is None:
            emission = np.zeros_like(self.absorptivity)
        E = np.concatenate((np.atleast_1d(fluxUpBottom), emission), axis=-1)
        #  dot product (matrix multiplication) along last axes
        return np.squeeze(matrix_multiply(self.Tup, E[..., np.newaxis]))
Example 37
def test_gufunc_array_expressions():
    gufunc = GUVectorize(array_expr_gufunc, '(m,n),(n,p)->(m,p)')
    gufunc.add(argtypes=[float_[:, :], float_[:, :], float_[:, :]])
    gufunc = gufunc.build_ufunc()

    matrix_ct = 10
    A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2, 4)
    B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4, 5)

    C = gufunc(A, B)
    Gold = ut.matrix_multiply(A, B)

    if (C != Gold).any():
        print(C)
        print(Gold)
        raise ValueError
Example 38
def transform_params_for_P_zero(params_for_P_zero, filler, boo,
                                estimate_cholesky_of_P_zero, direction):

    filler[:] = 0
    if estimate_cholesky_of_P_zero is True:
        return params_for_P_zero
    elif direction == 'short_to_long':
        filler[boo] = params_for_P_zero
        filler = matrix_multiply(np.transpose(filler, axes=(0, 2, 1)), filler)
        return filler[boo]
    else:
        filler[boo] = params_for_P_zero
        for i in range(len(filler)):
            filler[i] += (filler[i] - np.diag(np.diagonal(filler[i]))).T
        filler = np.transpose(cholesky(filler), axes=(0, 2, 1))
        return filler[boo]
Example 39
def test_gufunc_array_expressions():
    gufunc = GUVectorize(array_expr_gufunc, '(m,n),(n,p)->(m,p)')
    gufunc.add(argtypes=[float_[:,:], float_[:,:], float_[:,:]])
    gufunc = gufunc.build_ufunc()

    matrix_ct = 10
    A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2, 4)
    B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4, 5)

    C = gufunc(A, B)
    Gold = ut.matrix_multiply(A, B)

    if (C != Gold).any():
        print(C)
        print(Gold)
        raise ValueError
Example 40
    def test_gufunc_hidim(self):
        matrix_ct = 100 # an odd number to test thread/block division in CUDA
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(4, 25, 2, 4)
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(4, 25, 4, 5)

        ts = time()
        C = gufunc(A, B)
        tcuda = time() - ts

        ts = time()
        Gold = ut.matrix_multiply(A, B)
        tcpu = time() - ts

        non_stream_speedups.append(tcpu / tcuda)

        self.assertTrue(np.allclose(C, Gold))
Example 41
def transform_params_for_P_zero(params_for_P_zero, filler, boo,
                                estimate_cholesky_of_P_zero, direction):

    filler[:] = 0
    if estimate_cholesky_of_P_zero is True:
        return params_for_P_zero
    elif direction == 'short_to_long':
        filler[boo] = params_for_P_zero
        filler = matrix_multiply(
            np.transpose(filler, axes=(0, 2, 1)), filler)
        return filler[boo]
    else:
        filler[boo] = params_for_P_zero
        for i in range(len(filler)):
            filler[i] += (filler[i] - np.diag(np.diagonal(filler[i]))).T
        filler = np.transpose(cholesky(filler), axes=(0, 2, 1))
        return filler[boo]
Example 42
def kpts_matrix(kpts):
    # We are given the keypoint in invA format
    # invV = perdoch.invA
    #    V = perdoch.A
    #    Z = perdoch.E
    # invert into V
    nKp = len(kpts)
    invV = kpts_to_invV(kpts)
    V = [np.linalg.inv(v) for v in invV]
    assert len(V) == (nKp)
    #V = faster_inverse(invV)
    # transform into conic matrix Z
    # Z = (V.T).dot(V)
    Vt = np.array([v.T for v in V])  # list comprehension; map() is lazy on Python 3
    Z = matrix_multiply(Vt, V)
    assert Z.shape == (nKp, 3, 3)
    return invV, V, Z
Example 43
def _test_gufunc(backend, target):
    gufunc = GUVectorize(matmulcore, '(m,n),(n,p)->(m,p)')
    gufunc.add(argtypes=[f4[:,:], f4[:,:], f4[:,:]])
    gufunc = gufunc.build_ufunc()

    matrix_ct = 1001 # an odd number to test thread/block division in CUDA
    A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2, 4)
    B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4, 5)

    C = gufunc(A, B)
    Gold = ut.matrix_multiply(A, B)

#     print(A)
#     print(B)
#    print(C)
#    print(Gold)
    assert np.allclose(C, Gold)
Example 44
def _test_gufunc(backend, target):
    gufunc = GUVectorize(matmulcore, '(m,n),(n,p)->(m,p)')
    gufunc.add(argtypes=[f4[:, :], f4[:, :], f4[:, :]])
    gufunc = gufunc.build_ufunc()

    matrix_ct = 1001  # an odd number to test thread/block division in CUDA
    A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2, 4)
    B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4, 5)

    C = gufunc(A, B)
    Gold = ut.matrix_multiply(A, B)

    #     print(A)
    #     print(B)
    #    print(C)
    #    print(Gold)
    assert np.allclose(C, Gold)
Example 45
def kpts_matrix(kpts):
    # We are given the keypoint in invA format
    # invV = perdoch.invA
    #    V = perdoch.A
    #    Z = perdoch.E
    # invert into V
    nKp = len(kpts)
    invV = kpts_to_invV(kpts)
    V = [np.linalg.inv(v) for v in invV]
    assert len(V) == (nKp)
    #V = faster_inverse(invV)
    # transform into conic matrix Z
    # Z = (V.T).dot(V)
    Vt = np.array([v.T for v in V])  # list comprehension; map() is lazy on Python 3
    Z = matrix_multiply(Vt, V)
    assert Z.shape == (nKp, 3, 3)
    return invV, V, Z
Example 46
def reestimate_a(w, e, s0, rot, camera_r=False, Lambda=False):
    """Reestimate a from rest shape, rotation, and basis coefficients
    as a least squares problem.
    solution minimises 
    ||W_i- P.dot(camera_r).dot(Mat(rot_i)).dot(s+a_i.e)||^2_2 + Lambda**2.dot(a**2)
    for each frame i. Where **2 is the elementwise square"""
    if camera_r is False:
        return reestimate_a_old(w, e, s0, rot)

    basis = e.shape[0]
    frames = w.shape[0]
    points = w.shape[-1]
    # co-ordinate frame is xzy
    # write P as short for the projection  P.dot(camera_r).dot(Mat(rot_i))
    mat_r = upgrade_r(rot.T).transpose(0, 2, 1)

    P = matrix_multiply(camera_r[np.newaxis, :2], mat_r)
    #For each frame project the shape and subtract that from the measurement matrix
    res = w - P.dot(s0)
    #vis.scatter2d(P.dot(s0)[0],w[0])
    res = res.reshape(frames, points * 2)
    if Lambda is not False:
        res = np.hstack((res, np.zeros((frames, basis))))
    #compute the rotated e basis for each frame
    # output is frames,basis, 2, points
    # input is frames, 2,3 + basis,3,points
    re = np.einsum('ilk,jkp', P, e).reshape(frames, basis, 2 * points)
    re2 = np.empty((frames, basis, 2 * points + basis))
    if Lambda is not False:
        re2[:, :, :2 * points] = re
        re2[:, :, 2 * points:] = np.diag(Lambda)
        re = re2
    #Now solve for ||res-a.dot(re)||^2_2
    a = np.empty((frames, basis))
    a.fill(np.nan)
    residue = np.empty(frames)
    for i in range(frames):
        #    if i ==0:
        #print target[i]
        #print re[i]
        a[i], b, _, _ = np.linalg.lstsq(re[i].T, res[i])
        residue[i] = b  #.sum(1)
        #print aa
        #a[i]=aa
    #vis.scatter2d(P.dot(s0+(a[0,:,np.newaxis,np.newaxis]*e).sum(0))[0],w[0])
    return a, residue
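A detail worth calling out: np.einsum('ilk,jkp', P, e) has no output spec, so the result axes are the unsummed labels in alphabetical order, (i, j, l, p) = (frames, basis, 2, points), which is exactly what the reshape expects. A quick sketch:

# Sketch: implicit einsum output order is alphabetical over unsummed labels.
import numpy as np

P = np.random.randn(7, 2, 3)    # (frames, 2, 3)
e = np.random.randn(4, 3, 11)   # (basis, 3, points)

re = np.einsum('ilk,jkp', P, e)
assert re.shape == (7, 4, 2, 11)
assert np.allclose(re, np.einsum('ilk,jkp->ijlp', P, e))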
Example 47
    def test_gufunc(self, target='cpu'):
        gufunc = GUVectorize(matmulcore,
                             '(m,n),(n,p)->(m,p)',
                             target=self.target)
        gufunc.add((float32[:, :], float32[:, :], float32[:, :]))
        gufunc = gufunc.build_ufunc()

        matrix_ct = 1001
        A = np.arange(matrix_ct * 2 * 4,
                      dtype=np.float32).reshape(matrix_ct, 2, 4)
        B = np.arange(matrix_ct * 4 * 5,
                      dtype=np.float32).reshape(matrix_ct, 4, 5)

        C = gufunc(A, B)
        Gold = ut.matrix_multiply(A, B)

        self.assertTrue(np.allclose(C, Gold))
Example 48
    def test_cpu_guvectorize(self):
        target = 'cpu'

        gufunc = guvectorize(
            [void(float32[:, :], float32[:, :], float32[:, :])],
            '(m,n),(n,p)->(m,p)',
            target=target)(matmulcore)

        matrix_ct = 1001  # an odd number to test thread/block division in CUDA
        A = np.arange(matrix_ct * 2 * 4,
                      dtype=np.float32).reshape(matrix_ct, 2, 4)
        B = np.arange(matrix_ct * 4 * 5,
                      dtype=np.float32).reshape(matrix_ct, 4, 5)

        C = gufunc(A, B)
        Gold = ut.matrix_multiply(A, B)

        self.assertTrue(np.allclose(C, Gold))
Example 49
    def test_gufunc_stream(self):
        @guvectorize([void(float32[:, :], float32[:, :], float32[:, :])],
                     '(m,n),(n,p)->(m,p)',
                     target='cuda')
        def matmulcore(A, B, C):
            m, n = A.shape
            n, p = B.shape
            for i in range(m):
                for j in range(p):
                    C[i, j] = 0
                    for k in range(n):
                        C[i, j] += A[i, k] * B[k, j]

        gufunc = matmulcore
        gufunc.max_blocksize = 512

        #cuda.driver.flush_pending_free()
        matrix_ct = 1001  # an odd number to test thread/block division in CUDA
        A = np.arange(matrix_ct * 2 * 4,
                      dtype=np.float32).reshape(matrix_ct, 2, 4)
        B = np.arange(matrix_ct * 4 * 5,
                      dtype=np.float32).reshape(matrix_ct, 4, 5)

        ts = time()
        stream = cuda.stream()
        dA = cuda.to_device(A, stream)
        dB = cuda.to_device(B, stream)

        dC = cuda.device_array(shape=(1001, 2, 5),
                               dtype=A.dtype,
                               stream=stream)
        dC = gufunc(dA, dB, out=dC, stream=stream)
        C = dC.copy_to_host(stream=stream)
        stream.synchronize()

        tcuda = time() - ts

        ts = time()
        Gold = ut.matrix_multiply(A, B)
        tcpu = time() - ts

        stream_speedups.append(tcpu / tcuda)

        self.assertTrue(np.allclose(C, Gold))
Example 50
    def test_gufunc_stream(self):

        @guvectorize([void(float32[:, :], float32[:, :], float32[:, :])],
                     '(m,n),(n,p)->(m,p)',
                     target='cuda')
        def matmulcore(A, B, C):
            m, n = A.shape
            n, p = B.shape
            for i in range(m):
                for j in range(p):
                    C[i, j] = 0
                    for k in range(n):
                        C[i, j] += A[i, k] * B[k, j]

        gufunc = matmulcore
        gufunc.max_blocksize = 512

        #cuda.driver.flush_pending_free()
        matrix_ct = 1001 # an odd number to test thread/block division in CUDA
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2,
                                                                   4)
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4,
                                                                   5)

        ts = time()
        stream = cuda.stream()
        dA = cuda.to_device(A, stream)
        dB = cuda.to_device(B, stream)

        dC = cuda.device_array(shape=(1001, 2, 5), dtype=A.dtype, stream=stream)
        dC = gufunc(dA, dB, out=dC, stream=stream)
        C = dC.copy_to_host(stream=stream)
        stream.synchronize()

        tcuda = time() - ts

        ts = time()
        Gold = ut.matrix_multiply(A, B)
        tcpu = time() - ts

        stream_speedups.append(tcpu / tcuda)

        self.assertTrue(np.allclose(C, Gold))
Example 51
    def _calculateCameraToPixelDirection(self, el, az):
        el = np.deg2rad(el)
        az = np.deg2rad(-(az-180))
        
        x,y,z = spherical_to_cartesian(1,el,az)

        vecs = np.dstack((x,y,z))
        
        # simple spherical latitude rotation works here because
        # the latitude is the geodetic latitude which is the
        # angle between the normal and the equatorial plane
        matLat = rotation_matrix(np.deg2rad(90 - self._calData.lat), Y)[:3,:3]
        matLon = rotation_matrix(np.deg2rad(-self._calData.lon), Z)[:3,:3]
        mat = np.dot(matLon, matLat) # rotate latitude first, then longitude
        
        vecs = vecs.reshape(el.shape[0]*el.shape[1], 3)
        vecsRot = matrix_multiply(mat, vecs[...,np.newaxis]).reshape(el.shape[0], el.shape[1], 3)
                
        return vecsRot
Example 52
def setup_(weight):
    to_update = np.zeros((20, 4, 4))
    helper_bool = np.zeros((4, 4), dtype=bool)
    helper_bool[np.triu_indices(4)] = True

    for i in range(20):
        to_update[i][helper_bool] = np.random.randn(10) + 100

    pos_def_arr = matrix_multiply(np.transpose(to_update, axes=(0, 2, 1)),
                                  to_update)

    update_with = np.random.uniform(size=(20, 4))

    outer_prod = update_with.reshape(20, 4, 1) * \
        update_with.reshape(20, 1, 4)

    expected_result = np.transpose(
        cholesky(pos_def_arr + weight * outer_prod), axes=(0, 2, 1))

    return to_update, update_with, expected_result
Example 53
    def test_gufunc_small(self):
        matrix_ct = 2
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2,
                                                                   4)
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4,
                                                                   5)

        ts = time()
        C = gufunc(A, B)
        tcuda = time() - ts

        ts = time()
        Gold = ut.matrix_multiply(A, B)
        tcpu = time() - ts

        non_stream_speedups.append(tcpu / tcuda)

        print(C, Gold)

        self.assertTrue(np.allclose(C, Gold))
Example 54
        def boostrap(i):
            np.random.seed(i)
            if smooth_bootstrap:
                b = Scms(self.data + np.random.randn(*self.data.shape) * self.adaptive_bw, self.bw,
                         min_radius=self.min_radius)
                if copy_bw:
                    b.adaptive_bw = self.adaptive_bw
            else:
                bdata, bi = bootstrap_resample(self.data)
                b = Scms(bdata, self.bw, min_radius=self.min_radius)
                if copy_bw:
                    b.adaptive_bw = self.adaptive_bw[bi]

            if method == 'LocInv' or method == 'GradientLogP':
                bh, bp, bg, _ = b._nlocal_inv_cov(self.landmarks)
            else:
                bp, bg, bh = b._kernel_density_estimate(self.landmarks)

            gproj = np.sum(self.landmarks_g * bg, axis=1) / np.linalg.norm(self.landmarks_g, axis=1)
            hproj = np.sum(
                matrix_multiply(bh.transpose((0, 2, 1)), self.landmarks_h_eigvecs) * self.landmarks_h_eigvecs, axis=1)
            return bp, bg, bh, gproj, hproj
Example 55
    def test_gufunc_auto_transfer(self):

        @guvectorize([void(float32[:, :], float32[:, :], float32[:, :])],
                     '(m,n),(n,p)->(m,p)',
                     target='cuda')
        def matmulcore(A, B, C):
            m, n = A.shape
            n, p = B.shape
            for i in range(m):
                for j in range(p):
                    C[i, j] = 0
                    for k in range(n):
                        C[i, j] += A[i, k] * B[k, j]

        gufunc = matmulcore
        gufunc.max_blocksize = 512

        matrix_ct = 2
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2,
                                                                   4)
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4,
                                                                   5)

        dB = cuda.to_device(B)

        ts = time()
        C = gufunc(A, dB).copy_to_host()
        tcuda = time() - ts

        ts = time()
        Gold = ut.matrix_multiply(A, B)
        tcpu = time() - ts

        non_stream_speedups.append(tcpu / tcuda)

        print(C, Gold)

        self.assertTrue(np.allclose(C, Gold))
Example 56
def rotatePole(lats, lons, altitude, angle=90, axis=[1, 0, 0]):
    """
    Rotates the given geodetic lat/lon coordinates around the origin.
    
    :param lats, lons: shape (n,) in radians
    :param altitude: in km
    :param angle: degrees
    :param axis: [1, 0, 0], [0, 1, 0], or [0, 0, 1] for x y z axis
    :rtype: tuple (lats, lons) in radians
    """
    assert lats.ndim == 1 and lons.ndim == 1
    assert len(axis) == 3

    x, y, z = geodetic2Ecef(lats, lons, altitude, wgs84A, wgs84B)
    xyz = np.asarray([x, y, z]).T

    alpha = np.deg2rad(angle)
    rot = rotation_matrix(alpha, axis)[:3, :3]

    xyzRot = matrix_multiply(rot, xyz[..., np.newaxis]).reshape(xyz.shape)
    lats, lons = ecef2Geodetic(xyzRot[:, 0], xyzRot[:, 1], xyzRot[:, 2],
                               wgs84A, wgs84B)
    return lats, lons
Example 57
    def test_gufunc_hidim(self):
        @guvectorize([void(float32[:, :], float32[:, :], float32[:, :])],
                     '(m,n),(n,p)->(m,p)',
                     target='cuda')
        def matmulcore(A, B, C):
            m, n = A.shape
            n, p = B.shape
            for i in range(m):
                for j in range(p):
                    C[i, j] = 0
                    for k in range(n):
                        C[i, j] += A[i, k] * B[k, j]

        gufunc = matmulcore
        gufunc.max_blocksize = 512

        matrix_ct = 100  # an odd number to test thread/block division in CUDA
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(4, 25, 2, 4)
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(4, 25, 4, 5)

        C = gufunc(A, B)
        Gold = ut.matrix_multiply(A, B)
        self.assertTrue(np.allclose(C, Gold))
Example 58
def EM(x, k, omega, mu, sigma, maxIteration, tolerance=0.01):
    # k = len(omega)
    n, m = x.shape  # x has shape (n, m): n samples of dimension m
    loglike0 = 0
    for l in range(maxIteration):
        # E-step: P[j, i] is the responsibility of component j for sample i
        P = np.zeros((k, n))
        for j in range(k):
            P[j, :] = omega[j] * multivariate_normal(mu[j], sigma[j]).pdf(x)
        P /= P.sum(0)

        # M-step
        omega = P.sum(axis=1)
        omega /= n
        mu = np.dot(P, x)
        mu /= P.sum(1)[:, None]
        sigma = np.zeros((k, m, m))
        for j in range(k):
            y = x - mu[j, :]
            sigma[j] = (P[j, :, None, None] *
                        matrix_multiply(y[:, :, None], y[:, None, :])).sum(axis=0)
        sigma /= P.sum(axis=1)[:, None, None]

        # update the complete log likelihood; use fresh loop names so the
        # parameter arrays omega, mu, sigma are not shadowed and returned wrong
        loglikeN = 0
        for omega_j, mu_j, sigma_j in zip(omega, mu, sigma):
            loglikeN += omega_j * multivariate_normal(mu_j, sigma_j).pdf(x)
        loglikeN = np.log(loglikeN).sum()
        if np.abs(loglikeN - loglike0) < tolerance:
            break
        loglike0 = loglikeN

    return loglikeN, omega, mu, sigma
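A minimal invocation sketch for the cleaned-up EM above; the scipy.stats import and the initialisation choices are assumptions, not part of the original snippet:

# Usage sketch (assumes scipy is available; initialisation is illustrative).
import numpy as np
from scipy.stats import multivariate_normal

x = np.vstack([np.random.randn(100, 2), np.random.randn(100, 2) + 4])
k = 2
omega = np.ones(k) / k
mu = x[np.random.choice(len(x), size=k, replace=False)]
sigma = np.array([np.eye(2)] * k)

loglike, omega, mu, sigma = EM(x, k, omega, mu, sigma, maxIteration=100)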