Example #1
0
 def run_test_matmul_ab_dtype_shape(self,
                                    shape,
                                    k,
                                    dtype,
                                    axes_a=None,
                                    axes_b=None,
                                    transpose=False):
     """Compare GPU C = A @ B against a NumPy reference for one dtype/shape.

     `shape` gives the batch dims plus the output (M, N); `k` is the inner
     dimension. `axes_a`/`axes_b` permute the operands before multiplying,
     and `transpose` swaps/conjugate-transposes both operands.
     """
     # TODO: Allow testing separate transpose_a, transpose_b
     a_shape = shape[:-2] + (shape[-2], k)
     b_shape = shape[:-2] + (k, shape[-1])
     a_host = (np.random.random(size=a_shape) * 127).astype(dtype)
     b_host = (np.random.random(size=b_shape) * 127).astype(dtype)
     axes_a = range(len(a_shape)) if axes_a is None else axes_a
     axes_b = range(len(b_shape)) if axes_b is None else axes_b
     # Reference result computed on the host.
     lhs = a_host.transpose(axes_a)
     rhs = b_host.transpose(axes_b)
     if transpose:
         lhs, rhs = H(rhs), H(lhs)
     c_gold = np.matmul(lhs, rhs)
     # Same computation through the GPU backend.
     a_dev = bf.asarray(a_host, space='cuda')
     b_dev = bf.asarray(b_host, space='cuda')
     lhs = a_dev.transpose(axes_a)
     rhs = b_dev.transpose(axes_b)
     if transpose:
         lhs, rhs = H(rhs), H(lhs)
     c = bf.zeros_like(c_gold, space='cuda')
     self.linalg.matmul(1, lhs, rhs, 0, c)
     np.testing.assert_allclose(c.copy('system'), c_gold, RTOL, ATOL)
Example #2
0
 def run_test_matmul_aa_dtype_shape(self, shape, dtype):
     """Compare GPU auto-correlation C = A @ A^H against a NumPy reference."""
     a_host = (np.random.random(size=shape) * 127).astype(dtype)
     # Zero the strict upper triangle of the reference: the test expects the
     # backend to fill only the lower triangle for the a-with-itself case.
     c_gold = np.matmul(a_host, np.swapaxes(a_host, -1, -2).conj())
     rows, cols = np.triu_indices(shape[-2], 1)
     c_gold[..., rows, cols] = 0
     a_dev = bf.asarray(a_host, space='cuda')
     c = bf.zeros_like(c_gold, space='cuda')
     # b=None selects the auto-correlation path.
     self.linalg.matmul(1, a_dev, None, 0, c)
     np.testing.assert_allclose(c.copy('system'), c_gold, RTOL, ATOL)
Example #3
0
 def test_setitem(self):
     """Exercise __setitem__ on a CUDA array with several index shapes."""
     g = bf.zeros_like(self.known_vals, space='cuda')
     # Whole-array assignment from host values.
     g[...] = self.known_vals
     np.testing.assert_equal(g.copy('system'), self.known_vals)
     # (index, value, expected array state after the assignment)
     steps = [
         ((slice(None, 1), slice(1, None)), [[999]],
          [[0, 999], [2, 3], [4, 5]]),
         ((0, 0), 888, [[888, 999], [2, 3], [4, 5]]),
         (0, [99, 88], [[99, 88], [2, 3], [4, 5]]),
         ((slice(None), 1), [77, 66, 55], [[99, 77], [2, 66], [4, 55]]),
     ]
     for index, value, expected in steps:
         g[index] = value
         np.testing.assert_equal(g.copy('system'), np.array(expected))
Example #4
0
 def run_test_matmul_aa_ci8_shape(self, shape):
     """Compare GPU C = A @ A^H for complex-int8 (ci8) input."""
     # ci8 stores interleaved re/im int8 pairs, so double the last axis.
     shape_complex = shape[:-1] + (shape[-1] * 2, )
     raw8 = (np.random.random(size=shape_complex) * 255).astype(np.int8)
     a_gold = raw8.astype(np.float32).view(np.complex64)
     # Note: np.matmul seems to be slow and inaccurate when there are batch dims
     c_gold = np.matmul(a_gold, np.swapaxes(a_gold, -1, -2).conj())
     rows, cols = np.triu_indices(shape[-2], 1)
     # Only the lower triangle is compared.
     c_gold[..., rows, cols] = 0
     a_dev = bf.asarray(raw8.view(bf.DataType.ci8), space='cuda')
     c = bf.zeros_like(c_gold, space='cuda')
     self.linalg.matmul(1, a_dev, None, 0, c)
     np.testing.assert_allclose(c.copy('system'), c_gold, RTOL, ATOL)
Example #5
0
 def run_test_matmul_ab_ci8_shape(self, shape, k, transpose=False):
     """Compare GPU C = A @ B (or B^H @ A^H if `transpose`) for ci8 input."""
     # ci8 stores interleaved re/im int8 pairs, so double the complex axes.
     a_cshape = shape[:-2] + (shape[-2], k * 2)
     b_cshape = shape[:-2] + (k, shape[-1] * 2)
     a_raw = (np.random.random(size=a_cshape) * 255).astype(np.int8)
     b_raw = (np.random.random(size=b_cshape) * 255).astype(np.int8)
     # Host reference in complex64.
     lhs_gold = a_raw.astype(np.float32).view(np.complex64)
     rhs_gold = b_raw.astype(np.float32).view(np.complex64)
     if transpose:
         lhs_gold, rhs_gold = H(rhs_gold), H(lhs_gold)
     c_gold = np.matmul(lhs_gold, rhs_gold)
     # Same computation on the GPU with the native ci8 views.
     lhs = bf.asarray(a_raw.view(bf.DataType.ci8), space='cuda')
     rhs = bf.asarray(b_raw.view(bf.DataType.ci8), space='cuda')
     if transpose:
         lhs, rhs = H(rhs), H(lhs)
     c = bf.zeros_like(c_gold, space='cuda')
     self.linalg.matmul(1, lhs, rhs, 0, c)
     np.testing.assert_allclose(c.copy('system'), c_gold, RTOL, ATOL)
Example #6
0
    def run_test_matmul_ab_beamformer_kernel(self, ntime, nbeam, nstand,
                                             nchan):
        """Beamformer-style matmul: per-channel B = W @ X^T, ci8 voltages."""
        x_shape = (ntime, nchan, nstand * 2)
        w_shape = (nbeam, nchan, nstand * 2)
        # Random int8 values in [-127, 127]; trailing axis of 2 is re/im.
        x8 = ((np.random.random(size=x_shape + (2, )) * 2 - 1) * 127)
        x8 = x8.astype(np.int8)
        x_gold = x8.astype(np.float32).view(np.complex64).reshape(x_shape)
        w = ((np.random.random(size=w_shape + (2, )) * 2 - 1) * 127)
        w = w.astype(np.int8).astype(np.float32).view(np.complex64)
        w = w.reshape(w_shape)
        # Reference: (nchan, nbeam, nstand*2) @ (nchan, nstand*2, ntime).
        b_gold = np.matmul(w.transpose(1, 0, 2), x_gold.transpose(1, 2, 0))

        # GPU path: x as native ci8, w as complex64.
        x_dev = bf.asarray(x8.view(bf.DataType.ci8).reshape(x_shape),
                           space='cuda')
        w_dev = bf.asarray(w, space='cuda')
        b = bf.zeros_like(b_gold, space='cuda')
        self.linalg.matmul(1, w_dev.transpose(1, 0, 2),
                           x_dev.transpose(1, 2, 0), 0, b)
        b_host = b.copy('system')

        np.testing.assert_allclose(b_host, b_gold, RTOL, ATOL)
        '''
Example #7
0
 def run_test_matmul_aa_ci8_shape(self, shape, transpose=False):
     """Compare GPU C = A @ A^H for ci8 input, optionally pre-transposed."""
     # **TODO: This currently never triggers the transpose path in the backend
     shape_complex = shape[:-1] + (shape[-1] * 2, )
     # Note: The xGPU-like correlation kernel does not support input values of -128 (only [-127:127])
     raw8 = ((np.random.random(size=shape_complex) * 2 - 1) * 127).astype(
         np.int8)
     a_gold = raw8.astype(np.float32).view(np.complex64)
     if transpose:
         a_gold = H(a_gold)
     # Note: np.matmul seems to be slow and inaccurate when there are batch dims
     c_gold = np.matmul(a_gold, H(a_gold))
     # Output side length depends on which axis ends up as the matrix rows.
     side = shape[-1] if transpose else shape[-2]
     rows, cols = np.triu_indices(side, 1)
     c_gold[..., rows, cols] = 0
     a_dev = bf.asarray(raw8.view(bf.DataType.ci8), space='cuda')
     if transpose:
         a_dev = H(a_dev)
     c = bf.zeros_like(c_gold, space='cuda')
     self.linalg.matmul(1, a_dev, None, 0, c)
     np.testing.assert_allclose(c.copy('system'), c_gold, RTOL, ATOL)
Example #8
0
 def run_test_matmul_aa_dtype_shape(self,
                                    shape,
                                    dtype,
                                    axes=None,
                                    conj=False):
     """Compare GPU C = A' @ A'^H where A' is a permuted (and optionally
     conjugated) view of A."""
     a_host = (np.random.random(size=shape) * 127).astype(dtype)
     axes = range(len(shape)) if axes is None else axes
     # Host reference through the same permuted/conjugated view.
     view = a_host.transpose(axes)
     if conj:
         view = view.conj()
     c_gold = np.matmul(view, H(view))
     rows, cols = np.triu_indices(shape[axes[-2]], 1)
     # Only the lower triangle is compared.
     c_gold[..., rows, cols] = 0
     a_dev = bf.asarray(a_host, space='cuda')
     view_dev = a_dev.transpose(axes)
     if conj:
         view_dev = view_dev.conj()
     c = bf.zeros_like(c_gold, space='cuda')
     self.linalg.matmul(1, view_dev, None, 0, c)
     np.testing.assert_allclose(c.copy('system'), c_gold, RTOL, ATOL)
Example #9
0
 def run_test_matmul_aa_correlator_kernel(self,
                                          ntime,
                                          nstand,
                                          nchan,
                                          misalign=0):
     """Correlator-style B = X^H @ X per channel, with an optional
     `misalign` offset slicing the leading inputs off the last axis."""
     x_shape = (ntime, nchan, nstand * 2)
     perm = [1, 0, 2]
     # Random int8 in [-127, 127]; trailing axis of 2 is re/im.
     x8 = ((np.random.random(size=x_shape + (2, )) * 2 - 1) * 127).astype(
         np.int8)
     x_gold = x8.astype(np.float32).view(np.complex64).reshape(x_shape)
     x_gold = x_gold.transpose(perm)[..., misalign:]
     b_gold = np.matmul(H(x_gold), x_gold)
     rows, cols = np.triu_indices(x_gold.shape[-1], 1)
     # Only the lower triangle is compared.
     b_gold[..., rows, cols] = 0
     x_dev = bf.asarray(x8.view(bf.DataType.ci8).reshape(x_shape),
                        space='cuda')
     x_dev = x_dev.transpose(perm)[..., misalign:]
     b = bf.zeros_like(b_gold, space='cuda')
     # a=None selects the auto-correlation path on the b operand.
     self.linalg.matmul(1, None, x_dev, 0, b)
     np.testing.assert_allclose(b.copy('system'), b_gold, RTOL * 10, ATOL)
Example #10
0
 def run_benchmark_matmul_aa_correlator_kernel(self, ntime, nstand, nchan):
     """Benchmark the correlator (X^H @ X) kernel and print GFLOP/s and MHz.

     Fix: the original used Python-2-only `print` statements and `xrange`,
     which are syntax/name errors under Python 3. Rewritten with `range`
     and `%`-formatted `print(...)` calls that work on both; the integer
     halving uses `//` to keep the exact integer result of Python 2's `/`.
     """
     x_shape = (ntime, nchan, nstand * 2)
     perm = [1, 0, 2]
     x8 = ((np.random.random(size=x_shape + (2, )) * 2 - 1) * 127).astype(
         np.int8)
     x = x8.astype(np.float32).view(np.complex64).reshape(x_shape)
     x = x.transpose(perm)
     # Small reference output only to size the result buffer.
     b_gold = np.matmul(H(x[:, [0], :]), x[:, [0], :])
     triu = np.triu_indices(x_shape[-1], 1)
     b_gold[..., triu[0], triu[1]] = 0
     x = x8.view(bf.DataType.ci8).reshape(x_shape)
     x = bf.asarray(x, space='cuda')
     x = x.transpose(perm)
     b = bf.zeros_like(b_gold, space='cuda')
     # Synchronize so setup work is excluded from the timed region.
     bf.device.stream_synchronize()
     t0 = time.time()
     nrep = 200
     for _ in range(nrep):
         self.linalg.matmul(1, None, x, 0, b)
     # Kernel launches are asynchronous; wait before stopping the clock.
     bf.device.stream_synchronize()
     dt = time.time() - t0
     # Lower-triangular correlation: nstand*(nstand+1)/2 baselines per
     # (chan, time), x2 pols squared, x8 flops per complex MAC.
     nflop = nrep * nchan * ntime * nstand * (nstand + 1) // 2 * 2 * 2 * 8
     print('%s \t %s GFLOP/s' % (nstand, nflop / dt / 1e9))
     print('\t\t %s MHz' % (nrep * ntime * nchan / dt / 1e6))
Example #11
0
 def test_zeros_like(self):
     """bf.zeros_like on a CUDA array matches np.zeros_like on the host."""
     dev = bf.ndarray(self.known_vals, dtype='f32', space='cuda')
     zeros_dev = bf.zeros_like(dev)
     expected = np.zeros_like(self.known_array)
     np.testing.assert_equal(zeros_dev.copy('system'), expected)