def test_elemwise_bool():
    """Check truth-value conversion of GPU arrays.

    ``bool()`` on a two-element array must raise ``ValueError``, while
    zero-filled arrays of shape ``(1,)`` and ``()`` must be falsy.
    """
    multi = gpuarray.empty((2,), context=context)
    caught = None
    try:
        bool(multi)
    except ValueError as err:
        caught = err
    assert caught is not None

    # Single-element and zero-dimensional arrays of zeros are both falsy.
    for shape in ((1,), ()):
        zeroed = gpuarray.zeros(shape, context=context)
        assert not bool(zeroed)
def perform(self, node, inputs, outs):
    """Write the fill value ``inputs[0]`` into the single output cell.

    The remaining inputs give the output shape.  The existing output array
    is reused when it already has that shape; otherwise a new array is
    allocated.  When ``self.memset_0`` is set the new array comes from
    ``gpuarray.zeros`` and is not assigned to afterwards.
    """
    (cell,) = outs
    fill = inputs[0]
    shape = tuple(int(dim) for dim in inputs[1:])

    reusable = cell[0] is not None and cell[0].shape == shape
    if reusable:
        cell[0][...] = fill
    elif self.memset_0:
        cell[0] = gpuarray.zeros(shape, dtype=fill.dtype)
    else:
        cell[0] = gpuarray.empty(shape, dtype=fill.dtype)
        cell[0][...] = fill

    if config.gpuarray.sync:
        cell[0].sync()
def perform(self, node, inputs, outs):
    """Write the fill value ``inputs[0]`` into the single output cell.

    The remaining inputs give the output shape.  The existing output array
    is reused when it already has that shape; otherwise a new array is
    allocated.  If the fill value is a single element equal to zero, the new
    array is created with ``gpuarray.zeros`` and no assignment is needed.
    """
    out, = outs
    v = inputs[0]
    sh = tuple(map(int, inputs[1:]))
    if out[0] is None or out[0].shape != sh:
        # ``.item()`` on the whole array works for any size-1 array,
        # including 0-d values; indexing with ``[0]`` first would raise
        # IndexError on a 0-d fill value.
        if v.size == 1 and numpy.asarray(v).item() == 0:
            out[0] = gpuarray.zeros(sh, dtype=v.dtype)
        else:
            out[0] = gpuarray.empty(sh, dtype=v.dtype)
            out[0][...] = v
    else:
        out[0][...] = v
    if config.gpuarray.sync:
        out[0].sync()
def perform(self, node, inputs, outputs):
    """Build a zero-filled square matrix with ``x`` on one diagonal.

    The matrix side is ``x.shape[0] + abs(self.offset)``.  ``self.offset``
    selects the diagonal: values <= 0 place it in the lower triangle,
    positive values in the upper triangle.  The result is stored in the
    single output cell.
    """
    (vector,) = inputs
    (cell,) = outputs
    offset = self.offset
    side = vector.shape[0] + abs(offset)

    matrix = gpuarray.zeros(
        (side, side), dtype=vector.dtype, context=vector.context
    )
    cell[0] = matrix

    if offset > 0:
        # diag in the upper triangle
        diagonal = matrix[:(side - offset), offset]
    else:
        # diag in the lower triangle
        diagonal = matrix[-offset, :(side + offset)]

    # Stepping by the sum of both strides moves one row and one column at
    # a time, so the 1-d view walks along the chosen diagonal.
    diagonal.strides = (sum(matrix.strides),)
    diagonal[:] = vector[:]
def test_shape():
    """Check the graph compiled for ``x.shape`` of a 3-d GPU array.

    With default optimisations (outside FAST_COMPILE) the graph must be
    three ``Shape_i`` nodes feeding a ``MakeVector``; with
    ``local_shape_to_shape_i`` excluded it must be a single ``Shape`` node.
    """
    expected_shape = (3, 4, 5)
    x = GpuArrayType(dtype='float32', broadcastable=[False, False, False])()
    v = gpuarray.zeros(expected_shape, dtype='float32',
                       context=get_context(test_ctx_name))

    # Default mode.
    f = theano.function([x], x.shape)
    nodes = f.maker.fgraph.toposort()
    assert np.all(f(v) == expected_shape)
    if theano.config.mode != 'FAST_COMPILE':
        assert len(nodes) == 4
        expected_ops = (T.opt.Shape_i,) * 3 + (T.opt.MakeVector,)
        for apply_node, op_class in zip(nodes, expected_ops):
            assert isinstance(apply_node.op, op_class)

    # Same function with the Shape -> Shape_i rewrite disabled.
    reduced_mode = mode_with_gpu.excluding("local_shape_to_shape_i")
    f = theano.function([x], x.shape, mode=reduced_mode)
    nodes = f.maker.fgraph.toposort()
    assert np.all(f(v) == expected_shape)
    assert len(nodes) == 1
    assert isinstance(nodes[0].op, T.Shape)
def perform(self, node, inputs, outputs):
    """Expand the last axis of ``x`` into diagonals of square matrices.

    A zero-filled buffer of shape ``(prod(x.shape[:-1]), s, s)`` with
    ``s = x.shape[-1] + abs(self.offset)`` is allocated, the values of
    ``x`` are written along the diagonal selected by ``self.offset``, and
    the buffer is reshaped to ``x.shape[:-1] + (s, s)``.  For inputs with
    more than one dimension the two new axes are then moved to the
    positions given by ``self.axis1`` / ``self.axis2``.  The result is
    stored in the single output cell.
    """
    (diagonals,) = inputs
    (cell,) = outputs
    low_axis = np.minimum(self.axis1, self.axis2)
    high_axis = np.maximum(self.axis1, self.axis2)
    shift = abs(self.offset)

    # Initialise a buffer the same size as the output
    batch_shape = diagonals.shape[:-1]
    diag_len = diagonals.shape[-1]
    square = (diag_len + shift,) * 2
    final_shape = batch_shape + square
    flat_shape = (np.prod(batch_shape).astype(np.int64),) + square
    buffer = gpuarray.zeros(
        flat_shape, dtype=diagonals.dtype, context=diagonals.context
    )

    # Slice out a view of the diagonals
    if self.offset >= 0:
        # diag in the upper triangle
        view = buffer[:, :diag_len, shift]
    else:
        # diag in the lower triangle
        view = buffer[:, shift:, 0]
    # Adding one item to the second stride advances one extra column for
    # every step along that axis of the view.
    view.strides = (
        view.strides[0],
        view.strides[1] + diagonals.dtype.itemsize,
    )

    # Fill view with flattened array of diagonals
    view[:] = diagonals.reshape(view.shape)[:]

    # Unflatten buffer into output size
    result = buffer.reshape(final_shape)

    if len(diagonals.shape) > 1:
        # Re-order axes so they correspond to diagonals at axis1, axis2
        order = list(range(len(batch_shape)))
        last = order[-1]
        order.insert(low_axis, last + 1)
        order.insert(high_axis, last + 2)
        result = result.transpose(order)

    cell[0] = result
def thunk():
    """Prepare the output buffer and launch ``pycuda_func`` over it.

    ``inputs``, ``outputs``, ``get_tens_ptr`` and ``pycuda_func`` come from
    the enclosing scope.  The output is a zeroed array with the shape of
    the first input; an existing output of the right shape is reused and
    reset to zero.
    """
    x, boxes, grad = inputs[0], inputs[1], inputs[2]
    z = outputs[0]
    context = getattr(x[0], 'context', None)

    if z[0] is not None and z[0].shape == x[0].shape:
        z[0][:] = 0
    else:
        z[0] = pygpu.zeros(x[0].shape, dtype=theano.config.floatX,
                           context=context)

    x_ptr, _ = get_tens_ptr(x[0])
    boxes_ptr, _ = get_tens_ptr(boxes[0])
    grad_ptr, _ = get_tens_ptr(grad[0])
    # The second return value is kept bound for the rest of the call, as
    # in the original code -- presumably it keeps the wrapper alive while
    # the kernel runs; confirm against get_tens_ptr.
    z_ptr, z_handle = get_tens_ptr(z[0])

    # One single-thread block per (shape[0], shape[1]) pair of the input.
    pycuda_func(z_ptr, x_ptr, boxes_ptr, grad_ptr,
                block=(1, 1, 1),
                grid=(x[0].shape[0], x[0].shape[1], 1))
def test_zero_noparam():
    """``zeros()`` called without arguments must raise ``TypeError``."""
    try:
        gpu_ndarray.zeros()
    except TypeError:
        pass
    else:
        # The call succeeded, which is the failure case for this test.
        assert False
def zeros(shp, order, dtype):
    """Compare ``gpu_ndarray.zeros`` against ``numpy.zeros``.

    Both arrays are created with the same shape, dtype and memory order
    and handed to ``check_all``.
    """
    device_array = gpu_ndarray.zeros(shp, dtype, order, context=ctx)
    host_array = numpy.zeros(shp, dtype=dtype, order=order)
    check_all(device_array, host_array)
def thunk():
    """Solve a dense linear system with cuSOLVER's LU routines.

    Reads the matrix ``A`` from ``inputs[0][0]`` and the right-hand sides
    ``b`` from ``inputs[1][0]``, runs ``cusolverDnSgetrf`` followed by
    ``cusolverDnSgetrs`` and stores the overwritten copy of ``b`` in
    ``outputs[0][0]``.  ``inputs``, ``outputs``, ``self`` and
    ``cusolver_handle`` come from the enclosing scope.  Scratch buffers are
    cached as ``thunk.workspace``, ``thunk.pivots`` and ``thunk.dev_info``;
    these attributes must already exist (initially ``None``).

    Raises:
        ValueError: if ``self.trans`` is not 'N', 'T' or 'C', if ``A`` is
            not square, or if ``A`` and ``b`` have different row counts.
    """
    context = inputs[0][0].context

    # Output storage cell.
    z = outputs[0]

    # Matrix.
    A = inputs[0][0]

    # Solution vectors.
    b = inputs[1][0]

    assert(len(A.shape) == 2)
    assert(len(b.shape) == 2)

    if self.trans in ['T', 'C']:
        trans = 1
        l, n = A.shape
        k, m = b.shape
    elif self.trans == 'N':
        trans = 0
        n, l = A.shape
        k, m = b.shape
    else:
        raise ValueError('Invalid value for trans')
    if l != n:
        raise ValueError('A must be a square matrix')
    if n != k:
        raise ValueError('A and b must be aligned.')

    lda = max(1, n)
    # b is copied below as an F-ordered (k, m) matrix, so consecutive
    # columns are k elements apart; the number of right-hand sides m must
    # not enter the leading dimension.
    ldb = max(1, k)

    # We copy A and b as cusolver operates inplace
    b = gpuarray.array(b, copy=True, order='F')
    if not self.inplace:
        A = gpuarray.array(A, copy=True)
    A_ptr = A.gpudata
    b_ptr = b.gpudata

    # cusolver expects a F ordered matrix, but A is not explicitly
    # converted between C and F order, instead we switch the
    # "transpose" flag.
    if A.flags['C_CONTIGUOUS']:
        trans = 1 - trans

    workspace_size = cusolver.cusolverDnSgetrf_bufferSize(
        cusolver_handle, n, n, A_ptr, lda)

    if (thunk.workspace is None or
            thunk.workspace.size != workspace_size):
        thunk.workspace = gpuarray.zeros((workspace_size,),
                                         dtype='float32',
                                         context=context)

    # cuSOLVER writes integer pivot indices and an integer status word,
    # so both buffers are typed as int32.
    if thunk.pivots is None or thunk.pivots.size != n:
        thunk.pivots = gpuarray.zeros((n,),
                                      dtype='int32',
                                      context=context)

    if thunk.dev_info is None:
        thunk.dev_info = gpuarray.zeros((1,),
                                        dtype='int32',
                                        context=context)

    workspace_ptr = thunk.workspace.gpudata
    pivots_ptr = thunk.pivots.gpudata
    dev_info_ptr = thunk.dev_info.gpudata

    # LU-factorise A in place, then solve for every column of b in place.
    cusolver.cusolverDnSgetrf(
        cusolver_handle, n, n, A_ptr, lda, workspace_ptr,
        pivots_ptr, dev_info_ptr)

    cusolver.cusolverDnSgetrs(
        cusolver_handle, trans, n, m, A_ptr, lda,
        pivots_ptr, b_ptr, ldb, dev_info_ptr)

    z[0] = b
def thunk():
    # Compute a scalar cost from ``inputs[0]`` (x) and ``inputs[1]`` (truth)
    # with two GPU kernels (``index_fn`` then ``cost_fn``) and store it in
    # ``outputs[0]``.  When ``return_extras`` is set, outputs 1-4 are filled
    # as well.  ``inputs``, ``outputs``, ``return_extras``, ``n_classes``,
    # ``n_anchors``, ``l_obj``, ``l_noobj``, ``anchors``, ``index_fn`` and
    # ``cost_fn`` come from the enclosing scope.
    #
    # NOTE(review): the source this was taken from had its line breaks and
    # indentation collapsed; the nesting below (in particular which
    # statements sit under ``if return_extras:``) is a reconstruction and
    # should be confirmed against the original file.
    x, truth = inputs[0], inputs[1]
    z = outputs[0]
    # z_shape is not read anywhere below.
    z_shape = (x[0].shape[:0], )
    if return_extras:
        cost_coord, cost_class, cost_object = outputs[2], outputs[3], outputs[4]
        context = None
        if hasattr(x[0], 'context'):
            context = x[0].context
        anchor_indices = outputs[1]
        # One slot per entry of truth's first two axes, plus one extra
        # trailing slot.
        ai_shape = (np.prod(truth[0].shape[:2]) + 1, )
        if anchor_indices[0] is None or anchor_indices[0].shape != ai_shape:
            anchor_indices[0] = pygpu.zeros(ai_shape, dtype='int32', context=context)
        anchor_indices[0][-1] = x[0].shape[0]  # store associated batch_size
    x_ptr, _ = get_tens_ptr(x[0])
    truth_ptr, _ = get_tens_ptr(truth[0])
    # Scratch cost buffer with the shape of x; filled by cost_fn below.
    cost_ptr, cost_obj = get_tens_ptr(np.zeros_like(x[0], dtype=theano.config.floatX))
    if return_extras:
        # Wrap the memory of the anchor_indices output so the kernel writes
        # land directly in that output.
        best_idx_ptr = gpuarray.GPUArray(gpudata=anchor_indices[0].gpudata, dtype=anchor_indices[0].dtype, shape=anchor_indices[0].shape)
    else:
        # No output to write into: use a temporary index buffer.
        best_idx_ptr = gpuarray.GPUArray(shape=(np.prod(truth[0].shape[:2]), ), dtype=np.int32)
    best_iou_ptr = gpuarray.GPUArray(shape=(np.prod(truth[0].shape[:2]), ), dtype=np.float32)
    yolo_ptr, _ = get_yolo_info(n_classes, n_anchors, l_obj, l_noobj, anchors)
    # get best index (one single-thread block per entry of x's first axis)
    index_fn(best_idx_ptr, best_iou_ptr, x_ptr, truth_ptr, yolo_ptr, block=(1, 1, 1), grid=(x[0].shape[0], 1, 1))
    n_total = np.int32(x[0].shape[0] * n_anchors * np.prod(x[0].shape[-2:]))
    # Entries different from -1 are counted as matched.
    n_matched = np.int32(gpuarray.sum(best_idx_ptr != -1).get())
    cost_fn(cost_ptr, best_idx_ptr, best_iou_ptr, x_ptr, truth_ptr, yolo_ptr, n_matched, n_total, block=(n_anchors, 1, 1), grid=(x[0].shape[0], x[0].shape[2], x[0].shape[3]))
    tmp = gpuarray.sum(gpuarray.GPUArray(cost_obj.shape, cost_obj.dtype, gpudata=cost_obj.data))  # do sum using reduction
    # Copy the reduced value into a one-element host array.
    foo = np.zeros(1, dtype=np.float32)
    tmp.get(foo)
    z[0] = foo[0]
    if return_extras:
        cost_on_gpu = cost_obj.get_val()  # transfer data onto host
        cost_coord[0], cost_class[0], cost_object[0] = 0., 0., 0.
        # Walk axis 1 in groups of (5 + n_classes) entries, one group per
        # anchor: entries 0-3 go to cost_coord, entry 4 to cost_object and
        # the remaining n_classes entries to cost_class.
        for i in range(0, (5 + n_classes) * n_anchors, 5 + n_classes):
            cost_coord[0] += np.sum(cost_on_gpu[:, i:i + 4])
            cost_class[0] += np.sum(cost_on_gpu[:, i + 5:i + 5 + n_classes])
            cost_object[0] += np.sum(cost_on_gpu[:, i + 4])
    # free all memory
    if not return_extras:
        # Only the temporary index buffer is dropped here; with
        # return_extras the buffer belongs to the anchor_indices output.
        del best_idx_ptr
    cost_ptr.free()
    del best_iou_ptr
    yolo_ptr.free()
def test_zeros_no_dtype():
    """``zeros`` without dtype/order must match numpy's defaults.

    Zero-dimensional arrays are created on both sides and their metadata
    is compared with ``check_meta``.
    """
    scalar_shape = ()
    # no dtype and order param
    check_meta(
        gpu_ndarray.zeros(scalar_shape, context=ctx),
        numpy.zeros(scalar_shape),
    )
assert out_c[1].shape == out_g[1].shape assert out_c[0].dtype == out_g[0].dtype assert out_c[1].dtype == out_g[1].dtype assert numpy.allclose(out_c[0], numpy.asarray(out_g[0])) assert numpy.allclose(out_c[1], numpy.asarray(out_g[1])) def test_elemwise_bool(): a = gpuarray.empty((2,), context=context) exc = None try: bool(a) except ValueError, e: exc = e assert e is not None a = gpuarray.zeros((1,), context=context) assert bool(a) == False a = gpuarray.zeros((), context=context) assert bool(a) == False def test_broadcast(): for shapea, shapeb in [((3, 5), (3, 5)), ((1, 5), (3, 5)), ((3, 5), (3, 1)), ((1, 5), (3, 1)), ((3, 1), (3, 5)), ((3, 5), (3, 1)), ((1, 1), (1, 1)), ((3, 4, 5), (4, 5)), ((4, 5), (3, 4, 5)),
def thunk():
    """Solve a dense linear system with cuSOLVER's LU routines.

    Reads the matrix ``A`` from ``inputs[0][0]`` and the right-hand sides
    ``b`` from ``inputs[1][0]``, runs ``cusolverDnSgetrf`` followed by
    ``cusolverDnSgetrs`` and stores the overwritten copy of ``b`` in
    ``outputs[0][0]``.  ``inputs``, ``outputs``, ``self`` and
    ``cusolver_handle`` come from the enclosing scope.  Scratch buffers are
    cached as ``thunk.workspace``, ``thunk.pivots`` and ``thunk.dev_info``;
    these attributes must already exist (initially ``None``).

    Raises:
        ValueError: if ``self.trans`` is not 'N', 'T' or 'C', if ``A`` is
            not square, or if ``A`` and ``b`` have different row counts.
    """
    context = inputs[0][0].context

    # Output storage cell.
    z = outputs[0]

    # Matrix.
    A = inputs[0][0]

    # Solution vectors.
    b = inputs[1][0]

    assert (len(A.shape) == 2)
    assert (len(b.shape) == 2)

    if self.trans in ['T', 'C']:
        trans = 1
        l, n = A.shape
        k, m = b.shape
    elif self.trans == 'N':
        trans = 0
        n, l = A.shape
        k, m = b.shape
    else:
        raise ValueError('Invalid value for trans')
    if l != n:
        raise ValueError('A must be a square matrix')
    if n != k:
        raise ValueError('A and b must be aligned.')

    lda = max(1, n)
    # b is copied below as an F-ordered (k, m) matrix, so consecutive
    # columns are k elements apart; the number of right-hand sides m must
    # not enter the leading dimension.
    ldb = max(1, k)

    # We copy A and b as cusolver operates inplace
    b = gpuarray.array(b, copy=True, order='F')
    if not self.inplace:
        A = gpuarray.array(A, copy=True)
    A_ptr = A.gpudata
    b_ptr = b.gpudata

    # cusolver expects a F ordered matrix, but A is not explicitly
    # converted between C and F order, instead we switch the
    # "transpose" flag.
    if A.flags['C_CONTIGUOUS']:
        trans = 1 - trans

    workspace_size = cusolver.cusolverDnSgetrf_bufferSize(
        cusolver_handle, n, n, A_ptr, lda)

    if (thunk.workspace is None
            or thunk.workspace.size != workspace_size):
        thunk.workspace = gpuarray.zeros((workspace_size, ),
                                         dtype='float32',
                                         context=context)

    # cuSOLVER writes integer pivot indices and an integer status word,
    # so both buffers are typed as int32.
    if thunk.pivots is None or thunk.pivots.size != n:
        thunk.pivots = gpuarray.zeros((n, ),
                                      dtype='int32',
                                      context=context)

    if thunk.dev_info is None:
        thunk.dev_info = gpuarray.zeros((1, ),
                                        dtype='int32',
                                        context=context)

    workspace_ptr = thunk.workspace.gpudata
    pivots_ptr = thunk.pivots.gpudata
    dev_info_ptr = thunk.dev_info.gpudata

    # LU-factorise A in place, then solve for every column of b in place.
    cusolver.cusolverDnSgetrf(cusolver_handle, n, n, A_ptr, lda,
                              workspace_ptr, pivots_ptr, dev_info_ptr)

    cusolver.cusolverDnSgetrs(cusolver_handle, trans, n, m, A_ptr, lda,
                              pivots_ptr, b_ptr, ldb, dev_info_ptr)

    z[0] = b