Example #1
0
def test_elemwise_bool():
    """Check truth-value conversion of GPU arrays.

    ``bool`` on a two-element array must raise ``ValueError``, while
    zero-filled arrays of shape ``(1,)`` and ``()`` must both be falsy.
    """
    caught = None
    two_elems = gpuarray.empty((2,), context=context)
    try:
        bool(two_elems)
    except ValueError as err:
        caught = err
    assert caught is not None

    # Single-element and zero-dimensional zero arrays convert to False.
    for shape in ((1,), ()):
        zero_arr = gpuarray.zeros(shape, context=context)
        assert not bool(zero_arr)
Example #2
0
 def perform(self, node, inputs, outs):
     """Fill the output cell with an array of the requested shape.

     ``inputs[0]`` is the fill value and ``inputs[1:]`` are the target
     dimensions.  The array already held in the output cell is reused when
     its shape matches; otherwise a new one is allocated.  When a new array
     is needed and ``self.memset_0`` is set, it is created with
     ``gpuarray.zeros`` and the value is not copied into it.
     """
     (storage,) = outs
     fill_value = inputs[0]
     target_shape = tuple(int(dim) for dim in inputs[1:])

     current = storage[0]
     reusable = current is not None and current.shape == target_shape

     if not reusable and self.memset_0:
         storage[0] = gpuarray.zeros(target_shape, dtype=fill_value.dtype)
     else:
         if not reusable:
             storage[0] = gpuarray.empty(target_shape,
                                         dtype=fill_value.dtype)
         storage[0][...] = fill_value

     if config.gpuarray.sync:
         storage[0].sync()
Example #3
0
 def perform(self, node, inputs, outs):
     """Fill the output cell with an array of the requested shape.

     ``inputs[0]`` is the fill value and ``inputs[1:]`` are the target
     dimensions.  The array already held in the output cell is reused when
     its shape matches.  Otherwise a new array is allocated: directly
     zero-filled when the value is a single element equal to 0, or
     uninitialised and then assigned the value in every other case.
     """
     out, = outs
     v = inputs[0]
     sh = tuple(map(int, inputs[1:]))
     if out[0] is None or out[0].shape != sh:
         # ``.item()`` on the whole array works for any single-element
         # array, including 0-d ones, where indexing with ``[0]`` first
         # would raise IndexError.
         if v.size == 1 and numpy.asarray(v).item() == 0:
             out[0] = gpuarray.zeros(sh, dtype=v.dtype)
         else:
             out[0] = gpuarray.empty(sh, dtype=v.dtype)
             out[0][...] = v
     else:
         out[0][...] = v
     if config.gpuarray.sync:
         out[0].sync()
Example #4
0
    def perform(self, node, inputs, outputs):
        """Build a square zero matrix with the input written on a diagonal.

        The matrix side is ``x.shape[0] + abs(self.offset)``.  A 1-d slice
        of the freshly allocated matrix is re-strided so that consecutive
        elements step one row and one column at a time, and the input is
        then copied into that view.
        """
        (diag_values,) = inputs
        (out_cell,) = outputs

        shift = self.offset
        side = diag_values.shape[0] + abs(shift)
        matrix = gpuarray.zeros(
            (side, side), dtype=diag_values.dtype, context=diag_values.context
        )
        out_cell[0] = matrix

        if shift > 0:
            # Diagonal in the upper triangle.
            diag_view = matrix[:(side - shift), shift]
        else:
            # Diagonal in the lower triangle (or the main diagonal).
            diag_view = matrix[-shift, :(side + shift)]

        # Stepping by the sum of both strides walks along the diagonal.
        diag_view.strides = (sum(matrix.strides),)

        diag_view[:] = diag_values[:]
Example #5
0
def test_shape():
    """Check that ``x.shape`` of a 3-d GPU variable compiles and evaluates.

    With the default mode (unless it is ``FAST_COMPILE``) the compiled graph
    must consist of three ``Shape_i`` nodes followed by a ``MakeVector``.
    With ``local_shape_to_shape_i`` excluded from ``mode_with_gpu`` the
    graph must be a single ``Shape`` node.  Both functions must return
    ``(3, 4, 5)``.
    """
    expected = (3, 4, 5)
    sym = GpuArrayType(dtype='float32', broadcastable=[False, False, False])()
    data = gpuarray.zeros(expected, dtype='float32',
                          context=get_context(test_ctx_name))

    # Default compilation mode.
    shape_fn = theano.function([sym], sym.shape)
    nodes = shape_fn.maker.fgraph.toposort()
    assert np.all(shape_fn(data) == expected)
    if theano.config.mode != 'FAST_COMPILE':
        assert len(nodes) == 4
        for node in nodes[:3]:
            assert isinstance(node.op, T.opt.Shape_i)
        assert isinstance(nodes[3].op, T.opt.MakeVector)

    # Same graph with the Shape -> Shape_i rewrite disabled.
    no_shape_i = mode_with_gpu.excluding("local_shape_to_shape_i")
    shape_fn = theano.function([sym], sym.shape, mode=no_shape_i)
    nodes = shape_fn.maker.fgraph.toposort()
    assert np.all(shape_fn(data) == expected)
    assert len(nodes) == 1
    assert isinstance(nodes[0].op, T.Shape)
def test_shape():
    """Verify the graph produced for ``x.shape`` on a 3-d GPU variable.

    Two compilations are checked, and each must evaluate to ``(3, 4, 5)``:
    the default mode (expected, outside ``FAST_COMPILE``, to yield three
    ``Shape_i`` nodes feeding a ``MakeVector``) and ``mode_with_gpu`` with
    ``local_shape_to_shape_i`` excluded (expected to yield one ``Shape``).
    """
    dims = (3, 4, 5)
    gpu_var = GpuArrayType(dtype='float32',
                           broadcastable=[False, False, False])()
    gpu_val = gpuarray.zeros(dims,
                             dtype='float32',
                             context=get_context(test_ctx_name))

    compiled = theano.function([gpu_var], gpu_var.shape)
    graph = compiled.maker.fgraph.toposort()
    assert np.all(compiled(gpu_val) == dims)
    if theano.config.mode != 'FAST_COMPILE':
        assert len(graph) == 4
        # The first three nodes extract one dimension each.
        for position in range(3):
            assert isinstance(graph[position].op, T.opt.Shape_i)
        assert isinstance(graph[3].op, T.opt.MakeVector)

    restricted_mode = mode_with_gpu.excluding("local_shape_to_shape_i")
    compiled = theano.function([gpu_var], gpu_var.shape, mode=restricted_mode)
    graph = compiled.maker.fgraph.toposort()
    assert np.all(compiled(gpu_val) == dims)
    assert len(graph) == 1
    assert isinstance(graph[0].op, T.Shape)
Example #7
0
    def perform(self, node, inputs, outputs):
        """Expand the last axis of the input into diagonals of square matrices.

        A zero buffer of shape ``(prod(x.shape[:-1]), side, side)`` is
        allocated, where ``side = x.shape[-1] + abs(self.offset)``.  A
        re-strided view onto its diagonals receives the input values, the
        buffer is reshaped to ``x.shape[:-1] + (side, side)``, and for inputs
        with more than one dimension the two new axes are moved to the
        positions given by ``self.axis1`` and ``self.axis2``.
        """
        (diags,) = inputs
        (out_cell,) = outputs
        first_axis = np.minimum(self.axis1, self.axis2)
        second_axis = np.maximum(self.axis1, self.axis2)
        offset = self.offset
        pad = abs(offset)

        batch_shape = diags.shape[:-1]
        n_diag = diags.shape[-1]
        side = n_diag + pad

        # Work on a buffer with all leading axes flattened into one.
        final_shape = batch_shape + (side,) * 2
        flat_shape = (np.prod(batch_shape).astype(np.int64),) + (side,) * 2
        flat = gpuarray.zeros(
            flat_shape, dtype=diags.dtype, context=diags.context
        )

        # Take a 2-d slice starting at the first diagonal element.
        if offset >= 0:
            # Diagonal in the upper triangle (or the main diagonal).
            view = flat[:, :n_diag, pad]
        else:
            # Diagonal in the lower triangle.
            view = flat[:, pad:, 0]

        # Adding one item to the second stride makes each step move one
        # column further as well, so the view follows the diagonal.
        batch_stride = view.strides[0]
        diag_stride = view.strides[1] + diags.dtype.itemsize
        view.strides = (batch_stride, diag_stride)

        # Copy the flattened diagonals into the view.
        view[:] = diags.reshape(view.shape)[:]

        # Restore the leading axes.
        result = flat.reshape(final_shape)

        if len(diags.shape) > 1:
            # Insert the two matrix axes at the requested positions.
            order = list(range(len(batch_shape)))
            tail = order[-1]
            order = order[:first_axis] + [tail + 1] + order[first_axis:]
            order = order[:second_axis] + [tail + 2] + order[second_axis:]
            result = result.transpose(order)

        out_cell[0] = result
Example #8
0
 def thunk():
     """Zero the output buffer and launch ``pycuda_func`` on the inputs.

     The output cell is given a zero array shaped like ``inputs[0][0]``
     (allocated when missing or mis-shaped, cleared in place otherwise).
     The kernel is launched with a single-thread block and a grid spanning
     the first two dimensions of ``inputs[0][0]``.
     """
     x = inputs[0]
     boxes = inputs[1]
     grad = inputs[2]
     z = outputs[0]

     src = x[0]
     # Inputs without a ``context`` attribute fall back to ``None``.
     context = getattr(src, 'context', None)

     if z[0] is not None and z[0].shape == src.shape:
         z[0][:] = 0
     else:
         z[0] = pygpu.zeros(src.shape,
                            dtype=theano.config.floatX,
                            context=context)

     x_ptr, _ = get_tens_ptr(src)
     boxes_ptr, _ = get_tens_ptr(boxes[0])
     grad_ptr, _ = get_tens_ptr(grad[0])
     z_ptr, z_tens = get_tens_ptr(z[0])

     pycuda_func(z_ptr,
                 x_ptr,
                 boxes_ptr,
                 grad_ptr,
                 block=(1, 1, 1),
                 grid=(src.shape[0], src.shape[1], 1))
def test_zero_noparam():
    """``gpu_ndarray.zeros`` called without arguments must raise TypeError."""
    try:
        gpu_ndarray.zeros()
    except TypeError:
        return
    # Reaching this point means the call unexpectedly succeeded.
    assert False
def zeros(shp, order, dtype):
    """Compare ``gpu_ndarray.zeros`` with ``numpy.zeros`` for one setting.

    Both arrays are created with the same shape, dtype and order and then
    handed to ``check_all``.
    """
    positional = (shp, dtype, order)
    check_all(gpu_ndarray.zeros(*positional, context=ctx),
              numpy.zeros(*positional))
def zeros(shp, order, dtype):
    """Check a GPU zero array against its numpy counterpart.

    The GPU array comes from ``gpu_ndarray.zeros`` and the reference from
    ``numpy.zeros``, both built from the same shape, dtype and order; the
    pair is passed to ``check_all``.
    """
    gpu_result = gpu_ndarray.zeros(shp, dtype, order, context=ctx)
    reference = numpy.zeros(shp, dtype, order)
    check_all(gpu_result, reference)
Example #12
0
        def thunk():
            """Solve the linear system for the current inputs with cuSOLVER.

            Reads the matrix from ``inputs[0][0]`` and the right-hand sides
            from ``inputs[1][0]``, LU-factorises the matrix with
            ``cusolverDnSgetrf``, solves with ``cusolverDnSgetrs`` and stores
            the overwritten copy of the right-hand sides in ``outputs[0][0]``.

            Raises:
                ValueError: if ``self.trans`` is not one of 'N', 'T', 'C', if
                    the matrix is not square, or if the matrix and the
                    right-hand sides have mismatched sizes.
            """
            context = inputs[0][0].context

            # Output storage cell.
            z = outputs[0]

            # Matrix.
            A = inputs[0][0]

            # Solution vectors.
            b = inputs[1][0]

            assert(len(A.shape) == 2)
            assert(len(b.shape) == 2)

            if self.trans in ['T', 'C']:
                trans = 1
                l, n = A.shape
                k, m = b.shape
            elif self.trans == 'N':
                trans = 0
                n, l = A.shape
                k, m = b.shape
            else:
                raise ValueError('Invalid value for trans')
            if l != n:
                raise ValueError('A must be a square matrix')
            if n != k:
                raise ValueError('A and b must be aligned.')

            lda = max(1, n)
            # b is copied below into a dense F-ordered (k, m) array, so its
            # leading dimension is its row count k.  Including m here would
            # give a wrong column stride whenever b has more columns than
            # rows.
            ldb = max(1, k)

            # We copy A and b as cusolver operates inplace
            b = gpuarray.array(b, copy=True, order='F')
            if not self.inplace:
                A = gpuarray.array(A, copy=True)
            A_ptr = A.gpudata
            b_ptr = b.gpudata

            # cusolver expects a F ordered matrix, but A is not explicitly
            # converted between C and F order, instead we switch the
            # "transpose" flag.
            if A.flags['C_CONTIGUOUS']:
                trans = 1 - trans

            workspace_size = cusolver.cusolverDnSgetrf_bufferSize(
                cusolver_handle, n, n, A_ptr, lda)

            # Scratch buffers are cached on the thunk and reallocated only
            # when the required size changes.
            if (thunk.workspace is None or
                    thunk.workspace.size != workspace_size):
                thunk.workspace = gpuarray.zeros((workspace_size,),
                                                 dtype='float32',
                                                 context=context)

            # getrf/getrs take integer arrays for the pivot indices and the
            # info flag, so allocate them as int32 rather than float32.
            if thunk.pivots is None or thunk.pivots.size != n:
                thunk.pivots = gpuarray.zeros((n,),
                                              dtype='int32',
                                              context=context)

            if thunk.dev_info is None:
                thunk.dev_info = gpuarray.zeros((1,),
                                                dtype='int32',
                                                context=context)

            workspace_ptr = thunk.workspace.gpudata
            pivots_ptr = thunk.pivots.gpudata
            dev_info_ptr = thunk.dev_info.gpudata

            # LU factorisation of A, in place.
            cusolver.cusolverDnSgetrf(
                cusolver_handle, n, n, A_ptr, lda, workspace_ptr,
                pivots_ptr, dev_info_ptr)

            # Solve using the factorisation; b is overwritten in place.
            cusolver.cusolverDnSgetrs(
                cusolver_handle, trans, n, m, A_ptr, lda,
                pivots_ptr, b_ptr, ldb, dev_info_ptr)

            z[0] = b
Example #13
0
        def thunk():
            """Run the index and cost kernels and store the results.

            Reads the prediction tensor from ``inputs[0][0]`` and the ground
            truth from ``inputs[1][0]``, launches ``index_fn`` followed by
            ``cost_fn``, and writes the summed cost to ``outputs[0][0]``.
            When ``return_extras`` is set, ``outputs[1]`` holds the anchor
            index buffer and ``outputs[2]``..``outputs[4]`` receive the
            coordinate, class and object parts of the cost.
            """
            x, truth = inputs[0], inputs[1]
            z = outputs[0]
            # NOTE(review): ``z_shape`` is not read anywhere else in this
            # function.
            z_shape = (x[0].shape[:0], )

            if return_extras:
                cost_coord, cost_class, cost_object = outputs[2], outputs[
                    3], outputs[4]
                # Use the input's context when it has one, otherwise None.
                context = None
                if hasattr(x[0], 'context'):
                    context = x[0].context
                anchor_indices = outputs[1]
                # One slot per entry of the first two axes of ``truth`` plus
                # one trailing slot.
                ai_shape = (np.prod(truth[0].shape[:2]) + 1, )
                # The buffer is (re)allocated only when missing or of the
                # wrong shape; the trailing slot is written only then.
                if anchor_indices[0] is None or anchor_indices[
                        0].shape != ai_shape:
                    anchor_indices[0] = pygpu.zeros(ai_shape,
                                                    dtype='int32',
                                                    context=context)
                    anchor_indices[0][-1] = x[0].shape[
                        0]  # store associated batch_size

            x_ptr, _ = get_tens_ptr(x[0])
            truth_ptr, _ = get_tens_ptr(truth[0])
            # The cost buffer starts from a zero array shaped like ``x[0]``.
            # NOTE(review): what ``get_tens_ptr`` does with a numpy array is
            # not visible here - confirm.
            cost_ptr, cost_obj = get_tens_ptr(
                np.zeros_like(x[0], dtype=theano.config.floatX))

            if return_extras:
                # Wrap the existing anchor index buffer instead of allocating
                # a new one.
                best_idx_ptr = gpuarray.GPUArray(
                    gpudata=anchor_indices[0].gpudata,
                    dtype=anchor_indices[0].dtype,
                    shape=anchor_indices[0].shape)
            else:
                best_idx_ptr = gpuarray.GPUArray(shape=(np.prod(
                    truth[0].shape[:2]), ),
                                                 dtype=np.int32)

            best_iou_ptr = gpuarray.GPUArray(shape=(np.prod(
                truth[0].shape[:2]), ),
                                             dtype=np.float32)

            yolo_ptr, _ = get_yolo_info(n_classes, n_anchors, l_obj, l_noobj,
                                        anchors)

            # get best index
            index_fn(best_idx_ptr,
                     best_iou_ptr,
                     x_ptr,
                     truth_ptr,
                     yolo_ptr,
                     block=(1, 1, 1),
                     grid=(x[0].shape[0], 1, 1))

            n_total = np.int32(x[0].shape[0] * n_anchors *
                               np.prod(x[0].shape[-2:]))
            # Number of entries of the index buffer different from -1.
            n_matched = np.int32(gpuarray.sum(best_idx_ptr != -1).get())

            cost_fn(cost_ptr,
                    best_idx_ptr,
                    best_iou_ptr,
                    x_ptr,
                    truth_ptr,
                    yolo_ptr,
                    n_matched,
                    n_total,
                    block=(n_anchors, 1, 1),
                    grid=(x[0].shape[0], x[0].shape[2], x[0].shape[3]))

            tmp = gpuarray.sum(
                gpuarray.GPUArray(
                    cost_obj.shape, cost_obj.dtype,
                    gpudata=cost_obj.data))  # do sum using reduction
            # Fetch the reduced value into a one-element host array.
            foo = np.zeros(1, dtype=np.float32)
            tmp.get(foo)
            z[0] = foo[0]

            if return_extras:
                cost_on_gpu = cost_obj.get_val()  # transfer data onto host
                cost_coord[0], cost_class[0], cost_object[0] = 0., 0., 0.

                # Walk axis 1 in groups of (5 + n_classes) entries, one group
                # per anchor: the first four entries of a group add to the
                # coordinate cost, the fifth to the object cost and the
                # remaining n_classes to the class cost.
                for i in range(0, (5 + n_classes) * n_anchors, 5 + n_classes):
                    cost_coord[0] += np.sum(cost_on_gpu[:, i:i + 4])
                    cost_class[0] += np.sum(cost_on_gpu[:, i + 5:i + 5 +
                                                        n_classes])
                    cost_object[0] += np.sum(cost_on_gpu[:, i + 4])

            # free all memory
            # With return_extras the index array wraps the anchor index
            # output's buffer, so only the other case drops it here.
            if not return_extras:
                del best_idx_ptr

            cost_ptr.free()
            del best_iou_ptr
            yolo_ptr.free()
def test_zero_noparam():
    """Calling ``gpu_ndarray.zeros`` with no arguments must raise TypeError."""
    raised = False
    try:
        gpu_ndarray.zeros()
    except TypeError:
        raised = True
    if not raised:
        # The call returned normally, which is the failure case.
        assert False
def test_zeros_no_dtype():
    """``zeros`` given only a shape must match numpy's default metadata.

    Neither dtype nor order is passed; the 0-d GPU array and the 0-d numpy
    array are compared with ``check_meta``.
    """
    shape = ()
    check_meta(gpu_ndarray.zeros(shape, context=ctx), numpy.zeros(shape))
Example #16
0
    assert out_c[1].shape == out_g[1].shape
    assert out_c[0].dtype == out_g[0].dtype
    assert out_c[1].dtype == out_g[1].dtype
    assert numpy.allclose(out_c[0], numpy.asarray(out_g[0]))
    assert numpy.allclose(out_c[1], numpy.asarray(out_g[1]))


def test_elemwise_bool():
    """Check truth-value conversion of GPU arrays.

    ``bool`` on a two-element array must raise ``ValueError``, while
    zero-filled arrays of shape ``(1,)`` and ``()`` must both be falsy.
    """
    a = gpuarray.empty((2,), context=context)
    exc = None
    try:
        bool(a)
    except ValueError as e:
        exc = e
    # Check the captured exception: the handler variable ``e`` is unbound
    # when nothing was raised (and is cleared after the handler on Python 3).
    assert exc is not None
    a = gpuarray.zeros((1,), context=context)
    assert not bool(a)
    a = gpuarray.zeros((), context=context)
    assert not bool(a)


def test_broadcast():
    for shapea, shapeb in [((3, 5), (3, 5)),
                           ((1, 5), (3, 5)),
                           ((3, 5), (3, 1)),
                           ((1, 5), (3, 1)),
                           ((3, 1), (3, 5)),
                           ((3, 5), (3, 1)),
                           ((1, 1), (1, 1)),
                           ((3, 4, 5), (4, 5)),
                           ((4, 5), (3, 4, 5)),
Example #17
0
        def thunk():
            """Solve the linear system for the current inputs with cuSOLVER.

            Reads the matrix from ``inputs[0][0]`` and the right-hand sides
            from ``inputs[1][0]``, LU-factorises the matrix with
            ``cusolverDnSgetrf``, solves with ``cusolverDnSgetrs`` and stores
            the overwritten copy of the right-hand sides in ``outputs[0][0]``.

            Raises:
                ValueError: if ``self.trans`` is not one of 'N', 'T', 'C', if
                    the matrix is not square, or if the matrix and the
                    right-hand sides have mismatched sizes.
            """
            context = inputs[0][0].context

            # Output storage cell.
            z = outputs[0]

            # Matrix.
            A = inputs[0][0]

            # Solution vectors.
            b = inputs[1][0]

            assert (len(A.shape) == 2)
            assert (len(b.shape) == 2)

            if self.trans in ['T', 'C']:
                trans = 1
                l, n = A.shape
                k, m = b.shape
            elif self.trans == 'N':
                trans = 0
                n, l = A.shape
                k, m = b.shape
            else:
                raise ValueError('Invalid value for trans')
            if l != n:
                raise ValueError('A must be a square matrix')
            if n != k:
                raise ValueError('A and b must be aligned.')

            lda = max(1, n)
            # b is copied below into a dense F-ordered (k, m) array, so its
            # leading dimension is its row count k.  Including m here would
            # give a wrong column stride whenever b has more columns than
            # rows.
            ldb = max(1, k)

            # We copy A and b as cusolver operates inplace
            b = gpuarray.array(b, copy=True, order='F')
            if not self.inplace:
                A = gpuarray.array(A, copy=True)
            A_ptr = A.gpudata
            b_ptr = b.gpudata

            # cusolver expects a F ordered matrix, but A is not explicitly
            # converted between C and F order, instead we switch the
            # "transpose" flag.
            if A.flags['C_CONTIGUOUS']:
                trans = 1 - trans

            workspace_size = cusolver.cusolverDnSgetrf_bufferSize(
                cusolver_handle, n, n, A_ptr, lda)

            # Scratch buffers are cached on the thunk and reallocated only
            # when the required size changes.
            if (thunk.workspace is None
                    or thunk.workspace.size != workspace_size):
                thunk.workspace = gpuarray.zeros((workspace_size, ),
                                                 dtype='float32',
                                                 context=context)

            # getrf/getrs take integer arrays for the pivot indices and the
            # info flag, so allocate them as int32 rather than float32.
            if thunk.pivots is None or thunk.pivots.size != n:
                thunk.pivots = gpuarray.zeros((n, ),
                                              dtype='int32',
                                              context=context)

            if thunk.dev_info is None:
                thunk.dev_info = gpuarray.zeros((1, ),
                                                dtype='int32',
                                                context=context)

            workspace_ptr = thunk.workspace.gpudata
            pivots_ptr = thunk.pivots.gpudata
            dev_info_ptr = thunk.dev_info.gpudata

            # LU factorisation of A, in place.
            cusolver.cusolverDnSgetrf(cusolver_handle, n, n, A_ptr, lda,
                                      workspace_ptr, pivots_ptr, dev_info_ptr)

            # Solve using the factorisation; b is overwritten in place.
            cusolver.cusolverDnSgetrs(cusolver_handle, trans, n, m, A_ptr, lda,
                                      pivots_ptr, b_ptr, ldb, dev_info_ptr)

            z[0] = b
def test_zeros_no_dtype():
    """Compare default-argument ``zeros`` on the GPU with numpy.

    Only the (empty) shape is supplied, so both libraries pick their own
    default dtype and order; ``check_meta`` compares the resulting arrays.
    """
    # no dtype and order param
    empty_shape = ()
    gpu_scalar = gpu_ndarray.zeros(empty_shape, context=ctx)
    host_scalar = numpy.zeros(empty_shape)
    check_meta(gpu_scalar, host_scalar)