Example #1
    def __call__(self, persid):
        from theano.gpuarray.type import get_context
        from theano.gpuarray import pygpu

        array_type, name = persid.split(".")

        if name in self.cache:
            return self.cache[name]
        ret = None
        if array_type == "gpuarray":
            with self.zip_file.open(name) as f:
                ctx_name = pickle.load(f)
                array = np.lib.format.read_array(f)
            if config.experimental.unpickle_gpu_on_cpu:
                # directly return numpy array
                warnings.warn(
                    "config.experimental.unpickle_gpu_on_cpu is set "
                    "to True. Unpickling GpuArray as numpy.ndarray"
                )
                ret = array
            elif pygpu:
                ret = pygpu.array(array, context=get_context(ctx_name))
            else:
                raise ImportError("pygpu not found. Cannot unpickle GpuArray")
        else:
            with self.zip_file.open(name) as f:
                ret = np.lib.format.read_array(f)
        self.cache[name] = ret
        return ret
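
The __call__ above implements pickle's persistent-ID loading hook: pickle invokes it once for every persistent id that the matching saver emitted, and the cache ensures each array is read from the archive only once. A minimal sketch of the wiring, assuming the zip member holding the pickled object graph is named "pkl" and that the loader class takes the open zip file in its constructor (both are assumptions for illustration, not Theano's documented API):

import pickle
import zipfile


def load_with_persistent_arrays(path, loader_cls):
    # loader_cls exposes a __call__ like the one above and keeps a
    # cache dict plus a reference to the open zip file
    with zipfile.ZipFile(path) as zf:
        with zf.open("pkl") as f:
            unpickler = pickle.Unpickler(f)
            # pickle routes every persistent id through this hook
            unpickler.persistent_load = loader_cls(zf)
            return unpickler.load()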
Example #3
    def perform(self, node, inputs, output_storage):
        x = inputs[0]
        z = output_storage[0]
        z[0] = pygpu.empty(x.shape,
                           dtype=x.dtype,
                           context=get_context(self.context_name))
        self.my_op(x, z[0])
Example #4
def test_hostfromgpu_shape_i():
    # Test that the shape is lifted over hostfromgpu

    m = mode_with_gpu.including("local_dot_to_dot22",
                                "local_dot22_to_dot22scalar", "specialize")
    a = tt.fmatrix("a")
    ca = theano.gpuarray.type.GpuArrayType("float32", (False, False))()
    av = np.asarray(np.random.rand(5, 4), dtype="float32")
    cv = gpuarray.asarray(np.random.rand(5, 4),
                          dtype="float32",
                          context=get_context(test_ctx_name))

    f = theano.function([a], GpuFromHost(test_ctx_name)(a), mode=m)
    assert any(
        isinstance(x.op, GpuFromHost) for x in f.maker.fgraph.toposort())
    f = theano.function([a], GpuFromHost(test_ctx_name)(a).shape, mode=m)
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[0].op, tt.opt.Shape_i)
    assert isinstance(topo[1].op, tt.opt.Shape_i)
    assert isinstance(topo[2].op, tt.opt.MakeVector)
    assert tuple(f(av)) == (5, 4)

    f = theano.function([ca], host_from_gpu(ca), mode=m)
    assert host_from_gpu in [x.op for x in f.maker.fgraph.toposort()]
    f = theano.function([ca], host_from_gpu(ca).shape, mode=m)
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[0].op, theano.compile.Shape_i)
    assert isinstance(topo[1].op, theano.compile.Shape_i)
    assert isinstance(topo[2].op, tt.opt.MakeVector)
    assert tuple(f(cv)) == (5, 4)
Example #5
    def get_params(self, node):
        pygpu_gpuarray = pytest.importorskip("pygpu.gpuarray")

        return self.params_type.get_params(
            typecode=pygpu_gpuarray.dtype_to_typecode(self.dtype),
            context=get_context(self.context_name),
        )
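
The get_params above fills a ParamsType declared on the Op. A minimal sketch of what that declaration could look like, with field types assumed from the keywords in the call (pairing an int32 typecode with gpu_context_type is an assumption, not taken from this snippet):

from theano.gof import ParamsType
from theano.gpuarray.type import gpu_context_type
from theano.scalar import int32

# one field per keyword passed to get_params above: the integer
# typecode for the dtype and the GPU context handle
params_type = ParamsType(typecode=int32, context=gpu_context_type)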
Example #6
    def setup_method(self):
        self.input = gpu_ftensor4()
        self.filters = gpu_ftensor4()
        self.topgrad = gpu_ftensor4()
        self.constant_tensor = gpuarray.array(
            np.zeros((3, 5, 7, 11), dtype="float32"),
            context=get_context(test_ctx_name))
        super().setup_method()
Example #7
def rand_gpuarray(*shape, **kwargs):
    r = rng.rand(*shape) * 2 - 1
    dtype = kwargs.pop("dtype", theano.config.floatX)
    cls = kwargs.pop("cls", None)
    if len(kwargs) != 0:
        raise TypeError("Unexpected argument %s" % list(kwargs.keys())[0])
    return gpuarray.array(r,
                          dtype=dtype,
                          cls=cls,
                          context=get_context(test_ctx_name))
Example #8
    def make_node(self, x):
        x = as_gpuarray_variable(x, self.context_name)

        x_arg = pygpu.elemwise.arg('x', 'float32', read=True)
        c_arg = pygpu.elemwise.arg('c', 'float32', read=True, write=True)
        self.my_op = pygpu.elemwise.GpuElemwise(
            get_context(self.context_name),
            "c = " + str(self.a) + " * x + " + str(self.b), [x_arg, c_arg],
            convert_f16=True)

        return Apply(self, [x], [x.type()])
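
make_node here compiles the elementwise kernel once and stores it on the Op, so a perform like the one in Example #3 only has to allocate the output and invoke the stored kernel. A sketch of the surrounding Op putting the two together (the class name and constructor are assumptions for illustration):

import pygpu
from theano.gof import Op
from theano.gpuarray.type import get_context


class AxPlusB(Op):  # hypothetical Op computing a * x + b on the GPU
    def __init__(self, a, b, context_name=None):
        self.a = a
        self.b = b
        self.context_name = context_name

    # make_node as above: builds self.my_op and returns the Apply node

    def perform(self, node, inputs, output_storage):
        x = inputs[0]
        z = output_storage[0]
        # allocate the output on the right context, then run the
        # GpuElemwise kernel compiled in make_node
        z[0] = pygpu.empty(x.shape, dtype=x.dtype,
                           context=get_context(self.context_name))
        self.my_op(x, z[0])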
Example #9
def test_transfer_cpu_gpu():
    a = tt.fmatrix("a")
    g = GpuArrayType(dtype="float32", broadcastable=(False, False))("g")

    av = np.asarray(rng.rand(5, 4), dtype="float32")
    gv = gpuarray.array(av, context=get_context(test_ctx_name))

    f = theano.function([a], GpuFromHost(test_ctx_name)(a))
    fv = f(av)
    assert GpuArrayType.values_eq(fv, gv)

    f = theano.function([g], host_from_gpu(g))
    fv = f(gv)
    assert np.all(fv == av)
Example #10
    def setup_method(self):
        super().setup_method()
        test_ctx = get_context(test_ctx_name)
        if test_ctx.kind != b"cuda":
            pytest.skip("Cuda specific tests")
        self.max_threads_dim0 = test_ctx.maxlsize0
        self.max_grid_size1 = test_ctx.maxgsize2
        self.op_class = CumOp

        # The CPU implementation is less accurate, which makes DebugMode's
        # output comparisons fail. Since propagating .tag.values_eq_approx
        # to the output of every GpuFromHost seems overkill, we just relax
        # the rtol for these tests
        self.old_rtol = tt.float32_rtol
        tt.float32_rtol *= 2
Example #11
def test_transfer_gpu_gpu():
    g = GpuArrayType(dtype="float32",
                     broadcastable=(False, False),
                     context_name=test_ctx_name)()

    av = np.asarray(rng.rand(5, 4), dtype="float32")
    gv = gpuarray.array(av, context=get_context(test_ctx_name))
    mode = mode_with_gpu.excluding("cut_gpua_host_transfers",
                                   "local_cut_gpua_host_gpua")
    f = theano.function([g], GpuToGpu(test_ctx_name)(g), mode=mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, GpuToGpu)
    fv = f(gv)
    assert GpuArrayType.values_eq(fv, gv)
Example #12
def test_transfer_strided():
    # This is just to ensure that the transfer works in Theano;
    # libgpuarray has a much more comprehensive suite of tests to
    # ensure correctness
    a = tt.fmatrix("a")
    g = GpuArrayType(dtype="float32", broadcastable=(False, False))("g")

    av = np.asarray(rng.rand(5, 8), dtype="float32")
    gv = gpuarray.array(av, context=get_context(test_ctx_name))

    av = av[:, ::2]
    gv = gv[:, ::2]

    f = theano.function([a], GpuFromHost(test_ctx_name)(a))
    fv = f(av)
    assert GpuArrayType.values_eq(fv, gv)

    f = theano.function([g], host_from_gpu(g))
    fv = f(gv)
    assert np.all(fv == av)
Example #13
def test_shape():
    x = GpuArrayType(dtype="float32", broadcastable=[False, False, False])()
    v = gpuarray.zeros((3, 4, 5),
                       dtype="float32",
                       context=get_context(test_ctx_name))
    f = theano.function([x], x.shape)
    topo = f.maker.fgraph.toposort()
    assert np.all(f(v) == (3, 4, 5))
    if theano.config.mode != "FAST_COMPILE":
        assert len(topo) == 4
        assert isinstance(topo[0].op, tt.opt.Shape_i)
        assert isinstance(topo[1].op, tt.opt.Shape_i)
        assert isinstance(topo[2].op, tt.opt.Shape_i)
        assert isinstance(topo[3].op, tt.opt.MakeVector)
    mode = mode_with_gpu.excluding("local_shape_to_shape_i")
    f = theano.function([x], x.shape, mode=mode)
    topo = f.maker.fgraph.toposort()
    assert np.all(f(v) == (3, 4, 5))
    assert len(topo) == 1
    assert isinstance(topo[0].op, tt.Shape)
Example #14
    def setup_method(self):
        if get_context(test_ctx_name).kind != b"cuda":
            pytest.skip("Cuda specific tests")
Example #15
    def get_params(self, node):
        return get_context(self.context_name)
Example #16
def transfer(x, target):
    try:
        get_context(target)
        return as_gpuarray_variable(x, target)
    except ContextNotDefined:
        # no context is registered under this name: fall through and
        # return None to signal that the transfer cannot be done
        pass
Example #17
    def get_params(self, node):
        return self.params_type.get_params(
            self, context=get_context(self.context_name),
            keepdims=self.keepdims)
Example #18
    def get_params(self, node):
        return self.params_type.get_params(
            self, context=get_context(self.context_name),
            dtype_int64=self.dtype_int64)
Example #19
        def local_opt(node):
            if type(node.op) in OP:
                # Either one of our inputs is on the gpu or
                # all of our clients are on the gpu
                replace = False
                # TODO: Maybe set context_name with infer_context_name()?
                context_name = None
                # We replace if any input is a host_from_gpu
                for i in node.inputs:
                    if (i.owner and i.owner.op == host_from_gpu
                            and move_to_gpu(i)):
                        context_name = i.owner.inputs[0].type.context_name
                        replace = True
                        break

                if not replace:
                    # We replace if *all* clients are on the GPU
                    clients = [c for o in node.outputs for c in o.clients]
                    replace = len(clients) != 0
                    for c, idx in clients:
                        if c == "output" or not isinstance(c.op, GpuFromHost):
                            replace = False
                    # TODO: check that the clients want the same context?
                    if replace:
                        # All clients are GpuFromHost and we have at least one
                        context_name = clients[0][0].op.context_name

                # Check if we should replace
                if (not replace or
                    (cuda_only and get_context(context_name).kind != b"cuda")
                        or any([
                            "complex" in getattr(i, "dtype", "")
                            for i in node.inputs
                        ])):
                    return False

                # tag the inputs with the context in case
                # the context was derived from the outputs
                for i in node.inputs:
                    i.tag.context_name = context_name

                new_op = maker(node.op, context_name, node.inputs,
                               node.outputs)

                # This is needed as sometimes new_op inherits from OP.
                if new_op and new_op != node.op:
                    if isinstance(new_op, Op):
                        new_outputs = new_op(*node.inputs, return_list=True)
                        to_cpu_fn = safe_to_cpu
                    elif isinstance(new_op, (tuple, list)):
                        new_outputs = new_op
                        to_cpu_fn = safe_to_cpu
                    else:  # suppose it is a variable on the GPU
                        new_outputs = [new_op]

                        def to_cpu_fn(x):
                            return x.transfer("cpu")

                    # copy stack traces onto gpu outputs
                    # also copy the stack traces onto HostFromGpu outputs
                    on_cpu = []
                    for old_output, new_output in zip(node.outputs,
                                                      new_outputs):
                        copy_stack_trace(old_output, new_output)
                        cpu = to_cpu_fn(new_output)
                        on_cpu.append(cpu)
                        copy_stack_trace(old_output, cpu)
                    return on_cpu
            return False
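
A replacement function like this is normally wrapped as a Theano local optimizer so the optimization database can apply it node by node. A minimal sketch of the wrapping (the target op and function name are illustrative; a real body would run the replacement logic above):

from theano.gof.opt import local_optimizer
from theano.tensor.basic import Dot


@local_optimizer([Dot])
def local_dot_to_gpu(node):
    # a real implementation would run the replacement logic shown
    # above; returning False declines to rewrite this node
    return False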
Example #20
def test_may_share_memory():
    ctx = get_context(test_ctx_name)
    a = pygpu.empty((5, 4), context=ctx)
    b = pygpu.empty((5, 4), context=ctx)

    may_share_memory_core(a, b)
Example #21
def gpu_alloc_expected(x, *shp):
    g = gpuarray.empty(shp, dtype=x.dtype, context=get_context(test_ctx_name))
    g[:] = x
    return g
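
A hypothetical usage of this helper: build the expected device array for an alloc test, then pull it back to the host for comparison.

expected = gpu_alloc_expected(np.float32(9), 3, 4)
# pygpu arrays support __array__, so np.asarray copies back to host
assert np.asarray(expected).shape == (3, 4)
assert np.all(np.asarray(expected) == 9)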