Exemple #1
0
def test_local_gpualloc_memset_0():
    i = theano.tensor.iscalar()
    z = numpy.zeros((1,), dtype='float32')
    o = numpy.ones((1,), dtype='float32')
    ones = numpy.ones((2,), dtype='float32')

    # Test with 0
    a = gpu_alloc(z, i)
    f = theano.function([i], a, mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, GpuAlloc) and topo[0].op.memset_0
    assert (numpy.asarray(f(6)) == 0).all()

    # Test with 1
    a = gpu_alloc(o, i)
    f = theano.function([i], a, mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, GpuAlloc)
    assert not topo[0].op.memset_0
    assert (numpy.asarray(f(6)) == 1).all()

    # Test with 1, 1
    a = gpu_alloc(ones, i)
    f = theano.function([i], a, mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, GpuAlloc)
    assert not topo[0].op.memset_0
    assert (numpy.asarray(f(2)) == 1).all()
Exemple #2
0
def test_local_gpualloc_memset_0():
    i = theano.tensor.iscalar()
    z = numpy.zeros((1, ), dtype='float32')
    o = numpy.ones((1, ), dtype='float32')
    ones = numpy.ones((2, ), dtype='float32')

    # Test with 0
    a = gpu_alloc(z, i)
    f = theano.function([i], a, mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, GpuAlloc) and topo[0].op.memset_0
    assert (numpy.asarray(f(6)) == 0).all()

    # Test with 1
    a = gpu_alloc(o, i)
    f = theano.function([i], a, mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, GpuAlloc)
    assert not topo[0].op.memset_0
    assert (numpy.asarray(f(6)) == 1).all()

    # Test with 1, 1
    a = gpu_alloc(ones, i)
    f = theano.function([i], a, mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, GpuAlloc)
    assert not topo[0].op.memset_0
    assert (numpy.asarray(f(2)) == 1).all()
Exemple #3
0
def local_gpuaalloc(node):
    new_out = gpu_alloc(*node.inputs)
    # We need to hide new broadcastable dimensions because
    # ReplaceValidate doesn't like when they change.
    if new_out.broadcastable != node.outputs[0].broadcastable:
        # but if a dim is suddenly not broadcastable anymore then that's a bug
        for b_old, b_new in zip(node.outputs[0].broadcastable, new_out.broadcastable):
            assert b_new or (not b_old)
        new_out = tensor.patternbroadcast(new_out, node.outputs[0].broadcastable)
    return (new_out,)
Exemple #4
0
def local_gpuaalloc2(node):
    """
    Join(axis, Alloc, Alloc, ...) -> Join(axis, GpuAlloc, Alloc, ...)

    Moves an alloc that is an input to join to the gpu.
    """
    if (isinstance(node.op, tensor.Alloc)
            and all(c != 'output' and c.op == tensor.join and all(
                i.owner and i.owner.op in [host_from_gpu, tensor.alloc]
                for i in c.inputs[1:]) for c, idx in node.outputs[0].clients)):
        return [host_from_gpu(gpu_alloc(*node.inputs))]
Exemple #5
0
def local_gpuaalloc(node):
    new_out = gpu_alloc(*node.inputs)
    # We need to hide new broadcastable dimensions because
    # ReplaceValidate doesn't like when they change.
    if new_out.broadcastable != node.outputs[0].broadcastable:
        # but if a dim is suddenly not broadcastable anymore then that's a bug
        for b_old, b_new in zip(node.outputs[0].broadcastable,
                                new_out.broadcastable):
            assert b_new or (not b_old)
        new_out = tensor.patternbroadcast(new_out,
                                          node.outputs[0].broadcastable)
    return (new_out,)
Exemple #6
0
def local_gpuaalloc2(node):
    """
    Join(axis, Alloc, Alloc, ...) -> Join(axis, GpuAlloc, Alloc, ...)

    Moves an alloc that is an input to join to the gpu.
    """
    if isinstance(node.op, tensor.Alloc) and all(
        c != "output"
        and c.op == tensor.join
        and all(i.owner and i.owner.op in [host_from_gpu, tensor.alloc] for i in c.inputs[1:])
        for c, idx in node.outputs[0].clients
    ):
        return [host_from_gpu(gpu_alloc(*node.inputs))]