def test_local_gpualloc_memset_0():
    i = theano.tensor.iscalar()
    z = numpy.zeros((1,), dtype='float32')
    o = numpy.ones((1,), dtype='float32')
    ones = numpy.ones((2,), dtype='float32')

    # Test with 0
    a = gpu_alloc(z, i)
    f = theano.function([i], a, mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, GpuAlloc) and topo[0].op.memset_0
    assert (numpy.asarray(f(6)) == 0).all()

    # Test with 1
    a = gpu_alloc(o, i)
    f = theano.function([i], a, mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, GpuAlloc)
    assert not topo[0].op.memset_0
    assert (numpy.asarray(f(6)) == 1).all()

    # Test with 1, 1
    a = gpu_alloc(ones, i)
    f = theano.function([i], a, mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, GpuAlloc)
    assert not topo[0].op.memset_0
    assert (numpy.asarray(f(2)) == 1).all()
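# A minimal sketch of what the ``memset_0`` flag asserted above stands for:
# GpuAlloc is marked ``memset_0=True`` when the fill value is exactly zero, so
# the buffer can be zero-initialized with a plain memset instead of a fill
# kernel.  ``fills_with_zero`` below is a hypothetical helper for illustration
# only, not part of Theano.
import numpy


def fills_with_zero(value):
    """Return True when an Alloc fill value is all zeros."""
    return bool((numpy.asarray(value) == 0).all())


assert fills_with_zero(numpy.zeros((1,), dtype='float32'))
assert not fills_with_zero(numpy.ones((1,), dtype='float32'))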
def local_gpuaalloc(node):
    new_out = gpu_alloc(*node.inputs)
    # We need to hide new broadcastable dimensions because
    # ReplaceValidate doesn't like when they change.
    if new_out.broadcastable != node.outputs[0].broadcastable:
        # But if a dim is suddenly not broadcastable anymore, then
        # that's a bug.
        for b_old, b_new in zip(node.outputs[0].broadcastable,
                                new_out.broadcastable):
            assert b_new or (not b_old)
        new_out = tensor.patternbroadcast(new_out,
                                          node.outputs[0].broadcastable)
    return (new_out,)
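# The broadcastable-hiding step above relies on tensor.patternbroadcast, which
# only re-labels the broadcastable flags of a variable without touching its
# data.  A minimal CPU-only sketch (the names ``v`` and ``w`` are illustrative,
# not taken from the original code):
import theano.tensor as tensor

v = tensor.row('v')                             # broadcastable == (True, False)
w = tensor.patternbroadcast(v, (False, False))  # hide the broadcastable dim
assert w.broadcastable == (False, False)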
def local_gpuaalloc2(node):
    """
    Join(axis, Alloc, Alloc, ...) -> Join(axis, GpuAlloc, Alloc, ...)

    Moves an alloc that is an input to join to the gpu.

    """
    if (isinstance(node.op, tensor.Alloc) and
        all(c != 'output' and
            c.op == tensor.join and
            all(i.owner and
                i.owner.op in [host_from_gpu, tensor.alloc]
                for i in c.inputs[1:])
            for c, idx in node.outputs[0].clients)):
        return [host_from_gpu(gpu_alloc(*node.inputs))]
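# A hedged sketch of the graph pattern local_gpuaalloc2 looks for: an Alloc
# whose only clients are Join nodes.  Built on the CPU here; compiling with
# mode_with_gpu is expected to let the optimizer move the Alloc to the GPU.
# The names ``x``, ``filler``, ``y`` and ``f`` are illustrative only.
import numpy
import theano
import theano.tensor as tensor

x = tensor.fmatrix('x')
filler = tensor.alloc(numpy.float32(0), x.shape[0], 2)  # the Alloc input
y = tensor.join(1, x, filler)                           # Join(axis, ..., Alloc)
f = theano.function([x], y)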