gpu_op = GpuMultinomialFromUniform(node.op.odtype) return [ host_from_gpu(gpu_op(*[gpu_from_host(i) for i in [p, u]])).T ] if (isinstance(node.op, theano.sandbox.cuda.GpuFromHost) and node.inputs[0].owner and type(node.inputs[0].owner.op) is MultinomialFromUniform): multi = node.inputs[0].owner if len(node.inputs) == 2: p, u = node.inputs n_samples = 1 else: p, u, n_samples = node.inputs try: if get_scalar_constant_value(n_samples) != 1: return None except NotScalarConstantError: return None m, = multi.outputs if (p.dtype == u.dtype == m.dtype == 'float32'): gpu_op = GpuMultinomialFromUniform(multi.op.odtype) ret = gpu_op(*[gpu_from_host(i) for i in [p, u]]).T # The dimshuffle is on the cpu, but will be moved to the # gpu by an opt. return [gpu_from_host(ret)] if cuda_available: register_opt()(local_gpu_multinomial) pass
theano.sandbox.cuda.HostFromGpu) for i in node.inputs])): gpu_op = GpuMultinomialFromUniform(node.op.odtype) return [host_from_gpu(gpu_op(*[gpu_from_host(i) for i in [p, u]])).T] if (isinstance(node.op, theano.sandbox.cuda.GpuFromHost) and node.inputs[0].owner and type(node.inputs[0].owner.op) is MultinomialFromUniform): multi = node.inputs[0].owner if len(node.inputs) == 2: p, u = node.inputs n_samples = 1 else: p, u, n_samples = node.inputs try: if get_scalar_constant_value(n_samples) != 1: return None except NotScalarConstantError: return None m, = multi.outputs if (p.dtype == u.dtype == m.dtype == 'float32'): gpu_op = GpuMultinomialFromUniform(multi.op.odtype) ret = gpu_op(*[gpu_from_host(i) for i in [p, u]]).T # The dimshuffle is on the cpu, but will be moved to the # gpu by an opt. return [gpu_from_host(ret)] if cuda_available: register_opt()(local_gpu_multinomial) pass
if (p.dtype == vals.dtype == 'float32'): gpu_op = GpuProbsGrabber() ret_vals = gpu_op(gpu_from_host(p),indx) return [gpu_from_host(ret_vals)] @local_optimizer([Assigner]) def local_assigner(node): if type(node.op) is Assigner: p, indx, gr, = node.inputs vals, = node.outputs if (p.dtype == vals.dtype == 'float32' and any([i.owner and isinstance(i.owner.op, theano.sandbox.cuda.HostFromGpu) for i in node.inputs])): gpu_op = GpuAssigner() ret = gpu_op(gpu_from_host(p),indx,gpu_from_host(gr)) return [host_from_gpu(ret),] if (isinstance(node.op, theano.sandbox.cuda.GpuFromHost) and node.inputs[0].owner and type(node.inputs[0].owner.op) is Assigner): multi = node.inputs[0].owner p,indx,gr = multi.inputs vals, = multi.outputs if (p.dtype == vals.dtype == 'float32'): gpu_op = GpuAssigner() ret_vals = gpu_op(gpu_from_host(p),indx,gpu_from_host(gr)) return [gpu_from_host(ret_vals)] if cuda_available: register_opt()(local_assigner) register_opt()(local_probs_grabber) register_opt()(local_gpu_argmax)