def local_gpu_elemwise(node):
    """Move an Elemwise computation to the GPU when its data already lives there.

    Two patterns are rewritten:

    * ``gpu_from_host(Elemwise(...))``  -> ``GpuElemwise(...)``
      (the result stays on the GPU);
    * ``Elemwise(..., host_from_gpu(x), ...)``
      -> ``host_from_gpu(GpuElemwise(...))``.

    Returns a one-element list holding the replacement variable, ``False``
    when the node matched but should not be replaced, or ``None`` (implicit)
    when the node is not an Elemwise at all.
    """
    do_replace = False
    gpu_out = False

    # Pattern 1: gpu_from_host(Elemwise).  Unwrap the transfer and operate on
    # the underlying Elemwise node; its result then stays on the GPU.  Only
    # rewrite when the Elemwise has a single client so we do not duplicate
    # its computation for other consumers.
    if node.op == gpu_from_host:
        host_i, = node.inputs
        if (host_i.owner and
                isinstance(host_i.owner.op, tensor.Elemwise) and
                len(host_i.clients) == 1):
            node = host_i.owner
            do_replace = True
            gpu_out = True

    # Pattern 2: at least one input is already a transfer back from the GPU.
    if isinstance(node.op, tensor.Elemwise):
        # Plain Python booleans: the builtins any/all are the idiomatic
        # choice here -- no need to route through numpy.
        if any(i.owner and i.owner.op == host_from_gpu
               for i in node.inputs):
            do_replace = True
        # An all-scalar elemwise is not worth moving to the GPU.
        if all(_is_scalar(i) for i in node.inputs):
            do_replace = False
        if do_replace:
            new_op = GpuElemwise(node.op.scalar_op)
            gpu_elemwise = new_op(*(gpu_from_host(i) for i in node.inputs))
            if gpu_out:
                return [gpu_elemwise]
            return [host_from_gpu(gpu_elemwise)]
        return False
def local_gpu_elemwise(node):
    """Lift an elementwise application onto the GPU.

    Handles ``gpu_from_host(Elemwise(...))`` (result kept on the GPU) as
    well as Elemwise applications fed by ``host_from_gpu`` transfers
    (result moved back to the host).  Returns a single-element list with
    the replacement variable, or ``False`` if the node is left untouched.
    """
    replace = False
    keep_on_gpu = False

    # gpu_from_host(Elemwise(...)): peel off the transfer and work on the
    # wrapped Elemwise instead.  Require a single client so the elemwise
    # computation is not duplicated for other consumers.
    if node.op == gpu_from_host:
        inner, = node.inputs
        if (inner.owner and
                isinstance(inner.owner.op, tensor.Elemwise) and
                len(inner.clients) == 1):
            node = inner.owner
            replace = True
            keep_on_gpu = True

    if not isinstance(node.op, tensor.Elemwise):
        return None

    # Does any input already come back from the GPU?
    came_from_gpu = [i.owner and i.owner.op == host_from_gpu
                     for i in node.inputs]
    if numpy.any(came_from_gpu):
        replace = True
    # Purely scalar work is not worth a GPU round-trip.
    if numpy.all([_is_scalar(i) for i in node.inputs]):
        replace = False

    if not replace:
        return False

    gpu_op = GpuElemwise(node.op.scalar_op)
    result = gpu_op(*(gpu_from_host(i) for i in node.inputs))
    if keep_on_gpu:
        return [result]
    return [host_from_gpu(result)]
def apply(self, fgraph):
    """Insert ``host_from_gpu(gpu_from_host(input))`` above each graph input.

    This seeds a GPU copy of every suitable input so that later
    transfer-merging optimizations have something to work with.  Inputs
    that are already ``GpuArrayType``, or whose only client is the graph
    output / an existing ``gpu_from_host``, are skipped because another
    transfer would be useless.
    """
    for inp in fgraph.inputs:
        # Already lives on the GPU: nothing to transfer.
        if isinstance(inp.type, GpuArrayType):
            continue
        # Sole client is the output, or is already a transfer to the GPU:
        # inserting a round-trip would add no optimization opportunity.
        if (len(inp.clients) == 1 and
                (inp.clients[0][0] == 'output' or
                 inp.clients[0][0].op == gpu_from_host)):
            continue
        try:
            new_input = host_from_gpu(gpu_from_host(inp))
            fgraph.replace_validate(inp, new_input,
                                    "InputToGpuOptimizer")
        # Py2-only `except TypeError, e:` replaced with the form valid on
        # both Python 2.6+ and 3; the unused binding `e` is dropped.
        except TypeError:
            # Deliberate best effort: gpu_from_host can fail for inputs
            # that are not TensorTypes, which are simply left on the host.
            pass