def local_gpualloc(node):
    replace = False
    if node.op == tensor.alloc:
        # the value being allocated already lives on the GPU
        if node.inputs[0].owner and node.inputs[0].owner.op == host_from_gpu:
            replace = True
        # every client of the output immediately transfers it to the GPU
        elif all([c != 'output' and c.op == gpu_from_host
                  for c, idx in node.outputs[0].clients]):
            replace = True
        # every client is a join whose other inputs come from the GPU
        # or from another alloc
        elif all([c != 'output' and c.op == tensor.join and
                  all([i.owner and
                       i.owner.op in [host_from_gpu, tensor.alloc]
                       for i in c.inputs[1:]])
                  for c, idx in node.outputs[0].clients]):
            replace = True
    if replace:
        val = node.inputs[0]
        shp = node.inputs[1:]
        old_out = node.outputs[0]
        val2 = tensor.shape_padleft(val, len(shp) - val.ndim)
        new_out = host_from_gpu(gpu_alloc(val, *shp))
        if new_out.type != old_out.type:
            # the rewrite may only lose broadcastable dimensions;
            # ndim and dtype must be preserved
            assert new_out.type.ndim == old_out.type.ndim
            assert new_out.type.dtype == old_out.type.dtype
            for b_old, b_new in zip(old_out.type.broadcastable,
                                    new_out.type.broadcastable):
                assert b_new or (not b_old)
            new_out = tensor.patternbroadcast(new_out, old_out.broadcastable)
        return [new_out]
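A rewrite like this only runs once it is registered with the GPU optimizer database. The sketch below shows the usual hookup, assuming Theano's local_optimizer decorator; register_opt stands in for the backend's module-level registration helper and is defined here only so the sketch is self-contained.

from theano import tensor
from theano.gof.opt import local_optimizer


def register_opt(*tags):
    # stand-in for the GPU backend's registration helper, which adds the
    # rewrite to its optimizer database under the given tags
    def deco(lopt):
        return lopt
    return deco


@register_opt()
@local_optimizer([tensor.alloc])  # only visit Alloc nodes
def local_gpualloc(node):
    # body as in the listing above
    return None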
def local_gpu_elemwise(node):
    do_replace = False
    gpu_out = False
    # check for gpu_from_host(Elemwise(...)) and extract the Elemwise node
    if node.op == gpu_from_host:
        host_i, = node.inputs
        if (host_i.owner and
                isinstance(host_i.owner.op, tensor.Elemwise) and
                len(host_i.clients) == 1):
            node = host_i.owner
            do_replace = True
            gpu_out = True
    # check for elemwise(..., host_from_gpu, ...)
    if isinstance(node.op, tensor.Elemwise):
        if numpy.any([i.owner and i.owner.op == host_from_gpu
                      for i in node.inputs]):
            do_replace = True
        # not worth moving the computation if all inputs are scalars
        if numpy.all([_is_scalar(i) for i in node.inputs]):
            do_replace = False
    if do_replace:
        new_op = GpuElemwise(node.op.scalar_op)
        gpu_elemwise = new_op(*(gpu_from_host(i) for i in node.inputs))
        if gpu_out:
            return [gpu_elemwise]
        else:
            return [host_from_gpu(gpu_elemwise)]
    else:
        return False
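To make the two patterns concrete, here is an illustrative sketch (variable names are hypothetical) of the graphs this rewrite targets and of what it produces; only the host-side graph is actually built.

from theano import tensor

x = tensor.fmatrix('x')
y = tensor.fmatrix('y')
z = x + y  # an Elemwise{add} node

# Pattern 1: gpu_from_host(z), i.e. an Elemwise whose only client is a
#   transfer to the GPU. It becomes
#   GpuElemwise{add}(gpu_from_host(x), gpu_from_host(y)),
#   and since gpu_out is True the result is left on the GPU.
#
# Pattern 2: an Elemwise with at least one host_from_gpu input, e.g.
#   host_from_gpu(g) + y. It becomes
#   host_from_gpu(GpuElemwise{add}(g, gpu_from_host(y))),
#   so the elementwise work runs on the GPU and a single transfer back
#   to the host remains.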
def apply(self, fgraph):
    for input in fgraph.inputs:
        # already a GPU input
        if isinstance(input.type, GpuArrayType):
            continue
        # skip inputs whose only client is the output or a transfer to the GPU
        if (len(input.clients) == 1 and
                (input.clients[0][0] == 'output' or
                 input.clients[0][0].op == gpu_from_host)):
            continue
        try:
            new_input = host_from_gpu(gpu_from_host(input))
            fgraph.replace_validate(input, new_input,
                                    "InputToGpuOptimizer")
        except TypeError:
            # This could fail if the inputs are not TensorTypes
            pass
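For context, apply above belongs to a global optimizer that walks the graph inputs. A minimal sketch of the enclosing class, assuming Theano's Optimizer base class and the ReplaceValidate feature (which is what provides the fgraph.replace_validate call used above):

from theano.gof import toolbox
from theano.gof.opt import Optimizer


class InputToGpuOptimizer(Optimizer):
    """Insert host_from_gpu(gpu_from_host(i)) around host inputs so later
    local rewrites can move the computation that uses them to the GPU."""

    def add_requirements(self, fgraph):
        # installs the feature that provides fgraph.replace_validate
        fgraph.attach_feature(toolbox.ReplaceValidate())

    def apply(self, fgraph):
        # body as in the listing above
        pass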
def _as_TensorVariable(self):
    from basic_ops import host_from_gpu
    return host_from_gpu(self)
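This method is the hook that theano.tensor.as_tensor_variable looks for, so a GPU-typed variable used in a host-side expression picks up the transfer automatically. A hypothetical usage, where g is assumed to be a GPU variable (e.g. the output of gpu_from_host):

from theano import tensor

# as_tensor_variable finds _as_TensorVariable on g and calls it,
# which is equivalent to writing host_from_gpu(g) explicitly.
host_g = tensor.as_tensor_variable(g)
expr = host_g + 1  # an ordinary host-side Elemwise on the transferred value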