def local_inv_1_plus_exp(node):
    """
    1/(1+exp(x)) -> sigm(-x)
    """
    # This optimization should be done for numerical stability,
    # so we don't care to check client counts.
    if node.op == tensor.inv:
        inv_arg = node.inputs[0]
        if inv_arg.owner and inv_arg.owner.op == tensor.add:
            scalars, scalar_inputs, nonconsts = \
                opt.scalarconsts_rest(inv_arg.owner.inputs)
            # scalar_inputs are potentially dimshuffled and fill'd scalars
            if len(nonconsts) == 1:
                if nonconsts[0].owner and nonconsts[0].owner.op == tensor.exp:
                    if scalars and numpy.allclose(numpy.sum(scalars), 1):
                        out = opt._fill_chain(
                            sigmoid(tensor.neg(nonconsts[0].owner.inputs[0])),
                            scalar_inputs)
                        # Keep combined stack traces of:
                        #   exp(x):            nonconsts[0],
                        #   1 + exp(x):        inv_arg,
                        #   1 / (1 + exp(x)):  node.outputs[0]
                        copy_stack_trace(
                            [nonconsts[0], inv_arg, node.outputs[0]], out)
                        return out

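# A minimal sketch (helper name is hypothetical, not part of the original
# module) showing the rewrite above in action: compiling 1 / (1 + exp(-x))
# should yield an optimized graph that applies sigmoid directly, which is
# numerically stabler than the naive expression.
def _example_inv_1_plus_exp():
    import theano
    import theano.tensor as T
    x = T.vector('x')
    f = theano.function([x], 1 / (1 + T.exp(-x)))
    # Expect sigmoid in the optimized graph.
    theano.printing.debugprint(f)
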
def local_ultra_fast_sigmoid(node):
    """
    When enabled, change all sigmoid ops to ultra_fast_sigmoid.

    For example, use mode.including('local_ultra_fast_sigmoid')
    or the Theano flag optimizer_including=local_ultra_fast_sigmoid.

    This speeds up the sigmoid op by using an approximation.

    This is done after the stabilization and specialize phases
    to avoid interacting with them.
    """
    if (isinstance(node.op, tensor.Elemwise) and
            node.op.scalar_op == scalar_sigmoid):
        out = ultra_fast_sigmoid(node.inputs[0])
        copy_stack_trace(node.outputs[0], out)

        def values_eq_approx_remove_low_prec(a, b):
            # atol was found by trial and error; other tests could fail
            # without good reason with a tighter tolerance.
            return tensor.TensorType.values_eq_approx(a, b, atol=0.02)
        # Let DebugMode know that this opt approximates the values.
        out.tag.values_eq_approx = values_eq_approx_remove_low_prec
        return [out]

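# Hedged usage sketch (helper name is hypothetical, not in the original
# module), following the docstring above: enable the opt through
# mode.including or the optimizer_including Theano flag.
def _example_enable_ultra_fast_sigmoid():
    import theano
    import theano.tensor as T
    x = T.matrix('x')
    mode = theano.compile.get_default_mode().including(
        'local_ultra_fast_sigmoid')
    # sigmoid should be replaced by ultra_fast_sigmoid in this graph.
    return theano.function([x], T.nnet.sigmoid(x), mode=mode)
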
def local_inplace_DiagonalSubtensor(node):
    """Also works for IncDiagonalSubtensor."""
    if (isinstance(node.op, (DiagonalSubtensor, IncDiagonalSubtensor)) and
            not node.op.inplace):
        new_op = node.op.__class__(inplace=True)
        new_node = new_op(*node.inputs)
        copy_stack_trace(node.outputs[0], new_node)
        return [new_node]
    return False

def local_inplace_sparse_block_outer(node):
    """
    SparseBlockOuter(inplace=False) -> SparseBlockOuter(inplace=True)
    """
    if isinstance(node.op, SparseBlockOuter) and not node.op.inplace:
        new_node = sparse_block_outer_inplace(*node.inputs)
        copy_stack_trace(node.outputs[0], new_node)
        return [new_node]
    return False

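# Sketch of how inplace opts such as the two above are typically
# registered (an assumption based on the common Theano pattern, not taken
# from this file; it presumes the function is wrapped with
# gof.local_optimizer): run them late, in the 'inplace' phase, and only
# warn on failure instead of aborting compilation.
#
#     from theano.gof.opt import TopoOptimizer
#     theano.compile.optdb.register(
#         'local_inplace_sparse_block_outer',
#         TopoOptimizer(local_inplace_sparse_block_outer,
#                       failure_callback=TopoOptimizer.warn_inplace),
#         60, 'fast_run', 'inplace')
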
def local_hard_sigmoid(node):
    if (isinstance(node.op, tensor.Elemwise) and
            node.op.scalar_op == scalar_sigmoid):
        out = hard_sigmoid(node.inputs[0])
        copy_stack_trace(node.outputs[0], out)

        def values_eq_approx_remove_low_prec(a, b):
            # atol was found by trial and error; other tests could fail
            # without good reason with a tighter tolerance.
            return tensor.TensorType.values_eq_approx(a, b, atol=0.1)
        # Let DebugMode know that this opt approximates the values.
        out.tag.values_eq_approx = values_eq_approx_remove_low_prec
        return [out]

def local_1msigmoid(node):
    """
    1-sigm(x) -> sigm(-x)
    """
    if node.op == tensor.sub:
        sub_l, sub_r = node.inputs
        if len(sub_r.clients) > 1:
            return  # graph is using both sigm and 1-sigm
        if sub_r.owner and sub_r.owner.op == sigmoid:
            try:
                val_l = opt.get_scalar_constant_value(sub_l)
            except tensor.NotScalarConstantError:
                return
            if numpy.allclose(numpy.sum(val_l), 1):
                out = sigmoid(-sub_r.owner.inputs[0])
                copy_stack_trace([sub_r, node.outputs[0]], out)
                return [out]

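# Hypothetical sketch (helper name not in the original module): assuming
# local_1msigmoid is registered in the compile mode, 1 - sigmoid(x) should
# appear in the optimized graph as sigmoid(-x).
def _example_1msigmoid():
    import theano
    import theano.tensor as T
    x = T.vector('x')
    f = theano.function([x], 1 - T.nnet.sigmoid(x))
    # Expect sigmoid(-x) rather than a sub/sigmoid pair.
    theano.printing.debugprint(f)
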
def local_abstractconv_gemm(node):
    if theano.config.cxx == "" or not theano.config.blas.ldflags:
        return
    if not isinstance(node.op, AbstractConv2d):
        return None

    img, kern = node.inputs
    if (not isinstance(img.type, TensorType) or
            not isinstance(kern.type, TensorType)):
        return None

    # Flip the kernel if necessary.
    if node.op.filter_flip:
        kern = kern[:, :, ::-1, ::-1]
    rval = CorrMM(border_mode=node.op.border_mode,
                  subsample=node.op.subsample)(img, kern)
    copy_stack_trace(node.outputs[0], rval)
    return [rval]

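# Hypothetical sketch (helper name not in the original module), assuming
# theano.tensor.nnet.conv2d builds an AbstractConv2d node: with a C++
# compiler and BLAS ldflags configured, the compiled graph should use
# CorrMM instead of the abstract op.
def _example_abstractconv_gemm():
    import theano
    import theano.tensor as T
    img = T.tensor4('img')
    kern = T.tensor4('kern')
    f = theano.function([img, kern], T.nnet.conv2d(img, kern))
    theano.printing.debugprint(f)  # expect CorrMM in the graph
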
def local_abstractconv_gradinputs_gemm(node):
    if theano.config.cxx == "" or not theano.config.blas.ldflags:
        return
    if not isinstance(node.op, AbstractConv2d_gradInputs):
        return None

    kern, topgrad, shape = node.inputs
    if (not isinstance(kern.type, TensorType) or
            not isinstance(topgrad.type, TensorType)):
        return None

    # Flip the kernel if necessary.
    if node.op.filter_flip:
        kern = kern[:, :, ::-1, ::-1]
    rval = CorrMM_gradInputs(border_mode=node.op.border_mode,
                             subsample=node.op.subsample)(kern, topgrad,
                                                          shape)
    copy_stack_trace(node.outputs[0], rval)
    return [rval]

def local_abstractconv_gradweight_gemm(node):
    if theano.config.cxx == "" or not theano.config.blas.ldflags:
        return
    if not isinstance(node.op, AbstractConv2d_gradWeights):
        return None

    img, topgrad, shape = node.inputs
    if (not isinstance(img.type, TensorType) or
            not isinstance(topgrad.type, TensorType)):
        return None

    rval = CorrMM_gradWeights(border_mode=node.op.border_mode,
                              subsample=node.op.subsample)(img, topgrad,
                                                           shape)
    copy_stack_trace(node.outputs[0], rval)

    # Flip the kernel if necessary.
    if node.op.filter_flip:
        rval = rval[:, :, ::-1, ::-1]
    rval = theano.tensor.patternbroadcast(rval,
                                          node.outputs[0].broadcastable)
    copy_stack_trace(node.outputs[0], rval)
    return [rval]

def local_to_gpu(node):
    """
    op(host_from_gpu()) -> host_from_gpu(op)
    gpu_from_host(op) -> op(gpu_from_host)
    """
    if isinstance(node.op, op):
        # op(host_from_gpu()) -> host_from_gpu(op)
        # If any of the inputs that go on the GPU are already on the
        # GPU, move the op to the GPU.
        if any(node.inputs[idx].owner and
               isinstance(node.inputs[idx].owner.op, cuda.HostFromGpu)
               for idx in to_gpu):
            new_inp = list(node.inputs)
            for idx in to_gpu:
                new_inp[idx] = cuda.gpu_from_host(new_inp[idx])
            result_node = op()(*new_inp)
            copy_stack_trace(node.outputs[0], result_node)
            transfer_node = cuda.host_from_gpu(result_node)
            copy_stack_trace(node.outputs[0], transfer_node)
            return [transfer_node]
    if node.op == cuda.gpu_from_host:
        # gpu_from_host(op) -> op(gpu_from_host)
        host_input = node.inputs[0]
        if host_input.owner and isinstance(host_input.owner.op, op):
            op_node = host_input.owner
            new_inp = list(op_node.inputs)
            for idx in to_gpu:
                new_inp[idx] = cuda.gpu_from_host(new_inp[idx])
            new_node = op()(*new_inp)
            copy_stack_trace(host_input, new_node)
            return [new_node]
    return False

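# Note: `op` and `to_gpu` are free variables in local_to_gpu above, so it
# is presumably defined inside a factory closure that is instantiated once
# per op class. A hypothetical sketch of such a factory (names are
# illustrative, not taken from this file):
#
#     def make_gpu_optimizer(op, to_gpu):
#         @theano.gof.local_optimizer([op, cuda.gpu_from_host])
#         def local_to_gpu(node):
#             ...  # body as above
#         return local_to_gpu
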
def local_conv2d_cpu(node):
    if not isinstance(node.op, AbstractConv2d):
        return None

    img, kern = node.inputs
    if (not isinstance(img.type, TensorType) or
            not isinstance(kern.type, TensorType)):
        return None
    if node.op.border_mode not in ['full', 'valid']:
        return None
    if not node.op.filter_flip:
        # Not tested yet
        return None

    rval = conv2d(img, kern,
                  node.op.imshp, node.op.kshp,
                  border_mode=node.op.border_mode,
                  subsample=node.op.subsample)
    copy_stack_trace(node.outputs[0], rval)
    return [rval]

def local_sigm_times_exp(node):
    """
    exp(x) * sigm(-x) -> sigm(x)
    exp(-x) * sigm(x) -> sigm(-x)
    """
    # Bail early if it is not a multiplication.
    if node.op != tensor.mul:
        return None

    # Obtain the tree of multiplications starting at this node.
    mul_tree = parse_mul_tree(node.outputs[0])
    # Perform the core optimization.
    did_something = perform_sigm_times_exp(mul_tree)
    if not did_something:
        # No change.
        return None
    # The optimization may have introduced multiplications by 1 in the
    # tree: get rid of them.
    mul_tree = simplify_mul(mul_tree)
    # Recompute the final output based on the updated tree.
    out = compute_mul(mul_tree)
    # Keep the stack trace.
    copy_stack_trace(node.outputs[0], out)
    return [out]

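# Hypothetical sketch (helper name not in the original module): per the
# docstring above, exp(x) * sigm(-x) should simplify to sigm(x) in the
# optimized graph.
def _example_sigm_times_exp():
    import theano
    import theano.tensor as T
    x = T.vector('x')
    f = theano.function([x], T.exp(x) * T.nnet.sigmoid(-x))
    theano.printing.debugprint(f)  # expect a single sigmoid(x)
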
def local_conv2d_gradinputs_cpu(node):
    if not isinstance(node.op, AbstractConv2d_gradInputs):
        return None

    kern, topgrad, shape = node.inputs
    if (not isinstance(kern.type, TensorType) or
            not isinstance(topgrad.type, TensorType)):
        return None
    if node.op.border_mode not in ['full', 'valid']:
        return None
    if not node.op.filter_flip:
        # Not tested yet
        return None

    # Conv3D implementation, needed when subsample > 2
    # (used whenever subsample != (1, 1) in 'valid' mode).
    if node.op.border_mode == 'valid' and node.op.subsample != (1, 1):
        kern = kern[:, :, ::-1, ::-1]
        shuffled_kern = kern.dimshuffle(0, 2, 3, 'x', 1)
        shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1)
        b = theano.tensor.zeros_like(shuffled_kern[0, 0, 0, 0, :])
        rval = convTransp3D(W=shuffled_kern, b=b,
                            d=(node.op.subsample[0],
                               node.op.subsample[1], 1),
                            H=shuffled_topgrad,
                            RShape=(shape[0], shape[1], 1))
        copy_stack_trace(node.outputs[0], rval)

        rval = theano.tensor.addbroadcast(rval, 3)
        rval = rval.dimshuffle(0, 4, 1, 2)
        rval = theano.tensor.patternbroadcast(rval,
                                              node.outputs[0].broadcastable)
        copy_stack_trace(node.outputs[0], rval)
        return [rval]

    # Conv2D implementation
    dx, dy = node.op.subsample
    if dx not in (1, 2) or dy not in (1, 2):
        # Not implemented in the gradient of ConvOp
        return None

    if node.op.imshp is None:
        op_imshp = (None, None, None, None)
    else:
        op_imshp = node.op.imshp

    if node.op.kshp is None:
        op_kshp = (None, None, None, None)
    else:
        op_kshp = node.op.kshp

    if None in op_imshp or None in op_kshp:
        if (dx, dy) != (1, 1):
            # We cannot infer the shapes
            return None

    # The gradient of a 'full' convolution is computed with a 'valid'
    # convolution, and vice versa.
    mode = 'valid'
    if not node.op.border_mode == 'full':
        mode = 'full'
    filters = kern.dimshuffle((1, 0, 2, 3))
    filters = filters[:, :, ::-1, ::-1]

    outshp = get_conv_output_shape(op_imshp, op_kshp,
                                   node.op.border_mode,
                                   node.op.subsample)[2:]
    fulloutshp = get_conv_output_shape(op_imshp, op_kshp,
                                       node.op.border_mode, (1, 1))[2:]

    nkern = op_imshp[1]
    imshp = (op_kshp[0], outshp[0], outshp[1])
    imshp_logical = (op_kshp[0], fulloutshp[0], fulloutshp[1])

    din = ConvOp(imshp, op_kshp[2:], nkern, op_imshp[0],
                 1, 1, output_mode=mode,
                 unroll_batch=None, unroll_kern=None, unroll_patch=None,
                 imshp_logical=imshp_logical, kshp_logical=None,
                 version=-1, direction_hint='bprop inputs')
    din = din(topgrad, filters)
    copy_stack_trace(node.outputs[0], din)

    din = theano.tensor.patternbroadcast(din, node.outputs[0].broadcastable)
    copy_stack_trace(node.outputs[0], din)
    return [din]

def local_conv2d_gradweight_cpu(node):
    if not isinstance(node.op, AbstractConv2d_gradWeights):
        return None

    img, topgrad, shape = node.inputs
    if (not isinstance(img.type, TensorType) or
            not isinstance(topgrad.type, TensorType)):
        return None
    if node.op.border_mode not in ['full', 'valid']:
        return None
    if not node.op.filter_flip:
        # Not tested yet
        return None

    if node.op.border_mode == 'valid' and node.op.subsample != (1, 1):
        # Use the gradient as defined in conv3D, because the implementation
        # by Conv is slow (about 3x slower than conv3D, and probably 10x
        # slower than it could be), and incorrect when subsample > 2.
        # Build a "node" that should be equivalent to the one given by
        # self.make_node, but using convGrad3D instead.
        shuffled_img = img.dimshuffle(0, 2, 3, 'x', 1)
        shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1)
        rval = convGrad3D(V=shuffled_img,
                          d=(node.op.subsample[0],
                             node.op.subsample[1], 1),
                          WShape=(shuffled_topgrad.shape[4],
                                  shape[0], shape[1], 1,
                                  shuffled_img.shape[4]),
                          dCdH=shuffled_topgrad)
        copy_stack_trace(node.outputs[0], rval)

        rval = theano.tensor.addbroadcast(rval, 3)
        rval = rval.dimshuffle(0, 4, 1, 2)
        rval = rval[:, :, ::-1, ::-1]
        rval = theano.tensor.patternbroadcast(rval,
                                              node.outputs[0].broadcastable)
        copy_stack_trace(node.outputs[0], rval)
        return [rval]

    dx, dy = node.op.subsample
    if dx not in (1, 2) or dy not in (1, 2):
        # Not implemented in the gradient of ConvOp
        return None

    if node.op.imshp is None:
        op_imshp = (None, None, None, None)
    else:
        op_imshp = node.op.imshp

    if node.op.kshp is None:
        op_kshp = (None, None, None, None)
    else:
        op_kshp = node.op.kshp

    if None in op_imshp or None in op_kshp:
        if (dx, dy) != (1, 1):
            # We cannot infer the shapes
            return None

    # Determine gradient on kernels
    assert len(op_imshp) == 4 and len(op_kshp) == 4

    outshp = get_conv_output_shape(op_imshp, op_kshp,
                                   node.op.border_mode,
                                   node.op.subsample)[2:]
    fulloutshp = get_conv_output_shape(op_imshp, op_kshp,
                                       node.op.border_mode, (1, 1))[2:]

    newimg = img.dimshuffle((1, 0, 2, 3))
    newtopgrad = topgrad.dimshuffle((1, 0, 2, 3))

    if node.op.border_mode == 'valid':
        (img, filters) = (newimg, newtopgrad)
        kshp_logical = fulloutshp
        kshp_logical_top_aligned = False
        imshp_logical = None
        (bsize, nkern) = (op_imshp[1], op_kshp[0])
        imshp = (op_imshp[0], op_imshp[2], op_imshp[3])
        kshp = outshp
    elif node.op.border_mode == 'full':
        (img, filters) = (newtopgrad, newimg)
        kshp_logical = None
        kshp_logical_top_aligned = True
        imshp_logical = (op_imshp[0], fulloutshp[0], fulloutshp[1])
        (bsize, nkern) = (op_kshp[0], op_imshp[1])
        imshp = (op_imshp[0], outshp[0], outshp[1])
        kshp = op_imshp[2:]
    else:
        raise NotImplementedError(
            'Only [full,valid] modes are currently supported.')

    # Flip the kernels
    filters = filters[:, :, ::-1, ::-1]

    dw = ConvOp(imshp, kshp, nkern, bsize, 1, 1, output_mode='valid',
                unroll_batch=None, unroll_kern=None, unroll_patch=None,
                imshp_logical=imshp_logical,
                kshp_logical=kshp_logical,
                kshp_logical_top_aligned=kshp_logical_top_aligned,
                direction_hint='bprop weights')
    res = dw(img, filters)
    copy_stack_trace(node.outputs[0], res)

    if node.op.border_mode == 'valid':
        res = res.dimshuffle((1, 0, 2, 3))
        res = res[:, :, ::-1, ::-1]

    res = theano.tensor.patternbroadcast(res, node.outputs[0].broadcastable)
    copy_stack_trace(node.outputs[0], res)
    return [res]