def local_inv_1_plus_exp(node):
    """
    1/(1+exp(x)) -> sigm(-x)
    """
    # This optimization should be done for numerical stability,
    # so we don't care to check client counts.
    if node.op == tensor.inv:
        inv_arg = node.inputs[0]
        if inv_arg.owner and inv_arg.owner.op == tensor.add:
            scalars, scalar_inputs, nonconsts = opt.scalarconsts_rest(
                inv_arg.owner.inputs, only_process_constants=True)
            # scalar_inputs are potentially dimshuffled and fill'd scalars
            if len(nonconsts) == 1:
                if nonconsts[0].owner and nonconsts[0].owner.op == tensor.exp:
                    if scalars and np.allclose(np.sum(scalars), 1):
                        out = opt._fill_chain(
                            sigmoid(tensor.neg(nonconsts[0].owner.inputs[0])),
                            scalar_inputs,
                        )
                        # Keep combined stack traces of
                        #   exp(x):           nonconsts[0]
                        #   1 + exp(x):       inv_arg
                        #   1 / (1 + exp(x)): node.outputs[0]
                        copy_stack_trace(
                            [nonconsts[0], inv_arg, node.outputs[0]], out)
                        return out
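
# Why this counts as a stabilization: a minimal NumPy sketch (hypothetical
# helpers, not Theano code). The naive form overflows in exp() for large
# positive x, while a numerically stable sigmoid(-x) never exponentiates a
# positive argument.
import numpy as np

def naive_inv_1_plus_exp(x):
    # The exact pattern the optimizer matches, evaluated directly.
    return 1.0 / (1.0 + np.exp(x))

def stable_sigmoid(x):
    # Branch on the sign so exp() only ever sees non-positive arguments.
    out = np.empty_like(x)
    pos = x >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-x[pos]))
    expx = np.exp(x[~pos])
    out[~pos] = expx / (1.0 + expx)
    return out

x = np.array([-1000.0, 0.0, 1000.0])
print(naive_inv_1_plus_exp(x))  # RuntimeWarning: overflow in exp at x=1000
print(stable_sigmoid(-x))       # same values ([1.0, 0.5, 0.0]), no warning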
def grad(self, inp, grads):
    dy, sm = inp
    g, = grads

    # Gradient with respect to dy: subtract the sm-weighted row sum of g,
    # then scale by sm. dimshuffle((0, 'x')) re-adds the broadcastable
    # column axis so the row sums broadcast back over the columns.
    tmp = g + tensor.neg(tensor.sum(g * sm, axis=1).dimshuffle((0, 'x')))
    g_dy = tmp * sm

    # Gradient with respect to the softmax output sm.
    tmp2 = tensor.sum(dy * sm, axis=1).dimshuffle((0, 'x'))
    g_sm = tmp * dy - g * tmp2
    return g_dy, g_sm
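
# Numerical sanity check for the expressions above (a sketch with
# hypothetical helper names). It assumes the op's forward computation is
# dx = (dy - sum(dy * sm, axis=1, keepdims=True)) * sm, and verifies the
# analytic gradients against central finite differences.
import numpy as np

def softmax_grad_forward(dy, sm):
    # Assumed forward pass of the op whose grad() is defined above.
    return (dy - (dy * sm).sum(axis=1, keepdims=True)) * sm

def analytic_grads(dy, sm, g):
    # NumPy mirror of the symbolic expressions in grad().
    tmp = g - (g * sm).sum(axis=1, keepdims=True)
    g_dy = tmp * sm
    tmp2 = (dy * sm).sum(axis=1, keepdims=True)
    g_sm = tmp * dy - g * tmp2
    return g_dy, g_sm

def numeric_grad(f, x, eps=1e-6):
    # Central finite differences, perturbing one entry at a time.
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'])
    for _ in it:
        idx = it.multi_index
        orig = x[idx]
        x[idx] = orig + eps
        fp = f()
        x[idx] = orig - eps
        fm = f()
        x[idx] = orig
        grad[idx] = (fp - fm) / (2 * eps)
    return grad

rng = np.random.default_rng(0)
dy, sm, g = (rng.normal(size=(3, 4)) for _ in range(3))
cost = lambda: (g * softmax_grad_forward(dy, sm)).sum()
g_dy, g_sm = analytic_grads(dy, sm, g)
assert np.allclose(g_dy, numeric_grad(cost, dy))
assert np.allclose(g_sm, numeric_grad(cost, sm))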
def local_inv_1_plus_exp(node):
    """
    1/(1+exp(x)) -> sigm(-x)
    """
    # This optimization should be done for numerical stability,
    # so we don't care to check client counts.
    if node.op == tensor.inv:
        inv_arg = node.inputs[0]
        if inv_arg.owner and inv_arg.owner.op == tensor.add:
            scalars, scalar_inputs, nonconsts = opt.scalarconsts_rest(
                inv_arg.owner.inputs)
            # scalar_inputs are potentially dimshuffled and fill'd scalars
            if len(nonconsts) == 1:
                if nonconsts[0].owner and nonconsts[0].owner.op == tensor.exp:
                    if scalars and numpy.allclose(numpy.sum(scalars), 1):
                        return opt._fill_chain(
                            sigmoid(tensor.neg(nonconsts[0].owner.inputs[0])),
                            scalar_inputs)
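
# To make the matching logic concrete, a toy self-contained sketch of the
# same rewrite (hypothetical Node class and string ops, not Theano's graph
# machinery): split the addends of add() into constants and non-constants,
# the way scalarconsts_rest does, and rewrite inv(add(c, exp(x))) into
# sigmoid(neg(x)) when the constants sum to 1.
import numpy as np

class Node:
    # Minimal stand-in for a symbolic graph node.
    def __init__(self, op, inputs=(), value=None):
        self.op, self.inputs, self.value = op, list(inputs), value

def rewrite_inv_1_plus_exp(node):
    # Mirrors the control flow of local_inv_1_plus_exp above.
    if node.op != 'inv':
        return None
    add = node.inputs[0]
    if add.op != 'add':
        return None
    consts = [n for n in add.inputs if n.op == 'const']
    nonconsts = [n for n in add.inputs if n.op != 'const']
    if len(nonconsts) != 1 or nonconsts[0].op != 'exp':
        return None
    if consts and np.allclose(sum(c.value for c in consts), 1):
        x = nonconsts[0].inputs[0]
        return Node('sigmoid', [Node('neg', [x])])
    return None

# inv(add(1, exp(x)))  ->  sigmoid(neg(x))
x = Node('var')
graph = Node('inv', [Node('add', [Node('const', value=1.0),
                                  Node('exp', [x])])])
out = rewrite_inv_1_plus_exp(graph)
assert out.op == 'sigmoid' and out.inputs[0].inputs[0] is x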