def backward_impl(self, inputs, outputs, prop_down, accum):
    """Back-propagate the gradients of ``dx0``/``dx1`` into the gradient of ``dy``.

    Layout of the arguments, as used by this function:

    * ``inputs``:  ``[x0, x1, dy]``, i.e. ``[inputs_fwd_graph] + [inputs_bwd_graph]``
      (or ``[inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]``
      for functions that also take forward outputs).
    * ``outputs``: ``[dx0, dx1]``.

    Only ``inputs[2]`` (``dy``) receives a gradient here; nothing is written
    for ``x0`` and ``x1``.

    Args:
        inputs: Sequence of variables exposing ``.data`` and ``.grad``.
        outputs: Sequence of variables exposing ``.grad``.
        prop_down: Per-input flags; the work is skipped unless
            ``prop_down[2]`` is truthy.
        accum: Per-input flags; when ``accum[2]`` is truthy the result is
            added to the existing gradient of ``dy``, otherwise it
            overwrites it via ``copy_from``.
    """
    if not prop_down[2]:
        return

    x0 = inputs[0].data
    x1 = inputs[1].data
    g_dy = inputs[2].grad
    g_dx0 = outputs[0].grad
    g_dx1 = outputs[1].grad

    # Where x0 < x1 the gradient comes from dx0, elsewhere from dx1
    # (F.less is expected to yield a 0/1 mask).
    mask = F.less(x0, x1)
    g_dy_update = g_dx0 * mask + g_dx1 * (1.0 - mask)

    if accum[2]:
        g_dy += g_dy_update
    else:
        g_dy.copy_from(g_dy_update)
def minimum2_backward(inputs):
    """Build the gradients of an element-wise minimum of two inputs.

    Args:
        inputs (list of nn.Variable): Incoming gradient followed by the
            forward inputs, in the order ``[dy, x0, x1]``.

    Return:
        tuple of Variable: ``(dx0, dx1)``, the gradients with respect to
        ``x0`` and ``x1``.
    """
    dy, x0, x1 = inputs[0], inputs[1], inputs[2]

    # Complementary selection masks: x0 is picked where x0 < x1,
    # x1 everywhere else.
    pick_x0 = F.less(x0, x1)
    pick_x1 = 1 - pick_x0

    # The masks are wrapped with no_grad (presumably so that nothing is
    # back-propagated through the comparison -- confirm against no_grad).
    pick_x0 = no_grad(pick_x0)
    pick_x1 = no_grad(pick_x1)

    return dy * pick_x0, dy * pick_x1
def parametric_pow2_quantize(x, sign=True, with_zero=True,
                             n_init=8, n_min=1, n_max=16,
                             m_init=1, m_min=-8, m_max=8,
                             fix_parameters=False):
    """Parametric version of `pow2_quantize` with learnable bitwidth and range.

    Two scalar parameters are created (or fetched): the bitwidth ``n`` and
    the dynamic range ``m``. After clipping and rounding them to ``n_q`` and
    ``m_q`` the representable magnitudes are bounded by
    ``x_max = 2**m_q`` and ``x_min = 2**(m_q - 2**n_q + 1)``, where ``n_q``
    is reduced by one for ``sign`` and by one more for ``with_zero``.

    Args:
        x (~nnabla.Variable): N-D array as input.
        sign (bool): Keep sign information during quantization. When
            `False`, the input is passed through ReLU first.
        with_zero (bool): Quantize small weights to zero. Magnitudes below
            ``x_min / sqrt(2)`` fall in none of the range masks and
            therefore map to zero.
        n_init: Value handed to `ConstantInitializer` for the bitwidth
            parameter ``n``.
        n_min (int): Lower bound for bitwidth.
        n_max (int): Upper bound for bitwidth.
        m_init: Value handed to `ConstantInitializer` for the dynamic range
            parameter ``m``.
        m_min (float): Lower bound for dynamic range.
        m_max (float): Upper bound for dynamic range.
        fix_parameters (bool): When set to `True`, ``n`` and ``m`` are
            requested with ``as_need_grad=False``, i.e. they are meant to
            stay fixed.

    Returns:
        ~nnabla.Variable: N-D array.
    """
    def _clamp(value, lower, upper):
        return F.minimum_scalar(F.maximum_scalar(value, lower), upper)

    def _expand_to(value, shape):
        # Reshape the scalar to rank len(shape), then broadcast.
        unit_shape = (1, ) * len(shape)
        return F.broadcast(F.reshape(value, unit_shape, inplace=False),
                           shape=shape)

    def _nearest_pow2(value):
        return 2**F.round(F.log(F.abs(value)) / np.log(2.))

    n = get_parameter_or_create("n", (),
                                ConstantInitializer(n_init),
                                need_grad=True,
                                as_need_grad=not fix_parameters)
    m = get_parameter_or_create("m", (),
                                ConstantInitializer(m_init),
                                need_grad=True,
                                as_need_grad=not fix_parameters)

    # Integer bitwidth inside [n_min, n_max]; one bit each is given up for
    # the sign and for the zero code.
    bits = F.round(_clamp(n, n_min, n_max))
    if sign:
        bits = bits - 1
    if with_zero:
        bits = bits - 1

    # Integer dynamic range inside [m_min, m_max].
    exponent = F.round(_clamp(m, m_min, m_max))

    # Smallest / largest representable magnitude, expanded to x's shape.
    upper = _expand_to(2**exponent, x.shape)
    lower = _expand_to(2**(exponent - (2**bits) + 1), x.shape)

    # Unsigned mode: negative values become zero.
    if not sign:
        x = F.relu(x)

    magnitude = F.abs(x)
    signs = F.sign(x)

    # These two masks are the same with and without the zero code.
    inside = F.greater_equal(magnitude, lower) * F.less(magnitude, upper)
    above = F.greater_equal(magnitude, upper)
    if with_zero:
        # Only magnitudes in [x_min / sqrt(2), x_min) are raised to x_min;
        # anything smaller is pruned to zero.
        below = F.greater_equal(magnitude, lower / np.sqrt(2)) * \
            F.less(magnitude, lower)
    else:
        below = F.less(magnitude, lower)

    # No gradient through the range masks or through the sign.
    for selector in (below, inside, above, signs):
        selector.need_grad = False

    # Clamp values outside the dynamic range, quantize the rest.
    return signs * (lower * below
                    + _nearest_pow2(magnitude) * inside
                    + upper * above)
def parametric_pow2_quantize_xmin_xmax(x, sign=True, with_zero=True,
                                       xmin_init=2**-7, xmin_min=2**-15,
                                       xmin_max=256,
                                       xmax_init=2**0, xmax_min=2**-8,
                                       xmax_max=256,
                                       fix_parameters=False):
    """Parametric version of `pow2_quantize` with learnable min/max values.

    Two scalar parameters ``xmin`` and ``xmax`` are created (or fetched),
    clipped to their respective bounds and snapped to a power of two. They
    bound the magnitudes the quantizer can represent.

    Args:
        x (~nnabla.Variable): N-D array as input.
        sign (bool): Keep sign information. When `False`, the input is
            passed through ReLU first.
        with_zero (bool): When `True`, magnitudes below
            ``xmin / sqrt(2)`` fall in none of the range masks and
            therefore map to zero.
        xmin_init: Value handed to `ConstantInitializer` for ``xmin``.
        xmin_min: Lower clipping bound for ``xmin``.
        xmin_max: Upper clipping bound for ``xmin``.
        xmax_init: Value handed to `ConstantInitializer` for ``xmax``.
        xmax_min: Lower clipping bound for ``xmax``.
        xmax_max: Upper clipping bound for ``xmax``.
        fix_parameters (bool): When set to `True`, ``xmin`` and ``xmax``
            are requested with ``as_need_grad=False``, i.e. they are meant
            to stay fixed.

    Returns:
        ~nnabla.Variable: N-D array.
    """
    def _clamp(value, lower, upper):
        return F.minimum_scalar(F.maximum_scalar(value, lower), upper)

    def _expand_to(value, shape):
        # Reshape the scalar to rank len(shape), then broadcast.
        unit_shape = (1, ) * len(shape)
        return F.broadcast(F.reshape(value, unit_shape, inplace=False),
                           shape=shape)

    def _nearest_pow2(value):
        return 2.**F.round(F.log(F.abs(value)) / np.log(2.))

    xmin = get_parameter_or_create("xmin", (),
                                   ConstantInitializer(xmin_init),
                                   need_grad=True,
                                   as_need_grad=not fix_parameters)
    xmax = get_parameter_or_create("xmax", (),
                                   ConstantInitializer(xmax_init),
                                   need_grad=True,
                                   as_need_grad=not fix_parameters)

    # Minimum of the dynamic range: clipped, power-of-two, shaped like x.
    lower = _expand_to(_nearest_pow2(_clamp(xmin, xmin_min, xmin_max)),
                       x.shape)
    # Maximum of the dynamic range: clipped, power-of-two, shaped like x.
    upper = _expand_to(_nearest_pow2(_clamp(xmax, xmax_min, xmax_max)),
                       x.shape)

    # Unsigned mode: negative values become zero.
    if not sign:
        x = F.relu(x)

    magnitude = F.abs(x)
    signs = F.sign(x)

    # These two masks are the same with and without the zero code.
    inside = F.greater_equal(magnitude, lower) * F.less(magnitude, upper)
    above = F.greater_equal(magnitude, upper)
    if with_zero:
        # Only magnitudes in [xmin / sqrt(2), xmin) are raised to xmin;
        # anything smaller is pruned to zero.
        below = F.greater_equal(magnitude, lower / np.sqrt(2)) * \
            F.less(magnitude, lower)
    else:
        below = F.less(magnitude, lower)

    # No gradient through the range masks or through the sign.
    for selector in (below, inside, above, signs):
        selector.need_grad = False

    # Clamp values outside the dynamic range, quantize the rest.
    return signs * (lower * below
                    + _nearest_pow2(magnitude) * inside
                    + upper * above)
def bidirectional_sphere_trace(self, camloc, raydir, t_start, t_finish):
    """March along each ray from both ends using the signed distance field.

    A forward march starts at ``t_start`` and advances by the value of
    ``self.sdf``; a backward march starts at ``t_finish`` and retreats by
    it. Each runs for ``self.sphere_trace_itr - 1`` iterations. A ray
    stops moving once ``|sdf| <= self.eps`` has been observed. A step
    that goes from a positive to a negative distance is undone.

    Args:
        camloc: Ray origin, combined as ``camloc + t * raydir``.
        raydir: Ray direction.
        t_start: Initial distance of the forward march. It is copied with
            ``F.identity`` before being updated.
        t_finish: Initial distance of the backward march. It is copied
            with ``F.identity`` before being updated.

    Returns:
        tuple: ``(x_hit_st0, t_f, t_b, mask_hit_eps_f)`` where

        * ``x_hit_st0`` is the point at the forward distance reached by
          the march (before the final consistency fallback),
        * ``t_f`` / ``t_b`` are the refined start / finish distances,
          reset to ``t_start`` / ``t_finish`` wherever ``t_f < t_b`` does
          not hold,
        * ``mask_hit_eps_f`` marks rays whose forward march observed
          ``|sdf| <= self.eps``.
    """
    t_f = F.identity(t_start)
    x_f = camloc + t_f * raydir
    s_f = self.sdf(x_f)
    mask_hit_eps_f = 0 * F.identity(t_f)

    t_b = F.identity(t_finish)
    x_b = camloc + t_b * raydir
    s_b = self.sdf(x_b)
    mask_hit_eps_b = 0 * F.identity(t_b)

    # "No revert happened" defaults, so that the code after the loop is
    # well defined even when the loop body never runs
    # (sphere_trace_itr <= 1). Both are overwritten on every iteration.
    mask_revert_f = 0 * F.identity(t_f)
    s_f_prev = s_f

    for _ in range(self.sphere_trace_itr - 1):
        # Forward direction
        mask_hit_eps_f_i = F.less_equal_scalar(F.abs(s_f), self.eps)
        # Latch the hit flag: once set it stays set.
        mask_hit_eps_f += (1 - mask_hit_eps_f) * mask_hit_eps_f_i
        # Only rays that have not hit yet keep advancing.
        t_f += (1 - mask_hit_eps_f) * s_f
        x_f = camloc + t_f * raydir

        s_f_prev = F.identity(s_f)
        s_f = self.sdf(x_f)
        # Positive distance before the step and negative after it:
        # undo the step and restore the previous distance.
        mask_pos_f_prev = (1 - mask_hit_eps_f) * \
            F.greater_scalar(s_f_prev, 0)
        mask_neg_f = (1 - mask_hit_eps_f) * F.less_scalar(s_f, 0)
        mask_revert_f = mask_pos_f_prev * mask_neg_f
        t_f -= mask_revert_f * s_f_prev
        s_f = mask_revert_f * s_f_prev + (1 - mask_revert_f) * s_f

        # Backward direction (mirror of the above, moving towards t_start)
        mask_hit_eps_b_i = F.less_equal_scalar(F.abs(s_b), self.eps)
        mask_hit_eps_b += (1 - mask_hit_eps_b) * mask_hit_eps_b_i
        t_b -= (1 - mask_hit_eps_b) * s_b
        x_b = camloc + t_b * raydir

        s_b_prev = F.identity(s_b)
        s_b = self.sdf(x_b)
        mask_pos_b_prev = (1 - mask_hit_eps_b) * \
            F.greater_scalar(s_b_prev, 0)
        mask_neg_b = (1 - mask_hit_eps_b) * F.less_scalar(s_b, 0)
        mask_revert_b = mask_pos_b_prev * mask_neg_b
        t_b += mask_revert_b * s_b_prev
        s_b = mask_revert_b * s_b_prev + (1 - mask_revert_b) * s_b

    # Fine grained start/finish points: t_f0 is the last forward position
    # and t_f1 the position of the step that was undone last (equal to
    # t_f0 where no revert happened).
    t_f0 = t_f
    t_f1 = t_f + mask_revert_f * s_f_prev
    x_hit_st0 = camloc + t_f0 * raydir
    # NOTE: an optional refinement through self.post_method(x_hit_st0,
    # camloc + t_f1 * raydir) used to recompute t_f0/t_f1 here; it is
    # disabled.

    # Where the forward march reverted, pull the finish point in to t_f1
    # if that is closer than the backward result.
    mask_hit_f1b = mask_revert_f * F.less(t_f1, t_b)
    t_b = t_f1 * mask_hit_f1b + t_b * (1 - mask_hit_f1b)

    # Reverse the opposite case: if start is not strictly before finish,
    # fall back to the caller-supplied interval.
    mask_fb = F.less(t_f, t_b)
    t_f = t_f * mask_fb + t_start * (1 - mask_fb)
    t_b = t_b * mask_fb + t_finish * (1 - mask_fb)

    return x_hit_st0, t_f, t_b, mask_hit_eps_f