def grad(self, inputs, outputs_gradients):
    """Return disconnected gradient placeholders for every input.

    The outputs do not depend differentiably on any input, so a 3x4
    nested structure of disconnected gradients is returned (same shape
    as the original implementation).
    """
    # Build each row independently: the original `[[d] * 4] * 3`
    # repeated the SAME inner list object three times (and the same
    # disconnected instance twelve times), which is fragile if any
    # caller ever mutates the returned structure.
    # Local renamed from `grad` to avoid shadowing the method's own name.
    grads = [[disconnected_type() for _ in range(4)] for _ in range(3)]
    return grads
def grad(self, inputs, output_gradients):
    """Zero gradients for `x` and `v`; the optional third input
    (a sorter/index argument, judging by the disconnected gradient)
    receives a disconnected placeholder."""
    x, v = inputs[0], inputs[1]
    grads = [gradient._float_zeros_like(x), gradient._float_zeros_like(v)]
    if len(inputs) == 3:
        # Third input present: it carries no gradient at all.
        grads.append(disconnected_type())
    return grads
def grad(self, inp, grads):
    """Gradient of the quantized dot product.

    Inputs are (x, w, xq, xmin, xmax, wq, wmin, wmax, z) where xq/wq
    appear to be quantized copies of x/w with their ranges
    (xmin..xmax, wmin..wmax) — TODO confirm against make_node.
    The min/max range inputs get disconnected gradients; the gradient
    for z is passed through unchanged.
    """
    x, w, xq, xmin, xmax, wq, wmin, wmax, z = inp
    gz, = grads
    # quantizing Output's gradient: symmetric range of +/- 8 standard
    # deviations of gz, with stochastic rounding.
    gzmax = 8.* T.std(gz)
    gzmin = -gzmax
    # gzq = gz
    gzq = quantizer(gz,new_min=gzmin,new_max=gzmax, stochastic=True)
    # Inputs' gradient
    # Low precision dot product: gx = gz . W^T, computed on the
    # quantized operands, then rescaled back via `unquantizer`.
    gxq = T.dot(T.cast(gzq,'float32'),T.cast(wq.T,'float32'))
    gxq = unquantizer(gzq, wq.T, gxq, x_min = gzmin, x_max=gzmax, y_min=wmin, y_max=wmax)
    gx = gxq
    # Weights' gradient
    # Low precision dot product: gw = X^T . gz, same quantize/unquantize
    # treatment as the input gradient above.
    gwq = T.dot(T.cast(xq.T,'float32'),T.cast(gzq,'float32'))
    gwq = unquantizer(xq.T, gzq, gwq, x_min = xmin, x_max=xmax, y_min=gzmin, y_max=gzmax)
    gw = gwq
    # The quantized inputs xq/wq receive the same gradient values as
    # x/w (a straight-through-style pass — verify this is intended);
    # all range inputs are disconnected.
    return gx, gw, gxq, disconnected_type(), disconnected_type(), gwq, disconnected_type(), disconnected_type(), gz
def grad(self, *args, **kwargs):
    """Both inputs are disconnected from the op's outputs."""
    first = disconnected_type()
    second = disconnected_type()
    return first, second
def grad(self, inputs, outputs_gradients):
    """Every output is constant w.r.t. all five inputs, so each input
    gets a disconnected gradient."""
    # One disconnected placeholder, replicated once per input
    # (same single-instance replication as the original `* 5`).
    placeholder = disconnected_type()
    return [placeholder] * 5
def grad(self, inputs, outputs_gradients):
    """Neither input receives a gradient; both are disconnected."""
    return [disconnected_type(), disconnected_type()]
def grad(self, inputs, outputs_gradients):
    """Gradient w.r.t. the first input is this op's own second output;
    the remaining two inputs are disconnected."""
    # Re-apply the op to the same inputs and take output index 1 —
    # presumably the op computes that derivative as a side output;
    # verify against the Op's perform/make_node.
    outs = self(*inputs)
    return [outs[1], disconnected_type(), disconnected_type()]