def ReLUBackward(outputs: Tensor):
    if 'inplace' in outputs.cache and outputs.cache['inplace']:
        # In-place ReLU: the pre-activation values were overwritten, so the
        # cached mask records which positions were clipped to zero.
        mask = outputs.cache['mask']
        outputs.grad.eval[mask] = 0
        # Hand control back to the backward function of the op that produced
        # the (now overwritten) input.
        outputs.grad_fn = outputs.cache['grad_fn'].pop()
        outputs.grad_fn(outputs)
    else:
        inputs, = outputs.in_bounds
        if inputs.requires_grad:
            # dReLU/dx is 1 where x > 0 and 0 where x < 0; zero the upstream
            # gradient at the negative positions and accumulate.
            grad = outputs.grad.eval.copy()
            grad[inputs.eval < 0] = 0
            GLOBAL.np.add(inputs.grad.eval, grad, out=inputs.grad.eval)

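# --- Standalone sanity check (not part of the library, illustrative only) ---
# ReLUBackward zeroes the upstream gradient wherever the input was negative.
# A minimal NumPy sketch checking that this masking matches the derivative of
# max(0, x) at differentiable points (all names below are local to the sketch).
import numpy as np

x = np.array([-2.0, -0.5, 1.0, 3.0])
upstream = np.array([0.1, 0.2, 0.3, 0.4])
grad = upstream.copy()
grad[x < 0] = 0                       # same masking as in ReLUBackward
eps = 1e-6
numeric = upstream * (np.maximum(x + eps, 0) - np.maximum(x - eps, 0)) / (2 * eps)
assert np.allclose(grad, numeric)
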
def MSELossBackward(outputs: Tensor):
    y_pred, y_true = outputs.in_bounds
    # dL/dy_pred = (y_pred - y_true), chained with the upstream gradient.
    gradients = GLOBAL.np.multiply(GLOBAL.np.subtract(y_pred.eval, y_true.eval),
                                   outputs.grad.eval)
    if outputs.cache['reduction'] == 'mean':
        # Average over every element of the prediction.
        GLOBAL.np.divide(gradients, GLOBAL.np.prod(y_pred.shape), out=gradients)
    if y_true.requires_grad:
        # The target receives the negated gradient.
        if y_true.grad is None:
            y_true.grad = Tensor(-gradients)
        else:
            GLOBAL.np.add(y_true.grad.eval, -gradients, out=y_true.grad.eval)
    if y_pred.requires_grad:
        if y_pred.grad is None:
            y_pred.grad = Tensor(gradients)
        else:
            GLOBAL.np.add(y_pred.grad.eval, gradients, out=y_pred.grad.eval)

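# --- Standalone sanity check (not part of the library, illustrative only) ---
# MSELossBackward uses (y_pred - y_true) / N under 'mean' reduction. The sketch
# below checks that form against a finite-difference estimate, assuming the
# loss is defined as 0.5 * mean((y_pred - y_true)**2); a plain mean(...) would
# carry an extra factor of 2. Helper names are hypothetical.
import numpy as np

def _mse_half(y_pred, y_true):
    return 0.5 * np.mean((y_pred - y_true) ** 2)

def _check_mse_grad(eps=1e-6):
    rng = np.random.default_rng(0)
    y_pred = rng.normal(size=(4, 3))
    y_true = rng.normal(size=(4, 3))
    analytic = (y_pred - y_true) / y_pred.size   # gradient as computed above
    numeric = np.zeros_like(y_pred)
    it = np.nditer(y_pred, flags=['multi_index'])
    for _ in it:
        idx = it.multi_index
        y_plus, y_minus = y_pred.copy(), y_pred.copy()
        y_plus[idx] += eps
        y_minus[idx] -= eps
        numeric[idx] = (_mse_half(y_plus, y_true) - _mse_half(y_minus, y_true)) / (2 * eps)
    assert np.allclose(analytic, numeric, atol=1e-5)
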
def MAEBackward(outputs: Tensor):
    y_pred, y_true = outputs.in_bounds
    # d|y_pred - y_true| / dy_pred = sign(y_pred - y_true); build the sign mask
    # (+1 where y_pred >= y_true, -1 otherwise).
    pos = GLOBAL.np.where((y_pred.eval - y_true.eval) < 0)
    mask = GLOBAL.np.ones_like(y_pred.eval)
    mask[pos] = -1
    if outputs.cache['reduction'] == 'mean':
        # Scale by the batch size for mean reduction.
        GLOBAL.np.divide(mask, y_pred.shape[0], out=mask)
    if y_true.requires_grad:
        # The target receives the negated mask, mirroring MSELossBackward.
        if y_true.grad is None:
            y_true.grad = Tensor(-mask)
        else:
            GLOBAL.np.add(y_true.grad.eval, -mask, out=y_true.grad.eval)
    if y_pred.requires_grad:
        if y_pred.grad is None:
            y_pred.grad = Tensor(mask)
        else:
            GLOBAL.np.add(y_pred.grad.eval, mask, out=y_pred.grad.eval)

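# --- Standalone sanity check (not part of the library, illustrative only) ---
# For elementwise L = |y_pred - y_true| the (sub)gradient wrt y_pred is
# sign(y_pred - y_true) and the gradient wrt y_true is its negation, which is
# why the target accumulates -mask above. NumPy finite-difference check away
# from the non-differentiable point y_pred == y_true; helper name is hypothetical.
import numpy as np

def _check_mae_sign(eps=1e-6):
    rng = np.random.default_rng(0)
    y_pred = rng.normal(size=100)
    y_true = rng.normal(size=100)
    mask = np.where(y_pred - y_true < 0, -1.0, 1.0)
    numeric_pred = (np.abs(y_pred + eps - y_true) - np.abs(y_pred - eps - y_true)) / (2 * eps)
    numeric_true = (np.abs(y_pred - (y_true + eps)) - np.abs(y_pred - (y_true - eps))) / (2 * eps)
    assert np.allclose(mask, numeric_pred)
    assert np.allclose(-mask, numeric_true)
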
def BCEWithLogitsLossBackward(outputs: Tensor):
    y_pred, y_true = outputs.in_bounds
    # Note: despite the name, `logits` holds the probabilities p = sigmoid(z).
    logits = nn.td_functional.sigmoid(y_pred.eval)
    # dL/dp = (1 - y) / (1 - p) - y / p for the binary cross-entropy.
    gradients = GLOBAL.np.subtract(
        GLOBAL.np.divide(GLOBAL.np.subtract(1, y_true.eval), GLOBAL.np.subtract(1, logits)),
        GLOBAL.np.divide(y_true.eval, logits))
    if outputs.cache['reduction'] == 'mean':
        avg = GLOBAL.np.prod(y_pred.shape)
        GLOBAL.np.divide(gradients, avg, out=gradients)
    if y_true.requires_grad:
        if y_true.grad is None:
            y_true.grad = Tensor(gradients)
        else:
            GLOBAL.np.add(y_true.grad.eval, gradients, out=y_true.grad.eval)
    if y_pred.requires_grad:
        # Chain through the sigmoid: dp/dz = p * (1 - p).
        GLOBAL.np.multiply(GLOBAL.np.multiply(gradients, logits),
                           GLOBAL.np.subtract(1, logits), out=gradients)
        if y_pred.grad is None:
            y_pred.grad = Tensor(gradients)
        else:
            GLOBAL.np.add(y_pred.grad.eval, gradients, out=y_pred.grad.eval)

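# --- Standalone sanity check (not part of the library, illustrative only) ---
# BCEWithLogitsLossBackward chains dL/dp = (1 - y)/(1 - p) - y/p with
# dp/dz = p * (1 - p). A minimal NumPy sketch confirming that the product
# collapses to the familiar sigmoid-cross-entropy gradient p - y; the helper
# name is hypothetical.
import numpy as np

def _check_bce_chain():
    rng = np.random.default_rng(0)
    z = rng.normal(size=1000)                 # raw logits
    p = 1.0 / (1.0 + np.exp(-z))              # sigmoid(z)
    y = rng.integers(0, 2, size=1000).astype(float)
    dL_dp = (1 - y) / (1 - p) - y / p         # gradient wrt the probability
    dL_dz = dL_dp * p * (1 - p)               # chain through the sigmoid
    assert np.allclose(dL_dz, p - y)
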
def CrossEntropyLossBackward(outputs: Tensor):
    before_softmax_y_pred, y_true = outputs.in_bounds
    # The forward pass cached softmax(z); the gradient wrt the logits is
    # softmax(z) - one_hot(y).
    y_pred = before_softmax_y_pred.cache['softmax']
    to_sum_dim = GLOBAL.np.prod(GLOBAL.np.asarray(before_softmax_y_pred.shape[:-1])).item()
    probs = y_pred.eval.reshape(-1, before_softmax_y_pred.shape[-1])
    y_flat = y_true.eval.reshape(to_sum_dim)
    # Subtract 1 at the true-class positions: probs becomes softmax - one_hot.
    probs[GLOBAL.np.arange(to_sum_dim), y_flat] -= 1
    gradients = probs.reshape(before_softmax_y_pred.shape)
    if outputs.cache['reduction'] == 'mean':
        n = before_softmax_y_pred.eval.shape[0]
        GLOBAL.np.divide(gradients, n, out=gradients)
    # Chain with the upstream gradient.
    gradients = GLOBAL.np.multiply(gradients, outputs.grad.eval, out=gradients)
    if before_softmax_y_pred.requires_grad:
        if before_softmax_y_pred.grad is None:
            before_softmax_y_pred.grad = Tensor(gradients)
        else:
            GLOBAL.np.add(before_softmax_y_pred.grad.eval, gradients,
                          out=before_softmax_y_pred.grad.eval)

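# --- Standalone sanity check (not part of the library, illustrative only) ---
# CrossEntropyLossBackward relies on the classic result that the gradient of
# the mean negative log-likelihood wrt the pre-softmax logits is
# (softmax(z) - one_hot(y)) / n. A minimal NumPy finite-difference check;
# helper names are hypothetical.
import numpy as np

def _softmax(z):
    e = np.exp(z - z.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

def _nll(z, y):
    probs = _softmax(z)
    return -np.mean(np.log(probs[np.arange(len(y)), y]))

def _check_ce_grad(eps=1e-6):
    rng = np.random.default_rng(0)
    z = rng.normal(size=(5, 4))
    y = rng.integers(0, 4, size=5)
    probs = _softmax(z)
    probs[np.arange(5), y] -= 1               # same in-place trick as above
    analytic = probs / 5                      # divide by the batch size
    numeric = np.zeros_like(z)
    it = np.nditer(z, flags=['multi_index'])
    for _ in it:
        idx = it.multi_index
        z_plus, z_minus = z.copy(), z.copy()
        z_plus[idx] += eps
        z_minus[idx] -= eps
        numeric[idx] = (_nll(z_plus, y) - _nll(z_minus, y)) / (2 * eps)
    assert np.allclose(analytic, numeric, atol=1e-5)
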
def ChannelAvgpoolBackward(outputs: Tensor):
    mode = outputs.get_cache('mode')
    inputs, = outputs.in_bounds
    if mode == 'reshape':
        # Reshape-based pooling backward: route the upstream gradient to the
        # positions of the reshaped input that produced the pooled output,
        # then rescale by how many positions matched.
        dx_reshaped = GLOBAL.np.zeros_like(outputs.cache['x_reshaped'])
        out_newaxis = outputs.eval[:, :, GLOBAL.np.newaxis, :, :]
        mask = (outputs.cache['x_reshaped'] == out_newaxis)
        dout_newaxis = outputs.grad.eval[:, :, GLOBAL.np.newaxis, :, :]
        dout_broadcast, _ = GLOBAL.np.broadcast_arrays(dout_newaxis, dx_reshaped)
        dx_reshaped[mask] = dout_broadcast[mask]
        dx_reshaped /= GLOBAL.np.mean(mask, axis=2, keepdims=True)
        grad = dx_reshaped.reshape(inputs.eval.shape)
        if outputs.cache['pad_size']:
            # Drop the channel padding added in the forward pass.
            grad = grad[:, outputs.cache['pad_size']: -outputs.cache['pad_size']]
    else:
        raise NotImplementedError
    if inputs.requires_grad:
        inputs.grad.eval += grad

def __call__(self, shape, **kwargs):
    return Tensor(
        GLOBAL.np.random.normal(loc=self.mean, scale=self.std, size=shape),
        **kwargs)

def tensor(data, **kwargs):
    requires_grad = kwargs.pop('requires_grad', False)
    return Tensor(data=data, requires_grad=requires_grad, **kwargs)

def __call__(self, shape, **kwargs):
    return Tensor(
        GLOBAL.np.random.uniform(-self.scale, self.scale, size=shape),
        **kwargs)

def zeros_like(a, **kwargs):
    requires_grad = kwargs.pop('requires_grad', a.requires_grad)
    return Tensor(GLOBAL.np.zeros_like(a.data), requires_grad=requires_grad, **kwargs)

def __call__(self, low, high=None, shape=None, **kwargs):
    return Tensor(GLOBAL.np.random.randint(low=low, high=high, size=shape), **kwargs)

def __call__(self, shape: Tuple, **kwargs):
    return Tensor(GLOBAL.np.random.rand(*shape), **kwargs)

def __call__(self, shape: Tuple, **kwargs):
    return Tensor(GLOBAL.np.empty(shape), **kwargs)

def __call__(self, shape, **kwargs):
    return Tensor(GLOBAL.np.ones(shape), **kwargs)