def _oper_cpu(cls, x, w, b, momentum, mov_m, mov_s, inference, mode, epsilon):
    # Normalize over the batch axis only, or over batch and spatial axes
    # when the input is a feature map.
    if mode == BATCH_NORMALIZE_FEATUREMAP:
        axs = (0, 2, 3)
    else:
        axs = (0,)

    if inference:
        mean = mov_m
        var = mov_s
    else:
        mean = np.mean(to_value(x), axis=axs, keepdims=True)
        var = np.var(to_value(x), axis=axs, keepdims=True)

    # Note: despite the name, sq_var holds the inverse standard deviation.
    sq_var = 1.0 / np.sqrt(var + epsilon)
    xh = (to_value(x) - mean) * sq_var
    z = to_value(w) * xh + to_value(b)

    ret = cls._create_node(z)
    ret.attrs._axs = axs
    ret.attrs._x = x
    ret.attrs._w = w
    ret.attrs._b = b
    ret.attrs._m = mean
    ret.attrs._v = sq_var
    if not inference:
        # Update moving statistics; the N / (N - 1) factor makes the
        # accumulated variance an unbiased estimate.
        N = np.prod([x.shape[s] for s in axs])
        ret.attrs._mov_m = (1 - momentum) * mov_m + momentum * mean
        ret.attrs._mov_v = (1 - momentum) * mov_s + \
            momentum * var * N / max(N - 1., 1.)
    return ret
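# A standalone sketch (plain NumPy only; names are illustrative, not the
# library's) of the math _oper_cpu implements above, including the N / (N - 1)
# correction applied when accumulating the moving variance.
import numpy as np

def batch_norm_forward_sketch(x, w, b, epsilon=1e-5):
    mean = np.mean(x, axis=0, keepdims=True)
    var = np.var(x, axis=0, keepdims=True)      # biased: divides by N
    inv_std = 1.0 / np.sqrt(var + epsilon)      # what the code calls sq_var
    return w * ((x - mean) * inv_std) + b, var

x = np.random.randn(8, 3)
z, var = batch_norm_forward_sketch(x, np.ones(3), np.zeros(3))
N = x.shape[0]
# var * N / (N - 1) is exactly the unbiased (ddof=1) variance.
assert np.allclose(var * N / max(N - 1., 1.), np.var(x, axis=0, ddof=1))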
def _backward_cpu(self, context, dy, **kwargs):
    # Standard matmul gradients: d(lhs) = dy @ rhs.T, d(rhs) = lhs.T @ dy.
    if isinstance(self.attrs._lhs, Node):
        self.attrs._lhs._update_diff(
            context, np.dot(dy, to_value(self.attrs._rhs).T), **kwargs)
    if isinstance(self.attrs._rhs, Node):
        self.attrs._rhs._update_diff(
            context, np.dot(to_value(self.attrs._lhs).T, dy), **kwargs)
def forward(self, x):
    ret = peephole_lstm(x, getattr(self, "_z", None),
                        getattr(self, "_state", None), self.params)
    # Cache the output and cell state for the next time step.
    self._z = to_value(ret)
    self._state = to_value(getattr(ret, '_state', None))
    # Stash this step's forget gate on the previous node so its backward
    # pass can propagate the cell-state gradient through time.
    if hasattr(self, "_last_node") and self._last_node is not None \
            and ret.attrs.get_attrs():
        setattr(self._last_node.attrs, "_pgated_f",
                to_value(ret.attrs._gated_f))
    self._last_node = ret
    return ret
def _oper_cpu(cls, lhs, rhs):
    N = len(lhs)
    z = 1. / (1. + np.exp(to_value(-lhs)))
    # 1e-8 guards the logs against z hitting exactly 0 or 1.
    loss = -np.sum(to_value(rhs) * np.log(z + 1e-8) +
                   to_value(1 - rhs) * np.log(1 - z + 1e-8)) / N
    ret = cls._create_node(loss)
    # The sigmoid output is cached for reuse in the backward pass.
    ret.attrs._z = z
    ret.attrs._lhs = lhs
    ret.attrs._rhs = rhs
    return ret
def _backward_cpu(self, context, dy):
    p = self.attrs._p
    s = self.attrs._state
    ps = self.attrs._pstate
    u = self.attrs._u
    go = self.attrs._gated_o
    gf = self.attrs._gated_f
    gi = self.attrs._gated_i
    # Forget gate of the *next* step, set by forward(); zero at the last step.
    pgf = np.zeros_like(gf) if self.attrs._pgated_f is None \
        else self.attrs._pgated_f

    # Restore the per-gate deltas saved by the following time step.
    drt, dit, dft, dot, dct = (context.restore(dt, np.zeros_like(dy))
                               for dt in self.attrs._dt_d)

    activated_s = tanh(s)
    activated_u = tanh(u)

    # Error arriving at this step: the local gradient plus the recurrent
    # contributions from the next step's gates.
    e = dy + np.dot(drt, p["wr"].T) + np.dot(dit, p["wir"].T) + \
        np.dot(dft, p["wfr"].T) + np.dot(dot, p["wor"].T)

    do = gate_diff(go) * activated_s * e
    ds = go * activation_diff(activated_s) * e
    # Cell-state gradient: direct path, carry from the next step, and the
    # peephole connections.
    dc = ds + pgf * dct + p["wfc"] * dft + p["wic"] * dit + p["woc"] * do
    df = gate_diff(gf) * ps * dc if ps is not None else np.zeros_like(gf)
    di = gate_diff(gi) * activated_u * dc
    d = gi * activation_diff(activated_u) * dc

    dx = np.dot(d, p["w"].T) \
        + np.dot(di, p["wi"].T) \
        + np.dot(do, p["wo"].T) \
        + np.dot(df, p["wf"].T)

    # Save this step's deltas for the previous time step to restore.
    for dt_d, dt in zip(self.attrs._dt_d, (d, di, df, do, dc)):
        context.store(dt_d, dt)

    if isinstance(self.attrs._x, Node):
        self.attrs._x._update_diff(context, dx)

    # Input weights, recurrent weights, peephole weights, and biases.
    for k, diff in zip(("w", "wo", "wi", "wf"), (d, do, di, df)):
        if isinstance(p[k], Node):
            p[k]._update_diff(context, np.dot(to_value(self.attrs._x).T, diff))

    for k, diff in zip(("wr", "wor", "wir", "wfr"), (drt, dot, dit, dft)):
        if isinstance(p[k], Node):
            p[k]._update_diff(context, np.dot(to_value(self).T, diff))

    for k, diff in zip(("wfc", "wic", "woc"), (dft, dit, do)):
        if isinstance(p[k], Node):
            p[k]._update_diff(context, np.sum(diff * s, axis=0, keepdims=True))

    for k, diff in zip(("b", "bf", "bi", "bo"), (d, df, di, do)):
        if isinstance(p[k], Node):
            p[k]._update_diff(context, np.sum(diff, axis=0, keepdims=True))
def _backward_cpu(self, context, dy, **kwargs):
    a = self.attrs._axs
    sq_var = self.attrs._v          # inverse standard deviation from forward
    meaned = self.attrs._x - self.attrs._m
    N = np.prod([self.attrs._x.shape[s] for s in a])

    if isinstance(self.attrs._x, Node):
        dxh = dy * to_value(self.attrs._w)
        # ds: gradient through the variance; du: gradient through the mean.
        ds = np.sum(dxh * meaned * -np.power(sq_var, 3) / 2,
                    axis=a, keepdims=True)
        du = np.sum(-dxh * sq_var, axis=a, keepdims=True)
        dx = dxh * sq_var + (ds * 2 * meaned + du) / N
        self.attrs._x._update_diff(context, dx, **kwargs)

    if isinstance(self.attrs._w, Node):
        xh = meaned * sq_var
        self.attrs._w._update_diff(
            context, np.sum(xh * dy, axis=a, keepdims=True), **kwargs)

    if isinstance(self.attrs._b, Node):
        self.attrs._b._update_diff(
            context, np.sum(dy, axis=a, keepdims=True), **kwargs)
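# A numerical check (standalone NumPy; helper names are hypothetical) that the
# closed-form dx in _backward_cpu above matches a finite-difference gradient
# of loss = sum(dy * batchnorm(x)).
import numpy as np

def bn(x, w, b, eps=1e-5):
    m = x.mean(axis=0, keepdims=True)
    v = x.var(axis=0, keepdims=True)
    return w * (x - m) / np.sqrt(v + eps) + b

def bn_dx(x, w, dy, eps=1e-5):
    N = x.shape[0]
    inv_std = 1.0 / np.sqrt(x.var(axis=0, keepdims=True) + eps)
    meaned = x - x.mean(axis=0, keepdims=True)
    dxh = dy * w
    ds = np.sum(dxh * meaned * -inv_std ** 3 / 2, axis=0, keepdims=True)
    du = np.sum(-dxh * inv_std, axis=0, keepdims=True)
    return dxh * inv_std + (ds * 2 * meaned + du) / N

rng = np.random.default_rng(0)
x = rng.standard_normal((6, 4))
w, b = rng.standard_normal(4), rng.standard_normal(4)
dy = rng.standard_normal(x.shape)
num, h = np.zeros_like(x), 1e-6
for idx in np.ndindex(*x.shape):
    xp, xm = x.copy(), x.copy()
    xp[idx] += h
    xm[idx] -= h
    num[idx] = (np.sum(dy * bn(xp, w, b)) - np.sum(dy * bn(xm, w, b))) / (2 * h)
assert np.allclose(bn_dx(x, w, dy), num, atol=1e-5)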
def _backward_cpu(self, context, dy, **kwargs):
    axis = self.attrs._axis
    args = np.split(to_value(dy), self.attrs._index, axis=axis)
    for i in range(len(self.attrs._index) + 1):
        arg = getattr(self.attrs, "_arg%d" % i)
        if isinstance(arg, Node):
            arg._update_diff(context, args[i], **kwargs)
def _backward_cpu(self, context, dy):
    ldy, rdy = np.hsplit(to_value(dy), [self.attrs._index])
    if isinstance(self.attrs._lhs, Node):
        self.attrs._lhs._update_diff(context, ldy)
    if isinstance(self.attrs._rhs, Node):
        self.attrs._rhs._update_diff(context, rdy)
def region_cordinates(roi, spatial_scale):
    idx, xmin, ymin, xmax, ymax = to_value(roi)
    idx = int(idx)
    xmin = int(round(xmin * spatial_scale))
    ymin = int(round(ymin * spatial_scale))
    xmax = int(round(xmax * spatial_scale))
    ymax = int(round(ymax * spatial_scale))
    return idx, xmin, ymin, xmax, ymax
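# Worked example (illustrative values): with spatial_scale = 1 / 16, the stride
# of a typical backbone, an ROI given in input-image pixels is mapped onto
# feature-map cells by scaling and rounding each corner:
#   roi = (0, 48, 60, 208, 220), spatial_scale = 0.0625
#   -> idx = 0, xmin = 3, ymin = 4 (3.75 rounds up), xmax = 13, ymax = 14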
def _oper_cpu(cls, x, w, b, in_shape, out_shape, kernel, stride, padding):
    col = im2col(to_value(x), out_shape[1:], kernel, stride, padding)
    value = np.rollaxis(np.tensordot(col, to_value(w),
                                     ([1, 2, 3], [1, 2, 3])), 3, 1)
    if b is not None:
        value += b
    ret = cls._create_node(value)
    ret.attrs._col = col
    ret.attrs._x = x
    ret.attrs._w = w
    ret.attrs._b = b
    ret.attrs._in_shape = in_shape
    ret.attrs._out_shape = out_shape
    ret.attrs._kernel = kernel
    ret.attrs._stride = stride
    ret.attrs._padding = padding
    return ret
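# A compact standalone sketch (plain NumPy; stride 1, no padding; names are
# hypothetical) of the im2col layout assumed above: patches are unfolded to
# shape (N, C, k_h, k_w, out_h, out_w), so contracting axes (1, 2, 3) against
# the filter's (in_ch, k_h, k_w) axes performs the convolution as one tensordot.
import numpy as np

def im2col_sketch(x, kernel):
    N, C, H, W = x.shape
    k_h, k_w = kernel
    out_h, out_w = H - k_h + 1, W - k_w + 1
    col = np.empty((N, C, k_h, k_w, out_h, out_w), dtype=x.dtype)
    for i in range(k_h):
        for j in range(k_w):
            col[:, :, i, j] = x[:, :, i:i + out_h, j:j + out_w]
    return col

x = np.random.randn(1, 2, 4, 4)
w = np.random.randn(3, 2, 2, 2)   # (out_ch, in_ch, k_h, k_w)
col = im2col_sketch(x, (2, 2))
y = np.rollaxis(np.tensordot(col, w, ([1, 2, 3], [1, 2, 3])), 3, 1)

# Reference: direct sliding-window correlation.
ref = np.empty((1, 3, 3, 3))
for o in range(3):
    for r in range(3):
        for c in range(3):
            ref[0, o, r, c] = np.sum(x[0, :, r:r + 2, c:c + 2] * w[o])
assert np.allclose(y, ref)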
def _oper_cpu(cls, x, w, b, in_shape, kernel, stride, padding):
    col = imncol(to_value(x), w, stride, padding)
    if b is not None:
        col += b
    ret = cls._create_node(col)
    ret.attrs._x = x
    ret.attrs._w = w
    ret.attrs._b = b
    ret.attrs._kernel = kernel
    ret.attrs._stride = stride
    ret.attrs._padding = padding
    return ret
def _oper_cpu(cls, x, w, b, in_shape, out_shape, kernel, stride,
              padding, dilation, groups):
    N, in_channels, in_h, in_w = x.shape
    k_h, k_w = kernel
    out_channels = w.shape[0]
    iCg = in_channels // groups   # input channels per group
    oCg = out_channels // groups  # output channels per group

    col = im2col(to_value(x), out_shape[1:], kernel, stride, padding, dilation)
    out_h, out_w = col.shape[-2:]
    # Rearrange patches to (groups, iCg * k_h * k_w, N * out_h * out_w) so the
    # convolution becomes one batched matrix product per group.
    col = col.transpose(1, 2, 3, 0, 4, 5)
    col = col.reshape(groups, iCg * k_h * k_w, N * out_h * out_w)
    w_new = w.reshape(groups, oCg, iCg * k_h * k_w)
    value = np.matmul(to_value(w_new), col)
    value = value.reshape(groups * oCg, N, out_h, out_w)
    value = value.transpose(1, 0, 2, 3)
    if b is not None:
        value += b.reshape(1, b.size, 1, 1)

    ret = cls._create_node(value)
    ret.attrs._col = col
    ret.attrs._x = x
    ret.attrs._w = w
    ret.attrs._b = b
    ret.attrs._in_shape = in_shape
    ret.attrs._out_shape = out_shape
    ret.attrs._kernel = kernel
    ret.attrs._stride = stride
    ret.attrs._padding = padding
    ret.attrs._dilation = dilation
    ret.attrs._groups = groups
    ret.attrs._iCg = iCg
    ret.attrs._oCg = oCg
    return ret
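# Sanity check (standalone NumPy, illustrative shapes) for the group
# bookkeeping used above: for a 1x1 kernel, convolution reduces to a matrix
# product per pixel, so the batched np.matmul over the groups axis must agree
# with looping over the groups explicitly.
import numpy as np

N, in_ch, out_ch, groups, H, W = 2, 6, 4, 2, 5, 5
iCg, oCg = in_ch // groups, out_ch // groups

x = np.random.randn(N, in_ch, H, W)
w = np.random.randn(out_ch, iCg, 1, 1)

# Grouped path: mirror the reshape/transpose sequence from _oper_cpu.
col = x.reshape(N, groups, iCg, H * W).transpose(1, 2, 0, 3)  # (g, iCg, N, H*W)
col = col.reshape(groups, iCg, N * H * W)
w_new = w.reshape(groups, oCg, iCg)
out = np.matmul(w_new, col)                                   # (g, oCg, N*H*W)
out = out.reshape(out_ch, N, H, W).transpose(1, 0, 2, 3)

# Reference path: apply each group's filters to its slice of the input channels.
ref = np.empty_like(out)
for g in range(groups):
    xg = x[:, g * iCg:(g + 1) * iCg]                          # (N, iCg, H, W)
    wg = w[g * oCg:(g + 1) * oCg, :, 0, 0]                    # (oCg, iCg)
    ref[:, g * oCg:(g + 1) * oCg] = np.einsum('oi,nihw->nohw', wg, xg)

assert np.allclose(out, ref)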
def _oper_gpu(cls, x, w, b, momentum, mov_m, mov_s, inference, mode, epsilon):
    if mode == BATCH_NORMALIZE_FEATUREMAP:
        axs = 1
    else:
        axs = 0

    if b is None:
        b = get_gpu(w).zeros_like_me()
    y, mean, sq_var = (get_gpu(g).empty_like_me() for g in (x, w, w))

    if inference:
        inv_var = 1.0 / np.sqrt(to_value(mov_s) + epsilon)
        if isinstance(inv_var, Number):
            inv_var = inv_var * np.ones_like(w)

    mov_m = get_gpu(mov_m)
    mov_s = get_gpu(mov_s)
    mv_m = mov_m if isinstance(mov_m, GPUValue) else get_gpu(w).zeros_like_me()
    mv_v = mov_s if isinstance(mov_s, GPUValue) else get_gpu(w).zeros_like_me()

    with cu.cudnn_handler() as handle:
        cu.cuBatchNormalizatoinForward(handle, get_gpu(x), mv_m, mv_v,
                                       get_gpu(w), get_gpu(b), y, mean, sq_var,
                                       momentum=momentum, mode=axs,
                                       inference=inference, eps=epsilon)

    ret = cls._create_node(y)
    ret.attrs._axs = axs
    ret.attrs._x = x
    ret.attrs._w = w
    ret.attrs._b = b
    if inference:
        ret.attrs._m = mv_m
        ret.attrs._v = inv_var
    else:
        ret.attrs._m = mean
        ret.attrs._v = sq_var
        ret.attrs._mov_m = mv_m
        ret.attrs._mov_v = mv_v
    return ret
def _backward_cpu(self, context, dy, **kwargs):
    if isinstance(self.attrs._x, Node):
        dy = to_value(dy)
        unit_scale = self.attrs._unit_scale
        scale = self.attrs._scale
        a = self.attrs._a
        b = self.attrs._b
        n = self.attrs._n
        x = self.attrs._x
        sum1 = (self * dy / unit_scale).view(np.ndarray)
        sum2 = sum1.copy()
        # Accumulate contributions from the n // 2 neighboring channels on
        # either side of each channel.
        for i in range(1, n // 2 + 1):
            sum2[:, i:, :, :] += sum1[:, :-i, :, :]
            sum2[:, :-i, :, :] += sum1[:, i:, :, :]
        self.attrs._x._update_diff(context, dy * scale - 2 * a * b * x * sum2,
                                   **kwargs)
def _backward_cpu(self, context, dy):
    dy = to_value(dy)
    if isinstance(self.attrs._x, Node):
        dx = np.tensordot(self.attrs._w, dy, (0, 1))
        dx = np.rollaxis(dx, 3)
        dx = col2im(dx, self.attrs._in_shape[1:],
                    self.attrs._stride, self.attrs._padding)
        self.attrs._x._update_diff(context, dx)
    if isinstance(self.attrs._w, Node):
        self.attrs._w._update_diff(context, np.tensordot(
            dy, self.attrs._col, ([0, 2, 3], [0, 4, 5])))
    if isinstance(self.attrs._b, Node):
        self.attrs._b._update_diff(context,
                                   np.sum(dy, (0, 2, 3), keepdims=True))
def _backward_cpu(self, context, dy, **kwargs):
    dy = to_value(dy)
    N, in_channels, in_h, in_w = self.attrs._x.shape
    groups = self.attrs._groups
    oCg = self.attrs._oCg
    iCg = self.attrs._iCg
    out_h, out_w = self.attrs._out_shape[-2:]
    k_h, k_w = self.attrs._kernel

    if isinstance(self.attrs._x, Node):
        # dx = w.T @ dy per group, folded back to image shape via col2im.
        dy_temp = dy.transpose(1, 0, 2, 3)
        dy_temp = dy_temp.reshape(groups, oCg, N * out_h * out_w)
        w_temp = self.attrs._w.reshape(groups, oCg, iCg * k_h * k_w)
        w_temp = w_temp.transpose(0, 2, 1)
        dx = np.matmul(w_temp, dy_temp)
        dx = dx.reshape(groups * iCg, k_h, k_w, N, out_h, out_w)
        dx = np.rollaxis(dx, 3)
        dx = col2im(dx, self.attrs._in_shape[1:], self.attrs._stride,
                    self.attrs._padding, self.attrs._dilation)
        self.attrs._x._update_diff(context, dx, **kwargs)

    if isinstance(self.attrs._w, Node):
        # dw = dy @ col.T per group.
        col_temp = self.attrs._col
        col_temp = col_temp.transpose(0, 2, 1)
        dy_temp = dy.transpose(1, 0, 2, 3)
        dy_temp = dy_temp.reshape(groups, oCg, N * out_h * out_w)
        dw = np.matmul(dy_temp, col_temp)
        dw = dw.reshape(groups * oCg, iCg, k_h, k_w)
        self.attrs._w._update_diff(context, dw, **kwargs)

    if isinstance(self.attrs._b, Node):
        self.attrs._b._update_diff(context,
                                   np.sum(dy, (0, 2, 3), keepdims=True),
                                   **kwargs)
def _oper_cpu(cls, arg, axis, keepdims):
    array = to_value(arg)
    return np.amin(array, axis, keepdims=keepdims), np.argmin(array, axis)
def _oper_cpu(cls, arg, axis, keepdims):
    array = to_value(arg)
    # Max is calculated twice, update?
    return np.amax(array, axis, keepdims=keepdims), np.argmax(array, axis)
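# One way to address the TODO comment above (a sketch, not the library's code,
# and assuming an integer axis): compute argmax once and gather the max values
# from it instead of scanning the array a second time.
import numpy as np

def amax_argmax_single_scan(array, axis, keepdims=False):
    idx = np.argmax(array, axis)
    vals = np.take_along_axis(array, np.expand_dims(idx, axis), axis)
    if not keepdims:
        vals = np.squeeze(vals, axis)
    return vals, idx

a = np.arange(12).reshape(3, 4)
vals, idx = amax_argmax_single_scan(a, axis=1)
assert np.array_equal(vals, np.amax(a, axis=1))
assert np.array_equal(idx, np.argmax(a, axis=1))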
def close(a, b):
    assert np.allclose(to_value(a), to_value(b), atol=1e-4, rtol=1e-3)
def close(GPU, CPU):
    print('GPU =')
    print(to_value(GPU))
    print('CPU =')
    print(to_value(CPU))
    assert np.allclose(to_value(GPU), to_value(CPU), atol=1e-4, rtol=1e-3)