def _oper_cpu(cls, x, pz, ps, w, wr, b):
    s = np.zeros((x.shape[0], w.shape[1] // 4), dtype=precision) if ps is None else ps
    z = np.zeros((x.shape[0], w.shape[1] // 4), dtype=precision) if pz is None else pz
    u = dot(x, w) + dot(z, wr) + b
    m = u.shape[1] // 4
    u, gated = np.split(u, [m, ], axis=1)
    u = tanh(u)
    gated = sigmoid(gated)
    state = gated[:, m:m * 2] * u + gated[:, :m] * s
    z = tanh(state) * gated[:, m * 2:]
    ret = cls._create_node(z)
    ret.attrs._x = x
    ret.attrs._w = w
    ret.attrs._wr = wr
    ret.attrs._b = b
    ret.attrs._pz = pz
    ret.attrs._u = u
    ret.attrs._pstate = ps
    ret.attrs._state = state
    ret.attrs._gated = gated
    ret._state = state
    if isinstance(pz, Node):
        pz.attrs._pfgate = gated[:, :m]
    return ret
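# A minimal standalone numpy sketch of the step above (illustrative only, not
# the library's API). The fused weight matrix packs four blocks of width m
# along axis 1; the order [candidate | forget | input | output] is inferred
# from the indexing in _oper_cpu, and _np_sigmoid stands in for the library's
# own activation helper.
import numpy as np

def _np_sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def _lstm_step_sketch(x, z_prev, s_prev, w, wr, b):
    u = x @ w + z_prev @ wr + b
    m = u.shape[1] // 4
    cand, gated = np.split(u, [m], axis=1)
    cand = np.tanh(cand)                            # candidate cell input
    gated = _np_sigmoid(gated)                      # forget/input/output gates
    f, i, o = gated[:, :m], gated[:, m:2 * m], gated[:, 2 * m:]
    state = i * cand + f * s_prev                   # new cell state
    return np.tanh(state) * o, state                # new hidden state, cell state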
def _oper_gpu(cls, x, pz, ps, parameter):
    p = parameter
    if ps is None:
        tmp = GPUValue(shape=(x.shape[0], p["w"].shape[1] // 4))
        s_p = tmp.zeros_like_me()
        z_p = tmp.zeros_like_me()
    else:
        s_p = ps
        z_p = get_gpu(pz)
    u = dot(x, p["w"]) + dot(z_p, p["wr"]) + p["b"]
    z = get_gpu(z_p).empty_like_me()
    state = get_gpu(s_p).empty_like_me()
    cu.culstm_forward_activate(get_gpu(u))
    cu.culstm_forward(get_gpu(u), get_gpu(state), get_gpu(s_p), get_gpu(z))
    ret = cls._create_node(z)
    ret.attrs._x = x
    ret.attrs._p = parameter
    ret.attrs._u = u
    ret.attrs._pstate = s_p
    ret.attrs._state = state
    ret.attrs._dt_d = [p[k] for k in ["wr", "w"]]
    ret._state = state
    if isinstance(pz, Node):
        pz.attrs._pfgate = u
    return ret
def _backward_cpu(self, context, dy, **kwargs):
    x = self.attrs._x
    w = self.attrs._w
    gain = self.attrs._gain
    weight = self.attrs._weight
    dx = op.dot(dy, w.T)
    normal_dw = op.dot(x.T, dy)
    dgain = normal_dw * w / gain
    dw = w / weight * (normal_dw - np.sum(w * normal_dw / gain,
                                          keepdims=True) * w / gain)
    if isinstance(self.attrs._x, Node):
        self.attrs._x._update_diff(context, dx, **kwargs)
    if isinstance(self.attrs._gain, Node):
        self.attrs._gain._update_diff(context, np.sum(dgain, axis=0, keepdims=True), **kwargs)
    if isinstance(self.attrs._weight, Node):
        self.attrs._weight._update_diff(context, dw, **kwargs)
    if isinstance(self.attrs._bias, Node):
        db = dy
        self.attrs._bias._update_diff(context, np.sum(db, axis=0, keepdims=True), **kwargs)
def _oper_gpu(cls, x, pz, ps, w, wr, wc, b):
    if ps is None:
        s_p = GPUValue(shape=(x.shape[0], w.shape[1] // 4)).zeros_like_me()
        z_p = s_p.zeros_like_me()
    else:
        s_p, z_p = map(get_gpu, (ps, pz))
    s = s_p.empty_like_me()
    u = op.dot(x, w) + op.dot(z_p, wr)
    if b is not None:
        u += b
    u = get_gpu(u)
    z = z_p.zeros_like_me()
    cu.cupeepholelstm_forward(u, get_gpu(wc), s_p, s, z)
    ret = cls._create_node(z)
    ret.attrs._x = x
    ret.attrs._w = w
    ret.attrs._wr = wr
    ret.attrs._wc = wc
    ret.attrs._b = b
    ret.attrs._u = u
    ret.attrs._pz = pz
    ret.attrs._pstate = ps
    ret.attrs._state = s
    if isinstance(pz, Node):
        pz.attrs._pfgate = u
    return ret
def test_gpu_node_dot(a, b):
    set_cuda_active(True)
    g1 = Variable(a)
    g2 = Variable(b)
    g3 = dot(g1, g2)
    g4 = rm.sum(g3)
    g = g4.grad()
    g_g1 = g.get(g1)
    g_g2 = g.get(g2)
    g_g3 = g.get(g3)
    g3.to_cpu()
    g4.to_cpu()
    set_cuda_active(False)
    c3 = dot(g1, g2)
    c4 = rm.sum(c3)
    c = c4.grad()
    c_g1 = c.get(g1)
    c_g2 = c.get(g2)
    c_c3 = c.get(c3)
    close(g3, c3)
    close(g4, c4)
    close(c_g1, g_g1)
    close(c_g2, g_g2)
    close(c_c3, g_g3)
def _oper_gpu(cls, x, pz, ps, w, wr, b):
    if ps is None:
        tmp = GPUValue(shape=(x.shape[0], w.shape[1] // 4))
        s_p = tmp.zeros_like_me()
        z_p = tmp.zeros_like_me()
    else:
        s_p = ps
        z_p = get_gpu(pz)
    u = dot(x, w) + dot(z_p, wr)
    if b is not None:
        u += b
    z = get_gpu(z_p).empty_like_me()
    state = get_gpu(s_p).empty_like_me()
    cu.culstm_forward_activate(get_gpu(u))
    cu.culstm_forward(get_gpu(u), get_gpu(state), get_gpu(s_p), get_gpu(z))
    ret = cls._create_node(z)
    ret.attrs._x = x
    ret.attrs._w = w
    ret.attrs._wr = wr
    ret.attrs._b = b
    ret.attrs._pz = pz
    ret.attrs._u = u
    ret.attrs._pstate = s_p
    ret.attrs._state = state
    ret._state = state
    if isinstance(pz, Node):
        pz.attrs._pfgate = u
    return ret
def _backward_gpu(self, context, dy, **kwargs):
    x = get_gpu(self.attrs._x)
    w = get_gpu(self.attrs._w)
    gain = get_gpu(self.attrs._gain)
    weight = get_gpu(self.attrs._weight)
    dx = get_gpu(op.dot(dy, w.T))
    normal_dw = get_gpu(op.dot(x.T, dy))
    if isinstance(self.attrs._x, Node):
        self.attrs._x._update_diff(context, dx, **kwargs)
    if isinstance(self.attrs._gain, Node):
        dgain = normal_dw * w / gain
        self.attrs._gain._update_diff(context, op.sum(dgain, axis=0, keepdims=True), **kwargs)
    if isinstance(self.attrs._weight, Node):
        # w, weight, and gain are already on the GPU from the lookups above.
        dw = w / weight * (normal_dw - op.sum(w * normal_dw / gain,
                                              keepdims=True) * w / gain)
        self.attrs._weight._update_diff(context, dw, **kwargs)
    if isinstance(self.attrs._bias, Node):
        db = dy
        self.attrs._bias._update_diff(context, op.sum(db, axis=0, keepdims=True), **kwargs)
def _backward_gpu(self, context, dy, **kwargs):
    x = self.attrs._x
    w = self.attrs._w
    b = self.attrs._b
    u = self.attrs._u
    hminus = self.attrs._pz
    ABC = self.attrs._ABC
    db = get_gpu(b).empty_like_me()
    yconc = get_gpu(ABC).empty_like_me()
    du = get_gpu(u).empty_like_me()
    dpz = get_gpu(hminus).empty_like_me()
    dxx = get_gpu(x).empty_like_me()
    cu.cugru_backward(get_gpu(ABC), get_gpu(dy), yconc, get_gpu(u),
                      get_gpu(hminus), db, du, dpz, dxx)
    # Calculate dx and dw from the concatenated gate gradients
    dx = get_gpu(dot(yconc, w.T))
    xconc = get_gpu(x.T)
    dw = dot(get_gpu(xconc), get_gpu(yconc))
    self.attrs._x._update_diff(context, dx)
    self.attrs._w._update_diff(context, dw)
    self.attrs._b._update_diff(context, db)
    self.attrs._u._update_diff(context, du)
    if isinstance(self.attrs._pz, Node):
        self.attrs._pz._update_diff(context, dpz)
def _backward_gpu(self, context, dy):
    p = self.attrs._p
    u = self.attrs._u
    s = tanh(self.attrs._state)
    ps = self.attrs._pstate
    drt = context.restore(p["wr"], get_gpu(u).zeros_like_me())
    dou = context.restore(p["w"], get_gpu(dy).zeros_like_me())
    pfg = getattr(self.attrs, "_pfgate", get_gpu(u).zeros_like_me())
    e = get_gpu(dy) + get_gpu(dot(drt, p["wr"].T))
    dr, dou_n = (get_gpu(a).empty_like_me() for a in (drt, dou))
    cu.culstm_backward(*map(get_gpu, (u, dr, s, ps, e, pfg, dou, dou_n)))
    dx = dot(dr, p["w"].T)
    context.store(p["wr"], dr)
    context.store(p["w"], dou_n)
    if isinstance(self.attrs._x, Node):
        self.attrs._x._update_diff(context, dx)
    if isinstance(p["w"], Node):
        p["w"]._update_diff(context, dot(self.attrs._x.T, dr))
    if isinstance(p["wr"], Node):
        p["wr"]._update_diff(context, dot(self.T, drt))
    if isinstance(p["b"], Node):
        p["b"]._update_diff(context, sum(dr, axis=0))
def _oper_cpu(cls, x, pz, ps, parameter):
    p = parameter
    s = np.zeros((x.shape[0], p["w"].shape[1] // 4), dtype=precision) if ps is None else ps
    z = np.zeros((x.shape[0], p["w"].shape[1] // 4), dtype=precision) if pz is None else pz
    u = dot(x, p["w"]) + dot(z, p["wr"]) + p["b"]
    m = u.shape[1] // 4
    u, gated = np.split(u, [m, ], axis=1)
    u = tanh(u)
    gated = sigmoid(gated)
    state = gated[:, m:m * 2] * u + gated[:, :m] * s
    z = tanh(state) * gated[:, m * 2:]
    ret = cls._create_node(z)
    ret.attrs._x = x
    ret.attrs._p = parameter
    ret.attrs._u = u
    ret.attrs._pstate = ps
    ret.attrs._state = state
    ret.attrs._gated = gated
    ret.attrs._dt_d = [p[k] for k in ["wr", "w"]]
    ret._state = state
    if isinstance(pz, Node):
        pz.attrs._pfgate = gated[:, :m]
    return ret
def _backward_gpu(self, context, dy):
    p = self.attrs._p
    s = self.attrs._state
    ps = self.attrs._pstate
    u = self.attrs._u
    go = self.attrs._gated_o
    gf = self.attrs._gated_f
    gi = self.attrs._gated_i
    pgf = get_gpu(gf).zeros_like_me() if self.attrs._pgated_f is None else self.attrs._pgated_f
    drt, dit, dft, doot, dct = (context.restore(dt, get_gpu(dy).zeros_like_me())
                                for dt in self.attrs._dt_d)
    activated_s = tanh(s)
    activated_u = tanh(u)
    e = dy + get_gpu(dot(drt, p["wr"].T)) \
        + get_gpu(dot(dit, p["wir"].T)) \
        + get_gpu(dot(dft, p["wfr"].T)) \
        + get_gpu(dot(doot, p["wor"].T))
    do = gate_diff(go) * activated_s * e
    ds = go * activation_diff(activated_s) * e
    dc = ds + pgf * dct + p["wfc"] * dft + p["wic"] * dit + p["woc"] * do
    df = gate_diff(gf) * ps * dc if ps is not None else get_gpu(gf).zeros_like_me()
    di = gate_diff(gi) * activated_u * dc
    d = gi * activation_diff(activated_u) * dc
    dx = dot(d, p["w"].T) \
        + dot(di, p["wi"].T) \
        + dot(do, p["wo"].T) \
        + dot(df, p["wf"].T)
    for dt_d, dt in zip(self.attrs._dt_d, (d, di, df, do, dc)):
        context.store(dt_d, get_gpu(dt))
    if isinstance(self.attrs._x, Node):
        self.attrs._x._update_diff(context, get_gpu(dx))
    for k, diff in zip(("w", "wo", "wi", "wf"), (d, do, di, df)):
        if isinstance(p[k], Node):
            p[k]._update_diff(context, get_gpu(dot(self.attrs._x.T, diff)))
    for k, diff in zip(("wr", "wor", "wir", "wfr"), (drt, doot, dit, dft)):
        if isinstance(p[k], Node):
            p[k]._update_diff(context, get_gpu(dot(self.T, diff)))
    for k, diff in zip(("wfc", "wic", "woc"), (dft, dit, do)):
        if isinstance(p[k], Node):
            p[k]._update_diff(context, sum(diff * get_gpu(s), axis=0))
    for k, diff in zip(("b", "bf", "bi", "bo"), (d, df, di, do)):
        if isinstance(p[k], Node):
            p[k]._update_diff(context, sum(diff, axis=0))
def _oper_gpu(cls, x, pz, ps, parameter):
    p = parameter
    s = get_gpu(np.zeros((x.shape[0], p["w"].shape[1]), dtype=precision)) if ps is None else ps
    z = get_gpu(s).zeros_like_me() if pz is None else pz
    u = dot(x, p["w"]) + dot(z, p["wr"]) + p["b"]
    gate_f = sigmoid(dot(x, p["wf"]) + dot(z, p["wfr"]) + p["wfc"] * s + p["bf"])
    gate_i = sigmoid(dot(x, p["wi"]) + dot(z, p["wir"]) + p["wic"] * s + p["bi"])
    state = gate_i * tanh(u) + gate_f * s
    gate_o = sigmoid(dot(x, p["wo"]) + dot(z, p["wor"]) + p["bo"] + p["woc"] * state)
    z = tanh(state) * gate_o
    ret = cls._create_node(get_gpu(z))
    ret.attrs._x = x
    ret.attrs._p = parameter
    ret.attrs._u = u
    ret.attrs._pgated_f = None
    ret.attrs._pstate = ps
    ret.attrs._state = state
    ret.attrs._gated_o = gate_o
    ret.attrs._gated_f = gate_f
    ret.attrs._gated_i = gate_i
    ret.attrs._dt_d = [p[k] for k in ["wr", "wi", "wf", "wo", "w"]]
    ret._state = state
    return ret
def _backward_gpu(self, context, dy, **kwargs):
    n, m = dy.shape
    w = self.attrs._w
    wr = self.attrs._wr
    wc = self.attrs._wc
    b = self.attrs._b
    u = self.attrs._u
    s = self.attrs._state
    ps = get_gpu(s).zeros_like_me() if self.attrs._pstate is None else self.attrs._pstate
    dot = context.restore(w, get_gpu(dy).zeros_like_me())
    drt = context.restore(wr, get_gpu(u).zeros_like_me())
    pfg = self.attrs.get("_pfgate", get_gpu(u).zeros_like_me())
    dr = get_gpu(drt).empty_like_me()
    dwc = GPUValue(shape=(n, m * 3))
    dou = get_gpu(dot).empty_like_me()
    cu.cupeepholelstm_backward(
        *map(get_gpu, (u, ps, s, pfg, wc, dy, drt, dot, dr, dou, dwc)))
    context.store(wr, dr)
    context.store(w, dou)
    if isinstance(self.attrs._x, Node):
        dx = op.dot(dr, w.T)
        self.attrs._x._update_diff(context, dx)
    if isinstance(w, Node):
        w._update_diff(context, op.dot(self.attrs._x.T, dr))
    if isinstance(wr, Node):
        wr._update_diff(context, op.dot(self.T, drt))
    if isinstance(wc, Node):
        wc._update_diff(context, op.sum(dwc, axis=0))
    if isinstance(b, Node):
        b._update_diff(context, op.sum(dr, axis=0))
    if isinstance(self.attrs._pz, Node):
        self.attrs._pz._update_diff(context, op.dot(dr, wr.T))
def _oper_gpu(cls, x, weight, gain, bias):
    assert len(x.shape) == 2, \
        "Currently only normalizes for dense networks."
    w = get_gpu(weight) / normalized_form(get_gpu(weight)) * get_gpu(gain)
    ret = cls._create_node(get_gpu(op.dot(get_gpu(x), w) + get_gpu(bias)))
    ret.attrs._x = x
    ret.attrs._w = w
    ret.attrs._weight = weight
    ret.attrs._gain = gain
    ret.attrs._bias = bias
    return ret
def _oper_cpu(cls, x, weight, gain, bias):
    assert len(x.shape) == 2, \
        "Currently only normalizes for dense networks."
    w = weight / normalized_form(weight) * gain
    ret = cls._create_node(op.dot(x, w) + bias)
    ret.attrs._x = x
    ret.attrs._w = w
    ret.attrs._weight = weight
    ret.attrs._gain = gain
    ret.attrs._bias = bias
    return ret
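# The CPU and GPU paths above both reparameterize the dense weight as
# w = gain * weight / ||weight|| before the usual dot-plus-bias. A minimal
# numpy sketch follows; treating normalized_form as the overall L2 norm is
# an assumption (the backward pass sums without an axis argument, which is
# consistent with it), not a confirmed detail of the library.
import numpy as np

def _normalized_form_sketch(w):
    # Assumed: overall L2 norm, kept broadcastable against w.
    return np.sqrt(np.sum(w ** 2, keepdims=True))

def _weight_norm_forward_sketch(x, weight, gain, bias):
    w = weight / _normalized_form_sketch(weight) * gain   # normalized weight
    return x @ w + bias                                   # forward pass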
def test_node_clear():
    DEBUG_GRAPH_INIT(True)
    a = Variable(np.random.rand(2, 2).astype(np.float32))
    b = Variable(np.random.rand(2, 2).astype(np.float32))
    layer = R.Lstm(2)
    c = layer(O.dot(a, b))  # NOQA
    DEBUG_NODE_STAT()
def test_gpu_node_dot(a, b):
    set_cuda_active(True)
    g1 = Variable(a)
    g2 = Variable(b)
    g3 = rm.sum(dot(g1, g2))
    g = g3.grad()
    g_g1 = g.get(g1)
    g_g2 = g.get(g2)
    g3.to_cpu()
    set_cuda_active(False)
    c3 = rm.sum(dot(g1, g2))
    c = c3.grad()
    c_g1 = c.get(g1)
    c_g2 = c.get(g2)
    close(g3, c3)
    close(c_g1, g_g1)
    close(c_g2, g_g2)
def _oper_cpu(cls, x, pz, w, u, b):
    # Initialize Variables
    m = w.shape[1] // 3
    w_z, w_r, w_h = np.split(w, [m, m * 2], axis=1)
    u_z, u_r, u_h = np.split(u, [m, m * 2], axis=1)
    hminus = Variable(np.zeros((x.shape[0], w.shape[1] // 3),
                               dtype=precision)) if pz is None else pz
    b_z, b_r, b_h = np.split(b, [m, m * 2], axis=1) if b is not None else (0, 0, 0)
    A = dot(x, w_z) + dot(hminus, u_z) + b_z
    B = dot(x, w_r) + dot(hminus, u_r) + b_r
    C = dot(x, w_h) + sigmoid(B) * dot(hminus, u_h) + b_h
    h = sigmoid(A) * hminus + (1 - sigmoid(A)) * tanh(C)
    # Store Variables for Graph
    ret = cls._create_node(h)
    ret.attrs._x = x
    ret.attrs._w = w
    ret.attrs._w_z = w_z
    ret.attrs._w_r = w_r
    ret.attrs._w_h = w_h
    ret.attrs._u = u
    ret.attrs._u_z = u_z
    ret.attrs._u_h = u_h
    ret.attrs._u_r = u_r
    ret.attrs._pz = hminus
    ret.attrs._A = A
    ret.attrs._B = B
    ret.attrs._C = C
    if b is not None:
        ret.attrs._b = b
    return ret
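# The same step written out as a self-contained numpy sketch (illustrative
# only). Both w and u pack [update | reset | candidate] blocks of width m
# along axis 1, matching the np.split calls above; the bias is assumed
# present here for brevity.
import numpy as np

def _gru_step_sketch(x, h_prev, w, u, b):
    sig = lambda v: 1.0 / (1.0 + np.exp(-v))
    m = w.shape[1] // 3
    w_z, w_r, w_h = np.split(w, [m, 2 * m], axis=1)
    u_z, u_r, u_h = np.split(u, [m, 2 * m], axis=1)
    b_z, b_r, b_h = np.split(b, [m, 2 * m], axis=1)
    A = x @ w_z + h_prev @ u_z + b_z                 # update gate pre-activation
    B = x @ w_r + h_prev @ u_r + b_r                 # reset gate pre-activation
    C = x @ w_h + sig(B) * (h_prev @ u_h) + b_h      # candidate pre-activation
    return sig(A) * h_prev + (1.0 - sig(A)) * np.tanh(C)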
def _backward_gpu(self, context, dy, **kwargs):
    w = self.attrs._w
    wr = self.attrs._wr
    b = self.attrs._b
    u = self.attrs._u
    s = tanh(self.attrs._state)
    ps = self.attrs._pstate
    drt = context.restore(wr, get_gpu(u).zeros_like_me())
    dou = context.restore(w, get_gpu(dy).zeros_like_me())
    pfg = self.attrs.get("_pfgate", get_gpu(u).zeros_like_me())
    e = get_gpu(dy)
    dr, dou_n = (get_gpu(a).empty_like_me() for a in (drt, dou))
    cu.culstm_backward(*map(get_gpu, (u, dr, s, ps, e, pfg, dou, dou_n)))
    dx = dot(dr, w.T)
    context.store(wr, dr)
    context.store(w, dou_n)
    if isinstance(self.attrs._x, Node):
        self.attrs._x._update_diff(context, dx)
    if isinstance(w, Node):
        w._update_diff(context, dot(self.attrs._x.T, dr))
    if isinstance(wr, Node):
        wr._update_diff(context, dot(self.T, drt))
    if isinstance(b, Node):
        b._update_diff(context, sum(dr, axis=0))
    if isinstance(self.attrs._pz, Node):
        self.attrs._pz._update_diff(context, dot(dr, wr.T))
def _backward_cpu(self, context, dy, **kwargs):
    x = self.attrs._x
    w_z = self.attrs._w_z
    w_r = self.attrs._w_r
    w_h = self.attrs._w_h
    A = self.attrs._A
    B = self.attrs._B
    C = self.attrs._C
    u_z = self.attrs._u_z
    u_h = self.attrs._u_h
    u_r = self.attrs._u_r
    hminus = self.attrs._pz
    y = dy
    dA = sigmoid_diff(A)
    dB = sigmoid_diff(B)
    dC = tanh_diff(C)
    # Calculate dx
    dx_z = dot(y * dA, w_z.T)
    dx_r = dot(y * dB * dC * u_h * hminus, w_r.T)
    dx_h = dot(y * dC, w_h.T)
    dx = dx_z + dx_r + dx_h
    # Calculate dw
    dw_z = dot(x.T, y * dA)
    dw_r = dot(x.T, y * dB * dC * u_h * hminus)
    dw_h = dot(x.T, y * dC)
    dw = np.concatenate([dw_z, dw_r, dw_h], axis=1)
    # Calculate db
    db_z = np.sum(y * dA, axis=0, keepdims=True)
    db_r = np.sum(y * dB * dC * u_h * hminus, axis=0, keepdims=True)
    db_h = np.sum(y * dC, axis=0, keepdims=True)
    db = np.concatenate([db_z, db_r, db_h], axis=1)
    # Calculate du
    du_z = np.sum(dA * hminus * y, axis=0, keepdims=True)
    du_r = np.sum(y * dC * dB * u_h * hminus * hminus, axis=0, keepdims=True)
    du_h = np.sum(sigmoid(B) * dC * y * hminus, axis=0, keepdims=True)
    du = np.concatenate([du_z, du_r, du_h], axis=1)
    # Calculate dpz
    pz_z = y * dA * u_z
    pz_r = y * dC * dB * u_h * hminus * u_r
    pz_h = y * dC * sigmoid(B) * u_h
    dpz = pz_z + pz_r + pz_h
    self.attrs._x._update_diff(context, dx)
    self.attrs._w._update_diff(context, dw)
    self.attrs._b._update_diff(context, db)
    self.attrs._u._update_diff(context, du)
    if isinstance(self.attrs._pz, Node):
        self.attrs._pz._update_diff(context, dpz)
def _oper_cpu(cls, x, pz, w, u, b):
    # Initialize Variables
    m = w.shape[1] // 3
    w_z, w_r, w_h = np.split(w, [m, m * 2], axis=1)
    u_z, u_r, u_h = np.split(u, [m, m * 2], axis=1)
    hminus = Variable(np.zeros((x.shape[0], w.shape[1] // 3),
                               dtype=precision)) if pz is None else pz
    # Perform Forward Calculations. Note the recurrent u terms are applied
    # elementwise here, and the output combines the gates additively
    # (h = sigmoid(A) + tanh(C)), unlike the convex-combination GRU above.
    if b is None:
        b_z = b_r = b_h = None  # avoid unbound names when storing attrs below
        A = dot(x, w_z) + hminus * u_z
        B = dot(x, w_r) + u_r * hminus
        C = dot(x, w_h) + sigmoid(B) * u_h * hminus
    else:
        b_z, b_r, b_h = np.split(b, [m, m * 2], axis=1)
        A = dot(x, w_z) + hminus * u_z + b_z
        B = dot(x, w_r) + u_r * hminus + b_r
        C = dot(x, w_h) + sigmoid(B) * u_h * hminus + b_h
    h = sigmoid(A) + tanh(C)
    # Store Variables for Graph
    ret = cls._create_node(h)
    ret.attrs._x = x
    ret.attrs._w = w
    ret.attrs._w_z = w_z
    ret.attrs._w_r = w_r
    ret.attrs._w_h = w_h
    ret.attrs._b = b
    ret.attrs._b_z = b_z
    ret.attrs._b_r = b_r
    ret.attrs._b_h = b_h
    ret.attrs._u = u
    ret.attrs._u_z = u_z
    ret.attrs._u_h = u_h
    ret.attrs._u_r = u_r
    ret.attrs._pz = hminus
    ret.attrs._A = A
    ret.attrs._B = B
    ret.attrs._C = C
    return ret
def _oper_gpu(cls, x, pz, w, u, b):
    # Initialize Variables
    m = w.shape[1] // 3
    hminus = Variable(np.zeros((x.shape[0], m),
                               dtype=precision)) if pz is None else pz
    get_gpu(hminus)
    # Perform Forward Calculations
    input = dot(get_gpu(x), get_gpu(w)) + get_gpu(b)
    ABC = get_gpu(input).empty_like_me()
    h = get_gpu(hminus).empty_like_me()
    cu.cugru_forward(get_gpu(input), get_gpu(hminus), get_gpu(u),
                     get_gpu(ABC), get_gpu(h))
    # Store Variables for Graph
    ret = cls._create_node(h)
    ret.attrs._x = x
    ret.attrs._w = w
    ret.attrs._b = b
    ret.attrs._u = u
    ret.attrs._pz = hminus
    ret.attrs._ABC = ABC
    return ret
def forward(self, x):
    return dot(x, self.params["w"])
def _backward_cpu(self, context, dy, **kwargs):
    x = self.attrs._x
    w_z = self.attrs._w_z
    w_r = self.attrs._w_r
    w_h = self.attrs._w_h
    A = self.attrs._A
    B = self.attrs._B
    C = self.attrs._C
    u_z = self.attrs._u_z
    u_h = self.attrs._u_h
    u_r = self.attrs._u_r
    hminus = self.attrs._pz
    y = dy
    dA = y * (hminus - tanh(C)) * sigmoid_diff(A)
    dC = y * (1 - sigmoid(A)) * tanh_diff(C)
    dB = dC * dot(hminus, u_h) * sigmoid_diff(B)
    # Calculate dx
    dx_z = dot(dA, w_z.T)
    dx_r = dot(dB, w_r.T)
    dx_h = dot(dC, w_h.T)
    dx = dx_z + dx_r + dx_h
    # Calculate dw
    dw_z = dot(x.T, dA)
    dw_r = dot(x.T, dB)
    dw_h = dot(x.T, dC)
    dw = np.concatenate([dw_z, dw_r, dw_h], axis=1)
    # Calculate db
    db_z = np.sum(dA, axis=0, keepdims=True)
    db_r = np.sum(dB, axis=0, keepdims=True)
    db_h = np.sum(dC, axis=0, keepdims=True)
    db = np.concatenate([db_z, db_r, db_h], axis=1)
    # Calculate du
    du_z = dot(hminus.T, dA)
    du_r = dot(hminus.T, dB)
    du_h = dot(hminus.T, dC * sigmoid(B))
    du = np.concatenate([du_z, du_r, du_h], axis=1)
    # Calculate dpz
    pz_z = dot(dA, u_z.T)
    pz_r = dot(dB, u_r.T)
    pz_h = dot(dC * sigmoid(B), u_h.T)
    dpz = pz_z + pz_r + pz_h + y * sigmoid(A)
    self.attrs._w._update_diff(context, dw)
    self.attrs._u._update_diff(context, du)
    if hasattr(self.attrs, "_b"):
        self.attrs._b._update_diff(context, db)
    if isinstance(self.attrs._x, Node):
        self.attrs._x._update_diff(context, dx)
    if isinstance(self.attrs._pz, Node):
        self.attrs._pz._update_diff(context, dpz)
def forward(self, x):
    z = dot(x, self.params["w"])
    if self.params.get("b", None) is not None:
        z += self.params["b"]
    return z
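# A hedged usage sketch for the forward above, in plain numpy: the params
# dict layout and optional-bias convention follow the code, but the concrete
# shapes and the absence of the surrounding layer class are assumptions.
import numpy as np

params = {"w": np.random.rand(4, 3).astype(np.float32),
          "b": np.zeros((1, 3), dtype=np.float32)}  # "b" may be absent
x = np.random.rand(2, 4).astype(np.float32)
z = x @ params["w"]
if params.get("b") is not None:
    z += params["b"]  # bias is broadcast over the batch dimension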