Example #1
    def _oper_cpu(cls, x, pz, ps, w, wr, b):
        s = np.zeros((x.shape[0], w.shape[1] // 4), dtype=precision) if ps is None else ps
        z = np.zeros((x.shape[0], w.shape[1] // 4), dtype=precision) if pz is None else pz

        u = dot(x, w) + dot(z, wr) + b
        m = u.shape[1] // 4
        # First m columns are the cell input; the remaining 3m columns are
        # the forget, input and output gates, in that order.
        u, gated = np.split(u, [m], axis=1)
        u = tanh(u)

        gated = sigmoid(gated)

        # New cell state: input gate * cell input + forget gate * previous state.
        state = gated[:, m:m * 2] * u + gated[:, :m] * s
        z = tanh(state) * gated[:, m * 2:]

        ret = cls._create_node(z)
        ret.attrs._x = x
        ret.attrs._w = w
        ret.attrs._wr = wr
        ret.attrs._b = b
        ret.attrs._pz = pz
        ret.attrs._u = u
        ret.attrs._pstate = ps
        ret.attrs._state = state
        ret.attrs._gated = gated
        ret._state = state

        if isinstance(pz, Node):
            pz.attrs._pfgate = gated[:, :m]

        return ret
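For reference, the same gate arithmetic as a self-contained NumPy function. This is an illustrative sketch; `lstm_step` and its argument names are not part of the library API:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_step(x, z_prev, s_prev, w, wr, b):
    # u packs all four pre-activations: [cell input | forget | input | output].
    u = x.dot(w) + z_prev.dot(wr) + b
    m = u.shape[1] // 4
    g = np.tanh(u[:, :m])            # cell input
    f = sigmoid(u[:, m:2 * m])       # forget gate (the stored _pfgate)
    i = sigmoid(u[:, 2 * m:3 * m])   # input gate
    o = sigmoid(u[:, 3 * m:])        # output gate
    s = i * g + f * s_prev           # new cell state
    z = np.tanh(s) * o               # new hidden output
    return z, s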
Example #2
    def _oper_gpu(cls, x, pz, ps, parameter):
        p = parameter

        if ps is None:
            tmp = GPUValue(shape=(x.shape[0], p["w"].shape[1] // 4))
            s_p = tmp.zeros_like_me()
            z_p = tmp.zeros_like_me()
        else:
            s_p = ps
            z_p = get_gpu(pz)

        u = dot(x, p["w"]) + dot(z_p, p["wr"]) + p["b"]

        z = get_gpu(z_p).empty_like_me()
        state = get_gpu(s_p).empty_like_me()

        cu.culstm_forward_activate(get_gpu(u))
        cu.culstm_forward(get_gpu(u), get_gpu(state), get_gpu(s_p), get_gpu(z))

        ret = cls._create_node(z)
        ret.attrs._x = x
        ret.attrs._p = parameter
        ret.attrs._u = u
        ret.attrs._pstate = s_p
        ret.attrs._state = state
        ret.attrs._dt_d = [p[k] for k in ["wr", "w"]]
        ret._state = state

        if isinstance(pz, Node):
            pz.attrs._pfgate = u

        return ret
Example #3
    def _backward_cpu(self, context, dy, **kwargs):
        x = self.attrs._x
        w = self.attrs._w
        gain = self.attrs._gain
        weight = self.attrs._weight
        dx = op.dot(dy, w.T)
        normal_dw = op.dot(x.T, dy)

        dgain = normal_dw * w / gain
        dw = w / weight * (normal_dw - np.sum(w * normal_dw / gain, keepdims=True) * w / gain)

        if isinstance(self.attrs._x, Node):
            self.attrs._x._update_diff(context, dx, **kwargs)

        if isinstance(self.attrs._gain, Node):
            self.attrs._gain._update_diff(context,
                                          np.sum(dgain, axis=0, keepdims=True), **kwargs)

        if isinstance(self.attrs._weight, Node):
            self.attrs._weight._update_diff(context, dw, **kwargs)

        if isinstance(self.attrs._bias, Node):
            db = dy
            self.attrs._bias._update_diff(context,
                                          np.sum(db, axis=0, keepdims=True), **kwargs)
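Hand-written backward passes like the one above are easy to get subtly wrong. A generic central-difference checker (a sketch, not part of the library) can be used to spot-check them against the analytic gradients:

import numpy as np

def numeric_grad(f, x, eps=1e-4):
    # Central-difference gradient of a scalar-valued f with respect to x.
    g = np.zeros_like(x)
    it = np.nditer(x, flags=["multi_index"])
    while not it.finished:
        idx = it.multi_index
        orig = x[idx]
        x[idx] = orig + eps
        fp = f(x)
        x[idx] = orig - eps
        fm = f(x)
        x[idx] = orig          # restore the perturbed entry
        g[idx] = (fp - fm) / (2 * eps)
        it.iternext()
    return g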
Example #4
    def _oper_gpu(cls, x, pz, ps, w, wr, wc, b):
        if ps is None:
            s_p = GPUValue(shape=(x.shape[0], w.shape[1] // 4)).zeros_like_me()
            z_p = s_p.zeros_like_me()
        else:
            s_p, z_p = map(get_gpu, (ps, pz))

        s = s_p.empty_like_me()
        u = op.dot(x, w) + op.dot(z_p, wr)
        if b is not None:
            u += b

        u = get_gpu(u)
        z = z_p.zeros_like_me()
        cu.cupeepholelstm_forward(u, get_gpu(wc), s_p, s, z)

        ret = cls._create_node(z)
        ret.attrs._x = x
        ret.attrs._w = w
        ret.attrs._wr = wr
        ret.attrs._wc = wc
        ret.attrs._b = b
        ret.attrs._u = u
        ret.attrs._pz = pz
        ret.attrs._pstate = ps
        ret.attrs._state = s

        if isinstance(pz, Node):
            pz.attrs._pfgate = u
        return ret
Example #5
def test_gpu_node_dot(a, b):
    set_cuda_active(True)

    g1 = Variable(a)
    g2 = Variable(b)

    g3 = dot(g1, g2)
    g4 = rm.sum(g3)
    g = g4.grad()
    g_g1 = g.get(g1)
    g_g2 = g.get(g2)
    g_g3 = g.get(g3)
    g3.to_cpu()
    g4.to_cpu()

    set_cuda_active(False)
    c3 = dot(g1, g2)
    c4 = rm.sum(c3)
    c = c4.grad()
    c_g1 = c.get(g1)
    c_g2 = c.get(g2)
    c_c3 = c.get(c3)

    close(g3, c3)
    close(g4, c4)
    close(c_g1, g_g1)
    close(c_g2, g_g2)
    close(c_c3, g_g3)
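The close helper is not defined in these snippets. A plausible stand-in, assuming it simply asserts elementwise closeness of two array-like values (the tolerances here are guesses):

import numpy as np

def close(a, b, rtol=1e-5, atol=1e-5):
    # Hypothetical: compare two array-like values (e.g. Nodes) elementwise.
    np.testing.assert_allclose(np.asarray(a), np.asarray(b), rtol=rtol, atol=atol)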
Example #6
    def _oper_gpu(cls, x, pz, ps, w, wr, b):
        if ps is None:
            tmp = GPUValue(shape=(x.shape[0], w.shape[1] // 4))
            s_p = tmp.zeros_like_me()
            z_p = tmp.zeros_like_me()
        else:
            s_p = ps
            z_p = get_gpu(pz)

        u = dot(x, w) + dot(z_p, wr)
        if b is not None:
            u += b

        z = get_gpu(z_p).empty_like_me()
        state = get_gpu(s_p).empty_like_me()

        cu.culstm_forward_activate(get_gpu(u))
        cu.culstm_forward(get_gpu(u), get_gpu(state), get_gpu(s_p), get_gpu(z))

        ret = cls._create_node(z)

        ret.attrs._x = x
        ret.attrs._w = w
        ret.attrs._wr = wr
        ret.attrs._b = b
        ret.attrs._pz = pz
        ret.attrs._u = u
        ret.attrs._pstate = s_p
        ret.attrs._state = state
        ret._state = state

        if isinstance(pz, Node):
            pz.attrs._pfgate = u

        return ret
Example #7
    def _backward_gpu(self, context, dy, **kwargs):
        x = get_gpu(self.attrs._x)
        w = get_gpu(self.attrs._w)
        gain = get_gpu(self.attrs._gain)
        weight = get_gpu(self.attrs._weight)
        dx = get_gpu(op.dot(dy, w.T))
        normal_dw = get_gpu(op.dot(x.T, dy))

        if isinstance(self.attrs._x, Node):
            self.attrs._x._update_diff(context, dx, **kwargs)

        if isinstance(self.attrs._gain, Node):
            dgain = normal_dw * w / gain
            self.attrs._gain._update_diff(context,
                                          op.sum(dgain, axis=0, keepdims=True), **kwargs)

        if isinstance(self.attrs._weight, Node):
            # weight, normal_dw and gain are already GPU values (see above),
            # so this mirrors the CPU expression directly.
            dw = w / weight * (normal_dw - op.sum(w * normal_dw / gain, keepdims=True) * w / gain)
            self.attrs._weight._update_diff(context, dw, **kwargs)

        if isinstance(self.attrs._bias, Node):
            db = dy
            self.attrs._bias._update_diff(context,
                                          op.sum(db, axis=0, keepdims=True), **kwargs)
Example #8
    def _backward_gpu(self, context, dy, **kwargs):
        x = self.attrs._x
        w = self.attrs._w
        b = self.attrs._b
        u = self.attrs._u
        hminus = self.attrs._pz
        ABC = self.attrs._ABC

        dx = get_gpu(x).empty_like_me()
        db = get_gpu(b).empty_like_me()
        yconc = get_gpu(ABC).empty_like_me()
        du = get_gpu(u).empty_like_me()
        dpz = get_gpu(hminus).empty_like_me()
        dxx = get_gpu(x).empty_like_me()

        cu.cugru_backward(get_gpu(ABC), get_gpu(dy), yconc, get_gpu(u),
                          get_gpu(hminus), db, du, dpz, dxx)
        # Calculate dx

        dx = get_gpu(dot(yconc, w.T))

        xconc = get_gpu(x.T)

        dw = dot(get_gpu(xconc), get_gpu(yconc))

        self.attrs._x._update_diff(context, dx)
        self.attrs._w._update_diff(context, dw)
        self.attrs._b._update_diff(context, db)
        self.attrs._u._update_diff(context, du)
        if isinstance(self.attrs._pz, Node):
            self.attrs._pz._update_diff(context, dpz)
Example #9
    def _backward_gpu(self, context, dy):
        p = self.attrs._p
        u = self.attrs._u
        s = tanh(self.attrs._state)
        ps = self.attrs._pstate

        drt = context.restore(p["wr"], get_gpu(u).zeros_like_me())
        dou = context.restore(p["w"], get_gpu(dy).zeros_like_me())
        pfg = getattr(self.attrs, "_pfgate", get_gpu(u).zeros_like_me())

        e = get_gpu(dy) + get_gpu(dot(drt, p["wr"].T))

        dr, dou_n = (get_gpu(a).empty_like_me() for a in (drt, dou))
        cu.culstm_backward(*map(get_gpu, (u, dr, s, ps, e, pfg, dou, dou_n)))

        dx = dot(dr, p["w"].T)

        context.store(p["wr"], dr)
        context.store(p["w"], dou_n)

        if isinstance(self.attrs._x, Node):
            self.attrs._x._update_diff(context, dx)

        if isinstance(p["w"], Node):
            p["w"]._update_diff(context, dot(self.attrs._x.T, dr))

        if isinstance(p["wr"], Node):
            p["wr"]._update_diff(context, dot(self.T, drt))

        if isinstance(p["b"], Node):
            p["b"]._update_diff(context, sum(dr, axis=0))
Example #10
    def _oper_cpu(cls, x, pz, ps, parameter):
        p = parameter
        s = np.zeros((x.shape[0], p["w"].shape[1] // 4), dtype=precision) if ps is None else ps
        z = np.zeros((x.shape[0], p["w"].shape[1] // 4), dtype=precision) if pz is None else pz

        u = dot(x, p["w"]) + dot(z, p["wr"]) + p["b"]
        m = u.shape[1] // 4
        u, gated = np.split(u, [m], axis=1)
        u = tanh(u)

        gated = sigmoid(gated)

        state = gated[:, m:m * 2] * u + gated[:, :m] * s
        z = tanh(state) * gated[:, m * 2:]

        ret = cls._create_node(z)
        ret.attrs._x = x
        ret.attrs._p = parameter
        ret.attrs._u = u
        ret.attrs._pstate = ps
        ret.attrs._state = state
        ret.attrs._gated = gated
        ret.attrs._dt_d = [p[k] for k in ["wr", "w"]]
        ret._state = state

        if isinstance(pz, Node):
            pz.attrs._pfgate = gated[:, :m]

        return ret
Example #11
    def _backward_gpu(self, context, dy):
        p = self.attrs._p
        s = self.attrs._state
        ps = self.attrs._pstate
        u = self.attrs._u

        go = self.attrs._gated_o
        gf = self.attrs._gated_f
        gi = self.attrs._gated_i
        pgf = get_gpu(gf).zeros_like_me() if self.attrs._pgated_f is None else self.attrs._pgated_f

        drt, dit, dft, doot, dct = (context.restore(dt, get_gpu(dy).zeros_like_me())
                                    for dt in self.attrs._dt_d)

        activated_s = tanh(s)
        activated_u = tanh(u)

        e = get_gpu(dy) + get_gpu(dot(drt, p["wr"].T)) \
            + get_gpu(dot(dit, p["wir"].T)) \
            + get_gpu(dot(dft, p["wfr"].T)) \
            + get_gpu(dot(doot, p["wor"].T))

        do = gate_diff(go) * activated_s * e
        ds = go * activation_diff(activated_s) * e
        dc = ds + pgf * dct + p["wfc"] * dft + p["wic"] * dit + p["woc"] * do

        df = gate_diff(gf) * ps * dc if ps is not None else get_gpu(gf).zeros_like_me()
        di = gate_diff(gi) * activated_u * dc

        d = gi * activation_diff(activated_u) * dc

        dx = dot(d, p["w"].T) \
            + dot(di, p["wi"].T) \
            + dot(do, p["wo"].T) \
            + dot(df, p["wf"].T)

        for dt_d, dt in zip(self.attrs._dt_d, (d, di, df, do, dc)):
            context.store(dt_d, get_gpu(dt))

        if isinstance(self.attrs._x, Node):
            self.attrs._x._update_diff(context, get_gpu(dx))

        for k, diff in zip(("w", "wo", "wi", "wf"), (d, do, di, df)):
            if isinstance(p[k], Node):
                p[k]._update_diff(context, get_gpu(dot(self.attrs._x.T, diff)))

        for k, diff in zip(("wr", "wor", "wir", "wfr"), (drt, doot, dit, dft)):
            if isinstance(p[k], Node):
                p[k]._update_diff(context, get_gpu(dot(self.T, diff)))

        for k, diff in zip(("wfc", "wic", "woc"), (dft, dit, do)):
            if isinstance(p[k], Node):
                p[k]._update_diff(context, sum(diff * get_gpu(s), axis=0))

        for k, diff in zip(("b", "bf", "bi", "bo"), (d, df, di, do)):
            if isinstance(p[k], Node):
                p[k]._update_diff(context, sum(diff, axis=0))
Example #12
    def _oper_gpu(cls, x, pz, ps, parameter):
        p = parameter
        s = get_gpu(np.zeros((x.shape[0], p["w"].shape[1]), dtype=precision)) if ps is None else ps
        z = get_gpu(s).zeros_like_me() if pz is None else pz

        u = dot(x, p["w"]) + dot(z, p["wr"]) + p["b"]

        gate_f = sigmoid(dot(x, p["wf"]) +
                         dot(z, p["wfr"]) + p["wfc"] * s + p["bf"])
        gate_i = sigmoid(dot(x, p["wi"]) +
                         dot(z, p["wir"]) + p["wic"] * s + p["bi"])

        state = gate_i * tanh(u) + gate_f * s

        gate_o = sigmoid(
            dot(x, p["wo"]) + dot(z, p["wor"]) + p["bo"] + p["woc"] * state)

        z = tanh(state) * gate_o

        ret = cls._create_node(get_gpu(z))
        ret.attrs._x = x
        ret.attrs._p = parameter
        ret.attrs._u = u
        ret.attrs._pgated_f = None
        ret.attrs._pstate = ps
        ret.attrs._state = state
        ret.attrs._gated_o = gate_o
        ret.attrs._gated_f = gate_f
        ret.attrs._gated_i = gate_i
        ret.attrs._dt_d = [p[k] for k in ["wr", "wi", "wf", "wo", "w"]]
        ret._state = state

        return ret
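In equation form (writing $\sigma$ for sigmoid and $\odot$ for the elementwise product), the forward pass above is the standard peephole LSTM update; this is a restatement of the code, not an additional source:

$$
\begin{aligned}
f_t &= \sigma(x_t W_f + z_{t-1} W_{fr} + w_{fc} \odot s_{t-1} + b_f)\\
i_t &= \sigma(x_t W_i + z_{t-1} W_{ir} + w_{ic} \odot s_{t-1} + b_i)\\
s_t &= i_t \odot \tanh(x_t W + z_{t-1} W_r + b) + f_t \odot s_{t-1}\\
o_t &= \sigma(x_t W_o + z_{t-1} W_{or} + w_{oc} \odot s_t + b_o)\\
z_t &= \tanh(s_t) \odot o_t
\end{aligned}
$$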
Example #13
    def _backward_gpu(self, context, dy, **kwargs):
        n, m = dy.shape

        w = self.attrs._w
        wr = self.attrs._wr
        wc = self.attrs._wc
        b = self.attrs._b

        u = self.attrs._u
        s = self.attrs._state
        ps = get_gpu(s).zeros_like_me() if self.attrs._pstate is None else self.attrs._pstate

        dot = context.restore(w, get_gpu(dy).zeros_like_me())  # note: shadows dot() in this scope
        drt = context.restore(wr, get_gpu(u).zeros_like_me())
        pfg = self.attrs.get("_pfgate", get_gpu(u).zeros_like_me())

        dr = get_gpu(drt).empty_like_me()
        dwc = GPUValue(shape=(n, m * 3))
        dou = get_gpu(dot).empty_like_me()

        cu.cupeepholelstm_backward(
            *map(get_gpu, (u, ps, s, pfg, wc, dy, drt, dot, dr, dou, dwc)))

        context.store(wr, dr)
        context.store(w, dou)

        if isinstance(self.attrs._x, Node):
            dx = op.dot(dr, w.T)
            self.attrs._x._update_diff(context, dx)

        if isinstance(w, Node):
            w._update_diff(context, op.dot(self.attrs._x.T, dr))

        if isinstance(wr, Node):
            wr._update_diff(context, op.dot(self.T, drt))

        if isinstance(wc, Node):
            wc._update_diff(context, op.sum(dwc, axis=0))

        if isinstance(b, Node):
            b._update_diff(context, op.sum(dr, axis=0))

        if isinstance(self.attrs._pz, Node):
            self.attrs._pz._update_diff(context, op.dot(dr, wr.T))
Example #14
    def _oper_gpu(cls, x, weight, gain, bias):
        assert len(x.shape) == 2, \
            "Currently only normalizes for dense networks."
        w = get_gpu(weight) / normalized_form(get_gpu(weight)) * get_gpu(gain)
        ret = cls._create_node(get_gpu(op.dot(get_gpu(x), w) + get_gpu(bias)))
        ret.attrs._x = x
        ret.attrs._w = w
        ret.attrs._weight = weight
        ret.attrs._gain = gain
        ret.attrs._bias = bias
        return ret
Example #15
    def _oper_cpu(cls, x, weight, gain, bias):
        assert len(x.shape) == 2, \
            "Currently only normalizes for dense networks."
        w = weight / normalized_form(weight) * gain
        ret = cls._create_node(op.dot(x, w) + bias)
        ret.attrs._x = x
        ret.attrs._w = w
        ret.attrs._weight = weight
        ret.attrs._gain = gain
        ret.attrs._bias = bias
        return ret
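Both versions implement weight normalization: the effective weight is the raw weight divided by its norm and rescaled by a learned gain. normalized_form is not shown in these snippets; the NumPy sketch below assumes it returns the scalar Euclidean norm of the whole matrix, which is consistent with the scalar np.sum(..., keepdims=True) in the backward pass of Example #3:

import numpy as np

def weight_norm_forward(x, weight, gain, bias):
    # Sketch under the assumption normalized_form(w) == sqrt(sum(w ** 2)).
    w = weight / np.sqrt(np.sum(weight ** 2)) * gain  # effective weight
    return x.dot(w) + bias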
Example #16
def test_node_clear():
    DEBUG_GRAPH_INIT(True)

    a = Variable(np.random.rand(2, 2).astype(np.float32))
    b = Variable(np.random.rand(2, 2).astype(np.float32))

    layer = R.Lstm(2)

    c = layer(O.dot(a, b))  # NOQA

    DEBUG_NODE_STAT()
Example #17
def test_gpu_node_dot(a, b):
    set_cuda_active(True)

    g1 = Variable(a)
    g2 = Variable(b)

    g3 = rm.sum(dot(g1, g2))
    g = g3.grad()
    g_g1 = g.get(g1)
    g_g2 = g.get(g2)
    g3.to_cpu()

    set_cuda_active(False)
    c3 = rm.sum(dot(g1, g2))
    c = c3.grad()
    c_g1 = c.get(g1)
    c_g2 = c.get(g2)

    close(g3, c3)
    close(c_g1, g_g1)
    close(c_g2, g_g2)
Example #18
    def _oper_cpu(cls, x, pz, w, u, b):
        # Initialize Variables
        m = w.shape[1] // 3
        w_z, w_r, w_h = np.split(w, [m, m * 2], axis=1)
        u_z, u_r, u_h = np.split(u, [m, m * 2], axis=1)
        hminus = Variable(np.zeros((x.shape[0], w.shape[1] // 3),
                                   dtype=precision)) if pz is None else pz

        b_z, b_r, b_h = np.split(b, [m, m * 2], axis=1) if b is not None else (0, 0, 0)
        A = dot(x, w_z) + dot(hminus, u_z) + b_z
        B = dot(x, w_r) + dot(hminus, u_r) + b_r
        C = dot(x, w_h) + sigmoid(B) * dot(hminus, u_h) + b_h

        h = sigmoid(A) * hminus + (1 - sigmoid(A)) * tanh(C)

        # Store Variables for Graph
        ret = cls._create_node(h)
        ret.attrs._x = x
        ret.attrs._w = w
        ret.attrs._w_z = w_z
        ret.attrs._w_r = w_r
        ret.attrs._w_h = w_h
        ret.attrs._u = u
        ret.attrs._u_z = u_z
        ret.attrs._u_h = u_h
        ret.attrs._u_r = u_r
        ret.attrs._pz = hminus
        ret.attrs._A = A
        ret.attrs._B = B
        ret.attrs._C = C

        if b is not None:
            ret.attrs._b = b

        return ret
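In equation form, the A, B and C stored on the node are the update-gate, reset-gate and candidate pre-activations of a standard GRU (a restatement of the code above):

$$
\begin{aligned}
A &= x_t W_z + h_{t-1} U_z + b_z, \qquad B = x_t W_r + h_{t-1} U_r + b_r,\\
C &= x_t W_h + \sigma(B) \odot (h_{t-1} U_h) + b_h,\\
h_t &= \sigma(A) \odot h_{t-1} + (1 - \sigma(A)) \odot \tanh(C).
\end{aligned}
$$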
Example #19
    def _backward_gpu(self, context, dy, **kwargs):

        w = self.attrs._w
        wr = self.attrs._wr
        b = self.attrs._b

        u = self.attrs._u
        s = tanh(self.attrs._state)
        ps = self.attrs._pstate

        drt = context.restore(wr, get_gpu(u).zeros_like_me())
        dou = context.restore(w, get_gpu(dy).zeros_like_me())
        pfg = self.attrs.get("_pfgate", get_gpu(u).zeros_like_me())

        e = get_gpu(dy)

        dr, dou_n = (get_gpu(a).empty_like_me() for a in (drt, dou))

        cu.culstm_backward(*map(get_gpu, (u, dr, s, ps, e, pfg, dou, dou_n)))

        dx = dot(dr, w.T)

        context.store(wr, dr)
        context.store(w, dou_n)

        if isinstance(self.attrs._x, Node):
            self.attrs._x._update_diff(context, dx)

        if isinstance(w, Node):
            w._update_diff(context, dot(self.attrs._x.T, dr))

        if isinstance(wr, Node):
            wr._update_diff(context, dot(self.T, drt))

        if isinstance(b, Node):
            b._update_diff(context, sum(dr, axis=0))

        if isinstance(self.attrs._pz, Node):
            self.attrs._pz._update_diff(context, dot(dr, wr.T))
Example #20
    def _backward_cpu(self, context, dy, **kwargs):
        x = self.attrs._x
        w_z = self.attrs._w_z
        w_r = self.attrs._w_r
        w_h = self.attrs._w_h
        A = self.attrs._A
        B = self.attrs._B
        C = self.attrs._C
        u_z = self.attrs._u_z
        u_h = self.attrs._u_h
        u_r = self.attrs._u_r
        hminus = self.attrs._pz
        y = dy

        dA = sigmoid_diff(A)
        dB = sigmoid_diff(B)
        dC = tanh_diff(C)

        # Calculate dx
        dx_z = dot(y * dA, w_z.T)
        dx_r = dot(y * dB * dC * u_h * hminus, w_r.T)
        dx_h = dot(y * dC, w_h.T)
        dx = dx_z + dx_r + dx_h

        # Calculate dw
        dw_z = dot(x.T, y * dA)
        dw_r = dot(x.T, y * dB * dC * u_h * hminus)
        dw_h = dot(x.T, y * dC)
        dw = np.concatenate([dw_z, dw_r, dw_h], axis=1)

        # Calculate db
        db_z = np.sum(y * dA, axis=0, keepdims=True)
        db_r = np.sum(y * dB * dC * u_h * hminus, axis=0, keepdims=True)
        db_h = np.sum(y * dC, axis=0, keepdims=True)
        db = np.concatenate([db_z, db_r, db_h], axis=1)

        du_z = np.sum(dA * hminus * y, axis=0, keepdims=True)
        du_r = np.sum(y * dC * dB * u_h * hminus * hminus, axis=0, keepdims=True)
        du_h = np.sum(sigmoid(B) * dC * y * hminus, axis=0, keepdims=True)
        du = np.concatenate([du_z, du_r, du_h], axis=1)

        pz_z = y * dA * u_z
        pz_r = y * dC * dB * u_h * hminus * u_r
        pz_h = y * dC * sigmoid(B) * u_h

        dpz = pz_z + pz_r + pz_h

        self.attrs._x._update_diff(context, dx)
        self.attrs._w._update_diff(context, dw)
        self.attrs._b._update_diff(context, db)
        self.attrs._u._update_diff(context, du)
        if isinstance(self.attrs._pz, Node):
            self.attrs._pz._update_diff(context, dpz)
Example #21
    def _oper_cpu(cls, x, pz, w, u, b):
        # Initialize Variables
        m = w.shape[1] // 3
        w_z, w_r, w_h = np.split(w, [m, m * 2], axis=1)
        u_z, u_r, u_h = np.split(u, [m, m * 2], axis=1)
        hminus = Variable(
            np.zeros((x.shape[0],
                      w.shape[1] // 3), dtype=precision)) if pz is None else pz

        # Perform forward calculations
        if b is None:
            b_z = b_r = b_h = None  # avoid a NameError when storing attrs below
            A = dot(x, w_z) + hminus * u_z
            B = dot(x, w_r) + u_r * hminus
            C = dot(x, w_h) + sigmoid(B) * u_h * hminus
        else:
            b_z, b_r, b_h = np.split(b, [m, m * 2], axis=1)
            A = dot(x, w_z) + hminus * u_z + b_z
            B = dot(x, w_r) + u_r * hminus + b_r
            C = dot(x, w_h) + sigmoid(B) * u_h * hminus + b_h

        h = sigmoid(A) + tanh(C)

        # Store Variables for Graph
        ret = cls._create_node(h)
        ret.attrs._x = x
        ret.attrs._w = w
        ret.attrs._w_z = w_z
        ret.attrs._w_r = w_r
        ret.attrs._w_h = w_h
        ret.attrs._b = b
        ret.attrs._b_z = b_z
        ret.attrs._b_r = b_r
        ret.attrs._b_h = b_h
        ret.attrs._u = u
        ret.attrs._u_z = u_z
        ret.attrs._u_h = u_h
        ret.attrs._u_r = u_r
        ret.attrs._pz = hminus
        ret.attrs._A = A
        ret.attrs._B = B
        ret.attrs._C = C

        return ret
Example #22
    def _oper_gpu(cls, x, pz, w, u, b):
        # Initialize Variables
        m = w.shape[1] // 3
        hminus = Variable(np.zeros(
            (x.shape[0], m), dtype=precision)) if pz is None else pz
        get_gpu(hminus)  # ensure hminus is materialized on the GPU before the kernel call
        # Perform forward calculations
        input = dot(get_gpu(x), get_gpu(w)) + get_gpu(b)
        ABC = get_gpu(input).empty_like_me()
        h = get_gpu(hminus).empty_like_me()
        cu.cugru_forward(get_gpu(input), get_gpu(hminus), get_gpu(u),
                         get_gpu(ABC), get_gpu(h))

        # Store Variables for Graph
        ret = cls._create_node(h)
        ret.attrs._x = x
        ret.attrs._w = w
        ret.attrs._b = b
        ret.attrs._u = u
        ret.attrs._pz = hminus
        ret.attrs._ABC = ABC

        return ret
Example #23
    def forward(self, x):
        return dot(x, self.params["w"])
Example #24
    def _backward_cpu(self, context, dy, **kwargs):
        x = self.attrs._x
        w_z = self.attrs._w_z
        w_r = self.attrs._w_r
        w_h = self.attrs._w_h
        A = self.attrs._A
        B = self.attrs._B
        C = self.attrs._C
        u_z = self.attrs._u_z
        u_h = self.attrs._u_h
        u_r = self.attrs._u_r
        hminus = self.attrs._pz
        y = dy

        dA = y * (hminus - tanh(C)) * sigmoid_diff(A)
        dC = y * (1 - sigmoid(A)) * tanh_diff(C)
        dB = dC * dot(hminus, u_h) * sigmoid_diff(B)

        # Calculate dx
        dx_z = dot(dA, w_z.T)
        dx_r = dot(dB, w_r.T)
        dx_h = dot(dC, w_h.T)
        dx = dx_z + dx_r + dx_h

        # Calculate dw
        dw_z = dot(x.T, dA)
        dw_r = dot(x.T, dB)
        dw_h = dot(x.T, dC)
        dw = np.concatenate([dw_z, dw_r, dw_h], axis=1)

        # Calculate db
        db_z = np.sum(dA, axis=0, keepdims=True)
        db_r = np.sum(dB, axis=0, keepdims=True)
        db_h = np.sum(dC, axis=0, keepdims=True)
        db = np.concatenate([db_z, db_r, db_h], axis=1)

        du_z = dot(hminus.T, dA)
        du_r = dot(hminus.T, dB)
        du_h = dot(hminus.T, dC * sigmoid(B))
        du = np.concatenate([du_z, du_r, du_h], axis=1)

        pz_z = dot(dA, u_z.T)
        pz_r = dot(dB, u_r.T)
        pz_h = dot(dC * sigmoid(B), u_h.T)

        dpz = pz_z + pz_r + pz_h + y * sigmoid(A)

        self.attrs._w._update_diff(context, dw)
        self.attrs._u._update_diff(context, du)

        if hasattr(self.attrs, "_b"):
            self.attrs._b._update_diff(context, db)

        if isinstance(self.attrs._x, Node):
            self.attrs._x._update_diff(context, dx)

        if isinstance(self.attrs._pz, Node):
            self.attrs._pz._update_diff(context, dpz)
Example #25
    def forward(self, x):
        z = dot(x, self.params["w"])
        if self.params.get("b", None) is not None:
            z += self.params["b"]
        return z
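For context, a self-contained NumPy rendering of this forward pass; DenseSketch and its initialization are illustrative, not the library's layer class:

import numpy as np

class DenseSketch:
    def __init__(self, in_dim, out_dim, use_bias=True):
        # Hypothetical initialization; the real layer manages its params itself.
        self.params = {"w": np.random.randn(in_dim, out_dim).astype(np.float32)}
        if use_bias:
            self.params["b"] = np.zeros((1, out_dim), dtype=np.float32)

    def forward(self, x):
        z = x.dot(self.params["w"])
        if self.params.get("b", None) is not None:
            z += self.params["b"]
        return z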