Example no. 1
    def backward_pass(self, buffers):

        # prepare
        _h = self.handler
        inputs = buffers.inputs.default
        outputs = buffers.outputs.default
        in_deltas = buffers.input_deltas.default
        out_deltas = buffers.output_deltas.default

        # reshape
        flat_inputs = flatten_time(inputs)
        flat_in_deltas = flatten_time(in_deltas)
        flat_out_deltas = flatten_time(out_deltas)
        flat_outputs = flatten_time(outputs)

        if self.type == 'max':
            argmax = buffers.internals.argmax
            flat_argmax = flatten_time(argmax)
            _h.maxpool2d_backward_batch(flat_inputs, self.kernel_size,
                                        flat_outputs, self.padding,
                                        self.stride, flat_argmax,
                                        flat_in_deltas, flat_out_deltas)
        elif self.type == 'avg':
            _h.avgpool2d_backward_batch(flat_inputs, self.kernel_size,
                                        flat_outputs, self.padding,
                                        self.stride,
                                        flat_in_deltas, flat_out_deltas)
Example no. 2
    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        W, R, bias, timing = buffers.parameters
        inputs = buffers.inputs.default
        outputs = buffers.outputs.default
        Ha = buffers.internals.Ha

        flat_inputs = flatten_time(inputs)
        flat_H = flatten_time(Ha[:-1])

        _h.dot_mm(flat_inputs, W, flat_H, transb=True)
        _h.add_mv(flat_H, bias.reshape((1, self.size)), flat_H)

        tmp = _h.zeros(timing.shape)
        cond = _h.zeros(outputs[0].shape)
        for t in range(inputs.shape[0]):
            _h.dot_add_mm(outputs[t - 1], R, Ha[t], transb=True)
            _h.act_func[self.activation](Ha[t], outputs[t])
            # Undo updates for inactive units (t is not a multiple of their period)
            if t > 0:
                _h.fill(tmp, t)
                _h.modulo_tt(tmp, timing, tmp)
                _h.broadcast_t(tmp.reshape((1, tmp.shape[0])), 0, cond)
                _h.copy_to_if(outputs[t - 1], outputs[t], cond)
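The fill / modulo_tt / broadcast_t / copy_to_if sequence above implements clockwork-style gating: a unit with period p updates only at steps where t is a multiple of p and otherwise keeps its previous output. A minimal NumPy sketch of the inactivity condition, assuming modulo_tt is an element-wise modulo (an illustration, not the handler API):

import numpy as np

def clockwork_inactive_mask_sketch(t, timing, batch_size):
    # Nonzero where a unit is inactive at step t (t is not a multiple of its
    # period); broadcast over the batch like broadcast_t in the pass above.
    cond = ((t % timing) != 0).astype(float)      # shape: (size,)
    return np.broadcast_to(cond, (batch_size, timing.shape[0]))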
Example no. 3
    def backward_pass(self, buffers):

        # prepare
        _h = self.handler
        inputs = buffers.inputs.default
        outputs = buffers.outputs.default
        in_deltas = buffers.input_deltas.default
        out_deltas = buffers.output_deltas.default

        # reshape
        flat_inputs = flatten_time(inputs)
        flat_in_deltas = flatten_time(in_deltas)
        flat_out_deltas = flatten_time(out_deltas)
        flat_outputs = flatten_time(outputs)

        if self.type == 'max':
            argmax = buffers.internals.argmax
            flat_argmax = flatten_time(argmax)
            _h.maxpool2d_backward_batch(flat_inputs, self.kernel_size,
                                        flat_outputs, self.padding,
                                        self.stride, flat_argmax,
                                        flat_in_deltas, flat_out_deltas)
        elif self.type == 'avg':
            _h.avgpool2d_backward_batch(flat_inputs, self.kernel_size,
                                        flat_outputs, self.padding,
                                        self.stride,
                                        flat_in_deltas, flat_out_deltas)
Example no. 4
    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        W, R, bias = buffers.parameters
        dW, dR, dbias = buffers.gradients
        inputs = buffers.inputs.default
        outputs = buffers.outputs.default
        dinputs = buffers.input_deltas.default
        doutputs = buffers.output_deltas.default
        Ha, dHa, dHb = buffers.internals

        _h.copy_to(doutputs, dHb)
        T = inputs.shape[0] - 1
        _h.act_func_deriv[self.activation](Ha[T], outputs[T], dHb[T], dHa[T])
        for t in range(T - 1, -1, -1):
            _h.dot_add_mm(dHa[t + 1], R, dHb[t])
            _h.act_func_deriv[self.activation](Ha[t], outputs[t],
                                               dHb[t], dHa[t])

        flat_inputs = flatten_time_and_features(inputs)
        flat_dinputs = flatten_time_and_features(dinputs)
        flat_dHa = flatten_time(dHa[:-1])

        # calculate in_deltas and gradients
        _h.dot_add_mm(flat_dHa, W, flat_dinputs)
        _h.dot_add_mm(flat_dHa, flat_inputs, dW, transa=True)
        dbias_tmp = _h.allocate(dbias.shape)
        _h.sum_t(flat_dHa, axis=0, out=dbias_tmp)
        _h.add_tt(dbias, dbias_tmp, dbias)

        flat_outputs = flatten_time(outputs[:-2])
        flat_dHa = flatten_time(dHa[1:-1])
        _h.dot_add_mm(flat_dHa, flat_outputs, dR, transa=True)
        _h.dot_add_mm(dHa[0], outputs[-1], dR, transa=True)
Example no. 5
    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        W, R, bias = buffers.parameters
        dW, dR, dbias = buffers.gradients
        inputs = buffers.inputs.default
        outputs = buffers.outputs.default
        dinputs = buffers.input_deltas.default
        doutputs = buffers.output_deltas.default
        Ha, dHa, dHb = buffers.internals

        _h.copy_to(doutputs, dHb)
        T = inputs.shape[0] - 1
        _h.act_func_deriv[self.activation](Ha[T], outputs[T], dHb[T], dHa[T])
        for t in range(T - 1, -1, -1):
            _h.dot_add_mm(dHa[t + 1], R, dHb[t])
            _h.act_func_deriv[self.activation](Ha[t], outputs[t], dHb[t],
                                               dHa[t])

        flat_inputs = flatten_time_and_features(inputs)
        flat_dinputs = flatten_time_and_features(dinputs)
        flat_dHa = flatten_time(dHa[:-1])

        # calculate in_deltas and gradients
        _h.dot_add_mm(flat_dHa, W, flat_dinputs)
        _h.dot_add_mm(flat_dHa, flat_inputs, dW, transa=True)
        dbias_tmp = _h.allocate(dbias.shape)
        _h.sum_t(flat_dHa, axis=0, out=dbias_tmp)
        _h.add_tt(dbias, dbias_tmp, dbias)

        flat_outputs = flatten_time(outputs[:-2])
        flat_dHa = flatten_time(dHa[1:-1])
        _h.dot_add_mm(flat_dHa, flat_outputs, dR, transa=True)
        _h.dot_add_mm(dHa[0], outputs[-1], dR, transa=True)
Example no. 6
    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        W_H, W_T, R_T, bias_T, R_H, bias_H = buffers.parameters

        inputs = buffers.inputs.default
        outputs = buffers.outputs.default

        H_list = []
        T_list = []
        Y_list = []

        for i in range(self.recurrence_depth):
            H_list.append(buffers.internals['H_{}'.format(i)])
            T_list.append(buffers.internals['T_{}'.format(i)])
            Y_list.append(buffers.internals['Y_{}'.format(i)])

        flat_inputs = flatten_time_and_features(inputs)

        flat_H = flatten_time(H_list[0][:-1])
        flat_T = flatten_time(T_list[0][:-1])

        _h.dot_mm(flat_inputs, W_H, flat_H, transb=True)
        _h.dot_mm(flat_inputs, W_T, flat_T, transb=True)

        for t in range(inputs.shape[0]):
            for i in range(self.recurrence_depth):
                if i == 0:
                    x = outputs[t - 1]
                    _h.dot_add_mm(x, R_T[i], T_list[i][t], transb=True)
                    _h.add_mv(T_list[i][t], bias_T[i].reshape((1, self.size)),
                              T_list[i][t])
                    _h.inplace_act_func['sigmoid'](T_list[i][t])
                    _h.dot_add_mm(x, R_H[i], H_list[i][t], transb=True)
                    _h.add_mv(H_list[i][t], bias_H[i].reshape((1, self.size)),
                              H_list[i][t])
                    _h.inplace_act_func[self.activation](H_list[i][t])
                else:
                    x = Y_list[i - 1][t]
                    _h.dot_mm(x, R_T[i], T_list[i][t], transb=True)
                    _h.add_mv(T_list[i][t], bias_T[i].reshape((1, self.size)),
                              T_list[i][t])
                    _h.inplace_act_func['sigmoid'](T_list[i][t])
                    _h.dot_mm(x, R_H[i], H_list[i][t], transb=True)
                    _h.add_mv(H_list[i][t], bias_H[i].reshape((1, self.size)),
                              H_list[i][t])
                    _h.inplace_act_func[self.activation](H_list[i][t])

                if i == 0:
                    _h.mult_tt(T_list[i][t], H_list[i][t], out=Y_list[i][t])
                    tmp = _h.ones(H_list[i][t].shape)
                    _h.subtract_tt(tmp, T_list[i][t], tmp)
                    _h.mult_add_tt(tmp, outputs[t - 1], out=Y_list[i][t])
                else:
                    _h.mult_tt(T_list[i][t], H_list[i][t], out=Y_list[i][t])
                    tmp = _h.ones(H_list[i][t].shape)
                    _h.subtract_tt(tmp, T_list[i][t], tmp)
                    _h.mult_add_tt(tmp, Y_list[i - 1][t], out=Y_list[i][t])
            _h.copy_to(Y_list[self.recurrence_depth - 1][t], outputs[t])
Example no. 7
    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        (Wz, Wi, Wf, Wo,
         pi, pf, po,
         Rz, Ri, Rf, Ro,
         bz, bi, bf, bo) = buffers.parameters

        (Za, Zb, Ia, Ib, Fa, Fb, Oa, Ob, Ca, Cb,
         dZa, dZb, dIa, dIb, dFa, dFb, dOa, dOb, dCa, dCb) = buffers.internals
        x = buffers.inputs.default
        y = buffers.outputs.default

        time_size, batch_size, in_size = x.shape

        flat_x = flatten_time(x)
        flat_Za = flatten_time(Za[:-1])
        flat_Ia = flatten_time(Ia[:-1])
        flat_Fa = flatten_time(Fa[:-1])
        flat_Oa = flatten_time(Oa[:-1])
        _h.dot_mm(flat_x, Wz, flat_Za, transb=True)
        _h.dot_mm(flat_x, Wi, flat_Ia, transb=True)
        _h.dot_mm(flat_x, Wf, flat_Fa, transb=True)
        _h.dot_mm(flat_x, Wo, flat_Oa, transb=True)

        for t in range(time_size):
            # Block input
            _h.dot_add_mm(y[t - 1], Rz, Za[t], transb=True)
            _h.add_mv(Za[t], bz.reshape((1, self.size)), Za[t])
            _h.act_func[self.activation](Za[t], Zb[t])

            # Input Gate
            _h.dot_add_mm(y[t - 1], Ri, Ia[t], transb=True)
            _h.mult_add_mv(Ca[t - 1], pi, Ia[t])
            _h.add_mv(Ia[t], bi.reshape((1, self.size)), Ia[t])
            _h.sigmoid(Ia[t], Ib[t])

            # Forget Gate
            _h.dot_add_mm(y[t - 1], Rf, Fa[t], transb=True)
            _h.mult_add_mv(Ca[t - 1], pf, Fa[t])
            _h.add_mv(Fa[t], bf.reshape((1, self.size)), Fa[t])
            _h.sigmoid(Fa[t], Fb[t])

            # Cell
            _h.mult_tt(Ib[t], Zb[t], Ca[t])
            _h.mult_add_tt(Fb[t], Ca[t - 1], Ca[t])

            # Output Gate
            _h.dot_add_mm(y[t - 1], Ro, Oa[t], transb=True)
            _h.mult_add_mv(Ca[t], po, Oa[t])
            _h.add_mv(Oa[t], bo.reshape((1, self.size)), Oa[t])
            _h.sigmoid(Oa[t], Ob[t])

            # Block output
            _h.act_func[self.activation](Ca[t], Cb[t])
            _h.mult_tt(Ob[t], Cb[t], y[t])
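For reference, one time step of the peephole LSTM above corresponds roughly to the following NumPy computation. This is a sketch under the assumption that self.activation is tanh and that states are row matrices of shape (batch, size); it mirrors the handler calls but is not the layer's implementation:

import numpy as np

def _sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def lstm_step_sketch(x_t, y_prev, c_prev, params):
    # params is assumed to hold the same tensors as buffers.parameters above.
    Wz, Wi, Wf, Wo, pi, pf, po, Rz, Ri, Rf, Ro, bz, bi, bf, bo = params
    z = np.tanh(x_t @ Wz.T + y_prev @ Rz.T + bz)                  # block input
    i = _sigmoid(x_t @ Wi.T + y_prev @ Ri.T + c_prev * pi + bi)   # input gate
    f = _sigmoid(x_t @ Wf.T + y_prev @ Rf.T + c_prev * pf + bf)   # forget gate
    c = i * z + f * c_prev                                        # cell state
    o = _sigmoid(x_t @ Wo.T + y_prev @ Ro.T + c * po + bo)        # output gate
    y = o * np.tanh(c)                                            # block output
    return y, c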
Example no. 8
    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        W, bias = buffers.parameters
        inputs = buffers.inputs.default
        outputs = buffers.outputs.default

        # reshape
        flat_inputs = flatten_time(inputs)
        flat_outputs = flatten_time(outputs)

        # calculate outputs
        _h.conv2d_forward_batch(flat_inputs, W, bias, flat_outputs,
                                self.padding, self.stride)
        _h.inplace_act_func[self.activation](outputs)
Example no. 9
    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        W, bias = buffers.parameters
        inputs = buffers.inputs.default
        outputs = buffers.outputs.default

        # reshape
        flat_inputs = flatten_time(inputs)
        flat_outputs = flatten_time(outputs)

        # calculate outputs
        _h.conv2d_forward_batch(flat_inputs, W, bias, flat_outputs,
                                self.padding, self.stride)
        _h.inplace_act_func[self.activation](outputs)
Example no. 10
    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        y = buffers.inputs.default
        t = buffers.inputs.targets
        cee = buffers.internals.cee
        cee_sum = buffers.outputs.default

        # the binomial cross entropy error is given by
        # - t * ln(y) - (1-t) * ln(1-y)
        tmp = _h.ones(cee.shape)
        _h.subtract_tt(tmp, y, cee)     # cee = 1-y
        _h.subtract_tt(tmp, t, tmp)     # tmp  = 1-t
        _h.clip_t(cee, 1e-6, 1.0, cee)
        _h.log_t(cee, cee)              # cee = ln(1-y)
        _h.mult_tt(tmp, cee, tmp)  # tmp = (1-t) * ln(1-y)

        _h.clip_t(y, 1e-6, 1.0, cee)
        _h.log_t(cee, cee)              # cee = ln(y)
        _h.mult_tt(t, cee, cee)    # cee = t * ln(y)

        _h.add_tt(tmp, cee, cee)        # cee = (1-t) * ln(1-y) + t * ln(y)

        # reshape for summation
        cee = flatten_time_and_features(cee)
        cee_sum = flatten_time(cee_sum)
        _h.sum_t(cee, axis=1, out=cee_sum)
        _h.mult_st(-1, cee_sum, cee_sum)  # * -1
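The comments above spell out the binomial cross-entropy; as a sanity check, the same computation in plain NumPy might look like this (a sketch, assuming y and t are arrays of matching shape and that the loss is summed over the trailing feature axis, as the handler code does after flattening):

import numpy as np

def binomial_cee_sketch(y, t, eps=1e-6):
    # - t * ln(y) - (1 - t) * ln(1 - y), clipped for numerical stability
    log_y = np.log(np.clip(y, eps, 1.0))
    log_one_minus_y = np.log(np.clip(1.0 - y, eps, 1.0))
    cee = t * log_y + (1.0 - t) * log_one_minus_y
    return -cee.sum(axis=-1)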
Example no. 11
def test_flatten_time():
    # Testing for NumpyHandler only
    _h = NumpyHandler(np.float64)
    shape = (2, 3, 2, 4)
    x = np.random.randn(*shape)
    y = flatten_time(x).copy()
    yp = x.reshape((6, 2, 4))
    assert np.allclose(y, yp)
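The test above relies only on the reshaping behaviour of flatten_time: the leading time and batch axes are merged and the feature axes are kept. A minimal sketch of such a helper under that assumption (an illustration, not the library's implementation):

import numpy as np

def flatten_time_sketch(array):
    # (T, B, ...) -> (T * B, ...)
    t, b = array.shape[0], array.shape[1]
    return array.reshape((t * b,) + array.shape[2:])

x = np.random.randn(2, 3, 2, 4)
assert flatten_time_sketch(x).shape == (6, 2, 4)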
Example no. 12
def test_flatten_time():
    # Testing for NumpyHandler only
    _h = NumpyHandler(np.float64)
    shape = (2, 3, 2, 4)
    x = np.random.randn(*shape)
    y = flatten_time(x).copy()
    yp = x.reshape((6, 2, 4))
    assert np.allclose(y, yp)
Example no. 13
    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        W, R, bias = buffers.parameters
        inputs = buffers.inputs.default
        outputs = buffers.outputs.default
        Ha = buffers.internals.Ha

        flat_inputs = flatten_time(inputs)
        flat_H = flatten_time(Ha[:-1])

        _h.dot_mm(flat_inputs, W, flat_H, transb=True)
        _h.add_mv(flat_H, bias.reshape((1, self.size)), flat_H)

        for t in range(inputs.shape[0]):
            _h.dot_add_mm(outputs[t - 1], R, Ha[t], transb=True)
            _h.act_func[self.activation](Ha[t], outputs[t])
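In plain NumPy the recurrence above is the classic RNN update y_t = f(x_t W^T + y_{t-1} R^T + b). A minimal sketch under that reading, with tanh standing in for self.activation and a zero initial state (an illustration, not the layer's implementation):

import numpy as np

def rnn_forward_sketch(x, W, R, b):
    # x: (T, B, in_size), W: (size, in_size), R: (size, size), b: (size,)
    T, B, _ = x.shape
    y = np.zeros((T, B, W.shape[0]))
    h_prev = np.zeros((B, W.shape[0]))
    for t in range(T):
        y[t] = np.tanh(x[t] @ W.T + h_prev @ R.T + b)
        h_prev = y[t]
    return y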
Example no. 14
    def forward_pass(self, buffers, training_pass=True):
        _h = self.handler

        flat_inp = flatten_time_and_features(buffers.inputs.default)
        flat_mask = flatten_time(buffers.inputs.mask)
        flat_out = flatten_time_and_features(buffers.outputs.default)

        _h.mult_mv(flat_inp, flat_mask, out=flat_out)
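The mask layer above scales every feature of a (time, batch) position by a single mask value; mult_mv broadcasts the mask over the feature axis. In NumPy terms (a sketch with illustrative shapes):

import numpy as np

flat_inp = np.random.randn(6, 4)                            # (T * B, features)
flat_mask = np.random.randint(0, 2, (6, 1)).astype(float)   # one value per step
flat_out = flat_inp * flat_mask                              # broadcast over features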
Example no. 15
    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        W, bias = buffers.parameters
        dW, dbias = buffers.gradients
        inputs = buffers.inputs.default
        outputs = buffers.outputs.default
        in_deltas = buffers.input_deltas.default
        out_deltas = buffers.output_deltas.default

        # reshape
        flat_inputs = flatten_time(inputs)
        flat_in_deltas = flatten_time(in_deltas)
        flat_out_deltas = flatten_time(out_deltas)

        # calculate in_deltas and gradients
        _h.inplace_act_func_deriv[self.activation](outputs, out_deltas)
        _h.conv2d_backward_batch(flat_inputs, W, self.padding, self.stride,
                                 flat_in_deltas, flat_out_deltas, dW, dbias)
Example no. 16
    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        W, bias = buffers.parameters
        dW, dbias = buffers.gradients
        inputs = buffers.inputs.default
        outputs = buffers.outputs.default
        in_deltas = buffers.input_deltas.default
        out_deltas = buffers.output_deltas.default

        # reshape
        flat_inputs = flatten_time(inputs)
        flat_in_deltas = flatten_time(in_deltas)
        flat_out_deltas = flatten_time(out_deltas)

        # calculate in_deltas and gradients
        _h.inplace_act_func_deriv[self.activation](outputs, out_deltas)
        _h.conv2d_backward_batch(flat_inputs, W, self.padding, self.stride,
                                 flat_in_deltas, flat_out_deltas, dW, dbias)
Example no. 17
    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        W, R, bias, timing = buffers.parameters
        dW, dR, dbias, dtiming = buffers.gradients
        inputs = buffers.inputs.default
        outputs = buffers.outputs.default
        dinputs = buffers.input_deltas.default
        doutputs = buffers.output_deltas.default
        Ha, dHa, dHb = buffers.internals

        tmp = _h.zeros(timing.shape)
        cond = _h.zeros(outputs[0].shape)

        _h.copy_to(doutputs, dHb)
        T = inputs.shape[0] - 1
        _h.act_func_deriv[self.activation](Ha[T], outputs[T], dHb[T], dHa[T])
        for t in range(T - 1, -1, -1):
            _h.fill(tmp, t + 1)
            _h.modulo_tt(tmp, timing, tmp)
            _h.broadcast_t(tmp.reshape((1, tmp.shape[0])), 0, cond)
            _h.add_into_if(dHb[t + 1], dHb[t], cond)
            _h.fill_if(dHa[t+1], 0.0, cond)
            _h.dot_add_mm(dHa[t + 1], R, dHb[t])
            _h.act_func_deriv[self.activation](Ha[t], outputs[t], dHb[t],
                                               dHa[t])

        flat_inputs = flatten_time(inputs)
        flat_dinputs = flatten_time(dinputs)
        flat_dHa = flatten_time(dHa[:-1])

        # Calculate in_deltas and gradients
        _h.dot_add_mm(flat_dHa, W, flat_dinputs)
        _h.dot_add_mm(flat_dHa, flat_inputs, dW, transa=True)
        dbias_tmp = _h.allocate(dbias.shape)
        _h.sum_t(flat_dHa, axis=0, out=dbias_tmp)
        _h.add_tt(dbias, dbias_tmp, dbias)

        flat_outputs = flatten_time(outputs[:-2])
        flat_dHa = flatten_time(dHa[1:-1])
        _h.dot_add_mm(flat_dHa, flat_outputs, dR, transa=True)
        _h.dot_add_mm(dHa[0], outputs[-1], dR, transa=True)
Example no. 18
    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        inputs = buffers.inputs.default
        outputs = buffers.outputs.default

        # reshape
        flat_inputs = flatten_time(inputs)
        flat_outputs = flatten_time(outputs)

        # calculate outputs
        if self.type == 'max':
            argmax = buffers.internals.argmax
            flat_argmax = flatten_time(argmax)
            _h.maxpool2d_forward_batch(flat_inputs, self.kernel_size,
                                       flat_outputs, self.padding, self.stride,
                                       flat_argmax)
        elif self.type == 'avg':
            _h.avgpool2d_forward_batch(flat_inputs, self.kernel_size,
                                       flat_outputs, self.padding, self.stride)
Example no. 19
    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        inputs = buffers.inputs.default
        outputs = buffers.outputs.default

        # reshape
        flat_inputs = flatten_time(inputs)
        flat_outputs = flatten_time(outputs)

        # calculate outputs
        if self.type == 'max':
            argmax = buffers.internals.argmax
            flat_argmax = flatten_time(argmax)
            _h.maxpool2d_forward_batch(flat_inputs, self.kernel_size,
                                       flat_outputs, self.padding, self.stride,
                                       flat_argmax)
        elif self.type == 'avg':
            _h.avgpool2d_forward_batch(flat_inputs, self.kernel_size,
                                       flat_outputs, self.padding, self.stride)
Example no. 20
    def backward_pass(self, buffers):
        _h = self.handler

        flat_out_deltas = flatten_time_and_features(
            buffers.output_deltas.default)
        tmp = self.handler.allocate(flat_out_deltas.shape)
        flat_mask = flatten_time(buffers.inputs.mask)
        flat_in_deltas = flatten_time_and_features(
            buffers.input_deltas.default)

        _h.mult_mv(flat_out_deltas, flat_mask, tmp)
        _h.add_tt(tmp, flat_in_deltas, flat_in_deltas)
Example no. 21
    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        inputs_1 = flatten_time_and_features(buffers.inputs.inputs_1)
        inputs_2 = flatten_time_and_features(buffers.inputs.inputs_2)
        diff = flatten_time_and_features(buffers.internals.squared_diff)
        diff_sum = flatten_time(buffers.outputs.default)

        # calculate
        _h.subtract_tt(inputs_1, inputs_2, out=diff)
        _h.mult_tt(diff, diff, out=diff)
        _h.sum_t(diff, axis=1, out=diff_sum)
        _h.mult_st(0.5, diff_sum, out=diff_sum)
Example no. 22
    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        inputs_1 = flatten_time_and_features(buffers.inputs.inputs_1)
        inputs_2 = flatten_time_and_features(buffers.inputs.inputs_2)
        diff = flatten_time_and_features(buffers.internals.squared_diff)
        diff_sum = flatten_time(buffers.outputs.default)

        # calculate
        _h.subtract_tt(inputs_1, inputs_2, out=diff)
        _h.mult_tt(diff, diff, out=diff)
        _h.sum_t(diff, axis=1, out=diff_sum)
        _h.mult_st(0.5, diff_sum, out=diff_sum)
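The pass above computes half the squared Euclidean distance between the two inputs for every time step and sample. The NumPy equivalent is short (a sketch, assuming inputs flattened to (T * B, features)):

import numpy as np

def squared_difference_sketch(inputs_1, inputs_2):
    # 0.5 * sum over features of (inputs_1 - inputs_2) ** 2
    diff = inputs_1 - inputs_2
    return 0.5 * np.sum(diff * diff, axis=1)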
Example no. 23
    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        assert isinstance(_h, Handler)

        dinputs = flatten_time_and_features(buffers.input_deltas.default)
        dloss = flatten_time(buffers.output_deltas.loss)
        dcee = flatten_time_and_features(buffers.internals.cee)
        targets = flatten_time_and_features(buffers.inputs.targets)
        prob = flatten_time_and_features(buffers.outputs.probabilities)

        _h.subtract_tt(prob, targets, dcee)  # y - t
        _h.mult_mv(dcee, dloss, dcee)  # out_delta * (y - t)
        _h.add_tt(dcee, dinputs, dinputs)
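The three handler calls above accumulate the familiar cross-entropy gradient, scaled by the incoming loss delta. A short sketch, assuming dloss keeps a trailing singleton feature axis after flattening so it broadcasts over the features:

def ce_backward_sketch(dinputs, prob, targets, dloss):
    # Arrays flattened to (T * B, features); dloss has shape (T * B, 1).
    # Accumulate in place: dinputs += dloss * (prob - targets)
    dinputs += dloss * (prob - targets)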
Example no. 24
    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        assert isinstance(_h, Handler)
        inputs = buffers.inputs.default
        tmp = buffers.internals.tmp
        outputs = buffers.outputs.loss

        # reshape
        flat_inputs = flatten_time_and_features(inputs)
        flat_tmp = flatten_time_and_features(tmp)
        flat_outputs = flatten_time(outputs)

        # compute
        _h.abs_t(flat_inputs, flat_tmp)
        _h.sum_t(flat_tmp, 1, flat_outputs)
Example no. 25
    def backward_pass(self, buffers):
        _h = self.handler
        assert isinstance(_h, Handler)
        inputs = buffers.inputs.default
        tmp = buffers.internals.tmp
        output_deltas = buffers.output_deltas.loss
        input_deltas = buffers.input_deltas.default

        # reshape
        flat_inputs = flatten_time_and_features(inputs)
        flat_tmp = flatten_time_and_features(tmp)
        flat_output_deltas = flatten_time(output_deltas)
        flat_input_deltas = flatten_time_and_features(input_deltas)

        # compute
        _h.mult_mv(flat_inputs, flat_output_deltas, flat_tmp)
        _h.add_tt(flat_tmp, flat_input_deltas, flat_input_deltas)
Example no. 26
    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        inputs_1 = flatten_time_and_features(buffers.inputs.inputs_1)
        inputs_2 = flatten_time_and_features(buffers.inputs.inputs_2)
        out_deltas = buffers.output_deltas.default
        grad_diff = buffers.internals.grad_diff
        dinputs_1 = flatten_time_and_features(buffers.input_deltas.inputs_1)
        dinputs_2 = flatten_time_and_features(buffers.input_deltas.inputs_2)

        tmp = _h.allocate(inputs_2.shape)
        # out_deltas has only one feature dimension due to summation,
        # so we broadcast to all feature dimensions
        _h.broadcast_t(out_deltas, 2, grad_diff)

        grad_diff = flatten_time(grad_diff)
        # calculate
        _h.subtract_tt(inputs_1, inputs_2, out=tmp)
        _h.mult_add_tt(grad_diff, tmp, dinputs_1)

        _h.subtract_tt(inputs_2, inputs_1, out=tmp)
        _h.mult_add_tt(grad_diff, tmp, dinputs_2)
Example no. 27
    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        inputs_1 = flatten_time_and_features(buffers.inputs.inputs_1)
        inputs_2 = flatten_time_and_features(buffers.inputs.inputs_2)
        out_deltas = buffers.output_deltas.default
        grad_diff = buffers.internals.grad_diff
        dinputs_1 = flatten_time_and_features(buffers.input_deltas.inputs_1)
        dinputs_2 = flatten_time_and_features(buffers.input_deltas.inputs_2)

        tmp = _h.allocate(inputs_2.shape)
        # out_deltas has only one feature dimension due to summation,
        # so we broadcast to all feature dimensions
        _h.broadcast_t(out_deltas, 2, grad_diff)

        grad_diff = flatten_time(grad_diff)
        # calculate
        _h.subtract_tt(inputs_1, inputs_2, out=tmp)
        _h.mult_add_tt(grad_diff, tmp, dinputs_1)

        _h.subtract_tt(inputs_2, inputs_1, out=tmp)
        _h.mult_add_tt(grad_diff, tmp, dinputs_2)
Example no. 28
    def backward_pass(self, buffers):
        # prepare
        _h = self.handler

        W_H, W_T, R_T, bias_T, R_H, bias_H = buffers.parameters
        dW_H, dW_T, dR_T, dbias_T, dR_H, dbias_H = buffers.gradients

        inputs = buffers.inputs.default
        outputs = buffers.outputs.default
        dinputs = buffers.input_deltas.default
        doutputs = buffers.output_deltas.default

        H_list = []
        T_list = []
        Y_list = []
        dH_list = []
        dT_list = []
        dY_list = []

        for i in range(self.recurrence_depth):
            H_list.append(buffers.internals['H_{}'.format(i)])
            T_list.append(buffers.internals['T_{}'.format(i)])
            Y_list.append(buffers.internals['Y_{}'.format(i)])
            dH_list.append(buffers.internals['dH_{}'.format(i)])
            dT_list.append(buffers.internals['dT_{}'.format(i)])
            dY_list.append(buffers.internals['dY_{}'.format(i)])

        t = inputs.shape[0] - 1
        _h.copy_to(doutputs[t], dY_list[self.recurrence_depth - 1][t])

        for i in range(self.recurrence_depth - 1, -1, -1):
            if i == 0:
                _h.mult_tt(dY_list[i][t], T_list[i][t], dH_list[i][t])
                tmp = _h.ones(dH_list[i][t].shape)
                _h.subtract_tt(H_list[i][t], outputs[t - 1], tmp)
                _h.mult_tt(dY_list[i][t], tmp, dT_list[i][t])

                _h.inplace_act_func_deriv['sigmoid'](T_list[i][t],
                                                     dT_list[i][t])
                _h.inplace_act_func_deriv[self.activation](H_list[i][t],
                                                           dH_list[i][t])
            else:
                _h.mult_tt(dY_list[i][t], T_list[i][t], dH_list[i][t])
                tmp = _h.ones(dH_list[i][t].shape)
                _h.subtract_tt(tmp, T_list[i][t], tmp)
                _h.mult_tt(dY_list[i][t], tmp, dY_list[i - 1][t])

                _h.subtract_tt(H_list[i][t], Y_list[i - 1][t], tmp)
                _h.mult_tt(dY_list[i][t], tmp, dT_list[i][t])

                _h.inplace_act_func_deriv['sigmoid'](T_list[i][t],
                                                     dT_list[i][t])
                _h.inplace_act_func_deriv[self.activation](H_list[i][t],
                                                           dH_list[i][t])
                _h.dot_add_mm(dT_list[i][t], R_T[i], dY_list[i - 1][t])
                _h.dot_add_mm(dH_list[i][t], R_H[i], dY_list[i - 1][t])

        for t in range(inputs.shape[0] - 2, -1, -1):
            _h.dot_add_mm(dT_list[0][t + 1], R_T[0], doutputs[t])
            _h.dot_add_mm(dH_list[0][t + 1], R_H[0], doutputs[t])
            tmp = _h.ones(dH_list[0][t + 1].shape)
            _h.subtract_tt(tmp, T_list[0][t + 1], tmp)
            _h.mult_add_tt(dY_list[0][t + 1], tmp, doutputs[t])
            _h.copy_to(doutputs[t], dY_list[self.recurrence_depth - 1][t])

            for i in range(self.recurrence_depth - 1, -1, -1):
                if i == 0:
                    _h.mult_tt(dY_list[i][t], T_list[i][t], dH_list[i][t])
                    tmp = _h.ones(dH_list[i][t].shape)
                    _h.subtract_tt(H_list[i][t], outputs[t - 1], tmp)
                    _h.mult_tt(dY_list[i][t], tmp, dT_list[i][t])

                    _h.inplace_act_func_deriv['sigmoid'](T_list[i][t],
                                                         dT_list[i][t])
                    _h.inplace_act_func_deriv[self.activation](H_list[i][t],
                                                               dH_list[i][t])
                else:
                    _h.mult_tt(dY_list[i][t], T_list[i][t], dH_list[i][t])
                    tmp = _h.ones(dH_list[i][t].shape)
                    _h.subtract_tt(tmp, T_list[i][t], tmp)
                    _h.mult_tt(dY_list[i][t], tmp, dY_list[i - 1][t])

                    _h.subtract_tt(H_list[i][t], Y_list[i - 1][t], tmp)
                    _h.mult_tt(dY_list[i][t], tmp, dT_list[i][t])

                    _h.inplace_act_func_deriv['sigmoid'](T_list[i][t],
                                                         dT_list[i][t])
                    _h.inplace_act_func_deriv[self.activation](H_list[i][t],
                                                               dH_list[i][t])
                    _h.dot_add_mm(dT_list[i][t], R_T[i], dY_list[i - 1][t])
                    _h.dot_add_mm(dH_list[i][t], R_H[i], dY_list[i - 1][t])

        flat_inputs = flatten_time_and_features(inputs)
        flat_dinputs = flatten_time_and_features(dinputs)
        flat_dH = flatten_time(dH_list[0][:-1])
        flat_dT = flatten_time(dT_list[0][:-1])

        # calculate in_deltas and gradients
        _h.dot_add_mm(flat_dH, W_H, flat_dinputs)
        _h.dot_add_mm(flat_dH, flat_inputs, dW_H, transa=True)
        _h.dot_add_mm(flat_dT, W_T, flat_dinputs)
        _h.dot_add_mm(flat_dT, flat_inputs, dW_T, transa=True)

        for i in range(self.recurrence_depth):
            dbias_tmp = _h.allocate(dbias_H[i].shape)
            flat_dH = flatten_time(dH_list[i][:-1])
            flat_dT = flatten_time(dT_list[i][:-1])
            _h.sum_t(flat_dT, axis=0, out=dbias_tmp)
            _h.add_tt(dbias_T[i], dbias_tmp, dbias_T[i])
            _h.sum_t(flat_dH, axis=0, out=dbias_tmp)
            _h.add_tt(dbias_H[i], dbias_tmp, dbias_H[i])

        for i in range(self.recurrence_depth):
            if i == 0:
                flat_outputs = flatten_time(outputs[:-2])
                flat_dH = flatten_time(dH_list[i][1:-1])
                flat_dT = flatten_time(dT_list[i][1:-1])
                _h.dot_add_mm(flat_dT, flat_outputs, dR_T[i], transa=True)
                _h.dot_add_mm(dT_list[i][0], outputs[-1], dR_T[i], transa=True)

                _h.dot_add_mm(flat_dH, flat_outputs, dR_H[i], transa=True)
                _h.dot_add_mm(dH_list[i][0], outputs[-1], dR_H[i], transa=True)
            else:
                flat_outputs = flatten_time(Y_list[i - 1][:-1])
                flat_dH = flatten_time(dH_list[i][:-1])
                flat_dT = flatten_time(dT_list[i][:-1])
                _h.dot_add_mm(flat_dT, flat_outputs, dR_T[i], transa=True)
                _h.dot_add_mm(flat_dH, flat_outputs, dR_H[i], transa=True)
Example no. 29
    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        (Wz, Wi, Wf, Wo,
         pi, pf, po,
         Rz, Ri, Rf, Ro,
         bz, bi, bf, bo) = buffers.parameters
        (dWz, dWi, dWf, dWo,
         dpi, dpf, dpo,
         dRz, dRi, dRf, dRo,
         dbz, dbi, dbf, dbo) = buffers.gradients

        (Za, Zb, Ia, Ib, Fa, Fb, Oa, Ob, Ca, Cb,
         dZa, dZb, dIa, dIb, dFa, dFb, dOa, dOb, dCa, dCb) = buffers.internals

        x = buffers.inputs.default
        dx = buffers.input_deltas.default
        y = buffers.outputs.default
        deltas = buffers.output_deltas.default

        dy = _h.allocate(y.shape)
        _h.fill(dCa, 0.0)

        time_size, batch_size, in_size = x.shape
        for t in range(time_size - 1, -1, - 1):
            # Accumulate recurrent deltas
            _h.copy_to(deltas[t], dy[t])
            _h.dot_add_mm(dIa[t + 1], Ri, dy[t])
            _h.dot_add_mm(dFa[t + 1], Rf, dy[t])
            _h.dot_add_mm(dOa[t + 1], Ro, dy[t])
            _h.dot_add_mm(dZa[t + 1], Rz, dy[t])

            # Peephole connection part:
            _h.mult_add_mv(dIa[t + 1], pi, dCa[t])
            _h.mult_add_mv(dFa[t + 1], pf, dCa[t])

            # Output Gate
            _h.mult_tt(dy[t], Cb[t], dOb[t])
            _h.sigmoid_deriv(Oa[t], Ob[t], dOb[t], dOa[t])
            # Peephole connection
            _h.mult_add_mv(dOa[t], po, dCa[t])

            # Cell
            _h.mult_tt(dy[t], Ob[t], dCb[t])
            _h.act_func_deriv[self.activation](Ca[t], Cb[t], dCb[t], dCb[t])
            _h.add_tt(dCa[t], dCb[t], dCa[t])
            _h.mult_add_tt(dCa[t + 1], Fb[t + 1], dCa[t])

            # Forget Gate
            _h.mult_tt(dCa[t], Ca[t - 1], dFb[t])
            _h.sigmoid_deriv(Fa[t], Fb[t], dFb[t], dFa[t])

            # Input Gate
            _h.mult_tt(dCa[t], Zb[t], dIb[t])
            _h.sigmoid_deriv(Ia[t], Ib[t], dIb[t], dIa[t])

            # Block Input
            _h.mult_tt(dCa[t], Ib[t], dZb[t])
            _h.act_func_deriv[self.activation](Za[t], Zb[t], dZb[t], dZa[t])

        flat_inputs = flatten_time(x)
        flat_dinputs = flatten_time(dx)

        flat_dIa = flatten_time(dIa[:-1])
        flat_dFa = flatten_time(dFa[:-1])
        flat_dOa = flatten_time(dOa[:-1])
        flat_dZa = flatten_time(dZa[:-1])

        # Calculate in_deltas and gradients
        _h.dot_add_mm(flat_dIa, Wi, flat_dinputs)
        _h.dot_add_mm(flat_dFa, Wf, flat_dinputs)
        _h.dot_add_mm(flat_dOa, Wo, flat_dinputs)
        _h.dot_add_mm(flat_dZa, Wz, flat_dinputs)

        _h.dot_add_mm(flat_dIa, flat_inputs, dWi, transa=True)
        _h.dot_add_mm(flat_dFa, flat_inputs, dWf, transa=True)
        _h.dot_add_mm(flat_dOa, flat_inputs, dWo, transa=True)
        _h.dot_add_mm(flat_dZa, flat_inputs, dWz, transa=True)

        dbias_tmp = _h.allocate(dbz.shape)
        _h.sum_t(flat_dIa, axis=0, out=dbias_tmp)
        _h.add_tt(dbi, dbias_tmp, dbi)
        _h.sum_t(flat_dFa, axis=0, out=dbias_tmp)
        _h.add_tt(dbf, dbias_tmp, dbf)
        _h.sum_t(flat_dOa, axis=0, out=dbias_tmp)
        _h.add_tt(dbo, dbias_tmp, dbo)
        _h.sum_t(flat_dZa, axis=0, out=dbias_tmp)
        _h.add_tt(dbz, dbias_tmp, dbz)

        flat_outputs = flatten_time(y[:-2])
        flat_cell = flatten_time(Ca[:-2])
        flat_cell2 = flatten_time(Ca[:-1])

        dWco_tmp = _h.allocate(flat_cell2.shape)
        dWc_tmp = _h.allocate(dpo.shape)

        # Output gate Peephole
        _h.mult_tt(flat_cell2, flat_dOa, dWco_tmp)
        _h.sum_t(dWco_tmp, axis=0, out=dWc_tmp)
        _h.add_tt(dpo, dWc_tmp, dpo)

        flat_dIa = flatten_time(dIa[1:-1])
        flat_dFa = flatten_time(dFa[1:-1])
        flat_dOa = flatten_time(dOa[1:-1])
        flat_dZa = flatten_time(dZa[1:-1])

        _h.dot_add_mm(flat_dIa, flat_outputs, dRi, transa=True)
        _h.dot_add_mm(flat_dFa, flat_outputs, dRf, transa=True)
        _h.dot_add_mm(flat_dOa, flat_outputs, dRo, transa=True)
        _h.dot_add_mm(flat_dZa, flat_outputs, dRz, transa=True)

        _h.dot_add_mm(dIa[0], dy[-1], dRi, transa=True)
        _h.dot_add_mm(dFa[0], dy[-1], dRf, transa=True)
        _h.dot_add_mm(dOa[0], dy[-1], dRo, transa=True)
        _h.dot_add_mm(dZa[0], dy[-1], dRz, transa=True)

        # Other Peephole connections
        dWcif_tmp = _h.allocate(flat_cell.shape)
        _h.mult_tt(flat_cell, flat_dIa, dWcif_tmp)
        _h.sum_t(dWcif_tmp, axis=0, out=dWc_tmp)
        _h.add_tt(dpi, dWc_tmp, dpi)
        _h.mult_tt(flat_cell, flat_dFa, dWcif_tmp)
        _h.sum_t(dWcif_tmp, axis=0, out=dWc_tmp)
        _h.add_tt(dpf, dWc_tmp, dpf)

        dWcif_tmp = _h.allocate(dIa[0].shape)
        # t = 0 terms pair the initial cell context with the first gate deltas
        _h.mult_tt(Ca[-1], dIa[0], dWcif_tmp)
        _h.sum_t(dWcif_tmp, axis=0, out=dWc_tmp)
        _h.add_tt(dpi, dWc_tmp, dpi)
        _h.mult_tt(Ca[-1], dFa[0], dWcif_tmp)
        _h.sum_t(dWcif_tmp, axis=0, out=dWc_tmp)
        _h.add_tt(dpf, dWc_tmp, dpf)
Example no. 30
    def backward_pass(self, buffers):
        # prepare
        _h = self.handler

        (dWz, dWi, dWf, dWo,
         dpi, dpf, dpo,
         dRz, dRi, dRf, dRo,
         dbz, dbi, dbf, dbo,
         dtiming) = buffers.gradients

        (Wz, Wi, Wf, Wo,
         pi, pf, po,
         Rz, Ri, Rf, Ro,
         bz, bi, bf, bo,
         timing) = buffers.parameters

        (Za, Zb, Ia, Ib, Fa, Fb, Oa, Ob, Ca, Cb,
         dZa, dZb, dIa, dIb, dFa, dFb, dOa, dOb, dCa, dCb) = buffers.internals

        x = buffers.inputs.default
        dx = buffers.input_deltas.default
        y = buffers.outputs.default
        deltas = buffers.output_deltas.default

        dy = _h.allocate(y.shape)

        time_size, batch_size = x.shape[0], x.shape[1]

        # Temporary variable to be filled with the current value of time t
        tmp = _h.zeros(timing.shape)

        _h.fill(dCa, 0.0)
        cond = _h.zeros(y[0].shape)

        for t in range(time_size - 1, -1, - 1):
            # Accumulate recurrent deltas
            _h.add_tt(dy[t], deltas[t], dy[t])
            _h.fill(tmp, t)
            _h.modulo_tt(tmp, timing, tmp)
            _h.broadcast_t(tmp.reshape((1, tmp.shape[0])), 0, cond)

            _h.dot_add_mm(dIa[t + 1], Ri, dy[t])
            _h.dot_add_mm(dFa[t + 1], Rf, dy[t])
            _h.dot_add_mm(dOa[t + 1], Ro, dy[t])
            _h.dot_add_mm(dZa[t + 1], Rz, dy[t])

            _h.mult_add_mv(dIa[t + 1], pi, dCa[t])
            _h.mult_add_mv(dFa[t + 1], pf, dCa[t])

            # Output Gate
            _h.mult_tt(dy[t], Cb[t], dOb[t])
            _h.fill_if(dOb[t], 0, cond)  # Set inactive to 0
            _h.sigmoid_deriv(Oa[t], Ob[t], dOb[t], dOa[t])
            # Output influence on peephole:
            _h.mult_add_mv(dOa[t], po, dCa[t])

            # Cell
            _h.mult_tt(dy[t], Ob[t], dCb[t])
            _h.act_func_deriv[self.activation](Ca[t], Cb[t], dCb[t], dCb[t])
            _h.fill_if(dCb[t], 0, cond)
            _h.add_tt(dCa[t], dCb[t], dCa[t])
            _h.mult_add_tt(dCa[t + 1], Fb[t + 1], dCa[t])

            # Forget Gate
            _h.mult_tt(dCa[t], Ca[t - 1], dFb[t])
            _h.sigmoid_deriv(Fa[t], Fb[t], dFb[t], dFa[t])

            # Input Gate
            _h.mult_tt(dCa[t], Zb[t], dIb[t])
            _h.sigmoid_deriv(Ia[t], Ib[t], dIb[t], dIa[t])

            # Block Input
            _h.mult_tt(dCa[t], Ib[t], dZb[t])
            _h.act_func_deriv[self.activation](Za[t], Zb[t], dZb[t], dZa[t])

            # Copy over the error from previous inactive nodes
            _h.add_into_if(dy[t], dy[t-1], cond)
            _h.add_into_if(dCa[t], dCa[t-1], cond)

            # Undo updates to inactive nodes:
            _h.fill_if(dIa[t], 0, cond)
            _h.fill_if(dFa[t], 0, cond)
            _h.fill_if(dZa[t], 0, cond)
            _h.fill_if(Fb[t], 0, cond)

        # Same as for standard RNN:
        flat_inputs = flatten_time_and_features(x)
        flat_dinputs = flatten_time_and_features(dx)

        flat_dIa = flatten_time(dIa[:-1])
        flat_dFa = flatten_time(dFa[:-1])
        flat_dOa = flatten_time(dOa[:-1])
        flat_dZa = flatten_time(dZa[:-1])

        # calculate in_deltas and gradients
        _h.dot_add_mm(flat_dIa, Wi, flat_dinputs)
        _h.dot_add_mm(flat_dFa, Wf, flat_dinputs)
        _h.dot_add_mm(flat_dOa, Wo, flat_dinputs)
        _h.dot_add_mm(flat_dZa, Wz, flat_dinputs)

        _h.dot_add_mm(flat_dIa, flat_inputs, dWi, transa=True)
        _h.dot_add_mm(flat_dFa, flat_inputs, dWf, transa=True)
        _h.dot_add_mm(flat_dOa, flat_inputs, dWo, transa=True)
        _h.dot_add_mm(flat_dZa, flat_inputs, dWz, transa=True)

        dbias_tmp = _h.allocate(dbz.shape)
        _h.sum_t(flat_dIa, axis=0, out=dbias_tmp)
        _h.add_tt(dbi, dbias_tmp, dbi)
        _h.sum_t(flat_dFa, axis=0, out=dbias_tmp)
        _h.add_tt(dbf, dbias_tmp, dbf)
        _h.sum_t(flat_dOa, axis=0, out=dbias_tmp)
        _h.add_tt(dbo, dbias_tmp, dbo)
        _h.sum_t(flat_dZa, axis=0, out=dbias_tmp)
        _h.add_tt(dbz, dbias_tmp, dbz)

        flat_outputs = flatten_time(y[:-2])

        flat_cell = flatten_time(Ca[:-2])
        flat_cell2 = flatten_time(Ca[:-1])

        dWco_tmp = _h.allocate(flat_cell2.shape)
        dWc_tmp = _h.allocate(dpo.shape)
        # Peephole connection output weight:
        _h.mult_tt(flat_cell2, flat_dOa, dWco_tmp)
        _h.sum_t(dWco_tmp, axis=0, out=dWc_tmp)
        _h.add_tt(dpo, dWc_tmp, dpo)

        flat_dIa = flatten_time(dIa[1:-1])
        flat_dFa = flatten_time(dFa[1:-1])
        flat_dOa = flatten_time(dOa[1:-1])
        flat_dZa = flatten_time(dZa[1:-1])

        _h.dot_add_mm(flat_dIa, flat_outputs, dRi, transa=True)
        _h.dot_add_mm(flat_dFa, flat_outputs, dRf, transa=True)
        _h.dot_add_mm(flat_dOa, flat_outputs, dRo, transa=True)
        _h.dot_add_mm(flat_dZa, flat_outputs, dRz, transa=True)

        _h.dot_add_mm(dIa[0], dy[-1], dRi, transa=True)
        _h.dot_add_mm(dFa[0], dy[-1], dRf, transa=True)
        _h.dot_add_mm(dOa[0], dy[-1], dRo, transa=True)
        _h.dot_add_mm(dZa[0], dy[-1], dRz, transa=True)

        # Other Peephole connections
        dWcif_tmp = _h.allocate(flat_cell.shape)
        _h.mult_tt(flat_cell, flat_dIa, dWcif_tmp)
        _h.sum_t(dWcif_tmp, axis=0, out=dWc_tmp)
        _h.add_tt(dpi, dWc_tmp, dpi)
        _h.mult_tt(flat_cell, flat_dFa, dWcif_tmp)
        _h.sum_t(dWcif_tmp, axis=0, out=dWc_tmp)
        _h.add_tt(dpf, dWc_tmp, dpf)

        dWcif_tmp = _h.allocate(dIa[0].shape)
        # t = 0 terms pair the initial cell context with the first gate deltas
        _h.mult_tt(Ca[-1], dIa[0], dWcif_tmp)
        _h.sum_t(dWcif_tmp, axis=0, out=dWc_tmp)
        _h.add_tt(dpi, dWc_tmp, dpi)
        _h.mult_tt(Ca[-1], dFa[0], dWcif_tmp)
        _h.sum_t(dWcif_tmp, axis=0, out=dWc_tmp)
        _h.add_tt(dpf, dWc_tmp, dpf)
Example no. 31
    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        (Wz, Wi, Wf, Wo,
         pi, pf, po,
         Rz, Ri, Rf, Ro,
         bz, bi, bf, bo,
         timing) = buffers.parameters

        (Za, Zb, Ia, Ib, Fa, Fb, Oa, Ob, Ca, Cb,
         dZa, dZb, dIa, dIb, dFa, dFb, dOa, dOb, dCa, dCb) = buffers.internals
        x = buffers.inputs.default
        y = buffers.outputs.default
        time_size, batch_size = x.shape[0], x.shape[1]

        # Temporary variable to be filled with the current value of time t
        tmp = _h.zeros(timing.shape)
        cond = _h.zeros(y[0].shape)

        flat_x = flatten_time_and_features(x)
        flat_Za = flatten_time(Za[:-1])
        flat_Ia = flatten_time(Ia[:-1])
        flat_Fa = flatten_time(Fa[:-1])
        flat_Oa = flatten_time(Oa[:-1])
        _h.dot_mm(flat_x, Wz, flat_Za, transb=True)
        _h.dot_mm(flat_x, Wi, flat_Ia, transb=True)
        _h.dot_mm(flat_x, Wf, flat_Fa, transb=True)
        _h.dot_mm(flat_x, Wo, flat_Oa, transb=True)

        for t in range(time_size):

            # Block input
            _h.dot_add_mm(y[t - 1], Rz, Za[t], transb=True)
            _h.add_mv(Za[t], bz.reshape((1, self.size)), Za[t])
            _h.act_func[self.activation](Za[t], Zb[t])

            # Input Gate
            _h.dot_add_mm(y[t - 1], Ri, Ia[t], transb=True)
            _h.mult_add_mv(Ca[t - 1], pi, Ia[t])  # ADDED PEEPHOLE CONNECTION
            _h.add_mv(Ia[t], bi.reshape((1, self.size)), Ia[t])
            _h.sigmoid(Ia[t], Ib[t])

            # Forget Gate
            _h.dot_add_mm(y[t - 1], Rf, Fa[t], transb=True)
            _h.mult_add_mv(Ca[t - 1], pf, Fa[t])  # ADDED PEEPHOLE CONNECTION
            _h.add_mv(Fa[t], bf.reshape((1, self.size)), Fa[t])
            _h.sigmoid(Fa[t], Fb[t])

            # Cell
            _h.mult_tt(Ib[t], Zb[t], Ca[t])
            _h.mult_add_tt(Fb[t], Ca[t - 1], Ca[t])

            # Output Gate
            _h.dot_add_mm(y[t - 1], Ro, Oa[t], transb=True)
            _h.mult_add_mv(Ca[t], po, Oa[t])  # ADDED PEEPHOLE CONNECTION
            _h.add_mv(Oa[t], bo.reshape((1, self.size)), Oa[t])
            _h.sigmoid(Oa[t], Ob[t])

            # Block output
            _h.act_func[self.activation](Ca[t], Cb[t])
            _h.mult_tt(Ob[t], Cb[t], y[t])

            if t > 0:
                _h.fill(tmp, t)
                _h.modulo_tt(tmp, timing, tmp)
                _h.broadcast_t(tmp.reshape((1, tmp.shape[0])), 0, cond)

                # Reset Cell
                _h.copy_to_if(Ca[t-1], Ca[t], cond)
                # Reset Block output
                _h.copy_to_if(y[t-1], y[t], cond)