Example #1
0
    def pass_forward(self, inputs, train_mode=True):
        """Run the recurrent forward pass over every time step of ``inputs``.

        Args:
            inputs: 3-D array whose shape unpacks as
                ``(batch, time_steps, input_dim)``.
            train_mode: When true the raw per-step outputs are returned;
                otherwise they are passed through the ``'softmax'``
                activation first.

        Returns:
            Array of shape ``(batch, time_steps, input_dim)``.

        Side effects:
            Stores ``inputs``, ``state_input``, ``states`` and ``outputs``
            on the instance.
        """
        self.inputs = inputs
        n_samples, n_steps, n_features = inputs.shape
        n_hidden = self.h_units

        self.state_input = np.zeros((n_samples, n_steps, n_hidden))
        self.outputs = np.zeros((n_samples, n_steps, n_features))

        # ``states`` has one slot more than there are steps: step 0 reads its
        # "previous" state from index -1, i.e. from that extra trailing slot.
        self.states = np.zeros((n_samples, n_steps + 1, n_hidden))
        self.states[:, -1] = np.zeros((n_samples, n_hidden))

        for step in range(n_steps):
            input_term = np.dot(inputs[:, step], self.W_input.T)
            recur_term = np.dot(self.states[:, step - 1], self.W_recur.T)
            self.state_input[:, step] = (input_term + recur_term) + self.b_input

            squash = activate(self.activation)
            self.states[:, step] = squash.forward(self.state_input[:, step])

            projected = np.dot(self.states[:, step], self.W_output.T)
            self.outputs[:, step] = projected + self.b_output

        if train_mode:
            return self.outputs

        # not training: hand back softmax activations instead of raw outputs
        return activate('softmax').forward(self.outputs)
Example #2
0
    def pass_backward(self, grad):
        """Back-propagate ``grad`` through time and update the parameters.

        The gradient handed to the upstream layer is computed whether or not
        ``self.is_trainable`` is set, so a frozen layer still lets gradients
        flow through it; only the gradient accumulation and the optimizer
        updates are skipped for a non-trainable layer.

        Args:
            grad: 3-D array whose shape unpacks as
                ``(batch, time_steps, features)``; presumably the loss
                gradient with respect to the per-step outputs of the
                forward pass.

        Returns:
            Array shaped like ``grad``. Slice ``t`` holds ``grad[:, t]``
            routed back through ``W_output``, the activation derivative at
            ``state_input[:, t]`` and ``W_input``.
        """
        _, time_steps, _ = grad.shape
        next_grad = np.zeros_like(grad)
        trainable = self.is_trainable

        if trainable:
            dW_input = np.zeros_like(self.W_input)
            dW_recur = np.zeros_like(self.W_recur)
            dW_output = np.zeros_like(self.W_output)

            db_input = np.zeros_like(self.b_input)
            db_output = np.zeros_like(self.b_output)

        for t in reversed(range(time_steps)):
            dstate = np.dot(grad[:, t], self.W_output) * activate(
                self.activation).backward(self.state_input[:, t])
            next_grad[:, t] = np.dot(dstate, self.W_input)

            if not trainable:
                continue

            dW_output += np.dot(grad[:, t].T, self.states[:, t])
            db_output += np.sum(grad[:, t], axis=0)

            # Truncated back-propagation through time: walk at most
            # ``bptt_truncate`` steps back from ``t``.
            first_step = max(0, t - self.bptt_truncate)
            for tt in np.arange(first_step, t + 1)[::-1]:
                dW_input += np.dot(dstate.T, self.inputs[:, tt])
                # For tt == 0 the index -1 selects the last slot of
                # ``self.states`` — presumably the zero initial state kept
                # there by the forward pass; confirm against pass_forward.
                dW_recur += np.dot(dstate.T, self.states[:, tt - 1])
                db_input += np.sum(dstate, axis=0)
                # The value computed on the final inner iteration is never
                # read: ``dstate`` is reassigned at the next outer step.
                dstate = dstate.dot(self.W_recur) * activate(
                    self.activation).backward(self.state_input[:, tt - 1])

        if trainable:
            # optimize weights and bias
            self.W_input = optimizer(self.optimizer_kwargs).update(
                self.W_input, cg(dW_input))
            self.W_output = optimizer(self.optimizer_kwargs).update(
                self.W_output, cg(dW_output))
            self.W_recur = optimizer(self.optimizer_kwargs).update(
                self.W_recur, cg(dW_recur))

            self.b_input = optimizer(self.optimizer_kwargs).update(
                self.b_input, cg(db_input))
            self.b_output = optimizer(self.optimizer_kwargs).update(
                self.b_output, cg(db_output))

        return next_grad
Example #3
0
    def pass_forward(self, inputs, train_mode=True):
        """Run the gated recurrent forward pass over every time step.

        For each step ``t`` the forget, input and output gates and the
        candidate cell are computed from ``z[:, t]``, which holds the step's
        input concatenated with the hidden state of the previous step. The
        cell and hidden state are then updated and the hidden state is
        projected to logits.

        ``z`` used to be assembled once, before the loop, from the still
        all-zero ``states`` array, so the hidden state never reached the
        gates. The state columns of ``z`` are now filled in at every step.

        Args:
            inputs: 3-D array whose shape unpacks as
                ``(batch, time_steps, input_dim)``.
            train_mode: When true the raw logits are returned; otherwise
                they are passed through the ``'softmax'`` activation first.

        Returns:
            Array of shape ``(batch, time_steps, input_dim)``.

        Side effects:
            Stores ``inputs``, the per-step gate activations (``forget``,
            ``input``, ``output``), ``cell_tilde``, ``cell``, ``states``,
            ``z`` and ``final`` on the instance.
        """
        self.inputs = inputs
        batch_size, time_steps, input_dim = inputs.shape

        per_step = (batch_size, time_steps, self.h_units)
        self.forget = np.zeros(per_step)
        self.input = np.zeros(per_step)
        self.output = np.zeros(per_step)
        self.states = np.zeros(per_step)
        self.cell_tilde = np.zeros(per_step)
        self.cell = np.zeros(per_step)
        self.final = np.zeros((batch_size, time_steps, input_dim))

        # Columns [:input_dim] of z hold the inputs, columns [input_dim:] the
        # previous hidden state. The state half starts out as zeros and is
        # filled in step by step inside the loop.
        self.z = np.concatenate((self.inputs, self.states), axis=2)
        zero_state = np.zeros((batch_size, self.h_units))

        for t in range(time_steps):
            if t > 0:
                self.z[:, t, input_dim:] = self.states[:, t - 1]
                prev_cell = self.cell[:, t - 1]
            else:
                # explicit zero initial cell instead of relying on the
                # wrap-around read of the not-yet-written cell[:, -1]
                prev_cell = zero_state

            z_t = self.z[:, t]

            self.forget[:, t] = activate(self.gate_activation).forward(
                np.dot(z_t, self.W_forget) + self.b_forget)
            self.input[:, t] = activate(self.gate_activation).forward(
                np.dot(z_t, self.W_input) + self.b_input)
            self.cell_tilde[:, t] = activate(self.activation).forward(
                np.dot(z_t, self.W_cell) + self.b_cell)
            self.cell[:, t] = (self.forget[:, t] * prev_cell
                               + self.input[:, t] * self.cell_tilde[:, t])
            self.output[:, t] = activate(self.gate_activation).forward(
                np.dot(z_t, self.W_output) + self.b_output)
            self.states[:, t] = self.output[:, t] * activate(
                self.activation).forward(self.cell[:, t])

            # logits
            self.final[:, t] = np.dot(self.states[:, t],
                                      self.W_final) + self.b_final

        if not train_mode:
            # if mode is not training
            return activate('softmax').forward(self.final)

        return self.final
Example #4
0
    def pass_forward(self, inputs, train_mode=True):
        """Run the update/reset-gated recurrent forward pass over all steps.

        For each step ``t`` the update and reset gates are computed from
        ``z[:, t]`` (the step's input concatenated with the previous hidden
        state), a candidate is computed from ``z_tilde``, and the new hidden
        state is the gate-weighted blend of the previous state and the
        candidate. The hidden state is then projected to logits.

        ``z`` used to be assembled once, before the loop, from the still
        all-zero ``states`` array, so the hidden state never reached the
        gates. The state columns of ``z`` are now filled in at every step.

        Args:
            inputs: 3-D array whose shape unpacks as
                ``(batch, time_steps, input_dim)``.
            train_mode: When true the raw logits are returned; otherwise
                they are passed through the ``'softmax'`` activation first.

        Returns:
            Array of shape ``(batch, time_steps, input_dim)``.

        Side effects:
            Stores ``inputs``, ``update``, ``reset``, ``cell``, ``states``,
            ``z``, ``z_tilde`` and ``final`` on the instance.
        """
        self.inputs = inputs
        batch_size, time_steps, input_dim = inputs.shape

        per_step = (batch_size, time_steps, self.h_units)
        self.update = np.zeros(per_step)
        self.reset = np.zeros(per_step)
        self.cell = np.zeros(per_step)
        self.states = np.zeros(per_step)
        self.final = np.zeros((batch_size, time_steps, input_dim))

        # Columns [:input_dim] of z hold the inputs, columns [input_dim:] the
        # previous hidden state. The state half starts out as zeros and is
        # filled in step by step inside the loop.
        self.z = np.concatenate((self.inputs, self.states), axis=2)
        self.z_tilde = np.zeros_like(self.z)
        zero_state = np.zeros((batch_size, self.h_units))

        for t in range(time_steps):
            if t > 0:
                prev_state = self.states[:, t - 1]
                self.z[:, t, input_dim:] = prev_state
            else:
                # explicit zero initial state instead of relying on the
                # wrap-around read of the not-yet-written states[:, -1]
                prev_state = zero_state

            self.update[:, t] = activate(self.gate_activation)._forward(
                np.dot(self.z[:, t], self.W_update) + self.b_update)
            self.reset[:, t] = activate(self.gate_activation)._forward(
                np.dot(self.z[:, t], self.W_reset) + self.b_reset)

            # z_tilde puts the reset-scaled state first and the input second
            # (the opposite column order to z).
            self.z_tilde[:, t] = np.concatenate(
                (self.reset[:, t] * prev_state, self.inputs[:, t]), axis=1)

            # NOTE(review): the candidate reads z_tilde at index t - 1, not
            # t (for t == 0 that wraps to the last slot). The backward pass
            # indexes z_tilde the same way, so this is deliberately left
            # unchanged here — confirm whether index t was intended and, if
            # so, change both passes together.
            self.cell[:, t] = activate(self.activation)._forward(
                np.dot(self.z_tilde[:, t - 1], self.W_cell) + self.b_cell)
            self.states[:, t] = ((1. - self.update[:, t]) * prev_state
                                 + self.update[:, t] * self.cell[:, t])

            # logits
            self.final[:, t] = np.dot(self.states[:, t],
                                      self.W_final) + self.b_final

        if not train_mode:
            # if mode is not training
            return activate('softmax')._forward(self.final)

        return self.final
Example #5
0
    def __init__(self,
                 epochs,
                 activation='sigmoid',
                 loss='categorical_crossentropy',
                 init_method='he_normal',
                 optimizer=None,
                 penalty='lasso',
                 penalty_weight=0,
                 l1_ratio=0.5):
        """Store the training configuration and resolve named components.

        Args:
            epochs: Stored unchanged as ``self.epochs``.
            activation: Name passed to ``activate``.
            loss: Name passed to ``objective``.
            init_method: Name passed to ``init``.
            optimizer: Optimizer settings, stored unchanged as
                ``self.optimizer``. ``None`` (the default) gives every
                instance its own new empty dict; the previous ``{}`` default
                was a single dict object shared by all instances created
                without this argument.
            penalty: Penalty name passed to ``regularize``.
            penalty_weight: Penalty weight passed to ``regularize``.
            l1_ratio: Forwarded to ``regularize`` as ``l1_ratio``.
        """
        self.epochs = epochs
        self.activate = activate(activation)
        self.loss = objective(loss)
        self.init_method = init(init_method)
        self.optimizer = {} if optimizer is None else optimizer
        self.regularization = regularize(penalty,
                                         penalty_weight,
                                         l1_ratio=l1_ratio)
Example #6
0
    def pass_backward(self, grad):
        """Back-propagate ``grad`` through the stored activations and update parameters.

        Walks the time steps in reverse, accumulating gradients for the
        final projection, the candidate cell, the reset gate and the update
        gate. Each weight and bias is then replaced with the result of
        ``optimizer(self.optimizer_kwargs)._update(param, cg(gradient))``.

        Args:
            grad: 3-D array whose shape unpacks as
                ``(batch, time_steps, features)``; presumably the loss
                gradient with respect to the logits returned by the forward
                pass — TODO confirm.

        Returns:
            ``np.dot(dstates, self.W_final)`` as computed on the last loop
            iteration (``t == 0``), or a zero array shaped like ``grad``
            when there are no time steps.
        """
        _, time_steps, _ = grad.shape

        # gradient accumulators, one per weight matrix
        dW_update = np.zeros_like(self.W_update)
        dW_reset = np.zeros_like(self.W_reset)
        dW_cell = np.zeros_like(self.W_cell)
        dW_final = np.zeros_like(self.W_final)

        # gradient accumulators, one per bias
        db_update = np.zeros_like(self.b_update)
        db_reset = np.zeros_like(self.b_reset)
        db_cell = np.zeros_like(self.b_cell)
        db_final = np.zeros_like(self.b_final)

        # per-step buffers for the hidden-state gradient and its three parts
        dstates = np.zeros_like(self.states)
        dstate_a = np.zeros_like(self.states)
        dstate_b = np.zeros_like(self.states)
        dstate_c = np.zeros_like(self.states)
        dstates_next = np.zeros_like(self.states)
        dstates_prime = np.zeros_like(self.states)

        # dz_cell, dz_reset and dz_update are rebound inside the loop, so
        # the zero arrays created for them here are never read.
        dz_cell = np.zeros_like(self.cell)
        dcell = np.zeros_like(self.cell)

        dz_reset = np.zeros_like(self.reset)
        dreset = np.zeros_like(self.reset)

        dz_update = np.zeros_like(self.update)
        dupdate = np.zeros_like(self.update)

        next_grad = np.zeros_like(grad)

        for t in np.arange(time_steps)[::-1]:  # reversed

            # final projection: logits = states . W_final + b_final
            dW_final += np.dot(self.states[:, t].T, grad[:, t])
            db_final += np.sum(grad[:, t], axis=0)

            dstates[:, t] = np.dot(grad[:, t], self.W_final.T)
            # NOTE(review): dstates_next was last rebuilt at the end of the
            # previous iteration (step t + 1), when slice t of
            # dstate_a/b/c was still zero, so this addition appears to
            # contribute nothing; index t + 1 may have been intended —
            # confirm.
            dstates[:, t] += dstates_next[:, t]
            # next_grad is rebound on every iteration from the whole
            # dstates buffer; only the value from the last iteration is
            # returned.
            # NOTE(review): this projects dstates through W_final rather
            # than using dz — confirm it is the intended upstream gradient.
            next_grad = np.dot(dstates, self.W_final)

            # split the state gradient between candidate, previous state
            # and update gate
            dcell[:, t] = self.update[:, t] * dstates[:, t]
            dstate_a[:, t] = (1. - self.update[:, t]) * dstates[:, t]
            # For t == 0 the index t - 1 wraps around to the last time
            # step. NOTE(review): presumably a zero initial state was
            # intended there — confirm.
            dupdate[:,
                    t] = self.cell[:,
                                   t] * dstates[:,
                                                t] - self.states[:, t -
                                                                 1] * dstates[:,
                                                                              t]

            # candidate-cell branch
            # NOTE(review): `_backward` is given the stored activation
            # output here (and for the gates below); whether it expects
            # pre- or post-activation values is not visible — TODO confirm.
            dcell[:, t] = activate(self.activation)._backward(
                self.cell[:, t]) * dcell[:, t]
            # Uses z_tilde at index t - 1; the forward pass also reads
            # z_tilde[:, t - 1] when it computes cell[:, t].
            dW_cell += np.dot(self.z_tilde[:, t - 1].T, dcell[:, t])
            db_cell += np.sum(dcell[:, t], axis=0)
            dz_cell = np.dot(dcell[:, t], self.W_cell.T)

            # first h_units columns of dz_cell are taken as the gradient
            # of the reset-scaled previous state
            dstates_prime[:, t] = dz_cell[:, :self.h_units]
            dstate_b[:, t] = self.reset[:, t] * dstates_prime[:, t]

            # reset-gate branch
            dreset[:, t] = self.states[:, t - 1] * dstates_prime[:, t]
            dreset[:, t] = activate(self.gate_activation)._backward(
                self.reset[:, t]) * dreset[:, t]
            dW_reset += np.dot(self.z[:, t].T, dreset[:, t])
            db_reset += np.sum(dreset[:, t], axis=0)
            dz_reset = np.dot(dreset[:, t], self.W_reset.T)

            # update-gate branch
            dupdate[:, t] = activate(self.gate_activation)._backward(
                self.update[:, t]) * dupdate[:, t]
            dW_update += np.dot(self.z[:, t].T, dupdate[:, t])
            db_update += np.sum(dupdate[:, t], axis=0)
            dz_update = np.dot(dupdate[:, t], self.W_update.T)

            dz = dz_reset + dz_update
            # NOTE(review): takes the first h_units columns of dz, whereas
            # the forward pass builds z as (inputs, states) with the state
            # columns last — confirm which columns were meant.
            dstate_c[:, t] = dz[:, :self.h_units]

            # whole-array sum over all steps; see the note where
            # dstates_next is read above
            dstates_next = dstate_a + dstate_b + dstate_c

        # optimize weights and bias
        self.W_final = optimizer(self.optimizer_kwargs)._update(
            self.W_final, cg(dW_final))
        self.b_final = optimizer(self.optimizer_kwargs)._update(
            self.b_final, cg(db_final))

        self.W_cell = optimizer(self.optimizer_kwargs)._update(
            self.W_cell, cg(dW_cell))
        self.b_cell = optimizer(self.optimizer_kwargs)._update(
            self.b_cell, cg(db_cell))

        self.W_reset = optimizer(self.optimizer_kwargs)._update(
            self.W_reset, cg(dW_reset))
        self.b_reset = optimizer(self.optimizer_kwargs)._update(
            self.b_reset, cg(db_reset))

        self.W_update = optimizer(self.optimizer_kwargs)._update(
            self.W_update, cg(dW_update))
        self.b_update = optimizer(self.optimizer_kwargs)._update(
            self.b_update, cg(db_update))

        return next_grad
Example #7
0
    def pass_backward(self, grad):
        """Back-propagate ``grad`` through the stored activations and update parameters.

        Walks the time steps in reverse, accumulating gradients for the
        final projection and for the output, cell, input and forget
        branches. Each weight and bias is then replaced with the result of
        ``optimizer(self.optimizer_kwargs)._update(param, cg(gradient))``.

        Args:
            grad: 3-D array whose shape unpacks as
                ``(batch, time_steps, features)``; presumably the loss
                gradient with respect to the logits returned by the forward
                pass — TODO confirm.

        Returns:
            ``np.dot(dstates, self.W_final)`` as computed on the last loop
            iteration (``t == 0``), or a zero array shaped like ``grad``
            when there are no time steps.
        """
        _, time_steps, _ = grad.shape

        # gradient accumulators, one per weight matrix
        dW_forget = np.zeros_like(self.W_forget)
        dW_input = np.zeros_like(self.W_input)
        dW_output = np.zeros_like(self.W_output)
        dW_cell = np.zeros_like(self.W_cell)
        dW_final = np.zeros_like(self.W_final)

        # gradient accumulators, one per bias
        db_forget = np.zeros_like(self.b_forget)
        db_input = np.zeros_like(self.b_input)
        db_output = np.zeros_like(self.b_output)
        db_cell = np.zeros_like(self.b_cell)
        db_final = np.zeros_like(self.b_final)

        # per-step gradient buffers, shaped like the stored activations
        dstates = np.zeros_like(self.states)
        dcell = np.zeros_like(self.cell)
        dcell_tilde = np.zeros_like(self.cell_tilde)
        dforget = np.zeros_like(self.forget)
        dinput = np.zeros_like(self.input)
        doutput = np.zeros_like(self.output)

        # gradients meant to be carried from a later step to an earlier one
        dcell_next = np.zeros_like(self.cell)
        dstates_next = np.zeros_like(self.states)

        next_grad = np.zeros_like(grad)

        for t in np.arange(time_steps)[::-1]:  # reversed

            # final projection: logits = states . W_final + b_final
            dW_final += np.dot(self.states[:, t].T, grad[:, t])
            db_final += np.sum(grad[:, t], axis=0)

            dstates[:, t] = np.dot(grad[:, t], self.W_final.T)
            # NOTE(review): slice t of dstates_next is only written at the
            # end of this same iteration, so it is still zero when read
            # here and this addition appears to contribute nothing; a
            # carry from step t + 1 may have been intended — confirm.
            dstates[:, t] += dstates_next[:, t]
            # next_grad is rebound on every iteration from the whole
            # dstates buffer; only the value from the last iteration is
            # returned.
            # NOTE(review): this projects dstates through W_final rather
            # than using dz — confirm it is the intended upstream gradient.
            next_grad = np.dot(dstates, self.W_final)

            # output-gate branch: activation(cell) * dstates, then scaled
            # by the gate activation's `_backward` of the stored gate value
            # NOTE(review): `_backward` is given stored activation outputs
            # throughout this method; whether it expects pre- or
            # post-activation values is not visible — TODO confirm.
            doutput[:, t] = activate(self.activation)._forward(
                self.cell[:, t]) * dstates[:, t]
            doutput[:, t] = activate(self.gate_activation)._backward(
                self.output[:, t]) * doutput[:, t]
            dW_output += np.dot(self.z[:, t].T, doutput[:, t])
            db_output += np.sum(doutput[:, t], axis=0)

            # cell-state gradient
            dcell[:, t] += self.output[:, t] * dstates[:, t] * activate(
                self.activation)._backward(self.cell[:, t])
            # NOTE(review): dcell_next was last rebuilt at the end of the
            # previous iteration (step t + 1), when dcell[:, t] was still
            # zero, so slice t of it is zero here — confirm intent.
            dcell[:, t] += dcell_next[:, t]
            dcell_tilde[:, t] = dcell[:, t] * self.input[:, t]
            # NOTE(review): `_backward` is applied to the gradient value
            # itself rather than to self.cell_tilde, and dcell_tilde is not
            # read again after this statement: dW_cell, db_cell and dz_cell
            # below all use dcell instead — confirm intent.
            dcell_tilde[:, t] = dcell_tilde[:, t] * activate(
                self.activation)._backward(dcell_tilde[:, t])
            dW_cell += np.dot(self.z[:, t].T, dcell[:, t])
            db_cell += np.sum(dcell[:, t], axis=0)

            # input-gate branch
            dinput[:, t] = self.cell_tilde[:, t] * dcell[:, t]
            dinput[:, t] = activate(self.gate_activation)._backward(
                self.input[:, t]) * dinput[:, t]
            dW_input += np.dot(self.z[:, t].T, dinput[:, t])
            db_input += np.sum(dinput[:, t], axis=0)

            # forget-gate branch
            # For t == 0 the index t - 1 wraps around to the last time
            # step. NOTE(review): presumably a zero initial cell was
            # intended there — confirm.
            dforget[:, t] = self.cell[:, t - 1] * dcell[:, t]
            dforget[:, t] = activate(self.gate_activation)._backward(
                self.forget[:, t]) * dforget[:, t]
            dW_forget += np.dot(self.z[:, t].T, dforget[:, t])
            db_forget += np.sum(dforget[:, t], axis=0)

            # gradient with respect to z[:, t], summed over the four branches
            dz_forget = np.dot(dforget[:, t], self.W_forget.T)
            dz_input = np.dot(dinput[:, t], self.W_input.T)
            dz_output = np.dot(doutput[:, t], self.W_output.T)
            dz_cell = np.dot(dcell[:, t], self.W_cell.T)

            dz = dz_forget + dz_input + dz_output + dz_cell
            # NOTE(review): takes the first h_units columns of dz, whereas
            # the forward pass builds z as (inputs, states) with the state
            # columns last — confirm which columns were meant.
            dstates_next[:, t] = dz[:, :self.h_units]
            # whole-array product over all steps; see the note where
            # dcell_next is read above
            dcell_next = self.forget * dcell

        # optimize weights and bias
        self.W_final = optimizer(self.optimizer_kwargs)._update(
            self.W_final, cg(dW_final))
        self.b_final = optimizer(self.optimizer_kwargs)._update(
            self.b_final, cg(db_final))

        self.W_forget = optimizer(self.optimizer_kwargs)._update(
            self.W_forget, cg(dW_forget))
        self.b_forget = optimizer(self.optimizer_kwargs)._update(
            self.b_forget, cg(db_forget))

        self.W_input = optimizer(self.optimizer_kwargs)._update(
            self.W_input, cg(dW_input))
        self.b_input = optimizer(self.optimizer_kwargs)._update(
            self.b_input, cg(db_input))

        self.W_output = optimizer(self.optimizer_kwargs)._update(
            self.W_output, cg(dW_output))
        self.b_output = optimizer(self.optimizer_kwargs)._update(
            self.b_output, cg(db_output))

        self.W_cell = optimizer(self.optimizer_kwargs)._update(
            self.W_cell, cg(dW_cell))
        self.b_cell = optimizer(self.optimizer_kwargs)._update(
            self.b_cell, cg(db_cell))

        return next_grad