Example #1
    def __init__(self, inpt,
                 nin, nunits,
                 forget=False,
                 pre_activation='tanh',
                 post_activation='linear',
                 learn_init_states=True):
        """
        Init
        :@param inpt: activations from incoming layer.
        :@param nin: dimensions of incoming layer.
        :@param nunits: number of units.
        :@param forget: use forget gate
        :@param pre_activation: activation pre-synaptic to central cell.
        :@param post_activation: activation applied to central cell b4 output.
        :@param learn_init_states: learn the initial states
        :@return: Output
        """

        num_activations = 3 + forget  # input & output gates + candidate, plus forget gate if used
        w = stacked_ortho_wts(nin, nunits, num_activations)     # input-to-hidden weights
        u = stacked_ortho_wts(nunits, nunits, num_activations)  # hidden-to-hidden weights
        b = share(np.zeros(num_activations * nunits))           # biases for all stacked blocks
        out0 = share(np.zeros(nunits))                          # initial output state
        cell0 = share(np.zeros(nunits))                         # initial cell state

        pre_activation = activation_by_name(pre_activation)
        post_activation = activation_by_name(post_activation)

        def step(in_t, out_tm1, cell_tm1):
            """
            Scan function.
            :@param in_t: current input from incoming layer
            :@param out_tm1: prev output of LSTM layer
            :@param cell_tm1: prev central cell value
            :@return: current output and central cell value
            """
            tmp = TT.dot(out_tm1, u) + in_t

            inn_gate = sigmoid(tmp[:nunits])
            out_gate = sigmoid(tmp[nunits:2 * nunits])
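            # Without a dedicated forget gate, couple it to the input gate
            # (forget = 1 - input) so the cell interpolates old and new content.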
            fgt_gate = sigmoid(
                tmp[2 * nunits:3 * nunits]) if forget else 1 - inn_gate

            cell_val = pre_activation(tmp[-nunits:])
            cell_val = fgt_gate * cell_tm1 + inn_gate * cell_val
            out = out_gate * post_activation(cell_val)

            return out, cell_val

        inpt = TT.dot(inpt, w) + b
        # (seqlen x nin) dot (nin x num_activations*nunits) + bias -> seqlen x num_activations*nunits

        # Iterate step over the time axis; rval holds [outputs, cell values].
        rval, updates = theano.scan(step,
                                    sequences=[inpt],
                                    outputs_info=[out0, cell0])

        self.output = rval[0]
        self.params = [w, u, b]
        if learn_init_states:
            self.params += [out0, cell0]
        self.nout = nunits
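
A minimal usage sketch (not part of the original example): it assumes the
method above belongs to a class named LSTM, and that stacked_ortho_wts,
share, activation_by_name and sigmoid are helpers available in the
surrounding module.

    import numpy as np
    import theano
    import theano.tensor as TT

    x = TT.matrix('x')                       # seqlen x nin input sequence
    layer = LSTM(x, nin=16, nunits=32, forget=True)   # hypothetical class name
    f = theano.function([x], layer.output)

    seq = np.random.randn(10, 16).astype(theano.config.floatX)
    print(f(seq).shape)                      # -> (10, 32): one output per time step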
Example #2
    def __init__(self,
                 inpt,
                 nin,
                 nunits,
                 forget=False,
                 actvn_pre='tanh',
                 actvn_post='linear',
                 learn_init_states=True):
        """
        Init
        :param inpt: Lower layer's excitation.
        :param nin: Dimension of lower layer.
        :param nunits: Number of units.
        :param forget: Want a seperate forget gate (or use 1-input)?
        :param actvn_pre: Activation applied to new candidate for cell value.
        :param actvn_post: Activation applied to cell value before output.
        :param learn_init_states: Should the intial states be learnt?
        :return: Output
        """
        # TODO: In-cell connections

        num_activations = 3 + forget
        w = stacked_ortho_wts(nin, nunits, num_activations)
        u = stacked_ortho_wts(nunits, nunits, num_activations)
        b = share(np.zeros(num_activations * nunits))
        out0 = share(np.zeros(nunits))
        cell0 = share(np.zeros(nunits))

        actvn_pre = activation_by_name(actvn_pre)
        actvn_post = activation_by_name(actvn_post)

        def step(in_t, out_tm1, cell_tm1):
            """
            Scan function.
            :param in_t: Current input from bottom layer
            :param out_tm1: Prev output of LSTM layer
            :param cell_tm1: Prev cell value
            :return: Current output and cell value
            """
            tmp = tt.dot(out_tm1, u) + in_t

            inn_gate = sigmoid(tmp[:nunits])
            out_gate = sigmoid(tmp[nunits:2 * nunits])
            fgt_gate = sigmoid(tmp[2 * nunits:3 *
                                   nunits]) if forget else 1 - inn_gate

            cell_val = actvn_pre(tmp[-nunits:])
            cell_val = fgt_gate * cell_tm1 + inn_gate * cell_val
            out = out_gate * actvn_post(cell_val)

            return out, cell_val

        inpt = tt.dot(inpt, w) + b
        # (seqlen x nin) dot (nin x num_activations*nunits) + bias -> seqlen x num_activations*nunits

        rval, updates = th.scan(
            step,
            sequences=[inpt],
            outputs_info=[out0, cell0],
        )

        self.output = rval[0]
        self.params = [w, u, b]
        if learn_init_states:
            self.params += [out0, cell0]
        self.nout = nunits
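
A small NumPy sketch (illustration only; w here is a random stand-in for
stacked_ortho_wts) of the stacked-gate layout that the slicing in step relies
on: with forget=True there are num_activations = 4 blocks, laid out as input
gate, output gate, forget gate, then the cell candidate, so tmp[:nunits],
tmp[nunits:2*nunits], tmp[2*nunits:3*nunits] and tmp[-nunits:] pick out the
four blocks.

    import numpy as np

    nin, nunits = 5, 4
    num_activations = 3 + True                   # forget=True -> 4 stacked blocks
    w = np.random.randn(nin, num_activations * nunits)  # stand-in for stacked_ortho_wts
    x_t = np.random.randn(nin)

    tmp = x_t.dot(w)                             # shape: (num_activations * nunits,)
    blocks = tmp.reshape(num_activations, nunits)  # rows: in, out, forget, candidate

    assert np.allclose(blocks[0], tmp[:nunits])                # input gate block
    assert np.allclose(blocks[1], tmp[nunits:2 * nunits])      # output gate block
    assert np.allclose(blocks[2], tmp[2 * nunits:3 * nunits])  # forget gate block
    assert np.allclose(blocks[3], tmp[-nunits:])               # cell candidate block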