def __init__(self, inpt, nin, nunits, forget=False,
             actvn_pre='tanh', actvn_post='linear',
             learn_init_states=True):
    """
    Init
    :param inpt: Lower layer's excitation.
    :param nin: Dimension of the lower layer.
    :param nunits: Number of units.
    :param forget: Use a separate forget gate? (Otherwise the forget
        gate is tied to 1 - input gate.)
    :param actvn_pre: Activation applied to the new candidate cell value.
    :param actvn_post: Activation applied to the cell value before output.
    :param learn_init_states: Should the initial states be learnt?
    :return: None. Sets self.output, self.params and self.nout.
    """
    # TODO: In-cell connections
    # One stacked weight block per pre-synaptic activation:
    # input gate, output gate, candidate cell value
    # (+ an optional forget gate).
    num_activations = 3 + forget
    w = stacked_ortho_wts(nin, nunits, num_activations)
    u = stacked_ortho_wts(nunits, nunits, num_activations)
    b = share(np.zeros(num_activations * nunits))
    out0 = share(np.zeros(nunits))
    cell0 = share(np.zeros(nunits))
    actvn_pre = activation_by_name(actvn_pre)
    actvn_post = activation_by_name(actvn_post)

    def step(in_t, out_tm1, cell_tm1):
        """
        Scan function.
        :param in_t: Current input from the bottom layer
        :param out_tm1: Previous output of the LSTM layer
        :param cell_tm1: Previous cell value
        :return: Current output and cell value
        """
        # Recurrent contribution plus the (precomputed) input
        # contribution, then slice out the individual gates.
        tmp = tt.dot(out_tm1, u) + in_t
        inn_gate = sigmoid(tmp[:nunits])
        out_gate = sigmoid(tmp[nunits:2 * nunits])
        fgt_gate = (sigmoid(tmp[2 * nunits:3 * nunits])
                    if forget else 1 - inn_gate)
        cell_val = actvn_pre(tmp[-nunits:])
        cell_val = fgt_gate * cell_tm1 + inn_gate * cell_val
        out = out_gate * actvn_post(cell_val)
        return out, cell_val

    # Precompute the input contribution for all time steps at once:
    # (seqlen x nin) . (nin x num_activations*nunits) + bias
    #   = seqlen x num_activations*nunits
    inpt = tt.dot(inpt, w) + b
    rval, updates = th.scan(step,
                            sequences=[inpt],
                            outputs_info=[out0, cell0])
    self.output = rval[0]
    self.params = [w, u, b]
    if learn_init_states:
        self.params += [out0, cell0]
    self.nout = nunits
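
# --- Usage sketch (illustrative, not part of the original source) ---
# A minimal smoke test, assuming this __init__ belongs to a layer class
# named `LSTM` (hypothetical name) and that the surrounding module
# defines the helpers used above (share, stacked_ortho_wts,
# activation_by_name, sigmoid) along with the usual aliases
# np/tt/th for numpy, theano.tensor and theano:
#
#     import numpy as np
#     import theano as th
#     import theano.tensor as tt
#
#     x = tt.matrix('x')                       # seqlen x nin
#     layer = LSTM(x, nin=20, nunits=50, forget=True)
#     f = th.function([x], layer.output)
#     seq = np.random.randn(100, 20).astype(th.config.floatX)
#     print(f(seq).shape)                      # expected: (100, 50)
#
# Note the design choice above: the input-to-hidden product tt.dot(inpt, w)
# is hoisted out of step() so scan only performs the hidden-to-hidden
# product per time step, which is the standard trick for faster scans.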