def __init__(self, inpt, nin, nunits, conv_sz=1, learn_init_state=True):
    """
    Build a simple tanh recurrent layer over the (pre-transposed) input.

    :param inpt: Lower layer's excitation, shape (width, nin); transposed a priori.
    :param nin: Dimension of the lower layer.
    :param nunits: Number of recurrent units.
    :param conv_sz: How many consecutive input columns to fold into one
        time step (1 = no folding).
    :param learn_init_state: Whether the initial hidden state is a
        learnable parameter.
    """
    width, _ = inpt.shape

    # Fold groups of conv_sz consecutive rows into single, wider time steps.
    # Any trailing remainder (width % conv_sz rows) is dropped first so the
    # reshape is exact.
    if conv_sz <= 1:
        seq_in = inpt
    else:
        usable = conv_sz * (width // conv_sz)
        seq_in = inpt[:usable, :].reshape((width // conv_sz, nin * conv_sz))

    w_in = share(init_wts(nin * conv_sz, nunits))   # input -> hidden
    w_rec = share(init_wts(nunits, nunits))         # hidden -> hidden
    bias = share(init_wts(nunits))                  # hidden bias
    init_state = share(init_wts(nunits))            # h_0

    def step(x_t, h_tm1):
        # One recurrence: h_t = tanh(h_{t-1} W_rec + x_t W_in + b)
        return tt.tanh(tt.dot(h_tm1, w_rec) + tt.dot(x_t, w_in) + bias)

    self.output, _ = theano.scan(
        step,
        sequences=[seq_in],
        outputs_info=[init_state],
    )

    self.params = [w_in, w_rec, bias]
    if learn_init_state:
        self.params += [init_state]
    self.nout = nunits
def __init__(self, inpt, nin, nunits, forget=False, actvn_pre='tanh', actvn_post='linear', learn_init_states=True): """ Init :param inpt: Lower layer's excitation. :param nin: Dimension of lower layer. :param nunits: Number of units. :param forget: Want a seperate forget gate (or use 1-input)? :param actvn_pre: Activation applied to new candidate for cell value. :param actvn_post: Activation applied to cell value before output. :param learn_init_states: Should the intial states be learnt? :return: Output """ # TODO: Incell connections num_activations = 3 + forget w = stacked_wts(nin, nunits, num_activations) u = stacked_wts(nunits, nunits, num_activations) b = share(np.zeros(num_activations * nunits)) out0 = share(np.zeros(nunits)) cell0 = share(np.zeros(nunits)) actvn_pre = activation_by_name(actvn_pre) actvn_post = activation_by_name(actvn_post) def step(in_t, out_tm1, cell_tm1): """ Scan function. :param in_t: Current input from bottom layer :param out_tm1: Prev output of LSTM layer :param cell_tm1: Prev cell value :return: Current output and cell value """ tmp = tt.dot(out_tm1, u) + in_t inn_gate = sigmoid(tmp[:nunits]) out_gate = sigmoid(tmp[nunits:2 * nunits]) fgt_gate = sigmoid( tmp[2 * nunits:3 * nunits]) if forget else 1 - inn_gate cell_val = actvn_pre(tmp[-nunits:]) cell_val = fgt_gate * cell_tm1 + inn_gate * cell_val out = out_gate * actvn_post(cell_val) return out, cell_val inpt = tt.dot(inpt, w) + b # seqlen x nin * nin x 3*nout + 3 * nout = seqlen x 3*nout rval, updates = th.scan(step, sequences=[inpt], outputs_info=[out0, cell0], ) self.output = rval[0] self.params = [w, u, b] if learn_init_states: self.params += [out0, cell0] self.nout = nunits
def stacked_wts(n, m, copies, name=None):
    """
    Return a shared variable holding `copies` orthonormal (n x m) weight
    matrices stacked side by side into a single (n x copies*m) matrix.

    :param n: Number of rows of each block.
    :param m: Number of columns of each block.
    :param copies: How many independently-initialized blocks to stack.
    :param name: Optional name for the shared variable.
    """
    blocks = [orthonormal_wts(n, m) for _ in range(copies)]
    stacked = np.hstack(blocks)
    return share(stacked, name=name)
def __init__(self, inpt, in_sz, n_classes, ):
    """
    Build a softmax output layer on top of the given input.

    :param inpt: Lower layer's excitation (last dim = in_sz).
    :param in_sz: Dimension of the lower layer.
    :param n_classes: Number of output classes.
    """
    w = share(init_wts(in_sz, n_classes))   # class weights
    b = share(init_wts(n_classes))          # class biases
    # Per-row class probabilities.
    self.output = tt.nnet.softmax(tt.dot(inpt, w) + b)
    self.params = [w, b]