import numpy as np
import theano
import theano.tensor as tensor

import utils  # local helper module providing init_norm, idx_start, max_sent_size


def __init__(self, hidden_size, learning_rate=0.1):
    self.hidden_size = hidden_size
    self.learning_rate = learning_rate
    self.params = []
    self._train = None
    self._predict = None

    # one LSTM per direction; their parameters are trained jointly
    self.fwd_lstm = LSTM(self.hidden_size)
    self.bwd_lstm = LSTM(self.hidden_size)
    self.params += self.fwd_lstm.params
    self.params += self.bwd_lstm.params

    # projections that merge the forward and backward hidden states
    self.Wfwd = theano.shared(name="Wfwd",
                              value=utils.init_norm(self.hidden_size, self.hidden_size),
                              borrow=True)
    self.Wbwd = theano.shared(name="Wbwd",
                              value=utils.init_norm(self.hidden_size, self.hidden_size),
                              borrow=True)
    self.bc = theano.shared(name="bc",
                            value=np.zeros(self.hidden_size, dtype=theano.config.floatX),
                            borrow=True)
    self.params += [self.Wfwd, self.Wbwd, self.bc]
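# The BiLSTM.forward that consumes Wfwd, Wbwd and bc is not shown in this
# listing. A minimal sketch of the usual way such a merge is done (an
# assumption, not the repo's code): run the two LSTMs over the sequence in
# opposite directions and project the pair of hidden states back down to
# hidden_size. The names h_fwd and h_bwd are illustrative.
def _merge_directions_sketch(self, h_fwd, h_bwd):
    # h_fwd, h_bwd: (max_sent_size, batch_size, hidden_size)
    return tensor.tanh(tensor.dot(h_fwd, self.Wfwd) +
                       tensor.dot(h_bwd, self.Wbwd) + self.bc)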
def __init__(self, hidden_size):
    self.hidden_size = hidden_size
    # lstm W matrices, Wf, Wi, Wo, Wc respectively, all config.floatX type
    self.W = theano.shared(name="W",
                           value=utils.init_norm(self.hidden_size, 4 * self.hidden_size),
                           borrow=True)
    # lstm U matrices, Uf, Ui, Uo, Uc respectively, all config.floatX type
    self.U = theano.shared(name="U",
                           value=utils.init_norm(self.hidden_size, 4 * self.hidden_size),
                           borrow=True)
    # lstm b vectors, bf, bi, bo, bc respectively, all config.floatX type
    self.b = theano.shared(name="b",
                           value=np.zeros(4 * self.hidden_size, dtype=theano.config.floatX),
                           borrow=True)
    self.params = [self.W, self.U, self.b]
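# LSTM.forward itself is not shown here. A minimal sketch (an assumption,
# not the repo's code) of how the packed W, U, b above are typically used in
# one recurrent step: the 4*hidden_size pre-activation is sliced into the
# f, i, o, c gates in the order the comments above name them. x_t, h_tm1,
# C_tm1 and _slice are illustrative names.
def _slice(m, idx, size):
    return m[:, idx * size:(idx + 1) * size]

def _lstm_step_sketch(x_t, h_tm1, C_tm1, W, U, b, hidden_size):
    pre = tensor.dot(x_t, W) + tensor.dot(h_tm1, U) + b
    f = tensor.nnet.sigmoid(_slice(pre, 0, hidden_size))  # forget gate
    i = tensor.nnet.sigmoid(_slice(pre, 1, hidden_size))  # input gate
    o = tensor.nnet.sigmoid(_slice(pre, 2, hidden_size))  # output gate
    c_tilde = tensor.tanh(_slice(pre, 3, hidden_size))    # candidate cell
    C_t = f * C_tm1 + i * c_tilde
    h_t = o * tensor.tanh(C_t)
    return h_t, C_t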
def __init__(self, voca_size, hidden_size):
    self.hidden_size = hidden_size
    # word embedding matrix
    self.Wemb = theano.shared(name="word embedding matrix",
                              value=utils.init_norm(voca_size, hidden_size).astype(theano.config.floatX),
                              borrow=True)
    self.params = [self.Wemb]
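# The classifier below calls self.emb.embed_it(x), which is not shown in
# this listing. A plausible sketch (an assumption, not the repo's code):
# index the embedding matrix with the (max_sent_size, batch_size) int
# matrix and reshape to (max_sent_size, batch_size, hidden_size), mirroring
# the lookuptable indexing used by the seq2seq model further down.
def embed_it_sketch(self, x):
    return self.Wemb[x.flatten()].reshape(
        (x.shape[0], x.shape[1], self.hidden_size))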
def __init__(self, voca_size, hidden_size, ydim, num_layers=2, learning_rate=0.1):
    self.hidden_size = hidden_size
    self.n_out = ydim
    self.learning_rate = learning_rate
    self.num_layers = num_layers
    self.layers = []
    self.params = []

    # word embedding layer
    self.emb = WordEmbeder(voca_size, hidden_size)
    self.params += self.emb.params

    # symbolic inputs: (max_sent_size, batch_size) int matrices and a label vector
    x = tensor.imatrix()
    mask = tensor.imatrix()
    y = tensor.ivector()

    # stacked bidirectional LSTM layers
    state_below = self.emb.embed_it(x)
    for _ in range(self.num_layers):
        binet = BiLSTM(self.hidden_size, self.learning_rate)
        self.layers += binet,
        self.params += binet.params
        state_below = binet.forward(state_below, mask)

    # softmax classifier weights
    self.U = theano.shared(name="biU",
                           value=utils.init_norm(self.hidden_size, self.n_out),
                           borrow=True)
    self.by = theano.shared(name="by",
                            value=np.zeros(self.n_out, dtype=theano.config.floatX),
                            borrow=True)
    self.params += [self.U, self.by]

    # mean pooling over time, masked so padding does not contribute
    hs = state_below
    mp = (hs * mask[:, :, None]).sum(axis=0)
    mp = mp / mask.sum(axis=0)[:, None]

    # classifier
    pred_p = tensor.nnet.softmax(tensor.dot(mp, self.U) + self.by)
    pred_y = pred_p.argmax(axis=1)

    # negative log-likelihood, with a small offset for numerical stability
    off_set = 1e-8
    cost = -tensor.log(pred_p[tensor.arange(mask.shape[1]), y] + off_set).mean()

    # plain SGD updates
    gparams = [tensor.grad(cost, param) for param in self.params]
    updates = [(param, param - self.learning_rate * gparam)
               for param, gparam in zip(self.params, gparams)]

    # compiled training and prediction functions
    vinputs = tensor.imatrix("vinputs")
    vmask = tensor.imatrix("vmask")
    vy = tensor.ivector("vy")
    self._train = theano.function(inputs=[vinputs, vmask, vy],
                                  outputs=cost,
                                  updates=updates,
                                  givens={x: vinputs, mask: vmask, y: vy})
    self._predict = theano.function(inputs=[vinputs, vmask],
                                    outputs=pred_y,
                                    givens={x: vinputs, mask: vmask})
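# Hypothetical usage of the compiled functions above (SentClassifier is an
# illustrative name; the class name is not shown in this listing). Inputs
# and mask are time-major (max_sent_size, batch_size) int32 matrices, as
# implied by the axis=0 mean pooling; the data below is made up.
model = SentClassifier(voca_size=10000, hidden_size=128, ydim=2)  # hypothetical name
batch_inputs = np.zeros((20, 16), dtype="int32")  # padded word indices
batch_mask = np.ones((20, 16), dtype="int32")     # 1 = real token, 0 = padding
batch_labels = np.zeros(16, dtype="int32")
cost = model._train(batch_inputs, batch_mask, batch_labels)
preds = model._predict(batch_inputs, batch_mask)  # (batch_size,) predicted labels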
def __init__(self, voca_size, hidden_size, lstm_layers_num, learning_rate=0.2):
    self.voca_size = voca_size
    self.hidden_size = hidden_size
    self.lstm_layers_num = lstm_layers_num
    self.learning_rate = learning_rate
    self._train = None
    self._utter = None
    self.params = []
    self.encoder_lstm_layers = []
    self.decoder_lstm_layers = []
    self.hos = []
    self.Cos = []

    # symbolic inputs: (max_sent_size, batch_size) int matrices
    encoderInputs, encoderMask = tensor.imatrices(2)
    decoderInputs, decoderMask, decoderTarget = tensor.imatrices(3)

    # shared lookup table and output projection
    self.lookuptable = theano.shared(name="Encoder LookUpTable",
                                     value=utils.init_norm(self.voca_size, self.hidden_size),
                                     borrow=True)
    self.linear = theano.shared(name="Linear",
                                value=utils.init_norm(self.hidden_size, self.voca_size),
                                borrow=True)
    self.params += [self.lookuptable, self.linear]  # concatenate

    # encoder: embed the inputs to (max_sent_size, batch_size, hidden_size)
    # and run the stacked LSTM layers, keeping each layer's final h and C
    state_below = self.lookuptable[encoderInputs.flatten()].reshape(
        (encoderInputs.shape[0], encoderInputs.shape[1], self.hidden_size))
    for _ in range(self.lstm_layers_num):
        enclstm = LSTM(self.hidden_size)
        self.encoder_lstm_layers += enclstm,  # append
        self.params += enclstm.params         # concatenate
        hs, Cs = enclstm.forward(state_below, encoderMask)
        self.hos += hs[-1],
        self.Cos += Cs[-1],
        state_below = hs

    # decoder: each layer is initialised with the matching encoder layer's
    # final hidden and cell state
    state_below = self.lookuptable[decoderInputs.flatten()].reshape(
        (decoderInputs.shape[0], decoderInputs.shape[1], self.hidden_size))
    for i in range(self.lstm_layers_num):
        declstm = LSTM(self.hidden_size)
        self.decoder_lstm_layers += declstm,  # append
        self.params += declstm.params         # concatenate
        ho, Co = self.hos[i], self.Cos[i]
        state_below, Cs = declstm.forward(state_below, decoderMask, ho, Co)
    decoder_lstm_outputs = state_below

    ei, em, di, dm, dt = tensor.imatrices(5)  # place holders

    #####################################################
    # training: softmax over the vocabulary + masked NLL
    #####################################################
    linear_outputs = tensor.dot(decoder_lstm_outputs, self.linear)
    softmax_outputs, updates = theano.scan(
        fn=lambda x: tensor.nnet.softmax(x),
        sequences=[linear_outputs],
    )

    def _NLL(pred, y, m):
        return -m * tensor.log(pred[tensor.arange(decoderInputs.shape[1]), y])

    costs, updates = theano.scan(fn=_NLL,
                                 sequences=[softmax_outputs, decoderTarget, decoderMask])
    loss = costs.sum() / decoderMask.sum()

    # plain SGD updates
    gparams = [tensor.grad(loss, param) for param in self.params]
    updates = [(param, param - self.learning_rate * gparam)
               for param, gparam in zip(self.params, gparams)]

    self._train = theano.function(inputs=[ei, em, di, dm, dt],
                                  outputs=[loss, costs],
                                  updates=updates,
                                  givens={encoderInputs: ei,
                                          encoderMask: em,
                                          decoderInputs: di,
                                          decoderMask: dm,
                                          decoderTarget: dt})

    #####################################################
    # generation: greedy decoding, one scan step per output token
    #####################################################
    hs0 = tensor.as_tensor_variable(self.hos, name="hs0")
    Cs0 = tensor.as_tensor_variable(self.Cos, name="Cs0")
    token_idxs = tensor.fill(tensor.zeros_like(decoderInputs, dtype="int32"),
                             utils.idx_start)
    msk = tensor.fill(tensor.zeros_like(decoderInputs, dtype="int32"), 1)

    def _step(token_idxs, hs_, Cs_):
        hs, Cs = [], []
        state_below = self.lookuptable[token_idxs].reshape(
            (decoderInputs.shape[0], decoderInputs.shape[1], self.hidden_size))
        for i, lstm in enumerate(self.decoder_lstm_layers):
            h, C = lstm.forward(state_below, msk, hs_[i], Cs_[i])  # mind msk
            hs += h[-1],
            Cs += C[-1],
            state_below = h
        hs, Cs = tensor.as_tensor_variable(hs), tensor.as_tensor_variable(Cs)
        next_token_idx = tensor.cast(
            tensor.dot(state_below, self.linear).argmax(axis=-1), "int32")
        return next_token_idx, hs, Cs

    outputs, updates = theano.scan(fn=_step,
                                   outputs_info=[token_idxs, hs0, Cs0],
                                   n_steps=utils.max_sent_size)
    listof_token_idx = outputs[0]
    self._utter = theano.function(inputs=[ei, em, di],
                                  outputs=listof_token_idx,
                                  # givens={encoderInputs: ei, encoderMask: em}
                                  givens={encoderInputs: ei,
                                          encoderMask: em,
                                          decoderInputs: di})
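# Hypothetical usage of the seq2seq functions above (Seq2Seq is an
# illustrative class name; it is not shown in this listing). All five
# arguments are time-major (max_sent_size, batch_size) int32 matrices; in
# the usual teacher-forcing setup decoderTarget would be decoderInputs
# shifted by one step (an assumption, not shown here). The data is made up.
chatbot = Seq2Seq(voca_size=10000, hidden_size=256, lstm_layers_num=2)  # hypothetical name
ei = np.zeros((20, 8), dtype="int32")  # encoder word indices
em = np.ones((20, 8), dtype="int32")   # encoder mask
di = np.zeros((20, 8), dtype="int32")  # decoder inputs
dm = np.ones((20, 8), dtype="int32")   # decoder mask
dt = np.zeros((20, 8), dtype="int32")  # decoder targets
loss, costs = chatbot._train(ei, em, di, dm, dt)
reply_idxs = chatbot._utter(ei, em, di)  # greedily generated token indices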