Example #1
    def __init__(self, hidden_size, learning_rate=0.1):
        self.hidden_size = hidden_size
        self.learning_rate = learning_rate
        self.params = []
        self._train = None
        self._predict = None

        self.fwd_lstm = LSTM(self.hidden_size)
        self.bwd_lstm = LSTM(self.hidden_size)
        self.params += self.fwd_lstm.params
        self.params += self.bwd_lstm.params

        self.Wfwd = theano.shared(name="Wfwd",
                                  value=utils.init_norm(
                                      self.hidden_size, self.hidden_size),
                                  borrow=True)
        self.Wbwd = theano.shared(name="Wbwd",
                                  value=utils.init_norm(
                                      self.hidden_size, self.hidden_size),
                                  borrow=True)
        self.bc = theano.shared(name="bc",
                                value=np.zeros(self.hidden_size),
                                borrow=True)

        self.params += [self.Wfwd, self.Wbwd, self.bc]
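Every example on this page calls a helper utils.init_norm(n_in, n_out) whose source is not shown. As a hedged reference only, a typical implementation of such an initializer (a small random Gaussian matrix cast to theano.config.floatX) might look like the sketch below; the scale argument is an assumption, not something taken from the original utils module.

import numpy as np
import theano

def init_norm(n_in, n_out, scale=0.01):
    # Hypothetical stand-in for utils.init_norm: a scaled random Gaussian
    # weight matrix of shape (n_in, n_out) in Theano's configured float type.
    return np.asarray(scale * np.random.randn(n_in, n_out),
                      dtype=theano.config.floatX)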
Example #2
	def __init__(self, hidden_size):

		self.hidden_size = hidden_size

		# lstm W matrices, Wf, Wi, Wo, Wc respectively, all config.floatX type
		self.W = theano.shared(name="W", value=utils.init_norm(self.hidden_size, 4*self.hidden_size), borrow=True)
		# lstm U matrices, Uf, Ui, Uo, Uc respectively, all config.floatX type
		self.U = theano.shared(name="U", value=utils.init_norm(self.hidden_size, 4*self.hidden_size), borrow=True)
		# lstm b vectors, bf, bi, bo, bc respectively, all config.floatX type
		self.b = theano.shared(name="b", value=np.zeros( 4*self.hidden_size, dtype=theano.config.floatX ), borrow=True)

		self.params = [self.W, self.U, self.b]
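Example #2 only declares the stacked gate parameters W, U and b; the recurrence itself lives in a forward method that the snippet does not include. The sketch below is one plausible way to write it, assuming the gate order (f, i, o, c) from the comments, inputs of shape (seq_len, batch, hidden_size), and an integer 0/1 padding mask; it is a reconstruction, not the repository's actual method.

import numpy as np
import theano
import theano.tensor as tensor

class LSTM(object):
    # __init__ as in Example #2 above.

    def forward(self, state_below, mask, h0=None, C0=None):
        # state_below: (seq_len, batch, hidden_size); mask: (seq_len, batch) of 0/1.
        hs_dim = self.hidden_size
        n_samples = state_below.shape[1]

        def _slice(preact, idx):
            # pick the idx-th hidden_size-wide block of the 4*hidden_size preactivation
            return preact[:, idx * hs_dim:(idx + 1) * hs_dim]

        def _step(x_t, m_t, h_prev, C_prev):
            m = tensor.cast(m_t, theano.config.floatX)[:, None]
            preact = tensor.dot(x_t, self.W) + tensor.dot(h_prev, self.U) + self.b
            f = tensor.nnet.sigmoid(_slice(preact, 0))   # forget gate
            i = tensor.nnet.sigmoid(_slice(preact, 1))   # input gate
            o = tensor.nnet.sigmoid(_slice(preact, 2))   # output gate
            c = tensor.tanh(_slice(preact, 3))           # candidate cell state
            C = f * C_prev + i * c
            h = o * tensor.tanh(C)
            # keep the previous state wherever the mask marks padding
            C = m * C + (1. - m) * C_prev
            h = m * h + (1. - m) * h_prev
            return h, C

        zero = np.asarray(0., dtype=theano.config.floatX)
        if h0 is None:
            h0 = tensor.alloc(zero, n_samples, hs_dim)
        if C0 is None:
            C0 = tensor.alloc(zero, n_samples, hs_dim)
        (hs, Cs), _ = theano.scan(fn=_step,
                                  sequences=[state_below, mask],
                                  outputs_info=[h0, C0])
        return hs, Cs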
Example #3
	def __init__(self, hidden_size, learning_rate=0.1):
		self.hidden_size = hidden_size
		self.learning_rate = learning_rate
		self.params = []
		self._train = None
		self._predict = None

		self.fwd_lstm = LSTM(self.hidden_size)
		self.bwd_lstm = LSTM(self.hidden_size)
		self.params += self.fwd_lstm.params
		self.params += self.bwd_lstm.params

		self.Wfwd = theano.shared(name="Wfwd", value=utils.init_norm(self.hidden_size, self.hidden_size), borrow=True)
		self.Wbwd = theano.shared(name="Wbwd", value=utils.init_norm(self.hidden_size, self.hidden_size), borrow=True)
		self.bc = theano.shared(name="bc", value=np.zeros(self.hidden_size), borrow=True)

		self.params += [self.Wfwd, self.Wbwd, self.bc]
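Examples #1 and #3 are the same bidirectional-LSTM constructor; Wfwd, Wbwd and bc are presumably used in a forward method (the one Example #5 calls as binet.forward(state_below, mask)), which the listing omits. The following is a speculative sketch only, assuming the LSTM.forward interface sketched above and the (seq_len, batch, hidden_size) layout used elsewhere on this page; it is not taken from the original code.

import theano.tensor as tensor

# Sketch of a method on the BiLSTM class from Examples #1/#3.
def forward(self, state_below, mask):
    # run the forward LSTM over the sequence as given
    h_fwd, _ = self.fwd_lstm.forward(state_below, mask)
    # run the backward LSTM over the reversed sequence, then flip it back into order
    h_bwd, _ = self.bwd_lstm.forward(state_below[::-1], mask[::-1])
    h_bwd = h_bwd[::-1]
    # merge the two directions back down to hidden_size with Wfwd, Wbwd, bc
    return tensor.tanh(tensor.dot(h_fwd, self.Wfwd) +
                       tensor.dot(h_bwd, self.Wbwd) + self.bc)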
Example #4
	def __init__(self, voca_size, hidden_size):
		# word embedding matrix
		self.hidden_size = hidden_size
		self.Wemb = theano.shared(
			name="word embedding matrix",
			value = utils.init_norm(voca_size, hidden_size).astype(theano.config.floatX),
			borrow=True
			)
		self.params = [self.Wemb]
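Example #5 calls self.emb.embed_it(x) on this embedding class, but embed_it itself is not shown. A minimal sketch, assuming x is an int32 matrix of word indices with shape (seq_len, batch), mirroring how Examples #8 and #10 index their lookup table:

# Sketch of a method on the word-embedding class from Examples #4/#6.
def embed_it(self, x):
    # look up every index in Wemb and restore the (seq_len, batch, hidden_size) layout
    return self.Wemb[x.flatten()].reshape(
        (x.shape[0], x.shape[1], self.hidden_size))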
Example #5
	def __init__(self, voca_size, hidden_size, ydim, num_layers=2, learning_rate=0.1):
		self.hidden_size = hidden_size
		self.n_out = ydim
		self.learning_rate = learning_rate
		self.num_layers = num_layers
		self.layers = []
		self.params = []

		self.emb = WordEmbeder(voca_size, hidden_size)
		self.params += self.emb.params

		x = tensor.imatrix() #symbolic
		mask = tensor.imatrix()
		y = tensor.ivector()

		state_below = self.emb.embed_it(x)
		for _ in range(self.num_layers):
			binet = BiLSTM(self.hidden_size, self.learning_rate)
			self.layers += binet,
			self.params += binet.params
			state_below = binet.forward(state_below, mask)

		self.U = theano.shared(name="biU", value=utils.init_norm(self.hidden_size, self.n_out), borrow=True)
		self.by = theano.shared(name="by", value=np.zeros(self.n_out), borrow=True)
		self.params += [self.U, self.by]

		#mean pooling
		hs = state_below
		mp = (hs*mask[:,:,None]).sum(axis=0)
		mp = mp / mask.sum(axis=0)[:,None]

		#classifier
		pred_p = tensor.nnet.softmax(tensor.dot(mp, self.U) + self.by)
		pred_y = pred_p.argmax(axis=1)

		#nll
		off_set = 1e-8
		cost = -tensor.log( pred_p[tensor.arange(mask.shape[1]), y] + off_set ).mean()
		gparams = [tensor.grad(cost, param) for param in self.params]
		updates = [(param, param - self.learning_rate*gparam) for param, gparam in zip(self.params, gparams)]

		vinputs = tensor.imatrix("vinputs")#variable
		vmask = tensor.imatrix("vmask")
		vy = tensor.ivector("vy")
		
		self._train = theano.function(
			inputs=[vinputs, vmask, vy],
			outputs=cost,
			updates=updates,
			givens={x:vinputs, mask:vmask, y:vy}
			)

		self._predict = theano.function(
			inputs=[vinputs, vmask],
			outputs=pred_y,
			givens={x:vinputs, mask:vmask}
			)
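The constructor in Example #5 compiles _train (mean negative log-likelihood plus one SGD update) and _predict into Theano functions over (seq_len, batch) int32 inputs with a 0/1 mask and one label per column. A hedged usage sketch with toy data follows; the class name BiLSTMClassifier is hypothetical, since the listing only shows the __init__ body.

import numpy as np

# Hypothetical class name for the snippet above.
model = BiLSTMClassifier(voca_size=1000, hidden_size=64, ydim=2)

seq_len, batch = 7, 4
x = np.random.randint(0, 1000, size=(seq_len, batch)).astype("int32")  # word indices
mask = np.ones((seq_len, batch), dtype="int32")                        # 1 = token, 0 = padding
y = np.random.randint(0, 2, size=(batch,)).astype("int32")             # one label per sequence

cost = model._train(x, mask, y)   # one SGD step, returns the mean NLL
preds = model._predict(x, mask)   # predicted class per sequence, shape (batch,)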
Example #6
    def __init__(self, voca_size, hidden_size):
        # word embedding matrix
        self.hidden_size = hidden_size
        self.Wemb = theano.shared(
            name="word embedding matrix",
            value=utils.init_norm(voca_size,
                                  hidden_size).astype(theano.config.floatX),
            borrow=True)
        self.params = [self.Wemb]
Example #7
    def __init__(self, hidden_size):

        self.hidden_size = hidden_size

        # lstm W matrices, Wf, Wi, Wo, Wc.
        self.W = theano.shared(name="W",
                               value=utils.init_norm(self.hidden_size,
                                                     4 * self.hidden_size),
                               borrow=True)
        # lstm U matrices, Uf, Ui, Uo, Uc.
        self.U = theano.shared(name="U",
                               value=utils.init_norm(self.hidden_size,
                                                     4 * self.hidden_size),
                               borrow=True)
        # lstm b vectors, bf, bi, bo, bc.
        self.b = theano.shared(name="b",
                               value=np.zeros(4 * self.hidden_size,
                                              dtype=theano.config.floatX),
                               borrow=True)

        self.params = [self.W, self.U, self.b]
Example #8
	def __init__(self, voca_size, hidden_size, lstm_layers_num, learning_rate=0.2):
		self.voca_size = voca_size
		self.hidden_size = hidden_size
		self.lstm_layers_num = lstm_layers_num
		self.learning_rate = learning_rate
		self._train = None
		self._utter = None
		self.params = []
		self.encoder_lstm_layers = []
		self.decoder_lstm_layers = []
		self.hos = []
		self.Cos = []

		encoderInputs, encoderMask = tensor.imatrices(2)
		decoderInputs, decoderMask, decoderTarget = tensor.imatrices(3)

		self.lookuptable = theano.shared(
			name="Encoder LookUpTable",
			value=utils.init_norm(self.voca_size, self.hidden_size),
			borrow=True
			)
		self.linear = theano.shared(
			name="Linear",
			value=utils.init_norm(self.hidden_size, self.voca_size),
			borrow=True
			)
		self.params += [self.lookuptable, self.linear]    #concatenate
		
		#(max_sent_size, batch_size, hidden_size)
		state_below = self.lookuptable[encoderInputs.flatten()].reshape((encoderInputs.shape[0], encoderInputs.shape[1], self.hidden_size))
		for _ in range(self.lstm_layers_num):
			enclstm = LSTM(self.hidden_size)
			self.encoder_lstm_layers += enclstm,    #append
			self.params += enclstm.params    #concatenate
			hs, Cs = enclstm.forward(state_below, encoderMask)
			self.hos += hs[-1],
			self.Cos += Cs[-1],
			state_below = hs

		state_below = self.lookuptable[decoderInputs.flatten()].reshape((decoderInputs.shape[0], decoderInputs.shape[1], self.hidden_size))
		for i in range(self.lstm_layers_num):
			declstm = LSTM(self.hidden_size)
			self.decoder_lstm_layers += declstm,    #append
			self.params += declstm.params    #concatenate
			ho, Co = self.hos[i], self.Cos[i]
			state_below, Cs = declstm.forward(state_below, decoderMask, ho, Co)
		decoder_lstm_outputs = state_below

		ei, em, di, dm, dt = tensor.imatrices(5)    #place holders
		#####################################################
		#####################################################
		linear_outputs = tensor.dot(decoder_lstm_outputs, self.linear)
		softmax_outputs, updates = theano.scan(
			fn=lambda x: tensor.nnet.softmax(x),
			sequences=[linear_outputs],
			)

		def _NLL(pred, y, m):
			return -m * tensor.log(pred[tensor.arange(decoderInputs.shape[1]), y])
		costs, updates = theano.scan(fn=_NLL, sequences=[softmax_outputs, decoderTarget, decoderMask])
		loss = costs.sum() / decoderMask.sum()

		gparams = [tensor.grad(loss, param) for param in self.params]
		updates = [(param, param - self.learning_rate*gparam) for param, gparam in zip(self.params, gparams)]

		self._train = theano.function(
			inputs=[ei, em, di, dm, dt],
			outputs=[loss, costs],
			updates=updates,
			givens={encoderInputs:ei, encoderMask:em, decoderInputs:di, decoderMask:dm, decoderTarget:dt}
			)
		#####################################################
		#####################################################
		hs0, Cs0 = tensor.as_tensor_variable(self.hos, name="hs0"), tensor.as_tensor_variable(self.Cos, name="Cs0")
		token_idxs = tensor.fill( tensor.zeros_like(decoderInputs, dtype="int32"), utils.idx_start)
		msk = tensor.fill( (tensor.zeros_like(decoderInputs, dtype="int32")), 1)

		def _step(token_idxs, hs_, Cs_):
			hs, Cs = [], []
			state_below = self.lookuptable[token_idxs].reshape((decoderInputs.shape[0], decoderInputs.shape[1], self.hidden_size))
			for i, lstm in enumerate(self.decoder_lstm_layers):
				h, C = lstm.forward(state_below, msk, hs_[i], Cs_[i])    #mind msk
				hs += h[-1],
				Cs += C[-1],
				state_below = h
			hs, Cs = tensor.as_tensor_variable(hs), tensor.as_tensor_variable(Cs)
			next_token_idx = tensor.cast( tensor.dot(state_below, self.linear).argmax(axis=-1), "int32" )
			return next_token_idx, hs, Cs

		outputs, updates = theano.scan(
			fn=_step,
			outputs_info=[token_idxs, hs0, Cs0],
			n_steps=utils.max_sent_size
			)
		listof_token_idx = outputs[0]
		self._utter = theano.function(
			inputs=[ei, em, di],
			outputs=listof_token_idx,
			givens={encoderInputs:ei, encoderMask:em, decoderInputs:di}
			#givens={encoderInputs:ei, encoderMask:em}
			)
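Example #8 wires an encoder-decoder pair and compiles two functions: _train, which runs a teacher-forced step and returns the masked negative log-likelihood, and _utter, which decodes greedily from utils.idx_start for utils.max_sent_size steps. A hedged usage sketch with toy int32 batches follows; the class name Seq2Seq is hypothetical, and the exact shape of _utter's output depends on the decoder-input matrix passed in.

import numpy as np

# Hypothetical class name for the snippet above.
model = Seq2Seq(voca_size=500, hidden_size=32, lstm_layers_num=2)

src_len, tgt_len, batch = 6, 5, 3
ei = np.random.randint(0, 500, size=(src_len, batch)).astype("int32")  # encoder inputs
em = np.ones((src_len, batch), dtype="int32")                          # encoder mask
di = np.random.randint(0, 500, size=(tgt_len, batch)).astype("int32")  # decoder inputs
dm = np.ones((tgt_len, batch), dtype="int32")                          # decoder mask
dt = np.random.randint(0, 500, size=(tgt_len, batch)).astype("int32")  # decoder targets

loss, costs = model._train(ei, em, di, dm, dt)  # teacher-forced step: loss and per-step costs
generated = model._utter(ei, em, di)            # greedily decoded token ids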
Example #9
    def __init__(self,
                 voca_size,
                 hidden_size,
                 ydim,
                 num_layers=2,
                 learning_rate=0.1):
        self.hidden_size = hidden_size
        self.n_out = ydim
        self.learning_rate = learning_rate
        self.num_layers = num_layers
        self.layers = []
        self.params = []

        self.emb = WordEmbeder(voca_size, hidden_size)
        self.params += self.emb.params

        x = tensor.imatrix()  #symbolic
        mask = tensor.imatrix()
        y = tensor.ivector()

        state_below = self.emb.embed_it(x)
        for _ in range(self.num_layers):
            binet = BiLSTM(self.hidden_size, self.learning_rate)
            self.layers += binet,
            self.params += binet.params
            state_below = binet.forward(state_below, mask)

        self.U = theano.shared(name="biU",
                               value=utils.init_norm(self.hidden_size,
                                                     self.n_out),
                               borrow=True)
        self.by = theano.shared(name="by",
                                value=np.zeros(self.n_out),
                                borrow=True)
        self.params += [self.U, self.by]

        #mean pooling
        hs = state_below
        mp = (hs * mask[:, :, None]).sum(axis=0)
        mp = mp / mask.sum(axis=0)[:, None]

        #classifier
        pred_p = tensor.nnet.softmax(tensor.dot(mp, self.U) + self.by)
        pred_y = pred_p.argmax(axis=1)

        #nll
        off_set = 1e-8
        cost = -tensor.log(pred_p[tensor.arange(mask.shape[1]), y] +
                           off_set).mean()
        gparams = [tensor.grad(cost, param) for param in self.params]
        updates = [(param, param - self.learning_rate * gparam)
                   for param, gparam in zip(self.params, gparams)]

        vinputs = tensor.imatrix("vinputs")  #variable
        vmask = tensor.imatrix("vmask")
        vy = tensor.ivector("vy")

        self._train = theano.function(inputs=[vinputs, vmask, vy],
                                      outputs=cost,
                                      updates=updates,
                                      givens={
                                          x: vinputs,
                                          mask: vmask,
                                          y: vy
                                      })

        self._predict = theano.function(inputs=[vinputs, vmask],
                                        outputs=pred_y,
                                        givens={
                                            x: vinputs,
                                            mask: vmask
                                        })
Example #10
    def __init__(self,
                 voca_size,
                 hidden_size,
                 lstm_layers_num,
                 learning_rate=0.2):
        self.voca_size = voca_size
        self.hidden_size = hidden_size
        self.lstm_layers_num = lstm_layers_num
        self.learning_rate = learning_rate
        self._train = None
        self._utter = None
        self.params = []
        self.encoder_lstm_layers = []
        self.decoder_lstm_layers = []
        self.hos = []
        self.Cos = []

        encoderInputs, encoderMask = tensor.imatrices(2)
        decoderInputs, decoderMask, decoderTarget = tensor.imatrices(3)

        self.lookuptable = theano.shared(name="Encoder LookUpTable",
                                         value=utils.init_norm(
                                             self.voca_size, self.hidden_size),
                                         borrow=True)
        self.linear = theano.shared(name="Linear",
                                    value=utils.init_norm(
                                        self.hidden_size, self.voca_size),
                                    borrow=True)
        self.params += [self.lookuptable, self.linear]  #concatenate

        #(max_sent_size, batch_size, hidden_size)
        state_below = self.lookuptable[encoderInputs.flatten()].reshape(
            (encoderInputs.shape[0], encoderInputs.shape[1], self.hidden_size))
        for _ in range(self.lstm_layers_num):
            enclstm = LSTM(self.hidden_size)
            self.encoder_lstm_layers += enclstm,  #append
            self.params += enclstm.params  #concatenate
            hs, Cs = enclstm.forward(state_below, encoderMask)
            self.hos += hs[-1],
            self.Cos += Cs[-1],
            state_below = hs

        state_below = self.lookuptable[decoderInputs.flatten()].reshape(
            (decoderInputs.shape[0], decoderInputs.shape[1], self.hidden_size))
        for i in range(self.lstm_layers_num):
            declstm = LSTM(self.hidden_size)
            self.decoder_lstm_layers += declstm,  #append
            self.params += declstm.params  #concatenate
            ho, Co = self.hos[i], self.Cos[i]
            state_below, Cs = declstm.forward(state_below, decoderMask, ho, Co)
        decoder_lstm_outputs = state_below

        ei, em, di, dm, dt = tensor.imatrices(5)  #place holders
        #####################################################
        #####################################################
        linear_outputs = tensor.dot(decoder_lstm_outputs, self.linear)
        softmax_outputs, updates = theano.scan(
            fn=lambda x: tensor.nnet.softmax(x),
            sequences=[linear_outputs],
        )

        def _NLL(pred, y, m):
            return -m * tensor.log(pred[tensor.arange(decoderInputs.shape[1]),
                                        y])

        costs, updates = theano.scan(
            fn=_NLL, sequences=[softmax_outputs, decoderTarget, decoderMask])
        loss = costs.sum() / decoderMask.sum()

        gparams = [tensor.grad(loss, param) for param in self.params]
        updates = [(param, param - self.learning_rate * gparam)
                   for param, gparam in zip(self.params, gparams)]

        self._train = theano.function(inputs=[ei, em, di, dm, dt],
                                      outputs=[loss, costs],
                                      updates=updates,
                                      givens={
                                          encoderInputs: ei,
                                          encoderMask: em,
                                          decoderInputs: di,
                                          decoderMask: dm,
                                          decoderTarget: dt
                                      })
        #####################################################
        #####################################################
        hs0, Cs0 = tensor.as_tensor_variable(
            self.hos, name="hs0"), tensor.as_tensor_variable(self.Cos,
                                                             name="Cs0")
        token_idxs = tensor.fill(
            tensor.zeros_like(decoderInputs, dtype="int32"), utils.idx_start)
        msk = tensor.fill((tensor.zeros_like(decoderInputs, dtype="int32")), 1)

        def _step(token_idxs, hs_, Cs_):
            hs, Cs = [], []
            state_below = self.lookuptable[token_idxs].reshape(
                (decoderInputs.shape[0], decoderInputs.shape[1],
                 self.hidden_size))
            for i, lstm in enumerate(self.decoder_lstm_layers):
                h, C = lstm.forward(state_below, msk, hs_[i],
                                    Cs_[i])  #mind msk
                hs += h[-1],
                Cs += C[-1],
                state_below = h
            hs, Cs = tensor.as_tensor_variable(hs), tensor.as_tensor_variable(
                Cs)
            next_token_idx = tensor.cast(
                tensor.dot(state_below, self.linear).argmax(axis=-1), "int32")
            return next_token_idx, hs, Cs

        outputs, updates = theano.scan(fn=_step,
                                       outputs_info=[token_idxs, hs0, Cs0],
                                       n_steps=utils.max_sent_size)
        listof_token_idx = outputs[0]
        self._utter = theano.function(
            inputs=[ei, em, di],
            outputs=listof_token_idx,
            givens={
                encoderInputs: ei,
                encoderMask: em,
                decoderInputs: di
            }
            #givens={encoderInputs:ei, encoderMask:em}
        )