Example #1
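    # NOTE: this method assumes module-level imports of sys and
    # theano.tensor as T, plus the EmbeddingLayer / LSTM /
    # FullyConnectedLayer helper classes used below.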
    def build_model1(self):
        # LookupTable to Embedding
        src_embedding_layer = EmbeddingLayer(input_dim=self.n_src_vocab, output_dim=self.src_embed_dim, name='src_embedding')
        tgt_embedding_layer = EmbeddingLayer(input_dim=self.n_tgt_vocab, output_dim=self.tgt_embed_dim, name='tgt_embedding')

        # LSTMs
        src_lstm_forward = LSTM(input_dim=self.src_embed_dim, output_dim=self.src_lstm_op_dim)
        src_lstm_backward = LSTM(input_dim=self.src_embed_dim, output_dim=self.src_lstm_op_dim)
        tgt_lstm = LSTM(input_dim=self.tgt_embed_dim, output_dim=self.tgt_lstm_op_dim)
        sys.stderr.write(str(tgt_lstm.params) + "\n")  # TODO: remove this debug dump of the decoder params

        # From target LSTM to target word indexes
        # Input: target LSTM output dim + Attention from BiLSTM
        proj_layer = FullyConnectedLayer(input_dim=self.tgt_lstm_op_dim + 2 * self.src_lstm_op_dim, output_dim=self.n_tgt_vocab, activation='softmax')

        params = src_embedding_layer.params + tgt_embedding_layer.params + src_lstm_forward.params + src_lstm_backward.params + tgt_lstm.params[:-1] + proj_layer.params
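        # Note: tgt_lstm.params[:-1] above presumably drops the decoder's
        # initial state h_0, which is overwritten with the encoder's final
        # state below and so is not trained directly.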

        # declare input variables
        src_ip = T.ivector()
        tgt_ip = T.ivector()
        tgt_op = T.ivector()

        # lookup table -> embedding
        src_embed_ip = src_embedding_layer.fprop(src_ip)
        tgt_embed_ip = tgt_embedding_layer.fprop(tgt_ip)

        # embedding -> source BiLSTM
        src_lstm_forward.fprop(src_embed_ip)
        src_lstm_backward.fprop(src_embed_ip[::-1, :])
        # Concatenate forward/backward states (flip the backward pass again so each row aligns with the same source word)
        encoderh = T.concatenate((src_lstm_forward.h, src_lstm_backward.h[::-1, :]), axis=1)

        # End of source BiLSTM -> target LSTM
        tgt_lstm.h_0 = encoderh[-1]
        tgt_lstm.fprop(tgt_embed_ip)

        # Attention
        # Read http://arxiv.org/abs/1508.04025
        attention = tgt_lstm.h.dot(encoderh.transpose())
        attention = attention.dot(encoderh)
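        # Note: the paper above softmax-normalizes the alignment scores
        # before taking the weighted sum over encoder states; a sketch of
        # that variant (not in the original code):
        #   scores = T.nnet.softmax(tgt_lstm.h.dot(encoderh.T))
        #   attention = scores.dot(encoderh)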

        # Concatenate attention context and decoder state (the order is a design choice)
        decoderh = T.concatenate((attention, tgt_lstm.h), axis=1)

        # LSTM output -> target word
        proj_op = proj_layer.fprop(decoderh)

        # Cost + regularization
        cost = T.nnet.categorical_crossentropy(proj_op, tgt_op).mean()
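        # Norm-stabilizer-style penalty on changes in the decoder's squared
        # activations between adjacent timesteps; `beta` is assumed to be
        # defined in the enclosing scope (it is not part of this snippet).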
        cost += beta * T.mean((tgt_lstm.h[:-1] ** 2 - tgt_lstm.h[1:] ** 2) ** 2)

        return {'cost': cost,
                'src_ip': src_ip,
                'tgt_ip': tgt_ip,
                'tgt_op': tgt_op,
                'params': params,
                'proj_op': proj_op}
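A minimal sketch of how the dictionary returned above might be compiled into a Theano training step with plain SGD; the instance name `nmt`, the learning rate `lr`, and the `theano` import are illustrative assumptions, not part of the original snippet:

import theano
import theano.tensor as T

model = nmt.build_model1()  # `nmt` is a hypothetical instance of this class
grads = T.grad(model['cost'], model['params'])  # gradients w.r.t. all trainable params
lr = 0.01  # assumed learning rate
updates = [(p, p - lr * g) for p, g in zip(model['params'], grads)]
train_fn = theano.function(
    inputs=[model['src_ip'], model['tgt_ip'], model['tgt_op']],
    outputs=model['cost'],
    updates=updates,
)
# One training step on a single (source, target-input, target-output) triple:
# loss = train_fn(src_indices, tgt_in_indices, tgt_out_indices)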
Example #2
        params += rnn.params[:-1]
    else:
        params += rnn.params

params += tgt_lstm_h_to_vocab.params

logging.info('Model parameters ...')
logging.info('Src Embedding dim : %d ' % (src_embedding_layer.output_dim))
logging.info('Tgt Embedding dim : %d ' % (tgt_embedding_layer.output_dim))
logging.info('Encoder dim : %d ' % (src_lstm_2.output_dim))
logging.info('Batch size : %d ' % (batch_size))
logging.info('Decoder LSTM dim : %d ' % (tgt_lstm_2.output_dim))
logging.info('Depth : %s ' % ('3'))

# Get embedding matrices
# Source sequence is fed reversed (the input-reversal trick from seq2seq);
# the target input drops its last token for teacher forcing.
src_emb_inp = src_embedding_layer.fprop(src_inp[:, ::-1])
tgt_emb_inp = tgt_embedding_layer.fprop(tgt_inp[:, :-1])

# Get encoder representation
src_lstm_0.fprop(src_emb_inp)
src_lstm_1.fprop(src_lstm_0.h)
src_lstm_2.fprop(src_lstm_1.h)

encoder_final_state = src_lstm_2.h.dimshuffle(
    1, 0, 2)[T.arange(src_inp.shape[0]), src_lens - 1, :]
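# (The dimshuffle above puts the batch axis first so each sequence's final
# hidden state is gathered at its true last timestep, src_lens - 1, rather
# than at the padded end.)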

# Connect encoder and decoder
tgt_lstm_0.h_0 = encoder_final_state

# Decode sentence from input
Example #3
params += decoder[0].params[:-1]
if args.attention == 'mlp':
    params += attention_layer_1.params + attention_layer_2.params

logging.info('Model parameters ...')
logging.info('Src Embedding dim : %d ' % (src_emb_dim))
logging.info('Tgt Embedding dim : %d ' % (tgt_emb_dim))
logging.info('Encoder BiLSTM dim : %d ' % (encoder_forward[-1].output_dim))
logging.info('Batch size : %s ' % (batch_size))
logging.info('Decoder LSTM dim : %d ' % (decoder[-1].output_dim))
logging.info('Attention mechanism : %s ' % (args.attention))
logging.info('Depth : %s ' % (args.num_layers))
logging.info('Peek Encoder : %s ' % (str(peek_encoder)))

# Get embedding matrices
src_emb_inp = src_embedding_layer.fprop(src_inp)
tgt_emb_inp = tgt_embedding_layer.fprop(tgt_inp)

encoder_representation = None
# Get BiLSTM representations
encoder_forward[0].fprop(src_emb_inp)
encoder_backward.fprop(src_emb_inp[:, ::-1])
# h is seqlen x batch x hdim
encoder_representation = T.concatenate(
    (encoder_forward[0].h, encoder_backward.h[::-1, :, :]),
    axis=2).dimshuffle(1, 0, 2)
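# Stack the remaining forward LSTMs: each layer reads the previous layer's
# output, and its (batch-major) states become the new encoder representation.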
for rnn in encoder_forward[1:]:
    rnn.fprop(encoder_representation)
    encoder_representation = rnn.h.dimshuffle(1, 0, 2)

encoder_final_state = encoder_representation[T.arange(src_inp.shape[0]),