Example #1
def test_advanced_cells():
    rnn = RecurrentSequential()
    rnn.add(RHNCell(10, recurrence_depth=2, input_dim=5))

    a = Input((7, 5))
    b = rnn(a)

    model = Model(a, b)

    model.compile(loss='mse', optimizer='sgd')
    model.fit(np.random.random((12, 7, 5)), np.random.random((12, 10)))
    model.predict(np.random.random((12, 7, 5)))
Example #2
def test_serialisation():
    rnn = RecurrentSequential()
    rnn.add(LSTMCell(output_dim=7, input_dim=5))
    rnn.add(SimpleRNNCell(8))
    rnn.add(GRUCell(10))

    rnn_config = rnn.get_config()
    recovered_rnn = RecurrentSequential.from_config(rnn_config)

    a = Input((7, 5))
    b = recovered_rnn(a)

    model = Model(a, b)

    model.compile(loss='mse', optimizer='sgd')
    model.fit(np.random.random((12, 7, 5)), np.random.random((12, 10)))
    model.predict(np.random.random((12, 7, 5)))
Example #3
def encoder_init(input,
                 postcshape,
                 hidden_dim,
                 depth,
                 dropout=0,
                 seq2seq=True,
                 bidirectional=True,
                 unroll=False,
                 stateful=False,
                 Encoder=None,
                 global_name="",
                 return_model=False):
    if Encoder is None:
        Encoder = [hidden_dim] * depth[0]
    else:
        if len(Encoder) < depth[0]:
            Encoder = Encoder + [hidden_dim] * (depth[0] - len(Encoder))
    encoder = RecurrentSequential(
        unroll=unroll,
        stateful=stateful,
        # return_states=True, return_all_states=True: all-state transfer needs changes in the TensorFlow backend
        return_sequences=True,
        name=global_name + 'encoder')
    encoder.add(LSTMCell(Encoder[0], batch_input_shape=postcshape[1:]))

    for k in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(Encoder[k]))

    if bidirectional:
        encoder = Bidirectional(encoder,
                                merge_mode='sum',
                                name=global_name + 'encoder')
        encoder.forward_layer.build(postcshape)
        encoder.backward_layer.build(postcshape)
        # patch
        encoder.layer = encoder.forward_layer
    if return_model:
        enc_input = Input(shape=postcshape[1:], name='encoder_input')
        encoded_out = encoder(enc_input)
        encoder_model = Model(inputs=[enc_input], outputs=[encoded_out])
        return encoder_model(input)
    return encoder(input)
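A minimal usage sketch for encoder_init, assuming postcshape is the full batch shape (batch, timesteps, features); the tensor name and sizes are illustrative, not from the original source:

x = Input(batch_shape=(32, 10, 16))                 # hypothetical input tensor
encoded = encoder_init(x, postcshape=(32, 10, 16),
                       hidden_dim=64, depth=(2, 1))  # bidirectional by default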
Example #4
def test_rnn_cell_from_model():
    x = Input((5, ))
    h_tm1 = Input((10, ))
    h = add([Dense(10)(x), Dense(10, use_bias=False)(h_tm1)])
    h = Activation('tanh')(h)

    cell_model = Model([x, h_tm1], [h, h])

    rnn_cell = RNNCellFromModel(cell_model)

    rnn = RecurrentSequential()
    rnn.add(rnn_cell)

    a = Input((7, 5))
    b = rnn(a)

    model = Model(a, b)

    model.compile(loss='mse', optimizer='sgd')
    model.fit(np.random.random((32, 7, 5)), np.random.random((32, 10)))
    model.predict(np.zeros((32, 7, 5)))
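The [x, h_tm1] -> [h, h] contract above generalizes: any functional Model mapping [input, previous states...] to [output, new states...] can be wrapped. A hedged sketch of a minimal GRU-style gated cell built the same way; the gating math is illustrative, not part of the original test, and it assumes keras.layers.multiply and Lambda are imported alongside the layers used above:

x = Input((5, ))
h_tm1 = Input((10, ))
z = Activation('sigmoid')(add([Dense(10)(x), Dense(10, use_bias=False)(h_tm1)]))    # update gate
h_cand = Activation('tanh')(add([Dense(10)(x), Dense(10, use_bias=False)(h_tm1)]))  # candidate state
one_minus_z = Lambda(lambda t: 1. - t)(z)
h = add([multiply([z, h_tm1]), multiply([one_minus_z, h_cand])])                    # blend old and new state
gated_cell = RNNCellFromModel(Model([x, h_tm1], [h, h]))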
Example #5
def test_unroll():
    rnn = RecurrentSequential(unroll=True)
    rnn.add(LSTMCell(7, input_dim=5))
    rnn.add(SimpleRNNCell(8))
    rnn.add(GRUCell(10))

    a = Input((7, 5))
    b = rnn(a)

    model = Model(a, b)

    model.compile(loss='mse', optimizer='sgd')
    model.fit(np.random.random((12, 7, 5)), np.random.random((12, 10)))
    model.predict(np.random.random((12, 7, 5)))
Example #6
def test_state_initializer_as_list():
    rnn = RecurrentSequential(
        state_initializer=['random_normal', 'glorot_uniform'])
    rnn.add(LSTMCell(7, batch_input_shape=(12, 5)))
    rnn.add(SimpleRNNCell(8))
    rnn.add(GRUCell(10))

    a = Input((7, 5))
    b = rnn(a)

    model = Model(a, b)

    model.compile(loss='mse', optimizer='sgd')
    model.fit(np.random.random((12, 7, 5)), np.random.random((12, 10)))
    model.predict(np.random.random((12, 7, 5)))
Example #7
def test_teacher_force():
    a = Input((7, 5))

    rnn = RecurrentSequential(readout=True, teacher_force=True)
    rnn.add(LSTMCell(7, input_dim=5))
    rnn.add(SimpleRNNCell(8))
    rnn.add(GRUCell(5))

    ground_truth = Input((7, 5))

    b = rnn(a, ground_truth=ground_truth)

    model = Model([a, ground_truth], b)

    model.compile(loss='mse', optimizer='sgd')
    model.fit([np.random.random((12, 7, 5)),
               np.random.random((12, 7, 5))], np.random.random((12, 5)))
    model.predict([np.random.random((12, 7, 5))] * 2)
Example #8
def test_readout_state_sync_unroll():
    a = Input((7, 5))
    rnn = RecurrentSequential(readout=True, state_sync=True, unroll=True)
    rnn.add(LSTMCell(5, input_dim=5))
    rnn.add(LSTMCell(5))
    rnn.add(LSTMCell(5))

    b = rnn(a)

    model = Model(a, b)

    model.compile(loss='mse', optimizer='sgd')
    model.fit(np.random.random((12, 7, 5)), np.random.random((12, 5)))
    model.predict(np.random.random((12, 7, 5)))
Example #9
def test_decode_unroll():
    a = Input((5, ))
    rnn = RecurrentSequential(decode=True, output_length=7, unroll=True)
    rnn.add(LSTMCell(10, input_dim=5))
    rnn.add(LSTMCell(10))
    rnn.add(LSTMCell(10))

    b = rnn(a)

    model = Model(a, b)

    model.compile(loss='mse', optimizer='sgd')
    model.fit(np.random.random((12, 5)), np.random.random((12, 7, 10)))
    model.predict(np.random.random((12, 5)))
Example #10
def test_state_sync():
    rnn = RecurrentSequential(state_sync=True)
    rnn.add(LSTMCell(10, input_dim=5))
    rnn.add(LSTMCell(10))
    rnn.add(LSTMCell(10))

    a = Input((7, 5))
    b = rnn(a)

    model = Model(a, b)

    model.compile(loss='mse', optimizer='sgd')
    model.fit(np.random.random((12, 7, 5)), np.random.random((12, 10)))
    model.predict(np.random.random((12, 7, 5)))
Example #11
def Seq2SeqtableQA(row_maxlen,
                   question_maxlen,
                   answer_maxlen,
                   len_dic,
                   hidden_dim,
                   batch_size,
                   depth=(1, 1),
                   dropout=0.0,
                   unroll=False,
                   stateful=False):
    # def Seq2SeqtableQA(output_dim, output_length, hidden_dim=None, input_shape=None,
    # batch_size=None, batch_input_shape=None, input_dim=None,
    # input_length=None, depth=1, dropout=0.0, unroll=False,
    # stateful=False):
    '''
    Based on SimpleSeq2Seq
    from https://github.com/farizrahman4u/seq2seq/blob/master/seq2seq/models.py
    '''

    # input placeholders
    table = Input((row_maxlen, len_dic))
    question = Input((question_maxlen, len_dic))

    # table encoder
    table_encoder = RecurrentSequential(unroll=unroll, stateful=stateful)
    # table_encoder.add(Embedding(input_dim=len_dic,
    #                             output_dim=hidden_dim,
    #                             input_length=row_maxlen,
    #                             # weights = [embedding_matrix],
    #                             mask_zero=True,
    #                             trainable=False))
    table_encoder.add(
        LSTMCell(hidden_dim, batch_input_shape=(row_maxlen, len_dic)))

    for _ in range(1, depth[0]):
        table_encoder.add(Dropout(dropout))
        table_encoder.add(LSTMCell(hidden_dim))

    table_encoded = table_encoder(table)

    # question encoder
    question_encoder = RecurrentSequential(unroll=unroll, stateful=stateful)
    question_encoder.add(
        LSTMCell(hidden_dim, batch_input_shape=(question_maxlen, len_dic)))

    for _ in range(1, depth[0]):
        question_encoder.add(Dropout(dropout))
        question_encoder.add(LSTMCell(hidden_dim))

    question_encoded = question_encoder(question)

    # match table and question
    match = dot([table_encoded, question_encoded], axes=(1, 1))
    # match = Activation('softmax')(match)

    # answer decoder
    answer_decoder = RecurrentSequential(unroll=unroll,
                                         stateful=stateful,
                                         decode=True,
                                         output_length=answer_maxlen)
    answer_decoder.add(Dropout(dropout, input_shape=(batch_size, hidden_dim)))

    if depth[1] == 1:
        answer_decoder.add(LSTMCell(len_dic))
    else:
        answer_decoder.add(LSTMCell(hidden_dim))
        for _ in range(depth[1] - 2):
            answer_decoder.add(Dropout(dropout))
            answer_decoder.add(LSTMCell(hidden_dim))
        answer_decoder.add(Dropout(dropout))
        answer_decoder.add(LSTMCell(len_dic))

    answer_decoded = answer_decoder(match)

    return Model(inputs=[table, question], outputs=answer_decoded)
Example #12
def Seq2Seq(output_dim,
            output_length,
            batch_input_shape=None,
            input_shape=None,
            batch_size=None,
            input_dim=None,
            input_length=None,
            hidden_dim=None,
            depth=1,
            broadcast_state=True,
            unroll=False,
            stateful=False,
            inner_broadcast_state=True,
            teacher_force=False,
            peek=False,
            dropout=0.):

    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim

    encoder = RecurrentSequential(readout=True,
                                  state_sync=inner_broadcast_state,
                                  unroll=unroll,
                                  stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(
            LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state,
                                  decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful,
                                  teacher_force=teacher_force)

    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim,
                            hidden_dim=hidden_dim,
                            batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True
    encoded_seq = dense1(_input)
    encoded_seq = encoder(encoded_seq)
    if broadcast_state:
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)
    inputs = [_input]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]

    decoded_seq = decoder(encoded_seq,
                          ground_truth=inputs[1] if teacher_force else None,
                          initial_readout=encoded_seq,
                          initial_state=states)

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
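A hedged smoke test in the style of the test functions earlier on this page, assuming this module's Seq2Seq and the same numpy/keras imports; all sizes are arbitrary. With teacher_force=True the ground-truth sequence becomes a second model input:

model = Seq2Seq(output_dim=8, output_length=5, input_dim=6, input_length=10,
                batch_size=16, depth=2, peek=True, teacher_force=True)
model.compile(loss='mse', optimizer='sgd')
x = np.random.random((16, 10, 6))
y = np.random.random((16, 5, 8))
model.fit([x, y], y)  # the target sequence doubles as the teacher-forcing input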
Example #13
def decoder_init(input,
                 shape,
                 input_dim,
                 hidden_dim,
                 output_dim,
                 output_length,
                 depth,
                 dropout=0,
                 bidirectional=True,
                 unroll=False,
                 stateful=False,
                 Decoder=None,
                 AttentionCell=AltAttentionDecoderCell,
                 global_name="",
                 return_model=False):
    if Decoder is None:
        Decoder = [hidden_dim] * depth[1]
    else:
        if len(Decoder) < depth[1]:
            Decoder = Decoder + [hidden_dim] * (depth[1] - len(Decoder))

    decoder = RecurrentSequential(decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful,
                                  name='decoder')
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], input_dim)))
    decoder.add(AttentionCell)
    if depth[1] > 1:
        for k in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=Decoder[k + 1],
                                hidden_dim=Decoder[k]))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=Decoder[-1]))
    return decoder(input)
Example #14
    def _make_model(self):
        """Constructs ContraVAE model.

    Returns:
      Nothing.
    """

        # Load embedding in Embedding layer
        logger.info("Making model...")
        embedding_matrix = self._load_embedding()
        embedding_layer = Embedding(
            self.num_words + 1,
            self.config.embedding_dim,
            weights=[embedding_matrix],
            input_length=self.config.max_sequence_length,
            trainable=False)

        # Q(z|X,c) -- encoder
        # Embedded sequence input
        sequence_inputs = Input(batch_shape=(self.config.batch_size,
                                             self.config.max_sequence_length),
                                dtype='int32')
        embedded_sequence_inputs = embedding_layer(sequence_inputs)
        # Merge with score inputs
        score_inputs = Input(batch_shape=(self.config.batch_size, 1))
        score_inputs_repeated = RepeatVector(
            self.config.max_sequence_length)(score_inputs)
        last_layer = concatenate(
            [embedded_sequence_inputs, score_inputs_repeated], axis=2)
        # LSTM layers
        for dim in self.config.encoder_lstm_dims[:-1]:
            last_layer = LSTM(dim, return_sequences=True)(last_layer)
        last_layer = LSTM(self.config.encoder_lstm_dims[-1],
                          return_sequences=False)(last_layer)
        # Mean and std of z
        z_mean = Dense(self.config.latent_dim, activation='tanh')(last_layer)
        z_log_sigma = Dense(self.config.latent_dim,
                            activation='tanh')(last_layer)

        # Sample z ~ Q(z|X,c)
        def sampling(args):
            z_mean, z_log_sigma = args
            epsilon = K.random_normal_variable(shape=(self.config.batch_size,
                                                      self.config.latent_dim),
                                               mean=0.,
                                               scale=1.)
            return z_mean + K.exp(z_log_sigma / 2.) * epsilon

        z = Lambda(sampling)([z_mean, z_log_sigma])

        # Second score inputs - at training time this is simply equal to
        # score_inputs; at sampling time this could vary.
        score_inputs2 = Input(batch_shape=(self.config.batch_size, 1))
        z_c = concatenate([z, score_inputs2], axis=1)
        # Repeat z_c so every timestep has access to it
        #z_c_repeated = RepeatVector(self.config.max_sequence_length)(z_c)

        # P(X|z,c) -- decoder.
        rnn = RecurrentSequential(
            decode=True, output_length=self.config.max_sequence_length)
        rnn.add(
            LSTMCell(self.config.decoder_lstm_dims[0],
                     input_dim=self.config.latent_dim + 1))
        for dim in self.config.decoder_lstm_dims[1:]:
            rnn.add(LSTMCell(dim))
        decoder_out = TimeDistributed(
            Dense(self.num_words + 1, activation='tanh'))

        # Decoder output
        # x_decoded = rnn(z_c_repeated, ground_truth=sequence_inputs)
        h_decoded = rnn(z_c)
        x_decoded = decoder_out(h_decoded)

        # Construct models
        # VAE
        vae = Model([sequence_inputs, score_inputs, score_inputs2], x_decoded)
        # Encoder
        encoder = Model([sequence_inputs, score_inputs], z_mean)
        # Generator
        generator_z_inputs = Input(batch_shape=(self.config.batch_size,
                                                self.config.latent_dim))
        generator_z_c = concatenate([generator_z_inputs, score_inputs2],
                                    axis=1)
        generator_h_decoded = rnn(generator_z_c)
        generator_x_decoded = decoder_out(generator_h_decoded)
        generator = Model([generator_z_inputs, score_inputs2],
                          generator_x_decoded)

        # Define loss function
        kl_weight = self.config.kl_weight

        def recon_loss(y_true, y_pred):
            """E[log P(X|z,y)]."""
            recon = K.mean(K.sparse_categorical_crossentropy(output=y_pred,
                                                             target=y_true,
                                                             from_logits=True),
                           axis=1)
            return recon

        def kl_loss(y_true, y_pred):
            """D_KL(Q(z|X,y) || P(z|X)); computed in closed form since both
            distributions are Gaussian."""
            kl = 0.5 * K.mean(
                K.exp(z_log_sigma) + K.square(z_mean) - 1. - z_log_sigma,
                axis=1)
            kl = kl * kl_weight
            return kl

        def vae_loss(y_true, y_pred):
            """Total loss = reconstruction loss + KL loss."""
            recon = recon_loss(y_true, y_pred)
            kl = kl_loss(y_true, y_pred)
            return recon + kl

        # Compile model
        vae.compile(loss=vae_loss,
                    optimizer=self.config.optimizer,
                    metrics=[recon_loss, kl_loss])

        self.vae = vae
        self.encoder = encoder
        self.generator = generator
        logger.info("Done making model.")
Example #15
def AttentionSeq2Seq(
    output_dim,
    output_length,
    batch_input_shape=None,
    batch_size=None,
    input_shape=None,
    input_length=None,
    input_dim=None,
    hidden_dim=None,
    depth=1,
    bidirectional=True,
    unroll=False,
    stateful=False,
    dropout=0.0,
):
    '''
    [1] Sequence to Sequence Learning with Neural Networks
    [2] Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation
    [3] Neural Machine Translation by Jointly Learning to Align and Translate
    [4] A Neural Conversational Model

    This is an attention Seq2seq model based on [3].
    Here, there is a soft alignment between the input and output sequence elements.
    A bidirectional encoder is used by default. There is no hidden state transfer in
    this model.

    The math:

            Encoder:
            X = input sequence of length m.
            H = Bidirectional_LSTM(X); note that here the LSTM has return_sequences = True,
            so H is a sequence of vectors of length m.

            Decoder:
    y(i) = LSTM(s(i-1), y(i-1), v(i)); where s is the hidden state of the LSTM (h and c)
    and v (called the context vector) is a weighted sum over H:

    v(i) = sum(j = 0 to m-1) alpha(i, j) * H(j)

    The weight alpha(i, j) for each H(j) is computed as follows:
    energy = a(s(i-1), H(j))
    alpha = softmax(energy)
    where a is a feedforward network.

    '''

    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim
    # shape:[batch, max_encoder_length, input_dim]
    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    # 1. Define the encoder
    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(
        hidden_dim,
        batch_input_shape=(shape[0],
                           shape[2])))  # shape[0]:batch, shape[2]:input_dim

    for _ in range(1, depth[0]):  # depth[0] is the number of stacked LSTM layers
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(
            shape)  # [batch, max_encoder_length, input_dim]
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    # 2.encode
    # _input:[batch, max_encoder_length, input_dim]
    # encoded: [batch, max_encoder_length, hidden]
    encoded = encoder(_input)

    # 3. Define the decoder
    decoder = RecurrentSequential(
        decode=True,
        output_length=output_length,
        unroll=unroll,  # False
        stateful=stateful)  # False

    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    # attention cell first; deeper decoders stack LSTM decoder cells behind it
    decoder.add(
        AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    if depth[1] != 1:
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))

        decoder.add(Dropout(dropout))

        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    # 4. decode
    decoded = decoder(encoded)

    inputs = [_input]
    model = Model(inputs=inputs, outputs=decoded)

    return model
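A hedged smoke test for the model above; all sizes are arbitrary:

model = AttentionSeq2Seq(output_dim=8, output_length=8, input_dim=5,
                         input_length=7, batch_size=16)
model.compile(loss='mse', optimizer='sgd')
model.fit(np.random.random((16, 7, 5)), np.random.random((16, 8, 8)))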
Example #16
def AttentionSeq2Seq(
    output_dim,
    output_length,
    batch_input_shape=None,
    batch_size=None,
    input_shape=None,
    input_length=None,
    input_dim=None,
    hidden_dim=None,
    depth=1,
    bidirectional=True,
    unroll=False,
    stateful=False,
    dropout=0.0,
):
    '''
    This is an attention Seq2seq model based on [3].
    Here, there is a soft alignment between the input and output sequence elements.
    A bidirectional encoder is used by default. There is no hidden state transfer in
    this model.
    The math:
            Encoder:
            X = input sequence of length m.
            H = Bidirectional_LSTM(X); note that here the LSTM has return_sequences = True,
            so H is a sequence of vectors of length m.
            Decoder:
    y(i) = LSTM(s(i-1), y(i-1), v(i)); where s is the hidden state of the LSTM (h and c)
    and v (called the context vector) is a weighted sum over H:
    v(i) = sum(j = 0 to m-1) alpha(i, j) * H(j)
    The weight alpha(i, j) for each H(j) is computed as follows:
    energy = a(s(i-1), H(j))
    alpha = softmax(energy)
    where a is a feedforward network.
    '''

    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))

    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    # encoded = encoder(_input)
    decoder = RecurrentSequential(decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    if depth[1] == 1:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    else:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

    # inputs = [_input]
    # decoded = decoder(encoded)
    # model = Model(inputs, decoded)
    return encoder, decoder
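Since this variant returns the unconnected encoder/decoder pair, the caller has to wire them up, roughly as the commented-out lines suggest; a hedged sketch with arbitrary sizes:

encoder, decoder = AttentionSeq2Seq(output_dim=8, output_length=8, input_dim=5,
                                    input_length=7, batch_size=16)
_input = Input(batch_shape=(16, 7, 5))
model = Model(_input, decoder(encoder(_input)))
model.compile(loss='mse', optimizer='sgd')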
Example #17
def AttentionSeq2Seq(
    output_dim,
    output_length,
    batch_input_shape=None,
    batch_size=None,
    input_shape=None,
    input_length=None,
    is_embedding=True,
    embedding_dim=None,
    n_tokens=1000,
    input_dim=None,
    hidden_dim=None,
    depth=1,
    bidirectional=False,
    unroll=False,
    stateful=False,
    dropout=0.0,
):
    '''
    This is an attention Seq2seq model based on [3].
    Here, there is a soft alignment between the input and output sequence elements.
    A bidirectional encoder is used by default. There is no hidden state transfer in
    this model.

    The math:

            Encoder:
            X = input sequence of length m.
            H = Bidirectional_LSTM(X); note that here the LSTM has return_sequences = True,
            so H is a sequence of vectors of length m.

            Decoder:
    y(i) = LSTM(s(i-1), y(i-1), v(i)); where s is the hidden state of the LSTM (h and c)
    and v (called the context vector) is a weighted sum over H:

    v(i) = sum(j = 0 to m-1) alpha(i, j) * H(j)

    The weight alpha(i, j) for each H(j) is computed as follows:
    energy = a(s(i-1), H(j))
    alpha = softmax(energy)
    where a is a feedforward network.

    '''

    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    elif input_length:
        if not is_embedding and n_tokens > 0:
            pass
        else:
            raise TypeError

    if hidden_dim is None:
        hidden_dim = output_dim
    if is_embedding:
        _input = Input(batch_shape=shape)
        _input._keras_history[0].supports_masking = True
    else:
        i = Input(shape=(input_length, ), name='sentence_input', dtype='int32')
        i._keras_history[0].supports_masking = True
        if embedding_dim is None:
            embedding_dim = hidden_dim
        _input = Embedding(input_dim=n_tokens,
                           output_dim=embedding_dim,
                           input_length=input_length)(i)
        shape = (batch_size, ) + (input_length, ) + (embedding_dim, )

    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[-1])))

    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))
    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer
    decoder = RecurrentSequential(decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    if depth[1] == 1:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    else:
        decoder.add(
            AttentionDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    x = encoder(_input)
    decoder_outputs = decoder(x)
    output = TimeDistributed(Dense(n_tokens,
                                   activation='softmax'))(decoder_outputs)
    if is_embedding:
        return Model(_input, output)
    else:
        return Model(i, output)
Example #18
def Seq2Seq(output_dim,
            output_length,
            hidden_dim=None,
            batch_input_shape=None,
            input_shape=None,
            batch_size=None,
            input_dim=None,
            input_length=None,
            is_embedding=True,
            embedding_dim=None,
            n_tokens=None,
            depth=1,
            broadcast_state=True,
            unroll=False,
            stateful=False,
            inner_broadcast_state=False,
            teacher_force=False,
            peek=False,
            dropout=0.):
    '''
    Seq2seq model based on [1] and [2].
    This model has the ability to transfer the encoder hidden state to the decoder's
    hidden state (specified by the broadcast_state argument). Also, in deep models
    (depth > 1), the hidden state is propagated throughout the LSTM stack (specified
    by the inner_broadcast_state argument). You can switch between the [1]-based and
    [2]-based models using the peek argument (peek = True for [2], peek = False for [1]).
    When peek = True, the decoder gets a 'peek' at the context vector at every timestep.

    [1] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
    y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM (h and c)
    y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
    y(t) = LSTM(s(t-1), y(t-1), C)
    y(0) = LSTM(s0, C, C)
    Where s is the hidden state of the LSTM (h and c), and C is the context vector
    from the encoder.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder should
            be transferred to the decoder.
    inner_broadcast_state : Specifies whether hidden states should be propagated
            throughout the LSTM stack in deep models.
    peek : Specifies if the decoder should be able to peek at the context vector
            at every timestep.
    dropout : Dropout probability in between layers.


    '''

    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    elif input_length:
        if not is_embedding and n_tokens > 0:
            pass
        else:
            raise TypeError

    if hidden_dim is None:
        hidden_dim = output_dim

    if is_embedding:
        _input = Input(batch_shape=shape)
    else:
        i = Input(shape=(input_length, ), name='sentence_input', dtype='int32')
        if embedding_dim is None:
            embedding_dim = hidden_dim
        _input = Embedding(input_dim=n_tokens,
                           output_dim=embedding_dim,
                           mask_zero=True,
                           input_length=input_length)(i)
        shape = (batch_size, ) + (input_length, ) + (embedding_dim, )

    encoder = RecurrentSequential(readout=True,
                                  unroll=unroll,
                                  stateful=stateful,
                                  return_states=broadcast_state)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[-1])))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    # dense2 = Dense(output_dim)

    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful)
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim)))

    if depth[1] == 1:
        #decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        decoder.add(LSTMCell(output_dim))
    else:
        #decoder.add(LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        decoder.add(LSTMCell(hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            #decoder.add(LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
            decoder.add(LSTMCell(hidden_dim))
        decoder.add(Dropout(dropout))
        #decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        decoder.add(LSTMCell(output_dim))

    x = encoder(_input)
    if broadcast_state:
        assert type(x) is list
        states = x[-2:]
        x = x[0]
    else:
        states = None
    decoder_outputs = decoder(x, initial_state=states, initial_readout=x)
    output = TimeDistributed(Dense(n_tokens,
                                   activation='softmax'))(decoder_outputs)

    if is_embedding:
        return Model(_input, output)
    else:
        return Model(i, output)
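A hedged usage sketch for the token-level path (is_embedding=False), where the model embeds integer sequences itself and emits a softmax over n_tokens; all sizes are arbitrary:

model = Seq2Seq(output_dim=32, output_length=6, hidden_dim=32, n_tokens=500,
                is_embedding=False, input_length=12, batch_size=16)
model.compile(loss='categorical_crossentropy', optimizer='adam')
x = np.random.randint(1, 500, size=(16, 12))              # integer token ids
y = np.eye(500)[np.random.randint(0, 500, size=(16, 6))]  # one-hot targets
model.fit(x, y)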
Example #19
def paired_trimodal_model(output_dim,
                          output_length,
                          batch_input_shape=None,
                          batch_size=None,
                          input_shape=None,
                          input_length=None,
                          input_dim=None,
                          hidden_dim=None,
                          depth=1,
                          bidirectional=True,
                          unroll=False,
                          stateful=False,
                          dropout=0.0):
    """
  One modal translates into two other modalities, no cycle involved 
  The model has 1 encoder and 2 decoders 
  """
    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        # TODO Proper error message
        raise TypeError

    if hidden_dim is None:
        hidden_dim = output_dim

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    # encoder phase
    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))

    # encoder phase
    encoder_2 = RecurrentSequential(unroll=unroll,
                                    stateful=stateful,
                                    return_sequences=True)
    encoder_2.add(
        LSTMCell(hidden_dim, batch_input_shape=(shape[0], output_dim)))

    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

        encoder_2.add(Dropout(dropout))
        encoder_2.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

        encoder_2 = Bidirectional(encoder_2, merge_mode='sum')
        encoder_2.forward_layer.build(shape)
        encoder_2.backward_layer.build(shape)
        # patch
        encoder_2.layer = encoder_2.forward_layer

    encoded_one = encoder(_input)

    # decoder phase
    decoder = RecurrentSequential(decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))

    decoder_2 = RecurrentSequential(decode=True,
                                    output_length=input_length,
                                    unroll=unroll,
                                    stateful=stateful)
    decoder_2.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))

    if depth[1] == 1:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    else:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

    if depth[1] == 1:
        decoder_2.add(
            AttentionDecoderCell(output_dim=input_dim, hidden_dim=hidden_dim))
    else:
        decoder_2.add(
            AttentionDecoderCell(output_dim=input_dim, hidden_dim=hidden_dim))
        for _ in range(depth[1] - 2):
            decoder_2.add(Dropout(dropout))
            decoder_2.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder_2.add(Dropout(dropout))
        decoder_2.add(
            LSTMDecoderCell(output_dim=input_dim, hidden_dim=hidden_dim))

    inputs = [_input]
    decoded_one = decoder(encoded_one)

    encoded_two = encoder_2(decoded_one)
    decoded_two = decoder_2(encoded_two)

    return inputs, encoded_one, encoded_two, decoded_one, decoded_two
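The trimodal builder returns raw tensors rather than a Model, so the caller assembles one; a hedged sketch with arbitrary sizes (bidirectional=False here because the Bidirectional wrappers are built against the first modality's shape):

inputs, enc1, enc2, dec1, dec2 = paired_trimodal_model(
    output_dim=8, output_length=12, input_dim=16, input_length=20,
    batch_size=32, bidirectional=False)
model = Model(inputs, [dec1, dec2])  # one loss per target modality
model.compile(optimizer='adam', loss=['mse', 'mse'])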
Example #20
def mctn_level2_model(input,
                      output_dim,
                      output_length,
                      batch_input_shape=None,
                      batch_size=None,
                      input_shape=None,
                      input_length=None,
                      input_dim=None,
                      hidden_dim=None,
                      depth=1,
                      bidirectional=True,
                      unroll=False,
                      stateful=False,
                      dropout=0.0):
    """ 
  Level 2 MCTN used for translation between the joint embedded of 
  2 modalities to the third one. Due to the lack of ground truth, no 
  cycle phase happens
  """
    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        # TODO Proper error message
        raise TypeError

    if hidden_dim is None:
        hidden_dim = output_dim

    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))

    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    encoded = encoder(input)
    decoder = RecurrentSequential(decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    if depth[1] == 1:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    else:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

    inputs = [input]
    decoded = decoder(encoded)

    return inputs, encoded, decoded
Example #21
def mctn_model(output_dim,
               output_length,
               batch_input_shape=None,
               batch_size=None,
               input_shape=None,
               input_length=None,
               input_dim=None,
               hidden_dim=None,
               depth=1,
               bidirectional=True,
               unroll=False,
               stateful=False,
               dropout=0,
               is_cycled=True):
    """
  MCTN Model (by default with Cycle Consistency Loss) 
  """
    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    # encoder phase
    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)

    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))
    # encoder.add(Dropout(dropout))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    encoded = encoder(_input)

    # decoder phase
    decoder = RecurrentSequential(
        decode=True,
        output_length=1,  #output_length
        unroll=unroll,
        stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    if depth[1] == 1:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    else:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

    inputs = [_input]
    decoded_0 = decoder(encoded)
    decoded = Reshape((output_dim, ))(decoded_0)

    # cycle phase
    cycled_decoded = None
    if is_cycled:
        cycled_encoded = encoder(decoded_0)
        cycled_decoded = decoder(cycled_encoded)

    return inputs, encoded, decoded, cycled_decoded
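mctn_model likewise returns tensors; a hedged assembly with the cycled output as a second training target. Because the cycle feeds the decoded vector back through the encoder, output_dim must match input_dim here:

inputs, encoded, decoded, cycled = mctn_model(
    output_dim=16, output_length=1, input_dim=16, input_length=20,
    batch_size=32, bidirectional=False)
model = Model(inputs, [decoded, cycled])
model.compile(optimizer='adam', loss=['mse', 'mse'])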
Example #22
def AttentionSeqtoSeq(output_dim, output_length, batch_input_shape=None,
                      batch_size=None, input_shape=None, input_length=None,
                      input_dim=None, hidden_dim=None, depth=1,
                      bidirectional=True, unroll=False, stateful=False,
                      dropout=0.0):
    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size,) + (input_length,) + (input_dim,)
        else:
            shape = (batch_size,) + (None,) + (input_dim,)
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    encoder = RecurrentSequential(unroll=unroll, stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))

    for _ in range(1, depth[0]):
        #encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    encoded = encoder(_input)
    #decoder_input = Input(batch_shape = encoded.shape)  # newly added
    decoder = RecurrentSequential(decode=True, output_length=output_length,
                                  unroll=unroll, stateful=stateful)

    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    if depth[1] == 1:
        decoder.add(AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    else:
        decoder.add(AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
        for _ in range(depth[1] - 2):
            #decoder.add(Dropout(dropout))
            decoder.add(LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        #decoder.add(Dropout(dropout))
        decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
        decoder.add(Dense(output_dim * 2))
        decoder.add(Dense(output_dim, activation="softmax"))
    
    inputs = [_input]
    decoded = decoder(encoded)
    model = Model(inputs, decoded)
    return model
Example #23
def seq2seq_model(x_train_1, x_train_2):
    # encoder
    S_inputs = Input(shape=(x_train_1.shape[1], x_train_1.shape[2]))
    # embeddings = Embedding(max_features, 128)(S_inputs)
    # embeddings = Position_Embedding()(S_inputs)  # adding Position_Embedding slightly improves accuracy
    encoded = Attention(32, 32)([S_inputs, S_inputs, S_inputs])
    # O_seq=Attention(16, 16)([O_seq, O_seq, O_seq])
    # O_seq = GlobalAveragePooling1D()(O_seq)
    # O_seq = Dropout(dropout)(O_seq)
    # outputs = Dense(3, activation='softmax')(O_seq)
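    # NOTE: dropout, hidden_dim and depth are not defined in this function;
    # they are presumably module-level globals in the original source.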

    #decoder
    decoder = RecurrentSequential(
        decode=True,
        output_length=1,  # x_train_2.shape[1]
        unroll=False,
        stateful=False)
    decoder.add(
        Dropout(dropout,
                batch_input_shape=(None, x_train_1.shape[1], hidden_dim)))
    if depth[1] == 1:
        decoder.add(
            AttentionDecoderCell(output_dim=x_train_2.shape[2],
                                 hidden_dim=hidden_dim))
    else:
        decoder.add(
            AttentionDecoderCell(output_dim=x_train_2.shape[2],
                                 hidden_dim=hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=x_train_2.shape[2],
                            hidden_dim=hidden_dim))

    #regression model
    x = Attention(8, 16)([encoded, encoded, encoded])
    x = GlobalAveragePooling1D()(x)
    x = Dropout(dropout)(x)
    regr_outputs = Dense(3, activation='softmax')(x)

    decoded = decoder(encoded)
    decoded = Reshape((x_train_2.shape[2], ))(decoded)
    model = Model(inputs=S_inputs, outputs=[decoded, regr_outputs])
    model.summary()

    # try using different optimizers and different optimizer configs
    model.compile(loss=['mse', 'categorical_crossentropy'],
                  loss_weights=[1, 10],
                  optimizer='adam',
                  metrics=['categorical_accuracy'])

    return model
Example #24
def Seq2Seq(output_dim, output_length, batch_input_shape=None,
            input_shape=None, batch_size=None, input_dim=None, input_length=None,
            hidden_dim=None, depth=1, broadcast_state=True, unroll=False,
            stateful=False, inner_broadcast_state=True, teacher_force=False,
            peek=False, dropout=0.):

    '''
    Seq2seq model based on [1] and [2].
    This model has the ability to transfer the encoder hidden state to the decoder's
    hidden state (specified by the broadcast_state argument). Also, in deep models
    (depth > 1), the hidden state is propagated throughout the LSTM stack (specified
    by the inner_broadcast_state argument). You can switch between the [1]-based and
    [2]-based models using the peek argument (peek = True for [2], peek = False for [1]).
    When peek = True, the decoder gets a 'peek' at the context vector at every timestep.

    [1] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
    y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM (h and c)
    y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
    y(t) = LSTM(s(t-1), y(t-1), C)
    y(0) = LSTM(s0, C, C)
    Where s is the hidden state of the LSTM (h and c), and C is the context vector
    from the encoder.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder should
            be transferred to the decoder.
    inner_broadcast_state : Specifies whether hidden states should be propagated
            throughout the LSTM stack in deep models.
    peek : Specifies if the decoder should be able to peek at the context vector
            at every timestep.
    dropout : Dropout probability in between layers.


    '''

    if isinstance(depth, int):
        depth = (depth, depth)
        # when depth is an int, the encoder and decoder get the same number of layers
    if batch_input_shape:
        shape = batch_input_shape
        # the batch input shape is used directly as the model's input shape
    elif input_shape:
        shape = (batch_size,) + input_shape
        # without a batch input shape, prepend the batch size to input_shape;
        # e.g. batch size 32 and input 768 give (32, 768)
        # input_shape must be a tuple
    elif input_dim:
        if input_length:
            shape = (batch_size,) + (input_length,) + (input_dim,)
            # the usual general shape: (batch size, input length, input dim)
        else:
            shape = (batch_size,) + (None,) + (input_dim,)
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim
        # what would it mean if hidden_dim were also None?

    encoder = RecurrentSequential(readout=True, state_sync=inner_broadcast_state,
                                  unroll=unroll, stateful=stateful,
                                  return_states=broadcast_state)
    '''
    Arguments:
        readout: whether to additionally process the output; options include
                 add (True), multiply, average, maximum, etc.
        state_sync: whether states are propagated internally; the source code
                    handles initial_states differently, propagating each cell's
                    state on to the next batch
        stateful: Keras feature that carries cell states across batches rather
                  than only between cells. With stateful=True you must set
                  shuffle=False in fit(); after the sub-sequence X[i] (the i-th
                  sample of the input matrix) is trained, Keras passes its final
                  memory to X[i+bs] (the (i+bs)-th sample) as its initial memory.
        unroll: Keras feature that unrolls the LSTM, i.e. the time sequence is
                expanded into a chain of concatenated cells; faster, but uses
                more memory
    '''
    for _ in range(depth[0]):
        encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))
    # encoder depth is given by depth[0]

    dense1 = TimeDistributed(Dense(hidden_dim))
    '''
    dense1:
        TimeDistributed applies the same Dense operation to every vector of a
        sample (input_length, input_dim); the weights are shared across the
        whole length, so the output becomes (batch_size, input_length, hidden_dim)
    '''
    dense1.supports_masking = True
    dense2 = Dense(output_dim)
    '''
    dense2:
        reshapes the encoder output to output_dim before feeding the decoder
    '''

    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state, decode=True,
                                  output_length=output_length, unroll=unroll,
                                  stateful=stateful, teacher_force=teacher_force)
    '''
    teacher_force: instead of feeding each state's output in as the next
        state's input, directly use the corresponding previous item of the
        training data's ground truth. Combined with beam search and scheduled
        sampling, a probability p decides between teacher forcing and free
        running; p decays as training epochs increase, gradually lowering the
        teacher-forcing rate so the model learns quickly yet still generalizes.
    '''

    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim,
                                    batch_input_shape=(shape[0], output_dim)))
    # decoder depth is given by depth[1]

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True
    encoded_seq = dense1(_input)
    # first map the inputs through the TimeDistributed layer to hidden_dim vectors
    encoded_seq = encoder(encoded_seq)
    # then encode
    # below: handle the options (whether to broadcast state, teacher forcing, etc.)
    if broadcast_state:
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)
    inputs = [_input]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]
    # That concludes the post-encoding handling

    # Decode; initial_state receives the states handed over from the encoder, if any
    decoded_seq = decoder(encoded_seq,
                          ground_truth=inputs[1] if teacher_force else None,
                          initial_readout=encoded_seq, initial_state=states)
    
    seq2seq_model = Model(inputs, decoded_seq)
    # The full model maps the input to the decoded sequence; the encoder can also be
    # pulled out on its own to reuse its encodings.
    # The model here maps a real-time news sequence to a stock-volatility sequence; to also
    # fold news from outside trading hours into training, share the encoder and decoder
    # weights and add new Flatten and Dense layers, treating the decoder's output sequence
    # as a volatility encoding that a final Dense reduces to a scalar.
    # That amounts to appending a Sequential stack after the RecurrentSequential.
    seq2seq_model.encoder = encoder
    seq2seq_model.decoder = decoder

    decoded_vec = Flatten()(decoded_seq)
    decoded_vec = Dense(1, activation='tanh')(decoded_vec)
    seq2vec_model = Model(inputs, [decoded_seq, decoded_vec])
    # The final model has one input (two with teacher forcing) and two outputs

    return seq2vec_model
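# A hedged usage sketch for the builder above. Its name and signature sit outside this
# excerpt, so `build_seq2vec` and every dimension below are hypothetical stand-ins; the
# point is the one-input, two-output training setup (with teacher_force=True the ground
# truth would be passed as a second input).
import numpy as np

model = build_seq2vec(output_dim=8, output_length=6, input_shape=(10, 16))  # hypothetical name
model.compile(optimizer='rmsprop',
              loss=['mse', 'mse'],       # one loss per output head
              loss_weights=[1.0, 0.5])   # weight the sequence head vs. the scalar head
X = np.random.random((32, 10, 16))       # e.g. news-feature sequences
Y_seq = np.random.random((32, 6, 8))     # target volatility sequence
y_scalar = np.random.random((32, 1))     # target for the Dense(1, activation='tanh') head
model.fit(X, [Y_seq, y_scalar], epochs=1)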
Beispiel #25
0
def SimpleSeq2Seq(output_dim,
                  output_length,
                  hidden_dim=None,
                  input_shape=None,
                  batch_size=None,
                  batch_input_shape=None,
                  input_dim=None,
                  input_length=None,
                  is_embedding=True,
                  embedding_dim=None,
                  n_tokens=None,
                  depth=1,
                  dropout=0.0,
                  unroll=False,
                  stateful=False):
    '''
    Simple model for sequence to sequence learning.
    The encoder encodes the input sequence into a vector (called the context vector).
    The decoder decodes the context vector into a sequence of vectors.
    There is no one-to-one relation between the input and output sequence
    elements. The input sequence and output sequence may differ in length.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    dropout : Dropout probability in between layers.

    '''

    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    elif input_length:
        if is_embedding == False and n_tokens > 0:
            pass
        else:
            raise TypeError

    if hidden_dim is None:
        hidden_dim = output_dim

    if is_embedding:
        _input = Input(batch_shape=shape)
    else:
        i = Input(shape=(input_length, ), name='sentence_input', dtype='int32')
        if embedding_dim is None:
            embedding_dim = hidden_dim
        _input = Embedding(input_dim=n_tokens,
                           output_dim=embedding_dim,
                           mask_zero=True,
                           input_length=input_length)(i)
        shape = (batch_size, ) + (input_length, ) + (embedding_dim, )

    encoder = RecurrentSequential(unroll=unroll, stateful=stateful)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[-1])))

    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    decoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  decode=True,
                                  output_length=output_length)
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim)))

    if depth[1] == 1:
        decoder.add(LSTMCell(output_dim))
    else:
        decoder.add(LSTMCell(hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMCell(hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(LSTMCell(output_dim))

    x = encoder(_input)
    decoder_outputs = decoder(x)
    output = TimeDistributed(Dense(n_tokens,
                                   activation='softmax'))(decoder_outputs)
    if is_embedding:
        return Model(_input, output)
    else:
        return Model(i, output)
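# A minimal sketch of the token-level variant above (all numbers illustrative). With
# is_embedding=False the model consumes integer token ids and embeds them itself; the
# softmax head is sized by n_tokens, so targets are one-hot over the vocabulary.
import numpy as np

model = SimpleSeq2Seq(output_dim=64, output_length=8, input_length=12,
                      n_tokens=1000, is_embedding=False, embedding_dim=64, depth=2)
model.compile(loss='categorical_crossentropy', optimizer='adam')
X = np.random.randint(1, 1000, size=(32, 12))            # token ids; 0 is the mask value
ids = np.random.randint(0, 1000, size=(32, 8))           # target token ids per step
Y = np.zeros((32, 8, 1000))
Y[np.arange(32)[:, None], np.arange(8)[None, :], ids] = 1.0  # one-hot targets
model.fit(X, Y, epochs=1)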
Beispiel #26
0
def Seq2Seq(output_dim, output_length, batch_input_shape=None,
            input_shape=None, batch_size=None, input_dim=None, input_length=None,
            hidden_dim=None, depth=1, broadcast_state=True, unroll=False,
            stateful=False, inner_broadcast_state=True, teacher_force=False,
            peek=False, dropout=0.):

    '''
    Seq2seq model based on [1] and [2].
    This model has the ability to transfer the encoder hidden state to the decoder's
    hidden state (specified by the broadcast_state argument). Also, in deep models
    (depth > 1), the hidden state is propagated throughout the LSTM stack (specified by
    the inner_broadcast_state argument). You can switch between the [1]-based and
    [2]-based models using the peek argument (peek = True for [2], peek = False for [1]).
    When peek = True, the decoder gets a 'peek' at the context vector at every timestep.

    [1] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
    y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM (h and c)
    y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
    y(t) = LSTM(s(t-1), y(t-1), C)
    y(0) = LSTM(s0, C, C)
    Where s is the hidden state of the LSTM (h and c), and C is the context vector
    from the encoder.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder should be
                      transferred to the decoder.
    inner_broadcast_state : Specifies whether hidden states should be propagated
                            throughout the LSTM stack in deep models.
    peek : Specifies if the decoder should be able to peek at the context vector
           at every timestep.
    dropout : Dropout probability in between layers.


    '''
    '''
        The block below computes the shape: batch_input_shape = (batch_size, timesteps, data_dim).
        A concrete batch_size allows a stateful LSTM, while None leaves it stateless.
    '''
    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size,) + (input_length,) + (input_dim,)
        else:
            shape = (batch_size,) + (None,) + (input_dim,)
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim

    '''
        Recurrent layers: https://keras.io/layers/recurrent/
        unroll - unrolls the recurrence into a feedforward graph; faster, but uses more memory
        return_states - Boolean. Whether to return the last states in addition to the output.
    '''
    encoder = RecurrentSequential(readout=True, state_sync=inner_broadcast_state,
                                  unroll=unroll, stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    ''' 
        TimeDistributed :- https://keras.io/layers/wrappers/
    '''
    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    ''' 
        Readout lets you feed the output of your RNN from the previous time step back to the current time step.
    '''
    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state, decode=True,
                                  output_length=output_length, unroll=unroll,
                                  stateful=stateful, teacher_force=teacher_force)

    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim,
                                    batch_input_shape=(shape[0], output_dim)))




    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True
    encoded_seq = dense1(_input)
    encoded_seq = encoder(encoded_seq)
    if broadcast_state:
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)
    inputs = [_input]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]


    decoded_seq = decoder(encoded_seq,
                          ground_truth=inputs[1] if teacher_force else None,
                          initial_readout=encoded_seq, initial_state=states)
    
    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
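# A hedged sketch of training the model above with teacher forcing (shapes illustrative):
# the ground-truth target sequence is passed both as a second input and as the label.
import numpy as np

model = Seq2Seq(output_dim=8, output_length=6, input_shape=(10, 16),
                depth=2, teacher_force=True, peek=False)
model.compile(loss='mse', optimizer='sgd')
X = np.random.random((32, 10, 16))
Y = np.random.random((32, 6, 8))
model.fit([X, Y], Y, epochs=1)  # the ground truth feeds the decoder during training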
Beispiel #27
0
def Seq2Seq(output_dim,
            output_length,
            batch_input_shape=None,
            input_shape=None,
            batch_size=None,
            input_dim=None,
            input_length=None,
            hidden_dim=None,
            depth=1,
            broadcast_state=True,
            unroll=False,
            stateful=False,
            inner_broadcast_state=True,
            teacher_force=False,
            peek=False,
            dropout=0.):
    '''
    Seq2seq model based on [1] and [2].
    This model has the ability to transfer the encoder hidden state to the decoder's
    hidden state (specified by the broadcast_state argument). Also, in deep models
    (depth > 1), the hidden state is propagated throughout the LSTM stack (specified by
    the inner_broadcast_state argument). You can switch between the [1]-based and
    [2]-based models using the peek argument (peek = True for [2], peek = False for [1]).
    When peek = True, the decoder gets a 'peek' at the context vector at every timestep.

    [1] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
    y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM (h and c)
    y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
    y(t) = LSTM(s(t-1), y(t-1), C)
    y(0) = LSTM(s0, C, C)
    Where s is the hidden state of the LSTM (h and c), and C is the context vector
    from the encoder.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder should be
                      transferred to the decoder.
    inner_broadcast_state : Specifies whether hidden states should be propagated
                            throughout the LSTM stack in deep models.
    peek : Specifies if the decoder should be able to peek at the context vector
           at every timestep.
    dropout : Dropout probability in between layers.


    '''

    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim

    encoder = RecurrentSequential(readout=True,
                                  state_sync=inner_broadcast_state,
                                  unroll=unroll,
                                  stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(
            LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state,
                                  decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful,
                                  teacher_force=teacher_force)

    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim,
                            hidden_dim=hidden_dim,
                            batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True
    encoded_seq = dense1(_input)
    encoded_seq = encoder(encoded_seq)
    if broadcast_state:
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)
    inputs = [_input]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]

    decoded_seq = decoder(encoded_seq,
                          ground_truth=inputs[1] if teacher_force else None,
                          initial_readout=encoded_seq,
                          initial_state=states)

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
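# The same builder with peek=True ([2]-style decoding, where the decoder sees the context
# vector at every timestep) and state broadcast; a minimal sketch, dimensions illustrative.
import numpy as np

model = Seq2Seq(output_dim=8, output_length=6, input_dim=16, input_length=10,
                hidden_dim=24, depth=(2, 2), peek=True, broadcast_state=True)
model.compile(loss='mse', optimizer='rmsprop')
model.fit(np.random.random((32, 10, 16)), np.random.random((32, 6, 8)), epochs=1)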
Beispiel #28
0
def SimpleSeq2Seq(output_dim, output_length, hidden_dim=None, input_shape=None,
                  batch_size=None, batch_input_shape=None, input_dim=None,
                  input_length=None, depth=1, dropout=0.0, unroll=False,
                  stateful=False):

    '''
    Simple model for sequence to sequence learning.
    The encoder encodes the input sequence into a vector (called the context vector).
    The decoder decodes the context vector into a sequence of vectors.
    There is no one-to-one relation between the input and output sequence
    elements. The input sequence and output sequence may differ in length.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    dropout : Dropout probability in between layers.

    '''

    '''
        The block below constructs the shape tuple
        batch_shape = (batch_size, input_length, input_dim)
        # TODO: Add more information about the statefulness of the LSTM and the input_shape expected by the same.
    '''
    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size,) + (input_length,) + (input_dim,)
        else:
            shape = (batch_size,) + (None,) + (input_dim,)
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim
    '''
        batch_input_shape = (batch_size, timesteps, data_dim)
        For the LSTM arguments see https://keras.io/layers/recurrent/#lstm
            Specifically, hidden_dim corresponds to units: a positive integer giving the
            dimensionality of the output space.
    '''
    encoder = RecurrentSequential(unroll=unroll, stateful=stateful)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[-1])))

    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    decoder = RecurrentSequential(unroll=unroll, stateful=stateful,
                                  decode=True, output_length=output_length)
    '''
        hidden_dim here equals the output dim of the encoder, so there should be no shape error.
        The second argument is the expected dimension of the input; see the comment below.
    '''
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim)))

    if depth[1] == 1:
        decoder.add(LSTMCell(output_dim))
    else:
        decoder.add(LSTMCell(hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMCell(hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(LSTMCell(output_dim))

    '''
        Input returns a tensor of
            batch_shape = (batch_size, input_length, input_dim)
        https://keras.io/layers/recurrent/ : see the note on using statefulness in RNNs
    '''
    _input = Input(batch_shape=shape)

    x = encoder(_input)
    output = decoder(x)
    '''
    Model takes _input as the input and applies everything needed to reach output;
    it includes all layers required in the computation of output given _input, so one
    can think of it as a compiled Theano function.
    '''
    return Model(_input, output)
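# A minimal usage sketch for the variant above (illustrative shapes):
import numpy as np

model = SimpleSeq2Seq(output_dim=8, output_length=6, input_shape=(10, 16), depth=2)
model.compile(loss='mse', optimizer='sgd')
model.fit(np.random.random((32, 10, 16)), np.random.random((32, 6, 8)), epochs=1)
model.predict(np.random.random((4, 10, 16)))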
Beispiel #29
0
def SimpleSeq2Seq(output_dim,
                  output_length,
                  hidden_dim=None,
                  input_shape=None,
                  batch_size=None,
                  batch_input_shape=None,
                  input_dim=None,
                  input_length=None,
                  depth=1,
                  dropout=0.0,
                  unroll=False,
                  stateful=False):
    '''
    Simple model for sequence to sequence learning.
    The encoder encodes the input sequence into a vector (called the context vector).
    The decoder decodes the context vector into a sequence of vectors.
    There is no one-to-one relation between the input and output sequence
    elements. The input sequence and output sequence may differ in length.
    Arguments:
    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    dropout : Dropout probability in between layers.
    '''

    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim
    encoder = RecurrentSequential(unroll=unroll, stateful=stateful)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[-1])))

    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    decoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  decode=True,
                                  output_length=output_length)
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim)))

    if depth[1] == 1:
        decoder.add(LSTMCell(output_dim))
    else:
        decoder.add(LSTMCell(hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMCell(hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(LSTMCell(output_dim))

    return encoder, decoder
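# Unlike the earlier variants, this one returns the encoder and decoder separately; a
# hedged sketch of wiring them into a trainable Model (shapes illustrative):
import numpy as np

encoder, decoder = SimpleSeq2Seq(output_dim=8, output_length=6,
                                 input_shape=(10, 16), depth=2)
x = Input((10, 16))
y = decoder(encoder(x))          # context vector in, decoded sequence out
model = Model(x, y)
model.compile(loss='mse', optimizer='sgd')
model.fit(np.random.random((32, 10, 16)), np.random.random((32, 6, 8)), epochs=1)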
Beispiel #30
0
def Pointer(output_dim, output_length, batch_input_shape=None,
                     batch_size=None, input_shape=None, input_length=None,
                     input_dim=None, hidden_dim=None, depth=1,
                     bidirectional=True, unroll=False, stateful=False, dropout=0.0,):
    '''
    This is an attention Seq2seq model based on [3].
    Here, there is a soft alignment between the input and output sequence elements.
    A bidirectional encoder is used by default. There is no hidden state transfer in this
    model.

    The math:

            Encoder:
            X = Input sequence of length m.
            H = Bidirectional_LSTM(X); Note that here the LSTM has return_sequences = True,
            so H is a sequence of vectors of length m.

            Decoder:
    y(i) = LSTM(s(i-1), y(i-1), v(i)); Where s is the hidden state of the LSTM (h and c)
    and v (called the context vector) is a weighted sum over H:

    v(i) = sum(j = 0 to m-1) alpha(i, j) * H(j)

    The weight alpha(i, j) for each H(j) is computed as follows:
    energy = a(s(i-1), H(j))
    alpha = softmax(energy)
    Where a is a feed-forward network.

    '''

    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size,) + (input_length,) + (input_dim,)
        else:
            shape = (batch_size,) + (None,) + (input_dim,)
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    encoder = RecurrentSequential(unroll=unroll, stateful=False,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))

    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    encoded = encoder(_input)
    decoder = RecurrentSequential(decode=True, output_length=output_length,
                                  unroll=unroll, stateful=stateful,
                                  return_sequences=True)

    # Alternative heads were tried and left disabled: an external softmax via
    # TimeDistributed(Dense(output_dim, activation='softmax')), and deeper stacks of
    # LSTMDecoderCell layers for depth[1] > 1; the softmax lives inside
    # PointerDecoderCell instead.
    decoder.add(PointerDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim,
                                   batch_input_shape=(shape[0], shape[1], hidden_dim)))


    # Softmax is applied inside the cell, so no external softmax head is added.
    inputs = [_input]
    decoded = decoder(encoded)
    model = Model(inputs, decoded)
    return model
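# A hedged usage sketch for the pointer network above (illustrative shapes). Since the
# decoder points back into the input sequence, output_dim is typically the input length:
import numpy as np

model = Pointer(output_dim=10, output_length=10, input_shape=(10, 16), hidden_dim=32)
model.compile(loss='categorical_crossentropy', optimizer='adam')
probs = model.predict(np.random.random((4, 10, 16)))  # (4, 10, 10) pointer distributions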