Code example #1
def mctn_level2_model(input,
                      output_dim,
                      output_length,
                      batch_input_shape=None,
                      batch_size=None,
                      input_shape=None,
                      input_length=None,
                      input_dim=None,
                      hidden_dim=None,
                      depth=1,
                      bidirectional=True,
                      unroll=False,
                      stateful=False,
                      dropout=0.0):
  """ 
  Level 2 MCTN used for translation between the joint embedded of 
  2 modalities to the third one. Due to the lack of ground truth, no 
  cycle phase happens
  """
  if isinstance(depth, int):
    depth = (depth, depth)
  if batch_input_shape:
    shape = batch_input_shape
  elif input_shape:
    shape = (batch_size,) + input_shape
  elif input_dim:
    if input_length:
      shape = (batch_size,) + (input_length,) + (input_dim,)
    else:
      shape = (batch_size,) + (None,) + (input_dim,)
  else:
    raise TypeError('Expected one of batch_input_shape, input_shape or '
                    'input_dim to be specified')

  if hidden_dim is None:
    hidden_dim = output_dim

  encoder = RecurrentSequential(unroll=unroll, stateful=stateful,
                                return_sequences=True)
  encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))

  for _ in range(1, depth[0]):
    encoder.add(Dropout(dropout))
    encoder.add(LSTMCell(hidden_dim))

  if bidirectional:
    encoder = Bidirectional(encoder, merge_mode='sum')
    encoder.forward_layer.build(shape)
    encoder.backward_layer.build(shape)
    # patch
    encoder.layer = encoder.forward_layer

  encoded = encoder(input)
  decoder = RecurrentSequential(decode=True, output_length=output_length,
                                unroll=unroll, stateful=stateful)
  decoder.add(
    Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
  # Both branches start with the same attention cell, so add it once.
  decoder.add(
    AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
  if depth[1] > 1:
    for _ in range(depth[1] - 2):
      decoder.add(Dropout(dropout))
      decoder.add(LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
    decoder.add(Dropout(dropout))
    decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

  inputs = [input]
  decoded = decoder(encoded)

  return inputs, encoded, decoded
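
A minimal usage sketch for the level-2 translator, assuming the recurrentshop-style imports used above are available; the shapes, feature sizes and loss are illustrative, not taken from the original project:

from keras.layers import Input
from keras.models import Model

# Hypothetical joint embedding produced by a level-1 MCTN: (batch, time, dim).
joint_embedding = Input(batch_shape=(32, 20, 64))
inputs, encoded, decoded = mctn_level2_model(joint_embedding,
                                             output_dim=74,  # third modality's feature size
                                             output_length=20,
                                             batch_input_shape=(32, 20, 64))
model = Model(inputs, decoded)
# Trained on the translation loss only; there is no cycle phase at level 2.
model.compile(optimizer='adam', loss='mae')
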
Code example #2
def mctn_model(output_dim,
               output_length,
               batch_input_shape=None,
               batch_size=None,
               input_shape=None,
               input_length=None,
               input_dim=None,
               hidden_dim=None,
               depth=1,
               bidirectional=True,
               unroll=False,
               stateful=False,
               dropout=0.0,
               is_cycled=True
               ):
  """
  MCTN Model (by default with Cycle Consistency Loss) 
  """
  if isinstance(depth, int):
    depth = (depth, depth)
  if batch_input_shape:
    shape = batch_input_shape
  elif input_shape:
    shape = (batch_size,) + input_shape
  elif input_dim:
    if input_length:
      shape = (batch_size,) + (input_length,) + (input_dim,)
    else:
      shape = (batch_size,) + (None,) + (input_dim,)
  else:
    raise TypeError('Expected one of batch_input_shape, input_shape or '
                    'input_dim to be specified')
  if hidden_dim is None:
    hidden_dim = output_dim

  _input = Input(batch_shape=shape)
  _input._keras_history[0].supports_masking = True

  # encoder phase
  encoder = RecurrentSequential(unroll=unroll, stateful=stateful,
                                return_sequences=True)
  encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))

  for _ in range(1, depth[0]):
    encoder.add(Dropout(dropout))
    encoder.add(LSTMCell(hidden_dim))

  if bidirectional:
    encoder = Bidirectional(encoder, merge_mode='sum')
    encoder.forward_layer.build(shape)
    encoder.backward_layer.build(shape)
    # patch
    encoder.layer = encoder.forward_layer

  encoded = encoder(_input)

  # decoder phase
  decoder = RecurrentSequential(decode=True, output_length=output_length,
                                unroll=unroll, stateful=stateful)
  decoder.add(
    Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
  decoder.add(
    AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
  if depth[1] > 1:
    for _ in range(depth[1] - 2):
      decoder.add(Dropout(dropout))
      decoder.add(LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
    decoder.add(Dropout(dropout))
    decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

  inputs = [_input]
  decoded = decoder(encoded)

  # cycle phase
  cycled_decoded = None
  if is_cycled:
    cycled_encoded = encoder(decoded)
    cycled_decoded = decoder(cycled_encoded)

  return inputs, encoded, decoded, cycled_decoded
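
A sketch of how the cycled output could feed a cycle-consistency objective. Note that the encoder is reused on its own decoded output, so this code path requires the source feature size to equal output_dim; the loss weights are illustrative:

from keras.models import Model

inputs, encoded, decoded, cycled_decoded = mctn_model(
    output_dim=74, output_length=20, batch_input_shape=(32, 20, 74))
model = Model(inputs, [decoded, cycled_decoded])
# decoded is fit against the target modality, cycled_decoded against the
# original source, which realises the cycle-consistency loss.
model.compile(optimizer='adam', loss=['mae', 'mae'], loss_weights=[1.0, 0.5])
# model.fit(x_source, [y_target, x_source], ...)
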
Code example #3
def paired_trimodal_model(output_dim,
                          output_length,
                          batch_input_shape=None,
                          batch_size=None,
                          input_shape=None,
                          input_length=None,
                          input_dim=None,
                          hidden_dim=None,
                          depth=1,
                          bidirectional=True,
                          unroll=False,
                          stateful=False,
                          dropout=0.0):
  """
  One modal translates into two other modalities, no cycle involved 
  The model has 1 encoder and 2 decoders 
  """
  if isinstance(depth, int):
    depth = (depth, depth)
  if batch_input_shape:
    shape = batch_input_shape
  elif input_shape:
    shape = (batch_size,) + input_shape
  elif input_dim:
    if input_length:
      shape = (batch_size,) + (input_length,) + (input_dim,)
    else:
      shape = (batch_size,) + (None,) + (input_dim,)
  else:
    raise TypeError('Expected one of batch_input_shape, input_shape or '
                    'input_dim to be specified')

  if hidden_dim is None:
    hidden_dim = output_dim

  _input = Input(batch_shape=shape)
  _input._keras_history[0].supports_masking = True

  # encoder phase
  encoder = RecurrentSequential(unroll=unroll, stateful=stateful,
                                return_sequences=True)
  encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))

  # second encoder phase (consumes the first decoder's output)
  encoder_2 = RecurrentSequential(unroll=unroll, stateful=stateful,
                                  return_sequences=True)
  encoder_2.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], output_dim)))

  for _ in range(1, depth[0]):
    encoder.add(Dropout(dropout))
    encoder.add(LSTMCell(hidden_dim))

    encoder_2.add(Dropout(dropout))
    encoder_2.add(LSTMCell(hidden_dim))

  if bidirectional:
    encoder = Bidirectional(encoder, merge_mode='sum')
    encoder.forward_layer.build(shape)
    encoder.backward_layer.build(shape)
    # patch
    encoder.layer = encoder.forward_layer

    encoder_2 = Bidirectional(encoder_2, merge_mode='sum')
    # encoder_2 consumes the first decoder's output, so build with that shape
    shape_2 = (shape[0], output_length, output_dim)
    encoder_2.forward_layer.build(shape_2)
    encoder_2.backward_layer.build(shape_2)
    # patch
    encoder_2.layer = encoder_2.forward_layer

  encoded_one = encoder(_input)

  # decoder phase
  decoder = RecurrentSequential(decode=True, output_length=output_length,
                                unroll=unroll, stateful=stateful)
  decoder.add(
    Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))

  decoder_2 = RecurrentSequential(decode=True, output_length=input_length,
                                  unroll=unroll, stateful=stateful)
  decoder_2.add(
    Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))

  decoder.add(
    AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
  if depth[1] > 1:
    for _ in range(depth[1] - 2):
      decoder.add(Dropout(dropout))
      decoder.add(LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
    decoder.add(Dropout(dropout))
    decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

  decoder_2.add(
    AttentionDecoderCell(output_dim=input_dim, hidden_dim=hidden_dim))
  if depth[1] > 1:
    for _ in range(depth[1] - 2):
      decoder_2.add(Dropout(dropout))
      decoder_2.add(LSTMDecoderCell(output_dim=hidden_dim,
                                    hidden_dim=hidden_dim))
    decoder_2.add(Dropout(dropout))
    decoder_2.add(LSTMDecoderCell(output_dim=input_dim,
                                  hidden_dim=hidden_dim))

  inputs = [_input]
  decoded_one = decoder(encoded_one)

  encoded_two = encoder_2(decoded_one)
  decoded_two = decoder_2(encoded_two)

  return inputs, encoded_one, encoded_two, decoded_one, decoded_two
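
A sketch wiring the two decoders into one trainable model; all shapes are illustrative. Note that the second decoder's output is sized by input_length and input_dim in this implementation:

from keras.models import Model

inputs, enc_1, enc_2, dec_1, dec_2 = paired_trimodal_model(
    output_dim=74, output_length=20,
    batch_input_shape=(32, 20, 300), input_length=20, input_dim=300)
# dec_1 approximates the second modality, dec_2 the third; both are supervised.
model = Model(inputs, [dec_1, dec_2])
model.compile(optimizer='adam', loss=['mae', 'mae'])
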
Code example #4
File: models.py, Project: marlboro233544951/seq2seq
def AttentionSeq2Seq(output_dim,
                     output_length,
                     hidden_dim=None,
                     depth=1,
                     bidirectional=True,
                     dropout=0.,
                     **kwargs):
    '''
    This is an attention Seq2seq model based on [3].
    Here, there is a soft alignment between the input and output sequence elements.
    A bidirectional encoder is used by default. There is no hidden state transfer in
    this model.

    The math:

        Encoder:
        X = Input sequence of length m.
        H = Bidirectional_LSTM(X); Note that here the LSTM has return_sequences = True,
        so H is a sequence of vectors of length m.

        Decoder:
        y(i) = LSTM(s(i-1), y(i-1), v(i)); where s is the hidden state of the LSTM (h and c)
        and v (called the context vector) is a weighted sum over H:

        v(i) = sum(j = 0 to m-1) alpha(i, j) * H(j)

        The weight alpha(i, j) for each H(j) is computed as follows:
        energy = a(s(i-1), H(j))
        alpha = softmax(energy)
        where a is a feed-forward network.
    '''
    if isinstance(depth, int):
        depth = [depth, depth]
    if 'batch_input_shape' in kwargs:
        shape = kwargs['batch_input_shape']
        del kwargs['batch_input_shape']
    elif 'input_shape' in kwargs:
        shape = (None, ) + tuple(kwargs['input_shape'])
        del kwargs['input_shape']
    elif 'input_dim' in kwargs:
        if 'input_length' in kwargs:
            shape = (None, kwargs['input_length'], kwargs['input_dim'])
            del kwargs['input_length']
        else:
            shape = (None, None, kwargs['input_dim'])
        del kwargs['input_dim']
    else:
        raise TypeError('Expected one of batch_input_shape, input_shape or '
                        'input_dim to be specified')
    if 'unroll' in kwargs:
        unroll = kwargs['unroll']
        del kwargs['unroll']
    else:
        unroll = False
    if 'stateful' in kwargs:
        stateful = kwargs['stateful']
        del kwargs['stateful']
    else:
        stateful = False
    if not hidden_dim:
        hidden_dim = output_dim
    encoder = RecurrentContainer(unroll=unroll,
                                 stateful=stateful,
                                 return_sequences=True,
                                 input_length=shape[1])
    encoder.add(
        LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2]), **kwargs))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim, **kwargs))
    input = Input(batch_shape=shape)
    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
    encoded = encoder(input)
    decoded = encoded
    for _ in range(1, depth[1]):
        decoder = AttentionDecoderCell(
            output_dim=hidden_dim,
            hidden_dim=hidden_dim,
            batch_input_shape=(shape[0], shape[1], hidden_dim)).get_layer(
                decode=True,
                output_length=output_length,
                unroll=unroll,
                stateful=stateful)
        decoded = Dropout(dropout)(decoded)
        decoded = decoder(decoded)
    decoder = AttentionDecoderCell(
        output_dim=output_dim,
        hidden_dim=hidden_dim,
        batch_input_shape=(shape[0],
                           output_length if depth[1] > 1 else shape[1],
                           hidden_dim)).get_layer(decode=True,
                                                  output_length=output_length,
                                                  unroll=unroll,
                                                  stateful=stateful)
    decoded = Dropout(dropout)(decoded)
    decoded = decoder(decoded)
    model = Model(input, decoded)
    return model
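
Typical usage, in the style of the seq2seq README; the import path assumes the seq2seq package is installed:

from seq2seq.models import AttentionSeq2Seq

model = AttentionSeq2Seq(input_dim=5, input_length=7, hidden_dim=10,
                         output_length=8, output_dim=20, depth=4)
model.compile(loss='mse', optimizer='rmsprop')
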
Code example #5
File: models.py, Project: marlboro233544951/seq2seq
def Seq2Seq(output_dim,
            output_length,
            hidden_dim=None,
            depth=1,
            broadcast_state=True,
            inner_broadcast_state=True,
            peek=False,
            dropout=0.,
            **kwargs):
    '''
    Seq2seq model based on [1] and [2].
    This model has the ability to transfer the encoder hidden state to the decoder's
    hidden state (specified by the broadcast_state argument). Also, in deep models
    (depth > 1), the hidden state is propagated throughout the LSTM stack (specified
    by the inner_broadcast_state argument). You can switch between the [1]-based and
    the [2]-based model using the peek argument (peek = True for [2], peek = False
    for [1]). When peek = True, the decoder gets a 'peek' at the context vector at
    every timestep.

    [1] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1)); where s is the hidden state of the LSTM (h and c)
        y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1), C)
        y(0) = LSTM(s0, C, C)
        where s is the hidden state of the LSTM (h and c), and C is the context
        vector from the encoder.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder should
                      be transferred to the decoder.
    inner_broadcast_state : Specifies whether hidden states should be propagated
                            throughout the LSTM stack in deep models.
    peek : Specifies if the decoder should be able to peek at the context vector
           at every timestep.
    dropout : Dropout probability in between layers.
    '''
    if isinstance(depth, int):
        depth = [depth, depth]
    if 'batch_input_shape' in kwargs:
        shape = kwargs['batch_input_shape']
        del kwargs['batch_input_shape']
    elif 'input_shape' in kwargs:
        shape = (None, ) + tuple(kwargs['input_shape'])
        del kwargs['input_shape']
    elif 'input_dim' in kwargs:
        if 'input_length' in kwargs:
            shape = (None, kwargs['input_length'], kwargs['input_dim'])
            del kwargs['input_length']
        else:
            shape = (None, None, kwargs['input_dim'])
        del kwargs['input_dim']
    else:
        raise TypeError('Expected one of batch_input_shape, input_shape or '
                        'input_dim to be specified')
    if 'unroll' in kwargs:
        unroll = kwargs['unroll']
        del kwargs['unroll']
    else:
        unroll = False
    if 'stateful' in kwargs:
        stateful = kwargs['stateful']
        del kwargs['stateful']
    else:
        stateful = False
    if not hidden_dim:
        hidden_dim = output_dim
    encoder = RecurrentContainer(readout=True,
                                 state_sync=inner_broadcast_state,
                                 input_length=shape[1],
                                 unroll=unroll,
                                 stateful=stateful)
    for i in range(depth[0]):
        encoder.add(
            LSTMCell(hidden_dim,
                     batch_input_shape=(shape[0], hidden_dim),
                     **kwargs))
        encoder.add(Dropout(dropout))
    dense1 = TimeDistributed(Dense(hidden_dim))
    dense2 = Dense(output_dim)
    decoder = RecurrentContainer(readout='add' if peek else 'readout_only',
                                 state_sync=inner_broadcast_state,
                                 output_length=output_length,
                                 unroll=unroll,
                                 stateful=stateful,
                                 decode=True,
                                 input_length=shape[1])
    for i in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim,
                            hidden_dim=hidden_dim,
                            batch_input_shape=(shape[0], output_dim),
                            **kwargs))
    input = Input(batch_shape=shape)
    encoded_seq = dense1(input)
    encoded_seq = encoder(encoded_seq)
    if broadcast_state:
        decoder.model.layers[1].states[:2] = encoder.state_outputs[-3:-1]
    encoded_seq = dense2(encoded_seq)
    decoder.initial_readout = encoded_seq
    decoded_seq = decoder(encoded_seq)
    model = Model(input, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
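
Usage sketch in the style of the seq2seq README; peek=True selects the [2]-style decoder that re-reads the context vector at every timestep:

from seq2seq.models import Seq2Seq

model = Seq2Seq(batch_input_shape=(16, 7, 5), hidden_dim=10, output_length=8,
                output_dim=20, depth=4, peek=True)
model.compile(loss='mse', optimizer='rmsprop')
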
Code example #6
File: models.py, Project: marlboro233544951/seq2seq
def SimpleSeq2Seq(output_dim,
                  output_length,
                  hidden_dim=None,
                  depth=1,
                  dropout=0.,
                  **kwargs):
    '''
    Simple model for sequence to sequence learning.
    The encoder encodes the input sequence to a vector (called the context vector)
    and the decoder decodes the context vector into a sequence of vectors.
    There is no one-to-one relation between the input and output sequence elements.
    The input sequence and output sequence may differ in length.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    dropout : Dropout probability in between layers.
    '''
    if isinstance(depth, int):
        depth = [depth, depth]
    if 'batch_input_shape' in kwargs:
        shape = kwargs['batch_input_shape']
        del kwargs['batch_input_shape']
    elif 'input_shape' in kwargs:
        shape = (None, ) + tuple(kwargs['input_shape'])
        del kwargs['input_shape']
    elif 'input_dim' in kwargs:
        if 'input_length' in kwargs:
            shape = (None, kwargs['input_length'], kwargs['input_dim'])
            del kwargs['input_length']
        else:
            shape = (None, None, kwargs['input_dim'])
        del kwargs['input_dim']
    else:
        raise TypeError('Expected one of batch_input_shape, input_shape or '
                        'input_dim to be specified')
    if 'unroll' in kwargs:
        unroll = kwargs['unroll']
        del kwargs['unroll']
    else:
        unroll = False
    if 'stateful' in kwargs:
        stateful = kwargs['stateful']
        del kwargs['stateful']
    else:
        stateful = False
    if not hidden_dim:
        hidden_dim = output_dim
    encoder = RecurrentContainer(unroll=unroll,
                                 stateful=stateful,
                                 input_length=shape[1])
    encoder.add(
        LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2]), **kwargs))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim, **kwargs))
    decoder = RecurrentContainer(unroll=unroll,
                                 stateful=stateful,
                                 decode=True,
                                 output_length=output_length,
                                 input_length=shape[1])
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim)))
    for _ in range(1, depth[1]):
        decoder.add(LSTMCell(hidden_dim, **kwargs))
        decoder.add(Dropout(dropout))
    # the final cell projects to the requested output dimension,
    # which was otherwise unused in this decoder
    decoder.add(LSTMCell(output_dim, **kwargs))
    model = Sequential()
    model.add(encoder)
    model.add(decoder)
    return model
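
Usage sketch; a tuple depth gives different encoder and decoder depths:

from seq2seq.models import SimpleSeq2Seq

model = SimpleSeq2Seq(input_dim=5, hidden_dim=10, output_length=8, output_dim=8,
                      depth=(4, 5))  # 4 encoder LSTMs, 5 decoder LSTMs
model.compile(loss='mse', optimizer='rmsprop')
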
Code example #7
    def __init__(self, config):

        self.model = None
        self.check_list = {
            'text_maxlen', 'sentence_maxnum', 'sentence_maxlen', 'hidden_size',
            'delimiter', 'pad_word', 'unk_word', 'start_sent', 'end_sent',
            'vocab_size', 'embed_size', "embed_path", 'embed_trainable',
            'learning_rate'
        }
        self.config = config
        assert self.check(), 'parameter check failed'

        self.size = self.config['hidden_size']

        embed_dict = read_embedding(filename=self.config['embed_path'])
        self._PAD_ = self.config['pad_word']
        self._UNK_ = self.config['unk_word']
        self._START_ = self.config['start_sent']
        self._END_ = self.config['end_sent']
        embed_dict[self._PAD_] = np.zeros((self.config['embed_size'], ),
                                          dtype=np.float32)
        embed_dict[self._UNK_] = np.zeros((self.config['embed_size'], ),
                                          dtype=np.float32)
        embed = np.float32(
            np.random.uniform(
                -0.2, 0.2,
                [self.config['vocab_size'], self.config['embed_size']]))
        weights = convert_embed_2_numpy(embed_dict, embed=embed)

        self.Emb = Embedding(self.config['vocab_size'],
                             self.config['embed_size'],
                             weights=[weights],
                             trainable=self.config['embed_trainable'])
        self.Splitlayer_keephead = SplitLayer(
            delimiter=self.config['delimiter'],
            output_sentence_len=self.config['sentence_maxlen'],
            output_sentence_num=self.config['sentence_maxnum'],
            pad_word=self.config['pad_word'],
            cut_head=False,
            name='Split_Layer_keep_head')
        self.Splitlayer_cuthead = SplitLayer(
            delimiter=self.config['delimiter'],
            output_sentence_len=self.config['sentence_maxlen'],
            output_sentence_num=self.config['sentence_maxnum'],
            pad_word=self.config['pad_word'],
            cut_head=True,
            name='Split_Layer_cut_head')
        self.Sentence_reshape1D = Reshape((self.config['sentence_maxnum'] *
                                           self.config['sentence_maxlen'], ),
                                          name='Sentence_reshape1D')

        self.Sentence_reshape2D = Reshape((
            self.config['sentence_maxnum'],
            self.config['sentence_maxlen'],
            self.config['embed_size'],
        ),
                                          name='Sentence_reshape2D')
        self.Encoder_word = CuDNNLSTM(units=self.size, name='Encoder_word')
        self.Encoder_sent = CuDNNLSTM(units=self.size,
                                      name='Encoder_sent',
                                      return_state=True)
        self.Decoder_word_cell = LSTMCell(units=self.size,
                                          name='Decoder_word_cell')
        self.Decoder_sent_cell = LSTMCell(units=self.size,
                                          name='Decoder_sent_cell')

        self.AttentionMapper = Linear(output_size=self.size,
                                      bias=True,
                                      bias_start=0.0,
                                      activation='tanh')
        self.Join = Dense(units=1, use_bias=False,
                          name='Join')  # shape : [attention_vec_size]
        self.Exp = Lambda(lambda x: K.exp(x), name='Exp')
        self.Calcprob = Dense(units=self.config['vocab_size'],
                              activation='softmax',
                              name='Calcprob')
        self.ArgMax = Lambda(lambda x: K.argmax(x, axis=-1), dtype='int32')
        self.Printer = Lambda(lambda x: K.tf.Print(x, [x]))
        self.Identical = Lambda(lambda x: x, name='Identical')

        self.EncoderModel = None
        self.DecoderModel_onesent = None
        self.DecoderModel_onestep = None

        self._mask = None
        self._targets = None

        self.optim = optimizers.SGD(config['learning_rate'])
        return
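
A hypothetical config satisfying check_list; every value and the embedding path are placeholders, and the enclosing class name is not shown in the snippet:

config = {
    'text_maxlen': 400, 'sentence_maxnum': 20, 'sentence_maxlen': 30,
    'hidden_size': 256, 'delimiter': '.', 'pad_word': '<PAD>',
    'unk_word': '<UNK>', 'start_sent': '<S>', 'end_sent': '</S>',
    'vocab_size': 50000, 'embed_size': 300,
    'embed_path': 'data/embeddings.txt',  # placeholder path
    'embed_trainable': False,
    'learning_rate': 0.1,
}
# model = SomeHierarchicalSeq2Seq(config)  # class name not shown above
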
Code example #8
def Seq2Seq(output_dim, output_length, batch_input_shape=None,
            input_shape=None, batch_size=None, input_dim=None, input_length=None,
            hidden_dim=None, depth=1, broadcast_state=True, unroll=False,
            stateful=False, inner_broadcast_state=True, teacher_force=False,
            peek=False, dropout=0.):

    '''
    Seq2seq model based on [1] and [2].
    This model has the ability to transfer the encoder hidden state to the decoder's
    hidden state (specified by the broadcast_state argument). Also, in deep models
    (depth > 1), the hidden state is propagated throughout the LSTM stack (specified
    by the inner_broadcast_state argument). You can switch between the [1]-based and
    the [2]-based model using the peek argument (peek = True for [2], peek = False
    for [1]). When peek = True, the decoder gets a 'peek' at the context vector at
    every timestep.

    [1] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1)); where s is the hidden state of the LSTM (h and c)
        y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1), C)
        y(0) = LSTM(s0, C, C)
        where s is the hidden state of the LSTM (h and c), and C is the context
        vector from the encoder.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder should
                      be transferred to the decoder.
    inner_broadcast_state : Specifies whether hidden states should be propagated
                            throughout the LSTM stack in deep models.
    peek : Specifies if the decoder should be able to peek at the context vector
           at every timestep.
    dropout : Dropout probability in between layers.
    '''

    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size,) + (input_length,) + (input_dim,)
        else:
            shape = (batch_size,) + (None,) + (input_dim,)
    else:
        raise TypeError('Expected one of batch_input_shape, input_shape or '
                        'input_dim to be specified')
    if hidden_dim is None:
        hidden_dim = output_dim

    encoder = RecurrentSequential(readout=True, state_sync=inner_broadcast_state,
                                  unroll=unroll, stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state, decode=True,
                                  output_length=output_length, unroll=unroll,
                                  stateful=stateful, teacher_force=teacher_force)

    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim,
                                    batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True
    encoded_seq = dense1(_input)
    encoded_seq = encoder(encoded_seq)
    if broadcast_state:
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)
    inputs = [_input]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]


    decoded_seq = decoder(encoded_seq,
                          ground_truth=inputs[1] if teacher_force else None,
                          initial_readout=encoded_seq, initial_state=states)
    
    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
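
With teacher_force=True the returned model takes the ground-truth sequence as a second input; a sketch with illustrative shapes:

model = Seq2Seq(batch_input_shape=(16, 7, 5), hidden_dim=10, output_length=8,
                output_dim=20, depth=2, teacher_force=True)
model.compile(loss='mse', optimizer='rmsprop')
# The truth tensor is fed alongside the source during training:
# model.fit([x_train, y_train], y_train, batch_size=16)
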
Code example #9
def AttentionSeq2Seq(output_dim, output_length, batch_input_shape=None,
                     batch_size=None, input_shape=None, input_length=None,
                     input_dim=None, hidden_dim=None, depth=1,
                     bidirectional=True, unroll=False, stateful=False, dropout=0.0,):
    '''
    This is an attention Seq2seq model based on [3].
    Here, there is a soft alignment between the input and output sequence elements.
    A bidirectional encoder is used by default. There is no hidden state transfer in
    this model.

    The math:

        Encoder:
        X = Input sequence of length m.
        H = Bidirectional_LSTM(X); Note that here the LSTM has return_sequences = True,
        so H is a sequence of vectors of length m.

        Decoder:
        y(i) = LSTM(s(i-1), y(i-1), v(i)); where s is the hidden state of the LSTM (h and c)
        and v (called the context vector) is a weighted sum over H:

        v(i) = sum(j = 0 to m-1) alpha(i, j) * H(j)

        The weight alpha(i, j) for each H(j) is computed as follows:
        energy = a(s(i-1), H(j))
        alpha = softmax(energy)
        where a is a feed-forward network.
    '''

    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size,) + (input_length,) + (input_dim,)
        else:
            shape = (batch_size,) + (None,) + (input_dim,)
    else:
        raise TypeError('Expected one of batch_input_shape, input_shape or '
                        'input_dim to be specified')
    if hidden_dim is None:
        hidden_dim = output_dim

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    encoder = RecurrentSequential(unroll=unroll, stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))

    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    encoded = encoder(_input)
    decoder = RecurrentSequential(decode=True, output_length=output_length,
                                  unroll=unroll, stateful=stateful)
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    # Both branches start with the same attention cell, so add it once.
    decoder.add(AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    if depth[1] > 1:
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    
    inputs = [_input]
    decoded = decoder(encoded)
    model = Model(inputs, decoded)
    return model
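
Usage sketch; a tuple depth here controls how many cells follow the attention cell, e.g. depth=(1, 3) stacks one attention cell and two LSTM decoder cells in the decoder (the import path assumes the recurrentshop-based seq2seq package):

from seq2seq.models import AttentionSeq2Seq

model = AttentionSeq2Seq(input_dim=5, input_length=7, hidden_dim=10,
                         output_length=8, output_dim=20, depth=(1, 3))
model.compile(loss='mse', optimizer='rmsprop')
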
Code example #10
def SimpleSeq2Seq(output_dim, output_length, hidden_dim=None, input_shape=None,
                  batch_size=None, batch_input_shape=None, input_dim=None,
                  input_length=None, depth=1, dropout=0.0, unroll=False,
                  stateful=False):

    '''
    Simple model for sequence to sequence learning.
    The encoder encodes the input sequence to a vector (called the context vector)
    and the decoder decodes the context vector into a sequence of vectors.
    There is no one-to-one relation between the input and output sequence
    elements. The input sequence and output sequence may differ in length.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    dropout : Dropout probability in between layers.
    '''

    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size,) + (input_length,) + (input_dim,)
        else:
            shape = (batch_size,) + (None,) + (input_dim,)
    else:
        raise TypeError('Expected one of batch_input_shape, input_shape or '
                        'input_dim to be specified')
    if hidden_dim is None:
        hidden_dim = output_dim
    encoder = RecurrentSequential(unroll=unroll, stateful=stateful)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[-1])))

    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    decoder = RecurrentSequential(unroll=unroll, stateful=stateful,
                                  decode=True, output_length=output_length)
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim)))

    if depth[1] == 1:
        decoder.add(LSTMCell(output_dim))
    else:
        decoder.add(LSTMCell(hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMCell(hidden_dim))
        # the final pair belongs inside the else branch; otherwise a
        # single-layer decoder would get an extra output cell
        decoder.add(Dropout(dropout))
        decoder.add(LSTMCell(output_dim))

    _input = Input(batch_shape=shape)
    x = encoder(_input)
    output = decoder(x)
    return Model(_input, output)
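
A minimal call, relying on hidden_dim defaulting to output_dim; values are illustrative:

model = SimpleSeq2Seq(input_dim=5, output_length=8, output_dim=8)
model.compile(loss='mse', optimizer='rmsprop')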