Code example #1
def deep_rnn_model(input_dim, units, recur_layers, output_dim=29):
    """ Build a deep recurrent network for speech 
    """
    # Main acoustic input
    input_data = Input(name='the_input', shape=(None, input_dim))
    # TODO: Add recurrent layers, each with batch normalization
    prev_input = input_data
    for i in range(recur_layers):
        # Add a GRU layer that returns the full sequence
        simple_rnn = GRU(units,
                         return_sequences=True,
                         implementation=2,
                         name="rnn" + str(i))(prev_input)
        # Add batch normalization after each recurrent layer
        prev_input = BatchNormalization(name="bn_rnn" + str(i))(simple_rnn)
    # TODO: Add a TimeDistributed(Dense(output_dim)) layer,
    # fed by the output of the last batch-normalized recurrent layer
    time_dense = TimeDistributed(Dense(output_dim))(prev_input)
    time_dense.supports_masking = True
    # Add softmax activation layer
    y_pred = Activation('softmax', name='softmax')(time_dense)
    # Specify the model
    model = Model(inputs=input_data, outputs=y_pred)
    model.output_length = lambda x: x
    print(model.summary())
    return model
Code example #2
def cnn_rnn_model(input_dim, filters, kernel_size, conv_stride,
    conv_border_mode, units, output_dim=29):
    """ Build a recurrent + convolutional network for speech 
    """
    # Main acoustic input
    input_data = Input(name='the_input', shape=(None, input_dim))
    # Add convolutional layer
    conv_1d = Conv1D(filters, kernel_size, 
                     strides=conv_stride, 
                     padding=conv_border_mode,
                     activation='relu',
                     name='conv1d')(input_data)
    # Add batch normalization
    bn_cnn = BatchNormalization(name='bn_conv_1d')(conv_1d)
    # Add a recurrent layer
    simp_rnn = SimpleRNN(units, activation='relu',
        return_sequences=True, implementation=2, name='rnn')(bn_cnn)
    # TODO: Add batch normalization
    bn_rnn = BatchNormalization(name='bn_simple_rnn')(simp_rnn)
    # TODO: Add a TimeDistributed(Dense(output_dim)) layer
    time_dense = TimeDistributed(Dense(output_dim))(bn_rnn)
    time_dense.supports_masking = True
    # Add softmax activation layer
    y_pred = Activation('softmax', name='softmax')(time_dense)
    # Specify the model
    model = Model(inputs=input_data, outputs=y_pred)
    model.output_length = lambda x: cnn_output_length(
        x, kernel_size, conv_border_mode, conv_stride)
    print(model.summary())
    return model
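
The snippet above sets model.output_length to a cnn_output_length helper that is not shown in this excerpt. A minimal sketch, assuming it follows the standard Keras convolution output-length formula (the name and defaults below are assumptions, not the project's actual code):

def cnn_output_length(input_length, filter_size, border_mode, stride, dilation=1):
    """ Sketch: sequence length coming out of a Conv1D layer. """
    if input_length is None:
        return None
    # Standard formula: 'same' keeps the length, 'valid' trims (filter - 1) steps,
    # then the stride divides the result (rounding up).
    dilated_filter_size = filter_size + (filter_size - 1) * (dilation - 1)
    if border_mode == 'same':
        output_length = input_length
    else:  # 'valid'
        output_length = input_length - dilated_filter_size + 1
    return (output_length + stride - 1) // stride
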
Code example #3
def rnn_model(input_dim, units, activation, output_dim=29):
    """ Build a recurrent network for speech 
    """
    # Main acoustic input
    input_data = Input(name='the_input', shape=(None, input_dim))
    # Add recurrent layer
    simp_rnn = GRU(units,
                   activation=activation,
                   return_sequences=True,
                   implementation=2,
                   name='rnn')(input_data)
    # TODO: Add batch normalization
    bn_rnn = BatchNormalization(name='bn_rnn')(simp_rnn)
    # TODO: Add a TimeDistributed(Dense(output_dim)) layer
    time_dense = TimeDistributed(Dense(output_dim))(bn_rnn)
    time_dense.supports_masking = True
    # Add softmax activation layer
    y_pred = Activation('softmax', name='softmax')(time_dense)
    # Specify the model
    model = Model(inputs=input_data, outputs=y_pred)
    model.output_length = lambda x: x
    print(model.summary())
    return model
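
Code examples #1-#3 omit their imports. A minimal sketch of the Keras functional-API imports they rely on, plus an illustrative call (the Keras 2 import paths are an assumption, and the 161-dimensional spectrogram input, 200 units and 2 layers are made-up values, not taken from the original project):

from keras.models import Model
from keras.layers import (Input, GRU, SimpleRNN, Conv1D, BatchNormalization,
                          TimeDistributed, Dense, Activation)

# Illustrative: 161 spectrogram features per frame, two stacked GRU layers
model = deep_rnn_model(input_dim=161, units=200, recur_layers=2)
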
Code example #4
File: models.py  Project: IcyShen1949/transfer
def Seq2Seq(output_dim,
            output_length,
            batch_input_shape=None,
            input_shape=None,
            batch_size=None,
            input_dim=None,
            input_length=None,
            hidden_dim=None,
            depth=1,
            broadcast_state=True,
            unroll=False,
            stateful=False,
            inner_broadcast_state=True,
            teacher_force=False,
            peek=False,
            dropout=0.):
    '''
    Seq2seq model based on [1] and [2].
    This model has the ability to transfer the encoder hidden state to the decoder's
    hidden state (specified by the broadcast_state argument). Also, in deep models
    (depth > 1), the hidden state is propagated throughout the LSTM stack (specified by
    the inner_broadcast_state argument). You can switch between the [1] based model and the [2]
    based model using the peek argument (peek = True for [2], peek = False for [1]).
    When peek = True, the decoder gets a 'peek' at the context vector at every timestep.

    [1] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
            y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM (h and c)
            y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
            y(t) = LSTM(s(t-1), y(t-1), C)
            y(0) = LSTM(s0, C, C)
            Where s is the hidden state of the LSTM (h and c), and C is the context vector
            from the encoder.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder should be
                      transferred to the decoder.
    inner_broadcast_state : Specifies whether hidden states should be propagated
                            throughout the LSTM stack in deep models.
    peek : Specifies if the decoder should be able to peek at the context vector
           at every timestep.
    dropout : Dropout probability in between layers.


    '''

    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim

    encoder = RecurrentSequential(readout=True,
                                  state_sync=inner_broadcast_state,
                                  unroll=unroll,
                                  stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(
            LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state,
                                  decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful,
                                  teacher_force=teacher_force)

    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim,
                            hidden_dim=hidden_dim,
                            batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True
    encoded_seq = dense1(_input)
    encoded_seq = encoder(encoded_seq)
    if broadcast_state:
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)
    inputs = [_input]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]

    decoded_seq = decoder(encoded_seq,
                          ground_truth=inputs[1] if teacher_force else None,
                          initial_readout=encoded_seq,
                          initial_state=states)

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
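
A call sketch for the Seq2Seq function above (all dimensions are illustrative, and the compile settings are just one plausible choice, not part of the original code):

# Encoder reads 8 steps of 5-dim vectors; decoder emits 10 steps of 5-dim vectors
model = Seq2Seq(input_dim=5, input_length=8, hidden_dim=16,
                output_length=10, output_dim=5, depth=2, peek=True)
model.compile(loss='mse', optimizer='rmsprop')
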
Code example #5
File: models.py  Project: Vanova/seq2seq
def Seq2Seq(output_dim, output_length, hidden_dim=None, depth=1, broadcast_state=True, inner_broadcast_state=True, teacher_force=False, peek=False, dropout=0., **kwargs):
	'''
	Seq2seq model based on [1] and [2].
	This model has the ability to transfer the encoder hidden state to the decoder's
        hidden state (specified by the broadcast_state argument). Also, in deep models
        (depth > 1), the hidden state is propagated throughout the LSTM stack (specified by
        the inner_broadcast_state argument). You can switch between the [1] based model and the [2]
        based model using the peek argument (peek = True for [2], peek = False for [1]).
	When peek = True, the decoder gets a 'peek' at the context vector at every timestep.

	[1] based model:

		Encoder:
		X = Input sequence
		C = LSTM(X); The context vector

		Decoder:
        y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM (h and c)
        y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

		Encoder:
		X = Input sequence
		C = LSTM(X); The context vector

		Decoder:
        y(t) = LSTM(s(t-1), y(t-1), C)
        y(0) = LSTM(s0, C, C)
        Where s is the hidden state of the LSTM (h and c), and C is the context vector 
        from the encoder.

	Arguments:

	output_dim : Required output dimension.
	hidden_dim : The dimension of the internal representations of the model.
	output_length : Length of the required output sequence.
        depth : Used to create a deep Seq2seq model. For example, if depth = 3,
                there will be 3 LSTMs on the encoding side and 3 LSTMs on the
                decoding side. You can also specify depth as a tuple. For example,
                if depth = (4, 5), 4 LSTMs will be added to the encoding side and
                5 LSTMs will be added to the decoding side.
        broadcast_state : Specifies whether the hidden state from the encoder should be
                          transferred to the decoder.
        inner_broadcast_state : Specifies whether hidden states should be propagated
                                throughout the LSTM stack in deep models.
        peek : Specifies if the decoder should be able to peek at the context vector
               at every timestep.
	dropout : Dropout probability in between layers.


	'''
	if type(depth) == int:
		depth = [depth, depth]
	if 'batch_input_shape' in kwargs:
		shape = kwargs['batch_input_shape']
		del kwargs['batch_input_shape']
	elif 'input_shape' in kwargs:
		shape = (None,) + tuple(kwargs['input_shape'])
		del kwargs['input_shape']
	elif 'input_dim' in kwargs:
		if 'input_length' in kwargs:
			shape = (None, kwargs['input_length'], kwargs['input_dim'])
			del kwargs['input_length']
		else:
			shape = (None, None, kwargs['input_dim'])
		del kwargs['input_dim']
	if 'unroll' in kwargs:
		unroll = kwargs['unroll']
		del kwargs['unroll']
	else:
		unroll = False
	if 'stateful' in kwargs:
		stateful = kwargs['stateful']
		del kwargs['stateful']
	else:
		stateful = False
	if not hidden_dim:
		hidden_dim = output_dim
	encoder = RecurrentContainer(readout=True, state_sync=inner_broadcast_state, input_length=shape[1], unroll=unroll, stateful=stateful, return_states=broadcast_state)
	for i in range(depth[0]):
		encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim), **kwargs))
		encoder.add(Dropout(dropout))
	dense1 = TimeDistributed(Dense(hidden_dim))
	dense1.supports_masking = True
	dense2 = Dense(output_dim)
	decoder = RecurrentContainer(readout='add' if peek else 'readout_only', state_sync=inner_broadcast_state, output_length=output_length, unroll=unroll, stateful=stateful, decode=True, input_length=shape[1])
	for i in range(depth[1]):
		decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
		decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim, batch_input_shape=(shape[0], output_dim), **kwargs))
	input = Input(batch_shape=shape)
	input._keras_history[0].supports_masking = True
	encoded_seq = dense1(input)
	encoded_seq = encoder(encoded_seq)
	if broadcast_state:
		states = encoded_seq[-2:]
		encoded_seq = encoded_seq[0]
	else:
		states = [None] * 2
	encoded_seq = dense2(encoded_seq)
	inputs = [input]
	if teacher_force:
		truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
		truth_tensor._keras_history[0].supports_masking = True
		inputs += [truth_tensor]
	decoded_seq = decoder({'input': encoded_seq, 'ground_truth': inputs[1] if teacher_force else None, 'initial_readout': encoded_seq, 'states': states})
	model = Model(inputs, decoded_seq)
	model.encoder = encoder
	model.decoder = decoder
	return model
Code example #6
def Seq2Seq(output_dim, output_length, lookup_matrix, hidden_dim=None, depth=1, broadcast_state=True, inner_broadcast_state=True, teacher_force=False, peek=False, dropout=0., **kwargs):
    '''
    Seq2seq model based on [1] and [2].
    This model has the ability to transfer the encoder hidden state to the decoder's
    hidden state (specified by the broadcast_state argument). Also, in deep models
    (depth > 1), the hidden state is propagated throughout the LSTM stack (specified by
    the inner_broadcast_state argument). You can switch between the [1] based model and the [2]
    based model using the peek argument (peek = True for [2], peek = False for [1]).
    When peek = True, the decoder gets a 'peek' at the context vector at every timestep.

    [1] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM (h and c)
        y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1), C)
        y(0) = LSTM(s0, C, C)
        Where s is the hidden state of the LSTM (h and c), and C is the context vector 
        from the encoder.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder should be
                      transferred to the decoder.
    inner_broadcast_state : Specifies whether hidden states should be propagated
                            throughout the LSTM stack in deep models.
    peek : Specifies if the decoder should be able to peek at the context vector
           at every timestep.
    dropout : Dropout probability in between layers.


    '''
    if type(depth) == int:
        depth = [depth, depth]
    if 'batch_input_shape' in kwargs:
        shape = kwargs['batch_input_shape']
        del kwargs['batch_input_shape']
    elif 'input_shape' in kwargs:
        shape = (None,) + tuple(kwargs['input_shape'])
        del kwargs['input_shape']
    elif 'input_dim' in kwargs:
        if 'input_length' in kwargs:
            shape = (None, kwargs['input_length'], kwargs['input_dim'])
            del kwargs['input_length']
        else:
            shape = (None, None, kwargs['input_dim'])
        del kwargs['input_dim']
    if 'unroll' in kwargs:
        unroll = kwargs['unroll']
        del kwargs['unroll']
    else:
        unroll = False
    if 'stateful' in kwargs:
        stateful = kwargs['stateful']
        del kwargs['stateful']
    else:
        stateful = False
    if not hidden_dim:
        hidden_dim = output_dim

    encoder = RecurrentContainer(readout=True, state_sync=inner_broadcast_state, input_length=shape[1], unroll=unroll, stateful=stateful, return_states=broadcast_state)
    for i in range(depth[0]):
        encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim), **kwargs))
        encoder.add(Dropout(dropout))
    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    decoder = RecurrentContainer(readout='add' if peek else 'readout_only', state_sync=inner_broadcast_state, output_length=output_length, unroll=unroll, stateful=stateful, decode=True, input_length=shape[1])
    for i in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim, batch_input_shape=(shape[0], output_dim), **kwargs))

    input = Input(batch_shape=(shape[0],shape[1]))
    print(input.shape)
    embedded_input = Embedding(input_dim=lookup_matrix.shape[0], output_dim=lookup_matrix.shape[1], weights=[lookup_matrix])(input)
    print(embedded_input.shape)
    input._keras_history[0].supports_masking = True

    encoded_seq = dense1(embedded_input)
    # print(encoded_seq.shape)
    encoded_seq = encoder(encoded_seq)
    print(encoded_seq)

    if broadcast_state:
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = [None] * 2
    encoded_seq = dense2(encoded_seq)
    inputs = [input]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]
    decoded_seq = decoder({'input': encoded_seq, 'ground_truth': inputs[1] if teacher_force else None, 'initial_readout': encoded_seq, 'states': states})

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder

    print "==========Input========="
    print model.input
    print "==========Input========="
    print model.output

    return model
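
In this variant the raw input is a sequence of token ids that is embedded with a fixed lookup_matrix before entering the encoder. A sketch of how such a matrix might be built and passed in (the vocabulary size, embedding width and other dimensions are illustrative assumptions, not values from the original project):

import numpy as np

vocab_size, embed_dim = 10000, 300
# One row per vocabulary entry, e.g. pretrained word vectors
lookup_matrix = np.random.rand(vocab_size, embed_dim)
model = Seq2Seq(output_dim=vocab_size, output_length=20, lookup_matrix=lookup_matrix,
                hidden_dim=256, input_length=30, input_dim=embed_dim)
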
Code example #7
def Seq2Seq(output_dim, output_length, batch_input_shape=None,
            input_shape=None, batch_size=None, input_dim=None, input_length=None,
            hidden_dim=None, depth=1, broadcast_state=True, unroll=False,
            stateful=False, inner_broadcast_state=True, teacher_force=False,
            peek=False, dropout=0.):

    '''
    Seq2seq model based on [1] and [2].
    This model has the ability to transfer the encoder hidden state to the decoder's
    hidden state (specified by the broadcast_state argument). Also, in deep models
    (depth > 1), the hidden state is propagated throughout the LSTM stack (specified by
    the inner_broadcast_state argument). You can switch between the [1] based model and the [2]
    based model using the peek argument (peek = True for [2], peek = False for [1]).
    When peek = True, the decoder gets a 'peek' at the context vector at every timestep.

    [1] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
            y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM (h and c)
            y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
            y(t) = LSTM(s(t-1), y(t-1), C)
            y(0) = LSTM(s0, C, C)
            Where s is the hidden state of the LSTM (h and c), and C is the context vector
            from the encoder.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder should be
                      transferred to the decoder.
    inner_broadcast_state : Specifies whether hidden states should be propagated
                            throughout the LSTM stack in deep models.
    peek : Specifies if the decoder should be able to peek at the context vector
           at every timestep.
    dropout : Dropout probability in between layers.


    '''
    '''
        The block below computes the shape, batch_input_shape=(batch_size, timesteps, data_dim).
        A concrete batch_size allows a stateful LSTM, while None leaves it non-stateful.
    '''
    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size,) + (input_length,) + (input_dim,)
        else:
            shape = (batch_size,) + (None,) + (input_dim,)
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim

    '''
        Recurrent layers reference :- https://keras.io/layers/recurrent/
        unroll - whether to unroll the recurrence; unrolling can speed things up at the cost of memory
        return_states - Boolean. Whether to return the last states in addition to the output.
    '''
    encoder = RecurrentSequential(readout=True, state_sync=inner_broadcast_state,
                                  unroll=unroll, stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    ''' 
        TimeDistributed :- https://keras.io/layers/wrappers/
    '''
    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    ''' 
        Readout lets you feed the output of your RNN from the previous time step back to the current time step.
    '''
    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state, decode=True,
                                  output_length=output_length, unroll=unroll,
                                  stateful=stateful, teacher_force=teacher_force)

    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim,
                                    batch_input_shape=(shape[0], output_dim)))




    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True
    encoded_seq = dense1(_input)
    encoded_seq = encoder(encoded_seq)
    if broadcast_state:
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)
    inputs = [_input]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]


    decoded_seq = decoder(encoded_seq,
                          ground_truth=inputs[1] if teacher_force else None,
                          initial_readout=encoded_seq, initial_state=states)
    
    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
Code example #8
def Seq2Seq(output_dim, output_length, batch_input_shape=None,
            input_shape=None, batch_size=None, input_dim=None, input_length=None,
            hidden_dim=None, depth=1, broadcast_state=True, unroll=False,
            stateful=False, inner_broadcast_state=True, teacher_force=False,
            peek=False, dropout=0.):

    '''
    Seq2seq model based on [1] and [2].
    This model has the ability to transfer the encoder hidden state to the decoder's
    hidden state (specified by the broadcast_state argument). Also, in deep models
    (depth > 1), the hidden state is propagated throughout the LSTM stack (specified by
    the inner_broadcast_state argument). You can switch between the [1] based model and the [2]
    based model using the peek argument (peek = True for [2], peek = False for [1]).
    When peek = True, the decoder gets a 'peek' at the context vector at every timestep.

    [1] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
            y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM (h and c)
            y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
            y(t) = LSTM(s(t-1), y(t-1), C)
            y(0) = LSTM(s0, C, C)
            Where s is the hidden state of the LSTM (h and c), and C is the context vector
            from the encoder.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder should be
                      transferred to the decoder.
    inner_broadcast_state : Specifies whether hidden states should be propagated
                            throughout the LSTM stack in deep models.
    peek : Specifies if the decoder should be able to peek at the context vector
           at every timestep.
    dropout : Dropout probability in between layers.


    '''

    if isinstance(depth, int):
        depth = (depth, depth) 
        # When depth is an integer, the encoder and decoder get the same number of layers
    if batch_input_shape:
        shape = batch_input_shape 
        # 批输入的shape作为模型输入的shape
    elif input_shape:
        shape = (batch_size,) + input_shape 
        # If batch_input_shape is not given, prepend the batch size to input_shape,
        # e.g. batch size 32 and an input of 768 give (32, 768); input_shape must be a tuple
    elif input_dim:
        if input_length:
            shape = (batch_size,) + (input_length,) + (input_dim,)
            # The usual general shape: (batch size, input sequence length, input dimension)
        else:
            shape = (batch_size,) + (None,) + (input_dim,)
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim
        # If hidden_dim is not specified, fall back to output_dim

    encoder = RecurrentSequential(readout=True, state_sync=inner_broadcast_state,
                                  unroll=unroll, stateful=stateful,
                                  return_states=broadcast_state)
    '''
    Parameters:
        readout: whether to post-process the output; options include add (True), multiply, average, maximum, etc.
        state_sync: whether states are propagated internally; the source handles initial_states differently,
                    carrying each cell's state over to the next batch
        stateful: a Keras feature that carries cell states across batches rather than only between cells.
                  With stateful = True we must set shuffle = False in fit(); after the small sequence X[i]
                  (the i-th sample of the input matrix) is trained, Keras passes its final memory state on
                  to X[i+bs] (the (i+bs)-th sample) as that sample's initial state.
        unroll: a Keras feature that unrolls the LSTM, expanding the time sequence into concatenated
                cells; this can speed things up but uses more memory
    '''
    for _ in range(depth[0]):
        encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))
    # The encoder depth is set by depth[0]

    dense1 = TimeDistributed(Dense(hidden_dim))
    '''
    dense1:
        The TimeDistributed wrapper applies the same Dense layer to every timestep of a
        (input_length, input_dim) sample in the batch, sharing its weights across the whole
        length, so the output becomes (batch_size, input_length, hidden_dim)
    '''
    dense1.supports_masking = True
    dense2 = Dense(output_dim)
    '''
    dense2:
        Reshapes the encoding coming out of the encoder to output_dim before it is fed to the decoder
    '''

    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state, decode=True,
                                  output_length=output_length, unroll=unroll,
                                  stateful=stateful, teacher_force=teacher_force)
    '''
    Parameters:
        teacher_force: instead of feeding the previous step's output back in as the next step's
        input, the decoder is fed the corresponding previous item of the training target (the
        ground truth). Combined with beam search and scheduled sampling, a probability p decides
        between teacher forcing and free running; as training epochs pass, p decays, gradually
        reducing how often teacher forcing is sampled so the model both learns quickly and generalizes.
    '''

    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim,
                                    batch_input_shape=(shape[0], output_dim)))
    # The decoder depth is set by depth[1]

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True
    encoded_seq = dense1(_input)
    # Pass the input through the TimeDistributed layer first, projecting it to hidden_dim
    encoded_seq = encoder(encoded_seq)
    # Then encode it with the encoder
    # What follows handles the options: whether to broadcast state, whether to use teacher forcing, etc.
    if broadcast_state:
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)
    inputs = [_input]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]
    # Post-processing after encoding

    # Decode; initial_state controls whether the state handed over from the encoder is used
    decoded_seq = decoder(encoded_seq,
                          ground_truth=inputs[1] if teacher_force else None,
                          initial_readout=encoded_seq, initial_state=states)
    
    seq2seq_model = Model(inputs, decoded_seq)
    # The full model maps the input to the decoded sequence; the encoder can also be pulled out
    # on its own and its encodings reused.
    # This model maps a real-time news sequence to a stock-price movement sequence. To also train
    # on news from outside trading hours, the encoder and decoder weights need to be shared and new
    # Flatten and Dense layers added, treating the decoder's output sequence as a movement encoding
    # that then goes through a Dense layer to produce a scalar.
    # That amounts to appending a Sequential stack after the RecurrentSequential.
    seq2seq_model.encoder = encoder
    seq2seq_model.decoder = decoder

    decoded_vec = Flatten()(decoded_seq)
    decoded_vec = Dense(1, activation='tanh')(decoded_vec)
    seq2vec_model = Model(inputs, [decoded_seq, decoded_vec])
    # The final model has 1 input and 2 outputs

    return seq2vec_model
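
The returned seq2vec_model has one input and two outputs (the decoded sequence and the scalar from the final Dense layer), so training needs one loss per output. A hedged sketch of how it might be compiled (the dimensions, loss choices and weights are assumptions, not from the original project):

model = Seq2Seq(output_dim=8, output_length=10, input_dim=8,
                input_length=12, hidden_dim=16, depth=2)
model.compile(optimizer='rmsprop',
              loss=['mse', 'mse'],        # one loss per output head
              loss_weights=[1.0, 0.5])    # sequence head vs. scalar head
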
Code example #9
def Seq2Seq(output_dim,
            output_length,
            batch_input_shape=None,
            input_shape=None,
            batch_size=None,
            input_dim=None,
            input_length=None,
            hidden_dim=None,
            depth=1,
            broadcast_state=True,
            unroll=False,
            stateful=False,
            inner_broadcast_state=True,
            teacher_force=False,
            peek=False,
            dropout=0.):

    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim

    encoder = RecurrentSequential(readout=True,
                                  state_sync=inner_broadcast_state,
                                  unroll=unroll,
                                  stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(
            LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state,
                                  decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful,
                                  teacher_force=teacher_force)

    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim,
                            hidden_dim=hidden_dim,
                            batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True
    encoded_seq = dense1(_input)
    encoded_seq = encoder(encoded_seq)
    if broadcast_state:
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)
    inputs = [_input]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]

    decoded_seq = decoder(encoded_seq,
                          ground_truth=inputs[1] if teacher_force else None,
                          initial_readout=encoded_seq,
                          initial_state=states)

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
Code example #10
def SkipThoughtModel(sent_len,
                     vocab_size,
                     embed_dims,
                     output_length,
                     output_dim,
                     dropout=0.4,
                     unroll=False,
                     teacher_force=False):
    input_sent = Input(shape=(sent_len, vocab_size), dtype=K.floatx())
    input_sent._keras_history[0].supports_masking = True

    encoder = RecurrentContainer(readout=True,
                                 input_length=sent_len,
                                 unroll=unroll,
                                 stateful=False)
    # for i in range(depth[0]):
    encoder.add(LSTMCell(embed_dims, batch_input_shape=(None, embed_dims)))
    encoder.add(Dropout(dropout))

    dense1 = TimeDistributed(Dense(embed_dims))
    dense1.supports_masking = True
    dense2 = Dense(embed_dims)

    encoded_seq = dense1(input_sent)
    encoded_seq = encoder(encoded_seq)

    states = [None] * 2
    encoded_seq = dense2(encoded_seq)
    inputs = [input_sent]
    if teacher_force:
        truth_tensor_prev = Input(batch_shape=(None, output_length,
                                               output_dim))
        truth_tensor_prev._keras_history[0].supports_masking = True
        truth_tensor_next = Input(batch_shape=(None, output_length,
                                               output_dim))
        truth_tensor_next._keras_history[0].supports_masking = True
        inputs += [truth_tensor_prev, truth_tensor_next]

    prev_decoder = build_decoder(dropout=dropout,
                                 unroll=unroll,
                                 output_length=output_length)
    next_decoder = build_decoder()
    prev_decoded_seq = prev_decoder({
        'input':
        encoded_seq,
        'ground_truth':
        inputs[1] if teacher_force else None,
        'initial_readout':
        encoded_seq,
        'states':
        states
    })

    next_decoded_seq = next_decoder({
        'input':
        encoded_seq,
        'ground_truth':
        inputs[2] if teacher_force else None,
        'initial_readout':
        encoded_seq,
        'states':
        states
    })

    model = Model(inputs, [prev_decoded_seq, next_decoded_seq])
    model.encoder = encoder
    model.decoders = [prev_decoder, next_decoder]
    return model
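
SkipThoughtModel relies on a build_decoder helper that is not included in this excerpt. Going by how the decoders are assembled in code examples #5 and #6, a plausible sketch could look like the following; the signature and defaults are guesses, and the real helper presumably also fixes output_dim and hidden_dim itself:

def build_decoder(dropout=0.4, unroll=False, output_length=None,
                  output_dim=None, hidden_dim=None):
    # Hypothetical reconstruction mirroring the decoder setup used elsewhere in this file
    decoder = RecurrentContainer(readout='readout_only', decode=True,
                                 output_length=output_length,
                                 unroll=unroll, stateful=False)
    decoder.add(Dropout(dropout, batch_input_shape=(None, output_dim)))
    decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim,
                                batch_input_shape=(None, output_dim)))
    return decoder
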
Code example #11
def SkipThoughtModel_new(output_dim,
                         output_length,
                         batch_input_shape=None,
                         input_shape=None,
                         batch_size=None,
                         input_dim=None,
                         input_length=None,
                         hidden_dim=None,
                         depth=1,
                         broadcast_state=True,
                         unroll=False,
                         stateful=False,
                         inner_broadcast_state=True,
                         teacher_force=False,
                         peek=False,
                         dropout=0.):
    '''
    Seq2seq model based on [1] and [2]. You can switch between the [1] based model and the [2]
    based model using the peek argument (peek = True for [2], peek = False for [1]).
    When peek = True, the decoder gets a 'peek' at the context vector at every timestep.

    Arguments:
    - output_dim : Required output dimension.
    - hidden_dim : The dimension of the internal representations of the model.
    - output_length : Length of the required output sequence.
    - depth : Used to create a deep Seq2seq model. For example, if depth = 3,
              there will be 3 LSTMs on the encoding side and 3 LSTMs on the
              decoding side. You can also specify depth as a tuple. For example,
              if depth = (4, 5), 4 LSTMs will be added to the encoding side and
              5 LSTMs will be added to the decoding side.
    - broadcast_state : Specifies whether the hidden state from the encoder should be
                        transferred to the decoder.
    - inner_broadcast_state : Specifies whether hidden states should be propagated
                              throughout the LSTM stack in deep models.
    - peek : Specifies if the decoder should be able to peek at the context vector
             at every timestep.
    - dropout : Dropout probability in between layers.

    Returns: Keras model to be trained.
    '''

    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim

    encoder = RecurrentSequential(readout=True,
                                  state_sync=inner_broadcast_state,
                                  unroll=unroll,
                                  stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(
            LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    decoder_next = RecurrentSequential(
        readout='add' if peek else 'readout_only',
        state_sync=inner_broadcast_state,
        decode=True,
        output_length=output_length,
        unroll=unroll,
        stateful=stateful,
        teacher_force=teacher_force)
    decoder_prev = RecurrentSequential(
        readout='add' if peek else 'readout_only',
        state_sync=inner_broadcast_state,
        decode=True,
        output_length=output_length,
        unroll=unroll,
        stateful=stateful,
        teacher_force=teacher_force)

    for _ in range(depth[1]):
        decoder_next.add(
            Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder_next.add(
            LSTMDecoderCell(output_dim=output_dim,
                            hidden_dim=hidden_dim,
                            batch_input_shape=(shape[0], output_dim)))

        decoder_prev.add(
            Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder_prev.add(
            LSTMDecoderCell(output_dim=output_dim,
                            hidden_dim=hidden_dim,
                            batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True
    encoded_seq = dense1(_input)
    encoded_seq = encoder(encoded_seq)
    if broadcast_state:
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)
    inputs = [_input]
    if teacher_force:
        truth_tensor_next = Input(batch_shape=(shape[0], output_length,
                                               output_dim))
        truth_tensor_next._keras_history[0].supports_masking = True
        truth_tensor_prev = Input(batch_shape=(None, output_length,
                                               output_dim))
        truth_tensor_prev._keras_history[0].supports_masking = True
        inputs += [truth_tensor_prev, truth_tensor_next]

    prev_decoded_seq = decoder_prev(
        encoded_seq,
        ground_truth=inputs[1] if teacher_force else None,
        initial_readout=encoded_seq,
        initial_state=states)

    next_decoded_seq = decoder_next(
        encoded_seq,
        ground_truth=inputs[2] if teacher_force else None,
        initial_readout=encoded_seq,
        initial_state=states)

    model = Model(inputs, [prev_decoded_seq, next_decoded_seq])
    model.encoder = encoder
    model.decoder = [decoder_prev, decoder_next]
    return model