def build_lstm(output_dim, embeddings):

    loss_function = "categorical_crossentropy"

    # this is the placeholder tensor for the input sequences
    sequence = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype="int32")

    # this embedding layer will transform the sequences of integers
    embedded = Embedding(embeddings.shape[0],
                         embeddings.shape[1],
                         input_length=MAX_SEQUENCE_LENGTH,
                         weights=[embeddings],
                         trainable=True)(sequence)

    # 4 convolution layers (each 1000 filters)
    cnn = [Convolution1D(filter_length=filters,
                         nb_filter=1000,
                         border_mode="same") for filters in [2, 3, 5, 7]]

    # concatenate the convolution outputs
    merged_cnn = merge([layer(embedded) for layer in cnn], mode="concat")

    # create attention vector from the max-pooled convolution output
    maxpool = Lambda(lambda x: keras_backend.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    attention_vector = maxpool(merged_cnn)

    forwards = AttentionLSTM(64, attention_vector)(embedded)
    backwards = AttentionLSTM(64, attention_vector, go_backwards=True)(embedded)

    # concatenate the outputs of the 2 LSTM layers
    bi_lstm = merge([forwards, backwards], mode="concat", concat_axis=-1)

    after_dropout = Dropout(0.5)(bi_lstm)

    # softmax output layer
    output = Dense(output_dim=output_dim, activation="softmax")(after_dropout)

    # the complete model
    model = Model(input=sequence, output=output)

    # try using different optimizers and different optimizer configs
    model.compile("adagrad", loss_function, metrics=["accuracy"])

    return model
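
# A hedged usage sketch for build_lstm (not part of the original source). It
# assumes the Keras 1.x layers used above plus the custom AttentionLSTM layer
# are already imported and that MAX_SEQUENCE_LENGTH is defined at module
# level; the embedding matrix, inputs, and labels below are random
# placeholders, not real data.
import numpy as np

n_classes = 6                                                    # hypothetical label count
embedding_matrix = np.random.rand(5000, 300).astype("float32")   # placeholder (vocab, dim) matrix
model = build_lstm(output_dim=n_classes, embeddings=embedding_matrix)
model.summary()

X = np.random.randint(0, 5000, size=(32, MAX_SEQUENCE_LENGTH))   # dummy padded sequences
y = np.eye(n_classes)[np.random.randint(0, n_classes, size=32)]  # dummy one-hot labels
model.fit(X, y, batch_size=8, nb_epoch=1)                        # Keras 1.x fit signature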
def lstm_attention(X_train, y_train, X_test, y_test, vocab_size):

    X_train = sequence.pad_sequences(X_train, maxlen=MAX_LEN)
    X_test = sequence.pad_sequences(X_test, maxlen=MAX_LEN)
    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    print('Build model...')
    model = Sequential()

    # data
    data = Input(shape=(MAX_LEN,), dtype='int32', name='data')

    # embedding
    embedding = Embedding(vocab_size, EMBED_SIZE,
                          input_length=MAX_LEN,
                          dropout=0.2)
    data_embedding = embedding(data)

    # dropout
    dropout = Dropout(0.25)
    data_dropout = dropout(data_embedding)

    # rnn
    rnn = RNN(HIDDEN_SIZE)
    data_rnn = rnn(data_dropout)
    # data_dropout = dropout(data_rnn)

    # maxpooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    data_pool = maxpool(data_dropout)

    rnn = AttentionLSTM(HIDDEN_SIZE, data_pool)
def build(self):
    question = self.question
    answer = self.get_answer()

    # add embedding layers
    weights = self.model_params.get('initial_embed_weights', None)
    weights = weights if weights is None else [weights]
    embedding = Embedding(input_dim=self.config['n_words'],
                          output_dim=self.model_params.get('n_embed_dims', 256),
                          # weights=weights,
                          mask_zero=True)
    question_embedding = embedding(question)
    answer_embedding = embedding(answer)

    # turn off layer updating
    # embedding.params = []
    # embedding.updates = []

    # question rnn part
    f_rnn = LSTM(self.model_params.get('n_lstm_dims', 141),
                 return_sequences=True,
                 dropout_U=0.2,
                 consume_less='mem')
    b_rnn = LSTM(self.model_params.get('n_lstm_dims', 141),
                 return_sequences=True,
                 dropout_U=0.2,
                 consume_less='mem',
                 go_backwards=True)
    question_f_rnn = f_rnn(question_embedding)
    question_b_rnn = b_rnn(question_embedding)

    # maxpooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    avepool = Lambda(lambda x: K.mean(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    # without this, Keras raises an exception like:
    # Layer lambda_1 does not support masking,
    # but was passed an input_mask: Elemwise{neq,no_inplace}.0
    maxpool.supports_masking = True
    avepool.supports_masking = True
    question_pool = merge([maxpool(question_f_rnn), maxpool(question_b_rnn)],
                          mode='concat', concat_axis=-1)

    # answer rnn part
    f_rnn = AttentionLSTM(self.model_params.get('n_lstm_dims', 141),
                          question_pool,
                          return_sequences=True,
                          consume_less='mem',
                          single_attention_param=True)
    b_rnn = AttentionLSTM(self.model_params.get('n_lstm_dims', 141),
                          question_pool,
                          return_sequences=True,
                          consume_less='mem',
                          go_backwards=True,
                          single_attention_param=True)
    answer_f_rnn = f_rnn(answer_embedding)
    answer_b_rnn = b_rnn(answer_embedding)
    answer_pool = merge([maxpool(answer_f_rnn), maxpool(answer_b_rnn)],
                        mode='concat', concat_axis=-1)

    return question_pool, answer_pool
'''
The Embedding layer can only be used as the first layer of a model.
Inputs: the maximum number of words (i.e. the dictionary size) and the
output dimension of the sentence-vector representation.
# weights=[weights]
'''
weights = np.load('word2vec_100_dim.embeddings')  # (22353, 100)
model.add(Embedding(input_dim=MAX_NB_WORDS, output_dim=100, weights=[weights]))
'''
Output shape: if return_sequences=True, the layer outputs a 3D tensor
(nb_samples, timesteps, output_dim); otherwise it outputs a 2D tensor
(nb_samples, output_dim).
Exception: Input 0 is incompatible with layer dense_1: expected ndim=2, found ndim=3
'''
# lstm = LSTM(128, W_regularizer=l2(0.01), return_sequences=True)
# model.add(AttentionLSTMWrapper(lstm, single_attention_param=True))
model.add(AttentionLSTM(100, W_regularizer=l2(0.01), dropout_W=0.2, dropout_U=0.2))
model.add(Activation('tanh'))
# a single sigmoid unit matches the binary cross-entropy loss below
# (softmax over one unit would always output 1)
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()  # print a summary of the model

######################################
# train the LSTM_ATTNets model
######################################
print('Train...')
print('\tHere, batch_size =', BATCH_SIZE, ", epoch =", EPOCH, ", lr =", LR)
# early_stopping = EarlyStopping(monitor='val_loss', patience=2)
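
# A hedged sketch (not part of the original source) of how the training call
# for the snippet above might look: it assumes padded X_train/X_test arrays
# and binary y_train/y_test labels have already been prepared, uses the
# Keras 1.x fit signature (nb_epoch), and re-enables the EarlyStopping
# callback that is commented out above.
from keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(monitor='val_loss', patience=2)
model.fit(X_train, y_train,
          batch_size=BATCH_SIZE,
          nb_epoch=EPOCH,
          validation_data=(X_test, y_test),
          callbacks=[early_stopping])
score, acc = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE)
print('Test score:', score, 'Test accuracy:', acc)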
def get_attention_lstm(word_index_to_embeddings_map, max_len,
                       rich_context: bool = False, **kwargs):

    # converting embeddings to numpy 2d array: shape = (vocabulary_size, 300)
    embeddings = np.asarray([np.array(x, dtype=np.float32)
                             for x in word_index_to_embeddings_map.values()])
    print('embeddings.shape', embeddings.shape)

    lstm_size = kwargs.get('lstm_size')
    dropout = kwargs.get('dropout')
    assert lstm_size
    assert dropout

    # define the basic input layers - for warrant0, warrant1, reason, claim, and debate context
    sequence_layer_warrant0_input = Input(shape=(max_len,), dtype='int32',
                                          name="sequence_layer_warrant0_input")
    sequence_layer_warrant1_input = Input(shape=(max_len,), dtype='int32',
                                          name="sequence_layer_warrant1_input")
    sequence_layer_reason_input = Input(shape=(max_len,), dtype='int32',
                                        name="sequence_layer_reason_input")
    sequence_layer_claim_input = Input(shape=(max_len,), dtype='int32',
                                       name="sequence_layer_claim_input")
    sequence_layer_debate_input = Input(shape=(max_len,), dtype='int32',
                                        name="sequence_layer_debate_input")

    # now define embedded layers of the input
    embedded_layer_warrant0_input = Embedding(embeddings.shape[0], embeddings.shape[1],
                                              input_length=max_len, weights=[embeddings],
                                              mask_zero=True)(sequence_layer_warrant0_input)
    embedded_layer_warrant1_input = Embedding(embeddings.shape[0], embeddings.shape[1],
                                              input_length=max_len, weights=[embeddings],
                                              mask_zero=True)(sequence_layer_warrant1_input)
    embedded_layer_reason_input = Embedding(embeddings.shape[0], embeddings.shape[1],
                                            input_length=max_len, weights=[embeddings],
                                            mask_zero=True)(sequence_layer_reason_input)
    embedded_layer_claim_input = Embedding(embeddings.shape[0], embeddings.shape[1],
                                           input_length=max_len, weights=[embeddings],
                                           mask_zero=True)(sequence_layer_claim_input)
    embedded_layer_debate_input = Embedding(embeddings.shape[0], embeddings.shape[1],
                                            input_length=max_len, weights=[embeddings],
                                            mask_zero=True)(sequence_layer_debate_input)

    bidi_lstm_layer_reason = Bidirectional(LSTM(lstm_size, return_sequences=True),
                                           name='BiDiLSTM Reason')(embedded_layer_reason_input)
    bidi_lstm_layer_claim = Bidirectional(LSTM(lstm_size, return_sequences=True),
                                          name='BiDiLSTM Claim')(embedded_layer_claim_input)
    # add context to the attention layer
    bidi_lstm_layer_debate = Bidirectional(LSTM(lstm_size, return_sequences=True),
                                           name='BiDiLSTM Context')(embedded_layer_debate_input)

    if rich_context:
        # merge reason, claim, and debate context
        context_concat = merge([bidi_lstm_layer_reason, bidi_lstm_layer_claim,
                                bidi_lstm_layer_debate], mode='concat')
    else:
        # merge reason and claim only
        context_concat = merge([bidi_lstm_layer_reason, bidi_lstm_layer_claim],
                               mode='concat')

    # max-pooling
    max_pool_lambda_layer = Lambda(lambda x: keras.backend.max(x, axis=1, keepdims=False),
                                   output_shape=lambda x: (x[0], x[2]))
    max_pool_lambda_layer.supports_masking = True
    attention_vector = max_pool_lambda_layer(context_concat)

    attention_warrant0 = AttentionLSTM(lstm_size, attention_vector)(embedded_layer_warrant0_input)
    attention_warrant1 = AttentionLSTM(lstm_size, attention_vector)(embedded_layer_warrant1_input)

    # merge the two warrant representations (element-wise sum, the merge default)
    dropout_layer = Dropout(dropout)(merge([attention_warrant0, attention_warrant1]))

    # and add one extra layer with ReLU
    dense1 = Dense(int(lstm_size / 2), activation='relu')(dropout_layer)
    output_layer = Dense(1, activation='sigmoid')(dense1)

    model = Model(input=[sequence_layer_warrant0_input, sequence_layer_warrant1_input,
                         sequence_layer_reason_input, sequence_layer_claim_input,
                         sequence_layer_debate_input],
                  output=output_layer)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    from keras.utils.visualize_util import plot
    plot(model, show_shapes=True, to_file='/tmp/model-att.png')
    # from keras.utils.visualize_util import plot
    # plot(model, show_shapes=True, to_file='/tmp/attlstm.png')

    return model
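
# A hedged usage sketch for get_attention_lstm (not part of the original
# source). The word-index-to-embedding map, sequence length, and training
# arrays below are random placeholders; the five input arrays must be passed
# in the same order as in the Model(...) definition above.
import numpy as np

fake_map = {i: np.random.rand(300) for i in range(1000)}   # placeholder embedding map
model = get_attention_lstm(fake_map, max_len=50, lstm_size=64, dropout=0.5)

n = 16
inputs = [np.random.randint(0, 1000, size=(n, 50))         # warrant0, warrant1, reason,
          for _ in range(5)]                               # claim, debate context
labels = np.random.randint(0, 2, size=(n, 1))
model.fit(inputs, labels, nb_epoch=1, batch_size=8)        # Keras 1.x fit signature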
def build(self):
    question = self.question
    answer = self.get_answer()

    # add embedding layers
    weights = self.model_params.get('initial_embed_weights', None)
    weights = weights if weights is None else [weights]
    embedding = Embedding(input_dim=self.config['n_words'],
                          output_dim=self.model_params.get('n_embed_dims', 100),
                          weights=weights,
                          mask_zero=True)
    question_embedding = embedding(question)
    answer_embedding = embedding(answer)

    # turn off layer updating
    # embedding.params = []
    # embedding.updates = []

    # dropout
    dropout = Dropout(0.25)
    question_dropout = dropout(question_embedding)
    answer_dropout = dropout(answer_embedding)

    # question rnn part
    f_rnn = LSTM(self.model_params.get('n_lstm_dims', 141),
                 return_sequences=True,
                 dropout_U=0.2,
                 consume_less='mem')
    b_rnn = LSTM(self.model_params.get('n_lstm_dims', 141),
                 return_sequences=True,
                 dropout_U=0.2,
                 consume_less='mem',
                 go_backwards=True)
    question_f_rnn = f_rnn(question_dropout)
    question_b_rnn = b_rnn(question_dropout)
    question_f_dropout = dropout(question_f_rnn)
    question_b_dropout = dropout(question_b_rnn)

    # maxpooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    question_pool = merge([maxpool(question_f_dropout), maxpool(question_b_dropout)],
                          mode='concat', concat_axis=-1)

    # answer rnn part
    f_rnn = AttentionLSTM(self.model_params.get('n_lstm_dims', 141),
                          question_pool,
                          single_attn=True,
                          dropout_U=0.2,
                          return_sequences=True,
                          consume_less='mem')
    b_rnn = AttentionLSTM(self.model_params.get('n_lstm_dims', 141),
                          question_pool,
                          single_attn=True,
                          dropout_U=0.2,
                          return_sequences=True,
                          consume_less='mem',
                          go_backwards=True)
    answer_f_rnn = f_rnn(answer_dropout)
    answer_b_rnn = b_rnn(answer_dropout)
    answer_f_dropout = dropout(answer_f_rnn)
    answer_b_dropout = dropout(answer_b_rnn)
    answer_pool = merge([maxpool(answer_f_dropout), maxpool(answer_b_dropout)],
                        mode='concat', concat_axis=-1)

    # activation
    activation = Activation('tanh')
    question_output = activation(question_pool)
    answer_output = activation(answer_pool)

    return question_output, answer_output
def get_model(name, X_train, y_train, embeddings, batch_size, nb_epoch,
              max_len, max_features, nb_classes=17):
    print('Building model', name)

    # get correct loss
    loss_function = 'binary_crossentropy'

    if name == 'LSTM+ATT':
        # this is the placeholder tensor for the input sequences
        sequence = Input(shape=(max_len,), dtype='int32')
        # this embedding layer will transform the sequences of integers
        # into vectors of size embeddings.shape[1]
        embedded = Embedding(embeddings.shape[0],
                             embeddings.shape[1],
                             input_length=max_len,
                             weights=[embeddings])(sequence)

        # 4 convolution layers (each 1000 filters)
        cnn = [Convolution1D(filter_length=filters,
                             nb_filter=1000,
                             border_mode='same') for filters in [2, 3, 5, 7]]

        # concatenate the convolution outputs
        question = merge([layer(embedded) for layer in cnn], mode='concat')

        # create attention vector from the max-pooled convolution output
        maxpool = Lambda(lambda x: keras_backend.max(x, axis=1, keepdims=False),
                         output_shape=lambda x: (x[0], x[2]))
        attention_vector = maxpool(question)

        forwards = AttentionLSTM(64, attention_vector)(embedded)
        backwards = AttentionLSTM(64, attention_vector, go_backwards=True)(embedded)

        # concatenate the outputs of the 2 LSTMs
        answer_rnn = merge([forwards, backwards], mode='concat', concat_axis=-1)

        after_dropout = Dropout(0.5)(answer_rnn)

        # we have 17 classes
        output = Dense(nb_classes, activation='sigmoid')(after_dropout)

        model = Model(input=sequence, output=output)

        # try using different optimizers and different optimizer configs
        model.compile('adam', loss_function, metrics=[loss_function])
        # model.compile('adam', 'hinge', metrics=['hinge'])

        print("Layers: ", model.layers)
        for layer in model.layers:
            if isinstance(layer, AttentionLSTM):
                print(type(layer.attention_vec))
                # layer.attention_vec is a symbolic tensor, so its shape
                # cannot simply be printed here

        model.fit(X_train, y_train,
                  batch_size=batch_size,
                  nb_epoch=nb_epoch,
                  validation_split=0.1,
                  verbose=1)

        return model

    if name == 'LSTM':
        # this is the placeholder tensor for the input sequences
        sequence = Input(shape=(max_len,), dtype='int32')
        # this embedding layer will transform the sequences of integers
        # into vectors of size embeddings.shape[1]
        embedded = Embedding(embeddings.shape[0],
                             embeddings.shape[1],
                             input_length=max_len,
                             weights=[embeddings])(sequence)

        # apply forwards and backward LSTM
        forwards = LSTM(64)(embedded)
        backwards = LSTM(64, go_backwards=True)(embedded)

        # concatenate the outputs of the 2 LSTMs
        answer_rnn = merge([forwards, backwards], mode='concat', concat_axis=-1)

        after_dropout = Dropout(0.5)(answer_rnn)

        # we have 17 classes
        output = Dense(nb_classes, activation='sigmoid')(after_dropout)

        model = Model(input=sequence, output=output)

        # try using different optimizers and different optimizer configs
        model.compile('adam', loss_function, metrics=[loss_function])

        model.fit(X_train, y_train,
                  batch_size=batch_size,
                  nb_epoch=nb_epoch,
                  validation_split=0.1,
                  verbose=0)

        return model

    if name == 'MLP':
        model = Sequential()
        model.add(Dense(512, input_shape=(max_len,)))
        model.add(Activation('relu'))
        model.add(Dropout(0.5))
        model.add(Dense(nb_classes))
        model.add(Activation('softmax'))
        model.compile(loss=loss_function, optimizer='adam', metrics=[loss_function])

        model.fit(X_train, y_train,
                  nb_epoch=nb_epoch,
                  batch_size=batch_size,
                  validation_split=0.1,
                  verbose=0)

        return model
def build(self):
    question, answer = self._get_inputs()

    # add embedding layers
    embedding = Embedding(self.config['n_words'],
                          self.model_params.get('n_embed_dims', 141))
    question_embedding = embedding(question)

    a_embedding = Embedding(self.config['n_words'],
                            self.model_params.get('n_embed_dims', 141))
    answer_embedding = a_embedding(answer)
    # initialise the answer embedding with the question embedding's weights
    a_embedding.set_weights(embedding.get_weights())

    # dropout
    dropout = Dropout(0.5)
    question_dropout = dropout(question_embedding)
    answer_dropout = dropout(answer_embedding)

    # rnn
    forward_lstm = LSTM(self.config.get('n_lstm_dims', 141),
                        consume_less='mem',
                        return_sequences=True)
    backward_lstm = LSTM(self.config.get('n_lstm_dims', 141),
                         consume_less='mem',
                         return_sequences=True)
    question_lstm = merge([forward_lstm(question_dropout), backward_lstm(question_dropout)],
                          mode='concat', concat_axis=-1)

    # dropout
    question_dropout = dropout(question_lstm)

    # maxpooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    question_pool = maxpool(question_dropout)

    # activation
    activation = Activation('tanh')
    question_output = activation(question_pool)

    question_model = Model(input=[question], output=[question_output])

    # attentional rnn
    forward_lstm = AttentionLSTM(self.config.get('n_lstm_dims', 141),
                                 question_output,
                                 consume_less='mem',
                                 return_sequences=True)
    backward_lstm = AttentionLSTM(self.config.get('n_lstm_dims', 141),
                                  question_output,
                                  consume_less='mem',
                                  return_sequences=True)
    answer_lstm = merge([forward_lstm(answer_dropout), backward_lstm(answer_dropout)],
                        mode='concat', concat_axis=-1)

    # dropout
    answer_dropout = dropout(answer_lstm)

    # maxpooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    answer_pool = maxpool(answer_dropout)

    # activation
    activation = Activation('tanh')
    answer_output = activation(answer_pool)

    answer_model = Model(input=[question, answer], output=[answer_output])

    return question_model, answer_model
def build(self):
    question = self.question
    answer = self.get_answer()

    # add embedding layers
    embedding = Embedding(self.config['n_words'],
                          self.model_params.get('n_embed_dims', 100))
    question_embedding = embedding(question)
    answer_embedding = embedding(answer)

    # turn off layer updating
    embedding.params = []
    embedding.updates = []

    # dropout
    dropout = Dropout(0.25)
    question_dropout = dropout(question_embedding)
    answer_dropout = dropout(answer_embedding)

    # question rnn part
    f_rnn = LSTM(self.model_params.get('n_lstm_dims', 141), return_sequences=True)
    b_rnn = LSTM(self.model_params.get('n_lstm_dims', 141), return_sequences=True,
                 go_backwards=True)
    question_rnn = merge([f_rnn(question_dropout), b_rnn(question_dropout)],
                         mode='concat', concat_axis=-1)
    question_dropout = dropout(question_rnn)

    # regularize
    regularize = ActivityRegularization(l2=0.0001)
    question_dropout = regularize(question_dropout)

    # could add convolution layer here (as in paper)

    # maxpooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    question_pool = maxpool(question_dropout)

    # answer rnn part
    f_rnn = AttentionLSTM(self.model_params.get('n_lstm_dims', 141), question_pool,
                          return_sequences=True)
    b_rnn = AttentionLSTM(self.model_params.get('n_lstm_dims', 141), question_pool,
                          return_sequences=True, go_backwards=True)
    # f_rnn = LSTM(self.model_params.get('n_lstm_dims', 141), return_sequences=True)
    # b_rnn = LSTM(self.model_params.get('n_lstm_dims', 141), return_sequences=True, go_backwards=True)
    answer_rnn = merge([f_rnn(answer_dropout), b_rnn(answer_dropout)],
                       mode='concat', concat_axis=-1)
    answer_dropout = dropout(answer_rnn)
    answer_dropout = regularize(answer_dropout)
    answer_pool = maxpool(answer_dropout)

    # activation
    activation = Activation('tanh')
    question_output = activation(question_pool)
    answer_output = activation(answer_pool)

    return question_output, answer_output
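
# A hedged sketch (not part of the original source) of how the pooled
# question/answer tensors returned by these build() variants might be wired
# into a trainable similarity model. `question_input`, `answer_input`,
# `question_output`, and `answer_output` stand in for self.question,
# self.get_answer(), and the two tensors returned by build(); the cosine
# similarity is computed with a custom merge mode, and mean-squared error on
# similarity targets is just a simple stand-in for whatever ranking loss the
# surrounding training code actually uses.
from keras import backend as K
from keras.layers import merge
from keras.models import Model


def build_similarity_model(question_input, answer_input, question_output, answer_output):
    # cosine similarity between the pooled question and answer vectors
    similarity = merge(
        [question_output, answer_output],
        mode=lambda x: K.sum(K.l2_normalize(x[0], axis=-1) * K.l2_normalize(x[1], axis=-1),
                             axis=-1, keepdims=True),
        output_shape=lambda shapes: (shapes[0][0], 1))
    model = Model(input=[question_input, answer_input], output=similarity)
    model.compile(optimizer='adam', loss='mse')
    return model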