def build(self):
    """Encode question and answer with a shared dense + multi-width CNN stack.

    Both inputs go through the same embedding layer, the same
    time-distributed dense layer and the same bank of 1-D convolutions
    (kernel sizes 2, 3, 5 and 7). The concatenated convolution outputs are
    then max-pooled over axis 1.

    Returns:
        Tuple ``(question_pool, answer_pool)`` of pooled tensors.
    """
    assert self.config['questions_len'] == self.config['answers_len']

    question = self.questions
    answer = self.get_answer()

    # Embedding layer shared by both inputs, initialised from the weight file
    # named in the config.
    weights = np.load(self.config['initial_embed_weights'])
    embedding = Embedding(input_dim=self.config['n_words'],
                          output_dim=weights.shape[1],
                          weights=[weights])
    question_embedding = embedding(question)
    answer_embedding = embedding(answer)

    # Input-layer processing: the same dense projection at every timestep.
    hidden_layer = TimeDistributed(Dense(200, activation='tanh'))
    question_hl = hidden_layer(question_embedding)
    answer_hl = hidden_layer(answer_embedding)

    # 1-D convolution kernels, one layer per kernel size.
    cnns = [Conv1D(kernel_size=kernel_size,
                   filters=1000,
                   activation='tanh',
                   padding='same')
            for kernel_size in [2, 3, 5, 7]]

    # Convolution outputs, concatenated along the last axis.
    question_cnn = concatenate([cnn(question_hl) for cnn in cnns], axis=-1)
    answer_cnn = concatenate([cnn(answer_hl) for cnn in cnns], axis=-1)

    # Max pooling over axis 1 (rank-3 input -> rank-2 output).
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    maxpool.supports_masking = True

    # Pool the convolution outputs, not the dense outputs; otherwise the
    # convolution bank above never contributes to the result.
    question_pool = maxpool(question_cnn)
    answer_pool = maxpool(answer_cnn)
    return question_pool, answer_pool
def build(self):
    """Encode the question and the answer with one shared CNN stack.

    Both inputs are embedded by the same layer, passed through the same four
    convolutions (filter lengths 2, 3, 5 and 7), max-pooled over axis 1 and
    projected by a shared 100-unit tanh layer.

    Returns:
        Tuple ``(question_pool, answer_pool)``.
    """
    assert self.config['question_len'] == self.config['answer_len']

    question_input = self.question
    answer_input = self.get_answer()

    # Embedding layer shared by both inputs, seeded from the weight file
    # named in the config.
    embed_matrix = np.load(self.config['initial_embed_weights'])
    shared_embedding = Embedding(
        input_dim=self.config['n_words'],
        output_dim=embed_matrix.shape[1],
        weights=[embed_matrix],
    )
    embedded_question = shared_embedding(question_input)
    embedded_answer = shared_embedding(answer_input)

    # One convolution per filter length; every layer is applied to both inputs.
    conv_bank = [
        Convolution1D(filter_length=width,
                      nb_filter=500,
                      activation='tanh',
                      border_mode='same')
        for width in (2, 3, 5, 7)
    ]
    question_maps = [conv(embedded_question) for conv in conv_bank]
    question_cnn = merge(question_maps, mode='concat')
    answer_maps = [conv(embedded_answer) for conv in conv_bank]
    answer_cnn = merge(answer_maps, mode='concat')

    # Max over axis 1 collapses the rank-3 tensor to rank 2.
    pool = Lambda(lambda seq: K.max(seq, axis=1, keepdims=False),
                  output_shape=lambda shape: (shape[0], shape[2]))
    pool.supports_masking = True

    # Final projection, shared between question and answer.
    projection = Dense(100, activation='tanh')
    question_pool = projection(pool(question_cnn))
    answer_pool = projection(pool(answer_cnn))
    return question_pool, answer_pool
def cit_nocit_rnn_rnn_cnn(max_sentence_len, max_words):
    """Build and compile a two-LSTM + multi-width CNN sequence classifier.

    Args:
        max_sentence_len: Length of the integer input sequence.
        max_words: ``input_dim`` of the embedding layer.

    Returns:
        A compiled Keras ``Model`` with a 2-unit output.
    """
    tokens = Input(shape=(max_sentence_len, ))
    embedded = Embedding(max_words, 128,
                         input_length=max_sentence_len)(tokens)

    # Two LSTMs read the same embedding, one of them with go_backwards=True.
    # NOTE(review): Keras returns the go_backwards sequence in reversed order
    # and it is concatenated here without being flipped back, so timestep t
    # of one half lines up with timestep T-1-t of the other. Confirm this is
    # intended before the convolutions below.
    forward_seq = LSTM(128, return_sequences=True)(embedded)
    backward_seq = LSTM(128, return_sequences=True,
                        go_backwards=True)(embedded)
    recurrent = concatenate([forward_seq, backward_seq], axis=-1)

    # One convolution per kernel size, all applied to the same tensor.
    conv_bank = [
        Conv1D(500, width, activation='tanh', padding='same')
        for width in (1, 2, 3, 5)
    ]
    conv_outputs = [conv(recurrent) for conv in conv_bank]
    stacked = concatenate(conv_outputs)

    # Max over axis 1 reduces the rank-3 tensor to rank 2.
    pool = Lambda(lambda seq: K.max(seq, axis=1, keepdims=False),
                  output_shape=lambda shape: (shape[0], shape[2]))
    pool.supports_masking = True
    sentence_vector = pool(stacked)

    # NOTE(review): sigmoid outputs are paired with categorical_crossentropy;
    # softmax is the usual partner for that loss. Verify this is deliberate.
    scores = Dense(2, activation='sigmoid')(sentence_vector)

    model = Model(inputs=tokens, outputs=scores)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
def build(self):
    """Encode question and answer with a shared pair of LSTMs plus max pooling.

    The same embedding layer and the same two LSTMs (one created with
    ``go_backwards=True``) are applied to both inputs. Each LSTM output is
    max-pooled over axis 1 and the two pooled vectors are concatenated.

    Returns:
        Tuple ``(question_pool, answer_pool)``.
    """
    question_input = self.questions
    answer_input = self.get_answer()

    # Shared embedding initialised from the weight file named in the config.
    embed_matrix = np.load(self.config['initial_embed_weights'])
    shared_embedding = Embedding(
        input_dim=self.config['n_words'],
        output_dim=embed_matrix.shape[1],
        weights=[embed_matrix],
    )
    embedded_question = shared_embedding(question_input)
    embedded_answer = shared_embedding(answer_input)

    forward_lstm = LSTM(141, return_sequences=True, consume_less='mem')
    backward_lstm = LSTM(141,
                         return_sequences=True,
                         consume_less='mem',
                         go_backwards=True)

    question_forward = forward_lstm(embedded_question)
    question_backward = backward_lstm(embedded_question)

    # Pooling operation: max over axis 1 (rank 3 -> rank 2).
    pool = Lambda(lambda seq: K.max(seq, axis=1, keepdims=False),
                  output_shape=lambda shape: (shape[0], shape[2]))
    pool.supports_masking = True

    question_parts = [pool(question_forward), pool(question_backward)]
    question_pool = merge(question_parts, mode='concat', concat_axis=-1)

    answer_forward = forward_lstm(embedded_answer)
    answer_backward = backward_lstm(embedded_answer)
    answer_parts = [pool(answer_forward), pool(answer_backward)]
    answer_pool = merge(answer_parts, mode='concat', concat_axis=-1)

    return question_pool, answer_pool
def build(self):
    """Encode question and answer as max-pooled embeddings.

    The question and the answer each get their own embedding layer, sized
    from and initialised with the weight matrix returned by the matching
    config accessor. Each embedded sequence is max-pooled over axis 1.

    Returns:
        Tuple ``(question_pool, answer_pool)``.
    """
    question_input = self.question
    answer_input = self.get_answer()

    # Question embedding.
    q_matrix = np.load(self.config.initial_question_weights())
    question_embedder = Embedding(
        input_dim=q_matrix.shape[0],
        output_dim=q_matrix.shape[1],
        weights=[q_matrix],
    )
    embedded_question = question_embedder(question_input)

    # Answer embedding.
    a_matrix = np.load(self.config.initial_answer_weights())
    answer_embedder = Embedding(
        input_dim=a_matrix.shape[0],
        output_dim=a_matrix.shape[1],
        weights=[a_matrix],
    )
    embedded_answer = answer_embedder(answer_input)

    # Max over axis 1 reduces each rank-3 embedding to rank 2.
    pool = Lambda(lambda seq: K.max(seq, axis=1, keepdims=False),
                  output_shape=lambda shape: (shape[0], shape[2]))
    pool.supports_masking = True

    question_pool = pool(embedded_question)
    answer_pool = pool(embedded_answer)
    return question_pool, answer_pool
def build(self):
    """Encode the question with two LSTMs and the answer with wrapped LSTMs.

    The question is embedded, run through two LSTMs (one created with
    ``go_backwards=True``), max-pooled over axis 1 and concatenated. The
    same two LSTMs are then wrapped in ``AttentionLSTMWrapper`` together
    with the pooled question vector and applied to the embedded answer.

    Returns:
        Tuple ``(question_pool, answer_pool)``.
    """
    question_input = self.question
    answer_input = self.get_answer()

    # Shared embedding layer (mask_zero is left at its default).
    embed_matrix = np.load(self.config['initial_embed_weights'])
    shared_embedding = Embedding(
        input_dim=self.config['n_words'],
        output_dim=embed_matrix.shape[1],
        weights=[embed_matrix],
    )
    embedded_question = shared_embedding(question_input)
    embedded_answer = shared_embedding(answer_input)

    # Question side.
    forward_lstm = LSTM(141, return_sequences=True, consume_less='mem')
    backward_lstm = LSTM(141,
                         return_sequences=True,
                         consume_less='mem',
                         go_backwards=True)
    question_forward = forward_lstm(embedded_question)
    question_backward = backward_lstm(embedded_question)

    # Max over axis 1 (rank 3 -> rank 2).
    pool = Lambda(lambda seq: K.max(seq, axis=1, keepdims=False),
                  output_shape=lambda shape: (shape[0], shape[2]))
    pool.supports_masking = True

    question_parts = [pool(question_forward), pool(question_backward)]
    question_pool = merge(question_parts, mode='concat', concat_axis=-1)

    # Answer side: wrap the same LSTM layers together with the pooled
    # question vector. What the wrapper does with it is defined in the
    # attention_lstm module, not here.
    from attention_lstm import AttentionLSTMWrapper
    attentive_forward = AttentionLSTMWrapper(forward_lstm,
                                             question_pool,
                                             single_attention_param=True)
    attentive_backward = AttentionLSTMWrapper(backward_lstm,
                                              question_pool,
                                              single_attention_param=True)

    answer_forward = attentive_forward(embedded_answer)
    answer_backward = attentive_backward(embedded_answer)
    answer_parts = [pool(answer_forward), pool(answer_backward)]
    answer_pool = merge(answer_parts, mode='concat', concat_axis=-1)

    return question_pool, answer_pool
def build(self):
    """Build the question encoder and the question-conditioned answer encoder.

    Question: shared embedding -> two LSTMs (the second with
    ``go_backwards=True``) -> max over axis 1 -> concatenation.
    Answer: shared embedding -> the same LSTMs wrapped by
    ``AttentionLSTMWrapper`` with the pooled question -> max over axis 1 ->
    concatenation.

    Returns:
        Tuple ``(question_pool, answer_pool)``.
    """
    q_tokens = self.question
    a_tokens = self.get_answer()

    # One embedding layer for both inputs; mask_zero stays disabled.
    initial_weights = np.load(self.config['initial_embed_weights'])
    embedding_layer = Embedding(input_dim=self.config['n_words'],
                                output_dim=initial_weights.shape[1],
                                weights=[initial_weights])
    q_embedded = embedding_layer(q_tokens)
    a_embedded = embedding_layer(a_tokens)

    # Recurrent layers used for the question and later wrapped for the answer.
    lstm_fwd = LSTM(141, return_sequences=True, consume_less='mem')
    lstm_bwd = LSTM(141, return_sequences=True, consume_less='mem',
                    go_backwards=True)
    q_fwd = lstm_fwd(q_embedded)
    q_bwd = lstm_bwd(q_embedded)

    # Max pooling over axis 1.
    max_over_axis1 = Lambda(
        lambda tensor: K.max(tensor, axis=1, keepdims=False),
        output_shape=lambda dims: (dims[0], dims[2]))
    max_over_axis1.supports_masking = True

    q_fwd_max = max_over_axis1(q_fwd)
    q_bwd_max = max_over_axis1(q_bwd)
    question_pool = merge([q_fwd_max, q_bwd_max],
                          mode='concat', concat_axis=-1)

    # The wrapper class lives in the project-local attention_lstm module; it
    # receives the LSTM layer and the pooled question tensor.
    from attention_lstm import AttentionLSTMWrapper
    wrapped_fwd = AttentionLSTMWrapper(lstm_fwd, question_pool,
                                       single_attention_param=True)
    wrapped_bwd = AttentionLSTMWrapper(lstm_bwd, question_pool,
                                       single_attention_param=True)

    a_fwd = wrapped_fwd(a_embedded)
    a_bwd = wrapped_bwd(a_embedded)
    a_fwd_max = max_over_axis1(a_fwd)
    a_bwd_max = max_over_axis1(a_bwd)
    answer_pool = merge([a_fwd_max, a_bwd_max],
                        mode='concat', concat_axis=-1)

    return question_pool, answer_pool
def build(self):
    """Build the convolutional encoders for the question and the answer.

    A single embedding layer, a single bank of four convolutions (filter
    lengths 2, 3, 5, 7) and a single 100-unit tanh layer are applied to both
    inputs; the convolution outputs are concatenated and max-pooled over
    axis 1 before the dense layer.

    Returns:
        Tuple ``(question_pool, answer_pool)``.
    """
    assert self.config['question_len'] == self.config['answer_len']

    q_tokens = self.question
    a_tokens = self.get_answer()

    # Embedding initialised from the weight file named in the config.
    initial_weights = np.load(self.config['initial_embed_weights'])
    embedding_layer = Embedding(input_dim=self.config['n_words'],
                                output_dim=initial_weights.shape[1],
                                weights=[initial_weights])
    q_embedded = embedding_layer(q_tokens)
    a_embedded = embedding_layer(a_tokens)

    # Convolution bank: one layer per filter length.
    convolutions = []
    for length in [2, 3, 5, 7]:
        convolutions.append(
            Convolution1D(filter_length=length,
                          nb_filter=500,
                          activation='tanh',
                          border_mode='same'))

    q_conv_outputs = []
    for convolution in convolutions:
        q_conv_outputs.append(convolution(q_embedded))
    question_cnn = merge(q_conv_outputs, mode='concat')

    a_conv_outputs = []
    for convolution in convolutions:
        a_conv_outputs.append(convolution(a_embedded))
    answer_cnn = merge(a_conv_outputs, mode='concat')

    # Max pooling over axis 1 (rank 3 -> rank 2).
    max_over_axis1 = Lambda(
        lambda tensor: K.max(tensor, axis=1, keepdims=False),
        output_shape=lambda dims: (dims[0], dims[2]))
    max_over_axis1.supports_masking = True

    # Dense layer shared by both branches.
    encoder = Dense(100, activation='tanh')
    q_pooled = max_over_axis1(question_cnn)
    question_pool = encoder(q_pooled)
    a_pooled = max_over_axis1(answer_cnn)
    answer_pool = encoder(a_pooled)

    return question_pool, answer_pool
def build(self):
    """Unfinished 2-D convolution variant of the question/answer encoder.

    Embeds both inputs with a shared layer and applies a shared
    time-distributed dense layer, then is meant to convolve and max-pool.

    NOTE(review): this function cannot run as written.
      * ``Conv2D`` is created with an empty ``kernel_size`` tuple.
      * ``cnns`` is never applied to anything.
      * ``question_cnn`` and ``answer_cnn`` are never assigned, so the two
        ``maxpool(...)`` calls raise ``NameError`` if execution gets there.
    The intended wiring has to be decided by the author.

    Returns:
        Tuple ``(question_pool, answer_pool)`` once the gaps are filled in.
    """
    question = self.questions
    answer = self.get_answer()
    # Shared embedding initialised from the weight file named in the config.
    weights = np.load(self.config['initial_embed_weights'])
    embedding = Embedding(input_dim = self.config['n_words'], output_dim = weights.shape[1], weights = [weights])
    question_embedding = embedding(question)
    answer_embedding = embedding(answer)
    # Same dense layer applied at every timestep of both inputs.
    hidden_layer = TimeDistributed(Dense(200,activation = 'tanh'))
    question_hl = hidden_layer(question_embedding)
    answer_hl = hidden_layer(answer_embedding)
    # TODO: kernel_size is empty and this list is never used below.
    cnns = [Conv2D(filters = 3, kernel_size =(), activation = 'tanh', padding = 'same')]
    # TODO: question_cnn / answer_cnn still need to be computed here.
    #question_cnn =
    # Max over axis 1 (rank 3 -> rank 2).
    maxpool = Lambda( lambda x: K.max(x,axis = 1,keepdims = False),output_shape = lambda x: (x[0],x[2]))
    maxpool.supports_masking = True
    # NOTE(review): both names below are undefined at this point.
    question_pool = maxpool(question_cnn)
    answer_pool = maxpool(answer_cnn)
    return question_pool,answer_pool
def BIGRUCNNmodel(self, weights, hidden_dim=128):
    """Build the stacked bi-GRU + CNN question/answer ranking models.

    A shared sub-model scores a (question, answer) pair with the similarity
    returned by ``self.get_cosine_similarity()``. It is applied to a good and
    a bad answer, and the training model outputs the hinge term
    ``relu(margin - good + bad)``.

    Args:
        weights: Embedding matrix; ``len(weights)`` is the vocabulary size
            and ``weights.shape[1]`` the embedding width. The embedding is
            frozen (``trainable=False``).
        hidden_dim: Units of the first GRU; the second GRU uses half of it.

    Returns:
        Tuple ``(training_model, prediction_model)``, both compiled with Adam
        and a loss that returns ``y_pred`` unchanged.
    """
    margin = 0.05
    enc_timesteps = 30
    dec_timesteps = 30

    # Question and answer inputs (integer token ids).
    question = Input(shape=(enc_timesteps,), dtype='int32',
                     name='question_base')
    answer = Input(shape=(dec_timesteps,), dtype='int32', name='answer')
    answer_good = Input(shape=(dec_timesteps,), dtype='int32',
                        name='answer_good_base')
    answer_bad = Input(shape=(dec_timesteps,), dtype='int32',
                       name='answer_bad_base')

    qa_embedding = Embedding(input_dim=len(weights),
                             output_dim=weights.shape[1],
                             mask_zero=True,
                             weights=[weights],
                             trainable=False)
    question_embedding = qa_embedding(question)
    answer_embedding = qa_embedding(answer)

    # Two stacked bidirectional GRUs, shared between question and answer.
    gru1 = Bidirectional(GRU(units=hidden_dim,
                             dropout=0.2,
                             recurrent_dropout=0.2,
                             return_sequences=True,
                             name='BIGRU'),
                         merge_mode='concat')
    question_pool1 = gru1(question_embedding)
    answer_pool1 = gru1(answer_embedding)

    gru2 = Bidirectional(GRU(units=int(hidden_dim / 2),
                             dropout=0.2,
                             recurrent_dropout=0.2,
                             return_sequences=True,
                             name='BIGRU2'),
                         merge_mode='concat')
    question_pool = gru2(question_pool1)
    answer_pool = gru2(answer_pool1)

    # Pass the bi-GRU output through a bank of 1-D convolutions.
    cnns = [Convolution1D(filter_length=filter_length,
                          nb_filter=300,
                          activation='relu',
                          border_mode='same',
                          kernel_initializer='random_normal')
            for filter_length in [1, 2, 3, 5]]
    # Let the embedding mask flow through the convolution layers.
    for cnn in cnns:
        cnn.supports_masking = True
    # merge: (None, 30, 300) * 4 -> (None, 30, 1200)
    question_cnn = merge([cnn(question_pool) for cnn in cnns], mode='concat')
    answer_cnn = merge([cnn(answer_pool) for cnn in cnns], mode='concat')

    # Max pooling over axis 1.
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    maxpool.supports_masking = True
    question_pool = maxpool(question_cnn)
    answer_pool = maxpool(answer_cnn)

    # Similarity between the pooled question and answer vectors.
    similarity = self.get_cosine_similarity()
    merged_model = merge([question_pool, answer_pool],
                         mode=similarity,
                         output_shape=lambda _: (None, 1))
    lstm_convolution_model = Model(inputs=[question, answer],
                                   outputs=merged_model,
                                   name='bigru_convolution_model')
    # summary() prints the table itself and returns None, so it is not
    # wrapped in print() (that would emit an extra "None" line).
    lstm_convolution_model.summary()

    good_similarity = lstm_convolution_model([question, answer_good])
    bad_similarity = lstm_convolution_model([question, answer_bad])

    # Hinge loss on the similarity gap between the good and the bad answer.
    loss = merge([good_similarity, bad_similarity],
                 mode=lambda x: K.relu(margin - x[0] + x[1]),
                 output_shape=lambda x: x[0])

    # The model output already is the quantity of interest, so the Keras
    # "loss" simply passes y_pred through.
    prediction_model = Model(inputs=[question, answer_good],
                             outputs=good_similarity,
                             name='prediction_model')
    prediction_model.compile(loss=lambda y_true, y_pred: y_pred,
                             optimizer="adam")

    training_model = Model(inputs=[question, answer_good, answer_bad],
                           outputs=loss,
                           name='training_model')
    training_model.compile(loss=lambda y_true, y_pred: y_pred,
                           optimizer="adam")
    training_model.summary()

    return training_model, prediction_model
def get_convolutional_lstm_model(model_param, embedding_file, vocab_size):
    """Build the LSTM + CNN question/answer similarity model.

    Args:
        model_param: Object exposing ``enc_timesteps`` and ``dec_timesteps``,
            used as the question and answer input lengths.
        embedding_file: Path handed to ``np.load`` for the initial embedding
            weights.
        vocab_size: ``input_dim`` of the embedding layer.

    Returns:
        An uncompiled Keras ``Model`` named ``'basic_model'`` that maps
        ``[question, answer]`` to the output of the similarity merge.
    """
    hidden_dim = 200
    embed_matrix = np.load(embedding_file)

    question = Input(shape=(model_param.enc_timesteps, ),
                     dtype='int32',
                     name='question_base')
    answer = Input(shape=(model_param.dec_timesteps, ),
                   dtype='int32',
                   name='answer_good_base')

    # Embedding shared by both inputs (no dropout argument is passed).
    shared_embedding = Embedding(input_dim=vocab_size,
                                 output_dim=embed_matrix.shape[1],
                                 weights=[embed_matrix])
    embedded_question = shared_embedding(question)
    embedded_answer = shared_embedding(answer)

    # Two independent LSTMs. Neither sets go_backwards, so both read the
    # sequence in the same direction.
    first_lstm = LSTM(hidden_dim, return_sequences=True)
    second_lstm = LSTM(hidden_dim, return_sequences=True)

    question_first = first_lstm(embedded_question)
    question_second = second_lstm(embedded_question)
    question_recurrent = merge([question_first, question_second],
                               mode='concat', concat_axis=-1)

    answer_first = first_lstm(embedded_answer)
    answer_second = second_lstm(embedded_answer)
    answer_recurrent = merge([answer_first, answer_second],
                             mode='concat', concat_axis=-1)

    # Convolution bank applied to the recurrent outputs.
    conv_bank = [
        Convolution1D(filter_length=width,
                      nb_filter=500,
                      activation='tanh',
                      border_mode='same')
        for width in (1, 2, 3, 5)
    ]
    question_maps = [conv(question_recurrent) for conv in conv_bank]
    question_cnn = merge(question_maps, mode='concat')
    answer_maps = [conv(answer_recurrent) for conv in conv_bank]
    answer_cnn = merge(answer_maps, mode='concat')

    # Max over axis 1 (rank 3 -> rank 2).
    pool = Lambda(lambda seq: K.max(seq, axis=1, keepdims=False),
                  output_shape=lambda shape: (shape[0], shape[2]))
    pool.supports_masking = True
    question_vector = pool(question_cnn)
    answer_vector = pool(answer_cnn)

    # Similarity between the two pooled vectors.
    similarity = ModelFactory.get_similarity("cosine")
    score = merge([question_vector, answer_vector],
                  mode=similarity,
                  output_shape=lambda _: (None, 1))

    return Model(input=[question, answer], output=score, name='basic_model')
def build(self):
    """Encode the question with two LSTMs and the answer with wrapped LSTMs.

    Question and answer have separate embedding layers. The question is run
    through two LSTMs (one created with ``go_backwards=True``), max-pooled
    over axis 1 and concatenated. The same LSTMs are then wrapped in
    ``AttentionLSTMWrapper`` together with the pooled question vector and
    applied to the embedded answer.

    Returns:
        Tuple ``(question_pool, answer_pool)``.
    """
    question_input = self.question
    answer_input = self.get_answer()

    # Separate embedding layers, each sized from its own weight matrix.
    q_matrix = np.load(self.config.initial_question_weights())
    question_embedder = Embedding(
        input_dim=q_matrix.shape[0],
        output_dim=q_matrix.shape[1],
        weights=[q_matrix],
    )
    embedded_question = question_embedder(question_input)

    a_matrix = np.load(self.config.initial_answer_weights())
    answer_embedder = Embedding(
        input_dim=a_matrix.shape[0],
        output_dim=a_matrix.shape[1],
        weights=[a_matrix],
    )
    embedded_answer = answer_embedder(answer_input)

    # Question side.
    forward_lstm = LSTM(141, return_sequences=True, implementation=1)
    backward_lstm = LSTM(141,
                         return_sequences=True,
                         implementation=1,
                         go_backwards=True)
    question_forward = forward_lstm(embedded_question)
    question_backward = backward_lstm(embedded_question)

    # Max over axis 1 (rank 3 -> rank 2).
    pool = Lambda(lambda seq: K.max(seq, axis=1, keepdims=False),
                  output_shape=lambda shape: (shape[0], shape[2]))
    pool.supports_masking = True

    question_parts = [pool(question_forward), pool(question_backward)]
    question_pool = concatenate(question_parts)

    # Answer side: the same LSTM layers, wrapped together with the pooled
    # question vector. The wrapper's behaviour is defined in attention_lstm.
    from attention_lstm import AttentionLSTMWrapper
    attentive_forward = AttentionLSTMWrapper(forward_lstm,
                                             question_pool,
                                             single_attention_param=True)
    attentive_backward = AttentionLSTMWrapper(backward_lstm,
                                              question_pool,
                                              single_attention_param=True)

    answer_forward = attentive_forward(embedded_answer)
    answer_backward = attentive_backward(embedded_answer)
    answer_parts = [pool(answer_forward), pool(answer_backward)]
    answer_pool = concatenate(answer_parts)

    return question_pool, answer_pool
def build(self):
    """Encode question and answer with shared LSTMs followed by a CNN bank.

    Question and answer have separate embedding layers but share the two
    LSTMs (one created with ``go_backwards=True``) and the four convolutions
    (kernel sizes 1, 2, 3, 5; 100 filters each). The concatenated convolution
    outputs are max-pooled over axis 1.

    Returns:
        Tuple ``(question_pool, answer_pool)``.
    """
    question_input = self.question
    answer_input = self.get_answer()

    # Separate embedding layers, each sized from its own weight matrix.
    q_matrix = np.load(self.config.initial_question_weights())
    question_embedder = Embedding(
        input_dim=q_matrix.shape[0],
        output_dim=q_matrix.shape[1],
        weights=[q_matrix],
    )
    embedded_question = question_embedder(question_input)

    a_matrix = np.load(self.config.initial_answer_weights())
    answer_embedder = Embedding(
        input_dim=a_matrix.shape[0],
        output_dim=a_matrix.shape[1],
        weights=[a_matrix],
    )
    embedded_answer = answer_embedder(answer_input)

    # NOTE(review): the go_backwards output comes back in reversed order and
    # is concatenated as-is, so the two halves are not timestep-aligned when
    # the convolutions slide over them. Confirm this is intended.
    forward_lstm = LSTM(141, return_sequences=True, implementation=1)
    backward_lstm = LSTM(141,
                         return_sequences=True,
                         implementation=1,
                         go_backwards=True)

    question_forward = forward_lstm(embedded_question)
    question_backward = backward_lstm(embedded_question)
    question_recurrent = concatenate([question_forward, question_backward],
                                     axis=-1)

    answer_forward = forward_lstm(embedded_answer)
    answer_backward = backward_lstm(embedded_answer)
    answer_recurrent = concatenate([answer_forward, answer_backward],
                                   axis=-1)

    # Convolution bank shared by both branches.
    conv_bank = [
        Conv1D(kernel_size=width,
               filters=100,
               activation='tanh',
               padding='same')
        for width in (1, 2, 3, 5)
    ]
    question_maps = [conv(question_recurrent) for conv in conv_bank]
    question_cnn = concatenate(question_maps)
    answer_maps = [conv(answer_recurrent) for conv in conv_bank]
    answer_cnn = concatenate(answer_maps)

    # Max over axis 1 (rank 3 -> rank 2).
    pool = Lambda(lambda seq: K.max(seq, axis=1, keepdims=False),
                  output_shape=lambda shape: (shape[0], shape[2]))
    pool.supports_masking = True

    question_pool = pool(question_cnn)
    answer_pool = pool(answer_cnn)
    return question_pool, answer_pool
def build(self):
    """Encode question and answer with a shared dense layer and CNN bank.

    Question and answer have separate embedding layers. Both embedded
    sequences pass through the same time-distributed 200-unit tanh layer and
    the same four convolutions (kernel sizes 2, 3, 5, 7; 100 filters each).
    The concatenated convolution outputs are max-pooled over axis 1; no
    further dense layer is applied afterwards.

    Returns:
        Tuple ``(question_pool, answer_pool)``.
    """
    assert self.config.question_len() == self.config.answer_len()

    question_input = self.question
    answer_input = self.get_answer()

    # Separate embedding layers, each sized from its own weight matrix.
    q_matrix = np.load(self.config.initial_question_weights())
    question_embedder = Embedding(
        input_dim=q_matrix.shape[0],
        output_dim=q_matrix.shape[1],
        weights=[q_matrix],
    )
    embedded_question = question_embedder(question_input)

    a_matrix = np.load(self.config.initial_answer_weights())
    answer_embedder = Embedding(
        input_dim=a_matrix.shape[0],
        output_dim=a_matrix.shape[1],
        weights=[a_matrix],
    )
    embedded_answer = answer_embedder(answer_input)

    # Same dense projection at every timestep of both inputs.
    per_step_dense = TimeDistributed(Dense(200, activation='tanh'))
    question_hidden = per_step_dense(embedded_question)
    answer_hidden = per_step_dense(embedded_answer)

    # Convolution bank shared by both branches.
    conv_bank = [
        Conv1D(kernel_size=width,
               filters=100,
               activation='tanh',
               padding='same')
        for width in (2, 3, 5, 7)
    ]
    question_maps = [conv(question_hidden) for conv in conv_bank]
    question_cnn = concatenate(question_maps)
    answer_maps = [conv(answer_hidden) for conv in conv_bank]
    answer_cnn = concatenate(answer_maps)

    # Max over axis 1 (rank 3 -> rank 2).
    pool = Lambda(lambda seq: K.max(seq, axis=1, keepdims=False),
                  output_shape=lambda shape: (shape[0], shape[2]))
    pool.supports_masking = True

    question_pool = pool(question_cnn)
    answer_pool = pool(answer_cnn)
    return question_pool, answer_pool
def build(self):
    """Encode question and answer as max-pooled shared embeddings.

    Both inputs go through one embedding layer (created with
    ``mask_zero=True``) and are then max-pooled over axis 1.

    Returns:
        Tuple ``(question_pool, answer_pool)``.
    """
    question_input = self.question
    answer_input = self.get_answer()

    # Shared, masking embedding initialised from the configured weight file.
    embed_matrix = np.load(self.config['initial_embed_weights'])
    shared_embedding = Embedding(
        input_dim=self.config['n_words'],
        output_dim=embed_matrix.shape[1],
        mask_zero=True,
        weights=[embed_matrix],
    )
    embedded_question = shared_embedding(question_input)
    embedded_answer = shared_embedding(answer_input)

    # Max over axis 1 (rank 3 -> rank 2). supports_masking is switched on so
    # the layer accepts the masked embedding output.
    pool = Lambda(lambda seq: K.max(seq, axis=1, keepdims=False),
                  output_shape=lambda shape: (shape[0], shape[2]))
    pool.supports_masking = True

    question_pool = pool(embedded_question)
    answer_pool = pool(embedded_answer)
    return question_pool, answer_pool
def build(self):
    """Encode question and answer with two shared LSTMs and a CNN bank.

    Both inputs share one embedding layer, two LSTMs and four convolutions
    (filter lengths 1, 2, 3, 5; 500 filters each). The concatenated
    convolution outputs are max-pooled over axis 1.

    Returns:
        Tuple ``(question_pool, answer_pool)``.
    """
    question_input = self.question
    answer_input = self.get_answer()

    # Shared embedding initialised from the configured weight file.
    embed_matrix = np.load(self.config['initial_embed_weights'])
    shared_embedding = Embedding(
        input_dim=self.config['n_words'],
        output_dim=embed_matrix.shape[1],
        weights=[embed_matrix],
    )
    embedded_question = shared_embedding(question_input)
    embedded_answer = shared_embedding(answer_input)

    # NOTE(review): neither LSTM sets go_backwards, so both read the sequence
    # in the same direction. Confirm whether the second one was meant to run
    # backwards.
    first_lstm = LSTM(141, return_sequences=True, consume_less='mem')
    second_lstm = LSTM(141, return_sequences=True, consume_less='mem')

    question_first = first_lstm(embedded_question)
    question_second = second_lstm(embedded_question)
    question_recurrent = merge([question_first, question_second],
                               mode='concat', concat_axis=-1)

    answer_first = first_lstm(embedded_answer)
    answer_second = second_lstm(embedded_answer)
    answer_recurrent = merge([answer_first, answer_second],
                             mode='concat', concat_axis=-1)

    # Convolution bank shared by both branches.
    conv_bank = [
        Convolution1D(filter_length=width,
                      nb_filter=500,
                      activation='tanh',
                      border_mode='same')
        for width in (1, 2, 3, 5)
    ]
    question_maps = [conv(question_recurrent) for conv in conv_bank]
    question_cnn = merge(question_maps, mode='concat')
    answer_maps = [conv(answer_recurrent) for conv in conv_bank]
    answer_cnn = merge(answer_maps, mode='concat')

    # Max over axis 1 (rank 3 -> rank 2).
    pool = Lambda(lambda seq: K.max(seq, axis=1, keepdims=False),
                  output_shape=lambda shape: (shape[0], shape[2]))
    pool.supports_masking = True

    question_pool = pool(question_cnn)
    answer_pool = pool(answer_cnn)
    return question_pool, answer_pool
def get_attention_vectors(bidi_layers, rich_context=True):
    """Build one max-pooled vector per entry 0 and 1 of ``bidi_layers``.

    For each of the two vectors, entries 2 and 3 are concatenated with
    entry 0 (first vector) or entry 1 (second vector). When ``rich_context``
    is true, entries 4 and 5 are appended as well. The concatenation is then
    max-pooled over axis 1.

    Args:
        bidi_layers: Indexable collection of Keras tensors. Entries 4 and 5
            are only read when ``rich_context`` is true.
        rich_context: Whether to include entries 4 and 5.

    Returns:
        List ``[attention_vector_for_w0, attention_vector_for_w1]``.
    """
    # A single pooling layer is reused for both vectors.
    pool = Lambda(lambda seq: keras.backend.max(seq, axis=1, keepdims=False),
                  output_shape=lambda shape: (shape[0], shape[2]))
    pool.supports_masking = True

    attention_vectors = []
    for own_index in (0, 1):
        parts = [bidi_layers[2], bidi_layers[3], bidi_layers[own_index]]
        if rich_context:
            parts.extend([bidi_layers[4], bidi_layers[5]])
        attention_vectors.append(pool(concatenate(parts)))
    return attention_vectors
def build(self):
    """Encode question and answer with shared LSTMs followed by a CNN bank.

    Both inputs share one embedding layer, two LSTMs (one created with
    ``go_backwards=True``) and four convolutions (kernel sizes 1, 2, 3, 5;
    500 filters each). The concatenated convolution outputs are max-pooled
    over axis 1.

    Returns:
        Tuple ``(question_pool, answer_pool)``.
    """
    question_input = self.question
    answer_input = self.get_answer()

    # Shared embedding initialised from the configured weight file.
    embed_matrix = np.load(self.config['initial_embed_weights'])
    shared_embedding = Embedding(
        input_dim=self.config['n_words'],
        output_dim=embed_matrix.shape[1],
        weights=[embed_matrix],
    )
    embedded_question = shared_embedding(question_input)
    embedded_answer = shared_embedding(answer_input)

    # NOTE(review): the go_backwards output comes back in reversed order and
    # is concatenated as-is, so the two halves are not timestep-aligned when
    # the convolutions slide over them. Confirm this is intended.
    forward_lstm = LSTM(141, return_sequences=True, implementation=1)
    backward_lstm = LSTM(141,
                         return_sequences=True,
                         implementation=1,
                         go_backwards=True)

    question_forward = forward_lstm(embedded_question)
    question_backward = backward_lstm(embedded_question)
    question_recurrent = concatenate([question_forward, question_backward],
                                     axis=-1)

    answer_forward = forward_lstm(embedded_answer)
    answer_backward = backward_lstm(embedded_answer)
    answer_recurrent = concatenate([answer_forward, answer_backward],
                                   axis=-1)

    # Convolution bank shared by both branches.
    conv_bank = [
        Conv1D(kernel_size=width,
               filters=500,
               activation='tanh',
               padding='same')
        for width in (1, 2, 3, 5)
    ]
    question_maps = [conv(question_recurrent) for conv in conv_bank]
    question_cnn = concatenate(question_maps, axis=-1)
    answer_maps = [conv(answer_recurrent) for conv in conv_bank]
    answer_cnn = concatenate(answer_maps, axis=-1)

    # Max over axis 1 (rank 3 -> rank 2).
    pool = Lambda(lambda seq: K.max(seq, axis=1, keepdims=False),
                  output_shape=lambda shape: (shape[0], shape[2]))
    pool.supports_masking = True

    question_pool = pool(question_cnn)
    answer_pool = pool(answer_cnn)
    return question_pool, answer_pool
def build(self):
    """Return max-pooled embeddings of the question and the answer.

    A single embedding layer with ``mask_zero=True`` (no dropout argument)
    is applied to both inputs, and each result is reduced with a max over
    axis 1.

    Returns:
        Tuple ``(question_pool, answer_pool)``.
    """
    q_tokens = self.question
    a_tokens = self.get_answer()

    # Embedding layer shared by both inputs.
    initial_weights = np.load(self.config['initial_embed_weights'])
    embedding_layer = Embedding(input_dim=self.config['n_words'],
                                output_dim=initial_weights.shape[1],
                                mask_zero=True,
                                weights=[initial_weights])
    q_embedded = embedding_layer(q_tokens)
    a_embedded = embedding_layer(a_tokens)

    # Max pooling over axis 1; the layer is flagged as mask-aware so it can
    # follow the masking embedding.
    max_over_axis1 = Lambda(
        lambda tensor: K.max(tensor, axis=1, keepdims=False),
        output_shape=lambda dims: (dims[0], dims[2]))
    max_over_axis1.supports_masking = True

    return max_over_axis1(q_embedded), max_over_axis1(a_embedded)
def build(self):
    """Encode question and answer with shared LSTMs followed by a CNN bank.

    Both inputs share one embedding layer, two LSTMs (one created with
    ``go_backwards=True``) and four convolutions (kernel sizes 1, 2, 3, 5;
    500 filters each). The convolutions run over the concatenated LSTM
    outputs, and their concatenated outputs are max-pooled over axis 1.

    Returns:
        Tuple ``(question_pool, answer_pool)``.
    """
    question = self.questions
    answer = self.get_answer()

    # Shared embedding initialised from the configured weight file.
    weights = np.load(self.config['initial_embed_weights'])
    embedding = Embedding(input_dim=self.config['n_words'],
                          output_dim=weights.shape[1],
                          weights=[weights])
    question_embedding = embedding(question)
    answer_embedding = embedding(answer)

    f_rnn = LSTM(141, return_sequences=True, implementation=1)
    b_rnn = LSTM(141, return_sequences=True, implementation=1,
                 go_backwards=True)

    qf_rnn = f_rnn(question_embedding)
    qb_rnn = b_rnn(question_embedding)
    question_pool = concatenate([qf_rnn, qb_rnn], axis=-1)

    af_rnn = f_rnn(answer_embedding)
    # The second answer branch must use b_rnn; calling f_rnn again would
    # just duplicate af_rnn.
    ab_rnn = b_rnn(answer_embedding)
    answer_pool = concatenate([af_rnn, ab_rnn], axis=-1)

    # Convolution bank shared by both branches.
    cnns = [Conv1D(kernel_size=kernel_size,
                   filters=500,
                   activation='tanh',
                   padding='same')
            for kernel_size in [1, 2, 3, 5]]
    # Convolve the recurrent outputs (not the raw embeddings); otherwise the
    # LSTM layers above never contribute to the result.
    question_cnn = concatenate([cnn(question_pool) for cnn in cnns], axis=-1)
    answer_cnn = concatenate([cnn(answer_pool) for cnn in cnns], axis=-1)

    # Max over axis 1 (rank 3 -> rank 2).
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    maxpool.supports_masking = True

    question_pool = maxpool(question_cnn)
    answer_pool = maxpool(answer_cnn)
    return question_pool, answer_pool
def build(self):
    """Encode question and answer with a shared dense layer and CNN bank.

    Both inputs share one embedding layer, a time-distributed 200-unit tanh
    layer and four convolutions (kernel sizes 2, 3, 5, 7; 1000 filters
    each). The concatenated convolution outputs are max-pooled over axis 1;
    no further dense layer is applied afterwards.

    Returns:
        Tuple ``(question_pool, answer_pool)``.
    """
    assert self.config['question_len'] == self.config['answer_len']

    question_input = self.question
    answer_input = self.get_answer()

    # Shared embedding initialised from the configured weight file.
    embed_matrix = np.load(self.config['initial_embed_weights'])
    shared_embedding = Embedding(
        input_dim=self.config['n_words'],
        output_dim=embed_matrix.shape[1],
        weights=[embed_matrix],
    )
    embedded_question = shared_embedding(question_input)
    embedded_answer = shared_embedding(answer_input)

    # Same dense projection at every timestep of both inputs.
    per_step_dense = TimeDistributed(Dense(200, activation='tanh'))
    question_hidden = per_step_dense(embedded_question)
    answer_hidden = per_step_dense(embedded_answer)

    # Convolution bank shared by both branches.
    conv_bank = [
        Conv1D(kernel_size=width,
               filters=1000,
               activation='tanh',
               padding='same')
        for width in (2, 3, 5, 7)
    ]
    question_maps = [conv(question_hidden) for conv in conv_bank]
    question_cnn = concatenate(question_maps, axis=-1)
    answer_maps = [conv(answer_hidden) for conv in conv_bank]
    answer_cnn = concatenate(answer_maps, axis=-1)

    # Max over axis 1 (rank 3 -> rank 2).
    pool = Lambda(lambda seq: K.max(seq, axis=1, keepdims=False),
                  output_shape=lambda shape: (shape[0], shape[2]))
    pool.supports_masking = True

    question_pool = pool(question_cnn)
    answer_pool = pool(answer_cnn)
    return question_pool, answer_pool
def build(self):
    """Build the LSTM + CNN encoders for the question and the answer.

    Pipeline for each input: shared embedding -> two shared LSTMs whose
    outputs are concatenated -> four shared convolutions (filter lengths
    1, 2, 3, 5; 500 filters each) whose outputs are concatenated -> max over
    axis 1.

    Returns:
        Tuple ``(question_pool, answer_pool)``.
    """
    q_tokens = self.question
    a_tokens = self.get_answer()

    # Embedding layer shared by both inputs.
    initial_weights = np.load(self.config['initial_embed_weights'])
    embedding_layer = Embedding(input_dim=self.config['n_words'],
                                output_dim=initial_weights.shape[1],
                                weights=[initial_weights])
    q_embedded = embedding_layer(q_tokens)
    a_embedded = embedding_layer(a_tokens)

    # Both LSTMs are created with identical arguments (go_backwards is not
    # set on either of them).
    lstm_a = LSTM(141, return_sequences=True, consume_less='mem')
    lstm_b = LSTM(141, return_sequences=True, consume_less='mem')

    q_seq_a = lstm_a(q_embedded)
    q_seq_b = lstm_b(q_embedded)
    q_recurrent = merge([q_seq_a, q_seq_b], mode='concat', concat_axis=-1)

    a_seq_a = lstm_a(a_embedded)
    a_seq_b = lstm_b(a_embedded)
    a_recurrent = merge([a_seq_a, a_seq_b], mode='concat', concat_axis=-1)

    # Convolution bank: one layer per filter length.
    convolutions = []
    for length in [1, 2, 3, 5]:
        convolutions.append(
            Convolution1D(filter_length=length,
                          nb_filter=500,
                          activation='tanh',
                          border_mode='same'))

    q_conv_outputs = []
    for convolution in convolutions:
        q_conv_outputs.append(convolution(q_recurrent))
    question_cnn = merge(q_conv_outputs, mode='concat')

    a_conv_outputs = []
    for convolution in convolutions:
        a_conv_outputs.append(convolution(a_recurrent))
    answer_cnn = merge(a_conv_outputs, mode='concat')

    # Max pooling over axis 1 (rank 3 -> rank 2).
    max_over_axis1 = Lambda(
        lambda tensor: K.max(tensor, axis=1, keepdims=False),
        output_shape=lambda dims: (dims[0], dims[2]))
    max_over_axis1.supports_masking = True

    question_pool = max_over_axis1(question_cnn)
    answer_pool = max_over_axis1(answer_cnn)
    return question_pool, answer_pool
def get_baseline_model(options: dict, embedding: np.ndarray):
    """Build and compile the baseline model over the two warrant inputs.

    Each warrant is embedded, passed through a bidirectional LSTM and then
    through a second LSTM. The two resulting vectors are concatenated,
    regularised with dropout and fed to two dense layers ending in a single
    output unit.

    The reason, claim, debate-title and debate-info inputs are declared and
    listed as model inputs, but no layer in this model consumes them.

    Args:
        options: Settings read with ``dict.get``: ``'padding'`` (input
            length), ``'lstm_size'``, ``'dropout'``, ``'optimizer'``,
            ``'loss'``, ``'activation1'`` and ``'activation2'``.
        embedding: 2-D array used both to size the embedding layers
            (``shape[0]`` x ``shape[1]``) and as their initial weights.

    Returns:
        The compiled Keras ``Model`` taking six inputs, in the order
        warrant0, warrant1, reason, claim, debate title, debate info.
    """
    max_len = options.get('padding')
    lstm_size = options.get('lstm_size')
    dropout = options.get('dropout')
    optimizer = options.get('optimizer')
    loss = options.get('loss')
    activation1 = options.get('activation1')
    activation2 = options.get('activation2')
    print('LSTM_01: embeddings.shape', embedding.shape)

    # Six input layers: warrant0, warrant1, reason, claim, debate title and
    # debate info.
    sequence_layer_warrant0_input = Input(
        shape=(max_len, ), dtype='int32',
        name="sequence_layer_input_warrant0")
    sequence_layer_warrant1_input = Input(
        shape=(max_len, ), dtype='int32',
        name="sequence_layer_input_warrant1")
    sequence_layer_reason_input = Input(
        shape=(max_len, ), dtype='int32',
        name="sequence_layer_input_reason")
    sequence_layer_claim_input = Input(
        shape=(max_len, ), dtype='int32',
        name="sequence_layer_input_claim")
    sequence_layer_debate_title_input = Input(
        shape=(max_len, ), dtype='int32',
        name="sequence_layer_input_debateTitle")
    sequence_layer_debate_info_input = Input(
        shape=(max_len, ), dtype='int32',
        name="sequence_layer_input_debateInfo")

    # Each warrant gets its own embedding layer, both initialised from the
    # same matrix and masking index 0.
    embedded_layer_warrant0_input = Embedding(
        embedding.shape[0],
        embedding.shape[1],
        input_length=max_len,
        weights=[embedding],
        mask_zero=True)(sequence_layer_warrant0_input)
    embedded_layer_warrant1_input = Embedding(
        embedding.shape[0],
        embedding.shape[1],
        input_length=max_len,
        weights=[embedding],
        mask_zero=True)(sequence_layer_warrant1_input)

    bidi_lstm_layer_warrant0 = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM_W0')(embedded_layer_warrant0_input)
    bidi_lstm_layer_warrant1 = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM_W1')(embedded_layer_warrant1_input)

    # A second LSTM reduces each bidirectional sequence to one vector. (The
    # max-pooling Lambda that used to be created here was never applied.)
    attention_warrant0 = LSTM(lstm_size)(bidi_lstm_layer_warrant0)
    attention_warrant1 = LSTM(lstm_size)(bidi_lstm_layer_warrant1)

    # Concatenate both warrant vectors and apply dropout.
    dropout_layer = Dropout(dropout)(concatenate(
        [attention_warrant0, attention_warrant1]))

    # One extra dense layer before the single-unit output.
    dense1 = Dense(int(lstm_size), activation=activation1)(dropout_layer)
    output_layer = Dense(1, activation=activation2)(dense1)

    model = Model(inputs=[
        sequence_layer_warrant0_input,
        sequence_layer_warrant1_input,
        sequence_layer_reason_input,
        sequence_layer_claim_input,
        sequence_layer_debate_title_input,
        sequence_layer_debate_info_input,
    ], outputs=output_layer)
    model.compile(loss=loss,
                  optimizer=optimizer,
                  metrics=['accuracy', dev_pred])
    return model
def cnn_lstm_f1():
    """Build and compile the two-question LSTM + CNN duplicate classifier.

    The vocabulary object is unpickled from ``vocab.data`` in the current
    working directory; its ``nb_words`` and ``embedding`` attributes size
    and initialise two frozen embedding layers. Both questions share the
    LSTMs, the ``Position_Embedding`` and ``Attention`` layers and the
    convolution bank; each side has its own batch normalisation, dropout
    and 100-unit dense layer.

    Returns:
        The compiled Keras ``Model`` mapping ``[question1, question2]`` to a
        single sigmoid output.
    """
    # NOTE(review): pickle.load executes arbitrary code from the file; keep
    # vocab.data a trusted, locally produced artefact.
    with open('vocab.data', 'rb') as vocab_file:
        vocab = pickle.load(vocab_file)

    question1 = Input(shape=(20, ))
    question2 = Input(shape=(20, ))

    # Separate frozen embedding layers initialised from the same matrix.
    q1_embedded = Embedding(vocab.nb_words + 1,
                            300,
                            weights=[vocab.embedding],
                            input_length=20,
                            trainable=False)(question1)
    q2_embedded = Embedding(vocab.nb_words + 1,
                            300,
                            weights=[vocab.embedding],
                            input_length=20,
                            trainable=False)(question2)

    # Layers shared between the two question branches.
    forward_lstm = LSTM(30, return_sequences=True, implementation=1)
    backward_lstm = LSTM(30,
                         return_sequences=True,
                         implementation=1,
                         go_backwards=True)
    position = Position_Embedding(mode='concat')
    attention = Attention(20)

    # Question 1 branch.
    q1_normed = BatchNormalization()(q1_embedded)
    q1_forward = forward_lstm(q1_normed)
    q1_backward = backward_lstm(q1_normed)
    q1_rnn = concatenate([q1_forward, q1_backward], axis=-1)
    q1_rnn = position(q1_rnn)
    q1_rnn = concatenate([q1_rnn, attention(q1_rnn)])

    # Question 2 branch.
    q2_normed = BatchNormalization()(q2_embedded)
    q2_forward = forward_lstm(q2_normed)
    q2_backward = backward_lstm(q2_normed)
    q2_rnn = concatenate([q2_forward, q2_backward], axis=-1)
    q2_rnn = position(q2_rnn)
    q2_rnn = concatenate([q2_rnn, attention(q2_rnn)])

    # Convolution bank shared by both branches.
    conv_bank = [
        Conv1D(kernel_size=width,
               filters=100,
               activation='tanh',
               padding='same')
        for width in (1, 2, 3, 5)
    ]
    q1_maps = [conv(q1_rnn) for conv in conv_bank]
    q1_cnn = concatenate(q1_maps, axis=-1)
    q2_maps = [conv(q2_rnn) for conv in conv_bank]
    q2_cnn = concatenate(q2_maps, axis=-1)

    # Max over axis 1 (rank 3 -> rank 2), then light dropout per branch.
    pool = Lambda(lambda seq: K.max(seq, axis=1, keepdims=False),
                  output_shape=lambda shape: (shape[0], shape[2]))
    pool.supports_masking = True
    q1_pool = Dropout(0.05)(pool(q1_cnn))
    q2_pool = Dropout(0.05)(pool(q2_cnn))

    # Per-branch dense layers (not shared), then the joint classifier.
    q1_dense = Dense(100, activation='relu')(q1_pool)
    q2_dense = Dense(100, activation='relu')(q2_pool)
    joined = concatenate([q1_dense, q2_dense])
    is_duplicate = Dense(1, activation='sigmoid')(joined)

    model = Model(inputs=[question1, question2], outputs=is_duplicate)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
unroll=self.layer.unroll, input_length=input_shape[1]) if self.layer.stateful: self.updates = [] for i in range(len(states)): self.updates.append((self.layer.states[i], states[i])) if self.layer.return_sequences: return outputs else: return last_output Maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2])) Maxpool.supports_masking = True def Encoder(hidden_size, activation=None, return_sequences=True, bidirectional=False, use_gru=True): if activation is None: activation = ELU() if use_gru: def _encoder(x): if bidirectional: branch_1 = GRU(hidden_size, activation='linear', return_sequences=return_sequences, go_backwards=False)(x) branch_2 = GRU(hidden_size, activation='linear', return_sequences=return_sequences, go_backwards=True)(x) x = concatenate([branch_1, branch_2]) x = activation(x) return x
def LSTMCNN4model(self, weights, hidden_dim=100):
    """Build the LSTM + CNN answer-ranking models trained with a margin loss.

    Args:
        weights: Pre-trained embedding matrix. ``len(weights)`` is used as the
            vocabulary size and ``weights.shape[1]`` as the embedding width.
        hidden_dim: Number of units in each of the two LSTM layers.

    Returns:
        ``(training_model, prediction_model)``. The training model takes
        ``[question, answer_good, answer_bad]`` and outputs
        ``relu(margin - good_similarity + bad_similarity)``; the prediction
        model takes ``[question, answer_good]`` and outputs the similarity.
        Both are compiled with a loss that simply returns ``y_pred``.
    """
    margin = 0.05
    enc_timesteps = 30
    dec_timesteps = 30

    # Integer token-id inputs for the question and the candidate answers.
    question = Input(shape=(enc_timesteps, ),
                     dtype='int32',
                     name='question_base')
    answer = Input(shape=(dec_timesteps, ), dtype='int32', name='answer')
    answer_good = Input(shape=(dec_timesteps, ),
                        dtype='int32',
                        name='answer_good_base')
    answer_bad = Input(shape=(dec_timesteps, ),
                       dtype='int32',
                       name='answer_bad_base')

    # One frozen embedding shared by question and answer; index 0 is masked.
    qa_embedding = Embedding(input_dim=len(weights),
                             output_dim=weights.shape[1],
                             mask_zero=True,
                             weights=[weights],
                             trainable=False)
    question_embedding = qa_embedding(question)
    answer_embedding = qa_embedding(answer)

    # NOTE(review): both LSTMs are constructed with identical arguments
    # (no go_backwards), so "b_rnn" is a second forward LSTM rather than a
    # backward one -- confirm whether a true bidirectional encoder was meant.
    f_rnn = LSTM(hidden_dim,
                 return_sequences=True,
                 recurrent_dropout=0.2,
                 dropout=0.2)
    b_rnn = LSTM(hidden_dim,
                 return_sequences=True,
                 recurrent_dropout=0.2,
                 dropout=0.2)

    qf_rnn = f_rnn(question_embedding)
    qb_rnn = b_rnn(question_embedding)
    question_rnn = merge([qf_rnn, qb_rnn], mode='concat', concat_axis=-1)

    af_rnn = f_rnn(answer_embedding)
    ab_rnn = b_rnn(answer_embedding)
    answer_rnn = merge([af_rnn, ab_rnn], mode='concat', concat_axis=-1)

    # Convolutions of several filter lengths over the recurrent outputs.
    cnns = [
        Convolution1D(filter_length=filter_length,
                      nb_filter=500,
                      activation='tanh',
                      border_mode='same') for filter_length in [1, 2, 3, 5]
    ]
    # Flag set so the masked embedding output can be fed to the convolutions.
    for cnn in cnns:
        cnn.supports_masking = True

    question_cnn = merge([cnn(question_rnn) for cnn in cnns], mode='concat')
    answer_cnn = merge([cnn(answer_rnn) for cnn in cnns], mode='concat')

    drop = Dropout(0.2)
    question_cnn = drop(question_cnn)
    answer_cnn = drop(answer_cnn)

    # Max over the time axis: one feature vector per sequence.
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    maxpool.supports_masking = True
    question_vec = maxpool(question_cnn)
    answer_vec = maxpool(answer_cnn)

    # Similarity function is supplied by the enclosing object.
    similarity = self.get_cosine_similarity()
    merged_model = merge([question_vec, answer_vec],
                         mode=similarity,
                         output_shape=lambda _: (None, 1))
    lstm_convolution_model = Model(inputs=[question, answer],
                                   outputs=merged_model,
                                   name='lstm_convolution_model')

    # The same scoring sub-model is applied to the good and the bad answer.
    good_similarity = lstm_convolution_model([question, answer_good])
    bad_similarity = lstm_convolution_model([question, answer_bad])

    # Hinge loss: zero once the good answer beats the bad one by `margin`.
    loss = merge([good_similarity, bad_similarity],
                 mode=lambda x: K.relu(margin - x[0] + x[1]),
                 output_shape=lambda x: x[0])

    adam = Adam(lr=0.001)
    prediction_model = Model(inputs=[question, answer_good],
                             outputs=good_similarity,
                             name='prediction_model')
    prediction_model.compile(loss=lambda y_true, y_pred: y_pred,
                             optimizer=adam)
    training_model = Model(inputs=[question, answer_good, answer_bad],
                           outputs=loss,
                           name='training_model')
    # The model output already is the loss, so the Keras loss passes it on.
    training_model.compile(loss=lambda y_true, y_pred: y_pred,
                           optimizer=adam)
    return training_model, prediction_model
def attention_lstm():
    """Build and compile the position-embedding + LSTM + attention classifier.

    The vocabulary object is unpickled from ``vocab.data`` in the current
    working directory; its ``nb_words`` and ``embedding`` attributes size and
    initialise the two frozen embedding layers.

    Returns:
        A compiled Keras ``Model`` taking two length-15 token sequences and
        producing a single sigmoid output, trained with binary cross-entropy.
    """
    with open('vocab.data', 'rb') as fin:
        vocab = pickle.load(fin)

    question1 = Input(shape=(15, ))
    question2 = Input(shape=(15, ))

    q1 = Embedding(vocab.nb_words + 1,
                   300,
                   weights=[vocab.embedding],
                   input_length=15,
                   trainable=False)(question1)
    q2 = Embedding(vocab.nb_words + 1,
                   300,
                   weights=[vocab.embedding],
                   input_length=15,
                   trainable=False)(question2)

    # Layers below are shared between the two question branches.
    pos = Position_Embedding()
    f_rnn = LSTM(256, return_sequences=True, consume_less='mem')
    b_rnn = LSTM(256,
                 return_sequences=True,
                 consume_less='mem',
                 go_backwards=True)

    # Max over the time axis: one feature vector per sequence.
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    maxpool.supports_masking = True

    q1 = pos(q1)
    q2 = pos(q2)

    qf_rnn = f_rnn(q1)
    qb_rnn = b_rnn(q1)
    q1_rnn = concatenate([qf_rnn, qb_rnn], axis=-1)

    af_rnn = f_rnn(q2)
    ab_rnn = b_rnn(q2)
    q2_rnn = concatenate([af_rnn, ab_rnn], axis=-1)

    # The same sequence is passed three times to the attention layer.
    att = Attention(20)
    q1_att = maxpool(att([q1_rnn, q1_rnn, q1_rnn]))
    q1 = Dense(200, activation='relu')(q1_att)

    # Use the shared `att` layer here as well; the previous code referenced
    # an undefined name `attention` for the second branch.
    q2_att = maxpool(att([q2_rnn, q2_rnn, q2_rnn]))
    q2 = Dense(200, activation='relu')(q2_att)

    merged = concatenate([q1, q2])
    merged = Dense(200, activation='relu')(merged)
    merged = Dropout(0)(merged)
    merged = BatchNormalization()(merged)
    is_duplicate = Dense(1, activation='sigmoid')(merged)

    model = Model(inputs=[question1, question2], outputs=is_duplicate)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
def get_attention_lstm(word_index_to_embeddings_map,
                       max_len,
                       rich_context: bool = False,
                       **kwargs):
    """Build and compile the attention-LSTM warrant classifier.

    Args:
        word_index_to_embeddings_map: Mapping whose values are the embedding
            vectors; they are stacked, in iteration order, into the initial
            embedding matrix.
        max_len: Length of every input token sequence.
        rich_context: When true, the debate encoding is added to the reason
            and claim encodings that form the attention vector.
        **kwargs: Must contain ``lstm_size`` (non-zero) and ``dropout``.

    Returns:
        A compiled Keras ``Model`` with five sequence inputs (warrant0,
        warrant1, reason, claim, debate) and one sigmoid output.

    Raises:
        AssertionError: If ``lstm_size`` is missing/zero or ``dropout`` is
            missing.
    """
    # Embeddings as a 2-d array of shape (vocabulary_size, embedding_dim).
    embeddings = np.asarray([
        np.array(x, dtype=np.float32)
        for x in word_index_to_embeddings_map.values()
    ])
    print('embeddings.shape', embeddings.shape)

    lstm_size = kwargs.get('lstm_size')
    dropout = kwargs.get('dropout')
    assert lstm_size, 'lstm_size is required'
    # A dropout rate of 0.0 is valid, so only reject a missing value.
    assert dropout is not None, 'dropout is required'

    # Five input layers: warrant0, warrant1, reason, claim and debate.
    sequence_layer_warrant0_input = Input(
        shape=(max_len, ), dtype='int32',
        name="sequence_layer_warrant0_input")
    sequence_layer_warrant1_input = Input(
        shape=(max_len, ), dtype='int32',
        name="sequence_layer_warrant1_input")
    sequence_layer_reason_input = Input(
        shape=(max_len, ), dtype='int32', name="sequence_layer_reason_input")
    sequence_layer_claim_input = Input(
        shape=(max_len, ), dtype='int32', name="sequence_layer_claim_input")
    sequence_layer_debate_input = Input(
        shape=(max_len, ), dtype='int32', name="sequence_layer_debate_input")

    def _embed(sequence_input):
        # Every input gets its own embedding layer (weights are not shared),
        # each initialised from the same matrix, with index 0 masked.
        return Embedding(embeddings.shape[0],
                         embeddings.shape[1],
                         input_length=max_len,
                         weights=[embeddings],
                         mask_zero=True)(sequence_input)

    embedded_layer_warrant0_input = _embed(sequence_layer_warrant0_input)
    embedded_layer_warrant1_input = _embed(sequence_layer_warrant1_input)
    embedded_layer_reason_input = _embed(sequence_layer_reason_input)
    embedded_layer_claim_input = _embed(sequence_layer_claim_input)
    embedded_layer_debate_input = _embed(sequence_layer_debate_input)

    bidi_lstm_layer_reason = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM Reason')(embedded_layer_reason_input)
    bidi_lstm_layer_claim = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM Claim')(embedded_layer_claim_input)
    # The debate (context) encoder is built even when rich_context is off.
    bidi_lstm_layer_debate = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM Context')(embedded_layer_debate_input)

    # Context for the attention vector: reason + claim (+ debate).
    context_layers = [bidi_lstm_layer_reason, bidi_lstm_layer_claim]
    if rich_context:
        context_layers.append(bidi_lstm_layer_debate)
    context_concat = merge(context_layers, mode='concat')

    # Max over the time axis: one attention vector per example.
    max_pool_lambda_layer = Lambda(
        lambda x: keras.backend.max(x, axis=1, keepdims=False),
        output_shape=lambda x: (x[0], x[2]))
    max_pool_lambda_layer.supports_masking = True
    attention_vector = max_pool_lambda_layer(context_concat)

    attention_warrant0 = AttentionLSTM(
        lstm_size, attention_vector)(embedded_layer_warrant0_input)
    attention_warrant1 = AttentionLSTM(
        lstm_size, attention_vector)(embedded_layer_warrant1_input)

    # NOTE(review): merge() is called without a mode, so the library default
    # applies -- this may not be a concatenation; confirm the intended mode.
    dropout_layer = Dropout(dropout)(merge(
        [attention_warrant0, attention_warrant1]))

    # One extra ReLU layer before the sigmoid output.
    dense1 = Dense(int(lstm_size / 2), activation='relu')(dropout_layer)
    output_layer = Dense(1, activation='sigmoid')(dense1)

    model = Model([
        sequence_layer_warrant0_input, sequence_layer_warrant1_input,
        sequence_layer_reason_input, sequence_layer_claim_input,
        sequence_layer_debate_input
    ], output=output_layer)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    # Side effect: writes a diagram of the model to a fixed path.
    from keras.utils.visualize_util import plot
    plot(model, show_shapes=True, to_file='/tmp/model-att.png')

    return model
def get_lstm_cnn_model(embedding_file, vocab_size):
    """Build the LSTM + CNN answer-ranking models trained with a margin loss.

    Args:
        embedding_file: Path passed to ``np.load`` to obtain the initial
            embedding matrix; its second dimension is the embedding width.
        vocab_size: Vocabulary size used as the embedding ``input_dim``.

    Returns:
        ``(training_model, prediction_model)``. The training model takes
        ``[question, answer_good, answer_bad]`` and outputs
        ``relu(bad_similarity - good_similarity + margin)``; the prediction
        model takes ``[question, answer_good]`` and outputs the similarity.
        Both are compiled with a loss that simply returns ``y_pred``.
    """
    margin = 0.2
    hidden_dim = 141
    enc_timesteps = 200
    dec_timesteps = 200
    weights = np.load(embedding_file)

    # Integer token-id inputs for the question and the candidate answers.
    question = Input(shape=(enc_timesteps, ),
                     dtype='int32',
                     name='question_base')
    # Generic answer slot of the scoring sub-model; given its own name
    # instead of duplicating 'answer_good_base'.
    answer = Input(shape=(dec_timesteps, ), dtype='int32', name='answer')
    answer_good = Input(shape=(dec_timesteps, ),
                        dtype='int32',
                        name='answer_good_base')
    answer_bad = Input(shape=(dec_timesteps, ),
                       dtype='int32',
                       name='answer_bad_base')

    # One embedding layer shared by question and answer.
    qa_embedding = Embedding(input_dim=vocab_size,
                             output_dim=weights.shape[1],
                             weights=[weights])
    question_embedding = qa_embedding(question)
    # A leftover debug `print(...)` + `exit()` used to sit here and ended the
    # process before any model was built; it has been removed.
    answer_embedding = qa_embedding(answer)

    # NOTE(review): both LSTMs are constructed with identical arguments
    # (no go_backwards), so "b_rnn" is a second forward LSTM -- confirm
    # whether a true bidirectional encoder was meant.
    f_rnn = LSTM(hidden_dim, return_sequences=True)
    b_rnn = LSTM(hidden_dim, return_sequences=True)

    qf_rnn = f_rnn(question_embedding)
    qb_rnn = b_rnn(question_embedding)
    question_rnn = concatenate([qf_rnn, qb_rnn], axis=-1)

    af_rnn = f_rnn(answer_embedding)
    ab_rnn = b_rnn(answer_embedding)
    answer_rnn = concatenate([af_rnn, ab_rnn], axis=-1)

    # Convolutions over the recurrent outputs, one per listed kernel size.
    filter_sizes = [2, 2]
    cnns = [
        Convolution1D(filters=500,
                      kernel_size=ngram_size,
                      activation='tanh',
                      padding='same') for ngram_size in filter_sizes
    ]
    question_cnn = concatenate([cnn(question_rnn) for cnn in cnns])
    answer_cnn = concatenate([cnn(answer_rnn) for cnn in cnns])

    # Max over the time axis: one feature vector per sequence.
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    maxpool.supports_masking = True
    question_vec = maxpool(question_cnn)
    answer_vec = maxpool(answer_cnn)

    # Normalised dot product of the two vectors as the similarity score.
    merged_model = Dot(axes=1, normalize=True)([question_vec, answer_vec])
    lstm_convolution_model = Model(inputs=[question, answer],
                                   outputs=merged_model,
                                   name='lstm_convolution_model')

    # The same scoring sub-model is applied to the good and the bad answer.
    good_similarity = lstm_convolution_model([question, answer_good])
    bad_similarity = lstm_convolution_model([question, answer_bad])

    # Hinge loss: zero once the good answer beats the bad one by `margin`.
    loss = Lambda(lambda x: K.relu(x[1] - x[0] + margin))(
        [good_similarity, bad_similarity])

    prediction_model = Model(inputs=[question, answer_good],
                             outputs=good_similarity,
                             name='prediction_model')
    prediction_model.compile(loss=lambda y_true, y_pred: y_pred,
                             optimizer="rmsprop")
    training_model = Model(inputs=[question, answer_good, answer_bad],
                           outputs=loss,
                           name='training_model')
    # The model output already is the loss, so the Keras loss passes it on.
    training_model.compile(loss=lambda y_true, y_pred: y_pred,
                           optimizer="rmsprop")
    return training_model, prediction_model
def _pooled_side_embedding(embedding_matrix, tag, max_len, units, pool_layer):
    """Build the extra inputs and pooled vectors for one side embedding.

    Creates four token inputs (warrant0, warrant1, reason, claim) whose names
    end in ``_<tag>``, embeds them with one shared, zero-masked embedding
    layer named ``<tag>_emb_layer``, and for each warrant pools the
    concatenation of reason, claim and that warrant through ``pool_layer``
    and a shared ReLU dense layer of ``units`` units.

    Returns:
        ``(inputs, vector_w0, vector_w1)`` with ``inputs`` ordered
        warrant0, warrant1, reason, claim.
    """
    inputs = [
        Input(shape=(max_len, ),
              dtype='int32',
              name="sequence_layer_%s_input_%s" % (part, tag))
        for part in ('warrant0', 'warrant1', 'reason', 'claim')
    ]
    emb_layer = Embedding(embedding_matrix.shape[0],
                          embedding_matrix.shape[1],
                          input_length=max_len,
                          name='%s_emb_layer' % tag,
                          weights=[embedding_matrix],
                          mask_zero=True)
    warrant0, warrant1, reason, claim = [emb_layer(seq) for seq in inputs]

    dense = Dense(units, activation='relu')
    vector_w0 = dense(pool_layer(concatenate([reason, claim, warrant0])))
    vector_w1 = dense(pool_layer(concatenate([reason, claim, warrant1])))
    return inputs, vector_w0, vector_w1


def get_attention_lstm_intra_warrant_kb_pooled(word_index_to_embeddings_map,
                                               max_len,
                                               rich_context=False,
                                               lstm_size=32,
                                               warrant_lstm_size=32,
                                               dropout=0.1,
                                               kb_embeddings=None,
                                               fn_embeddings=None):
    """Build and compile the intra-warrant attention model with side vectors.

    Args:
        word_index_to_embeddings_map: Mapping whose values are the word
            embedding vectors; stacked in iteration order into the initial
            embedding matrix.
        max_len: Length of every input token sequence.
        rich_context: When true, the debate encoding is added to each
            attention vector.
        lstm_size: Units per direction of the bidirectional LSTM encoders.
        warrant_lstm_size: Units of the two ``AttentionLSTM`` layers.
        dropout: Dropout rate applied before the final dense layers.
        kb_embeddings: Optional embedding matrix; when given, four extra
            ``*_kb`` inputs are added and their pooled vector is appended to
            the attention constants.
        fn_embeddings: Same as ``kb_embeddings`` for the ``*_fn`` inputs.

    Returns:
        A compiled Keras ``Model``. Inputs are the five word sequences
        (warrant0, warrant1, reason, claim, debate), followed by the four
        ``kb`` inputs and then the four ``fn`` inputs when enabled.
    """
    # Embeddings as a 2-d array of shape (vocabulary_size, embedding_dim).
    embeddings = np.asarray([
        np.array(x, dtype=np.float32)
        for x in word_index_to_embeddings_map.values()
    ])

    # Max over the time axis.
    max_pool_lambda_layer = Lambda(
        lambda x: keras.backend.max(x, axis=1, keepdims=False),
        output_shape=lambda x: (x[0], x[2]))
    max_pool_lambda_layer.supports_masking = True

    # Sum over the time axis.
    sum_pool_lambda_layer = Lambda(
        lambda x: keras.backend.sum(x, axis=1, keepdims=False),
        output_shape=lambda x: (x[0], x[2]))
    sum_pool_lambda_layer.supports_masking = True

    # Five word-sequence inputs: warrant0, warrant1, reason, claim, debate.
    sequence_layer_warrant0_input = Input(
        shape=(max_len, ), dtype='int32',
        name="sequence_layer_warrant0_input")
    sequence_layer_warrant1_input = Input(
        shape=(max_len, ), dtype='int32',
        name="sequence_layer_warrant1_input")
    sequence_layer_reason_input = Input(
        shape=(max_len, ), dtype='int32', name="sequence_layer_reason_input")
    sequence_layer_claim_input = Input(
        shape=(max_len, ), dtype='int32', name="sequence_layer_claim_input")
    sequence_layer_debate_input = Input(
        shape=(max_len, ), dtype='int32', name="sequence_layer_debate_input")

    # A single word-embedding layer is shared by all five inputs.
    word_emb_layer = Embedding(embeddings.shape[0],
                               embeddings.shape[1],
                               input_length=max_len,
                               name='word_emb',
                               weights=[embeddings],
                               mask_zero=True)
    embedded_layer_warrant0_input = word_emb_layer(
        sequence_layer_warrant0_input)
    embedded_layer_warrant1_input = word_emb_layer(
        sequence_layer_warrant1_input)
    embedded_layer_reason_input = word_emb_layer(sequence_layer_reason_input)
    embedded_layer_claim_input = word_emb_layer(sequence_layer_claim_input)
    embedded_layer_debate_input = word_emb_layer(sequence_layer_debate_input)

    # Optional side embeddings (kb first, then fn) contribute extra model
    # inputs and one pooled vector per warrant.
    extra_inputs = []
    side_vectors_w0 = []
    side_vectors_w1 = []
    for tag, matrix in (('kb', kb_embeddings), ('fn', fn_embeddings)):
        if matrix is None:
            continue
        side_inputs, vector_w0, vector_w1 = _pooled_side_embedding(
            matrix, tag, max_len, lstm_size * 2, sum_pool_lambda_layer)
        extra_inputs.extend(side_inputs)
        side_vectors_w0.append(vector_w0)
        side_vectors_w1.append(vector_w1)

    bidi_lstm_layer_warrant0 = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM-W0')(embedded_layer_warrant0_input)
    bidi_lstm_layer_warrant1 = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM-W1')(embedded_layer_warrant1_input)
    bidi_lstm_layer_reason = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM-Reason')(embedded_layer_reason_input)
    bidi_lstm_layer_claim = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM-Claim')(embedded_layer_claim_input)
    # The debate (context) encoder is built even when rich_context is off.
    bidi_lstm_layer_debate = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM-Context')(embedded_layer_debate_input)

    # Each warrant attends over reason, claim and the *other* warrant,
    # plus the debate encoding when rich_context is set.
    context_for_w0 = [
        bidi_lstm_layer_reason, bidi_lstm_layer_claim,
        bidi_lstm_layer_warrant1
    ]
    context_for_w1 = [
        bidi_lstm_layer_reason, bidi_lstm_layer_claim,
        bidi_lstm_layer_warrant0
    ]
    if rich_context:
        context_for_w0.append(bidi_lstm_layer_debate)
        context_for_w1.append(bidi_lstm_layer_debate)
    attention_vector_for_w0 = max_pool_lambda_layer(
        concatenate(context_for_w0))
    attention_vector_for_w1 = max_pool_lambda_layer(
        concatenate(context_for_w1))

    def _attention_constants(attention_vector, side_vectors):
        # Keras 2.x Concatenate rejects a single-element list, which is what
        # the default arguments (no kb/fn embeddings) produce, so the lone
        # attention vector is passed through unchanged in that case.
        tensors = [attention_vector] + side_vectors
        if len(tensors) == 1:
            return tensors[0]
        return concatenate(tensors)

    attention_warrant0 = AttentionLSTM(warrant_lstm_size)(
        bidi_lstm_layer_warrant0,
        constants=_attention_constants(attention_vector_for_w0,
                                       side_vectors_w0))
    attention_warrant1 = AttentionLSTM(warrant_lstm_size)(
        bidi_lstm_layer_warrant1,
        constants=_attention_constants(attention_vector_for_w1,
                                       side_vectors_w1))

    # Features: element-wise sum of both warrant encodings plus each one.
    dropout_layer = Dropout(dropout)(concatenate([
        add([attention_warrant0, attention_warrant1]), attention_warrant0,
        attention_warrant1
    ]))

    # One extra ReLU layer before the sigmoid output.
    dense1 = Dense(int(warrant_lstm_size / 2),
                   activation='relu')(dropout_layer)
    output_layer = Dense(1, activation='sigmoid')(dense1)

    model = Model([
        sequence_layer_warrant0_input, sequence_layer_warrant1_input,
        sequence_layer_reason_input, sequence_layer_claim_input,
        sequence_layer_debate_input
    ] + extra_inputs, outputs=output_layer)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
def get_lstm_cnn_model(self, embedding_file, vocab_size):
    """Return the LSTM + CNN training and prediction models.

    Args:
        embedding_file (str): path passed to ``np.load`` to obtain the
            initial embedding matrix
        vocab_size (integer): size of the vocabulary

    Returns:
        training_model: model taking ``[question, answer_good, answer_bad]``
            whose output is ``relu(margin - good + bad)`` on the similarity
            scores; compiled with a loss that returns ``y_pred`` unchanged
        prediction_model: model taking ``[question, answer_good]`` that
            outputs the similarity score
    """
    margin = 0.05
    hidden_dim = 200
    enc_timesteps = 150
    dec_timesteps = 150
    weights = np.load(embedding_file)

    # Integer token-id inputs for the question and the candidate answers.
    question = Input(shape=(enc_timesteps, ),
                     dtype='int32',
                     name='question_base')
    # Generic answer slot of the scoring sub-model; given its own name
    # instead of duplicating 'answer_good_base'.
    answer = Input(shape=(dec_timesteps, ), dtype='int32', name='answer')
    answer_good = Input(shape=(dec_timesteps, ),
                        dtype='int32',
                        name='answer_good_base')
    answer_bad = Input(shape=(dec_timesteps, ),
                       dtype='int32',
                       name='answer_bad_base')

    # One embedding layer shared by question and answer.
    qa_embedding = Embedding(input_dim=vocab_size,
                             output_dim=weights.shape[1],
                             weights=[weights])
    question_embedding = qa_embedding(question)
    answer_embedding = qa_embedding(answer)

    # NOTE(review): both LSTMs are constructed with identical arguments
    # (no go_backwards), so "b_rnn" is a second forward LSTM -- confirm
    # whether a true bidirectional encoder was meant.
    f_rnn = LSTM(hidden_dim, return_sequences=True)
    b_rnn = LSTM(hidden_dim, return_sequences=True)

    qf_rnn = f_rnn(question_embedding)
    qb_rnn = b_rnn(question_embedding)
    question_rnn = merge([qf_rnn, qb_rnn], mode='concat', concat_axis=-1)

    af_rnn = f_rnn(answer_embedding)
    ab_rnn = b_rnn(answer_embedding)
    answer_rnn = merge([af_rnn, ab_rnn], mode='concat', concat_axis=-1)

    # Convolutions of several filter lengths over the recurrent outputs.
    cnns = [
        Convolution1D(filter_length=filter_length,
                      nb_filter=500,
                      activation='tanh',
                      border_mode='same') for filter_length in [1, 2, 3, 5]
    ]
    question_cnn = merge([cnn(question_rnn) for cnn in cnns], mode='concat')
    answer_cnn = merge([cnn(answer_rnn) for cnn in cnns], mode='concat')

    # Max over the time axis: one feature vector per sequence.
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    maxpool.supports_masking = True
    question_vec = maxpool(question_cnn)
    answer_vec = maxpool(answer_cnn)

    # Similarity function is supplied by the enclosing object.
    similarity = self.get_cosine_similarity()
    merged_model = merge([question_vec, answer_vec],
                         mode=similarity,
                         output_shape=lambda _: (None, 1))
    lstm_convolution_model = Model(inputs=[question, answer],
                                   outputs=merged_model,
                                   name='lstm_convolution_model')

    # The same scoring sub-model is applied to the good and the bad answer.
    good_similarity = lstm_convolution_model([question, answer_good])
    bad_similarity = lstm_convolution_model([question, answer_bad])

    # Hinge loss: zero once the good answer beats the bad one by `margin`.
    loss = merge([good_similarity, bad_similarity],
                 mode=lambda x: K.relu(margin - x[0] + x[1]),
                 output_shape=lambda x: x[0])

    prediction_model = Model(inputs=[question, answer_good],
                             outputs=good_similarity,
                             name='prediction_model')
    prediction_model.compile(loss=lambda y_true, y_pred: y_pred,
                             optimizer="rmsprop")
    training_model = Model(inputs=[question, answer_good, answer_bad],
                           outputs=loss,
                           name='training_model')
    # The model output already is the loss, so the Keras loss passes it on.
    training_model.compile(loss=lambda y_true, y_pred: y_pred,
                           optimizer="rmsprop")
    return training_model, prediction_model