def build(self):
    seq1 = Input(name='seq1', shape=[self.config['seq1_maxlen']])
    seq2 = Input(name='seq2', shape=[self.config['seq2_maxlen']])
    embedding = Embedding(self.config['vocab_size'], self.config['embed_size'],
                          weights=[self.config['embed']],
                          trainable=self.config['embed_trainable'])
    seq1_embed = embedding(seq1)
    seq1_embed = Dropout(0.5)(seq1_embed)
    seq2_embed = embedding(seq2)
    seq2_embed = Dropout(0.5)(seq2_embed)
    lstm = Bidirectional(LSTM(self.config['hidden_size'], return_sequences=True,
                              dropout=self.config['dropout_rate']))
    seq1_rep = lstm(seq1_embed)
    seq2_rep = lstm(seq2_embed)
    cross = Match(match_type='concat')([seq1_rep, seq2_rep])
    cross_reshape = Reshape((-1,))(cross)
    mm_k = Lambda(lambda x: K.tf.nn.top_k(x, k=100, sorted=True)[0])(cross_reshape)
    pool_flat_drop = Dropout(rate=self.config['dropout_rate'])(mm_k)
    if self.config['target_mode'] == 'classification':
        out = Dense(2, activation='softmax')(pool_flat_drop)
    elif self.config['target_mode'] in ['regression', 'ranking']:
        out = Dense(1)(pool_flat_drop)
    model = Model(inputs=[seq1, seq2], outputs=out)
    return model
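# A minimal, self-contained sketch of the k-max pooling performed by the top_k Lambda above,
# written against plain TensorFlow (tf.math.top_k) instead of the K.tf alias used in these
# models; the function name and example shapes below are illustrative assumptions.
import tensorflow as tf

def k_max_pooling(match_tensor, k):
    # Flatten every per-sample interaction matrix and keep its k largest scores,
    # yielding a fixed-length feature vector independent of the two sequence lengths.
    flat = tf.reshape(match_tensor, (tf.shape(match_tensor)[0], -1))
    values, _ = tf.math.top_k(flat, k=k, sorted=True)
    return values  # shape: (batch_size, k)

# Example: a batch of 2 match matrices of shape 8 x 12 reduced to their 5 strongest entries.
example_scores = tf.random.normal((2, 8, 12))
top5_features = k_max_pooling(example_scores, k=5)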
def build_model(self):
    encoding_layer1 = Bidirectional(GRU(300, return_sequences=True, dropout=0.2))
    encoded_sentence_1 = encoding_layer1(self.Q1_emb)  # (?, len, 600)
    encoded_sentence_2 = encoding_layer1(self.Q2_emb)  # (?, len, 600)
    q_conv1 = Conv1D(32, 3, padding='same')(encoded_sentence_1)
    show_layer_info('Conv1D', q_conv1)
    d_conv1 = Conv1D(32, 3, padding='same')(encoded_sentence_2)
    show_layer_info('Conv1D', d_conv1)
    cross = Match(match_type='plus')([q_conv1, d_conv1])
    show_layer_info('Match-plus', cross)
    z = Reshape((self.word_max_len, self.word_max_len, -1))(cross)
    show_layer_info('Reshape', z)
    for i in range(2):
        z = Conv2D(filters=self.kernel_counts_2d[i],
                   kernel_size=self.kernel_sizes_2d[i],
                   padding='same', activation='relu')(z)
        show_layer_info('Conv2D', z)
        z = MaxPooling2D(pool_size=(self.mpool_sizes_2d[i][0],
                                    self.mpool_sizes_2d[i][1]))(z)
        show_layer_info('MaxPooling2D', z)
    pool1_flat = Flatten()(z)
    show_layer_info('Flatten', pool1_flat)
    pool1_flat_drop = Dropout(rate=0.2)(pool1_flat)
    show_layer_info('Dropout', pool1_flat_drop)
    out_ = Dense(1, activation='sigmoid')(pool1_flat_drop)
    show_layer_info('Dense', out_)
    return out_
def build(self):
    query = Input(name='query', shape=(self.config['text1_maxlen'],))
    show_layer_info('Input', query)
    doc = Input(name='doc', shape=(self.config['text2_maxlen'],))
    show_layer_info('Input', doc)
    embedding = Embedding(self.config['vocab_size'], self.config['embed_size'],
                          weights=[self.config['embed']],
                          trainable=self.embed_trainable)
    q_embed = embedding(query)
    show_layer_info('Embedding', q_embed)
    d_embed = embedding(doc)
    show_layer_info('Embedding', d_embed)
    q_rep = Bidirectional(LSTM(self.config['hidden_size'], return_sequences=True,
                               dropout=self.config['dropout_rate']))(q_embed)
    show_layer_info('Bidirectional-LSTM', q_rep)
    d_rep = Bidirectional(LSTM(self.config['hidden_size'], return_sequences=True,
                               dropout=self.config['dropout_rate']))(d_embed)
    show_layer_info('Bidirectional-LSTM', d_rep)
    cross = Match(match_type='dot')([q_rep, d_rep])
    # cross = Dot(axes=[2, 2])([q_embed, d_embed])
    show_layer_info('Match-dot', cross)
    cross_reshape = Reshape((-1,))(cross)
    show_layer_info('Reshape', cross_reshape)
    mm_k = Lambda(lambda x: K.tf.nn.top_k(x, k=self.config['topk'], sorted=True)[0])(cross_reshape)
    show_layer_info('Lambda-topk', mm_k)
    pool1_flat_drop = Dropout(rate=self.config['dropout_rate'])(mm_k)
    show_layer_info('Dropout', pool1_flat_drop)
    if self.config['target_mode'] == 'classification':
        out_ = Dense(2, activation='softmax')(pool1_flat_drop)
    elif self.config['target_mode'] in ['regression', 'ranking']:
        out_ = Dense(1)(pool1_flat_drop)
    show_layer_info('Dense', out_)
    # model = Model(inputs=[query, doc, dpool_index], outputs=out_)
    model = Model(inputs=[query, doc], outputs=out_)
    model.summary()
    return model
def build(self):
    def conv2d_work(input_dim):
        seq = Sequential()
        assert self.config['num_conv2d_layers'] > 0
        for i in range(self.config['num_conv2d_layers']):
            seq.add(Conv2D(filters=self.config['2d_kernel_counts'][i],
                           kernel_size=self.config['2d_kernel_sizes'][i],
                           padding='same', activation='relu'))
            seq.add(MaxPooling2D(pool_size=(self.config['2d_mpool_sizes'][i][0],
                                            self.config['2d_mpool_sizes'][i][1])))
        return seq

    query = Input(name='query', shape=(self.config['text1_maxlen'],))
    show_layer_info('Input', query)
    doc = Input(name='doc', shape=(self.config['text2_maxlen'],))
    show_layer_info('Input', doc)
    embedding = Embedding(self.config['vocab_size'], self.config['embed_size'],
                          weights=[self.config['embed']],
                          trainable=self.embed_trainable)
    q_embed = embedding(query)
    show_layer_info('Embedding', q_embed)
    d_embed = embedding(doc)
    show_layer_info('Embedding', d_embed)
    q_conv1 = Conv1D(self.config['1d_kernel_count'],
                     self.config['1d_kernel_size'], padding='same')(q_embed)
    show_layer_info('Conv1D', q_conv1)
    d_conv1 = Conv1D(self.config['1d_kernel_count'],
                     self.config['1d_kernel_size'], padding='same')(d_embed)
    show_layer_info('Conv1D', d_conv1)
    cross = Match(match_type='plus')([q_conv1, d_conv1])
    show_layer_info('Match-plus', cross)
    z = Reshape((self.config['text1_maxlen'], self.config['text2_maxlen'], -1))(cross)
    show_layer_info('Reshape', z)
    for i in range(self.config['num_conv2d_layers']):
        z = Conv2D(filters=self.config['2d_kernel_counts'][i],
                   kernel_size=self.config['2d_kernel_sizes'][i],
                   padding='same', activation='relu')(z)
        show_layer_info('Conv2D', z)
        z = MaxPooling2D(pool_size=(self.config['2d_mpool_sizes'][i][0],
                                    self.config['2d_mpool_sizes'][i][1]))(z)
        show_layer_info('MaxPooling2D', z)
    # dpool = DynamicMaxPooling(self.config['dpool_size'][0], self.config['dpool_size'][1])([conv2d, dpool_index])
    pool1_flat = Flatten()(z)
    show_layer_info('Flatten', pool1_flat)
    pool1_flat_drop = Dropout(rate=self.config['dropout_rate'])(pool1_flat)
    show_layer_info('Dropout', pool1_flat_drop)
    if self.config['target_mode'] == 'classification':
        out_ = Dense(2, activation='softmax')(pool1_flat_drop)
    elif self.config['target_mode'] in ['regression', 'ranking']:
        out_ = Dense(1)(pool1_flat_drop)
    show_layer_info('Dense', out_)
    model = Model(inputs=[query, doc], outputs=out_)
    return model
def build(self):
    def conv2d_work(input_dim):
        seq = Sequential()
        assert self.config['num_conv2d_layers'] > 0
        for i in range(self.config['num_conv2d_layers']):
            seq.add(Conv2D(filters=self.config['2d_kernel_counts'][i],
                           kernel_size=self.config['2d_kernel_sizes'][i],
                           padding='same', activation='relu'))
            seq.add(MaxPooling2D(pool_size=(self.config['2d_mpool_sizes'][i][0],
                                            self.config['2d_mpool_sizes'][i][1])))
        return seq

    query = Input(name='query', shape=(self.config['text1_maxlen'],))
    show_layer_info('Input', query)
    doc = Input(name='doc', shape=(self.config['text2_maxlen'],))
    show_layer_info('Input', doc)
    embedding = Embedding(self.config['vocab_size'], self.config['embed_size'],
                          weights=[self.config['embed']],
                          trainable=self.embed_trainable)
    q_embed = embedding(query)
    show_layer_info('Embedding', q_embed)
    d_embed = embedding(doc)
    show_layer_info('Embedding', d_embed)

    # ########## compute attention weights for the query words: better than mvlstm alone
    if self.config['text1_attention']:
        q_w = Dense(1, kernel_initializer=self.initializer_gate,
                    use_bias=False)(q_embed)  # use_bias=False for a simple combination
        show_layer_info('Dense', q_w)
        q_w = Lambda(lambda x: softmax(x, axis=1),
                     output_shape=(self.config['text1_maxlen'],), name='q_w')(q_w)
        show_layer_info('Lambda-softmax', q_w)
        # ########## apply the attention weights to the query words
        q_w_layer = Lambda(lambda x: K.repeat_elements(
            q_w, rep=self.config['embed_size'], axis=2))(q_w)
        show_layer_info('repeat', q_w_layer)
        q_embed = Multiply()([q_w_layer, q_embed])
        show_layer_info('Dot-qw', q_embed)
    # ####################### attention text1

    # ########## compute attention weights for the document words:
    if self.config['text2_attention']:
        d_w = Dense(1, kernel_initializer=self.initializer_gate,
                    use_bias=False)(d_embed)
        show_layer_info('Dense', d_w)
        d_w = Lambda(lambda x: softmax(x, axis=1),
                     output_shape=(self.config['text2_maxlen'],), name='d_w')(d_w)
        show_layer_info('Lambda-softmax', d_w)
        # ########## apply the attention weights to the document words
        d_w_layer = Lambda(lambda x: K.repeat_elements(
            d_w, rep=self.config['embed_size'], axis=2))(d_w)
        d_embed = Multiply()([d_w_layer, d_embed])
        show_layer_info('Dot-qw', d_embed)
    # ####################### attention text2

    q_conv1 = Conv1D(self.config['1d_kernel_count'],
                     self.config['1d_kernel_size'], padding='same')(q_embed)
    show_layer_info('Conv1D', q_conv1)
    d_conv1 = Conv1D(self.config['1d_kernel_count'],
                     self.config['1d_kernel_size'], padding='same')(d_embed)
    show_layer_info('Conv1D', d_conv1)
    cross = Match(match_type='plus')([q_conv1, d_conv1])
    show_layer_info('Match-plus', cross)
    z = Reshape((self.config['text1_maxlen'], self.config['text2_maxlen'], -1))(cross)
    show_layer_info('Reshape', z)

    # add the passages attention
    if self.config['passage_attention']:
        # ########################## compute the passages attention weights
        p_cross = Permute((2, 1, 3))(z)
        show_layer_info('p_cross', p_cross)
        starts = [i for i in range(0, self.config['text2_maxlen'],
                                   self.config['context_len'])]
        slice_layer = [crop(1, start, start + self.config['context_len'])
                       for start in starts]
        slices = [slice_layer_i(p_cross) for slice_layer_i in slice_layer]
        attention_ws = []
        for slice in slices:
            s_dw = Dense(1, use_bias=False)(slice)
            s_dw = Lambda(lambda x: softmax(x, axis=1))(s_dw)
            attention_ws.append(s_dw)
        d_w = concatenate(attention_ws, 1)
        show_layer_info('attW', d_w)
        z = Multiply()([d_w, p_cross])
        show_layer_info('Multiply', z)
    # ########################## passages attention

    for i in range(self.config['num_conv2d_layers']):
        z = Conv2D(filters=self.config['2d_kernel_counts'][i],
                   kernel_size=self.config['2d_kernel_sizes'][i],
                   padding='same', activation='relu')(z)
        show_layer_info('Conv2D', z)
        z = MaxPooling2D(pool_size=(self.config['2d_mpool_sizes'][i][0],
                                    self.config['2d_mpool_sizes'][i][1]))(z)
        show_layer_info('MaxPooling2D', z)
    # dpool = DynamicMaxPooling(self.config['dpool_size'][0], self.config['dpool_size'][1])([conv2d, dpool_index])
    pool1_flat = Flatten()(z)
    show_layer_info('Flatten', pool1_flat)
    pool1_flat_drop = Dropout(rate=self.config['dropout_rate'])(pool1_flat)
    show_layer_info('Dropout', pool1_flat_drop)
    if self.config['target_mode'] == 'classification':
        out_ = Dense(2, activation='softmax')(pool1_flat_drop)
    elif self.config['target_mode'] in ['regression', 'ranking']:
        out_ = Dense(1)(pool1_flat_drop)
    show_layer_info('Dense', out_)
    model = Model(inputs=[query, doc], outputs=out_)
    return model
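# The passage-attention branch above calls a crop(...) helper that is not defined in this
# file. A common way to implement it is as a Lambda factory that slices a tensor along one
# axis; the sketch below is an assumption about its behaviour, not the original definition.
from keras.layers import Lambda

def crop(dimension, start, end):
    # Return a layer that slices the input tensor to [start:end] along the given dimension.
    def slicer(x):
        if dimension == 1:
            return x[:, start:end]
        if dimension == 2:
            return x[:, :, start:end]
        if dimension == 3:
            return x[:, :, :, start:end]
        return x[start:end]
    return Lambda(slicer)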
def build(self):
    query = Input(name='query', batch_shape=[None, None], dtype='int32')
    show_layer_info('Input', query)
    doc = Input(name='doc', batch_shape=[None, None], dtype='int32')
    show_layer_info('Input', doc)
    # NOTE: both branches of this conditional are identical as written.
    input_embed = self.config['vocab_size'] if self.config['mask_zero'] else self.config['vocab_size']
    embedding = Embedding(input_embed, self.config['embed_size'],
                          weights=[self.config['embed']],
                          trainable=self.embed_trainable,
                          name='embeddings',
                          mask_zero=self.config['mask_zero'])
    q_embed = embedding(query)
    show_layer_info('Embedding', q_embed)
    d_embed = embedding(doc)
    show_layer_info('Embedding', d_embed)
    q_lstm_layer = Bidirectional(LSTM(self.config['number_q_lstm_units'],
                                      dropout=self.config['q_lstm_dropout'],
                                      recurrent_dropout=self.config['q_lstm_dropout'],
                                      return_sequences=True),
                                 name='q_lstm')
    d_lstm_layer = Bidirectional(LSTM(self.config['number_d_lstm_units'],
                                      dropout=self.config['d_lstm_dropout'],
                                      recurrent_dropout=self.config['d_lstm_dropout'],
                                      return_sequences=True),
                                 name='d_lstm')
    q_mat = q_lstm_layer(q_embed)
    show_layer_info('Bidirectional-LSTM', q_mat)
    d_mat = d_lstm_layer(d_embed)
    show_layer_info('Bidirectional-LSTM', d_mat)
    input_mat = Match(normalize=True)([q_mat, d_mat])  # the result is a cosine similarity matrix
    show_layer_info('Match', input_mat)
    # input_mat = BatchNormalization()(input_mat)
    # input_mat = Dropout(self.config["dropout_rate"])(input_mat)
    input_mat = Reshape((self.config['text1_maxlen'], self.config['text2_maxlen']))(input_mat)
    show_layer_info('Match', input_mat)
    merged = Conv1D(self.config['filters'], self.config['kernel_size'],
                    activation=self.config['conv_activation'],
                    name='conv1', padding='same')(input_mat)
    merged = BatchNormalization()(merged)
    merged = Dropout(self.config['conv_dropout'])(merged)
    show_layer_info('Conv1D', merged)
    merged = MaxPooling1D(pool_size=self.config['pool_size'], name='maxPool1')(merged)
    show_layer_info('MaxPooling1D', merged)
    # NOTE: conv2 is applied to input_mat rather than to the pooled conv1 output,
    # so the conv1/maxPool1 branch above is not used further.
    merged = Conv1D(self.config['filters'], self.config['kernel_size'],
                    activation=self.config['conv_activation'],
                    name='conv2', padding='same')(input_mat)
    show_layer_info('Conv1D', merged)
    merged = BatchNormalization()(merged)
    merged = Dropout(self.config['conv_dropout'])(merged)
    merged = MaxPooling1D(pool_size=self.config['pool_size'], name='maxPool2')(merged)
    show_layer_info('MaxPooling1D', merged)
    """
    merged = Conv1D(self.config['filters'], self.config['kernel_size'],
                    activation=self.config['conv_activation'],
                    name="conv3", padding='same')(input_mat)
    show_layer_info('Conv1D', merged)
    merged = BatchNormalization()(merged)
    merged = Dropout(self.config["conv_dropout"])(merged)
    merged = MaxPooling1D(pool_size=self.config['pool_size'], name="maxPool3")(merged)
    """
    show_layer_info('MaxPooling1D', merged)
    merged = Flatten()(merged)
    dense = Dense(self.config['hidden_sizes'][0],
                  activation=self.config['hidden_activation'],
                  name='MLP_combine_0')(merged)
    show_layer_info('Dense', dense)
    for i in range(self.config['num_layers'] - 1):
        dense = BatchNormalization()(dense)
        dense = Dropout(self.config['dropout_rate'])(dense)
        dense = Dense(self.config['hidden_sizes'][i + 1],
                      activation=self.config['hidden_activation'],
                      name='MLP_combine_' + str(i + 1))(dense)
        show_layer_info('Dense', dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(self.config['dropout_rate'])(dense)
    # out_ = Dense(1, activation=self.config['output_activation'], name="MLP_out")(dense)
    if self.config['target_mode'] == 'classification':
        out_ = Dense(2, activation=self.config['output_activation'], name='MLP_out')(dense)
    elif self.config['target_mode'] in ['regression', 'ranking']:
        out_ = Dense(1, activation=self.config['output_activation'], name='MLP_out')(dense)
    show_layer_info('Output', out_)
    model = Model(inputs=[query, doc], outputs=[out_])
    plot_model(model, to_file='../conv_wc_model_plot.png',
               show_shapes=True, show_layer_names=True)
    return model
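# Match(normalize=True) above scores every (query position, doc position) pair by the cosine
# similarity of the two bi-LSTM state vectors. A plain-TensorFlow sketch of that interaction
# matrix (function name and shapes are illustrative assumptions; the actual Match layer may
# additionally append a singleton channel axis):
import tensorflow as tf

def cosine_match_matrix(q_states, d_states):
    # q_states: (batch, len1, dim), d_states: (batch, len2, dim)
    q_norm = tf.math.l2_normalize(q_states, axis=-1)
    d_norm = tf.math.l2_normalize(d_states, axis=-1)
    return tf.matmul(q_norm, d_norm, transpose_b=True)  # (batch, len1, len2)

q_demo = tf.random.normal((2, 10, 64))
d_demo = tf.random.normal((2, 30, 64))
sim_demo = cosine_match_matrix(q_demo, d_demo)  # shape (2, 10, 30)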
def build(self):
    query = Input(name='query', shape=(self.config['text1_maxlen'],))
    show_layer_info('Input', query)
    doc = Input(name='doc', shape=(self.config['text2_maxlen'],))
    show_layer_info('Input', doc)
    embedding = Embedding(self.config['vocab_size'], self.config['embed_size'],
                          weights=[self.config['embed']],
                          trainable=self.embed_trainable)
    q_embed = embedding(query)
    show_layer_info('Embedding_q', q_embed)

    # ########## compute attention weights for the query words: better than mvlstm alone
    if self.config['text1_attention']:
        q_w = Dense(1, kernel_initializer=self.initializer_gate,
                    use_bias=False)(q_embed)  # use_bias=False for a simple combination
        show_layer_info('Dense', q_w)
        q_w = Lambda(lambda x: softmax(x, axis=1),
                     output_shape=(self.config['text1_maxlen'],))(q_w)
        show_layer_info('Lambda-softmax', q_w)
        # ########## apply the attention weights to the query words
        q_w_layer = Lambda(lambda x: K.repeat_elements(
            q_w, rep=self.config['embed_size'], axis=2))(q_w)
        show_layer_info('repeat', q_w_layer)
        q_embed = Multiply()([q_w_layer, q_embed])
        show_layer_info('Dot-qw', q_embed)
    # ####################### attention

    d_embed = embedding(doc)
    show_layer_info('Embedding_d', d_embed)
    # ########## compute attention weights for the document words:
    if self.config['text2_attention']:
        d_w = Dense(1, kernel_initializer=self.initializer_gate,
                    use_bias=False)(d_embed)
        show_layer_info('Dense', d_w)
        d_w = Lambda(lambda x: softmax(x, axis=1),
                     output_shape=(self.config['text2_maxlen'],))(d_w)
        show_layer_info('Lambda-softmax', d_w)
        # ########## apply the attention weights to the document words
        d_w_layer = Lambda(lambda x: K.repeat_elements(
            d_w, rep=self.config['embed_size'], axis=2))(d_w)
        d_embed = Multiply()([d_w_layer, d_embed])
        show_layer_info('Dot-qw', d_embed)
    # ####################### attention

    q_rep = Bidirectional(LSTM(self.config['hidden_size'], return_sequences=True,
                               dropout=self.config['dropout_rate']))(q_embed)
    show_layer_info('Bidirectional-LSTM_q', q_rep)
    q_rep = BatchNormalization()(q_rep)
    q_rep = Dropout(self.config['dropout_lstm'])(q_rep)
    d_rep = Bidirectional(LSTM(self.config['hidden_size'], return_sequences=True,
                               dropout=self.config['dropout_rate']))(d_embed)
    show_layer_info('Bidirectional-LSTM_d', d_rep)
    d_rep = BatchNormalization()(d_rep)
    d_rep = Dropout(self.config['dropout_lstm'])(d_rep)
    cross = Match(match_type='dot')([q_rep, d_rep])
    show_layer_info('Match-dot', cross)

    # ####################### convolutions
    cov1 = Conv2D(self.config['filters'], self.config['kernel_size'],
                  activation='relu', name='conv1', padding='same')(cross)
    cov1 = BatchNormalization()(cov1)
    cov1 = Dropout(self.config['dropout_rate'])(cov1)
    show_layer_info('Conv1', cov1)
    cov1 = MaxPooling2D(pool_size=3, name='maxPool')(cov1)
    show_layer_info('MaxPooling1D-cov2', cov1)
    cov2 = Conv2D(self.config['filters'], self.config['kernel_size'],
                  activation='relu', name='conv2', padding='same')(cov1)
    cov2 = BatchNormalization()(cov2)
    cross = Dropout(self.config['dropout_rate'])(cov2)
    show_layer_info('Conv2', cov2)
    # ###################### convolutions

    cross_reshape = Reshape((-1,))(cross)
    show_layer_info('Reshape', cross_reshape)
    mm_k = Lambda(lambda x: K.tf.nn.top_k(x, k=self.config['topk'], sorted=True)[0])(cross_reshape)
    show_layer_info('Lambda-topk', mm_k)
    pool1_flat_drop = Dropout(rate=self.config['dropout_rate'])(mm_k)
    show_layer_info('Dropout', pool1_flat_drop)
    if self.config['target_mode'] == 'classification':
        out_ = Dense(2, activation='softmax')(pool1_flat_drop)
    elif self.config['target_mode'] in ['regression', 'ranking']:
        out_ = Dense(1)(pool1_flat_drop)
    show_layer_info('Dense', out_)
    model = Model(inputs=[query, doc], outputs=out_)
    plot_model(model, to_file='../amvlstm_conv.png',
               show_shapes=True, show_layer_names=True)
    return model
def build(self):
    query = Input(name='query', shape=(self.config['text1_maxlen'],))
    show_layer_info('Input', query)
    doc = Input(name='doc', shape=(self.config['text2_maxlen'],))
    show_layer_info('Input', doc)
    embedding = Embedding(self.config['vocab_size'], self.config['embed_size'],
                          weights=[self.config['embed']],
                          trainable=self.embed_trainable)
    q_embed = embedding(query)
    show_layer_info('Embedding_q', q_embed)

    # ########## compute attention weights for the query words: better than mvlstm alone
    if self.config['text1_attention']:
        q_w = Dense(1, kernel_initializer=self.initializer_gate,
                    use_bias=False)(q_embed)  # use_bias=False for a simple combination
        show_layer_info('Dense', q_w)
        q_w = Lambda(lambda x: softmax(x, axis=1),
                     output_shape=(self.config['text1_maxlen'],), name='q_w')(q_w)
        show_layer_info('Lambda-softmax', q_w)
        # ########## apply the attention weights to the query words
        q_w_layer = Lambda(lambda x: K.repeat_elements(
            q_w, rep=self.config['embed_size'], axis=2))(q_w)
        show_layer_info('repeat', q_w_layer)
        q_embed = Multiply()([q_w_layer, q_embed])
        show_layer_info('Dot-qw', q_embed)
    # ####################### attention

    d_embed = embedding(doc)
    show_layer_info('Embedding_d', d_embed)
    # ########## compute attention weights for the document words:
    if self.config['text2_attention']:
        d_w = Dense(1, kernel_initializer=self.initializer_gate,
                    use_bias=False)(d_embed)
        show_layer_info('Dense', d_w)
        d_w = Lambda(lambda x: softmax(x, axis=1),
                     output_shape=(self.config['text2_maxlen'],))(d_w)
        show_layer_info('Lambda-softmax', d_w)
        # ########## apply the attention weights to the document words
        d_w_layer = Lambda(lambda x: K.repeat_elements(
            d_w, rep=self.config['embed_size'], axis=2))(d_w)
        d_embed = Multiply()([d_w_layer, d_embed])
        show_layer_info('Dot-qw', d_embed)
    # ####################### attention

    q_rep = Bidirectional(LSTM(self.config['hidden_size'], return_sequences=True,
                               dropout=self.config['dropout_rate']))(q_embed)
    show_layer_info('Bidirectional-LSTM_q', q_rep)

    # ################# add attention over query positions:
    if self.config['position_att_text1']:
        pos_w = Dense(1, activation='tanh')(q_rep)  # TimeDistributed(Dense(1, activation='tanh'))(q_rep)
        pos_w = Flatten()(pos_w)
        pos_w = Activation('softmax')(pos_w)
        pos_w = RepeatVector(self.config['hidden_size'] * 2)(pos_w)
        pos_w = Permute([2, 1])(pos_w)
        q_rep = Multiply()([q_rep, pos_w])  # merge([q_rep, pos_w], mode='mul')

    d_rep = Bidirectional(LSTM(self.config['hidden_size'], return_sequences=True,
                               dropout=self.config['dropout_rate']))(d_embed)
    show_layer_info('Bidirectional-LSTM_d', d_rep)

    # ################# add attention over document positions:
    if self.config['position_att_text2']:
        # https://machinelearningmastery.com/timedistributed-layer-for-long-short-term-memory-networks-in-python/
        # TimeDistributed applies the wrapped layer to every time step produced by the bi-LSTM,
        # so the same weights are shared across all time steps; without it, different weights
        # would be learned per position.
        pos_w = Dense(1, activation='tanh')(d_rep)  # TimeDistributed(Dense(1, activation='tanh'))(d_rep)
        pos_w = Flatten()(pos_w)
        pos_w = Activation('softmax')(pos_w)
        pos_w = RepeatVector(self.config['hidden_size'] * 2)(pos_w)
        pos_w = Permute([2, 1])(pos_w)
        d_rep = Multiply()([d_rep, pos_w])  # merge([d_rep, pos_w], mode='mul')

    cross = Match(match_type='dot')([q_rep, d_rep])
    show_layer_info('Match-dot', cross)
    cross_reshape = Reshape((-1,))(cross)
    show_layer_info('Reshape', cross_reshape)
    mm_k = Lambda(lambda x: K.tf.nn.top_k(x, k=self.config['topk'], sorted=True)[0])(cross_reshape)
    show_layer_info('Lambda-topk', mm_k)
    pool1_flat_drop = Dropout(rate=self.config['dropout_rate'])(mm_k)
    show_layer_info('Dropout', pool1_flat_drop)
    if self.config['target_mode'] == 'classification':
        out_ = Dense(2, activation='softmax')(pool1_flat_drop)
    elif self.config['target_mode'] in ['regression', 'ranking']:
        out_ = Dense(1)(pool1_flat_drop)
    show_layer_info('Dense', out_)
    model = Model(inputs=[query, doc], outputs=out_)
    plot_model(model, to_file='../amvlstm.png',
               show_shapes=True, show_layer_names=True)
    return model
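# The position-attention blocks above follow a common Keras pattern: score every time step
# with a shared Dense(1), normalise the scores with a softmax over time, then broadcast the
# weights back over the feature dimension with RepeatVector + Permute before the element-wise
# Multiply. A standalone sketch of that pattern; the sizes and names are illustrative assumptions.
from keras.layers import Input, Dense, Flatten, Activation, RepeatVector, Permute, Multiply
from keras.models import Model

def positional_attention_demo(timesteps=20, features=128):
    seq = Input(shape=(timesteps, features))       # e.g. a bi-LSTM output
    w = Dense(1, activation='tanh')(seq)           # one score per time step: (batch, timesteps, 1)
    w = Flatten()(w)                               # (batch, timesteps)
    w = Activation('softmax')(w)                   # attention distribution over positions
    w = RepeatVector(features)(w)                  # (batch, features, timesteps)
    w = Permute([2, 1])(w)                         # (batch, timesteps, features)
    weighted = Multiply()([seq, w])                # re-weight each position's features
    return Model(inputs=seq, outputs=weighted)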
def build(self):
    query = Input(name='query', shape=(self.config['text1_maxlen'],))
    show_layer_info('Input', query)
    doc = Input(name='doc', shape=(self.config['text2_maxlen'],))
    show_layer_info('Input', doc)
    embedding = Embedding(self.config['vocab_size'], self.config['embed_size'],
                          weights=[self.config['embed']],
                          trainable=self.embed_trainable)
    q_embed = embedding(Masking(mask_value=self.config['vocab_size'] - 1)(query))
    show_layer_info('Embedding', q_embed)
    d_embed = embedding(Masking(mask_value=self.config['vocab_size'] - 1)(doc))
    show_layer_info('Embedding', d_embed)
    q_rep = Bidirectional(LSTM(self.config['hidden_size'], return_sequences=True,
                               dropout=self.config['dropout_rate']))(q_embed)
    show_layer_info('Bidirectional-LSTM', q_rep)
    d_rep = Bidirectional(LSTM(self.config['hidden_size'], return_sequences=True,
                               dropout=self.config['dropout_rate']))(d_embed)
    show_layer_info('Bidirectional-LSTM', d_rep)
    # Output size: (samples, timesteps, 2 * hidden_size)
    cross = Match(match_type=self.config['match_type'],
                  embedding_size=2 * self.config['hidden_size'], r=5)([q_rep, d_rep])
    # cross = Dot(axes=[2, 2])([q_embed, d_embed])
    show_layer_info('Match', cross)
    if self.config['match_type'] != 'tensor2':
        cross_reshape = Reshape((-1,))(cross)
        show_layer_info('Reshape', cross_reshape)
        mm_k = Lambda(lambda x: K.tf.nn.top_k(x, k=self.config['topk'], sorted=True)[0])(cross_reshape)
        show_layer_info('Lambda-topk', mm_k)
        pool1_flat_drop = Dropout(rate=self.config['dropout_rate'])(mm_k)
        show_layer_info('Dropout', pool1_flat_drop)
    else:
        act_cross = Activation('relu')(cross)
        # Per-channel top-k: flatten the two matching dimensions, move channels to axis 1,
        # take the top-k values along the flattened axis, then flatten to (batch, topk * channels).
        pool1_flat_drop = Lambda(lambda x: K.tf.reshape(
            K.tf.nn.top_k(
                K.tf.transpose(
                    K.tf.reshape(x, (-1, x.shape[1] * x.shape[2], x.shape[3])),
                    [0, 2, 1]),
                k=self.config['topk'], sorted=True)[0],
            (-1, K.tf.Dimension(self.config['topk'] * x.shape[3].value))))(act_cross)
    if self.config['target_mode'] == 'classification':
        out_ = Dense(2, activation='softmax')(pool1_flat_drop)
    elif self.config['target_mode'] in ['regression', 'ranking']:
        out_ = Dense(1)(pool1_flat_drop)
    show_layer_info('Dense', out_)
    # model = Model(inputs=[query, doc, dpool_index], outputs=out_)
    model = Model(inputs=[query, doc], outputs=out_)
    return model
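# A plain-TensorFlow sketch of the 'tensor2' branch above: for a 4-D match tensor of shape
# (batch, len1, len2, channels), flatten the two matching dimensions, take the top-k scores
# separately for each channel, and concatenate the results into (batch, topk * channels).
# Function name and example shapes are illustrative assumptions.
import tensorflow as tf

def per_channel_top_k(cross, k):
    len1, len2, channels = cross.shape[1], cross.shape[2], cross.shape[3]
    flat = tf.reshape(cross, (-1, len1 * len2, channels))  # (batch, len1*len2, channels)
    flat = tf.transpose(flat, [0, 2, 1])                   # (batch, channels, len1*len2)
    values, _ = tf.math.top_k(flat, k=k, sorted=True)      # (batch, channels, k)
    return tf.reshape(values, (-1, channels * k))          # (batch, channels * k)

example_cross = tf.random.normal((2, 6, 7, 5))
pooled_example = per_channel_top_k(example_cross, k=3)  # shape (2, 15)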