def HAN(embed_mat, MAX_LEN, MAX_SENTS, num_cls, gru_sz1=100, gru_sz2=100):
    """Build and compile a two-level (word -> sentence) attention classifier.

    Args:
        embed_mat: Initial embedding matrix; ``shape[0]`` and ``shape[1]``
            size the Embedding layer, which stays trainable.
        MAX_LEN: Number of token ids per sentence.
        MAX_SENTS: Number of sentences per document.
        num_cls: Width of the final softmax layer.
        gru_sz1: Units of the word-level LSTM (parameter name kept for
            callers even though the layer is an LSTM).
        gru_sz2: Units of the sentence-level LSTM.

    Returns:
        A compiled Keras ``Model`` taking int32 input of shape
        ``(MAX_SENTS, MAX_LEN)``.
    """
    # Word level: token ids -> embeddings -> BiLSTM -> AttLayer.
    # AttLayer is defined elsewhere in the project.
    word_embedding = Embedding(
        embed_mat.shape[0],
        embed_mat.shape[1],
        weights=[embed_mat],
        input_length=MAX_LEN,
        mask_zero=True,
        trainable=True,
    )
    sentence_input = Input(shape=(MAX_LEN,), dtype='int32')
    word_vectors = word_embedding(sentence_input)
    word_states = Bidirectional(
        LSTM(gru_sz1, return_sequences=True))(word_vectors)
    sentence_vector = AttLayer(name='att_1')(word_states)
    sentence_encoder = Model(sentence_input, sentence_vector)

    # Sentence level: apply the word-level encoder to every sentence, mask
    # all-zero sentence vectors, then BiLSTM -> AttLayer.
    review_input = Input(shape=(MAX_SENTS, MAX_LEN), dtype='int32')
    encoded_sentences = TimeDistributed(sentence_encoder)(review_input)
    encoded_sentences = Masking(mask_value=0.)(encoded_sentences)
    sentence_states = Bidirectional(
        LSTM(gru_sz2, return_sequences=True))(encoded_sentences)
    document_vector = AttLayer(name='att_2')(sentence_states)

    class_probs = Dense(
        num_cls, activation='softmax', name='twt_softmax')(document_vector)

    model = Model(review_input, class_probs)
    optimizer = Adagrad(lr=0.1)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=optimizer,
        metrics=['acc'],
    )
    return model
def __init__(self, C=4, V=40000, MAX_SENT=20, MAX_LEN=100,
             name='hanmodel.h5'):
    """Create the hierarchical attention model and store it on ``self``.

    Args:
        C: Width of the softmax output layer.
        V: ``input_dim`` of the token Embedding.
        MAX_SENT: Number of sentences per document input.
        MAX_LEN: Number of token ids per sentence.
        name: Stored unchanged in ``self.name``.
    """
    self.name = name

    # Sentence encoder. mask_zero=True is used here because the recurrent
    # layers support masking (original author's note).
    token_ids = Input(shape=(MAX_LEN,), dtype='int32', name='input')
    embedded = Embedding(V, 32, mask_zero=True)(token_ids)
    word_states = Bidirectional(GRU(64, return_sequences=True))(embedded)
    sentence_vector = AttLayer()(word_states)
    sent_model = Model(token_ids, sentence_vector)

    # Document encoder: run the sentence encoder over every sentence.
    sent_input = Input(
        shape=(MAX_SENT, MAX_LEN), dtype='int32', name='sent_input')
    sentence_vectors = TimeDistributed(sent_model)(sent_input)
    sentence_vectors = Masking()(sentence_vectors)
    sentence_states = Bidirectional(
        GRU(64, return_sequences=True))(sentence_vectors)
    document_vector = AttLayer()(sentence_states)

    # Classification head.
    hidden = Dense(128, activation='relu')(document_vector)
    class_probs = Dense(C, activation='softmax')(hidden)

    model = Model(sent_input, class_probs)
    model.compile('adam', 'categorical_crossentropy', metrics=['acc'])
    self.model = model
def _init_layers(self):
    """Instantiate (but do not connect) the sentence- and document-level layers.

    Reads ``vocab_size``, ``embedding_dim``, ``recurrent_cell``,
    ``hidden_dim``, ``top_hidden_dim``, ``hidden_layers`` and
    ``bidirectional`` from ``self`` and stores every created layer back on
    ``self``.
    """

    def _recurrent_stack(units):
        # One sequence-returning recurrent layer per hidden layer, wrapped
        # in Bidirectional when self.bidirectional is truthy.
        stack = []
        for _ in range(self.hidden_layers):
            if self.bidirectional:
                stack.append(Bidirectional(
                    self.recurrent_cell(units=units, return_sequences=True)))
            else:
                stack.append(
                    self.recurrent_cell(units=units, return_sequences=True))
        return stack

    # Sentence-level model. input_dim is vocab_size + 1, one more row than
    # the vocabulary size.
    self.sent_input = Input(shape=(None,))
    self.sent_embedding = Embedding(
        input_dim=self.vocab_size + 1,
        output_dim=self.embedding_dim,
    )
    self.sent_recurrent_cells = _recurrent_stack(self.hidden_dim)
    self.sent_att_layer = AttLayer(attention_dim=self.hidden_dim)

    # Document-level model.
    self.doc_input = Input(shape=(None, None))
    self.doc_recurrent_cells = _recurrent_stack(self.top_hidden_dim)
    self.doc_att_layer = AttLayer(attention_dim=self.top_hidden_dim)

    self.out_layer = Dense(units=1, activation='sigmoid')
def feed_forward(self, x, train_top):
    """Densely connected stack of six ReLU + Conv1D blocks with pooled outputs.

    Every block's output is appended to a running list and the next block
    consumes the concatenation of everything collected so far (the raw
    input included).

    Args:
        x: Input tensor; it is also the first entry of the collected list.
        train_top: Passed as ``trainable`` to every Conv1D layer.

    Returns:
        Concatenation of one ``AttLayer`` output per collected tensor plus a
        global max-pool of the final concatenated feature map.
    """
    n_filters = 128
    collected = [x]

    def _relu_conv(tensor):
        # ReLU followed by a width-3 'same' convolution.
        activated = Activation('relu')(tensor)
        return Conv1D(
            n_filters, 3, padding='same', trainable=train_top)(activated)

    merged = Dense(n_filters)(x)
    # Six blocks in total: each adds its output to the list and re-merges.
    for _ in range(6):
        collected.append(_relu_conv(merged))
        merged = Concatenate()(collected)

    pooled = [AttLayer()(tensor) for tensor in collected]
    pooled.append(GlobalMaxPool1D()(merged))
    return Concatenate()(pooled)
def feed_forward(self, x, train_top):
    """Encode ``x`` with a bidirectional GRU followed by ``AttLayer``.

    Args:
        x: Input sequence tensor.
        train_top: Passed as ``trainable`` to both the GRU and its
            Bidirectional wrapper.

    Returns:
        The ``AttLayer`` output computed over the GRU state sequence.
    """
    recurrent = GRU(128, return_sequences=True, trainable=train_top)
    states = Bidirectional(recurrent, trainable=train_top)(x)
    return AttLayer()(states)
def build_ABLSTM(self, paramsObj, weight=None):
    """Build and compile an attention-based bidirectional GRU classifier.

    Args:
        paramsObj: Settings object; ``use_word_embedding`` and
            ``train_embedding`` are read.
        weight: Optional pretrained embedding matrix. ``None`` or an empty
            sequence means a randomly initialised embedding.

    Returns:
        A compiled ``Sequential`` model with ``config.ClassNum`` softmax
        outputs.
    """
    # `weight` used to default to a shared mutable list; None is the
    # backward-compatible sentinel. len() is kept (rather than truthiness)
    # so that numpy arrays are accepted.
    has_weights = weight is not None and len(weight) > 0
    embedding_kwargs = {'input_length': config.MAX_SEQ_LENGTH}
    if has_weights and paramsObj.use_word_embedding:
        embedding_kwargs['weights'] = [weight]
        embedding_kwargs['trainable'] = paramsObj.train_embedding

    model = Sequential()
    model.add(Embedding(config.MAX_NUM_WORDS, config.EMBEDDING_DIM,
                        **embedding_kwargs))
    model.add(Bidirectional(
        GRU(128, dropout=0.2, recurrent_dropout=0.1,
            return_sequences=True)))
    # TODO: add time steps again
    model.add(AttLayer())
    model.add(Dense(config.ClassNum, activation='softmax'))

    # A softmax output needs categorical cross-entropy, as in the other
    # builders of this class. With 'binary_crossentropy' Keras also
    # resolves 'accuracy' to binary accuracy, which overstates
    # multi-class performance.
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    return model
def build_HAN1(self, paramsObj, weight=None):
    """Build and compile a hierarchical attention network over dialogs.

    Args:
        paramsObj: Settings object; ``use_word_embedding`` and
            ``train_embedding`` are read.
        weight: Optional pretrained embedding matrix. ``None`` or an empty
            sequence means a randomly initialised embedding.

    Returns:
        A compiled ``Model`` mapping int32 input of shape
        ``(config.MAX_SENTS, config.MAX_SEQ_LENGTH)`` to a 2-way softmax.
    """
    # `weight` used to default to a shared mutable list; None is the
    # backward-compatible sentinel. len() keeps numpy arrays working.
    has_weights = weight is not None and len(weight) > 0
    embedding_kwargs = {'input_length': config.MAX_SEQ_LENGTH}
    if has_weights and paramsObj.use_word_embedding:
        embedding_kwargs['weights'] = [weight]
        embedding_kwargs['trainable'] = paramsObj.train_embedding
    embedding_layer = Embedding(config.MAX_NUM_WORDS, config.EMBEDDING_DIM,
                                **embedding_kwargs)

    # Sentence encoder. The Input shape lists only the token axis: Keras
    # adds the batch axis and the embedding adds the feature axis.
    sentence_input = Input(shape=(config.MAX_SEQ_LENGTH,),
                           dtype='int32',
                           name='sentence_input')
    embedding_sequences = embedding_layer(sentence_input)
    l_lstm = Bidirectional(
        GRU(100, return_sequences=True))(embedding_sequences)
    l_dense = TimeDistributed(Dense(200))(l_lstm)
    l_att = AttLayer()(l_dense)
    sentEncoder = Model(sentence_input, l_att)

    # Dialog encoder: apply the sentence encoder to every sentence.
    dialog_input = Input(shape=(config.MAX_SENTS, config.MAX_SEQ_LENGTH),
                         dtype='int32')
    dialog_encoder = TimeDistributed(sentEncoder)(dialog_input)
    l_lstm_sent = Bidirectional(
        GRU(100, return_sequences=True))(dialog_encoder)
    l_dense_sent = TimeDistributed(Dense(200))(l_lstm_sent)
    l_att_sent = AttLayer()(l_dense_sent)

    # Output layer.
    # NOTE(review): hard-coded to 2 classes whereas the other builders use
    # config.ClassNum -- confirm this is intentional.
    preds = Dense(2, activation='softmax')(l_att_sent)

    model = Model(dialog_input, preds)
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['acc'])
    return model
def build_functional_model(self):
    """Build, compile and summarise the hierarchical encoder/decoder model.

    A word-level encoder (embedding -> recurrent stack -> ``AttLayer``) is
    applied to every utterance of the conversation, an utterance-level
    recurrent layer consumes the resulting sequence, and a decoder is
    combined with that context through ``self._dot_attention_block``.

    The result is stored in ``self.model``; nothing is returned.
    """
    word_input = Input(shape=(None,), name='word_input')
    decoder_input = Input(shape=(None,), name='decoder_input')
    conversation_input = Input(shape=(None, None), name='conversation_input')

    # Word-level encoder layers. The embedding is shared with the decoder.
    embed_layer = Embedding(input_dim=self.vocab_size,
                            output_dim=self.embedding_dim,
                            mask_zero=True,
                            name='embedding')
    word_encoder_layers = [
        Bidirectional(self.rec_cell(units=self.encoder_dim,
                                    return_sequences=True,
                                    return_state=False))
        if self.encoder_type == 'bidi'
        else self.rec_cell(units=self.encoder_dim,
                           return_sequences=True,
                           return_state=False)
        for _ in range(self.num_encoder_layers)
    ]
    word_att_layer = AttLayer(attention_dim=self.encoder_dim)

    # Utterance-level encoder.
    utt_encoder_layer = self.rec_cell(units=self.encoder_dim,
                                      return_sequences=True,
                                      return_state=True,
                                      name='utterance_rnn')

    # Build the word-level encoder. Starting from the embedded input keeps
    # h_out defined even when num_encoder_layers is 0 (the previous
    # index-based loop left it unbound in that case).
    h_out = Dropout(0.2)(embed_layer(word_input))
    for layer in word_encoder_layers:
        h_out = layer(h_out)
    h_att_word = word_att_layer(h_out)
    # `outputs=` is the Keras 2 keyword; the legacy `output=` spelling was
    # inconsistent with the final Model(...) call below.
    word_encoder = Model(inputs=word_input, outputs=h_att_word)

    # Build the context-level encoder. Unpacking three values requires
    # rec_cell to return (sequence, state_h, state_c) with return_state=True.
    context_encoder = TimeDistributed(word_encoder)(conversation_input)
    context_h_out, state_h, state_c = utt_encoder_layer(context_encoder)

    # Decoder.
    decoder_embed = embed_layer(decoder_input)
    decoder_embed = Dropout(0.2)(decoder_embed)
    decoder = self.rec_cell(units=self.decoder_dim, return_sequences=True)
    decoder_output = decoder(decoder_embed)
    decoder_combined_context = Lambda(
        self._dot_attention_block)([context_h_out, decoder_output])

    logits = Dense(units=self.vocab_size,
                   activation='linear',
                   name='logits')(decoder_combined_context)

    self.model = Model(inputs=[conversation_input, decoder_input],
                       outputs=logits)
    self.model.compile(optimizer=self.optimizer, loss=self.sparse_loss)
    self.model.summary()
def __init__(self, C=4, V=40000, MAX_LEN=600, embed_matrix=None,
             name='sscharmodel.h5', PE=False, train_embed=False):
    """Create the multi-kernel CNN classifier and store it on ``self``.

    Args:
        C: Width of the softmax output layer.
        V: ``input_dim`` of the Embedding used when ``embed_matrix`` is None.
        MAX_LEN: Number of ids per input sequence.
        embed_matrix: Optional pretrained embedding matrix.
        name: Stored unchanged in ``self.name``.
        PE: When truthy, a second int32 input named ``'PE_in'`` is embedded
            and concatenated with the token embeddings.
        train_embed: ``trainable`` flag for the pretrained embedding.
    """
    self.MAX_LEN = MAX_LEN
    self.PE = PE
    self.name = name

    # Original author's note: the CNN layers do not support masking, so
    # the embeddings are created without mask_zero=True.
    token_ids = Input(shape=(MAX_LEN,), dtype='int32')
    if embed_matrix is not None:
        token_embedding = Embedding(embed_matrix.shape[0],
                                    embed_matrix.shape[1],
                                    weights=[embed_matrix],
                                    trainable=train_embed)
    else:
        token_embedding = Embedding(V, 32)
    features = token_embedding(token_ids)

    model_inputs = token_ids
    if self.PE:
        position_ids = Input(shape=(MAX_LEN,), dtype='int32', name='PE_in')
        position_vectors = Embedding(
            self.MAX_LEN, 32, name='PE')(position_ids)
        features = Concatenate()([features, position_vectors])
        model_inputs = [token_ids, position_ids]

    # One convolution per kernel size; each feature map is summarised by
    # max pooling, average pooling and AttLayer.
    branch_outputs = []
    for kernel_size in (2, 3, 4, 5):
        conv = Conv1D(
            128, kernel_size, activation='relu', padding='same')(features)
        max_pooled = GlobalMaxPool1D()(conv)
        avg_pooled = GlobalAveragePooling1D()(conv)
        attended = AttLayer()(conv)
        branch_outputs.append(
            Concatenate()([max_pooled, avg_pooled, attended]))
    merged = Concatenate()(branch_outputs)

    hidden = Dense(128, activation='relu')(merged)
    class_probs = Dense(C, activation='softmax')(hidden)

    model = Model(model_inputs, class_probs)
    optimizer = Adagrad(lr=0.005)
    model.compile(optimizer, 'categorical_crossentropy', metrics=['acc'])
    self.model = model
def RNNAtt(embed_mat, MAX_LEN, num_cls, rnn_sz=100):
    """Build and compile a BiLSTM + ``AttLayer`` sequence classifier.

    Args:
        embed_mat: Pretrained embedding matrix; loaded as frozen weights.
        MAX_LEN: Number of token ids per input sequence.
        num_cls: Width of the softmax output layer.
        rnn_sz: Units of the LSTM inside the Bidirectional wrapper.

    Returns:
        A compiled Keras ``Model``.
    """
    n_rows, n_cols = embed_mat.shape[0], embed_mat.shape[1]
    frozen_embedding = Embedding(n_rows,
                                 n_cols,
                                 weights=[embed_mat],
                                 input_length=MAX_LEN,
                                 trainable=False)

    sequence_input = Input(shape=(MAX_LEN,), dtype='int32')
    token_vectors = frozen_embedding(sequence_input)
    states = Bidirectional(
        LSTM(rnn_sz, return_sequences=True))(token_vectors)
    summary_vector = AttLayer()(states)
    class_probs = Dense(num_cls, activation='softmax')(summary_vector)

    model = Model(sequence_input, class_probs)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adagrad',
                  metrics=['acc'])
    return model
def build_model(self):
    """Wire the layers created by ``_init_layers`` into ``self.model``.

    Pipeline: token ids -> embedding -> optional dropout -> recurrent stack
    -> ``AttLayer`` -> ``self.out_layer``. The model summary is printed and
    the (uncompiled) model is stored in ``self.model``.
    """
    self._init_layers()

    in_layer = Input(shape=(None,))
    h_out = self.embedding_layer(in_layer)
    if self.embedding_dropout_rate > 0.:
        h_out = Dropout(rate=self.embedding_dropout_rate)(h_out)

    # Thread the tensor through the stack. Starting from the embedded
    # input keeps h_out defined even when recurrent_cells is empty (the
    # previous index-based loop raised UnboundLocalError in that case).
    for layer in self.recurrent_cells:
        h_out = layer(h_out)

    # A fresh AttLayer is created here instead of reusing a layer stored
    # on self.
    x = AttLayer(attention_dim=self.hidden_dim)(h_out)
    y_out = self.out_layer(x)

    model = Model(inputs=in_layer, outputs=y_out)
    model.summary()
    self.model = model
def feed_input(input_x, sub_name):
    """Encode ``input_x`` with dual embeddings and multi-width convolutions.

    ``num_word``, ``embed_dim``, ``embed_mat`` and ``conv_dim`` come from
    the enclosing scope.

    Args:
        input_x: Tensor of token ids.
        sub_name: Prefix for the two embedding layer names.

    Returns:
        Batch-normalised concatenation of one ``AttLayer`` output per
        kernel width.
    """
    # Two embeddings start from the same matrix: one frozen, one trainable.
    static_vectors = Embedding(input_dim=num_word,
                               output_dim=embed_dim,
                               name=sub_name + 'embed_s',
                               weights=[embed_mat],
                               trainable=False)(input_x)
    tuned_vectors = Embedding(input_dim=num_word,
                              output_dim=embed_dim,
                              name=sub_name + 'embed_d',
                              weights=[embed_mat],
                              trainable=True)(input_x)
    x = Concatenate()([static_vectors, tuned_vectors])

    # CNN model: one convolution per kernel width, each summarised by
    # AttLayer.
    branches = []
    for width in (2, 3, 4, 5):
        feature_map = Conv1D(conv_dim, width, activation='relu')(x)
        branches.append(AttLayer()(feature_map))

    merged = Concatenate()(branches)
    return BatchNormalization()(merged)
def __init__(self, sequence_length, num_classes, vocab_size,
             embedding_size, hidden_size, l2_reg_lambda=0.0):
    """Build the discriminator graph: embedding -> BiGRU -> AttLayer -> linear.

    Args:
        sequence_length: Number of token ids per example.
        num_classes: Number of output classes.
        vocab_size: ``input_dim`` of the Embedding layer.
        embedding_size: ``output_dim`` of the Embedding layer.
        hidden_size: Units of the GRU; the output projection expects
            ``hidden_size * 2`` input features.
        l2_reg_lambda: Weight of the L2 penalty on the output projection.

    NOTE(review): SOURCE was collapsed onto one line, so the extent of the
    ``with`` scopes below is inferred -- confirm against the original file.
    """
    # Placeholders for input, output and dropout.
    self.input_x = tf.placeholder(
        tf.int32, [None, sequence_length], name="input_x")
    self.input_y = tf.placeholder(
        tf.float32, [None, num_classes], name="input_y")
    self.dropout_keep_prob = tf.placeholder(
        tf.float32, name="dropout_keep_prob")

    # Running L2 regularisation term (only has an effect when
    # l2_reg_lambda is non-zero).
    l2_loss = tf.constant(0.0)

    with tf.variable_scope('discriminator'):
        # Embedding layer and sequence encoder.
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            token_ids = Input(tensor=self.input_x)
            embedding_layer = Embedding(vocab_size,
                                        embedding_size,
                                        input_length=sequence_length,
                                        mask_zero=True,
                                        trainable=True)
            encoder = Bidirectional(GRU(hidden_size, return_sequences=True))
            attention = AttLayer(bias=True, name='att_1')

            states = encoder(embedding_layer(token_ids))
            self.encoded_f = attention(states)

        # Final (unnormalised) scores and predictions.
        with tf.name_scope("output"):
            W = tf.Variable(
                tf.truncated_normal([hidden_size * 2, num_classes],
                                    stddev=0.1),
                name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(
                self.encoded_f, W, b, name="scores")
            self.ypred_for_auc = tf.nn.softmax(self.scores)
            self.predictions = tf.argmax(
                self.scores, 1, name="predictions")

        # Mean cross-entropy loss plus the weighted L2 term.
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.scores, labels=self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

    # Only variables whose name contains 'discriminator' are optimised.
    self.params = [
        variable for variable in tf.trainable_variables()
        if 'discriminator' in variable.name
    ]
    d_optimizer = tf.train.AdamOptimizer(1e-4)
    grads_and_vars = d_optimizer.compute_gradients(
        self.loss, self.params, aggregation_method=2)
    self.train_op = d_optimizer.apply_gradients(grads_and_vars)
def _init_layers(self):
    """Instantiate (but do not connect) the layers of the single-level model.

    Reads ``vocab_size``, ``embedding_dim``, ``recurrent_cell``,
    ``hidden_dim``, ``hidden_layers`` and ``bidirectional`` from ``self``
    and stores every created layer back on ``self``.
    """
    # input_dim is vocab_size + 1, one more row than the vocabulary size;
    # masking is disabled.
    self.embedding_layer = Embedding(
        input_dim=self.vocab_size + 1,
        output_dim=self.embedding_dim,
        mask_zero=False,
    )

    # One sequence-returning recurrent layer per hidden layer, wrapped in
    # Bidirectional when self.bidirectional is truthy.
    stack = []
    for _ in range(self.hidden_layers):
        if self.bidirectional:
            stack.append(Bidirectional(self.recurrent_cell(
                units=self.hidden_dim, return_sequences=True)))
        else:
            stack.append(self.recurrent_cell(
                units=self.hidden_dim, return_sequences=True))
    self.recurrent_cells = stack

    self.attention_layer = AttLayer(attention_dim=self.hidden_dim)
    self.out_layer = Dense(units=1, activation='sigmoid')
# Debug output: shape of the embedded word sequence.
print('embedded_sequences SHAPE')
print(embedded_sequences.get_shape())

# Word-level recurrent layer. This is a MultiplicativeLSTM, not a
# bidirectional GRU, although the variable keeps its historical name.
word_rnn = MultiplicativeLSTM(gru_output_size, return_sequences=True)
l_gru = word_rnn(embedded_sequences)
word_projection = TimeDistributed(Dense(units=gru_output_size))
l_dense = word_projection(l_gru)

print('l_gru SHAPE')
print(l_gru.get_shape())
print('l_dense SHAPE')
print(l_dense.get_shape())

# Word-level attention layer.
l_att = AttLayer()(l_dense)
print('l_att SHAPE')
print(l_att.get_shape())

# Wrap the word-level stack as a model so it can be applied per sentence.
sentEncoder = Model(sentence_input, l_att)
sentEncoder.compile(optimizer=Adam(0.0001), loss='mse', metrics={})

review_encoder = TimeDistributed(sentEncoder)(review_input)
# sentEncoder.summary()
print('l_att SHAPE')
# Convolutional branches over the flattened document (kernel sizes 4 and
# 8); each is max-pooled with a window of a fifth of the document length.
conv2 = Conv1D(embedding_dims, kernel_size=4, activation='tanh')(reshape)
conv2 = MaxPool1D(int((maxsents * maxlen) / 5))(conv2)
conv2 = Flatten()(conv2)

conv3 = Conv1D(embedding_dims, kernel_size=8, activation='tanh')(reshape)
conv3 = MaxPool1D(int((maxsents * maxlen) / 5))(conv3)
conv3 = Flatten()(conv3)

# Fuse the incoming `fasttext` tensor with all convolutional branches and
# project back to embedding_dims; `fasttext` is rebound to the result.
concatenated_tensor = keras.layers.Concatenate(axis=1)(
    [fasttext, conv1, conv2, conv3])
fasttext = Dense(units=embedding_dims,
                 activation='tanh')(concatenated_tensor)

# Word level: bidirectional GRU over the words of every sentence, with
# the document vector repeated at every word position.
# NOTE(review): the Reshape target implies embedding_dims equals
# gru_output_size -- confirm.
l_gru_sent = TimeDistributed(
    Bidirectional(GRU(gru_output_size,
                      return_sequences=True)))(review_embedded)
l_gru_sent = keras.layers.Concatenate()([
    l_gru_sent,
    Reshape((maxsents, maxlen, gru_output_size))(
        keras.layers.RepeatVector(maxsents * maxlen)(fasttext)),
])
l_dense_sent = TimeDistributed(
    TimeDistributed(Dense(units=gru_output_size)))(l_gru_sent)
l_att_sent = TimeDistributed(AttLayer())(l_dense_sent)

# Sentence level: bidirectional GRU over the sentence vectors, with the
# document vector repeated at every sentence position.
l_gru_review = Bidirectional(
    GRU(gru_output_size, return_sequences=True))(l_att_sent)
l_gru_review = keras.layers.Concatenate()([
    l_gru_review,
    keras.layers.RepeatVector(maxsents)(fasttext),
])
l_dense_review = TimeDistributed(
    Dense(units=gru_output_size))(l_gru_review)
postp = AttLayer()(l_dense_review)

# Memory mechanism: dense layer initialised from init_m_aux (transposed)
# with a zero bias.
aux_mem = Dense(
    units=(final_output),
    activation='tanh',
    weights=(init_m_aux.transpose(),
             np.zeros(gru_output_size + embedding_dims)),
    name='memory',
)(aux_input)
postp_aux = keras.layers.Concatenate(axis=1)(
    [postp, fasttext, aux_mem, age_input, dep_input])
postp = Dropout(0.05)(postp_aux)
postp = Dense(units=(final_output))(postp)

# Softmax output layer, initialised from init_m_full / bias_full.
preds = Dense(units=y_train.shape[1],
              activation='softmax',
              weights=[init_m_full, bias_full],
              name='main')(postp)
def build_ABCNN(self, paramsObj, weight=None):
    """Build and compile an attention-based CNN classifier.

    Args:
        paramsObj: Settings object; ``use_word_embedding``,
            ``train_embedding``, ``filter_size``, ``pool_size`` and
            ``num_filter`` are read. The last two only bound how many
            convolutions are created (via ``zip``).
        weight: Optional pretrained embedding matrix. ``None`` or an empty
            sequence means a randomly initialised embedding.

    Returns:
        A compiled ``Model`` with ``config.ClassNum`` softmax outputs.
    """
    # `weight` used to default to a shared mutable list; None is the
    # backward-compatible sentinel. len() keeps numpy arrays working.
    has_weights = weight is not None and len(weight) > 0
    embedding_kwargs = {'input_length': config.MAX_SEQ_LENGTH}
    if has_weights and paramsObj.use_word_embedding:
        embedding_kwargs['weights'] = [weight]
        embedding_kwargs['trainable'] = paramsObj.train_embedding
    embedding_layer = Embedding(config.MAX_NUM_WORDS, config.EMBEDDING_DIM,
                                **embedding_kwargs)

    main_input = Input(shape=(config.MAX_SEQ_LENGTH,),
                       dtype='int32',
                       name='main_input')
    embedding_sequences = embedding_layer(main_input)

    # Hard-wired experiment switches (previously `inner` and `type`, the
    # latter shadowing the builtin). With these values every convolution
    # output is kept as-is and attention is applied once, after merging.
    attention_scope = 'outer'   # 'inner': attention per convolution
    pooling_mode = 'atten'      # 'max': max-pool each convolution instead
    padding = 'valid' if attention_scope == 'inner' else 'same'

    # Fixed number of filters; paramsObj.num_filter / pool_size are not
    # used for layer sizes here.
    nb_filter = 10
    conv_att_features = []
    for filter_length, _pool_size, _num_filter in zip(
            paramsObj.filter_size, paramsObj.pool_size,
            paramsObj.num_filter):
        conv_out = Conv1D(filters=nb_filter,
                          kernel_size=filter_length,
                          padding=padding,
                          activation='relu',
                          name='convLayer' + str(filter_length))(
                              embedding_sequences)
        if pooling_mode == 'atten' and attention_scope == 'inner':
            att_inpt = TimeDistributed(Dense(nb_filter))(conv_out)
            conv_att_features.append(
                AttLayer(name='AttLayer' + str(filter_length))(att_inpt))
        elif pooling_mode == 'max':
            conv_att_features.append(
                MaxPooling1D(
                    name='maxPooling' + str(filter_length),
                    pool_size=(config.MAX_SEQ_LENGTH - filter_length + 1),
                )(conv_out))
        else:
            conv_att_features.append(conv_out)

    # Branch on the number of feature maps actually built: zip() stops at
    # the shortest parameter list, so len(paramsObj.filter_size) may
    # overstate it. This mirrors build_hang.
    if len(conv_att_features) > 1:
        features = concatenate(conv_att_features, axis=1)
    else:
        features = conv_att_features[0]

    if pooling_mode == 'max':
        features = Flatten()(features)
    if attention_scope == 'outer':
        features = TimeDistributed(
            Dense(len(paramsObj.filter_size) * nb_filter),
            name='DenseTimeDistributed')(features)
        features = AttLayer(name='AttLayer')(features)

    # NOTE(review): a rate of 0.9 drops 90% of the units -- confirm this is
    # intended and not meant to be a keep-probability.
    features = Dropout(0.9)(features)

    hidden_dims = 100
    hidden = Dense(units=hidden_dims, activation='relu')(features)
    predictions = Dense(config.ClassNum, activation='softmax')(hidden)

    model = Model(main_input, predictions)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
def build_model(self):
    """Build, compile and summarise the context/current/response seq2seq model.

    The context and the current utterance are encoded separately (two
    stacked recurrent layers plus ``AttLayer`` each). Their two vectors form
    a length-2 sequence that a backwards top-level encoder consumes, and
    the decoder attends over that encoder's outputs with dot-product
    attention. The result is stored in ``self.model``; nothing is returned.
    """
    context_input = Input(shape=(None,), name='context_input')
    current_input = Input(shape=(None,), name='current_input')
    response_input = Input(shape=(None,), name='response_input')
    decoder_target = tf.placeholder(shape=[None, None], dtype='int32')

    # One embedding shared by context, current utterance and response.
    embed_layer = Embedding(input_dim=self.vocab_size,
                            output_dim=self.embedding_dim,
                            mask_zero=True,
                            name='embedding')
    context_vectors = embed_layer(context_input)
    current_vectors = embed_layer(current_input)
    context_vectors = Dropout(0.2)(context_vectors)
    current_vectors = Dropout(0.2)(current_vectors)

    # ENCODER layers. Only the last "current" layer returns its state.
    context_rnn_1 = self.rec_cell(units=self.encoder_dim,
                                  return_sequences=True,
                                  name='context_encoder1')
    context_rnn_2 = self.rec_cell(units=self.encoder_dim,
                                  return_sequences=True,
                                  return_state=False,
                                  name='context_encoder2')
    current_rnn_1 = self.rec_cell(units=self.encoder_dim,
                                  return_sequences=True,
                                  return_state=False,
                                  name='current_encoder1')
    current_rnn_2 = self.rec_cell(units=self.encoder_dim,
                                  return_sequences=True,
                                  return_state=True,
                                  name='current_encoder2')

    def _attend_as_step(sequence):
        # AttLayer output with an axis inserted at position 1.
        pooled = AttLayer(attention_dim=200)(sequence)
        return Lambda(lambda x: K.expand_dims(x, 1))(pooled)

    # Encode context.
    context_states = context_rnn_2(context_rnn_1(context_vectors))
    context_step = _attend_as_step(context_states)

    # Encode the current utterance. Unpacking three values requires
    # rec_cell to return (sequence, state_h, state_c) with return_state=True.
    current_states, state_h, state_c = current_rnn_2(
        current_rnn_1(current_vectors))
    current_step = _attend_as_step(current_states)

    step_sequence = Concatenate(axis=1, name='context_current_concat')(
        [context_step, current_step])
    encoder_output = self.rec_cell(units=self.encoder_dim,
                                   return_sequences=True,
                                   return_state=False,
                                   go_backwards=True,
                                   name='top_level_encoder')(step_sequence)

    # DECODER, initialised from the current-utterance encoder state.
    rnn_decoder = self.rec_cell(units=self.decoder_dim,
                                return_sequences=True,
                                name='decoder1')
    response_vectors = embed_layer(response_input)
    response_vectors = Dropout(0.2)(response_vectors)
    decoder_output = rnn_decoder(response_vectors,
                                 initial_state=[state_h, state_c])

    # Dot-product attention of decoder steps over encoder steps.
    scores = Dot(axes=[2, 2], name='decoder_encoder_dot')(
        [decoder_output, encoder_output])
    attention_probs = Activation('softmax', name='attention_probs')(scores)
    attended = Dot(axes=[2, 1], name='att_encoder_context')(
        [attention_probs, encoder_output])
    combined = Concatenate(name='decoder_context_concat')(
        [attended, decoder_output])

    logits_out = Dense(units=self.vocab_size,
                       activation='linear',
                       name='logits')(combined)

    self.model = Model(
        inputs=[context_input, current_input, response_input],
        outputs=logits_out)
    self.model.compile(loss=self.sparse_loss,
                       optimizer=self.optimizer,
                       target_tensors=[decoder_target])
    self.model.summary()
# Embedding layer shared by every sentence.
embedding_layer = Embedding(max_features,
                            embedding_dims,
                            input_length=maxlen)

# WORD LEVEL
sentence_input = Input(shape=(maxlen,), dtype='int32')
embedded_sequences = embedding_layer(sentence_input)

# Bidirectional GRU over the words, then a per-timestep projection.
word_rnn = Bidirectional(GRU(gru_output_size, return_sequences=True))
l_gru = word_rnn(embedded_sequences)
word_projection = TimeDistributed(Dense(units=gru_output_size))
l_dense = word_projection(l_gru)

# Word-level attention layer.
l_att = AttLayer()(l_dense)

# Wrap the word-level stack as a model so it can be applied per sentence.
sentEncoder = Model(sentence_input, l_att)
sentEncoder.compile(optimizer=Adam(0.0001), loss='mse', metrics={})

review_encoder = TimeDistributed(sentEncoder)(review_input)

# SENTENCE LEVEL
# Bidirectional GRU over the sentence vectors, then a per-timestep
# projection.
sentence_rnn = Bidirectional(GRU(gru_output_size, return_sequences=True))
l_gru_sent = sentence_rnn(review_encoder)
l_dense_sent = TimeDistributed(Dense(units=gru_output_size))(l_gru_sent)
def build_hang(self, paramsObj, weight=None):
    """Build and compile a CNN + attention classifier.

    Args:
        paramsObj: Settings object; ``use_word_embedding``,
            ``train_embedding``, ``dropout_rate``, ``filter_size``,
            ``pool_size``, ``num_filter`` and ``dense_layer_size`` are read.
        weight: Optional pretrained embedding matrix. ``None`` or an empty
            sequence means a randomly initialised embedding.

    Returns:
        A compiled ``Model`` with ``config.ClassNum`` softmax outputs.
    """
    # `weight` used to default to a shared mutable list; None is the
    # backward-compatible sentinel. len() keeps numpy arrays working.
    has_weights = weight is not None and len(weight) > 0
    embedding_kwargs = {'input_length': config.MAX_SEQ_LENGTH}
    if has_weights and paramsObj.use_word_embedding:
        embedding_kwargs['weights'] = [weight]
        embedding_kwargs['trainable'] = paramsObj.train_embedding
    embedding_layer = Embedding(config.MAX_NUM_WORDS, config.EMBEDDING_DIM,
                                **embedding_kwargs)

    # The Input shape lists only the token axis: Keras adds the batch axis
    # and the embedding adds the feature axis.
    main_input = Input(shape=(config.MAX_SEQ_LENGTH,),
                       dtype='int32',
                       name='main_input')
    embedding_sequences = embedding_layer(main_input)
    embedding_sequences = Dropout(
        paramsObj.dropout_rate)(embedding_sequences)

    # One convolution + max-pool branch per (filter_size, pool_size,
    # num_filter) triple; zip() stops at the shortest list.
    conv_feature_list = []
    for filter_size, pool_size, num_filter in zip(paramsObj.filter_size,
                                                  paramsObj.pool_size,
                                                  paramsObj.num_filter):
        conv_layer = Conv1D(filters=num_filter,
                            kernel_size=filter_size,
                            strides=1,
                            padding='same',
                            activation='relu')(embedding_sequences)
        conv_feature_list.append(
            MaxPooling1D(pool_size=pool_size)(conv_layer))

    if len(conv_feature_list) == 1:
        out = conv_feature_list[0]
    else:
        # Branches are joined along axis 1.
        out = concatenate(conv_feature_list, axis=1)

    # Per-timestep projection followed by AttLayer.
    X = TimeDistributed(
        Dense(len(paramsObj.filter_size) * paramsObj.pool_size[0]),
        name='DenseTimeDistributed')(out)
    X = AttLayer(name='AttLayer')(X)

    # Dense head.
    X = Dropout(paramsObj.dropout_rate)(X)
    X = Dense(paramsObj.dense_layer_size,
              kernel_initializer='uniform',
              activation='relu')(X)

    # Output layer.
    predictions = Dense(config.ClassNum, activation='softmax')(X)

    model = Model(main_input, predictions)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
return h2 h2 = feed_input(input_x, 'term') h2c = feed_input(input_xc, 'char') h2 = Concatenate()([h2, h2c]) # 训练集合大小为315364,大约1000*256 embed = Embedding(input_dim=num_word, output_dim=embed_dim, name='embed_s', weights=[embed_mat], trainable=True) x = embed(input_x) h2_term = LSTM(rnn_unit_1, return_sequences=True)(x) h2_term = AttLayer()(h2_term) x_c = embed(input_xc) h2_c = LSTM(rnn_unit_1, return_sequences=True)(x_c) h2_c = AttLayer()(h2_c) h2 = Concatenate()([h2_term, h2_c]) pred = Dense(class_num, activation='softmax')(h2) k_model = keras.Model([input_x, input_xc], pred) opt = keras.optimizers.Adam(0.001) k_model.compile(opt, 'categorical_crossentropy', [ 'acc', ]) earlystop = EarlyStopping(min_delta=0.01, patience=1) save_best = ModelCheckpoint(os.path.join(MODEL_PATH, "model.h5"), save_best_only=True)