def learn_model(self, features, labels, degrade_mask, epochs=10, batch_size=None, model=None):
    if not model and not self.model:
        model = Sequential()
        masking = Masking(mask_value=0.0,
                          input_shape=(features.shape[1], features.shape[2],))
        model.add(masking)
        crf = CRF(  # input_shape=(features.shape[1], features.shape[2],),
            units=labels.shape[-1],
            sparse_target=False,
            # kernel_regularizer=keras.regularizers.l2(0.),
            # bias_regularizer=keras.regularizers.l2(0.005),
            # chain_regularizer=keras.regularizers.l2(0.005),
            # boundary_regularizer=keras.regularizers.l2(0.005),
            learn_mode='marginal',
            test_mode='marginal',
            unroll=self.unroll_flag,
        )
        model.add(crf)
        model.compile(optimizer=self.opt, loss=crf.loss_function, metrics=[crf.accuracy])
    elif self.model:
        model = self.model
    # assert features.shape[0] == len(self.degrade_mask)
    weights = self._weight_logic(features, degrade_mask)
    model.fit(features, labels, epochs=epochs, batch_size=batch_size, verbose=1,
              sample_weight=weights)
    return model
def build_model(input_length, emb_input_dim, emb_out_dim, lstm_hidden_units, num_cls,
                embedding_matrix=None):
    model = Sequential()
    if embedding_matrix is None:
        model.add(Embedding(emb_input_dim, emb_out_dim, mask_zero=True))
    else:
        model.add(Embedding(emb_input_dim, emb_out_dim,
                            weights=[embedding_matrix], trainable=True))
    model.add(Bidirectional(LSTM(lstm_hidden_units, return_sequences=True)))
    model.add(Dropout(DROPOUT_RATE))
    model.add(TimeDistributed(Dense(num_cls)))
    crf_layer = CRF(num_cls)
    model.add(crf_layer)
    model.compile('rmsprop', loss=crf_layer.loss_function, metrics=[crf_layer.accuracy])
    model.summary()
    return model
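# A minimal usage sketch for build_model above. The sizes and the random dummy
# data are illustrative assumptions (and the module-level DROPOUT_RATE constant
# used by build_model is assumed to be defined), not values from the original.
import numpy as np

if __name__ == '__main__':
    seq_len, vocab, classes = 50, 1000, 5
    m = build_model(input_length=seq_len, emb_input_dim=vocab, emb_out_dim=64,
                    lstm_hidden_units=100, num_cls=classes)
    x = np.random.randint(1, vocab, size=(32, seq_len))  # 0 is reserved for padding
    # keras-contrib's CRF defaults to sparse_target=False, so tags are one-hot
    y = np.eye(classes)[np.random.randint(0, classes, size=(32, seq_len))]
    m.fit(x, y, epochs=1, batch_size=8)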
def build_bilstm_crf_model(NUM_CLASS):
    """LSTM + CRF over pre-computed 768-dim features
    (the embedding and BiLSTM variants are kept below as comments)."""
    model = Sequential()
    # model.add(Embedding(VOCAB_SIZE, output_dim=EMBEDDING_OUT_DIM, mask_zero=True))
    # model.add(Dropout(DROPOUT_RATE))
    model.add(LSTM(HIDDEN_UNITS, return_sequences=True))
    # model.add(Bidirectional(LSTM(HIDDEN_UNITS, return_sequences=True)))
    # model.add(Dropout(DROPOUT_RATE))
    model.add(TimeDistributed(Dense(NUM_CLASS)))
    model.add(Dropout(0.5))
    crf_layer = CRF(NUM_CLASS, sparse_target=True)
    model.add(crf_layer)
    model.build((None, 238, 768))
    # model.summary()
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
    model.compile(loss=losses.crf_loss, optimizer=adam, metrics=[metrics.crf_accuracy])
    return model
def tokenvec_bilstm2_crf_model(self):
    """Build the model: train with pre-trained word vectors.

    :return: compiled Keras model
    """
    model = Sequential()
    embedding_layer = Embedding(
        self.VOCAB_SIZE + 1,              # largest integer index + 1
        self.EMBEDDING_DIM,               # embedding dimension of the character vectors
        weights=[self.embedding_matrix],  # pre-trained character vectors
        input_length=self.TIME_STAMPS,
        trainable=False,                  # whether to fine-tune the embeddings
        mask_zero=True)                   # treat 0 as a special padding value to be masked;
                                          # all downstream layers must support masking
    model.add(embedding_layer)
    model.add(Bidirectional(LSTM(128, return_sequences=True)))  # bidirectional LSTM layer
    model.add(Dropout(0.5))
    model.add(Bidirectional(LSTM(64, return_sequences=True)))
    model.add(Dropout(0.5))
    model.add(TimeDistributed(Dense(self.NUM_CLASSES)))  # per-timestep dense layer over entity types
    crf_layer = CRF(self.NUM_CLASSES, sparse_target=True)
    model.add(crf_layer)
    model.compile('adam', loss=crf_layer.loss_function, metrics=[crf_layer.accuracy])
    model.summary()
    return model
def _build_network(self):
    model = Sequential()
    if self.embedding_matrix is not None:
        print('using pre-trained embedding matrix')
        embedding_layer = Embedding(self.vocab_size + 1,
                                    self.embedding_dim,
                                    weights=[self.embedding_matrix],
                                    input_length=self.time_stamps,
                                    trainable=False,
                                    mask_zero=True)
    else:
        embedding_layer = Embedding(self.vocab_size + 1,
                                    self.embedding_dim,
                                    input_length=self.time_stamps,
                                    mask_zero=True)
    model.add(embedding_layer)
    model.add(Bidirectional(LSTM(128, return_sequences=True)))
    model.add(Dropout(0.5))
    model.add(Bidirectional(LSTM(64, return_sequences=True)))
    model.add(Dropout(0.5))
    model.add(TimeDistributed(Dense(self.num_classes)))
    crf_layer = CRF(self.num_classes, sparse_target=True)
    model.add(crf_layer)
    model.compile('adam', loss=crf_layer.loss_function, metrics=[crf_layer.accuracy])
    model.summary()
    self.model = model
def build_embedding_lstm2_crf_model(VOCAB_SIZE, NUM_CLASS, TIME_STAMPS):
    """Embedding + GRU + CRF (the BiLSTM variant is kept below as a comment)."""
    model = Sequential()
    model.add(Embedding(VOCAB_SIZE, output_dim=EMBEDDING_OUT_DIM, mask_zero=True))
    # model.add(Dropout(DROPOUT_RATE))
    model.add(recurrent.GRU(HIDDEN_UNITS // 2, return_sequences=True))
    # model.add(Bidirectional(LSTM(HIDDEN_UNITS, return_sequences=True)))
    # model.add(Dropout(DROPOUT_RATE))
    model.add(TimeDistributed(Dense(NUM_CLASS)))
    crf_layer = CRF(NUM_CLASS, sparse_target=True)
    model.add(crf_layer)
    # model.summary()
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
    model.compile(adam, loss=crf_layer.loss_function, metrics=[crf_layer.accuracy])
    return model
def tokenvec_bilstm2_crf_model(self):
    model = Sequential()
    embedding_layer = Embedding(self.VOCAB_SIZE + 1,
                                self.EMBEDDING_DIM,
                                weights=[self.embedding_matrix],
                                input_length=self.TIME_STAMPS,
                                trainable=False,
                                mask_zero=True)
    model.add(embedding_layer)  # (150, 300)
    model.add(Bidirectional(LSTM(128, return_sequences=True)))  # (150, 256): 2*128 = 256
    model.add(Dropout(0.5))
    model.add(Bidirectional(LSTM(64, return_sequences=True)))   # (150, 128): 2*64 = 128
    model.add(Dropout(0.5))
    # model.add(Bidirectional(LSTM(32, return_sequences=True)))  # (150, 64): 2*32 = 64
    # model.add(Dropout(0.5))
    model.add(TimeDistributed(Dense(self.NUM_CLASSES)))  # (150, 15): 15 classes in class_dict
    crf_layer = CRF(self.NUM_CLASSES,
                    sparse_target=True)  # (150, 15): CRF decodes the highest-scoring tag sequence
    model.add(crf_layer)
    model.compile('adam', loss=crf_layer.loss_function, metrics=[crf_layer.accuracy])
    model.summary()
    return model
def create_model():
    print('load data')
    nb_words, word_index = get_tokenizer()
    print('create model')
    word2vec = Word2Vec.load('e:/2018汽车行业文本处理/train_data/w2v_char.mod')
    embedding_matrix = np.zeros((nb_words, 100))
    for word, i in word_index.items():
        if word in word2vec.wv.vocab:
            try:
                embedding_matrix[i] = word2vec.wv.word_vec(word)
            except KeyError:
                pass
    embedding_layer = Embedding(nb_words, 100, input_length=130,
                                weights=[embedding_matrix], trainable=True)
    model = Sequential()
    model.add(embedding_layer)  # use the pre-trained embedding matrix built above
    model.add(Bidirectional(LSTM(128, return_sequences=True)))
    model.add(Dropout(0.5))
    model.add(Bidirectional(LSTM(128, return_sequences=True)))
    model.add(Dropout(0.5))
    model.add(TimeDistributed(Dense(11)))
    crf_layer = CRF(11)
    model.add(crf_layer)
    model.compile('rmsprop', loss=crf_layer.loss_function, metrics=[crf_layer.accuracy])
    return model
def build_embedding_bilstm2_crf_model(NUM_CLASS, embeddings_matrix, input_length):
    """Bidirectional LSTM + CRF with a pre-trained embedding layer."""
    model = Sequential()
    model.add(Embedding(len(embeddings_matrix),
                        EMBEDDING_DIM,
                        weights=[embeddings_matrix],
                        input_length=input_length,
                        trainable=False))
    # model.add(Dropout(DROPOUT_RATE))
    model.add(Bidirectional(LSTM(HIDDEN_UNITS // 2, return_sequences=True)))
    # model.add(Bidirectional(LSTM(HIDDEN_UNITS, return_sequences=True)))
    # model.add(Dropout(DROPOUT_RATE))
    model.add(TimeDistributed(Dense(NUM_CLASS)))
    crf_layer = CRF(NUM_CLASS, sparse_target=True)
    model.add(crf_layer)
    # model.summary()
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
    model.compile(adam, loss=crf_layer.loss_function, metrics=[crf_layer.accuracy])
    return model
def tokenvec_bilstm2_crf_model(self):
    model = Sequential()
    # embedding_layer = Embedding(self.VOCAB_SIZE + 1,
    #                             self.EMBEDDING_DIM,
    #                             weights=[self.embedding_matrix],
    #                             input_length=self.TIME_STAMPS,
    #                             trainable=False,
    #                             mask_zero=True)
    model.add(Embedding(self.VOCAB_SIZE + 1, self.EMBEDDING_DIM, mask_zero=True))  # random embedding
    # model.add(embedding_layer)
    model.add(Bidirectional(GRU(256, return_sequences=True,
                                kernel_regularizer=keras.regularizers.l2(0.01))))
    model.add(Dropout(0.5))
    model.add(Bidirectional(GRU(128, return_sequences=True,
                                kernel_regularizer=keras.regularizers.l2(0.01))))
    model.add(Dropout(0.5))
    model.add(TimeDistributed(Dense(self.NUM_CLASSES,
                                    kernel_regularizer=keras.regularizers.l2(0.01))))
    crf_layer = CRF(self.NUM_CLASSES, sparse_target=True,
                    kernel_regularizer=keras.regularizers.l2(0.01))
    model.add(crf_layer)
    adam = keras.optimizers.Adam(lr=0.005)
    model.compile(adam, loss=crf_layer.loss_function, metrics=[crf_layer.accuracy])
    model.summary()
    return model
def __init__(self, function):
    self.vocab_size = 5690
    self.embedding_dim = 50
    self.max_length = 80
    self.hidden_units = 100
    self.dropout_rate = 0.3
    self.batch_size = 64
    self.function = function
    self.char = json.load(open('char.json', 'r'))
    self.num_class_cws = 4
    self.num_class_ner = 10
    self.graph = tf.get_default_graph()
    self.model = Sequential()
    self.model.add(Embedding(self.vocab_size,
                             output_dim=self.embedding_dim,
                             input_length=self.max_length))
    self.model.add(Bidirectional(LSTM(self.hidden_units, return_sequences=True)))
    self.model.add(Dropout(self.dropout_rate))
    self.model.add(Bidirectional(LSTM(self.hidden_units, return_sequences=True)))
    self.model.add(Dropout(self.dropout_rate))
    if self.function == 'cws':
        self.model.add(TimeDistributed(Dense(self.num_class_cws)))
        crf_layer = CRF(self.num_class_cws)
        self.model.add(crf_layer)
    elif self.function == 'ner':
        self.model.add(TimeDistributed(Dense(self.num_class_ner)))
        crf_layer = CRF(self.num_class_ner)
        self.model.add(crf_layer)
    else:
        print('Illegal parameter, please enter [cws | ner]')
        sys.exit(1)  # exit with a non-zero status on invalid input
    self.model.compile('adam', loss=crf_layer.loss_function, metrics=[crf_layer.accuracy])
def create_lstm_classification_model(level, modelname, vocab_size, POS_size, num_classes,
                                     EMBED_MATRIX, embed_dim):
    '''
    create a BiLSTM model
    level: sent, token
    '''
    # model inputs
    x1_in = Input(shape=(None,))
    x2_in = Input(shape=(None,))
    # embedding layer
    word_embed = Embedding(input_dim=vocab_size, output_dim=embed_dim,
                           embeddings_initializer=Constant(EMBED_MATRIX),
                           input_length=None)(x1_in)
    if 'POS' in modelname:
        POS_embed = Embedding(input_dim=POS_size, output_dim=20, input_length=None)(x2_in)
        word_embed = Concatenate()([word_embed, POS_embed])
    # dropout layer
    word_embed = Dropout(0.2)(word_embed)
    # bilstm layer
    x = Bidirectional(LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(word_embed)
    # CRF or dense output layer, according to model name and level
    if level == 'token':
        if 'Crf' in modelname:
            x = TimeDistributed(Dense(50, activation="relu"))(x)
            output = CRF(num_classes)(x)
        else:
            output = TimeDistributed(Dense(num_classes, activation="softmax"))(x)
    else:
        x = Lambda(lambda x: x[:, 0])(x)
        if 'Crf' in modelname:
            x = Dense(50, activation='relu')(x)
            output = CRF(num_classes)(x)
        else:
            output = Dense(num_classes, activation='softmax')(x)
    # build and compile model
    model = Model([x1_in, x2_in], output)
    if 'Crf' in modelname:
        model.compile(loss=crf_loss, optimizer='rmsprop', metrics=[crf_viterbi_accuracy])
    else:
        model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
                      metrics=['categorical_accuracy'])
    model.summary()
    return model
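# Usage sketch for create_lstm_classification_model above; the model name,
# sizes and dummy arrays below are illustrative assumptions only.
import numpy as np

vocab_size, pos_size, n_cls, emb_dim, seq_len = 2000, 40, 7, 100, 60
emb = np.random.randn(vocab_size, emb_dim).astype('float32')
m = create_lstm_classification_model('token', 'BilstmCrfPOS', vocab_size, pos_size,
                                     n_cls, emb, emb_dim)
tok = np.random.randint(1, vocab_size, size=(16, seq_len))
pos = np.random.randint(1, pos_size, size=(16, seq_len))
tags = np.eye(n_cls)[np.random.randint(0, n_cls, size=(16, seq_len))]  # one-hot tags for crf_loss
m.fit([tok, pos], tags, epochs=1, batch_size=4)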
def specify_(self):
    """Specifies a bidirectional LSTM-CRF for sequence tagging using Keras.

    Implements a hybrid long short-term memory network-conditional random
    field (LSTM-CRF) for sequence tagging.

    Returns:
        model: a Keras model, excluding the CRF layer
        crf: a CRF layer implemented in keras_contrib
    """
    ## INPUT LAYER
    # the input layer must be of fixed length because of the CRF output layer
    input_layer = Input(shape=(self.max_seq_len,))

    ## TOKEN EMBEDDING LAYER
    # if specified, load pre-trained token embeddings, otherwise initialize randomly
    if self.token_embedding_matrix is not None:
        # plus 1 because of '0' word.
        token_embed = Embedding(
            input_dim=self.ds[0].word_type_count + 1,
            output_dim=self.token_embedding_matrix.shape[1],
            weights=[self.token_embedding_matrix],
            input_length=self.max_seq_len,
            mask_zero=True,
            trainable=(not self.freeze_token_embeddings))(input_layer)
    else:
        token_embed = Embedding(
            input_dim=self.ds[0].word_type_count + 1,
            output_dim=self.token_embedding_dimension,
            input_length=self.max_seq_len,
            mask_zero=True)(input_layer)

    ## TOKEN BILSTM LAYER
    token_bilstm = Bidirectional(LSTM(
        units=100,
        return_sequences=True,
        recurrent_dropout=self.dropout_rate))(token_embed)

    ## FULLY CONNECTED LAYER
    fully_connected = TimeDistributed(Dense(
        units=100,
        activation=self.activation_function))(token_bilstm)

    ## SEQUENCE OPTIMIZING LAYER (CRF)
    crf = CRF(self.ds[0].tag_type_count)
    output_layer = crf(fully_connected)

    # fully specified model
    model = Model(inputs=input_layer, outputs=output_layer)

    # update class attributes
    self.model, self.crf = model, crf
def create_bert_classification_model(bert_path, level, modelname, num_classes):
    '''
    create a BERT model
    level: sent, token
    '''
    # model inputs
    x1_in = Input(shape=(None,))
    x2_in = Input(shape=(None,))
    # BERT model layer
    bert_model = load_bert_model(bert_path)
    for l in bert_model.layers:
        l.trainable = True
    x = bert_model([x1_in, x2_in])
    # CRF or dense output layer, according to model name and level
    if level == 'token':
        if 'Crf' in modelname:
            x = TimeDistributed(Dense(50, activation="relu"))(x)
            output = CRF(num_classes)(x)
        else:
            output = TimeDistributed(Dense(num_classes, activation="softmax"))(x)
    else:
        x = Lambda(lambda x: x[:, 0])(x)
        if 'Crf' in modelname:
            x = Dense(50, activation='relu')(x)
            output = CRF(num_classes)(x)
        else:
            output = Dense(num_classes, activation='softmax')(x)
    # build and compile model
    model = Model([x1_in, x2_in], output)
    if 'Crf' in modelname:
        model.compile(loss=crf_loss, optimizer=Adam(1e-5), metrics=[crf_viterbi_accuracy])
    else:
        model.compile(loss='categorical_crossentropy', optimizer=Adam(1e-5),
                      metrics=['categorical_accuracy'])
    model.summary()
    return model
def build_embedding_bilstm2_crf_model():
    """Bidirectional LSTM + CRF with an embedding layer."""
    model = Sequential()
    model.add(Embedding(VOCAB_SIZE, output_dim=EMBEDDING_OUT_DIM, input_length=TIME_STAMPS))
    model.add(Bidirectional(LSTM(HIDDEN_UNITS, return_sequences=True)))
    model.add(Dropout(DROPOUT_RATE))
    model.add(Bidirectional(LSTM(HIDDEN_UNITS, return_sequences=True)))
    model.add(Dropout(DROPOUT_RATE))
    model.add(TimeDistributed(Dense(NUM_CLASS)))
    crf_layer = CRF(NUM_CLASS)
    model.add(crf_layer)
    model.compile('rmsprop', loss=crf_layer.loss_function, metrics=[crf_layer.accuracy])
    return model
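# Sketch of decoding predictions from the model above; VOCAB_SIZE, TIME_STAMPS
# etc. are the module-level constants assumed by build_embedding_bilstm2_crf_model,
# and the input batch here is dummy data.
import numpy as np

model = build_embedding_bilstm2_crf_model()
x = np.random.randint(0, VOCAB_SIZE, size=(4, TIME_STAMPS))
# the keras-contrib CRF emits per-timestep one-hot-like scores at test time,
# so an argmax over the last axis recovers the predicted tag indices
pred = model.predict(x)
tag_ids = pred.argmax(axis=-1)  # shape (4, TIME_STAMPS)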
def _get_model(self):
    # build the model
    model = Sequential()
    model.add(Bidirectional(LSTM(256, return_sequences=True),
                            input_shape=(self.sentence_len, self.wordvec_size),
                            name='biLSTM1'))
    model.add(Bidirectional(LSTM(512, return_sequences=True), name='biLSTM2'))
    crf = CRF(WordRecModelTool().get_labels_size(), name='crf')
    model.add(crf)
    model.summary()
    adam = Adam()
    # compile the model
    model.compile(adam, loss=crf.loss_function, metrics=[crf.accuracy])
    return model
def tokenvec_bilstm2_crf_model(self):
    model = Sequential()
    embedding_layer = Embedding(self.VOCAB_SIZE + 1,
                                self.EMBEDDING_DIM,
                                weights=[self.embedding_matrix],
                                input_length=self.TIME_STAMPS,
                                trainable=False)
    model.add(embedding_layer)
    model.add(Bidirectional(LSTM(128, return_sequences=True)))
    model.add(Dropout(0.5))
    model.add(Bidirectional(LSTM(64, return_sequences=True)))
    model.add(Dropout(0.5))
    model.add(TimeDistributed(Dense(self.NUM_CLASSES)))
    crf_layer = CRF(self.NUM_CLASSES, sparse_target=True)
    model.add(crf_layer)
    model.compile('adam', loss=crf_layer.loss_function, metrics=[crf_layer.accuracy])
    model.summary()
    return model
def __init__(self, input_length, para_emb_dim, num_tags, hidden_dim=200, dropout=0.5):
    self.num_tags = num_tags
    self.model = Sequential()
    self.model.add(Bidirectional(LSTM(hidden_dim, return_sequences=True),
                                 input_shape=(input_length, para_emb_dim)))
    self.model.add(Dropout(dropout))
    # self.model.add(Bidirectional(LSTM(hidden_dim, return_sequences=True),
    #                              input_shape=(input_length, para_emb_dim)))
    # self.model.add(Dropout(dropout))
    self.model.add(TimeDistributed(Dense(self.num_tags)))
    crf = CRF(self.num_tags)
    self.model.add(crf)
    self.model.compile('rmsprop', loss=crf_loss, metrics=[crf_accuracy])
def entity_recognition_model(encoder='dgcnn', inference='softmax'):
    # inputs
    text_idx_in = Input(shape=(None,))
    tag_label_in = Input(shape=(None,))
    text_idx, tag_label = text_idx_in, tag_label_in
    # mask_2dim shape: (batch_size, seq_len)
    mask_2dim = Lambda(lambda x: K.cast(K.greater(x, 0), 'float32'))(text_idx)
    # mask_3dim = Lambda(lambda x: K.expand_dims(x, 2))(mask_2dim)  # shape (batch_size, batch_max_seq_len, 1)

    # encoder
    if encoder == 'bilstm':
        sentence_repr = bilstm_encoder(text_idx, mask_2dim)
    elif encoder == 'dgcnn':
        sentence_repr = dgcnn_encoder(text_idx, mask_2dim)
    else:
        raise Exception('`encoder` must be `dgcnn` or `bilstm`')

    # inference, loss
    if inference == 'softmax':
        output = Dense(len(tag2id), activation='softmax')(sentence_repr)
        loss = K.sparse_categorical_crossentropy(tag_label, output)
        loss = K.sum(loss * mask_2dim) / K.sum(mask_2dim)
    elif inference == 'crf':
        out = TimeDistributed(Dense(hidden_dim, activation="relu"))(sentence_repr)
        crf = CRF(len(tag2id), sparse_target=True)
        output = crf(out, mask=mask_2dim)
        tag_label = K.one_hot(K.cast(tag_label, 'int32'), len(tag2id))
        loss = crf.get_negative_log_likelihood(y_true=tag_label, X=out, mask=mask_2dim)
    else:
        raise Exception('`inference` must be `softmax` or `crf`')

    pred_model = Model(inputs=[text_idx_in], outputs=[output])
    model = Model(inputs=[text_idx_in, tag_label_in], outputs=[output])
    model.add_loss(loss)
    model.compile(optimizer=Adam(lr=1e-3))
    model.summary()
    return model, pred_model
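# Because entity_recognition_model wires its loss in via add_loss, the labels
# are fed as a second *input* and no y is passed to fit. The shapes and dummy
# arrays below are illustrative assumptions.
import numpy as np

model, pred_model = entity_recognition_model(encoder='bilstm', inference='crf')
X = np.random.randint(1, 100, size=(8, 30))          # token ids, 0 = padding
Y = np.random.randint(0, len(tag2id), size=(8, 30))  # sparse tag ids
model.fit([X, Y], None, epochs=1, batch_size=4)      # loss already attached via add_loss
tags = pred_model.predict(X).argmax(axis=-1)         # decode with the inference-only model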
def train_model(batch_size, n_epoch, embedding_matrix, word_index, num_class, train_data, tag, Y):
    embedding_layer = Embedding(len(word_index) + 1, 100, weights=[embedding_matrix])
    model = Sequential()
    model.add(embedding_layer)
    model.add(Bidirectional(LSTM(100, return_sequences=True, dropout=0.4, activation='tanh')))
    # model.add(Dropout(0.4))
    # model.add(Bidirectional(LSTM(100, return_sequences=True, dropout=0.4, activation='tanh')))
    # model.add(Dropout(0.4))
    model.add(TimeDistributed(Dense(num_class)))
    # model.add(Dropout(0.4))
    # model.add(Dense(num_class, activation='softmax'))
    crf_layer = CRF(num_class, sparse_target=True)
    model.add(crf_layer)
    model.summary()
    optimizer = optimizers.Adam(lr=0.01)
    model.compile(optimizer=optimizer, loss=crf_layer.loss_function, metrics=[crf_layer.accuracy])
    X_train, X_test, y_train, y_test = train_test_split(train_data, Y, test_size=0.6)
    model.fit(X_train, y_train, verbose=1, batch_size=batch_size, epochs=n_epoch,
              validation_data=(X_test, y_test))
    model.save_weights('./model/crf2.h5')
    return model, X_test, y_test
def model():
    passage_input = layers.Input(shape=(units,), dtype='int16')
    passage_embd = layers.Embedding(MAX_WORD_INDEX + 1, 100,
                                    # weights=[embedding_matrix],
                                    input_length=units,
                                    mask_zero=True)(passage_input)
    # passage_posi = PositionEmbedding(
    #     input_dim=MAX_WORD_INDEX + 1,  # the maximum absolute value of positions
    #     output_dim=100,                # the dimension of embeddings
    #     mask_zero=False,  # the index that represents padding (because `0` will be used in relative positioning)
    #     input_shape=(None,),
    #     name='Pos-Embd')(passage_input)
    # passage = layers.Add()([passage_embd, passage_posi])
    passage = passage_embd
    p_encoder = layers.Bidirectional(layers.LSTM(int(tag_num / 2), return_sequences=True))(passage)
    p_encoder = layers.Bidirectional(layers.LSTM(int(tag_num / 2), return_sequences=True))(p_encoder)
    p_encoder = layers.LSTM(tag_num, return_sequences=True)(p_encoder)
    p_encoder = layers.LSTM(tag_num, return_sequences=True)(p_encoder)
    # p_encoder = passage
    # p_encoder = SeqSelfAttention(attention_activation='sigmoid')(p_encoder)
    # p_encoder = multi_head(2, 1000, tag_num, p_encoder)
    crf = CRF(tag_num, sparse_target=True)
    p_encoder = crf(p_encoder)
    # a_decoder = Attention(1, 4)([p_encoder, q_encoder, alt_encoder])
    # a_decoder = layers.Flatten()(a_decoder)
    # alternatives_input = layers.Flatten()(alternatives_input)
    # a_decoder = layers.Concatenate()([a_decoder, alternatives_input])
    # a_decoder = layers.GlobalMaxPooling1D()(a_decoder)
    output = p_encoder
    rc_model = models.Model(inputs=passage_input, outputs=output)
    opti = optimizers.Adam(lr=1e-3, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    rc_model.compile(optimizer=opti, loss=crf.loss_function, metrics=[crf.accuracy])
    rc_model.summary()
    return rc_model
def build_embedding_lstm2_crf_model(VOCAB_SIZE, NUM_CLASS, TIME_STAMPS):
    """Bidirectional LSTM + CRF with an embedding layer."""
    model = Sequential()
    model.add(Embedding(VOCAB_SIZE, output_dim=EMBEDDING_OUT_DIM, mask_zero=True))
    # model.add(Dropout(DROPOUT_RATE))
    # 2D image-style classifier head pasted in from another model; it breaks the
    # (batch, time, features) shape a sequence tagger needs, so it stays disabled:
    # model.add(Convolution2D(nb_filters, (kernel_size[0], kernel_size[1]),
    #                         padding='same', input_shape=input_shape))  # conv layer 1
    # model.add(Activation('relu'))
    # model.add(Convolution2D(nb_filters, (kernel_size[0], kernel_size[1])))  # conv layer 2
    # model.add(Activation('relu'))
    # model.add(MaxPooling2D(pool_size=pool_size))  # pooling layer
    # model.add(Dropout(0.25))  # random unit dropout
    # model.add(Flatten())  # flatten to 1D
    # model.add(Dense(128))  # fully connected layer 1
    # model.add(Activation('relu'))
    # model.add(Dropout(0.5))
    # model.add(Dense(nb_classes))  # fully connected layer 2
    # model.add(Activation('softmax'))  # softmax scores
    model.add(Bidirectional(LSTM(HIDDEN_UNITS, return_sequences=True)))
    # model.add(Dropout(DROPOUT_RATE))
    model.add(TimeDistributed(Dense(NUM_CLASS)))
    crf_layer = CRF(NUM_CLASS, sparse_target=True)
    model.add(crf_layer)
    # model.summary()
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
    model.compile(adam, loss=crf_layer.loss_function, metrics=[crf_layer.accuracy])
    return model
def build_model(input_length, emb_input_dim, emb_out_dim, lstm_hidden_units, num_cls,
                embedding_matrix=None):
    l_input = Input(shape=(input_length,))
    if embedding_matrix is None:
        l_emb = Embedding(emb_input_dim, emb_out_dim)(l_input)
        # l_emb = Embedding(emb_input_dim, emb_out_dim, mask_zero=True)(l_input)
    else:
        l_emb = Embedding(emb_input_dim, emb_out_dim, weights=[embedding_matrix],
                          trainable=True)(l_input)
    # add position embedding layer
    l_posemb = Position_Embedding()(l_emb)
    l_posemb = Dropout(0.1)(l_posemb)
    # add bilstm layer
    l_bilstm = Bidirectional(LSTM(lstm_hidden_units, return_sequences=True))(l_posemb)
    # add attention layer
    l_att = Attention(nb_head=8, size_per_head=32)([l_bilstm, l_bilstm, l_bilstm])
    print('l_att.shape:', l_att.shape)
    # add dense layer
    l_dense = TimeDistributed(Dense(num_cls))(l_att)
    crf = CRF(num_cls)
    l_crf = crf(l_dense)
    model = Model(l_input, l_crf)
    model.compile('rmsprop', loss=crf.loss_function, metrics=[crf.accuracy])
    model.summary()
    return model
def __init__(self, keep_words=None):
    l_input_ids = layers.Input(shape=(ec_cfg.max_seq_len,), dtype='int32')
    l_token_type_ids = layers.Input(shape=(ec_cfg.max_seq_len,), dtype='int32')
    # build the model and load pre-trained weights
    bert_model = load_pretrained_model(
        join(DATA_PATH, ec_cfg.model_type, 'bert_config.json'),
        # join(DATA_PATH, ec_cfg.model_type, 'bert_model.ckpt'),
        None,
        num_hidden_layers=ec_cfg.num_hidden_layers,
        keep_words=keep_words)
    net = bert_model([l_input_ids, l_token_type_ids])
    # net = layers.Embedding(10000, 10)(l_input_ids)
    # net = layers.Dense(config.label_num, activation='sigmoid', name='class')(net)
    self._crf = CRF(ec_cfg.label_num, test_mode='viterbi')
    net = self._crf(net)
    # print('net after crf', net)
    self.model = keras.Model(inputs=[l_input_ids, l_token_type_ids], outputs=net)
    self.model.summary()
def bilstm_crf_model(sequence_max_len, input_feature, dropout_rate, num_class, hidden_unit_num):
    model = Sequential()
    model.add(Masking(mask_value=0, input_shape=(sequence_max_len, input_feature)))
    model.add(Bidirectional(LSTM(hidden_unit_num, return_sequences=True)))
    model.add(Dropout(dropout_rate))
    model.add(Bidirectional(LSTM(hidden_unit_num, return_sequences=True)))
    model.add(Dropout(dropout_rate))
    model.add(TimeDistributed(Dense(num_class)))
    crf = CRF(num_class, sparse_target=False)
    model.add(crf)
    # compile: loss, optimizer, metrics
    model.compile(loss=crf.loss_function, optimizer='adam', metrics=[crf.accuracy])
    # plot_model(model, to_file=log_dir + 'model.png')
    model.summary()
    return model
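# Sketch of preparing variable-length feature sequences for the Masking layer
# in bilstm_crf_model above: sequences are zero-padded to sequence_max_len so
# that mask_value=0 silences the padded steps. All sizes and the random data
# here are illustrative assumptions.
import numpy as np
from keras.preprocessing.sequence import pad_sequences

seq_max_len, n_feat, n_cls = 100, 30, 6
model = bilstm_crf_model(seq_max_len, n_feat, 0.3, n_cls, 128)
raw = [np.random.randn(np.random.randint(20, seq_max_len), n_feat) for _ in range(16)]
X = pad_sequences(raw, maxlen=seq_max_len, dtype='float32', padding='post', value=0.0)
y = np.eye(n_cls)[np.random.randint(0, n_cls, size=(16, seq_max_len))]  # one-hot (sparse_target=False)
model.fit(X, y, epochs=1, batch_size=4)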
def __build_model(self):
    model = Sequential()
    embedding_layer = Embedding(input_dim=len(self.vocab) + 1,
                                output_dim=self.embedding_dim,
                                weights=[self.embedding_mat],
                                trainable=False)
    model.add(embedding_layer)
    bilstm_layer = Bidirectional(LSTM(units=256, return_sequences=True))
    model.add(bilstm_layer)
    model.add(TimeDistributed(Dense(256, activation="relu")))
    crf_layer = CRF(units=len(self.tags), sparse_target=True)
    model.add(crf_layer)
    model.compile(optimizer="adam", loss=crf_loss, metrics=[crf_viterbi_accuracy])
    model.summary()
    return model
def build_crf_model(self, word_vector):
    print("load model")
    left = Input(shape=(self.maxlen,))
    left_embedding = Embedding(self.vocab_num + 1,
                               self.embedding_dim,
                               weights=[word_vector],
                               input_length=self.maxlen,
                               mask_zero=True,
                               trainable=True)(left)
    right = Input(shape=(self.maxlen,))
    right_embedding = Embedding(self.dictFeatureNum,
                                self.dict_embedding_size,
                                input_length=self.maxlen,
                                mask_zero=True,
                                trainable=True)(right)  # 0~24
    x = Concatenate(axis=-1)([left_embedding, right_embedding])
    x = Bidirectional(LSTM(self.bilstm_hidden_dim,
                           recurrent_dropout=self.dropout_rate,
                           return_sequences=True))(x)
    x = LSTM(self.lstm_hidden_dim,
             recurrent_dropout=self.dropout_rate,
             return_sequences=True)(x)
    x = TimeDistributed(Dense(50, activation="relu"))(x)
    # x = TimeDistributed(Dense(50, activation="relu"))(x)
    crf = CRF(self.cate_num)
    out = crf(x)
    adam = optimizers.Adam(lr=self.lr)
    # output = Dense(self.cate_num, activation='softmax')(single_drop)
    model = Model(inputs=[left, right], outputs=out)
    model.compile(optimizer=adam, loss=crf.loss_function, metrics=[crf.accuracy])
    return model
def prepare_for_transfer(self, datasets):
    """Prepares the BiLSTM-CRF for transfer learning by recreating its last layer.

    Prepares the BiLSTM-CRF model(s) at `self.models` for transfer learning by
    removing their CRF classifiers and replacing them with un-trained CRF
    classifiers of the appropriate size (i.e. number of units equal to the
    number of output tags) for the target datasets.

    References:
        - https://stackoverflow.com/questions/41378461/how-to-use-models-from-keras-applications-for-transfer-learnig/41386444#41386444
    """
    self.datasets = datasets  # replace with target datasets
    models, self.models = self.models, []  # wipe models
    for dataset, model in zip(self.datasets, models):
        # remove the old CRF classifier and define a new one
        model.layers.pop()
        new_crf = CRF(len(dataset.type_to_idx['tag']), name='target_crf_classifier')
        # create the new model
        new_input = model.input
        new_output = new_crf(model.layers[-1].output)
        self.models.append(Model(new_input, new_output))
    self.compile()
from keras import backend as K

checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=0,
                                             save_best_only=False, save_weights_only=False,
                                             mode='auto', period=1)
print(K.tensorflow_backend._get_available_gpus())

model = keras.Sequential()
model.add(keras.layers.Embedding(len(word_set), 500, mask_zero=True))  # 300
model.add(keras.layers.Bidirectional(keras.layers.LSTM(300, return_sequences=True)))  # 300
# model.add(keras.layers.LSTM(300, return_sequences=True))
# model.add(keras.layers.Activation('relu'))
# model.add(keras.layers.Dropout(0.5))
# model.add(keras.layers.TimeDistributed(keras.layers.Dense(len(label_set))))
model.add(keras.layers.Dense(len(label_set)))
crf = CRF(len(label_set))  # keep the layer instance distinct from the CRF class
model.add(crf)
# model.add(keras.layers.Activation('softmax'))  # sigmoid
binary_loss = 'binary_crossentropy'
categorical_loss = 'categorical_crossentropy'
model.compile(optimizer='adam', loss=crf.loss_function, metrics=[crf.accuracy])
# print(training_x.shape)
training_y = turn_one_hot(training_y, len(label_set))
# print(training_y.shape)
print(model.summary())
# reshape labels to (samples, timesteps, 1)
csi_train_label = csi_train_label.reshape(
    (csi_train_label.shape[0], csi_train_label.shape[1], 1))

# split into training and test sets
train, test, train_label, test_label = train_test_split(csi_train_data, csi_train_label,
                                                        test_size=0.3)

# build model
model = Sequential()
model.add(Masking(mask_value=0, input_shape=(sequence_max_len, input_feature)))
model.add(Bidirectional(LSTM(128, return_sequences=True)))
model.add(Dropout(dropout_rate))
model.add(Bidirectional(LSTM(128, return_sequences=True)))
model.add(Dropout(dropout_rate))
model.add(TimeDistributed(Dense(num_class)))
crf = CRF(num_class, sparse_target=False)
model.add(crf)

# compile: loss, optimizer, metrics
model.compile(loss=crf.loss_function, optimizer='adam', metrics=[crf.accuracy])
# plot_model(model, to_file=log_dir + 'model.png')
model.summary()

# callbacks
reduce_lr = LearningRateScheduler(scheduler)
filepath = log_dir + 'model-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True,
                             mode='min')