def load_model(self):
    # Build a pooled sentence-embedding model from the pre-trained RuBERT checkpoint.
    tf.keras.backend.clear_session()
    logging.info("Loading RuBERT model...")
    paths = get_checkpoint_paths("model_bert")
    inputs = load_trained_model_from_checkpoint(
        config_file=paths.config,
        checkpoint_file=paths.checkpoint,
        seq_len=50)
    # Max-pool the token embeddings, ignoring padded positions.
    outputs = MaskedGlobalMaxPool1D(name="Pooling")(inputs.output)
    vocab = load_vocabulary(paths.vocab)
    return tf.keras.Model(inputs=inputs.inputs, outputs=outputs), vocab, Tokenizer(vocab)
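A minimal usage sketch for the load_model helper above, assuming it is a method on some wrapper class (the `service` object below is hypothetical) and that numpy is imported as np, as in the other snippets:

# Hypothetical caller; `service` stands in for whatever object defines load_model().
model, vocab, tokenizer = service.load_model()
# Encode one sentence to token indices and segment ids, padded to the seq_len used above.
indices, segments = tokenizer.encode(first="example text", max_len=50)
# The pooled output is a single fixed-size sentence vector.
sentence_vector = model.predict([np.array([indices]), np.array([segments])])[0]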
def __init__(self, docs, vec):
    self.texts = np.array(docs)
    self.vec = vec
    paths = get_checkpoint_paths(".")
    inputs = load_trained_model_from_checkpoint(
        config_file=paths.config,
        checkpoint_file=paths.checkpoint,
        seq_len=50)
    outputs = MaskedGlobalMaxPool1D(name='Pooling')(inputs.output)
    self.model = Model(inputs=inputs.inputs, outputs=outputs)
    self.vocab = load_vocabulary(paths.vocab)
    self.tokenizer = Tokenizer(self.vocab)
def build_bert(model, poolings=None, output_layer_num=1):
    """Build a BERT encoder with optional pooled outputs.

    :param model: Path to a checkpoint, or an already-built model without the MLM and NSP heads.
    :param poolings: Pooling methods. Word embeddings are returned if it is None;
        otherwise the pooled embeddings are concatenated into a single output.
    :param output_layer_num: The number of layers whose outputs will be concatenated
        as a single output. Only available when `model` is a path to a checkpoint.
    :return: The model inputs and the (pooled) output tensor.
    """
    # Note: the `model` argument is overridden by the downloaded multilingual cased checkpoint.
    model = get_pretrained(PretrainedList.multi_cased_base)
    if isinstance(model, (str, type(u''))):
        paths = get_checkpoint_paths(model)
        model = load_trained_model_from_checkpoint(
            config_file=paths.config,
            checkpoint_file=paths.checkpoint,
            output_layer_num=output_layer_num,
        )
    outputs = []
    if poolings is not None:
        if isinstance(poolings, (str, type(u''))):
            poolings = [poolings]
        for pooling in poolings:
            if pooling == POOL_NSP:
                outputs.append(Extract(index=0, name='Pool-NSP')(model.outputs[0]))
            elif pooling == POOL_MAX:
                outputs.append(MaskedGlobalMaxPool1D(name='Pool-Max')(model.outputs[0]))
            elif pooling == POOL_AVE:
                outputs.append(keras.layers.GlobalAvgPool1D(name='Pool-Ave')(model.outputs[0]))
            else:
                raise ValueError('Unknown pooling method: {}'.format(pooling))
        if len(outputs) == 1:
            outputs = outputs[0]
        else:
            outputs = keras.layers.Concatenate(name='Concatenate')(outputs)
        outputs = Lambda(bert_output_sum)(outputs)
    return model.inputs, outputs
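A hedged usage sketch for build_bert: it wraps the returned inputs and output tensor in a Keras model. This assumes the POOL_MAX constant from keras_bert and the bert_output_sum helper referenced inside the function are available in the same module; the first argument is effectively ignored because the function downloads multi_cased_base itself.

# Build a pooled encoder; the checkpoint argument is overridden inside build_bert.
inputs, outputs = build_bert(None, poolings=POOL_MAX)
pooled_model = keras.models.Model(inputs=inputs, outputs=outputs)
pooled_model.summary()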
def test_masked_global_max_pool_1d_predict(self):
    embed = np.random.standard_normal((11, 13))
    input_layer = keras.layers.Input(shape=(None,))
    embed_layer = keras.layers.Embedding(
        input_dim=11,
        output_dim=13,
        mask_zero=True,
        weights=[embed],
    )(input_layer)
    pool_layer = MaskedGlobalMaxPool1D()(embed_layer)
    model = keras.models.Model(inputs=input_layer, outputs=pool_layer)
    model.compile(optimizer='adam', loss='mse')
    x = np.array([[1, 2, 0, 0], [2, 3, 4, 0]])
    y = model.predict(x)
    self.assertTrue(np.allclose(np.max(embed[1:3], axis=0), y[0]))
    self.assertTrue(np.allclose(np.max(embed[2:5], axis=0), y[1]))
def test_masked_global_max_pool_1d_fit(self):
    input_layer = keras.layers.Input(shape=(None,))
    embed_layer = keras.layers.Embedding(
        input_dim=11,
        output_dim=13,
        mask_zero=False,
    )(input_layer)
    pool_layer = MaskedGlobalMaxPool1D()(embed_layer)
    dense_layer = keras.layers.Dense(units=2, activation='softmax')(pool_layer)
    model = keras.models.Model(inputs=input_layer, outputs=dense_layer)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
    model.summary()
    x = np.random.randint(0, 11, (32, 7))
    y = np.random.randint(0, 2, (32,))
    model.fit(x, y)
def test_masked_global_max_pool_1d_predict(self):
    input_layer = keras.layers.Input(shape=(None,))
    embed_layer = keras.layers.Embedding(
        input_dim=5,
        output_dim=6,
        mask_zero=True,
        name='Embed',
    )(input_layer)
    pool_layer = MaskedGlobalMaxPool1D()(embed_layer)
    model = keras.models.Model(inputs=input_layer, outputs=pool_layer)
    model.compile(optimizer='adam', loss='mse')
    x = np.array([[1, 2, 0, 0], [2, 3, 4, 0]])
    y = model.predict(x)
    embed = model.get_layer('Embed').get_weights()[0]
    expected = np.max(embed[1:3], axis=0)
    self.assertTrue(np.allclose(expected, y[0]), (expected, y[0]))
    expected = np.max(embed[2:5], axis=0)
    self.assertTrue(np.allclose(expected, y[1]), (expected, y[1]))
def test_masked_conv_1d_fit(self):
    input_layer = keras.layers.Input(shape=(None,))
    embed_layer = keras.layers.Embedding(
        input_dim=11,
        output_dim=13,
        mask_zero=True,
    )(input_layer)
    conv_layer = MaskedConv1D(filters=7, kernel_size=3, padding='same')(embed_layer)
    pool_layer = MaskedGlobalMaxPool1D()(conv_layer)
    dense_layer = keras.layers.Dense(units=2, activation='softmax')(pool_layer)
    model = keras.models.Model(inputs=input_layer, outputs=dense_layer)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
    model.summary()
    # Repeat the same 32 samples 100 times so the model can memorize them.
    x = np.array(np.random.randint(0, 11, (32, 7)).tolist() * 100)
    y = np.array(np.random.randint(0, 2, (32,)).tolist() * 100)
    model.fit(x, y, epochs=10)
    y_hat = model.predict(x).argmax(axis=-1)
    self.assertEqual(y.tolist(), y_hat.tolist())
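For context, a minimal sketch of what a masked global max pooling layer generally does; this is an illustrative stand-in, not necessarily the exact implementation exercised by the tests above. Masked timesteps are pushed far below the valid values before the max over the sequence axis.

import keras
from keras import backend as K

class SimpleMaskedGlobalMaxPool1D(keras.layers.Layer):
    """Illustrative masked max pooling over the time dimension."""

    def __init__(self, **kwargs):
        super(SimpleMaskedGlobalMaxPool1D, self).__init__(**kwargs)
        self.supports_masking = True

    def compute_mask(self, inputs, mask=None):
        # The sequence dimension is reduced away, so no mask is propagated.
        return None

    def compute_output_shape(self, input_shape):
        return input_shape[:-2] + (input_shape[-1],)

    def call(self, inputs, mask=None):
        if mask is not None:
            # Subtract a large constant at masked positions so they never win the max.
            mask = K.cast(mask, K.floatx())
            inputs = inputs - (1.0 - K.expand_dims(mask, axis=-1)) * 1e10
        return K.max(inputs, axis=-2)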
def link_model():
    input_en = Input(shape=(13,), name='kb_en')
    input_begin = Input(shape=(13,), name='begin')
    input_end = Input(shape=(13,), name='end')
    bert_path = 'bert_model/'
    config_path = bert_path + 'bert_config.json'
    checkpoint_path = bert_path + 'bert_model.ckpt'
    bert_model = load_trained_model_from_checkpoint(config_path,
                                                    checkpoint_path,
                                                    trainable=True,
                                                    seq_len=52)
    # Entity embeddings are initialised from a pre-computed matrix defined elsewhere.
    entity_embedding = Embedding(input_dim=312452,
                                 output_dim=768,
                                 weights=[embedding_matrix_entity],
                                 trainable=True,
                                 name='entity_embedding')
    men_sen = bert_model.output
    # Zero out the BERT outputs at padded token positions.
    mask_sen = Lambda(lambda x: K.cast(K.greater(x, 0), 'float32'))(bert_model.input[0])
    men_sen = Lambda(lambda x: x[0] * K.expand_dims(x[1], axis=-1))([men_sen, mask_sen])
    men_sen = SpatialDropout1D(0.15)(men_sen)
    [forward, backward] = Bidirectional(CuDNNGRU(128, return_sequences=True),
                                        merge_mode=None)(men_sen, mask=None)
    gru = concatenate([forward, backward], axis=-1)
    max_x = MaskedGlobalMaxPool1D()(gru)
    x = StateMix()([input_begin, input_end, forward, backward])
    t_dim = K.int_shape(x)[-1]
    x = Lambda(seq_and_vec, output_shape=(13, t_dim * 2))([x, max_x])
    mask = Lambda(lambda x: K.cast(K.greater(x, -1), 'float32'))(input_begin)
    kb_en = entity_embedding(input_en)
    x = concatenate([kb_en, x], axis=-1)
    x = Lambda(lambda x: x[0] * K.expand_dims(x[1], axis=-1))([x, mask])
    x = Dropout(0.1)(x)
    x = Conv1D(128, 1, activation='relu', padding='same')(x)
    x = TimeDistributed(Dropout(0.1))(x)
    x = Dense(units=1, activation='sigmoid')(x)
    model = Model(bert_model.inputs + [input_en, input_begin, input_end], x)
    model.compile(optimizer=adam(),
                  loss=binary_crossentropy,
                  metrics=[metrics_f1])
    return model
if len(sys.argv) != 2:
    print('python load_model.py UNZIPPED_MODEL_PATH')
    sys.exit(-1)

print('This demo demonstrates how to load the pre-trained model and extract the sentence embedding with pooling.')

model_path = sys.argv[1]
config_path = os.path.join(model_path, 'bert_config.json')
checkpoint_path = os.path.join(model_path, 'bert_model.ckpt')
dict_path = os.path.join(model_path, 'vocab.txt')

model = load_trained_model_from_checkpoint(config_path, checkpoint_path, seq_len=10)
pool_layer = MaskedGlobalMaxPool1D(name='Pooling')(model.output)
model = keras.models.Model(inputs=model.inputs, outputs=pool_layer)
model.summary(line_length=120)

token_dict = load_vocabulary(dict_path)
tokenizer = Tokenizer(token_dict)
text = '语言模型'  # Chinese for "language model"
tokens = tokenizer.tokenize(text)
print('Tokens:', tokens)

indices, segments = tokenizer.encode(first=text, max_len=10)
predicts = model.predict([np.array([indices]), np.array([segments])])[0]
print('Pooled:', predicts.tolist()[:5])