def load(self, preprocessor_file, json_file, weights_file, custom_objects=None):
    """Load text classification application from disk.

    Restores the preprocessor, the model architecture and the model weights,
    then rebuilds the trainer and predictor around the restored model.

    Args:
        preprocessor_file: path to load preprocessor
        json_file: path to load model architecture
        weights_file: path to load model weights
        custom_objects: Optional dictionary mapping names (strings) to custom
            classes or functions to be considered during deserialization.
            Must be provided when using custom layer.
    """
    self.preprocessor = TextClassificationPreprocessor.load(preprocessor_file)
    logging.info('Load preprocessor from {}'.format(preprocessor_file))

    # Build a fresh dict instead of calling .update() on the caller's
    # argument, so the caller's mapping is never mutated as a side effect.
    merged_objects = dict(custom_objects) if custom_objects else {}
    merged_objects.update(get_custom_objects())
    with open(json_file, 'r') as reader:
        self.model = model_from_json(reader.read(), custom_objects=merged_objects)
    logging.info('Load model architecture from {}'.format(json_file))
    self.model.load_weights(weights_file)
    logging.info('Load model weight from {}'.format(weights_file))

    self.trainer = TextClassificationTrainer(self.model, self.preprocessor)
    self.predictor = TextClassificationPredictor(self.model, self.preprocessor)
def test_siamese_bilstm_model(self):
    """Smoke-test SiameseBiLSTM construction and model (de)serialization."""
    # configuration 1: word + char inputs, no bert
    model = SiameseBiLSTM(
        num_class=self.num_class,
        use_word=True,
        word_embeddings=self.word_embeddings,
        word_vocab_size=self.word_vocab_size,
        word_embed_dim=self.word_embed_dim,
        word_embed_trainable=False,
        use_char=True,
        char_embeddings=self.char_embeddings,
        char_vocab_size=self.char_vocab_size,
        char_embed_dim=self.char_embed_dim,
        char_embed_trainable=False,
        use_bert=False,
        max_len=10).build_model()

    # configuration 2: char + bert inputs, no word
    model = SiameseBiLSTM(
        num_class=self.num_class,
        use_word=False,
        use_char=True,
        char_embeddings=self.char_embeddings,
        char_vocab_size=self.char_vocab_size,
        char_embed_dim=self.char_embed_dim,
        char_embed_trainable=False,
        use_bert=True,
        bert_config_file=self.bert_config_file,
        bert_checkpoint_file=self.bert_model_file,
        max_len=10).build_model()

    # round trip: save to disk, reload, then clean up
    json_file = os.path.join(self.checkpoint_dir, 'siamese_bilstm_spm.json')
    weights_file = os.path.join(self.checkpoint_dir, 'siamese_bilstm_spm.hdf5')
    save_keras_model(model, json_file, weights_file)
    for path in (json_file, weights_file):
        assert os.path.exists(path)
    load_keras_model(json_file, weights_file, custom_objects=get_custom_objects())
    for path in (json_file, weights_file):
        os.remove(path)
        assert not os.path.exists(path)
def test_bert_model(self):
    """Smoke-test BertSPM construction and model (de)serialization."""
    model = BertSPM(
        num_class=self.num_class,
        bert_config_file=self.bert_config_file,
        bert_checkpoint_file=self.bert_model_file,
        bert_trainable=True,
        max_len=10).build_model()

    # round trip: save to disk, reload, then clean up
    json_file = os.path.join(self.checkpoint_dir, 'bert_spm.json')
    weights_file = os.path.join(self.checkpoint_dir, 'bert_spm.hdf5')
    save_keras_model(model, json_file, weights_file)
    for path in (json_file, weights_file):
        assert os.path.exists(path)
    load_keras_model(json_file, weights_file, custom_objects=get_custom_objects())
    for path in (json_file, weights_file):
        os.remove(path)
        assert not os.path.exists(path)
def load(self, preprocessor_file: str, json_file: str, weights_file: str,
         custom_objects: Optional[Dict[str, Any]] = None) -> None:
    """Load ner application from disk.

    There are 3 things in total that we need to load: 1) preprocessor, which
    stores the vocabulary and embedding matrix built during pre-processing and
    helps us prepare feature input for ner model; 2) model architecture, which
    describes the framework of our ner model; 3) model weights, which stores
    the value of ner model's parameters.

    Args:
        preprocessor_file: path to load preprocessor
        json_file: path to load model architecture
        weights_file: path to load model weights
        custom_objects: Optional dictionary mapping names (strings) to custom
            classes or functions to be considered during deserialization. We
            will automatically add all the custom layers of this project to
            custom_objects. So you can ignore this argument in most cases
            unless you use your own custom layer.
    """
    self.preprocessor = NERPreprocessor.load(preprocessor_file)
    logging.info('Load preprocessor from {}'.format(preprocessor_file))

    # Build a fresh dict instead of calling .update() on the caller's
    # argument, so the caller's mapping is never mutated as a side effect.
    merged_objects: Dict[str, Any] = dict(custom_objects) if custom_objects else {}
    merged_objects.update(get_custom_objects())
    with open(json_file, 'r') as reader:
        self.model = tf.keras.models.model_from_json(
            reader.read(), custom_objects=merged_objects)
    logging.info('Load model architecture from {}'.format(json_file))
    self.model.load_weights(weights_file)
    logging.info('Load model weight from {}'.format(weights_file))

    self.trainer = NERTrainer(self.model, self.preprocessor)
    self.predictor = NERPredictor(self.model, self.preprocessor)
def test_bilstm_cnn_model(self):
    """Smoke-test BiLSTMCNNNER across input configurations plus save/load."""
    # configuration 1: char only, no CRF
    model = BiLSTMCNNNER(
        num_class=self.num_class,
        char_embeddings=self.char_embeddings,
        char_vocab_size=self.char_vocab_size,
        char_embed_dim=self.char_embed_dim,
        char_embed_trainable=False,
        use_word=False,
        use_crf=False).build_model()

    # configuration 2: char + CRF, no word, no bert
    model = BiLSTMCNNNER(
        num_class=self.num_class,
        char_embeddings=self.char_embeddings,
        char_vocab_size=self.char_vocab_size,
        char_embed_dim=self.char_embed_dim,
        char_embed_trainable=False,
        use_word=False,
        use_crf=True).build_model()

    # configuration 3: char + CRF + word, no bert
    model = BiLSTMCNNNER(
        num_class=self.num_class,
        char_embeddings=self.char_embeddings,
        char_vocab_size=self.char_vocab_size,
        char_embed_dim=self.char_embed_dim,
        char_embed_trainable=False,
        use_word=True,
        word_embeddings=self.word_embeddings,
        word_vocab_size=self.word_vocab_size,
        word_embed_dim=self.word_embed_dim,
        word_embed_trainable=False,
        use_crf=True).build_model()

    # configuration 4: char + CRF + word + bert
    model = BiLSTMCNNNER(
        num_class=self.num_class,
        char_embeddings=self.char_embeddings,
        char_vocab_size=self.char_vocab_size,
        char_embed_dim=self.char_embed_dim,
        char_embed_trainable=False,
        use_bert=True,
        bert_config_file=self.bert_config_file,
        bert_checkpoint_file=self.bert_model_file,
        bert_trainable=True,
        use_word=True,
        word_embeddings=self.word_embeddings,
        word_vocab_size=self.word_vocab_size,
        word_embed_dim=self.word_embed_dim,
        word_embed_trainable=False,
        max_len=16,
        use_crf=True).build_model()

    # round trip: save to disk, reload, then clean up
    json_file = os.path.join(self.checkpoint_dir, 'bilstm_cnn_ner.json')
    weights_file = os.path.join(self.checkpoint_dir, 'bilstm_cnn_ner.hdf5')
    save_keras_model(model, json_file, weights_file)
    for path in (json_file, weights_file):
        assert os.path.exists(path)
    load_keras_model(json_file, weights_file, custom_objects=get_custom_objects())
    for path in (json_file, weights_file):
        os.remove(path)
        assert not os.path.exists(path)