コード例 #1
0
    def load(self, preprocessor_file, json_file, weights_file, custom_objects=None):
        """Load text classification application from disk.

        Restores the preprocessor, rebuilds the model from its JSON architecture, loads the
        model weights, and then recreates the trainer and predictor around them.

        Args:
            preprocessor_file: path to load preprocessor
            json_file: path to load model architecture
            weights_file: path to load model weights
            custom_objects: Optional dictionary mapping names (strings) to custom classes or
                            functions to be considered during deserialization. Must be provided
                            when using your own custom layer. The entries returned by
                            `get_custom_objects()` are merged on top of it; the dictionary
                            passed in is left unmodified.

        """
        self.preprocessor = TextClassificationPreprocessor.load(preprocessor_file)
        logging.info('Load preprocessor from {}'.format(preprocessor_file))

        # Merge into a fresh dict so the caller's dictionary is never mutated. Entries from
        # get_custom_objects() are applied last, so they still win on a name clash.
        merged_objects = dict(custom_objects or {})
        merged_objects.update(get_custom_objects())
        with open(json_file, 'r') as reader:
            self.model = model_from_json(reader.read(), custom_objects=merged_objects)
        logging.info('Load model architecture from {}'.format(json_file))

        self.model.load_weights(weights_file)
        logging.info('Load model weight from {}'.format(weights_file))

        # Trainer and predictor wrap the freshly loaded model and preprocessor.
        self.trainer = TextClassificationTrainer(self.model, self.preprocessor)
        self.predictor = TextClassificationPredictor(self.model, self.preprocessor)
コード例 #2
0
    def test_siamese_bilstm_model(self):
        """Build SiameseBiLSTM with two input setups, then save and reload the last one."""
        # Character-level settings are common to both configurations below.
        char_settings = dict(use_char=True,
                             char_embeddings=self.char_embeddings,
                             char_vocab_size=self.char_vocab_size,
                             char_embed_dim=self.char_embed_dim,
                             char_embed_trainable=False)

        # Configuration 1: word + char inputs, BERT disabled.
        model = SiameseBiLSTM(num_class=self.num_class,
                              use_word=True,
                              word_embeddings=self.word_embeddings,
                              word_vocab_size=self.word_vocab_size,
                              word_embed_dim=self.word_embed_dim,
                              word_embed_trainable=False,
                              use_bert=False,
                              max_len=10,
                              **char_settings).build_model()

        # Configuration 2: char + BERT inputs, word input disabled.
        model = SiameseBiLSTM(num_class=self.num_class,
                              use_word=False,
                              use_bert=True,
                              bert_config_file=self.bert_config_file,
                              bert_checkpoint_file=self.bert_model_file,
                              max_len=10,
                              **char_settings).build_model()

        # Round-trip the most recently built model through save and load.
        arch_path = os.path.join(self.checkpoint_dir, 'siamese_bilstm_spm.json')
        weights_path = os.path.join(self.checkpoint_dir, 'siamese_bilstm_spm.hdf5')
        saved_paths = (arch_path, weights_path)

        save_keras_model(model, arch_path, weights_path)
        for path in saved_paths:
            assert os.path.exists(path)

        load_keras_model(arch_path, weights_path, custom_objects=get_custom_objects())

        # Remove the artifacts, then confirm both are gone.
        for path in saved_paths:
            os.remove(path)
        for path in saved_paths:
            assert not os.path.exists(path)
コード例 #3
0
    def test_bert_model(self):
        """Build a trainable BertSPM model, then save and reload it."""
        builder = BertSPM(num_class=self.num_class,
                          bert_config_file=self.bert_config_file,
                          bert_checkpoint_file=self.bert_model_file,
                          bert_trainable=True,
                          max_len=10)
        model = builder.build_model()

        # Round-trip the built model through save and load.
        arch_path = os.path.join(self.checkpoint_dir, 'bert_spm.json')
        weights_path = os.path.join(self.checkpoint_dir, 'bert_spm.hdf5')
        saved_paths = (arch_path, weights_path)

        save_keras_model(model, arch_path, weights_path)
        for path in saved_paths:
            assert os.path.exists(path)

        load_keras_model(arch_path, weights_path, custom_objects=get_custom_objects())

        # Remove the artifacts, then confirm both are gone.
        for path in saved_paths:
            os.remove(path)
        for path in saved_paths:
            assert not os.path.exists(path)
コード例 #4
0
    def load(self,
             preprocessor_file: str,
             json_file: str,
             weights_file: str,
             custom_objects: Optional[Dict[str, Any]] = None) -> None:
        """Load ner application from disk.

        There are 3 things in total that we need to load: 1) preprocessor, which stores the
        vocabulary and embedding matrix built during pre-processing and helps us prepare feature
        input for ner model; 2) model architecture, which describes the framework of our ner model;
        3) model weights, which stores the value of ner model's parameters.

        After loading, the trainer and predictor are recreated around the loaded model and
        preprocessor.

        Args:
            preprocessor_file: path to load preprocessor
            json_file: path to load model architecture
            weights_file: path to load model weights
            custom_objects: Optional dictionary mapping names (strings) to custom classes or
                            functions to be considered during deserialization. All the custom
                            layers of this project are merged in automatically, so you can
                            ignore this argument in most cases unless you use your own custom
                            layer. The dictionary passed in is left unmodified.

        """
        self.preprocessor = NERPreprocessor.load(preprocessor_file)
        logging.info('Load preprocessor from {}'.format(preprocessor_file))

        # Merge into a fresh dict so the caller's dictionary is never mutated. Entries from
        # get_custom_objects() are applied last, so they still win on a name clash.
        merged_objects = dict(custom_objects or {})
        merged_objects.update(get_custom_objects())
        with open(json_file, 'r') as reader:
            self.model = tf.keras.models.model_from_json(
                reader.read(), custom_objects=merged_objects)
        logging.info('Load model architecture from {}'.format(json_file))

        self.model.load_weights(weights_file)
        logging.info('Load model weight from {}'.format(weights_file))

        # Trainer and predictor wrap the freshly loaded model and preprocessor.
        self.trainer = NERTrainer(self.model, self.preprocessor)
        self.predictor = NERPredictor(self.model, self.preprocessor)
コード例 #5
0
ファイル: test_ner_models.py プロジェクト: techwitz/fancy-nlp
    def test_bilstm_cnn_model(self):
        """Build BiLSTMCNNNER with four input setups, then save and reload the last one."""
        # Settings shared by every configuration: class count plus char embeddings.
        base_settings = dict(num_class=self.num_class,
                             char_embeddings=self.char_embeddings,
                             char_vocab_size=self.char_vocab_size,
                             char_embed_dim=self.char_embed_dim,
                             char_embed_trainable=False)
        # Extra settings for the configurations that also take word input.
        word_settings = dict(use_word=True,
                             word_embeddings=self.word_embeddings,
                             word_vocab_size=self.word_vocab_size,
                             word_embed_dim=self.word_embed_dim,
                             word_embed_trainable=False)

        # Configuration 1: char input only, no CRF.
        model = BiLSTMCNNNER(use_word=False, use_crf=False, **base_settings).build_model()

        # Configuration 2: char input only, with CRF (no word, no BERT).
        model = BiLSTMCNNNER(use_word=False, use_crf=True, **base_settings).build_model()

        # Configuration 3: char + word input, with CRF (no BERT).
        model = BiLSTMCNNNER(use_crf=True, **base_settings, **word_settings).build_model()

        # Configuration 4: char + word + BERT input, with CRF.
        model = BiLSTMCNNNER(use_bert=True,
                             bert_config_file=self.bert_config_file,
                             bert_checkpoint_file=self.bert_model_file,
                             bert_trainable=True,
                             max_len=16,
                             use_crf=True,
                             **base_settings,
                             **word_settings).build_model()

        # Round-trip the most recently built model through save and load.
        arch_path = os.path.join(self.checkpoint_dir, 'bilstm_cnn_ner.json')
        weights_path = os.path.join(self.checkpoint_dir, 'bilstm_cnn_ner.hdf5')
        saved_paths = (arch_path, weights_path)

        save_keras_model(model, arch_path, weights_path)
        for path in saved_paths:
            assert os.path.exists(path)

        load_keras_model(arch_path, weights_path, custom_objects=get_custom_objects())

        # Remove the artifacts, then confirm both are gone.
        for path in saved_paths:
            os.remove(path)
        for path in saved_paths:
            assert not os.path.exists(path)