def save(self, dirpath):
    """Saves model to local disk, given a dirpath.

    Parameters
    ----------
    dirpath : str
        a directory where model artifacts will be saved.
        Model saves a weights.h5 weights file, a params.json parameter file,
        and a preprocessor.pkl preprocessor file.

    Returns
    -------
    None
    """
    if self.model_ is None or self.preprocessor_ is None:
        raise ValueError("No model artifacts to save, "
                         "either run fit() to train or load() a trained model")

    if not os.path.exists(dirpath):
        os.makedirs(dirpath)

    weights_file = os.path.join(dirpath, "weights.h5")
    params_file = os.path.join(dirpath, "params.json")
    preprocessor_file = os.path.join(dirpath, "preprocessor.pkl")

    save_model(self.model_, weights_file, params_file)
    self.preprocessor_.save(preprocessor_file)
    write_param_file(self.get_params(), os.path.join(dirpath, "params.yaml"))
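# Hedged usage sketch for the directory-based save() above. `SequenceTagger`
# is a hypothetical stand-in for the estimator class that owns this method;
# fit() and load() are assumed from the error message ("run fit() to train or
# load() a trained model"), not confirmed by this file.
tagger = SequenceTagger()                 # hypothetical class name
tagger.fit(x_train, y_train)              # populates model_ and preprocessor_
tagger.save('models/ner')                 # writes weights.h5, params.json,
                                          # preprocessor.pkl and params.yaml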
def main(args):
    print('Loading dataset...')
    x_train, y_train = load_data_and_labels(args.train_data)
    x_valid, y_valid = load_data_and_labels(args.valid_data)
    x_test, y_test = load_data_and_labels(args.test_data)
    # Fit the preprocessor vocabulary on all splits, but keep the splits
    # themselves separate so the model is never trained on test data.
    x_all = np.r_[x_train, x_valid, x_test]
    y_all = np.r_[y_train, y_valid, y_test]

    print('Transforming datasets...')
    p = IndexTransformer(use_char=args.no_char_feature)
    p.fit(x_all, y_all)

    print('Building a model.')
    model = BiLSTMCRF(char_embedding_dim=args.char_emb_size,
                      word_embedding_dim=args.word_emb_size,
                      char_lstm_size=args.char_lstm_units,
                      word_lstm_size=args.word_lstm_units,
                      char_vocab_size=p.char_vocab_size,
                      word_vocab_size=p.word_vocab_size,
                      num_labels=p.label_size,
                      dropout=args.dropout,
                      use_char=args.no_char_feature,
                      use_crf=args.no_use_crf)
    model, loss = model.build()
    model.compile(loss=loss, optimizer='adam')

    print('Training the model...')
    trainer = Trainer(model, preprocessor=p)
    trainer.train(x_train, y_train, x_valid, y_valid, epochs=args.max_epoch)

    print('Saving the model...')
    save_model(model, args.weights_file, args.params_file)
    p.save(args.preprocessor_file)
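# Hedged sketch of the CLI wiring main() assumes. Flag names are inferred from
# the attributes read above (args.train_data, args.no_char_feature, ...); the
# store_false actions explain why use_char=args.no_char_feature enables the
# char feature by default. Defaults are illustrative, not confirmed.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Train a BiLSTM-CRF tagger.')
    parser.add_argument('--train_data', required=True, help='training data path')
    parser.add_argument('--valid_data', required=True, help='validation data path')
    parser.add_argument('--test_data', required=True, help='test data path')
    parser.add_argument('--weights_file', default='weights.h5')
    parser.add_argument('--params_file', default='params.json')
    parser.add_argument('--preprocessor_file', default='preprocessor.pkl')
    parser.add_argument('--char_emb_size', type=int, default=25)
    parser.add_argument('--word_emb_size', type=int, default=100)
    parser.add_argument('--char_lstm_units', type=int, default=25)
    parser.add_argument('--word_lstm_units', type=int, default=100)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--max_epoch', type=int, default=15)
    parser.add_argument('--no_char_feature', action='store_false')
    parser.add_argument('--no_use_crf', action='store_false')
    main(parser.parse_args())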
def test_save(self):
    # Train the model.
    trainer = Trainer(self.model, preprocessor=self.p)
    trainer.train(self.x_train, self.y_train)

    # Save the model.
    save_model(self.model, self.weights_file, self.params_file)
    self.p.save(self.preprocessor_file)
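# Hedged follow-up: test_save above trains and saves but asserts nothing. A
# natural companion check, assuming the same fixture attributes, is that the
# three artifact files actually appear on disk.
def test_save_creates_artifacts(self):
    save_model(self.model, self.weights_file, self.params_file)
    self.p.save(self.preprocessor_file)
    for path in (self.weights_file, self.params_file, self.preprocessor_file):
        self.assertTrue(os.path.exists(path))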
def test_save_and_load(self):
    char_vocab_size = 100
    word_vocab_size = 10000
    num_labels = 10

    model = BiLSTMCRF(char_vocab_size=char_vocab_size,
                      word_vocab_size=word_vocab_size,
                      num_labels=num_labels)
    model, loss = model.build()

    self.assertFalse(os.path.exists(self.weights_file))
    self.assertFalse(os.path.exists(self.params_file))

    save_model(model, self.weights_file, self.params_file)
    self.assertTrue(os.path.exists(self.weights_file))
    self.assertTrue(os.path.exists(self.params_file))

    model = load_model(self.weights_file, self.params_file)
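# Hedged extension of the round-trip test: a stronger check than file
# existence is that reloading preserves the weights. get_weights() is standard
# Keras API; numpy is assumed imported as np elsewhere in the suite.
def test_reload_preserves_weights(self):
    model = BiLSTMCRF(char_vocab_size=100, word_vocab_size=10000, num_labels=10)
    model, loss = model.build()
    save_model(model, self.weights_file, self.params_file)
    reloaded = load_model(self.weights_file, self.params_file)
    for w_orig, w_new in zip(model.get_weights(), reloaded.get_weights()):
        np.testing.assert_allclose(w_orig, w_new)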
def save(self, weights_file, params_file, preprocessor_file):
    self.p.save(preprocessor_file)
    save_model(self.model, weights_file, params_file)
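# Hedged counterpart to save() above: a classmethod loader that reverses the
# three-file layout. IndexTransformer.load and load_model are assumed to be
# the inverses of p.save and save_model used here; the no-arg constructor is
# also an assumption.
@classmethod
def load(cls, weights_file, params_file, preprocessor_file):
    self = cls()
    self.p = IndexTransformer.load(preprocessor_file)
    self.model = load_model(weights_file, params_file)
    return self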
p.fit(x_train, y_train)

print('Loading word embeddings...')
embeddings = load_glove(EMBEDDING_PATH)
embeddings = filter_embeddings(embeddings, p._word_vocab.vocab, EMBEDDING_DIM)

print('Building a model.')
model = ELModel(char_embedding_dim=32,
                word_embedding_dim=EMBEDDING_DIM,
                char_lstm_size=32,
                word_lstm_size=EMBEDDING_DIM,
                char_vocab_size=p.char_vocab_size,
                word_vocab_size=p.word_vocab_size,
                num_labels=p.label_size,
                embeddings=embeddings)
model, loss = model.build()
model.compile(loss=loss, optimizer='adam')

print('Training the model...')
trainer = Trainer(model, preprocessor=p)
trainer.train(x_train, y_train, x_test, y_test,
              callbacks=[TensorBoard(log_dir=log_dir, write_graph=False),
                         ModelCheckpoint(weights_path, save_weights_only=True),
                         ReduceLROnPlateau(),
                         EarlyStopping(patience=EARLY_STOP)])

print('Saving the model...')
save_model(model,
           os.path.join(log_dir, 'weights.h5'),
           os.path.join(log_dir, 'params.json'))
p.save(os.path.join(log_dir, 'preprocessor.pkl'))
# model.save('weights.h5', 'params.json')
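# Hedged inference sketch: reload the three artifacts saved above and tag a
# sentence. Tagger and its analyze() method are assumptions based on the
# library's naming, not confirmed by this script; load_model and
# IndexTransformer.load mirror the save calls above.
model = load_model(os.path.join(log_dir, 'weights.h5'),
                   os.path.join(log_dir, 'params.json'))
p = IndexTransformer.load(os.path.join(log_dir, 'preprocessor.pkl'))
tagger = Tagger(model, preprocessor=p)    # hypothetical helper class
print(tagger.analyze('President Obama is speaking at the White House.'))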