def infer():
    """Run sequence-labeling inference with a saved ``SeqLabeling`` model.

    Reads the "POS_infer" config section, sizes the model from the pickled
    vocabularies under ``pickle_path``, restores the saved weights, and prints
    one prediction per line for the sentences in ``data_infer_path``.
    """
    infer_args = ConfigSection()
    ConfigLoader("config.cfg").load_config(config_dir, {"POS_infer": infer_args})

    # Model dimensions are taken from the pickled dictionaries.
    infer_args["vocab_size"] = len(load_pickle(pickle_path, "word2id.pkl"))
    infer_args["num_classes"] = len(load_pickle(pickle_path, "id2class.pkl"))

    # Rebuild the architecture, then fill it with the trained parameters.
    tagger = SeqLabeling(infer_args)
    weights_path = os.path.join(pickle_path, model_name)
    ModelLoader.load_pytorch(tagger, weights_path)
    print("model loaded!")

    # Raw input lines to be labeled.
    sentences = BaseLoader(data_infer_path).load_lines()

    # Named "predictor" so the local does not shadow this function's name.
    predictor = SeqLabelInfer(pickle_path)
    predictions = predictor.predict(tagger, sentences)

    for prediction in predictions:
        print(prediction)
    print("Inference finished!")
def predict():
    """Evaluate the saved ``AdvSeqLabel`` model on the pickled development set.

    Reads the "POS_test" config section, sizes the model from the pickled
    vocabularies, restores "./save/trained_model.pkl", and runs
    ``SeqLabelTester`` over "data_dev.pkl".
    """
    eval_args = ConfigSection()
    ConfigLoader().load_config(cfgfile, {"POS_test": eval_args})

    # Model dimensions are taken from the pickled dictionaries.
    eval_args["vocab_size"] = len(load_pickle(pickle_path, "word2id.pkl"))
    eval_args["num_classes"] = len(load_pickle(pickle_path, "label2id.pkl"))

    # Development data saved earlier as a pickle.
    held_out = load_pickle(pickle_path, "data_dev.pkl")

    # Rebuild the architecture, then fill it with the trained parameters.
    tagger = AdvSeqLabel(eval_args)
    ModelLoader.load_pytorch(tagger, "./save/trained_model.pkl")
    print("model loaded!")

    # The evaluator is added after the model is constructed, so only the
    # tester receives it.
    eval_args["evaluator"] = SeqLabelEvaluator()
    evaluator_runner = SeqLabelTester(**eval_args.data)
    evaluator_runner.test(tagger, held_out)
def infer():
    """Run inference with the saved ``AdvSeqLabel`` model and print the results.

    Reads the "POS_test" config section, sizes the model from the pickled
    vocabularies, restores "./save/trained_model.pkl" (re-raising on failure),
    and predicts labels for the data at ``data_infer_path``.
    """
    eval_args = ConfigSection()
    ConfigLoader().load_config(cfgfile, {"POS_test": eval_args})

    # The word vocabulary is kept: it is needed again to index the input data.
    word_vocab = load_pickle(pickle_path, "word2id.pkl")
    eval_args["vocab_size"] = len(word_vocab)
    eval_args["num_classes"] = len(load_pickle(pickle_path, "label2id.pkl"))

    tagger = AdvSeqLabel(eval_args)
    try:
        ModelLoader.load_pytorch(tagger, "./save/trained_model.pkl")
        print('model loaded!')
    except Exception:
        # Report, then propagate: inference cannot proceed without weights.
        print('cannot load model!')
        raise

    # Build the inference data set, indexed with the training word vocabulary.
    sentences = SeqLabelDataSet(load_func=BaseLoader.load_lines)
    sentences.load(data_infer_path, vocabs={"word_vocab": word_vocab}, infer=True)
    print('data loaded')

    # Named "predictor" so the local does not shadow this function's name.
    predictor = SeqLabelInfer(pickle_path)
    predictions = predictor.predict(tagger, sentences)

    print(predictions)
    print("Inference finished!")
def infer():
    """Classify the texts in ``train_data_dir`` with a saved ``CNNText`` model.

    Loads the data set, runs ``ClassPreprocess`` over it (writing its pickles
    to ``save_dir``), builds the model from the preprocessor's vocabulary and
    class counts, restores the saved weights, and prints the predictions.
    """
    print("Loading data...")
    samples = ClassDatasetLoader(train_data_dir).load()
    # Take the first element of every sample as the unlabeled input.
    # presumably samples are (text, label) pairs — TODO confirm against loader.
    texts = [sample[0] for sample in samples]

    # The preprocessor is run for its vocabulary/class statistics; its return
    # value is not used afterwards.
    preprocessor = ClassPreprocess()
    preprocessor.run(samples, pickle_path=save_dir)
    print("vocabulary size:", preprocessor.vocab_size)
    print("number of classes:", preprocessor.num_classes)

    # TODO: load the remaining model arguments from the config file.
    model_args = ConfigSection()
    model_args["vocab_size"] = preprocessor.vocab_size
    model_args["num_classes"] = preprocessor.num_classes

    print("Building model...")
    classifier = CNNText(model_args)

    # Fill the freshly built model with the trained parameters.
    weights_path = os.path.join(save_dir, model_name)
    ModelLoader.load_pytorch(classifier, weights_path)
    print("model loaded!")

    predictor = ClassificationInfer(pickle_path=save_dir)
    predictions = predictor.predict(classifier, texts)
    print(predictions)
def infer():
    """Classify the texts in ``train_data_dir`` with a saved ``CNNText`` model.

    Loads the pickled word/label vocabularies from ``save_dir``, indexes the
    data set with them, builds the model from the "text_class_model" config
    section, restores the saved weights, and prints the predictions.
    """
    print("Loading data...")
    word_vocab = load_pickle(save_dir, "word2id.pkl")
    label_vocab = load_pickle(save_dir, "label2id.pkl")
    print("vocabulary size:", len(word_vocab))
    print("number of classes:", len(label_vocab))

    # Index the data with the vocabularies saved at training time.
    vocabularies = {"word_vocab": word_vocab, "label_vocab": label_vocab}
    dataset = TextClassifyDataSet(load_func=ClassDataSetLoader.load)
    dataset.load(train_data_dir, vocabs=vocabularies)

    # Sizes are set first; the config section is loaded into the same object
    # afterwards.
    model_args = ConfigSection()
    model_args["vocab_size"] = len(word_vocab)
    model_args["num_classes"] = len(label_vocab)
    ConfigLoader.load_config(config_dir, {"text_class_model": model_args})

    print("Building model...")
    classifier = CNNText(model_args)

    # Fill the freshly built model with the trained parameters.
    weights_path = os.path.join(save_dir, model_name)
    ModelLoader.load_pytorch(classifier, weights_path)
    print("model loaded!")

    predictor = ClassificationInfer(pickle_path=save_dir)
    predictions = predictor.predict(classifier, dataset)
    print(predictions)
def infer():
    """Run inference with the ``SeqLabeling`` model saved at "./save/saved_model.pkl".

    Reads the "POS_infer" config section, sizes the model from the pickled
    vocabularies, restores the weights, indexes the data at
    ``data_infer_path`` with the word vocabulary, and prints the predictions.
    """
    infer_args = ConfigSection()
    ConfigLoader().load_config(config_path, {"POS_infer": infer_args})

    # The word vocabulary is kept: it is needed again to index the input data.
    word_vocab = load_pickle(pickle_path, "word2id.pkl")
    infer_args["vocab_size"] = len(word_vocab)
    infer_args["num_classes"] = len(load_pickle(pickle_path, "label2id.pkl"))

    # Rebuild the architecture, then fill it with the trained parameters.
    tagger = SeqLabeling(infer_args)
    ModelLoader.load_pytorch(tagger, "./save/saved_model.pkl")
    print("model loaded!")

    sentences = SeqLabelDataSet(load_func=BaseLoader.load)
    sentences.load(data_infer_path, vocabs={"word_vocab": word_vocab}, infer=True)

    # Named "predictor" so the local does not shadow this function's name.
    predictor = SeqLabelInfer(pickle_path)
    predictions = predictor.predict(tagger, sentences)
    print(predictions)
def train_test():
    """Train a ``SeqLabeling`` model, save it, reload it, and test the reload.

    Training uses the "POS" config section and the PKU-format data at
    ``cws_data_path``. The saved weights are then loaded into a fresh model,
    which is tested on the training data with the "POS_test" section.
    """
    config_root = "./data_for_tests/config"
    weights_path = "./data_for_tests/saved_model.pkl"

    train_args = ConfigSection()
    ConfigLoader("config.cfg").load_config(config_root, {"POS": train_args})

    # Load and preprocess the corpus; the preprocessor supplies model sizes.
    loader = TokenizeDatasetLoader(cws_data_path)
    raw_corpus = loader.load_pku()
    preprocessor = SeqLabelPreprocess()
    data_train = preprocessor.run(raw_corpus, pickle_path=pickle_path)
    train_args["vocab_size"] = preprocessor.vocab_size
    train_args["num_classes"] = preprocessor.num_classes

    # The trainer is built before the model, both from the same arguments.
    trainer = SeqLabelTrainer(**train_args.data)
    model = SeqLabeling(train_args)

    trainer.train(model, data_train)
    print("Training finished!")

    ModelSaver(weights_path).save_pytorch(model)
    print("Model saved!")

    # Drop the trained objects so the test below really uses the saved file.
    del model, trainer, loader

    model = SeqLabeling(train_args)
    ModelLoader.load_pytorch(model, weights_path)
    print("model loaded!")

    test_args = ConfigSection()
    ConfigLoader("config.cfg").load_config(config_root, {"POS_test": test_args})

    tester = SeqLabelTester(**test_args.data)
    tester.test(model, data_train)

    print(tester.show_metrics())
    print("model tested!")
def train_test():
    """Train a ``SeqLabeling`` model, save it, reload it, and test the reload.

    The data set at ``cws_data_path`` is indexed with freshly built word and
    label vocabularies, which are pickled under ``pickle_path``. Both training
    and testing read the "POS_infer" config section; testing runs on the
    training data with the "truth" field switched back to a target.
    """
    weights_path = "./save/saved_model.pkl"

    train_args = ConfigSection()
    ConfigLoader().load_config(config_path, {"POS_infer": train_args})

    # Build the vocabularies from the data, then index the data with them.
    data_train = TokenizeDataSetLoader().load(cws_data_path)
    word_vocab = Vocabulary()
    label_vocab = Vocabulary()
    data_train.update_vocab(word_seq=word_vocab, label_seq=label_vocab)
    data_train.index_field("word_seq", word_vocab).index_field("label_seq", label_vocab)
    data_train.set_origin_len("word_seq")
    # During training the "truth" field is flagged as a non-target.
    data_train.rename_field("label_seq", "truth").set_target(truth=False)

    train_args["vocab_size"] = len(word_vocab)
    train_args["num_classes"] = len(label_vocab)

    # Persist the vocabularies so inference can reuse them.
    save_pickle(word_vocab, pickle_path, "word2id.pkl")
    save_pickle(label_vocab, pickle_path, "label2id.pkl")

    trainer = SeqLabelTrainer(**train_args.data)
    model = SeqLabeling(train_args)
    trainer.train(model, data_train)

    ModelSaver(weights_path).save_pytorch(model)

    # Drop the trained objects so the test below really uses the saved file.
    del model, trainer

    model = SeqLabeling(train_args)
    ModelLoader.load_pytorch(model, weights_path)

    test_args = ConfigSection()
    ConfigLoader().load_config(config_path, {"POS_infer": test_args})
    test_args["evaluator"] = SeqLabelEvaluator()

    tester = SeqLabelTester(**test_args.data)

    # Testing needs "truth" flagged as a target again.
    data_train.set_target(truth=True)
    tester.test(model, data_train)
def train():
    """Train an ``AdvSeqLabel`` model and save it to "./save/trained_model.pkl".

    Reads the "train" and "test" config sections, splits the data set at
    ``cws_data_path`` into train/dev parts, pickles the dev set and both
    vocabularies under "./save/", optionally warm-starts from
    "./save/saved_model.pkl", then trains and saves the model.
    """
    save_root = "./save/"

    train_args = ConfigSection()
    test_args = ConfigSection()
    ConfigLoader().load_config(cfgfile, {"train": train_args, "test": test_args})

    print("loading data set...")
    data = SeqLabelDataSet(load_func=TokenizeDataSetLoader.load)
    data.load(cws_data_path)
    data_train, data_dev = data.split(ratio=0.3)

    train_args["vocab_size"] = len(data.word_vocab)
    train_args["num_classes"] = len(data.label_vocab)
    print("vocab size={}, num_classes={}".format(len(data.word_vocab), len(data.label_vocab)))

    # The dev split is used for evaluation, so its "truth" field is a target.
    change_field_is_target(data_dev, "truth", True)

    # Persist what the test/inference entry points need later.
    save_pickle(data_dev, save_root, "data_dev.pkl")
    save_pickle(data.word_vocab, save_root, "word2id.pkl")
    save_pickle(data.label_vocab, save_root, "label2id.pkl")

    trainer = SeqLabelTrainer(
        epochs=train_args["epochs"],
        batch_size=train_args["batch_size"],
        validate=train_args["validate"],
        use_cuda=train_args["use_cuda"],
        pickle_path=train_args["pickle_path"],
        save_best_dev=True,
        print_every_step=10,
        model_name="trained_model.pkl",
        evaluator=SeqLabelEvaluator(),
    )

    model = AdvSeqLabel(train_args)
    try:
        # Best-effort warm start from an earlier checkpoint.
        ModelLoader.load_pytorch(model, "./save/saved_model.pkl")
        print('model parameter loaded!')
    except Exception:
        # NOTE(review): any load failure is treated as "no checkpoint".
        print("No saved model. Continue.")

    trainer.train(model, data_train, data_dev)
    print("Training finished!")

    ModelSaver("./save/trained_model.pkl").save_pytorch(model)
    print("Model saved!")
def train():
    """Train a ``BiaffineParser`` with pretrained word embeddings and save it.

    Builds a ``Trainer`` from ``train_args`` and overrides its optimizer,
    update, loss and validator hooks. The model's word embedding is replaced
    with the loaded embedding matrix, padding rows are zeroed, and training
    optionally warm-starts from "./save/saved_model.pkl". The trained model is
    written back to that same path.
    """
    trainer = Trainer(**train_args.data)

    def _define_optim(obj):
        """Attach an Adam optimizer and a LambdaLR decay schedule to ``obj``."""
        obj._optimizer = torch.optim.Adam(obj._model.parameters(), **optim_args.data)
        obj._scheduler = torch.optim.lr_scheduler.LambdaLR(
            obj._optimizer, lambda ep: .75 ** (ep / 5e4))

    def _update(obj):
        """Apply one parameter update, then advance the learning-rate schedule."""
        # PyTorch >= 1.1 expects optimizer.step() before scheduler.step();
        # the reverse order warns and skips the first schedule value.
        obj._optimizer.step()
        obj._scheduler.step()

    # Override the trainer's hooks on this instance only.
    trainer.define_optimizer = lambda: _define_optim(trainer)
    trainer.update = lambda: _update(trainer)
    trainer.get_loss = lambda predict, truth: trainer._loss_func(**predict, **truth)
    trainer._create_validator = lambda x: MyTester(**test_args.data)

    model = BiaffineParser(**model_args.data)

    # Replace the word embedding with the loaded matrix; it stays trainable
    # because freeze=False.
    embed, _ = EmbedLoader.load_embedding(
        model_args['word_emb_dim'], emb_file_name, 'glove', word_v,
        os.path.join(processed_datadir, 'word_emb.pkl'))
    model.word_embedding = torch.nn.Embedding.from_pretrained(embed, freeze=False)

    # Record the padding index and zero the padding row of both embeddings.
    model.word_embedding.padding_idx = word_v.padding_idx
    model.word_embedding.weight.data[word_v.padding_idx].fill_(0)
    model.pos_embedding.padding_idx = pos_v.padding_idx
    model.pos_embedding.weight.data[pos_v.padding_idx].fill_(0)

    try:
        # Best-effort warm start from an earlier checkpoint.
        ModelLoader.load_pytorch(model, "./save/saved_model.pkl")
        print('model parameter loaded!')
    except Exception:
        # NOTE(review): any load failure is treated as "no checkpoint".
        print("No saved model. Continue.")

    trainer.train(model, train_data, dev_data)
    print("Training finished!")

    saver = ModelSaver("./save/saved_model.pkl")
    saver.save_pytorch(model)
    print("Model saved!")
def train_test():
    """Train a ``SeqLabeling`` model, save it, reload it, and test the reload.

    The data set at ``cws_data_path`` supplies the vocabularies, which are
    pickled under ``pickle_path``. Both training and testing read the
    "POS_infer" config section; testing runs on the training data after its
    "truth" field is marked as a target.
    """
    weights_path = "./save/saved_model.pkl"

    train_args = ConfigSection()
    ConfigLoader().load_config(config_path, {"POS_infer": train_args})

    data_train = SeqLabelDataSet(load_func=TokenizeDataSetLoader.load)
    data_train.load(cws_data_path)

    train_args["vocab_size"] = len(data_train.word_vocab)
    train_args["num_classes"] = len(data_train.label_vocab)

    # Persist the vocabularies so inference can reuse them.
    save_pickle(data_train.word_vocab, pickle_path, "word2id.pkl")
    save_pickle(data_train.label_vocab, pickle_path, "label2id.pkl")

    trainer = SeqLabelTrainer(**train_args.data)
    model = SeqLabeling(train_args)
    trainer.train(model, data_train)

    ModelSaver(weights_path).save_pytorch(model)

    # Drop the trained objects so the test below really uses the saved file.
    del model, trainer

    model = SeqLabeling(train_args)
    ModelLoader.load_pytorch(model, weights_path)

    test_args = ConfigSection()
    ConfigLoader().load_config(config_path, {"POS_infer": test_args})
    test_args["evaluator"] = SeqLabelEvaluator()

    tester = SeqLabelTester(**test_args.data)

    # Testing needs "truth" flagged as a target.
    change_field_is_target(data_train, "truth", True)
    tester.test(model, data_train)
def test():
    """Evaluate the saved ``BiaffineParser`` on ``dev_data`` and print metrics.

    The model must be loadable from "./save/saved_model.pkl"; a load failure
    is reported and re-raised.
    """
    evaluator_runner = MyTester(**test_args.data)
    parser = BiaffineParser(**model_args.data)

    try:
        ModelLoader.load_pytorch(parser, "./save/saved_model.pkl")
        print('model parameter loaded!')
    except Exception:
        # Nothing to test without trained weights: report and propagate.
        print("No saved model. Abort test.")
        raise

    # Start testing
    evaluator_runner.test(parser, dev_data)
    print(evaluator_runner.show_metrics())
    print("Testing finished!")
def train():
    """Train an ``AdvSeqLabel`` model on PKU-format data and save it.

    Reads the "train" and "test" config sections, preprocesses the corpus at
    ``cws_data_path`` into train/dev splits, optionally warm-starts from
    "./save/saved_model.pkl", then trains and writes the model back to that
    same path.
    """
    checkpoint_path = "./save/saved_model.pkl"

    train_args = ConfigSection()
    test_args = ConfigSection()
    ConfigLoader("good_path").load_config(cfgfile, {"train": train_args, "test": test_args})

    raw_corpus = TokenizeDatasetLoader(cws_data_path).load_pku()

    # Preprocess into train/dev splits; the preprocessor supplies model sizes.
    preprocessor = SeqLabelPreprocess()
    data_train, data_dev = preprocessor.run(
        raw_corpus, pickle_path=pickle_path, train_dev_split=0.3)
    train_args["vocab_size"] = preprocessor.vocab_size
    train_args["num_classes"] = preprocessor.num_classes

    trainer = SeqLabelTrainer(**train_args.data)

    model = AdvSeqLabel(train_args)
    try:
        # Best-effort warm start from an earlier checkpoint.
        ModelLoader.load_pytorch(model, checkpoint_path)
        print('model parameter loaded!')
    except Exception:
        # NOTE(review): any load failure is treated as "no checkpoint".
        print("No saved model. Continue.")

    trainer.train(model, data_train, data_dev)
    print("Training finished!")

    ModelSaver(checkpoint_path).save_pytorch(model)
    print("Model saved!")
def load(self, model_name, config_file="config", section_name="model"):
    """
    Load a pre-trained FastNLP model together with additional data.

    The model is downloaded first when ``self.model_exist`` reports that it is
    not present in ``self.model_dir``. On success this sets ``self.word_vocab``,
    ``self.label_vocab``, ``self.model`` and ``self.infer_type``.

    :param model_name: str, the name of a FastNLP model.
    :param config_file: str, the name of the config file which stores the initialization information of the model.
        (default: "config")
    :param section_name: str, the name of the corresponding section in the config file. (default: model)
    :raises TypeError: if ``model_name`` is not a string.
    :raises ValueError: if ``model_name`` is not a key of ``FastNLP_MODEL_COLLECTION``.
    """
    # Explicit check instead of ``assert``: asserts vanish under ``python -O``,
    # and ``isinstance`` also admits ``str`` subclasses.
    if not isinstance(model_name, str):
        raise TypeError(
            "model_name must be a str, got {}.".format(type(model_name).__name__))
    if model_name not in FastNLP_MODEL_COLLECTION:
        raise ValueError("No FastNLP model named {}.".format(model_name))

    # Registry entry for this model, looked up once and reused below.
    model_spec = FastNLP_MODEL_COLLECTION[model_name]

    if not self.model_exist(model_dir=self.model_dir):
        self._download(model_name, model_spec["url"])

    model_class = self._get_model_class(model_spec["class"])
    print("Restore model class {}".format(str(model_class)))

    model_args = ConfigSection()
    ConfigLoader.load_config(os.path.join(self.model_dir, config_file), {section_name: model_args})
    print("Restore model hyper-parameters {}".format(str(model_args.data)))

    # fetch dictionary size and number of labels from pickle files
    self.word_vocab = load_pickle(self.model_dir, "word2id.pkl")
    model_args["vocab_size"] = len(self.word_vocab)
    self.label_vocab = load_pickle(self.model_dir, "label2id.pkl")
    model_args["num_classes"] = len(self.label_vocab)

    # Construct the model
    model = model_class(model_args)
    print("Model constructed.")

    # To do: framework independent
    ModelLoader.load_pytorch(model, os.path.join(self.model_dir, model_spec["pickle"]))
    print("Model weights loaded.")

    self.model = model
    self.infer_type = model_spec["type"]

    print("Inference ready.")
def infer():
    """Run inference with the ``SeqLabeling`` model saved under "./data_for_tests".

    Reads the "POS_test" config section, sizes the model from the pickled
    dictionaries, restores the weights, and prints the predictions for the
    lines read from ``data_infer_path``.
    """
    infer_args = ConfigSection()
    ConfigLoader("config.cfg").load_config("./data_for_tests/config", {"POS_test": infer_args})

    # Model dimensions are taken from the pickled dictionaries.
    infer_args["vocab_size"] = len(load_pickle(pickle_path, "word2id.pkl"))
    infer_args["num_classes"] = len(load_pickle(pickle_path, "id2class.pkl"))

    # Rebuild the architecture, then fill it with the trained parameters.
    tagger = SeqLabeling(infer_args)
    ModelLoader.load_pytorch(tagger, "./data_for_tests/saved_model.pkl")
    print("model loaded!")

    # Transform strings into list of list of strings:
    #     [[word_11, word_12, ...],
    #      [word_21, word_22, ...],
    #      ...]
    # In this case, each line in "people_infer.txt" is already a sentence,
    # so load_lines() just splits them.
    sentences = BaseLoader(data_infer_path).load_lines()

    # Named "predictor" so the local does not shadow this function's name.
    predictor = Predictor(pickle_path)
    predictions = predictor.predict(tagger, sentences)

    print(predictions)
    print("Inference finished!")
def infer():
    """Run sequence-labeling inference and print one prediction per line.

    Reads the "POS_infer" config section, loads the pickled word/label
    vocabularies, restores the saved ``SeqLabeling`` weights, indexes the data
    at ``data_infer_path`` with those vocabularies, and prints progress
    messages along the way.
    """
    infer_args = ConfigSection()
    ConfigLoader().load_config(config_dir, {"POS_infer": infer_args})

    # Both vocabularies are kept: they size the model and index the data.
    word_vocab = load_pickle(pickle_path, "word2id.pkl")
    label_vocab = load_pickle(pickle_path, "label2id.pkl")
    infer_args["vocab_size"] = len(word_vocab)
    infer_args["num_classes"] = len(label_vocab)
    print("vocabularies loaded")

    tagger = SeqLabeling(infer_args)
    print("model defined")

    # Fill the freshly built model with the trained parameters.
    weights_path = os.path.join(pickle_path, model_name)
    ModelLoader.load_pytorch(tagger, weights_path)
    print("model loaded!")

    vocabularies = {"word_vocab": word_vocab, "label_vocab": label_vocab}
    sentences = SeqLabelDataSet(load_func=BaseLoader.load)
    sentences.load(data_infer_path, vocabs=vocabularies, infer=True)
    print("data set prepared")

    # Named "predictor" so the local does not shadow this function's name.
    predictor = SeqLabelInfer(pickle_path)
    predictions = predictor.predict(tagger, sentences)

    for prediction in predictions:
        print(prediction)
    print("Inference finished!")
def test_training():
    """Train, save, reload and test a ``SeqLabeling`` model end to end.

    The data at ``data_path`` is indexed with freshly built vocabularies and
    split into train/dev parts. The model is trained with SGD, saved under
    ``pickle_path``, reloaded into a fresh instance, and tested on the dev
    split.
    """
    trainer_args = ConfigSection()
    model_args = ConfigSection()
    ConfigLoader().load_config(config_dir, {
        "test_seq_label_trainer": trainer_args,
        "test_seq_label_model": model_args,
    })

    # Build the vocabularies from the data, then index the data with them.
    data_set = TokenizeDataSetLoader().load(data_path)
    word_vocab = Vocabulary()
    label_vocab = Vocabulary()
    data_set.update_vocab(word_seq=word_vocab, label_seq=label_vocab)
    data_set.index_field("word_seq", word_vocab).index_field("label_seq", label_vocab)
    data_set.set_origin_len("word_seq")
    # During training the "truth" field is flagged as a non-target.
    data_set.rename_field("label_seq", "truth").set_target(truth=False)
    data_train, data_dev = data_set.split(0.3, shuffle=True)

    model_args["vocab_size"] = len(word_vocab)
    model_args["num_classes"] = len(label_vocab)

    save_pickle(word_vocab, pickle_path, "word2id.pkl")
    save_pickle(label_vocab, pickle_path, "label2id.pkl")

    # Validation and CUDA are switched off regardless of the config.
    trainer = SeqLabelTrainer(
        epochs=trainer_args["epochs"],
        batch_size=trainer_args["batch_size"],
        validate=False,
        use_cuda=False,
        pickle_path=pickle_path,
        save_best_dev=trainer_args["save_best_dev"],
        model_name=model_name,
        optimizer=Optimizer("SGD", lr=0.01, momentum=0.9),
    )

    model = SeqLabeling(model_args)
    trainer.train(model, data_train, data_dev)

    saver = ModelSaver(os.path.join(pickle_path, model_name))
    saver.save_pytorch(model)

    # Drop the trained objects so the test below really uses the saved file.
    del model, trainer

    model = SeqLabeling(model_args)
    ModelLoader.load_pytorch(model, os.path.join(pickle_path, model_name))

    # The tester section is loaded, but the tester below is configured with
    # explicit keyword arguments rather than from it.
    tester_args = ConfigSection()
    ConfigLoader().load_config(config_dir, {"test_seq_label_tester": tester_args})

    tester = SeqLabelTester(
        batch_size=4,
        use_cuda=False,
        pickle_path=pickle_path,
        model_name="seq_label_in_test.pkl",
        evaluator=SeqLabelEvaluator(),
    )

    # Testing needs "truth" flagged as a target again.
    data_dev.set_target(truth=True)
    tester.test(model, data_dev)
def train_and_test():
    """Train, save, reload and test a ``SeqLabeling`` model on POS data.

    The lines loaded from ``data_path`` are preprocessed into equal train/dev
    splits. The model is trained with SGD, saved under ``pickle_path``,
    reloaded into a fresh instance, and tested on the dev split; the tester's
    metrics are printed.
    """
    trainer_args = ConfigSection()
    model_args = ConfigSection()
    ConfigLoader("config.cfg").load_config(config_dir, {
        "test_seq_label_trainer": trainer_args,
        "test_seq_label_model": model_args,
    })

    pos_loader = POSDatasetLoader(data_path)
    raw_corpus = pos_loader.load_lines()

    # Preprocess into train/dev splits; the preprocessor supplies model sizes.
    preprocessor = SeqLabelPreprocess()
    data_train, data_dev = preprocessor.run(
        raw_corpus, pickle_path=pickle_path, train_dev_split=0.5)
    model_args["vocab_size"] = preprocessor.vocab_size
    model_args["num_classes"] = preprocessor.num_classes

    # The trainer is configured with explicit keyword arguments.
    trainer = SeqLabelTrainer(
        epochs=trainer_args["epochs"],
        batch_size=trainer_args["batch_size"],
        validate=trainer_args["validate"],
        use_cuda=trainer_args["use_cuda"],
        pickle_path=pickle_path,
        save_best_dev=trainer_args["save_best_dev"],
        model_name=model_name,
        optimizer=Optimizer("SGD", lr=0.01, momentum=0.9),
    )

    model = SeqLabeling(model_args)
    trainer.train(model, data_train, data_dev)
    print("Training finished!")

    saver = ModelSaver(os.path.join(pickle_path, model_name))
    saver.save_pytorch(model)
    print("Model saved!")

    # Drop the trained objects so the test below really uses the saved file.
    del model, trainer, pos_loader

    model = SeqLabeling(model_args)
    ModelLoader.load_pytorch(model, os.path.join(pickle_path, model_name))
    print("model loaded!")

    # The tester section is loaded, but the tester below is configured with
    # explicit keyword arguments rather than from it.
    tester_args = ConfigSection()
    ConfigLoader("config.cfg").load_config(config_dir, {"test_seq_label_tester": tester_args})

    tester = SeqLabelTester(
        save_output=False,
        save_loss=False,
        save_best_dev=False,
        batch_size=4,
        use_cuda=False,
        pickle_path=pickle_path,
        model_name="seq_label_in_test.pkl",
        print_every_step=1,
    )

    tester.test(model, data_dev)

    print(tester.show_metrics())
    print("model tested!")
def train_and_test():
    """Train, save, reload and test a ``SeqLabeling`` model end to end.

    The data set at ``data_path`` is split into train/dev parts and its
    vocabularies are pickled under ``pickle_path``. The model is trained with
    SGD, saved, reloaded into a fresh instance, and tested on the dev split
    after its "truth" field is marked as a target.
    """
    trainer_args = ConfigSection()
    model_args = ConfigSection()
    ConfigLoader().load_config(config_dir, {
        "test_seq_label_trainer": trainer_args,
        "test_seq_label_model": model_args,
    })

    data_set = SeqLabelDataSet()
    data_set.load(data_path)
    train_set, dev_set = data_set.split(0.3, shuffle=True)

    model_args["vocab_size"] = len(data_set.word_vocab)
    model_args["num_classes"] = len(data_set.label_vocab)

    # Persist the vocabularies so inference can reuse them.
    save_pickle(data_set.word_vocab, pickle_path, "word2id.pkl")
    save_pickle(data_set.label_vocab, pickle_path, "label2id.pkl")

    # Validation is switched off regardless of the config.
    trainer = SeqLabelTrainer(
        epochs=trainer_args["epochs"],
        batch_size=trainer_args["batch_size"],
        validate=False,
        use_cuda=trainer_args["use_cuda"],
        pickle_path=pickle_path,
        save_best_dev=trainer_args["save_best_dev"],
        model_name=model_name,
        optimizer=Optimizer("SGD", lr=0.01, momentum=0.9),
    )

    model = SeqLabeling(model_args)
    trainer.train(model, train_set, dev_set)
    print("Training finished!")

    saver = ModelSaver(os.path.join(pickle_path, model_name))
    saver.save_pytorch(model)
    print("Model saved!")

    # Drop the trained objects so the test below really uses the saved file.
    del model, trainer

    # Testing needs "truth" flagged as a target.
    change_field_is_target(dev_set, "truth", True)

    model = SeqLabeling(model_args)
    ModelLoader.load_pytorch(model, os.path.join(pickle_path, model_name))
    print("model loaded!")

    # The tester section is loaded, but the tester below is configured with
    # explicit keyword arguments rather than from it.
    tester_args = ConfigSection()
    ConfigLoader().load_config(config_dir, {"test_seq_label_tester": tester_args})

    tester = SeqLabelTester(
        batch_size=4,
        use_cuda=False,
        pickle_path=pickle_path,
        model_name="seq_label_in_test.pkl",
        evaluator=SeqLabelEvaluator(),
    )

    tester.test(model, dev_set)
    print("model tested!")