def infer():
    """Predict labels for the inference data set and print the results.

    Reads the ``POS_test`` section of ``cfgfile``, sizes the model from the
    word and label pickles found under ``pickle_path``, restores the weights
    from ``./save/trained_model.pkl`` and runs ``SeqLabelInfer`` on the data
    at ``data_infer_path``.

    Any exception raised while restoring the weights is reported on stdout
    and re-raised.
    """
    # Testing hyper-parameters come from the config file.
    args = ConfigSection()
    ConfigLoader().load_config(cfgfile, {"POS_test": args})

    # Vocabulary size and number of labels are taken from the pickled mappings.
    word_vocab = load_pickle(pickle_path, "word2id.pkl")
    args["vocab_size"] = len(word_vocab)
    label_vocab = load_pickle(pickle_path, "label2id.pkl")
    args["num_classes"] = len(label_vocab)

    # Build the model and restore its parameters.
    model = AdvSeqLabel(args)
    try:
        ModelLoader.load_pytorch(model, "./save/trained_model.pkl")
        print('model loaded!')
    except Exception:
        print('cannot load model!')
        raise

    # Load the raw lines to label, indexed with the word vocabulary.
    dataset = SeqLabelDataSet(load_func=BaseLoader.load_lines)
    dataset.load(data_infer_path, vocabs={"word_vocab": word_vocab}, infer=True)
    print('data loaded')

    # Run the inference interface and show what it produced.
    inferencer = SeqLabelInfer(pickle_path)
    predictions = inferencer.predict(model, dataset)
    print(predictions)
    print("Inference finished!")
def predict():
    """Evaluate the trained model on the pickled development set.

    Reads the ``POS_test`` section of ``cfgfile``, sizes the model from the
    word and label pickles under ``pickle_path``, restores the weights from
    ``./save/trained_model.pkl`` and hands the model plus ``data_dev.pkl`` to
    a ``SeqLabelTester``.
    """
    # Testing hyper-parameters come from the config file.
    args = ConfigSection()
    ConfigLoader().load_config(cfgfile, {"POS_test": args})

    # Vocabulary size and number of labels are taken from the pickled mappings.
    word_vocab = load_pickle(pickle_path, "word2id.pkl")
    args["vocab_size"] = len(word_vocab)
    label_vocab = load_pickle(pickle_path, "label2id.pkl")
    args["num_classes"] = len(label_vocab)

    # Development data to test on.
    dev_set = load_pickle(pickle_path, "data_dev.pkl")

    # Build the model and fill in the trained parameters.
    model = AdvSeqLabel(args)
    ModelLoader.load_pytorch(model, "./save/trained_model.pkl")
    print("model loaded!")

    # The tester receives every config entry as a keyword argument,
    # including the evaluator added here.
    args["evaluator"] = SeqLabelEvaluator()
    tester = SeqLabelTester(**args.data)

    tester.test(model, dev_set)
def _get_section(self, sect_name):
    """Load one named section from the config file at ``self.file_path``.

    :param sect_name: Name of the section to load.
    :return: The ``ConfigSection`` passed to ``ConfigLoader.load_config``
        for that section name.
    """
    section = ConfigSection()
    loader = ConfigLoader()
    loader.load_config(self.file_path, {sect_name: section})
    return section
def train():
    """Train the sequence-labelling model and save the result.

    Loads the ``train`` and ``test`` sections of ``cfgfile``, reads the data
    at ``cws_data_path``, splits it into train/dev parts, pickles the dev set
    and both vocabularies into ``./save/``, trains an ``AdvSeqLabel`` model
    and finally writes it to ``./save/trained_model.pkl``.
    """
    # Hyper-parameters for training and testing.
    train_args = ConfigSection()
    test_args = ConfigSection()
    ConfigLoader().load_config(cfgfile, {"train": train_args, "test": test_args})

    print("loading data set...")
    dataset = SeqLabelDataSet(load_func=TokenizeDataSetLoader.load)
    dataset.load(cws_data_path)
    train_set, dev_set = dataset.split(ratio=0.3)

    # The model is sized from the vocabularies built while loading the data.
    vocab_size = len(dataset.word_vocab)
    num_classes = len(dataset.label_vocab)
    train_args["vocab_size"] = vocab_size
    train_args["num_classes"] = num_classes
    print("vocab size={}, num_classes={}".format(vocab_size, num_classes))

    # Mark the "truth" field of the dev set as a target before pickling it.
    change_field_is_target(dev_set, "truth", True)
    save_pickle(dev_set, "./save/", "data_dev.pkl")
    save_pickle(dataset.word_vocab, "./save/", "word2id.pkl")
    save_pickle(dataset.label_vocab, "./save/", "label2id.pkl")

    trainer = SeqLabelTrainer(
        epochs=train_args["epochs"],
        batch_size=train_args["batch_size"],
        validate=train_args["validate"],
        use_cuda=train_args["use_cuda"],
        pickle_path=train_args["pickle_path"],
        save_best_dev=True,
        print_every_step=10,
        model_name="trained_model.pkl",
        evaluator=SeqLabelEvaluator(),
    )

    model = AdvSeqLabel(train_args)
    # Best effort: start from an earlier checkpoint when one can be loaded,
    # otherwise train from the freshly built model.
    try:
        ModelLoader.load_pytorch(model, "./save/saved_model.pkl")
        print('model parameter loaded!')
    except Exception:
        print("No saved model. Continue.")

    trainer.train(model, train_set, dev_set)
    print("Training finished!")

    # Persist the trained parameters.
    saver = ModelSaver("./save/trained_model.pkl")
    saver.save_pytorch(model)
    print("Model saved!")
def test_case_2(self):
    """Save a section with float, list, int and str values into ``section_A``.

    A two-section config file is written to ``./test.cfg``, a populated
    ``ConfigSection`` is saved into it through ``ConfigSaver``, and the file
    is deleted afterwards.
    """
    config_path = "./test.cfg"
    config = "[section_A]\n[section_B]\n"
    with open(config_path, "w", encoding="utf-8") as f:
        f.write(config)

    try:
        saver = ConfigSaver(config_path)
        section = ConfigSection()
        section["doubles"] = 0.8
        section["tt"] = [1, 2, 3]
        section["test"] = 105
        section["str"] = "this is a str"
        saver.save_config_file("section_A", section)
    finally:
        # Remove the scratch file without shelling out to `rm`, so cleanup is
        # portable and still happens when the save above raises. A missing
        # file is tolerated, as it was with `rm`.
        if os.path.exists(config_path):
            os.remove(config_path)
def _load_all(src):
    """Load the parser model at ``src`` together with its sibling artefacts.

    The files ``word_v.pkl``, ``pos_v.pkl``, ``tag_v.pkl`` and ``pos_pp.pkl``
    are looked up in the directory that contains ``src``. Model
    hyper-parameters come from the ``model`` section of ``cfg.cfg`` (relative
    to the current working directory), extended with the three vocabulary
    sizes.

    :param src: Path of the saved model state dict.
    :return: A dict with the keys ``word_v``, ``pos_v``, ``tag_v``, ``model``
        and ``pos_pp``.
    """
    model_path = src
    # os.path.join keeps the paths relative when ``src`` has no directory
    # part; plain ``dirname + '/name'`` would then point at the filesystem
    # root ('/word_v.pkl').
    src_dir = os.path.dirname(src)

    word_v = _load(os.path.join(src_dir, 'word_v.pkl'))
    pos_v = _load(os.path.join(src_dir, 'pos_v.pkl'))
    tag_v = _load(os.path.join(src_dir, 'tag_v.pkl'))
    # NOTE(review): torch.load unpickles the file; only use it on trusted
    # checkpoints.
    pos_pp = torch.load(os.path.join(src_dir, 'pos_pp.pkl'))['pipeline']

    model_args = ConfigSection()
    ConfigLoader.load_config('cfg.cfg', {'model': model_args})
    model_args['word_vocab_size'] = len(word_v)
    model_args['pos_vocab_size'] = len(pos_v)
    model_args['num_label'] = len(tag_v)

    model = BiaffineParser(**model_args.data)
    model.load_state_dict(torch.load(model_path))

    return {
        'word_v': word_v,
        'pos_v': pos_v,
        'tag_v': tag_v,
        'model': model,
        'pos_pp': pos_pp,
    }
def test_case_1(self):
    """Round-trip sections through ``ConfigSaver`` and ``ConfigLoader``.

    The fixture ``test/io/config`` is rewritten by the saver while the test
    runs. Its original lines are captured first and written back in a
    ``finally`` block, so a failing assertion cannot leave the fixture
    modified. The second half exercises ``ConfigSaver`` on a malformed file
    and on a non-existent path; the temporary malformed file is always
    removed.
    """
    config_file_dir = "test/io/"
    config_file_name = "config"
    config_file_path = os.path.join(config_file_dir, config_file_name)
    tmp_config_file_path = os.path.join(config_file_dir, "tmp_config")

    # Snapshot of the fixture, restored in the ``finally`` below.
    with open(config_file_path, "r") as f:
        lines = f.readlines()

    try:
        standard_section = ConfigSection()
        t_section = ConfigSection()
        ConfigLoader().load_config(config_file_path, {
            "test": standard_section,
            "t": t_section,
        })

        config_saver = ConfigSaver(config_file_path)

        section = ConfigSection()
        section["doubles"] = 0.8
        section["tt"] = 0.5
        section["test"] = 105
        section["str"] = "this is a str"

        # Same object as ``section``: the extra key is visible through both names.
        test_case_2_section = section
        test_case_2_section["double"] = 0.5

        # Expected content of "test" after saving: the loaded values
        # overridden by everything in ``section``.
        for k in section.__dict__:
            standard_section[k] = section[k]

        config_saver.save_config_file("test", section)
        config_saver.save_config_file("another-test", section)
        config_saver.save_config_file("one-another-test", section)
        config_saver.save_config_file("test-case-2", section)

        test_section = ConfigSection()
        at_section = ConfigSection()
        another_test_section = ConfigSection()
        one_another_test_section = ConfigSection()
        a_test_case_2_section = ConfigSection()

        ConfigLoader().load_config(config_file_path, {
            "test": test_section,
            "another-test": another_test_section,
            "t": at_section,
            "one-another-test": one_another_test_section,
            "test-case-2": a_test_case_2_section,
        })

        assert test_section == standard_section
        assert at_section == t_section
        assert another_test_section == section
        assert one_another_test_section == section
        assert a_test_case_2_section == test_case_2_section

        # Saving over a section that already holds these values.
        config_saver.save_config_file("test", section)
    finally:
        # Put the fixture back no matter how the block above ended.
        with open(config_file_path, "w") as f:
            f.writelines(lines)

    # A file whose second line is not a valid config entry.
    with open(tmp_config_file_path, "w") as f:
        f.write('[test]\n')
        f.write('this is an fault example\n')

    try:
        tmp_config_saver = ConfigSaver(tmp_config_file_path)
        try:
            tmp_config_saver._read_section()
        except Exception:
            # Only the malformed-line path is exercised here; whether it
            # raises is deliberately not asserted.
            pass
    finally:
        os.remove(tmp_config_file_path)

    try:
        tmp_config_saver = ConfigSaver("file-NOT-exist")
    except Exception:
        # Constructing a saver on a missing file is exercised the same way.
        pass
# Data set location and file names.
datadir = '/home/yfshao/workdir/parser-data/'
train_data_name = "train_ctb5.txt"
dev_data_name = "dev_ctb5.txt"
test_data_name = "test_ctb5.txt"

# Embedding file in use. Alternatives that were tried:
#   '/home/yfshao/glove.6B.100d.txt'
#   "/home/yfshao/workdir/word_vector/cc.zh.300.vec"
emb_file_name = "/home/yfshao/workdir/parser-data/word_OOVthr_30_100v.txt"

# Data loader in use (alternative: ConlluDataLoader()).
loader = CTBDataLoader()

cfgfile = './cfg.cfg'
processed_datadir = './save'

# One ConfigSection per section of the config file; load_config is handed
# the section-name -> ConfigSection mapping below.
train_args = ConfigSection()
test_args = ConfigSection()
model_args = ConfigSection()
optim_args = ConfigSection()
ConfigLoader.load_config(
    cfgfile,
    {
        "train": train_args,
        "test": test_args,
        "model": model_args,
        "optim": optim_args,
    },
)

# Echo the loaded settings (the model section is not printed).
print('trainre Args:', train_args.data)
print('test Args:', test_args.data)
print('optim Args:', optim_args.data)
dim=attention_unit, num_vec=attention_hops) self.mlp = MLP( size_layer=[lstm_hidden_size * 2 * attention_hops, nfc, class_num]) def forward(self, x): x_emb = self.embedding(x) output = self.lstm(x_emb) after_attention, penalty = self.attention(output, x) after_attention = after_attention.view(after_attention.size(0), -1) output = self.mlp(after_attention) return output def loss(self, predict, ground_truth): print("predict:%s; g:%s" % (str(predict.size()), str(ground_truth.size()))) print(ground_truth) return F.cross_entropy(predict, ground_truth) train_args = ConfigSection() ConfigLoader("good path").load_config('config.cfg', {"train": train_args}) train_args['vocab'] = len(word2index) trainer = ClassificationTrainer(**train_args.data) # for k in train_args.__dict__.keys(): # print(k, train_args[k]) model = SELF_ATTENTION_YELP_CLASSIFICATION(train_args) trainer.train(model, train_data, dev_data)