Beispiel #1
0
def infer():
    """Load the trained model and print its predictions for the inference data.

    Relies on the module-level names ``cfgfile``, ``pickle_path`` and
    ``data_infer_path``. The "POS_test" config section supplies the model
    arguments; vocabulary size and number of classes are taken from the
    pickled vocabularies. Any failure while loading the model is reported
    and re-raised.
    """
    # Read the "POS_test" section of the config file
    test_args = ConfigSection()
    ConfigLoader().load_config(cfgfile, {"POS_test": test_args})

    # Model dimensions come from the sizes of the pickled vocabularies
    word_vocab = load_pickle(pickle_path, "word2id.pkl")
    test_args["vocab_size"] = len(word_vocab)
    label_vocab = load_pickle(pickle_path, "label2id.pkl")
    test_args["num_classes"] = len(label_vocab)

    # Build the model with the same arguments, then restore its parameters
    model = AdvSeqLabel(test_args)
    try:
        ModelLoader.load_pytorch(model, "./save/trained_model.pkl")
        print('model loaded!')
    except Exception:
        print('cannot load model!')
        raise

    # Read the raw lines to label, indexed with the word vocabulary
    infer_data = SeqLabelDataSet(load_func=BaseLoader.load_lines)
    infer_data.load(
        data_infer_path,
        vocabs={"word_vocab": word_vocab},
        infer=True,
    )
    print('data loaded')

    # Run the inference interface and show what it produced
    inferencer = SeqLabelInfer(pickle_path)
    results = inferencer.predict(model, infer_data)

    print(results)
    print("Inference finished!")
Beispiel #2
0
def predict():
    """Evaluate the trained model on the pickled development set.

    Relies on the module-level names ``cfgfile`` and ``pickle_path``. The
    "POS_test" config section (plus an evaluator added here) is forwarded
    as keyword arguments to ``SeqLabelTester``.
    """
    # Read the "POS_test" section of the config file
    test_args = ConfigSection()
    ConfigLoader().load_config(cfgfile, {"POS_test": test_args})

    # Model dimensions come from the sizes of the pickled vocabularies
    word_vocab = load_pickle(pickle_path, "word2id.pkl")
    test_args["vocab_size"] = len(word_vocab)
    label_vocab = load_pickle(pickle_path, "label2id.pkl")
    test_args["num_classes"] = len(label_vocab)

    # Development data saved at training time
    dev_data = load_pickle(pickle_path, "data_dev.pkl")

    # Build the model with the same arguments and restore its parameters
    model = AdvSeqLabel(test_args)
    ModelLoader.load_pytorch(model, "./save/trained_model.pkl")
    print("model loaded!")

    # The tester receives every entry of the section, including the evaluator
    test_args["evaluator"] = SeqLabelEvaluator()
    tester_kwargs = test_args.data
    tester = SeqLabelTester(**tester_kwargs)

    # Run the evaluation
    tester.test(model, dev_data)
Beispiel #3
0
    def _get_section(self, sect_name):
        """Load one named section from the config file at ``self.file_path``.

        :param sect_name: Name of the section to load.
        :return: A new ``ConfigSection`` that was passed to
            ``ConfigLoader.load_config`` under ``sect_name``.
        """
        section = ConfigSection()
        requested = {sect_name: section}
        ConfigLoader().load_config(self.file_path, requested)
        return section
Beispiel #4
0
def _load_all(src):
    """Load the vocabularies, the POS pipeline and the trained parser.

    :param src: Path of the saved model state dict. ``word_v.pkl``,
        ``pos_v.pkl``, ``tag_v.pkl`` and ``pos_pp.pkl`` are read from the
        directory that contains this file.
    :return: dict with the keys ``word_v``, ``pos_v``, ``tag_v``, ``model``
        and ``pos_pp``.
    """
    model_path = src
    # Join with os.path.join: when ``src`` has no directory component,
    # dirname() is '' and plain concatenation ('' + '/word_v.pkl') would
    # point at the filesystem root instead of the current directory.
    base_dir = os.path.dirname(model_path)

    word_v = _load(os.path.join(base_dir, 'word_v.pkl'))
    pos_v = _load(os.path.join(base_dir, 'pos_v.pkl'))
    tag_v = _load(os.path.join(base_dir, 'tag_v.pkl'))
    # NOTE(review): torch.load unpickles its input; only use it on files
    # from a trusted source.
    pos_pp = torch.load(os.path.join(base_dir, 'pos_pp.pkl'))['pipeline']

    # Model hyper-parameters come from the "model" section of cfg.cfg
    # (resolved relative to the current working directory); the vocabulary
    # sizes are filled in from the loaded vocabularies.
    model_args = ConfigSection()
    ConfigLoader.load_config('cfg.cfg', {'model': model_args})
    model_args['word_vocab_size'] = len(word_v)
    model_args['pos_vocab_size'] = len(pos_v)
    model_args['num_label'] = len(tag_v)

    model = BiaffineParser(**model_args.data)
    model.load_state_dict(torch.load(model_path))
    return {
        'word_v': word_v,
        'pos_v': pos_v,
        'tag_v': tag_v,
        'model': model,
        'pos_pp': pos_pp,
    }
Beispiel #5
0
def train():
    """Train the sequence-labelling model and save it under ``./save/``.

    Relies on the module-level names ``cfgfile`` and ``cws_data_path``.
    The development split and both vocabularies are pickled so that they
    can be reloaded later. If a previously saved model can be loaded its
    parameters are used as the starting point; otherwise training starts
    from the freshly constructed model.
    """
    # Read the "train" and "test" sections of the config file
    train_args, test_args = ConfigSection(), ConfigSection()
    ConfigLoader().load_config(cfgfile, {"train": train_args, "test": test_args})

    print("loading data set...")
    data = SeqLabelDataSet(load_func=TokenizeDataSetLoader.load)
    data.load(cws_data_path)
    data_train, data_dev = data.split(ratio=0.3)

    # Model dimensions are derived from the vocabularies built while loading
    vocab_size = len(data.word_vocab)
    num_classes = len(data.label_vocab)
    train_args["vocab_size"] = vocab_size
    train_args["num_classes"] = num_classes
    print("vocab size={}, num_classes={}".format(vocab_size, num_classes))

    change_field_is_target(data_dev, "truth", True)
    # Persist the dev split and the vocabularies
    to_pickle = (
        (data_dev, "data_dev.pkl"),
        (data.word_vocab, "word2id.pkl"),
        (data.label_vocab, "label2id.pkl"),
    )
    for obj, file_name in to_pickle:
        save_pickle(obj, "./save/", file_name)

    # Trainer
    trainer_kwargs = {
        "epochs": train_args["epochs"],
        "batch_size": train_args["batch_size"],
        "validate": train_args["validate"],
        "use_cuda": train_args["use_cuda"],
        "pickle_path": train_args["pickle_path"],
        "save_best_dev": True,
        "print_every_step": 10,
        "model_name": "trained_model.pkl",
        "evaluator": SeqLabelEvaluator(),
    }
    trainer = SeqLabelTrainer(**trainer_kwargs)

    # Model; loading earlier parameters is best-effort
    model = AdvSeqLabel(train_args)
    try:
        ModelLoader.load_pytorch(model, "./save/saved_model.pkl")
        print('model parameter loaded!')
    except Exception:
        print("No saved model. Continue.")

    # Start training
    trainer.train(model, data_train, data_dev)
    print("Training finished!")

    # Saver
    ModelSaver("./save/trained_model.pkl").save_pytorch(model)
    print("Model saved!")
    def test_case_1(self):
        """Round-trip sections through ConfigSaver and ConfigLoader.

        Sections are written to the real config file with ``ConfigSaver``
        and read back with ``ConfigLoader``; the loaded sections must equal
        what was saved. Afterwards a malformed temporary file and a missing
        file are fed to ``ConfigSaver`` to exercise its error paths.

        The config file's original content is restored and the temporary
        file is removed in ``finally`` blocks, so a failing assertion does
        not leave modified fixtures behind for other tests.
        """
        config_file_dir = "test/io/"
        config_file_name = "config"
        config_file_path = os.path.join(config_file_dir, config_file_name)

        tmp_config_file_path = os.path.join(config_file_dir, "tmp_config")

        # Snapshot the config file so it can be restored afterwards
        with open(config_file_path, "r") as f:
            lines = f.readlines()

        try:
            standard_section = ConfigSection()
            t_section = ConfigSection()
            ConfigLoader().load_config(config_file_path, {
                "test": standard_section,
                "t": t_section
            })

            config_saver = ConfigSaver(config_file_path)

            section = ConfigSection()
            section["doubles"] = 0.8
            section["tt"] = 0.5
            section["test"] = 105
            section["str"] = "this is a str"

            # Same object as ``section``: the extra key is visible through both names
            test_case_2_section = section
            test_case_2_section["double"] = 0.5

            # Expected content of "test": what was loaded, overlaid with ``section``
            for k in section.__dict__:
                standard_section[k] = section[k]

            config_saver.save_config_file("test", section)
            config_saver.save_config_file("another-test", section)
            config_saver.save_config_file("one-another-test", section)
            config_saver.save_config_file("test-case-2", section)

            test_section = ConfigSection()
            at_section = ConfigSection()
            another_test_section = ConfigSection()
            one_another_test_section = ConfigSection()
            a_test_case_2_section = ConfigSection()

            ConfigLoader().load_config(
                config_file_path, {
                    "test": test_section,
                    "another-test": another_test_section,
                    "t": at_section,
                    "one-another-test": one_another_test_section,
                    "test-case-2": a_test_case_2_section
                })

            assert test_section == standard_section
            assert at_section == t_section
            assert another_test_section == section
            assert one_another_test_section == section
            assert a_test_case_2_section == test_case_2_section

            # Saving a section that already exists in the file
            config_saver.save_config_file("test", section)
        finally:
            # Always put the original content back, even if an assertion failed
            with open(config_file_path, "w") as f:
                f.writelines(lines)

        # A file whose second line is not a valid entry
        with open(tmp_config_file_path, "w") as f:
            f.write('[test]\n')
            f.write('this is an fault example\n')

        try:
            tmp_config_saver = ConfigSaver(tmp_config_file_path)
            try:
                tmp_config_saver._read_section()
            except Exception:
                # Deliberately tolerated: only the error path is exercised
                pass
        finally:
            os.remove(tmp_config_file_path)

        try:
            tmp_config_saver = ConfigSaver("file-NOT-exist")
        except Exception:
            # Deliberately tolerated: constructing on a missing file may raise
            pass
Beispiel #7
0
# Script-level setup: fixed paths, a data loader instance and the config
# sections used by the rest of the script.
# NOTE(review): emb_file_name is an absolute path on one machine; presumably
# a word-embedding file - confirm against the code that reads it.
emb_file_name = "/home/yfshao/workdir/parser-data/word_OOVthr_30_100v.txt"
# emb_file_name = "/home/yfshao/workdir/word_vector/cc.zh.300.vec"
loader = CTBDataLoader()

# Config file read below, and a directory path (its consumer is not visible here).
cfgfile = './cfg.cfg'
processed_datadir = './save'

# Config Loader
# One ConfigSection per section name; each object is handed to load_config
# under the name of the section it should receive.
train_args = ConfigSection()
test_args = ConfigSection()
model_args = ConfigSection()
optim_args = ConfigSection()
ConfigLoader.load_config(
    cfgfile, {
        "train": train_args,
        "test": test_args,
        "model": model_args,
        "optim": optim_args
    })
# Show the loaded values (model_args is loaded but not printed here).
print('trainre Args:', train_args.data)
print('test Args:', test_args.data)
print('optim Args:', optim_args.data)


# Pickle Loader
def save_data(dirpath, **kwargs):
    import _pickle
    if not os.path.exists(dirpath):
        os.mkdir(dirpath)
    for name, data in kwargs.items():
        with open(os.path.join(dirpath, name + '.pkl'), 'wb') as f:
Beispiel #8
0
                                       dim=attention_unit,
                                       num_vec=attention_hops)
        self.mlp = MLP(
            size_layer=[lstm_hidden_size * 2 * attention_hops, nfc, class_num])

    def forward(self, x):
        """Run embedding, LSTM, attention and MLP on ``x`` in that order.

        :param x: Input handed to ``self.embedding`` and, unchanged, to
            ``self.attention`` as its second argument.
        :return: Result of ``self.mlp`` applied to the attention output
            flattened to two dimensions (first dimension preserved).
        """
        hidden = self.lstm(self.embedding(x))
        # The attention module returns a pair; its second item is not used here
        attended, _penalty = self.attention(hidden, x)
        first_dim = attended.size(0)
        flattened = attended.view(first_dim, -1)
        return self.mlp(flattened)

    def loss(self, predict, ground_truth):
        """Return the cross-entropy between ``predict`` and ``ground_truth``.

        Prints the sizes of both arguments and then ``ground_truth`` itself
        before computing the loss.

        :param predict: Passed as the input of ``F.cross_entropy``.
        :param ground_truth: Passed as the target of ``F.cross_entropy``.
        :return: The value returned by ``F.cross_entropy``.
        """
        predict_size = str(predict.size())
        truth_size = str(ground_truth.size())
        print("predict:%s; g:%s" % (predict_size, truth_size))
        print(ground_truth)
        return F.cross_entropy(predict, ground_truth)


# Script-level training run: load the "train" config section, add the
# vocabulary size, then build the trainer and the model and start training.
train_args = ConfigSection()
ConfigLoader("good path").load_config('config.cfg', {"train": train_args})
# NOTE(review): word2index, train_data and dev_data are not defined in the
# visible part of this file - they must exist before this point.
train_args['vocab'] = len(word2index)

# Every entry of the section is forwarded to the trainer as a keyword argument.
trainer = ClassificationTrainer(**train_args.data)

# for k in train_args.__dict__.keys():
#     print(k, train_args[k])
# The model receives the whole section object rather than unpacked keywords.
model = SELF_ATTENTION_YELP_CLASSIFICATION(train_args)
trainer.train(model, train_data, dev_data)