예제 #1
0
def main():
    """Command-line entry point for the dependency-guided LSTM-CRF.

    Steps, in order:

    1. Parse the command-line options and wrap them in a ``Config``.
    2. Read the train / dev / test files through ``Reader.read_conll``.
    3. If a contextual embedding is configured, load the matching ``.vec``
       file for each split via ``Reader.load_elmo_vec``.
    4. Build the label, dependency-label and word indices plus the
       embedding table on the config, then map all instances to ids.
    5. Train when ``opt.mode == "train"``; otherwise call ``test_model``
       on the test instances.

    The selected mode is printed last in either case.
    """
    arg_parser = argparse.ArgumentParser(
        description="Dependency-Guided LSTM CRF implementation")
    options = parse_arguments(arg_parser)
    config = Config(options)

    conll_reader = Reader(config.digit2zero)
    setSeed(options, config.seed)

    # The training file is read with -1 as its count argument; any
    # sub-sampling to ``train_num`` happens later, once indices are built.
    train_insts = conll_reader.read_conll(config.train_file, -1, True)
    dev_insts = conll_reader.read_conll(
        config.dev_file, config.dev_num, False)
    test_insts = conll_reader.read_conll(
        config.test_file, config.test_num, False)

    if config.context_emb != ContextEmb.none:
        print('Loading the {} vectors for all datasets.'.format(
            config.context_emb.name))

        # Markers removed from a data path before the vector suffix is
        # appended. They are applied one after another in exactly this order.
        path_markers = (".sd", ".ud", ".sud", ".predsd", ".predud",
                        ".stud", ".ssd")

        def vec_file_for(data_file):
            # Derive "<path without markers>.<embedding name>.vec".
            stripped = data_file
            for marker in path_markers:
                stripped = stripped.replace(marker, "")
            return stripped + "." + config.context_emb.name + ".vec"

        # Only the training call's return value is kept (as the embedding
        # size); the dev/test calls are made for their effect on the
        # instances passed in.
        config.context_emb_size = conll_reader.load_elmo_vec(
            vec_file_for(config.train_file), train_insts)
        conll_reader.load_elmo_vec(vec_file_for(config.dev_file), dev_insts)
        conll_reader.load_elmo_vec(vec_file_for(config.test_file), test_insts)

    # Labels are indexed from the training split only, while the tagging
    # scheme conversion and dependency labels cover all three splits.
    config.use_iobes(train_insts + dev_insts + test_insts)
    config.build_label_idx(train_insts)

    config.build_deplabel_idx(train_insts + dev_insts + test_insts)
    print("# deplabels: ", len(config.deplabels))
    print("dep label 2idx: ", config.deplabel2idx)

    config.build_word_idx(train_insts, dev_insts, test_insts)
    config.build_emb_table()
    config.map_insts_ids(train_insts + dev_insts + test_insts)

    print("num chars: " + str(config.num_char))
    print("num words: " + str(len(config.word2idx)))

    if options.mode != "train":
        # Every non-training mode evaluates on the test instances.
        test_model(config, test_insts)
    else:
        if config.train_num != -1:
            # Keep a random subset of ``train_num`` training instances.
            random.shuffle(train_insts)
            train_insts = train_insts[:config.train_num]
        learn_from_insts(config, config.num_epochs, train_insts, dev_insts,
                         test_insts)

    print(options.mode)
예제 #2
0
def main():
    """Command-line entry point: build indices and train on the train split.

    Parses the options into a ``Config``, reads the train / dev / test
    files with ``Reader.read_conll``, builds the label index from the
    training instances and the dependency-label and word indices from all
    three splits, maps the training instances to ids, and, when
    ``opt.mode == "train"``, hands the (optionally sub-sampled) training
    instances to ``learn_from_insts``. The mode is printed at the end.
    """
    cli = argparse.ArgumentParser(
        description="Dependency-Guided LSTM CRF implementation")
    args = parse_arguments(cli)
    cfg = Config(args)

    data_reader = Reader(cfg.digit2zero)
    setSeed(args, cfg.seed)

    # The training file is read with -1 as its count argument; dev and test
    # use the counts configured on ``cfg``.
    train_set = data_reader.read_conll(cfg.train_file, -1, True)
    dev_set = data_reader.read_conll(cfg.dev_file, cfg.dev_num, False)
    test_set = data_reader.read_conll(cfg.test_file, cfg.test_num, False)

    # Tagging-scheme conversion and the label index use training data only.
    cfg.use_iobes(train_set)
    cfg.build_label_idx(train_set)

    # Dependency labels and the vocabulary are collected over every split.
    cfg.build_deplabel_idx(train_set + dev_set + test_set)
    print("# deplabels: ", len(cfg.deplabels))
    print("dep label 2idx: ", cfg.deplabel2idx)

    cfg.build_word_idx(train_set + dev_set + test_set)
    cfg.build_emb_table()
    cfg.map_insts_ids(train_set)

    print("num chars: " + str(cfg.num_char))
    print("num words: " + str(len(cfg.word2idx)))

    training_requested = args.mode == "train"
    if training_requested:
        subsample = cfg.train_num != -1
        if subsample:
            # Keep a random subset of ``train_num`` training instances.
            random.shuffle(train_set)
            train_set = train_set[:cfg.train_num]
        learn_from_insts(cfg, cfg.num_epochs, train_set)

    print(args.mode)