예제 #1
0
    def test_fit(self):
        """Smoke-test ``nagisa.fit`` on the bundled sample datasets.

        Training is run twice: once with only the required files and a model
        name, and once with the optional dictionary, embedding, newline and
        delimiter arguments added. Nothing is asserted; the test passes when
        neither call raises.
        """
        # Arguments shared by both training runs.
        common_args = {
            "train_file": "nagisa/data/sample_datasets/sample.train",
            "dev_file": "nagisa/data/sample_datasets/sample.dev",
            "test_file": "nagisa/data/sample_datasets/sample.test",
            "model_name": "sample",
        }

        # test_11: minimal set of arguments
        nagisa.fit(**common_args)

        # test_12: same data plus the optional resources and format settings
        optional_args = {
            "dict_file": "nagisa/data/sample_datasets/sample.dict",
            "emb_file": "nagisa/data/sample_datasets/sample.emb",
            "newline": "EOS",
            "delimiter": "\t",
        }
        nagisa.fit(**common_args, **optional_args)
예제 #2
0
def main():
    """Split ``kwdlc.txt`` into train/dev/test files and train a model.

    The input is loaded with ``nagisa.utils.load_file``, shuffled with a
    fixed seed, and split roughly 80/10/10. The three splits are written with
    ``write_file`` to ``kwdlc.train``, ``kwdlc.dev`` and ``kwdlc.test``, which
    are then passed to ``nagisa.fit`` with model name ``kwdlc_ner_model``.
    """
    # Fixed seed so the shuffle, and therefore the split, is reproducible.
    random.seed(1234)

    # preprocess
    fn_in = "kwdlc.txt"
    # NOTE(review): presumably X holds token sequences and Y the matching tag
    # sequences, index-aligned -- confirm against nagisa.utils.load_file.
    X, Y = nagisa.utils.load_file(fn_in)
    indice = list(range(len(X)))
    random.shuffle(indice)

    num_train = int(0.8 * len(indice))
    num_dev = int(0.1 * len(indice))
    dev_end = num_train + num_dev

    train_idx = indice[:num_train]
    dev_idx = indice[num_train:dev_end]
    # The test split takes everything that is left. Sizing it with another
    # int(0.1 * n) would silently drop the samples lost to truncation.
    test_idx = indice[dev_end:]

    train_X = [X[i] for i in train_idx]
    train_Y = [Y[i] for i in train_idx]
    dev_X = [X[i] for i in dev_idx]
    dev_Y = [Y[i] for i in dev_idx]
    test_X = [X[i] for i in test_idx]
    test_Y = [Y[i] for i in test_idx]

    fn_out_train = "kwdlc.train"
    fn_out_dev = "kwdlc.dev"
    fn_out_test = "kwdlc.test"
    write_file(fn_out_train, train_X, train_Y)
    write_file(fn_out_dev, dev_X, dev_Y)
    write_file(fn_out_test, test_X, test_Y)

    # start training
    fn_out_model = "kwdlc_ner_model"
    nagisa.fit(train_file=fn_out_train,
               dev_file=fn_out_dev,
               test_file=fn_out_test,
               model_name=fn_out_model)
예제 #3
0
import nagisa

# Train on the sample data. Once training finishes, the three model files
# (*.vocabs, *.params, *.hp) are saved.
nagisa.fit(
    train_file="jp/sample.train",
    dev_file="jp/sample.dev",
    test_file="jp/sample.test",
    model_name="jp/sample",
)

# Construct a tagger from the trained model files.
sample_tagger = nagisa.Tagger(
    vocabs="jp/sample.vocabs",
    params="jp/sample.params",
    hp="jp/sample.hp",
)

# Tag one sentence and show the result.
text = "福岡・博多の観光情報"
words = sample_tagger.tagging(text)
print(words)
#> 福岡/PROPN ・/SYM 博多/PROPN の/ADP 観光/NOUN 情報/NOUN
예제 #4
0
    with open(fn_out, "w") as f:
        for words, postags in data:
            for word, postag in zip(words, postags):
                f.write("\t".join([word, postag]) + "\n")
            f.write("EOS\n")


if __name__ == "__main__":
    # Input files: the UD_Japanese-GSD CoNLL-U splits.
    fn_in_train = "UD_Japanese-GSD/ja_gsd-ud-train.conllu"
    fn_in_dev = "UD_Japanese-GSD/ja_gsd-ud-dev.conllu"
    fn_in_test = "UD_Japanese-GSD/ja_gsd-ud-test.conllu"

    # Output files passed to nagisa.fit below.
    fn_out_train = "ja_gsd_ud.train"
    fn_out_dev = "ja_gsd_ud.dev"
    fn_out_test = "ja_gsd_ud.test"

    fn_out_model = "ja_gsd_ud"

    # Convert each split for nagisa, in train/dev/test order.
    conversions = (
        (fn_in_train, fn_out_train),
        (fn_in_dev, fn_out_dev),
        (fn_in_test, fn_out_test),
    )
    for source_path, target_path in conversions:
        write_file(source_path, target_path)

    # Train on the converted files.
    nagisa.fit(
        train_file=fn_out_train,
        dev_file=fn_out_dev,
        test_file=fn_out_test,
        model_name=fn_out_model,
    )
예제 #5
0
import nagisa

# Train on the Cantonese data. Once training finishes, the three model files
# (*.vocabs, *.params, *.hp) are saved.
nagisa.fit(
    train_file="cantonese/train.txt",
    dev_file="cantonese/dev.txt",
    test_file="cantonese/test.txt",
    model_name="cantonese/model",
)

# Construct a tagger from the trained model files.
sample_tagger = nagisa.Tagger(
    vocabs="cantonese/model.vocabs",
    params="cantonese/model.params",
    hp="cantonese/model.hp",
)

# Tag one sentence and show the result.
text = "我唔系随便嘅人,我随便起来唔系人"
words = sample_tagger.tagging(text)
print(words)
#> Prints the tagging result for `text`; the exact tokens and tags depend on
#> the trained model.
import nagisa

# Train a model from the KWDLC train/dev/test files under data/.
nagisa.fit(
    train_file="data/kwdlc.train",
    dev_file="data/kwdlc.dev",
    test_file="data/kwdlc.test",
    model_name="data/kwdlc_ner_model",
)