Beispiel #1
0
def train_module(corpus, moduleName: str, saveModulePath: str):
    """Train the module selected by ``moduleName``.

    Only ``"countSpace"`` does real work: a new ``CountSpace`` instance is
    trained on ``corpus`` and written out via ``save_model`` with
    ``json_format=False``. ``"normalizer"`` and ``"noun"`` currently only
    print ``"s"`` (they look like placeholders — TODO confirm). Any other
    name does nothing.

    Args:
        corpus: Forwarded unchanged to ``CountSpace.train``.
        moduleName: Selector string; see above for the recognised values.
        saveModulePath: Forwarded to ``save_model``; used only for
            ``"countSpace"``.

    Returns:
        None in every case.
    """
    # Branches that have no training logic yet.
    if moduleName in ("normalizer", "noun"):
        print("s")
        return

    # Unrecognised names fall through without any action.
    if moduleName != "countSpace":
        return

    # NOTE(review): CountSpace must be in scope at call time; presumably it
    # is soyspacing.countbase.CountSpace — confirm against the file's imports.
    spacing_model = CountSpace()
    spacing_model.train(corpus)
    spacing_model.save_model(saveModulePath, json_format=False)
Beispiel #2
0
def train_space_model(corpus_fname, model_fname):
    """Train a new ``CountSpace`` model and save it.

    Args:
        corpus_fname: Forwarded to ``CountSpace.train``; the name suggests a
            corpus file path (TODO confirm against the library).
        model_fname: Forwarded to ``save_model`` as the output target.

    Returns:
        None. The trained model is only written out via ``save_model``
        (called with ``json_format=False``); it is not returned.
    """
    spacer = CountSpace()
    spacer.train(corpus_fname)
    spacer.save_model(model_fname, json_format=False)
Beispiel #3
0
# --- Earlier experiments, left commented out -------------------------------
# model.train('./korquad_1.txt')
# model.save_model('model_spacing_2.h5', json_format=False)

# model.train(corpus_file_name)
# model.save_model('model_spacing.h5', json_format=False)
# model = CountSpace.load_model('model_spacing.h5', json_format=False)
# model.train()

# model_2_file_name = '../KorQuAD_2.1_train_00/korquad2.1_train_0.json'
# model_2 = CountSpace()
# model.train(model_2_file_name)
# model.save_model('model_2_spacing', json_format=False)

# --- Active run ------------------------------------------------------------
# Create a CountSpace instance, load the saved model 'model_spacing' into it,
# call train() on 'korquad.txt', then save under a new file name.
# NOTE(review): CountSpace is used here, but the only import of it visible in
# this source appears further down — confirm it is imported before this point.
# NOTE(review): presumably train() adds to the loaded state rather than
# replacing it — verify against the soyspacing implementation.
model = CountSpace()
model.load_model('model_spacing', json_format=False)
model.train('korquad.txt')
model.save_model('korean_spacing_model.h5', json_format=False)

# model = CountSpace()
# model.load_model('model_spacing_3.h5', json_format=False)
# model.train('./korquad_3.txt')
# model.save_model('model_spacing_4.h5', json_format=False)

# Tuning values; the short names are abbreviations of the parameter names
# given in the inline comments. Presumably passed to a spacing-correction
# call later on — TODO confirm, no such call is visible in this part.
verbose = False
mc = 10  # min_count
ft = 0.4  # force_abs_threshold
nt = -0.3  # nonspace_threshold
st = 0.4  # space_threshold

# Sample input string (presumably text to be spacing-corrected — TODO confirm).
sentence = '지않고'
# soyspacing is a third-party package; install it with `pip install soyspacing`
# (in a notebook: `!pip install soyspacing`) before running this script.
# The names are imported once, directly. Retrying the identical import inside
# a bare ``except:`` cannot succeed when the first attempt failed, and a bare
# ``except:`` would also trap unrelated exceptions such as KeyboardInterrupt.
from soyspacing.countbase import CountSpace
from soyspacing.countbase import RuleDict

import re
import json

# Train the soyspacing model.
# corpus_fname is handed to CountSpace.train(); rule_dict is built from
# './space_rules.txt' but is not passed to anything in the lines visible here
# (presumably used by a later correction call — TODO confirm).
corpus_fname = './134963_norm.txt'
rule_dict = RuleDict('./space_rules.txt')
model = CountSpace()
model.train(corpus_fname)

# Set the soyspacing parameters.
# The short names abbreviate the parameter names given in the inline comments.
verbose = False
mc = 10  # min_count
ft = 0.3  # force_abs_threshold
nt = -0.3  # nonspace_threshold
st = 0.3  # space_threshold

for i in range(1, 30, 2):
    if i < 9:
        i_start = "0{}".format(i)
        i_end = "0{}".format(i + 1)
    elif i == 9:
        i_start = "09"
        i_end = "10"