def train_module(corpus, moduleName: str, saveModulePath: str):
    """Train the component selected by ``moduleName``.

    Args:
        corpus: Training input, handed unchanged to ``CountSpace.train``.
        moduleName: Which component to train. Only ``"countSpace"`` does
            real work; ``"normalizer"`` and ``"noun"`` merely print ``"s"``
            (they look like unimplemented placeholders). Any other value
            is silently ignored.
        saveModulePath: Destination passed to ``CountSpace.save_model``
            (always with ``json_format=False``).

    Returns:
        None in every case.
    """
    if moduleName == "countSpace":
        spacing_model = CountSpace()
        spacing_model.train(corpus)
        spacing_model.save_model(saveModulePath, json_format=False)
        return

    # Both remaining recognised names share the same placeholder output.
    if moduleName in ("normalizer", "noun"):
        print("s")
def train_space_model(corpus_fname, model_fname):
    """Train a fresh ``CountSpace`` and write it to disk.

    Args:
        corpus_fname: Value passed to ``CountSpace.train``.
        model_fname: Value passed to ``CountSpace.save_model``; the model
            is saved with ``json_format=False``.

    Returns:
        None. The trained model is not returned to the caller.
    """
    spacer = CountSpace()
    spacer.train(corpus_fname)
    spacer.save_model(model_fname, json_format=False)
# model.save_model('model_spacing_2.h5', json_format=False) # model.train(corpus_file_name) # model.save_model('model_spacing.h5', json_format=False) # model = CountSpace.load_model('model_spacing.h5', json_format=False) # model.train() # model_2_file_name = '../KorQuAD_2.1_train_00/korquad2.1_train_0.json' # model_2 = CountSpace() # model.train(model_2_file_name) # model.save_model('model_2_spacing', json_format=False) model = CountSpace() model.load_model('model_spacing', json_format=False) model.train('korquad.txt') model.save_model('korean_spacing_model.h5', json_format=False) # model = CountSpace() # model.load_model('model_spacing_3.h5', json_format=False) # model.train('./korquad_3.txt') # model.save_model('model_spacing_4.h5', json_format=False) verbose = False mc = 10 # min_count ft = 0.4 # force_abs_threshold nt = -0.3 # nonspace_threshold st = 0.4 # space_threshold sentence = '지않고' # with parameters
from soyspacing.countbase import RuleDict, CountSpace

# Train a new CountSpace model on 'sentences.txt' and save it as
# "soispace.model". No json_format argument is given here, so save_model
# uses its own default (the other save calls in this file pass
# json_format=False explicitly).
# NOTE(review): RuleDict is imported but not used in the visible code --
# confirm whether later code needs it.
corpus_fname = 'sentences.txt'
model = CountSpace()
model.train(corpus_fname)
model.save_model("soispace.model")