def __init__(self, max_left_length=10, max_right_length=9,
             predictor_headers=None, verbose=True, min_num_of_features=1,
             max_frequency_when_noun_is_eojeol=30,
             eojeol_counter_filtering_checkpoint=200000,
             min_eojeol_frequency=1, extract_compound=True,
             extract_pos_feature=False, extract_determiner=False,
             postprocessing=None, logpath=None):
    """Configure the extractor and load its L-R feature predictors.

    Parameters
    ----------
    max_left_length, max_right_length : int
        Maximum lengths of the L and R parts of an eojeol considered.
    predictor_headers : list of str or None
        Paths/prefixes of predictor resource files; when None the
        default headers from `_set_default_predictor_header()` are used.
    verbose : bool
        If True, progress messages are printed (consumed elsewhere).
    min_num_of_features : int
        Minimum number of features required (threshold used by extraction).
    max_frequency_when_noun_is_eojeol : int
        Frequency ceiling applied when a whole eojeol is taken as a noun.
    eojeol_counter_filtering_checkpoint : int
        Interval at which the eojeol counter is filtered.
    min_eojeol_frequency : int
        Minimum eojeol count to keep.
    extract_compound, extract_pos_feature, extract_determiner : bool
        Feature toggles for the corresponding extraction stages.
    postprocessing : str, list of str, or None
        Post-processing step name(s). None/empty selects the default
        pipeline; a single string is wrapped into a one-element list.
    logpath : str or None
        If given, log directory is created via `check_dirs(logpath)`.
    """
    self.max_left_length = max_left_length
    self.max_right_length = max_right_length
    self.lrgraph = None  # built later, during training/extraction
    self.verbose = verbose
    self.min_num_of_features = min_num_of_features
    self.max_frequency_when_noun_is_eojeol = max_frequency_when_noun_is_eojeol
    self.eojeol_counter_filtering_checkpoint = eojeol_counter_filtering_checkpoint
    self.min_eojeol_frequency = min_eojeol_frequency
    self.extract_compound = extract_compound
    self.extract_pos_feature = extract_pos_feature
    self.extract_determiner = extract_determiner
    self.logpath = logpath
    if logpath:
        check_dirs(logpath)

    # Normalize `postprocessing` to a list of step names.
    if not postprocessing:
        postprocessing = [
            'detaching_features',
            'ignore_features',
            'ignore_NJ'
        ]
    elif isinstance(postprocessing, str):
        # BUG FIX: original read `isinstance(postprocessing) == str`,
        # which raises TypeError (isinstance requires two arguments)
        # whenever a truthy, non-default value reaches this branch.
        postprocessing = [postprocessing]
    self.postprocessing = postprocessing

    if not predictor_headers:
        predictor_headers = self._set_default_predictor_header()
    self._load_predictor(predictor_headers)
def write_log(path, header, words):
    """Append a section to the log file at *path*.

    Ensures the parent directory exists, then writes *header* on its own
    line (preceded by a blank line) followed by every entry of *words*
    in sorted order, one per line.
    """
    check_dirs(path)
    lines = ['\n{}\n'.format(header)]
    lines.extend('{}\n'.format(word) for word in sorted(words))
    with open(path, 'a', encoding='utf-8') as f:
        f.writelines(lines)