Beispiel #1
0
    def __init__(self,
                 max_left_length=10,
                 max_right_length=9,
                 predictor_headers=None,
                 verbose=True,
                 min_num_of_features=1,
                 max_frequency_when_noun_is_eojeol=30,
                 eojeol_counter_filtering_checkpoint=200000,
                 min_eojeol_frequency=1,
                 extract_compound=True,
                 extract_pos_feature=False,
                 extract_determiner=False,
                 postprocessing=None,
                 logpath=None):
        """Store the configuration and load the predictor.

        Unless noted otherwise, each argument is stored unchanged on the
        instance under the same name; its meaning is defined by the methods
        that later read it (not visible in this block).

        Args:
            max_left_length: Stored as ``self.max_left_length``.
            max_right_length: Stored as ``self.max_right_length``.
            predictor_headers: Passed to ``self._load_predictor``. When
                falsy, the value returned by
                ``self._set_default_predictor_header()`` is used instead.
            verbose: Stored as ``self.verbose``.
            min_num_of_features: Stored as ``self.min_num_of_features``.
            max_frequency_when_noun_is_eojeol: Stored under the same name.
            eojeol_counter_filtering_checkpoint: Stored under the same name.
            min_eojeol_frequency: Stored as ``self.min_eojeol_frequency``.
            extract_compound: Stored as ``self.extract_compound``.
            extract_pos_feature: Stored as ``self.extract_pos_feature``.
            extract_determiner: Stored as ``self.extract_determiner``.
            postprocessing: Either a single string, which is wrapped in a
                one-element list, or a non-string value that is stored
                as-is (presumably a list of step names -- confirm against
                the code that reads ``self.postprocessing``). A falsy value
                selects ``['detaching_features', 'ignore_features',
                'ignore_NJ']``.
            logpath: Stored as ``self.logpath``. When truthy,
                ``check_dirs(logpath)`` is called first.
        """
        self.max_left_length = max_left_length
        self.max_right_length = max_right_length
        # Not built here; assigned later by code outside this block.
        self.lrgraph = None
        self.verbose = verbose
        self.min_num_of_features = min_num_of_features
        self.max_frequency_when_noun_is_eojeol = max_frequency_when_noun_is_eojeol
        self.eojeol_counter_filtering_checkpoint = eojeol_counter_filtering_checkpoint
        self.min_eojeol_frequency = min_eojeol_frequency
        self.extract_compound = extract_compound
        self.extract_pos_feature = extract_pos_feature
        self.extract_determiner = extract_determiner
        self.logpath = logpath

        if logpath:
            check_dirs(logpath)

        if not postprocessing:
            postprocessing = [
                'detaching_features', 'ignore_features', 'ignore_NJ'
            ]
        elif isinstance(postprocessing, str):
            # The original called isinstance() with a single argument, which
            # raised TypeError for every truthy value. A lone step name is
            # normalised to a one-element list.
            postprocessing = [postprocessing]

        self.postprocessing = postprocessing

        if not predictor_headers:
            predictor_headers = self._set_default_predictor_header()

        self._load_predictor(predictor_headers)
def write_log(path, header, words):
    """Append a header and the sorted ``words`` to the log file at ``path``.

    ``check_dirs(path)`` is called before the file is opened (presumably to
    make sure the target directory exists -- confirm against its
    definition). The file is opened in append mode as UTF-8. A blank line
    and then ``header`` are written, followed by every item of ``words`` in
    sorted order, one per line.
    """
    check_dirs(path)
    with open(path, 'a', encoding='utf-8') as log_file:
        log_file.write('\n{}\n'.format(header))
        # sorted() runs after the header has been written, as before.
        log_file.writelines('{}\n'.format(entry) for entry in sorted(words))