def expand_by_link():
    """Resolve every rule whose suffix ends in '-' into complete suffixes.

    Works in place on the module-level ``groups`` mapping (group key -> list
    of class dicts, each holding a ``'rules'`` list).  For each group whose key
    ends in '-', a rule whose first element ends in '-' is replaced by one
    rule per attachable follow-on rule, repeatedly, until no rule of the
    class ends in '-'.  A pending rule with no attachable follow-on is
    dropped.

    A follow-on rule is attachable after group key ``K`` when its class lists
    ``K`` (or the wildcard ``'-'``) in ``'after'``, does not list ``K`` in
    ``'notafter'``, and the rule's second element, used as a regular
    expression, matches the end of ``K`` without its final character.

    NOTE(review): ENC/DEC are defined elsewhere; they are presumably an
    encode/decode pair for the suffix text -- confirm against their
    definitions.
    """
    # Temporarily tag each rule with the keyword of its own group (the last
    # ending).  Don't try this at home :P  The tag is removed at the bottom.
    for group_key, group in groups.items():
        for klass in group:
            for rule in klass['rules']:
                rule.append(group_key)

    # Shallow copy: the class dicts are shared with ``groups``, so rule lists
    # replaced during expansion are visible through this mapping as well.
    lookup = groups.copy()

    def collect_attachable(tail_key):
        """Return every rule that may follow the ending named ``tail_key``."""
        found = []
        for group in lookup.values():
            for candidate in group:
                # Must be allowed after this ending (or after any ending).
                if '-' not in candidate['after'] and tail_key not in candidate['after']:
                    continue
                # ...and must not be explicitly forbidden after it.
                if 'notafter' in candidate and tail_key in candidate['notafter']:
                    continue
                for rule in candidate['rules']:
                    # rule[1] must match the tail of the key minus its last char.
                    if re.match('.*' + ENC(rule[1]) + '$', ENC(tail_key[:-1])):
                        found.append(rule)
        return found

    def resolve_links(klass):
        """Expand the '-'-terminated rules of ``klass`` until none remain."""
        while True:
            pending, finished = [], []
            for rule in klass['rules']:
                bucket = pending if rule[0][-1] == '-' else finished
                bucket.append(rule)
            if not pending:
                return
            for rule in pending:
                # rule[-1] is the temporary tag: the key of the last ending.
                for attach in collect_attachable(rule[-1]):
                    # attach[2], when non-empty, gives how much of the encoded
                    # suffix is stripped in addition to the trailing '-'.
                    drop = len(ENC(attach[2])) if attach[2] else 0
                    joined = DEC(ENC(rule[0])[:-1 - drop] + attach[0][1:])
                    # Keep this rule's elements 1-2; take the rest, including
                    # the new tag, from the attached rule.
                    finished.append([joined] + rule[1:3] + attach[3:])
            klass['rules'] = finished

    # Only groups whose key ends in '-' get their classes expanded.
    for group_key, group in groups.items():
        if group_key[-1] != '-':
            continue
        for klass in group:
            resolve_links(klass)

    # Remove the temporarily attached last-ending keyword again.
    for group in groups.values():
        for klass in group:
            for rule in klass['rules']:
                rule.pop()
def expand_by_link():
    """Expand rules whose suffix ends in '-' by chaining follow-on rules.

    Operates in place on the module-level ``groups`` mapping (group key ->
    list of class dicts with a ``'rules'`` list).  For every group whose key
    ends in '-', each rule whose first element ends in '-' is replaced by one
    combined rule per attachable follow-on rule; this repeats until the class
    has no rule ending in '-'.  A pending rule with nothing to attach is
    dropped.

    A rule is attachable after group key ``K`` when its class lists ``K`` or
    the wildcard ``'-'`` in ``'after'``, does not list ``K`` in
    ``'notafter'``, and the rule's second element (a regular expression)
    matches the end of ``K`` minus its final character.

    NOTE(review): NFD/NFC are defined elsewhere; presumably Unicode
    normalisation helpers -- confirm against their definitions.
    """
    # Temporarily tag every rule with the keyword of its last ending (its
    # group key).  Don't try this at home :P  The tag is removed at the end.
    for key in groups:
        for klass in groups[key]:
            for rule in klass['rules']:
                rule.append(key)

    # Shallow copy: class dicts are shared with ``groups``, so rule lists
    # replaced during expansion are seen through ``refgroups`` too.
    refgroups = groups.copy()

    def as_text(value):
        """Decode UTF-8 byte strings; pass text through unchanged."""
        # Python 2 keys are byte strings and need decoding; a Python 3 str
        # has no .decode(), so only decode when the value really is bytes.
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    def find_rules_to_attach(last):
        """Return every rule that may follow the ending named ``last``."""
        rules = []
        for g in refgroups.values():
            for k in g:
                if '-' not in k['after'] and last not in k['after']:
                    continue
                # ``in`` instead of dict.has_key(), which Python 3 removed.
                if 'notafter' in k and last in k['notafter']:
                    continue
                for r in k['rules']:
                    if re.match(NFD(u'.*' + r[1] + '$'),
                                NFD(as_text(last[:-1]))):
                        rules.append(r)
        return rules

    def expand_class(klass):
        """Expand the '-'-terminated rules of ``klass`` until none remain."""
        while True:
            rules_to_expand = [r for r in klass['rules'] if r[0][-1] == '-']
            if not rules_to_expand:
                return
            new_rules = [r for r in klass['rules'] if r[0][-1] != '-']
            for r in rules_to_expand:
                # r[-1] is the temporary tag: the key of the last ending.
                for a in find_rules_to_attach(r[-1]):
                    # a[2], when non-empty, gives how much of the decomposed
                    # suffix is stripped in addition to the trailing '-'.
                    striplen = len(NFD(a[2])) if a[2] else 0
                    new_suffix = NFC(NFD(r[0])[:-1 - striplen] + a[0][1:])
                    # Keep r's elements 1-2; the rest (including the new tag)
                    # comes from the attached rule.
                    new_rules.append([new_suffix] + r[1:3] + a[3:])
            klass['rules'] = new_rules

    # Only groups whose key ends in '-' get their classes expanded.
    for key in groups:
        if key[-1] != '-':
            continue
        for klass in groups[key]:
            expand_class(klass)

    # Remove the temporarily attached last-ending keyword again.
    for key in groups:
        for klass in groups[key]:
            for rule in klass['rules']:
                del rule[-1]
def expand_by_cond():
    """Split rules that carry a list of conditions into one rule per condition.

    Works in place on the module-level ``groups`` mapping.  For every class,
    a rule whose second element is a list is replaced by one new rule per
    list entry (same first element, same trailing elements); any other rule
    is kept as the very same object.  Rule order is preserved.
    """
    for group in groups.values():
        for klass in group:
            flattened = []
            for rule in klass['rules']:
                cond = rule[1]
                if not isinstance(cond, list):
                    # Single condition: nothing to split.
                    flattened.append(rule)
                    continue
                head, rest = rule[0], rule[2:]
                flattened.extend([head, single] + rest for single in cond)
            klass['rules'] = flattened
def expand_by_cond():
    """Give every rule exactly one condition in its second slot.

    Rewrites ``klass['rules']`` for each class in the module-level ``groups``
    mapping: a rule whose second element is a list becomes several rules, one
    per entry of that list, with the remaining elements copied over; rules
    whose second element is not a list are passed through untouched.
    """
    def variants(rule):
        """Return the list of single-condition rules derived from ``rule``."""
        conds = rule[1]
        if isinstance(conds, list):
            return [[rule[0], cond] + rule[2:] for cond in conds]
        return [rule]

    for group_key in groups:
        for klass in groups[group_key]:
            klass['rules'] = [
                expanded
                for rule in klass['rules']
                for expanded in variants(rule)
            ]
def clean_up_cond():
    """Normalise the condition lists of every class and return ``groups``.

    For each of the keys 'after', 'notafter', 'cond' and 'notcond' that a
    class actually has, the list is rewritten so that the tag '#용언'
    (predicate) is replaced by both '#동사' (verb) and '#형용사' (adjective),
    duplicates are removed, and the result is sorted.  Keys a class does not
    have are left absent.

    Returns:
        The module-level ``groups`` mapping, modified in place.
    """
    for group in groups.values():
        for klass in group:
            for field in ('after', 'notafter', 'cond', 'notcond'):
                if field in klass:
                    unique = set()
                    for item in klass[field]:
                        # '#용언' stands for the two concrete tags.
                        unique |= {'#동사', '#형용사'} if item == '#용언' else {item}
                    klass[field] = sorted(unique)
    return groups
def clean_up_cond():
    """Normalise the condition lists of every class and return ``groups``.

    For each of the keys 'after', 'notafter', 'cond' and 'notcond' present in
    a class, the list is rebuilt so that '#용언' (predicate) is replaced by
    '#동사' (verb) and '#형용사' (adjective), duplicates are dropped, and the
    entries are sorted.  Keys a class does not have are skipped.

    Returns:
        The module-level ``groups`` mapping, modified in place.
    """
    for key in groups:
        for klass in groups[key]:
            for c in ('after', 'notafter', 'cond', 'notcond'):
                # ``in`` instead of dict.has_key(): has_key() was removed in
                # Python 3, while ``in`` behaves the same on Python 2 and 3.
                if c not in klass:
                    continue
                new = set()
                for item in klass[c]:
                    if item == '#용언':
                        # The generic predicate tag means both concrete tags.
                        new.add('#동사')
                        new.add('#형용사')
                    else:
                        new.add(item)
                # sorted() accepts any iterable; no intermediate list needed.
                klass[c] = sorted(new)
    return groups
expand_class(klass) # 임시로 부착했던 마지막 어미 키워드 삭제 for key in groups.keys(): for klass in groups[key]: for rule in klass['rules']: del rule[-1] expand_by_cond() clean_up_cond() expand_by_link() # 연결이 끝나면 그룹끼리 구분할 필요가 없다. klasses = [] for key in groups.keys(): for r in groups[key]: r['name'] = key klasses += groups[key] # 선어말어미 연결 정보도 필요 없다. for klass in klasses: for condname in ['after', 'notafter']: try: klass[condname] = [c for c in klass[condname] if c[0] != '-'] except: pass # 같은 조건의 클래스를 머지한다. def eq_klass_cond(a, b):
for klass in groups[key]: expand_class(klass) # 임시로 부착했던 마지막 어미 키워드 삭제 for key in groups.keys(): for klass in groups[key]: for rule in klass['rules']: del rule[-1] expand_by_cond() clean_up_cond() expand_by_link() # 연결이 끝나면 그룹끼리 구분할 필요가 없다. klasses = [] for key in groups.keys(): klasses += groups[key] # 선어말어미 연결 정보도 필요 없다. for klass in klasses: for condname in ['after', 'notafter']: try: klass[condname] = [c for c in klass[condname] if c[0] != '-'] except: pass # 같은 조건의 클래스를 머지한다. def eq_klass_cond(a, b): for condname in ['after', 'notafter', 'cond', 'notcond']: if a.has_key(condname) and b.has_key(condname): if a[condname] != b[condname]: