def beyazlastirici_paths(mt_lexicon):
    """Build the chain of search paths that spells out ``beyaz`` + ``laştırıcı``.

    The lexicon item ``beyaz_Adj`` is turned into a stem transition ending in
    ``adjectiveRoot_ST``; an initial search path with tail ``'laştırıcı'`` is
    created from it and then extended once per (state, surface) step listed
    below, each extension being a copy of the previously built path.

    :param mt_lexicon: object exposing ``get_item_by_id``; must contain ``beyaz_Adj``.
    :return: list of paths; element 0 is the initial path, followed by one
        path per step, in order.
    """
    beyaz = mt_lexicon.get_item_by_id('beyaz_Adj')
    assert beyaz is not None

    # (target state, surface consumed when entering that state)
    steps = [
        (become_S, 'laş'),
        (verbRoot_S, ''),
        (vCausTir_S, 'tır'),
        (verbRoot_S, ''),
        (vAgt_S, 'ıcı'),
        (noun_S, ''),
        (a3sg_S, ''),
        (pnon_S, ''),
        (nom_ST, ''),
    ]

    stem_transition = StemTransition(beyaz, adjectiveRoot_ST)
    initial_path = SearchPath.initial(stem_transition, 'laştırıcı')
    paths = [initial_path]

    previous_state = adjectiveRoot_ST
    attrs = initial_path.phonetic_attributes
    for next_state, surface in steps:
        transition = SurfaceTransition(
            surface, SuffixTransition(previous_state, next_state))
        # Each step's attributes are derived from its surface and the
        # attributes of the step before it.
        attrs = calculate_phonetic_attributes(surface, frozenset(attrs))
        paths.append(paths[-1].copy(transition, attrs))
        previous_state = next_state

    # < (beyaz_Adj)(-)(beyaz: adjectiveRoot_ST + laş:become_S + verbRoot_S + tır: vCausTır_S + verbRoot_S
    # + ıcı:vAgt_S + noun_S + a3sg_S + pnon_S + nom_ST) >
    for built_path in paths:
        print(built_path)
    return paths
def test_initial_search_path():
    """An initial search path keeps a reference to the stem transition it was built from."""
    from zeyrek.attributes import calculate_phonetic_attributes

    surface_form = "beyazlaşacak"
    item = DictionaryItem(
        "beyaz", "beyaz", PrimaryPos.Adjective, SecondaryPos.NONE, [], "beyaz", 0)
    stem = StemTransition(
        item, root_S, calculate_phonetic_attributes(surface_form), surface_form)
    assert stem.dict_item.lemma == "beyaz"

    # Tail is the part of the word left over after the stem "beyaz".
    initial = SearchPath.initial(stem, "laşacak")
    assert initial.stem_transition == stem
def dict_item():
    """Return a noun ``DictionaryItem`` whose lemma, root and pronunciation are all ``'ev'``."""
    text = 'ev'
    return DictionaryItem(
        text,  # lemma
        text,  # root
        PrimaryPos.Noun,
        SecondaryPos.NONE,
        calculate_phonetic_attributes(text),
        text,  # pronunciation
        0,  # index
    )
def searchpath_dict_item_with_tail_and_RA_voicing(mt_lexicon):
    """Return an initial search path for ``adak_Noun`` with stem surface ``'adağ'`` and tail ``'a'``.

    The stem's phonetic attributes are calculated from the lexicon item's
    pronunciation, while the surface is given explicitly as ``'adağ'``.
    Both the stem transition and the resulting path are printed.

    :param mt_lexicon: object exposing ``get_item_by_id``.
    :return: the initial ``SearchPath``.
    """
    # Original note listed further ids here: elma, beyaz, meyve
    adak = mt_lexicon.get_item_by_id('adak_Noun')
    pronunciation_attrs = calculate_phonetic_attributes(adak.pronunciation)
    stem_transition = StemTransition(
        adak, noun_S, pronunciation_attrs, surface='adağ')
    print(f"Stem transition: {stem_transition}")

    path = SearchPath.initial(stem_transition, 'a')
    print(f"Path {path}")
    return path
def test_stem_transition():
    """Check the stem transition generated for the single lexicon line ``beyaz [P:Adj]``."""
    from zeyrek.attributes import calculate_phonetic_attributes

    lexicon = RootLexicon.from_lines(['beyaz [P:Adj]'])
    morphotactics = TurkishMorphotactics(lexicon=lexicon)
    # Looked up but not compared below; kept so the lexicon lookup is still exercised.
    dict_item = lexicon.get_matching_items('beyaz')[0]
    transition = morphotactics.stem_transitions.prefix_matches('beyaz')[0]

    # Endpoints and textual form.
    assert transition.to_ == adjectiveRoot_ST
    assert str(transition) == \
        "<(Dict: beyaz [P:Adj]):beyaz → [adjectiveRoot_ST:Adj]>"

    # A stem transition carries no condition.
    assert transition.condition is None
    assert transition.condition_count == 0

    assert transition.dict_item.lemma == 'beyaz'
    assert transition.from_ is root_S

    expected_attrs = calculate_phonetic_attributes('beyaz')
    assert transition.attrs == expected_attrs
    assert type(transition.to_) == MorphemeState
def advance(self, path: SearchPath):
    """Generate the successor paths of ``path``.

    Every outgoing transition of ``path.current_state`` is tried. A transition
    yields a new path only if the surface generated for it is a prefix of
    ``path.tail`` and ``transition.can_pass(path)`` holds.

    ``path`` itself is never modified: the phonetic attributes handed to each
    successor are a private copy, so per-transition adjustments cannot leak
    into ``path`` or into the other successors.

    :param path: the search path to extend.
    :return: list of new paths, one per accepted transition (possibly empty).
    """
    new_paths = []
    for transition in path.current_state.outgoing:
        # If the tail is empty and this transition has a surface, it cannot match.
        if not path.tail and transition.has_surface_form:
            logging.debug(
                f"Rejecting path {path}: Path and transition surface mismatch: "
            )
            continue

        surface = generate_surface(transition, path.phonetic_attributes)

        # No need to go further if the generated surface is not a prefix of the tail.
        if not path.tail.startswith(surface):
            logging.debug(
                f"Rejecting path {path}: tail doesnt start with {path.tail}-{surface}"
            )
            continue

        # Check conditions.
        if not transition.can_pass(path):
            logging.debug(
                f"Rejecting path {path}-{transition}: can't pass")
            continue

        # Epsilon (empty) transition: add it and move on, reusing existing attributes.
        if not transition.has_surface_form:
            blank_surface_transition = SurfaceTransition("", transition)
            new_path = path.copy(blank_surface_transition,
                                 path.phonetic_attributes)
            new_paths.append(new_path)
            logging.debug(f"Appending path {new_path}")
            continue

        surface_transition = SurfaceTransition(surface, transition)

        # If the tail equals the surface there is no need to recalculate the
        # phonetic attributes. In either case work on a fresh set: the flags
        # are edited in place below, and aliasing ``path.phonetic_attributes``
        # would silently change the input path (and what later iterations of
        # this loop see).
        if path.tail == surface:
            attributes = set(path.phonetic_attributes)
        else:
            # NOTE(review): the helper takes a hashable frozenset, which
            # suggests its results may be memoised - copying keeps the edits
            # below out of any shared result. Confirm against the helper.
            attributes = set(calculate_phonetic_attributes(
                surface, frozenset(path.phonetic_attributes)))

        # This is required for suffixes like `cik` and `ciğ`:
        # an extra attribute is added if "cik" or "ciğ" is generated and matches the tail.
        # If "cik" is generated, ExpectsConsonant is added, so only a consonant-starting
        # suffix can follow. Likewise, if "ciğ" is produced, a vowel-starting suffix is allowed.
        attributes.discard(PhoneticAttribute.CannotTerminate)
        last_token = transition.last_template_token
        if last_token.type_ == 'LAST_VOICED':
            attributes.add(PhoneticAttribute.ExpectsConsonant)
        elif last_token.type_ == 'LAST_NOT_VOICED':
            attributes.add(PhoneticAttribute.ExpectsVowel)
            attributes.add(PhoneticAttribute.CannotTerminate)

        p = path.copy(surface_transition, attributes)
        logging.debug(f"P path: {p}")
        new_paths.append(p)

    logging.debug(f"FINAL: ")
    for i, p in enumerate(new_paths):
        logging.debug(f"\t {i}: {p}")
    return new_paths