def advance(self, path: SearchPath) -> List[SearchPath]:
    """Expand ``path`` by one step along every usable outgoing transition.

    For each transition leaving ``path.current_state`` a surface string is
    generated from the path's phonetic attributes. The transition is kept
    only when that surface is a prefix of ``path.tail`` (compared while
    ignoring diacritics when ``self.ascii_tolerant`` is set) and the
    transition's ``can_pass`` check accepts the path.

    :param path: the search path to extend.
    :return: the newly created paths, possibly an empty list.
    :raises NotImplementedError: if ``self.debug_mode`` is set when a
        transition reaches the prefix check.
    """
    successors: List[SearchPath] = []

    for suffix_transition in path.current_state.outgoing:
        # Nothing left to consume, yet this transition would emit characters.
        if len(path.tail) == 0 and suffix_transition.has_surface_form():
            continue

        surface = SurfaceTransition.generate_surface(suffix_transition, path.phonetic_attributes)
        if self.ascii_tolerant:
            is_prefix = TurkishAlphabet.INSTANCE.starts_with_ignore_diacritics(path.tail, surface)
        else:
            is_prefix = path.tail.startswith(surface)

        # Debug reporting is unsupported whether or not the prefix matched.
        if self.debug_mode:
            raise NotImplementedError("Not implemented debug_mode")

        if not is_prefix:
            continue
        if not suffix_transition.can_pass(path):
            continue

        if not suffix_transition.has_surface_form():
            # Transition without a surface form: reuse the current attributes.
            empty_step = SurfaceTransition("", suffix_transition)
            successors.append(path.get_copy(empty_step, path.phonetic_attributes))
            continue

        surface_transition = SurfaceTransition(surface, suffix_transition)

        if self.ascii_tolerant:
            consumes_whole_tail = TurkishAlphabet.INSTANCE.equals_ignore_diacritics(path.tail, surface)
        else:
            consumes_whole_tail = path.tail == surface

        if consumes_whole_tail:
            attributes = deepcopy(path.phonetic_attributes)
        else:
            attributes = AttributesHelper.get_morphemic_attributes(surface, path.phonetic_attributes)

        try:
            attributes.remove(PhoneticAttribute.CannotTerminate)
        except KeyError:
            logger.debug("There is no CannotTerminate element in the set")

        token_type = suffix_transition.get_last_template_token().type_
        if token_type == SurfaceTransition.TemplateTokenType.LAST_VOICED:
            attributes.add(PhoneticAttribute.ExpectsConsonant)
        elif token_type == SurfaceTransition.TemplateTokenType.LAST_NOT_VOICED:
            attributes.add(PhoneticAttribute.ExpectsVowel)
            attributes.add(PhoneticAttribute.CannotTerminate)

        successors.append(path.get_copy(surface_transition, attributes))

    return successors
def generate(self, item: DictionaryItem = None, morphemes: Tuple[Morpheme, ...] = None,
             candidates: Tuple[StemTransition, ...] = None) -> Tuple['WordGenerator.Result', ...]:
    """Generate surface forms for a dictionary item or for explicit stem candidates.

    When ``item`` is given, its stem transitions are looked up through
    ``self.stem_transitions`` and generation restarts with those candidates.
    Otherwise one generation path is created per entry in ``candidates`` and
    the paths are handed to ``self.search``.

    :param item: dictionary item whose stem transitions should be used.
    :param morphemes: morphemes to consume, in order. ``None`` is treated as
        an empty sequence.
    :param candidates: stem transitions to start from; used only when
        ``item`` is not given.
    :return: one ``WordGenerator.Result`` per path returned by ``self.search``.
    :raises ValueError: if neither ``item`` nor ``candidates`` is supplied.
    """
    # The default of None used to reach len() below and fail with TypeError.
    if morphemes is None:
        morphemes = ()

    if item is not None:
        candidates_st: Tuple[StemTransition, ...] = self.stem_transitions.get_transitions_for_item(item)
        return self.generate(candidates=candidates_st, morphemes=morphemes)

    if candidates is None:
        raise ValueError("Either 'item' or 'candidates' must be provided.")

    paths: List['WordGenerator.GenerationPath'] = []
    for candidate in candidates:
        search_path: SearchPath = SearchPath.initial_path(candidate, " ")
        if len(morphemes) == 0:
            morphemes_in_path = ()
        elif morphemes[0] == search_path.current_state.morpheme:
            # The stem state already accounts for the first requested morpheme.
            morphemes_in_path = morphemes[1:]
        else:
            morphemes_in_path = morphemes
        paths.append(WordGenerator.GenerationPath(search_path, morphemes_in_path))

    # search graph
    result_paths: Tuple['WordGenerator.GenerationPath'] = self.search(paths)

    result: List['WordGenerator.Result'] = []
    for path in result_paths:
        analysis = SingleAnalysis.from_search_path(path.path)
        result.append(WordGenerator.Result(analysis.surface_form(), analysis))
    return tuple(result)
def copy_(self, path: SearchPath) -> 'WordGenerator.GenerationPath':
    """Build the generation path that follows ``path``'s last transition.

    If the last transition has an empty surface, the first pending morpheme
    is dropped only when it equals the transition's morpheme; otherwise the
    pending morphemes are carried over unchanged. If the last transition has
    a non-empty surface, its morpheme must equal the first pending morpheme,
    which is then dropped.

    :param path: search path whose last transition was just taken.
    :return: a new ``WordGenerator.GenerationPath`` wrapping ``path``.
    :raises Exception: if a transition with a non-empty surface does not
        match the first pending morpheme.
    """
    transition: SurfaceTransition = path.get_last_transition()
    morpheme: Morpheme = transition.get_morpheme()
    pending = self.morphemes

    if len(transition.surface) == 0:
        drops_head = len(pending) != 0 and morpheme == pending[0]
        remaining = pending[1:] if drops_head else pending
        return WordGenerator.GenerationPath(path, remaining)

    if morpheme != pending[0]:
        raise Exception("Cannot generate Generation copy because transition morpheme and first morpheme to "
                        "consume does not match.")
    return WordGenerator.GenerationPath(path, pending[1:])
def analyze(self, inp: str) -> Tuple[SingleAnalysis, ...]:
    """Analyze ``inp`` and return one ``SingleAnalysis`` per search result.

    Stem transitions whose surface is a prefix match for ``inp`` are fetched
    from ``self.stem_transitions``. Each one seeds a search path whose tail
    is the part of ``inp`` after the stem surface. The seeded paths are then
    passed to ``self.search``.

    :param inp: the word to analyze.
    :return: the analyses built from the paths returned by ``self.search``.
    :raises NotImplementedError: if ``self.debug_mode`` is set.
    """
    if self.debug_mode:
        raise NotImplementedError("Debug mode is not implemented")

    stems = self.stem_transitions.get_prefix_matches(inp, self.ascii_tolerant)
    seeds: List[SearchPath] = [
        SearchPath.initial_path(stem, inp[len(stem.surface):]) for stem in stems
    ]

    found: Tuple[SearchPath] = self.search(seeds)
    return tuple(SingleAnalysis.from_search_path(hit) for hit in found)
def from_search_path(search_path: SearchPath) -> 'SingleAnalysis':
    """Convert a finished search path into a ``SingleAnalysis``.

    Every transition contributes a ``MorphemeData`` entry, except those whose
    morpheme equals ``TurkishMorphotactics.nom`` or ``TurkishMorphotactics.pnon``.
    Entries with an empty surface are shared through
    ``SingleAnalysis.empty_morpheme_cache``. ``group_boundaries[0]`` stays 0
    and each later slot holds the index of a derivational morpheme in the
    collected list.

    :param search_path: the path to convert.
    :return: the analysis for the path's dictionary item; a ``Dummy`` item is
        replaced by its ``reference_item``.
    """
    collected: List['SingleAnalysis.MorphemeData'] = []
    derivation_count = 0

    for step in search_path.transitions:
        if step.is_derivative():
            derivation_count += 1

        morpheme = step.get_morpheme()
        is_reported = morpheme != TurkishMorphotactics.nom and morpheme != TurkishMorphotactics.pnon
        if not is_reported:
            continue

        if len(step.surface) != 0:
            collected.append(SingleAnalysis.MorphemeData(morpheme, step.surface))
            continue

        # Empty-surface entries are created once per morpheme and reused.
        cached = SingleAnalysis.empty_morpheme_cache.get(morpheme)
        if cached is None:
            cached = SingleAnalysis.MorphemeData(morpheme, "")
            SingleAnalysis.empty_morpheme_cache[morpheme] = cached
        collected.append(cached)

    group_boundaries: np.ndarray = np.zeros(derivation_count + 1, dtype=np.int32)
    next_slot = 1
    for position, entry in enumerate(collected):
        if entry.morpheme.derivational_:
            group_boundaries[next_slot] = position
            next_slot += 1

    item = search_path.get_dictionary_item()
    if item.has_attribute(RootAttribute.Dummy):
        item = item.reference_item
    return SingleAnalysis(item, collected, group_boundaries)
def accept_(self, visitor: SearchPath) -> bool:
    """Return whether the visitor's dictionary item has ``self.pos`` as its ``secondary_pos``."""
    dictionary_item = visitor.get_dictionary_item()
    return dictionary_item.secondary_pos == self.pos
def accept_(self, visitor: SearchPath) -> bool:
    """Return ``True`` when the visitor's dictionary item is absent from ``self.items``."""
    dictionary_item = visitor.get_dictionary_item()
    return dictionary_item not in self.items
def accept_(self, visitor: SearchPath) -> bool:
    """Return whether the visitor reports having ``self.item``.

    The result is ``False`` without consulting the visitor when ``self.item``
    is ``None``.
    """
    if self.item is None:
        return False
    return visitor.has_dictionary_item(self.item)
def accept_(self, visitor: SearchPath) -> bool:
    """Return whether the visitor's previous state has a morpheme in ``self.morphemes``.

    The result is ``False`` when the visitor has no previous state.
    """
    earlier_state = visitor.get_previous_state()
    if earlier_state is None:
        return False
    return earlier_state.morpheme in self.morphemes
def accept_(self, visitor: SearchPath) -> bool:
    """Return whether the visitor's stem surface equals any entry of ``self.surfaces``."""
    # The stem transition is looked up per candidate, so an empty
    # ``self.surfaces`` never touches the visitor.
    return any(
        visitor.get_stem_transition().surface == candidate
        for candidate in self.surfaces
    )
def accept_(self, visitor: SearchPath) -> bool:
    """Return whether the visitor's stem surface equals ``self.surface``."""
    stem_transition = visitor.get_stem_transition()
    return stem_transition.surface == self.surface
def accept_(self, visitor: SearchPath) -> bool:
    """Return ``True`` unless the visitor's previous state equals ``self.state``.

    A visitor with no previous state is accepted.
    """
    earlier_state = visitor.get_previous_state()
    if earlier_state is None:
        return True
    return not earlier_state == self.state
def accept_(self, visitor: SearchPath) -> bool:
    """Return the result of ``visitor.contains_suffix_with_surface_()``."""
    has_surfaced_suffix = visitor.contains_suffix_with_surface_()
    return has_surfaced_suffix
def accept_(self, visitor: SearchPath) -> bool:
    """Return whether the visitor's dictionary item has ``self.attribute``."""
    dictionary_item = visitor.get_dictionary_item()
    return dictionary_item.has_attribute(self.attribute)