コード例 #1
0
    def advance(self, path: SearchPath) -> List[SearchPath]:
        """Extend ``path`` along every outgoing transition of its current state.

        For each transition the surface is generated from the path's phonetic
        attributes and checked against the start of ``path.tail`` (ignoring
        diacritics when ``self.ascii_tolerant`` is set). Transitions whose
        surface does not prefix the tail, or whose ``can_pass`` check rejects
        the path, are dropped.

        :param path: the search path to extend.
        :return: the list of new paths, one per accepted transition.
        :raises NotImplementedError: if ``self.debug_mode`` is set and a
            transition reaches the prefix check.
        """
        extended: List[SearchPath] = []

        for suffix in path.current_state.outgoing:
            # With nothing left in the tail, a transition that carries a
            # surface form cannot be applied.
            if not path.tail and suffix.has_surface_form():
                continue

            surface = SurfaceTransition.generate_surface(suffix, path.phonetic_attributes)
            if self.ascii_tolerant:
                matches_tail = TurkishAlphabet.INSTANCE.starts_with_ignore_diacritics(path.tail, surface)
            else:
                matches_tail = path.tail.startswith(surface)

            # Debug tracing is not supported: it raises regardless of whether
            # the prefix check succeeded.
            if self.debug_mode:
                raise NotImplementedError("Not implemented debug_mode")

            if not matches_tail or not suffix.can_pass(path):
                continue

            if not suffix.has_surface_form():
                # Empty-surface transition: phonetic attributes carry over unchanged.
                extended.append(path.get_copy(SurfaceTransition("", suffix), path.phonetic_attributes))
                continue

            surface_transition = SurfaceTransition(surface, suffix)
            if self.ascii_tolerant:
                consumes_tail = TurkishAlphabet.INSTANCE.equals_ignore_diacritics(path.tail, surface)
            else:
                consumes_tail = path.tail == surface

            # Work on a copy when the surface equals the whole tail; otherwise
            # derive the attributes from the generated surface.
            if consumes_tail:
                attributes = deepcopy(path.phonetic_attributes)
            else:
                attributes = AttributesHelper.get_morphemic_attributes(surface, path.phonetic_attributes)

            try:
                attributes.remove(PhoneticAttribute.CannotTerminate)
            except KeyError:
                logger.debug("There is no CannotTerminate element in the set")

            last_token_type = suffix.get_last_template_token().type_
            if last_token_type == SurfaceTransition.TemplateTokenType.LAST_VOICED:
                attributes.add(PhoneticAttribute.ExpectsConsonant)
            elif last_token_type == SurfaceTransition.TemplateTokenType.LAST_NOT_VOICED:
                attributes.add(PhoneticAttribute.ExpectsVowel)
                attributes.add(PhoneticAttribute.CannotTerminate)

            extended.append(path.get_copy(surface_transition, attributes))

        return extended
コード例 #2
0
    def generate(self, item: DictionaryItem = None, morphemes: Tuple[Morpheme, ...] = None,
                 candidates: Tuple[StemTransition, ...] = None) -> Tuple['WordGenerator.Result', ...]:
        """Generate results for the given morphemes starting from stem candidates.

        Either ``item`` or ``candidates`` must be supplied. When ``item`` is
        given, its stem transitions are looked up through
        ``self.stem_transitions`` and any ``candidates`` argument is ignored.

        :param item: dictionary item whose stem transitions seed the search.
        :param morphemes: morphemes to consume; ``None`` is treated as an
            empty sequence.
        :param candidates: stem transitions to start from when ``item`` is
            not given.
        :return: one ``WordGenerator.Result`` per path returned by
            ``self.search``, holding the analysis' surface form and the
            analysis itself.
        :raises TypeError: if neither ``item`` nor ``candidates`` is given.
        """
        if item is not None:
            candidates = self.stem_transitions.get_transitions_for_item(item)
        elif candidates is None:
            raise TypeError("generate() requires either 'item' or 'candidates'")

        # The parameter defaults to None; normalise it so len()/indexing below are safe.
        if morphemes is None:
            morphemes = ()

        paths: List['WordGenerator.GenerationPath'] = []

        for candidate in candidates:
            search_path: SearchPath = SearchPath.initial_path(candidate, " ")
            if len(morphemes) == 0:
                morphemes_in_path = ()
            elif morphemes[0] == search_path.current_state.morpheme:
                # The initial state's morpheme is already covered by the
                # starting path, so it is not queued again.
                morphemes_in_path = morphemes[1:]
            else:
                morphemes_in_path = morphemes

            paths.append(WordGenerator.GenerationPath(search_path, morphemes_in_path))

        # search graph
        result_paths: Tuple['WordGenerator.GenerationPath', ...] = self.search(paths)
        result: List['WordGenerator.Result'] = []

        for path in result_paths:
            analysis = SingleAnalysis.from_search_path(path.path)
            result.append(WordGenerator.Result(analysis.surface_form(), analysis))

        return tuple(result)
コード例 #3
0
        def copy_(self, path: SearchPath) -> 'WordGenerator.GenerationPath':
            """Build the generation path that follows ``path``'s last transition.

            If the last transition has an empty surface, the first pending
            morpheme is dropped only when it equals the transition's morpheme;
            otherwise the pending morphemes are kept as they are. If the last
            transition has a non-empty surface, its morpheme must equal the
            first pending morpheme, which is then dropped.

            :param path: the search path to wrap.
            :return: a new ``WordGenerator.GenerationPath`` for ``path`` with
                the remaining morphemes.
            :raises Exception: if the last transition has a surface and its
                morpheme does not match the first pending morpheme (including
                when no morphemes are pending).
            """
            last_transition: SurfaceTransition = path.get_last_transition()
            m: Morpheme = last_transition.get_morpheme()

            # Guard the index so an exhausted morpheme list counts as "no match"
            # instead of raising IndexError.
            matches_next = len(self.morphemes) > 0 and m == self.morphemes[0]

            if len(last_transition.surface) == 0:
                remaining = self.morphemes[1:] if matches_next else self.morphemes
                return WordGenerator.GenerationPath(path, remaining)

            if not matches_next:
                raise Exception("Cannot generate Generation copy because transition morpheme and first morpheme to "
                                "consume does not match.")
            return WordGenerator.GenerationPath(path, self.morphemes[1:])
コード例 #4
0
    def analyze(self, inp: str) -> Tuple[SingleAnalysis, ...]:
        """Analyze ``inp`` and return one ``SingleAnalysis`` per search result.

        Stem candidates are obtained via
        ``self.stem_transitions.get_prefix_matches``. Each candidate seeds an
        initial search path whose tail is the part of ``inp`` after the
        candidate's surface. The paths are then handed to ``self.search``.

        :param inp: the input string to analyze.
        :return: a tuple of analyses built from the paths ``self.search`` returns.
        :raises NotImplementedError: if ``self.debug_mode`` is set.
        """
        if self.debug_mode:
            raise NotImplementedError("Debug mode is not implemented")

        stem_candidates = self.stem_transitions.get_prefix_matches(inp, self.ascii_tolerant)

        # The tail is whatever follows the stem's surface in the input.
        initial_paths: List[SearchPath] = [
            SearchPath.initial_path(stem, inp[len(stem.surface):])
            for stem in stem_candidates
        ]

        found_paths: Tuple[SearchPath] = self.search(initial_paths)
        return tuple(SingleAnalysis.from_search_path(found) for found in found_paths)
コード例 #5
0
    def from_search_path(search_path: SearchPath) -> 'SingleAnalysis':
        """Build a ``SingleAnalysis`` from the transitions of ``search_path``.

        Every transition's morpheme is recorded as a ``MorphemeData`` entry,
        except ``TurkishMorphotactics.nom`` and ``TurkishMorphotactics.pnon``,
        which are skipped. Entries with an empty surface are shared through
        ``SingleAnalysis.empty_morpheme_cache``. Group boundaries hold the
        index of each derivational morpheme, with slot 0 left at zero.

        :param search_path: the path whose transitions and dictionary item
            are converted.
        :return: the analysis for the path's dictionary item, or for its
            ``reference_item`` when the item carries ``RootAttribute.Dummy``.
        """
        collected: List['SingleAnalysis.MorphemeData'] = []
        derivative_transitions = 0

        for step in search_path.transitions:
            if step.is_derivative():
                derivative_transitions += 1

            morpheme = step.get_morpheme()
            is_recorded = morpheme != TurkishMorphotactics.nom and morpheme != TurkishMorphotactics.pnon
            if not is_recorded:
                continue

            if len(step.surface) == 0:
                # Reuse one shared entry per morpheme for empty surfaces.
                entry = SingleAnalysis.empty_morpheme_cache.get(morpheme)
                if entry is None:
                    entry = SingleAnalysis.MorphemeData(morpheme, "")
                    SingleAnalysis.empty_morpheme_cache[morpheme] = entry
            else:
                entry = SingleAnalysis.MorphemeData(morpheme, step.surface)

            collected.append(entry)

        boundaries: np.ndarray = np.zeros(derivative_transitions + 1, dtype=np.int32)
        next_slot = 1  # slot 0 keeps its initial zero

        for position, entry in enumerate(collected):
            if entry.morpheme.derivational_:
                boundaries[next_slot] = position
                next_slot += 1

        root_item = search_path.get_dictionary_item()
        if root_item.has_attribute(RootAttribute.Dummy):
            root_item = root_item.reference_item

        return SingleAnalysis(root_item, collected, boundaries)
コード例 #6
0
 def accept_(self, visitor: SearchPath) -> bool:
     """Return whether the visitor's dictionary item has ``self.pos`` as its secondary POS."""
     dictionary_item = visitor.get_dictionary_item()
     return dictionary_item.secondary_pos == self.pos
コード例 #7
0
 def accept_(self, visitor: SearchPath) -> bool:
     """Return whether the visitor's dictionary item is absent from ``self.items``."""
     dictionary_item = visitor.get_dictionary_item()
     is_listed = dictionary_item in self.items
     return not is_listed
コード例 #8
0
 def accept_(self, visitor: SearchPath) -> bool:
     """Return whether ``visitor`` has ``self.item``; always False when ``self.item`` is None."""
     if self.item is None:
         return False
     return visitor.has_dictionary_item(self.item)
コード例 #9
0
 def accept_(self, visitor: SearchPath) -> bool:
     """Return whether the visitor's previous state exists and its morpheme is in ``self.morphemes``."""
     earlier_state = visitor.get_previous_state()
     if earlier_state is None:
         return False
     return earlier_state.morpheme in self.morphemes
コード例 #10
0
 def accept_(self, visitor: SearchPath) -> bool:
     """Return whether the visitor's stem transition surface equals any entry of ``self.surfaces``."""
     return any(
         visitor.get_stem_transition().surface == candidate
         for candidate in self.surfaces
     )
コード例 #11
0
 def accept_(self, visitor: SearchPath) -> bool:
     """Return whether the visitor's stem transition surface equals ``self.surface``."""
     stem_surface = visitor.get_stem_transition().surface
     return stem_surface == self.surface
コード例 #12
0
 def accept_(self, visitor: SearchPath) -> bool:
     """Return True when the visitor has no previous state or that state is not equal to ``self.state``."""
     earlier_state = visitor.get_previous_state()
     if earlier_state is None:
         return True
     return not (earlier_state == self.state)
コード例 #13
0
 def accept_(self, visitor: SearchPath) -> bool:
     """Return the result of the visitor's ``contains_suffix_with_surface_`` check."""
     has_surface_suffix = visitor.contains_suffix_with_surface_()
     return has_surface_suffix
コード例 #14
0
 def accept_(self, visitor: SearchPath) -> bool:
     """Return whether the visitor's dictionary item has ``self.attribute``."""
     dictionary_item = visitor.get_dictionary_item()
     return dictionary_item.has_attribute(self.attribute)