Example #1
    def format_(self, analysis: SingleAnalysis, apostrophe: str) -> str:
        # Builds the surface form, inserting an apostrophe between the lemma
        # and the ending when the analysis requires one (e.g. proper nouns).
        item = analysis.item
        ending = analysis.get_ending()
        if apostrophe is None and not self.apostrophe_required(analysis):
            if RootAttribute.NoQuote in item.attributes:
                return item.normalized_lemma() + ending
            return analysis.get_stem() + ending

        if apostrophe is None:
            apostrophe = "'"
        if len(ending) > 0:
            return item.normalized_lemma() + apostrophe + ending
        return item.normalized_lemma()
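The joining rule above can be illustrated in isolation. A minimal standalone sketch, using plain strings instead of SingleAnalysis/DictionaryItem (the function and its arguments are illustrative, not part of the library API):

    def join_surface(lemma: str, ending: str, needs_apostrophe: bool) -> str:
        # proper nouns and similar items keep an apostrophe before the suffix
        if not needs_apostrophe:
            return lemma + ending
        return lemma + "'" + ending if ending else lemma

    print(join_surface("Ankara", "ya", True))    # Ankara'ya
    print(join_surface("elma", "lar", False))    # elmalar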
Example #2
    def format_to_case(self, analysis: SingleAnalysis,
                       type_: 'WordAnalysisSurfaceFormatter.CaseType',
                       apostrophe: str) -> str:
        # Formats the analysis, then applies the requested casing. The
        # Turkish-aware lower_map/upper_map translations run before
        # str.lower()/str.upper() so that I/ı and İ/i are cased correctly.
        formatted = self.format_(analysis, apostrophe)

        if type_ == WordAnalysisSurfaceFormatter.CaseType.DEFAULT_CASE:
            return formatted
        if type_ == WordAnalysisSurfaceFormatter.CaseType.LOWER_CASE:
            return formatted.translate(self.ALPHABET.lower_map).lower()
        if type_ == WordAnalysisSurfaceFormatter.CaseType.UPPER_CASE:
            return formatted.translate(self.ALPHABET.upper_map).upper()
        if type_ == WordAnalysisSurfaceFormatter.CaseType.TITLE_CASE:
            return Turkish.capitalize(formatted)
        if type_ == WordAnalysisSurfaceFormatter.CaseType.UPPER_CASE_ROOT_LOWER_CASE_ENDING:
            ending = analysis.get_ending()
            lemma_upper = analysis.item.normalized_lemma().translate(
                self.ALPHABET.upper_map).upper()

            if len(ending) == 0:
                return lemma_upper
            else:
                if apostrophe is None and not self.apostrophe_required(
                        analysis):
                    return lemma_upper + ending

                if apostrophe is None:
                    apostrophe = "'"

                return lemma_upper + apostrophe + ending
        # unhandled case types fall through to an empty string
        return ""
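The case branches rely on the alphabet's Turkish-specific lower/upper maps. A small sketch of why translating before str.lower()/str.upper() matters (the maps below are illustrative stand-ins for self.ALPHABET.lower_map/upper_map):

    lower_map = str.maketrans({"I": "ı", "İ": "i"})
    upper_map = str.maketrans({"i": "İ", "ı": "I"})

    print("ISPARTA".lower())                        # isparta  (dotless ı lost)
    print("ISPARTA".translate(lower_map).lower())   # ısparta  (Turkish casing)
    print("izmir".upper())                          # IZMIR    (dotted İ lost)
    print("izmir".translate(upper_map).upper())     # İZMİR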
Example #3
    def try_word_with_apostrophe(self, word: str, secondary_pos: SecondaryPos) -> Tuple[SingleAnalysis, ...]:
        normalized = self.ALPHABET.normalize_apostrophe(word)

        index = normalized.find("'")
        if index > 0 and index != len(normalized) - 1:
            stem = normalized[0: index]
            ending = normalized[index + 1:]
            se = StemAndEnding(stem, ending)
            stem_normalized = self.ALPHABET.normalize(se.stem).replace(".", "")
            ending_normalized = self.ALPHABET.normalize(se.ending)
            pronunciation = self.guess_pronunciation(stem_normalized)
            capitalize: bool = secondary_pos == SecondaryPos.ProperNoun or secondary_pos == SecondaryPos.Abbreviation
            pronunciation_possible: bool = self.ALPHABET.contains_vowel(pronunciation)
            # choose the lemma for the dictionary item
            if capitalize:
                lemma = Turkish.capitalize(normalized)
            elif pronunciation_possible:
                lemma = stem
            else:
                lemma = word
            item = DictionaryItem(lemma, stem_normalized, PrimaryPos.Noun,
                                  secondary_pos, pronunciation=pronunciation)
            if not pronunciation_possible:
                return (SingleAnalysis.dummy(word, item),)
            else:
                item_does_not_exist: bool = item not in self.lexicon
                if item_does_not_exist:
                    # register the guessed item temporarily; it is removed after analysis
                    item.attributes.add(RootAttribute.Runtime)
                    self.analyzer.stem_transitions.add_dictionary_item(item)

                to_parse = stem_normalized + ending_normalized
                no_quotes_parses: Tuple[SingleAnalysis, ...] = self.analyzer.analyze(to_parse)
                if item_does_not_exist:
                    self.analyzer.stem_transitions.remove_dictionary_item(item)

                # keep only parses whose stem matches the part before the apostrophe
                return tuple(parse for parse in no_quotes_parses
                             if parse.get_stem() == stem_normalized)
        else:
            return ()
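The core of the method above is the stem/ending split around the apostrophe. A standalone sketch of just that step (normalization, pronunciation guessing, and lexicon handling are omitted; the helper name is illustrative):

    def split_on_apostrophe(word: str):
        index = word.find("'")
        # the apostrophe must be neither the first nor the last character
        if index <= 0 or index == len(word) - 1:
            return None
        return word[:index], word[index + 1:]

    print(split_on_apostrophe("İstanbul'dan"))  # ('İstanbul', 'dan')
    print(split_on_apostrophe("elma"))          # None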
Example #4
    def generate(self, item: DictionaryItem = None, morphemes: Tuple[Morpheme, ...] = None,
                 candidates: Tuple[StemTransition, ...] = None) -> Tuple['WordGenerator.Result', ...]:
        if item:
            candidates_st: Tuple[StemTransition, ...] = self.stem_transitions.get_transitions_for_item(item)
            return self.generate(candidates=candidates_st, morphemes=morphemes)
        # no item given: behaves like the Java overload generate(List<StemTransition> candidates, List<Morpheme> morphemes)
        paths: List['WordGenerator.GenerationPath'] = []

        for candidate in candidates:
            search_path: SearchPath = SearchPath.initial_path(candidate, " ")
            # if the first requested morpheme is already the root state's morpheme, skip it
            if len(morphemes) > 0:
                if morphemes[0] == search_path.current_state.morpheme:
                    morphemes_in_path = morphemes[1:]
                else:
                    morphemes_in_path = morphemes
            else:
                morphemes_in_path = ()

            paths.append(WordGenerator.GenerationPath(search_path, morphemes_in_path))

        # search graph
        result_paths: Tuple['WordGenerator.GenerationPath', ...] = self.search(paths)
        result: List['WordGenerator.Result'] = []

        for path in result_paths:
            analysis = SingleAnalysis.from_search_path(path.path)
            result.append(WordGenerator.Result(analysis.surface_form(), analysis))

        return tuple(result)
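generate() folds two Java overloads, generate(item, morphemes) and generate(candidates, morphemes), into one Python method by dispatching on keyword arguments. A toy sketch of that dispatch pattern (toy_transitions and the '+'-joined output are stand-ins, not the real StemTransitions or Result objects):

    toy_transitions = {"kitap": ("kitap", "kitab")}  # stand-in for StemTransitions

    def generate(item=None, morphemes=(), candidates=()):
        if item is not None:
            # first "overload": expand the item into its candidate stems, then recurse
            return generate(candidates=toy_transitions[item], morphemes=morphemes)
        # second "overload": build one result per candidate stem
        return tuple(stem + "+" + "+".join(morphemes) for stem in candidates)

    print(generate(item="kitap", morphemes=("A3sg", "Dat")))
    # ('kitap+A3sg+Dat', 'kitab+A3sg+Dat')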
Example #5
    def analyze(self, inp: str) -> Tuple[SingleAnalysis, ...]:
        if self.debug_mode:
            raise NotImplementedError("Debug mode is not implemented")

        # stem candidates whose surface form is a prefix of the input
        candidates = self.stem_transitions.get_prefix_matches(
            inp, self.ascii_tolerant)

        paths: List[SearchPath] = []

        for candidate in candidates:
            length = len(candidate.surface)
            tail = inp[length:]
            paths.append(SearchPath.initial_path(candidate, tail))

        result_paths: Tuple[SearchPath, ...] = self.search(paths)
        result: List[SingleAnalysis] = []

        for path in result_paths:
            analysis: SingleAnalysis = SingleAnalysis.from_search_path(path)
            result.append(analysis)

        return tuple(result)
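This analyze() is the low-level routine; in practice it is reached through the library's morphology facade. A hypothetical usage sketch, assuming zemberek-python exposes TurkishMorphology.create_with_defaults() and analyze() as in the Java API (not verified against a specific version):

    from zemberek import TurkishMorphology

    # loading the default lexicon takes a few seconds
    morphology = TurkishMorphology.create_with_defaults()
    results = morphology.analyze("kalemler")
    print(results)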
Example #6
    def try_without_apostrophe(self, word: str, secondary_pos: SecondaryPos) -> Tuple[SingleAnalysis, ...]:
        # convert foreign diacritics to Turkish letters first, then normalize
        if self.ALPHABET.contains_foreign_diacritics(word):
            normalized = self.ALPHABET.normalize(self.ALPHABET.foreign_diacritics_to_turkish(word))
        else:
            normalized = self.ALPHABET.normalize(word)
        capitalize: bool = secondary_pos == SecondaryPos.ProperNoun or secondary_pos == SecondaryPos.Abbreviation
        pronunciation = self.guess_pronunciation(normalized.replace(".", ""))
        item = DictionaryItem(Turkish.capitalize(normalized) if capitalize else normalized,
                              normalized, PrimaryPos.Noun, secondary_pos,
                              pronunciation=pronunciation)
        if self.ALPHABET.contains_vowel(pronunciation):
            return (SingleAnalysis.dummy(word, item),)
        else:
            item_does_not_exist: bool = item not in self.lexicon
            if item_does_not_exist:
                # register the guessed item temporarily; it is removed after analysis
                item.attributes.add(RootAttribute.Runtime)
                self.analyzer.stem_transitions.add_dictionary_item(item)

            results: Tuple[SingleAnalysis, ...] = self.analyzer.analyze(normalized)
            if item_does_not_exist:
                self.analyzer.stem_transitions.remove_dictionary_item(item)

            return results
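Both this method and Example #3 use the same temporary-registration trick: insert a guessed runtime item, run the analyzer, then remove it so the lexicon is left unchanged. A toy sketch of that pattern (ToyLexicon is a stand-in, not the library's lexicon or StemTransitions API):

    class ToyLexicon:
        def __init__(self):
            self.items = set()

        def analyze(self, word):
            # pretend analysis: report which registered stems the word starts with
            return [stem for stem in self.items if word.startswith(stem)]

    lexicon = ToyLexicon()
    guessed = "trabzon"

    lexicon.items.add(guessed)            # temporarily register the guessed stem
    print(lexicon.analyze("trabzonlu"))   # ['trabzon']
    lexicon.items.discard(guessed)        # restore the lexicon afterwards
    print(lexicon.analyze("trabzonlu"))   # []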