def _generate_likelihood(self, surface, leading_context=None, following_context=None):
    """Parse ``surface`` and score every parse result against the given context.

    Returns ``None`` when the surface cannot be parsed, a single
    ``(formatted_result, 1.0)`` entry when the parse is unambiguous, and
    otherwise a list of ``(formatted_parse_result, likelihood)`` tuples.
    At least one of the two contexts must be provided.
    """
    assert leading_context or following_context
    parse_results = self.contextless_parser.parse(surface)
    if not parse_results:
        return None
    if len(parse_results) == 1:
        # Unambiguous parse: certain by definition.
        only_formatted = formatter.format_morpheme_container_for_parseset(parse_results[0])
        return [(only_formatted, 1.0)]
    scored = []
    for parse_result in parse_results:
        formatted = formatter.format_morpheme_container_for_parseset(parse_result)
        if leading_context and following_context:
            score = self.generator.calculate_likelihood(parse_result, leading_context, following_context)
        elif leading_context:
            score = self.generator.calculate_oneway_likelihood(parse_result, leading_context, True)
        else:
            # The assert above guarantees following_context is set here.
            score = self.generator.calculate_oneway_likelihood(parse_result, following_context, False)
        scored.append((formatted, score))
    return scored
def _generate_likelihood(self, surface, leading_context=None, following_context=None):
    """Score each contextless parse of ``surface`` with the calculator.

    ``None`` is returned for an unparseable surface; a single unambiguous
    parse is reported with likelihood 1.0; otherwise every parse is paired
    with its context-dependent likelihood.
    """
    assert leading_context or following_context
    results = self.contextless_parser.parse(surface)
    if not results:
        return None
    if len(results) == 1:
        return [(formatter.format_morpheme_container_for_parseset(results[0]), 1.0)]
    likelihoods = []
    for result in results:
        key = formatter.format_morpheme_container_for_parseset(result)
        if leading_context and following_context:
            value = self.calculator.calculate_likelihood(result, leading_context, following_context)
        elif leading_context:
            value = self.calculator.calculate_oneway_likelihood(result, leading_context, True)
        else:
            # Only following_context remains (guaranteed nonempty by the assert).
            value = self.calculator.calculate_oneway_likelihood(result, following_context, False)
        likelihoods.append((key, value))
    return likelihoods
def test_should_format_for_parseset(self):
    """Formatter output for sample surfaces must match the known parseset strings."""
    expectations = [
        (u'kitaba', u'kitap+Noun+A3sg+Pnon+Dat'),
        (u'yaptırtmayı', u'yap+Verb+Verb+Caus+Verb+Caus+Pos+Noun+Inf+A3sg+Pnon+Acc'),
    ]
    for surface, expected in expectations:
        container = self.parser.parse(surface)[0]
        assert_that(formatter.format_morpheme_container_for_parseset(container), equal_to(expected))
def _test_generate_likelihood(self, surface, leading_context=None, following_context=None, create_calculation_context=False):
    """Score every parse of ``surface`` against the contexts and pretty-print the results.

    When ``create_calculation_context`` is true, an empty dict is passed to the
    generator so it can record how each likelihood was derived; the dict is
    included in the printed output.
    """
    self.generator.build_indexes()
    assert leading_context or following_context
    leading_context = self._get_context(leading_context)
    following_context = self._get_context(following_context)
    entries = []
    for parse_result in self.contextless_parser.parse(surface):
        calc_ctx = {} if create_calculation_context else None
        formatted = formatter.format_morpheme_container_for_parseset(parse_result)
        if leading_context and following_context:
            likelihood = self.generator.calculate_likelihood(parse_result, leading_context, following_context, calc_ctx)
        elif leading_context:
            likelihood = self.generator.calculate_oneway_likelihood(parse_result, leading_context, True, calc_ctx)
        else:
            # Guaranteed by the assert: only following_context is present here.
            likelihood = self.generator.calculate_oneway_likelihood(parse_result, following_context, False, calc_ctx)
        entries.append((formatted, likelihood, calc_ctx))
    for entry in entries:
        pprint.pprint(entry)
def _test_generate_likelihood(self, surface, leading_context=None, following_context=None, calculation_context=None):
    """Parse ``surface``, compute a context-sensitive likelihood for every parse
    result, and print the ``(formatted_parse_result, likelihood)`` pairs.

    At least one of ``leading_context`` / ``following_context`` is required.
    ``calculation_context``, when given, is threaded through so the generator
    can record how each likelihood was derived.
    """
    assert leading_context or following_context
    likelihoods = []
    results = self.contextless_parser.parse(surface)
    for result in results:
        formatted_parse_result = formatter.format_morpheme_container_for_parseset(result)
        likelihood = 0.0
        if leading_context and following_context:
            likelihood = self.generator.calculate_likelihood(result, leading_context, following_context, calculation_context)
        elif leading_context:
            likelihood = self.generator.calculate_oneway_likelihood(result, leading_context, True, calculation_context)
        elif following_context:
            likelihood = self.generator.calculate_oneway_likelihood(result, following_context, False, calculation_context)
        likelihoods.append((formatted_parse_result, likelihood))
    for item in likelihoods:
        # FIX: was the Python-2-only statement "print item"; print(item) behaves
        # identically on Python 2 (parenthesized expression) and is valid on
        # Python 3, matching the print-function style used elsewhere in the file.
        print(item)
def add_parse_result(self, uuid_for_parse_result, parse_result, likelihood_value, likelihood_percentage, is_correct_parse_result, calculation_context):
    """Append one parse-result entry to the 'parse_results' list kept in the view context."""
    containers = self._context.get('parse_results') or []
    containers.append({
        'uuid': uuid_for_parse_result,
        'formatted_parse_result': formatter.format_morpheme_container_for_parseset(parse_result, add_space=True),
        'likelihood_value': likelihood_value,
        'likelihood_percentage': likelihood_percentage,
        'likelihood_percentage_color': self._get_likelihood_percentage_color(likelihood_percentage),
        'correct_parse_result': is_correct_parse_result,
        'calculation_context': calculation_context,
    })
    # Write back so a freshly created list is stored on first use.
    self._context['parse_results'] = containers
def _test_calculate(self, surface):
    """Parse ``surface`` contextlessly and pretty-print each parse with its likelihood."""
    parse_results = self.contextless_parser.parse(surface)
    scored = [
        (formatter.format_morpheme_container_for_parseset(parse_result),
         self.calculator.calculate(parse_result))
        for parse_result in parse_results
    ]
    pprint.pprint(scored)
def _get_word_morpheme_container_tuple(self, seq, expected_result=None):
    """Parse ``seq`` and return ``(seq, morpheme_container_or_None)``.

    Without ``expected_result`` the first parse is returned. With it, the
    first parse whose parseset formatting equals ``expected_result`` is
    returned, or ``None`` when no parse matches (or parsing failed).
    """
    parse_results = self.parser.parse(seq)
    if not parse_results:
        return seq, None
    if not expected_result:
        return seq, parse_results[0]
    # FIX: use a list comprehension instead of filter() so the truthiness
    # check below stays correct on Python 3 as well, where filter() returns
    # an always-truthy iterator; on Python 2 the behavior is identical.
    matching_containers = [
        parse_result for parse_result in parse_results
        if formatter.format_morpheme_container_for_parseset(parse_result) == expected_result
    ]
    if matching_containers:
        return seq, matching_containers[0]
    return seq, None
def save_parse_result_for_word(self, word_id, parse_result_uuid):
    """Persist the chosen parse result as the correct parse of a word.

    @type word_id: ObjectId
    @type parse_result_uuid: str or unicode
    """
    parse_result = self.sessionmanager.get_parse_result(parse_result_uuid)
    # FIX: the assertion message previously formatted parse_result — which is
    # None/falsy exactly when the assert fires — instead of the uuid looked up.
    assert parse_result, "No parse result found with id {}".format(parse_result_uuid)
    word = self.dbmanager.get_word(word_id)
    if not word:
        raise Exception("Word not found for setting the correct parse result! {}".format(word_id))
    # check if the parse result belongs to the given word
    assert word['surface'] == parse_result.get_surface() or TurkishAlphabet.lower(word['surface']) == parse_result.get_surface()
    self.dbmanager.set_parse_result_for_word(word, formatter.format_morpheme_container_for_parseset(parse_result), parse_result)
def add_parse_result(self, uuid_for_parse_result, parse_result, likelihood_value, likelihood_percentage, is_correct_parse_result, calculation_context):
    """Record one parse result (with its likelihood metadata) in the view context."""
    entry = {}
    entry['uuid'] = uuid_for_parse_result
    entry['formatted_parse_result'] = formatter.format_morpheme_container_for_parseset(parse_result, add_space=True)
    entry['likelihood_value'] = likelihood_value
    entry['likelihood_percentage'] = likelihood_percentage
    entry['likelihood_percentage_color'] = self._get_likelihood_percentage_color(likelihood_percentage)
    entry['correct_parse_result'] = is_correct_parse_result
    entry['calculation_context'] = calculation_context
    existing = self._context.get('parse_results') or []
    existing.append(entry)
    self._context['parse_results'] = existing
def _test_generate_likelihood(self, surface, leading_context=None, following_context=None, calculation_context=None):
    """Parse ``surface``, compute a likelihood for each parse against the given
    context(s), and print the ``(formatted_parse_result, likelihood)`` pairs.

    Requires at least one of ``leading_context`` / ``following_context``.
    """
    assert leading_context or following_context
    likelihoods = []
    results = self.contextless_parser.parse(surface)
    for result in results:
        formatted_parse_result = formatter.format_morpheme_container_for_parseset(result)
        likelihood = 0.0
        if leading_context and following_context:
            likelihood = self.generator.calculate_likelihood(result, leading_context, following_context, calculation_context)
        elif leading_context:
            likelihood = self.generator.calculate_oneway_likelihood(result, leading_context, True, calculation_context)
        elif following_context:
            likelihood = self.generator.calculate_oneway_likelihood(result, following_context, False, calculation_context)
        likelihoods.append((formatted_parse_result, likelihood))
    for item in likelihoods:
        # FIX: was the Python-2-only "print item" statement; print(item) is
        # equivalent on Python 2 and valid on Python 3, consistent with the
        # print-function style already used in this file.
        print(item)
def create_word_binding_from_morpheme_container(self, word_str, morpheme_container):
    """Build a WordBinding (root + ordered suffix bindings) from a parsed
    morpheme container for the surface ``word_str``.

    The surface must equal the container's accumulated surface, either
    exactly or after lowercasing the first letter (title-cased words).
    """
    assert (word_str == morpheme_container.get_surface_so_far()) or (TurkishAlphabet.lower(word_str[0])+word_str[1:] == morpheme_container.get_surface_so_far())
    # Root-level attributes extracted from the container's lexeme.
    root_str = morpheme_container.get_root().str
    lemma = morpheme_container.get_root().lexeme.lemma
    lemma_root = morpheme_container.get_root().lexeme.root
    root_syntactic_category = morpheme_container.get_root().lexeme.syntactic_category
    root_secondary_syntactic_category = morpheme_container.get_root().lexeme.secondary_syntactic_category
    root = RootBinding(root_str, lemma, lemma_root, root_syntactic_category, root_secondary_syntactic_category)
    # Word-level (surface) categories and the formatted full parse string.
    word_syntactic_category = morpheme_container.get_surface_syntactic_category()
    word_secondary_syntactic_category = morpheme_container.get_surface_secondary_syntactic_category()
    parse_result = formatter.format_morpheme_container_for_parseset(morpheme_container)
    word = WordBinding(word_str, parse_result, root, word_syntactic_category, word_secondary_syntactic_category)
    if morpheme_container.get_transitions():
        # Accumulate the surface built so far while walking the suffix transitions.
        so_far = root_str
        for transition in morpheme_container.get_transitions():
            # Free transitions add no surface material and are not bound.
            if isinstance(transition.suffix_form_application.suffix_form.suffix, FreeTransitionSuffix):
                continue
            suffix_name = transition.suffix_form_application.suffix_form.suffix.name
            suffix_pretty_name = transition.suffix_form_application.suffix_form.suffix.pretty_name
            suffix_form = transition.suffix_form_application.suffix_form.form
            suffix_application = transition.suffix_form_application.fitting_suffix_form
            suffix_actual_application = transition.suffix_form_application.actual_suffix_form
            word_with_suffix_application = None
            # NOTE(review): this branch fires when the applied suffix adds no new
            # surface beyond root_str (so_far == root_str and the actual
            # application is empty) — presumably a zero-surface suffix; in that
            # case the lexeme's root form is used instead. TODO confirm.
            if (so_far + suffix_actual_application)==root_str:
                word_with_suffix_application = morpheme_container.get_root().lexeme.root + suffix_application
            else:
                word_with_suffix_application = so_far + suffix_application
            so_far += suffix_actual_application
            # Derivational vs. inflectional transitions map to different binding types.
            if transition.is_derivational():
                suffix = DerivationalSuffixBinding(suffix_name, suffix_pretty_name, suffix_form, suffix_application, suffix_actual_application, word_with_suffix_application, so_far, transition.to_state.syntactic_category)
                word.suffixes.append(suffix)
            else:
                suffix = InflectionalSuffixBinding(suffix_name, suffix_pretty_name, suffix_form, suffix_application, suffix_actual_application, word_with_suffix_application, so_far, transition.to_state.syntactic_category)
                word.suffixes.append(suffix)
    return word
def _test_generate_likelihood(self, surface, leading_context=None, following_context=None, create_calculation_context=False):
    """Generate a likelihood for every parse of ``surface`` and pretty-print
    ``(formatted_result, likelihood, calculation_context)`` triples.

    ``create_calculation_context=True`` hands the generator a dict per parse
    so that it can record its intermediate computation.
    """
    self.generator.build_indexes()
    assert leading_context or following_context
    leading_context = self._get_context(leading_context)
    following_context = self._get_context(following_context)
    triples = []
    for result in self.contextless_parser.parse(surface):
        calc_ctx = {} if create_calculation_context else None
        formatted = formatter.format_morpheme_container_for_parseset(result)
        if leading_context and following_context:
            value = self.generator.calculate_likelihood(result, leading_context, following_context, calc_ctx)
        elif leading_context:
            value = self.generator.calculate_oneway_likelihood(result, leading_context, True, calc_ctx)
        else:
            # By the assert, following_context is the only remaining case.
            value = self.generator.calculate_oneway_likelihood(result, following_context, False, calc_ctx)
        triples.append((formatted, value, calc_ctx))
    for triple in triples:
        pprint.pprint(triple)
def save_parse_result_for_word(self, word_id, parse_result_uuid):
    """Persist the chosen parse result as the correct parse of a word.

    @type word_id: ObjectId
    @type parse_result_uuid: str or unicode
    """
    parse_result = self.sessionmanager.get_parse_result(parse_result_uuid)
    # FIX: the assertion message previously formatted parse_result — which is
    # None/falsy exactly when the assert fires — instead of the uuid looked up.
    assert parse_result, "No parse result found with id {}".format(parse_result_uuid)
    word = self.dbmanager.get_word(word_id)
    if not word:
        raise Exception(
            "Word not found for setting the correct parse result! {}".format(word_id))
    # check if the parse result belongs to the given word
    assert word['surface'] == parse_result.get_surface() or \
        TurkishAlphabet.lower(word['surface']) == parse_result.get_surface()
    self.dbmanager.set_parse_result_for_word(
        word, formatter.format_morpheme_container_for_parseset(parse_result),
        parse_result)
def format(self, add_space=False):
    """Return this container formatted as a parseset string.

    ``add_space`` is forwarded to the formatter unchanged.
    """
    formatted = formatter.format_morpheme_container_for_parseset(self, add_space)
    return formatted
def go_to_word(self, word_id):
    """Navigate the learner view to the given word: loads the word, its
    surrounding context words, progress counts, previous/next navigation
    targets, and the likelihood-ranked parse results.

    @type word_id: ObjectId
    """
    assert LearnerController.WORD_COUNT_TO_SHOW_IN_CONTEXT >= LearnerController.WORD_COUNT_TO_USE_AS_PARSE_CONTEXT
    assert word_id
    # Parse results from any previously viewed word are discarded.
    self.sessionmanager.delete_parse_results()
    # find and set new word in view
    word = self.dbmanager.get_word(word_id)
    assert word
    self.learnerview.set_current_word(word)
    word_index = word["index"]
    corpus_id = word["corpus_id"]
    # set corpus id in the view
    self.learnerview.set_corpus_id(corpus_id)
    # find and set contexts (to be shown) in view
    leading_start_index_to_show = word_index - LearnerController.WORD_COUNT_TO_SHOW_IN_CONTEXT
    leading_end_index_to_show = word_index - 1
    following_start_index_to_show = word_index + 1
    following_end_index_to_show = word_index + LearnerController.WORD_COUNT_TO_SHOW_IN_CONTEXT
    leading_words = self.dbmanager.get_words_in_range(
        corpus_id, leading_start_index_to_show, leading_end_index_to_show
    )
    following_words = self.dbmanager.get_words_in_range(
        corpus_id, following_start_index_to_show, following_end_index_to_show
    )
    self.learnerview.set_leading_words(leading_words)
    self.learnerview.set_following_words(following_words)
    # set counts and indices of the new word within counts in view
    all_nonparsed_count = self.dbmanager.count_all_nonparsed(corpus_id)
    prior_nonparsed_count = self.dbmanager.count_nonparsed_prior_to_index(corpus_id, word_index)
    all_count = self.dbmanager.count_all(corpus_id)
    # NOTE(review): statement grouping reconstructed from collapsed source —
    # confirm that set_all_count is intended to run unconditionally.
    if not word["parsed"]:
        self.learnerview.set_all_nonparsed_count(all_nonparsed_count)
        self.learnerview.set_prior_nonparsed_count(prior_nonparsed_count)
    self.learnerview.set_all_count(all_count)
    # find previous and next nonparsed words and set the stuff on the ui
    previous_nonparsed_word = self.dbmanager.find_previous_nonparsed_word(corpus_id, word_index)
    next_nonparsed_word = self.dbmanager.find_next_nonparsed_word(corpus_id, word_index)
    if previous_nonparsed_word:
        self.learnerview.set_previous_nonparsed_word(previous_nonparsed_word)
    if next_nonparsed_word:
        self.learnerview.set_next_nonparsed_word(next_nonparsed_word)
    next_word = self.dbmanager.find_next_word(corpus_id, word)
    if next_word:
        self.learnerview.set_next_word(next_word)
    # find parse context words
    # Take at most WORD_COUNT_TO_USE_AS_PARSE_CONTEXT words nearest the
    # current word on each side; copies are taken when fewer are available.
    leading_parse_context_words = (
        leading_words[-LearnerController.WORD_COUNT_TO_USE_AS_PARSE_CONTEXT :]
        if len(leading_words) >= LearnerController.WORD_COUNT_TO_USE_AS_PARSE_CONTEXT
        else leading_words[:]
    )
    following_parse_context_words = (
        following_words[: LearnerController.WORD_COUNT_TO_USE_AS_PARSE_CONTEXT]
        if len(following_words) >= LearnerController.WORD_COUNT_TO_USE_AS_PARSE_CONTEXT
        else following_words[:]
    )
    leading_parse_context = self.parse_context_creator.create(leading_parse_context_words)
    following_parse_context = self.parse_context_creator.create(following_parse_context_words)
    # parse and set parse results in view
    parse_results_with_likelihoods = []
    # calculation_context is filled by parse_with_likelihoods, keyed by the
    # parse result's index.
    calculation_context = {}
    parse_results = self.contextful_morphological_parser.parse_with_likelihoods(
        word["surface"], leading_parse_context, following_parse_context, calculation_context
    )
    if not parse_results:
        return
    for parse_result_index, (parse_result, likelihood) in enumerate(parse_results):
        parse_results_with_likelihoods.append((parse_result, likelihood, calculation_context[parse_result_index]))
    total_likelihood = sum([t[1] for t in parse_results_with_likelihoods])
    # sort by likelihood then "shortness"
    parse_results_with_likelihoods = sorted(
        parse_results_with_likelihoods, key=lambda tup: (tup[1], -len(tup[0].get_transitions())), reverse=True
    )
    for parse_result, likelihood_value, calculation_context in parse_results_with_likelihoods:
        uuid_for_parse_result = self.sessionmanager.put_parse_result_in_session(parse_result, calculation_context)
        # Normalize to a percentage; 0.0 when every parse scored zero.
        likelihood_percent = likelihood_value / total_likelihood * 100.0 if total_likelihood > 0.0 else 0.0
        is_correct_parse_result = (
            word["parsed"] and formatter.format_morpheme_container_for_parseset(parse_result) == word["parse_result"]
        )
        self.learnerview.add_parse_result(
            uuid_for_parse_result,
            parse_result,
            likelihood_value,
            likelihood_percent,
            is_correct_parse_result,
            calculation_context,
        )
def go_to_word(self, word_id):
    """Navigate the learner view to the given word: loads the word, its
    surrounding context words, progress counts, previous/next navigation
    targets, and the likelihood-ranked parse results.

    @type word_id: ObjectId
    """
    assert LearnerController.WORD_COUNT_TO_SHOW_IN_CONTEXT >= LearnerController.WORD_COUNT_TO_USE_AS_PARSE_CONTEXT
    assert word_id
    # Parse results from any previously viewed word are discarded.
    self.sessionmanager.delete_parse_results()
    # find and set new word in view
    word = self.dbmanager.get_word(word_id)
    assert word
    self.learnerview.set_current_word(word)
    word_index = word['index']
    corpus_id = word['corpus_id']
    # set corpus id in the view
    self.learnerview.set_corpus_id(corpus_id)
    # find and set contexts (to be shown) in view
    leading_start_index_to_show = word_index - LearnerController.WORD_COUNT_TO_SHOW_IN_CONTEXT
    leading_end_index_to_show = word_index - 1
    following_start_index_to_show = word_index + 1
    following_end_index_to_show = word_index + LearnerController.WORD_COUNT_TO_SHOW_IN_CONTEXT
    leading_words = self.dbmanager.get_words_in_range(
        corpus_id, leading_start_index_to_show, leading_end_index_to_show)
    following_words = self.dbmanager.get_words_in_range(
        corpus_id, following_start_index_to_show, following_end_index_to_show)
    self.learnerview.set_leading_words(leading_words)
    self.learnerview.set_following_words(following_words)
    # set counts and indices of the new word within counts in view
    all_nonparsed_count = self.dbmanager.count_all_nonparsed(corpus_id)
    prior_nonparsed_count = self.dbmanager.count_nonparsed_prior_to_index(
        corpus_id, word_index)
    all_count = self.dbmanager.count_all(corpus_id)
    # NOTE(review): statement grouping reconstructed from collapsed source —
    # confirm that set_all_count is intended to run unconditionally.
    if not word['parsed']:
        self.learnerview.set_all_nonparsed_count(all_nonparsed_count)
        self.learnerview.set_prior_nonparsed_count(prior_nonparsed_count)
    self.learnerview.set_all_count(all_count)
    # find previous and next nonparsed words and set the stuff on the ui
    previous_nonparsed_word = self.dbmanager.find_previous_nonparsed_word(
        corpus_id, word_index)
    next_nonparsed_word = self.dbmanager.find_next_nonparsed_word(
        corpus_id, word_index)
    if previous_nonparsed_word:
        self.learnerview.set_previous_nonparsed_word(
            previous_nonparsed_word)
    if next_nonparsed_word:
        self.learnerview.set_next_nonparsed_word(next_nonparsed_word)
    next_word = self.dbmanager.find_next_word(corpus_id, word)
    if next_word:
        self.learnerview.set_next_word(next_word)
    # find parse context words
    # Take at most WORD_COUNT_TO_USE_AS_PARSE_CONTEXT words nearest the
    # current word on each side; copies are taken when fewer are available.
    leading_parse_context_words = leading_words[
        -LearnerController.WORD_COUNT_TO_USE_AS_PARSE_CONTEXT:] if len(
            leading_words
        ) >= LearnerController.WORD_COUNT_TO_USE_AS_PARSE_CONTEXT else leading_words[:]
    following_parse_context_words = following_words[:LearnerController.WORD_COUNT_TO_USE_AS_PARSE_CONTEXT] if len(
        following_words
    ) >= LearnerController.WORD_COUNT_TO_USE_AS_PARSE_CONTEXT else following_words[:]
    leading_parse_context = self.parse_context_creator.create(
        leading_parse_context_words)
    following_parse_context = self.parse_context_creator.create(
        following_parse_context_words)
    # parse and set parse results in view
    parse_results_with_likelihoods = []
    # calculation_context is filled by parse_with_likelihoods, keyed by the
    # parse result's index.
    calculation_context = {}
    parse_results = self.contextful_morphological_parser.parse_with_likelihoods(
        word['surface'], leading_parse_context, following_parse_context,
        calculation_context)
    if not parse_results:
        return
    for parse_result_index, (parse_result, likelihood) in enumerate(parse_results):
        parse_results_with_likelihoods.append(
            (parse_result, likelihood, calculation_context[parse_result_index]))
    total_likelihood = sum([t[1] for t in parse_results_with_likelihoods])
    # sort by likelihood then "shortness"
    parse_results_with_likelihoods = sorted(
        parse_results_with_likelihoods,
        key=lambda tup: (tup[1], -len(tup[0].get_transitions())),
        reverse=True)
    for parse_result, likelihood_value, calculation_context in parse_results_with_likelihoods:
        uuid_for_parse_result = self.sessionmanager.put_parse_result_in_session(
            parse_result, calculation_context)
        # Normalize to a percentage; 0.0 when every parse scored zero.
        likelihood_percent = likelihood_value / total_likelihood * 100.0 if total_likelihood > 0.0 else 0.0
        is_correct_parse_result = word[
            'parsed'] and formatter.format_morpheme_container_for_parseset(
                parse_result) == word['parse_result']
        self.learnerview.add_parse_result(uuid_for_parse_result, parse_result,
                                          likelihood_value,
                                          likelihood_percent,
                                          is_correct_parse_result,
                                          calculation_context)
def format(self, add_space=False):
    """Delegate to the parseset formatter for this container.

    ``add_space`` is passed straight through to the formatter.
    """
    return formatter.format_morpheme_container_for_parseset(self, add_space)
def add_parse_result(self, contextless_parse_result, offsets):
    """Index a contextless parse result and the offsets at which it occurs,
    keyed by its formatted parseset string."""
    key = formatter.format_morpheme_container_for_parseset(contextless_parse_result)
    self.parse_results[key] = contextless_parse_result
    self.parse_result_occurrences[key] = offsets
def add_parse_result(self, contextless_parse_result, offsets):
    """Register ``contextless_parse_result`` under its formatted string,
    together with the occurrence offsets."""
    formatted = formatter.format_morpheme_container_for_parseset(
        contextless_parse_result)
    self.parse_results[formatted] = contextless_parse_result
    self.parse_result_occurrences[formatted] = offsets
def create_word_binding_from_morpheme_container(self, word_str, morpheme_container):
    """Build a WordBinding (root + ordered suffix bindings) from a parsed
    morpheme container for the surface ``word_str``.

    The surface must equal the container's accumulated surface, either
    exactly or after lowercasing the first letter (title-cased words).
    """
    assert (word_str == morpheme_container.get_surface_so_far()) or (
        TurkishAlphabet.lower(word_str[0]) + word_str[1:] ==
        morpheme_container.get_surface_so_far())
    # Root-level attributes extracted from the container's lexeme.
    root_str = morpheme_container.get_root().str
    lemma = morpheme_container.get_root().lexeme.lemma
    lemma_root = morpheme_container.get_root().lexeme.root
    root_syntactic_category = morpheme_container.get_root().lexeme.syntactic_category
    root_secondary_syntactic_category = morpheme_container.get_root().lexeme.secondary_syntactic_category
    root = RootBinding(root_str, lemma, lemma_root, root_syntactic_category,
                       root_secondary_syntactic_category)
    # Word-level (surface) categories and the formatted full parse string.
    word_syntactic_category = morpheme_container.get_surface_syntactic_category()
    word_secondary_syntactic_category = morpheme_container.get_surface_secondary_syntactic_category()
    parse_result = formatter.format_morpheme_container_for_parseset(
        morpheme_container)
    word = WordBinding(word_str, parse_result, root, word_syntactic_category,
                       word_secondary_syntactic_category)
    if morpheme_container.get_transitions():
        # Accumulate the surface built so far while walking the suffix transitions.
        so_far = root_str
        for transition in morpheme_container.get_transitions():
            # Free transitions add no surface material and are not bound.
            if isinstance(
                    transition.suffix_form_application.suffix_form.suffix,
                    FreeTransitionSuffix):
                continue
            suffix_name = transition.suffix_form_application.suffix_form.suffix.name
            suffix_pretty_name = transition.suffix_form_application.suffix_form.suffix.pretty_name
            suffix_form = transition.suffix_form_application.suffix_form.form
            suffix_application = transition.suffix_form_application.fitting_suffix_form
            suffix_actual_application = transition.suffix_form_application.actual_suffix_form
            word_with_suffix_application = None
            # NOTE(review): this branch fires when the applied suffix adds no new
            # surface beyond root_str (so_far == root_str and the actual
            # application is empty) — presumably a zero-surface suffix; in that
            # case the lexeme's root form is used instead. TODO confirm.
            if (so_far + suffix_actual_application) == root_str:
                word_with_suffix_application = morpheme_container.get_root().lexeme.root + suffix_application
            else:
                word_with_suffix_application = so_far + suffix_application
            so_far += suffix_actual_application
            # Derivational vs. inflectional transitions map to different binding types.
            if transition.is_derivational():
                suffix = DerivationalSuffixBinding(
                    suffix_name, suffix_pretty_name, suffix_form,
                    suffix_application, suffix_actual_application,
                    word_with_suffix_application, so_far,
                    transition.to_state.syntactic_category)
                word.suffixes.append(suffix)
            else:
                suffix = InflectionalSuffixBinding(
                    suffix_name, suffix_pretty_name, suffix_form,
                    suffix_application, suffix_actual_application,
                    word_with_suffix_application, so_far,
                    transition.to_state.syntactic_category)
                word.suffixes.append(suffix)
    return word
def test_should_format_for_parseset(self):
    """Formatted parse results must match the expected parseset representations."""
    first = self.parser.parse(u'kitaba')[0]
    assert_that(formatter.format_morpheme_container_for_parseset(first),
                equal_to(u'kitap+Noun+A3sg+Pnon+Dat'))
    second = self.parser.parse(u'yaptırtmayı')[0]
    assert_that(formatter.format_morpheme_container_for_parseset(second),
                equal_to(u'yap+Verb+Verb+Caus+Verb+Caus+Pos+Noun+Inf+A3sg+Pnon+Acc'))
# Build a full contextless morphological parser (copula/numeral/proper-noun
# aware) and print, for each word of the sentence given as the first CLI
# argument, the set of distinct lowercased roots found among its parses.
root_map = root_map_generator.generate(all_roots)
suffix_graph = CopulaSuffixGraph(NumeralSuffixGraph(ProperNounSuffixGraph(BasicSuffixGraph())))
suffix_graph.initialize()
predefined_paths = PredefinedPaths(root_map, suffix_graph)
predefined_paths.create_predefined_paths()
# Root finders, tried in order by the parser.
root_finders = [
    WordRootFinder(root_map),
    TextNumeralRootFinder(root_map),
    DigitNumeralRootFinder(),
    ProperNounFromApostropheRootFinder(),
    ProperNounWithoutApostropheRootFinder(),
]
parser = UpperCaseSupportingContextlessMorphologicalParser(suffix_graph, predefined_paths, root_finders)
sentence = sys.argv[1].decode('utf-8')
for word in sentence.split():
    parse_results = parser.parse(word)
    distinct_roots = set()
    for parse_result in parse_results:
        formatted = formatter.format_morpheme_container_for_parseset(parse_result)
        # The root is everything before the first '+' in the parseset string.
        distinct_roots.add(formatted[:formatted.index('+')].lower())
    for root in distinct_roots:
        print(root.encode('utf-8'), end=' ')
    print()