예제 #1
0
    def assert_transitions_generated(self, word_to_parse, parse_result_to_pick, expected_transitions):
        """Assert that the transitions generated for one parse of a word are the expected ones.

        The word is parsed with ``self.parser``; the first resolution whose
        test-formatted string equals ``parse_result_to_pick`` is handed to
        ``self.transition_generator``. The generated transitions are
        de-duplicated, ordered by the length of their formatted string and then
        compared pairwise with ``expected_transitions``.

        :param word_to_parse: word passed to the parser and the transition generator
        :param parse_result_to_pick: test-formatted string of the resolution to use
        :param expected_transitions: ``(surface_so_far, formatted_parse_result)`` pairs,
            listed in the order described above
        """
        picked_morpheme_container = None

        resolutions = self.parser.parse(word_to_parse)
        for resolution in resolutions:
            if formatter.format_morpheme_container_for_tests(resolution) == parse_result_to_pick:
                picked_morpheme_container = resolution
                break

        assert_that(picked_morpheme_container, not_none(),
            u'Parse result to pick {} does not exist in parse resolutions : {}'.format(parse_result_to_pick, [formatter.format_morpheme_container_for_tests(r) for r in resolutions]))

        generated_transitions = self.transition_generator.generate_transitions(word_to_parse, picked_morpheme_container)
        generated_transitions_strs = [
            (generated_transition.get_surface_so_far(), formatter.format_morpheme_container_for_tests(generated_transition))
            for generated_transition in generated_transitions
        ]

        # The set drops duplicate pairs. ``key=`` replaces the former ``cmp=``
        # argument (removed in Python 3) and yields the same ordering: shortest
        # formatted string first, ties kept in the set's iteration order.
        generated_transitions_strs = sorted(set(generated_transitions_strs), key=lambda pair: len(pair[1]))

        assert_that(len(generated_transitions_strs), equal_to(len(expected_transitions)))

        # Lengths were asserted equal above, so zip() pairs up every element.
        for expected, generated in zip(expected_transitions, generated_transitions_strs):
            (expected_word, expected_parse_result) = expected
            (generated_word, generated_parse_result_str) = generated

            assert_that(expected_word, equal_to(generated_word))
            assert_that(expected_parse_result, equal_to(generated_parse_result_str))
예제 #2
0
    def test_should_format_for_tests(self):
        # Each entry pairs a word with the expected test-format of its FIRST
        # parse result; the entries are checked in order.
        expectations = [
            (u'kitaba',
             u'kitab(kitap)+Noun+A3sg+Pnon+Dat(+yA[a])'),
            (u'yaptırtmayı',
             u'yap(yapmak)+Verb+Verb+Caus(dIr[tır])+Verb+Caus(t[t])+Pos+Noun+Inf(mA[ma])+A3sg+Pnon+Acc(+yI[yı])'),
        ]

        for word, expected_format in expectations:
            first_result = self.parser.parse(word)[0]
            actual_format = formatter.format_morpheme_container_for_tests(first_result)
            assert_that(actual_format, equal_to(expected_format))
예제 #3
0
    def predefined_morpheme_containers(self, root_str, syntactic_category, secondary_syntactic_category):
        """Return the test-formatted predefined morpheme containers of all matching roots.

        A root key of ``self.morpheme_container_map`` matches when its ``str``
        and its lexeme's ``syntactic_category`` and
        ``secondary_syntactic_category`` all equal the given arguments.

        :param root_str: value compared with ``root.str``
        :param syntactic_category: value compared with ``root.lexeme.syntactic_category``
        :param secondary_syntactic_category: value compared with
            ``root.lexeme.secondary_syntactic_category``
        :return: list of formatted strings; empty when no root matches
        """
        matching_containers = []
        # Iterate key/value pairs directly instead of looking each key up again.
        for root, containers in self.morpheme_container_map.items():
            if (root.str == root_str
                    and root.lexeme.syntactic_category == syntactic_category
                    and root.lexeme.secondary_syntactic_category == secondary_syntactic_category):
                matching_containers.extend(containers)

        return [formatter.format_morpheme_container_for_tests(r) for r in matching_containers]
예제 #4
0
파일: parser.py 프로젝트: aliok/trnltk
    def _traverse_candidates(self, candidates, results, word):
        """Traverse every candidate, collect finished parses and recurse on the rest.

        Each candidate is expanded with ``self._traverse_candidate``. An
        expansion whose last state is ``State.TERMINAL`` and which has no
        remaining surface is appended to ``results``; a terminal one with
        remaining surface is only logged. Non-terminal expansions become the
        candidates of the next recursive round.

        :param candidates: morpheme containers to traverse in this round
        :param results: list that receives the finished morpheme containers (mutated in place)
        :param word: passed through to ``self._traverse_candidate``
        :return: the result of the recursive call when new candidates were
            produced, otherwise the (empty) list of new candidates
        """
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug('Gonna traverse %d candidates:', len(candidates))
            for candidate in candidates:
                logger.debug('\t%s', candidate)

        pending = []
        for candidate in candidates:
            logger.debug(' Traversing candidate: %s', candidate)

            for container in self._traverse_candidate(candidate, word):
                reached_terminal = container.get_last_state().type == State.TERMINAL
                if not reached_terminal:
                    # Not finished yet: keep it for the next round.
                    pending.append(container)
                    continue

                if container.get_remaining_surface():
                    # Terminal state, but part of the surface is still unparsed: drop it.
                    if logger.isEnabledFor(logging.DEBUG):
                        logger.debug("Found a morpheme container with terminal state, but there is still something to parse. Remaining:%s MorphemeContainer:%s", container.get_remaining_surface(), container)
                    continue

                results.append(container)
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug("Found a terminal result --------------------->")
                    logger.debug(container)
                    logger.debug(formatter.format_morpheme_container_for_tests(container))

        if not pending:
            return pending

        return self._traverse_candidates(pending, results, word)
예제 #5
0
    def assert_transitions_generated(self, word_to_parse, parse_result_to_pick,
                                     expected_transitions):
        """Check the transitions generated for a chosen parse result of a word.

        ``self.parser`` parses the word, and the first resolution whose
        test-formatted string equals ``parse_result_to_pick`` is selected; the
        assertion fails when there is none. ``self.transition_generator`` then
        produces transitions for it, which are de-duplicated, sorted by the
        length of their formatted string and compared element by element with
        ``expected_transitions``.

        :param word_to_parse: word passed to the parser and the transition generator
        :param parse_result_to_pick: test-formatted string identifying the resolution
        :param expected_transitions: expected ``(surface_so_far, formatted_parse_result)``
            pairs in sorted order
        """
        picked_morpheme_container = None

        resolutions = self.parser.parse(word_to_parse)
        for resolution in resolutions:
            formatted_resolution = formatter.format_morpheme_container_for_tests(
                resolution)
            if formatted_resolution == parse_result_to_pick:
                picked_morpheme_container = resolution
                break

        assert_that(
            picked_morpheme_container, not_none(),
            u'Parse result to pick {} does not exist in parse resolutions : {}'
            .format(parse_result_to_pick, [
                formatter.format_morpheme_container_for_tests(r)
                for r in resolutions
            ]))

        generated_transitions = self.transition_generator.generate_transitions(
            word_to_parse, picked_morpheme_container)
        generated_transitions_strs = [
            (generated_transition.get_surface_so_far(),
             formatter.format_morpheme_container_for_tests(
                 generated_transition))
            for generated_transition in generated_transitions
        ]

        # De-duplicate, then order by the length of the formatted string.
        # ``key=`` is used because ``cmp=`` no longer exists in Python 3; the
        # resulting order is the same as with the old comparison function.
        generated_transitions_strs = sorted(
            set(generated_transitions_strs),
            key=lambda pair: len(pair[1]))

        assert_that(len(generated_transitions_strs),
                    equal_to(len(expected_transitions)))

        # Both sequences have the same length here, so zip() drops nothing.
        for expected, generated in zip(expected_transitions,
                                       generated_transitions_strs):
            (expected_word, expected_parse_result) = expected
            (generated_word, generated_parse_result_str) = generated

            assert_that(expected_word, equal_to(generated_word))
            assert_that(expected_parse_result,
                        equal_to(generated_parse_result_str))
예제 #6
0
    def predefined_morpheme_containers(self, root_str, syntactic_category,
                                       secondary_syntactic_category):
        """Collect and format the predefined morpheme containers of matching roots.

        Every root key of ``self.morpheme_container_map`` is compared with the
        arguments: ``root.str`` with ``root_str``, and the root lexeme's
        ``syntactic_category`` / ``secondary_syntactic_category`` with the
        arguments of the same name. The containers of all matching roots are
        formatted with ``formatter.format_morpheme_container_for_tests``.

        :return: list of formatted strings; empty when no root matches
        """
        collected = []
        # items() yields each root with its containers, so no second lookup
        # into the map is needed.
        for root, containers in self.morpheme_container_map.items():
            matches = (
                root.str == root_str
                and root.lexeme.syntactic_category == syntactic_category
                and root.lexeme.secondary_syntactic_category ==
                secondary_syntactic_category)
            if matches:
                collected.extend(containers)

        return [
            formatter.format_morpheme_container_for_tests(r)
            for r in collected
        ]
예제 #7
0
파일: playground.py 프로젝트: aliok/trnltk
def parse_contextless(word_str, *syntactic_categories):
    """Parse a word with ``contextless_parser`` and print its formatted parse results.

    :param word_str: word to parse; a byte string is decoded as UTF-8 first
    :param syntactic_categories: when given, only parse results whose last
        state has one of these syntactic categories are kept

    Prints ``No parse result found`` when the (possibly filtered) result list
    is empty. Otherwise prints the test-format of each result, skipping those
    whose formatted string ends with ``Verb+Zero+Pres+A3sg``.
    """
    # ``bytes`` is an alias of ``str`` on Python 2, so this is the same check
    # as "str but not unicode" there, and it also works on Python 3.
    if isinstance(word_str, bytes):
        word_str = word_str.decode('utf-8')

    parse_results = contextless_parser.parse(word_str)
    if syntactic_categories:
        # Build a real list: a lazy ``filter`` object (Python 3) is always
        # truthy and would defeat the emptiness check below.
        parse_results = [parse_result for parse_result in parse_results
                         if parse_result.get_last_state().syntactic_category in syntactic_categories]

    if not parse_results:
        # Single-argument print(...) behaves identically as a Python 2
        # statement and as a Python 3 function call.
        print(u'No parse result found')
        return

    for parse_result in parse_results:
        formatted_output = formatter.format_morpheme_container_for_tests(parse_result)
        if not formatted_output.endswith(u'Verb+Zero+Pres+A3sg'):
            print(formatted_output)
예제 #8
0
def parse_contextless(word_str, *syntactic_categories):
    """Print the test-formatted contextless parse results of a word.

    The word is parsed with ``contextless_parser``. If any
    ``syntactic_categories`` are passed, only results whose last state's
    ``syntactic_category`` is among them are kept. ``No parse result found``
    is printed when no result remains; otherwise each result is printed in
    test format, except those ending with ``Verb+Zero+Pres+A3sg``.

    :param word_str: word to parse; byte strings are decoded as UTF-8
    :param syntactic_categories: optional syntactic categories to keep
    """
    # On Python 2 ``bytes`` is ``str``, which makes this equivalent to the
    # former "str and not unicode" test; on Python 3 it catches byte strings.
    if isinstance(word_str, bytes):
        word_str = word_str.decode('utf-8')

    parse_results = contextless_parser.parse(word_str)
    if syntactic_categories:
        # A list comprehension instead of filter(): on Python 3 filter()
        # returns an iterator that is truthy even when it yields nothing.
        parse_results = [
            parse_result for parse_result in parse_results
            if parse_result.get_last_state().syntactic_category in
            syntactic_categories
        ]

    if not parse_results:
        # print(...) with one argument works the same on Python 2 and 3.
        print(u'No parse result found')
    else:
        for parse_result in parse_results:
            formatted_output = formatter.format_morpheme_container_for_tests(
                parse_result)
            if not formatted_output.endswith(u'Verb+Zero+Pres+A3sg'):
                print(formatted_output)
예제 #9
0
    def _traverse_candidates(self, candidates, results, word):
        """Expand all candidates, store complete parses and recurse on unfinished ones.

        ``self._traverse_candidate`` is called for each candidate. Of the
        morpheme containers it returns:

        * those with a ``State.TERMINAL`` last state and no remaining surface
          are appended to ``results``;
        * those with a terminal last state but some remaining surface are
          only reported at debug level;
        * all others are traversed again by a recursive call.

        :param candidates: morpheme containers to expand
        :param results: output list, extended in place with complete parses
        :param word: forwarded unchanged to ``self._traverse_candidate``
        :return: whatever the recursive call returns if there were unfinished
            containers, else the empty list of unfinished containers
        """
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug('Gonna traverse %d candidates:', len(candidates))
            for candidate in candidates:
                logger.debug('\t%s', candidate)

        unfinished = []
        for candidate in candidates:
            logger.debug(' Traversing candidate: %s', candidate)

            expansions = self._traverse_candidate(candidate, word)
            for expansion in expansions:
                is_terminal = expansion.get_last_state().type == State.TERMINAL
                if not is_terminal:
                    unfinished.append(expansion)
                    continue

                if expansion.get_remaining_surface():
                    # Terminal, yet not all of the surface has been consumed.
                    if logger.isEnabledFor(logging.DEBUG):
                        logger.debug(
                            "Found a morpheme container with terminal state, but there is still something to parse. Remaining:%s MorphemeContainer:%s",
                            expansion.get_remaining_surface(), expansion)
                    continue

                results.append(expansion)
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug(
                        "Found a terminal result --------------------->")
                    logger.debug(expansion)
                    logger.debug(
                        formatter.format_morpheme_container_for_tests(
                            expansion))

        if unfinished:
            return self._traverse_candidates(unfinished, results, word)

        return unfinished
예제 #10
0
 def parse_result(self, word):
     """Return the test-formatted string of every parse result of ``word``."""
     formatted_results = []
     for morpheme_container in self.parser.parse(word):
         formatted_results.append(formatter.format_morpheme_container_for_tests(morpheme_container))
     return formatted_results
예제 #11
0
    def test_should_format_for_tests(self):
        # Only the first parse result of each word is formatted and checked.
        first_result = self.parser.parse(u'kitaba')[0]
        formatted = formatter.format_morpheme_container_for_tests(first_result)
        expected = equal_to(u'kitab(kitap)+Noun+A3sg+Pnon+Dat(+yA[a])')
        assert_that(formatted, expected)

        # Second word: its expected format contains several suffix groups.
        first_result = self.parser.parse(u'yaptırtmayı')[0]
        formatted = formatter.format_morpheme_container_for_tests(first_result)
        expected = equal_to(u'yap(yapmak)+Verb+Verb+Caus(dIr[tır])+Verb+Caus(t[t])+Pos+Noun+Inf(mA[ma])+A3sg+Pnon+Acc(+yI[yı])')
        assert_that(formatted, expected)
예제 #12
0
 def parse_result(self, word):
     """Parse ``word`` with ``self.parser`` and return each result in test format."""
     parsed = self.parser.parse(word)
     as_text = []
     for container in parsed:
         text = formatter.format_morpheme_container_for_tests(container)
         as_text.append(text)
     return as_text