def assert_transitions_generated(self, word_to_parse, parse_result_to_pick, expected_transitions):
    """Assert that the transitions generated for one parse of a word match.

    The word is parsed with ``self.parser``; the resolution whose test
    formatting equals ``parse_result_to_pick`` is handed to
    ``self.transition_generator``. The generated transitions are reduced to
    unique ``(surface so far, formatted container)`` pairs, ordered by the
    length of the formatted string, and compared pairwise with
    ``expected_transitions``.

    :param word_to_parse: surface form handed to the parser
    :param parse_result_to_pick: test-formatted string of the resolution to use
    :param expected_transitions: sequence of ``(word, formatted parse result)``
        pairs, listed in the order described above
    """
    resolutions = self.parser.parse(word_to_parse)

    # Pick the first resolution whose formatted form is the requested one.
    picked_morpheme_container = None
    for resolution in resolutions:
        if formatter.format_morpheme_container_for_tests(resolution) == parse_result_to_pick:
            picked_morpheme_container = resolution
            break

    assert_that(
        picked_morpheme_container,
        not_none(),
        u'Parse result to pick {} does not exist in parse resolutions : {}'.format(
            parse_result_to_pick,
            [formatter.format_morpheme_container_for_tests(r) for r in resolutions]))

    generated_transitions = self.transition_generator.generate_transitions(
        word_to_parse, picked_morpheme_container)

    # Collapse duplicate (surface, formatted) pairs.
    unique_pairs = set([
        (generated_transition.get_surface_so_far(),
         formatter.format_morpheme_container_for_tests(generated_transition))
        for generated_transition in generated_transitions
    ])

    # ``key=`` yields the same stable ordering as the former ``cmp=`` on the
    # formatted-string length, without relying on the Python-2-only ``cmp``.
    generated_transitions_strs = sorted(unique_pairs, key=lambda pair: len(pair[1]))

    assert_that(len(generated_transitions_strs), equal_to(len(expected_transitions)))

    # Lengths are known to be equal here, so zip() visits every pair.
    for expected, generated in zip(expected_transitions, generated_transitions_strs):
        (expected_word, expected_parse_result) = expected
        (generated_word, generated_parse_result_str) = generated
        assert_that(expected_word, equal_to(generated_word))
        assert_that(expected_parse_result, equal_to(generated_parse_result_str))
def test_should_format_for_tests(self):
    """Check the test formatter's output for the first parse of two words."""
    # (surface form, expected formatting of its first parse result)
    cases = [
        (u'kitaba',
         u'kitab(kitap)+Noun+A3sg+Pnon+Dat(+yA[a])'),
        (u'yaptırtmayı',
         u'yap(yapmak)+Verb+Verb+Caus(dIr[tır])+Verb+Caus(t[t])+Pos+Noun+Inf(mA[ma])+A3sg+Pnon+Acc(+yI[yı])'),
    ]

    for surface, expected in cases:
        first_parse = self.parser.parse(surface)[0]
        rendered = formatter.format_morpheme_container_for_tests(first_parse)
        assert_that(rendered, equal_to(expected))
def predefined_morpheme_containers(self, root_str, syntactic_category, secondary_syntactic_category):
    """Return the test-formatted predefined containers of matching roots.

    A root in ``self.morpheme_container_map`` matches when its ``str``, its
    lexeme's ``syntactic_category`` and its lexeme's
    ``secondary_syntactic_category`` all equal the given arguments.

    :param root_str: root string to match against ``root.str``
    :param syntactic_category: value to match against the lexeme's category
    :param secondary_syntactic_category: value to match against the lexeme's
        secondary category
    :return: list with one formatted string per container of every matching
        root; empty when nothing matches
    """
    matching_containers = []

    # Iterate key/value pairs directly rather than keys() plus a re-lookup.
    for root, containers in self.morpheme_container_map.items():
        if (root.str == root_str
                and root.lexeme.syntactic_category == syntactic_category
                and root.lexeme.secondary_syntactic_category == secondary_syntactic_category):
            matching_containers.extend(containers)

    return [formatter.format_morpheme_container_for_tests(container)
            for container in matching_containers]
def _traverse_candidates(self, candidates, results, word):
    """Traverse candidates recursively, collecting finished parses.

    Each candidate is expanded with ``self._traverse_candidate``. An expanded
    container whose last state is ``State.TERMINAL`` and which has no
    remaining surface is appended to ``results``; a terminal one with
    remaining surface is only logged; every other one is traversed again in
    the next recursion level.

    :param candidates: morpheme containers to expand at this level
    :param results: list mutated in place with finished containers
    :param word: passed through unchanged to ``self._traverse_candidate``
    :return: an empty list when nothing is left to traverse, otherwise the
        value returned by the recursive call
    """
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug('Gonna traverse %d candidates:', len(candidates))
        for c in candidates:
            logger.debug('\t%s', c)

    unfinished = []

    for candidate in candidates:
        logger.debug(' Traversing candidate: %s', candidate)
        expanded_containers = self._traverse_candidate(candidate, word)

        for expanded in expanded_containers:
            reached_terminal = expanded.get_last_state().type == State.TERMINAL

            if not reached_terminal:
                # Not finished yet: keep it for the next level.
                unfinished.append(expanded)
                continue

            if expanded.get_remaining_surface():
                # Terminal state with unparsed surface left: dropped.
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug("Found a morpheme container with terminal state, but there is still something to parse. Remaining:%s MorphemeContainer:%s",
                                 expanded.get_remaining_surface(), expanded)
                continue

            results.append(expanded)
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("Found a terminal result --------------------->")
                logger.debug(expanded)
                logger.debug(formatter.format_morpheme_container_for_tests(expanded))

    if unfinished:
        unfinished = self._traverse_candidates(unfinished, results, word)

    return unfinished
def assert_transitions_generated(self, word_to_parse, parse_result_to_pick, expected_transitions):
    """Verify the transitions generated for a chosen parse of ``word_to_parse``.

    Steps: parse the word with ``self.parser``, select the resolution whose
    test formatting equals ``parse_result_to_pick`` (asserting that one
    exists), generate its transitions with ``self.transition_generator``,
    de-duplicate them as ``(surface so far, formatted container)`` pairs,
    sort those by formatted-string length and compare them one by one with
    ``expected_transitions``.

    :param word_to_parse: surface form to parse
    :param parse_result_to_pick: formatted string identifying the resolution
    :param expected_transitions: sequence of expected
        ``(word, formatted parse result)`` pairs in sorted order
    """
    resolutions = self.parser.parse(word_to_parse)

    # First resolution with the requested formatting, or None if absent.
    picked_morpheme_container = next(
        (resolution for resolution in resolutions
         if formatter.format_morpheme_container_for_tests(resolution) == parse_result_to_pick),
        None)

    missing_message = u'Parse result to pick {} does not exist in parse resolutions : {}'.format(
        parse_result_to_pick,
        [formatter.format_morpheme_container_for_tests(r) for r in resolutions])
    assert_that(picked_morpheme_container, not_none(), missing_message)

    generated_transitions = self.transition_generator.generate_transitions(
        word_to_parse, picked_morpheme_container)

    generated_transitions_strs = [
        (generated_transition.get_surface_so_far(),
         formatter.format_morpheme_container_for_tests(generated_transition))
        for generated_transition in generated_transitions
    ]

    # Sort the de-duplicated pairs by the length of the formatted string.
    # A key function replaces the Python-2-only cmp= argument and keeps the
    # same stable ordering for pairs of equal length.
    generated_transitions_strs = sorted(
        set(generated_transitions_strs),
        key=lambda pair: len(pair[1]))

    assert_that(len(generated_transitions_strs), equal_to(len(expected_transitions)))

    # Safe to zip: the assertion above guarantees equal lengths.
    for (expected_word, expected_parse_result), (generated_word, generated_parse_result_str) in zip(
            expected_transitions, generated_transitions_strs):
        assert_that(expected_word, equal_to(generated_word))
        assert_that(expected_parse_result, equal_to(generated_parse_result_str))
def predefined_morpheme_containers(self, root_str, syntactic_category, secondary_syntactic_category):
    """Collect and format the predefined containers registered for a root.

    Every entry of ``self.morpheme_container_map`` is checked: its key must
    have ``str`` equal to ``root_str`` and a lexeme whose
    ``syntactic_category`` and ``secondary_syntactic_category`` equal the
    corresponding arguments.

    :param root_str: expected value of ``root.str``
    :param syntactic_category: expected lexeme syntactic category
    :param secondary_syntactic_category: expected lexeme secondary category
    :return: list of test-formatted strings for all containers of the
        matching roots (empty if there is no match)
    """
    def is_requested(root):
        # Checks short-circuit in the same order as a chained ``and``.
        if root.str != root_str:
            return False
        if root.lexeme.syntactic_category != syntactic_category:
            return False
        return root.lexeme.secondary_syntactic_category == secondary_syntactic_category

    collected = []
    # items() avoids looking every matching key up a second time.
    for root, containers in self.morpheme_container_map.items():
        if is_requested(root):
            collected.extend(containers)

    return [
        formatter.format_morpheme_container_for_tests(container)
        for container in collected
    ]
def parse_contextless(word_str, *syntactic_categories):
    """Parse a word without context and print its formatted parse results.

    Byte strings are decoded as UTF-8 first. When ``syntactic_categories``
    are given, only results whose last state has one of those categories are
    kept. Results whose formatting ends with ``Verb+Zero+Pres+A3sg`` are not
    printed.

    :param word_str: word to parse, as text or as a UTF-8 byte string
    :param syntactic_categories: optional categories to restrict results to
    """
    # On Python 2 ``bytes`` is ``str``, so this is the same test as before.
    if isinstance(word_str, bytes):
        word_str = word_str.decode('utf-8')

    parse_results = contextless_parser.parse(word_str)

    if syntactic_categories:
        # A real list keeps the emptiness check below meaningful even where
        # filter() would return a lazy iterator.
        parse_results = [
            parse_result for parse_result in parse_results
            if parse_result.get_last_state().syntactic_category in syntactic_categories
        ]

    if not parse_results:
        print(u'No parse result found')
        return

    for parse_result in parse_results:
        formatted_output = formatter.format_morpheme_container_for_tests(parse_result)
        if not formatted_output.endswith(u'Verb+Zero+Pres+A3sg'):
            print(formatted_output)
def parse_contextless(word_str, *syntactic_categories):
    """Print the formatted context-free parses of ``word_str``.

    The word is decoded from UTF-8 if it arrives as a byte string, parsed
    with ``contextless_parser`` and, if any ``syntactic_categories`` are
    passed, narrowed to the results whose last state carries one of them.
    Each remaining result is printed in the test format, except those whose
    formatted form ends with ``Verb+Zero+Pres+A3sg``.

    :param word_str: the word, either text or UTF-8 encoded bytes
    :param syntactic_categories: categories a result's last state may have;
        when omitted, no narrowing is done
    """
    # ``bytes`` aliases ``str`` on Python 2, so unicode input is untouched.
    if isinstance(word_str, bytes):
        word_str = word_str.decode('utf-8')

    parse_results = contextless_parser.parse(word_str)

    if syntactic_categories:
        # Build a list so "no results" can be detected by truthiness.
        kept = []
        for parse_result in parse_results:
            if parse_result.get_last_state().syntactic_category in syntactic_categories:
                kept.append(parse_result)
        parse_results = kept

    if parse_results:
        for parse_result in parse_results:
            formatted_output = formatter.format_morpheme_container_for_tests(parse_result)
            if formatted_output.endswith(u'Verb+Zero+Pres+A3sg'):
                continue
            print(formatted_output)
    else:
        print(u'No parse result found')
def _traverse_candidates(self, candidates, results, word):
    """Expand the given candidates level by level and gather complete parses.

    For each candidate, ``self._traverse_candidate`` produces further
    containers. Those ending in ``State.TERMINAL`` with nothing left of the
    surface go into ``results``. Terminal ones that still have surface left
    are logged and discarded. The rest are passed to a recursive call.

    :param candidates: containers to expand in this call
    :param results: output list, appended to in place
    :param word: forwarded to ``self._traverse_candidate``
    :return: the recursive call's return value when there were containers
        left to traverse, otherwise an empty list
    """
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug('Gonna traverse %d candidates:', len(candidates))
        for c in candidates:
            logger.debug('\t%s', c)

    next_level = []

    for current in candidates:
        logger.debug(' Traversing candidate: %s', current)

        for produced in self._traverse_candidate(current, word):
            is_terminal = produced.get_last_state().type == State.TERMINAL

            if is_terminal and not produced.get_remaining_surface():
                # Whole surface consumed in a terminal state: a result.
                results.append(produced)
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug("Found a terminal result --------------------->")
                    logger.debug(produced)
                    logger.debug(formatter.format_morpheme_container_for_tests(produced))
            elif is_terminal:
                # Terminal, but surface remains: only reported.
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug("Found a morpheme container with terminal state, but there is still something to parse. Remaining:%s MorphemeContainer:%s",
                                 produced.get_remaining_surface(), produced)
            else:
                next_level.append(produced)

    if not next_level:
        return next_level

    return self._traverse_candidates(next_level, results, word)
def parse_result(self, word):
    """Parse ``word`` with ``self.parser`` and return each result formatted for tests.

    :param word: word passed to ``self.parser.parse``
    :return: list of formatted strings, one per parse result, in parse order
    """
    morpheme_containers = self.parser.parse(word)
    return [
        formatter.format_morpheme_container_for_tests(container)
        for container in morpheme_containers
    ]
def test_should_format_for_tests(self):
    """Verify the test formatting of the first parse of 'kitaba' and 'yaptırtmayı'."""
    format_for_tests = formatter.format_morpheme_container_for_tests

    # A noun with a dative suffix.
    dative_noun = self.parser.parse(u'kitaba')[0]
    assert_that(
        format_for_tests(dative_noun),
        equal_to(u'kitab(kitap)+Noun+A3sg+Pnon+Dat(+yA[a])'))

    # A doubly causative verb turned into an accusative infinitive noun.
    causative_infinitive = self.parser.parse(u'yaptırtmayı')[0]
    assert_that(
        format_for_tests(causative_infinitive),
        equal_to(u'yap(yapmak)+Verb+Verb+Caus(dIr[tır])+Verb+Caus(t[t])+Pos+Noun+Inf(mA[ma])+A3sg+Pnon+Acc(+yI[yı])'))
def parse_result(self, word):
    """Return the test-formatted strings of all parses of ``word``.

    :param word: word to hand to ``self.parser.parse``
    :return: list holding the formatted form of every parse result, in the
        order the parser produced them
    """
    formatted_results = []
    for resolution in self.parser.parse(word):
        formatted_results.append(
            formatter.format_morpheme_container_for_tests(resolution))
    return formatted_results