def setUp(self):
    """Build a fresh contextless parser over the class-level root map before each test."""
    logging.basicConfig(level=logging.INFO)
    for test_logger in (parser_logger, suffix_applier_logger):
        test_logger.setLevel(logging.INFO)

    graph = BasicSuffixGraph()
    graph.initialize()
    root_finder = WordRootFinder(self.root_map)
    # No predefined paths are needed for these tests, hence the None.
    self.parser = ContextlessMorphologicalParser(graph, None, [root_finder])
def initialize():
    """Populate the module-level ``contextless_parser`` from the master dictionary.

    Loads every lexeme, expands it into roots (with circumflex conversion),
    builds the full suffix graph with predefined paths, and wires up all
    root finders.
    """
    global contextless_parser

    dictionary_path = os.path.join(os.path.dirname(__file__),
                                   '../resources/master_dictionary.txt')
    roots = []
    for lexeme in LexiconLoader.load_from_file(dictionary_path):
        roots.extend(CircumflexConvertingRootGenerator.generate(lexeme))
    root_map = RootMapGenerator().generate(roots)

    graph = CopulaSuffixGraph(
        NumeralSuffixGraph(ProperNounSuffixGraph(BasicSuffixGraph())))
    graph.initialize()

    paths = PredefinedPaths(root_map, graph)
    paths.create_predefined_paths()

    root_finders = [
        WordRootFinder(root_map),
        TextNumeralRootFinder(root_map),
        DigitNumeralRootFinder(),
        ProperNounFromApostropheRootFinder(),
        ProperNounWithoutApostropheRootFinder(),
    ]
    contextless_parser = ContextlessMorphologicalParser(graph, paths, root_finders)
def setUpClass(cls):
    """Build a small in-memory lexicon and a contextless parser shared by the class."""
    super(MorphemeContainerContextlessProbabilityGeneratorWithContainersTest,
          cls).setUpClass()

    lexicon_lines = u'''
        duvar
        tutku
        saç
        oğul [A:LastVowelDrop]
        demek [A:RootChange, Passive_In, Passive_InIl]
        bu [P:Det]
    '''.strip().splitlines()

    roots = []
    for lexeme in LexiconLoader.load_from_lines(lexicon_lines):
        roots.extend(RootGenerator.generate(lexeme))
    cls.root_map = RootMapGenerator().generate(roots)

    graph = BasicSuffixGraph()
    graph.initialize()
    # Only plain word roots are needed; no predefined paths.
    cls.contextless_parser = ContextlessMorphologicalParser(
        graph, None, [WordRootFinder(cls.root_map)])
def setUp(self):
    """Create a brute-force parser for each test."""
    logging.basicConfig(level=logging.INFO)
    for test_logger in (parser_logger, suffix_applier_logger):
        test_logger.setLevel(logging.INFO)

    graph = BasicSuffixGraph()
    graph.initialize()
    # NOTE(review): the attribute name says "noun" but a BruteForceVerbRootFinder
    # is instantiated — name kept as-is because sibling tests may reference it;
    # confirm whether the name or the finder class is the intended one.
    self.mock_brute_force_noun_root_finder = BruteForceVerbRootFinder()
    self.parser = ContextlessMorphologicalParser(
        graph, None, [self.mock_brute_force_noun_root_finder])
def setUpClass(cls):
    """Load the master dictionary and parse set 001, then build a StatisticalParser."""
    super(StatisticalParserTest, cls).setUpClass()

    dictionary_path = os.path.join(os.path.dirname(__file__),
                                   '../../resources/master_dictionary.txt')
    roots = []
    for lexeme in LexiconLoader.load_from_file(dictionary_path):
        roots.extend(RootGenerator.generate(lexeme))
    cls.root_map = RootMapGenerator().generate(roots)

    graph = CopulaSuffixGraph(NumeralSuffixGraph(BasicSuffixGraph()))
    graph.initialize()
    paths = PredefinedPaths(cls.root_map, graph)
    paths.create_predefined_paths()

    root_finders = [
        WordRootFinder(cls.root_map),
        DigitNumeralRootFinder(),
        TextNumeralRootFinder(cls.root_map),
        ProperNounFromApostropheRootFinder(),
        ProperNounWithoutApostropheRootFinder(),
    ]
    contextless_parser = ContextlessMorphologicalParser(graph, paths, root_finders)

    # Build the concordance index from all words of parse set 001.
    parseset_index = "001"
    dom = parse(os.path.join(
        os.path.dirname(__file__),
        '../../testresources/parsesets/parseset{}.xml'.format(parseset_index)))
    parseset = ParseSetBinding.build(dom.getElementsByTagName("parseset")[0])

    words = []
    for sentence in parseset.sentences:
        words.extend(sentence.words)
    concordance_index = CompleteWordConcordanceIndex(words)

    cls.parser = StatisticalParser(contextless_parser, concordance_index)
def setUpClass(cls):
    """Build a parser that recognises only proper nouns (empty root map)."""
    super(ParserTestWithProperNouns, cls).setUpClass()
    cls.root_map = {}

    graph = ProperNounSuffixGraph(BasicSuffixGraph())
    graph.initialize()

    root_finders = [
        ProperNounFromApostropheRootFinder(),
        ProperNounWithoutApostropheRootFinder(),
    ]
    cls.parser = ContextlessMorphologicalParser(graph, None, root_finders)
def setUpClass(cls):
    """Build a full contextless parser plus MongoDB n-gram collections for likelihood tests."""
    super(_LikelihoodCalculatorTest, cls).setUpClass()

    dictionary_path = os.path.join(
        os.path.dirname(__file__),
        '../../../../../resources/master_dictionary.txt')
    roots = []
    for lexeme in LexiconLoader.load_from_file(dictionary_path):
        roots.extend(RootGenerator.generate(lexeme))
    cls.root_map = RootMapGenerator().generate(roots)

    graph = CopulaSuffixGraph(
        NumeralSuffixGraph(ProperNounSuffixGraph(BasicSuffixGraph())))
    graph.initialize()
    paths = PredefinedPaths(cls.root_map, graph)
    paths.create_predefined_paths()

    root_finders = [
        WordRootFinder(cls.root_map),
        DigitNumeralRootFinder(),
        TextNumeralRootFinder(cls.root_map),
        ProperNounFromApostropheRootFinder(),
        ProperNounWithoutApostropheRootFinder(),
    ]
    cls.contextless_parser = ContextlessMorphologicalParser(graph, paths, root_finders)

    # NOTE(review): pymongo.Connection is the legacy client class (replaced by
    # MongoClient and removed in pymongo 3.x) — kept as-is for the pinned version.
    cls.mongodb_connection = pymongo.Connection(host='127.0.0.1')
    database = cls.mongodb_connection['trnltk']
    # Map n-gram order -> collection holding that order's counts.
    cls.collection_map = {
        1: database['wordUnigrams999'],
        2: database['wordBigrams999'],
        3: database['wordTrigrams999'],
    }
    cls.generator = None
class FormatterTest(unittest.TestCase):
    """Tests for the three morpheme-container formatting helpers."""

    @classmethod
    def setUpClass(cls):
        super(FormatterTest, cls).setUpClass()
        roots = []
        for lexeme in LexiconLoader.load_from_lines(["kitap", "yapmak"]):
            roots.extend(RootGenerator.generate(lexeme))
        cls.root_map = RootMapGenerator().generate(roots)

    def setUp(self):
        logging.basicConfig(level=logging.INFO)
        for test_logger in (parser_logger, suffix_applier_logger):
            test_logger.setLevel(logging.INFO)
        graph = BasicSuffixGraph()
        graph.initialize()
        self.parser = ContextlessMorphologicalParser(
            graph, None, [WordRootFinder(self.root_map)])

    def _first_parse(self, surface):
        # Every test uses only the first parse result of the surface form.
        return self.parser.parse(surface)[0]

    def test_should_format_for_simple_parseset(self):
        assert_that(
            formatter.format_morpheme_container_for_simple_parseset(
                self._first_parse(u'kitaba')),
            equal_to(u'(1,"kitap+Noun+A3sg+Pnon+Dat")'))

        assert_that(
            formatter.format_morpheme_container_for_simple_parseset(
                self._first_parse(u'yaptırtmayı')),
            equal_to(u'(1,"yap+Verb")(2,"Verb+Caus")(3,"Verb+Caus+Pos")(4,"Noun+Inf+A3sg+Pnon+Acc")'))

    def test_should_format_for_tests(self):
        assert_that(
            formatter.format_morpheme_container_for_tests(
                self._first_parse(u'kitaba')),
            equal_to(u'kitab(kitap)+Noun+A3sg+Pnon+Dat(+yA[a])'))

        assert_that(
            formatter.format_morpheme_container_for_tests(
                self._first_parse(u'yaptırtmayı')),
            equal_to(u'yap(yapmak)+Verb+Verb+Caus(dIr[tır])+Verb+Caus(t[t])+Pos+Noun+Inf(mA[ma])+A3sg+Pnon+Acc(+yI[yı])'))

    def test_should_format_for_parseset(self):
        assert_that(
            formatter.format_morpheme_container_for_parseset(
                self._first_parse(u'kitaba')),
            equal_to(u'kitap+Noun+A3sg+Pnon+Dat'))

        assert_that(
            formatter.format_morpheme_container_for_parseset(
                self._first_parse(u'yaptırtmayı')),
            equal_to(u'yap+Verb+Verb+Caus+Verb+Caus+Pos+Noun+Inf+A3sg+Pnon+Acc'))
class FormatterTest(unittest.TestCase):
    """Checks the formatter output for simple-parseset, test, and parseset styles."""

    @classmethod
    def setUpClass(cls):
        super(FormatterTest, cls).setUpClass()
        dictionary_lines = ["kitap", "yapmak"]
        collected_roots = []
        for lexeme in LexiconLoader.load_from_lines(dictionary_lines):
            collected_roots.extend(RootGenerator.generate(lexeme))
        cls.root_map = RootMapGenerator().generate(collected_roots)

    def setUp(self):
        logging.basicConfig(level=logging.INFO)
        parser_logger.setLevel(logging.INFO)
        suffix_applier_logger.setLevel(logging.INFO)

        graph = BasicSuffixGraph()
        graph.initialize()
        root_finder = WordRootFinder(self.root_map)
        self.parser = ContextlessMorphologicalParser(graph, None, [root_finder])

    def test_should_format_for_simple_parseset(self):
        result = self.parser.parse(u'kitaba')[0]
        formatted = formatter.format_morpheme_container_for_simple_parseset(result)
        assert_that(formatted, equal_to(u'(1,"kitap+Noun+A3sg+Pnon+Dat")'))

        result = self.parser.parse(u'yaptırtmayı')[0]
        formatted = formatter.format_morpheme_container_for_simple_parseset(result)
        assert_that(
            formatted,
            equal_to(u'(1,"yap+Verb")(2,"Verb+Caus")(3,"Verb+Caus+Pos")(4,"Noun+Inf+A3sg+Pnon+Acc")'))

    def test_should_format_for_tests(self):
        result = self.parser.parse(u'kitaba')[0]
        formatted = formatter.format_morpheme_container_for_tests(result)
        assert_that(formatted, equal_to(u'kitab(kitap)+Noun+A3sg+Pnon+Dat(+yA[a])'))

        result = self.parser.parse(u'yaptırtmayı')[0]
        formatted = formatter.format_morpheme_container_for_tests(result)
        assert_that(
            formatted,
            equal_to(u'yap(yapmak)+Verb+Verb+Caus(dIr[tır])+Verb+Caus(t[t])+Pos+Noun+Inf(mA[ma])+A3sg+Pnon+Acc(+yI[yı])'))

    def test_should_format_for_parseset(self):
        result = self.parser.parse(u'kitaba')[0]
        formatted = formatter.format_morpheme_container_for_parseset(result)
        assert_that(formatted, equal_to(u'kitap+Noun+A3sg+Pnon+Dat'))

        result = self.parser.parse(u'yaptırtmayı')[0]
        formatted = formatter.format_morpheme_container_for_parseset(result)
        assert_that(
            formatted,
            equal_to(u'yap+Verb+Verb+Caus+Verb+Caus+Pos+Noun+Inf+A3sg+Pnon+Acc'))