@classmethod
def setUpClass(cls):
    """Build the fixtures shared by every test in this class.

    Stores on the class:

    * ``cls.root_map`` -- the result of ``RootMapGenerator().generate`` over
      the roots generated from the inline lexicon below.
    * ``cls.contextless_parser`` -- a ``ContextlessMorphologicalParser``
      built from an initialized ``BasicSuffixGraph`` and a single
      ``WordRootFinder`` over ``cls.root_map``.
    """
    super(
        MorphemeContainerContextlessProbabilityGeneratorWithContainersTest,
        cls).setUpClass()

    # One lexicon entry per line. Every line is stripped so that the
    # indentation of this literal never reaches the loader.
    lexicon_source = u'''
        duvar
        tutku
        saç
        oğul [A:LastVowelDrop]
        demek [A:RootChange, Passive_In, Passive_InIl]
        bu [P:Det]
    '''
    lexicon_lines = [
        line.strip() for line in lexicon_source.strip().splitlines()
    ]

    all_roots = []
    for lexeme in LexiconLoader.load_from_lines(lexicon_lines):
        all_roots.extend(RootGenerator.generate(lexeme))

    cls.root_map = RootMapGenerator().generate(all_roots)

    suffix_graph = BasicSuffixGraph()
    suffix_graph.initialize()

    word_root_finder = WordRootFinder(cls.root_map)

    # The second positional argument is deliberately left as None, as in
    # the original call.
    cls.contextless_parser = ContextlessMorphologicalParser(
        suffix_graph, None, [word_root_finder])
@classmethod
def setUpClass(cls):
    """Prepare the class-level root map and contextless parser.

    After this hook runs, ``cls.root_map`` holds the output of
    ``RootMapGenerator().generate`` for all roots generated from the inline
    lexicon, and ``cls.contextless_parser`` holds a
    ``ContextlessMorphologicalParser`` created from an initialized
    ``BasicSuffixGraph`` and one ``WordRootFinder`` over that root map.
    """
    super(
        MorphemeContainerContextlessProbabilityGeneratorWithContainersTest,
        cls).setUpClass()

    # One lexicon entry per line. Every line is stripped so that the
    # indentation of this literal never reaches the loader.
    lexicon_source = u'''
        duvar
        tutku
        saç
        oğul [A:LastVowelDrop]
        demek [A:RootChange, Passive_In, Passive_InIl]
        bu [P:Det]
    '''
    lexicon_lines = [
        line.strip() for line in lexicon_source.strip().splitlines()
    ]

    all_roots = []
    for lexeme in LexiconLoader.load_from_lines(lexicon_lines):
        all_roots.extend(RootGenerator.generate(lexeme))

    cls.root_map = RootMapGenerator().generate(all_roots)

    suffix_graph = BasicSuffixGraph()
    suffix_graph.initialize()

    word_root_finder = WordRootFinder(cls.root_map)

    # The second positional argument is deliberately left as None, as in
    # the original call.
    cls.contextless_parser = ContextlessMorphologicalParser(
        suffix_graph, None, [word_root_finder])
@classmethod
def setUpClass(cls):
    """Build the root map shared by every test in this class.

    Loads the two-entry lexicon below, generates the roots of each loaded
    lexeme and stores ``RootMapGenerator().generate(all_roots)`` on
    ``cls.root_map``.
    """
    super(FormatterTest, cls).setUpClass()

    dictionary_content = ["kitap", "yapmak"]

    all_roots = []
    for lexeme in LexiconLoader.load_from_lines(dictionary_content):
        all_roots.extend(RootGenerator.generate(lexeme))

    cls.root_map = RootMapGenerator().generate(all_roots)
def test_should_load_lexicon_from_str(self):
    """Check ``LexiconLoader.load_from_lines`` against an inline lexicon.

    Asserts that exactly one lexeme is produced per non-blank lexicon line
    and that every expected ``Lexeme`` is among the loaded ones. Several
    expected attribute sets contain attributes that are not spelled out on
    the corresponding lexicon line (e.g. ``aba [P:Adj]`` is expected with
    ``NoVoicing``), so the test also pins whatever the loader fills in.
    """
    # One lexicon entry per line.
    dictionary_content = u'''
        a [P:Interj]
        aba [P:Adj]
        abadî
        abat [P:Adj; A:NoVoicing]
        Abdal
        abdest [A:NoVoicing]
        abes [P:Adj]
        abes [P:Adv]
        ablak [P:Adj; A:NoVoicing]
        abuk [P:Adj, Dup;A:NoVoicing, NoSuffix]
        acemborusu [A:CompoundP3sg; R:acemboru]
        acembuselik
        aciz [A:LastVowelDrop]
        âciz [P:Adj]
        açık [P:Adj]
        ad
        ad [P:Noun; A:Doubling, InverseHarmony]
        addetmek [A:Voicing, Aorist_A]
        addolmak [A:Causative_dIr]
        ahlat [A:NoVoicing, Plural]
        akşam [P:Noun, Time]
        atamak [A:Causative_It]
        sürtmek
        yemek [P:Noun]
        yemek [A:Causative_dIr]
        ürkmek [A:Causative_It]
    '''

    # Build a real list: it is measured with len() below, and filter()
    # only returns an iterator on Python 3.
    stripped_lines = (line.strip() for line in dictionary_content.split('\n'))
    dictionary_lines = [line for line in stripped_lines if line]

    lexemes = LexiconLoader.load_from_lines(dictionary_lines)

    # The failure reason is the difference between the two counts.
    assert_that(lexemes, has_length(len(dictionary_lines)),
                str(len(lexemes) - len(dictionary_lines)))

    expected_lexemes = [
        Lexeme(u'a', u'a', SyntacticCategory.INTERJECTION, None, None),
        Lexeme(u'aba', u'aba', SyntacticCategory.ADJECTIVE, None,
               {LexemeAttribute.NoVoicing}),
        Lexeme(u'abadî', u'abadî', SyntacticCategory.NOUN, None,
               {LexemeAttribute.NoVoicing}),
        Lexeme(u'abat', u'abat', SyntacticCategory.ADJECTIVE, None,
               {LexemeAttribute.NoVoicing}),
        Lexeme(u'Abdal', u'Abdal', SyntacticCategory.NOUN,
               SecondarySyntacticCategory.PROPER_NOUN,
               {LexemeAttribute.NoVoicing}),
        Lexeme(u'abdest', u'abdest', SyntacticCategory.NOUN, None,
               {LexemeAttribute.NoVoicing}),
        Lexeme(u'abes', u'abes', SyntacticCategory.ADJECTIVE, None,
               {LexemeAttribute.NoVoicing}),
        Lexeme(u'abes', u'abes', SyntacticCategory.ADVERB, None, None),
        Lexeme(u'ablak', u'ablak', SyntacticCategory.ADJECTIVE, None,
               {LexemeAttribute.NoVoicing}),
        Lexeme(u'abuk', u'abuk', SyntacticCategory.ADJECTIVE,
               SecondarySyntacticCategory.DUPLICATOR,
               {LexemeAttribute.NoSuffix, LexemeAttribute.NoVoicing}),
        Lexeme(u'acemborusu', u'acemboru', SyntacticCategory.NOUN, None,
               {LexemeAttribute.CompoundP3sg, LexemeAttribute.NoVoicing}),
        Lexeme(u'acembuselik', u'acembuselik', SyntacticCategory.NOUN, None,
               {LexemeAttribute.Voicing}),
        Lexeme(u'aciz', u'aciz', SyntacticCategory.NOUN, None,
               {LexemeAttribute.LastVowelDrop, LexemeAttribute.NoVoicing}),
        Lexeme(u'âciz', u'âciz', SyntacticCategory.ADJECTIVE, None,
               {LexemeAttribute.NoVoicing}),
        Lexeme(u'açık', u'açık', SyntacticCategory.ADJECTIVE, None,
               {LexemeAttribute.Voicing}),
        Lexeme(u'ad', u'ad', SyntacticCategory.NOUN, None,
               {LexemeAttribute.NoVoicing}),
        Lexeme(u'ad', u'ad', SyntacticCategory.NOUN, None,
               {LexemeAttribute.Doubling, LexemeAttribute.InverseHarmony,
                LexemeAttribute.NoVoicing}),
        Lexeme(u'addetmek', u'addet', SyntacticCategory.VERB, None,
               {LexemeAttribute.Aorist_A, LexemeAttribute.Causative_dIr,
                LexemeAttribute.Voicing}),
        Lexeme(u'addolmak', u'addol', SyntacticCategory.VERB, None,
               {LexemeAttribute.Aorist_I, LexemeAttribute.Causative_dIr,
                LexemeAttribute.NoVoicing, LexemeAttribute.Passive_In}),
        Lexeme(u'ahlat', u'ahlat', SyntacticCategory.NOUN, None,
               {LexemeAttribute.NoVoicing, LexemeAttribute.Plural}),
        Lexeme(u'akşam', u'akşam', SyntacticCategory.NOUN,
               SecondarySyntacticCategory.TIME,
               {LexemeAttribute.NoVoicing}),
        Lexeme(u'atamak', u'ata', SyntacticCategory.VERB, None,
               {LexemeAttribute.Aorist_I, LexemeAttribute.Causative_It,
                LexemeAttribute.NoVoicing, LexemeAttribute.Passive_In,
                LexemeAttribute.ProgressiveVowelDrop}),
        Lexeme(u'sürtmek', u'sürt', SyntacticCategory.VERB, None,
               {LexemeAttribute.Aorist_A, LexemeAttribute.Causative_Ir,
                LexemeAttribute.NoVoicing}),
        Lexeme(u'yemek', u'yemek', SyntacticCategory.NOUN, None,
               {LexemeAttribute.Voicing}),
        Lexeme(u'yemek', u'ye', SyntacticCategory.VERB, None,
               {LexemeAttribute.Aorist_A, LexemeAttribute.Causative_dIr,
                LexemeAttribute.NoVoicing, LexemeAttribute.Passive_In,
                LexemeAttribute.ProgressiveVowelDrop}),
        Lexeme(u'ürkmek', u'ürk', SyntacticCategory.VERB, None,
               {LexemeAttribute.Aorist_A, LexemeAttribute.Causative_It,
                LexemeAttribute.NoVoicing}),
    ]

    # Checked in lexicon order; the first missing lexeme fails the test.
    for expected in expected_lexemes:
        assert_that(lexemes, has_item(expected))