Example #1
    @classmethod
    def setUpClass(cls):
        super(MorphemeContainerContextlessProbabilityGeneratorWithContainersTest, cls).setUpClass()
        all_roots = []

        lexicon_lines = u'''
            duvar
            tutku
            saç
            oğul [A:LastVowelDrop]
            demek [A:RootChange, Passive_In, Passive_InIl]
            bu [P:Det]
        '''.strip().splitlines()

        lexemes = LexiconLoader.load_from_lines(lexicon_lines)
        for di in lexemes:
            all_roots.extend(RootGenerator.generate(di))

        root_map_generator = RootMapGenerator()
        cls.root_map = root_map_generator.generate(all_roots)

        suffix_graph = BasicSuffixGraph()
        suffix_graph.initialize()

        word_root_finder = WordRootFinder(cls.root_map)

        cls.contextless_parser = ContextlessMorphologicalParser(suffix_graph, None,
            [word_root_finder])
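The fixture above only prepares the parser; the actual test methods are not part of this excerpt. As a rough illustration, a test body might exercise the parser on a root that is present in the small lexicon. This is a minimal sketch, assuming (not shown in the excerpt) that ContextlessMorphologicalParser exposes a parse(word) method returning a list of candidate analyses, and that the hamcrest matchers has_length and greater_than are imported alongside assert_that:

    def test_should_parse_a_root_from_the_fixture_lexicon(self):
        # Hypothetical usage of the fixture built in setUpClass above.
        # Assumption: parse(word) returns a (possibly empty) list of analyses.
        results = self.contextless_parser.parse(u'duvar')
        assert_that(results, has_length(greater_than(0)))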
Example #2
    @classmethod
    def setUpClass(cls):
        super(FormatterTest, cls).setUpClass()
        all_roots = []

        dictionary_content = ["kitap", "yapmak"]
        lexemes = LexiconLoader.load_from_lines(dictionary_content)
        for di in lexemes:
            all_roots.extend(RootGenerator.generate(di))

        cls.root_map = RootMapGenerator().generate(all_roots)
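As a rough illustration of how this fixture might be used, the sketch below assumes (not shown in the excerpt) that the generated root map behaves like a dictionary keyed by root surface strings, so that the roots produced for "kitap" and "yapmak" can be looked up directly; has_key is the standard hamcrest matcher:

    def test_should_contain_roots_of_dictionary_entries(self):
        # Hypothetical usage of the fixture built in setUpClass above.
        # Assumption: root_map is a dict-like object keyed by root strings.
        assert_that(self.root_map, has_key(u'kitap'))
        assert_that(self.root_map, has_key(u'yap'))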
Example #3
    def test_should_load_lexicon_from_str(self):
        dictionary_content = u'''
            a [P:Interj]
            aba [P:Adj]
            abadî
            abat [P:Adj; A:NoVoicing]
            Abdal
            abdest [A:NoVoicing]
            abes [P:Adj]
            abes [P:Adv]
            ablak [P:Adj; A:NoVoicing]
            abuk [P:Adj, Dup;A:NoVoicing, NoSuffix]
            acemborusu [A:CompoundP3sg; R:acemboru]
            acembuselik
            aciz [A:LastVowelDrop]
            âciz [P:Adj]
            açık [P:Adj]
            ad
            ad [P:Noun; A:Doubling, InverseHarmony]
            addetmek [A:Voicing, Aorist_A]
            addolmak [A:Causative_dIr]
            ahlat [A:NoVoicing, Plural]
            akşam [P:Noun, Time]
            atamak [A:Causative_It]
            sürtmek
            yemek [P:Noun]
            yemek [A:Causative_dIr]
            ürkmek [A:Causative_It]
        '''
        dictionary_lines = dictionary_content.split('\n')
        dictionary_lines = [l.strip() for l in dictionary_lines]
        dictionary_lines = filter(lambda line: line, dictionary_lines)

        lexemes = LexiconLoader.load_from_lines(dictionary_lines)

        assert_that(lexemes, has_length(len(dictionary_lines)), str(len(lexemes)-len(dictionary_lines)))

        assert_that(lexemes, has_item(Lexeme(u'a', u'a', SyntacticCategory.INTERJECTION, None, None)))
        assert_that(lexemes, has_item(Lexeme(u'aba', u'aba', SyntacticCategory.ADJECTIVE, None, {LexemeAttribute.NoVoicing})))
        assert_that(lexemes, has_item(Lexeme(u'abadî', u'abadî', SyntacticCategory.NOUN, None, {LexemeAttribute.NoVoicing})))
        assert_that(lexemes, has_item(Lexeme(u'abat', u'abat', SyntacticCategory.ADJECTIVE, None, {LexemeAttribute.NoVoicing})))
        assert_that(lexemes, has_item(Lexeme(u'Abdal', u'Abdal', SyntacticCategory.NOUN, SecondarySyntacticCategory.PROPER_NOUN, {LexemeAttribute.NoVoicing})))
        assert_that(lexemes, has_item(Lexeme(u'abdest', u'abdest', SyntacticCategory.NOUN, None, {LexemeAttribute.NoVoicing})))
        assert_that(lexemes, has_item(Lexeme(u'abes', u'abes', SyntacticCategory.ADJECTIVE, None, {LexemeAttribute.NoVoicing})))
        assert_that(lexemes, has_item(Lexeme(u'abes', u'abes', SyntacticCategory.ADVERB, None, None)))
        assert_that(lexemes, has_item(Lexeme(u'ablak', u'ablak', SyntacticCategory.ADJECTIVE, None, {LexemeAttribute.NoVoicing})))
        assert_that(lexemes, has_item(Lexeme(u'abuk', u'abuk', SyntacticCategory.ADJECTIVE, SecondarySyntacticCategory.DUPLICATOR, {LexemeAttribute.NoSuffix, LexemeAttribute.NoVoicing})))
        assert_that(lexemes, has_item(Lexeme(u'acemborusu', u'acemboru', SyntacticCategory.NOUN, None, {LexemeAttribute.CompoundP3sg, LexemeAttribute.NoVoicing})))
        assert_that(lexemes, has_item(Lexeme(u'acembuselik', u'acembuselik', SyntacticCategory.NOUN, None, {LexemeAttribute.Voicing})))
        assert_that(lexemes, has_item(Lexeme(u'aciz', u'aciz', SyntacticCategory.NOUN, None, {LexemeAttribute.LastVowelDrop, LexemeAttribute.NoVoicing})))
        assert_that(lexemes, has_item(Lexeme(u'âciz', u'âciz', SyntacticCategory.ADJECTIVE, None, {LexemeAttribute.NoVoicing})))
        assert_that(lexemes, has_item(Lexeme(u'açık', u'açık', SyntacticCategory.ADJECTIVE, None, {LexemeAttribute.Voicing})))
        assert_that(lexemes, has_item(Lexeme(u'ad', u'ad', SyntacticCategory.NOUN, None, {LexemeAttribute.NoVoicing})))
        assert_that(lexemes, has_item(Lexeme(u'ad', u'ad', SyntacticCategory.NOUN, None, {LexemeAttribute.Doubling, LexemeAttribute.InverseHarmony, LexemeAttribute.NoVoicing})))
        assert_that(lexemes, has_item(Lexeme(u'addetmek', u'addet', SyntacticCategory.VERB, None, {LexemeAttribute.Aorist_A, LexemeAttribute.Causative_dIr, LexemeAttribute.Voicing})))
        assert_that(lexemes, has_item(Lexeme(u'addolmak', u'addol', SyntacticCategory.VERB, None, {LexemeAttribute.Aorist_I, LexemeAttribute.Causative_dIr, LexemeAttribute.NoVoicing, LexemeAttribute.Passive_In})))
        assert_that(lexemes, has_item(Lexeme(u'ahlat', u'ahlat', SyntacticCategory.NOUN, None, {LexemeAttribute.NoVoicing, LexemeAttribute.Plural})))
        assert_that(lexemes, has_item(Lexeme(u'akşam', u'akşam', SyntacticCategory.NOUN, SecondarySyntacticCategory.TIME, {LexemeAttribute.NoVoicing})))
        assert_that(lexemes, has_item(Lexeme(u'atamak', u'ata', SyntacticCategory.VERB, None, {LexemeAttribute.Aorist_I, LexemeAttribute.Causative_It, LexemeAttribute.NoVoicing, LexemeAttribute.Passive_In, LexemeAttribute.ProgressiveVowelDrop})))
        assert_that(lexemes, has_item(Lexeme(u'sürtmek', u'sürt', SyntacticCategory.VERB, None, {LexemeAttribute.Aorist_A, LexemeAttribute.Causative_Ir, LexemeAttribute.NoVoicing})))
        assert_that(lexemes, has_item(Lexeme(u'yemek', u'yemek', SyntacticCategory.NOUN, None, {LexemeAttribute.Voicing})))
        assert_that(lexemes, has_item(Lexeme(u'yemek', u'ye', SyntacticCategory.VERB, None, {LexemeAttribute.Aorist_A, LexemeAttribute.Causative_dIr, LexemeAttribute.NoVoicing, LexemeAttribute.Passive_In, LexemeAttribute.ProgressiveVowelDrop})))
        assert_that(lexemes, has_item(Lexeme(u'ürkmek', u'ürk', SyntacticCategory.VERB, None, {LexemeAttribute.Aorist_A, LexemeAttribute.Causative_It, LexemeAttribute.NoVoicing})))
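The bracketed metadata in each dictionary line carries the part of speech (P:), explicit lexeme attributes (A:) and, where needed, a root override (R:). As a minimal, hedged sketch reusing only the calls already shown above, loading one such line in isolation might look like this; it assumes that the attributes inferred for a line (e.g. NoVoicing here) do not depend on the other lines of the dictionary:

    def test_should_apply_root_override_from_a_single_line(self):
        # Hedged sketch: 'R:' overrides the root, 'A:' adds explicit
        # lexeme attributes, the category defaults to NOUN when 'P:' is absent.
        # Assumption: attribute inference is independent of the other lines.
        lexemes = LexiconLoader.load_from_lines([u'acemborusu [A:CompoundP3sg; R:acemboru]'])
        assert_that(lexemes, has_length(1))
        assert_that(lexemes, has_item(Lexeme(u'acemborusu', u'acemboru', SyntacticCategory.NOUN, None, {LexemeAttribute.CompoundP3sg, LexemeAttribute.NoVoicing})))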