Ejemplo n.º 1
0
    def setUp(self):
        """Create a contextless parser that uses only the brute-force compound-noun root finder.

        Logging is configured at INFO level, and the parser and suffix-applier
        loggers are set to INFO as well, before the fixtures are built.
        """
        logging.basicConfig(level=logging.INFO)
        for module_logger in (parser_logger, suffix_applier_logger):
            module_logger.setLevel(logging.INFO)

        graph = BasicSuffixGraph()
        graph.initialize()

        # Despite the attribute name, this is a real finder instance, not a mock.
        compound_root_finder = BruteForceCompoundNounRootFinder()
        self.mock_brute_force_noun_compound_root_finder = compound_root_finder

        # NOTE(review): the second positional argument is deliberately None; its
        # meaning is defined by ContextlessMorphologicalParser - confirm there.
        self.parser = ContextlessMorphologicalParser(graph, None, [compound_root_finder])
 def setUp(self):
     """Store a new BruteForceCompoundNounRootFinder on ``self.root_finder``."""
     self.root_finder = BruteForceCompoundNounRootFinder()
class BruteForceCompoundNounRootFinderTest(unittest.TestCase):
    def setUp(self):
        """Store a new BruteForceCompoundNounRootFinder on ``self.root_finder``."""
        self.root_finder = BruteForceCompoundNounRootFinder()

    def test_should_check_invalid_cases(self):
        """Each invalid (partial_input, whole_surface) pair must raise AssertionError."""
        def expect_rejected(partial_input, whole_surface):
            # One call per case keeps the failing pair visible in the traceback.
            self.assertRaises(
                AssertionError,
                self.root_finder.find_roots_for_partial_input,
                partial_input, whole_surface)

        # at least one argument is None or empty
        expect_rejected(None, None)
        expect_rejected("", None)
        expect_rejected(None, "")
        expect_rejected("", "")
        expect_rejected(u"a", None)
        expect_rejected(u"a", u"")

        # the whole surface does not start with the partial input
        expect_rejected(u"ab", u"a")
        expect_rejected(u"ab", u"ad")
        expect_rejected(u"ab", u"ada")

    def test_should_find_no_roots(self):
        """The finder must return an empty result for each of these input pairs."""
        def expect_no_roots(partial_input, whole_surface):
            # One call per case keeps the failing pair visible in the traceback.
            found = self.root_finder.find_roots_for_partial_input(partial_input, whole_surface)
            assert_that(found, has_length(0))

        expect_no_roots(u"abc", u"abcdef")

        # every prefix of "anu"
        expect_no_roots(u"a", u"anu")
        expect_no_roots(u"an", u"anu")
        expect_no_roots(u"anu", u"anu")

        # every prefix of "anun"
        expect_no_roots(u"a", u"anun")
        expect_no_roots(u"an", u"anun")
        expect_no_roots(u"anu", u"anun")
        expect_no_roots(u"anun", u"anun")

        # every prefix of "tatın"
        expect_no_roots(u"t", u"tatın")
        expect_no_roots(u"ta", u"tatın")
        expect_no_roots(u"tat", u"tatın")
        expect_no_roots(u"tatı", u"tatın")
        expect_no_roots(u"tatın", u"tatın")

        expect_no_roots(u"suborusu", u"suborusun")

    def test_should_create_roots_without_consontant_insertion_s(self):
        """Check the root candidates generated for compounds in this group.

        For every case the expected candidates are listed in order as
        ``(stem, attributes)`` pairs. Each candidate must be a NOUN whose
        lexeme root and lemma both equal the partial input.
        """
        # most of the following words are made up!
        compound = LexemeAttribute.CompoundP3sg
        no_voicing = LexemeAttribute.NoVoicing
        inverse_harmony = LexemeAttribute.InverseHarmony
        doubling = LexemeAttribute.Doubling

        def expect_roots(partial_input, whole_surface, *expected_candidates):
            found = self.root_finder.find_roots_for_partial_input(partial_input, whole_surface)
            assert_that(found, has_length(len(expected_candidates)))
            for index, (stem, attributes) in enumerate(expected_candidates):
                candidate = found[index]
                assert_that(candidate.str, equal_to(stem))
                assert_that(candidate.lexeme.root, equal_to(partial_input))
                assert_that(candidate.lexeme.lemma, equal_to(partial_input))
                assert_that(candidate.lexeme.syntactic_category, equal_to(SyntacticCategory.NOUN))
                assert_that(candidate.lexeme.attributes, equal_to(attributes))

        # no orthographic changes, no consonant insertion 's'
        expect_roots(
            u"bacakkalemi", u"bacakkalemini",
            (u'bacakkalem', {compound}))

        # with explicit NoVoicing
        expect_roots(
            u"adamotu", u"adamotunu",
            (u'adamot', {compound, no_voicing}))

        # with possible voicing
        expect_roots(
            u"aslankuyruğu", u"aslankuyruğundan",
            (u'aslankuyruğ', {compound}),
            (u'aslankuyrug', {compound}),
            (u'aslankuyruk', {compound}))

        # with InverseHarmony
        expect_roots(
            u"dünyahali", u"dünyahaline",
            (u'dünyahal', {compound, inverse_harmony}))

        # with InverseHarmony and possible voicing
        expect_roots(
            u"abcvaadi", u"abcvaadini",
            (u'abcvaad', {compound, inverse_harmony}),
            (u'abcvaat', {compound, inverse_harmony}))

        # with InverseHarmony and explicit NoVoicing
        expect_roots(
            u"anaşefkati", u"anaşefkatini",
            (u'anaşefkat', {compound, inverse_harmony, no_voicing}))

        # with doubling
        expect_roots(
            u"gönülsırrı", u"gönülsırrına",
            (u'gönülsırr', {compound}),
            (u'gönülsır', {compound, doubling}))

        # with doubling and explicit NoVoicing
        expect_roots(
            u"müşterihakkı", u"müşterihakkına",
            (u'müşterihakk', {compound, no_voicing}),
            (u'müşterihak', {compound, no_voicing, doubling}))

        # with doubling and InverseHarmony
        expect_roots(
            u"olaymahalli", u"olaymahalline",
            (u'olaymahall', {compound, inverse_harmony}),
            (u'olaymahal', {compound, inverse_harmony, doubling}))

        # with doubling, possible voicing and inverse harmony
        expect_roots(
            u"yaşhaddi", u"yaşhaddinden",
            (u'yaşhadd', {compound, inverse_harmony}),
            (u'yaşhad', {compound, inverse_harmony, doubling}),
            (u'yaşhat', {compound, inverse_harmony, doubling}))

    def test_should_create_roots_with_consontant_insertion_s(self):
        """Check the root candidates for a partial input ending in 'su'.

        For "suborusu" two candidates are expected, in this order: the input
        without its final vowel ('suborus') and the input without its trailing
        'su' ('suboru'). Both must be NOUNs carrying only CompoundP3sg and must
        keep the full partial input as lexeme root and lemma.
        """
        # most of the following words are made up!
        roots = self.root_finder.find_roots_for_partial_input(u"suborusu", u"suborusuna")
        assert_that(roots, has_length(2))
        # first candidate: only the final vowel is dropped
        assert_that(roots[0].str, equal_to(u'suborus'))
        assert_that(roots[0].lexeme.root, equal_to(u'suborusu'))
        assert_that(roots[0].lexeme.lemma, equal_to(u'suborusu'))
        assert_that(roots[0].lexeme.syntactic_category, equal_to(SyntacticCategory.NOUN))
        assert_that(roots[0].lexeme.attributes, equal_to({LexemeAttribute.CompoundP3sg}))
        # second candidate: the trailing 'su' is dropped
        assert_that(roots[1].str, equal_to(u'suboru'))
        assert_that(roots[1].lexeme.root, equal_to(u'suborusu'))
        assert_that(roots[1].lexeme.lemma, equal_to(u'suborusu'))
        assert_that(roots[1].lexeme.syntactic_category, equal_to(SyntacticCategory.NOUN))
        assert_that(roots[1].lexeme.attributes, equal_to({LexemeAttribute.CompoundP3sg}))