Ejemplo n.º 1
0
    def test_paradigm_citation_dist(self):
        """Check the distribution table computed by ``paradigm_usl_distribution`` for ``E:O:O:.``.

        Four USLs are built from the paradigm's cell and header terms; the
        result must hold exactly one 2x2 table equal to the expected counts.
        """
        paradigm = sc("E:O:O:.")

        # Cell terms first, then header terms, created in the same order as before.
        cell_uu, cell_ua, cell_au, cell_aa = (
            Term(script) for script in ("E:U:U:.", "E:U:A:.", "E:A:U:.", "E:A:A:.")
        )
        header_ou, header_oa, header_uo, header_ao = (
            Term(script) for script in ("E:O:U:.", "E:O:A:.", "E:U:O:.", "E:A:O:.")
        )

        usl_collection = []
        usl_collection.append(
            usl(Word(Morpheme([cell_uu, cell_ua]), Morpheme([cell_aa, cell_ua, cell_au]))))
        usl_collection.append(
            usl(Word(Morpheme([cell_uu, cell_au]), Morpheme([cell_aa]))))
        usl_collection.append(
            usl(Word(Morpheme([header_ou, cell_aa]), Morpheme([header_ao, header_uo]))))
        usl_collection.append(
            usl(Word(Morpheme([cell_ua]), Morpheme([cell_ua]))))

        result = paradigm_usl_distribution(paradigm, usl_collection)

        # Expected citation counts, written out as a literal 2x2 int32 matrix.
        correct_result = np.array([[4, 5],
                                   [4, 4]], dtype=np.int32)

        self.assertEqual(len(result), 1, "The paradigm has one table so we should have one distribution table")
        tables_match = np.array_equal(result[0], correct_result)
        self.assertTrue(tables_match)
Ejemplo n.º 2
0
 def test(rules, expected=None):
     """Assert that ``usl(rules)`` raises ``IEMLObjectResolutionError``.

     When ``expected`` is truthy, the ``errors`` attribute of the raised
     exception must also equal ``expected``. ``self`` is not a parameter and
     must be supplied by the surrounding scope.
     """
     if not expected:
         # No specific error list requested: raising the exception is enough.
         with self.assertRaises(IEMLObjectResolutionError):
             usl(rules)
         return

     try:
         usl(rules)
     except IEMLObjectResolutionError as resolution_error:
         self.assertListEqual(resolution_error.errors, expected)
     else:
         # Building the USL succeeded although an error was expected.
         self.fail()
Ejemplo n.º 3
0
    def test_usl_ranking(self):
        """Check the USL ordering that ``rank_usls`` returns for three term scripts."""
        term_1, term_2, term_3 = self.terms[1], self.terms[2], self.terms[3]

        first = usl(Word(Morpheme([term_1, term_3, term_2]), Morpheme([term_1, term_3])))
        second = usl(Word(Morpheme([term_2, term_3]), Morpheme([term_2])))
        third = usl(Word(Morpheme([term_3, term_1])))
        usl_collection = [first, second, third]

        result = rank_usls(self.term_scripts[1:4], usl_collection)

        self.assertEqual(len(result), 3)

        # (index into self.term_scripts, expected ranking), checked in order.
        expected_rankings = (
            (1, [first, third, second]),
            (2, [second, first, third]),
            (3, [first, second, third]),
        )
        for script_index, expected_order in expected_rankings:
            self.assertEqual(result[self.term_scripts[script_index]], expected_order)
Ejemplo n.º 4
0
    def test_linear_pl_two_usl(self):
        """Run the linear pipeline on USLs derived from two words, querying with one of them.

        Each of the two words is paired with a freshly generated word 50 times.
        The filtered USLs and the query word are printed; nothing is asserted.
        """
        word_a = self.generator.uniterm_word()
        word_b = self.generator.uniterm_word()

        # Derivatives of word_a come first, then those of word_b, before shuffling.
        all_usls = []
        for base_word in (word_a, word_b):
            for _ in range(50):
                companion = self.generator.uniterm_word()
                all_usls.append(usl(Text([base_word, companion])))
        shuffle(all_usls)

        query = usl(Text([word_a]))
        pipeline = LinearPipeline.gen_pipeline_from_query(query)
        filtered_set = pipeline.filter(USLSet(all_usls), query, 10)

        filtered_as_text = [str(_usl) for _usl in filtered_set.get_usls()]
        pprint.pprint(filtered_as_text)
        print(str(word_a))
Ejemplo n.º 5
0
    def test_paradigm_ranking(self):
        """Check the order in which ``rank_paradigms`` returns the first five term scripts."""
        # All kinds of terms (root paradigms, paradigms and singular terms) are ranked in one go.
        terms = self.terms
        scripts = self.term_scripts

        usl_collection = [
            usl(Word(Morpheme([terms[1], terms[4]]), Morpheme([terms[3]]))),
            usl(Word(Morpheme([terms[1], terms[0]]), Morpheme([terms[3]]))),
            usl(Word(Morpheme([terms[3], terms[0]]), Morpheme([terms[3], terms[1]]))),
        ]

        # Expected ranking, given as indices into self.term_scripts.
        term_order = [scripts[index] for index in (3, 1, 0, 4, 2)]

        result = rank_paradigms(self.term_scripts[:5], usl_collection)

        res_order = []
        for ranked in result:
            res_order.append(ranked.paradigm)

        self.assertEqual(term_order, res_order)
Ejemplo n.º 6
0
    def test_parse_example(self):
        """Check that ``usl`` turns a dict of rule strings into an object wrapping a ``Word``."""
        rules = dict(
            r0="A:O:.wo.t.-",
            r1="d.a.-l.a.-f.o.-'",
            r2="m.-M:.O:.-'m.-S:.U:.-'E:A:S:.-',",
            f0="b.o.-k.o.-s.u.-'",
            f1="n.u.-d.u.-d.u.-'",
        )

        parsed = usl(rules)

        self.assertIsInstance(parsed.ieml_object, Word)
Ejemplo n.º 7
0
    def test_linear_pl_ten_usl(self):
        """Run the linear pipeline on USLs built around ten words, querying with the first one.

        Each word gets 15 USLs, each made of that word plus one to five freshly
        generated words. The filtered USLs, the query word and the filtered USLs
        containing the query word are printed; nothing is asserted.
        """
        words = [self.generator.uniterm_word() for _ in range(10)]

        all_usls = []
        for word in words:
            for _ in range(15):
                extra_count = randint(1, 5)
                companions = [self.generator.uniterm_word() for _ in range(extra_count)]
                all_usls.append(usl(Text([word] + companions)))
        shuffle(all_usls)

        query_word = words[0]
        query = usl(Text([query_word]))

        pipeline = LinearPipeline.gen_pipeline_from_query(query)
        filtered_set = pipeline.filter(USLSet(all_usls), query, 10, [0.1, 0.9])

        pprint.pprint([str(_usl) for _usl in filtered_set.get_usls()])
        print(str(query_word))

        # Same listing again, restricted to the USLs whose first text holds the query word.
        containing_query = [
            str(_usl)
            for _usl in filtered_set.get_usls()
            if query_word in _usl.texts[0].children
        ]
        pprint.pprint(containing_query)
Ejemplo n.º 8
0
    def get_usl_filtering_level(cls, input_usl):
        """Return the filtering level constant that matches ``input_usl``.

        If the max level of the USL is a sentence or a supersentence, its
        filtering level is the same; otherwise this function figures out whether
        it is a multiterm or a uniterm word.

        :param input_usl: anything accepted by ``usl()``; it is converted first.
        :return: one of ``cls.SENTENCE``, ``cls.SUPERSENTENCE``,
            ``cls.MULTITERM_WORD`` or ``cls.UNITERM_WORD``.
        """
        input_usl = usl(input_usl)

        usl_max_level = input_usl.max_level
        # TODO : unittest this function
        if usl_max_level == Sentence:
            return cls.SENTENCE
        if usl_max_level == SuperSentence:
            return cls.SUPERSENTENCE

        # Must be a word: all of the USL's elements have to be words, so for each
        # word we check that the substance holds exactly one child and that the
        # mode is empty.
        # Iterate the parsed USL itself; the previous code looped over the `usl`
        # factory function, which is not iterable and raised TypeError.
        # NOTE(review): assumes iterating the USL object yields its words — confirm.
        for word in input_usl:
            if len(word.subst.children) != 1 or word.mode is not None:
                return cls.MULTITERM_WORD
        return cls.UNITERM_WORD  # reached only if all words are monoterm
Ejemplo n.º 9
0
from ieml.usl.tools import usl

# Word reused by the entries of `objects`; the tags of the first entry gloss it
# as "We intend to manufacture and sell a lot".
# NOTE(review): the per-term glosses were re-attached to the terms they appear
# to describe (two of them had drifted onto closing-bracket lines) — confirm.
root = Word(
    Morpheme([
        term("i.i.-"),  # to manufacture
        term("a.i.-")  # to sell
    ]),
    Morpheme([
        term("E:S:.o.-"),  # to want (future)
        term("E:S:.wa.-"),  # first person plural
        term("E:A:T:.")  # a lot
    ]))

objects = [
    {
        'usl': usl(root),
        'tags': {
            'FR': "Nous avons l'intention de fabriquer et de vendre beaucoup",
            'EN': "We intend to manufacture and sell a lot"
        },
        'keywords': {
            'FR': [],
            'EN': []
        }
    },
    {
        'usl':
        usl(
            Sentence([
                Clause(
                    root,
Ejemplo n.º 10
0
 def test_with_hypertext(self):
     """Check that a text mixing sentences and words gets the SENTENCE filtering level."""
     # Sentences first, then one multiterm word, then two uniterm words.
     members = (self.random_sentences
                + self.random_multiterm_words[1:2]
                + self.random_uniterm_words[1:3])
     hypertext = usl(Text(members))

     actual_level = FilteringLevel.get_usl_filtering_level(hypertext)
     self.assertEqual(actual_level, FilteringLevel.SENTENCE)