def test_paradigm_citation_dist(self):
    """Check the USL distribution table computed for the script ``E:O:O:.``.

    Four USLs are assembled from two groups of terms (cells and headers);
    the single table returned by ``paradigm_usl_distribution`` is then
    compared against hand-written 2x2 counts.
    """
    root = sc("E:O:O:.")
    cell = [Term(script) for script in ("E:U:U:.", "E:U:A:.", "E:A:U:.", "E:A:A:.")]
    header = [Term(script) for script in ("E:O:U:.", "E:O:A:.", "E:U:O:.", "E:A:O:.")]

    first = usl(Word(Morpheme([cell[0], cell[1]]),
                     Morpheme([cell[3], cell[1], cell[2]])))
    second = usl(Word(Morpheme([cell[0], cell[2]]),
                      Morpheme([cell[3]])))
    third = usl(Word(Morpheme([header[0], cell[3]]),
                     Morpheme([header[3], header[2]])))
    fourth = usl(Word(Morpheme([cell[1]]),
                      Morpheme([cell[1]])))

    tables = paradigm_usl_distribution(root, [first, second, third, fourth])

    # Expected counts, written row by row.
    expected = np.array([[4, 5],
                         [4, 4]], dtype=np.int32)

    self.assertEqual(len(tables), 1, "The paradigm has one table so we should have one distribution table")
    self.assertTrue(np.array_equal(tables[0], expected))
def test(rules, expected=None):
    """Assert that resolving ``rules`` with ``usl`` raises a resolution error.

    When ``expected`` is truthy, the ``errors`` attribute of the raised
    exception must also equal it. ``self`` is taken from the enclosing scope.
    """
    if not expected:
        # No specific error list requested: any resolution error is enough.
        with self.assertRaises(IEMLObjectResolutionError):
            usl(rules)
        return

    try:
        usl(rules)
    except IEMLObjectResolutionError as error:
        self.assertListEqual(error.errors, expected)
    else:
        # Resolution succeeded although an error was expected.
        self.fail()
def test_usl_ranking(self):
    """Check the per-term USL ordering returned by ``rank_usls``.

    Three USLs are built from ``self.terms[1..3]`` and ranked against
    ``self.term_scripts[1:4]``; each script must map to the expected order.
    """
    t1, t2, t3 = self.terms[1], self.terms[2], self.terms[3]

    first = usl(Word(Morpheme([t1, t3, t2]), Morpheme([t1, t3])))
    second = usl(Word(Morpheme([t2, t3]), Morpheme([t2])))
    third = usl(Word(Morpheme([t3, t1])))

    ranking = rank_usls(self.term_scripts[1:4], [first, second, third])
    self.assertEqual(len(ranking), 3)

    # (index into self.term_scripts, expected ordering of the USLs)
    expected_orders = (
        (1, [first, third, second]),
        (2, [second, first, third]),
        (3, [first, second, third]),
    )
    for script_index, expected in expected_orders:
        self.assertEqual(ranking[self.term_scripts[script_index]], expected)
def test_linear_pl_two_usl(self):
    """Run a linear pipeline over derivatives of two words, querying with the first.

    Fifty two-word texts are generated around each of the two base words,
    shuffled, and filtered. Nothing is asserted: the filtered USLs and the
    query word are only printed.
    """
    word_a = self.generator.uniterm_word()
    word_b = self.generator.uniterm_word()

    def derivates(base_word):
        # Pair the base word with a freshly generated word, fifty times over.
        return [usl(Text([base_word, self.generator.uniterm_word()]))
                for _ in range(50)]

    all_usls = derivates(word_a) + derivates(word_b)
    shuffle(all_usls)

    query = usl(Text([word_a]))
    pipeline = LinearPipeline.gen_pipeline_from_query(query)
    filtered_set = pipeline.filter(USLSet(all_usls), query, 10)

    kept = [str(_usl) for _usl in filtered_set.get_usls()]
    pprint.pprint(kept)
    print(str(word_a))
def test_paradigm_ranking(self):
    """Check the order in which ``rank_paradigms`` returns the first five term scripts."""
    # We are going to test for all terms (root paradigms, paradigms, and singular terms) at once
    terms = self.terms
    scripts = self.term_scripts

    usl_collection = [
        usl(Word(Morpheme([terms[1], terms[4]]), Morpheme([terms[3]]))),
        usl(Word(Morpheme([terms[1], terms[0]]), Morpheme([terms[3]]))),
        usl(Word(Morpheme([terms[3], terms[0]]), Morpheme([terms[3], terms[1]]))),
    ]

    expected_order = [scripts[index] for index in (3, 1, 0, 4, 2)]

    ranked = rank_paradigms(scripts[:5], usl_collection)
    actual_order = [entry.paradigm for entry in ranked]

    self.assertEqual(expected_order, actual_order)
def test_parse_example(self):
    """Check that ``usl`` applied to a rules mapping yields an object wrapping a ``Word``."""
    rules = dict(
        r0="A:O:.wo.t.-",
        r1="d.a.-l.a.-f.o.-'",
        r2="m.-M:.O:.-'m.-S:.U:.-'E:A:S:.-',",
        f0="b.o.-k.o.-s.u.-'",
        f1="n.u.-d.u.-d.u.-'",
    )

    resolved = usl(rules)
    self.assertIsInstance(resolved.ieml_object, Word)
def test_linear_pl_ten_usl(self):
    """Run a linear pipeline over derivatives of ten words, querying with the first.

    For each of the ten base words, fifteen texts are generated, each made of
    the base word followed by one to five freshly generated words. Nothing is
    asserted: the filtered USLs, the query word, and the filtered USLs whose
    first text contains the query word are only printed.
    """
    words = [self.generator.uniterm_word() for _ in range(10)]

    all_usls = []
    for word in words:
        for _ in range(15):
            extra_count = randint(1, 5)
            extras = [self.generator.uniterm_word() for _ in range(extra_count)]
            all_usls.append(usl(Text([word] + extras)))
    shuffle(all_usls)

    target = words[0]
    query = usl(Text([target]))
    pipeline = LinearPipeline.gen_pipeline_from_query(query)
    filtered_set = pipeline.filter(USLSet(all_usls), query, 10, [0.1, 0.9])

    everything = [str(_usl) for _usl in filtered_set.get_usls()]
    pprint.pprint(everything)
    print(str(target))

    # Only the results whose first text actually contains the query word.
    containing_target = [str(_usl) for _usl in filtered_set.get_usls()
                         if target in _usl.texts[0].children]
    pprint.pprint(containing_target)
def get_usl_filtering_level(cls, input_usl):
    """Return the filtering level matching the highest level found in a USL.

    If the max level of the USL is a sentence or a supersentence, the
    filtering level is the same. Otherwise the USL is made of words and this
    function figures out whether it is a multiterm or a uniterm word.

    Args:
        input_usl: value passed through ``usl()`` to obtain the USL to classify.

    Returns:
        One of ``cls.SENTENCE``, ``cls.SUPERSENTENCE``, ``cls.MULTITERM_WORD``
        or ``cls.UNITERM_WORD``.
    """
    input_usl = usl(input_usl)
    usl_max_level = input_usl.max_level
    # TODO : unittest this function
    if usl_max_level == Sentence:
        return cls.SENTENCE
    if usl_max_level == SuperSentence:
        return cls.SUPERSENTENCE

    # Must be a word, we have to figure out if single term or not.
    # All of the USL's elements have to be words, so for each word we check
    # that the substance holds exactly one child and that the mode is empty.
    #
    # Iterate over the resolved USL object: the previous code looped over the
    # ``usl`` callable itself, so the word checks never ran.
    # NOTE(review): assumes the object returned by usl() iterates over its
    # words - confirm against the USL class.
    for word in input_usl:
        if len(word.subst.children) != 1 or word.mode is not None:
            return cls.MULTITERM_WORD
    return cls.UNITERM_WORD  # reached only if all words are monoterm
from ieml.usl.tools import usl root = Word( Morpheme([ term("i.i.-"), # fabriquer term("a.i.-") ]), # vendre Morpheme([ term("E:S:.o.-"), # vouloir futur term("E:S:.wa.-"), # 1ere personne pluriel term("E:A:T:.") ])) # beaucoup objects = [ { 'usl': usl(root), 'tags': { 'FR': "Nous avons l'intention de fabriquer et de vendre beaucoup", 'EN': "We intend to manufacture and sell a lot" }, 'keywords': { 'FR': [], 'EN': [] } }, { 'usl': usl( Sentence([ Clause( root,
def test_with_hypertext(self):
    """A text mixing sentences with multiterm and uniterm words must filter at sentence level."""
    parts = (self.random_sentences
             + self.random_multiterm_words[1:2]
             + self.random_uniterm_words[1:3])
    hypertext = usl(Text(parts))

    level = FilteringLevel.get_usl_filtering_level(hypertext)
    self.assertEqual(level, FilteringLevel.SENTENCE)