def test_parse(self):
    """Instanced USL strings parse to ``InstancedUSL`` and print back unchanged."""
    sources = (
        """[! E:A:. ()(b.-S:.A:.-'S:.-'S:.-',) > E:A:. E:A:. ()(k.a.-k.a.-')] [>role>E:A:. E:A:.>content>constant>k.a.-k.a.-' "test"]""",
        """[! E:A:. ()(b.-S:.A:.-'S:.-'S:.-',) > E:A:. E:A:. ()(k.a.-k.a.-')] [>role>E:A:.>content>constant>b.-S:.A:.-'S:.-'S:.-', "test"]""",
    )
    for source in sources:
        parsed = usl(source)
        self.assertIsInstance(parsed, InstancedUSL)
        # The textual form must survive the parse/print round trip.
        self.assertEqual(source, str(parsed))
def test_has_prefix(self):
    """A path reports the same path minus its final morpheme step as a prefix."""
    # The word is parsed but not inspected; only the parse itself is exercised.
    usl(
        "[! E:A:. ()(b.-S:.A:.-'S:.-'S:.-', m1(S: B: T:) m2(y. o. e. u. a. i.)) > E:A:. E:A:. (m1(E:U:T:. E:A:T:. E:S:T:. E:B:T:. E:T:T:.))(k.a.-k.a.-')]"
    )
    full_path = path(">role>! E:A:.>content>group_0 1>S:")
    expected_prefix = path(">role>! E:A:.>content>group_0 1")
    self.assertTrue(full_path.has_prefix(expected_prefix))
def test_expand_compose_into_paths_pm2(self):
    """Expanding a USL with two groups into path/value pairs and rebuilding it is lossless."""
    original = usl(
        "s.-S:.U:.-' n.-T:.A:.-' d.-S:.U:.-' m1(E:.-U:.b.-l.-' E:.-U:.f.-l.-') m1(E:.-B:.k.-l.-')"
    )
    path_values = list(original.iter_structure_path_by_script_ss())
    rebuilt = usl_from_path_values(path_values)
    # Compared through their string forms, as in the other round-trip tests.
    self.assertEqual(str(original), str(rebuilt))
def test_expand_compose_into_paths_empty_exclamation(self):
    """Round-trip a word whose ``!``-marked role has an empty lexeme."""
    original = usl(
        "[E:A:. (E:.-n.S:.-')(b.a.- b.o.-n.o.-s.u.-' f.a.-b.a.-f.o.-') > E:A:. E:A:. ()(n.-S:.U:.-'B:.-'B:.-',B:.-',B:.-',_ n.-S:.U:.-'B:.-'B:.-',T:.-',S:.-',_) > ! E:A:. E:U:. ()]"
    )
    path_values = list(original.iter_structure_path_by_script_ss())
    rebuilt = usl_from_path_values(path_values)
    self.assertEqual(str(original), str(rebuilt))
def test_enumerate_variations(self):
    """``enumerate_partitions`` reports a dimension of 1 for a word with one ``m1`` group."""
    word = usl(
        "[! E:B:. ()(k.a.-k.a.-' l.o.-k.o.-') > E:.f.- ()(m1(p.E:A:S:.- p.E:A:B:.- p.E:A:T:.- t.i.-l.i.-' c.-'B:.-'k.o.-t.o.-',))]"
    )
    # Only the dimension is asserted; the partitions themselves are not checked.
    dimension, _partitions = enumerate_partitions(word)
    self.assertEqual(dimension, 1)
def rows(self) -> List[USL]:
    """Build one USL per entry of ``self.row_paths_constant``.

    Each USL is created from the concatenation of ``self.constant_paths``,
    the row's own path/value list, and the flattened path/value pairs of
    every entry of ``self.column_paths_variation``.
    """
    flattened_columns = []
    for variation in self.column_paths_variation:
        flattened_columns.extend((p, ss) for p, ss in variation)

    return [
        usl(self.constant_paths + row_values + flattened_columns)
        for row_values in self.row_paths_constant
    ]
def _from_string(elem, children):
    """Build a flexion path from one textual path element.

    An empty ``elem`` yields a bare ``UslPath``. Otherwise ``elem`` is parsed
    with ``usl`` and wrapped in a ``FlexionPath``; ``children`` must be empty
    in that case (checked with an assertion after parsing).
    """
    if elem == '':
        return UslPath()

    # Imported lazily, inside the function, as in the other path constructors.
    from ieml.usl.usl import usl

    morpheme = usl(elem)
    assert len(children) == 0
    return FlexionPath(morpheme=morpheme)
def test_to_list_bijection(self):
    """``as_list`` followed by ``from_list`` gives back an equal syntagmatic function."""
    for source in WORDS_EXAMPLES:
        word = usl(source)
        self.assertIsInstance(word, Word)

        original_fun = word.syntagmatic_fun
        as_list = original_fun.as_list(word.context_type)
        # from_list also returns a context type, which is not asserted here.
        _context_type, rebuilt_fun = SyntagmaticFunction.from_list(as_list)
        self.assertEqual(original_fun, rebuilt_fun)
def test_singular_sequences(self):
    """Each sample paradigm word has a cardinal greater than one."""
    paradigms = (
        "[! E:A:. E:S:.-k.u.-' j.-U:.-'d.o.-l.o.-', (m2(wa. we. wo. wu.)) > E:A:. E:S:.-k.u.-' j.-A:.-'d.o.-l.o.-', ()]",
        "[E:B:. (E:.-wa.-t.o.-' E:.-'we.-S:.-'t.o.-',)(m1(i.k.- A: T: S:)) > ! E:.k.- (E:.wo.- E:.-n.S:.-' E:S:.-d.u.-')(b.a.-b.a.-f.o.-')]",
        "[! E:S:. ()(u.A:.-) > E:.l.- (m1(E:.-U:.s.-l.-' E:.-U:.d.-l.-' E:.-A:.s.-l.-' E:.-A:.d.-l.-' E:.-B:.b.-l.-' E:.-B:.f.-l.-'))]",
    )
    for source in paradigms:
        parsed = usl(source)
        self.assertGreater(parsed.cardinal, 1)
def test_old_words(self):
    """Sample words parse to ``Word``, print differently from their input, and contain '!'."""
    legacy_sources = (
        "[! E:A:. (E:.wo.- E:S:.-d.u.-')(k.i.-l.i.-')]",
        "[E:T:. (E:.b.wa.- E:.-wa.-t.o.-' E:.-'we.-S:.-'t.o.-',)(e.) > E:.n.- (E:.wo.- E:S:.-d.u.-') > E:.d.- (E:.wo.- E:S:.-d.u.-')(m.-S:.U:.-') > ! E:.n.- E:U:. ()]",
        "[E:A:. (E:.wo.- E:.-n.S:.-' E:S:.-d.u.-')(b.a.- b.o.-n.o.-s.u.-' f.a.-b.a.-f.o.-') > E:A:. E:A:. (E:.wo.- E:S:.-d.u.-')(n.-S:.U:.-'B:.-'B:.-',B:.-',B:.-',_ n.-S:.U:.-'B:.-'B:.-',T:.-',S:.-',_) > ! E:A:. E:U:. ()]",
    )
    for source in legacy_sources:
        word = usl(source)
        self.assertIsInstance(word, Word)
        # The printed form is expected to differ from the input spelling.
        self.assertNotEqual(source, str(word))
        self.assertIn('!', str(word))
def cells(self):
    """Build the grid of USLs as a list of rows, each a list of cells.

    The outer list follows ``self.row_paths_constant`` (iterated through a
    ``tqdm`` progress bar) and each inner list follows
    ``self.column_paths_constant``. A cell is created from
    ``self.constant_paths`` plus the column's and the row's path/value lists.
    """
    grid = []
    for row_values in tqdm.tqdm(self.row_paths_constant):
        grid.append([
            usl(self.constant_paths + column_values + row_values)
            for column_values in self.column_paths_constant
        ])
    return grid
def columns(self) -> List[USL]:
    """Build one USL per entry of ``self.column_paths_constant``.

    Each USL is created from the concatenation of ``self.constant_paths``,
    the column's own path/value list, and the flattened path/value pairs of
    every entry of ``self.row_paths_variation``. The result keeps the order
    of ``self.column_paths_constant``.
    """
    flattened_rows = []
    for variation in self.row_paths_variation:
        flattened_rows.extend((p, ss) for p, ss in variation)

    return [
        usl(self.constant_paths + column_values + flattened_rows)
        for column_values in self.column_paths_constant
    ]
def column_paths_variation(self):
    """Return the path/value pairs of each singular sequence of the column sub-USL.

    The sub-USL is obtained once by dereferencing ``self._columns`` against
    ``self.usl``. For every singular sequence of that sub-USL, a list of
    ``(path, morpheme)`` pairs is built, where ``path`` is ``self._columns``
    concatenated (``force=False``) with the morpheme-less path under which
    the morpheme appears in the sub-USL.

    Returns:
        The per-sequence lists, sorted by the USL built from each list (with
        every path turned into a constant path for its value) plus
        ``self.constant_paths``.
    """
    # Dereference once and reuse for both the lookup table and the iteration.
    columns_usl = self._columns.deference(self.usl)

    # morpheme -> path inside the column sub-USL, stripped of the morpheme step
    group_path_of = {
        morph: p.without_morpheme()
        for p, morph in columns_usl.iter_structure_path_by_script_ss()
    }

    res = [
        [(self._columns.concat(group_path_of[morph], force=False), morph)
         for _, morph in ss.iter_structure_path_by_script_ss()]
        for ss in columns_usl.singular_sequences
    ]

    return sorted(
        res,
        key=lambda pairs: usl([(p.as_constant(vv), vv) for p, vv in pairs] + self.constant_paths))
def test_table_lexeme_content_flexion_paradigm(self):
    """2D table over a lexeme paradigm: rows vary the flexion, columns vary the content."""
    paradigm = usl("(m1(E:.wo.U:.-t.o.-' E:.wo.A:.-t.o.-'))(n.-T:.A:.-' m1(E:T:S:. E:T:T:. we.f.T:.- u.A:.- p.E:A:S:.- s.-S:.A:.-') m1(E:S:.x.- n.-T:.U:.-'))")
    flexion_path = path(">flexion")
    content_path = path(">content")
    table = UslTable2D(paradigm, rows=flexion_path, columns=content_path)

    # Rows and columns must already be in sorted order.
    self.assertEqual(table.rows, sorted(table.rows))
    self.assertEqual(table.columns, sorted(table.columns))

    grid = table.cells
    # The grid must be rectangular.
    self.assertTrue(all(len(grid[0]) == len(grid_row) for grid_row in grid))

    dim = (len(grid), len(grid[0]))
    print(dim)

    n_columns = content_path.deference(paradigm).cardinal
    n_rows = flexion_path.deference(paradigm).cardinal
    self.assertEqual((n_rows, n_columns), dim)
    self.assertEqual(dim[0] * dim[1], paradigm.cardinal)

    flat_cells = list(chain.from_iterable(grid))
    for single_cell in flat_cells:
        self.assertEqual(single_cell.cardinal, 1)

    self.assertEqual(len(flat_cells), dim[0] * dim[1])
    # Every cell is distinct.
    self.assertEqual(len(flat_cells), len(set(flat_cells)))

    self.assertEqual(
        {PolyMorpheme([m]) for v in table.row_paths_variation for p, m in v},
        flexion_path.deference(paradigm).singular_sequences_set)
    self.assertEqual(
        {m for v in table.column_paths_variation for p, m in v if not m.empty},
        set(content_path.deference(paradigm).morphemes))

    # Each non-empty morpheme of a row's singular sequence appears in the matching cell.
    for row_usl, row_cells in zip(table.rows, grid):
        self.assertEqual(row_usl.cardinal, dim[1])
        for row_ss, cell in zip(row_usl.singular_sequences, row_cells):
            for morpheme in row_ss.morphemes:
                if morpheme.empty:
                    continue
                self.assertIn(morpheme, cell.morphemes)

    # Same check column-wise, walking the transposed grid.
    for column_usl, column_cells in zip(table.columns, zip(*grid)):
        self.assertEqual(column_usl.cardinal, dim[0])
        for column_ss, cell in zip(column_usl.singular_sequences, column_cells):
            for morpheme in column_ss.morphemes:
                if morpheme.empty:
                    continue
                self.assertIn(morpheme, cell.morphemes)
def list_polymorpheme_of_word(self, w):
    """Return the ``(pm_content, pm_flexion)`` pair of every actor of the word ``w``.

    ``w`` is a USL string that must parse to a ``Word``. Syntagmatic
    functions whose ``actor`` is ``None`` are skipped.
    """
    # WORKAROUND -- to be solved, then removed: this exact input is
    # short-circuited to an empty list instead of being parsed.
    if w == "[! E:B:. ()(k.a.-k.a.-' l.o.-k.o.-') > E:.f.- ()(p.E:A:T:.-)] [>role>E:B:.>content>constant>k.a.-k.a.-'":
        return []

    word = usl(w)
    assert isinstance(word, Word)

    # sfun.actor can be None; reading pm_content on it would raise
    # AttributeError, hence the filter.
    return [
        (sfun.actor.pm_content, sfun.actor.pm_flexion)
        for sfun in word.syntagmatic_fun.actors.values()
        if sfun.actor is not None
    ]
def test_table2d_word(self):
    """2D table over a word: rows vary the root content, columns vary the actant flexion."""
    word = usl("[! E:A:. ()(b.-S:.A:.-'S:.-'S:.-', m1(S: B: T:) m2(y. o. e. u. a. i.)) > E:A:. E:A:. (m1(E:U:T:. E:A:T:. E:S:T:. E:B:T:. E:T:T:.))(k.a.-k.a.-')]")
    root_content = path(">role>! E:A:.>content")
    actant_flexion = path(">role>E:A:. E:A:.>flexion")
    table = UslTable2D(word, rows=root_content, columns=actant_flexion)

    # Rows and columns must already be in sorted order.
    self.assertEqual(table.rows, sorted(table.rows))
    self.assertEqual(table.columns, sorted(table.columns))

    grid = table.cells
    # The grid must be rectangular.
    self.assertTrue(all(len(grid[0]) == len(grid_row) for grid_row in grid))

    dim = (len(grid), len(grid[0]))
    expected_dim = (root_content.deference(word).cardinal,
                    actant_flexion.deference(word).cardinal)
    self.assertEqual(expected_dim, dim)
def process_line(l):
    """Parse one input line into a normalised USL string and its translation.

    The line is matched against the module-level ``spliter`` pattern, whose
    two groups are the IEML text and the translation. Every ``'X'`` in the
    IEML text is replaced by ``'wa.'`` before parsing, and the parsed USL is
    passed to ``check_word``.

    Args:
        l: The raw line to process.

    Returns:
        A ``(str(usl), translation)`` tuple.

    Raises:
        ValueError: If the line does not match ``spliter``.
        Exception: Whatever ``usl`` or ``check_word`` raise is propagated
            unchanged.
    """
    match = spliter.match(l)
    if match is None:
        # Fail with the offending line instead of an AttributeError on None.
        raise ValueError("Line does not match the expected format: {!r}".format(l))

    ieml, trans_fr = match.groups()
    ieml = ieml.replace('X', 'wa.')
    print(ieml, trans_fr)

    # The former try/except only re-raised, so parsing errors still propagate.
    u = usl(ieml)
    check_word(u)
    return str(u), trans_fr
def test_table_2d_paradigm_pm(self):
    """2D table over a polymorpheme with three groups, two of them used as axes."""
    paradigm = usl("l.-T:.U:.-',n.-T:.A:.-',b.-S:.A:.-'U:.-'U:.-',_ m1(E:S:.x.- S:.E:A:S:.- T:.E:A:T:.-) m1(u.A:.- a.S:.- t.o.-c.-' k.i.-t.i.-t.u.-' n.-T:.A:.-' l.-T:.U:.-',n.-T:.A:.-',m.-B:.U:.-'m.-B:.U:.-'E:A:T:.-',_ l.-T:.U:.-',n.-T:.A:.-',d.-S:.U:.-',_) m1(p.E:A:S:.- E:.-U:.d.-l.-')")
    column_path = path(">group_0 1")
    row_path = path(">group_1 1")
    table = UslTable2D(paradigm, rows=row_path, columns=column_path)

    # Rows and columns must already be in sorted order.
    self.assertEqual(table.rows, sorted(table.rows))
    self.assertEqual(table.columns, sorted(table.columns))

    grid = table.cells
    # The grid must be rectangular.
    self.assertTrue(all(len(grid[0]) == len(grid_row) for grid_row in grid))

    dim = (len(grid), len(grid[0]))
    print(dim)

    n_columns = column_path.deference(paradigm).cardinal
    n_rows = row_path.deference(paradigm).cardinal
    self.assertEqual((n_rows, n_columns), dim)
    # The third group is on neither axis, so the test expects cardinal 3 per cell.
    self.assertEqual(dim[0] * dim[1] * 3, paradigm.cardinal)

    flat_cells = list(chain.from_iterable(grid))
    for single_cell in flat_cells:
        self.assertEqual(single_cell.cardinal, 3)

    self.assertEqual(len(flat_cells), dim[0] * dim[1])
    # Every cell is distinct.
    self.assertEqual(len(flat_cells), len(set(flat_cells)))

    self.assertEqual(
        {PolyMorpheme([m]) for v in table.column_paths_variation for p, m in v},
        column_path.deference(paradigm).singular_sequences_set)
    self.assertEqual(
        {PolyMorpheme([m]) for v in table.row_paths_variation for p, m in v},
        row_path.deference(paradigm).singular_sequences_set)

    # Each non-empty morpheme of a row's singular sequence appears in the matching cell.
    for row_usl, row_cells in zip(table.rows, grid):
        for row_ss, cell in zip(row_usl.singular_sequences, row_cells):
            for morpheme in row_ss.morphemes:
                if morpheme.empty:
                    continue
                self.assertIn(morpheme, cell.morphemes)

    # Same check column-wise, walking the transposed grid.
    for column_usl, column_cells in zip(table.columns, zip(*grid)):
        for column_ss, cell in zip(column_usl.singular_sequences, column_cells):
            for morpheme in column_ss.morphemes:
                if morpheme.empty:
                    continue
                self.assertIn(morpheme, cell.morphemes)
def _from_string(elem, children):
    """Build a role path from one textual path element and its trailing elements.

    ``elem`` is a space-separated list of role constituents, optionally
    containing a ``'!'`` token. An empty ``elem`` with no children yields a
    bare ``UslPath``; an empty ``elem`` with children raises ``ValueError``.
    When ``children`` is non-empty it is handed to
    ``LexemePath._from_string`` to build the child path.
    """
    from ieml.usl.syntagmatic_function import SyntagmaticRole

    if elem == '':
        if len(children) != 0:
            raise ValueError("Empty role in RolePath")
        return UslPath()

    from ieml.usl.usl import usl

    # The '!' token is not a constituent; it only sets has_focus below.
    constituents = [usl(token) for token in elem.split(' ') if token != '!']
    role = SyntagmaticRole(constituents)

    if len(children) != 0:
        child = LexemePath._from_string(children[0], children[1:])
    else:
        child = None

    return RolePath(role=role, has_focus='!' in elem, child=child)
def _from_string(elem, children): if elem == '': return UslPath() key = elem morph = None if len(children) == 1: from ieml.usl.usl import usl morph = usl(children[0]) idx = None multiplicity = None if key.startswith('constant'): idx = GroupIndex.CONSTANT elif key.startswith('group_'): if ' ' in key: key_, multi = key.split(' ') multiplicity = int(multi) else: key_ = key n = int(''.join(key_[6:])) if n == 0: idx = GroupIndex.GROUP_0 elif n == 1: idx = GroupIndex.GROUP_1 elif n == 2: idx = GroupIndex.GROUP_2 else: raise ValueError( "Invalid argument index for a PolymorphemePath _from_string constructor: " + str(n)) else: raise ValueError( "Invalid argument for a PolymorphemePath _from_string constructor: " + key) return PolymorphemePath(group_idx=idx, morpheme=morph, multiplicity=multiplicity)
def test_usl_from_path(self):
    """A word rebuilt from explicit path/value pairs equals the directly parsed word."""
    raw_structure = {
        ">role>! E:A:.>flexion>E:": "E:",
        ">role>! E:A:.>content>constant>b.-S:.A:.-'S:.-'S:.-',": "b.-S:.A:.-'S:.-'S:.-',",
        ">role>E:A:. E:A:.>flexion>E:": "E:",
        ">role>E:A:. E:A:.>flexion>E:U:T:.": "E:U:T:.",
        ">role>E:A:. E:A:.>flexion>E:A:T:.": "E:A:T:.",
        ">role>E:A:. E:A:.>flexion>E:S:T:.": "E:S:T:.",
        ">role>E:A:. E:A:.>flexion>E:B:T:.": "E:B:T:.",
        ">role>E:A:. E:A:.>flexion>E:T:T:.": "E:T:T:.",
        ">role>E:A:. E:A:.>content>constant>k.a.-k.a.-'": "k.a.-k.a.-'",
    }
    parse_usl = IEMLParser().parse
    parse_path = PathParser().parse

    path_values = [
        (parse_path(path_text), parse_usl(value_text))
        for path_text, value_text in raw_structure.items()
    ]
    rebuilt = usl_from_path_values(path_values)

    self.assertEqual(
        rebuilt,
        usl("[! E:A:. ()(b.-S:.A:.-'S:.-'S:.-',) > E:A:. E:A:. (m1(E:U:T:. E:A:T:. E:S:T:. E:B:T:. E:T:T:.))(k.a.-k.a.-')]"))
# NOTE(review): the statements down to ``return fp.read()`` are the tail of a
# function whose ``def`` line is not visible in this chunk (presumably
# ``compile_latex``, which is called below -- confirm). The code is unchanged.
doc.packages.append(Package('xcolor', ['dvipsnames', 'table']))
try:
    doc.generate_pdf(clean_tex=False, silent=False)
    doc.generate_tex()
except subprocess.CalledProcessError as e:
    os.chdir(
        old_cwd)  # pylatex changes the working directory but does not restore it
    raise e
# Return the raw bytes of the generated PDF.
with open(path + '.pdf', 'rb') as fp:
    return fp.read()


def rendex_latex_word(w: Word, descriptors: Descriptors, language: LANGUAGES):
    """Convert ``w`` to LaTeX with ``word_to_latex`` and compile it with ``compile_latex``.

    Returns whatever ``compile_latex`` returns; the script below writes that
    value to a file opened in binary mode.
    """
    return compile_latex(word_to_latex(w, descriptors, language))


if __name__ == "__main__":
    # Manual smoke run: render one hard-coded word in English to output.pdf.
    gitdb = GitInterface()
    db = IEMLDatabase(gitdb.folder)
    ieml = "[E:T:. (E:.b.wa.- E:.-wa.-t.o.-' E:.-'we.-S:.-'t.o.-',)(e.) > E:.n.- (E:.wo.- E:S:.-d.u.-') > E:.d.- (E:.wo.- E:S:.-d.u.-')(m.-S:.U:.-') > ! E:.n.- E:U:. ()]"
    w = usl(ieml)
    res = rendex_latex_word(w, db.get_descriptors(), 'en')
    with open("output.pdf", 'wb') as fp:
        fp.write(res)
def row_paths_variation(self):
    """Return the list of path/value combinations that make up the table rows.

    The dimensions considered are the sub-paths of ``self._rows`` when it is
    set. Otherwise they are every varying path of ``self.usl`` that does not
    have ``self._columns`` as a prefix. A dimension is "varying" when the
    USL it dereferences to has a cardinal different from 1, and "constant"
    otherwise (constants are only collected when ``self._rows`` is set).

    Varying dimensions are grouped by their head path so that dimensions
    belonging to the same polymorpheme are enumerated together. Only
    dimensions whose tail is a ``PolymorphemePath`` or a flexion
    ``LexemePath`` take part in the grouping.

    Returns:
        A list with one entry per combination of the grouped variations.
        Each entry is a list of ``(path, value)`` pairs whose paths are
        constant paths. The list is sorted by the USL built from each entry
        plus ``self.constant_paths``.
    """
    constant_dim = set()
    variations_dim = set()
    if self._rows is not None:
        for path, _ in self._rows.deference(self.usl).iter_structure_path_by_script_ss():
            path = self._rows.concat(path.without_morpheme(), force=False)
            if path.deference(self.usl).cardinal != 1:
                variations_dim.add(path)
            else:
                constant_dim.add(path)
    else:
        for path, _ in self.usl.iter_structure_path_by_script_ss():
            path = path.without_morpheme()
            if not path.has_prefix(self._columns):
                if path.deference(self.usl).cardinal != 1:
                    variations_dim.add(path.without_morpheme())

    # (path, singular sequence) pairs shared by every row.
    constants = []
    for path_dim in constant_dim:
        for ss in path_dim.deference(self.usl).singular_sequences:
            constants.append((path_dim, ss))

    # Group variations by polymorpheme (head path) for correct ss iteration.
    pm_bin = defaultdict(list)
    for path_dim in variations_dim:
        path_head, path_tail = path_dim.split_tail()
        if isinstance(path_tail, PolymorphemePath) or \
                (isinstance(path_tail, LexemePath) and path_tail.index == LexemeIndex.FLEXION):
            pm_bin[path_head].extend(
                [(path_tail, ss)
                 for _, ss in path_dim.deference(self.usl).iter_structure_path_by_script_ss()])

    variations_by_pm = []
    for path_bin, tail_values in pm_bin.items():
        # value -> tail path (group) it was collected under
        ss_to_groups_path = {ss_v: path_tail for path_tail, ss_v in tail_values}
        pm_struct = []
        for pm_ss in usl(tail_values).singular_sequences:
            group_ss = []
            for _, ss in pm_ss.iter_structure_path_by_script_ss():
                group_ss.append((path_bin.concat(ss_to_groups_path[ss]), ss))
            pm_struct.append(group_ss)
        variations_by_pm.append(pm_struct)

    # One row per combination of the grouped variations, prefixed by the constants.
    combined = [sum(combination, constants) for combination in product(*variations_by_pm)]

    # Expand every value into its own structure and make each path constant.
    expanded = []
    for row in combined:
        expanded.append([(path.concat(p2).as_constant(ss), ss)
                         for path, vv in row
                         for p2, ss in vv.iter_structure_path_by_script_ss()])

    return sorted(
        expanded,
        key=lambda row: usl([(path.as_constant(), vv) for path, vv in row] + self.constant_paths))
def test_expand_compose_into_paths_pm(self):
    """Round-trip a two-morpheme USL through its path/value pairs."""
    original = usl("E:T:S:. n.-T:.A:.-'")
    path_values = list(original.iter_structure_path_by_script_ss())
    rebuilt = usl_from_path_values(path_values)
    self.assertEqual(str(original), str(rebuilt))
def test_path(self):
    """Dereference polymorpheme, lexeme and role paths, built directly and parsed via ``self.check``."""
    from ieml.usl.usl import usl

    # --- polymorpheme paths, built directly -------------------------------
    polymorphemes = [
        usl("A: E: S: B: T:"),
        usl("A: E: m1(S: B: T:)"),
        usl("A: m1(E:) m1(S: B: T:)"),
        usl("m1(A:) m1(E:) m1(S: B: T:)"),
    ]
    group_indices = (
        GroupIndex.CONSTANT,
        GroupIndex.GROUP_0,
        GroupIndex.GROUP_1,
        GroupIndex.GROUP_2,
    )
    # In each sample, 'S:' sits in a different group.
    for group_idx, target in zip(group_indices, polymorphemes):
        PolymorphemePath(group_idx, usl('S:')).deference(target)

    # --- polymorpheme paths, via self.check --------------------------------
    # Columns: path text, expected path class, USL text, expected result type.
    polymorpheme_cases = (
        (">constant>S:", PolymorphemePath, 'S: A:', Script),
        (">constant", PolymorphemePath, 'S: A:', PolyMorpheme),
        (">group_0 1>S:", PolymorphemePath, 'A: m1(S:)', Script),
        (">group_0 1", PolymorphemePath, 'm1(S: A:)', PolyMorpheme),
        (">group_2 1>B:", PolymorphemePath, 'A: m1(U:) m1(B:) m1(S:)', Script),
        (">group_1 1>S:", PolymorphemePath, 'A: m1(U:) m1(S:)', Script),
        (">group_2 1", PolymorphemePath, 'A: m1(U:) m1(B:) m1(S:)', PolyMorpheme),
        (">group_1 1", PolymorphemePath, 'A: m1(U:) m1(S:)', PolyMorpheme),
        (">", PolymorphemePath, 'S: A:', PolyMorpheme),
    )
    for path_text, path_cls, usl_text, expected_type in polymorpheme_cases:
        self.check(path_text, path_cls, usl(usl_text), expected_type)

    # --- lexeme paths ------------------------------------------------------
    content_path = LexemePath(
        LexemeIndex.CONTENT,
        child=PolymorphemePath(GroupIndex.CONSTANT, usl('S:')))
    content_path.deference(usl("()(S: B:)"))

    flexion_path = LexemePath(LexemeIndex.FLEXION, child=FlexionPath(usl('S:')))
    flexion_path.deference(usl("(S: B:)(S:)"))

    lexeme_cases = (
        ('>content>constant>S:', LexemePath, '()(S:)', Script),
        ('>flexion>S:', LexemePath, '(S:)(B:)', Script),
        ('>flexion', LexemePath, '(S:)(B:)', PolyMorpheme),
        ('>flexion', LexemePath, '(S:)(B:)', PolyMorpheme),
        (">", LexemePath, '(S:)(B:)', Lexeme),
    )
    for path_text, path_cls, usl_text, expected_type in lexeme_cases:
        self.check(path_text, path_cls, usl(usl_text), expected_type)

    # --- role paths --------------------------------------------------------
    w = usl(
        "[! E:A:. ()(m.-B:.A:.-') > E:A:. E:A:. (E:B:.-d.u.-')(p.E:A:T:.- m1(S:))]"
    )
    role_path = RolePath(
        SyntagmaticRole([usl('E:A:.'), usl('E:A:.')]),
        child=LexemePath(
            LexemeIndex.CONTENT,
            child=PolymorphemePath(GroupIndex.CONSTANT, usl('p.E:A:T:.-'))))
    role_path.deference(w)

    for path_text in (
            ">role>E:A:. E:A:.>content>group_0 1>S:",
            ">role>E:A:. E:A:.>content>constant>p.E:A:T:.-",
            ">role>E:A:. E:A:.>flexion>E:B:.-d.u.-'",
            ">role>E:A:.>content>constant>m.-B:.A:.-'"):
        self.check(path_text, RolePath, w, Script)

    u = usl(
        "[! E:B:. ()(k.a.-k.a.-' l.o.-k.o.-') > E:.f.- ()(m1(p.E:A:S:.- p.E:A:B:.- p.E:A:T:.- t.i.-l.i.-' c.-'B:.-'k.o.-t.o.-',))]"
    )
    self.check(">role>E:.f.->content>group_0 1>p.E:A:S:.-", RolePath, u, Script)

    # Shorter role paths on ``w`` resolve to larger structures.
    self.check(">role>E:A:.", RolePath, w, Lexeme)
    self.check(">role>E:A:.>content", RolePath, w, PolyMorpheme)
    self.check(">", RolePath, w, Word)
def test_table2d_pm(self):
    """Run ``self.check_table`` on polymorphemes with one and with two group paths."""
    # Each case: (polymorpheme, first group path or None, second group path).
    cases = (
        (
            usl("t.o.- m1(S:.E:A:S:.- S:.E:A:B:.- S:.E:A:T:.-)"),
            None,
            path(">group_0 1"),
        ),
        (
            usl("n.-T:.A:.-' m1(u.l.- a.B:.- f.-S:.U:.-' f.-T:.A:.-') m1(d.-h.-')"),
            path(">group_0 1"),
            path(">group_1 1"),
        ),
    )
    for polymorpheme, first_group, second_group in cases:
        self.check_table(polymorpheme, first_group, second_group)