def test_multiple_dictionary(self):
    """Check equality between ``Dictionary`` instances.

    Two dictionaries built without arguments must compare equal, while a
    dictionary built from the explicit version string below must differ
    from them.
    """
    default_a = Dictionary()
    default_b = Dictionary()
    self.assertEqual(default_a, default_b)

    pinned = Dictionary('dictionary_2017-06-07_00:00:00')
    self.assertNotEqual(default_a, pinned)
def validate_dictionary(folder=DICTIONARY_FOLDER):
    """Report whether the dictionary stored in ``folder`` loads without error.

    The dictionary is loaded with ``use_cache=False``. Any exception raised
    while loading is reported (its ``repr``) on standard error instead of
    being propagated.

    :param folder: folder handed to ``Dictionary.load``
    :return: ``True`` when loading succeeded, ``False`` otherwise
    """
    try:
        Dictionary.load(folder=folder, use_cache=False)
    except Exception as error:
        print(repr(error), file=sys.stderr)
        return False
    return True
def drupal_relations_dump(number=None, all=False):
    """Build the list of relation records between terms.

    Every unordered pair of distinct related terms yields two records per
    relation name: one from the greater term to the lesser one, and one in
    the opposite direction named after ``INVERSE_RELATIONS``.

    :param number: when truthy, only the first ``number`` records are returned
    :param all: when truthy, every term of the dictionary is considered;
        otherwise only the terms of the hard-coded root paradigm
    :return: list of dicts with the keys ``term_src``, ``term_dest``,
        ``relation_name``, ``relation_type`` and ``commentary``
    """
    utils = _get_drupal_utils()
    root = _term("O:M:.O:M:.-+M:O:.M:O:.-")

    dictionary = Dictionary()
    terms = list(dictionary) if all else sorted(dictionary.roots[root])

    relation_types = list(_RELATIONS)
    found = defaultdict(set)
    for start, first in enumerate(terms):
        for second in terms[start:]:
            for name in first.relations.to(second, relations_types=relation_types):
                # Key each pair as (greater, lesser) and ignore self-relations.
                high, low = (second, first) if second > first else (first, second)
                if high != low:
                    found[(high, low)].add(name)

    records = []
    for (high, low), names in found.items():
        for rel_cat in names:
            high_uuid = utils['all_uuid'][str(high.script)]
            low_uuid = utils['all_uuid'][str(low.script)]
            commentary = ''
            records.append({
                'term_src': high_uuid,
                'term_dest': low_uuid,
                'relation_name': rel_cat,
                'relation_type': _RELATIONS[rel_cat],
                'commentary': commentary
            })
            records.append({
                'term_src': low_uuid,
                'term_dest': high_uuid,
                'relation_name': INVERSE_RELATIONS[rel_cat],
                'relation_type': _RELATIONS[rel_cat],
                'commentary': commentary
            })

    return records[:number] if number else records
def test_neighbours(self):
    """Each term must carry the expected self-relations and relate to its neighbours."""
    self_relations = ('contains', 'contained', 'table_0', 'identity')
    for term in Dictionary():
        for relation_name in self_relations:
            self.assertIn(relation_name, term.relations.to(term))

        for neighbour in term.relations.neighbours:
            self.assertTrue(term.relations.to(neighbour))
def test_glossary(self):
    """Glossary terms of a random text belong to the dictionary and to the text.

    Testing membership of a plain string in the text must raise ``ValueError``.
    """
    text = random_usl(Text)

    in_dictionary = all(term in Dictionary() for term in text.glossary)
    self.assertTrue(in_dictionary)

    in_text = all(term in text for term in text.glossary)
    self.assertTrue(in_text)

    with self.assertRaises(ValueError):
        'test' in text
def test_inhibitions(self):
    """For every term, each inhibited relation type must yield an empty tuple."""
    failure_template = "Term %s has relations %s. Must be inhibited"
    for term in Dictionary():
        for inhibited in term.inhibitions:
            self.assertTupleEqual(
                term.relations[inhibited],
                (),
                failure_template % (str(term), inhibited))
def test_headers(self):
    """Check the tables of every term against its script and their headers.

    The paradigms of a term's tables must factorize back to the term script
    and be pairwise distinct. For tables whose ``dim`` is not 1, each header's
    cells must have the shape (number of rows, number of columns).
    """
    for term in Dictionary():
        tables = term.tables
        self.assertEqual(factorize((table.paradigm for table in tables)), term.script)

        distinct_paradigms = {table.paradigm for table in term.tables}
        self.assertEqual(len(distinct_paradigms), len(term.tables))

        for table in term.tables:
            for header in table.headers.values():
                if table.dim == 1:
                    continue
                expected_shape = (len(header.rows), len(header.columns))
                self.assertTupleEqual(expected_shape, header.cells.shape)
def test_root(self):
    """``is_root`` must be true for the dictionary roots and false for every other term."""
    dictionary = Dictionary()

    known_roots = set()
    for root in dictionary.roots:
        self.assertTrue(root.is_root)
        known_roots.add(root)

    for term in dictionary:
        if term not in known_roots:
            self.assertFalse(term.is_root)
def test_load_dictionary(self):
    """The index, terms, translations and layers must all agree on the term count."""
    dictionary = Dictionary()
    term_count = len(dictionary)

    self.assertEqual(len(dictionary.index), term_count)
    self.assertEqual(len(dictionary.terms), term_count)

    for language in LANGUAGES:
        self.assertEqual(len(dictionary.translations[language]), term_count)

    # Layers are numbered from 0, hence the extra one.
    self.assertEqual(len(dictionary.layers), MAX_LAYER + 1)
    layered_terms = sum(len(layer) for layer in dictionary.layers)
    self.assertEqual(layered_terms, term_count)

    self.assertListEqual(dictionary.index, sorted(dictionary.terms.values()))
def _get_drupal_utils():
    """Return the lazily built, module-level drupal helper data.

    The result is a dict with two entries:

    * ``'drupal_dico'``: ``ieml_term_model(t)`` for every term of the dictionary
    * ``'all_uuid'``: a ``bidict`` mapping each ``'IEML'`` string to an integer
      id derived from its SHA-1 digest (modulo ``MAX_TERMS_DICTIONARY``,
      offset by 1000)

    The data is built on the first call and cached in the ``_drupal_utils``
    global.

    :return: the cached helper dict
    """
    global _drupal_utils
    if _drupal_utils is None:
        # Build everything locally and publish the cache only once it is
        # complete: if building the uuid mapping raises, the global must stay
        # ``None`` so later calls do not return a dict without 'all_uuid'.
        drupal_dico = [ieml_term_model(t) for t in Dictionary()]
        all_uuid = bidict({
            entry['IEML']: 1000 + int(hashlib.sha1(entry['IEML'].encode()).hexdigest(), 16) % MAX_TERMS_DICTIONARY
            for entry in drupal_dico
        })
        _drupal_utils = {
            'drupal_dico': drupal_dico,
            'all_uuid': all_uuid,
        }

    return _drupal_utils
def download_dictionary(d_version):
    """Download a dictionary version and build a ``Dictionary`` from it.

    The JSON payload fetched from ``DICTIONARY_URL`` is read entry by entry:
    each entry contributes its ``'IEML'`` script, its ``'FR'`` and ``'EN'``
    translations and, when ``'ROOT_PARADIGM'`` is truthy, a root paradigm.
    Inhibitions and comments are then downloaded separately.

    :param d_version: the version identifier, quoted into the download url
    :return: the ``Dictionary`` built from the downloaded data
    """
    response = get(DICTIONARY_URL.format(quote(d_version)))

    scripts, roots = [], []
    french, english = {}, {}
    for entry in tqdm(response.json()):
        script = entry['IEML']
        scripts.append(script)
        if entry['ROOT_PARADIGM']:
            roots.append(script)

        french[script] = entry['FR']
        english[script] = entry['EN']

    translations = {'fr': french, 'en': english}
    inhibitions = download_inhibitions(roots, d_version)
    comments = download_comments(scripts)

    return Dictionary(
        scripts=scripts,
        root_paradigms=roots,
        translations=translations,
        inhibitions=inhibitions,
        comments=comments)
def test_dictionary(self):
    """Terms of the first six available versions must reference their own dictionary."""
    versions = get_available_dictionary_version()[:6]
    for version in versions:
        dictionary = Dictionary(version)
        for term in dictionary:
            self.assertEqual(term.dictionary, dictionary)
def setUp(self):
    """Create the dictionary used by the tests and store it on ``self.dic``."""
    dictionary = Dictionary()
    self.dic = dictionary
def test_all_terms(self):
    """Factorizing the script of any dictionary term must give back the same script."""
    for term in Dictionary():
        factorized = factorize(term.script)
        failure = "Invalid factorization for term %s -> %s" % (str(term), str(factorized))
        self.assertEqual(term.script, factorized, failure)
def setUp(self):
    """Load the dictionary and prepare a random object generator at ``Theory`` level."""
    self.d = Dictionary.load()
    generator = RandomPoolIEMLObjectGenerator(self.d, level=Theory)
    self.rand_gen = generator
def test_all_db_term(self):
    """Parsing the string form of every term script must round-trip to the same string."""
    parser = ScriptParser()
    expected = [str(term.script) for term in Dictionary()]

    reparsed = []
    for text in expected:
        reparsed.append(parser.parse(text))

    self.assertListEqual([str(script) for script in reparsed], expected)
# Command-line script: parse the arguments, load the dictionary and the
# lexicon from their folders, then generate the static website.
parser = argparse.ArgumentParser(
    description='Generate the dictionary static website.')

# Mandatory positional arguments, in the order expected on the command line.
parser.add_argument('output_folder', type=str, help='the website output folder')
parser.add_argument('base_url', type=str, help='the website base url')

# Optional overrides of the folders the definitions are read from.
parser.add_argument('--dictionary-folder', type=str, required=False, default=DICTIONARY_FOLDER, help='the dictionary definition folder')
parser.add_argument('--lexicon-folder', type=str, required=False, default=LEXICONS_FOLDER, help='the lexicons definition folder')

args = parser.parse_args()

dictionary = Dictionary.load(args.dictionary_folder)
lexicon = Lexicon.load(args.lexicon_folder)

generate_script_site(dictionary, lexicon, args.output_folder, base_url=args.base_url)
def create_dictionary_version(old_version=None, add=None, update=None, remove=None):
    """
    Create a new dictionary version from an existing one.

    The operations are applied in this order: ``remove``, ``add``, ``update``.

    :param old_version: the dictionary version to build the new version from
                        (defaults to the latest dictionary version)
    :param add: a dict with the elements to add
                    {'terms': list of scripts to add,
                     'roots': list of scripts to add as root paradigms,
                     'inhibitions': dict {root_p: list of relations to inhibit in this root p},
                     'translations': dict {language: {script: translation}}}
    :param update: a dict to update the translations and inhibitions or the terms (new mapping)
                    map terms|inhibitions|translations -> old -> new
    :param remove: a list of terms to remove; they are removed from roots, terms,
                   inhibitions and translations
    :return: the new dictionary version
    :raises ValueError: when ``add`` holds inhibitions or translations for a
                        script that already has some
    """
    v = latest_dictionary_version()
    last_date = v.date
    # Spin until the clock returns a date different from the latest version's.
    while True:
        new_date = datetime.datetime.utcnow()
        if new_date != last_date:
            break

    if old_version is None:
        old_version = v

    old_version.load()
    old_key = str(old_version)

    state = {
        'version': _date_to_str(new_date),
        'terms': copy.deepcopy(old_version.terms),
        'roots': copy.deepcopy(old_version.roots),
        'inhibitions': copy.deepcopy(old_version.inhibitions),
        'translations': copy.deepcopy(old_version.translations),
        'diff': {
            **copy.deepcopy(old_version.diff),
            old_key: {}
        }
    }

    if remove is not None:
        state['terms'] = list(set(state['terms']).difference(remove))
        state['roots'] = list(set(state['roots']).difference(remove))
        for r in remove:
            if r in state['inhibitions']:
                del state['inhibitions'][r]

            for l in LANGUAGES:
                if r in state['translations'][l]:
                    del state['translations'][l][r]

            # A removed term maps to nothing in the new version.
            state['diff'][old_key][r] = None

    if add is not None:
        if 'terms' in add:
            state['terms'] = list(set(state['terms']).union(add['terms']))
        if 'roots' in add:
            state['roots'] = list(set(state['roots']).union(add['roots']))
        if 'inhibitions' in add:
            if set(state['inhibitions']).intersection(set(add['inhibitions'])):
                raise ValueError(
                    "Error in creating a new dictionary versions, trying to add multiples "
                    "inhibitions rules for the same script.")

            state['inhibitions'] = {**state['inhibitions'], **add['inhibitions']}

        if 'translations' in add:
            conflicts = {
                l: set(state['translations'][l]).intersection(add['translations'][l])
                for l in LANGUAGES
            }
            if any(conflicts.values()):
                raise ValueError(
                    "Error in creating a new dictionary version, trying to add multiples "
                    "translation for the script {%s}. Those script may already exists in the dictionary."
                    % ', '.join('"%s": [%s]' % (l, ', '.join('"%s"' % str(t) for t in conflicts[l]))
                                for l in LANGUAGES))

            state['translations'] = {
                l: {**state['translations'][l], **add['translations'][l]}
                for l in LANGUAGES
            }

    if update is not None:
        if 'inhibitions' in update:
            # Only scripts that already have inhibitions are updated.
            for s, l in update['inhibitions'].items():
                if s not in state['inhibitions']:
                    continue
                state['inhibitions'][s] = l

        if 'translations' in update:
            state['translations'] = {
                l: {**state['translations'][l], **update['translations'][l]}
                for l in LANGUAGES
            }

        if 'terms' in update:
            renamed = update['terms']
            renamed_roots = set(state['roots']).intersection(renamed)
            terms = set(t for t in state['terms'] if t not in renamed)
            roots = set(t for t in state['roots'] if t not in renamed)

            for t_old in renamed:
                t_new = renamed[t_old]
                state['diff'][old_key][t_old] = t_new

                terms.add(t_new)
                if t_old in renamed_roots:
                    roots.add(t_new)

                # Move translations and inhibitions over to the new script.
                for l in LANGUAGES:
                    state['translations'][l][t_new] = state['translations'][l][t_old]
                    del state['translations'][l][t_old]

                if t_old in state['inhibitions']:
                    state['inhibitions'][t_new] = state['inhibitions'][t_old]
                    del state['inhibitions'][t_old]

            # Store lists, like the add/remove branches do, so the state has
            # the same shape whatever operations were applied.
            state['terms'] = list(terms)
            state['roots'] = list(roots)

    dictionary_version = DictionaryVersion(new_date)
    dictionary_version.__setstate__(state)

    from ieml.dictionary import Dictionary

    # The relations of the old dictionary can be reused only if terms, roots
    # and the whole inhibitions mapping are unchanged. The mappings are
    # compared as a whole so that added or removed inhibition rules are
    # detected as well.
    graph_unchanged = (set(old_version.terms) == set(state['terms'])
                       and set(old_version.roots) == set(state['roots'])
                       and old_version.inhibitions == state['inhibitions'])

    if graph_unchanged:
        old_dict_state = Dictionary(old_version).__getstate__()

        d = Dictionary.__new__(Dictionary)
        rel_graph = RelationsGraph.__new__(RelationsGraph)
        rel_graph.__setstate__({
            'dictionary': d,
            'relations': old_dict_state['relations'].__getstate__()['relations']
        })

        d.__setstate__({
            'version': dictionary_version,
            'relations': rel_graph,
            'scripts': old_dict_state['scripts'],
        })

        save_dictionary_to_cache(d)
    else:
        # graph is updated, must check the coherence
        Dictionary(dictionary_version)

    return dictionary_version
def test_rank1_2d(self):
    """Every root paradigm of the dictionary must have rank 0."""
    failure = "The rank of a root paradigm is not 0"
    for root in Dictionary().roots:
        self.assertTrue(root.rank == 0, failure)
def create_dictionary_version(old_version=None, add=None, update=None, remove=None, diff=None):
    """
    Create a new dictionary version from an existing one.

    The operations are applied in this order: ``remove``, ``add``, ``update``.
    Each added or removed script is also recorded in the ``history`` of the
    new version with a ``'+'`` or ``'-'`` mark.

    :param old_version: the dictionary version to build the new version from
                        (defaults to the latest dictionary version)
    :param add: a dict with the elements to add
                    {'terms': list of scripts to add,
                     'roots': list of scripts to add as root paradigms,
                     'inhibitions': dict {root_p: list of relations to inhibit in this root p},
                     'translations': dict {language: {script: translation}}}
    :param update: a dict to update the translations and inhibitions or the terms (new mapping)
                    map terms|inhibitions|translations -> old -> new
    :param remove: a list of terms to remove; they are removed from roots, terms,
                   inhibitions and translations
    :param diff: optional initial old -> new mapping stored as the diff from
                 ``old_version``; when truthy it is used as-is (not copied)
                 and is extended by the remove/update operations
    :return: the new dictionary version
    :raises ValueError: when ``add`` holds inhibitions or translations for a
                        script that already has some
    """
    v = latest_dictionary_version()
    last_date = v.date
    # Spin until the clock returns a date different from the latest version's.
    while True:
        new_date = datetime.datetime.utcnow()
        if new_date != last_date:
            break

    new_version_name = version_name(new_date)

    if old_version is None:
        old_version = v

    old_version.load()
    old_key = str(old_version)

    state = {
        'version': _date_to_str(new_date),
        'terms': copy.deepcopy(old_version.terms),
        'roots': copy.deepcopy(old_version.roots),
        'inhibitions': copy.deepcopy(old_version.inhibitions),
        'translations': copy.deepcopy(old_version.translations),
        'diff': {**copy.deepcopy(old_version.diff), old_key: diff if diff else {}},
        'history': {**copy.deepcopy(old_version.history), new_version_name: {}}
    }
    version_diff = state['diff'][old_key]
    version_history = state['history'][new_version_name]

    if remove is not None:
        state['terms'] = list(set(state['terms']).difference(remove))
        state['roots'] = list(set(state['roots']).difference(remove))
        for r in remove:
            if r in state['inhibitions']:
                del state['inhibitions'][r]

            for l in LANGUAGES:
                if r in state['translations'][l]:
                    del state['translations'][l][r]

            # A removed term maps to nothing in the new version.
            version_diff[r] = None
            version_history[r] = '-'

    if add is not None:
        if 'terms' in add:
            state['terms'] = list(set(state['terms']).union(add['terms']))
            for t in add['terms']:
                version_history[t] = '+'

        if 'roots' in add:
            state['roots'] = list(set(state['roots']).union(add['roots']))
            for t in add['roots']:
                version_history[t] = '+'

        if 'inhibitions' in add:
            if set(state['inhibitions']).intersection(set(add['inhibitions'])):
                raise ValueError("Error in creating a new dictionary versions, trying to add multiples "
                                 "inhibitions rules for the same script.")

            state['inhibitions'] = {**state['inhibitions'], **add['inhibitions']}

        if 'translations' in add:
            conflicts = {
                l: set(state['translations'][l]).intersection(add['translations'][l])
                for l in LANGUAGES
            }
            if any(conflicts.values()):
                raise ValueError(
                    "Error in creating a new dictionary version, trying to add multiples "
                    "translation for the script {%s}. Those script may already exists in the dictionary."
                    % ', '.join('"%s": [%s]' % (l, ', '.join('"%s"' % str(t) for t in conflicts[l]))
                                for l in LANGUAGES))

            state['translations'] = {
                l: {**state['translations'][l], **add['translations'][l]}
                for l in LANGUAGES
            }

    if update is not None:
        if 'inhibitions' in update:
            # Only scripts that already have inhibitions are updated.
            for s, l in update['inhibitions'].items():
                if s not in state['inhibitions']:
                    continue
                state['inhibitions'][s] = l

        if 'translations' in update:
            state['translations'] = {
                l: {**state['translations'][l], **update['translations'][l]}
                for l in LANGUAGES
            }

        if 'terms' in update:
            renamed = update['terms']
            renamed_roots = set(state['roots']).intersection(renamed)
            terms = set(t for t in state['terms'] if t not in renamed)
            roots = set(t for t in state['roots'] if t not in renamed)

            for t_old in renamed:
                t_new = renamed[t_old]

                # a modify is like an add and delete.
                version_history[t_old] = '-'
                version_history[t_new] = '+'
                version_diff[t_old] = t_new

                terms.add(t_new)
                if t_old in renamed_roots:
                    roots.add(t_new)

                # Move translations and inhibitions over to the new script.
                for l in LANGUAGES:
                    state['translations'][l][t_new] = state['translations'][l][t_old]
                    del state['translations'][l][t_old]

                if t_old in state['inhibitions']:
                    state['inhibitions'][t_new] = state['inhibitions'][t_old]
                    del state['inhibitions'][t_old]

            # Store lists, like the add/remove branches do.
            state['terms'] = list(terms)
            state['roots'] = list(roots)

    dictionary_version = DictionaryVersion(new_date)
    dictionary_version.__setstate__(state)

    from ieml.dictionary import Dictionary

    # The relations of the old dictionary can be reused only if terms, roots
    # and the whole inhibitions mapping are unchanged. The mappings are
    # compared as a whole so that added or removed inhibition rules are
    # detected as well.
    graph_unchanged = (set(old_version.terms) == set(state['terms'])
                       and set(old_version.roots) == set(state['roots'])
                       and old_version.inhibitions == state['inhibitions'])

    if graph_unchanged:
        old_dict_state = Dictionary(old_version).__getstate__()

        d = Dictionary.__new__(Dictionary)
        rel_graph = RelationsGraph.__new__(RelationsGraph)
        rel_graph.__setstate__({
            'dictionary': d,
            'relations': old_dict_state['relations'].__getstate__()['relations']
        })

        d.__setstate__({
            'version': dictionary_version,
            'relations': rel_graph,
            'scripts': old_dict_state['scripts'],
        })

        save_dictionary_to_cache(d)
    else:
        # graph is updated, must check the coherence
        Dictionary(dictionary_version)

    return dictionary_version
def dword(u0, u1):
    """Contract the word vectors of ``u0`` and ``u1`` with a relations matrix.

    The matrix is the relations-graph matrix of the dictionary built from
    ``u0.dictionary_version``; the scalar result is returned as an ``int``.

    :param u0: first object, providing ``words_vector()`` and ``dictionary_version``
    :param u1: second object, providing ``words_vector()``
    :return: the contraction ``sum_ij v0[i] * v1[j] * matrix[i, j]`` as an int
    """
    left = u0.words_vector()
    right = u1.words_vector()
    matrix = Dictionary(u0.dictionary_version).relations_graph.matrix

    total = np.einsum('i,j,ij->', left, right, matrix)
    return int(total)
def dictionary(self):
    """Return the ``Dictionary`` built from this object's ``dictionary_version``."""
    version = self.dictionary_version
    return Dictionary(version)
def test_index(self):
    """Iterating the dictionary must yield its terms in sorted order."""
    iterated = list(Dictionary())
    self.assertListEqual(iterated, sorted(iterated))
def test_relations_matrix(self):
    """The connexity matrix must be boolean, neither all false nor all true."""
    connexity = Dictionary().relations_graph.connexity

    has_true = connexity.any()
    self.assertTrue(has_true)

    only_true = connexity.all()
    self.assertFalse(only_true)

    self.assertTrue(connexity.dtype == bool)
def test_relations_matrix(self):
    """The connexity matrix must be boolean, neither all false nor all true."""
    graph = Dictionary().relations_graph
    connexity = graph.connexity

    self.assertTrue(connexity.any())
    self.assertFalse(connexity.all())

    is_boolean = connexity.dtype == bool
    self.assertTrue(is_boolean)
def _set_version(self, version):
    """Replace ``self.term`` by its translation into the given dictionary version.

    :param version: the dictionary version the term is translated to
    """
    target = Dictionary(version)
    translated = target.translate_script_from_version(
        self.term.dictionary.version,
        self.term.script)
    self.term = translated
def test_root_relations(self):
    """Any two terms contained in the same root paradigm must be related."""
    # if two terms are in the same root paradigms they have to have at least relations between them
    for root in Dictionary().roots:
        pairs = product(root.relations.contains, repeat=2)
        for first, second in pairs:
            self.assertTrue(first.relations.to(second))
def test_rank0(self):
    """The rank-0 terms of the dictionary must be exactly its sorted roots."""
    rank_zero_terms = [term for term in Dictionary() if term.rank == 0]
    sorted_roots = sorted(Dictionary().roots)
    self.assertListEqual(rank_zero_terms, sorted_roots)