def add_morpheme_paradigm(self, script: Script, translations, comments):
    """Add a non-root paradigm to the dictionary in a single git commit.

    The script is normalised with ``_check_script`` and must:

    * be a paradigm (``len(script) != 1``),
    * not already be listed in the dictionary's scripts,
    * have all its singular sequences inside exactly one root paradigm.

    Inside the commit, the script's existing descriptors and structure
    entries are removed, ``is_root`` is set to ``False`` and the supplied
    translations and comments are stored.

    Args:
        script: the paradigm to add (any value ``_check_script`` accepts).
        translations: mapping indexed by every entry of ``LANGUAGES``; each
            value is an iterable of translations to store.
        comments: mapping indexed by every entry of ``LANGUAGES``; each
            value is an iterable of comments to store.

    Raises:
        ValueError: if the script is not a paradigm, is already defined, or
            does not fit in exactly one root paradigm.
    """
    db = IEMLDatabase(folder=self.gitdb.folder,
                      use_cache=self.use_cache,
                      cache_folder=self.cache_folder)
    d = db.get_dictionary()

    script = _check_script(script)
    if len(script) == 1:
        raise ValueError(
            "The script is not a paradigm {}, can't use it to define a paradigm."
            .format(str(script)))

    if script in d.scripts:
        raise ValueError(
            "Script {} already defined in the dictionary".format(str(script)))

    # Collect the root paradigm of every singular sequence; a valid paradigm
    # lives entirely inside one root.
    r_cand = set()
    for ss in script.singular_sequences:
        try:
            r_cand.add(d.tables.root(ss))
        except KeyError as err:
            raise ValueError(
                "No root paradigms contains this script {}".format(
                    str(script))) from err

    if len(r_cand) != 1:
        raise ValueError(
            "No root paradigms or too many for script {}".format(str(script)))

    root = next(iter(r_cand))
    descriptors = db.get_descriptors()

    # The script's descriptors are wiped and replaced in the commit below, so
    # the message describes the translations being written, not whatever the
    # database held beforehand.
    script_labels = " / ".join(
        "{}:{}".format(lang, ', '.join(str(v) for v in translations[lang]))
        for lang in LANGUAGES)
    root_labels = " / ".join(
        "{}:{}".format(
            lang, ', '.join(descriptors.get_values(root, lang, 'translations')))
        for lang in LANGUAGES)
    message = "[dictionary] Create paradigm {} ({}) for root paradigm {} ({})"\
        .format(str(script), script_labels, str(root), root_labels)

    with self.gitdb.commit(self.signature, message):
        db.remove_descriptor(script)
        db.remove_structure(script)
        db.add_structure(script, 'is_root', False)

        for lang in LANGUAGES:
            for v in translations[lang]:
                db.add_descriptor(script,
                                  language=lang,
                                  descriptor='translations',
                                  value=v)
            for v in comments[lang]:
                db.add_descriptor(script,
                                  language=lang,
                                  descriptor='comments',
                                  value=v)
def migrate(function, _s_old, _s_new):
    """Rewrite every dictionary script through ``function`` in one commit.

    A working copy of the IEML language repository is set up in
    ``/tmp/migrate_script_iemldb`` (any existing folder at that path is
    deleted first). Each script of the dictionary is passed to ``function``;
    scripts whose image differs are moved: their structure and descriptor
    entries are removed from the old script and re-added under the new one.

    Args:
        function: callable mapping a script to its migrated form.
        _s_old: sample script, used for the sanity check and commit message.
        _s_new: expected result of ``function(_s_old)``.

    Raises:
        AssertionError: if ``function(_s_old)`` is not equal to ``_s_new``.
    """
    assert function(_s_old) == _s_new

    folder = '/tmp/migrate_script_iemldb'
    if os.path.isdir(folder):
        shutil.rmtree(folder)

    git_address = "https://github.com/IEMLdev/ieml-language.git"
    credentials = pygit2.Keypair('ogrergo', '~/.ssh/id_rsa.pub',
                                 '~/.ssh/id_rsa', None)
    gitdb = GitInterface(origin=git_address,
                         credentials=credentials,
                         folder=folder)
    signature = pygit2.Signature("Louis van Beurden", "*****@*****.**")

    db = IEMLDatabase(folder=folder, use_cache=False)

    # Descriptor and structure tables are fetched before anything is removed;
    # the loop below reads the old values from these objects.
    desc = db.get_descriptors()
    struct = db.get_structure()

    to_migrate = {}
    for s in db.get_dictionary().scripts:
        s2 = function(s)
        if s2 != s:
            to_migrate[s] = s2

    print(to_migrate)

    # The closing quote after the second placeholder was missing originally.
    with gitdb.commit(
            signature,
            "[Translate script] Translate paradigm from '{}' to '{}'".format(
                str(_s_old), str(_s_new))):
        for s_old, s_new in to_migrate.items():
            db.remove_structure(s_old)
            for (_, key), values in struct.get_values_partial(s_old).items():
                for v in values:
                    db.add_structure(s_new, key, v)

            db.remove_descriptor(s_old)
            for (_, lang, d), values in desc.get_values_partial(s_old).items():
                for v in values:
                    db.add_descriptor(s_new, lang, d, v)
def set_descriptors(self, ieml, descriptor, value):
    """Replace the values of one descriptor kind for ``ieml``.

    ``ieml`` is normalised with ``_check_ieml`` and ``value`` with
    ``_check_descriptors``; ``value`` is then indexed by every entry of
    ``LANGUAGES``.

    Three outcomes are possible:

    * the stored values already match ``value`` for every language (order
      ignored): nothing is committed and ``False`` is returned;
    * after the update no descriptor of any kind would remain for ``ieml``:
      all its descriptors are removed in one commit and ``True`` is returned;
    * otherwise the values of ``descriptor`` are replaced by ``value`` in one
      commit and ``True`` is returned.
    """
    database = IEMLDatabase(folder=self.gitdb.folder,
                            use_cache=self.use_cache,
                            cache_folder=self.cache_folder)

    ieml = _check_ieml(ieml)
    value = _check_descriptors(value)

    stored = database.get_descriptors()

    current = {}
    for lang in LANGUAGES:
        current[lang] = stored.get_values(ieml=ieml,
                                          language=lang,
                                          descriptor=descriptor)

    unchanged = all(
        sorted(value[lang]) == sorted(current[lang]) for lang in LANGUAGES)
    if unchanged:
        error("No update needed, db already contains {}:{} for {}".format(
            descriptor, json.dumps(value), str(ieml)))
        return False

    def values_after_update(lang, kind):
        # The kind being edited takes its future content from ``value``;
        # every other kind keeps what is stored.
        if kind == descriptor:
            return value[lang]
        return stored.get_values(ieml=ieml, language=lang, descriptor=kind)

    # Check whether at least one descriptor survives the modification.
    something_left = any(
        values_after_update(lang, kind)
        for lang in LANGUAGES
        for kind in DESCRIPTORS_CLASS)

    if not something_left:
        removal_message = '[descriptors] Remove {}'.format(str(ieml))
        error(removal_message)
        with self.gitdb.commit(self.signature, removal_message):
            database.remove_descriptor(ieml)
        return True

    update_message = '[descriptors] Update {} for {} to {}'.format(
        descriptor, str(ieml), json.dumps(value))
    with self.gitdb.commit(self.signature, update_message):
        database.remove_descriptor(ieml, None, descriptor)
        for lang in LANGUAGES:
            for entry in value[lang]:
                database.add_descriptor(ieml, lang, descriptor, entry)
    return True
def update_all_ieml(self, f, message: str):
    """Move the descriptors of every IEML entry to ``f(entry)`` in one commit.

    For each entry returned by ``db.list(parse=True)``, the descriptors
    recorded for it are removed and re-added under the entry produced by
    ``f``.

    Args:
        f: callable mapping an existing IEML object to its replacement.
        message: free text appended to the commit message.
    """
    db = IEMLDatabase(folder=self.gitdb.folder,
                      use_cache=self.use_cache,
                      cache_folder=self.cache_folder)
    desc = db.get_descriptors()

    with self.gitdb.commit(
            self.signature,
            '[IEML migration] Update all ieml in db: {}'.format(message)):
        for old_ieml in tqdm.tqdm(db.list(parse=True), "Migrate all usls"):
            new_ieml = f(old_ieml)

            # Read the old values before deleting them.
            old_values = desc.get_values_partial(old_ieml)
            db.remove_descriptor(old_ieml, None, None)

            # Entries are keyed by (ieml, language, descriptor) tuples, as in
            # the other callers of get_values_partial on descriptors. The
            # previous code indexed by language and read ``e`` before
            # binding it.
            for (_, lang, descriptor), values in old_values.items():
                for v in values:
                    db.add_descriptor(new_ieml, lang, descriptor, v)
def create_root_paradigm(self, root, inhibitions, translations, comments):
    """Create a root paradigm and the headers of its main tables.

    ``root`` is normalised with ``_check_script`` and must be a paradigm
    (``len(root) != 1``) that has no descriptor yet and whose singular
    sequences do not belong to any existing root paradigm. In one commit the
    script is flagged ``is_root = True`` and its inhibitions, translations
    and comments are stored. Afterwards every entry of ``root.tables_script``
    other than ``root`` itself is added with ``add_morpheme_paradigm``, using
    the translations/comments transformed by ``append_idx_to_dict``.

    Args:
        root: the script to register as a root paradigm.
        inhibitions: passed through ``_check_inhibitions``; each resulting
            item is stored as an ``inhibition`` structure entry.
        translations: passed through ``_check_descriptors``, then indexed by
            every entry of ``LANGUAGES``.
        comments: passed through ``_check_descriptors``, then indexed by
            every entry of ``LANGUAGES``.

    Raises:
        ValueError: if ``root`` is not a paradigm, already has descriptors,
            or overlaps an existing root paradigm.
    """
    db = IEMLDatabase(folder=self.gitdb.folder,
                      use_cache=self.use_cache,
                      cache_folder=self.cache_folder)

    root = _check_script(root)
    if len(root) == 1:
        raise ValueError(
            "The script is not a paradigm {}, can't use it to define a root paradigm."
            .format(str(root)))

    translations = _check_descriptors(translations)
    comments = _check_descriptors(comments)

    # The script must not exist yet (no descriptor recorded for it).
    if db.get_descriptors().get_values_partial(root):
        raise ValueError(
            "Script {} already exists in dictionary".format(root))

    dictionary = db.get_dictionary()
    for ss in root.singular_sequences:
        try:
            r = dictionary.tables.root(ss)
        except KeyError:
            # Not covered by any root paradigm yet: this is what we want.
            continue
        raise ValueError(
            "Root paradigms {} intersection with script {} ".format(
                str(r), str(root)))

    # The root was just verified to have no stored descriptors, so reading
    # its translations from the database always produced an empty label.
    # Describe the translations that are about to be written instead.
    root_labels = " / ".join(
        "{}:{}".format(lang, ', '.join(str(v) for v in translations[lang]))
        for lang in LANGUAGES)
    message = "[dictionary] Create root paradigm {} ({}), create {} singular sequences"\
        .format(str(root), root_labels, len(root.singular_sequences))

    with self.gitdb.commit(self.signature, message):
        db.remove_structure(root, 'is_root')
        db.add_structure(root, 'is_root', True)
        for i in _check_inhibitions(inhibitions):
            db.add_structure(root, 'inhibition', i)

        for lang in LANGUAGES:
            for v in translations[lang]:
                db.add_descriptor(root,
                                  language=lang,
                                  descriptor='translations',
                                  value=v)
            for v in comments[lang]:
                db.add_descriptor(root,
                                  language=lang,
                                  descriptor='comments',
                                  value=v)

    # Add the main table headers. This runs after the commit above because
    # add_morpheme_paradigm opens its own database handle and commit, and
    # looks up the root paradigm of each singular sequence.
    main_tables = [tt for tt in root.tables_script if tt != root]
    for i, t in enumerate(main_tables):
        self.add_morpheme_paradigm(
            t,
            translations=append_idx_to_dict(translations, i),
            comments=append_idx_to_dict(comments, i))
def update_morpheme_paradigm(
        self,
        script_old: Script,
        script_new: Script,
):
    """Rename a paradigm of the dictionary from ``script_old`` to ``script_new``.

    Both scripts are normalised with ``_check_script``; nothing happens when
    they are equal. ``script_old`` must be in the dictionary and
    ``script_new`` must not. The singular sequences of ``script_new`` must
    resolve to exactly one root paradigm.

    In one commit:

    * root paradigm (``is_root`` structure value starting with ``t``/``T``):
      ``script_old`` must be contained in ``script_new``. The old structure
      entries are copied to ``script_new`` and ``script_old`` is kept with
      ``is_root = 'False'``. Each descriptor is copied to ``script_new`` and
      re-added on ``script_old`` with a "(translation migrated to ...)" prefix.
    * non-root paradigm: the old structure is removed, ``script_new`` gets
      ``is_root = 'False'`` and the descriptors are moved to ``script_new``.

    Raises:
        AssertionError: if one of the preconditions above is violated.
        ValueError: if a non-root paradigm covers a singular sequence that
            belongs to no root paradigm.
    """
    script_old = _check_script(script_old)
    script_new = _check_script(script_new)

    if script_old == script_new:
        return

    # NOTE(review): with ``or`` this only rejects the case where BOTH scripts
    # are singular sequences; the message suggests ``and`` may have been
    # intended. Left unchanged; confirm against callers.
    assert len(script_old) != 1 or len(
        script_new) != 1, "Can't update singular sequences, only paradigms"

    db = IEMLDatabase(folder=self.gitdb.folder,
                      use_cache=self.use_cache,
                      cache_folder=self.cache_folder)
    d = db.get_dictionary()
    desc = db.get_descriptors()
    ds = db.get_structure()

    assert script_old in d.scripts, "Source script not defined in dictionary"
    assert script_new not in d.scripts, "Target script already defined in dictionary"

    # The result is not needed, but the lookup is kept so that a script
    # without root paradigm still fails here as it did before.
    d.tables.root(script_old)

    is_root = ds.get_values(script_old, 'is_root')
    is_root = is_root and is_root[0][0].lower() == 't'

    root_new_cand = set()
    for ss in script_new.singular_sequences:
        try:
            root_new_cand.add(d.tables.root(ss))
        except KeyError:
            # A root paradigm may grow over sequences not yet in any root;
            # a regular paradigm may not.
            if not is_root:
                raise ValueError(
                    "A non root paradigm is defined over singular sequences that are in no paradigms"
                )

    assert len(
        root_new_cand
    ) == 1, "No root paradigms or too many for script {}".format(
        str(script_new))

    # The template has two placeholders only; the former third argument
    # (translations of the not-yet-existing script_new) was never rendered.
    message = "[dictionary] Update paradigm IEML from {} to {}"\
        .format(str(script_old), str(script_new))

    if is_root:
        # 1st case: root paradigm
        assert script_old in script_new, "Can only update a root paradigm to a bigger version of it"

        # then we can update it to a bigger version of it
        old_structure = ds.get_values_partial(script_old)

    # transfers translations and structure
    with self.gitdb.commit(self.signature, message):
        if is_root:
            db.remove_structure(script_old)
            db.add_structure(script_old, 'is_root', 'False')

            for (_, key), values in old_structure.items():
                for v in values:
                    db.add_structure(script_new, key, v)
        else:
            db.remove_structure(script_old)
            db.add_structure(script_new, 'is_root', 'False')

        db.remove_descriptor(script_old)

        for (_, l, k), values in desc.get_values_partial(script_old).items():
            for v in values:
                db.add_descriptor(script_new, l, k, v)
                if is_root:
                    # Leave a pointer to the new script on the old root.
                    db.add_descriptor(
                        script_old, l, k,
                        '(translation migrated to {}) '.format(
                            str(script_new)) + v)
# print(str(e)) # print("\t", str(s)) # to_pass = False # while not to_pass: # c = input('\t[r]emove/[u]pdate/[p]ass') # if c == 'u': # to_migrate[s] = _s # to_pass = True # elif c == 'r': # to_remove.append(s) # to_pass = True # elif c == 'p': # to_pass = True with gitdb.commit(signature, "[Filter database - Remove USL]"): for old, new in to_migrate.items(): to_remove.append(old) for (_, key), values in struct.get_values_partial(old).items(): for v in values: db.add_structure(new, key, v) for (_, lang, d), values in desc.get_values_partial(old).items(): for v in values: db.add_descriptor(new, lang, d, v) for old in to_remove: db.remove_structure(old, normalize=False) db.remove_descriptor(old, normalize=False)