Ejemplo n.º 1
0
    def delete_morpheme_root_paradigm(self,
                                      script: Script,
                                      empty_descriptors=True):
        """Remove a root paradigm from the dictionary in a single commit.

        Args:
            script: the root paradigm to remove; normalised with
                ``_check_script`` before use.
            empty_descriptors: when true, also remove the descriptors of
                every script returned by
                ``relations.object(script, 'contains')``.

        Raises:
            ValueError: if ``script`` is not listed in the dictionary's
                root tables.
        """
        database = IEMLDatabase(folder=self.gitdb.folder,
                                use_cache=self.use_cache,
                                cache_folder=self.cache_folder)
        dictionary = database.get_dictionary()
        descriptors = database.get_descriptors()

        script = _check_script(script)
        if script not in dictionary.tables.roots:
            raise ValueError("Script {} is not a root paradigm".format(
                str(script)))

        # One "<language>:<translations>" entry per language, used to make
        # the commit message readable.
        per_language = []
        for lang in LANGUAGES:
            labels = ', '.join(
                descriptors.get_values(script, lang, 'translations'))
            per_language.append("{}:{}".format(lang, labels))

        message = "[dictionary] Remove root paradigm {} ({})".format(
            str(script), " / ".join(per_language))

        with self.gitdb.commit(self.signature, message):
            database.remove_structure(script)

            if empty_descriptors:
                # Materialise the related scripts before removing anything.
                related = list(dictionary.relations.object(script, 'contains'))
                for member in related:
                    database.remove_descriptor(member)
Ejemplo n.º 2
0
    def add_morpheme_paradigm(self, script: Script, translations, comments):
        """Register a new, non-root paradigm together with its descriptors.

        Args:
            script: the paradigm to create; normalised with
                ``_check_script``.
            translations: mapping indexed by each entry of ``LANGUAGES``,
                giving the translation values to store.
            comments: mapping indexed by each entry of ``LANGUAGES``,
                giving the comment values to store.

        Raises:
            ValueError: if ``script`` has length 1, is already in the
                dictionary, has a singular sequence with no root paradigm,
                or does not resolve to exactly one root paradigm.
        """
        database = IEMLDatabase(folder=self.gitdb.folder,
                                use_cache=self.use_cache,
                                cache_folder=self.cache_folder)
        dictionary = database.get_dictionary()

        script = _check_script(script)
        if len(script) == 1:
            raise ValueError(
                "The script is not a paradigm {}, can't use it to define a paradigm."
                .format(str(script)))

        if script in dictionary.scripts:
            raise ValueError(
                "Script {} already defined in the dictionary".format(
                    str(script)))

        # Every singular sequence must belong to a root paradigm, and all of
        # them must agree on which one.
        root_candidates = set()
        for singular in script.singular_sequences:
            try:
                root_candidates.add(dictionary.tables.root(singular))
            except KeyError:
                raise ValueError(
                    "No root paradigms contains this script {}".format(
                        str(script)))

        if len(root_candidates) != 1:
            raise ValueError(
                "No root paradigms or too many for script {}".format(
                    str(script)))

        (root,) = root_candidates
        descriptors = database.get_descriptors()

        def _translations_summary(target):
            # "<language>:<translations>" for each language, joined by " / ".
            parts = []
            for lang in LANGUAGES:
                labels = ', '.join(
                    descriptors.get_values(target, lang, 'translations'))
                parts.append("{}:{}".format(lang, labels))
            return " / ".join(parts)

        script_summary = _translations_summary(script)
        root_summary = _translations_summary(root)
        message = "[dictionary] Create paradigm {} ({}) for root paradigm {} ({})".format(
            str(script), script_summary, str(root), root_summary)

        with self.gitdb.commit(self.signature, message):
            # Clear anything already stored for this script before writing.
            database.remove_descriptor(script)
            database.remove_structure(script)

            database.add_structure(script, 'is_root', False)

            for lang in LANGUAGES:
                # Translations first, then comments, for each language.
                for kind, source in (('translations', translations),
                                     ('comments', comments)):
                    for entry in source[lang]:
                        database.add_descriptor(script,
                                                language=lang,
                                                descriptor=kind,
                                                value=entry)
Ejemplo n.º 3
0
    def test_remove(self):
        """The file at ``path_of('E:')`` exists before ``remove_descriptor`` and not after."""
        repositories = init_repo(['/tmp/iemldb_test/A'])
        database = IEMLDatabase(repositories[0].folder)
        target = 'E:'

        existed_before = os.path.isfile(database.path_of(target))
        self.assertTrue(existed_before)

        database.remove_descriptor(target)

        exists_after = os.path.isfile(database.path_of(target))
        self.assertFalse(exists_after)
Ejemplo n.º 4
0
def migrate(function, _s_old, _s_new):
    """Rewrite every dictionary script that ``function`` changes, in one commit.

    For each script ``s`` of the dictionary where ``function(s) != s``, the
    structure and descriptor entries stored for ``s`` are removed and
    re-added under ``function(s)``.

    Args:
        function: callable mapping a script to its migrated form.
        _s_old: sample input used as a sanity check and in the commit message.
        _s_new: expected value of ``function(_s_old)``.

    Raises:
        AssertionError: if ``function(_s_old) != _s_new``.
    """
    assert function(_s_old) == _s_new

    folder = '/tmp/migrate_script_iemldb'
    # Always start from an absent folder; presumably GitInterface populates
    # it from `origin` -- TODO confirm.
    if os.path.isdir(folder):
        shutil.rmtree(folder)
    git_address = "https://github.com/IEMLdev/ieml-language.git"

    # NOTE(review): the key paths start with '~' and are passed through
    # unexpanded -- confirm the credentials backend resolves them.
    credentials = pygit2.Keypair('ogrergo', '~/.ssh/id_rsa.pub',
                                 '~/.ssh/id_rsa', None)
    gitdb = GitInterface(origin=git_address,
                         credentials=credentials,
                         folder=folder)

    signature = pygit2.Signature("Louis van Beurden",
                                 "*****@*****.**")

    db = IEMLDatabase(folder=folder, use_cache=False)

    # Fetched before any removal: the values to re-add are read from these
    # objects after the old entries have been removed from the database.
    desc = db.get_descriptors()
    struct = db.get_structure()

    # Only scripts that actually change need to be migrated.
    to_migrate = {}
    for s in db.get_dictionary().scripts:
        s2 = function(s)
        if s2 != s:
            to_migrate[s] = s2

    print(to_migrate)

    # The closing quote after the second placeholder was missing.
    message = "[Translate script] Translate paradigm from '{}' to '{}'".format(
        str(_s_old), str(_s_new))

    with gitdb.commit(signature, message):
        for s_old, s_new in to_migrate.items():
            db.remove_structure(s_old)
            for (_, key), values in struct.get_values_partial(s_old).items():
                for v in values:
                    db.add_structure(s_new, key, v)

            db.remove_descriptor(s_old)
            for (_, lang, d), values in desc.get_values_partial(s_old).items():
                for v in values:
                    db.add_descriptor(s_new, lang, d, v)
Ejemplo n.º 5
0
    def set_descriptors(self, ieml, descriptor, value):
        """Replace the values of one descriptor kind for ``ieml``.

        Args:
            ieml: the target, normalised with ``_check_ieml``.
            descriptor: the descriptor kind to overwrite.
            value: per-language values, normalised with
                ``_check_descriptors`` and indexed by each entry of
                ``LANGUAGES``.

        Returns:
            ``False`` when the stored values already match ``value`` for
            every language (compared after sorting), ``True`` once a commit
            has been made.
        """
        database = IEMLDatabase(folder=self.gitdb.folder,
                                use_cache=self.use_cache,
                                cache_folder=self.cache_folder)

        ieml = _check_ieml(ieml)
        value = _check_descriptors(value)

        stored = database.get_descriptors()
        current = {}
        for lang in LANGUAGES:
            current[lang] = stored.get_values(ieml=ieml,
                                              language=lang,
                                              descriptor=descriptor)

        unchanged = all(
            sorted(value[lang]) == sorted(current[lang]) for lang in LANGUAGES)
        if unchanged:
            error("No update needed, db already contains {}:{} for {}".format(
                descriptor, json.dumps(value), str(ieml)))
            return False

        def _values_after_update(lang, kind):
            # The kind being updated takes the new values; every other kind
            # keeps what is currently stored.
            if kind == descriptor:
                return value[lang]
            return stored.get_values(ieml=ieml, language=lang, descriptor=kind)

        # If nothing would remain after the update, drop all descriptors of
        # `ieml` instead of writing empty ones.
        anything_left = any(
            _values_after_update(lang, kind)
            for lang in LANGUAGES for kind in DESCRIPTORS_CLASS)
        if not anything_left:
            removal_message = '[descriptors] Remove {}'.format(str(ieml))
            error(removal_message)
            with self.gitdb.commit(self.signature, removal_message):
                database.remove_descriptor(ieml)
            return True

        update_message = '[descriptors] Update {} for {} to {}'.format(
            descriptor, str(ieml), json.dumps(value))
        with self.gitdb.commit(self.signature, update_message):
            # Wipe this descriptor kind for all languages, then write the
            # new values back.
            database.remove_descriptor(ieml, None, descriptor)

            for lang in LANGUAGES:
                for entry in value[lang]:
                    database.add_descriptor(ieml, lang, descriptor, entry)

            return True
Ejemplo n.º 6
0
    def update_all_ieml(self, f, message: str):
        """Re-key the descriptors of every ieml in the database through ``f``.

        For each ieml returned by ``db.list(parse=True)``, its descriptors
        are removed and re-added under ``f(ieml)``, all inside one commit.

        Args:
            f: callable mapping an existing ieml to its replacement.
            message: text appended to the commit message.
        """
        db = IEMLDatabase(folder=self.gitdb.folder,
                          use_cache=self.use_cache,
                          cache_folder=self.cache_folder)
        # Fetched before any removal; the values to re-add are read from it.
        desc = db.get_descriptors()

        with self.gitdb.commit(
                self.signature,
                '[IEML migration] Update all ieml in db: {}'.format(message)):

            for old_ieml in tqdm.tqdm(db.list(parse=True), "Migrate all usls"):
                new_ieml = f(old_ieml)

                values = desc.get_values_partial(old_ieml)

                db.remove_descriptor(old_ieml, None, None)

                # get_values_partial() is consumed everywhere else in this
                # file as a dict keyed by (ieml, language, descriptor).
                # The previous nested indexing (`value[l][e]`) read `e`
                # before it was bound and could never run.
                for (_, lang, descriptor), entries in values.items():
                    for entry in entries:
                        db.add_descriptor(new_ieml, lang, descriptor, entry)
Ejemplo n.º 7
0
    def delete_morpheme_paradigm(self, script: Script):
        """Remove a paradigm's structure and descriptors in a single commit.

        Args:
            script: the paradigm to remove; normalised with
                ``_check_script``.

        Raises:
            ValueError: if ``script`` is in the dictionary and has length 1.
        """
        db = IEMLDatabase(folder=self.gitdb.folder,
                          use_cache=self.use_cache,
                          cache_folder=self.cache_folder)
        d = db.get_dictionary()
        descriptors = db.get_descriptors()

        script = _check_script(script)
        # NOTE(review): this only rejects length-1 scripts that are in the
        # dictionary; confirm whether scripts absent from the dictionary
        # should be rejected here as well.
        if script in d.scripts and len(script) == 1:
            raise ValueError("Script {} is not a paradigm".format(str(script)))

        root = d.tables.root(script)

        def _translations_summary(target):
            # "<language>:<translations>" for each language, joined by " / ".
            return " / ".join(
                "{}:{}".format(
                    lang,
                    ', '.join(descriptors.get_values(target, lang,
                                                     'translations')))
                for lang in LANGUAGES)

        # The format string previously had two placeholders for four
        # arguments, so the root paradigm and its translations were dropped.
        # It now mirrors the message used when a paradigm is created.
        message = "[dictionary] Remove paradigm {} ({}) for root paradigm {} ({})".format(
            str(script), _translations_summary(script),
            str(root), _translations_summary(root))

        with self.gitdb.commit(self.signature, message):
            db.remove_structure(script)
            db.remove_descriptor(script)
Ejemplo n.º 8
0
    def update_morpheme_paradigm(
        self,
        script_old: Script,
        script_new: Script,
    ):
        """Move a paradigm's structure and descriptors from one script to another.

        Both scripts are normalised with ``_check_script``. If they are equal
        the method returns immediately without touching the database.

        Two cases are handled inside a single commit:

        * ``script_old`` is a root paradigm: its structure is replaced by a
          single ``is_root = 'False'`` entry, its previous structure entries
          are copied to ``script_new``, and its descriptors are copied to
          ``script_new`` while also being re-added on ``script_old`` with a
          "(translation migrated to ...)" prefix.
        * otherwise: the structure of ``script_old`` is removed,
          ``script_new`` gets ``is_root = 'False'``, and the descriptors are
          copied to ``script_new``.

        Raises:
            AssertionError: if both scripts have length 1, if ``script_old``
                is not in the dictionary, if ``script_new`` already is, if
                ``script_new`` does not resolve to exactly one root paradigm,
                or (root case) if ``script_old in script_new`` is false.
            ValueError: if ``script_old`` is not a root paradigm and a
                singular sequence of ``script_new`` has no root paradigm.
        """
        script_old = _check_script(script_old)
        script_new = _check_script(script_new)

        if script_old == script_new:
            return

        # Fails only when BOTH scripts have length 1.
        assert len(script_old) != 1 or len(
            script_new) != 1, "Can't update singular sequences, only paradigms"

        db = IEMLDatabase(folder=self.gitdb.folder,
                          use_cache=self.use_cache,
                          cache_folder=self.cache_folder)
        d = db.get_dictionary()
        desc = db.get_descriptors()
        ds = db.get_structure()

        assert script_old in d.scripts, "Source script not defined in dictionary"
        assert script_new not in d.scripts, "Target script already defined in dictionary"
        # NOTE(review): root_old is never read afterwards; the lookup may
        # still matter if d.tables.root() raises for unknown scripts -- confirm.
        root_old = d.tables.root(script_old)
        is_root = ds.get_values(script_old, 'is_root')
        # Truthy only when a value exists and the first one starts with
        # 't'/'T'; otherwise is_root stays whatever falsy value get_values()
        # returned.
        is_root = is_root and is_root[0][0].lower() == 't'

        # Collect the root paradigm of every singular sequence of the target.
        root_new_cand = set()
        for ss in script_new.singular_sequences:
            try:
                root_new_cand.add(d.tables.root(ss))
            except KeyError:
                # For a root paradigm, sequences without a root are skipped
                # silently; for any other paradigm this is an error.
                if not is_root:
                    raise ValueError(
                        "A non root paradigm is defined over singular sequences that are in no paradigms"
                    )

        assert len(
            root_new_cand
        ) == 1, "No root paradigms or too many for script {}".format(
            str(script_new))
        # NOTE(review): root_new is not used after this point.
        root_new = next(iter(root_new_cand))

        # NOTE(review): the format string has two placeholders but three
        # arguments; str.format() ignores the extra one, so the translations
        # summary never reaches the commit message.
        message = "[dictionary] Update paradigm IEML from {} to {}"\
                          .format(str(script_old),
                                  str(script_new),
                                  " / ".join(
                                      "{}:{}".format(l, desc.get_values(script_new, l, 'translations')) for l in LANGUAGES))

        if is_root:
            # 1st case: root paradigm

            assert script_old in script_new, "Can only update a root paradigm to a bigger version of it"

            # then we can update it to a bigger version of it
            # old_structure is bound only in this branch; it is read again
            # below under the same is_root condition.
            old_structure = ds.get_values_partial(script_old)

        # transfers translations and structure
        with self.gitdb.commit(self.signature, message):

            if is_root:
                # The old script is kept, but with 'is_root' set to 'False';
                # its previous structure entries move to the new script.
                db.remove_structure(script_old)
                db.add_structure(script_old, 'is_root', 'False')

                for (_, key), values in old_structure.items():
                    for v in values:
                        db.add_structure(script_new, key, v)
            else:
                db.remove_structure(script_old)
                db.add_structure(script_new, 'is_root', 'False')

            db.remove_descriptor(script_old)

            # Values are read from `desc`, which was fetched before the
            # removal above.
            for (_, l,
                 k), values in desc.get_values_partial(script_old).items():
                for v in values:
                    db.add_descriptor(script_new, l, k, v)
                    if is_root:
                        # Leave a pointer to the new script on the old one.
                        db.add_descriptor(
                            script_old, l, k,
                            '(translation migrated to {}) '.format(
                                str(script_new)) + v)
Ejemplo n.º 9
0
            #     print(str(e))
            #     print("\t", str(s))
            #     to_pass = False

        # while not to_pass:
        #     c = input('\t[r]emove/[u]pdate/[p]ass')
        #     if c == 'u':
        #         to_migrate[s] = _s
        #         to_pass = True
        #     elif c == 'r':
        #         to_remove.append(s)
        #         to_pass = True
        #     elif c == 'p':
        #         to_pass = True

    with gitdb.commit(signature, "[Filter database - Remove USL]"):
        for old, new in to_migrate.items():
            to_remove.append(old)

            for (_, key), values in struct.get_values_partial(old).items():
                for v in values:
                    db.add_structure(new, key, v)

            for (_, lang, d), values in desc.get_values_partial(old).items():
                for v in values:
                    db.add_descriptor(new, lang, d, v)

        for old in to_remove:
            db.remove_structure(old, normalize=False)
            db.remove_descriptor(old, normalize=False)