コード例 #1
0
    def get_list(self):
        """Return one summary dict per parsable IEML expression, sorted by index.

        Walks the rows of ``self.get_descriptors().df`` (each row is unpacked
        as an ``(ieml, lang, desc)`` key and a single value). The first time
        an expression is seen it is parsed and a summary entry is created;
        every row then appends its value under ``entry[desc][lang]``.
        Expressions that raise ``CannotParse`` are skipped.

        :return: the list of summary dicts ordered by their ``'index'`` value.
        """
        entries = {}
        dictionary = self.get_dictionary()
        parser = IEMLParser(dictionary=dictionary)

        rows = tqdm(self.get_descriptors().df.iterrows(),
                    "List all descriptors at {}".format(self.folder))
        for (ieml, lang, desc), (v, ) in rows:
            if ieml not in entries:
                try:
                    parsed = parser.parse(ieml)
                except CannotParse:
                    # Rows whose expression cannot be parsed are left out.
                    continue

                assert str(parsed) == ieml
                type_idx, rank = get_index(parsed, dictionary)
                if type_idx == 0 and isinstance(parsed, PolyMorpheme):
                    # Describe the first constant of the polymorpheme instead.
                    parsed = parsed.constant[0]

                entry = {
                    'ieml': str(parsed),
                    'type': TYPES[type_idx],
                    'paradigm': len(parsed) != 1,
                    'class': GRAMMATICAL_CLASS_NAMES[
                        parsed.grammatical_class].lower().capitalize(),
                    'index': rank,
                }
                if parsed.cardinal == 1:
                    entry['cardinality'] = 'singular_sequence'
                elif isinstance(parsed, Script) \
                        and parsed in dictionary.tables.roots:
                    entry['cardinality'] = 'root_paradigm'
                else:
                    entry['cardinality'] = 'paradigm'
                entry['domains'] = []
                entries[ieml] = entry

            by_descriptor = entries[ieml]
            if desc not in by_descriptor:
                by_descriptor[desc] = {l: [] for l in LANGUAGES}
            by_descriptor[desc][lang].append(v)

        return sorted(entries.values(), key=lambda e: e['index'])
コード例 #2
0
ファイル: test_literal.py プロジェクト: bhutanmont/ieml
    def test_parse_flexion(self):
        """Parse a polymorpheme carrying one empty-literal decoration on a flexion path."""
        p = "E:S:.-U:.-t.o.-' [>E:S:.-U:.-t.o.-' \"\"]"
        parser = IEMLParser()
        res = parser.parse(p)

        self.assertIsInstance(res, InstancedUSL)
        self.assertIsInstance(res.usl, PolyMorpheme)
        # assertTrue(x, 1) would take 1 as the failure message and only check
        # truthiness; assertEqual actually pins the number of decorations.
        self.assertEqual(len(res.decorations), 1)
        self.assertIsInstance(res.decorations[0].path, FlexionPath)
        self.assertEqual(res.decorations[0].value, '')
コード例 #3
0
ファイル: test_literal.py プロジェクト: bhutanmont/ieml
    def test_parse_empty_path(self):
        """Parse a decorated polymorpheme and compare its decoration path with PathParser's result."""
        p = "T: l.-T:.U:.-',n.-T:.A:.-',t.o.-f.o.-',_ m1(S:.E:A:T:.- T:.E:A:S:.-) m1(p.E:S:B:.- s.-S:.U:.-') [>group_1>s.-S:.U:.-' \"\"]"
        pathparser = IEMLParser()
        res = pathparser.parse(p)
        self.assertIsInstance(res, InstancedUSL)
        self.assertIsInstance(res.usl, PolyMorpheme)
        # assertTrue(x, 1) would take 1 as the failure message and only check
        # truthiness; assertEqual actually pins the number of decorations.
        self.assertEqual(len(res.decorations), 1)
        self.assertEqual(res.decorations[0].value, '')

        # The path embedded in the USL must equal the same path parsed alone.
        path = PathParser().parse(">group_1>s.-S:.U:.-'")
        self.assertIsInstance(path, PolymorphemePath)
        self.assertEqual(res.decorations[0].path, path)
コード例 #4
0
    def _process_line(self, lines_iter, parse=False):

        if parse:
            parser = IEMLParser(dictionary=self.get_dictionary())

        for l in lines_iter:
            if not l.strip():
                continue

            l = l.strip().decode('utf8')
            if parse:
                return parser.parse(l)
            else:
                return l
コード例 #5
0
    def path_of(self, _ieml, descriptor=True, mkdir=False, normalize=True):
        """Compute the file path under ``self.folder`` for an IEML expression.

        The path is ``<folder>/<class folder>/<singular|paradigm>/<prefix>/
        <filename><ext>``, where the class folder and the prefix length come
        from ``self.CLASS_TO_FOLDER`` and the filename from
        ``self.filename_of``.

        :param _ieml: an IEML string (parsed with ``IEMLParser``) or an
            already parsed object.
        :param descriptor: selects the ``.desc`` extension when true,
            ``.ieml`` otherwise.
        :param mkdir: when true, the containing directory is created.
        :param normalize: when true the filename is derived from the parsed
            object, otherwise from ``_ieml`` as it was given.
        :return: the full path of the file.
        """
        ieml = IEMLParser().parse(_ieml) if isinstance(_ieml, str) else _ieml

        ext = '.desc' if descriptor else '.ieml'

        # An instanced USL is filed under the class of the USL it wraps.
        if isinstance(ieml, InstancedUSL):
            usl_class = ieml.usl.__class__
        else:
            usl_class = ieml.__class__
        class_folder, prefix_size = self.CLASS_TO_FOLDER[usl_class]

        filename = self.filename_of(ieml if normalize else _ieml)

        cardinality_folder = 'singular' if len(ieml) == 1 else 'paradigm'
        directory = os.path.join(self.folder, class_folder,
                                 cardinality_folder, filename[:prefix_size])
        if mkdir:
            os.makedirs(directory, exist_ok=True)

        return os.path.join(directory, filename + ext)
コード例 #6
0
    def list(
        self,
        type=None,
        paradigm=None,
        parse=False,
    ):
        """List the IEML expressions found in the ``.desc`` files of the database.

        Runs the pipeline ``find -path *.desc -print0 | xargs -0 cat |
        cut -f2 -d '"' | uniq`` from the selected folder and returns the
        non-blank output lines, stripped and decoded as UTF-8.

        :param type: optional sub-folder of ``self.folder`` to restrict the
            search to; either a string or an object whose lower-cased
            ``__name__`` is used.
        :param paradigm: only used when ``type`` is given; ``True`` selects
            the ``paradigm`` sub-folder, ``False`` the ``singular`` one and
            ``None`` keeps both.
        :param parse: when true, each entry is parsed with an ``IEMLParser``
            built on ``self.get_dictionary()``; entries raising
            ``CannotParse`` are reported through ``error`` and dropped.
        :return: a list of strings, or of parsed objects when ``parse`` is
            true.
        """
        p = self.folder
        if type:
            if not isinstance(type, str):
                type = type.__name__.lower()
            p = os.path.join(p, type)
            if paradigm is not None:
                p = os.path.join(p, 'paradigm' if paradigm else 'singular')

        pipeline = [
            "find -path *.desc -print0".split(),
            "xargs -0 cat".split(),
            ["cut", "-f2", '-d', '"'],
            ["uniq"],
        ]

        procs = []
        upstream = None
        try:
            for command in pipeline:
                proc = subprocess.Popen(command,
                                        stdin=upstream,
                                        stdout=subprocess.PIPE,
                                        cwd=p)
                procs.append(proc)
                if upstream is not None:
                    # Drop the parent's copy of the pipe so the producer can
                    # receive SIGPIPE if its consumer exits early.
                    upstream.close()
                upstream = proc.stdout

            lines = upstream.readlines()
        finally:
            # Close every pipe and reap every child: the original code left
            # the processes un-waited and the descriptors open.
            for proc in procs:
                proc.stdout.close()
                proc.wait()

        res = [s.strip().decode('utf8') for s in lines if s.strip()]

        if parse:
            parser = IEMLParser(dictionary=self.get_dictionary())
            _res = []
            for s in res:
                try:
                    _res.append(parser.parse(s))
                except CannotParse as e:
                    error("Cannot parse {} : {}".format(s, repr(e)))
            return _res

        return res
コード例 #7
0
ファイル: path.py プロジェクト: bhutanmont/ieml
def usl_from_path_values(paths_values):
    """Build a USL from ``(path, value)`` pairs.

    Each path is parsed with ``PathParser`` and each value with
    ``IEMLParser``; values sharing the same parsed path are gathered in a
    set. The paths are then unrolled, level by level, into a nested tree of
    bins, and every bin is built bottom-up by calling
    ``build_usl_from_path_to_node`` on the path class recorded for it.

    :param paths_values: iterable of ``(path, value)`` pairs; it is iterated
        twice.
    :return: the node built for the root bin.
    :raises ValueError: when two paths disagree on the path class at the same
        level ("Inconsistent path system").
    """
    from ieml.usl.decoration.parser.parser import PathParser
    from ieml.usl.parser import IEMLParser

    parse_path = PathParser().parse
    parse_usl = IEMLParser().parse

    # First pass registers every path, second pass attaches the values.
    values_by_path = {}
    for raw_path, _ in paths_values:
        values_by_path[parse_path(raw_path)] = set()
    for raw_path, raw_value in paths_values:
        values_by_path[parse_path(raw_path)].add(parse_usl(raw_value))

    def make_tree():
        # Auto-vivifying nested mapping: missing keys become sub-trees.
        return defaultdict(make_tree)

    root = make_tree()

    def group(node, path, values):
        # Descend one path level, recording the path class seen at this level.
        key = path.no_child_clone()

        if 'type' not in node:
            node['type'] = path.__class__
        elif not isinstance(path, node['type']):
            raise ValueError("Inconsistent path system")

        if path.child is not None:
            group(node[key], path.child, values)
        else:
            node[key]["node"] = values

    def build(node):
        # Leaves already hold their values; inner bins are built from children.
        if 'node' in node:
            return node['node']

        children = {
            key: build(child)
            for key, child in node.items() if isinstance(key, UslPath)
        }

        assert 'type' in node
        node['node'] = node['type'].build_usl_from_path_to_node(children)
        return node['node']

    for parsed_path, values in values_by_path.items():
        group(root, parsed_path, list(values))

    return build(root)
コード例 #8
0
 def test_invalid_cannot_parse_polymorpheme(self):
     """Each listed polymorpheme string must make the parser raise CannotParse."""
     # Currently disabled inputs: "m1(wo. wa.)m1(U:)" and "m4(U: S: E: T:)".
     invalid_sources = ("U: wa. m1()", )
     for source in invalid_sources:
         with self.assertRaises(CannotParse):
             IEMLParser().parse(source)
コード例 #9
0
ファイル: migrate_v03Tov04.py プロジェクト: bhutanmont/ieml
def migrate(database, out_folder):
    """Copy the structure and descriptors of ``database`` into ``out_folder``.

    The ``descriptors`` and ``structure`` sub-folders of ``out_folder`` are
    removed first, then an ``IEMLDatabase`` is opened on ``out_folder``.
    Every structure row is re-parsed with ``factorize_script=True`` and
    registered as a root together with its inhibitions; every descriptor is
    written to the file returned by ``path_of`` for its re-parsed expression,
    one ``"<ieml>" <lang> <desc> "<value>"`` line per value.

    :param database: source database exposing ``descriptors()`` and
        ``dictionary_structure()``.
    :param out_folder: destination folder.
    """
    descriptors = database.descriptors()
    dictionary = database.dictionary_structure()

    for stale in ('/descriptors', '/structure'):
        shutil.rmtree(out_folder + stale)

    db2 = IEMLDatabase(out_folder)

    if not os.path.isdir(out_folder):
        os.mkdir(out_folder)

    structure_rows = tqdm.tqdm(dictionary.structure.iterrows(),
                               'migrating structure')
    for ieml, (_paradigms, inhibitions) in structure_rows:
        root = IEMLParser().parse(ieml, factorize_script=True)

        db2.add_structure(str(root), 'is_root', True)
        for inhibition in inhibitions:
            db2.add_structure(str(root), 'inhibition', inhibition)

    # Group the descriptor values by expression, then by (language, descriptor).
    grouped = defaultdict(lambda: defaultdict(dict))
    for (ieml, lang, desc), (row) in descriptors:
        grouped[ieml][(lang, desc)] = row.values[0]

    for ieml, by_key in tqdm.tqdm(grouped.items(), 'migrating descriptors'):
        script = IEMLParser().parse(ieml, factorize_script=True)

        path = db2.path_of(script)

        # Everything before the last '/' is the directory that must exist.
        os.makedirs(path.rpartition('/')[0], exist_ok=True)

        with open(path, 'w') as fp:
            for (lang, desc), values in by_key.items():
                for value in values:
                    fp.write('"{}" {} {} "{}"\n'.format(
                        str(script), lang, desc, db2.escape_value(value)))
コード例 #10
0
def normalize_key(ieml,
                  key,
                  value,
                  parse_ieml=False,
                  partial=False,
                  structure=False):
    """Validate and stringify an ``(ieml, key, value)`` triple.

    :param ieml: IEML expression; converted with ``str`` when truthy and,
        if ``parse_ieml`` is set, replaced by the string form of its parse.
    :param key: a structure key when ``structure`` is true, otherwise a
        language; checked against ``STRUCTURE_KEYS`` or ``LANGUAGES``.
    :param value: for structures, an inhibition (checked against
        ``INHIBITABLE_RELATIONS``) or a boolean for ``is_root`` /
        ``is_ignored``; otherwise a descriptor checked against
        ``DESCRIPTORS_CLASS``.
    :param parse_ieml: parse ``ieml`` with ``IEMLParser`` to normalize it.
    :param partial: when true, missing fields are accepted.
    :param structure: selects structure validation instead of descriptor
        validation.
    :return: the normalized ``(ieml, key, value)`` tuple.
    :raises ValueError: on a missing required field or an unsupported
        key or value.
    """
    if not partial and (ieml is None or not key or not (structure or value)):
        raise ValueError("IEML and Key can't be null")

    if ieml:
        ieml = str(ieml)
        if parse_ieml:
            ieml = str(IEMLParser().parse(str(ieml)))

    if not structure:
        # Descriptor triple: key is a language, value a descriptor class.
        if key:
            key = str(key)
            if key not in LANGUAGES:
                raise ValueError("Unsupported language: '{}'".format(str(key)))

        if value:
            value = str(value)
            if value not in DESCRIPTORS_CLASS:
                raise ValueError("Unsupported descriptor: '{}'".format(
                    str(value)))

        return ieml, key, value

    if key:
        key = str(key)
        if key not in STRUCTURE_KEYS:
            raise ValueError("Unsupported structure key: '{}'".format(
                str(key)))

    if value and key:
        if key == 'inhibition':
            value = str(value)
            if value not in INHIBITABLE_RELATIONS:
                raise ValueError("Unsupported inhibition: {}".format(value))
        elif key in ('is_root', 'is_ignored'):
            # Accept True/"true"/"False"... by decoding the lower-cased text.
            value = json.loads(str(value).lower())
            if not isinstance(value, bool):
                raise ValueError(
                    "is_root or is_ignored field only accept boolean, not {}"
                    .format(value))
            value = str(value)

    return ieml, key, value
コード例 #11
0
    def test_invalid_cannot_check_polymorpheme(self):
        """Strings that parse to a PolyMorpheme but must be rejected by check_polymorpheme."""
        # Currently disabled input: "m1(wo. wa.) m1(U:)".
        rejected_sources = (
            "U: wa. m0(U:)",
            "m4(U: S: E: T:)",
        )
        for source in rejected_sources:
            parsed = IEMLParser().parse(source)
            assert isinstance(parsed, PolyMorpheme)

            with self.assertRaises(ValueError):
                check_polymorpheme(parsed)
コード例 #12
0
ファイル: test_path.py プロジェクト: bhutanmont/ieml
    def test_usl_from_path_pm(self):
        """Rebuild a polymorpheme from ``>constant`` paths and compare its string form."""
        raw_pairs = [
            (">constant>b.-S:.A:.-'S:.-'S:.-',", "b.-S:.A:.-'S:.-'S:.-',"),
            (">constant>k.a.-k.a.-'", "k.a.-k.a.-'"),
            (">constant", "U:"),
            (">constant", "E:"),
        ]
        parse_usl = IEMLParser().parse
        parse_path = PathParser().parse

        pairs = [(parse_path(path), parse_usl(value))
                 for path, value in raw_pairs]

        rebuilt = usl_from_path_values(pairs)
        self.assertEqual(str(rebuilt), "U: k.a.-k.a.-' b.-S:.A:.-'S:.-'S:.-',")
コード例 #13
0
ファイル: test_path.py プロジェクト: bhutanmont/ieml
    def test_usl_from_path_flexion_paradigm(self):
        """Two values on ``>flexion`` plus one constant must yield a flexion paradigm."""
        raw_pairs = [
            (">flexion", "E:.wo.U:.-t.o.-'"),
            (">flexion", "E:.wo.A:.-t.o.-'"),
            (">content>constant", "U:"),
        ]
        parse_usl = IEMLParser().parse
        parse_path = PathParser().parse

        pairs = [(parse_path(path), parse_usl(value))
                 for path, value in raw_pairs]

        rebuilt = usl_from_path_values(pairs)
        self.assertEqual(str(rebuilt),
                         "(m1(E:.wo.U:.-t.o.-' E:.wo.A:.-t.o.-'))(U:)")
コード例 #14
0
    def test_polymorpheme(self):
        """Valid polymorphemes round-trip through the parser and expose distinct singular sequences."""
        sources = (
            "U: wo. wa.",
            "U: m2(wo. wa.)",
            "m1(U:) m1(S:)",
            "o. m1(U: S:) m2(t. m.)",
            "o. m2(A: S: B: T:) m2(y. t.)",
        )
        for source in sources:
            parsed = IEMLParser().parse(source)
            assert str(parsed) == str(source), "{} != {}".format(
                str(parsed), str(source))
            assert isinstance(parsed, PolyMorpheme)

            # Every singular sequence must be valid and appear only once.
            seen = set()
            for sequence in parsed.singular_sequences:
                assert sequence.cardinal == 1
                sequence.check()
                assert sequence not in seen
                seen.add(sequence)
コード例 #15
0
    def test_word(self):
        """Word strings parse to Word objects whose singular sequences are distinct Words."""
        sources = (
            "[! E:A:.  ()(b.-S:.A:.-'S:.-'S:.-', m1(S: B: T:)) > E:A:. E:A:. ()(k.a.-k.a.-')]",
            "[! E:S:. (m1(E:.-',b.-S:.U:.-'y.-'U:.-',_ E:.-',b.-S:.U:.-'y.-'A:.-',_))(wa.) > E:.n.- ()(n.i.-s.i.-') > E:.f.- (E:U:S:.)]",
        )
        for source in sources:
            word = IEMLParser().parse(source)
            assert isinstance(word, Word)

            # Every singular sequence must be valid, unique and itself a Word.
            seen = set()
            for sequence in word.singular_sequences:
                assert sequence.cardinal == 1
                sequence.check()
                assert sequence not in seen
                seen.add(sequence)
                assert isinstance(sequence, Word)
コード例 #16
0
def ieml(arg, dictionary):
    """Cast ``arg`` to an IEML object.

    - a ``Usl`` is returned unchanged;
    - a ``str`` is parsed with ``IEMLParser(dictionary)``;
    - a ``Script`` is wrapped in a ``Word``.

    :param arg: the object to cast.
    :param dictionary: dictionary handed to ``IEMLParser`` for string input.
    :return: the resulting IEML object.
    :raises InvalidIEMLObjectArgument: when a string cannot be parsed.
    :raises NotImplementedError: when ``arg`` has an unsupported type.
    """
    if isinstance(arg, Usl):
        return arg

    if isinstance(arg, str):
        try:
            return IEMLParser(dictionary).parse(arg)
        except CannotParse as e:
            # Keep the parser error attached as the cause of the new one.
            raise InvalidIEMLObjectArgument(Usl, str(e)) from e

    if isinstance(arg, Script):
        arg = Word(arg)
        # NOTE(review): `dictionary_version` is not defined in this function;
        # presumably a module-level name -- confirm it exists.
        if arg.dictionary_version != dictionary_version:
            arg.set_dictionary_version(dictionary_version)

        return arg

    # `NotImplemented` is a constant, not an exception: raising it fails with
    # a TypeError. NotImplementedError is the exception meant here.
    raise NotImplementedError(
        "Cannot cast an object of type {} to an IEML object".format(
            arg.__class__.__name__))
コード例 #17
0
ファイル: test_path.py プロジェクト: bhutanmont/ieml
    def test_usl_from_path(self):
        """Rebuild a word from role/flexion/content paths and compare it with the parsed USL."""
        raw_pairs = [
            (">role>! E:A:.>flexion>E:", "E:"),
            (">role>! E:A:.>content>constant>b.-S:.A:.-'S:.-'S:.-',",
             "b.-S:.A:.-'S:.-'S:.-',"),
            (">role>E:A:. E:A:.>flexion>E:", "E:"),
            (">role>E:A:. E:A:.>flexion>E:U:T:.", "E:U:T:."),
            (">role>E:A:. E:A:.>flexion>E:A:T:.", "E:A:T:."),
            (">role>E:A:. E:A:.>flexion>E:S:T:.", "E:S:T:."),
            (">role>E:A:. E:A:.>flexion>E:B:T:.", "E:B:T:."),
            (">role>E:A:. E:A:.>flexion>E:T:T:.", "E:T:T:."),
            (">role>E:A:. E:A:.>content>constant>k.a.-k.a.-'", "k.a.-k.a.-'"),
        ]
        parse_usl = IEMLParser().parse
        parse_path = PathParser().parse

        pairs = [(parse_path(path), parse_usl(value))
                 for path, value in raw_pairs]

        rebuilt = usl_from_path_values(pairs)
        expected = usl(
            "[! E:A:.  ()(b.-S:.A:.-'S:.-'S:.-',) > E:A:. E:A:. (m1(E:U:T:. E:A:T:. E:S:T:. E:B:T:. E:T:T:.))(k.a.-k.a.-')]"
        )
        self.assertEqual(rebuilt, expected)
コード例 #18
0
ファイル: remove_usl.py プロジェクト: baajur/ieml
                         folder=folder)
    #
    gitdb.pull()

    signature = pygit2.Signature("Louis van Beurden",
                                 "*****@*****.**")

    db = IEMLDatabase(folder=folder, use_cache=False)

    desc = db.get_descriptors()
    struct = db.get_structure()

    to_migrate = {}
    to_remove = []

    parser = IEMLParser(dictionary=db.get_dictionary())

    all_db = db.list()
    # assert "[E:.b.E:B:.- E:S:. ()(a.T:.-) > ! E:.l.- ()(d.i.-l.i.-')]" in all_db
    for s in TO_REMOVE:
        to_pass = True

        try:
            _s = parser.parse(s)
        except CannotParse as e:
            print(str(e))
            print("\t", str(s))
            to_pass = False
        else:
            if s not in all_db:
                repr("{} not in database".format(s))
コード例 #19
0
ファイル: usl.py プロジェクト: bhutanmont/ieml
def usl(
    arg: Union[str, Script, USL, Iterable[Tuple['UslPath', Union[USL,
                                                                 Script]]]]
) -> USL:
    """
    Cast the argument to a USL, depending on its type.

     - a string is parsed by ieml.usl.parser.IEMLParser.parse
     - an ieml.dictionary.Script becomes an ieml.usl.polymorpheme.PolyMorpheme
       holding the script as its only constant
     - an ieml.usl.usl.USL is returned as is
     - an iterable of (ieml.usl.decoration.path.UslPath, USL or Script) pairs
       is assembled with usl_from_path_values; an empty iterable gives an
       empty PolyMorpheme

    :param arg: the object to cast
    :type arg: Union[str, Script, USL, Iterable[Tuple['UslPath', Union[USL, Script]]]]
    :return: an ieml.usl.usl.USL
    :raises ValueError: if the iterable holds anything but (UslPath, USL or
        Script) pairs
    :raises NotImplementedError: if the argument is of none of the above kinds
    """
    if isinstance(arg, str):
        from ieml.usl.parser import IEMLParser
        return IEMLParser().parse(arg)

    if isinstance(arg, Script):
        from ieml.usl import PolyMorpheme
        return PolyMorpheme(constant=[arg])

    if isinstance(arg, USL):
        return arg

    # Anything left must be an iterable of (path, usl) pairs.
    try:
        pairs = list(arg)
    except TypeError:
        pairs = None

    if pairs is None:
        raise NotImplementedError()

    if not pairs:
        from ieml.usl import PolyMorpheme
        return PolyMorpheme(constant=[])

    from ieml.usl.decoration.path import UslPath, usl_from_path_values

    if any(not (isinstance(u, (USL, Script)) and isinstance(p, UslPath))
           for p, u in pairs):
        raise ValueError(
            "Invalid iterable of (UslPath, USL) to create an USL from.")

    return usl_from_path_values(pairs)