Beispiel #1
0
    def test_threading(self):
        """Check that parsing and script building work from worker threads.

        Every dictionary term is mapped through ``self.parser.parse`` and then
        through ``script`` on a pool of 4 threads. The stringified results are
        compared, as sets, with the bracketed term strings (parser) and with
        the terms themselves (``script``).
        """
        # Snapshot the terms once so both runs and both expectations use the
        # exact same collection.
        terms = list(Dictionary().version.terms)

        pool = ThreadPool(4)
        try:
            results = pool.map(self.parser.parse, terms)
            self.assertSetEqual({str(t) for t in results},
                                {'[{0}]'.format(str(t)) for t in terms})

            results = pool.map(script, terms)
            self.assertSetEqual({str(t) for t in results}, set(terms))
        finally:
            # Always release the worker threads, even when an assertion fails.
            pool.close()
            pool.join()
Beispiel #2
0
    def __call__(cls, *args, **kwargs):
        """Return the instance associated with the requested dictionary version.

        The dictionary is taken from the first positional argument or, when no
        positional argument is given, from the ``dictionary`` keyword. A
        missing or ``None`` value falls back to ``Dictionary()``; any value
        that is not a ``Dictionary`` instance is passed to ``Dictionary(...)``.
        Instances are stored in ``cls._instances`` keyed by
        ``dictionary.version`` and created only on first request.
        """
        if args:
            dictionary = args[0]
        else:
            dictionary = kwargs.get('dictionary')

        if dictionary is None:
            dictionary = Dictionary()

        if not isinstance(dictionary, Dictionary):
            dictionary = Dictionary(dictionary)

        version = dictionary.version
        registry = cls._instances
        if version not in registry:
            # First request for this version: build the instance and keep it.
            registry[version] = super(IEMLParserSingleton, cls).__call__(
                dictionary=dictionary)

        return registry[version]
Beispiel #3
0
    def test_multiple_ieml_parser(self):
        """Compare parsers obtained with and without an explicit dictionary.

        Two default parsers must compare equal; a parser built from an explicit
        ``Dictionary`` must differ from the default one, and from a parser
        built with the ``from_version`` keyword.
        """
        first_default = IEMLParser()
        second_default = IEMLParser()
        self.assertEqual(first_default, second_default)

        from_dictionary = IEMLParser(
            Dictionary('dictionary_2017-06-07_00:00:00'))
        self.assertNotEqual(first_default, from_dictionary)

        from_keyword = IEMLParser(
            from_version='dictionary_2017-06-07_00:00:00')
        self.assertNotEqual(from_dictionary, from_keyword)
Beispiel #4
0
def factorize_root(root):
    """Describe the dictionary changes needed to factorize a root paradigm.

    :param root: root term; its ``script`` and ``inhibitions`` are read and it
        is used as a key into ``Dictionary().roots``.
    :return: ``(to_remove, to_add)`` where ``to_remove`` lists the string form
        of every current script under the root, and ``to_add`` is a dict with
        the keys ``'terms'``, ``'roots'``, ``'inhibitions'`` and
        ``'translations'`` (``'fr'``/``'en'``), all keyed by the string form of
        the factorized scripts.
    """
    factorized_root = factorize(root.script)
    root_key = str(factorized_root)
    root_inhibitions = root.inhibitions

    removed = []
    added_terms = []
    french = {}
    english = {}

    for member in Dictionary().roots[root]:
        removed.append(str(member.script))

        # The factorized script, as a string, keys every per-term entry.
        member_key = str(factorize(member.script))
        added_terms.append(member_key)
        french[member_key] = member.translations.fr
        english[member_key] = member.translations.en

    added = {
        'terms': added_terms,
        'roots': [root_key],
        'inhibitions': {root_key: root_inhibitions},
        'translations': {'fr': french, 'en': english},
    }
    return removed, added
    def test_cache(self):
        """Check that the cache reports itself pruned after a file changes.

        After each ``Dictionary.load`` the cache must not be pruned. Modifying
        a file of the cache folder (first by appending blank lines, then by
        rewriting it stripped with a single trailing newline) must make
        ``is_pruned()`` return True until the dictionary is loaded again.
        """
        Dictionary.load(DICTIONARY_FOLDER)
        self.assertFalse(self.cache.is_pruned())

        self._test_f = os.path.join(self.cache.folder,
                                    os.listdir(self.cache.folder)[0])
        # Append the blank lines directly instead of shelling out to ``echo``:
        # no shell quoting of the path is involved, and the bytes written do
        # not depend on how the platform's ``echo`` treats backslash escapes.
        with open(self._test_f, 'a') as fp:
            fp.write('\n\n\n')
        self.assertTrue(self.cache.is_pruned())

        Dictionary.load(DICTIONARY_FOLDER)
        self.assertFalse(self.cache.is_pruned())

        # Undo the modification: drop the surrounding whitespace and keep a
        # single trailing newline.
        with open(self._test_f, 'r') as fp:
            r = fp.read().strip() + '\n'

        with open(self._test_f, 'w') as fp:
            fp.write(r)

        self.assertTrue(self.cache.is_pruned())
        Dictionary.load(DICTIONARY_FOLDER)
        self.assertFalse(self.cache.is_pruned())
Beispiel #6
0
 def test_all_scripts(self):
     """Every dictionary script must survive a str -> parse -> str round trip."""
     script_parser = ScriptParser()
     expected = list(map(str, Dictionary.load().scripts))
     round_tripped = []
     for text in expected:
         round_tripped.append(str(script_parser.parse(text)))
     self.assertListEqual(round_tripped, expected)
 def setUp(self):
     """Load the dictionary with ``Dictionary.load()`` and keep it on ``self.d``."""
     loaded = Dictionary.load()
     self.d = loaded
Beispiel #8
0
def _build_distance_matrix(version):
    """Build the sparse relation-type and order matrices of a dictionary.

    Entries are accumulated as parallel (values, rows, cols) lists from three
    sources: the relations found in ``d.relations_graph`` for each term
    contained in a root, the ancestor pairs yielded by
    ``_enumerate_ancestors``, and the diagonal (every term gets
    ``RelationType.Equal`` with order 0 against itself).

    :param version: forwarded to ``Dictionary(version)``.
    :return: dict with the keys ``'relation'`` and ``'order'``, each an integer
        ``csr_matrix`` whose row and column positions are term ``index``
        values.
    """
    def _enumerate_ancestors(t, prefix='', seen=None):
        # Yield (prefix + k, ancestor) pairs by walking ``relations.father``
        # recursively. ``seen`` is shared by the whole recursion so that each
        # ancestor is yielded at most once per starting term.
        if seen is None:
            seen = set()
        for k, v in t.relations.father.items():
            for t1 in v:
                # if t1 is layer 0, we include this etymology only if it is a direct father/child
                if t1.layer == 0 and len(prefix) != 0:
                    continue

                if t1 not in seen:
                    yield (prefix + k, t1)
                    seen.add(t1)

                # Keep climbing only while the accumulated prefix is shorter
                # than two characters.
                if len(prefix) < 2:
                    yield from _enumerate_ancestors(t1,
                                                    prefix=prefix + k,
                                                    seen=seen)

    d = Dictionary(version)

    # Append values ``d``, rows ``i`` and columns ``j`` to the (values, rows,
    # cols) triple ``mat``. NOTE: the parameter ``d`` shadows the dictionary
    # ``d`` of the enclosing function inside this helper.
    def _put(mat, d, i, j):
        mat[0].extend(d)
        mat[1].extend(i)
        mat[2].extend(j)

    # Each matrix is accumulated as three parallel lists: values, rows, cols.
    order_matrix = ([], [], [])
    relation_type_matrix = ([], [], [])

    # For every relation, one set of column indices per row, obtained by
    # splitting ``indices`` at the ``indptr`` boundaries.
    # NOTE(review): assumes each relations_graph entry is CSR-like, so that
    # the first and last chunks of the split are the empty ones being dropped
    # by ``[1:-1]`` - confirm.
    all_indices = {
        rel: [
            set(l) for l in np.split(d.relations_graph[rel].indices,
                                     d.relations_graph[rel].indptr)[1:-1]
        ]
        for rel, _ in RELATIONS_TYPES
    }

    for root in d.roots:
        # Indices of the terms of this root already processed as ``t0``.
        past = set()
        for t0 in root.relations.contains:
            past.add(t0.index)
            # Start from the already processed terms (``t0`` included) so no
            # entry is emitted from ``t0`` towards any of them.
            seen = set(past)

            for rel_graph, rel_type in RELATIONS_TYPES:
                # Only targets not yet paired with ``t0``: relation types that
                # come earlier in RELATIONS_TYPES take precedence.
                indices = all_indices[rel_graph][t0.index].difference(seen)
                if indices:
                    value = get_relation_value(rel_type, t0)

                    # Record each entry in both directions with the same value.
                    _put(order_matrix, [value] * len(indices),
                         [t0.index] * len(indices), indices)
                    _put(order_matrix, [value] * len(indices), indices,
                         [t0.index] * len(indices))

                    _put(relation_type_matrix, [int(rel_type)] * len(indices),
                         [t0.index] * len(indices), indices)
                    _put(relation_type_matrix, [int(rel_type)] * len(indices),
                         indices, [t0.index] * len(indices))

                    seen.update(indices)

    # Ancestor relations: each direction gets its own relation type and order,
    # computed from the point of view of the row term.
    for layer in d.layers:
        for t0 in layer:
            for prefix, t1 in _enumerate_ancestors(t0):
                rel = get_relation(t0, t1, prefix=prefix)
                order = get_relation_value(rel, t0)

                _put(relation_type_matrix, [int(rel)], [t0.index], [t1.index])
                _put(order_matrix, [order], [t0.index], [t1.index])

                rel = get_relation(t1, t0, prefix=prefix)
                order = get_relation_value(rel, t1)

                _put(relation_type_matrix, [int(rel)], [t1.index], [t0.index])
                _put(order_matrix, [order], [t1.index], [t0.index])

    # Diagonal: every term is Equal to itself, with order 0.
    indices = list(range(len(d)))
    _put(relation_type_matrix, [int(RelationType.Equal)] * len(d), indices,
         indices)
    _put(order_matrix, [0] * len(d), indices, indices)

    def build_mat(mat):
        # Every (row, col) pair must have been recorded exactly once.
        # NOTE(review): csr_matrix built from coordinate triplets sums
        # duplicate entries, which is presumably why duplicates are rejected
        # here - confirm.
        assert len(set(zip(mat[1], mat[2]))) == len(list(zip(mat[1], mat[2])))
        return csr_matrix((mat[0], (mat[1], mat[2])), dtype=int)

    # distance_matrix = build_mat(distance_matrix)
    # order_matrix = build_mat(order_matrix)
    relation_type_matrix = build_mat(relation_type_matrix)
    order_matrix = build_mat(order_matrix)
    # 'distance': distance_matrix,
    # 'order': order_matrix,
    return {'relation': relation_type_matrix, 'order': order_matrix}
Beispiel #9
0
 def set_dictionary_version(self, version):
     """Replace ``self.term`` with its counterpart in another dictionary version.

     Resets ``self._str`` to ``None``, then asks ``Dictionary(version)`` to
     translate the current term's script from the current term's dictionary
     version, and stores the result in ``self.term``.
     """
     self._str = None
     target = Dictionary(version)
     translate = target.translate_script_from_version
     current = self.term
     self.term = translate(current.dictionary.version, current.script)
Beispiel #10
0
    #         **_up
    #     },
    #     'remove': {
    #         *_rem
    #     }
    # }
    # print('\n'.join("{} => {}".format(a, b) for a, b in _up.items()))
    # version = create_dictionary_version(latest_dictionary_version(), update=update, remove=_rem)
    # upload_to_s3(version)
    # print(version)

    # Preview: print the selected translator applied to a single root script.
    root = "O:.M:.-M:.-'"
    translator = translate_ecosystem_intl_col_tern
    # NOTE(review): the bare string below is an expression statement with no
    # effect; it looks like a leftover version identifier - confirm and remove.
    "dictionary_2018-06-08_17:07:06"
    print(str(translator(script(root))))

    # Dictionary version the diff below is computed against.
    d = Dictionary("dictionary_2018-06-08_17:07:06")
    # translate_update("s.u.-'O:M:.-'O:.-',+s.u.-'M:O:.-O:.-'M:.-',", translate_formes_visuelles)
    # Map the string form of each script found in the ``relations.contains``
    # of the given term to the string form of its translated script.
    diff = {
        **{
            str(s.script): str(translate_competence_en_curr_data(s.script))
            for s in term("M:.-O:.-'M:.-wa.e.-'t.-x.-s.y.-',", d).relations.contains
        },
        # **{str(s.script): str(translate_ecosystem_intl_col_tern(s.script)) for s in term("O:.M:.-M:.-'", d).relations.contains},
    }
    import json
    print(json.dumps(diff, indent=True))

    # Create a dictionary version from the diff; the upload step is left
    # disabled, only the resulting version is printed.
    version = create_dictionary_version(None, diff=diff)
    # upload_to_s3(version)
    print(version)
Beispiel #11
0
 def get_dictionary(self):
     """Return a ``Dictionary`` built from two values read from this object.

     The arguments are the result of ``self.list('morpheme', paradigm=True)``
     and the result of ``self.get_structure()``, in that order.
     """
     morphemes = self.list('morpheme', paradigm=True)
     structure = self.get_structure()
     return Dictionary(morphemes, structure)