Beispiel #1
0
 def testFilledExporter(self):
     """Runs the exporter and verifies both FSTs end up in the output FAR."""
     far_path = os.path.join(FLAGS.test_tmpdir, 'test.far')
     FLAGS.output = far_path
     # The run is expected to finish by raising SystemExit.
     with self.assertRaises(SystemExit):
         grm.run(generator_method)
     with pynini.Far(far_path, 'r') as reader:
         exported = dict(reader)
     self.assertLen(exported, 2)
     # Each expected key must be present and map to a truthy FST.
     for fst_name in ('FST1', 'FST2'):
         self.assertTrue(exported[fst_name])
Beispiel #2
0
# Lint as: python3
# Copyright 2016-2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Test pynini file using the grm."""

import pynini
from pynini.export import grm


def generator_main(exporter: grm.Exporter):
    """Stores three string acceptors in the exporter as FST1, FST2 and FST3."""
    acceptors = (
        ('FST1', '1234'),
        ('FST2', '4321'),
        ('FST3', 'ABCD'),
    )
    for fst_name, text in acceptors:
        exporter[fst_name] = pynini.accep(text)


# Script entry point: hand the generator function to grm.run.
# NOTE(review): grm.run presumably constructs the grm.Exporter passed to
# generator_main and writes the output FAR -- confirm against
# pynini.export.grm.
if __name__ == '__main__':
    grm.run(generator_main)
Beispiel #3
0
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""Reading normalization grammar for abjad / alphabet script languages.

To try for Urdu:

```sh
bazel build -c opt @org_opengrm_thrax//:rewrite-tester \
  nisaba/scripts/abjad_alphabet:reading_norm

bazel-bin/external/org_opengrm_thrax/rewrite-tester \
  --far \
    bazel-bin/nisaba/scripts/abjad_alphabet/reading_norm.far \
  --rules=UR \
  < /tmp/urdu_word_list.txt
```
"""

from pynini.export import grm
from nisaba.scripts.abjad_alphabet import reading_norm

if __name__ == '__main__':

    def _generate_utf8(exporter):
        """Runs the reading-norm generator with the 'utf8' token type."""
        return reading_norm.generator_main(exporter, 'utf8')

    grm.run(_generate_utf8)
Beispiel #4
0
```
"""

import pynini
from pynini.export import grm
import nisaba.scripts.abjad_alphabet.util as u
from nisaba.scripts.utils import rule


def _open_visual(script_or_lang_code: str,
                 token_type: pynini.TokenType) -> pynini.Fst:
    """Opens the 'visual_norm' FST for the given script or language code.

    Thin wrapper around `u.open_fst_from_far` that fixes its first argument
    to 'visual_norm' and forwards the other two unchanged.
    """
    name = 'visual_norm'
    visual_fst = u.open_fst_from_far(name, script_or_lang_code, token_type)
    return visual_fst


def generator_main(exporter: grm.Exporter, token_type: pynini.TokenType):
    """Exports one reading-normalization FST per language in `u.LANGS`.

    For every language, the visual normalization FST is composed with the
    rules read from that language's `reading_norm.tsv`, the result is
    optimized, and it is stored in the exporter under the upper-cased
    language code.

    Args:
        exporter: Receives the resulting FSTs, keyed by upper-cased language.
        token_type: Set as the pynini default token type while the FSTs are
            built, and also passed along when opening the visual
            normalization FSTs.
    """
    with pynini.default_token_type(token_type):
        common_sigma = u.sigma_from_common_data_files()
        for code in u.LANGS:
            visual = _open_visual(code, token_type)
            rules_path = u.LANG_DIR / code / 'reading_norm.tsv'
            reading = rule.fst_from_rule_file(rules_path, common_sigma)
            # The lower-case code picks the data directory; the export key
            # is its upper-case form.
            export_key = code.upper()
            composed = visual @ reading
            exporter[export_key] = pynini.optimize(composed)


if __name__ == '__main__':

    def _generate_byte(exporter):
        """Runs the generator in this module with the 'byte' token type."""
        return generator_main(exporter, 'byte')

    grm.run(_generate_byte)