Example #1
0
def read_textproto(proto_path, proto):
    """Parses a text-format proto file into `proto` and returns it.

    Args:
      proto_path: Path of the textproto file. When it does not exist on the
        filesystem as given, it is resolved through `uf.AsResourcePath`.
      proto: Message instance that `text_format.Parse` fills in. It must
        expose an `item` field, whose length is logged after parsing.

    Returns:
      The same `proto` object that was passed in, after parsing.
    """
    logging.info('Parsing %s ...', proto_path)
    resolved_path = (proto_path if os.path.exists(proto_path)
                     else uf.AsResourcePath(proto_path))
    with open(resolved_path, encoding='utf8') as stream:
        contents = stream.read()
    text_format.Parse(contents, proto)
    logging.info('Read %d items.', len(proto.item))
    return proto
 def test_iso_roundtrip(self, tag: str):
   """Checks that FROM_<TAG> followed by TO_<TAG> from iso.far is an identity.

   Args:
     tag: Name used (upper-cased) to select the `FROM_`/`TO_` FSTs in the FAR.
   """
   iso_far = file.AsResourcePath(u.FAR_DIR / 'iso.far')
   tag = tag.upper()
   with pynini.Far(iso_far, 'r') as far:
     # Forward FST first, then its inverse, in the order they are applied.
     fst_chain = [far[f'FROM_{tag}'], far[f'TO_{tag}']]
     self.assertFstProbablyIdentity(
         fst_chain,
         token_type='byte',
         samples=test_util.NUM_TEST_SAMPLES)
Example #3
0
def _fixed_rule_fst(script: str) -> pynini.Fst:
    """Creates an FST that transduces fixed rule romanization to ISO 15919.

    Args:
      script: Name of the sub-directory of `u.SCRIPT_DIR` that holds the
        script's `fixed.tsv` rule file.

    Returns:
      The FST built by `rule.fst_from_cascading_rule_file` from that rule
      file and the derived sigma.
    """
    rules_path = u.SCRIPT_DIR / script / 'fixed.tsv'
    rules_resource = uf.AsResourcePath(rules_path)
    rule_chars = uc.derive_chars(both_sides=[rules_path], input_side=[])
    # ASCII printable characters are pass through, except Pynini's symbol
    # generation characters ('[', ']'). The brackets are removed only from the
    # printable set, not from the characters derived from the rule file.
    passthrough_chars = set(string.printable) - set('[]')
    sigma = uc.derive_sigma(rule_chars | passthrough_chars)
    return rule.fst_from_cascading_rule_file(rules_resource, sigma)
Example #4
0
 def test_romanization_roundtrip(self):
     """Checks that FROM_ARAB composed with TO_ARAB is probably functional."""
     roman_far = uf.AsResourcePath(u.FAR_DIR / 'reversible_roman.far')
     with pynini.Far(roman_far, 'r') as far:
         # Compose the forward FST with its inverse to get the round trip.
         composed = far['FROM_ARAB'] @ far['TO_ARAB']
         self.assertFstProbablyFunctional(
             composed,
             token_type='byte',
             samples=ut.NUM_TEST_SAMPLES)
Example #5
0
def MaybeLoadScriptConfig(
        file_path: os.PathLike) -> script_config_pb2.ScriptConfig:
    """Loads script configuration, if present.

    Args:
      file_path: Location of the text-format `ScriptConfig` file.

    Returns:
      A `ScriptConfig` parsed from the file when `uf.IsFileExist` reports it
      exists; otherwise a default-constructed `ScriptConfig`.
    """
    config = script_config_pb2.ScriptConfig()
    if uf.IsFileExist(file_path):
        with open(uf.AsResourcePath(file_path), encoding="utf8") as stream:
            text_format.Parse(stream.read(), config)
    return config
Example #6
0
  def setUp(self):
    """Reads the textproto fixtures and builds the ARAB round-trip FST."""
    super().setUp()

    letters_path = u.LANG_DIR / 'letter_languages.textproto'
    self._letters_proto = letter_languages.read_textproto(letters_path)
    roman_path = u.LANG_DIR / 'reversible_roman.textproto'
    self._roman_proto = unicode_strings_util.read_textproto(roman_path)

    roman_far = uf.AsResourcePath(u.FAR_DIR / 'reversible_roman.far')
    with pynini.Far(roman_far, 'r') as far:
      # Forward FST composed with its inverse.
      self._round_trip = far['FROM_ARAB'] @ far['TO_ARAB']
Example #7
0
def _read_string_file_chars_to_set(files: Iterable[os.PathLike],
                                   relevant_fields: int) -> Set[str]:
    """Collects the characters found in the leading fields of some StringFiles.

    Each path is resolved with `uf.AsResourcePath` and read as UTF-8 text.
    Lines that start with "#" are skipped. Every other line is stripped of
    surrounding whitespace and split on tabs.

    Arguments:
      files: An Iterable of filepaths.
      relevant_fields: The number of tab-delimited fields from the beginning
        of each line to process. Must be a positive integer.

    Returns:
      Set[str] -- The set of all characters, under the selection in the files.
    """
    collected = set()
    for fname in files:
        resource = pathlib.Path(uf.AsResourcePath(pathlib.Path(fname)))
        with resource.open("rt", encoding="utf8") as stream:
            for raw_line in stream:
                if raw_line.startswith("#"):
                    continue
                selected = raw_line.strip().split("\t")[:relevant_fields]
                # Each field is a string, i.e. an iterable of characters.
                collected.update(*selected)
    return collected
Example #8
0
def rules_from_string_file(file: os.PathLike) -> Iterator[Rule]:
    """Yields string rules from a text resource with unweighted string maps.

    Args:
      file: Path of the resource; it is resolved with `uf.AsResourcePath`
        before being handed to `rules_from_string_path`.

    Returns:
      Whatever `rules_from_string_path` returns for the resolved path.
    """
    resource_path = uf.AsResourcePath(file)
    return rules_from_string_path(resource_path)
Example #9
0
 def _LoadFar(self) -> pynini.Far:
     """Opens the FAR at `self._path_to_far`, resolved via `uf.AsResourcePath`."""
     resolved_path = uf.AsResourcePath(self._path_to_far)
     return pynini.Far(resolved_path)