Ejemplo n.º 1
0
    def __init__(self, changes_path, inventory):
        # Symbol inventory; `inventory.syms` supplies the alphabet for sigma*.
        self.inventory = inventory
        # Insertion counter, starts at zero (presumably incremented while
        # applying the loaded sound changes — verify against callers).
        self.insertion_count = 0
        # NOTE(review): in pynini, `default_token_type` is a context manager;
        # calling it without `with` may have no lasting effect.  Confirm this
        # sets the token type for the pynini version in use.
        pynini.default_token_type("utf-8")

        # sigma*: closure over every inventory symbol plus the boundary
        # marker "#".
        self.sigma_star = pynini.closure(
            pynini.union(*self.inventory.syms.union("#"))).optimize()

        # Compile the sound-change rules from the given file into one FST.
        self.formula = self.load_sound_changes(changes_path)
Ejemplo n.º 2
0
def generator_main(exporter_map: multi_grm.ExporterMapping):
    """Generates FSTs for reading normalization of Brahmic scripts."""
    for token_type in ('byte', 'utf8'):
        with pynini.default_token_type(token_type):
            fsts = {}
            sigma_by_script = {}

            # Script-level reading-norm FSTs, one per script that appears
            # either standalone or as the script of some language.
            all_scripts = set(u.READING_NORM_SCRIPTS)
            all_scripts.update(u.READING_NORM_LANG_SCRIPT_MAP)
            for script in all_scripts:
                sigma = u.OpenSigma(script, token_type)
                sigma_by_script[script] = sigma
                fsts[script] = _reading_norm_fst(u.SCRIPT_DIR, script, sigma)

            # Language-level FSTs: the script-level normalization composed
            # with the language-specific rules.
            for script, langs in u.READING_NORM_LANG_SCRIPT_MAP.items():
                for lang in langs:
                    lang_fst = _reading_norm_fst(
                        u.SCRIPT_DIR / script, lang, sigma_by_script[script])
                    fsts[lang] = rewrite.ComposeFsts(
                        [fsts[script], lang_fst])

            exporter = exporter_map[token_type]
            for name, fst in fsts.items():
                exporter[name.upper()] = fst
Ejemplo n.º 3
0
  def __construct_suff_phon(self):
    '''
    Rewrite rule deleting an "i" whose left context is "i" or a
    consonant + "y" followed by the "<Suff_Stems>" marker; the result is
    concatenated with the tail.
    '''
    with pynini.default_token_type(self.__syms.alphabet):

      # Symbols over which the rewrite context is defined.
      sigma = pynini.union(
          self.__syms.characters,
          pynini.string_map(["<n>", "<e>", "<d>", "<~n>", "<Ge-Nom>", "<SS>", "<FB>", "<ge>", "<Ge>", "<no-ge>", "<Initial>", "<NoHy>", "<NoPref>", "<NoDef>", "<NN>", "<ADJ>"]).project("input"),
          self.__syms.stem_types,
          ).closure()

      # The change: delete "i".
      delete_i = pynini.cross("i", "")

      # Left context: ("i" | consonant "y") "<Suff_Stems>".
      left_context = (
          pynini.union(
              pynini.accep("i"),
              self.__syms.consonants.project("input") + pynini.accep("y"))
          + pynini.accep("<Suff_Stems>"))

      rule = pynini.cdrewrite(
          delete_i, left_context, "", sigma.project("input"))
      return (rule + self.__tail).optimize()
Ejemplo n.º 4
0
def generator_main(exporter_map: multi_grm.ExporterMapping):
    """FSTs for ISO conversion of fixed rule romanization of Brahmic."""
    for token_type in ('byte', 'utf8'):
        with pynini.default_token_type(token_type):
            exporter = exporter_map[token_type]
            # One FST per fixed-rule script, keyed by upper-cased name.
            for script in u.FIXED_RULE_SCRIPTS:
                exporter[script.upper()] = _fixed_rule_fst(script)
Ejemplo n.º 5
0
def generator_main(exporter_map: multi_grm.ExporterMapping):
    """Generates FAR for multilingual phonological operations."""
    # Exported FAR name -> module-level operation FST.  The table is
    # token-type independent, so it is built once.
    operations = {
        'VOICING': INTERSONORANT_VOICING,
        'ANUSVARA_ASSIMILATION_LABIAL': ANUSVARA_ASSIMILATION_LABIAL,
        'ANUSVARA_ASSIMILATION_DENTAL': ANUSVARA_ASSIMILATION_DENTAL,
        'ANUSVARA_ASSIMILATION_ALVEOLAR': ANUSVARA_ASSIMILATION_ALVEOLAR,
        'ANUSVARA_ASSIMILATION_PALATAL': ANUSVARA_ASSIMILATION_PALATAL,
        'ANUSVARA_ASSIMILATION_RETROFLEX': ANUSVARA_ASSIMILATION_RETROFLEX,
        'ANUSVARA_ASSIMILATION_VELAR': ANUSVARA_ASSIMILATION_VELAR,
        'ANUSVARA_ASSIMILATION': ANUSVARA_ASSIMILATION,
        'DEFAULT_ANUSVARA_DENTAL': DEFAULT_ANUSVARA_DENTAL,
        'DEFAULT_ANUSVARA_LABIAL': DEFAULT_ANUSVARA_LABIAL,
        'FINAL_ANUSVARA_NASALIZATION': FINAL_ANUSVARA_NASALIZATION,
        'JNY_TO_GY': JNY_TO_GY,
        'JNY_TO_NY': JNY_TO_NY,
    }
    for token_type in ('byte', 'utf8'):
        with p.default_token_type(token_type):
            exporter = exporter_map[token_type]
            for name, fst in operations.items():
                exporter[name] = fst
Ejemplo n.º 6
0
  def __construct_compound_filter(self):
    '''
    Construct the compound filter
    '''
    with pynini.default_token_type(self.__syms.alphabet):

      # Symbols allowed inside a compound member.  Category and origin
      # features as well as "<NoPref>" are accepted on the input side but
      # deleted from the output.
      alphabet = pynini.union(
          self.__syms.characters,
          pynini.string_map(["<n>", "<e>", "<d>", "<~n>", "<Ge-Nom>", "<SS>", "<FB>", "<ge>", "<Ge>"]).project("input"),
          self.__syms.stem_types,
          pynini.cross(self.__syms.categories, ""),
          pynini.cross(self.__syms.origin_features, ""),
          pynini.cross("<NoPref>", "")
          )

      return pynini.concat(
          # Optional initial marker: "<Initial>" is deleted; "<NoHy>" and
          # "<NoDef>" pass through unchanged.
          pynini.union(
            pynini.cross("<Initial>", ""),
            pynini.accep("<NoHy>"),
            pynini.accep("<NoDef>")
            ).closure(0,1),
          pynini.concat(
            pynini.union(
              # Case 1: compound material ending in one of the listed
              # category markers, which is deleted.
              pynini.concat(
                alphabet.closure(),
                pynini.cross(pynini.string_map(["<ABK>", "<ADV>", "<CARD>", "<NE>", "<PRO>", "<V>", "<ORD>", "<OTHER>"]).project("input"), "")
                ),
              # Case 2: verbal compounds — "<VADJ>" is inserted up front and
              # the two "<kompos>" markers plus the final "<V>" are deleted.
              pynini.concat(
                pynini.cross("", "<VADJ>"),
                pynini.concat(
                  pynini.union(
                    alphabet,
                    pynini.cross("<kompos>", "")
                    ).closure(),
                  pynini.concat(
                    pynini.cross("<kompos>", ""),
                    pynini.concat(
                      alphabet.closure(),
                      pynini.cross("<V>", "")
                      )
                    )
                  )
                ),
              # Case 3: adjectival/nominal compounds — "<kompos>" markers and
              # the final "<ADJ>"/"<NN>" marker are deleted.
              pynini.concat(
                pynini.union(
                  alphabet,
                  pynini.cross("<kompos>", "")
                  ).closure(),
                pynini.cross(pynini.string_map(["<ADJ>", "<NN>"]).project("input"), "")
                )
              ),
            # Trailing "<base>" segment: the marker and origin feature are
            # deleted; the inflection class is kept.
            pynini.concat(
              pynini.cross("<base>", ""),
              pynini.concat(
                pynini.cross(self.__syms.origin_features, ""),
                self.__syms.inflection_classes
                )
              )
            )
          ).optimize()
Ejemplo n.º 7
0
  def __construct_insert_zu(self):
    '''
    Inserts "zu" into infinitives with separable prefixes
    '''
    with pynini.default_token_type(self.__syms.alphabet):

      # Plain symbols (characters and control markers).
      sigma = pynini.union(
          self.__syms.characters,
          pynini.string_map(["<n>", "<~n>", "<e>", "<d>", "<NoHy>", "<NoDef>", "<VADJ>", "<CB>", "<FB>", "<UL>", "<SS>", "<DEL-S>", "<Low#>", "<Up#>", "<Fix#>", "<^imp>", "<^UC>", "<^Ax>", "<^pl>", "<^Gen>", "<^Del>"]).project("input")
          ).optimize()

      # Symbols plus stem-type markers (corresponds to c2 in deko.fst).
      any_material = pynini.union(
          sigma,
          self.__syms.stem_types
          ).closure().optimize()

      # From deko.fst: insert "zu" after verbal prefixes if followed by the
      # infinitive marker "<^zz>", which is consumed.
      with_zu = (
          any_material
          + pynini.accep("<Pref_Stems>")
          + sigma.closure()
          + pynini.accep("<Base_Stems>")
          + pynini.cross("", "z u")
          + sigma.closure()
          + pynini.cross("<^zz>", "")
          + sigma.closure())

      # Either leave the input untouched or apply the insertion path.
      return pynini.union(any_material, with_zu).optimize()
Ejemplo n.º 8
0
def rewrite_lattice(
    string: pynini.FstLike,
    rule: pynini.Fst,
    token_type: Optional[pynini.TokenType] = None) -> pynini.Fst:
  """Constructs a weighted lattice of output strings.

  Constructs a weighted, epsilon-free lattice of output strings given an
  input FST (or string) and a rule FST.

  Args:
    string: Input string or FST.
    rule: Input rule WFST.
    token_type: Optional input token type, or symbol table.

  Returns:
    An epsilon-free WFSA.

  Raises:
    Error: Composition failure.
  """
  import contextlib
  # TODO(kbg): Consider adding support for PDT and MPDT composition.
  # Use a no-op context when no token type is given so the composition is
  # written exactly once (resolves the old nullcontext TODO).
  ctx = (contextlib.nullcontext() if token_type is None
         else pynini.default_token_type(token_type))
  with ctx:
    lattice = pynini.compose(string, rule, compose_filter="alt_sequence")
  if lattice.start() == pynini.NO_STATE_ID:
    raise Error("Composition failure")
  return lattice.project("output").rmepsilon()
Ejemplo n.º 9
0
    def matches(self,
                istring: pynini.FstLike,
                ostring: pynini.FstLike,
                input_token_type: Optional[pynini.TokenType] = None,
                output_token_type: Optional[pynini.TokenType] = None) -> bool:
        """Returns whether or not the rule cascade allows an input/output pair.

        Args:
          istring: Input string or FST.
          ostring: Output string or FST.
          input_token_type: Optional input token type, or symbol table.
          output_token_type: Optional output token type, or symbol table.

        Returns:
          Whether the input-output pair is generated by the rule.
        """
        import contextlib
        lattice = self._rewrite_lattice(istring, input_token_type)
        # Use a no-op context when no output token type is given so the
        # intersection is written exactly once (resolves the old
        # nullcontext TODO).
        ctx = (contextlib.nullcontext() if output_token_type is None
               else pynini.default_token_type(output_token_type))
        with ctx:
            lattice = pynini.intersect(lattice,
                                       ostring,
                                       compose_filter="sequence")
        # A lattice with no start state means the pair is not generated.
        return lattice.start() != pynini.NO_STATE_ID
Ejemplo n.º 10
0
def generator_main(exporter_map: multi_grm.ExporterMapping):
    """Generates FAR for ISO char to PSA phoneme assignment."""
    for token_type in ('byte', 'utf8'):
        with p.default_token_type(token_type):
            # Single exported mapping per token type.
            exporter_map[token_type]['TYP_TO_TXN'] = TYP_TO_TXN
Ejemplo n.º 11
0
 def __construct_inflection(self):
     '''
     Build the union of all inflection classes, each one prefixed with an
     inserted class marker.
     '''
     with pynini.default_token_type(self.__syms.alphabet):
         # (inserted marker, inflection FST) pairs.
         classes = [
             ("<Adj0>", self.__adj0),
             ("<Adj0-Up>", self.__adj0_up),
             ("<Adj+>", self.__adj_plus),
             ("<Adj+e>", self.__adj_plus_e),
             ("<NMasc_es_e>", self.__nmasc_es_e),
             ("<NMasc_es_$e>", self.__nmasc_es_e_ul),
             ("<NMasc_es_en>", self.__nmasc_es_en),
             ("<NFem-Deriv>", self.__nfem_deriv),
             ("<NFem_0_n>", self.__nfem_0_n),
             ("<NNeut-Dimin>", self.__nneut_dimin),
             ("<NNeut/Sg_s>", self.__nneut_sg_s),
             ("<VVReg>", self.__vv_reg),
         ]
         return pynini.union(
             *(pynini.cross("", marker) + inflection
               for marker, inflection in classes)).optimize()
Ejemplo n.º 12
0
  def __construct_tail(self):
    '''
    Define possible final sequences of a derivation
    '''
    with pynini.default_token_type(self.__syms.alphabet):
      # C1: symbols that may precede the base stem-type marker.
      before_base = pynini.union(
        self.__syms.characters,
        pynini.string_map(["<n>", "<e>", "<d>", "<~n>", "<Ge-Nom>", "<UL>", "<SS>", "<FB>", "<ge>", "<Ge>", "<no-ge>", "<Initial>", "<NoHy>", "<NoPref>", "<NoDef>", "<Pref_Stems>"]).project("input")
        ).closure()

      # C2: symbols between the base stem type and the final part.
      after_base = pynini.union(
        self.__syms.characters,
        pynini.string_map(["<n>", "<e>", "<d>", "<~n>", "<Ge-Nom>", "<UL>", "<SS>", "<FB>", "<ge>", "<Suff_Stems>"]).project("input")
        ).closure()

      # C3: symbols allowed in the final part of the tail.
      ending = pynini.union(
        self.__syms.characters,
        pynini.string_map(["<n>", "<e>", "<d>", "<~n>", "<Ge-Nom>", "<UL>", "<SS>", "<FB>"]).project("input"),
        self.__syms.categories,
        self.__syms.stem_type_features,
        self.__syms.origin_features,
        pynini.string_map(["<NSNeut_es_e>", "<NSFem_0_n>", "<NSFem_0_en>", "<NSMasc_es_e>", "<NSMasc_es_$e>", "<NSMasc-s/$sse>"]).project("input")
        ).closure()

      # TAIL: an optional base-stem segment, the final symbols, and an
      # optional inflection class.
      stem_segment = (
        before_base + self.__syms.base_stem_types + after_base
        ).closure(0, 1)
      return (stem_segment + ending
              + self.__syms.inflection_classes.closure(0, 1)).optimize()
Ejemplo n.º 13
0
 def __construct_compound_stems_nn(self, tmp):
     '''
     Default noun compounding stems
     '''
     with pynini.default_token_type(self.__syms.alphabet):
         # Insert "<+NN>" + gender + nominative singular/plural features
         # after the stem characters.
         nn_features = pynini.union(
             *(pynini.cross(
                 "",
                 pynini.concat(
                     pynini.accep("<+NN>"),
                     pynini.concat(self.__syms.gender,
                                   pynini.accep(number))))
               for number in ("<Nom> <Sg>", "<Nom> <Pl>")))

         kompos_stems = pynini.compose(
             self.__syms.characters.closure(1) + nn_features, tmp)

         # Wrap the result with the compounding stem marker and the
         # "<kompos> <nativ>" classification.
         return (pynini.cross("", "<Kompos_Stems>") + kompos_stems +
                 pynini.accep("<NN>") +
                 pynini.cross("", "<kompos> <nativ>")).optimize()
Ejemplo n.º 14
0
def generator_main(exporter_map: multi_grm.ExporterMapping):
  """Generates FAR for language agnostic ISO to typeable string conversion."""
  for token_type in ('byte', 'utf8'):
    with p.default_token_type(token_type):
      exporter = exporter_map[token_type]
      # Decomposed and precomposed variants of the ISO-to-typeable mapping.
      for name, fst in (('ISO_TO_TYP_DECOMPOSED', _iso_to_decomposed_typ()),
                        ('ISO_TO_TYP', iso_to_typ())):
        exporter[name] = fst
Ejemplo n.º 15
0
def generator_main(exporter: grm.Exporter):
    """Generate FSAs accepting the alphabet of each Brahmic script."""
    # A byte-mode sigma is not useful here, so only the utf8-mode sigma is
    # exported.
    with pynini.default_token_type('utf8'):
        for script in u.SCRIPTS:
            exporter[script.upper()] = uc.derive_sigma(
                cu.script_chars(script))
Ejemplo n.º 16
0
def generator_main(exporter_map: multi_grm.ExporterMapping):
    """FSTs for language-agnostic NFC normalization of abjad / alphabet script text."""
    # The rule file location does not depend on the token type.
    nfc_file = u.LANG_DIR / 'nfc.tsv'
    for token_type in ('byte', 'utf8'):
        with pynini.default_token_type(token_type):
            sigma = u.sigma_from_common_data_files()
            nfc_fst = rule.fst_from_rule_file(nfc_file, sigma)
            exporter_map[token_type][u.SCRIPT_NAME.upper()] = nfc_fst
Ejemplo n.º 17
0
 def __construct_bdk_stems(self):
   '''
   Base, derivation and compound stems (without derivation suffixes)
   '''
   with pynini.default_token_type(self.__syms.alphabet):
     # Restrict the lexicon to entries marked with one of the base,
     # derivation or compounding stem types.
     stem_markers = pynini.string_map(
         ["<Base_Stems>", "<Deriv_Stems>", "<Kompos_Stems>"]
         ).project("input")
     selector = (self.__syms.initial_features.closure()
                 + stem_markers
                 + self.__sigma_star)
     return pynini.compose(self.__lex, selector).optimize()
Ejemplo n.º 18
0
 def __construct_base_stems(self):
   '''
   Base stems
   '''
   with pynini.default_token_type(self.__syms.alphabet):
     # Keep only the bdk stems marked as base stems.
     selector = (self.__syms.initial_features.closure()
                 + pynini.accep("<Base_Stems>")
                 + self.__sigma_star)
     return pynini.compose(self.__bdk_stems, selector).optimize()
Ejemplo n.º 19
0
 def __construct_pref_stems(self):
   '''
   Prefix stems
   '''
   with pynini.default_token_type(self.__syms.alphabet):
     # Lexicon entries marked as prefix stems.
     selector = (self.__syms.initial_features.closure()
                 + pynini.accep("<Pref_Stems>")
                 + self.__sigma_star)
     return pynini.compose(self.__lex, selector).optimize()
Ejemplo n.º 20
0
 def __split_disjunctive_feats(self, disjunctive_feat_list):
     '''
     Map each disjunctive feature such as "<a,b,c>" onto the union of its
     member features "<a>", "<b>", "<c>".

     Returns the optimized union of one cross per disjunctive feature.
     '''
     with pynini.default_token_type(self.__syms.alphabet):
         single_splits = []
         for disjunctive_feat in disjunctive_feat_list:
             # Build the complete member list first, then emit exactly one
             # cross per disjunctive feature.  The previous version appended
             # a cross for every partial prefix of the member list (the
             # append sat inside the inner loop); the union was language-
             # equivalent but contained redundant arcs.
             members = ["<" + cat + ">"
                        for cat in disjunctive_feat[1:-1].split(",")]
             single_splits.append(
                 pynini.cross(disjunctive_feat, pynini.string_map(members)))
         return pynini.union(*single_splits).optimize()
Ejemplo n.º 21
0
def generator_main(exporter_map: multi_grm.ExporterMapping):
    """Generates FAR for natural transliteration for Malayalam."""
    for token_type in ('byte', 'utf8'):
        with p.default_token_type(token_type):
            # ISO graphemes composed through the typeable representation and
            # the Malayalam-specific phonological operations.
            iso_to_txn = (iso.iso_to_typ()
                          @ typ.TYP_TO_TXN
                          @ ops.DEFAULT_ANUSVARA_LABIAL
                          @ ops.INTERSONORANT_VOICING
                          @ ops.JNY_TO_NY)

            exporter = exporter_map[token_type]
            for name, to_psa in (('ISO_TO_PSAF', txn.TXN_TO_PSAF),
                                 ('ISO_TO_PSAC', txn.TXN_TO_PSAC)):
                exporter[name] = (iso_to_txn @ to_psa).optimize()
Ejemplo n.º 22
0
 def __construct_pref_deriv_suff_stems(self):
   '''
   Derivation suffixes which combine with prefixed stems
   '''
   with pynini.default_token_type(self.__syms.alphabet):
     # Suffix-stem entries flagged "<prefderiv>"; the flag itself is
     # removed from the output.
     selector = (self.__syms.initial_features.closure()
                 + pynini.accep("<Suff_Stems>")
                 + pynini.cross("<prefderiv>", "")
                 + self.__sigma_star)
     return pynini.compose(self.__lex, selector).optimize()
Ejemplo n.º 23
0
def generator_main(exporter: grm.Exporter, token_type: pynini.TokenType):
    """FSTs for reading normalization of abjad / alphabet script languages."""
    with pynini.default_token_type(token_type):
        sigma = u.sigma_from_common_data_files()
        for lang in u.LANGS:
            # Reading normalization is applied on top of visual
            # normalization.
            visual_fst = _open_visual(lang, token_type)
            reading_fst = rule.fst_from_rule_file(
                u.LANG_DIR / lang / 'reading_norm.tsv', sigma)
            exporter[lang.upper()] = pynini.optimize(
                visual_fst @ reading_fst)
Ejemplo n.º 24
0
  def __construct_rep_pref(self):
    '''
    Replace the marker of manually prefixed stems
    '''
    with pynini.default_token_type(self.__syms.alphabet):
      # Rewrite "<prefnativ>" to "<nativ>" everywhere (empty left and right
      # contexts) over the prefix-filter alphabet.
      replacement = pynini.cross("<prefnativ>", "<nativ>")
      return pynini.cdrewrite(
          replacement, "", "", self.__prefix_filter_helper).optimize()
Ejemplo n.º 25
0
  def __construct_prefix_filter_helper(self):
    '''
    Alphabet for the prefix filter
    '''
    with pynini.default_token_type(self.__syms.alphabet):
      # Characters, control markers, stem types and categories, closed
      # under concatenation.
      markers = pynini.string_map(
          ["<n>", "<e>", "<d>", "<~n>", "<Ge-Nom>", "<SS>", "<FB>", "<ge>", "<Ge>", "<no-ge>", "<Initial>", "<NoHy>", "<NoPref>", "<NoDef>"]
          ).project("input")
      symbols = pynini.union(
          self.__syms.characters,
          markers,
          self.__syms.stem_types,
          self.__syms.categories,
          )
      return symbols.closure().optimize()
Ejemplo n.º 26
0
def generator_main(exporter_map: multi_grm.ExporterMapping):
    """Generates FAR for natural transliteration."""
    for token_type in ('byte', 'utf8'):
        with p.default_token_type(token_type):
            # ISO graphemes composed through the typeable representation and
            # the default phonological operations.
            iso_to_txn = (iso.iso_to_typ()
                          @ typ.TYP_TO_TXN
                          @ ops.ANUSVARA_ASSIMILATION
                          @ ops.DEFAULT_ANUSVARA_DENTAL
                          @ ops.FINAL_ANUSVARA_NASALIZATION
                          @ ops.JNY_TO_GY)

            exporter = exporter_map[token_type]
            for name, to_psa in (('ISO_TO_PSAF', txn.TXN_TO_PSAF),
                                 ('ISO_TO_PSAC', txn.TXN_TO_PSAC)):
                exporter[name] = (iso_to_txn @ to_psa).optimize()
Ejemplo n.º 27
0
 def __construct_quant_suff_stems(self):
   '''
   Derivation suffixes which combine with a number and a simplex stem
   '''
   with pynini.default_token_type(self.__syms.alphabet):
     # Suffix-stem entries introduced by "<QUANT>" and flagged "<simplex>";
     # both markers are removed from the output.
     selector = (pynini.cross("<QUANT>", "")
                 + self.__syms.initial_features.closure()
                 + pynini.accep("<Suff_Stems>")
                 + pynini.cross("<simplex>", "")
                 + self.__sigma_star)
     return pynini.compose(self.__lex, selector).optimize()
Ejemplo n.º 28
0
    def __construct_ge_nom_stems_v(self, tmp):
        '''
        Stems for ge nominalization of verbs ("Gejammer")
        '''
        with pynini.default_token_type(self.__syms.alphabet):
            # Characters, categories and the "<CONV>"/"<SUFF>" markers.
            alphabet = pynini.union(
                self.__syms.characters, self.__syms.categories,
                pynini.string_map(["<CONV>", "<SUFF>"]).project("input"))

            # extract infinitives: entries in `tmp` of the shape
            # (chars "<PREF>")* chars+, with "<+V> <Inf>" appended.
            infinitives = pynini.compose(
                pynini.concat(
                    pynini.concat(self.__syms.characters.closure(1),
                                  pynini.accep("<PREF>")).closure(),
                    pynini.concat(alphabet.closure(1),
                                  pynini.cross("", "<+V> <Inf>"))),
                tmp).optimize()

            # Delete a leading "g e <PREF> <Ge>" sequence after any number of
            # prefix segments.
            insert_ge = pynini.concat(
                pynini.concat(self.__syms.characters.closure(1),
                              pynini.accep("<PREF>")).closure(),
                pynini.concat(pynini.cross("g e <PREF> <Ge>", ""),
                              alphabet.closure(1))).optimize()

            # Apply the deletion to the infinitives and lowercase/strip the
            # remaining categories and prefix/suffix markers, keeping "<Ge>".
            inserted_ge = pynini.compose(
                pynini.compose(insert_ge, infinitives).project("input"),
                pynini.union(self.__syms.to_lower, self.__syms.categories,
                             self.__syms.prefix_suffix_marker,
                             pynini.accep("<Ge>")).closure()).optimize()

            # Re-insert "<Ge>", drop categories and prefix/suffix markers,
            # and remove the final "e n" of the infinitive ending.
            deriv_stem_filter_ge = pynini.compose(
                pynini.compose(
                    pynini.compose(
                        pynini.union(alphabet, pynini.accep("<PREF>"),
                                     pynini.cross("", "<Ge>")).closure(),
                        inserted_ge),
                    pynini.union(
                        self.__syms.characters, pynini.accep("<Ge>"),
                        pynini.cross(
                            pynini.union(self.__syms.categories,
                                         self.__syms.prefix_suffix_marker),
                            "")).closure()),
                pynini.concat(
                    pynini.union(
                        self.__syms.characters,
                        pynini.accep("<Ge>"),
                    ).closure(1), pynini.cross("e n", ""))).optimize()

            # Wrap as a derivation stem of category "<V>" with the
            # "<deriv> <nativ>" classification.
            return (pynini.cross("", "<Deriv_Stems>") + deriv_stem_filter_ge +
                    pynini.accep("<V>") +
                    pynini.cross("", "<deriv> <nativ>")).optimize()
Ejemplo n.º 29
0
  def __construct_umlautung(self):
    '''
    Map "a", "o" and "u" onto "ä", "ö" and "ü", corresp., if the umlaut marker "<UL>" is present.
    '''
    with pynini.default_token_type(self.__syms.alphabet):

      # Symbols that may precede the umlautable vowel.
      # NOTE(review): "<Ge>" appears twice in this string_map; harmless for a
      # union but probably unintended.
      alphabet = pynini.union(
          self.__syms.characters,
          pynini.string_map(["<n>", "<e>", "<d>", "<~n>", "<Ge-Nom>", "<SS>", "<FB>", "<ge>", "<Ge>", "<no-ge>", "<Ge>", "<Initial>", "<NoHy>", "<NoPref>", "<NoDef>"]).project("input"),
          self.__syms.stem_types,
          self.__syms.categories,
          ).closure()

      return pynini.concat(
          pynini.concat(
            alphabet,
            pynini.concat(
              # The umlaut applies after a consonant ...
              self.__syms.consonants,
              pynini.concat(
                pynini.union(
                  # ... to a single vowel a/o/u ...
                  pynini.union(
                    pynini.cross("a", "ä"),
                    pynini.cross("o", "ö"),
                    pynini.cross("u", "ü")
                    ),
                  # ... or to "aa" (second "a" deleted) or "au".
                  pynini.concat(
                    pynini.cross("a", "ä"),
                    pynini.union(
                      pynini.cross("a", ""),
                      pynini.accep("u")
                      )
                    )
                  ),
                pynini.concat(
                  # Optional trailing consonants and an optional "el"/"er"
                  # syllable before the suffix-stem boundary.
                  self.__syms.consonants.closure(),
                  pynini.concat(
                    pynini.concat(
                      pynini.accep("e"),
                      pynini.string_map(["l", "r"]).project("input")
                      ).closure(0, 1),
                    # The triggering "<UL>" marker after "<Suff_Stems>" is
                    # consumed.
                    pynini.concat(
                      pynini.accep("<Suff_Stems>"),
                      pynini.cross("<UL>", "")
                      )
                    )
                  )
                )
              ).closure(0, 1)
            ),
          self.__tail
          ).optimize()
Ejemplo n.º 30
0
def assert_fst_functional(fst: pynini.Fst, token_type: pynini.TokenType,
                          string_fsa: pynini.Fst) -> None:
    """Assert that an FST is functional for the given string FSA.

    Args:
      fst: An FST to verify if is functional.
      token_type: The token_type used to derive the FST.
      string_fsa: The string FSA to verify functional behavior.

    Raises:
      AssertionError: If the FST is found to be non-functional.
    """
    with pynini.default_token_type(token_type):
        # If `fst` is functional over this input, composing the FSA with it
        # must yield a single path.
        verify_if_single_path(string_fsa, string_fsa @ fst)