コード例 #1
0
ファイル: rewrite_test.py プロジェクト: yzhang123/pynini
 def testRankedRewrite(self):
     """Checks the top rewrite when two weighted rules are unioned.

     One rule deletes a consonant before another consonant with weight 1;
     the other inserts "i" between two consonants with weight 2. For the
     input "okto" the union is expected to yield "oto" as its single top
     rewrite.
     """
     # Weight 1: delete a consonant when a consonant follows.
     cheap_deletion = pynutil.delete(self.consonant, weight=1)
     deletion_rule = pynini.cdrewrite(cheap_deletion, "", self.consonant,
                                      self.sigstar)
     # Weight 2: insert "i" between two consonants.
     costly_insertion = pynutil.insert("i", weight=2)
     epenthesis_rule = pynini.cdrewrite(costly_insertion, self.consonant,
                                        self.consonant, self.sigstar)
     combined = pynini.union(deletion_rule, epenthesis_rule)
     rule = combined.optimize()
     self.assertEqual("oto", rewrite.one_top_rewrite("okto", rule))
     self.assertTrue(rewrite.matches("okto", "oto", rule))
 def test_delete_freestanding_punctuation(self):
     """Test deleting freestanding punctuation."""
     # (input, expected output) pairs. The expected strings keep the
     # whitespace that surrounded each deleted punctuation mark.
     cases = (
         ("hello , friend", "hello  friend"),
         ("the end .", "the end "),
         ('" what', ' what'),
         ('" who , he asked , left ? "', ' who  he asked  left  '),
     )
     for test_case, expected in cases:
         with self.subTest(test_case=test_case):
             actual = rewrite.one_top_rewrite(
                 test_case, norm.delete_freestanding_punctuation)
             self.assertEqual(actual, expected)
 def test_remove_extra_whitespace(self):
     """Test removing extra whitespace."""
     # (input, expected output) pairs. Per the expectations below, a leading
     # run of spaces is reduced to one space while a trailing run is removed.
     cases = (
         ("hi       there", "hi there"),
         ("my friend    ", "my friend"),
         ("   the sun", " the sun"),
         ("   all   the   spaces   ", " all the spaces"),
     )
     for test_case, expected in cases:
         with self.subTest(test_case=test_case):
             actual = rewrite.one_top_rewrite(test_case,
                                              norm.remove_extra_whitespace)
             self.assertEqual(actual, expected)
コード例 #4
0
ファイル: rewrite_test.py プロジェクト: yzhang123/pynini
 def testOptionalRewrite(self):
     """Checks a rule built with mode="opt", which leaves two outputs.

     The rule deletes `self.td` after a consonant at "[EOS]". For "fist"
     both "fist" and "fis" are expected as rewrites, so asking for the one
     top rewrite must raise `rewrite.Error`.
     """
     td_deletion = pynutil.delete(self.td)
     optional_rule = pynini.cdrewrite(td_deletion, self.consonant, "[EOS]",
                                      self.sigstar, mode="opt")
     rule = optional_rule.optimize()
     # Two equally ranked outputs exist, so there is no single top rewrite.
     with self.assertRaisesRegex(rewrite.Error, r"Multiple top rewrites"):
         rewrite.one_top_rewrite("fist", rule)
     self.assertCountEqual(["fist", "fis"], rewrite.rewrites("fist", rule))
     self.assertTrue(rewrite.matches("fist", "fis", rule))
     self.assertTrue(rewrite.matches("fist", "fist", rule))
     self.assertFalse(rewrite.matches("fis", "fist", rule))
コード例 #5
0
ファイル: rewrite_test.py プロジェクト: yzhang123/pynini
 def testMandatoryRewrite(self):
     """Checks a rule built without a `mode` argument, which has one output.

     The rule deletes `self.td` after a consonant at "[EOS]". For "fist"
     the only expected rewrite is "fis", and every rewrite helper must agree
     on it.
     """
     td_deletion = pynutil.delete(self.td)
     rule = pynini.cdrewrite(td_deletion, self.consonant, "[EOS]",
                             self.sigstar).optimize()
     outputs = tuple(rewrite.rewrites("fist", rule))
     # pylint: disable=g-generic-assert
     self.assertEqual(len(outputs), 1)
     # pylint: enable=g-generic-assert
     self.assertEqual("fis", outputs[0])
     self.assertEqual("fis", rewrite.top_rewrite("fist", rule))
     self.assertEqual("fis", rewrite.one_top_rewrite("fist", rule))
     self.assertTrue(rewrite.matches("fist", "fis", rule))
     self.assertFalse(rewrite.matches("fis", "fist", rule))
コード例 #6
0
ファイル: g2p.py プロジェクト: blueCormorant/Hiragana-G2P
def g2p(istring: str) -> str:
    """Applies the G2P rule to a graphemic string.

    Args:
      istring: the graphemic input string.

    Returns:
      The phonemic output string, i.e. the single top rewrite of `istring`
      under the module-level `G2P` rule.

    Raises:
      rewrite.Error: composition failure.
    """
    ostring = rewrite.one_top_rewrite(istring, G2P)
    return ostring
コード例 #7
0
ファイル: tagger.py プロジェクト: yzhang123/pynini
    def tag(self, string: pynini.FstLike) -> str:
        """Tags an input string.

        This method inserts XML-style tags around all substrings in the input
        string matching any element in the vocabulary.

        Args:
          string: The input string.

        Returns:
          The tagged string, obtained as the single top rewrite of `string`
          under `self._tagger`.
        """
        tagged = rewrite.one_top_rewrite(string, self._tagger)
        return tagged
コード例 #8
0
ファイル: weather.py プロジェクト: yzhang123/pynini
  def generate_report(self, city: str) -> str:
    """Generates weather report for the given city.

    The city's record is looked up in `self._table`, each `$PLACEHOLDER` is
    crossed with the matching value, and the resulting rule is combined with
    `self._singularize` before being applied to `self._template`.

    Args:
      city: a city string.

    Returns:
      Weather report for the city.
    """
    record = self._table[city]
    # One cross per template placeholder; temperature and wind speed are
    # passed through str() before being crossed.
    substitutions = (
        pynini.cross("$CITY", city),
        pynini.cross("$TEMPERATURE", str(record.temperature)),
        pynini.cross("$STATE", record.state),
        pynini.cross("$WIND_DIRECTION", record.wind_direction),
        pynini.cross("$WIND_SPEED", str(record.wind_speed)),
    )
    populate = WeatherTable.sigma_pad(*substitutions)
    report_rule = populate @ self._singularize
    return rewrite.one_top_rewrite(self._template, report_rule)
 def test_detach_trailing_punctuation(self):
     """Test separating trailing punctuation from tokens."""
     # (input, expected output) pairs. Several inputs are expected to come
     # back unchanged (apostrophes inside words, hyphens, "50,000",
     # "google.com", "12:25").
     cases = (
         ("hello, friend", "hello , friend"),
         ("the end.", "the end ."),
         ('"what', '"what'),
         ('"who, he asked, left?"', '"who , he asked , left ? "'),
         ("don't separate apostrophes", "don't separate apostrophes"),
         ("initial 'apostrophe", "initial 'apostrophe"),
         ("final' apostrophe", "final ' apostrophe"),
         ("keep ice-cream together", "keep ice-cream together"),
         ("50,000", "50,000"),
         ("google.com", "google.com"),
         ("12:25", "12:25"),
     )
     for test_case, expected in cases:
         with self.subTest(test_case=test_case):
             actual = rewrite.one_top_rewrite(
                 test_case, norm.detach_trailing_punctuation)
             self.assertEqual(actual, expected)
コード例 #10
0
    def apply_sound_changes(self, corpus_file_path):
        """Applies the loaded sound changes to every line of a corpus file.

        Each line is stripped of surrounding whitespace and wrapped in "#"
        markers, then passed through every rule in `self.formula`, in order,
        with `rewrite.one_top_rewrite`. The form before each rule is printed
        followed by "->", then the final form is printed with every "0"
        removed, and an empty line separates consecutive words.

        Args:
          corpus_file_path: path of a UTF-8 text file; each line is treated
            as one word.

        Returns:
          None; the results are only printed.
        """
        # The `with` block closes the file on exit, so no explicit close()
        # is needed afterwards.
        with open(corpus_file_path, "r", encoding="utf-8") as corpus:
            for line in corpus:
                word = "#" + line.strip() + "#"
                for formul in self.formula:
                    print(word, end="->")
                    word = rewrite.one_top_rewrite(word, formul)
                # NOTE(review): "0" appears to be a filler symbol used by the
                # rules and is dropped from the displayed result - confirm.
                print(word.replace("0", ""))
                print()
コード例 #11
0
 def testUnweightedInsert(self):
     """Checks that an unweighted insert rewrites "" to "Cheddar"."""
     inserter = pynutil.insert("Cheddar")
     actual = rewrite.one_top_rewrite("", inserter)
     self.assertEqual(actual, "Cheddar")
コード例 #12
0
ファイル: g2p.py プロジェクト: yzhang123/pynini
def g2p(string: str) -> str:
    """Returns the single top rewrite of `string` under the `_g2p` rule."""
    output = rewrite.one_top_rewrite(string, _g2p)
    return output
コード例 #13
0
def _harmonic_suffix(stem: str, suffix: str) -> str:
    """Joins `stem` and `suffix`, then applies the `_harmony` rule.

    Returns:
      The single top rewrite of the concatenated string under `_harmony`.
    """
    suffixed = stem + suffix
    return rewrite.one_top_rewrite(suffixed, _harmony)
コード例 #14
0
 def testUnweightedDelete(self):
     """Checks that an unweighted delete rewrites "Cheddar" to ""."""
     deleter = pynutil.delete("Cheddar")
     actual = rewrite.one_top_rewrite("Cheddar", deleter)
     self.assertEqual(actual, "")
コード例 #15
0
def plural(singular: str) -> str:
    """Returns the single top rewrite of `singular` under the `_plural` rule."""
    pluralized = rewrite.one_top_rewrite(singular, _plural)
    return pluralized
コード例 #16
0
def number(token: str) -> str:
    """Returns the single top rewrite of `token` under `_phi @ _lambda_star`.

    The two module-level rules are combined with `@` on every call before
    being handed to `rewrite.one_top_rewrite`.
    """
    cascade = _phi @ _lambda_star
    return rewrite.one_top_rewrite(token, cascade)
コード例 #17
0
ファイル: rewrite_test.py プロジェクト: yzhang123/pynini
 def assertOneTopRewrite(self, istring: str, ostring: str,
                         rule: pynini.Fst) -> None:
     """Asserts that the single top rewrite of `istring` equals `ostring`.

     Args:
       istring: the input string to rewrite.
       ostring: the expected output string.
       rule: the rule passed to `rewrite.one_top_rewrite`.
     """
     actual = rewrite.one_top_rewrite(istring, rule)
     self.assertEqual(actual, ostring)
コード例 #18
0
ファイル: dates.py プロジェクト: yzhang123/pynini
def match(text: str) -> str:
    """Returns the single top rewrite of `text` under `_date_matcher`."""
    matched = rewrite.one_top_rewrite(text, _date_matcher)
    return matched
コード例 #19
0
ファイル: dates.py プロジェクト: yzhang123/pynini
def tag(text: str) -> str:
    """Returns the single top rewrite of `text` under `_date_tagger`."""
    tagged = rewrite.one_top_rewrite(text, _date_tagger)
    return tagged