def testRankedRewrite(self):
    """Checks the top rewrite of "okto" under two competing weighted rules.

    A weight-1 consonant deletion rule is unioned with a weight-2 "i"
    epenthesis rule; the expected single top rewrite is "oto".
    """
    delete_consonant = pynutil.delete(self.consonant, weight=1)
    insert_vowel = pynutil.insert("i", weight=2)
    deletion_rule = pynini.cdrewrite(
        delete_consonant, "", self.consonant, self.sigstar)
    epenthesis_rule = pynini.cdrewrite(
        insert_vowel, self.consonant, self.consonant, self.sigstar)
    combined = pynini.union(deletion_rule, epenthesis_rule).optimize()
    self.assertEqual("oto", rewrite.one_top_rewrite("okto", combined))
    self.assertTrue(rewrite.matches("okto", "oto", combined))
def test_delete_freestanding_punctuation(self):
    """Test deleting freestanding punctuation."""
    # (input, expected output) pairs, each run as its own sub-test.
    cases = (
        ("hello , friend", "hello friend"),
        ("the end .", "the end "),
        ('" what', ' what'),
        ('" who , he asked , left ? "', ' who he asked left '),
    )
    for source, expected in cases:
        with self.subTest(test_case=source):
            actual = rewrite.one_top_rewrite(
                source, norm.delete_freestanding_punctuation)
            self.assertEqual(actual, expected)
def test_remove_extra_whitespace(self):
    """Test removing extra whitespace."""
    # (input, expected output) pairs, each run as its own sub-test.
    # NOTE(review): two of these inputs equal their expected outputs and none
    # visibly contains a run of spaces; confirm the literals were not
    # whitespace-normalized somewhere along the way.
    cases = (
        ("hi there", "hi there"),
        ("my friend ", "my friend"),
        (" the sun", " the sun"),
        (" all the spaces ", " all the spaces"),
    )
    for source, expected in cases:
        with self.subTest(test_case=source):
            actual = rewrite.one_top_rewrite(
                source, norm.remove_extra_whitespace)
            self.assertEqual(actual, expected)
def testOptionalRewrite(self):
    """Checks an optional (mode="opt") deletion rule applied to "fist".

    The rule is expected to yield both "fist" and "fis", so asking for a
    single top rewrite must fail with "Multiple top rewrites".
    """
    tau = pynutil.delete(self.td)
    rule = pynini.cdrewrite(
        tau, self.consonant, "[EOS]", self.sigstar, mode="opt").optimize()
    with self.assertRaisesRegex(rewrite.Error, r"Multiple top rewrites"):
        _ = rewrite.one_top_rewrite("fist", rule)
    self.assertCountEqual(["fist", "fis"], rewrite.rewrites("fist", rule))
    # Both the rewritten and the unchanged form are valid outputs for "fist".
    for ostring in ("fis", "fist"):
        self.assertTrue(rewrite.matches("fist", ostring, rule))
    self.assertFalse(rewrite.matches("fis", "fist", rule))
def testMandatoryRewrite(self):
    """Checks a mandatory deletion rule applied to "fist".

    The only expected rewrite is "fis", through each of `rewrites`,
    `top_rewrite`, `one_top_rewrite` and `matches`.
    """
    tau = pynutil.delete(self.td)
    rule = pynini.cdrewrite(
        tau, self.consonant, "[EOS]", self.sigstar).optimize()
    outputs = tuple(rewrite.rewrites("fist", rule))
    # pylint: disable=g-generic-assert
    self.assertEqual(len(outputs), 1)
    # pylint: enable=g-generic-assert
    self.assertEqual("fis", outputs[0])
    self.assertEqual("fis", rewrite.top_rewrite("fist", rule))
    self.assertEqual("fis", rewrite.one_top_rewrite("fist", rule))
    self.assertTrue(rewrite.matches("fist", "fis", rule))
    self.assertFalse(rewrite.matches("fis", "fist", rule))
def g2p(istring: str) -> str:
    """Rewrites a graphemic string with the G2P rule.

    Args:
        istring: the graphemic input string.

    Returns:
        The phonemic output string.

    Raises:
        rewrite.Error: composition failure.
    """
    ostring = rewrite.one_top_rewrite(istring, G2P)
    return ostring
def tag(self, string: pynini.FstLike) -> str:
    """Tags an input string.

    XML-style tags are inserted around every substring of the input that
    matches an element of the vocabulary.

    Args:
        string: The input string.

    Returns:
        The tagged string.
    """
    tagged = rewrite.one_top_rewrite(string, self._tagger)
    return tagged
def generate_report(self, city: str) -> str:
    """Generates weather report for the given city.

    Args:
        city: a city string; used as the key into `self._table`.

    Returns:
        Weather report for the city.
    """
    record = self._table[city]
    # Map each "$"-prefixed placeholder string to this city's value.
    substitutions = (
        pynini.cross("$CITY", city),
        pynini.cross("$TEMPERATURE", str(record.temperature)),
        pynini.cross("$STATE", record.state),
        pynini.cross("$WIND_DIRECTION", record.wind_direction),
        pynini.cross("$WIND_SPEED", str(record.wind_speed)),
    )
    populate = WeatherTable.sigma_pad(*substitutions)
    rule = populate @ self._singularize
    return rewrite.one_top_rewrite(self._template, rule)
def test_detach_trailing_punctuation(self):
    """Test separating trailing punctuation from tokens."""
    # (input, expected output) pairs, each run as its own sub-test.
    cases = (
        ("hello, friend", "hello , friend"),
        ("the end.", "the end ."),
        ('"what', '"what'),
        ('"who, he asked, left?"', '"who , he asked , left ? "'),
        ("don't separate apostrophes", "don't separate apostrophes"),
        ("initial 'apostrophe", "initial 'apostrophe"),
        ("final' apostrophe", "final ' apostrophe"),
        ("keep ice-cream together", "keep ice-cream together"),
        ("50,000", "50,000"),
        ("google.com", "google.com"),
        ("12:25", "12:25"),
    )
    for source, expected in cases:
        with self.subTest(test_case=source):
            actual = rewrite.one_top_rewrite(
                source, norm.detach_trailing_punctuation)
            self.assertEqual(actual, expected)
def apply_sound_changes(self, corpus_file_path):
    """Applies the loaded sound changes to every line of a corpus file.

    Each line is stripped of surrounding whitespace and wrapped in "#" on
    both ends. The rules in `self.formula` are then applied in order, each
    one to the output of the previous one. For every line the derivation is
    printed as a "->"-separated chain that ends in the final form with any
    "0" characters removed, followed by an empty line.

    Args:
        corpus_file_path: path of a UTF-8 text file; every line is treated
            as one word.

    Returns:
        None. The results are only printed.
    """

    def process_word(word):
        """Strips the raw line and wraps it in "#" markers."""
        return "#" + word.strip() + "#"

    # The `with` statement closes the file; no explicit close() is needed.
    with open(corpus_file_path, "r", encoding="utf-8") as corpus:
        for word in corpus:
            word = process_word(word)
            for formul in self.formula:
                print(word, end="->")
                word = rewrite.one_top_rewrite(word, formul)
            # Presumably "0" is a filler symbol from an insertion step that
            # is currently disabled; it is stripped from the printed form.
            # NOTE(review): the final form is printed once per word, after
            # all rules have run -- confirm this is the intended layout.
            print(word.replace("0", ""))
            print()
def testUnweightedInsert(self):
    """Checks that an unweighted inserter maps "" to "Cheddar"."""
    expected = "Cheddar"
    rule = pynutil.insert("Cheddar")
    actual = rewrite.one_top_rewrite("", rule)
    self.assertEqual(actual, expected)
def g2p(string: str) -> str:
    """Returns the single top rewrite of `string` under the `_g2p` rule."""
    result = rewrite.one_top_rewrite(string, _g2p)
    return result
def _harmonic_suffix(stem: str, suffix: str) -> str:
    """Joins stem and suffix, then rewrites the result with the harmony rule."""
    combined = stem + suffix
    return rewrite.one_top_rewrite(combined, _harmony)
def testUnweightedDelete(self):
    """Checks that an unweighted deleter maps "Cheddar" to ""."""
    rule = pynutil.delete("Cheddar")
    actual = rewrite.one_top_rewrite("Cheddar", rule)
    self.assertEqual(actual, "")
def plural(singular: str) -> str:
    """Returns the single top rewrite of `singular` under the `_plural` rule."""
    result = rewrite.one_top_rewrite(singular, _plural)
    return result
def number(token: str) -> str:
    """Returns the single top rewrite of `token` under `_phi @ _lambda_star`."""
    rule = _phi @ _lambda_star
    return rewrite.one_top_rewrite(token, rule)
def assertOneTopRewrite(self, istring: str, ostring: str,
                        rule: pynini.Fst) -> None:
    """Asserts that the single top rewrite of `istring` equals `ostring`.

    Args:
        istring: the input string to rewrite.
        ostring: the expected output string.
        rule: the rule passed to `rewrite.one_top_rewrite`.
    """
    actual = rewrite.one_top_rewrite(istring, rule)
    self.assertEqual(actual, ostring)
def match(text: str) -> str:
    """Returns the single top rewrite of `text` under `_date_matcher`."""
    result = rewrite.one_top_rewrite(text, _date_matcher)
    return result
def tag(text: str) -> str:
    """Returns the single top rewrite of `text` under `_date_tagger`."""
    result = rewrite.one_top_rewrite(text, _date_tagger)
    return result