def test_as_is(self):
    """Compare a default mapping with an ``as_is=True`` mapping.

    Both mappings hold the same two rules (a -> b, aa -> c). The test expects
    'aa' to become 'c' with the default mapping and 'bb' with ``as_is=True``.
    """
    def make_rules():
        # Fresh list and dicts per call so the mappings share no rule objects.
        return [{'in': 'a', 'out': 'b'}, {'in': 'aa', 'out': 'c'}]

    default_map = Mapping(make_rules())
    untouched_map = Mapping(make_rules(), as_is=True)
    convert_default = Transducer(default_map)
    convert_untouched = Transducer(untouched_map)

    for convert, expected in ((convert_default, 'c'), (convert_untouched, 'bb')):
        self.assertEqual(convert('aa'), expected)
def test_case_sensitive(self):
    """Exercise the ``case_sensitive`` option on the single rule A -> b.

    The case-insensitive transducer is expected to rewrite both "a" and "A"
    to "b"; the default one is expected to leave lowercase "a" unchanged.
    """
    def single_rule():
        # A new list/dict per call so the two mappings share no rule objects.
        return [{"in": "A", "out": "b"}]

    insensitive_map = Mapping(single_rule(), case_sensitive=False)
    default_map = Mapping(single_rule())
    insensitive = Transducer(insensitive_map)
    default = Transducer(default_map)

    checks = (
        (insensitive, "a", "b"),
        (default, "a", "a"),
        (insensitive, "A", "b"),
    )
    for convert, text, expected in checks:
        self.assertEqual(convert(text).output_string, expected)
def test_case_sensitive(self):
    """Check rule A -> b with and without ``case_sensitive=False``.

    Expected outputs: the case-insensitive transducer maps 'a' and 'A' to
    'b', while the default transducer returns 'a' unchanged.
    """
    relaxed_mapping = Mapping(
        [{'in': 'A', 'out': 'b'}],
        case_sensitive=False,
    )
    strict_mapping = Mapping([{'in': 'A', 'out': 'b'}])
    relaxed = Transducer(relaxed_mapping)
    strict = Transducer(strict_mapping)

    lower_relaxed = relaxed('a').output_string
    self.assertEqual(lower_relaxed, 'b')
    lower_strict = strict('a').output_string
    self.assertEqual(lower_strict, 'a')
    upper_relaxed = relaxed('A').output_string
    self.assertEqual(upper_relaxed, 'b')
def test_escape_special(self):
    """Check the ``escape_special`` option on a rule whose input is backslash-d.

    Without escaping, the rule input is expected to act as a regex digit
    class: '1' becomes 'digit' and the literal backslash-d text is unchanged.
    With ``escape_special=True`` the test expects the opposite: '1' is left
    alone and the literal backslash-d text becomes 'b'.
    """
    # Raw strings: a backslash followed by 'd' in a normal literal is an
    # invalid escape sequence and triggers a warning on modern Python.
    digit_pattern = r'\d'

    mapping = Mapping([{'in': r'\d', "out": 'digit'}])
    mapping_escaped = Mapping([{'in': r'\d', "out": 'b'}],
                              escape_special=True)
    transducer = Transducer(mapping)
    transducer_escaped = Transducer(mapping_escaped)

    self.assertEqual(transducer('1'), 'digit')
    self.assertEqual(transducer(digit_pattern), digit_pattern)
    self.assertEqual(transducer_escaped('1'), '1')
    self.assertEqual(transducer_escaped(digit_pattern), 'b')
def test_escape_special(self):
    """Check ``escape_special`` on a rule whose input is backslash-d.

    Expected outputs: the unescaped mapping turns "1" into "digit" and leaves
    the literal backslash-d text alone; the escaped mapping leaves "1" alone
    and turns the literal backslash-d text into "b".
    """
    literal = r"\d"

    regex_mapping = Mapping([{"in": r"\d", "out": "digit"}])
    escaped_mapping = Mapping(
        [{"in": r"\d", "out": "b"}],
        escape_special=True,
    )
    as_regex = Transducer(regex_mapping)
    as_literal = Transducer(escaped_mapping)

    cases = [
        (as_regex, "1", "digit"),
        (as_regex, literal, literal),
        (as_literal, "1", "1"),
        (as_literal, literal, "b"),
    ]
    for convert, text, expected in cases:
        self.assertEqual(convert(text).output_string, expected)
def test_reverse(self):
    """Check the ``reverse`` option on the single rule a -> b.

    Expected outputs: the forward transducer yields "b" for both "a" and
    "b"; the reversed transducer yields "a" for both.
    """
    def rule():
        # Independent rule objects for each mapping.
        return [{"in": "a", "out": "b"}]

    forward_map = Mapping(rule())
    backward_map = Mapping(rule(), reverse=True)
    forward = Transducer(forward_map)
    backward = Transducer(backward_map)

    for convert, expected in ((forward, "b"), (backward, "a")):
        for text in ("a", "b"):
            self.assertEqual(convert(text).output_string, expected)
def test_reverse(self):
    """Verify rule a -> b applied forwards and with ``reverse=True``.

    The forward transducer is expected to output 'b' for 'a' and for 'b';
    the reversed one is expected to output 'a' for both inputs.
    """
    forward_mapping = Mapping([{'in': 'a', 'out': 'b'}])
    reversed_mapping = Mapping([{'in': 'a', 'out': 'b'}], reverse=True)
    forward = Transducer(forward_mapping)
    backward = Transducer(reversed_mapping)

    expectations = (
        (forward, 'a', 'b'),
        (forward, 'b', 'b'),
        (backward, 'a', 'a'),
        (backward, 'b', 'a'),
    )
    for convert, text, expected in expectations:
        self.assertEqual(convert(text).output_string, expected)
def test_as_is(self):
    """
    Test deprecated config: as_is.

    Checks that passing ``as_is`` explicitly writes a deprecation message to
    stderr naming the equivalent rule_ordering value, and that the resulting
    mappings (and the default mapping) report the expected
    ``wants_rules_sorted()`` value and produce the expected output for 'aa'.
    """
    # explicitly set as_is=False
    log_output = io.StringIO()
    with redirect_stderr(log_output):
        mapping_sorted = Mapping([{
            'in': 'a',
            "out": 'b'
        }, {
            'in': 'aa',
            'out': 'c'
        }],
                                 as_is=False)
    self.assertTrue(mapping_sorted.wants_rules_sorted())
    self.assertIn("deprecated", log_output.getvalue(),
                  "it should warn that the feature is deprecated")
    self.assertIn("apply-longest-first", log_output.getvalue(),
                  "it should show the equivalent rule_ordering setting")

    # explicitly set as_is=True
    log_output = io.StringIO()
    with redirect_stderr(log_output):
        mapping = Mapping([{
            'in': 'a',
            "out": 'b'
        }, {
            'in': 'aa',
            'out': 'c'
        }],
                          as_is=True)
    self.assertFalse(mapping.wants_rules_sorted())
    self.assertIn("deprecated", log_output.getvalue(),
                  "it should warn that the feature is deprecated")
    self.assertIn("as-written", log_output.getvalue(),
                  "it should show the equivalent rule_ordering setting")

    # test the default (rule_ordering="as-written")
    mapping_as_is = Mapping([{
        'in': 'a',
        "out": 'b'
    }, {
        'in': 'aa',
        'out': 'c'
    }])
    # Assert on the default mapping built just above; the earlier `mapping`
    # object was already checked in the as_is=True section.
    self.assertFalse(mapping_as_is.wants_rules_sorted())

    # test the alternative (rule_ordering="apply-longest-first")
    transducer = Transducer(mapping_sorted)
    transducer_as_is = Transducer(mapping_as_is)
    self.assertEqual(transducer('aa').output_string, 'c')
    self.assertEqual(transducer_as_is('aa').output_string, 'bb')
def test_norm_form(self):
    """Check the ``norm_form`` option with a decomposed a+acute rule.

    Each mapping has one rule turning "a" + U+0301 into "a". The default and
    NFD transducers are expected to output "a" for both the decomposed and
    the precomposed (U+00E1) input; with ``norm_form=False`` the precomposed
    input is expected to pass through unchanged.
    """
    decomposed = "a\u0301"
    precomposed = "\u00E1"

    def rule():
        # New rule objects for each mapping.
        return [{"in": "a\u0301", "out": "a"}]

    nfc_map = Mapping(rule())  # Defaults to NFC
    nfd_map = Mapping(rule(), norm_form="NFD")
    raw_map = Mapping(rule(), norm_form=False)
    nfc = Transducer(nfc_map)
    nfd = Transducer(nfd_map)
    raw = Transducer(raw_map)

    cases = [
        (nfc, decomposed, "a"),
        (nfc, precomposed, "a"),
        (nfd, decomposed, "a"),
        (nfd, precomposed, "a"),
        (raw, decomposed, "a"),
        (raw, precomposed, "\u00E1"),
    ]
    for convert, text, expected in cases:
        self.assertEqual(convert(text).output_string, expected)
def test_norm_form(self):
    """Verify the ``norm_form`` option using an a + U+0301 -> a rule.

    Expected results: the default and 'NFD' transducers return 'a' for both
    the decomposed and the precomposed (U+00E1) spelling; the transducer
    built with ``norm_form=False`` returns the precomposed input unchanged.
    """
    combining = 'a\u0301'
    single = '\u00E1'

    default_mapping = Mapping([{'in': 'a\u0301', 'out': 'a'}])  # Defaults to NFC
    nfd_mapping = Mapping([{'in': 'a\u0301', 'out': 'a'}], norm_form='NFD')
    unnormalized_mapping = Mapping([{'in': 'a\u0301', 'out': 'a'}],
                                   norm_form=False)
    default = Transducer(default_mapping)
    nfd = Transducer(nfd_mapping)
    unnormalized = Transducer(unnormalized_mapping)

    # Normalizing transducers: both spellings collapse to plain 'a'.
    for convert in (default, nfd):
        self.assertEqual(convert(combining), 'a')
        self.assertEqual(convert(single), 'a')

    self.assertEqual(unnormalized(combining), 'a')
    self.assertEqual(unnormalized(single), '\u00E1')
def test_basic_composition(self):
    """Indices mapped through a two-step basic composition"""
    a_to_b = Transducer(Mapping([{"in": "a", "out": "b"}]))
    graph = a_to_b("abba")

    # One-to-one alignment is expected: position i maps to position i.
    identity_edges = [(0, 0), (1, 1), (2, 2), (3, 3)]
    self.assertEqual(graph.output_string, "bbbb")
    self.assertEqual(graph.edges, identity_edges)
def test_unidecode_mapping(self):
    """Check a ``type="unidecode"`` mapping and its transduction output.

    The mapping is expected to have an empty rule list and to record its
    type in ``kwargs``; the sample text is expected to come out as ASCII.
    """
    unidecode_mapping = Mapping(type="unidecode")
    self.assertEqual(unidecode_mapping.mapping, [])
    self.assertEqual(unidecode_mapping.kwargs["type"], "unidecode")

    to_ascii = Transducer(unidecode_mapping)
    graph = to_ascii("été Nunavut ᓄᓇᕗᑦ")
    self.assertEqual(graph.output_string, "ete Nunavut nonafot")
def test_basic_composition(self):
    """Check output, edges and self-composition of edges for rule a -> b.

    The edges of "abba" are expected to be the identity alignment, and
    composing those edges with themselves is expected to give them back.
    """
    a_to_b = Transducer(Mapping([{"in": "a", "out": "b"}]))
    graph = a_to_b("abba")

    self.assertEqual(graph.output_string, "bbbb")
    identity_edges = [(0, 0), (1, 1), (2, 2), (3, 3)]
    self.assertEqual(graph.edges, identity_edges)
    composed = compose_indices(graph.edges, graph.edges)
    self.assertEqual(graph.edges, composed)
def convert(message):
    """ Convert input text and return output

    Builds one Transducer per entry in message['data']['mappings'], chains
    them in a CompositeTransducer, runs it on message['data']['input_string']
    and emits a 'conversion response' event. When message['data']['index'] is
    truthy the response also carries the echart index data and links.
    """
    payload = message['data']

    stages = []
    for spec in payload['mappings']:
        rules = hot_to_mappings(spec['mapping'])
        abbreviations = flatten_abbreviations(spec['abbreviations'])
        stage_mapping = Mapping(rules,
                                abbreviations=abbreviations,
                                **spec['kwargs'])
        stages.append(Transducer(stage_mapping))
    pipeline = CompositeTransducer(stages)

    if not payload['index']:
        # Plain conversion: only the output string is sent back.
        output_string = pipeline(payload['input_string']).output_string
        emit('conversion response', {'output_string': output_string})
        return

    tg = pipeline(payload['input_string'])
    data, links = return_echart_data(tg)
    response = {
        'output_string': tg.output_string,
        'index_data': data,
        'index_links': links
    }
    emit('conversion response', response)
def test_reduced_indices(self):
    """Check reduced indices for two git -> eng-arpabet conversions.

    The second element of each conversion result is expected to expose
    ``reduced()``, which is compared against fixed index pairs.
    """
    git_to_arpabet = Transducer(Mapping(in_lang='git',
                                        out_lang='eng-arpabet'))

    first = git_to_arpabet("K̲'ay")
    expected_first = [(2, 2), (3, 5), (4, 8), (5, 9)]
    self.assertEqual(first[1].reduced(), expected_first)

    second = git_to_arpabet("yukwhl")
    expected_second = [(1, 2), (2, 5), (3, 7), (4, 9), (6, 10)]
    self.assertEqual(second[1].reduced(), expected_second)
def test_case_acdc(self):
    """Check output and edges for an explicitly indexed reordering rule.

    The rule rewrites "a{1}c{2}" as "c{2}a{1}c{2}"; 'acdc' is expected to
    become 'cacdc' with the listed input-to-output edges.
    """
    rule = {"in": "a{1}c{2}", "out": "c{2}a{1}c{2}"}
    reorder = Transducer(Mapping([rule]))

    graph = reorder('acdc')
    expected_edges = [(0, 1), (1, 0), (1, 2), (2, 3), (3, 4)]
    self.assertEqual(graph.output_string, 'cacdc')
    self.assertEqual(graph.edges, expected_edges)
def test_conversions(self):
    '''
    Regression cases for conversions that caused trouble in readalongs.
    They may start failing if the lookup tables change.
    '''
    for case in self.test_conversion_data:
        lang_mapping = Mapping(in_lang=case['in_lang'],
                               out_lang=case['out_lang'])
        convert = Transducer(lang_mapping)
        result = convert(case['in_text'])
        # The first element of the result is compared to the expected text.
        self.assertEqual(result[0], case['out_text'])
def panphon_preprocess(inventory: List[str], is_xsampa: bool = False):
    """Run every inventory entry through the 'panphon_preprocessor' mapping.

    Args:
        inventory: strings to preprocess.
        is_xsampa: when true, each entry is first passed through
            ``XSampa().convert`` before the preprocessor is applied.

    Returns:
        A new list with the preprocessed strings, in input order.
    """
    xsampa_converter = XSampa()
    panphon_preprocessor = Transducer(Mapping(id='panphon_preprocessor'))

    def _preprocess(symbol):
        if is_xsampa:
            symbol = xsampa_converter.convert(symbol)
        return panphon_preprocessor(symbol).output_string

    return [_preprocess(symbol) for symbol in inventory]
def convert(message):
    """ Convert input text and return output

    Builds a single Transducer from the mappings, abbreviations and kwargs
    found under message['data'], applies it to message['data']['input_string']
    and emits the result in a 'conversion response' event.
    """
    payload = message['data']
    rules = hot_to_mappings(payload['mappings'])
    abbreviations = flatten_abbreviations(payload['abbreviations'])
    convert_text = Transducer(
        Mapping(rules, abbreviations=abbreviations, **payload['kwargs']))

    response = {'output_string': convert_text(payload['input_string'])}
    emit('conversion response', response)
def process_character(p, is_xsampa=False):
    """Preprocess one character string with the 'panphon_preprocessor' mapping.

    Args:
        p: the string to preprocess.
        is_xsampa: when true, ``p`` is first converted with a lazily created,
            cached ``panphon.xsampa.XSampa`` converter.

    Returns:
        The ``output_string`` of the preprocessor transduction.
    """
    # The cache is rebound below, so it must be declared global; without the
    # declaration the name is local and the `is None` check raises
    # UnboundLocalError.
    # NOTE(review): assumes a module-level `_xsampa_converter = None` — confirm.
    global _xsampa_converter
    if is_xsampa:
        if _xsampa_converter is None:
            # Expensive import, do it only when needed:
            from panphon.xsampa import XSampa

            _xsampa_converter = XSampa()
        p = _xsampa_converter.convert(p)
    panphon_preprocessor = Transducer(Mapping(id="panphon_preprocessor"))
    return panphon_preprocessor(p).output_string
def test_case_acac(self):
    """Compare an explicitly indexed rule with an equivalent pair of rules.

    Both transducers are expected to turn 'abcabc' into 'abab' and to report
    the same edges, with deleted input positions mapped to None.
    """
    indexed_mapping = Mapping([{"in": "ab{1}c{2}", "out": "ab{2}"}])
    indexed = Transducer(indexed_mapping)
    two_rule_mapping = Mapping([
        {"in": "ab", "out": ""},
        {"in": "c", "out": "ab"},
    ])
    two_rule = Transducer(two_rule_mapping)

    graph_indexed = indexed('abcabc')
    graph_two_rule = two_rule('abcabc')

    self.assertEqual(graph_indexed.output_string, 'abab')
    self.assertEqual(graph_two_rule.output_string, 'abab')

    def expected_edges():
        # A fresh list per comparison, as in two separate literals.
        return [(0, None), (1, None), (2, 0), (2, 1),
                (3, None), (4, None), (5, 2), (5, 3)]

    self.assertEqual(graph_indexed.edges, expected_edges())
    self.assertEqual(graph_two_rule.edges, expected_edges())
def test_minimal(self):
    """Load minimal_config.yaml and check its output and parsed options.

    The transducer is expected to turn 'abb' into 'aab' and leave 'a' alone;
    the mapping's kwargs are expected to reflect the config file's options.
    """
    mappings_dir = os.path.join(os.path.dirname(public_data), 'mappings')
    config_mapping = Mapping(
        os.path.join(mappings_dir, 'minimal_config.yaml'))
    convert = Transducer(config_mapping)

    for text, expected in (('abb', 'aab'), ('a', 'a')):
        self.assertEqual(convert(text), expected)

    self.assertTrue(config_mapping.kwargs['as_is'])
    self.assertFalse(config_mapping.kwargs['case_sensitive'])
    self.assertTrue(config_mapping.kwargs['escape_special'])
    self.assertEqual(config_mapping.kwargs['norm_form'], 'NFD')
    self.assertTrue(config_mapping.kwargs['reverse'])
def test_abbreviations(self):
    """Load abbreviation_config.yaml and check rule inputs and outputs.

    The first two rules are expected to have the alternations "i|u" and
    "a|e|i|o|u" as inputs; "i" is expected to map to "1" and "e" to "2".
    """
    config_path = os.path.join(
        os.path.dirname(public_data), "mappings", "abbreviation_config.yaml"
    )
    abbreviated = Mapping(config_path)

    first_rule, second_rule = abbreviated.mapping[0], abbreviated.mapping[1]
    self.assertEqual(first_rule["in"], "i|u")
    self.assertEqual(second_rule["in"], "a|e|i|o|u")

    convert = Transducer(abbreviated)
    for text, expected in (("i", "1"), ("e", "2")):
        self.assertEqual(convert(text).output_string, expected)
def test_rule_ordering(self):
    """
    Test the config option: rule-ordering: 'as-written' (default) or
    rule-ordering: 'apply-longest-first'

    With rules a -> b and aa -> c, "aa" is expected to become "c" under
    'apply-longest-first' and "bb" under 'as-written' or the default.
    """
    rules = [{"in": "a", "out": "b"}, {"in": "aa", "out": "c"}]

    longest_first = Transducer(
        Mapping(rules, rule_ordering="apply-longest-first")
    )
    result = longest_first("aa").output_string
    self.assertEqual(result, "c")

    as_written = Transducer(Mapping(rules, rule_ordering="as-written"))
    result = as_written("aa").output_string
    self.assertEqual(result, "bb")

    default = Transducer(Mapping(rules))
    result = default("aa").output_string
    self.assertEqual(result, "bb")
def test_minimal(self):
    """Load minimal_config.yaml and check its output and parsed options.

    The transducer is expected to turn "abb" into "aaa" and leave "a" alone;
    the mapping is expected not to want sorted rules, and its kwargs are
    expected to reflect the config file's options.
    """
    mappings_dir = os.path.join(os.path.dirname(public_data), "mappings")
    config_mapping = Mapping(os.path.join(mappings_dir, "minimal_config.yaml"))
    convert = Transducer(config_mapping)

    for text, expected in (("abb", "aaa"), ("a", "a")):
        self.assertEqual(convert(text).output_string, expected)

    self.assertFalse(config_mapping.wants_rules_sorted())
    self.assertFalse(config_mapping.kwargs["case_sensitive"])
    self.assertTrue(config_mapping.kwargs["escape_special"])
    self.assertEqual(config_mapping.kwargs["norm_form"], "NFD")
    self.assertTrue(config_mapping.kwargs["reverse"])
def test_rule_ordering(self):
    """
    Test the config option: rule-ordering: 'as-written' (default) or
    rule-ordering: 'apply-longest-first'

    With rules a -> b and aa -> c, 'aa' is expected to become 'c' under
    'apply-longest-first' and 'bb' under 'as-written' or the default.
    """
    rules = [{'in': 'a', 'out': 'b'}, {'in': 'aa', 'out': 'c'}]
    # Constructed first and otherwise unused; kept so the sequence of Mapping
    # constructions over the shared rules stays the same.
    mapping_default = Mapping(rules)

    longest_first_mapping = Mapping(rules, rule_ordering='apply-longest-first')
    longest_first = Transducer(longest_first_mapping)
    self.assertEqual(longest_first('aa').output_string, 'c')

    as_written_mapping = Mapping(rules, rule_ordering='as-written')
    as_written = Transducer(as_written_mapping)
    self.assertEqual(as_written('aa').output_string, 'bb')

    implicit_mapping = Mapping(rules)
    implicit = Transducer(implicit_mapping)
    self.assertEqual(implicit('aa').output_string, 'bb')
def make_g2p(in_lang: str, out_lang: str):
    """Build a transducer converting ``in_lang`` to ``out_lang``.

    Finds the shortest path between the two languages in ``LANGS_NETWORK``
    and creates one Mapping per consecutive pair of languages on that path.

    Args:
        in_lang: name of the source language node.
        out_lang: name of the target language node.

    Returns:
        A ``Transducer`` when exactly one mapping is needed, otherwise a
        ``CompositeTransducer`` over all mappings on the path.

    Raises:
        FileNotFoundError: if either language is not a node in the network.
        NetworkXNoPath: if no path connects the two languages.
    """
    # Check both langs are nodes in the network (in_lang first).
    for lang in (in_lang, out_lang):
        if lang not in LANGS_NETWORK.nodes:
            LOGGER.error(f"No lang called {lang}. Please try again.")
            # f-string so the language name actually appears in the message.
            raise FileNotFoundError(f"No lang called {lang}.")

    # Try to find the shortest path between the nodes
    try:
        path = shortest_path(LANGS_NETWORK, in_lang, out_lang)
    except NetworkXNoPath:
        LOGGER.error(
            f"Sorry, we couldn't find a way to convert {in_lang} to {out_lang}. Please update your langs by running `g2p update` and try again."
        )
        # Re-raise the original exception to keep its message and traceback.
        raise

    # Find all mappings needed: one per consecutive pair of langs on the path.
    # Pairing with zip avoids catching IndexError, which would also hide an
    # IndexError raised while constructing a Mapping.
    mappings_needed = []
    for source, target in zip(path, path[1:]):
        mapping = Mapping(in_lang=source, out_lang=target)
        LOGGER.debug(
            f"Adding mapping between {source} and {target} to composite transducer."
        )
        mappings_needed.append(mapping)

    # Either return Transducer or Composite Transducer
    if len(mappings_needed) == 1:
        return Transducer(mappings_needed[0])
    return CompositeTransducer([Transducer(x) for x in mappings_needed])
def create_transducer(mapping):
    """Build a Transducer from a rule list, a YAML config path or a file.

    Args:
        mapping: a list (passed straight to ``Mapping``), a string containing
            a ``.yml``/``.yaml`` extension (also passed straight to
            ``Mapping``), or a path to an existing file, which is read with
            ``load_from_file``.

    Returns:
        A ``Transducer`` wrapping the resulting ``Mapping``.

    Raises:
        exceptions.MissingFileError: if ``mapping`` is falsy, or is none of
            the accepted forms.
    """
    if not mapping:
        raise exceptions.MissingFileError(str(mapping))

    if isinstance(mapping, list):
        mapping_obj = Mapping(mapping)
    # Escaped dot and optional single 'a': matches ".yml" / ".yaml" only,
    # not arbitrary text such as "xyml" or ".yaaaml".
    elif isinstance(mapping, str) and re.search(r'\.ya?ml\b', mapping):
        mapping_obj = Mapping(mapping)
    elif os.path.isfile(mapping):
        mapping_data = load_from_file(mapping)
        mapping_obj = Mapping(mapping_data)
    else:
        raise exceptions.MissingFileError(mapping)
    return Transducer(mapping_obj)
def test_rule_ordering_from_config(self):
    """
    Same checks as test_minimal, but the config file uses "rule-ordering"
    instead of "as-is".
    """
    mappings_dir = os.path.join(os.path.dirname(public_data), "mappings")
    ordered_mapping = Mapping(os.path.join(mappings_dir, "rule-ordering.yaml"))
    convert = Transducer(ordered_mapping)

    for text, expected in (("abb", "aaa"), ("a", "a")):
        self.assertEqual(convert(text).output_string, expected)

    self.assertTrue(ordered_mapping.wants_rules_sorted())
    self.assertEqual(
        ordered_mapping.kwargs["rule_ordering"], "apply-longest-first"
    )
    self.assertFalse(ordered_mapping.kwargs["case_sensitive"])
    self.assertTrue(ordered_mapping.kwargs["escape_special"])
    self.assertEqual(ordered_mapping.kwargs["norm_form"], "NFD")
    self.assertTrue(ordered_mapping.kwargs["reverse"])
def index_convert(message):
    """ Convert input text and return output with indices for echart

    Builds a Transducer from the mappings, abbreviations and kwargs under
    message['data'], runs it with ``index=True`` and emits the output string
    plus echart data and links in an 'index conversion response' event.
    """
    payload = message['data']
    rules = hot_to_mappings(payload['mappings'])
    abbreviations = flatten_abbreviations(payload['abbreviations'])
    convert_text = Transducer(
        Mapping(rules, abbreviations=abbreviations, **payload['kwargs']))

    output_string, indices = convert_text(payload['input_string'], index=True)
    data, links = return_echart_data(indices)
    response = {
        'output_string': output_string,
        'index_data': data,
        'index_links': links
    }
    emit('index conversion response', response)