예제 #1
0
 def test_as_is(self):
     """The default mapping converts 'aa' to 'c'; with as_is=True it yields 'bb'."""
     default_mapping = Mapping([{"in": "a", "out": "b"}, {"in": "aa", "out": "c"}])
     as_is_mapping = Mapping(
         [{"in": "a", "out": "b"}, {"in": "aa", "out": "c"}], as_is=True
     )
     default_transducer = Transducer(default_mapping)
     as_is_transducer = Transducer(as_is_mapping)
     # Same input for both transducers; only the expected output differs.
     for convert, expected in ((default_transducer, "c"), (as_is_transducer, "bb")):
         self.assertEqual(convert("aa"), expected)
예제 #2
0
 def test_case_sensitive(self):
     """With case_sensitive=False both 'a' and 'A' convert; by default 'a' is left alone."""
     insensitive_mapping = Mapping([{"in": "A", "out": "b"}], case_sensitive=False)
     sensitive_mapping = Mapping([{"in": "A", "out": "b"}])
     insensitive = Transducer(insensitive_mapping)
     sensitive = Transducer(sensitive_mapping)
     # (transducer, input text, expected output_string)
     checks = (
         (insensitive, "a", "b"),
         (sensitive, "a", "a"),
         (insensitive, "A", "b"),
     )
     for convert, text, expected in checks:
         self.assertEqual(convert(text).output_string, expected)
예제 #3
0
 def test_case_sensitive(self):
     """Compare a case_sensitive=False mapping with a default one on 'a' and 'A'."""
     relaxed_mapping = Mapping([{"in": "A", "out": "b"}], case_sensitive=False)
     strict_mapping = Mapping([{"in": "A", "out": "b"}])
     relaxed = Transducer(relaxed_mapping)
     strict = Transducer(strict_mapping)

     lower_relaxed = relaxed("a").output_string
     self.assertEqual(lower_relaxed, "b")

     lower_strict = strict("a").output_string
     self.assertEqual(lower_strict, "a")

     upper_relaxed = relaxed("A").output_string
     self.assertEqual(upper_relaxed, "b")
예제 #4
0
 def test_escape_special(self):
     r"""Check how the rule input \d is treated with and without escape_special.

     Without the option, '1' converts to 'digit' and the literal text \d is
     returned unchanged; with escape_special=True, '1' is returned unchanged
     and the literal text \d converts to 'b'.
     """
     # Raw strings: a bare '\d' in a normal literal is an invalid escape
     # sequence and triggers a warning on modern Python versions.
     mapping = Mapping([{'in': r'\d', "out": 'digit'}])
     mapping_escaped = Mapping([{'in': r'\d', "out": 'b'}], escape_special=True)
     transducer = Transducer(mapping)
     transducer_escaped = Transducer(mapping_escaped)
     self.assertEqual(transducer('1'), 'digit')
     self.assertEqual(transducer(r'\d'), r'\d')
     self.assertEqual(transducer_escaped('1'), '1')
     self.assertEqual(transducer_escaped(r'\d'), 'b')
예제 #5
0
 def test_escape_special(self):
     r"""Compare the rule input \d with and without escape_special=True."""
     plain_mapping = Mapping([{"in": r"\d", "out": "digit"}])
     escaped_mapping = Mapping([{"in": r"\d", "out": "b"}], escape_special=True)
     plain = Transducer(plain_mapping)
     escaped = Transducer(escaped_mapping)
     # (transducer, input text, expected output_string)
     checks = (
         (plain, "1", "digit"),
         (plain, r"\d", r"\d"),
         (escaped, "1", "1"),
         (escaped, r"\d", "b"),
     )
     for convert, text, expected in checks:
         self.assertEqual(convert(text).output_string, expected)
예제 #6
0
 def test_reverse(self):
     """A mapping built with reverse=True converts 'b' to 'a' instead of 'a' to 'b'."""
     forward_mapping = Mapping([{"in": "a", "out": "b"}])
     backward_mapping = Mapping([{"in": "a", "out": "b"}], reverse=True)
     forward = Transducer(forward_mapping)
     backward = Transducer(backward_mapping)
     # (transducer, input text, expected output_string)
     checks = (
         (forward, "a", "b"),
         (forward, "b", "b"),
         (backward, "a", "a"),
         (backward, "b", "a"),
     )
     for convert, text, expected in checks:
         self.assertEqual(convert(text).output_string, expected)
예제 #7
0
 def test_reverse(self):
     """Check the a->b rule in both directions via the reverse=True option."""
     normal_mapping = Mapping([{"in": "a", "out": "b"}])
     flipped_mapping = Mapping([{"in": "a", "out": "b"}], reverse=True)
     normal = Transducer(normal_mapping)
     flipped = Transducer(flipped_mapping)

     # Forward direction: 'a' becomes 'b', 'b' is untouched.
     self.assertEqual(normal("a").output_string, "b")
     self.assertEqual(normal("b").output_string, "b")

     # Reversed direction: 'a' is untouched, 'b' becomes 'a'.
     self.assertEqual(flipped("a").output_string, "a")
     self.assertEqual(flipped("b").output_string, "a")
예제 #8
0
    def test_as_is(self):
        """
        Test deprecated config: as_is.

        Passing as_is explicitly must log a deprecation warning naming the
        equivalent rule_ordering value; omitting it must leave the rules
        unsorted.
        """

        # explicitly set as_is=False
        log_output = io.StringIO()
        with redirect_stderr(log_output):
            mapping_sorted = Mapping(
                [{'in': 'a', "out": 'b'}, {'in': 'aa', 'out': 'c'}],
                as_is=False)
        self.assertTrue(mapping_sorted.wants_rules_sorted())
        self.assertIn("deprecated", log_output.getvalue(),
                      "it should warn that the feature is deprecated")
        self.assertIn("apply-longest-first", log_output.getvalue(),
                      "it should show the equivalent rule_ordering setting")

        # explicitly set as_is=True
        log_output = io.StringIO()
        with redirect_stderr(log_output):
            mapping = Mapping(
                [{'in': 'a', "out": 'b'}, {'in': 'aa', 'out': 'c'}],
                as_is=True)
        self.assertFalse(mapping.wants_rules_sorted())
        self.assertIn("deprecated", log_output.getvalue(),
                      "it should warn that the feature is deprecated")
        self.assertIn("as-written", log_output.getvalue(),
                      "it should show the equivalent rule_ordering setting")

        # test the default (rule_ordering="as-written")
        mapping_as_is = Mapping(
            [{'in': 'a', "out": 'b'}, {'in': 'aa', 'out': 'c'}])
        # Assert on the default mapping built just above, not on the
        # as_is=True mapping that was already checked earlier.
        self.assertFalse(mapping_as_is.wants_rules_sorted())

        # test the alternative (rule_ordering="apply-longest-first")
        transducer = Transducer(mapping_sorted)
        transducer_as_is = Transducer(mapping_as_is)
        self.assertEqual(transducer('aa').output_string, 'c')
        self.assertEqual(transducer_as_is('aa').output_string, 'bb')
예제 #9
0
    def test_norm_form(self):
        """Run decomposed and precomposed a-acute through NFC, NFD and norm_form=False mappings."""
        nfc_mapping = Mapping([{"in": "a\u0301", "out": "a"}])  # Defaults to NFC
        nfd_mapping = Mapping([{"in": "a\u0301", "out": "a"}], norm_form="NFD")
        raw_mapping = Mapping([{"in": "a\u0301", "out": "a"}], norm_form=False)

        nfc = Transducer(nfc_mapping)
        nfd = Transducer(nfd_mapping)
        raw = Transducer(raw_mapping)

        # (transducer, input text, expected output_string)
        checks = (
            (nfc, "a\u0301", "a"),
            (nfc, "\u00E1", "a"),
            (nfd, "a\u0301", "a"),
            (nfd, "\u00E1", "a"),
            (raw, "a\u0301", "a"),
            # Without normalization the precomposed form comes back unchanged.
            (raw, "\u00E1", "\u00E1"),
        )
        for convert, text, expected in checks:
            self.assertEqual(convert(text).output_string, expected)
예제 #10
0
    def test_norm_form(self):
        """Check the a-acute rule under the default, 'NFD' and disabled norm_form settings."""
        default_mapping = Mapping([{"in": "a\u0301", "out": "a"}])  # Defaults to NFC
        decomposed_mapping = Mapping([{"in": "a\u0301", "out": "a"}], norm_form="NFD")
        unnormalized_mapping = Mapping([{"in": "a\u0301", "out": "a"}], norm_form=False)

        to_default = Transducer(default_mapping)
        to_decomposed = Transducer(decomposed_mapping)
        to_unnormalized = Transducer(unnormalized_mapping)

        # Each transducer sees the decomposed input first, then the precomposed one.
        for convert, precomposed_result in (
            (to_default, "a"),
            (to_decomposed, "a"),
            (to_unnormalized, "\u00E1"),
        ):
            self.assertEqual(convert("a\u0301"), "a")
            self.assertEqual(convert("\u00E1"), precomposed_result)
예제 #11
0
 def test_basic_composition(self):
     """Convert 'abba' with an a->b rule and check the output string and edges."""
     a_to_b = Transducer(Mapping([{"in": "a", "out": "b"}]))
     graph = a_to_b("abba")
     self.assertEqual(graph.output_string, "bbbb")
     # Every input position is expected to link to the same output position.
     identity_edges = [(0, 0), (1, 1), (2, 2), (3, 3)]
     self.assertEqual(graph.edges, identity_edges)
예제 #12
0
 def test_unidecode_mapping(self):
     """A type="unidecode" mapping has no rules and yields the expected ASCII output."""
     unidecode_mapping = Mapping(type="unidecode")
     self.assertEqual(unidecode_mapping.mapping, [])
     self.assertEqual(unidecode_mapping.kwargs["type"], "unidecode")
     converter = Transducer(unidecode_mapping)
     graph = converter("été Nunavut ᓄᓇᕗᑦ")
     self.assertEqual(graph.output_string, "ete Nunavut nonafot")
예제 #13
0
 def test_basic_composition(self):
     """Check output and edges for 'abba', and that the edges equal their own composition."""
     a_to_b = Transducer(Mapping([{"in": "a", "out": "b"}]))
     graph = a_to_b("abba")
     self.assertEqual(graph.output_string, "bbbb")
     self.assertEqual(graph.edges, [(0, 0), (1, 1), (2, 2), (3, 3)])
     # Composing the edge list with itself is expected to give it back.
     self.assertEqual(graph.edges, compose_indices(graph.edges, graph.edges))
예제 #14
0
def convert(message):
    """Run the input string through every requested mapping and emit the result.

    Reads 'mappings', 'index' and 'input_string' from message['data'], chains
    one Transducer per mapping into a CompositeTransducer, and emits a
    'conversion response' event. When the 'index' entry is truthy the event
    also carries the echart index data and links.
    """
    payload = message['data']
    chain = [
        Transducer(
            Mapping(hot_to_mappings(spec['mapping']),
                    abbreviations=flatten_abbreviations(spec['abbreviations']),
                    **spec['kwargs']))
        for spec in payload['mappings']
    ]
    composite = CompositeTransducer(chain)

    if not payload['index']:
        # Plain conversion: only the output string is sent back.
        converted = composite(payload['input_string']).output_string
        emit('conversion response', {'output_string': converted})
        return

    graph = composite(payload['input_string'])
    index_data, index_links = return_echart_data(graph)
    response = {
        'output_string': graph.output_string,
        'index_data': index_data,
        'index_links': index_links
    }
    emit('conversion response', response)
예제 #15
0
 def test_reduced_indices(self):
     """Check the reduced index pairs produced for two git -> eng-arpabet conversions."""
     git_to_arpabet = Transducer(Mapping(in_lang='git', out_lang='eng-arpabet'))
     # (input word, expected result of reduced() on the second element of the conversion)
     cases = (
         ("K̲'ay", [(2, 2), (3, 5), (4, 8), (5, 9)]),
         ("yukwhl", [(1, 2), (2, 5), (3, 7), (4, 9), (6, 10)]),
     )
     for word, expected in cases:
         converted = git_to_arpabet(word)
         self.assertEqual(converted[1].reduced(), expected)
예제 #16
0
파일: test_indices.py 프로젝트: deltork/g2p
 def test_case_acdc(self):
     """Convert 'acdc' with an explicitly indexed rule and check output and edges."""
     indexed_rule = {"in": "a{1}c{2}", "out": "c{2}a{1}c{2}"}
     graph = Transducer(Mapping([indexed_rule]))('acdc')
     self.assertEqual(graph.output_string, 'cacdc')
     expected_edges = [(0, 1), (1, 0), (1, 2), (2, 3), (3, 4)]
     self.assertEqual(graph.edges, expected_edges)
예제 #17
0
 def test_conversions(self):
     """Check conversions that used to cause problems for readalongs.

     These might fail if the lookup tables change.
     """
     for case in self.test_conversion_data:
         converter = Transducer(
             Mapping(in_lang=case['in_lang'], out_lang=case['out_text' and 'out_lang']))
         # The first element of the conversion result is the output text.
         self.assertEqual(converter(case['in_text'])[0], case['out_text'])
예제 #18
0
def panphon_preprocess(inventory: List[str], is_xsampa: bool = False):
    """Return a new list with every inventory item run through the panphon preprocessor.

    When is_xsampa is true, each item is first passed through XSampa().convert.
    """
    xsampa_converter = XSampa()
    panphon_preprocessor = Transducer(Mapping(id='panphon_preprocessor'))

    def _preprocess(symbol):
        # Optional X-SAMPA conversion comes before the panphon mapping.
        if is_xsampa:
            symbol = xsampa_converter.convert(symbol)
        return panphon_preprocessor(symbol).output_string

    return [_preprocess(symbol) for symbol in inventory]
예제 #19
0
파일: __init__.py 프로젝트: dhdaines/g2p
def convert(message):
    """Convert the input string with the submitted mapping and emit the result.

    Reads 'mappings', 'abbreviations', 'kwargs' and 'input_string' from
    message['data'] and emits a 'conversion response' event carrying the
    transducer's result under 'output_string'.
    """
    payload = message['data']
    rules = hot_to_mappings(payload['mappings'])
    abbreviations = flatten_abbreviations(payload['abbreviations'])
    converter = Transducer(
        Mapping(rules, abbreviations=abbreviations, **payload['kwargs']))
    emit('conversion response',
         {'output_string': converter(payload['input_string'])})
예제 #20
0
def process_character(p, is_xsampa=False):
    """Return ``p`` after running it through the panphon preprocessor mapping.

    Args:
        p: The text to preprocess.
        is_xsampa: When true, ``p`` is first converted with a lazily created
            XSampa converter.

    Returns:
        The ``output_string`` of the panphon preprocessor transduction.
    """
    # The converter is cached in a module-level variable. Without this
    # declaration the assignment below makes the name local, so the
    # ``is None`` check raises UnboundLocalError.
    # NOTE(review): assumes the module defines `_xsampa_converter = None`.
    global _xsampa_converter
    if is_xsampa:
        if _xsampa_converter is None:
            # Expensive import, do it only when needed:
            from panphon.xsampa import XSampa

            _xsampa_converter = XSampa()
        p = _xsampa_converter.convert(p)
    panphon_preprocessor = Transducer(Mapping(id="panphon_preprocessor"))
    return panphon_preprocessor(p).output_string
예제 #21
0
파일: test_indices.py 프로젝트: deltork/g2p
 def test_case_acac(self):
     """An explicitly indexed rule and a two-rule mapping give the same output and edges."""
     indexed = Transducer(Mapping([{"in": "ab{1}c{2}", "out": "ab{2}"}]))
     two_rules = Transducer(
         Mapping([{"in": "ab", "out": ""}, {"in": "c", "out": "ab"}]))
     indexed_graph = indexed('abcabc')
     two_rules_graph = two_rules('abcabc')

     self.assertEqual(indexed_graph.output_string, 'abab')
     self.assertEqual(two_rules_graph.output_string, 'abab')

     # Both transducers are expected to produce this exact edge list.
     expected_edges = [(0, None), (1, None), (2, 0), (2, 1),
                       (3, None), (4, None), (5, 2), (5, 3)]
     self.assertEqual(indexed_graph.edges, expected_edges)
     self.assertEqual(two_rules_graph.edges, expected_edges)
예제 #22
0
 def test_minimal(self):
     """Load minimal_config.yaml and check its conversions and stored options."""
     config_path = os.path.join(
         os.path.dirname(public_data), 'mappings', 'minimal_config.yaml')
     mapping = Mapping(config_path)
     transducer = Transducer(mapping)
     for text, expected in (('abb', 'aab'), ('a', 'a')):
         self.assertEqual(transducer(text), expected)
     # Options recorded on the mapping after loading the config.
     self.assertTrue(mapping.kwargs['as_is'])
     self.assertFalse(mapping.kwargs['case_sensitive'])
     self.assertTrue(mapping.kwargs['escape_special'])
     self.assertEqual(mapping.kwargs['norm_form'], 'NFD')
     self.assertTrue(mapping.kwargs['reverse'])
예제 #23
0
 def test_abbreviations(self):
     """Load abbreviation_config.yaml and check the rule inputs and two conversions."""
     mappings_dir = os.path.join(os.path.dirname(public_data), "mappings")
     mapping = Mapping(os.path.join(mappings_dir, "abbreviation_config.yaml"))
     # Expected "in" values of the first two loaded rules.
     for position, expected_input in enumerate(("i|u", "a|e|i|o|u")):
         self.assertEqual(mapping.mapping[position]["in"], expected_input)
     transducer = Transducer(mapping)
     for text, expected in (("i", "1"), ("e", "2")):
         self.assertEqual(transducer(text).output_string, expected)
예제 #24
0
    def test_rule_ordering(self):
        """
        Test the config option:

        rule-ordering: 'as-written' (default)

        or

        rule-ordering: 'apply-longest-first'
        """
        rules = [{"in": "a", "out": "b"}, {"in": "aa", "out": "c"}]

        # With longest-first ordering 'aa' is expected to convert to 'c'.
        transducer_longest_first = Transducer(
            Mapping(rules, rule_ordering="apply-longest-first")
        )
        self.assertEqual(transducer_longest_first("aa").output_string, "c")

        # As written, 'aa' is expected to convert to 'bb'.
        transducer_as_written = Transducer(Mapping(rules, rule_ordering="as-written"))
        self.assertEqual(transducer_as_written("aa").output_string, "bb")

        # Omitting rule_ordering must give the same result as 'as-written'.
        transducer_default = Transducer(Mapping(rules))
        self.assertEqual(transducer_default("aa").output_string, "bb")
예제 #25
0
 def test_minimal(self):
     """Load minimal_config.yaml and check conversions, rule sorting and stored options."""
     mappings_dir = os.path.join(os.path.dirname(public_data), "mappings")
     mapping = Mapping(os.path.join(mappings_dir, "minimal_config.yaml"))
     transducer = Transducer(mapping)
     for text, expected in (("abb", "aaa"), ("a", "a")):
         self.assertEqual(transducer(text).output_string, expected)
     self.assertFalse(mapping.wants_rules_sorted())
     # Options recorded on the mapping after loading the config.
     self.assertFalse(mapping.kwargs["case_sensitive"])
     self.assertTrue(mapping.kwargs["escape_special"])
     self.assertEqual(mapping.kwargs["norm_form"], "NFD")
     self.assertTrue(mapping.kwargs["reverse"])
예제 #26
0
    def test_rule_ordering(self):
        """
        Test the config option:

        rule-ordering: 'as-written' (default)

        or

        rule-ordering: 'apply-longest-first'
        """
        rules = [{'in': 'a', "out": 'b'}, {'in': 'aa', 'out': 'c'}]

        # With longest-first ordering 'aa' is expected to convert to 'c'.
        transducer_longest_first = Transducer(
            Mapping(rules, rule_ordering='apply-longest-first'))
        self.assertEqual(transducer_longest_first('aa').output_string, 'c')

        # As written, 'aa' is expected to convert to 'bb'.
        transducer_as_written = Transducer(
            Mapping(rules, rule_ordering='as-written'))
        self.assertEqual(transducer_as_written('aa').output_string, 'bb')

        # Omitting rule_ordering must give the same result as 'as-written'.
        transducer_default = Transducer(Mapping(rules))
        self.assertEqual(transducer_default('aa').output_string, 'bb')
예제 #27
0
def make_g2p(in_lang: str, out_lang: str):
    """Build a transducer that converts ``in_lang`` text to ``out_lang``.

    Finds the shortest path between the two languages in ``LANGS_NETWORK``
    and creates one ``Mapping`` per consecutive pair of nodes on that path.

    Args:
        in_lang: Name of the source language node.
        out_lang: Name of the target language node.

    Returns:
        A ``Transducer`` when exactly one mapping is needed, otherwise a
        ``CompositeTransducer`` over all mappings along the path.

    Raises:
        FileNotFoundError: If either language is not a node in the network.
        NetworkXNoPath: If no path connects the two languages.
    """
    # Both endpoints must be nodes in the network; in_lang is checked first.
    for lang in (in_lang, out_lang):
        if lang not in LANGS_NETWORK.nodes:
            LOGGER.error(f"No lang called {lang}. Please try again.")
            # f-string so the message names the language, not a literal placeholder.
            raise FileNotFoundError(f"No lang called {lang}.")

    # Try to find the shortest path between the nodes
    try:
        path = shortest_path(LANGS_NETWORK, in_lang, out_lang)
    except NetworkXNoPath as err:
        LOGGER.error(
            f"Sorry, we couldn't find a way to convert {in_lang} to {out_lang}. Please update your langs by running `g2p update` and try again."
        )
        # Same exception type as before, now chained to the original error.
        raise NetworkXNoPath(
            f"No path between {in_lang} and {out_lang}."
        ) from err

    # One mapping per consecutive pair of nodes. Pairing with zip avoids
    # indexing past the end, so an IndexError raised inside Mapping is no
    # longer silently swallowed.
    mappings_needed = []
    for source, target in zip(path, path[1:]):
        mapping = Mapping(in_lang=source, out_lang=target)
        LOGGER.debug(
            f"Adding mapping between {source} and {target} to composite transducer."
        )
        mappings_needed.append(mapping)

    # Either return Transducer or Composite Transducer
    if len(mappings_needed) == 1:
        return Transducer(mappings_needed[0])
    return CompositeTransducer([Transducer(x) for x in mappings_needed])
예제 #28
0
 def create_transducer(mapping):
     """Build a Transducer from a rule list, a YAML-looking string, or a file path.

     Raises exceptions.MissingFileError when ``mapping`` is falsy or matches
     none of the accepted forms.
     """
     if not mapping:
         mapping = str(mapping)
         raise exceptions.MissingFileError(mapping)

     if isinstance(mapping, list):
         # A list is handed to Mapping directly.
         mapping_obj = Mapping(mapping)
     elif isinstance(mapping, str) and re.search(r'.y(a)*ml\b', mapping):
         # Strings that look like a YAML file name are also handed over as-is.
         mapping_obj = Mapping(mapping)
     elif os.path.isfile(mapping):
         # Any other existing file is loaded first.
         mapping_obj = Mapping(load_from_file(mapping))
     else:
         raise exceptions.MissingFileError(mapping)
     return Transducer(mapping_obj)
예제 #29
0
 def test_rule_ordering_from_config(self):
     """
     Same as test_minimal, but uses "rule-ordering" instead of "as-is" in the config.
     """
     mappings_dir = os.path.join(os.path.dirname(public_data), "mappings")
     mapping = Mapping(os.path.join(mappings_dir, "rule-ordering.yaml"))
     transducer = Transducer(mapping)
     for text, expected in (("abb", "aaa"), ("a", "a")):
         self.assertEqual(transducer(text).output_string, expected)
     self.assertTrue(mapping.wants_rules_sorted())
     # Options recorded on the mapping after loading the config.
     self.assertEqual(mapping.kwargs["rule_ordering"], "apply-longest-first")
     self.assertFalse(mapping.kwargs["case_sensitive"])
     self.assertTrue(mapping.kwargs["escape_special"])
     self.assertEqual(mapping.kwargs["norm_form"], "NFD")
     self.assertTrue(mapping.kwargs["reverse"])
예제 #30
0
파일: __init__.py 프로젝트: dhdaines/g2p
def index_convert(message):
    """Convert the input string and emit the output together with echart index data.

    Reads 'mappings', 'abbreviations', 'kwargs' and 'input_string' from
    message['data'], calls the transducer with index=True, and emits an
    'index conversion response' event.
    """
    payload = message['data']
    rules = hot_to_mappings(payload['mappings'])
    abbreviations = flatten_abbreviations(payload['abbreviations'])
    converter = Transducer(
        Mapping(rules, abbreviations=abbreviations, **payload['kwargs']))
    # With index=True the call yields the output string and its indices.
    output_string, indices = converter(payload['input_string'], index=True)
    index_data, index_links = return_echart_data(indices)
    response = {
        'output_string': output_string,
        'index_data': index_data,
        'index_links': index_links
    }
    emit('index conversion response', response)