Ejemplo n.º 1
0
    def _load_g2p_map(self, code):
        """Read the grapheme-to-phoneme CSV table for one language/script.

        The table is resolved as ``data/map/<code>.csv`` relative to this
        package. Its first row is skipped (presumably a header row); every
        remaining row must consist of exactly two fields, a grapheme string
        and a phoneme string. Both fields are NFC-normalized before being
        stored.

        Args:
            code (str): ISO 639-3 code plus "-" plus ISO 15924 code for the
                        language/script to be loaded

        Returns:
            defaultdict: maps each grapheme string to the list of phoneme
            strings found for it, in file order.

        Raises:
            DatafileError: if a row does not unpack into exactly two fields
                (or if resolving the path raises ``IndexError``).
            MappingError: if ``self._one_to_many_gr_by_line_map`` returns a
                truthy result for the loaded table.
        """
        phonemes_for = defaultdict(list)
        row_indices_for = defaultdict(list)
        try:
            csv_path = pkg_resources.resource_filename(
                __name__, os.path.join('data', 'map', code + '.csv'))
        except IndexError:
            # NOTE(review): it is unclear what here can raise IndexError;
            # the message also says "data/maps" while the path above uses
            # "data/map" -- confirm which is intended.
            raise DatafileError('Add an appropriately-named mapping to the data/maps directory.')
        with open(csv_path, 'rb') as handle:
            rows = csv.reader(handle, encoding='utf-8')
            next(rows)  # discard the first row
            for row_idx, row in enumerate(rows):
                try:
                    grapheme, phoneme = row
                except ValueError:
                    # +2: one for the skipped first row, one for 1-based
                    # line numbering.
                    raise DatafileError('Map file is not well formed at line {}.'.format(row_idx + 2))
                grapheme = unicodedata.normalize('NFC', grapheme)
                phoneme = unicodedata.normalize('NFC', phoneme)
                phonemes_for[grapheme].append(phoneme)
                row_indices_for[grapheme].append(row_idx)
        if self._one_to_many_gr_by_line_map(phonemes_for):
            # Ask the same helper again, this time over row indices, so the
            # error can name the offending lines of the file.
            offender, row_idxs = self._one_to_many_gr_by_line_map(row_indices_for)
            line_numbers = ', '.join(str(idx + 2) for idx in row_idxs)
            message = 'One-to-many G2P mapping for "{}" on lines {}'.format(offender, line_numbers)
            raise MappingError(message.encode('utf-8'))
        return phonemes_for
Ejemplo n.º 2
0
 def _read_rule(self, i, line):
     line = line.strip()
     if line:
         line = unicodedata.normalize('NFC',
                                      unicodedata.normalize('NFD', line))
         s = re.match(r'(?P<symbol>::\w+::)\s*=\s*(?P<value>.+)', line)
         if s:
             self.symbols[s.group('symbol')] = s.group('value')
         else:
             line = self._sub_symbols(line)
             r = re.match(r'(\S+)\s*->\s*(\S+)\s*/\s*(\S*)\s*[_]\s*(\S*)',
                          line)
             try:
                 a, b, X, Y = r.groups()  # 以 -> / _ 分为四部分
             except AttributeError:
                 raise DatafileError(
                     'Line {}: "{}" cannot be parsed.'.format(i + 1, line))
             X, Y = X.replace('#', '^'), Y.replace('#', '$')
             a, b = a.replace('0', ''), b.replace('0', '')
             try:
                 if re.search(r'[?]P[<]sw1[>].+[?]P[<]sw2[>]', a):
                     return self._fields_to_function_metathesis(a, X, Y)
                 else:
                     return self._fields_to_function(a, b, X, Y)
             except Exception as e:
                 raise DatafileError(
                     'Line {}: "{}" cannot be compiled as regex: ̪{}'.
                     format(i + 1, line, e))