def __init__(
    self,
    g2p_type: str,
    non_linguistic_symbols: Optional[Union[Path, str, Iterable[str]]] = None,
    space_symbol: str = "<space>",
    remove_non_linguistic_symbols: bool = False,
):
    assert check_argument_types()
    if g2p_type == "g2p_en":
        self.g2p = g2p_en.G2p()
    elif g2p_type == "pyopenjtalk":
        self.g2p = pyopenjtalk_g2p
    elif g2p_type == "pyopenjtalk_kana":
        self.g2p = pyopenjtalk_g2p_kana
    else:
        raise NotImplementedError(f"Not supported: g2p_type={g2p_type}")

    self.g2p_type = g2p_type
    self.space_symbol = space_symbol
    if non_linguistic_symbols is None:
        self.non_linguistic_symbols = set()
    elif isinstance(non_linguistic_symbols, (Path, str)):
        non_linguistic_symbols = Path(non_linguistic_symbols)
        with non_linguistic_symbols.open("r", encoding="utf-8") as f:
            self.non_linguistic_symbols = set(line.rstrip() for line in f)
    else:
        self.non_linguistic_symbols = set(non_linguistic_symbols)
    self.remove_non_linguistic_symbols = remove_non_linguistic_symbols
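# Not part of the original snippet: a minimal usage sketch of the g2p_en backend
# selected above. g2p_en.G2p() is callable on raw text and returns a list of
# ARPAbet phones with stress digits, using " " tokens as word separators.
import g2p_en

g2p = g2p_en.G2p()
print(g2p("Hello world"))
# e.g. ['HH', 'AH0', 'L', 'OW1', ' ', 'W', 'ER1', 'L', 'D'] (may vary by version)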
def __call__(self, text) -> List[str]:
    if self.g2p is None:
        self.g2p = g2p_en.G2p()

    phones = self.g2p(text)
    if self.no_space:
        # remove the " " tokens that g2p_en emits as word separators
        phones = list(filter(lambda s: s != " ", phones))
    return phones
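# Not part of the original snippet: the no_space filtering above, shown in
# isolation (assumes g2p_en is installed; the phone lists are illustrative).
import g2p_en

g2p = g2p_en.G2p()
phones = g2p("two words")                   # contains a " " word-separator token
no_space = [p for p in phones if p != " "]  # same effect as the filter() above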
def attempt_load_g2p_en(cls, model_dir=None):
    try:
        if model_dir:
            import nltk
            nltk.data.path.insert(0, os.path.abspath(os.path.join(model_dir, 'g2p')))
        import g2p_en  # g2p_en>=2.1.0
        cls.g2p_en = g2p_en.G2p()
        assert all(
            re.sub(r'[012]$', '', phone) in cls.CMU_to_XSAMPA_dict
            for phone in cls.g2p_en.phonemes
            if not phone.startswith('<'))
    except Exception:  # including ImportError
        cls.g2p_en = False  # Don't try anymore.
        _log.debug("failed to load g2p_en")
def generate_pronunciations(cls, word):
    """returns CMU/arpabet phones"""
    if g2p_en:
        try:
            if not cls.g2p_en:
                cls.g2p_en = g2p_en.G2p()
            phones = cls.g2p_en(word)
            _log.debug("generated pronunciation with g2p_en for %r: %r" % (word, phones))
            return phones
        except Exception:
            _log.exception("generate_pronunciations exception using g2p_en")
    try:
        files = {'wordfile': ('wordfile', word)}
        req = requests.post('http://www.speech.cs.cmu.edu/cgi-bin/tools/logios/lextool.pl', files=files)
        req.raise_for_status()  # FIXME: handle network failures
        match = re.search(r'<!-- DICT (.*) -->', req.text)
        if match:
            url = match.group(1)
            req = requests.get(url)
            req.raise_for_status()
            entries = req.text.strip().split('\n')
            pronunciations = []
            for entry in entries:
                tokens = entry.strip().split()
                # e.g. 'SEMI-COLON' or 'SEMI-COLON(2)'; escape the word in case
                # it contains regex metacharacters
                assert re.match(re.escape(word) + r'(\(\d\))?', tokens[0], re.I)
                phones = tokens[1:]
                _log.debug("generated pronunciation with cloud-cmudict for %r: CMU phones are %r" % (word, phones))
                pronunciations.append(phones)
            return pronunciations
    except Exception:
        _log.exception("generate_pronunciations exception accessing www.speech.cs.cmu.edu")
    raise KaldiError("cannot generate word pronunciation")
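# Not part of the original snippet: the lextool dictionary file parsed above
# contains entries like the following (illustrative), where "(2)" marks an
# alternate pronunciation:
#   SEMI-COLON      S EH1 M IY0 K OW1 L AH0 N
#   SEMI-COLON(2)   S EH1 M IH0 K OW2 L AH0 N
# A quick self-contained check of the alternate-pronunciation regex:
import re

for token in ("SEMI-COLON", "SEMI-COLON(2)"):
    assert re.match(re.escape("SEMI-COLON") + r'(\(\d\))?', token, re.I)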
def phonemize(
    cls,
    text: str,
    lang: Optional[str],
    phonemizer: Optional[str] = None,
    preserve_punct: bool = False,
    to_simplified_zh: bool = False,
):
    if to_simplified_zh:
        import hanziconv
        text = hanziconv.HanziConv.toSimplified(text)

    if phonemizer == "g2p":
        import g2p_en
        g2p = g2p_en.G2p()
        if preserve_punct:
            return " ".join("|" if p == " " else p for p in g2p(text))
        else:
            res = [{",": "sp", ";": "sp"}.get(p, p) for p in g2p(text)]
            return " ".join(p for p in res if p.isalnum())
    elif phonemizer == "g2pc":
        import g2pc
        g2p = g2pc.G2pC()
        return " ".join(w[3] for w in g2p(text))
    elif phonemizer == "ipa":
        assert lang is not None
        import phonemizer
        from phonemizer.separator import Separator
        lang_map = {"en": "en-us", "fr": "fr-fr"}
        return phonemizer.phonemize(
            text,
            backend="espeak",
            language=lang_map.get(lang, lang),
            separator=Separator(word="| ", phone=" "),
        )
    else:
        return text
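# Not part of the original snippet: a self-contained sketch of the "g2p" branch
# above (assumes g2p_en is installed). Commas and semicolons become the "sp"
# (short pause) token; other non-alphanumeric tokens, including the " " word
# separators, are dropped before joining.
import g2p_en

g2p = g2p_en.G2p()
res = [{",": "sp", ";": "sp"}.get(p, p) for p in g2p("Hello, world")]
print(" ".join(p for p in res if p.isalnum()))
# e.g. "HH AH0 L OW1 sp W ER1 L D" (phones may vary by g2p_en version)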
def __init__(
    self,
    punct=True,
    stresses=False,
    spaces=True,
    *,
    space=' ',
    silence=None,
    oov=Base.OOV,
):
    if HAVE_G2P:
        Phonemes._G2P = g2p_en.G2p()
    else:
        raise ImportError(
            "G2P could not be imported properly. Please attempt to import `g2p_en` "
            f"before using {self.__class__.__name__}.")

    labels = []
    self.space, labels = len(labels), labels + [space]  # Space
    if silence:
        self.silence, labels = len(labels), labels + [silence]  # Silence
    labels.extend(self.CONSONANTS)
    vowels = list(self.VOWELS)
    if stresses:
        vowels = [f'{p}{s}' for p, s in itertools.product(vowels, (0, 1, 2))]
    labels.extend(vowels)
    labels.append("'")  # Apostrophe
    if punct:
        labels.extend(self.PUNCT)

    super().__init__(labels, oov=oov)

    self.punct = punct
    self.stresses = stresses
    self.spaces = spaces
def __init__(
    self,
    punct=True,
    stresses=False,
    spaces=True,
    chars=False,
    *,
    space=' ',
    silence=None,
    apostrophe=True,
    oov=Base.OOV,
    sep='|',  # To be able to distinguish between 2/3 letter codes.
    add_blank_at="last_but_one",
    pad_with_space=False,
    improved_version_g2p=False,
    phoneme_dict_path=None,
):
    labels = []
    self.space, labels = len(labels), labels + [space]  # Space
    if silence is not None:
        self.silence, labels = len(labels), labels + [silence]  # Silence
    labels.extend(self.CONSONANTS)
    vowels = list(self.VOWELS)
    if stresses:
        vowels = [f'{p}{s}' for p, s in itertools.product(vowels, (0, 1, 2))]
    labels.extend(vowels)
    if chars:
        labels.extend(string.ascii_lowercase)
    if apostrophe:
        labels.append("'")  # Apostrophe
    if punct:
        labels.extend(self.PUNCT)

    super().__init__(labels, oov=oov, sep=sep, add_blank_at=add_blank_at)

    self.punct = punct
    self.stresses = stresses
    self.spaces = spaces
    self.pad_with_space = pad_with_space

    # g2p_en tries to run download_corpora() on import, but it is not rank-zero guarded.
    # If torch.distributed is initialized, have global rank zero download the corpora and
    # hold the other ranks at a barrier; otherwise fall back to making non-zero ranks
    # sleep for a minute.
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        group = torch.distributed.group.WORLD
        if is_global_rank_zero():
            download_corpora()
        torch.distributed.barrier(group=group)
    elif is_global_rank_zero():
        logging.error(
            f"Torch distributed needs to be initialized before you initialized {self}. This class is prone to "
            "data access race conditions. Now downloading corpora from global rank 0. If other ranks pass this "
            "before rank 0, errors might result."
        )
        download_corpora()
    else:
        logging.error(
            f"Torch distributed needs to be initialized before you initialized {self}. This class is prone to "
            "data access race conditions. This process is not rank 0, and now going to sleep for 1 min. If this "
            "rank wakes from sleep prior to rank 0 finishing downloading, errors might result."
        )
        time.sleep(60)

    import g2p_en  # noqa pylint: disable=import-outside-toplevel

    _g2p = g2p_en.G2p()
    _g2p.variables = None

    if improved_version_g2p:
        self.g2p = G2p(_g2p, phoneme_dict_path)
    else:
        self.g2p = _g2p
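# Not part of the original snippet: the rank-zero download guard above, reduced
# to its core pattern (a sketch; download_fn stands in for download_corpora(),
# and torch.distributed is assumed to be initialized by the launcher).
import torch.distributed as dist

def download_once(download_fn):
    if dist.is_available() and dist.is_initialized():
        if dist.get_rank() == 0:
            download_fn()  # only rank 0 touches the network/filesystem
        dist.barrier()     # all ranks wait until the download is complete
    else:
        download_fn()      # single-process fallback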
import abc

import nltk
from nemo.collections.asr.parts import parsers

try:
    nltk.data.find('taggers/averaged_perceptron_tagger.zip')
except LookupError:
    nltk.download('averaged_perceptron_tagger', quiet=True)
try:
    nltk.data.find('corpora/cmudict.zip')
except LookupError:
    nltk.download('cmudict', quiet=True)

try:
    import g2p_en  # noqa

    _g2p = g2p_en.G2p()
    _g2p.variables = None
    HAVE_G2P = True
except (FileNotFoundError, LookupError):
    HAVE_G2P = False


class Base(abc.ABC):
    """Vocabulary for turning str text to list of int tokens."""

    # fmt: off
    PUNCT = (  # Derived from LJSpeech
        ',', '.', '!',
def __init__(
    self,
    punct=True,
    stresses=False,
    spaces=True,
    chars=False,
    *,
    space=' ',
    silence=None,
    apostrophe=True,
    oov=Base.OOV,
    sep='|',  # To be able to distinguish between 2/3 letter codes.
    add_blank_at="last_but_one",
    pad_with_space=False,
    improved_version_g2p=False,
    phoneme_dict_path=None,
):
    labels = []
    self.space, labels = len(labels), labels + [space]  # Space
    if silence is not None:
        self.silence, labels = len(labels), labels + [silence]  # Silence
    labels.extend(self.CONSONANTS)
    vowels = list(self.VOWELS)
    if stresses:
        vowels = [f'{p}{s}' for p, s in itertools.product(vowels, (0, 1, 2))]
    labels.extend(vowels)
    if chars:
        labels.extend(string.ascii_lowercase)
    if apostrophe:
        labels.append("'")  # Apostrophe
    if punct:
        labels.extend(self.PUNCT)

    super().__init__(labels, oov=oov, sep=sep, add_blank_at=add_blank_at)

    self.punct = punct
    self.stresses = stresses
    self.spaces = spaces
    self.pad_with_space = pad_with_space

    download_corpora()
    _ = sync_ddp_if_available(torch.tensor(0))  # Barrier until rank 0 downloads the corpora

    # g2p_en tries to run download_corpora() on import, but it is not rank-zero guarded.
    import g2p_en  # noqa pylint: disable=import-outside-toplevel

    _g2p = g2p_en.G2p()
    _g2p.variables = None

    if improved_version_g2p:
        self.g2p = G2p(_g2p, phoneme_dict_path)
    else:
        self.g2p = _g2p
def __init__(self, delimit=' '):
    self._delimit = delimit
    self._g2pen = g2p_en.G2p()
    self._g2pvn = G2pVn(try_other=self._g2pen)
class Phonemes(Base):
    """Phonemes vocabulary."""

    _G2P = g2p_en.G2p()
    SEP = '|'  # To be able to distinguish between 2/3 letter codes.

    # fmt: off
    VOWELS = (
        'AA', 'AE', 'AH', 'AO', 'AW', 'AY', 'EH', 'ER',
        'EY', 'IH', 'IY', 'OW', 'OY', 'UH', 'UW',
    )
    CONSONANTS = (
        'B', 'CH', 'D', 'DH', 'F', 'G', 'HH', 'JH',
        'K', 'L', 'M', 'N', 'NG', 'P', 'R', 'S',
        'SH', 'T', 'TH', 'V', 'W', 'Y', 'Z', 'ZH',
    )
    # fmt: on

    def __init__(
        self,
        punct=True,
        stresses=False,
        spaces=True,
        *,
        space=' ',
        silence=None,
        oov=Base.OOV,
    ):
        labels = []
        self.space, labels = len(labels), labels + [space]  # Space
        if silence:
            self.silence, labels = len(labels), labels + [silence]  # Silence
        labels.extend(self.CONSONANTS)
        vowels = list(self.VOWELS)
        if stresses:
            vowels = [f'{p}{s}' for p, s in itertools.product(vowels, (0, 1, 2))]
        labels.extend(vowels)
        labels.append("'")  # Apostrophe
        if punct:
            labels.extend(self.PUNCT)

        super().__init__(labels, oov=oov)

        self.punct = punct
        self.stresses = stresses
        self.spaces = spaces

    def encode(self, text):
        """See base class."""
        ps, space = [], self.labels[self.space]

        for p in self._G2P(text):
            if len(p) == 3 and not self.stresses:
                p = p[:2]  # Strip the stress digit, e.g. 'AA1' -> 'AA'
            if p == space and len(ps) and ps[-1] != space:
                ps.append(p)
            if p.isalnum() or p == "'":
                ps.append(p)
            if p in self.PUNCT and self.punct:
                if not self.spaces and len(ps) and ps[-1] == space:
                    ps.pop()
                ps.append(p)

        if ps and ps[-1] == space:
            ps.pop()

        return [self._label2id[p] for p in ps]
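# Not part of the original snippet: an illustrative use of the vocabulary above
# (assumes the Base class and PUNCT tuple from this module are available; the
# resulting ids depend on the constructed label list).
vocab = Phonemes(punct=True, stresses=False, spaces=True)
ids = vocab.encode("Hello world!")
# g2p_en output like 'HH AH0 L OW1 <space> W ER1 L D !' is stress-stripped,
# space-collapsed, and mapped through _label2id.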