def syllabifyTextgrids(tgPath, islePath):
    """Syllabify every .TextGrid found in *tgPath*.

    For each grid, a syllabification is derived from its "words" and
    "phones" tiers using the ISLE dictionary at *islePath*.  Results are
    written to a "syllabifiedTGs" subfolder of *tgPath*; grids that were
    already processed on a previous run are skipped.
    """
    isleDict = isletool.LexicalTool(islePath)

    outputPath = join(tgPath, "syllabifiedTGs")
    utils.makeDir(outputPath)

    # Non-speech labels that should not be syllabified
    skipLabelList = ["<VOCNOISE>", "xx", "<SIL>", "{B_TRANS}", "{E_TRANS}"]

    for fn in utils.findFiles(tgPath, filterExt=".TextGrid"):
        outputFN = join(outputPath, fn)
        if os.path.exists(outputFN):
            continue  # already done on an earlier run

        tg = tgio.openTextgrid(join(tgPath, fn))
        syllableTG = praattools.syllabifyTextgrid(
            isleDict, tg, "words", "phones", skipLabelList=skipLabelList)

        # Build the output grid: original word/phone tiers plus the tonic
        # tier.  The full "syllable" tier is deliberately left out.
        outputTG = tgio.Textgrid()
        outputTG.addTier(tg.tierDict["words"])
        outputTG.addTier(tg.tierDict["phones"])
        outputTG.addTier(syllableTG.tierDict["tonic"])

        outputTG.save(outputFN)
def manualPhoneCount(tgInfoPath, isleFN, outputPath, skipList=None):
    """Estimate syllable and phone counts for word labels in info files.

    Reads every .txt info file in *tgInfoPath* (rows of start, stop,
    label), looks each label up in the ISLE dictionary at *isleFN*, and
    writes one "syllableCount,phoneCount" line per label to a same-named
    file in *outputPath*.  Labels in *skipList* get counts of 0,0.
    Files already present in *outputPath* are skipped.
    """
    if skipList is None:
        skipList = []

    utils.makeDir(outputPath)

    isleDict = isletool.LexicalTool(isleFN)

    # Bug fix: the original passed ``filterPaths=".txt"``.  Every other
    # findFiles() call in this file uses ``filterExt``; in praatio,
    # ``filterPaths`` is a directory filter, so a truthy string here
    # silently produced the wrong "already done" list.
    existFNList = utils.findFiles(outputPath, filterExt=".txt")
    for fn in utils.findFiles(tgInfoPath, filterExt=".txt",
                              skipIfNameInList=existFNList):

        if os.path.exists(join(outputPath, fn)):
            continue  # belt-and-braces: never overwrite existing output

        print(fn)
        dataList = utils.openCSV(tgInfoPath, fn)
        dataList = [row[2] for row in dataList]  # start, stop, tmpLabel

        outputList = []
        for tmpLabel in dataList:
            if tmpLabel not in skipList:
                syllableCount, phoneCount = isletool.getNumPhones(
                    isleDict, tmpLabel, maxFlag=True)
            else:
                # Skipped labels (e.g. noise markers) count as nothing
                syllableCount, phoneCount = 0, 0
            outputList.append("%d,%d" % (syllableCount, phoneCount))

        outputTxt = "\n".join(outputList)

        with open(join(outputPath, fn), "w") as fd:
            fd.write(outputTxt)
def createSyllabifiedTextgrid(
        names_audio, text_grid,
        output_fn='../mavid-scripts/files/'
                  'wav_recordedNames_syllables_test.TextGrid'):
    """Add syllable/tonic tiers to *text_grid* and save the result.

    Args:
        names_audio: unused here; kept for interface compatibility with
            existing callers.
        text_grid: path to the input TextGrid (must have "words" and
            "phones" tiers).
        output_fn: where the augmented grid is written.  Generalized from
            the previously hard-coded path, which remains the default so
            existing callers are unaffected.
    """
    isleDict = isletool.LexicalTool()
    tg = tgio.openTextgrid(text_grid)

    # Empty labels carry no pronunciation and must be skipped
    syllableTG = praattools.syllabifyTextgrid(
        isleDict, tg, "words", "phones", skipLabelList=["", ])

    tg.addTier(syllableTG.tierDict["syllable"])
    tg.addTier(syllableTG.tierDict["tonicSyllable"])
    tg.addTier(syllableTG.tierDict["tonicVowel"])

    tg.save(output_fn)
    return
# -*- coding: utf-8 -*- """ Created on Mon Aug 26 11:20:12 2019 @author: john.cheng """ import random from os.path import join from pysle import isletool from pysle import pronunciationtools root = join(".", "files") isleDict = isletool.LexicalTool(join(root, 'D:\Aeo_test\speech\ISLEdict.txt')) def printOutMatches(matchStr, numSyllables=None, wordInitial='ok', wordFinal='ok', spanSyllable='ok', stressedSyllable='ok', multiword='ok', numMatches=None, matchList=None, pos=None): if matchList is None: matchList = isleDict.search(matchStr, numSyllables, wordInitial, wordFinal, spanSyllable, stressedSyllable, multiword, pos)
#encoding: utf-8 ''' Examples of how to use pysle's pronunciationtools code ''' from os.path import join from pysle import isletool from pysle import pronunciationtools root = join(".", "files") isleDict = isletool.LexicalTool(join(root, 'ISLEdict_sample.txt')) # In the first example we determine the syllabification of a word, # as it was said. (Of course, this is just an estimate) print('-' * 50) searchWord = 'another' anotherPhoneList = ['n', '@', 'th', 'r'] isleWordList = isleDict.lookup(searchWord) returnList = pronunciationtools.findBestSyllabification( isleDict, searchWord, anotherPhoneList) (stressedSyllable, stressedPhone, syllableList, syllabification, stressedSyllableIndexList, stressedPhoneIndexList, flattenedStressIndexList) = returnList print(searchWord) print(anotherPhoneList) print(stressedSyllableIndexList) # We can see the first syllable was elided print(stressedPhoneIndexList) print(flattenedStressIndexList) print(syllableList)
#encoding: utf-8 ''' Created on July 08, 2016 @author: tmahrt Basic examples of common usage. ''' import random from pysle import isletool tmpPath = r"C:\Users\Tim\Dropbox\workspace\pysle\test\ISLEdict.txt" isleDict = isletool.LexicalTool(tmpPath) def printOutMatches(matchStr, numSyllables=None, wordInitial='ok', wordFinal='ok', spanSyllable='ok', stressedSyllable='ok', multiword='ok', numMatches=None, matchList=None): if matchList is None: matchList = isleDict.search(matchStr, numSyllables, wordInitial, wordFinal, spanSyllable, stressedSyllable, multiword) else: matchList = isletool.search(matchList, matchStr, numSyllables, wordInitial, wordFinal, spanSyllable, stressedSyllable, multiword) if numMatches is not None and len(matchList) > numMatches: random.shuffle(matchList)
''' from os.path import join from praatio import tgio from pysle import isletool from pysle import praattools path = join('.', 'files') path = "/Users/tmahrt/Dropbox/workspace/pysle/test/files" tg = tgio.openTextGrid(join(path, "pumpkins.TextGrid")) # Needs the full path to the file islevPath = '/Users/tmahrt/Dropbox/workspace/pysle/test/islev2.txt' isleDict = isletool.LexicalTool(islevPath) # Get the syllabification tiers and add it to the textgrid syllableTG = praattools.syllabifyTextgrid(isleDict, tg, "word", "phone", skipLabelList=["",]) tg.addTier(syllableTG.tierDict["syllable"]) tg.addTier(syllableTG.tierDict["tonicSyllable"]) tg.addTier(syllableTG.tierDict["tonicVowel"]) tg.save(join(path, "pumpkins_with_syllables.TextGrid"))
- 10 syllables?
- Alternating stressed/unstressed?

3. Print line
'''
import random
import time
import string

import markovify
from os.path import join
from pysle import isletool
from pysle import pronunciationtools

# NOTE(review): ``root`` is effectively ignored -- os.path.join returns
# the second argument unchanged when it is absolute.  The literal home
# path below is machine-specific; confirm/adjust before running.
root = join(".", "files")
isleDict = isletool.LexicalTool(
    join(root, '/home/jay/Dropbox/19-20/PoetryGen/ISLEdict.txt'))

# Set your text corpus here: three source texts feed three independent
# Markov chain models (state_size=2 -> bigram context)
with open("/home/jay/Dropbox/19-20/PoetryGen/shelley.txt") as f:
    shelley = f.read()
with open("/home/jay/Dropbox/19-20/PoetryGen/mobydick.txt") as f:
    mobydick = f.read()
with open("/home/jay/Dropbox/19-20/PoetryGen/witchcraft.txt") as f:
    witch = f.read()

textModelShelley = markovify.Text(shelley, state_size=2)
textModelDick = markovify.Text(mobydick, state_size=2)
textModelWitch = markovify.Text(witch, state_size=2)
# Scratch/demo script comparing transliteration and grapheme-to-phoneme
# back-ends.  Results are discarded (notebook/REPL style).
# NOTE(review): ``Backoff`` is presumably imported in an earlier, unseen
# part of this file (it is not defined here) -- confirm.
backoff = Backoff(['fas-Arab', 'rus-Cyrl'])
backoff.transliterate('Привет дорогой друг пидор')  # Russian (Cyrillic)
backoff.transliterate('queen')                      # Latin-script input
backoff.transliterate('中文')                        # Chinese: outside the configured scripts
backoff.transliterate('سلام شادی من')               # Persian (Arabic script)
backoff.transliterate('ملکه')

# pysle: phone transcription of a whole sentence via the ISLE dictionary
from pysle import isletool
a = isletool.LexicalTool('ISLEdict.txt')
sentence = "do you want another pumpkinseed"
phoneList = isletool.transcribe(a, sentence, 'longest')
print(phoneList)

# phonemizer: espeak-backed phonemization in several languages
from phonemizer.phonemize import phonemize
phonemize("hello", language='en-us', backend='espeak')
phonemize("hello my queen", language='en-us', backend='espeak')
phonemize("ich will", language='de', backend='espeak')
phonemize("bonjour le monde", language='fr-fr', backend='espeak')
phonemize("konnichiwa", language='japanese', backend='espeak')
def get_data(seed=42, test_size=0.20, verbose=0, maxlen_x=None, maxlen_y=None,
             blacklist='()0123456789%.?"-_', max_phonemes=np.inf,
             max_chars=np.inf, phon_sep='', unique_graphemes=False,
             unique_phonemes=True):
    """Build a grapheme->phoneme dataset from the ISLEdict dictionary.

    Downloads the dictionary if needed, flattens each entry's syllables
    into a phoneme string, optionally de-duplicates, splits into
    train/test, filters blacklisted/oversized entries, and encodes both
    sides with ``CharacterTable``.

    Args:
        seed: random_state for the train/test split.
        test_size: test fraction for the split.
        verbose: if truthy, print progress counts.
        maxlen_x, maxlen_y: if given, override the encoders' max lengths.
        blacklist: characters whose presence in a grapheme drops the entry
            (e.g. "HOUSE(2)" variants, digits, hyphen/underscore compounds).
        max_phonemes, max_chars: drop entries longer than these.
        phon_sep: separator joined between phonemes.
        unique_graphemes, unique_phonemes: keep only one entry per
            grapheme / per phoneme string.

    Returns:
        ((X_train, y_train), (X_test, y_test), (xtable, ytable))
        where the arrays are encoded; the tables decode them back.
    """
    path = download_data_maybe()

    # load data
    isleDict = isletool.LexicalTool(path)
    X = []
    y = []
    for phrase in isleDict.data.keys():
        # Each lookup may return several pronunciations; zip pairs them up
        for pronounciation in zip(*isleDict.lookup(phrase)):
            xx = []
            # Flatten syllable lists into a single phoneme sequence
            for syllableList, stressedSyllableList, stressedPhoneList in pronounciation:
                xx += list(itertools.chain(*syllableList))
            y.append(phon_sep.join(xx))
            X.append(phrase)
    if verbose:
        print('loaded entries {}'.format(len(X)))

    # filter out duplicate X's (dict keyed on phonemes keeps one per y)
    if unique_phonemes:
        y, X = zip(*dict(zip(y, X)).items())
        if verbose:
            print('removed duplicate phonemes leaving {}'.format(len(X)))

    # filter out duplicates Y's
    if unique_graphemes:
        X, y = zip(*dict(zip(X, y)).items())
        if verbose:
            print('removed duplicate graphemes leaving {}'.format(len(X)))

    # split data (we must set asside test data before cleanign so it's
    # always the same)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=seed)

    # filter out duplicate entries like 'HOUSE(2) or multi words CAT-DOG
    # and CAT_DOG' -- any grapheme containing a blacklisted char is dropped
    p = re.compile('[%s]' % (re.escape(blacklist)))
    X_train, y_train = zip(*[(x, y) for x, y in zip(X_train, y_train)
                             if not bool(p.findall(x))])
    X_test, y_test = zip(*[(x, y) for x, y in zip(X_test, y_test)
                           if not bool(p.findall(x))])
    if verbose:
        print('removed blacklisted entries leaving {}'.format(
            len(X_train) + len(X_test)))

    # filter out complex entries if needed
    before_x = len(y_train)
    X_train, y_train = zip(*[(x, y) for x, y in zip(X_train, y_train)
                             if len(y) <= max_phonemes and len(x) <= max_chars])
    X_test, y_test = zip(*[(x, y) for x, y in zip(X_test, y_test)
                           if len(y) <= max_phonemes and len(x) <= max_chars])
    if verbose:
        print('restricted to less than {} phonemes leaving {} entries or '
              '{:2.2f}%'.format(max_phonemes, len(X_train) + len(X_test),
                                len(X_train)/before_x*100))

    # FIXME it's slow in the next few lines
    # encode x and y and pad them
    xtable = CharacterTable()
    xtable.fit(X_test + X_train)
    if maxlen_x:
        xtable.maxlen = maxlen_x
    X_train = xtable.encode(X_train)
    X_test = xtable.encode(X_test)
    ytable = CharacterTable()
    ytable.fit(y_test + y_train)
    if maxlen_y:
        ytable.maxlen = maxlen_y
    y_train = ytable.encode(y_train)
    y_test = ytable.encode(y_test)
    if verbose:
        print('X_train shape:', X_train.shape)
        print('X_test shape:', X_test.shape)
        print('y_train shape:', y_train.shape)
        print('y_test shape:', y_test.shape)
    return (X_train, y_train), (X_test, y_test), (xtable, ytable)