def syllabifyTextgrids(tgPath, islePath):
    """Add a syllabification ('tonic') tier to every textgrid in *tgPath*.

    Output textgrids are written to a 'syllabifiedTGs' subfolder of
    *tgPath*; files already present there are skipped, so the function
    can be re-run incrementally.
    """
    isleDict = isletool.LexicalTool(islePath)

    outputPath = join(tgPath, "syllabifiedTGs")
    utils.makeDir(outputPath)
    skipLabelList = ["<VOCNOISE>", "xx", "<SIL>", "{B_TRANS}", '{E_TRANS}']

    for name in utils.findFiles(tgPath, filterExt=".TextGrid"):
        outputFN = join(outputPath, name)
        if os.path.exists(outputFN):
            continue  # already syllabified on a previous run

        sourceTG = tgio.openTextgrid(join(tgPath, name))
        syllableTG = praattools.syllabifyTextgrid(
            isleDict, sourceTG, "words", "phones",
            skipLabelList=skipLabelList)

        # Copy words/phones from the source and the tonic tier from the
        # syllabifier output (the 'syllable' tier is deliberately omitted).
        resultTG = tgio.Textgrid()
        for tier in (sourceTG.tierDict["words"],
                     sourceTG.tierDict["phones"],
                     syllableTG.tierDict["tonic"]):
            resultTG.addTier(tier)

        resultTG.save(outputFN)
def manualPhoneCount(tgInfoPath, isleFN, outputPath, skipList=None):
    """Count syllables and phones for every labeled word in the info files.

    Reads CSV rows of (start, stop, label) from each .txt file in
    *tgInfoPath* and writes one 'syllableCount,phoneCount' line per row to
    a same-named file in *outputPath*.  Labels in *skipList* are recorded
    as '0,0'.  Files already present in *outputPath* are skipped.
    """
    skipList = [] if skipList is None else skipList

    utils.makeDir(outputPath)

    isleDict = isletool.LexicalTool(isleFN)

    # NOTE(review): the two findFiles calls use different keyword names
    # (filterPaths vs filterExt) — confirm both are valid for utils.findFiles.
    existFNList = utils.findFiles(outputPath, filterPaths=".txt")
    for fn in utils.findFiles(tgInfoPath, filterExt=".txt",
                              skipIfNameInList=existFNList):

        # Redundant with skipIfNameInList above, but kept as a safety net.
        if os.path.exists(join(outputPath, fn)):
            continue
        print(fn)

        # Rows are (start, stop, label); only the label is needed here.
        labelList = [row[2] for row in utils.openCSV(tgInfoPath, fn)]

        countList = []
        for label in labelList:
            if label in skipList:
                sylCount, phoneCount = 0, 0
            else:
                sylCount, phoneCount = isletool.getNumPhones(isleDict,
                                                             label,
                                                             maxFlag=True)
            countList.append("%d,%d" % (sylCount, phoneCount))

        with open(join(outputPath, fn), "w") as fd:
            fd.write("\n".join(countList))
Example #3
0
def createSyllabifiedTextgrid(names_audio, text_grid,
                              output_path='../mavid-scripts/files/wav_recordedNames_syllables_test.TextGrid'):
    """Add syllable, tonic-syllable, and tonic-vowel tiers to a textgrid.

    Parameters:
        names_audio: kept for interface compatibility; not used by this
            function (presumably consumed by a caller — TODO confirm).
        text_grid: path of the textgrid to open; must contain 'words'
            and 'phones' tiers.
        output_path: where the augmented textgrid is saved.  Previously
            hard-coded; the old destination is kept as the default so
            existing callers are unaffected.
    """
    isleDict = isletool.LexicalTool()

    tg = tgio.openTextgrid(text_grid)
    syllableTG = praattools.syllabifyTextgrid(isleDict,
                                              tg,
                                              "words",
                                              "phones",
                                              skipLabelList=[
                                                  "",
                                              ])
    # Append all three tiers produced by the syllabifier.
    tg.addTier(syllableTG.tierDict["syllable"])
    tg.addTier(syllableTG.tierDict["tonicSyllable"])
    tg.addTier(syllableTG.tierDict["tonicVowel"])

    tg.save(output_path)

    return
Example #4
0
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 26 11:20:12 2019

@author: john.cheng
"""
import random
from os.path import join

from pysle import isletool
from pysle import pronunciationtools

root = join(".", "files")
# NOTE(review): joining the relative `root` with an absolute Windows path —
# on Windows os.path.join discards `root` when the second argument carries
# a drive letter, and on POSIX the result is a nonsense mixed path.
# Confirm the intended dictionary location.
isleDict = isletool.LexicalTool(join(root, 'D:\Aeo_test\speech\ISLEdict.txt'))


def printOutMatches(matchStr,
                    numSyllables=None,
                    wordInitial='ok',
                    wordFinal='ok',
                    spanSyllable='ok',
                    stressedSyllable='ok',
                    multiword='ok',
                    numMatches=None,
                    matchList=None,
                    pos=None):
    """Search the module-level isleDict for entries matching *matchStr*.

    NOTE(review): this example appears truncated — the computed
    *matchList* is never used after the search; presumably the original
    continued by limiting to numMatches and printing the matches.
    """
    if matchList is None:
        # No prior result set supplied: search the whole dictionary.
        matchList = isleDict.search(matchStr, numSyllables, wordInitial,
                                    wordFinal, spanSyllable, stressedSyllable,
                                    multiword, pos)
#encoding: utf-8
'''
Examples of how to use pysle's pronunciationtools code
'''

from os.path import join

from pysle import isletool
from pysle import pronunciationtools

root = join(".", "files")
isleDict = isletool.LexicalTool(join(root, 'ISLEdict_sample.txt'))

# In the first example we determine the syllabification of a word,
# as it was said.  (Of course, this is just an estimate)
print('-' * 50)
searchWord = 'another'
# The reduced pronunciation actually produced (initial vowel elided).
anotherPhoneList = ['n', '@', 'th', 'r']
isleWordList = isleDict.lookup(searchWord)  # NOTE(review): unused below
returnList = pronunciationtools.findBestSyllabification(
    isleDict, searchWord, anotherPhoneList)

# Unpack the syllabification result tuple.
(stressedSyllable, stressedPhone, syllableList, syllabification,
 stressedSyllableIndexList, stressedPhoneIndexList,
 flattenedStressIndexList) = returnList
print(searchWord)
print(anotherPhoneList)
print(stressedSyllableIndexList)  # We can see the first syllable was elided
print(stressedPhoneIndexList)
print(flattenedStressIndexList)
print(syllableList)
Example #6
0
#encoding: utf-8
'''
Created on July 08, 2016

@author: tmahrt

Basic examples of common usage.
'''

import random

from pysle import isletool

# Hard-coded, machine-specific path to the full ISLE dictionary.
tmpPath = r"C:\Users\Tim\Dropbox\workspace\pysle\test\ISLEdict.txt"
isleDict = isletool.LexicalTool(tmpPath)

def printOutMatches(matchStr, numSyllables=None, wordInitial='ok',
                    wordFinal='ok', spanSyllable='ok', stressedSyllable='ok',
                    multiword='ok', numMatches=None, matchList=None):
    """Search for dictionary entries matching *matchStr*.

    NOTE(review): this example appears truncated mid-function — the body
    ends right after shuffling; presumably the original continued by
    slicing matchList to numMatches and printing the results.
    """
    if matchList is None:
        # No prior result set: search the whole dictionary.
        matchList = isleDict.search(matchStr, numSyllables, wordInitial,
                                    wordFinal, spanSyllable, stressedSyllable,
                                    multiword)
    else:
        # Refine a previously obtained result list.
        matchList = isletool.search(matchList, matchStr, numSyllables, wordInitial,
                                    wordFinal, spanSyllable, stressedSyllable,
                                    multiword)

    # Shuffle so that truncating to numMatches yields a random sample.
    if numMatches is not None and len(matchList) > numMatches:
        random.shuffle(matchList)
Example #7
0
'''

from os.path import join

from praatio import tgio
from pysle import isletool
from pysle import praattools

path = join('.', 'files')
# NOTE(review): the relative path above is immediately overridden by this
# machine-specific absolute path; remove whichever is not intended.
path = "/Users/tmahrt/Dropbox/workspace/pysle/test/files"

# praatio's loader is `openTextgrid` (lowercase 'g'), as used elsewhere in
# this file; `openTextGrid` would raise AttributeError at runtime.
tg = tgio.openTextgrid(join(path, "pumpkins.TextGrid"))

# Needs the full path to the file
islevPath = '/Users/tmahrt/Dropbox/workspace/pysle/test/islev2.txt'
isleDict = isletool.LexicalTool(islevPath)

# Get the syllabification tiers and add them to the textgrid
syllableTG = praattools.syllabifyTextgrid(isleDict, tg, "word", "phone",
                                          skipLabelList=["",])
tg.addTier(syllableTG.tierDict["syllable"])
tg.addTier(syllableTG.tierDict["tonicSyllable"])
tg.addTier(syllableTG.tierDict["tonicVowel"])

tg.save(join(path, "pumpkins_with_syllables.TextGrid"))


        
Example #8
0
    - 10 syllables?
    - Alternating stressed/unstressed?
3. Print line

'''

import random
import time
import string
import markovify
from os.path import join
from pysle import isletool
from pysle import pronunciationtools

root = join(".", "files")
# NOTE(review): os.path.join returns the second argument unchanged when it
# is absolute, so `root` is effectively ignored here — confirm intent.
isleDict = isletool.LexicalTool(
    join(root, '/home/jay/Dropbox/19-20/PoetryGen/ISLEdict.txt'))

# Set your text corpus here
with open("/home/jay/Dropbox/19-20/PoetryGen/shelley.txt") as f:
    shelley = f.read()

with open("/home/jay/Dropbox/19-20/PoetryGen/mobydick.txt") as f:
    mobydick = f.read()

with open("/home/jay/Dropbox/19-20/PoetryGen/witchcraft.txt") as f:
    witch = f.read()

# Build one second-order Markov chain per corpus for text generation.
textModelShelley = markovify.Text(shelley, state_size=2)
textModelDick = markovify.Text(mobydick, state_size=2)
textModelWitch = markovify.Text(witch, state_size=2)
Example #9
0
# Transliterator with a fallback chain: Persian (Arabic script) first,
# then Russian (Cyrillic script).
backoff = Backoff(['fas-Arab', 'rus-Cyrl'])

backoff.transliterate('Привет дорогой друг пидор')  # Cyrillic input

backoff.transliterate('queen')  # Latin input

backoff.transliterate('中文')  # Chinese input — outside both script tables

backoff.transliterate('سلام شادی من')  # Perso-Arabic input

backoff.transliterate('ملکه')

from pysle import isletool

a = isletool.LexicalTool('ISLEdict.txt')

# Transcribe a whole sentence, preferring the longest pronunciation
# variant for each word.
sentence = "do you want another pumpkinseed"
phoneList = isletool.transcribe(a, sentence, 'longest')
print(phoneList)

from phonemizer.phonemize import phonemize

phonemize("hello", language='en-us', backend='espeak')
phonemize("hello my queen", language='en-us', backend='espeak')

phonemize("ich will", language='de', backend='espeak')

phonemize("bonjour le monde", language='fr-fr', backend='espeak')

# NOTE(review): espeak language codes are short identifiers (e.g. 'ja');
# 'japanese' is likely rejected — verify against phonemizer's language list.
phonemize("konnichiwa", language='japanese', backend='espeak')
def get_data(seed=42, test_size=0.20, verbose=0, maxlen_x=None, maxlen_y=None, blacklist='()0123456789%.?"-_', max_phonemes=np.inf, max_chars=np.inf, phon_sep='', unique_graphemes=False, unique_phonemes=True):
    """Process the ISLEdict pronunciation dictionary into an encoded
    grapheme-to-phoneme dataset.

    Returns ((X_train, y_train), (X_test, y_test), (xtable, ytable)):
    encoded spellings, encoded phoneme strings, and the fitted
    CharacterTable encoders that produced them.

    Parameters:
        seed, test_size: control the train/test split.
        verbose: print progress counts when truthy.
        maxlen_x, maxlen_y: override the encoders' padded lengths.
        blacklist: entries whose spelling contains any of these
            characters are dropped (numbered variants, multi-words).
        max_phonemes, max_chars: drop entries longer than these limits.
        phon_sep: separator used when joining phonemes into y strings.
        unique_graphemes, unique_phonemes: de-duplicate on X or y.
    """

    # Download/locate the dictionary file (cached if already present).
    path = download_data_maybe()

    # load data
    isleDict = isletool.LexicalTool(path)
    X = []
    y = []
    for phrase in isleDict.data.keys():
        # An entry may have several pronunciations; flatten each one's
        # syllable lists into a single phoneme sequence.
        for pronounciation in zip(*isleDict.lookup(phrase)):
            xx = []
            for syllableList, stressedSyllableList, stressedPhoneList in pronounciation:
                xx += list(itertools.chain(*syllableList))
            y.append(phon_sep.join(xx))
            X.append(phrase)
    if verbose: print('loaded entries {}'.format(len(X)))

    # filter out duplicate X's
    # (dict(zip(y, X)) keeps only the last spelling seen per phoneme string)
    if unique_phonemes:
        y, X = zip(*dict(zip(y, X)).items())
        if verbose: print('removed duplicate phonemes leaving {}'.format(len(X)))

    # filter out duplicates Y's
    if unique_graphemes:
        X, y = zip(*dict(zip(X, y)).items())
        if verbose: print('removed duplicate graphemes leaving {}'.format(len(X)))

    # split data (we must set aside test data before cleaning so it's always the same)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=seed)

    # filter out duplicate entries like 'HOUSE(2) or multi words CAT-DOG and CAT_DOG'
    p = re.compile('[%s]' % (re.escape(blacklist)))
    X_train, y_train = zip(*[(x, y) for x, y in zip(X_train, y_train) if not bool(p.findall(x))])
    X_test, y_test = zip(*[(x, y) for x, y in zip(X_test, y_test) if not bool(p.findall(x))])
    if verbose:
        print('removed blacklisted entries leaving {}'.format(len(X_train) + len(X_test)))

    # filter out complex entries if needed
    # NOTE(review): zip(*[]) raises ValueError if a filter removes every
    # entry — unlikely for this dataset, but worth knowing.
    before_x = len(y_train)
    X_train, y_train = zip(*[(x, y) for x, y in zip(X_train, y_train) if len(y) <= max_phonemes and len(x) <= max_chars])
    X_test, y_test = zip(*[(x, y) for x, y in zip(X_test, y_test) if len(y) <= max_phonemes and len(x) <= max_chars])
    if verbose:
        print('restricted to less than {} phonemes leaving {} entries or {:2.2f}%'.format(max_phonemes, len(X_train) + len(X_test), len(X_train)/before_x*100))

    # FIXME it's slow in the next few lines
    # encode x and y and pad them
    xtable = CharacterTable()
    xtable.fit(X_test + X_train)
    if maxlen_x:
        xtable.maxlen = maxlen_x
    X_train = xtable.encode(X_train)
    X_test = xtable.encode(X_test)

    ytable = CharacterTable()
    ytable.fit(y_test + y_train)
    if maxlen_y:
        ytable.maxlen = maxlen_y
    y_train = ytable.encode(y_train)
    y_test = ytable.encode(y_test)

    if verbose:
        print('X_train shape:', X_train.shape)
        print('X_test shape:', X_test.shape)

        print('y_train shape:', y_train.shape)
        print('y_test shape:', y_test.shape)

    return (X_train, y_train), (X_test, y_test), (xtable, ytable)