def guessStress(input_string):
    """Pick the best-scoring stress assignment for an unstressed phone string.

    Every phone in ``input_string`` (space-separated) that matches a vowel of
    ``BlickLoader().vowels`` with its final character removed is treated as a
    stress slot.  All combinations of the final characters seen for those
    vowels are tried.

    Returns the first candidate found in ``word_list``; otherwise the
    candidate with the lowest ``assessWord`` score.  Candidates are enumerated
    in sorted stress-mark order so that ties are broken the same way on every
    run.
    """
    b = BlickLoader()

    # Map each vowel stem (entry minus its last character) to the set of
    # final characters it occurs with, in a single pass over b.vowels.
    stress_options = {}
    for vowel in b.vowels:
        stress_options.setdefault(vowel[:-1], set()).add(vowel[-1])

    phones = input_string.split(" ")
    # Sorted so that itertools.product yields candidates in a stable order;
    # raw set order depends on per-process string hashing.
    stress_pattern_space = [
        sorted(stress_options[p]) for p in phones if p in stress_options
    ]

    best = float("inf")
    pattern = ''
    # Candidates are built lazily so a word_list hit returns without
    # generating the remaining combinations.
    for stresses in itertools.product(*stress_pattern_space):
        marks = iter(stresses)
        candidate_phones = []
        for p in phones:
            if p in stress_options:
                candidate_phones.append(p + next(marks))
            else:
                candidate_phones.append(p)
        candidate = ' '.join(candidate_phones)

        if candidate in word_list:
            return candidate
        score = b.assessWord(candidate)
        if score < best:
            best = score
            pattern = candidate

    return pattern
Example #2
0
def getPhonScores(puns):
	"""Return a dict mapping each pun to the summed BLICK score of its words.

	Each pun is split on '#'; the final piece is discarded, pieces containing
	"D I C T" are skipped, and the remaining pieces are stripped and scored
	with ``BlickLoader.assessWord``.
	"""
	b = BlickLoader()
	scoredPuns = {}

	for pun in puns:
		# Everything after the last '#' is dropped.
		pieces = pun.split('#')[:-1]
		scoredPuns[pun] = sum(
			b.assessWord(piece.strip())
			for piece in pieces
			if "D I C T" not in piece
		)

	return scoredPuns
Example #3
0
def train_wakeword_model(audio_train_loader,
                         vocab_list,
                         label_model,
                         beam_size=3,
                         num_hypotheses=5,
                         query_by_string=False):
    """Build a wakeword model keyed by a running integer index.

    Args:
        audio_train_loader: iterable of inputs passed one at a time to
            ``label_model`` (only used when ``query_by_string`` is False).
        vocab_list: list of CTC labels; must contain the blank symbol '_'.
        label_model: callable returning the posteriors handed to the decoder.
        beam_size: beam width given to ``CTCBeamDecoder``.
        num_hypotheses: number of hypotheses kept per input.
        query_by_string: when True, read the keywords from
            ``config["wakeword_model"]`` and weight each with BLICK instead
            of decoding audio.

    Returns:
        dict.  With ``query_by_string`` each value is one
        ``(hypothesis, weight)`` tuple; otherwise each value is a list of
        ``(hypothesis, weight)`` tuples.
    """
    wakeword_model = {}

    if query_by_string:
        # load ww model produced by MFA from config
        keywords = config["wakeword_model"]
        # load blick
        b = BlickLoader()

        # enumerate() yields (index, item) pairs, so only the values are
        # iterated here; the mapping's keys are not used.
        for i, y_hat in enumerate(keywords.values()):
            w = b.assessWord(y_hat)
            # only one hypothesis per keyword if using MFA
            wakeword_model[i] = (y_hat, w)

        return wakeword_model

    # train ww model from scratch
    # The decoder depends only on the arguments, so build it once.
    # decode using CTC, vocab_list is A (labels)
    decoder = CTCBeamDecoder(vocab_list,
                             beam_width=beam_size,
                             blank_id=vocab_list.index('_'))

    # Keyed by position in the loader, matching the query_by_string branch;
    # the loaded item itself is not used as a dict key.
    for i, batch in enumerate(audio_train_loader):
        posteriors_i = label_model(batch)
        beam, beam_scores, _, _ = decoder.decode(posteriors_i)

        hypotheses = wakeword_model.setdefault(i, [])
        # NOTE(review): beam/beam_scores are indexed on their first axis as
        # in the original code — confirm that axis is the hypothesis axis for
        # the decoder in use.
        for j in range(num_hypotheses):
            y_hat = beam[j]  # hypothesis
            log_prob_post = beam_scores[j]
            w = log_prob_post**-1

            # for each keyword, append the tuple(hypotheses + weights) to the list
            hypotheses.append((y_hat, w))

    return wakeword_model
def _positionalProb(field, position, segments):
    """Return the share of stored strings having ``segments`` at ``position``.

    The denominator counts strings that have any segments in those slots.
    Returns 0.0 when no stored string is long enough.
    """
    lookup = field + '__regex'
    totPattern = '^' + ' '.join([any_segment] * (position + len(segments))) + '.*$'
    totCount = PhonoString.objects.filter(**{lookup: totPattern}).count()
    if totCount == 0:
        return 0.0
    pattern = '^' + ' '.join([any_segment] * position + segments) + '.*$'
    count = PhonoString.objects.filter(**{lookup: pattern}).count()
    return float(count) / float(totCount)


def getPhonotacticProb(input_string,use_blick=True,no_stress=False):
    """Score the phonotactic probability of a space-separated phone string.

    With ``use_blick`` the BLICK score from ``BlickLoader.assessWord`` is
    returned (using the 'NoStress' grammar when ``no_stress`` is set).

    Otherwise returns ``(SPprob, BPprob)``: the mean positional
    single-segment and two-segment probabilities computed from the
    ``PhonoString`` table.  With ``no_stress`` the digits are stripped from
    the input and the ``NoStress`` field is queried instead of
    ``Transcription``.  ``BPprob`` is 0.0 for a single-phone input, which has
    no two-segment positions.
    """
    if use_blick:
        if no_stress:
            b = BlickLoader(grammarType='NoStress')
        else:
            b = BlickLoader()
        return b.assessWord(str(input_string))

    if no_stress:
        input_string = re.sub('[0-9]','',input_string)
    field = 'NoStress' if no_stress else 'Transcription'

    phones = input_string.split(" ")
    last = len(phones) - 1
    SPprob = 0.0
    BPprob = 0.0
    for i, phone in enumerate(phones):
        SPprob += _positionalProb(field, i, [phone])
        if i != last:
            BPprob += _positionalProb(field, i, [phone, phones[i + 1]])

    SPprob = SPprob / float(len(phones))
    # A one-phone string has no adjacent pairs; avoid dividing by zero.
    BPprob = BPprob / float(last) if last else 0.0
    return (SPprob,BPprob)
Example #5
0
#!/usr/bin/env python

import argparse
from blick import BlickLoader

# Command-line entry point: run BLICK over a file of phone strings.
parser = argparse.ArgumentParser(description='Add phonotactic probability to a file of phone strings.')
# type=open opens the file during argument parsing; only its name is used below.
parser.add_argument('filename', type=open,
                   help='name of file of phone strings')
parser.add_argument('-d','--debug', action='store_true',
                    default=False,
                   help='whether or not debug mode should be activated')
parser.add_argument('-c','--constraints', action='store_true',
                    default=False,
                   help='whether or not constraints should be included in the output')
# A list (not a set) keeps the choices in a stable order in the help output.
parser.add_argument('-g','--grammar', type=str, choices=['HayesWhite','NoStress','default'], default='default',
                   help='type of grammar to be used')

args = parser.parse_args()
argdict = vars(args)
# Close the handle argparse opened; assessFile is given the path, not the handle.
argdict['filename'].close()
b = BlickLoader(debug=argdict['debug'],grammarType=argdict['grammar'])
b.assessFile(argdict['filename'].name,includeConstraints=argdict['constraints'])
        if syl.nucleus in SHORT_VOWELS:
            return False
    if is_first and syl.onset and syl.onset[0] == "ZH":
        return False
    # if is_last and stress_lvl == 1 and len(syl.coda) == 0:
    # 	return False
    if syl.onset and syl.coda and syl.onset[0] == "S" and not syl.onset[
            -1] == "T" and syl.nucleus in SHORT_VOWELS:
        if syl.coda[0] == syl.onset[-1]:
            return False
    if stress_lvl != 1 and syl.nucleus not in {
            "AH", "ER", "IH", "IY", "OW", "UW"
    }:
        return False
    return True


# Generate 100 words, score each with BLICK, and print those whose
# exp(-score) exceeds 0.00001, in ascending order of that value.
blick_rater = BlickLoader()
words = []

for i in range(100):
    word = getWord()
    # Double spaces are collapsed only for scoring; the stored word is unchanged.
    normalized = word.replace("  ", " ")
    penalty, rules = blick_rater.assessWord(normalized,
                                            includeConstraints=True)
    score = exp(-penalty)
    words.append([word, score, rules])

for word, score, rules in sorted(words, key=lambda entry: entry[1]):
    if not score > 0.00001:
        continue
    print(word, score, rules)
    print()
Example #7
0
from blick import BlickLoader

# Command-line entry point: run BLICK over a file of phone strings.
parser = argparse.ArgumentParser(
    description='Add phonotactic probability to a file of phone strings.')
# type=open opens the file during argument parsing; only its name is used below.
parser.add_argument('filename',
                    type=open,
                    help='name of file of phone strings')
parser.add_argument('-d',
                    '--debug',
                    action='store_true',
                    default=False,
                    help='whether or not debug mode should be activated')
parser.add_argument('-c',
                    '--constraints',
                    action='store_true',
                    default=False,
                    help='whether or not constraints should be included in the output')
# A list (not a set) keeps the choices in a stable order in the help output.
parser.add_argument('-g',
                    '--grammar',
                    type=str,
                    choices=['HayesWhite', 'NoStress', 'default'],
                    default='default',
                    help='type of grammar to be used')

args = parser.parse_args()
argdict = vars(args)
# Close the handle argparse opened; assessFile is given the path, not the handle.
argdict['filename'].close()
b = BlickLoader(debug=argdict['debug'], grammarType=argdict['grammar'])
b.assessFile(argdict['filename'].name,
             includeConstraints=argdict['constraints'])
Example #8
0
import markov, time, random
from blick import BlickLoader
from google import search
from random import randrange

b = BlickLoader(grammarType="default")

# Each row is a lowercase spelling fragment followed by one or more phone
# strings, each written with a leading space.  Two-letter fragments are listed
# before single letters.
# presumably the phone strings are the candidate pronunciations of the
# fragment — verify against the code that consumes this table.
phonetics = [
    ["th", " TH", " DH"],
    ["sh", " SH"],
    ["ee", " IY1"],
    ["ai", " EY1"],
    ["oo", " UW1", " UH1"],
    ["ou", " AW1", " AW2", " UW2"],
    ["oi", " OY2"],
    ["oy", " OY1"],
    ["oa", " OW1"],
    ["ng", " NG"],
    ["e", " IY2", " EH1", " EH2", " EY2", " ER1", " ER2", " ER0"],
    ["i", " IH1", " IH2", " IH0", " AY1", " AY2"],
    ["a", " AE1", " AE2", " AO2", " AH0"],
    ["o", " AO1", " AA1", " AA2", " OW2", " OW0"],
    ["u", " UW0", " UH2", " AH1", " AH2"],
    ["p", " P"],
    ["b", " B"],
    ["f", " F"],
    ["v", " V"],
    ["m", " M"],
    ["w", " W"],
    ["t", " T"],
    ["d", " D"],
    ["s", " S"],
]