def guessStress(input_string):
    """Choose the most phonotactically probable stress assignment for a
    space-separated phone string, preferring any variant found in word_list."""
    loader = BlickLoader()
    # Map each stress-stripped vowel symbol to the set of stress digits it carries.
    vowel_stresses = {v[:-1]: {w[-1] for w in loader.vowels if w[:-1] == v[:-1]}
                      for v in loader.vowels}
    phones = input_string.split(" ")
    # One set of candidate stress digits per vowel, in order of appearance.
    stress_pattern_space = [vowel_stresses[ph] for ph in phones
                            if ph in vowel_stresses]
    # Template with a %s slot appended to every vowel.
    template = ' '.join(ph + "%s" if ph in vowel_stresses else ph
                        for ph in phones)
    candidates = [template % combo
                  for combo in itertools.product(*stress_pattern_space)]
    best_score = 1000  # sentinel: any real Blick score should undercut this
    best_candidate = ''
    for candidate in candidates:
        # A known dictionary word wins outright.
        if candidate in word_list:
            return candidate
        score = loader.assessWord(candidate)
        if score < best_score:
            best_score = score
            best_candidate = candidate
    return best_candidate
def getPhonScores(puns):
    """Score each pun by summing Blick phonotactic scores over its words.

    Words are '#'-separated; the trailing empty segment after the final '#'
    is dropped, as are words containing the marker "D I C T".
    """
    loader = BlickLoader()
    scoredPuns = {}
    for pun in puns:
        # split('#') leaves an empty final element — discard it.
        words = pun.split('#')[:-1]
        scoredPuns[pun] = sum(loader.assessWord(w.strip())
                              for w in words
                              if "D I C T" not in w)
    return scoredPuns
def train_wakeword_model(audio_train_loader, vocab_list, label_model,
                         beam_size=3, num_hypotheses=5, query_by_string=False):
    """Build a wakeword model mapping each keyword (or training item) to one
    or more (hypothesis, weight) pairs.

    When query_by_string is True, pronunciations come from the MFA-produced
    config["wakeword_model"] and are weighted by their Blick score (a single
    hypothesis each). Otherwise hypotheses are decoded from label_model
    posteriors with CTC beam search and weighted by inverse log-posterior.
    """
    wakeword_model = {}
    if query_by_string:
        # load ww model produced by MFA from config
        keywords = config["wakeword_model"]
        b = BlickLoader()
        # BUG FIX: original unpacked `for i, _, y_hat in enumerate(...)`,
        # which raises ValueError — enumerate yields (index, (key, value)).
        for i, (_, y_hat) in enumerate(keywords.items()):
            w = b.assessWord(y_hat)
            # Only one hypothesis per keyword when using MFA.
            wakeword_model[i] = (y_hat, w)
    else:
        # train ww model from scratch
        for i in audio_train_loader:
            posteriors_i = label_model(i)
            # BUG FIX: original referenced self.vocab_list / self.beam_size in
            # a module-level function (NameError) and ignored the parameters.
            decoder = CTCBeamDecoder(vocab_list,
                                     beam_width=beam_size,
                                     blank_id=vocab_list.index('_'))
            beam, beam_scores, _, _ = decoder.decode(posteriors_i)
            # BUG FIX: the hypothesis list must exist before appending.
            hypotheses = wakeword_model.setdefault(i, [])
            for j in range(num_hypotheses):
                y_hat = beam[j]                 # j-th best hypothesis
                log_prob_post = beam_scores[j]  # its log-posterior score
                w = log_prob_post ** -1         # inverse score as weight
                hypotheses.append((y_hat, w))
    return wakeword_model
def getPhonotacticProb(input_string, use_blick=True, no_stress=False):
    """Return a phonotactic probability for a space-separated phone string.

    With use_blick, returns the Blick grammar score (optionally the
    stress-free grammar). Otherwise returns a (SPprob, BPprob) tuple of
    average positional single-phone and biphone probabilities estimated by
    regex counts over the PhonoString table.
    """
    if use_blick:
        if no_stress:
            b = BlickLoader(grammarType='NoStress')
        else:
            b = BlickLoader()
        return b.assessWord(str(input_string))

    if no_stress:
        # Strip stress digits so lookups hit the NoStress transcriptions.
        input_string = re.sub('[0-9]', '', input_string)
    phones = input_string.split(" ")

    def _positional_prob(segments, n_slots):
        """Fraction of transcriptions matching `segments` at their position,
        among transcriptions at least n_slots segments long."""
        pattern = '^' + ' '.join(segments) + '.*$'
        totPattern = '^' + ' '.join([any_segment] * n_slots) + '.*$'
        if no_stress:
            count = len(PhonoString.objects.filter(NoStress__regex=pattern))
            totCount = len(PhonoString.objects.filter(NoStress__regex=totPattern))
        else:
            count = len(PhonoString.objects.filter(Transcription__regex=pattern))
            totCount = len(PhonoString.objects.filter(Transcription__regex=totPattern))
        # BUG FIX: avoid ZeroDivisionError when nothing is that long.
        return float(count) / float(totCount) if totCount else 0.0

    SPprob = 0.0
    BPprob = 0.0
    for i in range(len(phones)):
        # Single-phone probability at position i.
        SPprob += _positional_prob([any_segment] * i + [phones[i]], i + 1)
        if i != len(phones) - 1:
            # Biphone probability at positions (i, i+1).
            BPprob += _positional_prob(
                [any_segment] * i + [phones[i], phones[i + 1]], i + 2)

    SPprob = SPprob / float(len(phones))
    # BUG FIX: a single-phone string has no biphones; the original divided
    # by len(phones)-1 == 0 and raised ZeroDivisionError.
    BPprob = BPprob / float(len(phones) - 1) if len(phones) > 1 else 0.0
    return (SPprob, BPprob)
#!/usr/bin/env python
"""CLI: score every phone string in a file with the Blick phonotactic grammar."""
import argparse

from blick import BlickLoader

parser = argparse.ArgumentParser(
    description='Add phonotactic probability to a file of phone strings.')
parser.add_argument('filename', type=open,
                    help='name of file of phone strings')
parser.add_argument('-d', '--debug', action='store_true', default=False,
                    help='whether or not debug mode should be activated')
# BUG FIX: help text was copy-pasted from --debug; describe what -c does.
parser.add_argument('-c', '--constraints', action='store_true', default=False,
                    help='whether or not violated constraints should be '
                         'included in the output')
parser.add_argument('-g', '--grammar', type=str,
                    choices=set(['HayesWhite', 'NoStress', 'default']),
                    default='default',
                    help='type of grammar to be used')
args = parser.parse_args()
argdict = vars(args)
b = BlickLoader(debug=argdict['debug'], grammarType=argdict['grammar'])
b.assessFile(argdict['filename'].name,
             includeConstraints=argdict['constraints'])
# NOTE(review): this chunk begins mid-function — the enclosing def (which
# binds syl, is_first, is_last and stress_lvl) lies outside the visible
# source, so only the tail of its body appears below.
    # Reject syllables whose nucleus is in SHORT_VOWELS.
    if syl.nucleus in SHORT_VOWELS:
        return False
    # Reject a word-initial "ZH" onset.
    if is_first and syl.onset and syl.onset[0] == "ZH":
        return False
    # if is_last and stress_lvl == 1 and len(syl.coda) == 0:
    #     return False
    # For S-initial onset clusters not ending in "T" with a short nucleus:
    # reject if the coda starts with the same consonant the onset ends with.
    if syl.onset and syl.coda and syl.onset[0] == "S" and not syl.onset[
            -1] == "T" and syl.nucleus in SHORT_VOWELS:
        if syl.coda[0] == syl.onset[-1]:
            return False
    # Non-primary-stress syllables are limited to this nucleus set.
    if stress_lvl != 1 and syl.nucleus not in {
            "AH", "ER", "IH", "IY", "OW", "UW"}:
        return False
    return True


# Generate 100 words, score them with Blick, and print the ones whose
# probability (exp of negative Blick score) exceeds a small threshold,
# sorted from least to most probable.
blick_rater = BlickLoader()
words = []
for i in range(100):
    word = getWord()
    # NOTE(review): replace(" ", " ") substitutes a space for a space as
    # written — likely a whitespace-mangled normalisation call; confirm
    # against getWord()'s output format.
    score, rules = blick_rater.assessWord(word.replace(" ", " "),
                                          includeConstraints=True)
    score = exp(-score)  # convert Blick cost to a probability-like value
    words.append([word, score, rules])
for word, score, rules in sorted(words, key=itemgetter(1)):
    if score > 0.00001:
        print(word, score, rules)
        print()
"""CLI: score every phone string in a file with the Blick phonotactic grammar."""
# Ensure argparse is in scope even when this chunk runs standalone.
import argparse

from blick import BlickLoader

parser = argparse.ArgumentParser(
    description='Add phonotactic probability to a file of phone strings.')
parser.add_argument('filename', type=open,
                    help='name of file of phone strings')
parser.add_argument('-d', '--debug', action='store_true', default=False,
                    help='whether or not debug mode should be activated')
# BUG FIX: help text was copy-pasted from --debug; describe what -c does.
parser.add_argument('-c', '--constraints', action='store_true', default=False,
                    help='whether or not violated constraints should be '
                         'included in the output')
parser.add_argument('-g', '--grammar', type=str,
                    choices=set(['HayesWhite', 'NoStress', 'default']),
                    default='default',
                    help='type of grammar to be used')
args = parser.parse_args()
argdict = vars(args)
b = BlickLoader(debug=argdict['debug'], grammarType=argdict['grammar'])
b.assessFile(argdict['filename'].name,
             includeConstraints=argdict['constraints'])
import markov
import time
import random
from blick import BlickLoader
from google import search
from random import randrange

b = BlickLoader(grammarType="default")

# Grapheme -> candidate ARPABET phones (each phone string keeps its
# leading space so joined output stays space-separated).
phonetics = [
    ["th", " TH", " DH"],
    ["sh", " SH"],
    ["ee", " IY1"],
    ["ai", " EY1"],
    ["oo", " UW1", " UH1"],
    ["ou", " AW1", " AW2", " UW2"],
    ["oi", " OY2"],
    ["oy", " OY1"],
    ["oa", " OW1"],
    ["ng", " NG"],
    ["e", " IY2", " EH1", " EH2", " EY2", " ER1", " ER2", " ER0"],
    ["i", " IH1", " IH2", " IH0", " AY1", " AY2"],
    ["a", " AE1", " AE2", " AO2", " AH0"],
    ["o", " AO1", " AA1", " AA2", " OW2", " OW0"],
    ["u", " UW0", " UH2", " AH1", " AH2"],
    ["p", " P"],
    ["b", " B"],
    ["f", " F"],
    ["v", " V"],
    ["m", " M"],
    ["w", " W"],
    ["t", " T"],
    ["d", " D"],
    ["s", " S"],
]