def guessStress(input_string):
    """Pick the best stress-marked variant of a space-separated phone string.

    Every phone that matches a vowel known to ``BlickLoader`` (a vowel with
    its final marker character removed) is expanded with each marker that
    vowel can carry.  Candidates are tried in a fixed order; the first one
    found in ``word_list`` is returned immediately, otherwise the candidate
    with the lowest ``assessWord`` score wins (earliest candidate on ties).

    :param input_string: phones separated by single spaces, vowels unmarked.
    :returns: the chosen marked string.  With no vowels in the input the only
        candidate is the input itself.
    """
    b = BlickLoader()

    # Group the marker (last character) of every vowel under its bare form
    # (everything before the marker) in a single pass over b.vowels.
    vows = {}
    for vowel in b.vowels:
        vows.setdefault(vowel[:-1], set()).add(vowel[-1])

    phones = input_string.split(" ")

    # sorted() pins the candidate order; iterating the raw sets would make
    # tie-breaking depend on string hashing.
    stress_pattern_space = [sorted(vows[x]) for x in phones if x in vows]

    # One "%s" slot per vowel, filled from each marker combination below.
    revised_string = ' '.join(p + "%s" if p in vows else p for p in phones)

    # Start from +inf rather than a magic ceiling so that some candidate is
    # always selected, however large the scores are.
    best = float("inf")
    pattern = ''
    for markers in itertools.product(*stress_pattern_space):
        candidate = revised_string % markers
        if candidate in word_list:
            return candidate
        score = b.assessWord(candidate)
        if score < best:
            best = score
            pattern = candidate

    return pattern
Esempio n. 2
0
def getPhonScores(puns):
    """Map every pun to the summed ``assessWord`` score of its words.

    A pun is split on ``'#'``; the last field is always dropped, as is any
    field containing the text ``"D I C T"``.  The remaining fields are
    stripped of surrounding whitespace and scored individually.

    :param puns: iterable of ``'#'``-delimited strings.
    :returns: dict keyed by the original pun string.
    """
    scorer = BlickLoader()
    totals = {}

    for pun in puns:
        # Everything after the final '#' is discarded.
        fields = pun.split('#')[:-1]
        totals[pun] = sum(
            scorer.assessWord(field.strip())
            for field in fields
            if "D I C T" not in field
        )

    return totals
Esempio n. 3
0
def train_wakeword_model(audio_train_loader,
                         vocab_list,
                         label_model,
                         beam_size=3,
                         num_hypotheses=5,
                         query_by_string=False):
    """Build a wake-word model: a dict of weighted hypotheses keyed by index.

    :param audio_train_loader: iterable of inputs passed to ``label_model``;
        only used when ``query_by_string`` is false.
    :param vocab_list: label list handed to the CTC decoder; must contain
        ``'_'``, whose position is used as the blank id.
    :param label_model: callable returning posteriors for one loader item.
    :param beam_size: beam width for the CTC decoder.
    :param num_hypotheses: number of decoded hypotheses kept per loader item.
    :param query_by_string: when true, take the keywords from
        ``config["wakeword_model"]`` and weight each with
        ``BlickLoader.assessWord`` instead of decoding audio.
    :returns: with ``query_by_string`` a dict ``index -> (hypothesis, weight)``
        (a single hypothesis per keyword); otherwise a dict
        ``index -> [(hypothesis, weight), ...]``.
    """
    wakeword_model = {}

    if query_by_string:
        # load ww model produced by MFA from config
        keywords = config["wakeword_model"]
        # load blick
        b = BlickLoader()

        # enumerate() yields (index, (key, value)), so the inner pair has to
        # be unpacked explicitly.
        for i, (_, y_hat) in enumerate(keywords.items()):
            w = b.assessWord(y_hat)
            # only one hypothesis if using MFA
            wakeword_model[i] = (y_hat, w)

        return wakeword_model

    # train ww model from scratch
    # The decoder does not depend on the loader item, so it is built once,
    # and only if there is at least one item to decode.
    decoder = None
    for i, batch in enumerate(audio_train_loader):
        if decoder is None:
            # decode using CTC, vocab_list is A (labels)
            decoder = CTCBeamDecoder(vocab_list,
                                     beam_width=beam_size,
                                     blank_id=vocab_list.index('_'))

        posteriors_i = label_model(batch)
        beam, beam_scores, _, _ = decoder.decode(posteriors_i)

        # Keyed by loader position, matching the query_by_string branch.
        hypotheses = wakeword_model.setdefault(i, [])
        # NOTE(review): assumes beam/beam_scores hold at least
        # num_hypotheses entries along their first axis - confirm against
        # the decoder's output layout.
        for j in range(num_hypotheses):
            y_hat = beam[j]  # hypothesis
            log_prob_post = beam_scores[j]
            w = log_prob_post**-1

            # for each keyword, append the tuple(hypotheses + weights) to the list
            hypotheses.append((y_hat, w))

    return wakeword_model
def _regexMatchRatio(field, segments):
    """Return the fraction of sufficiently long stored strings that start with *segments*.

    The denominator counts ``PhonoString`` rows whose *field* has at least
    ``len(segments)`` segments; the numerator counts those that also match
    *segments* position by position.  Returns 0.0 when the denominator is 0.
    """
    lookup = field + '__regex'
    totPattern = '^' + ' '.join([any_segment] * len(segments)) + '.*$'
    # .count() lets the database count instead of loading every row.
    totCount = PhonoString.objects.filter(**{lookup: totPattern}).count()
    if not totCount:
        return 0.0
    pattern = '^' + ' '.join(segments) + '.*$'
    count = PhonoString.objects.filter(**{lookup: pattern}).count()
    return float(count) / float(totCount)


def getPhonotacticProb(input_string,use_blick=True,no_stress=False):
    """Score a space-separated phone string.

    :param input_string: phones separated by single spaces.
    :param use_blick: when true (default), return
        ``BlickLoader.assessWord(str(input_string))`` and nothing else.
    :param no_stress: with ``use_blick`` selects the ``'NoStress'`` grammar;
        without it, digits are stripped from the input and the ``NoStress``
        field of ``PhonoString`` is queried instead of ``Transcription``.
    :returns: the BlickLoader score, or a tuple ``(SPprob, BPprob)``: the mean
        positional single-phone ratio and the mean positional phone-pair
        ratio.  ``BPprob`` is 0.0 for a single-phone input, which has no pairs.
    """
    if use_blick:
        if no_stress:
            b = BlickLoader(grammarType='NoStress')
        else:
            b = BlickLoader()
        return b.assessWord(str(input_string))
    if no_stress:
        input_string = re.sub('[0-9]','',input_string)
    field = 'NoStress' if no_stress else 'Transcription'
    SPprob = 0.0
    BPprob = 0.0
    phones = input_string.split(" ")
    last = len(phones) - 1
    for i, phone in enumerate(phones):
        # Wildcards for the i positions before the phone(s) being tested.
        prefix = [any_segment] * i
        SPprob += _regexMatchRatio(field, prefix + [phone])
        if i != last:
            BPprob += _regexMatchRatio(field, prefix + [phone, phones[i+1]])
    SPprob = SPprob / float(len(phones))
    # A one-phone input has no adjacent pairs; avoid dividing by zero.
    BPprob = BPprob / float(last) if last else 0.0
    return (SPprob,BPprob)
        if syl.nucleus in SHORT_VOWELS:
            return False
    if is_first and syl.onset and syl.onset[0] == "ZH":
        return False
    # if is_last and stress_lvl == 1 and len(syl.coda) == 0:
    # 	return False
    if syl.onset and syl.coda and syl.onset[0] == "S" and not syl.onset[
            -1] == "T" and syl.nucleus in SHORT_VOWELS:
        if syl.coda[0] == syl.onset[-1]:
            return False
    if stress_lvl != 1 and syl.nucleus not in {
            "AH", "ER", "IH", "IY", "OW", "UW"
    }:
        return False
    return True


# Score 100 words from getWord() and print the ones whose transformed score
# clears a small threshold.
blick_rater = BlickLoader()
words = []

for i in range(100):
    word = getWord()
    # Doubled spaces are collapsed for scoring only; the stored word keeps them.
    score, rules = blick_rater.assessWord(
        word.replace("  ", " "), includeConstraints=True)
    score = exp(-score)
    words.append([word, score, rules])

# Ascending by the transformed score (second element of each entry).
for word, score, rules in sorted(words, key=lambda entry: entry[1]):
    if not score > 0.00001:
        continue
    print(word, score, rules)
    print()
Esempio n. 6
0
# Feed every non-blank dictionary line that is not a possessive ("...'s")
# into the chain, lower-cased.  The with-block closes the file afterwards.
with open(dictionary) as dictionary_file:
    for word in dictionary_file:
        word = word.strip()
        if word != "" and not word.endswith("'s"):
            chain.add(word.lower())

#Make a word, check if it is within set range, search google for it, save it. Up to 5000 words
words = 0
while words < 5000:
    word = "".join(chain.random_output())
    if len(word) > 4 and len(word) < 10:
        # 100 is the "no usable score" default; it fails the range check below.
        score = 100
        blickified = phonetify(word)
        for blicked in blickified:
            try:
                thisscore = b.assessWord(blicked)
            except Exception:
                # sometimes this bails out, instead of tracking it down each
                # time, this was an easy out: skip the variant so a failure
                # can neither reuse a stale score nor discard the best one
                # found so far.
                continue
            if thisscore < score:
                score = thisscore
        if score > 0 and score < 18:
            try:
                first_url = search('"' + word + '"', num=10, stop=1)
                # Pull nine results; running out early raises StopIteration,
                # which is what marks the word as "good" below.
                for x in range(1, 10):
                    this_url = next(first_url)
                print(str(score) + ',' + word + " - bad " + this_url)
            except StopIteration:
                with open('proop.out', 'a') as outfile:
                    outfile.write(str(score) + ',' + word + '\n')
                print(str(score) + ',' + word + " - good")
                words += 1