コード例 #1
0
def getPhonScores(puns):
    """Score each pun by summing the BLICK scores of its fields.

    Each pun is split on '#'. The final field is always discarded, and any
    remaining field containing the marker "D I C T" is ignored. The other
    fields are stripped of surrounding whitespace and passed to
    ``BlickLoader.assessWord``; their scores are summed.

    Args:
        puns: iterable of '#'-delimited pun strings.

    Returns:
        dict mapping each original pun string to its summed score.
    """
    rater = BlickLoader()
    scores_by_pun = {}

    for pun in puns:
        # Everything after the last '#' is dropped before scoring.
        fields = pun.split('#')[:-1]
        scores_by_pun[pun] = sum(
            rater.assessWord(field.strip())
            for field in fields
            if "D I C T" not in field
        )

    return scores_by_pun
コード例 #2
0
def train_wakeword_model(audio_train_loader,
                         vocab_list,
                         label_model,
                         beam_size=3,
                         num_hypotheses=5,
                         query_by_string=False):
    """Build a wake-word model of weighted hypotheses.

    Two modes are supported:

    * ``query_by_string=True``: the keywords are read from the module-level
      ``config["wakeword_model"]`` mapping and each value is weighted with
      ``BlickLoader.assessWord``. Every entry holds a single
      ``(hypothesis, weight)`` tuple.
    * ``query_by_string=False``: each item of ``audio_train_loader`` is run
      through ``label_model`` and decoded with a CTC beam decoder. Every
      entry holds a list of ``(hypothesis, weight)`` tuples, where the weight
      is the reciprocal of the decoder score.

    Args:
        audio_train_loader: iterable of inputs accepted by ``label_model``.
        vocab_list: sequence of labels handed to the decoder; must contain
            the blank symbol ``'_'``.
        label_model: callable returning posteriors for one loader item.
        beam_size: beam width handed to the decoder.
        num_hypotheses: number of hypotheses collected per loader item.
        query_by_string: choose the config-driven mode instead of decoding.

    Returns:
        dict keyed by the 0-based position of the keyword / loader item.

    Raises:
        ValueError: if ``vocab_list`` has no ``'_'`` entry (decoding mode).
    """
    wakeword_model = {}

    if query_by_string:
        # load ww model produced by MFA from config
        keywords = config["wakeword_model"]
        # load blick
        b = BlickLoader()

        # Only the mapping's values are used; enumerate() yields
        # (index, value) pairs, so exactly two names are unpacked.
        for i, y_hat in enumerate(keywords.values()):
            w = b.assessWord(y_hat)
            # only one hypothesis per keyword if using MFA
            wakeword_model[i] = (y_hat, w)

        return wakeword_model

    # train ww model from scratch
    # decode using CTC, vocab_list is A (labels). The decoder depends only on
    # the function arguments, so it is built once rather than per item.
    decoder = CTCBeamDecoder(vocab_list,
                             beam_width=beam_size,
                             blank_id=vocab_list.index('_'))

    # Key by position: the loader item itself may not be hashable, and the
    # per-item list has to exist before hypotheses can be appended to it.
    for i, batch in enumerate(audio_train_loader):
        posteriors_i = label_model(batch)
        beam, beam_scores, _, _ = decoder.decode(posteriors_i)
        hypotheses = wakeword_model.setdefault(i, [])

        # NOTE(review): this indexes the first axis of the decoder output and
        # assumes it holds at least num_hypotheses entries -- confirm against
        # the decoder's output layout and the chosen beam_size.
        for j in range(num_hypotheses):
            y_hat = beam[j]  # hypothesis
            log_prob_post = beam_scores[j]
            w = log_prob_post**-1

            # for each keyword, append the tuple(hypotheses + weights) to the list
            hypotheses.append((y_hat, w))

    return wakeword_model
コード例 #3
0
        if syl.nucleus in SHORT_VOWELS:
            return False
    if is_first and syl.onset and syl.onset[0] == "ZH":
        return False
    # if is_last and stress_lvl == 1 and len(syl.coda) == 0:
    # 	return False
    if syl.onset and syl.coda and syl.onset[0] == "S" and not syl.onset[
            -1] == "T" and syl.nucleus in SHORT_VOWELS:
        if syl.coda[0] == syl.onset[-1]:
            return False
    if stress_lvl != 1 and syl.nucleus not in {
            "AH", "ER", "IH", "IY", "OW", "UW"
    }:
        return False
    return True


# Generate 100 words, rate each one with BLICK (keeping the constraints the
# rater reports), and print the sufficiently probable ones in ascending
# order of exp(-score).
blick_rater = BlickLoader()
words = []

for i in range(100):
    word = getWord()
    # Doubled spaces are collapsed before the word is handed to the rater;
    # the unmodified word is what gets stored and printed.
    score, rules = blick_rater.assessWord(
        word.replace("  ", " "), includeConstraints=True)
    words.append([word, exp(-score), rules])

for word, score, rules in sorted(words, key=lambda entry: entry[1]):
    # Skip anything that does not clear the probability threshold.
    if not score > 0.00001:
        continue
    # One blank line separates consecutive entries.
    print(word, score, rules, end="\n\n")
コード例 #4
0
from blick import BlickLoader

# Command-line entry point: score every phone string in a file with BLICK.
parser = argparse.ArgumentParser(
    description='Add phonotactic probability to a file of phone strings.')
# type=open makes argparse open the file while parsing, so an unreadable
# path fails before the grammar is loaded.
parser.add_argument('filename',
                    type=open,
                    help='name of file of phone strings')
parser.add_argument('-d',
                    '--debug',
                    action='store_true',
                    default=False,
                    help='whether or not debug mode should be activated')
parser.add_argument('-c',
                    '--constraints',
                    action='store_true',
                    default=False,
                    help='whether or not constraints should be included')
# A list (not a set) keeps the order of the choices stable in --help and in
# error messages.
parser.add_argument('-g',
                    '--grammar',
                    type=str,
                    choices=['HayesWhite', 'NoStress', 'default'],
                    default='default',
                    help='type of grammar to be used')

args = parser.parse_args()
argdict = vars(args)
# Only the path is needed from here on: close the handle argparse opened so
# it is not leaked while assessFile works on the file by name.
argdict['filename'].close()
b = BlickLoader(debug=argdict['debug'], grammarType=argdict['grammar'])
b.assessFile(argdict['filename'].name,
             includeConstraints=argdict['constraints'])