def recognize(models: dict, test_set: SinglesData):
    """Score each test item against every word model and choose the best word.

    :param models: dict mapping a word to its trained model; every model must
        expose ``score(X, lengths)``
    :param test_set: SinglesData object providing ``get_all_sequences()`` and
        ``get_all_Xlengths()``
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of ``get_all_sequences()``
        probabilities is a list of dicts ``{word: log likelihood}``; a word
        whose model raises while scoring is recorded as ``-inf``
        guesses is a list with the highest-scoring word for each test item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []
    sequences = test_set.get_all_sequences()
    xlengths = test_set.get_all_Xlengths()
    for item_id in sequences:
        features, seq_lengths = xlengths[item_id]
        log_ls = {}
        for candidate, hmm in models.items():
            try:
                log_ls[candidate] = hmm.score(features, seq_lengths)
            except:
                log_ls[candidate] = -float("inf")
        probabilities.append(log_ls)
        # Parallel key/value lists: the first position holding the maximum wins.
        words = list(log_ls.keys())
        scores = list(log_ls.values())
        guesses.append(words[scores.index(max(scores))])
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models keyed by word; every model must
        expose ``score(X, lengths)``
    :param test_set: SinglesData object; ``get_all_Xlengths()`` supplies one
        ``(X, lengths)`` pair per test item
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of ``get_all_Xlengths()``
        probabilities is a list of dicts ``{word: log likelihood}``; a model
        that cannot score an item contributes ``-inf``
        guesses is a list with the best-scoring word per item, or ``None``
        when no model managed to score that item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for x, lengths in test_set.get_all_Xlengths().values():
        scores = {}
        best_guess, highest_score = None, None
        for word, model in models.items():
            try:
                log_l = model.score(x, lengths)
            except Exception:
                # -inf (instead of None) keeps every value a comparable number,
                # so callers can sort or take max() over the dict safely.
                scores[word] = float("-inf")
                continue
            scores[word] = log_l
            if highest_score is None or highest_score < log_l:
                highest_score, best_guess = log_l, word
        probabilities.append(scores)
        guesses.append(best_guess)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Rank every word model against every test item.

    :param models: dict of trained models keyed by word; each value must
        provide ``score(X, lengths)``
    :param test_set: SinglesData object; its ``get_all_Xlengths()`` mapping
        supplies one ``(X, lengths)`` pair per test item
    :return: (list, list) as probabilities, guesses
        probabilities holds one ``{word: log likelihood}`` dict per test item,
        with ``-inf`` for any model that raised while scoring
        guesses holds the word with the largest log likelihood per test item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []
    for features, seq_lengths in test_set.get_all_Xlengths().values():
        scores = {}
        for word, model in models.items():
            try:
                log_l = model.score(features, seq_lengths)
            except:
                log_l = float('-inf')
            scores[word] = log_l
        probabilities.append(scores)
        guesses.append(max(scores, key=scores.get))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Match each test item to the word model that scores it highest.

    :param models: dict mapping word -> trained model with ``score(X, lengths)``
    :param test_set: SinglesData object; items are enumerated through
        ``get_all_Xlengths()`` and fetched with ``get_item_Xlengths(id)``
    :return: (list, list) as probabilities, guesses
        probabilities is a list of ``{word: log likelihood}`` dicts, one per
        test item (``-inf`` when a model raises while scoring)
        guesses is a list of the top-scoring word for each test item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for item_id in test_set.get_all_Xlengths().keys():
        features, seq_lengths = test_set.get_item_Xlengths(item_id)
        scored = {}
        for candidate, hmm in models.items():
            try:
                log_l = hmm.score(features, seq_lengths)
            except:
                log_l = float("-inf")
            scored[candidate] = log_l
        probabilities.append(scored)
        best_word = max(scored, key=lambda w: scored[w])
        guesses.append(best_word)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models keyed by word; every model must
        expose ``score(X, lengths)``
    :param test_set: SinglesData object exposing ``num_items`` and
        ``get_all_Xlengths()``; items are looked up by ids ``0..num_items-1``
    :return: (list, list) as probabilities, guesses
        both lists are ordered by word_id
        probabilities is a list of dicts ``{word: log likelihood}``; a model
        that cannot score an item contributes ``-inf``
        guesses is a list with the highest-scoring word for each item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    # Fetch the mapping once; looking it up again for every word_id is
    # loop-invariant work.
    all_xlengths = test_set.get_all_Xlengths()
    for word_id in range(test_set.num_items):
        X, lengths = all_xlengths[word_id]
        word_logL_dict = {}
        for word, hmm_model in models.items():
            try:
                word_logL_dict[word] = hmm_model.score(X, lengths)
            except Exception:
                # The model cannot score this sequence: rank it last.
                # Catching Exception (not everything) lets Ctrl-C through.
                word_logL_dict[word] = float('-inf')
        probabilities.append(word_logL_dict)
        guesses.append(max(word_logL_dict, key=word_logL_dict.get))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item with every word model and keep the top word.

    Deprecation and runtime warnings are silenced before scoring.

    :param models: dict of trained models keyed by word; each model must
        provide ``score(X, lengths)``
    :param test_set: SinglesData object; ``get_all_Xlengths()`` supplies one
        ``(X, lengths)`` pair per test item
    :return: (list, list) as probabilities, guesses
        probabilities is a list of ``{word: log likelihood}`` dicts, with
        ``-inf`` for models that raised while scoring
        guesses is a list of the word ranked first for each test item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    warnings.filterwarnings("ignore", category=RuntimeWarning)
    probabilities, guesses = [], []
    for features, seq_lengths in test_set.get_all_Xlengths().values():
        scored_pairs = []
        for word, model in models.items():
            try:
                log_l = model.score(features, seq_lengths)
            except:
                log_l = float("-inf")
            scored_pairs.append((word, log_l))
        # Sorting on the negated score puts the largest log likelihood first.
        ranking = sorted(scored_pairs, key=lambda pair: -pair[1])
        top_word = ranking[0][0]
        probabilities.append(dict(scored_pairs))
        guesses.append(top_word)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Pick, for each test item, the word whose model scores it highest.

    :param models: dict mapping word -> trained model with ``score(X, lengths)``
    :param test_set: SinglesData object; ``get_all_Xlengths()`` supplies one
        ``(X, lengths)`` pair per test item
    :return: (list, list) as probabilities, guesses
        probabilities is a list of ``{word: log likelihood}`` dicts; a model
        that raises while scoring is recorded as ``-inf``
        guesses is a list of the best-scoring word per test item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    item_pairs = list(test_set.get_all_Xlengths().values())
    for features, seq_lengths in item_pairs:
        log_ls = {}
        for word, hmm in models.items():
            try:
                log_l = hmm.score(features, seq_lengths)
            except:
                log_l = float('-inf')
            log_ls[word] = log_l
        probabilities.append(log_ls)
        guesses.append(max(log_ls, key=log_ls.get))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize each test item by scoring it with every word model.

    :param models: dict of trained models keyed by word; each model must
        provide ``score(X, lengths)``
    :param test_set: SinglesData object; items are fetched with
        ``get_item_Xlengths(word_id)`` for ids ``0..len(get_all_Xlengths())-1``
    :return: (list, list) as probabilities, guesses
        both lists are ordered by word_id
        probabilities is a list of ``{word: log likelihood}`` dicts, with
        ``-inf`` for models that raised while scoring
        guesses is a list of the highest-scoring word per test item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []
    item_count = len(test_set.get_all_Xlengths())
    for word_id in range(item_count):
        features, seq_lengths = test_set.get_item_Xlengths(word_id)
        likelihoods = {}
        for word, model in models.items():
            try:
                likelihoods[word] = model.score(features, seq_lengths)
            except:
                likelihoods[word] = float("-inf")
        probabilities.append(likelihoods)
        guesses.append(max(likelihoods, key=likelihoods.get))
    return (probabilities, guesses)
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models keyed by word; every model must
        expose ``score(X, lengths)``
    :param test_set: SinglesData object; ``get_all_Xlengths()`` maps each
        word_id to an ``(X, lengths)`` pair
    :return: (list, list) as probabilities, guesses
        both lists are ordered by ascending word_id
        probabilities is a list of dicts ``{word: log likelihood}``; a model
        that cannot score an item contributes ``-inf``
        guesses is a list with the highest-scoring word for each item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    # Sort by word_id so the output order does not depend on dict ordering.
    # A separate name is used so the ``test_set`` parameter is not rebound.
    ordered_items = sorted(test_set.get_all_Xlengths().items(),
                           key=lambda item: item[0])
    probabilities = []
    guesses = []
    for _word_id, (X, lengths) in ordered_items:
        probability_dict = {}
        for word, model in models.items():
            try:
                probability_dict[word] = model.score(X, lengths)
            except Exception:
                # Keep the word with the worst possible score rather than
                # dropping it: max() below would raise on an empty dict when
                # no model can score this item.
                probability_dict[word] = float("-inf")
        probabilities.append(probability_dict)
        guesses.append(max(probability_dict, key=probability_dict.get))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item with every word model and keep the top word.

    :param models: dict mapping word -> trained model with ``score(X, lengths)``
    :param test_set: SinglesData object; ``get_all_Xlengths()`` supplies one
        ``(X, lengths)`` pair per test item
    :return: (list, list) as probabilities, guesses
        probabilities is a list of ``{word: log likelihood}`` dicts; a model
        that raises an ``Exception`` while scoring is recorded as ``-inf``
        guesses is a list of the best-scoring word per test item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    def _log_likelihood(model, features, seq_lengths):
        # Exceptions vary and are raised deep inside external classes, so the
        # catch is intentionally broad.
        try:
            return model.score(features, seq_lengths)
        except Exception:  # pylint: disable=broad-except
            return float('-inf')

    probabilities = [
        {word: _log_likelihood(model, x_vals, lengths)
         for word, model in models.items()}
        for x_vals, lengths in test_set.get_all_Xlengths().values()
    ]
    guesses = [max(scores, key=scores.get) for scores in probabilities]
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Find the best-matching word model for every test item.

    :param models: dict of trained models keyed by word; each model must
        provide ``score(X, lengths)``
    :param test_set: SinglesData object; items are fetched with
        ``get_item_Xlengths(word_id)`` for ids ``0..len(get_all_Xlengths())-1``
    :return: (list, list) as probabilities, guesses
        both lists are ordered by word_id
        probabilities is a list of ``{word: log likelihood}`` dicts, with
        ``-inf`` for models that raised while scoring
        guesses is a list of the highest-scoring word per test item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    item_count = len(test_set.get_all_Xlengths())
    for word_id in range(item_count):
        features, seq_lengths = test_set.get_item_Xlengths(word_id)
        log_ls = {}
        for word, model in models.items():
            try:
                log_ls[word] = model.score(features, seq_lengths)
            except:
                log_ls[word] = float('-inf')
        probabilities.append(log_ls)
        best_word = max(log_ls, key=log_ls.get)
        guesses.append(best_word)
    return (probabilities, guesses)
def recognize(models: dict, test_set: SinglesData):
    """Score each test item with every word model, tracking the best word.

    :param models: dict mapping word -> trained model with ``score(X, lengths)``
    :param test_set: SinglesData object; ``get_all_Xlengths()`` supplies one
        ``(X, lengths)`` pair per test item
    :return: (list, list) as probabilities, guesses
        probabilities is a list of ``{word: log likelihood}`` dicts; a word
        whose scoring raised is recorded as ``-inf``
        guesses is a list of the word whose score exceeded all earlier ones,
        or ``None`` when no score was greater than ``-inf``
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []
    for features, seq_lengths in test_set.get_all_Xlengths().values():
        log_ls = {}
        top_word = None
        top_score = float('-inf')
        for word, model in models.items():
            try:
                log_l = model.score(features, seq_lengths)
                log_ls[word] = log_l
                if top_score < log_l:
                    top_word, top_score = word, log_l
            except:
                log_ls[word] = float('-inf')
        probabilities.append(log_ls)
        guesses.append(top_word)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score each test item with every word model, tracking the best word.

    :param models: dict of trained models keyed by word; each model must
        provide ``score(X, lengths)``
    :param test_set: SinglesData object; ``get_all_Xlengths()`` supplies one
        ``(X, lengths)`` pair per test item
    :return: (list, list) as probabilities, guesses
        probabilities is a list of ``{word: log likelihood}`` dicts; a word
        whose scoring raised is recorded as ``-inf``
        guesses is a list of the best word among those scored successfully,
        or ``None`` when every model raised
    """
    probabilities = []
    guesses = []
    for features, seq_lengths in test_set.get_all_Xlengths().values():
        log_ls = {}
        best_word = None
        for word, model in models.items():
            try:
                log_l = model.score(features, seq_lengths)
                log_ls[word] = log_l
                # The first successfully scored word always becomes the guess.
                if best_word is None or log_l > log_ls[best_word]:
                    best_word = word
            except:
                log_ls[word] = float('-inf')
        probabilities.append(log_ls)
        guesses.append(best_word)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Build per-item log likelihood tables, then derive the best guesses.

    :param models: dict mapping word -> trained model with ``score(X, lengths)``
    :param test_set: SinglesData object; ``get_all_Xlengths()`` supplies one
        ``(X, lengths)`` pair per test item
    :return: (list, list) as probabilities, guesses
        probabilities is a list of ``{word: log likelihood}`` dicts, with
        ``-inf`` for models that raised while scoring
        guesses is a list of the highest-scoring word per test item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    item_pairs = list(test_set.get_all_Xlengths().values())
    for features, seq_lengths in item_pairs:
        log_ls = {}
        for word, model in models.items():
            try:
                log_l = model.score(features, seq_lengths)
            except:
                log_l = float('-inf')
            log_ls[word] = log_l
        probabilities.append(log_ls)
    # Guesses are derived in a second pass over the finished tables.
    guesses.extend(max(table, key=table.get) for table in probabilities)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models keyed by word; every model must
        expose ``score(X, lengths)``
    :param test_set: SinglesData object; ``get_all_Xlengths()`` maps each
        word_id to an ``(X, lengths)`` pair
    :return: (list, list) as probabilities, guesses
        both lists are ordered by ascending word_id
        probabilities is a list of dicts ``{word: log likelihood}``; a model
        that cannot score an item contributes ``-inf``
        guesses is a list with the highest-scoring word for each item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    seq = test_set.get_all_Xlengths()
    # Walk the real keys in ascending order. For ids 0..n-1 this matches
    # positional indexing, and it no longer raises KeyError for other ids.
    for word_id in sorted(seq):
        X, lengths = seq[word_id]
        word_probs = {}
        for word, model in models.items():
            try:
                word_probs[word] = model.score(X, lengths)
            except Exception:
                # Keep the word with the worst possible score: max() below
                # would raise on an empty dict if every model failed.
                word_probs[word] = float('-inf')
        probabilities.append(word_probs)
    guesses = [max(prob, key=prob.get) for prob in probabilities]
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models keyed by word; every model must
        expose ``score(X, lengths)``
    :param test_set: SinglesData object; ``get_all_Xlengths()`` supplies one
        ``(X, lengths)`` pair per test item
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of ``get_all_Xlengths()``
        probabilities is a list of dicts ``{word: log likelihood}``; a model
        that cannot score an item contributes ``-inf``
        guesses is a list with the highest-scoring word for each item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for X_test, lengths_test in test_set.get_all_Xlengths().values():
        p_dict = {}
        for word, model in models.items():
            try:
                p_dict[word] = model.score(X_test, lengths_test)
            except Exception:
                # A failed model must rank last. A fallback of 0.0 would beat
                # every negative log likelihood and win the guess.
                p_dict[word] = float('-inf')
        probabilities.append(p_dict)
        guesses.append(max(p_dict, key=p_dict.get))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    Scoring runs with numpy divide warnings suppressed.

    :param models: dict of trained models keyed by word; every model must
        expose ``score(X, lengths)``
    :param test_set: SinglesData object; ``get_all_Xlengths()`` supplies one
        ``(X, lengths)`` pair per test item
    :return: (list, list) as probabilities, guesses
        both lists have exactly one entry per test item, in the iteration
        order of ``get_all_Xlengths()``
        probabilities is a list of dicts ``{word: log likelihood}``; a model
        that cannot score an item contributes ``-inf``
        guesses is a list with the highest-scoring word for each item
        (``None`` only when ``models`` is empty)
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    with np.errstate(divide='ignore'):
        for X, lengths in test_set.get_all_Xlengths().values():
            probs = {}
            for word, model in models.items():
                try:
                    probs[word] = model.score(X, lengths)
                except Exception:
                    probs[word] = float('-inf')
            # Always append, even when nothing could be scored. Skipping an
            # item would shift every later entry off its test-set position.
            probabilities.append(probs)
            guesses.append(max(probs, key=probs.get) if probs else None)
    return (probabilities, guesses)
def recognize(models: dict, test_set: SinglesData):
    """Pick, for each test item, the word whose model scores it highest.

    :param models: dict of trained models keyed by word; each model must
        provide ``score(X, lengths)``
    :param test_set: SinglesData object; ``get_all_Xlengths()`` supplies one
        ``(X, lengths)`` pair per test item
    :return: (list, list) as probabilities, guesses
        probabilities is a list of ``{word: log likelihood}`` dicts; a model
        that raises ``ValueError`` while scoring is recorded as ``-inf``
        (any other exception propagates)
        guesses is a list of the highest-scoring word per test item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []
    for features, seq_lengths in test_set.get_all_Xlengths().values():
        log_ls = {}
        for word, model in models.items():
            try:
                log_l = model.score(features, seq_lengths)
            except ValueError:
                log_l = float("-inf")
            log_ls[word] = log_l
        guesses.append(max(log_ls, key=log_ls.get))
        probabilities.append(log_ls)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models keyed by word; every model must
        expose ``score(X, lengths)``
    :param test_set: SinglesData object; ``get_all_Xlengths()`` maps each
        word_id to an ``(X, lengths)`` pair
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of ``get_all_Xlengths()``
        probabilities is a list of dicts ``{word: log likelihood}``; a model
        that cannot score an item contributes ``-inf``
        guesses is a list with the highest-scoring word for each item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for X, lengths in test_set.get_all_Xlengths().values():
        scores = {}
        for word, model in models.items():
            try:
                scores[word] = model.score(X, lengths)
            except Exception:
                # Keep the word with the worst possible score: max() below
                # would raise on an empty dict if every model failed.
                scores[word] = float('-inf')
        probabilities.append(scores)
        guesses.append(max(scores, key=scores.get))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score each test item with every word model, tracking the best word.

    :param models: dict mapping word -> trained model with ``score(X, lengths)``
    :param test_set: SinglesData object; ``get_all_Xlengths()`` supplies one
        ``(X, lengths)`` pair per test item
    :return: (list, list) as probabilities, guesses
        probabilities is a list of ``{word: log likelihood}`` dicts; a word
        whose scoring raised is recorded as ``-inf``
        guesses is a list of the word whose score exceeded all earlier ones,
        or ``None`` when no score was greater than ``-inf``
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []
    for features, seq_lengths in test_set.get_all_Xlengths().values():
        log_ls = {}
        top_word = None
        top_score = float("-inf")
        for word, model in models.items():
            try:
                log_l = model.score(features, seq_lengths)
                log_ls[word] = log_l
                if top_score < log_l:
                    top_word = word
                    top_score = log_l
            except:
                # Non-viable models stay in the table with the lowest score.
                log_ls[word] = float("-inf")
        guesses.append(top_word)
        probabilities.append(log_ls)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Pick, for each test item, the word whose model scores it highest.

    :param models: dict of trained models keyed by word; each model must
        provide ``score(X, lengths)``
    :param test_set: SinglesData object; ``get_all_Xlengths()`` supplies one
        ``(X, lengths)`` pair per test item
    :return: (list, list) as probabilities, guesses
        probabilities is a list of ``{word: log likelihood}`` dicts, with
        ``-inf`` for models that raised while scoring
        guesses is a list of the highest-scoring word per test item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for features, seq_lengths in test_set.get_all_Xlengths().values():
        word_scores = {}
        for trained_word, model in models.items():
            try:
                word_scores[trained_word] = model.score(features, seq_lengths)
            except:
                word_scores[trained_word] = float("-inf")
        probabilities.append(word_scores)
        best_word = max(word_scores, key=word_scores.get)
        guesses.append(best_word)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models keyed by word; every model must
        expose ``score(X, lengths)``
    :param test_set: SinglesData object; ``get_all_Xlengths()`` supplies one
        ``(X, lengths)`` pair per test item
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of ``get_all_Xlengths()``
        probabilities is a list of dicts ``{word: log likelihood}`` holding
        an entry for every word; a model that cannot score an item
        contributes ``-inf``
        guesses is a list with the best-scoring word per item, or ``None``
        when no model managed to score that item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for X, lengths in test_set.get_all_Xlengths().values():
        max_score = None
        guess_word = None
        prob_word = {}
        for word, model in models.items():
            try:
                score = model.score(X, lengths)
            except Exception:
                # Record the failure so every dict has the same set of keys.
                prob_word[word] = float("-inf")
                continue
            prob_word[word] = score
            if max_score is None or max_score < score:
                max_score = score
                guess_word = word
        probabilities.append(prob_word)
        guesses.append(guess_word)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item with every word model and pick a word per item.

    Scoring is delegated to ``_score_data`` and the selection to ``argmax``;
    both are defined outside this function.

    :param models: dict of trained models keyed by word
    :param test_set: SinglesData object; ``get_all_Xlengths()`` supplies one
        two-element pair per test item
    :return: (list, list) as probabilities, guesses
        probabilities is a list of ``{word: _score_data(...) result}`` dicts,
        one per test item
        guesses is a list of ``argmax`` results, one per test item
        (presumably the key with the largest value; confirm against the
        helper's definition)
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []
    for X, y in test_set.get_all_Xlengths().values():
        seq_probs = {}
        for word, model in models.items():
            seq_probs[word] = _score_data(model, X, y)
        probabilities.append(seq_probs)
        guesses.append(argmax(seq_probs))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item with every word model and keep the top word.

    Scoring is delegated to ``score``, which is defined outside this function.

    :param models: dict of trained models keyed by word
    :param test_set: SinglesData object; ``get_all_Xlengths()`` supplies one
        ``(x, lengths)`` pair per test item
    :return: (list, list) as probabilities, guesses
        probabilities is a list of ``{word: score(...) result}`` dicts, one
        per test item
        guesses is a list of the word with the largest value per test item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []
    for features, seq_lengths in test_set.get_all_Xlengths().values():
        word_probabilities = {}
        for word, model in models.items():
            word_probabilities[word] = score(model, features, seq_lengths)
        best_word = max(word_probabilities, key=word_probabilities.get)
        probabilities.append(word_probabilities)
        guesses.append(best_word)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Match each test item to the word model that scores it highest.

    :param models: dict mapping word -> trained model with ``score(X, lengths)``
    :param test_set: SinglesData object; items are enumerated through
        ``get_all_Xlengths()`` and fetched with ``get_item_Xlengths(id)``
    :return: (list, list) as probabilities, guesses
        probabilities is a list of ``{word: log likelihood}`` dicts, with
        ``-inf`` for models that raised while scoring
        guesses is a list of the highest-scoring word per test item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []
    for item_id in test_set.get_all_Xlengths().keys():
        features, seq_lengths = test_set.get_item_Xlengths(item_id)
        log_ls = {}
        for word, model in models.items():
            try:
                log_l = model.score(features, seq_lengths)
            except:
                log_l = float('-inf')
            log_ls[word] = log_l
        probabilities.append(log_ls)
        guesses.append(max(log_ls, key=lambda w: log_ls[w]))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Pick, for each test item, the word whose model scores it highest.

    :param models: dict of trained models keyed by word; each model must
        provide ``score(X, lengths)``
    :param test_set: SinglesData object; ``get_all_Xlengths()`` supplies one
        ``(X, lengths)`` pair per test item
    :return: (list, list) as probabilities, guesses
        probabilities is a list of ``{word: log likelihood}`` dicts, with
        ``-inf`` for models that raised while scoring
        guesses is a list of the highest-scoring word per test item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for features, seq_lengths in test_set.get_all_Xlengths().values():
        log_ls = {}
        for word, model in models.items():
            try:
                log_l = model.score(features, seq_lengths)
            except:
                log_l = -math.inf
            log_ls[word] = log_l
        best_word = max(log_ls, key=log_ls.get)
        probabilities.append(log_ls)
        guesses.append(best_word)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score each test item with every word model, tracking the best word.

    :param models: dict mapping word -> trained model with ``score(X, lengths)``
    :param test_set: SinglesData object; ``get_all_Xlengths()`` supplies one
        ``(X, lengths)`` pair per test item
    :return: (list, list) as probabilities, guesses
        probabilities is a list of ``{word: log likelihood}`` dicts, with
        ``-inf`` for models that raised while scoring
        guesses is a list of the word whose score exceeded all earlier ones,
        or ``None`` when no score was greater than ``-inf``
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []
    for features, seq_lengths in test_set.get_all_Xlengths().values():
        log_ls = {}
        top_word = None
        top_score = float('-Inf')
        for candidate, hmm in models.items():
            try:
                log_l = hmm.score(features, seq_lengths)
            except:
                log_l = float('-Inf')
            log_ls[candidate] = log_l
            if top_score < log_l:
                top_word, top_score = candidate, log_l
        probabilities.append(log_ls)
        guesses.append(top_word)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    For each item returned by ``test_set.get_all_Xlengths()`` all models are
    scored first, then the best word is chosen from the models that actually
    produced a usable score. A model whose ``score`` call raises an
    ``Exception`` is recorded as ``-inf``.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object,
         'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        Both lists follow the iteration order of
        ``test_set.get_all_Xlengths()`` (presumably the test set word_id
        order; confirm against SinglesData).
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        A guess is ``None`` when no model produced a score above ``-inf``
        (all models failed, or ``models`` is empty).
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    floor = float("-inf")

    for X, lengths in test_set.get_all_Xlengths().values():
        score_list = {}
        for word, model in models.items():
            try:
                score_list[word] = model.score(X, lengths)
            except Exception:
                # Ordinary scoring failures become -inf; interpreter-level
                # signals (KeyboardInterrupt, SystemExit) still propagate.
                score_list[word] = floor

        # Only words that beat -inf are eligible, so a sequence no model
        # could score yields None instead of an arbitrary word. max() keeps
        # the first of any tied top scorers.
        scored_words = [word for word, score in score_list.items() if score > floor]
        best_word = max(scored_words, key=score_list.get, default=None)

        guesses.append(best_word)
        probabilities.append(score_list)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    Every item returned by ``test_set.get_all_Xlengths()`` is scored against
    every model through a small guard helper that substitutes ``-inf`` when a
    model's ``score`` call raises an ``Exception``.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object,
         'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        Both lists follow the iteration order of
        ``test_set.get_all_Xlengths()`` (presumably the test set word_id
        order; confirm against SinglesData).
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        A guess is ``None`` only when ``models`` is empty.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    def try_or_default(fn, *args, default=float("-inf")):
        """Return ``fn(*args)``, or ``default`` if the call raises an Exception."""
        try:
            return fn(*args)
        except Exception:
            # Deliberately not a bare except: KeyboardInterrupt/SystemExit
            # must not be converted into a score.
            return default

    for X, lengths in test_set.get_all_Xlengths().values():
        probability_dict = {
            word: try_or_default(model.score, X, lengths)
            for word, model in models.items()
        }
        probabilities.append(probability_dict)
        # default=None keeps an empty model set from raising ValueError.
        # Ties resolve to the first maximal word in `models` order.
        guesses.append(max(probability_dict, key=probability_dict.get, default=None))

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    Every item returned by ``test_set.get_all_Xlengths()`` is scored against
    every model, and the word with the highest log likelihood becomes the
    guess. A model whose ``score`` call raises an ``Exception`` is recorded
    with a log likelihood of ``-inf``.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object,
         'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        Both lists follow the iteration order of
        ``test_set.get_all_Xlengths()`` (presumably the test set word_id
        order; confirm against SinglesData).
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        A guess is ``None`` only when ``models`` is empty.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    unscorable = float("-inf")

    for X, lengths in test_set.get_all_Xlengths().values():
        word_prob = {}
        for model_key, model_value in models.items():
            try:
                log_likelihood = model_value.score(X, lengths)
            except Exception:
                # A model that cannot score this sequence stays in the
                # result with the lowest possible likelihood.
                log_likelihood = unscorable
            word_prob[model_key] = log_likelihood

        probabilities.append(word_prob)
        # default=None avoids ValueError from max() when `models` is empty.
        # Ties resolve to the first maximal word in `models` order.
        guesses.append(max(word_prob, key=word_prob.get, default=None))

    return probabilities, guesses