def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
        Only ``model.score(X, lengths)`` is called on each value.
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of ``test_set.get_all_sequences()``
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (``-inf`` when scoring failed)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        When ``models`` is empty nothing is recorded and both lists are empty.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for word_id in test_set.get_all_sequences():
        # The features do not depend on the candidate model: fetch them once.
        X, lengths = test_set.get_item_Xlengths(word_id)
        scores = {}
        for word, model in models.items():
            try:
                scores[word] = model.score(X, lengths)
            except Exception:
                # A model that cannot score this item is the worst candidate.
                scores[word] = float('-inf')
        if scores:
            probabilities.append(scores)
            # Ties (including "every model failed") resolve to the first word.
            guesses.append(max(scores, key=scores.get))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object; items are addressed by the integer
        ids ``0 .. len(test_set.get_all_sequences()) - 1``
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (``-inf`` when scoring failed)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        A guess is ``None`` when ``models`` is empty.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for word_id in range(len(test_set.get_all_sequences())):
        X, lengths = test_set.get_item_Xlengths(word_id)
        prob_dict = {}
        for word, model in models.items():
            try:
                prob_dict[word] = model.score(X, lengths)
            except Exception:
                prob_dict[word] = float('-inf')
        probabilities.append(prob_dict)

        if prob_dict:
            # Highest score wins; equal scores go to the lexicographically
            # greatest word (same ordering as comparing (score, word) tuples).
            guess = max(prob_dict, key=lambda word: (prob_dict[word], word))
        else:
            guess = None
        guesses.append(guess)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of ``test_set.get_all_sequences()``
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (``-inf`` when scoring failed)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        A guess is ``None`` when ``models`` is empty.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    all_Xlengths = test_set.get_all_Xlengths()
    for word_id in test_set.get_all_sequences():
        X, lengths = all_Xlengths[word_id]
        probability = {}
        for word, model in models.items():
            try:
                probability[word] = model.score(X, lengths)
            except Exception:
                probability[word] = -float("inf")
        probabilities.append(probability)
        # max() over the keys returns the first word holding the top score.
        best = max(probability, key=probability.get) if probability else None
        guesses.append(best)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of ``test_set.get_all_sequences()``
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood; a model that fails to score is
        recorded as ``-inf`` so every value stays a comparable float
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        A guess is ``None`` when no model could score the item.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for item in test_set.get_all_sequences():
        X, lengths = test_set.get_item_Xlengths(item)
        scores = {}
        guess = None
        max_score = None
        for word, model in models.items():
            try:
                score = model.score(X, lengths)
            except Exception:
                scores[word] = float("-inf")
                continue
            scores[word] = score
            # The first successfully scored word always seeds the best guess.
            if max_score is None or score > max_score:
                max_score, guess = score, word
        probabilities.append(scores)
        guesses.append(guess)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    Each test item is scored using only the first sequence stored for it.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of ``test_set.get_all_sequences()``
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood; words whose model failed to score
        are left out of the dictionary
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        A guess is ``None`` when no model could score the item.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for sequences in test_set.get_all_sequences().values():
        first_sequence = sequences[0]
        lengths = [len(first_sequence)]
        probability = {}
        for word, model in models.items():
            try:
                probability[word] = model.score(first_sequence, lengths)
            except Exception:
                # Unscorable words are simply not reported for this item.
                continue
        probabilities.append(probability)
        # Guard the empty case: max() would raise on an empty dict.
        best = max(probability, key=probability.get) if probability else None
        guesses.append(best)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of ``test_set.get_all_sequences()``
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (``-inf`` when ``score`` raised
        ``ValueError``)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    :raises ValueError: if ``models`` is empty while the test set is not.
    """
    for category in (DeprecationWarning, RuntimeWarning):
        warnings.filterwarnings("ignore", category=category)

    probabilities, guesses = [], []
    for word_id in test_set.get_all_sequences():
        features, seq_lengths = test_set.get_item_Xlengths(word_id)
        log_likelihoods = {}
        for candidate in models:
            try:
                value = models[candidate].score(features, seq_lengths)
            except ValueError:
                value = float("-inf")
            log_likelihoods[candidate] = value
        probabilities.append(log_likelihoods)
        # First word holding the highest log likelihood wins.
        best_word, _ = max(log_likelihoods.items(), key=lambda pair: pair[1])
        guesses.append(best_word)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of ``test_set.get_all_sequences()``
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (``-inf`` when scoring failed)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        A guess is ``None`` when no model scored above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    hwords = test_set.get_all_Xlengths()
    for word_id in test_set.get_all_sequences():
        X, lengths = hwords[word_id]
        scores = {}
        best_score = -float("inf")
        best_model = None
        # Score and track the winner in one pass instead of re-scanning the
        # finished dictionaries afterwards.
        for model_key, model in models.items():
            try:
                score = model.score(X, lengths)
            except Exception:
                score = -float("inf")
            scores[model_key] = score
            if score > best_score:
                best_score = score
                best_model = model_key
        probabilities.append(scores)
        guesses.append(best_model)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object; items are addressed by the integer
        ids ``0 .. len(test_set.get_all_sequences()) - 1``
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (``-inf`` when scoring failed)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        An item whose data cannot be looked up yields ``{}`` and ``None`` so
        both lists keep one entry per word_id.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # general/specific algorithm snippets from forum discussion:
    # https://discussions.udacity.com/t/recognizer-implementation/234793/22
    # https://discussions.udacity.com/t/recognizer-implementation/234793/28
    hwords = test_set.get_all_Xlengths()
    for word_id in range(len(test_set.get_all_sequences())):
        p_of_words = {}
        guess_word = None
        max_score = float("-inf")
        try:
            x, lengths = hwords[word_id]
        except (LookupError, TypeError, ValueError):
            # Missing or malformed data: record an empty result for this id.
            probabilities.append(p_of_words)
            guesses.append(guess_word)
            continue

        # Keep the word whose model gives the highest likelihood (score).
        for word, model in models.items():
            try:
                score = model.score(x, lengths)
            except Exception:
                # Fill in the probability dict even when no score is available.
                p_of_words[word] = float("-inf")
                continue
            p_of_words[word] = score
            if score > max_score:
                guess_word = word
                max_score = score
        probabilities.append(p_of_words)
        guesses.append(guess_word)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object; items are addressed by the integer
        ids ``0 .. len(test_set.get_all_sequences()) - 1``
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood; words whose model failed to score
        are left out of the dictionary
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        If walking the test set itself fails, a message is printed and the
        results gathered so far are returned.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    hwords = test_set.get_all_Xlengths()
    try:
        for word_id in range(len(test_set.get_all_sequences())):
            words_prob = {}
            best_score = float('-Inf')
            guess_word = None
            X, lengths = hwords[word_id]
            for word, model in models.items():
                try:
                    score = model.score(X, lengths)
                except Exception:
                    # Unscorable words are not reported for this item.
                    continue
                words_prob[word] = score
                if score > best_score:
                    guess_word = word
                    best_score = score
            probabilities.append(words_prob)
            guesses.append(guess_word)
    except Exception:
        # Best effort: keep whatever was recognized before the failure.
        print('outer exception')
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    Progress messages are printed before and after the run.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object; items are addressed by the integer
        ids ``0 .. len(test_set.get_all_sequences()) - 1`` and the number of
        items processed is also bounded by ``len(test_set.wordlist)``
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (``-inf`` when scoring failed)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        A guess is ``None`` when no model scored above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    all_sequences = test_set.get_all_sequences()
    all_Xlengths = test_set.get_all_Xlengths()
    print('Started recognizing ...')

    # The wordlist entries are not used; zipping only caps the item count.
    for i, _ in zip(range(len(all_sequences)), test_set.wordlist):
        best_ll = float("-inf")
        best_word = None
        probs = {}
        for word, model in models.items():
            try:
                # Features come from the item's first raw sequence, lengths
                # from the precomputed (X, lengths) pair.
                ll = model.score(all_sequences[i][0], all_Xlengths[i][1])
            except Exception:
                # Give the failure an explicit value instead of reusing the
                # previous word's score (or an unbound name).
                ll = float("-inf")
            probs[word] = ll
            if ll > best_ll:
                best_ll = ll
                best_word = word
        guesses.append(best_word)
        probabilities.append(probs)

    print('Finished analyzing {} words '.format(len(all_sequences)))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    For each test item: compute the log-likelihood score of every word model,
    store those scores in ``probabilities``, and store the word with the
    highest score in ``guesses``.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object; items are addressed by the integer
        ids ``0 .. len(test_set.get_all_sequences()) - 1``
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (``-inf`` when scoring failed)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        If reading the test set fails part-way, a ``RuntimeWarning`` is issued
        and the results gathered so far are returned.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    try:
        for word_id in range(len(test_set.get_all_sequences())):
            logL_words = {}
            best_score = float('-inf')
            best_word = None
            X, lengths = test_set.get_item_Xlengths(word_id)
            for word, model in models.items():
                try:
                    score = model.score(X, lengths)
                except Exception:
                    logL_words[word] = float("-inf")
                    continue
                logL_words[word] = score
                if score > best_score:
                    best_word = word
                    best_score = score
            probabilities.append(logL_words)
            guesses.append(best_word)
    except Exception as err:
        # Still best effort, but no longer silent about the truncated output.
        warnings.warn(
            "recognize stopped early: {!r}".format(err), RuntimeWarning
        )
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of ``test_set.get_all_sequences()``
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (``-inf`` when scoring failed)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        A guess is ``None`` when no model scored above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # Fetch the (X, lengths) table once; it is indexed per test item below.
    Xlengths = test_set.get_all_Xlengths()

    for sequence in test_set.get_all_sequences():
        best_guess = None
        # Start from the smallest possible logL so any real score beats it.
        best_logL = float('-inf')
        prob_dict = {}
        X, xlengths = Xlengths[sequence]

        for guess, model in models.items():
            try:
                logL = model.score(X, xlengths)
            except Exception:
                # A model that cannot score this item ranks last.
                logL = float('-inf')
            prob_dict[guess] = logL
            if logL > best_logL:
                best_logL = logL
                best_guess = guess

        probabilities.append(prob_dict)
        guesses.append(best_guess)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object; items are addressed by the integer
        ids ``0 .. len(test_set.get_all_sequences()) - 1``
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (``-inf`` when scoring failed)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        A guess is ``None`` when no model scored above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    hwords = test_set.get_all_Xlengths()
    for word_id in range(len(test_set.get_all_sequences())):
        word_scores = {}
        best_score = float('-inf')
        guess_word = None
        X, lengths = hwords[word_id]
        for word, model in models.items():
            try:
                score = model.score(X, lengths)
            except Exception:
                # Failed scoring counts as the lowest possible likelihood.
                score = float('-inf')
            word_scores[word] = score
            if score > best_score:
                guess_word = word
                best_score = score
        probabilities.append(word_scores)
        guesses.append(guess_word)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object; items are addressed by the integer
        ids ``0 .. len(test_set.get_all_sequences()) - 1``
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (``-inf`` when scoring failed)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        A guess is ``None`` when no model scored above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    x_lengths = test_set.get_all_Xlengths()
    n_sequences = len(test_set.get_all_sequences())

    for word_id in range(n_sequences):
        best_score = float('-inf')
        # Reset per item so a guess never leaks over from the previous one.
        best_guess = None
        prob = {}
        X, lengths = x_lengths[word_id]
        for g_word, model in models.items():
            try:
                score = model.score(X, lengths)
            except Exception:
                # Failed process: rank this word last.
                prob[g_word] = float('-inf')
                continue
            prob[g_word] = score
            if score > best_score:
                best_score = score
                best_guess = g_word
        probabilities.append(prob)
        guesses.append(best_guess)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of ``test_set.get_all_sequences()``
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood; words whose model failed to score
        are left out of the dictionary
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        A guess is ``None`` when no model scored above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for word_id in test_set.get_all_sequences():
        word_id_X, word_id_lengths = test_set.get_item_Xlengths(word_id)
        # Per-item trackers for the likelihoods and the running best guess.
        best_score = float('-inf')
        best_guess = None
        word_likelihood = {}

        # Score this test sample against every word model.
        for word_name, current_model in models.items():
            try:
                current_score = current_model.score(word_id_X, word_id_lengths)
            except Exception:
                # Unscorable words are not reported for this item.
                continue
            word_likelihood[word_name] = current_score
            if current_score > best_score:
                best_score = current_score
                best_guess = word_name

        probabilities.append(word_likelihood)
        guesses.append(best_guess)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of ``test_set.get_all_sequences()``
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (``-inf`` when scoring failed)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        A guess is ``None`` when no model scored above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # Only the ids are needed here; the features come from get_item_Xlengths.
    for index in test_set.get_all_sequences():
        X, lengths = test_set.get_item_Xlengths(index)
        guess_dict = {}
        best_guess_word = None
        best_score = float("-inf")
        for word, model in models.items():
            try:
                temp_score = model.score(X, lengths)
            except Exception:
                guess_dict[word] = float('-inf')
                continue
            guess_dict[word] = temp_score
            if temp_score > best_score:
                best_score = temp_score
                best_guess_word = word
        guesses.append(best_guess_word)
        probabilities.append(guess_dict)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object; items are addressed by the integer
        ids ``0 .. len(test_set.get_all_sequences()) - 1``
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (``-inf`` when scoring failed)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        A guess is ``None`` when no model scored above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # (X, lengths) pairs for the whole test set, looked up by word_id.
    hwords = test_set.get_all_Xlengths()
    for word_id in range(len(test_set.get_all_sequences())):
        words_prob = {}
        best_score = float('-Inf')
        guess_word = None
        X, lengths = hwords[word_id]

        # Map every word to its score and remember the best one.
        for word, model in models.items():
            try:
                score = model.score(X, lengths)
            except Exception:
                # Set the score to -inf if scoring fails.
                score = float('-Inf')
            words_prob[word] = score
            if score > best_score:
                best_score = score
                guess_word = word

        probabilities.append(words_prob)
        guesses.append(guess_word)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of ``test_set.get_all_sequences()``
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (``-inf`` when scoring failed)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        A guess is ``None`` when no model scored above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    Xlengths = test_set.get_all_Xlengths()
    for sequence in test_set.get_all_sequences():
        best_guess = None
        best_score = float("-inf")
        prob = {}
        X, lengths = Xlengths[sequence]
        for word, model in models.items():
            try:
                logL = model.score(X, lengths)
            except Exception:
                # Keep the word in the result, ranked below every real score.
                logL = float("-inf")
            prob[word] = logL
            if logL > best_score:
                best_score = logL
                best_guess = word
        probabilities.append(prob)
        guesses.append(best_guess)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of ``test_set.get_all_sequences()``
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (``-inf`` when scoring failed)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        A guess is ``None`` when no model scored above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for word_id in test_set.get_all_sequences():
        features, sequence_length = test_set.get_item_Xlengths(word_id)
        likelihoods = {}
        highest_score = float("-inf")
        best_guess = None
        for word, model in models.items():
            try:
                score = model.score(features, sequence_length)
            except Exception:
                likelihoods[word] = float("-inf")
                continue
            likelihoods[word] = score
            if score > highest_score:
                highest_score = score
                best_guess = word
        guesses.append(best_guess)
        probabilities.append(likelihoods)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object; items are addressed by the integer
        ids ``0 .. len(test_set.get_all_sequences()) - 1``
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood; words whose model failed to score
        are left out of the dictionary
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        A guess is ``None`` when no model scored above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # Number of words (sequences) in the test set.
    total_words = len(test_set.get_all_sequences())

    for word_index in range(total_words):
        prop = {}
        best_fit_word = None
        # Running maximum used to find the best-fitting word for this item.
        best_prop = float('-inf')
        x, lengths = test_set.get_item_Xlengths(word_index)

        # models maps each word (key) to its trained model (value).
        for word, model in models.items():
            try:
                logL = model.score(x, lengths)
            except Exception:
                # Unscorable words are not reported for this item.
                continue
            prop[word] = logL
            if logL > best_prop:
                best_prop = logL
                best_fit_word = word

        probabilities.append(prop)
        guesses.append(best_fit_word)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object; items are addressed by the integer
        ids ``0 .. len(test_set.get_all_sequences()) - 1``
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood; a model that fails to score is
        recorded as ``-inf`` so it can never outrank a real score
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        A guess is ``None`` when no model scored above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for i in range(len(test_set.get_all_sequences())):
        current_X, current_lengths = test_set.get_item_Xlengths(i)
        best_word = None
        best_score = float('-inf')
        p = {}
        for word, model in models.items():
            try:
                score = model.score(current_X, current_lengths)
            except Exception:
                # Skip the comparison: there is no fresh score to compare, and
                # reusing the previous word's score would credit the wrong word.
                p[word] = float('-inf')
                continue
            p[word] = score
            if score > best_score:
                best_score = score
                best_word = word
        probabilities.append(p)
        guesses.append(best_word)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the key order of ``test_set.get_all_sequences()``
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (``-inf`` when scoring failed)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        A guess is the empty string when no model scored above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []
    minus_inf = float('-inf')

    for word_id in test_set.get_all_sequences().keys():
        log_likelihoods = {}
        top_word, top_score = '', minus_inf
        features, seq_lengths = test_set.get_item_Xlengths(word_id)
        for candidate in models:
            try:
                value = models[candidate].score(features, seq_lengths)
                log_likelihoods[candidate] = value
                if value > top_score:
                    top_score, top_word = value, candidate
            except Exception:
                # A model that cannot score this item ranks last.
                log_likelihoods[candidate] = minus_inf
        probabilities.append(log_likelihoods)
        guesses.append(top_word)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by ascending test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (``-inf`` when scoring failed)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...},
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        A guess is the empty string when no model scored above ``-inf``.
    """
    probabilities = []
    guesses = []

    # Sort the ids explicitly so the output order does not depend on how the
    # test set happens to store its items.
    for test_word in sorted(test_set.get_all_sequences()):
        X, lengths = test_set.get_item_Xlengths(test_word)
        probs_dict = {}
        best_score = float("-inf")
        best_guess = ""
        for train_word, model in models.items():
            try:
                log_prob = model.score(X, lengths)
            except Exception:
                log_prob = float("-inf")
            probs_dict[train_word] = log_prob
            if log_prob > best_score:
                best_score = log_prob
                best_guess = train_word
        guesses.append(best_guess)
        probabilities.append(probs_dict)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        Both lists follow the iteration order of ``test_set.get_all_sequences()``.
        probabilities is a list of dictionaries mapping each word to its log
        likelihood; a word whose model raises while scoring is stored as -inf.
        guesses is a list holding the best-scoring word for each item, or
        ``None`` when no model scored above -inf.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for test_item in test_set.get_all_sequences():
        X, lengths = test_set.get_item_Xlengths(test_item)
        best_prob = float("-inf")
        best_word = None
        scores = {}

        for word, model in models.items():
            try:
                log_l = model.score(X, lengths)
            except Exception:
                # Best effort: an unscorable model is treated as impossible.
                # Only Exception is caught so KeyboardInterrupt / SystemExit
                # are not swallowed.
                scores[word] = float("-inf")
                continue
            scores[word] = log_l
            if log_l > best_prob:
                best_prob = log_l
                best_word = word

        probabilities.append(scores)
        guesses.append(best_word)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        Both lists follow the iteration order of ``test_set.get_all_sequences()``.
        probabilities is a list of dictionaries mapping each word to its log
        likelihood; a word whose model raises while scoring is stored as -inf.
        guesses is a list holding the best-scoring word for each item, or
        ``None`` when no model scored above -inf.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for word_id in test_set.get_all_sequences():
        curr_X, curr_lengths = test_set.get_item_Xlengths(word_id)
        best_word = None
        best_score = float("-inf")
        p_dict = {}

        for word, model in models.items():
            try:
                score = model.score(curr_X, curr_lengths)
            except Exception:
                # A failed model must rank as the *worst* candidate. Recording
                # 0 here would present it as a log likelihood of 0, i.e. a
                # near-certain match, so use -inf instead.
                p_dict[word] = float("-inf")
                continue
            p_dict[word] = score
            if score > best_score:
                best_score = score
                best_word = word

        probabilities.append(p_dict)
        guesses.append(best_word)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        Both lists are ordered by item index ``0 .. test_set.num_items - 1``.
        probabilities is a list of dictionaries mapping each word to its log
        likelihood; a word whose model raises while scoring is stored as -inf.
        guesses is a list holding the best-scoring word for each item, or
        ``None`` when no model scored above -inf.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for item_index in range(test_set.num_items):
        X, lengths = test_set.get_item_Xlengths(item_index)
        word_probs = {}
        max_prob = float("-inf")
        word_predict = None

        for word, model in models.items():
            try:
                log_l = model.score(X, lengths)
            except Exception:
                # Best effort: an unscorable model is treated as impossible.
                # Only Exception is caught so KeyboardInterrupt / SystemExit
                # are not swallowed.
                word_probs[word] = float("-inf")
                continue
            word_probs[word] = log_l
            if log_l > max_prob:
                max_prob = log_l
                word_predict = word

        probabilities.append(word_probs)
        guesses.append(word_predict)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        Both lists are ordered by word_id ``0 .. n - 1`` where ``n`` is the
        number of entries in ``test_set.get_all_sequences()``.
        probabilities is a list of dictionaries mapping each word to its log
        likelihood; a word whose model raises while scoring is stored as -inf
        so every dictionary contains every word.
        guesses is a list holding the best-scoring word for each item, or
        ``None`` when no model scored above -inf.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    all_words_x_lengths = test_set.get_all_Xlengths()
    num_sequences = len(test_set.get_all_sequences())

    # The X/lengths lookup is keyed by integer word_id, so index by number.
    for word_id in range(num_sequences):
        x, lengths = all_words_x_lengths[word_id]
        word_probs = {}
        best_score = float("-inf")
        word_guess = None

        for word_candidate, model in models.items():
            try:
                score = model.score(x, lengths)
            except Exception:
                # Record the failure explicitly instead of silently dropping
                # the word from the result.
                word_probs[word_candidate] = float("-inf")
                continue
            word_probs[word_candidate] = score
            if score > best_score:
                best_score = score
                word_guess = word_candidate

        probabilities.append(word_probs)
        guesses.append(word_guess)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        Both lists follow the iteration order of ``test_set.get_all_sequences()``.
        probabilities is a list of dictionaries mapping each word to its log
        likelihood; a word whose model raises while scoring is stored as -inf.
        guesses is a list holding the best-scoring word for each item, or
        ``None`` when every model failed to score it.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    sequences = test_set.get_all_sequences()
    all_xlengths = test_set.get_all_Xlengths()

    for word_id in sequences:
        x, length = all_xlengths[word_id]
        prob = {}
        guess = None

        for word, model in models.items():
            try:
                log_l = model.score(x, length)
            except Exception:
                # Best effort: an unscorable model is treated as impossible.
                # Only Exception is caught so KeyboardInterrupt / SystemExit
                # are not swallowed.
                prob[word] = float('-inf')
                continue
            prob[word] = log_l
            # The first successfully scored word seeds the guess; afterwards
            # only a strictly better score replaces it.
            if guess is None or log_l > prob[guess]:
                guess = word

        probabilities.append(prob)
        guesses.append(guess)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        Both lists follow the iteration order of ``test_set.get_all_sequences()``.
        probabilities is a list of dictionaries mapping each word to its log
        likelihood; a word whose model raises while scoring is stored as -inf.
        guesses is a list holding the highest-scoring word for each item (the
        first such word on ties), or ``None`` when ``models`` is empty.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for word_id in test_set.get_all_sequences():
        X, lengths = test_set.get_item_Xlengths(word_id)
        log_likelihoods = {}

        for word, model in models.items():
            try:
                log_likelihoods[word] = model.score(X, lengths)
            except Exception:
                # Best effort: an unscorable model is treated as impossible.
                log_likelihoods[word] = float('-inf')

        probabilities.append(log_likelihoods)
        # default=None keeps an empty model set from raising ValueError.
        best_guess = max(log_likelihoods, key=log_likelihoods.get, default=None)
        guesses.append(best_guess)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        Both lists follow the iteration order of ``test_set.get_all_sequences()``.
        probabilities is a list of dictionaries mapping each word to its log
        likelihood; words whose model raises while scoring are omitted from
        that item's dictionary.
        guesses is a list holding the highest-scoring word for each item (the
        first such word on ties), or ``None`` when no model could score it.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for word_id in test_set.get_all_sequences():
        X_test, lengths_test = test_set.get_item_Xlengths(word_id)
        word_res = {}

        for word, model in models.items():
            try:
                word_res[word] = model.score(X_test, lengths_test)
            except Exception:
                # Models that cannot score this item are skipped.
                continue

        # Recompute the guess for every item. When nothing scored, the guess
        # is None rather than an unbound name or the previous item's word.
        best_word = max(word_res, key=word_res.get, default=None)
        probabilities.append(word_res)
        guesses.append(best_word)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        Both lists follow the iteration order of ``test_set.get_all_sequences()``.
        probabilities is a list of dictionaries mapping each word to its log
        likelihood; a word whose model raises while scoring is stored as -inf
        so every value stays numeric and comparable.
        guesses is a list holding the best-scoring word for each item, or
        ``None`` when every model failed to score it.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for word_id in test_set.get_all_sequences():
        X, length = test_set.get_item_Xlengths(word_id)
        scores = {}
        best_guess = None
        best_score = None

        for word, model in models.items():
            try:
                log_l = model.score(X, length)
            except Exception:
                # Best effort: an unscorable model is treated as impossible.
                scores[word] = float("-inf")
                continue
            scores[word] = log_l
            # Test "no score yet" with `is None`: a score of 0.0 is valid but
            # falsy, and a truthiness test would let a worse score replace it.
            if best_score is None or best_score < log_l:
                best_score = log_l
                best_guess = word

        probabilities.append(scores)
        guesses.append(best_guess)

    return probabilities, guesses