def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]

    A model whose ``score`` call raises is recorded with ``float("-inf")`` so
    every dictionary carries every word.  Trailing digits are removed from the
    guessed word (``'GO1'`` -> ``'GO'``); digits elsewhere in the word are
    kept.  When ``models`` is empty the guess is ``None``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for word_id in range(len(test_set.get_all_Xlengths())):
        feature_sequences, lengths = test_set.get_item_Xlengths(word_id)
        word_probability = {}
        for word, model in models.items():
            try:
                word_probability[word] = model.score(feature_sequences, lengths)
            except Exception:
                # The model cannot score this sequence: rank it last instead
                # of silently leaving the word out of the dictionary.
                word_probability[word] = float("-inf")
        probabilities.append(word_probability)

        if not word_probability:
            # Nothing to choose from; keep guesses aligned with word ids.
            guesses.append(None)
            continue
        guessed_word = max(word_probability, key=word_probability.get)
        # Only the numeric suffix is dropped, as the original comment intended.
        guesses.append(guessed_word.rstrip("0123456789"))

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    A model that fails to score an item is recorded with ``float("-inf")``
    (a number, so the values stay comparable) and can never become the guess.
    The guess is ``None`` when no model scored the item.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for item in test_set.get_all_sequences():
        X, length = test_set.get_item_Xlengths(item)
        scores = {}
        guess, best_score = None, None
        for word, model in models.items():
            try:
                score = model.score(X, length)
            except Exception:
                scores[word] = float("-inf")
                continue
            scores[word] = score
            # Track the running maximum among the models that did score.
            if best_score is None or score > best_score:
                best_score, guess = score, word
        probabilities.append(scores)
        guesses.append(guess)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    Models that raise while scoring get ``float('-inf')``.  With an empty
    ``models`` dict each guess is ``None``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # Only the item keys are needed here; the data comes from get_item_Xlengths.
    for item in test_set.get_all_Xlengths():
        X, lengths = test_set.get_item_Xlengths(item)
        words_logL = {}
        for word, model in models.items():
            try:
                words_logL[word] = model.score(X, lengths)
            except Exception:
                words_logL[word] = float('-inf')
        probabilities.append(words_logL)
        guesses.append(max(words_logL, key=words_logL.get) if words_logL else None)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    A model that cannot score an item contributes ``float("-inf")``.  The
    guess is ``None`` when ``models`` is empty.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for word_id in range(len(test_set.get_all_Xlengths())):
        X, lengths = test_set.get_item_Xlengths(word_id)
        likelihoods = {}
        for word, model in models.items():
            try:
                the_score = model.score(X, lengths)
            except Exception:
                the_score = float("-inf")
            likelihoods[word] = the_score
        probabilities.append(likelihoods)
        # Pick from the local dict rather than re-indexing `probabilities`,
        # so the guess does not depend on list position matching word_id.
        guess = max(likelihoods, key=likelihoods.get) if likelihoods else None
        guesses.append(guess)

    return (probabilities, guesses)
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    A model that raises while scoring is recorded with ``float("-inf")``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for word_id in test_set.get_all_Xlengths():
        current_sequence, current_lengths = test_set.get_item_Xlengths(word_id)
        log_likelihoods = {}
        for word, model in models.items():
            try:
                log_likelihoods[word] = model.score(current_sequence, current_lengths)
            except Exception:
                log_likelihoods[word] = float("-inf")
        probabilities.append(log_likelihoods)
        # NOTE(review): get_best_word is defined outside this block; it is
        # presumably the highest-scoring key of the dict - confirm there.
        guesses.append(get_best_word(log_likelihoods))

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    Models that raise while scoring get ``float("-inf")``.  The guess is
    ``None`` when no model produced a score above negative infinity.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # Fetch the id -> (X, lengths) mapping once instead of once per item.
    for word_id, (x, lengths) in test_set.get_all_Xlengths().items():
        probs = {}
        max_logL = float("-inf")
        recognized_word = None
        for key, model in models.items():
            try:
                logL = model.score(x, lengths)
            except Exception:
                probs[key] = float("-inf")
                continue
            probs[key] = logL
            if logL > max_logL:
                max_logL = logL
                recognized_word = key
        probabilities.append(probs)
        guesses.append(recognized_word)

    return (probabilities, guesses)
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    Models that raise while scoring get ``float('-inf')``.  Ties on the score
    are broken by the word itself (the greater word wins), because the
    candidates are compared as ``(score, word)`` tuples.  With an empty
    ``models`` dict each guess is ``None``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for i in range(len(test_set.get_all_sequences())):
        X, lengths = test_set.get_item_Xlengths(i)
        prob_dict = {}
        for word, model in models.items():
            try:
                prob_dict[word] = model.score(X, lengths)
            except Exception:
                prob_dict[word] = float('-inf')
        probabilities.append(prob_dict)

        # `default` keeps an empty model set from raising ValueError.
        _, guess = max(
            ((logL, word) for word, logL in prob_dict.items()),
            default=(None, None),
        )
        guesses.append(guess)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    A ``ValueError`` raised by a model's ``score`` is recorded as
    ``float("-inf")``; other exceptions propagate.
    """
    for ignored_category in (DeprecationWarning, RuntimeWarning):
        warnings.filterwarnings("ignore", category=ignored_category)

    probabilities = []
    guesses = []

    for item_key in test_set.get_all_sequences():
        features, seq_lengths = test_set.get_item_Xlengths(item_key)

        scores = {}
        for word, word_model in models.items():
            try:
                value = word_model.score(features, seq_lengths)
            except ValueError:
                value = float("-inf")
            scores[word] = value

        probabilities.append(scores)
        # First entry with the highest score wins.
        best_word, _ = max(scores.items(), key=lambda pair: pair[1])
        guesses.append(best_word)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    Models that raise while scoring get ``float('-inf')``.  Every test item
    yields exactly one entry in both lists; with an empty ``models`` dict
    that entry is ``{}`` / ``None``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for item in test_set.get_all_sequences():
        # The item data does not depend on the model, so fetch it once and
        # outside the try: a data error is not a scoring failure.
        X, lengths = test_set.get_item_Xlengths(item)
        temp_dict = {}
        for word, model in models.items():
            try:
                temp_dict[word] = model.score(X, lengths)
            except Exception:
                temp_dict[word] = float('-inf')
        probabilities.append(temp_dict)
        guesses.append(max(temp_dict, key=temp_dict.get) if temp_dict else None)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    A ``ValueError`` or ``AttributeError`` raised by a model's ``score`` is
    recorded as negative infinity; other exceptions propagate.  With an empty
    ``models`` dict each guess is ``None``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # One lookup of the id -> (X, lengths) mapping serves the whole run.
    for X, lengths in test_set.get_all_Xlengths().values():
        scores = {}
        for word, model in models.items():
            try:
                scores[word] = model.score(X, lengths)
            except (ValueError, AttributeError):
                scores[word] = float("-inf")
        probabilities.append(scores)
        guesses.append(max(scores, key=scores.get) if scores else None)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    Models that raise while scoring get negative infinity.  With an empty
    ``models`` dict each guess is ``None``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    all_sequences = test_set.get_all_sequences()
    all_Xlengths = test_set.get_all_Xlengths()

    for sequence in all_sequences:
        X, length = all_Xlengths[sequence]
        probability = {}
        for word_model, model in models.items():
            try:
                probability[word_model] = model.score(X, length)
            except Exception:
                probability[word_model] = float("-inf")
        probabilities.append(probability)
        # max() with a key returns the first best word, exactly like the
        # previous keys/values index lookup, without the extra lists.
        guesses.append(max(probability, key=probability.get) if probability else None)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    Models that raise while scoring get ``float('-inf')``.  With an empty
    ``models`` dict each guess is ``None``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for word_id in test_set.get_all_Xlengths():
        x, lens = test_set.get_item_Xlengths(word_id)

        # key = word, value = log likelihood of this item under that word's model
        word_likelihoods = {}
        for word, model in models.items():
            try:
                word_likelihoods[word] = model.score(x, lens)
            except Exception:
                word_likelihoods[word] = float('-inf')

        probabilities.append(word_likelihoods)
        if word_likelihoods:
            guesses.append(max(word_likelihoods, key=word_likelihoods.get))
        else:
            guesses.append(None)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    Models that raise while scoring get ``float("-inf")``.  With an empty
    ``models`` dict each guess is ``None``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # for each word in the testing set
    for word_index in test_set.get_all_Xlengths():
        x, length = test_set.get_item_Xlengths(word_index)
        word_log_l_dict = {}

        # try the word on every model and record how well each one matches
        for word, model in models.items():
            try:
                word_log_l_dict[word] = model.score(x, length)
            except Exception:
                word_log_l_dict[word] = float("-inf")

        probabilities.append(word_log_l_dict)
        best_word = None
        if word_log_l_dict:
            best_word = max(word_log_l_dict, key=word_log_l_dict.get)
        guesses.append(best_word)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    Models that raise while scoring get ``float("-inf")``.  The guess is
    ``None`` when no model scored above negative infinity.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # Look the mapping up once; it was previously re-fetched for every index.
    all_xlengths = test_set.get_all_Xlengths()
    for index in range(len(all_xlengths)):
        X, lengths = all_xlengths[index]
        best_score = float("-inf")
        best_match = None
        probabilities_dict = {}
        for word, model in models.items():
            try:
                logL = model.score(X, lengths)
            except Exception:
                logL = float("-inf")
            probabilities_dict[word] = logL
            if logL > best_score:
                best_match, best_score = word, logL
        guesses.append(best_match)
        probabilities.append(probabilities_dict)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    A model that raises while scoring is recorded with ``float("-inf")``, so
    it can never outrank a model that produced a real score.  With an empty
    ``models`` dict each guess is ``None``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for i in range(len(test_set.get_all_Xlengths())):
        X, lengths = test_set.get_item_Xlengths(i)
        test_word_probs = {}
        for word, model in models.items():
            try:
                test_word_probs[word] = model.score(X, lengths)
            except Exception:
                # True negative infinity replaces the old arbitrary large
                # negative constant as the "could not score" marker.
                test_word_probs[word] = float("-inf")
        probabilities.append(test_word_probs)
        if test_word_probs:
            guesses.append(max(test_word_probs, key=test_word_probs.get))
        else:
            guesses.append(None)

    return (probabilities, guesses)
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    Models that raise while scoring get ``float('-inf')``.  With an empty
    ``models`` dict each guess is ``None``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    len_all = len(test_set.get_all_Xlengths())
    for word_id in range(len_all):
        x, lens = test_set.get_item_Xlengths(word_id)
        probs = {}
        for word, model in models.items():
            try:
                probs[word] = model.score(x, lens)
            except Exception:
                probs[word] = float('-inf')
        probabilities.append(probs)
        # Keyed max over the dict: no lambda whose parameter would shadow `x`.
        guesses.append(max(probs, key=probs.get) if probs else None)

    return (probabilities, guesses)
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    A ``ValueError`` or ``AttributeError`` raised by a model's ``score`` is
    recorded as ``float("-inf")`` so every dictionary carries every word;
    other exceptions propagate.  With an empty ``models`` dict each guess is
    ``None``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for word_id in range(len(test_set.get_all_Xlengths())):
        x, lengths = test_set.get_item_Xlengths(word_id)
        word_probabilities = {}
        for word, model in models.items():
            try:
                word_probabilities[word] = model.score(x, lengths)
            except (ValueError, AttributeError):
                word_probabilities[word] = float("-inf")
        probabilities.append(word_probabilities)

        # A single max() pass picks the same word the descending stable sort
        # put first, and does not fail when there is nothing to choose from.
        if word_probabilities:
            guesses.append(max(word_probabilities, key=word_probabilities.get))
        else:
            guesses.append(None)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    Models that raise while scoring get ``float("-inf")``.  The first word
    always seeds the running maximum, so the guess is ``None`` only when
    ``models`` is empty.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    all_xlengths = test_set.get_all_Xlengths()
    for test in range(len(all_xlengths)):
        X, lengths = all_xlengths[test]
        max_score = None
        max_word = None
        prob_dict = {}
        for word, model in models.items():
            try:
                score = model.score(X, lengths)
            except Exception:
                score = float("-inf")
            if max_score is None or score > max_score:
                max_score = score
                max_word = word
            prob_dict[word] = score
        probabilities.append(prob_dict)
        guesses.append(max_word)

    return (probabilities, guesses)
def recognize(models: dict, test_set: SinglesData):
    """Score each test item with every word model and keep the likeliest word.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    A model that raises while scoring is recorded with negative infinity so
    the dictionary keeps one entry per word.  The guess is ``None`` when no
    score is greater than negative infinity.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    item_total = len(test_set.get_all_Xlengths())
    for word_id in range(item_total):
        features, seq_lengths = test_set.get_item_Xlengths(word_id)

        scores_by_word = {}
        top_word = None
        top_score = float('-Inf')

        for candidate, candidate_model in models.items():
            try:
                log_l = candidate_model.score(features, seq_lengths)
            except Exception:
                # Keep the word in the dictionary, ranked below every real score.
                log_l = float('-Inf')
            scores_by_word[candidate] = log_l

            # Strict comparison: the earliest word wins a tie.
            if log_l > top_score:
                top_word = candidate
                top_score = log_l

        probabilities.append(scores_by_word)
        guesses.append(top_word)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    Models that raise while scoring get negative infinity.  The guess is
    ``None`` when no model scored above negative infinity.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    words = test_set.get_all_sequences()
    hwords = test_set.get_all_Xlengths()

    for word in words:
        X, length = hwords[word]
        temp = {}
        best_score = float("-inf")
        best_model = None
        for model_key, model in models.items():
            try:
                score = model.score(X, length)
            except Exception:
                score = float("-inf")
            temp[model_key] = score
            # Track the winner while scoring; no second pass is needed.
            if score > best_score:
                best_score = score
                best_model = model_key
        probabilities.append(temp)
        guesses.append(best_model)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    Models that raise while scoring get ``float("-inf")``.  A word_id whose
    data cannot be looked up yields ``{}`` / ``None`` so both lists stay the
    same length as the test set.  The guess is ``None`` when no model scored
    above negative infinity.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    hwords = test_set.get_all_Xlengths()
    for word_id in range(len(test_set.get_all_sequences())):
        p_of_words = {}
        max_score = float("-inf")
        guess_word = None

        try:
            x, lengths = hwords[word_id]
        except (LookupError, TypeError, ValueError):
            # Missing or malformed entry: emit placeholders and move on
            # instead of hiding every possible error behind a bare except.
            probabilities.append(p_of_words)
            guesses.append(guess_word)
            continue

        # Score the item with each model and remember the likeliest word.
        for word, model in models.items():
            try:
                score = model.score(x, lengths)
            except Exception:
                p_of_words[word] = float("-inf")
                continue
            p_of_words[word] = score
            if score > max_score:
                guess_word = word
                max_score = score

        probabilities.append(p_of_words)
        guesses.append(guess_word)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    Models that raise while scoring get ``float("-inf")``.  With an empty
    ``models`` dict each guess is ``None``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    guesses = []
    probabilities = []

    for word_id in test_set.get_all_Xlengths():
        current_word_features, current_seq_lengths = test_set.get_item_Xlengths(word_id)

        # key: candidate word, value: log likelihood of this item under its model
        probability_log_likelihoods = {}
        for word, model in models.items():
            try:
                score = model.score(current_word_features, current_seq_lengths)
            except Exception:
                # The model cannot evaluate this item: rank it last.
                score = float("-inf")
            probability_log_likelihoods[word] = score
        probabilities.append(probability_log_likelihoods)

        # The guess is the word (not the score) with the highest likelihood.
        best_guess_word = None
        if probability_log_likelihoods:
            best_guess_word = max(probability_log_likelihoods,
                                  key=probability_log_likelihoods.get)
        guesses.append(best_guess_word)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    A model that raises while scoring is recorded with ``float('-inf')`` so
    every dictionary carries every word.  The guess is ``None`` when no model
    scored above negative infinity.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # id -> (X, lengths) for every test item
    hwords = test_set.get_all_Xlengths()

    for word_id in range(len(hwords)):
        X, lengths = hwords[word_id]
        word_prob = {}
        best_score = float('-inf')
        guess_word = None

        for word, model in models.items():
            try:
                score = model.score(X, lengths)
            except Exception:
                score = float('-inf')
            word_prob[word] = score
            if score > best_score:
                guess_word = word
                best_score = score

        probabilities.append(word_prob)
        guesses.append(guess_word)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is that word's log likelihood
        guesses is a list of the best guess words ordered by the test set word_id

    A model that raises while scoring is recorded with negative infinity (a
    number, so the values stay comparable).  The guess is ``None`` when no
    model scored above negative infinity.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for word_id in test_set.get_all_Xlengths():
        X, lengths = test_set.get_item_Xlengths(word_id)
        logL_dict = {}
        max_logL = float('-inf')
        guess = None

        for word, model in models.items():
            try:
                logL = model.score(X, lengths)
            except Exception:
                logL = float('-inf')
            logL_dict[word] = logL
            # Choose the guess in the same pass that fills the dictionary.
            if logL > max_logL:
                max_logL = logL
                guess = word

        probabilities.append(logL_dict)
        guesses.append(guess)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    Progress messages are printed before and after the run.

    :param models: dict mapping each word to its trained model; every model
        must provide ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; ``get_all_sequences()`` and
        ``get_all_Xlengths()`` are indexed with consecutive integers from 0,
        and iteration stops at the shorter of the sequence count and
        ``test_set.wordlist``
    :return: (probabilities, guesses), both ordered by item index
        probabilities is a list of dicts ``{word: log likelihood}``; a word
        whose model raised while scoring is stored as ``float("-inf")``
        guesses is a list holding, per item, the word with the highest log
        likelihood, or None when no model scored above ``-inf``
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    all_sequences = test_set.get_all_sequences()
    all_xlengths = test_set.get_all_Xlengths()

    print('Started recognizing ...')
    for i, _ in zip(range(len(all_sequences)), test_set.wordlist):
        best_ll = float("-inf")
        best_word = None
        probs = {}

        for word, model in models.items():
            try:
                # The first raw sequence of the item is scored together with
                # the lengths entry of the matching (X, lengths) pair.
                ll = model.score(all_sequences[i][0], all_xlengths[i][1])
            except Exception:
                # Previously the score of the preceding model (or an unbound
                # name) was recorded here; a failed model now gets -inf.
                ll = float("-inf")
            if ll > best_ll:
                best_ll = ll
                best_word = word
            probs[word] = ll

        guesses.append(best_word)
        probabilities.append(probs)

    print('Finished analyzing {} words '.format(len(all_sequences)))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping each word to its trained model; every model
        must provide ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; the mapping returned by
        ``get_all_Xlengths()`` is indexed with the consecutive integers
        ``0 .. len(get_all_sequences()) - 1``
    :return: (probabilities, guesses), both ordered by word_id
        probabilities is a list of dicts ``{word: log likelihood}``; a word
        whose model raised while scoring is left out of that item's dict
        guesses is a list holding, per item, the word with the highest log
        likelihood, or None when no model scored above ``-inf``
        If an error occurs outside the per-model scoring, 'outer exception'
        is printed and the results gathered so far are returned.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    all_xlengths = test_set.get_all_Xlengths()
    try:
        for word_id in range(len(test_set.get_all_sequences())):
            X, lengths = all_xlengths[word_id]
            words_prob = {}
            best_score = float("-inf")
            guess_word = None

            for word, model in models.items():
                try:
                    score = model.score(X, lengths)
                except Exception:
                    # Skip models that cannot score this item. Only ordinary
                    # errors are swallowed, so KeyboardInterrupt and
                    # SystemExit still propagate.
                    continue
                words_prob[word] = score
                if score > best_score:
                    best_score = score
                    guess_word = word

            probabilities.append(words_prob)
            guesses.append(guess_word)
    except Exception:
        # Best effort: keep whatever was recognised before the failure.
        print('outer exception')

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping each word to its trained model; every model
        must provide ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; items are visited in the key order
        of ``get_all_Xlengths()`` and fetched with ``get_item_Xlengths``
    :return: (probabilities, guesses), both in that same item order
        probabilities is a list of dicts ``{word: log likelihood}``; a word
        whose model raised while scoring is stored as ``float("-inf")``
        guesses is a list holding, per item, the best-scoring word; ties go
        to the word visited last, and the guess is "" when ``models`` is
        empty
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for item_id in test_set.get_all_Xlengths().keys():
        features, seq_lengths = test_set.get_item_Xlengths(item_id)

        log_likelihoods = {}
        winner = ""
        winner_score = float("-inf")

        for candidate, hmm in models.items():
            try:
                value = hmm.score(features, seq_lengths)
            except Exception as err:
                # A model that cannot score the item counts as -inf; the
                # error is only reported when the module-level DEBUG is set.
                value = float("-inf")
                if DEBUG:
                    print("ERROR: {}".format(err))

            # ">=" on purpose: an equal score replaces the current winner.
            if value >= winner_score:
                winner_score = value
                winner = candidate
            log_likelihoods[candidate] = value

        probabilities.append(log_likelihoods)
        guesses.append(winner)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    Steps per test item:
      1. compute the log likelihood of the item under each word model,
      2. append the resulting ``{word: log likelihood}`` dict to
         ``probabilities``,
      3. append the highest-scoring word to ``guesses``.

    :param models: dict mapping each word to its trained model; every model
        must provide ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; items are fetched with
        ``get_item_Xlengths(word_id)`` for the consecutive integers
        ``0 .. len(get_all_sequences()) - 1``
    :return: (probabilities, guesses), both ordered by word_id
        a word whose model raised while scoring is stored as
        ``float("-inf")``; the guess is None when no model scored above
        ``-inf``
        If an error occurs outside the per-model scoring, a RuntimeWarning
        is issued and the results gathered so far are returned.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    try:
        for word_id in range(len(test_set.get_all_sequences())):
            X, lengths = test_set.get_item_Xlengths(word_id)
            log_l_words = {}
            best_score = float("-inf")
            best_word = None

            for word, model in models.items():
                try:
                    score = model.score(X, lengths)
                except Exception:
                    # Non-viable model for this item. Only ordinary errors
                    # are swallowed, so KeyboardInterrupt and SystemExit
                    # still propagate.
                    log_l_words[word] = float("-inf")
                    continue
                log_l_words[word] = score
                if score > best_score:
                    best_score = score
                    best_word = word

            probabilities.append(log_l_words)
            guesses.append(best_word)
    except Exception as exc:
        # Best effort: keep what was recognised so far, but say so instead
        # of truncating the result silently.
        warnings.warn(
            "recognize stopped early: {!r}".format(exc), RuntimeWarning
        )

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping each word to its trained model; every model
        must provide ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; items are visited in the iteration
        order of ``get_all_sequences()`` and each key is looked up in
        ``get_all_Xlengths()`` to obtain its ``(X, lengths)`` pair
    :return: (probabilities, guesses), both in that same item order
        probabilities is a list of dicts ``{word: log likelihood}``; a word
        whose model raised while scoring is stored as ``float("-inf")``
        guesses is a list holding, per item, the word with the highest log
        likelihood, or None when no model scored above ``-inf``
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    all_xlengths = test_set.get_all_Xlengths()
    sequences = test_set.get_all_sequences()

    for sequence in sequences:
        X, lengths = all_xlengths[sequence]
        prob_dict = {}
        best_guess = None
        # Start from the lowest possible value so any real score beats it.
        best_log_l = float("-inf")

        for word, model in models.items():
            try:
                log_l = model.score(X, lengths)
            except Exception:
                # A model that cannot score the item counts as -inf. Only
                # ordinary errors are swallowed, so KeyboardInterrupt and
                # SystemExit still propagate.
                log_l = float("-inf")
            prob_dict[word] = log_l
            if log_l > best_log_l:
                best_log_l = log_l
                best_guess = word

        probabilities.append(prob_dict)
        guesses.append(best_guess)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping each word to its trained model; every model
        must provide ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; items are fetched with
        ``get_item_Xlengths(word_id)`` for the consecutive integers
        ``0 .. len(get_all_Xlengths()) - 1``
    :return: (probabilities, guesses), both ordered by word_id
        probabilities is a list of dicts ``{word: log likelihood}``; a word
        whose model raised while scoring is stored as ``float("-inf")``
        guesses is a list holding, per item, the key with the maximum value
        in that item's dict, or None when ``models`` is empty
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    logging.debug("My Recognizer Started...")

    probabilities = []
    guesses = []

    for word_id in range(len(test_set.get_all_Xlengths())):
        X, lengths = test_set.get_item_Xlengths(word_id)

        word_log_likelihoods = {}
        for word, model in models.items():
            try:
                word_log_likelihoods[word] = model.score(X, lengths)
            except Exception:
                # Non-viable model: rank it below every real score. Only
                # ordinary errors are swallowed, so KeyboardInterrupt and
                # SystemExit still propagate.
                word_log_likelihoods[word] = float("-inf")

        probabilities.append(word_log_likelihoods)
        # default=None keeps an empty model set from raising ValueError.
        guesses.append(
            max(
                word_log_likelihoods,
                key=word_log_likelihoods.get,
                default=None,
            )
        )

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping each word to its trained model; every model
        must provide ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; items are visited in the iteration
        order of ``get_all_sequences()`` and fetched with
        ``get_item_Xlengths``
    :return: (probabilities, guesses), both in that same item order
        probabilities is a list of dicts ``{word: log likelihood}``; a word
        whose model raised while scoring is stored with the value None
        guesses is a list holding, per item, the word with the highest log
        likelihood, or None when no model could score the item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for item in test_set.get_all_sequences():
        X, length = test_set.get_item_Xlengths(item)
        scores = {}
        best_guess = None
        best_score = None

        for word, model in models.items():
            try:
                score = model.score(X, length)
            except Exception:
                # Only ordinary errors are swallowed, so KeyboardInterrupt
                # and SystemExit still propagate.
                scores[word] = None
                continue
            scores[word] = score
            # Compare against None explicitly: a best score of exactly 0.0
            # is falsy and must not be mistaken for "nothing scored yet".
            if best_score is None or best_score < score:
                best_score = score
                best_guess = word

        probabilities.append(scores)
        guesses.append(best_guess)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping each word to its trained model; every model
        must provide ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; items are fetched with
        ``get_item_Xlengths(index)`` for ``index`` in
        ``range(test_set.num_items)``
    :return: (probabilities, guesses), both ordered by item index
        probabilities is a list of dicts ``{word: log likelihood}``; a word
        whose model raised while scoring is stored as ``float("-inf")``
        guesses is a list holding, per item, the word with the highest log
        likelihood, or None when no model scored above ``-inf``
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    lowest = float("-inf")

    item_index = 0
    while item_index < test_set.num_items:
        features, seq_lengths = test_set.get_item_Xlengths(item_index)

        # Phase 1: one log likelihood per word, -inf for models that fail.
        log_likelihoods = {}
        for candidate, hmm in models.items():
            try:
                value = hmm.score(features, seq_lengths)
            except Exception:
                value = float("-inf")
            log_likelihoods[candidate] = value

        # Phase 2: the first word with the strictly highest score wins.
        winner = None
        winner_score = lowest
        for candidate, value in log_likelihoods.items():
            if value > winner_score:
                winner_score = value
                winner = candidate

        probabilities.append(log_likelihoods)
        guesses.append(winner)
        item_index += 1

    return probabilities, guesses