Exemple #1
0
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping a word to its trained model; each model must
        expose ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; items are addressed by the integer
        ids ``0 .. len(test_set.get_all_Xlengths()) - 1``
    :return: (list, list) as probabilities, guesses, both ordered by item id
        probabilities is a list of dicts mapping every word in ``models`` to
            its log likelihood; a model that fails to score gets ``-inf``
        guesses is a list with the highest-scoring word per item with every
            ASCII digit removed (e.g. 'GIVE1' -> 'GIVE'), or ``None`` when
            ``models`` is empty
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    # Removes *all* digits 0-9 from the guessed word, not only trailing ones.
    strip_digits = str.maketrans('', '', '0123456789')
    probabilities = []
    guesses = []
    for word_id in range(len(test_set.get_all_Xlengths())):
        feature_sequences, lengths = test_set.get_item_Xlengths(word_id)
        word_probability = {}
        for word, model in models.items():
            try:
                word_probability[word] = model.score(feature_sequences, lengths)
            except Exception:
                # Keep the word in the dict with the worst possible score
                # instead of silently dropping it; KeyboardInterrupt and
                # SystemExit are deliberately not caught.
                word_probability[word] = float("-inf")
        probabilities.append(word_probability)
        guessed_word = max(word_probability, key=word_probability.get,
                           default=None)
        if guessed_word is not None:
            guessed_word = guessed_word.translate(strip_digits)
        guesses.append(guessed_word)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping a word to its trained model; each model must
        expose ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; the values yielded by iterating
        ``get_all_sequences()`` are passed to ``get_item_Xlengths``
    :return: (list, list) as probabilities, guesses, both in iteration order
        probabilities is a list of dicts mapping every word in ``models`` to
            its log likelihood, or ``None`` when that model failed to score
        guesses is a list with the highest-scoring word per item, or ``None``
            when no model produced a score
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for item in test_set.get_all_sequences():
        X, length = test_set.get_item_Xlengths(item)
        scores, guess, max_score = {}, None, None
        for word, model in models.items():
            try:
                score = model.score(X, length)
            except Exception:
                # Only scoring failures are tolerated; KeyboardInterrupt and
                # SystemExit propagate.
                scores[word] = None
                continue
            scores[word] = score
            if max_score is None or max_score < score:
                max_score, guess = score, word
        probabilities.append(scores)
        guesses.append(guess)

    return probabilities, guesses
Exemple #3
0
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping a word to its trained model; each model must
        expose ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; the keys of ``get_all_Xlengths()``
        are passed to ``get_item_Xlengths``
    :return: (list, list) as probabilities, guesses, both in key order
        probabilities is a list of dicts mapping every word in ``models`` to
            its log likelihood; a model that fails to score gets ``-inf``
        guesses is a list with the highest-scoring word per item (first one
            wins on ties), or ``None`` when ``models`` is empty
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    probabilities = []
    guesses = []

    for item in test_set.get_all_Xlengths():
        X, lengths = test_set.get_item_Xlengths(item)
        words_logL = {}
        for word, model in models.items():
            try:
                words_logL[word] = model.score(X, lengths)
            except Exception:
                # Scoring failure: rank this word last. KeyboardInterrupt
                # and SystemExit are not swallowed.
                words_logL[word] = float('-inf')

        probabilities.append(words_logL)
        guesses.append(max(words_logL, key=words_logL.get, default=None))

    return probabilities, guesses
Exemple #4
0
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping a word to its trained model; each model must
        expose ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; items are addressed by the integer
        ids ``0 .. len(test_set.get_all_Xlengths()) - 1``
    :return: (list, list) as probabilities, guesses, both ordered by item id
        probabilities is a list of dicts mapping every word in ``models`` to
            its log likelihood; a model that fails to score gets ``-inf``
        guesses is a list with the highest-scoring word per item (first one
            wins on ties), or ``None`` when ``models`` is empty
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for word_id in range(len(test_set.get_all_Xlengths())):
        X, lengths = test_set.get_item_Xlengths(word_id)
        likelihoods = {}
        for word, model in models.items():
            try:
                the_score = model.score(X, lengths)
            except Exception:
                # Scoring failure: rank this word last. KeyboardInterrupt
                # and SystemExit are not swallowed.
                the_score = float("-inf")
            likelihoods[word] = the_score
        probabilities.append(likelihoods)
        guesses.append(max(likelihoods, key=likelihoods.get, default=None))
    return (probabilities, guesses)
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping a word to its trained model; each model must
        expose ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; the keys of ``get_all_Xlengths()``
        are passed to ``get_item_Xlengths``
    :return: (list, list) as probabilities, guesses, both in key order
        probabilities is a list of dicts mapping every word in ``models`` to
            its log likelihood; a model that fails to score gets ``-inf``
        guesses is a list holding, per item, whatever ``get_best_word``
            returns for that item's dict (helper defined outside this
            function; presumably the highest-scoring word -- confirm there)
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for word_id in test_set.get_all_Xlengths():
        current_sequence, current_lengths = test_set.get_item_Xlengths(word_id)
        log_likelihoods = {}
        for word, model in models.items():
            try:
                log_likelihoods[word] = model.score(current_sequence,
                                                    current_lengths)
            except Exception:
                # Scoring failure: rank this word last. KeyboardInterrupt
                # and SystemExit are not swallowed.
                log_likelihoods[word] = float("-inf")
        probabilities.append(log_likelihoods)
        guesses.append(get_best_word(log_likelihoods))
    return probabilities, guesses
Exemple #6
0
def recognize(models: dict, test_set: SinglesData):
    """Rank every word model against every test item.

    :param models: dict mapping a word to its trained model; each model must
        expose ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; ``get_all_Xlengths()`` is iterated
        for item keys and indexed by key for the ``(X, lengths)`` pair
    :return: (list, list) as probabilities, guesses, both in key order
        probabilities is a list of dicts mapping every word in ``models`` to
            its log likelihood, ``-inf`` when scoring that word failed
        guesses is a list with the best word per item, ``None`` when no
            word scored above ``-inf``
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    neg_inf = float("-inf")
    all_scores, best_words = [], []

    for item_key in test_set.get_all_Xlengths():
        features, seq_lengths = test_set.get_all_Xlengths()[item_key]
        item_scores = {}
        top_word, top_score = None, neg_inf
        for candidate, hmm in models.items():
            try:
                item_scores[candidate] = hmm.score(features, seq_lengths)
                # Strictly greater: the earliest word keeps the lead on ties.
                if item_scores[candidate] > top_score:
                    top_score = item_scores[candidate]
                    top_word = candidate
            except Exception:
                item_scores[candidate] = neg_inf
        all_scores.append(item_scores)
        best_words.append(top_word)

    return (all_scores, best_words)
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping a word to its trained model; each model must
        expose ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; items are addressed by the integer
        ids ``0 .. len(test_set.get_all_sequences()) - 1``
    :return: (list, list) as probabilities, guesses, both ordered by item id
        probabilities is a list of dicts mapping every word in ``models`` to
            its log likelihood; a model that fails to score gets ``-inf``
        guesses is a list with the highest-scoring word per item; equal
            scores are broken in favour of the word that compares greater,
            and the guess is ``None`` when ``models`` is empty
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for i in range(len(test_set.get_all_sequences())):
        X, lengths = test_set.get_item_Xlengths(i)
        prob_dict = {}

        for word, model in models.items():
            try:
                prob_dict[word] = model.score(X, lengths)
            except Exception:
                # Scoring failure: rank this word last. KeyboardInterrupt
                # and SystemExit are not swallowed.
                prob_dict[word] = float('-inf')

        probabilities.append(prob_dict)
        # Compare (score, word) tuples so that ties on score fall back to
        # comparing the words themselves.
        _, guess = max(((score, word) for word, score in prob_dict.items()),
                       default=(None, None))
        guesses.append(guess)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Rank every word model against every test item.

    :param models: dict mapping a word to its trained model; each model must
        expose ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; the values yielded by iterating
        ``get_all_sequences()`` are passed to ``get_item_Xlengths``
    :return: (list, list) as probabilities, guesses, both in iteration order
        probabilities is a list of dicts mapping every word in ``models`` to
            its log likelihood, ``-inf`` when scoring raised ``ValueError``
        guesses is a list with the highest-scoring word per item (first one
            wins on ties)
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    warnings.filterwarnings("ignore", category=RuntimeWarning)

    def safe_score(hmm, features, seq_lengths):
        # Only ValueError is treated as "cannot score"; anything else
        # propagates to the caller.
        try:
            return hmm.score(features, seq_lengths)
        except ValueError:
            return float("-inf")

    all_scores, best_words = [], []
    for item in test_set.get_all_sequences():
        features, seq_lengths = test_set.get_item_Xlengths(item)
        item_scores = {
            word: safe_score(hmm, features, seq_lengths)
            for word, hmm in models.items()
        }
        all_scores.append(item_scores)
        winner, _ = max(item_scores.items(), key=lambda pair: pair[1])
        best_words.append(winner)
    return all_scores, best_words
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping a word to its trained model; each model must
        expose ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; the values yielded by iterating
        ``get_all_sequences()`` are passed to ``get_item_Xlengths``
    :return: (list, list) as probabilities, guesses, both in iteration order
        probabilities is a list of dicts mapping every word in ``models`` to
            its log likelihood; a model that fails to score gets ``-inf``
        guesses is a list with the highest-scoring word per item (first one
            wins on ties)
        When ``models`` is empty nothing is appended for any item, so both
        lists come back empty.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for item in test_set.get_all_sequences():
        # Fetched once per item and outside the try: a failure to read the
        # test data is a real error, not a bad score for every word.
        X, lengths = test_set.get_item_Xlengths(item)
        scores = {}
        for word, model in models.items():
            try:
                scores[word] = model.score(X, lengths)
            except Exception:
                # Scoring failure: rank this word last. KeyboardInterrupt
                # and SystemExit are not swallowed.
                scores[word] = float('-inf')
        if scores:
            probabilities.append(scores)
            guesses.append(max(scores, key=scores.get))
    return probabilities, guesses
Exemple #10
0
def recognize(models: dict, test_set: SinglesData):
    """Rank every word model against every test item.

    :param models: dict mapping a word to its trained model; each model must
        expose ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; ``get_all_Xlengths()`` is iterated
        for item keys and indexed by key for the ``(X, lengths)`` pair
    :return: (list, list) as probabilities, guesses, both in key order
        probabilities is a list of dicts mapping every word in ``models`` to
            its log likelihood, ``-inf`` when fetching the item data or
            scoring raised ``ValueError`` or ``AttributeError``
        guesses is a list with the highest-scoring word per item (first one
            wins on ties)
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    all_scores, best_words = [], []
    for item_key in test_set.get_all_Xlengths():
        item_scores = {}
        for word, hmm in models.items():
            try:
                # The lookup stays inside the try so that a ValueError or
                # AttributeError raised here is also scored as -inf.
                features, seq_lengths = test_set.get_all_Xlengths()[item_key]
                item_scores[word] = hmm.score(features, seq_lengths)
            except (ValueError, AttributeError):
                item_scores[word] = -np.inf

        all_scores.append(item_scores)
        best_words.append(max(item_scores, key=item_scores.get))

    return all_scores, best_words
Exemple #11
0
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping a word to its trained model; each model must
        expose ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; the values yielded by iterating
        ``get_all_sequences()`` are used to index ``get_all_Xlengths()``
    :return: (list, list) as probabilities, guesses, both in iteration order
        probabilities is a list of dicts mapping every word in ``models`` to
            its log likelihood; a model that fails to score gets ``-inf``
        guesses is a list with the highest-scoring word per item (first one
            wins on ties), or ``None`` when ``models`` is empty
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    all_sequences = test_set.get_all_sequences()
    all_Xlengths = test_set.get_all_Xlengths()
    for sequence in all_sequences:
        probability = {}
        X, length = all_Xlengths[sequence]
        for word_model, model in models.items():
            try:
                probability[word_model] = model.score(X, length)
            except Exception:
                # Scoring failure: rank this word last. KeyboardInterrupt
                # and SystemExit are not swallowed.
                probability[word_model] = float("-inf")
        probabilities.append(probability)
        guesses.append(max(probability, key=probability.get, default=None))
    return probabilities, guesses
Exemple #12
0
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping a word to its trained model; each model must
        expose ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; the keys of ``get_all_Xlengths()``
        are passed to ``get_item_Xlengths``
    :return: (list, list) as probabilities, guesses, both in key order
        probabilities is a list of dicts mapping every word in ``models`` to
            its log likelihood; a model that fails to score gets ``-inf``
        guesses is a list with the highest-scoring word per item (first one
            wins on ties), or ``None`` when ``models`` is empty
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for item_id in test_set.get_all_Xlengths():
        x, lens = test_set.get_item_Xlengths(item_id)
        # key = word, value = log likelihood of this item under that word
        word_liklihoods = {}
        for word, model in models.items():
            try:
                word_liklihoods[word] = model.score(x, lens)
            except Exception:
                # Scoring failure: rank this word last. KeyboardInterrupt
                # and SystemExit are not swallowed.
                word_liklihoods[word] = float('-inf')
        probabilities.append(word_liklihoods)
        guesses.append(
            max(word_liklihoods, key=word_liklihoods.get, default=None))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping a word to its trained model; each model must
        expose ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; the keys of ``get_all_Xlengths()``
        are passed to ``get_item_Xlengths``
    :return: (list, list) as probabilities, guesses, both in key order
        probabilities is a list of dicts mapping every word in ``models`` to
            its log likelihood; a model that fails to score gets ``-inf``
        guesses is a list with the highest-scoring word per item (first one
            wins on ties), or ``None`` when ``models`` is empty
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # for each item in the testing set
    for word_index in test_set.get_all_Xlengths():
        x, length = test_set.get_item_Xlengths(word_index)
        word_log_l_dict = {}
        # try the item on every model and record how well each one matches
        for word, model in models.items():
            try:
                word_log_l_dict[word] = model.score(x, length)
            except Exception:
                # Scoring failure: rank this word last. KeyboardInterrupt
                # and SystemExit are not swallowed.
                word_log_l_dict[word] = float("-inf")

        probabilities.append(word_log_l_dict)
        guesses.append(
            max(word_log_l_dict, key=word_log_l_dict.get, default=None))

    return probabilities, guesses
Exemple #14
0
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping a word to its trained model; each model must
        expose ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; ``get_all_Xlengths()`` is indexed
        with the integer ids ``0 .. len(...) - 1``
    :return: (list, list) as probabilities, guesses, both ordered by item id
        probabilities is a list of dicts mapping every word in ``models`` to
            its log likelihood; a model that fails to score gets ``-inf``
        guesses is a list with the highest-scoring word per item (first one
            wins on ties), or ``None`` when no word scored above ``-inf``
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    # Fetch the container once instead of on every loop iteration.
    all_xlengths = test_set.get_all_Xlengths()
    for index in range(len(all_xlengths)):
        best_score = float("-inf")
        best_match = None
        X, lengths = all_xlengths[index]
        probabilities_dict = {}
        for word, model in models.items():
            try:
                logL = model.score(X, lengths)
            except Exception:
                # Scoring failure: rank this word last. KeyboardInterrupt
                # and SystemExit are not swallowed.
                logL = float("-inf")
            probabilities_dict[word] = logL
            if logL > best_score:
                best_match, best_score = word, logL
        guesses.append(best_match)
        probabilities.append(probabilities_dict)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Rank every word model against every test item.

    :param models: dict mapping a word to its trained model; each model must
        expose ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; items are addressed by the integer
        ids ``0 .. len(test_set.get_all_Xlengths()) - 1``
    :return: (list, list) as probabilities, guesses, both ordered by item id
        probabilities is a list of dicts mapping every word in ``models`` to
            its log likelihood, or to the sentinel ``-1e17`` when scoring
            that word raised an exception
        guesses is a list with the highest-scoring word per item (first one
            wins on ties)
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    # Stand-in score for a model that could not score the item; a large
    # finite negative number rather than -inf.
    failed_score = -1e17
    results, picks = [], []

    item_count = len(test_set.get_all_Xlengths())
    for item_id in range(item_count):
        features, seq_lengths = test_set.get_item_Xlengths(item_id)
        scores = {}
        for word, hmm in models.items():
            try:
                value = hmm.score(features, seq_lengths)
            except Exception:
                value = failed_score
            scores[word] = value
        results.append(scores)
        picks.append(max(scores, key=scores.get))
    return (results, picks)
Exemple #16
0
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping a word to its trained model; each model must
        expose ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; items are addressed by the integer
        ids ``0 .. len(test_set.get_all_Xlengths()) - 1``
    :return: (list, list) as probabilities, guesses, both ordered by item id
        probabilities is a list of dicts mapping every word in ``models`` to
            its log likelihood; a model that fails to score gets ``-inf``
        guesses is a list with the highest-scoring word per item (first one
            wins on ties), or ``None`` when ``models`` is empty
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    len_all = len(test_set.get_all_Xlengths())
    for word_id in range(len_all):
        x, lens = test_set.get_item_Xlengths(word_id)
        probs = {}
        for word, model in models.items():
            try:
                probs[word] = model.score(x, lens)
            except Exception:
                # Scoring failure: rank this word last. KeyboardInterrupt
                # and SystemExit are not swallowed.
                probs[word] = float('-inf')
        probabilities.append(probs)
        guesses.append(max(probs, key=probs.get, default=None))
    return (probabilities, guesses)
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping a word to its trained model; each model must
        expose ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; items are addressed by the integer
        ids ``0 .. len(test_set.get_all_Xlengths()) - 1``
    :return: (list, list) as probabilities, guesses, both ordered by item id
        probabilities is a list of dicts mapping a word to its log
            likelihood; a word whose model raised ``ValueError`` or
            ``AttributeError`` while scoring is left out of that item's dict
        guesses is a list with the highest-scoring word per item (first one
            wins on ties), or ``None`` when no model produced a score
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for word_id in range(len(test_set.get_all_Xlengths())):
        x, lengths = test_set.get_item_Xlengths(word_id)
        word_probabilities = {}
        for word, model in models.items():
            try:
                word_probabilities[word] = model.score(x, lengths)
            except (ValueError, AttributeError):
                # This model cannot score the item; leave the word out.
                continue
        probabilities.append(word_probabilities)
        # default=None covers the case where every model was skipped, which
        # would otherwise leave nothing to pick from.
        guesses.append(
            max(word_probabilities, key=word_probabilities.get, default=None))

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping a word to its trained model; each model must
        expose ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; ``get_all_Xlengths()`` is indexed
        with the integer ids ``0 .. len(...) - 1``
    :return: (list, list) as probabilities, guesses, both ordered by item id
        probabilities is a list of dicts mapping every word in ``models`` to
            its log likelihood; a model that fails to score gets ``-inf``
        guesses is a list with the highest-scoring word per item; the first
            word wins on ties (including when every score is ``-inf``), and
            the guess is ``None`` only when ``models`` is empty
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # Fetch the container once instead of on every loop iteration.
    all_xlengths = test_set.get_all_Xlengths()
    for test in range(len(all_xlengths)):
        X, lengths = all_xlengths[test]

        max_score = None
        max_word = None
        prob_dict = {}

        for word, model in models.items():
            try:
                score = model.score(X, lengths)
            except Exception:
                # Scoring failure: rank this word last. KeyboardInterrupt
                # and SystemExit are not swallowed.
                score = float("-Inf")

            # max_score starts as None so the first word is always taken,
            # even if its score is -inf.
            if max_score is None or score > max_score:
                max_score = score
                max_word = word

            prob_dict[word] = score
        probabilities.append(prob_dict)
        guesses.append(max_word)

    return (probabilities, guesses)
Exemple #19
0
def recognize(models: dict, test_set: SinglesData):
    """Rank every word model against every test item.

    :param models: dict mapping a word to its trained model; each model must
        expose ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; items are addressed by the integer
        ids ``0 .. len(test_set.get_all_Xlengths()) - 1``
    :return: (list, list) as probabilities, guesses, both ordered by item id
        probabilities is a list of dicts mapping every word in ``models`` to
            its log likelihood, ``-inf`` when scoring that word failed
        guesses is a list with the best word per item, ``None`` when no
            word scored above ``-inf``
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    floor = float('-Inf')
    all_scores, best_words = [], []

    item_count = len(test_set.get_all_Xlengths())
    for item_id in range(item_count):
        features, seq_lengths = test_set.get_item_Xlengths(item_id)
        scores_by_word = {}
        winner, winner_score = None, floor

        for candidate, hmm in models.items():
            try:
                current = hmm.score(features, seq_lengths)
            except Exception:
                # Keep the word in the dict so every item has the same keys;
                # -inf means it can never be selected as the winner.
                current = floor
            scores_by_word[candidate] = current
            # Strictly greater: the earliest word keeps the lead on ties.
            if current > winner_score:
                winner, winner_score = candidate, current

        all_scores.append(scores_by_word)
        best_words.append(winner)

    return all_scores, best_words
Exemple #20
0
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping a word to its trained model; each model must
        expose ``score(X, lengths)`` returning a log likelihood
    :param test_set: SinglesData object; the values yielded by iterating
        ``get_all_sequences()`` are used to index ``get_all_Xlengths()``
    :return: (list, list) as probabilities, guesses, both in iteration order
        probabilities is a list of dicts mapping every word in ``models`` to
            its log likelihood; a model that fails to score gets ``-inf``
        guesses is a list with the highest-scoring word per item (first one
            wins on ties), or ``None`` when no word scored above ``-inf``
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    words = test_set.get_all_sequences()
    hwords = test_set.get_all_Xlengths()

    for word in words:
        X, length = hwords[word]
        scores = {}
        best_score = -float("inf")
        best_model = None
        for model_key, model in models.items():
            try:
                score = model.score(X, length)
            except Exception:
                # Scoring failure: rank this word last. KeyboardInterrupt
                # and SystemExit are not swallowed.
                score = -float("inf")
            scores[model_key] = score
            # Track the winner while scoring instead of in a second pass.
            if score > best_score:
                best_score = score
                best_model = model_key

        probabilities.append(scores)
        guesses.append(best_model)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Guess the word for each test item from per-word model log likelihoods.

    :param models: dict of trained models keyed by word
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object; items are addressed by the integer
        ids 0 .. len(get_all_sequences()) - 1 in get_all_Xlengths()
    :return: (list, list) as probabilities, guesses, one entry per id
        probabilities is a list of dicts {word: log likelihood}, with
            float("-inf") stored for a model that could not score the item
        guesses is a list of the best-scoring word per item (None when no
            model scored above -inf)
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # general/specific algorithm snippets from forum discussion:
    # https://discussions.udacity.com/t/recognizer-implementation/234793/22
    # https://discussions.udacity.com/t/recognizer-implementation/234793/28

    xlengths_by_id = test_set.get_all_Xlengths()
    item_count = len(test_set.get_all_sequences())

    for item_id in range(item_count):
        log_likelihoods = {}
        top_word, top_score = None, float("-inf")

        try:
            features, seq_lengths = xlengths_by_id[item_id]

            for candidate, hmm in models.items():
                try:
                    log_l = hmm.score(features, seq_lengths)
                    log_likelihoods[candidate] = log_l

                    if log_l > top_score:
                        top_word, top_score = candidate, log_l
                except:
                    # keep the word in the dict even though it has no score
                    log_likelihoods[candidate] = float("-inf")
        except:
            # best effort: an item that cannot be processed still yields
            # whatever was gathered so far (possibly {} and None)
            pass

        probabilities.append(log_likelihoods)
        guesses.append(top_word)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models keyed by word
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
        May be empty, in which case every guess is None.
    :param test_set: SinglesData object; the ids yielded by iterating
        ``get_all_Xlengths()`` are passed to ``get_item_Xlengths(id)``
    :return: (list, list) as probabilities, guesses, both in the iteration
        order of ``test_set.get_all_Xlengths()``
        probabilities is a list of dicts {word: log likelihood}; a model whose
            ``score`` call raises is stored as ``float("-inf")``
        guesses is a list of the word with the highest log likelihood per
            item (the first such word on ties), or None if there are no models
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    guesses = []
    probabilities = []

    for word_id in test_set.get_all_Xlengths():
        features, lengths = test_set.get_item_Xlengths(word_id)

        log_likelihoods = {}
        for word, model in models.items():
            try:
                log_likelihoods[word] = model.score(features, lengths)
            except Exception:
                # The model is not viable for this item; keep the word in the
                # dict with the worst possible score.  Catching Exception
                # (not everything) lets Ctrl-C / SystemExit propagate.
                log_likelihoods[word] = float("-inf")

        probabilities.append(log_likelihoods)

        # default=None keeps an empty model dict from raising ValueError.
        best_word = max(log_likelihoods, key=log_likelihoods.get, default=None)
        guesses.append(best_word)

    return probabilities, guesses
Exemple #23
0
def recognize(models: dict, test_set: SinglesData):
    """Pick, for every test item, the word whose model scores it highest.

    :param models: dict of trained models keyed by word
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object; items are looked up in
        get_all_Xlengths() by the integer ids 0 .. len(get_all_Xlengths()) - 1
    :return: (list, list) as probabilities, guesses, one entry per id
        probabilities is a list of dicts {word: log likelihood}; words whose
            model raised while scoring are left out of the dict
        guesses is a list of the best-scoring word per item (None when no
            model scored above -inf)
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    xlengths_by_id = test_set.get_all_Xlengths()
    item_count = len(test_set.get_all_Xlengths())

    for item_id in range(item_count):
        features, seq_lengths = xlengths_by_id[item_id]

        log_likelihoods = {}
        top_word, top_score = None, float('-Inf')

        for candidate, hmm in models.items():
            try:
                log_l = hmm.score(features, seq_lengths)
                log_likelihoods[candidate] = log_l
                # strictly greater: the earliest best word is kept on ties
                if log_l > top_score:
                    top_word, top_score = candidate, log_l
            except:
                # a model that cannot score this item is simply skipped
                pass

        probabilities.append(log_likelihoods)
        guesses.append(top_word)

    return probabilities, guesses
Exemple #24
0
def recognize(models: dict, test_set: SinglesData):
    """Compute per-word log likelihoods for each test item, then the best word.

    :param models: dict of trained models keyed by word
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object; the ids yielded by iterating
        get_all_Xlengths() are passed to get_item_Xlengths(id)
    :return: (list, list) as probabilities, guesses, both in the iteration
        order of test_set.get_all_Xlengths()
        probabilities is a list of dicts {word: log likelihood}; None is
            stored for a word whose model raised while scoring
        guesses is a list of the best-scoring word per item (None when no
            model produced a score above -inf)
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Pass 1: one {word: logL} dict per test item.
    probabilities = []
    for item_id in test_set.get_all_Xlengths():
        features, seq_lengths = test_set.get_item_Xlengths(item_id)
        log_likelihoods = {}

        for candidate, hmm in models.items():
            try:
                log_likelihoods[candidate] = hmm.score(features, seq_lengths)
            except:
                # mark the word as unscored
                log_likelihoods[candidate] = None

        probabilities.append(log_likelihoods)

    # Pass 2: for every dict, keep the word with the largest usable logL.
    guesses = []
    for log_likelihoods in probabilities:
        top_word, top_logL = None, -float('inf')

        for candidate, log_l in log_likelihoods.items():
            if log_l is None:
                continue
            if log_l > top_logL:
                top_word, top_logL = candidate, log_l

        guesses.append(top_word)

    return probabilities, guesses
Exemple #25
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    Prints a start and a finish message to stdout.

    :param models: dict of trained models keyed by word
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object; ``get_all_sequences()``,
        ``get_all_Xlengths()`` and ``wordlist`` are read, and items are
        addressed by the integer ids 0, 1, 2, ...
    :return: (list, list) as probabilities, guesses, one entry per visited id
        probabilities is a list of dicts {word: log likelihood}; a model whose
            ``score`` call raises is recorded as ``float("-inf")``
        guesses is a list of the best-scoring word per item (None when no
            model scored above -inf)
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    all_sequences = test_set.get_all_sequences()
    all_xlengths = test_set.get_all_Xlengths()

    print('Started recognizing ...')

    # NOTE: the wordlist values themselves are not used; zipping with the
    # wordlist only caps the number of visited ids at len(test_set.wordlist).
    for i, _ in zip(range(len(all_sequences)), test_set.wordlist):
        best_ll = float("-inf")
        best_word = None
        probs = {}

        for word, model in models.items():
            try:
                # NOTE(review): features are taken from all_sequences and the
                # lengths from all_xlengths -- presumably both describe the
                # same single sequence; confirm against SinglesData.
                ll = model.score(all_sequences[i][0], all_xlengths[i][1])
            except Exception:
                # Record the failure explicitly.  Previously the value left
                # over from the preceding model was stored for this word (or
                # an UnboundLocalError was raised if no model had scored yet).
                ll = float("-inf")

            probs[word] = ll
            if ll > best_ll:
                best_ll = ll
                best_word = word

        guesses.append(best_word)
        probabilities.append(probs)

    print('Finished analyzing {} words '.format(len(all_sequences)))

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Guess the word for each test item from per-word model log likelihoods.

    :param models: dict of trained models keyed by word
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object; items are looked up in
        get_all_Xlengths() by the integer ids 0 .. len(get_all_sequences()) - 1
    :return: (list, list) as probabilities, guesses, one entry per processed id
        probabilities is a list of dicts {word: log likelihood}; words whose
            model raised while scoring are left out of the dict
        guesses is a list of the best-scoring word per item (None when no
            model scored above -inf)
        If anything outside the per-model scoring fails, 'outer exception' is
        printed and the lists built so far are returned.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    xlengths_by_id = test_set.get_all_Xlengths()

    try:
        item_count = len(test_set.get_all_sequences())

        for item_id in range(item_count):
            log_likelihoods = {}
            top_word, top_score = None, float('-Inf')
            features, seq_lengths = xlengths_by_id[item_id]

            for candidate, hmm in models.items():
                try:
                    log_l = hmm.score(features, seq_lengths)
                    log_likelihoods[candidate] = log_l

                    # strictly greater: the earliest best word wins on ties
                    if log_l > top_score:
                        top_word, top_score = candidate, log_l
                except:
                    # this model cannot score the item; skip it
                    pass

            probabilities.append(log_likelihoods)
            guesses.append(top_word)
    except:
        # stop early but still hand back the partial results
        print('outer exception')

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score each test item with every word model and report the best word.

    :param models: dict of trained models keyed by word
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object; the ids yielded by
        get_all_Xlengths() are passed to get_item_Xlengths(id)
    :return: (list, list) as probabilities, guesses, both in the iteration
        order of test_set.get_all_Xlengths()
        probabilities is a list of dicts {word: log likelihood}, with
            float("-inf") stored for a model that raised while scoring
        guesses is a list of the best-scoring word per item; on equal scores
            the word visited last wins, and "" is used when models is empty
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for item_id in test_set.get_all_Xlengths():
        features, seq_lengths = test_set.get_item_Xlengths(item_id)

        log_likelihoods = {}
        top_word, top_score = "", float("-inf")

        for candidate, hmm in models.items():
            try:
                log_l = hmm.score(features, seq_lengths)
            except Exception as err:
                # the model could not score this item
                log_l = float("-inf")
                if DEBUG:
                    print("ERROR: {}".format(err))

            log_likelihoods[candidate] = log_l

            # ">=" on purpose: a later word replaces an earlier one on a tie
            if log_l >= top_score:
                top_word, top_score = candidate, log_l

        probabilities.append(log_likelihoods)
        guesses.append(top_word)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Guess the word for each test item from per-word model log likelihoods.

    :param models: dict of trained models keyed by word
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object; get_item_Xlengths(id) is called for
        the integer ids 0 .. len(get_all_sequences()) - 1
    :return: (list, list) as probabilities, guesses, one entry per processed id
        probabilities is a list of dicts {word: log likelihood}, with
            float("-inf") stored for a model that could not score the item
        guesses is a list of the best-scoring word per item (None when no
            model scored above -inf)
        If anything outside the per-model scoring fails, processing stops
        silently and the lists built so far are returned.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # 1- Calculate the logL score of the item under each word model.
    # 2- Append these scores to the "probabilities" list.
    # 3- Find the word with the maximum score and append it to "guesses".
    try:
        item_count = len(test_set.get_all_sequences())

        for item_id in range(item_count):
            log_likelihoods = {}
            top_word, top_score = None, float('-inf')
            features, seq_lengths = test_set.get_item_Xlengths(item_id)

            for candidate, hmm in models.items():
                try:
                    log_l = hmm.score(features, seq_lengths)
                    log_likelihoods[candidate] = log_l
                    if log_l > top_score:
                        top_word, top_score = candidate, log_l
                except:
                    # keep the word, flagged with the worst possible score
                    log_likelihoods[candidate] = float("-inf")

            probabilities.append(log_likelihoods)
            guesses.append(top_word)
    except:
        # best effort: return whatever was collected before the failure
        pass

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score each test item with every word model and report the best word.

    :param models: dict of trained models keyed by word
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object; every element yielded by iterating
        get_all_sequences() is used as a key into get_all_Xlengths()
    :return: (list, list) as probabilities, guesses, both in the iteration
        order of test_set.get_all_sequences()
        probabilities is a list of dicts {word: log likelihood}, with
            float('-inf') stored for a model that raised while scoring
        guesses is a list of the best-scoring word per item (None when no
            model scored above -inf)
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    xlengths_by_id = test_set.get_all_Xlengths()
    item_ids = test_set.get_all_sequences()

    for item_id in item_ids:
        features, seq_lengths = xlengths_by_id[item_id]

        log_likelihoods = {}
        # start below every real score so the first scored word takes the lead
        top_word, top_logL = None, float('-inf')

        for candidate, hmm in models.items():
            try:
                log_l = hmm.score(features, seq_lengths)
            except:
                # an unusable model counts as the worst possible match
                log_l = float('-inf')

            log_likelihoods[candidate] = log_l
            if log_l > top_logL:
                top_word, top_logL = candidate, log_l

        probabilities.append(log_likelihoods)
        guesses.append(top_word)

    return probabilities, guesses
Exemple #30
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    Emits one ``logging.debug`` message when it starts.

    :param models: dict of trained models keyed by word
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
        May be empty, in which case every guess is None.
    :param test_set: SinglesData object; ``get_item_Xlengths(id)`` is called
        for the integer ids 0 .. len(get_all_Xlengths()) - 1
    :return: (list, list) as probabilities, guesses, one entry per id
        probabilities is a list of dicts {word: log likelihood}; a model whose
            ``score`` call raises is stored as ``float("-inf")``
        guesses is a list of the word with the highest log likelihood per
            item (the first such word on ties), or None if there are no models
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    logging.debug("My Recognizer Started...")

    probabilities = []
    guesses = []

    for word_id in range(len(test_set.get_all_Xlengths())):
        features, lengths = test_set.get_item_Xlengths(word_id)

        word_log_likelihoods = {}
        for word, model in models.items():
            try:
                word_log_likelihoods[word] = model.score(features, lengths)
            except Exception:
                # Non-viable model for this item: keep the word with the worst
                # possible score.  Catching Exception (not everything) lets
                # Ctrl-C / SystemExit propagate.
                word_log_likelihoods[word] = float("-inf")

        probabilities.append(word_log_likelihoods)

        # default=None keeps an empty model dict from raising ValueError.
        guesses.append(
            max(word_log_likelihoods, key=word_log_likelihoods.get, default=None))

    return probabilities, guesses
Exemple #31
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models keyed by word
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object; every element yielded by iterating
        ``get_all_sequences()`` is passed to ``get_item_Xlengths(id)``
    :return: (list, list) as probabilities, guesses, both in the iteration
        order of ``test_set.get_all_sequences()``
        probabilities is a list of dicts {word: log likelihood}; None is
            stored for a word whose model raised while scoring
        guesses is a list of the best-scoring word per item (the first such
            word on ties), or None when no model produced a score
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for item in test_set.get_all_sequences():
        X, length = test_set.get_item_Xlengths(item)
        scores = {}
        best_guess = None
        best_score = None

        for word, model in models.items():
            try:
                score = model.score(X, length)
            except Exception:
                # The model cannot score this item; mark the word as unscored.
                # Catching Exception (not everything) lets Ctrl-C propagate.
                scores[word] = None
                continue

            scores[word] = score
            # Compare against None explicitly: a truthiness test would treat a
            # best score of exactly 0.0 as "no score yet" and let a worse
            # score replace it.
            if best_score is None or score > best_score:
                best_score = score
                best_guess = word

        probabilities.append(scores)
        guesses.append(best_guess)

    return probabilities, guesses
Exemple #32
0
def recognize(models: dict, test_set: SinglesData):
    """Score each test item with every word model and report the best word.

    :param models: dict of trained models keyed by word
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object; get_item_Xlengths(id) is called for
        the integer ids 0 .. test_set.num_items - 1
    :return: (list, list) as probabilities, guesses, one entry per id
        probabilities is a list of dicts {word: log likelihood}, with
            float("-inf") stored for a model that raised while scoring
        guesses is a list of the best-scoring word per item (None when no
            model scored above -inf)
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for item_id in range(test_set.num_items):
        features, seq_lengths = test_set.get_item_Xlengths(item_id)

        log_likelihoods = {}
        best_word = None
        best_log_l = float("-inf")

        for candidate, hmm in models.items():
            try:
                log_l = hmm.score(features, seq_lengths)
            except Exception:
                # an unusable model counts as the worst possible match
                log_l = float("-inf")

            log_likelihoods[candidate] = log_l
            # strictly greater: the earliest best word is kept on ties
            if log_l > best_log_l:
                best_log_l = log_l
                best_word = candidate

        probabilities.append(log_likelihoods)
        guesses.append(best_word)

    return probabilities, guesses