예제 #1
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood; a word whose model raised
        ValueError/AttributeError while scoring is left out of that dictionary
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words, with None for an item that
        no model could score
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for word_id, _ in enumerate(test_set.wordlist):
        X, lengths = test_set.get_item_Xlengths(word_id)
        log_likelihoods = {}
        for word, model in models.items():
            try:
                log_likelihoods[word] = model.score(X, lengths)
            except (ValueError, AttributeError):
                # This model cannot score the sequence; leave the word out.
                continue

        probabilities.append(log_likelihoods)
        # default=None: an item nobody could score must not abort the run.
        guesses.append(
            max(log_likelihoods, key=log_likelihoods.get, default=None))

    return probabilities, guesses
예제 #2
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    Scoring is delegated to ``score_model``, a helper defined outside this
    function.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is whatever ``score_model`` returned for it
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words (first word with the highest
        value), with None for an item that has no scores at all
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for item in range(test_set.num_items):
        x_item, length_item = test_set.get_item_Xlengths(item)
        word_scores = {
            word: score_model(model, x_item, length_item)
            for word, model in models.items()
        }
        probabilities.append(word_scores)
        # One pass over the scores; the previous form recomputed the maximum
        # for every entry and indexed into an empty list when there were none.
        guesses.append(max(word_scores, key=word_scores.get, default=None))

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities holds one dictionary per item mapping each word to its
        log likelihood; a model that raises while scoring is recorded with a
        very large negative placeholder (-1e17)
        guesses holds the highest-scoring word of each item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []

    item_count = len(test_set.get_all_Xlengths())
    for item_index in range(item_count):
        features, seq_lengths = test_set.get_item_Xlengths(item_index)

        scores = {}
        for candidate, hmm in models.items():
            try:
                value = hmm.score(features, seq_lengths)
            except Exception:
                # Placeholder so a failed model can never win the guess.
                value = float(-100000000000000000)
            scores[candidate] = value

        probabilities.append(scores)
        best_word, _ = max(scores.items(), key=lambda pair: pair[1])
        guesses.append(best_word)
    return (probabilities, guesses)
예제 #4
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as scores, guesses
        both lists are ordered by the test set word_id
        scores is a list of dictionaries where each key is a word and each
        value is its log likelihood; words whose model failed to score the
        item are left out
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words, with "" for an item that no
        model scored above -inf
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    scores = []
    guesses = []
    for i in range(test_set.num_items):
        X, lengths = test_set.get_item_Xlengths(i)
        item_scores = {}
        best_score = float("-inf")
        best_word = ""
        for word, model in models.items():
            try:
                score = model.score(X, lengths)
            except Exception:
                # Scoring failures are expected for some models; skip them.
                # ``Exception`` (not a bare except) lets Ctrl-C and
                # SystemExit propagate.
                continue
            item_scores[word] = score
            if score > best_score:
                best_score = score
                best_word = word
        scores.append(item_scores)
        guesses.append(best_word)

    return scores, guesses
예제 #5
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (-inf when the model failed to score)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for ind, _ in enumerate(test_set.wordlist):
        X, lengths = test_set.get_item_Xlengths(ind)
        word_guess = {}
        for guess_word, model in models.items():
            try:
                logL = model.score(X, lengths)
            except Exception:
                # A model that cannot score this item must never win;
                # ``Exception`` keeps KeyboardInterrupt/SystemExit propagating.
                logL = float("-inf")
            word_guess[guess_word] = logL
        probabilities.append(word_guess)
        guesses.append(max(word_guess, key=word_guess.get))

    return probabilities, guesses
예제 #6
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    The best word of each item is chosen by ``get_best_word``, a helper
    defined outside this function.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_Xlengths()
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (-inf when the model failed to score)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of whatever ``get_best_word`` returns per item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    # Only the word ids are needed here; the sequence data is fetched through
    # get_item_Xlengths below.
    for word_id, _ in test_set.get_all_Xlengths().items():
        current_sequence, current_lengths = test_set.get_item_Xlengths(word_id)
        log_likelihoods = {}
        for word, model in models.items():
            try:
                log_likelihoods[word] = model.score(
                    current_sequence, current_lengths)
            except Exception:
                # ``Exception`` rather than a bare except so Ctrl-C and
                # SystemExit are not silently turned into -inf scores.
                log_likelihoods[word] = float("-inf")
        probabilities.append(log_likelihoods)
        guesses.append(get_best_word(log_likelihoods))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the order of test_set.df.index
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood; words whose model failed to score
        the item are left out
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words, with None for an item that
        no model could score
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # Locals are named so that the builtins ``id`` and ``dict`` stay usable.
    for word_id in test_set.df.index:
        data = test_set.get_item_Xlengths(word_id)
        scores = {}

        for word, model in models.items():
            try:
                scores[word] = model.score(*data)
            except Exception:
                # Skip models that cannot score this item; Ctrl-C and
                # SystemExit are no longer swallowed.
                continue
        probabilities.append(scores)
        # default=None: an item nobody could score must not abort the run.
        guesses.append(max(scores, key=scores.get, default=None))
    return (probabilities, guesses)
예제 #8
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood; words whose model failed to score
        the item are left out
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words with every digit character
        removed (e.g. 'JOHN1' -> 'JOHN'), or None for an item that no model
        could score
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    """

    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for word_id in range(len(test_set.get_all_Xlengths())):
        word_probability = {}
        feature_sequences, lengths = test_set.get_item_Xlengths(word_id)
        for word, model in models.items():
            try:
                word_probability[word] = model.score(feature_sequences, lengths)
            except Exception:
                # Skip models that cannot score this item; Ctrl-C and
                # SystemExit are no longer swallowed.
                continue
        probabilities.append(word_probability)
        if not word_probability:
            # Nothing scored this item: record "no guess" instead of letting
            # max() raise on an empty dict.
            guesses.append(None)
            continue
        guessed_word = max(word_probability, key=word_probability.get)
        # Drop all digits from the recognized word (not only trailing ones).
        guesses.append(
            ''.join(c for c in guessed_word if c not in '0123456789'))
    return probabilities, guesses
예제 #9
0
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict mapping each word to an object exposing score(X, lengths)
    :param test_set: SinglesData object (num_items and get_item_Xlengths are used)
    :return: (list, list) as probabilities, guesses
        probabilities holds one dictionary per item mapping each word to its
        score, with -inf for a model that raised while scoring
        guesses holds, per item, the first word with the strictly highest
        score, or None when no score is above -inf
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []

    for item_index in range(test_set.num_items):
        features, seq_lengths = test_set.get_item_Xlengths(item_index)

        scores = {}
        best_score = float("-inf")
        best_word = None
        for candidate, hmm in models.items():
            try:
                value = hmm.score(features, seq_lengths)
            except Exception:
                value = float("-inf")
            scores[candidate] = value

            # Strict comparison: ties keep the earlier word.
            if value > best_score:
                best_score = value
                best_word = candidate

        probabilities.append(scores)
        guesses.append(best_word)

    return probabilities, guesses
예제 #10
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (-inf when the model failed to score)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for word_id, _ in enumerate(test_set.wordlist):
        # The item data does not depend on the model, so fetch it once per
        # item. A failure to fetch it now propagates instead of being
        # reported as -inf for every word.
        X, lengths = test_set.get_item_Xlengths(word_id)
        word_scores = {}
        for key, model in models.items():
            try:
                word_score = model.score(X, lengths)
            except Exception:
                # ``Exception`` keeps KeyboardInterrupt/SystemExit propagating.
                word_score = float("-inf")
            word_scores[key] = word_score

        probabilities.append(word_scores)
        guesses.append(max(word_scores, key=word_scores.get))
    return (probabilities, guesses)
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (-inf when the model failed to score)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words; an entry is None when no
        model scored the item above -inf
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for word_id, _ in enumerate(test_set.wordlist):
        X, lengths = test_set.get_item_Xlengths(word_id)
        best_score, best_guess, scores = float('-inf'), None, {}
        for key, model in models.items():
            try:
                scores[key] = model.score(X, lengths)
            except Exception:
                # ``Exception`` keeps KeyboardInterrupt/SystemExit propagating.
                scores[key] = float('-inf')
            # Strict comparison: the first word reaching the top score wins.
            if best_score < scores[key]:
                best_score = scores[key]
                best_guess = key
        guesses.append(best_guess)
        probabilities.append(scores)
    return probabilities, guesses
예제 #12
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the traversal order of test_set.sentences_index
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (-1000000 when the model failed to score)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words, in the same order
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    # NOTE(review): results come out in sentence order; this presumably equals
    # word_id order only if sentences_index lists ids ascending - confirm.
    for sentence in test_set.sentences_index:
        for test_word in test_set.sentences_index[sentence]:
            # The item data does not depend on the model: fetch it once.
            X, lengths = test_set.get_item_Xlengths(test_word)
            probability = {}
            for model_word, model in models.items():
                try:
                    probability[model_word] = model.score(X, lengths)
                except Exception:
                    # Finite placeholder kept for compatibility with callers;
                    # ``Exception`` lets Ctrl-C and SystemExit propagate.
                    probability[model_word] = -1000000
            probabilities.append(probability)
    for probability in probabilities:
        guesses.append(max(probability, key=probability.get))
    return probabilities, guesses
예제 #13
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (-inf when the model failed to score)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    len_all = len(test_set.get_all_Xlengths())
    for word_id in range(len_all):
        x, lens = test_set.get_item_Xlengths(word_id)
        probs = {}
        for word, model in models.items():
            try:
                probs[word] = model.score(x, lens)
            except Exception:
                # ``Exception`` keeps KeyboardInterrupt/SystemExit propagating.
                probs[word] = float('-inf')
        probabilities.append(probs)
        # Keyed on the dict itself, so no lambda shadows the local ``x``.
        guesses.append(max(probs, key=probs.get))
    return (probabilities, guesses)
예제 #14
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (-inf when the model failed to score)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for word_id in range(len(test_set.get_all_Xlengths())):
        X, lengths = test_set.get_item_Xlengths(word_id)
        likelihoods = {}
        for word, model in models.items():
            try:
                the_score = model.score(X, lengths)
            except Exception:
                # ``Exception`` keeps KeyboardInterrupt/SystemExit propagating.
                the_score = float("-inf")
            likelihoods[word] = the_score
        probabilities.append(likelihoods)
        guesses.append(max(likelihoods, key=likelihoods.get))
    return (probabilities, guesses)
예제 #15
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (-inf when the model failed to score)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words; when several words share
        the top score the greatest word in string order is chosen
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for i in range(len(test_set.get_all_sequences())):
        prob_dict = {}
        X, lengths = test_set.get_item_Xlengths(i)

        for word, model in models.items():
            try:
                prob_dict[word] = model.score(X, lengths)
            except Exception:
                # ``Exception`` keeps KeyboardInterrupt/SystemExit propagating.
                prob_dict[word] = float('-inf')

        probabilities.append(prob_dict)
        # Compare (score, word) pairs: the word itself breaks score ties.
        _, guess = max((logL, word) for word, logL in prob_dict.items())
        guesses.append(guess)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_sequences()
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood, or None when the model failed to score
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words, with None for an item that
        no model could score
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for item in test_set.get_all_sequences():
        X, length = test_set.get_item_Xlengths(item)
        scores, guess, max_score = {}, None, None
        for word, model in models.items():
            # Only the scoring call is guarded; ``Exception`` lets Ctrl-C and
            # SystemExit propagate instead of being recorded as a failure.
            try:
                score = model.score(X, length)
            except Exception:
                scores[word] = None
                continue
            scores[word] = score
            if max_score is None or max_score < score:
                max_score, guess = score, word
        probabilities.append(scores)
        guesses.append(guess)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities holds one dictionary per item mapping each word to its
        log likelihood, with -inf for a model that raised while scoring
        guesses holds the highest-scoring word of each item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []

    for item_index in range(test_set.num_items):
        features, seq_lengths = test_set.get_item_Xlengths(item=item_index)

        # Log likelihood of this item under each word model.
        log_likelihoods = {}
        for candidate, hmm in models.items():
            try:
                value = hmm.score(features, seq_lengths)
            except Exception:
                value = float("-inf")
            log_likelihoods[candidate] = value
        probabilities.append(log_likelihoods)

        # The first word carrying the highest value is the guess.
        winner, _ = max(log_likelihoods.items(), key=lambda pair: pair[1])
        guesses.append(winner)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_sequences()
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (-inf when the model failed to score)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words, in the same order
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        an item with no scores at all (empty ``models``) adds nothing to
        either list
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for item in test_set.get_all_sequences():
        # The item data does not depend on the model, so fetch it once per
        # item. A failure to fetch it now propagates instead of being
        # reported as -inf for every word.
        X, lengths = test_set.get_item_Xlengths(item)
        scores = {}
        for word, model in models.items():
            try:
                scores[word] = model.score(X, lengths)
            except Exception:
                # ``Exception`` keeps KeyboardInterrupt/SystemExit propagating.
                scores[word] = float('-inf')
        if scores:
            probabilities.append(scores)
            guesses.append(max(scores, key=scores.get))
    return probabilities, guesses
예제 #19
0
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_sequences()
        probabilities holds one dictionary per item mapping each word to its
        log likelihood, with -inf for a model that raised ValueError
        guesses holds the highest-scoring word of each item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    warnings.filterwarnings("ignore", category=RuntimeWarning)
    probabilities, guesses = [], []

    for item_id in test_set.get_all_sequences():
        features, seq_lengths = test_set.get_item_Xlengths(item_id)

        scores = {}
        for candidate, hmm in models.items():
            try:
                value = hmm.score(features, seq_lengths)
            except ValueError:
                value = float("-inf")
            scores[candidate] = value

        probabilities.append(scores)
        winner, _ = max(scores.items(), key=lambda pair: pair[1])
        guesses.append(winner)
    return probabilities, guesses
예제 #20
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (-inf when the model failed to score)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for i in range(test_set.num_items):
        # The item data does not depend on the model, so fetch it once per
        # item. A failure to fetch it now propagates instead of being
        # reported as -inf for every word.
        sequences, lengths = test_set.get_item_Xlengths(i)
        word_probability = {}
        for word, model in models.items():
            try:
                log_l = model.score(sequences, lengths)
            except Exception:
                # ``Exception`` keeps KeyboardInterrupt/SystemExit propagating.
                log_l = float("-inf")
            word_probability[word] = log_l
        probabilities.append(word_probability)
        guesses.append(max(word_probability, key=word_probability.get))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities holds one dictionary per item mapping each word to its
        log likelihood, with -inf when scoring raised ValueError
        guesses holds the highest-scoring word of each item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []

    for item_index in range(test_set.num_items):
        scores = {}
        for candidate, hmm in models.items():
            try:
                scorer = hmm.score
                value = scorer(*test_set.get_item_Xlengths(item_index))
            except ValueError:
                value = float("-inf")
            scores[candidate] = value

        probabilities.append(scores)
        winner, _ = max(scores.items(), key=itemgetter(1))
        guesses.append(winner)

    return probabilities, guesses
예제 #22
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood; a word whose model raised
        ValueError/AttributeError while scoring is left out of that dictionary
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words, with None for an item that
        no model could score
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for word_id in range(len(test_set.get_all_Xlengths())):
        x, lengths = test_set.get_item_Xlengths(word_id)
        word_probabilities = {}
        for word, model in models.items():
            try:
                word_probabilities[word] = model.score(x, lengths)
            except (ValueError, AttributeError):
                # This model cannot score the sequence; leave the word out.
                continue
        probabilities.append(word_probabilities)
        # Only the top entry is needed, so take the max instead of sorting
        # everything; default=None covers an item that nothing could score.
        guesses.append(
            max(word_probabilities, key=word_probabilities.get, default=None))

    return probabilities, guesses
예제 #23
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    Each entry of ``test_set.wordlist`` is scored against every model and the
    best scoring word is tracked while the scores are collected.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries mapping every model word to
            its log likelihood, or to -inf when scoring that model failed
        guesses is a list of the best guess words ordered by the test set
            word_id; an entry is None when no model scored above -inf
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for item_id, _ in enumerate(test_set.wordlist):
        X, lengths = test_set.get_item_Xlengths(item_id)
        best_score, best_word = float("-inf"), None
        scores = {}
        for word, model in models.items():
            try:
                score = model.score(X, lengths)
            except Exception:
                # Best effort: a model that cannot score this item is simply
                # ranked last. Catching Exception rather than using a bare
                # except lets KeyboardInterrupt/SystemExit propagate.
                score = float("-inf")
            scores[word] = score
            # Strict comparison: the first of equally scored words wins.
            if score > best_score:
                best_score, best_word = score, word
        probabilities.append(scores)
        guesses.append(best_word)
    return probabilities, guesses
예제 #24
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    Iterates over the keys of ``test_set.get_all_Xlengths()``, scores each
    item against every model and keeps the highest scoring word as the guess.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_Xlengths()
        probabilities is a list of dictionaries mapping every model word to
            its log likelihood, or to -inf when scoring that model failed
        guesses is a list of the best guess words; an entry is None only
            when ``models`` is empty
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    probabilities = []
    guesses = []

    # Only the item keys are needed here; the sequences themselves are
    # fetched through get_item_Xlengths().
    for item in test_set.get_all_Xlengths():
        X, lengths = test_set.get_item_Xlengths(item)
        words_logL = {}
        for word, model in models.items():
            try:
                words_logL[word] = model.score(X, lengths)
            except Exception:
                # Best effort: an unscorable model is ranked last. Exception
                # (not a bare except) keeps Ctrl-C and SystemExit working.
                words_logL[word] = float('-inf')

        probabilities.append(words_logL)
        # default=None avoids ValueError from max() when there are no models.
        guesses.append(max(words_logL, key=words_logL.get, default=None))

    return probabilities, guesses
예제 #25
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    For each key of ``test_set.get_all_Xlengths()`` the item is scored by
    every model; the word with the largest log likelihood is the guess.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_Xlengths()
        probabilities is a list of dictionaries mapping every model word to
            its log likelihood, or to -inf when scoring that model failed
        guesses is a list of the best guess words; an entry is None only
            when ``models`` is empty
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # for each word in the testing set
    for word_index in test_set.get_all_Xlengths():
        x, length = test_set.get_item_Xlengths(word_index)
        word_log_l_dict = {}
        # try the word on every model and record how well each one matches
        for word, model in models.items():
            try:
                log_l = model.score(x, length)
            except Exception:
                # A model that fails to score is kept in the result with the
                # worst possible value. Exception instead of a bare except so
                # KeyboardInterrupt/SystemExit are not swallowed.
                log_l = float("-inf")
            word_log_l_dict[word] = log_l

        probabilities.append(word_log_l_dict)
        # With no models at all there is nothing to choose from: guess None
        # rather than letting max() raise ValueError.
        guesses.append(
            max(word_log_l_dict, key=word_log_l_dict.get, default=None))

    return probabilities, guesses
예제 #26
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    Walks the keys of ``test_set.get_all_Xlengths()``, scores every item with
    every model and guesses the word whose model scored highest.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_Xlengths()
        probabilities is a list of dictionaries mapping every model word to
            its log likelihood, or to -inf when scoring that model failed
        guesses is a list of the best guess words; an entry is None only
            when ``models`` is empty
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for item_id in test_set.get_all_Xlengths():
        x, lens = test_set.get_item_Xlengths(item_id)
        # key = word, value = log likelihood of this item under that word
        word_likelihoods = {}
        for word, model in models.items():
            try:
                word_likelihoods[word] = model.score(x, lens)
            except Exception:
                # Keep going when one model cannot score the item, but do not
                # trap KeyboardInterrupt/SystemExit like a bare except would.
                word_likelihoods[word] = float('-inf')
        probabilities.append(word_likelihoods)
        # default=None: an empty models dict yields None, not ValueError.
        guesses.append(
            max(word_likelihoods, key=word_likelihoods.get, default=None))
    return probabilities, guesses
예제 #27
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    Entries of ``models`` whose value is None are dropped, then every item of
    ``test_set.wordlist`` is handed to ``get_word_probs`` together with the
    remaining models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list holding the result of get_word_probs for each
            item (presumably a word -> log likelihood dict; confirm against
            get_word_probs, which is defined outside this function)
        guesses is a list of the highest valued key of each of those results;
            an entry is None when the result for that item is empty
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    # Identity test: a missing model is the None singleton, and `is not`
    # cannot be redirected by a custom __ne__ on a model class.
    valid_models = {
        descript: model
        for descript, model in models.items() if model is not None
    }

    probabilities = [
        get_word_probs(valid_models, *test_set.get_item_Xlengths(i))
        for i, _ in enumerate(test_set.wordlist)
    ]
    # default=None keeps an item with no scores from raising ValueError.
    guesses = [
        max(word_probs, key=word_probs.get, default=None)
        for word_probs in probabilities
    ]

    return probabilities, guesses
예제 #28
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    Scores each of the ``test_set.num_items`` items against every model and
    tracks the best scoring word while the scores are collected.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries mapping every model word to
            its log likelihood, or to -inf when scoring that model failed
        guesses is a list of the best guess words ordered by the test set
            word_id; an entry is None when no model scored above -inf
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for test in range(test_set.num_items):
        top_prob, top_word = float("-inf"), None
        probs = {}
        sequence, lengths = test_set.get_item_Xlengths(test)
        for word, model in models.items():
            try:
                probs[word] = model.score(sequence, lengths)
            except Exception:
                # Best effort per model; Exception (not a bare except) so that
                # KeyboardInterrupt and SystemExit still stop the run.
                probs[word] = float("-inf")
            # Strict comparison: ties keep the earlier word, and a -inf score
            # never becomes the guess.
            if probs[word] > top_prob:
                top_prob, top_word = probs[word], word
        probabilities.append(probs)
        guesses.append(top_word)
    return probabilities, guesses
예제 #29
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    Every one of the ``test_set.num_items`` samples is scored by each model;
    progress and failures are reported through the root logger, which this
    function configures at DEBUG level.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries mapping every model word to
            its log likelihood, or to -inf when scoring that model raised
        guesses is a list of the best guess words ordered by the test set
            word_id; the string "None" is used when there are no scores
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    logging.basicConfig(level=logging.DEBUG)
    probabilities = []
    guesses = []

    # Score each unknown sample with every model and keep the best fit.
    for sample_id in range(test_set.num_items):
        logging.debug(
            "Recognizing sample {} with these sequences {}".format(
                sample_id, test_set.get_item_Xlengths(sample_id)))
        features, seq_lengths = test_set.get_item_Xlengths(sample_id)
        scores = {}
        for candidate, hmm in models.items():
            logging.debug("  Comparing to {}".format(candidate))
            try:
                log_l = hmm.score(features, seq_lengths)
                logging.debug(
                    "    Got this score {}:{}".format(candidate, log_l))
                scores[candidate] = log_l
            except Exception as err:
                logging.warning(
                    "{} caught while scoring model for word {}: {}".format(
                        type(err), candidate, err))
                scores[candidate] = -math.inf
        probabilities.append(scores)

        if scores:
            winner = max(scores, key=scores.get)
            guesses.append(winner)
            logging.debug(
                "  Best option {}:{}".format(winner, scores[winner]))
        else:
            guesses.append("None")
            logging.debug(
                "  No results found for item {}!".format(sample_id))

    return probabilities, guesses
예제 #30
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    Scores each of the ``test_set.num_items`` items against every model and
    prints a summary of how many score calls failed with the
    "rows of transmat_ must sum to 1.0" error.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries mapping every model word to
            its log likelihood, or to -inf when scoring that model raised
        guesses is a list of the best guess words ordered by the test set
            word_id; an entry is None only when ``models`` is empty
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    transmat_fail = 0
    nb_scored = 0
    for test_item in range(test_set.num_items):
        # The item's data does not depend on the model, so fetch it once per
        # item instead of once per (item, model) pair.
        X, lengths = test_set.get_item_Xlengths(test_item)
        item_logL = {}
        for model_word, model in models.items():
            nb_scored += 1
            try:
                logL = model.score(X, lengths)
            except Exception as inst:
                if str(inst).startswith("rows of transmat_ must sum to 1.0"):
                    # Known failure mode: only counted, not reported per call.
                    transmat_fail += 1
                else:
                    print("Exception {}\nSetting logL to -inf".format(inst))
                logL = float('-inf')
            item_logL[model_word] = logL
        probabilities.append(item_logL)

        # First word with the highest score wins; default=None covers an
        # empty models dict, where no guess exists.
        guesses.append(max(item_logL, key=item_logL.get, default=None))

    print('Got {} transmat failed for a total of {} score calculations'.format(transmat_fail,nb_scored))
    return probabilities, guesses
예제 #31
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    Scores each of the ``test_set.num_items`` items against every model while
    keeping a running record of the best scoring word.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries mapping every model word to
            its log likelihood, or to -inf when scoring that model raised
        guesses is a list of the best guess words ordered by the test set
            word_id; an entry is None when no model scored above -inf
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for item_id in range(test_set.num_items):
        features, seq_lengths = test_set.get_item_Xlengths(item_id)
        scores = {}
        winner, winner_score = None, float("-inf")

        for candidate, hmm in models.items():
            try:
                scores[candidate] = hmm.score(features, seq_lengths)
            except Exception:
                # A model that cannot score the item is ranked last.
                scores[candidate] = float("-inf")

            # Only a strictly better score replaces the current winner.
            if scores[candidate] > winner_score:
                winner, winner_score = candidate, scores[candidate]

        probabilities.append(scores)
        guesses.append(winner)

    return probabilities, guesses
예제 #32
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    Iterates over ``test_set.get_all_sequences()``, scores every item with
    every model and tracks the best scoring word as the scores come in.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_sequences()
        probabilities is a list of dictionaries mapping every model word to
            its log likelihood, or to None when scoring that model raised
        guesses is a list of the best guess words; an entry is None when no
            model could score the item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for item in test_set.get_all_sequences():
        X, length = test_set.get_item_Xlengths(item)
        scores = {}
        best_guess = None
        best_score = None
        for word, model in models.items():
            try:
                score = model.score(X, length)
            except Exception:
                # Failed models are recorded as None (not -inf) and can never
                # become the guess. Exception rather than a bare except lets
                # KeyboardInterrupt/SystemExit through.
                scores[word] = None
                continue
            scores[word] = score
            # `is None`, not truthiness: a best score of exactly 0.0 is a
            # real score and must not be overwritten by a lower one.
            if best_score is None or best_score < score:
                best_score = score
                best_guess = word

        probabilities.append(scores)
        guesses.append(best_guess)

    return probabilities, guesses