예제 #1
0
def recognize(models: dict, test_set: SinglesData):
    """Score each test item against every word model and pick the best word.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object providing ``get_all_sequences()`` and
        ``get_all_Xlengths()``; the ids obtained by iterating the former are
        used as keys into the latter
    :return: ``(probabilities, guesses)``; ``probabilities`` holds one
        ``{word: log-likelihood}`` dict per test item and ``guesses`` the
        highest-scoring word per test item, both in iteration order of
        ``get_all_sequences()``
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    item_ids = test_set.get_all_sequences()
    xlengths_by_id = test_set.get_all_Xlengths()

    probabilities = []
    guesses = []
    for item_id in item_ids:
        features, seq_lengths = xlengths_by_id[item_id]
        log_l_by_word = {}
        for candidate, hmm in models.items():
            try:
                log_l_by_word[candidate] = hmm.score(features, seq_lengths)
            except:
                # A model that cannot score this item ranks below all others.
                log_l_by_word[candidate] = -float("inf")
        probabilities.append(log_l_by_word)
        # On ties the word that appears first in `models` is kept.
        guesses.append(max(log_l_by_word, key=log_l_by_word.get))
    return probabilities, guesses
예제 #2
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object whose ``get_all_Xlengths()`` returns a dict of
        ``(X, lengths)`` tuples, one per test item
    :return: ``(probabilities, guesses)``
        ``probabilities`` is a list with one ``{word: log-likelihood}`` dict
        per test item; a model that fails to score the item is recorded as
        ``float("-inf")`` so every value stays a comparable float.
        ``guesses`` is a list with the best-scoring word per test item, or
        ``None`` when no model could score the item.
        Both lists follow the iteration order of ``get_all_Xlengths()``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for x, lengths in test_set.get_all_Xlengths().values():
        scores = {}
        best_guess, highest_score = None, None
        for word, model in models.items():
            try:
                score = model.score(x, lengths)
            except Exception:
                # The model cannot score this item: rank it last instead of
                # storing None, which would not compare with float scores.
                scores[word] = float("-inf")
                continue
            scores[word] = score
            if highest_score is None or highest_score < score:
                highest_score, best_guess = score, word
        probabilities.append(scores)
        guesses.append(best_guess)

    return probabilities, guesses
예제 #3
0
def recognize(models: dict, test_set: SinglesData):
    """Return per-item log-likelihood tables and the best-scoring word.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object whose ``get_all_Xlengths()`` returns a dict of
        ``(X, lengths)`` tuples
    :return: ``(probabilities, guesses)``; one ``{word: log-likelihood}`` dict
        and one guessed word per test item, in the iteration order of
        ``get_all_Xlengths()``
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    def _log_likelihood(hmm, X, lengths):
        # Any scoring failure ranks the model below every real score.
        try:
            return hmm.score(X, lengths)
        except:
            return float('-inf')

    probabilities = []
    guesses = []
    for X, lengths in test_set.get_all_Xlengths().values():
        per_word = {
            word: _log_likelihood(hmm, X, lengths)
            for word, hmm in models.items()
        }
        probabilities.append(per_word)
        guesses.append(max(per_word, key=per_word.__getitem__))
    return probabilities, guesses
예제 #4
0
def recognize(models: dict, test_set: SinglesData):
    """Match every test item against all word models.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object providing ``get_all_Xlengths()`` (its keys are
        the item ids visited) and ``get_item_Xlengths(id)`` returning the
        ``(X, lengths)`` tuple for one item
    :return: ``(probabilities, guesses)``; one ``{word: log-likelihood}`` dict
        and one guessed word per visited item id
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []

    for item_id in test_set.get_all_Xlengths().keys():
        features, seq_len = test_set.get_item_Xlengths(item_id)

        log_l_by_word = {}
        for candidate, hmm in models.items():
            try:
                log_l = hmm.score(features, seq_len)
            except:
                # Unscorable models lose against every real log-likelihood.
                log_l = float("-inf")
            log_l_by_word[candidate] = log_l

        best_word = max(log_l_by_word, key=log_l_by_word.get)
        probabilities.append(log_l_by_word)
        guesses.append(best_word)

    return probabilities, guesses
예제 #5
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object providing ``num_items`` and
        ``get_all_Xlengths()``, whose result is indexed by the word ids
        ``0 .. num_items - 1``
    :return: ``(probabilities, guesses)``
        ``probabilities`` is a list with one ``{word: log-likelihood}`` dict
        per word id; models that fail to score get ``float('-inf')``.
        ``guesses`` is a list with the best-scoring word per word id
        (``None`` only when ``models`` is empty).
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # Fetch the lookup table once; it does not change between iterations.
    all_xlengths = test_set.get_all_Xlengths()

    for word_id in range(test_set.num_items):
        X, lengths = all_xlengths[word_id]
        word_logL_dict = {}
        for word, hmm_model in models.items():
            try:
                logL = hmm_model.score(X, lengths)
            except Exception:
                # The model cannot score this item: rank it last.
                logL = float('-inf')
            word_logL_dict[word] = logL
        probabilities.append(word_logL_dict)
        # Ties resolve to the first word in `models` iteration order.
        guesses.append(
            max(word_logL_dict, key=word_logL_dict.get, default=None))

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Rank all word models for each test item and keep the top-ranked word.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object whose ``get_all_Xlengths()`` returns a dict of
        ``(X, lengths)`` tuples
    :return: ``(probabilities, guesses)``; one ``{word: log-likelihood}`` dict
        and one guessed word per test item, in the iteration order of
        ``get_all_Xlengths()``
    """
    for ignored_category in (DeprecationWarning, RuntimeWarning):
        warnings.filterwarnings("ignore", category=ignored_category)

    probabilities, guesses = [], []
    for X, length in test_set.get_all_Xlengths().values():
        scored = {}
        for word, mod in models.items():
            try:
                scored[word] = mod.score(X, length)
            except:
                scored[word] = float("-inf")

        # Sort by descending log-likelihood; the sort is stable, so equal
        # scores keep the order of `models`.
        ranked = sorted(scored.items(), key=lambda pair: -pair[1])
        top_word = ranked[0][0]

        probabilities.append(scored)
        guesses.append(top_word)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Build a log-likelihood table per test item and guess the best word.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object whose ``get_all_Xlengths()`` returns a dict of
        ``(X, lengths)`` tuples
    :return: ``(probabilities, guesses)``; one ``{word: log-likelihood}`` dict
        and one guessed word per test item, in the iteration order of
        ``get_all_Xlengths()``
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    xlength_pairs = list(test_set.get_all_Xlengths().values())
    for features, seq_lens in xlength_pairs:
        log_l = {}
        for word, hmm in models.items():
            try:
                value = hmm.score(features, seq_lens)
            except:
                # Treat an unscorable model as the worst possible match.
                value = float('-inf')
            log_l[word] = value
        probabilities.append(log_l)
        guesses.append(max(log_l, key=log_l.get))

    return probabilities, guesses
예제 #8
0
def recognize(models: dict, test_set: SinglesData):
    """Score test items by word id and return the most likely word for each.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object providing ``get_all_Xlengths()`` (only its length
        is used) and ``get_item_Xlengths(word_id)`` returning ``(X, lengths)``
    :return: ``(probabilities, guesses)`` tuple; index ``i`` of each list
        belongs to word id ``i``
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []

    item_count = len(test_set.get_all_Xlengths())
    for word_id in range(item_count):
        X, lengths = test_set.get_item_Xlengths(word_id)

        log_l_table = {}
        for word, hmm in models.items():
            try:
                log_l_table[word] = hmm.score(X, lengths)
            except:
                log_l_table[word] = float("-inf")

        probabilities.append(log_l_table)
        guesses.append(max(log_l_table, key=log_l_table.get))

    return probabilities, guesses
예제 #9
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object whose ``get_all_Xlengths()`` returns a dict
        mapping word ids to ``(X, lengths)`` tuples
    :return: ``(probabilities, guesses)``, both ordered by ascending word id
        ``probabilities`` is a list with one ``{word: log-likelihood}`` dict
        per test item; every word in ``models`` is present, with
        ``float("-inf")`` for models that failed to score the item.
        ``guesses`` is a list with the best-scoring word per test item
        (``None`` only when ``models`` is empty).
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Sort by word id so the output order does not depend on dict order.
    # A separate name is used so the `test_set` argument is not rebound.
    items_by_id = sorted(
        test_set.get_all_Xlengths().items(), key=lambda item: item[0])

    probabilities = []
    guesses = []
    for _word_id, (X, lengths) in items_by_id:
        probability_dict = {}
        for word, model in models.items():
            try:
                probability_dict[word] = model.score(X, lengths)
            except Exception:
                # Keep the word in the table, ranked last. Dropping it left
                # the dict incomplete and made max() fail when every model
                # was unable to score the item.
                probability_dict[word] = float("-inf")
        probabilities.append(probability_dict)
        guesses.append(
            max(probability_dict, key=probability_dict.get, default=None))
    return probabilities, guesses
예제 #10
0
def recognize(models: dict, test_set: SinglesData):
    """Compute all log-likelihood tables first, then derive the guesses.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object whose ``get_all_Xlengths()`` returns a dict of
        ``(X, lengths)`` tuples
    :return: ``(probabilities, guesses)``; one ``{word: log-likelihood}`` dict
        and one guessed word per test item, in the iteration order of
        ``get_all_Xlengths()``
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    def _score_or_floor(hmm, x_vals, lengths):
        # Failures can originate deep inside the model implementation, so
        # any Exception maps to the lowest possible score.
        try:
            return hmm.score(x_vals, lengths)
        except Exception:
            return float('-inf')

    probabilities = []
    for x_vals, lengths in test_set.get_all_Xlengths().values():
        table = {}
        for word, hmm in models.items():
            table[word] = _score_or_floor(hmm, x_vals, lengths)
        probabilities.append(table)

    guesses = []
    for table in probabilities:
        guesses.append(max(table, key=table.get))
    return probabilities, guesses
예제 #11
0
def recognize(models: dict, test_set: SinglesData):
    """Walk the test items by word id and choose the likeliest word for each.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object providing ``get_all_Xlengths()`` (only its length
        is used) and ``get_item_Xlengths(word_id)`` returning ``(X, lengths)``
    :return: ``(probabilities, guesses)`` tuple; index ``i`` of each list
        belongs to word id ``i``
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for word_id in range(len(test_set.get_all_Xlengths())):
        features, seq_lens = test_set.get_item_Xlengths(word_id)
        scores = {}
        for word, hmm in models.items():
            try:
                scores[word] = hmm.score(features, seq_lens)
            except:
                # Unscorable models are pushed to the bottom of the ranking.
                scores[word] = float('-inf')
        best_word = max(scores, key=scores.get)
        probabilities.append(scores)
        guesses.append(best_word)

    return probabilities, guesses
예제 #12
0
def recognize(models: dict, test_set: SinglesData):
    """Track the running best model while scoring each test item.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object whose ``get_all_Xlengths()`` returns a dict of
        ``(X, lengths)`` tuples
    :return: ``(probabilities, guesses)``; one ``{word: log-likelihood}`` dict
        per test item and one guess per test item. A guess is ``None`` when
        no model produced a score strictly above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []

    for features, seq_lens in test_set.get_all_Xlengths().values():
        log_l_by_word = {}
        # (score, word) of the best candidate seen so far for this item.
        leader = (float('-inf'), None)
        for word, hmm in models.items():
            try:
                log_l = hmm.score(features, seq_lens)
                log_l_by_word[word] = log_l
                if log_l > leader[0]:
                    leader = (log_l, word)
            except:
                log_l_by_word[word] = float('-inf')
        probabilities.append(log_l_by_word)
        guesses.append(leader[1])

    return probabilities, guesses
예제 #13
0
def recognize(models: dict, test_set: SinglesData):
    """Score each test item with every model, remembering the current leader.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object whose ``get_all_Xlengths()`` returns a dict of
        ``(X, lengths)`` tuples
    :return: ``(probabilities, guesses)``; one ``{word: log-likelihood}`` dict
        per test item and one guess per test item. A guess is ``None`` when
        every model failed to score the item.
    """
    probabilities = []
    guesses = []

    for features, seq_lens in test_set.get_all_Xlengths().values():
        log_l = {}
        leader = None
        for word, hmm in models.items():
            try:
                log_l[word] = hmm.score(features, seq_lens)
                # Only a strictly better score replaces the current leader.
                if leader is None or log_l[word] > log_l[leader]:
                    leader = word
            except:
                log_l[word] = float('-inf')

        probabilities.append(log_l)
        guesses.append(leader)

    return probabilities, guesses
예제 #14
0
def recognize(models: dict, test_set: SinglesData):
    """Fill the log-likelihood tables, then pick the top word of each table.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object whose ``get_all_Xlengths()`` returns a dict of
        ``(X, lengths)`` tuples
    :return: ``(probabilities, guesses)``; one ``{word: log-likelihood}`` dict
        and one guessed word per test item, in the iteration order of
        ``get_all_Xlengths()``
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    probabilities = []
    for X, lengths in list(test_set.get_all_Xlengths().values()):
        table = {}
        for word, hmm in models.items():
            try:
                log_l = hmm.score(X, lengths)
            except:
                # A model that cannot score the item gets the lowest rank.
                log_l = float('-inf')
            table[word] = log_l
        probabilities.append(table)

    guesses = [max(table, key=table.get) for table in probabilities]
    return probabilities, guesses
예제 #15
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object whose ``get_all_Xlengths()`` returns a dict
        mapping word ids to ``(X, lengths)`` tuples
    :return: ``(probabilities, guesses)``, both ordered by ascending word id
        ``probabilities`` is a list with one ``{word: log-likelihood}`` dict
        per test item; every word in ``models`` is present, with
        ``float('-inf')`` for models that failed to score the item.
        ``guesses`` is a list with the best-scoring word per test item
        (``None`` only when ``models`` is empty).
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    seq = test_set.get_all_Xlengths()

    # Visit the real keys in ascending order. Using the enumerate position
    # as a key raised KeyError whenever ids were not exactly 0..n-1.
    for word_id in sorted(seq):
        X, lengths = seq[word_id]
        word_probs = {}
        for word, model in models.items():
            try:
                word_probs[word] = model.score(X, lengths)
            except Exception:
                # Keep the word in the table, ranked last. Dropping it left
                # the dict incomplete and made max() fail when every model
                # was unable to score the item.
                word_probs[word] = float('-inf')
        probabilities.append(word_probs)
        guesses.append(max(word_probs, key=word_probs.get, default=None))

    return probabilities, guesses
예제 #16
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object whose ``get_all_Xlengths()`` returns a dict of
        ``(X, lengths)`` tuples
    :return: ``(probabilities, guesses)``
        ``probabilities`` is a list with one ``{word: log-likelihood}`` dict
        per test item; models that fail to score get ``float('-inf')``.
        ``guesses`` is a list with the best-scoring word per test item
        (``None`` only when ``models`` is empty).
        Both lists follow the iteration order of ``get_all_Xlengths()``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for X_test, lengths_test in test_set.get_all_Xlengths().values():
        p_dict = {}
        for word, model in models.items():
            try:
                p_dict[word] = model.score(X_test, lengths_test)
            except Exception:
                # A failed model must rank last. The previous fallback of
                # 0.0 outranked every negative log-likelihood, so the model
                # that could not score the item became the guess.
                p_dict[word] = float('-inf')
        probabilities.append(p_dict)
        guesses.append(max(p_dict, key=p_dict.get, default=None))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object whose ``get_all_Xlengths()`` returns a dict of
        ``(X, lengths)`` tuples
    :return: ``(probabilities, guesses)`` tuple with exactly one entry per
        test item, in the iteration order of ``get_all_Xlengths()``
        ``probabilities`` is a list of ``{word: log-likelihood}`` dicts;
        models that fail to score get ``float('-inf')``.
        ``guesses`` is a list with the best-scoring word per test item
        (``None`` only when ``models`` is empty).
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    # Silence NumPy divide warnings raised while scoring.
    with np.errstate(divide='ignore'):
        for X, lengths in test_set.get_all_Xlengths().values():
            probs = {}
            for word, model in models.items():
                try:
                    probs[word] = model.score(X, lengths)
                except Exception:
                    # Rank the failed model last but keep it in the table.
                    probs[word] = float('-inf')
            # Always emit one entry per test item. Skipping items that no
            # model could score shifted every later entry and broke the
            # alignment between list positions and word ids.
            probabilities.append(probs)
            guesses.append(max(probs, key=probs.get, default=None))
    return (probabilities, guesses)
예제 #18
0
def recognize(models: dict, test_set: SinglesData):
    """Score each test item with all word models and keep the best word.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object whose ``get_all_Xlengths()`` returns a dict of
        ``(X, lengths)`` tuples
    :return: ``(probabilities, guesses)``; one ``{word: log-likelihood}`` dict
        and one guessed word per test item, in the iteration order of
        ``get_all_Xlengths()``
    :raises: any exception other than ``ValueError`` raised by a model's
        ``score`` propagates to the caller
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []

    for X, lengths in test_set.get_all_Xlengths().values():
        log_l_by_word = {}
        for word, hmm in models.items():
            try:
                log_l = hmm.score(X, lengths)
            except ValueError:
                # Only ValueError is treated as "model cannot score this".
                log_l = float("-inf")
            log_l_by_word[word] = log_l

        best_word = max(log_l_by_word, key=log_l_by_word.get)
        probabilities.append(log_l_by_word)
        guesses.append(best_word)

    return probabilities, guesses
예제 #19
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object whose ``get_all_Xlengths()`` returns a dict of
        ``(X, lengths)`` tuples
    :return: ``(probabilities, guesses)``
        ``probabilities`` is a list with one ``{word: log-likelihood}`` dict
        per test item; every word in ``models`` is present, with
        ``float('-inf')`` for models that failed to score the item.
        ``guesses`` is a list with the best-scoring word per test item
        (``None`` only when ``models`` is empty).
        Both lists follow the iteration order of ``get_all_Xlengths()``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for X, lengths in test_set.get_all_Xlengths().values():
        scores = {}
        for word, model in models.items():
            try:
                scores[word] = model.score(X, lengths)
            except Exception:
                # Keep the word in the table, ranked last. Dropping it left
                # the dict incomplete and made max() fail when every model
                # was unable to score the item.
                scores[word] = float('-inf')
        probabilities.append(scores)
        guesses.append(max(scores, key=scores.get, default=None))

    return probabilities, guesses
예제 #20
0
def recognize(models: dict, test_set: SinglesData):
    """Score test items against all models while tracking the best candidate.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object whose ``get_all_Xlengths()`` returns a dict of
        ``(X, lengths)`` tuples
    :return: ``(probabilities, guesses)``; one ``{word: log-likelihood}`` dict
        per test item and one guess per test item. A guess is ``None`` when
        no model produced a score strictly above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []

    floor = float("-inf")
    for features, seq_len in test_set.get_all_Xlengths().values():
        log_l_by_word = {}
        top_word = None
        top_score = floor
        for word, hmm in models.items():
            try:
                log_l = hmm.score(features, seq_len)
                log_l_by_word[word] = log_l
                if log_l > top_score:
                    top_word = word
                    top_score = log_l
            except:
                # Non-viable models stay in the table with the lowest score.
                log_l_by_word[word] = float("-inf")
        guesses.append(top_word)
        probabilities.append(log_l_by_word)

    return probabilities, guesses
예제 #21
0
def recognize(models: dict, test_set: SinglesData):
    """Produce a log-likelihood table and a best guess for each test item.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object whose ``get_all_Xlengths()`` returns a dict of
        ``(X, lengths)`` tuples
    :return: ``(probabilities, guesses)``; one ``{word: log-likelihood}`` dict
        and one guessed word per test item, in the iteration order of
        ``get_all_Xlengths()``
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for X, length in test_set.get_all_Xlengths().values():
        log_probs = {}
        for trained_word, hmm in models.items():
            try:
                log_probs[trained_word] = hmm.score(X, length)
            except:
                # Scoring failed: give the model the lowest possible rank.
                log_probs[trained_word] = float("-inf")

        probabilities.append(log_probs)
        best_word = max(log_probs, key=log_probs.get)
        guesses.append(best_word)

    return probabilities, guesses
예제 #22
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict mapping a word to a trained model exposing
        ``score(X, lengths)``
    :param test_set: object whose ``get_all_Xlengths()`` returns a dict of
        ``(X, lengths)`` tuples
    :return: ``(probabilities, guesses)``
        ``probabilities`` is a list with one ``{word: log-likelihood}`` dict
        per test item; every word in ``models`` is present, with
        ``float("-inf")`` for models that failed to score the item.
        ``guesses`` is a list with the best-scoring word per test item, or
        ``None`` when no model could score the item.
        Both lists follow the iteration order of ``get_all_Xlengths()``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for X, lengths in test_set.get_all_Xlengths().values():
        max_score = None
        guess_word = None
        prob_word = {}
        for word, model in models.items():
            try:
                score = model.score(X, lengths)
            except Exception:
                # Record the failure instead of silently omitting the word,
                # so every table has the same keys as `models`.
                prob_word[word] = float("-inf")
                continue
            prob_word[word] = score
            if max_score is None or max_score < score:
                max_score = score
                guess_word = word
        probabilities.append(prob_word)
        guesses.append(guess_word)

    return probabilities, guesses
예제 #23
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences using a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_Xlengths()
        probabilities is a list of dictionaries, one per test item, mapping
        each word to the value returned by _score_data for that word's model
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list with one argmax(...) result per test item
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for features, lengths in test_set.get_all_Xlengths().values():
        word_scores = {}
        for candidate, hmm in models.items():
            # _score_data is defined elsewhere in this file; presumably it
            # returns the model's log likelihood for the item -- confirm.
            word_scores[candidate] = _score_data(hmm, features, lengths)
        probabilities.append(word_scores)
        # argmax is defined elsewhere in this file; presumably it returns the
        # key with the largest value -- confirm.
        guesses.append(argmax(word_scores))
    return probabilities, guesses
예제 #24
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences using a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_Xlengths()
        probabilities is a list of dictionaries, one per test item, mapping
        each word to the value returned by score(...) for that word's model
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the highest-valued word for each test item
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    :raises ValueError: if models is empty while the test set is not
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for features, lengths in test_set.get_all_Xlengths().values():
        # score is a helper defined elsewhere in this file; presumably it
        # wraps model.score and returns a log likelihood -- confirm.
        word_probabilities = {}
        for word, model in models.items():
            word_probabilities[word] = score(model, features, lengths)

        # Pick the word with the largest value; the first one wins on ties.
        guess = max(word_probabilities, key=word_probabilities.get)

        probabilities.append(word_probabilities)
        guesses.append(guess)

    return probabilities, guesses
예제 #25
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences using a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the key order of test_set.get_all_Xlengths()
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (-inf when the model fails to score)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words, one per test item
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    :raises ValueError: if models is empty while the test set is not
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for word_id in test_set.get_all_Xlengths():
        x, lens = test_set.get_item_Xlengths(word_id)
        # Map each candidate word to its log likelihood for this item.
        word_liklihoods = {}
        for word, model in models.items():
            try:
                word_liklihoods[word] = model.score(x, lens)
            except Exception:
                # A model that cannot score the item can never be the guess.
                # Only ordinary errors are absorbed; KeyboardInterrupt and
                # SystemExit still propagate.
                word_liklihoods[word] = float("-inf")
        probabilities.append(word_liklihoods)
        guesses.append(max(word_liklihoods, key=word_liklihoods.get))
    return probabilities, guesses
예제 #26
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences using a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_Xlengths()
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (-inf when the model fails to score)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words, one per test item
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    :raises ValueError: if models is empty while the test set is not
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # Only the (X, lengths) pairs are needed, so iterate the values directly.
    for X, lengths in test_set.get_all_Xlengths().values():
        test_word_scores = {}
        for w, m in models.items():
            try:
                test_word_scores[w] = m.score(X, lengths)
            except Exception:
                # Unscorable models get the worst score; interpreter-level
                # signals such as KeyboardInterrupt are not swallowed.
                test_word_scores[w] = -math.inf
        # The first word wins when several share the top score.
        best_word = max(test_word_scores, key=test_word_scores.get)
        probabilities.append(test_word_scores)
        guesses.append(best_word)

    return probabilities, guesses
예제 #27
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences using a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_Xlengths()
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (-inf when the model fails to score)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words, one per test item
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        a guess is None when no model scores above -inf
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for X, lengths in test_set.get_all_Xlengths().values():
        probability = {}
        best_score = float("-inf")
        best_guess = None
        # The loop variable is named "candidate" so it cannot be confused
        # with the test item being recognized.
        for candidate, model in models.items():
            try:
                score = model.score(X, lengths)
            except Exception:
                # Unscorable models get the worst score; interpreter-level
                # signals such as KeyboardInterrupt are not swallowed.
                score = float("-inf")
            probability[candidate] = score
            # Strict comparison: ties keep the earlier word, and -inf never wins.
            if score > best_score:
                best_score, best_guess = score, candidate
        probabilities.append(probability)
        guesses.append(best_guess)
    return probabilities, guesses
예제 #28
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences using a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_Xlengths()
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (-inf when the model fails to score)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words, one per test item
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
        a guess is None when no model scores above -inf
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for X, lengths in test_set.get_all_Xlengths().values():
        best_score, best_word, word_scores = float("-inf"), None, {}
        for word, model in models.items():
            try:
                score = model.score(X, lengths)
            except Exception:
                # Unscorable models get the worst score; interpreter-level
                # signals such as KeyboardInterrupt are not swallowed.
                score = float("-inf")
            # Strict comparison: ties keep the earlier word, and -inf never wins.
            if score > best_score:
                best_score, best_word = score, word
            word_scores[word] = score
        guesses.append(best_word)
        probabilities.append(word_scores)

    return probabilities, guesses
예제 #29
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences using a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_Xlengths()
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (-inf when the model fails to score)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words, one per test item
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    :raises ValueError: if models is empty while the test set is not
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    def try_or_default(fn, *args, default=float("-Inf")):
        """Return fn(*args), or default when the call raises an ordinary error."""
        try:
            return fn(*args)
        except Exception:
            # KeyboardInterrupt / SystemExit are deliberately not caught here.
            return default

    for X, lengths in test_set.get_all_Xlengths().values():
        probability_dict = {
            word: try_or_default(model.score, X, lengths)
            for word, model in models.items()
        }
        probabilities.append(probability_dict)
        # The first word wins when several share the top score.
        guesses.append(max(probability_dict, key=probability_dict.get))
    return probabilities, guesses
예제 #30
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences using a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_Xlengths()
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (-inf when the model fails to score)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ...}, ...]
        guesses is a list of the best guess words, one per test item
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2', ...]
    :raises ValueError: if models is empty while the test set is not
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    # Only the (X, lengths) pairs are needed, so iterate the values directly.
    for X, lengths in test_set.get_all_Xlengths().values():
        word_prob = {}
        for model_key, model_value in models.items():
            try:
                word_prob[model_key] = model_value.score(X, lengths)
            except Exception:
                # Unscorable models get the worst score; interpreter-level
                # signals such as KeyboardInterrupt are not swallowed.
                word_prob[model_key] = float("-inf")
        probabilities.append(word_prob)
        # The first word wins when several share the top score.
        guesses.append(max(word_prob, key=word_prob.get))
    return probabilities, guesses