def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    Every test item is scored against every model; the per-word log
    likelihoods are recorded and the highest-scoring word becomes the guess.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_sequences()
        probabilities is a list of dicts mapping each word to its log
            likelihood (float('-inf') when the model could not score the item)
        guesses is a list with the best-scoring word per item, or None when
            ``models`` is empty
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for word_id in test_set.get_all_sequences():
        # The features depend only on the test item, so fetch them once
        # instead of once per model.
        X, lengths = test_set.get_item_Xlengths(word_id)

        log_likelihoods = {}
        for word, model in models.items():
            try:
                log_likelihoods[word] = model.score(X, lengths)
            except Exception:
                # A model that cannot score this item is the worst candidate.
                log_likelihoods[word] = float('-inf')

        # Always append, so both lists stay index-aligned with the test set
        # even when there is nothing to score against.
        probabilities.append(log_likelihoods)
        if log_likelihoods:
            guesses.append(max(log_likelihoods, key=log_likelihoods.get))
        else:
            guesses.append(None)

    return probabilities, guesses
# Example #2
# 0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_sequences()
        probabilities is a list of dicts, one per test item, mapping each
            word to its log likelihood; a model that fails to score an item
            contributes float('-inf')
        guesses is a list of the best guess word per test item; None is used
            when there are no models to choose from
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for word_id in test_set.get_all_sequences():
        X, lengths = test_set.get_item_Xlengths(word_id)

        prob_dict = {}
        for word, model in models.items():
            try:
                prob_dict[word] = model.score(X, lengths)
            except Exception:
                prob_dict[word] = float('-inf')
        probabilities.append(prob_dict)

        if prob_dict:
            # Rank by (score, word): equal scores resolve deterministically
            # to the lexicographically greater word.
            guess = max(prob_dict, key=lambda w: (prob_dict[w], w))
        else:
            # max() of an empty collection raises; report "no guess" instead.
            guess = None
        guesses.append(guess)

    return probabilities, guesses
# Example #3
# 0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_sequences()
        probabilities is a list of dicts mapping each word to its log
            likelihood, float('-inf') standing in for a failed score
        guesses is a list of the first word with the highest log likelihood
            for each item, or None when ``models`` is empty
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    def log_likelihood(model, X, lengths):
        """Score one item with one model; a scoring failure becomes -inf."""
        try:
            return model.score(X, lengths)
        except Exception:
            return float("-inf")

    all_Xlengths = test_set.get_all_Xlengths()
    probabilities = []
    guesses = []
    for word_id in test_set.get_all_sequences():
        X, lengths = all_Xlengths[word_id]
        probability = {
            word: log_likelihood(model, X, lengths)
            for word, model in models.items()
        }
        probabilities.append(probability)
        # max() over the dict returns the first key holding the top score;
        # guard the empty case, where max() would raise ValueError.
        guesses.append(
            max(probability, key=probability.get) if probability else None
        )
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_sequences()
        probabilities is a list of dicts mapping each word to its log
            likelihood; a word whose model raised while scoring is recorded
            as float('-inf') so every value stays numeric
        guesses is a list of the best guess word per item, or None when no
            model produced a score for that item
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for item in test_set.get_all_sequences():
        X, length = test_set.get_item_Xlengths(item)
        scores = {}
        guess = None
        best_score = None  # None until some model scores successfully

        for word, model in models.items():
            try:
                score = model.score(X, length)
            except Exception:
                # Keep the entry comparable with real log likelihoods rather
                # than storing None, which cannot be ordered against floats.
                scores[word] = float('-inf')
                continue
            scores[word] = score
            if best_score is None or score > best_score:
                best_score, guess = score, word

        probabilities.append(scores)
        guesses.append(guess)

    return probabilities, guesses
# Example #5
# 0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    Only the first sequence stored for each test item is scored, and its
    frame count is passed to the model as the single sequence length.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_sequences()
        probabilities is a list of dicts mapping every word to its log
            likelihood (float('-inf') when its model could not score the item)
        guesses is a list of the first word with the highest log likelihood
            per item, or None when ``models`` is empty
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for sequences in test_set.get_all_sequences().values():
        X = sequences[0]
        lengths = [len(X)]

        probability = {}
        for word, model in models.items():
            try:
                probability[word] = model.score(X, lengths)
            except Exception:
                # Record the failure instead of dropping the word, so every
                # dict carries the same keys and max() below always has input.
                probability[word] = float('-inf')

        probabilities.append(probability)
        guesses.append(
            max(probability, key=probability.get) if probability else None
        )

    return probabilities, guesses
# Example #6
# 0
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best word.

    :param models: dict of trained models
       {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list)  as probabilities, guesses
       both lists follow the iteration order of test_set.get_all_sequences()
       probabilities is a list of dicts mapping each word to its log
           likelihood; a ValueError raised while scoring is recorded as
           float("-inf")
       guesses is a list holding, per item, the first word with the highest
           log likelihood
    """
    for ignored_category in (DeprecationWarning, RuntimeWarning):
        warnings.filterwarnings("ignore", category=ignored_category)

    def score_or_floor(model, X, lengths):
        """Return the model's score, or -inf when scoring raises ValueError."""
        try:
            return model.score(X, lengths)
        except ValueError:
            return float("-inf")

    probabilities, guesses = [], []
    for word_id in test_set.get_all_sequences():
        X, lengths = test_set.get_item_Xlengths(word_id)
        log_likelihoods = {
            word: score_or_floor(model, X, lengths)
            for word, model in models.items()
        }
        probabilities.append(log_likelihoods)
        best_word, _ = max(log_likelihoods.items(), key=lambda entry: entry[1])
        guesses.append(best_word)
    return probabilities, guesses
# Example #7
# 0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_sequences()
        probabilities is a list of dicts mapping each word to its log
            likelihood (-inf when the model raised while scoring)
        guesses is a list of the best guess word per item; None when no
            model scored above -inf
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    words = test_set.get_all_sequences()
    hwords = test_set.get_all_Xlengths()

    for word in words:
        X, length = hwords[word]
        scores = {}
        best_model = None
        best_score = -float("inf")

        # Score and track the running best in a single pass over the models.
        for model_key, model in models.items():
            try:
                score = model.score(X, length)
            except Exception:
                # Scoring failures are expected for some models; anything
                # outside Exception (e.g. KeyboardInterrupt) must propagate.
                score = -float("inf")
            scores[model_key] = score
            if score > best_score:
                best_score = score
                best_model = model_key

        probabilities.append(scores)
        guesses.append(best_model)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    Word ids are taken to be 0 .. N-1, where N is the number of entries
    returned by test_set.get_all_sequences().

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dicts mapping each word to its log
            likelihood (-inf when the model raised while scoring); the dict
            is empty when no feature data exists for the word_id
        guesses is a list of the best guess word per word_id, or None when
            no model scored above -inf
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # general/specific algorithm snippets from forum discussion:
    # https://discussions.udacity.com/t/recognizer-implementation/234793/22
    # https://discussions.udacity.com/t/recognizer-implementation/234793/28

    hwords = test_set.get_all_Xlengths()
    for word_id in range(len(test_set.get_all_sequences())):
        p_of_words = {}
        guess_word = None
        max_score = float("-inf")

        try:
            x, lengths = hwords[word_id]
        except (KeyError, IndexError):
            # No features stored under this id: fall through with the empty
            # placeholders so both output lists stay aligned with the ids.
            pass
        else:
            # Record every model's score and remember the highest one.
            for word, model in models.items():
                try:
                    score = model.score(x, lengths)
                except Exception:
                    score = float("-inf")
                p_of_words[word] = score
                if score > max_score:
                    guess_word, max_score = word, score

        probabilities.append(p_of_words)
        guesses.append(guess_word)

    return probabilities, guesses
# Example #9
# 0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    Word ids are taken to be 0 .. N-1, where N is the number of entries
    returned by test_set.get_all_sequences().

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dicts mapping every word to its log
            likelihood (float('-inf') when its model raised while scoring)
        guesses is a list of the best guess word per word_id, or None when
            no model scored above -inf
        If an unexpected error interrupts the loop, 'outer exception' is
        printed and the results gathered so far are returned.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    hwords = test_set.get_all_Xlengths()
    try:
        for word_id in range(len(test_set.get_all_sequences())):
            X, lengths = hwords[word_id]
            words_prob = {}
            best_score = float('-Inf')
            guess_word = None

            for word, model in models.items():
                try:
                    score = model.score(X, lengths)
                except Exception:
                    # Keep the word in the dict with the worst possible
                    # score rather than silently leaving it out.
                    score = float('-Inf')
                words_prob[word] = score
                if score > best_score:
                    guess_word = word
                    best_score = score

            probabilities.append(words_prob)
            guesses.append(guess_word)
    except Exception:
        # Best effort: report the problem and return the partial results.
        print('outer exception')

    return probabilities, guesses
# Example #10
# 0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    Word ids are taken to be 0 .. N-1, where N is the number of entries
    returned by test_set.get_all_sequences(). Each item is scored using its
    first raw sequence together with the lengths from get_all_Xlengths().
    Progress messages are printed before and after the run.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dicts mapping every word to its log
            likelihood (float("-inf") when its model raised while scoring)
        guesses is a list of the best guess word per word_id, or None when
            no model scored above -inf
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    all_sequences = test_set.get_all_sequences()
    all_Xlenghts = test_set.get_all_Xlengths()

    print('Started recognizing ...')

    for i in range(len(all_sequences)):
        sequence = all_sequences[i][0]
        lengths = all_Xlenghts[i][1]

        bestLL = float("-inf")
        bestWord = None
        probs = {}

        for word, model in models.items():
            try:
                ll = model.score(sequence, lengths)
            except Exception:
                # Give the failed model an explicit score. Otherwise the
                # value recorded below would be whatever the previous model
                # produced, or an unbound name if this is the first model.
                ll = float("-inf")

            probs[word] = ll
            if ll > bestLL:
                bestLL = ll
                bestWord = word

        guesses.append(bestWord)
        probabilities.append(probs)

    print('Finished analyzing {} words '.format(len(all_sequences)))

    return probabilities, guesses
# Example #11
# 0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    Word ids are taken to be 0 .. N-1, where N is the number of entries
    returned by test_set.get_all_sequences().

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dicts mapping each word to its log
            likelihood (float("-inf") when the model raised while scoring)
        guesses is a list of the best guess word per word_id, or None when
            no model scored above -inf
        If an unexpected error interrupts the loop, a RuntimeWarning is
        issued and the results gathered so far are returned.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # 1- Calculate the logL score of every word model for each test item.
    # 2- Append these scores to the "probabilities" list.
    # 3- Append the word with the maximum score to the "guesses" list.
    try:
        for word_id in range(len(test_set.get_all_sequences())):
            X, lengths = test_set.get_item_Xlengths(word_id)
            logL_words = {}
            best_word = None
            best_score = float('-inf')

            for word, model in models.items():
                try:
                    score = model.score(X, lengths)
                except Exception:
                    score = float("-inf")
                logL_words[word] = score
                if score > best_score:
                    best_word = word
                    best_score = score

            probabilities.append(logL_words)
            guesses.append(best_word)
    except Exception as err:
        # Still best effort, but no longer silent: tell the caller that the
        # returned lists may be shorter than the test set.
        warnings.warn(
            "recognition stopped early: {!r}".format(err), RuntimeWarning
        )

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_sequences()
        probabilities is a list of dicts mapping each word to its log
            likelihood (float('-inf') when the model raised while scoring)
        guesses is a list of the best guess word per item, or None when no
            model scored above -inf
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    Xlengths = test_set.get_all_Xlengths()
    sequences = test_set.get_all_sequences()

    for sequence in sequences:
        X, xlengths = Xlengths[sequence]
        prob_dict = {}
        best_guess = None
        # Start from the lowest possible logL so any real score replaces it.
        best_logL = float('-inf')

        for guess, model in models.items():
            try:
                logL = model.score(X, xlengths)
            except Exception:
                # Only ordinary scoring errors are absorbed; interrupts and
                # interpreter exits still propagate.
                logL = float('-inf')
            prob_dict[guess] = logL
            if logL > best_logL:
                best_logL = logL
                best_guess = guess

        probabilities.append(prob_dict)
        guesses.append(best_guess)
    return probabilities, guesses
# Example #13
# 0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    Word ids are taken to be 0 .. N-1, where N is the number of entries
    returned by test_set.get_all_sequences().

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dicts mapping each word to its log
            likelihood (float('-inf') when the model raised while scoring)
        guesses is a list of the best guess word per word_id, or None when
            no model scored above -inf
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    hwords = test_set.get_all_Xlengths()

    for idWord in range(len(test_set.get_all_sequences())):
        X, lenWord = hwords[idWord]
        pbWord = {}
        guessWord = None
        bestScore = float('-inf')

        for word, model in models.items():
            try:
                score = model.score(X, lenWord)
            except Exception:
                # The model cannot score this item; rank it last.
                score = float('-inf')
            pbWord[word] = score
            if score > bestScore:
                guessWord, bestScore = word, score

        probabilities.append(pbWord)
        guesses.append(guessWord)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    Word ids are taken to be 0 .. N-1, where N is the number of entries
    returned by test_set.get_all_sequences().

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dicts mapping each word to its log
            likelihood (float('-inf') when the model raised while scoring)
        guesses is a list of the best guess word per word_id, or None when
            no model scored above -inf for that word_id
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    x_lengths = test_set.get_all_Xlengths()
    n_sequences = len(test_set.get_all_sequences())

    for single_data in range(n_sequences):
        X, lengths = x_lengths[single_data]
        prob = {}
        # Reset for every item: the guess must never carry over from the
        # previous item when nothing scores above -inf here.
        best_score = float('-inf')
        best_guess = None

        for g_word, model in models.items():
            try:
                score = model.score(X, lengths)
            except Exception:
                prob[g_word] = float('-inf')
                continue

            prob[g_word] = score
            if score > best_score:
                best_score = score
                best_guess = g_word

        probabilities.append(prob)
        guesses.append(best_guess)

    return probabilities, guesses
# Example #15
# 0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_sequences()
        probabilities is a list of dicts mapping every word to its log
            likelihood (float('-inf') when its model raised while scoring)
        guesses is a list of the best guess word per item, or None when no
            model scored above -inf
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for word_id in test_set.get_all_sequences():
        word_id_X, word_id_Lengths = test_set.get_item_Xlengths(word_id)

        # Per-item state: likelihood of each word model and the running best.
        word_likelihood = {}
        best_score = float('-inf')
        best_guess = None

        for word_name, current_model in models.items():
            try:
                current_score = current_model.score(word_id_X, word_id_Lengths)
            except Exception:
                # Keep the word in the result with the worst possible score
                # so every dict carries the same set of keys.
                word_likelihood[word_name] = float('-inf')
                continue

            word_likelihood[word_name] = current_score
            if current_score > best_score:
                best_score = current_score
                best_guess = word_name

        probabilities.append(word_likelihood)
        guesses.append(best_guess)

    return probabilities, guesses
# Example #16
# 0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_sequences()
        probabilities is a list of dicts mapping each word to its log
            likelihood (float('-inf') when the model raised while scoring)
        guesses is a list of the best guess word per item, or None when no
            model scored above -inf
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # Only the keys are needed here; the features come from
    # get_item_Xlengths.
    for index in test_set.get_all_sequences():
        X, lengths = test_set.get_item_Xlengths(index)
        guess_dict = {}
        best_guess_word = None
        best_score = float("-inf")

        for word, model in models.items():
            try:
                temp_score = model.score(X, lengths)
            except Exception:
                temp_score = float('-inf')
            guess_dict[word] = temp_score
            if temp_score > best_score:
                best_score = temp_score
                best_guess_word = word

        guesses.append(best_guess_word)
        probabilities.append(guess_dict)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    Word ids are taken to be 0 .. N-1, where N is the number of entries
    returned by test_set.get_all_sequences().

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dicts mapping each word to its log
            likelihood (float('-Inf') when the model raised while scoring)
        guesses is a list of the best guess word per word_id, or None when
            no model scored above -inf
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    # (X, lengths) pairs for every test item, keyed by word id.
    hwords = test_set.get_all_Xlengths()
    for word_id in range(len(test_set.get_all_sequences())):
        X, lengths = hwords[word_id]
        words_prob = {}
        guess_word = None
        best_score = float('-Inf')

        # Map every word to its score and keep track of the best one.
        for word, model in models.items():
            try:
                score = model.score(X, lengths)
            except Exception:
                # A failed score counts as the lowest possible likelihood.
                score = float('-Inf')

            words_prob[word] = score
            if score > best_score:
                guess_word, best_score = word, score

        probabilities.append(words_prob)
        guesses.append(guess_word)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_sequences()
        probabilities is a list of dicts mapping each word to its log
            likelihood (float("-inf") when the model raised while scoring)
        guesses is a list of the best guess word per item, or None when no
            model scored above -inf
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    Xlengths = test_set.get_all_Xlengths()

    for sequence in test_set.get_all_sequences():
        X, lengths = Xlengths[sequence]
        prob = {}
        best_guess, best_score = None, float("-inf")

        for word, model in models.items():
            try:
                logL = model.score(X, lengths)
            except Exception:
                # Absorb scoring errors only; BaseException subclasses such
                # as KeyboardInterrupt are left to propagate.
                logL = float("-inf")

            prob[word] = logL
            if logL > best_score:
                best_guess, best_score = word, logL

        probabilities.append(prob)
        guesses.append(best_guess)

    return probabilities, guesses
# Example #19
# 0
def recognize(models: dict, test_set: SinglesData):
    """Match each test item to the word model that scores it highest.

    :param models: dict mapping each word to a trained model that exposes
        ``score(X, lengths)``
    :param test_set: SinglesData object supplying ``get_all_sequences()`` and
        ``get_item_Xlengths(word_id)``
    :return: ``(probabilities, guesses)``, both in the iteration order of
        ``test_set.get_all_sequences()``.
        ``probabilities`` is a list of ``{word: log likelihood}`` dicts; a
        word whose model raised is stored as ``-inf``.
        ``guesses`` is the list of best-scoring words (``None`` when nothing
        scored above ``-inf``).
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []

    for item_id in test_set.get_all_sequences():
        X, lengths = test_set.get_item_Xlengths(item_id)
        per_word = {}
        top_word, top_score = None, float("-inf")

        for word in models:
            try:
                value = models[word].score(X, lengths)
                per_word[word] = value
                if not value > top_score:
                    continue
                top_word, top_score = word, value
            except:
                # Any failure while scoring or comparing demotes the word.
                per_word[word] = float("-inf")

        probabilities.append(per_word)
        guesses.append(top_word)

    return probabilities, guesses
예제 #20
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict mapping each word to a trained model that exposes
        ``score(X, lengths)``
    :param test_set: SinglesData object; items are addressed by the integer
        indices ``0 .. len(test_set.get_all_sequences()) - 1`` through
        ``get_item_Xlengths(index)``
    :return: ``(probabilities, guesses)``, both ordered by test item index.
        ``probabilities`` is a list of ``{word: log likelihood}`` dicts that
        always contain every word in ``models``; a model that fails to score
        an item is recorded as ``-inf``.
        ``guesses`` is the list of best-scoring words, ``None`` for an item
        that no model scored above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    no_score = float("-inf")

    total_words = len(test_set.get_all_sequences())
    for word_index in range(total_words):
        x, lengths = test_set.get_item_Xlengths(word_index)
        log_likelihoods = {}
        best_fit_word = None
        best_logL = no_score

        for word, model in models.items():
            try:
                logL = model.score(x, lengths)
            except Exception:
                # Keep an entry for the word instead of dropping it, so
                # consumers can index every dict by any vocabulary word.
                logL = no_score
            log_likelihoods[word] = logL
            if logL > best_logL:
                best_logL = logL
                best_fit_word = word

        probabilities.append(log_likelihoods)
        guesses.append(best_fit_word)

    return probabilities, guesses
예제 #21
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict mapping each word to a trained model that exposes
        ``score(X, lengths)``
    :param test_set: SinglesData object; items are addressed by the integer
        indices ``0 .. len(test_set.get_all_sequences()) - 1`` through
        ``get_item_Xlengths(index)``
    :return: ``(probabilities, guesses)``, both ordered by test item index.
        ``probabilities`` is a list of ``{word: log likelihood}`` dicts; a
        model that fails to score an item is recorded as ``-inf``.
        ``guesses`` is the list of best-scoring words, ``None`` for an item
        that no model scored above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    no_score = float("-inf")

    for index in range(len(test_set.get_all_sequences())):
        current_X, current_lengths = test_set.get_item_Xlengths(index)
        log_likelihoods = {}
        best_word = None
        best_score = no_score

        for word, model in models.items():
            try:
                score = model.score(current_X, current_lengths)
            except Exception:
                # Give a failed model its own -inf score. Reusing the value
                # left over from the previous word would credit this word
                # with another model's likelihood (or crash on the first
                # word), and a placeholder of 0 would outrank real negative
                # log likelihoods.
                score = no_score
            log_likelihoods[word] = score

            if score > best_score:
                best_score = score
                best_word = word

        probabilities.append(log_likelihoods)
        guesses.append(best_word)

    return probabilities, guesses
예제 #22
0
def recognize(models: dict, test_set: SinglesData):
    """Produce per-word log likelihoods and a best guess for each test item.

    :param models: dict mapping each word to a trained model that exposes
        ``score(X, lengths)``
    :param test_set: SinglesData object supplying ``get_all_sequences()`` and
        ``get_item_Xlengths(item)``
    :return: ``(probabilities, guesses)``, both in the key order of
        ``test_set.get_all_sequences()``.
        ``probabilities`` is a list of ``{word: log likelihood}`` dicts, with
        ``-inf`` for any word whose model raised an ``Exception``.
        ``guesses`` is the list of best-scoring words; an item for which no
        score exceeded ``-inf`` gets the empty string.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    def _rank_words(X, lengths):
        """Return (score table, winning word) for one test item."""
        table = dict()
        winner = ''
        ceiling = float('-inf')
        for word, model in models.items():
            try:
                current = model.score(X, lengths)
                table[word] = current
                if current > ceiling:
                    ceiling, winner = current, word
            except Exception as e:
                table[word] = float('-inf')
        return table, winner

    probabilities = []
    guesses = []
    for item in test_set.get_all_sequences().keys():
        X, lengths = test_set.get_item_Xlengths(item)
        table, winner = _rank_words(X, lengths)
        probabilities.append(table)
        guesses.append(winner)

    return probabilities, guesses
예제 #23
0
def recognize(models: dict, test_set: SinglesData):
    """Rank every word model against each test item, in sorted word_id order.

    :param models: dict mapping each word to a trained model that exposes
        ``score(X, lengths)``
    :param test_set: SinglesData object supplying ``get_all_sequences()`` and
        ``get_item_Xlengths(word_id)``
    :return: ``(probabilities, guesses)``, both ordered by the sorted keys of
        ``test_set.get_all_sequences()``.
        ``probabilities`` is a list of ``{word: log likelihood}`` dicts, with
        ``-inf`` for any word whose model raised while scoring.
        ``guesses`` is the list of best-scoring words; an item for which no
        score exceeded ``-inf`` gets the empty string.
    """

    def _log_likelihood(model, X, lengths):
        """Score one model, mapping any failure to -inf."""
        try:
            return model.score(X, lengths)
        except:
            return float("-inf")

    probabilities, guesses = [], []

    for word_id in sorted(test_set.get_all_sequences().keys()):
        X, lengths = test_set.get_item_Xlengths(word_id)
        scored = [
            (word, _log_likelihood(model, X, lengths))
            for word, model in models.items()
        ]

        winner, winner_score = "", float("-inf")
        for word, value in scored:
            # Strict comparison keeps the earliest word on ties.
            if value > winner_score:
                winner, winner_score = word, value

        guesses.append(winner)
        probabilities.append(dict(scored))

    return probabilities, guesses
예제 #24
0
def recognize(models: dict, test_set: SinglesData):
    """Evaluate all word models on each test item and keep the top scorer.

    :param models: dict mapping each word to a trained model that exposes
        ``score(X, lengths)``
    :param test_set: SinglesData object supplying ``get_all_sequences()`` and
        ``get_item_Xlengths(item)``
    :return: ``(probabilities, guesses)``, both in the iteration order of
        ``test_set.get_all_sequences()``.
        ``probabilities`` is a list of ``{word: log likelihood}`` dicts, with
        ``-inf`` for any word whose model raised.
        ``guesses`` is the list of best-scoring words (``None`` when nothing
        scored above ``-inf``).
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []

    for item_id in test_set.get_all_sequences():
        X, lengths = test_set.get_item_Xlengths(item_id)
        table = {}
        leader, leader_score = None, float("-inf")

        for word in models:
            try:
                value = models[word].score(X, lengths)
                table[word] = value
                if value > leader_score:
                    leader_score, leader = value, word
            except:
                # A model that cannot score this item is ranked last.
                table[word] = float("-inf")

        probabilities.append(table)
        guesses.append(leader)

    return probabilities, guesses
예제 #25
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict mapping each word to a trained model that exposes
        ``score(X, lengths)``
    :param test_set: SinglesData object supplying ``get_all_sequences()`` and
        ``get_item_Xlengths(word_id)``
    :return: ``(probabilities, guesses)``, both in the iteration order of
        ``test_set.get_all_sequences()``.
        ``probabilities`` is a list of ``{word: log likelihood}`` dicts; a
        model that fails to score an item is recorded as ``-inf``.
        ``guesses`` is the list of best-scoring words, ``None`` for an item
        that no model scored above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    probabilities = []
    guesses = []
    no_score = float("-inf")

    for word_id in test_set.get_all_sequences():
        curr_X, curr_lengths = test_set.get_item_Xlengths(word_id)
        best_word = None
        best_score = no_score
        p_dict = {}

        for word, model in models.items():
            try:
                score = model.score(curr_X, curr_lengths)
            except Exception:
                # Record failures as -inf. A placeholder of 0 would read as a
                # better log likelihood than any genuine negative score.
                score = no_score
            p_dict[word] = score
            if score > best_score:
                best_score = score
                best_word = word

        probabilities.append(p_dict)
        guesses.append(best_word)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict mapping each word to a trained model that exposes
        ``score(X, lengths)``
    :param test_set: SinglesData object exposing ``num_items`` and
        ``get_item_Xlengths(index)`` for indices ``0 .. num_items - 1``
    :return: ``(probabilities, guesses)``, both ordered by test item index.
        ``probabilities`` is a list of ``{word: log likelihood}`` dicts; a
        model that fails to score an item is recorded as ``-inf``.
        ``guesses`` is the list of best-scoring words, ``None`` for an item
        that no model scored above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    no_score = float("-inf")

    for ix in range(test_set.num_items):
        X, lengths = test_set.get_item_Xlengths(ix)
        word_probs = {}
        max_prob = no_score
        word_predict = None

        for word, model in models.items():
            try:
                logL = model.score(X, lengths)
            except Exception:
                # Best effort: a model that cannot score this item ranks last.
                logL = no_score
            word_probs[word] = logL
            if logL > max_prob:
                max_prob = logL
                word_predict = word

        probabilities.append(word_probs)
        guesses.append(word_predict)

    return probabilities, guesses
예제 #27
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict mapping each word to a trained model that exposes
        ``score(X, lengths)``
    :param test_set: SinglesData object; ``get_all_Xlengths()`` is indexed by
        the integers ``0 .. len(test_set.get_all_sequences()) - 1``
    :return: ``(probabilities, guesses)``, both ordered by test item index.
        ``probabilities`` is a list of ``{word: log likelihood}`` dicts that
        always contain every word in ``models``; a model that fails to score
        an item is recorded as ``-inf``.
        ``guesses`` is the list of best-scoring words, ``None`` for an item
        that no model scored above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    no_score = float("-inf")

    all_words_x_lengths = test_set.get_all_Xlengths()

    for word_id in range(len(test_set.get_all_sequences())):
        x, lengths = all_words_x_lengths[word_id]
        word_probs = {}
        best_score = no_score
        word_guess = None

        for word_candidate, model in models.items():
            try:
                score = model.score(x, lengths)
            except Exception:
                # Record the failure explicitly rather than swallowing it, so
                # the candidate still appears in the result with the lowest
                # possible score.
                score = no_score
            word_probs[word_candidate] = score
            if score > best_score:
                best_score = score
                word_guess = word_candidate

        probabilities.append(word_probs)
        guesses.append(word_guess)

    return probabilities, guesses
예제 #28
0
def recognize(models: dict, test_set: SinglesData):
    """Find, for each test item, the word whose model scores it highest.

    :param models: dict mapping each word to a trained model that exposes
        ``score(X, lengths)``
    :param test_set: SinglesData object supplying ``get_all_sequences()`` and
        ``get_all_Xlengths()``
    :return: ``(probabilities, guesses)``, both in the iteration order of
        ``test_set.get_all_sequences()``.
        ``probabilities`` is a list of ``{word: log likelihood}`` dicts, with
        ``-inf`` for any word whose model raised.
        ``guesses`` is the list of best-scoring words; ``None`` when no model
        scored the item successfully.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    word_ids = test_set.get_all_sequences()
    xlengths_by_id = test_set.get_all_Xlengths()

    for word_id in word_ids:
        features, lengths = xlengths_by_id[word_id]
        scores = {}
        winner = None

        for word, model in models.items():
            try:
                scores[word] = model.score(features, lengths)
                # The first successfully scored word seeds the winner; after
                # that only a strictly higher score replaces it.
                if winner is None or scores[word] > scores[winner]:
                    winner = word
            except:
                scores[word] = float('-inf')

        probabilities.append(scores)
        guesses.append(winner)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict mapping each word to a trained model that exposes
        ``score(X, lengths)``; may be empty
    :param test_set: SinglesData object supplying ``get_all_sequences()`` and
        ``get_item_Xlengths(word_id)``
    :return: ``(probabilities, guesses)``, both in the iteration order of
        ``test_set.get_all_sequences()``.
        ``probabilities`` is a list of ``{word: log likelihood}`` dicts; a
        model that fails to score an item is recorded as ``-inf``.
        ``guesses`` is the list of words with the highest log likelihood
        (the first such word on ties), or ``None`` for every item when
        ``models`` is empty.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    probabilities = []
    guesses = []

    for word_id in test_set.get_all_sequences():
        X, lengths = test_set.get_item_Xlengths(word_id)

        log_likelihoods = {}
        for word, model in models.items():
            try:
                log_likelihoods[word] = model.score(X, lengths)
            except Exception:
                # Best effort: a model that cannot score this item ranks last.
                log_likelihoods[word] = float('-inf')

        probabilities.append(log_likelihoods)
        # max() raises ValueError on an empty dict, so guard the no-model case.
        best_guess = (
            max(log_likelihoods, key=log_likelihoods.get)
            if log_likelihoods
            else None
        )
        guesses.append(best_guess)

    return probabilities, guesses
예제 #30
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    Every test item produces exactly one entry in each returned list, so
    position ``i`` in both lists always refers to the ``i``-th item yielded by
    ``test_set.get_all_sequences()``, even when no model can score that item.

    :param models: dict mapping each word to a trained model that exposes
        ``score(X, lengths)``
    :param test_set: SinglesData object supplying ``get_all_sequences()`` and
        ``get_item_Xlengths(word_id)``
    :return: ``(probabilities, guesses)``.
        ``probabilities`` is a list of ``{word: log likelihood}`` dicts; a
        model that fails to score an item is recorded as ``-inf``.
        ``guesses`` is the list of best-scoring words, ``None`` for an item
        that no model scored above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    no_score = float("-inf")

    for word_id in test_set.get_all_sequences():
        X_test, lengths_test = test_set.get_item_Xlengths(word_id)

        word_res = {}
        best_word = None
        best_logL = no_score
        for word, model in models.items():
            try:
                logL = model.score(X_test, lengths_test)
            except Exception:
                logL = no_score
            word_res[word] = logL
            if logL > best_logL:
                best_logL = logL
                best_word = word

        # Append unconditionally: skipping an unscorable item would shift
        # every later result to the wrong test word_id.
        probabilities.append(word_res)
        guesses.append(best_word)

    return probabilities, guesses
예제 #31
0
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict mapping each word to a trained model that exposes
        ``score(X, lengths)``
    :param test_set: SinglesData object supplying ``get_all_sequences()`` and
        ``get_item_Xlengths(item)``
    :return: ``(probabilities, guesses)``, both in the iteration order of
        ``test_set.get_all_sequences()``.
        ``probabilities`` is a list of ``{word: log likelihood}`` dicts; a
        model that fails to score an item is recorded as ``-inf`` so every
        value stays numeric and comparable.
        ``guesses`` is the list of best-scoring words; ``None`` when no model
        scored the item successfully.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for item in test_set.get_all_sequences():
        X, length = test_set.get_item_Xlengths(item)
        scores = {}
        best_guess = None
        best_score = None

        for word, model in models.items():
            try:
                logL = model.score(X, length)
            except Exception:
                scores[word] = float("-inf")
                continue
            scores[word] = logL
            # Test against None explicitly: a best score of exactly 0.0 is
            # falsy, and a truthiness check would let any later (lower)
            # score replace it.
            if best_score is None or best_score < logL:
                best_score = logL
                best_guess = word

        probabilities.append(scores)
        guesses.append(best_guess)

    return probabilities, guesses