def test_get_corrected_split_queries(self):
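        """get_corrected_split_queries should join adjacent word fragments that
        form a lexicon word (e.g. "forw" + "ard" -> "forward") and return the
        resulting corrected queries; a single-word query yields no candidates."""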
        # No split corrections possible for a single-word query
        query_1_word = ["fast"]
        ans_1_word = []
        # One split, two words in total
        query_2_word = ["forw", "ard"]
        ans_2_word = [["forward"]]
        # One split, three words in total
        query_3_word = ["forw", "ard", "march"]
        ans_3_word = [["forward", "march"]]
        # One split, four words in total
        query_4_word = ["fast", "forw", "ard", "march"]
        ans_4_word = [["fast", "forward", "march"]]

        queries = [query_1_word, query_2_word, query_3_word, query_4_word]
        queries = [Suggestion(query) for query in queries]
        answers = [ans_1_word, ans_2_word, ans_3_word, ans_4_word]

        for query, answer in zip(queries, answers):
            self.assertEqual(
                utils.get_corrected_split_queries(query, self.lexicon), answer)

    def generate_suggestions_and_posteriors(self, query,
                                            get_posterior_fn=None):
        """Return (suggestion, posterior) pairs for query.

        Get a list of candidate suggestions and calculate posteriors
        for each of them.

        Arguments:
        - `query`: Suggestion object.
        - `get_posterior_fn`: optional callable taking (suggestion, query) and
          returning an unnormalized posterior; defaults to self.get_posterior_fn.

        Returns a list of (suggestion, posterior) pairs sorted in decreasing
        order of posterior, with the posteriors normalized to sum to one.
        """
        # Allow callers to supply an alternative posterior function
        if get_posterior_fn is None:
            get_posterior_fn = self.get_posterior_fn

        # Run-on correction (utils.get_corrected_run_on_queries) is currently
        # disabled; the candidate queries are the original query plus its
        # split corrections
        all_queries = [query] + utils.get_corrected_split_queries(query, self.lexicon)

        # For each candidate query, a list of (query, suggestion) pairs, one per
        # generated suggestion; the loop variable is named q so the Python 2
        # comprehension does not rebind the `query` argument
        all_suggestions = [[(q, suggestion)
                            for suggestion in self.generate_candidate_suggestions(
                                map(self.generate_candidate_terms, q),
                                q.suggestion_type)]
                           for q in all_queries]

        # Flatten the list of list of suggestions
        all_suggestions = list(itertools.chain(*all_suggestions))

        # Rank (query, suggestion) pairs by likelihood, best first, so that the
        # truncation below keeps the most promising candidates
        all_suggestions.sort(key=lambda query_sugg_tuple:
                             phrase.get_likelihood(*query_sugg_tuple),
                             reverse=True)

        # Remove duplicate pairs; groupby only collapses consecutive duplicates,
        # so this relies on identical pairs landing next to each other after the
        # likelihood sort above
        all_suggestions = [key for key, _ in itertools.groupby(all_suggestions)]

        # Keep only the top MAX_NUM_SUGGESTIONS candidates
        all_suggestions = all_suggestions[:self.MAX_NUM_SUGGESTIONS]

        # Compute an (unnormalized) posterior for each surviving
        # (query, suggestion) pair
        all_posteriors = [get_posterior_fn(suggestion, q)
                          for q, suggestion in all_suggestions]

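        # Keep only the suggestion from each (query, suggestion) pair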
        all_suggestions = list(zip(*all_suggestions)[1])

        # TODO: keep the original query as a suggestion when its own posterior
        # exceeds the threshold:
        #     original_query_posterior = get_posterior_fn(query, query)
        #     if original_query_posterior > self.ORIGINAL_POSTERIOR_THRESHOLD:
        #         all_suggestions += [query]
        #         all_posteriors += [original_query_posterior]

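        # Normalize the posteriors, pair each with its suggestion, and return
        # the pairs sorted by posterior, best first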
        normalized_posteriors = utils.get_normalized_probabilities(all_posteriors)
        suggestion_posterior_list = list(zip(all_suggestions, normalized_posteriors))
        suggestion_posterior_list.sort(key=lambda pair: pair[1], reverse=True)
        return suggestion_posterior_list
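
    # A minimal usage sketch (hypothetical: the SpellCorrector name below is
    # illustrative, not from this file; it stands for whatever class this method
    # is defined on, with a lexicon and a default get_posterior_fn already set up):
    #
    #   corrector = SpellCorrector()
    #   query = Suggestion(["forw", "ard", "march"])
    #   for suggestion, posterior in corrector.generate_suggestions_and_posteriors(query):
    #       print suggestion, posterior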