def getBigramPerplexity():
    '''
    Controller method to find the perplexity of all the test books with all the genres.

    For every genre in ``genres`` the files under ``test_path + genre`` are
    tokenized with getTokensForFile, each book's tokens are paired into
    adjacent-token bigrams, and those bigrams are scored against every
    genre's bigram model with computeBigramPerplexity.

    Returns:
        defaultdict(dict) mapping book file name -> genre -> the value
        returned by computeBigramPerplexity for that book on that genre model.
    '''
    bigram_model = loadBigramModels()

    # Read the tokens one file at a time and store them under the book's file name.
    # NOTE: the key is the bare file name, so two files with the same name in
    # different genre directories overwrite each other.
    book_tokens = {}
    for genre in genres:
        print("\nReading test files for genre {0}".format(genre))
        genre_dir = test_path + genre
        for file_name in os.listdir(genre_dir):
            book_tokens[file_name] = getTokensForFile(genre_dir + '/' + file_name)

    # Construct bigrams from unigrams: pair every token with its successor.
    # .items() is used instead of .iteritems() so this runs on Python 2 and 3.
    book_bigrams = {}
    for book, tokens in book_tokens.items():
        book_bigrams[book] = [(tokens[i], tokens[i + 1]) for i in range(len(tokens) - 1)]

    # Predict bigram perplexity of every book against every genre model.
    book_perplexity = defaultdict(dict)
    for book, bigrams in book_bigrams.items():
        for genre in genres:
            perplexity = computeBigramPerplexity(bigram_model[genre], bigrams)
            book_perplexity[book][genre] = perplexity
            print("Perplexity of '{0}' book on {1} genre model: {2}".format(book, genre, perplexity))

    return book_perplexity
def main():
    '''
    Entry point: load the unigram and bigram models and run the random
    sentence generators on them.
    '''
    # Unigram pass: load (or build) the per-genre unigram models and hand
    # them straight to the unigram sentence generator.
    generateRandomSentenceFromUnigram(loadUnigramModels())

    # Bigram pass: the same models are used for both generation runs below.
    bigram_models = loadBigramModels('BigramSentenceModel')

    # Run 1: no seed supplied, n=200.
    generateRandomSentenceFromBigram(bigram_models, n=200)

    # Run 2: per-genre seed words, n=200.
    # Per the original note, genres without an entry here are expected to be
    # seeded with the <START> token by the generator (not visible here).
    seeds_by_genre = {
        'children': 'sjbdsabdoisabdoisbdoias',
        'crime': 'killed',
    }
    generateRandomSentenceFromBigram(bigram_models, seed=seeds_by_genre, n=200)