def testVeryShort():
    """
    Test function for 'very_short.csv'.

    Loads the file with wordData.readWordFile, checks
    wordData.totalOccurrences for every word in the expected table, then
    pairs the values returned by letterFreq.letterFreq positionally with the
    letters a-z and compares each against its expected value. One line is
    printed per check: 'OK' on a match, otherwise the value obtained and the
    value expected.

    :return: None
    :rtype: NoneType
    """
    # expected results
    WORD_OCCURRENCES = {
        'airport': 348996,
        'request': 2816909,
        'wandered': 451106,
    }
    LETTER_FREQ = [
        0.03104758705050717, 0.0, 0.0, 0.03500991824543893,  # a-d
        0.2536276129665047, 0.0, 0.0, 0.0,                   # e-h
        0.013542627927787708, 0.0, 0.0, 0.0, 0.0,            # i-m
        0.017504959122719464, 0.013542627927787708,          # n-o
        0.013542627927787708, 0.10930884736053291,           # p-q
        0.15389906233882777, 0.10930884736053291,            # r-s
        0.12285147528832062, 0.10930884736053291,            # t-u
        0.0, 0.017504959122719464, 0.0, 0.0, 0.0,            # v-z
    ]

    print('Testing with very_short.csv...')
    words = wordData.readWordFile('very_short.csv')

    # test totalOccurrences
    # Drive the loop from the expected table rather than from the loaded
    # data: every expectation is exercised even if the loader dropped a
    # word, and an unexpected extra word in the file cannot raise KeyError.
    for word, expected in WORD_OCCURRENCES.items():
        # Call once and reuse the result for both the check and the report.
        got = wordData.totalOccurrences(word, words)
        print('Total occurrences of', word + ':',
              'OK' if got == expected
              else 'GOT: ' + str(got) + ', EXPECTED: ' + str(expected))

    # test letterFreq
    freqList = letterFreq.letterFreq(words)
    for ch, got, expected in zip(string.ascii_lowercase, freqList, LETTER_FREQ):
        print('Frequency of', ch + ':',
              'OK' if got == expected
              else 'GOT: ' + str(got) + ', EXPECTED: ' + str(expected))
def main():
    """
    Interactive driver: read a word file and report a word's total count.

    Prompts for a file name, loads it with wordData.readWordFile and prints
    the loaded data, then prompts for a word and prints the result of
    totalOccurrences for that word.

    :return: None
    :rtype: NoneType
    """
    path = input("Enter the name of a file to read: ")
    wordTable = wordData.readWordFile(path)
    print(wordTable)

    target = input("Enter word you want to count the occurences of: ")
    total = totalOccurrences(target, wordTable)
    print(total)
def testA():
    """
    Test function for 'a.txt'.

    Checks wordData.totalOccurrences for three sample words and compares
    the string returned by letterFreq.letterFreq with the expected letter
    ordering, printing the outcome of each check.

    :return: None
    :rtype: NoneType
    """
    # expected results
    expectedCounts = {
        'airplane': 2487028,
        'alien': 5400198,
        'accept': 26299474,
    }
    expectedOrder = 'andetsrliocupgmybhvfwkxqjz'

    print('Testing with a.txt...\n')
    words = wordData.readWordFile('a.txt')

    # test totalOccurrences
    for word in expectedCounts:
        if wordData.totalOccurrences(word, words) == expectedCounts[word]:
            verdict = 'OK'
        else:
            verdict = ('GOT: ' + str(wordData.totalOccurrences(word, words))
                       + ', EXPECTED: ' + str(expectedCounts[word]))
        print('Total occurrences of', word + ':', verdict)

    # test letterFreq
    freqString = letterFreq.letterFreq(words)
    if freqString != expectedOrder:
        print('Frequency ordering of letters incorrect.')
        print('GOT: ' + freqString)
        print('EXPECTED: ' + expectedOrder)
    else:
        print('\nFrequency ordering of letters OK')
def main():
    """
    Interactive driver: print the top and bottom ten trending words.

    Prompts for a word file and a start/end year, calls trending() on the
    loaded data and prints the .word attribute of the first ten entries of
    the result, followed by the last ten entries in reverse order (so the
    final entry of the result is printed first).

    :return: None
    :rtype: NoneType
    :raises ValueError: if either year entered is not a whole number
    """
    file = input("Enter the name of a file to read: ")
    words = wordData.readWordFile(file)
    start = input("Enter starting year: ")
    end = input("Enter ending year: ")

    # Years are whole numbers, so parse them as ints rather than floats;
    # a float year cannot be used with range() or as a sequence index.
    # NOTE(review): the 'a.txt' trending test also passes int years.
    trends = trending(words, int(start), int(end))

    print("")
    print("The top 10 trending words from", start, "to", end, ":")
    for entry in trends[:10]:
        print(entry.word)

    print("")
    print("The bottom 10 trending words from", start, "to", end, ":")
    # Walk the last ten entries backwards so the final entry comes first.
    for entry in reversed(trends[-10:]):
        print(entry.word)
def testA():
    """
    Test function for 'a.txt'.

    For each expected entry, runs trending.trending over the given year
    range and checks the word found at the given position of the result.

    :return: None
    :rtype: NoneType
    """
    # expected results
    # each tuple is (start year, end year, index into the result, word)
    # index 0 is the highest trending word, -1 is the lowest
    expectations = (
        (1927, 1931, 0, 'av'),
        (1927, 1931, -1, 'acetate'),
        (1950, 1952, 0, 'antibiotics'),
        (1950, 1952, -1, 'atque'),
        (1966, 1975, 0, 'algorithms'),
        (1966, 1975, -3, 'aeroplanes'),
        (1981, 2008, 1, 'authentication'),
        (1981, 2008, -2, 'antisera'),
    )

    print('Testing with a.txt...\n')
    words = wordData.readWordFile('a.txt')

    for startYr, endYr, position, expectedWord in expectations:
        print("testing: ", startYr, "to", endYr)
        trendList = trending.trending(words, startYr, endYr)
        if trendList[position].word != expectedWord:
            print("got:", trendList[position].word)
            print("expected: ", expectedWord)
        else:
            print("OK!")
def testVeryShort():
    """
    Test function for 'very_short.csv'.

    Exercises wordLength.occurrencesInYear, wordLength.averageWordLength
    and wordLength.averageWordLengthYears, printing 'OK' or the obtained
    and expected values for each check.

    :return: None
    :rtype: NoneType
    """
    def verdict(got, expected, gotLabel='GOT: '):
        # Build the result text for one comparison.
        if got == expected:
            return 'OK'
        return gotLabel + str(got) + ', EXPECTED: ' + str(expected)

    # expected results
    airport2007 = 175702
    request2004 = 0
    avgLength2007 = 7.110627395031065
    avgLength2004 = 0
    avgLengths2005To2008 = [
        7.1147602294958,    # 2005
        7.114548770228398,  # 2006
        7.110627395031065,  # 2007
        7.150069236398865,  # 2008
    ]

    print('Testing with very_short.csv...')
    words = wordData.readWordFile('very_short.csv')

    # test occurrencesInYear
    count = wordLength.occurrencesInYear('airport', 2007, words)
    print('Occurrences of "airport" in 2007:', verdict(count, airport2007))
    count = wordLength.occurrencesInYear('request', 2004, words)
    print('Occurrences of "request" in 2004:', verdict(count, request2004))

    # test averageWordLength
    average = wordLength.averageWordLength(2007, words)
    print('Average word length in 2007:', verdict(average, avgLength2007))
    average = wordLength.averageWordLength(2004, words)
    print('Average word length in 2004:', verdict(average, avgLength2004))

    # test averageWordLengthYears
    lengthsList = wordLength.averageWordLengthYears(2005, 2008, words)
    for year, got, expected in zip(range(2005, 2009), lengthsList,
                                   avgLengths2005To2008):
        # This report has always used 'GOT:' with no trailing space.
        print('Average word length for', str(year) + ':',
              verdict(got, expected, 'GOT:'))
def testVeryShort():
    """
    Test function for 'very_short.csv'.

    Runs the wordLength functions (occurrencesInYear, averageWordLength,
    averageWordLengthYears) against fixed expected values and prints one
    result line per comparison.

    :return: None
    :rtype: NoneType
    """
    def report(got, expected, *label, gotPrefix='GOT: '):
        # Print the label followed by 'OK' or the got/expected details.
        if got == expected:
            outcome = 'OK'
        else:
            outcome = gotPrefix + str(got) + ', EXPECTED: ' + str(expected)
        print(*label, outcome)

    # expected results
    expectedAirport2007 = 175702
    expectedRequest2004 = 0
    expectedAvg2007 = 7.110627395031065
    expectedAvg2004 = 0
    expectedAvgByYear = [
        7.1147602294958,    # 2005
        7.114548770228398,  # 2006
        7.110627395031065,  # 2007
        7.150069236398865,  # 2008
    ]

    print('Testing with very_short.csv...')
    words = wordData.readWordFile('very_short.csv')

    # test occurrencesInYear
    report(wordLength.occurrencesInYear('airport', 2007, words),
           expectedAirport2007, 'Occurrences of "airport" in 2007:')
    report(wordLength.occurrencesInYear('request', 2004, words),
           expectedRequest2004, 'Occurrences of "request" in 2004:')

    # test averageWordLength
    report(wordLength.averageWordLength(2007, words),
           expectedAvg2007, 'Average word length in 2007:')
    report(wordLength.averageWordLength(2004, words),
           expectedAvg2004, 'Average word length in 2004:')

    # test averageWordLengthYears
    lengthsList = wordLength.averageWordLengthYears(2005, 2008, words)
    years = range(2005, 2009)
    for year, got, expected in zip(years, lengthsList, expectedAvgByYear):
        # The per-year report uses 'GOT:' without a trailing space.
        report(got, expected, 'Average word length for', str(year) + ':',
               gotPrefix='GOT:')
def main():
    """
    Interactive driver: word total, letter frequencies and histogram.

    Prompts for a word file and a word, prints the word's total occurrences
    (wordData.totalOccurrences), prints the result of letterFreq() for the
    loaded data, passes that same result to letterHist.letterFreqPlot, and
    finally waits for Enter.

    :return: None
    :rtype: NoneType
    """
    fileName = input("Enter word file: ")
    words = wordData.readWordFile(fileName)

    word = input("Enter word: ")
    occ = wordData.totalOccurrences(word, words)
    print("Total occurrences of ", word, ":", occ)

    # Compute the frequencies once and reuse them for both the printout and
    # the plot instead of scanning the whole data set a second time.
    freqList = letterFreq(words)
    print("Letter frequencies: ", freqList)

    print("# plot is displayed")
    letterHist.letterFreqPlot(freqList)
    input("Enter to exit")
def main():
    """
    The main function.

    Prompts for a word file, builds the frequency list with
    wordFrequencies(), prompts for a 1-based rank, prints the entry at that
    rank and then plots the list with simplePlot.wordFreqPlot.

    :return: None
    :rtype: NoneType
    :raises ValueError: if the rank entered is not an integer
    :raises IndexError: if the rank is outside 1..len(freqList)
    """
    fileName = input("Enter word file: ")
    words = wordData.readWordFile(fileName)
    freqList = wordFrequencies(words)

    rank = int(input("Enter rank" + str((1, len(freqList))) + ":"))
    # Ranks are 1-based. Without this check a rank of 0 or below would wrap
    # around through negative indexing and silently print an entry from the
    # end of the list; ranks that are too large already raised IndexError.
    if not 1 <= rank <= len(freqList):
        raise IndexError(
            "rank must be between 1 and " + str(len(freqList)))

    print("Rank", rank, ":", freqList[rank - 1])
    simplePlot.wordFreqPlot(freqList)
def main():
    """
    Interactive driver for the word-length functions.

    Prompts for a word file, then:
    1. asks for a word and a year and prints occurrencesInYear for them;
    2. asks for another year and prints averageWordLength for it;
    3. asks for a start and end year, computes averageWordLengthYears and
       passes the result to simplePlot.averageWordLengthPlot.

    :return: None
    :rtype: NoneType
    :raises ValueError: if any year entered is not an integer
    """
    fileName = input("Enter word file: ")
    words = wordData.readWordFile(fileName)

    word = input("Enter a word: ")
    year = int(input("Enter a year: "))
    occ = occurrencesInYear(word, year, words)
    print("The word ", '"', word + '"', "occurred ", occ,
          "times in the year ", year)

    year2 = int(input("Enter a year: "))
    avg = averageWordLength(year2, words)
    print("The average word length for the year ", year2, "is ", avg,
          " letters")

    # The prompt used to read "star year"; it asks for the start of the range.
    startYear = int(input("Enter a start year: "))
    endYear = int(input("Enter an end year: "))
    print("# plot is displayed")
    lengthsList = averageWordLengthYears(startYear, endYear, words)
    simplePlot.averageWordLengthPlot(startYear, endYear, lengthsList)
def main():
    """
    The main function.

    Asks for a word file and a word, prints the word's total occurrences,
    prints the letter frequencies of the loaded data, plots them with
    letterHist.letterFreqPlot and waits for Enter before returning.

    :return: None
    :rtype: NoneType
    """
    path = input("Enter word file: ")
    target = input("Enter word: ")

    table = wordData.readWordFile(path)
    total = wordData.totalOccurrences(target, table)
    print("Total occurrences of", target, ":", total)

    frequencies = letterFreq(table)
    print("Letter frequencies:", frequencies)
    letterHist.letterFreqPlot(frequencies)

    input("Hit Enter to EXIT")
def testVeryShort():
    """
    Test function for 'very_short.csv'.

    Verifies wordData.totalOccurrences for each word in the expected table
    and compares the values returned by letterFreq.letterFreq, taken in
    order alongside the letters a-z, with the expected values. Each check
    prints 'OK' or the obtained and expected values.

    :return: None
    :rtype: NoneType
    """
    # expected results
    WORD_OCCURRENCES = {'airport': 348996,
                        'request': 2816909,
                        'wandered': 451106}
    LETTER_FREQ = [0.03104758705050717, 0.0, 0.0, 0.03500991824543893,
                   0.2536276129665047, 0.0, 0.0, 0.0,
                   0.013542627927787708, 0.0, 0.0, 0.0, 0.0,
                   0.017504959122719464, 0.013542627927787708,
                   0.013542627927787708, 0.10930884736053291,
                   0.15389906233882777, 0.10930884736053291,
                   0.12285147528832062, 0.10930884736053291,
                   0.0, 0.017504959122719464, 0.0, 0.0, 0.0]

    print('Testing with very_short.csv...')
    words = wordData.readWordFile('very_short.csv')

    # test totalOccurrences
    # Iterate over the expectations, not over the loaded data: a word that
    # failed to load is then reported instead of skipped, and an extra word
    # in the file cannot trigger a KeyError on the expected table.
    for word, expected in WORD_OCCURRENCES.items():
        # Single call; the value is reused in the comparison and the report.
        got = wordData.totalOccurrences(word, words)
        if got == expected:
            outcome = 'OK'
        else:
            outcome = 'GOT: ' + str(got) + ', EXPECTED: ' + str(expected)
        print('Total occurrences of', word + ':', outcome)

    # test letterFreq
    freqList = letterFreq.letterFreq(words)
    for ch, got, expected in zip(string.ascii_lowercase, freqList, LETTER_FREQ):
        if got == expected:
            outcome = 'OK'
        else:
            outcome = 'GOT: ' + str(got) + ', EXPECTED: ' + str(expected)
        print('Frequency of', ch + ':', outcome)
def main():
    """
    Interactive driver: print and plot a five-number summary for a year.

    Prompts for a word file and a year, calls summaryFromWords on the
    loaded data, prints the first five values of the result with their
    labels and passes them, in the same order, to
    boxAndWhisker.boxAndWhisker.

    :return: None
    :rtype: NoneType
    """
    path = input("Enter word file: ")
    table = wordData.readWordFile(path)
    yearText = input("Enter year: ")
    summary = summaryFromWords(table, int(yearText))

    # Pull out the five values by position before anything is printed.
    values = [summary[i] for i in range(5)]
    labels = (
        "minimum: ",
        "1st quartile: ",
        "median: ",
        "3rd quartile ",
        "maximum: ",
    )
    for label, value in zip(labels, values):
        print(label, value)

    boxAndWhisker.boxAndWhisker(*values)
def main():
    """
    The main function.

    Collects a word file, a word and a year; reports how often the word
    occurred in that year; asks for a second year and reports the average
    word length for it; then asks for a year range and plots the average
    word lengths over that range with simplePlot.averageWordLengthPlot.

    :return: None
    :rtype: NoneType
    """
    path = input("Enter word file: ")
    target = input("Enter a word: ")
    countYear = int(input("Enter a year: "))
    table = wordData.readWordFile(path)

    occurrences = occurrencesInYear(target, countYear, table)
    print("The word", '"', target, '"', "occurred", occurrences,
          "times in the year", countYear)

    lengthYear = int(input("Enter a year: "))
    meanLength = averageWordLength(lengthYear, table)
    print("The average word length for the year", lengthYear,
          "is", meanLength, "letters")

    firstYear = int(input("Enter a start year: "))
    lastYear = int(input("Enter an end year: "))
    simplePlot.averageWordLengthPlot(
        firstYear,
        lastYear,
        averageWordLengthYears(firstYear, lastYear, table),
    )
def testVeryShort():
    """
    Test function for 'very_short.csv'.

    Checks that positions 0, 1 and 2 of the list returned by
    wordFreq.wordFrequencies carry the expected word and count.

    :return: None
    :rtype: NoneType
    """
    # expected results
    mostFrequent = wordData.WordCount('request', 2816909)
    secondMostFrequent = wordData.WordCount('wandered', 451106)
    leastFrequent = wordData.WordCount('airport', 348996)

    # read in the words
    print('Testing with very_short.csv...')
    words = wordData.readWordFile('very_short.csv')

    # get the frequency list of WordCount objects
    freqList = wordFreq.wordFrequencies(words)

    # test most frequent, second most frequent, and least frequent word
    checks = (
        ('Most frequent word: ', 0, mostFrequent),
        ('Second most frequent word: ', 1, secondMostFrequent),
        ('Least frequent word: ', 2, leastFrequent),
    )
    for label, position, expected in checks:
        actual = freqList[position]
        if actual.word == expected.word and actual.count == expected.count:
            outcome = 'OK'
        else:
            outcome = 'GOT: ' + str(actual) + ', EXPECTED: ' + str(expected)
        print(label, outcome)
def testVeryShort():
    """
    Test function for 'very_short.csv'.

    Builds the frequency list with wordFreq.wordFrequencies and compares
    its first three entries, by word and count, with the expected
    WordCount values.

    :return: None
    :rtype: NoneType
    """
    def describe(actual, expected):
        # 'OK' when word and count both match, otherwise the details.
        sameWord = actual.word == expected.word
        if sameWord and actual.count == expected.count:
            return 'OK'
        return 'GOT: ' + str(actual) + ', EXPECTED: ' + str(expected)

    # expected results
    expectedFirst = wordData.WordCount('request', 2816909)
    expectedSecond = wordData.WordCount('wandered', 451106)
    expectedLast = wordData.WordCount('airport', 348996)

    # read in the words
    print('Testing with very_short.csv...')
    words = wordData.readWordFile('very_short.csv')

    # get the frequency list of WordCount objects
    freqList = wordFreq.wordFrequencies(words)

    # test most frequent, second most frequent, and least frequent word
    print('Most frequent word: ', describe(freqList[0], expectedFirst))
    print('Second most frequent word: ', describe(freqList[1], expectedSecond))
    print('Least frequent word: ', describe(freqList[2], expectedLast))
def testZ():
    """
    Test function for 'z.txt'.

    Builds the per-year list with printedWords.printedWords and checks the
    value printedWords.wordsForYear returns for four sample years.

    :return: None
    :rtype: NoneType
    """
    # expected results from z.txt, as (year, expected value) pairs
    expectedByYear = (
        (1900, 136049),
        (1931, 155940),
        (1964, 581610),
        (2008, 2450556),
    )

    # read in the words
    print('Testing with z.txt...')
    words = wordData.readWordFile('z.txt')

    # get the list of words for each year
    wordsByYearList = printedWords.printedWords(words)

    for year, expected in expectedByYear:
        print("Testing year", year)
        got = printedWords.wordsForYear(year, wordsByYearList)
        if got == expected:
            print("OK")
        else:
            print("GOT:", got, "EXPECTED:", expected)
def testFileName(filename, year, expected):
    """
    testFileName: String NatNum (NatNum or Float)^5 -> Boolean

    Load the word file, compute its summary for the given year with
    summaryFromWords, and return whatever test() reports when that summary
    is compared with the expected one.
    """
    wordTable = readWordFile(filename)
    summary = summaryFromWords(wordTable, year)
    return test(summary, expected)
def main():
    """
    Interactive driver: print the letter frequencies of a word file.

    Prompts for a file name, loads it with wordData.readWordFile and prints
    the result of letterFreq for the loaded data.

    :return: None
    :rtype: NoneType
    """
    path = input("Enter the name of a file to read: ")
    table = wordData.readWordFile(path)
    frequencies = letterFreq(table)
    print(frequencies)