Python readWordFileの例、wordData.readWordFile Pythonの例

コード例 #1

0

ファイルを表示

def testVeryShort():
    """
    Test function for 'very_short.csv'.

    Reads the file with wordData.readWordFile, then prints one line per
    expected word comparing wordData.totalOccurrences against the expected
    count, and one line per lowercase letter comparing the values returned
    by letterFreq.letterFreq against the expected frequencies.

    :return: None
    :rtype: NoneType
    """
    # expected results
    WORD_OCCURRENCES = {
        'airport': 348996,
        'request': 2816909,
        'wandered': 451106
    }

    # one entry per letter, paired with string.ascii_lowercase below
    LETTER_FREQ = [
        0.03104758705050717, 0.0, 0.0, 0.03500991824543893, 0.2536276129665047,
        0.0, 0.0, 0.0, 0.013542627927787708, 0.0, 0.0, 0.0, 0.0,
        0.017504959122719464, 0.013542627927787708, 0.013542627927787708,
        0.10930884736053291, 0.15389906233882777, 0.10930884736053291,
        0.12285147528832062, 0.10930884736053291, 0.0, 0.017504959122719464,
        0.0, 0.0, 0.0
    ]

    print('Testing with very_short.csv...')
    words = wordData.readWordFile('very_short.csv')

    # test totalOccurrences
    # Drive the loop from the expected table, not from the file contents:
    # a word present in the file but absent from the table can then no
    # longer raise KeyError, and every expected word is always checked.
    for word, expected in WORD_OCCURRENCES.items():
        got = wordData.totalOccurrences(word, words)
        print('Total occurrences of', word + ':',
              'OK' if got == expected
              else 'GOT: ' + str(got) + ', EXPECTED: ' + str(expected))

    # test letterFreq
    freqList = letterFreq.letterFreq(words)
    for ch, got, expected in zip(string.ascii_lowercase, freqList,
                                 LETTER_FREQ):
        print(
            'Frequency of', ch + ':', 'OK' if got == expected else 'GOT: ' +
            str(got) + ', EXPECTED: ' + str(expected))

コード例 #2

0

ファイルを表示

ファイル: wordData.py プロジェクト: ahadsheriff/UnigramProject

def main():
    """
    Interactive driver.

    Prompts for a file name, prints what wordData.readWordFile returns
    for it, then prompts for a word and prints the result of
    totalOccurrences for that word.

    :return: None
    :rtype: NoneType
    """
    path = input("Enter the name of a file to read: ")
    words = wordData.readWordFile(path)
    print(words)

    target = input("Enter word you want to count the occurences of: ")
    count = totalOccurrences(target, words)
    print(count)

コード例 #3

0

ファイルを表示

ファイル: testLetterFreq.py プロジェクト: ahadsheriff/UnigramProject

def testA():
    """
    Test function for 'a.txt'.

    Prints one line per expected word comparing wordData.totalOccurrences
    with the expected count, then compares the value returned by
    letterFreq.letterFreq with the expected letter ordering string.

    :return: None
    :rtype: NoneType
    """
    # expected results
    WORD_OCCURRENCES = {
        'airplane': 2487028,
        'alien': 5400198,
        'accept': 26299474
    }
    LETTER_FREQ_STRING = 'andetsrliocupgmybhvfwkxqjz'

    print('Testing with a.txt...\n')
    words = wordData.readWordFile('a.txt')

    # test totalOccurrences
    for word, expected in WORD_OCCURRENCES.items():
        got = wordData.totalOccurrences(word, words)
        if got == expected:
            verdict = 'OK'
        else:
            verdict = 'GOT: ' + str(got) + ', EXPECTED: ' + str(expected)
        print('Total occurrences of', word + ':', verdict)

    # test letterFreq
    freqString = letterFreq.letterFreq(words)
    if freqString != LETTER_FREQ_STRING:
        print('Frequency ordering of letters incorrect.')
        print('GOT: ' + freqString)
        print('EXPECTED: ' + LETTER_FREQ_STRING)
    else:
        print('\nFrequency ordering of letters OK')

コード例 #4

0

ファイルを表示

def main():
    """
    Interactive driver for trending.

    Prompts for a file name and a starting/ending year, calls trending
    with the years converted to float, then prints the .word of the first
    ten entries of the result followed by the .word of the last ten
    entries in reverse order.

    :return: None
    :rtype: NoneType
    """
    fileName = input("Enter the name of a file to read: ")
    words = wordData.readWordFile(fileName)
    start = input("Enter starting year: ")
    end = input("Enter ending year: ")

    trends = trending(words, float(start), float(end))

    print("")
    print("The top 10 trending words from", start, "to", end, ":")
    for entry in trends[:10]:
        print(entry.word)

    print("")
    print("The bottom 10 trending words from", start, "to", end, ":")

    # Collect the last ten words first, then print them back to front so
    # the final entry of the list is shown first.
    bottomWords = [entry.word for entry in trends[-10:]]
    for bottomWord in reversed(bottomWords):
        print(bottomWord)

コード例 #5

0

ファイルを表示

ファイル: testTrending.py プロジェクト: CalvinWu4/CSCI-141

def testA():
    """
    Test function for 'a.txt'.

    For each expected case, calls trending.trending for the year range and
    compares the .word of the entry at the given index with the expected
    word.

    :return: None
    :rtype: NoneType
    """

    # expected results
    # each tuple contains startYr/endYr/index/word
    # index 0 is the highest trending word, -1 is lowest
    TRENDS = ((1927, 1931, 0, 'av'), (1927, 1931, -1, 'acetate'),
              (1950, 1952, 0, 'antibiotics'), (1950, 1952, -1, 'atque'),
              (1966, 1975, 0, 'algorithms'), (1966, 1975, -3, 'aeroplanes'),
              (1981, 2008, 1, 'authentication'), (1981, 2008, -2, 'antisera'))

    print('Testing with a.txt...\n')
    words = wordData.readWordFile('a.txt')

    for startYr, endYr, index, expected in TRENDS:
        print("testing: ", startYr, "to", endYr)
        trendList = trending.trending(words, startYr, endYr)
        got = trendList[index].word
        if got == expected:
            print("OK!")
        else:
            print("got:", got)
            print("expected: ", expected)

コード例 #6

0

ファイルを表示

ファイル: testWordLength.py プロジェクト: hl9897/Test-Statistics

def testVeryShort():
    """
    Test function for 'very_short.csv'.

    Exercises wordLength.occurrencesInYear, wordLength.averageWordLength
    and wordLength.averageWordLengthYears, printing 'OK' or a
    GOT/EXPECTED message for each check.

    :return: None
    :rtype: NoneType
    """

    def verdict(got, expected, gotLabel='GOT: '):
        # 'OK' on an exact match, otherwise a message showing both values.
        if got == expected:
            return 'OK'
        return gotLabel + str(got) + ', EXPECTED: ' + str(expected)

    # expected results
    AIRPORT_2007 = 175702
    REQUEST_2004 = 0
    AVG_WORD_LENGTH_2007 = 7.110627395031065
    AVG_WORD_LENGTH_2004 = 0
    AVG_WORD_LENGTHS_2005_2008 = [
        7.1147602294958,       # 2005
        7.114548770228398,     # 2006
        7.110627395031065,     # 2007
        7.150069236398865      # 2008
    ]

    print('Testing with very_short.csv...')
    words = wordData.readWordFile('very_short.csv')

    # test occurrencesInYear
    airportCount = wordLength.occurrencesInYear('airport', 2007, words)
    print('Occurrences of "airport" in 2007:',
          verdict(airportCount, AIRPORT_2007))

    requestCount = wordLength.occurrencesInYear('request', 2004, words)
    print('Occurrences of "request" in 2004:',
          verdict(requestCount, REQUEST_2004))

    # test averageWordLength
    length2007 = wordLength.averageWordLength(2007, words)
    print('Average word length in 2007:',
          verdict(length2007, AVG_WORD_LENGTH_2007))

    length2004 = wordLength.averageWordLength(2004, words)
    print('Average word length in 2004:',
          verdict(length2004, AVG_WORD_LENGTH_2004))

    # test averageWordLengthYears
    lengthsList = wordLength.averageWordLengthYears(2005, 2008, words)
    pairs = zip(lengthsList, AVG_WORD_LENGTHS_2005_2008)
    # Years are numbered from 2005, one per expected entry.
    for year, (got, expected) in enumerate(pairs, start=2005):
        # This message uses 'GOT:' with no trailing space.
        print('Average word length for', str(year) + ':',
              verdict(got, expected, 'GOT:'))

コード例 #7

0

ファイルを表示

def testVeryShort():
    """
    Test function for 'very_short.csv'.

    Runs the single-value checks (occurrencesInYear, averageWordLength)
    from a table, then checks averageWordLengthYears year by year. Each
    check prints 'OK' or a GOT/EXPECTED message.

    :return: None
    :rtype: NoneType
    """

    # expected results
    AIRPORT_2007 = 175702
    REQUEST_2004 = 0
    AVG_WORD_LENGTH_2007 = 7.110627395031065
    AVG_WORD_LENGTH_2004 = 0
    AVG_WORD_LENGTHS_2005_2008 = [
        7.1147602294958,  # 2005
        7.114548770228398,  # 2006
        7.110627395031065,  # 2007
        7.150069236398865  # 2008
    ]

    print('Testing with very_short.csv...')
    words = wordData.readWordFile('very_short.csv')

    # (label, deferred call, expected value); each call runs only when its
    # row is reached, so calls and prints stay interleaved in table order.
    checks = (
        ('Occurrences of "airport" in 2007:',
         lambda: wordLength.occurrencesInYear('airport', 2007, words),
         AIRPORT_2007),
        ('Occurrences of "request" in 2004:',
         lambda: wordLength.occurrencesInYear('request', 2004, words),
         REQUEST_2004),
        ('Average word length in 2007:',
         lambda: wordLength.averageWordLength(2007, words),
         AVG_WORD_LENGTH_2007),
        ('Average word length in 2004:',
         lambda: wordLength.averageWordLength(2004, words),
         AVG_WORD_LENGTH_2004),
    )
    for label, compute, expected in checks:
        got = compute()
        if got == expected:
            print(label, 'OK')
        else:
            print(label, 'GOT: ' + str(got) + ', EXPECTED: ' + str(expected))

    # averageWordLengthYears
    lengthsList = wordLength.averageWordLengthYears(2005, 2008, words)
    for year, got, expected in zip(range(2005, 2009), lengthsList,
                                   AVG_WORD_LENGTHS_2005_2008):
        if got == expected:
            outcome = 'OK'
        else:
            # Note: no space after 'GOT:' in this message.
            outcome = 'GOT:' + str(got) + ', EXPECTED: ' + str(expected)
        print('Average word length for', str(year) + ':', outcome)

コード例 #8

0

ファイルを表示

ファイル: letterFreq.py プロジェクト: hl9897/Test-Statistics

def main():
    """
    Interactive driver for the letter-frequency program.

    Prompts for a word file and a word, prints the result of
    wordData.totalOccurrences for that word, prints the result of
    letterFreq for the file, passes that same result to
    letterHist.letterFreqPlot, and finally waits for Enter.

    :return: None
    :rtype: NoneType
    """
    fileName = input("Enter word file: ")
    words = wordData.readWordFile(fileName)
    word = input("Enter word: ")
    occ = wordData.totalOccurrences(word, words)
    print("Total occurrences of ", word, ":", occ)

    # Compute the frequencies once and reuse them for both the printout
    # and the plot instead of calling letterFreq twice.
    freqList = letterFreq(words)
    print("Letter frequencies: ", freqList)
    print("# plot is displayed")
    letterHist.letterFreqPlot(freqList)
    input("Enter to exit")

コード例 #9

0

ファイルを表示

def main():
    """
    The main function.

    Prompts for a word file and a 1-based rank, prints the entry of the
    wordFrequencies result at that rank, then passes the whole result to
    simplePlot.wordFreqPlot.

    :return: None
    :rtype: NoneType
    :raises IndexError: if the rank is not between 1 and len(freqList)
    """
    fileName = input("Enter word file: ")
    words = wordData.readWordFile(fileName)
    freqList = wordFrequencies(words)
    rank = int(input("Enter rank" + str((1,len(freqList))) + ":"))

    # Without this check a rank of 0 or below would be turned into a
    # negative index and silently print an entry counted from the end.
    if not 1 <= rank <= len(freqList):
        raise IndexError("rank " + str(rank) + " is outside the range 1 to "
                         + str(len(freqList)))

    print("Rank",rank,":", freqList[rank - 1])
    simplePlot.wordFreqPlot(freqList)

コード例 #10

0

ファイルを表示

ファイル: wordLength.py プロジェクト: hl9897/Test-Statistics

def main():
    """
    Interactive driver for the word-length program.

    Prompts for a word file, a word and a year and prints the result of
    occurrencesInYear; prompts for another year and prints the result of
    averageWordLength; then prompts for a start and an end year and passes
    the result of averageWordLengthYears to
    simplePlot.averageWordLengthPlot.

    :return: None
    :rtype: NoneType
    """
    fileName = input("Enter word file: ")
    words = wordData.readWordFile(fileName)

    word = input("Enter a word: ")
    year = int(input("Enter a year: "))
    occ = occurrencesInYear(word, year, words)
    print("The word ", '"', word + '"', "occurred ", occ, "times in the year ", year)

    year2 = int(input("Enter a year: "))
    avg = averageWordLength(year2, words)
    print("The average word length for the year ", year2, "is ", avg, " letters")

    # The prompt asks for the first year of the plotted range
    # ("start year", not "star year").
    startYear = int(input("Enter a start year: "))
    endYear = int(input("Enter an end year: "))
    print("# plot is displayed")
    lengthsList = averageWordLengthYears(startYear, endYear, words)
    simplePlot.averageWordLengthPlot(startYear, endYear, lengthsList)

コード例 #11

0

ファイルを表示

ファイル: letterFreq.py プロジェクト: akshitvjain/unigram

def main():
    """
    Interactive entry point.

    Asks for a word file and a word, reads the file, prints the result of
    wordData.totalOccurrences for the word, prints the result of
    letterFreq, hands that result to letterHist.letterFreqPlot and then
    waits for Enter.

    :return: None
    :rtype: NoneType
    """
    # Both prompts are answered before the file is read.
    path = input("Enter word file: ")
    target = input("Enter word: ")

    words = wordData.readWordFile(path)
    total = wordData.totalOccurrences(target, words)
    print("Total occurrences of", target, ":", total)

    frequencies = letterFreq(words)
    print("Letter frequencies:", frequencies)
    letterHist.letterFreqPlot(frequencies)

    input("Hit Enter to EXIT")

コード例 #12

0

ファイルを表示

ファイル: testLetterFreq.py プロジェクト: pzenie/Python-Text-Analysis

def testVeryShort():
    """
    Test function for 'very_short.csv'.

    Checks wordData.totalOccurrences for every expected word and compares
    the values returned by letterFreq.letterFreq, letter by letter, with
    the expected frequencies. Each check prints 'OK' or a GOT/EXPECTED
    message.

    :return: None
    :rtype: NoneType
    """
    # expected results
    WORD_OCCURRENCES = {'airport' : 348996,
                        'request' : 2816909,
                        'wandered' : 451106}

    # paired position by position with string.ascii_lowercase below
    LETTER_FREQ = [0.03104758705050717,   # a
                   0.0,                   # b
                   0.0,                   # c
                   0.03500991824543893,   # d
                   0.2536276129665047,    # e
                   0.0,                   # f
                   0.0,                   # g
                   0.0,                   # h
                   0.013542627927787708,  # i
                   0.0,                   # j
                   0.0,                   # k
                   0.0,                   # l
                   0.0,                   # m
                   0.017504959122719464,  # n
                   0.013542627927787708,  # o
                   0.013542627927787708,  # p
                   0.10930884736053291,   # q
                   0.15389906233882777,   # r
                   0.10930884736053291,   # s
                   0.12285147528832062,   # t
                   0.10930884736053291,   # u
                   0.0,                   # v
                   0.017504959122719464,  # w
                   0.0,                   # x
                   0.0,                   # y
                   0.0]                   # z

    print('Testing with very_short.csv...')
    words = wordData.readWordFile('very_short.csv')

    # test totalOccurrences
    # Iterate the expected table rather than the words read from the file,
    # so an unexpected word in the file cannot raise KeyError and each
    # expected word is always reported.
    for word in WORD_OCCURRENCES:
        expected = WORD_OCCURRENCES[word]
        got = wordData.totalOccurrences(word, words)
        if got == expected:
            status = 'OK'
        else:
            status = 'GOT: ' + str(got) + ', EXPECTED: ' + str(expected)
        print('Total occurrences of', word + ':', status)

    # test letterFreq
    freqList = letterFreq.letterFreq(words)
    for ch, got, expected in zip(string.ascii_lowercase, freqList, LETTER_FREQ):
        print('Frequency of', ch + ':',
              'OK' if got == expected else 'GOT: ' + str(got) +
                    ', EXPECTED: ' + str(expected))

コード例 #13

0

ファイルを表示

ファイル: wordLength.py プロジェクト: ahadsheriff/UnigramProject

def main():
    """
    Interactive driver for the five-value summary.

    Prompts for a word file and a year, calls summaryFromWords with the
    year converted to int, prints the first five entries of the result
    with their labels and passes them to boxAndWhisker.boxAndWhisker.

    :return: None
    :rtype: NoneType
    """
    fileName = input("Enter word file: ")
    words = wordData.readWordFile(fileName)
    year = input("Enter year: ")

    solution = summaryFromWords(words, int(year))

    # Read positions 0 through 4 by index, in order.
    small, q1, med, q3, large = [solution[i] for i in range(5)]

    labels = ("minimum: ", "1st quartile: ", "median: ", "3rd quartile ",
              "maximum: ")
    for label, value in zip(labels, (small, q1, med, q3, large)):
        print(label, value)

    boxAndWhisker.boxAndWhisker(small, q1, med, q3, large)

コード例 #14

0

ファイルを表示

def main():
    """
    Interactive entry point.

    Asks for a word file, a word and a year, then prints the result of
    occurrencesInYear; asks for another year and prints the result of
    averageWordLength; finally asks for a start and end year and passes
    the result of averageWordLengthYears to
    simplePlot.averageWordLengthPlot.

    :return: None
    :rtype: NoneType
    """
    # All three answers are collected before the file is read.
    path = input("Enter word file: ")
    target = input("Enter a word: ")
    occurrenceYear = int(input("Enter a year: "))
    words = wordData.readWordFile(path)

    count = occurrencesInYear(target, occurrenceYear, words)
    print("The word", '"', target, '"', "occurred", count,
          "times in the year", occurrenceYear)

    lengthYear = int(input("Enter a year: "))
    average = averageWordLength(lengthYear, words)
    print("The average word length for the year", lengthYear,
          "is", average, "letters")

    firstYear = int(input("Enter a start year: "))
    lastYear = int(input("Enter an end year: "))
    lengths = averageWordLengthYears(firstYear, lastYear, words)
    simplePlot.averageWordLengthPlot(firstYear, lastYear, lengths)

コード例 #15

0

ファイルを表示

ファイル: testWordFreq.py プロジェクト: ArthuruhtrA/Fall-2014

def testVeryShort():
    """
    Test function for 'very_short.csv'.

    Compares entries 0, 1 and 2 of the list returned by
    wordFreq.wordFrequencies with the expected WordCount values, by
    .word and .count, printing 'OK' or a GOT/EXPECTED message for each.

    :return: None
    :rtype: NoneType
    """

    # (label, position in the frequency list, expected WordCount)
    cases = (
        ('Most frequent word: ', 0,
         wordData.WordCount('request', 2816909)),
        ('Second most frequent word: ', 1,
         wordData.WordCount('wandered', 451106)),
        ('Least frequent word: ', 2,
         wordData.WordCount('airport', 348996)),
    )

    # read in the words
    print('Testing with very_short.csv...')
    words = wordData.readWordFile('very_short.csv')

    # get the list of WordCount objects
    freqList = wordFreq.wordFrequencies(words)

    # check each position in turn
    for label, position, expected in cases:
        got = freqList[position]
        if got.word == expected.word and got.count == expected.count:
            result = 'OK'
        else:
            result = 'GOT: ' + str(got) + ', EXPECTED: ' + str(expected)
        print(label, result)

コード例 #16

0

ファイルを表示

def testVeryShort():
    """
    Test function for 'very_short.csv'.

    Checks the first three entries of the list returned by
    wordFreq.wordFrequencies against the expected WordCount values and
    prints one result line per entry.

    :return: None
    :rtype: NoneType
    """

    def outcome(actual, wanted):
        # 'OK' when both the word and the count agree, otherwise a message
        # showing the entry found and the entry wanted.
        if actual.word == wanted.word and actual.count == wanted.count:
            return 'OK'
        return 'GOT: ' + str(actual) + ', EXPECTED: ' + str(wanted)

    # expected results
    MOST_FREQ_WORD = wordData.WordCount('request', 2816909)
    SECOND_MOST_FREQ_WORD = wordData.WordCount('wandered', 451106)
    LEAST_FREQ_WORD = wordData.WordCount('airport', 348996)

    # read in the words
    print('Testing with very_short.csv...')
    words = wordData.readWordFile('very_short.csv')

    # get the list of WordCount objects
    freqList = wordFreq.wordFrequencies(words)

    # test positions 0, 1 and 2 of the list
    print('Most frequent word: ', outcome(freqList[0], MOST_FREQ_WORD))
    print('Second most frequent word: ',
          outcome(freqList[1], SECOND_MOST_FREQ_WORD))
    print('Least frequent word: ', outcome(freqList[2], LEAST_FREQ_WORD))

コード例 #17

0

ファイルを表示

def testZ():
    """
    Test function for 'z.txt'.

    Builds the per-year list with printedWords.printedWords, then for
    each expected (year, count) pair compares the result of
    printedWords.wordsForYear with the expected count.

    :return: None
    :rtype: NoneType
    """

    # Expected results from z.txt, as (year, count) pairs
    WORDS = ((1900, 136049), (1931, 155940), (1964, 581610), (2008, 2450556))

    # read in the words
    print('Testing with z.txt...')
    words = wordData.readWordFile('z.txt')

    # get the list of words for each year
    wordsByYearList = printedWords.printedWords(words)

    for year, expected in WORDS:
        print("Testing year", year)
        got = printedWords.wordsForYear(year, wordsByYearList)
        if got == expected:
            print("OK")
            continue
        print("GOT:", got, "EXPECTED:", expected)

コード例 #18

0

ファイルを表示

def testFileName(filename, year, expected):
    """
    testFileName: String NatNum (NatNum or Float)^5 -> Boolean

    Reads filename with readWordFile, computes summaryFromWords for the
    given year and returns the result of test(summary, expected).
    """
    words = readWordFile(filename)
    summary = summaryFromWords(words, year)
    return test(summary, expected)

コード例 #19

0

ファイルを表示

def main():
    """
    Interactive driver.

    Prompts for a file name and prints the result of letterFreq for the
    words read from that file.

    :return: None
    :rtype: NoneType
    """
    fileName = input("Enter the name of a file to read: ")
    print(letterFreq(wordData.readWordFile(fileName)))