コード例 #1
0
# Christian Clark, [email protected], 29 September 2014

import pickle, textstats as ts

# Output report for this script; the sections below write into it.
# NOTE(review): outFile is not closed anywhere in this part of the script --
# confirm it is closed once the last write is done.
outFile = open('bigram_bible_austen_out.txt', 'w')


# Part 1: The King James Bible
# (A) and (B) Create token and type lists from the text file

# Read the whole text in one go. The with-block closes the input file even if
# read() raises, which the manual open()/close() pair did not guarantee.
with open('../Ling 1330/gutenberg/gutenberg/bible-kjv.txt') as bInfile:
    bTxt = bInfile.read()

# Both helpers come from the project's textstats module and take the raw text.
bToks = ts.getTokens(bTxt)
bTypes = ts.getTypes(bTxt)


# (C) Report the token and type totals in outFile

# The sentence is assembled from its fixed pieces plus the two counts, then
# written in a single call, followed by a blank line.
outFile.write(''.join([
    'There are a total of ', str(len(bToks)), ' word tokens and ',
    str(len(bTypes)), ' word types in the King James Bible.', '\n\n',
]))


# (D) Build the bigram frequency dictionary

# Maps each item yielded by ts.getWordNGrams(bToks, 2) to the number of times
# it occurs; a bigram seen for the first time starts from a count of 0.
bBigrFreq = {}
for bigr in ts.getWordNGrams(bToks, 2):
    bBigrFreq[bigr] = bBigrFreq.get(bigr, 0) + 1
コード例 #2
0
ファイル: Obama.py プロジェクト: cclark94/compLing
# Christian ...

import pickle, textstats as ts

# Output report for this script; the sections below write into it.
# NOTE(review): outFile is not closed anywhere in this part of the script --
# confirm it is closed once the last write is done.
outFile = open('2009-Obama_out.txt', 'w')


# Part 1: Obama's 2009 speech
# (A) and (B) Create token and type lists from the text file

# Read the whole text in one go. The with-block closes the input file even if
# read() raises, which the manual open()/close() pair did not guarantee.
with open('2009-Obama.txt') as bInfile:
    bTxt = bInfile.read()

# Both helpers come from the project's textstats module and take the raw text.
bToks = ts.getTokens(bTxt)
bTypes = ts.getTypes(bTxt)


# (C) Report the token and type totals in outFile

# The sentence is assembled from its fixed pieces plus the two counts, then
# written in a single call, followed by a blank line.
outFile.write(''.join([
    'There are a total of ', str(len(bToks)), ' word tokens and ',
    str(len(bTypes)), ' word types in Obama\'s speech.', '\n\n',
]))


# (D) Build the bigram frequency dictionary

# Maps each item yielded by ts.getWordNGrams(bToks, 2) to the number of times
# it occurs; a bigram seen for the first time starts from a count of 0.
bBigrFreq = {}
for bigr in ts.getWordNGrams(bToks, 2):
    bBigrFreq[bigr] = bBigrFreq.get(bigr, 0) + 1