# Release the input file handle (presumably bTxt was read from it earlier).
bInfile.close()

# Both the token list and the type collection are derived from the same text.
bToks = ts.getTokens(bTxt)
bTypes = ts.getTypes(bTxt)


# (C) Report the token and type totals in outFile

countSummary = ''.join([
    'There are a total of ', str(len(bToks)), ' word tokens and ',
    str(len(bTypes)), ' word types in the King James Bible.', '\n\n',
])
outFile.write(countSummary)


# (D) Build the bigram frequency dictionary

# Tally every word bigram (n = 2) produced from the token list; a bigram that
# has not been seen yet starts from a count of zero.
bBigrFreq = {}
for bigr in ts.getWordNGrams(bToks, 2):
    bBigrFreq[bigr] = bBigrFreq.get(bigr, 0) + 1


# (E) Report the most frequent bigrams together with their counts

n1 = 20
outFile.write(''.join(['Top ', str(n1), ' word bigrams in the Bible:\n']))

# Order (bigram, count) pairs by count, highest first. The sort is stable, so
# bigrams with equal counts keep the dictionary's iteration order.
rankedBigrs = sorted(bBigrFreq.items(), key=lambda pair: pair[1], reverse=True)
for bigr, bigrCount in rankedBigrs[:n1]:
    # One line per bigram: "<word1> <word2><TAB><TAB><count>"
    outFile.write(' '.join((bigr[0], bigr[1])) + '\t\t' + str(bigrCount) + '\n')
outFile.write('\n')


# (F) Create 'so-initial' bigram frequency dictionary
Beispiel #2
0
# Release the input file handle (presumably bTxt was read from it earlier).
bInfile.close()

# Both the token list and the type collection are derived from the same text.
bToks = ts.getTokens(bTxt)
bTypes = ts.getTypes(bTxt)


# (C) Report the token and type totals in outFile

countSummary = ''.join([
    'There are a total of ', str(len(bToks)), ' word tokens and ',
    str(len(bTypes)), ' word types in Obama\'s speech.', '\n\n',
])
outFile.write(countSummary)


# (D) Build the bigram frequency dictionary

# Tally every word bigram (n = 2) produced from the token list; a bigram that
# has not been seen yet starts from a count of zero.
bBigrFreq = {}
for bigr in ts.getWordNGrams(bToks, 2):
    bBigrFreq[bigr] = bBigrFreq.get(bigr, 0) + 1


# (E) Write out the top bigrams and their counts

# Number of highest-frequency bigrams to report.
n1 = 20

# The header must name the text actually being analysed. It previously said
# "the Bible" (left over from the Bible version of this script), although the
# count summary written just before it describes this text as Obama's speech.
outFile.write('Top '+str(n1)+' word bigrams in Obama\'s speech:\n')

# Sort bigrams by descending count and keep the first n1; each output line is
# "<word1> <word2><TAB><TAB><count>".
for bigr in sorted(bBigrFreq, key=bBigrFreq.get, reverse=True)[:n1]:
    outFile.write(bigr[0]+' '+bigr[1]+'\t\t'+str(bBigrFreq[bigr])+'\n')

# A blank line separates this report from whatever is written next.
outFile.write('\n')


# (F) Create 'so-initial' bigram frequency dictionary