Python relativizeTokenCounts Examples

Programming Language: Python

Namespace/Package Name: corpus

Method/Function: relativizeTokenCounts

Examples at hotexamples.com: 3

Python relativizeTokenCounts - 3 examples found. These are the top-rated real-world Python examples of corpus.relativizeTokenCounts, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

# use this and then prefix all function names with corpus.
#import corpus
# or use this
from corpus import loadTextFromFile, tokenize, getTokenCounts, prettyPrintFrequencyProfile, relativizeTokenCounts

from math import log


# Build a token frequency profile for the first text, "pg873.txt".
mytext = loadTextFromFile("pg873.txt")

# tokenize mytext and return list of tokens
tokens = tokenize(mytext)

# count tokens
mydict = getTokenCounts(tokens)
# The return value is ignored, so relativizeTokenCounts presumably rewrites
# mydict in place (absolute counts -> relative frequencies) -- TODO confirm
# against the corpus module.
relativizeTokenCounts(mydict)

# pretty-print tokens and frequencies (currently disabled)
#prettyPrintFrequencyProfile(mydict, sortbyfrq=True, myreverse=True)

# Build a second profile from "sports-bbc.txt". Note that mytext is rebound
# here, so the first text is no longer reachable under that name.
mytext = loadTextFromFile("sports-bbc.txt")
mysportsdict = getTokenCounts(tokenize(mytext))
relativizeTokenCounts(mysportsdict)

# Sample sentence; no statement visible in this excerpt uses it yet.
unknowntext = """Yesterday we scored ten goals in the last 45 minutest of the game."""


"""
Wimbledon 2013: Andy Murray's victory could boost British tennis
Comments (149)
All too often sporting moments and achievements are given a misplaced historical significance. Not on Sunday.

Example #2

Show file

File: test-ngrams-1.py Project: dcavar/Py3L



def getMIScore(bigramprob, unigramprobaA, unigramprobB):
    """Return the bigram value weighted by the log of its likelihood ratio.

    Computes ``bigramprob * log(bigramprob / (unigramprobaA * unigramprobB))``.
    ``log`` is whatever the enclosing module has in scope (presumably
    ``math.log``, as imported in the sibling example).
    """
    # Product of the two unigram values: the denominator of the ratio.
    independent = unigramprobaA * unigramprobB
    ratio = bigramprob / independent
    return bigramprob * log(ratio)

def getMIScoreFromFQP(bigrams, unigrams, bigram):
    """Return the weighted log-ratio score of *bigram* from two profiles.

    *bigram* is split on whitespace into exactly two tokens. The score is
    ``bigrams[bigram] * log(bigrams[bigram] / (unigrams[a] * unigrams[b]))``
    where ``a`` and ``b`` are those two tokens.
    """
    first, second = bigram.split()
    joint = bigrams[bigram]
    # Product of the two component-token values: the ratio's denominator.
    independent = unigrams[first] * unigrams[second]
    return joint * log(joint / independent)



# Tokenize the text loaded from "pg873.txt".
tokens = tokenize( loadTextFromFile("pg873.txt") )

# Unigram profile (n = 1). The values are used as probabilities below and the
# call's return value is ignored, so relativizeTokenCounts presumably converts
# the counts in place -- TODO confirm against the corpus module.
unigrams = getNGrams(tokens, 1)
relativizeTokenCounts( unigrams )

# Bigram profile (n = 2); looked up below with a space-separated key.
bigrams = getNGrams(tokens, 2)
#prettyPrintFrequencyProfile(bigrams, myreverse=False)

relativizeTokenCounts( bigrams )


# young Fisherman: likelihood ratio, bigram value / (product of unigram values)
lhr = bigrams["young Fisherman"] / (unigrams["young"] * unigrams["Fisherman"])

# young Fisherman: bigram value * log(likelihood ratio), the same expression
# getMIScore computes.
# NOTE(review): the original comment called this "pointwise Mutual
# Information"; textbook PMI is log(lhr) without the weighting -- confirm intent.
pmi = bigrams["young Fisherman"] * log( lhr )

# Tab-separated output: label, likelihood ratio, score.
print("young Fisherman", lhr, pmi, sep="\t")

Example #3

Show file


def getMIScore(bigramprob, unigramprobaA, unigramprobB):
    """Score a bigram as ``p * log(p / (pA * pB))``.

    Here ``p`` is *bigramprob* and ``pA``/``pB`` are the two unigram values.
    The logarithm is the module-level ``log`` (presumably ``math.log``).
    """
    expected = unigramprobaA * unigramprobB  # denominator of the ratio
    return bigramprob * log(bigramprob / expected)


def getMIScoreFromFQP(bigrams, unigrams, bigram):
    """Look up *bigram* in the given profiles and return its weighted score.

    The key is split on whitespace into two tokens; the result is the bigram's
    value times the log of that value divided by the product of the two
    tokens' unigram values.
    """
    left, right = bigram.split()
    bigram_value = bigrams[bigram]
    ratio = bigram_value / (unigrams[left] * unigrams[right])
    return bigram_value * log(ratio)


# Tokenize the text loaded from "pg873.txt".
tokens = tokenize(loadTextFromFile("pg873.txt"))

# Unigram profile (n = 1). The call's return value is ignored and the values
# are used as probabilities below, so relativizeTokenCounts presumably
# converts the counts in place -- TODO confirm against the corpus module.
unigrams = getNGrams(tokens, 1)
relativizeTokenCounts(unigrams)

# Bigram profile (n = 2); looked up below with a space-separated key.
bigrams = getNGrams(tokens, 2)
#prettyPrintFrequencyProfile(bigrams, myreverse=False)

relativizeTokenCounts(bigrams)

# young Fisherman: likelihood ratio, bigram value / (product of unigram values)
lhr = bigrams["young Fisherman"] / (unigrams["young"] * unigrams["Fisherman"])

# young Fisherman: bigram value * log(likelihood ratio), the same expression
# getMIScore computes.
# NOTE(review): the original comment called this "pointwise Mutual
# Information"; textbook PMI is log(lhr) without the weighting -- confirm intent.
pmi = bigrams["young Fisherman"] * log(lhr)

# Tab-separated output: label, likelihood ratio, score.
print("young Fisherman", lhr, pmi, sep="\t")

# iron chain: likelihood ratio