# NOTE(review): this physical line is collapsed Python 2 source (print
# statements, string.lower/string.strip); its indentation is lost, so the
# statement nesting cannot be read off the text. The notes below follow
# statement order only.
#
# Leading fragment -- tail of a function whose "def" header is not visible:
#   pxy = bigrams[bigram] / bigramcount
#   px  = tokens[token]   / tokencount
#   py  = types[type]     / tokencount
#   returns pxy * log2(pxy / (px * py))
# NOTE(review): py divides a count taken from "types" by tokencount rather
# than typecount -- confirm this is intended. "type" is also the name of a
# builtin.
#
# Script section (if __name__ == "__main__"):
#   * iterates sys.argv[1:], passes each argument through os.path.normcase
#     and glob.glob, and opens the resulting path x for reading;
#   * lower-cases and strips every line from file.readlines(), skips lines
#     that are empty afterwards, and passes the rest to getTWordList;
#   * updates bigrams/bigramcount via getTBigrams(..., TOKEN, TYPE),
#     tokens/tokencount via getTTokens(..., TOKEN), types/typecount via
#     getTTokens(..., TYPE) and bigramsleft/bigramsright via
#     getTLRBigrams(..., TOKEN, TYPE);
#   * sorts tokens with sortNgrams and prints a tab-separated table for at
#     most PRINTWORDS entries. PMI(token) is unpacked as (rmi, lmi); lmi is
#     printed in the first ("Left MI") column and rmi in the third. The last
#     column is the entry's count divided by tokencount.
# NOTE(review): the name "i" is used both for the sys.argv loop and for the
# line loop, and "x" for both the glob loop and the print loop.
# NOTE(review): the IOError handler calls file.close(); if open() is the call
# that raised, "file" was not bound by this attempt -- confirm the intended
# cleanup.
pxy = float(bigrams[bigram])/float(bigramcount) px = float(tokens[token])/float(tokencount) py = float(types[type])/float(tokencount) return pxy * math.log(pxy/(px * py), 2) if __name__ == "__main__": for i in sys.argv[1:]: for x in glob.glob(os.path.normcase(i)): try: file = open(x, "r") for i in file.readlines(): i = string.lower(string.strip(i)) if i == "": continue wordlist = getTWordList(i) bigrams, bigramcount = getTBigrams(wordlist, bigrams, bigramcount, TOKEN, TYPE) tokens, tokencount = getTTokens(wordlist, tokens, tokencount, TOKEN) types, typecount = getTTokens(wordlist, types, typecount, TYPE) bigramsleft, bigramsright = getTLRBigrams(wordlist, bigramsleft, bigramsright, TOKEN, TYPE) file.close() except IOError: file.close() myTokens = sortNgrams(tokens) print "Left MI\tToken\tRight MI\tFrequency\tRelative Frequency" for x in range(min(len(myTokens), PRINTWORDS)): rmi, lmi = PMI(myTokens[x][0]) print str(lmi) + "\t" + myTokens[x][0] + "\t" + str(rmi) + "\t" + str(myTokens[x][1]) + "\t" + str(float(myTokens[x][1])/float(tokencount))
# NOTE(review): this physical line is collapsed Python 2 source (print
# statements, string.lower/string.strip); its indentation is lost, so the
# statement nesting cannot be read off the text. The notes below follow
# statement order only.
#
# Leading fragment -- body of a function whose "def" header is not visible.
# It declares bigrams, tokens, bigramcount and tokencount as globals, then:
#   pxy = bigrams[bigram] / bigramcount
#   px  = tokens[token2]  / tokencount
#   py  = tokens[token1]  / tokencount
#   returns py * log2(py / (pxy / px))
# NOTE(review): presumably a relative-entropy term, judging from the "RE"
# column headers printed below -- confirm the formula and the token1/token2
# roles against the function's header.
#
# Script section (if __name__ == "__main__"):
#   * iterates sys.argv[1:], passes each argument through os.path.normcase
#     and glob.glob, and opens the resulting path x for reading;
#   * lower-cases and strips every line from file.readlines(), skips lines
#     that are empty afterwards, and passes the rest to getTWordList;
#   * updates bigrams/bigramcount via getTBigrams(..., TOKEN, TOKEN),
#     tokens/tokencount via getTTokens(..., TOKEN) and
#     bigramsleft/bigramsright via getTLRBigrams(..., TOKEN, TOKEN);
#   * sorts tokens with sortNgrams and prints a tab-separated table for at
#     most PRINTWORDS entries. PRE(token) is unpacked as (rre, lre); lre is
#     printed in the first ("Left RE") column and rre in the third. The last
#     column is the entry's count divided by tokencount.
# NOTE(review): the name "i" is used both for the sys.argv loop and for the
# line loop, and "x" for both the glob loop and the print loop.
# NOTE(review): the IOError handler calls file.close(); if open() is the call
# that raised, "file" was not bound by this attempt -- confirm the intended
# cleanup.
global bigrams, tokens, bigramcount, tokencount pxy = float(bigrams[bigram])/float(bigramcount) px = float(tokens[token2])/float(tokencount) py = float(tokens[token1])/float(tokencount) return py * math.log(py/(pxy/px), 2) if __name__ == "__main__": for i in sys.argv[1:]: for x in glob.glob(os.path.normcase(i)): try: file = open(x, "r") for i in file.readlines(): i = string.lower(string.strip(i)) if i == "": continue wordlist = getTWordList(i) bigrams, bigramcount = getTBigrams(wordlist, bigrams, bigramcount, TOKEN, TOKEN) tokens, tokencount = getTTokens(wordlist, tokens, tokencount, TOKEN) bigramsleft, bigramsright = getTLRBigrams(wordlist, bigramsleft, bigramsright, TOKEN, TOKEN) file.close() except IOError: file.close() myTokens = sortNgrams(tokens) print "Left RE\tToken\tRight RE\tFrequency\tRelative Frequency" for x in range(min(len(myTokens), PRINTWORDS)): rre, lre = PRE(myTokens[x][0]) print str(lre) + "\t" + myTokens[x][0] + "\t" + str(rre) + "\t" + str(myTokens[x][1]) + "\t" + str(float(myTokens[x][1])/float(tokencount))
# NOTE(review): this physical line is collapsed Python 2 source (print
# statements, string.lower/string.strip); its indentation is lost, so the
# statement nesting cannot be read off the text. The notes below follow
# statement order only.
#
# Leading fragment -- body of a function whose "def" header is not visible.
# It declares bigrams, tokens, bigramcount and tokencount as globals, then:
#   pxy = bigrams[bigram] / bigramcount
#   px  = tokens[token1]  / tokencount
#   py  = tokens[token2]  / tokencount
#   returns pxy * log2(pxy / (px * py))
#
# Script section (if __name__ == "__main__"):
#   * iterates sys.argv[1:], passes each argument through os.path.normcase
#     and glob.glob, and opens the resulting path x for reading;
#   * lower-cases and strips every line from file.readlines(), skips lines
#     that are empty afterwards, and passes the rest to getTWordList;
#   * updates bigrams/bigramcount via getTBigrams(..., TYPE, TYPE),
#     tokens/tokencount via getTTokens(..., TYPE) and
#     bigramsleft/bigramsright via getTLRBigrams(..., TYPE, TYPE);
#   * sorts tokens with sortNgrams and prints a tab-separated table for at
#     most PRINTWORDS entries. PMI(token) is unpacked as (rmi, lmi); lmi is
#     printed in the first ("Left MI") column and rmi in the third. The last
#     column is the entry's count divided by tokencount.
# NOTE(review): the dictionary named "tokens" is filled with TYPE here while
# the table header still says "Token" -- confirm the labelling is intended.
# NOTE(review): the name "i" is used both for the sys.argv loop and for the
# line loop, and "x" for both the glob loop and the print loop.
# NOTE(review): the IOError handler calls file.close(); if open() is the call
# that raised, "file" was not bound by this attempt -- confirm the intended
# cleanup.
global bigrams, tokens, bigramcount, tokencount pxy = float(bigrams[bigram])/float(bigramcount) px = float(tokens[token1])/float(tokencount) py = float(tokens[token2])/float(tokencount) return pxy * math.log(pxy/(px * py), 2) if __name__ == "__main__": for i in sys.argv[1:]: for x in glob.glob(os.path.normcase(i)): try: file = open(x, "r") for i in file.readlines(): i = string.lower(string.strip(i)) if i == "": continue wordlist = getTWordList(i) bigrams, bigramcount = getTBigrams(wordlist, bigrams, bigramcount, TYPE, TYPE) tokens, tokencount = getTTokens(wordlist, tokens, tokencount, TYPE) bigramsleft, bigramsright = getTLRBigrams(wordlist, bigramsleft, bigramsright, TYPE, TYPE) file.close() except IOError: file.close() myTokens = sortNgrams(tokens) print "Left MI\tToken\tRight MI\tFrequency\tRelative Frequency" for x in range(min(len(myTokens), PRINTWORDS)): rmi, lmi = PMI(myTokens[x][0]) print str(lmi) + "\t" + myTokens[x][0] + "\t" + str(rmi) + "\t" + str(myTokens[x][1]) + "\t" + str(float(myTokens[x][1])/float(tokencount))