Python getWordList Examples

Programming Language: Python

Namespace/Package Name: stattools

Method/Function: getWordList

Examples at hotexamples.com: 3

Python getWordList - 3 examples found. These are the top-rated real-world Python examples of stattools.getWordList, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: MIRE.py Project: decode/Rpkg

def test():
  """Print bigram statistics for the files named on the command line.

  Every argument in sys.argv[1:] is treated as a glob pattern. Each
  matching file is read line by line; non-empty lines are stripped,
  lower-cased and passed through getWordList, getBigrams and getTokens to
  accumulate the counts. Afterwards the totals and one tab-separated row
  per entry returned by sortNgrams(bigrams) are printed to stdout.

  Files that cannot be opened or read (IOError) are skipped.
  """
  bigrams     = {}  # bigram as key, frequency as value
  tokens      = {}  # token as key, frequency as value
  tokencount  = 0   # number of tokens
  bigramcount = 0   # number of bigrams

  for pattern in sys.argv[1:]:
    for path in glob.glob(os.path.normcase(pattern)):
      try:
        # The context manager closes the handle on every exit path. The
        # previous handler called close() on a name that was never bound
        # when open() itself failed.
        with open(path, "r") as infile:
          for line in infile:
            line = line.strip().lower()
            if line == "":
              continue
            wordlist = getWordList(line)
            bigrams, bigramcount = getBigrams(wordlist, bigrams, bigramcount)
            tokens, tokencount = getTokens(wordlist, tokens, tokencount)
      except IOError:
        # Best effort: an unreadable file must not abort the whole run.
        continue

  print("Got total:\nBigrams: " + str(bigramcount) + "\nTokens: " + str(tokencount))
  print("Bigram\tFrequency\tRelative Frequency\tMutual Information\tRelative Entropy")
  for entry in sortNgrams(bigrams):
    bigram = entry[0]
    frequency = entry[1]
    tokenlist = bigram.split()
    # The relative frequency feeds both RE and MI, so compute it once per row.
    relfreq = rF(frequency, bigramcount)
    # Named rel_entropy rather than re to avoid shadowing the standard module name.
    rel_entropy = RE(relfreq, P(tokenlist[1], tokens, tokencount), P(tokenlist[0], tokens, tokencount))
    mutual_info = MI(bigram, relfreq, tokens, tokencount)
    print(bigram + "\t" + str(frequency) + "\t" + str(relfreq) + "\t" + str(mutual_info) + "\t" + str(rel_entropy))

Example #2

Show file

File: MIRE.py Project: decode/Rpkg

def caculate(filename, freq=100):
  # Collect bigram/token statistics from `filename` and write the bigrams
  # whose mutual information exceeds `freq` to "mi.txt" (comma-separated
  # statistics) and "dict.txt" (the word pair only), then call
  # merge("dict.txt", "data.basket").
  #
  # filename: path of the text file to read; non-empty lines are stripped
  #           and lower-cased before being passed to getWordList.
  # freq:     threshold compared against the MI value of each bigram; only
  #           rows with mi > freq are written.
  #
  # If the input cannot be opened or read (IOError), the counts gathered so
  # far are used, so the output files may contain only the header line.
  bigrams     = {}  # bigram as key, frequency as value
  tokens      = {}  # token as key, frequency as value
  tokencount  = 0   # number of tokens
  bigramcount = 0   # number of bigrams
  alphabet    = ""  # all characters used

  try:
    # The context manager closes the handle on every exit path. The previous
    # handler called close() on a name that was never bound when open()
    # itself failed.
    with open(filename, "r") as infile:
      for line in infile:
        line = line.strip().lower()
        if line == "":
          continue
        wordlist = getWordList(line)
        bigrams, bigramcount = getBigrams(wordlist, bigrams, bigramcount)
        tokens, tokencount = getTokens(wordlist, tokens, tokencount)
  except IOError:
    # Best effort, as before: continue with whatever was counted.
    pass

  if os.path.exists("mi.txt"):
    os.remove('mi.txt')
  if os.path.exists("dict.txt"):
    os.remove('dict.txt')

  print("Got total:\nBigrams: " + str(bigramcount) + "\nTokens: " + str(tokencount))
  sep = ", "
  # Both output files are closed even if a statistic helper raises mid-loop.
  with open("mi.txt", "w") as f, open("dict.txt", "w") as fl:
    f.write("T1, T2, Frequency, Relative Frequency, Mutual Information, Relative Entropy\n")
    for entry in sortNgrams(bigrams):
      tokenlist = entry[0].split()
      # The relative frequency feeds RE, MI and the output row; compute it once.
      relfreq = rF(entry[1], bigramcount)
      re = RE(relfreq, P(tokenlist[1], tokens, tokencount), P(tokenlist[0], tokens, tokencount))
      mi = MI(entry[0], relfreq, tokens, tokencount)
      if mi > freq:
        # NOTE(review): the second word of the bigram is written before the
        # first; confirm this ordering is what the "T1, T2" header means.
        f.write(tokenlist[1] + sep + tokenlist[0] + sep + str(entry[1]) + sep + str(relfreq) + sep + str(mi) + sep + str(re) + "\n")
        fl.write(tokenlist[1] + " " + tokenlist[0]+ "\n")

  ret = merge("dict.txt", "data.basket")
  '''

Example #3

Show file

File: MIRE.old.py Project: decode/Rpkg

	return bigramprob * math.log(bigramprob/(px * py) , 2)


if __name__ == "__main__":
	# Script entry point: treat every command-line argument as a glob
	# pattern, accumulate bigram and token counts over all matching files,
	# then print the totals and one tab-separated row per entry returned by
	# sortNgrams(bigrams). Written to run unchanged on Python 2 and 3.
	bigrams     = {}	# bigram as key, frequency as value
	tokens      = {}	# token as key, frequency as value
	tokencount  = 0   # number of tokens
	bigramcount = 0   # number of bigrams
	alphabet    = ""  # all characters used

	for pattern in sys.argv[1:]:
		for path in glob.glob(os.path.normcase(pattern)):
			try:
				# The context manager closes the handle on every exit path.
				# The previous handler called close() on a name that was
				# never bound when open() itself failed.
				with open(path, "r") as infile:
					for line in infile:
						line = line.strip().lower()
						if line == "":
							continue
						wordlist = getWordList(line)
						bigrams, bigramcount = getBigrams(wordlist, bigrams, bigramcount)
						tokens, tokencount = getTokens(wordlist, tokens, tokencount)
			except IOError:
				# Best effort: an unreadable file must not abort the run.
				continue

	print("Got total:\nBigrams: " + str(bigramcount) + "\nTokens: " + str(tokencount))
	# Each row below has five fields, so the header names all five.
	print("Bigram\tFrequency\tRelative Frequency\tMutual Information\tRelative Entropy")
	for entry in sortNgrams(bigrams):
		# Split the current bigram into its words. This used to be done once
		# before the loop, from the last input line instead of the bigram.
		myTokens = entry[0].split()
		relfreq = rF(entry[1], bigramcount)
		# NOTE(review): rF receives a frequency count as its first argument
		# above, but the token text here; confirm whether the token's
		# frequency from `tokens` was intended.
		print(entry[0] + "\t" + str(entry[1]) + "\t" + str(relfreq) + "\t" + str(MI(entry[0], relfreq, tokens, tokencount)) + "\t" + str(RE(entry[0], relfreq, rF(myTokens[1], tokencount), rF(myTokens[0], tokencount))))