Exemple #1
0
	def __init__(self):
		"""Set up the datastore handle, string constants and bigram scorer.

		No arguments are taken; every attribute is assigned a fixed value
		except ``r``, which comes from ``Datastore.factory()``.
		"""
		# The datastore handle is obtained before anything else is assigned.
		self.r = Datastore.factory()

		# String constants; presumably datastore key prefixes -- confirm
		# against the methods that use them.
		self.DIGRAM, self.WORD2ID = "DIGRAM", "WORD2ID"

		# NOTE(review): looks like the number of top-scoring results to
		# keep -- confirm against usage.
		self.TOP_N = 5

		# Keep a reference to the measures class itself (not an instance)
		# and use its likelihood_ratio as the scoring function.
		measures = nltk.collocations.BigramAssocMeasures
		self.bgm = measures
		self.SCORER_FN = measures.likelihood_ratio
Exemple #2
0
import sys
sys.path.append("../")
from datastore import Datastore

# Datastore handle shared by every step of this script.
r = Datastore.factory()

# Input file used when no path is given on the command line.
DEFAULT = "../cusp/b_cpu"

# Names used to build datastore keys of the form "<NAME>:<id>".
RANK = "RANK"
SORTED_WORD_IN = "SORTED_WORD_IN"
WORD2ID = "WORD2ID"
FOUND_IN = "FOUND_IN"

# Read the rank file named by the first command-line argument, falling back
# to DEFAULT when the script is run without arguments.
rank_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT

# Store each stripped line under "RANK:<n>", where n is the 1-based line
# number. The context manager closes the file even if a datastore write
# raises part-way through, which the previous open()/close() pair did not.
with open(rank_path) as f:
	for i, l in enumerate(f):
		r.set(RANK + ":" + str(i + 1), l.strip())

# For every member of "WORD_SET", look up its id and then, for each URL id
# recorded under "FOUND_IN:<word_id>", add that URL id to
# "SORTED_WORD_IN:<word_id>" together with the value stored at
# "RANK:<url_id>" converted to a float.
for word in r.smembers("WORD_SET"):
	word_id = r.get(WORD2ID + ":" + word)
	found_key = FOUND_IN + ":" + word_id
	sorted_key = SORTED_WORD_IN + ":" + word_id
	for url_id in r.smembers(found_key):
		rank = r.get(RANK + ":" + url_id)
		score = float(rank)
		r.zadd(sorted_key, url_id, score)

# to be commented out if everything screws up
for word in r.smembers("WORD_SET"):
	word_id = r.get(WORD2ID + ":" + word)