Ejemplo n.º 1
0
class indexer:
	"""Wrapper around a PatriciaTrie index that can be pickled to and from disk.

	Instance state:
	  index    -- the trie object; built in __init__ or replaced by loadFromFile
	  modified -- True when saveIndex should write; False right after a load
	"""

	def __init__ (self):
		"""Create the wrapper around a newly constructed PatriciaTrie."""
		self.index = PatriciaTrie ()
		# A new index starts out flagged as modified, so saveIndex will
		# write it even before anything has been inserted.
		self.modified = True

	def loadFromFile (self, indexFile):
		"""Replace the current index with the object pickled in indexFile.

		indexFile -- path of a pickle file, such as one written by saveIndex.

		Whatever self.index held before is discarded. Errors raised by
		open() or pickle.load() propagate to the caller, and in that case
		neither self.index nor self.modified is changed.
		"""
		# TODO assert that trie should be empty right now,
		# otherwise you'll lose the index
		# NOTE: unpickling can execute arbitrary code; only load index
		# files that come from a trusted source.
		with open(indexFile, 'rb') as fp:
			# The with-block closes the handle even if unpickling fails.
			self.index = pickle.load (fp)
		self.modified = False

	def insertWord (self, key, data):
		"""Insert a key/data pair into the index and flag it as modified.

		key  -- the key passed to the trie's insert()
		data -- the value stored under that key
		"""
		# TODO assert that same word doesn't repeat
		self.modified = True
		self.index.insert(key, data)

	def searchWord (self, word):
		"""Look word up in the index.

		Returns whatever the trie's lookup() returns. If lookup() raises
		KeyError, a message is printed and None is returned instead.
		"""
		try:
			return self.index.lookup (word)
		except KeyError:
			# Parenthesised form prints the same text on Python 2 and 3.
			print("Oops.. Key Error")
			return None

	def saveIndex (self, fileName):
		"""Pickle the index to fileName, but only if it is flagged as modified.

		fileName -- path of the file to (over)write.

		The modified flag is not cleared here, so a later call writes again.
		"""
		if not self.modified:
			return
		with open (fileName, 'wb') as fp:
			# Closing via the with-block guarantees the data is flushed.
			pickle.dump (self.index, fp)
Ejemplo n.º 2
0
	def __init__(self):
		"""Set up the instance around a newly constructed PatriciaTrie."""
		fresh_trie = PatriciaTrie()
		self.index = fresh_trie
		# Flagged as modified right away, before anything is inserted.
		self.modified = True
def freq(doc_id, q_index):
	"""Return the value paired with doc_id in list_query[q_index], or 0.

	Walks the entries of the module-level list_query[q_index] in order.
	Falsy entries are skipped. For every other entry, element 0 is
	converted with int() and compared against doc_id; on the first match
	element 1 of that entry is returned.

	doc_id  -- document id, compared against int(entry[0])
	q_index -- index into list_query

	NOTE(review): element 1 is presumably a term frequency -- confirm
	against the code that fills list_query.
	"""
	for entry in list_query[q_index]:
		if not entry:
			continue
		if int(entry[0]) == doc_id:
			return entry[1]
	# No entry matched the requested document.
	return 0
	
def num_docs(q_index):
	"""Return the number of entries in the module-level list_query[q_index].

	Every entry is counted, including falsy ones.
	"""
	entries = list_query[q_index]
	return len(entries)


if __name__ == '__main__':
	init_time = time.time()
	author_trie = PatriciaTrie()
	content_trie = PatriciaTrie()
	author_trie = pickle.load(open('authordump.dat', 'rb'))
	content_trie = pickle.load(open('contentdump.dat', 'rb'))
	inStream = open("query.dat","r")
	fo = open("doc_length.txt","r")
	for lines in fo.readlines():
		doc_length.append(lines.split('\n')[0])
	avgdl= float(doc_length[-1])
	del doc_length[-1]
	total_documents = len(doc_length)
	counter = 1;
	fo.close()
	fo = open("output_results.txt","w")
	mid_time = time.time()
	const_time = mid_time - init_time
	while(1):