class indexer:
    """Thin wrapper around a PatriciaTrie that tracks a dirty flag and
    can persist the index to / restore it from a pickle file.

    Attributes:
        index: the underlying trie (any object with insert/lookup).
        modified: True when the in-memory index has unsaved changes.
    """

    # Initialize a brand-new, empty TRIE.
    def __init__(self):
        self.index = PatriciaTrie()
        # Dirty from the start so a subsequent saveIndex() persists it.
        self.modified = True

    # Load the index from a saved file.
    def loadFromFile(self, indexFile):
        """Replace the in-memory index with one unpickled from indexFile.

        SECURITY: pickle.load executes arbitrary code if the file is
        untrusted — only load index files this program produced.
        TODO: assert that the trie is empty right now, otherwise the
        existing (possibly modified) index is silently discarded.
        """
        fp = open(indexFile, 'rb')
        try:
            self.index = pickle.load(fp)
        finally:
            fp.close()  # fix: original leaked the file handle
        self.modified = False

    # Insert a key/data pair into the index.
    def insertWord(self, key, data):
        """Insert (key, data) and mark the index dirty.

        TODO: assert that the same word doesn't repeat.
        """
        self.modified = True
        self.index.insert(key, data)

    # Search the word in the index; if found, return the stored data.
    def searchWord(self, word):
        """Return the data stored for word, or None if it is absent."""
        try:
            return self.index.lookup(word)
        except KeyError:
            # Parenthesized form prints identically under Python 2 and 3.
            print("Oops.. Key Error")
            return None

    # Save the index to fileName.
    def saveIndex(self, fileName):
        """Pickle the index to fileName, but only when it was modified.

        NOTE(review): modified is deliberately left True afterwards (as in
        the original), so a later save to another file still writes.
        """
        if self.modified:
            fp = open(fileName, 'wb')
            try:
                pickle.dump(self.index, fp)
            finally:
                fp.close()  # fix: original leaked the handle / risked unflushed data
# NOTE(review): stray top-level duplicate of indexer.__init__ — appears to be
# leftover dead code; it is never reachable as a method of any class here.
def __init__(self):
    self.modified = True
    self.index = PatriciaTrie()
def freq(doc_id, q_index): for id in list_query[q_index]: if(id): if(int(id[0]) == doc_id): return id[1] return 0 def num_docs(q_index): return (len(list_query[q_index])) if __name__ == '__main__': init_time = time.time() author_trie = PatriciaTrie() content_trie = PatriciaTrie() author_trie = pickle.load(open('authordump.dat', 'rb')) content_trie = pickle.load(open('contentdump.dat', 'rb')) inStream = open("query.dat","r") fo = open("doc_length.txt","r") for lines in fo.readlines(): doc_length.append(lines.split('\n')[0]) avgdl= float(doc_length[-1]) del doc_length[-1] total_documents = len(doc_length) counter = 1; fo.close() fo = open("output_results.txt","w") mid_time = time.time() const_time = mid_time - init_time while(1):