class IndexerThread(threading.Thread):
    """Worker thread that applies queued index / un-index requests.

    Every item taken from ``queue`` is a ``(path, add_or_delete)`` pair:
    ``add_or_delete`` is True to add ``path`` to the index and False to
    remove it from the index.
    """

    def __init__(self, queue, index_directory, log_rate, log_file):
        """Create the search engine, its buffered writer and the counters.

        params:
            - queue: source of ``(path, add_or_delete)`` requests
            - index_directory: passed straight to ``Engine``
            - log_rate: a successful operation is logged only when the
              running item count is a multiple of this value (it is used as
              a modulus, so it must not be 0)
            - log_file: first argument handed to ``log``
        """
        # The Thread machinery has to be initialised before start() is called.
        threading.Thread.__init__(self)
        self.search_engine = Engine(index_directory)
        # NOTE(review): 120 / 100 / 512 are writer settings whose meaning is
        # defined by Engine.new_buffered_writer -- confirm against that API.
        self.buffered_writer = self.search_engine.new_buffered_writer(
            120, 100, 512)
        self.queue = queue
        self.log_count = 0
        self.log_rate = log_rate
        self.log_file = log_file

    def run(self):
        """Process requests from the queue forever, logging the outcomes."""
        while True:
            # Blocking get(): the thread sleeps until work arrives. Polling
            # empty() and then calling get_nowait() spun at full CPU while
            # the queue was idle and could raise the queue's Empty error if
            # another consumer removed the item between the two calls.
            path, add_or_delete = self.queue.get()
            time_taken, successful = self.add_or_remove(path, add_or_delete)
            self._log_result(path, add_or_delete, time_taken, successful)
            # Counts every processed item, successful or not.
            self.log_count += 1

    def _log_result(self, path, add_or_delete, time_taken, successful):
        """Write the log entry (if any) for one processed request."""
        if not successful:
            # Failures are always logged, regardless of log_rate.
            log(self.log_file,
                ("index/ un-index of path was unsuccessful: ",
                 [path, str(datetime.now())]))
            return
        if self.log_count % self.log_rate != 0:
            # Successful operations are only sampled, to keep the log small.
            return
        if add_or_delete:
            message = "time taken to index path: "
        else:
            message = "time taken to un-index path: "
        log(self.log_file, (message, [time_taken, path, str(datetime.now())]))

    def add_or_remove(self, path, add_or_delete):
        """Index 'path' or remove it from the index, timing the operation.

        params:
            - path: the path to index/ remove from index
            - add_or_delete: True for add/ False for delete
        return:
            a two-element list ``[time_taken, successful]``:
            - time_taken: seconds spent in the engine call
              (0.0 if unsuccessful; i.e. directory, not file)
            - successful: False when 'path' is a directory, True otherwise
        """
        if os.path.isdir(path):
            # Directories are never indexed themselves.
            return [0.0, False]
        started = time.time()
        if add_or_delete:
            self.search_engine.add_document(path, self.buffered_writer)
        else:
            self.search_engine.remove_document(path, self.buffered_writer)
        return [time.time() - started, True]
for l in ls: recursive_put(os.path.join(path, l), queue) else: pass recursive_put(path, queue) print "files are loaded" print "beginning the indexing part" counter = 0 while not queue.empty(): queue_time0 = time.time() path = queue.get() queue_time1 = time.time() index_time0 = time.time() search_engine.add_document(path, buffered_writer) index_time1 = time.time() queue_time = queue_time1 - queue_time0 index_time = index_time1 - index_time0 counter += 1 if counter % 500 == 0: print "indexed path, queue_time, index_time, path: ", [str(datetime.now()), queue_time, index_time, path]