Ejemplo n.º 1
0
class IndexerThread(threading.Thread):
    """Worker thread that applies index / un-index requests taken from a queue.

    Every queue item is a ``(path, add_or_delete)`` pair: ``add_or_delete``
    is True to index ``path`` and False to remove it from the index.
    ``run`` loops forever; this class provides no stop mechanism.
    """

    def __init__(self, queue, index_directory, log_rate, log_file):
        """Create the search engine, its buffered writer and the counters.

        params:
            - queue: source of ``(path, add_or_delete)`` work items; must
              offer a blocking ``get()``
            - index_directory: passed straight to ``Engine``
            - log_rate: a timing line is logged when the number of items
              processed so far is a multiple of this value; 0 (or None)
              disables timing lines
            - log_file: first argument handed to ``log`` for every message
        """
        # Initialise the Thread machinery first, before any attribute is set.
        threading.Thread.__init__(self)

        self.search_engine = Engine(index_directory)
        # NOTE(review): the meaning of (120, 100, 512) is defined by
        # Engine.new_buffered_writer, which is not visible here - confirm.
        self.buffered_writer = self.search_engine.new_buffered_writer(120, 100, 512)
        self.queue = queue
        self.log_count = 0
        self.log_rate = log_rate
        self.log_file = log_file

    def run(self):
        """Process queue items forever, logging timings and failures."""
        while True:
            # A blocking get() sleeps until work arrives. Polling empty()
            # and then calling get_nowait() spins a CPU core while idle and
            # can raise if another consumer takes the item in between.
            path, add_or_delete = self.queue.get()
            time_taken, successful = self.add_or_remove(path, add_or_delete)

            if not successful:
                log(self.log_file, ("index/ un-index of path was unsuccessful: ", [path, str(datetime.now())]))
            elif self.log_rate and self.log_count % self.log_rate == 0:
                # Guarding on log_rate avoids a ZeroDivisionError (which
                # would kill the thread) when the rate is 0.
                if add_or_delete:
                    message = "time taken to index path: "
                else:
                    message = "time taken to un-index path: "
                log(self.log_file, (message, [time_taken, path, str(datetime.now())]))

            # Counts every processed item, successful or not.
            self.log_count += 1

    def add_or_remove(self, path, add_or_delete):
        """Index or un-index 'path' and report how long that took.

        params:
            - path: the path to index/ remove from index
            - add_or_delete: True for add/ False for delete
        return:
            a two-element list ``[time_taken, successful]``:
            - time_taken: seconds spent in the engine call (0.0 if
              unsuccessful; i.e. directory, not file)
            - successful: False when 'path' is a directory, True otherwise
        """
        # Directories are never handed to the engine.
        if os.path.isdir(path):
            return [0.0, False]

        started = time.time()

        if add_or_delete:
            self.search_engine.add_document(path, self.buffered_writer)
        else:
            self.search_engine.remove_document(path, self.buffered_writer)

        return [time.time() - started, True]
Ejemplo n.º 2
0
		for l in ls:
			recursive_put(os.path.join(path, l), queue)
	else: 
		pass

# Start the traversal at `path`, handing it the shared queue.
# NOTE(review): recursive_put is only partly visible here; presumably it
# enqueues the files it finds under `path` - confirm against its full body.
recursive_put(path, queue)

print "files are loaded"
print "beginning the indexing part"

# Drain the queue on this thread, indexing one path per iteration and
# timing the queue hand-off and the engine call separately.
counter = 0
while not queue.empty():

	# Time spent taking the next path off the queue.
	queue_time0 = time.time()
	path = queue.get()
	queue_time1 = time.time()

	# Time spent adding that path to the index.
	index_time0 = time.time()
	search_engine.add_document(path, buffered_writer)
	index_time1 = time.time()

	queue_time = queue_time1 - queue_time0
	index_time = index_time1 - index_time0

	counter += 1

	# Print a progress line for every 500th document only.
	if counter % 500 == 0:

		# NOTE(review): the label's first field reads "indexed path", but
		# the first list element is the current timestamp.
		print "indexed path, queue_time, index_time, path: ", [str(datetime.now()), queue_time, index_time, path]