Example #1
0
class IndexerThread(threading.Thread):
    """Worker thread that applies queued index / un-index jobs to a search index.

    Every job taken from the queue is a ``(path, add_or_delete)`` pair:
    ``add_or_delete`` is True to index ``path`` and False to remove it from
    the index. Progress lines are printed to standard output.
    """

    def __init__(self, queue, index_directory, log_rate):
        """Set up the engine, its writer and the logging counters.

        params:
            - queue: queue object the jobs are read from; only its blocking
              ``get()`` method is used by this class
            - index_directory: passed to ``Engine`` to open the index
            - log_rate: a successful job is logged whenever the running job
              count is a multiple of this value (0 raises ZeroDivisionError
              on the first successful job)
        """
        threading.Thread.__init__(self)  # needed for thread to be instantiated

        self.search_engine = Engine(index_directory)
        # NOTE(review): the meaning of the writer arguments (120, 5000, 512)
        # is not visible from this class -- confirm against Engine.writer.
        self.buffered_writer = self.search_engine.writer(120, 5000, 512)
        self.queue = queue
        self.log_count = 0
        self.log_rate = log_rate

    def run(self):
        """Process jobs from the queue forever.

        A blocking ``get()`` is used so the thread sleeps while the queue is
        empty instead of spinning, and so that there is no window between an
        ``empty()`` check and the fetch in which another consumer could take
        the job.
        """
        while True:
            path, add_or_delete = self.queue.get()
            self._handle_job(path, add_or_delete)

    def _handle_job(self, path, add_or_delete):
        """Run one job, print its log line if due, and bump the job counter."""
        time_taken, successful = self.add_or_remove(path, add_or_delete)

        if not successful:
            # Failures are always reported, regardless of log_rate.
            self._report("index/ un-index of path was unsuccessful: ",
                         [path, str(datetime.now())])
        elif self.log_count % self.log_rate == 0:
            if add_or_delete:
                label = "time taken to index path: "
            else:
                label = "time taken to un-index path: "
            self._report(label, [time_taken, path, str(datetime.now())])

        self.log_count += 1

    @staticmethod
    def _report(label, details):
        """Print ``label`` and ``details`` separated by one space."""
        # Single pre-formatted argument: produces the same text whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print("%s %s" % (label, details))

    def add_or_remove(self, path, add_or_delete):
        """Index 'path' or remove it from the index, timing the operation.

        params:
            - path: the path to index/ remove from index
            - add_or_delete: True for add/ False for delete
        return:
            - a two-item list ``[time_taken, successful]``; it is
              ``[0.0, False]`` when 'path' is a directory (nothing is done),
              otherwise ``[elapsed_seconds, True]``
        """
        if os.path.isdir(path):
            return [0.0, False]

        started = time.time()

        if add_or_delete:
            self.search_engine.add_document(path, self.buffered_writer)
        else:
            self.search_engine.remove_document(path, self.buffered_writer)

        return [time.time() - started, True]
Example #2
0
- Using monitored_directory and index_directory2
"""


# Command-line interface: two required positional arguments, the index
# directory and the directory whose contents are to be queued.
parser = argparse.ArgumentParser(description="the index directory")
parser.add_argument("index_directory", help="the index_directory to refresh")
parser.add_argument("monitored_directory", help="the monitored_directory to load into queue")



# Parsing is done at module level, so it runs as soon as this script executes.
args = parser.parse_args()
index_directory = args.index_directory
monitored_directory = args.monitored_directory
# Build the engine for the index directory and obtain a writer from it.
# NOTE(review): the meaning of the writer arguments (120, 1000, 512) is not
# visible here -- confirm against Engine.writer.
search_engine = Engine(index_directory)
buffered_writer = search_engine.writer(120, 1000, 512)

# Queue the monitored directory is loaded into (see the argument help above).
queue = Queue.Queue()
# Starting path; presumably handed to recursive_put further down -- confirm.
path = monitored_directory

def recursive_put(path, queue):
    """Put every regular file found at or below 'path' onto 'queue'.

    params:
        - path: a file or directory path; a file is queued as-is, a
          directory is descended into, anything else is ignored
        - queue: any object with a ``put(item)`` method
    return:
        - None; the paths are delivered through ``queue.put``
    """
    pending = [path]

    while pending:
        current = pending.pop()

        if os.path.isfile(current):
            queue.put(current)
            continue

        if not os.path.isdir(current):
            # Neither a file nor a directory (e.g. missing path): skip it.
            continue

        children = [os.path.join(current, entry) for entry in os.listdir(current)]
        # Pushed in reverse so that popping from the end visits the entries
        # depth-first in listing order.
        pending.extend(reversed(children))