def testCrawlerConfigWorks(self):
    """Checks that CrawlerConfig exposes every [General] field of a config file.

    Builds an in-memory config file and verifies the three accessors
    (database_address, download_folder, zip_size_limit) return the
    values written under the [General] section.
    """
    config_text = textwrap.dedent("""
        [General]
        database_address: my_database_address
        download_folder: my_download_folder
        zip_size_limit: 30000
        """)
    config = CrawlerConfig(StringIO.StringIO(config_text))
    self.assertEqual('my_database_address', config.database_address())
    self.assertEqual('my_download_folder', config.download_folder())
    self.assertEqual(30000, config.zip_size_limit())
"""Entry point for the 3D-model web crawler.

Spawns N crawler threads that discover candidate URLs and push them onto a
shared queue, and N daemon downloader threads that drain the queue and fetch
the files.
"""
# NOTE(review): CrawlerConfig, DatabaseHandler and CrawlerThread were used
# below but never imported; module names are assumed to follow the same
# snake_case convention as downloader_thread — confirm against the project.
from crawler_config import CrawlerConfig
from crawler_thread import CrawlerThread
from database_handler import DatabaseHandler
from downloader_thread import DownloaderThread

import argparse
import Queue
import threading

parser = argparse.ArgumentParser(
    description="Crawls the web looking for 3D object models.")
parser.add_argument("--config", action="store", type=str)
parser.add_argument("--instances", action="store", type=int, default=10)

if __name__ == "__main__":
    # TODO(brunonery): verify arguments and fail gracefully if necessary.
    args = parser.parse_args()
    config = CrawlerConfig(open(args.config))
    # Prepare database and locks.
    database_handler = DatabaseHandler(config.database_address())
    database_handler.Init()
    url_lock = threading.Lock()
    # Prepare download queue.
    download_queue = Queue.Queue()
    # Start all threads.
    crawler_thread_list = []
    for i in range(args.instances):
        current_thread = CrawlerThread(
            database_handler, download_queue, url_lock)
        crawler_thread_list.append(current_thread)
        current_thread.start()
    downloader_thread_list = []
    # TODO(brunonery): have different number of crawler and downloader threads.
    for i in range(args.instances):
        current_thread = DownloaderThread(
            download_queue, config.download_folder(), config.zip_size_limit())
        # Daemon threads so downloaders never block process exit.
        current_thread.daemon = True
        # Fix: original code built the thread but never tracked or started it,
        # so no downloads would ever run (mirrors the crawler loop above).
        downloader_thread_list.append(current_thread)
        current_thread.start()