class Controller:
    """Owns the shared crawl state and the pool of Worker threads."""

    def __init__(self, sock_timeout, req_delay, workspace, url_obj, max_conn=5):
        """Configure logging, the socket timeout and the shared containers.

        sock_timeout: time-out for terminating a socket connection without
                      any reply
        req_delay   : delay between successive requests to a website
        workspace   : folder to which the mirror should be saved
        url_obj     : a HostURLParse object representing the base URL
        max_conn    : maximum number of parallel threads to use
        """
        ## Set up all configurations
        # Both calls below affect the whole process, not just this instance:
        # basicConfig configures the root logger and setdefaulttimeout
        # applies to every socket created afterwards.
        logging.basicConfig(level=logging.DEBUG)
        socket.setdefaulttimeout(sock_timeout)

        self.queue = TimedQueue(delay=req_delay)
        self.shutdown_event = threading.Event()
        self.downloaded_hash_set = SafeSet()
        self.static_set = SafeSet()
        self.max_conn = max_conn
        self.project_directory = workspace  # take this argument from command line
        self.url_obj = url_obj
        self.robots_dict = SafeDict()
        # Filled in by start(); defined here so the attribute always exists.
        self.workers = []

    def start(self):
        """Spawn the workers, enqueue the base URL and wait on the queue.

        Creates ``max_conn`` Worker threads that all share this controller's
        queue, shutdown event, sets, project directory and robots dict.
        KeyboardInterrupt / SystemExit raised while waiting sets the shutdown
        event instead of propagating.
        """
        self.workers = []
        for _ in range(self.max_conn):
            t = Worker(self.queue, self.shutdown_event,
                       self.downloaded_hash_set, self.static_set,
                       self.project_directory, self.robots_dict)
            # Thread.setDaemon() is deprecated; the attribute is equivalent.
            # NOTE(review): assumes Worker is a threading.Thread subclass.
            t.daemon = True
            t.start()
            self.workers.append(t)

        self.queue.put(self.url_obj)
        try:
            # The argument's meaning is defined by TimedQueue.join —
            # presumably a timeout / poll interval in seconds; TODO confirm.
            self.queue.join(.1)
        except (KeyboardInterrupt, SystemExit):
            self.shutdown_event.set()
def __init__(self, sock_timeout, req_delay, workspace, url_obj, max_conn=5):
    """Set up logging, the default socket timeout and the shared state.

    Arguments:
        sock_timeout -- time-out for terminating a socket connection
                        without any reply
        req_delay    -- delay between successive requests to a website
        workspace    -- folder to which the mirror should be saved
        url_obj      -- a HostURLParse object representing the base URL
        max_conn     -- maximum number of parallel threads to use
    """
    # Process-wide configuration.
    logging.basicConfig(level=logging.DEBUG)
    socket.setdefaulttimeout(sock_timeout)

    # Shared work queue and the event used to signal shutdown.
    self.queue = TimedQueue(delay=req_delay)
    self.shutdown_event = threading.Event()

    # Shared bookkeeping containers.
    self.downloaded_hash_set = SafeSet()
    self.static_set = SafeSet()

    # Values taken straight from the arguments.
    self.max_conn = max_conn
    # take this argument from command line
    self.project_directory = workspace
    self.url_obj = url_obj

    self.robots_dict = SafeDict()