Ejemplo n.º 1
0
class Controller:
    """Own the shared crawl state and drive a pool of ``Worker`` threads.

    One queue, one shutdown event and the shared containers created in
    ``__init__`` are handed to every worker started by :meth:`start`.
    """

    def __init__(self, sock_timeout, req_delay, workspace, url_obj, max_conn=5):
        """Configure process-wide settings and create the shared state.

        Args:
            sock_timeout: Time-out for terminating a socket connection
                without any reply.
            req_delay: Delay between successive requests to a website.
            workspace: Folder to which the mirror should be saved.
            url_obj: A HostURLParse object representing the base URL.
            max_conn: Maximum number of parallel threads to use.

        Note:
            Constructing a controller has process-wide side effects: it
            configures the root logger at DEBUG level and changes the default
            timeout applied to newly created sockets.
        """
        # Global configuration (affects the whole process, not just this object).
        logging.basicConfig(level=logging.DEBUG)
        socket.setdefaulttimeout(sock_timeout)

        # State shared with every worker thread.
        self.queue = TimedQueue(delay=req_delay)
        self.shutdown_event = threading.Event()
        self.downloaded_hash_set = SafeSet()
        self.static_set = SafeSet()
        self.robots_dict = SafeDict()

        self.max_conn = max_conn
        self.project_directory = workspace  # take this argument from command line
        self.url_obj = url_obj

        # Defined up front so the attribute exists even before start() runs.
        self.workers = []

    def start(self):
        """Start ``max_conn`` workers, enqueue the base URL and wait on the queue.

        Every worker is marked as a daemon thread before it is started. If
        ``KeyboardInterrupt`` or ``SystemExit`` is raised while waiting on the
        queue, the shared shutdown event is set instead of propagating the
        exception.
        """
        self.workers = []
        for _ in range(self.max_conn):
            worker = Worker(
                self.queue,
                self.shutdown_event,
                self.downloaded_hash_set,
                self.static_set,
                self.project_directory,
                self.robots_dict,
            )
            # Thread.setDaemon() is deprecated since Python 3.10; the ``daemon``
            # attribute is the supported spelling.
            # NOTE(review): assumes Worker subclasses threading.Thread - confirm.
            worker.daemon = True
            worker.start()
            self.workers.append(worker)

        self.queue.put(self.url_obj)

        try:
            # NOTE(review): 0.1 is presumably a timeout/poll interval that keeps
            # the main thread interruptible - confirm against TimedQueue.join.
            self.queue.join(.1)
        except (KeyboardInterrupt, SystemExit):
            self.shutdown_event.set()
Ejemplo n.º 2
0
    def __init__(self, sock_timeout, req_delay, workspace, url_obj, max_conn=5):
        """Store the crawl settings and build the state shared between threads.

        Args:
            sock_timeout: Time-out for terminating a socket connection
                without any reply.
            req_delay: Delay between successive requests to a website.
            workspace: Folder to which the mirror should be saved.
            url_obj: A HostURLParse object representing the base URL.
            max_conn: Maximum number of parallel threads to use.
        """
        # Plain settings copied straight from the arguments.
        self.url_obj = url_obj
        self.project_directory = workspace  # take this argument from command line
        self.max_conn = max_conn

        # Process-wide configuration: root logger level and the default
        # timeout for newly created sockets.
        logging.basicConfig(level=logging.DEBUG)
        socket.setdefaulttimeout(sock_timeout)

        # Shared objects; the two sets are deliberately separate instances.
        self.queue = TimedQueue(delay=req_delay)
        self.shutdown_event = threading.Event()
        self.downloaded_hash_set = SafeSet()
        self.static_set = SafeSet()
        self.robots_dict = SafeDict()