def __init__(self, resources_collection, opener, config, worker_pool_size=5):
        """Set up the loader thread, its worker pool and its logger.

        NOTE(review): this module-level definition mirrors
        ``WebPageLoader.__init__`` -- confirm whether the duplicate is
        intentional.

        Args:
            resources_collection: object whose ``find_models()`` supplies the
                resources to download.
            opener: object whose ``open(uri)`` returns a response handle.
            config: mapping whose ``'logging'`` entry is passed to
                ``logging.config.dictConfig``.
            worker_pool_size: value handed to ``WorkerPool`` (default 5).
        """
        super(WebPageLoader, self).__init__()

        # Collaborators supplied by the caller.
        self._resources_collection = resources_collection
        self._opener = opener
        self._worker_pool = WorkerPool(worker_pool_size)

        # dictConfig applies the given logging configuration; the loader then
        # logs through the root logger.
        logging_settings = config['logging']
        logging.config.dictConfig(logging_settings)
        self._logger = logging.getLogger()
class WebPageLoader(Thread):

    def __init__(self, resources_collection, opener, config, worker_pool_size=5):
        """Set up the loader thread, its worker pool and its logger.

        Args:
            resources_collection: object whose ``find_models()`` supplies the
                resources to download.
            opener: object whose ``open(uri)`` returns a response handle.
            config: mapping whose ``'logging'`` entry is passed to
                ``logging.config.dictConfig``.
            worker_pool_size: value handed to ``WorkerPool`` (default 5).
        """
        super(WebPageLoader, self).__init__()

        # Collaborators supplied by the caller.
        self._resources_collection = resources_collection
        self._opener = opener
        self._worker_pool = WorkerPool(worker_pool_size)

        # dictConfig applies the given logging configuration; the loader then
        # logs through the root logger.
        logging_settings = config['logging']
        logging.config.dictConfig(logging_settings)
        self._logger = logging.getLogger()

    def run(self):
        """Thread body: keep scheduling page downloads on the worker pool.

        Every pass asks the resources collection for its models and queues
        one ``_process_web_page`` task per model.  The loop has no exit
        condition and does not pause between passes.
        """
        while True:
            # Queue a download task for each model reported in this pass.
            models = self._resources_collection.find_models()
            for page in models:
                self._worker_pool.add_task(self._process_web_page, page)

    def _process_web_page(self, resource):

        # if the 'http://' doesn't exist
        if (resource.uri[:7] != 'http://'):
            resource.uri = 'http://' + resource.uri
        entire_content = ''

        try:
            handle = self._opener.open(resource.uri)
            resource.uri = handle.url
            encoding = detect_header_encoding(handle.headers.dict)
            entire_content = decode_html(handle.read(), encoding)
            resource.content = entire_content
            handle.close()
            self._logger.info('Reading %s. Success.' % resource.uri)
            self._enqueue(resource)
        except (IOError, HTTPException), e:
            # mark for retry
            self._logger.error('Reading %s. IO error %s.' % (resource.uri, e))
        except UnicodeDecodeError, e:
            # mark for no more retries
            self._logger.error('Reading %s. Unicode error %s.' % (resource.uri, e))