Esempio n. 1
0
def check_proxy(proxy, timeout=10):
    """Return whether a probe request succeeds when routed through ``proxy``.

    Args:
        proxy: Proxy entry of the form ``"<protocol>=<proxy address>"``.
        timeout: Seconds to wait for the probe request before giving up.

    Returns:
        ``True`` if ``http://www.baidu.com`` could be read with the proxy
        mapping installed, ``False`` if the request failed for any reason.

    Raises:
        ValueError: If ``proxy`` contains no ``=`` separator.
    """
    # Split only on the first '=' so an address containing '=' stays intact.
    protocol, address = proxy.split('=', 1)
    # ProxyHandler expects {scheme: proxy address}; an empty mapping would
    # disable proxying and probe the direct connection instead.
    # NOTE(review): the probe URL is plain http, so only an entry whose
    # protocol is 'http' is actually routed through the proxy.
    proxy_map = {protocol: address}
    try:
        proxy_support = urllib.request.ProxyHandler(proxy_map)
        opener = urllib.request.build_opener(proxy_support)
        with opener.open('http://www.baidu.com', timeout=timeout) as response:
            response.read()
        return True
    except Exception:
        # Best-effort probe: any failure means the proxy is treated as unusable.
        return False
Esempio n. 2
0
    def proxy_fetch(self, proxies_queue):
        """Fetch ``self.url`` through a proxy taken from ``proxies_queue``.

        Args:
            proxies_queue: Queue of proxy entries, each a string of the form
                ``"<protocol>=<proxy address>"``.

        Returns:
            The raw response body as ``bytes`` on success, or ``False`` when
            the request fails for any reason.

        Raises:
            ValueError: If the entry taken from the queue contains no ``=``.
        """
        # Browser-like headers sent with the request.
        headers = {'Content-type': 'application/x-www-form-urlencoded',
                   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                   'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.1 '
                                 '(KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1'}

        # Fetch a fresh proxy list when the queue has run dry.
        # NOTE(review): this rebinds only the local name, so the caller's
        # queue object is not refilled -- confirm that is intended.
        if proxies_queue.qsize() < 1:
            logging.debug('重新获取代理')
            p = common.proxy.Proxy('proxies.dat')
            proxies_queue = p.get_from_web()

        # Take the proxy to use for this request.
        entry = proxies_queue.get()
        protocol, proxy = entry.split('=', 1)
        # ProxyHandler expects {scheme: proxy address}.
        proxy_map = {protocol: proxy}
        try:
            # Fetch the page through the proxy.
            logging.debug('正在获取 %s' % self.url)
            proxy_support = urllib.request.ProxyHandler(proxy_map)
            opener = urllib.request.build_opener(proxy_support)
            opener.addheaders = list(headers.items())
            with opener.open(self.url) as response:
                html_bytes = response.read()
            logging.info('获取成功 %s' % self.url)
            # The proxy worked, so return it to the queue for reuse.
            proxies_queue.put(entry)
            return html_bytes
        except Exception as e:
            # Work out whether the proxy or the URL is at fault.
            if check_proxy(entry):
                # Proxy is usable: keep it and report the URL as bad.
                proxies_queue.put(entry)
                logging.info('网址有误 %s' % self.url)
                logging.debug(e)
                return False
            else:
                # Proxy is unusable: drop it; the URL remains unverified.
                logging.info('待验证网址 %s ' % self.url)
                return False