Example 1
    def __fetch_sitedigger(self):
        # Scrape the published proxy list from site-digger.com and return it
        # as a list of (host, port) tuples.
        catch_from = 'site-digger.net'
        url = 'http://www.site-digger.com/html/articles/20110516/proxieslist.html'
        xpath = '//*[@id="content_detail"]/div[3]/p[3]/textarea'

        try:
            html = urllib2.urlopen(url).read()
            doc = htmlParser(html, 'utf-8')
            content = doc.xpathEval(xpath)[0].content
            if not content:
                return False
            self.logger.p_log('Page opened successfully, content length: %d bytes' % len(content))
        except Exception:
            self.logger.p_log('Error opening target page %s, exiting...' % url)
            return False

        proxies = []
        # The first two and last two lines of the textarea are not proxy records.
        for ip_port in content.split('\n')[2:-2]:
            try:
                proxy = (ip_port.split(':')[0], ip_port.split(':')[1])
            except Exception:
                self.logger.p_log('Error while processing record: %s' % ip_port)
                continue
            if proxy not in proxies:
                proxies.append(proxy)
        return proxies
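For comparison, the same scrape can be sketched in Python 3 with urllib.request and lxml instead of urllib2 and the libxml2 bindings. The URL and XPath are taken from the example above and may well be stale; the function name fetch_sitedigger_proxies is hypothetical.

# Hypothetical Python 3 sketch of the same scrape (requires lxml).
from urllib.request import urlopen
from lxml import html

URL = 'http://www.site-digger.com/html/articles/20110516/proxieslist.html'
XPATH = '//*[@id="content_detail"]/div[3]/p[3]/textarea'

def fetch_sitedigger_proxies():
    page = urlopen(URL, timeout=10).read()
    nodes = html.fromstring(page).xpath(XPATH)
    if not nodes:
        return []
    proxies = []
    # As in the example above, skip the first two and last two lines.
    for line in nodes[0].text_content().split('\n')[2:-2]:
        line = line.strip()
        if ':' not in line:
            continue
        host, port = line.split(':', 1)
        if (host, port) not in proxies:
            proxies.append((host, port))
    return proxies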
Example 2
    def __get_ip_from_myip_cn(self, host=None, port=None):
        # Ask www.myip.cn, through the given HTTP proxy, which IP address it
        # sees; return that IP string, or False if the lookup fails.
        if not host or not port:
            return False
        url = 'http://www.myip.cn'
        xpath = '/html/body/center/div[4]/font[1]/b'
        proxy = {'http': 'http://%s:%s' % (host, port)}

        try:
            html = urllib.urlopen(url, proxies=proxy).read()
            doc = htmlParser(html, 'utf-8')
            content = doc.xpathEval(xpath)[0].content
            if content:
                # The element text is of the form 'IP: x.x.x.x ...';
                # the second whitespace-separated token is the address.
                return content.split(' ')[1]
        except Exception:
            return False
        return False
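A comparable Python 3 sketch of the proxy check, using urllib.request's ProxyHandler and lxml in place of urllib and libxml2. Again, www.myip.cn and the XPath come from the example and may no longer resolve; get_ip_via_proxy is a hypothetical name.

# Hypothetical Python 3 sketch of the proxy check (requires lxml).
from urllib.request import ProxyHandler, build_opener
from lxml import html

def get_ip_via_proxy(host, port, url='http://www.myip.cn',
                     xpath='/html/body/center/div[4]/font[1]/b'):
    if not host or not port:
        return False
    # Route the request through the candidate proxy.
    opener = build_opener(ProxyHandler({'http': 'http://%s:%s' % (host, port)}))
    try:
        page = opener.open(url, timeout=10).read()
        nodes = html.fromstring(page).xpath(xpath)
        if nodes:
            # Assumed format 'IP: x.x.x.x ...'; take the second token.
            return nodes[0].text_content().split(' ')[1]
    except Exception:
        return False
    return False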