Esempio n. 1
0
    def parse(self, soup):
        """Extract proxy entries from the XiCiDaiLi listing table.

        The header row is skipped; each remaining row yields a dict with
        keys ``ip``, ``port``, ``info`` and ``type`` (0 unknown,
        1 transparent, 2 anonymous, 3 elite). Malformed rows are logged
        and dropped.
        """
        entries = []
        rows = soup.find("table").find_all("tr")
        for row in rows[1:]:
            try:
                cells = row.find_all("td")
                entry = {
                    "ip": cells[2].string,
                    "port": cells[3].string,
                    "info": "",
                    "type": 0,
                }
                link = cells[4].find("a")
                if link:
                    entry["info"] = link.string

                label = cells[5].string
                if label == "透明":
                    entry["type"] = 1
                elif label == "匿名":
                    entry["type"] = 2
                elif label == "高匿":
                    entry["type"] = 3

                entries.append(entry)

            except Exception as e:
                logger.error('XiCiDaiLi parse error: %s', e)

        return entries
Esempio n. 2
0
    def parse(self, soup):
        """Parse the XiCiDaiLi proxy table into a list of dicts.

        Skips the first (header) row. Each dict carries ip, port, an
        optional info string taken from the row's <a> tag, and a numeric
        anonymity type: 1 transparent, 2 anonymous, 3 elite, 0 otherwise.
        """
        parsed = []
        for row in soup.find("table").find_all("tr")[1:]:
            try:
                cols = row.find_all("td")
                record = {
                    "ip": cols[2].string,
                    "port": cols[3].string,
                    "info": "",
                    "type": 0,
                }
                anchor = cols[4].find("a")
                if anchor:
                    record["info"] = anchor.string

                level = cols[5].string
                if level == "透明":
                    record["type"] = 1      # transparent
                elif level == "匿名":
                    record["type"] = 2      # anonymous
                elif level == "高匿":
                    record["type"] = 3      # elite / high anonymity

                parsed.append(record)

            except Exception as e:
                logger.error('XiCiDaiLi parse error: %s', e)

        return parsed
Esempio n. 3
0
    def parse(self, soup):
        """Parse proxy entries out of a KuaiDaiLi blog post body.

        Proxies are listed as text lines inside the second <p> of the
        entry-content div, formatted ``ip:port\\xa0\\xa0label\\xa0\\xa0info``.
        Returns a list of dicts with keys ip, port, info and type
        (1 transparent, 2 anonymous, 3 elite, 0 unknown).
        """
        result = []
        s = soup.find("div", class_="entry-content").find_all("p")
        for d in s[1].stripped_strings:
            try:
                # Two non-breaking spaces separate the three fields.
                rst = d.split(u"\xa0\xa0", 2)
                if len(rst) != 3:
                    continue

                ip = {
                    "ip": rst[0].split(":")[0],
                    "port": rst[0].split(":")[1],
                    "info": rst[2],
                }
                if rst[1] == "透明":
                    ip["type"] = 1
                elif rst[1] == "匿名":
                    ip["type"] = 2
                elif rst[1] == "高匿名":
                    ip["type"] = 3
                else:
                    ip["type"] = 0

                result.append(ip)

            # Fixed: was the Python-2-only ``except Exception, e`` form,
            # which is a SyntaxError on Python 3.
            except Exception as e:
                logger.error('KuaiDaiLi parse error: %s', e)

        # Fixed: the original fell off the end and returned None.
        return result
Esempio n. 4
0
    def parse(self, soup):
        """Parse proxy entries out of a KuaiDaiLi blog post body.

        Lines inside the second <p> of the entry-content div are
        formatted ``ip:port\\xa0\\xa0label\\xa0\\xa0info``. Returns a list
        of dicts with keys ip, port, info and type (1 transparent,
        2 anonymous, 3 elite, 0 unknown).
        """
        result = []
        s = soup.find("div", class_="entry-content").find_all("p")
        for d in s[1].stripped_strings:
            try:
                # Fields are delimited by two non-breaking spaces.
                rst = d.split(u"\xa0\xa0", 2)
                if len(rst) != 3:
                    continue

                ip = {
                    "ip": rst[0].split(":")[0],
                    "port": rst[0].split(":")[1],
                    "info": rst[2],
                }
                if rst[1] == "透明":
                    ip["type"] = 1
                elif rst[1] == "匿名":
                    ip["type"] = 2
                elif rst[1] == "高匿名":
                    ip["type"] = 3
                else:
                    ip["type"] = 0

                result.append(ip)

            # Fixed: was the Python-2-only ``except Exception, e`` form,
            # which is a SyntaxError on Python 3.
            except Exception as e:
                logger.error('KuaiDaiLi parse error: %s', e)

        # Fixed: the original fell off the end and returned None.
        return result
Esempio n. 5
0
    def run(self, proxyips):
        """Validate crawled proxies per type and persist the results.

        Classifies *proxyips*, validates each configured proxy type in a
        multiprocess pool, then optionally writes the results to a file
        and/or to a redis backend (SNIFFER config controls both).
        """
        outcome = {}
        grouped = self.classify(proxyips)
        for ptype in self.proxy_type:
            candidates = list(grouped.get(ptype, set()))
            logger.info('sniffer start, proxy_type: %s, proxy_ip: %s', ptype, len(candidates))
            outcome[ptype] = self.validator.run_in_multiprocess(candidates)
            logger.info('sniffer finish, proxy_type: %s, avail_ip: %s', ptype, len(outcome[ptype]))

        if SNIFFER['OUTPUT']:
            try:
                self.save2file(outcome)
            except Exception as e:
                logger.error("Write file fail, error: %s", e)

        if SNIFFER['BACKEND'] != '':
            # BACKEND is "host:port" — split feeds StrictRedis positionally.
            try:
                self.redis = redis.StrictRedis(*SNIFFER['BACKEND'].split(':'))
                self.redis.ping()
            except Exception as e:
                logger.error("Backend redis error: %s", e)
                return

            self.reflesh_redis()
            self.save2redis(outcome)
Esempio n. 6
0
    def parse(self, soup):
        """Collect proxy dicts from every table on a CNProxy page.

        Row-level parsing is delegated to self._parse; rows that fail to
        parse are logged and skipped.
        """
        collected = []
        for table in soup.find_all("table"):
            # Skip the first two rows of each table (header rows, presumably).
            for row in table.find_all("tr")[2:]:
                try:
                    collected.append(self._parse(row.find_all("td")))
                except Exception as e:
                    logger.error('CNProxy parse error: %s', e)

        return collected
Esempio n. 7
0
    def parse(self, soup):
        """Parse all proxy tables on a CNProxy page via the per-row helper."""
        out = []
        tables = soup.find_all("table")
        for tbl in tables:
            rows = tbl.find_all("tr")
            # The leading two rows carry no proxy data and are skipped.
            for tr in rows[2:]:
                try:
                    record = self._parse(tr.find_all("td"))
                    out.append(record)
                except Exception as e:
                    logger.error('CNProxy parse error: %s', e)

        return out
Esempio n. 8
0
    def crawl(self):
        """Fetch the KuaiDaiLi blog index and harvest proxies from the
        two most recent articles.

        Returns a (possibly empty) list of proxy dicts; a non-200 root
        response is logged and yields an empty list.
        """
        base = "http://blog.kuaidaili.com/"
        proxyip = []
        response = requests.get(base)
        if response.status_code != requests.codes.ok:
            logger.error("KuaiDaiLi crawl root fail, HTTP Response Code: %s", response.status_code)
            return proxyip

        soup = BeautifulSoup(response.text, "html5lib")
        for article in soup.find_all("article")[:2]:
            proxyip.extend(self.get(article.find("a")["href"]))

        return proxyip
Esempio n. 9
0
    def crawl(self):
        """Crawl the KuaiDaiLi blog root and gather proxies from recent posts."""
        base = "http://blog.kuaidaili.com/"
        harvested = []
        resp = requests.get(base)
        if resp.status_code == requests.codes.ok:
            page = BeautifulSoup(resp.text, "html5lib")
            # Only the two newest articles are scanned.
            for entry in page.find_all("article")[:2]:
                harvested.extend(self.get(entry.find("a")["href"]))
        else:
            logger.error("KuaiDaiLi crawl root fail, HTTP Response Code: %s",
                         resp.status_code)

        return harvested
Esempio n. 10
0
    def parse(self, soup):
        """Parse the CZ88 sidebar list into proxy dicts.

        The first <li> is skipped; each remaining item's stripped
        strings map positionally onto ip/port/type/info.
        """
        parsed = []
        items = soup.find("div", id="boxright").find_all("li")
        fields = ["ip", "port", "type", "info"]
        for item in items[1:]:
            try:
                entry = {}
                for position, text in enumerate(item.stripped_strings):
                    # fields[position] raises IndexError past the 4th
                    # string; the except below then drops the item.
                    entry[fields[position]] = text

                # The page's own type label is discarded and every entry
                # is forced to type 1 — NOTE(review): presumably all
                # proxies listed here are transparent; confirm.
                entry['type'] = 1
                parsed.append(entry)
            except Exception as e:
                logger.error('CZ88 parse error: %s', e)

        return parsed
Esempio n. 11
0
    def parse(self, soup):
        """Read the CZ88 sidebar <li> items into proxy dicts."""
        labels = ["ip", "port", "type", "info"]
        items = soup.find("div", id="boxright").find_all("li")
        out = []
        for li in items[1:]:
            try:
                rec = {}
                for n, token in enumerate(li.stripped_strings):
                    # labels[n] raises IndexError beyond four tokens,
                    # which the except below turns into a dropped entry.
                    rec[labels[n]] = token

                # Type is hard-coded to 1 — NOTE(review): looks like all
                # CZ88 entries are assumed transparent; confirm against
                # the site.
                rec['type'] = 1
                out.append(rec)
            except Exception as e:
                logger.error('CZ88 parse error: %s', e)

        return out
Esempio n. 12
0
    def parse(self, soup):
        """Pull ip:port pairs out of the plain-text 66ip API body.

        Only body children whose text starts with a digit are treated as
        proxy lines; a line without a colon raises IndexError and is
        logged and skipped.
        """
        found = []
        for node in soup.find('body').contents:
            try:
                text = str(node).strip()
                if text == '' or not text[0].isdigit():
                    # Markup/empty nodes carry no proxy data.
                    continue
                host_port = text.split(':')
                found.append({
                    "ip": host_port[0],
                    "port": host_port[1],
                    "info": "",
                    "type": 0,
                })
            except Exception as e:
                logger.error('IP66API parse error: %s', e)

        return found
Esempio n. 13
0
    def parse(self, soup):
        """Extract ip:port lines from the raw body of the 66ip API response."""
        results = []
        for chunk in soup.find('body').contents:
            try:
                line = str(chunk).strip()
                # Real proxy lines begin with a digit ("1.2.3.4:80");
                # anything else is markup or whitespace noise.
                if line != '' and line[0].isdigit():
                    pieces = line.split(':')
                    results.append({
                        "ip": pieces[0],
                        "port": pieces[1],
                        "info": "",
                        "type": 0,
                    })
            except Exception as e:
                logger.error('IP66API parse error: %s', e)

        return results
Esempio n. 14
0
    def get(self, url, encoding=None, headers=None):
        """Fetch *url*, parse it with html5lib and delegate to self.parse.

        Optional *encoding* overrides the response encoding; optional
        *headers* are forwarded to requests. Returns the parsed proxy
        list, or [] on any failure (errors are logged, never raised).
        """
        logger.info('crawl: %s', url)
        try:
            if headers:
                r = requests.get(url, headers=headers)
            else:
                r = requests.get(url)

            if encoding:
                r.encoding = encoding

            if r.status_code != requests.codes.ok:
                raise Exception("HTTP Response Code: %s" % r.status_code)

            return self.parse(BeautifulSoup(r.text, "html5lib"))

        except Exception as e:
            logger.error('Crawl error: %s', e)

        return []
Esempio n. 15
0
    def parse(self, soup):
        """Turn each row of the IP002 table into a proxy dict.

        All rows are attempted (rows without enough <td> cells fail
        inside the try and are logged and skipped). Type is 1 for
        transparent, 3 for elite, else 0.
        """
        entries = []
        for row in soup.find("table").find_all("tr"):
            try:
                cols = row.find_all("td")
                record = {
                    "ip": cols[0].string,
                    "port": cols[1].string,
                    "info": cols[3].string,
                    "type": 0,
                }
                label = cols[2].string
                if label == "透明":
                    record['type'] = 1
                elif label == "高匿":
                    record['type'] = 3

                entries.append(record)

            except Exception as e:
                logger.error('IP002 parse error: %s', e)

        return entries
Esempio n. 16
0
    def parse(self, soup):
        """Parse IP002 table rows into proxy dicts."""
        # Anonymity label -> numeric type (anything else stays 0).
        type_map = {"透明": 1, "高匿": 3}
        rows = soup.find("table").find_all("tr")
        out = []
        for tr in rows:
            try:
                td = tr.find_all("td")
                out.append({
                    "ip": td[0].string,
                    "port": td[1].string,
                    "info": td[3].string,
                    "type": type_map.get(td[2].string, 0),
                })
            except Exception as e:
                logger.error('IP002 parse error: %s', e)

        return out
Esempio n. 17
0
    def parse(self, soup):
        """Parse the KuaiDaiLi free-proxy table into proxy dicts.

        Skips the header row; maps the page's anonymity label to a
        numeric type (1 transparent, 2 anonymous, 3 elite, 0 unknown).
        """
        # Page label -> internal anonymity level.
        levels = {"透明": 1, "匿名": 2, "高匿名": 3}
        gathered = []
        for row in soup.find("table").find_all("tr")[1:]:
            try:
                cells = row.find_all("td")
                gathered.append({
                    "ip": cells[0].string,
                    "port": cells[1].string,
                    "info": cells[5].string,
                    "type": levels.get(cells[2].string, 0),
                })
            except Exception as e:
                logger.error('KuaiDaiLi2 parse error: %s', e)

        return gathered
Esempio n. 18
0
    def parse(self, soup):
        """Turn each data row of the KuaiDaiLi table into a proxy dict."""
        parsed = []
        rows = soup.find("table").find_all("tr")
        for tr in rows[1:]:  # first row is skipped (no <td> data)
            try:
                td = tr.find_all("td")
                rec = {
                    "ip": td[0].string,
                    "port": td[1].string,
                    "info": td[5].string,
                    "type": 0,
                }
                label = td[2].string
                if label == "透明":
                    rec["type"] = 1      # transparent
                elif label == "匿名":
                    rec["type"] = 2      # anonymous
                elif label == "高匿名":
                    rec["type"] = 3      # elite / high anonymity

                parsed.append(rec)

            except Exception as e:
                logger.error('KuaiDaiLi2 parse error: %s', e)

        return parsed