Example #1
0
 def parse_youdaili_detail(self, response):
     """Collect proxies from the paragraphs of a youdaili detail page.

     Every ``<p>`` directly under ``div.content`` is expected to hold a
     line shaped like ``ip:port@...``.  For each line that yields a
     non-empty ip and port, a ``Proxy('http://', ip, port)`` is created
     and its ``add()`` method is called.

     :param response: response object exposing ``xpath()``.
     :return: always ``None``.
     """
     for p in response.xpath('//div[@class="content"]/p'):
         line = p.xpath('text()').extract_first()
         # extract_first() returns None for paragraphs without a direct
         # text node, and some paragraphs are plain prose without a colon;
         # neither can describe a proxy, so skip them instead of crashing.
         if not line or ':' not in line:
             continue
         ip, _, remainder = line.partition(':')
         ip = ip.strip()
         # The port is everything between the colon and the first '@'.
         port = remainder.split('@')[0].strip()
         if ip and port:
             proxy = Proxy('http://', ip, port)
             proxy.add()
     return None
Example #2
0
    def parse_66ip(self, response):
        """Pull every ``a.b.c.d:port`` token out of the page body.

        The body selector is searched with a regular expression; each
        match is split on ``:`` and handed to ``Proxy`` with the
        ``http://`` scheme, after which ``add()`` is called on it.

        :param response: response object exposing ``xpath()``.
        :return: always ``None``.
        """
        body = response.xpath('/html/body')
        for match in body.re(r'(\d+\.\d+\.\d+\.\d+:\d+)'):
            parts = match.split(':')
            if len(parts) <= 1:
                continue
            entry = Proxy('http://', parts[0], parts[1])
            entry.add()

        return None
Example #3
0
    def parse_pachong(self, response):
        """Collect proxies from the table rows of a pachong listing page.

        The first row of the table body is skipped.  The ip is read from
        the text of the second cell and the port is the first run of 2-5
        digits found in the third cell.  Each complete pair is passed to
        ``Proxy('http://', ip, port)`` and ``add()`` is called on it.

        :param response: response object exposing ``xpath()``.
        :return: always ``None``.
        """
        for tr in response.xpath('//table/tbody/tr[position()>1]'):
            ip = tr.xpath('td[2]/text()').extract_first()
            # .re() returns a list of matches; the other parsers pass the
            # port as a single string, so take the first match only.
            # {2,5} rather than {2,4}: ports go up to 65535 and a 4-digit
            # cap would silently truncate them.
            port_matches = tr.xpath('td[3]').re(r'\d{2,5}')
            if ip and port_matches:
                proxy = Proxy('http://', ip.strip(), port_matches[0])
                proxy.add()

        return None
Example #4
0
    def parse_cybersyndrome(self, response):
        """Collect ``ip:port`` proxies from a cybersyndrome table.

        Skips the first row of the table body, prints the first text
        node found under the second cell of each remaining row, and,
        when that text splits into at least two ``:``-separated parts,
        builds ``Proxy('http://', ip, port)`` and calls ``add()`` on it.

        :param response: response object exposing ``xpath()``.
        :return: always ``None``.
        """
        rows = response.xpath('//table/tbody/tr[position()>1]')
        for row in rows:
            cell_text = row.xpath('td[2]//text()').extract_first()
            # NOTE(review): looks like leftover debug output -- confirm
            # before removing, it is kept here to preserve behaviour.
            print(cell_text)
            if not cell_text:
                continue
            parts = cell_text.strip().split(':')
            if len(parts) <= 1:
                continue
            Proxy('http://', parts[0], parts[1]).add()

        return None
Example #5
0
    def parse_kuaidaili(self, response):
        """Collect sufficiently fast proxies from a kuaidaili table.

        For each table-body row after the first, reads the ip (cell 1),
        port (cell 2), protocol (cell 4) and speed (cell 6).  Rows whose
        speed is below ``self.allowed_max_speed`` are turned into a
        ``Proxy`` and ``add()`` is called on it; the scheme is
        ``https://`` when the protocol cell reads ``HTTPS`` and
        ``http://`` otherwise.

        :param response: response object exposing ``xpath()``.
        :return: always ``None``.
        """
        # 'tr/[position()>1]' is not valid XPath (a predicate cannot follow
        # '/' directly); use the same row filter as the sibling parsers.
        for tr in response.xpath('//table/tbody/tr[position()>1]'):
            ip = tr.xpath('td[1]//text()').extract()
            port = tr.xpath('td[2]/text()').extract()
            speed = tr.xpath('td[6]/text()').re(r'([\.\d]+)')
            protocal = tr.xpath('td[4]/text()').extract()
            if not (ip and port and speed and protocal):
                continue

            # The speed pattern also matches strings such as '.' or
            # '1.2.3', which float() rejects; skip those rows.
            try:
                measured_speed = float(speed[0])
            except ValueError:
                continue
            if measured_speed >= self.allowed_max_speed:
                continue

            is_https = str(protocal[0]).strip().upper() == 'HTTPS'
            schema = 'https://' if is_https else 'http://'

            proxy = Proxy(schema, ip[0], port[0])
            proxy.add()

        # Returning only after the loop lets every row be processed; the
        # previous in-loop return stopped after the first accepted proxy.
        return None
Example #6
0
    def parse_goubanjia(self, response):
        """Collect sufficiently fast proxies from a goubanjia table.

        For each row of ``table.table``, reads the ``ip:port`` text of
        cell 1, the protocol link text of cell 3 and the speed of cell 6.
        Rows whose speed is below ``self.allowed_max_speed`` are turned
        into a ``Proxy`` and ``add()`` is called on it; the scheme is
        ``https://`` when the protocol reads ``HTTPS`` and ``http://``
        otherwise.

        :param response: response object exposing ``xpath()``.
        :return: always ``None``.
        """
        for tr in response.xpath('//table[@class="table"]/tbody/tr'):
            # string(td[1]) concatenates all descendant text of the cell;
            # the former 'td[1]//string(.)' is not valid XPath.
            # NOTE(review): if the page hides decoy elements inside this
            # cell they are included here as well -- confirm on live data.
            address = tr.xpath('string(td[1])').extract_first()
            speed = tr.xpath('td[6]/text()').re(r'([\.\d]+)')
            protocal = tr.xpath('td[3]/a/text()').extract()
            if not (address and speed and protocal):
                continue

            # Drop whitespace between the nested text nodes, then split
            # the scraped address (not the builtin ``str``) into ip/port.
            arr = ''.join(address.split()).split(':')
            if len(arr) <= 1 or not (arr[0] and arr[1]):
                continue

            # The speed pattern also matches strings such as '.' or
            # '1.2.3', which float() rejects; skip those rows.
            try:
                measured_speed = float(speed[0])
            except ValueError:
                continue
            if measured_speed >= self.allowed_max_speed:
                continue

            # The scheme is chosen from the scraped protocol, not from
            # the default schema string.
            is_https = str(protocal[0]).strip().upper() == 'HTTPS'
            schema = 'https://' if is_https else 'http://'

            proxy = Proxy(schema, arr[0], arr[1])
            proxy.add()

        # Returning only after the loop lets every row be processed; the
        # previous in-loop return stopped after the first accepted proxy.
        return None