def parse_youdaili_detail(self, response):
    """Extract proxies from the paragraphs of a youdaili detail page.

    Reads the first text node of every ``<p>`` under
    ``<div class="content">``. The text is split on ``:``; the part before
    the first colon is used as the IP, and the part after it, up to the
    first ``@``, as the port. For each non-empty pair a
    ``Proxy('http://', ip, port)`` is created and its ``add()`` is called.

    :param response: response object exposing ``xpath()``.
    :return: None
    """
    for paragraph in response.xpath('//div[@class="content"]/p'):
        text = paragraph.xpath('text()').extract_first()
        # extract_first() yields None for a paragraph without a text node.
        if not text:
            continue

        parts = text.split(':')
        # Skip paragraphs that do not contain an "ip:port" pair at all.
        if len(parts) < 2:
            continue

        ip = parts[0].strip()
        port = parts[1].split('@')[0].strip()
        if ip and port:
            Proxy('http://', ip, port).add()
    return None
def parse_66ip(self, response):
    """Collect every ``ip:port`` occurrence found in the page body.

    The ``/html/body`` node is searched with a regular expression for
    dotted-quad addresses followed by a colon and a numeric port. Each hit
    is split on ``:`` and handed to ``Proxy('http://', ip, port)``, whose
    ``add()`` is then called.

    :param response: response object exposing ``xpath()``.
    :return: None
    """
    endpoint_pattern = r'(\d+\.\d+\.\d+\.\d+:\d+)'
    for endpoint in response.xpath('/html/body').re(endpoint_pattern):
        pieces = endpoint.split(':')
        if len(pieces) < 2:
            continue
        host, port = pieces[0], pieces[1]
        Proxy('http://', host, port).add()
def parse_pachong(self, response):
    """Extract proxies from the table rows of a pachong listing page.

    Every ``<tr>`` after the first one in ``//table/tbody`` is inspected:
    the text of the second cell is used as the IP and the first run of two
    to four digits found in the third cell as the port. For each row that
    yields both, a ``Proxy('http://', ip, port)`` is created and its
    ``add()`` is called.

    :param response: response object exposing ``xpath()``.
    :return: None
    """
    for row in response.xpath('//table/tbody/tr[position()>1]'):
        ip = row.xpath('td[2]/text()').extract_first()
        # .re() returns a list of matches; only the first one is the port.
        # NOTE(review): the pattern matches at most four digits, so a
        # five-digit port would be truncated - confirm against the page.
        port_matches = row.xpath('td[3]').re(r'\d{2,4}')
        if ip and port_matches:
            # Pass the port as a string, like the other parsers do, rather
            # than the whole list of matches.
            Proxy('http://', ip, port_matches[0]).add()
    return None
def parse_cybersyndrome(self, response):
    """Extract proxies from the table rows of a cybersyndrome listing page.

    Every ``<tr>`` after the first one in ``//table/tbody`` is inspected:
    the first text node found anywhere inside the second cell is stripped
    and split on ``:``. When that gives at least two parts, a
    ``Proxy('http://', ip, port)`` is created from the first two and its
    ``add()`` is called.

    :param response: response object exposing ``xpath()``.
    :return: None
    """
    for row in response.xpath('//table/tbody/tr[position()>1]'):
        cell_text = row.xpath('td[2]//text()').extract_first()
        # extract_first() yields None when the cell has no text node.
        if not cell_text:
            continue

        parts = cell_text.strip().split(':')
        if len(parts) > 1:
            Proxy('http://', parts[0], parts[1]).add()
    return None
def parse_kuaidaili(self, response):
    """Extract sufficiently fast proxies from a kuaidaili listing page.

    Every ``<tr>`` after the first one in ``//table/tbody`` is inspected.
    Cell 1 supplies the IP, cell 2 the port, cell 4 the protocol name and
    cell 6 a numeric speed value. A row is used only when all four are
    present and the speed value is below ``self.allowed_max_speed``. The
    scheme is ``https://`` when the protocol cell reads ``HTTPS``
    (case-insensitive, surrounding whitespace ignored) and ``http://``
    otherwise. For each accepted row a ``Proxy(scheme, ip, port)`` is
    created and its ``add()`` is called.

    :param response: response object exposing ``xpath()``.
    :return: None
    """
    # The predicate must be attached to the ``tr`` step; a bare
    # ``/[position()>1]`` step is not a valid XPath expression.
    for row in response.xpath('//table/tbody/tr[position()>1]'):
        ip = row.xpath('td[1]//text()').extract()
        port = row.xpath('td[2]/text()').extract()
        speed = row.xpath('td[6]/text()').re(r'([\.\d]+)')
        protocol = row.xpath('td[4]/text()').extract()
        if not (ip and port and speed and protocol):
            continue

        try:
            measured_speed = float(speed[0])
        except ValueError:
            # The pattern also matches strings such as "." or "1.2.3",
            # which are not valid numbers; skip such rows.
            continue
        if not measured_speed < self.allowed_max_speed:
            continue

        if protocol[0].strip().upper() == 'HTTPS':
            scheme = 'https://'
        else:
            scheme = 'http://'
        Proxy(scheme, ip[0], port[0]).add()
    return None
def parse_goubanjia(self, response):
    """Extract sufficiently fast proxies from a goubanjia listing page.

    Every ``<tr>`` in the body of ``<table class="table">`` is inspected.
    The string value of cell 1 is expected to hold ``ip:port``, the link
    text in cell 3 the protocol name and cell 6 a numeric speed value. A
    row is used only when all three are present, cell 1 contains a colon
    and the speed value is below ``self.allowed_max_speed``. The scheme is
    ``https://`` when the protocol reads ``HTTPS`` (case-insensitive,
    surrounding whitespace ignored) and ``http://`` otherwise. For each
    accepted row a ``Proxy(scheme, ip, port)`` is created and its ``add()``
    is called.

    :param response: response object exposing ``xpath()``.
    :return: None
    """
    for row in response.xpath('//table[@class="table"]/tbody/tr'):
        # string() is an XPath function, not a path step, so it has to wrap
        # the cell rather than follow ``//``.
        # NOTE(review): string(td[1]) concatenates every descendant text
        # node, including any that are visually hidden - confirm the page
        # does not pad the cell with decoy nodes.
        address = row.xpath('string(td[1])').extract_first()
        speed = row.xpath('td[6]/text()').re(r'([\.\d]+)')
        protocol = row.xpath('td[3]/a/text()').extract()
        if not (address and speed and protocol):
            continue

        # Drop any whitespace between the fragments of the cell before
        # splitting the address itself (not the ``str`` builtin).
        parts = ''.join(address.split()).split(':')
        if len(parts) < 2:
            continue

        try:
            measured_speed = float(speed[0])
        except ValueError:
            # The pattern also matches strings such as "." or "1.2.3",
            # which are not valid numbers; skip such rows.
            continue
        if not measured_speed < self.allowed_max_speed:
            continue

        # Decide the scheme from the protocol cell, not from the default
        # scheme string.
        if protocol[0].strip().upper() == 'HTTPS':
            scheme = 'https://'
        else:
            scheme = 'http://'
        Proxy(scheme, parts[0], parts[1]).add()
    return None