def parse_page(self, response):
    """Yield one loaded ``Proxy`` item per matching table row.

    Rows are the ``tr`` children whose ``class`` attribute starts with
    ``row`` inside any table carrying ``cellpadding="3"``. For each row the
    loader is asked to fill ``port`` from the link text in the third cell
    and ``address`` from the link text in the second cell.

    :param response: the response handed to ``HtmlXPathSelector``.
    :returns: a generator of whatever ``loader.load_item()`` produces.
    """
    row_query = '//table[@cellpadding="3"]/tr[starts-with(@class, "row")]'
    page = HtmlXPathSelector(response)
    for table_row in page.select(row_query):
        item_loader = ProxyItemLoader(
            item=Proxy(),
            response=response,
            selector=table_row,
        )
        # The loader was given this row as its selector, so the cell
        # queries below are expressed relative to the row.
        item_loader.add_xpath('port', 'td[3]/a/text()')
        item_loader.add_xpath('address', 'td[2]/a/text()')
        yield item_loader.load_item()
def parse_page(self, response):
    """Yield one loaded ``Proxy`` item per data row of the page's tables.

    Candidate rows are every ``tr`` with more than one ``td``; the first and
    last candidates are dropped by the ``[1:-1]`` slice. In each remaining
    row the address is taken from the first double-quoted string found in
    the script of the first cell, which is ROT13-decoded and then
    base64-decoded. The port is the text of the second cell.

    Rows whose script contains no quoted string are skipped rather than
    raising ``IndexError``, so one unexpected row no longer aborts the rest
    of the page.

    :param response: the response handed to ``HtmlXPathSelector``.
    :returns: a generator of whatever ``loader.load_item()`` produces.
    :raises binascii.Error: if the ROT13-decoded payload is not valid
        base64 (``TypeError`` on Python 2).
    :raises UnicodeDecodeError: if the decoded payload is not ASCII.
    """
    # Function-scope imports keep this block self-contained; the file's
    # import section is not visible from here.
    import base64
    import codecs

    xpath = HtmlXPathSelector(response)
    for row in xpath.select('//table/tr[count(td) > 1]')[1:-1]:
        quoted = row.select('td[1]/script/text()').re(r'"(.*?)"')
        if not quoted:
            # Nothing to decode in this row; move on to the next one.
            continue

        # ``str.decode('rot13')`` / ``.decode('base64')`` only exist on
        # Python 2 strings; the codecs/base64 calls work on both 2 and 3.
        unrotated = codecs.decode(quoted[0], 'rot_13')
        address = base64.b64decode(unrotated).decode('ascii')

        loader = ProxyItemLoader(item=Proxy(), response=response,
                                 selector=row)
        loader.add_value('address', address)
        loader.add_xpath('port', 'td[2]/text()')
        yield loader.load_item()
def parse_page(self, response):
    """Yield one loaded ``Proxy`` item per body row of the ``listable`` table.

    For every ``tr`` under the ``tbody`` of the table with ``id="listable"``,
    the ``span`` elements of the second cell are passed to ``self.get_ip``
    and its result is stored as ``address``; ``port`` is loaded from the
    text of the third cell.

    :param response: the response handed to ``HtmlXPathSelector``.
    :returns: a generator of whatever ``loader.load_item()`` produces.
    """
    page = HtmlXPathSelector(response)
    table_rows = page.select('//table[@id="listable"]/tbody/tr')
    for table_row in table_rows:
        item_loader = ProxyItemLoader(
            item=Proxy(),
            response=response,
            selector=table_row,
        )
        # NOTE(review): get_ip is defined elsewhere; presumably it rebuilds
        # the address from the span fragments -- confirm against the helper.
        ip_spans = table_row.select('td[2]/span')
        item_loader.add_value('address', self.get_ip(ip_spans))
        item_loader.add_xpath('port', 'td[3]/text()')
        yield item_loader.load_item()
def parse_page(self, response):
    """Generate ``Proxy`` items from the rows of the ``listable`` table.

    Each ``tr`` inside the ``tbody`` of the table whose ``id`` is
    ``listable`` yields one item: ``address`` is the value returned by
    ``self.get_ip`` for the ``span`` nodes of the second cell, and ``port``
    comes from the text of the third cell.

    :param response: the response handed to ``HtmlXPathSelector``.
    :returns: a generator of whatever ``loader.load_item()`` produces.
    """
    rows_query = '//table[@id="listable"]/tbody/tr'
    hxs = HtmlXPathSelector(response)
    for tr in hxs.select(rows_query):
        proxy_loader = ProxyItemLoader(item=Proxy(), response=response,
                                       selector=tr)
        # The address is not loaded by XPath directly; the spans are handed
        # to get_ip (defined outside this block) and its result is stored.
        address = self.get_ip(tr.select("td[2]/span"))
        proxy_loader.add_value("address", address)
        proxy_loader.add_xpath("port", "td[3]/text()")
        yield proxy_loader.load_item()
def parse_page(self, response):
    """Generate ``Proxy`` items from the ``row*``-classed table rows.

    The rows considered are ``tr`` elements whose ``class`` starts with
    ``row`` and whose parent table has ``cellpadding="3"``. Per row, the
    loader fills ``port`` from the third cell's link text and then
    ``address`` from the second cell's link text.

    :param response: the response handed to ``HtmlXPathSelector``.
    :returns: a generator of whatever ``loader.load_item()`` produces.
    """
    # (field name, row-relative XPath), applied in this order for each row.
    field_queries = (
        ("port", "td[3]/a/text()"),
        ("address", "td[2]/a/text()"),
    )
    hxs = HtmlXPathSelector(response)
    matching_rows = hxs.select(
        '//table[@cellpadding="3"]/tr[starts-with(@class, "row")]'
    )
    for tr in matching_rows:
        proxy_loader = ProxyItemLoader(item=Proxy(), response=response,
                                       selector=tr)
        for field_name, query in field_queries:
            proxy_loader.add_xpath(field_name, query)
        yield proxy_loader.load_item()
def parse_page(self, response):
    """Yield one loaded ``Proxy`` item per data row of the ``proxytbl`` table.

    ``self.get_variables`` is called once with the page selector and its
    result is reused for every row. All ``tr`` children of the table with
    ``class="proxytbl"`` except the first are processed: ``address`` is
    loaded from the text of the ``t_ip`` cell, and ``port`` is the value
    returned by ``self.get_port`` for the ``script`` inside the ``t_port``
    cell together with those variables.

    :param response: the response handed to ``HtmlXPathSelector``.
    :returns: a generator of whatever ``loader.load_item()`` produces.
    """
    page = HtmlXPathSelector(response)
    # NOTE(review): presumably page-level values the port scripts refer to;
    # confirm against get_variables / get_port, which live outside this block.
    page_variables = self.get_variables(page)
    data_rows = page.select('//table[@class="proxytbl"]/tr[position() > 1]')
    for data_row in data_rows:
        item_loader = ProxyItemLoader(
            item=Proxy(),
            response=response,
            selector=data_row,
        )
        item_loader.add_xpath('address', 'td[@class="t_ip"]/text()')
        port_script = data_row.select('td[@class="t_port"]/script')
        item_loader.add_value(
            'port', self.get_port(port_script, page_variables))
        yield item_loader.load_item()
def parse_page(self, response):
    """Generate ``Proxy`` items from the rows of the ``proxytbl`` table.

    The first ``tr`` of the table with ``class="proxytbl"`` is excluded by
    the ``position() > 1`` predicate. For each remaining row, ``address`` is
    loaded from the ``t_ip`` cell text, and ``port`` is computed by
    ``self.get_port`` from the ``t_port`` cell's ``script`` node(s) plus the
    result of a single up-front ``self.get_variables`` call.

    :param response: the response handed to ``HtmlXPathSelector``.
    :returns: a generator of whatever ``loader.load_item()`` produces.
    """
    rows_query = '//table[@class="proxytbl"]/tr[position() > 1]'
    hxs = HtmlXPathSelector(response)
    # Computed once per page and shared by every get_port call below.
    script_vars = self.get_variables(hxs)
    for tr in hxs.select(rows_query):
        proxy_loader = ProxyItemLoader(item=Proxy(), response=response,
                                       selector=tr)
        proxy_loader.add_xpath('address', 'td[@class="t_ip"]/text()')
        # The port is not read as plain text: the cell's script is handed
        # to get_port (defined outside this block) along with script_vars.
        port = self.get_port(tr.select('td[@class="t_port"]/script'),
                             script_vars)
        proxy_loader.add_value('port', port)
        yield proxy_loader.load_item()