Beispiel #1
0
 def parse(self, response):
     """Build one GetproxyItem for each node matched by ``/*``.

     Each field receives the complete list of strings returned by its
     query (``extract()`` without indexing), not a single value. The
     queries start with ``//``, so they are evaluated against the whole
     document rather than relative to the node being iterated.

     Returns:
         list: the populated items.
     """
     column_queries = (
         ("ip", "//td[2]/text()"),
         ("port", "//td[3]/text()"),
         ("loction", "//td[4]/a/text()"),
         ("protocol", "//td[5]/a/text()"),
     )
     collected = []
     for root in response.xpath('/*'):
         entry = GetproxyItem()
         for field, query in column_queries:
             entry[field] = root.xpath(query).extract()
         collected.append(entry)
     return collected
Beispiel #2
0
    def parse(self, response):
        """Yield one GetproxyItem per ``tr`` whose class is 'odd' or empty.

        Each field takes the first text node of a fixed ``td`` position
        inside the row. Indexing with ``[0]`` raises IndexError when a
        query matches nothing.

        Yields:
            GetproxyItem: item with 'ip', 'port', 'type' and 'protocol' set.
        """
        cell_queries = (
            ('ip', "./td[2]/text()"),
            ("port", "./td[3]/text()"),
            ("type", "./td[5]/text()"),
            ("protocol", "./td[6]/text()"),
        )
        for row in response.xpath("//tr[@class='odd']|//tr[@class='']"):
            proxy = GetproxyItem()
            for field, query in cell_queries:
                proxy[field] = row.xpath(query).extract()[0]
            yield proxy
Beispiel #3
0
 def parse(self, response):
     """Collect one GetproxyItem per row of the table inside ``div#list``.

     Cells are located through their ``data-title`` attribute and the
     first text node of each is stored. Every item is tagged with a fixed
     'source' value.

     Returns:
         list: the populated items, in document order.

     Raises:
         IndexError: if any of the cell queries matches nothing in a row.
     """
     def first_text(row, query):
         # [0] is deliberate: a missing cell surfaces as IndexError.
         return row.xpath(query).extract()[0]

     proxies = []
     for row in response.xpath('//div[@id="list"]/table//tbody/tr'):
         proxy = GetproxyItem()
         proxy['ip'] = first_text(row, './td[@data-title="IP"]/text()')
         proxy['port'] = first_text(row, './td[@data-title="PORT"]/text()')
         proxy['protocol'] = first_text(row, './td[@data-title="类型"]/text()')
         proxy['crypt'] = first_text(row, './td[@data-title="匿名度"]/text()')
         proxy['location'] = first_text(row, './td[@data-title="位置"]/text()')
         proxy['source'] = '快代理'
         proxies.append(proxy)
     return proxies
Beispiel #4
0
	def parse(self, response):
		"""Collect one GetproxyItem per ``proxylistitem`` container div.

		The first four ``span`` positions inside each container supply
		'ip', 'port', 'type' and 'loction'; 'protocol' and 'source' are
		fixed strings.

		Returns:
			list: the populated items.

		Raises:
			IndexError: if a span query matches nothing in a container.
		"""
		span_queries = (
			('ip', './/span[1]/text()'),
			('port', './/span[2]/text()'),
			('type', './/span[3]/text()'),
			('loction', './/span[4]/text()'),
		)
		results = []
		for entry in response.xpath('//div[@class="proxylistitem" and @name="list_proxy_ip"]'):
			proxy = GetproxyItem()
			for field, query in span_queries:
				proxy[field] = entry.xpath(query).extract()[0]
			proxy['protocol'] = 'HTTP'
			proxy['source'] = 'proxy360'
			results.append(proxy)
		return results
 def parse(self, response):
     """Collect one GetproxyItem per ``proxylistitem`` container div.

     Each container's ``span`` elements supply 'ip', 'port', 'type' and
     'loction'; 'protocol' and 'source' are fixed strings.

     Returns:
         list: the populated items.

     Raises:
         IndexError: if a span query matches nothing in a container.
     """
     subSelector = response.xpath("//div[@class='proxylistitem' and @name='list_proxy_ip']")
     items = []
     for sub in subSelector:
         item = GetproxyItem()
         item['ip'] = sub.xpath(".//span[@class='tbBottomLine'][1]/text()").extract()[0]
         item['port'] = sub.xpath(".//span[2]/text()").extract()[0]
         item['type'] = sub.xpath(".//span[3]/text()").extract()[0]
         # The leading "." scopes the query to this container. A bare "//"
         # searches the whole document, which gave every item the location
         # of the first entry on the page.
         item['loction'] = sub.xpath(".//span[4]/text()").extract()[0]
         item['protocol'] = 'http'
         item['source'] = 'proxy360'
         items.append(item)
     return items
Beispiel #6
0
 def parse(self, response):
     """Collect one GetproxyItem per ``tr`` element, skipping the first.

     The first text node found anywhere under a fixed ``td`` position
     fills each scraped field; 'type' is left as an empty string and
     'source' is a fixed URL.

     Returns:
         list: the populated items.

     Raises:
         IndexError: if a cell query matches nothing in a row.
     """
     def first_text(row, query):
         return row.xpath(query).extract()[0]

     all_rows = response.xpath('//tr')
     harvested = []
     # The first matched row is skipped (presumably the table header).
     for row in all_rows[1:]:
         record = GetproxyItem()
         record['ip'] = first_text(row, './td[2]//text()')
         record['port'] = first_text(row, './td[3]//text()')
         record['type'] = ""
         record['loction'] = first_text(row, './td[4]//text()')
         record['protocol'] = first_text(row, './td[5]//text()')
         record['exitdays'] = first_text(row, './td[7]//text()')
         record['source'] = 'http://31f.cn/'
         harvested.append(record)
     return harvested
Beispiel #7
0
 def parse(self, response):
     """Collect one GetproxyItem per ``tr`` whose class is empty or 'odd'.

     The location comes from the link text in the fourth cell when one
     exists, otherwise from the cell's own text.

     Returns:
         list: the populated items.

     Raises:
         IndexError: if a cell query matches nothing in a row.
     """
     proxies = []
     for row in response.xpath('//tr[@class=""]|//tr[@class="odd"]'):
         proxy = GetproxyItem()
         proxy['ip'] = row.xpath('.//td[2]/text()').extract()[0]
         proxy['port'] = row.xpath('.//td[3]/text()').extract()[0]

         # Prefer the anchor text; fall back to plain cell text.
         location_nodes = row.xpath('.//td[4]/a/text()')
         if not location_nodes:
             location_nodes = row.xpath('.//td[4]/text()')
         proxy['location'] = location_nodes.extract()[0]

         proxy['type'] = row.xpath('.//td[5]/text()').extract()[0]
         proxy['protocol'] = row.xpath('.//td[6]/text()').extract()[0]
         proxies.append(proxy)
     return proxies
Beispiel #8
0
 def parse(self, response):
     """Collect one GetproxyItem per ``tr`` whose class is empty or 'odd'.

     Every cell query is relative to the current row. The location comes
     from the link text in the fourth cell when one exists, otherwise
     from the cell's own text.

     Returns:
         list: the populated items.

     Raises:
         IndexError: if a cell query matches nothing in a row.
     """
     subSelect = response.xpath('//tr[@class=""]|//tr[@class="odd"]')
     items = []
     for sub in subSelect:
         item = GetproxyItem()
         item["ip"] = sub.xpath(".//td[2]/text()").extract()[0]
         # The leading "." scopes each query to this row. A bare "//"
         # searches the whole document, which gave every item the port,
         # location and protocol of the first match on the page.
         item["port"] = sub.xpath(".//td[3]/text()").extract()[0]
         # An empty selector list is falsy, so this falls back to the
         # plain cell text when the cell holds no link.
         location = sub.xpath(".//td[4]/a/text()") or sub.xpath(".//td[4]/text()")
         item["loction"] = location.extract()[0]
         item["protocol"] = sub.xpath(".//td[6]/text()").extract()[0]
         items.append(item)
     return items
Beispiel #9
0
 def parse(self, response):
     """Collect one GetproxyItem per ``tr`` whose class is empty or 'odd'.

     Only 'ip', 'port' and 'protocol' are populated, each from the first
     text node of a fixed ``td`` position inside the row.

     Returns:
         list: the populated items.

     Raises:
         IndexError: if a cell query matches nothing in a row.
     """
     cell_queries = (
         ('ip', './/td[2]/text()'),
         ('port', './/td[3]/text()'),
         ('protocol', './/td[6]/text()'),
     )
     proxies = []
     for row in response.xpath('//tr[@class=""]|//tr[@class="odd"]'):
         proxy = GetproxyItem()
         for field, query in cell_queries:
             proxy[field] = row.xpath(query).extract()[0]
         proxies.append(proxy)
     return proxies
Beispiel #10
0
    def parse(self, response):
        """Yield one column-oriented item, then requests for pages 6-1999.

        The single GetproxyItem holds, for each field, the list of all
        text nodes found in the matching ``td`` position across every
        ``tbody`` row. The 'ip' list is printed before the item is
        yielded. Afterwards a Request for every listing page number in
        ``range(6, 2000)`` is yielded with this method as the callback.

        Yields:
            GetproxyItem, then Request objects.
        """
        column_queries = (
            ('ip', '//tbody/tr/td[1]/text()'),
            ('port', '//tbody/tr/td[2]/text()'),
            ('noname', '//tbody/tr/td[3]/text()'),
            ('type', '//tbody/tr/td[4]/text()'),
            ('location', '//tbody/tr/td[5]/text()'),
            ('res_speed', '//tbody/tr/td[6]/text()'),
        )
        listing = GetproxyItem()
        for field, query in column_queries:
            listing[field] = response.xpath(query).extract()
        print(listing['ip'])
        yield listing

        # NOTE(review): the same page range is re-yielded on every response;
        # presumably the framework's duplicate filter drops repeats - confirm.
        listing_root = 'https://www.kuaidaili.com/free/inha/'
        for page_number in range(6, 2000):
            yield Request(listing_root + str(page_number), callback=self.parse)
Beispiel #11
0
 def parse(self, response):
     """Collect one GetproxyItem per ``proxylistitem`` container div.

     Each scraped field is the whitespace-stripped first text node of a
     fixed ``span`` position inside the container; 'protocol' and
     'source' are fixed strings.

     Returns:
         list: the populated items.

     Raises:
         IndexError: if a span query matches nothing in a container.
     """
     span_queries = (
         ('ip', './/span[1]/text()'),
         ('port', './/span[2]/text()'),
         ('proxyType', './/span[3]/text()'),
         ('loction', './/span[4]/text()'),
         ('upDated', './/span[5]/text()'),
         ('score', './/span[6]/text()'),
         # NOTE(review): same query as 'score' - confirm this is intended.
         ('allScore', './/span[6]/text()'),
         ('useTime', './/span[7]/text()'),
     )
     container_query = '//div[@class="proxylistitem"and @ name="list_proxy_ip"]'
     proxies = []
     for container in response.xpath(container_query):
         proxy = GetproxyItem()
         for field, query in span_queries:
             proxy[field] = container.xpath(query).extract()[0].strip()
         proxy['protocol'] = 'HTTP'
         proxy['source'] = 'proxy360.cn'
         proxies.append(proxy)
     return proxies