Esempio n. 1
0
    def parse(self, response):
        """Yield one ``{'ip': ..., 'port': ...}`` dict per matched table row.

        Nothing is yielded (and nothing is logged) unless
        ``response.status_code`` is 200.

        :param response: object exposing ``status_code``, ``url`` and ``text``.
        :return: generator of dicts holding the text of the first and second
            ``td`` cell of each selected row.
        """
        if response.status_code != 200:
            return

        # Log message reads "<url> fetched successfully".
        log('{}抓取成功'.format(response.url))

        document = PyQuery(response.text)
        # nth-of-type(n+2) selects every row except the first one
        # (presumably the header row -- confirm against the page markup).
        rows = document('.boxindex table tr:nth-of-type(n+2)')
        for row in rows.items():
            cells = row('td')
            yield {
                'ip': cells.eq(0).text(),
                'port': cells.eq(1).text(),
            }
Esempio n. 2
0
    def spider(self):
        """Fetch every listing page for the ``'nn'`` and ``'nt'`` type values.

        For each type value and each page number from ``self.begin_page``
        through ``int(self.total_page)`` inclusive, the URL is built from the
        ``self.url`` template and fetched with ``requests_get``.

        :return: generator yielding whatever ``self.parse`` produces for a
            page answered with status 200; when the status differs or a
            ``RequestException`` is raised, the failure is logged and a
            ``Request('GET', url)`` is yielded instead.
        """
        for category in ('nn', 'nt'):
            for page_no in range(self.begin_page, int(self.total_page) + 1):
                # The template keyword stays ``type``; only the local name
                # avoids shadowing the builtin.
                target = self.url.format(page=page_no, type=category)

                try:
                    reply = requests_get(target)
                    if reply.status_code == 200:
                        for record in self.parse(reply):
                            yield record
                    else:
                        # Route a bad status through the same handler as a
                        # failed request.
                        raise RequestException
                except RequestException:
                    # Log message reads "<url> fetch failed, added to the
                    # retry queue".
                    log('{}抓取失败, 加入重试的队列中'.format(target))
                    yield Request('GET', target)
Esempio n. 3
0
    def spider(self):
        """Fetch every listing page after calling ``self.set_cookie()``.

        Pages run from ``self.begin_page`` through ``int(self.total_page)``
        inclusive. Each URL is built from the ``self.url`` template and
        requested with ``self.headers`` and the timeout returned by
        ``env('TIME_OUT')``.

        :return: generator yielding whatever ``self.parse`` produces for a
            page answered with status 200; when the status differs or a
            ``RequestException`` is raised, the failure is logged and a
            ``Request('GET', url)`` is yielded instead.
        """
        self.set_cookie()
        for page_no in range(self.begin_page, int(self.total_page) + 1):
            target = self.url.format(page=page_no)

            try:
                reply = requests.get(target, headers=self.headers,
                                     timeout=env('TIME_OUT'))
                if reply.status_code == 200:
                    for record in self.parse(reply):
                        yield record
                else:
                    # Route a bad status through the same handler as a
                    # failed request.
                    raise RequestException
            except RequestException:
                # Log message reads "<url> fetch failed, added to the retry
                # queue".
                log('{}抓取失败, 加入重试的队列中'.format(target))
                yield Request('GET', target)