コード例 #1
0
ファイル: middlewares.py プロジェクト: KKKKKXY/ScrapyProject
    def process_request(self, request, spider):
        """Fetch ``request.url`` with ``requests`` and wrap it in an ``HtmlResponse``.

        The ``JSESSIONID`` value is looked up in the pickled cookie list stored
        at ``cookie_path`` and, when found, sent with the GET request. If the
        cookie file is missing, empty or truncated, or holds no ``JSESSIONID``
        entry, the request is sent without a session cookie.

        Args:
            request: The outgoing request; only ``request.url`` is read. On a
                timeout its ``status`` attribute is set to ``False``.
            spider: Unused here.

        Returns:
            An ``HtmlResponse`` for ``request.url``. Its ``status_code``
            attribute holds the HTTP status of the fetched page, or the string
            ``'timeout'`` (with an empty body) when the fetch timed out.

        Side effects:
            Increments ``self.fail_count`` when the fetch times out.
        """
        # Load the saved cookies from local disk.
        # NOTE(review): despite the ".json" suffix the file is read with pickle.
        # pickle must only be used on trusted files -- confirm that nothing
        # outside this project can write to this path.
        cookie_path = '/Users/mya/Desktop/Development/scrapyTest/postscrape/postscrape/spiders/temp/cookie.json'
        saved_cookies = None
        if os.path.isfile(cookie_path):
            try:
                with open(cookie_path, 'rb') as f:
                    saved_cookies = pickle.load(f)
            except (EOFError, pickle.UnpicklingError):
                # Empty or partially written cookie file: carry on without it.
                saved_cookies = None

        # Pick the session id out of the cookie list. It is kept in its own
        # variable so a missing entry can never leak the whole list into the
        # outgoing Cookie header.
        session_id = next(
            (cookie['value'] for cookie in saved_cookies or ()
             if cookie['name'] == 'JSESSIONID'),
            None,
        )
        session_cookies = None if session_id is None else {'JSESSIONID': session_id}

        try:
            # NOTE(review): verify=False turns off TLS certificate checking.
            response = requests.get(request.url,
                                    cookies=session_cookies,
                                    timeout=60,
                                    verify=False)
        except Timeout:
            print('Get page time out!')
            self.fail_count += 1
            request.status = False
            scrapy_response = HtmlResponse(url=request.url,
                                           body='',
                                           request=request,
                                           encoding='utf-8')
            scrapy_response.status_code = 'timeout'
        else:
            # Hand over the raw bytes: for valid UTF-8 this yields the same
            # body as decoding and re-encoding, and a page that is not valid
            # UTF-8 no longer raises UnicodeDecodeError here.
            scrapy_response = HtmlResponse(url=request.url,
                                           body=response.content,
                                           request=request,
                                           encoding='utf-8')
            # NOTE(review): the HTTP status is stored on an ad-hoc
            # ``status_code`` attribute; the response's own ``status`` is not
            # set from it -- confirm that consumers read ``status_code``.
            scrapy_response.status_code = response.status_code

        return scrapy_response