コード例 #1
0
    def process_request(self, request, spider):
        """
        Attach the session's cookies to an outgoing Splash request.

        Nothing is changed (beyond ensuring an 'args' dict exists in the
        Splash options) when the request is not a Splash request, is
        flagged ``_splash_processed``, already carries a 'cookies'
        argument, or has no 'session_id' option. Otherwise the request's
        own cookies are merged into the session cookiejar and the jar,
        converted with ``jar_to_har``, is stored under
        ``request.meta['splash']['args']['cookies']``.

        Always returns None.
        """
        meta = request.meta
        if 'splash' not in meta or meta.get('_splash_processed'):
            return None

        options = meta['splash']
        args = options.setdefault('args', {})

        # Explicitly supplied cookies win; without a session there is no jar.
        if 'cookies' in args or 'session_id' not in options:
            return None

        session_jar = self.jars[options['session_id']]
        har_to_jar(session_jar, self._get_request_cookies(request))
        args['cookies'] = jar_to_har(session_jar)
        return None
コード例 #2
0
    def process_response(self, request, response, spider):
        """
        Store cookies reported by a Splash JSON response in the session jar.

        The response is returned unchanged unless it is a
        ``SplashJsonResponse`` whose data contains 'cookies', the request
        has Splash options, and a session id ('new_session_id', falling
        back to 'session_id') is available. A warning is emitted when the
        request was not flagged ``_splash_processed``. On success the
        session cookiejar is updated and exposed as ``response.cookiejar``.
        """
        from scrapy_splash import SplashJsonResponse

        if not isinstance(response, SplashJsonResponse):
            return response
        if 'cookies' not in response.data:
            return response

        meta = request.meta
        if 'splash' not in meta:
            return response

        if not meta.get('_splash_processed'):
            warnings.warn("SplashCookiesMiddleware requires SplashMiddleware")
            return response

        options = meta['splash']
        fallback_id = options.get('session_id')
        session_id = options.get('new_session_id', fallback_id)
        if session_id is None:
            return response

        session_jar = self.jars[session_id]
        # Cookies that were sent with the request are passed along so
        # har_to_jar can take them into account when updating the jar.
        sent_cookies = options['args'].get('cookies', [])
        har_to_jar(session_jar, response.data['cookies'], sent_cookies)
        response.cookiejar = session_jar
        return response
コード例 #3
0
    def process_response(self, request, response, spider):
        """
        Feed the 'cookies' of a Splash JSON response into the session jar.

        Responses that are not ``SplashJsonResponse`` instances, carry no
        'cookies' in their data, or belong to non-Splash requests pass
        through untouched. If the request lacks the ``_splash_processed``
        flag a warning is issued and the response is returned as is. With a
        usable session id the jar is updated, the debug hook
        ``_debug_set_cookie`` is called, and the jar is attached to the
        response as ``cookiejar``.
        """
        from scrapy_splash import SplashJsonResponse

        if not isinstance(response, SplashJsonResponse):
            return response
        if 'cookies' not in response.data:
            return response
        if 'splash' not in request.meta:
            return response

        if not request.meta.get('_splash_processed'):
            warnings.warn("SplashCookiesMiddleware requires SplashMiddleware")
            return response

        opts = request.meta['splash']
        # 'new_session_id' takes precedence over 'session_id' when present.
        old_id = opts.get('session_id')
        session_id = opts.get('new_session_id', old_id)

        if session_id is not None:
            cookiejar = self.jars[session_id]
            previously_sent = opts['args'].get('cookies', [])
            har_to_jar(cookiejar, response.data['cookies'], previously_sent)
            self._debug_set_cookie(response, spider)
            response.cookiejar = cookiejar

        return response
コード例 #4
0
    def process_request(self, request, spider):
        """
        Fill ``request.meta['splash']['args']['cookies']`` from the session jar.

        Skipped for non-Splash requests, requests flagged
        ``_splash_processed``, requests that already define 'cookies' in
        their Splash args, and requests without a 'session_id' option.
        For all other requests the request's cookies are merged into the
        session cookiejar, the jar is converted with ``jar_to_har`` and
        stored in the Splash args, and ``_debug_cookie`` is invoked.

        Always returns None.
        """
        meta = request.meta
        if 'splash' not in meta:
            return None
        if meta.get('_splash_processed'):
            return None

        opts = meta['splash']
        args = opts.setdefault('args', {})

        has_explicit_cookies = 'cookies' in args
        if has_explicit_cookies or 'session_id' not in opts:
            return None

        cookiejar = self.jars[opts['session_id']]
        request_cookies = self._get_request_cookies(request)
        har_to_jar(cookiejar, request_cookies)
        args['cookies'] = jar_to_har(cookiejar)

        self._debug_cookie(request, spider)
        return None
コード例 #5
0
    def process_request(self, request, spider):
        """
        Add session cookies to Splash args and drop the raw Cookie header.

        For requests flagged ``_splash_processed`` the 'Cookie' header is
        removed from ``request.headers`` (so it is not sent to Splash
        itself) and nothing else happens. For other Splash requests that
        have a 'session_id' and no explicit 'cookies' argument, the
        request's cookies are merged into the session cookiejar, the jar
        is converted with ``jar_to_har`` and stored under
        ``request.meta['splash']['args']['cookies']``, and
        ``_debug_cookie`` is invoked.

        Always returns None.
        """
        meta = request.meta
        if 'splash' not in meta:
            return None

        if meta.get('_splash_processed'):
            # Already rewritten for Splash: only strip the cookie header.
            request.headers.pop('Cookie', None)
            return None

        options = meta['splash']
        args = options.setdefault('args', {})
        if 'cookies' in args or 'session_id' not in options:
            return None

        session_jar = self.jars[options['session_id']]
        har_to_jar(session_jar, self._get_request_cookies(request))
        args['cookies'] = jar_to_har(session_jar)

        self._debug_cookie(request, spider)
        return None
コード例 #6
0
    def process_request(self, request, spider):
        """
        For Splash requests add 'cookies' key with current
        cookies to request.meta['splash']['args'].

        The session cookiejars are kept in Redis under ``self.redis_key``
        as a pickled mapping. They are loaded before use and written back
        once the request's own cookies have been merged in, so the merge
        (and any newly created session jar) survives the next reload.

        Nothing is read from or written to Redis for requests that are not
        Splash requests, are flagged ``_splash_processed``, already carry
        a 'cookies' argument, or have no 'session_id' option.

        Always returns None.
        """
        if 'splash' not in request.meta:
            return

        if request.meta.get('_splash_processed'):
            return

        splash_options = request.meta['splash']

        splash_args = splash_options.setdefault('args', {})
        if 'cookies' in splash_args:  # cookies already set
            return

        if 'session_id' not in splash_options:
            return

        # SECURITY NOTE: pickle.loads executes arbitrary code embedded in
        # the payload. This is only safe if every writer of
        # ``self.redis_key`` is trusted -- TODO confirm the Redis instance
        # is not reachable by untrusted parties.
        _jars = self.redis_conn.get(self.redis_key)
        if _jars is None:
            self.jars = defaultdict(CookieJar)
        else:
            self.jars = pickle.loads(_jars)

        jar = self.jars[splash_options['session_id']]

        cookies = self._get_request_cookies(request)
        har_to_jar(jar, cookies)

        # Persist the updated jars; without this the merge above would be
        # discarded the next time the jars are reloaded from Redis.
        # NOTE(review): this get/modify/set sequence is not atomic.
        self.redis_conn.set(self.redis_key, pickle.dumps(self.jars, protocol=-1))

        splash_args['cookies'] = jar_to_har(jar)
        self._debug_cookie(request, spider)
コード例 #7
0
    def process_response(self, request, response, spider):
        """
        For Splash JSON responses add all cookies from
        'cookies' in a response to the cookiejar.

        The session cookiejars are kept in Redis under ``self.redis_key``
        as a pickled mapping: they are loaded, the jar for the session is
        updated from the response, and the whole mapping is written back.

        The response is returned unchanged (and Redis is not touched) when
        it is not a ``SplashJsonResponse``, has no 'cookies' in its data,
        the request has no Splash options, the request is not flagged
        ``_splash_processed`` (a warning is emitted in that case), or no
        session id ('new_session_id', falling back to 'session_id') is
        available. Otherwise the updated jar is also attached to the
        response as ``cookiejar``.
        """
        from scrapy_splash import SplashJsonResponse
        if not isinstance(response, SplashJsonResponse):
            return response

        if 'cookies' not in response.data:
            return response

        if 'splash' not in request.meta:
            return response

        if not request.meta.get('_splash_processed'):
            warnings.warn("SplashCookiesMiddleware requires SplashMiddleware")
            return response

        splash_options = request.meta['splash']
        session_id = splash_options.get('new_session_id',
                                        splash_options.get('session_id'))
        if session_id is None:
            return response

        # SECURITY NOTE: pickle.loads executes arbitrary code embedded in
        # the payload. This is only safe if every writer of
        # ``self.redis_key`` is trusted -- TODO confirm the Redis instance
        # is not reachable by untrusted parties.
        _jars = self.redis_conn.get(self.redis_key)
        if _jars is None:
            # No separate write here: the jars are stored unconditionally
            # below, after the response cookies have been merged.
            self.jars = defaultdict(CookieJar)
        else:
            self.jars = pickle.loads(_jars)

        jar = self.jars[session_id]
        request_cookies = splash_options['args'].get('cookies', [])
        har_to_jar(jar, response.data['cookies'], request_cookies)
        self._debug_set_cookie(response, spider)
        response.cookiejar = jar

        # NOTE(review): this get/modify/set sequence is not atomic.
        self.redis_conn.set(self.redis_key, pickle.dumps(self.jars, protocol=-1))

        return response