import time
from urllib.parse import urlparse
from urllib.request import Request

# IPV4_RE and _potential_domain_matches are module-level helpers (not shown
# here); the latter expands a hostname into the domain keys a cookie may be
# stored under.


def cookies_for_url(jar, url):
    """
    Get cookies for a URL from an http.cookiejar (cookielib) CookieJar.

    Adapted from scrapy.http.cookies.CookieJar.add_cookie_header().
    """
    host = urlparse(url).hostname
    if not IPV4_RE.search(host):
        hosts = _potential_domain_matches(host)
        if "." not in host:
            hosts += [host + ".local"]
    else:
        hosts = [host]

    jar._policy._now = jar._now = int(time.time())
    for host in hosts:
        if host in jar._cookies:
            # TODO: origin and unverifiable.
            req = Request(url)
            for cookie in jar._cookies_for_domain(host, req):
                yield cookie
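

# A minimal usage sketch (an assumption, not part of the original module):
# it stores one cookie in a stdlib jar and reads it back with
# cookies_for_url(). The fields passed to http.cookiejar.Cookie are the
# constructor's required arguments; the names and values are illustrative.
import http.cookiejar


def _cookies_for_url_example():
    jar = http.cookiejar.CookieJar()
    jar.set_cookie(http.cookiejar.Cookie(
        version=0, name="session", value="abc123",
        port=None, port_specified=False,
        domain="example.com", domain_specified=True, domain_initial_dot=False,
        path="/", path_specified=True,
        secure=False, expires=None, discard=True,
        comment=None, comment_url=None, rest={}, rfc2109=False,
    ))
    # A subdomain request still matches the "example.com" cookie, because
    # _potential_domain_matches() expands the host into its parent domains.
    for cookie in cookies_for_url(jar, "http://www.example.com/"):
        print(cookie.name, cookie.value)  # -> session abc123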
# For reference: the scrapy.http.cookies.CookieJar method the function above
# was adapted from.
def add_cookie_header(self, request):
    wreq = WrappedRequest(request)
    self.policy._now = self.jar._now = int(time.time())

    # The cookiejar implementation iterates through all domains;
    # instead we restrict to potential matches on the domain.
    req_host = urlparse_cached(request).hostname
    if not req_host:
        return

    if not IPV4_RE.search(req_host):
        hosts = potential_domain_matches(req_host)
        if "." not in req_host:
            hosts += [req_host + ".local"]
    else:
        hosts = [req_host]

    cookies = []
    for host in hosts:
        if host in self.jar._cookies:
            cookies += self.jar._cookies_for_domain(host, wreq)

    attrs = self.jar._cookie_attrs(cookies)
    if attrs:
        if not wreq.has_header("Cookie"):
            wreq.add_unredirected_header("Cookie", "; ".join(attrs))

    self.processed += 1
    if self.processed % self.check_expired_frequency == 0:
        # This is still quite inefficient for a large number of cookies.
        self.jar.clear_expired_cookies()
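

# A sketch of the domain-expansion helper both functions rely on. It is not
# defined in this file; this body is an assumption modeled on
# scrapy.http.cookies.potential_domain_matches. For each parent domain of the
# host (stopping before the bare TLD) it yields both the plain and the
# dot-prefixed form, which are the keys http.cookiejar stores cookies under.
def potential_domain_matches(domain):
    """Potential domain matches for a cookie.

    >>> potential_domain_matches('www.example.com')
    ['www.example.com', 'example.com', '.www.example.com', '.example.com']
    """
    matches = [domain]
    try:
        # Walk past each dot, collecting ever-shorter parent domains,
        # but stop before the last label ('com' alone never matches).
        start = domain.index(".") + 1
        end = domain.rindex(".")
        while start < end:
            matches.append(domain[start:])
            start = domain.index(".", start) + 1
    except ValueError:
        pass
    return matches + ["." + d for d in matches]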