def save_cookies_to_file(cookies: "Cookies", path: Path):
    """Dump every cookie held by *cookies* into *path* in Mozilla/Netscape format.

    Creates the parent directory on first save if the file does not exist yet.
    """
    target = MozillaCookieJar(path)
    for item in cookies.jar:
        target.set_cookie(item)
    # Ensure the destination directory exists before writing the file.
    if not path.is_file():
        path.parent.mkdir(parents=True, exist_ok=True)
    target.save(ignore_discard=True)
def test_authholder_credentials_save_reallysave(auth_holder):
    """Save really do save if cookies changed."""
    # seed the credentials file with a known cookie
    seeded_jar = MozillaCookieJar(auth_holder._cookiejar_filepath)
    seeded_cookie = get_cookie()
    seeded_jar.set_cookie(seeded_cookie)
    seeded_jar.save()

    # let the holder load those credentials, and snapshot the file for comparison
    auth_holder._load_credentials()
    with open(auth_holder._cookiejar_filepath, "rb") as fh:
        content_before = fh.read()

    # put a different credential in the holder (mimicking a re-authentication
    # that happened while doing a request)
    auth_holder._cookiejar.set_cookie(get_cookie(value="different"))

    # saving now must rewrite the file on disk
    auth_holder._save_credentials_if_changed()
    with open(auth_holder._cookiejar_filepath, "rb") as fh:
        content_after = fh.read()
    assert content_after != content_before

    # run it once more to confirm the write goes through cookiejar.save()
    # (and the file change above was not some other side effect)
    with patch.object(auth_holder._cookiejar, "save") as mock:
        auth_holder._save_credentials_if_changed()
    assert mock.call_count == 1
def test_authholder_request_simple(auth_holder):
    """Load credentials the first time, hit the network, save credentials."""
    # seed credentials on disk
    seeded_jar = MozillaCookieJar(auth_holder._cookiejar_filepath)
    seeded_jar.set_cookie(get_cookie())
    seeded_jar.save()

    replacement_cookie = get_cookie(value="different")

    def fake_request(self, method, url, json, headers):
        # the wrapped client must be called with exactly these arguments
        assert method == "testmethod"
        assert url == "testurl"
        assert json == "testbody"
        assert headers == {"User-Agent": build_user_agent()}

        # by this point credentials must have been loaded
        assert auth_holder._cookiejar is not None

        # pretend a re-authentication happened during the request
        auth_holder._cookiejar.set_cookie(replacement_cookie)

        return "raw request response"

    with patch("macaroonbakery.httpbakery.Client.request", fake_request):
        resp = auth_holder.request("testmethod", "testurl", "testbody")

    # argument/loading checks ran inside fake_request; verify the passthrough
    assert resp == "raw request response"

    # the credentials modified mid-request must have been persisted
    stored_jar = MozillaCookieJar(auth_holder._cookiejar_filepath)
    stored_jar.load()
    assert list(stored_jar)[0].value == replacement_cookie.value
def test_authholder_credentials_load_file_present_ok(auth_holder):
    """Credentials are properly loaded and all internal objects setup ok."""
    # seed credentials on disk
    seeded_jar = MozillaCookieJar(auth_holder._cookiejar_filepath)
    seeded_cookie = get_cookie()
    seeded_jar.set_cookie(seeded_cookie)
    seeded_jar.save()

    auth_holder._load_credentials()

    # exactly the seeded cookie was loaded; compare values since Cookie has no __eq__
    loaded = list(auth_holder._cookiejar)
    assert len(loaded) == 1
    assert loaded[0].value == seeded_cookie.value
    assert isinstance(auth_holder._cookiejar, MozillaCookieJar)
    assert auth_holder._old_cookies == list(auth_holder._cookiejar)

    # the bakery client was set up sharing the cookies and browser interaction
    assert isinstance(auth_holder._client, httpbakery.Client)
    assert list(auth_holder._client.cookies)[0].value == seeded_cookie.value
    (im,) = auth_holder._client._interaction_methods
    assert isinstance(im, httpbakery.WebBrowserInteractor)
    assert im._open_web_browser == visit_page_with_browser
def save_cookies(self):
    """Collect the current webdriver cookies and persist them via save_cookie_jar."""
    # navigate to the accounts page first so the relevant cookies are present
    self.driver.get(self.config.url_accounts_no_form)
    jar = MozillaCookieJar()
    for raw_cookie in self.driver.get_cookies():
        # convert each selenium cookie dict into an http.cookiejar Cookie
        jar.set_cookie(self.get_cookie(raw_cookie))
    save_cookie_jar(self.config, jar)
def save_cookies_to_txt(cookies: List[Any], filename: str) -> None:
    """Write *cookies* to *filename* as a Mozilla/Netscape cookie file."""
    # Make sure the target file exists before handing it to the jar.
    if not os.path.isfile(filename):
        touch(filename)
    jar = MozillaCookieJar(filename)
    for item in cookies:
        jar.set_cookie(item)
    jar.save(ignore_discard=False)
def test_authholder_request_interaction_error(auth_holder):
    """Support authentication failure while doing the request."""
    # seed credentials on disk
    seeded_jar = MozillaCookieJar(auth_holder._cookiejar_filepath)
    seeded_jar.set_cookie(get_cookie())
    seeded_jar.save()

    # an InteractionError from the bakery client surfaces as a CommandError
    with patch("macaroonbakery.httpbakery.Client.request") as mock:
        mock.side_effect = httpbakery.InteractionError("bad auth!!")
        expected = "Authentication failure: cannot start interactive session: bad auth!!"
        with pytest.raises(CommandError, match=expected):
            auth_holder.request("testmethod", "testurl", "testbody")
def test_authholder_request_credentials_already_loaded(auth_holder):
    """Do not load credentials if already there."""
    # seed credentials on disk
    seeded_jar = MozillaCookieJar(auth_holder._cookiejar_filepath)
    seeded_jar.set_cookie(get_cookie())
    seeded_jar.save()

    # load once, then poison the loader so any second call would blow up
    auth_holder._load_credentials()
    auth_holder._load_credentials = None

    with patch("macaroonbakery.httpbakery.Client.request"):
        auth_holder.request("testmethod", "testurl", "testbody")
def test_authholder_credentials_save_notreally(auth_holder):
    """Save does not really save if cookies didn't change."""
    # seed credentials on disk
    seeded_jar = MozillaCookieJar(auth_holder._cookiejar_filepath)
    seeded_jar.set_cookie(get_cookie())
    seeded_jar.save()

    auth_holder._load_credentials()

    # nothing changed since loading, so cookiejar.save must not be invoked
    with patch.object(auth_holder._cookiejar, "save") as mock:
        auth_holder._save_credentials_if_changed()
    assert mock.call_count == 0
def save_cookies_lwp(cookiejar):
    """Return cookies from a requests.

    Session cookies member in the Netscape format.
    """
    result = MozillaCookieJar()
    for original in cookiejar:
        # Rebuild each cookie from its attribute dict; the private `_rest`
        # attribute maps back to the constructor's `rest` keyword.
        attrs = dict(vars(original))
        attrs["rest"] = attrs.pop("_rest")
        # Session cookies (no expiry) get a one-day lifetime so they survive saving.
        if attrs["expires"] is None:
            attrs["expires"] = int(time.time()) + 86400
        result.set_cookie(Cookie(**attrs))
    return result
def get_cookies_in_cookiejar(host):
    """Export Firefox cookies whose domain contains *host* into a cookiejar.

    Reads the Firefox cookies sqlite database and rebuilds each row as an
    http.cookiejar Cookie.

    Return value: a MozillaCookieJar filled with the matching cookies.
    """
    # based on http://www.guyrutenberg.com/2010/11/27/building-cookiejar-out-of-firefoxs-cookies-sqlite/
    cj = MozillaCookieJar()
    cookie_db = get_cookie_db_path(str(FIREFOX_DIR))
    conn = db.connect(cookie_db)
    try:
        cursor = conn.cursor()
        # Bind `host` as a parameter instead of formatting it into the SQL
        # string, so a hostile value cannot inject SQL. CONTENTS is a
        # module-level column list, not user input.
        sql = "SELECT {c} FROM moz_cookies WHERE host LIKE ?".format(c=CONTENTS)
        cursor.execute(sql, ("%{}%".format(host),))
        for item in cursor.fetchall():
            # Row layout (per CONTENTS): 0=host, 1=path, 2=isSecure, 3=expiry,
            # 4=name, 5=value — presumably; verify against CONTENTS definition.
            c = Cookie(0, item[4], item[5], None, False,
                       item[0], item[0].startswith('.'), item[0].startswith('.'),
                       item[1], False, item[2], item[3], item[3] == "",
                       None, None, {})
            cj.set_cookie(c)
    finally:
        # Close the connection even if the query fails (was leaked before).
        conn.close()
    return cj
class requester():
    # Caching HTTP client built on urllib: persistent Mozilla cookies,
    # optional rotating proxies (via an external `proxer`), a size-limited
    # on-disk page cache, and small HTML helpers (link/form scraping).
    # NOTE(review): this block was recovered from whitespace-collapsed source;
    # nesting inside get() was reconstructed — confirm against the original file.

    cache_timeout = 10940  # seconds a cached page is considered fresh
    caching = True
    assert_on_fail = True  # raise FailOnGetResponse instead of returning None
    time_out = 4  # default socket timeout (seconds)
    debug_output = True
    dir_cache = path.dirname(path.realpath(__file__)) + "/cache/"
    cookie_filename = path.abspath(path.realpath(__file__)) + ".cookie"
    last_page = None  # body of the most recent successful get()
    last_url = None  # URL actually fetched last (after redirects)
    ref_url = None  # sent as Referer on subsequent requests
    time_sleep = 1e-1  # mean of the random pre-request delay
    limit_file_cache = 200  # max number of files kept in dir_cache
    counter_file_cache = 0  # get() counter used to trigger periodic cleanup
    verify_word = None  # if set, every fetched page must contain this token

    def print(self, *objs):
        # Timestamped debug logging to stderr; silenced via debug_output.
        if self.debug_output:
            print(datetime.utcnow(), *objs, file=stderr)

    def __init__(self,
                 proxy=bool(strtobool(environ.get('REQUESTER_PROXY', '0'))),
                 cookie_filename=None,
                 caching=None,
                 user_agent=None,
                 headers=None,
                 file_name_with_proxies=path.join(path.dirname(__file__), 'proxies.txt')):
        # proxy: True -> pick one from the proxies file; a string -> use it directly.
        self.opened = None  # context-manager state flag (see __enter__/__exit__)
        if cookie_filename:
            self.cookie_filename = cookie_filename
        if caching is not None:
            self.caching = caching
        if headers:
            self.headers = headers
        else:
            # Browser-like default headers.
            self.headers = [
                ('Accept',
                 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
                ('Accept-Encoding', 'gzip, deflate'),
                ('Accept-Language', 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3'),
                ('Connection', 'keep-alive'),
                ('User-Agent',
                 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2) Gecko/20100101 Firefox/10.0.2'
                 if user_agent is None else user_agent)
            ]
        if self.cookie_filename:
            # Persistent cookie jar backed by a file; load it if it exists.
            makedirs(path.dirname(self.cookie_filename), exist_ok=True)
            self.cookiejar = MozillaCookieJar(self.cookie_filename)
            if path.exists(self.cookie_filename):
                self.cookiejar.load()
        else:
            self.cookiejar = MozillaCookieJar()
        self.http_cookie_processor = urllib.request.HTTPCookieProcessor(
            self.cookiejar)
        self.opener = urllib.request.build_opener(self.http_cookie_processor)
        self.proxer = None
        if proxy:
            if proxy is True:
                # Rotate proxies from the proxies file.
                if not file_name_with_proxies or not path.exists(
                        file_name_with_proxies):
                    raise FileWithProxiesNotFound(
                        "ERROR: not found '%s' file" % file_name_with_proxies)
                self.proxer = proxer(file_name_with_proxies)
                proxy = self.proxer.get()
                self.print("[proxy]", proxy)
                time_response = self.proxer.time_response()
                if time_response:
                    self.print("[average time]", time_response)
            self.opener.add_handler(
                urllib.request.ProxyHandler({
                    'http': proxy,
                    'https': proxy,
                }))
        # Snapshot of the default headers, restored when switching hosts in get().
        self._init_opener_headers = self.headers

    def get(
        self,
        url,
        post=None,
        caching=None,
        is_ref_url=True,
        md5_file_cache=None,
        time_out=None,
        headers=None,
        detect_charsets=True,
        content_type=None,
        files=None,
    ):
        # Fetch *url* (optionally POSTing *post*/*files*), honoring the file
        # cache, proxies, gzip and charset detection. Returns the page text.
        prefix = "local-file:"
        if url.startswith(prefix):
            # Shortcut: read a local file instead of the network.
            # NOTE(review): opened in text mode yet .decode() is called —
            # presumably should be "rb"; confirm before relying on this path.
            with open(url[len(prefix):], "r") as fo:
                page = fo.read().decode("utf8")
            self.last_page = page
            return page
        if not url.startswith('http') and self.last_url:
            # Resolve relative URLs against the previously fetched page.
            url = urllib.parse.urljoin(self.last_url, url)
        if caching is None:
            caching = self.caching
        # NOTE(review): this replace is a no-op as written — it likely was
        # url.replace('&amp;', '&') before an HTML-unescaping mangle; confirm.
        url = url.replace('&', '&')
        url = url.replace(' ', '%20')
        makedirs(self.dir_cache, mode=0o777, exist_ok=True)
        # Files may ride along inside a dict post under the 'files__' key.
        files = files or isinstance(post, dict) and post.pop('files__', None)
        post_urlencoded = urllib.parse.urlencode(post).encode(
            'utf-8') if post and isinstance(post, dict) else post
        try:
            # Cache path: md5 of url+post, plus a flattened readable suffix.
            # NOTE(review): url + bytes post_urlencoded raises TypeError,
            # which lands in the except below and disables caching for POSTs.
            file_cache = ''.join((
                self.dir_cache,
                md5((md5_file_cache or url +
                     (post_urlencoded or "")).encode()).hexdigest(),
                ("/" + url[url.find("//") + 2:].split("?", 2)[0]).replace(
                    "/", "_"),
                ".html",
            ))
        except Exception:
            file_cache = None
        caching = file_cache and caching and self.cache_timeout > 0
        from_cache = caching
        if caching:
            # Use the cached copy only while it is younger than cache_timeout.
            if not path.isfile(file_cache):
                from_cache = False
            else:
                diff_time = datetime.now() - datetime.fromtimestamp(
                    path.getctime(file_cache))
                from_cache = diff_time.seconds < self.cache_timeout
        self.print("[cache]" if from_cache else "", url)
        self.error = None
        self.response = None
        if from_cache:
            # Read cached page; re-encode to bytes so the shared charset
            # decoding below treats both paths uniformly.
            with open(file_cache, "r") as f:
                page = f.read().encode('utf8')
        else:
            if self.time_sleep:
                # Random polite delay before hitting the network.
                v_time_sleep = min(1, abs(gauss(0, 1)) * self.time_sleep)
                sleep(v_time_sleep)
            if not headers:
                headers = {}
            if self.ref_url and 'Referer' not in headers:
                headers.update({"Referer": self.ref_url})
            if not self.last_url or urllib.parse.urlparse(
                    self.last_url).netloc != urllib.parse.urlparse(url).netloc:
                # Host changed: reset opener headers to the defaults.
                self.opener.addheaders = self._init_opener_headers
            if headers:
                # Merge per-call headers over the opener's current ones.
                h = dict(self.opener.addheaders)
                h.update(headers)
                self.opener.addheaders = list(h.items())
            if content_type == 'multipart/form-data' and post or files:
                # Switch to a multipart body when uploading files.
                post_urlencoded, multipart_headers = encode_multipart(
                    fields=post, files=files)
                headers.update(multipart_headers)
            try:
                if headers:
                    request = urllib.request.Request(url, headers=headers)
                else:
                    request = url
                time_start = datetime.utcnow()
                response = self.opener.open(
                    request,
                    post_urlencoded if post else None,
                    timeout=time_out or self.time_out,
                )
                if response.info().get("Content-Encoding", None) == "gzip":
                    buf = BytesIO(response.read())
                    page = GzipFile(fileobj=buf).read()
                else:
                    page = response.read()
                self.response = response
                self.time_response = datetime.utcnow() - time_start
                if self.verify_word and self.verify_word not in page:
                    raise NoVerifyWord("No verify word '%s', size page = %d" %
                                       (self.verify_word, len(page)))
            except Exception as err:
                self.error = err
                if self.assert_on_fail:
                    if self.proxer:
                        self.proxer.fail()
                    raise FailOnGetResponse(err)
                else:
                    traceback.print_exc()
                return
            try:
                if file_cache and caching:
                    # Write the fresh page to the cache; append the current
                    # cookies as JSON except for JSON/image responses.
                    cookie_write = True
                    if self.response.info().get("Content-Type").startswith(
                            "application/json"):
                        # Pretty-print JSON before caching.
                        # NOTE(review): dumps() yields str, but .decode('utf8')
                        # below expects bytes — this path likely always falls
                        # into the except; confirm against the original file.
                        page = dumps(loads(page), indent=4)
                        cookie_write = False
                    if self.response.info().get("Content-Type").startswith(
                            "image/"):
                        cookie_write = False
                    with open(file_cache, "w") as f:
                        f.write(page.decode('utf8'))
                        if cookie_write:
                            f.write("\n\n" + dumps(self.get_cookies(), indent=4))
            except Exception:
                traceback.print_exc()
                self.print("[cache] ERROR: write to", file_cache)
            if self.proxer:
                if not self.error:
                    self.proxer.ok(self.time_response)
                else:
                    self.proxer.fail()
        if detect_charsets:
            # Guess the charset from meta/header declarations in the page…
            matches = re.findall(
                r'charset=["\']?(?P<charset>[^"\'\s\.>;]{3,}\b)', str(page),
                re.IGNORECASE)
            if matches:
                charsets = [c.lower() for c in matches]
                if len(charsets) > 1 and len(set(charsets)) > 1:
                    self.print(
                        f'[WARNING] set multi charset values: {charsets}')
                charset = charsets[-1].lower()
            else:
                charset = 'utf-8'
            try:
                # …and let chardet override when it is very confident.
                charset_detect = chardet.detect(page)
                if charset_detect and charset_detect['confidence'] > 0.98:
                    charset = charset_detect['encoding']
            except Exception as e:
                self.print('exception on charset detect:', str(e))
            if charset in ('utf-8', 'utf8'):
                page = page.decode('utf-8', 'replace')
            elif charset in ('windows-1251', 'cp1251'):
                page = page.decode('cp1251', 'replace')
            else:
                page = page.decode(charset, 'replace')
        self.last_page = page
        self.last_url = self.response.geturl() if self.response else url
        if is_ref_url:
            self.ref_url = self.last_url
        self.file_cache_clear()
        return page

    def get_link_by_text(self, text, page=None):
        # Return the href of the first <a> whose visible text matches *text*
        # (spaces in *text* match any whitespace); None when absent.
        if page is None:
            page = self.last_page
        match = re.search(
            r"""
            <a[^>]*href="(?P<href>[^"]*)"[^>]*>\s*
            (?:</?[^a][^>]*>\s*)*
            %s
            """ % text.replace(" ", r"\s"), page, re.VERBOSE)
        if not match:
            return
        return match.group("href")

    def get_link_by_text_and_go_if_exist(self, text):
        # Follow the link labelled *text* when present; return the (possibly
        # updated) last fetched page.
        url = self.get_link_by_text(text)
        if url:
            self.get(url)
        return self.last_page

    def form(self, page=None, action='', limit=1, fid=None, selectors=(),
             enctype=False):
        # Scrape the first <form> matching the given attribute selectors.
        # Returns a dict with 'method', 'url', parsed 'post' fields, optional
        # 'enctype' and the list of 'unchecked' checkboxes; None if no match.
        if page is None:
            page = self.last_page
        selectors = list(selectors)
        selectors += ['''method=["'](?P<method>post|get)"''']
        if action is not None:
            selectors += [f'''action=["'](?P<url>[^"']*{action}[^"']*)["']''']
            limit += 1
        if fid is not None:
            selectors.append(f'id="{fid}"')
            limit += 1
        if enctype:
            selectors.append('enctype="(?P<enctype>[^"]*)"')
            limit += 1
        # Selectors may appear in any order, up to *limit* attribute runs.
        selector = '|[^>]*'.join(selectors)
        regex = f'''
        <form([^>]*{selector}){{{limit}}}[^>]*>
        .*?
        </form>
        '''
        match = re.search(regex, page, re.DOTALL | re.VERBOSE | re.IGNORECASE)
        if not match:
            return None
        page = match.group()
        result = match.groupdict()
        post = {}
        # Collect <input>-style fields: name/value pairs, tracking unchecked
        # checkboxes separately (browsers do not submit those).
        fields = re.finditer(
            r'''
            (?:
                type=["'](?P<type>[^"']*)["']\s*|
                value=["'](?P<value>[^"']*)["']\s*|
                name=["'](?P<name>[^"']*)["']\s*|
                [-a-z]+=["'][^"']*["']\s*|
                (?P<checked>checked)\s*
            ){2,}''', page, re.VERBOSE | re.IGNORECASE)
        unchecked = []
        for field in fields:
            field = field.groupdict()
            if field["name"] is None or field["value"] is None:
                continue
            if field["type"] == "checkbox" and field["checked"] is None:
                unchecked.append(field)
            else:
                post[field['name']] = html.unescape(field['value'])
        # <select> fields default to the empty string.
        fields = re.finditer(r'''<select[^>]*name="(?P<name>[^"]*)"[^>]*>''',
                             page, re.VERBOSE)
        for field in fields:
            post[field.group('name')] = ''
        result['post'] = post
        if unchecked:
            result['unchecked'] = unchecked
        return result

    def submit_form(self, data, *args, url=None, form=None, **kwargs):
        # Fill *data* into a scraped form (or the given one) and submit it
        # using the form's own method.
        form = form or self.form(*args, **kwargs)
        form['post'].update(data)
        # NOTE(review): data_urlencoded is bytes, so the GET branch builds a
        # "?b'...'" query string — looks buggy; confirm before use.
        data_urlencoded = urllib.parse.urlencode(form['post']).encode('utf-8')
        url = url or form['url']
        content_type = form.get('enctype')
        ret = {
            'get':
            lambda: self.get(urllib.parse.urljoin(url, f'?{data_urlencoded}'),
                             content_type=content_type),
            'post':
            lambda: self.get(url, form['post'], content_type=content_type),
        }[form['method'].lower()]()
        return ret

    def file_cache_clear(self):
        # Every limit_file_cache-th call, evict the oldest cache files so at
        # most limit_file_cache entries remain.
        if self.limit_file_cache and self.counter_file_cache % self.limit_file_cache == 0:
            file_list = []
            for file_cache in listdir(self.dir_cache):
                stat_file = stat(self.dir_cache + file_cache)
                file_list.append((stat_file.st_atime, file_cache))
            file_list.sort(reverse=True)
            for atime, file_cache in file_list[self.limit_file_cache:]:
                remove(self.dir_cache + file_cache)
        self.counter_file_cache += 1

    def get_raw_cookies(self):
        # Yield the Cookie objects themselves (not just name/value).
        for c in self.cookiejar:
            yield c

    def get_cookies(self):
        # Current cookies as a plain {name: value} dict.
        return dict(((i.name, i.value) for i in self.cookiejar))

    def get_cookie(self, name):
        return self.get_cookies().get(name, None)

    def set_cookie(self, name, value):
        # Update the value of an existing cookie; no-op when absent.
        for c in self.cookiejar:
            if c.name == name:
                c.value = value
                self.cookiejar.set_cookie(c)
                self.print("[setcookie]", name)
                break

    @staticmethod
    def rand_string(length):
        # Random alphanumeric string of the given length (not crypto-safe).
        a = ascii_letters + digits
        return ''.join([choice(a) for i in range(length)])

    def save_cookie(self):
        # Best-effort persistence of the cookie jar to disk.
        if self.cookie_filename:
            try:
                self.cookiejar.save()
            except Exception:
                pass

    def close(self):
        # Flush proxy statistics and cookies.
        if self.proxer:
            self.proxer.save_data()
        self.save_cookie()

    def __enter__(self):
        if self.opened is not True:
            self.opened = True
        return self

    def __exit__(self, *err):
        if self.opened is not False:
            self.opened = False
            self.close()

    def __del__(self):
        # Drop stale cache files and save cookies on garbage collection.
        if not isdir(self.dir_cache):
            return
        for file_cache in listdir(self.dir_cache):
            diff_time = (
                datetime.now() -
                datetime.fromtimestamp(getctime(self.dir_cache + file_cache)))
            if diff_time.seconds >= self.cache_timeout:
                remove(self.dir_cache + file_cache)
        self.save_cookie()
class ClientHandler(urllib.request.BaseHandler):
    """urllib handler that adds default browser-like headers, manages cookies
    and transparently decompresses gzip/deflate response bodies."""

    DEFAULT_HEADERS = {
        "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
        "Accept":
        "text/html, application/xhtml+xml, application/xml; q=0.9, image/webp, */*; q=0.8",
        "Accept-Encoding": "gzip, deflate"
    }

    def __init__(self):
        # Copy the class-level defaults: the original aliased DEFAULT_HEADERS,
        # so set_header() on one instance mutated every instance.
        self.headers = dict(self.DEFAULT_HEADERS)
        self.cookies = MozillaCookieJar()

    def load_cookie(self, fpath):
        """Load cookies from a Mozilla-format cookie file."""
        self.cookies.load(fpath)

    def save_cookie(self, fpath):
        """Persist the current cookies to a Mozilla-format cookie file."""
        self.cookies.save(fpath)

    def set_header(self, name, value):
        """Set (or override) a default request header for this instance."""
        self.headers[name] = value

    def set_cookie(self, cookie_string):
        """Parse *cookie_string* and add the resulting cookie to the jar."""
        cookie = self._make_cookie(cookie_string)
        if cookie:
            self.cookies.set_cookie(cookie)

    def _make_cookie(self, cookie_string):
        # Delegates parsing to the project's HttpClient.ClientCookie helper.
        return HttpClient.ClientCookie().make(cookie_string)

    def _request_add_headers(self, request):
        # Apply all default headers, also unredirected so redirects keep them.
        for name, value in list(self.headers.items()):
            request.add_header(name, value)
            request.add_unredirected_header(name, value)
        return self

    def _request_add_cookies(self, request):
        self.cookies.add_cookie_header(request)

    def _response_extract_cookies(self, request, response):
        self.cookies.extract_cookies(response, request)
        return self

    def _response_decompress_content(self, request, response):
        """Replace response.fp with a file-like object yielding the
        decompressed body for gzip/deflate encodings."""
        import io  # local import: file's import block is not in view
        encoding = response.headers.get("content-encoding", "")
        if encoding == "gzip":
            # Bodies are bytes: BytesIO, not StringIO (which broke on py3).
            response.fp = gzip.GzipFile(fileobj=io.BytesIO(response.read()))
        elif encoding == "deflate":
            # fp must stay file-like; the original assigned raw bytes here.
            response.fp = io.BytesIO(zlib.decompress(response.read()))
        return self

    def http_request(self, request):
        self._request_add_headers(request)
        self._request_add_cookies(request)
        return request

    def http_response(self, request, response):
        self._response_extract_cookies(
            request, response)._response_decompress_content(request, response)
        return response

    https_request = http_request
    https_response = http_response