def handle_401(self, r, **kwargs):
    """Resends a request with auth headers, if needed.

    Inspects the WWW-Authenticate header of the 401 response *r* and
    retries the request with Basic or Digest credentials accordingly.
    Returns the retried response, or None if the scheme is unrecognized.
    """
    www_authenticate = r.headers.get('www-authenticate', '').lower()

    if 'basic' in www_authenticate:
        if self.pos is not None:
            # Rewind the request body so the retry resends the same payload.
            r.request.body.seek(self.pos)

        # Consume content and release the original connection
        # to allow our new request to reuse the same one.
        r.content
        r.raw.release_conn()
        prep = r.request.copy()
        if not hasattr(prep, '_cookies'):
            prep._cookies = cookies.RequestsCookieJar()
        # Carry over any cookies the 401 response set before retrying.
        cookies.extract_cookies_to_jar(prep._cookies, r.request, r.raw)
        prep.prepare_cookies(prep._cookies)

        self.auth = auth.HTTPBasicAuth(self.username, self.password)
        prep = self.auth(prep)
        _r = r.connection.send(prep, **kwargs)
        # Keep the original 401 in the history of the retried response.
        _r.history.append(r)
        _r.request = prep

        return _r

    if 'digest' in www_authenticate:
        self.auth = auth.HTTPDigestAuth(self.username, self.password)
        # Digest auth would resend the request by itself. We can take a
        # shortcut here.
        return self.auth.handle_401(r, **kwargs)
def retrieve(cls, id_source, session=None, username=None, password=None, domain_name=None):
    """Log in to the Credible BH front end and return an authenticated instance.

    Missing credentials are fetched from Opossum for *id_source*.  Transient
    failures are retried up to three times.

    :param id_source: identifier used to look up credentials.
    :param session: optional pre-existing requests.Session to reuse.
    :raises CredibleFrontEndLoginException: on a rejected login, or after
        three failed attempts.
    """
    jar = cookies.RequestsCookieJar()
    if not session:
        session = requests.Session()
    if not username or not password:
        credentials = Opossum.get_untrustworthy_credentials(id_source)
        username = credentials['username']
        password = credentials['password']
        domain_name = credentials['domain_name']
    attempts = 0
    while attempts < 3:
        try:
            login_url = "https://login.cbh2.crediblebh.com"  # NOTE(review): assigned but unused
            domain_url = ".crediblebh.com"
            login_api_url = 'https://login-api.cbh2.crediblebh.com/api/'
            data_center_path = 'Authenticate/CheckDataCenter'
            check_login_path = 'Authenticate/CheckLogin'
            first_payload = {
                'UserName': username,
                'Password': password,
                'DomainName': domain_name
            }
            headers = {'DomainName': domain_name}
            # NOTE(review): verify=False disables TLS certificate checking --
            # kept as-is, but confirm this is really required.
            data_center_post = session.post(login_api_url + data_center_path,
                                            json=first_payload, headers=headers,
                                            verify=False)
            data_center_json = data_center_post.json()
            # Status 14 appears to mean the account lives in another data
            # center; the API hands back replacement URLs -- TODO confirm.
            if data_center_json['Status'] == 14:
                login_api_url = data_center_json['ApiUrl']
                login_url = data_center_json['LoginUrl']
            login_post = session.post(login_api_url + check_login_path,
                                      json=first_payload, headers=headers,
                                      verify=False)
            login_json = login_post.json()
            if login_json['Status'] != 0:
                raise CredibleFrontEndLoginException()
            session_cookie = login_json['SessionCookie']
            website_url = login_json['WebsiteURL']
            jar.set('SessionId', session_cookie, domain=domain_url, path='/')
            session.get(website_url, data={'SessionId': session_cookie},
                        cookies=jar, verify=False)
            time_generated = datetime.datetime.now(pytz.UTC)
            return cls(id_source, domain_name, website_url, session, time_generated)
        except (KeyError, ConnectionError, IndexError):
            # BUG FIX: the original `except KeyError or ConnectionError or
            # IndexError` evaluates the `or` first and catches only KeyError;
            # a tuple is required to catch all three.
            attempts += 1
    raise CredibleFrontEndLoginException()
def retrieve(cls, id_source, session=None, username=None, password=None, domain_name=None):
    """Log in to the Credible BH front end and build an instance from the
    'cbh' auth cookie.  Retries up to three times on transient failures.

    :param id_source: identifier used to look up credentials via Opossum.
    :param session: optional pre-existing requests.Session to reuse.
    :raises CredibleFrontEndLoginException: after three failed attempts.
    """
    # NOTE(review): naive (non-UTC) timestamp -- the sibling retrieve()
    # uses datetime.datetime.now(pytz.UTC); confirm which is intended.
    time_generated = datetime.datetime.now()
    if not session:
        session = requests.Session()
    if not username or not password:
        credentials = Opossum.get_untrustworthy_credentials(id_source)
        username = credentials['username']
        password = credentials['password']
        domain_name = credentials['domain_name']
    attempts = 0
    while attempts < 3:
        try:
            jar = cookies.RequestsCookieJar()
            api_url = "https://login-api.crediblebh.com/api/Authenticate/CheckLogin"
            index_url = "https://ww7.crediblebh.com/index.aspx"
            first_payload = {'UserName': username,
                            'Password': password,
                            'DomainName': domain_name}
            headers = {'DomainName': domain_name}
            post = session.post(api_url, json=first_payload, headers=headers)
            response_json = post.json()
            session_cookie = response_json['SessionCookie']
            jar.set('SessionId', session_cookie, domain='.crediblebh.com', path='/')
            second_payload = {'SessionId': session_cookie}
            second_post = session.post(index_url, data=second_payload, cookies=jar)
            # The index page redirects; the first hop in the redirect
            # history carries the cookies we need.
            history = second_post.history
            cbh_response = history[0]
            cbh_cookies = cbh_response.cookies
            session.cookies = cbh_cookies
            cookie_values = getattr(cbh_cookies, '_cookies')
            credible_value = cookie_values['.crediblebh.com']['/']['cbh'].value
            return cls(domain_name, credible_value, time_generated)
        except (KeyError, ConnectionError, IndexError):
            # BUG FIX: `except KeyError or ConnectionError` only caught
            # KeyError (`or` on classes yields the first one); a tuple
            # catches both.  IndexError added so an empty redirect history
            # (history[0]) is retried instead of escaping the loop.
            attempts += 1
    raise CredibleFrontEndLoginException()
def test_cookies(client):
    """Only cookies whose path matches the request path are sent back."""
    jar = cookies.RequestsCookieJar()
    for name, value, path in (
        ('cookie1', 'A', '/cookies'),
        ('cookie2', 'B', '/fruits'),
    ):
        jar.set(name, value, path=path)

    res = client.cookies(cookies=jar)

    assert res['cookies']['cookie1'] == 'A'
    assert 'cookie2' not in res['cookies']
def main():
    """Log in to the TMF console and upload an offline package zip via its
    batch API."""
    session = Session()
    url_tmf_login = "******"
    url_tmf_push = "http://xxx.xxx.xxx:30012/secApi/manage/api/package/batch-new-all"
    params = {
        'username': '******',
        'password': '******'
    }
    try:
        # Deliberately NOT json.dumps here: this endpoint expects
        # form-encoded a=x&b=y data.  (If captured traffic shows a JSON
        # body, switch to data=json.dumps(params) or simply json=params.)
        session.post(url_tmf_login, data=params)
        print("登录成功")
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed SystemExit
        # and KeyboardInterrupt.
        print("调用失败")
        exit(1)

    with open("turntableNew.zip", "rb") as f:
        package = "data:;base64," + b64encode(f.read()).decode("utf-8")

    # The upload needs a cookie from the login step; app_id is hard-coded
    # here -- capture the real value from production traffic first.
    cookie = cookies.RequestsCookieJar()
    cookie.set("app_id", "app-dcqa7xhy1m")
    header = {
        "Content-Type": "application/json;charset=UTF-8",
    }
    params = {
        "bid_str": "turntableNew",
        "package": package,
        "bid_type": 0,
        "info": "turntableNew",
        "product_id": "1156",
        "success": False,
        "isUpload": False,
        "platform": "[2,3]",
        "verify_type": 0,
        "expire_time": None,
        "frequency": 10,
        "loadmode": 2,
    }
    # Serialize without spaces after ':' and ','.  Replaces the fragile
    # chained str.replace() calls, which produced exactly this compact form.
    params = json.dumps(params, separators=(',', ':'))
    try:
        # The response object has a .json() method; no manual loads needed.
        res = session.post(url_tmf_push, data=params, cookies=cookie,
                           headers=header).json()
        print(res)
    except Exception as e:
        print(e)
        print("上传失败")
def __init__(self):
    # Parse the config file once via db_config() and cache the handle.
    self.cf = self.db_config()
    # Database connection settings from the [db] section.
    self.db_host = self.cf.get("db", "db_host")
    self.db_port = self.cf.getint("db", "db_port")
    self.db_user = self.cf.get("db", "db_user")
    self.db_password = self.cf.get("db", "db_password")
    self.db_database = self.cf.get("db", "db_database")
    # Endpoint URLs: base URL plus per-action paths from the [url] section.
    self.__url = self.cf.get("url", "base")
    self.url_login = self.__url + self.cf.get("url", "login")
    self.url_adduser = self.__url + self.cf.get("url", "adduser")
    # Cookie jar shared across requests, presumably to carry the login
    # session between calls -- confirm against the callers.
    self.store = cookies.RequestsCookieJar()
def site_cookies():
    """Return a cookie jar preloaded with the site's fixed session cookies."""
    preset = {
        '__cfduid': 'd38637fd5fcdd51ba17c23f4f34fd12be1566670847',
        'PHPSESSID': 'f3240botpio78qaqjqa75os9q7',
    }
    jar = cookies.RequestsCookieJar()
    for name, value in preset.items():
        jar.set_cookie(cookies.create_cookie(name=name, value=value))
    return jar
def __init__(self, user_data):
    """Initialize crawler state for one Scholar user from the repository record."""
    from requests import cookies

    super().__init__(user_data)
    # Identity, straight from the repository user record.
    self.repoUserId = user_data['id']
    self.user = user_data['user']
    self.scholarUser = user_data['scholarUser']
    self.scholarAliases = user_data['scholarAliases']
    # Mutable crawl state, reset for every new instance.
    self.domain = 'scholar.google.es'
    self.unusedScholarAliases = []
    self.lastRequestUrl = ''
    self.cookieJar = cookies.RequestsCookieJar()
    self.pages = 1
def connect(url):
    """Return a logged-in Session for *url*, reusing a pickled cookie cache
    when one exists (unless --dynamic_login forces a fresh login)."""
    # using stored session cache for connection
    if os.path.exists(cache_path + ".cache") and not args.dynamic_login:
        if not args.submit:
            download()
        cj = cookies.RequestsCookieJar()
        with open(cache_path + ".cache", "rb") as f:
            # The cache holds the jar's raw _cookies dict (see the dump
            # below), not a whole jar object.
            cook = pickle.load(f)
            cj._cookies = cook
        s = Session()
        s.cookies = cj
        return s
    elif not os.path.exists(cache_path + ".cache") and not args.dynamic_login:
        # No cache and no dynamic login requested: nothing we can do.
        print(lc + ".cache missing! create a new session!!" + rt)
        sys.exit()
    # new connection
    else:
        with Session() as s:
            print(lc + bd + "Connecting...\r" + rt, end='')
            try:
                site = s.get(url + '/login')
            except:
                print(
                    og + bd +
                    "Failed to establish a new connection! Name or service not known"
                    + rt)
                sys.exit()
            # Scrape the CSRF nonce out of the login form.
            bs_content = bs(site.content, "html.parser")
            token = bs_content.find("input", {"name": "nonce"})["value"]
            login_data = {
                "name": username,
                "password": password,
                "nonce": token
            }
            resp = s.post(url + '/login', login_data)
            # The site answers 200 either way; detect failure by message text.
            if b'Your username or password is incorrect' in resp.content:
                print(og + bd + "your username or password is incorrect!!" + rt)
                sys.exit()
            print(lc + bd + "\x1b[2KConnected!!" + rt)
            # Persist the raw cookie dict so the next run can skip login.
            with open(cache_path + ".cache", 'wb') as f:
                pickle.dump(s.cookies._cookies, f)
            return s
def login(self):
    """Drive a real Chrome session so the site's JS can set its cookies and
    accessToken, then copy both into this object's requests session."""
    browser = webdriver.Chrome()
    browser.maximize_window()
    browser.get('http://jzsc.mohurd.gov.cn/data/project/')
    # Fixed wait for the page (and any anti-bot JS) to finish loading.
    time.sleep(20)
    c = cookies.RequestsCookieJar()
    for cookie in browser.get_cookies():
        c.set(cookie['name'], cookie['value'])
    headers = {
        # The site keeps its auth token in localStorage, not in a cookie.
        'accessToken': browser.execute_script(
            'return localStorage.getItem("accessToken");'),
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36',
    }
    self.session.headers.update(headers)
    self.session.cookies.update(c)
    browser.quit()
    print('建立链接成功')
def login(url, option_path):
    """Open *url* in Chrome (reusing the profile at *option_path*), harvest
    the browser cookies, and return a requests session carrying them."""
    opts = webdriver.ChromeOptions()
    opts.add_argument('--user-data-dir=' + option_path)
    # Requires Chrome plus a matching chromedriver on this machine.
    driver = webdriver.Chrome(options=opts)
    driver.get(url)
    time.sleep(3)
    browser_cookies = driver.get_cookies()
    driver.quit()
    print("成功获取cookies!")

    s = requests.session()
    try:
        jar = rc.RequestsCookieJar()
        for item in browser_cookies:
            jar.set(item['name'], item['value'])
        s.cookies.update(jar)
    except Exception as msg:
        # Best effort: the session is still returned, just without cookies.
        print(u'添加cookies时出错:', msg)
    return s
def _handle_basic_auth_407(self, r, kwargs):
    """Retry *r* once with HTTP proxy Basic credentials attached."""
    if self.pos is not None:
        # Rewind the body so the retried request resends the same payload.
        r.request.body.seek(self.pos)

    # Drain the response and hand the connection back so the retry can
    # reuse it.
    r.content
    r.raw.release_conn()

    prep = r.request.copy()
    if not hasattr(prep, "_cookies"):
        prep._cookies = cookies.RequestsCookieJar()
    # Pick up any cookies the 407 response set before retrying.
    cookies.extract_cookies_to_jar(prep._cookies, r.request, r.raw)
    prep.prepare_cookies(prep._cookies)

    self.proxy_auth = auth.HTTPProxyAuth(self.proxy_username,
                                         self.proxy_password)
    prep = self.proxy_auth(prep)

    retried = r.connection.send(prep, **kwargs)
    # Keep the original 407 in the retried response's history.
    retried.history.append(r)
    retried.request = prep
    return retried
def _handle_basic_auth_401(self, r, kwargs):
    """Replay *r* with HTTP Basic credentials after a 401."""
    if self.pos is not None:
        # Rewind the body so the replay sends the same payload again.
        r.request.body.seek(self.pos)

    # Consume content and release the original connection
    # to allow our new request to reuse the same one.
    r.content
    r.raw.release_conn()

    replay = r.request.copy()
    if not hasattr(replay, "_cookies"):
        replay._cookies = cookies.RequestsCookieJar()
    # Carry over any cookies the 401 response set.
    cookies.extract_cookies_to_jar(replay._cookies, r.request, r.raw)
    replay.prepare_cookies(replay._cookies)

    self.auth = auth.HTTPBasicAuth(self.username, self.password)
    replay = self.auth(replay)

    resent = r.connection.send(replay, **kwargs)
    # Record the original 401 in the new response's history.
    resent.history.append(r)
    resent.request = replay
    return resent
def __init__(self, *args, **kwargs):
    """
    If a positional ``url`` argument is given when instantiating, it is
    treated the same as the ``url`` keyword and crawling starts from it.

    :param url: base url
    :param logger: logger
    :param log_level: logging level
    """
    if len(args) > 0:
        # A single positional argument is a shorthand for url=...;
        # an explicit url= keyword still wins (setdefault).
        kwargs.setdefault('url', args[0])
    self._base_url = kwargs.get("url")
    self._referer = None
    self._cookies = cookies.RequestsCookieJar()
    # 'async' is a reserved word since Python 3.7, hence the trailing
    # underscore on the attribute.
    self.async_ = kwargs.get('async', True)
    # pop() so logger/log_level are not forwarded to _prepare_setting()
    # below; the remaining kwargs are.
    self.logger = kwargs.pop('logger', logging.getLogger('crawler'))
    log_level = kwargs.pop('log_level', logging.WARN)
    super(BaseCrawler, self).__init__(self.logger, log_level)
    self.fetch = copy.deepcopy(self.default_options)
    self.proxy_lock = None
    if kwargs:
        self._prepare_setting(**kwargs)
    self.ioloop = kwargs.get('ioloop', tornado.ioloop.IOLoop())
def __init__(self):
    """Cache the endpoint URLs from TestConfig and create a session cookie jar."""
    # FIX: the original instantiated TestConfig() twice (once per URL),
    # re-reading the configuration each time; build it once and reuse it.
    config = TestConfig()
    self.url_login = config.url_login
    self.url_adduser = config.url_adduser
    # Jar used to carry the login session between requests.
    self.store = cookies.RequestsCookieJar()
def puppeteer_fetch(self, url, task):
    '''Fetch with puppeteer proxy

    Forwards the prepared request parameters as JSON to the puppeteer
    proxy service and returns its JSON result dict via gen.Return.
    '''
    start_time = time.time()
    self.on_fetch('puppeteer', task)
    handle_error = lambda x: self.handle_error('puppeteer', url, task, start_time, x)

    # check puppeteer proxy is enabled
    if not self.puppeteer_proxy:
        result = {
            "orig_url": url,
            "content": "puppeteer is not enabled.",
            "headers": {},
            "status_code": 501,
            "url": url,
            "time": time.time() - start_time,
            "cookies": {},
            "save": task.get('fetch', {}).get('save')
        }
        logger.warning("[501] %s:%s %s 0s", task.get('project'),
                       task.get('taskid'), url)
        raise gen.Return(result)

    # setup request parameters; task-level fetch options fill in anything
    # the packer did not already set.
    fetch = self.pack_tornado_request_parameters(url, task)
    task_fetch = task.get('fetch', {})
    for each in task_fetch:
        if each not in fetch:
            fetch[each] = task_fetch[each]

    # robots.txt
    if task_fetch.get('robots_txt', False):
        user_agent = fetch['headers']['User-Agent']
        can_fetch = yield self.can_fetch(user_agent, url)
        if not can_fetch:
            error = tornado.httpclient.HTTPError(
                403, 'Disallowed by robots.txt')
            raise gen.Return(handle_error(error))

    request_conf = {'follow_redirects': False}
    request_conf['connect_timeout'] = fetch.get('connect_timeout', 20)
    # +1 so the proxy's own timeout fires before tornado gives up on it.
    request_conf['request_timeout'] = fetch.get('request_timeout', 120) + 1

    # Merge any Cookie header and explicit cookies dict into one jar,
    # then re-emit a single canonical Cookie header.
    session = cookies.RequestsCookieJar()
    if 'Cookie' in fetch['headers']:
        c = http_cookies.SimpleCookie()
        try:
            c.load(fetch['headers']['Cookie'])
        except AttributeError:
            # SimpleCookie.load needs bytes on some Python versions.
            c.load(utils.utf8(fetch['headers']['Cookie']))
        for key in c:
            session.set(key, c[key])
        del fetch['headers']['Cookie']
    if 'cookies' in fetch:
        session.update(fetch['cookies'])
        del fetch['cookies']

    request = tornado.httpclient.HTTPRequest(url=fetch['url'])
    cookie_header = cookies.get_cookie_header(session, request)
    if cookie_header:
        fetch['headers']['Cookie'] = cookie_header

    logger.info("%s", self.puppeteer_proxy)

    # making requests: the whole fetch dict is POSTed to the proxy as JSON.
    fetch['headers'] = dict(fetch['headers'])
    headers = {}
    headers['Content-Type'] = 'application/json; charset=UTF-8'
    try:
        request = tornado.httpclient.HTTPRequest(
            url=self.puppeteer_proxy, method="POST",
            headers=headers, body=json.dumps(fetch), **request_conf)
    except Exception as e:
        raise gen.Return(handle_error(e))

    try:
        response = yield gen.maybe_future(self.http_client.fetch(request))
    except tornado.httpclient.HTTPError as e:
        # An HTTP error response still carries a body worth inspecting.
        if e.response:
            response = e.response
        else:
            raise gen.Return(handle_error(e))

    if not response.body:
        raise gen.Return(
            handle_error(
                Exception('no response from puppeteer: %r' % response)))

    result = {}
    try:
        result = json.loads(utils.text(response.body))
        assert 'status_code' in result, result
    except Exception as e:
        if response.error:
            result['error'] = utils.text(response.error)
        raise gen.Return(handle_error(e))

    if result.get('status_code', 200):
        logger.info("[%d] %s:%s %s %.2fs", result['status_code'],
                    task.get('project'), task.get('taskid'), url,
                    result['time'])
    else:
        logger.error("[%d] %s:%s %s, %r %.2fs", result['status_code'],
                     task.get('project'), task.get('taskid'),
                     url, result['content'], result['time'])

    raise gen.Return(result)
def http_fetch(self, url, task):
    '''HTTP fetcher

    Fetches *url* with tornado's HTTP client, following redirects by hand
    so cookies set along the way are captured, and returns a result dict
    via gen.Return.
    '''
    start_time = time.time()
    self.on_fetch('http', task)
    handle_error = lambda x: self.handle_error('http', url, task, start_time, x)

    # setup request parameters
    fetch = self.pack_tornado_request_parameters(url, task)
    task_fetch = task.get('fetch', {})

    session = cookies.RequestsCookieJar()
    # fix for tornado request obj: fold any Cookie header and explicit
    # cookies dict into one jar so we can track Set-Cookie across hops.
    if 'Cookie' in fetch['headers']:
        c = http_cookies.SimpleCookie()
        try:
            c.load(fetch['headers']['Cookie'])
        except AttributeError:
            # SimpleCookie.load needs bytes on some Python versions.
            c.load(utils.utf8(fetch['headers']['Cookie']))
        for key in c:
            session.set(key, c[key])
        del fetch['headers']['Cookie']
    if 'cookies' in fetch:
        session.update(fetch['cookies'])
        del fetch['cookies']

    max_redirects = task_fetch.get('max_redirects', 5)
    # we will handle redirects by hand to capture cookies
    fetch['follow_redirects'] = False

    # making requests; loops once per redirect hop.
    while True:
        # robots.txt
        if task_fetch.get('robots_txt', False):
            can_fetch = yield self.can_fetch(
                fetch['headers']['User-Agent'], fetch['url'])
            if not can_fetch:
                error = tornado.httpclient.HTTPError(
                    403, 'Disallowed by robots.txt')
                raise gen.Return(handle_error(error))

        try:
            request = tornado.httpclient.HTTPRequest(**fetch)
            # if cookie already in header, get_cookie_header wouldn't work
            old_cookie_header = request.headers.get('Cookie')
            if old_cookie_header:
                del request.headers['Cookie']
            cookie_header = cookies.get_cookie_header(session, request)
            if cookie_header:
                request.headers['Cookie'] = cookie_header
            elif old_cookie_header:
                # Jar had nothing for this URL; restore the caller's header.
                request.headers['Cookie'] = old_cookie_header
        except Exception as e:
            logger.exception(fetch)
            raise gen.Return(handle_error(e))

        try:
            response = yield gen.maybe_future(
                self.http_client.fetch(request))
        except tornado.httpclient.HTTPError as e:
            # An HTTP error response still carries headers/body we want.
            if e.response:
                response = e.response
            else:
                raise gen.Return(handle_error(e))

        # Record any Set-Cookie from this hop before deciding to redirect.
        extract_cookies_to_jar(session, response.request, response.headers)
        if (response.code in (301, 302, 303, 307)
                and response.headers.get('Location')
                and task_fetch.get('allow_redirects', True)):
            if max_redirects <= 0:
                error = tornado.httpclient.HTTPError(
                    599, 'Maximum (%d) redirects followed' %
                    task_fetch.get('max_redirects', 5), response)
                raise gen.Return(handle_error(error))
            if response.code in (302, 303):
                # Per HTTP semantics these redirects downgrade to GET.
                fetch['method'] = 'GET'
                if 'body' in fetch:
                    del fetch['body']
            fetch['url'] = quote_chinese(
                urljoin(fetch['url'], response.headers['Location']))
            # Shrink the remaining timeout by the time already spent.
            fetch['request_timeout'] -= time.time() - start_time
            if fetch['request_timeout'] < 0:
                fetch['request_timeout'] = 0.1
            max_redirects -= 1
            continue

        result = {}
        result['orig_url'] = url
        result['content'] = response.body or ''
        result['headers'] = dict(response.headers)
        result['status_code'] = response.code
        result['url'] = response.effective_url or url
        result['time'] = time.time() - start_time
        result['cookies'] = session.get_dict()
        result['save'] = task_fetch.get('save')
        if response.error:
            result['error'] = utils.text(response.error)
        if 200 <= response.code < 300:
            logger.info("[%d] %s:%s %s %.2fs", response.code,
                        task.get('project'), task.get('taskid'),
                        url, result['time'])
        else:
            logger.warning("[%d] %s:%s %s %.2fs", response.code,
                           task.get('project'), task.get('taskid'),
                           url, result['time'])

        raise gen.Return(result)
import time from bs4 import BeautifulSoup import json from urllib import parse import tkinter as tk import re import os import ssl ssl._create_default_https_context = ssl._create_unverified_context path = 'D:\\py\\edu\\img\\' host = 'https://hbt.gpa.enetedu.com' cookies = c.RequestsCookieJar() eneteducookies = c.RequestsCookieJar() headers = { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:83.0) Gecko/20100101 Firefox/83.0' } def downImg(): page = get("https://mingshi8.hbte.com.cn/index.php/home/login/login.html", headers=headers) cookies.update(page.cookies) response = request( "GET", "https://mingshi8.hbte.com.cn/index.php/Home/Login/Verify",
"""Guesses the auth type by the WWW-Authentication header.""" @@ -12,38 +14,57 @@ class GuessAuth(auth.AuthBase): self.auth = None self.pos = None + def _handle_basic_auth_401(self, r, kwargs): + if self.pos is not None: + r.request.body.seek(self.pos) + + # Consume content and release the original connection + # to allow our new request to reuse the same one. + r.content + r.raw.release_conn() + prep = r.request.copy() + if not hasattr(prep, '_cookies'): + prep._cookies = cookies.RequestsCookieJar() + cookies.extract_cookies_to_jar(prep._cookies, r.request, r.raw) + prep.prepare_cookies(prep._cookies) + + self.auth = auth.HTTPBasicAuth(self.username, self.password) + prep = self.auth(prep) + _r = r.connection.send(prep, **kwargs) + _r.history.append(r) + _r.request = prep + + return _r + + def _handle_digest_auth_401(self, r, kwargs): + self.auth = auth_compat.HTTPDigestAuth(self.username, self.password) + try: + self.auth.init_per_thread_state()
#!/usr/bin/python3
"""Fetch a Baidu Index page reusing browser cookies prepared by myBaiduIndex."""
from selenium import webdriver
import myBaiduIndex
from requests import cookies
import requests

myCookies = myBaiduIndex.prep_cookies()
jar = cookies.RequestsCookieJar()
for mycookie in myCookies:
    # Browser cookie dicts carry fields that jar.set() does not accept;
    # strip them before splatting the dict into the jar.
    # FIX: pop(..., None) so a missing 'httpOnly' key no longer raises
    # KeyError (the original guarded 'expiry' but not 'httpOnly').
    mycookie.pop('httpOnly', None)
    mycookie.pop('expiry', None)
    jar.set(**mycookie)

r = requests.get('http://index.baidu.com/?tpl=trend&word=s', cookies=jar)
print(r.text)
print(r.headers)
print(r.cookies)