Example #1
    def handle_401(self, r, **kwargs):
        """Resends a request with auth headers, if needed."""

        www_authenticate = r.headers.get('www-authenticate', '').lower()

        if 'basic' in www_authenticate:
            if self.pos is not None:
                r.request.body.seek(self.pos)

            # Consume content and release the original connection
            # to allow our new request to reuse the same one.
            r.content
            r.raw.release_conn()
            prep = r.request.copy()
            if not hasattr(prep, '_cookies'):
                prep._cookies = cookies.RequestsCookieJar()
            cookies.extract_cookies_to_jar(prep._cookies, r.request, r.raw)
            prep.prepare_cookies(prep._cookies)

            self.auth = auth.HTTPBasicAuth(self.username, self.password)
            prep = self.auth(prep)
            _r = r.connection.send(prep, **kwargs)
            _r.history.append(r)
            _r.request = prep

            return _r

        if 'digest' in www_authenticate:
            self.auth = auth.HTTPDigestAuth(self.username, self.password)
            # Digest auth would resend the request by itself. We can take a
            # shortcut here.
            return self.auth.handle_401(r, **kwargs)
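For context, a handler like this only runs if it was registered as a response hook while the request was being prepared. Below is a minimal sketch of that wiring, assuming the surrounding class follows the pattern of requests' AuthBase (requests-toolbelt's GuessAuth works this way); handle_401 is the method shown above.

import requests
from requests import auth

class GuessAuth(auth.AuthBase):
    """Sketch: guesses basic vs. digest auth from the 401 response."""

    def __init__(self, username, password):
        self.username = username
        self.password = password
        self.auth = None
        self.pos = None

    def __call__(self, request):
        if self.auth is not None:
            # Auth type already discovered; delegate directly.
            return self.auth(request)
        # Remember the body position so handle_401 can rewind before resending.
        try:
            self.pos = request.body.tell()
        except AttributeError:
            self.pos = None
        # requests runs response hooks after each response; a 401 carrying a
        # www-authenticate header then triggers the resend logic shown above.
        request.register_hook('response', self.handle_401)
        return request

Used as requests.get(url, auth=GuessAuth('user', 'pass')), requests calls the auth object while preparing the request and fires the hook once the response arrives.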
Example #2
 def retrieve(cls,
              id_source,
              session=None,
              username=None,
              password=None,
              domain_name=None):
     jar = cookies.RequestsCookieJar()
     if not session:
         session = requests.Session()
     if not username or not password:
         credentials = Opossum.get_untrustworthy_credentials(id_source)
         username = credentials['username']
         password = credentials['password']
         domain_name = credentials['domain_name']
     attempts = 0
     while attempts < 3:
         try:
             login_url = "https://login.cbh2.crediblebh.com"
             domain_url = ".crediblebh.com"
             login_api_url = 'https://login-api.cbh2.crediblebh.com/api/'
             data_center_path = 'Authenticate/CheckDataCenter'
             check_login_path = 'Authenticate/CheckLogin'
             first_payload = {
                 'UserName': username,
                 'Password': password,
                 'DomainName': domain_name
             }
             headers = {'DomainName': domain_name}
             data_center_post = session.post(login_api_url +
                                             data_center_path,
                                             json=first_payload,
                                             headers=headers,
                                             verify=False)
             data_center_json = data_center_post.json()
             if data_center_json['Status'] == 14:
                 login_api_url = data_center_json['ApiUrl']
                 login_url = data_center_json['LoginUrl']
             login_post = session.post(login_api_url + check_login_path,
                                       json=first_payload,
                                       headers=headers,
                                       verify=False)
             login_json = login_post.json()
             if login_json['Status'] != 0:
                 raise CredibleFrontEndLoginException()
             session_cookie = login_json['SessionCookie']
             website_url = login_json['WebsiteURL']
             jar.set('SessionId',
                     session_cookie,
                     domain=domain_url,
                     path='/')
             session.get(website_url,
                         data={'SessionId': session_cookie},
                         cookies=jar,
                         verify=False)
             time_generated = datetime.datetime.now(pytz.UTC)
             return cls(id_source, domain_name, website_url, session,
                        time_generated)
         # a tuple is required here; "except A or B or C" only catches A
         except (KeyError, ConnectionError, IndexError):
             attempts += 1
     raise CredibleFrontEndLoginException()
Example #3
 def retrieve(cls, id_source, session=None, username=None, password=None, domain_name=None):
     time_generated = datetime.datetime.now()
     if not session:
         session = requests.Session()
     if not username or not password:
         credentials = Opossum.get_untrustworthy_credentials(id_source)
         username = credentials['username']
         password = credentials['password']
         domain_name = credentials['domain_name']
     attempts = 0
     while attempts < 3:
         try:
             jar = cookies.RequestsCookieJar()
             api_url = "https://login-api.crediblebh.com/api/Authenticate/CheckLogin"
             index_url = "https://ww7.crediblebh.com/index.aspx"
             first_payload = {'UserName': username,
                              'Password': password,
                              'DomainName': domain_name}
             headers = {'DomainName': domain_name}
             post = session.post(api_url, json=first_payload, headers=headers)
             response_json = post.json()
             session_cookie = response_json['SessionCookie']
             jar.set('SessionId', session_cookie, domain='.crediblebh.com', path='/')
             second_payload = {'SessionId': session_cookie}
             second_post = session.post(index_url, data=second_payload, cookies=jar)
             history = second_post.history
             cbh_response = history[0]
             cbh_cookies = cbh_response.cookies
             session.cookies = cbh_cookies
             cookie_values = getattr(cbh_cookies, '_cookies')
             credible_value = cookie_values['.crediblebh.com']['/']['cbh'].value
             return cls(domain_name, credible_value, time_generated)
          # a tuple is required here; "except A or B" only catches A
          except (KeyError, ConnectionError):
             attempts += 1
     raise CredibleFrontEndLoginException()
Example #4
def test_cookies(client):
    """
    """
    jar = cookies.RequestsCookieJar()
    jar.set('cookie1', 'A', path='/cookies')
    jar.set('cookie2', 'B', path='/fruits')
    res = client.cookies(cookies=jar)

    assert res['cookies']['cookie1'] == 'A'
    assert 'cookie2' not in res['cookies']
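The same path filtering can be exercised against a live endpoint. A quick sketch using the public httpbin.org echo service:

import requests
from requests import cookies

jar = cookies.RequestsCookieJar()
jar.set('cookie1', 'A', path='/cookies')
jar.set('cookie2', 'B', path='/fruits')

# Only cookie1's path matches /cookies, so only it is sent.
r = requests.get('https://httpbin.org/cookies', cookies=jar)
print(r.json())  # expected: {'cookies': {'cookie1': 'A'}}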
Example #5
def main():
    session = Session()

    url_tmf_login = "******"
    url_tmf_push = "http://xxx.xxx.xxx:30012/secApi/manage/api/package/batch-new-all"

    params = {
        'username': '******',
        'password': '******'
    }

    try:
        # Note: json.dumps is deliberately not used here; it depends on what the
        # backend expects. If Fiddler shows the body as JSON, dumps it first; if
        # it looks like a=xx&b=xx&c=xx, send the dict as form data instead. For
        # JSON you can also skip dumps and pass json=params directly.
        session.post(url_tmf_login, data=params)
        print("Login succeeded")
    except Exception:
        print("Login request failed")
        exit(1)

    with open("turntableNew.zip", "rb") as f:
        package = "data:;base64,"+b64encode(f.read()).decode("utf-8")

    # The upload needs a cookie obtained by the login call above; app_id can be
    # hard-coded here, but in production capture the real value from the traffic first.
    cookie = cookies.RequestsCookieJar()
    cookie.set("app_id", "app-dcqa7xhy1m")

    header = {
        "Content-Type": "application/json;charset=UTF-8",
    }

    params = {
        "bid_str": "turntableNew",
        "package": package,
        "bid_type": 0,
        "info": "turntableNew",
        "product_id": "1156",
        "success": False,
        "isUpload": False,
        "platform": "[2,3]",
        "verify_type": 0,
        "expire_time": None,
        "frequency": 10,
        "loadmode": 2,
    }
    # compact separators produce the exact key:value layout this backend expects
    params = json.dumps(params, separators=(',', ':'))

    try:
        res = session.post(url_tmf_push, data=params, cookies=cookie, headers=header).json()
        # the response object has a .json() method, so no manual loads is needed
        print(res)
    except Exception as e:
        print(e)
        print("上传失败")
Example #6
    def __init__(self):
        self.cf = self.db_config()
        self.db_host = self.cf.get("db", "db_host")
        self.db_port = self.cf.getint("db", "db_port")
        self.db_user = self.cf.get("db", "db_user")
        self.db_password = self.cf.get("db", "db_password")
        self.db_database = self.cf.get("db", "db_database")

        self.__url = self.cf.get("url", "base")
        self.url_login = self.__url + self.cf.get("url", "login")
        self.url_adduser = self.__url + self.cf.get("url", "adduser")

        self.store = cookies.RequestsCookieJar()
Example #7
def site_cookies():
    jar = cookies.RequestsCookieJar()
    jar.set_cookie(
        cookies.create_cookie(
            name='__cfduid',
            value='d38637fd5fcdd51ba17c23f4f34fd12be1566670847',
        ))
    jar.set_cookie(
        cookies.create_cookie(
            name='PHPSESSID',
            value='f3240botpio78qaqjqa75os9q7',
        ))
    return jar
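A possible way to consume this helper, with a placeholder URL: cookies created without an explicit domain are sent to any host, so both ride along on the request.

import requests

jar = site_cookies()
r = requests.get('https://example.com/', cookies=jar)  # hypothetical target
print(r.request.headers.get('Cookie'))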
Example #8
    def __init__(self, user_data):
        from requests import cookies

        super().__init__(user_data)

        self.domain = 'scholar.google.es'
        self.scholarUser = user_data['scholarUser']
        self.scholarAliases = user_data['scholarAliases']
        self.unusedScholarAliases = []
        self.lastRequestUrl = ''
        self.repoUserId = user_data['id']
        self.user = user_data['user']
        self.cookieJar = cookies.RequestsCookieJar()
        self.pages = 1
Example #9
def connect(url):
    #using stored session cache for connection
    if os.path.exists(cache_path + ".cache") and not args.dynamic_login:
        if not args.submit:
            download()
        cj = cookies.RequestsCookieJar()
        with open(cache_path + ".cache", "rb") as f:
            cook = pickle.load(f)
        cj._cookies = cook
        s = Session()
        s.cookies = cj
        return s
    elif not os.path.exists(cache_path + ".cache") and not args.dynamic_login:
        print(lc + ".cache missing! create a new session!!" + rt)
        sys.exit()
    #new connection
    else:
        with Session() as s:
            print(lc + bd + "Connecting...\r" + rt, end='')

            try:
                site = s.get(url + '/login')
            except Exception:
                print(
                    og + bd +
                    "Failed to establish a new connection! Name or service not known"
                    + rt)
                sys.exit()

            bs_content = bs(site.content, "html.parser")
            token = bs_content.find("input", {"name": "nonce"})["value"]
            login_data = {
                "name": username,
                "password": password,
                "nonce": token
            }
            resp = s.post(url + '/login', login_data)

            if b'Your username or password is incorrect' in resp.content:
                print(og + bd + "your username or password is incorrect!!" +
                      rt)
                sys.exit()

            print(lc + bd + "\x1b[2KConnected!!" + rt)

            with open(cache_path + ".cache", 'wb') as f:
                pickle.dump(s.cookies._cookies, f)

            return s
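A sketch of the same caching idea without reaching into the private _cookies attribute: requests' RequestsCookieJar is designed to be pickleable (unlike a bare http.cookiejar jar, it drops its internal lock when pickled), so the whole jar can be cached, though that behavior is worth verifying on your requests version.

import pickle
from requests import Session

s = Session()
s.cookies.set('SessionId', 'abc123', domain='.example.com', path='/')

# Cache the whole jar instead of its private _cookies dict.
with open('session.cache', 'wb') as f:
    pickle.dump(s.cookies, f)

# Later: restore it into a fresh session.
s2 = Session()
with open('session.cache', 'rb') as f:
    s2.cookies = pickle.load(f)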
Example #10
 def login(self):
     browser = webdriver.Chrome()
     browser.maximize_window()
     browser.get('http://jzsc.mohurd.gov.cn/data/project/')
     time.sleep(20)
     c = cookies.RequestsCookieJar()
     for cookie in browser.get_cookies():
         c.set(cookie['name'], cookie['value'])
     headers = {
         'accessToken':
         browser.execute_script(
             'return localStorage.getItem("accessToken");'),
         'User-Agent':
         'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36',
     }
     self.session.headers.update(headers)
     self.session.cookies.update(c)
     browser.quit()
      print('Connection established')
Example #11
def login(url, option_path):
    option = webdriver.ChromeOptions()
    option.add_argument('--user-data-dir=' + option_path)
    driver = webdriver.Chrome(options=option)  # log in via Google Chrome; download it first if it is not installed
    driver.get(url)
    time.sleep(3)
    allcookies = driver.get_cookies()  # grab all of the browser's cookies
    driver.quit()
    print("成功获取cookies!")
    s = requests.session()
    try:
        c = rc.RequestsCookieJar()
        for i in allcookies:
            c.set(i['name'], i['value'])
        s.cookies.update(c)
    except Exception as msg:
        print(u'Error while adding cookies:', msg)

    return s
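Both of the last two examples copy only each cookie's name and value, which discards its domain and path scoping. A sketch that preserves the scope, assuming Selenium's usual get_cookies() dict format:

import requests
from requests import cookies
from selenium import webdriver

browser = webdriver.Chrome()
browser.get('http://jzsc.mohurd.gov.cn/data/project/')

jar = cookies.RequestsCookieJar()
for cookie in browser.get_cookies():
    # Selenium returns dicts with name/value/domain/path (among others);
    # carrying domain and path over keeps each cookie's original scope.
    jar.set(cookie['name'], cookie['value'],
            domain=cookie.get('domain') or '', path=cookie.get('path', '/'))
browser.quit()

session = requests.Session()
session.cookies.update(jar)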
Example #12
    def _handle_basic_auth_407(self, r, kwargs):
        if self.pos is not None:
            r.request.body.seek(self.pos)

        r.content
        r.raw.release_conn()
        prep = r.request.copy()
        if not hasattr(prep, "_cookies"):
            prep._cookies = cookies.RequestsCookieJar()
        cookies.extract_cookies_to_jar(prep._cookies, r.request, r.raw)
        prep.prepare_cookies(prep._cookies)

        self.proxy_auth = auth.HTTPProxyAuth(self.proxy_username,
                                             self.proxy_password)
        prep = self.proxy_auth(prep)
        _r = r.connection.send(prep, **kwargs)
        _r.history.append(r)
        _r.request = prep

        return _r
Example #13
    def _handle_basic_auth_401(self, r, kwargs):
        if self.pos is not None:
            r.request.body.seek(self.pos)

        # Consume content and release the original connection
        # to allow our new request to reuse the same one.
        r.content
        r.raw.release_conn()
        prep = r.request.copy()
        if not hasattr(prep, "_cookies"):
            prep._cookies = cookies.RequestsCookieJar()
        cookies.extract_cookies_to_jar(prep._cookies, r.request, r.raw)
        prep.prepare_cookies(prep._cookies)

        self.auth = auth.HTTPBasicAuth(self.username, self.password)
        prep = self.auth(prep)
        _r = r.connection.send(prep, **kwargs)
        _r.history.append(r)
        _r.request = prep

        return _r
Example #14
 def __init__(self, *args, **kwargs):
     """
      If a url argument is passed at instantiation time, crawl is executed
      with that url as its argument.
     :param url: base url
     :param logger: logger
     :param log_level: logging level
     """
     if len(args) > 0:
         kwargs.setdefault('url', args[0])
     self._base_url = kwargs.get("url")
     self._referer = None
     self._cookies = cookies.RequestsCookieJar()
     self.async_ = kwargs.get('async', True)
     self.logger = kwargs.pop('logger', logging.getLogger('crawler'))
     log_level = kwargs.pop('log_level', logging.WARN)
     super(BaseCrawler, self).__init__(self.logger, log_level)
     self.fetch = copy.deepcopy(self.default_options)
     self.proxy_lock = None
     if kwargs:
         self._prepare_setting(**kwargs)
     self.ioloop = kwargs.get('ioloop', tornado.ioloop.IOLoop())
Example #15
 def __init__(self):
     self.url_login = TestConfig().url_login
     self.url_adduser = TestConfig().url_adduser
     self.store = cookies.RequestsCookieJar()
Example #16
    def puppeteer_fetch(self, url, task):
        '''Fetch with puppeteer proxy'''
        start_time = time.time()
        self.on_fetch('puppeteer', task)
        handle_error = lambda x: self.handle_error('puppeteer', url, task,
                                                   start_time, x)

        # check puppeteer proxy is enabled
        if not self.puppeteer_proxy:
            result = {
                "orig_url": url,
                "content": "puppeteer is not enabled.",
                "headers": {},
                "status_code": 501,
                "url": url,
                "time": time.time() - start_time,
                "cookies": {},
                "save": task.get('fetch', {}).get('save')
            }
            logger.warning("[501] %s:%s %s 0s", task.get('project'),
                           task.get('taskid'), url)
            raise gen.Return(result)

        # setup request parameters
        fetch = self.pack_tornado_request_parameters(url, task)
        task_fetch = task.get('fetch', {})
        for each in task_fetch:
            if each not in fetch:
                fetch[each] = task_fetch[each]

        # robots.txt
        if task_fetch.get('robots_txt', False):
            user_agent = fetch['headers']['User-Agent']
            can_fetch = yield self.can_fetch(user_agent, url)
            if not can_fetch:
                error = tornado.httpclient.HTTPError(
                    403, 'Disallowed by robots.txt')
                raise gen.Return(handle_error(error))

        request_conf = {'follow_redirects': False}
        request_conf['connect_timeout'] = fetch.get('connect_timeout', 20)
        request_conf['request_timeout'] = fetch.get('request_timeout', 120) + 1

        session = cookies.RequestsCookieJar()
        if 'Cookie' in fetch['headers']:
            c = http_cookies.SimpleCookie()
            try:
                c.load(fetch['headers']['Cookie'])
            except AttributeError:
                c.load(utils.utf8(fetch['headers']['Cookie']))
            for key in c:
                session.set(key, c[key])
            del fetch['headers']['Cookie']
        if 'cookies' in fetch:
            session.update(fetch['cookies'])
            del fetch['cookies']

        request = tornado.httpclient.HTTPRequest(url=fetch['url'])
        cookie_header = cookies.get_cookie_header(session, request)
        if cookie_header:
            fetch['headers']['Cookie'] = cookie_header

        logger.info("%s", self.puppeteer_proxy)
        # making requests
        fetch['headers'] = dict(fetch['headers'])
        headers = {}
        headers['Content-Type'] = 'application/json; charset=UTF-8'
        try:
            request = tornado.httpclient.HTTPRequest(url=self.puppeteer_proxy,
                                                     method="POST",
                                                     headers=headers,
                                                     body=json.dumps(fetch),
                                                     **request_conf)
        except Exception as e:
            raise gen.Return(handle_error(e))

        try:
            response = yield gen.maybe_future(self.http_client.fetch(request))
        except tornado.httpclient.HTTPError as e:
            if e.response:
                response = e.response
            else:
                raise gen.Return(handle_error(e))

        if not response.body:
            raise gen.Return(
                handle_error(
                    Exception('no response from puppeteer: %r' % response)))

        result = {}
        try:
            result = json.loads(utils.text(response.body))
            assert 'status_code' in result, result
        except Exception as e:
            if response.error:
                result['error'] = utils.text(response.error)
            raise gen.Return(handle_error(e))

        if result.get('status_code', 200):
            logger.info("[%d] %s:%s %s %.2fs", result['status_code'],
                        task.get('project'), task.get('taskid'), url,
                        result['time'])
        else:
            logger.error("[%d] %s:%s %s, %r %.2fs", result['status_code'],
                         task.get('project'), task.get('taskid'), url,
                         result['content'], result['time'])

        raise gen.Return(result)
Example #17
    def http_fetch(self, url, task):
        '''HTTP fetcher'''
        start_time = time.time()
        self.on_fetch('http', task)
        handle_error = lambda x: self.handle_error('http', url, task,
                                                   start_time, x)

        # setup request parameters
        fetch = self.pack_tornado_request_parameters(url, task)
        task_fetch = task.get('fetch', {})

        session = cookies.RequestsCookieJar()
        # fix for tornado request obj
        if 'Cookie' in fetch['headers']:
            c = http_cookies.SimpleCookie()
            try:
                c.load(fetch['headers']['Cookie'])
            except AttributeError:
                c.load(utils.utf8(fetch['headers']['Cookie']))
            for key in c:
                session.set(key, c[key])
            del fetch['headers']['Cookie']
        if 'cookies' in fetch:
            session.update(fetch['cookies'])
            del fetch['cookies']

        max_redirects = task_fetch.get('max_redirects', 5)
        # we will handle redirects by hand to capture cookies
        fetch['follow_redirects'] = False

        # making requests
        while True:
            # robots.txt
            if task_fetch.get('robots_txt', False):
                can_fetch = yield self.can_fetch(
                    fetch['headers']['User-Agent'], fetch['url'])
                if not can_fetch:
                    error = tornado.httpclient.HTTPError(
                        403, 'Disallowed by robots.txt')
                    raise gen.Return(handle_error(error))

            try:
                request = tornado.httpclient.HTTPRequest(**fetch)
                # if cookie already in header, get_cookie_header wouldn't work
                old_cookie_header = request.headers.get('Cookie')
                if old_cookie_header:
                    del request.headers['Cookie']
                cookie_header = cookies.get_cookie_header(session, request)
                if cookie_header:
                    request.headers['Cookie'] = cookie_header
                elif old_cookie_header:
                    request.headers['Cookie'] = old_cookie_header
            except Exception as e:
                logger.exception(fetch)
                raise gen.Return(handle_error(e))

            try:
                response = yield gen.maybe_future(
                    self.http_client.fetch(request))
            except tornado.httpclient.HTTPError as e:
                if e.response:
                    response = e.response
                else:
                    raise gen.Return(handle_error(e))

            extract_cookies_to_jar(session, response.request, response.headers)
            if (response.code in (301, 302, 303, 307)
                    and response.headers.get('Location')
                    and task_fetch.get('allow_redirects', True)):
                if max_redirects <= 0:
                    error = tornado.httpclient.HTTPError(
                        599, 'Maximum (%d) redirects followed' %
                        task_fetch.get('max_redirects', 5), response)
                    raise gen.Return(handle_error(error))
                if response.code in (302, 303):
                    fetch['method'] = 'GET'
                    if 'body' in fetch:
                        del fetch['body']
                fetch['url'] = quote_chinese(
                    urljoin(fetch['url'], response.headers['Location']))
                fetch['request_timeout'] -= time.time() - start_time
                if fetch['request_timeout'] < 0:
                    fetch['request_timeout'] = 0.1
                max_redirects -= 1
                continue

            result = {}
            result['orig_url'] = url
            result['content'] = response.body or ''
            result['headers'] = dict(response.headers)
            result['status_code'] = response.code
            result['url'] = response.effective_url or url
            result['time'] = time.time() - start_time
            result['cookies'] = session.get_dict()
            result['save'] = task_fetch.get('save')
            if response.error:
                result['error'] = utils.text(response.error)
            if 200 <= response.code < 300:
                logger.info("[%d] %s:%s %s %.2fs", response.code,
                            task.get('project'), task.get('taskid'), url,
                            result['time'])
            else:
                logger.warning("[%d] %s:%s %s %.2fs", response.code,
                               task.get('project'), task.get('taskid'), url,
                               result['time'])

            raise gen.Return(result)
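In both fetchers the jar is only a staging area: requests.cookies.get_cookie_header renders it into a plain Cookie header that tornado can carry. A standalone sketch of that bridge (example.com is a placeholder):

from requests import cookies
from requests.models import Request

jar = cookies.RequestsCookieJar()
jar.set('sid', 'abc', domain='example.com', path='/')

# get_cookie_header accepts anything request-shaped enough to expose a
# url and headers; a PreparedRequest qualifies.
req = Request(method='GET', url='http://example.com/').prepare()
print(cookies.get_cookie_header(jar, req))  # sid=abc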
Example #18
import time
from bs4 import BeautifulSoup
import json
from urllib import parse
import tkinter as tk
import re
import os
import ssl

# not shown in the original excerpt, but implied by the calls below:
# get(), request() and the "c" alias all come from requests
from requests import get, request
from requests import cookies as c

ssl._create_default_https_context = ssl._create_unverified_context

path = 'D:\\py\\edu\\img\\'

host = 'https://hbt.gpa.enetedu.com'

cookies = c.RequestsCookieJar()
eneteducookies = c.RequestsCookieJar()

headers = {
    'User-Agent':
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:83.0) Gecko/20100101 Firefox/83.0'
}


def downImg():
    page = get("https://mingshi8.hbte.com.cn/index.php/home/login/login.html",
               headers=headers)
    cookies.update(page.cookies)
    response = request(
        "GET",
        "https://mingshi8.hbte.com.cn/index.php/Home/Login/Verify",
Example #19
     """Guesses the auth type by the WWW-Authentication header."""
@@ -12,38 +14,57 @@ class GuessAuth(auth.AuthBase):
         self.auth = None
         self.pos = None
 
+    def _handle_basic_auth_401(self, r, kwargs):
+        if self.pos is not None:
+            r.request.body.seek(self.pos)
+
+        # Consume content and release the original connection
+        # to allow our new request to reuse the same one.
+        r.content
+        r.raw.release_conn()
+        prep = r.request.copy()
+        if not hasattr(prep, '_cookies'):
+            prep._cookies = cookies.RequestsCookieJar()
+        cookies.extract_cookies_to_jar(prep._cookies, r.request, r.raw)
+        prep.prepare_cookies(prep._cookies)
+
+        self.auth = auth.HTTPBasicAuth(self.username, self.password)
+        prep = self.auth(prep)
+        _r = r.connection.send(prep, **kwargs)
+        _r.history.append(r)
+        _r.request = prep
+
+        return _r
+
+    def _handle_digest_auth_401(self, r, kwargs):
+        self.auth = auth_compat.HTTPDigestAuth(self.username, self.password)
+        try:
+            self.auth.init_per_thread_state()
Example #20
#!/usr/bin/python3
from selenium import webdriver
import myBaiduIndex
from requests import cookies
import requests

myCookies = myBaiduIndex.prep_cookies()

jar = cookies.RequestsCookieJar()

for mycookie in myCookies:
    mycookie.pop('httpOnly')
    if 'expiry' in mycookie:
        mycookie.pop('expiry')
    jar.set(**mycookie)

# chromeDriver = r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe'
# browser = webdriver.Chrome(chromeDriver)
# browser.add_cookie(jar)
# browser.get('http://index.baidu.com/?tpl=trend&word=s')
r = requests.get('http://index.baidu.com/?tpl=trend&word=s', cookies=jar)
print(r.text)
print(r.headers)
print(r.cookies)
# print(jar)
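Note that the commented-out browser.add_cookie(jar) above would not work as written: Selenium's add_cookie takes one dict per cookie, not a jar. A sketch of pushing the jar's contents into a browser instead, reusing the jar built above:

from selenium import webdriver

browser = webdriver.Chrome()
# Selenium only accepts cookies for the domain currently loaded,
# so navigate there before adding any.
browser.get('http://index.baidu.com/')
for cookie in jar:  # iterating a RequestsCookieJar yields Cookie objects
    browser.add_cookie({'name': cookie.name, 'value': cookie.value})
browser.get('http://index.baidu.com/?tpl=trend&word=s')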