Example #1
def save_cookies_to_file(cookies: "Cookies", path: Path):
    jar = MozillaCookieJar(path)
    for cookie in cookies.jar:
        jar.set_cookie(cookie)
    # make sure the parent directory exists before writing
    if not path.is_file():
        path.parent.mkdir(parents=True, exist_ok=True)
    jar.save(ignore_discard=True)
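
As a quick check of the round trip, the saved file can be read back with the standard-library loader (a minimal sketch; "cookies.txt" is a placeholder path):

from http.cookiejar import MozillaCookieJar

# load the Netscape-format file written above; ignore_discard also loads
# session cookies that were saved with ignore_discard=True
jar = MozillaCookieJar("cookies.txt")
jar.load(ignore_discard=True)
for cookie in jar:
    print(cookie.name, cookie.value)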
Example #2
def test_authholder_credentials_save_reallysave(auth_holder):
    """Save really do save if cookies changed."""
    # create some fake cookies
    fake_cookiejar = MozillaCookieJar(auth_holder._cookiejar_filepath)
    fake_cookie = get_cookie()
    fake_cookiejar.set_cookie(fake_cookie)
    fake_cookiejar.save()

    # make the auth holder load those credentials, and also load them ourselves
    # for later comparison
    auth_holder._load_credentials()
    with open(auth_holder._cookiejar_filepath, "rb") as fh:
        prv_file_content = fh.read()

    # set a different credential in the auth_holder (mimicking that the user
    # authenticated while doing the request)
    other_cookie = get_cookie(value="different")
    auth_holder._cookiejar.set_cookie(other_cookie)

    # call the tested method and ensure that file changed!
    auth_holder._save_credentials_if_changed()
    with open(auth_holder._cookiejar_filepath, "rb") as fh:
        new_file_content = fh.read()
    assert new_file_content != prv_file_content

    # call the tested method again, to verify that it was calling save on the
    # cookiejar (and not that the file changed as some other side effect)
    with patch.object(auth_holder._cookiejar, "save") as mock:
        auth_holder._save_credentials_if_changed()
    assert mock.call_count == 1
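
The get_cookie helper used by these tests is not shown. A plausible sketch, assuming it builds a basic non-expired http.cookiejar.Cookie (the helper is hypothetical; all field values are illustrative):

import time
from http.cookiejar import Cookie

def get_cookie(value="test"):
    # hypothetical test helper: the real one may differ in names and defaults
    return Cookie(
        version=0, name="test-cookie", value=value,
        port=None, port_specified=False,
        domain="example.com", domain_specified=True, domain_initial_dot=False,
        path="/", path_specified=True, secure=False,
        expires=int(time.time()) + 3600, discard=False,
        comment=None, comment_url=None, rest={},
    )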
Example #3
def test_authholder_request_simple(auth_holder):
    """Load credentials the first time, hit the network, save credentials."""
    # save a cookie to be used
    fake_cookiejar = MozillaCookieJar(auth_holder._cookiejar_filepath)
    fake_cookiejar.set_cookie(get_cookie())
    fake_cookiejar.save()

    other_cookie = get_cookie(value="different")

    def fake_request(self, method, url, json, headers):
        # check it was properly called
        assert method == "testmethod"
        assert url == "testurl"
        assert json == "testbody"
        assert headers == {"User-Agent": build_user_agent()}

        # check credentials were loaded at this time
        assert auth_holder._cookiejar is not None

        # modify the credentials, to simulate that a re-auth happened during the request
        auth_holder._cookiejar.set_cookie(other_cookie)

        return "raw request response"

    with patch("macaroonbakery.httpbakery.Client.request", fake_request):
        resp = auth_holder.request("testmethod", "testurl", "testbody")

    # verify the response (the call checks were done above in the fake_request helper)
    assert resp == "raw request response"

    # check the credentials were saved (that they were loaded properly was also checked in the helper above)
    new_cookiejar = MozillaCookieJar(auth_holder._cookiejar_filepath)
    new_cookiejar.load()
    assert list(new_cookiejar)[0].value == other_cookie.value
Example #4
def test_authholder_credentials_load_file_present_ok(auth_holder):
    """Credentials are properly loaded and all internal objects setup ok."""
    # create some fake cookies
    fake_cookiejar = MozillaCookieJar(auth_holder._cookiejar_filepath)
    fake_cookie = get_cookie()
    fake_cookiejar.set_cookie(fake_cookie)
    fake_cookiejar.save()

    auth_holder._load_credentials()

    # check credentials
    loaded_cookies = list(auth_holder._cookiejar)
    assert len(loaded_cookies) == 1
    assert (
        loaded_cookies[0].value == fake_cookie.value
    )  # compare by value, as Cookie defines no __eq__
    assert isinstance(auth_holder._cookiejar, MozillaCookieJar)
    assert auth_holder._old_cookies == list(auth_holder._cookiejar)

    # check other internal objects
    assert isinstance(auth_holder._client, httpbakery.Client)
    assert list(auth_holder._client.cookies)[0].value == fake_cookie.value
    (im,) = auth_holder._client._interaction_methods
    assert isinstance(im, httpbakery.WebBrowserInteractor)
    assert im._open_web_browser == visit_page_with_browser
Example #5
def save_cookies(self):
    self.driver.get(self.config.url_accounts_no_form)
    driver_cookies = self.driver.get_cookies()
    cookie_jar = MozillaCookieJar()
    for driver_cookie in driver_cookies:
        http_cookie = self.get_cookie(driver_cookie)
        cookie_jar.set_cookie(http_cookie)
    save_cookie_jar(self.config, cookie_jar)
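
The get_cookie method used above is not shown; a plausible sketch of converting a Selenium cookie dict into an http.cookiejar.Cookie (the function name and defaults here are assumptions, not the original source):

from http.cookiejar import Cookie

def make_http_cookie(driver_cookie: dict) -> Cookie:
    # Selenium returns cookies as dicts with keys such as 'name', 'value',
    # 'domain', 'path', 'secure' and optionally 'expiry'
    domain = driver_cookie.get("domain", "")
    return Cookie(
        version=0,
        name=driver_cookie["name"],
        value=driver_cookie["value"],
        port=None, port_specified=False,
        domain=domain,
        domain_specified=bool(domain),
        domain_initial_dot=domain.startswith("."),
        path=driver_cookie.get("path", "/"), path_specified=True,
        secure=driver_cookie.get("secure", False),
        expires=driver_cookie.get("expiry"),
        discard=False, comment=None, comment_url=None, rest={},
    )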
Example #6
def save_cookies_to_txt(cookies: List[Any], filename: str) -> None:
    # Ensure the cookie file exists
    if not os.path.isfile(filename):
        touch(filename)

    cjar = MozillaCookieJar(filename)
    for cookie in cookies:
        cjar.set_cookie(cookie)
    cjar.save(ignore_discard=False)
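
Note that save(ignore_discard=False), which is also the default, silently drops session cookies (those marked to be discarded, i.e. without a persistent expiry). If those need to survive, pass ignore_discard=True as Example #1 does; a one-line variant:

# persist everything, including session cookies and already-expired ones
cjar.save(ignore_discard=True, ignore_expires=True)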
Example #7
def test_authholder_request_interaction_error(auth_holder):
    """Support authentication failure while doing the request."""
    # save a cookie to be used
    fake_cookiejar = MozillaCookieJar(auth_holder._cookiejar_filepath)
    fake_cookiejar.set_cookie(get_cookie())
    fake_cookiejar.save()

    with patch("macaroonbakery.httpbakery.Client.request") as mock:
        mock.side_effect = httpbakery.InteractionError("bad auth!!")
        expected = "Authentication failure: cannot start interactive session: bad auth!!"
        with pytest.raises(CommandError, match=expected):
            auth_holder.request("testmethod", "testurl", "testbody")
Example #8
def test_authholder_request_credentials_already_loaded(auth_holder):
    """Do not load credentials if already there."""
    # save a cookie to be used
    fake_cookiejar = MozillaCookieJar(auth_holder._cookiejar_filepath)
    fake_cookiejar.set_cookie(get_cookie())
    fake_cookiejar.save()

    # load credentials, and ensure that it would fail if called again
    auth_holder._load_credentials()
    auth_holder._load_credentials = None

    with patch("macaroonbakery.httpbakery.Client.request"):
        auth_holder.request("testmethod", "testurl", "testbody")
Example #9
def test_authholder_credentials_save_notreally(auth_holder):
    """Save does not really save if cookies didn't change."""
    # create some fake cookies
    fake_cookiejar = MozillaCookieJar(auth_holder._cookiejar_filepath)
    fake_cookie = get_cookie()
    fake_cookiejar.set_cookie(fake_cookie)
    fake_cookiejar.save()

    auth_holder._load_credentials()

    with patch.object(auth_holder._cookiejar, "save") as mock:
        auth_holder._save_credentials_if_changed()
    assert mock.call_count == 0
Example #10
def save_cookies_lwp(cookiejar):
    """Return cookies from a requests.Session cookies member in the
    Netscape format.
    """

    lwp_cookiejar = MozillaCookieJar()
    for c in cookiejar:
        args = dict(vars(c).items())
        args["rest"] = args["_rest"]
        del args["_rest"]
        if args["expires"] is None:
            args["expires"] = int(time.time()) + 86400
        c = Cookie(**args)
        lwp_cookiejar.set_cookie(c)
    return lwp_cookiejar
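
A usage sketch for the converter above, assuming a live requests session (the URL is a placeholder):

import requests

session = requests.Session()
session.get("https://example.com/")           # populates session.cookies
jar = save_cookies_lwp(session.cookies)       # convert to a MozillaCookieJar
jar.save("cookies.txt", ignore_discard=True)  # write in Netscape format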
Example #11
def get_cookies_in_cookiejar(host):
    """Export cookies and put them in a cookiejar.
    Return value: a cookiejar filled with cookies."""
    # based on http://www.guyrutenberg.com/2010/11/27/building-cookiejar-out-of-firefoxs-cookies-sqlite/
    cj = MozillaCookieJar()
    cookie_db = get_cookie_db_path(str(FIREFOX_DIR))
    conn = db.connect(cookie_db)
    cursor = conn.cursor()
    sql = "SELECT {c} FROM moz_cookies WHERE host LIKE '%{h}%'".format(
        c=CONTENTS, h=host)
    cursor.execute(sql)

    for item in cursor.fetchall():
        # assumed column order: host, path, isSecure, expiry, name, value
        c = Cookie(0, item[4], item[5], None, False, item[0],
                   item[0].startswith('.'), item[0].startswith('.'), item[1],
                   False, item[2], item[3], item[3] == "", None, None, {})
        cj.set_cookie(c)

    return cj
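
The db module and the CONTENTS constant are not shown here. Judging by the column indices used above and the linked blog post, they are presumably sqlite3 and a column list in this order (an assumption, not part of the original source):

import sqlite3 as db

# assumed column order matching the indices in get_cookies_in_cookiejar
CONTENTS = "host, path, isSecure, expiry, name, value"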
Example #12
class requester():
    cache_timeout = 10940
    caching = True
    assert_on_fail = True
    time_out = 4
    debug_output = True
    dir_cache = path.dirname(path.realpath(__file__)) + "/cache/"
    cookie_filename = path.abspath(path.realpath(__file__)) + ".cookie"
    last_page = None
    last_url = None
    ref_url = None
    time_sleep = 1e-1
    limit_file_cache = 200
    counter_file_cache = 0
    verify_word = None

    def print(self, *objs):
        if self.debug_output:
            print(datetime.utcnow(), *objs, file=stderr)

    def __init__(self,
                 proxy=bool(strtobool(environ.get('REQUESTER_PROXY', '0'))),
                 cookie_filename=None,
                 caching=None,
                 user_agent=None,
                 headers=None,
                 file_name_with_proxies=path.join(path.dirname(__file__),
                                                  'proxies.txt')):
        self.opened = None
        if cookie_filename:
            self.cookie_filename = cookie_filename
        if caching is not None:
            self.caching = caching
        if headers:
            self.headers = headers
        else:
            self.headers = [
                ('Accept',
                 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
                 ), ('Accept-Encoding', 'gzip, deflate'),
                ('Accept-Language', 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3'),
                ('Connection', 'keep-alive'),
                ('User-Agent',
                 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2) Gecko/20100101 Firefox/10.0.2'
                 if user_agent is None else user_agent)
            ]
        if self.cookie_filename:
            makedirs(path.dirname(self.cookie_filename), exist_ok=True)
            self.cookiejar = MozillaCookieJar(self.cookie_filename)
            if path.exists(self.cookie_filename):
                self.cookiejar.load()
        else:
            self.cookiejar = MozillaCookieJar()

        self.http_cookie_processor = urllib.request.HTTPCookieProcessor(
            self.cookiejar)
        self.opener = urllib.request.build_opener(self.http_cookie_processor)
        self.proxer = None
        if proxy:
            if proxy is True:
                if not file_name_with_proxies or not path.exists(
                        file_name_with_proxies):
                    raise FileWithProxiesNotFound(
                        "ERROR: not found '%s' file" % file_name_with_proxies)
                self.proxer = proxer(file_name_with_proxies)
                proxy = self.proxer.get()
                self.print("[proxy]", proxy)
                time_response = self.proxer.time_response()
                if time_response:
                    self.print("[average time]", time_response)

            self.opener.add_handler(
                urllib.request.ProxyHandler({
                    'http': proxy,
                    'https': proxy,
                }))

        self._init_opener_headers = self.headers

    def get(
        self,
        url,
        post=None,
        caching=None,
        is_ref_url=True,
        md5_file_cache=None,
        time_out=None,
        headers=None,
        detect_charsets=True,
        content_type=None,
        files=None,
    ):
        prefix = "local-file:"
        if url.startswith(prefix):
            with open(url[len(prefix):], "r") as fo:
                page = fo.read().decode("utf8")
                self.last_page = page
            return page

        if not url.startswith('http') and self.last_url:
            url = urllib.parse.urljoin(self.last_url, url)
        if caching is None:
            caching = self.caching
        url = url.replace('&amp;', '&')  # undo HTML escaping in scraped links
        url = url.replace(' ', '%20')

        makedirs(self.dir_cache, mode=0o777, exist_ok=True)

        files = files or isinstance(post, dict) and post.pop('files__', None)
        post_urlencoded = urllib.parse.urlencode(post).encode(
            'utf-8') if post and isinstance(post, dict) else post

        try:
            file_cache = ''.join((
                self.dir_cache,
                md5((md5_file_cache
                     or url + (post_urlencoded or "")).encode()).hexdigest(),
                ("/" + url[url.find("//") + 2:].split("?", 2)[0]).replace(
                    "/", "_"),
                ".html",
            ))
        except Exception:
            file_cache = None

        caching = file_cache and caching and self.cache_timeout > 0

        from_cache = caching
        if caching:
            if not path.isfile(file_cache):
                from_cache = False
            else:
                diff_time = datetime.now() - datetime.fromtimestamp(
                    path.getctime(file_cache))
                from_cache = diff_time.total_seconds() < self.cache_timeout
        self.print("[cache]" if from_cache else "", url)
        self.error = None
        self.response = None
        if from_cache:
            with open(file_cache, "r") as f:
                page = f.read().encode('utf8')
        else:
            if self.time_sleep:
                v_time_sleep = min(1, abs(gauss(0, 1)) * self.time_sleep)
                sleep(v_time_sleep)
            if not headers:
                headers = {}
            if self.ref_url and 'Referer' not in headers:
                headers.update({"Referer": self.ref_url})
            if not self.last_url or urllib.parse.urlparse(
                    self.last_url).netloc != urllib.parse.urlparse(url).netloc:
                self.opener.addheaders = self._init_opener_headers
            if headers:
                h = dict(self.opener.addheaders)
                h.update(headers)
                self.opener.addheaders = list(h.items())

            if content_type == 'multipart/form-data' and post or files:
                post_urlencoded, multipart_headers = encode_multipart(
                    fields=post, files=files)
                headers.update(multipart_headers)

            try:
                if headers:
                    request = urllib.request.Request(url, headers=headers)
                else:
                    request = url

                time_start = datetime.utcnow()
                response = self.opener.open(
                    request,
                    post_urlencoded if post else None,
                    timeout=time_out or self.time_out,
                )
                if response.info().get("Content-Encoding", None) == "gzip":
                    buf = BytesIO(response.read())
                    page = GzipFile(fileobj=buf).read()
                else:
                    page = response.read()
                self.response = response
                self.time_response = datetime.utcnow() - time_start
                if self.verify_word and self.verify_word not in page:
                    raise NoVerifyWord("No verify word '%s', size page = %d" %
                                       (self.verify_word, len(page)))
            except Exception as err:
                self.error = err
                if self.assert_on_fail:
                    if self.proxer:
                        self.proxer.fail()
                    raise FailOnGetResponse(err)
                else:
                    traceback.print_exc()
                return

            try:
                if file_cache and caching:
                    cookie_write = True
                    # Content-Type may be missing; default to empty string
                    ctype = self.response.info().get("Content-Type", "")
                    if ctype.startswith("application/json"):
                        # keep page as bytes so the decode below still works
                        page = dumps(loads(page), indent=4).encode('utf8')
                        cookie_write = False
                    if ctype.startswith("image/"):
                        cookie_write = False
                    with open(file_cache, "w") as f:
                        f.write(page.decode('utf8'))
                        if cookie_write:
                            f.write("\n\n" +
                                    dumps(self.get_cookies(), indent=4))
            except Exception:
                traceback.print_exc()
                self.print("[cache] ERROR: write to", file_cache)

            if self.proxer:
                if not self.error:
                    self.proxer.ok(self.time_response)
                else:
                    self.proxer.fail()

        if detect_charsets:
            matches = re.findall(
                r'charset=["\']?(?P<charset>[^"\'\s\.>;]{3,}\b)', str(page),
                re.IGNORECASE)
            if matches:
                charsets = [c.lower() for c in matches]
                if len(charsets) > 1 and len(set(charsets)) > 1:
                    self.print(
                        f'[WARNING] set multi charset values: {charsets}')
                charset = charsets[-1].lower()
            else:
                charset = 'utf-8'
            try:
                charset_detect = chardet.detect(page)
                if charset_detect and charset_detect['confidence'] > 0.98:
                    charset = charset_detect['encoding']
            except Exception as e:
                self.print('exception on charset detect:', str(e))
            if charset in ('utf-8', 'utf8'):
                page = page.decode('utf-8', 'replace')
            elif charset in ('windows-1251', 'cp1251'):
                page = page.decode('cp1251', 'replace')
            else:
                page = page.decode(charset, 'replace')

        self.last_page = page
        self.last_url = self.response.geturl() if self.response else url
        if is_ref_url:
            self.ref_url = self.last_url
        self.file_cache_clear()
        return page

    def get_link_by_text(self, text, page=None):
        if page is None:
            page = self.last_page
        match = re.search(
            r"""
            <a[^>]*href="(?P<href>[^"]*)"[^>]*>\s*
                (?:</?[^a][^>]*>\s*)*
                %s
            """ % text.replace(" ", r"\s"), page, re.VERBOSE)
        if not match:
            return
        return match.group("href")

    def get_link_by_text_and_go_if_exist(self, text):
        url = self.get_link_by_text(text)
        if url:
            self.get(url)
        return self.last_page

    def form(self,
             page=None,
             action='',
             limit=1,
             fid=None,
             selectors=(),
             enctype=False):
        if page is None:
            page = self.last_page
        selectors = list(selectors)
        selectors += ['''method=["'](?P<method>post|get)"''']
        if action is not None:
            selectors += [f'''action=["'](?P<url>[^"']*{action}[^"']*)["']''']
            limit += 1
        if fid is not None:
            selectors.append(f'id="{fid}"')
            limit += 1
        if enctype:
            selectors.append('enctype="(?P<enctype>[^"]*)"')
            limit += 1

        selector = '|[^>]*'.join(selectors)
        regex = f'''
            <form([^>]*{selector}){{{limit}}}[^>]*>
            .*?
            </form>
        '''
        match = re.search(regex, page, re.DOTALL | re.VERBOSE | re.IGNORECASE)
        if not match:
            return None
        page = match.group()
        result = match.groupdict()
        post = {}
        fields = re.finditer(
            r'''
            (?:
                type=["'](?P<type>[^"']*)["']\s*|
                value=["'](?P<value>[^"']*)["']\s*|
                name=["'](?P<name>[^"']*)["']\s*|
                [-a-z]+=["'][^"']*["']\s*|
                (?P<checked>checked)\s*
            ){2,}''', page, re.VERBOSE | re.IGNORECASE)

        unchecked = []
        for field in fields:
            field = field.groupdict()
            if field["name"] is None or field["value"] is None:
                continue
            if field["type"] == "checkbox" and field["checked"] is None:
                unchecked.append(field)
            else:
                post[field['name']] = html.unescape(field['value'])

        fields = re.finditer(r'''<select[^>]*name="(?P<name>[^"]*)"[^>]*>''',
                             page, re.VERBOSE)
        for field in fields:
            post[field.group('name')] = ''

        result['post'] = post
        if unchecked:
            result['unchecked'] = unchecked
        return result

    def submit_form(self, data, *args, url=None, form=None, **kwargs):
        form = form or self.form(*args, **kwargs)
        form['post'].update(data)
        # keep as str: it is only interpolated into the GET query string below
        data_urlencoded = urllib.parse.urlencode(form['post'])
        url = url or form['url']
        content_type = form.get('enctype')
        ret = {
            'get':
            lambda: self.get(urllib.parse.urljoin(url, f'?{data_urlencoded}'),
                             content_type=content_type),
            'post':
            lambda: self.get(url, form['post'], content_type=content_type),
        }[form['method'].lower()]()
        return ret

    def file_cache_clear(self):
        if self.limit_file_cache and self.counter_file_cache % self.limit_file_cache == 0:
            file_list = []
            for file_cache in listdir(self.dir_cache):
                stat_file = stat(self.dir_cache + file_cache)
                file_list.append((stat_file.st_atime, file_cache))
            file_list.sort(reverse=True)
            for atime, file_cache in file_list[self.limit_file_cache:]:
                remove(self.dir_cache + file_cache)
        self.counter_file_cache += 1

    def get_raw_cookies(self):
        for c in self.cookiejar:
            yield c

    def get_cookies(self):
        return dict(((i.name, i.value) for i in self.cookiejar))

    def get_cookie(self, name):
        return self.get_cookies().get(name, None)

    def set_cookie(self, name, value):
        for c in self.cookiejar:
            if c.name == name:
                c.value = value
                self.cookiejar.set_cookie(c)
                self.print("[setcookie]", name)
                break

    @staticmethod
    def rand_string(length):
        a = ascii_letters + digits
        return ''.join([choice(a) for i in range(length)])

    def save_cookie(self):
        if self.cookie_filename:
            try:
                self.cookiejar.save()
            except Exception:
                pass

    def close(self):
        if self.proxer:
            self.proxer.save_data()
        self.save_cookie()

    def __enter__(self):
        if self.opened is not True:
            self.opened = True
        return self

    def __exit__(self, *err):
        if self.opened is not False:
            self.opened = False
            self.close()

    def __del__(self):
        if not isdir(self.dir_cache):
            return

        for file_cache in listdir(self.dir_cache):
            diff_time = (
                datetime.now() -
                datetime.fromtimestamp(getctime(self.dir_cache + file_cache)))
            if diff_time.total_seconds() >= self.cache_timeout:
                remove(self.dir_cache + file_cache)

        self.save_cookie()
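
A minimal usage sketch for the requester class above (assuming its helper dependencies, such as proxer and encode_multipart, are importable; the URL is a placeholder):

# the context manager saves cookies (and proxy stats, if any) on exit
with requester(proxy=False) as req:
    page = req.get("http://example.com/")  # cached, cookie-aware fetch
    print(req.get_cookies())               # {name: value} for all cookies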
Example #13
    class ClientHandler(urllib.request.BaseHandler):
        DEFAULT_HEADERS = {
            "User-Agent":
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
            "Accept":
            "text/html, application/xhtml+xml, application/xml; q=0.9, image/webp, */*; q=0.8",
            "Accept-Encoding": "gzip, deflate"
        }

        def __init__(self):
            # copy so per-instance header changes don't mutate the class default
            self.headers = dict(self.DEFAULT_HEADERS)
            self.cookies = MozillaCookieJar()

        def load_cookie(self, fpath):
            self.cookies.load(fpath)

        def save_cookie(self, fpath):
            self.cookies.save(fpath)

        def set_header(self, name, value):
            self.headers[name] = value

        def set_cookie(self, cookie_string):
            cookie = self._make_cookie(cookie_string)
            if cookie:
                self.cookies.set_cookie(cookie)

        def _make_cookie(self, cookie_string):
            return HttpClient.ClientCookie().make(cookie_string)

        def _request_add_headers(self, request):
            for name, value in list(self.headers.items()):
                request.add_header(name, value)
                request.add_unredirected_header(name, value)
            return self

        def _request_add_cookies(self, request):
            self.cookies.add_cookie_header(request)

        def _response_extract_cookies(self, request, response):
            self.cookies.extract_cookies(response, request)
            return self

        def _response_decompress_content(self, request, response):
            # needs BytesIO (from io import BytesIO): response.read() is bytes
            encoding = response.headers.get("content-encoding", "")
            if encoding == "gzip":
                response.fp = gzip.GzipFile(fileobj=BytesIO(response.read()))
            elif encoding == "deflate":
                response.fp = BytesIO(zlib.decompress(response.read()))
            return self

        def http_request(self, request):
            self._request_add_headers(request)
            self._request_add_cookies(request)
            return request

        def http_response(self, request, response):
            self._response_extract_cookies(
                request,
                response)._response_decompress_content(request, response)
            return response

        https_request = http_request
        https_response = http_response
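
A sketch of wiring the handler into an opener, assuming it is reachable through the enclosing HttpClient class (which is not shown here):

import urllib.request

handler = HttpClient.ClientHandler()
handler.set_header("Referer", "http://example.com/")
opener = urllib.request.build_opener(handler)
response = opener.open("http://example.com/")  # headers and cookies applied
handler.save_cookie("cookies.txt")             # persist in Netscape format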