def create_cookie(name, value, domain, httponly=None, **kwargs):
    """Creates `cookielib.Cookie` instance"""
    if domain == 'localhost':
        domain = ''
    config = dict(
        name=name,
        value=value,
        version=0,
        port=None,
        domain=domain,
        path='/',
        secure=False,
        expires=None,
        discard=True,
        comment=None,
        comment_url=None,
        rfc2109=False,
        rest={'HttpOnly': httponly},
    )

    for key in kwargs:
        if key not in config:
            raise GrabMisuseError('Function `create_cookie` does not accept '
                                  '`%s` argument' % key)

    config.update(**kwargs)
    config['rest']['HttpOnly'] = httponly

    config['port_specified'] = bool(config['port'])
    config['domain_specified'] = bool(config['domain'])
    config['domain_initial_dot'] = (config['domain'] or '').startswith('.')
    config['path_specified'] = bool(config['path'])

    return Cookie(**config)
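# A minimal usage sketch for create_cookie above, assuming `Cookie` comes from
# http.cookiejar (cookielib on Python 2); the cookie name, value, and domain
# are illustrative placeholders.
from http.cookiejar import CookieJar

jar = CookieJar()
jar.set_cookie(create_cookie('sessionid', 'abc123', '.example.com',
                             httponly=True, secure=True))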
def har_to_cookie(har_cookie):
    """
    Convert a cookie dict in HAR format to a Cookie instance.

    >>> har_cookie = {
    ...     "name": "TestCookie",
    ...     "value": "Cookie Value",
    ...     "path": "/foo",
    ...     "domain": "www.janodvarko.cz",
    ...     "expires": "2009-07-24T19:20:30Z",
    ...     "httpOnly": True,
    ...     "secure": True,
    ...     "comment": "this is a test"
    ... }
    >>> cookie = har_to_cookie(har_cookie)
    >>> cookie.name
    'TestCookie'
    >>> cookie.value
    'Cookie Value'
    >>> cookie.port
    >>> cookie.domain
    'www.janodvarko.cz'
    >>> cookie.path
    '/foo'
    >>> cookie.secure
    True
    >>> cookie.expires
    1248463230
    >>> cookie.comment
    'this is a test'
    >>> cookie.get_nonstandard_attr('HttpOnly')
    True
    """
    expires_timestamp = None
    if har_cookie.get('expires'):
        expires = time.strptime(har_cookie['expires'], "%Y-%m-%dT%H:%M:%SZ")
        expires_timestamp = calendar.timegm(expires)
    kwargs = dict(
        version=har_cookie.get('version') or 0,
        name=har_cookie['name'],
        value=har_cookie['value'],
        port=None,
        domain=har_cookie.get('domain', ''),
        path=har_cookie.get('path', '/'),
        secure=har_cookie.get('secure', False),
        expires=expires_timestamp,
        discard=False,
        comment=har_cookie.get('comment'),
        comment_url=bool(har_cookie.get('comment')),
        rest={'HttpOnly': har_cookie.get('httpOnly')},
        rfc2109=False,
    )
    kwargs['port_specified'] = bool(kwargs['port'])
    kwargs['domain_specified'] = bool(kwargs['domain'])
    kwargs['domain_initial_dot'] = kwargs['domain'].startswith('.')
    kwargs['path_specified'] = bool(kwargs['path'])
    return Cookie(**kwargs)
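# The docstring above doubles as a doctest; a quick way to exercise it,
# assuming har_to_cookie lives in the current module with `time`, `calendar`,
# and `Cookie` already imported:
if __name__ == '__main__':
    import doctest
    doctest.testmod()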
def get_stream(self, media):
    url = "https://mf.svc.nhl.com/ws/media/mf/v2.4/stream"
    event_id = media["eventId"]
    if not self.session_key:
        logger.info("getting session key")
        params = {
            "eventId": event_id,
            "format": "json",
            "platform": "WEB_MEDIAPLAYER",
            "subject": "NHLTV",
            "_": "1538708097285"
        }
        res = self.session.get(url, params=params)
        j = res.json()
        logger.trace(
            json.dumps(j, sort_keys=True, indent=4, separators=(',', ': ')))
        self.session_key = j["session_key"]
        self.save()

    params = {
        "contentId": media["mediaPlaybackId"],
        "playbackScenario": "HTTP_CLOUD_WIRED_WEB",
        "sessionKey": self.session_key,
        "auth": "response",
        "platform": "WEB_MEDIAPLAYER",
        "_": "1538708097285"
    }
    res = self.session.get(url, params=params)
    j = res.json()
    logger.trace(
        json.dumps(j, sort_keys=True, indent=4, separators=(',', ': ')))

    try:
        media_auth = next(
            x["attributeValue"]
            for x in j["session_info"]["sessionAttributes"]
            if x["attributeName"] == "mediaAuth_v2")
    except (KeyError, StopIteration):
        # next() raises StopIteration when no attribute matches, so catch
        # it alongside the KeyError raised when session_info is missing.
        raise StreamSessionException(
            f"No stream found for event {event_id}")

    self.cookies.set_cookie(
        Cookie(0, 'mediaAuth_v2', media_auth,
               '80', '80', '.nhl.com',
               None, None, '/', True, False,
               4102444800, None, None, None, {})
    )

    stream = Stream(j["user_verified_event"][0]["user_verified_content"][0]
                    ["user_verified_media_item"][0])
    return stream
def make_cookie(name, value, domain):
    '''
    Makes a cookie with provided name and value.
    '''
    return Cookie(version=0,
                  name=name,
                  value=value,
                  port=None,
                  port_specified=False,
                  domain=domain,
                  domain_specified=True,
                  domain_initial_dot=False,
                  path="/",
                  path_specified=True,
                  secure=False,
                  expires=None,
                  discard=False,
                  comment=None,
                  comment_url=None,
                  # An empty dict rather than None, so that
                  # Cookie.has_nonstandard_attr() does not raise TypeError.
                  rest={})
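# Illustrative use of make_cookie (token and domain are placeholders):
# requests' RequestsCookieJar is cookielib-compatible, so the Cookie can be
# injected straight into a session's jar.
import requests

session = requests.Session()
session.cookies.set_cookie(make_cookie('auth_token', 's3cr3t',
                                       'api.example.com'))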
def create_cookie(self, cookie_dict):
    """Creates a cookie from the given `cookie_dict`.

    :param cookie_dict: A dictionary with two keys: 'name' and 'value'.
        The values of these keys are the name of the cookie and its value,
        respectively. The keys 'path', 'domain', 'secure' and 'expiry' can
        also be set to values. These have the respective meanings as
        defined in
        `RFC6265 <http://tools.ietf.org/html/rfc6265#section-5.2>`_.
    """
    name = ascii_as_bytes_or_str(cookie_dict['name'])
    value = ascii_as_bytes_or_str(cookie_dict['value'])
    path = ascii_as_bytes_or_str(cookie_dict.get('path', ''))
    path_set = path != ''
    domain = ascii_as_bytes_or_str(cookie_dict.get('domain', ''))
    domain_set = domain != ''
    secure = cookie_dict.get('secure', False)
    expires = cookie_dict.get('expiry', None)
    cookie = Cookie(0, name, value, None, False, domain, domain_set, None,
                    path, path_set, secure, expires, None, None, None, None)
    self.testapp.cookiejar.set_cookie(cookie)
def add_cookie(self, key, value):
    value = smart_text(value)
    cookie = Cookie(None, key, value, None, False, '', False, None, '',
                    False, None, None, None, None, None, None)
    self._cookie_jar.set_cookie(cookie)
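# For reference, the positional form used by several snippets here maps onto
# http.cookiejar.Cookie's constructor as follows:
#
#   Cookie(version, name, value, port, port_specified, domain,
#          domain_specified, domain_initial_dot, path, path_specified,
#          secure, expires, discard, comment, comment_url, rest,
#          rfc2109=False)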
def __init__(self, url, cookie_file=None, username=None, password=None,
             api_token=None, agent=None, session=None, disable_proxy=False,
             auth_callback=None, otp_token_callback=None, verify_ssl=True,
             save_cookies=True, ext_auth_cookies=None):
    if not url.endswith('/'):
        url += '/'

    self.url = url + 'api/'

    self.save_cookies = save_cookies
    self.ext_auth_cookies = ext_auth_cookies

    if self.save_cookies:
        self.cookie_jar, self.cookie_file = create_cookie_jar(
            cookie_file=cookie_file)

        try:
            self.cookie_jar.load(ignore_expires=True)
        except IOError:
            pass
    else:
        self.cookie_jar = CookieJar()
        self.cookie_file = None

    if self.ext_auth_cookies:
        try:
            self.cookie_jar.load(ext_auth_cookies, ignore_expires=True)
        except IOError as e:
            logging.critical('There was an error while loading a '
                             'cookie file: %s', e)
            pass

    # Get the cookie domain from the url. If the domain
    # does not contain a '.' (e.g. 'localhost'), we assume
    # it is a local domain and suffix it (See RFC 2109).
    parsed_url = urlparse(url)
    self.domain = parsed_url[1].partition(':')[0]  # Remove Port.

    if self.domain.count('.') < 1:
        self.domain = '%s.local' % self.domain

    if session:
        cookie = Cookie(version=0,
                        name=RB_COOKIE_NAME,
                        value=session,
                        port=None,
                        port_specified=False,
                        domain=self.domain,
                        domain_specified=True,
                        domain_initial_dot=True,
                        path=parsed_url[2],
                        path_specified=True,
                        secure=False,
                        expires=None,
                        discard=False,
                        comment=None,
                        comment_url=None,
                        rest={'HttpOnly': None})
        self.cookie_jar.set_cookie(cookie)

        if self.save_cookies:
            self.cookie_jar.save()

    if username:
        # If the username parameter is given, we have to clear the session
        # cookie manually or it will override the username:password
        # combination retrieved from the authentication callback.
        try:
            self.cookie_jar.clear(self.domain, parsed_url[2],
                                  RB_COOKIE_NAME)
        except KeyError:
            pass

    # Set up the HTTP libraries to support all of the features we need.
    password_mgr = ReviewBoardHTTPPasswordMgr(self.url,
                                              username,
                                              password,
                                              api_token,
                                              auth_callback,
                                              otp_token_callback)
    self.preset_auth_handler = PresetHTTPAuthHandler(self.url,
                                                     password_mgr)

    handlers = []

    if not verify_ssl:
        context = ssl._create_unverified_context()
        handlers.append(HTTPSHandler(context=context))

    if disable_proxy:
        handlers.append(ProxyHandler({}))

    handlers += [
        HTTPCookieProcessor(self.cookie_jar),
        ReviewBoardHTTPBasicAuthHandler(password_mgr),
        HTTPDigestAuthHandler(password_mgr),
        self.preset_auth_handler,
        ReviewBoardHTTPErrorProcessor(),
    ]

    if agent:
        self.agent = agent
    else:
        self.agent = ('RBTools/' + get_package_version()).encode('utf-8')

    opener = build_opener(*handlers)
    opener.addheaders = [
        (str('User-agent'), str(self.agent)),
    ]
    install_opener(opener)

    self._cache = None
    self._urlopen = urlopen
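# Hypothetical construction of the client this __init__ belongs to; the class
# name ReviewBoardServer is an assumption based on the RBTools-style helpers
# (create_cookie_jar, ReviewBoardHTTPPasswordMgr) used above, and the URL and
# credentials are placeholders.
server = ReviewBoardServer('https://reviews.example.com',
                           username='alice',
                           api_token='0123456789abcdef',
                           verify_ssl=True)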
def __init__(self, url, cookie_file=None, username=None, password=None,
             api_token=None, agent=None, session=None, disable_proxy=False,
             auth_callback=None, otp_token_callback=None):
    self.url = url
    if not self.url.endswith('/'):
        self.url += '/'

    self.url = self.url + 'api/'

    self.cookie_jar, self.cookie_file = create_cookie_jar(
        cookie_file=cookie_file)

    try:
        self.cookie_jar.load(ignore_expires=True)
    except IOError:
        pass

    if session:
        parsed_url = urlparse(url)
        # Get the cookie domain from the url. If the domain
        # does not contain a '.' (e.g. 'localhost'), we assume
        # it is a local domain and suffix it (See RFC 2109).
        domain = parsed_url[1].partition(':')[0]  # Remove Port.

        if domain.count('.') < 1:
            domain = '%s.local' % domain

        cookie = Cookie(version=0,
                        name=RB_COOKIE_NAME,
                        value=session,
                        port=None,
                        port_specified=False,
                        domain=domain,
                        domain_specified=True,
                        domain_initial_dot=True,
                        path=parsed_url[2],
                        path_specified=True,
                        secure=False,
                        expires=None,
                        discard=False,
                        comment=None,
                        comment_url=None,
                        rest={'HttpOnly': None})
        self.cookie_jar.set_cookie(cookie)
        self.cookie_jar.save()

    # Set up the HTTP libraries to support all of the features we need.
    password_mgr = ReviewBoardHTTPPasswordMgr(self.url,
                                              username,
                                              password,
                                              api_token,
                                              auth_callback,
                                              otp_token_callback)
    self.preset_auth_handler = PresetHTTPAuthHandler(self.url,
                                                     password_mgr)

    handlers = []

    if disable_proxy:
        handlers.append(ProxyHandler({}))

    handlers += [
        HTTPCookieProcessor(self.cookie_jar),
        ReviewBoardHTTPBasicAuthHandler(password_mgr),
        HTTPDigestAuthHandler(password_mgr),
        self.preset_auth_handler,
        ReviewBoardHTTPErrorProcessor(),
    ]

    if agent:
        self.agent = agent
    else:
        self.agent = ('RBTools/' + get_package_version()).encode('utf-8')

    opener = build_opener(*handlers)
    opener.addheaders = [
        (b'User-agent', self.agent),
    ]
    install_opener(opener)

    self._cache = APICache()
def hook(response, *args, **kwargs):
    '''
    Response hook for the requests hooks system.
    '''
    status_401 = (response.status_code == 401
                  and response.headers.get('WWW-Authenticate') == 'Macaroon')
    if not status_401 and response.status_code != 407:
        return response
    if response.headers.get('Content-Type') != 'application/json':
        return response

    try:
        error = response.json()
    except:
        raise BakeryException('unable to read discharge error response')
    if error.get('Code') != ERR_DISCHARGE_REQUIRED:
        return response
    Retry.count += 1
    if Retry.count > MAX_DISCHARGE_RETRIES:
        raise BakeryException('too many discharges')
    info = error.get('Info')
    if not isinstance(info, dict):
        raise BakeryException(
            'unable to read info in discharge error response')
    serialized_macaroon = info.get('Macaroon')
    if not isinstance(serialized_macaroon, dict):
        raise BakeryException(
            'unable to read macaroon in discharge error response')

    macaroon = utils.deserialize(serialized_macaroon)
    discharges = discharge_all(macaroon, visit_page, jar, key)
    encoded_discharges = map(utils.serialize_macaroon_string, discharges)

    macaroons = '[' + ','.join(encoded_discharges) + ']'
    all_macaroons = base64.urlsafe_b64encode(
        macaroons.encode('utf-8')).decode('ascii')

    full_path = urljoin(response.url, info['MacaroonPath'])
    parsed_url = urlparse(full_path)
    if info and info.get('CookieNameSuffix'):
        name = 'macaroon-' + info['CookieNameSuffix']
    else:
        name = 'macaroon-' + discharges[0].signature
    domain = parsed_url.hostname or parsed_url.netloc
    port = str(parsed_url.port) if parsed_url.port is not None else None
    secure = parsed_url.scheme == 'https'
    cookie = Cookie(version=0,
                    name=name,
                    value=all_macaroons,
                    port=port,
                    port_specified=port is not None,
                    domain=domain,
                    domain_specified=True,
                    domain_initial_dot=False,
                    path=parsed_url.path,
                    path_specified=True,
                    secure=secure,
                    expires=None,
                    discard=False,
                    comment=None,
                    comment_url=None,
                    rest=None,
                    rfc2109=False)
    jar.set_cookie(cookie)

    # Replace the private _cookies from req as it is a copy of
    # the original cookie jar passed into the requests method and we need
    # to set the cookie for this request.
    req._cookies = jar
    req.headers.pop('Cookie', None)
    req.prepare_cookies(req._cookies)
    req.headers['Bakery-Protocol-Version'] = '1'
    with requests.Session() as s:
        return s.send(req)
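# Sketch of wiring the hook into a request via requests' documented hooks
# mechanism; the surrounding factory that binds req, jar, visit_page, and key
# is assumed, and the URL is a placeholder.
resp = requests.get('https://example.com/protected',
                    cookies=jar,
                    hooks={'response': hook})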
def _really_load(self, f, filename, ignore_discard, ignore_expires):
    """
    This function is required to monkey patch MozillaCookieJar's
    _really_load function, which does not understand the curl format
    cookie file created by ecp-cookie-init.

    It patches the code so that #HttpOnly_ lines get loaded.

    https://bugs.python.org/issue2190
    https://bugs.python.org/file37625/httponly.patch
    """
    now = time.time()

    magic = f.readline()
    if not re.search(self.magic_re, magic):
        f.close()
        raise LoadError(
            "%r does not look like a Netscape format cookies file" %
            filename)

    try:
        while 1:
            line = f.readline()
            if line == "":
                break

            # last field may be absent, so keep any trailing tab
            if line.endswith("\n"):
                line = line[:-1]

            sline = line.strip()
            # support HttpOnly cookies (as stored by curl or old Firefox).
            if sline.startswith("#HttpOnly_"):
                line = sline[10:]
            # skip comments and blank lines XXX what is $ for?
            elif (sline.startswith(("#", "$")) or sline == ""):
                continue

            domain, domain_specified, path, secure, expires, name, value = \
                line.split("\t")
            secure = (secure == "TRUE")
            domain_specified = (domain_specified == "TRUE")
            if name == "":
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas cookielib regards it as a
                # cookie with no value.
                name = value
                value = None

            initial_dot = domain.startswith(".")
            assert domain_specified == initial_dot

            discard = False
            if expires == "":
                expires = None
                discard = True

            # assume path_specified is false
            c = Cookie(0, name, value,
                       None, False,
                       domain, domain_specified, initial_dot,
                       path, False,
                       secure,
                       expires,
                       discard,
                       None,
                       None,
                       {})
            if not ignore_discard and c.discard:
                continue
            if not ignore_expires and c.is_expired(now):
                continue
            self.set_cookie(c)

    except IOError:
        raise
    except Exception:
        _warn_unhandled_exception()
        raise LoadError("invalid Netscape format cookies file %r: %r" %
                        (filename, line))
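# Sketch of applying the monkey patch described in the docstring: rebinding
# _really_load on MozillaCookieJar lets '#HttpOnly_' lines written by curl or
# ecp-cookie-init survive loading ('cookies.txt' is a placeholder path).
from http.cookiejar import MozillaCookieJar

MozillaCookieJar._really_load = _really_load
patched_jar = MozillaCookieJar('cookies.txt')
patched_jar.load(ignore_discard=True, ignore_expires=True)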
def downloadpage(url, post=None, headers=None, timeout=None,
                 follow_redirects=True, cookies=True, replace_headers=False,
                 add_referer=False, only_headers=False,
                 bypass_cloudflare=True, bypass_testcookie=True,
                 no_decode=False, method=None, cache=CACHE_ENABLED,
                 cache_expiration=CACHE_EXPIRATION):
    """
    Downloads a web page and returns the results

    :type url: str
    :type post: dict, str
    :type headers: dict, list
    :type timeout: int
    :type follow_redirects: bool
    :type cookies: bool, dict
    :type replace_headers: bool
    :type add_referer: bool
    :type only_headers: bool
    :type bypass_cloudflare: bool
    :type cache: bool
    :type cache_expiration: timedelta
    :return: Result
    """
    arguments = locals().copy()

    if cache:
        try:
            cache_key = '|'.join(
                ["%s:%s" % (k, v)
                 for k, v in sorted(arguments.items(), key=lambda x: x[0])
                 if v]).encode()
            cache_key = CACHE_PREFIX + hashlib.sha1(cache_key).hexdigest()
            cacheado = CACHE.get(cache_key)
            if cacheado:
                return HTTPResponse(cacheado)
        except:
            pass

    response = {}

    # POST data passed as a dict
    if type(post) == dict:
        post = urllib_parse.urlencode(post)

    # Quote the URL
    url = urllib_parse.quote(url, safe="%/:=&?~#+!$,;'@()*[]")

    # Default headers, if nothing is specified
    request_headers = default_headers.copy()

    # Headers passed as parameters
    if headers is not None:
        if not replace_headers:
            request_headers.update(dict(headers))
        else:
            request_headers = dict(headers)

    # Referer
    if add_referer:
        request_headers["Referer"] = "/".join(url.split("/")[:3])

    # logger("Headers:")
    # logger(request_headers, 'info')

    # Handlers
    handlers = list()
    handlers.append(HTTPHandler(debuglevel=False))
    handlers.append(HTTPSHandler(debuglevel=False))
    handlers.append(urllib_request.HTTPBasicAuthHandler())

    # No redirects
    if not follow_redirects:
        handlers.append(NoRedirectHandler())
    else:
        handlers.append(HTTPRedirectHandler())

    # Dict of cookies for the session
    if type(cookies) == dict:
        for name, value in cookies.items():
            if not type(value) == dict:
                value = {'value': value}
            ck = Cookie(
                version=0,
                name=name,
                value=value.get('value', ''),
                port=None,
                port_specified=False,
                domain=value.get('domain', urllib_parse.urlparse(url)[1]),
                domain_specified=False,
                domain_initial_dot=False,
                path=value.get('path', '/'),
                path_specified=True,
                secure=False,
                expires=value.get('expires', time.time() + 3600 * 24),
                discard=True,
                comment=None,
                comment_url=None,
                rest={'HttpOnly': None},
                rfc2109=False
            )
            cj.set_cookie(ck)

    if cookies:
        handlers.append(urllib_request.HTTPCookieProcessor(cj))

    # Opener
    opener = urllib_request.build_opener(*handlers)

    # Start timer
    inicio = time.time()

    # Request
    req = Request(url, six.ensure_binary(post) if post else None,
                  request_headers, method=method)
    try:
        # logger("Making request")
        handle = opener.open(req, timeout=timeout)
        # logger('Request completed')
    except HTTPError as handle:
        # logger('Request completed with error')
        response["sucess"] = False
        response["code"] = handle.code
        response["error"] = handle.__dict__.get("reason", str(handle))
        response["headers"] = dict(handle.headers.items())
        response['cookies'] = get_cookies(urllib_parse.urlparse(url)[1])
        if not only_headers:
            # logger('Downloading data...')
            response["data"] = handle.read()
        else:
            response["data"] = b""
        response["time"] = time.time() - inicio
        response["url"] = handle.geturl()
    except Exception as e:
        # logger('Request NOT completed')
        response["sucess"] = False
        response["code"] = e.__dict__.get("errno",
                                          e.__dict__.get("code", str(e)))
        response["error"] = e.__dict__.get("reason", str(e))
        response["headers"] = {}
        response['cookies'] = get_cookies(urllib_parse.urlparse(url)[1])
        response["data"] = b""
        response["time"] = time.time() - inicio
        response["url"] = url
    else:
        response["sucess"] = True
        response["code"] = handle.code
        response["error"] = None
        response["headers"] = dict(handle.headers.items())
        response['cookies'] = get_cookies(urllib_parse.urlparse(url)[1])
        if not only_headers:
            # logger('Downloading data...')
            response["data"] = handle.read()
        else:
            response["data"] = b""
        response["time"] = time.time() - inicio
        response["url"] = handle.geturl()

    response['headers'] = dict([(k.lower(), v)
                                for k, v in response['headers'].items()])

    # logger("Finished in %.2f seconds" % (response["time"]))
    # logger("url: %s" % url)
    # logger("Response sucess : %s" % (response["sucess"]))
    # logger("Response code : %s" % (response["code"]))
    # logger("Response error : %s" % (response["error"]))
    # logger("Response cookies : %s" % (response["cookies"]))
    # logger("Response data length: %s" % (len(response["data"])))
    # logger("Response headers:")
    # logger(response['headers'])

    # Save the cookies
    if cookies:
        save_cookies()

    # Gzip
    if response["headers"].get('content-encoding') == 'gzip':
        response["data"] = gzip.GzipFile(
            fileobj=BytesIO(response["data"])).read()

    # Binary data is not decoded, nor checked for Cloudflare, etc...
    if not is_binary(response):
        response['data'] = six.ensure_str(response['data'], errors='replace')
        if not no_decode:
            response["data"] = six.ensure_str(HTMLParser().unescape(
                six.ensure_text(response['data'], errors='replace')))

        # Anti TestCookie
        if bypass_testcookie:
            if 'document.cookie="__test="+toHex(slowAES.decrypt(c,2,a,b))+"' in response['data']:
                # Note: str.decode("HEX") / .encode("HEX") only exist on
                # Python 2.
                a = re.findall(r'a=toNumbers\("([^"]+)"\)',
                               response['data'])[0].decode("HEX")
                b = re.findall(r'b=toNumbers\("([^"]+)"\)',
                               response['data'])[0].decode("HEX")
                c = re.findall(r'c=toNumbers\("([^"]+)"\)',
                               response['data'])[0].decode("HEX")
                arguments['bypass_testcookie'] = False
                if not type(arguments['cookies']) == dict:
                    arguments['cookies'] = {
                        '__test': ii11.new(a, ii11.MODE_CBC,
                                           b).decrypt(c).encode("HEX")}
                else:
                    arguments['cookies']['__test'] = ii11.new(
                        a, ii11.MODE_CBC, b).decrypt(c).encode("HEX")
                response = downloadpage(**arguments).__dict__

        # Anti Cloudflare
        if bypass_cloudflare:
            response = retry_if_cloudflare(response, arguments)

    if cache:
        CACHE.set(cache_key, response, expiration=cache_expiration)

    return HTTPResponse(response)
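# Minimal usage sketch for downloadpage; the URL and cookie value are
# placeholders, and it assumes HTTPResponse exposes the response dict keys
# (code, data, etc.) as attributes, as the cache branch suggests.
resp = downloadpage('https://example.com/',
                    cookies={'session': 'abc123'},
                    timeout=15)
print(resp.code, len(resp.data))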