def calculate_expiration(self):
    """Compute when this cookie expires from its "max-age" or
    "expires" attribute.

    Stores the result (seconds since epoch) in self.expire, or None
    if the cookie does not expire.
    """
    self.expire = None  # default: session cookie, never expires
    attrs = self.attributes
    if "max-age" in attrs:
        now = time.time()
        try:
            delta = int(attrs["max-age"])
            if delta == 0:
                # "Expire immediately": back-date by one second, since
                # some clocks only have full-second precision.
                self.expire = now - 1
            else:
                self.expire = now + delta
        except (ValueError, OverflowError):
            # non-numeric max-age, or now + delta overflowed — ignore
            pass
    elif "expires" in attrs:
        when = attrs["expires"]
        try:
            self.expire = cookielib.http2time(when)
        except ValueError:
            # see http://bugs.python.org/issue16181
            raise CookieError("Invalid expiration date in %r" % when)
def test_clear_cookie(self):
    """Clearing a cookie must send it back with an expires date in the past."""
    self.headers["Cookie"] = "testcookie=testvalue"
    obj = self._request("clear_cookie")
    info = obj.info()
    cookies = Cookie.BaseCookie()
    cookies.load(info["Set-Cookie"])
    # assertIn gives a clearer failure message than assertTrue(x in y)
    self.assertIn("testcookie", cookies)
    cookie = cookies["testcookie"]
    t = cookiejar.http2time(cookie["expires"])
    # fixed typo in the original message ("then" -> "than")
    self.assertLess(t, time.time(),
                    "expires time must be smaller than current time")
def test_set_cookie(self):
    """Setting a cookie must emit path=/, a future expires date and HttpOnly."""
    obj = self._request("set_cookie")
    info = obj.info()
    cookies = Cookie.BaseCookie()
    cookies.load(info["Set-Cookie"])
    # assertIn/assertEqual give clearer failure messages than assertTrue
    self.assertIn("testcookie", cookies)
    cookie = cookies["testcookie"]
    self.assertEqual(cookie["path"], "/")
    t = cookiejar.http2time(cookie["expires"])
    # fixed typo in the original message ("then" -> "than")
    self.assertGreaterEqual(t, time.time(),
                            "expires is smaller than current time")
    self.assertIn("httponly", cookie)
def parse_ns_headers(ns_headers):
    """Improved parser for netscape-style cookies.

    This version can handle multiple cookies in a single header.
    Returns a list of cookies; each cookie is a list of (key, value)
    pairs, with a ("version", "0") pair appended when no explicit
    version attribute was present.
    """
    known_attrs = ("expires", "domain", "path", "secure", "port", "max-age")
    result = []
    for ns_header in ns_headers:
        pairs = []
        version_set = False
        # split on "; " separators and on ", " only when it is followed
        # by what looks like a new cookie name (name=...)
        for ii, param in enumerate(
                re.split(r"(;\s)|(,\s(?=[a-zA-Z0-9_\-]+=))", ns_header)):
            if param is None:
                # unmatched alternation group from re.split
                continue
            param = param.rstrip()
            if param == "" or param[0] == ";":
                continue
            if param[0] == ",":
                # "," separator: the current cookie is complete
                if pairs:
                    if not version_set:
                        pairs.append(("version", "0"))
                    result.append(pairs)
                    pairs = []
                continue
            if "=" not in param:
                k, v = param, None
            else:
                k, v = re.split(r"\s*=\s*", param, 1)
                k = k.lstrip()
            if ii != 0:
                # everything after the first pair may be a known attribute;
                # normalize its case
                lc = k.lower()
                if lc in known_attrs:
                    k = lc
                if k == "version":
                    # This is an RFC 2109 cookie.
                    version_set = True
                # guard v against None: a bare "expires" attribute without
                # a value used to crash on v.startswith()
                if k == "expires" and v is not None:
                    # convert expires date to seconds since epoch
                    if v.startswith('"'):
                        v = v[1:]
                    if v.endswith('"'):
                        v = v[:-1]
                    v = cookielib.http2time(v)  # None if invalid
            pairs.append((k, v))
        if pairs:
            if not version_set:
                pairs.append(("version", "0"))
            result.append(pairs)
    return result
def create(cookie_string):
    """Build a cookielib.Cookie from a raw "Set-Cookie: ..." header line.

    Returns the Cookie object, or None if the string cannot be parsed
    (best-effort parser: any failure is swallowed).
    """
    try:
        # split "Set-Cookie:x=y; domain=...; expires=...;..."
        set_string, tuple_string = cookie_string.split(":", 1)
        # parse version from set string
        version = Cookie._version(set_string)
        # parse name, value from tuple string
        nv = Cookie._name_value(tuple_string)
        # change tuple string to an attribute mapping
        # (renamed from "dict" so the builtin is not shadowed)
        attrs = Cookie._dict(tuple_string)
        if nv is None or attrs is None:
            raise Exception("invalid cookie string: " + cookie_string)
        name, value = nv
        port = attrs.get("port", None)
        port_specified = port is not None
        domain = attrs.get("domain", None)
        domain_specified = domain is not None
        domain_initial_dot = False
        if domain is not None:
            domain_initial_dot = domain.startswith(".")
        path = attrs.get("path", None)
        path_specified = path is not None
        secure = attrs.get("secure", False)
        expires = attrs.get("expires", None)
        if expires is not None:
            expires = cookielib.http2time(expires)
        discard = attrs.get("discard", False)
        comment = attrs.get("comment", None)
        comment_url = None
        rest = {}
        # create cookielib.Cookie object
        cookie = cookielib.Cookie(
            version, name, value, port, port_specified, domain,
            domain_specified, domain_initial_dot, path, path_specified,
            secure, expires, discard, comment, comment_url, rest)
        return cookie
    except Exception:
        # "except Exception, e" was Python-2-only syntax and the binding
        # was unused; deliberate best-effort fallback to None
        return None
def parse_ns_headers(ns_headers):
    """Improved parser for netscape-style cookies.

    This version can handle multiple cookies in a single header.
    Each parsed cookie is a list of (key, value) pairs; a
    ("version", "0") pair is appended when no version attribute is seen.
    """
    known_attrs = ("expires", "domain", "path", "secure", "port", "max-age")
    result = []
    for ns_header in ns_headers:
        pairs = []
        version_set = False
        # "; " separates attributes; ", " starts a new cookie only when
        # followed by something shaped like name=
        for ii, param in enumerate(re.split(r"(;\s)|(,\s(?=[a-zA-Z0-9_\-]+=))", ns_header)):
            if param is None:
                # re.split emits None for the unmatched alternation group
                continue
            param = param.rstrip()
            if param == "" or param[0] == ";":
                continue
            if param[0] == ",":
                # flush the finished cookie and start a new one
                if pairs:
                    if not version_set:
                        pairs.append(("version", "0"))
                    result.append(pairs)
                    pairs = []
                continue
            if "=" not in param:
                k, v = param, None
            else:
                k, v = re.split(r"\s*=\s*", param, 1)
                k = k.lstrip()
            if ii != 0:
                lc = k.lower()
                if lc in known_attrs:
                    k = lc
                if k == "version":
                    # This is an RFC 2109 cookie.
                    version_set = True
                # fix: a valueless "expires" attribute left v as None and
                # crashed on v.startswith(); guard before touching v
                if k == "expires" and v is not None:
                    # convert expires date to seconds since epoch
                    if v.startswith('"'):
                        v = v[1:]
                    if v.endswith('"'):
                        v = v[:-1]
                    v = cookielib.http2time(v)  # None if invalid
            pairs.append((k, v))
        if pairs:
            if not version_set:
                pairs.append(("version", "0"))
            result.append(pairs)
    return result
def checkCacheInfo(self, path):
    """Return True when the client's If-Modified-Since header is still
    current for the file at *path* (i.e. the file has not changed since),
    False otherwise or on any error."""
    try:
        mtime = self.get_mtime(path)
        # no conditional header -> cannot serve a 304
        if "HTTP_IF_MODIFIED_SINCE" not in self.request_headers:
            return False
        raw = self.request_headers["HTTP_IF_MODIFIED_SINCE"]
        try:
            since = http2time(raw)
        except Exception as err:
            log.error(err)
            return False
        # unchanged only when the file's mtime is not newer than the stamp
        return int(mtime) <= int(since)
    except Exception as err:
        log.error(str(err), exc_info=True)
        return False
def checkCacheInfo(self, path):
    """True if the resource at *path* is unchanged relative to the
    request's If-Modified-Since timestamp; False otherwise (including
    when the header is absent or anything goes wrong)."""
    try:
        modified = self.get_mtime(path)
        if "HTTP_IF_MODIFIED_SINCE" in self.request_headers:
            stamp = self.request_headers["HTTP_IF_MODIFIED_SINCE"]
            try:
                stamp = http2time(stamp)
            except Exception as err:
                log.error(err)
                return False
            # newer file -> must be re-sent -> not cache-valid
            return not (int(modified) > int(stamp))
        return False
    except Exception as err:
        log.error(str(err), exc_info=True)
        return False
def _http(
        self, url, post=None, headers={}, method=None, proxy=None,
        cookcookie=True, location=True, locationcount=0):
    """Issue one HTTP(S) request through the connection pool.

    Returns a 5-tuple ``(status, header, body, redirect_url, log)``.
    Maintains one cookie jar per host, decodes gzip/deflate bodies and
    follows up to 5 redirects (follow-ups downgrade to GET unless the
    method was HEAD).

    NOTE(review): ``headers={}`` is a shared mutable default, but the
    body deep-copies it before mutating, so this is harmless here.
    """
    if not method:
        # infer the verb from the presence of a request body
        if post:
            method = "POST"
        else:
            method = "GET"
    rep = None
    urlinfo = https, host, port, path = self._get_urlinfo(url)
    log = {}
    con = self.conpool._get_connect(urlinfo, proxy)
    # con .set_debuglevel(2) #?
    conerr = False
    try:
        # wrap _send_output so the raw request text is captured into log
        con._send_output = self._send_output(con._send_output, con, log)
        tmpheaders = copy.deepcopy(headers)  # never mutate the caller's dict
        tmpheaders['Accept-Encoding'] = 'gzip, deflate'
        tmpheaders['Connection'] = 'Keep-Alive'
        tmpheaders['User-Agent'] = tmpheaders['User-Agent'] if tmpheaders.get('User-Agent') else 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36'
        if cookcookie:
            # per-host cookie jar, seeded from self.initcookie on first use
            c = self.cookiepool.get(host, None)
            if not c:
                self.cookiepool[host] = self.initcookie
                c = self.cookiepool.get(host)
            if 'Cookie' in tmpheaders:
                # merge a caller-supplied Cookie header into the jar
                cookie_str = tmpheaders['Cookie'].strip()
                if not cookie_str.endswith(';'):
                    cookie_str += ";"
                for cookiepart in cookie_str.split(";"):
                    if cookiepart.strip() != "":
                        cookiekey, cookievalue = cookiepart.split("=", 1)
                        c[cookiekey.strip()] = cookievalue.strip()
            # NOTE(review): deleting from c while iterating c.keys() is
            # only safe on Python 2 where keys() returns a list — confirm
            # this module is Python 2 only.
            for k in c.keys():
                m = c[k]
                # check cookie path
                if path.find(m['path']) != 0:
                    continue
                expires = m['expires']
                if not expires:
                    continue
                # check cookie expires time; drop stale entries
                if cookielib.http2time(expires) < time.time():
                    del c[k]
            cookie_str = c.output(attrs=[], header='', sep=';').strip()
            if cookie_str:
                tmpheaders['Cookie'] = cookie_str
        if post:
            tmpheaders['Content-Type'] = tmpheaders.get(
                'Content-Type', 'application/x-www-form-urlencoded')
        else:
            # content-length err 411: some servers reject body-less
            # requests without an explicit Content-Length
            tmpheaders['Content-Length'] = tmpheaders.get('Content-Length', 0)
            if method == 'GET':
                del tmpheaders['Content-Length']
        con.request(method, path, post, tmpheaders)
        rep = con.getresponse()
        body = rep.read()
        encode = rep.msg.get('content-encoding', None)
        if encode == 'gzip':
            body = gzip.GzipFile(fileobj=StringIO.StringIO(body)).read()
        elif encode == 'deflate':
            try:
                # try a raw deflate stream first, fall back to zlib-wrapped
                body = zlib.decompress(body, -zlib.MAX_WBITS)
            except:
                body = zlib.decompress(body)
        body = self._decode_html(
            rep.msg.dict.get('content-type', ''), body)
        retheader = Compatibleheader(str(rep.msg))
        retheader.setdict(rep.msg.dict)
        redirect = rep.msg.dict.get('location', url)
        if not redirect.startswith('http'):
            # relative Location header: absolutize against the request URL
            redirect = urlparse.urljoin(url, redirect)
        if cookcookie and "set-cookie" in rep.msg.dict:
            c = self.cookiepool[host]
            c.load(rep.msg.dict['set-cookie'])
    except httplib.ImproperConnectionState:
        # connection is unusable; flag it so the finally block discards it
        conerr = True
        raise
    except:
        # everything else propagates unchanged
        raise
    finally:
        # return the socket to the pool only when it is still reusable
        if conerr or (rep and rep.msg.get('connection') == 'close') or proxy:
            self.conpool._release_connect(urlinfo)
            con.close()
        else:
            self.conpool._put_connect(urlinfo, con)
    log["url"] = url
    if post:
        # log['request'] is populated by the _send_output wrapper above —
        # presumably always set once a request was sent; verify
        log['request'] += "\r\n\r\n" + post
    log["response"] = "HTTP/%.1f %d %s" % (
        rep.version * 0.1, rep.status, rep.reason) + '\r\n' + str(retheader) + '\r\n' + (body[:4096])
    if location and url != redirect and locationcount < 5:
        # follow the redirect; anything except HEAD is retried as GET
        method = 'HEAD' if method == 'HEAD' else 'GET'
        a, b, c, d, e = self._http(
            redirect, method=method, proxy=proxy, cookcookie=cookcookie,
            location=location, locationcount=locationcount + 1)
        # keep the final hop's response text in this hop's log
        log["response"] = e["response"]
        return a, b, c, d, log
    return rep.status, retheader, body, redirect, log
def set_cookie(self, kaka): """PLaces a cookie (a cookielib.Cookie based on a set-cookie header line) in the cookie jar. Always chose the shortest expires time. """ # default rfc2109=False # max-age, httponly for cookie_name, morsel in kaka.items(): std_attr = ATTRS.copy() std_attr["name"] = cookie_name _tmp = morsel.coded_value if _tmp.startswith('"') and _tmp.endswith('"'): std_attr["value"] = _tmp[1:-1] else: std_attr["value"] = _tmp std_attr["version"] = 0 attr = "" # copy attributes that have values try: for attr in morsel.keys(): if attr in ATTRS: if morsel[attr]: if attr == "expires": std_attr[attr] = cookielib.http2time( morsel[attr]) else: std_attr[attr] = morsel[attr] elif attr == "max-age": if morsel[attr]: std_attr["expires"] = cookielib.http2time( morsel[attr]) except TimeFormatError: # Ignore cookie logger.info( "Time format error on %s parameter in received cookie" % (attr, )) continue for att, spec in PAIRS.items(): if std_attr[att]: std_attr[spec] = True if std_attr["domain"] and std_attr["domain"].startswith("."): std_attr["domain_initial_dot"] = True if morsel["max-age"] is 0: try: self.cookiejar.clear(domain=std_attr["domain"], path=std_attr["path"], name=std_attr["name"]) except ValueError: pass else: # Fix for Microsoft cookie error if "version" in std_attr: try: std_attr["version"] = std_attr["version"].split(",")[0] except (TypeError, AttributeError): pass new_cookie = cookielib.Cookie(**std_attr) self.cookiejar.set_cookie(new_cookie)
def _http(
        self, url, post=None, headers={}, method=None, proxy=None,
        cookcookie=True, location=True, locationcount=0):
    """Issue one HTTP(S) request through the connection pool.

    Returns a 5-tuple ``(status, header, body, redirect_url, log)``.
    Maintains a per-host cookie jar, decodes gzip/deflate bodies and
    follows up to 5 redirects.

    NOTE(review): unlike the sibling variant, this version re-sends the
    original *method* on redirect but drops *post*, so a redirected POST
    is re-issued without its body — confirm whether that is intended.
    NOTE(review): ``headers={}`` is a shared mutable default, but the
    body deep-copies it before mutating, so this is harmless here.
    """
    if not method:
        # infer the verb from the presence of a request body
        if post:
            method = "POST"
        else:
            method = "GET"
    rep = None
    urlinfo = https, host, port, path = self._get_urlinfo(url)
    log = {}
    con = self.conpool._get_connect(urlinfo, proxy)
    # con .set_debuglevel(2) #?
    conerr = False
    try:
        # wrap _send_output so the raw request text is captured into log
        con._send_output = self._send_output(con._send_output, con, log)
        tmpheaders = copy.deepcopy(headers)  # never mutate the caller's dict
        tmpheaders['Accept-Encoding'] = 'gzip, deflate'
        tmpheaders['Connection'] = 'Keep-Alive'
        tmpheaders['User-Agent'] = tmpheaders['User-Agent'] if tmpheaders.get('User-Agent') else 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36'
        if cookcookie:
            # per-host cookie jar, seeded from self.initcookie on first use
            c = self.cookiepool.get(host, None)
            if not c:
                self.cookiepool[host] = self.initcookie
                c = self.cookiepool.get(host)
            if 'Cookie' in tmpheaders:
                # merge a caller-supplied Cookie header into the jar
                cookie_str = tmpheaders['Cookie'].strip()
                if not cookie_str.endswith(';'):
                    cookie_str += ";"
                for cookiepart in cookie_str.split(";"):
                    if cookiepart.strip() != "":
                        cookiekey, cookievalue = cookiepart.split("=", 1)
                        c[cookiekey.strip()] = cookievalue.strip()
            # NOTE(review): deleting from c while iterating c.keys() is
            # only safe on Python 2 where keys() returns a list.
            for k in c.keys():
                m = c[k]
                # check cookie path
                if path.find(m['path']) != 0:
                    continue
                expires = m['expires']
                if not expires:
                    continue
                # check cookie expires time; drop stale entries
                if cookielib.http2time(expires) < time.time():
                    del c[k]
            cookie_str = c.output(attrs=[], header='', sep=';').strip()
            if cookie_str:
                tmpheaders['Cookie'] = cookie_str
        if post:
            tmpheaders['Content-Type'] = tmpheaders.get(
                'Content-Type', 'application/x-www-form-urlencoded')
        else:
            # content-length err 411: some servers reject body-less
            # requests without an explicit Content-Length
            tmpheaders['Content-Length'] = tmpheaders.get('Content-Length', 0)
        con.request(method, path, post, tmpheaders)
        rep = con.getresponse()
        body = rep.read()
        encode = rep.msg.get('content-encoding', None)
        if encode == 'gzip':
            body = gzip.GzipFile(fileobj=StringIO.StringIO(body)).read()
        elif encode == 'deflate':
            try:
                # try a raw deflate stream first, fall back to zlib-wrapped
                body = zlib.decompress(body, -zlib.MAX_WBITS)
            except:
                body = zlib.decompress(body)
        body = self._decode_html(
            rep.msg.dict.get('content-type', ''), body)
        retheader = Compatibleheader(str(rep.msg))
        retheader.setdict(rep.msg.dict)
        redirect = rep.msg.dict.get('location', url)
        if not redirect.startswith('http'):
            # relative Location header: absolutize against the request URL
            redirect = urlparse.urljoin(url, redirect)
        if cookcookie and "set-cookie" in rep.msg.dict:
            c = self.cookiepool[host]
            c.load(rep.msg.dict['set-cookie'])
    except httplib.ImproperConnectionState:
        # connection is unusable; flag it so the finally block discards it
        conerr = True
        raise
    except:
        # everything else propagates unchanged
        raise
    finally:
        # return the socket to the pool only when it is still reusable
        if conerr or (rep and rep.msg.get('connection') == 'close') or proxy:
            self.conpool._release_connect(urlinfo)
            con.close()
        else:
            self.conpool._put_connect(urlinfo, con)
    log["url"] = url
    if post:
        # log['request'] is populated by the _send_output wrapper above —
        # presumably always set once a request was sent; verify
        log['request'] += "\r\n\r\n" + post
    log["response"] = "HTTP/%.1f %d %s" % (
        rep.version * 0.1, rep.status, rep.reason) + '\r\n' + str(retheader) + '\r\n' + (body[:4096])
    if location and url != redirect and locationcount < 5:
        # follow the redirect with the same method (see NOTE above)
        a, b, c, d, e = self._http(
            redirect, method=method, proxy=proxy, cookcookie=cookcookie,
            location=location, locationcount=locationcount + 1)
        # keep the final hop's response text in this hop's log
        log["response"] = e["response"]
        return a, b, c, d, log
    return rep.status, retheader, body, redirect, log
def _request(self, args):
    """Execute one curl-like HTTP(S) request described by *args* over a
    raw socket, with keep-alive pooling, cookie persistence, chunked /
    gzip / deflate decoding and redirect following.

    Returns ``(http_code, response_headers_text, content, 0, url)``.
    Raises CurlError on protocol, send, receive or MIME-type errors.
    """
    keep_alive_timeout = 0
    url = args.url
    # default to http:// when no scheme was given
    if not (url.lower().find('http://') == 0 or url.lower().find('https://') == 0):
        url = 'http://' + url
    default_port = {'http': 80, 'https': 443}
    r = urlparse.urlparse(url)
    # TLS is only done end-to-end; through a proxy we speak plain HTTP
    isssl = r.scheme == 'https' and args.proxy == None
    path = r.path
    if not path:
        path = '/'
    if r.scheme not in default_port:
        raise CurlError(Curl.CURLE_UNSUPPORTED_PROTOCOL)
    if r.query:
        path = path + '?' + r.query
    port = r.port
    host = r.hostname
    if port is None:
        port = default_port[r.scheme]
    else:
        port = int(port)
    is_reuse = False
    target = '%s:%d' % (r.hostname, port)
    conn = None
    self._buf = ''
    if args.proxy:
        connecthost = args.proxy[0]
        connectport = args.proxy[1]
    else:
        connecthost = host
        connectport = port
    # try to reuse a pooled keep-alive connection, else open a new one
    for i in range(2):
        if target not in self._conn_pool:
            conn = self._connect(connecthost, connectport,
                                 args.connect_timeout, isssl)
        else:
            keep_alive_timeout = self._timeout_pool[target]
            if keep_alive_timeout == 0 or time.time(
            ) <= keep_alive_timeout:
                conn = self._conn_pool[target]
                is_reuse = True
            else:
                # NOTE(review): an expired pool entry is never removed
                # here, so the retry finds it again and falls through to
                # the "not conn" error instead of reconnecting — looks
                # like a bug; confirm intended behavior.
                continue
            # the connection is taken out of the pool while in use
            del self._conn_pool[target]
            del self._timeout_pool[target]
        break
    if not conn:
        raise CurlError(Curl.CURLE_SEND_ERROR)
    conn.settimeout(20)
    self._conn = conn
    postdata = ''
    if args.raw:
        # caller supplied the raw request text; just fill in the target
        request, method = self._make_request(
            args.raw, url if args.proxy else path, host)
    else:
        # derive the method from the explicit flags, then build headers
        method = None
        if args.request:
            method = args.request
        elif args.head:
            method = 'HEAD'
        elif args.upload_file:
            method = 'PUT'
        elif args.data:
            method = 'POST'
        else:
            method = 'GET'
        headers = {}
        if r.port:
            headers['Host'] = '%s:%d' % (r.hostname, port)
        else:
            headers['Host'] = r.hostname
        headers['User-Agent'] = args.user_agent
        if args.referer:
            headers['Referer'] = args.referer
        headers['Accept'] = '*/*'
        headers['Connection'] = 'Keep-Alive'
        if args.header:
            # user headers override defaults case-insensitively
            for line in args.header:
                pos = line.find(':')
                if pos > 0:
                    key = line[:pos]
                    val = line[pos + 1:].strip()
                    for k in headers:
                        if k.lower() == key.lower():
                            key = k
                            break
                    headers[key] = val
        if args.data:
            # multiple -d values are joined with '&', like curl
            if len(args.data) == 1:
                postdata = args.data[0]
            else:
                for d in args.data:
                    if postdata != '':
                        postdata += '&'
                    postdata += d
            headers['Content-Length'] = str(len(postdata))
            if method == 'POST':
                if not headers.has_key('Content-Type'):
                    headers[
                        'Content-Type'] = 'application/x-www-form-urlencoded'
        # basic auth: credentials embedded in the URL win over --user
        authinfo = None
        if args.user:
            authinfo = args.user
        if r.username:
            authinfo = r.username + ':' + r.password
        if authinfo:
            headers['Authorization'] = 'Basic ' + base64.b64encode(
                authinfo)
        cookie_str = str(self._init_cookie) if self._init_cookie else ''
        if target in self._cookie_pool:
            c = self._cookie_pool[target]
            # drop cookies that do not match the path or have expired
            # (deleting while iterating keys() is Python-2-only safe)
            for k in c.keys():
                m = c[k]
                if r.path.find(m['path']) != 0:
                    continue
                expires = m['expires']
                if not expires:
                    continue
                if cookielib.http2time(expires) < time.time():
                    del c[k]
            cookie_str += c.output(attrs=[], header='', sep=';').strip()
        if args.cookie:
            if cookie_str:
                cookie_str += '; ' + args.cookie
            else:
                cookie_str = args.cookie
        if cookie_str:
            headers['Cookie'] = cookie_str
        # proxies receive the absolute URL in the request line
        if args.proxy:
            request = '%s %s HTTP/1.1\r\n' % (method, url)
        else:
            request = '%s %s HTTP/1.1\r\n' % (method, path)
        for k in headers:
            request += k + ': ' + headers[k] + '\r\n'
        request += '\r\n'
    response = ''
    content = ''
    msg = {}
    http_code = 0
    # two attempts: a stale reused connection gets one reconnect retry
    for i in range(2):
        msg = {}
        response = ''
        mime_type = None
        try:
            if args.upload_file:
                # PUT with Expect/100-continue style handshake
                conn.sendall(request)
                line = self._read_line()
                if line.find('100 Continue') != -1:
                    self._read_line()
                    conn.sendall(postdata)
                else:
                    if response == '':
                        cut = line.split()
                        if len(cut) == 2:
                            http_code = int(cut[1])
                    response += line
            elif postdata:
                conn.sendall(request + postdata)
            else:
                conn.sendall(request)
        except:
            raise CurlError(Curl.CURLE_SEND_ERROR)
        try:
            # read the status line and headers up to the blank line
            while True:
                line = self._read_line()
                if line == '\r\n' or line == '\n':
                    response += line
                    break
                elif line == '':
                    raise CurlError(Curl.CURLE_RECV_ERROR)
                if response == '':
                    # first line is the status line
                    cut = line.split()
                    http_code = int(cut[1])
                response += line
                pos = line.find(':')
                if pos == -1:
                    continue
                end = line.find('\r')
                key = line[:pos].lower()
                val = line[pos + 1:end].strip()
                msg[key] = val
                if key == 'set-cookie':
                    # store received cookies in the per-target jar
                    if target in self._cookie_pool:
                        c = self._cookie_pool[target]
                    else:
                        c = Cookie.SimpleCookie()
                        self._cookie_pool[target] = c
                    c.load(val)
                elif key == 'keep-alive':
                    # remember the server-advertised keep-alive deadline
                    m = RE_KEEPALIVE_TIMEOUT.search(val)
                    if m:
                        keep_alive_timeout = int(m.group(1))
                        if keep_alive_timeout > 0:
                            keep_alive_timeout += time.time()
                elif args.mime_type and key == 'content-type':
                    m = RE_MIME_TYPE.search(val)
                    if m:
                        mime_type = m.group(1).strip()
        except CurlError as e:
            # a dead reused socket gets one fresh reconnect, then re-send
            if i == 0 and is_reuse:
                conn = self._connect(host, port, args.connect_timeout,
                                     isssl)
            else:
                raise e
        else:
            break
    # MIME filter is skipped when a redirect will be followed instead
    if args.mime_type and not (args.location and msg.has_key('location')):
        if not mime_type or mime_type.lower().find(
                args.mime_type.lower()) == -1:
            raise CurlError(Curl.CURLE_MIME_ERROR)
    if method != 'HEAD':
        if msg.get('transfer-encoding', None) == 'chunked':
            # chunked body: size line in hex, then that many bytes
            while True:
                chunk_size = int(self._read_line(), 16)
                if chunk_size > 0:
                    content += self._read(chunk_size)
                    self._read_line()
                if chunk_size <= 0:
                    break
        else:
            content_len = msg.get('content-length', None)
            if content_len == None:
                # no length given: read until the connection closes
                if http_code != 204:
                    content = self._read()
            elif content_len > 0:
                content_len = int(content_len)
                content = self._read(content_len)
                if len(content) != content_len:
                    raise CurlError(Curl.CURLE_RECV_ERROR)
        encode = msg.get('content-encoding', None)
        if encode == 'gzip':
            content = gzip.GzipFile(
                fileobj=StringIO.StringIO(content)).read()
        elif encode == 'deflate':
            # try a raw deflate stream first, fall back to zlib-wrapped
            try:
                content = zlib.decompress(content, -zlib.MAX_WBITS)
            except:
                content = zlib.decompress(content)
    # pool the connection again unless the server asked to close it or
    # its keep-alive deadline has already passed
    if msg.get('connection', '').find(
        'close'
    ) != -1 or keep_alive_timeout > 0 and time.time() > keep_alive_timeout:
        conn.close()
    else:
        self._conn_pool[target] = conn
        self._timeout_pool[target] = keep_alive_timeout
    if msg.has_key('location') and args.location and (
            args.max_redirs == 0 or self._redirs < args.max_redirs):
        self._redirs += 1
        args.data = ''
        # NOTE(review): location_url is unbound when the Location header
        # is neither absolute nor host-relative (e.g. "page.html"),
        # which would raise UnboundLocalError below — confirm.
        if msg['location'].startswith('http'):
            location_url = msg['location']
        elif msg['location'].startswith('/'):
            location_url = '%s://%s%s' % (r.scheme, r.netloc,
                                          msg['location'])
        if args.url != location_url:
            args.url = location_url
            return self._request(args)
    self._error_count = 0
    if self.sniff_func:
        self.sniff_func(args.url, response, content)
    return (http_code, response, content, 0, url)
def set_cookie(self, kaka): """PLaces a cookie (a cookielib.Cookie based on a set-cookie header line) in the cookie jar. Always chose the shortest expires time. """ # default rfc2109=False # max-age, httponly for cookie_name, morsel in kaka.items(): std_attr = ATTRS.copy() std_attr["name"] = cookie_name _tmp = morsel.coded_value if _tmp.startswith('"') and _tmp.endswith('"'): std_attr["value"] = _tmp[1:-1] else: std_attr["value"] = _tmp std_attr["version"] = 0 attr = "" # copy attributes that have values try: for attr in morsel.keys(): if attr in ATTRS: if morsel[attr]: if attr == "expires": std_attr[attr] = cookielib.http2time(morsel[attr]) else: std_attr[attr] = morsel[attr] elif attr == "max-age": if morsel[attr]: std_attr["expires"] = cookielib.http2time(morsel[attr]) except TimeFormatError: # Ignore cookie logger.info( "Time format error on %s parameter in received cookie" % ( attr,)) continue for att, spec in PAIRS.items(): if std_attr[att]: std_attr[spec] = True if std_attr["domain"] and std_attr["domain"].startswith("."): std_attr["domain_initial_dot"] = True if morsel["max-age"] is 0: try: self.cookiejar.clear(domain=std_attr["domain"], path=std_attr["path"], name=std_attr["name"]) except ValueError: pass else: # Fix for Microsoft cookie error if "version" in std_attr: try: std_attr["version"] = std_attr["version"].split(",")[0] except (TypeError, AttributeError): pass new_cookie = cookielib.Cookie(**std_attr) self.cookiejar.set_cookie(new_cookie)