def patched_start_response(status, headers, exc_info=None):
    """Append an ``Expires`` header when appropriate, then delegate.

    Relies on ``self``, ``log`` and ``start_response`` from the enclosing
    scope. The header is added only when ``self.debug`` is false and the
    response does not already carry an ``Expires`` header.

    :param status: WSGI status string, passed through unchanged.
    :param headers: list of ``(name, value)`` tuples; mutated in place.
    :param exc_info: optional exception info, passed through unchanged.
    :returns: whatever the wrapped ``start_response`` returns.
    """
    # if self._should_handle(headers)
    wsgi_headers = Headers(headers)

    # If we're debugging, or the response already has an expires
    # header, just skip this.
    if not self.debug and "Expires" not in wsgi_headers:
        # Keep only the media type: drop parameters such as
        # "; charset=utf-8" and the optional whitespace around ";" so that
        # "text/html ; charset=utf-8" still matches a "text/html" entry.
        mime = wsgi_headers.get("Content-Type", "*").split(";")[0].strip()

        if mime in self.expire_seconds:
            # The mime type is explicitly called out: use its delay.
            expire_time = self.make_expire_time_for(mime)
        elif "*" in self.expire_seconds:
            # There is a catch-all wildcard delay: use that.
            expire_time = self.make_expire_time_for("*")
        else:
            # Otherwise, don't set the header.
            expire_time = None

        if expire_time is not None:
            log.debug("Adding expires header value: " + expire_time)
            headers.append(("Expires", expire_time))

    return start_response(status, headers, exc_info)
def testMappingInterface(self): test = [("x", "y")] self.assertEqual(len(Headers([])), 0) self.assertEqual(len(Headers(test[:])), 1) self.assertEqual(Headers(test[:]).keys(), ["x"]) self.assertEqual(Headers(test[:]).values(), ["y"]) self.assertEqual(Headers(test[:]).items(), test) self.assertIsNot(Headers(test).items(), test) # must be copy! h = Headers([]) del h["foo"] # should not raise an error h["Foo"] = "bar" for m in h.has_key, h.__contains__, h.get, h.get_all, h.__getitem__: self.assertTrue(m("foo")) self.assertTrue(m("Foo")) self.assertTrue(m("FOO")) self.assertFalse(m("bar")) self.assertEqual(h["foo"], "bar") h["foo"] = "baz" self.assertEqual(h["FOO"], "baz") self.assertEqual(h.get_all("foo"), ["baz"]) self.assertEqual(h.get("foo", "whee"), "baz") self.assertEqual(h.get("zoo", "whee"), "whee") self.assertEqual(h.setdefault("foo", "whee"), "baz") self.assertEqual(h.setdefault("zoo", "whee"), "whee") self.assertEqual(h["foo"], "baz") self.assertEqual(h["zoo"], "whee")
def testMappingInterface(self): test = [('x', 'y')] self.assertEqual(len(Headers()), 0) self.assertEqual(len(Headers([])), 0) self.assertEqual(len(Headers(test[:])), 1) self.assertEqual(Headers(test[:]).keys(), ['x']) self.assertEqual(Headers(test[:]).values(), ['y']) self.assertEqual(Headers(test[:]).items(), test) self.assertIsNot(Headers(test).items(), test) # must be copy! h = Headers() del h['foo'] # should not raise an error h['Foo'] = 'bar' for m in h.__contains__, h.get, h.get_all, h.__getitem__: self.assertTrue(m('foo')) self.assertTrue(m('Foo')) self.assertTrue(m('FOO')) self.assertFalse(m('bar')) self.assertEqual(h['foo'], 'bar') h['foo'] = 'baz' self.assertEqual(h['FOO'], 'baz') self.assertEqual(h.get_all('foo'), ['baz']) self.assertEqual(h.get("foo", "whee"), "baz") self.assertEqual(h.get("zoo", "whee"), "whee") self.assertEqual(h.setdefault("foo", "whee"), "baz") self.assertEqual(h.setdefault("zoo", "whee"), "whee") self.assertEqual(h["foo"], "baz") self.assertEqual(h["zoo"], "whee")
def patched_start_response(status, headers, exc_info=None):
    """Append an ``Expires`` header when appropriate, then delegate.

    Relies on ``self``, ``log`` and ``start_response`` from the enclosing
    scope and logs each decision at debug level. The header is added only
    when ``self.debug`` is false and no ``Expires`` header is present.

    :param status: WSGI status string, passed through unchanged.
    :param headers: list of ``(name, value)`` tuples; mutated in place.
    :param exc_info: optional exception info, passed through unchanged.
    :returns: whatever the wrapped ``start_response`` returns.
    """
    # if self._should_handle(headers)
    wsgi_headers = Headers(headers)

    # If we're debugging, or the response already has an expires
    # header, just skip this.
    log.debug('Skipping expired headers' if self.debug else 'Calculating expires headers')
    if not self.debug and 'Expires' not in wsgi_headers:
        # Keep only the media type: drop parameters such as
        # "; charset=utf-8" and the optional whitespace around ";" so that
        # "text/html ; charset=utf-8" still matches a "text/html" entry.
        mime = wsgi_headers.get('Content-Type', '*').split(';')[0].strip()
        log.debug('See mime type ' + mime)

        if mime in self.expire_seconds:
            # The mime type is explicitly called out: use its delay.
            log.debug('Matched mimetype exactly.')
            expire_time = self.make_expire_time_for(mime)
        elif '*' in self.expire_seconds:
            # There is a catch-all wildcard delay: use that.
            log.debug('Matched mimetype with universal.')
            expire_time = self.make_expire_time_for('*')
        else:
            # Otherwise, don't set the header.
            log.debug('No mimetype match.')
            expire_time = None

        if expire_time is not None:
            log.debug('Adding expires header value: ' + expire_time)
            headers.append(('Expires', expire_time))

    # Visual separator between the log output of consecutive responses.
    log.debug('-'*60)
    return start_response(status, headers, exc_info)
def testMappingInterface(self): test = [('x','y')] self.assertEqual(len(Headers([])),0) self.assertEqual(len(Headers(test[:])),1) self.assertEqual(Headers(test[:]).keys(), ['x']) self.assertEqual(Headers(test[:]).values(), ['y']) self.assertEqual(Headers(test[:]).items(), test) self.assertFalse(Headers(test).items() is test) # must be copy! h=Headers([]) del h['foo'] # should not raise an error h['Foo'] = 'bar' for m in h.__contains__, h.get, h.get_all, h.__getitem__: self.assertTrue(m('foo')) self.assertTrue(m('Foo')) self.assertTrue(m('FOO')) self.assertFalse(m('bar')) self.assertEqual(h['foo'],'bar') h['foo'] = 'baz' self.assertEqual(h['FOO'],'baz') self.assertEqual(h.get_all('foo'),['baz']) self.assertEqual(h.get("foo","whee"), "baz") self.assertEqual(h.get("zoo","whee"), "whee") self.assertEqual(h.setdefault("foo","whee"), "baz") self.assertEqual(h.setdefault("zoo","whee"), "whee") self.assertEqual(h["foo"],"baz") self.assertEqual(h["zoo"],"whee")
def patched_start_response(status, headers, exc_info=None):
    """Append an ``Expires`` header when appropriate, then delegate.

    Relies on ``self``, ``log`` and ``start_response`` from the enclosing
    scope. The header is added only when ``self.debug`` is false and the
    response does not already carry an ``Expires`` header.

    :param status: WSGI status string, passed through unchanged.
    :param headers: list of ``(name, value)`` tuples; mutated in place.
    :param exc_info: optional exception info, passed through unchanged.
    :returns: whatever the wrapped ``start_response`` returns.
    """
    # if self._should_handle(headers)
    wsgi_headers = Headers(headers)

    # If we're debugging, or the response already has an expires
    # header, just skip this.
    if not self.debug and 'Expires' not in wsgi_headers:
        # Keep only the media type: drop parameters such as
        # "; charset=utf-8" and the optional whitespace around ";" so that
        # "text/html ; charset=utf-8" still matches a "text/html" entry.
        mime = wsgi_headers.get('Content-Type', '*').split(';')[0].strip()

        if mime in self.expire_seconds:
            # The mime type is explicitly called out: use its delay.
            expire_time = self.make_expire_time_for(mime)
        elif '*' in self.expire_seconds:
            # There is a catch-all wildcard delay: use that.
            expire_time = self.make_expire_time_for('*')
        else:
            # Otherwise, don't set the header.
            expire_time = None

        if expire_time is not None:
            log.debug('Adding expires header value: ' + expire_time)
            headers.append(('Expires', expire_time))

    return start_response(status, headers, exc_info)
class Request:
    """Request data extracted from a WSGI ``environ`` mapping.

    On construction the recognised headers are copied into ``headers`` and
    the request body is read from ``environ['wsgi.input']`` into
    ``raw_body``. ``body`` and ``params`` start out as empty dicts.
    """

    id: str
    environ: dict
    wsgi_input: BufferedReader
    path: str
    method: str
    port: str
    host: str
    protocol: str
    server_name: str
    query: dict
    headers: Headers
    body: Dict
    raw_body: bytes
    params: Dict[str, Any]

    def __init__(self, environ: dict):
        """Populate the request from *environ* and read its body."""
        # Random 30-character identifier drawn from the module's ``chars``.
        self.id = ''.join(random.choice(chars) for _ in range(30))
        self.body = dict()
        self.environ = environ
        self.wsgi_input: BufferedReader = cast(BufferedReader, environ.get('wsgi.input'))
        self.path = environ.get('PATH_INFO', '')
        self.method = environ.get('REQUEST_METHOD', '')
        self.port = environ.get('PORT', '')
        self.host = environ.get('HTTP_HOST', '')
        self.protocol = environ.get('HTTP_PROTOCOL', '')
        self.server_name = environ.get('SERVER_NAME', '')
        self.query = parse_qs(environ.get('QUERY_STRING', ''))
        self.params = dict()
        self.headers = Headers()
        self._parse_http_headers()
        self._read_request_body()

    def _parse_http_headers(self: 'Request'):
        """Copy every environ entry named in ``HTTP_HEADERS`` into ``headers``.

        A leading ``HTTP_`` is removed before the lookup; the remaining
        name is converted from ``UPPER_SNAKE`` to ``Title-Dash`` form.
        """
        prefix = 'HTTP_'
        for key, value in self.environ.items():
            # Strip the prefix only: str.replace() would also eat an
            # "HTTP_" occurring in the middle of the name.
            name: str = key[len(prefix):] if key.startswith(prefix) else key
            if name in HTTP_HEADERS:
                final_key: str = capwords(name, '_').replace('_', '-')
                self.headers.add_header(final_key, value)

    def _read_request_body(self: 'Request'):
        """Read the number of bytes given by the Content-Length header into ``raw_body``.

        A missing, malformed or non-positive length, or a missing input
        stream, yields an empty body.
        """
        try:
            request_body_size = int(self.headers.get(CONTENT_LENGTH, '0'))
        except ValueError:
            request_body_size = 0

        # read(-1) would consume the stream until EOF, so a negative length
        # is treated like "no body" instead of being passed through.
        if self.wsgi_input is None or request_body_size <= 0:
            self.raw_body = b''
        else:
            self.raw_body = self.wsgi_input.read(request_body_size)
def __call__(self, environ, start_response):
    """Run the wrapped WSGI app, optionally profiling it.

    Profiling is switched on by the toggle key in the query string or in
    a cookie, and switched off by passing the toggle key with an empty
    value in the query string. When profiling is on and the response is a
    ``200`` ``text/html`` page, the rendered result is inserted into the
    response body and ``Content-Length`` is updated.
    """
    cookie_morsel = Cookie(environ.get("HTTP_COOKIE", "")).get(self.toggle_key)
    query = query_str2dict(environ.get("QUERY_STRING"))

    if cookie_morsel:
        enable_by_cookie = cookie_morsel.value == self.enable_value
    else:
        enable_by_cookie = False
    enable_by_query = query.get(self.toggle_key) == self.enable_value

    # Pop the toggle key so a query like '?_profile=on&_profile=' is not
    # carried over; profiling can only be disabled through the query.
    disable = query.pop(self.toggle_key, None) == ""
    enable = not disable and (enable_by_query or enable_by_cookie)

    run_app, resp_body, saved_ss_args = self._intercept_call()

    # Remaining query switches and the cookie that reflects the new state.
    simple_flag = query.pop(self.SIMPLE_OUTPUT_TOGGLE_KEY, None)
    if simple_flag is not None:
        self.simple_output = simple_flag == "True"

    if enable_by_query and not enable_by_cookie:
        cookie_to_set = "%s=%s; Path=/; HttpOnly" % (self.toggle_key, self.enable_value)
    elif disable:
        cookie_to_set = "%s=; Path=/; Max-Age=1; HttpOnly" % self.toggle_key
    else:
        cookie_to_set = None

    # Call the WSGI app, under the profiler when enabled.
    profile = None
    elapsed = None
    if enable:
        began = time.time()
        profile = Profile()
        profile.runcall(run_app, environ)
        elapsed = time.time() - began
    else:
        run_app(environ)

    status, headers = saved_ss_args[:2]
    headers_dic = Headers(headers)
    if cookie_to_set:
        headers_dic.add_header("Set-Cookie", cookie_to_set)

    # Insert the profiling result into successful HTML responses.
    content_type = headers_dic.get("Content-Type", "")
    is_html_ok = status.startswith("200") and content_type.startswith("text/html")
    if enable and is_html_ok:
        environ["QUERY_STRING"] = dict2query_str(query)
        charset_match = _find_charset.match(content_type)
        if charset_match:
            encoding = charset_match.group(1)
        else:
            encoding = "ascii"
        report = self.render_result(profile, elapsed, environ)
        rendered = report.encode(encoding, "replace")
        resp_body = [insert_into_body(rendered, b"".join(resp_body))]
        headers_dic["Content-Length"] = str(len(resp_body[0]))

    if len(saved_ss_args) == 3:
        exc_info = saved_ss_args[2]
    else:
        exc_info = None
    start_response(status, headers, exc_info)
    return resp_body
def testBytes(self):
    """Exercise ``Headers`` with byte-string names and values."""
    initial = [(b"Content-Type", b"text/plain; charset=utf-8")]
    hdrs = Headers(initial)
    self.assertEqual("text/plain; charset=utf-8", hdrs.get("Content-Type"))

    # Item assignment with a bytes name and value.
    hdrs[b"Foo"] = bytes(b"bar")
    for name in ("Foo", b"Foo"):
        self.assertEqual("bar", hdrs.get(name))

    # setdefault() with a bytes name and value.
    hdrs.setdefault(b"Bar", b"foo")
    for name in ("Bar", b"Bar"):
        self.assertEqual("foo", hdrs.get(name))

    # add_header() with bytes positional and keyword parameters.
    hdrs.add_header(b"content-disposition", b"attachment", filename=b"bud.gif")
    expected = 'attachment; filename="bud.gif"'
    self.assertEqual(expected, hdrs.get("content-disposition"))

    del hdrs["content-disposition"]
    self.assertTrue(b"content-disposition" not in hdrs)
def testBytes(self):
    """Exercise ``Headers`` with byte-string names and values."""
    initial = [(b"Content-Type", b"text/plain; charset=utf-8")]
    hdrs = Headers(initial)
    self.assertEqual("text/plain; charset=utf-8", hdrs.get("Content-Type"))

    # Item assignment with a bytes name and value.
    hdrs[b"Foo"] = bytes(b"bar")
    for name in ("Foo", b"Foo"):
        self.assertEqual("bar", hdrs.get(name))

    # setdefault() with a bytes name and value.
    hdrs.setdefault(b"Bar", b"foo")
    for name in ("Bar", b"Bar"):
        self.assertEqual("foo", hdrs.get(name))

    # add_header() with bytes positional and keyword parameters.
    hdrs.add_header(b'content-disposition', b'attachment', filename=b'bud.gif')
    expected = 'attachment; filename="bud.gif"'
    self.assertEqual(expected, hdrs.get("content-disposition"))

    del hdrs['content-disposition']
    self.assertTrue(b'content-disposition' not in hdrs)
class AzureFunctionsWsgi:
    """Convert between Azure Functions API and the WSGI protocol.

    ``main`` builds a WSGI ``environ`` from an ``HttpRequest``, calls the
    wrapped application and turns the result into an ``HttpResponse``.
    """

    def __init__(self, app, include_os_environ=True):
        """Wrap *app*.

        :param app: the WSGI application callable.
        :param include_os_environ: when true, the mapping returned by
            ``read_environ()`` is merged into every request environ.
        """
        self._app = app
        self._include_os_environ = include_os_environ
        self._req = None
        self._context = None
        self._status = None
        self._wsgi_headers = []
        self._azure_headers = None
        self._body = None
        self._errors = StringIO()
        self._environ = {}

    def main(self, req: func.HttpRequest, context: func.Context = None) -> func.HttpResponse:
        """Handle one request and return the application's response.

        :param req: the incoming Azure Functions HTTP request.
        :param context: optional invocation context; when given, its
            function directory, name and invocation id are exposed in
            the environ.
        :returns: an ``HttpResponse`` carrying the joined body, the WSGI
            headers and the status/mimetype derived from them.
        """
        self._req = req
        self._context = context
        self._get_body()
        self._setup_environ()

        result = self._app(self._environ, self._start_response)
        try:
            payload = b''.join(result)
        finally:
            # PEP 3333: a server must call close() on the returned
            # iterable when it provides one.
            close = getattr(result, 'close', None)
            if close is not None:
                close()

        if self._errors.tell() > 0:
            # Log what the app wrote to wsgi.errors line by line, then
            # reset the buffer so it is not logged again next request.
            self._errors.seek(0, 0)
            for line in self._errors:
                logging.error(line.rstrip('\n'))
            self._errors.seek(0, 0)
            self._errors.truncate()

        response_values = self._response_values()
        return func.HttpResponse(payload, headers=self._azure_headers, **response_values)

    def _get_body(self):
        """Decode the request body into ``self._body``.

        Uses the ``charset`` parameter of the Content-Type header when
        present, otherwise UTF-8.
        """
        body_encoding = 'utf-8'  # default
        body_content_type = self._req.headers.get('Content-Type')
        if body_content_type and 'charset=' in body_content_type:
            for part in body_content_type.split(';'):
                # The media type itself ("text/html") has no "=", so it
                # must be skipped rather than unpacked.
                directive, sep, value = part.partition('=')
                if sep and 'charset' in directive:
                    body_encoding = value.strip().strip('"')
                    break
        self._body = self._req.get_body().decode(body_encoding)

    def _setup_environ(self):
        """Build the WSGI environ for the current request into ``self._environ``."""
        req_url = urlparse(self._req.url)
        port = req_url.port
        if not port:
            port = 443 if req_url.scheme == 'https' else 80

        # The body is handed to the app re-encoded with str.encode()'s
        # default; keep the bytes so lengths are measured on them.
        body_bytes = self._body.encode()

        environ = {
            'REQUEST_METHOD': self._req.method,
            'SCRIPT_NAME': '',  # SCRIPT_NAME is always the root
            'PATH_INFO': req_url.path,
            'SERVER_NAME': req_url.hostname,
            'SERVER_PORT': str(port),
            'SERVER_PROTOCOL': 'HTTP/1.1',  # TODO
            'SERVER_SOFTWARE': 'azure-functions',
            'wsgi.version': (1, 0),
            'wsgi.url_scheme': req_url.scheme,
            'wsgi.input': BytesIO(body_bytes),
            'wsgi.errors': self._errors,
            'wsgi.multithread': True,
            'wsgi.multiprocess': False,
            'wsgi.run_once': False,
            'azure_functions.url': self._req.url,
        }
        if self._context:
            environ.update({
                'azure_functions.function_directory':
                    self._context.function_directory,
                'azure_functions.function_name':
                    self._context.function_name,
                'azure_functions.invocation_id':
                    self._context.invocation_id,
            })
        if req_url.query:
            environ['QUERY_STRING'] = req_url.query

        # Content-Type / Content-Length are exposed without the HTTP_ prefix.
        passthru_headers = ['Content-Type', 'Content-Length']
        for header_name in passthru_headers:
            if header_name in self._req.headers:
                environ[self._header_name(
                    header_name)] = self._req.headers[header_name]
        for header, value in self._req.headers.items():
            environ['HTTP_' + self._header_name(header)] = str(value)

        if self._include_os_environ:
            environ.update(read_environ())

        # Workaround Content-Length bug when 'application/json'.
        # CONTENT_LENGTH counts bytes, so measure the encoded body rather
        # than the decoded string.
        if environ.get('CONTENT_TYPE') == 'application/json':
            environ['CONTENT_LENGTH'] = str(len(body_bytes))

        self._environ = environ

    def _header_name(self, header_name):
        """Return *header_name* upper-cased with dashes turned into underscores."""
        return header_name.replace('-', '_').upper()

    def _start_response(self, status, headers, exc_info=None):
        """Record the status and headers supplied by the application.

        ``exc_info`` is accepted because WSGI applications may pass it as
        a third argument; it is not used.
        """
        self._status = status
        self._wsgi_headers = headers

    def _response_values(self):
        """Return the ``HttpResponse`` keyword arguments for the recorded response."""
        self._azure_headers = Headers(self._wsgi_headers)
        return {
            'status_code': int(self._status.split(' ')[0]),
            'mimetype': self._azure_headers.get('Content-Type', 'text/plain'),
            'charset': 'utf-8',
        }
class MultipartPart(object):
    """One part of a multipart body, fed to the object line by line.

    Lines are parsed as headers until a blank line is seen; afterwards
    they are written to ``file``, which starts as an in-memory buffer and
    is replaced by a temporary file once ``memfile_limit`` is exceeded.
    """

    def __init__(self, buffer_size=2 ** 16, memfile_limit=2 ** 18, charset="latin1"):
        self.headerlist = []
        self.headers = None
        self.file = False  # becomes a file object once the headers end
        self.size = 0
        self._buf = tob("")  # line terminator held back from the last write
        self.disposition, self.name, self.filename = None, None, None
        self.content_type, self.charset = None, charset
        self.memfile_limit = memfile_limit
        self.buffer_size = buffer_size

    def feed(self, line, nl=""):
        """Pass *line* to the body writer once headers are done, else to the header writer."""
        if self.file:
            return self.write_body(line, nl)
        return self.write_header(line, nl)

    def write_header(self, line, nl):
        """Parse one header line.

        :raises MultipartError: if the line has no terminator or is
            neither a continuation nor a ``name: value`` pair.
        """
        line = line.decode(self.charset or "latin1")
        if not nl:
            raise MultipartError("Unexpected end of line in header.")
        if not line.strip():  # blank line -> end of header segment
            self.finish_header()
        elif line[0] in " \t" and self.headerlist:
            # Continuation line: append to the previous header's value.
            name, value = self.headerlist.pop()
            self.headerlist.append((name, value + line.strip()))
        else:
            if ":" not in line:
                raise MultipartError("Syntax error in header: No colon.")
            name, value = line.split(":", 1)
            self.headerlist.append((name.strip(), value.strip()))

    def write_body(self, line, nl):
        """Append *line* to the body, holding back its terminator *nl*.

        :raises MultipartError: if the body grows beyond a positive
            Content-Length.
        """
        if not line and not nl:
            return  # This does not even flush the buffer
        self.size += len(line) + len(self._buf)
        self.file.write(self._buf + line)
        self._buf = nl
        if self.content_length > 0 and self.size > self.content_length:
            raise MultipartError("Size of body exceeds Content-Length header.")
        if self.size > self.memfile_limit and isinstance(self.file, BytesIO):
            # TODO: What about non-file uploads that exceed the memfile_limit?
            self.file, old = TemporaryFile(mode="w+b"), self.file
            old.seek(0)
            copy_file(old, self.file, self.size, self.buffer_size)

    def finish_header(self):
        """Switch to body mode and derive the part's metadata from its headers.

        :raises MultipartError: if Content-Disposition is missing.
        """
        self.file = BytesIO()
        self.headers = Headers(self.headerlist)
        cdis = self.headers.get("Content-Disposition", "")
        ctype = self.headers.get("Content-Type", "")
        if not cdis:
            raise MultipartError("Content-Disposition header is missing.")
        self.disposition, self.options = parse_options_header(cdis)
        self.name = self.options.get("name")
        self.filename = self.options.get("filename")
        self.content_type, options = parse_options_header(ctype)
        self.charset = options.get("charset") or self.charset
        # -1 stands for "no Content-Length header".
        self.content_length = int(self.headers.get("Content-Length", "-1"))

    def is_buffered(self):
        """ Return true if the data is fully buffered in memory."""
        return isinstance(self.file, BytesIO)

    @property
    def value(self):
        """ Data decoded with the specified charset """
        return self.raw.decode(self.charset)

    @property
    def raw(self):
        """ Data without decoding """
        pos = self.file.tell()
        self.file.seek(0)
        try:
            return self.file.read()
        finally:
            # Leave the file position where the caller had it.
            self.file.seek(pos)

    def save_as(self, path):
        """Copy the body to *path* and return what ``copy_file`` returns.

        The destination is closed before returning and the position of
        ``file`` is restored.
        """
        with open(path, "wb") as fp:
            pos = self.file.tell()
            try:
                self.file.seek(0)
                size = copy_file(self.file, fp)
            finally:
                self.file.seek(pos)
        return size
class MultipartPart(object):
    """One part of a multipart body, fed to the object line by line.

    Lines are parsed as headers until a blank line is seen; afterwards
    they are written to ``file``, decoding ``base64`` or
    ``quoted-printable`` content on the way. ``file`` starts as an
    in-memory buffer and is replaced by a temporary file once
    ``memfile_limit`` is exceeded.
    """

    def __init__(self, buffer_size=2 ** 16, memfile_limit=2 ** 18, charset='latin1'):
        self.headerlist = []
        self.headers = None
        self.file = False  # becomes a file object once the headers end
        self.size = 0
        self._buf = tob('')  # line terminator held back from the last write
        self.disposition, self.name, self.filename = None, None, None
        self.content_type, self.charset = None, charset
        self.memfile_limit = memfile_limit
        self.buffer_size = buffer_size

    def feed(self, line, nl=''):
        """Pass *line* to the body writer once headers are done, else to the header writer."""
        if self.file:
            return self.write_body(line, nl)
        return self.write_header(line, nl)

    def write_header(self, line, nl):
        """Parse one header line.

        :raises MalformattedError: if the line has no terminator or is
            neither a continuation nor a ``name: value`` pair.
        """
        line = line.decode(self.charset or 'latin1')
        if not nl:
            raise MalformattedError('Unexpected end of line in header.')
        if not line.strip():  # blank line -> end of header segment
            self.finish_header()
        elif line[0] in ' \t' and self.headerlist:
            # Continuation line: append to the previous header's value.
            name, value = self.headerlist.pop()
            self.headerlist.append((name, value + line.strip()))
        else:
            if ':' not in line:
                raise MalformattedError("Syntax error in header: No colon.")
            name, value = line.split(':', 1)
            self.headerlist.append((name.strip(), value.strip()))

    def write_body(self, line, nl):
        """Append *line* to the body, holding back its terminator *nl*.

        :raises MalformattedError: if a transfer-encoded line arrives
            without terminator, or the body grows beyond a positive
            Content-Length.
        """
        if not line and not nl:
            return  # This does not even flush the buffer
        if self.content_transfer_encoding and not nl:
            raise MalformattedError('Line too long on transfer_encoded chunk.')
        if self.content_transfer_encoding == 'quoted-printable':
            # A trailing "=" means the line terminator is not kept.
            if line.endswith(tob('=')):
                nl = tob('')
            line = quopri.decodestring(line)
        elif self.content_transfer_encoding == 'base64':
            line, nl = binascii.a2b_base64(line), tob('')
        self.size += len(line) + len(self._buf)
        self.file.write(self._buf + line)
        self._buf = nl
        if self.content_length > 0 and self.size > self.content_length:
            raise MalformattedError('Size of body exceeds Content-Length header.')
        if self.size > self.memfile_limit and isinstance(self.file, BytesIO):
            self.file, old = TemporaryFile(mode='w+b'), self.file
            old.seek(0)
            copy_file(old, self.file, self.size, self.buffer_size)

    def finish_header(self):
        """Switch to body mode and derive the part's metadata from its headers.

        :raises MalformattedError: if Content-Disposition is missing or
            Content-Transfer-Encoding is neither absent, ``base64`` nor
            ``quoted-printable``.
        """
        self.file = BytesIO()
        self.headers = Headers(self.headerlist)
        cdis = self.headers.get('Content-Disposition', '')
        ctype = self.headers.get('Content-Type', '')
        if not cdis:
            raise MalformattedError('Content-Disposition header is missing.')
        self.disposition, self.options = parse_options_header(cdis)
        self.name = self.options.get('name')
        self.filename = self.options.get('filename')
        self.content_type, options = parse_options_header(ctype)
        self.charset = options.get('charset') or self.charset
        # -1 stands for "no Content-Length header".
        self.content_length = int(self.headers.get('Content-Length', '-1'))
        self.content_transfer_encoding = \
            self.headers.get('Content-Transfer-Encoding')
        if self.content_transfer_encoding not in \
                [None, 'base64', 'quoted-printable']:
            raise MalformattedError('invalid Content-Transfer-Encoding')

    def is_buffered(self):
        ''' Return true if the data is fully buffered in memory.'''
        return isinstance(self.file, BytesIO)

    def value(self, limit):
        """Return at most *limit* bytes of the body, decoded with ``charset``.

        :raises MemoryLimitError: if the body holds more than *limit* bytes.
        """
        pos = self.file.tell()
        try:
            self.file.seek(0)
            val = self.file.read(limit)
            # Anything left after *limit* bytes means the body is too big.
            if self.file.read(1):
                raise MemoryLimitError("Request too big. Increase mem_limit.")
        finally:
            self.file.seek(pos)
        return val.decode(self.charset)

    def save_as(self, path):
        """Copy the body to *path* and return what ``copy_file`` returns.

        The destination is closed before returning and the position of
        ``file`` is restored.
        """
        with open(path, 'wb') as fp:
            pos = self.file.tell()
            try:
                self.file.seek(0)
                size = copy_file(self.file, fp)
            finally:
                self.file.seek(pos)
        return size
class MultipartPart(object):
    """One part of a multipart body, fed to the object line by line.

    Lines are parsed as headers until a blank line is seen; afterwards
    they are written to ``file``, which starts as an in-memory buffer and
    is replaced by a temporary file once ``memfile_limit`` is exceeded.
    """

    def __init__(self, buffer_size=2**16, memfile_limit=2**18, charset='latin1'):
        self.headerlist = []
        self.headers = None
        self.file = False  # becomes a file object once the headers end
        self.size = 0
        self._buf = tob('')  # line terminator held back from the last write
        self.disposition, self.name, self.filename = None, None, None
        self.content_type, self.charset = None, charset
        self.memfile_limit = memfile_limit
        self.buffer_size = buffer_size

    def feed(self, line, nl=''):
        """Pass *line* to the body writer once headers are done, else to the header writer."""
        if self.file:
            return self.write_body(line, nl)
        return self.write_header(line, nl)

    def write_header(self, line, nl):
        """Parse one header line.

        :raises MultipartError: if the line has no terminator or is
            neither a continuation nor a ``name: value`` pair.
        """
        line = line.decode(self.charset or 'latin1')
        if not nl:
            raise MultipartError('Unexpected end of line in header.')
        if not line.strip():  # blank line -> end of header segment
            self.finish_header()
        elif line[0] in ' \t' and self.headerlist:
            # Continuation line: append to the previous header's value.
            name, value = self.headerlist.pop()
            self.headerlist.append((name, value + line.strip()))
        else:
            if ':' not in line:
                raise MultipartError("Syntax error in header: No colon.")
            name, value = line.split(':', 1)
            self.headerlist.append((name.strip(), value.strip()))

    def write_body(self, line, nl):
        """Append *line* to the body, holding back its terminator *nl*.

        :raises MultipartError: if the body grows beyond a positive
            Content-Length.
        """
        if not line and not nl:
            return  # This does not even flush the buffer
        self.size += len(line) + len(self._buf)
        self.file.write(self._buf + line)
        self._buf = nl
        if self.content_length > 0 and self.size > self.content_length:
            raise MultipartError('Size of body exceeds Content-Length header.')
        if self.size > self.memfile_limit and isinstance(self.file, BytesIO):
            # TODO: What about non-file uploads that exceed the memfile_limit?
            self.file, old = TemporaryFile(mode='w+b'), self.file
            old.seek(0)
            copy_file(old, self.file, self.size, self.buffer_size)

    def finish_header(self):
        """Switch to body mode and derive the part's metadata from its headers.

        :raises MultipartError: if Content-Disposition is missing.
        """
        self.file = BytesIO()
        self.headers = Headers(self.headerlist)
        cdis = self.headers.get('Content-Disposition', '')
        ctype = self.headers.get('Content-Type', '')
        if not cdis:
            raise MultipartError('Content-Disposition header is missing.')
        self.disposition, self.options = parse_options_header(cdis)
        self.name = self.options.get('name')
        self.filename = self.options.get('filename')
        self.content_type, options = parse_options_header(ctype)
        self.charset = options.get('charset') or self.charset
        # -1 stands for "no Content-Length header".
        self.content_length = int(self.headers.get('Content-Length', '-1'))

    def is_buffered(self):
        ''' Return true if the data is fully buffered in memory.'''
        return isinstance(self.file, BytesIO)

    @property
    def value(self):
        ''' Data decoded with the specified charset '''
        pos = self.file.tell()
        self.file.seek(0)
        try:
            val = self.file.read()
        finally:
            # Restore the position even when the read fails.
            self.file.seek(pos)
        return val.decode(self.charset)

    def save_as(self, path):
        """Copy the body to *path* and return what ``copy_file`` returns.

        The destination is closed before returning and the position of
        ``file`` is restored.
        """
        with open(path, 'wb') as fp:
            pos = self.file.tell()
            try:
                self.file.seek(0)
                size = copy_file(self.file, fp)
            finally:
                self.file.seek(pos)
        return size
class MultipartPart(object):
    """One part of a multipart body, fed to the object line by line.

    Header lines are collected until a blank line arrives; later lines
    go to ``file``, an in-memory buffer that is swapped for a temporary
    file when the body outgrows ``memfile_limit``.
    """

    def __init__(self, buffer_size=2**16, memfile_limit=2**18, charset="latin1"):
        # Limits and decoding settings.
        self.buffer_size = buffer_size
        self.memfile_limit = memfile_limit
        self.charset = charset
        # Header state.
        self.headerlist = []
        self.headers = None
        self.disposition = None
        self.name = None
        self.filename = None
        self.content_type = None
        # Body state; ``file`` stays False until the headers are finished.
        self.file = False
        self.size = 0
        self._buf = b""

    def feed(self, line, nl=""):
        """Hand *line* to the body writer if headers are done, else to the header writer."""
        handler = self.write_body if self.file else self.write_header
        return handler(line, nl)

    def write_header(self, line, nl):
        """Parse one header line; raise MultipartError on malformed input."""
        text = line.decode(self.charset)
        if not nl:
            raise MultipartError("Unexpected end of line in header.")

        stripped = text.strip()
        if not stripped:
            # blank line -> end of header segment
            self.finish_header()
            return

        if text[0] in " \t" and self.headerlist:
            # Continuation of the previous header.
            prev_name, prev_value = self.headerlist.pop()
            self.headerlist.append((prev_name, prev_value + stripped))
            return

        if ":" not in text:
            raise MultipartError("Syntax error in header: No colon.")
        field, _, content = text.partition(":")
        self.headerlist.append((field.strip(), content.strip()))

    def write_body(self, line, nl):
        """Append *line* to the body and keep *nl* back for the next write."""
        if not (line or nl):
            return  # This does not even flush the buffer

        self.size += len(line) + len(self._buf)
        self.file.write(self._buf + line)
        self._buf = nl

        too_long = self.content_length > 0 and self.size > self.content_length
        if too_long:
            raise MultipartError("Size of body exceeds Content-Length header.")

        if self.size > self.memfile_limit and isinstance(self.file, BytesIO):
            # TODO: What about non-file uploads that exceed the memfile_limit?
            in_memory = self.file
            self.file = TemporaryFile(mode="w+b")
            in_memory.seek(0)
            copy_file(in_memory, self.file, self.size, self.buffer_size)

    def finish_header(self):
        """Enter body mode and read the part's metadata from its headers."""
        self.file = BytesIO()
        self.headers = Headers(self.headerlist)
        disposition_header = self.headers.get("Content-Disposition", "")
        type_header = self.headers.get("Content-Type", "")
        if not disposition_header:
            raise MultipartError("Content-Disposition header is missing.")

        self.disposition, self.options = parse_options_header(
            disposition_header)
        self.name = self.options.get("name")
        self.filename = self.options.get("filename")

        self.content_type, type_options = parse_options_header(type_header)
        self.charset = type_options.get("charset") or self.charset
        self.content_length = int(self.headers.get("Content-Length", "-1"))

    def is_buffered(self):
        """ Return true if the data is fully buffered in memory."""
        return isinstance(self.file, BytesIO)

    @property
    def value(self):
        """ Data decoded with the specified charset """
        return self.raw.decode(self.charset)

    @property
    def raw(self):
        """ Data without decoding """
        saved_pos = self.file.tell()
        self.file.seek(0)
        try:
            return self.file.read()
        finally:
            self.file.seek(saved_pos)

    def save_as(self, path):
        """Copy the body to *path*; return what ``copy_file`` returns."""
        with open(path, "wb") as out:
            saved_pos = self.file.tell()
            try:
                self.file.seek(0)
                written = copy_file(self.file, out)
            finally:
                self.file.seek(saved_pos)
        return written

    def close(self):
        """Close the body file, if any, and reset ``file`` to False."""
        if not self.file:
            return
        self.file.close()
        self.file = False
class MultipartPart(object):
    """One part of a multipart body, fed to the object line by line.

    Lines are parsed as headers until a blank line is seen; afterwards
    they are written to ``file``, which starts as an in-memory buffer and
    is replaced by a temporary file once ``memfile_limit`` is exceeded.
    """

    def __init__(self, buffer_size=2**16, memfile_limit=2**18, charset='latin1'):
        self.headerlist = []
        self.headers = None
        self.file = False  # becomes a file object once the headers end
        self.size = 0
        self._buf = tob('')  # line terminator held back from the last write
        self.disposition, self.name, self.filename = None, None, None
        self.content_type, self.charset = None, charset
        self.memfile_limit = memfile_limit
        self.buffer_size = buffer_size

    def feed(self, line, nl=''):
        """Pass *line* to the body writer once headers are done, else to the header writer."""
        if self.file:
            return self.write_body(line, nl)
        return self.write_header(line, nl)

    def write_header(self, line, nl):
        """Parse one header line.

        :raises MultipartError: if the line has no terminator or is
            neither a continuation nor a ``name: value`` pair.
        """
        line = line.decode(self.charset or 'latin1')
        if not nl:
            raise MultipartError('Unexpected end of line in header.')
        if not line.strip():  # blank line -> end of header segment
            self.finish_header()
        elif line[0] in ' \t' and self.headerlist:
            # Continuation line: append to the previous header's value.
            name, value = self.headerlist.pop()
            self.headerlist.append((name, value + line.strip()))
        else:
            if ':' not in line:
                raise MultipartError("Syntax error in header: No colon.")
            name, value = line.split(':', 1)
            self.headerlist.append((name.strip(), value.strip()))

    def write_body(self, line, nl):
        """Append *line* to the body, holding back its terminator *nl*.

        :raises MultipartError: if the body grows beyond a positive
            Content-Length.
        """
        if not line and not nl:
            return  # This does not even flush the buffer
        self.size += len(line) + len(self._buf)
        self.file.write(self._buf + line)
        self._buf = nl
        if self.content_length > 0 and self.size > self.content_length:
            raise MultipartError('Size of body exceeds Content-Length header.')
        if self.size > self.memfile_limit and isinstance(self.file, BytesIO):
            # TODO: What about non-file uploads that exceed the memfile_limit?
            self.file, old = TemporaryFile(mode='w+b'), self.file
            old.seek(0)
            copy_file(old, self.file, self.size, self.buffer_size)

    def finish_header(self):
        """Switch to body mode and derive the part's metadata from its headers.

        :raises MultipartError: if Content-Disposition is missing.
        """
        self.file = BytesIO()
        self.headers = Headers(self.headerlist)
        cdis = self.headers.get('Content-Disposition', '')
        ctype = self.headers.get('Content-Type', '')
        if not cdis:
            raise MultipartError('Content-Disposition header is missing.')
        self.disposition, self.options = parse_options_header(cdis)
        self.name = self.options.get('name')
        self.filename = self.options.get('filename')
        self.content_type, options = parse_options_header(ctype)
        self.charset = options.get('charset') or self.charset
        # -1 stands for "no Content-Length header".
        self.content_length = int(self.headers.get('Content-Length', '-1'))

    def is_buffered(self):
        ''' Return true if the data is fully buffered in memory.'''
        return isinstance(self.file, BytesIO)

    @property
    def value(self):
        ''' Data decoded with the specified charset '''
        return self.raw.decode(self.charset)

    @property
    def raw(self):
        ''' Data without decoding '''
        pos = self.file.tell()
        self.file.seek(0)
        try:
            return self.file.read()
        finally:
            # Leave the file position where the caller had it.
            self.file.seek(pos)

    def save_as(self, path):
        """Copy the body to *path* and return what ``copy_file`` returns.

        The destination is closed before returning and the position of
        ``file`` is restored.
        """
        with open(path, 'wb') as fp:
            pos = self.file.tell()
            try:
                self.file.seek(0)
                size = copy_file(self.file, fp)
            finally:
                self.file.seek(pos)
        return size
class Url(Packer, object):
    """A source URL together with the settings used to request it.

    Assigning ``url`` re-derives ``protocol``, ``host``, ``port`` and
    ``path`` from it (see ``__setattr__``). ``target`` holds the result of
    the most recent ``activate``/``reload``.
    """

    def __init__(self, id, url, cookie='', headers=HEADERS_CHROME, host=None, port=None, path=None, protocol=None, proxy=None, max_thread=-1, range_format='Range: bytes=%d-%d'):
        """Create the URL description.

        ``host``, ``port``, ``path`` and ``protocol`` override the values
        parsed from *url* when they are not None.

        :raises ValueError: if *headers* is neither a dict nor a Headers.
        """
        self.id = id
        # Setting ``url`` first fills protocol/host/port/path via __setattr__.
        self.url = url
        self.host = host if host is not None else getattr(self, 'host', None)
        self.port = port if port is not None else getattr(self, 'port', None)
        self.path = path if path is not None else getattr(self, 'path', None)
        self.protocol = protocol if protocol is not None else getattr(
            self, 'protocol', None)
        self.cookie = cookie
        if isinstance(headers, Headers):
            # NOTE(review): a Headers instance is stored as-is, so the
            # default object may be shared between instances - confirm
            # nothing mutates it.
            self.headers = headers
        elif isinstance(headers, dict):
            self.headers = Headers(list(headers.items()))
        else:
            raise ValueError('headers must be an instance of dict or Headers')
        self.etag = None
        self.proxy = proxy
        self.target = Target()
        self.max_thread = max_thread
        self.range_format = range_format

    def __eq__(self, other):
        """Two Urls are equal when url, cookie, proxy and range_format match."""
        if isinstance(other, Url):
            return self.url == other.url and \
                self.cookie == other.cookie and \
                self.proxy == other.proxy and \
                self.range_format == other.range_format
        # Let Python fall back to its default comparison for other types.
        return NotImplemented

    def config(self):
        pass

    def getContentSize(self):
        """Return the size reported by the target's headers, or -1 if unknown.

        Uses Content-Length for a 200 response and the total after the
        ``/`` in Content-Range for a 206 response.
        """
        headers = self.target.headers
        if self.target.code == 200:
            length = int(headers.get('Content-Length', -1))
            if length != -1:
                return length
        if self.target.code == 206 and headers.get('Content-Range'):
            total = headers.get('Content-Range').split('/')[-1].strip()
            # "*" is the marker for an unknown complete length.
            if total == '*':
                return -1
            return int(total)
        return -1

    def getFileName(self):
        """Return a file name for the target, or None if none can be derived.

        Prefers the quoted ``filename`` in Content-Disposition; otherwise
        uses the last path segment, appending the extension returned by
        ``_content_type`` when the segment has none.
        """
        ctd = self.target.headers.get('Content-Disposition')
        if ctd is not None:
            filename = re.findall(r'filename="(.*?)"', ctd)
            if filename:
                return filename[0]

        filename = self.path.split('?')[0].split('/')[-1]
        if filename == '':
            return None
        if '.' not in filename or filename.split('.')[-1] == '':
            extension = _content_type(
                self.target.headers.get('Content-Type'))
            filename = filename + extension
        return filename

    def reload(self):
        """Load ``url`` into ``target``."""
        self.target.load(self.url)

    def __setattr__(self, key, value):
        """Set the attribute; assigning ``url`` also updates its components."""
        object.__setattr__(self, key, value)
        if key == 'url':
            self.protocol, s1 = splittype(self.url)
            if s1:
                s2, self.path = splithost(s1)
                if s2:
                    self.host, port = splitport(s2)
                    self.port = int(port) if port is not None else None

            # No explicit port: fall back to the scheme's default.
            if not getattr(self, 'port', None):
                if self.protocol == 'http':
                    self.port = 80
                elif self.protocol == 'https':
                    self.port = 443

    def activate(self):
        """Request the URL and store final URL, headers and status in ``target``."""
        res, cookie_dict = self.__request__()
        # The response object exposes its headers differently on 2.x and 3.x.
        if sys.version_info < (3, 0):
            headers_items = res.info().items()
        else:
            headers_items = res.getheaders()
        self.target.update(res.geturl(), headers_items, res.getcode())

    def __request__(self):
        """Open the URL, retrying up to three times.

        :returns: ``(response, cookies)`` where *cookies* is the cookie
            jar's internal cookie mapping.
        :raises Exception: ``'UrlNotRespond'`` after three failed attempts.
        """
        Cookiejar = CookieJar()
        opener = build_opener(HTTPCookieProcessor(Cookiejar))
        _header = dict(self.headers.items())
        if self.cookie:
            _header.update({'Cookie': self.cookie})
        req = Request(self.url, headers=_header, origin_req_host=self.host)

        error_counter = 0
        while error_counter < 3:
            try:
                res = opener.open(req)
                break
            except Exception:
                error_counter += 1
                time.sleep(0.5)
        else:
            # The loop ended without a successful open.
            raise Exception('UrlNotRespond')

        return res, Cookiejar._cookies

    def getHeader(self, name, default=None):
        """Return the request header *name*, or *default* when absent."""
        return self.headers.get(name, default)

    def __packet_params__(self):
        """Return the attribute names listed for packing."""
        # NOTE(review): 'protocal' does not match the ``protocol``
        # attribute set in __init__ - presumably a typo; confirm how
        # Packer uses these names before changing it.
        return [
            'id', 'url', 'host', 'port', 'protocal', 'cookie', 'etag', 'proxy',
            'max_thread', 'range_format', 'headers'
        ]
class MultipartPart(object):
    """One part of a multipart body, fed to the object line by line.

    Lines are parsed as headers until a blank line is seen; afterwards
    they are written to ``file``, decoding ``base64`` or
    ``quoted-printable`` content on the way. ``file`` starts as an
    in-memory buffer and is replaced by a temporary file once
    ``memfile_limit`` is exceeded.
    """

    def __init__(self, buffer_size=2**16, memfile_limit=2**18, charset='latin1'):
        self.headerlist = []
        self.headers = None
        self.file = False  # becomes a file object once the headers end
        self.size = 0
        self._buf = tob('')  # line terminator held back from the last write
        self.disposition, self.name, self.filename = None, None, None
        self.content_type, self.charset = None, charset
        self.memfile_limit = memfile_limit
        self.buffer_size = buffer_size

    def feed(self, line, nl=''):
        """Pass *line* to the body writer once headers are done, else to the header writer."""
        if self.file:
            return self.write_body(line, nl)
        return self.write_header(line, nl)

    def write_header(self, line, nl):
        """Parse one header line.

        :raises MultipartError: if the line has no terminator or is
            neither a continuation nor a ``name: value`` pair.
        """
        line = line.decode(self.charset or 'latin1')
        if not nl:
            raise MultipartError('Unexpected end of line in header.')
        if not line.strip():  # blank line -> end of header segment
            self.finish_header()
        elif line[0] in ' \t' and self.headerlist:
            # Continuation line: append to the previous header's value.
            name, value = self.headerlist.pop()
            self.headerlist.append((name, value + line.strip()))
        else:
            if ':' not in line:
                raise MultipartError("Syntax error in header: No colon.")
            name, value = line.split(':', 1)
            self.headerlist.append((name.strip(), value.strip()))

    def write_body(self, line, nl):
        """Append *line* to the body, holding back its terminator *nl*.

        :raises MultipartError: if a transfer-encoded line arrives
            without terminator, or the body grows beyond a positive
            Content-Length.
        """
        if not line and not nl:
            return  # This does not even flush the buffer
        if self.content_transfer_encoding and not nl:
            raise MultipartError('Line too long on transfer_encoded chunk.')
        if self.content_transfer_encoding == 'quoted-printable':
            # A trailing "=" means the line terminator is not kept.
            if line.endswith(tob('=')):
                nl = tob('')
            line = quopri.decodestring(line)
        elif self.content_transfer_encoding == 'base64':
            line, nl = binascii.a2b_base64(line), tob('')
        self.size += len(line) + len(self._buf)
        self.file.write(self._buf + line)
        self._buf = nl
        if self.content_length > 0 and self.size > self.content_length:
            raise MultipartError('Size of body exceeds Content-Length header.')
        if self.size > self.memfile_limit and isinstance(self.file, BytesIO):
            self.file, old = TemporaryFile(mode='w+b'), self.file
            old.seek(0)
            copy_file(old, self.file, self.size, self.buffer_size)

    def finish_header(self):
        """Switch to body mode and derive the part's metadata from its headers.

        :raises MultipartError: if Content-Disposition is missing or
            Content-Transfer-Encoding is neither absent, ``base64`` nor
            ``quoted-printable``.
        """
        self.file = BytesIO()
        self.headers = Headers(self.headerlist)
        cdis = self.headers.get('Content-Disposition', '')
        ctype = self.headers.get('Content-Type', '')
        if not cdis:
            raise MultipartError('Content-Disposition header is missing.')
        self.disposition, self.options = parse_options_header(cdis)
        self.name = self.options.get('name')
        self.filename = self.options.get('filename')
        self.content_type, options = parse_options_header(ctype)
        self.charset = options.get('charset') or self.charset
        # -1 stands for "no Content-Length header".
        self.content_length = int(self.headers.get('Content-Length', '-1'))
        self.content_transfer_encoding = \
            self.headers.get('Content-Transfer-Encoding')
        if self.content_transfer_encoding not in \
                [None, 'base64', 'quoted-printable']:
            raise MultipartError('invalid Content-Transfer-Encoding')

    def is_buffered(self):
        ''' Return true if the data is fully buffered in memory.'''
        return isinstance(self.file, BytesIO)

    def value(self, limit):
        """Return at most *limit* bytes of the body, decoded with ``charset``.

        :raises MultipartError: if the body holds more than *limit* bytes.
        """
        pos = self.file.tell()
        try:
            self.file.seek(0)
            val = self.file.read(limit)
            # Anything left after *limit* bytes means the body is too big.
            if self.file.read(1):
                raise MultipartError("Request too big. Increase mem_limit.")
        finally:
            self.file.seek(pos)
        return val.decode(self.charset)

    def save_as(self, path):
        """Copy the body to *path* and return what ``copy_file`` returns.

        The destination is closed before returning and the position of
        ``file`` is restored.
        """
        with open(path, 'wb') as fp:
            pos = self.file.tell()
            try:
                self.file.seek(0)
                size = copy_file(self.file, fp)
            finally:
                self.file.seek(pos)
        return size