def headerReceived(self, line):
    """
    Process one request header line for the streaming proxy.

    Overridden so that the transfer decoder delivers body data directly to
    the current request's C{bodyProducer.dataReceived}, and so that no other
    per-header work is done beyond recording the header.

    A line without a C{:} separator, or a C{Content-Length} whose value is
    not an integer, is answered with C{400 Bad Request} and the connection
    is closed; such a header is not recorded.

    @type line: C{bytes}
    @param line: A line from the header section of a request, excluding
        the line delimiter.
    """
    badRequest = b"HTTP/1.1 400 Bad Request\r\n\r\n"

    try:
        header, data = line.split(b':', 1)
    except ValueError:
        # No colon in the line: unpacking the single-element split result
        # fails.  Reject the request rather than letting the error escape.
        self.transport.write(badRequest)
        self.transport.loseConnection()
        return

    header = header.lower()
    data = data.strip()
    req = self.requests[-1]

    if header == b'content-length':
        try:
            self.length = int(data)
        except ValueError:
            self.transport.write(badRequest)
            self.length = None
            self.transport.loseConnection()
            return
        self._transferDecoder = _IdentityTransferDecoder(
            self.length, req.bodyProducer.dataReceived,
            self._finishRequestBody)
    elif header == b'transfer-encoding' and data.lower() == b'chunked':
        self.length = None
        self._transferDecoder = _ChunkedTransferDecoder(
            req.bodyProducer.dataReceived, self._finishRequestBody)

    # Record the header, appending to any values already seen for it.
    reqHeaders = req.requestHeaders
    values = reqHeaders.getRawHeaders(header)
    if values is not None:
        values.append(data)
    else:
        reqHeaders.setRawHeaders(header, [data])
def headerReceived(self, line):
    """Do pre-processing (for content-length) and store this header away.

    Enforce the per-request header limit. A line without a ``:`` separator,
    or a content-length whose value is not an integer, is answered with
    ``400 Bad Request`` and the connection is closed; such a header is
    neither stored nor counted.

    :param line: A line from the header section of a request, excluding
        the line delimiter.
    """
    bad_request = "HTTP/1.1 400 Bad Request\r\n\r\n"

    try:
        header, data = line.split(':', 1)
    except ValueError:
        # No colon in the line: the split yields one element and the
        # unpacking fails.  Reject instead of raising out of the parser.
        self.transport.write(bad_request)
        self.transport.loseConnection()
        return

    header = header.lower()
    data = data.strip()
    request = self.requests[-1]

    if header == 'content-length':
        try:
            self.length = int(data)
        except ValueError:
            self.transport.write(bad_request)
            self.length = None
            self.transport.loseConnection()
            return
        self._transferDecoder = _IdentityTransferDecoder(
            self.length, request.handleContentChunk,
            self._finishRequestBody)
    elif header == 'transfer-encoding' and data.lower() == 'chunked':
        self.length = None
        self._transferDecoder = _ChunkedTransferDecoder(
            request.handleContentChunk, self._finishRequestBody)

    # Store the header, appending to any values already seen for it.
    reqHeaders = request.requestHeaders
    values = reqHeaders.getRawHeaders(header)
    if values is not None:
        values.append(data)
    else:
        reqHeaders.setRawHeaders(header, [data])

    self._receivedHeaderCount += 1
    if self._receivedHeaderCount > self.maxHeaders:
        self.transport.write(bad_request)
        self.transport.loseConnection()
def _setup_identity_decoding(self, length):
    """
    Install an identity transfer decoder as C{self._body_decoder}.

    Nothing is installed when C{length} is C{0}.  Otherwise the decoder is
    built with C{self._got_decoder_data} as its data callback and
    C{self._all_data_decoded} as its finish callback.
    """
    if length != 0:
        self._body_decoder = http._IdentityTransferDecoder(
            length, self._got_decoder_data, self._all_data_decoded)
def setUp(self):
    """
    Build the L{_IdentityTransferDecoder} under test, with a content length
    of 10 and with its data and finish callbacks appending to C{self.data}
    and C{self.finish} so the tests can inspect what was delivered.
    """
    self.contentLength = 10
    self.data, self.finish = [], []
    self.decoder = _IdentityTransferDecoder(
        self.contentLength, self.data.append, self.finish.append)
def test_unknownContentLengthConnectionLose(self):
    """
    When the content length is unknown, L{_IdentityTransferDecoder.noMoreData}
    raises L{PotentialDataLoss} and still invokes the finish callback, while
    the data callback receives nothing.
    """
    received, completions = [], []
    decoder = _IdentityTransferDecoder(
        None, received.append, completions.append)

    self.assertRaises(PotentialDataLoss, decoder.noMoreData)

    self.assertEqual(received, [])
    self.assertEqual(completions, [''])
    self._verifyCallbacksUnreferenced(decoder)
def test_unknownContentLength(self):
    """
    An L{_IdentityTransferDecoder} constructed with C{None} as its content
    length forwards every chunk it is given to the data callback and never
    invokes the finish callback.
    """
    received, completions = [], []
    decoder = _IdentityTransferDecoder(
        None, received.append, completions.append)

    # Feed the chunks one at a time, checking the running total after each.
    for chunk, expected in (('x', ['x']), ('y', ['x', 'y'])):
        decoder.dataReceived(chunk)
        self.assertEqual(received, expected)

    self.assertEqual(completions, [])
def headerReceived(self, line):
    """
    Do pre-processing (for content-length) and store this header away.
    Enforce the per-request header limit.

    In addition to the transfer-decoder setup, this records two convenience
    attributes on the current request: C{user_agent} (the value of a
    C{User-Agent} header when one is seen) and C{content_type} (the first
    C{Content-Type} value stored so far, or C{None}).

    A line without a C{:} separator, or a C{Content-Length} whose value is
    not an integer, is rejected via L{_respondToBadRequestAndDisconnect}
    and is neither stored nor counted.

    @type line: C{bytes}
    @param line: A line from the header section of a request, excluding the
        line delimiter.
    """
    try:
        header, data = line.split(b':', 1)
    except ValueError:
        # No colon in the line: unpacking the single-element split result
        # fails.  Treat it as a bad request instead of raising.
        _respondToBadRequestAndDisconnect(self.transport)
        return

    header = header.lower()
    data = data.strip()
    request = self.requests[-1]

    if header == b'content-length':
        try:
            self.length = int(data)
        except ValueError:
            _respondToBadRequestAndDisconnect(self.transport)
            self.length = None
            return
        self._transferDecoder = _IdentityTransferDecoder(
            self.length, request.handleContentChunk,
            self._finishRequestBody)
    elif header == b'transfer-encoding' and data.lower() == b'chunked':
        # XXX Rather poorly tested code block, apparently only exercised by
        # test_chunkedEncoding
        self.length = None
        self._transferDecoder = _ChunkedTransferDecoder(
            request.handleContentChunk, self._finishRequestBody)
    elif header == b'user-agent':
        request.user_agent = data

    # Store the header, appending to any values already seen for it.
    reqHeaders = request.requestHeaders
    values = reqHeaders.getRawHeaders(header)
    if values is not None:
        values.append(data)
    else:
        reqHeaders.setRawHeaders(header, [data])

    # Refresh content_type only after the current header has been stored,
    # and through the public accessor with a bytes name, so a Content-Type
    # header is visible as soon as it arrives.
    contentTypes = reqHeaders.getRawHeaders(b'content-type')
    request.content_type = contentTypes[0] if contentTypes else None

    self._receivedHeaderCount += 1
    if self._receivedHeaderCount > self.maxHeaders:
        _respondToBadRequestAndDisconnect(self.transport)
        return
def test_rejectDataAfterFinished(self):
    """
    Delivering data to L{_IdentityTransferDecoder.dataReceived} once the
    finish callback has been invoked raises L{RuntimeError}.
    """
    failures = []

    def finish(remainder):
        # Try to push more data from inside the finish callback and capture
        # whatever is raised so it can be inspected below.
        try:
            decoder.dataReceived('foo')
        except:
            failures.append(Failure())

    decoder = _IdentityTransferDecoder(5, self.data.append, finish)

    # Four of the five expected bytes: the decoder has not finished yet.
    decoder.dataReceived('x' * 4)
    self.assertEqual(failures, [])

    # The fifth byte completes the body and triggers the finish callback.
    decoder.dataReceived('y')
    failure = failures[0]
    failure.trap(RuntimeError)
    self.assertEqual(
        str(failure.value),
        "_IdentityTransferDecoder cannot decode data after finishing")
def allHeadersReceived(self):
    """
    Decide how to read the record body once every header has arrived.

    The C{content-length} header is looked up in C{self.headers}.  When it
    is absent or zero, C{self._finished} is called immediately with the
    result of C{self.clearLineBuffer()}.  Otherwise the transport (if any)
    is paused and the parser is switched to body mode with an
    L{_IdentityTransferDecoder} of that length, which delivers data to
    C{self.bodyDataReceived} and completion to C{self._finished}.

    @raise ValueError: if the header lookup returns anything other than
        exactly one value, or if that value is not an integer.
    """
    clh = self.headers.getRawHeaders('content-length')
    if clh is None:
        content_length = None
    elif len(clh) == 1:
        content_length = int(clh[0])
    else:
        raise ValueError("Too many Content-Length headers; "
                         "WARC Record header is invalid")

    if content_length is None or content_length == 0:
        # No body to decode: finish straight away with whatever is buffered.
        self._finished(self.clearLineBuffer())
        return

    if self.transport:
        self.transport.pauseProducing()
    decoder = _IdentityTransferDecoder(
        content_length, self.bodyDataReceived, self._finished)
    self.switchToBodyMode(decoder)
def _clientHandshake76(self):
    """
    Complete hixie-76 handshake, which consists of a challenge and response.

    If the request is not identified with a proper WebSocket handshake, the
    connection will be closed. Otherwise, the response to the handshake is
    sent and a C{WebSocketHandler} is created to handle the request.
    """
    def finish():
        self.channel.transport.loseConnection()

    if self.queued:
        return finish()

    secKey1 = self.requestHeaders.getRawHeaders("Sec-WebSocket-Key1", [])
    secKey2 = self.requestHeaders.getRawHeaders("Sec-WebSocket-Key2", [])

    if len(secKey1) != 1 or len(secKey2) != 1:
        return finish()

    # copied
    originHeaders = self.requestHeaders.getRawHeaders("Origin", [])
    if len(originHeaders) != 1:
        return finish()
    hostHeaders = self.requestHeaders.getRawHeaders("Host", [])
    if len(hostHeaders) != 1:
        return finish()
    handlerFactory = self.site.handlers.get(self.uri)
    if not handlerFactory:
        return finish()

    # key1 and key2 exist and are a string of characters
    # filter both keys to get a string with all numbers in order
    key1 = secKey1[0]
    key2 = secKey2[0]
    numBuffer1 = ''.join([x for x in key1 if x in _ascii_numbers])
    numBuffer2 = ''.join([x for x in key2 if x in _ascii_numbers])

    # make sure numbers actually exist
    if not numBuffer1 or not numBuffer2:
        return finish()

    # these should be int-like
    num1 = int(numBuffer1)
    num2 = int(numBuffer2)

    # count the number of spaces in each character string
    numSpaces1 = sum(1 for x in key1 if x == ' ')
    numSpaces2 = sum(1 for x in key2 if x == ' ')

    # there should be at least one space in each
    if numSpaces1 == 0 or numSpaces2 == 0:
        return finish()

    # get two resulting numbers, as specified in hixie-76.  Floor division
    # keeps these integers on every Python version; struct.pack('>I', ...)
    # below does not accept the floats that true division would produce.
    num1 = num1 // numSpaces1
    num2 = num2 // numSpaces2

    transport = WebSocketTransport(self)
    handler = handlerFactory(transport)
    transport._attachHandler(handler)

    self.channel.setRawMode()

    def finishHandshake(nonce):
        """
        Receive nonce value from request body, and calculate response.
        """
        protocolHeaders = self.requestHeaders.getRawHeaders(
            "WebSocket-Protocol", [])
        if len(protocolHeaders) not in (0, 1):
            return finish()
        if protocolHeaders:
            if protocolHeaders[0] not in self.site.supportedProtocols:
                return finish()
            protocolHeader = protocolHeaders[0]
        else:
            protocolHeader = None

        originHeader = originHeaders[0]
        hostHeader = hostHeaders[0]
        self.startedWriting = True
        handshake = [
            "HTTP/1.1 101 Web Socket Protocol Handshake",
            "Upgrade: WebSocket",
            "Connection: Upgrade"]
        handshake.append("Sec-WebSocket-Origin: %s" % (originHeader))
        if self.isSecure():
            scheme = "wss"
        else:
            scheme = "ws"
        handshake.append(
            "Sec-WebSocket-Location: %s://%s%s" % (
                scheme, hostHeader, self.uri))

        if protocolHeader is not None:
            handshake.append("Sec-WebSocket-Protocol: %s" % protocolHeader)

        for header in handshake:
            self.write("%s\r\n" % header)

        self.write("\r\n")

        # concatenate num1 (32 bit int), num2 (32 bit int), nonce, and take
        # md5 of result
        res = struct.pack('>II8s', num1, num2, nonce)
        server_response = md5(res).digest()
        self.write(server_response)

        # XXX we probably don't want to set _transferDecoder
        self.channel._transferDecoder = WebSocketFrameDecoder(
            self, handler)

        transport._connectionMade()

    # we need the nonce from the request body
    self.channel._transferDecoder = _IdentityTransferDecoder(0, lambda _ : None, finishHandshake)
def dataCoroutine(self):
    """
    Generator that parses one HTTP request incrementally.

    Every bare ``(yield)`` suspends until the next piece of input is sent
    in. While parsing the request line, headers and multipart part headers
    the code treats that input as a single line; after ``setRawMode()`` it
    treats it as a raw data chunk.
    NOTE(review): presumably the channel's line/raw-data callbacks drive
    this generator with ``send()`` -- confirm against the caller.

    Only ``GET`` and ``POST`` are accepted. A request line that does not
    split into exactly three parts gets ``400 Bad Request``; any other
    method gets ``405 Method Not Allowed``; in both cases the connection is
    closed and the generator ends.

    For ``POST`` the body is handled in one of two ways:

    - ``multipart/form-data``: each part is either a file (reported via
      ``fileStarted`` / ``handleFileChunk`` / ``fileCompleted`` on the
      request) or a plain form field (stored in ``self.request.args``).
    - anything else: the whole body is treated as a single file whose name
      comes from the ``X-File-Name`` header, defaulting to ``'file'``.
    """
    line = (yield)
    # IE sends an extraneous empty line (\r\n) after a POST request;
    # eat up such a line, but only ONCE
    if not line.strip():
        line = (yield)

    # Get the first line, e.g. "POST /api/foo HTTP/1.1"
    parts = line.split()
    if len(parts) != 3:
        self.transport.write("HTTP/1.1 400 Bad Request\r\n\r\n")
        self.transport.loseConnection()
        return
    command, request, version = parts

    if command not in ('POST', 'GET'):
        self.transport.write("HTTP/1.1 405 Method Not Allowed\r\n\r\n")
        self.transport.loseConnection()
        return

    self.command = command
    self.path = request
    self.version = version

    # POST and GET use different request classes, with different
    # constructor arguments.
    if self.command == 'POST':
        self.request = self.uploadRequestClass(channel=self, path=self.path)
    else:
        self.request = self.downloadRequestClass(channel=self, queued=False)
    # The base class HTTPChannel keeps a list of enqueued requests; we don't
    # need anything that complex, but we do need to fill out the list so that
    # headerReceived() and other inherited methods still work
    self.requests = [ self.request ]

    line = (yield)

    # Parse header lines
    header = ''
    while line.strip():
        if line[0] in ' \t':
            # Continuation of a header
            # TODO: test multiline headers
            header = header + '\n' + line
        else:
            # A new header starts: flush the previous one (if any) first.
            if header:
                self.headerReceived(header)
            header = line
        line = (yield)

    # Last line was empty; process final header
    if header:
        self.headerReceived(header)

    # Now we're processing the body
    if not self._transferDecoder:
        # TODO: figure this out -- why doesn't FF send transfer-encoding OR content-length?
        self._transferDecoder = _IdentityTransferDecoder(
            contentLength=None,
            dataCallback=self.handleContentChunk,
            finishCallback=self._finishRequestBody
        )

    self.count_line_data = True  # TODO: HACK!!

    logging.info('%s %s' % (self.command, self.path))

    self.request.parseCookies()  # TODO: test if we're actually handling cookies well

    if command == 'POST':
        logging.debug('%s gotLength() %s' % (command, self.length))
        self.request.gotLength(self.length)

    if command == 'GET':
        self.request.requestReceived(self.command, self.path, self.version)

    if command == 'POST':
        # Split the Content-Type header into the media type and its options
        # (e.g. the multipart boundary); a missing header parses as ''.
        ctypes_raw = self.request.requestHeaders.getRawHeaders('content-type')
        self.content_type, type_options = cgi.parse_header(ctypes_raw[0] if ctypes_raw else '')

        if self.content_type.lower() == 'multipart/form-data':
            # Multipart form processing
            # NOTE(review): a multipart Content-Type without a boundary
            # option raises KeyError on the next line.
            boundary = type_options['boundary']
            start_boundary = '--' + boundary
            end_boundary = '--' + boundary + '--'

            # TODO: use coroutine trampolining to let us push this into a subroutine
            # See http://www.vivtek.com/rfc1867.html for a writeup of the format we're parsing here
            line = (yield)
            while line:
                # TODO: explain
                # `extra` carries body data found after a part boundary while
                # in raw mode; it is handed out through `(yield extra)` below.
                extra = ''
                if line == end_boundary:
                    return # End the coroutine
                elif line == start_boundary:
                    # We're starting a new value -- either a file or a normal form field
                    line = (yield)
                    # NOTE(review): the asserts in this branch validate
                    # client-supplied input and are stripped under `python -O`.
                    assert line.startswith('Content-Disposition:')
                    content_disposition, disp_options = cgi.parse_header(line)
                    if 'filename' in disp_options:
                        filename = disp_options.get('filename', 'file')

                        # This is a header like:
                        # Content-Type: image/jpeg
                        line = (yield)
                        content_type = line.split(':')[1].strip()

                        self.request.fileStarted(filename=filename, content_type=content_type)

                        # Consume a blank line
                        line = (yield)
                        assert not line.strip()

                        # From here on the input is consumed as raw chunks
                        # until the next boundary is found.
                        self.setRawMode()

                        # TODO: test this for very small chunk sizes and large boundaries
                        # TODO: test for different sequences of form-data and files in the multipart form
                        # TODO: surely there's a more efficient implementation of this?
                        data = (yield)
                        while data:
                            parts = data.split('\r\n' + start_boundary, 1)
                            if len(parts) == 1:
                                # Data does not contain boundary, we're in the middle of
                                # a file
                                head = parts[0]

                                # If end_boundary is N bytes long, we mustn't send the last
                                # N bytes we've seen until we know whether they're
                                # a boundary or not. Save the last N bytes of data.
                                boundary_len = max(len(start_boundary), len(end_boundary))
                                self.request.handleFileChunk(filename, head[:-boundary_len])

                                # Continue reading the file
                                # NOTE(review): saved_len is assigned but never
                                # read anywhere in this function.
                                saved_len = len(head[-boundary_len:])
                                data = head[-boundary_len:] + (yield)
                            else:
                                # Data contains boundary, we're at the end of this file,
                                # and data might contain more form fields, or we might be at
                                # the end of the request body
                                # TODO: hack!! can't use setLineMode(tail) because that will
                                # cause us to re-enter this coroutine
                                head, extra = parts
                                if head:
                                    self.request.handleFileChunk(filename, head)
                                self.request.fileCompleted()

                                # TODO: cleanup
                                # The split removed the boundary marker itself;
                                # put it back in front of the leftover data.
                                if extra:
                                    extra = start_boundary + extra

                                # Continue parsing the body in the outer loop
                                break

                        self.setLineMode()
                        # TODO: explain
                        # Yields the leftover data (possibly '') out to the
                        # driver and receives the next line back.
                        # NOTE(review): presumably the driver re-feeds `extra`
                        # as line data -- confirm against the caller.
                        line = (yield extra)
                    else:
                        assert content_disposition.split(':')[1].strip() == 'form-data'
                        form_data_name = disp_options['name']

                        # Eat a blank line
                        line = (yield)
                        assert not line.strip()

                        # TODO: multi-line values??
                        form_data_value = (yield)
                        self.request.args[form_data_name] = form_data_value
                        logging.debug('%s = %s' % (form_data_name, form_data_value))

                        # Continue
                        line = (yield)
                else:
                    # Line isn't start boundary
                    logging.warning('weird line: %s' % line)
                    line = (yield)
        else:
            # We're processing an XMLHTTPRequest file upload -- the body is the file itself
            if self.request.getHeader('X-File-Name'):
                # filename was quoted with Javascript's encodeURIComponent()
                filename = urllib.unquote(self.request.getHeader('X-File-Name'))
            else:
                filename = 'file'
            self.request.fileStarted(filename, self.content_type)
            self.setRawMode()
            # Forward every raw chunk until an empty/falsy chunk is sent in.
            data = (yield)
            while data:
                self.request.handleFileChunk(filename, data)
                data = (yield)
            self.request.fileCompleted()