class ProxyRequest(Request):
    """Request object for every HTTP connection coming from an end client.

    The request URI is split into protocol, host, port and the remaining
    path/query; a client factory for the detected protocol is then created
    and handed to the reactor, which connects to the origin host.
    """

    # protocol name -> client factory class used to talk to the origin server
    protocols = {'http': ProxyClientFactory}
    # protocol name -> default TCP port
    ports = {'http': 80}

    def __init__(self, channel, queued, reactor=reactor):
        Request.__init__(self, channel, queued)
        self.reactor = reactor
        self.peers = {}
        # The following four fields are filled in by parseHostInfo().
        self.protocol = None
        self.host = None
        self.port = None
        self.rest = None
        self.configs = reactor.configs
        self.log = Logger()

    def parseHostInfo(self):
        """Parse protocol, host, port and remaining URL out of ``self.uri``.

        Sets ``self.protocol``, ``self.host``, ``self.port`` and
        ``self.rest``.  When the protocol is missing or has no entry in
        ``ports``, http and its default port are assumed.  An explicit
        ``host:port`` in the URI overrides the default port.
        """
        parsed = urlparse.urlparse(self.uri)
        self.protocol = parsed[0]
        self.host = parsed[1]
        try:
            self.port = self.ports[self.protocol]
        except KeyError:
            self.log.logic("no protocol provided, assuming http")
            self.protocol = 'http'
            # Also fall back to the default port; without this, ``port``
            # stayed None and was passed to connectTCP() as-is.
            self.port = self.ports[self.protocol]
        if ':' in self.host:
            self.host, self.port = self.host.split(':')
            self.port = int(self.port)
        # Everything after the network location (path, params, query, fragment).
        self.rest = urlparse.urlunparse(('', '') + parsed[2:])
        if not self.rest:
            self.rest = self.rest + '/'

    def process(self):
        """Process the request: connect to the origin host and forward it."""
        self.parseHostInfo()
        class_ = self.protocols[self.protocol]
        headers = self.getAllHeaders().copy()
        if 'host' not in headers:
            headers['host'] = self.host
        # Rewind and read the full request body so it can be forwarded.
        self.content.seek(0, 0)
        body = self.content.read()
        # A client factory for the ProxyClient needs to be created. The
        # factory is then passed to the reactor, which will call it when a
        # TCP connection is established with the host.
        self.uri = self.host + self.rest
        client_factory = class_(self.method, self.rest, self.clientproto,
                                headers, body, self)
        self.reactor.connectTCP(self.host, self.port, client_factory)
class PeerWorker():
    """A modified variant of the persistent HTTP client class.

    Optimized to work with the download pool by using PPM headers: the
    incoming request names the resource in its ``Target`` header and the
    byte range in its ``Range`` header.
    """

    def __init__(self):
        self.pool = HTTPConnectionPool(reactor)  # the connection to be persisted
        self.agent = Agent(reactor, pool=self.pool)
        self.responseWriter = RequestBodyReciever
        self.log = Logger()

    def getChunk(self, request):
        """Issue the HTTP GET request for the range of the file specified.

        Reads the ``Range`` and ``Target`` headers from ``request``.  If they
        cannot be read, the request is answered with a 400 and finished, and
        None is returned.  Otherwise the request is marked 202 (Accepted) and
        the deferred of the outgoing GET is returned.
        """
        try:
            headers = _headers(request)
            byte_range = headers['Range']
            uri = headers['Target'][0]
        except Exception:
            # Was a bare ``except:``; catching Exception keeps the
            # best-effort handling without swallowing SystemExit or
            # KeyboardInterrupt.
            request.setResponseCode(400)  # bad request
            request.setHeader('Reason', 'INVALID')
            request.write(" ")  # send the headers
            request.finish()
            return
        self.log.logic("{}:{}".format(uri, byte_range))
        request.setResponseCode(202)  # Accepted
        defered = self.agent.request(
            'GET',
            uri,
            Headers({'Range': byte_range}),
            None)
        defered.addCallback(self.responseRecieved, request)
        return defered

    def responseRecieved(self, response, request):
        """Stream the body of ``response`` through the response writer.

        Returns a deferred that is handed to the response writer together
        with ``request``.  A status above 206 is only logged; the body is
        still delivered.
        """
        if response.code > 206:  # 206 is the code returned for http range responses
            self.log.warn("error with response from server")
        finished = Deferred()
        recvr = self.responseWriter(request, finished)
        response.deliverBody(recvr)
        return finished
class PeerHandler():
    """Maintain a persistent TCP connection with the supplied neighbor.

    Talks to the neighbor over the new ppm API via HTTP deferreds. This class
    is meant to be instantiated for each session.
    """

    def __init__(self, neighbor, id, target, downloadPool):
        self.configs = reactor.configs
        self.pool = HTTPConnectionPool(reactor)  # the connection to be persisted
        self.agent = Agent(reactor, pool=self.pool)
        self.responseWriter = PH_RequestBodyReciever
        self.peer_ip = neighbor.ip
        self.target = target  # uri of the target resource (requested on behalf of the client)
        self.downloadPool = downloadPool  # the calling download pool that spawned the handler
        self.id = id
        self.neighbor = neighbor
        self.index = 0
        self.recvd = 0
        self.data_stop = 0
        self.verified = False
        self.active = False  # set to true when we receive a response
        self.trust_level = 0
        self.timer = SlidingWindow(10)
        self.assigned_chunk = None  # range currently dispatched to the peer
        self.log = Logger()
        self.records = SessionRecord(neighbor.ip)
        self.records.new(target=target, req_size=downloadPool.requestSize)
        self._signature = self._sign(self.target)

    def _url(self, path):
        """Return the http URL of ``path`` on the peer's configured port."""
        return 'http://{}:{}/{}'.format(self.peer_ip, self.configs.peer_port, path)

    def _sign(self, target):
        """Sign '<our ip>-<target>' with the configured key."""
        msg = '{}-{}'.format(self.configs.ip, target)
        return self.configs.key.sign(msg)[0]

    def _baseHeaders(self):
        """Return the base headers needed in a peer request."""
        headers = Headers()
        headers.addRawHeader('Target', self.target)
        headers.addRawHeader('Signature', self._signature)
        return headers

    def _doRequest(self, url, headers, doCallback=True):
        """Send a GET to ``url`` and route the reply to responseRecieved.

        NOTE(review): ``doCallback`` is accepted but has never been
        consulted; the callback is always attached.
        """
        defered = self.agent.request(
            'GET',
            url,
            headers,
            None)
        defered.addCallback(self.responseRecieved)
        defered.addErrback(deferedError)
        return defered

    def checkTimeout(self, tmp=None):
        """Check if a timeout has occurred; poll again in one second if not.

        Once the timer reports a timeout the session is terminated and
        polling stops.  Previously the check kept rescheduling itself, so
        terminateConnection() was invoked again on every later tick.
        """
        if self.timer.timedout():
            self.terminateConnection()
            return
        reactor.callLater(1, self.checkTimeout, None)

    def getInit(self):
        """Hit a peer with an init request for a session at the supplied url."""
        headers = self._baseHeaders()
        return self._doRequest(self._url('init'), headers)

    def getChunk(self, range):
        """Hit a peer with a /chunk request for the range specified."""
        headers = self._baseHeaders()
        self.assigned_chunk = range
        headers.addRawHeader('Range', httpRange(range))
        self.log.info("Handler dispatching {} to peer".format(range))
        return self._doRequest(self._url('chunk'), headers)

    def terminateConnection(self):
        """End this handler's session, usually by the wish of the peer.

        Records the timeout, saves the session record and removes the
        instance from the download pool's records.
        """
        self.records.timeout()
        self.records.save()
        self.downloadPool.terminatePeer(self)

    def begin(self):
        """Called when a peer agrees to participate in an aggregation session.

        Marks the handler active and dispatches the next chunk from the pool.
        Returns the request deferred, or None when the pool has no chunk.
        """
        self.active = True
        chunk = self.downloadPool.getNextChunk(self.id)
        if chunk:
            return self.getChunk(chunk)

    def end(self, code):
        """Called when a peer sends an errorful response code."""
        self.log.logic("Terminating connection with peer (error: {})".format(code))
        self.terminateConnection()

    def responseRecieved(self, response):
        """Hook in here before setting up the response body reader.

        Look at headers to determine if the signature is valid, what the
        peer is sending back, etc.
        """
        self.log.info("Received reply from peer")
        if response.code > 206:
            # Peer wishes to terminate its involvement; terminating hands
            # its chunk back to the download pool.
            return self.end(response.code)
        if not self.active:
            # First reply (to the init request): discard the body and start
            # requesting chunks.
            response.deliverBody(Dummy())
            return self.begin()
        self.timer.reset()
        # Result is currently unused; the call is kept in case the helper
        # validates or has side effects.
        headersFromResponse(response)
        try:
            recvr = self.responseWriter(self, range=self.assigned_chunk)
            response.deliverBody(recvr)
        except Exception:
            # Was a bare ``except:``; still best-effort, but no longer
            # swallows SystemExit/KeyboardInterrupt.
            self.log.warning("error setting up reciever")
class ProxyClient(HTTPClient):
    """HTTP client that relays the origin server's response to the end client.

    While the response headers arrive it decides whether the response should
    instead be aggregated through a DownloadPool: the body must be larger
    than ``configs.minimum_file_size`` and the server must accept byte-range
    requests.  In that case the direct transfer is stopped and the pool
    takes over writing to the client.
    """

    _finished = False
    bytes_recvd = 0

    def __init__(self, command, rest, version, headers, data, father):
        self.father = father
        self.command = command
        self.rest = rest
        # Drop the proxy/persistence headers and force a closing connection.
        if "proxy-connection" in headers:
            del headers["proxy-connection"]
        headers["connection"] = "close"
        headers.pop('keep-alive', None)
        self.headers = headers
        self.data = data
        self.stop = False  # set once the response is handed to a download pool
        self.log = Logger()
        self.configs = reactor.configs
        self.can_pool = False  # does the server support http range?
        self.should_pool = False  # is the response size large enough to merit aggregation?
        self.req_size = 0  # how big is the response?
        self.closed = False  # have we closed the connection to the server?

    def connectionMade(self):
        """Send the request line, headers and body once connected."""
        self.log.info('successful TCP connection established with {}'.format(self.father.uri))
        self.sendCommand(self.command, self.rest)
        for header, value in self.headers.items():
            self.sendHeader(header, value)
            # Header names are case-insensitive; compare lowercased.
            if header.lower() == 'range':
                self.log.info("range request:{}".format(value))
        self.endHeaders()
        self.transport.write(self.data)

    def handleStatus(self, version, code, message):
        """Forward the server's status code and message to the client."""
        self.father.setResponseCode(int(code), message)

    def handleHeader(self, key, value):
        """Pass the headers to the father request object (which writes to the client).

        If the content-length is larger than the minimum file size and the
        server accepts byte ranges, stop the request and start over with a
        download pool.
        """
        if self.stop:  # already decided to aggregate
            return
        # Header names are case-insensitive, so compare lowercased; the
        # original casing is still what gets forwarded to the client.
        name = key.lower()
        if name == 'content-length' and int(value) > self.configs.minimum_file_size:
            self.should_pool = True
            self.req_size = int(value)
        if name == 'accept-ranges' and 'bytes' in value:
            self.can_pool = True  # the server accepts range requests
        if name in ('server', 'date', 'content-type'):
            self.father.responseHeaders.setRawHeaders(key, [value])
        else:
            self.father.responseHeaders.addRawHeader(key, value)
        if self.should_pool and self.can_pool:
            try:
                pool = DownloadPool(self.req_size, self)
                # Log the size itself: ``value`` may belong to whichever
                # header happened to complete the pooling conditions.
                self.log.logic('using two streams, for target of size {}'.format(self.req_size))
                pool.queryPeers()
                self.stop = True
            except Exception:
                self.log.info("error instantiating download pool for request")
                self.finish()  # something went wrong loading the download pool
                self.should_pool = False

    def handleResponsePart(self, buffer):
        """Relay a piece of the body, unless a download pool has taken over."""
        if self.stop:
            self.handleResponseEnd()
            return
        self.bytes_recvd += len(buffer)
        self.father.write(buffer)

    def finish(self):
        """Close the server connection (if still open) and the client's."""
        self._finished = True
        if not self.closed:
            self.transport.loseConnection()
            self.closed = True
        self.father.transport.loseConnection()

    def handleResponseEnd(self):
        """Finish the exchange, keeping the client open when aggregating."""
        if self.stop:
            self.transport.loseConnection()
            self.closed = True
            # don't 'finish' the proxy session with the client, we'll be
            # aggregating a response for them
        elif not self._finished:
            self.log.info("Response Delivered to Proxy Client")
            self.finish()  # close normally (for a regular proxy request)
class DownloadPool():
    """Manager that delegates (maps) each chunk to a given peer handler.

    It communicates with the peers indirectly, through the handlers. Since
    twisted is not inherently thread safe, and is heavily event driven, this
    class does not run as its own thread. Instead it makes use of twisted's
    Deferred class, which is recommended for use in any blocking situation.

    Background on Deferred:
    A deferred is triggered once the asynchronous information it depends on
    comes in. Callbacks are attached to it, which fire off in a chain after
    the deferred is triggered. From the website:
    'in cases where a function in a threaded program would block until it
    gets a result, for Twisted it should not block. Instead, it should
    return a Deferred.'

    Whenever a new chunk is requested by one of the peer handlers (implying
    that its associated peer has finished its prior work), a deferred is
    dispatched. It checks the head of the pool's buffers for data to write.
    If none exists, it reschedules itself to be called in a short time.
    """

    # Seconds between polls of the send buffers in waitForData().
    POLL_INTERVAL = .4

    def __init__(self, requestSize, proxyRequest):
        self.configs = reactor.configs
        self.neighbors = self.configs.neighbors
        self.participants = {}
        self.requestSize = requestSize
        self.bytes_sent = 0
        # Initialised before any collaborator receives ``self`` so that a
        # callback into the pool never sees these attributes missing.
        self.finished = False
        self.log = Logger()
        # the proxy request (which maintains a TCP connection to the end client).
        self.proxyRequest = proxyRequest.father
        self.proxyClient = proxyRequest
        self.uri = self.proxyRequest.uri
        self.host = self.proxyRequest.host
        self.rest = self.proxyRequest.rest
        self.sendBuffers = []  # buffers currently being filled by peer clients.
        # The start index of the next chunk to send. This is used as a key
        # into the pool's sending buffers. It will only be moved once the
        # sendBuf it maps to has finished receiving its expected data.
        self.rangeIndex = 0
        # The previous conditional always evaluated to max_chunk_size.
        self.chunkSize = self.configs.max_chunk_size
        self.chunks = requestChunks(self.requestSize, self.chunkSize)
        self.zeroKnowledgeProver = ZeroKnowledgeConnection(self)
        self.client = PersistentProxyClient(self.host, self.rest, self,
                                            RequestBodyReciever, cid=0)
        self.participants[0] = self.client
        # begin downloading immediately
        if 'http://' not in self.uri:
            self.uri = 'http://' + self.uri
        self.client.getChunk(self.getNextChunk(self.client.id))

    def handleHeader(self, key, value):
        """Forward a response header to the client, forging Content-Length.

        The content length returned from the first chunk request will be for
        the size of the chunk, but the client needs to see the length of the
        entire file, so the value must be forged before the headers are sent
        back to the client.  Content-Range is dropped entirely.
        """
        name = key.lower()
        if name == 'content-range':
            # don't include (the old comparison used mixed case against a
            # lowercased key and could never match)
            return
        headers = self.proxyRequest.responseHeaders
        if name in ('server', 'date', 'content-type'):
            headers.setRawHeaders(key, [value])
        elif 'content-length' in name:
            # Replace rather than append so the client never sees two
            # Content-Length headers; report the size of the whole file.
            headers.setRawHeaders(key, [str(self.requestSize)])
        else:
            headers.addRawHeader(key, value)

    def handleResponseCode(self, code):
        """Handle the response code (the one the client sees).

        If it is 206 (returned for partial content responses) the code is
        changed to 200, so the client sees it as it would be for a real
        request.
        """
        if int(code) == 206:  # 206 is returned for partial content files.
            code = 200
        self.proxyRequest.setResponseCode(int(code), "")

    def _peerBuffer(self, peer, start):
        """Find the peer's buffer starting at ``start``, or None."""
        for buf in self.sendBuffers:
            if buf.peer is peer and buf.start_idx == start:
                return buf
        self.log.warning("no peer found in send buffers")
        return None

    def queryPeers(self):
        """Give shared request info to each peer (ids start at 1)."""
        for peer_id, ip in enumerate(self.neighbors, 1):
            handler = PeerHandler(self.neighbors[ip], peer_id, self.uri, self)
            self.participants[peer_id] = handler
            handler.getInit()
            self.log.info("Querying Neighbor {} with id {}".format(ip, peer_id))

    def releaseChunk(self, chunk):
        """Called by a peer who wants to give up on its assigned chunk.

        The chunk is pushed to the front of the chunk iterator.
        NOTE(review): the entry is queued as ``(0, chunk)`` rather than
        ``chunk``; confirm this matches the shape requestChunks() yields.
        """
        self.chunks = chain([(0, chunk)], self.chunks)

    def terminatePeer(self, handler):
        """Break it off with a peer.

        If they had work, push it onto the makeup queue. Close the
        connection with the peer for the rest of the session.
        """
        working = handler.assigned_chunk
        if working:
            # assign this request to another peer (or self) by adding its
            # chunk to the start of the chunks iterator
            self.releaseChunk(working)
        if handler.id > 0 and handler.id in self.participants:
            del self.participants[handler.id]

    def endSession(self, msg=""):
        """Break off with every peer and do some cleanup."""
        if self.finished:
            return
        self.log.logic(msg)
        # Iterate over a snapshot: terminating a peer removes it from
        # ``participants``.
        for pid in list(self.participants.keys()):
            try:
                self.participants[pid].terminateConnection()
            except Exception:
                pass  # already removed somehow
        self.finished = True
        self.proxyClient.finish()

    def getNextChunk(self, senderID):
        """Hand out the next chunk range to the participant ``senderID``.

        Called by a peerHandler class when it is ready to dispatch more work
        to a sender.  Returns the chunk range, or None when the session is
        finished, the sender is unknown, or no chunks remain.
        """
        if self.finished:
            return None
        try:
            peer = self.participants[senderID]
        except KeyError:
            self.log.warning("Peer ({}) for chunk request does not exist in this download pool".format(senderID))
            return None
        try:
            chunk_range = next(self.chunks)
        except StopIteration:
            # no more chunks to download
            self.log.info("No more chunks to allocate")
            chunk_range = None
            # NOTE: a hard-coded session timeout
            # (reactor.callLater(15, self.endSession, "Timeout session end"))
            # was disabled here. In the case that the last chunk is
            # substantially large, it is difficult to predict a proper
            # timeout interval to wait.
        if chunk_range:
            try:
                self.sendBuffers.append(sendBuf(peer, chunk_range))
            except Exception:
                # invalid range tuple (weird bug)
                self.log.warning("could not create a send buffer for {}".format(chunk_range))
                chunk_range = None
        # dispatch a deferred to handle the response
        self.waitForData()
        return chunk_range

    def appendData(self, peer, startidx, data):
        """Write ``data`` into the peer's buffer that starts at ``startidx``.

        Called by a peerHandler when it has data to write.  When no matching
        buffer exists the data is dropped (``_peerBuffer`` logs a warning)
        instead of failing with an AttributeError.
        """
        buf = self._peerBuffer(peer, startidx)
        if buf is None:
            return
        buf.writeData(data)

    def waitForData(self, d=None):
        """The heart of the callback chain.

        Either triggers the callback (writeData) when the buffer at the head
        of the queue has data and starts at ``rangeIndex``, or schedules
        itself to be called later (to prevent blocking).  Returns the
        deferred, or None once the session is finished.
        """
        if self.finished:
            return None  # no need to keep waiting
        if d is None:
            d = defer.Deferred()
            d.addCallback(self.writeData)
            d.addErrback(deferedError)
        try:
            head = self.sendBuffers[0]
        except IndexError:
            head = None  # send buffers empty
        if head is not None and len(head) > 0 and head.start_idx == self.rangeIndex:
            d.callback(head)
        else:
            reactor.callLater(self.POLL_INTERVAL, self.waitForData, d)
        return d

    def writeData(self, req=None):
        """Write the data at the head of the buffer to the transport.

        Must take an argument (as deferred callbacks pass one in).
        """
        try:
            buf = self.sendBuffers[0]
            data = buf.getData()
            size = len(buf)
        except Exception:
            self.log.warning("meant to write data, but no buffers were available")
            return
        if size == 0:
            return
        try:
            self.proxyRequest.write(data)
            self.bytes_sent += size
            buf.clear()
            self.log.info("{} / {} ({}%) sent back to client from peer {}".format(
                self.bytes_sent,
                self.requestSize,
                # scale the fraction so the logged value really is a percentage
                100.0 * float(self.bytes_sent) / float(self.requestSize),
                buf.peer.id))
        except Exception:
            self.log.warning('error writing to client')
            # Propagate the real error instead of raising SystemExit(0),
            # which masked the cause behind a success exit code.
            raise
        if buf.done:
            # remove the buffer, and update the index
            self.rangeIndex = buf.stop_idx + 1
            del self.sendBuffers[0]
        if self.bytes_sent >= self.requestSize - 1:  # wiggle room
            self.endSession(msg="All data for request has been written to client")
        self.waitForData()