Exemple #1
0
class SingleShotHTTPClient(component):
    """\
    SingleShotHTTPClient() -> component that can download a file using HTTP by URL

    Arguments:
    - starturl     -- the URL of the file to download
    - [postbody]   -- data to POST to that URL
    - [connectionclass] -- specify a class other than TCPClient to connect with

    A GET request is sent unless postbody is non-empty, in which case a POST
    is sent. Redirect responses that carry a "location" header are followed;
    a request that has already been redirected MAX_REDIRECTS times is dropped
    and the component finishes.
    """

    # NOTE: the "UNUSED" descriptions are misleading - mainBody() reads
    # "control" for shutdown messages and main() sends producerFinished on
    # "signal" when it finishes.
    Inboxes = {
        "inbox": "UNUSED",
        "control": "UNUSED",
        "_parserinbox": "Data from HTTP parser",
        "_parsercontrol": "Signals from HTTP parser",
        "_tcpcontrol": "Signals from TCP client",
    }

    Outboxes = {
        "outbox": "Requested file",
        "debug": "Output to aid debugging",
        "_parsersignal": "Signals for HTTP parser",
        "_tcpoutbox": "Send over TCP connection",
        "_tcpsignal": "Signals shutdown of TCP connection",
        "signal": "UNUSED"
    }

    # Response codes followed as redirects when a "location" header is present.
    REDIRECT_CODES = ("301", "302", "303", "307")

    # A request already redirected this many times is abandoned (we are
    # probably in a redirect loop).
    MAX_REDIRECTS = 3

    def __init__(self, starturl, postbody="", connectionclass=TCPClient):
        super(SingleShotHTTPClient, self).__init__()
        # Child components; both are None whenever no download is in progress.
        self.tcpclient = None
        self.httpparser = None
        # HTTPRequest objects waiting to be fetched (start URL, then redirects).
        self.requestqueue = []
        self.starturl = starturl
        self.connectionclass = connectionclass
        self.postbody = postbody

    def formRequest(self, url):
        """Craft a HTTP request for the supplied url.

        Returns the object produced by splitUri(url) with an added "request"
        entry: a list whose first item is the request line and headers joined
        into a single string and, when a POST body was supplied, whose second
        item is that body.
        """
        splituri = splitUri(url)

        host = splituri["uri-server"]
        if "uri-port" in splituri:
            host += ":" + splituri["uri-port"]

        if self.postbody == "":
            headerlines = ["GET " + splituri["raw-uri"] + " HTTP/1.1\r\n"]
        else:
            headerlines = [
                "POST " + splituri["raw-uri"] + " HTTP/1.1\r\n",
                "Content-Length: " + str(len(self.postbody)) + "\r\n",
            ]

        headerlines.append("Host: " + host + "\r\n")
        headerlines.append("User-agent: Kamaelia HTTP Client 0.3 (RJL)\r\n")
        # keep-alive is a work around for lack of shutdown notification in
        # TCPClient
        headerlines.append("Connection: Keep-Alive\r\n")
        headerlines.append("\r\n")

        # Send the headers as one message rather than line by line; this
        # might improve performance by sending more together.
        splituri["request"] = ["".join(headerlines)]

        if self.postbody != "":
            splituri["request"].append(self.postbody)

        return splituri

    def makeRequest(self, request):
        """Connect to the remote HTTP server and send request.

        Creates a fresh connection component and HTTPParser, wires them to
        this component's private boxes, activates them and sends every part
        of the request to the connection.
        """
        self.tcpclient = None
        self.httpparser = None

        # NOTE(review): intval presumably returns None when the port is
        # missing or not numeric - confirm against its definition.
        port = intval(request.requestobject.get("uri-port", ""))
        if port is None:
            port = 80

        self.tcpclient = self.connectionclass(
            request.requestobject["uri-server"], port)
        self.httpparser = HTTPParser(mode="response")

        self.link((self, "_tcpoutbox"), (self.tcpclient, "inbox"))
        self.link((self, "_tcpsignal"), (self.tcpclient, "control"))
        self.link((self.tcpclient, "signal"), (self, "_tcpcontrol"))

        # incoming TCP data -> HTTPParser directly
        self.link((self.tcpclient, "outbox"), (self.httpparser, "inbox"))

        self.link((self, "_parsersignal"), (self.httpparser, "control"))
        self.link((self.httpparser, "outbox"), (self, "_parserinbox"))
        self.link((self.httpparser, "signal"), (self, "_parsercontrol"))

        self.addChildren(self.tcpclient, self.httpparser)
        self.tcpclient.activate()
        self.httpparser.activate()
        self.response = ""

        parts = request.requestobject["request"]
        if isinstance(parts, str):
            self.send(parts, "_tcpoutbox")
        else:
            for part in parts:
                self.send(part, "_tcpoutbox")

    def shutdownKids(self):
        """Close TCP connection and HTTP parser.

        Does nothing unless both children currently exist.
        """
        if self.tcpclient is not None and self.httpparser is not None:
            self.send(producerFinished(), "_tcpsignal")
            self.send(shutdown(), "_parsersignal")
            self.removeChild(self.tcpclient)
            self.removeChild(self.httpparser)
            self.tcpclient = None
            self.httpparser = None

    def handleRedirect(self, header):
        """Queue a follow-up request if header describes a redirect.

        Returns True when the response code is one of REDIRECT_CODES and a
        non-empty "location" header was supplied (a request for that location
        is appended to the request queue); returns False otherwise.
        """
        if header["responsecode"] not in self.REDIRECT_CODES:
            return False

        # location header gives the redirect URL
        newurl = header["headers"].get("location", "")
        if newurl == "":
            # Redirect without a target: the caller treats it like any other
            # non-200 response.
            return False

        redirectedrequest = HTTPRequest(
            self.formRequest(newurl),
            self.currentrequest.redirectcount + 1)
        self.requestqueue.append(redirectedrequest)
        return True

    def main(self):
        """Main loop.

        Queues the request for starturl, runs mainBody() until it reports
        completion, then sends producerFinished on "signal".
        """
        self.requestqueue.append(
            HTTPRequest(self.formRequest(self.starturl), 0))
        while self.mainBody():
            yield 1
        self.send(producerFinished(self), "signal")
        yield 1
        return

    def mainBody(self):
        """Called repeatedly by main loop. Checks inboxes and processes
        messages received. Starts fetching the next queued page once the
        current one (e.g. a redirect page) has been completely fetched.

        Returns 1 while the component should keep running and 0 once it is
        done: nothing left to fetch, the redirect limit was reached, or a
        shutdown message arrived on "control".
        """
        self.send("SingleShotHTTPClient.mainBody()", "debug")
        while self.dataReady("_parserinbox"):
            msg = self.recv("_parserinbox")
            if isinstance(msg, ParsedHTTPHeader):
                self.send(
                    "SingleShotHTTPClient received a ParsedHTTPHeader on _parserinbox",
                    "debug")
                # handleRedirect() runs first so a redirect gets queued; only
                # a non-redirect 200 header is passed on. Any other response
                # code is treated as not found and its header is dropped.
                if (not self.handleRedirect(msg.header)
                        and msg.header["responsecode"] == "200"):
                    self.send(msg, "outbox")

            elif isinstance(msg, ParsedHTTPBodyChunk):
                self.send(
                    "SingleShotHTTPClient received a ParsedHTTPBodyChunk on _parserinbox",
                    "debug")
                # An empty queue means no redirect is pending, so pass the
                # body on. NOTE(review): this also forwards the body of a
                # non-200, non-redirect response whose header was dropped.
                if not self.requestqueue:
                    self.send(msg, "outbox")

            elif isinstance(msg, ParsedHTTPEnd):
                self.send(
                    "SingleShotHTTPClient received a ParsedHTTPEnd on _parserinbox",
                    "debug")
                if not self.requestqueue:
                    self.send(msg, "outbox")
                # Response complete: tear down the connection. The next call
                # starts any queued redirect or finishes.
                self.shutdownKids()
                return 1

        while self.dataReady("_parsercontrol"):
            # Parser signals are only noted on the debug outbox.
            self.recv("_parsercontrol")
            self.send(
                "SingleShotHTTPClient received something on _parsercontrol",
                "debug")

        while self.dataReady("_tcpcontrol"):
            # Relay signals from the TCP client to the parser.
            self.send(self.recv("_tcpcontrol"), "_parsersignal")

        while self.dataReady("control"):
            msg = self.recv("control")
            if isinstance(msg, (shutdownMicroprocess, shutdown)):
                self.shutdownKids()
                return 0

        # if we're not currently downloading a page
        if self.tcpclient is None:
            # then either we've finished or we should download the next URL
            # (if we've been redirected)
            if not self.requestqueue:
                return 0
            self.currentrequest = self.requestqueue.pop(0)
            if self.currentrequest.redirectcount >= self.MAX_REDIRECTS:
                # too many redirects, give up - we're probably in a loop
                return 0
            self.makeRequest(self.currentrequest)

        self.pause()
        return 1
Exemple #2
0
class SingleShotHTTPClient(component):
    """\
    SingleShotHTTPClient() -> component that can download a file using HTTP by URL

    Arguments:
    - starturl     -- the URL of the file to download
    - [postbody]   -- data to POST to that URL
    - [connectionclass] -- specify a class other than TCPClient to connect with

    The request is a GET when postbody is empty and a POST otherwise.
    Responses whose code is in REDIRECT_CODES and which carry a "location"
    header are followed, up to MAX_REDIRECTS redirections.
    """

    # NOTE: "control" and "signal" are described as UNUSED but are in fact
    # used: mainBody() watches "control" for shutdown messages and main()
    # emits producerFinished on "signal".
    Inboxes =  {
        "inbox"          : "UNUSED",
        "control"        : "UNUSED",

        "_parserinbox"   : "Data from HTTP parser",
        "_parsercontrol" : "Signals from HTTP parser",
        "_tcpcontrol"    : "Signals from TCP client",
    }

    Outboxes = {
        "outbox"         : "Requested file",
        "debug"          : "Output to aid debugging",

        "_parsersignal"  : "Signals for HTTP parser",

        "_tcpoutbox"     : "Send over TCP connection",
        "_tcpsignal"     : "Signals shutdown of TCP connection",

        "signal"         : "UNUSED"
    }

    # Response codes that trigger a redirect when "location" is supplied.
    REDIRECT_CODES = ("301", "302", "303", "307")

    # Requests redirected this many times already are not fetched.
    MAX_REDIRECTS = 3

    def __init__(self, starturl, postbody = "", connectionclass = TCPClient):
        super(SingleShotHTTPClient, self).__init__()
        # Both children stay None while no page is being downloaded.
        self.tcpclient = None
        self.httpparser = None
        # Pending HTTPRequest objects: the start URL, then any redirects.
        self.requestqueue = []
        self.starturl = starturl
        self.connectionclass = connectionclass

        self.postbody = postbody

    def formRequest(self, url):
        """Craft a HTTP request string for the supplied url.

        The result of splitUri(url) is returned with a "request" entry added.
        That entry is a list: element 0 is the request line plus headers as
        one string; element 1, present only for a POST, is the body.
        """
        splituri = splitUri(url)

        host = splituri["uri-server"]
        if "uri-port" in splituri:
            host += ":" + splituri["uri-port"]

        lines = []
        if self.postbody == "":
            lines.append("GET " + splituri["raw-uri"] + " HTTP/1.1\r\n")
        else:
            lines.append("POST " + splituri["raw-uri"] + " HTTP/1.1\r\n")
            lines.append("Content-Length: " + str(len(self.postbody)) + "\r\n")

        lines.append("Host: " + host + "\r\n")
        lines.append("User-agent: Kamaelia HTTP Client 0.3 (RJL)\r\n")
        # keep-alive is a work around for lack of shutdown notification in TCPClient
        lines.append("Connection: Keep-Alive\r\n")
        lines.append("\r\n")

        # might improve performance by sending more together
        splituri["request"] = ["".join(lines)]

        if self.postbody != "":
            splituri["request"].append(self.postbody)

        return splituri

    def makeRequest(self, request):
        """Connect to the remote HTTP server and send request.

        Builds a new connection component and HTTPParser, links them to this
        component's private inboxes/outboxes, activates both and then sends
        the request parts over the connection.
        """
        self.tcpclient = None
        self.httpparser = None

        # NOTE(review): relies on intval yielding None for a missing or
        # unparsable port - verify against intval's definition.
        port = intval(request.requestobject.get("uri-port", ""))
        if port is None:
            port = 80

        self.tcpclient = self.connectionclass(request.requestobject["uri-server"], port)
        self.httpparser = HTTPParser(mode="response")

        self.link( (self, "_tcpoutbox"),       (self.tcpclient, "inbox") )
        self.link( (self, "_tcpsignal"),       (self.tcpclient, "control") )
        self.link( (self.tcpclient, "signal"), (self, "_tcpcontrol") )

        # incoming TCP data -> HTTPParser directly
        self.link( (self.tcpclient, "outbox"), (self.httpparser, "inbox") )

        self.link( (self, "_parsersignal"), (self.httpparser, "control") )
        self.link( (self.httpparser, "outbox"), (self, "_parserinbox") )
        self.link( (self.httpparser, "signal"), (self, "_parsercontrol") )

        self.addChildren( self.tcpclient, self.httpparser )
        self.tcpclient.activate()
        self.httpparser.activate()
        self.response = ""

        payload = request.requestobject["request"]
        if isinstance(payload, str):
            self.send(payload, "_tcpoutbox")
        else:
            for part in payload:
                self.send(part, "_tcpoutbox")

    def shutdownKids(self):
        """Close TCP connection and HTTP parser (no-op unless both exist)."""
        if self.tcpclient is not None and self.httpparser is not None:
            self.send(producerFinished(), "_tcpsignal")
            self.send(shutdown(), "_parsersignal")
            self.removeChild(self.tcpclient)
            self.removeChild(self.httpparser)
            self.tcpclient = None
            self.httpparser = None

    def handleRedirect(self, header):
        """Check for a redirect response and queue the fetching of the page
        it points to if it is such a response.

        Returns True if the response code is in REDIRECT_CODES and a
        non-empty "location" header is present, and False otherwise.
        """
        if header["responsecode"] in self.REDIRECT_CODES:
            # location header gives the redirect URL
            newurl = header["headers"].get("location", "")
            if newurl != "":
                redirectedrequest = HTTPRequest(self.formRequest(newurl), self.currentrequest.redirectcount + 1)
                self.requestqueue.append(redirectedrequest)
                return True
            # No target given: fall through so the caller handles it like
            # any other non-200 response.
        return False

    def main(self):
        """Main loop: queue the start URL, run mainBody() until it returns a
        false value, then send producerFinished on "signal"."""
        self.requestqueue.append(HTTPRequest(self.formRequest(self.starturl), 0))
        while self.mainBody():
            yield 1
        self.send(producerFinished(self), "signal")
        yield 1
        return

    def mainBody(self):
        """Called repeatedly by main loop. Checks inboxes and processes messages received.
        Start the fetching of the new page if the current one is a redirect and has been
        completely fetched.

        Returns 1 to keep running, 0 when finished (queue exhausted, redirect
        limit hit, or shutdown received on "control").
        """
        self.send("SingleShotHTTPClient.mainBody()", "debug")
        while self.dataReady("_parserinbox"):
            msg = self.recv("_parserinbox")
            if isinstance(msg, ParsedHTTPHeader):
                self.send("SingleShotHTTPClient received a ParsedHTTPHeader on _parserinbox", "debug")
                # Redirects are queued by handleRedirect(); otherwise only a
                # 200 header is sent on, anything else is treated as not
                # found and dropped.
                if not self.handleRedirect(msg.header):
                    if msg.header["responsecode"] == "200":
                        self.send(msg, "outbox")

            elif isinstance(msg, ParsedHTTPBodyChunk):
                self.send("SingleShotHTTPClient received a ParsedHTTPBodyChunk on _parserinbox", "debug")
                # if not redirecting then send the response on.
                # NOTE(review): the body of a non-200, non-redirect response
                # is forwarded too, although its header was dropped above.
                if not self.requestqueue:
                    self.send(msg, "outbox")

            elif isinstance(msg, ParsedHTTPEnd):
                self.send("SingleShotHTTPClient received a ParsedHTTPEnd on _parserinbox", "debug")
                # if not redirecting then send the response on
                if not self.requestqueue:
                    self.send(msg, "outbox")
                # The response is complete, so drop the connection; the next
                # call either starts a queued redirect or finishes.
                self.shutdownKids()
                return 1

        while self.dataReady("_parsercontrol"):
            # The message itself is discarded; only its arrival is logged.
            self.recv("_parsercontrol")
            self.send("SingleShotHTTPClient received something on _parsercontrol", "debug")

        while self.dataReady("_tcpcontrol"):
            # Pass TCP client signals straight through to the parser.
            msg = self.recv("_tcpcontrol")
            self.send(msg, "_parsersignal")

        while self.dataReady("control"):
            msg = self.recv("control")
            if isinstance(msg, (shutdownMicroprocess, shutdown)):
                self.shutdownKids()
                return 0

        # if we're not currently downloading a page
        if self.tcpclient is None:
            # then either we've finished or we should download the next URL (if we've been redirected)
            if self.requestqueue:
                self.currentrequest = self.requestqueue.pop(0)
                if self.currentrequest.redirectcount >= self.MAX_REDIRECTS:
                    # that many redirects is excessive, give up, we're probably in a loop anyway
                    return 0
                self.makeRequest(self.currentrequest)
            else:
                return 0

        self.pause()
        return 1
Exemple #3
0
class HTTPServer(component):
    """\
    HTTPServer() -> new HTTPServer component capable of handling a single connection

    Arguments:
       -- createRequestHandler - a function required by HTTPRequestHandler that
                                 creates the appropriate request-handler component
                                 for each request, see HTTPResourceGlue
    """

    Inboxes = {
        "inbox": "TCP data stream - receive",
        "mime-signal": "Error signals from MIME handler",
        "http-signal": "Error signals from the HTTP resource retriever",
        "control": "Receive shutdown etc. signals"
    }

    # NOTE: despite its description, "signal" is used - main() sends
    # producerFinished on it once the request handler has finished.
    Outboxes = {
        "outbox": "TCP data stream - send",
        "mime-control": "To MIME handler",
        "http-control": "To HTTP resource retriever's signalling inbox",
        "signal": "UNUSED"
    }

    def __init__(self, createRequestHandler):
        super(HTTPServer, self).__init__()
        self.createRequestHandler = createRequestHandler

    def initialiseComponent(self):
        """Create an HTTPParser component to convert the requests we receive
        into a more convenient form, and a HTTPRequestHandler component to
        sort out the correct response to requests received.

        Both are linked to this component's control/signal boxes, added as
        children and activated; "inbox" and "outbox" are wired through to
        them as passthrough links.
        """
        self.mimehandler = HTTPParser()
        # Use the factory stored by __init__; the bare name
        # createRequestHandler is not in scope inside this method.
        self.httphandler = HTTPRequestHandler(self.createRequestHandler)

        self.link((self, "mime-control"), (self.mimehandler, "control"))
        self.link((self.mimehandler, "signal"), (self, "mime-signal"))

        # Parsed requests flow straight from the parser to the handler.
        self.link((self.mimehandler, "outbox"), (self.httphandler, "inbox"))

        self.link((self, "http-control"), (self.httphandler, "control"))
        self.link((self.httphandler, "signal"), (self, "http-signal"))

        self.addChildren(self.mimehandler, self.httphandler)
        self.httphandler.activate()
        self.mimehandler.activate()

        self.link((self.httphandler, "outbox"), (self, "outbox"),
                  passthrough=2)
        self.link((self, "inbox"), (self.mimehandler, "inbox"), passthrough=1)

    def main(self):
        """Main loop.

        Sets up the child components, then relays signals until either a
        shutdown message arrives on "control" or the request handler reports
        producerFinished on "http-signal"; finally removes the children.
        """
        self.initialiseComponent()
        loop = True
        while loop:
            yield 1
            while self.dataReady("control"):
                temp = self.recv("control")
                if isinstance(temp, producerFinished):
                    # No more incoming data: tell the parser only.
                    self.send(temp, "mime-control")
                elif isinstance(temp, (shutdownMicroprocess, shutdown)):
                    self.send(shutdown(), "mime-control")
                    self.send(shutdown(), "http-control")
                    loop = False
                    break

            while self.dataReady("mime-signal"):
                # Drain the inbox. producerFinished from the parser needs no
                # action yet - wait 'til the request handler finishes.
                self.recv("mime-signal")

            while self.dataReady("http-signal"):
                temp = self.recv("http-signal")
                if isinstance(temp, producerFinished):
                    # The handler is done: close the connection.
                    sig = producerFinished(self)
                    self.send(sig, "mime-control")
                    self.send(sig, "signal")
                    loop = False

            self.pause()

        self.closeDownComponent()

    def closeDownComponent(self):
        "Remove my subcomponents (HTTPParser, HTTPRequestHandler)"
        # Iterate over a snapshot in case childComponents() hands back the
        # live list that removeChild() mutates.
        for child in list(self.childComponents()):
            self.removeChild(child)
        self.mimehandler = None
        self.httphandler = None
Exemple #4
0
class HTTPServer(component):
    """\
    HTTPServer() -> new HTTPServer component capable of handling a single connection

    Arguments:
       -- createRequestHandler - a function required by HTTPRequestHandler that
                                 creates the appropriate request-handler component
                                 for each request, see HTTPResourceGlue
    """

    Inboxes =  { "inbox"         : "TCP data stream - receive",
                 "mime-signal"   : "Error signals from MIME handler",
                 "http-signal"   : "Error signals from the HTTP resource retriever",
                 "control"       : "Receive shutdown etc. signals" }

    # NOTE: "signal" is marked UNUSED but main() does send producerFinished
    # on it when the request handler finishes.
    Outboxes = { "outbox"        : "TCP data stream - send",
                 "mime-control"  : "To MIME handler",
                 "http-control"  : "To HTTP resource retriever's signalling inbox",
                 "signal"        : "UNUSED" }

    def __init__(self, createRequestHandler):
        super(HTTPServer, self).__init__()
        self.createRequestHandler = createRequestHandler

    def initialiseComponent(self):
        """Create an HTTPParser component to convert the requests we receive
        into a more convenient form, and a HTTPRequestHandler component to
        sort out the correct response to requests received.

        The two children are linked together and to this component's boxes,
        registered with addChildren() and activated.
        """
        self.mimehandler = HTTPParser()
        # The factory lives on the instance (set in __init__); the bare name
        # createRequestHandler is not defined in this scope.
        self.httphandler = HTTPRequestHandler(self.createRequestHandler)

        self.link( (self,"mime-control"), (self.mimehandler,"control") )
        self.link( (self.mimehandler, "signal"), (self, "mime-signal") )

        # parser output -> request handler directly
        self.link( (self.mimehandler, "outbox"), (self.httphandler, "inbox") )

        self.link( (self, "http-control"), (self.httphandler, "control") )
        self.link( (self.httphandler, "signal"), (self, "http-signal") )

        self.addChildren(self.mimehandler, self.httphandler)
        self.httphandler.activate()
        self.mimehandler.activate()

        self.link((self.httphandler, "outbox"), (self, "outbox"), passthrough=2)
        self.link((self, "inbox"), (self.mimehandler, "inbox"), passthrough=1)

    def main(self):
        """Main loop: initialise the children, relay control signals to them,
        and stop when a shutdown message arrives on "control" or the request
        handler sends producerFinished on "http-signal"."""
        self.initialiseComponent()
        loop = True
        while loop:
            yield 1
            while self.dataReady("control"):
                temp = self.recv("control")
                if isinstance(temp, producerFinished):
                    # Forward end-of-input to the parser only.
                    self.send(temp, "mime-control")
                elif isinstance(temp, (shutdownMicroprocess, shutdown)):
                    self.send(shutdown(), "mime-control")
                    self.send(shutdown(), "http-control")
                    loop = False
                    break

            while self.dataReady("mime-signal"):
                # Messages are consumed and ignored: we don't need to care
                # yet - wait 'til the request handler finishes.
                self.recv("mime-signal")

            while self.dataReady("http-signal"):
                temp = self.recv("http-signal")
                if isinstance(temp, producerFinished):
                    sig = producerFinished(self)
                    self.send(sig, "mime-control")
                    self.send(sig, "signal")
                    # close the connection
                    loop = False

            self.pause()

        self.closeDownComponent()

    def closeDownComponent(self):
        "Remove my subcomponents (HTTPParser, HTTPRequestHandler)"
        # Copy first so removal cannot disturb the iteration if
        # childComponents() returns the underlying list.
        for child in list(self.childComponents()):
            self.removeChild(child)
        self.mimehandler = None
        self.httphandler = None