# Imports assumed by this snippet (Python 2): asyncore and socket from the
# standard library, and PriorityQueue from the Queue module (Python 2.6+).
# The original HarvestMan module may import these differently.
import socket
import asyncore
from Queue import PriorityQueue


class HarvestManUrlServer(asyncore.dispatcher_with_send):
    """ An asynchronous url server class for HarvestMan. This class can
    replace the url queue and work as a url server, multiplexing several
    url requests simultaneously. """

    def __init__(self, host, port, protocol='tcp'):
        self.urls = PriorityQueue(0)
        self.port = port
        self.host = host
        self.protocol = protocol
        self.urlmap = {}
        asyncore.dispatcher_with_send.__init__(self)
        self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            self.bind((self.host, port))
        except socket.error:
            raise
        self.listen(5)

    def get_port(self):
        return self.port

    def seturl(self, url):
        self.urlmap['last'] = url

    def geturl(self):
        return self.urlmap['last']

    def handle_accept(self):
        newSocket, address = self.accept()
        secondary_url_server(sock=newSocket, addr=address, url_server=self)

    def handle_close(self):
        pass

    def notify(self, handler):
        """ Notify method for the secondary socket server to add urls. (Not used) """
        for url in handler.urls:
            self.urls.put(url)
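# NOTE: handle_accept() above hands the accepted connection to a
# secondary_url_server, which is defined elsewhere in HarvestMan and is not
# part of this snippet. The sketch below is only a hypothetical illustration
# of how such an asyncore handler could service the connection (replying with
# the last stored URL); the real class and its wire protocol may differ.

class _SecondaryUrlHandlerSketch(asyncore.dispatcher_with_send):

    def __init__(self, sock, addr, url_server):
        asyncore.dispatcher_with_send.__init__(self, sock)
        self.addr = addr
        self.server = url_server

    def handle_read(self):
        # Read the client's request (its content is ignored in this sketch)
        data = self.recv(8192)
        if data:
            try:
                # Reply with the most recently stored URL, if any
                self.send(self.server.geturl())
            except KeyError:
                self.send('')

    def handle_close(self):
        self.close()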
# An extended variant of the same class, with separate queues for crawler and
# fetcher data, list variants of seturl/geturl, and an expt handler.

class HarvestManUrlServer(asyncore.dispatcher_with_send):
    """ An asynchronous url server class for HarvestMan. This class can
    replace the url queue and work as a url server, multiplexing several
    url requests simultaneously. """

    def __init__(self, host, port, protocol='tcp'):
        # For storing data from crawlers
        self.urls = PriorityQueue(0)
        # For storing data from fetchers
        self.urls2 = PriorityQueue(0)
        self.port = port
        self.host = host
        self.protocol = protocol
        self.urlmap = {}
        # Count of gets
        self.count1 = 0
        self.count2 = 0
        asyncore.dispatcher_with_send.__init__(self)
        self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            self.bind((self.host, port))
            # Read back the bound port (useful when binding to port 0)
            self.port = self.getsockname()[1]
        except socket.error:
            raise
        self.listen(20)

    def get1(self):
        # Non-blocking get of crawler data; raises Queue.Empty if nothing is queued
        #if self.count1 > 1:
        #    return self.urls.get()
        #else:
        return self.urls.get_nowait()

    def get2(self):
        # Non-blocking get of fetcher data; raises Queue.Empty if nothing is queued
        #if self.count2 > 1:
        #    return self.urls2.get()
        #else:
        return self.urls2.get_nowait()

    def get_port(self):
        return self.port

    def seturl(self, url):
        self.urlmap['lasturl'] = url

    def seturllist(self, urllist):
        self.urlmap['lastlist'] = urllist

    def geturl(self):
        return self.urlmap['lasturl']

    def geturllist(self):
        return self.urlmap['lastlist']

    def handle_accept(self):
        newSocket, address = self.accept()
        # print newSocket, address
        sec = secondary_url_server(sock=newSocket, addr=address,
                                   url_server=self)

    def handle_close(self):
        pass

    def handle_expt(self):
        pass

    def notify(self, handler):
        """ Notify method for the secondary socket server to add urls. (Not used) """
        for url in handler.urls:
            self.urls.put(url)
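# get1()/get2() use get_nowait(), which raises Queue.Empty when the
# corresponding queue is empty, so a consumer is expected to catch that
# exception. The helper below is only an illustrative sketch; its name is
# not part of HarvestMan.

from Queue import Empty

def poll_crawler_data(server):
    """ Return the next item posted by a crawler, or None if nothing is queued. """
    try:
        return server.get1()
    except Empty:
        return None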
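# A minimal, hypothetical way of bringing the server up; HarvestMan's own
# startup code may differ. Passing port 0 makes the OS choose a free port,
# which the extended __init__ above reads back via getsockname().

if __name__ == '__main__':
    server = HarvestManUrlServer('localhost', 0)
    print 'URL server listening on port %d' % server.get_port()
    # Enter the asyncore polling loop; handle_accept() fires on new connections
    asyncore.loop()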