def __init__ (self, configuration, name, program, protocol):
	"""Set up a redirector worker: parsers, loggers and (optionally) the
	forked helper process speaking the squid redirector protocol."""
	self.configuration = configuration
	self.http_parser = self.HTTPParser(configuration)
	self.tls_parser = self.TLSParser(configuration)
	self.enabled = bool(program is not None) and configuration.redirector.enable
	self._transparent = configuration.http.transparent
	self.log = Logger('worker ' + str(name), configuration.log.worker)
	self.usage = UsageLogger('usage', configuration.log.worker)
	self.response_factory = self.ResponseFactory()
	self.child_factory = self.ChildFactory(configuration, name)

	self.wid = name              # a unique name
	self.creation = time.time()  # when the thread was created
	# self.last_worked = self.creation  # when the thread last picked a task

	self.program = program       # the squid redirector program to fork
	self.running = True          # the thread is active
	self.stats_timestamp = None  # time of the most recent outstanding request to generate stats

	self._proxy = 'ExaProxy-%s-id-%d' % (configuration.proxy.version, os.getpid())

	# 'url' helpers are line based, so the child is run with universal newlines
	universal = configuration.redirector.protocol == 'url'

	# Do not move: the fork must happen AFTER the setup above
	self.process = self.child_factory.createProcess(self.program, universal=universal) if program else None
def __init__(self, configuration, poller):
	"""Create the redirector manager that spawns and tracks worker processes."""
	self.low = configuration.redirector.minimum    # minimum concurrent redirector workers
	self.high = configuration.redirector.maximum   # maximum concurrent redirector workers
	self.poller = poller
	self.configuration = configuration
	self.queue = Queue()    # requests we do not immediately have the resources to process
	self.nextid = 1         # unique id to give to the next spawned worker
	self.worker = {}        # worker tasks for each spawned child
	self.processes = {}     # worker tasks indexed by file descriptors we can poll
	self.available = set()  # workers currently available to handle new requests
	self.active = {}        # workers busy waiting for a response from the spawned process
	self.stopping = set()   # workers we want to stop as soon as they stop being active
	self.redirector_factory = RedirectorFactory(
		configuration,
		configuration.redirector.program,
		configuration.redirector.protocol,
	)
	self.log = Logger('manager', configuration.log.manager)
def __init__(self, name, poller, read_name, max_clients):
	"""Track the listening sockets of one server and its client count."""
	self.name = name
	self.poller = poller
	self.read_name = read_name
	self.max_clients = max_clients
	self.socks = {}
	self.binding = set()
	self.client_count = 0
	self.saturated = False  # we are receiving more connections than we can handle
	self.serving = True     # we are currently listening
	# NOTE(review): 'configuration' is not a parameter here - presumably a
	# module-level global; verify it is in scope at the call site
	self.log = Logger('server', configuration.log.server)
	self.log.info('server [%s] accepting up to %d clients' % (name, max_clients))
def __init__(self, poller, configuration):
	# Client connection manager: tracks connected clients and transfer counters.
	self.total_sent4 = 0L      # presumably bytes sent over IPv4 - TODO confirm
	self.total_sent6 = 0L      # presumably bytes sent over IPv6 - TODO confirm
	self.total_requested = 0L  # presumably a request counter - TODO confirm
	# clients which connected but have not yet sent a request; entries
	# expire after http.idle_connect seconds
	self.norequest = TimeCache(configuration.http.idle_connect)
	self.bysock = {}    # client state keyed by socket object
	self.byname = {}    # client state keyed by name
	self.buffered = []  # clients with data waiting to be flushed
	self._nextid = 0    # source of unique client ids
	self.poller = poller
	self.log = Logger('client', configuration.log.client)
	self.proxied = configuration.http.proxied
	self.max_buffer = configuration.http.header_size
def __init__ (self,configuration,poller):
	"""Thread-based redirector manager: owns the worker threads and the
	queue of HTTP headers waiting to be classified."""
	self.configuration = configuration
	self.poller = poller                              # poller interface that checks for events on sockets
	self.low = configuration.redirector.minimum       # minimum number of workers at all time
	self.high = configuration.redirector.maximum      # maximum number of workers at all time
	self.program = configuration.redirector.program   # what program speaks the squid redirector API
	self.nextid = 1        # incremental number naming the next worker
	self.queue = Queue()   # HTTP headers waiting to be processed
	self.worker = {}       # our worker threads
	self.closing = set()   # workers that are currently closing
	self.running = True    # we are running
	self.log = Logger('manager', configuration.log.manager)
def __init__(self, supervisor, configuration):
	# Content (server-side download) manager: tracks outgoing connections
	# from opening to established, plus transfer counters.
	self.total_sent4 = 0L    # presumably bytes sent over IPv4 - TODO confirm
	self.total_sent6 = 0L    # presumably bytes sent over IPv6 - TODO confirm
	self.opening = {}        # connections not yet established
	self.established = {}    # established connections
	self.byclientid = {}     # connections keyed by client id
	self.buffered = []       # connections with data waiting to be written
	self.retry = []          # connections to retry
	self.configuration = configuration
	self.supervisor = supervisor
	self.poller = supervisor.poller
	self.log = Logger('download', configuration.log.download)
	# normalised root of the local html pages served by the proxy
	self.location = os.path.realpath(os.path.normpath(configuration.web.html))
	self.page = supervisor.page
	self._header = {}
def __init__ (self, configuration, querier, decider, logger, poller):
	"""Glue object running inside the redirector child process."""
	self.querier = querier  # incoming requests from the proxy
	self.decider = decider  # decides how each request should be handled
	self.logger = logger    # log writing interfaces
	self.poller = poller
	# NOT the same logger the rest of the proxy uses since we're running
	# in a different process
	self.log = Logger('redirector', configuration.log.supervisor)
	self.running = True
def __init__(self, poller, configuration):
	# Multi-protocol client manager (http proxy / icap / tls) with
	# per-protocol header buffer limits.
	self.total_sent4 = 0L      # presumably bytes sent over IPv4 - TODO confirm
	self.total_sent6 = 0L      # presumably bytes sent over IPv6 - TODO confirm
	self.total_requested = 0L  # presumably a request counter - TODO confirm
	# clients which connected but have not yet sent a request; entries
	# expire after http.idle_connect seconds
	self.norequest = TimeCache(configuration.http.idle_connect)
	self.bysock = {}    # client state keyed by socket object
	self.byname = {}    # client state keyed by name
	self.buffered = []  # clients with data waiting to be flushed
	self._nextid = 0    # source of unique client ids
	self.poller = poller
	self.log = Logger('client', configuration.log.client)
	# maximum header size accepted, per protocol
	self.http_max_buffer = configuration.http.header_size
	self.icap_max_buffer = configuration.icap.header_size
	self.tls_max_buffer = configuration.tls.header_size
	self.passthrough_max_buffer = 0
	# whether each listener sits behind another proxy (proxy protocol)
	self.proxied = {
		'proxy' : configuration.http.proxied,
		'icap' : configuration.icap.proxied,
		'tls' : configuration.tls.proxied,
	}
def __init__(self, poller, configuration, max_workers):
	"""DNS resolver manager: one permanent UDP client plus on-demand TCP
	clients, with a bounded cache of resolved names."""
	self.poller = poller
	self.configuration = configuration
	self.resolver_factory = self.resolverFactory(configuration)

	# The actual work is done in the worker
	self.worker = self.resolver_factory.createUDPClient()

	# All currently active clients (one UDP and many TCP)
	self.workers = {self.worker.socket: self.worker}
	self.poller.addReadSocket('read_resolver', self.worker.socket)

	# Track the clients currently expecting results
	self.clients = {}  # client_id : identifier

	# Key should be the hostname rather than the request ID?
	self.resolving = {}  # identifier, worker_id :

	# TCP workers that have not yet sent a complete request
	self.sending = {}  # sock :

	# Maximum number of entries we will cache (1024 DNS lookups per second!)
	# assuming 1k per entry - which is a lot - it means 20Mb of memory,
	# which at the default of 900 seconds of cache is 22 new hosts per second
	self.max_entries = 1024 * 20

	# track the current queries and when they were started
	self.active = []

	self.cache = {}
	self.cached = deque()

	self.max_workers = max_workers
	self.worker_count = len(self.workers)  # the UDP client

	self.waiting = []

	self.log = Logger('resolver', configuration.log.resolver)
	self.chained = {}
def __init__(self, configuration, name, request_box, program):
	"""Redirector worker thread: forks the classification helper and
	communicates results back to the main thread over a pipe."""
	self.configuration = configuration
	self.icap_parser = self.ICAPParser(configuration)
	self.enabled = configuration.redirector.enable
	self.protocol = configuration.redirector.protocol
	self._transparent = configuration.http.transparent
	self.log = Logger('worker ' + str(name), configuration.log.worker)
	self.usage = UsageLogger('usage', configuration.log.worker)

	self.universal = (self.protocol == 'url')
	# icap://host/path -> host ; empty string when not using ICAP
	if self.protocol.startswith('icap://'):
		self.icap = self.protocol[len('icap://'):].split('/')[0]
	else:
		self.icap = ''

	# unbuffered pipe for communication with the main thread
	r, w = os.pipe()
	self.response_box_write = os.fdopen(w, 'w', 0)  # results are written here
	self.response_box_read = os.fdopen(r, 'r', 0)   # read from the main thread

	self.wid = name              # a unique name
	self.creation = time.time()  # when the thread was created
	# self.last_worked = self.creation  # when the thread last picked a task

	self.request_box = request_box  # queue with HTTP headers to process
	self.program = program          # the squid redirector program to fork
	self.running = True             # the thread is active
	self.stats_timestamp = None     # time of the most recent outstanding request to generate stats

	self._proxy = 'ExaProxy-%s-id-%d' % (configuration.proxy.version, os.getpid())

	# a 'url' protocol never starts with 'icap://', so these are exclusive
	if self.protocol == 'url':
		self.classify = self._classify_url
	elif self.protocol.startswith('icap://'):
		self.classify = self._classify_icap

	# Do not move, we need the forking AFTER the setup
	self.process = self._createProcess()  # the forked program to handle classification
	Thread.__init__(self)
class Server(object):
	# Listening server (config variant): accepts clients on bound sockets
	# and can resume listening after having stopped.
	_listen = staticmethod(listen)

	def __init__(self, name, poller, read_name, config):
		self.socks = {}
		self.name = name
		self.poller = poller
		self.read_name = read_name
		self.max_clients = config.connections
		self.client_count = 0
		self.saturated = False  # we are receiving more connections than we can handle
		self.binding = set()    # (ip, port, timeout, backlog) tuples we listen on
		# NOTE(review): 'configuration' is not defined in this scope -
		# presumably a module-level global; verify at the call site
		self.log = Logger('server', configuration.log.server)
		self.serving = config.enable  # we are currently listening
		if self.serving:
			self.log.info('server [%s] accepting up to %d clients' % (name, self.max_clients))

	def accepting (self):
		# Return True when we are listening (or successfully re-listened on
		# every bound address); False as soon as one re-listen fails.
		if self.serving:
			return True
		for ip, port, timeout, backlog in self.binding:
			try:
				self.log.critical('re-listening on %s:%d' % (ip,port))
				self.listen(ip,port,timeout,backlog)
			except socket.error,e:  # Python 2 except syntax
				self.log.critical('could not re-listen on %s:%d : %s' % (ip,port,str(e)))
				return False
		self.serving = True
		return True
class Server(object):
	# Listening server (max_clients variant): accepts clients on bound
	# sockets and can resume listening after having stopped.
	_listen = staticmethod(listen)

	def __init__(self, name, poller, read_name, max_clients):
		self.socks = {}
		self.name = name
		self.poller = poller
		self.read_name = read_name
		self.max_clients = max_clients
		self.client_count = 0
		self.saturated = False  # we are receiving more connections than we can handle
		self.binding = set()    # (ip, port, timeout, backlog) tuples we listen on
		self.serving = True     # we are currently listening
		# NOTE(review): 'configuration' is not defined in this scope -
		# presumably a module-level global; verify at the call site
		self.log = Logger('server', configuration.log.server)
		self.log.info('server [%s] accepting up to %d clients' % (name, max_clients))

	def accepting (self):
		# Return True when we are listening (or successfully re-listened on
		# every bound address); False as soon as one re-listen fails.
		if self.serving:
			return True
		for ip, port, timeout, backlog in self.binding:
			try:
				self.log.critical('re-listening on %s:%d' % (ip,port))
				self.listen(ip,port,timeout,backlog)
			except socket.error,e:  # Python 2 except syntax
				self.log.critical('could not re-listen on %s:%d : %s' % (ip,port,str(e)))
				return False
		self.serving = True
		return True
class ChildFactory:
	# Forks and destroys the external classifier processes used by the
	# redirector workers.

	def preExec(self):
		# Run in the child before exec: start a new process group so that
		# signals aimed at the proxy do not reach the helper program.
		os.setpgrp()

	def __init__(self, configuration, name):
		self.log = Logger('worker ' + str(name), configuration.log.worker)

	def createProcess(self, program, universal=False):
		# Spawn 'program' (split on spaces) with pipes on stdin/stdout/stderr.
		# Returns the Popen object, or None when the spawn failed.
		try:
			process = subprocess.Popen(
				program.split(' '),
				stdin=subprocess.PIPE,
				stdout=subprocess.PIPE,
				stderr=subprocess.PIPE,
				universal_newlines=universal,
				preexec_fn=self.preExec,
			)
			self.log.debug('spawn process %s' % program)
		except KeyboardInterrupt:
			process = None
		except (subprocess.CalledProcessError, OSError, ValueError):
			self.log.error('could not spawn process %s' % program)
			process = None
		if process:
			try:
				# the poller must never block reading the child's stderr
				fcntl.fcntl(process.stderr, fcntl.F_SETFL, os.O_NONBLOCK)
			except IOError:
				self.destroyProcess(process)
				process = None
		return process

	def destroyProcess(self, process):
		# Terminate and reap the child, ignoring 'no such process' races.
		try:
			process.terminate()
			process.wait()
			self.log.info('terminated process PID %s' % process.pid)
		except OSError, e:  # Python 2 except syntax
			# No such processs
			if e[0] != errno.ESRCH:
				self.log.error('PID %s died' % process.pid)
class ChildFactory: def preExec (self): os.setpgrp() def __init__ (self, configuration, name): self.log = Logger('worker ' + str(name), configuration.log.worker) def createProcess (self, program, universal=False): try: process = subprocess.Popen([program], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=universal, preexec_fn=self.preExec, ) self.log.debug('spawn process %s' % program) except KeyboardInterrupt: process = None except (subprocess.CalledProcessError,OSError,ValueError): self.log.error('could not spawn process %s' % program) process = None if process: try: fcntl.fcntl(process.stderr, fcntl.F_SETFL, os.O_NONBLOCK) except IOError: self.destroyProcess(process) process = None return process def destroyProcess (self, process): try: process.terminate() process.wait() self.log.info('terminated process PID %s' % process.pid) except OSError, e: # No such processs if e[0] != errno.ESRCH: self.log.error('PID %s died' % process.pid)
def __init__(self, configuration, web, proxy, decider, content, client, resolver, logger, usage, poller): self.web = web # Manage listening web sockets self.proxy = proxy # Manage listening proxy sockets self.decider = decider # Task manager for handling child decider processes self.content = content # The Content Download manager self.client = client # Currently open client connections self.resolver = resolver # The DNS query manager self.poller = poller # Interface to the poller self.logger = logger # Log writing interfaces self.usage = usage # Request logging self.running = True # Until we stop we run :) self.nb_events = 0L # Number of events received self.nb_loops = 0L # Number of loop iteration self.events = [] # events so we can report them once in a while self.log = Logger('supervisor', configuration.log.supervisor)
def __init__ (self, poller, configuration, max_workers):
	"""DNS resolver manager: a permanent UDP client plus per-query TCP
	clients, with a bounded cache of resolutions."""
	self.poller = poller
	self.configuration = configuration
	self.resolver_factory = self.resolverFactory(configuration)

	# The actual work is done in the worker
	self.worker = self.resolver_factory.createUDPClient()

	# All currently active clients (one UDP and many TCP)
	self.workers = {}
	self.workers[self.worker.socket] = self.worker
	self.poller.addReadSocket('read_resolver', self.worker.socket)

	self.clients = {}    # client_id : identifier - clients currently expecting results
	self.resolving = {}  # identifier, worker_id : (key should perhaps be the hostname)
	self.sending = {}    # sock : TCP workers that have not yet sent a complete request

	# Maximum number of entries we will cache (1024 DNS lookups per second!)
	# assuming 1k per entry - which is a lot - it means 20Mb of memory,
	# which at the default of 900 seconds of cache is 22 new hosts per second
	self.max_entries = 1024*20

	self.active = []  # current queries and when they were started
	self.cache = {}
	self.cached = deque()

	self.max_workers = max_workers
	self.worker_count = len(self.workers)  # the UDP client
	self.waiting = []

	self.log = Logger('resolver', configuration.log.resolver)
	self.chained = {}
def __init__ (self, configuration, poller):
	"""Process-based redirector manager: spawns worker processes and
	tracks which are idle, busy or scheduled to stop."""
	self.configuration = configuration
	self.poller = poller
	self.low = configuration.redirector.minimum   # minimum concurrent redirector workers
	self.high = configuration.redirector.maximum  # maximum concurrent redirector workers

	self.queue = Queue()    # requests we cannot process right away
	self.nextid = 1         # unique id for the next spawned worker
	self.worker = {}        # worker tasks for each spawned child
	self.processes = {}     # worker tasks indexed by pollable file descriptors
	self.available = set()  # workers free to handle new requests
	self.active = {}        # workers waiting on a response from their process
	self.stopping = set()   # workers to stop once no longer active

	self.redirector_factory = RedirectorFactory(
		configuration,
		configuration.redirector.program,
		configuration.redirector.protocol,
	)
	self.log = Logger('manager', configuration.log.manager)
def __init__ (self, configuration, name, request_box, program):
	"""Redirector worker thread: forks the classification helper and
	reports results to the main thread through an unbuffered pipe."""
	self.configuration = configuration
	self.icap_parser = self.ICAPParser(configuration)
	self.enabled = configuration.redirector.enable
	self.protocol = configuration.redirector.protocol
	self._transparent = configuration.http.transparent
	self.log = Logger('worker ' + str(name), configuration.log.worker)
	self.usage = UsageLogger('usage', configuration.log.worker)

	self.universal = self.protocol == 'url'
	# icap://host/path -> host ; empty string when not using ICAP
	self.icap = self.protocol[len('icap://'):].split('/')[0] if self.protocol.startswith('icap://') else ''

	# unbuffered pipe for communication with the main thread
	read_end, write_end = os.pipe()
	self.response_box_write = os.fdopen(write_end,'w',0)  # results are written here
	self.response_box_read = os.fdopen(read_end,'r',0)    # read from the main thread

	self.wid = name              # a unique name
	self.creation = time.time()  # when the thread was created
	# self.last_worked = self.creation  # when the thread last picked a task

	self.request_box = request_box  # queue with HTTP headers to process
	self.program = program          # the squid redirector program to fork
	self.running = True             # the thread is active
	self.stats_timestamp = None     # time of the most recent outstanding request to generate stats

	self._proxy = 'ExaProxy-%s-id-%d' % (configuration.proxy.version,os.getpid())

	if self.protocol == 'url':
		self.classify = self._classify_url
	if self.protocol.startswith('icap://'):
		self.classify = self._classify_icap

	# Do not move, we need the forking AFTER the setup
	self.process = self._createProcess()  # the forked program to handle classification
	Thread.__init__(self)
class Page(object):
	# Web/monitoring UI: renders the HTML pages, JSON endpoints and the
	# (debug-only) eval/exec control pages.

	def __init__(self, supervisor):
		self.supervisor = supervisor
		self.monitor = supervisor.monitor
		self.email_sent = False  # only one email may be sent per process lifetime
		self.log = Logger('web', supervisor.configuration.log.web)

	def _introspection(self, objects):
		# Render an HTML listing of the attributes of the object addressed
		# by the path components in 'objects', with links to drill down.
		introduction = '<div style="padding: 10px 10px 10px 10px; font-weight:bold;">Looking at the internal of ExaProxy for %s </div><br/>\n' % cgi.escape('.'.join(objects))
		link = cgi.escape('/'.join(objects[:-1])) if objects[:-1] else 'supervisor'
		line = ['<a href="/information/introspection/%s.html">Back to parent object</a><br/>' % link]
		for k, content in self.monitor.introspection(objects):
			link = '/information/introspection/%s.html' % cgi.escape('%s/%s' % ('/'.join(objects), k))
			line.append('<a href="%s">%s</a><span class="value">%s</span><br/>' % (link, k, cgi.escape(content)))
		return introduction + _listing % ('\n'.join(line))

	def _configuration(self):
		# Render the running configuration as an HTML key/value listing.
		introduction = '<div style="padding: 10px 10px 10px 10px; font-weight:bold;">ExaProxy Configuration</div><br/>\n'
		line = []
		for k, v in sorted(self.monitor.configuration().items()):
			line.append('<span class="key">%s</span><span class="value"> %s</span><br/>' % (k, cgi.escape(str(v))))
		return introduction + _listing % ('\n'.join(line))

	def _statistics(self):
		# Render the current statistics as an HTML key/value listing.
		introduction = '<div style="padding: 10px 10px 10px 10px; font-weight:bold;">ExaProxy Statistics</div><br/>\n'
		line = []
		for k, v in sorted(self.monitor.statistics().items()):
			line.append('<span class="key">%s</span><span class="value"> %s</span><br/>' % (k, cgi.escape(str(str(v)))))
		return introduction + _listing % ('\n'.join(line))

	# The _connections/_processes/... methods below each render one graph
	# from the monitor's recorded time series.

	def _connections(self):
		return graph(self.monitor, 'Connections', 20000, [
			'clients.silent',
			'clients.speaking',
			'servers.opening',
			'servers.established',
		])

	def _processes(self):
		return graph(self.monitor, 'Forked processes', 20000, [
			'processes.forked',
			'processes.min',
			'processes.max',
		])

	def _requests(self):
		return graph(
			self.monitor,
			'Requests/seconds received from clients',
			20000,
			[
				'clients.requests',
			],
			True,
		)

	def _clients(self):
		return graph(
			self.monitor,
			'Bits/seconds received from clients',
			20000,
			[
				'transfer.client4',
				'transfer.client6',
			],
			True,
			adaptor=Bpstobps,
		)

	def _servers(self):
		return graph(
			self.monitor,
			'Bits/seconds received from servers',
			20000,
			[
				'transfer.content4',
				'transfer.content6',
			],
			True,
			adaptor=Bpstobps,
		)

	def _transfer(self):
		return graph(
			self.monitor,
			'Bits/seconds received',
			20000,
			[
				'transfer.client',
				'transfer.content',
			],
			True,
			adaptor=Bpstobps,
		)

	def _loops(self):
		return graph(
			self.monitor,
			'Reactor loops',
			20000,
			[
				'load.loops',
			],
			True,
		)

	def _events(self):
		return graph(
			self.monitor,
			'Sockets which became readeable',
			20000,
			[
				'load.events',
			],
			True,
		)

	def _queue(self):
		return graph(
			self.monitor,
			'Queued URL for classification',
			20000,
			[
				'queue.size',
			],
			True,
		)

	def _source(self, bysock):
		# Group the given sockets by peer host and render a per-host
		# connection count, most connections first.
		conns = 0
		clients = defaultdict(lambda: 0)
		for sock in bysock:
			try:
				host, port = sock.getpeername()
			except socket.error:
				host, port = None, None
			clients[host] += 1
			conns += 1
		ordered = defaultdict(list)
		for host, number in clients.items():
			ordered[number].append(host)
		result = ['<div style="padding: 10px 10px 10px 10px; font-weight:bold;">ExaProxy Statistics</div><br/>', '<center>we have %d connection(s) from %d source(s)</center><br/>' % (conns, len(clients))]
		for number in reversed(sorted(ordered)):
			for host in ordered[number]:
				result.append('<span class="key">%s</span><span class="value"> %s</span><br/>' % (host, number))
		return _listing % '\n'.join(result)

	def _servers_source(self):
		return self._source(self.supervisor.content.established)

	def _clients_source(self):
		return self._source(self.supervisor.client.bysock)

	def _workers(self):
		# Render the forms to change the redirector worker limits.
		form = '<form action="/control/workers/commit" method="get">%s: <input type="text" name="%s" value="%s"><input type="submit" value="Submit"></form>'
		change = {
			'exaproxy.redirector.minimum': self.supervisor.manager.low,
			'exaproxy.redirector.maximum': self.supervisor.manager.high,
		}
		forms = []
		for name in ('exaproxy.redirector.minimum', 'exaproxy.redirector.maximum'):
			value = change[name]
			forms.append(form % (name, name, value))
		return '<pre style="margin-left:40px;">\n' + '\n'.join(forms)

	def _run(self):
		# Render the debug eval/exec forms.
		s = '<pre style="margin-left:40px;">'
		s += '<form action="/control/debug/eval" method="get">eval <textarea type="text" name="python" cols="100" rows="10"></textarea><input type="submit" value="Submit"></form>'
		s += '<form action="/control/debug/exec" method="get">exec <textarea type="text" name="python" cols="100" rows="10"></textarea><input type="submit" value="Submit"></form>'
		return s

	def _logs(self):
		return 'do not view this in a web browser - the input is not sanitised, you have been warned !\n\n' + '\n'.join(History().formated())

	def _errs(self):
		return 'do not view this in a web browser - the input is not sanitised, you have been warned !\n\n' + '\n'.join(Errors().formated())

	def _email(self, args):
		# Send a (single) email; refuses after the first successful attempt.
		if self.email_sent:
			return '<center><b>You can only send one email per time ExaProxy is started</b></center>'
		self.email_sent, message = mail.send(args)
		return message

	def _json_running(self):
		return json.dumps(self.monitor.seconds[-1], sort_keys=True, indent=2, separators=(',', ': '))

	def _json_configuration(self):
		return json.dumps(self.monitor.configuration(), sort_keys=True, indent=2, separators=(',', ': '))

	def html(self, path):
		# Route an HTTP path (with optional ?query args) to the page
		# renderers above and return the response body.
		if len(path) > 5000:
			return menu('<center><b>path is too long</b></center>')
		if path == '/':
			path = '/index.html'
			args = ''
		elif '?' in path:
			path, args = path.split('?', 1)
		else:
			args = ''
		if not path.startswith('/'):
			return menu('<center><b>invalid url</b></center>')
		elif not path.endswith('.html'):
			if path == '/humans.txt':
				return humans.txt
			if path not in ('/json', '/json/running', '/json/configuration', '/control/workers/commit', '/control/debug/eval', '/control/debug/exec'):
				return menu('<center><b>invalid url</b></center>')
			sections = path[1:].split('/') + ['']
		else:
			sections = path[1:-5].split('/') + ['']
		if not sections[0]:
			return menu(index)
		section = sections[0]
		subsection = sections[1]
		if section == 'json':
			if subsection == 'running':
				return self._json_running()
			if subsection == 'configuration':
				return self._json_configuration()
			return '{ "errror" : "invalid url", "valid-paths": [ "/json/running", "/json/configuration" ] }'
		if section == 'index':
			return menu(index)
		if section == 'information':
			if subsection == 'introspection':
				return menu(self._introspection(sections[2:-1]))
			if subsection == 'configuration':
				return menu(self._configuration())
			if subsection == 'statistics':
				return menu(self._statistics())
			if subsection == 'logs':
				return self._logs()
			if subsection == 'errs':
				return self._errs()
			return menu(index)
		if section == 'graph':
			if subsection == 'processes':
				return menu(self._processes())
			if subsection == 'connections':
				return menu(self._connections())
			if subsection == 'servers':
				return menu(self._servers())
			if subsection == 'clients':
				return menu(self._clients())
			if subsection == 'transfered':
				return menu(self._transfer())
			if subsection == 'requests':
				return menu(self._requests())
			if subsection == 'loops':
				return menu(self._loops())
			if subsection == 'events':
				return menu(self._events())
			if subsection == 'queue':
				return menu(self._queue())
			return menu(index)
		if section == 'end-point':
			if subsection == 'servers':
				return menu(self._servers_source())
			if subsection == 'clients':
				return menu(self._clients_source())
			return menu(index)
		if section == 'control':
			action = (sections + [None, None, None])[2]
			if subsection == 'debug':
				# SECURITY: eval/exec of request-supplied code; only reachable
				# when web.debug is enabled in the configuration
				if not self.supervisor.configuration.web.debug:
					return menu('not enabled')
				if action == 'exec':
					if '=' in args:
						try:
							key, value = args.split('=', 1)
							self.log.critical('PYTHON CODE RAN : %s' % value)
							command = unquote(value.replace('+', ' '))
							code = compile(command, '<string>', 'exec')
							exec code
							return 'done !'
						except Exception, e:  # Python 2 except syntax
							return 'failed to run : \n' + command + '\n\nreason : \n' + str(type(e)) + '\n' + str(e)
				if action == 'eval':
					if '=' in args:
						try:
							key, value = args.split('=', 1)
							self.log.critical('PYTHON CODE RAN : %s' % value)
							command = unquote(value.replace('+', ' '))
							return str(eval(command))
						except Exception, e:  # Python 2 except syntax
							return 'failed to run : \n' + command + '\n\nreason : \n' + str(type(e)) + '\n' + str(e)
				return menu(self._run())
			if subsection == 'workers':
				if action == 'commit':
					if '=' in args:
						key, value = args.split('=', 1)
						if key == 'exaproxy.redirector.minimum':
							if value.isdigit():  # this prevents negative values
								setting = int(value)
								if setting > self.supervisor.manager.high:
									return menu(self._workers() + '<div style="color: red; padding-top: 3em;">value is higher than exaproxy.redirector.maximum</div>')
								self.supervisor.manager.low = setting
								return menu(self._workers() + '<div style="color: green; padding-top: 3em;">changed successfully</div>')
						if key == 'exaproxy.redirector.maximum':
							if value.isdigit():
								setting = int(value)
								if setting < self.supervisor.manager.low:
									return menu(self._workers() + '<div style="color: red; padding-top: 3em;">value is lower than exaproxy.redirector.minimum</div>')
								self.supervisor.manager.high = setting
								return menu(self._workers() + '<div style="color: green; padding-top: 3em;">changed successfully</div>')
					return menu(self._workers() + '<div style="color: red; padding-top: 3em;">invalid request</div>')
				return menu(self._workers())
			return menu(index)
class Supervisor (object): alarm_time = 0.1 # regular backend work second_frequency = int(1/alarm_time) # when we record history minute_frequency = int(60/alarm_time) # when we want to average history increase_frequency = int(5/alarm_time) # when we add workers decrease_frequency = int(60/alarm_time) # when we remove workers saturation_frequency = int(20/alarm_time) # when we report connection saturation interface_frequency = int(300/alarm_time) # when we check for new interfaces # import os # clear = [hex(ord(c)) for c in os.popen('clear').read()] # clear = ''.join([chr(int(c,16)) for c in ['0x1b', '0x5b', '0x48', '0x1b', '0x5b', '0x32', '0x4a']]) def __init__ (self,configuration): configuration = load() self.configuration = configuration # Only here so the introspection code can find them self.log = Logger('supervisor', configuration.log.supervisor) self.log.error('Starting exaproxy version %s' % configuration.proxy.version) self.signal_log = Logger('signal', configuration.log.signal) self.log_writer = SysLogWriter('log', configuration.log.destination, configuration.log.enable, level=configuration.log.level) self.usage_writer = UsageWriter('usage', configuration.usage.destination, configuration.usage.enable) self.log_writer.setIdentifier(configuration.daemon.identifier) #self.usage_writer.setIdentifier(configuration.daemon.identifier) if configuration.debug.log: self.log_writer.toggleDebug() self.usage_writer.toggleDebug() self.log.error('python version %s' % sys.version.replace(os.linesep,' ')) self.log.debug('starting %s' % sys.argv[0]) self.pid = PID(self.configuration) self.daemon = Daemon(self.configuration) self.poller = Poller(self.configuration.daemon) self.poller.setupRead('read_proxy') # Listening proxy sockets self.poller.setupRead('read_web') # Listening webserver sockets self.poller.setupRead('read_icap') # Listening icap sockets self.poller.setupRead('read_workers') # Pipes carrying responses from the child processes 
self.poller.setupRead('read_resolver') # Sockets currently listening for DNS responses self.poller.setupRead('read_client') # Active clients self.poller.setupRead('opening_client') # Clients we have not yet read a request from self.poller.setupWrite('write_client') # Active clients with buffered data to send self.poller.setupWrite('write_resolver') # Active DNS requests with buffered data to send self.poller.setupRead('read_download') # Established connections self.poller.setupWrite('write_download') # Established connections we have buffered data to send to self.poller.setupWrite('opening_download') # Opening connections self.monitor = Monitor(self) self.page = Page(self) self.manager = RedirectorManager( self.configuration, self.poller, ) self.content = ContentManager(self,configuration) self.client = ClientManager(self.poller, configuration) self.resolver = ResolverManager(self.poller, self.configuration, configuration.dns.retries*10) self.proxy = Server('http proxy',self.poller,'read_proxy', configuration.http.connections) self.web = Server('web server',self.poller,'read_web', configuration.web.connections) self.icap = Server('icap server',self.poller,'read_icap', configuration.icap.connections) self.reactor = Reactor(self.configuration, self.web, self.proxy, self.icap, self.manager, self.content, self.client, self.resolver, self.log_writer, self.usage_writer, self.poller) self._shutdown = True if self.daemon.filemax == 0 else False # stop the program self._softstop = False # stop once all current connection have been dealt with self._reload = False # unimplemented self._toggle_debug = False # start logging a lot self._decrease_spawn_limit = 0 self._increase_spawn_limit = 0 self._refork = False # unimplemented self._pdb = False # turn on pdb debugging self._listen = None # listening change ? 
# (continuation of Supervisor.__init__ — the head of the method is outside this chunk)
# None: no, True: listen, False: stop listening
		self.wait_time = 5.0  # how long do we wait at maximum once we have been soft-killed
		self.local = set()    # what addresses are on our local interfaces

		self.interfaces()

		# wire every control signal to its handler method below
		signal.signal(signal.SIGQUIT, self.sigquit)
		signal.signal(signal.SIGINT, self.sigterm)
		signal.signal(signal.SIGTERM, self.sigterm)
		# signal.signal(signal.SIGABRT, self.sigabrt)
		# signal.signal(signal.SIGHUP, self.sighup)
		signal.signal(signal.SIGTRAP, self.sigtrap)
		signal.signal(signal.SIGUSR1, self.sigusr1)
		signal.signal(signal.SIGUSR2, self.sigusr2)
		signal.signal(signal.SIGTTOU, self.sigttou)
		signal.signal(signal.SIGTTIN, self.sigttin)
		signal.signal(signal.SIGALRM, self.sigalrm)

		# make sure we always have data in history
		# (done in zero for dependencies reasons)
		self.monitor.zero()

	def sigquit (self,signum, frame):
		# first SIGQUIT requests a soft-stop, a second one forces shutdown
		# NOTE(review): the log messages say "SIG INT" although this handler is
		# registered for SIGQUIT — probably stale copy/paste; confirm before changing
		if self._softstop:
			self.signal_log.critical('multiple SIG INT received, shutdown')
			self._shutdown = True
		else:
			self.signal_log.critical('SIG INT received, soft-stop')
			self._softstop = True
			self._listen = False

	def sigterm (self,signum, frame):
		# SIGINT and SIGTERM both land here; PDB env var diverts into the debugger
		self.signal_log.critical('SIG TERM received, shutdown request')
		if os.environ.get('PDB',False):
			self._pdb = True
		else:
			self._shutdown = True

	# def sigabrt (self,signum, frame):
	# 	self.signal_log.info('SIG INFO received, refork request')
	# 	self._refork = True

	# def sighup (self,signum, frame):
	# 	self.signal_log.info('SIG HUP received, reload request')
	# 	self._reload = True

	def sigtrap (self,signum, frame):
		# toggle debug logging on the next pass of the main loop
		self.signal_log.critical('SIG TRAP received, toggle debug')
		self._toggle_debug = True

	def sigusr1 (self,signum, frame):
		# each SIGUSR1 queues one worker-pool decrease (applied in run())
		self.signal_log.critical('SIG USR1 received, decrease worker number')
		self._decrease_spawn_limit += 1

	def sigusr2 (self,signum, frame):
		# each SIGUSR2 queues one worker-pool increase (applied in run())
		self.signal_log.critical('SIG USR2 received, increase worker number')
		self._increase_spawn_limit += 1

	def sigttou (self,signum, frame):
		self.signal_log.critical('SIG TTOU received, stop listening')
		self._listen = False

	def sigttin (self,signum, frame):
		# NOTE(review): message has typos ("SIG IN", "star") — it is a runtime
		# string, left untouched here
		self.signal_log.critical('SIG IN received, star listening')
		self._listen = True

	def sigalrm (self,signum, frame):
		# periodic tick: break out of the reactor and re-arm the interval timer
		self.signal_log.debug('SIG ALRM received, timed actions')
		self.reactor.running = False
		signal.setitimer(signal.ITIMER_REAL,self.alarm_time,self.alarm_time)

	def interfaces (self):
		"""Re-scan local interface addresses and log any changes since last scan."""
		local = set(['127.0.0.1','::1'])
		for interface in getifaddrs():
			if interface.family not in (AF_INET,AF_INET6):
				continue
			if interface.address not in self.local:
				self.log.info('found new local ip %s (%s)' % (interface.address,interface.name))
			local.add(interface.address)

		for ip in self.local:
			if ip not in local:
				self.log.info('removed local ip %s' % ip)

		if local == self.local:
			self.log.info('no ip change')
		else:
			self.local = local

	def run (self):
		"""Main loop: drive the reactor and apply signal-requested state changes."""
		if self.daemon.drop_privileges():
			self.log.critical('Could not drop privileges to \'%s\'. Refusing to run as root' % self.daemon.user)
			self.log.critical('Set the environment value USER to change the unprivileged user')
			self._shutdown = True
		elif not self.initialise():
			self._shutdown = True

		# arm the periodic SIGALRM tick
		signal.setitimer(signal.ITIMER_REAL,self.alarm_time,self.alarm_time)

		# modular counters: each hits zero once per its configured frequency
		count_second = 0
		count_minute = 0
		count_increase = 0
		count_decrease = 0
		count_saturation = 0
		count_interface = 0

		while True:
			count_second = (count_second + 1) % self.second_frequency
			count_minute = (count_minute + 1) % self.minute_frequency
			count_increase = (count_increase + 1) % self.increase_frequency
			count_decrease = (count_decrease + 1) % self.decrease_frequency
			count_saturation = (count_saturation + 1) % self.saturation_frequency
			count_interface = (count_interface + 1) % self.interface_frequency

			try:
				if self._pdb:
					self._pdb = False
					import pdb
					pdb.set_trace()

				# check for IO change with select
				self.reactor.run()

				# must follow the reactor so we are sure to go through the reactor at least once
				# and flush any logs
				if self._shutdown:
					self._shutdown = False
					self.shutdown()
					break
				elif self._reload:
					self._reload = False
					self.reload()
				elif self._refork:
					self._refork = False
					self.signal_log.warning('refork not implemented')
					# stop listening to new connections
					# refork the program (as we have been updated)
					# just handle current open connection

				if self._softstop:
					if self._listen == False:
						self.proxy.rejecting()
						self._listen = None
					if self.client.softstop():
						self._shutdown = True
				# only change listening if we are not shutting down
				elif self._listen is not None:
					if self._listen:
						self._shutdown = not self.proxy.accepting()
						self._listen = None
					else:
						self.proxy.rejecting()
						self._listen = None

				if self._toggle_debug:
					self._toggle_debug = False
					self.log_writer.toggleDebug()

				# apply queued SIGUSR2 worker-pool increases
				if self._increase_spawn_limit:
					number = self._increase_spawn_limit
					self._increase_spawn_limit = 0
					self.manager.low += number
					self.manager.high = max(self.manager.low,self.manager.high)
					for _ in range(number):
						self.manager.increase()

				# apply queued SIGUSR1 worker-pool decreases
				if self._decrease_spawn_limit:
					number = self._decrease_spawn_limit
					self._decrease_spawn_limit = 0
					self.manager.high = max(1,self.manager.high-number)
					self.manager.low = min(self.manager.high,self.manager.low)
					for _ in range(number):
						self.manager.decrease()

				# save our monitoring stats
				if count_second == 0:
					self.monitor.second()
					expired = self.reactor.client.expire()
					self.reactor.log.debug('events : ' + ', '.join('%s:%d' % (k,len(v)) for (k,v) in self.reactor.events.items()))
				else:
					expired = 0

				if expired:
					self.proxy.notifyClose(None, count=expired)

				if count_minute == 0:
					self.monitor.minute()

				# make sure we have enough workers
				if count_increase == 0:
					self.manager.provision()
				# and every so often remove useless workers
				if count_decrease == 0:
					self.manager.deprovision()

				# report if we saw too many connections
				if count_saturation == 0:
					self.proxy.saturation()
					self.web.saturation()

				if self.configuration.daemon.poll_interfaces and count_interface == 0:
					self.interfaces()

			except KeyboardInterrupt:
				self.log.critical('^C received')
				self._shutdown = True
			except OSError,e:
				# This should never happen as we are limiting how many connections we accept
				if e.errno == 24:  # Too many open files
					self.log.critical('Too many opened files, shutting down')
					for line in traceback.format_exc().split('\n'):
						self.log.critical(line)
					self._shutdown = True
				else:
					self.log.critical('unrecoverable io error')
					for line in traceback.format_exc().split('\n'):
						self.log.critical(line)
					self._shutdown = True

			finally:
class ClientManager (object): unproxy = ProxyProtocol().parseRequest def __init__(self, poller, configuration): self.total_sent4 = 0L self.total_sent6 = 0L self.total_requested = 0L self.norequest = TimeCache(configuration.http.idle_connect) self.bysock = {} self.byname = {} self.buffered = [] self._nextid = 0 self.poller = poller self.log = Logger('client', configuration.log.client) self.proxied = configuration.http.proxied self.max_buffer = configuration.http.header_size def __contains__(self, item): return item in self.byname def getnextid(self): self._nextid += 1 return str(self._nextid) def expire (self,number=100): count = 0 for sock in self.norequest.expired(number): client = self.norequest.get(sock,[None,])[0] if client: self.cleanup(sock,client.name) count += 1 return count def newConnection(self, sock, peer, source): name = self.getnextid() client = Client(name, sock, peer, self.log, self.max_buffer) self.norequest[sock] = client, source self.byname[name] = client, source # watch for the opening request self.poller.addReadSocket('opening_client', client.sock) #self.log.info('new id %s (socket %s) in clients : %s' % (name, sock, sock in self.bysock)) return peer def readRequest(self, sock): """Read only the initial HTTP headers sent by the client""" client, source = self.norequest.get(sock, (None, None)) if client: name, peer, request, content = client.readData() if request: self.total_requested += 1 # headers can be read only once self.norequest.pop(sock, (None, None)) # we have now read the client's opening request self.poller.removeReadSocket('opening_client', client.sock) elif request is None: self.cleanup(sock, client.name) else: self.log.error('trying to read headers from a client that does not exist %s' % sock) name, peer, request, content, source = None, None, None, None, None if request and self.proxied is True and source == 'proxy': client_ip, client_request = self.unproxy(request) if client_ip and client_request: peer = client_ip request = 
client_request client.setPeer(client_ip) return name, peer, request, content, source def readDataBySocket(self, sock): client, source = self.bysock.get(sock, (None, None)) if client: name, peer, request, content = client.readData() if request: self.total_requested += 1 # Parsing of the new request will be handled asynchronously. Ensure that # we do not read anything from the client until a request has been sent # to the remote webserver. # Since we just read a request, we know that the cork is not currently # set and so there's no risk of it being erroneously removed. self.poller.corkReadSocket('read_client', sock) elif request is None: self.cleanup(sock, client.name) else: self.log.error('trying to read from a client that does not exist %s' % sock) name, peer, request, content = None, None, None, None return name, peer, request, content, source def readDataByName(self, name): client, source = self.byname.get(name, (None, None)) if client: name, peer, request, content = client.readData() if request: self.total_requested += 1 # Parsing of the new request will be handled asynchronously. Ensure that # we do not read anything from the client until a request has been sent # to the remote webserver. # Since we just read a request, we know that the cork is not currently # set and so there's no risk of it being erroneously removed. 
self.poller.corkReadSocket('read_client', client.sock) elif request is None: self.cleanup(client.sock, name) else: self.log.error('trying to read from a client that does not exist %s' % name) name, peer, request, content = None, None, None, None return name, peer, request, content def sendDataBySocket(self, sock, data): client, source = self.bysock.get(sock, (None, None)) if client: name = client.name res = client.writeData(data) if res is None: # close the client connection self.cleanup(sock, client.name) buffered, had_buffer, sent4, sent6 = None, None, 0, 0 result = None buffer_change = None else: buffered, had_buffer, sent4, sent6 = res self.total_sent4 += sent4 self.total_sent6 += sent6 result = buffered if buffered: if sock not in self.buffered: self.buffered.append(sock) buffer_change = True # watch for the socket's send buffer becoming less than full self.poller.addWriteSocket('write_client', client.sock) else: buffer_change = False elif had_buffer and sock in self.buffered: self.buffered.remove(sock) buffer_change = True # we no longer care about writing to the client self.poller.removeWriteSocket('write_client', client.sock) else: buffer_change = False else: result = None buffer_change = None name = None return result, buffer_change, name, source def sendDataByName(self, name, data): client, source = self.byname.get(name, (None, None)) if client: res = client.writeData(data) if res is None: # we cannot write to the client so clean it up self.cleanup(client.sock, name) buffered, had_buffer, sent4, sent6 = None, None, 0, 0 result = None buffer_change = None else: buffered, had_buffer, sent4, sent6 = res self.total_sent4 += sent4 self.total_sent6 += sent6 result = buffered if buffered: if client.sock not in self.buffered: self.buffered.append(client.sock) buffer_change = True # watch for the socket's send buffer becoming less than full self.poller.addWriteSocket('write_client', client.sock) else: buffer_change = False elif had_buffer and client.sock in 
self.buffered: self.buffered.remove(client.sock) buffer_change = True # we no longer care about writing to the client self.poller.removeWriteSocket('write_client', client.sock) else: buffer_change = False else: result = None buffer_change = None return result, buffer_change, client def startData(self, name, data, remaining): # NOTE: soo ugly but fast to code nb_to_read = 0 if type(remaining) == type(''): if 'chunked' in remaining: mode = 'chunked' else: mode = 'passthrough' elif remaining > 0: mode = 'transfer' nb_to_read = remaining elif remaining == 0: mode = 'request' else: mode = 'passthrough' client, source = self.byname.get(name, (None, None)) if client: try: command, d = data except (ValueError, TypeError): self.log.error('invalid command sent to client %s' % name) self.cleanup(client.sock, name) res = None else: if client.sock not in self.bysock: # Start checking for content sent by the client self.bysock[client.sock] = client, source # watch for the client sending new data self.poller.addReadSocket('read_client', client.sock) # make sure we don't somehow end up with this still here self.norequest.pop(client.sock, (None,None)) # NOTE: always done already in readRequest self.poller.removeReadSocket('opening_client', client.sock) res = client.startData(command, d) else: res = client.restartData(command, d) # If we are here then we must have prohibited reading from the client # and it must otherwise have been in a readable state self.poller.uncorkReadSocket('read_client', client.sock) if res is not None: buffered, had_buffer, sent4, sent6 = res # buffered data we read with the HTTP headers name, peer, request, content = client.readRelated(mode,nb_to_read) if request: self.total_requested += 1 self.log.info('reading multiple requests') self.cleanup(client.sock, name) buffered, had_buffer = None, None content = None elif request is None: self.cleanup(client.sock, name) buffered, had_buffer = None, None content = None else: # we cannot write to the client so 
clean it up self.cleanup(client.sock, name) buffered, had_buffer = None, None content = None if buffered: if client.sock not in self.buffered: self.buffered.append(client.sock) # watch for the socket's send buffer becoming less than full self.poller.addWriteSocket('write_client', client.sock) elif had_buffer and client.sock in self.buffered: self.buffered.remove(client.sock) # we no longer care about writing to the client self.poller.removeWriteSocket('write_client', client.sock) else: content = None return client, content, source def corkUploadByName(self, name): client, source = self.byname.get(name, (None, None)) if client: self.poller.corkReadSocket('read_client', client.sock) def uncorkUploadByName(self, name): client, source = self.byname.get(name, (None, None)) if client: if client.sock in self.bysock: self.poller.uncorkReadSocket('read_client', client.sock) def cleanup(self, sock, name): self.log.debug('cleanup for socket %s' % sock) client, source = self.bysock.get(sock, (None,None)) client, source = (client,None) if client else self.norequest.get(sock, (None,None)) client, source = (client,None) or self.byname.get(name, (None,None)) self.bysock.pop(sock, None) self.norequest.pop(sock, (None,None)) self.byname.pop(name, None) if client: self.poller.removeWriteSocket('write_client', client.sock) self.poller.removeReadSocket('read_client', client.sock) self.poller.removeReadSocket('opening_client', client.sock) client.shutdown() else: self.log.error('COULD NOT CLEAN UP SOCKET %s' % sock) if sock in self.buffered: self.buffered.remove(sock) def softstop (self): if len(self.byname) > 0 or len(self.norequest) > 0: return False self.log.critical('no more client connection, exiting.') return True def stop(self): for client, source in self.bysock.itervalues(): client.shutdown() for client, source in self.norequest.itervalues(): client.shutdown() self.poller.clearRead('read_client') self.poller.clearRead('opening_client') self.poller.clearWrite('write_client') 
self.bysock = {} self.norequest = {} self.byname = {} self.buffered = []
Created by Thomas Mangin on 2011-11-30. Copyright (c) 2011-2013 Exa Networks. All rights reserved. """ import socket import errno from exaproxy.util.log.logger import Logger from exaproxy.network.errno_list import errno_block from exaproxy.configuration import load IP_TRANSPARENT = 19 configuration = load() log = Logger('server', configuration.log.server) def isipv4(address): try: socket.inet_pton(socket.AF_INET, address) return True except socket.error: return False def isipv6(address): try: socket.inet_pton(socket.AF_INET6, address) return True except socket.error: return False
""" # http://code.google.com/speed/articles/web-metrics.html import select import socket import errno from exaproxy.network.errno_list import errno_block, errno_fatal from interface import IPoller from exaproxy.util.log.logger import Logger from exaproxy.configuration import load configuration = load() log = Logger('select', configuration.log.server) def poll_select(read, write, timeout=None): try: r, w, x = select.select(read, write, read + write, timeout) except socket.error, e: if e.args[0] in errno_block: log.error('select not ready, errno %d: %s' % (e.args[0], errno.errorcode.get(e.args[0], ''))) return [], [], [] if e.args[0] in errno_fatal: log.error('select problem, errno %d: %s' % (e.args[0], errno.errorcode.get(e.args[0], ''))) log.error('poller read : %s' % str(read)) log.error('poller write : %s' % str(write)) log.error('read : %s' % str(read)) else:
def __init__ (self,configuration):
	"""Build the whole proxy: loggers, pollers, managers, servers — then fork
	the redirector and register signal handlers.  Ordering is significant:
	forking happens before thread creation, signals are registered last."""
	# NOTE(review): the passed-in `configuration` is immediately replaced by a
	# fresh load() — the parameter value is effectively ignored; confirm intent
	configuration = load()
	self.configuration = configuration

	# Only here so the introspection code can find them
	self.log = Logger('supervisor', configuration.log.supervisor)
	self.log.error('Starting exaproxy version %s' % configuration.proxy.version)

	self.signal_log = Logger('signal', configuration.log.signal)
	self.log_writer = SysLogWriter('log', configuration.log.destination, configuration.log.enable, level=configuration.log.level)
	self.usage_writer = UsageWriter('usage', configuration.usage.destination, configuration.usage.enable)

	# flush pending log messages when the interpreter exits
	sys.exitfunc = self.log_writer.writeMessages

	self.log_writer.setIdentifier(configuration.daemon.identifier)
	#self.usage_writer.setIdentifier(configuration.daemon.identifier)

	if configuration.debug.log:
		self.log_writer.toggleDebug()
		self.usage_writer.toggleDebug()

	self.log.error('python version %s' % sys.version.replace(os.linesep,' '))
	self.log.debug('starting %s' % sys.argv[0])

	self.pid = PID(self.configuration)
	self.daemon = Daemon(self.configuration)
	self.poller = Poller(self.configuration.daemon)

	# declare every poll category used across the proxy
	self.poller.setupRead('read_proxy')        # Listening proxy sockets
	self.poller.setupRead('read_web')          # Listening webserver sockets
	self.poller.setupRead('read_icap')         # Listening icap sockets
	self.poller.setupRead('read_redirector')   # Pipes carrying responses from the redirector process
	self.poller.setupRead('read_resolver')     # Sockets currently listening for DNS responses
	self.poller.setupRead('read_client')       # Active clients
	self.poller.setupRead('opening_client')    # Clients we have not yet read a request from
	self.poller.setupWrite('write_client')     # Active clients with buffered data to send
	self.poller.setupWrite('write_resolver')   # Active DNS requests with buffered data to send
	self.poller.setupRead('read_download')     # Established connections
	self.poller.setupWrite('write_download')   # Established connections we have buffered data to send to
	self.poller.setupWrite('opening_download') # Opening connections

	self.monitor = Monitor(self)
	self.page = Page(self)
	self.content = ContentManager(self,configuration)
	self.client = ClientManager(self.poller, configuration)
	self.resolver = ResolverManager(self.poller, self.configuration, configuration.dns.retries*10)
	self.proxy = Server('http proxy',self.poller,'read_proxy', configuration.http.connections)
	self.web = Server('web server',self.poller,'read_web', configuration.web.connections)
	self.icap = Server('icap server',self.poller,'read_icap', configuration.icap.connections)

	self._shutdown = True if self.daemon.filemax == 0 else False  # stop the program
	self._softstop = False                 # stop once all current connection have been dealt with
	self._reload = False                   # unimplemented
	self._toggle_debug = False             # start logging a lot
	self._decrease_spawn_limit = 0         # pending worker-pool decreases (from SIGUSR1)
	self._increase_spawn_limit = 0         # pending worker-pool increases (from SIGUSR2)
	self._refork = False                   # unimplemented
	self._pdb = False                      # turn on pdb debugging
	self._listen = None                    # listening change ? None: no, True: listen, False: stop listening

	self.wait_time = 5.0                   # how long do we wait at maximum once we have been soft-killed
	self.local = set()                     # what addresses are on our local interfaces

	if not self.initialise():
		self._shutdown = True

	elif self.daemon.drop_privileges():
		self.log.critical('Could not drop privileges to \'%s\'. Refusing to run as root' % self.daemon.user)
		self.log.critical('Set the environment value USER to change the unprivileged user')
		self._shutdown = True

	# fork the redirector process before performing any further setup
	redirector = fork_redirector(self.poller, self.configuration)

	# create threads _after_ all forking is done
	self.redirector = redirector_message_thread(redirector)

	self.reactor = Reactor(self.configuration, self.web, self.proxy, self.icap, self.redirector, self.content, self.client, self.resolver, self.log_writer, self.usage_writer, self.poller)

	self.interfaces()

	signal.signal(signal.SIGQUIT, self.sigquit)
	signal.signal(signal.SIGINT, self.sigterm)
	signal.signal(signal.SIGTERM, self.sigterm)
	# signal.signal(signal.SIGABRT, self.sigabrt)
	# signal.signal(signal.SIGHUP, self.sighup)
	signal.signal(signal.SIGTRAP, self.sigtrap)
	signal.signal(signal.SIGUSR1, self.sigusr1)
	signal.signal(signal.SIGUSR2, self.sigusr2)
	signal.signal(signal.SIGTTOU, self.sigttou)
	signal.signal(signal.SIGTTIN, self.sigttin)
	signal.signal(signal.SIGALRM, self.sigalrm)

	# make sure we always have data in history
	# (done in zero for dependencies reasons)
	self.monitor.zero()
class RedirectorManager (object):
	"""Manage a pool of redirector (squid-style URL rewriter) worker processes:
	spawn/reap workers, queue requests when no worker is free, and hand
	decisions back to the caller."""

	def __init__ (self, configuration, poller):
		self.low = configuration.redirector.minimum    # minimum concurrent redirector workers
		self.high = configuration.redirector.maximum   # maximum concurrent redirector workers

		self.poller = poller
		self.configuration = configuration

		self.queue = Queue()   # store requests we do not immediately have the resources to process

		self.nextid = 1        # unique id to give to the next spawned worker
		self.worker = {}       # worker tasks for each spawned child
		self.processes = {}    # worker tasks indexed by file descriptors we can poll
		self.available = set() # workers that are currently available to handle new requests
		self.active = {}       # workers that are currently busy waiting for a response from the spawned process
		self.stopping = set()  # workers we want to stop as soon as they stop being active

		program = configuration.redirector.program
		protocol = configuration.redirector.protocol
		self.redirector_factory = RedirectorFactory(configuration, program, protocol)

		self.log = Logger('manager', configuration.log.manager)

	def _getid(self):
		# unique, monotonically increasing worker id as a string
		wid = str(self.nextid)
		self.nextid += 1
		return wid

	def _spawn (self):
		"""add one worker to the pool"""
		wid = self._getid()
		worker = self.redirector_factory.create(wid)
		self.worker[wid] = worker
		self.available.add(wid)

		if worker.process is not None:
			# poll the child's stdout for its decisions
			identifier = worker.process.stdout
			self.processes[identifier] = worker
			self.poller.addReadSocket('read_workers', identifier)

		self.log.info("added a worker")
		self.log.info("we have %d workers. defined range is ( %d / %d )" % (len(self.worker), self.low, self.high))

	def spawn (self, number=1):
		"""create the request number of worker processes"""
		self.log.info("spawning %d more workers" % number)
		for _ in range(number):
			self._spawn()

	def respawn (self):
		"""make sure we reach the minimum number of workers"""
		# keep the current pool size, clamped into [low, high]
		number = max(min(len(self.worker), self.high), self.low)

		for wid in set(self.worker):
			self.stopWorker(wid)

		self.spawn(number)

	def stopWorker (self, wid):
		self.log.info('want worker %s to go away' % wid)

		# an idle worker can be reaped at once; a busy one is deferred
		if wid not in self.active:
			self.reap(wid)
		else:
			self.stopping.add(wid)

	def reap (self, wid):
		"""Remove a worker from every tracking structure and shut it down."""
		self.log.info('we are killing worker %s' % wid)
		worker = self.worker[wid]

		if wid in self.active:
			self.log.error('reaping worker %s even though it is still active' % wid)
			self.active.pop(wid)

		if wid in self.stopping:
			self.stopping.remove(wid)

		if wid in self.available:
			self.available.remove(wid)

		if worker.process is not None:
			self.poller.removeReadSocket('read_workers', worker.process.stdout)
			self.processes.pop(worker.process.stdout)

		worker.shutdown()
		self.worker.pop(wid)

	def _decrease (self):
		# only shrink while above the configured minimum
		if self.low < len(self.worker):
			wid = self._oldest()
			if wid:
				self.stopWorker(wid)

	def _increase (self):
		# only grow while below the configured maximum
		if len(self.worker) < self.high:
			self.spawn()

	def decrease (self, count=1):
		for _ in xrange(count):
			self._decrease()

	def increase (self, count=1):
		for _ in xrange(count):
			self._increase()

	def start (self):
		"""spawn our minimum number of workers"""
		self.log.info("starting workers.")
		self.spawn(max(0,self.low-len(self.worker)))

	def stop (self):
		"""tell all our worker to stop reading the queue and stop"""
		# NOTE(review): reap() pops entries from self.worker while this loop
		# iterates the same dict — on Python 2 that raises "dictionary changed
		# size during iteration"; iterating a copy (list/set of keys) would be
		# safe.  Verify against the original file before changing.
		for wid in self.worker:
			self.reap(wid)

		self.worker = {}

	def _oldest (self):
		"""find the oldest worker"""
		oldest = None
		past = time.time()
		for wid in set(self.worker):
			creation = self.worker[wid].creation
			# skip workers already scheduled for stopping
			if creation < past and wid not in self.stopping:
				past = creation
				oldest = wid

		return oldest

	def provision (self):
		"""manage our workers to make sure we have enough to consume the queue"""
		size = self.queue.qsize()
		num_workers = len(self.worker)

		# bad we are bleeding workers !
		if num_workers < self.low:
			self.log.info("we lost some workers, respawing %d new workers" % (self.low - num_workers))
			self.spawn(self.low - num_workers)

		# we need more workers
		if size >= num_workers:
			# nothing we can do we have reach our limit
			if num_workers >= self.high:
				self.log.warning("help ! we need more workers but we reached our ceiling ! %d request are queued for %d processes" % (size,num_workers))
				return

			# try to figure a good number to add ..
			# no less than one, no more than to reach self.high, lower between self.low and a quarter of the allowed growth
			nb_to_add = int(min(max(1,min(self.low,(self.high-self.low)/4)),self.high-num_workers))

			self.log.warning("we are low on workers adding a few (%d), the queue has %d unhandled url" % (nb_to_add,size))
			self.spawn(nb_to_add)

	def deprovision (self):
		"""manage our workers to make sure we have enough to consume the queue"""
		size = self.queue.qsize()
		num_workers = len(self.worker)

		# we are now overprovisioned
		if size < 2 and num_workers > self.low:
			self.log.info("we have too many workers (%d), stopping the oldest" % num_workers)
			# if we have to kill one, at least stop the one who had the most chance to memory leak :)
			wid = self._oldest()
			if wid:
				self.stopWorker(wid)

	def acquire (self):
		# pop an arbitrary available worker, or None if all are busy
		if self.available:
			identifier = self.available.pop()
			worker = self.worker[identifier]
		else:
			worker = None

		return worker

	def release (self, wid):
		# a worker flagged for stopping is reaped instead of returned to the pool
		if wid not in self.stopping:
			self.available.add(wid)
		else:
			self.reap(wid)

	def persist (self, wid, client_id, peer, data, header, subheader, source, tainted):
		# remember the deferred request the worker is currently handling
		self.active[wid] = client_id, peer, data, header, subheader, source, tainted

	def progress (self, wid):
		# forget and return the deferred request for this worker
		return self.active.pop(wid)

	def doqueue (self):
		"""Dequeue one pending request if a worker is free; return its decision."""
		if self.available and not self.queue.isempty():
			client_id, peer, header, subheader, source, tainted = self.queue.get()
			_, command, decision = self.request(client_id, peer, header, subheader, source, tainted=tainted)
		else:
			client_id, command, decision = None, None, None

		return client_id, command, decision

	def request (self, client_id, peer, header, subheader, source, tainted=False):
		"""Ask a worker to decide on a request; queue it when none is free.

		On a worker failure the request is retried once (tainted=True), then
		the client is closed."""
		worker = self.acquire()

		if worker is not None:
			try:
				_, command, decision = worker.decide(client_id, peer, header, subheader, source)
			except:
				command, decision = None, None

			if command is None:
				# the worker failed us: replace it and retry once
				self.reap(worker.wid)
				if tainted is False:
					_, command, decision = self.request(client_id, peer, header, subheader, source, tainted=True)
				else:
					_, command, decision = Respond.close(client_id)
		else:
			command, decision = None, None
			self.queue.put((client_id, peer, header, subheader, source, tainted))

		if command == 'defer':
			# the worker needs more time: park the request until its pipe is readable
			self.persist(worker.wid, client_id, peer, decision, header, subheader, source, tainted)
			command, decision = None, None
		elif worker is not None:
			self.release(worker.wid)

		return client_id, command, decision

	def getDecision (self, pipe_in):
		"""Collect the decision of a previously deferred request from a worker pipe."""
		worker = self.processes.get(pipe_in, None)

		if worker is not None and worker.wid in self.active:
			client_id, peer, request, header, subheader, source, tainted = self.progress(worker.wid)
			try:
				_, command, decision = worker.progress(client_id, peer, request, header, subheader, source)
			except Exception, e:
				command, decision = None, None

			self.release(worker.wid)

			if command is None:
				# the worker failed us: replace it and retry once
				self.reap(worker.wid)
				if tainted is False:
					_, command, decision = self.request(client_id, peer, header, subheader, source, tainted=True)
				else:
					_, command, decision = Respond.close(client_id)
		else:
def __init__ (self, supervisor):
	"""Keep handles on the supervisor, its monitor and a dedicated web logger."""
	self.supervisor = supervisor
	self.monitor = supervisor.monitor
	self.log = Logger('web', supervisor.configuration.log.web)
	# whether the alert email has already been sent
	self.email_sent = False
class ClientManager(object): unproxy = ProxyProtocol().parseRequest def __init__(self, poller, configuration): self.total_sent4 = 0L self.total_sent6 = 0L self.total_requested = 0L self.norequest = TimeCache(configuration.http.idle_connect) self.bysock = {} self.byname = {} self.buffered = [] self._nextid = 0 self.poller = poller self.log = Logger('client', configuration.log.client) self.proxied = configuration.http.proxied self.max_buffer = configuration.http.header_size def __contains__(self, item): return item in self.byname def getnextid(self): self._nextid += 1 return str(self._nextid) def expire(self, number=100): count = 0 for sock in self.norequest.expired(number): client = self.norequest.get(sock, [ None, ])[0] if client: self.cleanup(sock, client.name) count += 1 return count def newConnection(self, sock, peer, source): name = self.getnextid() client = Client(name, sock, peer, self.log, self.max_buffer) self.norequest[sock] = client, source self.byname[name] = client, source # watch for the opening request self.poller.addReadSocket('opening_client', client.sock) #self.log.info('new id %s (socket %s) in clients : %s' % (name, sock, sock in self.bysock)) return peer def readRequest(self, sock): """Read only the initial HTTP headers sent by the client""" client, source = self.norequest.get(sock, (None, None)) if client: name, peer, request, content = client.readData() if request: self.total_requested += 1 # headers can be read only once self.norequest.pop(sock, (None, None)) # we have now read the client's opening request self.poller.removeReadSocket('opening_client', client.sock) elif request is None: self.cleanup(sock, client.name) else: self.log.error( 'trying to read headers from a client that does not exist %s' % sock) name, peer, request, content, source = None, None, None, None, None if request and self.proxied is True and source == 'proxy': client_ip, client_request = self.unproxy(request) if client_ip and client_request: peer = client_ip request = 
client_request client.setPeer(client_ip) return name, peer, request, content, source def readDataBySocket(self, sock): client, source = self.bysock.get(sock, (None, None)) if client: name, peer, request, content = client.readData() if request: self.total_requested += 1 # Parsing of the new request will be handled asynchronously. Ensure that # we do not read anything from the client until a request has been sent # to the remote webserver. # Since we just read a request, we know that the cork is not currently # set and so there's no risk of it being erroneously removed. self.poller.corkReadSocket('read_client', sock) elif request is None: self.cleanup(sock, client.name) else: self.log.error( 'trying to read from a client that does not exist %s' % sock) name, peer, request, content = None, None, None, None return name, peer, request, content, source def readDataByName(self, name): client, source = self.byname.get(name, (None, None)) if client: name, peer, request, content = client.readData() if request: self.total_requested += 1 # Parsing of the new request will be handled asynchronously. Ensure that # we do not read anything from the client until a request has been sent # to the remote webserver. # Since we just read a request, we know that the cork is not currently # set and so there's no risk of it being erroneously removed. 
self.poller.corkReadSocket('read_client', client.sock) elif request is None: self.cleanup(client.sock, name) else: self.log.error( 'trying to read from a client that does not exist %s' % name) name, peer, request, content = None, None, None, None return name, peer, request, content def sendDataBySocket(self, sock, data): client, source = self.bysock.get(sock, (None, None)) if client: name = client.name res = client.writeData(data) if res is None: # close the client connection self.cleanup(sock, client.name) buffered, had_buffer, sent4, sent6 = None, None, 0, 0 result = None buffer_change = None else: buffered, had_buffer, sent4, sent6 = res self.total_sent4 += sent4 self.total_sent6 += sent6 result = buffered if buffered: if sock not in self.buffered: self.buffered.append(sock) buffer_change = True # watch for the socket's send buffer becoming less than full self.poller.addWriteSocket('write_client', client.sock) else: buffer_change = False elif had_buffer and sock in self.buffered: self.buffered.remove(sock) buffer_change = True # we no longer care about writing to the client self.poller.removeWriteSocket('write_client', client.sock) else: buffer_change = False else: result = None buffer_change = None name = None return result, buffer_change, name, source def sendDataByName(self, name, data): client, source = self.byname.get(name, (None, None)) if client: res = client.writeData(data) if res is None: # we cannot write to the client so clean it up self.cleanup(client.sock, name) buffered, had_buffer, sent4, sent6 = None, None, 0, 0 result = None buffer_change = None else: buffered, had_buffer, sent4, sent6 = res self.total_sent4 += sent4 self.total_sent6 += sent6 result = buffered if buffered: if client.sock not in self.buffered: self.buffered.append(client.sock) buffer_change = True # watch for the socket's send buffer becoming less than full self.poller.addWriteSocket('write_client', client.sock) else: buffer_change = False elif had_buffer and client.sock in 
self.buffered: self.buffered.remove(client.sock) buffer_change = True # we no longer care about writing to the client self.poller.removeWriteSocket('write_client', client.sock) else: buffer_change = False else: result = None buffer_change = None return result, buffer_change, client def startData(self, name, data, remaining): # NOTE: soo ugly but fast to code nb_to_read = 0 if type(remaining) == type(''): if 'chunked' in remaining: mode = 'chunked' else: mode = 'passthrough' elif remaining > 0: mode = 'transfer' nb_to_read = remaining elif remaining == 0: mode = 'request' else: mode = 'passthrough' client, source = self.byname.get(name, (None, None)) if client: try: command, d = data except (ValueError, TypeError): self.log.error('invalid command sent to client %s' % name) self.cleanup(client.sock, name) res = None else: if client.sock not in self.bysock: # Start checking for content sent by the client self.bysock[client.sock] = client, source # watch for the client sending new data self.poller.addReadSocket('read_client', client.sock) # make sure we don't somehow end up with this still here self.norequest.pop(client.sock, (None, None)) # NOTE: always done already in readRequest self.poller.removeReadSocket('opening_client', client.sock) res = client.startData(command, d) else: res = client.restartData(command, d) # If we are here then we must have prohibited reading from the client # and it must otherwise have been in a readable state self.poller.uncorkReadSocket('read_client', client.sock) if res is not None: buffered, had_buffer, sent4, sent6 = res # buffered data we read with the HTTP headers name, peer, request, content = client.readRelated( mode, nb_to_read) if request: self.total_requested += 1 self.log.info('reading multiple requests') self.cleanup(client.sock, name) buffered, had_buffer = None, None content = None elif request is None: self.cleanup(client.sock, name) buffered, had_buffer = None, None content = None else: # we cannot write to the client so 
clean it up self.cleanup(client.sock, name) buffered, had_buffer = None, None content = None if buffered: if client.sock not in self.buffered: self.buffered.append(client.sock) # watch for the socket's send buffer becoming less than full self.poller.addWriteSocket('write_client', client.sock) elif had_buffer and client.sock in self.buffered: self.buffered.remove(client.sock) # we no longer care about writing to the client self.poller.removeWriteSocket('write_client', client.sock) else: content = None return client, content, source def corkUploadByName(self, name): client, source = self.byname.get(name, (None, None)) if client: self.poller.corkReadSocket('read_client', client.sock) def uncorkUploadByName(self, name): client, source = self.byname.get(name, (None, None)) if client: if client.sock in self.bysock: self.poller.uncorkReadSocket('read_client', client.sock) def cleanup(self, sock, name): self.log.debug('cleanup for socket %s' % sock) client, source = self.bysock.get(sock, (None, None)) client, source = (client, None) if client else self.norequest.get( sock, (None, None)) client, source = (client, None) or self.byname.get(name, (None, None)) self.bysock.pop(sock, None) self.norequest.pop(sock, (None, None)) self.byname.pop(name, None) if client: self.poller.removeWriteSocket('write_client', client.sock) self.poller.removeReadSocket('read_client', client.sock) self.poller.removeReadSocket('opening_client', client.sock) client.shutdown() else: self.log.error('COULD NOT CLEAN UP SOCKET %s' % sock) if sock in self.buffered: self.buffered.remove(sock) def softstop(self): if len(self.byname) > 0 or len(self.norequest) > 0: return False self.log.critical('no more client connection, exiting.') return True def stop(self): for client, source in self.bysock.itervalues(): client.shutdown() for client, source in self.norequest.itervalues(): client.shutdown() self.poller.clearRead('read_client') self.poller.clearRead('opening_client') 
self.poller.clearWrite('write_client') self.bysock = {} self.norequest = {} self.byname = {} self.buffered = []
import datetime from interface import IPoller from select import KQ_FILTER_READ, KQ_FILTER_WRITE, KQ_EV_ADD, KQ_EV_DELETE, kevent # KQ_EV_ENABLE, KQ_EV_DISABLE, # KQ_EV_CLEAR, KQ_EV_ONESHOT, # KQ_EV_ERROR, # KQ_EV_EOF, from exaproxy.util.log.logger import Logger from exaproxy.configuration import load configuration = load() log = Logger('select', configuration.log.server) class KQueuePoller (IPoller): kqueue = staticmethod(select.kqueue) def __init__(self, speed): self.speed = speed self.sockets = {} self.pollers = {} self.master = self.kqueue() self.errors = {} self.max_events = 10000 def addReadSocket(self, name, sock): sockets, poller, fdtosock, corked = self.sockets[name]
class ContentManager(object):
	"""Track per-client server-side downloads and serve local web content."""

	downloader_factory = Content

	def __init__(self, supervisor, configuration):
		self.total_sent4 = 0L        # bytes sent to IPv4 servers
		self.total_sent6 = 0L        # bytes sent to IPv6 servers
		self.opening = {}            # downloads not yet connected
		self.established = {}        # connected downloads
		self.byclientid = {}         # active downloader per client id
		self.buffered = []           # sockets with data queued to write
		self.retry = []              # downloads to retry
		self.configuration = configuration
		self.supervisor = supervisor
		self.poller = supervisor.poller
		self.log = Logger('download', configuration.log.download)
		# canonical root of the local html tree; used below to refuse
		# path traversal outside of it
		self.location = os.path.realpath(os.path.normpath(configuration.web.html))
		self.page = supervisor.page
		self._header = {}            # cached header per local filename

	def hasClient(self, client_id):
		# do we already have a downloader for this client ?
		return client_id in self.byclientid

	def getLocalContent(self, code, name):
		# Serve a file from the local html tree, caching its response header.
		filename = os.path.normpath(os.path.join(self.location, name))
		if not filename.startswith(self.location + os.path.sep):
			# the resolved path escaped the web root: refuse it
			filename = ''
		if os.path.isfile(filename):
			try:
				stat = os.stat(filename)
			# NOTE(review): os.stat raises OSError, not IOError, in Python 2,
			# so this handler would not catch a stat failure — confirm intent
			except IOError:
				# NOTE: we are always returning an HTTP/1.1 response
				content = 'close', http(501, 'local file is inaccessible %s' % str(filename))
			else:
				if filename in self._header:
					cache_time, header = self._header[filename]
				else:
					cache_time, header = None, None
				if cache_time is None or cache_time < stat.st_mtime:
					header = file_header(code, stat.st_size, filename)
					# NOTE(review): st_size is stored where cache_time is read
					# back, yet the staleness check above compares it against
					# st_mtime — this looks like it should store stat.st_mtime
					self._header[filename] = stat.st_size, header
				content = 'file', (header, filename)
		else:
			self.log.debug('local file is missing for %s: %s' % (str(name), str(filename)))
			# NOTE: we are always returning an HTTP/1.1 response
			content = 'close', http(501, 'could not serve missing file %s' % str(filename))
		return content

	def readLocalContent(self, code, reason, data={}):
		# Serve a local template file interpolated with "data".
		# NOTE(review): mutable default argument; harmless while "data" is
		# only read (string interpolation), but fragile if ever mutated
		filename = os.path.normpath(os.path.join(self.location, reason))
		if not filename.startswith(self.location + os.path.sep):
			filename = ''
		if os.path.isfile(filename):
			try:
				with open(filename) as fd:
					body = fd.read() % data
				# NOTE: we are always returning an HTTP/1.1 response
				content = 'close', http(code, body)
			except IOError:
				self.log.debug('local file is missing for %s: %s' % (str(reason), str(filename)))
				# NOTE: we are always returning an HTTP/1.1 response
				content = 'close', http(501, 'could not serve missing file %s' % str(reason))
		else:
			self.log.debug('local file is missing for %s: %s' % (str(reason), str(filename)))
			# NOTE: we are always returning an HTTP/1.1 response
			content = 'close', http(501, 'could not serve missing file %s' % str(reason))
		return content

	def getDownloader(self, client_id, host, port, command, request):
		# Return (downloader, new): reuse the client's downloader when it
		# still points at the same host:port, else open a fresh one.
		downloader = self.byclientid.get(client_id, None)
		if downloader:
			# NOTE: with pipeline, consequent request could go to other sites if the browser knows we are a proxy
			# NOTE: therefore the second request could reach the first site
			# NOTE: and we could kill the connection before the data is fully back to the client
			# NOTE: in practice modern browser are too clever and test for it !
			if host != downloader.host or port != downloader.port:
				self.endClientDownload(client_id)
				downloader = None
			else:
				newdownloader = False
		if isipv4(host):
			bind = self.configuration.tcp4.bind
		elif isipv6(host):
			bind = self.configuration.tcp6.bind
		else:
			# should really never happen
			self.log.critical('the host IP address is neither IPv4 or IPv6 .. what year is it ?')
			return None, False
		if downloader is None:
			# supervisor.local is replaced when interface are changed, so do not cache or reference it in this class
			if host in self.supervisor.local:
				# connecting to ourselves is only allowed if explicitly whitelisted
				for h, p in self.configuration.security.local:
					if (h == '*' or h == host) and (p == '*' or p == port):
						break
				else:
					# we did not break
					return None, False
			downloader = self.downloader_factory(client_id, host, port, bind, command, request, self.log)
			newdownloader = True
		if downloader.sock is None:
			return None, False
		return downloader, newdownloader

	def getContent(self, client_id, command, args):
		# Decide what to send the client for a redirector decision.
		# NOTE(review): truncated by the chunk boundary — the closing of this
		# try block is not visible here
		try:
			if command == 'download':
				try:
					# NOTE(review): the near-duplicate of this class later in
					# this file unpacks args.split('\0', 4) here — one of the
					# two copies is out of date
					host, port, upgrade, length, request = args
				except (ValueError, TypeError), e:
					raise ParsingError()
				downloader, newdownloader = self.getDownloader(client_id, host, int(port), command, request)
				if downloader is not None:
					content = ('stream', '')
					if upgrade in ('', 'http/1.0', 'http/1.1'):
						length = int(length) if length.isdigit() else length
					else:
						length = -1
				else:
					content = self.getLocalContent('400', 'noconnect.html')
					length = 0
			elif command == 'connect':
				try:
					host, port, request = args
				except (ValueError, TypeError), e:
					raise ParsingError()
				downloader, newdownloader = self.getDownloader(client_id, host, int(port), command, '')
				if downloader is not None:
					content = ('stream', '')
					length = -1  # the client can send as much data as it wants
				else:
					content = self.getLocalContent('400', 'noconnect.html')
					length = 0
# NOTE(review): this is a second, near-identical copy of ContentManager — if
# both live in one module the later definition silently replaces the earlier.
# The copies differ only in getContent (this one splits a '\0'-separated
# string); keep exactly one.
class ContentManager(object):
	"""Track per-client server-side downloads and serve local web content."""

	downloader_factory = Content

	def __init__(self, supervisor, configuration):
		self.total_sent4 = 0L        # bytes sent to IPv4 servers
		self.total_sent6 = 0L        # bytes sent to IPv6 servers
		self.opening = {}            # downloads not yet connected
		self.established = {}        # connected downloads
		self.byclientid = {}         # active downloader per client id
		self.buffered = []           # sockets with data queued to write
		self.retry = []              # downloads to retry
		self.configuration = configuration
		self.supervisor = supervisor
		self.poller = supervisor.poller
		self.log = Logger('download', configuration.log.download)
		# canonical root of the local html tree (path-traversal guard below)
		self.location = os.path.realpath(os.path.normpath(configuration.web.html))
		self.page = supervisor.page
		self._header = {}            # cached header per local filename

	def hasClient(self, client_id):
		return client_id in self.byclientid

	def getLocalContent(self, code, name):
		# Serve a file from the local html tree, caching its response header.
		filename = os.path.normpath(os.path.join(self.location, name))
		if not filename.startswith(self.location + os.path.sep):
			filename = ''
		if os.path.isfile(filename):
			try:
				stat = os.stat(filename)
			# NOTE(review): os.stat raises OSError in Python 2, which IOError
			# does not catch here
			except IOError:
				# NOTE: we are always returning an HTTP/1.1 response
				content = 'close', http(501, 'local file is inaccessible %s' % str(filename))
			else:
				if filename in self._header :
					cache_time, header = self._header[filename]
				else:
					cache_time, header = None, None
				if cache_time is None or cache_time < stat.st_mtime:
					header = file_header(code, stat.st_size, filename)
					# NOTE(review): stores st_size where st_mtime is compared
					# on the next call — probably should be stat.st_mtime
					self._header[filename] = stat.st_size, header
				content = 'file', (header, filename)
		else:
			self.log.debug('local file is missing for %s: %s' % (str(name), str(filename)))
			# NOTE: we are always returning an HTTP/1.1 response
			content = 'close', http(501, 'could not serve missing file %s' % str(filename))
		return content

	def readLocalContent(self, code, reason, data={}):
		# NOTE(review): mutable default argument (read-only use, but fragile)
		filename = os.path.normpath(os.path.join(self.location, reason))
		if not filename.startswith(self.location + os.path.sep):
			filename = ''
		if os.path.isfile(filename):
			try:
				with open(filename) as fd:
					body = fd.read() % data
				# NOTE: we are always returning an HTTP/1.1 response
				content = 'close', http(code, body)
			except IOError:
				self.log.debug('local file is missing for %s: %s' % (str(reason), str(filename)))
				# NOTE: we are always returning an HTTP/1.1 response
				content = 'close', http(501, 'could not serve missing file %s' % str(reason))
		else:
			self.log.debug('local file is missing for %s: %s' % (str(reason), str(filename)))
			# NOTE: we are always returning an HTTP/1.1 response
			content = 'close', http(501, 'could not serve missing file %s' % str(reason))
		return content

	def getDownloader(self, client_id, host, port, command, request):
		# Return (downloader, new) for this client, reusing when possible.
		downloader = self.byclientid.get(client_id, None)
		if downloader:
			# NOTE: with pipeline, consequent request could go to other sites if the browser knows we are a proxy
			# NOTE: therefore the second request could reach the first site
			# NOTE: and we could kill the connection before the data is fully back to the client
			# NOTE: in practice modern browser are too clever and test for it !
			if host != downloader.host or port != downloader.port:
				self.endClientDownload(client_id)
				downloader = None
			else:
				newdownloader = False
		if isipv4(host):
			bind = self.configuration.tcp4.bind
		elif isipv6(host):
			bind = self.configuration.tcp6.bind
		else:
			# should really never happen
			self.log.critical('the host IP address is neither IPv4 or IPv6 .. what year is it ?')
			return None, False
		if downloader is None:
			# supervisor.local is replaced when interface are changed, so do not cache or reference it in this class
			if host in self.supervisor.local:
				for h,p in self.configuration.security.local:
					if (h == '*' or h == host) and (p == '*' or p == port):
						break
				else:
					# we did not break
					return None, False
			downloader = self.downloader_factory(client_id, host, port, bind, command, request, self.log)
			newdownloader = True
		if downloader.sock is None:
			return None, False
		return downloader, newdownloader

	def getContent(self, client_id, command, args):
		# Decide what to send the client for a redirector decision.
		# NOTE(review): truncated by the chunk boundary — the closing of this
		# try block is not visible here
		try:
			if command == 'download':
				try:
					host, port, upgrade, length, request = args.split('\0', 4)
				except (ValueError, TypeError), e:
					raise ParsingError()
				downloader, newdownloader = self.getDownloader(client_id, host, int(port), command, request)
				if downloader is not None:
					content = ('stream', '')
					if upgrade in ('', 'http/1.0', 'http/1.1'):
						length = int(length) if length.isdigit() else length
					else:
						length = -1
				else:
					content = self.getLocalContent('400', 'noconnect.html')
					length = 0
			elif command == 'connect':
				try:
					host, port, request = args.split('\0', 2)
				except (ValueError, TypeError), e:
					raise ParsingError()
				downloader, newdownloader = self.getDownloader(client_id, host, int(port), command, '')
				if downloader is not None:
					content = ('stream', '')
					length = -1  # the client can send as much data as it wants
				else:
					content = self.getLocalContent('400', 'noconnect.html')
					length = 0
'memory' : (value.boolean,string.lower,'false','command line option --memory'), 'pdb' : (value.boolean,string.lower,'false','command line option --pdb'), 'log' : (value.boolean,string.lower,'false','command line option --debug'), }, } try: configuration = load('exaproxy',defaults,arguments['configuration']) except ConfigurationError,e: print >> sys.stderr, 'configuration issue,', str(e) sys.exit(1) configuration.proxy.version = version from exaproxy.util.log.logger import Logger log = Logger('supervisor', configuration.log.supervisor) for arg in sys.argv[1:]: if arg in ['--',]: break if arg in ['-h','--help']: usage() sys.exit(0) if arg in ['-i','-fi','--ini']: ini() sys.exit(0) if arg in ['-e','-fe','--env']: env() sys.exit(0) if arg in ['-di','--diff-ini']: ini(True)
class ClientManager (object):
	"""Track client (browser-side) connections and their poller state."""

	def __init__(self, poller, configuration):
		self.total_sent4 = 0L       # bytes sent to IPv4 clients
		self.total_sent6 = 0L       # bytes sent to IPv6 clients
		self.total_requested = 0L   # number of requests read from clients
		# clients that have not sent a request yet; entries expire after
		# http.idle_connect seconds
		self.norequest = TimeCache(configuration.http.idle_connect)
		self.bysock = {}            # (client, source) indexed by socket
		self.byname = {}            # socket indexed by client name
		self.buffered = []          # sockets with data waiting to be written
		self._nextid = 0            # monotonic counter used for client names
		self.poller = poller
		self.log = Logger('client', configuration.log.client)
		self.http_max_buffer = configuration.http.header_size
		self.icap_max_buffer = configuration.icap.header_size
		self.proxied = {
			'proxy' : configuration.http.proxied,
			'icap' : configuration.icap.proxied,
		}

	def __contains__(self, item):
		return item in self.bysock

	def lookupSocket (self, item):
		# resolve a client name back to its socket (None when unknown)
		return self.byname.get(item, None)

	def getnextid(self):
		self._nextid += 1
		return str(self._nextid)

	def expire (self,number=100):
		# close at most "number" clients which never sent a request in time;
		# returns how many were cleaned up
		count = 0
		for sock in self.norequest.expired(number):
			client = self.norequest.get(sock,[None,])[0]
			if client:
				self.cleanup(sock,client.name)
				count += 1
		return count

	def httpConnection (self, sock, peer, source):
		# register a new HTTP client connection; returns the peer address
		name = self.getnextid()
		client = HTTPClient(name, sock, peer, self.log, self.http_max_buffer, self.proxied.get(source))
		self.norequest[sock] = client, source
		self.byname[name] = sock
		# watch for the opening request
		self.poller.addReadSocket('opening_client', client.sock)
		#self.log.info('new id %s (socket %s) in clients : %s' % (name, sock, sock in self.bysock))
		return peer

	def icapConnection (self, sock, peer, source):
		# register a new ICAP client connection; returns the peer address
		name = self.getnextid()
		client = ICAPClient(name, sock, peer, self.log, self.icap_max_buffer, self.proxied.get(source))
		self.norequest[sock] = client, source
		self.byname[name] = sock
		# watch for the opening request
		self.poller.addReadSocket('opening_client', client.sock)
		#self.log.info('new id %s (socket %s) in clients : %s' % (name, sock, sock in self.bysock))
		return peer

	def readRequest (self, sock):
		"""Read only the initial HTTP headers sent by the client"""
		client, source = self.norequest.get(sock, (None, None))
		if client:
			name, peer, request, subrequest, content = client.readData()
			if request:
				self.total_requested += 1
				# headers can be read only once
				self.norequest.pop(sock, (None, None))
				self.bysock[sock] = client, source
				# watch for the client sending new data
				self.poller.addReadSocket('read_client', client.sock)
				# we have now read the client's opening request
				self.poller.removeReadSocket('opening_client', client.sock)
				# do not read more data until we have properly handled the request
				self.poller.corkReadSocket('read_client', sock)
			elif request is None:
				# the client went away before sending a full request
				self.cleanup(sock, client.name)
		else:
			self.log.error('trying to read headers from a client that does not exist %s' % sock)
			name, peer, request, subrequest, content, source = None, None, None, None, None, None
		return name, peer, request, subrequest, content, source

	def readData (self, sock):
		# Read more data from an established client connection.
		client, source = self.bysock.get(sock, (None, None))
		if client:
			name, peer, request, subrequest, content = client.readData()
			if request:
				self.total_requested += 1
				# Parsing of the new request will be handled asynchronously. Ensure that
				# we do not read anything from the client until a request has been sent
				# to the remote webserver.
				# Since we just read a request, we know that the cork is not currently
				# set and so there's no risk of it being erroneously removed.
				self.poller.corkReadSocket('read_client', sock)
			elif request is None:
				self.cleanup(sock, client.name)
		else:
			self.log.error('trying to read from a client that does not exist %s' % sock)
			name, peer, request, subrequest, content = None, None, None, None, None
		return name, peer, request, subrequest, content, source

	def sendData (self, sock, data):
		# Write data to a client, keeping the poller's write interest in sync
		# with whether the client's send buffer is full.
		client, source = self.bysock.get(sock, (None, None))
		if client:
			name = client.name
			res = client.writeData(data)
			if res is None:
				# close the client connection
				self.cleanup(sock, client.name)
				buffered, had_buffer, sent4, sent6 = None, None, 0, 0
				result = None
				buffer_change = None
			else:
				buffered, had_buffer, sent4, sent6 = res
				self.total_sent4 += sent4
				self.total_sent6 += sent6
				result = buffered
				if buffered:
					if sock not in self.buffered:
						self.buffered.append(sock)
						buffer_change = True
						# watch for the socket's send buffer becoming less than full
						self.poller.addWriteSocket('write_client', client.sock)
					else:
						buffer_change = False
				elif had_buffer and sock in self.buffered:
					self.buffered.remove(sock)
					buffer_change = True
					# we no longer care about writing to the client
					self.poller.removeWriteSocket('write_client', client.sock)
				else:
					buffer_change = False
		else:
			result = None
			buffer_change = None
			name = None
		return result, buffer_change, name, source

	def parseRemaining (self, remaining):
		# Decode how much of the request body is still expected:
		# 'chunked' / 'passthrough' markers, a byte count, or nothing.
		nb_to_read = 0
		if isinstance(remaining, basestring):
			mode = 'chunked' if remaining == 'chunked' else 'passthrough'
		elif remaining > 0:
			mode = 'transfer'
			nb_to_read = remaining
		elif remaining == 0:
			mode = ''
		else:
			mode = 'passthrough'
		return mode, nb_to_read

	def startData(self, sock, data, remaining):
		# Start sending the response to the client and resume reading from it.
		client, source = self.bysock.get(sock, (None, None))
		try:
			mode, nb_to_read = self.parseRemaining(remaining)
			command, d = data if client is not None else (None, None)
		except (ValueError, TypeError), e:
			# NOTE(review): if client is None and parseRemaining raised, the
			# client.name access below would itself fail — verify reachability
			self.log.error('invalid command sent to client %s' % client.name)
			command, d = None, None
		if not client or command is None:
			return None, source
		name, peer, res = client.startData(command, d)
		if res is not None:
			name, peer, request, subrequest, content = client.readRelated(mode, nb_to_read)
			buffered, had_buffer, sent4, sent6 = res
			self.poller.uncorkReadSocket('read_client', client.sock)
			self.total_sent4 += sent4
			self.total_sent6 += sent6
		else:
			self.cleanup(client.sock, name)
			return None, source
		if request:
			# a pipelined request arrived before we answered the previous one
			self.total_requested += 1
			self.log.info('reading multiple requests')
			self.cleanup(client.sock, name)
			buffered, had_buffer = None, None
			content = None
		elif request is None:
			self.cleanup(client.sock, name)
			buffered, had_buffer = None, None
			content = None
		if buffered is True and had_buffer is False:
			self.buffered.append(client.sock)
			self.poller.addWriteSocket('write_client', client.sock)
		elif buffered is False and had_buffer is True:
			self.buffered.remove(client.sock)
			self.poller.removeWriteSocket('write_client', client.sock)
		return content, source
class Page (object):
	"""Render the pages of the proxy's built-in web/monitoring interface."""

	def __init__(self,supervisor):
		self.supervisor = supervisor
		self.monitor = supervisor.monitor
		self.email_sent = False   # only one mail may be sent per process lifetime
		self.log = Logger('web', supervisor.configuration.log.web)

	def _introspection (self,objects):
		# browsable view of the live object tree under the supervisor
		introduction = '<div style="padding: 10px 10px 10px 10px; font-weight:bold;">Looking at the internal of ExaProxy for %s </div><br/>\n' % cgi.escape('.'.join(objects))
		link = cgi.escape('/'.join(objects[:-1])) if objects[:-1] else 'supervisor'
		line = ['<a href="/information/introspection/%s.html">Back to parent object</a><br/>' % link]
		for k,content in self.monitor.introspection(objects):
			link = '/information/introspection/%s.html' % cgi.escape('%s/%s' % ('/'.join(objects),k))
			line.append('<a href="%s">%s</a><span class="value">%s</span><br/>' % (link,k,cgi.escape(content)))
		return introduction + _listing % ('\n'.join(line))

	def _configuration (self):
		introduction = '<div style="padding: 10px 10px 10px 10px; font-weight:bold;">ExaProxy Configuration</div><br/>\n'
		line = []
		for k,v in sorted(self.monitor.configuration().items()):
			line.append('<span class="key">%s</span><span class="value"> %s</span><br/>' % (k,cgi.escape(str(v))))
		return introduction + _listing % ('\n'.join(line))

	def _statistics (self):
		introduction = '<div style="padding: 10px 10px 10px 10px; font-weight:bold;">ExaProxy Statistics</div><br/>\n'
		line = []
		for k,v in sorted(self.monitor.statistics().items()):
			# NOTE(review): str(str(v)) — the inner str() is redundant
			line.append('<span class="key">%s</span><span class="value"> %s</span><br/>' % (k,cgi.escape(str(str(v)))))
		return introduction + _listing % ('\n'.join(line))

	def _connections (self):
		return graph(
			self.monitor,
			'Connections',
			20000,
			[
				'clients.silent',
				'clients.speaking',
				'servers.opening',
				'servers.established',
			]
		)

	def _processes (self):
		return graph(
			self.monitor,
			'Forked processes',
			20000,
			[
				'processes.forked',
				'processes.min',
				'processes.max',
			]
		)

	def _requests (self):
		return graph(
			self.monitor,
			'Requests/seconds received from clients',
			20000,
			[
				'clients.requests',
			],
			True,
		)

	def _clients (self):
		return graph(
			self.monitor,
			'Bits/seconds received from clients',
			20000,
			[
				'transfer.client4',
				'transfer.client6',
			],
			True,
			adaptor=Bpstobps,
		)

	def _servers (self):
		return graph(
			self.monitor,
			'Bits/seconds received from servers',
			20000,
			[
				'transfer.content4',
				'transfer.content6',
			],
			True,
			adaptor=Bpstobps,
		)

	def _transfer (self):
		return graph(
			self.monitor,
			'Bits/seconds received',
			20000,
			[
				'transfer.client',
				'transfer.content',
			],
			True,
			adaptor=Bpstobps,
		)

	def _loops (self):
		return graph(
			self.monitor,
			'Reactor loops',
			20000,
			[
				'load.loops',
			],
			True,
		)

	def _events (self):
		return graph(
			self.monitor,
			'Sockets which became readeable',
			20000,
			[
				'load.events',
			],
			True,
		)

	def _queue (self):
		return graph(
			self.monitor,
			'Queued URL for classification',
			20000,
			[
				'queue.size',
			],
			True,
		)

	def _source (self,bysock):
		# histogram of current connections per source address
		conns = 0
		clients = defaultdict(lambda:0)
		for sock in bysock:
			try:
				host,port = sock.getpeername()
			except socket.error:
				host,port = None,None
			clients[host] += 1
			conns += 1
		ordered = defaultdict(list)
		for host,number in clients.items():
			ordered[number].append(host)
		result = []
		result.append('<div style="padding: 10px 10px 10px 10px; font-weight:bold;">ExaProxy Statistics</div><br/>')
		result.append('<center>we have %d connection(s) from %d source(s)</center><br/>' % (conns, len(clients)))
		for number in reversed(sorted(ordered)):
			for host in ordered[number]:
				result.append('<span class="key">%s</span><span class="value"> %s</span><br/>' % (host,number))
		return _listing % '\n'.join(result)

	def _servers_source (self):
		return self._source(self.supervisor.content.established)

	def _clients_source (self):
		return self._source(self.supervisor.client.bysock)

	def _workers (self):
		# forms to change the redirector worker pool bounds
		form = '<form action="/control/workers/commit" method="get">%s: <input type="text" name="%s" value="%s"><input type="submit" value="Submit"></form>'
		change = {
			'exaproxy.redirector.minimum' : self.supervisor.manager.low,
			'exaproxy.redirector.maximum' : self.supervisor.manager.high,
		}
		forms = []
		for name in ('exaproxy.redirector.minimum', 'exaproxy.redirector.maximum'):
			value = change[name]
			forms.append(form % (name,name,value))
		return '<pre style="margin-left:40px;">\n' + '\n'.join(forms)

	def _run (self):
		# debug console: forms posting python to /control/debug/{eval,exec}
		s = '<pre style="margin-left:40px;">'
		s += '<form action="/control/debug/eval" method="get">eval <textarea type="text" name="python" cols="100" rows="10"></textarea><input type="submit" value="Submit"></form>'
		s += '<form action="/control/debug/exec" method="get">exec <textarea type="text" name="python" cols="100" rows="10"></textarea><input type="submit" value="Submit"></form>'
		return s

	def _logs (self):
		return 'do not view this in a web browser - the input is not sanitised, you have been warned !\n\n' + '\n'.join(History().formated())

	def _errs (self):
		return 'do not view this in a web browser - the input is not sanitised, you have been warned !\n\n' + '\n'.join(Errors().formated())

	def _email (self,args):
		if self.email_sent:
			return '<center><b>You can only send one email per time ExaProxy is started</b></center>'
		self.email_sent, message = mail.send(args)
		return message

	def _json_running (self):
		return json.dumps(self.monitor.seconds[-1],sort_keys=True,indent=2,separators=(',', ': '))

	def _json_configuration (self):
		return json.dumps(self.monitor.configuration(),sort_keys=True,indent=2,separators=(',', ': '))

	def html (self,path):
		# Route a request path (plus optional query string) to a page.
		if len(path) > 5000:
			return menu('<center><b>path is too long</b></center>')
		if path == '/':
			path = '/index.html'
			args = ''
		elif '?' in path:
			path,args = path.split('?',1)
		else:
			args = ''
		if not path.startswith('/'):
			return menu('<center><b>invalid url</b></center>')
		elif not path.endswith('.html'):
			if path == '/humans.txt':
				return humans.txt
			if path not in ('/json','/json/running','/json/configuration','/control/workers/commit','/control/debug/eval','/control/debug/exec'):
				return menu('<center><b>invalid url</b></center>')
			sections = path[1:].split('/') + ['']
		else:
			sections = path[1:-5].split('/') + ['']
		if not sections[0]:
			return menu(index)
		section = sections[0]
		subsection = sections[1]
		if section == 'json':
			if subsection == 'running':
				return self._json_running()
			if subsection == 'configuration':
				return self._json_configuration()
			# NOTE(review): "errror" typo in the served payload (left as-is)
			return '{ "errror" : "invalid url", "valid-paths": [ "/json/running", "/json/configuration" ] }'
		if section == 'index':
			return menu(index)
		if section == 'information':
			if subsection == 'introspection':
				return menu(self._introspection(sections[2:-1]))
			if subsection == 'configuration':
				return menu(self._configuration())
			if subsection == 'statistics':
				return menu(self._statistics())
			if subsection == 'logs':
				return self._logs()
			if subsection == 'errs':
				return self._errs()
			return menu(index)
		if section == 'graph':
			if subsection == 'processes':
				return menu(self._processes())
			if subsection == 'connections':
				return menu(self._connections())
			if subsection == 'servers':
				return menu(self._servers())
			if subsection == 'clients':
				return menu(self._clients())
			if subsection == 'transfered':
				return menu(self._transfer())
			if subsection == 'requests':
				return menu(self._requests())
			if subsection == 'loops':
				return menu(self._loops())
			if subsection == 'events':
				return menu(self._events())
			if subsection == 'queue':
				return menu(self._queue())
			return menu(index)
		if section == 'end-point':
			if subsection == 'servers':
				return menu(self._servers_source())
			if subsection == 'clients':
				return menu(self._clients_source())
			return menu(index)
		if section == 'control':
			action = (sections + [None,])[2]
			if subsection == 'debug':
				# SECURITY NOTE(review): the exec/eval endpoints below run
				# arbitrary python received in the query string; they are
				# gated only by configuration.web.debug — never enable this
				# on an exposed interface
				if not self.supervisor.configuration.web.debug:
					return menu('not enabled')
				if action == 'exec':
					if '=' in args:
						try:
							key,value = args.split('=',1)
							self.log.critical('PYTHON CODE RAN : %s' % value)
							command = unquote(value.replace('+',' '))
							code = compile(command,'<string>', 'exec')
							exec code
							return 'done !'
						# NOTE(review): if unquote() itself raised, "command"
						# would be unbound in this handler
						except Exception,e:
							return 'failed to run : \n' + command + '\n\nreason : \n' + str(type(e)) + '\n' + str(e)
				if action == 'eval':
					if '=' in args:
						try:
							key,value = args.split('=',1)
							self.log.critical('PYTHON CODE RAN : %s' % value)
							command = unquote(value.replace('+',' '))
							return str(eval(command))
						except Exception,e:
							return 'failed to run : \n' + command + '\n\nreason : \n' + str(type(e)) + '\n' + str(e)
				return menu(self._run())
			if subsection == 'workers':
				if action == 'commit':
					if '=' in args:
						key,value = args.split('=',1)
						if key == 'exaproxy.redirector.minimum':
							if value.isdigit():  # this prevents negative values
								setting = int(value)
								if setting > self.supervisor.manager.high:
									return menu(self._workers() + '<div style="color: red; padding-top: 3em;">value is higher than exaproxy.redirector.maximum</div>')
								self.supervisor.manager.low = setting
								return menu(self._workers() + '<div style="color: green; padding-top: 3em;">changed successfully</div>')
						if key == 'exaproxy.redirector.maximum':
							if value.isdigit():
								setting = int(value)
								if setting < self.supervisor.manager.low:
									return menu(self._workers() + '<div style="color: red; padding-top: 3em;">value is lower than exaproxy.redirector.minimum</div>')
								self.supervisor.manager.high = setting
								return menu(self._workers() + '<div style="color: green; padding-top: 3em;">changed successfully</div>')
					return menu(self._workers() + '<div style="color: red; padding-top: 3em;">invalid request</div>')
				return menu(self._workers())
		return menu(index)
def __init__ (self, configuration, name): self.log = Logger('worker ' + str(name), configuration.log.worker)
class Supervisor (object):
	"""Owns every subsystem of the proxy and drives the main loop."""

	# how often the ITIMER alarm wakes us for regular backend work
	alarm_time = 0.1
	second_frequency = int(1/alarm_time)        # when we record history
	minute_frequency = int(60/alarm_time)       # when we want to average history
	increase_frequency = int(5/alarm_time)      # when we add workers
	decrease_frequency = int(60/alarm_time)     # when we remove workers
	saturation_frequency = int(20/alarm_time)   # when we report connection saturation
	interface_frequency = int(300/alarm_time)   # when we check for new interfaces

	# import os
	# clear = [hex(ord(c)) for c in os.popen('clear').read()]
	# clear = ''.join([chr(int(c,16)) for c in ['0x1b', '0x5b', '0x48', '0x1b', '0x5b', '0x32', '0x4a']])

	def __init__ (self,configuration):
		# NOTE(review): the "configuration" argument is immediately discarded
		# and replaced by a fresh load() — confirm this is intentional
		configuration = load()
		self.configuration = configuration
		# Only here so the introspection code can find them
		self.log = Logger('supervisor', configuration.log.supervisor)
		self.log.error('Starting exaproxy version %s' % configuration.proxy.version)
		self.signal_log = Logger('signal', configuration.log.signal)
		self.log_writer = SysLogWriter('log', configuration.log.destination, configuration.log.enable, level=configuration.log.level)
		self.usage_writer = UsageWriter('usage', configuration.usage.destination, configuration.usage.enable)
		# flush any queued log messages when the interpreter exits
		sys.exitfunc = self.log_writer.writeMessages
		self.log_writer.setIdentifier(configuration.daemon.identifier)
		#self.usage_writer.setIdentifier(configuration.daemon.identifier)
		if configuration.debug.log:
			self.log_writer.toggleDebug()
			self.usage_writer.toggleDebug()
		self.log.error('python version %s' % sys.version.replace(os.linesep,' '))
		self.log.debug('starting %s' % sys.argv[0])
		self.pid = PID(self.configuration)
		self.daemon = Daemon(self.configuration)
		self.poller = Poller(self.configuration.daemon)
		# register every polling category the reactor will use
		self.poller.setupRead('read_proxy')        # Listening proxy sockets
		self.poller.setupRead('read_web')          # Listening webserver sockets
		self.poller.setupRead('read_icap')         # Listening icap sockets
		self.poller.setupRead('read_redirector')   # Pipes carrying responses from the redirector process
		self.poller.setupRead('read_resolver')     # Sockets currently listening for DNS responses
		self.poller.setupRead('read_client')       # Active clients
		self.poller.setupRead('opening_client')    # Clients we have not yet read a request from
		self.poller.setupWrite('write_client')     # Active clients with buffered data to send
		self.poller.setupWrite('write_resolver')   # Active DNS requests with buffered data to send
		self.poller.setupRead('read_download')     # Established connections
		self.poller.setupWrite('write_download')   # Established connections we have buffered data to send to
		self.poller.setupWrite('opening_download') # Opening connections
		self.monitor = Monitor(self)
		self.page = Page(self)
		self.content = ContentManager(self,configuration)
		self.client = ClientManager(self.poller, configuration)
		self.resolver = ResolverManager(self.poller, self.configuration, configuration.dns.retries*10)
		self.proxy = Server('http proxy',self.poller,'read_proxy', configuration.http.connections)
		self.web = Server('web server',self.poller,'read_web', configuration.web.connections)
		self.icap = Server('icap server',self.poller,'read_icap', configuration.icap.connections)
		self._shutdown = True if self.daemon.filemax == 0 else False  # stop the program
		self._softstop = False      # stop once all current connection have been dealt with
		self._reload = False        # unimplemented
		self._toggle_debug = False  # start logging a lot
		self._decrease_spawn_limit = 0
		self._increase_spawn_limit = 0
		self._refork = False        # unimplemented
		self._pdb = False           # turn on pdb debugging
		self._listen = None         # listening change ? None: no, True: listen, False: stop listening
		self.wait_time = 5.0        # how long do we wait at maximum once we have been soft-killed
		self.local = set()          # what addresses are on our local interfaces
		if not self.initialise():
			self._shutdown = True
		elif self.daemon.drop_privileges():
			self.log.critical('Could not drop privileges to \'%s\'. Refusing to run as root' % self.daemon.user)
			self.log.critical('Set the environment value USER to change the unprivileged user')
			self._shutdown = True
		# fork the redirector process before performing any further setup
		redirector = fork_redirector(self.poller, self.configuration)
		# create threads _after_ all forking is done
		self.redirector = redirector_message_thread(redirector)
		self.reactor = Reactor(self.configuration, self.web, self.proxy, self.icap, self.redirector, self.content, self.client, self.resolver, self.log_writer, self.usage_writer, self.poller)
		self.interfaces()
		signal.signal(signal.SIGQUIT, self.sigquit)
		signal.signal(signal.SIGINT, self.sigterm)
		signal.signal(signal.SIGTERM, self.sigterm)
		# signal.signal(signal.SIGABRT, self.sigabrt)
		# signal.signal(signal.SIGHUP, self.sighup)
		signal.signal(signal.SIGTRAP, self.sigtrap)
		signal.signal(signal.SIGUSR1, self.sigusr1)
		signal.signal(signal.SIGUSR2, self.sigusr2)
		signal.signal(signal.SIGTTOU, self.sigttou)
		signal.signal(signal.SIGTTIN, self.sigttin)
		signal.signal(signal.SIGALRM, self.sigalrm)
		# make sure we always have data in history
		# (done in zero for dependencies reasons)
		self.monitor.zero()

	def exit (self):
		sys.exit()

	def sigquit (self,signum, frame):
		# NOTE(review): handles SIGQUIT but the log text says "SIG INT";
		# first occurrence soft-stops, a second one forces shutdown
		if self._softstop:
			self.signal_log.critical('multiple SIG INT received, shutdown')
			self._shutdown = True
		else:
			self.signal_log.critical('SIG INT received, soft-stop')
			self._softstop = True
			self._listen = False

	def sigterm (self,signum, frame):
		self.signal_log.critical('SIG TERM received, shutdown request')
		if os.environ.get('PDB',False):
			self._pdb = True
		else:
			self._shutdown = True

	# def sigabrt (self,signum, frame):
	# 	self.signal_log.info('SIG INFO received, refork request')
	# 	self._refork = True

	# def sighup (self,signum, frame):
	# 	self.signal_log.info('SIG HUP received, reload request')
	# 	self._reload = True

	def sigtrap (self,signum, frame):
		self.signal_log.critical('SIG TRAP received, toggle debug')
		self._toggle_debug = True

	def sigusr1 (self,signum, frame):
		self.signal_log.critical('SIG USR1 received, decrease worker number')
		self._decrease_spawn_limit += 1

	def sigusr2 (self,signum, frame):
		self.signal_log.critical('SIG USR2 received, increase worker number')
		self._increase_spawn_limit += 1

	def sigttou (self,signum, frame):
		self.signal_log.critical('SIG TTOU received, stop listening')
		self._listen = False

	def sigttin (self,signum, frame):
		# NOTE(review): the log text has typos ("SIG IN", "star listening");
		# runtime strings left untouched here
		self.signal_log.critical('SIG IN received, star listening')
		self._listen = True

	def sigalrm (self,signum, frame):
		# break out of the reactor loop and re-arm the periodic timer
		self.reactor.running = False
		signal.setitimer(signal.ITIMER_REAL,self.alarm_time,self.alarm_time)

	def interfaces (self):
		# refresh the set of IP addresses configured on local interfaces
		local = set(['127.0.0.1','::1'])
		for interface in getifaddrs():
			if interface.family not in (AF_INET,AF_INET6):
				continue
			if interface.address not in self.local:
				self.log.info('found new local ip %s (%s)' % (interface.address,interface.name))
			local.add(interface.address)
		for ip in self.local:
			if ip not in local:
				self.log.info('removed local ip %s' % ip)
		if local == self.local:
			self.log.info('no ip change')
		else:
			self.local = local

	def run (self):
		# Main loop: run the reactor, then service any pending signal-driven
		# state changes and the time-based housekeeping counters.
		signal.setitimer(signal.ITIMER_REAL,self.alarm_time,self.alarm_time)
		count_second = 0
		count_minute = 0
		count_saturation = 0
		count_interface = 0
		while True:
			count_second = (count_second + 1) % self.second_frequency
			count_minute = (count_minute + 1) % self.minute_frequency
			count_saturation = (count_saturation + 1) % self.saturation_frequency
			count_interface = (count_interface + 1) % self.interface_frequency
			try:
				if self._pdb:
					self._pdb = False
					import pdb
					pdb.set_trace()
				# check for IO change with select
				status = self.reactor.run()
				if status is False:
					self._shutdown = True
				# must follow the reactor so we are sure to go through the reactor at least once
				# and flush any logs
				if self._shutdown:
					self._shutdown = False
					self.shutdown()
					break
				elif self._reload:
					self._reload = False
					self.reload()
				elif self._refork:
					self._refork = False
					self.signal_log.warning('refork not implemented')
					# stop listening to new connections
					# refork the program (as we have been updated)
					# just handle current open connection
				if self._softstop:
					if self._listen == False:
						self.proxy.rejecting()
						self._listen = None
					if self.client.softstop():
						self._shutdown = True
				# only change listening if we are not shutting down
				elif self._listen is not None:
					if self._listen:
						self._shutdown = not self.proxy.accepting()
						self._listen = None
					else:
						self.proxy.rejecting()
						self._listen = None
				if self._toggle_debug:
					self._toggle_debug = False
					self.log_writer.toggleDebug()
				if self._decrease_spawn_limit:
					count = self._decrease_spawn_limit
					self.redirector.decreaseSpawnLimit(count)
					self._decrease_spawn_limit = 0
				if self._increase_spawn_limit:
					count = self._increase_spawn_limit
					self.redirector.increaseSpawnLimit(count)
					self._increase_spawn_limit = 0
				# save our monitoring stats
				if count_second == 0:
					self.monitor.second()
					expired = self.reactor.client.expire()
				else:
					expired = 0
				if expired:
					self.proxy.notifyClose(None, count=expired)
				if count_minute == 0:
					self.monitor.minute()
				# report if we saw too many connections
				if count_saturation == 0:
					self.proxy.saturation()
					self.web.saturation()
				if self.configuration.daemon.poll_interfaces and count_interface == 0:
					self.interfaces()
			except KeyboardInterrupt:
				self.log.critical('^C received')
				self._shutdown = True
			except OSError,e:
				# This should never happen as we are limiting how many connections we accept
				if e.errno == 24:  # Too many open files
					self.log.critical('Too many opened files, shutting down')
					for line in traceback.format_exc().split('\n'):
						self.log.critical(line)
					self._shutdown = True
				else:
					self.log.critical('unrecoverable io error')
					for line in traceback.format_exc().split('\n'):
						self.log.critical(line)
					self._shutdown = True
			finally:
				# NOTE(review): truncated by the chunk boundary — the body of
				# this finally clause is not visible here
def __init__ (self, configuration, name):
	# One logger per factory instance, tagged with the worker's name so
	# log lines can be traced back to the child they concern.
	worker_tag = 'worker %s' % str(name)
	self.log = Logger(worker_tag, configuration.log.worker)
class RedirectorManager (object):
	"""Manage a pool of Redirector worker threads consuming a shared queue.

	Keeps the pool size between configuration.redirector.minimum and
	.maximum, spawning and reaping workers as the queue length evolves.
	"""

	def __init__ (self, configuration, poller):
		self.configuration = configuration
		self.low = configuration.redirector.minimum    # minimum number of workers at all time
		self.high = configuration.redirector.maximum   # maximum number of workers at all time
		self.program = configuration.redirector.program  # what program speaks the squid redirector API

		self.nextid = 1          # incremental number to make the name of the next worker
		self.queue = Queue()     # queue with HTTP headers to process
		self.poller = poller     # poller interface that checks for events on sockets
		self.worker = {}         # our workers threads, indexed by wid
		self.closing = set()     # workers that are currently closing
		self.running = True      # we are running
		self.cache = {}          # FIX: stats per timestamp, read/written by storeStats() but never initialised before

		self.log = Logger('manager', configuration.log.manager)

	def _getid (self):
		"""Return a new unique (string) worker id."""
		id = str(self.nextid)
		self.nextid += 1
		return id

	def _spawn (self):
		"""add one worker to the pool"""
		wid = self._getid()
		worker = Redirector(self.configuration, wid, self.queue, self.program)
		self.poller.addReadSocket('read_workers', worker.response_box_read)
		self.worker[wid] = worker
		self.log.info("added a worker")
		self.log.info("we have %d workers. defined range is ( %d / %d )" % (len(self.worker),self.low,self.high))
		self.worker[wid].start()

	def spawn (self, number=1):
		"""create the set number of worker"""
		self.log.info("spawning %d more worker" % number)
		for _ in range(number):
			self._spawn()

	def respawn (self):
		"""make sure we reach the minimum number of workers"""
		number = max(min(len(self.worker), self.high), self.low)
		for wid in set(self.worker):
			self.reap(wid)
		self.spawn(number)

	def reap (self, wid):
		"""Ask worker *wid* to stop; it is removed later via the 'hangup' command."""
		self.log.info('we are killing worker %s' % wid)
		worker = self.worker[wid]
		self.closing.add(wid)
		worker.stop()  # will cause the worker to stop when it can

	def decrease (self):
		"""Reap the oldest worker if we are above the configured minimum."""
		if self.low < len(self.worker):
			worker = self._oldest()
			if worker:
				self.reap(worker.wid)

	def increase (self):
		"""Spawn one worker if we are below the configured maximum."""
		if len(self.worker) < self.high:
			self.spawn()

	def start (self):
		"""spawn our minimum number of workers"""
		self.log.info("starting workers.")
		self.spawn(max(0, self.low - len(self.worker)))

	def stop (self):
		"""tell all our worker to stop reading the queue and stop"""
		self.running = False
		threads = self.worker.values()

		if len(self.worker):
			self.log.info("stopping %d workers." % len(self.worker))
			for wid in set(self.worker):
				self.reap(wid)
			# wake every thread blocked on the queue so it can notice it must stop
			for thread in threads:
				self.request(None, None, None, 'nop')
			for thread in threads:
				thread.destroyProcess()
				thread.join()

		self.worker = {}

	def _oldest (self):
		"""find the oldest worker"""
		oldest = None
		past = time.time()
		for wid in set(self.worker):
			creation = self.worker[wid].creation
			if creation < past and wid not in self.closing:
				past = creation
				oldest = self.worker[wid]
		return oldest

	def provision (self):
		"""manage our workers to make sure we have enough to consume the queue"""
		if not self.running:
			return

		num_workers = len(self.worker)

		# bad we are bleeding workers !
		if num_workers < self.low:
			self.log.info("we lost some workers, respawing %d new workers" % (self.low - num_workers))
			self.spawn(self.low - num_workers)

		size = self.queue.qsize()

		# we need more workers
		if size >= num_workers:
			# nothing we can do we have reach our limit
			if num_workers >= self.high:
				self.log.warning("help ! we need more workers but we reached our ceiling ! %d request are queued for %d processes" % (size,num_workers))
				return
			# try to figure a good number to add ..
			# no less than one, no more than to reach self.high, lower between self.low and a quarter of the allowed growth
			nb_to_add = int(min(max(1, min(self.low, (self.high - self.low) / 4)), self.high - num_workers))
			self.log.warning("we are low on workers adding a few (%d), the queue has %d unhandled url" % (nb_to_add,size))
			self.spawn(nb_to_add)

	def deprovision (self):
		"""manage our workers to make sure we have enough to consume the queue"""
		if not self.running:
			return

		size = self.queue.qsize()
		num_workers = len(self.worker)

		# we are now overprovisioned
		if size < 2 and num_workers > self.low:
			self.log.info("we have too many workers (%d), stopping the oldest" % num_workers)
			# if we have to kill one, at least stop the one who had the most chance to memory leak :)
			worker = self._oldest()
			if worker:
				self.reap(worker.wid)

	def request (self, client_id, peer, request, source):
		"""Queue one classification request for the worker pool."""
		return self.queue.put((client_id, peer, request, source, False))

	def getDecision (self, box):
		"""Read one netstring-framed response from a worker pipe *box*.

		Returns (client_id, command, decision); all three are None when the
		message was malformed or was an internal command (requeue/hangup/stats)
		that this method consumed itself.
		"""
		# NOTE: reads may block if we send badly formatted data
		try:
			r_buffer = box.read(3)
			while r_buffer.isdigit():
				r_buffer += box.read(1)

			if ':' in r_buffer:
				size, response = r_buffer.split(':', 1)
				if size.isdigit():
					size = int(size)
				else:
					size, response = None, None
			else:  # not a netstring
				size, response = None, None

			if size is not None:
				required = size + 1 - len(response)
				response += box.read(required)

			if response is not None:
				if response.endswith(','):
					response = response[:-1]
				else:
					response = None

		except ValueError:  # I/O operation on closed file
			# NOTE(review): self.worker is keyed by wid strings, but *box* is a
			# file object, so this lookup always misses — confirm intended key.
			worker = self.worker.get(box, None)
			if worker is not None:
				worker.destroyProcess()
			response = None

		except TypeError:
			response = None

		try:
			if response:
				client_id, command, decision = response.split('\0', 2)
			else:
				client_id = None
				command = None
				decision = None
		except (ValueError, TypeError):
			client_id = None
			command = None
			decision = None

		if command == 'requeue':
			_client_id, _peer, _source, _header = response.split('\0', 3)
			self.queue.put((_client_id, _peer, _header, _source, True))
			client_id = None
			command = None
			decision = None

		elif command == 'hangup':
			wid = decision
			client_id = None
			command = None
			decision = None

			worker = self.worker.pop(wid, None)
			if worker:
				self.poller.removeReadSocket('read_workers', worker.response_box_read)
				if wid in self.closing:
					self.closing.remove(wid)
				worker.shutdown()
				worker.join()

		elif command == 'stats':
			# NOTE(review): *decision* is a string here; unpacking it into three
			# names only works if it has exactly three characters. It probably
			# should be decision.split('\0', 2) — confirm the wire format.
			wid, timestamp, stats = decision
			self.storeStats(timestamp, wid, stats)
			client_id = None
			command = None
			decision = None

		return client_id, command, decision

	def showInternalError (self):
		"""Decision tuple serving the internal error page."""
		return 'file', '\0'.join(('200', 'internal_error.html'))

	def requestStats (self):
		"""Ask every worker to report its statistics."""
		for wid, worker in self.worker.iteritems():
			worker.requestStats()

	def storeStats (self, timestamp, wid, stats):
		"""Merge one worker's querystring-encoded stats into self.cache.

		*stats* looks like 'prefix?key=value&key2=value2'; values for the
		same key accumulate in a list under the given timestamp.
		"""
		# FIX: the original called .split('&') on the *list* returned by
		# stats.split('?', 1), which always raised AttributeError.
		pairs = (d.split('=', 1) for d in stats.split('?', 1)[-1].split('&'))
		d = self.cache.setdefault(timestamp, {})
		for k, v in pairs:
			d.setdefault(k, []).append(v)
""" # http://code.google.com/speed/articles/web-metrics.html import select import socket import errno from exaproxy.network.errno_list import errno_block, errno_fatal from interface import IPoller from exaproxy.util.log.logger import Logger from exaproxy.configuration import load configuration = load() log = Logger('select', configuration.log.server) def poll_select(read, write, timeout=None): try: r, w, x = select.select(read, write, read + write, timeout) except socket.error, e: if e.args[0] in errno_block: log.error('select not ready, errno %d: %s' % (e.args[0], errno.errorcode.get(e.args[0], ''))) return [], [], [] if e.args[0] in errno_fatal: log.error('select problem, errno %d: %s' % (e.args[0], errno.errorcode.get(e.args[0], ''))) log.error('poller read : %s' % str(read))
""" nettools.py Created by Thomas Mangin on 2011-11-30. Copyright (c) 2011-2013 Exa Networks. All rights reserved. """ import socket import errno from exaproxy.util.log.logger import Logger from exaproxy.network.errno_list import errno_block from exaproxy.configuration import load configuration = load() log = Logger('server', configuration.log.server) def isipv4(address): try: socket.inet_pton(socket.AF_INET, address) return True except socket.error: return False def isipv6(address): try: socket.inet_pton(socket.AF_INET6, address) return True except socket.error: return False
'command line option --pdb'), 'log': (value.boolean, string.lower, 'false', 'command line option --debug'), }, } try: configuration = load('exaproxy', defaults, arguments['configuration']) except ConfigurationError, e: print >> sys.stderr, 'configuration issue,', str(e) sys.exit(1) configuration.proxy.version = version from exaproxy.util.log.logger import Logger log = Logger('supervisor', configuration.log.supervisor) for arg in sys.argv[1:]: if arg in [ '--', ]: break if arg in ['-h', '--help']: help() sys.exit(0) if arg in ['-i', '-fi', '--ini']: ini() sys.exit(0) if arg in ['-e', '-fe', '--env']: env() sys.exit(0)
# encoding: utf-8 """ async/__init__.py Created by David Farrar on 2012-01-31. Copyright (c) 2011-2013 Exa Networks. All rights reserved. """ import sys import select from exaproxy.util.log.logger import Logger from exaproxy.configuration import load configuration = load() log = Logger('supervisor', configuration.log.supervisor) def Poller(configuration, speed=None): reactor = configuration.reactor if reactor == 'best': if sys.platform.startswith('linux'): configuration.reactor = 'epoll' elif sys.platform.startswith('freebsd'): configuration.reactor = 'kqueue' elif sys.platform.startswith('darwin'): configuration.reactor = 'kqueue' else: log.error( 'we could not autodetect an high performance reactor for your OS'
class ResolverManager (object):
	"""Drive one UDP DNS client plus on-demand TCP clients, with caching,
	retransmission and timeout bookkeeping for in-flight lookups."""

	resolverFactory = DNSResolver

	def __init__ (self, poller, configuration, max_workers):
		self.poller = poller
		self.configuration = configuration
		self.resolver_factory = self.resolverFactory(configuration)

		# The actual work is done in the worker
		self.worker = self.resolver_factory.createUDPClient()

		# All currently active clients (one UDP and many TCP)
		self.workers = {}
		self.workers[self.worker.socket] = self.worker
		self.poller.addReadSocket('read_resolver', self.worker.socket)

		# Track the clients currently expecting results
		self.clients = {}  # client_id : identifier

		# Key should be the hostname rather than the request ID?
		self.resolving = {}  # identifier, worker_id :

		# TCP workers that have not yet sent a complete request
		self.sending = {}  # sock :

		# Maximum number of entry we will cache (1024 DNS lookup per second !)
		# assuming 1k per entry, which is a lot, it mean 20Mb of memory
		# which at the default of 900 seconds of cache is 22 new host per seonds
		self.max_entries = 1024*20

		# track the current queries and when they were started
		self.active = []

		self.cache = {}
		self.cached = deque()

		self.max_workers = max_workers
		self.worker_count = len(self.workers)  # the UDP client

		self.waiting = []

		self.log = Logger('resolver', configuration.log.resolver)
		self.chained = {}

	def cacheDestination (self, hostname, ip):
		"""Remember hostname->ip, bucketed by 5-second expiry slots."""
		if hostname not in self.cache:
			expire_time = time.time() + self.configuration.dns.ttl
			expire_time = expire_time - expire_time % 5  # group the DNS record per buckets 5 seconds
			latest_time, latest_hosts = self.cached[-1] if self.cached else (-1, None)

			if expire_time > latest_time:
				hosts = []
				self.cached.append((expire_time, hosts))
			else:
				hosts = latest_hosts

			self.cache[hostname] = ip
			hosts.append(hostname)

	def expireCache (self):
		# expire only one set of cache entries at a time
		if self.cached:
			current_time = time.time()
			expire_time, hosts = self.cached[0]

			if current_time >= expire_time or len(self.cache) > self.max_entries:
				expire_time, hosts = self.cached.popleft()
				for hostname in hosts:
					self.cache.pop(hostname, None)

	def cleanup (self):
		"""Generator: time out stale queries, retransmitting UDP lookups and
		yielding 'rewrite' decisions for lookups we give up on."""
		now = time.time()
		cutoff = now - self.configuration.dns.timeout
		count = 0

		for timestamp, client_id, sock in self.active:
			if timestamp > cutoff:
				break  # self.active is time-ordered: nothing further is stale
			count += 1

			cli_data = self.clients.pop(client_id, None)
			worker = self.workers.get(sock)
			tcpudp = 'udp' if worker is self.worker else 'tcp'

			if cli_data is not None:
				w_id, identifier, active_time, resolve_count = cli_data
				data = self.resolving.pop((w_id, identifier), None)
				if not data:
					data = self.sending.pop(sock, None)

				if data:
					client_id, original, hostname, command, decision = data
					self.log.error('timeout when requesting address for %s using the %s client - attempt %s' % (hostname, tcpudp, resolve_count))

					if resolve_count < self.configuration.dns.retries and worker is self.worker:
						self.log.info('going to retransmit request for %s - attempt %s of %s' % (hostname, resolve_count+1, self.configuration.dns.retries))
						self.startResolving(client_id, command, decision, resolve_count+1, identifier=identifier)
						continue

					self.log.error('given up trying to resolve %s after %s attempts' % (hostname, self.configuration.dns.retries))
					yield client_id, 'rewrite', ('503', 'dns.html', '', '', '', hostname, 'peer')

			if worker is not None:
				if worker is not self.worker:  # never close the shared UDP client
					worker.close()
					self.workers.pop(sock)

		if count:
			self.active = self.active[count:]

	def resolves (self, command, decision):
		"""True when *decision*'s destination is a hostname needing resolution."""
		if command in ('download', 'connect'):
			hostname = decision[0]
			if isip(hostname):
				res = False
			else:
				res = True
		else:
			res = False
		return res

	def extractHostname (self, command, decision):
		"""Return the hostname from *decision*, or None for other commands."""
		if command in ('download', 'connect'):
			hostname = decision[0]
		else:
			hostname = None
		return hostname

	def resolveDecision (self, command, decision, ip):
		"""Return *decision* with its hostname replaced by the resolved *ip*."""
		if command in ('download', 'connect'):
			hostname, args = decision[0], decision[1:]
			newdecision = (ip,) + args
		else:
			newdecision = None
		return newdecision

	def startResolving (self, client_id, command, decision, resolve_count=1, identifier=None):
		"""Start (or retransmit, when *identifier* is given) a UDP lookup.

		Returns (identifier, response): response is a ready decision when the
		cache answered or the name was rejected without a lookup; otherwise
		None while the query is in flight.
		"""
		hostname = self.extractHostname(command, decision)

		if hostname:
			# Resolution is already in our cache
			if hostname in self.cache and identifier is None:
				ip = self.cache[hostname]
				if ip is not None:
					resolved = self.resolveDecision(command, decision, ip)
					response = (client_id, command) + resolved
				else:
					response = client_id, 'rewrite', '503', 'dns.html', 'http', '', '', hostname, 'peer'

			# do not try to resolve domains which are not FQDN
			elif self.configuration.dns.fqdn and '.' not in hostname:
				identifier = None
				response = client_id, 'rewrite', '200', 'dns.html', 'http', '', '', hostname, 'peer'

			# each DNS part (between the dots) must be under 256 chars
			elif max(len(p) for p in hostname.split('.')) > 255:
				identifier = None
				self.log.info('jumbo hostname: %s' % hostname)
				response = client_id, 'rewrite', '503', 'dns.html', 'http', '', '', hostname, 'peer'

			# Lookup that DNS name
			else:
				identifier, _ = self.worker.resolveHost(hostname, identifier=identifier)
				response = None
				active_time = time.time()

				self.resolving[(self.worker.w_id, identifier)] = client_id, hostname, hostname, command, decision
				self.clients[client_id] = (self.worker.w_id, identifier, active_time, resolve_count)
				self.active.append((active_time, client_id, self.worker.socket))
		else:
			identifier = None
			response = None

		return identifier, response

	def beginResolvingTCP (self, client_id, command, decision, resolve_count):
		"""Start a TCP lookup, or park it in self.waiting at the worker cap."""
		if self.worker_count < self.max_workers:
			identifier = self.newTCPResolver(client_id, command, decision, resolve_count)
			self.worker_count += 1
		else:
			self.waiting.append((client_id, command, decision, resolve_count))
			identifier = None
		return identifier

	def notifyClose (self):
		"""A TCP worker closed: start queued lookups if capacity freed up."""
		paused = self.worker_count >= self.max_workers
		self.worker_count -= 1

		if paused and self.worker_count < self.max_workers:
			for _ in range(self.worker_count, self.max_workers):
				if self.waiting:
					data, self.waiting = self.waiting[0], self.waiting[1:]
					client_id, command, decision, resolve_count = data
					identifier = self.newTCPResolver(client_id, command, decision, resolve_count)
					self.worker_count += 1

	def newTCPResolver (self, client_id, command, decision, resolve_count):
		"""Create a TCP client and begin sending the query; returns its identifier."""
		hostname = self.extractHostname(command, decision)

		if hostname:
			worker = self.resolver_factory.createTCPClient()
			self.workers[worker.socket] = worker

			identifier, all_sent = worker.resolveHost(hostname)
			active_time = time.time()
			self.resolving[(worker.w_id, identifier)] = client_id, hostname, hostname, command, decision
			self.clients[client_id] = (worker.w_id, identifier, active_time, resolve_count)
			self.active.append((active_time, client_id, self.worker.socket))

			if all_sent:
				self.poller.addReadSocket('read_resolver', worker.socket)
				self.resolving[(worker.w_id, identifier)] = client_id, hostname, hostname, command, decision
			else:
				self.poller.addWriteSocket('write_resolver', worker.socket)
				self.sending[worker.socket] = client_id, hostname, hostname, command, decision
		else:
			identifier = None

		return identifier

	def getResponse (self, sock):
		"""Process one DNS response on *sock*; return a decision tuple or None."""
		worker = self.workers.get(sock)

		if worker:
			result = worker.getResponse(self.chained)

			if result:
				identifier, forhost, ip, completed, newidentifier, newhost, newcomplete = result
				data = self.resolving.pop((worker.w_id, identifier), None)

				chain_count = self.chained.pop(identifier, 0)
				if newidentifier:
					self.chained[newidentifier] = chain_count + 1

				if not data:
					self.log.info('ignoring response for %s (%s) with identifier %s' % (forhost, ip, identifier))
			else:
				# unable to parse response
				self.log.error('unable to parse response')
				data = None

			if data:
				client_id, original, hostname, command, decision = data
				clidata = self.clients.pop(client_id, None)

				if completed:
					if clidata is not None:
						key = clidata[2], client_id, worker.socket
						if key in self.active:
							self.active.remove(key)

				# check to see if we received an incomplete response
				if not completed:
					newidentifier = self.beginResolvingTCP(client_id, command, decision, 1)
					newhost = hostname
					response = None

				# check to see if the worker started a new request
				if newidentifier:
					if completed:
						active_time = time.time()
						self.resolving[(worker.w_id, newidentifier)] = client_id, original, newhost, command, decision
						self.clients[client_id] = (worker.w_id, newidentifier, active_time, 1)
						self.active.append((active_time, client_id, worker.socket))
					response = None

					if completed and newcomplete:
						self.poller.addReadSocket('read_resolver', worker.socket)
					elif completed and not newcomplete:
						self.poller.addWriteSocket('write_resolver', worker.socket)
						self.sending[worker.socket] = client_id, original, hostname, command, decision

				# we just started a new (TCP) request and have not yet completely sent it
				# make sure we still know who the request is for
				elif not completed:
					response = None

				# maybe we read the wrong response?
				elif forhost != hostname:
					_, _, _, resolve_count = clidata
					active_time = time.time()
					self.resolving[(worker.w_id, identifier)] = client_id, original, hostname, command, decision
					self.clients[client_id] = (worker.w_id, identifier, active_time, resolve_count)
					self.active.append((active_time, client_id, worker.socket))
					response = None

				# success
				elif ip is not None:
					resolved = self.resolveDecision(command, decision, ip)
					response = (client_id, command) + resolved
					self.cacheDestination(original, ip)

				# not found
				else:
					response = client_id, 'rewrite', '503', 'dns.html', 'http', '', '', hostname, 'peer'
					#self.cacheDestination(original, ip)
			else:
				response = None

			if response or result is None:
				if worker is not self.worker:
					self.poller.removeReadSocket('read_resolver', sock)
					self.poller.removeWriteSocket('write_resolver', sock)
					worker.close()
					self.workers.pop(sock)
					self.notifyClose()
		else:
			response = None

		return response

	def continueSending (self, sock):
		"""Continue sending data over the connected TCP socket"""
		data = self.sending.get(sock)
		if data:
			client_id, original, hostname, command, decision = data
		else:
			# FIX: the original unpacked SIX Nones into five names, raising
			# ValueError whenever sock was missing from self.sending.
			client_id, original, hostname, command, decision = None, None, None, None, None

		worker = self.workers[sock]
		res = worker.continueSending()

		if res is False:  # we've sent all we need to send
			self.poller.removeWriteSocket('write_resolver', sock)

			if client_id in self.clients:
				w_id, identifier, active_time, resolve_count = self.clients[client_id]
				tmp = self.sending.pop(sock)
				self.resolving[(w_id, identifier)] = tmp
				self.poller.addReadSocket('read_resolver', sock)
			else:
				self.log.error('could not find client for dns request for %s. request is being left to timeout.' % str(hostname))
class Redirector (object):
	"""Classify client requests, optionally via a forked squid-style
	redirector process, and turn the verdict into a Respond.* decision."""
	# TODO : if the program is a function, fork and run :)

	HTTPParser = HTTPRequestFactory
	TLSParser = TLSParser
	ResponseFactory = ResponseFactory
	ChildFactory = ChildFactory

	__slots__ = ['configuration', 'tls_parser', 'http_parser', 'enabled', '_transparent', 'log', 'usage', 'response_factory', 'child_factory', 'wid', 'creation', 'program', 'running', 'stats_timestamp', '_proxy', 'universal', 'process']

	def __init__ (self, configuration, name, program, protocol):
		self.configuration = configuration
		self.http_parser = self.HTTPParser(configuration)
		self.tls_parser = self.TLSParser(configuration)
		self.enabled = bool(program is not None) and configuration.redirector.enable
		self._transparent = configuration.http.transparent
		self.log = Logger('worker ' + str(name), configuration.log.worker)
		self.usage = UsageLogger('usage', configuration.log.worker)
		self.response_factory = self.ResponseFactory()
		self.child_factory = self.ChildFactory(configuration, name)

		self.wid = name              # a unique name
		self.creation = time.time()  # when the thread was created
		#self.last_worked = self.creation  # when the thread last picked a task

		self.program = program       # the squid redirector program to fork
		self.running = True          # the thread is active
		self.stats_timestamp = None  # time of the most recent outstanding request to generate stats

		self._proxy = 'ExaProxy-%s-id-%d' % (configuration.proxy.version,os.getpid())
		universal = configuration.redirector.protocol == 'url'

		# Do not move, we need the forking AFTER the setup
		if program:
			self.process = self.child_factory.createProcess(self.program, universal=universal)
		else:
			self.process = None

	def addHeaders (self, message, peer):
		"""Strip hop-by-hop proxy headers and, unless transparent, add Via /
		X-Forwarded-For before the request is sent upstream."""
		headers = message.headers
		# http://homepage.ntlworld.com./jonathan.deboynepollard/FGA/web-proxy-connection-header.html
		headers.pop('proxy-connection',None)
		# NOTE: To be RFC compliant we need to add a Via field http://tools.ietf.org/html/rfc2616#section-14.45 on the reply too
		# NOTE: At the moment we only add it from the client to the server (which is what really matters)
		if not self._transparent:
			headers.extend('via','Via: %s %s' % (message.request.version, self._proxy))
			headers.extend('x_forwarded_for', 'X-Forwarded-For: %s' % peer)
			headers.pop('proxy-authenticate')
		return message

	def checkChild (self):
		"""True when the redirector child is usable (or when disabled)."""
		if not self.enabled:
			return True
		if not bool(self.process):
			return False
		# A None value indicates that the process hasn't terminated yet.
		# A negative value -N indicates that the child was terminated by signal N (Unix only).
		# In practice: also returns 1 ...
		if self.process.poll() is None:
			return True
		return False

	def writeChild (self, request_string):
		"""Write one request line to the child; False if its stdin is closed."""
		try:
			self.process.stdin.write(request_string)
			status = True
		except ValueError:
			status = False
		return status

	def readChildResponse (self):
		"""Read one non-empty line from the child, stripped; None on failure."""
		# NOTE(review): if the child exits, readline() returns '' without
		# raising, so this loop could spin at EOF — confirm child lifecycle.
		try:
			response = None
			while not response:
				response = self.process.stdout.readline()
		except:
			response = None
		if response:
			response = response.strip()
		return response

	def createChildRequest (self, accept_addr, accept_port, peer, message, http_header):
		"""Build the squid-redirector request line for *message*."""
		return '%s %s - %s -\n' % (message.url_noport, peer, message.request.method)

	def classifyURL (self, request, url_response):
		"""Map the child's reply URL to a (classification, data, comment) triple."""
		if not url_response:
			return 'permit', None, None

		if url_response.startswith('http://'):
			response = url_response[7:]
			if response == request.url_noport:
				return 'permit', None, ''
			if response.startswith(request.host + '/'):
				# FIX: the original guarded the split with "if '/' in request.url
				# else ''", unpacking the empty string into two names (ValueError)
				# and testing the wrong string. The startswith() check above
				# guarantees *response* contains a '/', so split unconditionally.
				_, rewrite_path = response.split('/', 1)
				return 'rewrite', rewrite_path, ''

		if url_response.startswith('file://'):
			return 'file', url_response[7:], ''

		if url_response.startswith('intercept://'):
			return 'intercept', url_response[12:], ''

		if url_response.startswith('redirect://'):
			return 'redirect', url_response[11:], ''

		return 'file', 'internal_error.html', ''

	def parseHTTP (self, client_id, accept_addr, accept_port, peer, http_header):
		"""Parse *http_header* into an HTTP message object."""
		message = HTTP(self.configuration, http_header, peer)
		message.parse(self._transparent)
		return message

	def validateHTTP (self, client_id, message):
		"""Return an error Respond.http for an invalid message, else None."""
		if message.reply_code:
			try:
				version = message.request.version
			except AttributeError:
				version = '1.0'

			if message.reply_string:
				# embed the raw (escaped) header in an HTML comment for debugging
				clean_header = message.raw.replace('\t','\\t').replace('\r','\\r').replace('\n','\\n\n')
				content = '%s<br/>\n<!--\n\n<![CDATA[%s]]>\n\n-->\n' % (message.reply_string, clean_header)
				response = Respond.http(client_id, http(str(message.reply_code), content, version))
			else:
				response = Respond.http(client_id, http(str(message.reply_code),'',version))
		else:
			response = None
		return response

	def doHTTPRequest (self, client_id, accept_addr, accept_port, peer, message, http_header, source):
		"""Handle a plain HTTP method: defer to the child or permit directly."""
		method = message.request.method

		if self.enabled:
			request_string = self.createChildRequest(accept_addr, accept_port, peer, message, http_header) if message else None
			status = self.writeChild(request_string) if request_string else None
			if status is True:
				response = Respond.defer(client_id, message)
			else:
				response = None
		else:
			response = Respond.download(client_id, message.host, message.port, message.upgrade, message.content_length, message)
			self.usage.logRequest(client_id, accept_addr, accept_port, peer, method, message.url, 'PERMIT', message.host)

		return response

	def doHTTPConnect (self, client_id, accept_addr, accept_port, peer, message, http_header, source):
		"""Handle CONNECT: deny unauthorised ports, else defer or tunnel."""
		method = message.request.method

		if not self.configuration.http.connect or message.port not in self.configuration.security.connect:
			# NOTE: we are always returning an HTTP/1.1 response
			response = Respond.http(client_id, http('501', 'CONNECT NOT ALLOWED\n'))
			self.usage.logRequest(client_id, accept_addr, accept_port, peer, method, message.url, 'DENY', 'CONNECT NOT ALLOWED')
		elif self.enabled:
			request_string = self.createChildRequest(accept_addr, accept_port, peer, message, http_header) if message else None
			status = self.writeChild(request_string) if request_string else None
			if status is True:
				response = Respond.defer(client_id, message)
			else:
				response = None
		else:
			response = Respond.connect(client_id, message.host, message.port, '')
			self.usage.logRequest(client_id, accept_addr, accept_port, peer, method, message.url, 'PERMIT', message.host)

		return response

	def doHTTPOptions (self, client_id, accept_addr, accept_port, peer, message):
		"""Handle OPTIONS/TRACE, honouring Max-Forwards per RFC 2616."""
		# NOTE: we are always returning an HTTP/1.1 response
		method = message.request.method
		header = message.headers.get('max-forwards', '')

		if header:
			value = header[-1].split(':')[-1].strip()
			if not value.isdigit():
				self.usage.logRequest(client_id, accept_addr, accept_port, peer, method, message.url, 'ERROR', 'INVALID MAX FORWARDS')
				return Respond.http(client_id, http('400', 'INVALID MAX-FORWARDS\n'))

			max_forward = int(value)
			if max_forward == 0:
				self.usage.logRequest(client_id, accept_addr, accept_port, peer, method, message.url, 'PERMIT', method)
				return Respond.http(client_id, http('200', ''))

			message.headers.set('max-forwards','Max-Forwards: %d' % (max_forward-1))

		return Respond.download(client_id, message.headerhost, message.port, message.upgrade, message.content_length, message)

	def doHTTP (self, client_id, accept_addr, accept_port, peer, http_header, source):
		"""Parse, validate and dispatch one HTTP request by method."""
		message = self.parseHTTP(client_id, accept_addr, accept_port, peer, http_header)
		response = self.validateHTTP(client_id, message)

		if message.validated:
			message = self.addHeaders(message, peer)
			method = message.request.method

			if method in ('GET', 'PUT', 'POST','HEAD','DELETE','PATCH'):
				response = self.doHTTPRequest(client_id, accept_addr, accept_port, peer, message, http_header, source)
			elif method == 'CONNECT':
				response = self.doHTTPConnect(client_id, accept_addr, accept_port, peer, message, http_header, source)
			elif method in ('OPTIONS','TRACE'):
				response = self.doHTTPOptions(client_id, accept_addr, accept_port, peer, message)
			elif method in ('BCOPY', 'BDELETE', 'BMOVE', 'BPROPFIND', 'BPROPPATCH', 'COPY', 'DELETE','LOCK', 'MKCOL', 'MOVE', 'NOTIFY', 'POLL', 'PROPFIND', 'PROPPATCH', 'SEARCH', 'SUBSCRIBE', 'UNLOCK', 'UNSUBSCRIBE', 'X-MS-ENUMATTS'):
				# WebDAV and similar extension methods are permitted as-is
				response = Respond.download(client_id, message.headerhost, message.port, message.upgrade, message.content_length, message)
				self.usage.logRequest(client_id, accept_addr, accept_port, peer, method, message.url, 'PERMIT', method)
			elif message.request in self.configuration.http.extensions:
				response = Respond.download(client_id, message.headerhost, message.port, message.upgrade, message.content_length, message)
				self.usage.logRequest(client_id, accept_addr, accept_port, peer, method, message.url, 'PERMIT', message.request)
			else:
				# NOTE: we are always returning an HTTP/1.1 response
				response = Respond.http(client_id, http('405', ''))  # METHOD NOT ALLOWED
				self.usage.logRequest(client_id, accept_addr, accept_port, peer, method, message.url, 'DENY', method)
		elif response is None:
			response = Respond.hangup(client_id)

		return response

	def doTLS (self, client_id, accept_addr, accept_port, peer, tls_header, source):
		"""Classify a TLS ClientHello by SNI hostname."""
		tls_hello = self.tls_parser.parseClientHello(tls_header)

		if self.enabled and tls_hello:
			request_string = '%s %s - %s -\n' % (tls_hello.hostname, peer, 'TLS')
			status = self.writeChild(request_string)
			if status is True:
				response = Respond.defer(client_id, tls_hello.hostname)
			else:
				response = None
		elif tls_hello:
			response = Respond.intercept(client_id, tls_hello.hostname, 443, tls_header)
		else:
			response = Respond.hangup(client_id)

		return response

	def doMonitor (self, client_id, accept_addr, accept_port, peer, http_header, source):
		"""Serve the internal web/monitoring interface."""
		message = self.parseHTTP(client_id, accept_addr, accept_port, peer, http_header)
		response = self.validateHTTP(client_id, message)  # pylint: disable=W0612
		return Respond.monitor(client_id, message.request.path)

	def decide (self, client_id, accept_addr, accept_port, peer, header, subheader, source):
		"""Entry point: route one request by *source* to the right handler."""
		if self.checkChild():
			if source == 'proxy':
				response = self.doHTTP(client_id, accept_addr, accept_port, peer, header, source)
			elif source == 'web':
				response = self.doMonitor(client_id, accept_addr, accept_port, peer, header, source)
			elif source == 'tls':
				response = self.doTLS(client_id, accept_addr, accept_port, peer, header, source)
			else:
				response = Respond.hangup(client_id)
		else:
			response = Respond.error(client_id)

		return response

	def progress (self, client_id, accept_addr, accept_port, peer, message, header, subheader, source):
		"""Collect the child's verdict for a deferred request and act on it."""
		if self.checkChild():
			response_s = self.readChildResponse()
		else:
			response_s = None

		if source == 'tls':
			return Respond.hangup(client_id)

		response = self.classifyURL(message.request, response_s) if response_s is not None else None

		if response is not None and source == 'proxy':
			classification, data, comment = response

			if message.request.method in ('GET','PUT','POST','HEAD','DELETE','PATCH'):
				(operation, destination), decision = self.response_factory.contentResponse(client_id, message, classification, data, comment)
			elif message.request.method == 'CONNECT':
				(operation, destination), decision = self.response_factory.connectResponse(client_id, message, classification, data, comment)
			else:
				self.log.info('unhandled command %s - dev, please look into it!' % str(message.request.method))
				operation, destination, decision = None, None, None

			if operation is not None:
				self.usage.logRequest(client_id, accept_addr, accept_port, peer, message.request.method, message.url, operation, message.host)
		else:
			decision = None

		if decision is None:
			decision = Respond.error(client_id)

		return decision

	def shutdown (self):
		"""Terminate the redirector child process, if any."""
		if self.process is not None:
			self.child_factory.destroyProcess(self.process)
			self.process = None
class Redirector (Thread): # TODO : if the program is a function, fork and run :) def __init__ (self, configuration, name, request_box, program): self.configuration = configuration self.enabled = configuration.redirector.enable self.protocol = configuration.redirector.protocol self._transparent = configuration.http.transparent self.log = Logger('worker ' + str(name), configuration.log.worker) self.usage = UsageLogger('usage', configuration.log.worker) self.universal = True if self.protocol == 'url' else False self.icap = self.protocol[len('icap://'):].split('/')[0] if self.protocol.startswith('icap://') else '' r, w = os.pipe() # pipe for communication with the main thread self.response_box_write = os.fdopen(w,'w',0) # results are written here self.response_box_read = os.fdopen(r,'r',0) # read from the main thread self.wid = name # a unique name self.creation = time.time() # when the thread was created # self.last_worked = self.creation # when the thread last picked a task self.request_box = request_box # queue with HTTP headers to process self.program = program # the squid redirector program to fork self.running = True # the thread is active self.stats_timestamp = None # time of the most recent outstanding request to generate stats self._proxy = 'ExaProxy-%s-id-%d' % (configuration.proxy.version,os.getpid()) if self.protocol == 'url': self.classify = self._classify_url if self.protocol.startswith('icap://'): self.classify = self._classify_icap # Do not move, we need the forking AFTER the setup self.process = self._createProcess() # the forked program to handle classification Thread.__init__(self) def _createProcess (self): if not self.enabled: return def preexec(): # Don't forward signals. 
os.setpgrp() try: process = subprocess.Popen([self.program,], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=self.universal, preexec_fn=preexec, ) self.log.debug('spawn process %s' % self.program) except KeyboardInterrupt: process = None except (subprocess.CalledProcessError,OSError,ValueError): self.log.error('could not spawn process %s' % self.program) process = None if process: try: fcntl.fcntl(process.stderr, fcntl.F_SETFL, os.O_NONBLOCK) except IOError: self.destroyProcess() process = None return process def destroyProcess (self): if not self.enabled: return self.log.debug('destroying process %s' % self.program) if not self.process: return try: if self.process: self.process.terminate() self.process.wait() self.log.info('terminated process PID %s' % self.process.pid) except OSError, e: # No such processs if e[0] != errno.ESRCH: self.log.error('PID %s died' % self.process.pid)
def __init__ (self, configuration):
    """Build the supervisor: loggers, daemon helpers, the poller and all
    managers/servers, then install signal handlers and prime the monitor."""
    # NOTE(review): the parameter is immediately replaced by load() — the
    # argument value is ignored; confirm this is intentional with the caller.
    configuration = load()
    self.configuration = configuration

    # Only here so the introspection code can find them
    self.log = Logger('supervisor', configuration.log.supervisor)
    self.log.error('Starting exaproxy version %s' % configuration.proxy.version)

    self.signal_log = Logger('signal', configuration.log.signal)
    self.log_writer = SysLogWriter('log', configuration.log.destination, configuration.log.enable, level=configuration.log.level)
    self.usage_writer = UsageWriter('usage', configuration.usage.destination, configuration.usage.enable)

    self.log_writer.setIdentifier(configuration.daemon.identifier)
    #self.usage_writer.setIdentifier(configuration.daemon.identifier)

    if configuration.debug.log:
        self.log_writer.toggleDebug()
        self.usage_writer.toggleDebug()

    self.log.error('python version %s' % sys.version.replace(os.linesep, ' '))
    self.log.debug('starting %s' % sys.argv[0])

    self.pid = PID(self.configuration)
    self.daemon = Daemon(self.configuration)
    self.poller = Poller(self.configuration.daemon)

    self.poller.setupRead('read_proxy')        # Listening proxy sockets
    self.poller.setupRead('read_web')          # Listening webserver sockets
    self.poller.setupRead('read_icap')         # Listening icap sockets
    self.poller.setupRead('read_workers')      # Pipes carrying responses from the child processes
    self.poller.setupRead('read_resolver')     # Sockets currently listening for DNS responses

    self.poller.setupRead('read_client')       # Active clients
    self.poller.setupRead('opening_client')    # Clients we have not yet read a request from
    self.poller.setupWrite('write_client')     # Active clients with buffered data to send
    self.poller.setupWrite('write_resolver')   # Active DNS requests with buffered data to send

    self.poller.setupRead('read_download')     # Established connections
    self.poller.setupWrite('write_download')   # Established connections we have buffered data to send to
    self.poller.setupWrite('opening_download') # Opening connections

    self.monitor = Monitor(self)
    self.page = Page(self)
    self.manager = RedirectorManager(
        self.configuration,
        self.poller,
    )
    self.content = ContentManager(self, configuration)
    self.client = ClientManager(self.poller, configuration)
    self.resolver = ResolverManager(self.poller, self.configuration, configuration.dns.retries * 10)
    self.proxy = Server('http proxy', self.poller, 'read_proxy', configuration.http.connections)
    self.web = Server('web server', self.poller, 'read_web', configuration.web.connections)
    self.icap = Server('icap server', self.poller, 'read_icap', configuration.icap.connections)

    self.reactor = Reactor(self.configuration, self.web, self.proxy, self.icap, self.manager, self.content, self.client, self.resolver, self.log_writer, self.usage_writer, self.poller)

    self._shutdown = True if self.daemon.filemax == 0 else False  # stop the program
    self._softstop = False              # stop once all current connection have been dealt with
    self._reload = False                # unimplemented
    self._toggle_debug = False          # start logging a lot
    self._decrease_spawn_limit = 0
    self._increase_spawn_limit = 0
    self._refork = False                # unimplemented
    self._pdb = False                   # turn on pdb debugging
    self._listen = None                 # listening change ? None: no, True: listen, False: stop listening

    self.wait_time = 5.0                # how long do we wait at maximum once we have been soft-killed
    self.local = set()                  # what addresses are on our local interfaces

    self.interfaces()

    signal.signal(signal.SIGQUIT, self.sigquit)
    signal.signal(signal.SIGINT, self.sigterm)
    signal.signal(signal.SIGTERM, self.sigterm)
    # signal.signal(signal.SIGABRT, self.sigabrt)
    # signal.signal(signal.SIGHUP, self.sighup)

    signal.signal(signal.SIGTRAP, self.sigtrap)

    signal.signal(signal.SIGUSR1, self.sigusr1)
    signal.signal(signal.SIGUSR2, self.sigusr2)
    signal.signal(signal.SIGTTOU, self.sigttou)
    signal.signal(signal.SIGTTIN, self.sigttin)

    signal.signal(signal.SIGALRM, self.sigalrm)

    # make sure we always have data in history
    # (done in zero for dependencies reasons)
    self.monitor.zero()
class ResolverManager(object):
    """Manage DNS resolution for the proxy: one long-lived UDP client plus a
    bounded pool of TCP clients for truncated responses, with a small
    time-bucketed cache of resolved hostnames."""

    resolverFactory = DNSResolver

    def __init__(self, poller, configuration, max_workers):
        self.poller = poller
        self.configuration = configuration
        self.resolver_factory = self.resolverFactory(configuration)

        # The actual work is done in the worker
        self.worker = self.resolver_factory.createUDPClient()

        # All currently active clients (one UDP and many TCP)
        self.workers = {}
        self.workers[self.worker.socket] = self.worker
        self.poller.addReadSocket('read_resolver', self.worker.socket)

        # Track the clients currently expecting results
        self.clients = {}  # client_id : identifier

        # Key should be the hostname rather than the request ID?
        self.resolving = {}  # (worker_id, identifier) : request data

        # TCP workers that have not yet sent a complete request
        self.sending = {}  # sock : request data

        # Maximum number of entry we will cache (1024 DNS lookup per second !)
        # assuming 1k per entry, which is a lot, it mean 20Mb of memory
        # which at the default of 900 seconds of cache is 22 new host per seconds
        self.max_entries = 1024 * 20

        # track the current queries and when they were started
        self.active = []

        self.cache = {}
        self.cached = deque()

        self.max_workers = max_workers
        self.worker_count = len(self.workers)  # the UDP client

        self.waiting = []

        self.log = Logger('resolver', configuration.log.resolver)
        self.chained = {}

    def cacheDestination(self, hostname, ip):
        """Record hostname->ip in the cache, grouping expiry in 5s buckets."""
        if hostname not in self.cache:
            expire_time = time.time() + self.configuration.dns.ttl
            expire_time = expire_time - expire_time % 5  # group the DNS record per buckets 5 seconds
            latest_time, latest_hosts = self.cached[-1] if self.cached else (-1, None)

            if expire_time > latest_time:
                hosts = []
                self.cached.append((expire_time, hosts))
            else:
                hosts = latest_hosts

            self.cache[hostname] = ip
            hosts.append(hostname)

    def expireCache(self):
        """Expire only one set (bucket) of cache entries at a time."""
        if self.cached:
            current_time = time.time()
            expire_time, hosts = self.cached[0]

            if current_time >= expire_time or len(self.cache) > self.max_entries:
                expire_time, hosts = self.cached.popleft()
                for hostname in hosts:
                    self.cache.pop(hostname, None)

    def cleanup(self):
        """Generator: time out stale queries, retransmitting UDP requests up to
        the configured retry count; yields a 503 rewrite for abandoned ones."""
        now = time.time()
        cutoff = now - self.configuration.dns.timeout
        count = 0

        for timestamp, client_id, sock in self.active:
            if timestamp > cutoff:
                break
            count += 1

            cli_data = self.clients.pop(client_id, None)
            worker = self.workers.get(sock)
            tcpudp = 'udp' if worker is self.worker else 'tcp'

            if cli_data is not None:
                w_id, identifier, active_time, resolve_count = cli_data
                data = self.resolving.pop((w_id, identifier), None)
                if not data:
                    data = self.sending.pop(sock, None)

                if data:
                    client_id, original, hostname, command, decision = data
                    self.log.error('timeout when requesting address for %s using the %s client - attempt %s' % (hostname, tcpudp, resolve_count))

                    if resolve_count < self.configuration.dns.retries and worker is self.worker:
                        self.log.info('going to retransmit request for %s - attempt %s of %s' % (hostname, resolve_count + 1, self.configuration.dns.retries))
                        self.startResolving(client_id, command, decision, resolve_count + 1, identifier=identifier)
                        continue

                    self.log.error('given up trying to resolve %s after %s attempts' % (hostname, self.configuration.dns.retries))
                    yield client_id, 'rewrite', '\0'.join(('503', 'dns.html', '', '', '', hostname, 'peer'))

            # NOTE(review): in the mangled source the placement of this block is
            # ambiguous; it is kept at loop level so timed-out TCP workers are
            # always closed — confirm against upstream.
            if worker is not None:
                if worker is not self.worker:
                    worker.close()
                    self.workers.pop(sock)

        if count:
            self.active = self.active[count:]

    def resolves(self, command, decision):
        """Return True when the decision's target is a hostname needing DNS."""
        if command in ('download', 'connect'):
            hostname = decision.split('\0')[0]
            if isip(hostname):
                res = False
            else:
                res = True
        else:
            res = False
        return res

    def extractHostname(self, command, decision):
        """Return the hostname embedded in a download/connect decision, else None."""
        data = decision.split('\0')

        if command == 'download':
            hostname = data[0]
        elif command == 'connect':
            hostname = decision.split('\0')[0]
        else:
            hostname = None

        return hostname

    def resolveDecision(self, command, decision, ip):
        """Rewrite a decision, substituting the resolved ip for the hostname."""
        if command in ('download', 'connect'):
            hostname, args = decision.split('\0', 1)
            newdecision = '\0'.join((ip, args))
        else:
            newdecision = None
        return newdecision

    def startResolving(self, client_id, command, decision, resolve_count=1, identifier=None):
        """Answer from cache or reject invalid names immediately; otherwise
        launch a UDP lookup. Returns (identifier, response) where response is
        None while resolution is pending."""
        hostname = self.extractHostname(command, decision)

        if hostname:
            # Resolution is already in our cache
            if hostname in self.cache:
                identifier = None
                ip = self.cache[hostname]

                if ip is not None:
                    resolved = self.resolveDecision(command, decision, ip)
                    response = client_id, command, resolved
                else:
                    newdecision = '\0'.join(('503', 'dns.html', 'http', '', '', hostname, 'peer'))
                    response = client_id, 'rewrite', newdecision

            # do not try to resolve domains which are not FQDN
            elif self.configuration.dns.fqdn and '.' not in hostname:
                identifier = None
                newdecision = '\0'.join(('200', 'dns.html', 'http', '', '', hostname, 'peer'))
                response = client_id, 'rewrite', newdecision

            # each DNS part (between the dots) must be under 256 chars
            elif max(len(p) for p in hostname.split('.')) > 255:
                identifier = None
                self.log.info('jumbo hostname: %s' % hostname)
                newdecision = '\0'.join(('503', 'dns.html', 'http', '', '', hostname, 'peer'))
                response = client_id, 'rewrite', newdecision

            # Lookup that DNS name
            else:
                identifier, _ = self.worker.resolveHost(hostname, identifier=identifier)
                response = None
                active_time = time.time()

                self.resolving[(self.worker.w_id, identifier)] = client_id, hostname, hostname, command, decision
                self.clients[client_id] = (self.worker.w_id, identifier, active_time, resolve_count)
                self.active.append((active_time, client_id, self.worker.socket))
        else:
            identifier = None
            response = None

        return identifier, response

    def beginResolvingTCP(self, client_id, command, decision, resolve_count):
        """Start a TCP lookup if below the worker cap, else queue the request."""
        if self.worker_count < self.max_workers:
            identifier = self.newTCPResolver(client_id, command, decision, resolve_count)
            self.worker_count += 1
        else:
            self.waiting.append((client_id, command, decision, resolve_count))
            identifier = None
        return identifier

    def notifyClose(self):
        """A TCP worker closed: drain queued requests into the freed slots."""
        paused = self.worker_count >= self.max_workers
        self.worker_count -= 1

        if paused and self.worker_count < self.max_workers:
            for _ in range(self.worker_count, self.max_workers):
                if self.waiting:
                    data, self.waiting = self.waiting[0], self.waiting[1:]
                    client_id, command, decision, resolve_count = data

                    identifier = self.newTCPResolver(client_id, command, decision, resolve_count)
                    self.worker_count += 1

    def newTCPResolver(self, client_id, command, decision, resolve_count):
        """Create a TCP resolver client for hostname and start the query."""
        hostname = self.extractHostname(command, decision)

        if hostname:
            worker = self.resolver_factory.createTCPClient()
            self.workers[worker.socket] = worker

            identifier, all_sent = worker.resolveHost(hostname)
            active_time = time.time()
            self.resolving[(worker.w_id, identifier)] = client_id, hostname, hostname, command, decision
            self.clients[client_id] = (worker.w_id, identifier, active_time, resolve_count)
            # NOTE(review): this tracks the UDP socket (self.worker.socket),
            # not the new TCP worker's socket — looks suspicious but is kept
            # as-is; confirm against upstream before changing.
            self.active.append((active_time, client_id, self.worker.socket))

            if all_sent:
                self.poller.addReadSocket('read_resolver', worker.socket)
                self.resolving[(worker.w_id, identifier)] = client_id, hostname, hostname, command, decision
            else:
                self.poller.addWriteSocket('write_resolver', worker.socket)
                self.sending[worker.socket] = client_id, hostname, hostname, command, decision
        else:
            identifier = None

        return identifier

    def getResponse(self, sock):
        """Process a DNS answer from the worker on sock. May chain a follow-up
        query (e.g. falling back to TCP), retry mismatched answers, or produce
        the final (client_id, command, decision) response."""
        worker = self.workers.get(sock)

        if worker:
            result = worker.getResponse(self.chained)

            if result:
                identifier, forhost, ip, completed, newidentifier, newhost, newcomplete = result
                data = self.resolving.pop((worker.w_id, identifier), None)

                chain_count = self.chained.pop(identifier, 0)
                if newidentifier:
                    self.chained[newidentifier] = chain_count + 1

                if not data:
                    self.log.info('ignoring response for %s (%s) with identifier %s' % (forhost, ip, identifier))
            else:
                # unable to parse response
                self.log.error('unable to parse response')
                data = None

            if data:
                client_id, original, hostname, command, decision = data
                clidata = self.clients.pop(client_id, None)

                if completed:
                    if clidata is not None:
                        key = clidata[2], client_id, worker.socket
                        if key in self.active:
                            self.active.remove(key)

                # check to see if we received an incomplete response
                if not completed:
                    newidentifier = self.beginResolvingTCP(client_id, command, decision, 1)
                    newhost = hostname
                    response = None

                # check to see if the worker started a new request
                if newidentifier:
                    if completed:
                        active_time = time.time()
                        self.resolving[(worker.w_id, newidentifier)] = client_id, original, newhost, command, decision
                        self.clients[client_id] = (worker.w_id, newidentifier, active_time, 1)
                        self.active.append((active_time, client_id, worker.socket))

                    response = None

                    if completed and newcomplete:
                        self.poller.addReadSocket('read_resolver', worker.socket)
                    elif completed and not newcomplete:
                        self.poller.addWriteSocket('write_resolver', worker.socket)
                        self.sending[worker.socket] = client_id, original, hostname, command, decision

                # we just started a new (TCP) request and have not yet completely sent it
                # make sure we still know who the request is for
                elif not completed:
                    response = None

                # maybe we read the wrong response?
                elif forhost != hostname:
                    _, _, _, resolve_count = clidata
                    active_time = time.time()
                    self.resolving[(worker.w_id, identifier)] = client_id, original, hostname, command, decision
                    self.clients[client_id] = (worker.w_id, identifier, active_time, resolve_count)
                    self.active.append((active_time, client_id, worker.socket))
                    response = None

                # success
                elif ip is not None:
                    resolved = self.resolveDecision(command, decision, ip)
                    response = client_id, command, resolved
                    self.cacheDestination(original, ip)

                # not found
                else:
                    newdecision = '\0'.join(('503', 'dns.html', 'http', '', '', hostname, 'peer'))
                    response = client_id, 'rewrite', newdecision
                    #self.cacheDestination(original, ip)
            else:
                response = None

            if response or result is None:
                if worker is not self.worker:
                    self.poller.removeReadSocket('read_resolver', sock)
                    self.poller.removeWriteSocket('write_resolver', sock)
                    worker.close()
                    self.workers.pop(sock)
                    self.notifyClose()
        else:
            response = None

        return response

    def continueSending(self, sock):
        """Continue sending data over the connected TCP socket"""
        data = self.sending.get(sock)
        if data:
            client_id, original, hostname, command, decision = data
        else:
            # BUG FIX: the original unpacked six Nones into five names,
            # raising ValueError whenever sock had no pending request data
            client_id, original, hostname, command, decision = None, None, None, None, None

        worker = self.workers[sock]
        res = worker.continueSending()

        if res is False:  # we've sent all we need to send
            self.poller.removeWriteSocket('write_resolver', sock)

            if client_id in self.clients:
                w_id, identifier, active_time, resolve_count = self.clients[client_id]
                tmp = self.sending.pop(sock)
                self.resolving[(w_id, identifier)] = tmp
                self.poller.addReadSocket('read_resolver', sock)
            else:
                self.log.error('could not find client for dns request for %s. request is being left to timeout.' % str(hostname))
def __init__(self, supervisor): self.supervisor = supervisor self.monitor = supervisor.monitor self.email_sent = False self.log = Logger('web', supervisor.configuration.log.web)
class Redirector(Thread): # TODO : if the program is a function, fork and run :) ICAPParser = ICAPParser def __init__(self, configuration, name, request_box, program): self.configuration = configuration self.icap_parser = self.ICAPParser(configuration) self.enabled = configuration.redirector.enable self.protocol = configuration.redirector.protocol self._transparent = configuration.http.transparent self.log = Logger('worker ' + str(name), configuration.log.worker) self.usage = UsageLogger('usage', configuration.log.worker) self.universal = True if self.protocol == 'url' else False self.icap = self.protocol[len('icap://'):].split( '/')[0] if self.protocol.startswith('icap://') else '' r, w = os.pipe() # pipe for communication with the main thread self.response_box_write = os.fdopen(w, 'w', 0) # results are written here self.response_box_read = os.fdopen(r, 'r', 0) # read from the main thread self.wid = name # a unique name self.creation = time.time() # when the thread was created # self.last_worked = self.creation # when the thread last picked a task self.request_box = request_box # queue with HTTP headers to process self.program = program # the squid redirector program to fork self.running = True # the thread is active self.stats_timestamp = None # time of the most recent outstanding request to generate stats self._proxy = 'ExaProxy-%s-id-%d' % (configuration.proxy.version, os.getpid()) if self.protocol == 'url': self.classify = self._classify_url if self.protocol.startswith('icap://'): self.classify = self._classify_icap # Do not move, we need the forking AFTER the setup self.process = self._createProcess( ) # the forked program to handle classification Thread.__init__(self) def _createProcess(self): if not self.enabled: return def preexec(): # Don't forward signals. 
os.setpgrp() try: process = subprocess.Popen( [ self.program, ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=self.universal, preexec_fn=preexec, ) self.log.debug('spawn process %s' % self.program) except KeyboardInterrupt: process = None except (subprocess.CalledProcessError, OSError, ValueError): self.log.error('could not spawn process %s' % self.program) process = None if process: try: fcntl.fcntl(process.stderr, fcntl.F_SETFL, os.O_NONBLOCK) except IOError: self.destroyProcess() process = None return process def destroyProcess(self): if not self.enabled: return self.log.debug('destroying process %s' % self.program) if not self.process: return try: if self.process: self.process.terminate() self.process.wait() self.log.info('terminated process PID %s' % self.process.pid) except OSError, e: # No such processs if e[0] != errno.ESRCH: self.log.error('PID %s died' % self.process.pid)