Example #1
0
	def __init__ (self, configuration, name, program, protocol):
		"""Prepare a redirector worker: parsers, loggers and, when a
		redirector program is configured, the forked child process."""
		self.configuration = configuration
		self.log = Logger('worker ' + str(name), configuration.log.worker)
		self.usage = UsageLogger('usage', configuration.log.worker)

		# protocol parsers used to examine incoming requests
		self.http_parser = self.HTTPParser(configuration)
		self.tls_parser = self.TLSParser(configuration)

		# classification is only active when a program was supplied
		# AND the redirector is enabled in the configuration
		self.enabled = bool(program is not None) and configuration.redirector.enable
		self._transparent = configuration.http.transparent

		self.response_factory = self.ResponseFactory()
		self.child_factory = self.ChildFactory(configuration, name)

		self.wid = name                # a unique name
		self.creation = time.time()    # when the worker was created

		self.program = program         # the squid redirector program to fork
		self.running = True            # the worker is active

		# time of the most recent outstanding request to generate stats
		self.stats_timestamp = None

		self._proxy = 'ExaProxy-%s-id-%d' % (configuration.proxy.version,os.getpid())

		# Do not move: the fork must happen AFTER the setup above
		if program:
			self.process = self.child_factory.createProcess(self.program, universal=configuration.redirector.protocol == 'url')
		else:
			self.process = None
Example #2
0
    def __init__(self, configuration, poller):
        """Track the pool of forked redirector worker processes."""
        self.configuration = configuration
        self.poller = poller
        self.log = Logger('manager', configuration.log.manager)

        # pool size bounds
        self.low = configuration.redirector.minimum  # minimum concurrent redirector workers
        self.high = configuration.redirector.maximum  # maximum concurrent redirector workers

        # requests we do not immediately have the resources to process
        self.queue = Queue()

        self.nextid = 1  # unique id to give to the next spawned worker
        self.worker = {}  # worker tasks for each spawned child
        self.processes = {}  # worker tasks indexed by file descriptors we can poll
        self.available = set()  # workers currently free to take new requests
        self.active = {}  # workers busy waiting for a response from their process
        self.stopping = set()  # workers to retire as soon as they go idle

        self.redirector_factory = RedirectorFactory(
            configuration,
            configuration.redirector.program,
            configuration.redirector.protocol,
        )
Example #3
0
	def __init__(self, name, poller, read_name, max_clients):
		# Bookkeeping for one listening server (sockets, counters, state).
		self.socks = {}
		self.name = name
		self.poller = poller
		self.read_name = read_name
		self.max_clients = max_clients
		self.client_count = 0
		self.saturated = False  # we are receiving more connections than we can handle
		self.binding = set()
		self.serving = True  # We are currently listening
		# NOTE(review): `configuration` is neither a parameter nor a local here —
		# presumably a module-level global; verify it is in scope at the call site.
		self.log = Logger('server', configuration.log.server)
		self.log.info('server [%s] accepting up to %d clients' % (name, max_clients))
Example #4
0
 def __init__(self, poller, configuration):
     # Client-connection manager state.
     # Byte/request totals are Python 2 long literals (0L).
     self.total_sent4 = 0L
     self.total_sent6 = 0L
     self.total_requested = 0L
     # expire clients that connect but send no request within the idle timeout
     self.norequest = TimeCache(configuration.http.idle_connect)
     self.bysock = {}
     self.byname = {}
     self.buffered = []
     self._nextid = 0
     self.poller = poller
     self.log = Logger('client', configuration.log.client)
     self.proxied = configuration.http.proxied
     # limit on how much header data we will buffer per client
     self.max_buffer = configuration.http.header_size
Example #5
0
	def __init__ (self,configuration,poller):
		"""Manager for the pool of redirector worker threads."""
		self.configuration = configuration
		self.poller = poller              # poller interface that checks for events on sockets
		self.log = Logger('manager', configuration.log.manager)

		# pool size bounds, enforced at all times
		self.low = configuration.redirector.minimum
		self.high = configuration.redirector.maximum
		# what program speaks the squid redirector API
		self.program = configuration.redirector.program

		self.nextid = 1       # incremental number naming the next worker
		self.queue = Queue()  # HTTP headers waiting to be processed
		self.worker = {}      # our worker threads
		self.closing = set()  # workers that are currently closing
		self.running = True   # we are running
Example #6
0
	def __init__(self, supervisor, configuration):
		# Outbound (server-side) content download manager state.
		# Byte totals are Python 2 long literals (0L).
		self.total_sent4 = 0L
		self.total_sent6 = 0L
		self.opening = {}
		self.established = {}
		self.byclientid = {}
		self.buffered = []
		self.retry = []
		self.configuration = configuration
		self.supervisor = supervisor

		self.poller = supervisor.poller
		self.log = Logger('download', configuration.log.download)

		# normalised absolute path of the directory with the internal web pages
		self.location = os.path.realpath(os.path.normpath(configuration.web.html))
		self.page = supervisor.page
		self._header = {}
Example #7
0
	def __init__ (self, configuration, querier, decider, logger, poller):
		"""Wire together the redirector process's collaborators."""
		self.poller = poller
		self.querier = querier  # incoming requests from the proxy
		self.decider = decider  # decides how each request should be handled
		self.logger = logger    # log writing interfaces

		# NOT the same logger the rest of the proxy uses since we're
		# running in a different process
		self.log = Logger('redirector', configuration.log.supervisor)
		self.running = True
Example #8
0
	def __init__(self, poller, configuration):
		# Multi-protocol client-connection manager state.
		# Byte/request totals are Python 2 long literals (0L).
		self.total_sent4 = 0L
		self.total_sent6 = 0L
		self.total_requested = 0L
		# expire clients that connect but send no request within the idle timeout
		self.norequest = TimeCache(configuration.http.idle_connect)
		self.bysock = {}
		self.byname = {}
		self.buffered = []
		self._nextid = 0
		self.poller = poller
		self.log = Logger('client', configuration.log.client)
		# per-protocol limits on how much header data we will buffer
		self.http_max_buffer = configuration.http.header_size
		self.icap_max_buffer = configuration.icap.header_size
		self.tls_max_buffer = configuration.tls.header_size
		self.passthrough_max_buffer = 0
		# per-protocol `proxied` flags taken straight from the configuration
		self.proxied = {
			'proxy' : configuration.http.proxied,
			'icap'  : configuration.icap.proxied,
			'tls'   : configuration.tls.proxied,
		}
Example #9
0
    def __init__(self, poller, configuration, max_workers):
        """DNS resolution manager: one UDP worker plus on-demand TCP workers."""
        self.poller = poller
        self.configuration = configuration
        self.log = Logger('resolver', configuration.log.resolver)

        self.resolver_factory = self.resolverFactory(configuration)

        # The UDP client does the actual work
        self.worker = self.resolver_factory.createUDPClient()

        # All currently active clients (one UDP and many TCP)
        self.workers = {self.worker.socket: self.worker}
        self.poller.addReadSocket('read_resolver', self.worker.socket)

        self.clients = {}  # clients expecting results: client_id : identifier
        self.resolving = {}  # queries in flight: identifier, worker_id :
        self.sending = {}  # TCP workers with a partially sent request: sock :

        # Maximum number of entry we will cache (1024 DNS lookup per second !)
        # assuming 1k per entry, which is a lot, it mean 20Mb of memory
        # which at the default of 900 seconds of cache is 22 new host per seonds
        self.max_entries = 1024 * 20

        # track the current queries and when they were started
        self.active = []

        self.cache = {}
        self.cached = deque()

        self.max_workers = max_workers
        self.worker_count = len(self.workers)  # counts the UDP client

        self.waiting = []
        self.chained = {}
Example #10
0
    def __init__(self, configuration, name, request_box, program):
        """Worker thread driving one forked redirector process."""
        self.configuration = configuration
        self.icap_parser = self.ICAPParser(configuration)
        self.enabled = configuration.redirector.enable
        self.protocol = configuration.redirector.protocol
        self._transparent = configuration.http.transparent
        self.log = Logger('worker ' + str(name), configuration.log.worker)
        self.usage = UsageLogger('usage', configuration.log.worker)

        # 'url' means the plain squid rewriter line protocol
        self.universal = self.protocol == 'url'

        # host part of an icap:// URI (empty string when not using ICAP)
        if self.protocol.startswith('icap://'):
            self.icap = self.protocol[len('icap://'):].split('/')[0]
        else:
            self.icap = ''

        # unbuffered pipe pair for communication with the main thread
        r, w = os.pipe()
        self.response_box_write = os.fdopen(w, 'w', 0)  # results are written here
        self.response_box_read = os.fdopen(r, 'r', 0)  # read from the main thread

        self.wid = name  # a unique name
        self.creation = time.time()  # when the thread was created
        self.request_box = request_box  # queue with HTTP headers to process

        self.program = program  # the squid redirector program to fork
        self.running = True  # the thread is active

        # time of the most recent outstanding request to generate stats
        self.stats_timestamp = None

        self._proxy = 'ExaProxy-%s-id-%d' % (configuration.proxy.version,
                                             os.getpid())

        if self.protocol == 'url':
            self.classify = self._classify_url
        if self.protocol.startswith('icap://'):
            self.classify = self._classify_icap

        # Do not move: the fork must happen AFTER the setup above
        self.process = self._createProcess()  # handles classification
        Thread.__init__(self)
Example #11
0
	def __init__(self, name, poller, read_name, max_clients):
		# Bookkeeping for one listening server (sockets, counters, state).
		self.socks = {}
		self.name = name
		self.poller = poller
		self.read_name = read_name
		self.max_clients = max_clients
		self.client_count = 0
		self.saturated = False  # we are receiving more connections than we can handle
		self.binding = set()
		self.serving = True  # We are currently listening
		# NOTE(review): `configuration` is neither a parameter nor a local here —
		# presumably a module-level global; verify it is in scope at the call site.
		self.log = Logger('server', configuration.log.server)
		self.log.info('server [%s] accepting up to %d clients' % (name, max_clients))
Example #12
0
class Server(object):
	# Listening-socket front-end: accepts clients up to a configured limit
	# and can re-bind its sockets after having stopped serving.
	_listen = staticmethod(listen)

	def __init__(self, name, poller, read_name, config):
		self.socks = {}
		self.name = name
		self.poller = poller
		self.read_name = read_name
		self.max_clients = config.connections
		self.client_count = 0
		self.saturated = False  # we are receiving more connections than we can handle
		self.binding = set()
		# NOTE(review): `configuration` is not a parameter here (the parameter is
		# `config`) — presumably a module-level global; verify it is in scope.
		self.log = Logger('server', configuration.log.server)
		self.serving = config.enable  # We are currently listening
		if self.serving:
			self.log.info('server [%s] accepting up to %d clients' % (name, self.max_clients))

	def accepting (self):
		# Return True when we can accept new clients, first re-binding every
		# previously bound (ip, port) if we had stopped serving.
		if self.serving:
			return True

		for ip, port, timeout, backlog in self.binding:
			try:
				self.log.critical('re-listening on %s:%d' % (ip,port))
				self.listen(ip,port,timeout,backlog)
			except socket.error,e:
				self.log.critical('could not re-listen on %s:%d : %s' % (ip,port,str(e)))
				return False
		self.serving = True
		return True
Example #13
0
class Server(object):
	# Listening-socket front-end: accepts clients up to max_clients
	# and can re-bind its sockets after having stopped serving.
	_listen = staticmethod(listen)

	def __init__(self, name, poller, read_name, max_clients):
		self.socks = {}
		self.name = name
		self.poller = poller
		self.read_name = read_name
		self.max_clients = max_clients
		self.client_count = 0
		self.saturated = False  # we are receiving more connections than we can handle
		self.binding = set()
		self.serving = True  # We are currently listening
		# NOTE(review): `configuration` is neither a parameter nor a local here —
		# presumably a module-level global; verify it is in scope.
		self.log = Logger('server', configuration.log.server)
		self.log.info('server [%s] accepting up to %d clients' % (name, max_clients))


	def accepting (self):
		# Return True when we can accept new clients, first re-binding every
		# previously bound (ip, port) if we had stopped serving.
		if self.serving:
			return True

		for ip, port, timeout, backlog in self.binding:
			try:
				self.log.critical('re-listening on %s:%d' % (ip,port))
				self.listen(ip,port,timeout,backlog)
			except socket.error,e:
				self.log.critical('could not re-listen on %s:%d : %s' % (ip,port,str(e)))
				return False
		self.serving = True
		return True
Example #14
0
	def __init__(self, poller, configuration):
		# Client-connection manager state.
		# Byte/request totals are Python 2 long literals (0L).
		self.total_sent4 = 0L
		self.total_sent6 = 0L
		self.total_requested = 0L
		# expire clients that connect but send no request within the idle timeout
		self.norequest = TimeCache(configuration.http.idle_connect)
		self.bysock = {}
		self.byname = {}
		self.buffered = []
		self._nextid = 0
		self.poller = poller
		self.log = Logger('client', configuration.log.client)
		self.proxied = configuration.http.proxied
		# limit on how much header data we will buffer per client
		self.max_buffer = configuration.http.header_size
Example #15
0
class ChildFactory:
    """Forks and destroys the external redirector child processes."""

    def preExec(self):
        # Runs in the child before exec: put it in its own process group so
        # signals sent to the proxy do not reach the spawned program.
        os.setpgrp()

    def __init__(self, configuration, name):
        self.log = Logger('worker ' + str(name), configuration.log.worker)

    def createProcess(self, program, universal=False):
        # Spawn `program` (split on spaces) with piped stdio.
        # Returns the Popen object, or None when the spawn failed.
        try:
            process = subprocess.Popen(
                program.split(' '),
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                universal_newlines=universal,
                preexec_fn=self.preExec,
            )

            self.log.debug('spawn process %s' % program)

        except KeyboardInterrupt:
            process = None

        except (subprocess.CalledProcessError, OSError, ValueError):
            self.log.error('could not spawn process %s' % program)
            process = None

        if process:
            try:
                # non-blocking stderr so reading it can never stall the proxy
                fcntl.fcntl(process.stderr, fcntl.F_SETFL, os.O_NONBLOCK)
            except IOError:
                self.destroyProcess(process)
                process = None

        return process

    def destroyProcess(self, process):
        # Terminate the child and reap it; a process that is already gone
        # (ESRCH) is silently ignored.
        try:
            process.terminate()
            process.wait()
            self.log.info('terminated process PID %s' % process.pid)

        except OSError, e:
            # No such processs
            if e[0] != errno.ESRCH:
                self.log.error('PID %s died' % process.pid)
Example #16
0
class ChildFactory:
	"""Forks and destroys the external redirector child processes."""

	def preExec (self):
		# Runs in the child before exec: put it in its own process group so
		# signals sent to the proxy do not reach the spawned program.
		os.setpgrp()

	def __init__ (self, configuration, name):
		self.log = Logger('worker ' + str(name), configuration.log.worker)

	def createProcess (self, program, universal=False):
		# Spawn `program` (as a single argv entry) with piped stdio.
		# Returns the Popen object, or None when the spawn failed.
		try:
			process = subprocess.Popen([program],
				stdin=subprocess.PIPE,
				stdout=subprocess.PIPE,
				stderr=subprocess.PIPE,
				universal_newlines=universal,
				preexec_fn=self.preExec,
			)

			self.log.debug('spawn process %s' % program)

		except KeyboardInterrupt:
			process = None

		except (subprocess.CalledProcessError,OSError,ValueError):
			self.log.error('could not spawn process %s' % program)
			process = None

		if process:
			try:
				# non-blocking stderr so reading it can never stall the proxy
				fcntl.fcntl(process.stderr, fcntl.F_SETFL, os.O_NONBLOCK)
			except IOError:
				self.destroyProcess(process)
				process = None

		return process

	def destroyProcess (self, process):
		# Terminate the child and reap it; a process that is already gone
		# (ESRCH) is silently ignored.
		try:
			process.terminate()
			process.wait()
			self.log.info('terminated process PID %s' % process.pid)

		except OSError, e:
			# No such processs
			if e[0] != errno.ESRCH:
				self.log.error('PID %s died' % process.pid)
Example #17
0
	def __init__(self, supervisor, configuration):
		# Outbound (server-side) content download manager state.
		# Byte totals are Python 2 long literals (0L).
		self.total_sent4 = 0L
		self.total_sent6 = 0L
		self.opening = {}
		self.established = {}
		self.byclientid = {}
		self.buffered = []
		self.retry = []
		self.configuration = configuration
		self.supervisor = supervisor

		self.poller = supervisor.poller
		self.log = Logger('download', configuration.log.download)

		# normalised absolute path of the directory with the internal web pages
		self.location = os.path.realpath(os.path.normpath(configuration.web.html))
		self.page = supervisor.page
		self._header = {}
Example #18
0
    def __init__(self, configuration, web, proxy, decider, content, client,
                 resolver, logger, usage, poller):
        # Reactor state: collaborators plus run/accounting flags.
        # Event counters are Python 2 long literals (0L).
        self.web = web  # Manage listening web sockets
        self.proxy = proxy  # Manage listening proxy sockets
        self.decider = decider  # Task manager for handling child decider processes
        self.content = content  # The Content Download manager
        self.client = client  # Currently open client connections
        self.resolver = resolver  # The DNS query manager
        self.poller = poller  # Interface to the poller
        self.logger = logger  # Log writing interfaces
        self.usage = usage  # Request logging
        self.running = True  # Until we stop we run :)
        self.nb_events = 0L  # Number of events received
        self.nb_loops = 0L  # Number of loop iteration
        self.events = []  # events so we can report them once in a while

        self.log = Logger('supervisor', configuration.log.supervisor)
Example #19
0
	def __init__ (self, poller, configuration, max_workers):
		"""DNS resolution manager: one UDP worker plus on-demand TCP workers."""
		self.poller = poller
		self.configuration = configuration
		self.log = Logger('resolver', configuration.log.resolver)

		self.resolver_factory = self.resolverFactory(configuration)

		# The UDP client does the actual work
		self.worker = self.resolver_factory.createUDPClient()

		# All currently active clients (one UDP and many TCP)
		self.workers = {}
		self.workers[self.worker.socket] = self.worker
		self.poller.addReadSocket('read_resolver', self.worker.socket)

		self.clients = {}    # clients expecting results: client_id : identifier
		self.resolving = {}  # queries in flight: identifier, worker_id :
		self.sending = {}    # TCP workers with a partially sent request: sock :

		# Maximum number of entry we will cache (1024 DNS lookup per second !)
		# assuming 1k per entry, which is a lot, it mean 20Mb of memory
		# which at the default of 900 seconds of cache is 22 new host per seonds
		self.max_entries = 1024*20

		# track the current queries and when they were started
		self.active = []

		self.cache = {}
		self.cached = deque()

		self.max_workers = max_workers
		self.worker_count = len(self.workers)  # counts the UDP client

		self.waiting = []
		self.chained = {}
Example #20
0
	def __init__ (self, configuration, poller):
		"""Track the pool of forked redirector worker processes."""
		self.configuration = configuration
		self.poller = poller
		self.log = Logger('manager', configuration.log.manager)

		# pool size bounds
		self.low = configuration.redirector.minimum    # minimum concurrent redirector workers
		self.high = configuration.redirector.maximum   # maximum concurrent redirector workers

		# requests we do not immediately have the resources to process
		self.queue = Queue()

		self.nextid = 1         # unique id to give to the next spawned worker
		self.worker = {}        # worker tasks for each spawned child
		self.processes = {}     # worker tasks indexed by file descriptors we can poll
		self.available = set()  # workers currently free to take new requests
		self.active = {}        # workers busy waiting for a response from their process
		self.stopping = set()   # workers to retire as soon as they go idle

		self.redirector_factory = RedirectorFactory(
			configuration,
			configuration.redirector.program,
			configuration.redirector.protocol,
		)
Example #21
0
	def __init__ (self, configuration, name, request_box, program):
		"""Worker thread driving one forked redirector process."""
		self.configuration = configuration
		self.icap_parser = self.ICAPParser(configuration)
		self.enabled = configuration.redirector.enable
		self.protocol = configuration.redirector.protocol
		self._transparent = configuration.http.transparent
		self.log = Logger('worker ' + str(name), configuration.log.worker)
		self.usage = UsageLogger('usage', configuration.log.worker)

		# 'url' means the plain squid rewriter line protocol
		self.universal = self.protocol == 'url'

		# host part of an icap:// URI (empty string when not using ICAP)
		if self.protocol.startswith('icap://'):
			self.icap = self.protocol[len('icap://'):].split('/')[0]
		else:
			self.icap = ''

		# unbuffered pipe pair for communication with the main thread
		r, w = os.pipe()
		self.response_box_write = os.fdopen(w,'w',0)  # results are written here
		self.response_box_read = os.fdopen(r,'r',0)   # read from the main thread

		self.wid = name                # a unique name
		self.creation = time.time()    # when the thread was created
		self.request_box = request_box # queue with HTTP headers to process

		self.program = program         # the squid redirector program to fork
		self.running = True            # the thread is active

		# time of the most recent outstanding request to generate stats
		self.stats_timestamp = None

		self._proxy = 'ExaProxy-%s-id-%d' % (configuration.proxy.version,os.getpid())

		if self.protocol == 'url':
			self.classify = self._classify_url
		if self.protocol.startswith('icap://'):
			self.classify = self._classify_icap

		# Do not move: the fork must happen AFTER the setup above
		self.process = self._createProcess()  # handles classification
		Thread.__init__(self)
Example #22
0
class Page(object):
    """Internal web UI: renders monitoring, graph and control pages and
    dispatches URL paths to the matching renderer in html()."""

    def __init__(self, supervisor):
        self.supervisor = supervisor
        self.monitor = supervisor.monitor
        self.email_sent = False  # only one email allowed per proxy run
        self.log = Logger('web', supervisor.configuration.log.web)

    def _introspection(self, objects):
        # Render a navigable listing of the proxy's internal attributes.
        introduction = '<div style="padding: 10px 10px 10px 10px; font-weight:bold;">Looking at the internal of ExaProxy for %s </div><br/>\n' % cgi.escape(
            '.'.join(objects))
        link = cgi.escape('/'.join(
            objects[:-1])) if objects[:-1] else 'supervisor'
        line = [
            '<a href="/information/introspection/%s.html">Back to parent object</a><br/>'
            % link
        ]
        for k, content in self.monitor.introspection(objects):
            link = '/information/introspection/%s.html' % cgi.escape(
                '%s/%s' % ('/'.join(objects), k))
            line.append(
                '<a href="%s">%s</a><span class="value">%s</span><br/>' %
                (link, k, cgi.escape(content)))
        return introduction + _listing % ('\n'.join(line))

    def _configuration(self):
        # Render the running configuration as a key/value listing.
        introduction = '<div style="padding: 10px 10px 10px 10px; font-weight:bold;">ExaProxy Configuration</div><br/>\n'
        line = []
        for k, v in sorted(self.monitor.configuration().items()):
            line.append(
                '<span class="key">%s</span><span class="value">&nbsp; %s</span><br/>'
                % (k, cgi.escape(str(v))))
        return introduction + _listing % ('\n'.join(line))

    def _statistics(self):
        # Render the current statistics as a key/value listing.
        introduction = '<div style="padding: 10px 10px 10px 10px; font-weight:bold;">ExaProxy Statistics</div><br/>\n'
        line = []
        for k, v in sorted(self.monitor.statistics().items()):
            line.append(
                '<span class="key">%s</span><span class="value">&nbsp; %s</span><br/>'
                % (k, cgi.escape(str(str(v)))))
        return introduction + _listing % ('\n'.join(line))

    # The _connections/_processes/... helpers below each plot one group of
    # monitor time-series via graph().
    def _connections(self):
        return graph(self.monitor, 'Connections', 20000, [
            'clients.silent',
            'clients.speaking',
            'servers.opening',
            'servers.established',
        ])

    def _processes(self):
        return graph(self.monitor, 'Forked processes', 20000, [
            'processes.forked',
            'processes.min',
            'processes.max',
        ])

    def _requests(self):
        return graph(
            self.monitor,
            'Requests/seconds received from clients',
            20000,
            [
                'clients.requests',
            ],
            True,
        )

    def _clients(self):
        return graph(
            self.monitor,
            'Bits/seconds received from clients',
            20000,
            [
                'transfer.client4',
                'transfer.client6',
            ],
            True,
            adaptor=Bpstobps,
        )

    def _servers(self):
        return graph(
            self.monitor,
            'Bits/seconds received from servers',
            20000,
            [
                'transfer.content4',
                'transfer.content6',
            ],
            True,
            adaptor=Bpstobps,
        )

    def _transfer(self):
        return graph(
            self.monitor,
            'Bits/seconds received',
            20000,
            [
                'transfer.client',
                'transfer.content',
            ],
            True,
            adaptor=Bpstobps,
        )

    def _loops(self):
        return graph(
            self.monitor,
            'Reactor loops',
            20000,
            [
                'load.loops',
            ],
            True,
        )

    def _events(self):
        return graph(
            self.monitor,
            'Sockets which became readeable',
            20000,
            [
                'load.events',
            ],
            True,
        )

    def _queue(self):
        return graph(
            self.monitor,
            'Queued URL for classification',
            20000,
            [
                'queue.size',
            ],
            True,
        )

    def _source(self, bysock):
        # Group the given sockets by peer host and render a count per source.
        conns = 0

        clients = defaultdict(lambda: 0)
        for sock in bysock:
            try:
                host, port = sock.getpeername()
            except socket.error:
                host, port = None, None

            clients[host] += 1
            conns += 1

        ordered = defaultdict(list)
        for host, number in clients.items():
            ordered[number].append(host)

        result = [
            '<div style="padding: 10px 10px 10px 10px; font-weight:bold;">ExaProxy Statistics</div><br/>',
            '<center>we have %d connection(s) from %d source(s)</center><br/>'
            % (conns, len(clients))
        ]
        for number in reversed(sorted(ordered)):
            for host in ordered[number]:
                result.append(
                    '<span class="key">%s</span><span class="value">&nbsp; %s</span><br/>'
                    % (host, number))

        return _listing % '\n'.join(result)

    def _servers_source(self):
        return self._source(self.supervisor.content.established)

    def _clients_source(self):
        return self._source(self.supervisor.client.bysock)

    def _workers(self):
        # Render the forms used to change the worker pool min/max at runtime.
        form = '<form action="/control/workers/commit" method="get">%s: <input type="text" name="%s" value="%s"><input type="submit" value="Submit"></form>'

        change = {
            'exaproxy.redirector.minimum': self.supervisor.manager.low,
            'exaproxy.redirector.maximum': self.supervisor.manager.high,
        }

        forms = []
        for name in ('exaproxy.redirector.minimum',
                     'exaproxy.redirector.maximum'):
            value = change[name]
            forms.append(form % (name, name, value))
        return '<pre style="margin-left:40px;">\n' + '\n'.join(forms)

    def _run(self):
        # Render the debug eval/exec forms (only reachable when web.debug).
        s = '<pre style="margin-left:40px;">'
        s += '<form action="/control/debug/eval" method="get">eval <textarea type="text" name="python" cols="100" rows="10"></textarea><input type="submit" value="Submit"></form>'
        s += '<form action="/control/debug/exec" method="get">exec <textarea type="text" name="python" cols="100" rows="10"></textarea><input type="submit" value="Submit"></form>'
        return s

    def _logs(self):
        return 'do not view this in a web browser - the input is not sanitised, you have been warned !\n\n' + '\n'.join(
            History().formated())

    def _errs(self):
        return 'do not view this in a web browser - the input is not sanitised, you have been warned !\n\n' + '\n'.join(
            Errors().formated())

    def _email(self, args):
        # Send at most one notification email per proxy run.
        if self.email_sent:
            return '<center><b>You can only send one email per time ExaProxy is started</b></center>'
        self.email_sent, message = mail.send(args)
        return message

    def _json_running(self):
        return json.dumps(self.monitor.seconds[-1],
                          sort_keys=True,
                          indent=2,
                          separators=(',', ': '))

    def _json_configuration(self):
        return json.dumps(self.monitor.configuration(),
                          sort_keys=True,
                          indent=2,
                          separators=(',', ': '))

    def html(self, path):
        # Route an URL path (with optional ?query) to the right renderer.
        if len(path) > 5000:
            return menu('<center><b>path is too long</b></center>')

        if path == '/':
            path = '/index.html'
            args = ''
        elif '?' in path:
            path, args = path.split('?', 1)
        else:
            args = ''

        if not path.startswith('/'):
            return menu('<center><b>invalid url</b></center>')
        elif not path.endswith('.html'):
            if path == '/humans.txt':
                return humans.txt
            if path not in ('/json', '/json/running', '/json/configuration',
                            '/control/workers/commit', '/control/debug/eval',
                            '/control/debug/exec'):
                return menu('<center><b>invalid url</b></center>')
            sections = path[1:].split('/') + ['']
        else:
            sections = path[1:-5].split('/') + ['']

        if not sections[0]:
            return menu(index)
        section = sections[0]
        subsection = sections[1]

        if section == 'json':
            if subsection == 'running':
                return self._json_running()
            if subsection == 'configuration':
                return self._json_configuration()
            return '{ "errror" : "invalid url", "valid-paths": [ "/json/running", "/json/configuration" ] }'

        if section == 'index':
            return menu(index)

        if section == 'information':
            if subsection == 'introspection':
                return menu(self._introspection(sections[2:-1]))
            if subsection == 'configuration':
                return menu(self._configuration())
            if subsection == 'statistics':
                return menu(self._statistics())
            if subsection == 'logs':
                return self._logs()
            if subsection == 'errs':
                return self._errs()
            return menu(index)

        if section == 'graph':
            if subsection == 'processes':
                return menu(self._processes())
            if subsection == 'connections':
                return menu(self._connections())
            if subsection == 'servers':
                return menu(self._servers())
            if subsection == 'clients':
                return menu(self._clients())
            if subsection == 'transfered':
                return menu(self._transfer())
            if subsection == 'requests':
                return menu(self._requests())
            if subsection == 'loops':
                return menu(self._loops())
            if subsection == 'events':
                return menu(self._events())
            if subsection == 'queue':
                return menu(self._queue())
            return menu(index)

        if section == 'end-point':
            if subsection == 'servers':
                return menu(self._servers_source())
            if subsection == 'clients':
                return menu(self._clients_source())
            return menu(index)

        if section == 'control':
            action = (sections + [None, None, None])[2]

            if subsection == 'debug':
                # SECURITY: the branches below run arbitrary Python taken from
                # the query string; they are gated on web.debug being enabled.
                if not self.supervisor.configuration.web.debug:
                    return menu('not enabled')

                if action == 'exec':
                    if '=' in args:
                        try:
                            key, value = args.split('=', 1)
                            self.log.critical('PYTHON CODE RAN : %s' % value)
                            command = unquote(value.replace('+', ' '))
                            code = compile(command, '<string>', 'exec')
                            exec code
                            return 'done !'
                        except Exception, e:
                            return 'failed to run : \n' + command + '\n\nreason : \n' + str(
                                type(e)) + '\n' + str(e)

                if action == 'eval':
                    if '=' in args:
                        try:
                            key, value = args.split('=', 1)
                            self.log.critical('PYTHON CODE RAN : %s' % value)
                            command = unquote(value.replace('+', ' '))
                            return str(eval(command))
                        except Exception, e:
                            return 'failed to run : \n' + command + '\n\nreason : \n' + str(
                                type(e)) + '\n' + str(e)

                return menu(self._run())

            if subsection == 'workers':
                # Apply a min/max worker pool change submitted from _workers().
                if action == 'commit':
                    if '=' in args:
                        key, value = args.split('=', 1)

                        if key == 'exaproxy.redirector.minimum':
                            if value.isdigit(
                            ):  # this prevents negative values
                                setting = int(value)
                                if setting > self.supervisor.manager.high:
                                    return menu(
                                        self._workers() +
                                        '<div style="color: red; padding-top: 3em;">value is higher than exaproxy.redirector.maximum</div>'
                                    )
                                self.supervisor.manager.low = setting
                                return menu(
                                    self._workers() +
                                    '<div style="color: green; padding-top: 3em;">changed successfully</div>'
                                )

                        if key == 'exaproxy.redirector.maximum':
                            if value.isdigit():
                                setting = int(value)
                                if setting < self.supervisor.manager.low:
                                    return menu(
                                        self._workers() +
                                        '<div style="color: red; padding-top: 3em;">value is lower than exaproxy.redirector.minimum</div>'
                                    )
                                self.supervisor.manager.high = setting
                                return menu(
                                    self._workers() +
                                    '<div style="color: green; padding-top: 3em;">changed successfully</div>'
                                )

                        return menu(
                            self._workers() +
                            '<div style="color: red; padding-top: 3em;">invalid request</div>'
                        )

                return menu(self._workers())

            return menu(index)
Example #23
0
class Supervisor (object):
	"""Owner of the proxy's main loop: builds the poller, managers and
	servers, installs the signal handlers and performs the periodic
	maintenance driven by SIGALRM ticks."""

	# Every *_frequency below is expressed in multiples of alarm_time:
	# the run() loop advances one counter per SIGALRM tick, so a value of
	# int(X/alarm_time) fires roughly every X seconds.
	alarm_time = 0.1                           # regular backend work
	second_frequency = int(1/alarm_time)       # when we record history
	minute_frequency = int(60/alarm_time)      # when we want to average history
	increase_frequency = int(5/alarm_time)     # when we add workers
	decrease_frequency = int(60/alarm_time)    # when we remove workers
	saturation_frequency = int(20/alarm_time)  # when we report connection saturation
	interface_frequency = int(300/alarm_time)  # when we check for new interfaces

	# import os
	# clear = [hex(ord(c)) for c in os.popen('clear').read()]
	# clear = ''.join([chr(int(c,16)) for c in ['0x1b', '0x5b', '0x48', '0x1b', '0x5b', '0x32', '0x4a']])

	def __init__ (self,configuration):
		"""Build every subsystem in dependency order and install signal handlers.

		NOTE: the *configuration* parameter is immediately shadowed by a
		fresh load() below - the value passed in is never used.
		"""
		configuration = load()
		self.configuration = configuration

		# Only here so the introspection code can find them
		self.log = Logger('supervisor', configuration.log.supervisor)
		self.log.error('Starting exaproxy version %s' % configuration.proxy.version)

		self.signal_log = Logger('signal', configuration.log.signal)
		self.log_writer = SysLogWriter('log', configuration.log.destination, configuration.log.enable, level=configuration.log.level)
		self.usage_writer = UsageWriter('usage', configuration.usage.destination, configuration.usage.enable)

		self.log_writer.setIdentifier(configuration.daemon.identifier)
		#self.usage_writer.setIdentifier(configuration.daemon.identifier)

		if configuration.debug.log:
			self.log_writer.toggleDebug()
			self.usage_writer.toggleDebug()

		self.log.error('python version %s' % sys.version.replace(os.linesep,' '))
		self.log.debug('starting %s' % sys.argv[0])

		self.pid = PID(self.configuration)

		self.daemon = Daemon(self.configuration)
		self.poller = Poller(self.configuration.daemon)

		# Poller categories: sockets are registered under these names and the
		# reactor polls them per category.
		self.poller.setupRead('read_proxy')           # Listening proxy sockets
		self.poller.setupRead('read_web')             # Listening webserver sockets
		self.poller.setupRead('read_icap')             # Listening icap sockets
		self.poller.setupRead('read_workers')         # Pipes carrying responses from the child processes
		self.poller.setupRead('read_resolver')        # Sockets currently listening for DNS responses

		self.poller.setupRead('read_client')          # Active clients
		self.poller.setupRead('opening_client')       # Clients we have not yet read a request from
		self.poller.setupWrite('write_client')        # Active clients with buffered data to send
		self.poller.setupWrite('write_resolver')      # Active DNS requests with buffered data to send

		self.poller.setupRead('read_download')        # Established connections
		self.poller.setupWrite('write_download')      # Established connections we have buffered data to send to
		self.poller.setupWrite('opening_download')    # Opening connections

		# Managers and servers; the reactor below receives all of them, so
		# they must all exist before it is created.
		self.monitor = Monitor(self)
		self.page = Page(self)
		self.manager = RedirectorManager(
			self.configuration,
			self.poller,
		)
		self.content = ContentManager(self,configuration)
		self.client = ClientManager(self.poller, configuration)
		self.resolver = ResolverManager(self.poller, self.configuration, configuration.dns.retries*10)
		self.proxy = Server('http proxy',self.poller,'read_proxy', configuration.http.connections)
		self.web = Server('web server',self.poller,'read_web', configuration.web.connections)
		self.icap = Server('icap server',self.poller,'read_icap', configuration.icap.connections)

		self.reactor = Reactor(self.configuration, self.web, self.proxy, self.icap, self.manager, self.content, self.client, self.resolver, self.log_writer, self.usage_writer, self.poller)

		# Flags flipped by the signal handlers below and consumed by run()
		self._shutdown = True if self.daemon.filemax == 0 else False  # stop the program
		self._softstop = False  # stop once all current connection have been dealt with
		self._reload = False  # unimplemented
		self._toggle_debug = False  # start logging a lot
		self._decrease_spawn_limit = 0
		self._increase_spawn_limit = 0
		self._refork = False  # unimplemented
		self._pdb = False  # turn on pdb debugging
		self._listen = None  # listening change ? None: no, True: listen, False: stop listeing
		self.wait_time = 5.0  # how long do we wait at maximum once we have been soft-killed
		self.local = set()  # what addresses are on our local interfaces

		# populate self.local before any traffic decisions are made
		self.interfaces()

		signal.signal(signal.SIGQUIT, self.sigquit)
		signal.signal(signal.SIGINT, self.sigterm)
		signal.signal(signal.SIGTERM, self.sigterm)
		# signal.signal(signal.SIGABRT, self.sigabrt)
		# signal.signal(signal.SIGHUP, self.sighup)

		signal.signal(signal.SIGTRAP, self.sigtrap)

		signal.signal(signal.SIGUSR1, self.sigusr1)
		signal.signal(signal.SIGUSR2, self.sigusr2)
		signal.signal(signal.SIGTTOU, self.sigttou)
		signal.signal(signal.SIGTTIN, self.sigttin)

		signal.signal(signal.SIGALRM, self.sigalrm)

		# make sure we always have data in history
		# (done in zero for dependencies reasons)
		self.monitor.zero()


	def sigquit (self,signum, frame):
		"""SIGQUIT handler: the first signal requests a soft stop (finish
		current connections, stop listening); a second one forces shutdown."""
		if self._softstop:
			# already soft-stopping: escalate to a hard shutdown
			# (log text fixed: this handler is bound to SIGQUIT, not SIGINT)
			self.signal_log.critical('multiple SIG QUIT received, shutdown')
			self._shutdown = True
		else:
			self.signal_log.critical('SIG QUIT received, soft-stop')
			self._softstop = True
			self._listen = False

	def sigterm (self,signum, frame):
		"""SIGTERM/SIGINT handler: request shutdown, or drop into pdb on the
		next loop iteration when the PDB environment variable is set."""
		self.signal_log.critical('SIG TERM received, shutdown request')
		want_debugger = os.environ.get('PDB',False)
		if want_debugger:
			self._pdb = True
			return
		self._shutdown = True

	# def sigabrt (self,signum, frame):
	# 	self.signal_log.info('SIG INFO received, refork request')
	# 	self._refork = True

	# def sighup (self,signum, frame):
	# 	self.signal_log.info('SIG HUP received, reload request')
	# 	self._reload = True

	def sigtrap (self,signum, frame):
		"""SIGTRAP handler: ask the main loop to toggle debug logging."""
		self.signal_log.critical('SIG TRAP received, toggle debug')
		self._toggle_debug = True


	def sigusr1 (self,signum, frame):
		"""SIGUSR1 handler: ask the main loop to retire one more worker."""
		self.signal_log.critical('SIG USR1 received, decrease worker number')
		self._decrease_spawn_limit += 1

	def sigusr2 (self,signum, frame):
		"""SIGUSR2 handler: ask the main loop to spawn one more worker."""
		self.signal_log.critical('SIG USR2 received, increase worker number')
		self._increase_spawn_limit += 1


	def sigttou (self,signum, frame):
		"""SIGTTOU handler: ask the main loop to stop accepting connections."""
		self.signal_log.critical('SIG TTOU received, stop listening')
		self._listen = False

	def sigttin (self,signum, frame):
		"""SIGTTIN handler: ask the main loop to resume accepting connections."""
		# log text fixed: signal name and 'start' were mistyped
		self.signal_log.critical('SIG TTIN received, start listening')
		self._listen = True


	def sigalrm (self,signum, frame):
		"""SIGALRM handler: break the reactor out of its poll loop so run()
		can perform its periodic bookkeeping, then re-arm the timer."""
		self.signal_log.debug('SIG ALRM received, timed actions')
		self.reactor.running = False
		signal.setitimer(signal.ITIMER_REAL,self.alarm_time,self.alarm_time)


	def interfaces (self):
		"""Rescan the machine's interface addresses, log any change, and
		update self.local with the current set of local IPs."""
		discovered = set(['127.0.0.1','::1'])

		for iface in getifaddrs():
			if iface.family not in (AF_INET,AF_INET6):
				continue
			if iface.address not in self.local:
				self.log.info('found new local ip %s (%s)' % (iface.address,iface.name))
			discovered.add(iface.address)

		for address in self.local:
			if address not in discovered:
				self.log.info('removed local ip %s' % address)

		if discovered == self.local:
			self.log.info('no ip change')
		else:
			self.local = discovered

	def run (self):
		if self.daemon.drop_privileges():
			self.log.critical('Could not drop privileges to \'%s\'. Refusing to run as root' % self.daemon.user)
			self.log.critical('Set the environment value USER to change the unprivileged user')
			self._shutdown = True

		elif not self.initialise():
			self._shutdown = True

		signal.setitimer(signal.ITIMER_REAL,self.alarm_time,self.alarm_time)

		count_second = 0
		count_minute = 0
		count_increase = 0
		count_decrease = 0
		count_saturation = 0
		count_interface = 0

		while True:
			count_second = (count_second + 1) % self.second_frequency
			count_minute = (count_minute + 1) % self.minute_frequency

			count_increase = (count_increase + 1) % self.increase_frequency
			count_decrease = (count_decrease + 1) % self.decrease_frequency
			count_saturation = (count_saturation + 1) % self.saturation_frequency
			count_interface = (count_interface + 1) % self.interface_frequency

			try:
				if self._pdb:
					self._pdb = False
					import pdb
					pdb.set_trace()


				# check for IO change with select
				self.reactor.run()


				# must follow the reactor so we are sure to go through the reactor at least once
				# and flush any logs
				if self._shutdown:
					self._shutdown = False
					self.shutdown()
					break
				elif self._reload:
					self._reload = False
					self.reload()
				elif self._refork:
					self._refork = False
					self.signal_log.warning('refork not implemented')
					# stop listening to new connections
					# refork the program (as we have been updated)
					# just handle current open connection


				if self._softstop:
					if self._listen == False:
						self.proxy.rejecting()
						self._listen = None
					if self.client.softstop():
						self._shutdown = True
				# only change listening if we are not shutting down
				elif self._listen is not None:
					if self._listen:
						self._shutdown = not self.proxy.accepting()
						self._listen = None
					else:
						self.proxy.rejecting()
						self._listen = None


				if self._toggle_debug:
					self._toggle_debug = False
					self.log_writer.toggleDebug()


				if self._increase_spawn_limit:
					number = self._increase_spawn_limit
					self._increase_spawn_limit = 0
					self.manager.low += number
					self.manager.high = max(self.manager.low,self.manager.high)
					for _ in range(number):
						self.manager.increase()

				if self._decrease_spawn_limit:
					number = self._decrease_spawn_limit
					self._decrease_spawn_limit = 0
					self.manager.high = max(1,self.manager.high-number)
					self.manager.low = min(self.manager.high,self.manager.low)
					for _ in range(number):
						self.manager.decrease()


				# save our monitoring stats
				if count_second == 0:
					self.monitor.second()
					expired = self.reactor.client.expire()
					self.reactor.log.debug('events : ' + ', '.join('%s:%d' % (k,len(v)) for (k,v) in self.reactor.events.items()))
				else:
					expired = 0

				if expired:
					self.proxy.notifyClose(None, count=expired)

				if count_minute == 0:
					self.monitor.minute()

				# make sure we have enough workers
				if count_increase == 0:
					self.manager.provision()
				# and every so often remove useless workers
				if count_decrease == 0:
					self.manager.deprovision()

				# report if we saw too many connections
				if count_saturation == 0:
					self.proxy.saturation()
					self.web.saturation()

				if self.configuration.daemon.poll_interfaces and count_interface == 0:
					self.interfaces()

			except KeyboardInterrupt:
				self.log.critical('^C received')
				self._shutdown = True
			except OSError,e:
				# This shoould never happen as we are limiting how many connections we accept
				if e.errno == 24:  # Too many open files
					self.log.critical('Too many opened files, shutting down')
					for line in traceback.format_exc().split('\n'):
						self.log.critical(line)
					self._shutdown = True
				else:
					self.log.critical('unrecoverable io error')
					for line in traceback.format_exc().split('\n'):
						self.log.critical(line)
					self._shutdown = True

			finally:
Example #24
0
class ClientManager (object):
	unproxy = ProxyProtocol().parseRequest

	def __init__(self, poller, configuration):
		self.total_sent4 = 0L
		self.total_sent6 = 0L
		self.total_requested = 0L
		self.norequest = TimeCache(configuration.http.idle_connect)
		self.bysock = {}
		self.byname = {}
		self.buffered = []
		self._nextid = 0
		self.poller = poller
		self.log = Logger('client', configuration.log.client)
		self.proxied = configuration.http.proxied
		self.max_buffer = configuration.http.header_size

	def __contains__(self, item):
		return item in self.byname

	def getnextid(self):
		self._nextid += 1
		return str(self._nextid)

	def expire (self,number=100):
		count = 0
		for sock in self.norequest.expired(number):
			client = self.norequest.get(sock,[None,])[0]
			if client:
				self.cleanup(sock,client.name)
				count += 1

		return count

	def newConnection(self, sock, peer, source):
		name = self.getnextid()
		client = Client(name, sock, peer, self.log, self.max_buffer)

		self.norequest[sock] = client, source
		self.byname[name] = client, source

		# watch for the opening request
		self.poller.addReadSocket('opening_client', client.sock)

		#self.log.info('new id %s (socket %s) in clients : %s' % (name, sock, sock in self.bysock))
		return peer

	def readRequest(self, sock):
		"""Read only the initial HTTP headers sent by the client"""

		client, source = self.norequest.get(sock, (None, None))
		if client:
			name, peer, request, content = client.readData()
			if request:
				self.total_requested += 1
				# headers can be read only once
				self.norequest.pop(sock, (None, None))

				# we have now read the client's opening request
				self.poller.removeReadSocket('opening_client', client.sock)

			elif request is None:
				self.cleanup(sock, client.name)
		else:
			self.log.error('trying to read headers from a client that does not exist %s' % sock)
			name, peer, request, content, source = None, None, None, None, None

		if request and self.proxied is True and source == 'proxy':
			client_ip, client_request = self.unproxy(request)

			if client_ip and client_request:
				peer = client_ip
				request = client_request
				client.setPeer(client_ip)

		return name, peer, request, content, source


	def readDataBySocket(self, sock):
		client, source = self.bysock.get(sock, (None, None))
		if client:
			name, peer, request, content = client.readData()
			if request:
				self.total_requested += 1
				# Parsing of the new request will be handled asynchronously. Ensure that
				# we do not read anything from the client until a request has been sent
				# to the remote webserver.
				# Since we just read a request, we know that the cork is not currently
				# set and so there's no risk of it being erroneously removed.
				self.poller.corkReadSocket('read_client', sock)

			elif request is None:
				self.cleanup(sock, client.name)
		else:
			self.log.error('trying to read from a client that does not exist %s' % sock)
			name, peer, request, content = None, None, None, None


		return name, peer, request, content, source


	def readDataByName(self, name):
		client, source = self.byname.get(name, (None, None))
		if client:
			name, peer, request, content = client.readData()
			if request:
				self.total_requested += 1
				# Parsing of the new request will be handled asynchronously. Ensure that
				# we do not read anything from the client until a request has been sent
				# to the remote webserver.
				# Since we just read a request, we know that the cork is not currently
				# set and so there's no risk of it being erroneously removed.
				self.poller.corkReadSocket('read_client', client.sock)

			elif request is None:
				self.cleanup(client.sock, name)
		else:
			self.log.error('trying to read from a client that does not exist %s' % name)
			name, peer, request, content = None, None, None, None


		return name, peer, request, content

	def sendDataBySocket(self, sock, data):
		client, source = self.bysock.get(sock, (None, None))
		if client:
			name = client.name
			res = client.writeData(data)

			if res is None:
				# close the client connection
				self.cleanup(sock, client.name)

				buffered, had_buffer, sent4, sent6 = None, None, 0, 0
				result = None
				buffer_change = None
			else:
				buffered, had_buffer, sent4, sent6 = res
				self.total_sent4 += sent4
				self.total_sent6 += sent6
				result = buffered


			if buffered:
				if sock not in self.buffered:
					self.buffered.append(sock)
					buffer_change = True

					# watch for the socket's send buffer becoming less than full
					self.poller.addWriteSocket('write_client', client.sock)
				else:
					buffer_change = False

			elif had_buffer and sock in self.buffered:
				self.buffered.remove(sock)
				buffer_change = True

				# we no longer care about writing to the client
				self.poller.removeWriteSocket('write_client', client.sock)

			else:
				buffer_change = False
		else:
			result = None
			buffer_change = None
			name = None

		return result, buffer_change, name, source

	def sendDataByName(self, name, data):
		client, source = self.byname.get(name, (None, None))
		if client:
			res = client.writeData(data)

			if res is None:
				# we cannot write to the client so clean it up
				self.cleanup(client.sock, name)

				buffered, had_buffer, sent4, sent6 = None, None, 0, 0
				result = None
				buffer_change = None
			else:
				buffered, had_buffer, sent4, sent6 = res
				self.total_sent4 += sent4
				self.total_sent6 += sent6
				result = buffered

			if buffered:
				if client.sock not in self.buffered:
					self.buffered.append(client.sock)
					buffer_change = True

					# watch for the socket's send buffer becoming less than full
					self.poller.addWriteSocket('write_client', client.sock)
				else:
					buffer_change = False

			elif had_buffer and client.sock in self.buffered:
				self.buffered.remove(client.sock)
				buffer_change = True

				# we no longer care about writing to the client
				self.poller.removeWriteSocket('write_client', client.sock)

			else:
				buffer_change = False
		else:
			result = None
			buffer_change = None

		return result, buffer_change, client


	def startData(self, name, data, remaining):
		# NOTE: soo ugly but fast to code
		nb_to_read = 0
		if type(remaining) == type(''):
			if 'chunked' in remaining:
				mode = 'chunked'
			else:
				mode = 'passthrough'
		elif remaining > 0:
			mode = 'transfer'
			nb_to_read = remaining
		elif remaining == 0:
			mode = 'request'
		else:
			mode = 'passthrough'

		client, source = self.byname.get(name, (None, None))
		if client:
			try:
				command, d = data
			except (ValueError, TypeError):
				self.log.error('invalid command sent to client %s' % name)
				self.cleanup(client.sock, name)
				res = None
			else:
				if client.sock not in self.bysock:
					# Start checking for content sent by the client
					self.bysock[client.sock] = client, source

					# watch for the client sending new data
					self.poller.addReadSocket('read_client', client.sock)

					# make sure we don't somehow end up with this still here
					self.norequest.pop(client.sock, (None,None))

					# NOTE: always done already in readRequest
					self.poller.removeReadSocket('opening_client', client.sock)
					res = client.startData(command, d)

				else:
					res = client.restartData(command, d)

					# If we are here then we must have prohibited reading from the client
					# and it must otherwise have been in a readable state
					self.poller.uncorkReadSocket('read_client', client.sock)



			if res is not None:
				buffered, had_buffer, sent4, sent6 = res

				# buffered data we read with the HTTP headers
				name, peer, request, content = client.readRelated(mode,nb_to_read)
				if request:
					self.total_requested += 1
					self.log.info('reading multiple requests')
					self.cleanup(client.sock, name)
					buffered, had_buffer = None, None
					content = None

				elif request is None:
					self.cleanup(client.sock, name)
					buffered, had_buffer = None, None
					content = None

			else:
				# we cannot write to the client so clean it up
				self.cleanup(client.sock, name)

				buffered, had_buffer = None, None
				content = None

			if buffered:
				if client.sock not in self.buffered:
					self.buffered.append(client.sock)

					# watch for the socket's send buffer becoming less than full
					self.poller.addWriteSocket('write_client', client.sock)

			elif had_buffer and client.sock in self.buffered:
				self.buffered.remove(client.sock)

				# we no longer care about writing to the client
				self.poller.removeWriteSocket('write_client', client.sock)
		else:
			content = None

		return client, content, source


	def corkUploadByName(self, name):
		client, source = self.byname.get(name, (None, None))
		if client:
			self.poller.corkReadSocket('read_client', client.sock)

	def uncorkUploadByName(self, name):
		client, source = self.byname.get(name, (None, None))
		if client:
			if client.sock in self.bysock:
				self.poller.uncorkReadSocket('read_client', client.sock)

	def cleanup(self, sock, name):
		self.log.debug('cleanup for socket %s' % sock)
		client, source = self.bysock.get(sock, (None,None))
		client, source = (client,None) if client else self.norequest.get(sock, (None,None))
		client, source = (client,None) or self.byname.get(name, (None,None))

		self.bysock.pop(sock, None)
		self.norequest.pop(sock, (None,None))
		self.byname.pop(name, None)

		if client:
			self.poller.removeWriteSocket('write_client', client.sock)
			self.poller.removeReadSocket('read_client', client.sock)
			self.poller.removeReadSocket('opening_client', client.sock)

			client.shutdown()
		else:
			self.log.error('COULD NOT CLEAN UP SOCKET %s' % sock)

		if sock in self.buffered:
			self.buffered.remove(sock)

	def softstop (self):
		if len(self.byname) > 0 or len(self.norequest) > 0:
			return False
		self.log.critical('no more client connection, exiting.')
		return True

	def stop(self):
		for client, source in self.bysock.itervalues():
			client.shutdown()

		for client, source in self.norequest.itervalues():
			client.shutdown()

		self.poller.clearRead('read_client')
		self.poller.clearRead('opening_client')
		self.poller.clearWrite('write_client')

		self.bysock = {}
		self.norequest = {}
		self.byname = {}
		self.buffered = []
Example #25
0
Created by Thomas Mangin on 2011-11-30.
Copyright (c) 2011-2013  Exa Networks. All rights reserved.
"""

import socket
import errno

from exaproxy.util.log.logger import Logger
from exaproxy.network.errno_list import errno_block
from exaproxy.configuration import load

# presumably the Linux IP_TRANSPARENT socket option number, hardcoded because
# the socket module does not export it - verify against <linux/in.h>
IP_TRANSPARENT = 19

# module-level configuration and logger shared by the helpers below
configuration = load()
log = Logger('server', configuration.log.server)

def isipv4(address):
	"""Return True if *address* is a valid IPv4 address literal."""
	try:
		socket.inet_pton(socket.AF_INET, address)
	except socket.error:
		return False
	return True

def isipv6(address):
	"""Return True if *address* is a valid IPv6 address literal."""
	try:
		socket.inet_pton(socket.AF_INET6, address)
	except socket.error:
		return False
	return True
Example #26
0
"""

# http://code.google.com/speed/articles/web-metrics.html

import select
import socket
import errno

from exaproxy.network.errno_list import errno_block, errno_fatal
from interface import IPoller

from exaproxy.util.log.logger import Logger
from exaproxy.configuration import load

# module-level configuration and logger used by the select-based poller helpers
configuration = load()
log = Logger('select', configuration.log.server)

def poll_select(read, write, timeout=None):
	try:
		r, w, x = select.select(read, write, read + write, timeout)
	except socket.error, e:
		if e.args[0] in errno_block:
			log.error('select not ready, errno %d: %s' % (e.args[0], errno.errorcode.get(e.args[0], '')))
			return [], [], []

		if e.args[0] in errno_fatal:
			log.error('select problem, errno %d: %s' % (e.args[0], errno.errorcode.get(e.args[0], '')))
			log.error('poller read  : %s' % str(read))
			log.error('poller write : %s' % str(write))
			log.error('read : %s' % str(read))
		else:
Example #27
0
	def __init__ (self,configuration):
		"""Build every subsystem, fork the redirector, then install signal
		handlers.

		NOTE: the *configuration* parameter is immediately shadowed by a
		fresh load() below - the value passed in is never used.
		"""
		configuration = load()
		self.configuration = configuration

		# Only here so the introspection code can find them
		self.log = Logger('supervisor', configuration.log.supervisor)
		self.log.error('Starting exaproxy version %s' % configuration.proxy.version)

		self.signal_log = Logger('signal', configuration.log.signal)
		self.log_writer = SysLogWriter('log', configuration.log.destination, configuration.log.enable, level=configuration.log.level)
		self.usage_writer = UsageWriter('usage', configuration.usage.destination, configuration.usage.enable)

		# flush pending log messages when the interpreter exits (python2 hook)
		sys.exitfunc = self.log_writer.writeMessages

		self.log_writer.setIdentifier(configuration.daemon.identifier)
		#self.usage_writer.setIdentifier(configuration.daemon.identifier)

		if configuration.debug.log:
			self.log_writer.toggleDebug()
			self.usage_writer.toggleDebug()

		self.log.error('python version %s' % sys.version.replace(os.linesep,' '))
		self.log.debug('starting %s' % sys.argv[0])

		self.pid = PID(self.configuration)

		self.daemon = Daemon(self.configuration)
		self.poller = Poller(self.configuration.daemon)

		# Poller categories: sockets are registered under these names and the
		# reactor polls them per category.
		self.poller.setupRead('read_proxy')       # Listening proxy sockets
		self.poller.setupRead('read_web')         # Listening webserver sockets
		self.poller.setupRead('read_icap')        # Listening icap sockets
		self.poller.setupRead('read_redirector')  # Pipes carrying responses from the redirector process
		self.poller.setupRead('read_resolver')    # Sockets currently listening for DNS responses

		self.poller.setupRead('read_client')      # Active clients
		self.poller.setupRead('opening_client')   # Clients we have not yet read a request from
		self.poller.setupWrite('write_client')    # Active clients with buffered data to send
		self.poller.setupWrite('write_resolver')  # Active DNS requests with buffered data to send

		self.poller.setupRead('read_download')      # Established connections
		self.poller.setupWrite('write_download')    # Established connections we have buffered data to send to
		self.poller.setupWrite('opening_download')  # Opening connections

		self.monitor = Monitor(self)
		self.page = Page(self)
		self.content = ContentManager(self,configuration)
		self.client = ClientManager(self.poller, configuration)
		self.resolver = ResolverManager(self.poller, self.configuration, configuration.dns.retries*10)
		self.proxy = Server('http proxy',self.poller,'read_proxy', configuration.http.connections)
		self.web = Server('web server',self.poller,'read_web', configuration.web.connections)
		self.icap = Server('icap server',self.poller,'read_icap', configuration.icap.connections)

		# Flags flipped by the signal handlers and consumed by the main loop
		self._shutdown = True if self.daemon.filemax == 0 else False  # stop the program
		self._softstop = False  # stop once all current connection have been dealt with
		self._reload = False  # unimplemented
		self._toggle_debug = False  # start logging a lot
		self._decrease_spawn_limit = 0
		self._increase_spawn_limit = 0
		self._refork = False  # unimplemented
		self._pdb = False  # turn on pdb debugging
		self._listen = None  # listening change ? None: no, True: listen, False: stop listeing
		self.wait_time = 5.0  # how long do we wait at maximum once we have been soft-killed
		self.local = set()  # what addresses are on our local interfaces

		if not self.initialise():
			self._shutdown = True

		elif self.daemon.drop_privileges():
			self.log.critical('Could not drop privileges to \'%s\'. Refusing to run as root' % self.daemon.user)
			self.log.critical('Set the environment value USER to change the unprivileged user')
			self._shutdown = True

		# fork the redirector process before performing any further setup
		redirector = fork_redirector(self.poller, self.configuration)

		# create threads _after_ all forking is done
		self.redirector = redirector_message_thread(redirector)

		self.reactor = Reactor(self.configuration, self.web, self.proxy, self.icap, self.redirector, self.content, self.client, self.resolver, self.log_writer, self.usage_writer, self.poller)

		# populate self.local before any traffic decisions are made
		self.interfaces()

		signal.signal(signal.SIGQUIT, self.sigquit)
		signal.signal(signal.SIGINT, self.sigterm)
		signal.signal(signal.SIGTERM, self.sigterm)
		# signal.signal(signal.SIGABRT, self.sigabrt)
		# signal.signal(signal.SIGHUP, self.sighup)

		signal.signal(signal.SIGTRAP, self.sigtrap)

		signal.signal(signal.SIGUSR1, self.sigusr1)
		signal.signal(signal.SIGUSR2, self.sigusr2)
		signal.signal(signal.SIGTTOU, self.sigttou)
		signal.signal(signal.SIGTTIN, self.sigttin)

		signal.signal(signal.SIGALRM, self.sigalrm)

		# make sure we always have data in history
		# (done in zero for dependencies reasons)
		self.monitor.zero()
Example #28
0
class RedirectorManager (object):
	def __init__ (self, configuration, poller):
		"""Prepare the worker pool bookkeeping; no worker is spawned yet."""
		self.configuration = configuration
		self.poller = poller
		self.log = Logger('manager', configuration.log.manager)

		# pool sizing limits
		self.low = configuration.redirector.minimum
		self.high = configuration.redirector.maximum

		# requests held until a worker frees up
		self.queue = Queue()

		self.nextid = 1         # id handed to the next spawned worker
		self.worker = {}        # wid -> worker task
		self.processes = {}     # pollable child fd -> worker task
		self.available = set()  # wids ready to take a new request
		self.active = {}        # wid -> request state awaiting a child response
		self.stopping = set()   # wids to reap once no longer active

		self.redirector_factory = RedirectorFactory(
			configuration,
			configuration.redirector.program,
			configuration.redirector.protocol,
		)

	def _getid(self):
		"""Return a fresh worker id as a string."""
		allocated = self.nextid
		self.nextid = allocated + 1
		return str(allocated)

	def _spawn (self):
		"""add one worker to the pool"""
		wid = self._getid()
		worker = self.redirector_factory.create(wid)

		self.worker[wid] = worker
		self.available.add(wid)

		# only forked redirector programs expose a pipe we can poll
		process = worker.process
		if process is not None:
			self.processes[process.stdout] = worker
			self.poller.addReadSocket('read_workers', process.stdout)

		self.log.info("added a worker")
		self.log.info("we have %d workers. defined range is ( %d / %d )" % (len(self.worker), self.low, self.high))

	def spawn (self, number=1):
		"""create the request number of worker processes"""
		self.log.info("spawning %d more workers" % number)
		remaining = number
		while remaining > 0:
			self._spawn()
			remaining -= 1

	def respawn (self):
		"""Stop every worker, then spawn a fresh set of the same size
		(clamped into the configured [low, high] range)."""
		target = max(min(len(self.worker), self.high), self.low)

		for wid in set(self.worker):
			self.stopWorker(wid)

		self.spawn(target)

	def stopWorker (self, wid):
		"""Reap *wid* now if it is idle, otherwise flag it for later reaping."""
		self.log.info('want worker %s to go away' % wid)

		if wid in self.active:
			# busy: defer until its in-flight request completes
			self.stopping.add(wid)
		else:
			self.reap(wid)

	def reap (self, wid):
		"""Immediately remove worker *wid* from every tracking structure,
		unregister its pipe from the poller and shut the process down."""
		self.log.info('we are killing worker %s' % wid)
		worker = self.worker[wid]

		# an active worker should have been released first; log the breach
		if wid in self.active:
			self.log.error('reaping worker %s even though it is still active' % wid)
			self.active.pop(wid)

		if wid in self.stopping:
			self.stopping.remove(wid)

		if wid in self.available:
			self.available.remove(wid)

		# stop polling the child's stdout before shutting it down
		if worker.process is not None:
			self.poller.removeReadSocket('read_workers', worker.process.stdout)
			self.processes.pop(worker.process.stdout)

		worker.shutdown()
		self.worker.pop(wid)

	def _decrease (self):
		"""Stop the oldest worker, but never shrink below self.low."""
		if len(self.worker) <= self.low:
			return
		victim = self._oldest()
		if victim:
			self.stopWorker(victim)

	def _increase (self):
		"""Spawn one more worker unless the pool is already at self.high."""
		if self.high <= len(self.worker):
			return
		self.spawn()

	def decrease (self, count=1):
		"""Retire up to *count* workers, respecting the configured minimum."""
		# range rather than xrange: consistent with spawn()/respawn() and
		# forward-compatible; behaviour is identical for these small counts
		for _ in range(count):
			self._decrease()

	def increase (self, count=1):
		"""Add up to *count* workers, respecting the configured maximum."""
		# range rather than xrange: consistent with spawn()/respawn() and
		# forward-compatible; behaviour is identical for these small counts
		for _ in range(count):
			self._increase()

	def start (self):
		"""spawn our minimum number of workers"""
		self.log.info("starting workers.")
		missing = self.low - len(self.worker)
		self.spawn(max(0, missing))

	def stop (self):
		"""tell all our worker to stop reading the queue and stop"""
		# iterate over a snapshot: reap() pops entries from self.worker, and
		# mutating a dict while iterating it raises RuntimeError
		for wid in list(self.worker):
			self.reap(wid)

		self.worker = {}

	def _oldest (self):
		"""Return the wid of the longest-lived worker that is not already
		marked as stopping, or None when there is none."""
		best_wid = None
		best_creation = time.time()

		for wid in set(self.worker):
			if wid in self.stopping:
				continue
			creation = self.worker[wid].creation
			if creation < best_creation:
				best_creation = creation
				best_wid = wid

		return best_wid

	def provision (self):
		"""Grow the pool: respawn lost workers up to the minimum, and add
		capacity when the queue has outgrown the current pool."""
		size = self.queue.qsize()
		num_workers = len(self.worker)

		# bad we are bleeding workers !
		if num_workers < self.low:
			self.log.info("we lost some workers, respawing %d new workers" % (self.low - num_workers))
			self.spawn(self.low - num_workers)

		# we need more workers
		if size >= num_workers:
			# nothing we can do we have reach our limit
			if num_workers >= self.high:
				self.log.warning("help ! we need more workers but we reached our ceiling ! %d request are queued for %d processes" % (size,num_workers))
				return
			# try to figure a good number to add ..
			# no less than one, no more than to reach self.high, lower between self.low and a quarter of the allowed growth
			# NOTE: '/' is integer (floor) division under python2 here
			nb_to_add = int(min(max(1,min(self.low,(self.high-self.low)/4)),self.high-num_workers))
			self.log.warning("we are low on workers adding a few (%d), the queue has %d unhandled url" % (nb_to_add,size))
			self.spawn(nb_to_add)

	def deprovision (self):
		"""Retire the oldest worker once the queue is (nearly) empty and we
		hold more workers than the configured minimum."""
		queued = self.queue.qsize()
		workers = len(self.worker)

		if queued < 2 and workers > self.low:
			self.log.info("we have too many workers (%d), stopping the oldest" % workers)
			# the oldest worker has had the most opportunity to leak memory
			victim = self._oldest()
			if victim:
				self.stopWorker(victim)



	def acquire (self):
		"""Take one available worker out of the pool, or return None."""
		if not self.available:
			return None

		wid = self.available.pop()
		return self.worker[wid]

	def release (self, wid):
		"""Return *wid* to the available pool - unless it was flagged for
		stopping, in which case reap it now."""
		if wid in self.stopping:
			self.reap(wid)
		else:
			self.available.add(wid)

	def persist (self, wid, client_id, peer, data, header, subheader, source, tainted):
		"""Record the in-flight request state for a deferred worker decision."""
		state = (client_id, peer, data, header, subheader, source, tainted)
		self.active[wid] = state

	def progress (self, wid):
		"""Pop and return the request state stored for *wid* by persist()."""
		state = self.active.pop(wid)
		return state

	def doqueue (self):
		"""Hand one queued request to a free worker, when both exist.

		Returns (client_id, command, decision), all None when nothing was done.
		"""
		if not self.available or self.queue.isempty():
			return None, None, None

		client_id, peer, header, subheader, source, tainted = self.queue.get()
		_, command, decision = self.request(client_id, peer, header, subheader, source, tainted=tainted)
		return client_id, command, decision


	def request (self, client_id, peer, header, subheader, source, tainted=False):
		"""Ask a worker to decide what to do with a client request; when no
		worker is free, park the request on the queue.

		A failing worker is reaped and the request retried once (tainted);
		a second failure closes the client.  Returns (client_id, command,
		decision); command is None when queued or deferred.
		"""
		worker = self.acquire()

		if worker is not None:
			try:
				_, command, decision = worker.decide(client_id, peer, header, subheader, source)

			# was a bare except, which also swallowed SystemExit and
			# KeyboardInterrupt; Exception keeps the intended "any worker
			# failure means a dead worker" behaviour
			except Exception:
				command, decision = None, None

			if command is None:
				# the worker failed: replace it and retry once
				self.reap(worker.wid)

				if tainted is False:
					_, command, decision = self.request(client_id, peer, header, subheader, source, tainted=True)

				else:
					_, command, decision = Respond.close(client_id)

		else:
			# no available worker: park the request until doqueue() runs
			command, decision = None, None
			self.queue.put((client_id, peer, header, subheader, source, tainted))

		if command == 'defer':
			# the worker needs more time; remember the request state for getDecision()
			self.persist(worker.wid, client_id, peer, decision, header, subheader, source, tainted)
			command, decision = None, None

		elif worker is not None:
			self.release(worker.wid)

		return client_id, command, decision


	def getDecision (self, pipe_in):
		worker = self.processes.get(pipe_in, None)

		if worker is not None and worker.wid in self.active:
			client_id, peer, request, header, subheader, source, tainted = self.progress(worker.wid)
			try:
				_, command, decision = worker.progress(client_id, peer, request, header, subheader, source)

			except Exception, e:
				command, decision = None, None

			self.release(worker.wid)

			if command is None:
				self.reap(worker.wid)

				if tainted is False:
					_, command, decision = self.request(client_id, peer, header, subheader, source, tainted=True)

				else:
					_, command, decision = Respond.close(client_id)

		else:
Example #29
0
	def __init__(self,supervisor):
		"""Keep handles on the supervisor and its monitor for page rendering."""
		self.supervisor = supervisor
		self.monitor = supervisor.monitor
		self.log = Logger('web', supervisor.configuration.log.web)
		# presumably flips once a report email has gone out - TODO confirm
		self.email_sent = False
Example #30
0
class ClientManager(object):
    """Track client connections and mediate all socket I/O with them."""

    # strips the HAProxy PROXY protocol header from an opening request
    unproxy = ProxyProtocol().parseRequest

    def __init__(self, poller, configuration):
        self.total_sent4 = 0L    # bytes sent to clients over IPv4
        self.total_sent6 = 0L    # bytes sent to clients over IPv6
        self.total_requested = 0L    # number of client requests seen
        # clients that connected but have not yet sent a full request
        self.norequest = TimeCache(configuration.http.idle_connect)
        self.bysock = {}    # sock -> (client, source) once a request was read
        self.byname = {}    # client name -> (client, source)
        self.buffered = []  # sockets whose send buffer is currently full
        self._nextid = 0    # last allocated client id
        self.poller = poller
        self.log = Logger('client', configuration.log.client)
        self.proxied = configuration.http.proxied
        self.max_buffer = configuration.http.header_size

    def __contains__(self, item):
        """True when *item* is the name of a tracked client."""
        return self.byname.__contains__(item)

    def getnextid(self):
        """Allocate the next unique client identifier, returned as a string."""
        allocated = self._nextid + 1
        self._nextid = allocated
        return str(allocated)

    def expire(self, number=100):
        """Close up to *number* connections that idled without ever sending
        a request.  Returns how many clients were cleaned up."""
        count = 0
        for sock in self.norequest.expired(number):
            entry = self.norequest.get(sock, (None, None))
            client = entry[0]
            if client:
                self.cleanup(sock, client.name)
                count += 1

        return count

    def newConnection(self, sock, peer, source):
        """Register a freshly accepted client socket and start watching it."""
        name = self.getnextid()
        client = Client(name, sock, peer, self.log, self.max_buffer)

        # no request seen yet: the connection is subject to the idle-connect timeout
        self.norequest[sock] = client, source
        self.byname[name] = client, source

        # watch for the opening request
        self.poller.addReadSocket('opening_client', client.sock)

        #self.log.info('new id %s (socket %s) in clients : %s' % (name, sock, sock in self.bysock))
        return peer

    def readRequest(self, sock):
        """Read only the initial HTTP headers sent by the client"""

        client, source = self.norequest.get(sock, (None, None))
        if client:
            name, peer, request, content = client.readData()
            if request:
                self.total_requested += 1
                # headers can be read only once
                self.norequest.pop(sock, (None, None))

                # we have now read the client's opening request
                self.poller.removeReadSocket('opening_client', client.sock)

            elif request is None:
                # the connection closed (or errored) before a full request arrived
                self.cleanup(sock, client.name)
        else:
            self.log.error(
                'trying to read headers from a client that does not exist %s' %
                sock)
            name, peer, request, content, source = None, None, None, None, None

        # behind a PROXY-protocol frontend, recover the real client address
        if request and self.proxied is True and source == 'proxy':
            client_ip, client_request = self.unproxy(request)

            if client_ip and client_request:
                peer = client_ip
                request = client_request
                client.setPeer(client_ip)

        return name, peer, request, content, source

    def readDataBySocket(self, sock):
        """Read request/content data from an established client, keyed by socket."""
        client, source = self.bysock.get(sock, (None, None))
        if client:
            name, peer, request, content = client.readData()
            if request:
                self.total_requested += 1
                # Parsing of the new request will be handled asynchronously. Ensure that
                # we do not read anything from the client until a request has been sent
                # to the remote webserver.
                # Since we just read a request, we know that the cork is not currently
                # set and so there's no risk of it being erroneously removed.
                self.poller.corkReadSocket('read_client', sock)

            elif request is None:
                # connection closed or errored - drop the client
                self.cleanup(sock, client.name)
        else:
            self.log.error(
                'trying to read from a client that does not exist %s' % sock)
            name, peer, request, content = None, None, None, None

        return name, peer, request, content, source

    def readDataByName(self, name):
        """Read request/content data from an established client, keyed by name.

        Unlike readDataBySocket this does not return the source.
        """
        client, source = self.byname.get(name, (None, None))
        if client:
            name, peer, request, content = client.readData()
            if request:
                self.total_requested += 1
                # Parsing of the new request will be handled asynchronously. Ensure that
                # we do not read anything from the client until a request has been sent
                # to the remote webserver.
                # Since we just read a request, we know that the cork is not currently
                # set and so there's no risk of it being erroneously removed.
                self.poller.corkReadSocket('read_client', client.sock)

            elif request is None:
                # connection closed or errored - drop the client
                self.cleanup(client.sock, name)
        else:
            self.log.error(
                'trying to read from a client that does not exist %s' % name)
            name, peer, request, content = None, None, None, None

        return name, peer, request, content

    def sendDataBySocket(self, sock, data):
        """Queue *data* for the client on *sock*.

        Returns (result, buffer_change, name, source): result is the buffered
        flag (None if the client had to be cleaned up) and buffer_change says
        whether the set of write-buffered sockets changed.
        """
        client, source = self.bysock.get(sock, (None, None))
        if client:
            name = client.name
            res = client.writeData(data)

            if res is None:
                # close the client connection
                self.cleanup(sock, client.name)

                buffered, had_buffer, sent4, sent6 = None, None, 0, 0
                result = None
                buffer_change = None
            else:
                buffered, had_buffer, sent4, sent6 = res
                self.total_sent4 += sent4
                self.total_sent6 += sent6
                result = buffered

            if buffered:
                if sock not in self.buffered:
                    self.buffered.append(sock)
                    buffer_change = True

                    # watch for the socket's send buffer becoming less than full
                    self.poller.addWriteSocket('write_client', client.sock)
                else:
                    buffer_change = False

            elif had_buffer and sock in self.buffered:
                self.buffered.remove(sock)
                buffer_change = True

                # we no longer care about writing to the client
                self.poller.removeWriteSocket('write_client', client.sock)

            else:
                buffer_change = False
        else:
            result = None
            buffer_change = None
            name = None

        return result, buffer_change, name, source

    def sendDataByName(self, name, data):
        """Queue *data* for the named client.

        Returns (result, buffer_change, client).  NOTE(review): unlike
        sendDataBySocket this returns the client object, not its name -
        confirm callers expect that asymmetry.
        """
        client, source = self.byname.get(name, (None, None))
        if client:
            res = client.writeData(data)

            if res is None:
                # we cannot write to the client so clean it up
                self.cleanup(client.sock, name)

                buffered, had_buffer, sent4, sent6 = None, None, 0, 0
                result = None
                buffer_change = None
            else:
                buffered, had_buffer, sent4, sent6 = res
                self.total_sent4 += sent4
                self.total_sent6 += sent6
                result = buffered

            if buffered:
                if client.sock not in self.buffered:
                    self.buffered.append(client.sock)
                    buffer_change = True

                    # watch for the socket's send buffer becoming less than full
                    self.poller.addWriteSocket('write_client', client.sock)
                else:
                    buffer_change = False

            elif had_buffer and client.sock in self.buffered:
                self.buffered.remove(client.sock)
                buffer_change = True

                # we no longer care about writing to the client
                self.poller.removeWriteSocket('write_client', client.sock)

            else:
                buffer_change = False
        else:
            result = None
            buffer_change = None

        return result, buffer_change, client

    def startData(self, name, data, remaining):
        """Begin responding to the named client and read any related data.

        *remaining* encodes how much more client data to expect: a string
        (transfer encoding), a positive byte count, 0 (next request) or
        anything else for passthrough.  Returns (client, content, source).
        """
        # NOTE: soo ugly but fast to code
        # map the remaining-length hint onto a read mode for readRelated
        nb_to_read = 0
        if type(remaining) == type(''):
            if 'chunked' in remaining:
                mode = 'chunked'
            else:
                mode = 'passthrough'
        elif remaining > 0:
            mode = 'transfer'
            nb_to_read = remaining
        elif remaining == 0:
            mode = 'request'
        else:
            mode = 'passthrough'

        client, source = self.byname.get(name, (None, None))
        if client:
            try:
                command, d = data
            except (ValueError, TypeError):
                self.log.error('invalid command sent to client %s' % name)
                self.cleanup(client.sock, name)
                res = None
            else:
                if client.sock not in self.bysock:
                    # Start checking for content sent by the client
                    self.bysock[client.sock] = client, source

                    # watch for the client sending new data
                    self.poller.addReadSocket('read_client', client.sock)

                    # make sure we don't somehow end up with this still here
                    self.norequest.pop(client.sock, (None, None))

                    # NOTE: always done already in readRequest
                    self.poller.removeReadSocket('opening_client', client.sock)
                    res = client.startData(command, d)

                else:
                    res = client.restartData(command, d)

                    # If we are here then we must have prohibited reading from the client
                    # and it must otherwise have been in a readable state
                    self.poller.uncorkReadSocket('read_client', client.sock)

            if res is not None:
                buffered, had_buffer, sent4, sent6 = res

                # buffered data we read with the HTTP headers
                name, peer, request, content = client.readRelated(
                    mode, nb_to_read)
                if request:
                    # a pipelined second request - not supported, drop the client
                    self.total_requested += 1
                    self.log.info('reading multiple requests')
                    self.cleanup(client.sock, name)
                    buffered, had_buffer = None, None
                    content = None

                elif request is None:
                    self.cleanup(client.sock, name)
                    buffered, had_buffer = None, None
                    content = None

            else:
                # we cannot write to the client so clean it up
                self.cleanup(client.sock, name)

                buffered, had_buffer = None, None
                content = None

            if buffered:
                if client.sock not in self.buffered:
                    self.buffered.append(client.sock)

                    # watch for the socket's send buffer becoming less than full
                    self.poller.addWriteSocket('write_client', client.sock)

            elif had_buffer and client.sock in self.buffered:
                self.buffered.remove(client.sock)

                # we no longer care about writing to the client
                self.poller.removeWriteSocket('write_client', client.sock)
        else:
            content = None

        return client, content, source

    def corkUploadByName(self, name):
        """Pause reading upload data from the named client."""
        entry = self.byname.get(name, (None, None))
        client = entry[0]
        if client:
            self.poller.corkReadSocket('read_client', client.sock)

    def uncorkUploadByName(self, name):
        """Resume reading upload data from the named client, if established."""
        entry = self.byname.get(name, (None, None))
        client = entry[0]
        if client:
            if client.sock in self.bysock:
                self.poller.uncorkReadSocket('read_client', client.sock)

    def cleanup(self, sock, name):
        """Forget all state for a client and release its poller registrations.

        The client may be indexed by socket (bysock), by socket while still
        awaiting its first request (norequest), or only by name (byname):
        look it up in that order.
        """
        self.log.debug('cleanup for socket %s' % sock)

        # BUGFIX: the previous lookup chain ended with "(client, None) or
        # self.byname.get(...)"; a two-element tuple is always truthy, so the
        # byname fallback was dead code and such clients were never shut down
        client, source = self.bysock.get(sock, (None, None))
        if client is None:
            client, source = self.norequest.get(sock, (None, None))
        if client is None:
            client, source = self.byname.get(name, (None, None))

        self.bysock.pop(sock, None)
        self.norequest.pop(sock, (None, None))
        self.byname.pop(name, None)

        if client:
            # drop every poller registration the client may still hold
            self.poller.removeWriteSocket('write_client', client.sock)
            self.poller.removeReadSocket('read_client', client.sock)
            self.poller.removeReadSocket('opening_client', client.sock)

            client.shutdown()
        else:
            self.log.error('COULD NOT CLEAN UP SOCKET %s' % sock)

        if sock in self.buffered:
            self.buffered.remove(sock)

    def softstop(self):
        """Signal readiness for a soft shutdown.

        Returns True (and logs) only once every client connection is gone.
        """
        active = len(self.byname) + len(self.norequest)
        if active > 0:
            return False

        self.log.critical('no more client connection, exiting.')
        return True

    def stop(self):
        """Shut down every tracked client and clear all poller registrations."""
        for client, source in self.bysock.itervalues():
            client.shutdown()

        for client, source in self.norequest.itervalues():
            client.shutdown()

        self.poller.clearRead('read_client')
        self.poller.clearRead('opening_client')
        self.poller.clearWrite('write_client')

        # NOTE(review): norequest is reset to a plain dict here although it is
        # a TimeCache in __init__ - confirm nothing uses expiry after stop()
        self.bysock = {}
        self.norequest = {}
        self.byname = {}
        self.buffered = []
Example #31
0
import datetime
import select
from select import KQ_FILTER_READ, KQ_FILTER_WRITE, KQ_EV_ADD, KQ_EV_DELETE, kevent
# KQ_EV_ENABLE, KQ_EV_DISABLE,
# KQ_EV_CLEAR, KQ_EV_ONESHOT,
# KQ_EV_ERROR,
# KQ_EV_EOF,

from interface import IPoller

from exaproxy.util.log.logger import Logger
from exaproxy.configuration import load

# module-level configuration and logger shared by the poller implementation below
configuration = load()
log = Logger('select', configuration.log.server)

class KQueuePoller (IPoller):
	"""Poller implementation backed by BSD kqueue."""

	# NOTE(review): `select.kqueue` needs the `select` module itself in scope;
	# the import block above only pulls names *from* select - confirm
	# `import select` is present at the top of the file
	kqueue = staticmethod(select.kqueue)

	def __init__(self, speed):
		self.speed = speed    # poll timeout

		self.sockets = {}    # per-name socket bookkeeping
		self.pollers = {}
		self.master = self.kqueue()
		self.errors = {}
		self.max_events = 10000    # upper bound on events fetched per poll

	def addReadSocket(self, name, sock):
		sockets, poller, fdtosock, corked = self.sockets[name]
Example #32
0
class ContentManager(object):
    """Manage outbound (server-side) downloads on behalf of clients."""

    downloader_factory = Content

    def __init__(self, supervisor, configuration):
        self.total_sent4 = 0L    # bytes fetched over IPv4
        self.total_sent6 = 0L    # bytes fetched over IPv6
        self.opening = {}        # downloads still connecting
        self.established = {}    # downloads with an established connection
        self.byclientid = {}     # client id -> downloader
        self.buffered = []       # sockets with a full send buffer
        self.retry = []
        self.configuration = configuration
        self.supervisor = supervisor

        self.poller = supervisor.poller
        self.log = Logger('download', configuration.log.download)

        # root directory for locally served pages (error pages etc)
        self.location = os.path.realpath(
            os.path.normpath(configuration.web.html))
        self.page = supervisor.page
        self._header = {}        # per-file cache of generated response headers

    def hasClient(self, client_id):
        """True when a downloader exists for *client_id*."""
        return self.byclientid.__contains__(client_id)

    def getLocalContent(self, code, name):
        """Return a response serving the local file *name* under self.location.

        Returns ('file', (header, filename)) on success or ('close',
        http_response) on error.  Generated headers are cached per file and
        invalidated when the file is modified.
        """
        filename = os.path.normpath(os.path.join(self.location, name))
        # refuse any path that escapes the configured html directory
        if not filename.startswith(self.location + os.path.sep):
            filename = ''

        if os.path.isfile(filename):
            try:
                stat = os.stat(filename)
            except IOError:
                # NOTE: we are always returning an HTTP/1.1 response
                content = 'close', http(
                    501, 'local file is inaccessible %s' % str(filename))
            else:
                if filename in self._header:
                    cache_time, header = self._header[filename]
                else:
                    cache_time, header = None, None

                if cache_time is None or cache_time < stat.st_mtime:
                    header = file_header(code, stat.st_size, filename)
                    # BUGFIX: cache the modification time (was st_size) so the
                    # comparison against st_mtime above actually detects changes
                    self._header[filename] = stat.st_mtime, header

                content = 'file', (header, filename)
        else:
            self.log.debug('local file is missing for %s: %s' %
                           (str(name), str(filename)))
            # NOTE: we are always returning an HTTP/1.1 response
            content = 'close', http(
                501, 'could not serve missing file %s' % str(filename))

        return content

    def readLocalContent(self, code, reason, data={}):
        """Return an HTTP response built from a local template file.

        *data* is only read (%-interpolated into the template), so the
        mutable default argument is harmless here.
        """
        filename = os.path.normpath(os.path.join(self.location, reason))
        # refuse any path that escapes the configured html directory
        if not filename.startswith(self.location + os.path.sep):
            filename = ''

        if os.path.isfile(filename):
            try:
                with open(filename) as fd:
                    body = fd.read() % data

                # NOTE: we are always returning an HTTP/1.1 response
                content = 'close', http(code, body)
            except IOError:
                self.log.debug('local file is missing for %s: %s' %
                               (str(reason), str(filename)))
                # NOTE: we are always returning an HTTP/1.1 response
                content = 'close', http(
                    501, 'could not serve missing file  %s' % str(reason))
        else:
            self.log.debug('local file is missing for %s: %s' %
                           (str(reason), str(filename)))
            # NOTE: we are always returning an HTTP/1.1 response
            content = 'close', http(
                501, 'could not serve missing file  %s' % str(reason))

        return content

    def getDownloader(self, client_id, host, port, command, request):
        """Return (downloader, newdownloader) for a client's request.

        Reuses the client's existing downloader when it already targets the
        same host:port, otherwise creates a new one.  Returns (None, False)
        when no connection can be made or is allowed.
        """
        downloader = self.byclientid.get(client_id, None)
        if downloader:
            # NOTE: with pipeline, consequent request could go to other sites if the browser knows we are a proxy
            # NOTE: therefore the second request could reach the first site
            # NOTE: and we could kill the connection before the data is fully back to the client
            # NOTE: in practice modern browser are too clever and test for it !
            if host != downloader.host or port != downloader.port:
                self.endClientDownload(client_id)
                downloader = None
            else:
                newdownloader = False

        if isipv4(host):
            bind = self.configuration.tcp4.bind
        elif isipv6(host):
            bind = self.configuration.tcp6.bind
        else:
            # should really never happen
            self.log.critical(
                'the host IP address is neither IPv4 or IPv6 .. what year is it ?'
            )
            return None, False

        if downloader is None:
            # supervisor.local is replaced when interface are changed, so do not cache or reference it in this class
            # requests to the proxy's own addresses are only allowed when whitelisted
            if host in self.supervisor.local:
                for h, p in self.configuration.security.local:
                    if (h == '*' or h == host) and (p == '*' or p == port):
                        break
                else:
                    # we did not break
                    return None, False

            downloader = self.downloader_factory(client_id, host, port, bind,
                                                 command, request, self.log)
            newdownloader = True

        if downloader.sock is None:
            return None, False

        return downloader, newdownloader

    def getContent(self, client_id, command, args):
        try:
            if command == 'download':
                try:
                    host, port, upgrade, length, request = args
                except (ValueError, TypeError), e:
                    raise ParsingError()

                downloader, newdownloader = self.getDownloader(
                    client_id, host, int(port), command, request)

                if downloader is not None:
                    content = ('stream', '')
                    if upgrade in ('', 'http/1.0', 'http/1.1'):
                        length = int(length) if length.isdigit() else length
                    else:
                        length = -1
                else:
                    content = self.getLocalContent('400', 'noconnect.html')
                    length = 0

            elif command == 'connect':
                try:
                    host, port, request = args
                except (ValueError, TypeError), e:
                    raise ParsingError()

                downloader, newdownloader = self.getDownloader(
                    client_id, host, int(port), command, '')

                if downloader is not None:
                    content = ('stream', '')
                    length = -1  # the client can send as much data as it wants
                else:
                    content = self.getLocalContent('400', 'noconnect.html')
                    length = 0
Example #33
0
class ContentManager(object):
	"""Manage outbound (server-side) downloads on behalf of clients."""

	downloader_factory = Content

	def __init__(self, supervisor, configuration):
		self.total_sent4 = 0L    # bytes fetched over IPv4
		self.total_sent6 = 0L    # bytes fetched over IPv6
		self.opening = {}        # downloads still connecting
		self.established = {}    # downloads with an established connection
		self.byclientid = {}     # client id -> downloader
		self.buffered = []       # sockets with a full send buffer
		self.retry = []
		self.configuration = configuration
		self.supervisor = supervisor

		self.poller = supervisor.poller
		self.log = Logger('download', configuration.log.download)

		# root directory for locally served pages (error pages etc)
		self.location = os.path.realpath(os.path.normpath(configuration.web.html))
		self.page = supervisor.page
		self._header = {}        # per-file cache of generated response headers

	def hasClient(self, client_id):
		"""True when a downloader exists for *client_id*."""
		return self.byclientid.__contains__(client_id)

	def getLocalContent(self, code, name):
		"""Return a response serving the local file *name* under self.location.

		Returns ('file', (header, filename)) on success or ('close',
		http_response) on error.  Generated headers are cached per file and
		invalidated when the file is modified.
		"""
		filename = os.path.normpath(os.path.join(self.location, name))
		# refuse any path that escapes the configured html directory
		if not filename.startswith(self.location + os.path.sep):
			filename = ''

		if os.path.isfile(filename):
			try:
				stat = os.stat(filename)
			except IOError:
				# NOTE: we are always returning an HTTP/1.1 response
				content = 'close', http(501, 'local file is inaccessible %s' % str(filename))
			else:
				if filename in self._header:
					cache_time, header = self._header[filename]
				else:
					cache_time, header = None, None

				if cache_time is None or cache_time < stat.st_mtime:
					header = file_header(code, stat.st_size, filename)
					# BUGFIX: cache the modification time (was st_size) so the
					# comparison against st_mtime above actually detects changes
					self._header[filename] = stat.st_mtime, header

				content = 'file', (header, filename)
		else:
			self.log.debug('local file is missing for %s: %s' % (str(name), str(filename)))
			# NOTE: we are always returning an HTTP/1.1 response
			content = 'close', http(501, 'could not serve missing file %s' % str(filename))

		return content

	def readLocalContent(self, code, reason, data={}):
		"""Return an HTTP response built from a local template file.

		*data* is only read (%-interpolated into the template), so the
		mutable default argument is harmless here.
		"""
		filename = os.path.normpath(os.path.join(self.location, reason))
		# refuse any path that escapes the configured html directory
		if not filename.startswith(self.location + os.path.sep):
			filename = ''

		if os.path.isfile(filename):
			try:
				with open(filename) as fd:
					body = fd.read() % data

				# NOTE: we are always returning an HTTP/1.1 response
				content = 'close', http(code, body)
			except IOError:
				self.log.debug('local file is missing for %s: %s' % (str(reason), str(filename)))
				# NOTE: we are always returning an HTTP/1.1 response
				content = 'close', http(501, 'could not serve missing file  %s' % str(reason))
		else:
			self.log.debug('local file is missing for %s: %s' % (str(reason), str(filename)))
			# NOTE: we are always returning an HTTP/1.1 response
			content = 'close', http(501, 'could not serve missing file  %s' % str(reason))

		return content


	def getDownloader(self, client_id, host, port, command, request):
		"""Return (downloader, newdownloader) for a client's request.

		Reuses the client's existing downloader when it already targets the
		same host:port, otherwise creates a new one.  Returns (None, False)
		when no connection can be made or is allowed.
		"""
		downloader = self.byclientid.get(client_id, None)
		if downloader:
			# NOTE: with pipeline, consequent request could go to other sites if the browser knows we are a proxy
			# NOTE: therefore the second request could reach the first site
			# NOTE: and we could kill the connection before the data is fully back to the client
			# NOTE: in practice modern browser are too clever and test for it !
			if host != downloader.host or port != downloader.port:
				self.endClientDownload(client_id)
				downloader = None
			else:
				newdownloader = False

		if isipv4(host):
			bind = self.configuration.tcp4.bind
		elif isipv6(host):
			bind = self.configuration.tcp6.bind
		else:
			# should really never happen
			self.log.critical('the host IP address is neither IPv4 or IPv6 .. what year is it ?')
			return None, False

		if downloader is None:
			# supervisor.local is replaced when interface are changed, so do not cache or reference it in this class
			# requests to the proxy's own addresses are only allowed when whitelisted
			if host in self.supervisor.local:
				for h,p in self.configuration.security.local:
					if (h == '*' or h == host) and (p == '*' or p == port):
						break
				else:
					# we did not break
					return None, False

			downloader = self.downloader_factory(client_id, host, port, bind, command, request, self.log)
			newdownloader = True

		if downloader.sock is None:
			return None, False

		return downloader, newdownloader

	def getContent(self, client_id, command, args):
		try:
			if command == 'download':
				try:
					host, port, upgrade, length, request = args.split('\0', 4)
				except (ValueError, TypeError), e:
					raise ParsingError()

				downloader, newdownloader = self.getDownloader(client_id, host, int(port), command, request)

				if downloader is not None:
					content = ('stream', '')
					if upgrade in ('', 'http/1.0', 'http/1.1'):
						length = int(length) if length.isdigit() else length
					else:
						length = -1
				else:
					content = self.getLocalContent('400', 'noconnect.html')
					length = 0

			elif command == 'connect':
				try:
					host, port, request = args.split('\0', 2)
				except (ValueError, TypeError), e:
					raise ParsingError()

				downloader, newdownloader = self.getDownloader(client_id, host, int(port), command, '')

				if downloader is not None:
					content = ('stream', '')
					length = -1  # the client can send as much data as it wants
				else:
					content = self.getLocalContent('400', 'noconnect.html')
					length = 0
Example #34
0
			'memory' : (value.boolean,string.lower,'false','command line option --memory'),
			'pdb'    : (value.boolean,string.lower,'false','command line option --pdb'),
			'log'    : (value.boolean,string.lower,'false','command line option --debug'),
		},
	}

	try:
		configuration = load('exaproxy',defaults,arguments['configuration'])
	except ConfigurationError,e:
		print >> sys.stderr, 'configuration issue,', str(e)
		sys.exit(1)

	configuration.proxy.version = version

	from exaproxy.util.log.logger import Logger
	log = Logger('supervisor', configuration.log.supervisor)

	for arg in sys.argv[1:]:
		if arg in ['--',]:
			break
		if arg in ['-h','--help']:
			usage()
			sys.exit(0)
		if arg in ['-i','-fi','--ini']:
			ini()
			sys.exit(0)
		if arg in ['-e','-fe','--env']:
			env()
			sys.exit(0)
		if arg in ['-di','--diff-ini']:
			ini(True)
Example #35
0
class ClientManager (object):
	"""Track HTTP and ICAP client connections and mediate their socket I/O."""

	def __init__(self, poller, configuration):
		self.total_sent4 = 0L        # bytes sent to clients over IPv4
		self.total_sent6 = 0L        # bytes sent to clients over IPv6
		self.total_requested = 0L    # number of client requests seen
		# clients that connected but have not yet sent a full request
		self.norequest = TimeCache(configuration.http.idle_connect)
		self.bysock = {}    # sock -> (client, source) once a request was read
		self.byname = {}    # client name -> socket
		self.buffered = []  # sockets whose send buffer is currently full
		self._nextid = 0    # last allocated client id
		self.poller = poller
		self.log = Logger('client', configuration.log.client)
		self.http_max_buffer = configuration.http.header_size
		self.icap_max_buffer = configuration.icap.header_size
		# whether each listener type sits behind a PROXY-protocol frontend
		self.proxied = {
			'proxy' : configuration.http.proxied,
			'icap'  : configuration.icap.proxied,
		}

	def __contains__(self, item):
		"""True when *item* is a socket with an established client."""
		return self.bysock.__contains__(item)

	def lookupSocket (self, item):
		"""Return the socket registered under client name *item*, or None."""
		return self.byname.get(item)

	def getnextid(self):
		"""Allocate the next unique client identifier, returned as a string."""
		allocated = self._nextid + 1
		self._nextid = allocated
		return str(allocated)

	def expire (self,number=100):
		"""Close up to *number* connections that idled without ever sending
		a request.  Returns how many clients were cleaned up."""
		count = 0
		for sock in self.norequest.expired(number):
			entry = self.norequest.get(sock, (None, None))
			client = entry[0]
			if client:
				self.cleanup(sock, client.name)
				count += 1

		return count

	def httpConnection (self, sock, peer, source):
		"""Register a freshly accepted HTTP client socket."""
		name = self.getnextid()
		client = HTTPClient(name, sock, peer, self.log, self.http_max_buffer, self.proxied.get(source))

		# no request seen yet: the connection is subject to the idle-connect timeout
		self.norequest[sock] = client, source
		self.byname[name] = sock

		# watch for the opening request
		self.poller.addReadSocket('opening_client', client.sock)

		#self.log.info('new id %s (socket %s) in clients : %s' % (name, sock, sock in self.bysock))
		return peer

	def icapConnection (self, sock, peer, source):
		"""Register a freshly accepted ICAP client socket."""
		name = self.getnextid()
		client = ICAPClient(name, sock, peer, self.log, self.icap_max_buffer, self.proxied.get(source))

		# no request seen yet: the connection is subject to the idle-connect timeout
		self.norequest[sock] = client, source
		self.byname[name] = sock

		# watch for the opening request
		self.poller.addReadSocket('opening_client', client.sock)

		#self.log.info('new id %s (socket %s) in clients : %s' % (name, sock, sock in self.bysock))
		return peer

	def readRequest (self, sock):
		"""Read only the initial HTTP headers sent by the client"""

		client, source = self.norequest.get(sock, (None, None))

		if client:
			name, peer, request, subrequest, content = client.readData()
			if request:
				self.total_requested += 1

				# headers can be read only once
				self.norequest.pop(sock, (None, None))
				self.bysock[sock] = client, source

				# watch for the client sending new data
				self.poller.addReadSocket('read_client', client.sock)

				# we have now read the client's opening request
				self.poller.removeReadSocket('opening_client', client.sock)

				# do not read more data until we have properly handled the request
				self.poller.corkReadSocket('read_client', sock)

			elif request is None:
				# the connection closed (or errored) before a full request arrived
				self.cleanup(sock, client.name)
		else:
			self.log.error('trying to read headers from a client that does not exist %s' % sock)
			name, peer, request, subrequest, content, source = None, None, None, None, None, None

		return name, peer, request, subrequest, content, source


	def readData (self, sock):
		"""Read request/content data from an established client socket."""
		client, source = self.bysock.get(sock, (None, None))
		if client:
			name, peer, request, subrequest, content = client.readData()
			if request:
				self.total_requested += 1
				# Parsing of the new request will be handled asynchronously. Ensure that
				# we do not read anything from the client until a request has been sent
				# to the remote webserver.
				# Since we just read a request, we know that the cork is not currently
				# set and so there's no risk of it being erroneously removed.
				self.poller.corkReadSocket('read_client', sock)

			elif request is None:
				# connection closed or errored - drop the client
				self.cleanup(sock, client.name)
		else:
			self.log.error('trying to read from a client that does not exist %s' % sock)
			name, peer, request, subrequest, content = None, None, None, None, None


		return name, peer, request, subrequest, content, source

	def sendData (self, sock, data):
		"""Queue *data* for the client on *sock*.

		Returns (result, buffer_change, name, source): result is the buffered
		flag (None if the client had to be cleaned up) and buffer_change says
		whether the set of write-buffered sockets changed.
		"""
		client, source = self.bysock.get(sock, (None, None))
		if client:
			name = client.name
			res = client.writeData(data)

			if res is None:
				# close the client connection
				self.cleanup(sock, client.name)

				buffered, had_buffer, sent4, sent6 = None, None, 0, 0
				result = None
				buffer_change = None
			else:
				buffered, had_buffer, sent4, sent6 = res
				self.total_sent4 += sent4
				self.total_sent6 += sent6
				result = buffered


			if buffered:
				if sock not in self.buffered:
					self.buffered.append(sock)
					buffer_change = True

					# watch for the socket's send buffer becoming less than full
					self.poller.addWriteSocket('write_client', client.sock)
				else:
					buffer_change = False

			elif had_buffer and sock in self.buffered:
				self.buffered.remove(sock)
				buffer_change = True

				# we no longer care about writing to the client
				self.poller.removeWriteSocket('write_client', client.sock)

			else:
				buffer_change = False
		else:
			result = None
			buffer_change = None
			name = None

		return result, buffer_change, name, source


	def parseRemaining (self, remaining):
		"""Map the remaining-length hint onto a (mode, bytes_to_read) pair.

		A string selects 'chunked' or 'passthrough'; a positive count selects
		'transfer'; zero means the next request follows; anything else is
		'passthrough'.
		"""
		if isinstance(remaining, basestring):
			return ('chunked' if remaining == 'chunked' else 'passthrough'), 0

		if remaining > 0:
			return 'transfer', remaining

		if remaining == 0:
			return '', 0

		return 'passthrough', 0

	def startData(self, sock, data, remaining):
		"""Begin responding to a client and read any related upload data.

		Returns (content, source); content is None when the client had to be
		cleaned up.
		"""
		client, source = self.bysock.get(sock, (None, None))

		try:
			mode, nb_to_read = self.parseRemaining(remaining)
			command, d = data if client is not None else (None, None)

		except (ValueError, TypeError), e:
			# NOTE(review): client.name raises AttributeError when client is
			# None and parsing still fails - confirm this path cannot happen
			self.log.error('invalid command sent to client %s' % client.name)
			command, d = None, None

		if not client or command is None:
			return None, source

		name, peer, res = client.startData(command, d)

		if res is not None:
			# pick up any data the client sent along with its request
			name, peer, request, subrequest, content = client.readRelated(mode, nb_to_read)

			buffered, had_buffer, sent4, sent6 = res

			# the response is on its way - resume reading from the client
			self.poller.uncorkReadSocket('read_client', client.sock)

			self.total_sent4 += sent4
			self.total_sent6 += sent6

		else:
			# we cannot write to the client so clean it up
			self.cleanup(client.sock, name)
			return None, source


		if request:
			# a pipelined second request - not supported, drop the client
			self.total_requested += 1
			self.log.info('reading multiple requests')
			self.cleanup(client.sock, name)
			buffered, had_buffer = None, None
			content = None

		elif request is None:
			self.cleanup(client.sock, name)
			buffered, had_buffer = None, None
			content = None

		if buffered is True and had_buffer is False:
			self.buffered.append(client.sock)

			# watch for the socket's send buffer becoming less than full
			self.poller.addWriteSocket('write_client', client.sock)

		elif buffered is False and had_buffer is True:
			self.buffered.remove(client.sock)

			# we no longer care about writing to the client
			self.poller.removeWriteSocket('write_client', client.sock)

		return content, source
Example #36
0
class Page (object):
	"""Render the HTML/JSON pages served by the ExaProxy web interface.

	Routes a request path to the matching monitoring, graphing or control
	page and returns the page body as a string.
	"""

	def __init__(self,supervisor):
		self.supervisor = supervisor
		self.monitor = supervisor.monitor
		self.email_sent = False  # only one mail may be sent per process lifetime
		self.log = Logger('web', supervisor.configuration.log.web)

	def _introspection (self,objects):
		"""Render the live attribute listing for the object path *objects*."""
		introduction = '<div style="padding: 10px 10px 10px 10px; font-weight:bold;">Looking at the internal of ExaProxy for %s </div><br/>\n' % cgi.escape('.'.join(objects))
		link = cgi.escape('/'.join(objects[:-1])) if objects[:-1] else 'supervisor'
		line = ['<a href="/information/introspection/%s.html">Back to parent object</a><br/>' % link]
		for k,content in self.monitor.introspection(objects):
			link = '/information/introspection/%s.html' % cgi.escape('%s/%s' % ('/'.join(objects),k))
			line.append('<a href="%s">%s</a><span class="value">%s</span><br/>' % (link,k,cgi.escape(content)))
		return introduction + _listing % ('\n'.join(line))

	def _configuration (self):
		"""Render the current configuration as a key/value listing."""
		introduction = '<div style="padding: 10px 10px 10px 10px; font-weight:bold;">ExaProxy Configuration</div><br/>\n'
		line = []
		for k,v in sorted(self.monitor.configuration().items()):
			line.append('<span class="key">%s</span><span class="value">&nbsp; %s</span><br/>' % (k,cgi.escape(str(v))))
		return introduction + _listing % ('\n'.join(line))

	def _statistics (self):
		"""Render the runtime statistics as a key/value listing."""
		introduction = '<div style="padding: 10px 10px 10px 10px; font-weight:bold;">ExaProxy Statistics</div><br/>\n'
		line = []
		for k,v in sorted(self.monitor.statistics().items()):
			# was str(str(v)) - the double conversion was redundant
			line.append('<span class="key">%s</span><span class="value">&nbsp; %s</span><br/>' % (k,cgi.escape(str(v))))
		return introduction + _listing % ('\n'.join(line))

	def _connections (self):
		"""Graph of client/server connection counts."""
		return graph(
			self.monitor,
			'Connections',
			20000,
			[
				'clients.silent',
				'clients.speaking',
				'servers.opening',
				'servers.established',
				]
		)

	def _processes (self):
		"""Graph of forked redirector process counts."""
		return graph(
			self.monitor,
			'Forked processes',
			20000,
			[
				'processes.forked',
				'processes.min',
				'processes.max',
			]
		)

	def _requests (self):
		"""Graph of requests per second received from clients."""
		return graph(
			self.monitor,
			'Requests/seconds received from clients',
			20000,
			[
				'clients.requests',
			],
			True,
		)

	def _clients (self):
		"""Graph of client-side inbound bandwidth (bits/second)."""
		return graph(
			self.monitor,
			'Bits/seconds received from clients',
			20000,
			[
				'transfer.client4',
				'transfer.client6',
			],
			True,
			adaptor=Bpstobps,
		)

	def _servers (self):
		"""Graph of server-side inbound bandwidth (bits/second)."""
		return graph(
			self.monitor,
			'Bits/seconds received from servers',
			20000,
			[
				'transfer.content4',
				'transfer.content6',
			],
			True,
			adaptor=Bpstobps,
		)

	def _transfer (self):
		"""Graph of total inbound bandwidth (bits/second)."""
		return graph(
			self.monitor,
			'Bits/seconds received',
			20000,
			[
				'transfer.client',
				'transfer.content',
			],
			True,
			adaptor=Bpstobps,
		)

	def _loops (self):
		"""Graph of reactor loop iterations."""
		return graph(
			self.monitor,
			'Reactor loops',
			20000,
			[
				'load.loops',
			],
			True,
		)

	def _events (self):
		"""Graph of sockets that became readable."""
		return graph(
			self.monitor,
			'Sockets which became readeable',
			20000,
			[
				'load.events',
			],
			True,
		)

	def _queue (self):
		"""Graph of URLs queued for classification."""
		return graph(
			self.monitor,
			'Queued URL for classification',
			20000,
			[
				'queue.size',
			],
			True,
		)


	def _source (self,bysock):
		"""List connection counts per peer host for the given socket map."""
		conns = 0

		clients = defaultdict(lambda:0)
		for sock in bysock:
			try:
				host,port = sock.getpeername()
			except socket.error:
				# the peer may already have disconnected
				host,port = None,None

			clients[host] += 1
			conns += 1

		# invert to number-of-connections -> hosts so we can sort by count
		ordered = defaultdict(list)
		for host,number in clients.items():
			ordered[number].append(host)

		result = []
		result.append('<div style="padding: 10px 10px 10px 10px; font-weight:bold;">ExaProxy Statistics</div><br/>')
		result.append('<center>we have %d connection(s) from %d source(s)</center><br/>' % (conns, len(clients)))
		for number in reversed(sorted(ordered)):
			for host in ordered[number]:
				result.append('<span class="key">%s</span><span class="value">&nbsp; %s</span><br/>' % (host,number))

		return _listing % '\n'.join(result)

	def _servers_source (self):
		"""Per-host listing of established server connections."""
		return self._source(self.supervisor.content.established)

	def _clients_source (self):
		"""Per-host listing of client connections."""
		return self._source(self.supervisor.client.bysock)


	def _workers (self):
		"""Render the worker min/max configuration forms."""
		form = '<form action="/control/workers/commit" method="get">%s: <input type="text" name="%s" value="%s"><input type="submit" value="Submit"></form>'

		change = {
			'exaproxy.redirector.minimum' : self.supervisor.manager.low,
			'exaproxy.redirector.maximum' : self.supervisor.manager.high,
		}

		forms = []
		for name in ('exaproxy.redirector.minimum', 'exaproxy.redirector.maximum'):
			value = change[name]
			forms.append(form % (name,name,value))
		return '<pre style="margin-left:40px;">\n' + '\n'.join(forms)

	def _run (self):
		"""Render the eval/exec debug forms."""
		s  = '<pre style="margin-left:40px;">'
		s += '<form action="/control/debug/eval" method="get">eval <textarea type="text" name="python" cols="100" rows="10"></textarea><input type="submit" value="Submit"></form>'
		s += '<form action="/control/debug/exec" method="get">exec <textarea type="text" name="python" cols="100" rows="10"></textarea><input type="submit" value="Submit"></form>'
		return s

	def _logs (self):
		"""Return the raw (unsanitised) log history."""
		return 'do not view this in a web browser - the input is not sanitised, you have been warned !\n\n' + '\n'.join(History().formated())

	def _errs (self):
		"""Return the raw (unsanitised) error history."""
		return 'do not view this in a web browser - the input is not sanitised, you have been warned !\n\n' + '\n'.join(Errors().formated())

	def _email (self,args):
		"""Send a one-off email report; refuses after the first send."""
		if self.email_sent:
			return '<center><b>You can only send one email per time ExaProxy is started</b></center>'
		self.email_sent, message = mail.send(args)
		return message

	def _json_running (self):
		"""Return the most recent per-second statistics as JSON."""
		return json.dumps(self.monitor.seconds[-1],sort_keys=True,indent=2,separators=(',', ': '))

	def _json_configuration (self):
		"""Return the current configuration as JSON."""
		return json.dumps(self.monitor.configuration(),sort_keys=True,indent=2,separators=(',', ': '))

	def html (self,path):
		"""Dispatch a request path to the matching page and return its body."""
		if len(path) > 5000:
			return menu('<center><b>path is too long</b></center>')

		if path == '/':
			path = '/index.html'
			args = ''
		elif '?' in path:
			path,args = path.split('?',1)
		else:
			args = ''

		if not path.startswith('/'):
			return menu('<center><b>invalid url</b></center>')
		elif not path.endswith('.html'):
			if path == '/humans.txt':
				return humans.txt
			if path not in ('/json','/json/running','/json/configuration','/control/workers/commit','/control/debug/eval','/control/debug/exec'):
				return menu('<center><b>invalid url</b></center>')
			sections = path[1:].split('/') + ['']
		else:
			sections = path[1:-5].split('/') + ['']

		if not sections[0]:
			return menu(index)
		section = sections[0]
		subsection = sections[1]

		if section == 'json':
			if subsection == 'running':
				return self._json_running()
			if subsection == 'configuration':
				return self._json_configuration()
			# fixed typo in the user-visible error key ('errror' -> 'error')
			return '{ "error" : "invalid url", "valid-paths": [ "/json/running", "/json/configuration" ] }'

		if section == 'index':
			return menu(index)

		if section == 'information':
			if subsection == 'introspection':
				return menu(self._introspection(sections[2:-1]))
			if subsection == 'configuration':
				return menu(self._configuration())
			if subsection == 'statistics':
				return menu(self._statistics())
			if subsection == 'logs':
				return self._logs()
			if subsection == 'errs':
				return self._errs()
			return menu(index)

		if section == 'graph':
			if subsection == 'processes':
				return menu(self._processes())
			if subsection == 'connections':
				return menu(self._connections())
			if subsection == 'servers':
				return menu(self._servers())
			if subsection == 'clients':
				return menu(self._clients())
			if subsection == 'transfered':
				return menu(self._transfer())
			if subsection == 'requests':
				return menu(self._requests())
			if subsection == 'loops':
				return menu(self._loops())
			if subsection == 'events':
				return menu(self._events())
			if subsection == 'queue':
				return menu(self._queue())
			return menu(index)

		if section == 'end-point':
			if subsection == 'servers':
				return menu(self._servers_source())
			if subsection == 'clients':
				return menu(self._clients_source())
			return menu(index)

		if section == 'control':
			action = (sections + [None,]) [2]

			if subsection == 'debug':
				# SECURITY: the eval/exec endpoints run arbitrary Python sent
				# by the browser; they are only reachable when web.debug is
				# explicitly enabled in the configuration
				if not self.supervisor.configuration.web.debug:
					return menu('not enabled')

				if action == 'exec':
					if '=' in args:
						try:
							key,value = args.split('=',1)
							self.log.critical('PYTHON CODE RAN : %s' % value)
							command = unquote(value.replace('+',' '))
							code = compile(command,'<string>', 'exec')
							exec(code)
							return 'done !'
						except Exception as e:
							return 'failed to run : \n' + command + '\n\nreason : \n' + str(type(e)) + '\n' + str(e)

				if action == 'eval':
					if '=' in args:
						try:
							key,value = args.split('=',1)
							self.log.critical('PYTHON CODE RAN : %s' % value)
							command = unquote(value.replace('+',' '))
							return str(eval(command))
						except Exception as e:
							return 'failed to run : \n' + command + '\n\nreason : \n' + str(type(e)) + '\n' + str(e)

				return menu(self._run())

			if subsection == 'workers':
				if action == 'commit':
					if '=' in args:
						key,value = args.split('=',1)

						if key == 'exaproxy.redirector.minimum':
							if value.isdigit():  # this prevents negative values
								setting = int(value)
								if setting > self.supervisor.manager.high:
									return menu(self._workers() + '<div style="color: red; padding-top: 3em;">value is higher than exaproxy.redirector.maximum</div>')
								self.supervisor.manager.low = setting
								return menu(self._workers() + '<div style="color: green; padding-top: 3em;">changed successfully</div>')

						if key == 'exaproxy.redirector.maximum':
							if value.isdigit():
								setting = int(value)
								if setting < self.supervisor.manager.low:
									return menu(self._workers() + '<div style="color: red; padding-top: 3em;">value is lower than exaproxy.redirector.minimum</div>')
								self.supervisor.manager.high = setting
								return menu(self._workers() + '<div style="color: green; padding-top: 3em;">changed successfully</div>')

						return menu(self._workers() + '<div style="color: red; padding-top: 3em;">invalid request</div>')

				return menu(self._workers())

			return menu(index)
Example #37
0
	def __init__ (self, configuration, name):
		# worker-scoped logger, named after this worker's unique id
		self.log = Logger('worker ' + str(name), configuration.log.worker)
Example #38
0
import datetime
# fix: KQueuePoller references select.kqueue via the module, but only
# names *from* select were imported - 'import select' was missing
import select

from interface import IPoller

from select import KQ_FILTER_READ, KQ_FILTER_WRITE, KQ_EV_ADD, KQ_EV_DELETE, kevent
# KQ_EV_ENABLE, KQ_EV_DISABLE,
# KQ_EV_CLEAR, KQ_EV_ONESHOT,
# KQ_EV_ERROR,
# KQ_EV_EOF,


from exaproxy.util.log.logger import Logger
from exaproxy.configuration import load

# module-level configuration and logger, created once at import time
configuration = load()
log = Logger('select', configuration.log.server)

class KQueuePoller (IPoller):
	kqueue = staticmethod(select.kqueue)

	def __init__(self, speed):
		"""Prepare an (initially empty) kqueue-backed poller."""
		self.speed = speed

		self.master = self.kqueue()  # the kernel event queue
		self.sockets = {}            # registered sockets, per poller name
		self.pollers = {}
		self.errors = {}
		self.max_events = 10000      # upper bound on events fetched per poll

	def addReadSocket(self, name, sock):
		sockets, poller, fdtosock, corked = self.sockets[name]
Example #39
0
class Supervisor (object):
	"""Own every major subsystem of the proxy and drive the main loop.

	The class attributes are periodic-task frequencies expressed in
	reactor ticks, all derived from alarm_time (the SIGALRM interval
	in seconds).
	"""
	alarm_time = 0.1                           # regular backend work
	second_frequency = int(1/alarm_time)       # when we record history
	minute_frequency = int(60/alarm_time)      # when we want to average history
	increase_frequency = int(5/alarm_time)     # when we add workers
	decrease_frequency = int(60/alarm_time)    # when we remove workers
	saturation_frequency = int(20/alarm_time)  # when we report connection saturation
	interface_frequency = int(300/alarm_time)  # when we check for new interfaces

	# import os
	# clear = [hex(ord(c)) for c in os.popen('clear').read()]
	# clear = ''.join([chr(int(c,16)) for c in ['0x1b', '0x5b', '0x48', '0x1b', '0x5b', '0x32', '0x4a']])

	def __init__ (self,configuration):
		"""Wire together logging, the poller, managers, servers and signals.

		Ordering matters throughout: the redirector is forked before any
		threads are created, and signal handlers are installed last.
		"""
		# NOTE(review): the configuration argument is immediately shadowed
		# by a fresh load() - confirm this is intentional
		configuration = load()
		self.configuration = configuration

		# Only here so the introspection code can find them
		self.log = Logger('supervisor', configuration.log.supervisor)
		self.log.error('Starting exaproxy version %s' % configuration.proxy.version)

		self.signal_log = Logger('signal', configuration.log.signal)
		self.log_writer = SysLogWriter('log', configuration.log.destination, configuration.log.enable, level=configuration.log.level)
		self.usage_writer = UsageWriter('usage', configuration.usage.destination, configuration.usage.enable)

		# flush pending log messages on interpreter exit (Python 2 API)
		sys.exitfunc = self.log_writer.writeMessages

		self.log_writer.setIdentifier(configuration.daemon.identifier)
		#self.usage_writer.setIdentifier(configuration.daemon.identifier)

		if configuration.debug.log:
			self.log_writer.toggleDebug()
			self.usage_writer.toggleDebug()

		self.log.error('python version %s' % sys.version.replace(os.linesep,' '))
		self.log.debug('starting %s' % sys.argv[0])

		self.pid = PID(self.configuration)

		self.daemon = Daemon(self.configuration)
		self.poller = Poller(self.configuration.daemon)

		# declare every poller category up front, before any socket is added
		self.poller.setupRead('read_proxy')       # Listening proxy sockets
		self.poller.setupRead('read_web')         # Listening webserver sockets
		self.poller.setupRead('read_icap')        # Listening icap sockets
		self.poller.setupRead('read_redirector')  # Pipes carrying responses from the redirector process
		self.poller.setupRead('read_resolver')    # Sockets currently listening for DNS responses

		self.poller.setupRead('read_client')      # Active clients
		self.poller.setupRead('opening_client')   # Clients we have not yet read a request from
		self.poller.setupWrite('write_client')    # Active clients with buffered data to send
		self.poller.setupWrite('write_resolver')  # Active DNS requests with buffered data to send

		self.poller.setupRead('read_download')      # Established connections
		self.poller.setupWrite('write_download')    # Established connections we have buffered data to send to
		self.poller.setupWrite('opening_download')  # Opening connections

		self.monitor = Monitor(self)
		self.page = Page(self)
		self.content = ContentManager(self,configuration)
		self.client = ClientManager(self.poller, configuration)
		self.resolver = ResolverManager(self.poller, self.configuration, configuration.dns.retries*10)
		self.proxy = Server('http proxy',self.poller,'read_proxy', configuration.http.connections)
		self.web = Server('web server',self.poller,'read_web', configuration.web.connections)
		self.icap = Server('icap server',self.poller,'read_icap', configuration.icap.connections)

		self._shutdown = True if self.daemon.filemax == 0 else False  # stop the program
		self._softstop = False  # stop once all current connection have been dealt with
		self._reload = False  # unimplemented
		self._toggle_debug = False  # start logging a lot
		self._decrease_spawn_limit = 0
		self._increase_spawn_limit = 0
		self._refork = False  # unimplemented
		self._pdb = False  # turn on pdb debugging
		self._listen = None  # listening change ? None: no, True: listen, False: stop listening
		self.wait_time = 5.0  # how long do we wait at maximum once we have been soft-killed
		self.local = set()  # what addresses are on our local interfaces

		if not self.initialise():
			self._shutdown = True

		elif self.daemon.drop_privileges():
			self.log.critical('Could not drop privileges to \'%s\'. Refusing to run as root' % self.daemon.user)
			self.log.critical('Set the environment value USER to change the unprivileged user')
			self._shutdown = True

		# fork the redirector process before performing any further setup
		# NOTE(review): the fork happens even when _shutdown was set just
		# above - confirm the shutdown path cleans the child up
		redirector = fork_redirector(self.poller, self.configuration)

		# create threads _after_ all forking is done
		self.redirector = redirector_message_thread(redirector)

		self.reactor = Reactor(self.configuration, self.web, self.proxy, self.icap, self.redirector, self.content, self.client, self.resolver, self.log_writer, self.usage_writer, self.poller)

		self.interfaces()

		signal.signal(signal.SIGQUIT, self.sigquit)
		signal.signal(signal.SIGINT, self.sigterm)
		signal.signal(signal.SIGTERM, self.sigterm)
		# signal.signal(signal.SIGABRT, self.sigabrt)
		# signal.signal(signal.SIGHUP, self.sighup)

		signal.signal(signal.SIGTRAP, self.sigtrap)

		signal.signal(signal.SIGUSR1, self.sigusr1)
		signal.signal(signal.SIGUSR2, self.sigusr2)
		signal.signal(signal.SIGTTOU, self.sigttou)
		signal.signal(signal.SIGTTIN, self.sigttin)

		signal.signal(signal.SIGALRM, self.sigalrm)

		# make sure we always have data in history
		# (done in zero for dependencies reasons)
		self.monitor.zero()

	def exit (self):
		"""Terminate the process (sys.exit raises this same exception)."""
		raise SystemExit()

	def sigquit (self,signum, frame):
		"""Handle SIGQUIT: first delivery soft-stops, a second one shuts down."""
		if not self._softstop:
			self.signal_log.critical('SIG INT received, soft-stop')
			self._softstop = True
			self._listen = False
			return

		self.signal_log.critical('multiple SIG INT received, shutdown')
		self._shutdown = True

	def sigterm (self,signum, frame):
		"""Handle SIGTERM: shut down, or drop into pdb when PDB is set."""
		self.signal_log.critical('SIG TERM received, shutdown request')
		wants_pdb = os.environ.get('PDB',False)
		if wants_pdb:
			self._pdb = True
		else:
			self._shutdown = True

	# def sigabrt (self,signum, frame):
	# 	self.signal_log.info('SIG INFO received, refork request')
	# 	self._refork = True

	# def sighup (self,signum, frame):
	# 	self.signal_log.info('SIG HUP received, reload request')
	# 	self._reload = True

	def sigtrap (self,signum, frame):
		"""Handle SIGTRAP by scheduling a debug-logging toggle."""
		self._toggle_debug = True
		self.signal_log.critical('SIG TRAP received, toggle debug')


	def sigusr1 (self,signum, frame):
		"""Handle SIGUSR1 by scheduling one fewer redirector worker."""
		self.signal_log.critical('SIG USR1 received, decrease worker number')
		self._decrease_spawn_limit = self._decrease_spawn_limit + 1

	def sigusr2 (self,signum, frame):
		"""Handle SIGUSR2 by scheduling one more redirector worker."""
		self.signal_log.critical('SIG USR2 received, increase worker number')
		self._increase_spawn_limit = self._increase_spawn_limit + 1


	def sigttou (self,signum, frame):
		"""Handle SIGTTOU by requesting that we stop accepting connections."""
		self._listen = False
		self.signal_log.critical('SIG TTOU received, stop listening')

	def sigttin (self,signum, frame):
		"""Handle SIGTTIN by requesting that we start accepting connections again."""
		# fixed log message typos: 'SIG IN' -> 'SIG TTIN' and
		# 'star listening' -> 'start listening' (matches the sigttou wording)
		self.signal_log.critical('SIG TTIN received, start listening')
		self._listen = True


	def sigalrm (self,signum, frame):
		"""Handle SIGALRM: re-arm the interval timer and wake the reactor."""
		signal.setitimer(signal.ITIMER_REAL,self.alarm_time,self.alarm_time)
		self.reactor.running = False


	def interfaces (self):
		"""Refresh self.local, the set of IPs bound to local interfaces."""
		# loopback addresses are always considered local
		discovered = set(['127.0.0.1','::1'])

		for interface in getifaddrs():
			if interface.family not in (AF_INET,AF_INET6):
				continue
			address = interface.address
			if address not in self.local:
				self.log.info('found new local ip %s (%s)' % (address,interface.name))
			discovered.add(address)

		for ip in self.local:
			if ip not in discovered:
				self.log.info('removed local ip %s' % ip)

		if discovered == self.local:
			self.log.info('no ip change')
		else:
			self.local = discovered

	def run (self):
		signal.setitimer(signal.ITIMER_REAL,self.alarm_time,self.alarm_time)

		count_second = 0
		count_minute = 0
		count_saturation = 0
		count_interface = 0

		while True:
			count_second = (count_second + 1) % self.second_frequency
			count_minute = (count_minute + 1) % self.minute_frequency

			count_saturation = (count_saturation + 1) % self.saturation_frequency
			count_interface = (count_interface + 1) % self.interface_frequency

			try:
				if self._pdb:
					self._pdb = False
					import pdb
					pdb.set_trace()


				# check for IO change with select
				status = self.reactor.run()
				if status is False:
					self._shutdown = True

				# must follow the reactor so we are sure to go through the reactor at least once
				# and flush any logs
				if self._shutdown:
					self._shutdown = False
					self.shutdown()
					break
				elif self._reload:
					self._reload = False
					self.reload()
				elif self._refork:
					self._refork = False
					self.signal_log.warning('refork not implemented')
					# stop listening to new connections
					# refork the program (as we have been updated)
					# just handle current open connection


				if self._softstop:
					if self._listen == False:
						self.proxy.rejecting()
						self._listen = None
					if self.client.softstop():
						self._shutdown = True
				# only change listening if we are not shutting down
				elif self._listen is not None:
					if self._listen:
						self._shutdown = not self.proxy.accepting()
						self._listen = None
					else:
						self.proxy.rejecting()
						self._listen = None


				if self._toggle_debug:
					self._toggle_debug = False
					self.log_writer.toggleDebug()


				if self._decrease_spawn_limit:
					count = self._decrease_spawn_limit
					self.redirector.decreaseSpawnLimit(count)
					self._decrease_spawn_limit = 0

				if self._increase_spawn_limit:
					count = self._increase_spawn_limit
					self.redirector.increaseSpawnLimit(count)
					self._increase_spawn_limit = 0

				# save our monitoring stats
				if count_second == 0:
					self.monitor.second()
					expired = self.reactor.client.expire()
				else:
					expired = 0

				if expired:
					self.proxy.notifyClose(None, count=expired)

				if count_minute == 0:
					self.monitor.minute()

				# report if we saw too many connections
				if count_saturation == 0:
					self.proxy.saturation()
					self.web.saturation()

				if self.configuration.daemon.poll_interfaces and count_interface == 0:
					self.interfaces()

			except KeyboardInterrupt:
				self.log.critical('^C received')
				self._shutdown = True
			except OSError,e:
				# This shoould never happen as we are limiting how many connections we accept
				if e.errno == 24:  # Too many open files
					self.log.critical('Too many opened files, shutting down')
					for line in traceback.format_exc().split('\n'):
						self.log.critical(line)
					self._shutdown = True
				else:
					self.log.critical('unrecoverable io error')
					for line in traceback.format_exc().split('\n'):
						self.log.critical(line)
					self._shutdown = True

			finally:
Example #40
0
 def __init__(self, configuration, name):
     # worker-scoped logger, named after this worker's unique id
     self.log = Logger('worker ' + str(name), configuration.log.worker)
Example #41
0
class RedirectorManager (object):
	"""Manage the pool of redirector worker threads.

	Spawns and reaps workers within the configured [minimum, maximum]
	range, queues classification requests for them, and parses the
	netstring-encoded decisions they write back.
	"""

	def __init__ (self,configuration,poller):
		self.configuration = configuration

		self.low = configuration.redirector.minimum       # minimum number of workers at all time
		self.high = configuration.redirector.maximum      # maximum number of workers at all time
		self.program = configuration.redirector.program   # what program speaks the squid redirector API

		self.nextid = 1                   # incremental number to make the name of the next worker
		self.queue = Queue()              # queue with HTTP headers to process
		self.poller = poller              # poller interface that checks for events on sockets
		self.worker = {}                  # our workers threads
		self.closing = set()              # workers that are currently closing
		self.running = True               # we are running

		# fix: storeStats() accumulates into self.cache but it was never
		# initialised, so every stats report raised AttributeError
		self.cache = {}                   # timestamp -> {stat name: [values]}

		self.log = Logger('manager', configuration.log.manager)

	def _getid(self):
		"""Return a fresh unique worker id (as a string)."""
		wid = str(self.nextid)  # renamed from 'id' to stop shadowing the builtin
		self.nextid += 1
		return wid

	def _spawn (self):
		"""add one worker to the pool"""
		wid = self._getid()

		worker = Redirector(self.configuration,wid,self.queue,self.program)
		self.poller.addReadSocket('read_workers', worker.response_box_read)
		self.worker[wid] = worker
		self.log.info("added a worker")
		self.log.info("we have %d workers. defined range is ( %d / %d )" % (len(self.worker),self.low,self.high))
		self.worker[wid].start()

	def spawn (self,number=1):
		"""create the set number of worker"""
		self.log.info("spawning %d more worker" % number)
		for _ in range(number):
			self._spawn()

	def respawn (self):
		"""make sure we reach the minimum number of workers"""
		# keep the current count, clamped into the [low, high] range
		number = max(min(len(self.worker),self.high),self.low)
		for wid in set(self.worker):
			self.reap(wid)
		self.spawn(number)

	def reap (self,wid):
		"""Ask one worker to stop once it has finished its current task."""
		self.log.info('we are killing worker %s' % wid)
		worker = self.worker[wid]
		self.closing.add(wid)
		worker.stop()  # will cause the worker to stop when it can

	def decrease (self):
		"""Reap the oldest worker if we are above the minimum."""
		if self.low < len(self.worker):
			worker = self._oldest()
			if worker:
				self.reap(worker.wid)

	def increase (self):
		"""Spawn one more worker if we are below the maximum."""
		if len(self.worker) < self.high:
			self.spawn()

	def start (self):
		"""spawn our minimum number of workers"""
		self.log.info("starting workers.")
		self.spawn(max(0,self.low-len(self.worker)))

	def stop (self):
		"""tell all our worker to stop reading the queue and stop"""
		self.running = False
		# copy the values so the later reassignment of self.worker is safe
		# (also makes this portable to Python 3's dict views)
		threads = list(self.worker.values())
		if len(self.worker):
			self.log.info("stopping %d workers." % len(self.worker))
			for wid in set(self.worker):
				self.reap(wid)
			for thread in threads:
				# one 'nop' per worker so every blocked thread wakes up
				self.request(None, None, None, 'nop')
			for thread in threads:
				thread.destroyProcess()
				thread.join()

		self.worker = {}

	def _oldest (self):
		"""find the oldest worker"""
		oldest = None
		past = time.time()
		for wid in set(self.worker):
			creation = self.worker[wid].creation
			if creation < past and wid not in self.closing:
				past = creation
				oldest = self.worker[wid]
		return oldest

	def provision (self):
		"""manage our workers to make sure we have enough to consume the queue"""
		if not self.running:
			return

		num_workers = len(self.worker)

		# bad we are bleeding workers !
		if num_workers < self.low:
			self.log.info("we lost some workers, respawing %d new workers" % (self.low-num_workers))
			self.spawn(self.low-num_workers)

		size = self.queue.qsize()

		# we need more workers
		if size >= num_workers:
			# nothing we can do we have reach our limit
			if num_workers >= self.high:
				self.log.warning("help ! we need more workers but we reached our ceiling ! %d request are queued for %d processes" % (size,num_workers))
				return
			# try to figure a good number to add ..
			# no less than one, no more than to reach self.high, lower between self.low and a quarter of the allowed growth
			nb_to_add = int(min(max(1,min(self.low,(self.high-self.low)/4)),self.high-num_workers))
			self.log.warning("we are low on workers adding a few (%d), the queue has %d unhandled url" % (nb_to_add,size))
			self.spawn(nb_to_add)

	def deprovision (self):
		"""manage our workers to make sure we have enough to consume the queue"""
		if not self.running:
			return

		size = self.queue.qsize()
		num_workers = len(self.worker)

		# we are now overprovisioned
		if size < 2 and num_workers > self.low:
			self.log.info("we have too many workers (%d), stopping the oldest" % num_workers)
			# if we have to kill one, at least stop the one who had the most chance to memory leak :)
			worker = self._oldest()
			if worker:
				self.reap(worker.wid)

	def request(self, client_id, peer, request, source):
		"""Queue one request for classification by a worker."""
		return self.queue.put((client_id,peer,request,source,False))

	def getDecision(self, box):
		"""Read one netstring-encoded response from a worker pipe.

		Returns (client_id, command, decision); all three are None when
		the message was malformed or was an internal control message
		('requeue', 'hangup', 'stats') that is handled here.
		"""
		# NOTE: reads may block if we send badly formatted data
		try:
			r_buffer = box.read(3)
			while r_buffer.isdigit():
				r_buffer += box.read(1)

			if ':' in r_buffer:
				size, response = r_buffer.split(':', 1)
				if size.isdigit():
					size = int(size)
				else:
					size, response = None, None
			else:   # not a netstring
				size, response = None, None

			if size is not None:
				# +1 accounts for the trailing ',' netstring terminator
				required = size + 1 - len(response)
				response += box.read(required)

			if response is not None:
				if response.endswith(','):
					response = response[:-1]
				else:
					response = None

		except ValueError:  # I/O operation on closed file
			worker = self.worker.get(box, None)
			if worker is not None:
				worker.destroyProcess()

			response = None
		except TypeError:
			response = None

		try:
			if response:
				client_id, command, decision = response.split('\0', 2)
			else:
				client_id = None
				command = None
				decision = None

		except (ValueError, TypeError):
			client_id = None
			command = None
			decision = None

		if command == 'requeue':
			_client_id, _peer, _source, _header = response.split('\0', 3)
			self.queue.put((_client_id,_peer,_header,_source,True))

			client_id = None
			command = None
			decision = None

		elif command == 'hangup':
			wid = decision
			client_id = None
			command = None
			decision = None

			worker = self.worker.pop(wid, None)

			if worker:
				self.poller.removeReadSocket('read_workers', worker.response_box_read)
				if wid in self.closing:
					self.closing.remove(wid)
				worker.shutdown()
				worker.join()

		elif command == 'stats':
			# NOTE(review): this unpacks decision (a string produced by the
			# split above) into three values - confirm the format workers
			# actually use for 'stats' responses
			wid, timestamp, stats = decision
			self.storeStats(timestamp, wid, stats)

			client_id = None
			command = None
			decision = None

		return client_id, command, decision

	def showInternalError(self):
		"""Return the canned internal-error response."""
		return 'file', '\0'.join(('200', 'internal_error.html'))

	def requestStats(self):
		"""Ask every live worker to report its statistics."""
		# .items() instead of .iteritems() - same behaviour, also valid on Python 3
		for wid, worker in self.worker.items():
			worker.requestStats()

	def storeStats(self, timestamp, wid, stats):
		"""Accumulate one worker statistics report.

		stats is a query-string-like payload ('...?key=value&key=value');
		values are appended per timestamp and key.
		"""
		# fix: split('?', 1) returns a list - take the query part before
		# splitting on '&' (the old code called .split('&') on the list,
		# which always raised AttributeError)
		pairs = (d.split('=',1) for d in stats.split('?', 1)[-1].split('&'))
		d = self.cache.setdefault(timestamp, {})

		for k, v in pairs:
			d.setdefault(k, []).append(v)
Example #42
0
"""

# http://code.google.com/speed/articles/web-metrics.html

import select
import socket
import errno

from exaproxy.network.errno_list import errno_block, errno_fatal
from interface import IPoller

from exaproxy.util.log.logger import Logger
from exaproxy.configuration import load

# module-level configuration and logger, created once at import time
configuration = load()
log = Logger('select', configuration.log.server)


def poll_select(read, write, timeout=None):
    try:
        r, w, x = select.select(read, write, read + write, timeout)
    except socket.error, e:
        if e.args[0] in errno_block:
            log.error('select not ready, errno %d: %s' %
                      (e.args[0], errno.errorcode.get(e.args[0], '')))
            return [], [], []

        if e.args[0] in errno_fatal:
            log.error('select problem, errno %d: %s' %
                      (e.args[0], errno.errorcode.get(e.args[0], '')))
            log.error('poller read  : %s' % str(read))
Example #43
0
"""
nettools.py

Created by Thomas Mangin on 2011-11-30.
Copyright (c) 2011-2013  Exa Networks. All rights reserved.
"""

import socket
import errno

from exaproxy.util.log.logger import Logger
from exaproxy.network.errno_list import errno_block
from exaproxy.configuration import load

# module-level configuration and logger, created once at import time
configuration = load()
log = Logger('server', configuration.log.server)

def isipv4(address):
	"""Return True when address is a valid IPv4 literal, False otherwise."""
	try:
		socket.inet_pton(socket.AF_INET, address)
	except socket.error:
		return False
	return True

def isipv6(address):
	"""Return True when address is a valid IPv6 literal, False otherwise."""
	try:
		socket.inet_pton(socket.AF_INET6, address)
	except socket.error:
		return False
	return True
Example #44
0
                    'command line option --pdb'),
            'log': (value.boolean, string.lower, 'false',
                    'command line option --debug'),
        },
    }

    try:
        configuration = load('exaproxy', defaults, arguments['configuration'])
    except ConfigurationError, e:
        print >> sys.stderr, 'configuration issue,', str(e)
        sys.exit(1)

    configuration.proxy.version = version

    from exaproxy.util.log.logger import Logger
    log = Logger('supervisor', configuration.log.supervisor)

    for arg in sys.argv[1:]:
        if arg in [
                '--',
        ]:
            break
        if arg in ['-h', '--help']:
            help()
            sys.exit(0)
        if arg in ['-i', '-fi', '--ini']:
            ini()
            sys.exit(0)
        if arg in ['-e', '-fe', '--env']:
            env()
            sys.exit(0)
Example #45
0
# encoding: utf-8
"""
async/__init__.py

Created by David Farrar on 2012-01-31.
Copyright (c) 2011-2013  Exa Networks. All rights reserved.
"""

import sys
import select

from exaproxy.util.log.logger import Logger
from exaproxy.configuration import load

# module-level configuration and logger, created once at import time
configuration = load()
log = Logger('supervisor', configuration.log.supervisor)


def Poller(configuration, speed=None):
    reactor = configuration.reactor

    if reactor == 'best':
        if sys.platform.startswith('linux'):
            configuration.reactor = 'epoll'
        elif sys.platform.startswith('freebsd'):
            configuration.reactor = 'kqueue'
        elif sys.platform.startswith('darwin'):
            configuration.reactor = 'kqueue'
        else:
            log.error(
                'we could not autodetect an high performance reactor for your OS'
Example #46
0
class ResolverManager (object):
	"""Manage DNS lookups on behalf of proxy clients.

	A single shared UDP client serves most queries; lookups whose UDP
	response comes back truncated are retried over per-request TCP
	clients, capped at max_workers concurrent sockets.  Results are
	cached in 5-second expiry buckets so whole buckets can be dropped
	at once by expireCache().
	"""

	resolverFactory = DNSResolver

	def __init__ (self, poller, configuration, max_workers):
		self.poller = poller
		self.configuration = configuration

		self.resolver_factory = self.resolverFactory(configuration)

		# The actual work is done in the worker
		self.worker = self.resolver_factory.createUDPClient()

		# All currently active clients (one UDP and many TCP)
		self.workers = {}
		self.workers[self.worker.socket] = self.worker
		self.poller.addReadSocket('read_resolver', self.worker.socket)

		# Track the clients currently expecting results
		self.clients = {}  # client_id : identifier

		# Key should be the hostname rather than the request ID?
		self.resolving = {}  # identifier, worker_id :

		# TCP workers that have not yet sent a complete request
		self.sending = {}  # sock :

		# Maximum number of entry we will cache (1024 DNS lookup per second !)
		# assuming 1k per entry, which is a lot, it mean 20Mb of memory
		# which at the default of 900 seconds of cache is 22 new host per seonds
		self.max_entries  = 1024*20

		# track the current queries and when they were started
		self.active = []

		self.cache = {}
		self.cached = deque()

		self.max_workers = max_workers
		self.worker_count = len(self.workers)  # the UDP client

		self.waiting = []

		self.log = Logger('resolver', configuration.log.resolver)
		self.chained = {}

	def cacheDestination (self, hostname, ip):
		"""Record a resolved hostname -> ip mapping in the TTL-bucketed cache."""
		if hostname not in self.cache:
			expire_time = time.time() + self.configuration.dns.ttl
			expire_time = expire_time - expire_time % 5  # group the DNS record per buckets 5 seconds
			latest_time, latest_hosts = self.cached[-1] if self.cached else (-1, None)

			if expire_time > latest_time:
				hosts = []
				self.cached.append((expire_time, hosts))
			else:
				hosts = latest_hosts

			self.cache[hostname] = ip
			hosts.append(hostname)

	def expireCache (self):
		"""Drop at most one bucket of expired (or overflowing) cache entries."""
		# expire only one set of cache entries at a time
		if self.cached:
			current_time = time.time()
			expire_time, hosts = self.cached[0]

			if current_time >= expire_time or len(self.cache) > self.max_entries:
				expire_time, hosts = self.cached.popleft()

				for hostname in hosts:
					self.cache.pop(hostname, None)


	def cleanup(self):
		"""Time out stale queries.

		Generator: yields (client_id, 'rewrite', ...) error responses for
		queries we gave up on, so the caller must iterate it for the
		cleanup side effects to happen.
		"""
		now = time.time()
		cutoff = now - self.configuration.dns.timeout
		count = 0

		# self.active is appended to in start order, so we can stop at the
		# first entry newer than the cutoff
		for timestamp, client_id, sock in self.active:
			if timestamp > cutoff:
				break

			count += 1
			cli_data = self.clients.pop(client_id, None)
			worker = self.workers.get(sock)
			tcpudp = 'udp' if worker is self.worker else 'tcp'

			if cli_data is not None:
				w_id, identifier, active_time, resolve_count = cli_data
				data = self.resolving.pop((w_id, identifier), None)
				if not data:
					data = self.sending.pop(sock, None)

				if data:
					client_id, original, hostname, command, decision = data
					self.log.error('timeout when requesting address for %s using the %s client - attempt %s' % (hostname, tcpudp, resolve_count))

					# only UDP requests are retransmitted; TCP ones fall through
					if resolve_count < self.configuration.dns.retries and worker is self.worker:
						self.log.info('going to retransmit request for %s - attempt %s of %s' % (hostname, resolve_count+1, self.configuration.dns.retries))
						self.startResolving(client_id, command, decision, resolve_count+1, identifier=identifier)
						continue

					self.log.error('given up trying to resolve %s after %s attempts' % (hostname, self.configuration.dns.retries))
					yield client_id, 'rewrite', ('503', 'dns.html', '', '', '', hostname, 'peer')

			if worker is not None:
				if worker is not self.worker:
					worker.close()
					self.workers.pop(sock)

		if count:
			self.active = self.active[count:]

	def resolves(self, command, decision):
		"""Return True when the decision carries a hostname that needs resolving."""
		if command in ('download', 'connect'):
			hostname = decision[0]
			if isip(hostname):
				res = False
			else:
				res = True
		else:
			res = False

		return res

	def extractHostname(self, command, decision):
		"""Return the hostname from a download/connect decision, else None."""
		if command in ('download', 'connect'):
			hostname = decision[0]

		else:
			hostname = None

		return hostname

	def resolveDecision(self, command, decision, ip):
		"""Return the decision with its hostname replaced by the resolved ip."""
		if command in ('download', 'connect'):
			hostname, args = decision[0], decision[1:]
			newdecision = (ip,) + args
		else:
			newdecision = None

		return newdecision

	def startResolving(self, client_id, command, decision, resolve_count=1, identifier=None):
		"""Begin (or answer from cache) a UDP lookup for the decision's hostname.

		Returns (identifier, response): identifier is the DNS request id when
		a lookup was started, response is an immediate answer when none was
		needed (cache hit, invalid hostname, ...).
		"""
		hostname = self.extractHostname(command, decision)

		if hostname:
			# Resolution is already in our cache
			if hostname in self.cache and identifier is None:
				ip = self.cache[hostname]

				if ip is not None:
					resolved = self.resolveDecision(command, decision, ip)
					response = (client_id, command) + resolved

				else:
					response = client_id, 'rewrite', '503', 'dns.html', 'http', '', '', hostname, 'peer'

			# do not try to resolve domains which are not FQDN
			elif self.configuration.dns.fqdn and '.' not in hostname:
				identifier = None
				response = client_id, 'rewrite', '200', 'dns.html', 'http', '', '', hostname, 'peer'

			# each DNS part (between the dots) must be under 256 chars
			elif max(len(p) for p in hostname.split('.')) > 255:
				identifier = None
				self.log.info('jumbo hostname: %s' % hostname)
				response = client_id, 'rewrite', '503', 'dns.html', 'http', '', '', hostname, 'peer'
			# Lookup that DNS name
			else:
				identifier, _ = self.worker.resolveHost(hostname, identifier=identifier)
				response = None
				active_time = time.time()

				self.resolving[(self.worker.w_id, identifier)] = client_id, hostname, hostname, command, decision
				self.clients[client_id] = (self.worker.w_id, identifier, active_time, resolve_count)
				self.active.append((active_time, client_id, self.worker.socket))
		else:
			identifier = None
			response = None

		return identifier, response

	def beginResolvingTCP (self, client_id, command, decision, resolve_count):
		"""Start a TCP lookup, or queue it when all TCP workers are busy."""
		if self.worker_count < self.max_workers:
			identifier = self.newTCPResolver(client_id, command, decision, resolve_count)
			self.worker_count += 1
		else:
			self.waiting.append((client_id, command, decision, resolve_count))
			identifier = None

		return identifier

	def notifyClose (self):
		"""Account for a closed TCP worker and drain the waiting queue."""
		paused = self.worker_count >= self.max_workers
		self.worker_count -= 1

		if paused and self.worker_count < self.max_workers:
			for _ in range(self.worker_count, self.max_workers):
				if self.waiting:
					data, self.waiting = self.waiting[0], self.waiting[1:]
					client_id, command, decision, resolve_count = data

					identifier = self.newTCPResolver(client_id, command, decision, resolve_count)
					self.worker_count += 1

	def newTCPResolver (self, client_id, command, decision, resolve_count):
		"""Create a dedicated TCP client and start resolving over it."""
		hostname = self.extractHostname(command, decision)

		if hostname:
			worker = self.resolver_factory.createTCPClient()
			self.workers[worker.socket] = worker

			identifier, all_sent = worker.resolveHost(hostname)
			active_time = time.time()
			self.resolving[(worker.w_id, identifier)] = client_id, hostname, hostname, command, decision
			self.clients[client_id] = (worker.w_id, identifier, active_time, resolve_count)
			# fixed: was self.worker.socket (the shared UDP socket), which made
			# cleanup() treat TCP timeouts as UDP and leak the TCP worker, and
			# prevented getResponse() from ever matching this entry for removal
			self.active.append((active_time, client_id, worker.socket))

			if all_sent:
				self.poller.addReadSocket('read_resolver', worker.socket)
				self.resolving[(worker.w_id, identifier)] = client_id, hostname, hostname, command, decision
			else:
				self.poller.addWriteSocket('write_resolver', worker.socket)
				self.sending[worker.socket] = client_id, hostname, hostname, command, decision

		else:
			identifier = None

		return identifier

	def getResponse(self, sock):
		"""Read and classify a DNS response arriving on sock.

		Returns a response tuple for the client, or None when the lookup is
		still in progress (CNAME chain, TCP retry) or the data is unusable.
		"""
		worker = self.workers.get(sock)

		if worker:
			result = worker.getResponse(self.chained)

			if result:
				identifier, forhost, ip, completed, newidentifier, newhost, newcomplete = result
				data = self.resolving.pop((worker.w_id, identifier), None)

				chain_count = self.chained.pop(identifier, 0)
				if newidentifier:
					self.chained[newidentifier] = chain_count + 1

				if not data:
					self.log.info('ignoring response for %s (%s) with identifier %s' % (forhost, ip, identifier))

			else:
				# unable to parse response
				self.log.error('unable to parse response')
				data = None

			if data:
				client_id, original, hostname, command, decision = data
				clidata = self.clients.pop(client_id, None)

				if completed:
					if clidata is not None:
						key = clidata[2], client_id, worker.socket
						if key in self.active:
							self.active.remove(key)

				# check to see if we received an incomplete response
				if not completed:
					newidentifier = self.beginResolvingTCP(client_id, command, decision, 1)
					newhost = hostname
					response = None

				# check to see if the worker started a new request
				if newidentifier:
					if completed:
						active_time = time.time()
						self.resolving[(worker.w_id, newidentifier)] = client_id, original, newhost, command, decision
						self.clients[client_id] = (worker.w_id, newidentifier, active_time, 1)
						self.active.append((active_time, client_id, worker.socket))

					response = None

					if completed and newcomplete:
						self.poller.addReadSocket('read_resolver', worker.socket)

					elif completed and not newcomplete:
						self.poller.addWriteSocket('write_resolver', worker.socket)
						self.sending[worker.socket] = client_id, original, hostname, command, decision

				# we just started a new (TCP) request and have not yet completely sent it
				# make sure we still know who the request is for
				elif not completed:
					response = None

				# maybe we read the wrong response?
				elif forhost != hostname:
					_, _, _, resolve_count = clidata
					active_time = time.time()
					self.resolving[(worker.w_id, identifier)] = client_id, original, hostname, command, decision
					self.clients[client_id] = (worker.w_id, identifier, active_time, resolve_count)
					self.active.append((active_time, client_id, worker.socket))
					response = None

				# success
				elif ip is not None:
					resolved = self.resolveDecision(command, decision, ip)
					response = (client_id, command) + resolved
					self.cacheDestination(original, ip)

				# not found
				else:
					response = client_id, 'rewrite', '503', 'dns.html', 'http', '', '', hostname, 'peer'
					#self.cacheDestination(original, ip)
			else:
				response = None

			if response or result is None:
				if worker is not self.worker:
					self.poller.removeReadSocket('read_resolver', sock)
					self.poller.removeWriteSocket('write_resolver', sock)
					worker.close()
					self.workers.pop(sock)
					self.notifyClose()

		else:
			response = None

		return response


	def continueSending(self, sock):
		"""Continue sending data over the connected TCP socket"""
		data = self.sending.get(sock)
		if data:
			client_id, original, hostname, command, decision = data
		else:
			# fixed: the original unpacked SIX Nones into FIVE names, raising
			# ValueError whenever no request was buffered for this socket
			client_id, original, hostname, command, decision = None, None, None, None, None

		worker = self.workers[sock]
		res = worker.continueSending()

		if res is False: # we've sent all we need to send
			self.poller.removeWriteSocket('write_resolver', sock)

			if client_id in self.clients:
				w_id, identifier, active_time, resolve_count = self.clients[client_id]
				tmp = self.sending.pop(sock)
				self.resolving[(w_id, identifier)] = tmp
				self.poller.addReadSocket('read_resolver', sock)

			else:
				self.log.error('could not find client for dns request for %s. request is being left to timeout.' % str(hostname))
Example #47
0
class Redirector (object):
	"""Classify client requests, optionally via a forked squid-style
	URL redirector child process, and turn the verdict into a Respond.*
	instruction for the reactor."""
	# TODO : if the program is a function, fork and run :)
	HTTPParser = HTTPRequestFactory
	TLSParser = TLSParser
	ResponseFactory = ResponseFactory
	ChildFactory = ChildFactory

	__slots__ = ['configuration', 'tls_parser', 'http_parser', 'enabled', '_transparent', 'log', 'usage', 'response_factory', 'child_factory', 'wid', 'creation', 'program', 'running', 'stats_timestamp', '_proxy', 'universal', 'process']

	def __init__ (self, configuration, name, program, protocol):
		self.configuration = configuration
		self.http_parser = self.HTTPParser(configuration)
		self.tls_parser = self.TLSParser(configuration)
		self.enabled = bool(program is not None) and configuration.redirector.enable
		self._transparent = configuration.http.transparent
		self.log = Logger('worker ' + str(name), configuration.log.worker)
		self.usage = UsageLogger('usage', configuration.log.worker)
		self.response_factory = self.ResponseFactory()
		self.child_factory = self.ChildFactory(configuration, name)

		self.wid = name							   # a unique name
		self.creation = time.time()				   # when the thread was created
	#	self.last_worked = self.creation			  # when the thread last picked a task

		self.program = program						# the squid redirector program to fork
		self.running = True						   # the thread is active

		self.stats_timestamp = None				   # time of the most recent outstanding request to generate stats

		self._proxy = 'ExaProxy-%s-id-%d' % (configuration.proxy.version,os.getpid())

		universal = configuration.redirector.protocol == 'url'
		# Do not move, we need the forking AFTER the setup
		if program:
			self.process = self.child_factory.createProcess(self.program, universal=universal)
		else:
			self.process = None

	def addHeaders (self, message, peer):
		"""Strip hop-by-hop proxy headers and add Via/X-Forwarded-For."""
		headers = message.headers
		# http://homepage.ntlworld.com./jonathan.deboynepollard/FGA/web-proxy-connection-header.html
		headers.pop('proxy-connection',None)
		# NOTE: To be RFC compliant we need to add a Via field http://tools.ietf.org/html/rfc2616#section-14.45 on the reply too
		# NOTE: At the moment we only add it from the client to the server (which is what really matters)
		if not self._transparent:
			headers.extend('via','Via: %s %s' % (message.request.version, self._proxy))
			headers.extend('x_forwarded_for', 'X-Forwarded-For: %s' % peer)
			# NOTE(review): unlike proxy-connection above, this pop() passes no
			# default - confirm the project Header.pop tolerates a missing key
			headers.pop('proxy-authenticate')

		return message

	def checkChild (self):
		"""Return True when no child is needed or the child is still alive."""
		if not self.enabled:
			return True
		if not bool(self.process):
			return False
		# A None value indicates that the process hasn’t terminated yet.
		# A negative value -N indicates that the child was terminated by signal N (Unix only).
		# In practice: also returns 1 ...
		if self.process.poll() is None:
			return True
		return False

	def writeChild (self, request_string):
		"""Send one request line to the child; return True on success."""
		try:
			self.process.stdin.write(request_string)
			status = True

		# a write to a closed file object raises ValueError
		except ValueError:
			status = False

		return status

	def readChildResponse (self):
		"""Read one classification line from the child's stdout.

		Returns the stripped line, or None when nothing usable could be read.
		"""
		try:
			response = None
			while not response:
				response = self.process.stdout.readline()
				if response == '':
					# fixed: readline() returns '' only at EOF (child closed
					# stdout); the original kept re-calling readline() forever
					response = None
					break

		except Exception:
			response = None

		if response:
			response = response.strip()

		return response


	def createChildRequest (self, accept_addr, accept_port, peer, message, http_header):
		"""Format the squid-redirector request line for the child."""
		return '%s %s - %s -\n' % (message.url_noport, peer, message.request.method)

	def classifyURL (self, request, url_response):
		"""Map the child's answer to a (classification, data, comment) triple."""
		if not url_response:
			return 'permit', None, None

		if url_response.startswith('http://'):
			response = url_response[7:]

			if response == request.url_noport:
				return 'permit', None, ''

			if response.startswith(request.host + '/'):
				# fixed: the original ternary applied to the whole right-hand
				# side and unpacked '' (ValueError) when request.url had no
				# '/'; the startswith guard above guarantees the split works
				_, rewrite_path = response.split('/', 1)
				return 'rewrite', rewrite_path, ''

		if url_response.startswith('file://'):
			return 'file', url_response[7:], ''

		if url_response.startswith('intercept://'):
			return 'intercept', url_response[12:], ''

		if url_response.startswith('redirect://'):
			return 'redirect', url_response[11:], ''

		return 'file', 'internal_error.html', ''


	def parseHTTP (self, client_id, accept_addr, accept_port, peer, http_header):
		"""Parse the raw header block into an HTTP message object."""
		message = HTTP(self.configuration, http_header, peer)
		message.parse(self._transparent)
		return message

	def validateHTTP (self, client_id, message):
		"""Return an error Respond when parsing set a reply code, else None."""
		if message.reply_code:
			try:
				version = message.request.version
			except AttributeError:
				version = '1.0'

			if message.reply_string:
				clean_header = message.raw.replace('\t','\\t').replace('\r','\\r').replace('\n','\\n\n')
				content = '%s<br/>\n<!--\n\n<![CDATA[%s]]>\n\n-->\n' % (message.reply_string, clean_header)
				response = Respond.http(client_id, http(str(message.reply_code), content, version))
			else:
				response = Respond.http(client_id, http(str(message.reply_code),'',version))

		else:
			response = None

		return response

	def doHTTPRequest (self, client_id, accept_addr, accept_port, peer, message, http_header, source):
		"""Handle a plain HTTP request: defer to the child or download directly."""
		method = message.request.method

		if self.enabled:
			request_string = self.createChildRequest(accept_addr, accept_port, peer, message, http_header) if message else None
			status = self.writeChild(request_string) if request_string else None

			if status is True:
				response = Respond.defer(client_id, message)

			else:
				response = None

		else:
			response = Respond.download(client_id, message.host, message.port, message.upgrade, message.content_length, message)
			self.usage.logRequest(client_id, accept_addr, accept_port, peer, method, message.url, 'PERMIT', message.host)

		return response

	def doHTTPConnect (self, client_id, accept_addr, accept_port, peer, message, http_header, source):
		"""Handle CONNECT: enforce the allowed-port policy, then defer or tunnel."""
		method = message.request.method

		if not self.configuration.http.connect or message.port not in self.configuration.security.connect:
			# NOTE: we are always returning an HTTP/1.1 response
			response = Respond.http(client_id, http('501', 'CONNECT NOT ALLOWED\n'))
			self.usage.logRequest(client_id, accept_addr, accept_port, peer, method, message.url, 'DENY', 'CONNECT NOT ALLOWED')

		elif self.enabled:
			request_string = self.createChildRequest(accept_addr, accept_port, peer, message, http_header) if message else None
			status = self.writeChild(request_string) if request_string else None

			if status is True:
				response = Respond.defer(client_id, message)

			else:
				response = None

		else:
			response = Respond.connect(client_id, message.host, message.port, '')
			self.usage.logRequest(client_id, accept_addr, accept_port, peer, method, message.url, 'PERMIT', message.host)

		return response

	def doHTTPOptions (self, client_id, accept_addr, accept_port, peer, message):
		"""Handle OPTIONS/TRACE, honouring the Max-Forwards header (RFC 2616 14.31)."""
		# NOTE: we are always returning an HTTP/1.1 response
		method = message.request.method

		header = message.headers.get('max-forwards', '')
		if header:
			value = header[-1].split(':')[-1].strip()
			if not value.isdigit():
				self.usage.logRequest(client_id, accept_addr, accept_port, peer, method, message.url, 'ERROR', 'INVALID MAX FORWARDS')
				return Respond.http(client_id, http('400', 'INVALID MAX-FORWARDS\n'))

			max_forward = int(value)
			if max_forward == 0:
				# we are the final recipient: answer instead of forwarding
				self.usage.logRequest(client_id, accept_addr, accept_port, peer, method, message.url, 'PERMIT', method)
				return Respond.http(client_id, http('200', ''))

			message.headers.set('max-forwards','Max-Forwards: %d' % (max_forward-1))

		return Respond.download(client_id, message.headerhost, message.port, message.upgrade, message.content_length, message)

	def doHTTP (self, client_id, accept_addr, accept_port, peer, http_header, source):
		"""Parse, validate and dispatch an HTTP request by method."""
		message = self.parseHTTP(client_id, accept_addr, accept_port, peer, http_header)
		response = self.validateHTTP(client_id, message)

		if message.validated:
			message = self.addHeaders(message, peer)
			method = message.request.method

			if method in ('GET', 'PUT', 'POST','HEAD','DELETE','PATCH'):
				response = self.doHTTPRequest(client_id, accept_addr, accept_port, peer, message, http_header, source)

			elif method == 'CONNECT':
				response = self.doHTTPConnect(client_id, accept_addr, accept_port, peer, message, http_header, source)

			elif method in ('OPTIONS','TRACE'):
				response = self.doHTTPOptions(client_id, accept_addr, accept_port, peer, message)

			# WebDAV / Exchange methods are passed straight through
			elif method in (
			'BCOPY', 'BDELETE', 'BMOVE', 'BPROPFIND', 'BPROPPATCH', 'COPY', 'DELETE','LOCK', 'MKCOL', 'MOVE',
			'NOTIFY', 'POLL', 'PROPFIND', 'PROPPATCH', 'SEARCH', 'SUBSCRIBE', 'UNLOCK', 'UNSUBSCRIBE', 'X-MS-ENUMATTS'):
				response = Respond.download(client_id, message.headerhost, message.port, message.upgrade, message.content_length, message)
				self.usage.logRequest(client_id, accept_addr, accept_port, peer, method, message.url, 'PERMIT', method)

			elif message.request in self.configuration.http.extensions:
				response = Respond.download(client_id, message.headerhost, message.port, message.upgrade, message.content_length, message)
				self.usage.logRequest(client_id, accept_addr, accept_port, peer, method, message.url, 'PERMIT', message.request)

			else:
				# NOTE: we are always returning an HTTP/1.1 response
				response = Respond.http(client_id, http('405', ''))  # METHOD NOT ALLOWED
				self.usage.logRequest(client_id, accept_addr, accept_port, peer, method, message.url, 'DENY', method)

		elif response is None:
			response = Respond.hangup(client_id)

		return response


	def doTLS (self, client_id, accept_addr, accept_port, peer, tls_header, source):
		"""Classify a TLS ClientHello by SNI hostname via the child, or intercept."""
		# fixed: this line was indented with spaces inside a tab-indented class
		tls_hello = self.tls_parser.parseClientHello(tls_header)

		if self.enabled and tls_hello:
			request_string = '%s %s - %s -\n' % (tls_hello.hostname, peer, 'TLS')
			status = self.writeChild(request_string)

			if status is True:
				response = Respond.defer(client_id, tls_hello.hostname)

			else:
				response = None

		elif tls_hello:
			response = Respond.intercept(client_id, tls_hello.hostname, 443, tls_header)

		else:
			response = Respond.hangup(client_id)

		return response

	def doMonitor (self, client_id, accept_addr, accept_port, peer, http_header, source):
		"""Serve the internal monitoring web page for the parsed request path."""
		message = self.parseHTTP(client_id, accept_addr, accept_port, peer, http_header)
		response = self.validateHTTP(client_id, message)  # pylint: disable=W0612

		return Respond.monitor(client_id, message.request.path)


	def decide (self, client_id, accept_addr, accept_port, peer, header, subheader, source):
		"""Entry point: route a request to the HTTP / monitor / TLS handler."""
		if self.checkChild():
			if source == 'proxy':
				response = self.doHTTP(client_id, accept_addr, accept_port, peer, header, source)

			elif source == 'web':
				response = self.doMonitor(client_id, accept_addr, accept_port, peer, header, source)

			elif source == 'tls':
				response = self.doTLS(client_id, accept_addr, accept_port, peer, header, source)

			else:
				response = Respond.hangup(client_id)

		else:
			response = Respond.error(client_id)

		return response


	def progress (self, client_id, accept_addr, accept_port, peer, message, header, subheader, source):
		"""Complete a deferred decision once the child has answered."""
		if self.checkChild():
			response_s = self.readChildResponse()

		else:
			response_s = None

		if source == 'tls':
			return Respond.hangup(client_id)

		response = self.classifyURL(message.request, response_s) if response_s is not None else None

		if response is not None and source == 'proxy':
			classification, data, comment = response

			if message.request.method in ('GET','PUT','POST','HEAD','DELETE','PATCH'):
				(operation, destination), decision = self.response_factory.contentResponse(client_id, message, classification, data, comment)

			elif message.request.method == 'CONNECT':
				(operation, destination), decision = self.response_factory.connectResponse(client_id, message, classification, data, comment)

			else:
				self.log.info('unhandled command %s - dev, please look into it!' % str(message.request.method))
				operation, destination, decision = None, None, None

			if operation is not None:
				self.usage.logRequest(client_id, accept_addr, accept_port, peer, message.request.method, message.url, operation, message.host)

		else:
			decision = None

		if decision is None:
			decision = Respond.error(client_id)

		return decision

	def shutdown(self):
		"""Terminate the redirector child process, if any."""
		if self.process is not None:
			self.child_factory.destroyProcess(self.process)
			self.process = None
Example #48
0
class Redirector (Thread):
	# Thread-based variant of the redirector worker: forks the squid-style
	# redirector program and exchanges request/response lines with it over
	# pipes.  (Python 2 syntax: see the except clause in destroyProcess.)
	# TODO : if the program is a function, fork and run :)

	def __init__ (self, configuration, name, request_box, program):
		"""Set up logging, communication pipes and fork the child process.

		name:        unique worker identifier
		request_box: queue of HTTP headers for this worker to classify
		program:     path of the redirector program to fork
		"""
		self.configuration = configuration
		self.enabled = configuration.redirector.enable
		self.protocol = configuration.redirector.protocol
		self._transparent = configuration.http.transparent
		self.log = Logger('worker ' + str(name), configuration.log.worker)
		self.usage = UsageLogger('usage', configuration.log.worker)

		# 'url' protocol speaks plain squid-redirector lines; icap://host/...
		# selects the ICAP classifier below
		self.universal = True if self.protocol == 'url' else False
		self.icap = self.protocol[len('icap://'):].split('/')[0] if self.protocol.startswith('icap://') else ''

		r, w = os.pipe()                                # pipe for communication with the main thread
		self.response_box_write = os.fdopen(w,'w',0)    # results are written here
		self.response_box_read = os.fdopen(r,'r',0)     # read from the main thread

		self.wid = name                               # a unique name
		self.creation = time.time()                   # when the thread was created
	#	self.last_worked = self.creation              # when the thread last picked a task
		self.request_box = request_box                # queue with HTTP headers to process

		self.program = program                        # the squid redirector program to fork
		self.running = True                           # the thread is active

		self.stats_timestamp = None                   # time of the most recent outstanding request to generate stats

		self._proxy = 'ExaProxy-%s-id-%d' % (configuration.proxy.version,os.getpid())

		# choose the classification backend from the configured protocol
		if self.protocol == 'url':
			self.classify = self._classify_url
		if self.protocol.startswith('icap://'):
			self.classify = self._classify_icap


		# Do not move, we need the forking AFTER the setup
		self.process = self._createProcess()          # the forked program to handle classification
		Thread.__init__(self)

	def _createProcess (self):
		"""Fork the redirector program with piped stdio.

		Returns the subprocess.Popen object, or None when disabled or the
		spawn/setup failed.
		"""
		if not self.enabled:
			return

		def preexec():  # Don't forward signals.
			os.setpgrp()

		try:
			process = subprocess.Popen([self.program,],
				stdin=subprocess.PIPE,
				stdout=subprocess.PIPE,
				stderr=subprocess.PIPE,
				universal_newlines=self.universal,
				preexec_fn=preexec,
			)
			self.log.debug('spawn process %s' % self.program)
		except KeyboardInterrupt:
			process = None
		except (subprocess.CalledProcessError,OSError,ValueError):
			self.log.error('could not spawn process %s' % self.program)
			process = None

		if process:
			try:
				# stderr must not block the worker when the child is chatty
				fcntl.fcntl(process.stderr, fcntl.F_SETFL, os.O_NONBLOCK)
			except IOError:
				# NOTE(review): self.process is not assigned until after this
				# method returns, yet destroyProcess() reads self.process -
				# this error path likely raises AttributeError; confirm
				self.destroyProcess()
				process = None

		return process

	def destroyProcess (self):
		"""Terminate the forked redirector program, ignoring a vanished child."""
		if not self.enabled:
			return
		self.log.debug('destroying process %s' % self.program)
		if not self.process:
			return
		try:
			if self.process:
				self.process.terminate()
				self.process.wait()
				self.log.info('terminated process PID %s' % self.process.pid)
		except OSError, e:
			# No such processs
			# (Python 2 idiom: e[0] is errno; ESRCH means already gone)
			if e[0] != errno.ESRCH:
				self.log.error('PID %s died' % self.process.pid)
Example #49
0
	def __init__ (self,configuration):
		"""Wire up the whole proxy: logging, pollers, managers, servers and
		signal handlers.  Statement order matters here - the poller
		categories must exist before the managers that register sockets
		with them, and signal handlers are installed last.

		Note: the configuration argument is immediately replaced by a
		fresh load() - presumably intentional; confirm the parameter is
		kept only for interface compatibility.
		"""
		configuration = load()
		self.configuration = configuration

		# Only here so the introspection code can find them
		self.log = Logger('supervisor', configuration.log.supervisor)
		self.log.error('Starting exaproxy version %s' % configuration.proxy.version)

		self.signal_log = Logger('signal', configuration.log.signal)
		self.log_writer = SysLogWriter('log', configuration.log.destination, configuration.log.enable, level=configuration.log.level)
		self.usage_writer = UsageWriter('usage', configuration.usage.destination, configuration.usage.enable)

		self.log_writer.setIdentifier(configuration.daemon.identifier)
		#self.usage_writer.setIdentifier(configuration.daemon.identifier)

		if configuration.debug.log:
			self.log_writer.toggleDebug()
			self.usage_writer.toggleDebug()

		self.log.error('python version %s' % sys.version.replace(os.linesep,' '))
		self.log.debug('starting %s' % sys.argv[0])

		self.pid = PID(self.configuration)

		self.daemon = Daemon(self.configuration)
		self.poller = Poller(self.configuration.daemon)

		# Register every socket category the reactor will use
		self.poller.setupRead('read_proxy')           # Listening proxy sockets
		self.poller.setupRead('read_web')             # Listening webserver sockets
		self.poller.setupRead('read_icap')             # Listening icap sockets
		self.poller.setupRead('read_workers')         # Pipes carrying responses from the child processes
		self.poller.setupRead('read_resolver')        # Sockets currently listening for DNS responses

		self.poller.setupRead('read_client')          # Active clients
		self.poller.setupRead('opening_client')       # Clients we have not yet read a request from
		self.poller.setupWrite('write_client')        # Active clients with buffered data to send
		self.poller.setupWrite('write_resolver')      # Active DNS requests with buffered data to send

		self.poller.setupRead('read_download')        # Established connections
		self.poller.setupWrite('write_download')      # Established connections we have buffered data to send to
		self.poller.setupWrite('opening_download')    # Opening connections

		self.monitor = Monitor(self)
		self.page = Page(self)
		self.manager = RedirectorManager(
			self.configuration,
			self.poller,
		)
		self.content = ContentManager(self,configuration)
		self.client = ClientManager(self.poller, configuration)
		self.resolver = ResolverManager(self.poller, self.configuration, configuration.dns.retries*10)
		self.proxy = Server('http proxy',self.poller,'read_proxy', configuration.http.connections)
		self.web = Server('web server',self.poller,'read_web', configuration.web.connections)
		self.icap = Server('icap server',self.poller,'read_icap', configuration.icap.connections)

		self.reactor = Reactor(self.configuration, self.web, self.proxy, self.icap, self.manager, self.content, self.client, self.resolver, self.log_writer, self.usage_writer, self.poller)

		# Runtime state flags toggled by the signal handlers below
		self._shutdown = True if self.daemon.filemax == 0 else False  # stop the program
		self._softstop = False  # stop once all current connection have been dealt with
		self._reload = False  # unimplemented
		self._toggle_debug = False  # start logging a lot
		self._decrease_spawn_limit = 0
		self._increase_spawn_limit = 0
		self._refork = False  # unimplemented
		self._pdb = False  # turn on pdb debugging
		self._listen = None  # listening change ? None: no, True: listen, False: stop listeing
		self.wait_time = 5.0  # how long do we wait at maximum once we have been soft-killed
		self.local = set()  # what addresses are on our local interfaces

		self.interfaces()

		signal.signal(signal.SIGQUIT, self.sigquit)
		signal.signal(signal.SIGINT, self.sigterm)
		signal.signal(signal.SIGTERM, self.sigterm)
		# signal.signal(signal.SIGABRT, self.sigabrt)
		# signal.signal(signal.SIGHUP, self.sighup)

		signal.signal(signal.SIGTRAP, self.sigtrap)

		signal.signal(signal.SIGUSR1, self.sigusr1)
		signal.signal(signal.SIGUSR2, self.sigusr2)
		signal.signal(signal.SIGTTOU, self.sigttou)
		signal.signal(signal.SIGTTIN, self.sigttin)

		signal.signal(signal.SIGALRM, self.sigalrm)

		# make sure we always have data in history
		# (done in zero for dependencies reasons)
		self.monitor.zero()
Example #50
0
class ResolverManager(object):
    """Coordinate DNS resolution for proxied requests.

    A single non-blocking UDP client handles the common case; dedicated TCP
    clients are spawned on demand (bounded by max_workers) when a UDP answer
    is truncated.  Successful resolutions are cached in 5-second expiry
    buckets, and stale queries are retried or failed by cleanup().
    """

    resolverFactory = DNSResolver

    def __init__(self, poller, configuration, max_workers):
        self.poller = poller
        self.configuration = configuration

        self.resolver_factory = self.resolverFactory(configuration)

        # The actual work is done in the worker
        self.worker = self.resolver_factory.createUDPClient()

        # All currently active clients (one UDP and many TCP)
        self.workers = {}
        self.workers[self.worker.socket] = self.worker
        self.poller.addReadSocket('read_resolver', self.worker.socket)

        # Track the clients currently expecting results
        self.clients = {}  # client_id : (worker w_id, identifier, start time, attempt count)

        # Key should be the hostname rather than the request ID?
        self.resolving = {}  # (worker w_id, identifier) : (client_id, original, hostname, command, decision)

        # TCP workers that have not yet sent a complete request
        self.sending = {}  # sock : (client_id, original, hostname, command, decision)

        # Maximum number of entry we will cache (1024 DNS lookup per second !)
        # assuming 1k per entry, which is a lot, it mean 20Mb of memory
        # which at the default of 900 seconds of cache is 22 new host per seonds
        self.max_entries = 1024 * 20

        # track the current queries and when they were started
        self.active = []   # ordered oldest-first: (start time, client_id, sock)

        self.cache = {}        # hostname -> resolved ip (None recorded for negative answers)
        self.cached = deque()  # (expire_time, [hostname, ...]) buckets, oldest first

        self.max_workers = max_workers
        self.worker_count = len(self.workers)  # the UDP client

        self.waiting = []  # resolutions queued until a TCP worker slot frees up

        self.log = Logger('resolver', configuration.log.resolver)
        self.chained = {}  # identifier -> number of CNAME indirections followed so far

    def cacheDestination(self, hostname, ip):
        """Remember the resolved *ip* for *hostname* until the configured DNS TTL."""
        if hostname not in self.cache:
            expire_time = time.time() + self.configuration.dns.ttl
            expire_time = expire_time - expire_time % 5  # group the DNS record per buckets 5 seconds
            latest_time, latest_hosts = self.cached[-1] if self.cached else (-1, None)

            if expire_time > latest_time:
                # start a new bucket for this (later) expiry slot
                hosts = []
                self.cached.append((expire_time, hosts))
            else:
                # reuse the most recent bucket
                hosts = latest_hosts

            self.cache[hostname] = ip
            hosts.append(hostname)

    def expireCache(self):
        """Drop the oldest bucket of cached resolutions when due (or when over capacity)."""
        # expire only one set of cache entries at a time
        if self.cached:
            current_time = time.time()
            expire_time, hosts = self.cached[0]

            if current_time >= expire_time or len(self.cache) > self.max_entries:
                expire_time, hosts = self.cached.popleft()

                for hostname in hosts:
                    self.cache.pop(hostname, None)

    def cleanup(self):
        """Generator: expire queries older than the DNS timeout.

        UDP lookups with attempts remaining are retransmitted; otherwise a
        (client_id, 'rewrite', 503-page decision) tuple is yielded so the
        client receives an error page.  Spawned TCP workers are closed.
        """
        now = time.time()
        cutoff = now - self.configuration.dns.timeout
        count = 0

        for timestamp, client_id, sock in self.active:
            # self.active is ordered oldest-first: stop at the first live entry
            if timestamp > cutoff:
                break

            count += 1
            cli_data = self.clients.pop(client_id, None)
            worker = self.workers.get(sock)
            tcpudp = 'udp' if worker is self.worker else 'tcp'

            if cli_data is not None:
                w_id, identifier, active_time, resolve_count = cli_data
                data = self.resolving.pop((w_id, identifier), None)
                if not data:
                    # the request may still be in the TCP send queue
                    data = self.sending.pop(sock, None)

                if data:
                    client_id, original, hostname, command, decision = data
                    self.log.error('timeout when requesting address for %s using the %s client - attempt %s' % (hostname, tcpudp, resolve_count))

                    # only UDP lookups are retransmitted; TCP failures give up immediately
                    if resolve_count < self.configuration.dns.retries and worker is self.worker:
                        self.log.info('going to retransmit request for %s - attempt %s of %s' % (hostname, resolve_count + 1, self.configuration.dns.retries))
                        self.startResolving(client_id, command, decision, resolve_count + 1, identifier=identifier)
                        continue

                    self.log.error('given up trying to resolve %s after %s attempts' % (hostname, self.configuration.dns.retries))
                    yield client_id, 'rewrite', '\0'.join(('503', 'dns.html', '', '', '', hostname, 'peer'))

            if worker is not None:
                # never close the shared UDP client, only spawned TCP clients
                if worker is not self.worker:
                    worker.close()
                    self.workers.pop(sock)

        if count:
            self.active = self.active[count:]

    def resolves(self, command, decision):
        """Return True when *decision* names a host that needs a DNS lookup."""
        if command in ('download', 'connect'):
            hostname = decision.split('\0')[0]
            # an IP literal needs no resolution
            res = not isip(hostname)
        else:
            res = False

        return res

    def extractHostname(self, command, decision):
        """Return the hostname field of *decision*, or None for commands without one."""
        data = decision.split('\0')

        # 'download' and 'connect' decisions both carry the hostname first
        if command in ('download', 'connect'):
            hostname = data[0]

        else:
            hostname = None

        return hostname

    def resolveDecision(self, command, decision, ip):
        """Return *decision* with its hostname field replaced by *ip* (or None)."""
        if command in ('download', 'connect'):
            hostname, args = decision.split('\0', 1)
            newdecision = '\0'.join((ip, args))
        else:
            newdecision = None

        return newdecision

    def startResolving(self, client_id, command, decision, resolve_count=1, identifier=None):
        """Begin (or answer from cache) a UDP resolution for *decision*.

        Returns (identifier, response): identifier is the in-flight query id
        (None when answered immediately) and response is a ready
        (client_id, command, decision) tuple or None while resolution is pending.
        """
        hostname = self.extractHostname(command, decision)

        if hostname:
            # Resolution is already in our cache
            if hostname in self.cache:
                identifier = None
                ip = self.cache[hostname]

                if ip is not None:
                    resolved = self.resolveDecision(command, decision, ip)
                    response = client_id, command, resolved

                else:
                    # negative cache entry: the host is known not to resolve
                    newdecision = '\0'.join(('503', 'dns.html', 'http', '', '', hostname, 'peer'))
                    response = client_id, 'rewrite', newdecision
            # do not try to resolve domains which are not FQDN
            elif self.configuration.dns.fqdn and '.' not in hostname:
                identifier = None
                newdecision = '\0'.join(('200', 'dns.html', 'http', '', '', hostname, 'peer'))
                response = client_id, 'rewrite', newdecision
            # each DNS part (between the dots) must be under 256 chars
            elif max(len(p) for p in hostname.split('.')) > 255:
                identifier = None
                self.log.info('jumbo hostname: %s' % hostname)
                newdecision = '\0'.join(('503', 'dns.html', 'http', '', '', hostname, 'peer'))
                response = client_id, 'rewrite', newdecision
            # Lookup that DNS name
            else:
                identifier, _ = self.worker.resolveHost(hostname, identifier=identifier)
                response = None
                active_time = time.time()

                self.resolving[(self.worker.w_id, identifier)] = client_id, hostname, hostname, command, decision
                self.clients[client_id] = (self.worker.w_id, identifier, active_time, resolve_count)
                self.active.append((active_time, client_id, self.worker.socket))
        else:
            identifier = None
            response = None

        return identifier, response

    def beginResolvingTCP(self, client_id, command, decision, resolve_count):
        """Start a TCP resolution now if a worker slot is free, else queue it."""
        if self.worker_count < self.max_workers:
            identifier = self.newTCPResolver(client_id, command, decision, resolve_count)
            self.worker_count += 1
        else:
            self.waiting.append((client_id, command, decision, resolve_count))
            identifier = None

        return identifier

    def notifyClose(self):
        """Account for a closed TCP worker and drain the waiting queue into free slots."""
        paused = self.worker_count >= self.max_workers
        self.worker_count -= 1

        if paused and self.worker_count < self.max_workers:
            for _ in range(self.worker_count, self.max_workers):
                if self.waiting:
                    data, self.waiting = self.waiting[0], self.waiting[1:]
                    client_id, command, decision, resolve_count = data

                    identifier = self.newTCPResolver(client_id, command, decision, resolve_count)
                    self.worker_count += 1

    def newTCPResolver(self, client_id, command, decision, resolve_count):
        """Create a dedicated TCP client and start resolving over it.

        Returns the query identifier, or None when no hostname is present.
        """
        hostname = self.extractHostname(command, decision)

        if hostname:
            worker = self.resolver_factory.createTCPClient()
            self.workers[worker.socket] = worker

            identifier, all_sent = worker.resolveHost(hostname)
            active_time = time.time()
            self.resolving[(worker.w_id, identifier)] = client_id, hostname, hostname, command, decision
            self.clients[client_id] = (worker.w_id, identifier, active_time, resolve_count)
            # BUG FIX: was self.worker.socket (the shared UDP socket), which
            # meant cleanup() could never match this entry to the TCP worker
            # and so never closed it (compare the worker.socket use in getResponse)
            self.active.append((active_time, client_id, worker.socket))

            if all_sent:
                self.poller.addReadSocket('read_resolver', worker.socket)
                self.resolving[(worker.w_id, identifier)] = client_id, hostname, hostname, command, decision
            else:
                # request partially written - finish it from continueSending()
                self.poller.addWriteSocket('write_resolver', worker.socket)
                self.sending[worker.socket] = client_id, hostname, hostname, command, decision

        else:
            identifier = None

        return identifier

    def getResponse(self, sock):
        """Read and act on a DNS response arriving on *sock*.

        Returns a (client_id, command, decision) tuple once the lookup is
        finished (successfully or not), or None while more work is pending.
        Spawned TCP workers are released when their job is done.
        """
        worker = self.workers.get(sock)

        if worker:
            result = worker.getResponse(self.chained)

            if result:
                identifier, forhost, ip, completed, newidentifier, newhost, newcomplete = result
                data = self.resolving.pop((worker.w_id, identifier), None)

                # carry the CNAME-indirection count over to any follow-up query
                chain_count = self.chained.pop(identifier, 0)
                if newidentifier:
                    self.chained[newidentifier] = chain_count + 1

                if not data:
                    self.log.info('ignoring response for %s (%s) with identifier %s' % (forhost, ip, identifier))

            else:
                # unable to parse response
                self.log.error('unable to parse response')
                data = None

            if data:
                client_id, original, hostname, command, decision = data
                clidata = self.clients.pop(client_id, None)

                if completed:
                    if clidata is not None:
                        # active entries are (start time, client_id, sock)
                        key = clidata[2], client_id, worker.socket
                        if key in self.active:
                            self.active.remove(key)

                # check to see if we received an incomplete response
                if not completed:
                    # truncated answer: retry the lookup over TCP
                    newidentifier = self.beginResolvingTCP(client_id, command, decision, 1)
                    newhost = hostname
                    response = None

                # check to see if the worker started a new request
                if newidentifier:
                    if completed:
                        active_time = time.time()
                        self.resolving[(worker.w_id, newidentifier)] = client_id, original, newhost, command, decision
                        self.clients[client_id] = (worker.w_id, newidentifier, active_time, 1)
                        self.active.append((active_time, client_id, worker.socket))

                    response = None

                    if completed and newcomplete:
                        self.poller.addReadSocket('read_resolver', worker.socket)

                    elif completed and not newcomplete:
                        self.poller.addWriteSocket('write_resolver', worker.socket)
                        # NOTE(review): stores `hostname` where the resolving map
                        # above uses `newhost` -- looks inconsistent, confirm intended
                        self.sending[worker.socket] = client_id, original, hostname, command, decision

                # we just started a new (TCP) request and have not yet completely sent it
                # make sure we still know who the request is for
                elif not completed:
                    response = None

                # maybe we read the wrong response?
                elif forhost != hostname:
                    # NOTE(review): assumes clidata is not None here -- a missing
                    # client entry would raise TypeError; confirm the invariant
                    _, _, _, resolve_count = clidata
                    active_time = time.time()
                    self.resolving[(worker.w_id, identifier)] = client_id, original, hostname, command, decision
                    self.clients[client_id] = (worker.w_id, identifier, active_time, resolve_count)
                    self.active.append((active_time, client_id, worker.socket))
                    response = None

                # success
                elif ip is not None:
                    resolved = self.resolveDecision(command, decision, ip)
                    response = client_id, command, resolved
                    self.cacheDestination(original, ip)

                # not found
                else:
                    newdecision = '\0'.join(('503', 'dns.html', 'http', '', '', hostname, 'peer'))
                    response = client_id, 'rewrite', newdecision
                    #self.cacheDestination(original, ip)
            else:
                response = None

            if response or result is None:
                # this lookup is finished (or the worker is broken):
                # release any spawned TCP client, keep the shared UDP one
                if worker is not self.worker:
                    self.poller.removeReadSocket('read_resolver', sock)
                    self.poller.removeWriteSocket('write_resolver', sock)
                    worker.close()
                    self.workers.pop(sock)
                    self.notifyClose()

        else:
            response = None

        return response

    def continueSending(self, sock):
        """Continue sending data over the connected TCP socket"""
        data = self.sending.get(sock)
        if data:
            client_id, original, hostname, command, decision = data
        else:
            # BUG FIX: was unpacking SIX Nones into five names, which raised
            # ValueError whenever no request was pending on this socket
            client_id, original, hostname, command, decision = None, None, None, None, None

        worker = self.workers[sock]
        res = worker.continueSending()

        if res is False:  # we've sent all we need to send
            self.poller.removeWriteSocket('write_resolver', sock)

            if client_id in self.clients:
                w_id, identifier, active_time, resolve_count = self.clients[client_id]
                # the request is now fully written: wait for the answer
                tmp = self.sending.pop(sock)
                self.resolving[(w_id, identifier)] = tmp
                self.poller.addReadSocket('read_resolver', sock)

            else:
                self.log.error('could not find client for dns request for %s. request is being left to timeout.' % str(hostname))
Example #51
0
 def __init__(self, supervisor):
     """Bind to *supervisor*, expose its monitor and open a dedicated web logger."""
     configuration = supervisor.configuration
     self.supervisor = supervisor
     self.monitor = supervisor.monitor
     self.log = Logger('web', configuration.log.web)
     self.email_sent = False  # presumably flipped once a notification mail goes out -- set elsewhere
Example #52
0
class Redirector(Thread):
    # TODO : if the program is a function, fork and run :)
    ICAPParser = ICAPParser

    def __init__(self, configuration, name, request_box, program):
        """Set up a redirector worker thread.

        Builds the ICAP parser and loggers, creates the pipe used to hand
        results back to the main thread, picks the classification method
        matching the configured protocol, and forks the helper process.
        """
        self.configuration = configuration
        self.icap_parser = self.ICAPParser(configuration)
        self.enabled = configuration.redirector.enable
        self.protocol = configuration.redirector.protocol
        self._transparent = configuration.http.transparent
        self.log = Logger('worker ' + str(name), configuration.log.worker)
        self.usage = UsageLogger('usage', configuration.log.worker)

        # squid-style URL rewriters speak a line-based text protocol
        self.universal = self.protocol == 'url'
        if self.protocol.startswith('icap://'):
            # host part of the icap:// URI, without any path component
            self.icap = self.protocol[len('icap://'):].split('/')[0]
        else:
            self.icap = ''

        # pipe for communication with the main thread (unbuffered on both ends)
        read_end, write_end = os.pipe()
        self.response_box_write = os.fdopen(write_end, 'w', 0)  # results are written here
        self.response_box_read = os.fdopen(read_end, 'r', 0)    # read from the main thread

        self.wid = name                  # a unique name
        self.creation = time.time()      # when the thread was created
        self.request_box = request_box   # queue with HTTP headers to process

        self.program = program           # the squid redirector program to fork
        self.running = True              # the thread is active

        self.stats_timestamp = None      # time of the most recent outstanding request to generate stats

        self._proxy = 'ExaProxy-%s-id-%d' % (configuration.proxy.version, os.getpid())

        # select the classifier matching the configured redirector protocol
        if self.protocol == 'url':
            self.classify = self._classify_url
        if self.protocol.startswith('icap://'):
            self.classify = self._classify_icap

        # Do not move, we need the forking AFTER the setup
        self.process = self._createProcess()  # the forked program to handle classification
        Thread.__init__(self)

    def _createProcess(self):
        """Fork the configured redirector program and return its Popen handle.

        Returns None when the redirector is disabled or the program could
        not be started.  On success the child's stderr is switched to
        non-blocking mode so it can be drained without stalling us.
        """
        if not self.enabled:
            return

        def preexec():
            # runs in the child before exec: move it to its own process
            # group so our signals are not forwarded to it
            os.setpgrp()

        try:
            child = subprocess.Popen(
                [self.program],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                universal_newlines=self.universal,
                preexec_fn=preexec,
            )
            self.log.debug('spawn process %s' % self.program)
        except KeyboardInterrupt:
            child = None
        except (subprocess.CalledProcessError, OSError, ValueError):
            self.log.error('could not spawn process %s' % self.program)
            child = None

        if child:
            try:
                fcntl.fcntl(child.stderr, fcntl.F_SETFL, os.O_NONBLOCK)
            except IOError:
                # cannot make stderr non-blocking: give up on this child
                self.destroyProcess()
                child = None

        return child

    def destroyProcess(self):
        if not self.enabled:
            return
        self.log.debug('destroying process %s' % self.program)
        if not self.process:
            return
        try:
            if self.process:
                self.process.terminate()
                self.process.wait()
                self.log.info('terminated process PID %s' % self.process.pid)
        except OSError, e:
            # No such processs
            if e[0] != errno.ESRCH:
                self.log.error('PID %s died' % self.process.pid)