Example #1
0
class ChildFactory:
    """Spawn worker child processes and tear them down again.

    Children are started with stdin, stdout and stderr connected to pipes,
    and ``preExec`` is run in the child before the program is executed.
    """

    def preExec(self):
        """Run in the child just before exec: move it to its own process group."""
        os.setpgrp()

    def __init__(self, configuration, name):
        """Create the logger used to report on spawned workers.

        configuration -- object exposing ``log.worker`` (passed to Logger)
        name          -- worker name, appended to the logger name
        """
        self.log = Logger('worker ' + str(name), configuration.log.worker)

    def createProcess(self, program, universal=False):
        """Start ``program`` and return its Popen object, or None on failure.

        program   -- command line; it is split on single spaces to build argv
        universal -- forwarded to Popen as ``universal_newlines``

        The child's stderr pipe is switched to non-blocking mode. If that
        fails the child is terminated and None is returned.
        """
        try:
            process = subprocess.Popen(
                program.split(' '),
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                universal_newlines=universal,
                preexec_fn=self.preExec,
            )

            self.log.debug('spawn process %s' % program)

        except KeyboardInterrupt:
            process = None

        except (subprocess.CalledProcessError, OSError, ValueError):
            self.log.error('could not spawn process %s' % program)
            process = None

        if process:
            try:
                # OR the flag in: F_SETFL replaces the whole status-flag set,
                # so read the current flags first instead of overwriting them
                flags = fcntl.fcntl(process.stderr, fcntl.F_GETFL)
                fcntl.fcntl(process.stderr, fcntl.F_SETFL, flags | os.O_NONBLOCK)
            except IOError:
                self.destroyProcess(process)
                process = None

        return process

    def destroyProcess(self, process):
        """Terminate ``process`` and wait for it to exit.

        An OSError with errno ESRCH (no such process) is ignored; any other
        OSError is logged.
        """
        try:
            process.terminate()
            process.wait()
            self.log.info('terminated process PID %s' % process.pid)

        except OSError as e:
            # ``as`` and ``.errno`` work on Python 2.6+ and Python 3, unlike
            # the old ``except OSError, e`` / ``e[0]`` spelling
            if e.errno != errno.ESRCH:
                self.log.error('PID %s died' % process.pid)
Example #2
0
class ChildFactory:
	"""Spawn worker child processes and tear them down again.

	Children are started with stdin, stdout and stderr connected to pipes,
	and ``preExec`` is run in the child before the program is executed.
	"""

	def preExec (self):
		"""Run in the child just before exec: move it to its own process group."""
		os.setpgrp()

	def __init__ (self, configuration, name):
		"""Create the logger used to report on spawned workers.

		configuration -- object exposing ``log.worker`` (passed to Logger)
		name          -- worker name, appended to the logger name
		"""
		self.log = Logger('worker ' + str(name), configuration.log.worker)

	def createProcess (self, program, universal=False):
		"""Start ``program`` and return its Popen object, or None on failure.

		program   -- path of the executable; it is passed as the only argv entry
		universal -- forwarded to Popen as ``universal_newlines``

		The child's stderr pipe is switched to non-blocking mode. If that
		fails the child is terminated and None is returned.
		"""
		try:
			process = subprocess.Popen([program],
				stdin=subprocess.PIPE,
				stdout=subprocess.PIPE,
				stderr=subprocess.PIPE,
				universal_newlines=universal,
				preexec_fn=self.preExec,
			)

			self.log.debug('spawn process %s' % program)

		except KeyboardInterrupt:
			process = None

		except (subprocess.CalledProcessError,OSError,ValueError):
			self.log.error('could not spawn process %s' % program)
			process = None

		if process:
			try:
				# OR the flag in: F_SETFL replaces the whole status-flag set,
				# so read the current flags first instead of overwriting them
				flags = fcntl.fcntl(process.stderr, fcntl.F_GETFL)
				fcntl.fcntl(process.stderr, fcntl.F_SETFL, flags | os.O_NONBLOCK)
			except IOError:
				self.destroyProcess(process)
				process = None

		return process

	def destroyProcess (self, process):
		"""Terminate ``process`` and wait for it to exit.

		An OSError with errno ESRCH (no such process) is ignored; any other
		OSError is logged.
		"""
		try:
			process.terminate()
			process.wait()
			self.log.info('terminated process PID %s' % process.pid)

		except OSError as e:
			# ``as`` and ``.errno`` work on Python 2.6+ and Python 3, unlike
			# the old ``except OSError, e`` / ``e[0]`` spelling
			if e.errno != errno.ESRCH:
				self.log.error('PID %s died' % process.pid)
Example #3
0
class Supervisor (object):
	"""Top-level controller: builds the components, installs the signal
	handlers and drives the reactor from its run() loop.

	The *_frequency constants below are expressed in main-loop iterations;
	they are derived from alarm_time, the period (in seconds) of the
	ITIMER_REAL timer armed in run() and sigalrm().
	"""
	alarm_time = 0.1                           # timer period in seconds (regular backend work)
	second_frequency = int(1/alarm_time)       # iterations between history records (~1 second)
	minute_frequency = int(60/alarm_time)      # iterations between history averaging (~1 minute)
	increase_frequency = int(5/alarm_time)     # when we add workers (not used in the code visible here)
	decrease_frequency = int(60/alarm_time)    # when we remove workers (not used in the code visible here)
	saturation_frequency = int(20/alarm_time)  # iterations between connection saturation reports
	interface_frequency = int(300/alarm_time)  # iterations between checks for new interfaces

	# import os
	# clear = [hex(ord(c)) for c in os.popen('clear').read()]
	# clear = ''.join([chr(int(c,16)) for c in ['0x1b', '0x5b', '0x48', '0x1b', '0x5b', '0x32', '0x4a']])

	def __init__ (self,configuration):
		"""Build every component of the proxy and install the signal handlers.

		configuration -- NOTE(review): this argument is immediately replaced
		                 by the result of load(), so the value passed by the
		                 caller is never used.

		Start-up problems do not raise: they set self._shutdown so that the
		main loop in run() stops on its first iteration.
		"""
		# the caller-supplied configuration is discarded here
		configuration = load()
		self.configuration = configuration

		# Only here so the introspection code can find them
		self.log = Logger('supervisor', configuration.log.supervisor)
		self.log.error('Starting exaproxy version %s' % configuration.proxy.version)

		self.signal_log = Logger('signal', configuration.log.signal)
		self.log_writer = SysLogWriter('log', configuration.log.destination, configuration.log.enable, level=configuration.log.level)
		self.usage_writer = UsageWriter('usage', configuration.usage.destination, configuration.usage.enable)

		# flush pending log messages at interpreter exit
		# (sys.exitfunc is a Python 2 only hook)
		sys.exitfunc = self.log_writer.writeMessages

		self.log_writer.setIdentifier(configuration.daemon.identifier)
		#self.usage_writer.setIdentifier(configuration.daemon.identifier)

		if configuration.debug.log:
			self.log_writer.toggleDebug()
			self.usage_writer.toggleDebug()

		self.log.error('python version %s' % sys.version.replace(os.linesep,' '))
		self.log.debug('starting %s' % sys.argv[0])

		self.pid = PID(self.configuration)

		self.daemon = Daemon(self.configuration)
		self.poller = Poller(self.configuration.daemon)

		self.poller.setupRead('read_proxy')       # Listening proxy sockets
		self.poller.setupRead('read_web')         # Listening webserver sockets
		self.poller.setupRead('read_icap')        # Listening icap sockets
		self.poller.setupRead('read_redirector')  # Pipes carrying responses from the redirector process
		self.poller.setupRead('read_resolver')    # Sockets currently listening for DNS responses

		self.poller.setupRead('read_client')      # Active clients
		self.poller.setupRead('opening_client')   # Clients we have not yet read a request from
		self.poller.setupWrite('write_client')    # Active clients with buffered data to send
		self.poller.setupWrite('write_resolver')  # Active DNS requests with buffered data to send

		self.poller.setupRead('read_download')      # Established connections
		self.poller.setupWrite('write_download')    # Established connections we have buffered data to send to
		self.poller.setupWrite('opening_download')  # Opening connections

		self.monitor = Monitor(self)
		self.page = Page(self)
		self.content = ContentManager(self,configuration)
		self.client = ClientManager(self.poller, configuration)
		self.resolver = ResolverManager(self.poller, self.configuration, configuration.dns.retries*10)
		self.proxy = Server('http proxy',self.poller,'read_proxy', configuration.http.connections)
		self.web = Server('web server',self.poller,'read_web', configuration.web.connections)
		self.icap = Server('icap server',self.poller,'read_icap', configuration.icap.connections)

		# flags below are set by the signal handlers and consumed by run()
		self._shutdown = True if self.daemon.filemax == 0 else False  # stop the program
		self._softstop = False  # stop once all current connection have been dealt with
		self._reload = False  # unimplemented
		self._toggle_debug = False  # start logging a lot
		self._decrease_spawn_limit = 0
		self._increase_spawn_limit = 0
		self._refork = False  # unimplemented
		self._pdb = False  # turn on pdb debugging
		self._listen = None  # listening change ? None: no, True: listen, False: stop listening
		self.wait_time = 5.0  # how long do we wait at maximum once we have been soft-killed
		self.local = set()  # what addresses are on our local interfaces

		if not self.initialise():
			self._shutdown = True

		# a truthy return from drop_privileges() is treated as a failure
		elif self.daemon.drop_privileges():
			self.log.critical('Could not drop privileges to \'%s\'. Refusing to run as root' % self.daemon.user)
			self.log.critical('Set the environment value USER to change the unprivileged user')
			self._shutdown = True

		# NOTE: the setup below still runs when _shutdown was set above
		# fork the redirector process before performing any further setup
		redirector = fork_redirector(self.poller, self.configuration)

		# create threads _after_ all forking is done
		self.redirector = redirector_message_thread(redirector)

		self.reactor = Reactor(self.configuration, self.web, self.proxy, self.icap, self.redirector, self.content, self.client, self.resolver, self.log_writer, self.usage_writer, self.poller)

		# populate self.local with the addresses of the local interfaces
		self.interfaces()

		# SIGINT shares the SIGTERM handler; SIGQUIT requests a soft stop
		signal.signal(signal.SIGQUIT, self.sigquit)
		signal.signal(signal.SIGINT, self.sigterm)
		signal.signal(signal.SIGTERM, self.sigterm)
		# signal.signal(signal.SIGABRT, self.sigabrt)
		# signal.signal(signal.SIGHUP, self.sighup)

		signal.signal(signal.SIGTRAP, self.sigtrap)

		signal.signal(signal.SIGUSR1, self.sigusr1)
		signal.signal(signal.SIGUSR2, self.sigusr2)
		signal.signal(signal.SIGTTOU, self.sigttou)
		signal.signal(signal.SIGTTIN, self.sigttin)

		signal.signal(signal.SIGALRM, self.sigalrm)

		# make sure we always have data in history
		# (done in zero for dependencies reasons)
		self.monitor.zero()

	def exit (self):
		sys.exit()

	def sigquit (self,signum, frame):
		if self._softstop:
			self.signal_log.critical('multiple SIG INT received, shutdown')
			self._shutdown = True
		else:
			self.signal_log.critical('SIG INT received, soft-stop')
			self._softstop = True
			self._listen = False

	def sigterm (self,signum, frame):
		self.signal_log.critical('SIG TERM received, shutdown request')
		if os.environ.get('PDB',False):
			self._pdb = True
		else:
			self._shutdown = True

	# def sigabrt (self,signum, frame):
	# 	self.signal_log.info('SIG INFO received, refork request')
	# 	self._refork = True

	# def sighup (self,signum, frame):
	# 	self.signal_log.info('SIG HUP received, reload request')
	# 	self._reload = True

	def sigtrap (self,signum, frame):
		self.signal_log.critical('SIG TRAP received, toggle debug')
		self._toggle_debug = True


	def sigusr1 (self,signum, frame):
		self.signal_log.critical('SIG USR1 received, decrease worker number')
		self._decrease_spawn_limit += 1

	def sigusr2 (self,signum, frame):
		self.signal_log.critical('SIG USR2 received, increase worker number')
		self._increase_spawn_limit += 1


	def sigttou (self,signum, frame):
		self.signal_log.critical('SIG TTOU received, stop listening')
		self._listen = False

	def sigttin (self,signum, frame):
		self.signal_log.critical('SIG IN received, star listening')
		self._listen = True


	def sigalrm (self,signum, frame):
		self.reactor.running = False
		signal.setitimer(signal.ITIMER_REAL,self.alarm_time,self.alarm_time)


	def interfaces (self):
		local = set(['127.0.0.1','::1'])
		for interface in getifaddrs():
			if interface.family not in (AF_INET,AF_INET6):
				continue
			if interface.address not in self.local:
				self.log.info('found new local ip %s (%s)' % (interface.address,interface.name))
			local.add(interface.address)
		for ip in self.local:
			if ip not in local:
				self.log.info('removed local ip %s' % ip)
		if local == self.local:
			self.log.info('no ip change')
		else:
			self.local = local

	def run (self):
		signal.setitimer(signal.ITIMER_REAL,self.alarm_time,self.alarm_time)

		count_second = 0
		count_minute = 0
		count_saturation = 0
		count_interface = 0

		while True:
			count_second = (count_second + 1) % self.second_frequency
			count_minute = (count_minute + 1) % self.minute_frequency

			count_saturation = (count_saturation + 1) % self.saturation_frequency
			count_interface = (count_interface + 1) % self.interface_frequency

			try:
				if self._pdb:
					self._pdb = False
					import pdb
					pdb.set_trace()


				# check for IO change with select
				status = self.reactor.run()
				if status is False:
					self._shutdown = True

				# must follow the reactor so we are sure to go through the reactor at least once
				# and flush any logs
				if self._shutdown:
					self._shutdown = False
					self.shutdown()
					break
				elif self._reload:
					self._reload = False
					self.reload()
				elif self._refork:
					self._refork = False
					self.signal_log.warning('refork not implemented')
					# stop listening to new connections
					# refork the program (as we have been updated)
					# just handle current open connection


				if self._softstop:
					if self._listen == False:
						self.proxy.rejecting()
						self._listen = None
					if self.client.softstop():
						self._shutdown = True
				# only change listening if we are not shutting down
				elif self._listen is not None:
					if self._listen:
						self._shutdown = not self.proxy.accepting()
						self._listen = None
					else:
						self.proxy.rejecting()
						self._listen = None


				if self._toggle_debug:
					self._toggle_debug = False
					self.log_writer.toggleDebug()


				if self._decrease_spawn_limit:
					count = self._decrease_spawn_limit
					self.redirector.decreaseSpawnLimit(count)
					self._decrease_spawn_limit = 0

				if self._increase_spawn_limit:
					count = self._increase_spawn_limit
					self.redirector.increaseSpawnLimit(count)
					self._increase_spawn_limit = 0

				# save our monitoring stats
				if count_second == 0:
					self.monitor.second()
					expired = self.reactor.client.expire()
				else:
					expired = 0

				if expired:
					self.proxy.notifyClose(None, count=expired)

				if count_minute == 0:
					self.monitor.minute()

				# report if we saw too many connections
				if count_saturation == 0:
					self.proxy.saturation()
					self.web.saturation()

				if self.configuration.daemon.poll_interfaces and count_interface == 0:
					self.interfaces()

			except KeyboardInterrupt:
				self.log.critical('^C received')
				self._shutdown = True
			except OSError,e:
				# This shoould never happen as we are limiting how many connections we accept
				if e.errno == 24:  # Too many open files
					self.log.critical('Too many opened files, shutting down')
					for line in traceback.format_exc().split('\n'):
						self.log.critical(line)
					self._shutdown = True
				else:
					self.log.critical('unrecoverable io error')
					for line in traceback.format_exc().split('\n'):
						self.log.critical(line)
					self._shutdown = True

			finally:
Example #4
0
			try:
				poll([f], [], [f], 0.1)
			except socket.error:
				print "CANNOT POLL (read): %s" % str(f)
				log.error('can not poll (read) : %s' % str(f))

		for f in write:
			try:
				poll([], [f], [f], 0.1)
			except socket.error:
				print "CANNOT POLL (write): %s" % str(f)
				log.error('can not poll (write) : %s' % str(f))

		raise e
	except (ValueError, AttributeError, TypeError), e:
		log.error("fatal error encountered during select - %s %s" % (type(e),str(e)))
		raise e
	except select.error, e:
		if e.args[0] in errno_block:
			return [], [], []
		log.error("fatal error encountered during select - %s %s" % (type(e),str(e)))
		raise e
	except KeyboardInterrupt,e:
		raise e
	except Exception, e:
		log.critical("fatal error encountered during select - %s %s" % (type(e),str(e)))
		raise e

	return r, w, x

Example #5
0
class ResolverManager(object):
    resolverFactory = DNSResolver

    def __init__(self, poller, configuration, max_workers):
        """Set up the shared UDP resolver client and the bookkeeping tables.

        poller        -- poller on which resolver sockets are registered
        configuration -- configuration object (dns and log sections are read)
        max_workers   -- upper bound on the number of resolver workers
        """
        self.poller = poller
        self.configuration = configuration

        factory = self.resolverFactory(configuration)
        self.resolver_factory = factory

        # the single UDP client does the actual work
        self.worker = factory.createUDPClient()

        # every active client, keyed by socket (one UDP and many TCP)
        self.workers = {self.worker.socket: self.worker}
        self.poller.addReadSocket('read_resolver', self.worker.socket)

        # clients currently expecting results
        self.clients = {}  # client_id : identifier

        # requests in flight (should the key be the hostname instead?)
        self.resolving = {}  # identifier, worker_id :

        # TCP workers that have not yet sent a complete request
        self.sending = {}  # sock :

        # Maximum number of entries we will cache (1024 DNS lookups per second!)
        # assuming 1k per entry, which is a lot, it means 20Mb of memory
        # which at the default of 900 seconds of cache is 22 new hosts per second
        self.max_entries = 20 * 1024

        # current queries together with the time they were started
        self.active = []

        # hostname -> ip, plus the expiry buckets used to age entries out
        self.cache = {}
        self.cached = deque()

        self.max_workers = max_workers
        self.worker_count = len(self.workers)  # the UDP client

        # TCP requests queued until a worker slot frees up
        self.waiting = []

        self.log = Logger('resolver', configuration.log.resolver)
        self.chained = {}

    def cacheDestination(self, hostname, ip):
        if hostname not in self.cache:
            expire_time = time.time() + self.configuration.dns.ttl
            expire_time = expire_time - expire_time % 5  # group the DNS record per buckets 5 seconds
            latest_time, latest_hosts = self.cached[-1] if self.cached else (
                -1, None)

            if expire_time > latest_time:
                hosts = []
                self.cached.append((expire_time, hosts))
            else:
                hosts = latest_hosts

            self.cache[hostname] = ip
            hosts.append(hostname)

    def expireCache(self):
        # expire only one set of cache entries at a time
        if self.cached:
            current_time = time.time()
            expire_time, hosts = self.cached[0]

            if current_time >= expire_time or len(
                    self.cache) > self.max_entries:
                expire_time, hosts = self.cached.popleft()

                for hostname in hosts:
                    self.cache.pop(hostname, None)

    def cleanup(self):
        """Time out DNS requests older than configuration.dns.timeout.

        This is a generator: for every request that is given up on it yields
        (client_id, 'rewrite', decision) where decision is a NUL-joined 503
        'dns.html' response. Requests sent through the UDP client are first
        retransmitted until configuration.dns.retries attempts were made.

        NOTE: self.active is only trimmed once the generator has been fully
        consumed.
        """
        now = time.time()
        cutoff = now - self.configuration.dns.timeout
        count = 0

        # the loop stops at the first entry newer than the cutoff
        # (assumes self.active is ordered by start time - entries are appended)
        for timestamp, client_id, sock in self.active:
            if timestamp > cutoff:
                break

            count += 1
            cli_data = self.clients.pop(client_id, None)
            worker = self.workers.get(sock)
            tcpudp = 'udp' if worker is self.worker else 'tcp'

            if cli_data is not None:
                w_id, identifier, active_time, resolve_count = cli_data
                # the request is either waiting for a response or, for a TCP
                # worker, still being sent
                data = self.resolving.pop((w_id, identifier), None)
                if not data:
                    data = self.sending.pop(sock, None)

                if data:
                    client_id, original, hostname, command, decision = data
                    self.log.error(
                        'timeout when requesting address for %s using the %s client - attempt %s'
                        % (hostname, tcpudp, resolve_count))

                    if resolve_count < self.configuration.dns.retries and worker is self.worker:
                        self.log.info(
                            'going to retransmit request for %s - attempt %s of %s'
                            % (hostname, resolve_count + 1,
                               self.configuration.dns.retries))
                        # startResolving() may append a fresh entry to
                        # self.active while this loop is iterating over it
                        self.startResolving(client_id,
                                            command,
                                            decision,
                                            resolve_count + 1,
                                            identifier=identifier)
                        continue

                    self.log.error(
                        'given up trying to resolve %s after %s attempts' %
                        (hostname, self.configuration.dns.retries))
                    # NOTE(review): the third field is '' here while the other
                    # 503 responses in this class use 'http' - confirm
                    yield client_id, 'rewrite', '\0'.join(
                        ('503', 'dns.html', '', '', '', hostname, 'peer'))

            # a timed out TCP worker is closed and forgotten; the shared UDP
            # client is kept
            if worker is not None:
                if worker is not self.worker:
                    worker.close()
                    self.workers.pop(sock)

        # drop the entries that were processed above
        if count:
            self.active = self.active[count:]

    def resolves(self, command, decision):
        """Tell whether acting on ``decision`` needs a DNS lookup.

        True only for 'download' and 'connect' commands whose first
        NUL-separated field is not already an IP address.
        """
        if command not in ('download', 'connect'):
            return False

        target = decision.split('\0')[0]
        return False if isip(target) else True

    def extractHostname(self, command, decision):
        """Return the hostname carried by ``decision``.

        The hostname is the first NUL-separated field of the decision for
        'download' and 'connect' commands; any other command yields None.
        """
        fields = decision.split('\0')

        if command in ('download', 'connect'):
            return fields[0]

        return None

    def resolveDecision(self, command, decision, ip):
        """Return ``decision`` with its leading hostname replaced by ``ip``.

        Only 'download' and 'connect' decisions are rewritten; None is
        returned for any other command.
        """
        if command not in ('download', 'connect'):
            return None

        # the decision must hold at least one NUL separator
        _, remainder = decision.split('\0', 1)
        return '\0'.join((ip, remainder))

    def startResolving(self,
                       client_id,
                       command,
                       decision,
                       resolve_count=1,
                       identifier=None):
        hostname = self.extractHostname(command, decision)

        if hostname:
            # Resolution is already in our cache
            if hostname in self.cache:
                identifier = None
                ip = self.cache[hostname]

                if ip is not None:
                    resolved = self.resolveDecision(command, decision, ip)
                    response = client_id, command, resolved

                else:
                    newdecision = '\0'.join(
                        ('503', 'dns.html', 'http', '', '', hostname, 'peer'))
                    response = client_id, 'rewrite', newdecision
            # do not try to resolve domains which are not FQDN
            elif self.configuration.dns.fqdn and '.' not in hostname:
                identifier = None
                newdecision = '\0'.join(
                    ('200', 'dns.html', 'http', '', '', hostname, 'peer'))
                response = client_id, 'rewrite', newdecision
            # each DNS part (between the dots) must be under 256 chars
            elif max(len(p) for p in hostname.split('.')) > 255:
                identifier = None
                self.log.info('jumbo hostname: %s' % hostname)
                newdecision = '\0'.join(
                    ('503', 'dns.html', 'http', '', '', hostname, 'peer'))
                response = client_id, 'rewrite', newdecision
            # Lookup that DNS name
            else:
                identifier, _ = self.worker.resolveHost(hostname,
                                                        identifier=identifier)
                response = None
                active_time = time.time()

                self.resolving[(
                    self.worker.w_id, identifier
                )] = client_id, hostname, hostname, command, decision
                self.clients[client_id] = (self.worker.w_id, identifier,
                                           active_time, resolve_count)
                self.active.append(
                    (active_time, client_id, self.worker.socket))
        else:
            identifier = None
            response = None

        return identifier, response

    def beginResolvingTCP(self, client_id, command, decision, resolve_count):
        if self.worker_count < self.max_workers:
            identifier = self.newTCPResolver(client_id, command, decision,
                                             resolve_count)
            self.worker_count += 1
        else:
            self.waiting.append((client_id, command, decision, resolve_count))
            identifier = None

        return identifier

    def notifyClose(self):
        paused = self.worker_count >= self.max_workers
        self.worker_count -= 1

        if paused and self.worker_count < self.max_workers:
            for _ in range(self.worker_count, self.max_workers):
                if self.waiting:
                    data, self.waiting = self.waiting[0], self.waiting[1:]
                    client_id, command, decision, resolve_count = data

                    identifier = self.newTCPResolver(client_id, command,
                                                     decision, resolve_count)
                    self.worker_count += 1

    def newTCPResolver(self, client_id, command, decision, resolve_count):
        hostname = self.extractHostname(command, decision)

        if hostname:
            worker = self.resolver_factory.createTCPClient()
            self.workers[worker.socket] = worker

            identifier, all_sent = worker.resolveHost(hostname)
            active_time = time.time()
            self.resolving[(
                worker.w_id,
                identifier)] = client_id, hostname, hostname, command, decision
            self.clients[client_id] = (worker.w_id, identifier, active_time,
                                       resolve_count)
            self.active.append((active_time, client_id, self.worker.socket))

            if all_sent:
                self.poller.addReadSocket('read_resolver', worker.socket)
                self.resolving[(
                    worker.w_id, identifier
                )] = client_id, hostname, hostname, command, decision
            else:
                self.poller.addWriteSocket('write_resolver', worker.socket)
                self.sending[
                    worker.
                    socket] = client_id, hostname, hostname, command, decision

        else:
            identifier = None

        return identifier

    def getResponse(self, sock):
        """Read a DNS response from the worker owning ``sock``.

        Returns a (client_id, command, decision) tuple once the request is
        finished (resolved, or rewritten to a 503 'dns.html' response when
        the name was not found), or None when nothing can be handed back to
        the client yet (unknown socket, unparsable or unexpected response,
        or a follow-up request was started).
        """
        worker = self.workers.get(sock)

        if worker:
            result = worker.getResponse(self.chained)

            if result:
                identifier, forhost, ip, completed, newidentifier, newhost, newcomplete = result
                data = self.resolving.pop((worker.w_id, identifier), None)

                # carry the chain counter over to the follow-up request
                chain_count = self.chained.pop(identifier, 0)
                if newidentifier:
                    self.chained[newidentifier] = chain_count + 1

                if not data:
                    self.log.info(
                        'ignoring response for %s (%s) with identifier %s' %
                        (forhost, ip, identifier))

            else:
                # unable to parse response
                self.log.error('unable to parse response')
                data = None

            if data:
                client_id, original, hostname, command, decision = data
                clidata = self.clients.pop(client_id, None)

                # the request is over: stop tracking it for timeouts
                if completed:
                    if clidata is not None:
                        key = clidata[2], client_id, worker.socket
                        if key in self.active:
                            self.active.remove(key)

                # check to see if we received an incomplete response
                # (the request is then retried through a TCP worker)
                if not completed:
                    newidentifier = self.beginResolvingTCP(
                        client_id, command, decision, 1)
                    newhost = hostname
                    response = None

                # check to see if the worker started a new request
                if newidentifier:
                    if completed:
                        active_time = time.time()
                        self.resolving[(
                            worker.w_id, newidentifier
                        )] = client_id, original, newhost, command, decision
                        self.clients[client_id] = (worker.w_id, newidentifier,
                                                   active_time, 1)
                        self.active.append(
                            (active_time, client_id, worker.socket))

                    response = None

                    if completed and newcomplete:
                        self.poller.addReadSocket('read_resolver',
                                                  worker.socket)

                    elif completed and not newcomplete:
                        self.poller.addWriteSocket('write_resolver',
                                                   worker.socket)
                        self.sending[
                            worker.
                            socket] = client_id, original, hostname, command, decision

                # we just started a new (TCP) request and have not yet completely sent it
                # make sure we still know who the request is for
                elif not completed:
                    response = None

                # maybe we read the wrong response?
                # (the request is put back in the tables to wait again)
                elif forhost != hostname:
                    # NOTE(review): clidata may be None here (clients.pop
                    # default), which would make this unpacking fail - confirm
                    _, _, _, resolve_count = clidata
                    active_time = time.time()
                    self.resolving[(
                        worker.w_id, identifier
                    )] = client_id, original, hostname, command, decision
                    self.clients[client_id] = (worker.w_id, identifier,
                                               active_time, resolve_count)
                    self.active.append((active_time, client_id, worker.socket))
                    response = None

                # success
                elif ip is not None:
                    resolved = self.resolveDecision(command, decision, ip)
                    response = client_id, command, resolved
                    self.cacheDestination(original, ip)

                # not found
                else:
                    newdecision = '\0'.join(
                        ('503', 'dns.html', 'http', '', '', hostname, 'peer'))
                    response = client_id, 'rewrite', newdecision
                    #self.cacheDestination(original, ip)
            else:
                response = None

            # a TCP worker is single use: close it once it produced a
            # response, or when its result is None
            if response or result is None:
                if worker is not self.worker:
                    self.poller.removeReadSocket('read_resolver', sock)
                    self.poller.removeWriteSocket('write_resolver', sock)
                    worker.close()
                    self.workers.pop(sock)
                    self.notifyClose()

        else:
            response = None

        return response

    def continueSending(self, sock):
        """Continue sending data over the connected TCP socket"""
        data = self.sending.get(sock)
        if data:
            client_id, original, hostname, command, decision = data
        else:
            client_id, original, hostname, command, decision = None, None, None, None, None, None

        worker = self.workers[sock]
        res = worker.continueSending()

        if res is False:  # we've sent all we need to send
            self.poller.removeWriteSocket('write_resolver', sock)

            if client_id in self.clients:
                w_id, identifier, active_time, resolve_count = self.clients[
                    client_id]
                tmp = self.sending.pop(sock)
                self.resolving[(w_id, identifier)] = tmp
                self.poller.addReadSocket('read_resolver', sock)

            else:
                self.log.error(
                    'could not find client for dns request for %s. request is being left to timeout.'
                    % str(hostname))
Example #6
0
class ResolverManager (object):
	resolverFactory = DNSResolver

	def __init__ (self, poller, configuration, max_workers):
		"""Set up the bookkeeping and start listening on the shared UDP client.

		poller: object the resolver sockets are registered with
		configuration: settings passed to the resolver factory and the logger
		max_workers: ceiling compared against worker_count before opening TCP workers
		"""
		self.configuration = configuration
		self.poller = poller
		self.max_workers = max_workers

		self.resolver_factory = self.resolverFactory(configuration)

		# the UDP client is the worker doing the actual lookups
		self.worker = self.resolver_factory.createUDPClient()

		# every active client keyed by its socket (one UDP and many TCP)
		self.workers = {self.worker.socket: self.worker}
		self.poller.addReadSocket('read_resolver', self.worker.socket)
		self.worker_count = len(self.workers)  # only the UDP client so far

		# clients currently expecting a result
		self.clients = {}  # client_id : (worker id, identifier, start time, attempt)

		# requests waiting for an answer
		# (should the key be the hostname rather than the request ID?)
		self.resolving = {}  # (worker id, identifier) : request data

		# TCP workers that have not yet sent a complete request
		self.sending = {}  # sock : request data

		# TCP requests held back until a worker slot frees up
		self.waiting = []

		# current queries and when they were started
		self.active = []

		# Upper bound on cached entries: assuming 1k per entry (which is a
		# lot) this is about 20Mb of memory; the original note equates it to
		# 22 new hosts per second with the default 900 seconds of cache.
		self.max_entries  = 1024*20

		self.cache = {}
		self.cached = deque()

		self.chained = {}

		self.log = Logger('resolver', configuration.log.resolver)

	def cacheDestination (self, hostname, ip):
		"""Remember the address found for a hostname.

		A hostname already present in the cache is left untouched. New entries
		are attached to an expiry bucket in self.cached so that expireCache()
		can drop them together.
		"""
		if hostname in self.cache:
			return

		# round the expiry down to a multiple of 5 seconds so records share buckets
		deadline = time.time() + self.configuration.dns.ttl
		deadline -= deadline % 5

		if self.cached:
			newest_deadline, bucket = self.cached[-1]
		else:
			newest_deadline, bucket = -1, None

		# open a new bucket when this entry expires later than the newest one
		if deadline > newest_deadline:
			bucket = []
			self.cached.append((deadline, bucket))

		self.cache[hostname] = ip
		bucket.append(hostname)

	def expireCache (self):
		"""Drop at most one bucket of cached hostnames per call.

		The oldest bucket is removed when its expiry time has passed or when
		the cache holds more than max_entries hostnames.
		"""
		# nothing recorded, nothing to expire
		if not self.cached:
			return

		now = time.time()
		oldest_deadline = self.cached[0][0]

		overdue = now >= oldest_deadline
		if not overdue and len(self.cache) <= self.max_entries:
			return

		_, stale = self.cached.popleft()
		for name in stale:
			self.cache.pop(name, None)


	def cleanup(self):
		"""Time out stale DNS requests, retrying over UDP or giving up.

		This is a generator: nothing runs until it is iterated, and the final
		trimming of self.active only happens once it has been fully consumed.
		For every request that is abandoned it yields
		(client_id, 'rewrite', ('503', 'dns.html', '', '', '', hostname, 'peer')).
		"""
		now = time.time()
		cutoff = now - self.configuration.dns.timeout
		# number of leading entries of self.active that have been handled
		count = 0

		# the loop stops at the first entry newer than the cutoff, so
		# self.active is presumably ordered oldest first
		for timestamp, client_id, sock in self.active:
			if timestamp > cutoff:
				break

			count += 1
			cli_data = self.clients.pop(client_id, None)
			worker = self.workers.get(sock)
			# only used for the log message below
			tcpudp = 'udp' if worker is self.worker else 'tcp'

			if cli_data is not None:
				w_id, identifier, active_time, resolve_count = cli_data
				# the request is either waiting for an answer or still being sent
				data = self.resolving.pop((w_id, identifier), None)
				if not data:
					data = self.sending.pop(sock, None)

				if data:
					client_id, original, hostname, command, decision = data
					self.log.error('timeout when requesting address for %s using the %s client - attempt %s' % (hostname, tcpudp, resolve_count))

					# only requests made through the shared UDP client are retransmitted
					if resolve_count < self.configuration.dns.retries and worker is self.worker:
						self.log.info('going to retransmit request for %s - attempt %s of %s' % (hostname, resolve_count+1, self.configuration.dns.retries))
						# startResolving appends a fresh entry to self.active while we iterate it
						self.startResolving(client_id, command, decision, resolve_count+1, identifier=identifier)
						continue

					self.log.error('given up trying to resolve %s after %s attempts' % (hostname, self.configuration.dns.retries))
					yield client_id, 'rewrite', ('503', 'dns.html', '', '', '', hostname, 'peer')

			# dedicated (non-UDP) workers are closed and forgotten
			# NOTE(review): unlike getResponse, the socket is not removed from the
			# poller and notifyClose() is not called here - confirm this is intended
			if worker is not None:
				if worker is not self.worker:
					worker.close()
					self.workers.pop(sock)

		# drop the entries handled above, keeping anything appended meanwhile
		if count:
			self.active = self.active[count:]

	def resolves(self, command, decision):
		"""Tell whether the decision needs a DNS lookup.

		Only 'download' and 'connect' commands are considered; for those the
		first field of the decision is checked with isip() and a lookup is
		needed when it is not an IP. Returns True or False.
		"""
		if command not in ('download', 'connect'):
			return False

		return not isip(decision[0])

	def extractHostname(self, command, decision):
		"""Return the first field of a 'download' or 'connect' decision, else None."""
		if command not in ('download', 'connect'):
			return None

		return decision[0]

	def resolveDecision(self, command, decision, ip):
		"""Return the decision with its leading hostname replaced by ip.

		Applies to 'download' and 'connect' commands only; any other command
		gives None.
		"""
		if command not in ('download', 'connect'):
			return None

		# the hostname itself is not needed, only what follows it
		_, remainder = decision[0], decision[1:]
		return (ip,) + remainder

	def startResolving(self, client_id, command, decision, resolve_count=1, identifier=None):
		"""Answer from the cache, refuse, or send a lookup through the UDP client.

		Returns (identifier, response). response is a tuple to hand back to
		the client when the request can be answered immediately, and None when
		a lookup was sent (identifier then names it) or when the command
		carries no hostname.
		"""
		hostname = self.extractHostname(command, decision)

		# this command has nothing to look up
		if not hostname:
			return None, None

		# resolution is already in our cache (skipped when an identifier is supplied)
		if hostname in self.cache and identifier is None:
			ip = self.cache[hostname]

			if ip is None:
				return identifier, (client_id, 'rewrite', '503', 'dns.html', 'http', '', '', hostname, 'peer')

			resolved = self.resolveDecision(command, decision, ip)
			return identifier, (client_id, command) + resolved

		# do not try to resolve domains which are not FQDN
		if self.configuration.dns.fqdn and '.' not in hostname:
			return None, (client_id, 'rewrite', '200', 'dns.html', 'http', '', '', hostname, 'peer')

		# refuse hostnames with a part (between the dots) longer than 255 chars
		if max(len(part) for part in hostname.split('.')) > 255:
			self.log.info('jumbo hostname: %s' % hostname)
			return None, (client_id, 'rewrite', '503', 'dns.html', 'http', '', '', hostname, 'peer')

		# look that DNS name up and remember who is waiting for it
		identifier, _ = self.worker.resolveHost(hostname, identifier=identifier)
		started = time.time()

		self.resolving[(self.worker.w_id, identifier)] = (client_id, hostname, hostname, command, decision)
		self.clients[client_id] = (self.worker.w_id, identifier, started, resolve_count)
		self.active.append((started, client_id, self.worker.socket))

		return identifier, None

	def beginResolvingTCP (self, client_id, command, decision, resolve_count):
		"""Start a TCP lookup now, or queue it when every worker slot is taken.

		Returns the identifier given by newTCPResolver(), or None when the
		request was appended to self.waiting instead.
		"""
		# no free slot: park the request until notifyClose() makes room
		if self.worker_count >= self.max_workers:
			self.waiting.append((client_id, command, decision, resolve_count))
			return None

		identifier = self.newTCPResolver(client_id, command, decision, resolve_count)
		self.worker_count += 1
		return identifier

	def notifyClose (self):
		"""Account for a closed worker and restart queued TCP requests.

		When the pool was full before this close, the freed slots are handed
		to the oldest entries of self.waiting.
		"""
		was_full = self.worker_count >= self.max_workers
		self.worker_count -= 1

		if not (was_full and self.worker_count < self.max_workers):
			return

		# one pass per slot that was free when we got here
		for _ in range(self.worker_count, self.max_workers):
			if not self.waiting:
				continue

			pending = self.waiting[0]
			self.waiting = self.waiting[1:]
			client_id, command, decision, resolve_count = pending

			self.newTCPResolver(client_id, command, decision, resolve_count)
			self.worker_count += 1

	def newTCPResolver (self, client_id, command, decision, resolve_count):
		"""Open a dedicated TCP worker and send it the lookup for this request.

		Returns the identifier of the new query, or None when the command
		carries no hostname (no worker is created in that case).
		"""
		hostname = self.extractHostname(command, decision)
		if not hostname:
			return None

		worker = self.resolver_factory.createTCPClient()
		self.workers[worker.socket] = worker

		identifier, all_sent = worker.resolveHost(hostname)
		started = time.time()
		request = (client_id, hostname, hostname, command, decision)
		key = (worker.w_id, identifier)

		self.resolving[key] = request
		self.clients[client_id] = (worker.w_id, identifier, started, resolve_count)
		# NOTE(review): this records the shared UDP client's socket rather than
		# worker.socket, while getResponse looks entries up by worker.socket -
		# confirm which socket is intended here
		self.active.append((started, client_id, self.worker.socket))

		if all_sent:
			# the whole request went out: wait for the answer
			self.poller.addReadSocket('read_resolver', worker.socket)
			self.resolving[key] = request
		else:
			# part of the request is still buffered: keep writing
			self.poller.addWriteSocket('write_resolver', worker.socket)
			self.sending[worker.socket] = request

		return identifier

	def getResponse(self, sock):
		"""Read one answer from the worker bound to sock and act on it.

		Returns a tuple starting with the client id when the request is
		finished: (client_id, command) followed by the resolved decision on
		success, or (client_id, 'rewrite', '503', 'dns.html', ...) when no
		address was found. Returns None when there is nothing to hand back
		yet (unknown socket, unparsable or unexpected answer, or a follow-up
		query was started).
		"""
		worker = self.workers.get(sock)

		if worker:
			result = worker.getResponse(self.chained)

			if result:
				identifier, forhost, ip, completed, newidentifier, newhost, newcomplete = result
				data = self.resolving.pop((worker.w_id, identifier), None)

				# carry the chain counter over to the follow-up query, if any
				chain_count = self.chained.pop(identifier, 0)
				if newidentifier:
					self.chained[newidentifier] = chain_count + 1

				if not data:
					self.log.info('ignoring response for %s (%s) with identifier %s' % (forhost, ip, identifier))

			else:
				# unable to parse response
				self.log.error('unable to parse response')
				data = None

			if data:
				client_id, original, hostname, command, decision = data
				clidata = self.clients.pop(client_id, None)

				# the request is no longer pending: forget its timeout entry
				if completed:
					if clidata is not None:
						key = clidata[2], client_id, worker.socket
						if key in self.active:
							self.active.remove(key)

				# check to see if we received an incomplete response
				if not completed:
					# ask again over TCP; the identifier is None when the request was queued
					newidentifier = self.beginResolvingTCP(client_id, command, decision, 1)
					newhost = hostname
					response = None

				# check to see if the worker started a new request
				if newidentifier:
					if completed:
						# the same worker issued a follow-up query: track it like a fresh request
						active_time = time.time()
						self.resolving[(worker.w_id, newidentifier)] = client_id, original, newhost, command, decision
						self.clients[client_id] = (worker.w_id, newidentifier, active_time, 1)
						self.active.append((active_time, client_id, worker.socket))

					response = None

					if completed and newcomplete:
						self.poller.addReadSocket('read_resolver', worker.socket)

					elif completed and not newcomplete:
						self.poller.addWriteSocket('write_resolver', worker.socket)
						self.sending[worker.socket] = client_id, original, hostname, command, decision

				# we just started a new (TCP) request and have not yet completely sent it
				# make sure we still know who the request is for
				elif not completed:
					response = None

				# maybe we read the wrong response?
				elif forhost != hostname:
					# NOTE(review): clidata is None when the client was not in
					# self.clients, which would make this unpacking fail - confirm
					# that cannot happen
					_, _, _, resolve_count = clidata
					active_time = time.time()
					# put the request back and keep waiting for the right answer
					self.resolving[(worker.w_id, identifier)] = client_id, original, hostname, command, decision
					self.clients[client_id] = (worker.w_id, identifier, active_time, resolve_count)
					self.active.append((active_time, client_id, worker.socket))
					response = None

				# success
				elif ip is not None:
					resolved = self.resolveDecision(command, decision, ip)
					response = (client_id, command) + resolved
					self.cacheDestination(original, ip)

				# not found
				else:
					response = client_id, 'rewrite', '503', 'dns.html', 'http', '', '', hostname, 'peer'
					#self.cacheDestination(original, ip)
			else:
				response = None

			# a dedicated (non-UDP) worker is closed once it produced a response,
			# or when the worker returned None instead of a result
			if response or result is None:
				if worker is not self.worker:
					self.poller.removeReadSocket('read_resolver', sock)
					self.poller.removeWriteSocket('write_resolver', sock)
					worker.close()
					self.workers.pop(sock)
					self.notifyClose()

		else:
			response = None

		return response


	def continueSending(self, sock):
		"""Continue sending data over the connected TCP socket.

		Asks the worker registered for sock to push more of its pending
		request. When the worker returns False (everything has been sent) the
		socket stops being polled for writes; if the client is still known,
		the request moves from self.sending to self.resolving and the socket
		is polled for reads instead.

		Raises KeyError when no worker is registered for sock.
		"""
		# Only the client id and the hostname of the pending request are used
		# here. A socket with no pending request leaves both as None (the
		# previous fallback unpacked six values into five names and raised
		# ValueError instead).
		data = self.sending.get(sock)
		if data:
			client_id, _, hostname, _, _ = data
		else:
			client_id, hostname = None, None

		worker = self.workers[sock]
		res = worker.continueSending()

		if res is False: # we've sent all we need to send
			self.poller.removeWriteSocket('write_resolver', sock)

			if client_id in self.clients:
				w_id, identifier, _, _ = self.clients[client_id]
				# the request is fully sent: from now on we wait for its answer
				self.resolving[(w_id, identifier)] = self.sending.pop(sock)
				self.poller.addReadSocket('read_resolver', sock)

			else:
				self.log.error('could not find client for dns request for %s. request is being left to timeout.' % str(hostname))
Example #7
0
            try:
                poll([f], [], [f], 0.1)
            except socket.error:
                print "CANNOT POLL (read): %s" % str(f)
                log.error('can not poll (read) : %s' % str(f))

        for f in write:
            try:
                poll([], [f], [f], 0.1)
            except socket.error:
                print "CANNOT POLL (write): %s" % str(f)
                log.error('can not poll (write) : %s' % str(f))

        raise e
    except (ValueError, AttributeError, TypeError), e:
        log.error("fatal error encountered during select - %s %s" %
                  (type(e), str(e)))
        raise e
    except select.error, e:
        if e.args[0] in errno_block:
            return [], [], []
        log.error("fatal error encountered during select - %s %s" %
                  (type(e), str(e)))
        raise e
    except KeyboardInterrupt, e:
        raise e
    except Exception, e:
        log.critical("fatal error encountered during select - %s %s" %
                     (type(e), str(e)))
        raise e

    return r, w, x
Example #8
0
class Supervisor(object):
    # Maintenance timings. alarm_time is passed to alarm_thread() in
    # __init__ (presumably the alarm period in seconds - confirm); every
    # *_frequency below is a duration divided by alarm_time, i.e. a number
    # of alarm periods.
    alarm_time = 0.1  # regular backend work
    second_frequency = int(1 / alarm_time)  # when we record history
    minute_frequency = int(60 / alarm_time)  # when we want to average history
    increase_frequency = int(5 / alarm_time)  # when we add workers
    decrease_frequency = int(60 / alarm_time)  # when we remove workers
    saturation_frequency = int(
        20 / alarm_time)  # when we report connection saturation
    interface_frequency = int(300 /
                              alarm_time)  # when we check for new interfaces

    # import os
    # clear = [hex(ord(c)) for c in os.popen('clear').read()]
    # clear = ''.join([chr(int(c,16)) for c in ['0x1b', '0x5b', '0x48', '0x1b', '0x5b', '0x32', '0x4a']])

    def __init__(self, configuration):
        """Build every component of the proxy and prepare it to run.

        In order: creates the loggers and log/usage writers, the pid and
        daemon helpers, the poller and its named socket groups, the managers
        and listening servers, then forks the redirector process, starts the
        helper threads, builds the reactor, installs the signal handlers and
        finally seeds the monitor history.

        self._shutdown is set to True whenever a step fails, so the caller
        can tell that the supervisor should not keep running.
        """
        self.configuration = configuration

        # Only here so the introspection code can find them
        self.log = Logger('supervisor', configuration.log.supervisor)
        self.log.error('Starting exaproxy version %s' %
                       configuration.proxy.version)

        self.signal_log = Logger('signal', configuration.log.signal)
        self.log_writer = SysLogWriter('log',
                                       configuration.log.destination,
                                       configuration.log.enable,
                                       level=configuration.log.level)
        self.usage_writer = UsageWriter('usage',
                                        configuration.usage.destination,
                                        configuration.usage.enable)

        # have the log writer's writeMessages called at interpreter exit
        sys.exitfunc = self.log_writer.writeMessages

        self.log_writer.setIdentifier(configuration.daemon.identifier)
        #self.usage_writer.setIdentifier(configuration.daemon.identifier)

        if configuration.debug.log:
            self.log_writer.toggleDebug()
            self.usage_writer.toggleDebug()

        self.log.error('python version %s' %
                       sys.version.replace(os.linesep, ' '))
        self.log.debug('starting %s' % sys.argv[0])

        self.pid = PID(self.configuration)

        self.daemon = Daemon(self.configuration)
        self.poller = Poller(self.configuration.daemon)

        # declare the named groups of sockets the poller will watch
        self.poller.setupRead('read_proxy')  # Listening proxy sockets
        self.poller.setupRead('read_web')  # Listening webserver sockets
        self.poller.setupRead('read_icap')  # Listening icap sockets
        self.poller.setupRead('read_tls')  # Listening tls sockets
        self.poller.setupRead('read_passthrough')  # Listening raw data sockets
        self.poller.setupRead(
            'read_redirector'
        )  # Pipes carrying responses from the redirector process
        self.poller.setupRead(
            'read_resolver')  # Sockets currently listening for DNS responses

        self.poller.setupRead('read_client')  # Active clients
        self.poller.setupRead(
            'opening_client')  # Clients we have not yet read a request from
        self.poller.setupWrite(
            'write_client')  # Active clients with buffered data to send
        self.poller.setupWrite(
            'write_resolver')  # Active DNS requests with buffered data to send

        self.poller.setupRead('read_download')  # Established connections
        self.poller.setupWrite(
            'write_download'
        )  # Established connections we have buffered data to send to
        self.poller.setupWrite('opening_download')  # Opening connections

        self.poller.setupRead('read_interrupt')  # Scheduled events
        self.poller.setupRead(
            'read_control'
        )  # Responses from commands sent to the redirector process

        self.monitor = Monitor(self)
        self.page = Page(self)
        self.content = ContentManager(self, configuration)
        self.client = ClientManager(self.poller, configuration)
        # the resolver is given dns.retries * 10 as its max_workers argument
        self.resolver = ResolverManager(self.poller, self.configuration,
                                        configuration.dns.retries * 10)
        self.proxy = Server('http proxy', self.poller, 'read_proxy',
                            configuration.http)
        self.web = Server('web server', self.poller, 'read_web',
                          configuration.web)
        self.icap = Server('icap server', self.poller, 'read_icap',
                           configuration.icap)
        self.tls = Server('tls server', self.poller, 'read_tls',
                          configuration.tls)
        self.passthrough = InterceptServer('passthrough server', self.poller,
                                           'read_passthrough',
                                           configuration.passthrough)

        # flags below are set by the signal handlers and read elsewhere in the class
        self._shutdown = True if self.daemon.filemax == 0 else False  # stop the program
        self._softstop = False  # stop once all current connection have been dealt with
        self._reload = False  # unimplemented
        self._toggle_debug = False  # start logging a lot
        self._decrease_spawn_limit = 0
        self._increase_spawn_limit = 0
        self._refork = False  # unimplemented
        self._pdb = False  # turn on pdb debugging
        self._listen = None  # listening change ? None: no, True: listen, False: stop listening
        self.wait_time = 5.0  # how long do we wait at maximum once we have been soft-killed
        self.local = set()  # what addresses are on our local interfaces

        if not self.initialise():
            self._shutdown = True

        # a true return value from drop_privileges() is treated as a failure here
        elif self.daemon.drop_privileges():
            self.log.critical(
                'Could not drop privileges to \'%s\'. Refusing to run as root'
                % self.daemon.user)
            self.log.critical(
                'Set the environment value USER to change the unprivileged user'
            )
            self._shutdown = True

        # fork the redirector process before performing any further setup
        redirector = fork_redirector(self.poller, self.configuration)

        # use simple blocking IO for communication with the redirector process
        self.redirector = redirector_message_thread(redirector)

        # NOTE: create threads _after_ all forking is done

        # regularly interrupt the reactor for maintenance
        self.interrupt_scheduler = alarm_thread(self.poller, self.alarm_time)

        self.reactor = Reactor(self.configuration, self.web, self.proxy,
                               self.passthrough, self.icap, self.tls,
                               self.redirector, self.content, self.client,
                               self.resolver, self.log_writer,
                               self.usage_writer, self.poller)

        # record the addresses of the local interfaces
        self.interfaces()

        # SIGINT and SIGTERM share the same handler
        signal.signal(signal.SIGQUIT, self.sigquit)
        signal.signal(signal.SIGINT, self.sigterm)
        signal.signal(signal.SIGTERM, self.sigterm)
        # signal.signal(signal.SIGABRT, self.sigabrt)
        # signal.signal(signal.SIGHUP, self.sighup)

        signal.signal(signal.SIGTRAP, self.sigtrap)

        signal.signal(signal.SIGUSR1, self.sigusr1)
        signal.signal(signal.SIGUSR2, self.sigusr2)
        signal.signal(signal.SIGTTOU, self.sigttou)
        signal.signal(signal.SIGTTIN, self.sigttin)

        # make sure we always have data in history
        # (done in zero for dependencies reasons)

        if self._shutdown is False:
            self.redirector.requestStats()
            command, control_data = self.redirector.readResponse()
            # anything other than a STATS reply is passed on as "no data"
            stats_data = control_data if command == 'STATS' else None

            stats = self.monitor.statistics(stats_data)
            ok = self.monitor.zero(stats)

            if ok:
                # queue the next statistics request
                self.redirector.requestStats()

            else:
                self._shutdown = True

    def exit(self):
        """Terminate the program by calling sys.exit()."""
        sys.exit()

    def sigquit(self, signum, frame):
        """Handle SIGQUIT: soft-stop first, full shutdown when repeated.

        The first signal sets the soft-stop flag and requests that listening
        stops; a further signal while soft-stopping requests a shutdown.
        signum and frame are the standard signal handler arguments and are
        not used.
        """
        # this handler is registered for SIGQUIT (SIGINT is routed to
        # sigterm), so the messages name SIG QUIT rather than SIG INT
        if self._softstop:
            self.signal_log.critical('multiple SIG QUIT received, shutdown')
            self._shutdown = True
        else:
            self.signal_log.critical('SIG QUIT received, soft-stop')
            self._softstop = True
            self._listen = False

    def sigterm(self, signum, frame):
        """Handle a termination signal by requesting a shutdown.

        When the PDB environment variable is set to a non-empty value the
        pdb flag is raised instead of the shutdown flag. signum and frame
        are the standard signal handler arguments and are not used.
        """
        self.signal_log.critical('SIG TERM received, shutdown request')

        debugger_requested = os.environ.get('PDB', False)
        if not debugger_requested:
            self._shutdown = True
            return

        self._pdb = True

    # def sigabrt (self,signum, frame):
    # 	self.signal_log.info('SIG INFO received, refork request')
    # 	self._refork = True

    # def sighup (self,signum, frame):
    # 	self.signal_log.info('SIG HUP received, reload request')
    # 	self._reload = True

    def sigtrap(self, signum, frame):
        """Handle SIGTRAP: log it and raise the toggle-debug flag.

        signum and frame are the standard signal handler arguments and are
        not used.
        """
        self.signal_log.critical('SIG TRAP received, toggle debug')
        self._toggle_debug = True

    def sigusr1(self, signum, frame):
        """Handle SIGUSR1: log it and count one more requested worker decrease.

        signum and frame are the standard signal handler arguments and are
        not used.
        """
        self.signal_log.critical('SIG USR1 received, decrease worker number')
        # signals are accumulated in a counter rather than acted on here
        self._decrease_spawn_limit += 1

    def sigusr2(self, signum, frame):
        """Handle SIGUSR2: log it and count one more requested worker increase.

        signum and frame are the standard signal handler arguments and are
        not used.
        """
        self.signal_log.critical('SIG USR2 received, increase worker number')
        # signals are accumulated in a counter rather than acted on here
        self._increase_spawn_limit += 1

    def sigttou(self, signum, frame):
        """Handle SIGTTOU: log it and request that listening stops.

        signum and frame are the standard signal handler arguments and are
        not used.
        """
        self.signal_log.critical('SIG TTOU received, stop listening')
        # False means "stop listening"; None means no change is pending
        self._listen = False

    def sigttin(self, signum, frame):
        """Handle SIGTTIN: log it and request that listening starts.

        signum and frame are the standard signal handler arguments and are
        not used.
        """
        # the message used to read 'SIG IN received, star listening'
        self.signal_log.critical('SIG TTIN received, start listening')
        # True means "start listening"; None means no change is pending
        self._listen = True

    def interfaces(self):
        """Refresh self.local with the addresses currently on our interfaces.

        The loopback addresses are always included. Newly seen and vanished
        addresses are logged, as is the absence of any change.
        """
        current = {'127.0.0.1', '::1'}

        for interface in getifaddrs():
            # only IPv4 and IPv6 addresses are of interest
            if interface.family in (AF_INET, AF_INET6):
                if interface.address not in self.local:
                    self.log.info('found new local ip %s (%s)' %
                                  (interface.address, interface.name))
                current.add(interface.address)

        for known in self.local:
            if known not in current:
                self.log.info('removed local ip %s' % known)

        if current == self.local:
            self.log.info('no ip change')
            return

        self.local = current

    def run(self):
        count_second = 0
        count_minute = 0
        count_saturation = 0
        count_interface = 0

        events = {'read_interrupt'}

        while True:
            count_second = (count_second + 1) % self.second_frequency
            count_minute = (count_minute + 1) % self.minute_frequency

            count_saturation = (count_saturation +
                                1) % self.saturation_frequency
            count_interface = (count_interface + 1) % self.interface_frequency

            try:
                if self._pdb:
                    self._pdb = False
                    import pdb
                    pdb.set_trace()

                # prime the alarm
                if 'read_interrupt' in events:
                    self.interrupt_scheduler.setAlarm()

                # check for IO change with select
                status, events = self.reactor.run()

                # shut down the server if a child process disappears
                if status is False:
                    self._shutdown = True

                # respond to control responses immediately
                if 'read_control' in events:
                    command, control_data = self.redirector.readResponse()

                    if command == 'STATS':
                        ok = self.doStats(count_second, count_minute,
                                          control_data)

                    if ok is False:
                        self._shutdown = True

                    # jump straight back into the reactor if we haven't yet received an
                    # interrupt event
                    if 'read_interrupt' not in events:
                        continue

                # clear the alarm condition
                self.interrupt_scheduler.acknowledgeAlarm()

                # must follow the reactor so we are sure to go through the reactor at least once
                # and flush any logs
                if self._shutdown:
                    self._shutdown = False
                    self.shutdown()
                    break
                elif self._reload:
                    self._reload = False
                    self.reload()
                elif self._refork:
                    self._refork = False
                    self.signal_log.warning('refork not implemented')
                    # stop listening to new connections
                    # refork the program (as we have been updated)
                    # just handle current open connection

                # ask the redirector process for stats
                self.redirector.requestStats()

                if self._softstop:
                    if self._listen == False:
                        self.proxy.rejecting()
                        self._listen = None
                    if self.client.softstop():
                        self._shutdown = True
                # only change listening if we are not shutting down
                elif self._listen is not None:
                    if self._listen:
                        self._shutdown = not self.proxy.accepting()
                        self._listen = None
                    else:
                        self.proxy.rejecting()
                        self._listen = None

                if self._toggle_debug:
                    self._toggle_debug = False
                    self.log_writer.toggleDebug()

                if self._decrease_spawn_limit:
                    count = self._decrease_spawn_limit
                    self.redirector.decreaseSpawnLimit(count)
                    self._decrease_spawn_limit = 0

                if self._increase_spawn_limit:
                    count = self._increase_spawn_limit
                    self.redirector.increaseSpawnLimit(count)
                    self._increase_spawn_limit = 0

                # cleanup idle connections
                # TODO: track all idle connections, not just the ones that have never sent data
                expired = self.reactor.client.expire()

                for expire_source, expire_count in expired.items():
                    if expire_source == 'proxy':
                        self.proxy.notifyClose(None, count=expire_count)

                    elif expire_source == 'icap':
                        self.icap.notifyClose(None, count=expire_count)

                    elif expire_source == 'passthrough':
                        self.passthrough.notifyClose(None, count=expire_count)

                    elif expire_source == 'tls':
                        self.tls.notifyClose(None, count=expire_count)

                    elif expire_source == 'web':
                        self.web.notifyClose(None, count=expire_count)

                # report if we saw too many connections
                if count_saturation == 0:
                    self.proxy.saturation()
                    self.web.saturation()

                if self.configuration.daemon.poll_interfaces and count_interface == 0:
                    self.interfaces()

            except KeyboardInterrupt:
                self.log.critical('^C received')
                self._shutdown = True

            except OSError, e:
                # This shoould never happen as we are limiting how many connections we accept
                if e.errno == 24:  # Too many open files
                    self.log.critical('Too many opened files, shutting down')
                    for line in traceback.format_exc().split('\n'):
                        self.log.critical(line)
                    self._shutdown = True
                else:
                    self.log.critical('unrecoverable io error')
                    for line in traceback.format_exc().split('\n'):
                        self.log.critical(line)
                    self._shutdown = True

            finally:
Example #9
0
class ClientManager(object):
    unproxy = ProxyProtocol().parseRequest

    def __init__(self, poller, configuration):
        self.total_sent4 = 0L
        self.total_sent6 = 0L
        self.total_requested = 0L
        self.norequest = TimeCache(configuration.http.idle_connect)
        self.bysock = {}
        self.byname = {}
        self.buffered = []
        self._nextid = 0
        self.poller = poller
        self.log = Logger('client', configuration.log.client)
        self.proxied = configuration.http.proxied
        self.max_buffer = configuration.http.header_size

    def __contains__(self, item):
        """Return True when item is the name of a client we know about."""
        known_names = self.byname
        return item in known_names

    def getnextid(self):
        """Advance the internal counter and return its new value as a string."""
        nextid = self._nextid + 1
        self._nextid = nextid
        return str(nextid)

    def expire(self, number=100):
        """Clean up clients which never sent a request and have expired.

        number is handed to ``self.norequest.expired`` (presumably the maximum
        number of entries to expire in one call). Returns how many clients
        were cleaned up.
        """
        closed = 0
        for sock in self.norequest.expired(number):
            entry = self.norequest.get(sock, [None])
            client = entry[0]
            if not client:
                continue

            self.cleanup(sock, client.name)
            closed += 1

        return closed

    def newConnection(self, sock, peer, source):
        """Register a freshly accepted connection and wait for its first request.

        A Client is created under a new name, recorded both by socket (in
        norequest) and by name, and its socket is watched for the opening
        request. Returns peer unchanged.
        """
        name = self.getnextid()
        client = Client(name, sock, peer, self.log, self.max_buffer)
        entry = (client, source)

        self.byname[name] = entry
        self.norequest[sock] = entry

        # watch for the opening request
        self.poller.addReadSocket('opening_client', client.sock)

        return peer

    def readRequest(self, sock):
        """Read only the initial HTTP headers sent by the client.

        Returns (name, peer, request, content, source); every element is None
        when sock does not belong to a client still waiting to send its
        opening request.
        """
        client, source = self.norequest.get(sock, (None, None))
        if not client:
            self.log.error(
                'trying to read headers from a client that does not exist %s' %
                sock)
            return None, None, None, None, None

        name, peer, request, content = client.readData()

        if request is None:
            # the read failed: drop the client
            self.cleanup(sock, client.name)

        elif request:
            self.total_requested += 1

            # headers can be read only once
            self.norequest.pop(sock, (None, None))

            # we have now read the client's opening request
            self.poller.removeReadSocket('opening_client', client.sock)

            if self.proxied is True and source == 'proxy':
                # the request may carry the real client address in front of it
                client_ip, client_request = self.unproxy(request)

                if client_ip and client_request:
                    peer = client_ip
                    request = client_request
                    client.setPeer(client_ip)

        return name, peer, request, content, source

    def readDataBySocket(self, sock):
        """Read data from the client owning sock.

        Returns (name, peer, request, content, source). The first four are
        None when sock is not registered in bysock.
        """
        client, source = self.bysock.get(sock, (None, None))
        if not client:
            self.log.error(
                'trying to read from a client that does not exist %s' % sock)
            return None, None, None, None, source

        name, peer, request, content = client.readData()

        if request is None:
            # the read failed: drop the client
            self.cleanup(sock, client.name)

        elif request:
            self.total_requested += 1
            # Parsing of the new request will be handled asynchronously. Ensure that
            # we do not read anything from the client until a request has been sent
            # to the remote webserver.
            # Since we just read a request, we know that the cork is not currently
            # set and so there's no risk of it being erroneously removed.
            self.poller.corkReadSocket('read_client', sock)

        return name, peer, request, content, source

    def readDataByName(self, name):
        """Read data from the client registered under name.

        Returns (name, peer, request, content), all None when no client is
        registered under that name.
        """
        client, source = self.byname.get(name, (None, None))
        if not client:
            self.log.error(
                'trying to read from a client that does not exist %s' % name)
            return None, None, None, None

        # from here on name is whatever the client reports
        name, peer, request, content = client.readData()

        if request is None:
            # the read failed: drop the client
            self.cleanup(client.sock, name)

        elif request:
            self.total_requested += 1
            # Parsing of the new request will be handled asynchronously. Ensure that
            # we do not read anything from the client until a request has been sent
            # to the remote webserver.
            # Since we just read a request, we know that the cork is not currently
            # set and so there's no risk of it being erroneously removed.
            self.poller.corkReadSocket('read_client', client.sock)

        return name, peer, request, content

    def sendDataBySocket(self, sock, data):
        """Write data to the client owning sock and track its send buffer.

        Returns (result, buffer_change, name, source):
        result is the client's buffered state, or None when there is no such
        client or the write failed; buffer_change is True when the socket was
        added to or removed from self.buffered, False when nothing changed,
        and None when the client is unknown.
        """
        client, source = self.bysock.get(sock, (None, None))
        if not client:
            return None, None, None, source

        name = client.name
        res = client.writeData(data)

        if res is None:
            # close the client connection
            self.cleanup(sock, client.name)
            # NOTE(review): a failed write reports buffer_change as False, not None
            return None, False, name, source

        buffered, had_buffer, sent4, sent6 = res
        self.total_sent4 += sent4
        self.total_sent6 += sent6

        buffer_change = False

        if buffered:
            if sock not in self.buffered:
                self.buffered.append(sock)
                buffer_change = True

                # watch for the socket's send buffer becoming less than full
                self.poller.addWriteSocket('write_client', client.sock)

        elif had_buffer and sock in self.buffered:
            self.buffered.remove(sock)
            buffer_change = True

            # we no longer care about writing to the client
            self.poller.removeWriteSocket('write_client', client.sock)

        return buffered, buffer_change, name, source

    def sendDataByName(self, name, data):
        """Write data to the client registered under name and track its send buffer.

        Returns (result, buffer_change, client):
        result is the client's buffered state, or None when there is no such
        client or the write failed; buffer_change is True when the socket was
        added to or removed from self.buffered, False when nothing changed,
        and None when the client is unknown.
        """
        client, source = self.byname.get(name, (None, None))
        if not client:
            return None, None, client

        res = client.writeData(data)

        if res is None:
            # we cannot write to the client so clean it up
            self.cleanup(client.sock, name)
            # NOTE(review): a failed write reports buffer_change as False, not None
            return None, False, client

        buffered, had_buffer, sent4, sent6 = res
        self.total_sent4 += sent4
        self.total_sent6 += sent6

        buffer_change = False

        if buffered:
            if client.sock not in self.buffered:
                self.buffered.append(client.sock)
                buffer_change = True

                # watch for the socket's send buffer becoming less than full
                self.poller.addWriteSocket('write_client', client.sock)

        elif had_buffer and client.sock in self.buffered:
            self.buffered.remove(client.sock)
            buffer_change = True

            # we no longer care about writing to the client
            self.poller.removeWriteSocket('write_client', client.sock)

        return buffered, buffer_change, client

    def startData(self, name, data, remaining):
        """Send the first part of a response to a client and resume reading from it.

        name      -- key of the client in self.byname
        data      -- a (command, payload) pair handed to the client
        remaining -- how the rest of the client's request body is to be read:
                     a string (containing 'chunked' or not), a positive byte
                     count, 0 for no body, anything else for passthrough

        Returns (client, content, source). content is None when the client is
        unknown, the command is malformed, the write failed, or the client
        had to be closed.
        """
        # NOTE: soo ugly but fast to code
        nb_to_read = 0
        if isinstance(remaining, str):
            mode = 'chunked' if 'chunked' in remaining else 'passthrough'
        elif remaining > 0:
            mode = 'transfer'
            nb_to_read = remaining
        elif remaining == 0:
            mode = 'request'
        else:
            mode = 'passthrough'

        client, source = self.byname.get(name, (None, None))
        if not client:
            return client, None, source

        try:
            command, d = data
        except (ValueError, TypeError):
            self.log.error('invalid command sent to client %s' % name)
            # Clean up exactly once: falling through to the write-failure path
            # would call cleanup a second time and log a spurious
            # 'COULD NOT CLEAN UP SOCKET' error.
            self.cleanup(client.sock, name)
            return client, None, source

        if client.sock not in self.bysock:
            # Start checking for content sent by the client
            self.bysock[client.sock] = client, source

            # watch for the client sending new data
            self.poller.addReadSocket('read_client', client.sock)

            # make sure we don't somehow end up with this still here
            self.norequest.pop(client.sock, (None, None))

            # NOTE: always done already in readRequest
            self.poller.removeReadSocket('opening_client', client.sock)
            res = client.startData(command, d)

        else:
            res = client.restartData(command, d)

            # If we are here then we must have prohibited reading from the client
            # and it must otherwise have been in a readable state
            self.poller.uncorkReadSocket('read_client', client.sock)

        if res is None:
            # we cannot write to the client so clean it up
            self.cleanup(client.sock, name)
            return client, None, source

        buffered, had_buffer, sent4, sent6 = res

        # account for what was just written, as sendDataBySocket/ByName do
        self.total_sent4 += sent4
        self.total_sent6 += sent6

        # buffered data we read with the HTTP headers
        # (name is deliberately not overwritten: it is the key used in byname)
        _name, peer, request, content = client.readRelated(mode, nb_to_read)

        if request or request is None:
            if request:
                self.total_requested += 1
                self.log.info('reading multiple requests')

            self.cleanup(client.sock, name)
            return client, None, source

        if buffered:
            if client.sock not in self.buffered:
                self.buffered.append(client.sock)

                # watch for the socket's send buffer becoming less than full
                self.poller.addWriteSocket('write_client', client.sock)

        elif had_buffer and client.sock in self.buffered:
            self.buffered.remove(client.sock)

            # we no longer care about writing to the client
            self.poller.removeWriteSocket('write_client', client.sock)

        return client, content, source

    def corkUploadByName(self, name):
        """Cork the 'read_client' socket of the client registered under name, if any."""
        client, _ = self.byname.get(name, (None, None))
        if not client:
            return

        self.poller.corkReadSocket('read_client', client.sock)

    def uncorkUploadByName(self, name):
        """Uncork the 'read_client' socket of the named client.

        Nothing happens unless the client exists and its socket is registered
        in bysock.
        """
        client, _ = self.byname.get(name, (None, None))
        if client and client.sock in self.bysock:
            self.poller.uncorkReadSocket('read_client', client.sock)

    def cleanup(self, sock, name):
        """Forget a client and shut its connection down.

        The client is looked up by socket in bysock, then in norequest, and
        finally by name in byname. All three registries are purged of sock /
        name whether or not a client was found; when none is found an error
        is logged instead of shutting anything down.
        """
        self.log.debug('cleanup for socket %s' % sock)

        client, _ = self.bysock.get(sock, (None, None))
        if not client:
            client, _ = self.norequest.get(sock, (None, None))
        if not client:
            # A client that has sent its request but has not reached startData
            # is only reachable by name. (The previous
            # "(client, None) or ..." expression never evaluated this lookup
            # because a non-empty tuple is always true.)
            client, _ = self.byname.get(name, (None, None))

        self.bysock.pop(sock, None)
        self.norequest.pop(sock, (None, None))
        self.byname.pop(name, None)

        if client:
            self.poller.removeWriteSocket('write_client', client.sock)
            self.poller.removeReadSocket('read_client', client.sock)
            self.poller.removeReadSocket('opening_client', client.sock)

            client.shutdown()
        else:
            self.log.error('COULD NOT CLEAN UP SOCKET %s' % sock)

        if sock in self.buffered:
            self.buffered.remove(sock)

    def softstop(self):
        """Return True (and log it) once no client is left, otherwise False."""
        if len(self.byname) == 0 and len(self.norequest) == 0:
            self.log.critical('no more client connection, exiting.')
            return True

        return False

    def stop(self):
        """Shut down every known client and reset all bookkeeping."""
        # A client that has sent its request but has not yet reached startData
        # is in neither bysock nor norequest, only in byname; walk all three
        # registries so that it is shut down as well, once per client.
        done = set()
        for registry in (self.bysock, self.norequest, self.byname):
            for client, source in registry.itervalues():
                if id(client) in done:
                    continue
                done.add(id(client))
                client.shutdown()

        self.poller.clearRead('read_client')
        self.poller.clearRead('opening_client')
        self.poller.clearWrite('write_client')

        self.bysock = {}
        # NOTE(review): this replaces the TimeCache with a plain dict, so
        # expire() cannot be used after stop() - confirm that is intended
        self.norequest = {}
        self.byname = {}
        self.buffered = []
Example #10
0
class Redirector(Thread):
    # TODO : if the program is a function, fork and run :)
    ICAPParser = ICAPParser

    def __init__(self, configuration, name, request_box, program):
        """Prepare a redirector worker thread and fork its helper program.

        configuration -- the proxy configuration (redirector, http, log and
                         proxy sections are read)
        name          -- unique identifier of this worker
        request_box   -- queue with the HTTP headers to process
        program       -- the squid redirector program to fork
        """
        redirector = configuration.redirector
        icap_scheme = 'icap://'

        self.configuration = configuration
        self.icap_parser = self.ICAPParser(configuration)
        self.enabled = redirector.enable
        self.protocol = redirector.protocol
        self._transparent = configuration.http.transparent
        self.log = Logger('worker ' + str(name), configuration.log.worker)
        self.usage = UsageLogger('usage', configuration.log.worker)

        self.universal = self.protocol == 'url'

        # the first path component following icap://, empty for other protocols
        if self.protocol.startswith(icap_scheme):
            self.icap = self.protocol[len(icap_scheme):].split('/')[0]
        else:
            self.icap = ''

        # pipe for communication with the main thread
        read_fd, write_fd = os.pipe()
        # results are written here
        self.response_box_write = os.fdopen(write_fd, 'w', 0)
        # read from the main thread
        self.response_box_read = os.fdopen(read_fd, 'r', 0)

        # a unique name
        self.wid = name
        # when the thread was created
        self.creation = time.time()
        # queue with HTTP headers to process
        self.request_box = request_box

        # the squid redirector program to fork
        self.program = program
        # the thread is active
        self.running = True

        # time of the most recent outstanding request to generate stats
        self.stats_timestamp = None

        self._proxy = 'ExaProxy-%s-id-%d' % (
            configuration.proxy.version, os.getpid())

        if self.protocol == 'url':
            self.classify = self._classify_url
        if self.protocol.startswith(icap_scheme):
            self.classify = self._classify_icap

        # Do not move, we need the forking AFTER the setup
        # the forked program to handle classification
        self.process = self._createProcess()
        Thread.__init__(self)

    def _createProcess(self):
        """Spawn the redirector program and return its Popen object.

        Returns None when the redirector is disabled, when the program could
        not be started, or when its stderr could not be made non-blocking.
        """
        if not self.enabled:
            return None

        def preexec():  # Don't forward signals.
            os.setpgrp()

        try:
            process = subprocess.Popen(
                [
                    self.program,
                ],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                universal_newlines=self.universal,
                preexec_fn=preexec,
            )
            self.log.debug('spawn process %s' % self.program)
        except KeyboardInterrupt:
            process = None
        except (subprocess.CalledProcessError, OSError, ValueError):
            self.log.error('could not spawn process %s' % self.program)
            process = None

        if process:
            try:
                fcntl.fcntl(process.stderr, fcntl.F_SETFL, os.O_NONBLOCK)
            except IOError:
                # self.process is only assigned from our return value, so
                # destroyProcess() cannot be used here: it would not see this
                # child (and fails outright on the first call). Terminate the
                # child we just spawned directly instead.
                try:
                    process.terminate()
                    process.wait()
                except OSError:
                    # the child is already gone, nothing left to reap
                    pass
                process = None

        return process

    def destroyProcess(self):
        """Terminate the forked redirector program and wait for it to exit.

        Does nothing when the redirector is disabled or no process is
        running. An OSError other than ESRCH raised while terminating is
        logged; ESRCH (no such process) is ignored.
        """
        if not self.enabled:
            return

        self.log.debug('destroying process %s' % self.program)

        process = self.process
        if not process:
            return

        try:
            process.terminate()
            process.wait()
            self.log.info('terminated process PID %s' % process.pid)
        except OSError as e:
            # No such process
            if e.errno != errno.ESRCH:
                self.log.error('PID %s died' % process.pid)
Example #11
0
class ClientManager (object):
	def __init__(self, poller, configuration):
		self.total_sent4 = 0L
		self.total_sent6 = 0L
		self.total_requested = 0L
		self.norequest = TimeCache(configuration.http.idle_connect)
		self.bysock = {}
		self.byname = {}
		self.buffered = []
		self._nextid = 0
		self.poller = poller
		self.log = Logger('client', configuration.log.client)
		self.http_max_buffer = configuration.http.header_size
		self.icap_max_buffer = configuration.icap.header_size
		self.proxied = {
			'proxy' : configuration.http.proxied,
			'icap'  : configuration.icap.proxied,
		}

	def __contains__(self, item):
		"""Return True when item is a socket registered in bysock."""
		active_sockets = self.bysock
		return item in active_sockets

	def lookupSocket (self, item):
		"""Return the socket registered under the client name item, or None."""
		sock = self.byname.get(item)
		return sock

	def getnextid(self):
		"""Advance the internal counter and return its new value as a string."""
		nextid = self._nextid + 1
		self._nextid = nextid
		return str(nextid)

	def expire (self,number=100):
		"""Clean up clients which never sent a request and have expired.

		number is handed to ``self.norequest.expired`` (presumably the maximum
		number of entries to expire in one call). Returns how many clients
		were cleaned up.
		"""
		closed = 0
		for sock in self.norequest.expired(number):
			entry = self.norequest.get(sock, [None])
			client = entry[0]
			if not client:
				continue

			self.cleanup(sock, client.name)
			closed += 1

		return closed

	def httpConnection (self, sock, peer, source):
		"""Register a new HTTP connection and wait for its opening request.

		An HTTPClient is created under a new name, the socket is recorded in
		norequest and byname, and watched for the first request. Returns peer
		unchanged.
		"""
		name = self.getnextid()
		proxied = self.proxied.get(source)
		client = HTTPClient(name, sock, peer, self.log, self.http_max_buffer, proxied)

		self.byname[name] = sock
		self.norequest[sock] = client, source

		# watch for the opening request
		self.poller.addReadSocket('opening_client', client.sock)

		return peer

	def icapConnection (self, sock, peer, source):
		"""Register a new ICAP connection and wait for its opening request.

		An ICAPClient is created under a new name, the socket is recorded in
		norequest and byname, and watched for the first request. Returns peer
		unchanged.
		"""
		name = self.getnextid()
		proxied = self.proxied.get(source)
		client = ICAPClient(name, sock, peer, self.log, self.icap_max_buffer, proxied)

		self.byname[name] = sock
		self.norequest[sock] = client, source

		# watch for the opening request
		self.poller.addReadSocket('opening_client', client.sock)

		return peer

	def readRequest (self, sock):
		"""Read only the initial HTTP headers sent by the client

		Returns (name, peer, request, subrequest, content, source); every
		element is None when sock does not belong to a client still waiting
		to send its opening request.
		"""
		client, source = self.norequest.get(sock, (None, None))
		if not client:
			self.log.error('trying to read headers from a client that does not exist %s' % sock)
			return None, None, None, None, None, None

		name, peer, request, subrequest, content = client.readData()

		if request is None:
			# the read failed: drop the client
			self.cleanup(sock, client.name)

		elif request:
			self.total_requested += 1

			# headers can be read only once
			self.norequest.pop(sock, (None, None))
			self.bysock[sock] = client, source

			# watch for the client sending new data
			self.poller.addReadSocket('read_client', client.sock)

			# we have now read the client's opening request
			self.poller.removeReadSocket('opening_client', client.sock)

			# do not read more data until we have properly handled the request
			self.poller.corkReadSocket('read_client', sock)

		return name, peer, request, subrequest, content, source


	def readData (self, sock):
		"""Read data from the client owning sock.

		Returns (name, peer, request, subrequest, content, source). The first
		five are None when sock is not registered in bysock.
		"""
		client, source = self.bysock.get(sock, (None, None))
		if not client:
			self.log.error('trying to read from a client that does not exist %s' % sock)
			return None, None, None, None, None, source

		name, peer, request, subrequest, content = client.readData()

		if request is None:
			# the read failed: drop the client
			self.cleanup(sock, client.name)

		elif request:
			self.total_requested += 1
			# Parsing of the new request will be handled asynchronously. Ensure that
			# we do not read anything from the client until a request has been sent
			# to the remote webserver.
			# Since we just read a request, we know that the cork is not currently
			# set and so there's no risk of it being erroneously removed.
			self.poller.corkReadSocket('read_client', sock)

		return name, peer, request, subrequest, content, source

	def sendData (self, sock, data):
		"""Write data to the client owning sock and track its send buffer.

		Returns (result, buffer_change, name, source):
		result is the client's buffered state, or None when there is no such
		client or the write failed; buffer_change is True when the socket was
		added to or removed from self.buffered, False when nothing changed,
		and None when the client is unknown.
		"""
		client, source = self.bysock.get(sock, (None, None))
		if not client:
			return None, None, None, source

		name = client.name
		res = client.writeData(data)

		if res is None:
			# close the client connection
			self.cleanup(sock, client.name)
			# NOTE(review): a failed write reports buffer_change as False, not None
			return None, False, name, source

		buffered, had_buffer, sent4, sent6 = res
		self.total_sent4 += sent4
		self.total_sent6 += sent6

		buffer_change = False

		if buffered:
			if sock not in self.buffered:
				self.buffered.append(sock)
				buffer_change = True

				# watch for the socket's send buffer becoming less than full
				self.poller.addWriteSocket('write_client', client.sock)

		elif had_buffer and sock in self.buffered:
			self.buffered.remove(sock)
			buffer_change = True

			# we no longer care about writing to the client
			self.poller.removeWriteSocket('write_client', client.sock)

		return buffered, buffer_change, name, source


	def parseRemaining (self, remaining):
		"""Translate remaining into a (mode, nb_to_read) pair.

		A string gives 'chunked' when it is exactly 'chunked' and
		'passthrough' otherwise; a positive number gives 'transfer' with that
		number as nb_to_read; zero gives an empty mode; anything else gives
		'passthrough'. nb_to_read is 0 in every case but 'transfer'.
		"""
		if isinstance(remaining, basestring):
			if remaining == 'chunked':
				return 'chunked', 0
			return 'passthrough', 0

		if remaining > 0:
			return 'transfer', remaining

		if remaining == 0:
			return '', 0

		return 'passthrough', 0

	def startData(self, sock, data, remaining):
		"""Send the first part of a response to a client and resume reading from it.

		sock      -- socket of the client, as registered in bysock
		data      -- a (command, payload) pair handed to the client
		remaining -- description of the request body still to be read, see
		             parseRemaining

		Returns (content, source). content is None when the client is
		unknown, the command is malformed, the write failed, or the client
		had to be closed.
		"""
		client, source = self.bysock.get(sock, (None, None))
		if not client:
			# nothing to report against: there is no client.name to log
			return None, source

		try:
			mode, nb_to_read = self.parseRemaining(remaining)
			command, d = data
		except (ValueError, TypeError):
			self.log.error('invalid command sent to client %s' % client.name)
			return None, source

		if command is None:
			return None, source

		name, peer, res = client.startData(command, d)

		if res is None:
			# we cannot write to the client so clean it up
			self.cleanup(client.sock, name)
			return None, source

		name, peer, request, subrequest, content = client.readRelated(mode, nb_to_read)

		buffered, had_buffer, sent4, sent6 = res

		self.poller.uncorkReadSocket('read_client', client.sock)

		self.total_sent4 += sent4
		self.total_sent6 += sent6

		if request or request is None:
			if request:
				self.total_requested += 1
				self.log.info('reading multiple requests')

			self.cleanup(client.sock, name)
			return None, source

		if buffered is True and had_buffer is False:
			# never track the same socket twice
			if client.sock not in self.buffered:
				self.buffered.append(client.sock)

			# watch for the socket's send buffer becoming less than full
			self.poller.addWriteSocket('write_client', client.sock)

		elif buffered is False and had_buffer is True:
			# list.remove raises ValueError for a socket we are not tracking
			if client.sock in self.buffered:
				self.buffered.remove(client.sock)

			# we no longer care about writing to the client
			self.poller.removeWriteSocket('write_client', client.sock)

		return content, source
Example #12
0
		for events in res:
			fd = events.ident

			name, poller, sockets, fdtosock = self.pollers[fd]
			events = poller.control(None, self.max_events, 0)

			if (len(events) == self.max_events):
				log.warning("polled max_events from queue %s" % (name))

			for sock_events in events:
				sock_fd = sock_events.ident
				try:
					response[name].append(fdtosock[sock_fd])
				except KeyError, e:
					log.error("KQueue register called before fdtosock registered! Skipping event")
					continue

				if sock_events.flags & select.KQ_EV_ERROR:
					log.warning("%s KQ_EV_ERROR: fd=%d filter=%d fflags=%d flags=%d data=%d udata=%d" % (
						str(datetime.datetime.now()),
						sock_events.ident, sock_events.filter, sock_events.flags, sock_events.fflags,
						sock_events.data, sock_events.udata))

					sock = fdtosock.pop(sock_fd, None)
					poller.control([kevent(sock, sock_events.filter, KQ_EV_DELETE)], 0)
					sockets.pop(sock)

					if sock not in self.errors:
						self.errors[sock] = name
Example #13
0
		for events in res:
			fd = events.ident

			name, poller, sockets, fdtosock = self.pollers[fd]
			events = poller.control(None, self.max_events, 0)

			if len(events) == self.max_events:
				log.warning("polled max_events from queue %s" % name)

			for sock_events in events:
				sock_fd = sock_events.ident
				try:
					response[name].append(fdtosock[sock_fd])
				except KeyError:
					log.error("KQueue register called before fdtosock registered! Skipping event")
					continue

				if sock_events.flags & select.KQ_EV_ERROR:
					log.warning("%s KQ_EV_ERROR: fd=%d filter=%d fflags=%d flags=%d data=%d udata=%d" % (
						str(datetime.datetime.now()),
						sock_events.ident, sock_events.filter, sock_events.flags, sock_events.fflags,
						sock_events.data, sock_events.udata))

					sock = fdtosock.pop(sock_fd, None)
					poller.control([kevent(sock, sock_events.filter, KQ_EV_DELETE)], 0)
					sockets.pop(sock)

					if sock not in self.errors:
						self.errors[sock] = name
Example #14
0
class Redirector (Thread):
	# TODO : if the program is a function, fork and run :)

	def __init__ (self, configuration, name, request_box, program):
		"""Prepare a redirector worker thread and fork its helper program.

		configuration -- the proxy configuration (redirector, http, log and
		                 proxy sections are read)
		name          -- unique identifier of this worker
		request_box   -- queue with the HTTP headers to process
		program       -- the squid redirector program to fork
		"""
		redirector = configuration.redirector
		icap_scheme = 'icap://'

		self.configuration = configuration
		self.enabled = redirector.enable
		self.protocol = redirector.protocol
		self._transparent = configuration.http.transparent
		self.log = Logger('worker ' + str(name), configuration.log.worker)
		self.usage = UsageLogger('usage', configuration.log.worker)

		self.universal = self.protocol == 'url'

		# the first path component following icap://, empty for other protocols
		if self.protocol.startswith(icap_scheme):
			self.icap = self.protocol[len(icap_scheme):].split('/')[0]
		else:
			self.icap = ''

		# pipe for communication with the main thread
		read_fd, write_fd = os.pipe()
		# results are written here
		self.response_box_write = os.fdopen(write_fd,'w',0)
		# read from the main thread
		self.response_box_read = os.fdopen(read_fd,'r',0)

		# a unique name
		self.wid = name
		# when the thread was created
		self.creation = time.time()
		# queue with HTTP headers to process
		self.request_box = request_box

		# the squid redirector program to fork
		self.program = program
		# the thread is active
		self.running = True

		# time of the most recent outstanding request to generate stats
		self.stats_timestamp = None

		self._proxy = 'ExaProxy-%s-id-%d' % (configuration.proxy.version,os.getpid())

		if self.protocol == 'url':
			self.classify = self._classify_url
		if self.protocol.startswith(icap_scheme):
			self.classify = self._classify_icap

		# Do not move, we need the forking AFTER the setup
		# the forked program to handle classification
		self.process = self._createProcess()
		Thread.__init__(self)

	def _createProcess (self):
		"""Spawn the redirector program and return its Popen object.

		Returns None when the redirector is disabled, when the program could
		not be started, or when its stderr could not be made non-blocking.
		"""
		if not self.enabled:
			return None

		def preexec():  # Don't forward signals.
			os.setpgrp()

		try:
			process = subprocess.Popen([self.program,],
				stdin=subprocess.PIPE,
				stdout=subprocess.PIPE,
				stderr=subprocess.PIPE,
				universal_newlines=self.universal,
				preexec_fn=preexec,
			)
			self.log.debug('spawn process %s' % self.program)
		except KeyboardInterrupt:
			process = None
		except (subprocess.CalledProcessError,OSError,ValueError):
			self.log.error('could not spawn process %s' % self.program)
			process = None

		if process:
			try:
				fcntl.fcntl(process.stderr, fcntl.F_SETFL, os.O_NONBLOCK)
			except IOError:
				# self.process is only assigned from our return value, so
				# destroyProcess() cannot be used here: it would not see this
				# child (and fails outright on the first call). Terminate the
				# child we just spawned directly instead.
				try:
					process.terminate()
					process.wait()
				except OSError:
					# the child is already gone, nothing left to reap
					pass
				process = None

		return process

	def destroyProcess (self):
		if not self.enabled:
			return
		self.log.debug('destroying process %s' % self.program)
		if not self.process:
			return
		try:
			if self.process:
				self.process.terminate()
				self.process.wait()
				self.log.info('terminated process PID %s' % self.process.pid)
		except OSError, e:
			# No such processs
			if e[0] != errno.ESRCH:
				self.log.error('PID %s died' % self.process.pid)
Example #15
0
class ClientManager (object):
	unproxy = ProxyProtocol().parseRequest

	def __init__(self, poller, configuration):
		self.total_sent4 = 0L
		self.total_sent6 = 0L
		self.total_requested = 0L
		self.norequest = TimeCache(configuration.http.idle_connect)
		self.bysock = {}
		self.byname = {}
		self.buffered = []
		self._nextid = 0
		self.poller = poller
		self.log = Logger('client', configuration.log.client)
		self.proxied = configuration.http.proxied
		self.max_buffer = configuration.http.header_size

	def __contains__(self, item):
		return item in self.byname

	def getnextid(self):
		self._nextid += 1
		return str(self._nextid)

	def expire (self,number=100):
		count = 0
		for sock in self.norequest.expired(number):
			client = self.norequest.get(sock,[None,])[0]
			if client:
				self.cleanup(sock,client.name)
				count += 1

		return count

	def newConnection(self, sock, peer, source):
		"""Register a new client connection and start watching for its first request.

		The client is stored in norequest (by socket) and byname (by id).
		Returns peer unchanged.
		"""
		name = self.getnextid()
		client = Client(name, sock, peer, self.log, self.max_buffer)
		entry = (client, source)

		self.norequest[sock] = entry
		self.byname[name] = entry

		# watch for the opening request
		self.poller.addReadSocket('opening_client', client.sock)

		return peer

	def readRequest(self, sock):
		"""Read only the initial HTTP headers sent by the client"""

		client, source = self.norequest.get(sock, (None, None))
		if client:
			name, peer, request, content = client.readData()
			if request:
				self.total_requested += 1
				# headers can be read only once
				self.norequest.pop(sock, (None, None))

				# we have now read the client's opening request
				self.poller.removeReadSocket('opening_client', client.sock)

			elif request is None:
				self.cleanup(sock, client.name)
		else:
			self.log.error('trying to read headers from a client that does not exist %s' % sock)
			name, peer, request, content, source = None, None, None, None, None

		if request and self.proxied is True and source == 'proxy':
			client_ip, client_request = self.unproxy(request)

			if client_ip and client_request:
				peer = client_ip
				request = client_request
				client.setPeer(client_ip)

		return name, peer, request, content, source


	def readDataBySocket(self, sock):
		client, source = self.bysock.get(sock, (None, None))
		if client:
			name, peer, request, content = client.readData()
			if request:
				self.total_requested += 1
				# Parsing of the new request will be handled asynchronously. Ensure that
				# we do not read anything from the client until a request has been sent
				# to the remote webserver.
				# Since we just read a request, we know that the cork is not currently
				# set and so there's no risk of it being erroneously removed.
				self.poller.corkReadSocket('read_client', sock)

			elif request is None:
				self.cleanup(sock, client.name)
		else:
			self.log.error('trying to read from a client that does not exist %s' % sock)
			name, peer, request, content = None, None, None, None


		return name, peer, request, content, source


	def readDataByName(self, name):
		client, source = self.byname.get(name, (None, None))
		if client:
			name, peer, request, content = client.readData()
			if request:
				self.total_requested += 1
				# Parsing of the new request will be handled asynchronously. Ensure that
				# we do not read anything from the client until a request has been sent
				# to the remote webserver.
				# Since we just read a request, we know that the cork is not currently
				# set and so there's no risk of it being erroneously removed.
				self.poller.corkReadSocket('read_client', client.sock)

			elif request is None:
				self.cleanup(client.sock, name)
		else:
			self.log.error('trying to read from a client that does not exist %s' % name)
			name, peer, request, content = None, None, None, None


		return name, peer, request, content

	def sendDataBySocket(self, sock, data):
		client, source = self.bysock.get(sock, (None, None))
		if client:
			name = client.name
			res = client.writeData(data)

			if res is None:
				# close the client connection
				self.cleanup(sock, client.name)

				buffered, had_buffer, sent4, sent6 = None, None, 0, 0
				result = None
				buffer_change = None
			else:
				buffered, had_buffer, sent4, sent6 = res
				self.total_sent4 += sent4
				self.total_sent6 += sent6
				result = buffered


			if buffered:
				if sock not in self.buffered:
					self.buffered.append(sock)
					buffer_change = True

					# watch for the socket's send buffer becoming less than full
					self.poller.addWriteSocket('write_client', client.sock)
				else:
					buffer_change = False

			elif had_buffer and sock in self.buffered:
				self.buffered.remove(sock)
				buffer_change = True

				# we no longer care about writing to the client
				self.poller.removeWriteSocket('write_client', client.sock)

			else:
				buffer_change = False
		else:
			result = None
			buffer_change = None
			name = None

		return result, buffer_change, name, source

	def sendDataByName(self, name, data):
		client, source = self.byname.get(name, (None, None))
		if client:
			res = client.writeData(data)

			if res is None:
				# we cannot write to the client so clean it up
				self.cleanup(client.sock, name)

				buffered, had_buffer, sent4, sent6 = None, None, 0, 0
				result = None
				buffer_change = None
			else:
				buffered, had_buffer, sent4, sent6 = res
				self.total_sent4 += sent4
				self.total_sent6 += sent6
				result = buffered

			if buffered:
				if client.sock not in self.buffered:
					self.buffered.append(client.sock)
					buffer_change = True

					# watch for the socket's send buffer becoming less than full
					self.poller.addWriteSocket('write_client', client.sock)
				else:
					buffer_change = False

			elif had_buffer and client.sock in self.buffered:
				self.buffered.remove(client.sock)
				buffer_change = True

				# we no longer care about writing to the client
				self.poller.removeWriteSocket('write_client', client.sock)

			else:
				buffer_change = False
		else:
			result = None
			buffer_change = None

		return result, buffer_change, client


	def startData(self, name, data, remaining):
		"""Start (or restart) sending a response to the client known as name.

		name:      key of the client in byname
		data:      a (command, payload) pair handed to the client; anything that
		           does not unpack into two values is logged as an invalid command
		remaining: selects how the rest of the client data is read, see the
		           mode selection below

		Returns (client, content, source). content is None when the client is
		unknown or had to be cleaned up.
		"""
		# NOTE: soo ugly but fast to code
		# mode selection:
		#   string containing 'chunked' -> 'chunked'
		#   any other string            -> 'passthrough'
		#   number > 0                  -> 'transfer' of that many bytes
		#   0                           -> 'request'
		#   anything else               -> 'passthrough'
		nb_to_read = 0
		if type(remaining) == type(''):
			if 'chunked' in remaining:
				mode = 'chunked'
			else:
				mode = 'passthrough'
		elif remaining > 0:
			mode = 'transfer'
			nb_to_read = remaining
		elif remaining == 0:
			mode = 'request'
		else:
			mode = 'passthrough'

		client, source = self.byname.get(name, (None, None))
		if client:
			try:
				command, d = data
			except (ValueError, TypeError):
				self.log.error('invalid command sent to client %s' % name)
				self.cleanup(client.sock, name)
				res = None
			else:
				if client.sock not in self.bysock:
					# first response for this client: from now on it is tracked by socket
					# Start checking for content sent by the client
					self.bysock[client.sock] = client, source

					# watch for the client sending new data
					self.poller.addReadSocket('read_client', client.sock)

					# make sure we don't somehow end up with this still here
					self.norequest.pop(client.sock, (None,None))

					# NOTE: always done already in readRequest
					self.poller.removeReadSocket('opening_client', client.sock)
					res = client.startData(command, d)

				else:
					res = client.restartData(command, d)

					# If we are here then we must have prohibited reading from the client
					# and it must otherwise have been in a readable state
					self.poller.uncorkReadSocket('read_client', client.sock)



			if res is not None:
				buffered, had_buffer, sent4, sent6 = res

				# buffered data we read with the HTTP headers
				# (name is rebound here to the value reported by the client)
				name, peer, request, content = client.readRelated(mode,nb_to_read)
				if request:
					# a further request was found in the data: the client is dropped
					self.total_requested += 1
					self.log.info('reading multiple requests')
					self.cleanup(client.sock, name)
					buffered, had_buffer = None, None
					content = None

				elif request is None:
					self.cleanup(client.sock, name)
					buffered, had_buffer = None, None
					content = None

			else:
				# we cannot write to the client so clean it up
				# NOTE(review): when the command was invalid cleanup() already ran in
				# the except branch above, so this is a second call for that client
				self.cleanup(client.sock, name)

				buffered, had_buffer = None, None
				content = None

			if buffered:
				if client.sock not in self.buffered:
					self.buffered.append(client.sock)

					# watch for the socket's send buffer becoming less than full
					self.poller.addWriteSocket('write_client', client.sock)

			elif had_buffer and client.sock in self.buffered:
				self.buffered.remove(client.sock)

				# we no longer care about writing to the client
				self.poller.removeWriteSocket('write_client', client.sock)
		else:
			content = None

		return client, content, source


	def corkUploadByName(self, name):
		client, source = self.byname.get(name, (None, None))
		if client:
			self.poller.corkReadSocket('read_client', client.sock)

	def uncorkUploadByName(self, name):
		client, source = self.byname.get(name, (None, None))
		if client:
			if client.sock in self.bysock:
				self.poller.uncorkReadSocket('read_client', client.sock)

	def cleanup(self, sock, name):
		self.log.debug('cleanup for socket %s' % sock)
		client, source = self.bysock.get(sock, (None,None))
		client, source = (client,None) if client else self.norequest.get(sock, (None,None))
		client, source = (client,None) or self.byname.get(name, (None,None))

		self.bysock.pop(sock, None)
		self.norequest.pop(sock, (None,None))
		self.byname.pop(name, None)

		if client:
			self.poller.removeWriteSocket('write_client', client.sock)
			self.poller.removeReadSocket('read_client', client.sock)
			self.poller.removeReadSocket('opening_client', client.sock)

			client.shutdown()
		else:
			self.log.error('COULD NOT CLEAN UP SOCKET %s' % sock)

		if sock in self.buffered:
			self.buffered.remove(sock)

	def softstop (self):
		if len(self.byname) > 0 or len(self.norequest) > 0:
			return False
		self.log.critical('no more client connection, exiting.')
		return True

	def stop(self):
		for client, source in self.bysock.itervalues():
			client.shutdown()

		for client, source in self.norequest.itervalues():
			client.shutdown()

		self.poller.clearRead('read_client')
		self.poller.clearRead('opening_client')
		self.poller.clearWrite('write_client')

		self.bysock = {}
		self.norequest = {}
		self.byname = {}
		self.buffered = []
Example #16
0
class RedirectorManager (object):
	def __init__ (self, configuration, poller):
		"""Create an empty worker pool.

		configuration: read for redirector.minimum, redirector.maximum,
		               redirector.program, redirector.protocol and log.manager
		poller: object used to watch the pipes of the spawned workers
		"""
		redirector = configuration.redirector

		# bounds on the number of concurrent redirector workers
		self.low = redirector.minimum
		self.high = redirector.maximum

		self.poller = poller
		self.configuration = configuration

		# requests we do not immediately have the resources to process
		self.queue = Queue()

		self.nextid = 1          # unique id to give to the next spawned worker
		self.worker = {}         # worker tasks for each spawned child
		self.processes = {}      # worker tasks indexed by file descriptors we can poll
		self.available = set()   # workers currently available to handle new requests
		self.active = {}         # workers busy waiting for a response from the spawned process
		self.stopping = set()    # workers we want to stop as soon as they stop being active

		self.redirector_factory = RedirectorFactory(configuration, redirector.program, redirector.protocol)

		self.log = Logger('manager', configuration.log.manager)

	def _getid(self):
		wid = str(self.nextid)
		self.nextid += 1
		return wid

	def _spawn (self):
		"""add one worker to the pool"""
		wid = self._getid()

		worker = self.redirector_factory.create(wid)
		self.worker[wid] = worker
		self.available.add(wid)

		if worker.process is not None:
			identifier = worker.process.stdout
			self.processes[identifier] = worker
			self.poller.addReadSocket('read_workers', identifier)

		self.log.info("added a worker")
		self.log.info("we have %d workers. defined range is ( %d / %d )" % (len(self.worker), self.low, self.high))

	def spawn (self, number=1):
		"""create the request number of worker processes"""
		self.log.info("spawning %d more workers" % number)
		for _ in range(number):
			self._spawn()

	def respawn (self):
		"""make sure we reach the minimum number of workers"""
		number = max(min(len(self.worker), self.high), self.low)

		for wid in set(self.worker):
			self.stopWorker(wid)

		self.spawn(number)

	def stopWorker (self, wid):
		self.log.info('want worker %s to go away' % wid)

		if wid not in self.active:
			self.reap(wid)

		else:
			self.stopping.add(wid)

	def reap (self, wid):
		self.log.info('we are killing worker %s' % wid)
		worker = self.worker[wid]

		if wid in self.active:
			self.log.error('reaping worker %s even though it is still active' % wid)
			self.active.pop(wid)

		if wid in self.stopping:
			self.stopping.remove(wid)

		if wid in self.available:
			self.available.remove(wid)

		if worker.process is not None:
			self.poller.removeReadSocket('read_workers', worker.process.stdout)
			self.processes.pop(worker.process.stdout)

		worker.shutdown()
		self.worker.pop(wid)

	def _decrease (self):
		if self.low < len(self.worker):
			wid = self._oldest()
			if wid:
				self.stopWorker(wid)

	def _increase (self):
		if len(self.worker) < self.high:
			self.spawn()

	def decrease (self, count=1):
		for _ in xrange(count):
			self._decrease()

	def increase (self, count=1):
		for _ in xrange(count):
			self._increase()

	def start (self):
		"""spawn our minimum number of workers"""
		self.log.info("starting workers.")
		self.spawn(max(0,self.low-len(self.worker)))

	def stop (self):
		"""tell all our worker to stop reading the queue and stop"""

		for wid in self.worker:
			self.reap(wid)

		self.worker = {}

	def _oldest (self):
		"""find the oldest worker"""
		oldest = None
		past = time.time()
		for wid in set(self.worker):
			creation = self.worker[wid].creation
			if creation < past and wid not in self.stopping:
				past = creation
				oldest = wid

		return oldest

	def provision (self):
		"""manage our workers to make sure we have enough to consume the queue"""
		size = self.queue.qsize()
		num_workers = len(self.worker)

		# bad we are bleeding workers !
		if num_workers < self.low:
			self.log.info("we lost some workers, respawing %d new workers" % (self.low - num_workers))
			self.spawn(self.low - num_workers)

		# we need more workers
		if size >= num_workers:
			# nothing we can do we have reach our limit
			if num_workers >= self.high:
				self.log.warning("help ! we need more workers but we reached our ceiling ! %d request are queued for %d processes" % (size,num_workers))
				return
			# try to figure a good number to add ..
			# no less than one, no more than to reach self.high, lower between self.low and a quarter of the allowed growth
			nb_to_add = int(min(max(1,min(self.low,(self.high-self.low)/4)),self.high-num_workers))
			self.log.warning("we are low on workers adding a few (%d), the queue has %d unhandled url" % (nb_to_add,size))
			self.spawn(nb_to_add)

	def deprovision (self):
		"""manage our workers to make sure we have enough to consume the queue"""
		size = self.queue.qsize()
		num_workers = len(self.worker)

		# we are now overprovisioned
		if size < 2 and num_workers > self.low:
			self.log.info("we have too many workers (%d), stopping the oldest" % num_workers)
			# if we have to kill one, at least stop the one who had the most chance to memory leak :)
			wid = self._oldest()
			if wid:
				self.stopWorker(wid)



	def acquire (self):
		if self.available:
			identifier = self.available.pop()
			worker = self.worker[identifier]

		else:
			worker = None

		return worker

	def release (self, wid):
		if wid not in self.stopping:
			self.available.add(wid)

		else:
			self.reap(wid)

	def persist (self, wid, client_id, peer, data, header, subheader, source, tainted):
		self.active[wid] = client_id, peer, data, header, subheader, source, tainted

	def progress (self, wid):
		return self.active.pop(wid)

	def doqueue (self):
		if self.available and not self.queue.isempty():
			client_id, peer, header, subheader, source, tainted = self.queue.get()
			_, command, decision = self.request(client_id, peer, header, subheader, source, tainted=tainted)

		else:
			client_id, command, decision = None, None, None

		return client_id, command, decision


	def request (self, client_id, peer, header, subheader, source, tainted=False):
		worker = self.acquire()

		if worker is not None:
			try:
				_, command, decision = worker.decide(client_id, peer, header, subheader, source)

			except:
				command, decision = None, None

			if command is None:
				self.reap(worker.wid)

				if tainted is False:
					_, command, decision = self.request(client_id, peer, header, subheader, source, tainted=True)

				else:
					_, command, decision = Respond.close(client_id)

		else:
			command, decision = None, None
			self.queue.put((client_id, peer, header, subheader, source, tainted))

		if command == 'defer':
			self.persist(worker.wid, client_id, peer, decision, header, subheader, source, tainted)
			command, decision = None, None

		elif worker is not None:
			self.release(worker.wid)

		return client_id, command, decision


	def getDecision (self, pipe_in):
		worker = self.processes.get(pipe_in, None)

		if worker is not None and worker.wid in self.active:
			client_id, peer, request, header, subheader, source, tainted = self.progress(worker.wid)
			try:
				_, command, decision = worker.progress(client_id, peer, request, header, subheader, source)

			except Exception, e:
				command, decision = None, None

			self.release(worker.wid)

			if command is None:
				self.reap(worker.wid)

				if tainted is False:
					_, command, decision = self.request(client_id, peer, header, subheader, source, tainted=True)

				else:
					_, command, decision = Respond.close(client_id)

		else:
Example #17
0
class Supervisor (object):
	alarm_time = 0.1                           # regular backend work
	second_frequency = int(1/alarm_time)       # when we record history
	minute_frequency = int(60/alarm_time)      # when we want to average history
	increase_frequency = int(5/alarm_time)     # when we add workers
	decrease_frequency = int(60/alarm_time)    # when we remove workers
	saturation_frequency = int(20/alarm_time)  # when we report connection saturation
	interface_frequency = int(300/alarm_time)  # when we check for new interfaces

	# import os
	# clear = [hex(ord(c)) for c in os.popen('clear').read()]
	# clear = ''.join([chr(int(c,16)) for c in ['0x1b', '0x5b', '0x48', '0x1b', '0x5b', '0x32', '0x4a']])

	def __init__ (self,configuration):
		"""Build every component of the proxy and install the signal handlers.

		In order: loggers and log writers, pid/daemon helpers, the poller and its
		event lists, the managers and servers, the reactor, the state flags
		polled by run(), and finally the signal handlers.
		"""
		# NOTE(review): the configuration argument is discarded and replaced by
		# the result of load() - confirm this is intended
		configuration = load()
		self.configuration = configuration

		# Only here so the introspection code can find them
		self.log = Logger('supervisor', configuration.log.supervisor)
		self.log.error('Starting exaproxy version %s' % configuration.proxy.version)

		self.signal_log = Logger('signal', configuration.log.signal)
		self.log_writer = SysLogWriter('log', configuration.log.destination, configuration.log.enable, level=configuration.log.level)
		self.usage_writer = UsageWriter('usage', configuration.usage.destination, configuration.usage.enable)

		self.log_writer.setIdentifier(configuration.daemon.identifier)
		#self.usage_writer.setIdentifier(configuration.daemon.identifier)

		if configuration.debug.log:
			self.log_writer.toggleDebug()
			self.usage_writer.toggleDebug()

		self.log.error('python version %s' % sys.version.replace(os.linesep,' '))
		self.log.debug('starting %s' % sys.argv[0])

		self.pid = PID(self.configuration)

		self.daemon = Daemon(self.configuration)
		self.poller = Poller(self.configuration.daemon)

		# the event lists are declared before the components which use them are created
		self.poller.setupRead('read_proxy')           # Listening proxy sockets
		self.poller.setupRead('read_web')             # Listening webserver sockets
		self.poller.setupRead('read_icap')             # Listening icap sockets
		self.poller.setupRead('read_workers')         # Pipes carrying responses from the child processes
		self.poller.setupRead('read_resolver')        # Sockets currently listening for DNS responses

		self.poller.setupRead('read_client')          # Active clients
		self.poller.setupRead('opening_client')       # Clients we have not yet read a request from
		self.poller.setupWrite('write_client')        # Active clients with buffered data to send
		self.poller.setupWrite('write_resolver')      # Active DNS requests with buffered data to send

		self.poller.setupRead('read_download')        # Established connections
		self.poller.setupWrite('write_download')      # Established connections we have buffered data to send to
		self.poller.setupWrite('opening_download')    # Opening connections

		self.monitor = Monitor(self)
		self.page = Page(self)
		self.manager = RedirectorManager(
			self.configuration,
			self.poller,
		)
		self.content = ContentManager(self,configuration)
		self.client = ClientManager(self.poller, configuration)
		self.resolver = ResolverManager(self.poller, self.configuration, configuration.dns.retries*10)
		self.proxy = Server('http proxy',self.poller,'read_proxy', configuration.http.connections)
		self.web = Server('web server',self.poller,'read_web', configuration.web.connections)
		self.icap = Server('icap server',self.poller,'read_icap', configuration.icap.connections)

		self.reactor = Reactor(self.configuration, self.web, self.proxy, self.icap, self.manager, self.content, self.client, self.resolver, self.log_writer, self.usage_writer, self.poller)

		# flags set by the signal handlers below and consumed by run()
		self._shutdown = True if self.daemon.filemax == 0 else False  # stop the program
		self._softstop = False  # stop once all current connection have been dealt with
		self._reload = False  # unimplemented
		self._toggle_debug = False  # start logging a lot
		self._decrease_spawn_limit = 0
		self._increase_spawn_limit = 0
		self._refork = False  # unimplemented
		self._pdb = False  # turn on pdb debugging
		self._listen = None  # listening change ? None: no, True: listen, False: stop listeing
		self.wait_time = 5.0  # how long do we wait at maximum once we have been soft-killed
		self.local = set()  # what addresses are on our local interfaces

		# must come after self.local and self.log exist, interfaces() uses both
		self.interfaces()

		# SIGINT and SIGTERM share the same handler
		signal.signal(signal.SIGQUIT, self.sigquit)
		signal.signal(signal.SIGINT, self.sigterm)
		signal.signal(signal.SIGTERM, self.sigterm)
		# signal.signal(signal.SIGABRT, self.sigabrt)
		# signal.signal(signal.SIGHUP, self.sighup)

		signal.signal(signal.SIGTRAP, self.sigtrap)

		signal.signal(signal.SIGUSR1, self.sigusr1)
		signal.signal(signal.SIGUSR2, self.sigusr2)
		signal.signal(signal.SIGTTOU, self.sigttou)
		signal.signal(signal.SIGTTIN, self.sigttin)

		signal.signal(signal.SIGALRM, self.sigalrm)

		# make sure we always have data in history
		# (done in zero for dependencies reasons)
		self.monitor.zero()


	def sigquit (self,signum, frame):
		if self._softstop:
			self.signal_log.critical('multiple SIG INT received, shutdown')
			self._shutdown = True
		else:
			self.signal_log.critical('SIG INT received, soft-stop')
			self._softstop = True
			self._listen = False

	def sigterm (self,signum, frame):
		self.signal_log.critical('SIG TERM received, shutdown request')
		if os.environ.get('PDB',False):
			self._pdb = True
		else:
			self._shutdown = True

	# def sigabrt (self,signum, frame):
	# 	self.signal_log.info('SIG INFO received, refork request')
	# 	self._refork = True

	# def sighup (self,signum, frame):
	# 	self.signal_log.info('SIG HUP received, reload request')
	# 	self._reload = True

	def sigtrap (self,signum, frame):
		self.signal_log.critical('SIG TRAP received, toggle debug')
		self._toggle_debug = True


	def sigusr1 (self,signum, frame):
		self.signal_log.critical('SIG USR1 received, decrease worker number')
		self._decrease_spawn_limit += 1

	def sigusr2 (self,signum, frame):
		self.signal_log.critical('SIG USR2 received, increase worker number')
		self._increase_spawn_limit += 1


	def sigttou (self,signum, frame):
		self.signal_log.critical('SIG TTOU received, stop listening')
		self._listen = False

	def sigttin (self,signum, frame):
		self.signal_log.critical('SIG IN received, star listening')
		self._listen = True


	def sigalrm (self,signum, frame):
		"""Interrupt the reactor so timed actions can run, then re-arm the timer."""
		self.signal_log.debug('SIG ALRM received, timed actions')
		self.reactor.running = False
		period = self.alarm_time
		signal.setitimer(signal.ITIMER_REAL,period,period)


	def interfaces (self):
		"""Refresh self.local with the addresses found on the local interfaces.

		The loopback addresses are always part of the set. Addresses which
		appeared or disappeared since the previous call are logged.
		"""
		previous = self.local
		current = set(['127.0.0.1','::1'])

		for interface in getifaddrs():
			if interface.family in (AF_INET,AF_INET6):
				address = interface.address
				if address not in previous:
					self.log.info('found new local ip %s (%s)' % (address,interface.name))
				current.add(address)

		for ip in previous:
			if ip not in current:
				self.log.info('removed local ip %s' % ip)

		if current != previous:
			self.local = current
		else:
			self.log.info('no ip change')

	def run (self):
		if self.daemon.drop_privileges():
			self.log.critical('Could not drop privileges to \'%s\'. Refusing to run as root' % self.daemon.user)
			self.log.critical('Set the environment value USER to change the unprivileged user')
			self._shutdown = True

		elif not self.initialise():
			self._shutdown = True

		signal.setitimer(signal.ITIMER_REAL,self.alarm_time,self.alarm_time)

		count_second = 0
		count_minute = 0
		count_increase = 0
		count_decrease = 0
		count_saturation = 0
		count_interface = 0

		while True:
			count_second = (count_second + 1) % self.second_frequency
			count_minute = (count_minute + 1) % self.minute_frequency

			count_increase = (count_increase + 1) % self.increase_frequency
			count_decrease = (count_decrease + 1) % self.decrease_frequency
			count_saturation = (count_saturation + 1) % self.saturation_frequency
			count_interface = (count_interface + 1) % self.interface_frequency

			try:
				if self._pdb:
					self._pdb = False
					import pdb
					pdb.set_trace()


				# check for IO change with select
				self.reactor.run()


				# must follow the reactor so we are sure to go through the reactor at least once
				# and flush any logs
				if self._shutdown:
					self._shutdown = False
					self.shutdown()
					break
				elif self._reload:
					self._reload = False
					self.reload()
				elif self._refork:
					self._refork = False
					self.signal_log.warning('refork not implemented')
					# stop listening to new connections
					# refork the program (as we have been updated)
					# just handle current open connection


				if self._softstop:
					if self._listen == False:
						self.proxy.rejecting()
						self._listen = None
					if self.client.softstop():
						self._shutdown = True
				# only change listening if we are not shutting down
				elif self._listen is not None:
					if self._listen:
						self._shutdown = not self.proxy.accepting()
						self._listen = None
					else:
						self.proxy.rejecting()
						self._listen = None


				if self._toggle_debug:
					self._toggle_debug = False
					self.log_writer.toggleDebug()


				if self._increase_spawn_limit:
					number = self._increase_spawn_limit
					self._increase_spawn_limit = 0
					self.manager.low += number
					self.manager.high = max(self.manager.low,self.manager.high)
					for _ in range(number):
						self.manager.increase()

				if self._decrease_spawn_limit:
					number = self._decrease_spawn_limit
					self._decrease_spawn_limit = 0
					self.manager.high = max(1,self.manager.high-number)
					self.manager.low = min(self.manager.high,self.manager.low)
					for _ in range(number):
						self.manager.decrease()


				# save our monitoring stats
				if count_second == 0:
					self.monitor.second()
					expired = self.reactor.client.expire()
					self.reactor.log.debug('events : ' + ', '.join('%s:%d' % (k,len(v)) for (k,v) in self.reactor.events.items()))
				else:
					expired = 0

				if expired:
					self.proxy.notifyClose(None, count=expired)

				if count_minute == 0:
					self.monitor.minute()

				# make sure we have enough workers
				if count_increase == 0:
					self.manager.provision()
				# and every so often remove useless workers
				if count_decrease == 0:
					self.manager.deprovision()

				# report if we saw too many connections
				if count_saturation == 0:
					self.proxy.saturation()
					self.web.saturation()

				if self.configuration.daemon.poll_interfaces and count_interface == 0:
					self.interfaces()

			except KeyboardInterrupt:
				self.log.critical('^C received')
				self._shutdown = True
			except OSError,e:
				# This shoould never happen as we are limiting how many connections we accept
				if e.errno == 24:  # Too many open files
					self.log.critical('Too many opened files, shutting down')
					for line in traceback.format_exc().split('\n'):
						self.log.critical(line)
					self._shutdown = True
				else:
					self.log.critical('unrecoverable io error')
					for line in traceback.format_exc().split('\n'):
						self.log.critical(line)
					self._shutdown = True

			finally:
Example #18
0
class ClientManager (object):
	def __init__(self, poller, configuration):
		self.total_sent4 = 0L
		self.total_sent6 = 0L
		self.total_requested = 0L
		self.norequest = TimeCache(configuration.http.idle_connect)
		self.bysock = {}
		self.byname = {}
		self.buffered = []
		self._nextid = 0
		self.poller = poller
		self.log = Logger('client', configuration.log.client)
		self.http_max_buffer = configuration.http.header_size
		self.icap_max_buffer = configuration.icap.header_size
		self.tls_max_buffer = configuration.tls.header_size
		self.passthrough_max_buffer = 0
		self.proxied = {
			'proxy' : configuration.http.proxied,
			'icap'  : configuration.icap.proxied,
			'tls'   : configuration.tls.proxied,
		}

	def __contains__(self, item):
		return item in self.bysock

	def lookupSocket (self, item):
		return self.byname.get(item, None)

	def getnextid(self):
		self._nextid += 1
		return str(self._nextid)

	def expire (self,number=100):
		counts = {}
		for sock in self.norequest.expired(number):
			client, source = self.norequest.get(sock,(None,None))
			if client:
				self.cleanup(sock,client.name)
				counts[source] = counts.get(source, 0) + 1

		return counts

	def httpConnection (self, sock, peer, source):
		"""Wrap a new connection in an HTTPClient and wait for its opening request.

		The client is stored in `norequest` (keyed by socket) together with
		`source`, its generated name is mapped to the socket in `byname`, and
		the client's socket is added to the poller's 'opening_client' read set.

		Returns `peer` unchanged.
		"""
		identifier = self.getnextid()
		proxied = self.proxied.get(source)
		client = HTTPClient(identifier, sock, peer, self.log, self.http_max_buffer, proxied)

		self.norequest[sock] = (client, source)
		self.byname[identifier] = sock

		# nothing has been read yet: wake up when the opening request arrives
		self.poller.addReadSocket('opening_client', client.sock)

		return peer

	def icapConnection (self, sock, peer, source):
		"""Wrap a new connection in an ICAPClient and wait for its opening request.

		The client is stored in `norequest` (keyed by socket) together with
		`source`, its generated name is mapped to the socket in `byname`, and
		the client's socket is added to the poller's 'opening_client' read set.

		Returns `peer` unchanged.
		"""
		identifier = self.getnextid()
		proxied = self.proxied.get(source)
		client = ICAPClient(identifier, sock, peer, self.log, self.icap_max_buffer, proxied)

		self.norequest[sock] = (client, source)
		self.byname[identifier] = sock

		# nothing has been read yet: wake up when the opening request arrives
		self.poller.addReadSocket('opening_client', client.sock)

		return peer

	def tlsConnection (self, sock, peer, source):
		"""Wrap a new connection in a TLSClient and wait for its opening request.

		The client is stored in `norequest` (keyed by socket) together with
		`source`, its generated name is mapped to the socket in `byname`, and
		the client's socket is added to the poller's 'opening_client' read set.

		Returns `peer` unchanged.
		"""
		identifier = self.getnextid()
		proxied = self.proxied.get(source)
		client = TLSClient(identifier, sock, peer, self.log, self.tls_max_buffer, proxied)

		self.norequest[sock] = (client, source)
		self.byname[identifier] = sock

		# nothing has been read yet: wake up when the opening request arrives
		self.poller.addReadSocket('opening_client', client.sock)

		return peer

	def passthroughConnection (self, sock, peer, source):
		"""Wrap a new connection in a PassthroughClient and start reading from it.

		Unlike the other *Connection methods the client is stored directly in
		`bysock` (not `norequest`) and its socket is added to the poller's
		'read_client' set.

		Returns (name, accept_addr, accept_port), where the address and port
		come from the client's getAcceptAddress().
		"""
		identifier = self.getnextid()
		proxied = self.proxied.get(source)
		client = PassthroughClient(identifier, sock, peer, self.log, self.passthrough_max_buffer, proxied)

		self.bysock[sock] = (client, source)
		self.byname[identifier] = sock

		# there is no opening request to wait for: watch for data straight away
		self.poller.addReadSocket('read_client', client.sock)

		accept_addr, accept_port = client.getAcceptAddress()
		return identifier, accept_addr, accept_port

	def readRequest (self, sock):
		"""Read only the initial HTTP headers sent by the client"""

		client, source = self.norequest.get(sock, (None, None))

		if client:
			name, accept_addr, accept_port, peer, request, subrequest, content = client.readData()
			if request:
				self.total_requested += 1

				# headers can be read only once
				self.norequest.pop(sock, (None, None))
				self.bysock[sock] = client, source

				# watch for the client sending new data
				self.poller.addReadSocket('read_client', client.sock)

				# we have now read the client's opening request
				self.poller.removeReadSocket('opening_client', client.sock)

				# do not read more data until we have properly handled the request
				self.poller.corkReadSocket('read_client', sock)

			elif request is None:
				self.cleanup(sock, client.name)
		else:
			self.log.error('trying to read headers from a client that does not exist %s' % sock)
			name, accept_addr, accept_port, peer, request, subrequest, content, source = None, None, None, None, None, None, None, None

		return name, accept_addr, accept_port, peer, request, subrequest, content, source


	def readData (self, sock):
		client, source = self.bysock.get(sock, (None, None))
		if client:
			name, accept_addr, accept_port, peer, request, subrequest, content = client.readData()
			if request:
				self.total_requested += 1
				# Parsing of the new request will be handled asynchronously. Ensure that
				# we do not read anything from the client until a request has been sent
				# to the remote webserver.
				# Since we just read a request, we know that the cork is not currently
				# set and so there's no risk of it being erroneously removed.
				self.poller.corkReadSocket('read_client', sock)

			elif request is None:
				self.cleanup(sock, client.name)
		else:
			self.log.error('trying to read from a client that does not exist %s' % sock)
			name, accept_addr, accept_port, peer, request, subrequest, content = None, None, None, None, None, None, None


		return name, accept_addr, accept_port, peer, request, subrequest, content, source

	def sendData (self, sock, data):
		client, source = self.bysock.get(sock, (None, None))
		if client:
			name = client.name
			res = client.writeData(data)

			if res is None:
				# close the client connection
				self.cleanup(sock, client.name)

				buffered, had_buffer, sent4, sent6 = None, None, 0, 0
				result = None
				buffer_change = None
			else:
				buffered, had_buffer, sent4, sent6 = res
				self.total_sent4 += sent4
				self.total_sent6 += sent6
				result = buffered


			if buffered:
				if sock not in self.buffered:
					self.buffered.append(sock)
					buffer_change = True

					# watch for the socket's send buffer becoming less than full
					self.poller.addWriteSocket('write_client', client.sock)
				else:
					buffer_change = False

			elif had_buffer and sock in self.buffered:
				self.buffered.remove(sock)
				buffer_change = True

				# we no longer care about writing to the client
				self.poller.removeWriteSocket('write_client', client.sock)

			else:
				buffer_change = False
		else:
			result = None
			buffer_change = None
			name = None

		return result, buffer_change, name, source


	def parseRemaining (self, remaining):
		nb_to_read = 0

		if isinstance(remaining, basestring):
			mode = 'chunked' if remaining == 'chunked' else 'passthrough'

		elif remaining > 0:
			mode = 'transfer'
			nb_to_read = remaining

		elif remaining == 0:
			mode = ''

		else:
			mode = 'passthrough'

		return mode, nb_to_read

	def startData(self, sock, data, remaining):
		client, source = self.bysock.get(sock, (None, None))

		try:
			mode, nb_to_read = self.parseRemaining(remaining)
			command, d = data if client is not None else (None, None)

		except (ValueError, TypeError), e:
			self.log.error('invalid command sent to client %s' % client.name)
			command, d = None, None

		if not client or command is None:
			return None, source

		name, peer, res = client.startData(command, d)

		if res is not None:
			name, accept_addr, peer, request, subrequest, content = client.readRelated(mode, nb_to_read)

			buffered, had_buffer, sent4, sent6 = res

			self.poller.uncorkReadSocket('read_client', client.sock)

			self.total_sent4 += sent4
			self.total_sent6 += sent6

		else:
			self.cleanup(client.sock, name)
			return None, source


		if request:
			self.total_requested += 1
			self.log.info('reading multiple requests')
			self.cleanup(client.sock, name)
			buffered, had_buffer = None, None
			content = None

		elif request is None:
			self.cleanup(client.sock, name)
			buffered, had_buffer = None, None
			content = None

		if buffered is True and had_buffer is False:
			self.buffered.append(client.sock)

			self.poller.addWriteSocket('write_client', client.sock)

		elif buffered is False and had_buffer is True:
			self.buffered.remove(client.sock)

			self.poller.removeWriteSocket('write_client', client.sock)

		return content, source