def __sig_handler(sig, frame):
    """Shut the agent down in response to a signal.

    Logs a critical message, then stops the connector and the processor
    held by the global configuration store, and finally asks the thread
    pool to remove all of its workers, passing ``3`` as the wait argument.

    :param sig: Not used by this handler.
    :param frame: Not used by this handler.
    """
    Logger().critical("signal received, shutting down")

    store = GlobalConfigStore()
    store.connector.stop()
    store.processor.stop()
    store.threadpool.remove_all_workers(3)
def _catch_fail(failure): log = Logger() try: raise failure.exception, failure.message, failure.traceback except: msg = 'unexpected error occurred: {}'.format(failure.message) log.critical(msg, exc_info=True)
def __init__(self):
    """Create the message queues, the per-user path templates and
    schedule the action handler on the global thread pool.
    """
    self._tp = GlobalThreadPool()
    self._logger = Logger()
    self._ingress = queue.Queue()
    self._egress = queue.Queue()

    # TODO: Put the following in a configuration file.
    # Every path below is a template carrying a ``{username}``
    # placeholder that is filled in with ``str.format`` when used.
    home = '/home'
    user = os.path.join(home, '{username}')
    ssh = os.path.join(user, '.ssh')
    self._home_dir = home
    self._user_dir = user
    self._ssh_dir = ssh
    self._authkeys = os.path.join(ssh, 'authorized_keys')

    # Start the action handler
    handler = Task(target=self.__action_handler, infinite=True)
    handler.failure = self.__catch_fail
    self._action_handler_task = self._tp.run(handler)
def __init__(self):
    """Read the connection settings from the global configuration store
    and initialise the connector in a stopped, disconnected state.
    """
    config = GlobalConfigStore()
    self._tp = GlobalThreadPool()

    # Credentials and backend identity taken from the configuration
    self._username = config.agent_username
    self._agent_key = config.agentkey
    host, port = config.host, config.port
    self._backend_addr = (host, port)
    self._backend_hostkey = config.backend_hostkey

    self._logger = Logger()

    # Registered endpoints and the transmit queue
    self._endpoints = []
    self._tx = queue.Queue()

    # Nothing is running or connected until start() is called
    self._conn_handler_task = None
    self._connected = False
    self._running = False
    self._client = None
    self._chan = None
def __init__(self, min_workers=2):
    """Create the pool, spawn the initial workers and start the monitor.

    :param min_workers:
        Minimum number of workers to keep around; one extra worker is
        reserved on top of it for the monitoring task.
    :type min_workers: int
    """
    # The configured stack size is multiplied by 1024 before being handed
    # to threading.stack_size()
    stack_kb = GlobalConfigStore().stacksize
    threading.stack_size(stack_kb * 1024)

    self._logger = Logger()
    self._tasks = queue.Queue()
    self._running_tasks = []

    # Worker accounting; the counters are guarded by _countlck
    self._min_workers = 1 + min_workers  # for the monitoring thread
    self._workers = 0
    self._avail_workers = 0
    self._countlck = threading.Lock()

    # Signalling events
    self._task_added = threading.Event()
    self._killev = threading.Event()
    self._all_died = threading.Event()

    self.add_worker(self._min_workers)
    monitor = Task(target=self.__volume_monitor, infinite=True)
    self.run(monitor)
class ThreadPool(object):
    """An adaptive thread pool.

    This thread pool adapts to the threads consumption rate by making sure that
    the available number of workers to process tasks is always above the minimum
    number of workers that should be always available.
    """

    # Callable used to create worker threads; it is called with a
    # ``target`` keyword argument.
    ThreadFactory = threading.Thread

    def __init__(self, min_workers=2):
        """Create the pool, spawn the initial workers and start the monitor.

        :param min_workers:
            Minimum number of workers to keep around; one extra worker is
            reserved on top of it for the monitoring task.
        :type min_workers: int
        """
        stacksize = GlobalConfigStore().stacksize
        threading.stack_size(stacksize * 1024)
        self._logger = Logger()
        self._tasks = queue.Queue()
        self._running_tasks = []
        self._min_workers = min_workers + 1  # for the monitoring thread
        self._workers = 0
        self._avail_workers = 0
        self._countlck = threading.Lock()  # guards the two counters above
        self._task_added = threading.Event()
        self._killev = threading.Event()
        self._all_died = threading.Event()
        self.add_worker(self._min_workers)
        mt = Task(target=self.__volume_monitor, infinite=True)
        self.run(mt)

    def run(self, task):
        """Start a task.

        :param task:
            A task to be executed by a worker.
        :type task:
            :class:`Task`
        :returns:
            The task that was passed to this method.
        """
        self._task_added.set()
        self._tasks.put(task)
        return task

    def add_worker(self, num=1):
        """Add worker(s) to the thread pool.

        :param num:
            The number of workers to add to the pool.
        :type num: int
        """
        for _ in range(int(num)):
            t = self.ThreadFactory(target=self.__worker)
            t.setDaemon(True)
            t.start()

    def remove_worker(self, num=1):
        """Remove worker(s) from the thread pool.

        One ``"exit"`` sentinel is queued per worker to remove; a worker
        terminates when it dequeues the sentinel.

        :param num:
            The number of workers to remove from the pool.
        :type num: int
        """
        for _ in range(int(num)):
            self._tasks.put("exit")

    def remove_all_workers(self, wait=None):
        """Remove all workers from the pool.

        Remove all active workers from the pool and wait ``wait`` seconds
        until last worker ends, or wait forever if ``wait`` is None. This
        action will also signal all running tasks to stop as soon as possible.

        :param wait:
            Number of seconds to wait or None to wait forever.
        :type wait: float
        """
        self._killev.set()
        self.remove_worker(self._workers)
        self._task_added.set()
        # Iterate over a snapshot: workers remove finished tasks from
        # ``_running_tasks`` while we loop, and mutating a list during
        # iteration makes the loop skip entries (tasks never stopped).
        for task in list(self._running_tasks):
            task.stop()
        self._all_died.wait(wait)
        self._killev.clear()

    def __volume_monitor(self, kill_ev):
        """Keep the number of workers at or above the configured minimum.

        Runs until ``kill_ev`` is set, waking up whenever a task is added
        or after 5 seconds, whichever comes first.

        :param kill_ev:
            Event that ends the monitor loop once set.
        """
        while not kill_ev.is_set():
            with self._countlck:
                # Top the pool up to the minimum number of workers
                if self._workers < self._min_workers:
                    self.add_worker(self._min_workers - self._workers)
                # Too few idle workers: grow by half the number of busy ones
                if self._avail_workers < self._min_workers:
                    self.add_worker(round(abs(self._workers
                        - self._avail_workers) / 2.0))
            self._task_added.wait(5.0)
            self._task_added.clear()

    def __worker(self):
        """Body of a worker thread.

        Pulls tasks off the queue and runs them until the kill event is
        set, the ``"exit"`` sentinel is received, or no task shows up
        within one second.
        """
        with self._countlck:
            self._workers += 1
            self._avail_workers += 1
            self._all_died.clear()

        while not self._killev.is_set():
            # Main thread body
            try:
                task = self._tasks.get(timeout=1.0)
            except queue.Empty:  # Waited for too long
                break
            if task == 'exit':  # "exit" is a sentinel task to kill the worker
                break

            with self._countlck:
                self._avail_workers -= 1

            # Execute target function here
            self._running_tasks.append(task)
            try:
                ret = task.target(*task.args, **task.kwargs)
                if task.success:
                    task.success(ret)
            except Exception as ex:
                if task.failure:
                    try:
                        task.failure(Failure())
                    except Exception:
                        msg = 'failure callback raised an error on task ({})'.format(task.id)
                        self._logger.critical(msg, exc_info=True)
                else:
                    # Format the exception itself: ``ex.message`` is
                    # deprecated and empty for multi-argument exceptions.
                    msg = "unhandled error occurred on task ({}): {}".format(
                            task.id, ex)
                    self._logger.critical(msg, exc_info=True)
            self._running_tasks.remove(task)

            # Infinite tasks are queued again after every run
            if task.infinite:
                self._tasks.put(task)

            with self._countlck:
                self._avail_workers += 1

        with self._countlck:
            self._workers -= 1
            self._avail_workers -= 1
            if not self._workers:
                self._all_died.set()
class Processor(object): """A class to handle action messages coming from the backend and send back a feedback to indicate success or failure of the action requested. This class is a kind-singleton which means you cannot instantiate more than one copy per application life time. """ __metaclass__ = KindSingletonMeta def __init__(self): self._tp = GlobalThreadPool() self._logger = Logger() self._ingress = queue.Queue() self._egress = queue.Queue() # TODO: Put the following in a configuration file. self._home_dir = '/home' self._user_dir = os.path.join(self._home_dir, '{username}') self._ssh_dir = os.path.join(self._user_dir, '.ssh') self._authkeys = os.path.join(self._ssh_dir, 'authorized_keys') # Start the action handler t = Task(target=self.__action_handler, infinite=True) t.failure = self.__catch_fail self._action_handler_task = self._tp.run(t) def endpoint(self): """Return an ingress and an egress points to communicate with this processor. :returns: :class:`bastio.ssh.client.BackendConnector.EndPoint` """ return BackendConnector.EndPoint(ingress=self._ingress, egress=self._egress) def process(self, message): """Process a message and return a feedback. :param message: A message to be processed. 
:type message: A subclass of :class:`bastio.ssh.protocol.ActionMessage` :returns: :class:`bastio.ssh.protocol.FeedbackMessage` """ if isinstance(message, AddUserMessage): # Add a user if one doesn't exist feedback = self._add_user(message) elif isinstance(message, RemoveUserMessage): # Remove a user if one exists feedback = self._remove_user(message) elif isinstance(message, UpdateUserMessage): # Update a user either to give it root access or to demote it feedback = self._update_user(message) elif isinstance(message, AddKeyMessage): # Add public key to the user's authorized_keys file feedback = self._add_key(message) elif isinstance(message, RemoveKeyMessage): # Remove public key from the user's authorized_keys file feedback = self._remove_key(message) else: # NOTE: This execution branch must never be reached, # do not take this lightly if it happens. feedback = message.reply( ("internal error: agent does not know how to handle messages" " of type `{type}`").format(type=message.type), FeedbackMessage.ERROR) return feedback def stop(self): """Signal the action handler to stop.""" self._action_handler_task.stop() def __action_handler(self, kill_ev): self._logger.warning("action handler started") while not kill_ev.is_set(): message = self._get_ingress(timeout=3) if message: feedback = self.process(message) self._put_egress(feedback) def __catch_fail(self, failure): try: raise failure.exception, failure.message, failure.traceback except Exception: self._logger.critical("unexpected error occurred in the action handler", exc_info=True) def _get_ingress(self, timeout): try: return self._ingress.get(timeout=timeout) except queue.Empty: return None def _put_egress(self, item): self._egress.put(item) ### ### BEGIN COMMAND METHODS ### def _chk_user(self, message, status=FeedbackMessage.ERROR, should_exist=False): # Check if a user exists user_exist = os.path.exists(self._user_dir.format( username=message.username)) try: pwd.getpwnam(message.username) except KeyError: 
user_exist = False if user_exist: reply_msg = "{username} already exists".format(username=message.username) if should_exist: # user exists and should return False else: # user exists but shouldn't feedback = message.reply(reply_msg, status) else: reply_msg = "{username} does not exist".format(username=message.username) if should_exist: # user doesn't exist but should feedback = message.reply(reply_msg, status) else: # user doesn't exist and shouldn't return False return feedback def _chk_key(self, message): # Check if a public key exists try: with open(self._authkeys.format(username=message.username), 'rb') as fd: auth_data = fd.read() except Exception: return False return message.public_key in auth_data def _create_ssh(self, message): # Make sure that .ssh exists and has the right permissions try: os.mkdir(self._ssh_dir.format(username=message.username), 0700) except OSError: pass # Directory already exists (or perm denied... very unlikely) # Touch .ssh/authorized_keys file auth_file = self._authkeys.format(username=message.username) try: with open(auth_file, 'ab') as fd: pass # We just want to create the file if it doesn't exist except IOError: # We can't do anything about it here, it will be handled by other messages pass # Make sure that .ssh/authorized_keys file has the right permissions try: os.chmod(auth_file, 0600) except OSError: # We can't do anything about it here, offload it to future messages pass # Chown .ssh/authorized_keys to the user try: pw_struct = pwd.getpwnam(message.username) os.chown(self._ssh_dir.format(username=message.username), pw_struct.pw_uid, pw_struct.pw_gid) os.chown(auth_file, pw_struct.pw_uid, pw_struct.pw_gid) except KeyError: # username not found from getpwnam pass except OSError: # chown failed pass def _add_user(self, message): # Add user if message.sudo: add_command = 'useradd -mU -G sudo {username}' else: add_command = 'useradd -mU {username}' add_command = add_command.format(username=message.username) # Check if a user 
exists feedback = self._chk_user(message, FeedbackMessage.INFO, False) if feedback: self._create_ssh(message) return feedback # Create the user _, stderr = self._run_command(add_command) if stderr: feedback = message.reply(stderr, FeedbackMessage.ERROR) return feedback # Clear out user's password _, stderr = self._run_command("passwd -d {username}".format( username=message.username)) if stderr: feedback = message.reply(stderr, FeedbackMessage.ERROR) return feedback self._create_ssh(message) feedback = message.reply("{username} was created successfully".format( username=message.username), FeedbackMessage.SUCCESS) return feedback def _remove_user(self, message): # Remove user rm_command = 'userdel -r {username}'.format(username=message.username) # Check if a user exists feedback = self._chk_user(message, FeedbackMessage.INFO, True) if feedback: return feedback # Try to remove the user _, stderr = self._run_command(rm_command) if stderr: feedback = message.reply(stderr, FeedbackMessage.ERROR) else: feedback = message.reply( "{username} was removed successfully".format( username=message.username), FeedbackMessage.SUCCESS) return feedback def _update_user(self, message): # Update user flag = '-a' if message.sudo else '-d' update_command = 'gpasswd {flag} {username} sudo'.format(flag=flag, username=message.username) # Check if a user exists feedback = self._chk_user(message, FeedbackMessage.ERROR, True) if feedback: return feedback # Update a user either to give it root access or to demote it _, stderr = self._run_command(update_command) if stderr: feedback = message.reply(stderr, FeedbackMessage.ERROR) else: if message.sudo: fb_str = '{username} was added to the sudo group successfully' else: fb_str = '{username} was removed from the sudo group successfully' feedback = message.reply(fb_str.format(username=message.username), FeedbackMessage.SUCCESS) return feedback def _add_key(self, message): # Add public key pubkey = message.public_key username = message.username # 
Check if a user exists feedback = self._chk_user(message, FeedbackMessage.ERROR, True) if feedback: return feedback # Check if public key already exists if self._chk_key(message): feedback = message.reply( "public key `{pub_key}` for {username} already exists".format( pub_key=pubkey, username=username), FeedbackMessage.INFO) return feedback # Try to add the public key to the user's authorized_keys file auth_file = self._authkeys.format(username=username) try: with open(auth_file, 'ab') as fd: fd.write(pubkey + '\n') feedback = message.reply( "added public key to {username} successfully".format( username=username), FeedbackMessage.SUCCESS) except IOError as ex: feedback = message.reply(ex.strerror, FeedbackMessage.ERROR) except Exception as ex: feedback = message.reply(ex.message, FeedbackMessage.ERROR) return feedback def _remove_key(self, message): # Remove public key pubkey = message.public_key username = message.username # Check if a user exists feedback = self._chk_user(message, FeedbackMessage.ERROR, True) if feedback: return feedback # Check if public key does not exist if not self._chk_key(message): feedback = message.reply( "public key for {username} does not exist".format( username=username), FeedbackMessage.INFO) return feedback # Try to remove the public key from the user's authorized_keys file auth_data = [] auth_file = self._authkeys.format(username=username) try: with open(auth_file, 'rb') as fd: for line in fd.readlines(): if pubkey in line: continue auth_data.append(line) with open(auth_file, 'wb') as fd: # TODO: A race condition is possible here where the file could # be written to before we write to it and therefore overriding # the changes made to it by some other application. Find a fix # for it. This is quite unlikely in this particular case so # don't sweat it. 
fd.writelines(auth_data) feedback = message.reply( "removed public key from {username} successfully".format( username=username), FeedbackMessage.SUCCESS) except IOError as ex: feedback = message.reply(ex.strerror, FeedbackMessage.ERROR) except Exception as ex: feedback = message.reply(ex.message, FeedbackMessage.ERROR) return feedback @staticmethod def _run_command(command, input_data=None): try: po = subprocess.Popen(args=command, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = po.communicate(input_data) except OSError as ex: stderr = ex.strerror except ValueError as ex: stderr = ex.message return stdout, stderr
class BackendConnector(object): """A singleton to establish and maintain a secure connection with the backend over a specific subsystem channel. This connector supports registering of endpoints where processors can register their endpoint to communicate with the backend. It is guaranteed that the messages will be delivered ASAP but the actual ETA is chaotic. """ __metaclass__ = KindSingletonMeta EndPoint = collections.namedtuple("EndPoint", "ingress egress") Subsystem = 'bastio-agent' def __init__(self): cfg = GlobalConfigStore() self._tp = GlobalThreadPool() self._username = cfg.agent_username self._agent_key = cfg.agentkey self._backend_addr = (cfg.host, cfg.port) self._backend_hostkey = cfg.backend_hostkey self._logger = Logger() self._endpoints = [] self._tx = queue.Queue() self._conn_handler_task = None self._connected = False self._running = False self._client = None self._chan = None def start(self): """Start the connection handler thread.""" if not self._running: self._running = True t = Task(target=self.__conn_handler, infinite=True) t.failure = self._catch_fail self._conn_handler_task = self._tp.run(t) def stop(self): """Stop the connection handler thread.""" if self._running: self._running = False self.close() self._conn_handler_task.stop() def register(self, endpoint): """Register an endpoint to this connector to so that it can communicate with the backend. The endpoint is a tuple of one ingress queue as first argument and egress as the second argument. :param endpoint: A tuple of two queues; ingress and egress. 
:type endpoint: :class:`BackendConnector.EndPoint` """ self._endpoints.append(endpoint) def is_active(self): """Check whether the transport is still active.""" if self._client: t = self._client.get_transport() if t: return t.is_active() return False def close(self): """Close open channels and transport.""" if self._chan: self._chan.close() if self._client: self._client.close() self._connected = False self._logger.critical("connection lost with the backend") def __conn_handler(self, kill_ev): self._logger.warning("backend connection handler started") while not kill_ev.is_set(): # Try to connect to the backend try: self._connect() except BastioBackendError as ex: self.close() self._logger.critical(ex.message) # TODO: Implement a more decent reconnection strategy time.sleep(5) # Sleep 5 seconds before retrial continue # Read a message from the wire, parse it, and push it to ingress queue(s) try: json_string = self._read_message() message = MessageParser.parse(json_string) self._put_ingress(message) except socket.timeout: pass # No messages are ready to be read except BastioNetstringError as ex: self._logger.critical( "error parsing a Netstring message: {}".format(ex.message)) self.close() continue except BastioMessageError as ex: self._logger.critical( "error parsing a protocol message: {}".format(ex.message)) self.close() continue except BastioEOFError: self._logger.critical("received EOF on channel") self.close() continue # Get an item from the egress queue(s) and send it to the backend try: message = self._get_egress(timeout=0.01) # 10ms if message == None: # No message is available to send continue self._write_message(message.to_json()) except socket.timeout: # Too many un-ACK'd packets? Sliding window shut on our fingers? 
# We don't really know what happened, lets reschedule the last # message for retransmission anyway self._push_queue(self._tx, message) except BastioEOFError: # Message was not sent because channel was closed # re-push the message to the TX queue again and retry connection self._push_queue(self._tx, message) self.close() continue def _connect(self): """An idempotent method to connect to the backend.""" try: if self._connected: return # Prepare host keys self._client = paramiko.SSHClient() hostkeys = self._client.get_host_keys() hostkey_server_name = self._make_hostkey_entry_name(self._backend_addr) hostkeys.add(hostkey_server_name, self._backend_hostkey.get_name(), self._backend_hostkey) # Try to connect self._client.connect(hostname=self._backend_addr[0], port=self._backend_addr[1], username=self._username, pkey=self._agent_key, allow_agent=False, look_for_keys=False) self._connected = True # Open session and establish the subsystem self._chan = self._invoke_bastio() self._logger.critical("connection established with the backend") except BastioBackendError: raise except paramiko.AuthenticationException: reraise(BastioBackendError, "authentication with backend failed") except paramiko.BadHostKeyException: reraise(BastioBackendError, "backend host key does not match") except socket.error as ex: reraise(BastioBackendError, ex.strerror.lower()) except Exception: reraise(BastioBackendError) def _invoke_bastio(self): """Start a bastio subsystem on an already authenticated transport. :returns: A channel connected to the subsystem or None. 
""" if not self.is_active(): raise BastioBackendError("client is not connected") t = self._client.get_transport() chan = t.open_session() if not chan: raise BastioBackendError("opening a session with the backend failed") chan.settimeout(0.01) # 10ms chan.invoke_subsystem(self.Subsystem) return chan def _read_message(self): nets = Netstring(self._chan) return nets.recv() def _write_message(self, data): nets = Netstring.compose(data) remaining = len(nets) while remaining > 0: n = self._chan.send(nets) if n <= 0: raise BastioEOFError("channel closed") remaining -= n def _put_ingress(self, item): for endpoint in self._endpoints: endpoint.ingress.put(item) def _get_egress(self, timeout): for endpoint in self._endpoints: try: item = endpoint.egress.get_nowait() self._tx.put(item) except queue.Empty: pass try: return self._tx.get(timeout=timeout) except queue.Empty: return None @staticmethod def _catch_fail(failure): log = Logger() try: raise failure.exception, failure.message, failure.traceback except: msg = 'unexpected error occurred: {}'.format(failure.message) log.critical(msg, exc_info=True) @staticmethod def _make_hostkey_entry_name(addr): """We do the following to work around a paramiko inconsistency.""" if addr[1] == paramiko.config.SSH_PORT: return addr[0] return '[{}]:{}'.format(*addr)