def quit( self ): # thread-safe
    """Signaling that shutdown shall commence"""
    # If we guarded this function, an error in here would
    # again call self.quit, and we'd be stuck in infinite
    # recursion. Hence, no guarding.
    fs_logging.log( "Shutdown flagged", fs_logging.LEVEL_DEBUG_2 )
    self._keep_running.clear()
def _stop_threads( self ):
    """Stop all spawned threads

    Stopping occurs in two steps. First, the threads are asked to
    stop gracefully via their stop method. Afterwards, we force them
    to stop using _Thread__stop.
    """
    # Nicely asking threads to stop
    for plugin in self._started_threads:
        stop_method = getattr( plugin, 'stop', None )
        if callable( stop_method ):
            stop_method()

    for plugin in self._started_threads:
        # Using 1.6 instead of the required 1.5 seconds to
        # respond to stop, to allow for some lag
        plugin.join( 1.6 )
        if plugin.is_alive():
            # Thread did not react to our "stop". How impolite!
            # We try forcing it to stop.
            try:
                fs_logging.log( "Plugin %s did not react to 'stop' in time. Forcing stop" % plugin )
                plugin._Thread__stop()
            except:
                pass
            if plugin.is_alive():
                # Thread is still alive. We cannot do anything else.
                fs_logging.log( "Failed to stop %s. Giving up" % plugin )
def _register_rule( self, rule ):
    """Register a rule with this manager

    rule: The Rule instance that is to be registered
    """
    self._rules.append( rule )
    depends_on = rule.depends_on()
    if len( depends_on ) > 0:
        for name in depends_on:
            fs_logging.log( "Registering '%s' for updates of %s" % ( rule, name ), fs_logging.LEVEL_DEBUG_1 )
            self._updatees_lock.acquire()
            try:
                try:
                    self._value_updatees[ name ].add( rule )
                except KeyError:
                    self._value_updatees[ name ] = set( [ rule ] )
            finally:
                self._updatees_lock.release()
    else:
        fs_logging.log( "Registering %s globally for updates" % ( rule ), fs_logging.LEVEL_DEBUG_1 )
        self._updatees_lock.acquire()
        try:
            self._global_updatees.add( rule )
        finally:
            self._updatees_lock.release()

    # We immediately evaluate the rule, because it might already match
    rule.evaluate( self )
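# A note on dependency registration (illustration; the concrete value name
# is hypothetical, the rule syntax comes from fs_rules.RuleParser): a rule
# whose depends_on() yields, say, [ 'tunnel.connections.total' ] is
# re-evaluated only when that value changes, while a rule with an empty
# depends_on() lands in _global_updatees and is re-evaluated on every
# value update.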
def _shut_down( self ):
    """Shutting down of the failure simulator."""
    # This function is only called from the main thread,
    # and is only called once.
    fs_logging.log( "Shutdown started", fs_logging.LEVEL_VERBOSE )
    try:
        self._self_plugin.set_shutdown()
    except:
        pass
    try:
        self._restore_stdin()
    except:
        pass
    try:
        self._stop_threads()
    except:
        pass
    try:
        self._uninit_plugins()
    except:
        pass
    fs_logging.log( "Shutdown finished", fs_logging.LEVEL_VERBOSE )
def execute_action( self, name, params ):
    """Adding to log.

    name: gets ignored
    """
    if params is None:
        fs_logging.log()
    elif len( params ) == 0:
        fs_logging.log()
    elif len( params ) == 1:
        fs_logging.log( params[ 0 ] )
    elif len( params ) == 2:
        msg, level_str = params
        level = fs_logging.LEVEL_NORMAL
        if level_str == "plain_output":
            level = fs_logging.LEVEL_PLAIN_OUTPUT
        elif level_str == "normal":
            level = fs_logging.LEVEL_NORMAL
        elif level_str == "verbose":
            level = fs_logging.LEVEL_VERBOSE
        elif level_str == "debug":
            level = fs_logging.LEVEL_DEBUG_1
        elif level_str == "debug_1":
            level = fs_logging.LEVEL_DEBUG_1
        elif level_str == "debug_2":
            level = fs_logging.LEVEL_DEBUG_2
        fs_logging.log( msg, level )
    elif len( params ) >= 3:
        fs_logging.log( ", ".join( map( str, params ) ) )
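# Illustration of the parameter handling above (the message strings are
# hypothetical):
#   execute_action( 'log', [ 'hello' ] )             -> logs at the default level
#   execute_action( 'log', [ 'hello', 'verbose' ] )  -> logs at LEVEL_VERBOSE
#   execute_action( 'log', [ 'a', 'b', 'c' ] )       -> logs "a, b, c"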
def _shut_down( self ): #thread-safe
    """Shutdown of the connected sockets and characteristics

    It's safe to call this function more than once (even if
    _shut_down has already been started / completed)."""
    fs_logging.log( "Closing connection from %s:%s" % self._cAddress, fs_logging.LEVEL_VERBOSE )
    self._close_socks()
    self._characteristics.close()
def _parse_params( self, params_ ):
    """Parsing of parameters given to manager

    This part deals with plugin loading and rule insertion on a
    higher level
    """
    # We do not load the ManagerSelfPlugin from another
    # Python module, so we shortcut its injection
    self._self_plugin = self.ManagerSelfPlugin( self )
    self._register_plugin( "manager", self._self_plugin )

    # We fill params with the parameters that we want to parse
    params = []

    # Adding default plugins via parameter injection
    params.append( "--plugin:default_actions.DefaultActionQuit:quit" )
    params.append( "--plugin:default_actions.DefaultActionDump:dump" )
    params.append( "--plugin:default_actions.DefaultActionLog:log" )

    # And also using the real parameters (after our injected ones)
    params.extend( params_ )

    for param in params:
        fs_logging.log( "Treating parameter: %s" % param, fs_logging.LEVEL_DEBUG_1 )
        if param.startswith( '--plugin' ):
            # Parsing plugins
            parsed_params = param.split( ':' )
            if len( parsed_params ) < 3:
                raise RuntimeError( "Format for --plugin parameter is 'class:name' followed by arbitrarily many ':option's" )
            class_name_full = parsed_params[1]
            plugin_key = parsed_params[2]
            constructor_params = parsed_params[3:]
            fs_logging.log( 'loading plugin: %s as %s' % ( class_name_full, plugin_key ), fs_logging.LEVEL_VERBOSE )
            module_name, _, class_name = class_name_full.partition( '.' )
            module = __import__( module_name )
            class_ = getattr( module, class_name )
            # Creating the plugin as instance of class_
            plugin = class_( self, constructor_params )
            # Registering the plugin
            self._register_plugin( plugin_key, plugin )
        elif param == '-v' or param == '--verbose':
            fs_logging.level += 1
        elif param == '-h' or param == '--help':
            self._print_help()
            raise KeyboardInterrupt
        else:
            # None of the known options, hence it must be a rule.
            # Parsing rules
            self._register_rule( fs_rules.RuleParser( param ).parse_rule() )
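# Illustration of the --plugin parameter format (the module name, key, and
# options are hypothetical): a parameter such as
#   --plugin:tcp_tunnel.TCPTunnel:tunnel:8080:example.org:80
# imports the module 'tcp_tunnel', instantiates its class 'TCPTunnel' with
# the constructor parameters [ '8080', 'example.org', '80' ], and registers
# the instance under the key 'tunnel'.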
def _uninit_plugins( self ):
    """Uninitialization of plugins"""
    # _plugin_keys maps plugin instances to their keys, so iterating
    # over it yields the plugin instances themselves
    for plugin in self._plugin_keys:
        fs_logging.log( "Uninitializing %s" % plugin, fs_logging.LEVEL_DEBUG_2 )
        uninit_method = getattr( plugin, 'uninit', None )
        if callable( uninit_method ):
            uninit_method()
        fs_logging.log( "Uninitializing %s done" % plugin, fs_logging.LEVEL_DEBUG_2 )
def _get_value( self, name ): #thread-safe
    """Unguarded variant of get_value"""
    value = None
    self._values_lock.acquire()
    try:
        value = self._values[ name ]
    except KeyError:
        pass
    finally:
        self._values_lock.release()
    fs_logging.log( "Evaluated %s to %s" % ( name, value ), fs_logging.LEVEL_DEBUG_2 )
    return value
def _handle_command( self, command ):
    """Handle a command entered at runtime by the user

    command: The entered command as string.

    command is interpreted as a rule and gets added to the rule
    base. Thereby, you can add rules at runtime. Due to the default
    actions like quit, and the rule syntax, just entering 'quit' as
    command will make the manager quit the failure simulation.
    """
    fs_logging.log( "Trying to add rule %s" % command, fs_logging.LEVEL_VERBOSE )
    rule = fs_rules.RuleParser( command ).parse_rule()
    self._register_rule( rule )
def _guarded_run( self ):
    # Setting up listening socket
    lsock = socket.socket( socket.AF_INET, socket.SOCK_STREAM )
    lsock.setsockopt( socket.SOL_SOCKET, socket.SO_REUSEADDR, 1 )
    lsock.bind( ( '', self._local_port ) )
    lsock.listen( 5 )
    lAddress = lsock.getsockname()
    self._manager.set_value_relative( self, "tunnel.listening.host", lAddress[0] )
    self._manager.set_value_relative( self, "tunnel.listening.port", lAddress[1] )

    fs_logging.log( 'TCPTunnel: Redirecting localhost:%s -> %s:%s' % ( self._local_port, self._sAddress[0], self._sAddress[1] ), fs_logging.LEVEL_NORMAL )

    while self._keep_running.is_set():
        # In order to react to shutdown requests in time, we have to use
        # non-blocking variants of I/O everywhere
        ready_r, ready_w, ready_x = select.select( [ lsock ], [], [], 0.1 )
        if lsock in ready_r:
            cSocket, cAddress = lsock.accept()
            self._connections_lock.acquire() #------------- LOCK START
            try:
                # Preparing a reporting function for each connection, so each
                # connection can only set ".connection_N." values
                connections_count = len( self._connections )
                report_function = self._report_function_generator( connections_count )
                characteristics = ConnectionCharacteristics( self._characteristics_template, report_function )
                handler = TunneledConnection( self._sAddress, cSocket, cAddress, characteristics, self._manager.quit )
                self._connections.append( ( handler, characteristics ) )
            finally:
                self._connections_lock.release() #------- LOCK END
            handler.start()

            report_function( "tunnel.clientside.client.host", cAddress[0] )
            report_function( "tunnel.clientside.client.port", cAddress[1] )
            cLocal = cSocket.getsockname()
            report_function( "tunnel.clientside.tunnel.host", cLocal[0] )
            report_function( "tunnel.clientside.tunnel.port", cLocal[1] )
            report_function( "tunnel.serverside.server.host", self._sAddress[0] )
            report_function( "tunnel.serverside.server.port", self._sAddress[1] )
            report_function( "alive", 1 )

            self._manager.set_value_relative( self, "connections.total", connections_count + 1 )
            self._update_alive_connections( 1 )
def _set_value_relative( self, plugin, name_suffix, value ): #thread-safe
    """Unguarded variant of set_value_relative"""
    plugin_key = self._plugin_keys[ plugin ]
    name = plugin_key
    if name_suffix != None:
        name += "." + name_suffix

    fs_logging.log( "Setting %s to %s" % ( name, value ), fs_logging.LEVEL_DEBUG_1 )

    self._values_lock.acquire()
    try:
        # Returning straight away, if the new value
        # matches the old
        try:
            if self._values[ name ] == value:
                return
        except KeyError:
            pass
        self._values[ name ] = value
    finally:
        self._values_lock.release()

    # Check for rules that need to be re-evaluated for this change
    self._updatees_lock.acquire()
    try:
        try:
            # updatees.union(...) does /not/ modify
            # updatees (i.e.: self._global_updatees), so
            # we do not grow self._global_updatees here.
            updatees = self._global_updatees.union( self._value_updatees[ name ] )
        except KeyError:
            # Due to threading, we cannot just
            # assign the set. To assure safe
            # iteration afterwards, we have to
            # /take a copy/ within the lock.
            updatees = self._global_updatees.copy()
    finally:
        self._updatees_lock.release()

    for updatee in updatees:
        # Doing this in the same thread that did the
        # update. This is a simple working solution
        # for now, but may lead to performance
        # problems, and endless recursion within a
        # thread, if a value update triggers an
        # action that updates the value again.
        fs_logging.log( "updating %s" % updatee, fs_logging.LEVEL_DEBUG_2 )
        updatee.evaluate( self )
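# Illustration of the value naming (the key and suffix are hypothetical):
# for a plugin registered under the key 'tunnel' and a name_suffix of
# 'connections.total', the stored value name becomes
# 'tunnel.connections.total'. Passing name_suffix = None stores the value
# under the bare plugin key.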
def step( self, characteristics ):
    """This function handles a single send to the sink followed by a
    single read from the pipeline's source.

    By repeatedly calling this function, the transfer of data
    through this pipe is realized.

    Returns a pair. Its first slot is False if neither data could be
    sent nor data was received, and True otherwise, to signal
    activity in the pipeline. Its second slot is the pair
    ( bytes received, bytes sent ).

    characteristics is a pair, whose first slot holds the maximum
    number of bytes to receive. Its second slot holds the maximum
    number of bytes to write (if this is larger than the data
    available in the buffer, only the data available in the buffer
    gets sent). Both slots are expected to be non-negative.
    """
    max_receive, max_send = characteristics
    actual_received = 0
    actual_sent = 0
    active = False

    ready_r, ready_w, ready_x = select.select( [ self.source ], [ self.sink ] if len( self._buffer ) > 0 else [], [], 0 )

    # Writing buffered data to the sink
    if self.sink in ready_w and max_send > 0:
        fs_logging.log( "trying to send ... (%d bytes)" % max_send, fs_logging.LEVEL_DEBUG_2 )
        try:
            actual_sent = self.sink.send( self._buffer[:max_send] )
            if actual_sent == 0:
                raise ConnectionClosedException()
            fs_logging.log( "%d bytes sent" % ( actual_sent ), fs_logging.LEVEL_DEBUG_2 )
            self._buffer = self._buffer[actual_sent:]
            active = True
        except socket.timeout:
            pass

    # Reading more data into the buffer from the source
    if self.source in ready_r and max_receive > 0:
        try:
            fs_logging.log( "trying to receive ...", fs_logging.LEVEL_DEBUG_2 )
            data = self.source.recv( max_receive )
            if data == '':
                raise ConnectionClosedException()
            actual_received = len( data )
            fs_logging.log( "%d bytes received" % ( actual_received ), fs_logging.LEVEL_DEBUG_2 )
            self._buffer += data
            active = True
        except socket.timeout:
            pass

    return active, ( actual_received, actual_sent )
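# A minimal sketch of driving a pipeline via step (the variable names and
# the fixed 4096-byte characteristics are assumptions for illustration;
# compare the real loop in TunneledConnection.run):
#
#   pipeline = UnidirectionPipeline( source_sock, sink_sock )
#   while True:
#       active, ( received, sent ) = pipeline.step( ( 4096, 4096 ) )
#       if not active:
#           time.sleep( 0.01 ) # back off while the pipe is idle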
def _register_plugin( self, plugin_key, plugin ):
    """Register a plugin with this manager

    plugin_key: The name under which the plugin gets registered.
        This string cannot contain '.' and should contain only
        letters and underscores.

    plugin: The FSPlugin instance that is to be registered

    This function initializes the plugin
    """
    if plugin_key in self._plugins:
        raise RuntimeError( "A plugin with key %s has already been registered" % plugin_key )
    self._plugins[ plugin_key ] = plugin
    self._plugin_keys[ plugin ] = plugin_key
    fs_logging.log( "Registering plugin %s" % plugin, fs_logging.LEVEL_DEBUG_1 )

    # Initializing the plugin
    init_method = getattr( plugin, 'init', None )
    if callable( init_method ):
        init_method()
def execute_action( self, name, parameters ): #thread-safe
    main_action, _, sub_action = name.partition( "." )
    if main_action[:11] == "connection_":
        connection_idx_str = main_action[11:]
        if connection_idx_str == "template":
            characteristics = self._characteristics_template
        else:
            try:
                connection_idx = int( connection_idx_str )
            except ValueError:
                # Not a number as connection index.
                raise RuntimeError( "'%s' passed instead of number for connection index when trying to invoke action of TCPTunnel" % connection_idx_str )
            self._connections_lock.acquire()
            try:
                _, characteristics = self._connections[ connection_idx ]
            except IndexError:
                # Connection does not (yet) exist. We ignore the action for now
                return
            finally:
                self._connections_lock.release()
        sub_action = sub_action.replace( '.', '_' )

        # Now 'characteristics' holds the indexed connection's
        # characteristics. We invoke the action
        method = getattr( characteristics, sub_action, None )
        if callable( method ):
            if parameters != None and len( parameters ):
                method( *parameters )
            else:
                method()
        else:
            # Connection exists, but the desired action does not exist.
            raise RuntimeError( "Do not know how to execute action '%s' on a TCPTunnel connection" % sub_action )
    else:
        fs_logging.log( "Ignoring unknown action '%s' on TCPTunnel" % name )
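# Illustration of the action naming (the chosen sub-action is an
# assumption; see ConnectionCharacteristics for the methods actually
# offered):
#   'connection_0.close'        -> calls close() on connection 0's characteristics
#   'connection_template.close' -> calls close() on the characteristics template
# Dots within the sub-action map to underscores, so 'connection_0.a.b'
# would invoke a method named 'a_b'.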
def _execute_simple_action( self, plugin_w_action, parameters = [] ):
    """Unguarded variant of execute_simple_action"""
    fs_logging.log( "Executing %s" % plugin_w_action, fs_logging.LEVEL_DEBUG_1 )
    plugin_key, _, action = plugin_w_action.partition( "." )
    plugin = self._plugins[ plugin_key ]
    if action == "init" or action == "run" or action == "stop":
        raise RuntimeError( "Trying to call protected action '%s' on plugin" % action )
    if action == "help":
        fs_logging.log( "Help for plugin: %s (class %s):\n%s\n" % ( plugin_key, plugin.__class__.__name__, plugin.__class__.__doc__ ) )
        return
    if hasattr( plugin, action ):
        method = getattr( plugin, action )
        if parameters != None and len( parameters ):
            method( *parameters )
        else:
            method()
    elif hasattr( plugin, 'execute_action' ):
        plugin.execute_action( action, parameters )
    else:
        raise RuntimeError( "Do not know how to execute action '%s'. Not available as method, or via execute_action method" % action )
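# Illustration (the keys 'manager' and 'log' are the ones registered in
# _parse_params):
#   'manager.quit' -> calls the quit method of the plugin registered as 'manager'
#   'log.help'     -> prints the docstring of the plugin registered as 'log'
#   'manager.run'  -> raises RuntimeError, as 'run' is a protected action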
def manage( self, params ):
    """Main loop of failure simulation"""
    try:
        # Setting up SIGTERM handler
        signal.signal( signal.SIGTERM, self._handle_sigterm )

        # Setting up failure simulation via parameters
        self._parse_params( params )

        # Starting plugins' threads
        for plugin_key, plugin in self._plugins.items():
            if isinstance( plugin, threading.Thread ):
                # Starting the thread only, if we also got a suitable
                # stop method
                if callable( getattr( plugin, 'stop', None ) ):
                    plugin.start()
                    self._started_threads.append( plugin )
                else:
                    fs_logging.log( "Not starting thread of %s, as it does not provide a stop function" % plugin_key )

        # Putting stdin in non-blocking mode, to be
        # able to read commands, while still being
        # able to react to quit requests in a timely
        # manner
        self._stdin_old_flags = fcntl.fcntl( sys.stdin, fcntl.F_GETFL )
        fcntl.fcntl( sys.stdin, fcntl.F_SETFL, self._stdin_old_flags | os.O_NONBLOCK )
        stdin_buffer = ''

        fs_logging.log( "Starting up plugins done", fs_logging.LEVEL_VERBOSE )

        wait_interval = 0.1
        while self._keep_running.is_set():
            ready_r, ready_w, ready_x = select.select( [ sys.stdin ], [], [], wait_interval )
            if sys.stdin in ready_r:
                old_stdin_buffer_len = len( stdin_buffer )
                try:
                    stdin_buffer += sys.stdin.read( 1024 )
                except IOError as e:
                    if e.errno != errno.EAGAIN:
                        # Cannot read from stdin, and it's not a harmless EAGAIN.
                        print traceback.format_exc()
                        raise
                new_stdin_buffer_len = len( stdin_buffer )
                if old_stdin_buffer_len == new_stdin_buffer_len:
                    # 0-length read, for which select yielded stdin as ready.
                    # This indicates that we're reading from a fifo. Select
                    # will always return right away that stdin is ready for
                    # reading. So we back off by hand here to avoid busy
                    # waiting.
                    time.sleep( wait_interval )
                else:
                    # stdin_buffer changed. Treat all completed lines
                    while '\n' in stdin_buffer:
                        line, _, stdin_buffer = stdin_buffer.partition( '\n' )
                        self._handle_command( line )
    except KeyboardInterrupt:
        pass
    finally:
        self._shut_down()
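# Illustration of runtime command injection (the fifo setup and the
# simulator invocation are hypothetical): because select keeps reporting a
# fifo-backed stdin as ready, the loop above backs off manually; commands
# arrive line by line, e.g.:
#   mkfifo commands
#   python failure_simulator.py ... < commands &
#   echo 'quit' > commands   # parsed as a rule; shuts the manager down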
def run( self ):
    """Setting up the connection to the server and juggling the
    connections and characteristics."""
    escalate_exit = True # if True, signal exit (due to a thrown
                         # exception) via escalate_function
    try:
        # Preparing the connection from the client
        fs_logging.log( 'Handling connection from %s:%s' % self._cAddress, fs_logging.LEVEL_VERBOSE )
        self._cSocket.setblocking( 0 ) # UnidirectionPipeline wants non-blocking sockets

        # Setting up the connection to the server
        self._sSocket = socket.socket( socket.AF_INET, socket.SOCK_STREAM )
        self._sSocket.connect( self._sAddress )
        self._sSocket.setblocking( 0 ) # UnidirectionPipeline wants non-blocking sockets
        self._characteristics.set_serverside_connected( self._sSocket )

        # Splitting the connected sockets into two pipelines, each
        # going in a single direction through the tunnel.
        c2s = UnidirectionPipeline( self._cSocket, self._sSocket )
        s2c = UnidirectionPipeline( self._sSocket, self._cSocket )

        sleepInterval = 0 # Number of seconds to wait between two
                          # send/receive steps. This is recomputed
                          # within the upcoming while loop
        keep_running_by_characteristic = True # Will get set to False,
                          # if the characteristics wants us to close
                          # the connection.

        # Now for the main receive/send step iteration
        while self._keep_running.is_set() and keep_running_by_characteristic:
            # Getting allowed buffer sizes
            c2s_characteristics, s2c_characteristics = self._characteristics.get_step_max_transmit_lengths()

            # client -> server
            c2s_active, c2s_actual_characteristics = c2s.step( c2s_characteristics )

            # server -> client
            s2c_active, s2c_actual_characteristics = s2c.step( s2c_characteristics )

            # Informing characteristics about received/sent data
            keep_running_by_characteristic = self._characteristics.set_step_actual_transmit_lengths( c2s_actual_characteristics, s2c_actual_characteristics )

            # Graceful waiting between two receive/send iterations
            if c2s_active or s2c_active or not self._keep_running.is_set() or not keep_running_by_characteristic:
                # There is some action going on, or we have been asked to shut
                # down. So we do not wait.
                sleepInterval = 0
            else:
                # No read or write on either pipeline. We back off for some
                # time. This interval is small at first, to be able to react
                # quickly to short transmission pauses. But we increase it
                # exponentially until we reach 0.5 seconds.
                sleepInterval = sleepInterval * 2 + 0.001
                if sleepInterval > 0.5:
                    sleepInterval = 0.5
                time.sleep( sleepInterval )

        escalate_exit = False # Graceful shutdown. No need to escalate
    except ConnectionClosedException:
        escalate_exit = False # Connection got closed. No need to escalate
    except KeyboardInterrupt:
        # No need to reraise KeyboardInterrupt
        pass
    except IOError as e:
        if e.errno == errno.ECONNRESET:
            escalate_exit = False # Connection reset. No need to escalate
        else:
            raise
    finally:
        if escalate_exit:
            self._escalate_function()
        self._shut_down()
def _execute_script( self, name, params = [], swallow_output = False ):
    """Execution of a script

    name: The unexpanded name of the script as string

    params: (optional) (default: []) List of strings that get passed
        as command line options to the script.

    swallow_output: (optional) (default: False) If True, stdout is
        swallowed. Otherwise, the script's stdout is not mangled
        with at all and makes it to the stdout of the failure
        simulator.

    This function returns the swallowed output as string. If no
    output has been swallowed, the empty string is returned.
    """
    expanded_name = self._expand_name( name )
    output = ''
    if self._script_ok( expanded_name, False ):
        if not "/" in expanded_name:
            expanded_name = "./" + expanded_name
        command = [ expanded_name ]
        command.extend( self._common_initial_arguments )
        if params is not None:
            command.extend( params )
        command.extend( self._common_final_arguments )

        script_running_name = "script.%s.running" % name
        script_invocation_name = "script.%s.invocations" % name

        # Updating the failure simulator's values for the script execution
        self._invocation_lock.acquire()
        try:
            # Increasing the run counter
            run_count = self._manager.get_value_relative( self, script_running_name )
            if run_count == None:
                run_count = 0
            self._manager.set_value_relative( self, script_running_name, run_count + 1 )

            # Increasing the invocation counter
            invocation_count = self._manager.get_value_relative( self, script_invocation_name )
            if invocation_count == None:
                invocation_count = 0
            self._manager.set_value_relative( self, script_invocation_name, invocation_count + 1 )
        finally:
            self._invocation_lock.release()

        # The execution itself
        if swallow_output:
            output_option = subprocess.PIPE
        else:
            output_option = None
        proc = subprocess.Popen( command, stdout = output_option )
        # communicate() yields None for stdout when it is not piped; we
        # keep output an empty string in that case, as documented above
        swallowed_output, error = proc.communicate()
        if swallowed_output is not None:
            output = swallowed_output
        return_value = proc.returncode

        # Updating the failure simulator's values after the script execution
        self._invocation_lock.acquire()
        try:
            # Decreasing the run counter
            self._manager.set_value_relative( self, script_running_name, self._manager.get_value_relative( self, script_running_name ) - 1 )
        finally:
            self._invocation_lock.release()
        self._manager.set_value_relative( self, "script.%s.return_value.last" % name, return_value )
    else:
        fs_logging.log( "%s cannot be executed" % expanded_name )
    return output
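# Illustration of the values maintained per script (the script name
# 'cleanup' is hypothetical): executing it sets, relative to this
# plugin's key,
#   script.cleanup.running           (instances currently running)
#   script.cleanup.invocations       (total number of invocations)
#   script.cleanup.return_value.last (exit code of the last run)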