def test2persist_multiple_objects_of_multiple_class(self):
    """In this test case multiple instances of multiple classes are persisted and loaded."""
    description = "Test2PersistencyUtil"
    new_match_path_detector = NewMatchPathDetector(
        self.aminer_config, [self.stream_printer_event_handler], 'Default2', True)
    self.analysis_context.register_component(new_match_path_detector, description)

    t = time.time()
    log_atom_fixed_dme = LogAtom(
        self.fixed_dme.fixed_data, ParserMatch(self.match_element_fixed_dme), t,
        new_match_path_detector)
    log_atom_decimal_integer_value_me = LogAtom(
        self.match_context_decimal_integer_value_me.match_data,
        ParserMatch(self.match_element_decimal_integer_value_me), t, new_match_path_detector)
    new_match_path_detector.receive_atom(log_atom_fixed_dme)
    new_match_path_detector.receive_atom(log_atom_decimal_integer_value_me)

    other_new_match_path_detector = NewMatchPathDetector(
        self.aminer_config, [self.stream_printer_event_handler], 'otherDetector2', True)
    self.analysis_context.register_component(other_new_match_path_detector, description + "2")
    log_atom_fixed_dme = LogAtom(
        self.fixed_dme.fixed_data, ParserMatch(self.match_element_fixed_dme), t,
        other_new_match_path_detector)
    other_new_match_path_detector.receive_atom(log_atom_fixed_dme)

    new_match_path_value_combo_detector = NewMatchPathValueComboDetector(
        self.aminer_config, ['first/f1/s1'], [self.stream_printer_event_handler], 'Default',
        False, True)
    self.analysis_context.register_component(
        new_match_path_value_combo_detector, description + "3")
    log_atom_sequence_me = LogAtom(
        self.fixed_dme.fixed_data, ParserMatch(self.match_element_first_match_me), t,
        new_match_path_value_combo_detector)
    new_match_path_value_combo_detector.receive_atom(log_atom_sequence_me)

    PersistencyUtil.persist_all()
    # The two paths may have been persisted in either order.
    persisted_paths = PersistencyUtil.load_json(new_match_path_detector.persistence_file_name)
    self.assertTrue(persisted_paths in (
        [self.match_element_fixed_dme.get_path(),
         self.match_element_decimal_integer_value_me.get_path()],
        [self.match_element_decimal_integer_value_me.get_path(),
         self.match_element_fixed_dme.get_path()]))
    self.assertEqual(
        PersistencyUtil.load_json(other_new_match_path_detector.persistence_file_name),
        [self.match_element_fixed_dme.get_path()])
    self.assertEqual(
        PersistencyUtil.load_json(new_match_path_value_combo_detector.persistence_file_name),
        [[log_atom_sequence_me.raw_data]])
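# For illustration only: a minimal, self-contained model of the persist/load round trip exercised by the
# test above, using just the standard library. PersistencyUtil's real implementation may differ; this only
# sketches the contract the test depends on: each component serializes its state as JSON into its own
# persistence file, and load_json() returns None when nothing was persisted yet. All names prefixed with
# _demo_ are hypothetical and not part of the aminer API.
import json
import os
import tempfile


def _demo_store_json(file_name, json_data):
    # Hypothetical helper mirroring the store side of the persistence contract.
    with open(file_name, 'w') as persistence_file:
        persistence_file.write(json.dumps(json_data))


def _demo_load_json(file_name):
    # Hypothetical helper mirroring the load side: None when no state was persisted yet.
    if not os.path.exists(file_name):
        return None
    with open(file_name) as persistence_file:
        return json.loads(persistence_file.read())


if __name__ == '__main__':
    demo_file = os.path.join(tempfile.mkdtemp(), 'demo-detector')
    assert _demo_load_json(demo_file) is None
    _demo_store_json(demo_file, ['first/f1/s1'])
    assert _demo_load_json(demo_file) == ['first/f1/s1']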
def run_analysis(self, master_fd):
    """This method runs the analysis thread.
    @param master_fd the main communication socket to the parent to receive logfile updates from the parent.
    @return 0 on success, e.g. normal termination via signal, or 1 on error."""
    # The master_control_socket is the socket to communicate with the master process to receive commands or
    # logstream data. Expect the parent/child communication socket on fd 3. socket.fromfd() duplicates the fd,
    # so close the old one.
    self.master_control_socket = socket.fromfd(master_fd, socket.AF_UNIX, socket.SOCK_DGRAM, 0)
    os.close(master_fd)
    self.tracked_fds_dict[self.master_control_socket.fileno()] = self.master_control_socket

    # Locate the real analysis configuration.
    self.analysis_context.build_analysis_pipeline()
    if self.analysis_context.atomizer_factory is None:
        print('FATAL: build_analysis_pipeline() did not initialize atomizer_factory, terminating',
              file=sys.stderr)
        return 1

    real_time_triggered_components = self.analysis_context.real_time_triggered_components
    analysis_time_triggered_components = self.analysis_context.analysis_time_triggered_components

    max_memory_mb = self.analysis_context.aminer_config.config_properties.get(
        AMinerConfig.KEY_RESOURCES_MAX_MEMORY_USAGE, None)
    if max_memory_mb is not None:
        try:
            max_memory_mb = int(max_memory_mb)
            resource.setrlimit(resource.RLIMIT_AS, (max_memory_mb * 1024 * 1024, resource.RLIM_INFINITY))
        except ValueError:
            print('FATAL: %s must be an integer, terminating' % AMinerConfig.KEY_RESOURCES_MAX_MEMORY_USAGE,
                  file=sys.stderr)
            return 1

    max_cpu_percent_usage = self.analysis_context.aminer_config.config_properties.get(
        AMinerConfig.KEY_RESOURCES_MAX_PERCENT_CPU_USAGE)
    if max_cpu_percent_usage is not None:
        try:
            max_cpu_percent_usage = int(max_cpu_percent_usage)
            pid = os.getpid()
            package_installed_cmd = ['dpkg', '-l', 'cpulimit']
            cpulimit_cmd = ['cpulimit', '-p', str(pid), '-l', str(max_cpu_percent_usage)]

            # skipcq: BAN-B603
            with subprocess.Popen(package_installed_cmd, stdout=subprocess.PIPE,
                                  stderr=subprocess.STDOUT) as out:
                stdout, _stderr = out.communicate()
            if 'dpkg-query: no packages found matching cpulimit' in stdout.decode():
                print('FATAL: cpulimit package must be installed, when using the property %s' %
                      AMinerConfig.KEY_RESOURCES_MAX_PERCENT_CPU_USAGE, file=sys.stderr)
                return 1

            # cpulimit stays attached to this pid and throttles it in the background, so do not wait for it.
            # skipcq: BAN-B603
            _out = subprocess.Popen(cpulimit_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        except ValueError:
            print('FATAL: %s must be an integer, terminating' %
                  AMinerConfig.KEY_RESOURCES_MAX_PERCENT_CPU_USAGE, file=sys.stderr)
            return 1

    # Load continuation data for the last known log streams. The loaded data has to be a dictionary with
    # repositioning information for each stream. The data is used only when creating the first stream with
    # that name.
    self.repositioning_data_dict = PersistencyUtil.load_json(self.persistence_file_name)
    if self.repositioning_data_dict is None:
        self.repositioning_data_dict = {}

    # A list of LogStreams where handle_stream() blocked due to downstream not being able to consume the
    # data yet.
    blocked_log_streams = []

    # Always start when the number is None.
    next_real_time_trigger_time = None
    next_analysis_time_trigger_time = None

    delayed_return_status = 0
    while self.run_analysis_loop_flag:
        # Build the list of inputs to select for anew each time: the LogStream file descriptors may change
        # due to rollover.
        input_select_fd_list = []
        output_select_fd_list = []
        for fd_handler_object in self.tracked_fds_dict.values():
            if isinstance(fd_handler_object, LogStream):
                stream_fd = fd_handler_object.get_current_fd()
                if stream_fd < 0:
                    continue
                input_select_fd_list.append(stream_fd)
            elif isinstance(fd_handler_object, AnalysisChildRemoteControlHandler):
                fd_handler_object.add_select_fds(input_select_fd_list, output_select_fd_list)
            else:
                # This has to be a socket, just add the file descriptor.
                input_select_fd_list.append(fd_handler_object.fileno())

        # Loop over the list in reverse order to avoid skipping elements in remove.
        for log_stream in reversed(blocked_log_streams):
            current_stream_fd = log_stream.handle_stream()
            if current_stream_fd >= 0:
                self.tracked_fds_dict[current_stream_fd] = log_stream
                input_select_fd_list.append(current_stream_fd)
                blocked_log_streams.remove(log_stream)

        read_list = None
        write_list = None
        try:
            (read_list, write_list, _except_list) = select.select(
                input_select_fd_list, output_select_fd_list, [], 1)
        except select.error as select_error:
            # Interrupting signals, e.g. for shutdown, are OK. In Python 3 select.error is an alias of
            # OSError, which is not subscriptable, so check the errno attribute.
            if select_error.errno == errno.EINTR:
                continue
            print('Unexpected select result %s' % str(select_error), file=sys.stderr)
            delayed_return_status = 1
            break

        for read_fd in read_list:
            fd_handler_object = self.tracked_fds_dict[read_fd]
            if isinstance(fd_handler_object, LogStream):
                # Handle this LogStream. Only when downstream processing blocks, add the stream to the
                # blocked stream list.
                handle_result = fd_handler_object.handle_stream()
                if handle_result < 0:
                    # No need to care if the current internal file descriptor in the LogStream has changed
                    # in handle_stream(), this will be handled when unblocking.
                    del self.tracked_fds_dict[read_fd]
                    blocked_log_streams.append(fd_handler_object)
                elif handle_result != read_fd:
                    # The current fd has changed, update the tracking list.
                    del self.tracked_fds_dict[read_fd]
                    self.tracked_fds_dict[handle_result] = fd_handler_object
                continue

            if isinstance(fd_handler_object, AnalysisChildRemoteControlHandler):
                try:
                    fd_handler_object.do_receive()
                except ConnectionError as receive_exception:
                    print('Unclean termination of remote control: %s' % str(receive_exception),
                          file=sys.stderr)
                if fd_handler_object.is_dead():
                    del self.tracked_fds_dict[read_fd]
                # Reading is only attempted when the output buffer was already flushed. Try processing the
                # next request to fill the output buffer for the next round.
                else:
                    fd_handler_object.do_process(self.analysis_context)
                continue

            if fd_handler_object == self.master_control_socket:
                self.handle_master_control_socket_receive()
                continue

            if fd_handler_object == self.remote_control_socket:
                # We received a remote connection, accept it unconditionally. Users should make sure that
                # they do not exhaust resources by hogging open connections.
                (control_client_socket, _remote_address) = self.remote_control_socket.accept()
                # Keep track of information received via this remote control socket.
                remote_control_handler = AnalysisChildRemoteControlHandler(control_client_socket)
                self.tracked_fds_dict[control_client_socket.fileno()] = remote_control_handler
                continue

            raise Exception('Unhandled object type %s' % type(fd_handler_object))

        for write_fd in write_list:
            fd_handler_object = self.tracked_fds_dict[write_fd]
            if isinstance(fd_handler_object, AnalysisChildRemoteControlHandler):
                buffer_flushed_flag = False
                try:
                    buffer_flushed_flag = fd_handler_object.do_send()
                except OSError as send_error:
                    print('Error sending data via remote control: %s' % str(send_error), file=sys.stderr)
                    try:
                        fd_handler_object.terminate()
                    except ConnectionError as terminate_exception:
                        print('Unclean termination of remote control: %s' % str(terminate_exception),
                              file=sys.stderr)
                if buffer_flushed_flag:
                    fd_handler_object.do_process(self.analysis_context)
                if fd_handler_object.is_dead():
                    del self.tracked_fds_dict[write_fd]
                continue

            raise Exception('Unhandled object type %s' % type(fd_handler_object))

        # Handle the real time events.
        real_time = time.time()
        if next_real_time_trigger_time is None or real_time >= next_real_time_trigger_time:
            next_trigger_offset = 3600
            for component in real_time_triggered_components:
                next_trigger_request = component.do_timer(real_time)
                next_trigger_offset = min(next_trigger_offset, next_trigger_request)
            next_real_time_trigger_time = real_time + next_trigger_offset

        # Handle the analysis time events. The analysis time will be different when an analysis time
        # component is registered.
        analysis_time = self.analysis_context.analysis_time
        if analysis_time is None:
            analysis_time = real_time
        if next_analysis_time_trigger_time is None or analysis_time >= next_analysis_time_trigger_time:
            next_trigger_offset = 3600
            for component in analysis_time_triggered_components:
                next_trigger_request = component.do_timer(real_time)
                next_trigger_offset = min(next_trigger_offset, next_trigger_request)
            next_analysis_time_trigger_time = analysis_time + next_trigger_offset

    # The analysis loop is only left on shutdown. Try to persist everything and leave.
    PersistencyUtil.persist_all()
    return delayed_return_status
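# A minimal, self-contained sketch of the select() dispatch pattern used by run_analysis() above: handlers
# are tracked in a dict keyed by file descriptor number, the input list is rebuilt every round because
# tracked fds may change, select() wakes up at most once per second, and each readable fd is dispatched to
# its handler. All names below are illustrative (demo_dispatch_loop is hypothetical, not part of the aminer
# API); only the standard library is used. AF_UNIX socketpair requires a Unix-like OS.
import select
import socket


def demo_dispatch_loop():
    # A socketpair stands in for the parent/child communication socket of the real analysis child.
    parent_sock, child_sock = socket.socketpair(socket.AF_UNIX, socket.SOCK_DGRAM)
    tracked_fds = {child_sock.fileno(): child_sock}
    parent_sock.send(b'logstream data')
    running = True
    while running:
        # Rebuild the input list each round, as run_analysis() does, since tracked fds may change.
        input_fds = list(tracked_fds)
        read_list, _write_list, _except_list = select.select(input_fds, [], [], 1)
        for read_fd in read_list:
            handler = tracked_fds[read_fd]
            data = handler.recv(65536)
            print('received: %s' % data)
            running = False
    parent_sock.close()
    child_sock.close()


if __name__ == '__main__':
    demo_dispatch_loop()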