def test2persist_multiple_objects_of_multiple_class(self):
    """In this test case multiple instances of multiple classes are to be persisted and loaded."""
    description = "Test2PersistenceUtil"
    # First detector: receives two different atoms, so both paths must end up persisted.
    new_match_path_detector = NewMatchPathDetector(
        self.aminer_config, [self.stream_printer_event_handler], 'Default2', True)
    self.analysis_context.register_component(new_match_path_detector, description)
    t = time.time()
    log_atom_fixed_dme = LogAtom(
        self.fixed_dme.fixed_data, ParserMatch(self.match_element_fixed_dme), t, new_match_path_detector)
    log_atom_decimal_integer_value_me = LogAtom(
        self.match_context_decimal_integer_value_me.match_data,
        ParserMatch(self.match_element_decimal_integer_value_me), t, new_match_path_detector)
    new_match_path_detector.receive_atom(log_atom_fixed_dme)
    new_match_path_detector.receive_atom(log_atom_decimal_integer_value_me)

    # Second detector of the same class: must persist independently of the first one.
    other_new_match_path_detector = NewMatchPathDetector(
        self.aminer_config, [self.stream_printer_event_handler], 'otherDetector2', True)
    self.analysis_context.register_component(other_new_match_path_detector, description + "2")
    log_atom_fixed_dme = LogAtom(
        self.fixed_dme.fixed_data, ParserMatch(self.match_element_fixed_dme), t, other_new_match_path_detector)
    other_new_match_path_detector.receive_atom(log_atom_fixed_dme)

    # Detector of a different class: verifies persistence works across component classes.
    new_match_path_value_combo_detector = NewMatchPathValueComboDetector(
        self.aminer_config, ['first/f1/s1'], [self.stream_printer_event_handler], 'Default', False, True)
    self.analysis_context.register_component(
        new_match_path_value_combo_detector, description + "3")
    log_atom_sequence_me = LogAtom(
        self.fixed_dme.fixed_data, ParserMatch(self.match_element_first_match_me), t,
        new_match_path_value_combo_detector)
    new_match_path_value_combo_detector.receive_atom(log_atom_sequence_me)

    PersistenceUtil.persist_all()
    persistence_data = PersistenceUtil.load_json(new_match_path_detector.persistence_file_name)
    # The order in which the two paths were persisted is not guaranteed, so accept both
    # permutations. assertIn gives a useful diff on failure (unlike assertTrue(x in ...)).
    self.assertIn(persistence_data, (
        [self.match_element_fixed_dme.get_path(), self.match_element_decimal_integer_value_me.get_path()],
        [self.match_element_decimal_integer_value_me.get_path(), self.match_element_fixed_dme.get_path()]))
    self.assertEqual(
        PersistenceUtil.load_json(other_new_match_path_detector.persistence_file_name),
        [self.match_element_fixed_dme.get_path()])
    self.assertEqual(
        PersistenceUtil.load_json(new_match_path_value_combo_detector.persistence_file_name),
        [[log_atom_sequence_me.raw_data]])
def run_analysis(self, master_fd):
    """
    Run the analysis thread.
    @param master_fd the main communication socket to the parent to receive logfile updates from the parent.
    @return 0 on success, e.g. normal termination via signal or 1 on error.
    """
    # The masterControlSocket is the socket to communicate with the master process to receive commands or logstream data. Expect
    # the parent/child communication socket on fd 3. This also duplicates the fd, so close the old one.
    self.master_control_socket = socket.fromfd(master_fd, socket.AF_UNIX, socket.SOCK_DGRAM, 0)
    os.close(master_fd)
    self.tracked_fds_dict[self.master_control_socket.fileno()] = self.master_control_socket

    # Locate the real analysis configuration.
    self.analysis_context.build_analysis_pipeline()
    if self.analysis_context.atomizer_factory is None:
        msg = 'build_analysis_pipeline() did not initialize atomizer_factory, terminating'
        print('FATAL: ' + msg, file=sys.stderr)
        logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).critical(msg)
        return 1
    real_time_triggered_components = self.analysis_context.real_time_triggered_components
    analysis_time_triggered_components = self.analysis_context.analysis_time_triggered_components

    # Optionally cap the process address space; a non-integer configuration value is fatal.
    max_memory_mb = self.analysis_context.aminer_config.config_properties.get(AMinerConfig.KEY_RESOURCES_MAX_MEMORY_USAGE, None)
    if max_memory_mb is not None:
        try:
            max_memory_mb = int(max_memory_mb)
            resource.setrlimit(resource.RLIMIT_AS, (max_memory_mb * 1024 * 1024, resource.RLIM_INFINITY))
            logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).debug('set max memory limit to %d MB.', max_memory_mb)
        except ValueError:
            msg = '%s must be an integer, terminating' % AMinerConfig.KEY_RESOURCES_MAX_MEMORY_USAGE
            print('FATAL: ' + msg, file=sys.stderr)
            logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).critical(msg)
            return 1

    # Load continuation data for last known log streams. The loaded data has to be a dictionary with repositioning information for
    # each stream. The data is used only when creating the first stream with that name.
    self.repositioning_data_dict = PersistenceUtil.load_json(self.persistence_file_name)
    if self.repositioning_data_dict is None:
        self.repositioning_data_dict = {}

    # A list of LogStreams where handleStream() blocked due to downstream not being able to consume the data yet.
    blocked_log_streams = []

    # Always start when number is None.
    next_real_time_trigger_time = None
    next_analysis_time_trigger_time = None
    next_backup_time_trigger_time = None
    log_stat_period = self.analysis_context.aminer_config.config_properties.get(
        AMinerConfig.KEY_LOG_STAT_PERIOD, AMinerConfig.DEFAULT_STAT_PERIOD)
    next_statistics_log_time = time.time() + log_stat_period

    delayed_return_status = 0
    while self.run_analysis_loop_flag:
        # Build the list of inputs to select for anew each time: the LogStream file descriptors may change due to rollover.
        input_select_fd_list = []
        output_select_fd_list = []
        for fd_handler_object in self.tracked_fds_dict.values():
            if isinstance(fd_handler_object, LogStream):
                stream_fd = fd_handler_object.get_current_fd()
                if stream_fd < 0:
                    continue
                input_select_fd_list.append(stream_fd)
            elif isinstance(fd_handler_object, AnalysisChildRemoteControlHandler):
                fd_handler_object.add_select_fds(input_select_fd_list, output_select_fd_list)
            else:
                # This has to be a socket, just add the file descriptor.
                input_select_fd_list.append(fd_handler_object.fileno())

        # Retry streams that blocked earlier. Loop over the list in reverse order to avoid skipping elements in remove.
        # NOTE(review): suspended_flag is not defined in this method — presumably a module-level global; verify.
        if not suspended_flag:
            for log_stream in reversed(blocked_log_streams):
                current_stream_fd = log_stream.handle_stream()
                if current_stream_fd >= 0:
                    self.tracked_fds_dict[current_stream_fd] = log_stream
                    input_select_fd_list.append(current_stream_fd)
                    blocked_log_streams.remove(log_stream)

        read_list = None
        write_list = None
        try:
            (read_list, write_list, _except_list) = select.select(input_select_fd_list, output_select_fd_list, [], 1)
        except select.error as select_error:
            # Interrupting signals, e.g. for shutdown are OK.
            # BUGFIX: select.error is OSError in Python 3 and is not subscriptable; select_error[0]
            # raised TypeError here. Use args[0] (the errno) instead.
            if select_error.args[0] == errno.EINTR:
                continue
            msg = 'Unexpected select result %s' % str(select_error)
            print(msg, file=sys.stderr)
            logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).error(msg)
            delayed_return_status = 1
            break
        for read_fd in read_list:
            fd_handler_object = self.tracked_fds_dict[read_fd]
            if isinstance(fd_handler_object, LogStream):
                # Handle this LogStream. Only when downstream processing blocks, add the stream to the blocked stream list.
                handle_result = fd_handler_object.handle_stream()
                if handle_result < 0:
                    # No need to care if current internal file descriptor in LogStream has changed in handleStream(),
                    # this will be handled when unblocking.
                    del self.tracked_fds_dict[read_fd]
                    blocked_log_streams.append(fd_handler_object)
                elif handle_result != read_fd:
                    # The current fd has changed, update the tracking list.
                    del self.tracked_fds_dict[read_fd]
                    self.tracked_fds_dict[handle_result] = fd_handler_object
                continue
            if isinstance(fd_handler_object, AnalysisChildRemoteControlHandler):
                try:
                    fd_handler_object.do_receive()
                except ConnectionError as receiveException:
                    msg = 'Unclean termination of remote control: %s' % str(receiveException)
                    logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).error(msg)
                    print(msg, file=sys.stderr)
                if fd_handler_object.is_dead():
                    logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).debug('Deleting fd %s from tracked_fds_dict.', str(read_fd))
                    del self.tracked_fds_dict[read_fd]
                # Reading is only attempted when output buffer was already flushed. Try processing the next request to fill the output
                # buffer for next round.
                else:
                    fd_handler_object.do_process(self.analysis_context)
                continue
            if fd_handler_object == self.master_control_socket:
                self.handle_master_control_socket_receive()
                continue
            if fd_handler_object == self.remote_control_socket:
                # We received a remote connection, accept it unconditionally. Users should make sure, that they do not exhaust
                # resources by hogging open connections.
                (control_client_socket, _remote_address) = self.remote_control_socket.accept()
                # Keep track of information received via this remote control socket.
                remote_control_handler = AnalysisChildRemoteControlHandler(control_client_socket)
                self.tracked_fds_dict[control_client_socket.fileno()] = remote_control_handler
                continue
            msg = 'Unhandled object type %s' % type(fd_handler_object)
            logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).error(msg)
            raise Exception(msg)
        for write_fd in write_list:
            fd_handler_object = self.tracked_fds_dict[write_fd]
            if isinstance(fd_handler_object, AnalysisChildRemoteControlHandler):
                buffer_flushed_flag = False
                try:
                    buffer_flushed_flag = fd_handler_object.do_send()
                except OSError as sendError:
                    msg = 'Error at sending data via remote control: %s' % str(sendError)
                    print(msg, file=sys.stderr)
                    logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).error(msg)
                    try:
                        fd_handler_object.terminate()
                    except ConnectionError as terminateException:
                        msg = 'Unclean termination of remote control: %s' % str(terminateException)
                        print(msg, file=sys.stderr)
                        logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).error(msg)
                if buffer_flushed_flag:
                    fd_handler_object.do_process(self.analysis_context)
                if fd_handler_object.is_dead():
                    del self.tracked_fds_dict[write_fd]
                continue
            msg = 'Unhandled object type %s' % type(fd_handler_object)
            logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).error(msg)
            raise Exception(msg)

        # Handle the real time events.
        real_time = time.time()
        if next_real_time_trigger_time is None or real_time >= next_real_time_trigger_time:
            next_trigger_offset = 3600
            for component in real_time_triggered_components:
                if not suspended_flag:
                    next_trigger_request = component.do_timer(real_time)
                    next_trigger_offset = min(next_trigger_offset, next_trigger_request)
            next_real_time_trigger_time = real_time + next_trigger_offset
        if real_time >= next_statistics_log_time:
            next_statistics_log_time = real_time + log_stat_period
            logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).debug('Statistics logs are written..')
            # log the statistics for every component.
            for component_name in self.analysis_context.registered_components_by_name:
                component = self.analysis_context.registered_components_by_name[component_name]
                component.log_statistics(component_name)

        # Handle the analysis time events. The analysis time will be different when an analysis time component is registered.
        analysis_time = self.analysis_context.analysis_time
        if analysis_time is None:
            analysis_time = real_time
        if next_analysis_time_trigger_time is None or analysis_time >= next_analysis_time_trigger_time:
            next_trigger_offset = 3600
            for component in analysis_time_triggered_components:
                if not suspended_flag:
                    next_trigger_request = component.do_timer(real_time)
                    next_trigger_offset = min(next_trigger_offset, next_trigger_request)
            next_analysis_time_trigger_time = analysis_time + next_trigger_offset

        # backup the persistence data. The first pass only schedules the trigger; copies happen on later passes.
        backup_time = time.time()
        backup_time_str = datetime.fromtimestamp(backup_time).strftime('%Y-%m-%d-%H-%M-%S')
        persistence_dir = self.analysis_context.aminer_config.config_properties.get(
            AMinerConfig.KEY_PERSISTENCE_DIR, AMinerConfig.DEFAULT_PERSISTENCE_DIR)
        persistence_dir = persistence_dir.rstrip('/')
        backup_path = persistence_dir + '/backup/'
        backup_path_with_date = os.path.join(backup_path, backup_time_str)
        if next_backup_time_trigger_time is None or backup_time >= next_backup_time_trigger_time:
            next_trigger_offset = 3600 * 24
            if next_backup_time_trigger_time is not None:
                shutil.copytree(persistence_dir, backup_path_with_date, ignore=shutil.ignore_patterns('backup*'))
                logging.getLogger(AMinerConfig.DEBUG_LOG_NAME).info('Persistence backup created in %s.', backup_path_with_date)
            next_backup_time_trigger_time = backup_time + next_trigger_offset

    # Analysis loop is only left on shutdown. Try to persist everything and leave.
    PersistenceUtil.persist_all()
    for sock in self.tracked_fds_dict.values():
        sock.close()
    return delayed_return_status
def persist_all(self):
    """Persist all data by calling the function in PersistenceUtil.

    Sets REMOTE_CONTROL_RESPONSE to 'OK' so the remote control caller
    receives a success indication, and logs the invocation.
    """
    PersistenceUtil.persist_all()
    self.REMOTE_CONTROL_RESPONSE = 'OK'
    debug_logger = logging.getLogger(DEBUG_LOG_NAME)
    debug_logger.info('Called persist_all() via remote control.')