def _repeat_loading(self, repeat_interval):
    """Periodically load summary data.

    Args:
        repeat_interval (int): Seconds to wait between two loading rounds.
            A falsy value (0 or None) means load once and return.
    """
    while True:
        try:
            logger.info('Start to load data, repeat interval: %r.', repeat_interval)
            self._load_data()
            # One-shot mode: no interval means a single load is enough.
            if not repeat_interval:
                return
            time.sleep(repeat_interval)
        except UnknownError as ex:
            # The two adjacent literals are concatenated by the parser;
            # the trailing '. ' keeps the message from reading "...%dDetail".
            logger.error(
                'Unexpected error happens when loading data. Loading status: %s, loading pool size: %d. '
                'Detail: %s', self._loading_status, len(self._loader_pool), str(ex))
def _repeat_loading(self, repeat_interval):
    """Periodically load summary data, skipping rounds while stopping.

    Args:
        repeat_interval (int): Seconds to wait between two loading rounds.
            A falsy value (0 or None) means run a single round and return.
    """
    # Allocate CPU resources to enable gunicorn to start the web service.
    time.sleep(1)
    while True:
        try:
            if self.status == _ExplainManagerStatus.STOPPING.value:
                # Manager is shutting down: skip this round but keep the
                # loop alive so loading can resume if the status changes.
                logger.debug(
                    'Current loading status is %s, we will not trigger repeat loading.',
                    _ExplainManagerStatus.STOPPING.value)
            else:
                logger.info(
                    'Starts triggering repeat loading, repeat interval: %r.', repeat_interval)
                self._load_data()
            # One-shot mode: no interval means a single pass is enough.
            if not repeat_interval:
                return
            time.sleep(repeat_interval)
        except UnknownError as ex:
            # The two adjacent literals are concatenated by the parser;
            # the trailing '. ' keeps the message from reading "...%dDetail".
            logger.error(
                'Unexpected error happens when loading data. Loading status: %s, loading pool size: %d. '
                'Detail: %s', self.status, len(self._loader_pool), str(ex))
def list_events(self, filenames):
    """
    Load summary file and parse file content.

    Args:
        filenames (list[str]): File name list.

    Returns:
        tuple, the elements of the tuple are:

            - file_changed (bool): True if the latest file is changed.
            - is_end (bool): True if all the summary files are finished loading.
            - event_data (dict): Event data where keys are explanation field.

    Raises:
        UnknownError: If an unexpected error occurs while parsing an event;
            the original exception is chained as the cause.
    """
    summary_files = self.sort_files(filenames)

    is_end = False
    file_changed = False
    event_data = {}
    # Only the newest summary file is parsed; older ones are ignored.
    filename = summary_files[-1]

    file_path = FileHandler.join(self._summary_dir, filename)
    if filename != self._latest_filename:
        # A newer file appeared: reopen and restart from offset 0.
        self._summary_file_handler = FileHandler(file_path, 'rb')
        self._latest_filename = filename
        self._latest_offset = 0
        file_changed = True

    new_size = FileHandler.file_stat(file_path).size
    if new_size == self._latest_offset:
        # Nothing was appended since the last parse round.
        is_end = True
        return file_changed, is_end, event_data

    while True:
        start_offset = self._summary_file_handler.offset
        try:
            event_str = self.event_load(self._summary_file_handler)
            if event_str is None:
                # Incomplete event at the tail: rewind so the next round
                # can retry once more bytes have been written.
                self._summary_file_handler.reset_offset(start_offset)
                is_end = True
                return file_changed, is_end, event_data
            if len(event_str) > MAX_EVENT_STRING:
                logger.warning(
                    "file_path: %s, event string: %d exceeds %d and drop it.",
                    self._summary_file_handler.file_path, len(event_str), MAX_EVENT_STRING)
                continue

            field_list, tensor_value_list = self._event_decode(event_str)
            for field, tensor_value in zip(field_list, tensor_value_list):
                event_data[field] = tensor_value

            logger.debug("Parse summary file offset %d, file path: %s.",
                         self._summary_file_handler.offset, file_path)
            return file_changed, is_end, event_data
        except exceptions.CRCLengthFailedError as ex:
            # Likely a partially-flushed record: rewind and retry later.
            self._summary_file_handler.reset_offset(start_offset)
            is_end = True
            logger.warning(
                "Check crc failed and reset offset, file_path=%s, offset=%s. "
                "Detail: %r.", self._summary_file_handler.file_path,
                self._summary_file_handler.offset, str(ex))
            return file_changed, is_end, event_data
        except Exception as ex:
            # Note: If an unknown error occurs, we will set the offset to the end of this file,
            # which is equivalent to stopping parsing this file. We do not delete the current job
            # and retain the data that has been successfully parsed.
            self._summary_file_handler.reset_offset(new_size)

            # Notice: If the current job is the latest one in the loader pool and the job is deleted,
            # the job goes into an infinite cycle of load-fail-delete-reload-load-fail-delete.
            # We need to prevent this infinite loop.
            logger.error(
                "Parse summary file failed, will set offset to the file end. file_path: %s, "
                "offset: %d, detail: %s.", file_path, self._summary_file_handler.offset, str(ex))
            logger.exception(ex)
            # Chain the cause so the original traceback is preserved.
            raise UnknownError(str(ex)) from ex
        finally:
            # Persist progress even when an exception escapes or `continue`
            # skips an oversized event.
            self._latest_offset = self._summary_file_handler.offset