def load(self): """Start loading data from the latest summary file to the loader.""" self.status = _LoaderStatus.LOADING.value filenames = [] for filename in FileHandler.list_dir(self._loader_info['summary_dir']): if FileHandler.is_file( FileHandler.join(self._loader_info['summary_dir'], filename)): filenames.append(filename) filenames = ExplainLoader._filter_files(filenames) if not filenames: raise TrainJobNotExistError( 'No summary file found in %s, explain job will be delete.' % self._loader_info['summary_dir']) is_end = False while not is_end and self.status != _LoaderStatus.STOP.value: try: file_changed, is_end, event_dict = self._parser.list_events( filenames) except UnknownError: break if file_changed: logger.info( 'Summary file in %s update, reload the data in the summary.', self._loader_info['summary_dir']) self._clear_job() if event_dict: self._import_data_from_event(event_dict)
def _parse_pb_file(self, filename):
    """
    Parse pb file and write content to `EventsData`.

    Args:
        filename (str): The file path of pb file.
    """
    file_path = FileHandler.join(self._summary_dir, filename)
    logger.info("Start to load graph from pb file, file path: %s.", file_path)
    filehandler = FileHandler(file_path)
    model_proto = anf_ir_pb2.ModelProto()
    try:
        model_proto.ParseFromString(filehandler.read())
    except ParseError:
        logger.warning("The given file is not a valid pb file, file path: %s.", file_path)
        return

    graph = MSGraph()
    graph.build_graph(model_proto.graph)

    # Use the file's modification time as the wall time of the event.
    tensor_event = TensorEvent(wall_time=FileHandler.file_stat(file_path).mtime,
                               step=0,
                               tag=filename,
                               plugin_name=PluginNameEnum.GRAPH.value,
                               value=graph)
    self._events_data.add_tensor_event(tensor_event)
def sort_files(self, filenames):
    """Sort by modification time and then by filename, both ascending."""
    filenames = sorted(
        filenames,
        key=lambda file: (FileHandler.file_stat(FileHandler.join(self._summary_dir, file)).mtime, file))
    return filenames
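# A minimal, self-contained sketch of the (mtime, filename) sort key used by
# sort_files above, using only the standard library. `summary_dir` and the
# helper name are illustrative; FileHandler.file_stat is assumed to expose
# os.stat-style metadata. Ties on modification time fall back to name order.
import os

def sort_files_sketch(summary_dir, filenames):
    """Sort filenames by modification time, breaking ties by name (both ascending)."""
    return sorted(
        filenames,
        key=lambda name: (os.stat(os.path.join(summary_dir, name)).st_mtime, name))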
def parse(self):
    """Load summary file and parse file content."""
    summary_file_handler = FileHandler(self.summary_file, 'rb')
    self._file_size = os.path.getsize(self.summary_file)
    # Log progress whenever the parsed size grows past another multiple of this step.
    self._process_info = self._file_size // INFO_INTERVAL
    parse_summary_logger.info("Loading %s.", self.summary_file)
    result = self._load(summary_file_handler)
    if result:
        warning = ''
        scalar_path = FileHandler.join(self._output, "scalar.csv")
        image_path = FileHandler.join(self._output, IMAGE)
        if not self._image_check:
            warning = warning + " The summary file contains no image."
        else:
            parse_summary_logger.info("Images are written in %s.", image_path)
        if not self._scalar_check:
            warning = warning + " The summary file contains no scalar value."
        else:
            parse_summary_logger.info("Writing scalar data into %s.", scalar_path)
            self._scalar_writer.write()
        if warning:
            parse_summary_logger.warning(warning)
        parse_summary_logger.info("Finished loading %s.", self.summary_file)
def load(self):
    """Start loading data from the latest summary file to the loader."""
    filenames = []
    for filename in FileHandler.list_dir(self._loader_info['summary_dir']):
        if FileHandler.is_file(FileHandler.join(self._loader_info['summary_dir'], filename)):
            filenames.append(filename)
    filenames = ExplainLoader._filter_files(filenames)

    if not filenames:
        raise TrainJobNotExistError(
            'No summary file found in %s, explain job will be deleted.'
            % self._loader_info['summary_dir'])

    is_end = False
    while not is_end:
        is_clean, is_end, event_dict = self._parser.parse_explain(filenames)

        if is_clean:
            logger.info('Summary file in %s has been updated, reloading the data in the summary.',
                        self._loader_info['summary_dir'])
            self._clear_job()
        if event_dict:
            self._import_data_from_event(event_dict)
def parse_files(self, filenames, events_data):
    """
    Load summary file and parse file content.

    Args:
        filenames (list[str]): File name list.
        events_data (EventsData): The container of event data.
    """
    self._events_data = events_data
    summary_files = self.filter_files(filenames)
    summary_files = self.sort_files(summary_files)

    for filename in summary_files:
        if self._latest_filename and \
                (self._compare_summary_file(self._latest_filename, filename)):
            continue

        file_path = FileHandler.join(self._summary_dir, filename)

        if filename != self._latest_filename:
            self._summary_file_handler = FileHandler(file_path, 'rb')
            self._latest_filename = filename
            self._latest_file_size = 0

        new_size = FileHandler.file_stat(file_path).size
        if new_size == self._latest_file_size:
            continue

        self._latest_file_size = new_size
        try:
            self._load_single_file(self._summary_file_handler)
        except UnknownError as ex:
            logger.warning("Parse summary file failed, detail: %r, file path: %s.",
                           str(ex), file_path)
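# _compare_summary_file is referenced above but not shown in this listing. A
# hypothetical sketch of what such a comparison could look like (an assumption,
# not the confirmed implementation): `latest` already covers `other` when it
# sorts at or after it by the same (mtime, name) key used in sort_files.
import os

def compare_summary_file_sketch(summary_dir, latest, other):
    """Return True when `latest` is not older than `other` by (mtime, name)."""
    def key(name):
        return (os.stat(os.path.join(summary_dir, name)).st_mtime, name)
    return key(latest) >= key(other)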
def phase_pb_file(file_path: str) -> Union[MSGraph, None]:
    """
    Parse pb file to graph.

    Args:
        file_path (str): The file path of pb file.

    Returns:
        MSGraph, the built graph if the pb file is loaded and the graph is built
        successfully, else None.
    """
    if not CONFIG.VERBOSE:
        logger.setLevel(logging.ERROR)
    logger.info("Start to load graph from pb file, file path: %s.", file_path)
    model_proto = anf_ir_pb2.ModelProto()
    try:
        model_proto.ParseFromString(FileHandler(file_path).read())
    except ParseError:
        logger.warning("The given file is not a valid pb file, file path: %s.", file_path)
        return None

    graph = MSGraph()

    try:
        graph.build_graph(model_proto.graph)
    except Exception as ex:
        logger.error("Build graph failed, file path: %s.", file_path)
        logger.exception(ex)
        raise UnknownError(str(ex))

    logger.info("Build graph success, file path: %s.", file_path)
    return graph
def parse_files(self, executor, filenames, events_data):
    """
    Load summary file and parse file content.

    Args:
        executor (Executor): The executor instance.
        filenames (list[str]): File name list.
        events_data (EventsData): The container of event data.

    Returns:
        bool, True if all the summary files are finished loading.
    """
    summary_files = self.filter_files(filenames)
    summary_files = self.sort_files(summary_files)
    if self._latest_filename in summary_files:
        index = summary_files.index(self._latest_filename)
        summary_files = summary_files[index:]

    for filename in summary_files:
        file_path = FileHandler.join(self._summary_dir, filename)

        if filename != self._latest_filename:
            self._summary_file_handler = FileHandler(file_path, 'rb')
            self._latest_filename = filename
            self._latest_file_size = 0

        new_size = FileHandler.file_stat(file_path).size
        if new_size == self._latest_file_size:
            continue

        try:
            if not self._load_single_file(self._summary_file_handler, executor, events_data):
                self._latest_file_size = self._summary_file_handler.offset
            else:
                self._latest_file_size = new_size
            # Wait for data in this file to be processed to avoid loading multiple files
            # at the same time.
            logger.debug("Parse summary file offset %d, file path: %s.",
                         self._latest_file_size, file_path)
            return False
        except UnknownError as ex:
            logger.warning("Parse summary file failed, detail: %r, file path: %s.",
                           str(ex), file_path)
    return True
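# Hedged sketch of the incremental-load bookkeeping in parse_files above:
# remember how many bytes of the newest file were already parsed, skip the file
# when its size is unchanged, and read only the new tail otherwise. The class
# name and the plain-bytes "parsing" are illustrative only; the real parser
# tracks its position through the summary file handler's offset.
import os

class IncrementalReader:
    def __init__(self, path):
        self._path = path
        self._parsed_size = 0  # mirrors self._latest_file_size

    def poll(self):
        """Return newly appended bytes, or None when nothing changed."""
        new_size = os.stat(self._path).st_size
        if new_size == self._parsed_size:
            return None  # file unchanged, nothing to parse
        with open(self._path, 'rb') as f:
            f.seek(self._parsed_size)
            data = f.read(new_size - self._parsed_size)
        self._parsed_size = new_size
        return data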
def filter_valid_files(self):
    """
    Get a list of valid files from the summary directory.

    Returns:
        list[str], file name list.
    """
    filenames = []
    for filename in FileHandler.list_dir(self._summary_dir):
        if FileHandler.is_file(FileHandler.join(self._summary_dir, filename)):
            filenames.append(filename)

    valid_filenames = []
    for parser in self._parser_list:
        valid_filenames.extend(parser.filter_files(filenames))

    return list(set(valid_filenames))
def _generate_loader_from_relative_path(self, relative_path: str) -> ExplainLoader:
    """Generate explain loader from the given relative path."""
    self._check_summary_exist(relative_path)
    current_dir = os.path.realpath(FileHandler.join(self._summary_base_dir, relative_path))
    loader_id = self._generate_loader_id(relative_path)
    loader = ExplainLoader(loader_id=loader_id, summary_dir=current_dir)
    return loader
def __init__(self, summary_file, output):
    self.summary_file = summary_file
    self._output = output
    self._scalar_writer = ScalarWriter(self._output)
    self._image_writer = ImageWriter(FileHandler.join(self._output, IMAGE))
    self._file_size = 0
    self._process_info = 0
    self._image_check = False
    self._scalar_check = False
def _parse_pb_file(summary_dir, filename):
    """
    Parse pb file and write content to `EventsData`.

    Args:
        summary_dir (str): The directory that contains the pb file.
        filename (str): The name of the pb file.

    Returns:
        TensorEvent, the tensor event if the pb file is loaded and the graph is
        built successfully, else None.
    """
    file_path = FileHandler.join(summary_dir, filename)
    logger.info("Start to load graph from pb file, file path: %s.", file_path)
    filehandler = FileHandler(file_path)
    model_proto = anf_ir_pb2.ModelProto()
    try:
        model_proto.ParseFromString(filehandler.read())
    except ParseError:
        logger.warning("The given file is not a valid pb file, file path: %s.", file_path)
        return None

    graph = MSGraph()

    try:
        graph.build_graph(model_proto.graph)
    except Exception as ex:
        # Normally, there are no exceptions; it is only possible for users on the MindSpore
        # side to dump other non-default graphs.
        logger.error("Build graph failed, file path: %s.", file_path)
        logger.exception(ex)
        raise UnknownError(str(ex))

    tensor_event = TensorEvent(wall_time=FileHandler.file_stat(file_path).mtime,
                               step=0,
                               tag=filename,
                               plugin_name=PluginNameEnum.GRAPH.value,
                               value=graph,
                               filename=filename)
    logger.info("Build graph success, file path: %s.", file_path)
    return tensor_event
def _generate_loader_by_relative_path(self, relative_path):
    """
    Generate loader by relative path.

    Args:
        relative_path (str): Relative path of a summary directory, e.g. './log1'.

    Returns:
        LoaderStruct, the loader for the given summary directory.
    """
    current_dir = os.path.realpath(FileHandler.join(self._summary_path, relative_path))
    data_loader = DataLoader(current_dir)
    loader_id = self._generate_loader_id(relative_path)
    loader = LoaderStruct(loader_id=loader_id,
                          name=self._generate_loader_name(relative_path),
                          path=current_dir,
                          latest_update_time=FileHandler.file_stat(current_dir).mtime,
                          data_loader=data_loader)
    return loader
def _set_latest_file(self, filename):
    """
    Check if the file is newer than the latest loaded file, and if so, record it.

    A file is considered newer when its modification time is later, or when the
    modification times are equal and its name sorts after the recorded one.

    Args:
        filename (str): The file name that needs to be checked and set.

    Returns:
        bool, True if the file is newer than the latest loaded file, False otherwise.
    """
    mtime = FileHandler.file_stat(FileHandler.join(self._summary_dir, filename)).mtime
    if mtime < self._latest_mtime or \
            (mtime == self._latest_mtime and filename <= self._latest_filename):
        return False

    self._latest_mtime = mtime
    self._latest_filename = filename
    return True
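# Sketch of the ordering test in _set_latest_file: a file counts as "newer"
# exactly when its (mtime, name) pair is strictly greater than the recorded
# watermark, so files are never loaded twice even when several share one mtime.
# Python's lexicographic tuple comparison expresses the same condition as the
# two-clause test above; the standalone function form is illustrative.
def is_newer(mtime, filename, latest_mtime, latest_filename):
    return (mtime, filename) > (latest_mtime, latest_filename)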
def run(self, args):
    """
    Execute for start command.

    Args:
        args (Namespace): Parsed arguments to hold customized parameters.
    """
    try:
        date_time = datetime.datetime.now().strftime('output_%Y%m%d_%H%M%S_%f')
        output_path = os.path.join(args.output, date_time)

        summary_dir = args.summary_dir
        if not self._check_dirpath(summary_dir):
            return

        summary_parser = _SummaryParser(summary_dir)
        summary_files = summary_parser.filter_files(os.listdir(summary_dir))

        if not summary_files:
            parse_summary_logger.error('Path %s has no summary file.', summary_dir)
            return

        summary_files = summary_parser.sort_files(summary_files)
        filename = summary_files[-1]

        summary_file = FileHandler.join(summary_dir, filename)

        if not (self._check_filepath(summary_file)
                and self._check_create_filepath(output_path)
                and self._check_create_filepath(FileHandler.join(output_path, 'image'))):
            return

        eventparser = EventParser(summary_file, output_path)
        eventparser.parse()
    except Exception as ex:
        parse_summary_logger.error("Parse summary file failed, detail: %r.", str(ex))
        raise UnknownError(str(ex))
def _load_pb_files(self, filenames):
    """
    Load and parse the pb files that are newer than the latest parsed one.

    Args:
        filenames (list[str]): File name list, like [filename1, filename2].
    """
    pb_filenames = self._filter_pb_files(filenames)
    pb_filenames = sorted(
        pb_filenames,
        key=lambda file: FileHandler.file_stat(FileHandler.join(self._summary_dir, file)).mtime)
    for filename in pb_filenames:
        mtime = FileHandler.file_stat(FileHandler.join(self._summary_dir, filename)).mtime
        if mtime <= self._latest_pb_file_mtime:
            continue
        self._latest_pb_file_mtime = mtime
        self._parse_pb_file(filename)
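# Minimal stdlib sketch of the mtime watermark in _load_pb_files: sort the
# candidates by modification time and yield only those strictly newer than the
# last one processed. The function name and arguments are illustrative.
import os

def newer_files(directory, filenames, last_mtime):
    """Yield (mtime, name) for files modified after last_mtime, oldest first."""
    stamped = sorted((os.stat(os.path.join(directory, n)).st_mtime, n) for n in filenames)
    for mtime, name in stamped:
        if mtime > last_mtime:
            yield mtime, name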
def parse_files(self, executor, filenames, events_data):
    """
    Load summary file and parse file content.

    Args:
        executor (Executor): The executor instance.
        filenames (list[str]): File name list.
        events_data (EventsData): The container of event data.
    """
    self._events_data = events_data
    summary_files = self.filter_files(filenames)
    summary_files = self.sort_files(summary_files)

    for filename in summary_files:
        if self._latest_filename and \
                (self._compare_summary_file(self._latest_filename, filename)):
            continue

        file_path = FileHandler.join(self._summary_dir, filename)

        if filename != self._latest_filename:
            self._summary_file_handler = FileHandler(file_path, 'rb')
            self._latest_filename = filename
            self._latest_file_size = 0

        new_size = FileHandler.file_stat(file_path).size
        if new_size == self._latest_file_size:
            continue

        self._latest_file_size = new_size
        try:
            self._load_single_file(self._summary_file_handler, executor)
            # Wait for data in this file to be processed to avoid loading multiple files
            # at the same time.
            executor.wait_all_tasks_finish()
        except UnknownError as ex:
            logger.warning("Parse summary file failed, detail: %r, file path: %s.",
                           str(ex), file_path)
def check_path(file_path):
    """
    Check and normalize a file path argument.

    Expands a leading '~', anchors relative paths at the current working
    directory, and resolves the result to a real path.

    Args:
        file_path (str): File path.

    Returns:
        str, the normalized absolute path.
    """
    if file_path.startswith('~'):
        file_path = os.path.realpath(os.path.expanduser(file_path))
    if not file_path.startswith('/'):
        file_path = os.path.realpath(FileHandler.join(os.getcwd(), file_path))
    return os.path.realpath(file_path)
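# Stdlib-only sketch of check_path's normalization, assuming FileHandler.join
# behaves like os.path.join. os.path.isabs stands in for the startswith('/')
# test above, so this is a portable approximation rather than an exact copy.
# The example inputs in the trailing comment are hypothetical.
import os

def check_path_sketch(file_path):
    if file_path.startswith('~'):
        file_path = os.path.expanduser(file_path)
    if not os.path.isabs(file_path):
        file_path = os.path.join(os.getcwd(), file_path)
    return os.path.realpath(file_path)

# e.g. check_path_sketch('~/logs'), check_path_sketch('./out'), check_path_sketch('/tmp/x')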
def generate_loaders(self, loader_pool):
    """
    Generate loaders from the summary path. If the summary path is empty, return an empty dict.

    Args:
        loader_pool (dict[str, LoaderStruct]): Current loader pool in data_manager.

    Returns:
        dict[str, LoaderStruct], a dict of `Loader`.
    """
    loader_dict = {}

    if not FileHandler.exists(self._summary_path):
        logger.warning("Summary path does not exist. It will not start loading events data. "
                       "Current path is %r.", self._summary_path)
        return loader_dict

    dir_map_mtime_dict = {}
    min_modify_time = None
    summaries_info = self._summary_watcher.list_summary_directories(self._summary_path)

    for item in summaries_info:
        relative_path = item.get("relative_path")
        current_dir = FileHandler.join(self._summary_path, relative_path)
        dataloader = DataLoader(current_dir)

        if not dataloader.has_valid_files():
            logger.debug("Cannot find a valid train log file in folder %s, ignoring it.",
                         relative_path)
            continue

        modify_time = item.get("update_time").timestamp()

        # If the loader already exists in the loader pool with a newer update time, use that time.
        loader_id = self._generate_loader_id(relative_path)
        loader = loader_pool.get(loader_id)
        if loader is not None and loader.latest_update_time > modify_time:
            modify_time = loader.latest_update_time

        if not min_modify_time:
            # The first load: initialize the minimum modify time.
            min_modify_time = modify_time

        # We need to find the `MAX_DATA_LOADER_SIZE` most recently modified folders.
        if len(dir_map_mtime_dict) < MAX_DATA_LOADER_SIZE:
            if modify_time < min_modify_time:
                min_modify_time = modify_time
            dir_map_mtime_dict.update({relative_path: modify_time})
        else:
            if modify_time >= min_modify_time:
                dir_map_mtime_dict.update({relative_path: modify_time})

    sorted_dir_tuple = sorted(dir_map_mtime_dict.items(),
                              key=lambda d: d[1])[-MAX_DATA_LOADER_SIZE:]

    for relative_path, modify_time in sorted_dir_tuple:
        loader_id = self._generate_loader_id(relative_path)
        loader = self._generate_loader_by_relative_path(relative_path)
        loader_dict.update({loader_id: loader})

    return loader_dict
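# Hedged sketch of the selection policy in generate_loaders: keep only the
# MAX_DATA_LOADER_SIZE most recently modified summary directories.
# heapq.nlargest expresses the same "newest N by mtime" result that the
# running-minimum bookkeeping above computes incrementally (ordering among
# equal mtimes may differ); the function name is illustrative.
import heapq

def newest_n(dir_map_mtime, n):
    """Return [(relative_path, mtime)] for the n most recently modified dirs."""
    return heapq.nlargest(n, dir_map_mtime.items(), key=lambda item: item[1])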
class ExplainParser(_SummaryParser):
    """The summary file parser."""

    def __init__(self, summary_dir):
        super(ExplainParser, self).__init__(summary_dir)
        self._latest_filename = ''

    def parse_explain(self, filenames):
        """
        Load summary file and parse file content.

        Args:
            filenames (list[str]): File name list.

        Returns:
            tuple, the elements of the tuple are:

                - is_clean (bool): True if the latest file changed and previously loaded data was cleaned.
                - is_end (bool): True if all the summary files are finished loading.
                - event_data (dict): Event data where keys are explanation field.
        """
        summary_files = self.sort_files(filenames)

        is_end = False
        is_clean = False
        event_data = {}
        filename = summary_files[-1]

        file_path = FileHandler.join(self._summary_dir, filename)
        if filename != self._latest_filename:
            self._summary_file_handler = FileHandler(file_path, 'rb')
            self._latest_filename = filename
            self._latest_file_size = 0
            is_clean = True

        new_size = FileHandler.file_stat(file_path).size
        if new_size == self._latest_file_size:
            is_end = True
            return is_clean, is_end, event_data

        while True:
            start_offset = self._summary_file_handler.offset
            try:
                event_str = self.event_load(self._summary_file_handler)
                if event_str is None:
                    self._summary_file_handler.reset_offset(start_offset)
                    is_end = True
                    return is_clean, is_end, event_data
                if len(event_str) > MAX_EVENT_STRING:
                    logger.warning("file_path: %s, event string: %d exceeds %d and drop it.",
                                   self._summary_file_handler.file_path,
                                   len(event_str), MAX_EVENT_STRING)
                    continue

                field_list, tensor_value_list = self._event_decode(event_str)
                for field, tensor_value in zip(field_list, tensor_value_list):
                    event_data[field] = tensor_value

                logger.info("Parse summary file offset %d, file path: %s.",
                            self._summary_file_handler.offset, file_path)
                return is_clean, is_end, event_data
            except (exceptions.CRCFailedError, exceptions.CRCLengthFailedError) as ex:
                self._summary_file_handler.reset_offset(start_offset)
                is_end = True
                logger.warning("Check crc failed and ignore this file, file_path=%s, offset=%s. "
                               "Detail: %r.", self._summary_file_handler.file_path,
                               self._summary_file_handler.offset, str(ex))
                return is_clean, is_end, event_data
            except (OSError, DecodeError, exceptions.MindInsightException) as ex:
                is_end = True
                logger.warning("Parse log file fail, and ignore this file, detail: %r, "
                               "file path: %s.", str(ex), self._summary_file_handler.file_path)
                return is_clean, is_end, event_data
            except Exception as ex:
                logger.exception(ex)
                raise UnknownError(str(ex))

    @staticmethod
    def _event_decode(event_str):
        """
        Transform `Event` data to tensor_event and update it to EventsData.

        Args:
            event_str (str): Message event string in summary proto, data read from file handler.
        """
        logger.debug("Start to parse event string. Event string len: %s.", len(event_str))
        event = summary_pb2.Event.FromString(event_str)
        logger.debug("Deserialize event string completed.")

        fields = {
            'sample_id': ExplainFieldsEnum.SAMPLE_ID,
            'benchmark': ExplainFieldsEnum.BENCHMARK,
            'metadata': ExplainFieldsEnum.METADATA
        }

        tensor_event_value = getattr(event, 'explain')

        field_list = []
        tensor_value_list = []
        for field in fields:
            if not getattr(tensor_event_value, field, False):
                continue

            if ExplainFieldsEnum.METADATA.value == field and not tensor_event_value.metadata.label:
                continue

            tensor_value = None
            if field == ExplainFieldsEnum.SAMPLE_ID.value:
                tensor_value = ExplainParser._add_image_data(tensor_event_value)
            elif field == ExplainFieldsEnum.BENCHMARK.value:
                tensor_value = ExplainParser._add_benchmark(tensor_event_value)
            elif field == ExplainFieldsEnum.METADATA.value:
                tensor_value = ExplainParser._add_metadata(tensor_event_value)

            logger.debug("Event generated, label is %s, step is %s.", field, event.step)
            field_list.append(field)
            tensor_value_list.append(tensor_value)
        return field_list, tensor_value_list

    @staticmethod
    def _add_image_data(tensor_event_value):
        """
        Parse image data based on sample_id in Explain message.

        Args:
            tensor_event_value: The object of Explain message.

        Returns:
            SampleContainer, an object containing sample data.
        """
        inference = InferfenceContainer(
            ground_truth_prob=tensor_event_value.inference.ground_truth_prob,
            ground_truth_prob_sd=tensor_event_value.inference.ground_truth_prob_sd,
            ground_truth_prob_itl95_low=tensor_event_value.inference.ground_truth_prob_itl95_low,
            ground_truth_prob_itl95_hi=tensor_event_value.inference.ground_truth_prob_itl95_hi,
            predicted_label=tensor_event_value.inference.predicted_label,
            predicted_prob=tensor_event_value.inference.predicted_prob,
            predicted_prob_sd=tensor_event_value.inference.predicted_prob_sd,
            predicted_prob_itl95_low=tensor_event_value.inference.predicted_prob_itl95_low,
            predicted_prob_itl95_hi=tensor_event_value.inference.predicted_prob_itl95_hi)
        sample_data = SampleContainer(
            sample_id=tensor_event_value.sample_id,
            image_path=tensor_event_value.image_path,
            ground_truth_label=tensor_event_value.ground_truth_label,
            inference=inference,
            explanation=tensor_event_value.explanation,
            status=tensor_event_value.status)
        return sample_data

    @staticmethod
    def _add_benchmark(tensor_event_value):
        """
        Parse benchmark data from Explain message.

        Args:
            tensor_event_value: The object of Explain message.

        Returns:
            BenchmarkContainer, an object containing benchmark data.
        """
        benchmark_data = BenchmarkContainer(
            benchmark=tensor_event_value.benchmark,
            status=tensor_event_value.status)
        return benchmark_data

    @staticmethod
    def _add_metadata(tensor_event_value):
        """
        Parse metadata from Explain message.

        Args:
            tensor_event_value: The object of Explain message.

        Returns:
            MetadataContainer, an object containing metadata.
        """
        metadata_value = MetadataContainer(
            metadata=tensor_event_value.metadata,
            status=tensor_event_value.status)
        return metadata_value
class ExplainParser(_SummaryParser):
    """The summary file parser."""

    def __init__(self, summary_dir):
        super(ExplainParser, self).__init__(summary_dir)
        self._latest_offset = 0

    def list_events(self, filenames):
        """
        Load summary file and parse file content.

        Args:
            filenames (list[str]): File name list.

        Returns:
            tuple, the elements of the tuple are:

                - file_changed (bool): True if the latest file is changed.
                - is_end (bool): True if all the summary files are finished loading.
                - event_data (dict): Event data where keys are explanation field.
        """
        summary_files = self.sort_files(filenames)

        is_end = False
        file_changed = False
        event_data = {}
        filename = summary_files[-1]

        file_path = FileHandler.join(self._summary_dir, filename)
        if filename != self._latest_filename:
            self._summary_file_handler = FileHandler(file_path, 'rb')
            self._latest_filename = filename
            self._latest_offset = 0
            file_changed = True

        new_size = FileHandler.file_stat(file_path).size
        if new_size == self._latest_offset:
            is_end = True
            return file_changed, is_end, event_data

        while True:
            start_offset = self._summary_file_handler.offset
            try:
                event_str = self.event_load(self._summary_file_handler)
                if event_str is None:
                    self._summary_file_handler.reset_offset(start_offset)
                    is_end = True
                    return file_changed, is_end, event_data
                if len(event_str) > MAX_EVENT_STRING:
                    logger.warning("file_path: %s, event string: %d exceeds %d and drop it.",
                                   self._summary_file_handler.file_path,
                                   len(event_str), MAX_EVENT_STRING)
                    continue

                field_list, tensor_value_list = self._event_decode(event_str)
                for field, tensor_value in zip(field_list, tensor_value_list):
                    event_data[field] = tensor_value

                logger.debug("Parse summary file offset %d, file path: %s.",
                             self._summary_file_handler.offset, file_path)
                return file_changed, is_end, event_data
            except exceptions.CRCLengthFailedError as ex:
                self._summary_file_handler.reset_offset(start_offset)
                is_end = True
                logger.warning("Check crc failed and reset offset, file_path=%s, offset=%s. "
                               "Detail: %r.", self._summary_file_handler.file_path,
                               self._summary_file_handler.offset, str(ex))
                return file_changed, is_end, event_data
            except Exception as ex:
                # Note: If an unknown error occurs, we will set the offset to the end of this
                # file, which is equivalent to stopping parsing this file. We do not delete the
                # current job and retain the data that has been successfully parsed.
                self._summary_file_handler.reset_offset(new_size)

                # Notice: If the current job is the latest one in the loader pool and the job is
                # deleted, the job goes into an infinite cycle of
                # load-fail-delete-reload-load-fail-delete. We need to prevent this infinite loop.
                logger.error("Parse summary file failed, will set offset to the file end. "
                             "file_path: %s, offset: %d, detail: %s.",
                             file_path, self._summary_file_handler.offset, str(ex))
                logger.exception(ex)
                raise UnknownError(str(ex))
            finally:
                self._latest_offset = self._summary_file_handler.offset

    @staticmethod
    def _event_decode(event_str):
        """
        Transform `Event` data to tensor_event and update it to EventsData.

        Args:
            event_str (str): Message event string in summary proto, data read from file handler.

        Returns:
            tuple, the elements of the result tuple are:

                - field_list (list): Explain fields to be parsed.
                - tensor_value_list (list): Parsed data with respect to the field list.
        """
        logger.debug("Start to parse event string. Event string len: %s.", len(event_str))
        event = xai_pb2.Event.FromString(event_str)
        logger.debug("Deserialize event string completed.")

        fields = {
            'sample_id': ExplainFieldsEnum.SAMPLE_ID,
            'benchmark': ExplainFieldsEnum.BENCHMARK,
            'metadata': ExplainFieldsEnum.METADATA
        }

        tensor_event_value = getattr(event, 'explain')

        field_list = []
        tensor_value_list = []
        for field in fields:
            if getattr(tensor_event_value, field, None) is None:
                continue

            if ExplainFieldsEnum.METADATA.value == field and not tensor_event_value.metadata.label:
                continue

            tensor_value = None
            if field == ExplainFieldsEnum.SAMPLE_ID.value:
                tensor_value = ExplainParser._add_image_data(tensor_event_value)
            elif field == ExplainFieldsEnum.BENCHMARK.value:
                tensor_value = ExplainParser._add_benchmark(tensor_event_value)
            elif field == ExplainFieldsEnum.METADATA.value:
                tensor_value = ExplainParser._add_metadata(tensor_event_value)

            logger.debug("Event generated, label is %s, step is %s.", field, event.step)
            field_list.append(field)
            tensor_value_list.append(tensor_value)
        return field_list, tensor_value_list

    @staticmethod
    def _add_image_data(tensor_event_value):
        """
        Parse image data based on sample_id in Explain message.

        Args:
            tensor_event_value (Event): The object of Explain message.

        Returns:
            SampleContainer, a named tuple containing sample data.
        """
        inference = InferfenceContainer(
            ground_truth_prob=tensor_event_value.inference.ground_truth_prob,
            ground_truth_prob_sd=tensor_event_value.inference.ground_truth_prob_sd,
            ground_truth_prob_itl95_low=tensor_event_value.inference.ground_truth_prob_itl95_low,
            ground_truth_prob_itl95_hi=tensor_event_value.inference.ground_truth_prob_itl95_hi,
            predicted_label=tensor_event_value.inference.predicted_label,
            predicted_prob=tensor_event_value.inference.predicted_prob,
            predicted_prob_sd=tensor_event_value.inference.predicted_prob_sd,
            predicted_prob_itl95_low=tensor_event_value.inference.predicted_prob_itl95_low,
            predicted_prob_itl95_hi=tensor_event_value.inference.predicted_prob_itl95_hi)
        sample_data = SampleContainer(
            sample_id=tensor_event_value.sample_id,
            image_path=tensor_event_value.image_path,
            ground_truth_label=tensor_event_value.ground_truth_label,
            inference=inference,
            explanation=tensor_event_value.explanation,
            hierarchical_occlusion=tensor_event_value.hoc,
            status=tensor_event_value.status)
        return sample_data

    @staticmethod
    def _add_benchmark(tensor_event_value):
        """
        Parse benchmark data from Explain message.

        Args:
            tensor_event_value (Event): The object of Explain message.

        Returns:
            BenchmarkContainer, a named tuple containing benchmark data.
        """
        benchmark_data = BenchmarkContainer(
            benchmark=tensor_event_value.benchmark,
            status=tensor_event_value.status)
        return benchmark_data

    @staticmethod
    def _add_metadata(tensor_event_value):
        """
        Parse metadata from Explain message.

        Args:
            tensor_event_value (Event): The object of Explain message.

        Returns:
            MetadataContainer, a named tuple containing metadata.
        """
        metadata_value = MetadataContainer(
            metadata=tensor_event_value.metadata,
            status=tensor_event_value.status)
        return metadata_value
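# event_load is not shown in this listing. Below is a hedged sketch of the kind
# of length-prefixed record framing such summary files commonly use (an
# assumption about the on-disk layout, not the confirmed format): an 8-byte
# little-endian body length followed by the body. CRC fields, which the
# CRCLengthFailedError handling above implies, are omitted for brevity.
import struct

def read_record(handle):
    """Read one length-prefixed record; return None when no complete record is available."""
    header = handle.read(8)
    if len(header) < 8:
        return None  # no complete header yet: caller resets the offset and retries later
    (body_len,) = struct.unpack('<Q', header)
    body = handle.read(body_len)
    if len(body) < body_len:
        return None  # truncated body: caller resets the offset and retries later
    return body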
def abc():
    FileHandler.is_file('aaa')
    print('after')