예제 #1
0
 def _delete_loader(self, loader_id):
     """Delete loader given loader_id."""
     if loader_id in self._loader_pool:
         self._loader_pool.pop(loader_id)
         logger.debug(
             'delete loader %s, and stop this loader loading data.',
             loader_id)
예제 #2
0
 def _reload_data_again(self):
     """Reload the data one more time."""
     logger.debug('Start to reload data again.')
     reload_thread = threading.Thread(target=self._load_data,
                                      name='reload_data_thread')
     # Non-daemon: the interpreter will wait for this thread on shutdown.
     reload_thread.daemon = False
     reload_thread.start()
예제 #3
0
    def _reload_data_again(self):
        """Reload the data one more time."""
        logger.debug('Start to reload data again.')

        def _reload_job():
            # Skip the reload entirely once the manager is shutting down.
            if self.status == _ExplainManagerStatus.STOPPING.value:
                return
            self._stop_load_data()
            self._load_data()

        reload_thread = threading.Thread(target=_reload_job,
                                         name='explainer.reload_data_thread')
        # Non-daemon: the interpreter will wait for this thread on shutdown.
        reload_thread.daemon = False
        reload_thread.start()
예제 #4
0
    def _execute_loading(self):
        """Execute the data loading."""
        # We will load the newest loader first.
        for loader_id in list(self._loader_pool.keys())[::-1]:
            with self._loader_pool_mutex:
                loader = self._loader_pool.get(loader_id, None)
                if loader is None:
                    logger.debug(
                        'Loader %r has been deleted, will not load data.',
                        loader_id)
                    continue

            if self.status == _ExplainManagerStatus.STOPPING.value:
                logger.info('Loader %s status is %s, will return.', loader_id,
                            loader.status)
                return

            loader.load()
예제 #5
0
    def _execute_loading(self):
        """Execute the data loading."""
        for loader_id in list(self._loader_pool.keys()):
            try:
                with self._loader_pool_mutex:
                    loader = self._loader_pool.get(loader_id, None)
                    if loader is None:
                        logger.debug(
                            'Loader %r has been deleted, will not load data',
                            loader_id)
                        return
                loader.load()

            except MindInsightException as ex:
                logger.warning(
                    'Data loader %r load data failed. Delete data_loader. Detail: %s',
                    loader_id, ex)
                with self._loader_pool_mutex:
                    self._delete_loader(loader_id)
예제 #6
0
 def _repeat_loading(self, repeat_interval):
     """Periodically load summary data.

     Args:
         repeat_interval (int): Seconds to wait between two loading passes.
             A falsy value means load once and return.
     """
     # Allocate CPU resources to enable gunicorn to start the web service.
     time.sleep(1)
     while True:
         try:
             if self.status == _ExplainManagerStatus.STOPPING.value:
                 logger.debug(
                     'Current loading status is %s, we will not trigger repeat loading.',
                     _ExplainManagerStatus.STOPPING.value)
             else:
                 logger.info(
                     'Starts triggering repeat loading, repeat interval: %r.',
                     repeat_interval)
                 self._load_data()
                 if not repeat_interval:
                     return
             time.sleep(repeat_interval)
         except UnknownError as ex:
             # Fix: the two adjacent literals were concatenated without a
             # separator, logging "...pool size: %dDetail: %s".
             # NOTE(review): on this path the sleep above is skipped, so a
             # persistent failure retries immediately — confirm intended.
             logger.error(
                 'Unexpected error happens when loading data. Loading status: %s, loading pool size: %d. '
                 'Detail: %s', self.status, len(self._loader_pool), str(ex))
예제 #7
0
    def _event_decode(event_str):
        """
        Transform `Event` data to tensor_event and update it to EventsData.

        Args:
            event_str (str): Message event string in summary proto, data read from file handler.

        Returns:
            tuple, the elements of the result tuple are:

                - field_list (list): Explain fields to be parsed.
                - tensor_value_list (list): Parsed data with respect to the field list.
        """

        logger.debug("Start to parse event string. Event string len: %s.",
                     len(event_str))
        event = xai_pb2.Event.FromString(event_str)
        logger.debug("Deserialize event string completed.")

        # Fix: the former dict mapped these names to enum members, but the
        # values were never read — only the keys were iterated. Use a plain
        # tuple to make the fixed processing order explicit.
        # Fix: getattr(event, 'explain') with a constant name is just
        # attribute access.
        tensor_event_value = event.explain

        field_list = []
        tensor_value_list = []
        for field in ('sample_id', 'benchmark', 'metadata'):
            # Skip fields absent from the message.
            if getattr(tensor_event_value, field, None) is None:
                continue

            # Metadata without labels carries nothing to parse.
            if ExplainFieldsEnum.METADATA.value == field and not tensor_event_value.metadata.label:
                continue

            tensor_value = None
            if field == ExplainFieldsEnum.SAMPLE_ID.value:
                tensor_value = ExplainParser._add_image_data(
                    tensor_event_value)
            elif field == ExplainFieldsEnum.BENCHMARK.value:
                tensor_value = ExplainParser._add_benchmark(tensor_event_value)
            elif field == ExplainFieldsEnum.METADATA.value:
                tensor_value = ExplainParser._add_metadata(tensor_event_value)
            logger.debug("Event generated, label is %s, step is %s.", field,
                         event.step)
            field_list.append(field)
            tensor_value_list.append(tensor_value)
        return field_list, tensor_value_list
예제 #8
0
 def _delete_loader(self, loader_id):
     """delete loader given loader_id"""
     if loader_id in self._loader_pool:
         self._loader_pool.pop(loader_id)
         logger.debug('delete loader %s', loader_id)
예제 #9
0
    def list_events(self, filenames):
        """
        Load summary file and parse file content.

        Args:
            filenames (list[str]): File name list.

        Returns:
            tuple, the elements of the tuple are:

                - file_changed (bool): True if the latest file is changed.
                - is_end (bool): True if all the summary files are finished loading.
                - event_data (dict): Event data where keys are explanation field.
        """
        summary_files = self.sort_files(filenames)

        is_end = False
        file_changed = False
        event_data = {}
        # Only the last file after sorting is parsed by this call.
        filename = summary_files[-1]

        file_path = FileHandler.join(self._summary_dir, filename)
        if filename != self._latest_filename:
            # Latest file changed: reopen it and restart parsing from offset 0.
            self._summary_file_handler = FileHandler(file_path, 'rb')
            self._latest_filename = filename
            self._latest_offset = 0
            file_changed = True

        new_size = FileHandler.file_stat(file_path).size
        if new_size == self._latest_offset:
            # File size equals the last recorded offset — nothing new to read.
            is_end = True
            return file_changed, is_end, event_data

        while True:
            start_offset = self._summary_file_handler.offset
            try:
                event_str = self.event_load(self._summary_file_handler)
                if event_str is None:
                    # No complete event available: rewind to this iteration's
                    # start so the same position is retried on the next call
                    # (presumably the writer is still appending — verify).
                    self._summary_file_handler.reset_offset(start_offset)
                    is_end = True
                    return file_changed, is_end, event_data
                if len(event_str) > MAX_EVENT_STRING:
                    # Oversized events are dropped, not parsed; the finally
                    # block still advances _latest_offset past them.
                    logger.warning(
                        "file_path: %s, event string: %d exceeds %d and drop it.",
                        self._summary_file_handler.file_path, len(event_str),
                        MAX_EVENT_STRING)
                    continue

                field_list, tensor_value_list = self._event_decode(event_str)
                for field, tensor_value in zip(field_list, tensor_value_list):
                    event_data[field] = tensor_value

                logger.debug("Parse summary file offset %d, file path: %s.",
                             self._summary_file_handler.offset, file_path)
                # NOTE(review): returns after the first successfully decoded
                # event; remaining events are picked up by subsequent calls.
                return file_changed, is_end, event_data
            except exceptions.CRCLengthFailedError as ex:
                # CRC/length check failed: rewind and stop here (presumably
                # the tail is mid-write and will be complete later — confirm).
                self._summary_file_handler.reset_offset(start_offset)
                is_end = True
                logger.warning(
                    "Check crc failed and reset offset, file_path=%s, offset=%s. Detail: %r.",
                    self._summary_file_handler.file_path,
                    self._summary_file_handler.offset, str(ex))
                return file_changed, is_end, event_data
            except Exception as ex:
                # Note: If an unknown error occurs, we will set the offset to the end of this file,
                # which is equivalent to stopping parsing this file. We do not delete the current job
                # and retain the data that has been successfully parsed.
                self._summary_file_handler.reset_offset(new_size)

                # Notice: If the current job is the latest one in the loader pool and the job is deleted,
                # the job goes into an infinite cycle of load-fail-delete-reload-load-fail-delete.
                # We need to prevent this infinite loop.
                logger.error(
                    "Parse summary file failed, will set offset to the file end. file_path: %s, "
                    "offset: %d, detail: %s.", file_path,
                    self._summary_file_handler.offset, str(ex))
                logger.exception(ex)
                raise UnknownError(str(ex))
            finally:
                # Runs on every exit path (return/continue/raise): keep
                # _latest_offset in sync with the handler's current offset.
                self._latest_offset = self._summary_file_handler.offset