Esempio n. 1
0
    def _is_inference_valid(sample):
        """
        Check whether the inference data is empty or have the same length.

        If probs have different length with the labels, it can be confusing when assigning each prob to label.
        '_is_inference_valid' returns True only when the data size of match to each other. Note that prob data could be
        empty, so empty prob will pass the check.
        """
        ground_truth_len = len(sample['ground_truth_label'])
        for name in [
                'ground_truth_prob', 'ground_truth_prob_sd',
                'ground_truth_prob_itl95_low', 'ground_truth_prob_itl95_hi'
        ]:
            if sample[name] and len(sample[name]) != ground_truth_len:
                logger.info(
                    'Length of %s not match the ground_truth_label. Length of ground_truth_label: %d,'
                    'length of %s: %d', name, ground_truth_len, name,
                    len(sample[name]))
                return False

        predicted_len = len(sample['predicted_label'])
        for name in [
                'predicted_prob', 'predicted_prob_sd',
                'predicted_prob_itl95_low', 'predicted_prob_itl95_hi'
        ]:
            if sample[name] and len(sample[name]) != predicted_len:
                logger.info(
                    'Length of %s not match the predicted_labels. Length of predicted_label: %d,'
                    'length of %s: %d', name, predicted_len, name,
                    len(sample[name]))
                return False
        return True
Esempio n. 2
0
    def load(self):
        """Start loading data from the latest summary file to the loader."""
        self.status = _LoaderStatus.LOADING.value
        summary_dir = self._loader_info['summary_dir']

        # Collect only regular files from the summary directory, then keep the
        # ones that look like summary files.
        filenames = [
            name for name in FileHandler.list_dir(summary_dir)
            if FileHandler.is_file(FileHandler.join(summary_dir, name))
        ]
        filenames = ExplainLoader._filter_files(filenames)

        if not filenames:
            raise TrainJobNotExistError(
                'No summary file found in %s, explain job will be delete.' %
                summary_dir)

        is_end = False
        # Keep pulling events until the parser reports the end of file, the
        # loader is stopped, or an unknown parsing error occurs.
        while not is_end and self.status != _LoaderStatus.STOP.value:
            try:
                file_changed, is_end, event_dict = self._parser.list_events(
                    filenames)
            except UnknownError:
                break

            if file_changed:
                logger.info(
                    'Summary file in %s update, reload the data in the summary.',
                    summary_dir)
                self._clear_job()
            if event_dict:
                self._import_data_from_event(event_dict)
Esempio n. 3
0
    def load(self):
        """Start loading data from the latest summary file to the loader."""
        summary_dir = self._loader_info['summary_dir']

        # Keep only regular files; directories and other entries are skipped.
        candidates = []
        for entry in FileHandler.list_dir(summary_dir):
            if FileHandler.is_file(FileHandler.join(summary_dir, entry)):
                candidates.append(entry)
        candidates = ExplainLoader._filter_files(candidates)

        if not candidates:
            raise TrainJobNotExistError(
                'No summary file found in %s, explain job will be delete.' %
                summary_dir)

        finished = False
        while not finished:
            is_clean, finished, event_dict = self._parser.parse_explain(
                candidates)

            if is_clean:
                # The parser switched to a newer summary file; drop stale data.
                logger.info(
                    'Summary file in %s update, reload the data in the summary.',
                    summary_dir)
                self._clear_job()
            if event_dict:
                self._import_data_from_event(event_dict)
Esempio n. 4
0
 def _repeat_loading(self, repeat_interval):
     """
     Periodically load summary data.

     Args:
         repeat_interval (int): Seconds to sleep between two load attempts.
             A falsy value means load once and return.
     """
     while True:
         try:
             logger.info('Start to load data, repeat interval: %r.',
                         repeat_interval)
             self._load_data()
             if not repeat_interval:
                 return
             time.sleep(repeat_interval)
         except UnknownError as ex:
             # Log and keep the loop alive on known loading failures.
             # FIX: the two literals previously concatenated without a
             # separator, rendering "...pool size: %dDetail: %s".
             logger.error(
                 'Unexpected error happens when loading data. Loading status: %s, loading pool size: %d.'
                 ' Detail: %s', self._loading_status, len(self._loader_pool),
                 str(ex))
Esempio n. 5
0
    def _stop_load_data(self):
        """
        Stop loading data; status changes to STOPPING, then waits for DONE.

        Returns immediately if no loading is in progress.
        """
        if self.status != _ExplainManagerStatus.LOADING.value:
            return

        logger.info('Start to stop loading data, set status to %s.',
                    _ExplainManagerStatus.STOPPING.value)
        self.status = _ExplainManagerStatus.STOPPING.value

        for loader in self._loader_pool.values():
            loader.stop()

        # Local import keeps this fix self-contained; a module-level import of
        # `time` is not visible from this chunk of the file.
        import time

        # FIX: the original `while ...: continue` busy-waited at 100% CPU and
        # starved other threads of the GIL. Sleep briefly between polls; the
        # loading thread is presumed to set the status to DONE when it exits.
        while self.status != _ExplainManagerStatus.DONE.value:
            time.sleep(0.01)
        logger.info('Stop loading data end.')
Esempio n. 6
0
    def _import_data_from_event(self, event_dict: Dict):
        """Parse and import data from the event data."""
        # Metadata must arrive (or already be cached) before any other field.
        if 'metadata' not in event_dict and self._is_metadata_empty():
            raise ParamValueError(
                'metadata is incomplete, should write metadata first in the summary.'
            )

        # Dispatch each known tag to its importer; unknown tags are only logged.
        importers = {
            ExplainFieldsEnum.METADATA.value:
                lambda event: self._import_metadata_from_event(event.metadata),
            ExplainFieldsEnum.BENCHMARK.value:
                lambda event: self._import_benchmark_from_event(event.benchmark),
            ExplainFieldsEnum.SAMPLE_ID.value:
                self._import_sample_from_event,
        }
        for tag, event in event_dict.items():
            importer = importers.get(tag)
            if importer is None:
                logger.info('Unknown ExplainField: %s.', tag)
            else:
                importer(event)
Esempio n. 7
0
    def _execute_loading(self):
        """Execute the data loading, newest loader first."""
        # Snapshot the keys under no lock; each lookup below re-checks under
        # the mutex because a loader may be deleted concurrently.
        for loader_id in reversed(list(self._loader_pool)):
            with self._loader_pool_mutex:
                loader = self._loader_pool.get(loader_id)
                if loader is None:
                    logger.debug(
                        'Loader %r has been deleted, will not load data.',
                        loader_id)
                    continue

            # Abort the whole pass as soon as a stop has been requested.
            if self.status == _ExplainManagerStatus.STOPPING.value:
                logger.info('Loader %s status is %s, will return.', loader_id,
                            loader.status)
                return

            loader.load()
Esempio n. 8
0
 def _repeat_loading(self, repeat_interval):
     """
     Periodically load summary data.

     Args:
         repeat_interval (int): Seconds to sleep between two load attempts.
             A falsy value means load once and return.
     """
     # Allocate CPU resources to enable gunicorn to start the web service.
     time.sleep(1)
     while True:
         try:
             if self.status == _ExplainManagerStatus.STOPPING.value:
                 # A stop is in progress; skip this round but keep polling.
                 logger.debug(
                     'Current loading status is %s, we will not trigger repeat loading.',
                     _ExplainManagerStatus.STOPPING.value)
             else:
                 logger.info(
                     'Starts triggering repeat loading, repeat interval: %r.',
                     repeat_interval)
                 self._load_data()
                 if not repeat_interval:
                     return
             time.sleep(repeat_interval)
         except UnknownError as ex:
             # FIX: the two literals previously concatenated without a
             # separator, rendering "...pool size: %dDetail: %s".
             logger.error(
                 'Unexpected error happens when loading data. Loading status: %s, loading pool size: %d.'
                 ' Detail: %s', self.status, len(self._loader_pool), str(ex))
Esempio n. 9
0
    def _load_data(self):
        """
        Prepare loaders in cache and start loading the data from summaries.

        Only a limited number of loaders will be cached, in terms of
        updated_time or query_time. The size of the cache pool is determined by
        _MAX_LOADERS_NUM. When the manager starts loading data, only the latest
        _MAX_LOADERS_NUM summaries will be loaded into the cache. If a cached
        loader is queried by 'get_job', the query_time of the loader is updated
        and the loader is moved to the end of the cache. If an uncached summary
        is queried, a new loader instance is generated and put at the end of
        the cache.

        Raises:
            UnknownError: If any unexpected error occurs while loading.
        """
        try:
            with self._status_mutex:
                # Another thread is already loading; this pass is redundant.
                if self._loading_status == _ExplainManagerStatus.LOADING.value:
                    logger.info(
                        'Current status is %s, will ignore to load data.',
                        self._loading_status)
                    return

                self._loading_status = _ExplainManagerStatus.LOADING.value

                self._cache_loaders()
                self._execute_loading()

                # An empty pool after loading means nothing usable was found.
                if not self._loader_pool:
                    self._loading_status = _ExplainManagerStatus.INVALID.value
                else:
                    self._loading_status = _ExplainManagerStatus.DONE.value

                logger.info(
                    'Load event data end, status: %s, and loader pool size: %d',
                    self._loading_status, len(self._loader_pool))

        except Exception as ex:
            self._loading_status = _ExplainManagerStatus.INVALID.value
            logger.exception(ex)
            # FIX: chain the original exception so its traceback is preserved
            # as __cause__ instead of being discarded.
            raise UnknownError(str(ex)) from ex
Esempio n. 10
0
    def parse_explain(self, filenames):
        """
        Load the latest summary file and parse its content incrementally.

        Args:
            filenames (list[str]): File name list; only the newest file
                (last after sorting) is read.

        Returns:
            tuple, a 3-tuple ``(is_clean, is_end, event_data)``:

                - is_clean (bool): True if a newer summary file was detected
                  and the previously cached data should be cleared.
                - is_end (bool): True if there is nothing more to read from
                  the file for now.
                - event_data (dict): Decoded fields mapped to tensor values;
                  may be empty.

        Raises:
            UnknownError: If an unexpected error occurs while parsing.
        """
        summary_files = self.sort_files(filenames)

        is_end = False
        is_clean = False
        event_data = {}
        # Only the newest summary file is parsed; older ones are ignored.
        filename = summary_files[-1]

        file_path = FileHandler.join(self._summary_dir, filename)
        # A new latest file means previous state is stale: reopen and mark
        # the cached data for cleaning.
        if filename != self._latest_filename:
            self._summary_file_handler = FileHandler(file_path, 'rb')
            self._latest_filename = filename
            self._latest_file_size = 0
            is_clean = True

        new_size = FileHandler.file_stat(file_path).size
        # File has not grown since the last pass — nothing new to read.
        if new_size == self._latest_file_size:
            is_end = True
            return is_clean, is_end, event_data

        while True:
            # Remember the offset so a partial/failed read can be rolled back.
            start_offset = self._summary_file_handler.offset
            try:
                event_str = self.event_load(self._summary_file_handler)
                if event_str is None:
                    # Incomplete event at EOF: rewind and wait for more data.
                    self._summary_file_handler.reset_offset(start_offset)
                    is_end = True
                    return is_clean, is_end, event_data
                if len(event_str) > MAX_EVENT_STRING:
                    # Oversized event: drop it and continue with the next one.
                    logger.warning(
                        "file_path: %s, event string: %d exceeds %d and drop it.",
                        self._summary_file_handler.file_path, len(event_str),
                        MAX_EVENT_STRING)
                    continue

                field_list, tensor_value_list = self._event_decode(event_str)
                for field, tensor_value in zip(field_list, tensor_value_list):
                    event_data[field] = tensor_value
                logger.info("Parse summary file offset %d, file path: %s.",
                            self._summary_file_handler.offset, file_path)
                # Return after one successfully decoded event; the caller
                # loops and calls again for the rest of the file.
                return is_clean, is_end, event_data

            except (exceptions.CRCFailedError,
                    exceptions.CRCLengthFailedError) as ex:
                # Corrupt record: rewind to the record start and give up on
                # this file for now.
                self._summary_file_handler.reset_offset(start_offset)
                is_end = True
                logger.warning(
                    "Check crc failed and ignore this file, file_path=%s, offset=%s. Detail: %r.",
                    self._summary_file_handler.file_path,
                    self._summary_file_handler.offset, str(ex))
                return is_clean, is_end, event_data
            except (OSError, DecodeError,
                    exceptions.MindInsightException) as ex:
                # Unreadable/undecodable file: ignore it and report end.
                is_end = True
                logger.warning(
                    "Parse log file fail, and ignore this file, detail: %r,"
                    "file path: %s.", str(ex),
                    self._summary_file_handler.file_path)
                return is_clean, is_end, event_data
            except Exception as ex:
                logger.exception(ex)
                raise UnknownError(str(ex))