def is_summary_directory(self, summary_base_dir, relative_path):
        """
        Check if the given summary directory is valid.

        The directory is accepted as soon as one of its direct entries is either
        a regular file whose name matches SUMMARY_FILENAME_REGEX or
        PB_FILENAME_REGEX, or a sub-directory whose name matches
        PROFILER_DIRECTORY_REGEX / CLUSTER_PROFILER_DIRECTORY_REGEX and which
        passes the corresponding profiler directory check. Symbolic links are
        skipped.

        Args:
            summary_base_dir (str): Path of summary base directory.
            relative_path (str): Relative path of summary directory, referring to summary base directory,
                                starting with "./" .

        Returns:
            bool, indicates if the given summary directory is valid.

        Raises:
            FileSystemPermissionError: If the summary directory can not be scanned
                because of insufficient permission.

        Examples:
            >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
            >>> summary_watcher = SummaryWatcher()
            >>> summaries = summary_watcher.is_summary_directory('/summary/base/dir', './job-01')
        """
        if contains_null_byte(summary_base_dir=summary_base_dir,
                              relative_path=relative_path):
            return False

        if not self._is_valid_summary_directory(summary_base_dir,
                                                relative_path):
            return False

        summary_directory = os.path.realpath(
            os.path.join(summary_base_dir, relative_path))
        try:
            entries = os.scandir(summary_directory)
        except PermissionError as exc:
            logger.error('Path of summary base directory is not accessible.')
            raise FileSystemPermissionError(
                'Path of summary base directory is not accessible.') from exc

        # This method returns on the first match, so the iterator is usually
        # not exhausted; the ``with`` block releases the directory handle
        # deterministically instead of leaving it to the garbage collector.
        with entries:
            for entry in entries:
                if entry.is_symlink():
                    continue

                name = entry.name
                is_summary_or_pb = (
                    re.search(self.SUMMARY_FILENAME_REGEX, name) is not None
                    or re.search(self.PB_FILENAME_REGEX, name) is not None)
                if is_summary_or_pb and entry.is_file():
                    return True

                if not entry.is_dir():
                    continue

                profiler_pattern = re.search(self.PROFILER_DIRECTORY_REGEX,
                                             name)
                cluster_profiler_pattern = re.search(
                    self.CLUSTER_PROFILER_DIRECTORY_REGEX, name)
                if profiler_pattern is None and cluster_profiler_pattern is None:
                    continue

                full_path = os.path.realpath(
                    os.path.join(summary_directory, name))
                # Both checkers return a sequence whose first item is the
                # validity flag; only that flag is needed here.
                if self._is_valid_profiler_directory(full_path)[0] or \
                        self._is_valid_cluster_profiler_directory(full_path)[0]:
                    return True
        return False
Example #2
0
def init(workspace='', config='', **kwargs):
    """
    Init MindInsight context.

    Every keyword argument (including non-empty ``workspace`` and ``config``)
    is exported as the environment variable ``MINDINSIGHT_<KEY>``, then
    ``settings.refresh()`` is called and the directory ``settings.WORKSPACE``
    is checked for access or created. Finally ``on_init`` is invoked on every
    registered hook.

    Args:
        workspace (str): specify mindinsight workspace, default is ''.
        config (str): specify mindinsight config file, default is ''.
        **kwargs: extra settings, each exported as ``MINDINSIGHT_<KEY>``.

    Raises:
        FileSystemPermissionError, if workspace is not allowed to access or available.
    """
    permissions = os.R_OK | os.W_OK | os.X_OK

    # set umask to 0o077
    os.umask(permissions << 3 | permissions)

    # assign argument values into environment
    if workspace:
        kwargs['workspace'] = workspace

    if config:
        kwargs['config'] = config

    for key, value in kwargs.items():
        variable = 'MINDINSIGHT_{}'.format(key.upper())
        os.environ[variable] = str(value)

    settings.refresh()

    # Report the path that is actually checked: the ``workspace`` argument is
    # empty whenever the workspace comes from kwargs, environment or config.
    workspace_path = settings.WORKSPACE

    if os.path.exists(workspace_path):
        if not os.access(workspace_path, permissions):
            raise FileSystemPermissionError(
                'Workspace {} not allowed to access'.format(workspace_path))
    else:
        try:
            # rwx for the owner only (0o700)
            mode = permissions << 6
            os.makedirs(workspace_path, mode=mode, exist_ok=True)
        except OSError as exc:
            # race condition or priority problem
            raise FileSystemPermissionError(
                'Workspace {} not available'.format(workspace_path)) from exc

    for hook in HookUtils.instance().hooks():
        hook.on_init()
Example #3
0
    def _get_hoc_image(self, image_path, train_id):
        """
        Get hoc image for image data demonstration in UI.

        Args:
            image_path (str): Image name of the form
                "<sample_id>_<label>_<layer>.jpg"; sample_id and layer are
                parsed as integers.
            train_id (str): ID used to look up the job in the job manager.

        Returns:
            bytes, the edited image encoded with _PNG_FORMAT.

        Raises:
            FileSystemPermissionError: If the resolved path is forbidden or the
                file can not be read because of insufficient permission.
            ImageNotExistError: If the image file does not exist.
            UnknownError: If the image file can not be opened for another reason.
        """
        # str.strip(".jpg") removes any of the characters '.', 'j', 'p', 'g'
        # from both ends rather than the suffix, so drop the suffix explicitly.
        suffix = ".jpg"
        stem = image_path[:-len(suffix)] if image_path.endswith(suffix) else image_path

        # sample_id and layer are integers, so split on the first and the last
        # underscore only; this keeps labels that contain underscores intact.
        sample_id, remainder = stem.split("_", 1)
        label, layer = remainder.rsplit("_", 1)
        layer = int(layer)

        job = self.job_manager.get_job(train_id)
        samples = job.samples
        label_idx = job.labels.index(label)

        chosen_sample = samples[int(sample_id)]
        original_path_image = chosen_sample['image']
        abs_image_path = os.path.join(self.job_manager.summary_base_dir,
                                      _clean_train_id_b4_join(train_id),
                                      original_path_image)
        if self._is_forbidden(abs_image_path):
            raise FileSystemPermissionError("Forbidden.")

        image_type = ImageQueryTypes.OUTCOME.value
        try:
            image = Image.open(abs_image_path)
        except FileNotFoundError as exc:
            raise ImageNotExistError(
                f"train_id:{train_id} path:{image_path} type:{image_type}") from exc
        except PermissionError as exc:
            raise FileSystemPermissionError(
                f"train_id:{train_id} path:{image_path} type:{image_type}") from exc
        except OSError as exc:
            raise UnknownError(
                f"Invalid image file: train_id:{train_id} path:{image_path} type:{image_type}"
            ) from exc

        occlusion = chosen_sample["hierarchical_occlusion"][label_idx]
        boxes = occlusion["hoc_layers"][layer]["boxes"]
        mask = occlusion["mask"]

        edit_steps = [EditStep(layer, *box) for box in boxes]
        image_cp = pil_apply_edit_steps(image, mask, edit_steps)
        buffer = io.BytesIO()
        image_cp.save(buffer, format=_PNG_FORMAT)

        return buffer.getvalue()
def handle_unknown_error(ex):
    """
    Handle unknown error.

    Logs the request method, the quoted request path and the exception, then
    maps the exception to a project error: PermissionError becomes
    FileSystemPermissionError, anything else becomes UnknownError.

    Args:
        ex (Exception): The exception that was not handled elsewhere.

    Returns:
        tuple, the JSON response carrying error_code and error_msg, and the
        HTTP status code of the mapped error.
    """
    logger.error('%r %r detail: %r', request.method, quote(request.path),
                 str(ex))
    logger.exception(ex)

    error = (FileSystemPermissionError('File System Permission Error')
             if isinstance(ex, PermissionError)
             else UnknownError('System error.'))

    payload = {
        'error_code': error.error_code,
        'error_msg': error.message,
    }
    return jsonify(payload), error.http_code
Example #5
0
    def _validate_config(self, config_path):
        """
        Check config_path.

        Normalizes the path, loads the file with yaml.safe_load, validates the
        loaded content against the config schema, normalizes its
        'summary_base_dir' entry and passes that directory to
        _make_summary_base_dir.

        Args:
            config_path (str): Path of the config file.

        Returns:
            The loaded config with 'summary_base_dir' replaced by its
            normalized value.

        Raises:
            FileSystemPermissionError: If the config file can not be opened
                because of insufficient permission.
            UnknownError: If opening or parsing the config file fails for any
                other reason.
        """
        normalized_path = self._normalize_path("config_path", config_path)
        try:
            with open(normalized_path, "r") as stream:
                loaded = yaml.safe_load(stream)
        except PermissionError as err:
            detail = str(err)
            raise FileSystemPermissionError(
                "Can not open config file. Detail: %s." % detail)
        except Exception as err:
            detail = str(err)
            raise UnknownError("Detail: %s." % detail)

        self._validate_config_schema(loaded)

        base_dir = self._normalize_path("summary_base_dir",
                                        loaded.get('summary_base_dir'))
        loaded['summary_base_dir'] = base_dir
        self._make_summary_base_dir(loaded['summary_base_dir'])
        return loaded
Example #6
0
    def list_summary_directories(self, summary_base_dir, overall=True):
        """
        List summary directories within base directory.

        Direct entries of the base directory are scanned; symbolic links are
        skipped, files are recorded against "./" and sub-directories are
        handed to _scan_subdir_entries. Scanning stops once
        MAX_SUMMARY_DIR_COUNT directories were collected or, when overall is
        False, once the scan counter exceeds MAX_SCAN_COUNT.

        Args:
            summary_base_dir (str): Path of summary base directory.
            overall (bool): Limit the total num of scanning if overall is False.

        Returns:
            list, list of summary directory info, each of which including the following attributes.
                - relative_path (str): Relative path of summary directory, referring to settings.SUMMARY_BASE_DIR,
                                        starting with "./".
                - create_time (datetime): Creation time of summary file.
                - update_time (datetime): Modification time of summary file.
                - profiler (dict): profiler info, including profiler subdirectory path, profiler creation time and
                                    profiler modification time.

        Raises:
            FileSystemPermissionError: If the base directory can not be scanned
                because of insufficient permission.

        Examples:
            >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
            >>> summary_watcher = SummaryWatcher()
            >>> directories = summary_watcher.list_summary_directories('/summary/base/dir')
        """
        if contains_null_byte(summary_base_dir=summary_base_dir):
            return []

        relative_path = os.path.join('.', '')
        if not self._is_valid_summary_directory(summary_base_dir,
                                                relative_path):
            return []

        summary_dict = {}
        counter = Counter(max_count=None if overall else self.MAX_SCAN_COUNT)

        try:
            entries = os.scandir(summary_base_dir)
        except PermissionError as exc:
            logger.error('Path of summary base directory is not accessible.')
            raise FileSystemPermissionError(
                'Path of summary base directory is not accessible.') from exc

        # The loop may stop early, leaving the iterator unexhausted; the
        # ``with`` block releases the directory handle on every exit path.
        with entries:
            for entry in entries:
                if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT:
                    break
                try:
                    counter.add()
                except MaxCountExceededError:
                    logger.info(
                        'Stop further scanning due to overall is False and '
                        'number of scanned files exceeds upper limit.')
                    break
                if entry.is_symlink():
                    continue
                if entry.is_file():
                    self._update_summary_dict(summary_dict, summary_base_dir,
                                              relative_path, entry)
                elif entry.is_dir():
                    entry_path = os.path.realpath(
                        os.path.join(summary_base_dir, entry.name))
                    self._scan_subdir_entries(summary_dict, summary_base_dir,
                                              entry_path, entry.name, counter)

        directories = []
        for key, value in summary_dict.items():
            directory = {
                'relative_path': key,
                'profiler': None,
                'create_time': value['ctime'],
                'update_time': value['mtime'],
            }
            profiler = value.get('profiler')
            if profiler is not None:
                directory['profiler'] = {
                    'directory': profiler['directory'],
                    'create_time': profiler['ctime'],
                    'update_time': profiler['mtime'],
                }
            directories.append(directory)

        # sort by update time in descending order and relative path in ascending order
        directories.sort(key=lambda x: (-int(x['update_time'].timestamp()), x[
            'relative_path']))

        return directories
Example #7
0
    def list_summaries(self, summary_base_dir, relative_path='./'):
        """
        Get info of latest summary file within the given summary directory.

        Only regular, non-symlink files whose name matches
        SUMMARY_FILENAME_REGEX are reported. Files whose embedded timestamp
        can not be converted to a datetime, or which disappear before they can
        be stat-ed, are skipped.

        Args:
            summary_base_dir (str): Path of summary base directory.
            relative_path (str): Relative path of summary directory, referring to summary base directory,
                                starting with "./" .

        Returns:
            list, list of summary file including the following attributes.
                - file_name (str): Summary file name.
                - create_time (datetime): Creation time of summary file.
                - update_time (datetime): Modification time of summary file.

        Raises:
            FileSystemPermissionError: If the summary directory can not be
                scanned because of insufficient permission.

        Examples:
            >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
            >>> summary_watcher = SummaryWatcher()
            >>> summaries = summary_watcher.list_summaries('/summary/base/dir', './job-01')
        """
        if contains_null_byte(summary_base_dir=summary_base_dir,
                              relative_path=relative_path):
            return []

        if not self._is_valid_summary_directory(summary_base_dir,
                                                relative_path):
            return []

        summaries = []
        summary_directory = os.path.realpath(
            os.path.join(summary_base_dir, relative_path))
        try:
            entries = os.scandir(summary_directory)
        except PermissionError as exc:
            logger.error('Path of summary directory is not accessible.')
            raise FileSystemPermissionError(
                'Path of summary directory is not accessible.') from exc

        # Close the directory handle even if an unexpected error interrupts
        # the iteration.
        with entries:
            for entry in entries:
                if entry.is_symlink() or not entry.is_file():
                    continue

                pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
                if pattern is None:
                    continue

                timestamp = int(pattern.groupdict().get('timestamp'))
                try:
                    # extract created time from filename
                    ctime = datetime.datetime.fromtimestamp(timestamp).astimezone()
                except (OverflowError, OSError, ValueError):
                    # fromtimestamp raises OverflowError, OSError or ValueError
                    # (e.g. "year is out of range") depending on platform and
                    # magnitude; a single oddly named file must not abort the
                    # whole listing.
                    continue

                try:
                    stat = entry.stat()
                except FileNotFoundError:
                    logger.warning('File %s not found.', entry.name)
                    continue

                mtime = datetime.datetime.fromtimestamp(stat.st_mtime).astimezone()

                summaries.append({
                    'file_name': entry.name,
                    'create_time': ctime,
                    'update_time': mtime,
                })

        # sort by update time in descending order and filename in ascending order
        summaries.sort(
            key=lambda x: (-int(x['update_time'].timestamp()), x['file_name']))

        return summaries
Example #8
0
    def query_image_binary(self, train_id, image_path, image_type):
        """
        Query image binary content.

        For any type other than 'overlay', and for overlay images that are
        already in _RGBA_MODE, the file content is returned unchanged.
        Otherwise the single-channel (or first RGB channel) saliency values
        are blended between _SALIENCY_CMAP_LOW and _SALIENCY_CMAP_HI, the
        saliency itself is used as the alpha channel, and the result is
        encoded with _PNG_FORMAT.

        Args:
            train_id (str): Job ID.
            image_path (str): Image path relative to explain job's summary directory.
            image_type (str): Image type, 'original' or 'overlay'.

        Returns:
            bytes, image binary.

        Raises:
            FileSystemPermissionError: If the resolved path is forbidden or the
                file can not be read because of insufficient permission.
            ImageNotExistError: If the image file does not exist.
            UnknownError: If the file can not be opened as an image or the
                overlay image has an unsupported mode.
        """

        abs_image_path = os.path.join(self.job_manager.summary_base_dir,
                                      _clean_train_id_b4_join(train_id),
                                      image_path)

        if self._is_forbidden(abs_image_path):
            raise FileSystemPermissionError("Forbidden.")

        image = None
        try:
            try:

                if image_type != "overlay":
                    # no need to convert
                    with open(abs_image_path, "rb") as fp:
                        return fp.read()

                image = Image.open(abs_image_path)

                if image.mode == _RGBA_MODE:
                    # It is RGBA already, do not convert.
                    with open(abs_image_path, "rb") as fp:
                        return fp.read()

            except FileNotFoundError as exc:
                raise ImageNotExistError(
                    f"train_id:{train_id} path:{image_path} type:{image_type}") from exc
            except PermissionError as exc:
                raise FileSystemPermissionError(
                    f"train_id:{train_id} path:{image_path} type:{image_type}") from exc
            except OSError as exc:
                raise UnknownError(
                    f"Invalid image file: train_id:{train_id} path:{image_path} type:{image_type}"
                ) from exc

            # The divisions below allocate new arrays, so ``saliency`` does not
            # depend on the image once it has been closed.
            if image.mode == _SINGLE_CHANNEL_MODE:
                saliency = np.asarray(image) / _UINT8_MAX
            elif image.mode == _RGB_MODE:
                saliency = np.asarray(image)
                saliency = saliency[:, :, 0] / _UINT8_MAX
            else:
                raise UnknownError(f"Invalid overlay image mode:{image.mode}.")
        finally:
            # Release the image file handle on every exit path, including the
            # early RGBA return and the invalid-mode error.
            if image is not None:
                image.close()

        # Zero-initialized so the alpha plane never holds uninitialized memory
        # while the colour blend is computed; it is overwritten below anyway.
        saliency_stack = np.zeros((saliency.shape[0], saliency.shape[1], 4))
        for c in range(3):
            saliency_stack[:, :, c] = saliency
        rgba = saliency_stack * _SALIENCY_CMAP_HI
        rgba += (1 - saliency_stack) * _SALIENCY_CMAP_LOW
        rgba[:, :, 3] = saliency * _UINT8_MAX

        overlay = Image.fromarray(np.uint8(rgba), mode=_RGBA_MODE)
        buffer = io.BytesIO()
        overlay.save(buffer, format=_PNG_FORMAT)

        return buffer.getvalue()
    def list_summary_directories(self, summary_base_dir, overall=True):
        """
        List summary directories within base directory.

        Files directly inside the base directory are recorded against "./";
        files inside each direct sub-directory are recorded against
        "./<sub-directory name>". Symbolic links are skipped. Scanning stops
        once MAX_SUMMARY_DIR_COUNT directories were collected or, when overall
        is False, once MAX_SCAN_COUNT entries were scanned.

        Args:
            summary_base_dir (str): Path of summary base directory.
            overall (bool): Limit the total num of scanning if overall is False.

        Returns:
            list, list of summary directory info, each of which including the following attributes.
                - relative_path (str): Relative path of summary directory, referring to settings.SUMMARY_BASE_DIR,
                                        starting with "./".
                - create_time (datetime): Creation time of summary file.
                - update_time (datetime): Modification time of summary file.

        Raises:
            FileSystemPermissionError: If the base directory can not be scanned
                because of insufficient permission.

        Examples:
            >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
            >>> summary_watcher = SummaryWatcher()
            >>> directories = summary_watcher.list_summary_directories('/summary/base/dir')
        """
        if self._contains_null_byte(summary_base_dir=summary_base_dir):
            return []

        if not os.path.exists(summary_base_dir):
            logger.warning('Path of summary base directory not exists.')
            return []

        if not os.path.isdir(summary_base_dir):
            logger.warning(
                'Path of summary base directory is not a valid directory.')
            return []

        summary_dict = {}
        scan_count = 0

        try:
            entries = os.scandir(summary_base_dir)
        except PermissionError as exc:
            logger.error('Path of summary base directory is not accessible.')
            raise FileSystemPermissionError(
                'Path of summary base directory is not accessible.') from exc

        # Loop-invariant: files directly under the base directory belong to "./".
        relative_path = os.path.join('.', '')

        # Both loops may break early; the ``with`` blocks release the
        # directory handles deterministically on every exit path.
        with entries:
            for entry in entries:
                if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT:
                    break
                if entry.is_symlink():
                    pass
                elif entry.is_file():
                    self._update_summary_dict(summary_dict, relative_path, entry)
                elif entry.is_dir():
                    full_path = os.path.realpath(
                        os.path.join(summary_base_dir, entry.name))

                    try:
                        subdir_entries = os.scandir(full_path)
                    except PermissionError:
                        # An unreadable sub-directory is skipped, not fatal.
                        logger.warning(
                            'Path of %s under summary base directory is not accessible.',
                            entry.name)
                    else:
                        subdir_relative_path = os.path.join('.', entry.name)
                        with subdir_entries:
                            for subdir_entry in subdir_entries:
                                if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT:
                                    break
                                if subdir_entry.is_symlink():
                                    pass
                                elif subdir_entry.is_file():
                                    self._update_summary_dict(summary_dict,
                                                              subdir_relative_path,
                                                              subdir_entry)

                                scan_count += 1
                                if not overall and scan_count >= self.MAX_SCAN_COUNT:
                                    break

                # The entry itself counts as one scanned item; this check also
                # ends the outer loop after the inner loop hit the limit.
                scan_count += 1
                if not overall and scan_count >= self.MAX_SCAN_COUNT:
                    logger.info(
                        'Stop further scanning due to overall is False and '
                        'number of scanned files exceeds upper limit.')
                    break

        directories = [{
            'relative_path': key,
            'create_time': value['ctime'],
            'update_time': value['mtime'],
        } for key, value in summary_dict.items()]

        # sort by update time in descending order and relative path in ascending order
        directories.sort(key=lambda x: (-int(x['update_time'].timestamp()), x[
            'relative_path']))

        return directories