Example #1
0
    def get_single_image(self, train_id, tag, step):
        """
        Fetch the raw encoded bytes of a single image.

        Args:
            train_id (str): The ID of the events data the image belongs to.
            tag (str): The name of the tag the image belongs to.
            step (int): The step of the image in the current reservoir; converted with `to_int`.
                The step lookup itself is delegated to `_find_image` (the original documentation
                states that step = -1 selects the image of the final step).

        Returns:
            bytes, the image found by `_find_image` for the requested step.

        Raises:
            ImageNotExistError: If listing the tensors raises ParamValueError, or if no image is
                found for the requested step.
        """
        Validation.check_param_empty(train_id=train_id, tag=tag, step=step)
        step_number = to_int(step, "step")

        try:
            candidates = self._data_manager.list_tensors(train_id, tag)
        except ParamValueError as ex:
            # A failed tensor lookup is reported to the caller as a missing image.
            raise ImageNotExistError(ex.message)

        found = _find_image(candidates, step_number)
        if found is not None:
            return found

        raise ImageNotExistError(
            "Can not find the step with given train job id and tag.")
def query_explain_jobs():
    """
    Query one page of explain jobs.

    The optional `offset` (default 0) and `limit` (default 10) query arguments are read from the
    request and validated with `Validation.check_offset` / `Validation.check_limit` before the
    query is delegated to `ExplainJobEncap`.

    Returns:
        Response, JSON object with `name` (base name of the resolved summary base directory),
        `total` (number of jobs) and `explain_jobs` (the job metadata of the requested page).

    Raises:
        ParamTypeError: If one of (offset, limit) is not integer in the request.
        ParamValueError: If one of (offset, limit) does not have the expected value in the request.
    """
    raw_offset = request.args.get("offset", default=0)
    raw_limit = request.args.get("limit", default=10)
    page = Validation.check_offset(offset=raw_offset)
    page_size = Validation.check_limit(
        raw_limit, min_value=1, max_value=SummaryWatcher.MAX_SUMMARY_DIR_COUNT)

    total, jobs = ExplainJobEncap(EXPLAIN_MANAGER).query_explain_jobs(page, page_size)

    base_dir_name = os.path.basename(os.path.realpath(settings.SUMMARY_BASE_DIR))
    payload = {'name': base_dir_name, 'total': total, 'explain_jobs': jobs}
    return jsonify(payload)
Example #3
0
    def get_plugins(self, train_id, manual_update=True):
        """
        Look up the plug-in data cached for the specified training job.

        Args:
            train_id (str): Specify a training job to query.
            manual_update (bool): When true, `cache_train_job` is called for the job before its
                details are read.

        Returns:
            dict, `{"plugins": ...}`, refer to restful api. When no plug-in data is available,
            every member of `PluginNameEnum` is mapped to an empty list.

        Raises:
            QueryStringContainsNullByteError: If `train_id` contains a null byte.
        """
        Validation.check_param_empty(train_id=train_id)
        if contains_null_byte(train_id=train_id):
            raise QueryStringContainsNullByteError("train job id: {} contains null byte.".format(train_id))

        if manual_update:
            self._data_manager.cache_train_job(train_id)

        job = self._data_manager.get_train_job(train_id)

        try:
            plugins = job.get_detail(DATAVISUAL_CACHE_KEY).get(DATAVISUAL_PLUGIN_KEY)
        except exceptions.TrainJobDetailNotInCacheError:
            # Detail not cached yet: fall through to the default (empty) result.
            plugins = []

        if plugins:
            return {"plugins": plugins}

        return {"plugins": {member: [] for member in PluginNameEnum.list_members()}}
Example #4
0
    def get_single_image(self, train_id, tag, step):
        """
        Fetch the raw encoded bytes of the image recorded at a given step.

        Args:
            train_id (str): The ID of the events data the image belongs to.
            tag (str): The name of the tag the image belongs to.
            step (int): The step of the image in the current reservoir; converted with `to_int`.

        Returns:
            bytes, the `encoded_image` of the first tensor whose step equals `step`.

        Raises:
            ParamValueError: If no image is found for the requested step.
        """
        Validation.check_param_empty(train_id=train_id, tag=tag, step=step)
        wanted_step = to_int(step, "step")

        candidates = self._data_manager.list_tensors(train_id, tag)

        # Only the first tensor with a matching step is considered.
        matched = next((item for item in candidates if item.step == wanted_step), None)

        # Default value for bytes field is empty byte string normally,
        # see also "Optional Fields And Default Values" in protobuf
        # documentation.
        image = None if matched is None else matched.value.encoded_image

        if image is None:
            raise ParamValueError(
                "Can not find the step with given train job id and tag.")

        return image
Example #5
0
    def list_explain_directories(self, summary_base_dir, offset=0, limit=None):
        """
        List explain directories within base directory, optionally one page at a time.

        Args:
            summary_base_dir (str): Path of summary base directory.
            offset (int): An offset for page. Ex, offset is 0, mean current page is 1. Default value is 0.
            limit (int): The max data items for per page. Default value is None; when the validated
                limit is None, all directories are returned without paging.

        Returns:
            tuple[total, directories], total indicates the overall number of explain directories and directories
                    indicate list of summary directory info including the following attributes.
                - relative_path (str): Relative path of summary directory, referring to settings.SUMMARY_BASE_DIR,
                                        starting with "./".
                - create_time (datetime): Creation time of summary file.
                - update_time (datetime): Modification time of summary file.

        Raises:
            ParamValueError, if offset < 0 or limit is out of valid value range.
            ParamTypeError, if offset or limit is not valid integer.

        Examples:
            >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
            >>> summary_watcher = SummaryWatcher()
            >>> total, directories = summary_watcher.list_explain_directories('/summary/base/dir', offset=0, limit=10)
        """
        page = Validation.check_offset(offset=offset)
        page_size = Validation.check_limit(limit, min_value=1, max_value=999, default_value=None)

        found = self.list_summary_directories(summary_base_dir, overall=False, list_explain=True)
        total = len(found)

        if page_size is not None:
            # `offset` counts pages, not items.
            start = page * page_size
            return total, found[start:start + page_size]

        return total, found
    def get_tensors(self, train_ids, tags, step, dims, detail):
        """
        Get tensor data for given train_ids, tags, step, dims and detail.

        Each train id is URL-unquoted (strict mode) before it is used. The decoded ids are collected
        in a new list, so the `train_ids` object passed by the caller is not modified.

        Args:
            train_ids (list): Specify list of train job ID.
            tags (list): Specify list of tag.
            step (int): Specify step of tag, it's necessary when detail is equal to 'data'.
            dims (str): Specify dims of step, it's necessary when detail is equal to 'data'.
            detail (str): Specify which data to query, available values: 'stats', 'histogram' and 'data'.

        Returns:
            dict, a dict including the `tensors`.

        Raises:
            UrlDecodeError, If unquote train id error with strict mode.
        """
        Validation.check_param_empty(train_id=train_ids, tag=tags)
        TensorUtils.validate_dims_format(dims)

        # Decode into a fresh list instead of overwriting the caller's list in place.
        decoded_ids = []
        for train_id in train_ids:
            try:
                decoded_ids.append(unquote(train_id, errors='strict'))
            except UnicodeDecodeError as err:
                raise UrlDecodeError('Unquote train id error with strict mode') from err

        tensors = []
        for train_id in decoded_ids:
            tensors.extend(self._get_train_tensors(train_id, tags, step, dims, detail))

        return {"tensors": tensors}
def _get_query_sample_parameters(data):
    """
    Extract and validate the parameters of a sample query.

    Missing optional keys fall back to: limit=10, offset=0, sorted_name="", sorted_type="descending";
    `labels` and `prediction_types` stay None when absent.

    Args:
        data (dict): Dict that contains request info.

    Returns:
        dict, key-value pairs to call backend query functions. Keys: `train_id`, `labels`, `limit`,
        `offset`, `sorted_name`, `sorted_type` and `prediction_types`.

    Raises:
        ParamMissError: If train_id info is not in the request.
        ParamTypeError: If certain key is not in the expected type in the request.
        ParamValueError: If certain key does not have the expected value in the request.
    """

    train_id = data.get("train_id")
    if train_id is None:
        raise ParamMissError('train_id')

    labels = data.get("labels")
    if labels is not None:
        _validate_type(labels, "labels", list)
    if labels:
        for item in labels:
            _validate_type(item, "element of labels", str)

    limit = data.get("limit", 10)
    limit = Validation.check_limit(limit, min_value=1, max_value=100)
    offset = data.get("offset", 0)
    offset = Validation.check_offset(offset=offset)
    sorted_name = data.get("sorted_name", "")
    _validate_value(sorted_name, "sorted_name",
                    ('', 'confidence', 'uncertainty'))

    sorted_type = data.get("sorted_type", "descending")
    _validate_value(sorted_type, "sorted_type", ("ascending", "descending"))

    prediction_types = data.get("prediction_types")
    if prediction_types is not None:
        # Report the parameter that is actually being checked.
        _validate_type(prediction_types, "prediction_types", list)
    if prediction_types:
        for item in prediction_types:
            _validate_value(item, "element of prediction_types",
                            ('TP', 'FN', 'FP'))

    query_kwarg = {
        "train_id": train_id,
        "labels": labels,
        "limit": limit,
        "offset": offset,
        "sorted_name": sorted_name,
        "sorted_type": sorted_type,
        "prediction_types": prediction_types
    }
    return query_kwarg
    def _get_train_tensors(self, train_id, tags, step, dims, detail):
        """
        Get tensor data for given train_id, tags, step, dims and detail.

        Args:
            train_id (str): Specify a train job ID.
            tags (list): Specify list of tag.
            step (int): Specify step of tensor, it's necessary when detail is set to 'data'.
            dims (str): Specify dims of tensor, it's necessary when detail is set to 'data'.
            detail (str): Specify which data to query, available values: 'stats', 'histogram' and 'data'.
                None is handled the same way as 'stats'.

        Returns:
            list[dict], a list of dictionaries containing the `train_id`, `tag`, `values`.

        Raises:
            TensorNotExistError, If tensor with specific train_id and tag is not exist in cache.
            ParamValueError, If the value of detail is not within available values:
                            'stats', 'histogram' and 'data'.
        """

        tensors_response = []
        for tag in tags:
            try:
                tensors = self._data_manager.list_tensors(train_id, tag)
            except ParamValueError as err:
                raise TensorNotExistError(err.message) from err

            if tensors and not isinstance(tensors[0].value, TensorContainer):
                raise TensorNotExistError(
                    "there is no tensor data in this tag: {}".format(tag))

            if detail is None or detail == 'stats':
                values = self._get_tensors_summary(detail, tensors)
            elif detail == 'data':
                Validation.check_param_empty(step=step, dims=dims)
                # Parse into fresh locals: rebinding `dims` / `step` here would feed the
                # already-parsed values back into the parsers when the next tag is processed.
                # Limit to query max two dimensions for tensor in table view.
                parsed_dims = TensorUtils.parse_shape(dims,
                                                      limit=MAX_DIMENSIONS_FOR_TENSOR)
                step_value = to_int(step, "step")
                values = self._get_tensors_data(step_value, parsed_dims, tensors)
            elif detail == 'histogram':
                values = self._get_tensors_histogram(tensors)
            else:
                raise ParamValueError(
                    'Can not support this value: {} of detail.'.format(detail))

            tensor = {"train_id": train_id, "tag": tag, "values": values}
            tensors_response.append(tensor)

        return tensors_response
Example #9
0
    def get_tensors(self, train_ids, tags, step, dims, detail):
        """
        Get tensor data for given train_ids, tags, step, dims and detail.

        When `dims` is given it must be a string of the form `[a,b,...]` where each item is an
        integer (with an optional leading minus sign) or `:`. Each train id is URL-unquoted (strict
        mode) into a new list, so the `train_ids` object passed by the caller is not modified.

        Args:
            train_ids (list): Specify list of train job ID.
            tags (list): Specify list of tag.
            step (int): Specify step of tag, it's necessary when detail is equal to 'data'.
            dims (str): Specify dims of step, it's necessary when detail is equal to 'data'.
            detail (str): Specify which data to query, available values: 'stats', 'histogram' and 'data'.

        Returns:
            dict, a dict including the `tensors`.

        Raises:
            ParamValueError, If dims is not a str or does not have the expected format.
            UrlDecodeError, If unquote train id error with strict mode.
        """
        Validation.check_param_empty(train_id=train_ids, tag=tags)
        if dims is not None:
            if not isinstance(dims, str):
                raise ParamValueError('The type of dims must be str, but got {}.'.format(type(dims)))
            # The stripped value is the one passed on to `_get_train_tensors`.
            dims = dims.strip()
            if not (dims.startswith('[') and dims.endswith(']')):
                raise ParamValueError('The value: {} of dims must be '
                                      'start with `[` and end with `]`.'.format(dims))
            for dim in dims[1:-1].split(','):
                dim = dim.strip()
                if dim == ":":
                    continue
                # A single leading minus sign is allowed in front of the digits.
                if dim.startswith('-'):
                    dim = dim[1:]
                if not dim.isdigit():
                    raise ParamValueError('The value: {} of dims in the square brackets '
                                          'must be int or `:`.'.format(dims))

        # Decode into a fresh list instead of overwriting the caller's list in place.
        decoded_ids = []
        for train_id in train_ids:
            try:
                decoded_ids.append(unquote(train_id, errors='strict'))
            except UnicodeDecodeError as err:
                raise UrlDecodeError('Unquote train id error with strict mode') from err

        tensors = []
        for train_id in decoded_ids:
            tensors.extend(self._get_train_tensors(train_id, tags, step, dims, detail))

        return {"tensors": tensors}
    def search_node_names(self, search_content, offset, limit):
        """
        Search node names in the graph by search content, one page at a time.

        Args:
            search_content (Any): This content can be the key content of the node to search.
            offset (int): An offset for page. Ex, offset is 0, mean current page is 1.
            limit (int): The max data items for per page; validated against the range 1 to 1000.

        Returns:
            TypedDict('Names', {'names': list[str]}), {"names": ["node_names"]}.
        """
        page = Validation.check_offset(offset=offset)
        page_size = Validation.check_limit(limit, min_value=1, max_value=1000)
        return {"names": self._graph.search_node_names(search_content, page, page_size)}
def query_train_jobs():
    """
    Query one page of train jobs.

    The optional `offset` (default 0) and `limit` (default 10) query arguments are read from the
    request and validated before the query is delegated to `TrainTaskManager`.

    Returns:
        Response, JSON object with `name` (base name of the resolved summary base directory),
        `total` and `train_jobs`.
    """
    raw_offset = request.args.get("offset", default=0)
    raw_limit = request.args.get("limit", default=10)

    page = Validation.check_offset(offset=raw_offset)
    page_size = Validation.check_limit(raw_limit, min_value=1, max_value=SummaryWatcher.MAX_SUMMARY_DIR_COUNT)

    total, train_jobs = TrainTaskManager(DATA_MANAGER).query_train_jobs(page, page_size)

    base_dir_name = os.path.basename(os.path.realpath(settings.SUMMARY_BASE_DIR))
    payload = {'name': base_dir_name, 'total': total, 'train_jobs': train_jobs}
    return jsonify(payload)
    def search_single_node(self, name):
        """
        Search a node in the graph by its name.

        Args:
            name (str): The name of node; must not be empty.

        Returns:
            dict, the result of the graph's `search_single_node`, format is:
                item_object = {'nodes': [<Node object>],
                       'scope_name': '',
                       'children': {<item_object>}}
        """
        Validation.check_param_empty(name=name)
        return self._graph.search_single_node(name)
Example #13
0
    def get_single_train_task(self, plugin_name, train_id):
        """
        Get a single train task for the given plug-in.

        Args:
            plugin_name (str): Plugin name, refer `PluginNameEnum`.
            train_id (str): Specify a training job to query.

        Returns:
            {'train_jobs': list[TrainJob]}, refer to restful api. The list holds exactly one job.

        Raises:
            TrainJobNotExistError: If no train job is found for the given train_id and plugin_name.
        """
        Validation.check_param_empty(plugin_name=plugin_name, train_id=train_id)
        Validation.check_plugin_name(plugin_name=plugin_name)

        job = self._data_manager.get_train_job_by_plugin(train_id=train_id, plugin_name=plugin_name)
        if job is not None:
            return {"train_jobs": [job]}

        raise exceptions.TrainJobNotExistError()
    def get_histograms(self, train_id, tag):
        """
        Builds a JSON-serializable object with information about histogram data.

        Args:
            train_id (str): The ID of the events data.
            tag (str): The name of the tag the histogram data all belong to.

        Returns:
            dict, a dict including the `train_id`, `tag`, and `histograms`.
                    {
                        "train_id": ****,
                        "tag": ****,
                        "histograms": [{
                            "wall_time": ****,
                            "step": ****,
                            "buckets": [[**, **, **]],
                            },
                            {...}
                        ]
                    }

        Raises:
            HistogramNotExistError: If listing the tensors raises ParamValueError.
        """
        Validation.check_param_empty(train_id=train_id, tag=tag)
        logger.info("Start to process histogram data...")
        try:
            records = self._data_manager.list_tensors(train_id, tag)
        except ParamValueError as err:
            raise HistogramNotExistError(err.message)

        # One entry per recorded tensor, in the order the data manager returned them.
        histograms = [
            {
                "wall_time": record.wall_time,
                "step": record.step,
                "buckets": record.value.buckets()
            }
            for record in records
        ]

        logger.info("Histogram data processing is finished!")
        return {"train_id": train_id, "tag": tag, "histograms": histograms}
    def __init__(self, train_id, data_manager, tag=None):
        """
        Load the graph of a train job into `self._graph`.

        Args:
            train_id (str): The train job to load the graph from; must not be empty.
            data_manager: Passed to the base class initializer and used to look up the job.
            tag (str): Graph tag to load. Default is None, which selects the first tag of the job.

        Raises:
            SummaryLogPathInvalid: If no graph train job is found for `train_id`.
            ParamValueError: If the train job has no graph tags.
        """
        Validation.check_param_empty(train_id=train_id)
        super(GraphProcessor, self).__init__(data_manager)

        job = self._data_manager.get_train_job_by_plugin(train_id, PluginNameEnum.GRAPH.value)
        if job is None:
            raise exceptions.SummaryLogPathInvalid()
        if not job['tags']:
            raise ParamValueError(
                "Can not find any graph data in the train job.")

        graph_tag = job['tags'][0] if tag is None else tag

        # The graph is the value of the first tensor listed under the tag.
        self._graph = self._data_manager.list_tensors(train_id, tag=graph_tag)[0].value
Example #16
0
    def __init__(self, train_id, data_manager, tag=None):
        """
        Load the graph of a train job into `self._graph`.

        Args:
            train_id (str): The train job to load the graph from; must not be empty.
            data_manager: Passed to the base class initializer and used to look up the job.
            tag (str): Graph tag to load. Default is None, which selects the first tag of the job.

        Raises:
            TrainJobNotExistError: If no graph train job is found for `train_id`.
            GraphNotExistError: If the train job has no graph tags, or `tag` is given but is not
                one of them.
        """
        Validation.check_param_empty(train_id=train_id)
        super(GraphProcessor, self).__init__(data_manager)

        job = self._data_manager.get_train_job_by_plugin(train_id, PluginNameEnum.GRAPH.value)
        if job is None:
            raise exceptions.TrainJobNotExistError()

        available_tags = job['tags']
        if not available_tags:
            raise exceptions.GraphNotExistError()
        if tag is not None and tag not in available_tags:
            raise exceptions.GraphNotExistError()

        graph_tag = available_tags[0] if tag is None else tag

        # The graph is the value of the first tensor listed under the tag.
        self._graph = self._data_manager.list_tensors(train_id, tag=graph_tag)[0].value
Example #17
0
def query_explain_jobs():
    """
    Query one page of explain jobs.

    Reads the optional `offset` (default 0) and `limit` (default 10) query arguments, validates
    them, and returns a JSON response with `name`, `total` and `explain_jobs`.
    """
    requested_offset = request.args.get("offset", default=0)
    requested_limit = request.args.get("limit", default=10)
    checked_offset = Validation.check_offset(offset=requested_offset)
    checked_limit = Validation.check_limit(
        requested_limit, min_value=1, max_value=SummaryWatcher.MAX_SUMMARY_DIR_COUNT)

    job_encap = ExplainJobEncap(EXPLAIN_MANAGER)
    total, jobs = job_encap.query_explain_jobs(checked_offset, checked_limit)

    # `name` is the last path component of the resolved summary base directory.
    resolved_base = os.path.realpath(settings.SUMMARY_BASE_DIR)
    return jsonify({
        'name': os.path.basename(resolved_base),
        'total': total,
        'explain_jobs': jobs,
    })
Example #18
0
    def search_node_names(self, search_content, offset, limit):
        """
        Search nodes whose names match the search content, one page at a time.

        Args:
            search_content (Any): This content can be the key content of the node to search.
            offset (int): An offset for page. Ex, offset is 0, mean current page is 1.
            limit (int): The max data items for per page; validated against the range 1 to 1000.

        Returns:
            Dict, the searched nodes of the requested page under the key `nodes`.
        """
        page = Validation.check_offset(offset=offset)
        page_size = Validation.check_limit(limit, min_value=1, max_value=1000)

        matched = self._graph.search_nodes_by_pattern(search_content)

        # `offset` counts pages, so the first item of the page is offset * limit.
        start = page * page_size
        page_of_matches = matched[start:start + page_size]

        return {"nodes": self._graph.get_nodes(page_of_matches)}
Example #19
0
    def get_plugins(self, train_id, manual_update=True):
        """
        Queries the plug-in data for the specified training job.

        Args:
            train_id (str): Specify a training job to query.
            manual_update (bool): Forwarded to `get_single_train_job` as `manual_update`.

        Returns:
            dict, refer to restful api. `{"plugins": <tag_mapping of the job>}`, or every member of
            `PluginNameEnum` mapped to an empty list when no train job is found.
        """
        Validation.check_param_empty(train_id=train_id)

        job = self._data_manager.get_single_train_job(train_id, manual_update=manual_update)
        if job:
            return {"plugins": job['tag_mapping']}

        return {"plugins": {member: [] for member in PluginNameEnum.list_members()}}
    def get_metadata_list(self, train_id, tag):
        """
        Builds a JSON-serializable object with information about scalars.

        Args:
            train_id (str): The ID of the events data.
            tag (str): The name of the tag the scalars all belonging to.

        Returns:
            dict, `{"metadatas": list[dict]}` where each dictionary contains the `wall_time`, `step`,
            `value` for each scalar.
        """
        Validation.check_param_empty(train_id=train_id, tag=tag)
        records = self._data_manager.list_tensors(train_id, tag)

        metadatas = [
            {'wall_time': record.wall_time, 'step': record.step, 'value': record.value}
            for record in records
        ]
        return {"metadatas": metadatas}
Example #21
0
    def get_metadata_list(self, train_id, tag):
        """
        Builds a JSON-serializable object with information about images.

        Args:
            train_id (str): The ID of the events data.
            tag (str): The name of the tag the images all belong to.

        Returns:
            dict, `{"metadatas": list[dict]}` where each dictionary contains the `wall_time`, `step`,
                `width`, and `height` for each image.
                    [
                        {
                            "wall_time": ****,
                            "step": ****,
                            "width": ****,
                            "height": ****,
                        },
                        {...}
                    ]

        Raises:
            ImageNotExistError: If listing the tensors raises ParamValueError.
        """
        Validation.check_param_empty(train_id=train_id, tag=tag)
        try:
            records = self._data_manager.list_tensors(train_id, tag)
        except ParamValueError as ex:
            raise ImageNotExistError(ex.message)

        # Width and height are read from the tensor's value and coerced to int.
        metadatas = [
            {
                'wall_time': record.wall_time,
                'step': record.step,
                'width': int(record.value.width),
                'height': int(record.value.height),
            }
            for record in records
        ]
        return {"metadatas": metadatas}
Example #22
0
def query_saliency():
    """
    Query saliency map related results.

    Reads the POST request body and forwards the validated parameters to `SaliencyEncap`.
    Optional keys fall back to: limit=10, offset=0, sorted_name="", sorted_type="descending";
    `labels` and `explainers` are passed through as given (None when absent).

    Returns:
        Response, JSON object with `count` and `samples`.

    Raises:
        ParamMissError: If train_id info is not in the request.
        ParamValueError: If sorted_name is not one of '', 'confidence', 'uncertainty', or
            sorted_type is not one of 'ascending', 'descending'.
    """

    data = _read_post_request(request)

    train_id = data.get("train_id")
    if train_id is None:
        raise ParamMissError('train_id')

    labels = data.get("labels")
    explainers = data.get("explainers")
    limit = data.get("limit", 10)
    limit = Validation.check_limit(limit, min_value=1, max_value=100)
    offset = data.get("offset", 0)
    offset = Validation.check_offset(offset=offset)
    sorted_name = data.get("sorted_name", "")
    sorted_type = data.get("sorted_type", "descending")

    if sorted_name not in ("", "confidence", "uncertainty"):
        raise ParamValueError(
            f"sorted_name: {sorted_name}, valid options: '' 'confidence' 'uncertainty'"
        )
    if sorted_type not in ("ascending", "descending"):
        # The message lists the options this check actually accepts.
        raise ParamValueError(
            f"sorted_type: {sorted_type}, valid options: 'ascending' 'descending'"
        )

    encapsulator = SaliencyEncap(_image_url_formatter, EXPLAIN_MANAGER)
    count, samples = encapsulator.query_saliency_maps(train_id=train_id,
                                                      labels=labels,
                                                      explainers=explainers,
                                                      limit=limit,
                                                      offset=offset,
                                                      sorted_name=sorted_name,
                                                      sorted_type=sorted_type)

    return jsonify({"count": count, "samples": samples})