예제 #1
0
    def _import_data_from_event(self, event_dict: Dict):
        """
        Hand every tagged event in `event_dict` to its importer.

        Args:
            event_dict (Dict): Maps an explain field tag to the event that
                carries this field.

        Raises:
            ParamValueError: If `event_dict` has no 'metadata' entry while
                `self._is_metadata_empty()` is true.
        """
        metadata_absent = 'metadata' not in event_dict
        if metadata_absent and self._is_metadata_empty():
            raise ParamValueError(
                'metadata is imcomplete, should write metadata first in the summary.'
            )

        for field_name, payload in event_dict.items():
            if field_name == ExplainFieldsEnum.METADATA.value:
                self._import_metadata_from_event(payload.metadata)
                continue
            if field_name == ExplainFieldsEnum.BENCHMARK.value:
                self._import_benchmark_from_event(payload.benchmark)
                continue
            if field_name == ExplainFieldsEnum.SAMPLE_ID.value:
                # The sample importer receives the whole event, not a sub-field.
                self._import_sample_from_event(payload)
                continue
            # Tags without a dedicated importer are only logged.
            logger.info('Unknown ExplainField: %s.', field_name)
예제 #2
0
    def _get_train_tensors(self, train_id, tags, step, dims, detail):
        """
        Collect the requested view of the tensor data for every tag of a train job.

        Args:
            train_id (str): ID of the train job to query.
            tags (list): Tags whose tensors are requested.
            step (int): Step of the tensor, required when detail is 'data'.
            dims (str): Dims of the tensor, required when detail is 'data'.
            detail (str): View to return: 'stats', 'histogram' or 'data'.
                None is handled on the same branch as 'stats'.

        Returns:
            list[dict], one dictionary per tag with the keys `train_id`, `tag`, `values`.

        Raises:
            TensorNotExistError, If listing the tensors raises ParamValueError, or the
                            first cached value of a tag is not a TensorContainer.
            ParamValueError, If detail is not None, 'stats', 'histogram' or 'data'.
        """
        response = []
        for tag in tags:
            try:
                cached = self._data_manager.list_tensors(train_id, tag)
            except ParamValueError as err:
                raise TensorNotExistError(err.message)

            # Only the first cached entry is inspected to decide whether the tag holds tensors.
            holds_tensor = not cached or isinstance(cached[0].value, TensorContainer)
            if not holds_tensor:
                raise TensorNotExistError(
                    "there is no tensor data in this tag: {}".format(tag))

            if detail == 'data':
                Validation.check_param_empty(step=step, dims=dims)
                # `step` is rebound on purpose: later tags see the converted value.
                step = to_int(step, "step")
                values = self._get_tensors_data(step, dims, cached)
            elif detail == 'histogram':
                values = self._get_tensors_histogram(cached)
            elif detail is None or detail == 'stats':
                values = self._get_tensors_summary(detail, cached)
            else:
                raise ParamValueError(
                    'Can not support this value: {} of detail.'.format(detail))

            response.append({"train_id": train_id, "tag": tag, "values": values})

        return response
예제 #3
0
def _validate_value(param, name, expected_values):
    """
    Ensure that `param` is one of `expected_values`.

    Args:
        param (object): Parameter to be validated.
        name (str): Name of the parameter, used in the error message.
        expected_values (tuple) : Expected values of param.

    Raises:
        ParamValueError: When param is not in expected_values.
    """
    if param in expected_values:
        return

    raise ParamValueError(
        f"Valid options for {name} are {expected_values}, but got {param}."
    )
예제 #4
0
def _get_lineage_info(lineage_type, search_condition):
    """
    Query lineage info and rewrite each summary dir relative to the base dir.

    Args:
        lineage_type (str): Lineage type, 'dataset' or 'model'.
        search_condition (dict): Search condition. It must not contain the key
            'lineage_type'; the key is added in place when `lineage_type` is
            'dataset'.

    Returns:
        dict, lineage info whose `summary_dir` entries start with './'.

    Raises:
        ParamValueError: If `search_condition` already contains 'lineage_type'.
        MindInsightException: If method fails to be called (re-raised with
            http_code 400).
    """
    if 'lineage_type' in search_condition:
        raise ParamValueError(
            "Lineage type does not need to be assigned in a specific interface."
        )
    if lineage_type == 'dataset':
        search_condition['lineage_type'] = 'dataset'

    base_dir = str(settings.SUMMARY_BASE_DIR)
    try:
        lineage_info = filter_summary_lineage(base_dir, search_condition)
        lineages = lineage_info['object']

        real_base_dir = os.path.realpath(base_dir)
        # Skip the base dir itself plus the path separator that follows it.
        prefix_len = len(real_base_dir) + 1

        for lineage in lineages:
            real_summary_dir = os.path.realpath(lineage['summary_dir'])
            if real_summary_dir == real_base_dir:
                lineage['summary_dir'] = './'
            else:
                lineage['summary_dir'] = os.path.join(
                    os.curdir, real_summary_dir[prefix_len:])
    except MindInsightException as exception:
        raise MindInsightException(exception.error,
                                   exception.message,
                                   http_code=400)

    return lineage_info
예제 #5
0
    def __init__(self,
                 method: str,
                 beta,
                 xi,
                 beta_decay=1,
                 beta_decay_delay=0):
        """
        Store the acquisition function settings and validate the method name.

        Args:
            method (str): Name of the acquisition function; it is lower-cased
                before it is checked against AcquisitionFunctionEnum members.
            beta: Stored as `_beta`.
            xi: Stored as `_xi`.
            beta_decay: Stored as `_beta_decay`. Default: 1.
            beta_decay_delay: Stored as `_beta_decay_delay`. Default: 0.

        Raises:
            ParamValueError: If the lower-cased method is not a member of
                AcquisitionFunctionEnum.
        """
        self._beta, self._xi = beta, xi
        self._beta_decay = beta_decay
        self._beta_decay_delay = beta_decay_delay

        self._method = method.lower()
        supported_methods = AcquisitionFunctionEnum.list_members()
        if self._method not in supported_methods:
            raise ParamValueError(
                error_detail="The 'method' should be in %s." %
                supported_methods)

        self._counter = 0
예제 #6
0
    def __init__(self, train_id, data_manager, tag=None):
        """
        Load the graph of a train job from the data manager.

        Args:
            train_id (str): ID of the train job; must not be empty.
            data_manager: Data manager handed to the base class and used to
                look up the train job and its tensors.
            tag (str): Tag of the graph to load. Default: None, which selects
                the first tag of the train job.

        Raises:
            SummaryLogPathInvalid: If no train job is found for the graph plugin.
            ParamValueError: If the train job has no tags.
        """
        Validation.check_param_empty(train_id=train_id)
        super(GraphProcessor, self).__init__(data_manager)

        train_job = self._data_manager.get_train_job_by_plugin(
            train_id, PluginNameEnum.GRAPH.value)
        if train_job is None:
            raise exceptions.SummaryLogPathInvalid()

        graph_tags = train_job['tags']
        if not graph_tags:
            raise ParamValueError(
                "Can not find any graph data in the train job.")

        selected_tag = graph_tags[0] if tag is None else tag
        graph_tensors = self._data_manager.list_tensors(train_id, tag=selected_tag)
        # The value of the first listed tensor is kept as the graph.
        self._graph = graph_tensors[0].value
예제 #7
0
    def _check_and_normalize_summary_path(self, summary_path):
        """
        Reject a missing summary path and resolve a given one to its real path.

        Args:
            summary_path (str): A directory path, e.g. '/data/ImageNet/'.

        Returns:
            str, normalized summary path.

        Raises:
            ParamValueError: If `summary_path` is None.
        """
        if summary_path is not None:
            return os.path.realpath(summary_path)

        logger.warning("Summary path is None. It will not init data loader generator.")
        raise ParamValueError("Summary path is None.")
예제 #8
0
def update_lineage_object(data_manager, train_id, added_info: dict):
    """
    Merge `added_info` into the cached lineage of a train job.

    Args:
        data_manager: Provides `get_brief_train_job` to fetch the cache item.
        train_id (str): ID of the train job whose lineage is updated.
        added_info (dict): Entries to add to, or overwrite in, the cached
            added info.

    Raises:
        ParamValueError: If the train job has no cached lineage.
    """
    validate_train_id(train_id)
    validate_added_info(added_info)

    cache_item = data_manager.get_brief_train_job(train_id)
    lineage_item = cache_item.get(key=LINEAGE, raise_exception=False)
    if lineage_item is None:
        logger.warning("Cannot update the lineage for tran job %s, because it does not exist.", train_id)
        raise ParamValueError("Cannot update the lineage for tran job %s, because it does not exist." % train_id)

    # Work on a copy so the cached mapping is replaced as a whole, not edited in place.
    merged_info = dict(lineage_item.super_lineage_obj.added_info)
    merged_info.update(added_info)

    with cache_item.lock_key(LINEAGE):
        cache_item.get(key=LINEAGE).super_lineage_obj.added_info = merged_info
    def check_offset(cls, offset, default_value=0):
        """
        Convert `offset` to an int and make sure it is not negative.

        Args:
            offset (Union[str, int]): Value can be string number or int.
            default_value (int): Returned when `offset` is None. Default: 0.

        Returns:
            int, offset.

        Raises:
            ParamValueError: If the converted offset is less than 0.
        """
        if offset is not None:
            checked = to_int(offset, 'offset')
            if checked < 0:
                raise ParamValueError("'offset' should be greater than or equal to 0.")
            return checked

        return default_value
예제 #10
0
def get_dataset_graph():
    """
    Get dataset graph.

    The train id of the current request is used as the summary directory. It
    has to be an absolute path or a './'-prefixed path relative to the summary
    base dir. When a dataset graph is found, its `summary_dir` is rewritten to
    a './'-prefixed relative path before the result is returned.

    Returns:
        str, the dataset graph information.

    Raises:
        MindInsightException: If method fails to be called.
        ParamValueError: If summary_dir is missing or invalid.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/datasets/dataset_graph?train_id=xxx
    """
    invalid_dir_msg = ("Summary dir should be absolute path or "
                       "relative path that relate to summary base dir.")

    summary_base_dir = str(settings.SUMMARY_BASE_DIR)
    summary_dir = get_train_id(request)
    if summary_dir is None:
        # A request without a train id is a parameter error, not a crash on
        # `None.startswith`.
        raise ParamValueError(invalid_dir_msg)

    if summary_dir.startswith('/'):
        # Keep the validated path, exactly as the relative branch does.
        summary_dir = validate_path(summary_dir)
    elif summary_dir.startswith('./'):
        summary_dir = os.path.join(summary_base_dir, summary_dir[2:])
        summary_dir = validate_path(summary_dir)
    else:
        raise ParamValueError(invalid_dir_msg)

    try:
        dataset_graph = get_summary_lineage(summary_dir=summary_dir,
                                            keys=['dataset_graph'])
    except MindInsightException as exception:
        # Re-raise as a client error while keeping the original cause attached.
        raise MindInsightException(exception.error,
                                   exception.message,
                                   http_code=400) from exception

    if dataset_graph:
        summary_dir_result = dataset_graph.get('summary_dir')
        base_dir_len = len(summary_base_dir)
        if summary_base_dir == summary_dir_result:
            relative_dir = './'
        else:
            # NOTE(review): the comparison above uses the returned summary dir
            # but the slice uses the requested one - confirm this is intended.
            relative_dir = os.path.join(os.curdir,
                                        summary_dir[base_dir_len + 1:])
        dataset_graph['summary_dir'] = relative_dir

    return jsonify(dataset_graph)
예제 #11
0
def query_image():
    """
    Return the binary of the image selected by the current request.

    The request must carry a train id plus the query arguments `path` and
    `type`; `type` has to be 'original' or 'overlay'.

    Raises:
        ParamMissError: If the train id, `path` or `type` is missing.
        ParamValueError: If `type` is not one of the valid options.
    """
    train_id = get_train_id(request)
    if train_id is None:
        raise ParamMissError("train_id")

    image_path = request.args.get("path")
    if image_path is None:
        raise ParamMissError("path")

    image_type = request.args.get("type")
    if image_type is None:
        raise ParamMissError("type")

    valid_types = ("original", "overlay")
    if image_type not in valid_types:
        raise ParamValueError(
            f"type:{image_type}, valid options: 'original' 'overlay'")

    return DatafileEncap(EXPLAIN_MANAGER).query_image_binary(
        train_id, image_path, image_type)
예제 #12
0
    def __init__(self, size):
        """
        Create an empty reservoir container.

        Args:
            size (int): Container Size. If the size is 0, the container is not limited.

        Raises:
            ParamValueError: If size is not an int or is a negative integer.
        """
        size_is_valid = isinstance(size, int) and size >= 0
        if not size_is_valid:
            raise ParamValueError('size must be nonnegative integer, was %s' %
                                  size)

        self._samples_max_size = size
        self._samples = []
        self._sample_counter = 0
        # The selector is always seeded with 0, so its sequence is reproducible.
        self._sample_selector = random.Random(0)
        self._mutex = threading.Lock()
예제 #13
0
    def get_display_timeline(self, device_type, scope_name_num):
        """
        Load the timeline display file of this device and filter it for the UI.

        Entries whose `tid` is 100001, whose `ph` is not "M" and whose
        `scope_level` is greater than or equal to `scope_name_num` are
        removed, together with any falsy entries.

        Args:
            device_type (str): Either "ascend" or "gpu".
            scope_name_num: Scope level threshold; converted with `int`.

        Returns:
            json, the content of timeline data; an empty list if the display
                file does not exist.

        Raises:
            ParamValueError: If `device_type` is neither "ascend" nor "gpu".
            ProfilerIOException: If the display file cannot be read or decoded.
        """
        if device_type == "ascend":
            filename_template = self._ascend_display_filename
        elif device_type == "gpu":
            filename_template = self._gpu_display_filename
        else:
            logger.info(
                'device type should be ascend or gpu. Please check the device type.'
            )
            raise ParamValueError("Invalid device_type.")

        display_filename = filename_template.format(self._device_id)
        file_path = validate_and_normalize_path(
            os.path.join(self._profiling_dir, display_filename),
            raise_key='Invalid timeline json path.')

        if not os.path.exists(file_path):
            logger.info('No timeline file. Please check the output path.')
            return []

        def _is_hidden(time_item):
            """Tell whether an entry lies at or beyond the requested scope level."""
            return time_item["tid"] == 100001 and \
                time_item["ph"] != "M" and \
                int(time_item["scope_level"]) >= int(scope_name_num)

        try:
            with open(file_path, 'r') as f_obj:
                loaded = json.load(f_obj)
                # The hidden check runs before the truthiness check for every entry.
                timeline = [
                    time_item for time_item in loaded
                    if not _is_hidden(time_item) and time_item
                ]
        except (IOError, OSError, json.JSONDecodeError) as err:
            logger.error(
                'Error occurred when read timeline display file: %s', err)
            raise ProfilerIOException()

        return timeline
예제 #14
0
def update_lineage():
    """
    Update lineage.

    The JSON body of the request is applied to the lineage of the train job
    named by the request's train id.

    Returns:
        str, a JSON response with status "success".

    Raises:
        MindInsightException: If method fails to be called.
        ParamValueError: If the request body is not a dict.

    Examples:
        >>> PUT http://xxxx/v1/mindinsight/lineagemgr/lineages?train_id=./run1
    """
    train_id = get_train_id(request)
    payload = request.json
    if isinstance(payload, dict):
        update_lineage_object(DATA_MANAGER, train_id, payload)
        return jsonify({"status": "success"})

    raise ParamValueError("The request body should be a dict.")
예제 #15
0
    def _check_train_job_exist(self, train_id, loader_pool):
        """
        Raise unless the train job is in the loader pool or known to a generator.

        Args:
            train_id (str): The given train job id.
            loader_pool (dict[str, LoaderStruct]): Refer to self._loader_pool.

        Raises:
            ParamValueError: Can not found train job in data manager.
        """
        if train_id in loader_pool:
            return

        # Generators are asked in order; the search stops at the first hit.
        known_to_generator = any(
            generator.check_train_job_exist(train_id)
            for generator in self._loader_generators)
        if known_to_generator:
            return

        raise ParamValueError(
            "Can not find the train job in data manager.")
예제 #16
0
def str_to_slice_or_int(input_str):
    """
    Translate param from string to slice or int.

    A string containing ':' is read like slice notation ('start:stop' or
    'start:stop:step'); blank fields become None. Any other string is
    converted to a single int.

    Args:
        input_str (str): The string to be translated.

    Returns:
        Union[int, slice], the transformed param.

    Raises:
        ParamValueError: If a field is not an integer, or more than three
            ':'-separated fields are given.
    """
    try:
        if ':' in input_str:
            fields = [field.strip() for field in input_str.split(':')]
            if len(fields) > 3:
                # slice() accepts at most start, stop and step; treat longer
                # input as a bad parameter instead of leaking a TypeError.
                raise ValueError("a slice takes at most three fields")
            ret = slice(*(int(field) if field else None for field in fields))
        else:
            ret = int(input_str)
    except ValueError:
        raise ParamValueError(
            "Invalid shape. Convert int from str failed. input_str: {}".format(
                input_str))
    return ret
    def check_limit(cls, limit, min_value=1, max_value=1000, default_value=100):
        """
        Convert `limit` to an int and make sure it lies in [min_value, max_value].

        Args:
            limit (Union[str, int]): Value can be string number or int.
            min_value (int): Limit should greater or equal this value. Default: 1.
            max_value (int): Limit should less or equal this value. Default: 1000.
            default_value (int): Returned when `limit` is None. Default: 100.

        Returns:
            int, limit.

        Raises:
            ParamValueError: If the converted limit is outside the allowed range.
        """
        if limit is None:
            return default_value

        checked = to_int(limit, 'limit')
        if min_value <= checked <= max_value:
            return checked

        raise ParamValueError("'limit' should in [{}, {}].".format(min_value, max_value))
예제 #18
0
    def get(self, key, raise_exception=True):
        """
        Look up `key` in the cached content.

        Args:
            key (str): Key of content.
            raise_exception (bool): Whether a missing key raises an exception
                instead of yielding None. Default: True.

        Returns:
            Union[Object, None], the cached value if the key exists; None if
                the key is missing and raise_exception is False.

        Raises:
            ParamValueError, if the key does not exist and raise_exception is True.

        """
        try:
            value = self._content[key]
        except KeyError:
            if not raise_exception:
                return None
            raise ParamValueError("Invalid cache key({}).".format(key))
        return value
예제 #19
0
    def get_timeline_summary(self, device_type):
        """
        Load the timeline summary file of this device for UI display.

        Args:
            device_type (str): Either "ascend" or "gpu".

        Returns:
            json, the content of timeline summary information. The key
                "max_scope_name_num" is set to 0 when the content lacks it,
                also when the summary file does not exist.

        Raises:
            ParamValueError: If `device_type` is neither "ascend" nor "gpu".
            ProfilerIOException: If the summary file cannot be read or decoded.
        """
        if device_type == "ascend":
            filename_template = self._ascend_timeline_summary_filename
        elif device_type == "gpu":
            filename_template = self._gpu_timeline_summary_filename
        else:
            logger.info(
                'device type should be ascend or gpu. Please check the device type.'
            )
            raise ParamValueError("Invalid device_type.")

        summary_filename = filename_template.format(self._device_id)
        file_path = validate_and_normalize_path(
            os.path.join(self._profiling_dir, summary_filename),
            raise_key='Invalid timeline summary path.')

        if not os.path.exists(file_path):
            logger.info(
                'No timeline summary file. Please check the output path.')
            timeline_summary = {}
        else:
            try:
                with open(file_path, 'r') as f_obj:
                    timeline_summary = json.load(f_obj)
            except (IOError, OSError, json.JSONDecodeError) as err:
                logger.error(
                    'Error occurred when read timeline summary file: %s', err)
                raise ProfilerIOException()

        timeline_summary.setdefault("max_scope_name_num", 0)

        return timeline_summary
예제 #20
0
def get_lineage():
    """
    Get lineage.

    The request body is parsed as the JSON search condition; an empty body is
    treated as an empty condition.

    Returns:
        str, the lineage information.

    Raises:
        MindInsightException: If method fails to be called.
        ParamValueError: If parsing json data search_condition fails.

    Examples:
        >>> POST http://xxxx/v1/mindinsight/lineagemgr/lineages
    """
    raw_condition = request.stream.read()
    try:
        search_condition = json.loads(raw_condition or "{}")
    except Exception:
        raise ParamValueError("Json data parse failed.")

    return jsonify(_get_lineage_info(search_condition=search_condition))
예제 #21
0
def calc_original_buckets(np_value, stats):
    """
    Build histogram buckets for the given tensor data.

    Args:
        np_value (numpy.ndarray): An numpy.ndarray of tensor data.
        stats (Statistics): An instance of Statistics about tensor data.

    Returns:
        list, a list of bucket about tensor data; empty when the count left
            after subtracting the NaN and infinity counts is zero.

    Raises:
        ParamValueError, If np_value or stats is None.
    """
    if np_value is None or stats is None:
        raise ParamValueError("Invalid input. np_value or stats is None.")

    valid_count = stats.count - stats.nan_count - stats.neg_inf_count - stats.pos_inf_count
    if not valid_count:
        return []

    bin_count = calc_histogram_bins(valid_count)
    low_edge, high_edge = stats.min, stats.max

    # Widen the range when min is not strictly below max, so it is not empty.
    if not low_edge < high_edge:
        low_edge -= 0.5
        high_edge += 0.5

    bin_edges = np.linspace(low_edge, high_edge, bin_count + 1, dtype=np_value.dtype)
    counts, edges = np.histogram(np_value, bins=bin_edges)

    # Each bucket stores its left edge, its width and its count.
    return [
        Bucket(left, right - left, count)
        for count, left, right in zip(counts, edges, edges[1:])
    ]
예제 #22
0
    def query_saliency_maps(self, train_id, labels, explainers, limit, offset,
                            sorted_name, sorted_type):
        """
        Query one page of samples with their saliency maps.

        Args:
            train_id (str): Job ID.
            labels (list[str]): Label filter; a sample is kept when any of its
                inference labels is in this list. A falsy value disables it.
            explainers (list[str]): Explainers of saliency maps to be shown.
            limit (int): Max. no. of items to be returned.
            offset (int): Page offset; the page starts at `offset * limit`.
            sorted_name (str): Field to be sorted: "confidence", "uncertainty"
                or "" for no sorting.
            sorted_type (str): Sorting order, 'ascending' or 'descending'.

        Returns:
            tuple[int, list[dict]], total no. of samples after filtering and
                list of sample result.

        Raises:
            TrainJobNotExistError: If no job is found for `train_id`.
            ParamValueError: If `sorted_name` is not supported, or it is
                "uncertainty" while uncertainty is not enabled for the job.
        """
        job = self.job_manager.get_job(train_id)
        if job is None:
            raise TrainJobNotExistError(train_id)

        samples = copy.deepcopy(job.get_all_samples())
        if labels:
            matched = []
            for sample in samples:
                sample_labels = [
                    inference["label"] for inference in sample["inferences"]
                ]
                if any(label in labels for label in sample_labels):
                    matched.append(sample)
            samples = matched

        descending = sorted_type == "descending"
        if sorted_name == "confidence":
            sort_key = (_sort_key_max_confidence
                        if descending else _sort_key_min_confidence)
            samples.sort(key=sort_key, reverse=descending)
        elif sorted_name == "uncertainty":
            if not job.uncertainty_enabled:
                raise ParamValueError(
                    "Uncertainty is not enabled, sorted_name cannot be 'uncertainty'"
                )
            sort_key = (_sort_key_max_confidence_sd
                        if descending else _sort_key_min_confidence_sd)
            samples.sort(key=sort_key, reverse=descending)
        elif sorted_name != "":
            raise ParamValueError("sorted_name")

        total = len(samples)
        first = offset * limit
        # The page never runs past the last sample.
        last = min(total, first + limit)
        sample_infos = [
            self._touch_sample(samples[index], job, explainers)
            for index in range(first, last)
        ]

        return total, sample_infos
예제 #23
0
    def _query_samples(self,
                       job,
                       labels,
                       sorted_name,
                       sorted_type,
                       prediction_types=None,
                       drop_type=None):
        """
        Filter and sort a deep copy of all samples of an explain job.

        Args:
            job (ExplainManager): Explain job to be query from.
            labels (list[str]): Label filter.
            sorted_name (str): Field to be sorted: "confidence", "uncertainty"
                or "" for no sorting.
            sorted_type (str): Sorting order, 'ascending' or 'descending'.
            prediction_types (list[str]): Prediction type filter. It is only
                applied when it holds fewer than 3 entries.
            drop_type (str, None): When it is None, all data will be kept.
                Otherwise it names the explanation key that at least one
                inference of a sample must hold a truthy value for; samples
                without one are dropped.

        Returns:
             list[dict], samples to be queried.

        Raises:
            ParamValueError: If `drop_type` or `sorted_name` is not supported,
                or `sorted_name` is "uncertainty" while uncertainty is not
                enabled for the job.
        """

        def _any_inference_in(sample, field, allowed):
            """Tell whether any inference of `sample` has `field` in `allowed`."""
            values = [inference[field] for inference in sample["inferences"]]
            return any(value in allowed for value in values)

        samples = copy.deepcopy(job.get_all_samples())
        if drop_type not in (None, ExplanationKeys.SALIENCY.value,
                             ExplanationKeys.HOC.value):
            raise ParamValueError(
                f"Argument drop_type valid options: None, {ExplanationKeys.SALIENCY.value}, "
                f"{ExplanationKeys.HOC.value}, but got {drop_type}.")

        if drop_type is not None:
            samples = [
                sample for sample in samples
                if any(infer[drop_type] for infer in sample['inferences'])
            ]

        if labels:
            samples = [
                sample for sample in samples
                if _any_inference_in(sample, "label", labels)
            ]

        if prediction_types and len(prediction_types) < 3:
            samples = [
                sample for sample in samples
                if _any_inference_in(sample, "prediction_type", prediction_types)
            ]

        descending = sorted_type == "descending"
        if sorted_name == "confidence":
            if descending:
                samples.sort(key=lambda x: _sort_key_max_confidence(x, labels),
                             reverse=descending)
            else:
                samples.sort(key=lambda x: _sort_key_min_confidence(x, labels),
                             reverse=descending)
        elif sorted_name == "uncertainty":
            if not job.uncertainty_enabled:
                raise ParamValueError(
                    "Uncertainty is not enabled, sorted_name cannot be 'uncertainty'"
                )
            if descending:
                samples.sort(
                    key=lambda x: _sort_key_max_confidence_sd(x, labels),
                    reverse=descending)
            else:
                samples.sort(
                    key=lambda x: _sort_key_min_confidence_sd(x, labels),
                    reverse=descending)
        elif sorted_name != "":
            raise ParamValueError("sorted_name")

        return samples
예제 #24
0
 def _check_summary_exist(self, loader_id):
     """
     Ensure the summary watcher sees a summary directory for `loader_id`.

     Raises:
         ParamValueError: If `loader_id` is not a summary directory under
             the summary base dir.
     """
     has_summary = self._summary_watcher.is_summary_directory(
         self._summary_base_dir, loader_id)
     if has_summary:
         return
     raise ParamValueError('Can not find the train job in the manager.')
예제 #25
0
    def get_nodes(self, name, node_type):
        """
        Get the nodes that belong to the given node of the graph.

        Args:
            name (str): The name of a node. An empty name is only accepted for
                the name scope type.
            node_type (Any): The type of node, either the name scope value or
                the polymeric scope value of NodeTypeEnum.

        Returns:
            TypedDict('Nodes', {'nodes': list[Node]}), format is {'nodes': [<Node object>]}.
                example:
                    {
                      "nodes" : [
                        {
                          "input" : {},
                          "name" : "input_tensor",
                          "output" :
                          {
                            "Default/TensorAdd-op17" :
                            {
                              "edge_type" : "data",
                              "scope" : "name_scope",
                              "shape" : [1, 16, 128, 128]
                            }
                          },
                          "output_i" : -1,
                          "polymeric_input" : {},
                          "polymeric_output" : {},
                          "polymeric_scope_name" : "",
                          "subnode_count" : 0,
                          "type" : "Data"
                        }
                      ]
                    }

        Raises:
            ParamValueError: If the node type is not supported, the named node
                is not in the graph, or the name is empty for the polymeric
                scope type.
        """
        name_scope = NodeTypeEnum.NAME_SCOPE.value
        polymeric_scope = NodeTypeEnum.POLYMERIC_SCOPE.value

        if node_type not in [name_scope, polymeric_scope]:
            raise ParamValueError(
                'The node type is not support, only either %s or %s.'
                '' % (name_scope, polymeric_scope))

        if name and not self._graph.exist_node(name):
            raise ParamValueError("The node name is not in graph.")

        nodes = []
        if node_type == name_scope:
            nodes = self._graph.get_normal_nodes(name)

        if node_type == polymeric_scope:
            # A polymeric scope is looked up by name, so the name is mandatory here.
            if not name:
                raise ParamValueError(
                    'The node name "%s" not in graph, node type is %s.' %
                    (name, node_type))
            nodes = self._graph.get_polymeric_nodes(name)

        return {'nodes': nodes}