def get_single_image(self, train_id, tag, step):
    """
    Returns the actual image bytes for a given image.

    Args:
        train_id (str): The ID of the events data the image belongs to.
        tag (str): The name of the tag the image belongs to.
        step (int): The step of the image in the current reservoir. If step is -1,
            the image of the final step is returned.

    Returns:
        bytes, a byte string of the raw image bytes.
    """
    Validation.check_param_empty(train_id=train_id, tag=tag, step=step)
    step = to_int(step, "step")

    try:
        tensors = self._data_manager.list_tensors(train_id, tag)
    except ParamValueError as ex:
        raise ImageNotExistError(ex.message)

    image = _find_image(tensors, step)
    if image is None:
        raise ImageNotExistError(
            "Can not find the step with given train job id and tag.")

    return image

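# A minimal usage sketch, assuming this method lives on an image processor
# class built around the shared data manager; `ImageProcessor` and
# `DATA_MANAGER` are illustrative names, not confirmed API.
processor = ImageProcessor(DATA_MANAGER)
# step=-1 requests the image recorded at the final step.
image_bytes = processor.get_single_image('./run1', 'input_data/image_0', step=-1)
with open('sample.png', 'wb') as file:
    file.write(image_bytes)
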
def query_explain_jobs():
    """
    Query explain jobs.

    Returns:
        Response, contains a dict that stores the name of the base directory,
        the total number of jobs, and their detailed job metadata.

    Raises:
        ParamTypeError: If one of (offset, limit) is not an integer in the request.
        ParamValueError: If one of (offset, limit) does not have the expected
            value in the request.
    """
    offset = request.args.get("offset", default=0)
    limit = request.args.get("limit", default=10)
    offset = Validation.check_offset(offset=offset)
    limit = Validation.check_limit(
        limit, min_value=1, max_value=SummaryWatcher.MAX_SUMMARY_DIR_COUNT)

    encapsulator = ExplainJobEncap(EXPLAIN_MANAGER)
    total, jobs = encapsulator.query_explain_jobs(offset, limit)

    return jsonify({
        'name': os.path.basename(os.path.realpath(settings.SUMMARY_BASE_DIR)),
        'total': total,
        'explain_jobs': jobs,
    })

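# A hedged sketch of exercising this endpoint with Flask's test client.
# `APP` and the URL prefix are assumptions; adjust to the blueprint's
# actual application object and route.
with APP.test_client() as client:
    response = client.get('/v1/mindinsight/explainer/explain-jobs?offset=0&limit=10')
    body = response.get_json()
    # body resembles: {'name': '<base dir name>', 'total': 3, 'explain_jobs': [...]}
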
def get_plugins(self, train_id, manual_update=True):
    """
    Queries the plug-in data for the specified training job.

    Args:
        train_id (str): Specify a training job to query.
        manual_update (bool): Specifies whether to refresh the cache before querying.

    Returns:
        dict, refer to the RESTful API.
    """
    Validation.check_param_empty(train_id=train_id)
    if contains_null_byte(train_id=train_id):
        raise QueryStringContainsNullByteError(
            "train job id: {} contains null byte.".format(train_id))

    if manual_update:
        self._data_manager.cache_train_job(train_id)

    train_job = self._data_manager.get_train_job(train_id)

    try:
        data_visual_content = train_job.get_detail(DATAVISUAL_CACHE_KEY)
        plugins = data_visual_content.get(DATAVISUAL_PLUGIN_KEY)
    except exceptions.TrainJobDetailNotInCacheError:
        plugins = []

    if not plugins:
        default_result = dict()
        for plugin_name in PluginNameEnum.list_members():
            default_result.update({plugin_name: list()})
        return dict(plugins=default_result)

    return dict(plugins=plugins)

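# When nothing is cached yet, the fallback branch above yields one empty
# list per known plugin. An illustrative shape (the exact keys depend on
# the members of PluginNameEnum):
default_plugins = {
    'plugins': {
        'image': [],
        'scalar': [],
        'graph': [],
        'histogram': [],
    }
}
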
def get_single_image(self, train_id, tag, step):
    """
    Returns the actual image bytes for a given image.

    Args:
        train_id (str): The ID of the events data the image belongs to.
        tag (str): The name of the tag the image belongs to.
        step (int): The step of the image in the current reservoir.

    Returns:
        bytes, a byte string of the raw image bytes.
    """
    Validation.check_param_empty(train_id=train_id, tag=tag, step=step)
    step = to_int(step, "step")

    tensors = self._data_manager.list_tensors(train_id, tag)

    image = None
    for tensor in tensors:
        if tensor.step == step:
            # The default value for a bytes field is normally the empty byte
            # string; see "Optional Fields And Default Values" in the
            # protobuf documentation.
            image = tensor.value.encoded_image
            break

    if image is None:
        raise ParamValueError(
            "Can not find the step with given train job id and tag.")

    return image

def list_explain_directories(self, summary_base_dir, offset=0, limit=None):
    """
    List explain directories within the base directory.

    Args:
        summary_base_dir (str): Path of the summary base directory.
        offset (int): An offset for the page. E.g. an offset of 0 means the
            current page is the first page. Default value is 0.
        limit (int): The max number of items per page. Default value is None,
            in which case all directories are returned.

    Returns:
        tuple[total, directories], where total is the overall number of explain
        directories and directories is a list of summary directory info
        including the following attributes.

            - relative_path (str): Relative path of the summary directory,
              referring to settings.SUMMARY_BASE_DIR, starting with "./".
            - create_time (datetime): Creation time of the summary file.
            - update_time (datetime): Modification time of the summary file.

    Raises:
        ParamValueError: If offset < 0 or limit is out of the valid value range.
        ParamTypeError: If offset or limit is not a valid integer.

    Examples:
        >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
        >>> summary_watcher = SummaryWatcher()
        >>> total, directories = summary_watcher.list_explain_directories('/summary/base/dir', offset=0, limit=10)
    """
    offset = Validation.check_offset(offset=offset)
    limit = Validation.check_limit(limit, min_value=1, max_value=999, default_value=None)

    directories = self.list_summary_directories(
        summary_base_dir, overall=False, list_explain=True)
    if limit is None:
        return len(directories), directories
    return len(directories), directories[offset * limit:(offset + 1) * limit]

def get_tensors(self, train_ids, tags, step, dims, detail):
    """
    Get tensor data for the given train_ids, tags, step, dims and detail.

    Args:
        train_ids (list): Specify a list of train job IDs.
        tags (list): Specify a list of tags.
        step (int): Specify the step of the tag; it is required when detail is 'data'.
        dims (str): Specify the dims of the step; it is required when detail is 'data'.
        detail (str): Specify which data to query. Available values: 'stats',
            'histogram' and 'data'.

    Returns:
        dict, a dict including the `tensors`.

    Raises:
        UrlDecodeError: If unquoting a train id fails with strict mode.
    """
    Validation.check_param_empty(train_id=train_ids, tag=tags)
    TensorUtils.validate_dims_format(dims)

    for index, train_id in enumerate(train_ids):
        try:
            train_id = unquote(train_id, errors='strict')
        except UnicodeDecodeError:
            raise UrlDecodeError('Unquote train id error with strict mode')
        else:
            train_ids[index] = train_id

    tensors = []
    for train_id in train_ids:
        tensors += self._get_train_tensors(train_id, tags, step, dims, detail)

    return {"tensors": tensors}

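# The strict unquoting above raises UnicodeDecodeError for percent-escapes
# that do not decode as valid UTF-8, which the method maps to UrlDecodeError.
# A standalone illustration using only the standard library:
from urllib.parse import unquote

unquote('.%2Frun1', errors='strict')  # -> './run1'
unquote('%ff', errors='strict')       # raises UnicodeDecodeError
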
def _get_query_sample_parameters(data):
    """
    Get the parameters for a query.

    Args:
        data (dict): Dict that contains the request info.

    Returns:
        dict, key-value pairs to call the backend query functions.

    Raises:
        ParamMissError: If train_id info is not in the request.
        ParamTypeError: If a certain key is not of the expected type in the request.
        ParamValueError: If a certain key does not have the expected value in the request.
    """
    train_id = data.get("train_id")
    if train_id is None:
        raise ParamMissError('train_id')

    labels = data.get("labels")
    if labels is not None:
        _validate_type(labels, "labels", list)
    if labels:
        for item in labels:
            _validate_type(item, "element of labels", str)

    limit = data.get("limit", 10)
    limit = Validation.check_limit(limit, min_value=1, max_value=100)
    offset = data.get("offset", 0)
    offset = Validation.check_offset(offset=offset)
    sorted_name = data.get("sorted_name", "")
    _validate_value(sorted_name, "sorted_name", ('', 'confidence', 'uncertainty'))
    sorted_type = data.get("sorted_type", "descending")
    _validate_value(sorted_type, "sorted_type", ("ascending", "descending"))

    prediction_types = data.get("prediction_types")
    if prediction_types is not None:
        _validate_type(prediction_types, "prediction_types", list)
    if prediction_types:
        for item in prediction_types:
            _validate_value(item, "element of prediction_types", ('TP', 'FN', 'FP'))

    query_kwarg = {
        "train_id": train_id,
        "labels": labels,
        "limit": limit,
        "offset": offset,
        "sorted_name": sorted_name,
        "sorted_type": sorted_type,
        "prediction_types": prediction_types,
    }
    return query_kwarg

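# A sketch of the mapping this helper performs; the request body below is
# illustrative, and omitted keys fall back to the defaults shown above.
data = {
    'train_id': './mobilenet',
    'labels': ['cat', 'dog'],
    'limit': 20,
    'prediction_types': ['TP', 'FP'],
}
kwargs = _get_query_sample_parameters(data)
# kwargs == {'train_id': './mobilenet', 'labels': ['cat', 'dog'], 'limit': 20,
#            'offset': 0, 'sorted_name': '', 'sorted_type': 'descending',
#            'prediction_types': ['TP', 'FP']}
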
def _get_train_tensors(self, train_id, tags, step, dims, detail):
    """
    Get tensor data for the given train_id, tags, step, dims and detail.

    Args:
        train_id (str): Specify a train job ID.
        tags (list): Specify a list of tags.
        step (int): Specify the step of the tensor; it is required when detail is 'data'.
        dims (str): Specify the dims of the tensor; it is required when detail is 'data'.
        detail (str): Specify which data to query. Available values: 'stats',
            'histogram' and 'data'.

    Returns:
        list[dict], a list of dictionaries containing the `train_id`, `tag` and `values`.

    Raises:
        TensorNotExistError: If no tensor with the specific train_id and tag
            exists in the cache.
        ParamValueError: If the value of detail is not one of the available
            values: 'stats', 'histogram' and 'data'.
    """
    tensors_response = []
    for tag in tags:
        try:
            tensors = self._data_manager.list_tensors(train_id, tag)
        except ParamValueError as err:
            raise TensorNotExistError(err.message)

        if tensors and not isinstance(tensors[0].value, TensorContainer):
            raise TensorNotExistError(
                "there is no tensor data in this tag: {}".format(tag))

        if detail is None or detail == 'stats':
            values = self._get_tensors_summary(detail, tensors)
        elif detail == 'data':
            Validation.check_param_empty(step=step, dims=dims)
            # Limit the query to at most two dimensions for the tensor table view.
            dims = TensorUtils.parse_shape(dims, limit=MAX_DIMENSIONS_FOR_TENSOR)
            step = to_int(step, "step")
            values = self._get_tensors_data(step, dims, tensors)
        elif detail == 'histogram':
            values = self._get_tensors_histogram(tensors)
        else:
            raise ParamValueError(
                'Can not support this value: {} of detail.'.format(detail))

        tensor = {"train_id": train_id, "tag": tag, "values": values}
        tensors_response.append(tensor)

    return tensors_response

def get_tensors(self, train_ids, tags, step, dims, detail):
    """
    Get tensor data for the given train_ids, tags, step, dims and detail.

    Args:
        train_ids (list): Specify a list of train job IDs.
        tags (list): Specify a list of tags.
        step (int): Specify the step of the tag; it is required when detail is 'data'.
        dims (str): Specify the dims of the step; it is required when detail is 'data'.
        detail (str): Specify which data to query. Available values: 'stats',
            'histogram' and 'data'.

    Returns:
        dict, a dict including the `tensors`.

    Raises:
        UrlDecodeError: If unquoting a train id fails with strict mode.
        ParamValueError: If dims is not a string of the form `[...]` whose
            components are ints or `:`.
    """
    Validation.check_param_empty(train_id=train_ids, tag=tags)
    if dims is not None:
        if not isinstance(dims, str):
            raise ParamValueError('The type of dims must be str, but got {}.'.format(type(dims)))
        dims = dims.strip()
        if not (dims.startswith('[') and dims.endswith(']')):
            raise ParamValueError('The value: {} of dims must '
                                  'start with `[` and end with `]`.'.format(dims))
        for dim in dims[1:-1].split(','):
            dim = dim.strip()
            if dim == ":":
                continue
            if dim.startswith('-'):
                dim = dim[1:]
            if not dim.isdigit():
                raise ParamValueError('The value: {} of dims in the square brackets '
                                      'must be int or `:`.'.format(dims))

    for index, train_id in enumerate(train_ids):
        try:
            train_id = unquote(train_id, errors='strict')
        except UnicodeDecodeError:
            raise UrlDecodeError('Unquote train id error with strict mode')
        else:
            train_ids[index] = train_id

    tensors = []
    for train_id in train_ids:
        tensors += self._get_train_tensors(train_id, tags, step, dims, detail)

    return {"tensors": tensors}

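# Some dims strings and how the inline validation above treats them:
valid_dims = ['[0, :]', '[:, -1]', '[32, 16]']  # ints (possibly negative) or `:`
invalid_dims = [
    '0, :',      # missing the surrounding square brackets
    '[a, :]',    # component is neither an int nor `:`
    '[1.5, 0]',  # a float is not an int
]
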
def search_node_names(self, search_content, offset, limit):
    """
    Search node names by search content.

    Args:
        search_content (Any): This content can be the key content of the node to search.
        offset (int): An offset for the page. E.g. an offset of 0 means the
            current page is the first page.
        limit (int): The max number of items per page.

    Returns:
        TypedDict('Names', {'names': list[str]}), e.g. {"names": ["node_names"]}.
    """
    offset = Validation.check_offset(offset=offset)
    limit = Validation.check_limit(limit, min_value=1, max_value=1000)
    names = self._graph.search_node_names(search_content, offset, limit)
    return {"names": names}

def query_train_jobs():
    """Query train jobs."""
    offset = request.args.get("offset", default=0)
    limit = request.args.get("limit", default=10)
    offset = Validation.check_offset(offset=offset)
    limit = Validation.check_limit(
        limit, min_value=1, max_value=SummaryWatcher.MAX_SUMMARY_DIR_COUNT)

    processor = TrainTaskManager(DATA_MANAGER)
    total, train_jobs = processor.query_train_jobs(offset, limit)

    return jsonify({
        'name': os.path.basename(os.path.realpath(settings.SUMMARY_BASE_DIR)),
        'total': total,
        'train_jobs': train_jobs,
    })

def search_single_node(self, name):
    """
    Search a node by node name.

    Args:
        name (str): The name of the node.

    Returns:
        dict, the format is:
            item_object = {'nodes': [<Node object>],
                           'scope_name': '',
                           'children': {<item_object>}}
    """
    Validation.check_param_empty(name=name)
    nodes = self._graph.search_single_node(name)
    return nodes

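# A hedged usage sketch; `processor` and the node name are illustrative.
result = processor.search_single_node('Default/network/conv1')
# result follows the item_object format documented above, roughly:
# {'nodes': [...], 'scope_name': '...', 'children': {...}}
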
def get_single_train_task(self, plugin_name, train_id):
    """
    Get a single train task.

    Args:
        plugin_name (str): Plugin name, refer to `PluginNameEnum`.
        train_id (str): Specify a training job to query.

    Returns:
        dict, {'train_jobs': list[TrainJob]}, refer to the RESTful API.
    """
    Validation.check_param_empty(plugin_name=plugin_name, train_id=train_id)
    Validation.check_plugin_name(plugin_name=plugin_name)
    train_job = self._data_manager.get_train_job_by_plugin(
        train_id=train_id, plugin_name=plugin_name)
    if train_job is None:
        raise exceptions.TrainJobNotExistError()
    return dict(train_jobs=[train_job])

def get_histograms(self, train_id, tag):
    """
    Builds a JSON-serializable object with information about histogram data.

    Args:
        train_id (str): The ID of the events data.
        tag (str): The name of the tag that the histogram data all belong to.

    Returns:
        dict, a dict including the `train_id`, `tag` and `histograms`, e.g.:

            {
                "train_id": ****,
                "tag": ****,
                "histograms": [
                    {
                        "wall_time": ****,
                        "step": ****,
                        "buckets": [[**, **, **]],
                    },
                    {...}
                ]
            }
    """
    Validation.check_param_empty(train_id=train_id, tag=tag)
    logger.info("Start to process histogram data...")
    try:
        tensors = self._data_manager.list_tensors(train_id, tag)
    except ParamValueError as err:
        raise HistogramNotExistError(err.message)

    histograms = []
    for tensor in tensors:
        histogram = tensor.value
        buckets = histogram.buckets()
        histograms.append({
            "wall_time": tensor.wall_time,
            "step": tensor.step,
            "buckets": buckets
        })
    logger.info("Histogram data processing is finished!")
    response = {
        "train_id": train_id,
        "tag": tag,
        "histograms": histograms
    }
    return response

def __init__(self, train_id, data_manager, tag=None):
    """
    Initialize GraphProcessor with the graph data of a train job.

    Args:
        train_id (str): Specify a training job to query.
        data_manager (DataManager): The data manager that caches train job data.
        tag (str): The graph tag to load. Defaults to the job's first graph tag.
    """
    Validation.check_param_empty(train_id=train_id)
    super(GraphProcessor, self).__init__(data_manager)

    train_job = self._data_manager.get_train_job_by_plugin(
        train_id, PluginNameEnum.GRAPH.value)
    if train_job is None:
        raise exceptions.SummaryLogPathInvalid()

    if not train_job['tags']:
        raise ParamValueError(
            "Can not find any graph data in the train job.")

    if tag is None:
        tag = train_job['tags'][0]

    tensors = self._data_manager.list_tensors(train_id, tag=tag)
    self._graph = tensors[0].value

def __init__(self, train_id, data_manager, tag=None):
    """
    Initialize GraphProcessor with the graph data of a train job.

    Args:
        train_id (str): Specify a training job to query.
        data_manager (DataManager): The data manager that caches train job data.
        tag (str): The graph tag to load. Defaults to the job's first graph tag.
    """
    Validation.check_param_empty(train_id=train_id)
    super(GraphProcessor, self).__init__(data_manager)

    train_job = self._data_manager.get_train_job_by_plugin(
        train_id, PluginNameEnum.GRAPH.value)
    if train_job is None:
        raise exceptions.TrainJobNotExistError()

    if not train_job['tags'] or (tag is not None and tag not in train_job['tags']):
        raise exceptions.GraphNotExistError()

    if tag is None:
        tag = train_job['tags'][0]

    tensors = self._data_manager.list_tensors(train_id, tag=tag)
    self._graph = tensors[0].value

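# A minimal construction sketch distinguishing the two failure modes above;
# the train id and manager name are illustrative.
try:
    processor = GraphProcessor('./lenet', DATA_MANAGER)
except exceptions.TrainJobNotExistError:
    pass  # the train job is not loaded at all
except exceptions.GraphNotExistError:
    pass  # the job exists but records no graph, or the tag is unknown
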
def query_explain_jobs():
    """Query explain jobs."""
    offset = request.args.get("offset", default=0)
    limit = request.args.get("limit", default=10)
    offset = Validation.check_offset(offset=offset)
    limit = Validation.check_limit(
        limit, min_value=1, max_value=SummaryWatcher.MAX_SUMMARY_DIR_COUNT)

    encapsulator = ExplainJobEncap(EXPLAIN_MANAGER)
    total, jobs = encapsulator.query_explain_jobs(offset, limit)

    return jsonify({
        'name': os.path.basename(os.path.realpath(settings.SUMMARY_BASE_DIR)),
        'total': total,
        'explain_jobs': jobs,
    })

def search_node_names(self, search_content, offset, limit):
    """
    Search node names by search content.

    Args:
        search_content (Any): This content can be the key content of the node to search.
        offset (int): An offset for the page. E.g. an offset of 0 means the
            current page is the first page.
        limit (int): The max number of items per page.

    Returns:
        dict, the searched nodes.
    """
    offset = Validation.check_offset(offset=offset)
    limit = Validation.check_limit(limit, min_value=1, max_value=1000)
    nodes = self._graph.search_nodes_by_pattern(search_content)
    real_offset = offset * limit
    search_nodes = self._graph.get_nodes(nodes[real_offset:real_offset + limit])
    return {"nodes": search_nodes}

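# Note that offset counts pages rather than items: with limit=3 and
# offset=2 the slice above covers matched nodes 6 through 8 (zero-based).
offset, limit = 2, 3
real_offset = offset * limit  # 6
# nodes[6:9] is the page that gets resolved via get_nodes.
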
def get_plugins(self, train_id, manual_update=True):
    """
    Queries the plug-in data for the specified training job.

    Args:
        train_id (str): Specify a training job to query.
        manual_update (bool): Specifies whether to refresh the cache before querying.

    Returns:
        dict, refer to the RESTful API.
    """
    Validation.check_param_empty(train_id=train_id)
    train_job = self._data_manager.get_single_train_job(
        train_id, manual_update=manual_update)
    if not train_job:
        default_result = dict()
        for plugin_name in PluginNameEnum.list_members():
            default_result.update({plugin_name: list()})
        return dict(plugins=default_result)

    return dict(plugins=train_job['tag_mapping'])

def get_metadata_list(self, train_id, tag):
    """
    Builds a JSON-serializable object with information about scalars.

    Args:
        train_id (str): The ID of the events data.
        tag (str): The name of the tag that the scalars all belong to.

    Returns:
        dict, a dict wrapping a list of dictionaries that contain the
        `wall_time`, `step` and `value` for each scalar.
    """
    Validation.check_param_empty(train_id=train_id, tag=tag)
    job_response = []
    tensors = self._data_manager.list_tensors(train_id, tag)

    for tensor in tensors:
        job_response.append({
            'wall_time': tensor.wall_time,
            'step': tensor.step,
            'value': tensor.value
        })
    return dict(metadatas=job_response)

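# Illustratively, the returned object looks like this (values are made up):
example_response = {
    'metadatas': [
        {'wall_time': 1591066200.1, 'step': 1, 'value': 2.30},
        {'wall_time': 1591066210.5, 'step': 2, 'value': 1.87},
    ]
}
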
def get_metadata_list(self, train_id, tag):
    """
    Builds a JSON-serializable object with information about images.

    Args:
        train_id (str): The ID of the events data.
        tag (str): The name of the tag that the images all belong to.

    Returns:
        dict, a dict wrapping a list of dictionaries that contain the
        `wall_time`, `step`, `width` and `height` for each image, e.g.:

            [
                {
                    "wall_time": ****,
                    "step": ****,
                    "width": ****,
                    "height": ****,
                },
                {...}
            ]
    """
    Validation.check_param_empty(train_id=train_id, tag=tag)
    result = []
    try:
        tensors = self._data_manager.list_tensors(train_id, tag)
    except ParamValueError as ex:
        raise ImageNotExistError(ex.message)

    for tensor in tensors:
        # There is no tensor_proto in the TensorEvent; width and height come
        # from the cached image metadata.
        (width, height) = (tensor.value.width, tensor.value.height)
        result.append({
            'wall_time': tensor.wall_time,
            'step': tensor.step,
            'width': int(width),
            'height': int(height),
        })
    return dict(metadatas=result)

def query_saliency():
    """Query saliency map related results."""
    data = _read_post_request(request)

    train_id = data.get("train_id")
    if train_id is None:
        raise ParamMissError('train_id')

    labels = data.get("labels")
    explainers = data.get("explainers")
    limit = data.get("limit", 10)
    limit = Validation.check_limit(limit, min_value=1, max_value=100)
    offset = data.get("offset", 0)
    offset = Validation.check_offset(offset=offset)
    sorted_name = data.get("sorted_name", "")
    sorted_type = data.get("sorted_type", "descending")
    if sorted_name not in ("", "confidence", "uncertainty"):
        raise ParamValueError(
            f"sorted_name: {sorted_name}, valid options: '' 'confidence' 'uncertainty'")
    if sorted_type not in ("ascending", "descending"):
        raise ParamValueError(
            f"sorted_type: {sorted_type}, valid options: 'ascending' 'descending'")

    encapsulator = SaliencyEncap(_image_url_formatter, EXPLAIN_MANAGER)
    count, samples = encapsulator.query_saliency_maps(train_id=train_id,
                                                      labels=labels,
                                                      explainers=explainers,
                                                      limit=limit,
                                                      offset=offset,
                                                      sorted_name=sorted_name,
                                                      sorted_type=sorted_type)
    return jsonify({"count": count, "samples": samples})

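# A hedged example of a request body this endpoint accepts; the field
# names follow the parsing above, the values are illustrative.
payload = {
    'train_id': './mobilenet',
    'labels': ['cat'],
    'explainers': ['Gradient'],
    'limit': 20,
    'offset': 0,
    'sorted_name': 'confidence',
    'sorted_type': 'descending',
}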