def general_get_summary_lineage(data_manager=None, summary_dir=None, keys=None):
    """
    Get summary lineage from data_manager or parsing from summaries.

    One of data_manager or summary_dir needs to be specified. Support getting
    super_lineage_obj from data_manager or parsing summaries by summary_dir.

    Note that `summary_dir` is effectively mandatory: it is rejected when it is
    `None` whether or not `data_manager` is given.

    Args:
        data_manager (DataManager): Data manager defined as
            mindinsight.datavisual.data_transform.data_manager.DataManager
        summary_dir (str): The summary directory. It contains summary logs for
            one training. When `data_manager` is specified it is checked with
            `validate_train_id` and must be given as a relative path.
        keys (list[str]): The filter keys of lineage information. The
            acceptable keys are `metric`, `user_defined`, `hyper_parameters`,
            `algorithm`, `train_dataset`, `model`, `valid_dataset` and
            `dataset_graph`. If it is `None`, all information will be
            returned. Default: None.

    Returns:
        dict, the lineage information for one training. An empty dict is
        returned when no super lineage object is available.

    Raises:
        LineageParamTypeError: If `summary_dir` is not specified.
        LineageParamSummaryPathError: If summary path is invalid.
        LineageQuerySummaryDataError: If querying summary data fails.
        LineageFileNotFoundError: If the summary log file is not found.
    """
    if data_manager is None and summary_dir is None:
        raise LineageParamTypeError(
            "One of data_manager or summary_dir needs to be specified.")
    if data_manager is not None and summary_dir is None:
        raise LineageParamTypeError(
            "If data_manager is specified, the summary_dir needs to be "
            "specified as relative path.")

    if keys is not None:
        validate_filter_key(keys)

    if data_manager is None:
        # Called for validation only: the parser receives the path exactly as
        # the caller passed it, not the normalized one.
        normalize_summary_dir(summary_dir)
        super_lineage_obj = LineageParser(summary_dir).super_lineage_obj
    else:
        validate_train_id(summary_dir)
        super_lineage_obj = LineageOrganizer(
            data_manager=data_manager).get_super_lineage_obj(summary_dir)

    if super_lineage_obj is None:
        return {}

    try:
        result = Querier({
            summary_dir: super_lineage_obj
        }).get_summary_lineage(summary_dir, keys)
    except (LineageQuerierParamException, LineageParamTypeError) as error:
        log.error(str(error))
        log.exception(error)
        # Chain the original error so the root cause stays attached.
        raise LineageQuerySummaryDataError("Get summary lineage failed.") from error
    return result[0]
def load(self):
    """
    Find and load summaries.

    Lists the sorted lineage summary files under `self._summary_dir`, resumes
    from `self._latest_filename` when one is recorded, and parses every file
    whose size differs from the recorded `self._latest_file_size`. Parse
    failures of the handled exception types are logged at debug level and do
    not stop the loop.

    Raises:
        LineageFileNotFoundError: If no lineage summary file is found.
    """
    # Sorted lineage files of the summary directory.
    summary_files = SummaryPathParser.get_lineage_summaries(self._summary_dir, is_sorted=True)
    if not summary_files:
        logger.info('There is no summary log file under summary_dir %s.', self._summary_dir)
        raise LineageFileNotFoundError(
            'There is no summary log file under summary_dir.'
        )
    self._init_if_files_deleted(summary_files)

    # Start at the file recorded last time, or at the first file.
    if self._latest_filename is None:
        start = 0
    else:
        start = summary_files.index(self._latest_filename)

    for name in summary_files[start:]:
        if name != self._latest_filename:
            # Moving on to another file: reset the recorded size.
            self._latest_filename = name
            self._latest_file_size = 0

        file_path = os.path.join(self._summary_dir, name)
        current_size = FileHandler(file_path).size
        if current_size != self._latest_file_size:
            # The size is recorded before parsing, so a failed parse is not
            # retried until the file size changes again.
            self._latest_file_size = current_size
            try:
                self._parse_summary_log()
            except (LineageSummaryAnalyzeException,
                    LineageEventNotExistException,
                    LineageEventFieldNotExistException) as error:
                logger.debug("Parse file failed, file_path is %s. Detail: %s", file_path, str(error))
            except MindInsightException as error:
                logger.exception(error)
                logger.debug("Parse file failed, file_path is %s.", file_path)
def _parse_summary_log(self):
    """
    Parse the single summary log.

    Resolves the real path of `self._latest_filename` under
    `self._summary_dir`, reads the lineage info and the user defined info
    from it and passes both to `self._update_lineage_obj`.

    The exception types handled below are logged as warnings and are not
    propagated. The method does not return a value.
    """
    joined_path = os.path.join(self._summary_dir, self._latest_filename)
    log_path = os.path.realpath(joined_path)
    try:
        summary_info = LineageSummaryAnalyzer.get_summary_infos(log_path)
        custom_info = LineageSummaryAnalyzer.get_user_defined_info(log_path)
        self._update_lineage_obj(summary_info, custom_info)
    except LineageSummaryAnalyzeException:
        logger.warning("Parse file failed under summary_dir %s.", log_path)
    except (LineageEventNotExistException,
            LineageEventFieldNotExistException) as error:
        logger.warning(
            "Parse file failed under summary_dir %s. Detail: %s.",
            log_path, str(error))
    except MindInsightException as error:
        # Any other MindInsight error: keep the traceback in the log.
        logger.exception(error)
        logger.warning("Parse file failed under summary_dir %s.", log_path)
def normalize_summary_dir(summary_dir):
    """
    Normalize summary dir.

    Args:
        summary_dir (str): The summary directory to check.

    Returns:
        The value returned by `validate_path` for `summary_dir`.

    Raises:
        LineageParamSummaryPathError: If `validate_path` rejects the path with
            LineageParamValueError or LineageDirNotExistError.
    """
    try:
        summary_dir = validate_path(summary_dir)
    except (LineageParamValueError, LineageDirNotExistError) as error:
        log.error(str(error))
        log.exception(error)
        # Chain the original error so the root cause stays attached.
        raise LineageParamSummaryPathError(str(error.message)) from error
    return summary_dir
def get_summary_lineage(summary_dir, keys=None):
    """
    Get the lineage information according to summary directory and keys.

    The function queries lineage information of single train process
    corresponding to the given summary directory. Users can query the
    information according to `keys`.

    Args:
        summary_dir (str): The summary directory. It contains summary logs for
            one training.
        keys (list[str]): The filter keys of lineage information. The
            acceptable keys are `metric`, `hyper_parameters`, `algorithm`,
            `train_dataset`, `model`, `valid_dataset` and `dataset_graph`.
            If it is `None`, all information will be returned. Default: None.

    Returns:
        dict, the lineage information for one training. An empty dict is
        returned if the query raises LineageSummaryParseException.

    Raises:
        LineageParamSummaryPathError: If summary path is invalid.
        LineageQuerySummaryDataError: If querying summary data fails.
        LineageFileNotFoundError: If the summary log file is not found.

    Examples:
        >>> summary_dir = "/path/to/summary"
        >>> summary_lineage_info = get_summary_lineage(summary_dir)
        >>> hyper_parameters = get_summary_lineage(summary_dir, keys=["hyper_parameters"])
    """
    try:
        summary_dir = validate_path(summary_dir)
    except MindInsightException as error:
        log.error(str(error))
        log.exception(error)
        # Chain the original error so the root cause stays attached.
        raise LineageParamSummaryPathError(str(error.message)) from error

    if keys is not None:
        validate_filter_key(keys)

    summary_path = SummaryPathParser.get_latest_lineage_summary(summary_dir)
    if summary_path is None:
        log.error('There is no summary log file under summary_dir.')
        raise LineageFileNotFoundError(
            'There is no summary log file under summary_dir.')

    try:
        result = Querier(summary_path).get_summary_lineage(summary_dir, filter_keys=keys)
    except LineageSummaryParseException:
        # An unparsable summary is reported as "no lineage", not as an error.
        return {}
    except (LineageQuerierParamException, LineageParamTypeError) as error:
        log.error(str(error))
        log.exception(error)
        raise LineageQuerySummaryDataError("Get summary lineage failed.") from error
    return result[0]
def filter_summary_lineage(data_manager=None, summary_base_dir=None, search_condition=None):
    """
    Filter summary lineage from data_manager or parsing from summaries.

    One of data_manager or summary_base_dir needs to be specified. Support
    getting super_lineage_obj from data_manager or parsing summaries by
    summary_base_dir.

    Args:
        data_manager (DataManager): Data manager defined as
            mindinsight.datavisual.data_transform.data_manager.DataManager
        summary_base_dir (str): The summary base directory. It contains summary
            directories generated by training. It is only used when
            `data_manager` is `None`; otherwise it is replaced by
            `data_manager.summary_base_dir`.
        search_condition (dict): The search condition. `None` is treated as an
            empty condition. Default: None.

    Returns:
        dict, the result of filtering the lineage objects with the search
        condition. If LineageSummaryParseException is raised while collecting
        or querying the lineage objects, `{'object': [], 'count': 0}` is
        returned.

    Raises:
        LineageParamTypeError: If neither data_manager nor summary_base_dir is
            specified.
        LineageSearchConditionParamError: If search_condition is invalid.
        LineageQuerySummaryDataError: If querying summary data fails.
    """
    if data_manager is None and summary_base_dir is None:
        raise LineageParamTypeError("One of data_manager or summary_base_dir needs to be specified.")

    if data_manager is None:
        summary_base_dir = validate_and_normalize_path(summary_base_dir, 'summary_base_dir')
    else:
        # The data manager is authoritative for the base directory.
        summary_base_dir = data_manager.summary_base_dir

    search_condition = {} if search_condition is None else search_condition

    try:
        validate_condition(search_condition)
        validate_search_model_condition(SearchModelConditionParameter, search_condition)
    except MindInsightException as error:
        log.error(str(error))
        log.exception(error)
        # Chain the original error so the root cause stays attached.
        raise LineageSearchConditionParamError(str(error.message)) from error

    try:
        lineage_objects = LineageOrganizer(data_manager, summary_base_dir).super_lineage_objs
        result = Querier(lineage_objects).filter_summary_lineage(condition=search_condition)
    except LineageSummaryParseException:
        # Unparsable summaries are reported as an empty result, not an error.
        result = {'object': [], 'count': 0}
    except (LineageQuerierParamException, LineageParamTypeError) as error:
        log.error(str(error))
        log.exception(error)
        raise LineageQuerySummaryDataError("Filter summary lineage failed.") from error

    return result
def get_summary_infos(cls, file_path):
    """
    Get lineage summary information from summary log file.

    An analyzer is built with `cls(file_path)` and its `get_latest_info()`
    result is returned.

    Args:
        file_path (str): The file path of summary log.

    Returns:
        LineageInfo, the lineage summary information.

    Raises:
        LineageSummaryAnalyzeException: If failed to get lineage information,
            i.e. `get_latest_info()` raised MindInsightException or IOError.
            The original error is attached as the cause.
    """
    analyzer = cls(file_path)
    try:
        lineage_info = analyzer.get_latest_info()
    except (MindInsightException, IOError) as err:
        log.error("Failed to get lineage information.")
        log.exception(err)
        # Chain the original error so the root cause stays attached.
        raise LineageSummaryAnalyzeException() from err

    return lineage_info
def filter_summary_lineage(summary_base_dir, search_condition=None):
    """
    Filter the lineage information under summary base directory according to search condition.

    Users can filter and sort all lineage information according to the search
    condition. The supported filter fields include `summary_dir`, `network`,
    etc. The filter conditions include `eq`, `lt`, `gt`, `le`, `ge` and `in`.
    At the same time, the combined use of these fields and conditions is
    supported. If you want to sort based on filter fields, the field of
    `sorted_name` and `sorted_type` should be specified.

    Users can use `lineage_type` to decide what kind of lineage information to
    query. If the `lineage_type` is `dataset`, the query result is only the
    lineage information related to data augmentation. If the `lineage_type` is
    `model` or `None`, the query result is all lineage information.

    Users can paginate query result based on `offset` and `limit`. The `offset`
    refers to page number. The `limit` refers to the number in one page.

    Args:
        summary_base_dir (str): The summary base directory. It contains summary
            directories generated by training.
        search_condition (dict): The search condition. When filtering and
            sorting, in addition to the following supported fields, fields
            prefixed with `metric_` are also supported. The fields prefixed with
            `metric_` are related to the `metrics` parameter in the training
            script. For example, if the key of `metrics` parameter is
            `accuracy`, the field should be `metric_accuracy`. Default: None.

            - summary_dir (dict): The filter condition of summary directory.

            - loss_function (dict): The filter condition of loss function.

            - train_dataset_path (dict): The filter condition of train dataset path.

            - train_dataset_count (dict): The filter condition of train dataset count.

            - test_dataset_path (dict): The filter condition of test dataset path.

            - test_dataset_count (dict): The filter condition of test dataset count.

            - network (dict): The filter condition of network.

            - optimizer (dict): The filter condition of optimizer.

            - learning_rate (dict): The filter condition of learning rate.

            - epoch (dict): The filter condition of epoch.

            - batch_size (dict): The filter condition of batch size.

            - loss (dict): The filter condition of loss.

            - model_size (dict): The filter condition of model size.

            - dataset_mark (dict): The filter condition of dataset mark.

            - offset (int): Page number, the value range is [0, 100000].

            - limit (int): The number in one page, the value range is [1, 100].

            - sorted_name (str): Specify which field to sort by.

            - sorted_type (str): Specify sort order. It can be `ascending` or
              `descending`.

            - lineage_type (str): It decides what kind of lineage information to
              query. It can be `dataset` or `model`. If it is `dataset`,
              the query result is only the lineage information related to data
              augmentation. If it is `model` or `None`, the query result is all
              lineage information.

    Returns:
        dict, all lineage information under summary base directory according to
        search condition. If the query raises LineageSummaryParseException,
        `{'object': [], 'count': 0}` is returned.

    Raises:
        LineageSearchConditionParamError: If search_condition param is invalid.
        LineageParamSummaryPathError: If summary path is invalid.
        LineageFileNotFoundError: If the summary log file is not found.
        LineageQuerySummaryDataError: If querying summary log file data fails.

    Examples:
        >>> summary_base_dir = "/path/to/summary_base"
        >>> search_condition = {
        ...     'summary_dir': {
        ...         'in': [
        ...             os.path.join(summary_base_dir, 'summary_1'),
        ...             os.path.join(summary_base_dir, 'summary_2'),
        ...             os.path.join(summary_base_dir, 'summary_3')
        ...         ]
        ...     },
        ...     'loss': {
        ...         'gt': 2.0
        ...     },
        ...     'batch_size': {
        ...         'ge': 128,
        ...         'le': 256
        ...     },
        ...     'metric_accuracy': {
        ...         'lt': 0.1
        ...     },
        ...     'sorted_name': 'summary_dir',
        ...     'sorted_type': 'descending',
        ...     'limit': 3,
        ...     'offset': 0,
        ...     'lineage_type': 'model'
        ... }
        >>> summary_lineage = filter_summary_lineage(summary_base_dir)
        >>> summary_lineage_filter = filter_summary_lineage(summary_base_dir, search_condition)
    """
    try:
        summary_base_dir = validate_path(summary_base_dir)
    except (LineageParamValueError, LineageDirNotExistError) as error:
        log.error(str(error))
        log.exception(error)
        # Chain the original error so the root cause stays attached.
        raise LineageParamSummaryPathError(str(error.message)) from error

    search_condition = {} if search_condition is None else search_condition

    try:
        validate_condition(search_condition)
        validate_search_model_condition(SearchModelConditionParameter, search_condition)
    except MindInsightException as error:
        log.error(str(error))
        log.exception(error)
        raise LineageSearchConditionParamError(str(error.message)) from error

    try:
        search_condition = _convert_relative_path_to_abspath(
            summary_base_dir, search_condition)
    except (LineageParamValueError, LineageDirNotExistError) as error:
        log.error(str(error))
        log.exception(error)
        raise LineageParamSummaryPathError(str(error.message)) from error

    summary_path = SummaryPathParser.get_latest_lineage_summaries(
        summary_base_dir)
    if not summary_path:
        log.error('There is no summary log file under summary_base_dir.')
        raise LineageFileNotFoundError(
            'There is no summary log file under summary_base_dir.')

    try:
        result = Querier(summary_path).filter_summary_lineage(
            condition=search_condition)
    except LineageSummaryParseException:
        # Unparsable summaries are reported as an empty result, not an error.
        result = {'object': [], 'count': 0}
    except (LineageQuerierParamException, LineageParamTypeError) as error:
        log.error(str(error))
        log.exception(error)
        raise LineageQuerySummaryDataError("Filter summary lineage failed.") from error

    return result