Example #1
0
def general_get_summary_lineage(data_manager=None,
                                summary_dir=None,
                                keys=None):
    """
    Get summary lineage from data_manager or parsing from summaries.

    One of data_manager or summary_dir needs to be specified. Support getting
    super_lineage_obj from data_manager or parsing summaries by summary_dir.

    Args:
        data_manager (DataManager): Data manager defined as
            mindinsight.datavisual.data_transform.data_manager.DataManager
        summary_dir (str): The summary directory. It contains summary logs for
            one training.
        keys (list[str]): The filter keys of lineage information. The acceptable
            keys are `metric`, `user_defined`, `hyper_parameters`, `algorithm`,
            `train_dataset`, `model`, `valid_dataset` and `dataset_graph`.
            If it is `None`, all information will be returned. Default: None.

    Returns:
        dict, the lineage information for one training.

    Raises:
        LineageParamSummaryPathError: If summary path is invalid.
        LineageQuerySummaryDataError: If querying summary data fails.
        LineageFileNotFoundError: If the summary log file is not found.

    """
    default_result = {}
    if data_manager is None and summary_dir is None:
        raise LineageParamTypeError(
            "One of data_manager or summary_dir needs to be specified.")
    if data_manager is not None and summary_dir is None:
        raise LineageParamTypeError(
            "If data_manager is specified, the summary_dir needs to be "
            "specified as relative path.")

    if keys is not None:
        validate_filter_key(keys)

    if data_manager is None:
        normalize_summary_dir(summary_dir)
        super_lineage_obj = LineageParser(summary_dir).super_lineage_obj
    else:
        validate_train_id(summary_dir)
        super_lineage_obj = LineageOrganizer(
            data_manager=data_manager).get_super_lineage_obj(summary_dir)

    if super_lineage_obj is None:
        return default_result

    try:
        result = Querier({
            summary_dir: super_lineage_obj
        }).get_summary_lineage(summary_dir, keys)
    except (LineageQuerierParamException, LineageParamTypeError) as error:
        log.error(str(error))
        log.exception(error)
        raise LineageQuerySummaryDataError("Get summary lineage failed.")
    return result[0]
Example #2
0
    def load(self):
        """Find and load summaries."""
        # get sorted lineage files
        lineage_files = SummaryPathParser.get_lineage_summaries(self._summary_dir, is_sorted=True)
        if not lineage_files:
            logger.info('There is no summary log file under summary_dir %s.', self._summary_dir)
            raise LineageFileNotFoundError(
                'There is no summary log file under summary_dir.'
            )
        self._init_if_files_deleted(lineage_files)

        index = 0
        if self._latest_filename is not None:
            index = lineage_files.index(self._latest_filename)

        for filename in lineage_files[index:]:
            if filename != self._latest_filename:
                self._latest_filename = filename
                self._latest_file_size = 0

            file_path = os.path.join(self._summary_dir, filename)
            new_size = FileHandler(file_path).size
            if new_size == self._latest_file_size:
                continue

            self._latest_file_size = new_size
            try:
                self._parse_summary_log()
            except (LineageSummaryAnalyzeException,
                    LineageEventNotExistException,
                    LineageEventFieldNotExistException) as error:
                logger.debug("Parse file failed, file_path is %s. Detail: %s", file_path, str(error))
            except MindInsightException as error:
                logger.exception(error)
                logger.debug("Parse file failed, file_path is %s.", file_path)
Example #3
0
    def _parse_summary_log(self):
        """
        Parse the single summary log.

        Returns:
            bool, `True` if parse summary log success, else `False`.
        """
        file_path = os.path.realpath(
            os.path.join(self._summary_dir, self._latest_filename))
        try:
            lineage_info = LineageSummaryAnalyzer.get_summary_infos(file_path)
            user_defined_info = LineageSummaryAnalyzer.get_user_defined_info(
                file_path)
            self._update_lineage_obj(lineage_info, user_defined_info)
        except LineageSummaryAnalyzeException:
            logger.warning("Parse file failed under summary_dir %s.",
                           file_path)
        except (LineageEventNotExistException,
                LineageEventFieldNotExistException) as error:
            logger.warning(
                "Parse file failed under summary_dir %s. Detail: %s.",
                file_path, str(error))
        except MindInsightException as error:
            logger.exception(error)
            logger.warning("Parse file failed under summary_dir %s.",
                           file_path)
Example #4
0
def normalize_summary_dir(summary_dir):
    """Normalize summary dir."""
    try:
        summary_dir = validate_path(summary_dir)
    except (LineageParamValueError, LineageDirNotExistError) as error:
        log.error(str(error))
        log.exception(error)
        raise LineageParamSummaryPathError(str(error.message))
    return summary_dir
Example #5
0
def get_summary_lineage(summary_dir, keys=None):
    """
    Get the lineage information according to summary directory and keys.

    The function queries lineage information of single train process
    corresponding to the given summary directory. Users can query the
    information according to `keys`.

    Args:
        summary_dir (str): The summary directory. It contains summary logs for
            one training.
        keys (list[str]): The filter keys of lineage information. The acceptable
            keys are `metric`, `hyper_parameters`, `algorithm`, `train_dataset`,
            `model`, `valid_dataset` and `dataset_graph`. If it is `None`, all
            information will be returned. Default: None.

    Returns:
        dict, the lineage information for one training.

    Raises:
        LineageParamSummaryPathError: If summary path is invalid.
        LineageQuerySummaryDataError: If querying summary data fails.
        LineageFileNotFoundError: If the summary log file is not found.

    Examples:
        >>> summary_dir = "/path/to/summary"
        >>> summary_lineage_info = get_summary_lineage(summary_dir)
        >>> hyper_parameters = get_summary_lineage(summary_dir, keys=["hyper_parameters"])
    """
    try:
        summary_dir = validate_path(summary_dir)
    except MindInsightException as error:
        log.error(str(error))
        log.exception(error)
        raise LineageParamSummaryPathError(str(error.message))

    if keys is not None:
        validate_filter_key(keys)

    summary_path = SummaryPathParser.get_latest_lineage_summary(summary_dir)
    if summary_path is None:
        log.error('There is no summary log file under summary_dir.')
        raise LineageFileNotFoundError(
            'There is no summary log file under summary_dir.')

    try:
        result = Querier(summary_path).get_summary_lineage(summary_dir,
                                                           filter_keys=keys)
    except LineageSummaryParseException:
        return {}
    except (LineageQuerierParamException, LineageParamTypeError) as error:
        log.error(str(error))
        log.exception(error)
        raise LineageQuerySummaryDataError("Get summary lineage failed.")

    return result[0]
Example #6
0
def filter_summary_lineage(data_manager=None, summary_base_dir=None, search_condition=None):
    """
    Filter summary lineage from data_manager or parsing from summaries.

    One of data_manager or summary_base_dir needs to be specified. Support getting
    super_lineage_obj from data_manager or parsing summaries by summary_base_dir.

    Args:
        data_manager (DataManager): Data manager defined as
            mindinsight.datavisual.data_transform.data_manager.DataManager
        summary_base_dir (str): The summary base directory. It contains summary
            directories generated by training.
        search_condition (dict): The search condition.
    """
    if data_manager is None and summary_base_dir is None:
        raise LineageParamTypeError("One of data_manager or summary_base_dir needs to be specified.")

    if data_manager is None:
        summary_base_dir = validate_and_normalize_path(summary_base_dir, 'summary_base_dir')
    else:
        summary_base_dir = data_manager.summary_base_dir

    search_condition = {} if search_condition is None else search_condition

    try:
        validate_condition(search_condition)
        validate_search_model_condition(SearchModelConditionParameter, search_condition)
    except MindInsightException as error:
        log.error(str(error))
        log.exception(error)
        raise LineageSearchConditionParamError(str(error.message))

    try:
        lineage_objects = LineageOrganizer(data_manager, summary_base_dir).super_lineage_objs
        result = Querier(lineage_objects).filter_summary_lineage(condition=search_condition)
    except LineageSummaryParseException:
        result = {'object': [], 'count': 0}
    except (LineageQuerierParamException, LineageParamTypeError) as error:
        log.error(str(error))
        log.exception(error)
        raise LineageQuerySummaryDataError("Filter summary lineage failed.")

    return result
    def get_summary_infos(cls, file_path):
        """
        Get lineage summary information from summary log file.

        Args:
            file_path (str): The file path of summary log.

        Returns:
            LineageInfo, the lineage summary information.

        Raises:
            LineageSummaryAnalyzeException: If failed to get lineage information.
        """
        analyzer = cls(file_path)
        try:
            lineage_info = analyzer.get_latest_info()
        except (MindInsightException, IOError) as err:
            log.error("Failed to get lineage information.")
            log.exception(err)
            raise LineageSummaryAnalyzeException()

        return lineage_info
Example #8
0
def filter_summary_lineage(summary_base_dir, search_condition=None):
    """
    Filter the lineage information under summary base directory according to search condition.

    Users can filter and sort all lineage information according to the search
    condition. The supported filter fields include `summary_dir`, `network`,
    etc. The filter conditions include `eq`, `lt`, `gt`, `le`, `ge` and `in`.
    At the same time, the combined use of these fields and conditions is
    supported. If you want to sort based on filter fields, the field of
    `sorted_name` and `sorted_type` should be specified.

    Users can use `lineage_type` to decide what kind of lineage information to
    query. If the `lineage_type` is `dataset`, the query result is only the
    lineage information related to data augmentation. If the `lineage_type` is
    `model` or `None`, the query result is all lineage information.

    Users can paginate query result based on `offset` and `limit`. The `offset`
    refers to page number. The `limit` refers to the number in one page.

    Args:
        summary_base_dir (str): The summary base directory. It contains summary
            directories generated by training.
        search_condition (dict): The search condition. When filtering and
            sorting, in addition to the following supported fields, fields
            prefixed with `metric_` are also supported. The fields prefixed with
            `metric_` are related to the `metrics` parameter in the training
            script. For example, if the key of `metrics` parameter is
            `accuracy`, the field should be `metric_accuracy`. Default: None.

            - summary_dir (dict): The filter condition of summary directory.

            - loss_function (dict): The filter condition of loss function.

            - train_dataset_path (dict): The filter condition of train dataset path.

            - train_dataset_count (dict): The filter condition of train dataset count.

            - test_dataset_path (dict): The filter condition of test dataset path.

            - test_dataset_count (dict): The filter condition of test dataset count.

            - network (dict): The filter condition of network.

            - optimizer (dict): The filter condition of optimizer.

            - learning_rate (dict): The filter condition of learning rate.

            - epoch (dict): The filter condition of epoch.

            - batch_size (dict): The filter condition of batch size.

            - loss (dict): The filter condition of loss.

            - model_size (dict): The filter condition of model size.

            - dataset_mark (dict): The filter condition of dataset mark.

            - offset (int): Page number, the value range is [0, 100000].

            - limit (int): The number in one page, the value range is [1, 100].

            - sorted_name (str): Specify which field to sort by.

            - sorted_type (str): Specify sort order. It can be `ascending` or
              `descending`.

            - lineage_type (str): It decides what kind of lineage information to
              query. It can be `dataset` or `model`. If it is `dataset`,
              the query result is only the lineage information related to data
              augmentation. If it is `model` or `None`, the query result is all
              lineage information.

    Returns:
        dict, all lineage information under summary base directory according to
        search condition.

    Raises:
        LineageSearchConditionParamError: If search_condition param is invalid.
        LineageParamSummaryPathError: If summary path is invalid.
        LineageFileNotFoundError: If the summary log file is not found.
        LineageQuerySummaryDataError: If querying summary log file data fails.

    Examples:
        >>> summary_base_dir = "/path/to/summary_base"
        >>> search_condition = {
        >>>     'summary_dir': {
        >>>         'in': [
        >>>             os.path.join(summary_base_dir, 'summary_1'),
        >>>             os.path.join(summary_base_dir, 'summary_2'),
        >>>             os.path.join(summary_base_dir, 'summary_3')
        >>>         ]
        >>>     },
        >>>     'loss': {
        >>>         'gt': 2.0
        >>>     },
        >>>     'batch_size': {
        >>>         'ge': 128,
        >>>         'le': 256
        >>>     },
        >>>     'metric_accuracy': {
        >>>         'lt': 0.1
        >>>     },
        >>>     'sorted_name': 'summary_dir',
        >>>     'sorted_type': 'descending',
        >>>     'limit': 3,
        >>>     'offset': 0,
        >>>     'lineage_type': 'model'
        >>> }
        >>> summary_lineage = filter_summary_lineage(summary_base_dir)
        >>> summary_lineage_filter = filter_summary_lineage(summary_base_dir, search_condition)
    """
    try:
        summary_base_dir = validate_path(summary_base_dir)
    except (LineageParamValueError, LineageDirNotExistError) as error:
        log.error(str(error))
        log.exception(error)
        raise LineageParamSummaryPathError(str(error.message))

    search_condition = {} if search_condition is None else search_condition

    try:
        validate_condition(search_condition)
        validate_search_model_condition(SearchModelConditionParameter,
                                        search_condition)
    except MindInsightException as error:
        log.error(str(error))
        log.exception(error)
        raise LineageSearchConditionParamError(str(error.message))

    try:
        search_condition = _convert_relative_path_to_abspath(
            summary_base_dir, search_condition)
    except (LineageParamValueError, LineageDirNotExistError) as error:
        log.error(str(error))
        log.exception(error)
        raise LineageParamSummaryPathError(str(error.message))

    summary_path = SummaryPathParser.get_latest_lineage_summaries(
        summary_base_dir)
    if not summary_path:
        log.error('There is no summary log file under summary_base_dir.')
        raise LineageFileNotFoundError(
            'There is no summary log file under summary_base_dir.')

    try:
        result = Querier(summary_path).filter_summary_lineage(
            condition=search_condition)
    except LineageSummaryParseException:
        result = {'object': [], 'count': 0}
    except (LineageQuerierParamException, LineageParamTypeError) as error:
        log.error(str(error))
        log.exception(error)
        raise LineageQuerySummaryDataError("Filter summary lineage failed.")

    return result