Ejemplo n.º 1
0
def make_directory(path):
    """Make directory."""
    if path is None or not isinstance(path, str) or not path.strip():
        log.error("Invalid input path: %r.", path)
        raise LineageParamTypeError("Invalid path type")

    # convert relative path to abs path
    path = os.path.realpath(path)
    log.debug("The abs path is %r", path)

    # check path exist and its write permissions]
    if os.path.exists(path):
        real_path = path
    else:
        # All exceptions need to be caught because create directory maybe have some limit(permissions)
        log.debug("The directory(%s) doesn't exist, will create it", path)
        try:
            os.makedirs(path, exist_ok=True)
            real_path = path
        except PermissionError as err:
            log.error("No write permission on the directory(%r), error = %r",
                      path, err)
            raise LineageParamTypeError(
                "No write permission on the directory.")
    return real_path
Ejemplo n.º 2
0
def validate_filter_key(keys):
    """
    Verify the keys of filtering is valid or not.

    Args:
        keys (list): The keys to get the relative lineage info.

    Raises:
        LineageParamTypeError: If keys is not list.
        LineageParamValueError: If the value of keys is invalid.
    """
    filter_keys = [
        'metric', 'hyper_parameters', 'algorithm', 'train_dataset', 'model',
        'valid_dataset', 'dataset_graph'
    ]

    if not isinstance(keys, list):
        log.error("Keys must be list.")
        raise LineageParamTypeError("Keys must be list.")

    for element in keys:
        if not isinstance(element, str):
            log.error("Element of keys must be str.")
            raise LineageParamTypeError("Element of keys must be str.")

    if not set(keys).issubset(filter_keys):
        err_msg = "Keys must be in {}.".format(filter_keys)
        log.error(err_msg)
        raise LineageParamValueError(err_msg)
Ejemplo n.º 3
0
def general_get_summary_lineage(data_manager=None,
                                summary_dir=None,
                                keys=None):
    """
    Get summary lineage from data_manager or parsing from summaries.

    One of data_manager or summary_dir needs to be specified. Support getting
    super_lineage_obj from data_manager or parsing summaries by summary_dir.

    Args:
        data_manager (DataManager): Data manager defined as
            mindinsight.datavisual.data_transform.data_manager.DataManager
        summary_dir (str): The summary directory. It contains summary logs for
            one training.
        keys (list[str]): The filter keys of lineage information. The acceptable
            keys are `metric`, `user_defined`, `hyper_parameters`, `algorithm`,
            `train_dataset`, `model`, `valid_dataset` and `dataset_graph`.
            If it is `None`, all information will be returned. Default: None.

    Returns:
        dict, the lineage information for one training.

    Raises:
        LineageParamSummaryPathError: If summary path is invalid.
        LineageQuerySummaryDataError: If querying summary data fails.
        LineageFileNotFoundError: If the summary log file is not found.

    """
    default_result = {}
    if data_manager is None and summary_dir is None:
        raise LineageParamTypeError(
            "One of data_manager or summary_dir needs to be specified.")
    if data_manager is not None and summary_dir is None:
        raise LineageParamTypeError(
            "If data_manager is specified, the summary_dir needs to be "
            "specified as relative path.")

    if keys is not None:
        validate_filter_key(keys)

    if data_manager is None:
        normalize_summary_dir(summary_dir)
        super_lineage_obj = LineageParser(summary_dir).super_lineage_obj
    else:
        validate_train_id(summary_dir)
        super_lineage_obj = LineageOrganizer(
            data_manager=data_manager).get_super_lineage_obj(summary_dir)

    if super_lineage_obj is None:
        return default_result

    try:
        result = Querier({
            summary_dir: super_lineage_obj
        }).get_summary_lineage(summary_dir, keys)
    except (LineageQuerierParamException, LineageParamTypeError) as error:
        log.error(str(error))
        log.exception(error)
        raise LineageQuerySummaryDataError("Get summary lineage failed.")
    return result[0]
Ejemplo n.º 4
0
def validate_condition(search_condition):
    """
    Verify the param in search_condition is valid or not.

    Args:
        search_condition (dict): The search condition.

    Raises:
        LineageParamTypeError: If the type of the param in search_condition is invalid.
        LineageParamValueError: If the value of the param in search_condition is invalid.
    """
    if not isinstance(search_condition, dict):
        log.error("Invalid search_condition type, it should be dict.")
        raise LineageParamTypeError("Invalid search_condition type, "
                                    "it should be dict.")

    if "limit" in search_condition:
        if isinstance(search_condition.get("limit"), bool) \
                or not isinstance(search_condition.get("limit"), int):
            log.error("The limit must be int.")
            raise LineageParamTypeError("The limit must be int.")

    if "offset" in search_condition:
        if isinstance(search_condition.get("offset"), bool) \
                or not isinstance(search_condition.get("offset"), int):
            log.error("The offset must be int.")
            raise LineageParamTypeError("The offset must be int.")

    if "sorted_name" in search_condition:
        sorted_name = search_condition.get("sorted_name")
        err_msg = "The sorted_name must be in {} or start with " \
                  "`metric/` or `user_defined/`.".format(list(FIELD_MAPPING.keys()))
        if not isinstance(sorted_name, str):
            log.error(err_msg)
            raise LineageParamValueError(err_msg)
        if not (sorted_name in FIELD_MAPPING or
                (sorted_name.startswith('metric/')
                 and len(sorted_name) > len('metric/')) or
                (sorted_name.startswith('user_defined/')
                 and len(sorted_name) > len('user_defined/'))
                or sorted_name in ['tag']):
            log.error(err_msg)
            raise LineageParamValueError(err_msg)

    sorted_type_param = ['ascending', 'descending', None]
    if "sorted_type" in search_condition:
        if "sorted_name" not in search_condition:
            log.error("The sorted_name have to exist when sorted_type exists.")
            raise LineageParamValueError(
                "The sorted_name have to exist when sorted_type exists.")

        if search_condition.get("sorted_type") not in sorted_type_param:
            err_msg = "The sorted_type must be ascending or descending."
            log.error(err_msg)
            raise LineageParamValueError(err_msg)
Ejemplo n.º 5
0
 def _check_objs(self, super_lineage_objs):
     if super_lineage_objs is None:
         raise LineageQuerierParamException(
             'querier_init_param', 'The querier init param is empty.')
     if not isinstance(super_lineage_objs, dict):
         raise LineageParamTypeError("Init param should be a dict.")
     return super_lineage_objs
Ejemplo n.º 6
0
    def check_comparision(self, data, **kwargs):
        """Check comparision for all parameters in schema."""
        for attr, condition in data.items():
            if attr in ["limit", "offset", "sorted_name", "sorted_type", 'lineage_type']:
                continue

            if not isinstance(attr, str):
                raise LineageParamValueError('The search attribute not supported.')

            if attr not in FIELD_MAPPING and not attr.startswith(('metric/', 'user_defined/')):
                raise LineageParamValueError('The search attribute not supported.')

            if not isinstance(condition, dict):
                raise LineageParamTypeError("The search_condition element {} should be dict."
                                            .format(attr))

            for key in condition.keys():
                if key not in ["eq", "lt", "gt", "le", "ge", "in"]:
                    raise LineageParamValueError("The compare condition should be in "
                                                 "('eq', 'lt', 'gt', 'le', 'ge', 'in').")

            if attr.startswith('metric/'):
                if len(attr) == 7:
                    raise LineageParamValueError(
                        'The search attribute not supported.'
                    )
                try:
                    SearchModelConditionParameter.check_param_value_type(condition)
                except ValidationError:
                    raise MindInsightException(
                        error=LineageErrors.LINEAGE_PARAM_METRIC_ERROR,
                        message=LineageErrorMsg.LINEAGE_METRIC_ERROR.value.format(attr)
                    )
        return data
Ejemplo n.º 7
0
def _package_parameter(key, value, message):
    """
    Package parameters in operation.

    Args:
        key (str): Operation name.
        value (Union[str, bool, int, float, list, None]): Operation args.
        message (OperationParameter): Operation proto message.
    """
    if isinstance(value, str):
        message.mapStr[key] = value
    elif isinstance(value, bool):
        message.mapBool[key] = value
    elif isinstance(value, int):
        message.mapInt[key] = value
    elif isinstance(value, float):
        message.mapDouble[key] = value
    elif isinstance(value, list) and key != "operations":
        if value:
            replace_value_list = list(
                map(lambda x: "" if x is None else x, value))
            message.mapStrList[key].strValue.extend(replace_value_list)
    elif value is None:
        message.mapStr[key] = "None"
    else:
        error_msg = "Parameter {} is not supported " \
                    "in event package.".format(key)
        log.error(error_msg)
        raise LineageParamTypeError(error_msg)
Ejemplo n.º 8
0
    def _parse_summary_logs(self, summary_path):
        """
        Parse summary logs.

        Args:
            summary_path (Union[str, list[str]]): The single summary log path or
                a list of summary log path.
        """
        if not summary_path:
            raise LineageQuerierParamException('summary_path',
                                               'The summary path is empty.')
        if isinstance(summary_path, str):
            self._parse_summary_log(summary_path, 0)
        elif isinstance(summary_path, list):
            index = 0
            for path in summary_path:
                parse_result = self._parse_summary_log(path, index)
                if parse_result:
                    index += 1
        else:
            raise LineageParamTypeError('Summary path is not str or list.')

        if self._parse_failed_paths:
            logger.info('Parse failed paths: %s',
                        str(self._parse_failed_paths))

        if not self._lineage_objects:
            raise LineageSummaryParseException()
Ejemplo n.º 9
0
def general_filter_summary_lineage(data_manager=None,
                                   summary_base_dir=None,
                                   search_condition=None,
                                   added=False):
    """
    Filter summary lineage from data_manager or parsing from summaries.

    One of data_manager or summary_base_dir needs to be specified. Support getting
    super_lineage_obj from data_manager or parsing summaries by summary_base_dir.

    Args:
        data_manager (DataManager): Data manager defined as
            mindinsight.datavisual.data_transform.data_manager.DataManager
        summary_base_dir (str): The summary base directory. It contains summary
            directories generated by training.
        search_condition (dict): The search condition.
    """
    if data_manager is None and summary_base_dir is None:
        raise LineageParamTypeError(
            "One of data_manager or summary_base_dir needs to be specified.")

    if data_manager is None:
        summary_base_dir = normalize_summary_dir(summary_base_dir)
    else:
        summary_base_dir = data_manager.summary_base_dir

    search_condition = {} if search_condition is None else search_condition

    try:
        validate_condition(search_condition)
        validate_search_model_condition(SearchModelConditionParameter,
                                        search_condition)
    except MindInsightException as error:
        log.error(str(error))
        log.exception(error)
        raise LineageSearchConditionParamError(str(error.message))

    try:
        search_condition = _convert_relative_path_to_abspath(
            summary_base_dir, search_condition)
    except (LineageParamValueError, LineageDirNotExistError) as error:
        log.error(str(error))
        log.exception(error)
        raise LineageParamSummaryPathError(str(error.message))

    try:
        lineage_objects = LineageOrganizer(data_manager,
                                           summary_base_dir).super_lineage_objs
        result = Querier(lineage_objects).filter_summary_lineage(
            condition=search_condition, added=added)
    except LineageSummaryParseException:
        result = {'object': [], 'count': 0}
    except (LineageQuerierParamException, LineageParamTypeError) as error:
        log.error(str(error))
        log.exception(error)
        raise LineageQuerySummaryDataError("Filter summary lineage failed.")

    return result
Ejemplo n.º 10
0
 def test_invalid_search_condition(self, mock_path, mock_valid):
     """Test filter_summary_lineage with invalid invalid param."""
     mock_path.return_value = None
     mock_valid.side_effect = LineageParamTypeError(
         'Invalid search_condition type.')
     self.assertRaisesRegex(LineageSearchConditionParamError,
                            'Invalid search_condition type.',
                            filter_summary_lineage, '/path/to/summary/dir',
                            'invalid_condition')
Ejemplo n.º 11
0
def validate_user_defined_info(user_defined_info):
    """
    Validate user defined info, delete the item if its key is in lineage.

    Args:
        user_defined_info (dict): The user defined info.

    Raises:
        LineageParamTypeError: If the type of parameters is invalid.
        LineageParamValueError: If user defined keys have been defined in lineage.

    """
    if not isinstance(user_defined_info, dict):
        log.error("Invalid user defined info. It should be a dict.")
        raise LineageParamTypeError(
            "Invalid user defined info. It should be dict.")
    for key, value in user_defined_info.items():
        if not isinstance(key, str):
            error_msg = "Dict key type {} is not supported in user defined info." \
                        "Only str is permitted now.".format(type(key))
            log.error(error_msg)
            raise LineageParamTypeError(error_msg)
        if not isinstance(value, (int, str, float)):
            error_msg = "Dict value type {} is not supported in user defined info." \
                        "Only str, int and float are permitted now.".format(type(value))
            log.error(error_msg)
            raise LineageParamTypeError(error_msg)

    field_map = set(FIELD_MAPPING.keys())
    user_defined_keys = set(user_defined_info.keys())
    insertion = list(field_map & user_defined_keys)

    if insertion:
        for key in insertion:
            user_defined_info.pop(key)
        raise LineageParamValueError(
            "There are some keys have defined in lineage. "
            "Duplicated key(s): %s. " % insertion)