Ejemplo n.º 1
0
    def _parse_summary_logs(self, summary_path):
        """
        Parse summary logs.

        Args:
            summary_path (Union[str, list[str]]): The single summary log path or
                a list of summary log path.
        """
        if not summary_path:
            raise LineageQuerierParamException('summary_path',
                                               'The summary path is empty.')
        if isinstance(summary_path, str):
            self._parse_summary_log(summary_path, 0)
        elif isinstance(summary_path, list):
            index = 0
            for path in summary_path:
                parse_result = self._parse_summary_log(path, index)
                if parse_result:
                    index += 1
        else:
            raise LineageParamTypeError('Summary path is not str or list.')

        if self._parse_failed_paths:
            logger.info('Parse failed paths: %s',
                        str(self._parse_failed_paths))

        if not self._lineage_objects:
            raise LineageSummaryParseException()
Ejemplo n.º 2
0
 def _check_objs(self, super_lineage_objs):
     if super_lineage_objs is None:
         raise LineageQuerierParamException(
             'querier_init_param', 'The querier init param is empty.')
     if not isinstance(super_lineage_objs, dict):
         raise LineageParamTypeError("Init param should be a dict.")
     return super_lineage_objs
Ejemplo n.º 3
0
    def get_summary_lineage(self, summary_dir=None, filter_keys=None):
        """
        Get summary lineage information.

        If a summary dir is specified, the special summary lineage information
        will be found. If the summary dir is `None`, all summary lineage
        information will be found.

        Returns the content corresponding to the specified field in the filter
        key. The contents of the filter key include `metric`, `hyper_parameters`,
        `algorithm`, `train_dataset`, `valid_dataset` and `model`. You can
        specify multiple filter keys in the `filter_keys`. If the parameter is
        `None`, complete information will be returned.

        Args:
            summary_dir (Union[str, None]): Summary log dir. Default: None.
            filter_keys (Union[list[str], None]): Filter keys. Default: None.

        Returns:
            list[dict], summary lineage information.

        Raises:
            LineageQuerierParamException: If a filter key is invalid, or if
                `summary_dir` is not present in the index map.
        """
        # NOTE(review): presumably retries logs that failed to parse earlier;
        # confirm against `_parse_fail_summary_logs`.
        self._parse_fail_summary_logs()

        if filter_keys is None:
            filter_keys = LineageFilterKey.get_key_list()
        else:
            for key in filter_keys:
                if not LineageFilterKey.is_valid_filter_key(key):
                    # The first argument is the parameter *name*, the same
                    # convention the 'summary_dir' error below follows;
                    # passing the key list itself garbled the message.
                    raise LineageQuerierParamException(
                        'filter_keys',
                        'The filter key {} is invalid.'.format(key))

        if summary_dir is None:
            result = [
                item.get_summary_info(filter_keys)
                for item in self._lineage_objects
            ]
        else:
            index = self._index_map.get(summary_dir)
            if index is None:
                raise LineageQuerierParamException(
                    'summary_dir',
                    'Summary dir {} does not exist.'.format(summary_dir))
            lineage_obj = self._lineage_objects[index]
            result = [lineage_obj.get_summary_info(filter_keys)]
        return result
Ejemplo n.º 4
0
        def _filter(lineage_obj: LineageObj):
            """
            Tell whether a lineage object satisfies every field condition.

            Reads `condition` and `self` from the enclosing scope.

            Returns:
                bool, `False` as soon as one expression does not match,
                otherwise `True`.

            Raises:
                LineageQuerierParamException: If a field or an expression in
                    the condition is rejected.
            """
            for field_name, expressions in condition.items():
                # Keys recognised as condition types are not field filters.
                if ConditionParam.is_condition_type(field_name):
                    continue
                # NOTE(review): a truthy result from `_is_valid_field` is
                # treated as "unsupported" here despite the helper's name;
                # confirm against its implementation.
                if self._is_valid_field(field_name):
                    raise LineageQuerierParamException(
                        'condition',
                        'The field {} not supported'.format(field_name))

                actual = lineage_obj.get_value_by_key(field_name)
                for operator_name, expected in expressions.items():
                    if ExpressionType.is_valid_exp(operator_name):
                        if ExpressionType.is_match(operator_name, expected,
                                                   actual):
                            continue
                        return False
                    raise LineageQuerierParamException(
                        'condition',
                        'The expression {} not supported.'.format(
                            operator_name))
            return True
Ejemplo n.º 5
0
    def test_failed_to_querier(self, mock_query, mock_parse, *args):
        """Test filter_summary_lineage when the querier raises."""
        mock_parse.return_value = ['/path/to/summary/file']
        args[0].return_value = None

        # With a summary parse failure the call returns an empty result.
        mock_query.side_effect = LineageSummaryParseException()
        lineage = filter_summary_lineage('/path/to/summary')
        assert lineage == {'object': [], 'count': 0}

        # A querier param failure surfaces as a query summary data error.
        mock_query.side_effect = LineageQuerierParamException(['keys'], 'key')
        with self.assertRaisesRegex(LineageQuerySummaryDataError,
                                    'Filter summary lineage failed.'):
            filter_summary_lineage('/path/to/summary/dir')
Ejemplo n.º 6
0
    def test_get_summary_lineage_failed3(self, mock_summary, mock_querier,
                                         mock_valid):
        """Test get_summary_lineage when the querier raises."""
        mock_summary.return_value = '/path/to/summary/file'
        mock_valid.return_value = '/path/to/summary_dir'

        # With a summary parse failure the call returns an empty dict.
        querier_instance = mock_querier.return_value
        querier_instance.get_summary_lineage.side_effect = \
            LineageSummaryParseException()
        lineage = get_summary_lineage('/path/to/summary_dir')
        assert lineage == {}

        # Calling the patched querier itself now raises a param error.
        mock_querier.side_effect = LineageQuerierParamException(['keys'],
                                                                'key')
        with self.assertRaisesRegex(LineageQuerySummaryDataError,
                                    'Get summary lineage failed'):
            get_summary_lineage('/path/to/summary_dir')
Ejemplo n.º 7
0
    def _sorted_results(self, results, condition):
        """
        Sort results by the sort settings found in the condition.

        When the condition carries no sorted name, the results are returned
        as they are. The name `tag` is looked up in each object's
        `added_info`; any other name is looked up through
        `lineage_obj.get_value_by_key`.

        Args:
            results (list): The objects to sort.
            condition (dict): The condition holding the sort settings.

        Returns:
            list, the sorted results, or the input when no sort is requested.

        Raises:
            LineageQuerierParamException: If the sorted name is rejected.
        """
        def _compare_values(left, right):
            # `None` sorts before any real value; two `None` values tie.
            if left is None:
                return 0 if right is None else -1
            if right is None:
                return 1
            try:
                return (left > right) - (left < right)
            except TypeError:
                # Values that cannot be ordered are ranked by type name.
                left_type = type(left).__name__
                right_type = type(right).__name__
                return (left_type > right_type) - (left_type < right_type)

        def _by_added_info(first: SuperLineageObj, second: SuperLineageObj):
            return _compare_values(first.added_info.get(sort_field),
                                   second.added_info.get(sort_field))

        def _by_lineage_value(first: SuperLineageObj,
                              second: SuperLineageObj):
            return _compare_values(
                first.lineage_obj.get_value_by_key(sort_field),
                second.lineage_obj.get_value_by_key(sort_field))

        if ConditionParam.SORTED_NAME.value not in condition:
            return results

        sort_field = condition.get(ConditionParam.SORTED_NAME.value)
        sort_order = condition.get(ConditionParam.SORTED_TYPE.value)
        descending = sort_order == 'descending'

        if sort_field in ['tag']:
            comparator = _by_added_info
        else:
            # NOTE(review): a truthy result from `_is_valid_field` is treated
            # as "unsupported" here despite the helper's name; confirm.
            if self._is_valid_field(sort_field):
                raise LineageQuerierParamException(
                    'condition',
                    'The sorted name {} not supported.'.format(sort_field))
            comparator = _by_lineage_value

        return sorted(results,
                      key=functools.cmp_to_key(comparator),
                      reverse=descending)
Ejemplo n.º 8
0
    def filter_summary_lineage(self, condition=None):
        """
        Filter and sort lineage information based on the specified condition.

        See `ConditionParam` and `ExpressionType` class for the rule of
        filtering and sorting. The filtering and sorting fields are defined in
        `FIELD_MAPPING` or prefixed with `metric_`.

        If the condition is `None`, all model lineage information will be
        returned.

        When sorting, `None` values are ordered before any other value. Values
        whose types cannot be compared with each other are ordered by their
        type names instead of aborting the sort with a `TypeError`.

        Args:
            condition (Union[dict, None]): Filter and sort condition.
                Default: None.

        Returns:
            dict, filtered and sorted model lineage information. The `object`
            entry holds the items left after limit and offset handling; the
            `count` entry is the number of items that passed the filter.

        Raises:
            LineageQuerierParamException: If a field, an expression or the
                sorted name in the condition is rejected.
        """
        def _filter(lineage_obj: LineageObj):
            # Returns True only if every expression of every field matches.
            for condition_key, condition_value in condition.items():
                # Keys recognised as condition types are not field filters.
                if ConditionParam.is_condition_type(condition_key):
                    continue
                # NOTE(review): a truthy result from `_is_valid_field` is
                # treated as "unsupported" here despite the helper's name;
                # confirm against its implementation.
                if self._is_valid_field(condition_key):
                    raise LineageQuerierParamException(
                        'condition',
                        'The field {} not supported'.format(condition_key))

                value = lineage_obj.get_value_by_key(condition_key)
                for exp_key, exp_value in condition_value.items():
                    if not ExpressionType.is_valid_exp(exp_key):
                        raise LineageQuerierParamException(
                            'condition',
                            'The expression {} not supported.'.format(exp_key))
                    if not ExpressionType.is_match(exp_key, exp_value, value):
                        return False
            return True

        def _cmp(obj1: LineageObj, obj2: LineageObj):
            # Three-way comparison on the sorted field; `sorted_name` is
            # bound in the enclosing scope before the sort runs.
            value1 = obj1.get_value_by_key(sorted_name)
            value2 = obj2.get_value_by_key(sorted_name)

            if value1 is None and value2 is None:
                cmp_result = 0
            elif value1 is None:
                cmp_result = -1
            elif value2 is None:
                cmp_result = 1
            else:
                try:
                    cmp_result = (value1 > value2) - (value1 < value2)
                except TypeError:
                    # Values of unorderable types (e.g. str vs int) would
                    # otherwise abort the whole sort; rank them by type name
                    # so the ordering stays deterministic.
                    type1 = type(value1).__name__
                    type2 = type(value2).__name__
                    cmp_result = (type1 > type2) - (type1 < type2)

            return cmp_result

        # NOTE(review): presumably retries logs that failed to parse earlier;
        # confirm against `_parse_fail_summary_logs`.
        self._parse_fail_summary_logs()

        if condition is None:
            condition = {}
        result = list(filter(_filter, self._lineage_objects))

        if ConditionParam.SORTED_NAME.value in condition:
            sorted_name = condition.get(ConditionParam.SORTED_NAME.value)
            if self._is_valid_field(sorted_name):
                raise LineageQuerierParamException(
                    'condition',
                    'The sorted name {} not supported.'.format(sorted_name))
            sorted_type = condition.get(ConditionParam.SORTED_TYPE.value)
            reverse = sorted_type == 'descending'
            result = sorted(result,
                            key=functools.cmp_to_key(_cmp),
                            reverse=reverse)

        offset_result = self._handle_limit_and_offset(condition, result)

        search_type = condition.get(ConditionParam.LINEAGE_TYPE.value)
        lineage_info = {
            'object': [
                item.to_dataset_lineage_dict() if search_type
                == LineageType.DATASET.value else item.to_filtration_dict()
                for item in offset_result
            ],
            # Count reflects everything that passed the filter, not just the
            # slice kept after limit/offset handling.
            'count':
            len(result)
        }

        return lineage_info