Example #1
0
 def run(repo: MLRepo):
     """Check every test definition of the repository and collect the issues.

     For each test definition, all versions of all models returned by the
     definition's ``_get_models`` are passed to ``Tests.__check_test``.
     Only non-empty check results are kept.

     Args:
         repo (MLRepo): repository holding test definitions, labels and tests.

     Returns:
         dict: maps ``'<model>:<suffix>'`` to the non-empty check result,
         where ``<suffix>`` is the name of a label whose ``version`` equals
         the checked version if such a label exists, otherwise the version
         itself. Empty if no check reported anything.
     """
     definition_names = repo.get_names(MLObjectType.TEST_DEFINITION)

     # Lookup used to report a label name instead of a raw version.
     label_by_version = {
         label.version: label.repo_info.name
         for label in (repo.get(label_name)
                       for label_name in repo.get_names(MLObjectType.LABEL))
     }

     issues = {}
     for definition_name in definition_names:
         definition = repo.get(definition_name)
         for model_name, versions in definition._get_models(repo).items():
             for version in versions:
                 found = Tests.__check_test(repo, model_name, version,
                                            definition)
                 if len(found) == 0:
                     continue
                 # Fall back to the version when no label points to it.
                 suffix = label_by_version.get(version, version)
                 issues[model_name + ':' + suffix] = found
     return issues
Example #2
0
    def __check_test(repo: MLRepo, model, model_version, test_definition):
        """Check the tests of one test definition for a single model version.

        For every data item returned by ``test_definition._get_data(repo)``
        the test name is built, the matching test for the given model
        version is fetched from the repository and then inspected.

        Args:
            repo (MLRepo): repository the tests are read from.
            model (str): name of the model.
            model_version (str): version of the model.
            test_definition: test definition; provides the data via
                ``_get_data`` and its own name via ``repo_info.name``.

        Returns:
            dict: maps test name to the issue found for that test: a
            "missing" message if no test exists, the value returned by the
            test's ``_check`` if it is not None, or a "failed" message if
            the test's ``result`` is not ``'succeeded'`` (the failure
            message takes precedence over the ``_check`` value). Empty if
            nothing was found.
        """
        issues = {}
        for data_name in test_definition._get_data(repo):
            # The test name is derived from model, test definition and data.
            naming = NamingConventions.Test(
                model=NamingConventions.get_model_from_name(model),
                test_name=test_definition.repo_info.name,
                data=data_name)
            test_name = str(naming)
            logging.debug('Checking test ' + test_name + ' for ' + model +
                          ', version ' + model_version)

            found = repo.get(test_name,
                             version=None,
                             modifier_versions={model: model_version},
                             throw_error_not_exist=False,
                             throw_error_not_unique=False)

            # An empty list signals that no test exists for this version.
            if found == []:
                issues[test_name] = (
                    'Test for model ' + model + ', version ' + model_version
                    + ' on latest data ' + data_name + ' missing.')
                continue

            # Several matches: keep the one with the latest commit date
            # (the first one wins on ties).
            if isinstance(found, list):
                found = max(
                    found,
                    key=lambda candidate: candidate.repo_info.commit_date)

            check_result = found._check(repo)
            if check_result is not None:
                issues[test_name] = check_result

            # A failed test overrides whatever _check reported.
            if found.result != 'succeeded':
                issues[test_name] = (
                    'Test for model ' + model + ', version ' + model_version
                    + ' on latest data ' + data_name + ' failed, details: '
                    + str(found.details))

        return issues
Example #3
0
    def run(repo: MLRepo,
            model_name=None,
            correct=False,
            model_version=RepoStore.LAST_VERSION,
            model_label=None,
            check_for_latest=True):
        """Run consistency checks for models selected by label and/or name.

        Models referenced by labels are checked first (using the model name
        and version stored in each label), then the model given by
        ``model_name`` for each requested version.

        Args:
            repo (MLRepo): ml repository.
            model_name (str, optional): Defaults to None. If given, the model
                with this name is checked for the versions in
                ``model_version``. A name without ``'/'`` is extended by
                ``'/model'``.
            correct (bool, optional): Defaults to False. Forwarded to
                ``Model.__check_model``; intended to start the jobs fixing
                the found issues when True.
            model_version (str or list of str, optional): Defaults to
                RepoStore.LAST_VERSION. Version(s) to check for
                ``model_name``; None is treated as RepoStore.LAST_VERSION.
                Not used when ``model_name`` is None.
            model_label (str or list of str, optional): Defaults to None.
                Label name(s) whose models are checked. The string
                ``'__ALL__'`` selects all labels of the repository.
            check_for_latest (bool, optional): Defaults to True. Forwarded
                to ``Model.__check_model`` for the checks by model name;
                label checks always pass False.

        Returns:
            dict: maps the label name (label checks) or
            ``'<model_name>:<version>'`` (name checks) to the issues found.
            May be empty if no issues exist.
        """
        logger.info('Start checking model.')
        issues = {}

        # Normalise model_label into a list of label names.
        if isinstance(model_label, list):
            label_names = model_label
        elif isinstance(model_label, str):
            if model_label == '__ALL__':
                label_names = repo.get_names(MLObjectType.LABEL)
            else:
                label_names = [model_label]
        else:
            label_names = []

        # Check the models the labels point to.
        for label_name in label_names:
            label = repo.get(label_name)
            found = Model.__check_model(repo,
                                        label.name,
                                        correct,
                                        model_version=label.version,
                                        check_for_latest=False)
            if len(found) > 0:
                issues[label_name] = found

        # Check the model given by name for the requested versions.
        if model_name is not None:
            if '/' not in model_name:
                model_name = model_name + '/model'
            latest_version = repo.get(model_name).repo_info.version

            requested = (RepoStore.LAST_VERSION
                         if model_version is None else model_version)
            versions = [requested] if isinstance(requested, str) else requested

            for version in versions:
                if str(version) == RepoStore.LAST_VERSION:
                    logger.debug(
                        'Latest version found, check if latest version ran on latest data.'
                    )
                    # Resolve the placeholder to the concrete latest version.
                    version_to_check = latest_version
                else:
                    version_to_check = version
                found = Model.__check_model(
                    repo,
                    model_name,
                    correct,
                    version_to_check,
                    check_for_latest=check_for_latest)
                if len(found) > 0:
                    # The key keeps the version exactly as requested.
                    issues[model_name + ':' + str(version)] = found

        logger.info('Finished checking model.')
        return issues