Example #1
0
    def check(self, path):
        """Check that the access and preservation folders hold matching files.

        Args:
            path: Package object. It is read through ``path.directories``
                (indexed with "access" and "preservation") and
                ``path.identifier``, which is used as the error group.

        Returns:
            checkers.Results carrying the checker name, a validity flag and
            the collected errors. The result is invalid as soon as either
            direction of the comparison reports unmatched files.

        """
        # NOTE: this uses the package because of the way hathi packages are formatted
        access_folder = path.directories["access"]
        preservation_folder = path.directories["preservation"]
        valid = True
        errors = []

        # Everything in the access folder needs a counterpart in the
        # preservation folder.
        missing_pres_files = self.check_for_missing_matching_preservation(
            access_folder=access_folder,
            preservation_folder=preservation_folder)
        if missing_pres_files:
            valid = False
            new_error = error_message.ValidationError(
                "The files [{}] were found in the access but not in the preservation folder"
                .format(", ".join(
                    [os.path.basename(f) for f in missing_pres_files])),
                group=path.identifier)
            new_error.source = access_folder
            errors.append(new_error)

        # ...and the other way around.
        missing_access_files = self.check_for_missing_matching_access(
            access_folder=access_folder,
            preservation_folder=preservation_folder)
        if missing_access_files:
            # This branch previously appended an error without clearing the
            # flag, so the result claimed to be valid while carrying errors.
            valid = False
            new_error = error_message.ValidationError(
                "The files [{}] were found in the preservation folder but not in the access folder"
                .format(", ".join(
                    [os.path.basename(f) for f in missing_access_files])),
                group=path.identifier)
            new_error.source = preservation_folder
            errors.append(new_error)

        return checkers.Results(self.checker_name(),
                                valid=valid,
                                errors=errors)
    def check(self, path):
        """Validate the extension and, for .tif files, the name of one file.

        Args:
            path: Path to the file to check.

        Returns:
            checkers.Results carrying the checker name, a validity flag and
            the collected errors. Files whose extension is listed in
            ``self.ignore_extension`` always pass.

        """
        parent_dir = os.path.dirname(path)
        stem, extension = os.path.splitext(os.path.basename(path))
        problems = []

        # Ignored extensions are never inspected.
        if extension in self.ignore_extension:
            return checkers.Results(self.checker_name(), valid=True, errors=problems)

        if extension not in self.valid_extensions:
            bad_extension = error_message.ValidationError(
                "Invalid preservation file extension: \"{}\"".format(extension),
                group=path)
            bad_extension.source = path
            problems.append(bad_extension)

        # Only .tif files whose name does not contain "target" are matched
        # against the naming scheme.
        is_page_image = extension == ".tif" and "target" not in stem
        if is_page_image and PresNamingChecker.valid_naming_scheme.match(stem) is None:
            bad_name = error_message.ValidationError(
                "Does not match the valid preservation file naming pattern",
                group=parent_dir.split(os.sep)[-1])
            bad_name.source = path
            problems.append(bad_name)

        return checkers.Results(self.checker_name(), valid=not problems, errors=problems)
    def check(self, path: str):
        """
        Make sure that the access folder contains every expected file.

        Two things are verified: that ``self.find_missing_by_number`` reports
        no missing files, and that each of ``checksum.md5``, ``marc.xml`` and
        ``meta.yml`` exists somewhere under the folder (sub-folders are
        searched as well).

        Args:
            path: Path to the folder to check

        Returns: checkers.Results carrying the checker name, a validity flag
            and the collected errors

        """
        required_files = {"checksum.md5", "marc.xml", "meta.yml"}
        error_group = path.split(os.sep)[-1]
        errors = []
        valid = True

        try:
            missing = list(self.find_missing_by_number(path))
        except ValueError as e:
            valid = False
            new_error = error_message.ValidationError(
                "Error trying to find missing files. Reason: {}".format(e),
                group=error_group)
            new_error.source = path
            errors.append(new_error)
        else:
            if missing:
                valid = False
                new_error = error_message.ValidationError(
                    "Expected files [{}] not found in access folder".format(", ".join(missing)),
                    group=error_group)
                new_error.source = path
                errors.append(new_error)

        # Collect every file name present anywhere below the folder.
        found_names = set()
        for _root, _dirs, files in os.walk(path):
            found_names.update(files)

        # Sorted so the message is stable from run to run; iterating a set
        # directly gives no ordering guarantee.
        missing_required = sorted(required_files - found_names)
        if missing_required:
            valid = False
            new_error = error_message.ValidationError(
                "Missing expected file(s), [{}]".format(", ".join(missing_required)),
                group=error_group)
            new_error.source = path
            errors.append(new_error)

        return checkers.Results(self.checker_name(), valid=valid, errors=errors)
Example #4
0
    def check(self, path):
        """Validate the root directory and, if it is clean, its subdirectories.

        Args:
            path: Location handed on to ``find_root_directory_errors`` and
                ``find_subdirectory_errors``.

        Returns:
            checkers.Results carrying the checker name, a validity flag and
            the collected errors.

        """
        problems = list(self.find_root_directory_errors(path))

        # Subdirectories are only inspected when the root level had no errors.
        if not problems:
            problems.extend(self.find_subdirectory_errors(path))

        return checkers.Results(self.checker_name(),
                                valid=not problems,
                                errors=problems)
Example #5
0
    def check(self, path):
        """Check that a preservation folder contains every expected file.

        Two things are verified: that ``self.find_missing_by_number`` reports
        no missing files, and that the four target images
        (``target_l_001.tif``, ``target_l_002.tif``, ``target_r_001.tif``,
        ``target_r_002.tif``) are reported present by
        ``self.find_missing_required_files``.

        Args:
            path: Path to the folder to check. Its last component is used as
                the error group.

        Returns:
            checkers.Results carrying the checker name, a validity flag and
            the collected errors.

        """
        valid = True
        errors = []
        required_files = (
            "target_l_001.tif",
            "target_l_002.tif",
            "target_r_001.tif",
            "target_r_002.tif",
        )
        error_group = path.split(os.sep)[-1]
        try:
            missing = list(self.find_missing_by_number(path))

            if missing:
                valid = False
                new_error = error_message.ValidationError(
                    "Expected files [{}] not found in preservation folder".
                    format(", ".join(missing)),
                    group=error_group)
                new_error.source = path
                errors.append(new_error)
        except ValueError as e:
            valid = False
            new_error = error_message.ValidationError(
                "Error trying to find missing files. Reason: {}".format(e),
                group=error_group)
            new_error.source = path
            errors.append(new_error)
        except FileNotFoundError as e:
            valid = False
            # Pass the text of the exception rather than the exception
            # object, matching every other ValidationError built here.
            new_error = error_message.ValidationError(str(e), group=error_group)
            new_error.source = path
            errors.append(new_error)
            # NOTE(review): the required-file lookup below still runs after a
            # FileNotFoundError; confirm it copes with a missing folder.

        # Find missing required_files
        missing = list(
            self.find_missing_required_files(path=path,
                                             expected_files=required_files))
        if missing:
            valid = False
            new_error = error_message.ValidationError(
                "Missing expected file(s), [{}]".format(", ".join(missing)),
                group=error_group)
            new_error.source = path
            errors.append(new_error)
        return checkers.Results(self.checker_name(),
                                valid=valid,
                                errors=errors)
    def check(self, path):
        """Check one file name against the preservation naming scheme.

        Args:
            path: Path to the file to check.

        Returns:
            checkers.Results carrying the checker name, a validity flag and
            the collected errors. Only files whose extension is in
            ``self.extensions_to_check`` are matched against
            ``self.valid_naming_scheme``; all others pass.

        """
        # Errors are grouped under the name of the folder holding the file.
        parent_name = os.path.dirname(path).split(os.sep)[-1]
        stem, extension = os.path.splitext(os.path.basename(path))
        problems = []

        needs_check = extension in self.extensions_to_check
        if needs_check and self.valid_naming_scheme.match(stem) is None:
            bad_name = error_message.ValidationError(
                "Does not match the valid file pattern for preservation files",
                group=parent_name)
            bad_name.source = path
            problems.append(bad_name)

        return checkers.Results(self.checker_name(), valid=not problems, errors=problems)
Example #7
0
    def check(self, path):
        """Validate the file type and the name of one file.

        Args:
            path: Path to the file to check.

        Returns:
            checkers.Results carrying the checker name, a validity flag and
            the collected errors. Files whose extension is listed in
            ``self.ignore_extension`` always pass.

        """
        parent_dir = os.path.dirname(path)
        stem, extension = os.path.splitext(os.path.basename(path))
        problems = []

        # Ignored extensions are never inspected.
        if extension in self.ignore_extension:
            return checkers.Results(self.checker_name(),
                                    valid=True,
                                    errors=problems)

        if extension not in self.valid_extensions:
            # NOTE(review): this error is grouped by the file name, while the
            # naming error below is grouped by the parent folder - confirm
            # that the difference is intended.
            bad_type = error_message.ValidationError(
                "Invalid file type",
                group=path.split(os.sep)[-1])
            bad_type.source = path
            problems.append(bad_type)

        # Every file that is not ignored must match the naming scheme,
        # whatever its extension.
        if self.valid_naming_scheme.match(stem) is None:
            bad_name = error_message.ValidationError(
                "Does not match the valid file pattern for preservation files",
                group=parent_dir.split(os.sep)[-1])
            bad_name.source = path
            problems.append(bad_name)

        # Dedicated checks that the only .xml file is marc.xml and the only
        # .yml file is meta.yml were sketched here in the past but are not
        # active.

        return checkers.Results(self.checker_name(),
                                valid=not problems,
                                errors=problems)
    def check(self, path):
        """Placeholder check: report a valid result with no errors.

        Args:
            path: Item to check; not inspected.

        Returns:
            checkers.Results with ``valid=True`` and an empty error list.

        """
        no_errors = []
        return checkers.Results(self.checker_name(), valid=True, errors=no_errors)
    def check(self, path):
        """Placeholder check: report a valid result with no errors.

        Args:
            path: Item to check; not inspected.

        Returns:
            checkers.Results with ``valid=True`` and an empty error list.

        """
        # NOTE: this uses the package because of the way hathi packages are formatted
        no_errors = []
        return checkers.Results(self.checker_name(), valid=True, errors=no_errors)