Example #1
0
    def spill(self, src, dest):
        """
        Spill an OCRD-ZIP, i.e. unpack it and turn it into a workspace.

        See https://ocr-d.github.com/ocrd_zip#unpacking-ocrd-zip-to-a-workspace

        The archive is unzipped to a temporary directory, the files below its
        ``data`` folder are copied to the destination (keeping their relative
        paths), and the temporary directory is removed again -- also when
        unpacking or copying fails.

        Arguments:
            src (string): Path to OCRD-ZIP
            dest (string): Path to directory to unpack data folder to. If this
                is an existing directory, the data is unpacked to a new
                subdirectory of it, named after ``src`` with the ``.ocrd.zip``
                or ``.zip`` suffix removed.

        Returns:
            Workspace: a workspace rooted at the directory the data was
            unpacked to.

        Raises:
            Exception: if ``dest`` exists but is not a directory, or if the
                subdirectory derived from ``src`` already exists in ``dest``.
        """
        if exists(dest) and not isdir(dest):
            raise Exception("Not a directory: %s" % dest)

        # If dest is an existing directory, try to derive its name from src
        if isdir(dest):
            workspace_name = re.sub(r'(\.ocrd)?\.zip$', '', basename(src))
            new_dest = join(dest, workspace_name)
            if exists(new_dest):
                raise Exception("Directory exists: %s" % new_dest)
            dest = new_dest

        log.info("Spilling %s to %s", src, dest)

        bagdir = mkdtemp(prefix=TMP_BAGIT_PREFIX)
        try:
            unzip_file_to_dir(src, bagdir)

            datadir = join(bagdir, 'data')
            for root, _, files in walk(datadir):
                # The target directory only depends on `root`, so resolve and
                # create it once per directory. Directories that contain no
                # files themselves are not created explicitly.
                destdir = join(dest, relpath(root, datadir))
                if files and not exists(destdir):
                    makedirs(destdir)
                for f in files:
                    srcfile = join(root, f)
                    destfile = join(destdir, f)
                    log.debug("Copy %s -> %s", srcfile, destfile)
                    copyfile(srcfile, destfile)

            # TODO copy allowed tag files if present

            # TODO validate bagit
        finally:
            # Drop tempdir, even if unzipping or copying failed
            rmtree(bagdir)

        # Create workspace
        workspace = Workspace(self.resolver, directory=dest)

        # TODO validate workspace

        return workspace
    def validate(self,
                 skip_checksums=False,
                 skip_bag=False,
                 skip_unzip=False,
                 skip_delete=False,
                 processes=2):
        """
        Validate an OCRD-ZIP file for profile, bag and workspace conformance

        Unless ``skip_unzip`` is set, the serialization of ``self.path_to_zip``
        is checked first and the archive is unpacked to a temporary directory,
        which is removed afterwards unless ``skip_delete`` is set.

        Arguments:
            skip_bag (boolean): Whether to skip all checks of manifests and files
            skip_checksums (boolean): Whether to omit checksum checks but still check basic BagIt conformance
            skip_unzip (boolean): Whether the OCRD-ZIP is unzipped, i.e. a directory
            skip_delete (boolean): Whether to skip deleting the unpacked OCRD-ZIP dir after validation
            processes (integer): Number of processes used for checksum validation

        Returns:
            The validator's report (``self.report``).
        """
        if skip_unzip:
            # path_to_zip already is an unpacked bag directory supplied by the
            # caller, so it must never be removed by us.
            bagdir = self.path_to_zip
            skip_delete = True
        else:
            # Errors raised by the serialization check propagate to the caller.
            self.profile_validator.validate_serialization(self.path_to_zip)
            bagdir = mkdtemp(prefix=TMP_BAGIT_PREFIX)

        try:
            if not skip_unzip:
                # Unzip inside the try block so that the temporary directory
                # is cleaned up even if unpacking fails half-way.
                unzip_file_to_dir(self.path_to_zip, bagdir)

            bag = Bag(bagdir)
            self._validate_profile(bag)

            if not skip_bag:
                self._validate_bag(bag,
                                   fast=skip_checksums,
                                   processes=processes)

        finally:
            if not skip_delete:
                # remove tempdir
                rmtree(bagdir)
        return self.report