Esempio n. 1
0
    def bag(self,
            workspace,
            ocrd_identifier,
            dest=None,
            ocrd_mets='mets.xml',
            ocrd_manifestation_depth='full',
            ocrd_base_version_checksum=None,
            processes=1,
            skip_zip=False,
            in_place=False,
            tag_files=None):
        """
        Bag a workspace.

        See https://ocr-d.github.com/ocrd_zip#packing-a-workspace-as-ocrd-zip

        Arguments:
            workspace (ocrd.Workspace): workspace to bag
            ocrd_identifier (string): Ocrd-Identifier in bag-info.txt
            dest (string): Path of the generated OCRD-ZIP. When omitted it is
                derived from ``workspace.directory``: the directory itself for
                ``in_place``, ``<directory>.ocrd.zip`` when zipping, and
                ``<directory>.ocrd`` when ``skip_zip`` is set.
            ocrd_mets (string): Ocrd-Mets in bag-info.txt
            ocrd_manifestation_depth (string): Ocrd-Manifestation-Depth in
                bag-info.txt; must be ``'full'`` or ``'partial'``
            ocrd_base_version_checksum (string): Ocrd-Base-Version-Checksum in bag-info.txt
            processes (integer): Number of parallel processes checksumming
            skip_zip (boolean): Whether to leave directory unzipped
            in_place (boolean): Whether to **replace** the workspace with its
                BagIt variant. Requires ``skip_zip`` and excludes ``dest``.
            tag_files (list<string>): Path names of additional tag files to be bagged at the root of the bag

        Returns:
            string: The destination path (``dest``, either as passed in or as
            derived from the workspace directory).

        Raises:
            ValueError: If ``ocrd_manifestation_depth`` is not one of the
                allowed values, if ``in_place`` is combined with ``dest``, or
                if ``in_place`` is requested without ``skip_zip``.
        """
        # Validate the argument combination before touching the file system.
        if ocrd_manifestation_depth not in ('full', 'partial'):
            raise ValueError("manifestation_depth must be 'full' or 'partial'")
        if in_place and (dest is not None):
            raise ValueError("Setting 'dest' and 'in_place' is a contradiction")
        if in_place and not skip_zip:
            # The message names the flags in the same order as the condition.
            raise ValueError(
                "Setting 'in_place' and not 'skip_zip' is a contradiction")

        if tag_files is None:
            tag_files = []

        # create bagdir
        bagdir = mkdtemp(prefix=TMP_BAGIT_PREFIX)

        if dest is None:
            if in_place:
                dest = workspace.directory
            elif not skip_zip:
                dest = '%s.ocrd.zip' % workspace.directory
            else:
                dest = '%s.ocrd' % workspace.directory

        log.info("Bagging %s to %s (temp dir %s)", workspace.directory,
                 '(in-place)' if in_place else dest, bagdir)

        # create data dir
        makedirs(join(bagdir, 'data'))

        # create bagit.txt
        with open(join(bagdir, 'bagit.txt'), 'wb') as f:
            f.write(BAGIT_TXT.encode('utf-8'))

        # create manifests
        total_bytes, total_files = self._bag_mets_files(
            workspace, bagdir, ocrd_manifestation_depth, ocrd_mets, processes)

        # create bag-info.txt
        bag = Bag(bagdir)
        self._set_bag_info(bag, total_bytes, total_files, ocrd_identifier,
                           ocrd_manifestation_depth,
                           ocrd_base_version_checksum)

        # Extra tag files are copied flat into the bag root under their base name.
        for tag_file in tag_files:
            copyfile(tag_file, join(bagdir, basename(tag_file)))

        # save bag
        bag.save()

        # ZIP it
        self._serialize_bag(workspace, bagdir, dest, in_place, skip_zip)

        log.info('Created bag at %s', dest)
        return dest
Esempio n. 2
0
class ImageBag:
    """A bag directory holding an original image and files derived from it.

    ``components`` maps a component name (``'original'``, ``'master'``) to a
    dict of metadata terms for that component; ``_update`` copies those terms
    into ``self.bag.info`` and saves the bag.
    """

    def __init__(self, path: str, auto_make: bool = False) -> None:
        """Open the bag at *path*, or create it when *auto_make* is true.

        Args:
            path: Bag directory; ``~`` and environment variables are expanded
                and the result is resolved to a real path.
            auto_make: When true, the directory must not exist yet; it is
                created and turned into a new bag. When false, the directory
                is opened as an existing bag.

        Raises:
            OSError: If *auto_make* is true and the directory already exists,
                or if *auto_make* is false and the directory cannot be opened
                as a bag. Other ``OSError`` failures while creating the
                directory propagate unchanged.
        """
        self.path = realpath(expanduser(expandvars(normpath(path))))
        if auto_make:
            try:
                makedirs(self.path, exist_ok=False)
            except FileExistsError as e:
                # Only a pre-existing path gets the "already exists" message;
                # other OS errors keep their own, accurate description.
                raise OSError('{} already exists, but auto_make = True'
                              ''.format(self.path)) from e
            self.bag = make_bag(self.path)
        else:
            try:
                self.bag = Bag(self.path)
            except BagError as e:
                raise OSError(
                    '{} does not seem to be a valid Moondog Image bag: {}'
                    ''.format(self.path, str(e))) from e
        self.components = {}

    def accession(self, path: str) -> None:
        """Import the image at *path* and derive ``master.tif`` from it."""
        self._import_original(path)
        self._generate_master()

    def _import_original(self, path: str) -> None:
        """Copy the file at *path* into ``data/`` and record it as 'original'.

        The file's ExifTool metadata and XMP packet are read and
        pretty-printed to stdout; they are not stored.
        """
        accession_path = realpath(expanduser(expandvars(normpath(path))))
        filename = basename(accession_path)
        target_path = join(self.path, 'data', filename)
        shutil.copy2(accession_path, target_path)
        # Register the component only once the copy has succeeded, so a failed
        # import does not leave a phantom entry to be written to the bag info.
        self.components['original'] = {
            'accession_path': accession_path,
            'filename': filename,
        }
        with ExifTool() as et:
            meta = et.get_metadata(target_path)
        pprint(meta)
        xmp = XMPFiles(file_path=target_path).get_xmp()
        pprint(xmp)
        self._update(manifests=True)

    def _generate_master(self) -> None:
        """Re-save the original image as ``data/master.tif`` and record it.

        Requires that ``_import_original`` has already registered the
        'original' component.
        """
        infn = self.components['original']['filename']
        master_fn = 'master.tif'
        # Close the source image deterministically instead of leaking the
        # open file until garbage collection.
        with Image.open(join(self.path, 'data', infn)) as img:
            img.save(join(self.path, 'data', master_fn))
        self.components['master'] = {'filename': master_fn}
        self._update(manifests=True)

    def _update(self, manifests=False) -> None:
        """Update the bag.

        Every ``components[name][term]`` value is written to ``self.bag.info``
        under the key ``<Name>-<Term>``, where the term's underscores become
        hyphens and each word is title-cased (``accession_path`` of
        ``original`` becomes ``Original-Accession-Path``). The bag is then
        saved, passing *manifests* through to ``self.bag.save``.
        """
        for component, fmeta in self.components.items():
            for term, value in fmeta.items():
                bag_term = '{}-{}'.format(
                    component.title(),
                    term.replace('_', ' ').title().replace(' ', '-'))
                # Missing, different or equal: the stored value ends up being
                # `value` in every case, so assign unconditionally.
                self.bag.info[bag_term] = value
        self.bag.save(manifests=manifests)