def bag(self,
        workspace,
        ocrd_identifier,
        dest=None,
        ocrd_mets='mets.xml',
        ocrd_manifestation_depth='full',
        ocrd_base_version_checksum=None,
        processes=1,
        skip_zip=False,
        in_place=False,
        tag_files=None):
    """
    Bag a workspace

    See https://ocr-d.github.com/ocrd_zip#packing-a-workspace-as-ocrd-zip

    Arguments:
        workspace (ocrd.Workspace): workspace to bag
        ocrd_identifier (string): Ocrd-Identifier in bag-info.txt
        dest (string): Path of the generated OCRD-ZIP. When omitted it is
            derived from ``workspace.directory``: the directory itself for
            ``in_place``, ``<directory>.ocrd`` for ``skip_zip``, otherwise
            ``<directory>.ocrd.zip``.
        ocrd_mets (string): Ocrd-Mets in bag-info.txt
        ocrd_manifestation_depth (string): Ocrd-Manifestation-Depth in
            bag-info.txt, either ``'full'`` or ``'partial'``
        ocrd_base_version_checksum (string): Ocrd-Base-Version-Checksum in
            bag-info.txt
        processes (integer): Number of parallel processes checksumming
        skip_zip (boolean): Whether to leave directory unzipped
        in_place (boolean): Whether to **replace** the workspace with its
            BagIt variant
        tag_files (list<string>): Path names of additional tag files to be
            bagged at the root of the bag

    Returns:
        string: ``dest``, i.e. the location the bag was written to.

    Raises:
        Exception: if ``ocrd_manifestation_depth`` is neither ``'full'`` nor
            ``'partial'``, if ``in_place`` is combined with an explicit
            ``dest``, or if ``in_place`` is requested without ``skip_zip``.
    """
    # Local import: only needed to discard the staging directory on failure.
    from shutil import rmtree

    # Validate the option combination before touching the file system.
    if ocrd_manifestation_depth not in ('full', 'partial'):
        raise Exception("manifestation_depth must be 'full' or 'partial'")
    if in_place and (dest is not None):
        raise Exception("Setting 'dest' and 'in_place' is a contradiction")
    if in_place and not skip_zip:
        # An in-place bag replaces a directory, so it cannot also be zipped.
        raise Exception(
            "Setting 'in_place' and not 'skip_zip' is a contradiction")
    if tag_files is None:
        tag_files = []

    if dest is None:
        if in_place:
            dest = workspace.directory
        elif not skip_zip:
            dest = '%s.ocrd.zip' % workspace.directory
        else:
            dest = '%s.ocrd' % workspace.directory

    # create bagdir
    bagdir = mkdtemp(prefix=TMP_BAGIT_PREFIX)
    try:
        log.info("Bagging %s to %s (temp dir %s)",
                 workspace.directory,
                 '(in-place)' if in_place else dest,
                 bagdir)

        # create data dir
        makedirs(join(bagdir, 'data'))

        # create bagit.txt
        with open(join(bagdir, 'bagit.txt'), 'wb') as f:
            f.write(BAGIT_TXT.encode('utf-8'))

        # create manifests
        total_bytes, total_files = self._bag_mets_files(
            workspace, bagdir, ocrd_manifestation_depth, ocrd_mets, processes)

        # create bag-info.txt
        bag = Bag(bagdir)
        self._set_bag_info(bag, total_bytes, total_files, ocrd_identifier,
                           ocrd_manifestation_depth, ocrd_base_version_checksum)

        # extra tag files are placed flat at the bag root, next to bag-info.txt
        for tag_file in tag_files:
            copyfile(tag_file, join(bagdir, basename(tag_file)))

        # save bag
        bag.save()
    except BaseException:
        # Until serialization starts the staging directory is owned solely by
        # this call, so do not leave a half-built bag behind in the temp area.
        rmtree(bagdir, ignore_errors=True)
        raise

    # ZIP it
    self._serialize_bag(workspace, bagdir, dest, in_place, skip_zip)

    log.info('Created bag at %s', dest)
    return dest
class ImageBag:
    """A BagIt bag on disk that holds an image and files derived from it.

    Attributes are set in ``__init__``:
      * ``path`` -- normalized absolute path of the bag directory
      * ``bag`` -- the bag object returned by ``make_bag`` / ``Bag``
      * ``components`` -- mapping of component name (``'original'``,
        ``'master'``) to a dict of metadata that ``_update`` mirrors into
        the bag info
    """

    def __init__(self, path: str, auto_make: bool = False) -> None:
        """Open the bag at *path*, or create it when *auto_make* is true.

        Args:
            path: Bag directory; ``~`` and environment variables are expanded.
            auto_make: When true, create the directory (it must not exist
                yet) and turn it into a new bag. When false, open an
                existing bag.

        Raises:
            OSError: if *auto_make* is true and the directory already exists,
                if the directory cannot be created for another reason (the
                underlying ``OSError`` subclass propagates unchanged), or if
                *auto_make* is false and the path cannot be opened as a bag.
        """
        self.path = realpath(expanduser(expandvars(normpath(path))))
        if auto_make:
            try:
                makedirs(self.path, exist_ok=False)
            except FileExistsError as err:
                # Only a pre-existing path means "already exists"; other
                # failures (permissions, bad parent) keep their own error.
                raise OSError('{} already exists, but auto_make = True'
                              ''.format(self.path)) from err
            self.bag = make_bag(self.path)
        else:
            try:
                self.bag = Bag(self.path)
            except BagError as e:
                raise OSError(
                    '{} does not seem to be a valid Moondog Image bag: {}'
                    ''.format(self.path, str(e))) from e
        self.components = {}

    def accession(self, path: str) -> None:
        """Copy the image at *path* into the bag and derive its master file."""
        self._import_original(path)
        self._generate_master()

    def _import_original(self, path: str) -> None:
        """Copy the source file into ``data/`` and register it as 'original'.

        The file's ExifTool metadata and XMP packet are pretty-printed to
        stdout, then the bag info and manifests are updated.
        """
        d = self.components['original'] = {}
        d['accession_path'] = realpath(expanduser(expandvars(normpath(path))))
        d['filename'] = basename(d['accession_path'])
        target_path = join(self.path, 'data', d['filename'])
        # copy2 also copies file metadata such as timestamps
        shutil.copy2(d['accession_path'], target_path)
        with ExifTool() as et:
            meta = et.get_metadata(target_path)
            pprint(meta)
        xmp = XMPFiles(file_path=target_path).get_xmp()
        pprint(xmp)
        self._update(manifests=True)

    def _generate_master(self) -> None:
        """Save the imported original as ``data/master.tif``.

        Requires a prior ``_import_original`` call; otherwise the lookup of
        ``self.components['original']`` raises ``KeyError``.
        """
        infn = self.components['original']['filename']
        d = self.components['master'] = {}
        d['filename'] = 'master.tif'
        # NOTE(review): assumes ``Image`` is Pillow's module, whose images are
        # context managers -- the ``with`` closes the source file after saving.
        with Image.open(join(self.path, 'data', infn)) as img:
            img.save(join(self.path, 'data', d['filename']))
        self._update(manifests=True)

    def _update(self, manifests: bool = False) -> None:
        """Mirror ``self.components`` into the bag info and save the bag.

        Each component/term pair becomes an info key of the form
        ``<Component>-<Term-In-Title-Case>``, e.g. ``('original',
        'accession_path')`` -> ``'Original-Accession-Path'``. A key is only
        written when it is missing or its stored value differs.

        Args:
            manifests: Passed through to ``self.bag.save``.
        """
        for fn, fmeta in self.components.items():
            for term, value in fmeta.items():
                bag_term = '{}-{}'.format(
                    fn.title(),
                    term.replace('_', ' ').title().replace(' ', '-'))
                try:
                    prior_value = self.bag.info[bag_term]
                except KeyError:
                    self.bag.info[bag_term] = value
                else:
                    if prior_value != value:
                        self.bag.info[bag_term] = value
        self.bag.save(manifests=manifests)