def workspace_from_folder(self, directory, return_mets=False, clobber_mets=False, convention='ocrd-gt'):
    """
    Create a workspace from a folder, creating a METS file.

    Args:
        directory : Path of an existing folder to build the workspace from.
        convention: See add_files_to_mets
        clobber_mets (boolean) : Whether to overwrite existing mets.xml. Default: False.
        return_mets (boolean) : Do not create the actual mets.xml file but return the :class:`OcrdMets`. Default: False.

    Returns:
        The populated :class:`OcrdMets` when ``return_mets`` is true, otherwise
        a ``Workspace`` for the (absolute) folder after ``mets.xml`` was written.

    Raises:
        Exception: if ``directory`` is None, is not an existing directory, or
            already contains a ``mets.xml`` while ``clobber_mets`` is false.
    """
    if directory is None:
        raise Exception("Must pass directory")
    if not os.path.isdir(directory):
        raise Exception("Directory does not exist or is not a directory: '%s'" % directory)
    if not clobber_mets and os.path.exists(os.path.join(directory, 'mets.xml')):
        raise Exception("Not clobbering existing mets.xml in '%s'." % directory)

    # The isdir() guard above guarantees the folder exists, so there is
    # nothing to create here; only normalize the path.
    mets = OcrdMets(content=METS_XML_EMPTY)
    directory = os.path.abspath(directory)
    self.add_files_to_mets(convention, mets, directory)

    if return_mets:
        return mets

    mets_fpath = os.path.join(directory, 'mets.xml')
    with open(mets_fpath, 'wb') as fmets:
        log.info("Writing %s", mets_fpath)
        fmets.write(mets.to_xml(xmllint=True))

    return Workspace(self, directory, mets)
def __init__(self, resolver, directory, mets=None):
    """
    Set up the workspace state for ``directory``.

    Args:
        resolver: Stored unchanged as ``self.resolver``.
        directory: Workspace folder; ``mets.xml`` is expected directly inside it.
        mets: Already-built METS object to use. When None, one is loaded
            from ``<directory>/mets.xml`` via :class:`OcrdMets`.
    """
    mets_path = os.path.join(directory, 'mets.xml')

    self.resolver = resolver
    self.directory = directory
    self.mets_filename = mets_path

    # Only read the METS from disk when the caller did not hand one in.
    self.mets = OcrdMets(filename=mets_path) if mets is None else mets

    # One independent, initially empty cache per image backend.
    self.image_cache = dict((backend, {}) for backend in ('pil', 'cv2', 'exif'))
def workspace_from_nothing(self, directory, mets_basename='mets.xml', clobber_mets=False):
    """
    Create an empty workspace.

    Args:
        directory : Folder to create the workspace in; it is created if it
            does not exist. If None, a fresh temporary directory (prefixed
            with ``TMP_PREFIX``) is used.
        mets_basename : File name of the METS file inside ``directory``.
            Default: 'mets.xml'.
        clobber_mets (boolean) : Whether to overwrite an existing METS file.
            Default: False.

    Returns:
        A ``Workspace`` for ``directory`` backed by the newly written, empty METS.

    Raises:
        Exception: if the METS file already exists and ``clobber_mets`` is false.
    """
    if directory is None:
        directory = tempfile.mkdtemp(prefix=TMP_PREFIX)
    if not os.path.exists(directory):
        os.makedirs(directory)

    mets_fpath = os.path.join(directory, mets_basename)
    if not clobber_mets and os.path.exists(mets_fpath):
        # Name the file that is actually being protected, which need not be
        # 'mets.xml' when a custom mets_basename was given.
        raise Exception("Not clobbering existing %s in '%s'." % (mets_basename, directory))

    mets = OcrdMets(content=METS_XML_EMPTY)
    with open(mets_fpath, 'wb') as fmets:
        log.info("Writing %s", mets_fpath)
        fmets.write(mets.to_xml(xmllint=True))

    return Workspace(self, directory, mets)
def setUp(self):
    """
    Populate ``self.mets`` from the METS file of the SBB0000F29300010000 asset.
    """
    # Location of the fixture as reported by the assets helper.
    mets_location = assets.url_of('SBB0000F29300010000/mets.xml')
    self.mets = OcrdMets(filename=mets_location)
def reload_mets(self):
    """
    Replace ``self.mets`` with a fresh :class:`OcrdMets` read from
    ``self.mets_target``.
    """
    target = self.mets_target
    self.mets = OcrdMets(filename=target)
#!/usr/bin/env python
# Give every file entry of a METS document that has no page ID a page ID:
# the value of the entry's GROUPID attribute if present (the attribute is then
# removed), otherwise the placeholder "FIXME". The METS file given as the
# first command-line argument is rewritten in place.
from sys import argv
from os.path import isfile
from ocrd.model import OcrdMets

if len(argv) < 2:
    # Fail with a readable message instead of a bare IndexError.
    raise SystemExit("Usage: pass the path of the METS file as the first argument")

fname = argv[1]
if not isfile(fname):
    # A plain string cannot be raised (that is itself a TypeError which hides
    # the message), so raise a real exception carrying the same text.
    raise IOError("File not found %s" % fname)

mets = OcrdMets(filename=fname)

# The GROUPID attribute is only reachable through the wrapped XML element.
# pylint: disable=protected-access
for f in mets.find_files():
    if not f.pageId:
        groupid = f._el.get('GROUPID')
        if groupid:
            # The GROUPID value becomes the page ID, so drop the attribute.
            del f._el.attrib['GROUPID']
        else:
            groupid = "FIXME"
            print(
                "!! File %s has neither GROUPID nor mets:fptr in the PHYSICAL structMap" % f.url)
        print("Setting page of %s to %s" % (f.ID, groupid))
        f.pageId = groupid

with open(fname, 'wb') as out:
    out.write(mets.to_xml(xmllint=True))