コード例 #1
0
    def workspace_from_folder(self, directory, return_mets=False, clobber_mets=False, convention='ocrd-gt'):
        """
        Create a workspace from an existing folder, creating a METS file.

        Args:
            directory (string) : Path of an existing folder whose files are added to the METS.
            return_mets (boolean) : Do not create the actual mets.xml file but return the :class:`OcrdMets`. Default: False.
            clobber_mets (boolean) : Whether to overwrite existing mets.xml. Default: False.
            convention: See add_files_to_mets

        Returns:
            The populated :class:`OcrdMets` if ``return_mets`` is set, otherwise
            a ``Workspace`` for the absolute path of ``directory``.

        Raises:
            Exception: If ``directory`` is None, is not an existing directory, or
                already contains a mets.xml while ``clobber_mets`` is not set
                (this check applies even when ``return_mets`` is set).
        """
        if directory is None:
            raise Exception("Must pass directory")
        if not os.path.isdir(directory):
            raise Exception("Directory does not exist or is not a directory: '%s'" % directory)
        if not clobber_mets and os.path.exists(os.path.join(directory, 'mets.xml')):
            raise Exception("Not clobbering existing mets.xml in '%s'." % directory)

        # The isdir() guard above guarantees the folder exists, so there is
        # nothing to create here; only normalize the path.
        directory = os.path.abspath(directory)

        mets = OcrdMets(content=METS_XML_EMPTY)
        self.add_files_to_mets(convention, mets, directory)
        if return_mets:
            return mets

        mets_fpath = os.path.join(directory, 'mets.xml')
        # Serialize before opening the file so that a failing serialization
        # cannot leave a truncated mets.xml behind when clobbering.
        mets_xml = mets.to_xml(xmllint=True)
        with open(mets_fpath, 'wb') as fmets:
            log.info("Writing %s", mets_fpath)
            fmets.write(mets_xml)

        return Workspace(self, directory, mets)
コード例 #2
0
ファイル: workspace.py プロジェクト: noahmetzger/core
 def __init__(self, resolver, directory, mets=None):
     """
     Set up a workspace rooted at ``directory``.

     Args:
         resolver: Stored unchanged as ``self.resolver``.
         directory: Workspace folder; ``mets.xml`` inside it becomes ``self.mets_filename``.
         mets: METS object to use. If None, one is constructed with
             ``OcrdMets(filename=self.mets_filename)``.
     """
     self.resolver = resolver
     self.directory = directory
     mets_path = os.path.join(directory, 'mets.xml')
     self.mets_filename = mets_path
     # Only construct an OcrdMets from the file when the caller did not supply one.
     self.mets = OcrdMets(filename=mets_path) if mets is None else mets
     # One initially empty cache per image backend key.
     self.image_cache = {backend: {} for backend in ('pil', 'cv2', 'exif')}
コード例 #3
0
    def workspace_from_nothing(self, directory, mets_basename='mets.xml', clobber_mets=False):
        """
        Create an empty workspace.

        Args:
            directory (string) : Folder to create the workspace in; it is created if it
                does not exist. If None, a new temporary directory is used.
            mets_basename (string) : File name of the METS file inside ``directory``. Default: 'mets.xml'.
            clobber_mets (boolean) : Whether to overwrite an existing METS file. Default: False.

        Returns:
            A ``Workspace`` for ``directory`` holding the newly written, empty METS.

        Raises:
            Exception: If the METS file already exists and ``clobber_mets`` is not set.
        """
        if directory is None:
            directory = tempfile.mkdtemp(prefix=TMP_PREFIX)
        if not os.path.exists(directory):
            os.makedirs(directory)

        mets_fpath = os.path.join(directory, mets_basename)
        if not clobber_mets and os.path.exists(mets_fpath):
            # Name the file that is actually in the way, not a hard-coded 'mets.xml'.
            raise Exception("Not clobbering existing %s in '%s'." % (mets_basename, directory))
        mets = OcrdMets(content=METS_XML_EMPTY)
        # Serialize before opening the file so that a failing serialization
        # cannot leave a truncated METS file behind when clobbering.
        mets_xml = mets.to_xml(xmllint=True)
        with open(mets_fpath, 'wb') as fmets:
            log.info("Writing %s", mets_fpath)
            fmets.write(mets_xml)

        return Workspace(self, directory, mets)
コード例 #4
0
ファイル: test_ocrd_mets.py プロジェクト: saw-leipzig/pyocrd
 def setUp(self):
     """Load the SBB0000F29300010000 METS asset into ``self.mets`` before each test."""
     mets_url = assets.url_of('SBB0000F29300010000/mets.xml')
     self.mets = OcrdMets(filename=mets_url)
コード例 #5
0
 def reload_mets(self):
     """
     Replace ``self.mets`` with a fresh ``OcrdMets`` read from ``self.mets_target``.
     """
     mets_path = self.mets_target
     self.mets = OcrdMets(filename=mets_path)
コード例 #6
0
#!/usr/bin/env python

from sys import argv
from os.path import isfile
from ocrd.model import OcrdMets

# Migration helper: for every file in the given METS that has no page ID,
# move its GROUPID attribute into the page ID, then rewrite the METS in place.
if len(argv) < 2:
    # Exit with a usage hint instead of an IndexError when no path is given.
    raise SystemExit("Usage: %s METS_FILE" % argv[0])
fname = argv[1]
if not isfile(fname):
    # Raising a plain string is itself a TypeError; raise a real exception.
    raise FileNotFoundError("File not found %s" % fname)
mets = OcrdMets(filename=fname)

# pylint: disable=protected-access
for f in mets.find_files():
    if not f.pageId:
        groupid = f._el.get('GROUPID')
        if groupid:
            # The GROUPID value becomes the page ID below, so drop the attribute.
            del f._el.attrib['GROUPID']
        else:
            # No page information available at all: use a visible placeholder.
            groupid = "FIXME"
            print(
                "!! File %s has neither GROUPID nor mets:fptr in the PHYSICAL structMap"
                % f.url)
        print("Setting page of %s to %s" % (f.ID, groupid))
        f.pageId = groupid

# Serialize before opening for writing: the input file is overwritten in
# place, so a failing serialization must not leave it truncated.
mets_xml = mets.to_xml(xmllint=True)
with open(fname, 'wb') as out:
    out.write(mets_xml)