def test_merge(self):
    """Merge one workspace copy into another and check the resulting file count.

    The target workspace starts with 6 files in its METS and is expected to
    hold 41 after the merge, including an image copied over from the source.
    """
    with copy_of_directory(assets.path_to('kant_aufklaerung_1784/data')) as ws1dir:
        with copy_of_directory(assets.path_to('SBB0000F29300010000/data')) as ws2dir:
            target = Workspace(self.resolver, ws1dir)
            source = Workspace(self.resolver, ws2dir)
            assert len(target.mets.find_all_files()) == 6

            target.merge(source)

            assert len(target.mets.find_all_files()) == 41
            merged_image = join(ws1dir, 'OCR-D-IMG/FILE_0001_IMAGE.tif')
            assert exists(merged_image)
def test_workspace_remove_groups_unforce(workspace_directory):
    """Remove groups by pattern recursive"""
    alto_grp_xpath = './/{http://www.loc.gov/METS/}fileGrp[@USE="OCR-D-GT-ALTO"]'

    # arrange: the METS on disk has exactly one ALTO group holding two files
    mets_before = ET.parse(os.path.join(workspace_directory, 'mets.xml')).getroot()
    groups_before = mets_before.findall(alto_grp_xpath)
    assert len(groups_before) == 1
    files_before = groups_before[0].findall('.//{http://www.loc.gov/METS/}file')
    assert len(files_before) == 2

    # act: remove by pattern and persist the METS
    ws = Workspace(Resolver(), workspace_directory)
    ws.remove_file_group('//OCR-D-GT.*', recursive=True)
    ws.save_mets()

    # assert: the re-read METS no longer contains the ALTO group
    mets_after = ET.parse(os.path.join(workspace_directory, 'mets.xml')).getroot()
    assert mets_after is not None
    groups_after = mets_after.findall(alto_grp_xpath)
    assert not groups_after
def test_workspace_init_missing_mets():
    """Constructing a Workspace on a directory without a METS file must raise."""
    with pytest.raises(Exception) as raised:
        Workspace(Resolver(), "foo/bar")

    message = str(raised.value)
    assert "File does not exist" in message
def test_remove_file_group_rmdir(self):
    """Recursively removing a file group also removes its directory on disk."""
    with copy_of_directory(assets.path_to('SBB0000F29300010000/data')) as tempdir:
        ws = Workspace(self.resolver, directory=tempdir)
        # the group directory is present before removal ...
        self.assertTrue(exists(join(tempdir, 'OCR-D-IMG')))
        ws.remove_file_group('OCR-D-IMG', recursive=True)
        # ... and gone afterwards
        self.assertFalse(exists(join(tempdir, 'OCR-D-IMG')))
def workspace_from_folder(self, directory, return_mets=False, clobber_mets=False, convention='ocrd-gt'):
    """
    Create a workspace from a folder, creating a METS file.

    Args:
        directory (string) : Existing folder whose files are added to the new METS.
        return_mets (boolean) : Do not create the actual mets.xml file but return the :class:`OcrdMets`. Default: False.
        clobber_mets (boolean) : Whether to overwrite existing mets.xml. Default: False.
        convention: See add_files_to_mets

    Returns:
        The populated :class:`OcrdMets` when ``return_mets`` is set, otherwise a
        :class:`Workspace` for ``directory`` after ``mets.xml`` has been written.

    Raises:
        Exception: if ``directory`` is None, is not an existing directory, or
            already contains ``mets.xml`` while ``clobber_mets`` is false.
    """
    if directory is None:
        raise Exception("Must pass directory")
    if not os.path.isdir(directory):
        raise Exception("Directory does not exist or is not a directory: '%s'" % directory)
    if not clobber_mets and os.path.exists(os.path.join(directory, 'mets.xml')):
        raise Exception("Not clobbering existing mets.xml in '%s'." % directory)

    mets = OcrdMets(content=METS_XML_EMPTY)
    # The isdir() guard above already guarantees the directory exists, so no
    # makedirs() fallback is needed here.
    directory = os.path.abspath(directory)
    self.add_files_to_mets(convention, mets, directory)

    if return_mets:
        return mets

    mets_fpath = os.path.join(directory, 'mets.xml')
    with open(mets_fpath, 'wb') as fmets:
        log.info("Writing %s", mets_fpath)
        fmets.write(mets.to_xml(xmllint=True))

    return Workspace(self, directory, mets)
def test_superfluous_copies_in_ws_dir(self):
    """
    https://github.com/OCR-D/core/issues/227
    """
    def find_recursive(root):
        # flat list of every file name found anywhere below root
        return [name for _, _, names in walk(root) for name in names]

    with TemporaryDirectory() as wsdir:
        # seed the workspace with nothing but a METS referencing one file
        with open(assets.path_to('SBB0000F29300010000/data/mets_one_file.xml'), 'r') as f_in, \
                open(join(wsdir, 'mets.xml'), 'w') as f_out:
            f_out.write(f_in.read())
        self.assertEqual(len(find_recursive(wsdir)), 1)

        ws1 = Workspace(self.resolver, wsdir)
        for mets_file in ws1.mets.find_files():
            ws1.download_file(mets_file)

        # exactly one additional file may appear: the downloaded image
        self.assertEqual(len(find_recursive(wsdir)), 2)
        self.assertTrue(exists(join(wsdir, 'OCR-D-IMG/FILE_0005_IMAGE.tif')))
def workspace_from_nothing(self, directory, mets_basename='mets.xml', clobber_mets=False):
    """
    Create an empty workspace.

    Arguments:
        directory (string): Target directory for the workspace. \
            If ``None``, a temporary directory is created with prefix \
            :py:data:`ocrd.constants.TMP_PREFIX`. \
            (The resulting path can be retrieved via :py:attr:`ocrd.Workspace.directory`.)
    Keyword Arguments:
        mets_basename (string, ``mets.xml``): File name of the METS inside ``directory``.
        clobber_mets (boolean, False): Whether to overwrite an existing METS file. \
            By default an existing METS file raises :py:class:`FileExistsError`.

    Returns:
        a new :py:class:`~ocrd.workspace.Workspace`
    """
    log = getLogger('ocrd.resolver.workspace_from_nothing')
    directory = mkdtemp(prefix=TMP_PREFIX) if directory is None else directory

    workspace_dir = Path(directory)
    workspace_dir.mkdir(parents=True, exist_ok=True)

    mets_path = workspace_dir / mets_basename
    if mets_path.exists() and not clobber_mets:
        message = "METS '%s' already exists in '%s' and clobber_mets not set." % (mets_basename, directory)
        raise FileExistsError(message)

    empty_mets = OcrdMets.empty_mets()
    log.info("Writing METS to %s", mets_path)
    mets_path.write_bytes(empty_mets.to_xml(xmllint=True))

    return Workspace(self, directory, empty_mets, mets_basename=mets_basename)
def test_remove_file_page_recursive_same_group(self):
    """Page-recursive removal restricted to the same group deletes exactly one file.

    ``count_files()`` is called without arguments, so the test runs inside
    ``pushd_popd(tempdir)``.
    """
    with copy_of_directory(assets.path_to('kant_aufklaerung_1784-complex/data')) as tempdir:
        with pushd_popd(tempdir):
            ws = Workspace(self.resolver, directory=tempdir)
            before = count_files()
            ws.remove_file('OCR-D-IMG-BINPAGE-sauvola_0001', page_recursive=True, page_same_group=True, force=False)
            after = count_files()
            # The message previously claimed 2 files while asserting a delta of 1.
            self.assertEqual(after, before - 1, '1 file deleted')
def test_remove_file_page_recursive(self):
    """Page-recursive removal across groups shrinks the METS from 119 to 83 files."""
    with copy_of_directory(assets.path_to('kant_aufklaerung_1784-complex/data')) as tempdir, \
            pushd_popd(tempdir):
        workspace = Workspace(self.resolver, directory=tempdir)
        self.assertEqual(len(workspace.mets.find_files()), 119)

        workspace.remove_file(
            'OCR-D-OCR-OCRO-fraktur-SEG-LINE-tesseract-ocropy-DEWARP_0001',
            page_recursive=True,
            page_same_group=False,
            keep_file=True)
        self.assertEqual(len(workspace.mets.find_files()), 83)

        # a second page-recursive removal is only exercised, not asserted on
        workspace.remove_file('PAGE_0017_ALTO', page_recursive=True)
def workspace_from_url(self, mets_url, dst_dir=None, clobber_mets=False, mets_basename=None, download=False, src_baseurl=None):
    """
    Create a workspace from a METS by URL (i.e. clone it).

    Sets the mets.xml file

    Arguments:
        mets_url (string): Source mets URL
        dst_dir (string, None): Target directory for the workspace. If not given, the
            parent directory of a local ``mets_url`` is used, otherwise a temporary directory.
        clobber_mets (boolean, False): Whether to overwrite existing mets.xml. Otherwise
            ``if_exists='raise'`` is passed to ``download_to_directory``.
        mets_basename (string, None): File name of the METS in ``dst_dir``. Defaults to the
            last URL segment of ``mets_url``.
        download (boolean, False): Whether to download all the files
        src_baseurl (string, None): Base URL for resolving relative file locations

    Returns:
        Workspace

    Raises:
        ValueError: if ``mets_url`` is None.
    """
    if mets_url is None:
        raise ValueError("Must pass 'mets_url' workspace_from_url")

    # if mets_url is a relative filename, make it absolute
    if is_local_filename(mets_url) and not Path(mets_url).is_absolute():
        mets_url = str(Path.cwd() / mets_url)

    # if mets_basename is not given, use the last URL segment of the mets_url
    if mets_basename is None:
        mets_basename = nth_url_segment(mets_url, -1)

    # If src_baseurl wasn't given, determine from mets_url by removing last url segment
    if not src_baseurl:
        last_segment = nth_url_segment(mets_url)
        src_baseurl = remove_non_path_from_url(remove_non_path_from_url(mets_url)[:-len(last_segment)])

    # resolve dst_dir
    if not dst_dir:
        if is_local_filename(mets_url):
            log.debug("Deriving dst_dir %s from %s", Path(mets_url).parent, mets_url)
            dst_dir = Path(mets_url).parent
        else:
            # Create the directory first so the log line reports the real path
            # instead of the still-unset dst_dir.
            dst_dir = tempfile.mkdtemp(prefix=TMP_PREFIX)
            log.debug("Creating ephemeral workspace '%s' for METS @ <%s>", dst_dir, mets_url)

    # XXX Path.resolve is always strict in Python <= 3.5, so create dst_dir unless it exists consistently
    if not Path(dst_dir).exists():
        Path(dst_dir).mkdir(parents=True, exist_ok=False)
    dst_dir = str(Path(dst_dir).resolve())

    log.debug("workspace_from_url\nmets_basename='%s'\nmets_url='%s'\nsrc_baseurl='%s'\ndst_dir='%s'",
              mets_basename, mets_url, src_baseurl, dst_dir)

    self.download_to_directory(dst_dir, mets_url, basename=mets_basename,
                               if_exists='overwrite' if clobber_mets else 'raise')

    workspace = Workspace(self, dst_dir, mets_basename=mets_basename, baseurl=src_baseurl)

    if download:
        for f in workspace.mets.find_files():
            workspace.download_file(f)

    return workspace
def test_workspace_remove_group_not_found(workspace_directory):
    """Group identified by name not found raises exception"""
    resolver = Resolver()
    workspace = Workspace(resolver, workspace_directory)

    with pytest.raises(Exception) as exc:
        workspace.remove_file_group('FOO-BAR')

    # Inspect the raised exception itself (exc.value); str(exc) would format
    # the ExceptionInfo wrapper, not the error message.
    assert "No such fileGrp" in str(exc.value)
def workspace_from_url(self, mets_url, directory=None, clobber_mets=False, mets_basename=None, download=False, download_local=False):
    """
    Create a workspace from a METS by URL.

    Sets the mets.xml file

    Arguments:
        mets_url (string): Source METS URL or path. May be None if ``directory`` is given,
            in which case the METS is expected inside ``directory``.
        directory (string, None): Workspace directory. If not given, the directory of a
            ``file://`` METS is used, otherwise a temporary directory is created.
        clobber_mets (boolean, False): Whether to overwrite an existing METS file.
        mets_basename (string, None): File name of the METS. Defaults to the last URL
            segment of ``mets_url`` (or ``mets.xml`` when ``mets_url`` is derived from ``directory``).
        download (boolean, False): Call ``download_files_in_group`` for every file group.
        download_local (boolean, False): For every file group, download only the files
            returned by ``find_files(..., local_only=True)``. Takes precedence over ``download``.

    Returns:
        Workspace

    Raises:
        Exception: if neither ``mets_url`` nor ``directory`` is given, or if the target
            METS file exists and ``clobber_mets`` is false.
    """
    if directory is not None and not directory.startswith('/'):
        directory = os.path.abspath(directory)

    if mets_url is None:
        if directory is None:
            raise Exception("Must pass mets_url and/or directory to workspace_from_url")
        # Fall back to the conventional file name; formatting a None basename
        # would yield a URL ending in the literal string "None".
        mets_url = 'file://%s/%s' % (directory, mets_basename if mets_basename else 'mets.xml')

    if mets_url.find('://') == -1:
        # resolve to absolute
        mets_url = os.path.abspath(mets_url)
        mets_url = 'file://' + mets_url

    if directory is None:
        if mets_url.startswith('file://'):
            # if directory was not given and mets_url is a file assume that
            # directory should be the directory where the mets.xml resides
            directory = os.path.dirname(mets_url[len('file://'):])
        else:
            directory = tempfile.mkdtemp(prefix=TMP_PREFIX)
            log.debug("Creating workspace '%s' for METS @ <%s>", directory, mets_url)

    # if mets_basename is not given, use the last URL segment of the mets_url
    if mets_basename is None:
        mets_basename = mets_url \
            .rsplit('/', 1)[-1] \
            .split('?')[0] \
            .split('#')[0]

    mets_fpath = os.path.join(directory, mets_basename)
    log.debug("Copying mets url '%s' to '%s'", mets_url, mets_fpath)
    if 'file://' + mets_fpath == mets_url:
        log.debug("Target and source mets are identical")
    else:
        if os.path.exists(mets_fpath) and not clobber_mets:
            raise Exception("File '%s' already exists but clobber_mets is false" % mets_fpath)
        else:
            self.download_to_directory(directory, mets_url, basename=mets_basename)

    workspace = Workspace(self, directory, mets_basename=mets_basename)

    if download_local or download:
        for file_grp in workspace.mets.file_groups:
            if download_local:
                for f in workspace.mets.find_files(fileGrp=file_grp, local_only=True):
                    workspace.download_file(f, subdir=file_grp)
            else:
                workspace.download_files_in_group(file_grp)

    return workspace
def setUp(self):
    """Copy the kant_aufklaerung_1784 asset into a fresh temp dir and open its data/ workspace."""
    resolver = Resolver()
    self.resolver = resolver
    self.bagger = WorkspaceBagger(resolver)

    self.tempdir = mkdtemp()
    self.bagdir = join(self.tempdir, 'kant_aufklaerung_1784')
    copytree(assets.path_to('kant_aufklaerung_1784'), self.bagdir)

    self.workspace_dir = join(self.bagdir, 'data')
    self.workspace = Workspace(resolver, directory=self.workspace_dir)
def test_remove_file_force(self):
    """Removing an unknown file ID raises FileNotFoundError unless force is set."""
    with copy_of_directory(assets.path_to('SBB0000F29300010000/data')) as tempdir:
        ws = Workspace(self.resolver, directory=tempdir)
        unknown_id = 'non-existing-id'

        # without force: must fail
        with self.assertRaisesRegex(FileNotFoundError, "not found"):
            ws.remove_file(unknown_id)

        # with force: must pass silently
        ws.remove_file(unknown_id, force=True)
def test_remove_file_group_force(self):
    """Removing an unknown file group raises unless force is set."""
    with copy_of_directory(assets.path_to('SBB0000F29300010000/data')) as tempdir:
        ws = Workspace(self.resolver, directory=tempdir)
        unknown_grp = 'I DO NOT EXIST'

        # raise error unless force
        with self.assertRaisesRegex(Exception, "No such fileGrp"):
            ws.remove_file_group(unknown_grp)

        # no error
        ws.remove_file_group(unknown_grp, force=True)
def test_remove_file_group_force(self):
    """Removing an unknown file group raises unless force or overwrite_mode is set."""
    with copy_of_directory(assets.path_to('SBB0000F29300010000/data')) as tempdir:
        ws = Workspace(self.resolver, directory=tempdir)
        unknown_grp = 'I DO NOT EXIST'

        # should fail
        with self.assertRaisesRegex(Exception, "No such fileGrp"):
            ws.remove_file_group(unknown_grp)

        # should succeed
        ws.remove_file_group(unknown_grp, force=True)

        # should also succeed
        ws.overwrite_mode = True
        ws.remove_file_group(unknown_grp, force=False)
def setUp(self):
    """Clear any leftover BACKUPDIR, then copy the kant_aufklaerung_1784 asset into a temp 'bag' dir."""
    super().setUp()
    if exists(BACKUPDIR):
        rmtree(BACKUPDIR)

    resolver = Resolver()
    self.resolver = resolver
    self.bagger = WorkspaceBagger(resolver)

    self.tempdir = mkdtemp()
    self.bagdir = join(self.tempdir, 'bag')
    copytree(assets.path_to('kant_aufklaerung_1784'), self.bagdir)

    self.workspace_dir = join(self.bagdir, 'data')
    self.workspace = Workspace(resolver, directory=self.workspace_dir)
def test_merge(tmp_path):
    """Merge the SBB workspace into the Kant workspace; the METS grows from 6 to 41 files."""
    # arrange
    kant_dir = tmp_path / 'kant_aufklaerung'
    kant_dir.mkdir()
    sbb_dir = tmp_path / 'sbb'
    sbb_dir.mkdir()
    copytree(assets.path_to('kant_aufklaerung_1784/data'), kant_dir)
    copytree(assets.path_to('SBB0000F29300010000/data'), sbb_dir)
    target = Workspace(Resolver(), kant_dir)
    source = Workspace(Resolver(), sbb_dir)

    # assert number of files before
    assert len(target.mets.find_all_files()) == 6

    # act
    target.merge(source)

    # assert
    assert len(target.mets.find_all_files()) == 41
    assert exists(join(kant_dir, 'OCR-D-IMG/FILE_0001_IMAGE.tif'))
def test_rename_file_group(self):
    """Renaming a file group updates the PAGE imageFilename and moves the image on disk."""
    word_page_id = 'OCR-D-GT-SEG-WORD_0001'
    with copy_of_directory(assets.path_to('kant_aufklaerung_1784-page-region-line-word_glyph/data')) as tempdir:
        ws = Workspace(self.resolver, directory=tempdir)
        # paths below are relative, so work from inside the workspace
        with pushd_popd(tempdir):
            page_file_before = next(ws.mets.find_files(ID=word_page_id))
            pcgts_before = page_from_file(page_file_before)
            assert pcgts_before.get_Page().imageFilename == 'OCR-D-IMG/OCR-D-IMG_0001.tif'

            ws.rename_file_group('OCR-D-IMG', 'FOOBAR')

            page_file_after = next(ws.mets.find_files(ID=word_page_id))
            pcgts_after = page_from_file(page_file_after)
            assert pcgts_after.get_Page().imageFilename == 'FOOBAR/OCR-D-IMG_0001.tif'
            assert Path('FOOBAR/OCR-D-IMG_0001.tif').exists()
            assert not Path('OCR-D-IMG/OCR-D-IMG_0001.tif').exists()
def test_workspace_remove_single_group_recursive(workspace_directory):
    """Remove single group recursive by name succeeds"""
    # arrange
    ws = Workspace(Resolver(), workspace_directory)
    alto_files = ws.mets.find_files(fileGrp='OCR-D-GT-ALTO')
    assert len(alto_files) == 2

    # act
    ws.remove_file_group('OCR-D-GT-ALTO', recursive=True)

    # assert
    alto_files = ws.mets.find_files(fileGrp='OCR-D-GT-ALTO')
    assert len(alto_files) == 0
def workspace_from_nothing(self, directory, mets_basename='mets.xml', clobber_mets=False):
    """
    Create an empty workspace.

    Arguments:
        directory (string): Target directory for the workspace. If None, a temporary
            directory is created with prefix ``TMP_PREFIX``. Missing parents are created.
        mets_basename (string, ``mets.xml``): File name of the METS inside ``directory``.
        clobber_mets (boolean, False): Whether to overwrite an existing METS file.

    Returns:
        Workspace

    Raises:
        FileExistsError: if the METS file already exists and ``clobber_mets`` is false.
    """
    if directory is None:
        directory = tempfile.mkdtemp(prefix=TMP_PREFIX)
    Path(directory).mkdir(parents=True, exist_ok=True)

    mets_path = Path(directory, mets_basename)
    if mets_path.exists() and not clobber_mets:
        raise FileExistsError("METS '%s' already exists in '%s' and clobber_mets not set." % (mets_basename, directory))

    mets = OcrdMets.empty_mets()
    log.info("Writing METS to %s", mets_path)
    mets_path.write_bytes(mets.to_xml(xmllint=True))

    # Hand the basename on so the workspace refers to the file just written,
    # not to the default name.
    return Workspace(self, directory, mets, mets_basename=mets_basename)
def workspace_from_nothing(self, directory, mets_basename='mets.xml', clobber_mets=False):
    """
    Create an empty workspace.

    Arguments:
        directory (string): Target directory for the workspace. If None, a temporary
            directory is created with prefix ``TMP_PREFIX``. Created if it does not exist.
        mets_basename (string, ``mets.xml``): File name of the METS inside ``directory``.
        clobber_mets (boolean, False): Whether to overwrite an existing METS file.

    Returns:
        Workspace

    Raises:
        Exception: if the METS file already exists and ``clobber_mets`` is false.
    """
    if directory is None:
        directory = tempfile.mkdtemp(prefix=TMP_PREFIX)
    if not os.path.exists(directory):
        os.makedirs(directory)

    mets_fpath = os.path.join(directory, mets_basename)
    if not clobber_mets and os.path.exists(mets_fpath):
        raise Exception("Not clobbering existing mets.xml in '%s'." % directory)

    mets = OcrdMets.empty_mets()
    with open(mets_fpath, 'wb') as fmets:
        log.info("Writing %s", mets_fpath)
        fmets.write(mets.to_xml(xmllint=True))

    # Hand the basename on so the workspace refers to the file just written,
    # not to the default name.
    return Workspace(self, directory, mets, mets_basename=mets_basename)
def workspace_from_url(self, mets_url, directory=None):
    """
    Create a workspace from a METS by URL.

    Sets the mets.xml file

    Arguments:
        mets_url (string): Source METS URL. A value without ``://`` is treated as a
            filesystem path and turned into an absolute ``file://`` URL.
        directory (string, None): Target directory. If None, a temporary directory is
            created with prefix ``TMP_PREFIX``.

    Returns:
        Workspace

    Raises:
        Exception: if ``mets_url`` is None.
    """
    from os.path import abspath

    if mets_url is None:
        raise Exception("Must pass mets_url to workspace_from_url")
    if '://' not in mets_url:
        # Resolve to absolute first: "file://" + a relative path would put the
        # first path segment where a URL expects the host.
        mets_url = 'file://' + abspath(mets_url)
    if directory is None:
        directory = tempfile.mkdtemp(prefix=TMP_PREFIX)
    log.debug("Creating workspace '%s' for METS @ <%s>", directory, mets_url)
    self.download_to_directory(directory, mets_url, basename='mets.xml', prefer_symlink=False)
    return Workspace(self, directory)
def unpack_workspace_from_filename(self, zip_filename, directory=None):
    """
    Unpack an OCRD-ZIP to a local workspace (marked as TODO by the original author).

    Steps:
        1. Pick the target directory (a fresh temporary one if none is given)
        2. Extract the whole archive into it
        3. Instantiate a workspace on that directory

    Args:
        zip_filename (string) : Path to OCRD-ZIP file
        directory (string) : Extraction target. Default: new temp dir with prefix ``TMP_PREFIX``.
    """
    target_dir = directory
    if target_dir is None:
        target_dir = tempfile.mkdtemp(prefix=TMP_PREFIX)
    log.debug("Unpacking to %s", target_dir)

    with ZipFile(zip_filename, 'r') as archive:
        archive.extractall(path=target_dir)

    return Workspace(self, target_dir)
def test_superfluous_copies_in_ws_dir(tmp_path):
    """
    https://github.com/OCR-D/core/issues/227
    """
    # arrange: a workspace containing only a METS that references one file
    mets_src = assets.path_to('SBB0000F29300010000/data/mets_one_file.xml')
    mets_dst = join(tmp_path, 'mets.xml')
    copyfile(mets_src, mets_dst)
    workspace = Workspace(Resolver(), tmp_path)

    # assert directory files
    assert count_files(tmp_path) == 1

    # act
    for mets_file in workspace.mets.find_all_files():
        workspace.download_file(mets_file)

    # assert: only the downloaded image was added
    assert count_files(tmp_path) == 2
    assert exists(join(tmp_path, 'OCR-D-IMG/FILE_0005_IMAGE.tif'))
def test_227_1(self):
    """Downloading the single file of a one-file METS adds exactly one file to the workspace dir."""
    def find_recursive(root):
        # flat list of every file name found anywhere below root
        return [name for _, _, names in os.walk(root) for name in names]

    with TemporaryDirectory() as wsdir:
        with open(assets.path_to('SBB0000F29300010000/data/mets_one_file.xml'), 'r') as f_in, \
                open(join(wsdir, 'mets.xml'), 'w') as f_out:
            f_out.write(f_in.read())
        self.assertEqual(len(find_recursive(wsdir)), 1)

        ws1 = Workspace(self.resolver, wsdir)
        for mets_file in ws1.mets.find_files():
            ws1.download_file(mets_file)

        self.assertEqual(len(find_recursive(wsdir)), 2)
        self.assertTrue(exists(join(wsdir, 'OCR-D-IMG/FILE_0005_IMAGE')))
def test_rename_file_group(tmp_path):
    """Renaming a file group renames its files, updates PAGE references and the page mapping."""
    # arrange
    copytree(assets.path_to('kant_aufklaerung_1784-page-region-line-word_glyph/data'), str(tmp_path))
    workspace = Workspace(Resolver(), directory=str(tmp_path))

    # before act
    # TODO clear semantics
    # requires rather odd additional path-setting because root path from
    # workspace is not propagated - works only if called inside workspace
    # which can be achieved with pushd_popd functionalities
    ocrd_file = next(workspace.mets.find_files(ID='OCR-D-GT-SEG-WORD_0001'))
    relative_name = ocrd_file.local_filename
    ocrd_file.local_filename = join(tmp_path, relative_name)
    pcgts_before = page_from_file(ocrd_file)

    # before assert
    assert pcgts_before.get_Page().imageFilename == 'OCR-D-IMG/OCR-D-IMG_0001.tif'

    # act
    workspace.rename_file_group('OCR-D-IMG', 'FOOBAR')
    next_ocrd_file = next(workspace.mets.find_files(ID='OCR-D-GT-SEG-WORD_0001'))
    next_ocrd_file.local_filename = join(tmp_path, relative_name)
    pcgts_after = page_from_file(next_ocrd_file)

    # assert
    assert pcgts_after.get_Page().imageFilename == 'FOOBAR/FOOBAR_0001.tif'
    assert (tmp_path / 'FOOBAR/FOOBAR_0001.tif').exists()
    # Check the old location inside the workspace; a path relative to the
    # current working directory would never exist and pass vacuously.
    assert not (tmp_path / 'OCR-D-IMG/OCR-D-IMG_0001.tif').exists()
    assert workspace.mets.get_physical_pages(for_fileIds=['OCR-D-IMG_0001']) == [None]
    assert workspace.mets.get_physical_pages(for_fileIds=['FOOBAR_0001']) == ['phys_0001']
def _fixture_kant_complex(tmp_path):
    """Yield a Workspace on a copy of the kant_aufklaerung_1784-complex data in ``tmp_path``."""
    copytree(assets.path_to('kant_aufklaerung_1784-complex/data'), str(tmp_path))
    # Pass a Resolver instance, as every other Workspace construction does;
    # the bare class would break any resolver method call for lack of `self`.
    yield Workspace(Resolver(), directory=tmp_path)
def workspace_from_url(self, mets_url, dst_dir=None, clobber_mets=False, mets_basename=None, download=False, baseurl=None):
    """
    Build a workspace from the METS found at a URL and place the METS in the workspace directory.

    Arguments:
        mets_url (string): Source mets URL. If None, it is derived from ``baseurl`` and
            ``mets_basename`` (falling back to ``mets.xml``).
        dst_dir (string, None): Target directory for the workspace
        clobber_mets (boolean, False): Whether to overwrite existing mets.xml. By default existing mets.xml will raise an exception.
        mets_basename (string, None): File name of the METS in ``dst_dir``. Defaults to the
            last segment of ``mets_url`` without query and fragment.
        download (boolean, False): Whether to download all the files
        baseurl (string, None): Base URL for resolving relative file locations

    Returns:
        Workspace
    """
    if dst_dir and not dst_dir.startswith('/'):
        dst_dir = abspath(dst_dir)

    if mets_url is None:
        if baseurl is None:
            raise Exception("Must pass mets_url and/or baseurl to workspace_from_url")
        mets_url = 'file://%s/%s' % (baseurl, mets_basename or 'mets.xml')

    if baseurl is None:
        baseurl = mets_url.rsplit('/', 1)[0]
    log.debug("workspace_from_url\nmets_url='%s'\nbaseurl='%s'\ndst_dir='%s'", mets_url, baseurl, dst_dir)

    # plain paths become absolute file URLs
    if '://' not in mets_url:
        mets_url = 'file://%s' % abspath(mets_url)

    if dst_dir is None:
        if mets_url.startswith('file://'):
            # no dst_dir given and the METS is a local file: work in the
            # directory where the mets.xml resides
            dst_dir = dirname(mets_url[len('file://'):])
        else:
            dst_dir = tempfile.mkdtemp(prefix=TMP_PREFIX)
            log.debug("Creating workspace '%s' for METS @ <%s>", dst_dir, mets_url)

    # default basename: last URL segment, stripped of query and fragment
    if mets_basename is None:
        last_segment = mets_url.rsplit('/', 1)[-1]
        without_query = last_segment.split('?')[0]
        mets_basename = without_query.split('#')[0]

    dst_mets = join(dst_dir, mets_basename)
    log.debug("Copying mets url '%s' to '%s'", mets_url, dst_mets)
    if 'file://' + dst_mets == mets_url:
        log.debug("Target and source mets are identical")
    elif exists(dst_mets) and not clobber_mets:
        raise Exception("File '%s' already exists but clobber_mets is false" % dst_mets)
    else:
        self.download_to_directory(dst_dir, mets_url, basename=mets_basename)

    workspace = Workspace(self, dst_dir, mets_basename=mets_basename, baseurl=baseurl)

    if download:
        for mets_file in workspace.mets.find_files():
            workspace.download_file(mets_file)

    return workspace
def workspace_from_url(self, mets_url, dst_dir=None, clobber_mets=False, mets_basename=None, download=False, src_baseurl=None):
    """
    Create a workspace from a METS by URL (i.e. clone if :py:attr:`mets_url` is remote or :py:attr:`dst_dir` is given).

    Arguments:
        mets_url (string): Source METS URL or filesystem path
    Keyword Arguments:
        dst_dir (string, None): Target directory for the workspace. \
            If not given, the parent directory of a local :py:attr:`mets_url` is used; \
            otherwise a temporary directory is created with prefix :py:data:`ocrd.constants.TMP_PREFIX`. \
            (The resulting path can be retrieved via :py:attr:`ocrd.Workspace.directory`.)
        clobber_mets (boolean, False): Whether to overwrite an existing METS file. \
            If false, ``if_exists='skip'`` is passed to ``download_to_directory``.
        mets_basename (string, None): File name of the METS in :py:attr:`dst_dir`. \
            Defaults to the last URL segment of :py:attr:`mets_url`.
        download (boolean, False): Whether to also download all the files referenced by the METS
        src_baseurl (string, None): Base URL for resolving relative file locations

    Returns:
        a new :py:class:`~ocrd.workspace.Workspace`

    Raises:
        ValueError: if :py:attr:`mets_url` is None.
    """
    log = getLogger('ocrd.resolver.workspace_from_url')

    if mets_url is None:
        raise ValueError("Must pass 'mets_url' workspace_from_url")

    # if mets_url is a relative filename, make it absolute
    if is_local_filename(mets_url) and not Path(mets_url).is_absolute():
        mets_url = str(Path.cwd() / mets_url)

    # if mets_basename is not given, use the last URL segment of the mets_url
    if mets_basename is None:
        mets_basename = nth_url_segment(mets_url, -1)

    # If src_baseurl wasn't given, determine from mets_url by removing last url segment
    if not src_baseurl:
        last_segment = nth_url_segment(mets_url)
        src_baseurl = remove_non_path_from_url(
            remove_non_path_from_url(mets_url)[:-len(last_segment)])

    # resolve dst_dir
    if not dst_dir:
        if is_local_filename(mets_url):
            log.debug("Deriving dst_dir %s from %s", Path(mets_url).parent, mets_url)
            dst_dir = Path(mets_url).parent
        else:
            # Create the directory first so the log line reports the real path
            # instead of the still-unset dst_dir.
            dst_dir = mkdtemp(prefix=TMP_PREFIX)
            log.debug("Creating ephemeral workspace '%s' for METS @ <%s>", dst_dir, mets_url)

    # XXX Path.resolve is always strict in Python <= 3.5, so create dst_dir unless it exists consistently
    if not Path(dst_dir).exists():
        Path(dst_dir).mkdir(parents=True, exist_ok=False)
    dst_dir = str(Path(dst_dir).resolve())

    log.debug(
        "workspace_from_url\nmets_basename='%s'\nmets_url='%s'\nsrc_baseurl='%s'\ndst_dir='%s'",
        mets_basename, mets_url, src_baseurl, dst_dir)

    self.download_to_directory(
        dst_dir, mets_url, basename=mets_basename,
        if_exists='overwrite' if clobber_mets else 'skip')

    workspace = Workspace(self, dst_dir, mets_basename=mets_basename, baseurl=src_baseurl)

    if download:
        for f in workspace.mets.find_files():
            workspace.download_file(f)

    return workspace