def _bag_mets_files(self, workspace, bagdir, ocrd_manifestation_depth, ocrd_mets, processes):
    """
    Copy the files referenced in the workspace METS into the bag and write the manifests.

    Every file returned by ``workspace.mets.find_files()`` is resolved
    relative to ``workspace.directory``, downloaded into
    ``<bagdir>/data/<fileGrp>/<ID>`` and its ``url`` is rewritten in place to
    the bag-relative ``<fileGrp>/<ID>``. The (modified) METS is then
    serialized to ``<bagdir>/data/<ocrd_mets>`` and the payload manifests are
    generated from within ``bagdir``.

    The process working directory is changed while this runs; it is always
    restored to its previous value, even when a download, a file write or the
    manifest creation raises.

    Args:
        workspace: object providing ``mets`` and ``directory``.
        bagdir: directory of the bag being assembled.
        ocrd_manifestation_depth: non-local files are only fetched when this
            equals ``'full'``; otherwise they are reported through
            ``self._log_or_raise`` and skipped.
        ocrd_mets: file name of the METS written below ``<bagdir>/data``.
        processes: passed through to ``make_manifests``.

    Returns:
        The ``(total_bytes, total_files)`` pair returned by ``make_manifests``.
    """
    mets = workspace.mets

    # TODO allow filtering by fileGrp@USE and such
    oldpwd = getcwd()
    try:
        # NOTE(review): after this chdir a relative ``bagdir`` is resolved
        # against workspace.directory -- confirm callers pass an absolute path.
        chdir(workspace.directory)
        for f in mets.find_files():
            log.info("Resolving %s (%s)", f.url, ocrd_manifestation_depth)
            if is_local_filename(f.url):
                f.url = abspath(f.url)
                # XXX cannot happen because chdir above
                #  elif is_local_filename(join(workspace.directory, 'data', f.url)):
                #      f.url = abspath(join(workspace.directory, 'data', f.url))
            elif ocrd_manifestation_depth != 'full':
                self._log_or_raise("Not fetching non-local files, skipping %s" % f.url, oldpwd)
                continue
            elif not f.url.startswith('http'):
                self._log_or_raise("Not an http URL: %s" % f.url, oldpwd)
                continue
            log.info("Resolved %s", f.url)

            file_grp_dir = join(bagdir, 'data', f.fileGrp)
            if not isdir(file_grp_dir):
                makedirs(file_grp_dir)
            self.resolver.download_to_directory(file_grp_dir, f.url, basename=f.ID)
            # From here on the METS references the copy inside the bag.
            f.url = join(f.fileGrp, f.ID)

        # save mets.xml
        with open(join(bagdir, 'data', ocrd_mets), 'wb') as mets_out:
            mets_out.write(workspace.mets.to_xml())

        # make_manifests is given the relative path 'data', so run it from bagdir.
        chdir(bagdir)
        total_bytes, total_files = make_manifests('data', processes, algorithms=['sha512'])
    finally:
        # Restore the caller's working directory on success and on failure.
        chdir(oldpwd)
    return total_bytes, total_files
def download_file(self, f):
    """
    Make a :py:mod:`ocrd.model.ocrd_file.OcrdFile` available in the workspace.

    Runs with ``self.directory`` as the working directory and restores the
    previous working directory afterwards, also on error.

    * If ``f.url`` is a local filename, ``f.local_filename`` is set to its
      absolute path.
    * Otherwise, if ``f.local_filename`` is already set, nothing is fetched.
    * Otherwise the URL is fetched with ``self.download_url`` using
      ``<fileGrp>/<ID>`` as basename and the result is stored in
      ``f.local_filename``.

    Returns the same ``f`` that was passed in.
    """
    previous_dir = os.getcwd()
    try:
        os.chdir(self.directory)
        if is_local_filename(f.url):
            f.local_filename = abspath(f.url)
        elif f.local_filename:
            log.debug("Already downloaded: %s", f.local_filename)
        else:
            f.local_filename = self.download_url(
                f.url,
                basename='%s/%s' % (f.fileGrp, f.ID),
            )
    finally:
        os.chdir(previous_dir)
    return f
def test_abspath(self):
    """``abspath`` must turn the URL ``file:///`` into the root path ``/``."""
    resolved = abspath('file:///')
    self.assertEqual(resolved, '/')