Ejemplo n.º 1
0
    def _bag_mets_files(self, workspace, bagdir, ocrd_manifestation_depth,
                        ocrd_mets, processes):
        """
        Copy the files referenced in the workspace METS into the bag payload,
        write the METS itself and generate the payload manifests.

        Every file found via ``workspace.mets.find_files()`` is stored as
        ``<bagdir>/data/<fileGrp>/<ID>`` and its URL in the METS is rewritten
        to ``<fileGrp>/<ID>``. Files that are skipped are reported through
        ``self._log_or_raise``.

        Args:
            workspace: object providing ``mets`` and ``directory``.
            bagdir: root directory of the bag being created.
            ocrd_manifestation_depth: non-local files are only fetched when
                this is ``'full'``.
            ocrd_mets: file name of the METS inside the ``data`` directory.
            processes: passed through to ``make_manifests``.

        Returns:
            The ``(total_bytes, total_files)`` pair from ``make_manifests``.

        The working directory of the process is changed while this method
        runs; it is restored on every exit path, including exceptions.
        """
        mets = workspace.mets

        # TODO allow filtering by fileGrp@USE and such
        oldpwd = getcwd()
        try:
            # Work from inside the workspace so that local file references
            # are looked up relative to it.
            chdir(workspace.directory)
            for f in mets.find_files():
                log.info("Resolving %s (%s)", f.url, ocrd_manifestation_depth)
                if is_local_filename(f.url):
                    f.url = abspath(f.url)
                elif ocrd_manifestation_depth != 'full':
                    self._log_or_raise(
                        "Not fetching non-local files, skipping %s" % f.url,
                        oldpwd)
                    continue
                elif not f.url.startswith('http'):
                    self._log_or_raise("Not an http URL: %s" % f.url, oldpwd)
                    continue
                log.info("Resolved %s", f.url)

                file_grp_dir = join(bagdir, 'data', f.fileGrp)
                if not isdir(file_grp_dir):
                    makedirs(file_grp_dir)
                self.resolver.download_to_directory(file_grp_dir,
                                                    f.url,
                                                    basename=f.ID)
                # From now on the METS points at the copy inside the bag.
                f.url = join(f.fileGrp, f.ID)

            # save mets.xml (with the rewritten URLs)
            with open(join(bagdir, 'data', ocrd_mets), 'wb') as mets_file:
                mets_file.write(mets.to_xml())

            # make_manifests is given the relative path 'data', so it has to
            # be called from inside the bag directory.
            chdir(bagdir)
            total_bytes, total_files = make_manifests('data',
                                                      processes,
                                                      algorithms=['sha512'])
        finally:
            # Always restore the caller's working directory, even if a
            # download, the METS write or the manifest generation failed.
            chdir(oldpwd)
        return total_bytes, total_files
Ejemplo n.º 2
0
    def download_file(self, f):
        """
        Make sure a :py:mod:`ocrd.model.ocrd_file.OcrdFile` is available
        locally and record where in ``f.local_filename``.

        The lookup runs with ``self.directory`` as the working directory;
        the previous working directory is restored afterwards in all cases.

        Returns the same ``f`` that was passed in.
        """
        previous_dir = os.getcwd()
        try:
            os.chdir(self.directory)
            if is_local_filename(f.url):
                # Already on disk: just record its absolute path.
                f.local_filename = abspath(f.url)
            elif f.local_filename:
                log.debug("Already downloaded: %s", f.local_filename)
            else:
                f.local_filename = self.download_url(
                    f.url,
                    basename='%s/%s' % (f.fileGrp, f.ID))
        finally:
            os.chdir(previous_dir)

        return f
Ejemplo n.º 3
0
 def test_abspath(self):
     """Check that ``abspath`` maps the URL ``file:///`` to the path ``/``."""
     file_url = 'file:///'
     expected_path = '/'
     self.assertEqual(abspath(file_url), expected_path)