Ejemplo n.º 1
0
 def test_validate_ocrd_file(self):
     """Validating the faulty glyph file yields exactly 17 ConsistencyError entries."""
     workspace = Resolver().workspace_from_url(assets.url_of('glyph-consistency/data/mets.xml'))
     with pushd_popd(workspace.directory):
         faulty_file = workspace.mets.find_all_files(ID="FAULTY_GLYPHS_FILE")[0]
         report = PageValidator.validate(ocrd_file=faulty_file)
         # Only consistency errors are counted; other error kinds are ignored here.
         consistency_errors = [err for err in report.errors if isinstance(err, ConsistencyError)]
         self.assertEqual(len(consistency_errors), 17, '17 textequiv consistency errors')
Ejemplo n.º 2
0
    def test_tmpConfigfile(self):
        """
        Check that ``initLogging`` applies an ``ocrd_logging.conf`` written to
        the current working directory: afterwards the root logger's effective
        level must be ERROR, as set in that file.
        """
        root_logger_name = ''
        # Precondition: the root logger already has some level configured.
        self.assertNotEqual(
            logging.getLogger(root_logger_name).getEffectiveLevel(), logging.NOTSET)
        with TemporaryDirectory() as tempdir, pushd_popd(tempdir):
            # Minimal logging configuration file (MWE)
            with open('ocrd_logging.conf', 'w') as conf_file:
                conf_file.write('''
                        [loggers]
                        keys=root

                        [handlers]
                        keys=consoleHandler

                        [formatters]
                        keys=

                        [logger_root]
                        level=ERROR
                        handlers=consoleHandler

                        [handler_consoleHandler]
                        class=StreamHandler
                        formatter=
                        args=(sys.stdout,)
                        ''')
            # This will call logging.config.fileConfig with
            # disable_existing_loggers=True, so the defaults from the
            # import-time initLogging should be invalidated.
            initLogging()
            # The level must now come from the temporary config file.
            self.assertEqual(
                logging.getLogger(root_logger_name).getEffectiveLevel(), logging.ERROR)
Ejemplo n.º 3
0
 def download_file(self, f, _recursion_count=0):
     """
     Download a :py:mod:`ocrd.model.ocrd_file.OcrdFile` to the workspace.

     If ``f.url`` already resolves to an existing path below the workspace
     directory nothing is fetched. Otherwise the resolver downloads it; when
     that fails with ``FileNotFoundError`` the download is retried once with
     the workspace ``baseurl`` prepended.

     Arguments:
         f: the file to download; its ``url`` and ``local_filename`` are updated
         _recursion_count (int): internal retry counter, callers leave it at 0

     Returns:
         The same ``f`` with ``local_filename`` set to its (possibly new) ``url``.

     Raises:
         Exception: if the download fails and there is no ``baseurl``, or the
             ``baseurl`` retry has already been attempted.
     """
     log = getLogger('ocrd.workspace.download_file')
     log.debug('download_file %s [_recursion_count=%s]' % (f, _recursion_count))
     with pushd_popd(self.directory):
         # Any failure while probing (not a path, outside the workspace, ...)
         # simply means "not downloaded yet".
         try:
             candidate = Path(f.url).resolve()
             already_local = bool(
                 candidate.exists() and candidate.relative_to(str(Path(self.directory).resolve())))
         except Exception:
             already_local = False
         if not already_local:
             if f.ID:
                 basename = '%s%s' % (f.ID, MIME_TO_EXT.get(f.mimetype, ''))
             else:
                 basename = f.basename
             try:
                 f.url = self.resolver.download_to_directory(self.directory, f.url, subdir=f.fileGrp, basename=basename)
             except FileNotFoundError as e:
                 if not self.baseurl:
                     raise Exception("No baseurl defined by workspace. Cannot retrieve '%s'" % f.url)
                 if _recursion_count >= 1:
                     raise Exception("Already tried prepending baseurl '%s'. Cannot retrieve '%s'" % (self.baseurl, f.url))
                 log.debug("First run of resolver.download_to_directory(%s) failed, try prepending baseurl '%s': %s", f.url, self.baseurl, e)
                 f.url = '%s/%s' % (self.baseurl, f.url)
                 f.url = self.download_file(f, _recursion_count + 1).local_filename
         f.local_filename = f.url
         return f
Ejemplo n.º 4
0
    def remove_file_group(self, USE, recursive=False, force=False, keep_files=False, page_recursive=False, page_same_group=False):
        """
        Remove a fileGrp.

        Arguments:
            USE (string): USE attribute of the fileGrp to delete
            recursive (boolean): Whether to recursively delete all files in the group
            force (boolean): Continue removing even if group or containing files not found in METS
            keep_files (boolean): When deleting recursively whether to keep files on disk
            page_recursive (boolean): Whether to remove all images referenced in the file if the file is a PAGE-XML document.
            page_same_group (boolean): Remove only images in the same file group as the PAGE-XML. Has no effect unless ``page_recursive`` is ``True``.

        Raises:
            Exception: if ``USE`` is neither a regex (``REGEX_PREFIX``) nor an
                existing fileGrp and ``force`` is not in effect.
        """
        # Overwrite mode implies force.
        if not force and self.overwrite_mode:
            force = True

        if (not USE.startswith(REGEX_PREFIX)) and (USE not in self.mets.file_groups) and (not force):
            raise Exception("No such fileGrp: %s" % USE)

        # Directories that contained removed files; candidates for cleanup below.
        file_dirs = set()
        if recursive:
            for f in self.mets.find_files(fileGrp=USE):
                self.remove_file(f, force=force, keep_file=keep_files, page_recursive=page_recursive, page_same_group=page_same_group)
                # Files without a local copy have no directory to clean up;
                # path.dirname(None) would raise TypeError.
                if f.local_filename:
                    file_dir = path.dirname(f.local_filename)
                    # A file directly in the workspace root has dirname '',
                    # which must not reach listdir('') below.
                    if file_dir:
                        file_dirs.add(file_dir)

        self.mets.remove_file_group(USE, force=force)

        # PLEASE NOTE: this only removes directories in the workspace if they are empty
        # and named after the fileGrp which is a convention in OCR-D.
        with pushd_popd(self.directory):
            if Path(USE).is_dir() and not listdir(USE):
                Path(USE).rmdir()
            for file_dir in file_dirs:
                if Path(file_dir).is_dir() and not listdir(file_dir):
                    Path(file_dir).rmdir()
Ejemplo n.º 5
0
    def remove_file_group(self, USE, recursive=False, force=False, keep_files=False):
        """
        Delete the fileGrp whose USE attribute is ``USE``.

        Arguments:
            USE (string): USE attribute of the fileGrp to delete
            recursive (boolean): Also remove every file of the group first
            force (boolean): Do not fail if the group (or its files) is missing from METS
            keep_files (boolean): With ``recursive``, leave the files on disk

        Raises:
            Exception: if the fileGrp does not exist and ``force`` is not set.
        """
        if not (USE in self.mets.file_groups or force):
            raise Exception("No such fileGrp: %s" % USE)

        if recursive:
            for member in self.mets.find_files(fileGrp=USE):
                self.remove_file(member.ID, force=force, keep_file=keep_files)

        # Re-check membership: with force the group may never have existed.
        if USE in self.mets.file_groups:
            self.mets.remove_file_group(USE)

        # XXX this only removes directories in the workspace if they are empty
        # and named after the fileGrp which is a convention in OCR-D.
        with pushd_popd(self.directory):
            group_dir = Path(USE)
            if group_dir.is_dir() and not listdir(USE):
                group_dir.rmdir()
Ejemplo n.º 6
0
 def test_resolve_image_as_pil(self):
     """The full image is 1457px wide; cropping with the given coords yields width 50."""
     image_url = 'OCR-D-IMG/INPUT_0017.tif'
     with pushd_popd(assets.path_to('kant_aufklaerung_1784/data/')):
         workspace = self.resolver.workspace_from_url('mets.xml')
         full_image = workspace.resolve_image_as_pil(image_url)
         self.assertEqual(full_image.width, 1457)
         cropped = workspace.resolve_image_as_pil(image_url, coords=([100, 100], [50, 50]))
         self.assertEqual(cropped.width, 50)
Ejemplo n.º 7
0
 def _validate(self):
     """
     Run all validation steps that are not listed in ``self.skip``.

     The workspace is resolved first; if that fails, the failure is recorded
     in the report and no further step is run. The remaining steps run from
     within the workspace directory, and any exception they raise is turned
     into a report error carrying the traceback.

     Returns:
         ``self.report``
     """
     try:
         self._resolve_workspace()
     except Exception as e:  # pylint: disable=broad-except
         log.warning("Failed to instantiate workspace: %s", e)
         self.report.add_error("Failed to instantiate workspace: %s" % e)
         return self.report

     # Step names double as skip keys and as suffix of the ``_validate_*`` method.
     steps = (
         'mets_unique_identifier',
         'mets_file_group_names',
         'mets_files',
         'pixel_density',
         'multipage',
         'dimension',
         'imagefilename',
         'page',
     )
     with pushd_popd(self.workspace.directory):
         try:
             for step in steps:
                 if step not in self.skip:
                     getattr(self, '_validate_%s' % step)()
         except Exception:
             self.report.add_error("Validation aborted with exception: %s" %
                                   format_exc())
     return self.report
Ejemplo n.º 8
0
def do_the_update(bagdir, non_local_urls=False):
    """
    Give every file referenced in ``<bagdir>/data/mets.xml`` the extension
    that matches its MIME type, both in the METS and on disk, then save the
    METS and refresh the bag checksums.

    Arguments:
        bagdir: directory of the unpacked OCRD-ZIP; must contain ``data/mets.xml``
        non_local_urls (boolean): also rewrite the URL of files that do not
            exist locally (nothing is renamed on disk for those)
    """
    directory = Path(bagdir, 'data')
    if not Path(directory, 'mets.xml').exists():
        LOG.error("Something's wrong with OCRD-ZIP at %s, no data/mets.xml!",
                  bagdir)
        return
    workspace = Workspace(resolver, directory=str(directory))
    with pushd_popd(directory):
        for f in workspace.mets.find_files():
            fp = Path(f.url)
            if not fp.exists() and not non_local_urls:
                LOG.debug("Skipping non-local file: %s", fp)
                continue
            ext = MIME_TO_EXT.get(f.mimetype)
            if not ext:
                LOG.error(
                    "No rule to translate '%s' to an extension. Skipping %s",
                    f.mimetype, fp)
                continue
            if fp.suffix == ext:
                LOG.debug("Already has the right extension, %s", fp.name)
                continue
            new_url = f.url
            if fp.suffix and fp.suffix in EXT_TO_MIME:
                # A known but wrong extension is replaced, not appended to.
                LOG.warning("Has the WRONG extension, is '%s' should be '%s'",
                            fp.suffix, ext)
                new_url = new_url[:-len(fp.suffix)]
            new_url = "%s%s" % (new_url, ext)
            LOG.info('Renaming %s to %s', fp, new_url)
            # Rename to exactly the path recorded in the METS, so a stripped
            # wrong suffix does not survive on disk (e.g. as 'a.jpg.png').
            if fp.exists():
                fp.rename(new_url)
            f.url = new_url
        workspace.save_mets()
        LOG.debug('Running bagit update script')
        update_checksums(bagdir)
    LOG.info("FINISHED: %s", bagdir)
Ejemplo n.º 9
0
    def _resolve_image_as_pil(self, image_url, coords=None):
        """
        Resolve an image URL to a PIL image.

        The file is looked up in the METS by URL (or wrapped in an ad-hoc
        ``OcrdFile`` if not found), downloaded into the workspace and opened.

        Args:
            - image_url (string) : URL of the image as referenced in the METS
            - coords (list) : Coordinates of the bounding box to cut from the image

        Returns:
            Image or region in image as PIL.Image

        """
        log = getLogger('ocrd.workspace._resolve_image_as_pil')
        files = self.mets.find_files(url=image_url)
        f = files[0] if files else OcrdFile(None, url=image_url)
        image_filename = self.download_file(f).local_filename

        with pushd_popd(self.directory):
            pil_image = Image.open(image_filename)
            pil_image.load()  # alloc and give up the FD

        if coords is None:
            return pil_image

        log.debug("Converting PIL to OpenCV: %s", image_url)
        color_conversion = cv2.COLOR_GRAY2BGR if pil_image.mode in (
            '1', 'L') else cv2.COLOR_RGB2BGR
        pil_as_np_array = np.array(pil_image).astype(
            'uint8') if pil_image.mode == '1' else np.array(pil_image)
        cv2_image = cv2.cvtColor(pil_as_np_array, color_conversion)

        poly = np.array(coords, np.int32)
        log.debug("Cutting region %s from %s", coords, image_url)
        # Axis-aligned bounding box of the polygon: rows from y, columns from x.
        region_cut = cv2_image[np.min(poly[:, 1]):np.max(poly[:, 1]),
                               np.min(poly[:, 0]):np.max(poly[:, 0])]
        # cv2_image is in BGR channel order, but Image.fromarray interprets a
        # 3-channel array as RGB. Reverse the channel axis so the cropped
        # region has the same colours as the uncropped image.
        region_rgb = np.ascontiguousarray(region_cut[:, :, ::-1])
        return Image.fromarray(region_rgb)
 def test_dimensions(self):
     """
     A PAGE file whose @imageHeight disagrees with the image must produce
     exactly one validation error, and none when 'dimension' is skipped.
     """
     with TemporaryDirectory() as tempdir:
         wsdir = join(tempdir, 'foo')
         copytree(assets.path_to('kant_aufklaerung_1784/data'), wsdir)
         with pushd_popd(wsdir):
             # Corrupt the declared image height in pure Python rather than
             # shelling out to `sed -i`, which is GNU-specific and whose
             # exit status was ignored.
             page_path = 'OCR-D-GT-PAGE/PAGE_0017_PAGE.xml'
             with open(page_path, 'r', encoding='utf-8', newline='') as page_file:
                 page_xml = page_file.read()
             # Fail early and clearly if the fixture no longer has this value.
             self.assertIn('imageHeight="2083"', page_xml)
             with open(page_path, 'w', encoding='utf-8', newline='') as page_file:
                 page_file.write(
                     page_xml.replace('imageHeight="2083"', 'imageHeight="1234"'))
             report = WorkspaceValidator.validate(
                 self.resolver,
                 join(wsdir, 'mets.xml'),
                 src_dir=wsdir,
                 skip=[
                     'page', 'mets_unique_identifier',
                     'mets_file_group_names', 'mets_files', 'pixel_density',
                     'imagefilename', 'page_xsd', 'mets_xsd'
                 ],
                 download=True)
             self.assertIn(
                 "PAGE 'PAGE_0017_PAGE': @imageHeight != image's actual height (1234 != 2083)",
                 report.errors)
             #  print(report.errors)
             self.assertEqual(len(report.errors), 1)
             self.assertEqual(report.is_valid, False)
             # Same workspace, but with the 'dimension' check skipped as well.
             report2 = WorkspaceValidator.validate(
                 self.resolver,
                 join(wsdir, 'mets.xml'),
                 src_dir=wsdir,
                 skip=[
                     'page', 'mets_unique_identifier',
                     'mets_file_group_names', 'mets_files', 'pixel_density',
                     'imagefilename', 'dimension', 'page_xsd', 'mets_xsd'
                 ],
                 download=False)
         self.assertEqual(report2.is_valid, True)
Ejemplo n.º 11
0
    def add_file(self, file_grp, content=None, **kwargs):
        """
        Add an output file. Creates an :class:`OcrdFile` to pass around and adds that to the
        OcrdMets OUTPUT section.

        Arguments:
            file_grp (string): fileGrp to add the file to
            content (string|bytes): data to write to ``local_filename``;
                strings are encoded as UTF-8
            **kwargs: passed on to ``self.mets.add_file``; ``url`` defaults to
                ``local_filename`` when only the latter is given

        Returns:
            Whatever ``self.mets.add_file`` returns.

        Raises:
            Exception: if ``content`` is given without ``local_filename``.
        """
        log.debug('outputfile file_grp=%s local_filename=%s content=%s',
                  file_grp, kwargs.get('local_filename'), content is not None)
        if content is not None and 'local_filename' not in kwargs:
            raise Exception("'content' was set but no 'local_filename'")

        with pushd_popd(self.directory):
            if 'local_filename' in kwargs:
                # Use the real parent directory: splitting on '/' returned the
                # filename itself when there was no directory part, which
                # created a directory named like the file.
                local_filename_dir = Path(kwargs['local_filename']).parent
                if not local_filename_dir.is_dir():
                    makedirs(str(local_filename_dir))
                if 'url' not in kwargs:
                    kwargs['url'] = kwargs['local_filename']

            #  print(kwargs)
            ret = self.mets.add_file(file_grp, **kwargs)

            if content is not None:
                with open(kwargs['local_filename'], 'wb') as f:
                    if isinstance(content, str):
                        content = bytes(content, 'utf-8')
                    f.write(content)

        return ret
Ejemplo n.º 12
0
 def test_bulk_add_stdin(self):
     """``bulk-add`` with ``-`` as source reads the file descriptions from stdin."""
     resolver = Resolver()
     with pushd_popd(tempdir=True) as wsdir:
         ws = resolver.workspace_from_nothing(directory=wsdir)
         Path(wsdir, 'BIN').mkdir()
         # Empty placeholder files that the stdin lines below refer to.
         for relative_path in (
                 'BIN/FILE_0001_BIN.IMG-wolf.png',
                 'BIN/FILE_0002_BIN.IMG-wolf.png',
                 'BIN/FILE_0001_BIN.xml',
                 'BIN/FILE_0002_BIN.xml',
         ):
             Path(wsdir, relative_path).write_text('')
         stdin_lines = (
             'PHYS_0001 BIN FILE_0001_BIN.IMG-wolf BIN/FILE_0001_BIN.IMG-wolf.png BIN/FILE_0001_BIN.IMG-wolf.png image/png\n'
             'PHYS_0002 BIN FILE_0002_BIN.IMG-wolf BIN/FILE_0002_BIN.IMG-wolf.png BIN/FILE_0002_BIN.IMG-wolf.png image/png\n'
             'PHYS_0001 BIN FILE_0001_BIN BIN/FILE_0001_BIN.xml BIN/FILE_0001_BIN.xml application/vnd.prima.page+xml\n'
             'PHYS_0002 BIN FILE_0002_BIN BIN/FILE_0002_BIN.xml BIN/FILE_0002_BIN.xml application/vnd.prima.page+xml\n'
         )
         cli_args = [
             'bulk-add', '-r',
             r'(?P<pageid>.*) (?P<filegrp>.*) (?P<fileid>.*) (?P<src>.*) (?P<dest>.*) (?P<mimetype>.*)',
             '-G', '{{ filegrp }}', '-g', '{{ pageid }}', '-i',
             '{{ fileid }}', '-m', '{{ mimetype }}', '-u', "{{ dest }}",
             '-'
         ]
         with mock_stdin(stdin_lines):
             assert len(ws.mets.file_groups) == 0
             exit_code, out, err = self.invoke_cli(workspace_cli, cli_args)
             ws.reload_mets()
             assert len(ws.mets.file_groups) == 1
             assert len(list(ws.mets.find_files())) == 4
             first_file = next(ws.mets.find_files())
             assert first_file.mimetype == 'image/png'
             assert first_file.ID == 'FILE_0001_BIN.IMG-wolf'
             assert first_file.url == 'BIN/FILE_0001_BIN.IMG-wolf.png'
Ejemplo n.º 13
0
def prune_files(ctx, file_grp, mimetype, page_id, file_id):
    """
    Removes mets:files that point to non-existing local files

    (If any ``FILTER`` starts with ``//``, then its remainder
     will be interpreted as a regular expression.)
    """
    # NOTE: the docstring above is kept verbatim because it may be shown as
    # command-line help text.
    workspace = Workspace(ctx.resolver,
                          directory=ctx.directory,
                          mets_basename=basename(ctx.mets_url),
                          automatic_backup=ctx.automatic_backup)
    with pushd_popd(workspace.directory):
        for f in workspace.mets.find_files(
                ID=file_id,
                fileGrp=file_grp,
                mimetype=mimetype,
                pageId=page_id,
        ):
            try:
                # A file is pruned if it has no local filename or that path
                # does not exist (relative to the workspace directory).
                if not f.local_filename or not exists(f.local_filename):
                    workspace.mets.remove_file(f.ID)
            except Exception as e:
                # '%s', not '%f': f is a file object, not a float, and a
                # float conversion would make logging drop the message.
                ctx.log.exception("Error removing %s: %s", f, e)
                raise
        workspace.save_mets()
Ejemplo n.º 14
0
 def test_mets_basename_and_mets(self):
     """Passing both ``-m`` and ``-M`` must raise a ValueError."""
     cli_args = ['-m', 'foo.xml', '-M', 'not-foo.xml', 'init']
     expected_message = "Use either --mets or --mets-basename, not both"
     with pushd_popd(tempdir=True):
         with self.assertRaisesRegex(ValueError, expected_message):
             self.invoke_cli(workspace_cli, cli_args)
Ejemplo n.º 15
0
 def files_for_page_id(self, page_id: str, file_group: str = DEFAULT_FILE_GROUP, mimetype: str = None) \
         -> List[OcrdFile]:
     """
     Find the files of one page in the METS and download each into the workspace.

     Arguments:
         page_id: passed as ``pageId`` filter to ``find_files``
         file_group: passed as ``fileGrp`` filter to ``find_files``
         mimetype: passed as ``mimetype`` filter to ``find_files``

     Returns:
         The results of ``download_file`` for every matching file, in order.
     """
     with pushd_popd(self.workspace.directory):
         matches = self.workspace.mets.find_files(
             fileGrp=file_group, pageId=page_id, mimetype=mimetype)
         downloaded: List[OcrdFile] = []
         for match in matches:
             downloaded.append(self.workspace.download_file(match))
         return downloaded
Ejemplo n.º 16
0
 def test_add_519(self):
     """
     https://github.com/OCR-D/core/issues/519

     Adding a file that lives outside the workspace with ``-C`` must succeed
     and leave a copy with the same content under the file group directory.
     """
     srcfile_content = 'foo'
     with TemporaryDirectory() as tempdir:
         wsdir = Path(tempdir, "workspace")
         srcdir = Path(tempdir, "source")
         for directory in (wsdir, srcdir):
             directory.mkdir()
         srcfile = srcdir / "srcfile.jpg"
         srcfile.write_text(srcfile_content)
         with pushd_popd(str(wsdir)):
             self.invoke_cli(workspace_cli, ['init'])
             add_args = [
                 'add', '-m', 'image/jpg', '-G', 'MAX', '-i',
                 'IMG_MAX_1818975', '-C',
                 str(srcfile)
             ]
             exit_code, out, err = self.invoke_cli(workspace_cli, add_args)
             # print(out, err)
             self.assertEqual(exit_code, 0)
             copied_file = Path(wsdir, 'MAX', 'srcfile.jpg')
             self.assertTrue(copied_file.exists())
             self.assertEqual(copied_file.read_text(), srcfile_content)
Ejemplo n.º 17
0
 def test_mets_get_id_set_id(self):
     """``get-id`` prints the identifier previously stored with ``set-id``."""
     mets_id = 'foo123'
     with pushd_popd(tempdir=True):
         for cli_args in (['init'], ['set-id', mets_id]):
             self.invoke_cli(workspace_cli, cli_args)
         _, printed, _ = self.invoke_cli(workspace_cli, ['get-id'])
         self.assertEqual(printed, mets_id + '\n')
Ejemplo n.º 18
0
 def test_bulk_add(self):
     """``bulk-add`` with a glob registers every matching file with templated attributes."""
     NO_FILES=100
     with TemporaryDirectory() as srcdir:
         # One directory per file group, each holding NO_FILES empty files.
         for group_dir, extension in (("OCR-D-IMG", "tif"), ("OCR-D-PAGE", "xml")):
             Path(srcdir, group_dir).mkdir()
         for group_dir, extension in (("OCR-D-IMG", "tif"), ("OCR-D-PAGE", "xml")):
             for i in range(NO_FILES):
                 Path(srcdir, group_dir, "page_%04d.%s" % (i, extension)).write_text('')
         with TemporaryDirectory() as wsdir, pushd_popd(wsdir):
             ws = self.resolver.workspace_from_nothing(directory=wsdir)
             cli_args = [
                 'bulk-add',
                 '--ignore',
                 '--regex', r'^.*/(?P<fileGrp>[^/]+)/page_(?P<pageid>.*)\.(?P<ext>[^\.]*)$',
                 '--url', '{{ fileGrp }}/FILE_{{ pageid }}.{{ ext }}',
                 '--file-id', 'FILE_{{ fileGrp }}_{{ pageid }}',
                 '--page-id', 'PHYS_{{ pageid }}',
                 '--file-grp', '{{ fileGrp }}',
                 '%s/*/*' % srcdir
             ]
             exit_code, out, err = self.invoke_cli(workspace_cli, cli_args)
             # print('exit_code', exit_code)
             # print('out', out)
             # print('err', err)
             ws.reload_mets()
             mets = ws.mets
             self.assertEqual(len(mets.file_groups), 2)
             self.assertEqual(len(mets.find_all_files()), 2 * NO_FILES)
             self.assertEqual(len(mets.find_all_files(mimetype='image/tiff')), NO_FILES)
             self.assertEqual(len(mets.find_all_files(ID='//FILE_OCR-D-IMG_000.*')), 10)
             self.assertEqual(len(mets.find_all_files(ID='//FILE_.*_000.*')), 20)
             self.assertEqual(len(mets.find_all_files(pageId='PHYS_0001')), 2)
             self.assertEqual(mets.find_all_files(ID='FILE_OCR-D-PAGE_0001')[0].url, 'OCR-D-PAGE/FILE_0001.xml')
Ejemplo n.º 19
0
 def test_bulk_add_gen_id(self):
     """Without ``--file-id``, ``bulk-add`` produces the ID ``a_b_c_d_e`` for the entry ``a b c d e``."""
     cli_args = [
         'bulk-add',
         '-r', r'(?P<pageid>.*) (?P<filegrp>.*) (?P<src>.*) (?P<url>.*) (?P<mimetype>.*)',
         '-G', '{{ filegrp }}',
         '-g', '{{ pageid }}',
         '-S', '{{ src }}',
         # '-i', '{{ fileid }}',  # XXX skip --file-id
         '-m', '{{ mimetype }}',
         '-u', "{{ url }}",
         'a b c d e',
     ]
     with pushd_popd(tempdir=True) as wsdir:
         ws = self.resolver.workspace_from_nothing(directory=wsdir)
         # The source file named by the 'src' group must exist.
         Path(wsdir, 'c').write_text('')
         _, out, err = self.invoke_cli(workspace_cli, cli_args)
         ws.reload_mets()
         assert next(ws.mets.find_files()).ID == 'a_b_c_d_e'
         assert next(ws.mets.find_files()).url == 'd'
Ejemplo n.º 20
0
 def test_mets_basename(self):
     """``-m foo.xml init`` creates ``foo.xml`` and no default ``mets.xml``."""
     with TemporaryDirectory() as tempdir, pushd_popd(tempdir):
         outcome = self.runner.invoke(workspace_cli, ['-m', 'foo.xml', 'init'])
         self.assertEqual(outcome.exit_code, 0)
         self.assertTrue(exists('foo.xml'))
         self.assertFalse(exists('mets.xml'))
Ejemplo n.º 21
0
 def download_file(self, f, _recursion_count=0):
     """
     Download a :py:mod:`ocrd.model.ocrd_file.OcrdFile` to the workspace.

     The resolver fetches ``f.url`` into the workspace directory, below the
     file's fileGrp. If that fails with ``FileNotFoundError`` the download is
     retried once with the workspace ``baseurl`` prepended to the URL.

     Arguments:
         f: the file to download; its ``url`` and ``local_filename`` are updated
         _recursion_count (int): internal retry counter, callers leave it at 0

     Returns:
         The same ``f`` with ``local_filename`` set to its new ``url``.

     Raises:
         Exception: if the download fails and there is no ``baseurl``, or the
             ``baseurl`` retry has already been attempted.
     """
     log.debug('download_file %s [_recursion_count=%s]' %
               (f, _recursion_count))
     with pushd_popd(self.directory):
         # XXX FIXME hacky
         if f.ID:
             basename = '%s%s' % (f.ID, MIME_TO_EXT.get(f.mimetype, ''))
         else:
             basename = f.basename
         try:
             f.url = self.resolver.download_to_directory(
                 self.directory, f.url, subdir=f.fileGrp, basename=basename)
         except FileNotFoundError as e:
             if not self.baseurl:
                 raise Exception(
                     "No baseurl defined by workspace. Cannot retrieve '%s'"
                     % f.url)
             if _recursion_count >= 1:
                 raise Exception(
                     "Already tried prepending baseurl '%s'. Cannot retrieve '%s'"
                     % (self.baseurl, f.url))
             log.debug(
                 "First run of resolver.download_to_directory(%s) failed, try prepending baseurl '%s': %s",
                 f.url, self.baseurl, e)
             f.url = '%s/%s' % (self.baseurl, f.url)
             retried = self.download_file(f, _recursion_count + 1)
             f.url = retried.local_filename
         # XXX FIXME HACK
         f.local_filename = f.url
         return f
Ejemplo n.º 22
0
 def test_bulk_add_missing_param(self):
     """Omitting ``--page-id`` must make ``bulk-add`` fail with a ValueError about 'pageId'."""
     cli_args = [
         'bulk-add',
         '-r', r'(?P<pageid>.*) (?P<filegrp>.*) (?P<fileid>.*) (?P<src>.*) (?P<url>.*) (?P<mimetype>.*)',
         '-G', '{{ filegrp }}',
         # '-g', '{{ pageid }}', # XXX skip --page-id
         '-i', '{{ fileid }}',
         '-m', '{{ mimetype }}',
         '-u', "{{ url }}",
         'a b c d e f',
         '1 2 3 4 5 6',
     ]
     with pushd_popd(tempdir=True) as wsdir:
         ws = self.resolver.workspace_from_nothing(directory=wsdir)
         with pytest.raises(ValueError,
                            match=r"OcrdFile attribute 'pageId' unset"):
             _, out, err = self.invoke_cli(workspace_cli, cli_args)
             # Only reached if the call above did NOT raise: dump the output
             # for debugging and fail the test.
             print('out', out)
             print('err', err)
             assert 0
Ejemplo n.º 23
0
    def remove_file(self, ID, force=False, keep_file=False):
        """
        Remove a file from the METS and, unless ``keep_file`` is set, from disk.

        Arguments:
            ID (string|OcrdFile): ID of the file to delete or the file itself
            force (boolean): Continue removing even if file not found in METS
            keep_file (boolean): Whether to keep files on disk

        Returns:
            The removed file, or ``None`` if a ``FileNotFoundError`` was
            suppressed because ``force`` is set.

        Raises:
            Exception: if the file has no local filename and ``force`` is not set.
            FileNotFoundError: re-raised unless ``force`` is set.
        """
        log.debug('Deleting mets:file %s', ID)
        try:
            ocrd_file = self.mets.remove_file(ID)
            if keep_file:
                return ocrd_file
            if ocrd_file.local_filename:
                # The local filename is unlinked from within the workspace directory.
                with pushd_popd(self.directory):
                    log.info("rm %s [cwd=%s]", ocrd_file.local_filename,
                             self.directory)
                    unlink(ocrd_file.local_filename)
            else:
                log.warning("File not locally available %s", ocrd_file)
                if not force:
                    raise Exception("File not locally available %s" %
                                    ocrd_file)
            return ocrd_file
        except FileNotFoundError as e:
            if not force:
                raise e
            return None
Ejemplo n.º 24
0
 def test_workspace_from_url_rel_dir(self):
     """A ``dst_dir`` given as a long ``../`` chain still resolves to the temporary directory."""
     with TemporaryDirectory() as dst_dir:
         # Climb far above the current directory, then descend into dst_dir
         # (leading '/' stripped so the result is a relative path).
         bogus_dst_dir = '../../../../../../../../../../../../../../../../%s'  % dst_dir[1:]
         expected_mets = pjoin(dst_dir, 'mets.xml')
         with pushd_popd(FOLDER_KANT):
             workspace = self.resolver.workspace_from_url('data/mets.xml', dst_dir=bogus_dst_dir)
             self.assertEqual(workspace.mets_target, expected_mets)
             self.assertEqual(workspace.directory, dst_dir)
Ejemplo n.º 25
0
 def test_mets_basename_and_not_mets(self):
     """``-M`` together with ``-d`` still works but emits a deprecation notice on stderr."""
     cli_args = ['-d', 'foo', '-M', 'not-foo.xml', 'init']
     deprecation_notice = '--mets-basename is deprecated. Use --mets/--directory instead'
     with pushd_popd(tempdir=True) as tempdir:
         _, stdout_text, stderr_text = self.invoke_cli(workspace_cli, cli_args)
         self.assertEqual(stdout_text, join(tempdir, 'foo') + '\n')
         self.assertIn(deprecation_notice, stderr_text)
Ejemplo n.º 26
0
 def test_find_all_files(self):
     """``find -G OCR-D-IMG-BIN -k fileGrp`` prints the group name once per matching file."""
     with TemporaryDirectory() as tempdir:
         wsdir = join(tempdir, 'ws')
         copytree(assets.path_to('SBB0000F29300010000/data'), wsdir)
         with pushd_popd(wsdir):
             find_args = ['find', '-G', 'OCR-D-IMG-BIN', '-k', 'fileGrp']
             outcome = self.runner.invoke(workspace_cli, find_args)
             self.assertEqual(outcome.output, 'OCR-D-IMG-BIN\nOCR-D-IMG-BIN\n')
             self.assertEqual(outcome.exit_code, 0)
Ejemplo n.º 27
0
 def test_mets_directory_incompatible(self):
     """A ``--mets`` path outside ``--directory`` must raise a ValueError."""
     cli_args = ['-d', 'foo', '-m', '/somewhere/else', 'init']
     expected_message = "--mets has a directory part inconsistent with --directory"
     with pushd_popd(tempdir=True):
         with self.assertRaisesRegex(ValueError, expected_message):
             self.invoke_cli(workspace_cli, cli_args)
Ejemplo n.º 28
0
 def test_processor_run(self):
     """The dummy processor CLI exits with code 0 on a copy of the sample workspace."""
     sample_workspace = assets.path_to('SBB0000F29300010000/data')
     with copy_of_directory(sample_workspace) as tempdir, pushd_popd(tempdir):
         cli_args = ['-p', '{"foo": 42}', '--mets', 'mets.xml']
         outcome = self.runner.invoke(cli_dummy_processor, cli_args)
         self.assertEqual(outcome.exit_code, 0)
Ejemplo n.º 29
0
 def test_mets_directory_html(self):
     """An http(s) ``--mets`` URL without ``--directory`` must raise a ValueError."""
     cli_args = ['-m', 'https://foo.bar/bla', 'init']
     expected_message = r"--mets is an http\(s\) URL but no --directory was given"
     with pushd_popd(tempdir=True):
         with self.assertRaisesRegex(ValueError, expected_message):
             self.invoke_cli(workspace_cli, cli_args)
Ejemplo n.º 30
0
 def test_parameter_override_wo_param(self):
     """``-P baz two`` without any ``-p`` makes the dummy processor print ``{"baz": "two"}``."""
     sample_workspace = assets.path_to('SBB0000F29300010000/data')
     with copy_of_directory(sample_workspace) as tempdir, pushd_popd(tempdir):
         cli_args = ['-P', 'baz', 'two', *DEFAULT_IN_OUT]
         code, out, err = self.invoke_cli(cli_dummy_processor, cli_args)
         print(out)
         self.assertEqual(out, '{"baz": "two"}\n')