Example #1
0
 def test_tei_generation(self):
     """Check the partial TEI generated for 'mscodex1223'.

     Stages the template, copies ``self.mscodex1223_marmite`` over
     ``self.staged_pih``, generates the partial TEI with ``MedrenPrep`` and
     asserts on the title, the msItem titles and ``@n`` values, the
     decoNote text and ``@n`` values, and the extent.
     """
     self.stage_template()
     repo_wrapper = self.pennpih_prep_config.repository_wrapper()
     doc = PrepSetup().prep_document(repo_wrapper, 'mscodex1223')
     prep = MedrenPrep(source_dir=self.staged_source,
                       document=doc,
                       prep_config=self.pennpih_prep_config)
     # Overwrite the staged PIH file with the fixture before generating.
     shutil.copyfile(self.mscodex1223_marmite, self.staged_pih)

     xml = prep.gen_partial_tei()
     root = self.assertXmlDocument(xml)

     self.assertXpathsExist(root, self.expected_xpaths)
     self.assertXpathValues(root, '//ns:titleStmt/ns:title/text()', (
         'Description of University of Pennsylvania Oversize Ms. Codex 1223: Fragments of the Digests of Justinian, Book 37, Titles 7-9',
     ))
     self.assertXpathValues(root,
                            '//ns:msContents/ns:msItem/ns:title/text()',
                            self.expected_titles)
     self.assertXpathValues(root, '//ns:msContents/ns:msItem/@n',
                            ('1r', '1v', '2r'))
     self.assertXpathValues(
         root, '//ns:msDesc/ns:physDesc/ns:decoDesc/ns:decoNote/text()',
         self.expected_deconotes)
     self.assertXpathValues(
         root, '//ns:msDesc/ns:physDesc/ns:decoDesc/ns:decoNote/@n',
         ('1v', ))
     self.assertXpathValues(
         root, '//ns:extent/text()',
         ('2 leaves : 429 x 289 (237 x 135) mm. bound to 439 x 295 mm', ))
Example #2
0
    def prep_dir(self, source_dir, prep_config, doc=None):
        """Run the repository prep, and optionally common prep, on a folder.

        Writes a ``PREP_BEGUN`` status if ``status.txt`` is not yet present
        in ``source_dir``, obtains a document via ``PrepSetup`` when ``doc``
        is None, runs ``run_before``, then the prep class supplied by
        ``prep_config`` and, when ``prep_config.process_directory()`` is
        truthy, ``CommonPrep``.

        Returns the document that was prepped.  Any exception is re-raised;
        if the prep status had already been set up it is passed to
        ``_failure_status`` first.
        """
        prepstatus = None
        try:
            folder_name = os.path.basename(source_dir)
            if not os.path.exists(os.path.join(source_dir, 'status.txt')):
                Status(source_dir).write_status(Status.PREP_BEGUN)

            prep_setup = PrepSetup()
            wrapper = prep_config.repository_wrapper()
            if doc is None:
                doc = prep_setup.prep_document(wrapper, folder_name)
            prepstatus = self._setup_prepstatus(doc)

            self.run_before(source_dir, prep_config)

            # Repository-specific prep always runs.
            prep_class = prep_config.get_prep_class()
            prep_class(source_dir, doc, prep_config).prep_dir()

            # Common prep only runs when the config asks for it.
            if prep_config.process_directory():
                CommonPrep(source_dir, doc, prep_config).prep_dir()

            self._success_status(prepstatus)
            return doc
        except Exception as error:
            # Record the failure only if a prep status exists to record it on.
            if prepstatus is not None:
                self._failure_status(prepstatus, error)
            raise
Example #3
0
 def test_complex_names(self):
     """Check how ``build_file_list`` sorts files into 'document' and 'extra'.

     The six body images must be listed under 'document' and not under
     'extra'; the back image must be listed under 'extra' and not under
     'document'.
     """
     # setup
     shutil.copytree(self.complex_files, self.complex_staged)
     wrapper = self.ljs_prep_config.repository_wrapper()
     document = PrepSetup().prep_document(wrapper, 'ljs472')
     medren_prep = MedrenPrep(self.complex_staged, document,
                              self.ljs_prep_config)
     files = medren_prep.build_file_list(self.complex_pih_xml)

     body_images = (
         'data/ljs472_wk1_body0193.tif',
         'data/ljs472_wk1_body0194.tif',
         'data/ljs472_wk1_body0195.tif',
         'data/ljs472_wk1_body0196.tif',
         'data/ljs472_wk1_body0065a.tif',
         'data/ljs472_wk1_body0065b.tif',
     )
     back_image = 'data/ljs472_wk1_back0002a.tif'

     for image in body_images:
         self.assertHasFile(files, 'document', image)
     # NOTE(review): the back image is asserted twice with the same name;
     # possibly a second file was intended -- confirm against the fixture.
     for _ in range(2):
         self.assertHasFile(files, 'extra', back_image)

     for image in body_images:
         self.assertNotHasFile(files, 'extra', image)
     for _ in range(2):
         self.assertNotHasFile(files, 'document', back_image)
Example #4
0
    def test_files_present(self):
        """A ``CommonPrep`` built on the staged template exposes a FileList."""
        # setup
        self.stage_template()
        config = self.pennpih_prep_config
        wrapper = config.repository_wrapper()
        document = PrepSetup().prep_document(wrapper, 'mscodex1223')
        common_prep = CommonPrep(self.staged_source, document, config)

        # run
        self.assertIsInstance(common_prep.files, FileList)
    def test_prep_dir_bad_pages(self):
        """Prep of the ``bad_pages`` template fails, mentioning 'FILE_NAME'."""
        self.stage_template(template=self.bad_pages)
        config = self.haverford_prep_config
        wrapper = config.repository_wrapper()
        document = PrepSetup().prep_document(wrapper, 'XYZ_ABC_1')
        spreadsheet_prep = SpreadsheetPrep(self.staged_source, document,
                                           config)

        with self.assertRaises(OPennException) as caught:
            spreadsheet_prep.prep_dir()

        message = str(caught.exception)
        self.assertIn('FILE_NAME', message)
Example #6
0
    def test_run(self):
        """Run ``MedrenPrep.prep_dir`` on the staged template.

        There are no explicit assertions: the test passes when ``prep_dir``
        completes without raising.
        """
        # setup
        self.stage_template()
        repo_wrapper = self.pennpih_prep_config.repository_wrapper()
        doc = PrepSetup().prep_document(repo_wrapper, 'mscodex1223')
        prep = MedrenPrep(source_dir=self.staged_source,
                          document=doc,
                          prep_config=self.pennpih_prep_config)

        # run
        prep.prep_dir()
    def test_prep_dir_missing_file(self):
        """Prep fails, mentioning 'FILE_NAME', once a staged TIFF is deleted."""
        self.stage_template()
        config = self.haverford_prep_config
        wrapper = config.repository_wrapper()
        document = PrepSetup().prep_document(wrapper, 'XYZ_ABC_1')
        spreadsheet_prep = SpreadsheetPrep(self.staged_source, document,
                                           config)

        # Delete the first TIFF that glob reports in the staged folder.
        tiff_pattern = os.path.join(self.staged_source, '*.tif')
        staged_tiffs = glob.glob(tiff_pattern)
        os.remove(staged_tiffs[0])

        with self.assertRaises(OPennException) as caught:
            spreadsheet_prep.prep_dir()

        message = str(caught.exception)
        self.assertIn('FILE_NAME', message)
Example #8
0
 def test_tei_extent(self):
     """Check the extent text in TEI generated from the msoversize8 fixture.

     The fixture replaces the staged PIH file and the staged holding-id
     file is removed before the partial TEI is generated.
     """
     self.stage_template()
     config = self.pennpih_prep_config
     wrapper = config.repository_wrapper()
     document = PrepSetup().prep_document(wrapper, 'mscodex1223')
     medren_prep = MedrenPrep(source_dir=self.staged_source,
                              document=document,
                              prep_config=config)

     shutil.copyfile(self.msoversize8_marmite, self.staged_pih)
     os.remove(self.staged_holdingid)

     tei_root = self.assertXmlDocument(medren_prep.gen_partial_tei())
     expected_extent = ('1 item (1 leaf) : 36 x 44 cm', )
     self.assertXpathValues(tei_root, '//ns:extent/text()',
                            expected_extent)
Example #9
0
 def test_bad_holdingid(self):
     """Prep must fail when the staged holding-id file is the bad fixture.

     ``self.bad_holdingid_txt`` is copied over ``self.staged_holdingid``;
     ``prep_dir`` is then expected to raise an ``OPennException`` whose
     message matches '999999999999999999'.
     """
     # setup
     self.stage_template()
     shutil.copyfile(self.bad_holdingid_txt, self.staged_holdingid)
     repo_wrapper = self.pennpih_prep_config.repository_wrapper()
     doc = PrepSetup().prep_document(repo_wrapper, 'mscodex1223')
     prep = MedrenPrep(source_dir=self.staged_source,
                       document=doc,
                       prep_config=self.pennpih_prep_config)
     # run
     with self.assertRaisesRegexp(OPennException, r'999999999999999999'):
         prep.prep_dir()
Example #10
0
    def test_no_partial_tei(self):
        """Common prep fails, mentioning 'PARTIAL_TEI.xml', without a TEI file.

        Only the source folder, the data folder and an empty file list are
        staged before prep is attempted.
        """
        # setup
        for folder in (self.staged_source, self.staged_data):
            os.mkdir(folder)
        touch(self.staged_file_list)

        # run
        # Document setup and CommonPrep construction stay inside the
        # assertRaises scope, exactly as the exception may come from any of
        # these calls.
        with self.assertRaises(OPennException) as caught:
            wrapper = self.pennpih_prep_config.repository_wrapper()
            document = PrepSetup().prep_document(wrapper, 'mscodex1223')
            common_prep = CommonPrep(self.staged_source, document,
                                     self.pennpih_prep_config)
            common_prep.prep_dir()

        message = str(caught.exception)
        self.assertIn('PARTIAL_TEI.xml', message)
Example #11
0
 def test_tei_with_item_number(self):
     """Check the idno in TEI generated from the Ms. Coll. 390 fixture.

     ``self.ms_coll_390_item_1044_marmite`` replaces the staged PIH file and
     the staged holding-id file is removed before generating; the msDesc
     idno text must then be 'Ms. Coll. 390 Item 1044'.
     """
     self.stage_template()
     repo_wrapper = self.pennpih_prep_config.repository_wrapper()
     doc = PrepSetup().prep_document(repo_wrapper, 'mscodex1223')
     prep = MedrenPrep(source_dir=self.staged_source,
                       document=doc,
                       prep_config=self.pennpih_prep_config)
     shutil.copyfile(self.ms_coll_390_item_1044_marmite, self.staged_pih)
     os.remove(self.staged_holdingid)

     xml = prep.gen_partial_tei()
     root = self.assertXmlDocument(xml)
     self.assertXpathValues(root,
                            '//ns:msDesc/ns:msIdentifier/ns:idno/text()',
                            ('Ms. Coll. 390 Item 1044', ))
Example #12
0
    def test_run(self):
        """Run ``MMWPrep`` for 'ms_or_15' and check the TEI it writes.

        After staging the template and touching every file named in
        ``self.columbia_mmw_files``, ``prep_dir`` must leave a
        ``PARTIAL_TEI.xml`` in the staged folder containing all of
        ``self.expected_xpaths``.
        """
        self.stage_template(self.template_dir)
        for image in self.columbia_mmw_files:
            self.touch(os.path.join(self.staged_source, image))
        repo_wrapper = self.columbia_mmw_prep_config.repository_wrapper()
        doc = PrepSetup().prep_document(repo_wrapper, 'ms_or_15')
        prep = MMWPrep(source_dir=self.staged_source, document=doc,
                       prep_config=self.columbia_mmw_prep_config)

        prep.prep_dir()
        path = os.path.join(self.staged_source, 'PARTIAL_TEI.xml')
        self.assertTrue(os.path.exists(path), 'Expected path to exist: %s' % path)

        # Read through a context manager so the file handle is closed
        # promptly instead of being left to the garbage collector.
        with open(path) as tei_file:
            xml = tei_file.read()
        root = self.assertXmlDocument(xml)
        self.assertXpathsExist(root, self.expected_xpaths)
Example #13
0
    def test_flp_ms(self):
        """Run ``MMWPrep`` for 'lewis_o_003' and check the TEI it writes.

        The FLP template is copied to ``self.staged_flp_source`` and every
        file named in ``self.flp_mmw_files`` is touched there.  The written
        ``PARTIAL_TEI.xml`` must contain all of ``self.expected_xpaths``
        except the ``origPlace`` path, which is excluded for this document.
        """
        shutil.copytree(self.template_flp_dir, self.staged_flp_source)
        for image in self.flp_mmw_files:
            self.touch(os.path.join(self.staged_flp_source, image))
        repo_wrapper = self.flp_mmw_prep_config.repository_wrapper()
        doc = PrepSetup().prep_document(repo_wrapper, 'lewis_o_003')
        prep = MMWPrep(source_dir=self.staged_flp_source, document=doc,
                       prep_config=self.flp_mmw_prep_config)

        prep.prep_dir()
        path = os.path.join(self.staged_flp_source, 'PARTIAL_TEI.xml')
        self.assertTrue(os.path.exists(path), 'Expected path to exist: %s' % path)

        # Read through a context manager so the file handle is closed
        # promptly instead of being left to the garbage collector.
        with open(path) as tei_file:
            xml = tei_file.read()
        root = self.assertXmlDocument(xml)

        # Work on a copy so the shared expected_xpaths is not mutated.
        expected = list(self.expected_xpaths)
        expected.remove('/ns:TEI/ns:teiHeader/ns:fileDesc/ns:sourceDesc/ns:msDesc/ns:history/ns:origin/ns:origPlace')
        self.assertXpathsExist(root, expected)
Example #14
0
 def test_run(self):
     """Run ``CommonPrep.prep_dir`` and check the resulting database rows.

     The document count must be one higher than it was before
     ``prep_document`` ran, the image and derivative counts must grow by
     the counts reported by the package's file list, and the reloaded
     document must have a title and a call number.
     """
     # setup
     self.stage_template()
     wrapper = self.pennpih_prep_config.repository_wrapper()
     # Counted before prep_document so the new document shows up as +1.
     docs_before = Document.objects.count()
     document = PrepSetup().prep_document(wrapper, 'mscodex1223')
     document_pk = document.id
     prep = CommonPrep(self.staged_source, document,
                       self.pennpih_prep_config)
     images_before = Image.objects.count()
     derivs_before = Derivative.objects.count()

     # run
     prep.prep_dir()

     self.assertEqual(Document.objects.count(), docs_before + 1)
     self.assertEqual(
         Image.objects.count(),
         images_before + prep.package_dir.file_list.file_count)
     self.assertEqual(
         Derivative.objects.count(),
         derivs_before + prep.package_dir.file_list.deriv_count)

     # Reload from the database to see what prep actually saved.
     reloaded = Document.objects.get(pk=document_pk)
     self.assertIsNotNone(reloaded.title)
     self.assertIsNotNone(reloaded.call_number)
    def test_run(self):
        """Run ``SpreadsheetPrep.prep_dir`` and check the files left behind.

        Prep must not raise ``OPennException``.  Afterwards the first
        template image must exist under ``data``, ``openn_metadata.xml`` and
        ``openn_metadata.xlsx`` must be gone, and ``file_list.json`` and
        ``PARTIAL_TEI.xml`` must exist in the staged folder.
        """
        # setup
        self.stage_template()
        repo_wrapper = self.haverford_prep_config.repository_wrapper()
        doc = PrepSetup().prep_document(repo_wrapper, 'XYZ_ABC_1')
        prep = SpreadsheetPrep(self.staged_source, doc,
                               self.haverford_prep_config)

        # run
        try:
            prep.prep_dir()
        except OPennException as oe:
            # Parenthesised single-argument print gives the same output under
            # Python 2 and is also valid Python 3 syntax.
            print("The exception cause is: %s" % (oe.cause_text, ))
            self.fail("Prep should raise no exception; got: %s" % (oe, ))

        file_path = os.path.join(self.staged_source, 'data',
                                 self.template_image_names.split()[0])
        self.assertTrue(os.path.exists(file_path),
                        "File should exist: %s" % (file_path, ))

        file_path = os.path.join(self.staged_source, 'openn_metadata.xml')
        self.assertFalse(
            os.path.exists(file_path),
            "File should have been removed: %s" % (file_path, ))

        file_path = os.path.join(self.staged_source, 'openn_metadata.xlsx')
        self.assertFalse(
            os.path.exists(file_path),
            "File should have been removed: %s" % (file_path, ))

        file_path = os.path.join(self.staged_source, 'file_list.json')
        self.assertTrue(os.path.exists(file_path),
                        "File should exist: %s" % (file_path, ))

        file_path = os.path.join(self.staged_source, 'PARTIAL_TEI.xml')
        self.assertTrue(os.path.exists(file_path),
                        "File should exist: %s" % (file_path, ))