def prep_dir(self, source_dir, prep_config, doc=None):
    """Prepare ``source_dir`` using the prep class named by ``prep_config``.

    Writes a ``PREP_BEGUN`` status when ``source_dir`` has no ``status.txt``,
    creates the document via ``PrepSetup`` when ``doc`` is not supplied, runs
    the repository-specific prep, and then runs ``CommonPrep`` when
    ``prep_config.process_directory()`` is true.

    Returns the document that was prepped. Any exception is re-raised; if the
    prep status had been set up by then, the failure is recorded on it first.
    """
    prepstatus = None
    try:
        dir_name = os.path.basename(source_dir)
        status_file = os.path.join(source_dir, 'status.txt')
        if not os.path.exists(status_file):
            Status(source_dir).write_status(Status.PREP_BEGUN)

        prep_setup = PrepSetup()
        wrapper = prep_config.repository_wrapper()
        if doc is None:
            doc = prep_setup.prep_document(wrapper, dir_name)

        prepstatus = self._setup_prepstatus(doc)
        self.run_before(source_dir, prep_config)

        # Repository-specific prep runs first; the common prep follows.
        prep_class = prep_config.get_prep_class()
        prep_class(source_dir, doc, prep_config).prep_dir()

        if prep_config.process_directory():
            CommonPrep(source_dir, doc, prep_config).prep_dir()

        self._success_status(prepstatus)
        return doc
    except Exception as ex:
        # Only record the failure if a prep status exists to record it on.
        if prepstatus is not None:
            self._failure_status(prepstatus, ex)
        raise
def test_tei_generation(self):
    """Generate partial TEI for 'mscodex1223' and check selected xpath values."""
    self.stage_template()
    repo_wrapper = self.pennpih_prep_config.repository_wrapper()
    doc = PrepSetup().prep_document(repo_wrapper, 'mscodex1223')
    prep = MedrenPrep(source_dir=self.staged_source, document=doc,
                      prep_config=self.pennpih_prep_config)
    # Put the marmite fixture in place as the staged PIH file before
    # generating the TEI.
    shutil.copyfile(self.mscodex1223_marmite, self.staged_pih)

    root = self.assertXmlDocument(prep.gen_partial_tei())
    self.assertXpathsExist(root, self.expected_xpaths)

    # (xpath, expected values) pairs, checked in order.
    expected_values = (
        ('//ns:titleStmt/ns:title/text()',
         ('Description of University of Pennsylvania Oversize Ms. Codex 1223: Fragments of the Digests of Justinian, Book 37, Titles 7-9', )),
        ('//ns:msContents/ns:msItem/ns:title/text()',
         self.expected_titles),
        ('//ns:msContents/ns:msItem/@n',
         ('1r', '1v', '2r')),
        ('//ns:msDesc/ns:physDesc/ns:decoDesc/ns:decoNote/text()',
         self.expected_deconotes),
        ('//ns:msDesc/ns:physDesc/ns:decoDesc/ns:decoNote/@n',
         ('1v', )),
        ('//ns:extent/text()',
         ('2 leaves : 429 x 289 (237 x 135) mm. bound to 439 x 295 mm', )),
    )
    for xpath, values in expected_values:
        self.assertXpathValues(root, xpath, values)
def test_complex_names(self):
    """Check that build_file_list sorts complex file names into the right groups.

    Body images must be listed under 'document' and not under 'extra'; the
    back image must be listed under 'extra' and not under 'document'.
    """
    # setup
    shutil.copytree(self.complex_files, self.complex_staged)
    repo_wrapper = self.ljs_prep_config.repository_wrapper()
    doc = PrepSetup().prep_document(repo_wrapper, 'ljs472')
    prep = MedrenPrep(self.complex_staged, doc, self.ljs_prep_config)

    files = prep.build_file_list(self.complex_pih_xml)

    document_files = (
        'data/ljs472_wk1_body0193.tif',
        'data/ljs472_wk1_body0194.tif',
        'data/ljs472_wk1_body0195.tif',
        'data/ljs472_wk1_body0196.tif',
        'data/ljs472_wk1_body0065a.tif',
        'data/ljs472_wk1_body0065b.tif',
    )
    # NOTE(review): the original asserted 'back0002a' twice in each group;
    # the duplicate is dropped here. A second back file may have been
    # intended -- confirm against the fixture directory.
    extra_files = (
        'data/ljs472_wk1_back0002a.tif',
    )

    for path in document_files:
        self.assertHasFile(files, 'document', path)
    for path in extra_files:
        self.assertHasFile(files, 'extra', path)

    for path in document_files:
        self.assertNotHasFile(files, 'extra', path)
    for path in extra_files:
        self.assertNotHasFile(files, 'document', path)
def test_files_present(self):
    """A freshly constructed CommonPrep exposes a FileList as ``files``."""
    # setup
    self.stage_template()
    config = self.pennpih_prep_config
    document = PrepSetup().prep_document(config.repository_wrapper(),
                                         'mscodex1223')
    common_prep = CommonPrep(self.staged_source, document,
                             self.pennpih_prep_config)

    # run
    self.assertIsInstance(common_prep.files, FileList)
def test_prep_dir_bad_pages(self):
    """Prep of the bad-pages template raises an OPennException naming FILE_NAME."""
    self.stage_template(template=self.bad_pages)
    config = self.haverford_prep_config
    document = PrepSetup().prep_document(config.repository_wrapper(),
                                         'XYZ_ABC_1')
    spreadsheet_prep = SpreadsheetPrep(self.staged_source, document,
                                       self.haverford_prep_config)

    with self.assertRaises(OPennException) as raised:
        spreadsheet_prep.prep_dir()

    # Checked after the with block so it runs once the exception is captured.
    self.assertIn('FILE_NAME', str(raised.exception))
def test_run(self):
    """Run MedrenPrep.prep_dir on the staged template; passes if nothing raises."""
    # setup
    self.stage_template()
    repo_wrapper = self.pennpih_prep_config.repository_wrapper()
    doc = PrepSetup().prep_document(repo_wrapper, 'mscodex1223')
    prep = MedrenPrep(source_dir=self.staged_source, document=doc,
                      prep_config=self.pennpih_prep_config)

    # run
    prep.prep_dir()
def test_prep_dir_missing_file(self):
    """Prep raises an OPennException naming FILE_NAME when a staged TIFF is gone."""
    self.stage_template()
    config = self.haverford_prep_config
    document = PrepSetup().prep_document(config.repository_wrapper(),
                                         'XYZ_ABC_1')
    spreadsheet_prep = SpreadsheetPrep(self.staged_source, document,
                                       self.haverford_prep_config)

    # Delete the first TIFF the glob returns from the staged directory.
    tiff_pattern = os.path.join(self.staged_source, '*.tif')
    staged_tiffs = glob.glob(tiff_pattern)
    os.remove(staged_tiffs[0])

    with self.assertRaises(OPennException) as raised:
        spreadsheet_prep.prep_dir()

    self.assertIn('FILE_NAME', str(raised.exception))
def test_tei_extent(self):
    """The generated partial TEI carries the expected extent text."""
    self.stage_template()
    config = self.pennpih_prep_config
    document = PrepSetup().prep_document(config.repository_wrapper(),
                                         'mscodex1223')
    medren_prep = MedrenPrep(source_dir=self.staged_source,
                             document=document,
                             prep_config=self.pennpih_prep_config)

    # Swap in the msoversize8 fixture and remove the staged holding id file.
    shutil.copyfile(self.msoversize8_marmite, self.staged_pih)
    os.remove(self.staged_holdingid)

    tei_xml = medren_prep.gen_partial_tei()
    tei_root = self.assertXmlDocument(tei_xml)
    expected_extent = ('1 item (1 leaf) : 36 x 44 cm', )
    self.assertXpathValues(tei_root, '//ns:extent/text()', expected_extent)
def test_bad_holdingid(self):
    """Prep with the bad holding id fixture raises an OPennException.

    The exception text must match the id '999999999999999999'.
    """
    # setup
    self.stage_template()
    shutil.copyfile(self.bad_holdingid_txt, self.staged_holdingid)
    repo_wrapper = self.pennpih_prep_config.repository_wrapper()
    doc = PrepSetup().prep_document(repo_wrapper, 'mscodex1223')
    prep = MedrenPrep(source_dir=self.staged_source, document=doc,
                      prep_config=self.pennpih_prep_config)

    # run
    with self.assertRaisesRegexp(OPennException, r'999999999999999999'):
        prep.prep_dir()
def test_no_partial_tei(self):
    """CommonPrep raises an OPennException naming PARTIAL_TEI.xml.

    The staged directory is built by hand with a data directory and an
    empty file list, but no PARTIAL_TEI.xml is created.
    """
    # setup
    for directory in (self.staged_source, self.staged_data):
        os.mkdir(directory)
    touch(self.staged_file_list)

    # run
    # NOTE(review): document creation and CommonPrep construction also run
    # inside the assertRaises block, so an OPennException from any of them
    # satisfies this test -- confirm that is intended.
    with self.assertRaises(OPennException) as raised:
        config = self.pennpih_prep_config
        document = PrepSetup().prep_document(config.repository_wrapper(),
                                             'mscodex1223')
        common_prep = CommonPrep(self.staged_source, document,
                                 self.pennpih_prep_config)
        common_prep.prep_dir()

    self.assertIn('PARTIAL_TEI.xml', str(raised.exception))
def test_tei_with_item_number(self):
    """The generated partial TEI idno includes the item number."""
    self.stage_template()
    repo_wrapper = self.pennpih_prep_config.repository_wrapper()
    doc = PrepSetup().prep_document(repo_wrapper, 'mscodex1223')
    prep = MedrenPrep(source_dir=self.staged_source, document=doc,
                      prep_config=self.pennpih_prep_config)

    # Swap in the Ms. Coll. 390 fixture and remove the staged holding id file.
    shutil.copyfile(self.ms_coll_390_item_1044_marmite, self.staged_pih)
    os.remove(self.staged_holdingid)

    xml = prep.gen_partial_tei()
    root = self.assertXmlDocument(xml)
    self.assertXpathValues(root,
                           '//ns:msDesc/ns:msIdentifier/ns:idno/text()',
                           ('Ms. Coll. 390 Item 1044', ))
def test_run(self):
    """Run MMWPrep on the Columbia template and check the PARTIAL_TEI.xml it writes."""
    self.stage_template(self.template_dir)
    # Create a placeholder for each image the prep expects to find.
    for image in self.columbia_mmw_files:
        self.touch(os.path.join(self.staged_source, image))

    repo_wrapper = self.columbia_mmw_prep_config.repository_wrapper()
    doc = PrepSetup().prep_document(repo_wrapper, 'ms_or_15')
    prep = MMWPrep(source_dir=self.staged_source, document=doc,
                   prep_config=self.columbia_mmw_prep_config)
    prep.prep_dir()

    path = os.path.join(self.staged_source, 'PARTIAL_TEI.xml')
    self.assertTrue(os.path.exists(path),
                    'Expected path to exist: %s' % path)

    # Read through a context manager so the file handle is always closed.
    with open(path) as tei_file:
        xml = tei_file.read()
    root = self.assertXmlDocument(xml)
    self.assertXpathsExist(root, self.expected_xpaths)
def test_flp_ms(self):
    """Run MMWPrep on the FLP template and check the PARTIAL_TEI.xml it writes.

    All of ``self.expected_xpaths`` must exist except the origPlace xpath,
    which is removed from the expectation for this manuscript.
    """
    shutil.copytree(self.template_flp_dir, self.staged_flp_source)
    # Create a placeholder for each image the prep expects to find.
    for image in self.flp_mmw_files:
        self.touch(os.path.join(self.staged_flp_source, image))

    repo_wrapper = self.flp_mmw_prep_config.repository_wrapper()
    doc = PrepSetup().prep_document(repo_wrapper, 'lewis_o_003')
    prep = MMWPrep(source_dir=self.staged_flp_source, document=doc,
                   prep_config=self.flp_mmw_prep_config)
    prep.prep_dir()

    path = os.path.join(self.staged_flp_source, 'PARTIAL_TEI.xml')
    self.assertTrue(os.path.exists(path),
                    'Expected path to exist: %s' % path)

    # Read through a context manager so the file handle is always closed.
    with open(path) as tei_file:
        xml = tei_file.read()
    root = self.assertXmlDocument(xml)

    # Work on a copy so the shared expectation list is not mutated.
    expected = list(self.expected_xpaths)
    expected.remove('/ns:TEI/ns:teiHeader/ns:fileDesc/ns:sourceDesc/ns:msDesc/ns:history/ns:origin/ns:origPlace')
    self.assertXpathsExist(root, expected)
def test_run(self):
    """CommonPrep.prep_dir adds the expected database rows and document fields.

    Compares Document, Image and Derivative counts taken before the run with
    the counts afterwards, then reloads the document and checks that its
    title and call number are set.
    """
    # setup
    self.stage_template()
    wrapper = self.pennpih_prep_config.repository_wrapper()
    documents_before = Document.objects.count()
    document = PrepSetup().prep_document(wrapper, 'mscodex1223')
    document_pk = document.id
    common_prep = CommonPrep(self.staged_source, document,
                             self.pennpih_prep_config)
    images_before = Image.objects.count()
    derivs_before = Derivative.objects.count()

    # run
    common_prep.prep_dir()

    self.assertEqual(Document.objects.count(), documents_before + 1)
    self.assertEqual(
        Image.objects.count(),
        images_before + common_prep.package_dir.file_list.file_count)
    self.assertEqual(
        Derivative.objects.count(),
        derivs_before + common_prep.package_dir.file_list.deriv_count)

    # Reload from the database to see the values written during prep.
    reloaded = Document.objects.get(pk=document_pk)
    self.assertIsNotNone(reloaded.title)
    self.assertIsNotNone(reloaded.call_number)
def test_run(self):
    """SpreadsheetPrep.prep_dir succeeds and leaves the expected files behind.

    After the run the first template image, file_list.json and
    PARTIAL_TEI.xml must exist, and the openn_metadata.xml / .xlsx files
    must be gone.
    """
    # setup
    self.stage_template()
    repo_wrapper = self.haverford_prep_config.repository_wrapper()
    doc = PrepSetup().prep_document(repo_wrapper, 'XYZ_ABC_1')
    prep = SpreadsheetPrep(self.staged_source, doc,
                           self.haverford_prep_config)

    # run
    try:
        prep.prep_dir()
    except OPennException as oe:
        # Parenthesised single argument: valid as both a Python 2 print
        # statement and a Python 3 print call.
        print("The exception cause is: %s" % (oe.cause_text, ))
        self.fail("Prep should raise no exception; got: %s" % (oe, ))

    first_image = self.template_image_names.split()[0]
    # (path parts under the staged source, whether the file should exist),
    # checked in order.
    expectations = (
        (('data', first_image), True),
        (('openn_metadata.xml', ), False),
        (('openn_metadata.xlsx', ), False),
        (('file_list.json', ), True),
        (('PARTIAL_TEI.xml', ), True),
    )
    for parts, should_exist in expectations:
        file_path = os.path.join(self.staged_source, *parts)
        if should_exist:
            self.assertTrue(os.path.exists(file_path),
                            "File should exist: %s" % (file_path, ))
        else:
            self.assertFalse(
                os.path.exists(file_path),
                "File should have been removed: %s" % (file_path, ))