def test_add_encoding_desc(self):
    """Adding an encoding description produces a "Headpiece" catDesc element.

    Reads the encoding description fixture, adds it to the partial TEI
    fixture, and checks the serialized result with an XPath assertion.
    """
    with open(self.encoding_desc) as desc_file:
        desc_string = desc_file.read()

    # OPennTEI is given the open file object; whether it reads it eagerly is
    # not visible here, so every use of the TEI stays inside the with-block
    # and the handle is closed afterwards instead of being leaked.
    with open(TestOPennTEI.test_partial_tei) as tei_file:
        openn_tei = OPennTEI(tei_file)
        openn_tei.add_encoding_desc(desc_string)
        root = self.assertXmlDocument(openn_tei.to_string())

    self.assertXpathsExist(root, ('//ns:catDesc[text()= "Headpiece"]',))
def write_partial_tei(self, outdir, xml):
    """Write ``xml`` to ``PARTIAL_TEI.xml`` in ``outdir`` and validate it.

    The file is opened read/write so that, once written, it can be rewound
    and handed to OPennTEI for validation. The handle is closed whether or
    not validation succeeds.

    Returns the path of the file that was written.
    """
    outfile = os.path.join(outdir, 'PARTIAL_TEI.xml')
    with open(outfile, 'w+') as handle:
        handle.write(xml)
        # Rewind and read the content back to confirm it is usable TEI.
        handle.seek(0)
        written_tei = OPennTEI(handle)
        written_tei.validate()
    return outfile
def tei(self):
    """Return the OPennTEI for this object, loading it on first use.

    If ``self.openn_tei`` is already set (and not None) it is returned as
    is. Otherwise the file at ``self.package_dir.partial_tei_path`` is read,
    wrapped in an OPennTEI, stored on ``self.openn_tei`` and returned.
    """
    cached = getattr(self, 'openn_tei', None)
    if cached is not None:
        return cached

    with open(self.package_dir.partial_tei_path, 'r') as handle:
        self.openn_tei = OPennTEI(handle.read())
    return self.openn_tei
def get_tei(self, document):
    """Build an OPennTEI from ``document.tei_xml``.

    If construction raises OPennException, an error message naming the
    document's id and base_dir is logged and a new OPennException carrying
    that message, the original exception and its string form is raised.
    """
    try:
        parsed = OPennTEI(document.tei_xml)
    except OPennException as oex:
        details = (document.id, document.base_dir)
        msg = "Error processing document: id: %d, base_dir: '%s'" % details
        self.logger.error(msg)
        raise OPennException(msg, oex, str(oex))
    return parsed
def regen_partial_tei(self, doc, **kwargs):
    """Regenerate the partial TEI for ``doc`` using files from METADATA_DIR.

    Keyword arguments:
        METADATA_DIR -- required; directory holding the metadata files.

    Metadata files handled:

      - pages.xlsx     required
      - marc.xml       required unless a BibID is available
      - bibid.txt      ignored; BibID should be in the existing TEI
      - holdingid.txt  optional; may be required for Penn MSS (with BibID
                       in TEI)

    Raises OPennException if METADATA_DIR is missing, blank or does not
    exist, or if there is no MARC file and the saved TEI has no BibID.
    """
    data_dir = kwargs.get('METADATA_DIR', None)
    if data_dir is None or data_dir.strip() == '':
        raise OPennException("Missing required METADATA_DIR")
    if not os.path.exists(data_dir):
        raise OPennException("Cannot find METADATA_DIR: '%s'" % (data_dir,))

    # Copy whichever metadata files are present into the source directory.
    metadata_files = ('pages.xlsx', 'marc.xml', 'holdingid.txt')
    for filename in metadata_files:
        full_path = os.path.abspath(os.path.join(data_dir, filename))
        if not os.path.exists(full_path):
            continue
        dest = os.path.abspath(os.path.join(self.source_dir, filename))
        # Nothing to copy when METADATA_DIR already is the source directory.
        if full_path != dest:
            shutil.copyfile(full_path, dest)

    tei = OPennTEI(doc.tei_xml)
    bibid = tei.bibid

    # make sure we have the marc.xml file
    if not os.path.exists(self.marc_xml):
        if bibid is None:
            # The exception was previously constructed but never raised, so
            # processing carried on without a MARC file.
            raise OPennException(
                "Saved TEI lacks BibID; required MARC file missing: '%s'"
                % (self.marc_xml,))
        if not self.NEW_BIBID_RE.match(bibid):
            bibid = '99%s3503681' % (str(bibid),)
        self.write_xml(bibid, self.marc_xml)

    # create pages.xml from the page.xlsx
    self.write_openn_xml(self.openn_xml_path)
    # fake the pih.xml by merging pages.xml with marc.xml (from above)
    self.write_pih_xml()
    self.save_rights_data()

    partial_tei_xml = self.gen_partial_tei()
    self.write_partial_tei(self.source_dir, partial_tei_xml)
    self.validate_partial_tei()
    self.stage_marc_xml()

    self.add_removal(self.pih_filename)
    self.add_removal(self.bibid_filename)
    self.add_removal(self.holdingid_filename)
    self.add_removal(self.openn_xml_path)
    self.add_removal(self.xlsx_path)
    self.add_removal(os.path.join(self.source_dir, 'sha1manifest.txt'))
def regen_partial_tei(self, doc, **kwargs):
    """Regenerate the partial TEI for ``doc``.

    When a METADATA_DIR keyword argument is given, any keywords.txt and
    holdingid.txt found there are copied into ``self.source_dir``. The BibID
    is taken from the document's saved TEI; BibIDs shorter than 8 characters
    are wrapped as '99<bibid>3503681' before being passed to ``write_xml``.

    Raises OPennException if the saved TEI has no BibID.
    """
    data_dir = kwargs.get('METADATA_DIR', None)
    if data_dir:
        for name in ['keywords.txt', 'holdingid.txt']:
            source_path = os.path.abspath(os.path.join(data_dir, name))
            if not os.path.exists(source_path):
                continue
            target_path = os.path.abspath(os.path.join(self.source_dir, name))
            shutil.copyfile(source_path, target_path)

    saved_tei = OPennTEI(doc.tei_xml)
    bibid = saved_tei.bibid
    if bibid is None:
        raise OPennException("Whoah now. bibid is none. That ain't right.")
    if len(bibid) < 8:
        bibid = '99%s3503681' % (str(bibid),)
    self.write_xml(bibid, self.pih_filename)

    self.write_partial_tei(self.source_dir, self.build_partial_tei())

    self.add_removal(self.pih_filename)
    self.add_removal(self.keywords_filename())
def test_binding(self):
    """The ``binding`` property of the mscodex906 fixture has the expected text."""
    expected = u'Contemporary blind-stamped calf, rebacked; wormholes in boards, leather, and text near spine.'
    # Read the property while the fixture is still open, then let the
    # with-block close the handle instead of leaking it.
    with open(self.mscodex906_tei) as tei_file:
        openn_tei = OPennTEI(tei_file)
        self.assertEqual(expected, openn_tei.binding)
def test_decoration(self):
    """The ``decoration`` property of the mscodex906 fixture has the expected text."""
    expected = u'Many 2- and 3-line initials in red; capital at beginning of each line stroked with yellow; rubrication in red, often with cropped notes for rubrication or illustration barely visible at edge of page; spaces for illumination, approximately 9 lines in height, are frequent at the beginning of the manuscript but disappear in the second half (after f. 171v).'
    # Read the property while the fixture is still open, then let the
    # with-block close the handle instead of leaking it.
    with codecs.open(self.mscodex906_tei) as tei_file:
        openn_tei = OPennTEI(tei_file)
        self.assertEqual(expected, openn_tei.decoration)
def test_scripts(self):
    """The ``scripts`` property of the mscodex906 fixture contains the expected text."""
    expected = u'Written in Gothic cursive, with the first words or first line at the beginning of sections in bâtarde script; f. 306-322v (a single gathering) in a second hand.'
    # Read the property while the fixture is still open, then let the
    # with-block close the handle instead of leaking it.
    with codecs.open(self.mscodex906_tei) as tei_file:
        openn_tei = OPennTEI(tei_file)
        self.assertIn(expected, openn_tei.scripts)
def test_collation(self):
    """The ``collation`` property of the mscodex906 fixture has the expected text."""
    expected = u'Paper, 342; 1-18¹⁶ 19¹⁸ 20-22¹⁶; [ii], 1-317, 319-340, [i]; misnumbered at 318, no loss of text. Foliation and line numbering in a later hand. Signatures at bottom right; catchwords at bottom center, often cropped.'
    # Read the property while the fixture is still open, then let the
    # with-block close the handle instead of leaking it.
    with codecs.open(self.mscodex906_tei) as tei_file:
        openn_tei = OPennTEI(tei_file)
        self.assertEqual(expected, openn_tei.collation)
def test_colophon(self):
    """The ``colophon`` property of the mscodex83 fixture has the expected text."""
    expected = 'Colophon (f. 32v): Explicit hic liber; de pena sum modo liber/ Explicit hoc totum; pro pena da michi potum/ Explicit expliceat; ludere scriptor eat/ Finito libro sit laus et gloria Christo.'
    # Read the property while the fixture is still open, then let the
    # with-block close the handle instead of leaking it.
    with open(self.mscodex83_tei) as tei_file:
        openn_tei = OPennTEI(tei_file)
        self.assertEqual(expected, openn_tei.colophon)
def test_layouts(self):
    """The ``layouts`` property of the ljs270 fixture contains the expected text."""
    expected = 'Inscribed in 23 lines on 2 sides of the tablet.'
    # Read the property while the fixture is still open, then let the
    # with-block close the handle instead of leaking it.
    with open(self.ljs270_tei) as tei_file:
        openn_tei = OPennTEI(tei_file)
        self.assertIn(expected, openn_tei.layouts)
def __init__(self, document):
    """Store ``document`` and build an OPennTEI from its ``tei_xml``.

    The TEI source is read through ``self.document`` rather than the
    argument directly; that attribute is defined outside this block.
    """
    self._document = document
    tei_source = self.document.tei_xml
    self._tei = OPennTEI(tei_source)
def test_no_title(self):
    """Validating the no-title fixture raises OPennException mentioning 'title'."""
    # Validate while the fixture is still open, then let the with-block
    # close the handle instead of leaking it.
    with open(self.mscodex1589_tei_no_title) as tei_file:
        openn_tei = OPennTEI(tei_file)
        with self.assertRaises(OPennException) as oe:
            openn_tei.validate()
    # Checked after the assertRaises block: code following the raising call
    # inside that block would never run.
    self.assertIn('title', str(oe.exception))
def test_signatures(self):
    """The ``signatures`` property of the mscodex75 fixture has the expected text."""
    expected = u'Some signatures visible in red in the first half of some quires.'
    # Read the property while the fixture is still open, then let the
    # with-block close the handle instead of leaking it.
    with open(self.mscodex75_tei) as tei_file:
        openn_tei = OPennTEI(tei_file)
        self.assertEqual(expected, openn_tei.signatures)
def test_origin(self):
    """The ``origin`` property of the mscodex906 fixture has the expected text."""
    expected = u'Written in France after 1474, based on primary watermark.'
    # Read the property while the fixture is still open, then let the
    # with-block close the handle instead of leaking it.
    with open(self.mscodex906_tei) as tei_file:
        openn_tei = OPennTEI(tei_file)
        self.assertEqual(expected, openn_tei.origin)
import sys # Don't run this stuff sys.exit(1) # UPDATE TEI from file system from openn.xml.openn_tei import OPennTEI doc = Document.objects.get(pk=6853) tei = OPennTEI(open('/mnt/scratch02/openn/site/Data/0032/ms_or_044/data/ms_or_044_TEI.xml').read()) doc.tei_xml = tei.to_string() doc.save() names = (('0007', 'lehigh_002', 'lehigh_codex_002'), ('0007', 'lehigh_003', 'lehigh_codex_003'), ('0007', 'lehigh_006', 'lehigh_codex_006'), ('0007', 'lehigh_007', 'lehigh_codex_007'), ('0007', 'Antiphon_25', 'lehigh_codex_025'), ('0007', 'BookofHoursoftheRomanuse_18', 'lehigh_codex_018'), ('0003', 'BMCMS2', 'BMC_MS02'), ('0003', 'BMCMS5', 'BMC_MS05'), ('0003', 'BMCMS31', 'BMC_MS31'), ('0003', 'BMCMS32', 'BMC_MS32'), ('0003', 'BMCMS33', 'BMC_MS33'), ('0003', 'BMCMS34', 'BMC_MS34'), ('0003', 'BMC_MS8', 'BMC_MS08'), ('0003', 'BMC_MS9', 'BMC_MS09'), ('0003', 'BMC_MS7', 'BMC_MS07'), ('0003', 'BMC_MS4', 'BMC_MS04'), ('0003', 'BMC_MS6', 'BMC_MS06'), ('0003', 'BMC_MS3', 'BMC_MS03'), ('0012', 'ms2_2224q', 'lcp_ms02'),
def build_partial_tei(self):
    """Return the partial TEI as a string, with keywords and encoding description added.

    The XML from ``gen_partial_tei`` is wrapped in an OPennTEI, passed to
    ``add_keywords`` and then ``add_encoding_desc``, and serialized.
    """
    partial = OPennTEI(self.gen_partial_tei())
    self.add_keywords(partial)
    self.add_encoding_desc(partial)
    return partial.to_string()
def _openn_tei(self):
    """Return a new OPennTEI built from whatever ``self._read_tei()`` returns."""
    tei_source = self._read_tei()
    return OPennTEI(tei_source)
def test_watermark(self):
    """The ``watermark`` property of the ljs498 fixture has the expected text."""
    expected = u'Similar to Briquet, Aigle à deux têtes 285 (f. 45, 48; Gratz, 1594-1598; Millstatt (Carinthie), 1597-1600; Vienna, 1599) and Aigle à deux têtes 282 (Gratz, 1580; Osnabruck, 1588); similar to Briquet, Aigle à deux têtes 291 (f. 189; Gratz, 1598; Kempten, 1607-1627).'
    # Read the property while the fixture is still open, then let the
    # with-block close the handle instead of leaking it.
    with open(self.ljs498_tei) as tei_file:
        openn_tei = OPennTEI(tei_file)
        self.assertEqual(expected, openn_tei.watermark)
def test_get_call_number(self):
    """The ``call_number`` property of the partial TEI fixture is 'Ms. Codex 1223'."""
    # Read the property while the fixture is still open, then let the
    # with-block close the handle instead of leaking it.
    with open(TestOPennTEI.test_partial_tei) as tei_file:
        openn_tei = OPennTEI(tei_file)
        self.assertEqual('Ms. Codex 1223', openn_tei.call_number)
def test_resource(self):
    """The ``resource`` property of the mscodex906 fixture is the expected URL."""
    expected = u'http://hdl.library.upenn.edu/1017/d/medren/3559152'
    # Read the property while the fixture is still open, then let the
    # with-block close the handle instead of leaking it.
    with open(self.mscodex906_tei) as tei_file:
        openn_tei = OPennTEI(tei_file)
        self.assertEqual(expected, openn_tei.resource)
def test_get_title(self):
    """The ``title`` property of the partial TEI fixture has the expected text."""
    expected = 'Fragments of the Digests of Justinian, Book 37, Titles 7-9'
    # Read the property while the fixture is still open, then let the
    # with-block close the handle instead of leaking it.
    with open(TestOPennTEI.test_partial_tei) as tei_file:
        openn_tei = OPennTEI(tei_file)
        self.assertEqual(expected, openn_tei.title)
def test_no_idno(self):
    """Validating the no-idno fixture raises OPennException mentioning 'Call number'."""
    # Validate while the fixture is still open, then let the with-block
    # close the handle instead of leaking it.
    with open(self.mscodex1589_tei_no_idno) as tei_file:
        openn_tei = OPennTEI(tei_file)
        with self.assertRaises(OPennException) as oe:
            openn_tei.validate()
    # Checked after the assertRaises block: code following the raising call
    # inside that block would never run.
    self.assertIn('Call number', str(oe.exception))
def test_foliation(self):
    """The ``foliation`` property of the mscodex52 fixture has the expected text."""
    expected = 'Paper, i (contemporary paper) + 119; 1-94, [95-119]; contemporary foliation in ink, modern foliation in pencil, upper right recto.'
    # Read the property while the fixture is still open, then let the
    # with-block close the handle instead of leaking it.
    with open(self.mscodex52_tei) as tei_file:
        openn_tei = OPennTEI(tei_file)
        self.assertEqual(expected, openn_tei.foliation)