Exemple #1
0
 def test_add_encoding_desc(self):
     # After add_encoding_desc() the serialized TEI must contain a catDesc
     # element whose text is "Headpiece".
     # Both fixtures are opened with context managers so the handles are
     # closed even when an assertion or the parser raises.
     with open(TestOPennTEI.test_partial_tei) as tei_file:
         openn_tei = OPennTEI(tei_file)
         with open(self.encoding_desc) as desc_file:
             desc_string = desc_file.read()
         openn_tei.add_encoding_desc(desc_string)
         root = self.assertXmlDocument(openn_tei.to_string())
     self.assertXpathsExist(root, ('//ns:catDesc[text()= "Headpiece"]',))
Exemple #2
0
    def write_partial_tei(self, outdir, xml):
        """Write ``xml`` to ``PARTIAL_TEI.xml`` inside ``outdir`` and validate it.

        The file is opened in ``w+`` mode so the text just written can be
        read back through ``OPennTEI`` and validated before the handle is
        closed.

        Returns the path of the file that was written.
        """
        tei_path = os.path.join(outdir, 'PARTIAL_TEI.xml')
        with open(tei_path, 'w+') as handle:
            handle.write(xml)
            # rewind so the read-back starts at the beginning of the file
            handle.seek(0)
            written = OPennTEI(handle)
            written.validate()

        return tei_path
Exemple #3
0
 def tei(self):
     """Return the ``OPennTEI`` for this object, creating it on first use.

     The instance is stored on ``self.openn_tei``; when that attribute is
     missing or ``None`` the file at ``self.package_dir.partial_tei_path``
     is read and its content is passed to ``OPennTEI``.
     """
     if getattr(self, 'openn_tei', None) is not None:
         return self.openn_tei

     with open(self.package_dir.partial_tei_path, 'r') as handle:
         self.openn_tei = OPennTEI(handle.read())
     return self.openn_tei
Exemple #4
0
 def get_tei(self, document):
     """Return an ``OPennTEI`` built from ``document.tei_xml``.

     If construction raises ``OPennException``, an error naming the
     document's ``id`` and ``base_dir`` is logged and a new
     ``OPennException`` is raised carrying that message, the original
     exception and its string form.
     """
     try:
         parsed = OPennTEI(document.tei_xml)
     except OPennException as err:
         details = (document.id, document.base_dir)
         message = "Error processing document: id: %d, base_dir: '%s'" % details
         self.logger.error(message)
         raise OPennException(message, err, str(err))
     return parsed
Exemple #5
0
    def regen_partial_tei(self, doc, **kwargs):
        """Rebuild the partial TEI for ``doc`` using the files in METADATA_DIR.

        Files looked for in METADATA_DIR (notes carried over from the
        original implementation):

          - pages.xlsx     required
          - marc.xml       required unless bibid.txt present
          - bibid.txt      ignored; BibID should be in existing TEI
          - holdingid.txt  optional; may be required for Penn MSS (with BibID in TEI)

        Args:
            doc: object whose ``tei_xml`` holds the previously saved TEI.
            **kwargs: must contain ``METADATA_DIR``, the directory holding
                the metadata files listed above.

        Raises:
            OPennException: if ``METADATA_DIR`` is missing or blank, if that
                directory does not exist, or if ``self.marc_xml`` does not
                exist and the saved TEI has no BibID to hand to
                ``write_xml``.
        """
        data_dir = kwargs.get('METADATA_DIR', None)
        if data_dir is None or data_dir.strip() == '':
            raise OPennException("Missing required METADATA_DIR")

        if not os.path.exists(data_dir):
            raise OPennException("Cannot find METADATA_DIR: '%s'" % (data_dir,))

        for name in ('pages.xlsx', 'marc.xml', 'holdingid.txt'):
            src = os.path.abspath(os.path.join(data_dir, name))
            dest = os.path.abspath(os.path.join(self.source_dir, name))
            # Copy only files that exist and are not already in place
            # (METADATA_DIR may be the source directory itself).
            if os.path.exists(src) and src != dest:
                shutil.copyfile(src, dest)

        tei = OPennTEI(doc.tei_xml)
        bibid = tei.bibid

        # make sure we have the marc.xml file
        if not os.path.exists(self.marc_xml):
            if bibid is None:
                # The exception used to be constructed but never raised, so
                # this failure was silently ignored.
                raise OPennException(
                    "Saved TEI lacks BibID; required MARC file missing: '%s'" % (self.marc_xml,))
            if not self.NEW_BIBID_RE.match(bibid):
                bibid = '99%s3503681' % (str(bibid),)
            self.write_xml(bibid, self.marc_xml)

        # create pages.xml from the page.xlsx
        self.write_openn_xml(self.openn_xml_path)
        # fake the pih.xml by merging pages.xml with marc.xml (from above)
        self.write_pih_xml()
        self.save_rights_data()
        partial_tei_xml = self.gen_partial_tei()
        self.write_partial_tei(self.source_dir, partial_tei_xml)
        self.validate_partial_tei()
        self.stage_marc_xml()

        self.add_removal(self.pih_filename)
        self.add_removal(self.bibid_filename)
        self.add_removal(self.holdingid_filename)
        self.add_removal(self.openn_xml_path)
        self.add_removal(self.xlsx_path)
        self.add_removal(os.path.join(self.source_dir, 'sha1manifest.txt'))
Exemple #6
0
    def regen_partial_tei(self, doc, **kwargs):
        """Rebuild the partial TEI for ``doc`` from its saved TEI.

        When a truthy ``METADATA_DIR`` keyword is given, ``keywords.txt``
        and ``holdingid.txt`` are copied from it into ``self.source_dir`` if
        they exist. The BibID read from ``doc.tei_xml`` is passed to
        ``write_xml`` together with ``self.pih_filename``, then the partial
        TEI is built and written to ``self.source_dir``.

        Raises:
            OPennException: if the saved TEI has no BibID.
        """
        data_dir = kwargs.get('METADATA_DIR', None)
        if data_dir:
            for name in ['keywords.txt', 'holdingid.txt']:
                src = os.path.abspath(os.path.join(data_dir, name))
                if not os.path.exists(src):
                    continue
                target = os.path.abspath(os.path.join(self.source_dir, name))
                shutil.copyfile(src, target)

        saved_tei = OPennTEI(doc.tei_xml)
        bibid = saved_tei.bibid
        if bibid is None:
            raise OPennException("Whoah now. bibid is none. That ain't right.")
        # BibIDs shorter than 8 characters are wrapped in the 99...3503681 form
        if len(bibid) < 8:
            bibid = '99%s3503681' % (str(bibid),)
        self.write_xml(bibid, self.pih_filename)

        xml_text = self.build_partial_tei()
        self.write_partial_tei(self.source_dir, xml_text)
        for leftover in (self.pih_filename, self.keywords_filename()):
            self.add_removal(leftover)
Exemple #7
0
 def test_binding(self):
     # The `binding` value read from the mscodex906_tei fixture must match.
     # The fixture stays open only while the TEI object is in use and is
     # closed afterwards, even if the assertion fails.
     with open(self.mscodex906_tei) as tei_file:
         openn_tei = OPennTEI(tei_file)
         self.assertEqual(u'Contemporary blind-stamped calf, rebacked; wormholes in boards, leather, and text near spine.', openn_tei.binding)
Exemple #8
0
 def test_decoration(self):
     # The `decoration` value read from the mscodex906_tei fixture must match.
     # The codecs stream is closed by the context manager instead of being
     # left open.
     with codecs.open(self.mscodex906_tei) as tei_file:
         openn_tei = OPennTEI(tei_file)
         self.assertEqual(u'Many 2- and 3-line initials in red; capital at beginning of each line stroked with yellow; rubrication in red, often with cropped notes for rubrication or illustration barely visible at edge of page; spaces for illumination, approximately 9 lines in height, are frequent at the beginning of the manuscript but disappear in the second half (after f. 171v).', openn_tei.decoration)
Exemple #9
0
 def test_scripts(self):
     # `scripts` from the mscodex906_tei fixture must contain this entry.
     # The codecs stream is closed by the context manager instead of being
     # left open.
     with codecs.open(self.mscodex906_tei) as tei_file:
         openn_tei = OPennTEI(tei_file)
         self.assertIn(u'Written in Gothic cursive, with the first words or first line at the beginning of sections in bâtarde script; f. 306-322v (a single gathering) in a second hand.', openn_tei.scripts)
Exemple #10
0
 def test_collation(self):
     # The `collation` value read from the mscodex906_tei fixture must match.
     # The codecs stream is closed by the context manager instead of being
     # left open.
     with codecs.open(self.mscodex906_tei) as tei_file:
         openn_tei = OPennTEI(tei_file)
         self.assertEqual(u'Paper, 342; 1-18¹⁶ 19¹⁸ 20-22¹⁶; [ii], 1-317, 319-340, [i]; misnumbered at 318, no loss of text. Foliation and line numbering in a later hand. Signatures at bottom right; catchwords at bottom center, often cropped.', openn_tei.collation)
Exemple #11
0
 def test_colophon(self):
     # The `colophon` value read from the mscodex83_tei fixture must match.
     # The fixture is closed by the context manager instead of being left
     # open.
     with open(self.mscodex83_tei) as tei_file:
         openn_tei = OPennTEI(tei_file)
         self.assertEqual('Colophon (f. 32v): Explicit hic liber; de pena sum modo liber/ Explicit hoc totum; pro pena da michi potum/ Explicit expliceat; ludere scriptor eat/ Finito libro sit laus et gloria Christo.', openn_tei.colophon)
Exemple #12
0
 def test_layouts(self):
     # `layouts` from the ljs270_tei fixture must contain this entry.
     # The fixture is closed by the context manager instead of being left
     # open.
     with open(self.ljs270_tei) as tei_file:
         openn_tei = OPennTEI(tei_file)
         self.assertIn('Inscribed in 23 lines on 2 sides of the tablet.', openn_tei.layouts)
Exemple #13
0
 def __init__(self, document):
     """Keep ``document`` and build an ``OPennTEI`` from its ``tei_xml``."""
     self._document = document
     # NOTE(review): the XML is read through `self.document`, not the
     # argument; presumably a property exposing `_document` — confirm.
     tei_xml = self.document.tei_xml
     self._tei = OPennTEI(tei_xml)
Exemple #14
0
 def test_no_title(self):
     # validate() on the mscodex1589_tei_no_title fixture must raise an
     # OPennException whose message mentions 'title'.
     # The fixture is closed by the context manager instead of being left
     # open.
     with open(self.mscodex1589_tei_no_title) as tei_file:
         openn_tei = OPennTEI(tei_file)
         with self.assertRaises(OPennException) as oe:
             openn_tei.validate()
     self.assertIn('title', str(oe.exception))
Exemple #15
0
 def test_signatures(self):
     # The `signatures` value read from the mscodex75_tei fixture must match.
     # The fixture is closed by the context manager instead of being left
     # open.
     with open(self.mscodex75_tei) as tei_file:
         openn_tei = OPennTEI(tei_file)
         self.assertEqual(u'Some signatures visible in red in the first half of some quires.', openn_tei.signatures)
Exemple #16
0
 def test_origin(self):
     # The `origin` value read from the mscodex906_tei fixture must match.
     # The fixture is closed by the context manager instead of being left
     # open.
     with open(self.mscodex906_tei) as tei_file:
         openn_tei = OPennTEI(tei_file)
         self.assertEqual(u'Written in France after 1474, based on primary watermark.', openn_tei.origin)
Exemple #17
0
import sys

# Don't run this stuff
# Everything below this guard is intentionally unreachable; it is kept as a
# record of a one-off maintenance step.
sys.exit(1)

# UPDATE TEI from file system
from openn.xml.openn_tei import OPennTEI
# NOTE(review): `Document` is not imported in the visible part of this
# script — confirm where it comes from before re-enabling.
doc = Document.objects.get(pk=6853)
# Read the TEI through a context manager so the handle is closed.
with open('/mnt/scratch02/openn/site/Data/0032/ms_or_044/data/ms_or_044_TEI.xml') as tei_file:
    tei = OPennTEI(tei_file.read())
doc.tei_xml = tei.to_string()
doc.save()

names = (('0007', 'lehigh_002', 'lehigh_codex_002'),
    ('0007', 'lehigh_003', 'lehigh_codex_003'),
    ('0007', 'lehigh_006', 'lehigh_codex_006'),
    ('0007', 'lehigh_007', 'lehigh_codex_007'),
    ('0007', 'Antiphon_25', 'lehigh_codex_025'),
    ('0007', 'BookofHoursoftheRomanuse_18', 'lehigh_codex_018'),
    ('0003', 'BMCMS2', 'BMC_MS02'),
    ('0003', 'BMCMS5', 'BMC_MS05'),
    ('0003', 'BMCMS31', 'BMC_MS31'),
    ('0003', 'BMCMS32', 'BMC_MS32'),
    ('0003', 'BMCMS33', 'BMC_MS33'),
    ('0003', 'BMCMS34', 'BMC_MS34'),
    ('0003', 'BMC_MS8', 'BMC_MS08'),
    ('0003', 'BMC_MS9', 'BMC_MS09'),
    ('0003', 'BMC_MS7', 'BMC_MS07'),
    ('0003', 'BMC_MS4', 'BMC_MS04'),
    ('0003', 'BMC_MS6', 'BMC_MS06'),
    ('0003', 'BMC_MS3', 'BMC_MS03'),
    ('0012', 'ms2_2224q', 'lcp_ms02'),
Exemple #18
0
 def build_partial_tei(self):
     """Return the generated partial TEI as a string, after enrichment.

     The XML from ``gen_partial_tei()`` is wrapped in an ``OPennTEI``,
     passed to ``add_keywords`` and then ``add_encoding_desc``, and
     serialized with ``to_string()``.
     """
     partial = OPennTEI(self.gen_partial_tei())
     self.add_keywords(partial)
     self.add_encoding_desc(partial)
     return partial.to_string()
Exemple #19
0
 def _openn_tei(self):
     """Return a new ``OPennTEI`` built from the result of ``_read_tei()``."""
     tei_source = self._read_tei()
     return OPennTEI(tei_source)
Exemple #20
0
 def test_watermark(self):
     # The `watermark` value read from the ljs498_tei fixture must match.
     # The fixture is closed by the context manager instead of being left
     # open.
     with open(self.ljs498_tei) as tei_file:
         openn_tei = OPennTEI(tei_file)
         self.assertEqual(u'Similar to Briquet, Aigle à deux têtes 285 (f. 45, 48; Gratz, 1594-1598; Millstatt (Carinthie), 1597-1600; Vienna, 1599) and Aigle à deux têtes 282 (Gratz, 1580; Osnabruck, 1588); similar to Briquet, Aigle à deux têtes 291 (f. 189; Gratz, 1598; Kempten, 1607-1627).', openn_tei.watermark)
Exemple #21
0
 def test_get_call_number(self):
     # `call_number` read from the shared partial-TEI fixture must match.
     # The fixture is closed by the context manager instead of being left
     # open.
     with open(TestOPennTEI.test_partial_tei) as tei_file:
         openn_tei = OPennTEI(tei_file)
         self.assertEqual('Ms. Codex 1223', openn_tei.call_number)
Exemple #22
0
 def test_resource(self):
     # The `resource` value read from the mscodex906_tei fixture must match.
     # The fixture is closed by the context manager instead of being left
     # open.
     with open(self.mscodex906_tei) as tei_file:
         openn_tei = OPennTEI(tei_file)
         self.assertEqual(u'http://hdl.library.upenn.edu/1017/d/medren/3559152', openn_tei.resource)
Exemple #23
0
 def test_get_title(self):
     # `title` read from the shared partial-TEI fixture must match.
     # The fixture is closed by the context manager instead of being left
     # open.
     with open(TestOPennTEI.test_partial_tei) as tei_file:
         openn_tei = OPennTEI(tei_file)
         self.assertEqual('Fragments of the Digests of Justinian, Book 37, Titles 7-9', openn_tei.title)
Exemple #24
0
 def test_no_idno(self):
     # validate() on the mscodex1589_tei_no_idno fixture must raise an
     # OPennException whose message mentions 'Call number'.
     # The fixture is closed by the context manager instead of being left
     # open.
     with open(self.mscodex1589_tei_no_idno) as tei_file:
         openn_tei = OPennTEI(tei_file)
         with self.assertRaises(OPennException) as oe:
             openn_tei.validate()
     self.assertIn('Call number', str(oe.exception))
Exemple #25
0
 def test_foliation(self):
     # The `foliation` value read from the mscodex52_tei fixture must match.
     # The fixture is closed by the context manager instead of being left
     # open.
     with open(self.mscodex52_tei) as tei_file:
         openn_tei = OPennTEI(tei_file)
         self.assertEqual('Paper, i (contemporary paper) + 119; 1-94, [95-119]; contemporary foliation in ink, modern foliation in pencil, upper right recto.', openn_tei.foliation)