def test_extref(self):
    # extref with an href attribute should be rendered as a linked anchor
    self.content.node = etree.fromstring(self.EXTREF)
    fmt = format_ead(self.content)
    self.assertIn(
        '<a href="http://pid.emory.edu/ark:/25593/8zgst">Irish Literary Miscellany</a>',
        fmt, 'extref tag converted to a href')

    # extref without an href should still be formatted (as a bare anchor)
    self.content.node = etree.fromstring(self.EXTREF_NOLINK)
    fmt = format_ead(self.content)
    self.assertIn('<a>Irish Literary Miscellany</a>', fmt,
        'formatter should not fail when extref has no href')
def test_title_emph(self):
    # a section containing both emph and title tags should render both
    self.content.node = etree.fromstring(self.TITLE_EMPH)
    fmt = format_ead(self.content)
    self.assertIn('<em>Biographical source:</em> "Shaw, George', fmt,
        "emph tag rendered correctly in section with title")
    self.assertIn('<span class="ead-title">Contemporary Authors Online</span>, Gale', fmt,
        "title rendered correctly in section with emph tag")
def test_title(self):
    # plain title tag is wrapped in a span with class ead-title
    self.content.node = etree.fromstring(self.TITLE)
    fmt = format_ead(self.content)
    self.assertIn('magazine <span class="ead-title">The Smart Set</span> from', fmt,
        "title tag converted correctly to span class ead-title")

    # title variants
    # - doublequotes
    self.content.node = etree.fromstring(self.TITLE_QUOT)
    fmt = format_ead(self.content)
    self.assertEqual('"Terminus"', fmt)

    # - multiple
    self.content.node = etree.fromstring(self.TITLE_MULTI)
    fmt = format_ead(self.content)
    self.assertEqual('Some Author: "Terminus", "Saturday"', fmt)

    # - multiple titles + RDFa (same node as above, formatted with rdfa=True)
    fmt = format_ead(self.content, rdfa=True)
    expected_rdfa = (
        '<span rel="dc:creator"><span typeof="schema:Person">'
        '<span property="schema:name">Some Author</span></span></span>: '
        '"<span inlist="inlist" property="dc:title">Terminus</span>", '
        '"<span inlist="inlist" property="dc:title">Saturday</span>"'
    )
    self.assertEqual(expected_rdfa, fmt)
def test_exist_match(self):
    # exist:match tags should become spans with class exist-match
    self.content.node = etree.fromstring(self.EXIST_MATCH)
    fmt = format_ead(self.content)
    self.assertIn('Pitts v. <span class="exist-match">Freeman</span>', fmt,
        'exist:match tag converted to span for highlighting')
def test_notrans(self):
    # a title nested inside other formatting should still be rendered
    self.content.node = etree.fromstring(self.NOTRANS)
    fmt = format_ead(self.content)
    self.assertIn('magazine <span class="ead-title">The Smart Set</span>...', fmt,
        "nested format rendered correctly")
def test_bold(self):
    # bold rendering should become a span with class ead-bold
    self.content.node = etree.fromstring(self.BOLD)
    fmt = format_ead(self.content)
    self.assertIn('<span class="ead-bold">Pitts v. Freeman</span> school desegregation', fmt,
        "render bold converted correctly to span class ead-bold")
def test_italics(self):
    # italic rendering should become a span with class ead-italic
    self.content.node = etree.fromstring(self.ITALICS)
    fmt = format_ead(self.content)
    self.assertIn('<span class="ead-italic">Pitts v. Freeman</span> school desegregation', fmt,
        "render italic converted correctly to span class ead-italic")
def check_eadxml(ead):
    """Sanity checks specific to the EAD xml, independent of file or eXist.

    Checks the following:
     - series and index ids are present
     - eadid matches site URL regex
     - fields used for search/browse title match code expectations:
        - at most one top-level origination
        - no leading whitespace in list-title (origination or unittitle)
        - alphabetical first letter (for first-letter browse)
     - every did has either no containers or exactly two
     - no leading whitespace in unittitle or in controlaccess terms
     - eadid url and identifier are both set, both pass ``is_ark``, and the
       url ends with the identifier

    :param ead: :class:`~findingaids.fa.models.FindingAid` ead instance to be checked
    :returns: list of all errors found; each container check that fails adds
        its message followed by a nested list of per-did detail strings
    :rtype: list
    """
    # NOTE: throughout, be sure to use unicode instead of string
    ns = {'e': EAD_NAMESPACE}
    errors = []

    # check that series ids are set
    if ead.dsc and ead.dsc.hasSeries():
        for series in ead.dsc.c:
            errors.extend(check_series_ids(series))

    # check that any index ids are set
    for index in ead.archdesc.index:
        if not index.id:
            errors.append("%(node)s id attribute is not set for %(label)s"
                % {'node': local_name(index.node), 'label': unicode(index.head)})

    # eadid matches appropriate site URL regex
    if not re.match('^%s$' % EADID_URL_REGEX, ead.eadid.value):   # entire eadid should match regex
        errors.append("eadid '%s' does not match site URL regular expression"
            % ead.eadid.value)

    # multiple tests to ensure xml used for search/browse list-title matches what code expects
    # -- since list title is pulled from multiple places, give enough context so it can be found & corrected
    list_title_path = "%s/%s" % (local_name(ead.list_title.node.getparent()),
                                 local_name(ead.list_title.node))

    # - check for at most one top-level origination
    origination_count = ead.node.xpath('count(e:archdesc/e:did/e:origination)', namespaces=ns)
    if int(origination_count) > 1:
        errors.append("Site expects only one archdesc/did/origination; found %d"
            % origination_count)

    # container list formatting (based on encoding practice) expects only 2 containers per did
    # - dids with more than 2 containers, then dids with only one container
    container_checks = (
        ('//e:did[count(e:container) > 2]',
         "Site expects maximum of 2 containers per did; found %d did(s) with more than 2"),
        ('//e:did[count(e:container) = 1]',
         "Site expects 2 containers per did; found %d did(s) with only 1"),
    )
    for did_xpath, message in container_checks:
        dids = ead.node.xpath(did_xpath, namespaces=ns)
        if dids:
            errors.append(message % len(dids))
            # NOTE(review): details are appended as a nested list (not extended
            # into errors) -- presumably so they display as a sub-list of the
            # message above; confirm against whatever renders these errors
            errors.append(['Line %d: %s' % (did.sourceline, tostring(did)) for did in dids])

    # - no leading whitespace in list title
    title_node = ead.node.xpath("%s/text()" % ead.list_title_xpath, namespaces=ns)
    if not title_node:
        # xpath matched nothing: report it as an empty title below instead of
        # failing with an IndexError and losing every error collected so far
        title_text = None
    elif hasattr(title_node[0], 'text'):
        title_text = title_node[0].text
    else:
        # NOTE(review): this converts the whole xpath result, not its first
        # item, to unicode -- confirm that is intended for the whitespace test
        title_text = unicode(title_node)

    if title_text is None:
        errors.append("List title seems to be empty")
    elif re.match(r'\s+', title_text):
        # using node.text because unicode() normalizes, which obscures whitespace problems
        # report with enough context that they can find the appropriate element to fix
        errors.append("Found leading whitespace in list title field (%s): '%s'"
            % (list_title_path, ead.list_title.node.text))
    # - first letter of title matches regex   -- only check if whitespace test fails
    elif not re.match(TITLE_LETTERS, ead.first_letter):
        errors.append("First letter ('%s') of list title field %s does not match browse letter URL regex '%s'"
            % (ead.first_letter, list_title_path, TITLE_LETTERS))

    # leading space in unit title (could be list title but might not be)
    # NOTE: title can contain and even start with subtags such as <title>
    # or <emph>, which is hard to account for with lxml or text() xpath.
    # Using format_ead to generate html that would be displayed, and then
    # stripping tags to check for any leading whitespace within a leading tag
    title = striptags(format_ead(ead.unittitle))
    if re.match(r'\s+', title):
        errors.append("Found leading whitespace in unittitle: '%s'" % title)

    # leading whitespace in control access fields (if any)
    if ead.archdesc.controlaccess and ead.archdesc.controlaccess.controlaccess:
        for ca in ead.archdesc.controlaccess.controlaccess:
            for term in ca.terms:
                # NOTE: using node text because term.value is now normalized
                if re.match(r'\s+', unicode(term.node.text)):
                    errors.append("Found leading whitespace in controlaccess term '%s' (%s)"
                        % (term.node.text, local_name(term.node)))

    # eadid url should contain resolvable ARK
    if ead.eadid.url is None or not is_ark(ead.eadid.url):
        errors.append("eadid url is either not set or not an ARK. "
                      "To correct, run the prep process again.")

    # eadid identifier should contain short-form ARK
    if ead.eadid.identifier is None or not is_ark(ead.eadid.identifier):
        # sentence separator was missing here, which ran the two sentences together
        errors.append("eadid identifier is either not set or not an ARK. "
                      "To correct, run the prep process again.")

    # short- and long-form ARKs should match each other
    if ead.eadid.url is not None and ead.eadid.identifier is not None \
            and not ead.eadid.url.endswith(ead.eadid.identifier):
        errors.append("eadid url and identifier do not match: url '%s' should end with identifier '%s'"
            % (ead.eadid.url, ead.eadid.identifier))

    return errors