def test_ControlledAccessHeadings(self):
    """Verify the nested controlled access headings mapped on archdesc."""
    ca = self.ead.archdesc.controlaccess
    self.assertIsInstance(ca, eadmap.ControlledAccessHeadings)
    self.assertEqual("Selected Search Terms", u(ca.head))
    self.assertIsInstance(ca.controlaccess[0], eadmap.ControlledAccessHeadings)
    self.assertEqual("Personal Names", u(ca.controlaccess[0].head))
    self.assertIsInstance(ca.controlaccess[0].person_name[0], eadmap.Heading)
    self.assertEqual("Barker, Sebastian.", ca.controlaccess[0].person_name[0].value)
    self.assertIsInstance(ca.controlaccess[0].family_name[0], eadmap.Heading)
    self.assertEqual("Dozier family.", ca.controlaccess[0].family_name[0].value)
    self.assertEqual("English poetry--Irish authors--20th century.",
                     ca.controlaccess[1].subject[0].value)
    self.assertEqual("Ireland.", ca.controlaccess[2].geographic_name[0].value)
    self.assertEqual("Manuscripts.", ca.controlaccess[3].genre_form[0].value)
    self.assertEqual("Poet.", ca.controlaccess[4].occupation[0].value)
    self.assertIsInstance(ca.controlaccess[5].corporate_name[0], eadmap.Heading)
    self.assertEqual("Irish Academy of Letters", ca.controlaccess[5].corporate_name[0].value)
    self.assertIsInstance(ca.controlaccess[6].function[0], eadmap.Heading)
    self.assertEqual("Law enforcing.", ca.controlaccess[6].function[0].value)
    self.assertIsInstance(ca.controlaccess[7].title[0], eadmap.Heading)
    self.assertEqual("New Yorker (New York, 1925-)", ca.controlaccess[7].title[0].value)

    # terms - maps to all types, mixed, in the order they appear
    all_terms = ca.controlaccess[8].terms
    self.assertEqual("title", all_terms[0].value)
    self.assertEqual("person", all_terms[1].value)
    self.assertEqual("family", all_terms[2].value)
    self.assertEqual("corp", all_terms[3].value)
    self.assertEqual("occupation", all_terms[4].value)
    self.assertEqual("subject", all_terms[5].value)
    self.assertEqual("geography", all_terms[6].value)
    self.assertEqual("genre", all_terms[7].value)
    self.assertEqual("function", all_terms[8].value)
def to_xml(self, dt):
    """Convert a date/time object to its text form for storing in XML.

    :param dt: object providing ``strftime`` and ``isoformat``, or None
    :returns: text formatted with ``self.format`` when a format is
        configured, otherwise the ISO format of ``dt``; None when ``dt``
        is None
    """
    # pass None through untouched instead of failing on a missing
    # strftime/isoformat attribute
    if dt is None:
        return None
    if self.format is not None:
        return u(dt.strftime(self.format))
    return u(dt.isoformat())
def test_SubordinateComponents(self):
    """Verify the dsc mapping and its nested c01/c02/c03 components."""
    dsc = self.ead.dsc
    self.assertIsInstance(dsc, eadmap.SubordinateComponents)
    self.assertEqual("combined", dsc.type)
    self.assertEqual("Description of Series", u(dsc.head))

    # c01 - series
    self.assertIsInstance(dsc.c[0], eadmap.Component)
    self.assertEqual("series", dsc.c[0].level)
    self.assertIsInstance(dsc.c[0].did, eadmap.DescriptiveIdentification)
    self.assertEqual("Series 1", dsc.c[0].did.unitid.value)
    self.assertEqual("Writings by Seamus Heaney", u(dsc.c[0].did.unittitle))
    self.assertEqual("Box 1: folders 1-12", dsc.c[0].did.physdesc)

    # c02 - file
    self.assertIsInstance(dsc.c[0].c[0], eadmap.Component)
    self.assertEqual("file", dsc.c[0].c[0].level)
    self.assertIsInstance(dsc.c[0].c[0].did, eadmap.DescriptiveIdentification)
    self.assertIn("holograph manuscript", u(dsc.c[0].c[0].did.unittitle))
    self.assertEqual("box", dsc.c[0].c[0].did.container[0].type)
    self.assertEqual("1", dsc.c[0].c[0].did.container[0].value)
    self.assertEqual("folder", dsc.c[0].c[0].did.container[1].type)
    self.assertEqual("1", dsc.c[0].c[0].did.container[1].value)

    self.assertTrue(dsc.hasSeries())
    self.assertFalse(dsc.c[0].hasSubseries())

    # second series has a subseries
    self.assertTrue(dsc.c[1].hasSubseries())
    # access c03 level item
    self.assertEqual("file", dsc.c[1].c[0].c[0].level)
    self.assertIn("Hilary Boyle", u(dsc.c[1].c[0].c[0].did.unittitle))
def test_fields(self):
    """Compare each mapped MODS field with the value expected by the test."""
    mods = self.mods
    self.assertEqual('id1', mods.id)
    self.assertEqual('A simple record', mods.title)
    self.assertEqual('text', mods.resource_type)

    # note
    self.assertEqual('a general note', mods.note.label)
    self.assertEqual('general', mods.note.type)
    self.assertEqual(u'remember to...', u(mods.note))
    self.assertEqual('remember to...', mods.note.text)

    # origin info dates
    origin = mods.origin_info
    self.assertEqual(u'2010-06-17', u(origin.created[0]))
    self.assertEqual('2010-06-17', origin.created[0].date)
    self.assertEqual('2010-06-18', origin.issued[0].date)
    self.assertEqual('2010-06-19', origin.captured[0].date)
    self.assertEqual('2010-06-20', origin.valid[0].date)
    self.assertEqual('2010-06-21', origin.modified[0].date)
    self.assertEqual('2010-06-22', origin.copyright[0].date)
    self.assertEqual('2010-06-23', origin.other[0].date)
    self.assertEqual('some_type', origin.other[0].type)
    self.assertEqual(True, origin.created[0].key_date)
    self.assertEqual('Little, Brown', origin.publisher)

    # identifier and abstract
    self.assertEqual(u'http://so.me/uri', mods.identifiers[0].text)
    self.assertEqual(u'uri', mods.identifiers[0].type)
    self.assertEqual(u'Dawson choir recordings', mods.abstract.text)

    # name fields
    name = mods.name
    self.assertEqual(u'personal', name.type)
    self.assertEqual(u'naf', name.authority)
    self.assertEqual(u'n82032703', name.id)
    self.assertEqual(u'Dawson, William Levi', name.name_parts[0].text)
    self.assertEqual(u'1899-1990', name.name_parts[1].text)
    self.assertEqual(u'date', name.name_parts[1].type)
    self.assertEqual(u'William Levi Dawson (1899-1990)', name.display_form)
    self.assertEqual(u'Tuskegee', name.affiliation)
    self.assertEqual(u'text', name.roles[0].type)
    self.assertEqual(u'marcrelator', name.roles[0].authority)
    self.assertEqual(u'Composer', name.roles[0].text)

    # access condition
    self.assertEqual(u'restrictions on access', mods.access_conditions[0].type)
    self.assertEqual(u'Restricted', mods.access_conditions[0].text)

    # related item
    related = mods.related_items[0]
    self.assertEqual(u'host', related.type)
    self.assertEqual(u'Emory University Archives', related.title)
    self.assertEqual(u'local_sourcecoll_id', related.identifiers[0].type)
    self.assertEqual(u'eua', related.identifiers[0].text)

    # titleInfo subfields
    self.assertEqual('A simple record', mods.title_info.title)
    self.assertEqual(' (for test purposes)', mods.title_info.subtitle)
    alt_title = mods.title_info_list[1]
    self.assertEqual('alternative', alt_title.type)
    self.assertEqual('First line', alt_title.label)
    self.assertEqual('Alternative title', alt_title.title)

    # part
    part = mods.parts[0]
    self.assertEqual('volume', part.details[0].type)
    self.assertEqual('II', part.details[0].number)
    self.assertEqual('pages', part.extent.unit)
    self.assertEqual('5', part.extent.start)
    self.assertEqual('23', part.extent.end)

    # location
    location = mods.locations[0]
    self.assertEqual('http://so.me/other/uri', location.url)
    self.assertEqual('Atlanta', location.physical)
def test_unittitle(self):
    """Verify the unittitle mapping, its unitdate and the short title."""
    title = self.ead.unittitle
    self.assertIsInstance(title, eadmap.UnitTitle)
    self.assertIsInstance(title.short, eadmap.UnitTitle)
    self.assertIsInstance(title.unitdate, eadmap.DateField)
    self.assertEqual('1972-2005', u(title.unitdate))
    # short title
    self.assertEqual('Seamus Heaney collection,', u(title.short))
    self.assertEqual('Writings by Seamus Heaney',
                     u(self.ead.dsc.c[0].did.unittitle.short))
def test_basic_fields(self):
    """Verify the top-level fields mapped on the EAD document."""
    self.assertEqual(u(self.ead.title), "Seamus Heaney collection, 1972-1997")
    self.assertEqual(u(self.ead.eadid), 'heaney653')
    self.assertEqual(self.ead.id, "heaney653-011")
    self.assertEqual(self.ead.author,
                     "Manuscript, Archives, and Rare Book Library, Emory University")
    # whitespace makes fields with tags a bit messier...
    self.assertIn("Seamus Heaney collection,", u(self.ead.unittitle))
    self.assertIn("1972-2005", u(self.ead.unittitle))
    # several different extents in the physical description;
    # FIXME: all smashed together
    self.assertIn("1 linear ft.", self.ead.physical_desc)
    self.assertIn("(3 boxes)", self.ead.physical_desc)
    self.assertIn("12 oversized papers (OP)", self.ead.physical_desc)
    self.assertIn("materials relating to Irish poet Seamus Heaney",
                  u(self.ead.abstract))
def test_ProfileDescription(self):
    """Verify the profiledesc mapping: languages and creation date."""
    profiledesc = self.ead.profiledesc
    self.assertIsInstance(profiledesc, eadmap.ProfileDescription)
    self.assertEqual("English", profiledesc.languages[0])
    self.assertEqual("eng", profiledesc.language_codes[0])
    # profile creation date
    self.assertIsInstance(profiledesc.date, eadmap.DateField)
    self.assertEqual('May 5, 2005', u(profiledesc.date))
    self.assertEqual('2005-05-05', profiledesc.date.normalized)
def test_DateField(self):
    """Verify DateField attributes on a publication date and a unitdate."""
    date = self.ead.file_desc.publication.date
    self.assertIsInstance(date, eadmap.DateField)
    self.assertEqual("2005-05-05", date.normalized)
    self.assertEqual("May 5, 2005", u(date))
    self.assertEqual("ce", date.era)
    self.assertEqual("gregorian", date.calendar)

    unitdate = self.ead.dsc.c[1].c[0].did.unitdate
    self.assertIsInstance(unitdate, eadmap.DateField)
    self.assertEqual("1974/1986", unitdate.normalized)
def _html_output(self, normal_row, error_row, row_ender, help_text_html, errors_on_separate_row):
    """Extend BaseForm's helper function for outputting HTML. Used by as_table(), as_ul(), as_p().

    Combines the HTML version of the main form's fields with the HTML content
    for any subforms.
    """
    parts = [
        super(XmlObjectForm, self)._html_output(
            normal_row, error_row, row_ender, help_text_html,
            errors_on_separate_row)
    ]

    def _subform_output(subform):
        # render a subform with the same row templates as the parent form
        return subform._html_output(normal_row, error_row, row_ender,
                                    help_text_html, errors_on_separate_row)

    for name, subform in six.iteritems(self.subforms):
        # use form label if one was set
        if hasattr(subform, 'form_label'):
            name = subform.form_label
        parts.append(self._html_subform_output(subform, name, _subform_output))

    for name, formset in six.iteritems(self.formsets):
        parts.append(u(formset.management_form))
        # use form label if one was set
        # - use declared subform label if any; a formset with no forms
        #   has no first form to inspect, so skip straight to the fallback
        forms = formset.forms
        declared_label = getattr(forms[0], 'form_label', None) if forms else None
        if declared_label is not None:
            name = declared_label
        # fallback to generated label from field name
        elif hasattr(formset, 'form_label'):
            name = formset.form_label

        # collect the html output for all the forms in the formset
        subform_parts = [
            self._html_subform_output(subform, gen_html=_subform_output,
                                      suppress_section=True)
            for subform in forms
        ]
        # then wrap all forms in the section container, so formset label appears once
        parts.append(
            self._html_subform_output(name=name, content=u'\n'.join(subform_parts)))

    return mark_safe(u'\n'.join(parts))
def _html_output(self, normal_row, error_row, row_ender, help_text_html, errors_on_separate_row):
    """Extend BaseForm's helper function for outputting HTML. Used by as_table(), as_ul(), as_p().

    Combines the HTML version of the main form's fields with the HTML content
    for any subforms.
    """
    parts = [
        super(XmlObjectForm, self)._html_output(
            normal_row, error_row, row_ender, help_text_html,
            errors_on_separate_row)
    ]

    def _subform_output(subform):
        # render a subform with the same row templates as the parent form
        return subform._html_output(normal_row, error_row, row_ender,
                                    help_text_html, errors_on_separate_row)

    for name, subform in six.iteritems(self.subforms):
        # use form label if one was set
        if hasattr(subform, 'form_label'):
            name = subform.form_label
        parts.append(self._html_subform_output(subform, name, _subform_output))

    for name, formset in six.iteritems(self.formsets):
        parts.append(u(formset.management_form))
        # use form label if one was set
        # - use declared subform label if any; a formset with no forms
        #   has no first form to inspect, so skip straight to the fallback
        forms = formset.forms
        declared_label = getattr(forms[0], 'form_label', None) if forms else None
        if declared_label is not None:
            name = declared_label
        # fallback to generated label from field name
        elif hasattr(formset, 'form_label'):
            name = formset.form_label

        # collect the html output for all the forms in the formset
        subform_parts = [
            self._html_subform_output(subform, gen_html=_subform_output,
                                      suppress_section=True)
            for subform in forms
        ]
        # then wrap all forms in the section container, so formset label appears once
        parts.append(
            self._html_subform_output(name=name, content=u'\n'.join(subform_parts)))

    return mark_safe(u'\n'.join(parts))
def test_index_indexentry(self):
    """Verify index, indexentry, pointer group and reference mappings."""
    ad = self.ead.archdesc
    # index and indexentry
    self.assertEqual(2, len(ad.index))
    index = ad.index[0]
    self.assertIsInstance(index, eadmap.Index)
    self.assertEqual('Index of Selected Correspondents', u(index.head))
    self.assertEqual('index1', index.id)
    self.assertIn('relates to the correspondence in Series 1',
                  u(index.note.content[0]))
    self.assertEqual(2, len(index.entry))
    self.assertIsInstance(index.entry[0], eadmap.IndexEntry)
    self.assertEqual('Batten, Guinn', u(index.entry[0].name))
    self.assertIsInstance(index.entry[0].ptrgroup, eadmap.PointerGroup)
    self.assertEqual(3, len(index.entry[0].ptrgroup.ref))
    self.assertIsInstance(index.entry[0].ptrgroup.ref[0], eadmap.Reference)
    self.assertEqual('simple', index.entry[0].ptrgroup.ref[0].type)
    self.assertIn('1995 July', u(index.entry[0].ptrgroup.ref[0].value))
    self.assertEqual('Belton, Neil', u(index.entry[1].name))
    self.assertIn('1993 November 3', u(index.entry[1].ptrgroup.ref[-1].value))

    # multiple indexes
    self.assertIsInstance(ad.index[1], eadmap.Index)
    self.assertEqual("Second Index", u(ad.index[1].head))
    self.assertEqual("index2", ad.index[1].id)
def to_xml(self, value):
    """Convert a value to text for storing in XML.

    :param value: value to convert, or None
    :returns: None when ``value`` is None, otherwise ``u(value)``
    """
    if value is not None:
        return u(value)
    return value
def test__unicode(self):
    """Verify the text conversion of the test object contains "42 13"."""
    stu = u(self.obj)
    self.assertIn("42 13", stu)
def __string__(self):
    """Return the node as-is when it is a string, else an ASCII encoding.

    Non-string nodes are converted with ``u(self)`` and encoded to ASCII,
    with unencodable characters replaced by XML character references.
    """
    # NOTE(review): ``__string__`` is not a special method Python calls
    # implicitly; confirm whether ``__str__`` was intended.
    node = self.node
    if not isinstance(node, six.string_types):
        return u(self).encode('ascii', 'xmlcharrefreplace')
    return node
def test_ArchivalDescription(self):
    """Verify the archdesc mapping: identification fields and each section."""
    self.assertIsInstance(self.ead.archdesc, eadmap.ArchivalDescription)
    ad = self.ead.archdesc
    self.assertEqual("Heaney, Seamus, 1939-", ad.origination)
    self.assertIsInstance(ad.unitid, eadmap.Unitid)
    self.assertEqual("Manuscript Collection No.653", ad.unitid.value)
    self.assertEqual("Manuscript Collection No.653", u(ad.unitid))
    self.assertEqual('US', ad.unitid.country_code)
    self.assertEqual('geu-s', ad.unitid.repository_code)
    self.assertEqual(653, ad.unitid.identifier)
    self.assertEqual("1 linear ft.", ad.extent[0])
    self.assertEqual("(3 boxes)", ad.extent[1])
    self.assertEqual("12 oversized papers (OP)", ad.extent[2])
    self.assertEqual("Materials entirely in English.", ad.langmaterial)
    self.assertEqual("In the Archives.", ad.location)

    self.assertIsInstance(ad.access_restriction, eadmap.Section)
    self.assertEqual("Restrictions on access", u(ad.access_restriction.head))
    self.assertIn("Special restrictions apply",
                  u(ad.access_restriction.content[0]))

    self.assertIsInstance(ad.use_restriction, eadmap.Section)
    self.assertEqual("Terms Governing Use and Reproduction",
                     u(ad.use_restriction.head))
    self.assertIn("limitations noted in departmental policies",
                  u(ad.use_restriction.content[0]))

    self.assertIsInstance(ad.alternate_form, eadmap.Section)
    self.assertEqual("Publication Note", u(ad.alternate_form.head))
    self.assertIn("Published in", u(ad.alternate_form.content[0]))

    self.assertIsInstance(ad.originals_location, eadmap.Section)
    self.assertEqual("Location of Originals", u(ad.originals_location.head))
    self.assertIn("Suppressed chapter", u(ad.originals_location.content[0]))

    self.assertIsInstance(ad.related_material, eadmap.Section)
    self.assertEqual("Related Materials in This Repository",
                     u(ad.related_material.head))
    self.assertIn("part of MSS", u(ad.related_material.content[0]))

    self.assertIsInstance(ad.separated_material, eadmap.Section)
    self.assertEqual("Related Materials in This Repository",
                     u(ad.separated_material.head))
    self.assertIn("Ciaran Carson papers, Peter Fallon",
                  u(ad.separated_material.content[0]))

    self.assertIsInstance(ad.acquisition_info, eadmap.Section)
    self.assertEqual("Source", u(ad.acquisition_info.head))
    self.assertIn("Collection assembled from various sources.",
                  u(ad.acquisition_info.content[0]))

    self.assertIsInstance(ad.custodial_history, eadmap.Section)
    self.assertEqual("Custodial History", u(ad.custodial_history.head))
    self.assertIn("Originally received as part of",
                  u(ad.custodial_history.content[0]))

    self.assertIsInstance(ad.preferred_citation, eadmap.Section)
    self.assertEqual("Citation", u(ad.preferred_citation.head))
    self.assertIn("[after identification of item(s)",
                  u(ad.preferred_citation.content[0]))

    self.assertIsInstance(ad.biography_history, eadmap.Section)
    self.assertEqual("Biographical Note", u(ad.biography_history.head))
    self.assertIn("born on April 13", u(ad.biography_history.content[0]))
    self.assertIn("While at St. Joseph's", u(ad.biography_history.content[1]))

    self.assertIsInstance(ad.bibliography, eadmap.Section)
    self.assertEqual("Publication Note", u(ad.bibliography.head))
    self.assertIn("Susan Jenkins Brown", u(ad.bibliography.content[0]))

    self.assertIsInstance(ad.scope_content, eadmap.Section)
    self.assertEqual("Scope and Content Note", u(ad.scope_content.head))
    self.assertIn("consists of materials relating",
                  u(ad.scope_content.content[0]))

    self.assertIsInstance(ad.arrangement, eadmap.Section)
    self.assertEqual("Arrangement Note", u(ad.arrangement.head))
    self.assertIn("five series", u(ad.arrangement.content[0]))

    self.assertIsInstance(ad.other, eadmap.Section)
    self.assertEqual("Finding Aid Note", u(ad.other.head))
    self.assertIn("Index to selected correspondents", u(ad.other.content[0]))
def from_email_message(cls, message, local_id=None):
    '''
    Convert an :class:`email.message.Message` or compatible message
    object into a CERP XML :class:`eulxml.xmlmap.cerp.Message`. If an
    id is specified, it will be stored in the Message <LocalId>.

    :param message: `email.message.Message` object
    :param local_id: optional message id to be set as `local_id`
    :returns: :class:`eulxml.xmlmap.cerp.Message` instance populated
        with message information
    '''
    result = cls()
    if local_id is not None:
        # store the caller-supplied id (previously the ``id`` builtin
        # was assigned here by mistake)
        result.local_id = local_id

    message_id = message.get('Message-Id')
    if message_id:
        result.message_id_supplied = True
        result.message_id = message_id

    result.mime_version = message.get('MIME-Version')

    dates = message.get_all('Date', [])
    result.orig_date_list.extend([parse_mail_date(d) for d in dates])

    result.from_list.extend(message.get_all('From', []))
    # the sender list is populated from the Sender header (it previously
    # duplicated the From header)
    result.sender_list.extend(message.get_all('Sender', []))
    try:
        result.to_list.extend(message.get_all('To', []))
    except UnicodeError:
        # record the offending header before re-raising
        logger.error('Error converting To header: %r', message['To'])
        raise
    result.cc_list.extend(message.get_all('Cc', []))
    result.bcc_list.extend(message.get_all('Bcc', []))
    result.in_reply_to_list.extend(message.get_all('In-Reply-To', []))
    result.references_list.extend(message.get_all('References', []))
    result.subject_list.extend(message.get_all('Subject', []))
    result.comments_list.extend(message.get_all('Comments', []))
    result.keywords_list.extend(message.get_all('Keywords', []))

    headers = [Header(name=key, value=val) for key, val in message.items()]
    result.headers.extend(headers)

    # FIXME: skip multipart messages for now
    if not message.is_multipart():
        result.create_single_body()

        # FIXME: this is a small subset of the actual elements CERP allows.
        # we should add the rest of them, too.

        # message.get_content_type() always returns something. only
        # put it in the CERP if a Content-Type was explicitly specified.
        if message['Content-Type']:
            result.single_body.content_type_list.append(message.get_content_type())
        if message.get_content_charset():
            result.single_body.charset_list.append(message.get_content_charset())
        if message.get_filename():
            result.single_body.content_name_list.append(message.get_filename())

        # FIXME: attaching the body_content only makes sense for text
        # content types. we'll eventually need a better solution for
        # non-text messages
        result.single_body.create_body_content()
        payload = message.get_payload(decode=False)

        # if not unicode, attempt to convert
        if isinstance(payload, six.binary_type):
            # NOTE(review): get_charset() may differ from the charset
            # declared in Content-Type (get_content_charset()) - confirm
            # which one is intended here
            charset = message.get_charset()
            # decode according to the specified character set, if any
            if charset is not None:
                charset_decoder = codecs.getdecoder(str(charset))
                payload, _ = charset_decoder(payload)
            # otherwise, just try to convert
            else:
                payload = u(payload)

        # remove any control characters not allowed in XML, preserving
        # horizontal tab (9), line feed (10) and carriage return (13)
        control_char_map = dict.fromkeys(
            i for i in range(32) if i not in (9, 10, 13))
        payload = u(payload).translate(control_char_map)

        result.single_body.body_content.content = payload

    else:
        # TODO: handle multipart
        logger.warning('CERP conversion does not yet handle multipart')

    # assume we've normalized newlines:
    result.eol = EOLMAP[os.linesep]

    return result