def testXml(self):
    """Check that Highwire elements serialize to the expected XML.

    Attribute order in the serialized bytes is not guaranteed, so the
    generated and the expected documents are both parsed and compared by
    the attributes of their child elements instead of byte-for-byte.
    """
    def sorted_attribs(tree):
        # Order by (content, name) so the comparison ignores child order.
        return sorted(
            [node.attrib for node in tree],
            key=lambda attrib: (attrib['content'], attrib['name'])
        )

    elements = untlpy2highwirepy(untldict2py(UNTL_DICT))
    generated_tree = fromstring(generate_highwire_xml(elements))

    generated = sorted_attribs(generated_tree)
    expected = sorted_attribs(fromstring(HIGHWIRE_XML))
    self.assertEqual(expected, generated)

    # The generated root is `metadata` and every child is a `meta` element.
    self.assertEqual(generated_tree.tag, 'metadata')
    for node in generated_tree:
        self.assertEqual(node.tag, 'meta')
def test_convert_none_content_object_to_dict_and_back(self):
    """Round-trip a title whose content is None through py2dict/untldict2py.

    The title is added with ``content=None`` and a qualifier. py2dict is
    expected to yield an empty ``title`` list for it, and the qualifier of
    the object rebuilt from that dict is expected to differ from the
    original child's. Once the child has content, py2dict is expected to
    emit both its content and its qualifier.
    """
    field = PYUNTL_DISPATCH['title'](content=None)
    self.record.add_child(field)
    self.record.children[0].set_qualifier('officialtitle')

    missing_content_dict = py2dict(self.record)
    # assertEqual (rather than assertTrue(a == b)) reports both values
    # when the comparison fails.
    self.assertEqual(missing_content_dict, {'title': []})

    py_from_dict = untldict2py(missing_content_dict)
    self.assertNotEqual(
        self.record.children[0].qualifier,
        py_from_dict.qualifier
    )

    self.record.children[0].content = 'fake content'
    missing_content_dict = py2dict(self.record)
    self.assertEqual(
        missing_content_dict,
        {
            'title': [
                {'content': 'fake content', 'qualifier': 'officialtitle'}
            ]
        }
    )
def test_generate_highwire_xml():
    """Check the XML generated from the Highwire elements of UNTL_DICTIONARY.

    The generated and expected documents are both parsed and compared by
    the attributes of their child elements, so the result does not depend
    on attribute order or child order in the serialized bytes.
    """
    def sorted_attribs(tree):
        # Order by (content, name) so the comparison ignores child order.
        return sorted(
            [node.attrib for node in tree],
            key=lambda attrib: (attrib['content'], attrib['name'])
        )

    highwire_list = untldoc.untlpy2highwirepy(
        untldoc.untldict2py(UNTL_DICTIONARY)
    )
    xml = untldoc.generate_highwire_xml(highwire_list)
    expected_xml = (
        b'<?xml version="1.0" encoding="UTF-8"?>\n'
        b'<metadata>\n'
        b' <meta content="Tres Actos" name="citation_title"/>\n'
        b' <meta content="Last, Furston, 1807-1865." name="citation_author"/>\n'
        b' <meta content="Fake Publishing" name="citation_publisher"/>\n'
        b' <meta content="1944" name="citation_publication_date"/>\n'
        b'</metadata>\n'
    )

    generated_tree = fromstring(xml)
    generated = sorted_attribs(generated_tree)
    expected = sorted_attribs(fromstring(expected_xml))
    assert expected == generated

    # The generated root is `metadata` and every child is a `meta` element.
    assert generated_tree.tag == 'metadata'
    for node in generated_tree:
        assert node.tag == 'meta'
def test_generate_highwire_json():
    """Check the JSON string generated from UNTL_DICTIONARY's Highwire data.

    The expected text lists the citation keys in alphabetical order, each
    mapping to a single ``content`` entry.
    """
    expected_json = (
        '{\n'
        ' "citation_author": [\n'
        ' {\n'
        ' "content": "Last, Furston, 1807-1865."\n'
        ' }\n'
        ' ],\n'
        ' "citation_publication_date": [\n'
        ' {\n'
        ' "content": "1944"\n'
        ' }\n'
        ' ],\n'
        ' "citation_publisher": [\n'
        ' {\n'
        ' "content": "Fake Publishing"\n'
        ' }\n'
        ' ],\n'
        ' "citation_title": [\n'
        ' {\n'
        ' "content": "Tres Actos"\n'
        ' }\n'
        ' ]\n'
        '}'
    )
    highwire_list = untldoc.untlpy2highwirepy(
        untldoc.untldict2py(UNTL_DICTIONARY)
    )
    assert untldoc.generate_highwire_json(highwire_list) == expected_json
def test_sort_untl(self):
    """Check that sort_untl orders the children according to UNTL_PTH_ORDER."""
    c2 = untldict2py(UNTL_DICT)
    c2.sort_untl(UNTL_PTH_ORDER)
    # Position of each child's tag within the reference ordering.
    tag_list = [UNTL_PTH_ORDER.index(elem.tag) for elem in c2.children]
    # The positions must be non-decreasing. Comparing against the sorted
    # list is equivalent to a pairwise `<=` check, but a failure shows the
    # actual order instead of "False is not true".
    self.assertEqual(tag_list, sorted(tag_list))
def test_generate_highwire_text():
    """Check the ``name: content`` text generated from UNTL_DICTIONARY."""
    expected_text = (
        'citation_title: Tres Actos\n'
        'citation_author: Last, Furston, 1807-1865.\n'
        'citation_publisher: Fake Publishing\n'
        'citation_publication_date: 1944'
    )
    highwire_list = untldoc.untlpy2highwirepy(
        untldoc.untldict2py(UNTL_DICTIONARY)
    )
    assert untldoc.generate_highwire_text(highwire_list) == expected_text
def test_untldict2py_dict_includes_qualifier_only_element():
    """Check untldict2py accepts an element that has only a qualifier.

    It is unclear whether elements with neither content nor children occur
    in practice, but the code handles that scenario.
    """
    # Work on a copy so the shared UNTL_DICTIONARY fixture is not modified.
    source = deepcopy(UNTL_DICTIONARY)
    source['date'] = [{'qualifier': 'creation'}]

    root = untldoc.untldict2py(source)

    assert isinstance(root, us.UNTLElement)
    date_element = root.children[4]
    assert date_element.tag == 'date'
    assert date_element.qualifier == 'creation'
def test_untlpy2dcpy_resolve_values_retrieve_vocab(mock_vocab):
    """Check ``resolve_values=True`` turns the language code into its label.

    No vocabulary is passed to untlpy2dcpy; ``mock_vocab`` supplies the
    data. NOTE(review): presumably it patches the vocabulary retrieval
    call. Confirm against the decorator or fixture that provides it.
    """
    spanish = {
        'url': 'http://example.com/languages/#spa',
        'name': 'spa',
        'label': 'Spanish'
    }
    mock_vocab.return_value = {'languages': [spanish]}

    untl_elements = untldoc.untldict2py({'language': [{'content': 'spa'}]})
    root = untldoc.untlpy2dcpy(untl_elements, resolve_values=True)

    language = root.children[0]
    assert language.tag == 'language'
    assert language.content == 'Spanish'
def test_untlpy2highwirepy_not_official_title():
    """Check a lone alternate title still becomes a ``citation_title``."""
    alternate_title = {'qualifier': 'alternatetitle', 'content': 'Tres Actos'}
    untl_elements = untldoc.untldict2py({'title': [alternate_title]})

    highwire_list = untldoc.untlpy2highwirepy(untl_elements)

    assert len(highwire_list) == 1
    element = highwire_list[0]
    assert element.qualifier == 'alternatetitle'
    assert element.name == 'citation_title'
    assert element.content == 'Tres Actos'
def test_untlpy2dcpy():
    """Check the Dublin Core tree built from a small UNTL record.

    The expected result has five children. The ``collection`` element has
    no counterpart among them, and the coverage sDate/eDate pair is
    expected as a single combined coverage element at the end.
    """
    coverage = [
        {'content': '1943', 'qualifier': 'sDate'},
        {'content': '1944', 'qualifier': 'eDate'},
        {'content': 'United States - Texas', 'qualifier': 'placeName'},
    ]
    untl_dict = {
        'coverage': coverage,
        'publisher': [{
            'content': {'name': 'UNT Press', 'location': 'Denton, Texas'}
        }],
        'creator': [{
            'content': {'type': 'org', 'name': 'UNT'},
            'qualifier': 'aut'
        }],
        'title': [{'content': 'UNT Book', 'qualifier': 'officialtitle'}],
        'collection': [{'content': 'UNT'}]
    }
    # (tag, content) expected at each child position, in order.
    expected_children = [
        ('coverage', 'United States - Texas'),
        ('publisher', 'UNT Press'),
        ('creator', 'UNT'),
        ('title', 'UNT Book'),
        # Coverage sDate/eDate are combined and added at the end.
        ('coverage', '1943-1944'),
    ]

    root = untldoc.untlpy2dcpy(untldoc.untldict2py(untl_dict))

    assert isinstance(root, dc.DCElement)
    assert len(root.children) == 5
    assert root.tag == 'dc'
    for position, (tag, content) in enumerate(expected_children):
        assert root.children[position].tag == tag
        assert root.children[position].content == content
def test_untlpy2dcpy_resolve_urls():
    """Check ``resolve_urls=True`` swaps the language code for its URL.

    The vocabulary is passed in through ``verbose_vocabularies``.
    """
    spanish = {
        'url': 'http://example.com/languages/#spa',
        'name': 'spa',
        'label': 'Spanish'
    }
    untl_elements = untldoc.untldict2py({'language': [{'content': 'spa'}]})

    root = untldoc.untlpy2dcpy(
        untl_elements,
        resolve_urls=True,
        verbose_vocabularies={'languages': [spanish]}
    )

    language = root.children[0]
    assert language.tag == 'language'
    assert language.content == 'http://example.com/languages/#spa'
def test_convert_content_no_qualifier_roundtrip(self):
    """Check a child added without a qualifier does not gain one.

    The record is converted from py to dict and back to py. The dict must
    carry only the content, and the rebuilt child's qualifier must be None.
    """
    field = PYUNTL_DISPATCH['title'](content='Tie Till the Title')
    self.record.add_child(field)

    content_dict = py2dict(self.record)
    # assertEqual / assertIsNone report the offending value on failure,
    # unlike assertTrue on a pre-evaluated boolean.
    self.assertEqual(
        content_dict,
        {'title': [{'content': 'Tie Till the Title'}]}
    )

    py_from_dict = untldict2py(content_dict)
    self.assertIsNone(py_from_dict.children[0].qualifier)
def test_untlpy2dcpy_add_permalink_and_ark():
    """Check that passing an ark, domain and scheme adds two identifiers.

    After the title, the first identifier is the permalink built from the
    scheme, domain name and ark; the second is the ark itself.
    """
    untl_dict = {
        'title': [{'content': 'UNT Book', 'qualifier': 'officialtitle'}]
    }
    untl_elements = untldoc.untldict2py(untl_dict)

    root = untldoc.untlpy2dcpy(
        untl_elements,
        ark='ark:/67531/metatest1',
        domain_name='example.com',
        scheme='https'
    )

    assert root.children[0].tag == 'title'

    permalink = root.children[1]
    assert permalink.tag == 'identifier'
    assert permalink.content == 'https://example.com/ark:/67531/metatest1/'

    ark_identifier = root.children[2]
    assert ark_identifier.tag == 'identifier'
    assert ark_identifier.content == 'ark: ark:/67531/metatest1'
def test_untlpy2highwirepy():
    """Check the Highwire elements derived from UNTL_DICTIONARY.

    Four ``meta`` elements are expected, in the order author, publisher,
    publication date, title.
    """
    highwire_list = untldoc.untlpy2highwirepy(
        untldoc.untldict2py(UNTL_DICTIONARY)
    )

    assert len(highwire_list) == 4
    for element in highwire_list:
        assert element.tag == 'meta'

    # Safe to unpack: the length was verified above.
    author, publisher, publication_date, title = highwire_list

    assert author.qualifier == 'aut'
    assert author.name == 'citation_author'
    assert author.content == 'Last, Furston, 1807-1865.'

    assert publisher.qualifier is None
    assert publisher.name == 'citation_publisher'
    assert publisher.content == 'Fake Publishing'

    assert publication_date.qualifier == 'creation'
    assert publication_date.name == 'citation_publication_date'
    assert publication_date.content == '1944'

    assert title.qualifier == 'officialtitle'
    assert title.name == 'citation_title'
    assert title.content == 'Tres Actos'
def test_highwirepy2dict():
    """Check the dictionary built from UNTL_DICTIONARY's Highwire elements.

    Each citation name is expected to map to a one-item list holding only
    the element's content.
    """
    expected = {
        'citation_author': [{'content': 'Last, Furston, 1807-1865.'}],
        'citation_publisher': [{'content': 'Fake Publishing'}],
        'citation_publication_date': [{'content': '1944'}],
        'citation_title': [{'content': 'Tres Actos'}]
    }
    highwire_list = untldoc.untlpy2highwirepy(
        untldoc.untldict2py(UNTL_DICTIONARY)
    )
    assert untldoc.highwirepy2dict(highwire_list) == expected
def test_untldict2py():
    """Check the element tree that untldict2py builds from UNTL_DICTIONARY.

    The root is a ``metadata`` element with five children: title, creator,
    publisher, collection and date. Creator and publisher carry their name
    in a ``name`` child element.
    """
    root = untldoc.untldict2py(UNTL_DICTIONARY)

    assert isinstance(root, us.UNTLElement)
    assert len(root.children) == 5
    assert root.tag == 'metadata'

    # Safe to unpack: the length was verified above.
    title, creator, publisher, collection, date = root.children

    assert title.tag == 'title'
    assert title.content == 'Tres Actos'
    assert title.qualifier == 'officialtitle'

    assert creator.tag == 'creator'
    assert creator.qualifier == 'aut'
    assert creator.children[0].tag == 'name'
    assert creator.children[0].content == 'Last, Furston, 1807-1865.'

    assert publisher.tag == 'publisher'
    assert publisher.children[0].tag == 'name'
    assert publisher.children[0].content == 'Fake Publishing'

    assert collection.tag == 'collection'
    assert collection.content == 'UNT'

    assert date.tag == 'date'
    assert date.content == '1944'
def testTextEscape(self):
    """Check the ANVL text produced when elements are built with escape=True."""
    title = {'content': 'Clifford & Lassie', 'qualifier': 'officialtitle'}
    meta = [
        {'content': 'ark:/67531/metapth38622', 'qualifier': 'ark'},
        {
            'content': '2008-06-29, 00:31:14',
            'qualifier': 'metadataCreationDate'
        },
    ]
    untlpy = untldict2py({'title': [title], 'meta': meta})

    highwire_elements = untlpy2highwirepy(untlpy, escape=True)
    highwire_text = generate_highwire_text(highwire_elements)

    expected = (
        'citation_title: Clifford & Lassie\n'
        'citation_online_date: 06/29/2008'
    )
    self.assertEqual(highwire_text, expected)
def testHighwire2Dict(self):
    """Check that highwirepy2dict returns exactly a ``dict``."""
    highwire_elements = untlpy2highwirepy(untldict2py(UNTL_DICT))
    result_type = type(highwirepy2dict(highwire_elements))
    # Exact type comparison: a dict subclass would not satisfy this check.
    self.assertEqual(result_type, dict)
def setUp(self):
    """Build ``self.root_element`` from UNTL_DICT."""
    root = untldict2py(UNTL_DICT)
    self.root_element = root
def testIncompleteness(self):
    """Check a record built from BAD_UNTL_DICT scores below 0.02."""
    bad_pyuntl = untldict2py(BAD_UNTL_DICT)
    incompleteness = determine_completeness(bad_pyuntl)
    # assertLess reports the actual score when the check fails.
    self.assertLess(incompleteness, 0.02)
def test_complete_record(self):
    """Check every child's tag is one of the keys of the source dict."""
    self.record = untldict2py(UNTL_DICT)
    for child in self.record.children:
        # Membership on the dict itself tests its keys; assertIn names the
        # missing tag on failure.
        self.assertIn(child.tag, UNTL_DICT)
def testETDfromUNTL(self):
    """Check that converting UNTL to ETD MS yields exactly an ETD_MS object."""
    untlpy = untldict2py(UNTL_DICT)
    etd_type = type(untlpy2etd_ms(untlpy))
    # Exact type comparison: a subclass of ETD_MS would not satisfy it.
    self.assertEqual(etd_type, ETD_MS)
def test_generate_form_data(self):
    """Check generate_form_data returns a FormGenerator instance."""
    self.record = untldict2py(UNTL_DICT)
    form_gen = self.record.generate_form_data(sort_order=UNTL_PTH_ORDER)
    # assertIsInstance reports the object and its type on failure.
    self.assertIsInstance(form_gen, FormGenerator)
def testUNTL2HIGHWIRE(self):
    """Check every converted element is a HighwireElement (or subclass)."""
    untlpy = untldict2py(UNTL_DICT)
    highwi = untlpy2highwirepy(untlpy)
    for element in highwi:
        # Same check as issubclass(type(element), HighwireElement), but a
        # failure names the offending element.
        self.assertIsInstance(element, HighwireElement)
def __init__(self, identifier, metadataLocations, staticFileLocations,
             mimetypeIconsPath, use, **kwargs):
    """Load a digital object's METS record and the data derived from it.

    Args:
        identifier: Either an absolute path to a ``.mets.xml`` file or a
            meta_id. For a meta_id, the METS path is looked up from the
            pair path and ``metadataLocations``.
        metadataLocations: Stored on the instance and used to locate the
            METS record when ``identifier`` is a meta_id.
        staticFileLocations: Stored on the instance.
        mimetypeIconsPath: Accepted but not used by this constructor.
        use: Stored on the instance.
        **kwargs: ``getCopy_url`` (optional) enables the getCopy lookup.
            ``transcriptions_server_url`` (optional) is forwarded to
            ``get_transcriptions_data``.

    Raises:
        ResourceObjectException: If the METS document cannot be opened.
    """
    self.metadataLocations = metadataLocations
    self.staticFileLocations = staticFileLocations
    self.use = use
    getCopy_url = kwargs.get('getCopy_url', None)

    # If the identifier is a filename, use that. Otherwise treat it as
    # a meta_id.
    if identifier.endswith(".mets.xml"):
        self.mets_filename = identifier
        # The meta_id is the file name up to its first dot.
        self.meta_id = os.path.split(identifier)[1].split(".")[0]
        self.pair_path = get_pair_path(self.meta_id)
        self.metadata_system = None
    else:
        self.meta_id = identifier
        # Get the pair path for the digital object.
        self.pair_path = get_pair_path(self.meta_id)
        # Determine the location of the resource and the filename
        # of the resource's record.
        self.mets_filename, self.metadata_system = get_mets_record_system(
            self.meta_id, self.pair_path, metadataLocations
        )

    # Get dimensions data.
    self.dimensions = get_dimensions_data(self.mets_filename)

    # getCopy data is only fetched when a getCopy url was given.
    if getCopy_url:
        self.getCopy_data = get_getCopy_data(getCopy_url, self.meta_id)
    else:
        self.getCopy_data = {}

    # Open the METS document.
    try:
        mets_filehandle = open_system_file(self.mets_filename)
    except Exception:
        raise ResourceObjectException(
            "Could not open the Mets document: %s" % (self.meta_id)
        )

    # Parse the METS document. The handle is closed even if parsing
    # fails, so a malformed document does not leak it.
    try:
        parsed_mets = etree.parse(mets_filehandle)
    finally:
        mets_filehandle.close()

    # Get the metadata file.
    # NOTE(review): presumably this sets self.metadata_file and
    # self.metadata_type, which are read just below. Confirm.
    self.get_metadata_file(parsed_mets)

    # Get the descriptive metadata.
    self.desc_MD = get_desc_metadata(self.metadata_file,
                                     self.metadata_type)

    # The resource type passed on is the content of the first
    # resourceType entry, or None when there is no such entry.
    resource_type = self.desc_MD.get('resourceType')
    if resource_type:
        resource_type = resource_type[0].get('content')
    else:
        resource_type = None

    # Get transcriptions data.
    self.transcriptions = get_transcriptions_data(
        meta_id=self.meta_id,
        resource_type=resource_type,
        transcriptions_server_url=kwargs.get('transcriptions_server_url'),
    )

    # Get the fileSets within the fileSec.
    self.get_structMap(parsed_mets)

    # Get the embargo information, if it exists.
    self.get_embargo()

    # Get the author citation string.
    self.author_citation_string = get_author_citation_string(self.desc_MD)

    self.completeness = untldict2py(self.desc_MD).completeness
def test_untlpy2dcpy_only_eDate():
    """Check a lone eDate coverage keeps its content unchanged."""
    end_date = {'content': '1955', 'qualifier': 'eDate'}
    untl_elements = untldoc.untldict2py({'coverage': [end_date]})

    root = untldoc.untlpy2dcpy(untl_elements)

    coverage = root.children[0]
    assert coverage.tag == 'coverage'
    assert coverage.content == '1955'
def testCompleteness(self):
    """Check a record built from UNTL_DICT has a completeness of 1.0."""
    score = determine_completeness(untldict2py(UNTL_DICT))
    self.assertEqual(score, 1.0)
def testJson(self):
    """Check the JSON string generated for UNTL_DICT equals HIGHWIRE_JSON."""
    elements = untlpy2highwirepy(untldict2py(UNTL_DICT))
    generated = generate_highwire_json(elements)
    self.assertEqual(generated, HIGHWIRE_JSON)
def testText(self):
    """Check the ANVL text generated for UNTL_DICT equals HIGHWIRE_TEXT."""
    elements = untlpy2highwirepy(untldict2py(UNTL_DICT))
    generated = generate_highwire_text(elements)
    self.assertEqual(generated, HIGHWIRE_TEXT)
def test_create_pyuntl_from_dict(self):
    """Check untldict2py returns a Metadata root element."""
    self.root_element = untldict2py(UNTL_DICT)
    # assertIsInstance reports the object and its type on failure.
    self.assertIsInstance(self.root_element, Metadata)