Exemple #1
0
    def testXml(self):
        """Check that highwire elements serialize to the expected XML.

        Attribute order in the serialized bytes is not guaranteed, so the
        generated and expected documents are both parsed and the attribute
        mappings of their children are compared after sorting.
        """
        def attrib_sort_key(attrib):
            # Order attribute mappings by content first, then by name.
            return (attrib['content'], attrib['name'])

        elements = untlpy2highwirepy(untldict2py(UNTL_DICT))
        generated_tree = fromstring(generate_highwire_xml(elements))

        # Sorted child attributes of the generated and the expected XML.
        generated_attribs = sorted(
            [node.attrib for node in generated_tree], key=attrib_sort_key)
        expected_attribs = sorted(
            [node.attrib for node in fromstring(HIGHWIRE_XML)],
            key=attrib_sort_key)
        self.assertEqual(expected_attribs, generated_attribs)

        # The generated root is `metadata` and every child is a `meta`.
        self.assertEqual(generated_tree.tag, 'metadata')
        for node in generated_tree:
            self.assertEqual(node.tag, 'meta')
Exemple #2
0
 def test_convert_none_content_object_to_dict_and_back(self):
     """Round-trip a title whose content is None through py2dict.

     Checks that a qualified title with no content serializes to an empty
     ``title`` list, that the object rebuilt from that dictionary does
     not report the title's qualifier, and that once content is assigned
     both content and qualifier show up in the dictionary.
     """
     field = PYUNTL_DISPATCH['title'](content=None)
     self.record.add_child(field)
     self.record.children[0].set_qualifier('officialtitle')

     # assertEqual/assertNotEqual report both operands on failure, which
     # assertTrue on a pre-evaluated comparison cannot do.
     missing_content_dict = py2dict(self.record)
     self.assertEqual(missing_content_dict, {'title': []})

     # NOTE(review): this compares against the qualifier of the object
     # returned by untldict2py itself, not one of its children -- confirm
     # that is the intended check.
     py_from_dict = untldict2py(missing_content_dict)
     self.assertNotEqual(
         self.record.children[0].qualifier, py_from_dict.qualifier
     )

     self.record.children[0].content = 'fake content'
     missing_content_dict = py2dict(self.record)
     self.assertEqual(
         missing_content_dict,
         {
             'title': [
                 {'content': 'fake content', 'qualifier': 'officialtitle'}
             ]
         }
     )
Exemple #3
0
def test_generate_highwire_xml():
    """generate_highwire_xml emits a `metadata` root with `meta` children.

    Attribute mappings are compared after sorting, so the check does not
    depend on the order in which attributes or elements were serialized.
    """
    def by_content_then_name(attrib):
        return (attrib['content'], attrib['name'])

    highwire_list = untldoc.untlpy2highwirepy(
        untldoc.untldict2py(UNTL_DICTIONARY))
    generated_tree = fromstring(untldoc.generate_highwire_xml(highwire_list))
    generated_attribs = sorted(
        [node.attrib for node in generated_tree], key=by_content_then_name)

    expected_xml = (
        b'<?xml version="1.0" encoding="UTF-8"?>\n'
        b'<metadata>\n'
        b'  <meta content="Tres Actos" name="citation_title"/>\n'
        b'  <meta content="Last, Furston, 1807-1865." name="citation_author"/>\n'
        b'  <meta content="Fake Publishing" name="citation_publisher"/>\n'
        b'  <meta content="1944" name="citation_publication_date"/>\n'
        b'</metadata>\n')
    expected_attribs = sorted(
        [node.attrib for node in fromstring(expected_xml)],
        key=by_content_then_name)
    assert expected_attribs == generated_attribs

    # The generated root is `metadata` and every child is a `meta`.
    assert generated_tree.tag == 'metadata'
    for node in generated_tree:
        assert node.tag == 'meta'
Exemple #4
0
 def test_convert_none_content_object_to_dict_and_back(self):
     """Round-trip a title whose content is None through py2dict.

     Checks that a qualified title with no content serializes to an empty
     ``title`` list, that the object rebuilt from that dictionary does
     not report the title's qualifier, and that once content is assigned
     both content and qualifier show up in the dictionary.
     """
     field = PYUNTL_DISPATCH['title'](content=None)
     self.record.add_child(field)
     self.record.children[0].set_qualifier('officialtitle')

     # assertEqual/assertNotEqual report both operands on failure, which
     # assertTrue on a pre-evaluated comparison cannot do.
     missing_content_dict = py2dict(self.record)
     self.assertEqual(missing_content_dict, {'title': []})

     # NOTE(review): this compares against the qualifier of the object
     # returned by untldict2py itself, not one of its children -- confirm
     # that is the intended check.
     py_from_dict = untldict2py(missing_content_dict)
     self.assertNotEqual(
         self.record.children[0].qualifier, py_from_dict.qualifier
     )

     self.record.children[0].content = 'fake content'
     missing_content_dict = py2dict(self.record)
     self.assertEqual(
         missing_content_dict,
         {
             'title': [
                 {'content': 'fake content', 'qualifier': 'officialtitle'}
             ]
         }
     )
Exemple #5
0
def test_generate_highwire_json():
    """generate_highwire_json returns the expected indented JSON string."""
    expected_json = (
        '{\n'
        '    "citation_author": [\n'
        '        {\n'
        '            "content": "Last, Furston, 1807-1865."\n'
        '        }\n'
        '    ],\n'
        '    "citation_publication_date": [\n'
        '        {\n'
        '            "content": "1944"\n'
        '        }\n'
        '    ],\n'
        '    "citation_publisher": [\n'
        '        {\n'
        '            "content": "Fake Publishing"\n'
        '        }\n'
        '    ],\n'
        '    "citation_title": [\n'
        '        {\n'
        '            "content": "Tres Actos"\n'
        '        }\n'
        '    ]\n'
        '}')
    highwire_list = untldoc.untlpy2highwirepy(
        untldoc.untldict2py(UNTL_DICTIONARY))
    generated_json = untldoc.generate_highwire_json(highwire_list)
    assert generated_json == expected_json
Exemple #6
0
 def test_sort_untl(self):
     """Test sort_untl orders children according to UNTL_PTH_ORDER."""
     c2 = untldict2py(UNTL_DICT)
     c2.sort_untl(UNTL_PTH_ORDER)
     # Position of each child's tag within UNTL_PTH_ORDER.
     tag_positions = [UNTL_PTH_ORDER.index(elem.tag) for elem in c2.children]
     # The positions must be non-decreasing. Comparing against the sorted
     # list shows the offending order on failure instead of a bare False.
     self.assertEqual(tag_positions, sorted(tag_positions))
Exemple #7
0
 def test_sort_untl(self):
     """Test sort_untl orders children according to UNTL_PTH_ORDER."""
     c2 = untldict2py(UNTL_DICT)
     c2.sort_untl(UNTL_PTH_ORDER)
     # Position of each child's tag within UNTL_PTH_ORDER.
     tag_positions = [UNTL_PTH_ORDER.index(elem.tag) for elem in c2.children]
     # The positions must be non-decreasing. Comparing against the sorted
     # list shows the offending order on failure instead of a bare False.
     self.assertEqual(tag_positions, sorted(tag_positions))
Exemple #8
0
def test_generate_highwire_text():
    """generate_highwire_text returns one `name: content` line per element."""
    expected_text = ('citation_title: Tres Actos\n'
                     'citation_author: Last, Furston, 1807-1865.\n'
                     'citation_publisher: Fake Publishing\n'
                     'citation_publication_date: 1944')
    highwire_list = untldoc.untlpy2highwirepy(
        untldoc.untldict2py(UNTL_DICTIONARY))
    generated_text = untldoc.generate_highwire_text(highwire_list)
    assert generated_text == expected_text
Exemple #9
0
def test_untldict2py_dict_includes_qualifier_only_element():
    """A `date` entry holding only a qualifier is still converted."""
    # It is unclear whether elements with neither content nor children
    # occur in practice, but the code handles that scenario.
    source_dict = deepcopy(UNTL_DICTIONARY)
    source_dict['date'] = [{'qualifier': 'creation'}]
    root = untldoc.untldict2py(source_dict)
    assert isinstance(root, us.UNTLElement)
    date_element = root.children[4]
    assert date_element.tag == 'date'
    assert date_element.qualifier == 'creation'
Exemple #10
0
def test_untlpy2dcpy_resolve_values_retrieve_vocab(mock_vocab):
    """With resolve_values=True the language content becomes 'Spanish'.

    The vocabulary is provided through the `mock_vocab` return value.
    """
    spanish = {
        'url': 'http://example.com/languages/#spa',
        'name': 'spa',
        'label': 'Spanish'
    }
    mock_vocab.return_value = {'languages': [spanish]}
    untl_elements = untldoc.untldict2py({'language': [{'content': 'spa'}]})
    root = untldoc.untlpy2dcpy(untl_elements, resolve_values=True)
    language = root.children[0]
    assert language.tag == 'language'
    assert language.content == 'Spanish'
Exemple #11
0
def test_untlpy2highwirepy_not_official_title():
    """A lone alternate title is converted to a citation_title element."""
    alternate_title = {'qualifier': 'alternatetitle', 'content': 'Tres Actos'}
    untl_elements = untldoc.untldict2py({'title': [alternate_title]})
    highwire_list = untldoc.untlpy2highwirepy(untl_elements)
    assert len(highwire_list) == 1
    element = highwire_list[0]
    assert element.qualifier == 'alternatetitle'
    assert element.name == 'citation_title'
    assert element.content == 'Tres Actos'
Exemple #12
0
def test_untlpy2dcpy():
    """untlpy2dcpy converts a UNTL tree into a `dc` tree of five children."""
    coverage = [
        {'content': '1943', 'qualifier': 'sDate'},
        {'content': '1944', 'qualifier': 'eDate'},
        {'content': 'United States - Texas', 'qualifier': 'placeName'},
    ]
    publisher = [{
        'content': {'name': 'UNT Press', 'location': 'Denton, Texas'}
    }]
    creator = [{
        'content': {'type': 'org', 'name': 'UNT'},
        'qualifier': 'aut'
    }]
    title = [{'content': 'UNT Book', 'qualifier': 'officialtitle'}]
    collection = [{'content': 'UNT'}]
    untl_dict = {
        'coverage': coverage,
        'publisher': publisher,
        'creator': creator,
        'title': title,
        'collection': collection,
    }

    root = untldoc.untlpy2dcpy(untldoc.untldict2py(untl_dict))
    assert isinstance(root, dc.DCElement)
    assert len(root.children) == 5
    assert root.tag == 'dc'

    expected_children = [
        ('coverage', 'United States - Texas'),
        ('publisher', 'UNT Press'),
        ('creator', 'UNT'),
        ('title', 'UNT Book'),
        # Coverage sDate/eDate are combined and added at the end.
        ('coverage', '1943-1944'),
    ]
    for child, (tag, content) in zip(root.children, expected_children):
        assert child.tag == tag
        assert child.content == content
Exemple #13
0
def test_untlpy2dcpy_resolve_urls():
    """With resolve_urls=True the language content becomes the vocab URL."""
    spanish = {
        'url': 'http://example.com/languages/#spa',
        'name': 'spa',
        'label': 'Spanish'
    }
    untl_elements = untldoc.untldict2py({'language': [{'content': 'spa'}]})
    root = untldoc.untlpy2dcpy(
        untl_elements,
        resolve_urls=True,
        verbose_vocabularies={'languages': [spanish]},
    )
    language = root.children[0]
    assert language.tag == 'language'
    assert language.content == 'http://example.com/languages/#spa'
Exemple #14
0
 def test_convert_content_no_qualifier_roundtrip(self):
     """Test adding a child without a qualifier doesn't create one
     when converting from py to dict to py.
     """
     field = PYUNTL_DISPATCH['title'](content='Tie Till the Title')
     self.record.add_child(field)
     content_dict = py2dict(self.record)
     # assertEqual reports the differing dictionaries on failure.
     self.assertEqual(
         content_dict, {'title': [{'content': 'Tie Till the Title'}]}
     )
     py_from_dict = untldict2py(content_dict)
     self.assertIsNone(py_from_dict.children[0].qualifier)
Exemple #15
0
 def test_convert_content_no_qualifier_roundtrip(self):
     """Test adding a child without a qualifier doesn't create one
     when converting from py to dict to py.
     """
     field = PYUNTL_DISPATCH['title'](content='Tie Till the Title')
     self.record.add_child(field)
     content_dict = py2dict(self.record)
     # assertEqual reports the differing dictionaries on failure.
     self.assertEqual(
         content_dict, {'title': [{'content': 'Tie Till the Title'}]}
     )
     py_from_dict = untldict2py(content_dict)
     self.assertIsNone(py_from_dict.children[0].qualifier)
Exemple #16
0
def test_untlpy2dcpy_add_permalink_and_ark():
    """Passing ark, domain_name and scheme adds two identifier children."""
    title_entry = {'content': 'UNT Book', 'qualifier': 'officialtitle'}
    untl_elements = untldoc.untldict2py({'title': [title_entry]})
    root = untldoc.untlpy2dcpy(
        untl_elements,
        ark='ark:/67531/metatest1',
        domain_name='example.com',
        scheme='https',
    )
    title, permalink, ark_identifier = root.children[:3]
    assert title.tag == 'title'
    assert permalink.tag == 'identifier'
    assert permalink.content == 'https://example.com/ark:/67531/metatest1/'
    assert ark_identifier.tag == 'identifier'
    assert ark_identifier.content == 'ark: ark:/67531/metatest1'
Exemple #17
0
def test_untlpy2highwirepy():
    """untlpy2highwirepy yields four `meta` elements in a fixed order."""
    highwire_list = untldoc.untlpy2highwirepy(
        untldoc.untldict2py(UNTL_DICTIONARY))
    assert len(highwire_list) == 4
    for element in highwire_list:
        assert element.tag == 'meta'

    author, publisher, publication_date, title = highwire_list

    assert author.qualifier == 'aut'
    assert author.name == 'citation_author'
    assert author.content == 'Last, Furston, 1807-1865.'

    assert publisher.qualifier is None
    assert publisher.name == 'citation_publisher'
    assert publisher.content == 'Fake Publishing'

    assert publication_date.qualifier == 'creation'
    assert publication_date.name == 'citation_publication_date'
    assert publication_date.content == '1944'

    assert title.qualifier == 'officialtitle'
    assert title.name == 'citation_title'
    assert title.content == 'Tres Actos'
Exemple #18
0
def test_highwirepy2dict():
    """highwirepy2dict maps each citation name to a list of content dicts."""
    expected_dict = {
        'citation_author': [{'content': 'Last, Furston, 1807-1865.'}],
        'citation_publisher': [{'content': 'Fake Publishing'}],
        'citation_publication_date': [{'content': '1944'}],
        'citation_title': [{'content': 'Tres Actos'}],
    }
    highwire_list = untldoc.untlpy2highwirepy(
        untldoc.untldict2py(UNTL_DICTIONARY))
    generated_dict = untldoc.highwirepy2dict(highwire_list)
    assert generated_dict == expected_dict
Exemple #19
0
def test_untldict2py():
    """untldict2py builds a `metadata` root with five ordered children."""
    root = untldoc.untldict2py(UNTL_DICTIONARY)
    assert isinstance(root, us.UNTLElement)
    assert len(root.children) == 5
    assert root.tag == 'metadata'

    title, creator, publisher, collection, date = root.children

    assert title.tag == 'title'
    assert title.content == 'Tres Actos'
    assert title.qualifier == 'officialtitle'

    assert creator.tag == 'creator'
    assert creator.qualifier == 'aut'
    creator_name = creator.children[0]
    assert creator_name.tag == 'name'
    assert creator_name.content == 'Last, Furston, 1807-1865.'

    assert publisher.tag == 'publisher'
    publisher_name = publisher.children[0]
    assert publisher_name.tag == 'name'
    assert publisher_name.content == 'Fake Publishing'

    assert collection.tag == 'collection'
    assert collection.content == 'UNT'

    assert date.tag == 'date'
    assert date.content == '1944'
Exemple #20
0
 def testTextEscape(self):
     """Test ANVL text produced from highwire elements built with escape=True."""
     title_entries = [
         {'content': 'Clifford & Lassie', 'qualifier': 'officialtitle'},
     ]
     meta_entries = [
         {'content': 'ark:/67531/metapth38622', 'qualifier': 'ark'},
         {
             'content': '2008-06-29, 00:31:14',
             'qualifier': 'metadataCreationDate'
         },
     ]
     source_dict = {'title': title_entries, 'meta': meta_entries}
     elements = untlpy2highwirepy(untldict2py(source_dict), escape=True)
     generated_text = generate_highwire_text(elements)
     expected_text = ('citation_title: Clifford &amp; Lassie\n'
                      'citation_online_date: 06/29/2008')
     self.assertEqual(generated_text, expected_text)
Exemple #21
0
 def testHighwire2Dict(self):
     """Test that highwirepy2dict produces a dict from highwire elements."""
     highwire_elements = untlpy2highwirepy(untldict2py(UNTL_DICT))
     converted = highwirepy2dict(highwire_elements)
     self.assertEqual(type(converted), dict)
Exemple #22
0
 def setUp(self):
     """Build the root element from UNTL_DICT before each test."""
     root = untldict2py(UNTL_DICT)
     self.root_element = root
Exemple #23
0
 def testIncompleteness(self):
     """Test that BAD_UNTL_DICT scores below 0.02 for completeness."""
     bad_pyuntl = untldict2py(BAD_UNTL_DICT)
     completeness = determine_completeness(bad_pyuntl)
     # assertLess reports the actual score on failure.
     self.assertLess(completeness, 0.02)
Exemple #24
0
 def testIncompleteness(self):
     """Test that BAD_UNTL_DICT scores below 0.02 for completeness."""
     bad_pyuntl = untldict2py(BAD_UNTL_DICT)
     completeness = determine_completeness(bad_pyuntl)
     # assertLess reports the actual score on failure.
     self.assertLess(completeness, 0.02)
Exemple #25
0
 def test_complete_record(self):
     """Test each child's tag is one of the keys of the source dict."""
     self.record = untldict2py(UNTL_DICT)
     for child in self.record.children:
         # assertIn names the missing tag on failure; membership on the
         # dict itself checks its keys.
         self.assertIn(child.tag, UNTL_DICT)
Exemple #26
0
 def testETDfromUNTL(self):
     """Confirm converting a UNTL object yields an ETD_MS object."""
     untl_root = untldict2py(UNTL_DICT)
     converted = untlpy2etd_ms(untl_root)
     self.assertEqual(type(converted), ETD_MS)
Exemple #27
0
 def test_generate_form_data(self):
     """Test generate_form_data returns a FormGenerator instance."""
     self.record = untldict2py(UNTL_DICT)
     form_gen = self.record.generate_form_data(sort_order=UNTL_PTH_ORDER)
     # assertIsInstance reports the actual type on failure.
     self.assertIsInstance(form_gen, FormGenerator)
Exemple #28
0
 def testUNTL2HIGHWIRE(self):
     """Test conversion from UNTL to Highwire yields HighwireElement objects."""
     untlpy = untldict2py(UNTL_DICT)
     highwire_elements = untlpy2highwirepy(untlpy)
     for element in highwire_elements:
         # assertIsInstance replaces the issubclass(type(...)) check and
         # reports the offending element on failure.
         self.assertIsInstance(element, HighwireElement)
Exemple #29
0
    def __init__(self, identifier, metadataLocations, staticFileLocations,
                 mimetypeIconsPath, use, **kwargs):
        """Load a resource's METS record and derive its metadata.

        Args:
            identifier: Either a path ending in ``.mets.xml`` or a meta_id.
                For a path, the meta_id is taken from the file name; for a
                meta_id, the METS file is located through
                ``get_mets_record_system``.
            metadataLocations: Stored on the instance and searched when
                ``identifier`` is a meta_id.
            staticFileLocations: Stored on the instance.
            mimetypeIconsPath: Accepted but not used by this method.
            use: Stored on the instance.
            **kwargs: Optional ``getCopy_url`` and
                ``transcriptions_server_url``.

        Raises:
            ResourceObjectException: If the METS document cannot be opened.
        """
        self.metadataLocations = metadataLocations
        self.staticFileLocations = staticFileLocations
        self.use = use
        getCopy_url = kwargs.get('getCopy_url', None)

        # If the identifier is a filename, use that. Otherwise treat it as
        # a meta_id.
        if identifier.endswith(".mets.xml"):
            self.mets_filename = identifier
            self.meta_id = os.path.split(identifier)[1].split(".")[0]
            self.pair_path = get_pair_path(self.meta_id)
            self.metadata_system = None
        else:
            self.meta_id = identifier
            # Get the pair path for the digital object
            self.pair_path = get_pair_path(self.meta_id)
            # Determine the location of the resource and the filename
            # of the resource's record
            self.mets_filename, self.metadata_system = get_mets_record_system(
                self.meta_id,
                self.pair_path,
                metadataLocations
            )
        # Get dimensions data
        self.dimensions = get_dimensions_data(self.mets_filename)
        # If a getCopy url was given
        if getCopy_url:
            self.getCopy_data = get_getCopy_data(getCopy_url, self.meta_id)
        else:
            self.getCopy_data = {}
        # Open the METS document
        try:
            mets_filehandle = open_system_file(self.mets_filename)
        except Exception:
            raise ResourceObjectException("Could not open the Mets " +
                                          "document: %s" % (self.meta_id))
        # Parse the mets document, closing the file even when parsing
        # fails so the handle is never leaked.
        try:
            parsed_mets = etree.parse(mets_filehandle)
        finally:
            mets_filehandle.close()
        # Get Metadata File
        # NOTE(review): presumably this sets self.metadata_file and
        # self.metadata_type, which are read just below -- confirm.
        self.get_metadata_file(parsed_mets)
        # Get the descriptive metadata
        self.desc_MD = get_desc_metadata(self.metadata_file,
                                         self.metadata_type)
        # Get transcriptions data; the resource type is the content of the
        # first resourceType entry, or None when there is no such entry.
        resource_types = self.desc_MD.get('resourceType')
        resource_type = (
            resource_types[0].get('content') if resource_types else None
        )
        self.transcriptions = get_transcriptions_data(
            meta_id=self.meta_id,
            resource_type=resource_type,
            transcriptions_server_url=kwargs.get('transcriptions_server_url'),
        )
        # Get the fileSets within the fileSec
        self.get_structMap(parsed_mets)
        # Get the embargo information, if it exists
        self.get_embargo()
        # Get the author citation string
        self.author_citation_string = get_author_citation_string(self.desc_MD)
        self.completeness = untldict2py(self.desc_MD).completeness
Exemple #30
0
 def test_complete_record(self):
     """Test each child's tag is one of the keys of the source dict."""
     self.record = untldict2py(UNTL_DICT)
     for child in self.record.children:
         # assertIn names the missing tag on failure; membership on the
         # dict itself checks its keys.
         self.assertIn(child.tag, UNTL_DICT)
Exemple #31
0
 def testUNTL2HIGHWIRE(self):
     """Test conversion from UNTL to Highwire yields HighwireElement objects."""
     untlpy = untldict2py(UNTL_DICT)
     highwire_elements = untlpy2highwirepy(untlpy)
     for element in highwire_elements:
         # assertIsInstance replaces the issubclass(type(...)) check and
         # reports the offending element on failure.
         self.assertIsInstance(element, HighwireElement)
Exemple #32
0
 def test_generate_form_data(self):
     """Test generate_form_data returns a FormGenerator instance."""
     self.record = untldict2py(UNTL_DICT)
     form_gen = self.record.generate_form_data(sort_order=UNTL_PTH_ORDER)
     # assertIsInstance reports the actual type on failure.
     self.assertIsInstance(form_gen, FormGenerator)
Exemple #33
0
 def setUp(self):
     """Build the root element from UNTL_DICT before each test."""
     root = untldict2py(UNTL_DICT)
     self.root_element = root
Exemple #34
0
def test_untlpy2dcpy_only_eDate():
    """A lone eDate coverage value is carried over as the coverage content."""
    end_date = {'content': '1955', 'qualifier': 'eDate'}
    untl_elements = untldoc.untldict2py({'coverage': [end_date]})
    root = untldoc.untlpy2dcpy(untl_elements)
    coverage = root.children[0]
    assert coverage.tag == 'coverage'
    assert coverage.content == '1955'
Exemple #35
0
 def testCompleteness(self):
     """Test that the record built from UNTL_DICT scores 1.0."""
     score = determine_completeness(untldict2py(UNTL_DICT))
     self.assertEqual(score, 1.0)
Exemple #36
0
 def testJson(self):
     """Test the JSON string generated from highwire elements."""
     elements = untlpy2highwirepy(untldict2py(UNTL_DICT))
     generated_json = generate_highwire_json(elements)
     self.assertEqual(generated_json, HIGHWIRE_JSON)
Exemple #37
0
 def testCompleteness(self):
     """Test that the record built from UNTL_DICT scores 1.0."""
     score = determine_completeness(untldict2py(UNTL_DICT))
     self.assertEqual(score, 1.0)
Exemple #38
0
 def testText(self):
     """Test the ANVL text generated from highwire elements."""
     elements = untlpy2highwirepy(untldict2py(UNTL_DICT))
     generated_text = generate_highwire_text(elements)
     self.assertEqual(generated_text, HIGHWIRE_TEXT)
Exemple #39
0
 def test_create_pyuntl_from_dict(self):
     """Test untldict2py returns a Metadata instance."""
     self.root_element = untldict2py(UNTL_DICT)
     # assertIsInstance reports the actual type on failure.
     self.assertIsInstance(self.root_element, Metadata)
Exemple #40
0
 def testHighwire2Dict(self):
     """Test that highwirepy2dict produces a dict from highwire elements."""
     highwire_elements = untlpy2highwirepy(untldict2py(UNTL_DICT))
     converted = highwirepy2dict(highwire_elements)
     self.assertEqual(type(converted), dict)
Exemple #41
0
 def test_create_pyuntl_from_dict(self):
     """Test untldict2py returns a Metadata instance."""
     self.root_element = untldict2py(UNTL_DICT)
     # assertIsInstance reports the actual type on failure.
     self.assertIsInstance(self.root_element, Metadata)