Beispiel #1
0
def test_create_schema_catalog(tmpdir, sip, mets, catalog,
                               expected_rewrite_uri, expected_return_code):
    """Run the schema catalog script and inspect the file it writes.

    On an expected success the written catalog must use the SIP directory
    as its base attribute, contain exactly one ``nextCatalog`` entry
    (pointing at ``catalog`` when one was given) and the expected number
    of ``rewriteURI`` entries. On an expected failure no output file may
    exist.
    """
    catalog_path = tmpdir.join('my_catalog_schema.xml').strpath
    sip = os.path.join(TESTDATADIR, 'sips', sip)
    args = [os.path.join(sip, mets), sip, catalog_path]
    if catalog:
        args.extend(['-c', catalog])

    returncode, _stdout, _stderr = shell.run_main(main, args)
    assert expected_return_code == returncode

    if expected_return_code != 0:
        # A failed run must not leave a catalog file behind.
        assert os.path.isfile(catalog_path) is False
        return

    root_element = xml_utils.readfile(catalog_path).getroot()
    base = root_element.attrib[xml_utils.xml_ns('base')]
    assert base.rstrip('/') == sip

    rewrite_uris = [child for child in root_element
                    if child.tag.endswith('rewriteURI')]
    next_catalogs = [child for child in root_element
                     if child.tag.endswith('nextCatalog')]

    if catalog:
        for entry in next_catalogs:
            assert entry.attrib['catalog'] == catalog

    # There should always be one catalog.
    assert len(next_catalogs) == 1
    assert len(rewrite_uris) == expected_rewrite_uri
def parse_flatfilenames(path, reference):
    """Yield the name of every flatFile that points at ``reference``.

    The file at ``path`` is read, each flatFile element is visited, and
    the parsed name is yielded for those whose parsed reference equals
    ``reference``.
    """
    document_root = readfile(path).getroot()

    for candidate in iter_flatfiles(document_root):
        if parse_reference(candidate) != reference:
            continue
        yield parse_name(candidate)
def parse_flatfiledefinitions(path):
    """Yield ADDML data once per flatFileDefinition found in ``path``.

    When the document holds more than one flatFileDefinition, a new ADDML
    document is created for each definition and yielded. Otherwise the
    original root is yielded unchanged.
    """
    root = readfile(path).getroot()
    # Only split the document when there is more than one definition.
    needs_split = flatfiledefinition_count(root) > 1

    for definition in iter_flatfiledefinitions(root):
        yield create_new_addml(root, definition) if needs_split else root
Beispiel #4
0
def test_create_new_addml_simple():
    """Tests the create_new_addml function by supplying testdata to the
    function and asserting that the correct number of ADDML sections are
    returned.
    """
    addml = 'tests/data/addml_simple.xml'
    root = h.readfile(addml)

    # Keep the last flatFileDefinition yielded. Start from None so that an
    # empty iterator gives a clear assertion failure instead of a NameError
    # on the create_new_addml call below.
    testdef = None
    for ffdef in f.iter_flatfiledefinitions(root):
        testdef = ffdef
    assert testdef is not None, 'no flatFileDefinition found in %s' % addml

    addml_new = s.create_new_addml(root, testdef)
    assert f.flatfile_count(addml_new) == 1
    assert f.flatfiledefinition_count(addml_new) == 1
    assert a.sections_count(addml_new, 'flatFileType') == 1
    assert a.sections_count(addml_new, 'recordType') == 1
    assert a.sections_count(addml_new, 'fieldTypes') == 1
def test_set_charset_from_textmd():
    """Tests the set_charset_from_textmd function by asserting that
    the charset from the textMD metadata has been appended to the
    premis:formatName element.
    """
    original_root = h.readfile(TESTAIP_1_4_TEXTMD).getroot()
    new_root = set_charset_from_textmd(original_root)

    format_names = new_root.xpath('.//premis:formatName',
                                  namespaces=NAMESPACES)

    # The first two formatName elements carry the expected charsets.
    assert format_names[0].text == 'text/plain; charset=UTF-16'
    assert format_names[1].text == 'text/plain; charset=UTF-8'
Beispiel #6
0
def check_checksums(mets_path):
    """Check checksums for all digital objects in METS

    Each entry produced by ``iter_metadata_info`` is hashed with its
    recorded algorithm and compared with its recorded digest. Afterwards
    every path produced by ``iter_files`` for the SIP directory that was
    not among those entries is reported as non-listed. Successful checks
    are printed to standard output, problems are yielded.

    :mets_path: Path to the METS file, or to a directory that contains
        ``mets.xml``
    :returns: Iterable containing all error messages

    """

    # Only membership is needed, so a set is used.
    checked_files = set()

    if os.path.isdir(mets_path):
        mets_path = os.path.join(mets_path, 'mets.xml')

    sip_path = os.path.dirname(mets_path)

    def _message(metadata_info, message):
        """Format error message"""
        return ensure_text("%s: %s" % (
            message, os.path.relpath(metadata_info["filename"], sip_path)))

    mets_tree = u.readfile(mets_path)
    for metadata_info in iter_metadata_info(mets_tree, mets_path):

        checked_files.add(metadata_info["filename"])

        if metadata_info['algorithm'] is None:
            yield _message(metadata_info, "Could not find checksum algorithm")
            continue

        try:
            hex_digest = hexdigest(metadata_info['filename'],
                                   metadata_info['algorithm'])
        except IOError as exception:
            if exception.errno == errno.ENOENT:
                yield _message(metadata_info, "File does not exist")
            else:
                # Any other I/O failure means the checksum could not be
                # verified. Report it rather than silently skipping the
                # file, which would let an unverified file pass.
                yield _message(
                    metadata_info,
                    "Could not read file (%s)" % exception.strerror)
            continue

        if hex_digest == metadata_info["digest"]:
            print(_message(metadata_info, "Checksum OK"))
        else:
            yield _message(metadata_info, "Invalid Checksum")

    for path in iter_files(sip_path):
        if path.endswith("ignore_validation_errors"):
            continue

        if path not in checked_files:
            yield _message({'filename': path}, "Nonlisted file")
Beispiel #7
0
def test_create_new_addml_complex():
    """Tests the create_new_addml function by supplying testdata to the
    function and asserting that the correct number of ADDML sections are
    returned and that various elements contain the correct data.
    """
    addml = 'tests/data/addml_complex.xml'
    root = h.readfile(addml)

    # The lookup does not depend on any loop variable, so it is done once
    # rather than once per flatFileDefinition.
    testdef = a.find_section_by_name(root, 'flatFileDefinition',
                                     'testdef2')
    assert testdef is not None, \
        'flatFileDefinition "testdef2" not found in %s' % addml

    addml_new = s.create_new_addml(root, testdef)
    assert f.flatfile_count(addml_new) == 2
    assert f.flatfiledefinition_count(addml_new) == 1
    assert a.sections_count(addml_new, 'flatFileType') == 1
    assert a.sections_count(addml_new, 'recordType') == 1
    assert a.sections_count(addml_new, 'fieldTypes') == 1
    assert f.parse_charset(addml_new) == 'ISO-8859-15'
    assert a.sections_count(addml_new, 'fieldDefinition') == 2
def get_charset_with_filename(path, filename):
    """Return ``'charset=<value>'`` for the flatFile named ``filename``.

    The first flatFile whose parsed name equals ``filename`` is followed
    through its reference to a flatFileDefinition section, and from there
    to a flatFileType section, whose charset is used. Returns ``None``
    when no flatFile has that name.
    """
    root = readfile(path).getroot()

    for flatfile in iter_flatfiles(root):
        if parse_name(flatfile) != filename:
            continue

        # flatFile -> flatFileDefinition -> flatFileType
        definition = find_section_by_name(
            root, 'flatFileDefinition', parse_reference(flatfile))
        flatfiletype = find_section_by_name(
            root, 'flatFileType', parse_reference(definition))

        return 'charset=%s' % parse_charset(flatfiletype)

    return None
def test_move_mix():
    """Tests the move_mix function by asserting that the mix
    has been moved to a techMD metadata block, that the file
    in fileSec links to the metadata and that it doesn't exist
    within the premis metadata anymore.
    """
    root = h.readfile(TESTAIP_1_4_EXTENSIONS).getroot()

    # Move every mix element found inside a premis extension.
    for premis_mix in root.xpath(
            './mets:amdSec/mets:techMD/mets:mdWrap/mets:xmlData/premis:object/'
            'premis:objectCharacteristics/'
            'premis:objectCharacteristicsExtension/mix:mix',
            namespaces=NAMESPACES):

        root = move_mix(root, premis_mix)

    # Start from None so that a missing NISOIMG block is an assertion
    # failure rather than a NameError.
    techmd_id = None
    count = 0
    for mixdata in root.xpath(".//mets:mdWrap[@MDTYPE='NISOIMG']",
                              namespaces=m.NAMESPACES):

        count += 1
        techmd_id = mixdata.xpath('./ancestor::mets:techMD',
                                  namespaces=m.NAMESPACES)[0].get('ID')

    assert techmd_id
    assert count == 1

    fileid = None
    for metsfile in root.xpath('./mets:fileSec//mets:file',
                               namespaces=m.NAMESPACES):
        # ADMID is a whitespace separated list of IDs, so compare whole
        # tokens. A file without ADMID simply does not match.
        if techmd_id in (metsfile.get('ADMID') or '').split():
            fileid = metsfile.get('ID')

    assert fileid == 'file001'

    # The mix element must no longer be a child of any premis extension.
    for premis_extension in root.xpath(
            './/premis:objectCharacteristicsExtension', namespaces=NAMESPACES):
        for elem in premis_extension:
            assert elem.tag != '{http://www.loc.gov/mix/v20}mix'
def test_readfile_utf8(utf8_file):
    """Test that reading a UTF-8 encoded XML file does not raise.

    The return value is not inspected; the test passes as long as
    ``u.readfile`` completes without raising.
    """
    u.readfile(utf8_file)
def test_mets_migration(testpath, metsfile, objid, catalog, contract, valid):
    """Tests that the script transform_mets outputs a METS document
    and migrates the contents to a newer fi:CATALOG
    version as specified in the command line arguments.

    For an expected-valid run the new ``mets.xml`` in ``testpath`` is
    compared with the source document: same number of root children, all
    root attributes kept (values may differ only for CATALOG,
    SPECIFICATION, PROFILE and schemaLocation), and the version, profile
    and contract id attributes must match the target version. For an
    expected-invalid run the script must return 117.
    """
    def _local_attribs(element):
        """Map attribute local names (namespace stripped) to values."""
        # Build a new dict instead of rewriting element.attrib in place,
        # so no mapping is mutated while it is being iterated.
        return {name.split('}', 1)[-1]: value
                for name, value in element.attrib.items()}

    version = '1.7.4'

    # Read the source document once, before the script runs.
    old_mets_root = h.readfile(metsfile).getroot()
    old_root = _local_attribs(old_mets_root)
    old_elem_count = len(old_mets_root.xpath('./*'))

    if old_mets_root.xpath('@*[local-name() = "CATALOG"]'):
        cat_spec = 'CATALOG'
    else:
        cat_spec = 'SPECIFICATION'

    # Only pass the optional arguments that were actually given.
    args = [metsfile, '--objid', objid]
    if catalog:
        args += ['--to_version', catalog]
    args += ['--workspace', testpath]
    if contract:
        args += ['--contractid', 'urn:uuid:' + six.text_type(uuid4())]
    returncode = main(args)

    if valid:
        new_mets = os.path.join(testpath, 'mets.xml')
        assert os.path.isfile(new_mets)

        root = ET.parse(new_mets).getroot()
        assert len(root.xpath('./*')) == old_elem_count
        if catalog:
            if catalog == '1.7':
                version = catalog + '.4'
            else:
                version = catalog + '.0'

        new_attribs = _local_attribs(root)
        for attrib in old_root:
            assert attrib in new_attribs
            if attrib not in [
                    'CATALOG', 'SPECIFICATION', 'PROFILE', 'schemaLocation'
            ]:
                assert old_root[attrib] == new_attribs[attrib]

        assert 'MDTYPEVERSION' in root.xpath('.//mets:mdWrap',
                                             namespaces=m.NAMESPACES)[1].attrib

        assert root.xpath('./mets:metsHdr/@LASTMODDATE',
                          namespaces=m.NAMESPACES)

        assert root.xpath('./mets:metsHdr/@LASTMODDATE',
                          namespaces=m.NAMESPACES) > root.xpath(
                              './mets:metsHdr/@CREATEDATE',
                              namespaces=m.NAMESPACES)

        # A source that used SPECIFICATION is expected at 1.6.1, not 1.6.0.
        if version == '1.6.0' and cat_spec == 'SPECIFICATION':
            version = '1.6.1'

        if version == '1.7.4':
            assert 'CONTRACTID' in new_attribs
            assert root.get('{http://digitalpreservation.fi/schemas/'
                            'mets/fi-extensions}%s' % cat_spec) == version
            assert root.get('{http://digitalpreservation.fi/schemas/'
                            'mets/fi-extensions}CONTRACTID')
            assert root.get('PROFILE') == 'http://digitalpreservation.fi/' \
                                          'mets-profiles/cultural-heritage'
        else:
            assert root.get('{http://www.kdk.fi/standards/'
                            'mets/kdk-extensions}%s' % cat_spec) == version
            assert '{http://www.kdk.fi/standards/' \
                   'mets/kdk-extensions}CONTRACTID' not in root.attrib
            assert '{http://digitalpreservation.fi/schemas/' \
                   'mets/fi-extensions}CONTRACTID' not in root.attrib
            assert root.get('PROFILE') == 'http://www.kdk.fi/kdk-mets-profile'
            assert 'CONTRACTID' not in new_attribs

    else:
        assert returncode == 117
def test_dip_migration(testpath, metsfile, objid, catalog, valid):
    """Run transform_mets in dissemination mode and check its output.

    The script is called with a generated contract id, record status
    ``dissemination`` and ``<objid>.xml`` as the output filename. For an
    expected-valid run the written document is checked for element count,
    OBJID, header attributes and the version, profile and contract id
    attributes of the target version. For an expected-invalid run the
    script must return 117.
    """
    fi_ns = '{http://digitalpreservation.fi/schemas/mets/fi-extensions}'
    kdk_ns = '{http://www.kdk.fi/standards/mets/kdk-extensions}'

    version = '1.7.4'
    filename = objid + '.xml'

    old_elem_count = len(h.readfile(metsfile).getroot().xpath('./*'))

    contractid = 'urn:uuid:' + six.text_type(uuid4())

    # Same argument order as a hand-written call; --to_version is only
    # included when a catalog version was given.
    args = [metsfile, '--objid', objid]
    if catalog:
        args.extend(['--to_version', catalog])
    args.extend([
        '--workspace', testpath,
        '--contractid', contractid,
        '--record_status', 'dissemination',
        '--output_filename', filename,
    ])
    returncode = main(args)

    if not valid:
        assert returncode == 117
        return

    new_mets = os.path.join(testpath, filename)
    assert os.path.isfile(new_mets)

    root = ET.parse(new_mets).getroot()
    assert len(root.xpath('./*')) == old_elem_count
    if catalog:
        suffix = '.4' if catalog == '1.7' else '.0'
        version = catalog + suffix

    assert root.get('OBJID') == objid

    md_wraps = root.xpath('.//mets:mdWrap', namespaces=m.NAMESPACES)
    assert 'MDTYPEVERSION' in md_wraps[1].attrib

    assert 'ID' not in root.attrib
    assert not root.xpath('@*[local-name() = "SPECIFICATION"]')
    assert root.xpath('@*[local-name() = "CATALOG"]')

    record_status = root.xpath('./mets:metsHdr/@RECORDSTATUS',
                               namespaces=m.NAMESPACES)
    assert record_status[0] == 'dissemination'

    assert root.xpath('./mets:metsHdr/@CREATEDATE',
                      namespaces=m.NAMESPACES)

    assert not root.xpath('./mets:metsHdr/@LASTMODDATE',
                          namespaces=m.NAMESPACES)

    if version == '1.7.4':
        assert root.get(fi_ns + 'CATALOG') == version
        assert root.get(fi_ns + 'CONTRACTID')
        assert root.get('PROFILE') == ('http://digitalpreservation.fi/'
                                       'mets-profiles/cultural-heritage')
    else:
        assert root.get(kdk_ns + 'CATALOG') == version
        assert kdk_ns + 'CONTRACTID' not in root.attrib
        assert fi_ns + 'CONTRACTID' not in root.attrib
        assert root.get('PROFILE') == 'http://www.kdk.fi/kdk-mets-profile'