Beispiel #1
0
def move_mix(root, premis_mix):
    """Move a MIX metadata block into a mets:techMD block of its own.

    A deep copy of the MIX element is wrapped into a new mets:techMD
    block, which is appended to the first mets:amdSec found under root.
    The ID of the new techMD block is appended to the ADMID attribute of
    every mets:file in the mets:fileSec that references the techMD block
    that originally contained the MIX metadata. Finally the enclosing
    premis:objectCharacteristicsExtension element is removed from its
    parent.

    :root: the METS data as XML
    :premis_mix: the MIX metadata within premis

    :returns: the METS data root
    """

    mix_id = '_' + six.text_type(uuid4())
    techmd_id = premis_mix.xpath('./ancestor::mets:techMD',
                                 namespaces=NAMESPACES)[0].get('ID')
    amdsec = root.xpath('.//mets:amdSec', namespaces=NAMESPACES)[0]

    # The new techMD block receives a copy, because the original element
    # is removed together with its extension element below.
    xml_data = mets.xmldata(child_elements=[copy.deepcopy(premis_mix)])
    md_wrap = mets.mdwrap('NISOIMG', '2.0', child_elements=[xml_data])
    techmd = mets.techmd(mix_id, child_elements=[md_wrap])
    amdsec.append(techmd)

    for mets_file in root.xpath('./mets:fileSec//mets:file',
                                namespaces=NAMESPACES):
        admid = mets_file.get('ADMID')
        if admid is None:
            # A file without administrative metadata references cannot
            # refer to the original techMD block.
            continue
        # Compare whole ID tokens: a plain substring test would also
        # match IDs that merely contain techmd_id.
        if techmd_id in admid.split():
            mets_file.set('ADMID', admid + ' ' + mix_id)

    premis_extension = premis_mix.xpath(
        './ancestor::premis:objectCharacteristicsExtension',
        namespaces=NAMESPACES)[0]
    premis_extension.getparent().remove(premis_extension)

    return root
def create_premis_event(tree, event_type, event_datetime, event_detail,
                        event_outcome, event_outcome_detail,
                        linking_agent_identifier, event_id):
    """Create a PREMIS event and append it to tree inside a digiprovMD.

    The event gets a freshly generated UUID identifier. Its children are
    the outcome element and, when linking_agent_identifier is not None,
    the given linking agent identifier. The event is wrapped in
    xmlData / mdWrap ('PREMIS:EVENT', version '2.3') / digiprovMD
    elements and the digiprovMD, created with event_id, is appended to
    tree. Nothing is returned.
    """
    identifier = premis.identifier(identifier_type='UUID',
                                   identifier_value=str(uuid4()),
                                   prefix='event')

    # The outcome is always present; the agent link is optional.
    children = [premis.outcome(event_outcome, event_outcome_detail)]
    if linking_agent_identifier is not None:
        children.append(linking_agent_identifier)

    event = premis.event(identifier,
                         event_type,
                         event_datetime,
                         event_detail,
                         child_elements=children)

    wrapped_event = mets.mdwrap(
        'PREMIS:EVENT', '2.3',
        child_elements=[mets.xmldata(child_elements=[event])])
    tree.append(mets.digiprovmd(event_id, child_elements=[wrapped_event]))
Beispiel #3
0
def test_mdwrap():
    """Check that mets.mdwrap builds the expected mdWrap element."""
    element = mets.mdwrap(mdtype='OTHER',
                          othermdtype='ADDML',
                          mdtypeversion='8.2')
    expected_xml = (
        '<mets:mdWrap xmlns:mets="http://www.loc.gov/METS/" '
        'MDTYPE="OTHER" MDTYPEVERSION="8.2" OTHERMDTYPE="ADDML"/>'
    )
    expected = ET.fromstring(expected_xml)
    assert compare_trees(element, expected) is True
Beispiel #4
0
    def write_md(self,
                 metadata,
                 mdtype,
                 mdtypeversion,
                 othermdtype=None,
                 section=None,
                 stdout=False):
        """
        Wraps XML metadata into MD element and writes it to a lxml.etree XML
        file in the workspace. The output filename is built from
            <hash>-<type>-amd.xml,
        where <hash> is a digest generated from the metadata and <type> is
        othermdtype when it is given, otherwise mdtype. The name is passed
        through encode_path before it is joined with the workspace path.
        If a file with that name already exists, nothing is written.

        Serializing and hashing the root xml element can be rather time
        consuming and as such this method should not be called for each file
        unless more efficient way of separating files by the metadata can't
        be easily implemented. This implementation should be done by the
        subclasses of metadata_creator.

        :metadata (Element): metadata XML element
        :mdtype (string): Value of mdWrap MDTYPE attribute
        :mdtypeversion (string): Value of mdWrap MDTYPEVERSION attribute
        :othermdtype (string): Value of mdWrap OTHERMDTYPE attribute
        :section (string): Type of mets metadata section; 'digiprovmd'
                           creates a digiprovMD element, any other value
                           creates a techMD element
        :stdout (boolean): Print also to stdout
        :returns: md_id, filename - Metadata id and filename
        """
        digest = generate_digest(metadata)
        suffix = othermdtype if othermdtype else mdtype
        filename = encode_path("%s-%s-amd.xml" % (digest, suffix))
        md_id = '_{}'.format(digest)
        filename = os.path.join(self.workspace, filename)

        if not os.path.exists(filename):

            xmldata = mets.xmldata()
            xmldata.append(metadata)
            mdwrap = mets.mdwrap(mdtype, mdtypeversion, othermdtype)
            mdwrap.append(xmldata)
            if section == 'digiprovmd':
                amd = mets.digiprovmd(md_id)
            else:
                amd = mets.techmd(md_id)
            amd.append(mdwrap)
            amdsec = mets.amdsec()
            amdsec.append(amd)
            mets_ = mets.mets()
            mets_.append(amdsec)

            # Serialize once and reuse the result for both the file and
            # stdout. Doing it before open() also means a serialization
            # failure does not leave an empty file behind.
            serialized = xml_helpers.utils.serialize(mets_)

            with open(filename, 'wb+') as outfile:
                outfile.write(serialized)
                if stdout:
                    print(serialized.decode("utf-8"))
                print("Wrote lxml.etree %s administrative metadata to file "
                      "%s" % (mdtype, outfile.name))

        return md_id, filename
def main(arguments=None):
    """The main method for import_description

    Reads the XML file given by args.dmdsec_location, wraps it (or only
    the child elements of its root when args.desc_root is 'remove') into
    a METS dmdSec element and writes the resulting METS document into
    the workspace. The output file name is derived from
    args.dmdsec_target when it is set, otherwise it is 'dmdsec.xml'.

    :arguments: arguments handed to parse_arguments
    :returns: 0
    :raises TypeError: if the namespace of the first imported element is
                       not a key of METS_MDTYPES
    """
    args = parse_arguments(arguments)

    if args.dmdsec_target:
        url_t_path = encode_path(args.dmdsec_target, suffix='-dmdsec.xml')
    else:
        url_t_path = 'dmdsec.xml'

    with open(args.dmdsec_location, 'r') as content_file:
        content = content_file.read()

    _mets = mets.mets()

    tree = lxml.etree.fromstring(content)

    if args.desc_root == 'remove':
        childs = tree.findall('*')
    else:
        childs = [tree]
    xmldata_e = mets.xmldata(child_elements=childs)
    ns = h.get_namespace(childs[0])

    if ns not in METS_MDTYPES:
        raise TypeError("Invalid namespace: %s" % ns)

    mdtype_info = METS_MDTYPES[ns]
    mdt = mdtype_info['mdtype']
    # othermdtype is optional in the table entry.
    mdo = mdtype_info.get('othermdtype')
    mdv = mdtype_info['version']

    mdwrap_e = mets.mdwrap(mdtype=mdt,
                           othermdtype=mdo,
                           mdtypeversion=mdv,
                           child_elements=[xmldata_e])
    dmdsec_e = mets.dmdsec(encode_id(url_t_path), child_elements=[mdwrap_e])

    _mets.append(dmdsec_e)

    # Single-argument print(...) behaves the same under Python 2 and 3.
    if args.stdout:
        print(h.serialize(_mets))

    output_file = os.path.join(args.workspace, url_t_path)
    output_dir = os.path.dirname(output_file)
    # dirname is '' when the output path has no directory part;
    # os.makedirs('') would raise, so only create a real directory.
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    with open(output_file, 'w+') as outfile:
        outfile.write(h.serialize(_mets))

    print("import_description created file: %s" % output_file)

    return 0
Beispiel #6
0
def _create_mets(premis_element, digiprovmd_id, mdtype):
    """Wrap a PREMIS element into a complete METS element.

    The resulting nesting is
    mets > amdSec > digiprovMD > mdWrap > xmlData > premis_element,
    with the mdWrap created with version '2.3'.

    :param premis_element: PREMIS element
    :param digiprovmd_id: ID attribute of digiprovMD element
    :param mdtype: MDTYPE of mdWrap element
    :returns: METS XML element
    """
    # Build from the innermost wrapper outwards.
    wrapped = mets.mdwrap(
        mdtype, '2.3',
        child_elements=[mets.xmldata(child_elements=[premis_element])])
    section = mets.amdsec(
        child_elements=[mets.digiprovmd(digiprovmd_id,
                                        child_elements=[wrapped])])
    return mets.mets(child_elements=[section])
def create_premis_agent(tree, agent_id, agent_name, agent_type):
    """Create a PREMIS agent and append it to tree inside a digiprovMD.

    A single generated UUID is used both for the identifier of the agent
    and for the linking agent identifier that is returned. The agent is
    wrapped in xmlData / mdWrap ('PREMIS:AGENT', version '2.3') /
    digiprovMD elements and the digiprovMD, created with agent_id, is
    appended to tree.

    :returns: linking agent identifier element that carries the same
              UUID value as the agent identifier
    """
    agent_uuid = str(uuid4())

    def _uuid_identifier(prefix):
        # Both identifiers share the UUID and differ only in the prefix.
        return premis.identifier(identifier_type='UUID',
                                 identifier_value=agent_uuid,
                                 prefix=prefix)

    agent = premis.agent(_uuid_identifier('agent'), agent_name, agent_type)
    linking_identifier = _uuid_identifier('linkingAgent')

    wrapped_agent = mets.mdwrap(
        'PREMIS:AGENT', '2.3',
        child_elements=[mets.xmldata(child_elements=[agent])])
    tree.append(mets.digiprovmd(agent_id, child_elements=[wrapped_agent]))

    return linking_identifier
def create_mets(input_file, dmd_id, remove_root=False):
    """Create METS element tree that contains dmdSec element. Descriptive
    metadata is imported from XML file. The whole XML document or just the
    child elements of root can be imported.

    The metadata type, optional other metadata type and version of the
    mdWrap element are looked up from METS_MDTYPES by the namespace of
    the first imported element.

    :input_file: path to input file
    :dmd_id: dmdSec identifier
    :remove_root: import only child elements
    :returns: METS document element tree
    :raises TypeError: if the namespace of the first imported element is
                       not a key of METS_MDTYPES; this includes an
                       element that has no namespace at all
    """

    # Read metadata from XML file.
    tree = lxml.etree.parse(input_file)
    if remove_root:
        metadata = tree.findall('*')
    else:
        metadata = [tree.getroot()]

    # Check metadata type. An element without a namespace has no entry
    # for its prefix in nsmap; .get() lets that case fall through to the
    # "Invalid namespace" error below instead of a bare KeyError.
    first_element = metadata[0]
    namespace = first_element.nsmap.get(first_element.prefix)
    if namespace not in METS_MDTYPES:
        raise TypeError("Invalid namespace: %s" % namespace)
    mdtype_info = METS_MDTYPES[namespace]
    mdtype = mdtype_info['mdtype']
    othermdtype = mdtype_info.get('othermdtype', None)
    version = mdtype_info['version']

    # Create METS Element
    xmldata_element = mets.xmldata(child_elements=metadata)
    mdwrap_element = mets.mdwrap(mdtype=mdtype,
                                 othermdtype=othermdtype,
                                 mdtypeversion=version,
                                 child_elements=[xmldata_element])
    dmdsec_element = mets.dmdsec(dmd_id, child_elements=[mdwrap_element])
    mets_element = mets.mets(child_elements=[dmdsec_element])

    tree = lxml.etree.ElementTree(mets_element)
    lxml.etree.cleanup_namespaces(tree)
    return tree
def main(arguments=None):
    """The main method for argparser

    For every file returned by collect_filepaths, a METS document with
    an amdSec / techMD / mdWrap ('PREMIS:OBJECT', version '2.3') /
    xmlData structure is built and written into args.workspace. The
    output file name is the encoded path of the file (relative to
    args.base_path when that is not empty) with the suffix
    '-techmd.xml'; the techMD ID is derived from the same name.

    :arguments: arguments handed to parse_arguments
    :returns: 0
    """
    args = parse_arguments(arguments)

    # Loop files and create premis objects
    files = collect_filepaths(dirs=args.files, base=args.base_path)
    for filename in files:
        if args.base_path != '':
            filerel = os.path.relpath(filename, args.base_path)
        else:
            filerel = filename

        xmldata = mets.xmldata()
        # The return value is not needed here; xmldata is handed in,
        # presumably so that the PREMIS object is added to it
        # (TODO confirm against create_premis_object).
        create_premis_object(
            xmldata, filename, args.skip_inspection, args.format_name,
            args.format_version, args.digest_algorithm, args.message_digest,
            args.date_created, args.charset)

        # The same encoded name is used for the techMD ID and for the
        # output file, so compute it once.
        techmd_filename = encode_path(filerel, suffix="-techmd.xml")

        mdwrap = mets.mdwrap('PREMIS:OBJECT', '2.3', child_elements=[xmldata])
        techmd = mets.techmd(encode_id(techmd_filename),
                             child_elements=[mdwrap])
        amdsec = mets.amdsec(child_elements=[techmd])
        _mets = mets.mets(child_elements=[amdsec])

        # Single-argument print(...) behaves the same under Python 2 and 3.
        if args.stdout:
            print(h.serialize(_mets))

        if not os.path.exists(args.workspace):
            os.makedirs(args.workspace)

        output_path = os.path.join(args.workspace, techmd_filename)
        with open(output_path, 'w+') as outfile:
            outfile.write(h.serialize(_mets))
            print("Wrote METS technical metadata to file %s" % outfile.name)

    return 0