Exemple #1
0
def event(event_id, event_type, event_date_time, event_detail,
          child_elements=None, linking_objects=None, linking_agents=None):
    """Build a PREMIS event element.

    :event_id: Element appended as the first child of the event
    :event_type: Text for the eventType element
    :event_date_time: Text for the eventDateTime element
    :event_detail: Text for the eventDetail element
    :child_elements: Elements appended after the event detail
                     (default=None)
    :linking_objects: Elements whose objectIdentifierType and
                      objectIdentifierValue texts are turned into
                      linkingObject identifiers (default=None)
    :linking_agents: Elements whose agentIdentifierType and
                     agentIdentifierValue texts are turned into
                     linkingAgent identifiers (default=None)
    :returns: The event element

    The result has the following shape::

        <premis:event>

            {{ event_id }}

            <premis:eventType>digital signature validation</premis:eventType>
            <premis:eventDateTime>2015-02-03T13:04:25</premis:eventDateTime>
            <premis:eventDetail>
                Submission information package digital signature validation
            </premis:eventDetail>

            {{ child elements }}
            {{ linking agent identifiers }}
            {{ linking object identifiers }}

        </premis:event>

    """
    root = _element('event')
    root.append(event_id)

    # The text children are created in this fixed order.
    text_children = (
        ('eventType', event_type),
        ('eventDateTime', event_date_time),
        ('eventDetail', event_detail),
    )
    for tag_name, raw_text in text_children:
        node = _subelement(root, tag_name)
        node.text = decode_utf8(raw_text)

    for child in child_elements or ():
        root.append(child)

    # Agents are linked before objects; both are handled the same way,
    # only the tag prefix and the identifier prefix differ.
    link_sources = (
        (linking_agents, 'agent', 'linkingAgent'),
        (linking_objects, 'object', 'linkingObject'),
    )
    for sources, kind, link_prefix in link_sources:
        for source in sources or ():
            id_type = source.findtext(
                './/' + premis_ns(kind + 'IdentifierType'))
            id_value = source.findtext(
                './/' + premis_ns(kind + 'IdentifierValue'))
            root.append(identifier(id_type, id_value, link_prefix))

    return root
def test_decode_utf8():
    """
    Check that decode_utf8 yields the same Unicode string for a UTF-8
    encoded byte string and for an already decoded string.
    """
    # First the encoded form, then the already-decoded form.
    for raw in (b't\xc3\xa4hti', "tähti"):
        assert u.decode_utf8(raw) == "tähti"
def delimfileformat(recordseparator, fieldseparatingchar, quotingchar=None):
    """Build the ADDML delimFileFormat section.

    :recordseparator: the character separating the records
    :fieldseparatingchar: the character separating the fields
    :quotingchar: the quoting character used around the fields; the
                  quotingChar element is left out when this is empty
                  or None (default=None)

    Returns the following lxml.etree structure:
        <addml:delimFileFormat>
            <addml:recordSeparator>CR+LF</addml:recordSeparator>
            <addml:fieldSeparatingChar>;</addml:fieldSeparatingChar>
            <addml:quotingChar>'</addml:quotingChar>
        </addml:delimFileFormat>
    """
    root = _element('delimFileFormat')

    # (tag, text) pairs in the order they appear in the output.
    fields = [
        ('recordSeparator', recordseparator),
        ('fieldSeparatingChar', fieldseparatingchar),
    ]
    if quotingchar:
        fields.append(('quotingChar', quotingchar))

    for tag_name, raw_text in fields:
        node = _subelement(root, tag_name)
        node.text = h.decode_utf8(raw_text)

    return root
Exemple #4
0
def parse_element_with_id(root, identifier, section=None):
    """Return the single element with the given ID from the given section.

    If no section is given (or the section is not one of the names
    below), the ID is searched from everywhere in the METS, which is
    extremely slow if the file is large.

    ID is a single unique reference to one of the following elements::

        <techMD>, <sourceMD>, <rightsMD>, <digiprovMD>

    :root: Root element
    :identifier: ID as string
    :section: Optional section to restrict the search to: "amdSec",
              "dmdSec" or "fileSec"
    :returns: The referenced element, or None unless exactly one
              element matches

    """
    identifier = decode_utf8(identifier)

    if section:
        section = decode_utf8(section)

    # The ID is bound as an XPath variable instead of being formatted into
    # the expression, so IDs containing quotes cannot break the query.
    section_queries = {
        "amdSec": "/mets:mets/mets:amdSec/*[@ID=$identifier]",
        "dmdSec": "/mets:mets/mets:dmdSec[@ID=$identifier]",
        # Parenthesised so both halves form ONE expression; previously the
        # second half was a separate statement and was silently discarded.
        "fileSec": ("/mets:mets/mets:fileSec/mets:fileGrp/"
                    "mets:file[@ID=$identifier]"),
    }
    query = section_queries.get(section, "//*[@ID=$identifier]")

    results = root.xpath(query, namespaces=NAMESPACES, identifier=identifier)
    if len(results) == 1:
        return results[0]
    return None
Exemple #5
0
def mets(profile='local', objid=None, label=None, namespaces=None,
         child_elements=None):
    """Build the METS root element.

    :profile: Value for the PROFILE attribute
    :objid: Value for the OBJID attribute; a random UUID4 string is
            generated when None
    :label: Value for the LABEL attribute; the attribute is left out
            when empty or None
    :namespaces: Namespace mapping for the element; NAMESPACES is used
                 when None
    :child_elements: Elements appended to the root (default=None)
    :returns: The mets element
    """
    ns_map = NAMESPACES if namespaces is None else namespaces
    object_id = six.text_type(uuid.uuid4()) if objid is None else objid

    root = _element('mets', ns=ns_map)
    root.set(
        xsi_ns('schemaLocation'),
        'http://www.loc.gov/METS/ '
        'http://www.loc.gov/standards/mets/mets.xsd')
    root.set('PROFILE', decode_utf8(profile))
    root.set('OBJID', decode_utf8(object_id))
    if label:
        root.set('LABEL', decode_utf8(label))

    for child in child_elements or ():
        root.append(child)

    return root
Exemple #6
0
def environment(characteristic=None,
                purposes=None,
                notes=None,
                child_elements=None):
    """Build the PREMIS environment structure.

    :param characteristic: Text for environmentCharacteristic; the
                           element is left out when empty or None
    :param purposes: Texts, one environmentPurpose element per item
    :param notes: Texts, one environmentNote element per item
    :param child_elements: Elements appended after the notes
    :returns: The environment element
    """
    root = _element('environment')

    if characteristic:
        characteristic_node = _subelement(root, 'environmentCharacteristic')
        characteristic_node.text = decode_utf8(characteristic)

    # Purposes come before notes; each text gets its own element.
    repeated_texts = (
        ('environmentPurpose', purposes),
        ('environmentNote', notes),
    )
    for tag_name, texts in repeated_texts:
        for raw_text in texts or ():
            node = _subelement(root, tag_name)
            node.text = decode_utf8(raw_text)

    for child in child_elements or ():
        root.append(child)

    return root
Exemple #7
0
def mptr(loctype=None, xlink_href=None, xlink_type=None):
    """Return the mptr element.

    :loctype: Value for the LOCTYPE attribute
    :xlink_href: Value for the xlink:href attribute
    :xlink_type: Value for the xlink:type attribute
    :returns: The mptr element with all three attributes set
    """
    pointer = _element('mptr', ns={'xlink': XLINK_NS})

    # All three attributes are always set, in this order.
    attributes = (
        ('LOCTYPE', loctype),
        (xlink_ns('href'), xlink_href),
        (xlink_ns('type'), xlink_type),
    )
    for attr_name, raw_value in attributes:
        pointer.set(attr_name, decode_utf8(raw_value))

    return pointer
Exemple #8
0
def structmap(type_attr=None, label=None):
    """Return the structMap element.

    :type_attr: Value for the TYPE attribute; left out when empty or None
    :label: Value for the LABEL attribute; left out when empty or None
    :returns: The structMap element
    """
    element = _element('structMap')

    for attr_name, raw_value in (('TYPE', type_attr), ('LABEL', label)):
        if raw_value:
            element.set(attr_name, decode_utf8(raw_value))

    return element
Exemple #9
0
def format_registry(registry_name, registry_key):
    """Build the formatRegistry element.

    :param registry_name: Text for the formatRegistryName child
    :param registry_key: Text for the formatRegistryKey child
    :return: Element object for format registry.
    """
    root = _element('formatRegistry')

    children = (
        ('formatRegistryName', registry_name),
        ('formatRegistryKey', registry_key),
    )
    for tag_name, raw_text in children:
        node = _subelement(root, tag_name)
        node.text = decode_utf8(raw_text)

    return root
Exemple #10
0
def outcome(outcome, detail_note=None, detail_extension=None,
            single_extension_element=False):
    """Build the PREMIS event outcome structure.

    :outcome: Event outcome (success, failure)
    :detail_note: String description for the event outcome; the note
                  element is left out only when this is None
    :detail_extension: List of detail extension etree elements
    :single_extension_element:
        True: every item of detail_extension goes into one shared
              eventOutcomeDetailExtension element.
        False: every item of detail_extension gets its own
               eventOutcomeDetailExtension element.

    The result has the following shape::

        <premis:eventOutcomeInformation>
            <premis:eventOutcome>success</premis:eventOutcome>
            <premis:eventOutcomeDetail>
                <premis:eventOutcomeDetailNote>
                    mets.xml sha1 4d0c38dedcb5e5fc93586cfa2b7ebedbd63 OK
                </premis:eventOutcomeDetailNote>
            </premis:eventOutcomeDetail>
        </premis:eventOutcomeInformation>

    """
    extension_tag = 'eventOutcomeDetailExtension'

    information = _element('eventOutcomeInformation')

    outcome_node = _subelement(information, 'eventOutcome')
    outcome_node.text = decode_utf8(outcome)

    # The detail element is always present, even when it stays empty.
    detail = _subelement(information, 'eventOutcomeDetail')

    if detail_note is not None:
        note_node = _subelement(detail, 'eventOutcomeDetailNote')
        note_node.text = decode_utf8(detail_note)

    # In single-element mode one wrapper is created up front and reused;
    # otherwise a new wrapper is created for every extension.
    shared_wrapper = None
    if detail_extension and single_extension_element:
        shared_wrapper = _subelement(detail, extension_tag)

    for extension in detail_extension or ():
        if shared_wrapper is not None:
            wrapper = shared_wrapper
        else:
            wrapper = _subelement(detail, extension_tag)
        wrapper.append(extension)

    return information
Exemple #11
0
def format_designation(format_name, format_version=None):
    """Build the formatDesignation element.

    :param format_name: Text for the formatName child
    :param format_version: Text for the formatVersion child; the child
                           is left out when empty or None
    :return: Element object for format designation.
    """
    root = _element('formatDesignation')

    name_node = _subelement(root, 'formatName')
    name_node.text = decode_utf8(format_name)

    if not format_version:
        return root

    version_node = _subelement(root, 'formatVersion')
    version_node.text = decode_utf8(format_version)
    return root
def dmdsec(element_id, child_elements=None, created_date=None):
    """Return the dmdSec element.

    :element_id: Value for the ID attribute
    :child_elements: Elements appended to the section (default=None)
    :created_date: Value for the CREATED attribute; the result of
                   current_iso_datetime() is used when None
    :returns: The dmdSec element
    """
    timestamp = (current_iso_datetime() if created_date is None
                 else created_date)

    section = _element('dmdSec')
    for attr_name, raw_value in (('ID', element_id), ('CREATED', timestamp)):
        section.set(attr_name, decode_utf8(raw_value))

    for child in child_elements or ():
        section.append(child)

    return section
Exemple #13
0
def parse_wrap_mdtype(wrap):
    """Read the metadata type attributes of an element.

    :wrap: Element whose MDTYPE, OTHERMDTYPE and MDTYPEVERSION
           attributes are read
    :returns: Dict with the keys 'mdtype', 'othermdtype' and
              'mdtypeversion'; a missing attribute maps to None
    """
    # Result key -> attribute name, in the order of the returned dict.
    attribute_names = (
        ('mdtype', 'MDTYPE'),
        ('othermdtype', 'OTHERMDTYPE'),
        ('mdtypeversion', 'MDTYPEVERSION'),
    )

    parsed = {}
    for key, attr_name in attribute_names:
        raw_value = wrap.attrib.get(attr_name, None)
        parsed[key] = None if raw_value is None else decode_utf8(raw_value)
    return parsed
Exemple #14
0
def premis_ns(tag, prefix=""):
    """Qualify a tag name with the PREMIS namespace.

    object -> {info:lc...premis}object

    When a prefix is given, the first character of the tag is upper-cased
    and the prefix is put in front of it, e.g. tag 'Identifier' with
    prefix 'object' gives the local name 'objectIdentifier'.

    :tag: Tag name as string
    :prefix: Optional prefix for the local name
    :returns: Prefixed tag

    """
    local_name = decode_utf8(tag)

    if not prefix:
        return '{%s}%s' % (PREMIS_NS, local_name)

    decoded_prefix = decode_utf8(prefix)
    capitalised = local_name[0].upper() + local_name[1:]
    return '{%s}%s%s' % (PREMIS_NS, decoded_prefix, capitalised)
def techmd(element_id, created_date=None, child_elements=None):
    """Return the techMD element.

    :element_id: Value for the ID attribute
    :created_date: Value for the CREATED attribute; the result of
                   current_iso_datetime() is used when None
    :child_elements: Elements appended to the section (default=None)
    :returns: The techMD element
    """
    timestamp = (current_iso_datetime() if created_date is None
                 else created_date)

    section = _element('techMD')
    for attr_name, raw_value in (('ID', element_id), ('CREATED', timestamp)):
        section.set(attr_name, decode_utf8(raw_value))

    for child in child_elements or ():
        section.append(child)

    return section
Exemple #16
0
def mets_ns(tag, prefix=""):
    """Qualify a tag name with the METS namespace.

    object -> {http://...}object

    When a prefix is given, the first character of the tag is upper-cased
    and the prefix is put in front of it.

    :tag: Tag name as string
    :prefix: Optional prefix for the local name
    :returns: Prefixed tag

    """
    local_name = decode_utf8(tag)

    if not prefix:
        return '{%s}%s' % (METS_NS, local_name)

    decoded_prefix = decode_utf8(prefix)
    capitalised = local_name[0].upper() + local_name[1:]
    return '{%s}%s%s' % (METS_NS, decoded_prefix, capitalised)
Exemple #17
0
def fptr(fileid=None):
    """Return the fptr element.

    :fileid: Value for the FILEID attribute
    :returns: The fptr element with FILEID set
    """
    pointer = _element('fptr')
    file_reference = decode_utf8(fileid)
    pointer.set('FILEID', file_reference)
    return pointer
Exemple #18
0
def parse_note(agent):
    """Return the text of the first agentNote found under the agent.

    The first match is used as is; there is no check for an empty
    search result.

    :param agent: Agent Element object.
    :return: Unicode string
    """
    note_texts = agent.xpath(".//premis:agentNote/text()",
                             namespaces=NAMESPACES)
    first_note = note_texts[0]
    return decode_utf8(first_note)
Exemple #19
0
def parse_identifier_type_value(id_elem, prefix='object'):
    """Return identifierType and identifierValue from given PREMIS id.

    If id_elem is not itself the identifier element, its first matching
    child identifier element is used.

    :id_elem: Premis identifier, or an element containing one
    :prefix: Identifier kind; 'relatedObject' selects the
             relatedObjectIdentification element, any other value
             selects the <prefix>Identifier element
    :returns: (identifier_type, identifier_value), or None when no
              identifier element is found

    """
    prefix = decode_utf8(prefix)

    # Resolve the three tag names first; the lookup itself is the same
    # for both identifier kinds.
    if prefix == 'relatedObject':
        container_tag = premis_ns('relatedObjectIdentification')
        type_tag = premis_ns('relatedObjectIdentifierType')
        value_tag = premis_ns('relatedObjectIdentifierValue')
    else:
        container_tag = premis_ns('Identifier', prefix)
        type_tag = premis_ns('IdentifierType', prefix)
        value_tag = premis_ns('IdentifierValue', prefix)

    container = id_elem
    if container.tag != container_tag:
        container = container.find(container_tag)

    if container is None:
        return None

    identifier_type = container.find('./' + type_tag).text
    identifier_value = container.find('./' + value_tag).text
    return (identifier_type, identifier_value)
Exemple #20
0
def date_created(date):
    """Build the dateCreatedByApplication element.

    :param date: Text for the element
    :return: Element object for date created.
    """
    created = _element('dateCreatedByApplication')
    decoded_date = decode_utf8(date)
    created.text = decoded_date
    return created
Exemple #21
0
def parse_objid(mets_el):
    """Return the OBJID attribute of the given `mets` element.

    :mets_el: Element whose OBJID attribute is read
    :returns: objid

    """
    raw_objid = mets_el.get("OBJID")
    return decode_utf8(raw_objid)
Exemple #22
0
def relationship(relationship_type, relationship_subtype, related_object):
    """Build a PREMIS relationship segment.

    :relationship_type: Relationship type from PREMIS vocabulary
    :relationship_subtype: Relationship subtype from PREMIS vocabulary
    :related_object: Related object linked to relationship; its
                     identifier type and value are read with
                     parse_identifier_type_value()
    :returns: The relationship element, or None when related_object
              is None

    Note that the result of parse_identifier_type_value() is unpacked
    directly, so a related object without an identifier (a None result)
    makes the unpacking fail.

    The result has the following shape::

      <premis:relationship>

          <premis:relationshipType>structural</premis:relationshipType>
          <premis:relationshipSubType>
              is included in
          </premis:relationshipSubType>

          {{ premis_identifier(prefix=related) }}

      </premis:relationship>

    """
    if related_object is None:
        return None

    root = _element('relationship')

    text_children = (
        ('relationshipType', relationship_type),
        ('relationshipSubType', relationship_subtype),
    )
    for tag_name, raw_text in text_children:
        node = _subelement(root, tag_name)
        node.text = decode_utf8(raw_text)

    id_type, id_value = parse_identifier_type_value(related_object)
    root.append(identifier(id_type, id_value, prefix='relatedObject'))

    return root
def addml_basic_elem(tag, contents):
    """Build an ADDML basic element, i.e. an element that only holds
    text. Only the tags 'charset' and 'dataType' are supported; None is
    returned for any other tag.

    :tag: Name of the element to create
    :contents: Text for the element
    :returns: The element, or None for an unsupported tag
    """
    supported_tags = ['charset', 'dataType']
    if tag not in supported_tags:
        return None

    element = _element(tag)
    element.text = h.decode_utf8(contents)
    return element
Exemple #24
0
def filegrp(use=None, child_elements=None):
    """Return the fileGrp element.

    :use: Value for the USE attribute; left out when empty or None
    :child_elements: Elements appended to the group (default=None)
    :returns: The fileGrp element
    """
    group = _element('fileGrp')

    if use:
        group.set('USE', decode_utf8(use))

    for child in child_elements or ():
        group.append(child)

    return group
Exemple #25
0
def parse_identifier(section, prefix='object'):
    """Find the first identifier element anywhere below the section.

    :param section: Element to search from
    :param prefix: Identifier kind; 'relatedObject' looks for
                   relatedObjectIdentification, any other value looks
                   for <prefix>Identifier
    :return: Element object.
    """
    decoded_prefix = decode_utf8(prefix)

    if decoded_prefix == 'relatedObject':
        suffix = 'Identification'
    else:
        suffix = 'Identifier'

    search_path = './/' + premis_ns(suffix, decoded_prefix)
    return section.find(search_path)
Exemple #26
0
def mdwrap(mdtype, mdtypeversion, othermdtype="", child_elements=None):
    """Create an mdWrap element with the mandatory
    attributes and append the child elements to the element.

    :mdtype: value for the MDTYPE attribute
    :mdtypeversion: value for the MDTYPEVERSION attribute
    :othermdtype: value for the optional OTHERMDTYPE attribute (only
                  set when MDTYPE is 'OTHER')
    :child_elements: the child elements as a list

    :returns: the mets:mdWrap element as XML
    """
    # Decode before comparing: a byte string b'OTHER' does not compare
    # equal to the text 'OTHER' on Python 3, which used to drop the
    # OTHERMDTYPE attribute silently.
    mdtype = decode_utf8(mdtype)

    mdwrap_e = _element('mdWrap')
    mdwrap_e.set('MDTYPE', mdtype)
    mdwrap_e.set('MDTYPEVERSION', decode_utf8(mdtypeversion))
    if mdtype == 'OTHER':
        mdwrap_e.set('OTHERMDTYPE', decode_utf8(othermdtype))
    if child_elements:
        for elem in child_elements:
            mdwrap_e.append(elem)
    return mdwrap_e
Exemple #27
0
def fixity(message_digest, digest_algorithm='MD5'):
    """Build the fixity element.

    :param message_digest: Text for the messageDigest child (byte
                           strings are decoded like every other text
                           value)
    :param digest_algorithm: Text for the messageDigestAlgorithm child
    :return: Element object for fixity.
    """
    fixity_el = _element('fixity')
    fixity_algorithm = _subelement(fixity_el, 'messageDigestAlgorithm')
    fixity_algorithm.text = decode_utf8(digest_algorithm)
    fixity_checksum = _subelement(fixity_el, 'messageDigest')
    # Decode the digest the same way as the algorithm name. None is passed
    # through untouched so an empty messageDigest still results, as before.
    if message_digest is not None:
        message_digest = decode_utf8(message_digest)
    fixity_checksum.text = message_digest
    return fixity_el
Exemple #28
0
def agent(agent_id, agent_name, agent_type, note=None):
    """Build the PREMIS agent element.

    :agent_id: PREMIS identifier element for the agent; appended as the
               first child
    :agent_name: Agent name
    :agent_type: Agent type
    :note: Text for an agentNote element; the element is left out only
           when this is None

    The result has the following shape::

        <premis:agent>
            <premis:agentIdentifier>
                <premis:agentIdentifierType>
                    preservation-agent-id</premis:agentIdentifierType>
                <premis:agentIdentifierValue>
                    preservation-agent-check_virus_clamscan.py-0.63-1422
                </premis:agentIdentifierValue>
            </premis:agentIdentifier>
            <premis:agentName>check_virus_clamscan.py</premis:agentName>
            <premis:agentType>software</premis:agentType>
        </premis:agent>

    """
    root = _element('agent')
    root.append(agent_id)

    # (tag, text) pairs in output order; the note is optional.
    text_children = [
        ('agentName', agent_name),
        ('agentType', agent_type),
    ]
    if note is not None:
        text_children.append(('agentNote', note))

    for tag_name, raw_text in text_children:
        node = _subelement(root, tag_name)
        node.text = decode_utf8(raw_text)

    return root
Exemple #29
0
def object_characteristics(composition_level='0', child_elements=None):
    """Build the objectCharacteristics element.

    :param composition_level: Text for the compositionLevel child
    :param child_elements: Elements appended after compositionLevel
    :return: Element object for object characteristics.
    """
    root = _element('objectCharacteristics')

    level_node = _subelement(root, 'compositionLevel')
    level_node.text = decode_utf8(composition_level)

    for child in child_elements or ():
        root.append(child)

    return root
Exemple #30
0
def agents_with_type(agents, agent_type='organization'):
    """Yield every agent from `agents` whose agentType text equals the
    given `agent_type`.

    :agents: Iterable of agent elements
    :agent_type: Agent type to select
    :returns: Generator object which iterates all (agent_type, agent_name)

    """
    wanted_type = decode_utf8(agent_type)

    for candidate in agents:
        name = candidate.findtext(premis_ns('agentName'))
        if candidate.findtext(premis_ns('agentType')) != wanted_type:
            continue
        yield (wanted_type, name)