Example #1
0
def create_streams(streams, premis_file):
    """Build PREMIS bitstream objects for audio and video streams.

    Every created object is also appended to ``premis_file`` inside a
    structural "includes" relationship.

    :streams: Stream dict; iterated as (index, stream metadata dict) pairs
    :premis_file: Created PREMIS XML file for the digital object file
    :returns: Dict that maps a stream index to its PREMIS object, or
              ``None`` when fewer than two streams are given
    """
    if len(streams) < 2:
        return None

    stream_objects = {}
    for stream_index, stream_md in six.iteritems(streams):
        # Only audio and video streams get a PREMIS object of their own.
        if stream_md['stream_type'] in ['video', 'audio']:
            object_id = premis.identifier(
                identifier_type='UUID',
                identifier_value=six.text_type(uuid4()))
            designation = premis.format_designation(stream_md['mimetype'],
                                                    stream_md['version'])
            characteristics = premis.object_characteristics(
                child_elements=[
                    premis.format(child_elements=[designation])])
            stream_object = premis.object(object_id,
                                          child_elements=[characteristics],
                                          bitstream=True)

            stream_objects[stream_index] = stream_object
            relation = premis.relationship('structural', 'includes',
                                           stream_object)
            premis_file.append(relation)

    return stream_objects
Example #2
0
def create_premis_event(tree, event_type, event_datetime, event_detail,
                        event_outcome, event_outcome_detail,
                        linking_agent_identifier, event_id):
    """Append a METS digiprovMD section with a new PREMIS event to ``tree``.

    The event gets a freshly generated UUID identifier. The linking agent
    identifier is added as a child of the event only when it is not
    ``None``. The digiprovMD element is created with ``event_id``.
    """
    new_event_id = premis.identifier(identifier_type='UUID',
                                     identifier_value=str(uuid4()),
                                     prefix='event')

    # The outcome is always present; the agent link is optional.
    children = [premis.outcome(event_outcome, event_outcome_detail)]
    if linking_agent_identifier is not None:
        children.append(linking_agent_identifier)

    event = premis.event(new_event_id,
                         event_type,
                         event_datetime,
                         event_detail,
                         child_elements=children)

    # Wrap the event as digiprovMD > mdWrap > xmlData.
    wrapped = mets.mdwrap(
        'PREMIS:EVENT', '2.3',
        child_elements=[mets.xmldata(child_elements=[event])])
    tree.append(mets.digiprovmd(event_id, child_elements=[wrapped]))
Example #3
0
def create_report_event(result, report_object, report_agent):
    """Create the PREMIS event that reports a digital object validation.

    :result: Validation result dict; the keys "is_valid", "errors" and
             "messages" are read, "extensions" is optional
    :report_object: Passed to the event as its only linking object
    :report_agent: Passed to the event as its only linking agent
    :returns: The created PREMIS event
    """
    identifier = premis.identifier(identifier_type="preservation-event-id",
                                   identifier_value=str(uuid.uuid4()),
                                   prefix='event')

    # Only the literal ``True`` counts as a successful validation.
    if result["is_valid"] is True:
        outcome_value = 'success'
    else:
        outcome_value = 'failure'

    # Errors, when present, follow the messages on a new line.
    errors = result["errors"]
    note = result["messages"]
    if errors:
        note = note + '\n' + errors

    event_outcome = premis.outcome(
        outcome=outcome_value,
        detail_note=note,
        detail_extension=result.get('extensions'))

    return premis.event(
        event_id=identifier,
        event_type="validation",
        event_date_time=datetime.datetime.now().isoformat(),
        event_detail="Digital object validation",
        child_elements=[event_outcome],
        linking_objects=[report_object],
        linking_agents=[report_agent])
Example #4
0
def create_premis_agent(tree, agent_id, agent_name, agent_type):
    """Append a METS digiprovMD section with a new PREMIS agent to ``tree``.

    A single generated UUID is used both for the agent's own identifier and
    for the linking agent identifier that is returned. The digiprovMD
    element is created with ``agent_id``.

    :returns: Linking agent identifier element referring to the new agent
    """
    agent_uuid = str(uuid4())

    agent = premis.agent(
        premis.identifier(identifier_type='UUID',
                          identifier_value=agent_uuid,
                          prefix='agent'),
        agent_name,
        agent_type)

    linking_identifier = premis.identifier(identifier_type='UUID',
                                           identifier_value=agent_uuid,
                                           prefix='linkingAgent')

    # Wrap the agent as digiprovMD > mdWrap > xmlData.
    wrapped = mets.mdwrap(
        'PREMIS:AGENT', '2.3',
        child_elements=[mets.xmldata(child_elements=[agent])])
    tree.append(mets.digiprovmd(agent_id, child_elements=[wrapped]))

    return linking_identifier
Example #5
0
def create_premis_event(**attributes):
    """Create a PREMIS event element with a generated UUID identifier.

    One linking agent identifier element is added to the event for every
    entry in ``linking_agents``.

    :attributes: The following keys:
                  event_type: PREMIS event type
                  event_datetime: Timestamp of the event
                  event_target: Target path of the event
                  event_detail: Short information about the event
                  event_outcome: Event outcome
                  event_outcome_detail: Detailed information about the event
                  linking_agents: Iterable of linking agents, each indexed
                                  as (identifier type, identifier value,
                                  role)
    :returns: PREMIS event XML element
    """
    attributes = _attribute_values(attributes)
    identifier = premis.identifier(
        identifier_type='UUID',
        identifier_value=six.text_type(uuid4()),
        prefix='event')

    children = [premis.outcome(attributes["event_outcome"],
                               attributes["event_outcome_detail"])]

    # The outcome comes first, followed by one identifier per linking agent.
    children.extend(
        premis.identifier(identifier_type=agent[0],
                          identifier_value=agent[1],
                          prefix='linkingAgent',
                          role=agent[2])
        for agent in attributes["linking_agents"])

    return premis.event(identifier,
                        attributes["event_type"],
                        attributes["event_datetime"],
                        attributes["event_detail"],
                        child_elements=children)
def create_premis_object(tree,
                         fname,
                         skip_inspection=None,
                         format_name=None,
                         format_version=None,
                         digest_algorithm='MD5',
                         message_digest=None,
                         date_created=None,
                         charset=None):
    """Create a PREMIS object for the given file and append it to ``tree``.

    Unless ``skip_inspection`` is truthy, the file is run through every
    validator from ``iter_validators`` and the technical metadata of the
    last validator is used to fill in values that were not given.

    :tree: Element that the created PREMIS object is appended to
    :fname: Path of the file to describe
    :skip_inspection: When truthy, no validators are run
    :format_name: Format name; taken from the inspected mimetype if None
    :format_version: Format version; taken from inspection when available
    :digest_algorithm: Name of the digest algorithm ('MD5' if None)
    :message_digest: File digest; computed with ``md5(fname)`` if None
    :date_created: Creation date; ``creation_date(fname)`` if None
    :charset: Character set appended to the format name; falls back to the
              inspected charset when one is available
    :returns: ``tree`` with the new PREMIS object appended
    :raises: Exception if a validator reports the file as not valid;
             KeyError if ``format_name`` is not given and no inspection
             metadata is available
    """

    techmd = {}
    if not skip_inspection:
        for validator in iter_validators(fileinfo(fname)):
            validation_result = validator.result()
            if not validation_result['is_valid']:
                # Format the message here: Exception does not interpolate
                # logging-style arguments by itself.
                raise Exception('File %s is not valid: %s' % (
                    fname, validation_result['errors']))
            # The metadata of the last validator wins. Assigning inside the
            # loop keeps ``techmd`` empty, instead of failing with a
            # NameError, when no validator is available for the file.
            techmd = validation_result['result']

    # NOTE(review): a missing digest is always computed with md5(), even if
    # ``digest_algorithm`` names another algorithm -- confirm with callers.
    if message_digest is None:
        message_digest = md5(fname)
    if digest_algorithm is None:
        digest_algorithm = 'MD5'
    if format_name is None:
        format_name = techmd['format']['mimetype']
    if format_version is None and (techmd and 'version' in techmd['format']):
        format_version = techmd['format']['version']

    # An explicitly given charset takes precedence over the inspected one.
    if charset:
        format_name += '; charset=' + charset
    elif techmd and 'charset' in techmd['format']:
        format_name += '; charset=' + techmd['format']['charset']

    if date_created is None:
        date_created = creation_date(fname)

    premis_fixity = premis.fixity(message_digest, digest_algorithm)
    premis_format_des = premis.format_designation(format_name, format_version)
    premis_format = premis.format(child_elements=[premis_format_des])
    premis_date_created = premis.date_created(date_created)
    premis_create = premis.creating_application(
        child_elements=[premis_date_created])
    premis_objchar = premis.object_characteristics(
        child_elements=[premis_fixity, premis_format, premis_create])

    # Create object element
    object_identifier = premis.identifier(identifier_type='UUID',
                                          identifier_value=str(uuid4()))

    el_premis_object = premis.object(object_identifier,
                                     child_elements=[premis_objchar])
    tree.append(el_premis_object)

    return tree
Example #7
0
def create_premis_event(event_type, event_datetime, event_detail,
                        event_outcome, event_outcome_detail, agent_identifier):
    """Create a PREMIS event element with a generated UUID identifier.

    A linking agent identifier element of type UUID is added to the event
    when an agent identifier is provided.

    :param event_type: Event type
    :param event_datetime: Event time
    :param event_detail: Event details
    :param event_outcome: Event outcome ("success" or "failure")
    :param event_outcome_detail: Event outcome description
    :param agent_identifier: PREMIS agent identifier or ``None``
    :returns: PREMIS event XML element
    """
    identifier = premis.identifier(
        identifier_type='UUID',
        identifier_value=six.text_type(uuid4()),
        prefix='event')

    children = [premis.outcome(event_outcome, event_outcome_detail)]

    # Link the event to its agent only when the caller named one.
    if agent_identifier is not None:
        children.append(
            premis.identifier(identifier_type='UUID',
                              identifier_value=agent_identifier,
                              prefix='linkingAgent'))

    return premis.event(identifier,
                        event_type,
                        event_datetime,
                        event_detail,
                        child_elements=children)
Example #8
0
def create_report_object(metadata_info, linking_sip_type, linking_sip_id):
    """Create the PREMIS representation object for a digital object.

    :metadata_info: Dict; ``object_id`` (with ``type`` and ``value``) and
                    ``filename`` are read
    :linking_sip_type: Identifier type of the related object
    :linking_sip_id: Identifier value of the related object
    :returns: The created PREMIS object
    """
    # Environment: a dependency on the identifier of the described object.
    described_id = metadata_info['object_id']
    dependency_id = premis.identifier(described_id['type'],
                                      described_id['value'],
                                      prefix='dependency')
    environment = premis.environment(
        child_elements=[premis.dependency(identifiers=[dependency_id])])

    # Structural relationship to the object named by the caller.
    relationship = premis.relationship(
        relationship_type='structural',
        relationship_subtype='is included in',
        related_object=premis.identifier(identifier_type=linking_sip_type,
                                         identifier_value=linking_sip_id,
                                         prefix='object'))

    own_id = premis.identifier('preservation-object-id', str(uuid.uuid4()))

    return premis.object(object_id=own_id,
                         original_name=metadata_info['filename'],
                         child_elements=[environment, relationship],
                         representation=True)
Example #9
0
def create_report_agent():
    """Create the PREMIS software agent that performed the validation.

    The identifier value is built from a fixed prefix, the agent name and a
    generated UUID.

    :returns: The created PREMIS agent
    """
    # TODO: Agent could be the used validator instead of script file
    name = "check_sip_digital_objects.py-v0.0"
    id_value = '-'.join(['preservation-agent', name, str(uuid.uuid4())])

    identifier = premis.identifier(identifier_type='preservation-agent-id',
                                   identifier_value=id_value,
                                   prefix='agent')

    return premis.agent(agent_id=identifier,
                        agent_name=name,
                        agent_type='software')
Example #10
0
def create_premis_agent(agent_name, agent_type, agent_identifier):
    """Create a PREMIS agent element identified by a UUID-type identifier.

    :param agent_name: content of PREMIS agentName element
    :param agent_type: content of PREMIS agentType element
    :param agent_identifier: content of PREMIS agentIdentifierValue element
    :returns: PREMIS agent XML element
    """
    identifier_element = premis.identifier(
        identifier_type='UUID',
        identifier_value=agent_identifier,
        prefix='agent')

    return premis.agent(identifier_element, agent_name, agent_type)
Example #11
0
def create_premis_agent(**attributes):
    """Create a PREMIS agent element from keyword attributes.

    :attributes: The following keys, all of which are read:
                 agent_name: content of PREMIS agentName element
                 agent_type: content of PREMIS agentType element
                 agent_identifier: PREMIS agent identifier, indexed as
                                   (identifier type, identifier value)
                 agent_note: passed to the agent as its note
    :returns: PREMIS agent XML element
    """
    agent_id = attributes["agent_identifier"]
    identifier_element = premis.identifier(identifier_type=agent_id[0],
                                           identifier_value=agent_id[1],
                                           prefix='agent')

    return premis.agent(identifier_element,
                        attributes["agent_name"],
                        attributes["agent_type"],
                        note=attributes["agent_note"])
Example #12
0
def create_premis_object(fname,
                         scraper,
                         file_format=None,
                         checksum=None,
                         date_created=None,
                         charset=None,
                         identifier=None,
                         format_registry=None):
    """Create a PREMIS object element for the given file.

    Values that are not given (``None`` or an empty tuple) are taken from
    the scraper, from the file itself, or generated.

    :fname: Path of the file to describe
    :scraper: Scraper whose ``info``, ``mimetype``, ``version``,
              ``streams`` and ``checksum()`` are read
    :file_format: (format name, format version) or None
    :checksum: (digest algorithm, message digest) or None
    :date_created: Creation date; ``creation_date(fname)`` if None
    :charset: Character set; taken from the first stream of text files
              when not given
    :identifier: (identifier type, identifier value); a UUID is generated
                 when not given
    :format_registry: (registry name, registry key) or None
    :returns: PREMIS object element
    :raises: IOError if the first scraper info entry is a "FileExists"
             entry with errors; ValueError if a "ScraperNotFound" entry is
             present or the charset is not allowed
    """
    not_given = [None, ()]

    first_info = scraper.info[0]
    if first_info['class'] == 'FileExists' and len(first_info['errors']) > 0:
        raise IOError(first_info['errors'])
    for _, scraper_info in six.iteritems(scraper.info):
        if scraper_info['class'] == 'ScraperNotFound':
            raise ValueError('File format is not supported.')

    # Fixity: use the given checksum or let the scraper compute an MD5.
    if checksum in not_given:
        digest_algorithm = 'MD5'
        message_digest = scraper.checksum(algorithm='md5')
    else:
        digest_algorithm, message_digest = checksum[0], checksum[1]

    # Format: use the given format or derive it from the scraper.
    if file_format in not_given:
        format_name = scraper.mimetype

        if scraper.version and scraper.version != UNKNOWN_VERSION:
            # A version found by the scraper overrides the predefined one.
            format_version = scraper.version
        else:
            # Default version for predefined mimetypes.
            format_version = DEFAULT_VERSIONS.get(format_name, None)

        # Formats without applicable version get an empty version.
        if format_version == NO_VERSION:
            format_version = ''
    else:
        format_name, format_version = file_format[0], file_format[1]

    first_stream = scraper.streams[0]
    if not charset and first_stream['stream_type'] == 'text':
        charset = first_stream['charset']

    check_metadata(format_name, format_version, scraper.streams, fname)

    if charset:
        if charset not in ALLOWED_CHARSETS:
            raise ValueError('Invalid charset.')
        format_name += '; charset={}'.format(charset)

    if date_created is None:
        date_created = creation_date(fname)

    if identifier in not_given:
        id_type, id_value = 'UUID', six.text_type(uuid4())
    else:
        id_type, id_value = identifier[0], identifier[1]
    object_id = premis.identifier(identifier_type=id_type,
                                  identifier_value=id_value)

    fixity = premis.fixity(message_digest, digest_algorithm)

    # The registry element is added to the format only when one is given.
    format_children = [premis.format_designation(format_name, format_version)]
    if format_registry not in not_given:
        format_children.append(
            premis.format_registry(format_registry[0], format_registry[1]))
    format_element = premis.format(child_elements=format_children)

    creating_app = premis.creating_application(
        child_elements=[premis.date_created(date_created)])
    characteristics = premis.object_characteristics(
        child_elements=[fixity, format_element, creating_app])

    return premis.object(object_id, child_elements=[characteristics])
Example #13
0
def create_premis_object(fname, streams, **attributes):
    """
    Create a PREMIS object element for the given file.

    Attributes that are missing or empty are filled in from the first
    stream, from the file itself, or generated.

    :fname: File name of the digital object
    :streams: Streams from the Scraper
    :attributes: The following keys:
                 charset: Character encoding of a file,
                 file_format: File format and version (tuple) of a file,
                 format_registry: Format registry name and value (tuple),
                 identifier: File identifier type and value (tuple),
                 checksum: Checksum algorithm and value (tuple),
                 date_created: Creation date of a file
    :returns: PREMIS object as etree
    :raises: ValueError if character set is invalid for text files.
    """
    attributes = _attribute_values(attributes)
    main_stream = streams[0]

    # Fixity and creation date fall back to values read from the file.
    checksum = attributes["checksum"] or ("MD5", calc_checksum(fname))
    date_created = attributes["date_created"] or creation_date(fname)

    # A character set is recorded for text files only.
    charset = None
    if main_stream['stream_type'] == 'text':
        charset = attributes["charset"] or main_stream['charset']

    given_format = attributes["file_format"]
    if given_format:
        format_name, format_version = given_format[0], given_format[1]
    else:
        format_name = main_stream["mimetype"]
        stream_version = main_stream["version"]
        if not stream_version or stream_version == UNKNOWN_VERSION:
            # No usable version in the stream: use the predefined default.
            format_version = DEFAULT_VERSIONS.get(format_name, None)
        elif stream_version == NO_VERSION:
            format_version = ''
        else:
            format_version = stream_version

    check_metadata(format_name, format_version, streams, fname)

    mime_suffix = ""
    if charset:
        if charset not in ALLOWED_CHARSETS:
            raise ValueError('Invalid charset.')
        mime_suffix = '; charset={}'.format(charset)

    given_identifier = attributes["identifier"]
    if given_identifier:
        id_type, id_value = given_identifier[0], given_identifier[1]
    else:
        id_type, id_value = 'UUID', six.text_type(uuid4())
    object_id = premis.identifier(identifier_type=id_type,
                                  identifier_value=id_value)

    fixity = premis.fixity(checksum[1], checksum[0])

    # The registry element is added to the format only when one is given.
    format_children = [
        premis.format_designation(format_name + mime_suffix, format_version)]
    registry = attributes["format_registry"]
    if registry:
        format_children.append(
            premis.format_registry(registry[0], registry[1]))
    format_element = premis.format(child_elements=format_children)

    creating_app = premis.creating_application(
        child_elements=[premis.date_created(date_created)])
    characteristics = premis.object_characteristics(
        child_elements=[fixity, format_element, creating_app])

    return premis.object(object_id, child_elements=[characteristics])