コード例 #1
0
def create_streams(streams, premis_file):
    """Build PREMIS bitstream objects for the audio and video streams.

    Every stream whose ``stream_type`` is ``video`` or ``audio`` gets a
    PREMIS object with a fresh UUID identifier and a format designation
    taken from the stream's ``mimetype`` and ``version``. For each such
    object a ``structural``/``includes`` relationship is appended to
    ``premis_file``.

    :streams: Stream dict
    :premis_file: Created PREMIS XML file for the digital object file
    :returns: Dict of the created PREMIS objects, keyed like ``streams``,
              or None when ``streams`` has fewer than two entries
    """
    if len(streams) < 2:
        return None

    stream_objects = {}
    for key, info in six.iteritems(streams):
        # Only audio and video streams are described; others are skipped.
        if info['stream_type'] in ('video', 'audio'):
            stream_id = premis.identifier(
                identifier_type='UUID',
                identifier_value=six.text_type(uuid4()))
            designation = premis.format_designation(info['mimetype'],
                                                    info['version'])
            characteristics = premis.object_characteristics(
                child_elements=[
                    premis.format(child_elements=[designation])])
            bitstream = premis.object(stream_id,
                                      child_elements=[characteristics],
                                      bitstream=True)

            stream_objects[key] = bitstream

            # Link the file-level object to the stream it contains.
            link = premis.relationship('structural', 'includes', bitstream)
            premis_file.append(link)

    return stream_objects
コード例 #2
0
def create_premis_object(tree,
                         fname,
                         skip_inspection=None,
                         format_name=None,
                         format_version=None,
                         digest_algorithm='MD5',
                         message_digest=None,
                         date_created=None,
                         charset=None):
    """Create a PREMIS object for the given file and append it to ``tree``.

    Unless ``skip_inspection`` is truthy, the file is run through every
    validator yielded by ``iter_validators(fileinfo(fname))``. The
    ``result`` entry of the last validator is used as technical metadata
    that fills in the format name, format version and charset when they
    are not given as arguments.

    :tree: Element to which the created PREMIS object is appended
    :fname: File name of the digital object
    :skip_inspection: If truthy, the file is not validated and no
                      technical metadata is collected
    :format_name: Format name; defaults to the mimetype in the technical
                  metadata
    :format_version: Format version; defaults to the version in the
                     technical metadata, if any
    :digest_algorithm: Algorithm of the given ``message_digest``
    :message_digest: Checksum of the file; calculated with ``md5(fname)``
                     when omitted
    :date_created: Creation date; defaults to ``creation_date(fname)``
    :charset: Character set appended to the format name
    :returns: ``tree`` with the PREMIS object appended
    :raises: Exception if a validator reports the file as invalid or if
             no validator was available for the file.
             KeyError if ``format_name`` is not given and no technical
             metadata was collected (e.g. inspection was skipped).
    """

    techmd = {}
    if not skip_inspection:
        validation_result = None
        for validator in iter_validators(fileinfo(fname)):
            validation_result = validator.result()
            if not validation_result['is_valid']:
                # Interpolate explicitly: exceptions do not apply
                # logging-style lazy formatting to their arguments.
                raise Exception('File %s is not valid: %s' % (
                    fname, validation_result['errors']))

        # Without this guard an empty validator sequence would end in an
        # UnboundLocalError on the lookup below.
        if validation_result is None:
            raise Exception('No validator found for file %s' % fname)

        techmd = validation_result['result']

    if message_digest is None:
        # The digest is computed here with md5(), so it must be labelled
        # as MD5 even if the caller named another algorithm.
        # NOTE(review): assumes md5() returns an MD5 digest - confirm.
        message_digest = md5(fname)
        digest_algorithm = 'MD5'
    elif digest_algorithm is None:
        digest_algorithm = 'MD5'

    if format_name is None:
        format_name = techmd['format']['mimetype']
    if format_version is None and (techmd and 'version' in techmd['format']):
        format_version = techmd['format']['version']

    # An explicitly given charset wins over the detected one.
    if charset:
        format_name += '; charset=' + charset
    elif techmd and 'charset' in techmd['format']:
        format_name += '; charset=' + techmd['format']['charset']

    if date_created is None:
        date_created = creation_date(fname)

    premis_fixity = premis.fixity(message_digest, digest_algorithm)
    premis_format_des = premis.format_designation(format_name, format_version)
    premis_format = premis.format(child_elements=[premis_format_des])
    premis_date_created = premis.date_created(date_created)
    premis_create = premis.creating_application(
        child_elements=[premis_date_created])
    premis_objchar = premis.object_characteristics(
        child_elements=[premis_fixity, premis_format, premis_create])

    # Create object element
    object_identifier = premis.identifier(identifier_type='UUID',
                                          identifier_value=str(uuid4()))

    el_premis_object = premis.object(object_identifier,
                                     child_elements=[premis_objchar])
    tree.append(el_premis_object)

    return tree
コード例 #3
0
def create_report_object(metadata_info, linking_sip_type, linking_sip_id):
    """Build the PREMIS representation object for a digital object.

    The returned object carries a new ``preservation-object-id``
    identifier (a random UUID), the original file name, an environment
    whose dependency points at the identifier found in
    ``metadata_info['object_id']``, and a ``structural``/``is included in``
    relationship to the given SIP identifier.

    :metadata_info: Dict with ``filename`` and ``object_id`` (the latter
                    holding ``type`` and ``value``)
    :linking_sip_type: Identifier type of the related SIP
    :linking_sip_id: Identifier value of the related SIP
    :returns: The created PREMIS object
    """
    source_id = metadata_info['object_id']

    # Environment: the described object is recorded as a dependency.
    environment = premis.environment(child_elements=[
        premis.dependency(identifiers=[
            premis.identifier(source_id['type'],
                              source_id['value'],
                              prefix='dependency'),
        ]),
    ])

    # Relationship: the object is included in the linked SIP.
    sip_relationship = premis.relationship(
        relationship_type='structural',
        relationship_subtype='is included in',
        related_object=premis.identifier(identifier_type=linking_sip_type,
                                         identifier_value=linking_sip_id,
                                         prefix='object'))

    return premis.object(
        object_id=premis.identifier('preservation-object-id',
                                    str(uuid.uuid4())),
        original_name=metadata_info['filename'],
        child_elements=[environment, sip_relationship],
        representation=True)
コード例 #4
0
def create_premis_object(fname,
                         scraper,
                         file_format=None,
                         checksum=None,
                         date_created=None,
                         charset=None,
                         identifier=None,
                         format_registry=None):
    """Assemble the PREMIS object element describing a file.

    Values that are not given as arguments are taken from ``scraper``
    (mimetype, version, charset of the first stream, MD5 checksum) or
    from ``creation_date(fname)``.

    :fname: File name of the digital object
    :scraper: Scraper that has already examined the file
    :file_format: Tuple (format name, format version)
    :checksum: Tuple (digest algorithm, message digest)
    :date_created: Creation date of the file
    :charset: Character encoding of the file
    :identifier: Tuple (identifier type, identifier value); a UUID is
                 generated when omitted
    :format_registry: Tuple of two values passed to
                      ``premis.format_registry``
    :returns: The created PREMIS object
    :raises: IOError if the scraper's first info entry is a
             ``FileExists`` result with errors.
             ValueError if no scraper was found for the file or if the
             charset is not in ``ALLOWED_CHARSETS``.
    """
    unset = (None, ())

    first_info = scraper.info[0]
    if first_info['class'] == 'FileExists' and len(first_info['errors']) > 0:
        raise IOError(first_info['errors'])
    if any(entry['class'] == 'ScraperNotFound'
           for _, entry in six.iteritems(scraper.info)):
        raise ValueError('File format is not supported.')

    if checksum in unset:
        digest_algorithm = 'MD5'
        message_digest = scraper.checksum(algorithm='md5')
    else:
        digest_algorithm, message_digest = checksum[0], checksum[1]

    if file_format in unset:
        format_name = scraper.mimetype

        # Start from the predefined default for the mimetype and let a
        # known version reported by the scraper take precedence.
        default_version = DEFAULT_VERSIONS.get(format_name, None)
        scraped_version = scraper.version
        version_known = scraped_version and scraped_version != UNKNOWN_VERSION
        format_version = scraped_version if version_known else default_version

        # Formats without applicable version information get an empty
        # version string.
        format_version = '' if format_version == NO_VERSION else format_version
    else:
        format_name, format_version = file_format[0], file_format[1]

    if not charset:
        primary_stream = scraper.streams[0]
        if primary_stream['stream_type'] == 'text':
            charset = primary_stream['charset']

    check_metadata(format_name, format_version, scraper.streams, fname)

    if charset and charset not in ALLOWED_CHARSETS:
        raise ValueError('Invalid charset.')
    if charset:
        format_name = format_name + '; charset={}'.format(charset)

    if date_created is None:
        date_created = creation_date(fname)

    if identifier in unset:
        id_type, id_value = 'UUID', six.text_type(uuid4())
    else:
        id_type, id_value = identifier[0], identifier[1]
    object_identifier = premis.identifier(identifier_type=id_type,
                                          identifier_value=id_value)

    fixity = premis.fixity(message_digest, digest_algorithm)

    # The registry element is included only when a registry was given.
    format_children = [premis.format_designation(format_name, format_version)]
    if format_registry not in unset:
        format_children.append(
            premis.format_registry(format_registry[0], format_registry[1]))
    format_element = premis.format(child_elements=format_children)

    creating_app = premis.creating_application(
        child_elements=[premis.date_created(date_created)])
    characteristics = premis.object_characteristics(
        child_elements=[fixity, format_element, creating_app])

    return premis.object(object_identifier,
                         child_elements=[characteristics])
コード例 #5
0
def create_premis_object(fname, streams, **attributes):
    """
    Build the PREMIS object element for a file.

    Values missing from ``attributes`` are derived from the file itself
    (checksum, creation date) or from the first stream (mimetype,
    version, charset).

    :fname: File name of the digital object
    :streams: Streams from the Scraper
    :attributes: The following keys:
                 charset: Character encoding of a file,
                 file_format: File format and version (tuple) of a file,
                 format_registry: Format registry name and value (tuple),
                 identifier: File identifier type and value (tuple),
                 checksum: Checksum algorithm and value (tuple),
                 date_created: Creation date of a file
    :returns: PREMIS object as etree
    :raises: ValueError if character set is invalid for text files.
    """
    attributes = _attribute_values(attributes)
    if not attributes["checksum"]:
        attributes["checksum"] = ("MD5", calc_checksum(fname))
    date_created = attributes["date_created"] or creation_date(fname)

    main_stream = streams[0]

    # A charset is only recorded for text files.
    charset = None
    if main_stream['stream_type'] == 'text':
        charset = attributes["charset"] or main_stream['charset']

    given_format = attributes["file_format"]
    if given_format:
        mimetype, version = given_format[0], given_format[1]
    else:
        version = main_stream["version"]
        if version and version != UNKNOWN_VERSION:
            # Formats without applicable version get an empty string.
            if version == NO_VERSION:
                version = ''
        else:
            # Fall back to the predefined default for the mimetype.
            version = DEFAULT_VERSIONS.get(main_stream["mimetype"], None)
        mimetype = main_stream["mimetype"]

    check_metadata(mimetype, version, streams, fname)

    if charset and charset not in ALLOWED_CHARSETS:
        raise ValueError('Invalid charset.')
    charset_suffix = '; charset={}'.format(charset) if charset else ""

    given_identifier = attributes["identifier"]
    if given_identifier:
        id_type, id_value = given_identifier[0], given_identifier[1]
    else:
        id_type, id_value = 'UUID', six.text_type(uuid4())
    object_identifier = premis.identifier(identifier_type=id_type,
                                          identifier_value=id_value)

    fixity = premis.fixity(attributes["checksum"][1],
                           attributes["checksum"][0])

    # The registry element is included only when a registry was given.
    format_children = [
        premis.format_designation(mimetype + charset_suffix, version)]
    registry = attributes["format_registry"]
    if registry:
        format_children.append(
            premis.format_registry(registry[0], registry[1]))
    format_element = premis.format(child_elements=format_children)

    creating_app = premis.creating_application(
        child_elements=[premis.date_created(date_created)])
    characteristics = premis.object_characteristics(
        child_elements=[fixity, format_element, creating_app])

    return premis.object(object_identifier,
                         child_elements=[characteristics])