def create_streams(streams, premis_file):
    """Build PREMIS bitstream objects for the audio and video streams.

    Nothing is created when fewer than two streams are given. Otherwise
    every stream whose ``stream_type`` is ``video`` or ``audio`` gets a
    PREMIS object with a fresh UUID identifier and a format designation
    built from the stream's ``mimetype`` and ``version``. Each object is
    also appended to ``premis_file`` wrapped in a structural ``includes``
    relationship.

    :param streams: Stream dict, keyed by stream index
    :param premis_file: PREMIS XML element of the digital object file;
                        relationships are appended to it
    :returns: ``None`` if there are fewer than two streams, otherwise a
              dict mapping stream key to the created PREMIS object
    """
    if len(streams) < 2:
        return None

    bitstreams = {}
    for key, info in six.iteritems(streams):
        if info['stream_type'] in ('video', 'audio'):
            stream_identifier = premis.identifier(
                identifier_type='UUID',
                identifier_value=six.text_type(uuid4()))
            designation = premis.format_designation(info['mimetype'],
                                                    info['version'])
            characteristics = premis.object_characteristics(
                child_elements=[premis.format(child_elements=[designation])])
            bitstream = premis.object(stream_identifier,
                                      child_elements=[characteristics],
                                      bitstream=True)
            bitstreams[key] = bitstream
            # Link the container file to the bitstream it includes.
            premis_file.append(
                premis.relationship('structural', 'includes', bitstream))
    return bitstreams
def create_premis_event(tree, event_type, event_datetime, event_detail,
                        event_outcome, event_outcome_detail,
                        linking_agent_identifier, event_id):
    """Create a PREMIS event and append it to ``tree`` as METS digiprovMD.

    The event gets a fresh UUID identifier. The event outcome is always a
    child of the event; ``linking_agent_identifier`` is added as a second
    child unless it is ``None``. The event is wrapped in xmlData, mdWrap
    (``PREMIS:EVENT``, version ``2.3``) and digiprovMD elements.

    :param tree: Container to which the digiprovMD element is appended
    :param event_type: Event type
    :param event_datetime: Event time
    :param event_detail: Event details
    :param event_outcome: Event outcome
    :param event_outcome_detail: Event outcome description
    :param linking_agent_identifier: Linking agent identifier element or
                                     ``None``
    :param event_id: Identifier given to the digiprovMD element
    """
    identifier = premis.identifier(identifier_type='UUID',
                                   identifier_value=str(uuid4()),
                                   prefix='event')

    children = [premis.outcome(event_outcome, event_outcome_detail)]
    if linking_agent_identifier is not None:
        children.append(linking_agent_identifier)

    event = premis.event(identifier, event_type, event_datetime,
                         event_detail, child_elements=children)

    wrapped = mets.mdwrap(
        'PREMIS:EVENT', '2.3',
        child_elements=[mets.xmldata(child_elements=[event])])
    tree.append(mets.digiprovmd(event_id, child_elements=[wrapped]))
def create_report_event(result, report_object, report_agent):
    """Create a PREMIS validation event from a validation result.

    The outcome is ``success`` only when ``result["is_valid"]`` is exactly
    ``True``; anything else yields ``failure``. The outcome note is
    ``result["messages"]``, followed by a newline and ``result["errors"]``
    when errors are non-empty. The optional ``extensions`` entry is passed
    on as the outcome detail extension.

    :param result: Dict with keys ``is_valid``, ``messages``, ``errors``
                   and optionally ``extensions``
    :param report_object: PREMIS object linked to the event
    :param report_agent: PREMIS agent linked to the event
    :returns: PREMIS event element
    """
    identifier = premis.identifier(
        identifier_type="preservation-event-id",
        identifier_value=str(uuid.uuid4()),
        prefix='event')

    if result["is_valid"] is True:
        status = 'success'
    else:
        status = 'failure'

    note = (result["messages"] + '\n' + result["errors"]
            if result["errors"] else result["messages"])

    event_outcome = premis.outcome(
        outcome=status,
        detail_note=note,
        detail_extension=result.get('extensions', None))

    return premis.event(
        event_id=identifier,
        event_type="validation",
        event_date_time=datetime.datetime.now().isoformat(),
        event_detail="Digital object validation",
        child_elements=[event_outcome],
        linking_objects=[report_object],
        linking_agents=[report_agent])
def create_premis_agent(tree, agent_id, agent_name, agent_type):
    """Create a PREMIS agent and append it to ``tree`` as METS digiprovMD.

    One freshly generated UUID is used both for the agent's own identifier
    and for the linking agent identifier that is returned. The agent is
    wrapped in xmlData, mdWrap (``PREMIS:AGENT``, version ``2.3``) and
    digiprovMD elements.

    :param tree: Container to which the digiprovMD element is appended
    :param agent_id: Identifier given to the digiprovMD element
    :param agent_name: Agent name
    :param agent_type: Agent type
    :returns: Linking agent identifier element referring to the new agent
    """
    agent_uuid = str(uuid4())

    agent = premis.agent(
        premis.identifier(identifier_type='UUID',
                          identifier_value=agent_uuid,
                          prefix='agent'),
        agent_name,
        agent_type)

    # Same UUID as the agent itself, but in linkingAgentIdentifier form.
    link = premis.identifier(identifier_type='UUID',
                             identifier_value=agent_uuid,
                             prefix='linkingAgent')

    wrapped = mets.mdwrap(
        'PREMIS:AGENT', '2.3',
        child_elements=[mets.xmldata(child_elements=[agent])])
    tree.append(mets.digiprovmd(agent_id, child_elements=[wrapped]))
    return link
def create_premis_event(**attributes):
    """Create a PREMIS event element from keyword attributes.

    The attributes are first normalized with ``_attribute_values``. The
    event gets a fresh UUID identifier and an outcome element; one
    linkingAgentIdentifier element is added for every entry in
    ``linking_agents``.

    :attributes: The following keys are read after normalization:
                 event_type: PREMIS event type
                 event_datetime: Timestamp of the event
                 event_detail: Short information about the event
                 event_outcome: Event outcome
                 event_outcome_detail: Detailed information about the event
                 linking_agents: Iterable of linking agents, each indexable
                                 as (identifier type, identifier value,
                                 role)
    :returns: PREMIS event XML element
    """
    values = _attribute_values(attributes)

    identifier = premis.identifier(identifier_type='UUID',
                                   identifier_value=six.text_type(uuid4()),
                                   prefix='event')

    children = [premis.outcome(values["event_outcome"],
                               values["event_outcome_detail"])]
    # One linkingAgentIdentifier per provided agent, in the given order.
    children.extend(
        premis.identifier(identifier_type=agent[0],
                          identifier_value=agent[1],
                          prefix='linkingAgent',
                          role=agent[2])
        for agent in values["linking_agents"])

    return premis.event(identifier,
                        values["event_type"],
                        values["event_datetime"],
                        values["event_detail"],
                        child_elements=children)
def create_premis_object(tree, fname, skip_inspection=None,
                         format_name=None, format_version=None,
                         digest_algorithm='MD5', message_digest=None,
                         date_created=None, charset=None):
    """Create a PREMIS object for the given file and append it to ``tree``.

    Unless ``skip_inspection`` is truthy, the file is run through every
    validator yielded by ``iter_validators(fileinfo(fname))``. The
    ``result`` of the last validator is used as technical metadata to fill
    in the format name, format version and charset when they are not given
    as arguments.

    :param tree: Container to which the PREMIS object is appended
    :param fname: Path of the file to describe
    :param skip_inspection: If truthy, the validators are not run
    :param format_name: Format name; taken from the technical metadata
                        (``format.mimetype``) when ``None``
    :param format_version: Format version; taken from the technical
                           metadata when ``None`` and available there
    :param digest_algorithm: Name of the digest algorithm, ``'MD5'`` when
                             ``None``
    :param message_digest: Digest of the file; computed with ``md5(fname)``
                           when ``None``
    :param date_created: Creation date; ``creation_date(fname)`` when
                         ``None``
    :param charset: Character set appended to the format name; falls back
                    to the charset in the technical metadata
    :returns: ``tree``, with the new PREMIS object appended
    :raises Exception: if a validator reports the file as not valid
    :raises KeyError: if ``format_name`` is ``None`` and the technical
                      metadata has no ``format`` entry (e.g. inspection
                      was skipped)
    """
    techmd = {}
    if not skip_inspection:
        for validator in iter_validators(fileinfo(fname)):
            validation_result = validator.result()
            if not validation_result['is_valid']:
                # Interpolate the message explicitly: Exception() does not
                # apply %-formatting to its arguments.
                raise Exception('File %s is not valid: %s'
                                % (fname, validation_result['errors']))
            techmd = validation_result['result']

    if message_digest is None:
        # NOTE(review): the digest is computed with md5() regardless of
        # digest_algorithm - confirm callers pass message_digest whenever
        # they request another algorithm.
        message_digest = md5(fname)
    if digest_algorithm is None:
        digest_algorithm = 'MD5'
    if format_name is None:
        format_name = techmd['format']['mimetype']
    if format_version is None and (techmd and 'version' in techmd['format']):
        format_version = techmd['format']['version']

    # An explicit charset wins over the one found by inspection.
    if charset:
        format_name += '; charset=' + charset
    elif techmd and 'charset' in techmd['format']:
        format_name += '; charset=' + techmd['format']['charset']

    if date_created is None:
        date_created = creation_date(fname)

    premis_fixity = premis.fixity(message_digest, digest_algorithm)
    premis_format_des = premis.format_designation(format_name, format_version)
    premis_format = premis.format(child_elements=[premis_format_des])
    premis_date_created = premis.date_created(date_created)
    premis_create = premis.creating_application(
        child_elements=[premis_date_created])
    premis_objchar = premis.object_characteristics(
        child_elements=[premis_fixity, premis_format, premis_create])

    # Create object element
    object_identifier = premis.identifier(identifier_type='UUID',
                                          identifier_value=str(uuid4()))
    el_premis_object = premis.object(object_identifier,
                                     child_elements=[premis_objchar])
    tree.append(el_premis_object)
    return tree
def create_premis_event(event_type, event_datetime, event_detail,
                        event_outcome, event_outcome_detail,
                        agent_identifier):
    """Create a PREMIS event element.

    The event gets a fresh UUID identifier and an outcome element. When
    ``agent_identifier`` is not ``None``, a UUID-typed
    linkingAgentIdentifier element carrying that value is added as well.

    :param event_type: Event type
    :param event_datetime: Event time
    :param event_detail: Event details
    :param event_outcome: Event outcome ("success" or "failure")
    :param event_outcome_detail: Event outcome description
    :param agent_identifier: PREMIS agent identifier or ``None``
    :returns: PREMIS event XML element
    """
    identifier = premis.identifier(identifier_type='UUID',
                                   identifier_value=six.text_type(uuid4()),
                                   prefix='event')

    children = [premis.outcome(event_outcome, event_outcome_detail)]
    if agent_identifier is not None:
        # Link the event to the agent that carried it out.
        children.append(
            premis.identifier(identifier_type='UUID',
                              identifier_value=agent_identifier,
                              prefix='linkingAgent'))

    return premis.event(identifier, event_type, event_datetime,
                        event_detail, child_elements=children)
def create_report_object(metadata_info, linking_sip_type, linking_sip_id):
    """Create a PREMIS representation object for a digital object.

    The object gets a fresh ``preservation-object-id`` identifier, the
    original name from ``metadata_info['filename']``, an environment with
    a dependency on ``metadata_info['object_id']`` and a structural
    ``is included in`` relationship to the given SIP.

    :param metadata_info: Dict with ``filename`` and ``object_id`` (a dict
                          with ``type`` and ``value``)
    :param linking_sip_type: Identifier type of the related SIP
    :param linking_sip_id: Identifier value of the related SIP
    :returns: PREMIS object element
    """
    source_id = metadata_info['object_id']
    environment = premis.environment(child_elements=[
        premis.dependency(identifiers=[
            premis.identifier(source_id['type'],
                              source_id['value'],
                              prefix='dependency'),
        ]),
    ])

    sip_relationship = premis.relationship(
        relationship_type='structural',
        relationship_subtype='is included in',
        related_object=premis.identifier(identifier_type=linking_sip_type,
                                         identifier_value=linking_sip_id,
                                         prefix='object'))

    return premis.object(
        object_id=premis.identifier('preservation-object-id',
                                    str(uuid.uuid4())),
        original_name=metadata_info['filename'],
        child_elements=[environment, sip_relationship],
        representation=True)
def create_report_agent():
    """Create the PREMIS software agent that represents the validation run.

    The agent name is fixed and the identifier value is built from the
    prefix ``preservation-agent-``, the agent name and a fresh UUID.

    :returns: PREMIS agent element
    """
    # TODO: Agent could be the used validator instead of script file
    name = "check_sip_digital_objects.py-v0.0"
    identifier_value = ''.join(
        ['preservation-agent-', name, '-', str(uuid.uuid4())])

    identifier = premis.identifier(
        identifier_type='preservation-agent-id',
        identifier_value=identifier_value,
        prefix='agent')

    return premis.agent(agent_id=identifier,
                        agent_name=name,
                        agent_type='software')
def create_premis_agent(agent_name, agent_type, agent_identifier):
    """Create a PREMIS agent element with a UUID-typed identifier.

    :param agent_name: content of PREMIS agentName element
    :param agent_type: content of PREMIS agentType element
    :param agent_identifier: content of PREMIS agentIdentifierValue element
    :returns: PREMIS agent XML element
    """
    identifier_element = premis.identifier(
        identifier_type='UUID',
        identifier_value=agent_identifier,
        prefix='agent')
    return premis.agent(identifier_element, agent_name, agent_type)
def create_premis_agent(**attributes):
    """Create a PREMIS agent element from keyword attributes.

    :attributes: The following keys:
                 agent_name: content of PREMIS agentName element
                 agent_type: content of PREMIS agentType element
                 agent_identifier: PREMIS agent identifier, indexable as
                                   (identifier type, identifier value)
                 agent_note: optional note passed on to the agent element;
                             ``None`` is passed when the key is absent
    :returns: PREMIS agent XML element
    :raises KeyError: if ``agent_identifier``, ``agent_name`` or
                      ``agent_type`` is missing
    """
    agent_identifier = premis.identifier(
        identifier_type=attributes["agent_identifier"][0],
        identifier_value=attributes["agent_identifier"][1],
        prefix='agent')
    # agent_note is optional: a call with only the three mandatory keys
    # must not fail with KeyError.
    premis_agent = premis.agent(agent_identifier,
                                attributes["agent_name"],
                                attributes["agent_type"],
                                note=attributes.get("agent_note"))
    return premis_agent
def create_premis_object(fname, scraper, file_format=None, checksum=None,
                         date_created=None, charset=None, identifier=None,
                         format_registry=None):
    """Create a PREMIS object for the given file from scraper results.

    Tuple arguments that are ``None`` or ``()`` are treated as "not
    given" and are derived from the scraper or generated instead.

    :param fname: File name of the digital object
    :param scraper: Scraper whose ``info``, ``streams``, ``mimetype``,
                    ``version`` and ``checksum()`` are read
    :param file_format: File format name and version (tuple); defaults to
                        the scraper's mimetype and version
    :param checksum: Checksum algorithm and value (tuple); defaults to the
                     MD5 checksum computed by the scraper
    :param date_created: Creation date; ``creation_date(fname)`` if
                         ``None``
    :param charset: Character encoding; for text streams it defaults to
                    the charset of the first stream
    :param identifier: Identifier type and value (tuple); a fresh UUID is
                       used when not given
    :param format_registry: Format registry name and value (tuple)
    :returns: PREMIS object element
    :raises IOError: if the first scraper info entry is ``FileExists``
                     with errors
    :raises ValueError: if any info entry is ``ScraperNotFound`` or the
                        charset is not in ``ALLOWED_CHARSETS``
    """
    first_info = scraper.info[0]
    if first_info['class'] == 'FileExists' and len(first_info['errors']) > 0:
        raise IOError(first_info['errors'])
    for info in six.itervalues(scraper.info):
        if info['class'] == 'ScraperNotFound':
            raise ValueError('File format is not supported.')

    not_given = (None, ())

    if checksum in not_given:
        fixity_args = (scraper.checksum(algorithm='md5'), 'MD5')
    else:
        fixity_args = (checksum[1], checksum[0])

    if file_format in not_given:
        format_name = scraper.mimetype
        # Start from the default version for predefined mimetypes; a known
        # version from the scraper overrides it.
        format_version = DEFAULT_VERSIONS.get(format_name, None)
        if scraper.version and scraper.version != UNKNOWN_VERSION:
            format_version = scraper.version
        # Formats without applicable version get an empty version string.
        if format_version == NO_VERSION:
            format_version = ''
    else:
        format_name, format_version = file_format[0], file_format[1]

    if not charset and scraper.streams[0]['stream_type'] == 'text':
        charset = scraper.streams[0]['charset']

    check_metadata(format_name, format_version, scraper.streams, fname)

    if charset:
        if charset not in ALLOWED_CHARSETS:
            raise ValueError('Invalid charset.')
        format_name += '; charset={}'.format(charset)

    if date_created is None:
        date_created = creation_date(fname)

    if identifier in not_given:
        id_type, id_value = 'UUID', six.text_type(uuid4())
    else:
        id_type, id_value = identifier[0], identifier[1]
    object_identifier = premis.identifier(identifier_type=id_type,
                                          identifier_value=id_value)

    fixity = premis.fixity(*fixity_args)
    format_children = [premis.format_designation(format_name, format_version)]
    if format_registry not in not_given:
        format_children.append(
            premis.format_registry(format_registry[0], format_registry[1]))
    format_element = premis.format(child_elements=format_children)

    creating_application = premis.creating_application(
        child_elements=[premis.date_created(date_created)])
    characteristics = premis.object_characteristics(
        child_elements=[fixity, format_element, creating_application])

    # Create object element
    return premis.object(object_identifier,
                         child_elements=[characteristics])
def create_premis_object(fname, streams, **attributes):
    """
    Create a PREMIS object for the given file from scraper streams.

    The keyword attributes are normalized with ``_attribute_values``.
    Values that are empty after normalization are derived from the first
    stream, computed from the file, or generated.

    :fname: File name of the digital object
    :streams: Streams from the Scraper
    :attributes: The following keys:
                 charset: Character encoding of a file,
                 file_format: File format and version (tuple) of a file,
                 format_registry: Format registry name and value (tuple),
                 identifier: File identifier type and value (tuple),
                 checksum: Checksum algorithm and value (tuple),
                 date_created: Creation date of a file
    :returns: PREMIS object as etree
    :raises: ValueError if character set is invalid for text files.
    """
    attrs = _attribute_values(attributes)

    if not attrs["checksum"]:
        attrs["checksum"] = ("MD5", calc_checksum(fname))
    date_created = attrs["date_created"] or creation_date(fname)

    main_stream = streams[0]

    # Charset only applies to text files; an explicit value wins.
    charset = None
    if main_stream['stream_type'] == 'text':
        charset = attrs["charset"] or main_stream['charset']

    declared_format = attrs["file_format"]
    if declared_format:
        format_name, format_version = declared_format[0], declared_format[1]
    else:
        detected = main_stream["version"]
        if detected and detected != UNKNOWN_VERSION:
            # Formats without applicable version get an empty string.
            format_version = '' if detected == NO_VERSION else detected
        else:
            format_version = DEFAULT_VERSIONS.get(main_stream["mimetype"],
                                                  None)
        format_name = main_stream["mimetype"]

    check_metadata(format_name, format_version, streams, fname)

    charset_suffix = ""
    if charset:
        if charset not in ALLOWED_CHARSETS:
            raise ValueError('Invalid charset.')
        charset_suffix = '; charset={}'.format(charset)

    given_identifier = attrs["identifier"]
    if given_identifier:
        id_type, id_value = given_identifier[0], given_identifier[1]
    else:
        id_type, id_value = 'UUID', six.text_type(uuid4())
    object_identifier = premis.identifier(identifier_type=id_type,
                                          identifier_value=id_value)

    fixity = premis.fixity(attrs["checksum"][1], attrs["checksum"][0])

    format_children = [
        premis.format_designation(format_name + charset_suffix,
                                  format_version)
    ]
    if attrs["format_registry"]:
        format_children.append(
            premis.format_registry(attrs["format_registry"][0],
                                   attrs["format_registry"][1]))
    format_element = premis.format(child_elements=format_children)

    creating_application = premis.creating_application(
        child_elements=[premis.date_created(date_created)])
    characteristics = premis.object_characteristics(
        child_elements=[fixity, format_element, creating_application])

    # Create object element
    return premis.object(object_identifier,
                         child_elements=[characteristics])