Ejemplo n.º 1
0
 def addFile(self, file_name, mets_filegroup):
     '''
     Describes a file as a Mets <file> element (with a nested <FLocat>) and adds it to a file group.

     @param file_name:       path of the file; size, checksum and creation date are read from it
     @param mets_filegroup:  Mets element the new <file> element is appended to
     @return:                ID of the new <file> element
     '''
     # the location is stored relative to the package root
     href = "file://./%s" % os.path.relpath(file_name, self.root_path)
     mimetype, _encoding = self.mime.guess_type(href)
     digest = self.sha256(file_name)
     byte_count = os.path.getsize(file_name)
     created = get_file_ctime_iso_date_str(file_name, DT_ISO_FMT_SEC_PREC)
     new_id = "ID" + str(uuid.uuid4())

     file_attributes = {
         "MIMETYPE": mimetype,
         "CHECKSUMTYPE": "SHA-256",
         "CREATED": created,
         "CHECKSUM": digest,
         "USE": "Datafile",
         "ID": new_id,
         "SIZE": byte_count
     }
     locator_attributes = {
         q(XLINK_NS, 'href'): href,
         "LOCTYPE": "URL",
         q(XLINK_NS, 'type'): 'simple'
     }

     file_element = M.file(file_attributes)
     mets_filegroup.append(file_element)
     file_element.append(M.FLocat(locator_attributes))
     return new_id
Ejemplo n.º 2
0
    def setParentRelation(self, identifier):
        '''
        Adds a reference to a parent package to the existing <root_path>/METS.xml and rewrites that file.

        The reference is a <div> holding an <mptr> with the href "urn:uuid:<identifier>". It is appended
        to the <div> labelled "parent <type> identifiers" inside the structMap labelled "parent <type>",
        where <type> is self.mets_data['type']. A missing structMap or <div> is created. If METS.xml does
        not exist, only a message is printed.

        @param identifier:  identifier of the parent package (is prefixed with "urn:uuid:")
        @return:            None
        '''
        parentmets = os.path.join(self.root_path, 'METS.xml')
        packagetype = self.mets_data['type']
        if not os.path.exists(parentmets):
            print('Couldn\'t find the parent %ss Mets file.' % packagetype)
            return

        parser = etree.XMLParser(resolve_entities=False,
                                 remove_blank_text=True,
                                 strip_cdata=False)
        parent_root = etree.parse(parentmets, parser).getroot()

        # the new entry: a div holding the pointer to the parent package
        parent = M.div({'LABEL': "parent %s" % packagetype})
        parent.append(
            M.mptr({
                "LOCTYPE": "OTHER",
                "OTHERLOCTYPE": "UUID",
                q(XLINK_NS, "title"):
                ("Referencing a parent %s." % packagetype),
                q(XLINK_NS, "href"): "urn:uuid:" + identifier,
                "ID": "ID" + str(uuid.uuid4())
            }))

        map_label = 'parent %s' % packagetype
        div_label = 'parent %s identifiers' % packagetype

        parent_map = parent_root.find(
            "%s[@LABEL='%s']" % (q(METS_NS, 'structMap'), map_label))
        if parent_map is None:
            parent_map = M.structMap({'LABEL': map_label, 'TYPE': 'logical'})
            parent_root.append(parent_map)

        # The div is looked up separately: an existing structMap without the
        # expected div must not end in an AttributeError on None.
        parent_div = parent_map.find(
            "%s[@LABEL='%s']" % (q(METS_NS, 'div'), div_label))
        if parent_div is None:
            parent_div = M.div({'LABEL': div_label})
            parent_map.append(parent_div)
        parent_div.append(parent)

        xml_bytes = etree.tostring(parent_root,
                                   encoding='UTF-8',
                                   pretty_print=True,
                                   xml_declaration=True)
        # tostring() with an encoding returns encoded bytes, hence binary mode
        with open(parentmets, 'wb') as output_file:
            output_file.write(xml_bytes)
Ejemplo n.º 3
0
    def addObject(self, abs_path):
        '''
        Builds a Premis object of type 'file' describing a single file: its location relative to
        self.root_path, SHA-256 digest, size and the format key reported by self.fid.

        @param abs_path:    absolute file path
        @return:            Premis object
        '''
        digest = self.sha256(abs_path)
        location = "file://./%s" % os.path.relpath(abs_path, self.root_path)
        format_key = self.fid.identify_file(abs_path)
        byte_count = os.path.getsize(abs_path)
        xml_id = 'ID' + str(uuid.uuid4())

        # assemble the parts bottom-up, then combine them into the object
        identifier = P.objectIdentifier(P.objectIdentifierType('filepath'),
                                        P.objectIdentifierValue(location))
        fixity = P.fixity(P.messageDigestAlgorithm("SHA-256"),
                          P.messageDigest(digest),
                          P.messageDigestOriginator("hashlib"))
        registry = P.formatRegistry(P.formatRegistryName("PRONOM"),
                                    P.formatRegistryKey(format_key),
                                    P.formatRegistryRole("identification"))
        characteristics = P.objectCharacteristics(P.compositionLevel(0),
                                                  fixity,
                                                  P.size(byte_count),
                                                  P.format(registry))

        attributes = {q(XSI_NS, 'type'): 'file', "xmlID": xml_id}
        return P.object(attributes, identifier, characteristics)
Ejemplo n.º 4
0
 def make_mdref(self, path, file, id, mdtype):
     '''
     Assembles the attributes of a Mets <mdRef> element pointing to a metadata file.

     @param path:    folder containing the metadata file
     @param file:    name of the metadata file
     @param id:      value for the ID attribute
     @param mdtype:  value for the MDTYPE attribute
     @return:        dict of attributes (location relative to self.root_path, mimetype, checksum, ...)
     '''
     full_path = os.path.join(path, file)
     guessed_type, _encoding = self.mime.guess_type(full_path)
     href = "file://./%s" % os.path.relpath(full_path, self.root_path)
     return {
         "LOCTYPE": "URL",
         "MIMETYPE": guessed_type,
         "CREATED": current_timestamp(),
         q(XLINK_NS, "type"): "simple",
         q(XLINK_NS, "href"): href,
         "CHECKSUMTYPE": "SHA-256",
         "CHECKSUM": get_sha256_hash(full_path),
         "ID": id,
         "MDTYPE": mdtype
     }
Ejemplo n.º 5
0
    def validate_file(self, file):
        '''
        Validates a file referenced inside a Mets: existence, size and checksum. If the file exists,
        the counter self.total_files is diminished. Every problem found is printed and appended to
        self.validation_errors.

        The file './metadata/earkweb.log' only has to exist; its size and checksum are not compared
        (workaround for earkweb.log in the AIP metadata/ folder on IP root level).

        @param file:    XML Element of a file that will be validated. Its first child must carry the
                        xlink:href attribute; SIZE, CHECKSUM and CHECKSUMTYPE are read from the element.
        @return:        None
        '''
        err = []

        # get information about the file
        attr_path = file[0].attrib[q(XLINK_NS, 'href')]
        attr_size = file.attrib['SIZE']
        attr_checksum = file.attrib['CHECKSUM']
        attr_checksumtype = file.attrib['CHECKSUMTYPE']

        # check if file exists, if yes validate it
        fitem = remove_protocol(attr_path)
        file_path = os.path.join(self.rootpath, fitem).replace('\\', '/')
        if not os.path.exists(file_path):
            err.append(
                "Unable to find file referenced in delivery METS file: %s" %
                file_path)
        else:
            self.total_files -= 1
            # The results for earkweb.log were always discarded, so size and
            # checksum are not even computed for it.
            if not file_path.endswith('./metadata/earkweb.log'):
                # check if file size is valid
                file_size = os.path.getsize(file_path)
                if int(file_size) != int(attr_size):
                    err.append(
                        "Actual file size %s does not equal file size attribute value %s"
                        % (file_size, attr_size))

                # validate checksum
                checksum_result = ChecksumValidation().validate_checksum(
                    file_path, attr_checksum, attr_checksumtype)
                # only a result equal to True counts as success
                if not checksum_result == True:
                    err.append('Checksum validation failed for: %s' %
                               file_path)

        for error in err:
            print('File validation error: ' + error)
            self.validation_errors.append(error)
Ejemplo n.º 6
0
 def __init__(self, f=None):
     '''
     Sets self.root either to the root element parsed from an existing document or to a new, empty
     Premis root element.

     @param f:   source handed to objectify.parse(); if None a new Premis root element is created
     '''
     if f is not None:
         self.root = objectify.parse(f).getroot()
         return

     # new document: root element with schema location and version
     schema_location = PREMIS_NS + ' ../schemas/premis-v2-2.xsd'
     self.root = P.premis({q(XSI_NS, 'schemaLocation'): schema_location})
     self.root.set('version', '2.0')
Ejemplo n.º 7
0
 def add_object(self, identifier_value):
     '''
     Creates a Premis object of type 'file' with a 'LOCAL' identifier and hands it to
     sequence_insert() together with self.root and self.premis_successor_sections.

     @param identifier_value:    value of the object identifier
     @return:                    None
     '''
     identifier = P.objectIdentifier(
         P.objectIdentifierType('LOCAL'),
         P.objectIdentifierValue(identifier_value))
     # NOTE(review): formatRegistryKey is passed uncalled, exactly as before;
     # presumably the element maker instantiates it - confirm.
     registry = P.formatRegistry(P.formatRegistryName(),
                                 P.formatRegistryKey)
     characteristics = P.objectCharacteristics(P.compositionLevel(0),
                                               P.format(registry))
     premis_object = P.object({q(XSI_NS, 'type'): 'file'}, identifier,
                              characteristics)
     sequence_insert(self.root, premis_object,
                     self.premis_successor_sections)
Ejemplo n.º 8
0
    def validate_mets(self, mets):
        '''
        Validates a Mets file. The Mets file is parsed with etree.iterparse() against self.schema_mets,
        which allows event-driven parsing of large files. While parsing:

        - every <file> element is counted in self.total_files and passed to self.validate_file()
        - for every <div> whose LABEL is 'representations/<name>' with <name> matching '*_mig-*', the
          targets of its <mptr> children are appended to self.subsequent_mets as (<name>, href) tuples,
          so that they will be evaluated later on
        - Premis files referenced from an <amdSec> with a 'file://./' href are validated against
          self.schema_premis

        self.rootpath is changed to the folder of the Mets file, so it fits any relative path found in it.

        @param mets:    Path leading to a Mets file that will be evaluated; a leading 'file://./' is
                        resolved against the current self.rootpath.
        @return:        None; errors are appended to self.validation_errors.
        '''
        if mets.startswith('file://./'):
            mets = os.path.join(self.rootpath, mets[9:])
        # change self.rootpath so it fits any relative path found in the current (subsequent) mets
        self.rootpath = mets.rsplit('/', 1)[0]

        file_tag = q(METS_NS, 'file')
        div_tag = q(METS_NS, 'div')
        mptr_tag = q(METS_NS, 'mptr')
        amdsec_tag = q(METS_NS, 'amdSec')
        href_attr = q(XLINK_NS, 'href')

        try:
            # binary mode lets the parser handle the encoding; the handle is
            # closed even if parsing fails
            with open(mets, 'rb') as mets_file:
                parsed_mets = etree.iterparse(mets_file,
                                              events=('start', 'end'),
                                              schema=self.schema_mets)
                for event, element in parsed_mets:
                    if event != 'end':
                        continue

                    if element.tag == file_tag:
                        # files
                        self.total_files += 1
                        self.validate_file(element)
                        self._free_parsed_element(element)
                    elif element.tag == div_tag:
                        # LABEL is read with get(): a div without one must not abort the validation
                        label = element.attrib.get('LABEL', '')
                        if not label.startswith('representations/'):
                            continue
                        rep = label.rsplit('/', 1)[1]
                        if fnmatch.fnmatch(rep, '*_mig-*'):
                            # representation mets files
                            for child in element:
                                if child.tag == mptr_tag:
                                    self.subsequent_mets.append(
                                        (rep, child.attrib[href_attr]))
                            self._free_parsed_element(element)
                    elif element.tag == amdsec_tag:
                        # children are e.g. digiprovMD, grandchildren e.g. mdRef
                        for md_section in element:
                            for md_ref in md_section:
                                # comments and processing instructions also count as children
                                if md_ref.tag in (etree.Comment, etree.PI):
                                    continue
                                if md_ref.attrib.get('MDTYPE') != 'PREMIS':
                                    continue
                                rel_path = md_ref.attrib.get(href_attr, '')
                                if rel_path.startswith('file://./'):
                                    self._validate_premis_file(
                                        os.path.join(self.rootpath,
                                                     rel_path[9:]))
        except etree.XMLSyntaxError as e:
            self.validation_errors.append(e.error_log)

    def _free_parsed_element(self, element):
        '''Clears a processed element and deletes its preceding siblings from the parsed tree.'''
        element.clear()
        while element.getprevious() is not None:
            del element.getparent()[0]

    def _validate_premis_file(self, premis):
        '''Parses the Premis file at the given path against self.schema_premis and records errors.'''
        try:
            with open(premis, 'rb') as premis_file:
                # iterating over the whole document triggers the validation
                for _event, _element in etree.iterparse(
                        premis_file,
                        events=('start', ),
                        schema=self.schema_premis):
                    pass
            print('Successfully validated Premis file: %s' % premis)
        except etree.XMLSyntaxError as e:
            print('VALIDATION ERROR: The Premis file %s yielded errors:' %
                  premis)
            print(e.error_log)
            self.validation_errors.append(e.error_log)
Ejemplo n.º 9
0
    def createMets(self, mets_data):
        '''
        Creates the file <root_path>/METS.xml describing the package found in self.root_path.

        First a Mets skeleton is built (header, amdSec, fileSec with one general file group, a physical
        structMap, a logical structMap and - if a parent is given - a structMap pointing to the parent).
        Then the package folder is traversed:

        - files on IP root level are ignored
        - 'submission/metadata' and 'submission/schemas' folders are not listed
        - metadata on IP root level is referenced via <mdRef> in a <dmdSec> ('descriptive') or in a
          <digiprovMD> ('preservation', 'earkweb' and earkweb.log); metadata folders of a representation
          ('*_mig-*') are only listed if the representation has no Mets file of its own
        - a folder containing its own Mets file is referenced with an <mptr> and not traversed further
        - all other files are added to the file group via self.addFile()

        @param mets_data:   dict with the keys 'packageid', 'type', 'schemas' (schema folder) and
                            'parent' (identifier of the parent package, '' if there is none); it is
                            also stored as self.mets_data
        @return:            None
        '''
        self.mets_data = mets_data
        packageid = mets_data['packageid']
        packagetype = mets_data['type']
        schemafolder = mets_data['schemas']
        parent = mets_data['parent']

        def new_id():
            # IDs in the Mets are random UUIDs with an 'ID' prefix
            return "ID" + str(uuid.uuid4())

        def schema_location(xsd_name):
            # location of a schema file relative to the package root, 'empty' if the file is missing
            xsd_path = os.path.join(schemafolder, xsd_name)
            if os.path.isfile(xsd_path):
                return os.path.relpath(xsd_path, self.root_path)
            return 'empty'

        print('creating Mets')
        ###########################
        # create METS skeleton
        ###########################

        # create Mets root
        root = M.mets({
            "OBJID": "urn:uuid:" + packageid,
            "LABEL":
            "METS file describing the %s matching the OBJID." % packagetype,
            "PROFILE": "http://www.ra.ee/METS/v01/IP.xml",
            "TYPE": packagetype
        })
        root.attrib[
            '{%s}schemaLocation' %
            XSI_NS] = "http://www.loc.gov/METS/ %s http://www.w3.org/1999/xlink %s" % (
                schema_location('mets_1_11.xsd'), schema_location('xlink.xsd'))

        # create Mets header
        mets_hdr = M.metsHdr({
            "CREATEDATE": current_timestamp(),
            "RECORDSTATUS": "NEW"
        })
        root.append(mets_hdr)

        # add an agent
        mets_hdr.append(
            self.createAgent("CREATOR", "OTHER", "SOFTWARE", "E-ARK earkweb",
                             "VERSION=0.0.1"))

        # add document ID
        mets_hdr.append(M.metsDocumentID("METS.xml"))

        # create amdSec
        mets_amdSec = M.amdSec({"ID": new_id()})
        root.append(mets_amdSec)

        # create fileSec
        mets_fileSec = M.fileSec()
        root.append(mets_fileSec)

        # general filegroup
        mets_filegroup = M.fileGrp({
            "ID": new_id(),
            "USE": "general filegroup"
        })
        mets_fileSec.append(mets_filegroup)

        # structMap 'E-ARK structural map' - default, physical structure
        mets_earkstructmap = M.structMap({
            "LABEL": "E-ARK structural map",
            "TYPE": "physical"
        })
        root.append(mets_earkstructmap)
        package_div = M.div({"LABEL": packageid})
        mets_earkstructmap.append(package_div)

        # structMap and div for the whole package (metadata, schema and /data)
        mets_structmap = M.structMap({
            "LABEL": "Simple %s structuring" % packagetype,
            "TYPE": "logical"
        })
        root.append(mets_structmap)
        mets_structmap_div = M.div({"LABEL": "Package structure"})
        mets_structmap.append(mets_structmap_div)

        # metadata structmap - IP root level!
        mets_structmap_metadata_div = M.div({"LABEL": "metadata files"})
        mets_structmap_div.append(mets_structmap_metadata_div)

        # structmap for schema files
        mets_structmap_schema_div = M.div({"LABEL": "schema files"})
        mets_structmap_div.append(mets_structmap_schema_div)

        # content structmap - all representations! (is only filled if no separate METS exists for the rep)
        mets_structmap_content_div = M.div({"LABEL": "content files"})
        mets_structmap_div.append(mets_structmap_content_div)

        # create structmap for parent/child relation, if applicable
        if parent != '':
            print('creating link to parent %s' % packagetype)
            mets_structmap_relation = M.structMap({
                'TYPE': 'logical',
                'LABEL': 'parent'
            })
            root.append(mets_structmap_relation)
            mets_div_rel = M.div(
                {'LABEL': '%s parent identifier' % packagetype})
            mets_structmap_relation.append(mets_div_rel)
            mets_div_rel.append(
                M.mptr({
                    "LOCTYPE": "OTHER",
                    "OTHERLOCTYPE": "UUID",
                    q(XLINK_NS, "title"):
                    ("Referencing the parent %s of this (urn:uuid:%s) %s." %
                     (packagetype, packageid, packagetype)),
                    q(XLINK_NS, "href"): "urn:uuid:" + parent,
                    "ID": new_id()
                }))

        ###########################
        # add to Mets skeleton
        ###########################

        def link_metadata(section, md_dir, md_file, mdtype, physical_div):
            # reference a metadata file from the given section and from both structMaps
            file_id = new_id()
            section.append(
                M.mdRef(self.make_mdref(md_dir, md_file, file_id, mdtype)))
            mets_structmap_metadata_div.append(M.fptr({"FILEID": file_id}))
            physical_div.append(M.fptr({"FILEID": file_id}))

        def add_descriptive(md_dir, md_file, physical_div):
            # descriptive metadata gets its own dmdSec, inserted right after the header
            # TODO: change MDTYPE
            mets_dmd = M.dmdSec({"ID": new_id()})
            root.insert(1, mets_dmd)
            link_metadata(mets_dmd, md_dir, md_file, 'OTHER', physical_div)

        def add_preservation(md_dir, md_file, mdtype, physical_div):
            # preservation metadata gets its own digiprovMD inside the amdSec
            mets_digiprovmd = M.digiprovMD({"ID": new_id()})
            mets_amdSec.append(mets_digiprovmd)
            link_metadata(mets_digiprovmd, md_dir, md_file, mdtype,
                          physical_div)

        def preservation_mdtype(md_file):
            # Premis files are recognised by their file name
            if md_file.startswith('premis') or md_file.endswith('premis.xml'):
                return 'PREMIS'
            return 'OTHER'

        metadata_root = os.path.join(self.root_path, 'metadata')

        # add the package content to the Mets skeleton
        for directory, subdirectories, filenames in os.walk(self.root_path):
            # build the earkstructmap: one div per folder; the root folder itself gets none,
            # its files are not listed
            path = os.path.relpath(directory, self.root_path)
            physical_div = None
            if path != '.':
                physical_div = M.div({"LABEL": path})
                package_div.append(physical_div)

            if directory.endswith(
                    ('submission/metadata', 'submission/schemas')):
                del filenames[:]
                del subdirectories[:]

            if directory == metadata_root:
                # Metadata on IP root level - if there are folders for representation-specific metadata,
                # check if the corresponding representation has a Mets file. If yes, skip; if no, add to IP root Mets.
                for filename in filenames:
                    if filename == 'earkweb.log':
                        add_preservation(directory, filename, 'OTHER',
                                         physical_div)
                # the subfolders are traversed below, keep the outer walk out of them
                del subdirectories[:]

                for dirname in os.listdir(metadata_root):
                    # paths are built from components: formatting the joined path with '%'
                    # would fail for a root path that itself contains a '%'
                    md_folder = os.path.join(metadata_root, dirname)
                    if fnmatch.fnmatch(dirname, '*_mig-*'):
                        # TODO: maybe list it all the time?
                        # this folder contains metadata for a representation/migration, currently:
                        # only listed if no representation Mets file exists
                        rep_mets = os.path.join(self.root_path,
                                                'representations', dirname,
                                                'METS.xml')
                        if os.path.isfile(rep_mets):
                            continue
                        for md_dir, _subdirs, md_files in os.walk(md_folder):
                            for md_file in md_files:
                                if md_dir.endswith('descriptive'):
                                    add_descriptive(md_dir, md_file,
                                                    physical_div)
                                elif md_dir.endswith('preservation'):
                                    add_preservation(
                                        md_dir, md_file,
                                        preservation_mdtype(md_file),
                                        physical_div)
                                else:
                                    print(
                                        'Unclassified metadata file %s in %s.'
                                        % (md_file, md_dir))
                    else:
                        # metadata that should be listed in the Mets
                        for md_dir, _subdirs, md_files in os.walk(md_folder):
                            for md_file in md_files:
                                if dirname == 'descriptive':
                                    add_descriptive(md_dir, md_file,
                                                    physical_div)
                                elif dirname in ('preservation', 'earkweb'):
                                    add_preservation(
                                        md_dir, md_file,
                                        preservation_mdtype(md_file),
                                        physical_div)
                                else:
                                    print(
                                        'Unclassified metadata file %s in %s.'
                                        % (md_file, md_dir))
            elif directory != self.root_path:
                # Any other folder outside of /<root>/metadata (files on IP root level are ignored)
                for filename in filenames:
                    # TODO: list rep metadata only in the rep Mets?
                    file_path = os.path.join(directory, filename)
                    if filename.lower() == 'mets.xml':
                        # delete the subdirectories list to stop os.walk from traversing further;
                        # mets file is added as <mets:mptr> to the div of the corresponding rep
                        del subdirectories[:]
                        rep_name = os.path.basename(directory)
                        rel_path_file = "file://./%s" % os.path.relpath(
                            file_path, self.root_path)
                        metspointer = M.mptr({
                            "LOCTYPE": "URL",
                            q(XLINK_NS, "title"):
                            ("Mets file describing representation: %s of %s: urn:uuid:%s."
                             % (rep_name, packagetype, packageid)),
                            q(XLINK_NS, "href"): rel_path_file,
                            "ID": new_id()
                        })
                        # IMPORTANT: The <mptr> element needs to be the first entry in a <div>, or the
                        # Mets will be invalid! It is inserted at the front because files listed before
                        # the Mets file have already added their <fptr>.
                        physical_div.insert(0, metspointer)
                        # also create a <fptr> for the Mets file
                        file_id = self.addFile(file_path, mets_filegroup)
                        physical_div.append(M.fptr({"FILEID": file_id}))
                    elif directory.endswith('schemas'):
                        # schema files
                        file_id = self.addFile(file_path, mets_filegroup)
                        mets_structmap_schema_div.append(
                            M.fptr({'FILEID': file_id}))
                        physical_div.append(M.fptr({'FILEID': file_id}))
                    else:
                        # content files
                        file_id = self.addFile(file_path, mets_filegroup)
                        mets_structmap_content_div.append(
                            M.fptr({'FILEID': file_id}))
                        physical_div.append(M.fptr({'FILEID': file_id}))

        xml_bytes = etree.tostring(root,
                                   encoding='UTF-8',
                                   pretty_print=True,
                                   xml_declaration=True)

        path_mets = os.path.join(self.root_path, 'METS.xml')
        # tostring() with an encoding returns encoded bytes, hence binary mode
        with open(path_mets, 'wb') as output_file:
            output_file.write(xml_bytes)
Ejemplo n.º 10
0
    def createPremis(self):
        """Write a PREMIS file describing this representation.

        The document contains, in order:

        * one ``representation`` object standing for the package itself, so
          that at least one object exists even when there are no data files,
        * one object per file found below ``<root_path>/data`` (built by
          ``self.addObject``),
        * one agent entry for the converter software.

        The result is written to
        ``<root_path>/metadata/preservation/premis.xml``; missing parent
        directories are created.

        Returns:
            None
        """
        premis = P.premis({"version": "2.0"})
        premis.attrib[
            '{%s}schemaLocation' %
            XSI_NS] = "info:lc/xmlns/premis-v2 ../../schemas/premis-v2-2.xsd"

        # If there are no /data files, this ensures that there is at least
        # one object (the IP itself).
        premis_id = 'ID' + str(uuid.uuid4())
        package_object = P.object(
            {
                q(XSI_NS, 'type'): 'representation',
                "xmlID": premis_id
            },
            P.objectIdentifier(
                P.objectIdentifierType('repository'),
                P.objectIdentifierValue('package-id-goes-here-?')),
        )
        premis.append(package_object)

        # Create PREMIS objects for the files in this representation
        # (self.root_path/data).
        data_dir = os.path.join(self.root_path, 'data')
        for directory, _subdirectories, filenames in os.walk(data_dir):
            for filename in filenames:
                premis.append(
                    self.addObject(os.path.join(directory, filename)))

        # NOTE: no 'AIP Creation' event is emitted here; only the objects and
        # the agent are recorded.

        # add agent
        identifier_value = 'earkweb'
        premis.append(
            P.agent(
                P.agentIdentifier(P.agentIdentifierType('LOCAL'),
                                  P.agentIdentifierValue(identifier_value)),
                P.agentName('E-ARK AIP to DIP Converter'),
                P.agentType('Software')))

        premis_xml = etree.tostring(premis,
                                    encoding='UTF-8',
                                    pretty_print=True,
                                    xml_declaration=True)

        preservation_dir = os.path.join(self.root_path,
                                        'metadata/preservation')
        # makedirs (not mkdir) so a missing intermediate 'metadata' directory
        # does not make the call fail.
        if not os.path.exists(preservation_dir):
            os.makedirs(preservation_dir)

        path_premis = os.path.join(preservation_dir, 'premis.xml')
        # tostring() with an explicit encoding returns encoded bytes, so the
        # file has to be opened in binary mode.
        with open(path_premis, 'wb') as output_file:
            output_file.write(premis_xml)

        return
Ejemplo n.º 11
0
    def createMigrationPremis(self, premis_info):
        """Write a PREMIS file documenting the migrations into this representation.

        The migrations file is scanned for ``migration`` elements. Only those
        whose ``targetrep`` attribute is a suffix of ``self.root_path`` are
        used. Each of them produces:

        * a ``migration`` event linked to the migrated (target) file, and
        * an object for the target file (built by ``self.addObject``) with a
          ``derivation`` relationship pointing at the source file and at the
          event.

        All objects are appended before all events, followed by one agent
        entry. The result is written to
        ``<root_path>/metadata/preservation/premis.xml``; missing parent
        directories are created.

        Args:
            premis_info: mapping whose ``'info'`` entry is the path of the
                migrations XML file to read.

        Returns:
            None
        """
        premis = P.premis({"version": "2.0"})
        premis.attrib[
            '{%s}schemaLocation' %
            XSI_NS] = "info:lc/xmlns/premis-v2 ../../schemas/premis-v2-2.xsd"

        # Object that references the package or representation.
        # TODO: identifier!
        premis_id = 'ID' + str(uuid.uuid4())
        package_object = P.object(
            {
                q(XSI_NS, 'type'): 'representation',
                "xmlID": premis_id
            },
            P.objectIdentifier(
                P.objectIdentifierType('repository'),
                P.objectIdentifierValue('package-id-goes-here-?')),
        )
        premis.append(package_object)

        # Parse the migrations file, collecting events and adding objects.
        # The file is opened in binary mode (the XML parser handles decoding)
        # and inside a context manager so the handle is always closed.
        eventlist = []
        with open(premis_info['info'], 'rb') as migrations_file:
            migrations = etree.iterparse(migrations_file, events=('start', ))
            for _action, element in migrations:
                if element.tag != 'migration':
                    continue
                if not self.root_path.endswith(element.attrib['targetrep']):
                    continue

                event_id = 'ID' + str(uuid.uuid4())
                source_object_abs = os.path.join(element.attrib['sourcedir'],
                                                 element.attrib['file'])
                source_object_rel = "file://./%s" % os.path.relpath(
                    source_object_abs, self.root_path)
                target_object_abs = os.path.join(element.attrib['targetdir'],
                                                 element.attrib['output'])
                target_object_rel = "file://./%s" % os.path.relpath(
                    target_object_abs, self.root_path)

                # event
                migration_event = P.event(
                    P.eventIdentifier(P.eventIdentifierType('local'),
                                      P.eventIdentifierValue(event_id)),
                    P.eventType('migration'),
                    P.eventDateTime(
                        element.attrib['starttime']
                    ),  # TODO: use event start or event end time?
                    P.eventOutcomeInformation(P.eventOutcome('success')),
                    P.linkingAgentIdentifier(
                        P.linkingAgentIdentifierType('software'),
                        P.linkingAgentIdentifierValue(
                            'should probably come from migrations.xml')),
                    P.linkingObjectIdentifier(
                        P.linkingObjectIdentifierType('filepath'),
                        P.linkingObjectIdentifierValue(target_object_rel)))
                eventlist.append(migration_event)

                # object, with the relationship to the migration event and
                # the source file
                target_object = self.addObject(target_object_abs)
                relationship = P.relationship(
                    P.relationshipType('derivation'),
                    P.relationshipSubType('has source'),
                    P.relatedObjectIdentification(
                        P.relatedObjectIdentifierType('filepath'),
                        P.relatedObjectIdentifierValue(source_object_rel),
                        P.relatedObjectSequence('0')),
                    P.relatedEventIdentification(
                        P.relatedEventIdentifierType('local'),
                        P.relatedEventIdentifierValue(event_id),
                        P.relatedEventSequence('1')),
                )
                target_object.append(relationship)
                premis.append(target_object)

        # Append all events to the premis root - they must be below the
        # objects (due to validation).
        for migration_event in eventlist:
            premis.append(migration_event)

        # add agent
        identifier_value = 'earkweb'
        premis.append(
            P.agent(
                P.agentIdentifier(P.agentIdentifierType('LOCAL'),
                                  P.agentIdentifierValue(identifier_value)),
                P.agentName('E-ARK AIP to DIP Converter'),
                P.agentType('Software')))

        # create the Premis file
        premis_xml = etree.tostring(premis,
                                    encoding='UTF-8',
                                    pretty_print=True,
                                    xml_declaration=True)
        preservation_dir = os.path.join(self.root_path,
                                        'metadata/preservation')
        if not os.path.exists(preservation_dir):
            os.makedirs(preservation_dir)
        path_premis = os.path.join(preservation_dir, 'premis.xml')
        # tostring() with an explicit encoding returns encoded bytes, so the
        # file has to be opened in binary mode.
        with open(path_premis, 'wb') as output_file:
            output_file.write(premis_xml)

        return