def restructureTRIMForComplianceFileUUIDsAssigned(
    unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith="%transferDirectory%"
):
    """Rearrange the contents of a TRIM unit into the required directory layout.

    Every name in ``requiredDirectories`` is created under ``unitPath`` when
    it is missing.  Each remaining top-level entry is then handled as follows:

    * a directory gets a counterpart under ``objects/``; each of its entries
      is handed to ``updateFileLocation2`` with the new destination, and every
      entry ending in ``Metadata.xml`` has its file group use set (see the
      inline comments).  The source directory is removed afterwards.
    * a plain file is relocated to ``metadata/`` (``manifest.txt`` goes to
      ``metadata/submissionDocumentation``) and every matching file UUID is
      given the group use "TRIM metadata".

    :param unitPath: directory whose entries are rearranged.
    :param unitIdentifier: passed unchanged to ``updateFileLocation2`` and
        ``getFileUUIDLike``.
    :param unitIdentifierType: passed unchanged to the same helpers.
    :param unitPathReplaceWith: passed unchanged to the same helpers.
    """
    # "requiredDir" rather than "dir" so the builtin is not shadowed.
    for requiredDir in requiredDirectories:
        reqDirPath = os.path.join(unitPath, requiredDir)
        if not os.path.isdir(reqDirPath):
            os.mkdir(reqDirPath)

    for item in os.listdir(unitPath):
        if item in requiredDirectories:
            continue
        src = os.path.join(unitPath, item)
        if os.path.isdir(src):
            objectsDir = os.path.join(unitPath, "objects", item)
            os.mkdir(objectsDir)
            for item2 in os.listdir(src):
                itemPath = os.path.join(src, item2)
                dst = os.path.join(objectsDir, item2)
                updateFileLocation2(itemPath, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)
                if item2.endswith("Metadata.xml"):
                    # Drop the trailing "Metadata.xml" plus the one character
                    # in front of it, then search for every file whose
                    # location contains "<item>/<remaining prefix>".
                    TRIMfileID = os.path.join(item, item2[:-1 - len("Metadata.xml")])
                    files = getFileUUIDLike('%' + TRIMfileID + '%', unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)
                    fileUUID = None
                    fileGrpUUID = None
                    # .items() works on both Python 2 and 3; .iteritems() is
                    # Python 2 only.
                    for key, value in files.items():
                        if key.endswith("Metadata.xml"):
                            fileUUID = value
                        else:
                            fileGrpUUID = value
                    if fileUUID and fileGrpUUID:
                        # A non-metadata match exists alongside the metadata file.
                        fileGrpUse = "TRIM file metadata"
                        updateFileGrpUsefileGrpUUID(fileUUID, fileGrpUse, fileGrpUUID)
                    elif fileUUID and not fileGrpUUID:
                        # Only the metadata file itself matched.
                        updateFileGrpUse(fileUUID, "TRIM container metadata")
            # Raises OSError if anything is still left inside src.
            os.removedirs(src)
        else:
            destDir = "metadata"
            if item == "manifest.txt":
                destDir = "metadata/submissionDocumentation"
            dst = os.path.join(unitPath, destDir, item)
            updateFileLocation2(src, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)
            files = getFileUUIDLike(dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)
            # Only the UUIDs are needed here, so iterate the values directly.
            for fileUUID in files.values():
                updateFileGrpUse(fileUUID, "TRIM metadata")
def restructureTRIMForComplianceFileUUIDsAssigned(
    unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith="%transferDirectory%"
):
    """Move the entries of a TRIM unit into the required directory structure.

    Missing entries of ``requiredDirectories`` are created under ``unitPath``
    first.  After that, every other top-level entry is processed:

    * directories are recreated under ``objects/`` and their entries are
      passed to ``updateFileLocation2`` with the new destination.  Entries
      ending in ``Metadata.xml`` additionally get a file group use assigned,
      depending on which file UUIDs ``getFileUUIDLike`` returns for them.
      The emptied source directory is removed at the end.
    * plain files go to ``metadata/`` (``manifest.txt`` to
      ``metadata/submissionDocumentation``) and every matching file UUID is
      tagged with the group use "TRIM metadata".

    :param unitPath: directory whose entries are rearranged.
    :param unitIdentifier: forwarded as-is to ``updateFileLocation2`` and
        ``getFileUUIDLike``.
    :param unitIdentifierType: forwarded as-is to the same helpers.
    :param unitPathReplaceWith: forwarded as-is to the same helpers.
    """
    # Avoid naming the loop variable "dir": that would shadow the builtin.
    for requiredDir in requiredDirectories:
        reqDirPath = os.path.join(unitPath, requiredDir)
        if not os.path.isdir(reqDirPath):
            os.mkdir(reqDirPath)

    for item in os.listdir(unitPath):
        if item in requiredDirectories:
            continue
        src = os.path.join(unitPath, item)
        if os.path.isdir(src):
            objectsDir = os.path.join(unitPath, "objects", item)
            os.mkdir(objectsDir)
            for item2 in os.listdir(src):
                itemPath = os.path.join(src, item2)
                dst = os.path.join(objectsDir, item2)
                updateFileLocation2(itemPath, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)
                if item2.endswith("Metadata.xml"):
                    # The slice removes "Metadata.xml" and the single
                    # character preceding it; the remainder is used as a
                    # LIKE-style pattern wrapped in "%" wildcards.
                    TRIMfileID = os.path.join(item, item2[: -1 - len("Metadata.xml")])
                    files = getFileUUIDLike(
                        "%" + TRIMfileID + "%", unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith
                    )
                    fileUUID = None
                    fileGrpUUID = None
                    # dict.iteritems() does not exist on Python 3; items()
                    # behaves the same for this loop on both versions.
                    for key, value in files.items():
                        if key.endswith("Metadata.xml"):
                            fileUUID = value
                        else:
                            fileGrpUUID = value
                    if fileUUID and fileGrpUUID:
                        # Metadata file plus a matching non-metadata file.
                        fileGrpUse = "TRIM file metadata"
                        updateFileGrpUsefileGrpUUID(fileUUID, fileGrpUse, fileGrpUUID)
                    elif fileUUID and not fileGrpUUID:
                        # Metadata file with no matching non-metadata file.
                        updateFileGrpUse(fileUUID, "TRIM container metadata")
            # os.removedirs raises OSError when src still has entries.
            os.removedirs(src)
        else:
            destDir = "metadata"
            if item == "manifest.txt":
                destDir = "metadata/submissionDocumentation"
            dst = os.path.join(unitPath, destDir, item)
            updateFileLocation2(src, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)
            files = getFileUUIDLike(dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)
            # The keys are not used, so loop over the UUID values only.
            for fileUUID in files.values():
                updateFileGrpUse(fileUUID, "TRIM metadata")
def restructureTRIMForComplianceFileUUIDsAssigned(unitPath, unitIdentifier, unitIdentifierType="transfer", unitPathReplaceWith="%transferDirectory%"):
    """Reorganise a TRIM unit whose files already have UUIDs assigned.

    The required directories are created first.  Every other top-level entry
    of ``unitPath`` is then processed: plain files are relocated into
    ``metadata/`` (``manifest.txt`` into ``metadata/submissionDocumentation``)
    and tagged "TRIM metadata"; directories are recreated under ``objects/``,
    their entries relocated there, their ``*Metadata.xml`` entries given a
    file group use, and the emptied source directory removed.

    :param unitPath: directory whose entries are rearranged.
    :param unitIdentifier: forwarded unchanged to the ``fileOperations`` calls.
    :param unitIdentifierType: forwarded unchanged to the same calls.
    :param unitPathReplaceWith: forwarded unchanged to the same calls.
    """
    archivematicaFunctions.create_directories(REQUIRED_DIRECTORIES, unitPath)

    # os.listdir() returns names of the same type as the argument it is
    # given.  All names should come back as bytestrings here because they may
    # contain arbitrary, non-Unicode characters.
    unitPath = str(unitPath)

    # Positional tail shared by every fileOperations call below.
    unitArgs = (unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)
    metadataSuffix = "Metadata.xml"

    for entry in os.listdir(unitPath):
        if entry in REQUIRED_DIRECTORIES:
            continue
        entryPath = os.path.join(unitPath, entry)

        if not os.path.isdir(entryPath):
            # Top-level file: relocate it and tag every matching UUID.
            targetDir = "metadata/submissionDocumentation" if entry == "manifest.txt" else "metadata"
            target = os.path.join(unitPath, targetDir, entry)
            fileOperations.updateFileLocation2(entryPath, target, *unitArgs)
            matches = fileOperations.getFileUUIDLike(target, *unitArgs)
            for matchedUUID in matches.values():
                fileOperations.updateFileGrpUse(matchedUUID, "TRIM metadata")
            continue

        # Top-level directory: rebuild it underneath objects/.
        objectsSubdir = os.path.join(unitPath, "objects", entry)
        os.mkdir(objectsSubdir)
        for child in os.listdir(entryPath):
            fileOperations.updateFileLocation2(
                os.path.join(entryPath, child),
                os.path.join(objectsSubdir, child),
                *unitArgs
            )
            if not child.endswith(metadataSuffix):
                continue

            # Strip the suffix and the one character in front of it, then
            # look up everything whose location contains that prefix.
            trimPrefix = os.path.join(entry, child[:-(len(metadataSuffix) + 1)])
            matches = fileOperations.getFileUUIDLike("%" + trimPrefix + "%", *unitArgs)

            metadataUUID = None
            objectUUID = None
            for location, matchedUUID in matches.items():
                if location.endswith(metadataSuffix):
                    metadataUUID = matchedUUID
                else:
                    objectUUID = matchedUUID

            if metadataUUID:
                if objectUUID:
                    fileOperations.updateFileGrpUsefileGrpUUID(metadataUUID, "TRIM file metadata", objectUUID)
                else:
                    fileOperations.updateFileGrpUse(metadataUUID, "TRIM container metadata")
        os.removedirs(entryPath)
def _recordManifestCheckEvents(job, lookupPath, shownPath, transferPath, transferUUID, date, outcomeNote):
    """Insert a passing "manifest check" event for each file UUID found for lookupPath.

    :param job: job whose ``pyprint`` is used for reporting.
    :param lookupPath: path handed to ``getFileUUIDLike``.
    :param shownPath: path text printed when no file UUID is found.
    :param transferPath: transfer directory, forwarded to ``getFileUUIDLike``.
    :param transferUUID: transfer identifier, forwarded to ``getFileUUIDLike``.
    :param date: value stored as the event date/time.
    :param outcomeNote: value stored as the event outcome detail note.
    :returns: 1 if no file UUID was found (an error is printed), otherwise 0.
    """
    fileID = getFileUUIDLike(lookupPath, transferPath, transferUUID, "transfer", "%transferDirectory%")
    errors = 0
    if not fileID:
        job.pyprint("Could not find fileUUID for: ", shownPath, file=sys.stderr)
        errors = 1
    for _location, fileUUID in fileID.items():
        databaseFunctions.insertIntoEvents(
            fileUUID=fileUUID,
            eventIdentifierUUID=str(uuid.uuid4()),
            eventType="manifest check",
            eventDateTime=date,
            eventOutcome="Pass",
            eventOutcomeDetailNote=outcomeNote,
            eventDetail='program="archivematica"; module="trimVerifyManifest"',
        )
    return errors


def _verifyManifestForJob(job):
    """Check every entry of one job's manifest.txt and set the job status.

    Reads ``manifest.txt`` from the transfer directory (``job.args[3]``).
    The status is the number of problems found, or 255 when the manifest
    listed no file that exists on disk.
    """
    transferUUID = job.args[1]
    transferName = job.args[2]
    transferPath = job.args[3]
    date = job.args[4]
    topDirectory = None
    currentDirectory = ""
    fileCount = 0
    exitCode = 0

    # The context manager closes the manifest even if a check raises.
    with open(os.path.join(transferPath, "manifest.txt"), "r") as manifest:
        for line in manifest:
            if line.startswith(" Directory of "):
                if topDirectory is None:
                    # The first "Directory of" line names the transfer root.
                    topDirectory = line.strip()
                    currentDirectory = transferPath
                    originalTransferName = topDirectory.split("\\")[-1]
                    if originalTransferName != transferName:
                        job.pyprint(
                            "Warning, transfer was renamed from: ",
                            originalTransferName,
                            file=sys.stderr,
                        )
                else:
                    # Later ones are rewritten relative to the transfer
                    # path, with backslashes turned into forward slashes.
                    currentDirectory = (
                        line.strip()
                        .replace(topDirectory + "\\", transferPath, 1)
                        .replace("\\", "/")
                    )

            # File/dir lines are non-blank and do not start with whitespace.
            if not line.strip():
                continue
            if line.startswith((" ", "\t")):
                continue

            isDir = "<DIR>" in line
            # Raw string: "\s" in a plain literal is an invalid escape.
            sections = re.split(r"\s+", line.strip())
            baseName = sections[-1]  # assumes no spaces in file name
            path = os.path.join(transferPath, currentDirectory, baseName)
            shownPath = path.replace(transferPath, "%TransferDirectory%")

            if isDir:
                # Don't check if the parent directory exists.
                if baseName == "..":
                    continue
                if os.path.isdir(path):
                    job.pyprint("Verified directory exists: ", shownPath)
                else:
                    job.pyprint("Directory does not exists: ", shownPath, file=sys.stderr)
                    exitCode += 1
                continue

            if os.path.isfile(path):
                job.pyprint("Verified file exists: ", shownPath)
                fileCount += 1
                exitCode += _recordManifestCheckEvents(
                    job, path, shownPath, transferPath, transferUUID, date,
                    "Verified file exists",
                )
                continue

            # Retry with everything from the last "." onwards lower-cased.
            i = path.rfind(".")
            path2 = path[:i] + path[i:].lower()
            if i != -1 and os.path.isfile(path2):
                job.pyprint(
                    "Warning, verified file exists, but with implicit extension case: ",
                    shownPath,
                    file=sys.stderr,
                )
                fileCount += 1
                exitCode += _recordManifestCheckEvents(
                    job, path2, shownPath, transferPath, transferUUID, date,
                    "Verified file exists, but with implicit extension case",
                )
            else:
                job.pyprint("File does not exists: ", shownPath, file=sys.stderr)
                exitCode += 1

    if fileCount:
        job.set_status(exitCode)
    else:
        job.pyprint("No files found.", file=sys.stderr)
        job.set_status(255)


def call(jobs):
    """Verify each job's transfer against its manifest.txt.

    All jobs run inside a single ``transaction.atomic()`` block, each within
    its own ``job.JobContext()``.

    :param jobs: iterable of job objects providing ``args``, ``JobContext``,
        ``pyprint`` and ``set_status``.
    """
    with transaction.atomic():
        for job in jobs:
            with job.JobContext():
                _verifyManifestForJob(job)
#make end time end of year endTimeEndOfYearDiff = datetime(endTime.year, 12, 31) - endTime endTime = endTime + endTimeEndOfYearDiff indexForOnlyDate = 10 startTime = startTime.__str__()[:indexForOnlyDate] endTime = endTime.__str__()[:indexForOnlyDate] for file in os.listdir(dirPath): filePath = os.path.join(dirPath, file) if file == "ContainerMetadata.xml" or file.endswith("Metadata.xml") or not os.path.isfile(filePath): continue fileUUID = getFileUUIDLike(filePath, transferPath, transferUUID, "transferUUID", "%transferDirectory%")[filePath.replace(transferPath, "%transferDirectory%", 1)] FileMetadataAppliesToType = '7f04d9d4-92c2-44a5-93dc-b7bfdf0c1f17' #RightsStatement sql = """INSERT INTO RightsStatement SET metadataAppliesToType='%s', metadataAppliesToidentifier='%s', rightsStatementIdentifierType='UUID', rightsStatementIdentifierValue='%s', fkAgent=1, rightsBasis='Other';""" % (FileMetadataAppliesToType, fileUUID, uuid.uuid4().__str__()) RightsStatement = databaseInterface.insertAndReturnID(sql) #RightsStatementOtherRightsInformation sql = """INSERT INTO RightsStatementOtherRightsInformation SET fkRightsStatement=%d,
# make end time end of year endTimeEndOfYearDiff = datetime(endTime.year, 12, 31) - endTime endTime = endTime + endTimeEndOfYearDiff indexForOnlyDate = 10 startTime = startTime.__str__()[:indexForOnlyDate] endTime = endTime.__str__()[:indexForOnlyDate] for file in os.listdir(dirPath): filePath = os.path.join(dirPath, file) if file == "ContainerMetadata.xml" or file.endswith( "Metadata.xml") or not os.path.isfile(filePath): continue fileUUID = getFileUUIDLike(filePath, transferPath, transferUUID, "transfer", "%transferDirectory%")[filePath.replace( transferPath, "%transferDirectory%", 1)] FileMetadataAppliesToType = '7f04d9d4-92c2-44a5-93dc-b7bfdf0c1f17' # RightsStatement statement = RightsStatement.objects.create( metadataappliestotype_id=FileMetadataAppliesToType, metadataappliestoidentifier=fileUUID, rightsstatementidentifiertype="UUID", rightsstatementidentifiervalue=str(uuid.uuid4()), rightsholder=1, rightsbasis="Other") # RightsStatementOtherRightsInformation info = RightsStatementOtherRightsInformation.objects.create( rightsstatement=statement, otherrightsbasis="Policy")
def call(jobs):
    """Verify each transfer file's MD5 against its ``*_Metadata.xml`` sidecar.

    For every file in every first-level directory of the transfer path
    (``job.args[3]``), the sidecar named ``<file name up to its last
    dot>_Metadata.xml`` is parsed and its ``Document/MD5`` text compared with
    the file's MD5 checksum.  A match inserts a passing "fixity check" event
    for each file UUID returned by ``getFileUUIDLike``.  An unparsable
    sidecar or a mismatch is printed to the job's stderr and counted.  The
    job status is set to that count.

    All jobs run inside a single ``transaction.atomic()`` block.

    :param jobs: iterable of job objects providing ``args``, ``JobContext``,
        ``pyprint`` and ``set_status``.
    """
    with transaction.atomic():
        for job in jobs:
            with job.JobContext():
                # job.args[2] (transferName) is unused.
                transferUUID = job.args[1]
                transferPath = job.args[3]
                date = job.args[4]
                exitCode = 0
                for transfer_dir in os.listdir(transferPath):
                    dirPath = os.path.join(transferPath, transfer_dir)
                    if not os.path.isdir(dirPath):
                        continue
                    for transfer_file in os.listdir(dirPath):
                        filePath = os.path.join(dirPath, transfer_file)
                        # The suffix test also covers "ContainerMetadata.xml",
                        # so no separate equality check is needed.
                        if transfer_file.endswith("Metadata.xml") or not os.path.isfile(filePath):
                            continue

                        i = transfer_file.rfind(".")
                        if i != -1:
                            xmlFile = transfer_file[:i] + "_Metadata.xml"
                        else:
                            xmlFile = transfer_file + "_Metadata.xml"
                        xmlFilePath = os.path.join(dirPath, xmlFile)

                        try:
                            tree = etree.parse(xmlFilePath)
                            root = tree.getroot()
                            xmlMD5 = root.find("Document/MD5").text
                        except Exception:
                            # "except Exception" rather than a bare "except"
                            # so KeyboardInterrupt/SystemExit still propagate.
                            job.pyprint("Error parsing: ", xmlFilePath, file=sys.stderr)
                            exitCode += 1
                            continue

                        objectMD5 = get_file_checksum(filePath, "md5")
                        if objectMD5 != xmlMD5:
                            job.pyprint(
                                "Checksum mismatch: ",
                                filePath.replace(transferPath, "%TransferDirectory%"),
                                file=sys.stderr,
                            )
                            exitCode += 1
                            continue

                        job.pyprint(
                            "File OK: ",
                            xmlMD5,
                            filePath.replace(transferPath, "%TransferDirectory%"),
                        )
                        fileID = getFileUUIDLike(
                            filePath, transferPath, transferUUID, "transfer", "%transferDirectory%"
                        )
                        for _location, fileUUID in fileID.items():
                            databaseFunctions.insertIntoEvents(
                                fileUUID=fileUUID,
                                eventIdentifierUUID=str(uuid.uuid4()),
                                eventType="fixity check",
                                eventDateTime=date,
                                eventOutcome="Pass",
                                eventOutcomeDetailNote="%s %s" % (xmlFile, "verified"),
                                eventDetail='program="python"; module="hashlib.md5()"',
                            )
                job.set_status(exitCode)
#extension = root.find("Document/Extension").text xmlMD5 = root.find("Document/MD5").text except: print >>sys.stderr, "Error parsing: ", xmlFilePath exitCode += 1 continue #if extension.lower() != file[i+1:].lower(): # print >>sys.stderr, "Warning, extension mismatch(file/xml): ", file[:i], extension , file[i+1:] objectMD5 = md5_for_file(filePath) if objectMD5 == xmlMD5: print "File OK: ", xmlMD5, filePath.replace(transferPath, "%TransferDirectory%") fileID = getFileUUIDLike(filePath, transferPath, transferUUID, "transferUUID", "%transferDirectory%") for path, fileUUID in fileID.iteritems(): eventDetail = "program=\"python\"; module=\"hashlib.md5()\"" eventOutcome="Pass" eventOutcomeDetailNote = "%s %s" % (xmlFile.__str__(), "verified") eventIdentifierUUID=uuid.uuid4().__str__() databaseFunctions.insertIntoEvents(fileUUID=fileUUID, \ eventIdentifierUUID=eventIdentifierUUID, \ eventType="fixity check", \ eventDateTime=date, \ eventOutcome=eventOutcome, \ eventOutcomeDetailNote=eventOutcomeDetailNote, \ eventDetail=eventDetail) else: print >>sys.stderr, "Checksum mismatch: ", filePath.replace(transferPath, "%TransferDirectory%") exitCode += 1
continue #check if directory exists if os.path.isdir(path): print("Verified directory exists: ", path.replace(transferPath, "%TransferDirectory%")) else: print("Directory does not exists: ", path.replace(transferPath, "%TransferDirectory%"), file=sys.stderr) exitCode += 1 else: if os.path.isfile(path): print("Verified file exists: ", path.replace(transferPath, "%TransferDirectory%")) fileCount += 1 fileID = getFileUUIDLike(path, transferPath, transferUUID, "transfer", "%transferDirectory%") if not len(fileID): print("Could not find fileUUID for: ", path.replace(transferPath, "%TransferDirectory%"), file=sys.stderr) exitCode += 1 for paths, fileUUID in fileID.items(): eventDetail = "program=\"archivematica\"; module=\"trimVerifyManifest\"" eventOutcome = "Pass" eventOutcomeDetailNote = "Verified file exists" eventIdentifierUUID = uuid.uuid4().__str__() databaseFunctions.insertIntoEvents(fileUUID=fileUUID, \ eventIdentifierUUID=eventIdentifierUUID, \ eventType="manifest check", \ eventDateTime=date, \ eventOutcome=eventOutcome, \
try: tree = etree.parse(xmlFilePath) root = tree.getroot() xmlMD5 = root.find('Document/MD5').text except: print('Error parsing: ', xmlFilePath, file=sys.stderr) exitCode += 1 continue objectMD5 = get_file_checksum(filePath, 'md5') if objectMD5 == xmlMD5: print('File OK: ', xmlMD5, filePath.replace(transferPath, '%TransferDirectory%')) fileID = getFileUUIDLike(filePath, transferPath, transferUUID, 'transfer', '%transferDirectory%') for path, fileUUID in fileID.items(): eventDetail = 'program="python"; module="hashlib.md5()"' eventOutcome = 'Pass' eventOutcomeDetailNote = '%s %s' % (xmlFile.__str__(), 'verified') eventIdentifierUUID = uuid.uuid4().__str__() databaseFunctions.insertIntoEvents( fileUUID=fileUUID, eventIdentifierUUID=eventIdentifierUUID, eventType='fixity check', eventDateTime=date, eventOutcome=eventOutcome, eventOutcomeDetailNote=eventOutcomeDetailNote, eventDetail=eventDetail )
def call(jobs):
    """Create rights entries for the files of each TRIM container.

    For every first-level directory of the transfer path (``job.args[3]``),
    ``ContainerMetadata.xml`` is parsed and its ``Container/RetentionSchedule``
    and ``Container/DateClosed`` values are read.  The restriction period runs
    from the closed date to 31 December of the year in which the closed date
    plus the retention period falls.  Each file in the directory that is not
    a ``*Metadata.xml`` file gets a rights statement (basis "Other", other
    rights basis "Policy") and a "Disseminate" rights-granted entry with a
    "Disallow" restriction for that period.

    A container whose metadata cannot be parsed or read is reported on the
    job's stderr, counted, and skipped.  The job status is that count.  All
    jobs run inside a single ``transaction.atomic()`` block.

    :param jobs: iterable of job objects providing ``args``, ``JobContext``,
        ``pyprint`` and ``set_status``.
    """
    with transaction.atomic():
        for job in jobs:
            with job.JobContext():
                # job.args[2] (transferName) is unused.
                # job.args[4] (date) is unused.
                transferUUID = job.args[1]
                transferPath = job.args[3]
                exitCode = 0
                # "containerDir"/"fileName" avoid shadowing the builtins
                # dir() and file().
                for containerDir in os.listdir(transferPath):
                    dirPath = os.path.join(transferPath, containerDir)
                    if not os.path.isdir(dirPath):
                        continue

                    xmlFilePath = os.path.join(dirPath, "ContainerMetadata.xml")
                    shownXmlPath = xmlFilePath.replace(transferPath, "%transferDirectory%", 1)
                    try:
                        tree = etree.parse(xmlFilePath)
                        root = tree.getroot()
                    except Exception:
                        # Not a bare "except": KeyboardInterrupt/SystemExit
                        # must still propagate.
                        job.pyprint("Error parsing: ", shownXmlPath, file=sys.stderr)
                        exitCode += 1
                        continue
                    try:
                        RetentionSchedule = root.find("Container/RetentionSchedule").text
                        DateClosed = root.find("Container/DateClosed").text
                    except Exception:
                        job.pyprint("Error retrieving values from: ", shownXmlPath, file=sys.stderr)
                        exitCode += 1
                        continue

                    retentionPeriod = getTimedeltaFromRetensionSchedule(RetentionSchedule)
                    # The previous code assigned job.pyprint(job, DateClosed)
                    # here.  pyprint is used as a print-style call everywhere
                    # else in this function, so its result cannot be a date
                    # to which retentionPeriod is added.
                    # NOTE(review): assumes a module-level helper
                    # getDateTimeFromDateClosed(job, dateClosed) exists next
                    # to getTimedeltaFromRetensionSchedule and returns a
                    # datetime -- confirm against the rest of the file.
                    startTime = getDateTimeFromDateClosed(job, DateClosed)
                    endTime = startTime + retentionPeriod

                    # make end time end of year
                    endTimeEndOfYearDiff = datetime(endTime.year, 12, 31) - endTime
                    endTime = endTime + endTimeEndOfYearDiff

                    # Keep only the first 10 characters of each timestamp's
                    # string form.
                    indexForOnlyDate = 10
                    startTime = str(startTime)[:indexForOnlyDate]
                    endTime = str(endTime)[:indexForOnlyDate]

                    for fileName in os.listdir(dirPath):
                        filePath = os.path.join(dirPath, fileName)
                        # The suffix test also covers "ContainerMetadata.xml".
                        if fileName.endswith("Metadata.xml") or not os.path.isfile(filePath):
                            continue

                        # Raises KeyError if the lookup returns no entry for
                        # this file's location.
                        fileUUID = getFileUUIDLike(
                            filePath, transferPath, transferUUID, "transfer", "%transferDirectory%"
                        )[filePath.replace(transferPath, "%transferDirectory%", 1)]
                        FileMetadataAppliesToType = '7f04d9d4-92c2-44a5-93dc-b7bfdf0c1f17'

                        # RightsStatement
                        statement = RightsStatement.objects.create(
                            metadataappliestotype_id=FileMetadataAppliesToType,
                            metadataappliestoidentifier=fileUUID,
                            rightsstatementidentifiertype="UUID",
                            rightsstatementidentifiervalue=str(uuid.uuid4()),
                            rightsholder=1,
                            rightsbasis="Other",
                        )

                        # RightsStatementOtherRightsInformation
                        info = RightsStatementOtherRightsInformation.objects.create(
                            rightsstatement=statement,
                            otherrightsbasis="Policy",
                        )

                        # RightsStatementOtherRightsDocumentationIdentifier
                        RightsStatementOtherRightsDocumentationIdentifier.objects.create(
                            rightsstatementotherrights=info
                        )

                        # RightsStatementRightsGranted
                        granted = RightsStatementRightsGranted.objects.create(
                            rightsstatement=statement,
                            act="Disseminate",
                            startdate=startTime,
                            enddate=endTime,
                        )

                        # RightsStatementRightsGrantedNote
                        RightsStatementRightsGrantedNote.objects.create(
                            rightsgranted=granted,
                            rightsgrantednote="Closed until " + endTime,
                        )

                        # RightsStatementRightsGrantedRestriction
                        RightsStatementRightsGrantedRestriction.objects.create(
                            rightsgranted=granted,
                            restriction="Disallow",
                        )
                job.set_status(exitCode)