def check_storage():
    """Report archive objects that lack a storage entry for an active storage method.

    For every policy with PolicyStat=1, each of its four storage-method slots
    whose ``sm_N`` flag equals 1 is checked against every archive object of
    that policy with StatusProcess=3000 and StatusActivity=0.  A line is
    printed per (object, storage method) stating whether zero, one or several
    matching storage entries were found.  Objects with zero matches are also
    added as ``<object id=... target=...>`` children of a ``<needcopies>``
    root, which is finally written to
    /ESSArch/log/needcopies/needcopies.xml.
    """
    needcopies_root = etree.Element('needcopies')

    # Per-slot policy attributes, in the positional order used below:
    # [0] active flag, [1] medium type, ..., [7] target.
    slot_fields = (
        'sm_%d',
        'sm_type_%d',
        'sm_format_%d',
        'sm_blocksize_%d',
        'sm_maxCapacity_%d',
        'sm_minChunkSize_%d',
        'sm_minContainerSize_%d',
        'sm_target_%d',
    )

    for policy in ESSArchPolicy.objects.filter(PolicyStat=1).all():
        storage_methods = [
            [getattr(policy, field % slot) for field in slot_fields]
            for slot in range(1, 5)
        ]
        archive_objects = ArchiveObject.objects.filter(
            PolicyId=policy.PolicyID,
            StatusProcess=3000,
            StatusActivity=0,
        ).all()

        for archive_object in archive_objects:
            storage_entries = archive_object.storage_set.all()

            for method_number, method in enumerate(storage_methods, 1):
                if method[0] != 1:
                    # Storage method slot is not active for this policy.
                    continue

                medium_type = method[1]
                target = method[7]
                matched_ids = []

                for entry in storage_entries:
                    medium = entry.storageMediumUUID
                    type_prefix = str(medium_type)[0]
                    if type_prefix == '2':
                        # Disk
                        matches = (medium.storageMedium == medium_type
                                   and medium.storageMediumID == 'disk')
                    elif type_prefix == '3':
                        # Tape
                        matches = (medium.storageMedium == medium_type
                                   and medium.storageMediumID.startswith(target))
                    else:
                        matches = False
                    if matches:
                        matched_ids.append(medium.storageMediumID)

                if not matched_ids:
                    etree.SubElement(
                        needcopies_root,
                        'object',
                        attrib={
                            'id': archive_object.ObjectIdentifierValue,
                            'target': target,
                        },
                    )
                    print('Missing storage entry for storage method number: %s, target: %s, for object: %s' % (
                        method_number, target, archive_object.ObjectIdentifierValue))
                elif len(matched_ids) == 1:
                    print('Found storage entry for storage method number: %s, target: %s (%s), for object: %s' % (
                        method_number, target,
                        ','.join(str(e) for e in matched_ids),
                        archive_object.ObjectIdentifierValue))
                else:
                    print('Warning found to many storage entry for storage method number: %s, target: %s (%s), for object: %s' % (
                        method_number, target,
                        ','.join(str(e) for e in matched_ids),
                        archive_object.ObjectIdentifierValue))

    report = etree.ElementTree(element=needcopies_root, file=None)
    ESSMD.writeToFile(report, '/ESSArch/log/needcopies/needcopies.xml')
def text2blob(self):
    """Store the test Package METS file as an ArchiveObjectMetadata row.

    Parses /ESSArch/bin/src/testdata/X0000001_METS.xml, serialises it as a
    pretty-printed UTF-8 XML string, prints both the raw and the escaped
    string, and saves the escaped string with ObjectMetadataType 26.

    Returns None.  If parsing reports an error, the reason is printed and
    nothing is stored.
    """
    DOC, errno, why = ESSMD.parseFromFile('/ESSArch/bin/src/testdata/X0000001_METS.xml')
    if errno:
        print(why)
        # The parse result is not usable after an error; serialising it would
        # only fail further down with a less helpful traceback.
        return

    METSstr = etree.tostring(DOC, encoding='UTF-8', xml_declaration=True, pretty_print=True)
    # NOTE(review): the value is escaped here and then saved through the model
    # layer -- confirm the model does not escape it a second time.
    blob = MySQLdb.escape_string(METSstr)
    print('METSstr %s' % METSstr)
    print('blob %s' % blob)

    ArchiveObjectMetadata_obj = ArchiveObjectMetadata()
    # NOTE(review): ObjectIdentifierValue is not defined in this method; it
    # must come from module scope -- confirm it is set before this is called.
    ArchiveObjectMetadata_obj.ObjectIdentifierValue = ObjectIdentifierValue
    ArchiveObjectMetadata_obj.ObjectMetadataType = 26
    ArchiveObjectMetadata_obj.ObjectMetadataBLOB = blob
    ArchiveObjectMetadata_obj.save()
def text2blob(self):
    """Store the test Package METS file as an ArchiveObjectMetadata row.

    Parses /ESSArch/bin/src/testdata/X0000001_METS.xml, serialises it as a
    pretty-printed UTF-8 XML string, prints both the raw and the escaped
    string, and saves the escaped string with ObjectMetadataType 26.

    Returns None.  If parsing reports an error, the reason is printed and
    nothing is stored.
    """
    DOC, errno, why = ESSMD.parseFromFile(
        '/ESSArch/bin/src/testdata/X0000001_METS.xml')
    if errno:
        print(why)
        # The parse result is not usable after an error; serialising it would
        # only fail further down with a less helpful traceback.
        return

    METSstr = etree.tostring(DOC,
                             encoding='UTF-8',
                             xml_declaration=True,
                             pretty_print=True)
    # NOTE(review): the value is escaped here and then saved through the model
    # layer -- confirm the model does not escape it a second time.
    blob = MySQLdb.escape_string(METSstr)
    print('METSstr %s' % METSstr)
    print('blob %s' % blob)

    ArchiveObjectMetadata_obj = ArchiveObjectMetadata()
    # NOTE(review): ObjectIdentifierValue is not defined in this method; it
    # must come from module scope -- confirm it is set before this is called.
    ArchiveObjectMetadata_obj.ObjectIdentifierValue = ObjectIdentifierValue
    ArchiveObjectMetadata_obj.ObjectMetadataType = 26
    ArchiveObjectMetadata_obj.ObjectMetadataBLOB = blob
    ArchiveObjectMetadata_obj.save()
def ThreadMain(self,ProcName):
    """Worker loop that validates AIP packages listed in the ingest table.

    Runs until ``self.mDieFlag`` is 1.  Each cycle takes ``self.mLock``, reads
    the 'Time' and 'Run' columns of the ESSProc row named ``ProcName`` and,
    unless Run is '0', fetches every row of the ingest table with
    StatusActivity = 0 and StatusProcess between 49 and 51.

    For each row whose policy has IngestMetadata > 0 the package is checked
    step by step (PMETS info, path existence, METS/PREMIS size agreement,
    METS/TAR size agreement, checksums, optional XML schema validation,
    optional metadata upload).  Any failing step logs an error, records a
    '1050' event and clears ``self.ok`` so the remaining steps are skipped.
    The row is then marked failed (status 51/4) or updated to StatusProcess
    '59' / StatusActivity '0' together with the computed digests.  Rows whose
    policy has IngestMetadata == 0 are moved straight to '59' without
    validation.

    ProcName -- name of the ESSProc row that controls this worker; also used
                in log messages.
    """

    def _fail(message):
        # Shared failure path: remember the message, log it, record the event
        # and stop the remaining validation steps for the current object.
        self.event_info = message
        logging.error(self.event_info)
        ESSPGM.Events().create('1050','','ESSArch AIPValidate',ProcVersion,'1',self.event_info,2,self.ObjectIdentifierValue)
        self.ok = 0

    def _time_of_day():
        # Take a single clock snapshot so hours/minutes/seconds cannot
        # straddle a rollover between separate time.localtime() calls.
        now = time.localtime()
        return datetime.timedelta(seconds=now[5],minutes=now[4],hours=now[3])

    logging.info('Starting ' + ProcName)
    self.tz=timezone.get_default_timezone()
    self.ChecksumAlgorithm_CHOICES_dict = dict(ChecksumAlgorithm_CHOICES)
    self.ChecksumAlgorithm_CHOICES_invdict = ESSPGM.Check().invert_dict(self.ChecksumAlgorithm_CHOICES_dict)
    while 1:
        if self.mDieFlag==1: break      # Request for death
        # NOTE(review): an exception raised below leaves mLock held, because
        # the release at the end of the cycle is not in a finally block.
        self.mLock.acquire()
        self.Time,self.Run = ESSDB.DB().action('ESSProc','GET',('Time','Run'),('Name',ProcName))[0]
        if self.Run == '0':
            logging.info('Stopping ' + ProcName)
            ESSDB.DB().action('ESSProc','UPD',('Status','0','Run','0','PID','0'),('Name',ProcName))
            self.RunFlag=0
            self.mLock.release()
            if Debug: logging.info('RunFlag: 0')
            time.sleep(2)
            continue
        # Process Item
        self.IngestTable = ESSDB.DB().action('ESSConfig','GET',('Value',),('Name','IngestTable'))[0][0]
        self.PolicyTable = ESSDB.DB().action('ESSConfig','GET',('Value',),('Name','PolicyTable'))[0][0]
        if ExtDBupdate:
            self.ext_IngestTable = self.IngestTable
        else:
            self.ext_IngestTable = ''
        self.dbget,errno,why = ESSDB.DB().action(self.IngestTable,'GET4',('ObjectIdentifierValue','ObjectPackageName',
                                                                           'PolicyId','MetaObjectIdentifier',
                                                                           'MetaObjectSize','DataObjectSize',
                                                                           'ObjectSize', 'ObjectUUID'),
                                                 ('StatusActivity','=','0','AND',
                                                  'StatusProcess','BETWEEN',49,'AND',51))
        if errno:
            logging.error('Failed to access Local DB, error: ' + str(why))
            # A failed query gives no worklist that can be trusted; do nothing
            # this cycle instead of iterating over the error result.
            self.dbget = []
        for self.obj in self.dbget:
            self.ProcDB = ESSDB.DB().action('ESSProc','GET',('Run','Pause'),('Name',ProcName))[0]
            if self.ProcDB[0]=='0':
                logging.info('Stopping ' + ProcName)
                ESSDB.DB().action('ESSProc','UPD',('Status','0','Run','0','PID','0'),('Name',ProcName))
                thread.interrupt_main()
                time.sleep(5)
                break
            elif self.ProcDB[1]==1:
                # Paused: poll once a minute until the Pause column changes.
                while 1:
                    time.sleep(60)
                    self.ProcDB = ESSDB.DB().action('ESSProc','GET',('Run','Pause'),('Name',ProcName))[0]
                    if self.ProcDB[1]==1:
                        logging.info('Process is in pause state')
                    else:
                        break
            self.ObjectIdentifierValue = self.obj[0]
            self.ObjectPackageName = self.obj[1]
            self.PolicyId = self.obj[2]
            # self.obj[3], [4] and [5] (MetaObjectIdentifier, MetaObjectSize,
            # DataObjectSize) are fetched but not used here.
            self.ObjectSize = self.obj[6]
            self.ObjectUUID = self.obj[7]
            logging.debug('self.obj: '+str(self.obj))
            self.ok = 1
            self.PolicyDB,errno,why = ESSDB.DB().action(self.PolicyTable,'GET3',('AIPpath','IngestMetadata','ChecksumAlgorithm','IngestPath','ValidateChecksum','ValidateXML'),('PolicyID',self.PolicyId))
            if errno:
                logging.error('Failed to access Local DB, error: ' + str(why))
                self.ok = 0
            if self.ok:
                ###########################################################
                # set variables
                self.AIPpath = self.PolicyDB[0][0]
                self.metatype = self.PolicyDB[0][1]
                self.ChecksumAlgorithm = self.PolicyDB[0][2]
                self.SIPpath = self.PolicyDB[0][3]
                self.ValidateChecksum = self.PolicyDB[0][4]
                self.ValidateXML = self.PolicyDB[0][5]
                self.ObjectPath = os.path.join(self.AIPpath,self.ObjectPackageName)
                self.Pmets_objpath = os.path.join(self.AIPpath,self.ObjectIdentifierValue + '_Package_METS.xml')
            # NOTE(review): when the policy lookup failed, self.metatype is not
            # refreshed above -- confirm whether the object should be skipped.
            if self.metatype > 0:
                self.startVerTime = _time_of_day()
                errno,why = ESSPGM.DB().SetAIPstatus(self.IngestTable, self.ext_IngestTable, AgentIdentifierValue, self.ObjectUUID, 50, 5)
                if errno: logging.error('Failed to update DB status for AIP: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why))
                logging.info('Start validate AIP package: ' + self.ObjectIdentifierValue)
                self.Cmets_obj = None
                self.Cmets_objpath = None
                self.premis_obj = None
                self.premis_objpath = None
                self.addml_obj = None
                self.addml_objpath = None
                # These two are only assigned below when the PMETS carries a
                # value; reset them so nothing leaks from the previous object.
                self.CMetsMessageDigest = ''
                self.CMetsSize = 0
                ##########################################
                # Get PMETS info
                if self.ok:
                    [self.Package_info,self.CMets_info],errno,why = ESSMD.getPMETSInfo(FILENAME=self.Pmets_objpath)
                    if errno:
                        _fail('Failed to get PMETS info for object: %s, errno: %s, why: %s' % (self.ObjectIdentifierValue,str(errno),str(why)))
                    else:
                        logging.debug('Object: %s, Package_info: %s, CMets_info: %s',self.ObjectIdentifierValue,str(self.Package_info),str(self.CMets_info))
                        # CMets_info and Package_info: ['A0007600_Content_METS.xml', 'MD5', 'b0270cb4d196b72b87fe27ce6242df18', 64058, 'text/xml']
                        self.PackageMessageDigestAlgorithm = self.ChecksumAlgorithm_CHOICES_invdict[self.Package_info[1]]
                        if self.Package_info[2]: self.PackageMessageDigest = self.Package_info[2]
                        if self.Package_info[3]: self.PackageSize = int(self.Package_info[3])
                        self.Cmets_obj = self.CMets_info[0]
                        if self.metatype in [1,2,3]:
                            self.Cmets_objpath = os.path.join(self.AIPpath,self.Cmets_obj)
                        elif self.metatype in [4]:
                            self.Cmets_objpath = os.path.join(self.SIPpath,self.Cmets_obj)
                        self.CMetsMessageDigestAlgorithm = self.ChecksumAlgorithm_CHOICES_invdict[self.CMets_info[1]]
                        if self.CMets_info[2]: self.CMetsMessageDigest = self.CMets_info[2]
                        if self.CMets_info[3]: self.CMetsSize = int(self.CMets_info[3])
                        ##########################################################
                        # Check if ObjectPath and Cmets_objpath exist
                        if os.path.exists(self.ObjectPath) and os.path.exists(self.Cmets_objpath):
                            self.ok = 1
                        else:
                            _fail('The path to Object: %s or METS_Metaobject: %s is not accessible!' % (self.ObjectPath,self.Cmets_objpath))
                ##########################################
                # Get MetsFgrp001TotalSize from METSfile
                if self.ok:
                    self.MetsFgrp001TotalSize,errno,why = ESSMD.getFileSizeFgrp001(FILENAME=self.Cmets_objpath)
                    if errno:
                        _fail('Failed to get MetsFgrp001TotalSize for object: %s, errno: %s, why: %s' % (self.ObjectIdentifierValue,str(errno),str(why)))
                    else:
                        logging.debug('Object: %s, MetsFgrp001TotalSize: %s',self.ObjectIdentifierValue,str(self.MetsFgrp001TotalSize))
                ##########################################
                # Get PremisObjectTotalSize from METSfile
                if self.ok:
                    if self.metatype in [1,2,3]:
                        self.premis_objpath = self.Cmets_objpath
                    elif self.metatype in [4]:
                        # Get metadata from METS file
                        res_info, res_files, res_struct, errno, why = ESSMD.getMETSFileList(FILENAME=self.Cmets_objpath)
                        if errno:
                            _fail('Failed to get metadata from content METS for object: %s, errno: %s, why: %s' % (self.ObjectIdentifierValue,str(errno),str(why)))
                        else:
                            for res_file in res_files:
                                if res_file[0] == 'amdSec' and res_file[2] == 'digiprovMD':
                                    # [5:] drops the first five characters of the file reference.
                                    self.premis_obj = res_file[8][5:]
                                    self.premis_objpath = '%s/%s/%s' % (self.SIPpath,self.ObjectIdentifierValue,self.premis_obj)
                                elif res_file[0] == 'amdSec' and res_file[2] == 'techMD' and res_file[16] == 'ADDML':
                                    self.addml_obj = res_file[8][5:]
                                    self.addml_objpath = '%s/%s/%s' % (self.SIPpath,self.ObjectIdentifierValue,self.addml_obj)
                    self.PremisObjectTotalSize,errno,why = ESSMD.getFileSizePremis(FILENAME=self.premis_objpath)
                    if errno:
                        _fail('Failed to get PREMISobjects total size in METSfile for object: %s, errno: %s, why: %s' % (self.ObjectIdentifierValue,str(errno),str(why)))
                    else:
                        logging.debug('Object: %s, PremisObjectTotalSize: %s',self.ObjectIdentifierValue,str(self.PremisObjectTotalSize))
                ###################################################
                # Check if MetsFgrp001TotalSize is equal to PremisObjectTotalSize
                if self.ok:
                    if self.MetsFgrp001TotalSize == self.PremisObjectTotalSize:
                        logging.info('Succeeded to verify METS Fgrp001 and PREMIS for object: %s, MetsFgrp001TotalSize: %s is equal to PremisObjectTotalSize: %s',self.ObjectIdentifierValue,str(self.MetsFgrp001TotalSize),str(self.PremisObjectTotalSize))
                    else:
                        _fail('Failed to verify METS Fgrp001 and PREMIS for object: %s, MetsFgrp001TotalSize: %s is not equal to PremisObjectTotalSize: %s' % (self.ObjectIdentifierValue,str(self.MetsFgrp001TotalSize),str(self.PremisObjectTotalSize)))
                ##########################################
                # Get CMetsTotalSize
                if self.ok:
                    self.CMetsTotalSize,errno,why = ESSMD.getTotalSize(FILENAME=self.Cmets_objpath)
                    if errno:
                        _fail('Failed to get CMetsTotalSize for object: %s, errno: %s, why: %s' % (self.ObjectIdentifierValue,str(errno),str(why)))
                    else:
                        logging.debug('Object: %s, raw CMetsTotalSize: %s',self.ObjectIdentifierValue,str(self.CMetsTotalSize))
                        if self.CMetsSize:
                            # Add Content Mets filesize
                            self.CMetsTotalSize[0] += 1
                            self.CMetsTotalSize[1] += self.CMetsSize
                        logging.debug('Object: %s, CMetsTotalSize: %s',self.ObjectIdentifierValue,str(self.CMetsTotalSize))
                ##########################################
                # Get TarFileSize
                if self.ok:
                    self.TarFileSize,errno,why = ESSPGM.Check().getFileSizeTAR(self.ObjectPath)
                    if errno:
                        _fail('Failed to get TarFileSize for object: %s, errno: %s, why: %s' % (self.ObjectIdentifierValue,str(errno),str(why)))
                    else:
                        logging.debug('Object: %s, TarFileSize: %s',self.ObjectIdentifierValue,str(self.TarFileSize))
                ###################################################
                # Check if CMetsTotalSize is equal to TarFileSize
                if self.ok:
                    if self.CMetsTotalSize == self.TarFileSize:
                        logging.info('Succeeded to verify total package size for object: %s, CMetsTotalSize: %s is equal to TarFileSize: %s',self.ObjectIdentifierValue,str(self.CMetsTotalSize),str(self.TarFileSize))
                    else:
                        _fail('Failed to verify total package size for object: %s, CMetsTotalSize: %s is not equal to TarFileSize: %s' % (self.ObjectIdentifierValue,str(self.CMetsTotalSize),str(self.TarFileSize)))
                ########################
                # get checksum for PMETS file
                if self.ok:
                    self.PMetsMessageDigestAlgorithm = self.ChecksumAlgorithm
                    self.PMetsMessageDigest = ''
                    self.Pmets_obj_checksum,errno,why = ESSPGM.Check().checksum(self.Pmets_objpath, self.PMetsMessageDigestAlgorithm) # Checksum
                    if errno:
                        # Report the file that was actually checksummed (the PMETS file).
                        _fail('Failed to get checksum for: %s, Error: %s' % (self.Pmets_objpath,str(why)))
                    else:
                        logging.debug('Checksum for PMETS file: %s',self.Pmets_obj_checksum)
                        # NOTE(review): the digest above was computed with
                        # self.ChecksumAlgorithm but is recorded as algorithm 1
                        # -- confirm this is intended.
                        self.PMetsMessageDigestAlgorithm = 1
                        self.PMetsMessageDigest = self.Pmets_obj_checksum
                ########################
                # get checksum for CMETS file
                if self.ok:
                    self.Cmets_obj_checksum,errno,why = ESSPGM.Check().checksum(self.Cmets_objpath, self.CMetsMessageDigestAlgorithm) # Checksum
                    if errno:
                        _fail('Failed to get checksum for: %s, Error: %s' % (self.Cmets_objpath,str(why)))
                    else:
                        logging.debug('Checksum for CMETS file: %s',self.Cmets_obj_checksum)
                ###################################################
                # Check CMetsMessageDigest
                if self.ok:
                    if self.ValidateChecksum:
                        if self.CMetsMessageDigest == self.Cmets_obj_checksum:
                            logging.info('Succeeded to verify Content Mets MessageDigest for object: %s',self.ObjectIdentifierValue)
                        else:
                            _fail('Failed to verify Content Mets MessageDigest for object: %s, CMetsMessageDigest: %s is not equal to FileMessageDigest: %s' % (self.ObjectIdentifierValue,str(self.CMetsMessageDigest),str(self.Cmets_obj_checksum)))
                    else:
                        logging.warning('Checksum validate is disabled for object: %s',self.ObjectIdentifierValue)
                if self.ValidateXML:
                    ########################
                    # XML Schema Validate CMETS, PMETS, PREMIS and ADDML files.
                    # PREMIS and ADDML are only validated when a separate file
                    # was found for them.
                    schema_targets = (
                        ('Content METS', self.Cmets_objpath, True),
                        ('Package METS', self.Pmets_objpath, True),
                        ('PREMIS', self.premis_objpath, self.premis_obj),
                        ('ADDML', self.addml_objpath, self.addml_obj),
                    )
                    for label, path, wanted in schema_targets:
                        if not (self.ok and wanted):
                            continue
                        errno,why = ESSMD.validate(FILENAME=path)
                        if errno:
                            _fail('Failed to schema validate %s file for object: %s, why: %s' % (label, self.ObjectIdentifierValue, str(why)))
                        else:
                            logging.info('Succeeded to schema validate %s file for object: %s',label,self.ObjectIdentifierValue)
                else:
                    logging.warning('Schema validate XML is disabled for object: %s',self.ObjectIdentifierValue)
                self.stopVerTime = _time_of_day()
                self.VerTime = self.stopVerTime-self.startVerTime
                if self.VerTime.seconds < 1: self.VerTime = datetime.timedelta(seconds=1)   #Fix min time to 1 second if it is zero.
                self.ObjectSizeMB = int(self.ObjectSize)/1048576
                self.VerMBperSEC = int(self.ObjectSizeMB)/int(self.VerTime.seconds)
                ##################################
                # Write metadatafiles to DB-blob or FTP
                if self.ok:
                    if ESSDB.DB().action('ESSConfig','GET',('Value',),('Name','MD_FTP_HOST'))[0][0]:
                        if self.Cmets_obj:
                            ##################################
                            # Write CMETS metadatafile to DB-blob
                            res,errno,why = ESSmetablob.prod().StoreMetadataBlob(ObjectUUID=self.ObjectUUID,
                                                                                 ObjectIdentifierValue=self.ObjectIdentifierValue,
                                                                                 ObjectMetadataType=26,
                                                                                 FILENAME=self.Cmets_objpath,
                                                                                 FTPFileName=string.replace(self.Cmets_obj,self.ObjectIdentifierValue+'/',''),
                                                                                 FTPflag=1,
                                                                                 DBflag=0,
                                                                                 AgentIdentifierValue=AgentIdentifierValue)
                            if errno:
                                _fail('Failed to store Content METS file to FTP server or DB-blob: %s, errno: %s, why: %s' % (self.ObjectIdentifierValue, str(errno), str(why)))
                            else:
                                logging.info('Succeeded to store Content METS file to FTP server or DB-blob for object: %s',self.ObjectIdentifierValue)
                        if self.premis_obj:
                            ##################################
                            # Write PREMIS metadatafile to DB-blob
                            res,errno,why = ESSmetablob.prod().StoreMetadataBlob(ObjectUUID=self.ObjectUUID,
                                                                                 ObjectIdentifierValue=self.ObjectIdentifierValue,
                                                                                 ObjectMetadataType=27,
                                                                                 FILENAME=self.premis_objpath,
                                                                                 FTPFileName=self.premis_obj,
                                                                                 FTPflag=1,
                                                                                 DBflag=0,
                                                                                 AgentIdentifierValue=AgentIdentifierValue)
                            if errno:
                                _fail('Failed to store PREMIS file to FTP server or DB-blob: %s, errno: %s, why: %s' % (self.ObjectIdentifierValue, str(errno), str(why)))
                            else:
                                logging.info('Succeeded to store PREMIS file to FTP server or DB-blob for object: %s',self.ObjectIdentifierValue)
                        if self.addml_obj:
                            ##################################
                            # Write ADDML metadatafile to DB-blob
                            res,errno,why = ESSmetablob.prod().StoreMetadataBlob(ObjectUUID=self.ObjectUUID,
                                                                                 ObjectIdentifierValue=self.ObjectIdentifierValue,
                                                                                 ObjectMetadataType=25,
                                                                                 FILENAME=self.addml_objpath,
                                                                                 FTPFileName=self.addml_obj,
                                                                                 FTPflag=1,
                                                                                 DBflag=0,
                                                                                 AgentIdentifierValue=AgentIdentifierValue)
                            if errno:
                                _fail('Failed to store ADDML file to FTP server or DB-blob: %s, errno: %s, why: %s' % (self.ObjectIdentifierValue, str(errno), str(why)))
                            else:
                                logging.info('Succeeded to store ADDML file to FTP server or DB-blob for object: %s',self.ObjectIdentifierValue)
                    else:
                        logging.info('Skip to store metadata to FTP server or DB-blob for object: %s',self.ObjectIdentifierValue)
                if not self.ok:
                    errno,why = ESSPGM.DB().SetAIPstatus(self.IngestTable, self.ext_IngestTable, AgentIdentifierValue, self.ObjectUUID, 51, 4)
                    if errno:
                        logging.error('Failed to update DB status for AIP: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why))
                    else:
                        _fail('Failed to validate AIP package: ' + self.ObjectIdentifierValue)
                else:
                    logging.info('Succeeded to validate AIP package: ' + self.ObjectIdentifierValue + ' , ' + str(self.VerMBperSEC) + ' MB/Sec and Time: ' + str(self.VerTime))
                    self.timestamp_utc = datetime.datetime.utcnow().replace(microsecond=0,tzinfo=pytz.utc)
                    self.timestamp_dst = self.timestamp_utc.astimezone(self.tz)
                    res,errno,why = ESSDB.DB().action(self.IngestTable,'UPD',('StatusProcess','59',
                                                                              'StatusActivity','0',
                                                                              'CMetaMessageDigestAlgorithm',self.CMetsMessageDigestAlgorithm,
                                                                              'CMetaMessageDigest',self.CMetsMessageDigest,
                                                                              'PMetaMessageDigestAlgorithm',self.PMetsMessageDigestAlgorithm,
                                                                              'PMetaMessageDigest',self.PMetsMessageDigest,
                                                                              'LastEventDate',self.timestamp_utc.replace(tzinfo=None),
                                                                              'linkingAgentIdentifierValue',AgentIdentifierValue,
                                                                              'LocalDBdatetime',self.timestamp_utc.replace(tzinfo=None)),
                                                      ('ObjectIdentifierValue',self.ObjectIdentifierValue))
                    if errno:
                        logging.error('Failed to update Local DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why))
                    else:
                        ESSPGM.Events().create('1050','','ESSArch AIPValidate',ProcVersion,'0','',2,self.ObjectIdentifierValue)
                    if errno == 0 and ExtDBupdate:
                        # The external DB gets the local-timezone timestamp.
                        ext_res,ext_errno,ext_why = ESSMSSQL.DB().action(self.IngestTable,'UPD',('StatusProcess','59',
                                                                                                 'StatusActivity','0',
                                                                                                 'CMetaMessageDigestAlgorithm',self.CMetsMessageDigestAlgorithm,
                                                                                                 'CMetaMessageDigest',self.CMetsMessageDigest,
                                                                                                 'PMetaMessageDigestAlgorithm',self.PMetsMessageDigestAlgorithm,
                                                                                                 'PMetaMessageDigest',self.PMetsMessageDigest,
                                                                                                 'LastEventDate',self.timestamp_dst.replace(tzinfo=None),
                                                                                                 'linkingAgentIdentifierValue',AgentIdentifierValue),
                                                                         ('ObjectIdentifierValue',self.ObjectIdentifierValue))
                        if ext_errno:
                            logging.error('Failed to update External DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(ext_why))
                        else:
                            res,errno,why = ESSDB.DB().action(self.IngestTable,'UPD',('ExtDBdatetime',self.timestamp_utc.replace(tzinfo=None)),('ObjectIdentifierValue',self.ObjectIdentifierValue))
                            if errno: logging.error('Failed to update Local DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why))
            elif self.metatype == 0: #self.metatype 0 = No metadata
                logging.info('Skip to validate AIP package: ' + self.ObjectIdentifierValue)
                self.timestamp_utc = datetime.datetime.utcnow().replace(microsecond=0,tzinfo=pytz.utc)
                self.timestamp_dst = self.timestamp_utc.astimezone(self.tz)
                res,errno,why = ESSDB.DB().action(self.IngestTable,'UPD',('StatusProcess','59',
                                                                          'StatusActivity','0',
                                                                          'CMetaMessageDigestAlgorithm','0',
                                                                          'PMetaMessageDigestAlgorithm','0',
                                                                          'LastEventDate',self.timestamp_utc.replace(tzinfo=None),
                                                                          'linkingAgentIdentifierValue',AgentIdentifierValue,
                                                                          'LocalDBdatetime',self.timestamp_utc.replace(tzinfo=None)),
                                                  ('ObjectIdentifierValue',self.ObjectIdentifierValue))
                if errno:
                    logging.error('Failed to update Local DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why))
                else:
                    ESSPGM.Events().create('1050','','ESSArch AIPValidate',ProcVersion,'0','Skip to validate AIP package',2,self.ObjectIdentifierValue)
                if errno == 0 and ExtDBupdate:
                    ext_res,ext_errno,ext_why = ESSMSSQL.DB().action(self.IngestTable,'UPD',('StatusProcess','59',
                                                                                             'StatusActivity','0',
                                                                                             'CMetaMessageDigestAlgorithm','0',
                                                                                             'PMetaMessageDigestAlgorithm','0',
                                                                                             'LastEventDate',self.timestamp_dst.replace(tzinfo=None),
                                                                                             'linkingAgentIdentifierValue',AgentIdentifierValue),
                                                                     ('ObjectIdentifierValue',self.ObjectIdentifierValue))
                    if ext_errno:
                        logging.error('Failed to update External DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(ext_why))
                    else:
                        res,errno,why = ESSDB.DB().action(self.IngestTable,'UPD',('ExtDBdatetime',self.timestamp_utc.replace(tzinfo=None)),('ObjectIdentifierValue',self.ObjectIdentifierValue))
                        if errno: logging.error('Failed to update Local DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why))
        db.close_old_connections()
        self.mLock.release()
        time.sleep(int(self.Time))
    self.mDieFlag=0
def ThreadMain(self, ProcName):
    """Worker loop that validates AIP packages listed in the ingest table.

    Every pass acquires ``self.mLock``, reads ``Time`` and ``Run`` for
    ``ProcName`` from the ``ESSProc`` table and, unless ``Run`` is ``'0'``,
    processes the current worklist.  The lock is released before sleeping
    (2 seconds when stopped, ``Time`` seconds otherwise).  The loop ends
    when ``self.mDieFlag`` is 1; the flag is reset to 0 on exit.

    Args:
        ProcName: value matched against the ``Name`` column of ``ESSProc``;
            also used in log messages.
    """
    logging.info('Starting ' + ProcName)
    self.tz = timezone.get_default_timezone()
    self.ChecksumAlgorithm_CHOICES_dict = dict(ChecksumAlgorithm_CHOICES)
    self.ChecksumAlgorithm_CHOICES_invdict = ESSPGM.Check().invert_dict(
        self.ChecksumAlgorithm_CHOICES_dict)
    while 1:
        if self.mDieFlag == 1:
            break  # Request for death
        self.mLock.acquire()
        # try/finally: the lock must not stay held if a DB call or a
        # validation step raises.
        try:
            self.Time, self.Run = ESSDB.DB().action(
                'ESSProc', 'GET', ('Time', 'Run'), ('Name', ProcName))[0]
            stop_requested = self.Run == '0'
            if stop_requested:
                logging.info('Stopping ' + ProcName)
                ESSDB.DB().action(
                    'ESSProc', 'UPD',
                    ('Status', '0', 'Run', '0', 'PID', '0'),
                    ('Name', ProcName))
                self.RunFlag = 0
            else:
                self._process_worklist(ProcName)
                db.close_old_connections()
        finally:
            self.mLock.release()
        if stop_requested:
            if Debug:
                logging.info('RunFlag: 0')
            time.sleep(2)
        else:
            time.sleep(int(self.Time))
    self.mDieFlag = 0


def _process_worklist(self, ProcName):
    """Fetch the pending ingest rows and validate them one by one.

    Before each row the ``Run`` and ``Pause`` columns of ``ESSProc`` are
    re-read: ``Run == '0'`` interrupts the main thread and abandons the
    rest of the worklist; ``Pause == 1`` blocks, polling every 60 seconds.
    """
    self.IngestTable = ESSDB.DB().action(
        'ESSConfig', 'GET', ('Value', ), ('Name', 'IngestTable'))[0][0]
    if ExtDBupdate:
        self.ext_IngestTable = self.IngestTable
    else:
        self.ext_IngestTable = ''
    # Columns 3-5 (MetaObjectIdentifier, MetaObjectSize, DataObjectSize)
    # are fetched but not used below.
    self.dbget, errno, why = ESSDB.DB().action(
        self.IngestTable, 'GET4',
        ('ObjectIdentifierValue', 'ObjectPackageName', 'PolicyId',
         'MetaObjectIdentifier', 'MetaObjectSize', 'DataObjectSize',
         'ObjectSize', 'ObjectUUID'),
        ('StatusActivity', '=', '0', 'AND',
         'StatusProcess', 'BETWEEN', 49, 'AND', 51))
    if errno:
        logging.error('Failed to access Local DB, error: ' + str(why))
    # NOTE(review): what ESSDB returns as the result on error is not visible
    # here; "or ()" keeps a None/empty result from raising in the loop.
    for self.obj in (self.dbget or ()):
        self.ProcDB = ESSDB.DB().action(
            'ESSProc', 'GET', ('Run', 'Pause'), ('Name', ProcName))[0]
        if self.ProcDB[0] == '0':
            logging.info('Stopping ' + ProcName)
            ESSDB.DB().action(
                'ESSProc', 'UPD',
                ('Status', '0', 'Run', '0', 'PID', '0'),
                ('Name', ProcName))
            thread.interrupt_main()
            time.sleep(5)
            break
        elif self.ProcDB[1] == 1:
            while 1:
                time.sleep(60)
                self.ProcDB = ESSDB.DB().action(
                    'ESSProc', 'GET', ('Run', 'Pause'),
                    ('Name', ProcName))[0]
                if self.ProcDB[1] == 1:
                    logging.info('Process is in pause state')
                else:
                    break
        self._validate_object()


def _validate_object(self):
    """Load the policy for the row in ``self.obj`` and validate or skip it."""
    self.ObjectIdentifierValue = self.obj[0]
    self.ObjectPackageName = self.obj[1]
    self.PolicyId = self.obj[2]
    self.ObjectSize = self.obj[6]
    self.ObjectUUID = self.obj[7]
    logging.debug('self.obj: ' + str(self.obj))
    self.ok = 1
    ArchivePolicy_obj = ArchivePolicy.objects.get(
        PolicyStat=1, PolicyID=self.PolicyId)
    ###########################################################
    # set variables
    self.AIPpath = ArchivePolicy_obj.AIPpath
    self.metatype = ArchivePolicy_obj.IngestMetadata
    self.ChecksumAlgorithm = ArchivePolicy_obj.ChecksumAlgorithm
    self.SIPpath = ArchivePolicy_obj.IngestPath
    self.ValidateChecksum = ArchivePolicy_obj.ValidateChecksum
    self.ValidateXML = ArchivePolicy_obj.ValidateXML
    self.ObjectPath = os.path.join(self.AIPpath, self.ObjectPackageName)
    self.Pmets_objpath = os.path.join(
        self.AIPpath, self.ObjectIdentifierValue + '_Package_METS.xml')
    if self.metatype > 0:
        self._validate_with_metadata()
    elif self.metatype == 0:
        # self.metatype 0 = No metadata
        logging.info('Skip to validate AIP package: ' +
                     self.ObjectIdentifierValue)
        self._mark_validated(
            ('CMetaMessageDigestAlgorithm', '0',
             'PMetaMessageDigestAlgorithm', '0'),
            'Skip to validate AIP package')


def _validate_with_metadata(self):
    """Run all checks for an object that has metadata and record the result.

    Sets status 50/5 while working, then either status 51/4 plus a failure
    event, or the final status-59 update through ``_mark_validated``.
    """
    # One localtime() snapshot so hour/minute/second belong to the same instant.
    now = time.localtime()
    self.startVerTime = datetime.timedelta(
        seconds=now[5], minutes=now[4], hours=now[3])
    errno, why = ESSPGM.DB().SetAIPstatus(
        self.IngestTable, self.ext_IngestTable, AgentIdentifierValue,
        self.ObjectUUID, 50, 5)
    if errno:
        logging.error('Failed to update DB status for AIP: ' +
                      str(self.ObjectIdentifierValue) + ' error: ' + str(why))
    logging.info('Start validate AIP package: ' + self.ObjectIdentifierValue)
    # Reset per-object state so nothing from the previous object is reused.
    self.Cmets_obj = None
    self.Cmets_objpath = None
    self.premis_obj = None
    self.premis_objpath = None
    self.addml_obj = None
    self.addml_objpath = None
    self.CMetsMessageDigest = None
    self.CMetsSize = None

    self._run_validation_checks()

    now = time.localtime()
    self.stopVerTime = datetime.timedelta(
        seconds=now[5], minutes=now[4], hours=now[3])
    self.VerTime = self.stopVerTime - self.startVerTime
    if self.VerTime.seconds < 1:
        # Fix min time to 1 second if it is zero.
        self.VerTime = datetime.timedelta(seconds=1)
    self.ObjectSizeMB = int(self.ObjectSize) / 1048576
    self.VerMBperSEC = int(self.ObjectSizeMB) / int(self.VerTime.seconds)

    ##################################
    # Write metadatafiles to DB-blob or FTP
    if self.ok:
        self._store_metadata_files()

    if not self.ok:
        errno, why = ESSPGM.DB().SetAIPstatus(
            self.IngestTable, self.ext_IngestTable, AgentIdentifierValue,
            self.ObjectUUID, 51, 4)
        if errno:
            logging.error('Failed to update DB status for AIP: ' +
                          str(self.ObjectIdentifierValue) +
                          ' error: ' + str(why))
        else:
            self._fail_validation('Failed to validate AIP package: ' +
                                  self.ObjectIdentifierValue)
    else:
        logging.info('Succeeded to validate AIP package: ' +
                     self.ObjectIdentifierValue + ' , ' +
                     str(self.VerMBperSEC) + ' MB/Sec and Time: ' +
                     str(self.VerTime))
        self._mark_validated(
            ('CMetaMessageDigestAlgorithm', self.CMetsMessageDigestAlgorithm,
             'CMetaMessageDigest', self.CMetsMessageDigest,
             'PMetaMessageDigestAlgorithm', self.PMetsMessageDigestAlgorithm,
             'PMetaMessageDigest', self.PMetsMessageDigest),
            '')


def _run_validation_checks(self):
    """Run the size, checksum and schema checks; clears ``self.ok`` on failure.

    Each step is skipped once ``self.ok`` is 0, except where noted.
    """
    oid = self.ObjectIdentifierValue

    ##########################################
    # Get PMETS info
    if self.ok:
        [self.Package_info, self.CMets_info], errno, why = ESSMD.getPMETSInfo(
            FILENAME=self.Pmets_objpath)
        if errno:
            self._fail_validation(
                'Failed to get PMETS info for object: %s, errno: %s, why: %s'
                % (oid, str(errno), str(why)))
        else:
            logging.debug('Object: %s, Package_info: %s, CMets_info: %s',
                          oid, str(self.Package_info), str(self.CMets_info))
            # CMets_info and Package_info: ['A0007600_Content_METS.xml', 'MD5', 'b0270cb4d196b72b87fe27ce6242df18', 64058, 'text/xml']
            self.PackageMessageDigestAlgorithm = \
                self.ChecksumAlgorithm_CHOICES_invdict[self.Package_info[1]]
            if self.Package_info[2]:
                self.PackageMessageDigest = self.Package_info[2]
            if self.Package_info[3]:
                self.PackageSize = int(self.Package_info[3])
            self.Cmets_obj = self.CMets_info[0]
            if self.metatype in [1, 2, 3]:
                self.Cmets_objpath = os.path.join(self.AIPpath, self.Cmets_obj)
            elif self.metatype in [4]:
                self.Cmets_objpath = os.path.join(self.SIPpath, self.Cmets_obj)
            self.CMetsMessageDigestAlgorithm = \
                self.ChecksumAlgorithm_CHOICES_invdict[self.CMets_info[1]]
            if self.CMets_info[2]:
                self.CMetsMessageDigest = self.CMets_info[2]
            if self.CMets_info[3]:
                self.CMetsSize = int(self.CMets_info[3])
            ##########################################################
            # Check if ObjectPath and Cmets_objpath exist
            # (Cmets_objpath stays None for a metatype outside 1-4.)
            if not (os.path.exists(self.ObjectPath)
                    and self.Cmets_objpath is not None
                    and os.path.exists(self.Cmets_objpath)):
                self._fail_validation(
                    'The path to Object: %s or METS_Metaobject: %s is not accessible!'
                    % (self.ObjectPath, self.Cmets_objpath))

    ##########################################
    # Get MetsFgrp001TotalSize from METSfile
    if self.ok:
        self.MetsFgrp001TotalSize, errno, why = ESSMD.getFileSizeFgrp001(
            FILENAME=self.Cmets_objpath)
        if errno:
            self._fail_validation(
                'Failed to get MetsFgrp001TotalSize for object: %s, errno: %s, why: %s'
                % (oid, str(errno), str(why)))
        else:
            logging.debug('Object: %s, MetsFgrp001TotalSize: %s',
                          oid, str(self.MetsFgrp001TotalSize))

    ##########################################
    # Get PremisObjectTotalSize from METSfile
    if self.ok:
        if self.metatype in [1, 2, 3]:
            self.premis_objpath = self.Cmets_objpath
        elif self.metatype in [4]:
            # Get metadata from METS file
            res_info, res_files, res_struct, errno, why = \
                ESSMD.getMETSFileList(FILENAME=self.Cmets_objpath)
            if errno:
                self._fail_validation(
                    'Failed to get metadata from content METS for object: %s, errno: %s, why: %s'
                    % (oid, str(errno), str(why)))
            else:
                for res_file in res_files:
                    if res_file[0] != 'amdSec':
                        continue
                    if res_file[2] == 'digiprovMD':
                        # [5:] drops the first five characters of the
                        # reference (presumably a 'file:' prefix - confirm).
                        self.premis_obj = res_file[8][5:]
                        self.premis_objpath = '%s/%s/%s' % (
                            self.SIPpath, oid, self.premis_obj)
                    elif res_file[2] == 'techMD' and res_file[16] == 'ADDML':
                        self.addml_obj = res_file[8][5:]
                        self.addml_objpath = '%s/%s/%s' % (
                            self.SIPpath, oid, self.addml_obj)
        # As in the original flow, this runs even if the METS file list
        # above could not be read.
        self.PremisObjectTotalSize, errno, why = ESSMD.getFileSizePremis(
            FILENAME=self.premis_objpath)
        if errno:
            self._fail_validation(
                'Failed to get PREMISobjects total size in METSfile for object: %s, errno: %s, why: %s'
                % (oid, str(errno), str(why)))
        else:
            logging.debug('Object: %s, PremisObjectTotalSize: %s',
                          oid, str(self.PremisObjectTotalSize))

    ###################################################
    # Check if MetsFgrp001TotalSize is equal to PremisObjectTotalSize
    if self.ok:
        if self.MetsFgrp001TotalSize == self.PremisObjectTotalSize:
            logging.info(
                'Succeeded to verify METS Fgrp001 and PREMIS for object: %s, MetsFgrp001TotalSize: %s is equal to PremisObjectTotalSize: %s',
                oid, str(self.MetsFgrp001TotalSize),
                str(self.PremisObjectTotalSize))
        else:
            self._fail_validation(
                'Failed to verify METS Fgrp001 and PREMIS for object: %s, MetsFgrp001TotalSize: %s is not equal to PremisObjectTotalSize: %s'
                % (oid, str(self.MetsFgrp001TotalSize),
                   str(self.PremisObjectTotalSize)))

    ##########################################
    # Get CMetsTotalSize
    if self.ok:
        self.CMetsTotalSize, errno, why = ESSMD.getTotalSize(
            FILENAME=self.Cmets_objpath)
        if errno:
            self._fail_validation(
                'Failed to get CMetsTotalSize for object: %s, errno: %s, why: %s'
                % (oid, str(errno), str(why)))
        else:
            logging.debug('Object: %s, raw CMetsTotalSize: %s',
                          oid, str(self.CMetsTotalSize))
            if self.CMetsSize:
                # Add Content Mets filesize: one more item plus its size.
                self.CMetsTotalSize[0] += 1
                self.CMetsTotalSize[1] += self.CMetsSize
            logging.debug('Object: %s, CMetsTotalSize: %s',
                          oid, str(self.CMetsTotalSize))

    ##########################################
    # Get TarFileSize
    if self.ok:
        self.TarFileSize, errno, why = ESSPGM.Check().getFileSizeTAR(
            self.ObjectPath)
        if errno:
            self._fail_validation(
                'Failed to get TarFileSize for object: %s, errno: %s, why: %s'
                % (oid, str(errno), str(why)))
        else:
            logging.debug('Object: %s, TarFileSize: %s',
                          oid, str(self.TarFileSize))

    ###################################################
    # Check if CMetsTotalSize is equal to TarFileSize
    if self.ok:
        if self.CMetsTotalSize == self.TarFileSize:
            logging.info(
                'Succeeded to verify total package size for object: %s, CMetsTotalSize: %s is equal to TarFileSize: %s',
                oid, str(self.CMetsTotalSize), str(self.TarFileSize))
        else:
            self._fail_validation(
                'Failed to verify total package size for object: %s, CMetsTotalSize: %s is not equal to TarFileSize: %s'
                % (oid, str(self.CMetsTotalSize), str(self.TarFileSize)))

    ########################
    # get checksum for PMETS file
    if self.ok:
        self.PMetsMessageDigestAlgorithm = self.ChecksumAlgorithm
        self.PMetsMessageDigest = ''
        self.Pmets_obj_checksum, errno, why = ESSPGM.Check().checksum(
            self.Pmets_objpath, self.PMetsMessageDigestAlgorithm)
        if errno:
            # Report the file that was actually checksummed (the PMETS file).
            self._fail_validation(
                'Failed to get checksum for: %s, Error: %s'
                % (self.Pmets_objpath, str(why)))
        else:
            logging.debug('Checksum for PMETS file: %s',
                          self.Pmets_obj_checksum)
            # NOTE(review): the checksum above was computed with
            # self.ChecksumAlgorithm, yet the recorded algorithm is forced
            # to 1 here. Kept as-is because it changes stored data; confirm
            # whether this should remain self.ChecksumAlgorithm.
            self.PMetsMessageDigestAlgorithm = 1
            self.PMetsMessageDigest = self.Pmets_obj_checksum

    ########################
    # get checksum for CMETS file
    if self.ok:
        self.Cmets_obj_checksum, errno, why = ESSPGM.Check().checksum(
            self.Cmets_objpath, self.CMetsMessageDigestAlgorithm)
        if errno:
            self._fail_validation(
                'Failed to get checksum for: %s, Error: %s'
                % (self.Cmets_objpath, str(why)))
        else:
            logging.debug('Checksum for CMETS file: %s',
                          self.Cmets_obj_checksum)

    ###################################################
    # Check CMetsMessageDigest
    if self.ok:
        if self.ValidateChecksum:
            if self.CMetsMessageDigest == self.Cmets_obj_checksum:
                logging.info(
                    'Succeeded to verify Content Mets MessageDigest for object: %s',
                    oid)
            else:
                self._fail_validation(
                    'Failed to verify Content Mets MessageDigest for object: %s, CMetsMessageDigest: %s is not equal to FileMessageDigest: %s'
                    % (oid, str(self.CMetsMessageDigest),
                       str(self.Cmets_obj_checksum)))
        else:
            logging.warning('Checksum validate is disabled for object: %s',
                            oid)

    ########################
    # XML Schema Validate CMETS, PMETS, PREMIS and ADDML files
    if self.ValidateXML:
        schema_targets = (
            ('Content METS', self.Cmets_objpath, True),
            ('Package METS', self.Pmets_objpath, True),
            # PREMIS / ADDML are validated only when such a file was found.
            ('PREMIS', self.premis_objpath, self.premis_obj),
            ('ADDML', self.addml_objpath, self.addml_obj),
        )
        for label, path, present in schema_targets:
            if not (self.ok and present):
                continue
            errno, why = ESSMD.validate(FILENAME=path)
            if errno:
                self._fail_validation(
                    'Failed to schema validate %s file for object: %s, why: %s'
                    % (label, oid, str(why)))
            else:
                logging.info(
                    'Succeeded to schema validate %s file for object: %s',
                    label, oid)
    else:
        logging.warning('Schema validate XML is disabled for object: %s', oid)


def _store_metadata_files(self):
    """Store the CMETS, PREMIS and ADDML files when MD_FTP_HOST is configured.

    Every file that was found is attempted, even if an earlier store failed;
    any failure clears ``self.ok``.
    """
    oid = self.ObjectIdentifierValue
    ftp_host = ESSDB.DB().action(
        'ESSConfig', 'GET', ('Value', ), ('Name', 'MD_FTP_HOST'))[0][0]
    if not ftp_host:
        logging.info(
            'Skip to store metadata to FTP server or DB-blob for object: %s',
            oid)
        return
    # (label, found, ObjectMetadataType, local path, FTP file name)
    targets = []
    if self.Cmets_obj:
        targets.append(('Content METS', 26, self.Cmets_objpath,
                        self.Cmets_obj.replace(oid + '/', '')))
    if self.premis_obj:
        targets.append(('PREMIS', 27, self.premis_objpath, self.premis_obj))
    if self.addml_obj:
        targets.append(('ADDML', 25, self.addml_objpath, self.addml_obj))
    for label, metadata_type, path, ftp_name in targets:
        res, errno, why = ESSmetablob.prod().StoreMetadataBlob(
            ObjectUUID=self.ObjectUUID,
            ObjectIdentifierValue=oid,
            ObjectMetadataType=metadata_type,
            FILENAME=path,
            FTPFileName=ftp_name,
            FTPflag=1,
            DBflag=0,
            AgentIdentifierValue=AgentIdentifierValue)
        if errno:
            self._fail_validation(
                'Failed to store %s file to FTP server or DB-blob: %s, errno: %s, why: %s'
                % (label, oid, str(errno), str(why)))
        else:
            logging.info(
                'Succeeded to store %s file to FTP server or DB-blob for object: %s',
                label, oid)


def _mark_validated(self, digest_fields, event_detail):
    """Set StatusProcess 59 / StatusActivity 0 in the local and external DB.

    Args:
        digest_fields: flat ``(column, value, ...)`` tuple of digest columns
            inserted after the status columns in both updates.
        event_detail: text passed to the success event created after the
            local update.

    The external DB is updated only when the local update returned errno 0
    and ``ExtDBupdate`` is set; on success ``ExtDBdatetime`` is then written
    to the local DB.
    """
    self.timestamp_utc = datetime.datetime.utcnow().replace(
        microsecond=0, tzinfo=pytz.utc)
    self.timestamp_dst = self.timestamp_utc.astimezone(self.tz)
    utc_naive = self.timestamp_utc.replace(tzinfo=None)
    key = ('ObjectIdentifierValue', self.ObjectIdentifierValue)
    status_fields = (('StatusProcess', '59', 'StatusActivity', '0') +
                     tuple(digest_fields))
    res, errno, why = ESSDB.DB().action(
        self.IngestTable, 'UPD',
        status_fields + ('LastEventDate', utc_naive,
                         'linkingAgentIdentifierValue', AgentIdentifierValue,
                         'LocalDBdatetime', utc_naive),
        key)
    if errno:
        logging.error('Failed to update Local DB: ' +
                      str(self.ObjectIdentifierValue) + ' error: ' + str(why))
    else:
        ESSPGM.Events().create('1050', '', 'ESSArch AIPValidate', ProcVersion,
                               '0', event_detail, 2,
                               self.ObjectIdentifierValue)
    if errno == 0 and ExtDBupdate:
        # The external DB gets the timestamp converted to self.tz.
        ext_res, ext_errno, ext_why = ESSMSSQL.DB().action(
            self.IngestTable, 'UPD',
            status_fields + ('LastEventDate',
                             self.timestamp_dst.replace(tzinfo=None),
                             'linkingAgentIdentifierValue',
                             AgentIdentifierValue),
            key)
        if ext_errno:
            logging.error('Failed to update External DB: ' +
                          str(self.ObjectIdentifierValue) +
                          ' error: ' + str(ext_why))
        else:
            res, errno, why = ESSDB.DB().action(
                self.IngestTable, 'UPD', ('ExtDBdatetime', utc_naive), key)
            if errno:
                logging.error('Failed to update Local DB: ' +
                              str(self.ObjectIdentifierValue) +
                              ' error: ' + str(why))


def _fail_validation(self, event_info):
    """Log ``event_info`` as an error, create a '1050' event with outcome
    argument '1' for the current object, and set ``self.ok`` to 0."""
    self.event_info = event_info
    logging.error(self.event_info)
    ESSPGM.Events().create('1050', '', 'ESSArch AIPValidate', ProcVersion,
                           '1', self.event_info, 2,
                           self.ObjectIdentifierValue)
    self.ok = 0
def ThreadMain(self,ProcName): logging.info('Starting ' + ProcName) while 1: if self.mDieFlag==1: break # Request for death self.mLock.acquire() self.Time,self.Run = ESSDB.DB().action('ESSProc','GET',('Time','Run'),('Name',ProcName))[0] if self.Run == '0': logging.info('Stopping ' + ProcName) ESSDB.DB().action('ESSProc','UPD',('Status','0','Run','0','PID','0'),('Name',ProcName)) self.RunFlag=0 self.mLock.release() #if Debug: print 'RunFlag: 0' time.sleep(2) continue # Process Item lock=thread.allocate_lock() self.IngestTable = ESSDB.DB().action('ESSConfig','GET',('Value',),('Name','IngestTable'))[0][0] self.PolicyTable = ESSDB.DB().action('ESSConfig','GET',('Value',),('Name','PolicyTable'))[0][0] if ExtDBupdate: self.ext_IngestTable = self.IngestTable else: self.ext_IngestTable = '' #if Debug: logging.info('Start to list worklist (self.dbget)') self.dbget,errno,why = ESSDB.DB().action(self.IngestTable,'GET4',('ObjectIdentifierValue','ObjectUUID','PolicyId','INFORMATIONCLASS'), ('StatusActivity','=','0','AND', 'StatusProcess','BETWEEN',24,'AND',26)) if errno: logging.error('Failed to access Local DB, error: ' + str(why)) for self.obj in self.dbget: self.ProcDB = ESSDB.DB().action('ESSProc','GET',('Run','Pause'),('Name',ProcName))[0] if self.ProcDB[0]=='0': logging.info('Stopping ' + ProcName) ESSDB.DB().action('ESSProc','UPD',('Status','0','Run','0','PID','0'),('Name',ProcName)) thread.interrupt_main() time.sleep(5) break elif self.ProcDB[1]==1: while 1: time.sleep(60) self.ProcDB = ESSDB.DB().action('ESSProc','GET',('Run','Pause'),('Name',ProcName))[0] if self.ProcDB[1]==1: logging.info('Process is in pause state') else: break self.ok = 1 ########################################################### # get policy info self.ObjectIdentifierValue = ESSPGM.Check().str2unicode(self.obj[0]) self.ObjectUUID = self.obj[1] self.PolicyId = self.obj[2] self.INFORMATIONCLASS = self.obj[3] logging.info('Start to validate format for SIP: %s', self.ObjectIdentifierValue) 
self.ChecksumAlgorithm_CHOICES_dict = dict(ChecksumAlgorithm_CHOICES) self.ChecksumAlgorithm_CHOICES_invdict = ESSPGM.Check().invert_dict(self.ChecksumAlgorithm_CHOICES_dict) self.PolicyDB,errno,why = ESSDB.DB().action(self.PolicyTable,'GET3',('AIPpath','IngestMetadata','INFORMATIONCLASS','ChecksumAlgorithm','IngestPath','Mode'),('PolicyID',self.PolicyId)) if errno: logging.error('Failed to access Local DB, error: ' + str(why)) self.ok = 0 if self.ok: ########################################################### # set variables self.AIPpath = ESSPGM.Check().str2unicode(self.PolicyDB[0][0]) self.metatype = self.PolicyDB[0][1] self.Policy_INFORMATIONCLASS = self.PolicyDB[0][2] self.ChecksumAlgorithm = self.PolicyDB[0][3] self.ChecksumAlgorithm_name = self.ChecksumAlgorithm_CHOICES_dict[self.ChecksumAlgorithm] self.SIPpath = ESSPGM.Check().str2unicode(self.PolicyDB[0][4]) self.DBmode = self.PolicyDB[0][5] logging.debug('self.obj: %s', str(self.obj)) logging.debug('self.ObjectIdentifierValue: %s', self.ObjectIdentifierValue) logging.debug('Len self.ObjectIdentifierValue: %s', len(self.ObjectIdentifierValue)) logging.debug('self.SIPpath: %s', self.SIPpath) logging.debug('self.AIPpath: %s', self.AIPpath) if self.metatype == 2: ############################################ # Create PREMISfile from TIFFEdit.RES if metatype is 2 logging.info('Start to convert RESfile to PREMISfile for object: ' + self.ObjectIdentifierValue) self.xml_PREMIS,self.errno,self.why = ESSMD.RES2PREMIS(os.path.join(self.SIPpath,self.ObjectIdentifierValue),AgentIdentifierValue[8:]) if self.errno == 10: self.event_info = 'Failed to parse RESfile, error.num: %s error.det: %s' % (str(self.errno),str(self.why)) logging.error(self.event_info) ESSPGM.Events().create('1022','RES2PREMIS','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) elif self.errno == 20: self.event_info = 'I/O error to access RESfile, error.num: %s error.det: %s' % 
(str(self.errno),str(self.why)) logging.error(self.event_info) ESSPGM.Events().create('1022','RES2PREMIS','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) elif self.errno == 30: self.event_info = 'Validation errors for PREMIS file, error.num: %s error.det: %s' % (str(self.errno),str(self.why)) logging.error(self.event_info) ESSPGM.Events().create('1022','RES2PREMIS','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) elif self.errno == 40: self.event_info = 'Problem to write PREMIS file, error.num: %s error.det: %s' % (str(self.errno),str(self.why)) logging.error(self.event_info) ESSPGM.Events().create('1022','RES2PREMIS','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) if self.errno > 1: self.event_info = 'Problem to convert RES to PREMIS for SIP package: %s, error.num: %s error.desc: %s' % (self.ObjectIdentifierValue,str(self.errno),str(self.why)) logging.error(self.event_info) ESSPGM.Events().create('1022','RES2PREMIS','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) self.ok = 0 elif self.errno == 1: self.event_info = 'Warning in convert RES to PREMIS for SIP package: %s, error.num: %s warning.desc: %s' % (self.ObjectIdentifierValue,str(self.errno),str(self.why)) logging.warning(self.event_info) ESSPGM.Events().create('1022','RES2PREMIS','ESSArch SIPValidateFormat',ProcVersion,'0',self.event_info,self.DBmode,self.ObjectIdentifierValue) else: ESSPGM.Events().create('1022','RES2PREMIS','ESSArch SIPValidateFormat',ProcVersion,'0','',self.DBmode,self.ObjectIdentifierValue) if self.ok: ############################################ # Clean RES SIP from "junk" files self.errno,self.why = ESSPGM.Check().CleanRES_SIP(os.path.join(self.SIPpath,self.ObjectIdentifierValue)) if self.errno: self.event_info = 'Problem to clean RES SIP from "junk files" for SIP package: %s, 
error.num: %s error.desc: %s' % (self.ObjectIdentifierValue,str(self.errno),str(self.why)) logging.error(self.event_info) ESSPGM.Events().create('1022','CleanRES_SIP','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) self.ok = 0 else: ESSPGM.Events().create('1022','CleanRES_SIP','ESSArch SIPValidateFormat',ProcVersion,'0','',self.DBmode,self.ObjectIdentifierValue) elif self.metatype == 1: ########################################################### # Create PREMISfile from Content_METS if metatype is 1 res,errno,why = ESSMD.METS2PREMIS(self.SIPpath,self.ObjectIdentifierValue) if not errno: logging.info('Succeeded to convert Content_METS to PREMISfile for information package: %s', self.ObjectIdentifierValue) else: self.event_info = 'Problem to convert Content_METS to PREMISfile for information package: %s, errno: %s, detail: %s' % (self.ObjectIdentifierValue,str(errno),str(why)) logging.error(self.event_info) #ESSPGM.Events().create('1025','','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) self.ok = 0 elif self.metatype in [4]: self.SIPpath = os.path.join(self.SIPpath,self.ObjectIdentifierValue) if self.ok: if self.metatype in [1,2,3]: ########################################################### # get object_list from PREMIS file self.Premis_filepath = u'%s/%s/%s_PREMIS.xml' % (self.SIPpath,self.ObjectIdentifierValue,self.ObjectIdentifierValue) self.object_list,errno,why = ESSMD.getPremisObjects(FILENAME=self.Premis_filepath) if errno == 0: logging.info('Succeeded to get object_list from premis for information package: %s', self.ObjectIdentifierValue) else: self.event_info = 'Problem to get object_list from premis for information package: %s, errno: %s, detail: %s' % (self.ObjectIdentifierValue,str(errno),str(why)) logging.error(self.event_info) ESSPGM.Events().create('1025','','ESSArch 
SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) self.ok = 0 elif self.metatype in [4]: ########################################################### # get object_list from METS if os.path.exists(os.path.join(self.SIPpath,'sip.xml')): mets_file = 'sip.xml' self.SIPmets_objpath = os.path.join(self.SIPpath,mets_file) elif os.path.exists(os.path.join(self.SIPpath,'mets.xml')): mets_file = 'mets.xml' self.SIPmets_objpath = os.path.join(self.SIPpath,mets_file) #elif os.path.exists(os.path.join(self.SIPpath,'%s_Content_METS.xml' % self.ObjectIdentifierValue)): # mets_file = '%s_Content_METS.xml' % self.ObjectIdentifierValue # self.SIPmets_objpath = os.path.join(self.SIPpath,mets_file) else: self.SIPmets_objpath = '' self.object_list,errno,why = ESSMD.getAIPObjects(FILENAME=self.SIPmets_objpath) if errno == 0: logging.info('Succeeded to get object_list from METS for information package: %s', self.ObjectIdentifierValue) self.F_Checksum,errno,why = ESSPGM.Check().checksum(self.SIPmets_objpath, self.ChecksumAlgorithm) # Checksum self.F_SIZE = os.stat(self.SIPmets_objpath)[6] self.object_list.append([mets_file,self.ChecksumAlgorithm_name,self.F_Checksum,self.F_SIZE,'']) else: self.event_info = 'Problem to get object_list from METS for information package: %s, errno: %s, detail: %s' % (self.ObjectIdentifierValue,str(errno),str(why)) logging.error(self.event_info) ESSPGM.Events().create('1025','','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,2,self.ObjectIdentifierValue) self.ok = 0 if self.ok: ########################################################### # update ObjectIdentifierValue to StatusProcess: 25 and StatusActivity: 5 errno,why = ESSPGM.DB().SetAIPstatus(self.IngestTable, self.ext_IngestTable, AgentIdentifierValue, self.ObjectUUID, 25, 5) if errno: logging.error('Failed to update DB status for AIP: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why)) logging.info('Format validate object: ' + 
self.ObjectIdentifierValue) if self.ok: ########################################################### # Start to format validate SIP self.startTime = datetime.timedelta(seconds=time.localtime()[5],minutes=time.localtime()[4],hours=time.localtime()[3]) self.ObjectNumItems = 0 self.ObjectSize = 0 if self.metatype == 1: ############################################ # Object have metatype 1 (METS) self.tmp_object_id = (u'%s/%s_PREMIS.xml') % (self.ObjectIdentifierValue,self.ObjectIdentifierValue) self.object_list.append([self.tmp_object_id,'', '', '', '', 'ARCHMETAxmlWrap', 'PREMIS']) elif self.metatype == 2: ############################################ # Object have metatype 2 (RES) #self.tmp_object_id = ('%s/TIFFEdit.RES') % self.ObjectIdentifierValue #self.object_list.append([self.tmp_object_id,'', '', '', '', 'ARCHMETA', '']) self.tmp_object_id = (u'%s/%s_PREMIS.xml') % (self.ObjectIdentifierValue,self.ObjectIdentifierValue) self.object_list.append([self.tmp_object_id,'', '', '', '', 'ARCHMETAxmlWrap', 'PREMIS']) elif self.metatype == 3: ############################################ # Object have metatype 3 (ADDML) self.tmp_object_id = (u'%s/%s_ADDML.xml') % (self.ObjectIdentifierValue,self.ObjectIdentifierValue) self.object_list.append([self.tmp_object_id,'', '', '', '', 'ARCHMETAxmlWrap', 'ADDML']) self.tmp_object_id = (u'%s/%s_PREMIS.xml') % (self.ObjectIdentifierValue,self.ObjectIdentifierValue) self.object_list.append([self.tmp_object_id,'', '', '', '', 'ARCHMETAxmlWrap', 'PREMIS']) for self.object in self.object_list: logging.debug('variable self.SIPpath: %s, type: %s' % (self.SIPpath,type(self.SIPpath))) logging.debug('variable self.object[0]: %s, type: %s' % (self.object[0],type(self.object[0]))) self.filepath = os.path.join(self.SIPpath, self.object[0]) logging.debug('variable self.filepath: %s, type: %s' % (self.filepath,type(self.filepath))) #self.filepath = ESSPGM.Check().Unicode2isoStr(self.filepath.encode('utf-8')) #self.filepath_iso = 
ESSPGM.Check().unicode2str(self.filepath) #logging.debug('variable self.filepath_iso: %s, type: %s' % (self.filepath_iso,type(self.filepath_iso))) if self.metatype in [1,2,3] and self.ObjectNumItems == 0: if self.object[0] == self.ObjectIdentifierValue: logging.info('First premis object match information package: %s', self.ObjectIdentifierValue) else: self.event_info = 'First premis object do not match information package: %s, premis_object: %s' % (self.ObjectIdentifierValue,self.object[0]) logging.error(self.event_info) ESSPGM.Events().create('1025','','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) self.ok = 0 break if self.ok and os.access(self.filepath,os.X_OK): pass else: self.event_info = 'Object path: %s do not exist or is not executable!' % self.filepath logging.error(self.event_info) ESSPGM.Events().create('1025','','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) self.ok = 0 break if self.ok and os.access(self.filepath,os.R_OK): pass else: self.event_info = 'Object path: %s do not exist or is not readable!' % self.filepath logging.error(self.event_info) ESSPGM.Events().create('1025','','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) self.ok = 0 break if self.ok and os.access(self.filepath,os.W_OK): pass else: self.event_info = 'Missing permission, Object path: %s is not writeable!' % self.filepath logging.error(self.event_info) ESSPGM.Events().create('1025','','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) self.ok = 0 break if self.metatype in [1,2,3]: if self.ok and not (self.ObjectNumItems == 0 or self.object[5] == 'ARCHMETA' or self.object[5] == 'ARCHMETAxmlWrap'): if int(os.stat(self.filepath)[6]) == int(self.object[4]): self.ObjectSize += int(self.object[4]) else: self.event_info = 'Filesize for object path: %s is %s and premis object size is %s. 
The sizes must match!' % (self.filepath,str(os.stat(self.filepath)[6]),str(self.object[4])) logging.error(self.event_info) ESSPGM.Events().create('1025','','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) self.ok = 0 break if self.ok: self.F_Checksum,errno,why = ESSPGM.Check().checksum(self.filepath,self.object[1]) # Checksum if errno: self.event_info = 'Failed to get checksum for: %s, Error: %s' % (self.filepath,str(why)) logging.error(self.event_info) ESSPGM.Events().create('1025','','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) self.ok = 0 if self.ok: if self.F_Checksum == self.object[2]: pass else: self.event_info = 'Checksum for object path: %s is %s and premis object checksum is %s. The checksum must match!' % (self.filepath,self.F_Checksum,self.object[2]) logging.error(self.event_info) ESSPGM.Events().create('1025','','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) self.ok = 0 break elif self.ok and not self.ObjectNumItems == 0 and (self.object[5] == 'ARCHMETA' or self.object[5] == 'ARCHMETAxmlWrap'): if int(os.stat(self.filepath)[6]) > 0: pass else: self.event_info = 'Filesize for object path: %s is 0 bytes. The size should be more then 0 bytes!' % self.filepath logging.error(self.event_info) ESSPGM.Events().create('1025','','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) self.ok = 0 break elif self.metatype in [4]: if self.ok: #[objectIdentifierValue,messageDigestAlgorithm,messageDigest,a_SIZE,a_MIMETYPE] if int(os.stat(self.filepath)[6]) == int(self.object[3]): self.ObjectSize += int(self.object[3]) else: self.event_info = 'Filesize for object path: %s is %s and METS object size is %s. The sizes must match!' 
% (self.filepath,str(os.stat(self.filepath)[6]),str(self.object[3])) logging.error(self.event_info) ESSPGM.Events().create('1025','','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) self.ok = 0 break if self.ok: self.F_Checksum,errno,why = ESSPGM.Check().checksum(self.filepath,self.object[1]) # Checksum if errno: self.event_info = 'Failed to get checksum for: %s, Error: %s' % (self.filepath,str(why)) logging.error(self.event_info) ESSPGM.Events().create('1025','','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) self.ok = 0 if self.ok: if self.F_Checksum == self.object[2]: pass else: self.event_info = 'Checksum for object path: %s is %s and METS object checksum is %s. The checksum must match!' % (self.filepath,self.F_Checksum,self.object[2]) logging.error(self.event_info) ESSPGM.Events().create('1025','','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) self.ok = 0 break self.ObjectNumItems += 1 if self.ok: if self.metatype in [1,2,3]: ############################################################################### # Check if SIP filesystem path contain files that not exist in metadatafile for self.filesystem_object in ESSPGM.Check().GetFiletree(os.path.join(self.SIPpath,self.ObjectIdentifierValue)): self.missmatch_flag = 0 for self.object in self.object_list: #if os.path.join(self.ObjectIdentifierValue,self.filesystem_object) == self.object[0].encode('utf-8'): if os.path.join(self.ObjectIdentifierValue,self.filesystem_object) == self.object[0]: self.missmatch_flag = 0 break else: self.missmatch_flag = 1 if self.missmatch_flag: self.filesystempath = u'%s/%s/%s' % (self.SIPpath,self.ObjectIdentifierValue,self.filesystem_object) self.event_info = 'Filesystem file: %s do not exist in metadatafile for object: %s' % (self.filesystempath,os.path.join(self.SIPpath,self.ObjectIdentifierValue)) logging.error(self.event_info) 
ESSPGM.Events().create('1025','','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) self.ok = 0 if self.metatype in [4]: ############################################################################### # Check if SIP filesystem path contain files that not exist in metadatafile for self.filesystem_object in ESSPGM.Check().GetFiletree(self.SIPpath): self.missmatch_flag = 0 for self.object in self.object_list: if self.filesystem_object == self.object[0]: self.missmatch_flag = 0 break else: self.missmatch_flag = 1 if self.missmatch_flag: self.filesystempath = u'%s/%s' % (self.SIPpath,self.filesystem_object) self.event_info = 'Filesystem file: %s do not exist in metadatafile for object: %s' % (self.filesystempath,self.SIPpath) logging.error(self.event_info) ESSPGM.Events().create('1025','','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) self.ok = 0 if self.ok: if self.metatype in [4]: ############################################################################### # Check if SIP INFORMATIONCLASS match Policy if self.INFORMATIONCLASS == self.Policy_INFORMATIONCLASS: self.event_info = 'Object: %s InformationClass: %s match defined InformaionClass: %s in PolicyID: %s' % (self.ObjectIdentifierValue,self.INFORMATIONCLASS,self.Policy_INFORMATIONCLASS,self.PolicyId) logging.info(self.event_info) else: self.event_info = 'Object: %s InformationClass: %s do not match defined InformationClass: %s in PolicyID: %s' % (self.ObjectIdentifierValue,self.INFORMATIONCLASS,self.Policy_INFORMATIONCLASS,self.PolicyId) logging.error(self.event_info) ESSPGM.Events().create('1025','','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) self.ok = 0 if self.ok: self.stopTime = datetime.timedelta(seconds=time.localtime()[5],minutes=time.localtime()[4],hours=time.localtime()[3]) self.MeasureTime = self.stopTime-self.startTime self.ObjectSizeMB = 
self.ObjectSize/1048576 if self.MeasureTime.seconds < 1: self.MeasureTime = datetime.timedelta(seconds=1) #Fix min time to 1 second if it is zero. self.VerMBperSEC = int(self.ObjectSizeMB)/int(self.MeasureTime.seconds) if self.ok: logging.info('Succeeded to validate SIP package: ' + self.ObjectIdentifierValue + ' , ' + str(self.VerMBperSEC) + ' MB/Sec and Time: ' + str(self.MeasureTime)) errno,why = ESSPGM.DB().SetAIPstatus(self.IngestTable, self.ext_IngestTable, AgentIdentifierValue, self.ObjectUUID, 29, 0) if errno: logging.error('Failed to update DB status for AIP: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why)) else: ESSPGM.Events().create('1025','','ESSArch SIPValidateFormat',ProcVersion,'0','',self.DBmode,self.ObjectIdentifierValue) else: errno,why = ESSPGM.DB().SetAIPstatus(self.IngestTable, self.ext_IngestTable, AgentIdentifierValue, self.ObjectUUID, 26, 4) if errno: logging.error('Failed to update DB status for AIP: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why)) else: self.event_info = 'Failed to validate SIP package: ' + self.ObjectIdentifierValue logging.error(self.event_info) ESSPGM.Events().create('1025','','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue) db.close_old_connections() self.mLock.release() time.sleep(int(self.Time)) self.mDieFlag=0
def ThreadMain(self,ProcName):
    """Worker loop that checksums AIP packages and writes their Package METS.

    Each pass of the outer loop takes ``self.mLock``, reads ``Time`` and
    ``Run`` for ``ProcName`` from the ``ESSProc`` table, and then fetches the
    rows of the ingest table whose ``StatusProcess`` is between 39 and 40 and
    whose ``StatusActivity`` is '0'.  For each row it loads the policy,
    builds the METS agent list for the policy's ``IngestMetadata`` value
    (``self.metatype``), and then:

    * ``ChecksumAlgorithm > 0``: checksums the content METS file and the
      ``<ObjectIdentifierValue>.tar`` package, writes the Package METS file,
      stores the digest in the local (and optionally external) DB, creates
      an AIC METS file for metatype 4, and sets the status to 49/0 on
      success or 40/100 on failure.
    * ``ChecksumAlgorithm == 0``: stores an empty digest and sets the status
      to 49/0 directly.

    The loop ends when ``self.mDieFlag`` is 1; the flag is reset to 0 on exit.

    ProcName -- name of this process' row in the ``ESSProc`` table.
    """
    logging.info('Starting ' + ProcName)
    TimeZone = timezone.get_default_timezone_name()
    self.tz=pytz.timezone(TimeZone)
    # Schema settings used when building the METS namespace definitions below.
    METS_NAMESPACE = SchemaProfile.objects.get(entity = 'mets_namespace').value
    METS_SCHEMALOCATION = SchemaProfile.objects.get(entity = 'mets_schemalocation').value
    METS_PROFILE = SchemaProfile.objects.get(entity = 'mets_profile').value
    XLINK_NAMESPACE = SchemaProfile.objects.get(entity = 'xlink_namespace').value
    XSI_NAMESPACE = SchemaProfile.objects.get(entity = 'xsi_namespace').value
    while 1:
        if self.mDieFlag==1: break      # Request for death
        self.mLock.acquire()
        self.Time,self.Run = ESSDB.DB().action('ESSProc','GET',('Time','Run'),('Name',ProcName))[0]
        if self.Run == '0':
            # Process has been told to stop: mark it stopped, release the lock and idle.
            logging.info('Stopping ' + ProcName)
            ESSDB.DB().action('ESSProc','UPD',('Status','0','Run','0','PID','0'),('Name',ProcName))
            self.RunFlag=0
            self.mLock.release()
            if Debug: logging.info('RunFlag: 0')
            time.sleep(2)
            continue
        # Process Item
        # NOTE(review): 'lock' is allocated here but not referenced again in this method.
        lock=thread.allocate_lock()
        Cmets_obj = Parameter.objects.get(entity='content_descriptionfile').value
        self.IngestTable = ESSDB.DB().action('ESSConfig','GET',('Value',),('Name','IngestTable'))[0][0]
        self.PolicyTable = ESSDB.DB().action('ESSConfig','GET',('Value',),('Name','PolicyTable'))[0][0]
        if ExtDBupdate:
            self.ext_IngestTable = self.IngestTable
        else:
            self.ext_IngestTable = ''
        # Select the objects to work on: StatusProcess 39..40 with StatusActivity '0'.
        self.dbget,errno,why = ESSDB.DB().action(self.IngestTable,'GET4',('ObjectIdentifierValue', 'ObjectUUID', 'PolicyId', 'ObjectSize'), ('StatusProcess','BETWEEN',39,'AND',40,'AND', 'StatusActivity','=','0'))
        if errno: logging.error('Failed to access Local DB, error: ' + str(why))
        for self.obj in self.dbget:
            self.ok = 1
            # Re-read the Run/Pause columns before every object so a stop or pause takes effect mid-batch.
            self.ProcDB = ESSDB.DB().action('ESSProc','GET',('Run','Pause'),('Name',ProcName))[0]
            if self.ProcDB[0]=='0':
                logging.info('Stopping ' + ProcName)
                ESSDB.DB().action('ESSProc','UPD',('Status','0','Run','0','PID','0'),('Name',ProcName))
                thread.interrupt_main()
                time.sleep(5)
                break
            elif self.ProcDB[1]==1:
                # Paused: poll every 60 seconds until the Pause column is no longer 1.
                while 1:
                    time.sleep(60)
                    self.ProcDB = ESSDB.DB().action('ESSProc','GET',('Run','Pause'),('Name',ProcName))[0]
                    if self.ProcDB[1]==1:
                        logging.info('Process is in pause state')
                    else:
                        break
            self.ObjectIdentifierValue = self.obj[0]
            self.ObjectUUID = self.obj[1]
            self.PolicyId = self.obj[2]
            self.ObjectSize = self.obj[3]
            self.PolicyDB,errno,why = ESSDB.DB().action(self.PolicyTable,'GET3',('AIPpath','IngestMetadata','ChecksumAlgorithm','IngestPath'),('PolicyID',self.PolicyId))
            if errno:
                logging.error('Failed to access Local DB, error: ' + str(why))
                self.ok = 0
            if self.ok:
                ###########################################################
                # set variables
                self.AIPpath = self.PolicyDB[0][0]
                self.metatype = self.PolicyDB[0][1]
                self.ChecksumAlgorithm = self.PolicyDB[0][2]
                # self.CA is the ChecksumAlgorithm_CHOICES entry for the policy's algorithm value.
                self.CA = dict(ChecksumAlgorithm_CHOICES)[self.ChecksumAlgorithm]
                self.SIPpath = self.PolicyDB[0][3]
                self.p_obj = self.ObjectIdentifierValue + '.tar'
                self.ObjectPath = os.path.join(self.AIPpath,self.p_obj)
                self.SIProotpath = os.path.join(self.SIPpath,self.ObjectIdentifierValue)
                if self.metatype in [4]:
                    #self.Cmets_obj = '%s/%s_Content_METS.xml' % (self.ObjectIdentifierValue,self.ObjectIdentifierValue)
                    #self.Cmets_objpath = os.path.join(self.SIPpath,self.Cmets_obj)
                    #self.Cmets_obj = Cmets_obj.replace('{uuid}',self.ObjectIdentifierValue)
                    # Content METS name comes from the 'content_descriptionfile' parameter, located under the SIP root.
                    self.Cmets_obj = Cmets_obj.replace('{objid}',self.ObjectIdentifierValue)
                    self.Cmets_objpath = os.path.join(self.SIProotpath,self.Cmets_obj)
                elif self.metatype in [1,2,3]:
                    # Content METS sits next to the package in AIPpath.
                    self.Cmets_obj = '%s_Content_METS.xml' % (self.ObjectIdentifierValue)
                    self.Cmets_objpath = os.path.join(self.AIPpath,self.Cmets_obj)
                self.Pmets_obj = '%s_Package_METS.xml' % (self.ObjectIdentifierValue)
                self.Pmets_objpath = os.path.join(self.AIPpath,self.Pmets_obj)
                self.AIC_UUID = None
                self.AIC_UUID_rel_ObjectUUIDs = []
            if self.ok:
                # Build the agent (and altRecordID) lists that go into the Package METS.
                METS_agent_list = []
                METS_altRecordID_list = []
                if self.metatype == 1:
                    ############################################
                    # Object have metatype 1 (METS)
                    self.METS_LABEL = 'ESSArch AIP'
                    # Get SIP Content METS information
                    self.METSfilepath = os.path.join(self.SIPpath,self.ObjectIdentifierValue + '/metadata/SIP/' + self.ObjectIdentifierValue + '_Content_METS.xml')
                    res_info, res_files, res_struct, error, why = ESSMD.getMETSFileList(FILENAME=self.METSfilepath)
                    # Carry over the SIP agents except CREATOR/SOFTWARE entries; this process adds its own below.
                    for agent in res_info[2]:
                        if not (agent[0] == 'CREATOR' and agent[3] == 'SOFTWARE'):
                            METS_agent_list.append(agent)
                    METS_agent_list.append(['CREATOR','INDIVIDUAL','',AgentIdentifierValue,[]])
                    METS_agent_list.append(['CREATOR', 'OTHER', 'SOFTWARE', 'ESSArch', ['VERSION=%s' % ProcVersion]])
                elif self.metatype == 2:
                    ############################################
                    # Object have metatype 2 (RES)
                    self.METS_LABEL = 'Imaging AIP RA'
                    METS_agent_list.append(['ARCHIVIST','ORGANIZATION','','Riksarkivet',[]])
                    METS_agent_list.append(['CREATOR','ORGANIZATION','','Riksarkivet',[]])
                    METS_agent_list.append(['CREATOR','INDIVIDUAL','',AgentIdentifierValue,[]])
                    METS_agent_list.append(['CREATOR', 'OTHER', 'SOFTWARE', 'ESSArch', ['VERSION=%s' % ProcVersion]])
                elif self.metatype == 3:
                    ############################################
                    # Object have metatype 3 (ADDML)
                    self.METS_LABEL = 'Born Digital AIP RA'
                    METS_agent_list.append(['ARCHIVIST','ORGANIZATION','','Riksarkivet',[]])
                    METS_agent_list.append(['CREATOR','ORGANIZATION','','Riksarkivet',[]])
                    METS_agent_list.append(['CREATOR','INDIVIDUAL','',AgentIdentifierValue,[]])
                    METS_agent_list.append(['CREATOR', 'OTHER', 'SOFTWARE', 'ESSArch', ['VERSION=%s' % ProcVersion]])
                elif self.metatype in [4]:
                    ############################################
                    # Object have metatype 4 (eARD METS)
                    res_info, res_files, res_struct, error, why = ESSMD.getMETSFileList(FILENAME=self.Cmets_objpath)
                    # All agents from the content METS are carried over (the CREATOR/SOFTWARE filter is disabled).
                    for agent in res_info[2]:
                        #if not (agent[0] == 'CREATOR' and agent[3] == 'SOFTWARE'):
                        METS_agent_list.append(agent)
                    self.METS_LABEL = res_info[0][0]
                    # Agent entries for this metatype have six fields (two extra None slots) instead of five.
                    METS_agent_list.append(['CREATOR',None, 'INDIVIDUAL',None,AgentIdentifierValue,[]])
                    METS_agent_list.append(['CREATOR',None, 'OTHER', 'SOFTWARE', 'ESSArch', ['VERSION=%s' % ProcVersion]])
                    for altRecordID in res_info[3]:
                        METS_altRecordID_list.append(altRecordID)
            logging.debug('self.obj: '+str(self.obj))
            # NOTE(review): this dispatch appears to run even when self.ok is 0 (policy lookup failed),
            # in which case self.ChecksumAlgorithm holds whatever an earlier object left behind -- confirm.
            if self.ChecksumAlgorithm > 0: #self.ChecksumAlgorithm 1 = MD5, 2 = SHA-256
                # Time of day as a timedelta; used to report throughput further down.
                self.startCalTime = datetime.timedelta(seconds=time.localtime()[5],minutes=time.localtime()[4],hours=time.localtime()[3])
                # Mark the object as in progress (StatusProcess 40, StatusActivity 5).
                errno,why = ESSPGM.DB().SetAIPstatus(self.IngestTable, self.ext_IngestTable, AgentIdentifierValue, self.ObjectUUID, 40, 5)
                if errno: logging.error('Failed to update DB status for AIP: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why))
                logging.info('Start create Package METS for: ' + self.ObjectIdentifierValue)
                if self.ok:
                    ###########################################################
                    # Create PMETS for AIP package
                    # Checksum, size and modification time of the content METS file.
                    self.M_CHECKSUM, errno, why = ESSPGM.Check().checksum(self.Cmets_objpath,self.CA)
                    if errno:
                        self.event_info = 'Problem to get checksum for METS object for AIP package: ' + str(self.Cmets_objpath)
                        logging.error(self.event_info)
                        ESSPGM.Events().create('1030','','ESSArch AIPCreator',ProcVersion,'1',self.event_info,2,self.ObjectIdentifierValue)
                        self.ok = 0
                    # The stat below runs even if the checksum above failed.
                    self.M_statinfo = os.stat(self.Cmets_objpath)
                    self.M_SIZE = self.M_statinfo.st_size
                    self.M_utc_mtime = datetime.datetime.utcfromtimestamp(self.M_statinfo.st_mtime).replace(tzinfo=pytz.utc)
                    self.M_lociso_mtime = self.M_utc_mtime.astimezone(self.tz).isoformat()
                    # Checksum, size and modification time of the package (tar) file.
                    self.P_CHECKSUM, errno, why = ESSPGM.Check().checksum(self.ObjectPath,self.CA)
                    if errno:
                        self.event_info = 'Problem to get checksum for AIP package: ' + str(self.ObjectPath)
                        logging.error(self.event_info)
                        ESSPGM.Events().create('1040','','ESSArch AIPChecksum',ProcVersion,'1',self.event_info,2,self.ObjectIdentifierValue)
                        self.ok = 0
                    self.P_statinfo = os.stat(self.ObjectPath)
                    self.P_SIZE = self.P_statinfo.st_size
                    self.P_utc_mtime = datetime.datetime.utcfromtimestamp(self.P_statinfo.st_mtime).replace(tzinfo=pytz.utc)
                    self.P_lociso_mtime = self.P_utc_mtime.astimezone(self.tz).isoformat()
                    if self.metatype in [1,2,3]:
                        # Build the Package METS document and write it next to the package.
                        self.PMETSdoc = ESSMD.createPMets(
                            ID=self.ObjectIdentifierValue,
                            LABEL=self.METS_LABEL,
                            AGENT=METS_agent_list,
                            P_SIZE=self.P_SIZE,
                            P_CREATED=self.P_lociso_mtime,
                            P_CHECKSUM=self.P_CHECKSUM,
                            P_CHECKSUMTYPE=self.CA,
                            M_SIZE=self.M_SIZE,
                            M_CREATED=self.M_lociso_mtime,
                            M_CHECKSUM=self.M_CHECKSUM,
                            M_CHECKSUMTYPE=self.CA,
                        )
                        errno,why = ESSMD.writeToFile(self.PMETSdoc,self.Pmets_objpath)
                        if errno:
                            self.event_info = 'Problem to write PMETS to file for AIP package: ' + str(self.Pmets_objpath)
                            logging.error(self.event_info)
                            ESSPGM.Events().create('1040','','ESSArch AIPChecksum',ProcVersion,'1',self.event_info,2,self.ObjectIdentifierValue)
                            time.sleep(2)
                            self.ok = 0
                    elif self.metatype in [4]:
                        # Two file entries: the content METS (amdSec/techMD) and the tar package (fileSec).
                        ms_files = []
                        ms_files.append(['amdSec', None, 'techMD', 'techMD001', None,
                                         None, 'ID%s' % str(uuid.uuid1()), 'URL', 'file:%s/%s' % (self.ObjectIdentifierValue,self.Cmets_obj), 'simple',
                                         self.M_CHECKSUM, self.CA, self.M_SIZE, 'text/xml', self.M_lociso_mtime,
                                         'OTHER', 'METS', None])
                        ms_files.append(['fileSec', None, None, None, None,
                                         None, 'ID%s' % str(uuid.uuid1()), 'URL', 'file:%s' % self.p_obj, 'simple',
                                         self.P_CHECKSUM, self.CA, self.P_SIZE, 'application/x-tar', self.P_lociso_mtime,
                                         'tar', 'techMD001', None])
                        # define namespaces
                        self.namespacedef = 'xmlns:mets="%s"' % METS_NAMESPACE
                        self.namespacedef += ' xmlns:xlink="%s"' % XLINK_NAMESPACE
                        self.namespacedef += ' xmlns:xsi="%s"' % XSI_NAMESPACE
                        self.namespacedef += ' xsi:schemaLocation="%s %s"' % (METS_NAMESPACE, METS_SCHEMALOCATION)
                        errno,info_list = ESSMD.Create_IP_mets(ObjectIdentifierValue = self.ObjectIdentifierValue,
                                                               METS_ObjectPath = self.Pmets_objpath,
                                                               agent_list = METS_agent_list,
                                                               altRecordID_list = METS_altRecordID_list,
                                                               file_list = ms_files,
                                                               namespacedef = self.namespacedef,
                                                               METS_LABEL = self.METS_LABEL,
                                                               METS_PROFILE = METS_PROFILE,
                                                               METS_TYPE = 'AIP',
                                                               METS_DocumentID = self.Pmets_obj,
                                                               TimeZone = TimeZone)
                        # A failure here is only logged; self.ok is left unchanged.
                        if errno: logging.error('Problem to create Package METS file, why: %s' % str(info_list))
                    # NOTE(review): placement of this assignment and of the timing lines below was
                    # inferred from data flow (P_CHECKSUM is only set inside this 'if self.ok') -- confirm.
                    self.ObjectMessageDigest = self.P_CHECKSUM
                self.stopCalTime = datetime.timedelta(seconds=time.localtime()[5],minutes=time.localtime()[4],hours=time.localtime()[3])
                self.CalTime = self.stopCalTime-self.startCalTime
                self.ObjectSizeMB = self.ObjectSize/1048576
                if self.CalTime.seconds < 1: self.CalTime = datetime.timedelta(seconds=1)   #Fix min time to 1 second if it is zero.
                self.CalMBperSEC = int(self.ObjectSizeMB)/int(self.CalTime.seconds)
                logging.info('Finished calculate checksum: ' + self.ObjectIdentifierValue + ' , ' + str(self.CalMBperSEC) + ' MB/Sec and Time: ' + str(self.CalTime))
                if self.ok:
                    # Store the digest and METS size; the local DB gets naive UTC timestamps.
                    self.timestamp_utc = datetime.datetime.utcnow().replace(microsecond=0,tzinfo=pytz.utc)
                    self.timestamp_dst = self.timestamp_utc.astimezone(self.tz)
                    res,errno,why = ESSDB.DB().action(self.IngestTable,'UPD',('ObjectMessageDigestAlgorithm',self.ChecksumAlgorithm,
                                                                              'ObjectMessageDigest',self.ObjectMessageDigest,
                                                                              'MetaObjectSize',self.M_SIZE,
                                                                              'LastEventDate',self.timestamp_utc.replace(tzinfo=None),
                                                                              'linkingAgentIdentifierValue',AgentIdentifierValue,
                                                                              'LocalDBdatetime',self.timestamp_utc.replace(tzinfo=None)),
                                                                             ('ObjectIdentifierValue',self.ObjectIdentifierValue))
                    if errno: logging.error('Failed to update Local DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why))
                    if errno == 0 and ExtDBupdate:
                        # Mirror the update to the external DB; it gets the timestamp converted to self.tz.
                        ext_res,ext_errno,ext_why = ESSMSSQL.DB().action(self.IngestTable,'UPD',('ObjectMessageDigestAlgorithm',self.ChecksumAlgorithm,
                                                                                                 'ObjectMessageDigest',self.ObjectMessageDigest,
                                                                                                 'MetaObjectSize',self.M_SIZE,
                                                                                                 'LastEventDate',self.timestamp_dst.replace(tzinfo=None),
                                                                                                 'linkingAgentIdentifierValue',AgentIdentifierValue),
                                                                                                ('ObjectIdentifierValue',self.ObjectIdentifierValue))
                        if ext_errno: logging.error('Failed to update External DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(ext_why))
                        else:
                            # Record in the local DB when the external DB was last updated.
                            res,errno,why = ESSDB.DB().action(self.IngestTable,'UPD',('ExtDBdatetime',self.timestamp_utc.replace(tzinfo=None)),('ObjectIdentifierValue',self.ObjectIdentifierValue))
                            if errno: logging.error('Failed to update Local DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why))
                if self.ok and self.metatype == 4:
                    ####################################################
                    # Create AIC METS File:
                    # Look up the AIC this IP is related to (at most one row is fetched).
                    aic_obj = ArchiveObject.objects.filter(relaic_set__UUID=self.ObjectUUID)[:1]
                    if aic_obj:
                        self.AIC_UUID = aic_obj.get().ObjectUUID
                        logging.info('Succeeded to get AIC_UUID: %s from DB' % self.AIC_UUID)
                    else:
                        logging.warning('AIC not found for IP object: %s, skip to create AIC METS file' % self.ObjectUUID)
                if self.ok and self.AIC_UUID:
                    # IPs related to the AIC that either have StatusProcess 3000 or are the current object,
                    # ordered by Generation.
                    ip_obj_list = ArchiveObject.objects.filter(Q(StatusProcess=3000) | Q(ObjectUUID=self.ObjectUUID), reluuid_set__AIC_UUID=self.AIC_UUID).order_by('Generation')
                    if ip_obj_list:
                        logging.info('Start create AIC METS: ' + self.AIC_UUID)
                        self.AICmets_objpath = os.path.join(self.AIPpath,self.AIC_UUID + '_AIC_METS.xml')
                        ms_files = []
                        for ip_obj in ip_obj_list:
                            logging.info('Add IP: %s to AIC METS: %s' % (ip_obj.ObjectUUID,self.AIC_UUID))
                            ms_files.append(['fileSec', None, None, None, None,
                                             None, 'ID%s' % str(uuid.uuid1()), 'URL', 'file:%s' % ip_obj.ObjectUUID, 'simple',
                                             ip_obj.ObjectMessageDigest, dict(ChecksumAlgorithm_CHOICES)[ip_obj.ObjectMessageDigestAlgorithm], ip_obj.ObjectSize, 'application/x-tar', ip_obj.CreateDate,
                                             'IP Package', None, None])
                        # define namespaces
                        self.namespacedef = 'xmlns:mets="%s"' % METS_NAMESPACE
                        self.namespacedef += ' xmlns:xlink="%s"' % XLINK_NAMESPACE
                        self.namespacedef += ' xmlns:xsi="%s"' % XSI_NAMESPACE
                        self.namespacedef += ' xsi:schemaLocation="%s %s"' % (METS_NAMESPACE, METS_SCHEMALOCATION)
                        errno,info_list = ESSMD.Create_IP_mets(ObjectIdentifierValue = self.AIC_UUID,
                                                               METS_ObjectPath = self.AICmets_objpath,
                                                               agent_list = [],
                                                               altRecordID_list = [],
                                                               file_list = ms_files,
                                                               namespacedef = self.namespacedef,
                                                               METS_LABEL = 'AIC relation to IP',
                                                               METS_PROFILE = METS_PROFILE,
                                                               METS_TYPE = 'AIC',
                                                               METS_DocumentID = self.AIC_UUID + '_AIC_METS.xml',
                                                               TimeZone = TimeZone)
                        # A failure here is only logged; self.ok is left unchanged.
                        if errno: logging.error('Problem to create AIC METS file, why: %s' % str(info_list))
                    else:
                        logging.error('Problem to get objects related to AIC_UUID: %s from DB' % (self.AIC_UUID))
                        self.ok = 0
                if self.ok:
                    # Success: StatusProcess 49, StatusActivity 0, plus a success event.
                    errno,why = ESSPGM.DB().SetAIPstatus(self.IngestTable, self.ext_IngestTable, AgentIdentifierValue, self.ObjectUUID, 49, 0)
                    if errno: logging.error('Failed to update DB status for AIP: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why))
                    else:
                        self.event_info = 'Succeeded to create checksum for Object: %s' % self.ObjectIdentifierValue
                        logging.info(self.event_info)
                        ESSPGM.Events().create('1040','','ESSArch AIPChecksum',ProcVersion,'0',self.event_info,2,self.ObjectIdentifierValue)
                else:
                    # Failure: StatusProcess 40, StatusActivity 100, plus a failure event.
                    errno,why = ESSPGM.DB().SetAIPstatus(self.IngestTable, self.ext_IngestTable, AgentIdentifierValue, self.ObjectUUID, 40, 100)
                    if errno: logging.error('Failed to update DB status for AIP: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why))
                    else:
                        self.event_info = 'Failed to create checksum for Object: %s' % self.ObjectIdentifierValue
                        logging.error(self.event_info)
                        ESSPGM.Events().create('1040','','ESSArch AIPChecksum',ProcVersion,'1',self.event_info,2,self.ObjectIdentifierValue)
            elif self.ChecksumAlgorithm == 0: #self.ChecksumAlgorithm 0 = No checksum
                # No checksum requested: store an empty digest and set status 49/0 in the same update.
                logging.info('Skip creation of checksum: ' + self.ObjectIdentifierValue)
                self.ObjectMessageDigest = ''
                self.MetaObjectSize = os.stat(self.Cmets_objpath)[6]
                self.timestamp_utc = datetime.datetime.utcnow().replace(microsecond=0,tzinfo=pytz.utc)
                self.timestamp_dst = self.timestamp_utc.astimezone(self.tz)
                res,errno,why = ESSDB.DB().action(self.IngestTable,'UPD',('ObjectMessageDigestAlgorithm',self.ChecksumAlgorithm,
                                                                          'ObjectMessageDigest',self.ObjectMessageDigest,
                                                                          'StatusProcess','49',
                                                                          'StatusActivity','0',
                                                                          'MetaObjectSize',self.MetaObjectSize,
                                                                          'LastEventDate',self.timestamp_utc.replace(tzinfo=None),
                                                                          'linkingAgentIdentifierValue',AgentIdentifierValue,
                                                                          'LocalDBdatetime',self.timestamp_utc.replace(tzinfo=None)),
                                                                         ('ObjectIdentifierValue',self.ObjectIdentifierValue))
                if errno: logging.error('Failed to update Local DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why))
                else: ESSPGM.Events().create('1040','','ESSArch AIPChecksum',ProcVersion,'0','Skip creation of checksum',2,self.ObjectIdentifierValue)
                if errno == 0 and ExtDBupdate:
                    ext_res,ext_errno,ext_why = ESSMSSQL.DB().action(self.IngestTable,'UPD',('ObjectMessageDigestAlgorithm',self.ChecksumAlgorithm,
                                                                                             'ObjectMessageDigest',self.ObjectMessageDigest,
                                                                                             'StatusProcess','49',
                                                                                             'StatusActivity','0',
                                                                                             'MetaObjectSize',self.MetaObjectSize,
                                                                                             'LastEventDate',self.timestamp_dst.replace(tzinfo=None),
                                                                                             'linkingAgentIdentifierValue',AgentIdentifierValue),
                                                                                            ('ObjectIdentifierValue',self.ObjectIdentifierValue))
                    if ext_errno: logging.error('Failed to update External DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(ext_why))
                    else:
                        res,errno,why = ESSDB.DB().action(self.IngestTable,'UPD',('ExtDBdatetime',self.timestamp_utc.replace(tzinfo=None)),('ObjectIdentifierValue',self.ObjectIdentifierValue))
                        if errno: logging.error('Failed to update Local DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why))
        # End of one pass: drop stale DB connections, release the lock and wait self.Time seconds.
        db.close_old_connections()
        self.mLock.release()
        time.sleep(int(self.Time))
    # Loop was left via the die flag; reset it.
    self.mDieFlag=0
def check_storage():
    """Report archived objects that lack a copy on an enabled storage method.

    For every policy with ``PolicyStat=1`` the four storage-method slots
    (``sm_1`` .. ``sm_4``) are inspected.  For each slot whose enabled flag
    is 1, every object of that policy with ``StatusProcess=3000`` and
    ``StatusActivity=0`` is checked for matching storage entries:

    * storage type starting with '2' (disk): the medium must have the same
      ``storageMedium`` value and the ``storageMediumID`` 'disk';
    * storage type starting with '3' (tape): the medium must have the same
      ``storageMedium`` value and a ``storageMediumID`` that starts with the
      slot's target.

    One status line per object and slot is printed.  Objects with no matching
    entry are added as ``<object id=... target=...>`` elements under a
    ``<needcopies>`` root, which is written to
    ``/ESSArch/log/needcopies/needcopies.xml``.  A failure to write that file
    is printed instead of being silently ignored.

    Returns None.
    """
    needcopies_path = '/ESSArch/log/needcopies/needcopies.xml'
    EL_root = etree.Element('needcopies')

    for Policy_obj in ESSArchPolicy.objects.filter(PolicyStat=1).all():
        # Only the enabled flag, the type and the target of each slot are needed here.
        storage_methods = [
            (getattr(Policy_obj, 'sm_%d' % slot),
             getattr(Policy_obj, 'sm_type_%d' % slot),
             getattr(Policy_obj, 'sm_target_%d' % slot))
            for slot in (1, 2, 3, 4)
        ]
        ip_obj_list = ArchiveObject.objects.filter(
            PolicyId=Policy_obj.PolicyID,
            StatusProcess=3000,
            StatusActivity=0).all()
        for ip_obj in ip_obj_list:
            storage_obj_list = ip_obj.storage_set.all()
            for sm_num, (sm_enabled, sm_type, sm_target) in enumerate(storage_methods, 1):
                if sm_enabled != 1:
                    continue
                # First character of the type selects the rule: '2' = disk, '3' = tape.
                # Slicing (not indexing) so an empty type string simply matches nothing.
                sm_class = str(sm_type)[:1]
                storageMediumID_list = []
                for storage_obj in storage_obj_list:
                    storageMedium_obj = storage_obj.storageMediumUUID
                    if sm_class not in ('2', '3'):
                        continue
                    if storageMedium_obj.storageMedium != sm_type:
                        continue
                    medium_id = storageMedium_obj.storageMediumID
                    if sm_class == '2':  # Disk
                        matched = medium_id == 'disk'
                    else:  # Tape
                        matched = medium_id.startswith(sm_target)
                    if matched:
                        storageMediumID_list.append(medium_id)

                storage_count = len(storageMediumID_list)
                found_ids = ','.join(str(e) for e in storageMediumID_list)
                # print(...) with one parenthesised argument prints the same text
                # under the Python 2 print statement and the Python 3 function.
                if storage_count == 0:
                    etree.SubElement(
                        EL_root,
                        'object',
                        attrib={
                            'id': ip_obj.ObjectIdentifierValue,
                            'target': sm_target,
                        })
                    print('Missing storage entry for storage method number: %s, target: %s, for object: %s' % (
                        sm_num, sm_target, ip_obj.ObjectIdentifierValue))
                elif storage_count == 1:
                    print('Found storage entry for storage method number: %s, target: %s (%s), for object: %s' % (
                        sm_num, sm_target, found_ids, ip_obj.ObjectIdentifierValue))
                else:
                    print('Warning found to many storage entry for storage method number: %s, target: %s (%s), for object: %s' % (
                        sm_num, sm_target, found_ids, ip_obj.ObjectIdentifierValue))

    doc = etree.ElementTree(element=EL_root, file=None)
    # writeToFile reports problems through its (errno, why) return value.
    errno, why = ESSMD.writeToFile(doc, needcopies_path)
    if errno:
        print('Failed to write needcopies file: %s, error: %s' % (needcopies_path, str(why)))
def ObjectValidate(self):
    """Validate pending ingest objects and record the outcome in the DB.

    Reads candidate rows from the local ingest table (see the GET4 filter
    below), derives an ``objectstatus`` code for each row from the archive
    policy and/or the external DB (AIS), maps that code to new
    StatusProcess/StatusActivity values, creates an event for most outcomes
    and finally updates the local DB (and the external DB when
    ``ExtDBupdate`` is set).

    objectstatus codes assigned in this method:
        1    object accepted (policy/METS information resolved)
        10   object found in external DB
        11   object has a ProjectGroupCode
        12   object does not have an AIP yet
        13   object is active
        14   object got a PolicyID
        100  policy is not in ESSArch mode
        101  policy not found or not active
        102  problem to get information from package METS
        110  object not found in external DB
        111  object has no (matching) ProjectGroupCode
        112  object already has an AIP
        113  object is not active
        114  no local policy for the ProjectGroupCode

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation had been flattened; confirm against the original file.
    """
    self.IngestTable = ESSDB.DB().action('ESSConfig', 'GET', ('Value', ),
                                         ('Name', 'IngestTable'))[0][0]
    # An empty ext_IngestTable disables the external DB update at the end.
    if ExtDBupdate:
        self.ext_IngestTable = self.IngestTable
    else:
        self.ext_IngestTable = ''
    # Check if exist extDB and got project id
    self.dbget, errno, why = ESSDB.DB().action(
        self.IngestTable, 'GET4',
        ('ObjectIdentifierValue', 'PolicyID', 'StatusProcess',
         'StatusActivity', 'ObjectUUID'),
        ('StatusActivity', '=', '0', 'AND', 'StatusProcess', 'BETWEEN', 9,
         'AND', 14, 'OR', 'StatusActivity', '=', '4', 'AND',
         'StatusProcess', 'BETWEEN', 10, 'AND', 11))
    if errno:
        # Only logged; the loop below still iterates over self.dbget.
        logging.error('Failed to access Local DB, error: ' + str(why))
    for self.obj in self.dbget:
        self.ObjectIdentifierValue = self.obj[0]
        self.PolicyID = self.obj[1]
        self.StatusProcess = self.obj[2]
        self.StatusActivity = self.obj[3]
        ObjectUUID = self.obj[4]
        # Defaults used when neither METS nor the external DB supplies values.
        self.DBmode = 0
        self.ext_ObjectGuid = ObjectUUID
        self.ext_EntryDate = datetime.datetime.utcnow().replace(
            microsecond=0, tzinfo=pytz.utc)
        self.ext_EntryAgentIdentifierValue = None
        self.ext_OAISPackageType = 2
        self.ext_preservationLevelValue = 1
        self.ext_ObjectActive = 0
        # 0 means "nothing decided"; the DB update at the end is then skipped.
        self.objectstatus = 0
        self.ext_ProjectGroupCode = ''
        self.ext_ObjectPackageName = ''
        if Debug:
            logging.info('StatusProcess 9, ObjectIdentifierValue ' +
                         str(self.ObjectIdentifierValue))
        #Check....
        if self.PolicyID:
            # The local row already carries a PolicyID: look up the active policy.
            ArchivePolicy_objs = ArchivePolicy.objects.filter(
                PolicyStat=1, PolicyID=str(self.PolicyID))[:1]
            if ArchivePolicy_objs:
                ArchivePolicy_obj = ArchivePolicy_objs.get()
                if ArchivePolicy_obj.Mode in range(0, 2):
                    # Mode 0 or 1: take entry date and agent from the METS file.
                    self.DBmode = ArchivePolicy_obj.Mode
                    logging.info(
                        'Policy found for Object: %s in ESSArch mode' %
                        self.ObjectIdentifierValue)
                    if ArchivePolicy_obj.IngestMetadata in [1, 2, 3]:
                        metsfilename = os.path.join(
                            ArchivePolicy_obj.IngestPath,
                            self.ObjectIdentifierValue + '_Package_METS.xml')
                    elif ArchivePolicy_obj.IngestMetadata in [4]:
                        # Prefer sip.xml, fall back to mets.xml in the object directory.
                        ObjectPath = os.path.join(
                            ArchivePolicy_obj.IngestPath,
                            self.ObjectIdentifierValue)
                        if os.path.exists(
                                os.path.join(ObjectPath, 'sip.xml')):
                            metsfilename = os.path.join(
                                ObjectPath, 'sip.xml')
                        elif os.path.exists(
                                os.path.join(ObjectPath, 'mets.xml')):
                            metsfilename = os.path.join(
                                ObjectPath, 'mets.xml')
                        #elif os.path.exists(os.path.join(ObjectPath,'%s_Content_METS.xml' % self.ObjectIdentifierValue)):
                        #    metsfilename = os.path.join(ObjectPath,'%s_Content_METS.xml' % self.ObjectIdentifierValue)
                        else:
                            metsfilename = ''
                        #metsfilename = '%s/sip.xml' % os.path.join(ArchivePolicy_obj.IngestPath,self.ObjectIdentifierValue)
                    else:
                        metsfilename = ''
                    res_info, res_files, res_struct, error, why = ESSMD.getMETSFileList(
                        FILENAME=metsfilename)
                    if not error:
                        # cut off microsecond and timezone info ".xxxxxxx+02:00"
                        #for c in res_info[1][0]:
                        #    if c == '.' or c == '+':
                        #        break
                        #    else:
                        #        self.ext_EntryDate += c
                        self.ext_EntryDate = parse_datetime(
                            res_info[1][0]).astimezone(pytz.utc)
                        self.ext_EntryAgentIdentifierValue = res_info[2][
                            0][4]
                        # Use the identifier as UUID when it parses as one,
                        # otherwise generate a new UUID for the object.
                        try:
                            self.ext_ObjectGuid = str(
                                uuid.UUID(self.ObjectIdentifierValue))
                        except ValueError, why:
                            logging.warning(
                                'ObjectIdentifierValue: %s is not a valid UUID, why: %s , start to generate a new UUID'
                                % (self.ObjectIdentifierValue, str(why)))
                            self.ext_ObjectGuid = str(uuid.uuid1())
                            logging.info(
                                'New UUID: %s for ObjectIdentifierValue: %s'
                                % (self.ext_ObjectGuid,
                                   str(self.ObjectIdentifierValue)))
                        self.ext_ObjectActive = 1
                        self.ext_OAISPackageType = 2
                        self.ext_preservationLevelValue = 1
                        self.objectstatus = 1
                    else:
                        self.objectstatus = 102  # Problem to get information from package METS
                elif ArchivePolicy_obj.Mode == 2:  # AIS but POLICYID from METS, Check in AIS if object is active.
                    self.DBmode = ArchivePolicy_obj.Mode
                    self.extOBJdbget, ext_errno, ext_why = ESSMSSQL.DB(
                    ).action(self.IngestTable, 'GET3',
                             ('ProjectGroupCode', 'ObjectPackageName',
                              'ObjectGuid', 'ObjectActive', 'EntryDate',
                              'EntryAgentIdentifierValue',
                              'OAISPackageType', 'preservationLevelValue'),
                             ('ObjectIdentifierValue',
                              self.ObjectIdentifierValue))
                    #self.extOBJdbget = [[10,'','7283074a-00c0-11e2-a78f-002215836500',1,'2010-07-12 16:57:45','entryagent',2,1]]
                    #ext_errno = 0
                    #ext_why = 'whywhy'
                    if ext_errno:
                        # objectstatus stays 0, so this object is not updated.
                        logging.error('Failed to access External DB: ' +
                                      str(self.ObjectIdentifierValue) +
                                      ' error: ' + str(ext_why))
                    elif self.extOBJdbget:
                        if Debug:
                            logging.info(
                                'Found object: %s in AIS, self.extOBJdbget: %s',
                                self.ObjectIdentifierValue,
                                str(self.extOBJdbget))
                        self.objectstatus = 10  # Object found in external DB
                        # Each check below only runs while no failure code
                        # (>= 100) has been set by an earlier check.
                        if self.objectstatus < 100:
                            ########################################
                            # Check if object already has an AIP
                            if disable_ObjectPackageName:
                                self.ext_ObjectPackageName = ''
                            else:
                                self.ext_ObjectPackageName = self.extOBJdbget[
                                    0][1]
                            if not self.ext_ObjectPackageName:
                                self.objectstatus = 12  # Object do not have an AIP
                            else:
                                self.objectstatus = 112  # Object already have an AIP
                        if self.objectstatus < 100:
                            ########################################
                            # Get GUID/UUID
                            #self.ext_ObjectGuid = uuid.UUID(bytes_le=self.extOBJdbget[0][2]) #When pymssql
                            self.ext_ObjectGuid = uuid.UUID(
                                self.extOBJdbget[0][2])
                        if self.objectstatus < 100:
                            ########################################
                            # Check if object is active
                            self.ext_ObjectActive = self.extOBJdbget[0][3]
                            if self.ext_ObjectActive == 1:
                                self.objectstatus = 13  # Object is active
                            else:
                                self.objectstatus = 113  # Object is not active
                        if self.objectstatus < 100:
                            ########################################
                            # Check if POLICYID in local DB "METS" is equal to ProjectGroupCode in AIS
                            self.ext_ProjectGroupCode = str(
                                self.extOBJdbget[0][0])
                            if self.ext_ProjectGroupCode == self.PolicyID:
                                self.objectstatus = 1  # Object have an ProjectCode
                                logging.info(
                                    'Object: %s found in AIS with correct POLICYID'
                                    % self.ObjectIdentifierValue)
                            else:
                                self.objectstatus = 111  # Object do not have an ProjectCode
                        ########################################
                        # EntryDate from the external DB is tagged with self.tz
                        # and converted to UTC.
                        self.ext_EntryDate = self.extOBJdbget[0][
                            4].replace(microsecond=0,
                                       tzinfo=self.tz).astimezone(pytz.utc)
                        self.ext_EntryAgentIdentifierValue = self.extOBJdbget[
                            0][5]
                        self.ext_OAISPackageType = self.extOBJdbget[0][6]
                        self.ext_preservationLevelValue = self.extOBJdbget[
                            0][7]
                        ########################################
                        # Special function only for test
                        if force_ProjectGroupCode:
                            logging.info(
                                'Force set ProjectGroupCode for Object: %s'
                                % self.ObjectIdentifierValue)
                            self.objectstatus = 10
                            self.ext_ProjectGroupCode = force_ProjectGroupCode
                            self.ext_ObjectPackageName = ''
                            self.ext_ObjectGuid = str(
                                uuid.uuid1())  # updDB
                            self.ext_ObjectActive = 1  # updDB
                            self.ext_EntryDate = datetime.datetime.utcnow(
                            ).replace(microsecond=0,
                                      tzinfo=pytz.utc)  # updDB
                            self.ext_EntryAgentIdentifierValue = None  # updDB
                            self.ext_OAISPackageType = 2  # updDB
                            self.ext_preservationLevelValue = 1  # updDB
                    else:
                        self.objectstatus = 110  # Object not found in external DB
                        if Debug:
                            logging.info(
                                'Missing object: %s in AIS, self.extOBJdbget: %s',
                                self.ObjectIdentifierValue,
                                str(self.extOBJdbget))
                else:
                    self.objectstatus = 100  # Policy is not in ESSArch mode
            else:
                self.objectstatus = 101  # Policy not found or not active
            #model.meta.Session.close()
        else:
            # No PolicyID in the local row: resolve everything from the external DB.
            self.DBmode = 2  # AIS
            self.extOBJdbget, ext_errno, ext_why = ESSMSSQL.DB().action(
                self.IngestTable, 'GET3',
                ('ProjectGroupCode', 'ObjectPackageName', 'ObjectGuid',
                 'ObjectActive', 'EntryDate', 'EntryAgentIdentifierValue',
                 'OAISPackageType', 'preservationLevelValue'),
                ('ObjectIdentifierValue', self.ObjectIdentifierValue))
            if ext_errno:
                # objectstatus stays 0, so this object is not updated.
                logging.error('Failed to access External DB: ' +
                              str(self.ObjectIdentifierValue) + ' error: ' +
                              str(ext_why))
            elif self.extOBJdbget:
                #if not ext_errno and self.extOBJdbget:
                if Debug:
                    logging.info(
                        'Found object: %s in AIS, self.extOBJdbget: %s',
                        self.ObjectIdentifierValue, str(self.extOBJdbget))
                self.objectstatus = 10  # Object found in external DB
                # NOTE(review): unlike the Mode == 2 branch above, every check
                # in this branch has an unconditional else, so an earlier
                # failure code is overwritten by the next check (111 becomes
                # 112, then 113). Confirm whether that is intended.
                ########################################
                # Check if object has a ProjectGroupCode
                self.ext_ProjectGroupCode = str(self.extOBJdbget[0][0])
                if self.objectstatus < 100 and self.ext_ProjectGroupCode:
                    self.objectstatus = 11  # Object have an ProjectCode
                else:
                    self.objectstatus = 111  # Object do not have an ProjectCode
                ########################################
                # Check if object already has an AIP
                if disable_ObjectPackageName:
                    self.ext_ObjectPackageName = ''
                else:
                    self.ext_ObjectPackageName = self.extOBJdbget[0][1]
                if self.objectstatus < 100 and not self.ext_ObjectPackageName:
                    self.objectstatus = 12  # Object do not have an AIP
                else:
                    self.objectstatus = 112  # Object already have an AIP
                ########################################
                # Get GUID/UUID
                #self.ext_ObjectGuid = uuid.UUID(bytes_le=self.extOBJdbget[0][2]) #When pymssql
                self.ext_ObjectGuid = uuid.UUID(self.extOBJdbget[0][2])
                ########################################
                # Check if object is active
                self.ext_ObjectActive = self.extOBJdbget[0][3]
                if self.objectstatus < 100 and self.ext_ObjectActive == 1:
                    self.objectstatus = 13  # Object is active
                else:
                    self.objectstatus = 113  # Object is not active
                ########################################
                self.ext_EntryDate = self.extOBJdbget[0][4].replace(
                    microsecond=0, tzinfo=self.tz).astimezone(pytz.utc)
                self.ext_EntryAgentIdentifierValue = self.extOBJdbget[0][5]
                self.ext_OAISPackageType = self.extOBJdbget[0][6]
                self.ext_preservationLevelValue = self.extOBJdbget[0][7]
                if Debug:
                    logging.info('ext_ProjectGroupCode is: ' +
                                 str(self.ext_ProjectGroupCode) +
                                 ' for ObjectIdentifierValue ' +
                                 str(self.ObjectIdentifierValue))
                # Test override: replaces the values read from the external DB.
                if force_ProjectGroupCode:
                    logging.info(
                        'Force set ProjectGroupCode for Object: %s' %
                        self.ObjectIdentifierValue)
                    self.objectstatus = 10
                    self.ext_ProjectGroupCode = force_ProjectGroupCode
                    self.ext_ObjectPackageName = ''
                    self.ext_ObjectGuid = str(uuid.uuid1())  # updDB
                    self.ext_ObjectActive = 1  # updDB
                    self.ext_EntryDate = datetime.datetime.utcnow(
                    ).replace(microsecond=0, tzinfo=pytz.utc)  # updDB
                    self.ext_EntryAgentIdentifierValue = None  # updDB
                    self.ext_OAISPackageType = 2  # updDB
                    self.ext_preservationLevelValue = 1  # updDB
                if self.objectstatus < 100:
                    # Map the external ProjectGroupCode to an active local policy.
                    ArchivePolicy_objs = ArchivePolicy.objects.filter(
                        PolicyStat=1,
                        AISProjectID=self.ext_ProjectGroupCode)[:1]
                    if ArchivePolicy_objs:
                        self.PolicyID = ArchivePolicy_objs.get().PolicyID
                        if Debug:
                            logging.info('PolicyID: ' + str(self.PolicyID))
                        self.objectstatus = 14  # Object got PolicyID
                    else:
                        self.objectstatus = 114  # Object missing PolicyID
            else:
                self.objectstatus = 110  # Object not found in external DB
                if Debug:
                    logging.info(
                        'Missing object: %s in AIS, self.extOBJdbget: %s',
                        self.ObjectIdentifierValue, str(self.extOBJdbget))

        # Translate objectstatus into StatusProcess/StatusActivity and events.
        # Codes without a branch here leave both values as read from the DB.
        if self.objectstatus == 100:  # Policy is not in ESSArch mode
            ###################################################################
            # Policy is not in ESSArch mode(12) and Need of assistance(4)
            ###################################################################
            self.StatusProcess = 12
            self.StatusActivity = 4
            if Debug:
                logging.info(
                    'Change to StatusProcess 12, ObjectIdentifierValue ' +
                    str(self.ObjectIdentifierValue))
            self.event_info = 'Policy is not in ESSArch mode for Object: ' + str(
                self.ObjectIdentifierValue)
            logging.error(self.event_info)
            ESSPGM.Events().create('1010', '', 'ESSArch SIPValidateAIS',
                                   ProcVersion, '1', self.event_info,
                                   self.DBmode, self.ObjectIdentifierValue)
        elif self.objectstatus == 101:  # Policy not found or not active
            ###################################################################
            # Policy not found or active(12) and Need of assistance(4)
            ###################################################################
            self.StatusProcess = 12
            self.StatusActivity = 4
            if Debug:
                logging.info(
                    'Change to StatusProcess 12, ObjectIdentifierValue ' +
                    str(self.ObjectIdentifierValue))
            self.event_info = 'Policy not found or active for Object: ' + str(
                self.ObjectIdentifierValue)
            logging.error(self.event_info)
            ESSPGM.Events().create('1010', '', 'ESSArch SIPValidateAIS',
                                   ProcVersion, '1', self.event_info,
                                   self.DBmode, self.ObjectIdentifierValue)
        elif self.objectstatus == 102:  # Problem to get information from Package_METS
            ###################################################################
            # Problem to get information from Package_METS(12) and Need of assistance(4)
            ###################################################################
            self.StatusProcess = 12
            self.StatusActivity = 4
            if Debug:
                logging.info(
                    'Change to StatusProcess 12, ObjectIdentifierValue ' +
                    str(self.ObjectIdentifierValue))
            self.event_info = 'Problem to get information from Package_METS for Object: ' + str(
                self.ObjectIdentifierValue)
            logging.error(self.event_info)
            ESSPGM.Events().create('1010', '', 'ESSArch SIPValidateAIS',
                                   ProcVersion, '1', self.event_info,
                                   self.DBmode, self.ObjectIdentifierValue)
        elif self.objectstatus == 110:
            ###################################################################
            # Object doesn't exist in extDB(10) and Need of assistance(4)
            # (no event is created for this outcome)
            ###################################################################
            self.StatusProcess = 10
            self.StatusActivity = 4
            if Debug:
                logging.info(
                    'Change to StatusProcess 10, ObjectIdentifierValue ' +
                    str(self.ObjectIdentifierValue))
        elif self.objectstatus == 111:
            ###################################################################
            # Object doesn't have any project code in extDB(11) and Need of assistance(4)
            # (no event is created for this outcome)
            ###################################################################
            self.StatusProcess = 11
            self.StatusActivity = 4
            if Debug:
                logging.info(
                    'Change to StatusProcess 11, ObjectIdentifierValue ' +
                    str(self.ObjectIdentifierValue))
        elif self.objectstatus == 112:
            ###################################################################
            # Object already has an AIP!!
            ###################################################################
            self.StatusProcess = 13
            self.StatusActivity = 4
            if Debug:
                logging.info(
                    'Change to StatusProcess 13, ObjectIdentifierValue ' +
                    str(self.ObjectIdentifierValue))
            self.event_info = 'Object: ' + str(
                self.ObjectIdentifierValue) + ' already have an AIP!'
            logging.error(self.event_info)
            ESSPGM.Events().create('1010', '', 'ESSArch SIPValidateAIS',
                                   ProcVersion, '1', self.event_info,
                                   self.DBmode, self.ObjectIdentifierValue)
        elif self.objectstatus == 113:
            ###################################################################
            # Object is not active!!
            ###################################################################
            self.StatusProcess = 14
            self.StatusActivity = 4
            if Debug:
                logging.info(
                    'Change to StatusProcess 14, ObjectIdentifierValue ' +
                    str(self.ObjectIdentifierValue))
            self.event_info = 'Object: ' + str(
                self.ObjectIdentifierValue
            ) + ' is not active in external DB!'
            logging.error(self.event_info)
            ESSPGM.Events().create('1010', '', 'ESSArch SIPValidateAIS',
                                   ProcVersion, '1', self.event_info,
                                   self.DBmode, self.ObjectIdentifierValue)
        elif self.objectstatus == 114:
            ###################################################################
            # Object doesn't have any local policy(12) and Need of assistance(4)
            ###################################################################
            self.StatusProcess = 12
            self.StatusActivity = 4
            if Debug:
                logging.info(
                    'Change to StatusProcess 12, ObjectIdentifierValue ' +
                    str(self.ObjectIdentifierValue))
            self.event_info = 'Object: ' + str(
                self.ObjectIdentifierValue
            ) + ' do not have any local policy!'
            logging.error(self.event_info)
            ESSPGM.Events().create('1010', '', 'ESSArch SIPValidateAIS',
                                   ProcVersion, '1', self.event_info,
                                   self.DBmode, self.ObjectIdentifierValue)
        elif self.objectstatus == 14 or self.objectstatus == 1:
            ###################################################################
            # Object got a policy(19) and RFNext and OK(0)
            ###################################################################
            self.StatusProcess = 19
            self.StatusActivity = 0
            if Debug:
                logging.info(
                    'Change to StatusProcess 19, ObjectIdentifierValue ' +
                    str(self.ObjectIdentifierValue))
            ESSPGM.Events().create('1010', '', 'ESSArch SIPValidateAIS',
                                   ProcVersion, '0', '', self.DBmode,
                                   self.ObjectIdentifierValue)
        logging.info(
            'objectstatus:%s,StatusProcess:%s,StatusActivity:%s,EntryDate:%s'
            % (self.objectstatus, self.StatusProcess, self.StatusActivity,
               self.ext_EntryDate))
        if self.objectstatus:
            # Persist the outcome. Timestamps are written as naive datetimes:
            # UTC for the local DB, self.tz local time for the external DB.
            self.timestamp_utc = datetime.datetime.utcnow().replace(
                microsecond=0, tzinfo=pytz.utc)
            self.timestamp_dst = self.timestamp_utc.astimezone(self.tz)
            res, errno, why = ESSDB.DB().action(
                self.IngestTable, 'UPD',
                ('PolicyId', self.PolicyID, 'ObjectUUID',
                 self.ext_ObjectGuid, 'EntryDate',
                 self.ext_EntryDate.replace(tzinfo=None),
                 'EntryAgentIdentifierValue',
                 self.ext_EntryAgentIdentifierValue, 'StatusProcess',
                 self.StatusProcess, 'StatusActivity',
                 self.StatusActivity, 'LastEventDate',
                 self.timestamp_utc.replace(tzinfo=None),
                 'linkingAgentIdentifierValue', AgentIdentifierValue,
                 'OAISPackageType', self.ext_OAISPackageType,
                 'preservationLevelValue', self.ext_preservationLevelValue,
                 'ObjectActive', self.ext_ObjectActive, 'LocalDBdatetime',
                 self.timestamp_utc.replace(tzinfo=None)),
                ('ObjectIdentifierValue', self.ObjectIdentifierValue))
            if errno:
                logging.error('Failed to update Local DB: ' +
                              str(self.ObjectIdentifierValue) + ' error: ' +
                              str(why))
            if errno == 0 and self.ext_IngestTable:
                # Mirror the status to the external DB; on success stamp
                # ExtDBdatetime in the local DB.
                ext_res, ext_errno, ext_why = ESSMSSQL.DB().action(
                    self.IngestTable, 'UPD',
                    ('PolicyId', self.PolicyID, 'StatusProcess',
                     self.StatusProcess, 'StatusActivity',
                     self.StatusActivity, 'LastEventDate',
                     self.timestamp_dst.replace(tzinfo=None),
                     'linkingAgentIdentifierValue', AgentIdentifierValue),
                    ('ObjectIdentifierValue', self.ObjectIdentifierValue))
                if ext_errno:
                    logging.error('Failed to update External DB: ' +
                                  str(self.ObjectIdentifierValue) +
                                  ' error: ' + str(ext_why))
                else:
                    res, errno, why = ESSDB.DB().action(
                        self.IngestTable, 'UPD',
                        ('ExtDBdatetime',
                         self.timestamp_utc.replace(tzinfo=None)),
                        ('ObjectIdentifierValue',
                         self.ObjectIdentifierValue))
                    if errno:
                        logging.error('Failed to update Local DB: ' +
                                      str(self.ObjectIdentifierValue) +
                                      ' error: ' + str(why))
def ThreadMain(self, ProcName):
    """Worker loop that creates checksums and package METS files for AIPs.

    Until ``self.mDieFlag`` is set to 1, the loop reads the 'Time' and 'Run'
    values for ``ProcName`` from ESSProc, fetches the objects with
    StatusProcess between 39 and 40 and StatusActivity '0' from the ingest
    table and, for each object:

    * builds the paths and the METS agent list according to the policy's
      IngestMetadata type (1, 2, 3 or 4),
    * when the policy's ChecksumAlgorithm is > 0, calculates checksums for
      the content METS file and the AIP tar file, writes the package METS
      and (for type 4 objects that belong to an AIC) the AIC METS file, and
      sets the AIP status to 49/0 on success or 40/100 on failure,
    * when ChecksumAlgorithm is 0, skips checksum creation and sets
      StatusProcess '49' / StatusActivity '0' directly.

    ProcName: value of the 'Name' column used to look up this process in
        the ESSProc table.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation had been flattened; confirm against the original file.
    """
    logging.info('Starting ' + ProcName)
    TimeZone = timezone.get_default_timezone_name()
    self.tz = pytz.timezone(TimeZone)
    # METS/XML settings are read once, before the loop starts.
    METS_NAMESPACE = SchemaProfile.objects.get(
        entity='mets_namespace').value
    METS_SCHEMALOCATION = SchemaProfile.objects.get(
        entity='mets_schemalocation').value
    METS_PROFILE = SchemaProfile.objects.get(entity='mets_profile').value
    XLINK_NAMESPACE = SchemaProfile.objects.get(
        entity='xlink_namespace').value
    XSI_NAMESPACE = SchemaProfile.objects.get(entity='xsi_namespace').value
    while 1:
        if self.mDieFlag == 1:
            break  # Request for death
        self.mLock.acquire()
        self.Time, self.Run = ESSDB.DB().action('ESSProc', 'GET',
                                                ('Time', 'Run'),
                                                ('Name', ProcName))[0]
        if self.Run == '0':
            # Process has been asked to stop: mark it stopped and idle.
            logging.info('Stopping ' + ProcName)
            ESSDB.DB().action('ESSProc', 'UPD',
                              ('Status', '0', 'Run', '0', 'PID', '0'),
                              ('Name', ProcName))
            self.RunFlag = 0
            self.mLock.release()
            if Debug:
                logging.info('RunFlag: 0')
            time.sleep(2)
            continue
        # Process Item
        lock = thread.allocate_lock()  # not referenced again in this method
        Cmets_obj = Parameter.objects.get(
            entity='content_descriptionfile').value
        self.IngestTable = ESSDB.DB().action('ESSConfig', 'GET',
                                             ('Value', ),
                                             ('Name', 'IngestTable'))[0][0]
        if ExtDBupdate:
            self.ext_IngestTable = self.IngestTable
        else:
            self.ext_IngestTable = ''
        self.dbget, errno, why = ESSDB.DB().action(
            self.IngestTable, 'GET4',
            ('ObjectIdentifierValue', 'ObjectUUID', 'PolicyId',
             'ObjectSize'),
            ('StatusProcess', 'BETWEEN', 39, 'AND', 40, 'AND',
             'StatusActivity', '=', '0'))
        if errno:
            # Only logged; the loop below still iterates over self.dbget.
            logging.error('Failed to access Local DB, error: ' + str(why))
        for self.obj in self.dbget:
            self.ok = 1
            # Re-read Run/Pause before every object so a stop or pause
            # request takes effect between objects.
            self.ProcDB = ESSDB.DB().action('ESSProc', 'GET',
                                            ('Run', 'Pause'),
                                            ('Name', ProcName))[0]
            if self.ProcDB[0] == '0':
                logging.info('Stopping ' + ProcName)
                ESSDB.DB().action('ESSProc', 'UPD',
                                  ('Status', '0', 'Run', '0', 'PID', '0'),
                                  ('Name', ProcName))
                thread.interrupt_main()
                time.sleep(5)
                break
            elif self.ProcDB[1] == 1:
                # Paused: poll every 60 seconds until Pause is cleared.
                while 1:
                    time.sleep(60)
                    self.ProcDB = ESSDB.DB().action(
                        'ESSProc', 'GET', ('Run', 'Pause'),
                        ('Name', ProcName))[0]
                    if self.ProcDB[1] == 1:
                        logging.info('Process is in pause state')
                    else:
                        break
            self.ObjectIdentifierValue = self.obj[0]
            self.ObjectUUID = self.obj[1]
            self.PolicyId = self.obj[2]
            self.ObjectSize = self.obj[3]
            ArchivePolicy_obj = ArchivePolicy.objects.get(
                PolicyStat=1, PolicyID=self.PolicyId)
            if self.ok:
                ###########################################################
                # set variables
                self.AIPpath = ArchivePolicy_obj.AIPpath
                self.metatype = ArchivePolicy_obj.IngestMetadata
                self.ChecksumAlgorithm = ArchivePolicy_obj.ChecksumAlgorithm
                self.CA = dict(ChecksumAlgorithm_CHOICES)[
                    self.ChecksumAlgorithm]
                self.SIPpath = ArchivePolicy_obj.IngestPath
                self.p_obj = self.ObjectIdentifierValue + '.tar'
                self.ObjectPath = os.path.join(self.AIPpath, self.p_obj)
                self.SIProotpath = os.path.join(self.SIPpath,
                                                self.ObjectIdentifierValue)
                # Content METS location depends on the metadata type: type 4
                # keeps it under the SIP root, types 1-3 next to the AIP.
                if self.metatype in [4]:
                    #self.Cmets_obj = '%s/%s_Content_METS.xml' % (self.ObjectIdentifierValue,self.ObjectIdentifierValue)
                    #self.Cmets_objpath = os.path.join(self.SIPpath,self.Cmets_obj)
                    #self.Cmets_obj = Cmets_obj.replace('{uuid}',self.ObjectIdentifierValue)
                    self.Cmets_obj = Cmets_obj.replace(
                        '{objid}', self.ObjectIdentifierValue)
                    self.Cmets_objpath = os.path.join(
                        self.SIProotpath, self.Cmets_obj)
                elif self.metatype in [1, 2, 3]:
                    self.Cmets_obj = '%s_Content_METS.xml' % (
                        self.ObjectIdentifierValue)
                    self.Cmets_objpath = os.path.join(
                        self.AIPpath, self.Cmets_obj)
                self.Pmets_obj = '%s_Package_METS.xml' % (
                    self.ObjectIdentifierValue)
                self.Pmets_objpath = os.path.join(self.AIPpath,
                                                  self.Pmets_obj)
                self.AIC_UUID = None
                self.AIC_UUID_rel_ObjectUUIDs = []
            if self.ok:
                # Collect the agents (and altRecordIDs) for the package METS.
                METS_agent_list = []
                METS_altRecordID_list = []
                if self.metatype == 1:
                    ############################################
                    # Object have metatype 1 (METS)
                    self.METS_LABEL = 'ESSArch AIP'
                    # Get SIP Content METS information
                    self.METSfilepath = os.path.join(
                        self.SIPpath, self.ObjectIdentifierValue +
                        '/metadata/SIP/' + self.ObjectIdentifierValue +
                        '_Content_METS.xml')
                    res_info, res_files, res_struct, error, why = ESSMD.getMETSFileList(
                        FILENAME=self.METSfilepath)
                    # Copy the SIP agents except the CREATOR/SOFTWARE entry,
                    # which is replaced by the ESSArch entry appended below.
                    for agent in res_info[2]:
                        if not (agent[0] == 'CREATOR'
                                and agent[3] == 'SOFTWARE'):
                            METS_agent_list.append(agent)
                    METS_agent_list.append([
                        'CREATOR', 'INDIVIDUAL', '', AgentIdentifierValue,
                        []
                    ])
                    METS_agent_list.append([
                        'CREATOR', 'OTHER', 'SOFTWARE', 'ESSArch',
                        ['VERSION=%s' % ProcVersion]
                    ])
                elif self.metatype == 2:
                    ############################################
                    # Object have metatype 2 (RES)
                    self.METS_LABEL = 'Imaging AIP RA'
                    METS_agent_list.append([
                        'ARCHIVIST', 'ORGANIZATION', '', 'Riksarkivet', []
                    ])
                    METS_agent_list.append(
                        ['CREATOR', 'ORGANIZATION', '', 'Riksarkivet', []])
                    METS_agent_list.append([
                        'CREATOR', 'INDIVIDUAL', '', AgentIdentifierValue,
                        []
                    ])
                    METS_agent_list.append([
                        'CREATOR', 'OTHER', 'SOFTWARE', 'ESSArch',
                        ['VERSION=%s' % ProcVersion]
                    ])
                elif self.metatype == 3:
                    ############################################
                    # Object have metatype 3 (ADDML)
                    self.METS_LABEL = 'Born Digital AIP RA'
                    METS_agent_list.append([
                        'ARCHIVIST', 'ORGANIZATION', '', 'Riksarkivet', []
                    ])
                    METS_agent_list.append(
                        ['CREATOR', 'ORGANIZATION', '', 'Riksarkivet', []])
                    METS_agent_list.append([
                        'CREATOR', 'INDIVIDUAL', '', AgentIdentifierValue,
                        []
                    ])
                    METS_agent_list.append([
                        'CREATOR', 'OTHER', 'SOFTWARE', 'ESSArch',
                        ['VERSION=%s' % ProcVersion]
                    ])
                elif self.metatype in [4]:
                    ############################################
                    # Object have metatype 4 (eARD METS)
                    res_info, res_files, res_struct, error, why = ESSMD.getMETSFileList(
                        FILENAME=self.Cmets_objpath)
                    for agent in res_info[2]:
                        #if not (agent[0] == 'CREATOR' and agent[3] == 'SOFTWARE'):
                        METS_agent_list.append(agent)
                    self.METS_LABEL = res_info[0][0]
                    # Agent entries for type 4 have six fields, not five.
                    METS_agent_list.append([
                        'CREATOR', None, 'INDIVIDUAL', None,
                        AgentIdentifierValue, []
                    ])
                    METS_agent_list.append([
                        'CREATOR', None, 'OTHER', 'SOFTWARE', 'ESSArch',
                        ['VERSION=%s' % ProcVersion]
                    ])
                    for altRecordID in res_info[3]:
                        METS_altRecordID_list.append(altRecordID)
            logging.debug('self.obj: ' + str(self.obj))
            if self.ChecksumAlgorithm > 0:  #self.ChecksumAlgorithm 1 = MD5, 2 = SHA-256
                # Start time as time-of-day; used for the MB/sec log line.
                self.startCalTime = datetime.timedelta(
                    seconds=time.localtime()[5],
                    minutes=time.localtime()[4],
                    hours=time.localtime()[3])
                errno, why = ESSPGM.DB().SetAIPstatus(
                    self.IngestTable, self.ext_IngestTable,
                    AgentIdentifierValue, self.ObjectUUID, 40, 5)
                if errno:
                    logging.error('Failed to update DB status for AIP: ' +
                                  str(self.ObjectIdentifierValue) +
                                  ' error: ' + str(why))
                logging.info('Start create Package METS for: ' +
                             self.ObjectIdentifierValue)
                if self.ok:
                    ###########################################################
                    # Create PMETS for AIP package
                    self.M_CHECKSUM, errno, why = ESSPGM.Check().checksum(
                        self.Cmets_objpath, self.CA)
                    if errno:
                        self.event_info = 'Problem to get checksum for METS object for AIP package: ' + str(
                            self.Cmets_objpath)
                        logging.error(self.event_info)
                        ESSPGM.Events().create('1030', '',
                                               'ESSArch AIPCreator',
                                               ProcVersion, '1',
                                               self.event_info, 2,
                                               self.ObjectIdentifierValue)
                        self.ok = 0
                    # Size and mtime of the content METS file; the mtime is
                    # rendered as ISO 8601 in the self.tz timezone.
                    self.M_statinfo = os.stat(self.Cmets_objpath)
                    self.M_SIZE = self.M_statinfo.st_size
                    self.M_utc_mtime = datetime.datetime.utcfromtimestamp(
                        self.M_statinfo.st_mtime).replace(tzinfo=pytz.utc)
                    self.M_lociso_mtime = self.M_utc_mtime.astimezone(
                        self.tz).isoformat()
                    self.P_CHECKSUM, errno, why = ESSPGM.Check().checksum(
                        self.ObjectPath, self.CA)
                    if errno:
                        self.event_info = 'Problem to get checksum for AIP package: ' + str(
                            self.ObjectPath)
                        logging.error(self.event_info)
                        ESSPGM.Events().create('1040', '',
                                               'ESSArch AIPChecksum',
                                               ProcVersion, '1',
                                               self.event_info, 2,
                                               self.ObjectIdentifierValue)
                        self.ok = 0
                    # Same for the AIP tar file.
                    self.P_statinfo = os.stat(self.ObjectPath)
                    self.P_SIZE = self.P_statinfo.st_size
                    self.P_utc_mtime = datetime.datetime.utcfromtimestamp(
                        self.P_statinfo.st_mtime).replace(tzinfo=pytz.utc)
                    self.P_lociso_mtime = self.P_utc_mtime.astimezone(
                        self.tz).isoformat()
                    if self.metatype in [1, 2, 3]:
                        self.PMETSdoc = ESSMD.createPMets(
                            ID=self.ObjectIdentifierValue,
                            LABEL=self.METS_LABEL,
                            AGENT=METS_agent_list,
                            P_SIZE=self.P_SIZE,
                            P_CREATED=self.P_lociso_mtime,
                            P_CHECKSUM=self.P_CHECKSUM,
                            P_CHECKSUMTYPE=self.CA,
                            M_SIZE=self.M_SIZE,
                            M_CREATED=self.M_lociso_mtime,
                            M_CHECKSUM=self.M_CHECKSUM,
                            M_CHECKSUMTYPE=self.CA,
                        )
                        errno, why = ESSMD.writeToFile(
                            self.PMETSdoc, self.Pmets_objpath)
                        if errno:
                            self.event_info = 'Problem to write PMETS to file for AIP package: ' + str(
                                self.Pmets_objpath)
                            logging.error(self.event_info)
                            ESSPGM.Events().create(
                                '1040', '', 'ESSArch AIPChecksum',
                                ProcVersion, '1', self.event_info, 2,
                                self.ObjectIdentifierValue)
                            time.sleep(2)
                            self.ok = 0
                    elif self.metatype in [4]:
                        # Two file entries: the content METS (amdSec/techMD)
                        # and the AIP tar file (fileSec).
                        ms_files = []
                        ms_files.append([
                            'amdSec', None, 'techMD', 'techMD001', None,
                            None,
                            'ID%s' % str(uuid.uuid1()), 'URL',
                            'file:%s/%s' % (self.ObjectIdentifierValue,
                                            self.Cmets_obj), 'simple',
                            self.M_CHECKSUM, self.CA, self.M_SIZE,
                            'text/xml', self.M_lociso_mtime, 'OTHER',
                            'METS', None
                        ])
                        ms_files.append([
                            'fileSec', None, None, None, None, None,
                            'ID%s' % str(uuid.uuid1()), 'URL',
                            'file:%s' % self.p_obj, 'simple',
                            self.P_CHECKSUM, self.CA, self.P_SIZE,
                            'application/x-tar', self.P_lociso_mtime, 'tar',
                            'techMD001', None
                        ])
                        # define namespaces
                        self.namespacedef = 'xmlns:mets="%s"' % METS_NAMESPACE
                        self.namespacedef += ' xmlns:xlink="%s"' % XLINK_NAMESPACE
                        self.namespacedef += ' xmlns:xsi="%s"' % XSI_NAMESPACE
                        self.namespacedef += ' xsi:schemaLocation="%s %s"' % (
                            METS_NAMESPACE, METS_SCHEMALOCATION)
                        errno, info_list = ESSMD.Create_IP_mets(
                            ObjectIdentifierValue=self.
                            ObjectIdentifierValue,
                            METS_ObjectPath=self.Pmets_objpath,
                            agent_list=METS_agent_list,
                            altRecordID_list=METS_altRecordID_list,
                            file_list=ms_files,
                            namespacedef=self.namespacedef,
                            METS_LABEL=self.METS_LABEL,
                            METS_PROFILE=METS_PROFILE,
                            METS_TYPE='AIP',
                            METS_DocumentID=self.Pmets_obj,
                            TimeZone=TimeZone)
                        if errno:
                            # Only logged; self.ok is not cleared here.
                            logging.error(
                                'Problem to create Package METS file, why: %s'
                                % str(info_list))
                # The checksum of the AIP tar file is the object's message digest.
                self.ObjectMessageDigest = self.P_CHECKSUM
                self.stopCalTime = datetime.timedelta(
                    seconds=time.localtime()[5],
                    minutes=time.localtime()[4],
                    hours=time.localtime()[3])
                self.CalTime = self.stopCalTime - self.startCalTime
                self.ObjectSizeMB = self.ObjectSize / 1048576
                if self.CalTime.seconds < 1:
                    self.CalTime = datetime.timedelta(
                        seconds=1
                    )  #Fix min time to 1 second if it is zero.
                self.CalMBperSEC = int(self.ObjectSizeMB) / int(
                    self.CalTime.seconds)
                logging.info('Finished calculate checksum: ' +
                             self.ObjectIdentifierValue + ' , ' +
                             str(self.CalMBperSEC) + ' MB/Sec and Time: ' +
                             str(self.CalTime))
                if self.ok:
                    # Store digest and METS size. Timestamps are written as
                    # naive datetimes: UTC locally, self.tz time externally.
                    self.timestamp_utc = datetime.datetime.utcnow(
                    ).replace(microsecond=0, tzinfo=pytz.utc)
                    self.timestamp_dst = self.timestamp_utc.astimezone(
                        self.tz)
                    res, errno, why = ESSDB.DB().action(
                        self.IngestTable, 'UPD',
                        ('ObjectMessageDigestAlgorithm',
                         self.ChecksumAlgorithm, 'ObjectMessageDigest',
                         self.ObjectMessageDigest, 'MetaObjectSize',
                         self.M_SIZE, 'LastEventDate',
                         self.timestamp_utc.replace(tzinfo=None),
                         'linkingAgentIdentifierValue',
                         AgentIdentifierValue, 'LocalDBdatetime',
                         self.timestamp_utc.replace(tzinfo=None)),
                        ('ObjectIdentifierValue',
                         self.ObjectIdentifierValue))
                    if errno:
                        logging.error('Failed to update Local DB: ' +
                                      str(self.ObjectIdentifierValue) +
                                      ' error: ' + str(why))
                    if errno == 0 and ExtDBupdate:
                        ext_res, ext_errno, ext_why = ESSMSSQL.DB().action(
                            self.IngestTable, 'UPD',
                            ('ObjectMessageDigestAlgorithm',
                             self.ChecksumAlgorithm, 'ObjectMessageDigest',
                             self.ObjectMessageDigest, 'MetaObjectSize',
                             self.M_SIZE, 'LastEventDate',
                             self.timestamp_dst.replace(tzinfo=None),
                             'linkingAgentIdentifierValue',
                             AgentIdentifierValue),
                            ('ObjectIdentifierValue',
                             self.ObjectIdentifierValue))
                        if ext_errno:
                            logging.error(
                                'Failed to update External DB: ' +
                                str(self.ObjectIdentifierValue) +
                                ' error: ' + str(ext_why))
                        else:
                            res, errno, why = ESSDB.DB().action(
                                self.IngestTable, 'UPD',
                                ('ExtDBdatetime',
                                 self.timestamp_utc.replace(tzinfo=None)),
                                ('ObjectIdentifierValue',
                                 self.ObjectIdentifierValue))
                            if errno:
                                logging.error(
                                    'Failed to update Local DB: ' +
                                    str(self.ObjectIdentifierValue) +
                                    ' error: ' + str(why))
                if self.ok and self.metatype == 4:
                    ####################################################
                    # Create AIC METS File:
                    aic_obj = ArchiveObject.objects.filter(
                        relaic_set__UUID=self.ObjectUUID)[:1]
                    if aic_obj:
                        self.AIC_UUID = aic_obj.get().ObjectUUID
                        logging.info(
                            'Succeeded to get AIC_UUID: %s from DB' %
                            self.AIC_UUID)
                    else:
                        # A missing AIC is not an error; the AIC METS step
                        # below is skipped because AIC_UUID stays None.
                        logging.warning(
                            'AIC not found for IP object: %s, skip to create AIC METS file'
                            % self.ObjectUUID)
                if self.ok and self.AIC_UUID:
                    # IPs related to the AIC that either have StatusProcess
                    # 3000 or are the current object, ordered by Generation.
                    ip_obj_list = ArchiveObject.objects.filter(
                        Q(StatusProcess=3000)
                        | Q(ObjectUUID=self.ObjectUUID),
                        reluuid_set__AIC_UUID=self.AIC_UUID).order_by(
                            'Generation')
                    if ip_obj_list:
                        logging.info('Start create AIC METS: ' +
                                     self.AIC_UUID)
                        self.AICmets_objpath = os.path.join(
                            self.AIPpath, self.AIC_UUID + '_AIC_METS.xml')
                        ms_files = []
                        for ip_obj in ip_obj_list:
                            logging.info(
                                'Add IP: %s to AIC METS: %s' %
                                (ip_obj.ObjectUUID, self.AIC_UUID))
                            ms_files.append([
                                'fileSec', None, None, None, None, None,
                                'ID%s' % str(uuid.uuid1()), 'URL',
                                'file:%s' % ip_obj.ObjectUUID, 'simple',
                                ip_obj.ObjectMessageDigest,
                                dict(ChecksumAlgorithm_CHOICES)[
                                    ip_obj.ObjectMessageDigestAlgorithm],
                                ip_obj.ObjectSize, 'application/x-tar',
                                ip_obj.CreateDate, 'IP Package', None, None
                            ])
                        # define namespaces
                        self.namespacedef = 'xmlns:mets="%s"' % METS_NAMESPACE
                        self.namespacedef += ' xmlns:xlink="%s"' % XLINK_NAMESPACE
                        self.namespacedef += ' xmlns:xsi="%s"' % XSI_NAMESPACE
                        self.namespacedef += ' xsi:schemaLocation="%s %s"' % (
                            METS_NAMESPACE, METS_SCHEMALOCATION)
                        errno, info_list = ESSMD.Create_IP_mets(
                            ObjectIdentifierValue=self.AIC_UUID,
                            METS_ObjectPath=self.AICmets_objpath,
                            agent_list=[],
                            altRecordID_list=[],
                            file_list=ms_files,
                            namespacedef=self.namespacedef,
                            METS_LABEL='AIC relation to IP',
                            METS_PROFILE=METS_PROFILE,
                            METS_TYPE='AIC',
                            METS_DocumentID=self.AIC_UUID + '_AIC_METS.xml',
                            TimeZone=TimeZone)
                        if errno:
                            # Only logged; self.ok is not cleared here.
                            logging.error(
                                'Problem to create AIC METS file, why: %s'
                                % str(info_list))
                    else:
                        logging.error(
                            'Problem to get objects related to AIC_UUID: %s from DB'
                            % (self.AIC_UUID))
                        self.ok = 0
                if self.ok:
                    # Success: status 49/0 and a success event.
                    errno, why = ESSPGM.DB().SetAIPstatus(
                        self.IngestTable, self.ext_IngestTable,
                        AgentIdentifierValue, self.ObjectUUID, 49, 0)
                    if errno:
                        logging.error(
                            'Failed to update DB status for AIP: ' +
                            str(self.ObjectIdentifierValue) + ' error: ' +
                            str(why))
                    else:
                        self.event_info = 'Succeeded to create checksum for Object: %s' % self.ObjectIdentifierValue
                        logging.info(self.event_info)
                        ESSPGM.Events().create('1040', '',
                                               'ESSArch AIPChecksum',
                                               ProcVersion, '0',
                                               self.event_info, 2,
                                               self.ObjectIdentifierValue)
                else:
                    # Failure: status 40/100 and a failure event.
                    errno, why = ESSPGM.DB().SetAIPstatus(
                        self.IngestTable, self.ext_IngestTable,
                        AgentIdentifierValue, self.ObjectUUID, 40, 100)
                    if errno:
                        logging.error(
                            'Failed to update DB status for AIP: ' +
                            str(self.ObjectIdentifierValue) + ' error: ' +
                            str(why))
                    else:
                        self.event_info = 'Failed to create checksum for Object: %s' % self.ObjectIdentifierValue
                        logging.error(self.event_info)
                        ESSPGM.Events().create('1040', '',
                                               'ESSArch AIPChecksum',
                                               ProcVersion, '1',
                                               self.event_info, 2,
                                               self.ObjectIdentifierValue)
            elif self.ChecksumAlgorithm == 0:  #self.ChecksumAlgorithm 0 = No checksum
                logging.info('Skip creation of checksum: ' +
                             self.ObjectIdentifierValue)
                self.ObjectMessageDigest = ''
                # Index 6 of the os.stat() result is st_size.
                self.MetaObjectSize = os.stat(self.Cmets_objpath)[6]
                self.timestamp_utc = datetime.datetime.utcnow().replace(
                    microsecond=0, tzinfo=pytz.utc)
                self.timestamp_dst = self.timestamp_utc.astimezone(self.tz)
                res, errno, why = ESSDB.DB().action(
                    self.IngestTable, 'UPD',
                    ('ObjectMessageDigestAlgorithm', self.ChecksumAlgorithm,
                     'ObjectMessageDigest', self.ObjectMessageDigest,
                     'StatusProcess', '49', 'StatusActivity', '0',
                     'MetaObjectSize', self.MetaObjectSize, 'LastEventDate',
                     self.timestamp_utc.replace(tzinfo=None),
                     'linkingAgentIdentifierValue', AgentIdentifierValue,
                     'LocalDBdatetime',
                     self.timestamp_utc.replace(tzinfo=None)),
                    ('ObjectIdentifierValue', self.ObjectIdentifierValue))
                if errno:
                    logging.error('Failed to update Local DB: ' +
                                  str(self.ObjectIdentifierValue) +
                                  ' error: ' + str(why))
                else:
                    ESSPGM.Events().create('1040', '',
                                           'ESSArch AIPChecksum',
                                           ProcVersion, '0',
                                           'Skip creation of checksum', 2,
                                           self.ObjectIdentifierValue)
                if errno == 0 and ExtDBupdate:
                    ext_res, ext_errno, ext_why = ESSMSSQL.DB().action(
                        self.IngestTable, 'UPD',
                        ('ObjectMessageDigestAlgorithm',
                         self.ChecksumAlgorithm, 'ObjectMessageDigest',
                         self.ObjectMessageDigest, 'StatusProcess', '49',
                         'StatusActivity', '0', 'MetaObjectSize',
                         self.MetaObjectSize, 'LastEventDate',
                         self.timestamp_dst.replace(tzinfo=None),
                         'linkingAgentIdentifierValue',
                         AgentIdentifierValue),
                        ('ObjectIdentifierValue',
                         self.ObjectIdentifierValue))
                    if ext_errno:
                        logging.error('Failed to update External DB: ' +
                                      str(self.ObjectIdentifierValue) +
                                      ' error: ' + str(ext_why))
                    else:
                        res, errno, why = ESSDB.DB().action(
                            self.IngestTable, 'UPD',
                            ('ExtDBdatetime',
                             self.timestamp_utc.replace(tzinfo=None)),
                            ('ObjectIdentifierValue',
                             self.ObjectIdentifierValue))
                        if errno:
                            logging.error('Failed to update Local DB: ' +
                                          str(self.ObjectIdentifierValue) +
                                          ' error: ' + str(why))
        # End of one pass: release resources and wait 'Time' seconds.
        db.close_old_connections()
        self.mLock.release()
        time.sleep(int(self.Time))
    self.mDieFlag = 0
def ThreadMain(self, ProcName):
    """Worker loop that format-validates SIP packages waiting in the ingest table.

    Each pass of the outer loop:

    * reads ``Time`` and ``Run`` for ``ProcName`` from the ``ESSProc`` table and
      idles (2 s) when ``Run`` is ``'0'``;
    * fetches objects with ``StatusActivity = 0`` and ``StatusProcess``
      between 24 and 26 from the configured ingest table;
    * for every object, loads the active ``ArchivePolicy``, prepares the
      metadata (RES -> PREMIS for metatype 2, Content_METS -> PREMIS for
      metatype 1), builds ``self.object_list`` from PREMIS (metatype 1, 2, 3)
      or from ``sip.xml``/``mets.xml`` (metatype 4), and checks access
      rights, size and checksum of every listed file;
    * verifies that the filesystem holds no files missing from the metadata
      and, for metatype 4, that the InformationClass matches the policy;
    * sets the object status to 29/0 on success or 26/4 on failure.

    ``self.ok`` is the running success flag: each step is skipped once an
    earlier step has set it to 0.

    ProcName -- name of this process' row in the ``ESSProc`` table.

    The loop ends when ``self.mDieFlag`` equals 1; the flag is reset to 0
    before returning.
    """
    logging.info('Starting ' + ProcName)
    while 1:
        if self.mDieFlag == 1:
            break  # Request for death
        # NOTE(review): the lock is released explicitly at the end of the pass
        # (and in the Run == '0' branch); there is no try/finally, so an
        # exception raised in between leaves mLock held.
        self.mLock.acquire()
        self.Time, self.Run = ESSDB.DB().action('ESSProc', 'GET', ('Time', 'Run'), ('Name', ProcName))[0]
        if self.Run == '0':
            # Process is switched off in ESSProc: record the stopped state and idle.
            logging.info('Stopping ' + ProcName)
            ESSDB.DB().action('ESSProc', 'UPD', ('Status', '0', 'Run', '0', 'PID', '0'), ('Name', ProcName))
            self.RunFlag = 0
            self.mLock.release()
            #if Debug: print 'RunFlag: 0'
            time.sleep(2)
            continue
        # Process Item
        # NOTE(review): `lock` is never used again inside this function.
        lock = thread.allocate_lock()
        self.IngestTable = ESSDB.DB().action('ESSConfig', 'GET', ('Value', ), ('Name', 'IngestTable'))[0][0]
        # The external table name is only passed on when external DB updates are enabled.
        if ExtDBupdate:
            self.ext_IngestTable = self.IngestTable
        else:
            self.ext_IngestTable = ''
        #if Debug: logging.info('Start to list worklist (self.dbget)')
        # Worklist: objects with StatusActivity 0 and StatusProcess 24..26.
        self.dbget, errno, why = ESSDB.DB().action(
            self.IngestTable, 'GET4',
            ('ObjectIdentifierValue', 'ObjectUUID', 'PolicyId', 'INFORMATIONCLASS'),
            ('StatusActivity', '=', '0', 'AND', 'StatusProcess', 'BETWEEN', 24, 'AND', 26))
        if errno:
            logging.error('Failed to access Local DB, error: ' + str(why))
        # NOTE(review): the loop below still runs after a DB error; presumably
        # action() returns an iterable in that case -- verify against ESSDB.
        for self.obj in self.dbget:
            # Re-read Run/Pause before every object so the process can be
            # stopped or paused in the middle of a worklist.
            self.ProcDB = ESSDB.DB().action('ESSProc', 'GET', ('Run', 'Pause'), ('Name', ProcName))[0]
            if self.ProcDB[0] == '0':
                logging.info('Stopping ' + ProcName)
                ESSDB.DB().action('ESSProc', 'UPD', ('Status', '0', 'Run', '0', 'PID', '0'), ('Name', ProcName))
                thread.interrupt_main()
                time.sleep(5)
                break
            elif self.ProcDB[1] == 1:
                # Paused: poll once a minute until Pause is no longer 1.
                while 1:
                    time.sleep(60)
                    self.ProcDB = ESSDB.DB().action(
                        'ESSProc', 'GET', ('Run', 'Pause'), ('Name', ProcName))[0]
                    if self.ProcDB[1] == 1:
                        logging.info('Process is in pause state')
                    else:
                        break
            self.ok = 1
            ###########################################################
            # get policy info
            self.ObjectIdentifierValue = ESSPGM.Check().str2unicode(
                self.obj[0])
            self.ObjectUUID = self.obj[1]
            self.PolicyId = self.obj[2]
            self.INFORMATIONCLASS = self.obj[3]
            logging.info('Start to validate format for SIP: %s',
                         self.ObjectIdentifierValue)
            self.ChecksumAlgorithm_CHOICES_dict = dict(
                ChecksumAlgorithm_CHOICES)
            self.ChecksumAlgorithm_CHOICES_invdict = ESSPGM.Check(
            ).invert_dict(self.ChecksumAlgorithm_CHOICES_dict)
            # NOTE(review): no error handling around this lookup; presumably it
            # raises when no active policy matches -- confirm.
            ArchivePolicy_obj = ArchivePolicy.objects.get(
                PolicyStat=1, PolicyID=self.PolicyId)
            # self.ok was set to 1 just above and nothing has cleared it since.
            if self.ok:
                ###########################################################
                # set variables
                self.AIPpath = ESSPGM.Check().str2unicode(
                    ArchivePolicy_obj.AIPpath)
                self.metatype = ArchivePolicy_obj.IngestMetadata
                self.Policy_INFORMATIONCLASS = ArchivePolicy_obj.INFORMATIONCLASS
                self.ChecksumAlgorithm = ArchivePolicy_obj.ChecksumAlgorithm
                self.ChecksumAlgorithm_name = self.ChecksumAlgorithm_CHOICES_dict[
                    self.ChecksumAlgorithm]
                self.SIPpath = ESSPGM.Check().str2unicode(
                    ArchivePolicy_obj.IngestPath)
                self.DBmode = ArchivePolicy_obj.Mode
                logging.debug('self.obj: %s', str(self.obj))
                logging.debug('self.ObjectIdentifierValue: %s',
                              self.ObjectIdentifierValue)
                logging.debug('Len self.ObjectIdentifierValue: %s',
                              len(self.ObjectIdentifierValue))
                logging.debug('self.SIPpath: %s', self.SIPpath)
                logging.debug('self.AIPpath: %s', self.AIPpath)
                if self.metatype == 2:
                    ############################################
                    # Create PREMISfile from TIFFEdit.RES if metatype is 2
                    logging.info(
                        'Start to convert RESfile to PREMISfile for object: ' +
                        self.ObjectIdentifierValue)
                    self.xml_PREMIS, self.errno, self.why = ESSMD.RES2PREMIS(
                        os.path.join(self.SIPpath, self.ObjectIdentifierValue),
                        AgentIdentifierValue[8:])
                    # Specific event per known error number (10, 20, 30, 40) ...
                    if self.errno == 10:
                        self.event_info = 'Failed to parse RESfile, error.num: %s error.det: %s' % (
                            str(self.errno), str(self.why))
                        logging.error(self.event_info)
                        ESSPGM.Events().create('1022', 'RES2PREMIS',
                                               'ESSArch SIPValidateFormat',
                                               ProcVersion, '1',
                                               self.event_info, self.DBmode,
                                               self.ObjectIdentifierValue)
                    elif self.errno == 20:
                        self.event_info = 'I/O error to access RESfile, error.num: %s error.det: %s' % (
                            str(self.errno), str(self.why))
                        logging.error(self.event_info)
                        ESSPGM.Events().create('1022', 'RES2PREMIS',
                                               'ESSArch SIPValidateFormat',
                                               ProcVersion, '1',
                                               self.event_info, self.DBmode,
                                               self.ObjectIdentifierValue)
                    elif self.errno == 30:
                        self.event_info = 'Validation errors for PREMIS file, error.num: %s error.det: %s' % (
                            str(self.errno), str(self.why))
                        logging.error(self.event_info)
                        ESSPGM.Events().create('1022', 'RES2PREMIS',
                                               'ESSArch SIPValidateFormat',
                                               ProcVersion, '1',
                                               self.event_info, self.DBmode,
                                               self.ObjectIdentifierValue)
                    elif self.errno == 40:
                        self.event_info = 'Problem to write PREMIS file, error.num: %s error.det: %s' % (
                            str(self.errno), str(self.why))
                        logging.error(self.event_info)
                        ESSPGM.Events().create('1022', 'RES2PREMIS',
                                               'ESSArch SIPValidateFormat',
                                               ProcVersion, '1',
                                               self.event_info, self.DBmode,
                                               self.ObjectIdentifierValue)
                    # ... followed by the generic outcome: >1 is an error that
                    # fails the object, 1 is a warning, anything else is success.
                    if self.errno > 1:
                        self.event_info = 'Problem to convert RES to PREMIS for SIP package: %s, error.num: %s error.desc: %s' % (
                            self.ObjectIdentifierValue, str(
                                self.errno), str(self.why))
                        logging.error(self.event_info)
                        ESSPGM.Events().create('1022', 'RES2PREMIS',
                                               'ESSArch SIPValidateFormat',
                                               ProcVersion, '1',
                                               self.event_info, self.DBmode,
                                               self.ObjectIdentifierValue)
                        self.ok = 0
                    elif self.errno == 1:
                        self.event_info = 'Warning in convert RES to PREMIS for SIP package: %s, error.num: %s warning.desc: %s' % (
                            self.ObjectIdentifierValue, str(
                                self.errno), str(self.why))
                        logging.warning(self.event_info)
                        ESSPGM.Events().create('1022', 'RES2PREMIS',
                                               'ESSArch SIPValidateFormat',
                                               ProcVersion, '0',
                                               self.event_info, self.DBmode,
                                               self.ObjectIdentifierValue)
                    else:
                        ESSPGM.Events().create('1022', 'RES2PREMIS',
                                               'ESSArch SIPValidateFormat',
                                               ProcVersion, '0', '',
                                               self.DBmode,
                                               self.ObjectIdentifierValue)
                    if self.ok:
                        ############################################
                        # Clean RES SIP from "junk" files
                        self.errno, self.why = ESSPGM.Check().CleanRES_SIP(
                            os.path.join(self.SIPpath,
                                         self.ObjectIdentifierValue))
                        if self.errno:
                            self.event_info = 'Problem to clean RES SIP from "junk files" for SIP package: %s, error.num: %s error.desc: %s' % (
                                self.ObjectIdentifierValue, str(
                                    self.errno), str(self.why))
                            logging.error(self.event_info)
                            ESSPGM.Events().create(
                                '1022', 'CleanRES_SIP',
                                'ESSArch SIPValidateFormat', ProcVersion, '1',
                                self.event_info, self.DBmode,
                                self.ObjectIdentifierValue)
                            self.ok = 0
                        else:
                            ESSPGM.Events().create(
                                '1022', 'CleanRES_SIP',
                                'ESSArch SIPValidateFormat', ProcVersion, '0',
                                '', self.DBmode, self.ObjectIdentifierValue)
                elif self.metatype == 1:
                    ###########################################################
                    # Create PREMISfile from Content_METS if metatype is 1
                    res, errno, why = ESSMD.METS2PREMIS(
                        self.SIPpath, self.ObjectIdentifierValue)
                    if not errno:
                        logging.info(
                            'Succeeded to convert Content_METS to PREMISfile for information package: %s',
                            self.ObjectIdentifierValue)
                    else:
                        self.event_info = 'Problem to convert Content_METS to PREMISfile for information package: %s, errno: %s, detail: %s' % (
                            self.ObjectIdentifierValue, str(errno), str(why))
                        logging.error(self.event_info)
                        #ESSPGM.Events().create('1025','','ESSArch SIPValidateFormat',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue)
                        self.ok = 0
                elif self.metatype in [4]:
                    # For metatype 4 self.SIPpath is narrowed to the package
                    # directory itself; later paths are joined relative to it.
                    self.SIPpath = os.path.join(self.SIPpath,
                                                self.ObjectIdentifierValue)
            if self.ok:
                if self.metatype in [1, 2, 3]:
                    ###########################################################
                    # get object_list from PREMIS file
                    self.Premis_filepath = u'%s/%s/%s_PREMIS.xml' % (
                        self.SIPpath, self.ObjectIdentifierValue,
                        self.ObjectIdentifierValue)
                    self.object_list, errno, why = ESSMD.getPremisObjects(
                        FILENAME=self.Premis_filepath)
                    if errno == 0:
                        logging.info(
                            'Succeeded to get object_list from premis for information package: %s',
                            self.ObjectIdentifierValue)
                    else:
                        self.event_info = 'Problem to get object_list from premis for information package: %s, errno: %s, detail: %s' % (
                            self.ObjectIdentifierValue, str(errno), str(why))
                        logging.error(self.event_info)
                        ESSPGM.Events().create(
                            '1025', '', 'ESSArch SIPValidateFormat',
                            ProcVersion, '1', self.event_info, self.DBmode,
                            self.ObjectIdentifierValue)
                        self.ok = 0
                elif self.metatype in [4]:
                    ###########################################################
                    # get object_list from METS
                    # 'sip.xml' is preferred, 'mets.xml' is the fallback.
                    if os.path.exists(os.path.join(self.SIPpath, 'sip.xml')):
                        mets_file = 'sip.xml'
                        self.SIPmets_objpath = os.path.join(
                            self.SIPpath, mets_file)
                    elif os.path.exists(
                            os.path.join(self.SIPpath, 'mets.xml')):
                        mets_file = 'mets.xml'
                        self.SIPmets_objpath = os.path.join(
                            self.SIPpath, mets_file)
                    #elif os.path.exists(os.path.join(self.SIPpath,'%s_Content_METS.xml' % self.ObjectIdentifierValue)):
                    #    mets_file = '%s_Content_METS.xml' % self.ObjectIdentifierValue
                    #    self.SIPmets_objpath = os.path.join(self.SIPpath,mets_file)
                    else:
                        self.SIPmets_objpath = ''
                        self.event_info = 'Problem to find METS file for information package: %s in path: %s' % (
                            self.ObjectIdentifierValue, self.SIPpath)
                        logging.error(self.event_info)
                        # Note: this event passes the literal 2 where the other
                        # events in this function pass self.DBmode.
                        ESSPGM.Events().create(
                            '1025', '', 'ESSArch SIPValidateFormat',
                            ProcVersion, '1', self.event_info, 2,
                            self.ObjectIdentifierValue)
                        self.ok = 0
                    if self.SIPmets_objpath:
                        self.object_list, errno, why = ESSMD.getAIPObjects(
                            FILENAME=self.SIPmets_objpath)
                        if errno == 0:
                            logging.info(
                                'Succeeded to get object_list from METS for information package: %s',
                                self.ObjectIdentifierValue)
                            # The METS file itself is appended to the list with a
                            # freshly computed checksum and size.
                            # NOTE(review): errno/why from checksum() are not
                            # checked here.
                            self.F_Checksum, errno, why = ESSPGM.Check(
                            ).checksum(self.SIPmets_objpath,
                                       self.ChecksumAlgorithm)  # Checksum
                            self.F_SIZE = os.stat(self.SIPmets_objpath)[6]
                            self.object_list.append([
                                mets_file, self.ChecksumAlgorithm_name,
                                self.F_Checksum, self.F_SIZE, ''
                            ])
                        else:
                            self.event_info = 'Problem to get object_list from METS for information package: %s, errno: %s, detail: %s' % (
                                self.ObjectIdentifierValue, str(errno),
                                str(why))
                            logging.error(self.event_info)
                            ESSPGM.Events().create(
                                '1025', '', 'ESSArch SIPValidateFormat',
                                ProcVersion, '1', self.event_info, 2,
                                self.ObjectIdentifierValue)
                            self.ok = 0
            if self.ok:
                ###########################################################
                # update ObjectIdentifierValue to StatusProcess: 25 and StatusActivity: 5
                errno, why = ESSPGM.DB().SetAIPstatus(
                    self.IngestTable, self.ext_IngestTable,
                    AgentIdentifierValue, self.ObjectUUID, 25, 5)
                if errno:
                    logging.error('Failed to update DB status for AIP: ' +
                                  str(self.ObjectIdentifierValue) +
                                  ' error: ' + str(why))
                logging.info('Format validate object: ' +
                             self.ObjectIdentifierValue)
            if self.ok:
                ###########################################################
                # Start to format validate SIP
                # Start time is kept as a time-of-day timedelta built from the
                # local clock's hour, minute and second.
                self.startTime = datetime.timedelta(
                    seconds=time.localtime()[5],
                    minutes=time.localtime()[4],
                    hours=time.localtime()[3])
                self.ObjectNumItems = 0
                self.ObjectSize = 0
                # The generated metadata files are appended to the object list so
                # they are validated (and accepted by the filesystem check) too.
                if self.metatype == 1:
                    ############################################
                    # Object have metatype 1 (METS)
                    self.tmp_object_id = (
                        u'%s/%s_PREMIS.xml') % (self.ObjectIdentifierValue,
                                                self.ObjectIdentifierValue)
                    self.object_list.append([
                        self.tmp_object_id, '', '', '', '', 'ARCHMETAxmlWrap',
                        'PREMIS'
                    ])
                elif self.metatype == 2:
                    ############################################
                    # Object have metatype 2 (RES)
                    #self.tmp_object_id = ('%s/TIFFEdit.RES') % self.ObjectIdentifierValue
                    #self.object_list.append([self.tmp_object_id,'', '', '', '', 'ARCHMETA', ''])
                    self.tmp_object_id = (
                        u'%s/%s_PREMIS.xml') % (self.ObjectIdentifierValue,
                                                self.ObjectIdentifierValue)
                    self.object_list.append([
                        self.tmp_object_id, '', '', '', '', 'ARCHMETAxmlWrap',
                        'PREMIS'
                    ])
                elif self.metatype == 3:
                    ############################################
                    # Object have metatype 3 (ADDML)
                    self.tmp_object_id = (
                        u'%s/%s_ADDML.xml') % (self.ObjectIdentifierValue,
                                               self.ObjectIdentifierValue)
                    self.object_list.append([
                        self.tmp_object_id, '', '', '', '', 'ARCHMETAxmlWrap',
                        'ADDML'
                    ])
                    self.tmp_object_id = (
                        u'%s/%s_PREMIS.xml') % (self.ObjectIdentifierValue,
                                                self.ObjectIdentifierValue)
                    self.object_list.append([
                        self.tmp_object_id, '', '', '', '', 'ARCHMETAxmlWrap',
                        'PREMIS'
                    ])
                for self.object in self.object_list:
                    logging.debug('variable self.SIPpath: %s, type: %s' %
                                  (self.SIPpath, type(self.SIPpath)))
                    logging.debug('variable self.object[0]: %s, type: %s' %
                                  (self.object[0], type(self.object[0])))
                    self.filepath = os.path.join(self.SIPpath, self.object[0])
                    logging.debug('variable self.filepath: %s, type: %s' %
                                  (self.filepath, type(self.filepath)))
                    #self.filepath = ESSPGM.Check().Unicode2isoStr(self.filepath.encode('utf-8'))
                    #self.filepath_iso = ESSPGM.Check().unicode2str(self.filepath)
                    #logging.debug('variable self.filepath_iso: %s, type: %s' % (self.filepath_iso,type(self.filepath_iso)))
                    # For PREMIS-based packages the first listed object must be
                    # the package identifier itself.
                    if self.metatype in [1, 2, 3
                                         ] and self.ObjectNumItems == 0:
                        if self.object[0] == self.ObjectIdentifierValue:
                            logging.info(
                                'First premis object match information package: %s',
                                self.ObjectIdentifierValue)
                        else:
                            self.event_info = 'First premis object do not match information package: %s, premis_object: %s' % (
                                self.ObjectIdentifierValue, self.object[0])
                            logging.error(self.event_info)
                            ESSPGM.Events().create(
                                '1025', '', 'ESSArch SIPValidateFormat',
                                ProcVersion, '1', self.event_info,
                                self.DBmode, self.ObjectIdentifierValue)
                            self.ok = 0
                            break
                    # Access checks (X_OK, R_OK, W_OK). Each is also gated on
                    # self.ok, so a failure recorded earlier without a `break`
                    # ends up in the first `else` branch below.
                    if self.ok and os.access(self.filepath, os.X_OK):
                        pass
                    else:
                        self.event_info = 'Object path: %s do not exist or is not executable!' % self.filepath
                        logging.error(self.event_info)
                        ESSPGM.Events().create(
                            '1025', '', 'ESSArch SIPValidateFormat',
                            ProcVersion, '1', self.event_info, self.DBmode,
                            self.ObjectIdentifierValue)
                        self.ok = 0
                        break
                    if self.ok and os.access(self.filepath, os.R_OK):
                        pass
                    else:
                        self.event_info = 'Object path: %s do not exist or is not readable!' % self.filepath
                        logging.error(self.event_info)
                        ESSPGM.Events().create(
                            '1025', '', 'ESSArch SIPValidateFormat',
                            ProcVersion, '1', self.event_info, self.DBmode,
                            self.ObjectIdentifierValue)
                        self.ok = 0
                        break
                    if self.ok and os.access(self.filepath, os.W_OK):
                        pass
                    else:
                        self.event_info = 'Missing permission, Object path: %s is not writeable!' % self.filepath
                        logging.error(self.event_info)
                        ESSPGM.Events().create(
                            '1025', '', 'ESSArch SIPValidateFormat',
                            ProcVersion, '1', self.event_info, self.DBmode,
                            self.ObjectIdentifierValue)
                        self.ok = 0
                        break
                    if self.metatype in [1, 2, 3]:
                        # Data files (not the first object, not ARCHMETA*):
                        # size is read from index 4, checksum algorithm from
                        # index 1 and the expected checksum from index 2.
                        if self.ok and not (self.ObjectNumItems == 0 or
                                            self.object[5] == 'ARCHMETA' or
                                            self.object[5] == 'ARCHMETAxmlWrap'):
                            if int(os.stat(self.filepath)[6]) == int(
                                    self.object[4]):
                                self.ObjectSize += int(self.object[4])
                            else:
                                self.event_info = 'Filesize for object path: %s is %s and premis object size is %s. The sizes must match!' % (
                                    self.filepath,
                                    str(os.stat(self.filepath)[6]),
                                    str(self.object[4]))
                                logging.error(self.event_info)
                                ESSPGM.Events().create(
                                    '1025', '', 'ESSArch SIPValidateFormat',
                                    ProcVersion, '1', self.event_info,
                                    self.DBmode, self.ObjectIdentifierValue)
                                self.ok = 0
                                break
                            if self.ok:
                                self.F_Checksum, errno, why = ESSPGM.Check(
                                ).checksum(self.filepath,
                                           self.object[1])  # Checksum
                                if errno:
                                    self.event_info = 'Failed to get checksum for: %s, Error: %s' % (
                                        self.filepath, str(why))
                                    logging.error(self.event_info)
                                    ESSPGM.Events().create(
                                        '1025', '',
                                        'ESSArch SIPValidateFormat',
                                        ProcVersion, '1', self.event_info,
                                        self.DBmode,
                                        self.ObjectIdentifierValue)
                                    # NOTE(review): no `break` here; if another
                                    # object follows, the loop ends on the next
                                    # iteration's X_OK check and logs a
                                    # "not executable" error for that object.
                                    self.ok = 0
                            if self.ok:
                                if self.F_Checksum == self.object[2]:
                                    pass
                                else:
                                    self.event_info = 'Checksum for object path: %s is %s and premis object checksum is %s. The checksum must match!' % (
                                        self.filepath, self.F_Checksum,
                                        self.object[2])
                                    logging.error(self.event_info)
                                    ESSPGM.Events().create(
                                        '1025', '',
                                        'ESSArch SIPValidateFormat',
                                        ProcVersion, '1', self.event_info,
                                        self.DBmode,
                                        self.ObjectIdentifierValue)
                                    self.ok = 0
                                    break
                        # Metadata files (ARCHMETA*) only need to be non-empty.
                        elif self.ok and not self.ObjectNumItems == 0 and (
                                self.object[5] == 'ARCHMETA' or
                                self.object[5] == 'ARCHMETAxmlWrap'):
                            if int(os.stat(self.filepath)[6]) > 0:
                                pass
                            else:
                                self.event_info = 'Filesize for object path: %s is 0 bytes. The size should be more then 0 bytes!' % self.filepath
                                logging.error(self.event_info)
                                ESSPGM.Events().create(
                                    '1025', '', 'ESSArch SIPValidateFormat',
                                    ProcVersion, '1', self.event_info,
                                    self.DBmode, self.ObjectIdentifierValue)
                                self.ok = 0
                                break
                    elif self.metatype in [4]:
                        if self.ok:
                            #[objectIdentifierValue,messageDigestAlgorithm,messageDigest,a_SIZE,a_MIMETYPE]
                            if int(os.stat(self.filepath)[6]) == int(
                                    self.object[3]):
                                self.ObjectSize += int(self.object[3])
                            else:
                                self.event_info = 'Filesize for object path: %s is %s and METS object size is %s. The sizes must match!' % (
                                    self.filepath,
                                    str(os.stat(self.filepath)[6]),
                                    str(self.object[3]))
                                logging.error(self.event_info)
                                ESSPGM.Events().create(
                                    '1025', '', 'ESSArch SIPValidateFormat',
                                    ProcVersion, '1', self.event_info,
                                    self.DBmode, self.ObjectIdentifierValue)
                                self.ok = 0
                                break
                            if self.ok:
                                self.F_Checksum, errno, why = ESSPGM.Check(
                                ).checksum(self.filepath,
                                           self.object[1])  # Checksum
                                if errno:
                                    self.event_info = 'Failed to get checksum for: %s, Error: %s' % (
                                        self.filepath, str(why))
                                    logging.error(self.event_info)
                                    ESSPGM.Events().create(
                                        '1025', '',
                                        'ESSArch SIPValidateFormat',
                                        ProcVersion, '1', self.event_info,
                                        self.DBmode,
                                        self.ObjectIdentifierValue)
                                    # NOTE(review): same as the PREMIS branch,
                                    # the failure is recorded without `break`.
                                    self.ok = 0
                            if self.ok:
                                if self.F_Checksum == self.object[2]:
                                    pass
                                else:
                                    self.event_info = 'Checksum for object path: %s is %s and METS object checksum is %s. The checksum must match!' % (
                                        self.filepath, self.F_Checksum,
                                        self.object[2])
                                    logging.error(self.event_info)
                                    ESSPGM.Events().create(
                                        '1025', '',
                                        'ESSArch SIPValidateFormat',
                                        ProcVersion, '1', self.event_info,
                                        self.DBmode,
                                        self.ObjectIdentifierValue)
                                    self.ok = 0
                                    break
                    self.ObjectNumItems += 1
            if self.ok:
                if self.metatype in [1, 2, 3]:
                    ###############################################################################
                    # Check if SIP filesystem path contain files that not exist in metadatafile
                    for self.filesystem_object in ESSPGM.Check(
                    ).GetFiletree(
                            os.path.join(self.SIPpath,
                                         self.ObjectIdentifierValue)):
                        # The flag stays 0 for an empty object_list, otherwise it
                        # ends as 1 unless a matching entry was found.
                        self.missmatch_flag = 0
                        for self.object in self.object_list:
                            #if os.path.join(self.ObjectIdentifierValue,self.filesystem_object) == self.object[0].encode('utf-8'):
                            if os.path.join(self.ObjectIdentifierValue,
                                            self.filesystem_object
                                            ) == self.object[0]:
                                self.missmatch_flag = 0
                                break
                            else:
                                self.missmatch_flag = 1
                        if self.missmatch_flag:
                            self.filesystempath = u'%s/%s/%s' % (
                                self.SIPpath, self.ObjectIdentifierValue,
                                self.filesystem_object)
                            self.event_info = 'Filesystem file: %s do not exist in metadatafile for object: %s' % (
                                self.filesystempath,
                                os.path.join(self.SIPpath,
                                             self.ObjectIdentifierValue))
                            logging.error(self.event_info)
                            ESSPGM.Events().create(
                                '1025', '', 'ESSArch SIPValidateFormat',
                                ProcVersion, '1', self.event_info,
                                self.DBmode, self.ObjectIdentifierValue)
                            self.ok = 0
                if self.metatype in [4]:
                    ###############################################################################
                    # Check if SIP filesystem path contain files that not exist in metadatafile
                    # (self.SIPpath already points at the package directory here.)
                    for self.filesystem_object in ESSPGM.Check(
                    ).GetFiletree(self.SIPpath):
                        self.missmatch_flag = 0
                        for self.object in self.object_list:
                            if self.filesystem_object == self.object[0]:
                                self.missmatch_flag = 0
                                break
                            else:
                                self.missmatch_flag = 1
                        if self.missmatch_flag:
                            self.filesystempath = u'%s/%s' % (
                                self.SIPpath, self.filesystem_object)
                            self.event_info = 'Filesystem file: %s do not exist in metadatafile for object: %s' % (
                                self.filesystempath, self.SIPpath)
                            logging.error(self.event_info)
                            ESSPGM.Events().create(
                                '1025', '', 'ESSArch SIPValidateFormat',
                                ProcVersion, '1', self.event_info,
                                self.DBmode, self.ObjectIdentifierValue)
                            self.ok = 0
            if self.ok:
                if self.metatype in [4]:
                    ###############################################################################
                    # Check if SIP INFORMATIONCLASS match Policy
                    if self.INFORMATIONCLASS == self.Policy_INFORMATIONCLASS:
                        self.event_info = 'Object: %s InformationClass: %s match defined InformaionClass: %s in PolicyID: %s' % (
                            self.ObjectIdentifierValue, self.INFORMATIONCLASS,
                            self.Policy_INFORMATIONCLASS, self.PolicyId)
                        logging.info(self.event_info)
                    else:
                        self.event_info = 'Object: %s InformationClass: %s do not match defined InformationClass: %s in PolicyID: %s' % (
                            self.ObjectIdentifierValue, self.INFORMATIONCLASS,
                            self.Policy_INFORMATIONCLASS, self.PolicyId)
                        logging.error(self.event_info)
                        ESSPGM.Events().create(
                            '1025', '', 'ESSArch SIPValidateFormat',
                            ProcVersion, '1', self.event_info, self.DBmode,
                            self.ObjectIdentifierValue)
                        self.ok = 0
            if self.ok:
                # Throughput statistics for the success log message below.
                self.stopTime = datetime.timedelta(
                    seconds=time.localtime()[5],
                    minutes=time.localtime()[4],
                    hours=time.localtime()[3])
                self.MeasureTime = self.stopTime - self.startTime
                self.ObjectSizeMB = self.ObjectSize / 1048576
                if self.MeasureTime.seconds < 1:
                    self.MeasureTime = datetime.timedelta(
                        seconds=1
                    )  #Fix min time to 1 second if it is zero.
                self.VerMBperSEC = int(self.ObjectSizeMB) / int(
                    self.MeasureTime.seconds)
            if self.ok:
                # Validation succeeded: StatusProcess 29, StatusActivity 0.
                logging.info('Succeeded to validate SIP package: ' +
                             self.ObjectIdentifierValue + ' , ' +
                             str(self.VerMBperSEC) + ' MB/Sec and Time: ' +
                             str(self.MeasureTime))
                errno, why = ESSPGM.DB().SetAIPstatus(
                    self.IngestTable, self.ext_IngestTable,
                    AgentIdentifierValue, self.ObjectUUID, 29, 0)
                if errno:
                    logging.error('Failed to update DB status for AIP: ' +
                                  str(self.ObjectIdentifierValue) +
                                  ' error: ' + str(why))
                else:
                    ESSPGM.Events().create('1025', '',
                                           'ESSArch SIPValidateFormat',
                                           ProcVersion, '0', '', self.DBmode,
                                           self.ObjectIdentifierValue)
            else:
                # Validation failed: StatusProcess 26, StatusActivity 4.
                errno, why = ESSPGM.DB().SetAIPstatus(
                    self.IngestTable, self.ext_IngestTable,
                    AgentIdentifierValue, self.ObjectUUID, 26, 4)
                if errno:
                    logging.error('Failed to update DB status for AIP: ' +
                                  str(self.ObjectIdentifierValue) +
                                  ' error: ' + str(why))
                else:
                    self.event_info = 'Failed to validate SIP package: ' + self.ObjectIdentifierValue
                    logging.error(self.event_info)
                    ESSPGM.Events().create('1025', '',
                                           'ESSArch SIPValidateFormat',
                                           ProcVersion, '1', self.event_info,
                                           self.DBmode,
                                           self.ObjectIdentifierValue)
        # End of one pass: drop stale DB connections, release the lock and wait
        # the number of seconds configured in ESSProc.Time.
        db.close_old_connections()
        self.mLock.release()
        time.sleep(int(self.Time))
    self.mDieFlag = 0
def ThreadMain(self,ProcName): logging.info('Starting ' + ProcName) self.tz=timezone.get_default_timezone() while 1: if self.mDieFlag==1: break # Request for death self.mLock.acquire() self.Time,self.Run = ESSDB.DB().action('ESSProc','GET',('Time','Run'),('Name',ProcName))[0] if self.Run == '0': logging.info('Stopping ' + ProcName) ESSDB.DB().action('ESSProc','UPD',('Status','0','Run','0','PID','0'),('Name',ProcName)) self.RunFlag=0 self.mLock.release() #if Debug: print 'RunFlag: 0' time.sleep(2) continue # Process Item lock=thread.allocate_lock() self.IngestTable = ESSDB.DB().action('ESSConfig','GET',('Value',),('Name','IngestTable'))[0][0] self.PolicyTable = ESSDB.DB().action('ESSConfig','GET',('Value',),('Name','PolicyTable'))[0][0] Cmets_obj = Parameter.objects.get(entity='content_descriptionfile').value if ExtDBupdate: self.ext_IngestTable = self.IngestTable else: self.ext_IngestTable = '' self.dbget,errno,why = ESSDB.DB().action(self.IngestTable,'GET4',('ObjectIdentifierValue','ObjectUUID','PolicyId'),('StatusProcess','BETWEEN',29,'AND',31,'AND', 'StatusActivity','=','0')) if errno: logging.error('Failed to access Local DB, error: ' + str(why)) for self.obj in self.dbget: self.ProcDB = ESSDB.DB().action('ESSProc','GET',('Run','Pause'),('Name',ProcName))[0] if self.ProcDB[0]=='0': logging.info('Stopping ' + ProcName) ESSDB.DB().action('ESSProc','UPD',('Status','0','Run','0','PID','0'),('Name',ProcName)) thread.interrupt_main() time.sleep(5) break elif self.ProcDB[1]==1: while 1: time.sleep(60) self.ProcDB = ESSDB.DB().action('ESSProc','GET',('Run','Pause'),('Name',ProcName))[0] if self.ProcDB[1]==1: logging.info('Process is in pause state') else: break self.ok = 1 ########################################################### # get policy info self.ObjectIdentifierValue = self.obj[0] self.ObjectUUID = self.obj[1] self.PolicyId = self.obj[2] logging.info('Start to create AIP for: %s', self.ObjectIdentifierValue) self.PolicyDB,errno,why = 
ESSDB.DB().action(self.PolicyTable,'GET3',('AIPpath','IngestMetadata','IngestPath'),('PolicyID',self.PolicyId)) if errno: logging.error('Failed to access Local DB, error: ' + str(why)) self.ok = 0 if self.ok: ########################################################### # set variables self.AIPpath = self.PolicyDB[0][0] self.metatype = self.PolicyDB[0][1] self.SIPpath = self.PolicyDB[0][2] self.p_obj = self.ObjectIdentifierValue + '.tar' self.p_objpath = os.path.join(self.AIPpath,self.p_obj) #self.Cmets_obj = self.ObjectIdentifierValue + '_Content_METS.xml' #self.Cmets_obj = Cmets_obj.replace('{uuid}',self.ObjectIdentifierValue) self.Cmets_obj = Cmets_obj.replace('{objid}',self.ObjectIdentifierValue) self.SIProotpath = os.path.join(self.SIPpath,self.ObjectIdentifierValue) if self.metatype in [4]: self.Cmets_objpath = os.path.join(self.SIProotpath,self.Cmets_obj) if os.path.exists(os.path.join(self.SIProotpath,'sip.xml')): mets_file = 'sip.xml' self.SIPmets_objpath = os.path.join(self.SIProotpath,mets_file) elif os.path.exists(os.path.join(self.SIProotpath,'mets.xml')): mets_file = 'mets.xml' self.SIPmets_objpath = os.path.join(self.SIProotpath,mets_file) #elif os.path.exists(os.path.join(self.SIProotpath,'%s_Content_METS.xml' % self.ObjectIdentifierValue)): # mets_file = '%s_Content_METS.xml' % self.ObjectIdentifierValue # self.SIPmets_objpath = os.path.join(self.SIProotpath,mets_file) else: self.SIPmets_objpath = '' elif self.metatype in [1,2,3]: self.Cmets_objpath = os.path.join(self.AIPpath,self.Cmets_obj) Debug = 1 logging.debug('self.obj: %s', str(self.obj)) logging.debug('self.ObjectIdentifierValue: %s', self.ObjectIdentifierValue) logging.debug('Len self.ObjectIdentifierValue: %s', len(self.ObjectIdentifierValue)) logging.debug('self.SIPpath: %s', self.SIPpath) logging.debug('self.AIPpath: %s', self.AIPpath) if self.metatype in [1,2,3]: if self.ok: ########################################################### # get object_list from PREMIS file 
self.Premis_filepath = '%s/%s/%s_PREMIS.xml' % (self.SIPpath,self.ObjectIdentifierValue,self.ObjectIdentifierValue) self.object_list,errno,why = ESSMD.getPremisObjects(FILENAME=self.Premis_filepath) # list [objectIdentifierValue,messageDigestAlgorithm,messageDigest,messageDigestOriginator,size,formatName,formatVersion] if errno == 0: logging.info('Succeeded to get object_list from premis for information package: %s', self.ObjectIdentifierValue) else: self.event_info = 'Problem to get object_list from premis for information package: %s, errno: %s, detail: %s' % (self.ObjectIdentifierValue,str(errno),str(why)) logging.error(self.event_info) ESSPGM.Events().create('1030','','ESSArch AIPCreator',ProcVersion,'1',self.event_info,2,self.ObjectIdentifierValue) self.ok = 0 if self.ok: ########################################################### # create AIP content METS file #self.firstPremisObjectFlag = 1 METS_agent_list = [] if self.metatype == 1: ############################################ # Object have metatype 1 (METS) self.METS_LABEL = 'ESSArch AIP' self.tmp_object_id = ('%s/%s_PREMIS.xml') % (self.ObjectIdentifierValue,self.ObjectIdentifierValue) self.tmp_object_size = os.stat(os.path.join(self.SIPpath,self.tmp_object_id))[6] self.object_list.append([self.tmp_object_id,'', '', '', self.tmp_object_size, 'ARCHMETAxmlWrap', 'PREMIS']) # Get SIP Content METS information self.METSfilepath = os.path.join(self.SIPpath,self.ObjectIdentifierValue + '/metadata/SIP/' + self.ObjectIdentifierValue + '_Content_METS.xml') res_info, res_files, res_struct, error, why = ESSMD.getMETSFileList(FILENAME=self.METSfilepath) for agent in res_info[2]: if not (agent[0] == 'CREATOR' and agent[3] == 'SOFTWARE'): METS_agent_list.append(agent) METS_agent_list.append(['CREATOR','INDIVIDUAL','',AgentIdentifierValue,[]]) METS_agent_list.append(['CREATOR', 'OTHER', 'SOFTWARE', 'ESSArch', ['VERSION=%s' % ProcVersion]]) elif self.metatype == 2: ############################################ # Object have 
metatype 2 (RES) self.METS_LABEL = 'Imaging AIP RA' self.tmp_object_id = ('%s/%s_PREMIS.xml') % (self.ObjectIdentifierValue,self.ObjectIdentifierValue) self.tmp_object_size = os.stat(os.path.join(self.SIPpath,self.tmp_object_id))[6] self.object_list.append([self.tmp_object_id,'', '', '', self.tmp_object_size, 'ARCHMETAxmlWrap', 'PREMIS']) METS_agent_list.append(['ARCHIVIST','ORGANIZATION','','Riksarkivet',[]]) METS_agent_list.append(['CREATOR','ORGANIZATION','','Riksarkivet',[]]) METS_agent_list.append(['CREATOR','INDIVIDUAL','',AgentIdentifierValue,[]]) METS_agent_list.append(['CREATOR', 'OTHER', 'SOFTWARE', 'ESSArch', ['VERSION=%s' % ProcVersion]]) elif self.metatype == 3: ############################################ # Object have metatype 3 (ADDML) self.METS_LABEL = 'Born Digital AIP RA' self.tmp_object_id = ('%s/%s_ADDML.xml') % (self.ObjectIdentifierValue,self.ObjectIdentifierValue) self.tmp_object_size = os.stat(os.path.join(self.SIPpath,self.tmp_object_id))[6] self.object_list.append([self.tmp_object_id,'', '', '', self.tmp_object_size, 'ARCHMETAxmlWrap', 'ADDML']) self.tmp_object_id = ('%s/%s_PREMIS.xml') % (self.ObjectIdentifierValue,self.ObjectIdentifierValue) self.tmp_object_size = os.stat(os.path.join(self.SIPpath,self.tmp_object_id))[6] self.object_list.append([self.tmp_object_id,'', '', '', self.tmp_object_size, 'ARCHMETAxmlWrap', 'PREMIS']) METS_agent_list.append(['ARCHIVIST','ORGANIZATION','','Riksarkivet',[]]) METS_agent_list.append(['CREATOR','ORGANIZATION','','Riksarkivet',[]]) METS_agent_list.append(['CREATOR','INDIVIDUAL','',AgentIdentifierValue,[]]) METS_agent_list.append(['CREATOR', 'OTHER', 'SOFTWARE', 'ESSArch', ['VERSION=%s' % ProcVersion]]) self.firstPremisObjectFlag = 1 self.DataObjectNumItems = 0 self.DataObjectSize = 0 self.MetaObjectSize = 0 self.MetaObjectIdentifier = 'None' for self.object in self.object_list: self.filepath = os.path.join(self.SIPpath, self.object[0]) self.filepath_iso = ESSPGM.Check().unicode2str(self.filepath) 
self.a_filepath = self.object[0] if self.firstPremisObjectFlag: if self.object[0] == self.ObjectIdentifierValue: logging.info('First premis object match information package: %s', self.ObjectIdentifierValue) if self.metatype == 1: self.METSdoc = ESSMD.createMets(self.ObjectIdentifierValue,self.METS_LABEL,METS_agent_list,['premis']) elif self.metatype == 2: self.METSdoc = ESSMD.createMets(self.ObjectIdentifierValue,self.METS_LABEL,METS_agent_list,['premis','mix']) elif self.metatype == 3: self.METSdoc = ESSMD.createMets(self.ObjectIdentifierValue,self.METS_LABEL,METS_agent_list,['premis','addml','xhtml']) self.firstPremisObjectFlag = 0 continue else: self.event_info = 'First premis object do not match information package: %s, premis_object: %s' % (self.ObjectIdentifierValue,self.object[0]) logging.error(self.event_info) ESSPGM.Events().create('1030','','ESSArch AIPCreator',ProcVersion,'1',self.event_info,2,self.ObjectIdentifierValue) self.ok = 0 elif os.access(self.filepath_iso,os.R_OK): self.file_statinfo = os.stat(self.filepath_iso) if self.metatype == 2 and self.object[5] == 'ARCHMETA': ############################################ # Object have metatype 2 and RES file #self.file_ID = string.replace(self.object[0],'/','%') self.file_ID = self.object[0] self.file_ID = self.object[0] self.file_SIZE = self.file_statinfo.st_size self.file_LABEL = 'Content description' self.file_MIMETYPE = 'text/csv' self.file_MDTYPE = 'OTHER' self.file_OTHERMDTYPE = 'RES' self.file_CHECKSUMTYPE = self.object[1] self.file_CHECKSUM = self.object[2] self.file_LOCTYPE = 'URL' self.file_xlink_type = 'simple' elif self.object[5] == 'ARCHMETAxmlWrap' and self.object[6] == 'PREMIS': ############################################ # Object is a PREMIS XML file self.file_MDTYPE = 'PREMIS' self.file_OTHERMDTYPE = '' elif self.object[5] == 'ARCHMETAxmlWrap': ############################################ # Object is a OTHER XML file self.file_MDTYPE = 'OTHER' self.file_OTHERMDTYPE = self.object[6] elif 
self.metatype == 1: ############################################ # Object have metatype 1, convert PREMIS formatName to MIME-type, datafile self.file_ID = self.object[0] self.file_SIZE = self.file_statinfo.st_size self.file_LABEL = 'Datafiles' self.file_MIMETYPE = ESSPGM.Check().PREMISformat2MIMEtype(self.object[5]) self.file_USE = 'Datafile' self.file_CHECKSUMTYPE = self.object[1] self.file_CHECKSUM = self.object[2] self.file_LOCTYPE = 'URL' self.file_xlink_type = 'simple' if self.file_MIMETYPE == 'unknown': self.event_info = 'Problem to idetify MIMETYPE from PREMIS for: %s' % self.filepath logging.error(self.event_info) ESSPGM.Events().create('1030','','ESSArch AIPCreator',ProcVersion,'1',self.event_info,2,self.ObjectIdentifierValue) self.ok = 0 elif self.metatype == 2 and self.object[0][-12:] == 'TIFFEdit.RES': ############################################ # Object have metatype 2 and RES file self.file_ID = self.object[0] self.file_SIZE = self.file_statinfo.st_size self.file_LABEL = 'RA Information' self.file_MIMETYPE = 'text/csv' self.file_USE = 'RA Information' self.file_CHECKSUMTYPE = self.object[1] self.file_CHECKSUM = self.object[2] self.file_LOCTYPE = 'URL' self.file_xlink_type = 'simple' elif self.metatype == 2: ############################################ # Object have metatype 2 and datafile is an tiff image self.file_ID = self.object[0] self.file_SIZE = self.file_statinfo.st_size self.file_LABEL = 'RA Datafiles' self.file_MIMETYPE = 'image/tiff' self.file_USE = 'RA Datafile' self.file_CHECKSUMTYPE = self.object[1] self.file_CHECKSUM = self.object[2] self.file_LOCTYPE = 'URL' self.file_xlink_type = 'simple' elif self.metatype == 3: ############################################ # Object have metatype 3, convert PREMIS formatName to MIME-type, datafile self.file_ID = self.object[0] self.file_SIZE = self.file_statinfo.st_size self.file_LABEL = 'Datafiles' self.file_MIMETYPE = ESSPGM.Check().PREMISformat2MIMEtype(self.object[5]) self.file_USE = 'Datafile' 
self.file_CHECKSUMTYPE = self.object[1] self.file_CHECKSUM = self.object[2] self.file_LOCTYPE = 'URL' self.file_xlink_type = 'simple' if self.file_MIMETYPE == 'unknown': self.event_info = 'Problem to idetify MIMETYPE from PREMIS for: %s' % self.filepath logging.error(self.event_info) ESSPGM.Events().create('1030','','ESSArch AIPCreator',ProcVersion,'1',self.event_info,2,self.ObjectIdentifierValue) self.ok = 0 else: ############################################ # Object is a datafile self.file_ID = self.object[0] self.file_SIZE = self.file_statinfo.st_size self.file_LABEL = 'Datafiles' self.file_MIMETYPE = 'xxxxx' # Maste fixas self.file_USE = 'Datafile' self.file_CHECKSUMTYPE = self.object[1] self.file_CHECKSUM = self.object[2] self.file_LOCTYPE = 'URL' self.file_xlink_type = 'simple' else: self.event_info = 'Object path: %s do not exist or is not readable!' % self.filepath logging.error(self.event_info) ESSPGM.Events().create('1030','','ESSArch AIPCreator',ProcVersion,'1',self.event_info,2,self.ObjectIdentifierValue) self.ok = 0 if self.ok: ########################################################### # add files to METS file if not (self.object[5] == 'ARCHMETA' or self.object[5] == 'ARCHMETAxmlWrap'): self.DataObjectNumItems += 1 self.DataObjectSize += self.file_SIZE self.fil_utc_mtime = datetime.datetime.utcfromtimestamp(self.file_statinfo.st_mtime).replace(tzinfo=pytz.utc) self.fil_lociso_mtime = self.fil_utc_mtime.astimezone(self.tz).isoformat() self.METSdoc = ESSMD.AddDataFiles(self.METSdoc,self.file_LABEL,'FILES','',[(self.file_ID,self.file_SIZE,self.fil_lociso_mtime,self.file_MIMETYPE,'',self.file_USE,self.file_CHECKSUMTYPE,self.file_CHECKSUM,self.file_LOCTYPE,self.file_xlink_type)]) elif self.object[5] == 'ARCHMETA': self.MetaObjectSize += self.file_SIZE self.fil_utc_mtime = datetime.datetime.utcfromtimestamp(self.file_statinfo.st_mtime).replace(tzinfo=pytz.utc) self.fil_lociso_mtime = self.fil_utc_mtime.astimezone(self.tz).isoformat() self.METSdoc = 
ESSMD.AddContentFiles(self.METSdoc,self.file_LABEL,'',[(self.file_ID,self.file_SIZE,self.fil_lociso_mtime,self.file_MIMETYPE,self.file_MDTYPE,self.file_OTHERMDTYPE,self.file_CHECKSUMTYPE,self.file_CHECKSUM,self.file_LOCTYPE,self.file_xlink_type)]) elif self.object[5] == 'ARCHMETAxmlWrap': logging.info('Wrap XML file: ' + self.a_filepath + ' to METS file') self.file_xml,errno,why = ESSMD.parseFromFile(self.filepath) if errno: self.event_info = 'Failed to parse XML file: ' + str(self.filepath) + ' error: ' + str(why) logging.error(self.event_info) ESSPGM.Events().create('1030','','ESSArch AIPCreator',ProcVersion,'1',self.event_info,2,self.ObjectIdentifierValue) self.ok = 0 self.METSdoc = ESSMD.AddContentEtree(self.METSdoc,[(self.file_xml,self.file_MDTYPE,self.file_OTHERMDTYPE)]) if self.ok: ######################## # Update root schemalocation and remove all other schemalocation self.METSdoc,errno,why = ESSMD.updateSchemaLocation(self.METSdoc) ######################## # Update all ADMID in DOC res,errno,why = ESSMD.updateFilesADMID(self.METSdoc) ######################## # Set xml_METS to self.METSdoc xml_METS = self.METSdoc if self.metatype in [4]: dt = datetime.datetime.utcnow().replace(microsecond=0,tzinfo=pytz.utc) loc_dt_isoformat = dt.astimezone(self.tz).isoformat() xml_METS = ESSMD.updatePackage(FILENAME=self.SIPmets_objpath,TYPE='AIP',CREATED=loc_dt_isoformat,metsDocumentID=self.Cmets_obj) if self.ok: ######################## # Write METS file errno,why = ESSMD.writeToFile(xml_METS,self.Cmets_objpath) if errno: self.event_info = 'Problem to write METS to file for AIP package: ' + str(self.p_objpath) logging.error(self.event_info) ESSPGM.Events().create('1030','','ESSArch AIPCreator',ProcVersion,'1',self.event_info,2,self.ObjectIdentifierValue) time.sleep(2) self.ok = 0 self.MetaObjectIdentifier = self.Cmets_obj self.MetaObjectSize = 0 if self.ok: ########################################################### # get object_list from METS self.object_list,errno,why 
= ESSMD.getAIPObjects(FILENAME=self.Cmets_objpath) if errno == 0: logging.info('Succeeded to get object_list from METS for information package: %s', self.ObjectIdentifierValue) else: self.event_info = 'Problem to get object_list from METS for information package: %s, errno: %s, detail: %s' % (self.ObjectIdentifierValue,str(errno),str(why)) logging.error(self.event_info) ESSPGM.Events().create('1030','','ESSArch AIPCreator',ProcVersion,'1',self.event_info,2,self.ObjectIdentifierValue) self.ok = 0 if self.ok: ########################################################### # Insert METS file as first object in AIP package self.tmp_object_size = os.stat(self.Cmets_objpath)[6] self.object_list.insert(0,[self.Cmets_obj,'','',self.tmp_object_size,'']) if self.ok: ########################################################### # create AIP package file try: errno,why = ESSPGM.DB().SetAIPstatus(self.IngestTable, self.ext_IngestTable, AgentIdentifierValue, self.ObjectUUID, 30, 5) if errno: logging.error('Failed to update DB status for AIP: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why)) logging.info('Create AIP Package: ' + self.p_objpath) self.tarfile = tarfile.open(self.p_objpath, "w",) except tarfile.TarError: self.event_info = 'Problem to create AIP Package: ' + str(self.p_objpath) logging.error(self.event_info) ESSPGM.Events().create('1030','','ESSArch AIPCreator',ProcVersion,'1',self.event_info,2,self.ObjectIdentifierValue) self.ok = 0 if self.ok: ########################################################### # add files to AIP package file self.startTarTime = datetime.timedelta(seconds=time.localtime()[5],minutes=time.localtime()[4],hours=time.localtime()[3]) self.firstPremisObjectFlag = 1 self.ObjectNumItems = 0 self.ObjectSize = 0 for self.object in self.object_list: if self.metatype in [1,2,3]: self.a_filepath = self.object[0] elif self.metatype in [4]: self.a_filepath = '%s/%s' % (self.ObjectIdentifierValue,self.object[0]) self.a_filepath_iso = 
ESSPGM.Check().unicode2str(self.a_filepath) self.object_size = int(self.object[3]) if self.a_filepath == self.Cmets_obj or self.a_filepath == '%s/%s' % (self.ObjectIdentifierValue,self.Cmets_obj): self.filepath = self.Cmets_objpath if self.metatype in [4]: self.MetaObjectSize = self.object_size self.DataObjectSize = 0 self.DataObjectNumItems = 0 else: self.filepath = os.path.join(self.SIPpath, self.a_filepath) if self.metatype in [4]: self.DataObjectSize += self.object_size self.DataObjectNumItems += 1 self.filepath_iso = ESSPGM.Check().unicode2str(self.filepath) if os.access(self.filepath_iso,os.R_OK): if int(os.stat(self.filepath_iso)[6]) == self.object_size: try: self.ObjectNumItems += 1 self.tarinfo = self.tarfile.gettarinfo(self.filepath_iso, self.a_filepath_iso) self.tarfile.addfile(self.tarinfo, file(self.filepath_iso)) logging.info('Add: ' + self.a_filepath + ' to AIP Package: ' + self.p_obj) except tarfile.TarError: self.event_info = 'Problem to add: ' + str(self.a_filepath) + ' to AIP Package: ' + str(self.p_objpath) logging.error(self.event_info) ESSPGM.Events().create('1030','','ESSArch AIPCreator',ProcVersion,'1',self.event_info,2,self.ObjectIdentifierValue) self.ok = 0 else: self.event_info = 'Filesize for object path: %s is %s and METS object size is %s. The sizes must match!' % (self.filepath,str(os.stat(self.filepath_iso)[6]),str(self.object_size)) logging.error(self.event_info) ESSPGM.Events().create('1030','','ESSArch AIPCreator',ProcVersion,'1',self.event_info,2,self.ObjectIdentifierValue) self.ok = 0 else: self.event_info = 'Object path: %s do not exist or is not readable!' 
% self.filepath logging.error(self.event_info) ESSPGM.Events().create('1030','','ESSArch AIPCreator',ProcVersion,'1',self.event_info,2,self.ObjectIdentifierValue) self.ok = 0 if self.ok: ########################################################### # Close AIP package try: self.tarfile.close() except tarfile.TarError: self.event_info = 'Problem to close AIP package: ' + str(self.p_objpath) logging.error(self.event_info) ESSPGM.Events().create('1030','','ESSArch AIPCreator',ProcVersion,'1',self.event_info,2,self.ObjectIdentifierValue) self.ok = 0 if self.ok: ########################################################### # Check if StatusActivity is OK self.ObjectSize = os.stat(self.p_objpath)[6] self.stopTarTime = datetime.timedelta(seconds=time.localtime()[5],minutes=time.localtime()[4],hours=time.localtime()[3]) self.TarTime = self.stopTarTime-self.startTarTime self.WriteSize = int(self.ObjectSize)/1048576 if self.TarTime.seconds < 1: self.TarTime = datetime.timedelta(seconds=1) #Fix min time to 1 second if it is zero. 
self.TarMBperSEC = int(self.WriteSize)/int(self.TarTime.seconds) logging.info('Close AIP package: ' + self.p_obj) logging.info('Succeeded to create AIP for: ' + self.ObjectIdentifierValue + ' , ' + str(self.TarMBperSEC) + ' MB/Sec and Time: ' + str(self.TarTime)) self.timestamp_utc = datetime.datetime.utcnow().replace(microsecond=0,tzinfo=pytz.utc) self.timestamp_dst = self.timestamp_utc.astimezone(self.tz) res,errno,why = ESSDB.DB().action(self.IngestTable,'UPD',('ObjectPackageName',self.p_obj, 'ObjectSize',self.ObjectSize, 'ObjectNumItems',self.ObjectNumItems, 'ObjectMessageDigest','', 'ObjectPath','', 'MetaObjectIdentifier',self.MetaObjectIdentifier, 'MetaObjectSize',self.MetaObjectSize, 'DataObjectSize',self.DataObjectSize, 'DataObjectNumItems',self.DataObjectNumItems, 'CreateDate',self.timestamp_utc.replace(tzinfo=None), 'CreateAgentIdentifierValue',AgentIdentifierValue, 'StatusProcess','39', 'StatusActivity','0', 'LastEventDate',self.timestamp_utc.replace(tzinfo=None), 'linkingAgentIdentifierValue',AgentIdentifierValue, 'LocalDBdatetime',self.timestamp_utc.replace(tzinfo=None)), ('ObjectIdentifierValue',self.ObjectIdentifierValue)) if errno: logging.error('Failed to update Local DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why)) else: ESSPGM.Events().create('1030','','ESSArch AIPCreator',ProcVersion,'0','',2,self.ObjectIdentifierValue) if errno == 0 and self.ext_IngestTable: ext_res,ext_errno,ext_why = ESSMSSQL.DB().action(self.ext_IngestTable,'UPD',('ObjectPackageName',self.p_obj, 'ObjectSize',self.ObjectSize, 'ObjectNumItems',self.ObjectNumItems, 'ObjectMessageDigest','', 'ObjectPath','', 'MetaObjectIdentifier',self.MetaObjectIdentifier, 'MetaObjectSize',self.MetaObjectSize, 'DataObjectSize',self.DataObjectSize, 'DataObjectNumItems',self.DataObjectNumItems, 'CreateDate',self.timestamp_dst.replace(tzinfo=None), 'CreateAgentIdentifierValue',AgentIdentifierValue, 'StatusProcess','39', 'StatusActivity','0', 
'LastEventDate',self.timestamp_dst.replace(tzinfo=None), 'linkingAgentIdentifierValue',AgentIdentifierValue), ('ObjectIdentifierValue',self.ObjectIdentifierValue)) if ext_errno: logging.error('Failed to update External DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(ext_why)) else: res,errno,why = ESSDB.DB().action(self.IngestTable,'UPD',('ExtDBdatetime',self.timestamp_utc.replace(tzinfo=None)),('ObjectIdentifierValue',self.ObjectIdentifierValue)) if errno: logging.error('Failed to update Local DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why)) else: errno,why = ESSPGM.DB().SetAIPstatus(self.IngestTable, self.ext_IngestTable, AgentIdentifierValue, self.ObjectUUID, 31, 4) if errno: logging.error('Failed to update DB status for AIP: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why)) else: self.event_info = 'Failed to create AIP package: ' + self.p_obj logging.error(self.event_info) ESSPGM.Events().create('1030','','ESSArch AIPCreator',ProcVersion,'1',self.event_info,2,self.ObjectIdentifierValue) db.close_old_connections() self.mLock.release() time.sleep(int(self.Time)) self.mDieFlag=0
def ObjectValidate(self):
    """Validate pending information packages and advance their ingest status.

    The table names are read from ESSConfig, then every row of the ingest
    table matched by the status filter below is examined.  For each object
    an internal ``self.objectstatus`` code is computed, translated into a
    new ``StatusProcess``/``StatusActivity`` pair and written back to the
    local DB (and to the external DB when ``ExtDBupdate`` is set).

    objectstatus codes (values below 100 are progress, 100 and above are
    failures that need assistance):

        0    nothing could be determined (no DB update is made)
        1    object validated against its policy
        10   object found in external DB
        11   object has a ProjectGroupCode
        12   object does not have an AIP yet
        13   object is active
        14   object got a PolicyID from the local policy table
        100  policy is not in ESSArch mode
        101  policy not found or not active
        102  problem to get information from the package METS
        110  object not found in external DB
        111  object has no (matching) ProjectGroupCode
        112  object already has an AIP
        113  object is not active
        114  no local policy matches the ProjectGroupCode

    Returns None; all results are stored on ``self`` and in the databases.
    """
    self.IngestTable = ESSDB.DB().action('ESSConfig','GET',('Value',),('Name','IngestTable'))[0][0]
    self.PolicyTable = ESSDB.DB().action('ESSConfig','GET',('Value',),('Name','PolicyTable'))[0][0]
    if ExtDBupdate:
        self.ext_IngestTable = self.IngestTable
    else:
        self.ext_IngestTable = ''

    # Columns fetched from the external (AIS) DB; the index in this tuple is
    # the index used on self.extOBJdbget[0] further down.
    ais_columns = ('ProjectGroupCode',
                   'ObjectPackageName',
                   'ObjectGuid',
                   'ObjectActive',
                   'EntryDate',
                   'EntryAgentIdentifierValue',
                   'OAISPackageType',
                   'preservationLevelValue')

    # Select the objects that are waiting for validation
    self.dbget,errno,why = ESSDB.DB().action(self.IngestTable,'GET4',('ObjectIdentifierValue','PolicyID','StatusProcess','StatusActivity'),('StatusActivity','=','0','AND',
                                                                                                                                          'StatusProcess','BETWEEN',9,'AND',14,
                                                                                                                                          'OR',
                                                                                                                                          'StatusActivity','=','4','AND',
                                                                                                                                          'StatusProcess','BETWEEN',10,'AND',11))
    if errno:
        logging.error('Failed to access Local DB, error: ' + str(why))
    # "or ()" keeps the loop from raising if the failed select returned None
    for self.obj in self.dbget or ():
        self.ObjectIdentifierValue = self.obj[0]
        self.PolicyID = self.obj[1]
        self.StatusProcess = self.obj[2]
        self.StatusActivity = self.obj[3]
        # Defaults that are written to the DB when nothing better is found
        self.DBmode = 0
        self.ext_ObjectGuid = None
        self.ext_EntryDate = datetime.datetime.utcnow().replace(microsecond=0,tzinfo=pytz.utc)
        self.ext_EntryAgentIdentifierValue = None
        self.ext_OAISPackageType = 2
        self.ext_preservationLevelValue = 1
        self.ext_ObjectActive = 0
        self.objectstatus = 0
        self.ext_ProjectGroupCode = None
        self.ext_ObjectPackageName = ''
        if Debug: logging.info('StatusProcess 9, ObjectIdentifierValue ' +str(self.ObjectIdentifierValue))

        if self.PolicyID:
            ###################################################################
            # The object already carries a PolicyID: look up the active policy
            ###################################################################
            DbRow = ESSArchPolicy.objects.filter(PolicyStat=1, PolicyID=int(self.PolicyID))[:1]
            if DbRow:
                DbRow = DbRow.get()
                if DbRow.Mode in (0, 1):
                    ########################################
                    # ESSArch mode: take the entry information from the package METS
                    self.DBmode = DbRow.Mode
                    logging.info('Policy found for Object: %s in ESSArch mode' % self.ObjectIdentifierValue)
                    if DbRow.IngestMetadata in [1,2,3]:
                        metsfilename = os.path.join(DbRow.IngestPath,self.ObjectIdentifierValue + '_Package_METS.xml')
                    elif DbRow.IngestMetadata == 4:
                        ObjectPath = os.path.join(DbRow.IngestPath,self.ObjectIdentifierValue)
                        if os.path.exists(os.path.join(ObjectPath,'sip.xml')):
                            metsfilename = os.path.join(ObjectPath,'sip.xml')
                        elif os.path.exists(os.path.join(ObjectPath,'mets.xml')):
                            metsfilename = os.path.join(ObjectPath,'mets.xml')
                        else:
                            metsfilename = ''
                    else:
                        metsfilename = ''
                    res_info, res_files, res_struct, error, why = ESSMD.getMETSFileList(FILENAME=metsfilename)
                    if not error:
                        self.ext_EntryDate = parse_datetime(res_info[1][0]).astimezone(pytz.utc)
                        self.ext_EntryAgentIdentifierValue = res_info[2][0][4]
                        try:
                            self.ext_ObjectGuid = str(uuid.UUID(self.ObjectIdentifierValue))
                        except ValueError as why:
                            # The identifier is not a UUID, so generate a new one for the object
                            logging.warning('ObjectIdentifierValue: %s is not a valid UUID, why: %s , start to generate a new UUID' % (self.ObjectIdentifierValue, str(why)))
                            self.ext_ObjectGuid = str(uuid.uuid1())
                            logging.info('New UUID: %s for ObjectIdentifierValue: %s' % (self.ext_ObjectGuid,str(self.ObjectIdentifierValue)))
                        self.ext_ObjectActive = 1
                        self.ext_OAISPackageType = 2
                        self.ext_preservationLevelValue = 1
                        self.objectstatus = 1
                    else:
                        self.objectstatus = 102 # Problem to get information from package METS
                elif DbRow.Mode == 2:
                    ########################################
                    # AIS mode but PolicyID from METS: check in AIS if the object is active
                    self.DBmode = DbRow.Mode
                    self.extOBJdbget,ext_errno,ext_why = ESSMSSQL.DB().action(self.IngestTable,'GET3',ais_columns,('ObjectIdentifierValue',self.ObjectIdentifierValue))
                    if ext_errno:
                        logging.error('Failed to access External DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(ext_why))
                    elif self.extOBJdbget:
                        if Debug: logging.info('Found object: %s in AIS, self.extOBJdbget: %s',self.ObjectIdentifierValue,str(self.extOBJdbget))
                        self.objectstatus = 10 # Object found in external DB
                        if self.objectstatus < 100:
                            ########################################
                            # Check if object already has an AIP
                            if disable_ObjectPackageName:
                                self.ext_ObjectPackageName = ''
                            else:
                                self.ext_ObjectPackageName = self.extOBJdbget[0][1]
                            if not self.ext_ObjectPackageName:
                                self.objectstatus = 12 # Object do not have an AIP
                            else:
                                self.objectstatus = 112 # Object already have an AIP
                        if self.objectstatus < 100:
                            ########################################
                            # Get GUID/UUID
                            # NOTE: the previous pymssql variant built this with uuid.UUID(bytes_le=...)
                            self.ext_ObjectGuid = uuid.UUID(self.extOBJdbget[0][2])
                        if self.objectstatus < 100:
                            ########################################
                            # Check if object is active
                            self.ext_ObjectActive = self.extOBJdbget[0][3]
                            if self.ext_ObjectActive == 1:
                                self.objectstatus = 13 # Object is active
                            else:
                                self.objectstatus = 113 # Object is not active
                        if self.objectstatus < 100:
                            ########################################
                            # Check if POLICYID in local DB "METS" is equal to ProjectGroupCode in AIS
                            self.ext_ProjectGroupCode = self.extOBJdbget[0][0]
                            if self.ext_ProjectGroupCode == self.PolicyID:
                                self.objectstatus = 1 # Object have an ProjectCode
                                logging.info('Object: %s found in AIS with correct POLICYID' % self.ObjectIdentifierValue)
                            else:
                                self.objectstatus = 111 # Object do not have an ProjectCode
                        ########################################
                        # NOTE(review): if self.tz is a pytz timezone, replace(tzinfo=...) may
                        # apply the zone's LMT offset; pytz recommends tz.localize() - confirm.
                        self.ext_EntryDate = self.extOBJdbget[0][4].replace(microsecond=0,tzinfo=self.tz).astimezone(pytz.utc)
                        self.ext_EntryAgentIdentifierValue = self.extOBJdbget[0][5]
                        self.ext_OAISPackageType = self.extOBJdbget[0][6]
                        self.ext_preservationLevelValue = self.extOBJdbget[0][7]
                        ########################################
                        # Special function only for test
                        if force_ProjectGroupCode:
                            logging.info('Force set ProjectGroupCode for Object: %s' % self.ObjectIdentifierValue)
                            self.objectstatus = 10
                            self.ext_ProjectGroupCode = force_ProjectGroupCode
                            self.ext_ObjectPackageName = ''
                            self.ext_ObjectGuid = str(uuid.uuid1())
                            self.ext_ObjectActive = 1
                            self.ext_EntryDate = datetime.datetime.utcnow().replace(microsecond=0,tzinfo=pytz.utc)
                            self.ext_EntryAgentIdentifierValue = None
                            self.ext_OAISPackageType = 2
                            self.ext_preservationLevelValue = 1
                    else:
                        self.objectstatus = 110 # Object not found in external DB
                        if Debug: logging.info('Missing object: %s in AIS, self.extOBJdbget: %s',self.ObjectIdentifierValue,str(self.extOBJdbget))
                else:
                    self.objectstatus = 100 # Policy is not in ESSArch mode
            else:
                self.objectstatus = 101 # Policy not found or not active
        else:
            ###################################################################
            # No PolicyID yet: fetch the object from AIS and derive the policy
            ###################################################################
            self.DBmode = 2 # AIS
            self.extOBJdbget,ext_errno,ext_why = ESSMSSQL.DB().action(self.IngestTable,'GET3',ais_columns,('ObjectIdentifierValue',self.ObjectIdentifierValue))
            if ext_errno:
                logging.error('Failed to access External DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(ext_why))
            elif self.extOBJdbget:
                if Debug: logging.info('Found object: %s in AIS, self.extOBJdbget: %s',self.ObjectIdentifierValue,str(self.extOBJdbget))
                self.objectstatus = 10 # Object found in external DB
                # Each check below only runs while no earlier check has failed,
                # so the first failure code (111/112/113) is the one reported.
                # Previously every "else" arm overwrote the earlier code and
                # all failures ended up as 113.
                ########################################
                # Check if object has a ProjectGroupCode
                self.ext_ProjectGroupCode = self.extOBJdbget[0][0]
                if self.objectstatus < 100:
                    if self.ext_ProjectGroupCode:
                        self.objectstatus = 11 # Object have an ProjectCode
                    else:
                        self.objectstatus = 111 # Object do not have an ProjectCode
                ########################################
                # Check if object already has an AIP
                if disable_ObjectPackageName:
                    self.ext_ObjectPackageName = ''
                else:
                    self.ext_ObjectPackageName = self.extOBJdbget[0][1]
                if self.objectstatus < 100:
                    if not self.ext_ObjectPackageName:
                        self.objectstatus = 12 # Object do not have an AIP
                    else:
                        self.objectstatus = 112 # Object already have an AIP
                ########################################
                # Get GUID/UUID
                # NOTE: the previous pymssql variant built this with uuid.UUID(bytes_le=...)
                self.ext_ObjectGuid = uuid.UUID(self.extOBJdbget[0][2])
                ########################################
                # Check if object is active
                self.ext_ObjectActive = self.extOBJdbget[0][3]
                if self.objectstatus < 100:
                    if self.ext_ObjectActive == 1:
                        self.objectstatus = 13 # Object is active
                    else:
                        self.objectstatus = 113 # Object is not active
                ########################################
                # NOTE(review): if self.tz is a pytz timezone, replace(tzinfo=...) may
                # apply the zone's LMT offset; pytz recommends tz.localize() - confirm.
                self.ext_EntryDate = self.extOBJdbget[0][4].replace(microsecond=0,tzinfo=self.tz).astimezone(pytz.utc)
                self.ext_EntryAgentIdentifierValue = self.extOBJdbget[0][5]
                self.ext_OAISPackageType = self.extOBJdbget[0][6]
                self.ext_preservationLevelValue = self.extOBJdbget[0][7]
                if Debug: logging.info('ext_ProjectGroupCode is: '+str(self.ext_ProjectGroupCode)+' for ObjectIdentifierValue '+str(self.ObjectIdentifierValue))
                ########################################
                # Special function only for test
                if force_ProjectGroupCode:
                    logging.info('Force set ProjectGroupCode for Object: %s' % self.ObjectIdentifierValue)
                    self.objectstatus = 10
                    self.ext_ProjectGroupCode = force_ProjectGroupCode
                    self.ext_ObjectPackageName = ''
                    self.ext_ObjectGuid = str(uuid.uuid1())
                    self.ext_ObjectActive = 1
                    self.ext_EntryDate = datetime.datetime.utcnow().replace(microsecond=0,tzinfo=pytz.utc)
                    self.ext_EntryAgentIdentifierValue = None
                    self.ext_OAISPackageType = 2
                    self.ext_preservationLevelValue = 1
                if self.objectstatus < 100:
                    ########################################
                    # Map the ProjectGroupCode to an active local policy
                    self.PolicyID_dbget,errno,why = ESSDB.DB().action(self.PolicyTable,'GET3',('PolicyID',),('AISProjectID',self.ext_ProjectGroupCode,'AND','PolicyStat',1))
                    if errno:
                        logging.error('Failed to access Local DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why))
                    elif self.PolicyID_dbget:
                        self.PolicyID = self.PolicyID_dbget[0][0]
                        if Debug: logging.info('PolicyID: '+str(self.PolicyID))
                        self.objectstatus = 14 # Object got PolicyID
                    else:
                        self.objectstatus = 114 # Object mising PolicyID
            else:
                self.objectstatus = 110 # Object not found in external DB
                if Debug: logging.info('Missing object: %s in AIS, self.extOBJdbget: %s',self.ObjectIdentifierValue,str(self.extOBJdbget))

        ###################################################################
        # Translate objectstatus into StatusProcess / StatusActivity
        ###################################################################
        if self.objectstatus == 100:
            # Policy is not in ESSArch mode(12) and Need of assistance(4)
            self.StatusProcess = 12
            self.StatusActivity = 4
            if Debug: logging.info('Change to StatusProcess 12, ObjectIdentifierValue ' +str(self.ObjectIdentifierValue))
            self.event_info = 'Policy is not in ESSArch mode for Object: ' + str(self.ObjectIdentifierValue)
            logging.error(self.event_info)
            ESSPGM.Events().create('1010','','ESSArch SIPValidateAIS',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue)
        elif self.objectstatus == 101:
            # Policy not found or not active(12) and Need of assistance(4)
            self.StatusProcess = 12
            self.StatusActivity = 4
            if Debug: logging.info('Change to StatusProcess 12, ObjectIdentifierValue ' +str(self.ObjectIdentifierValue))
            self.event_info = 'Policy not found or active for Object: ' + str(self.ObjectIdentifierValue)
            logging.error(self.event_info)
            ESSPGM.Events().create('1010','','ESSArch SIPValidateAIS',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue)
        elif self.objectstatus == 102:
            # Problem to get information from Package_METS(12) and Need of assistance(4)
            self.StatusProcess = 12
            self.StatusActivity = 4
            if Debug: logging.info('Change to StatusProcess 12, ObjectIdentifierValue ' +str(self.ObjectIdentifierValue))
            self.event_info = 'Problem to get information from Package_METS for Object: ' + str(self.ObjectIdentifierValue)
            logging.error(self.event_info)
            ESSPGM.Events().create('1010','','ESSArch SIPValidateAIS',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue)
        elif self.objectstatus == 110:
            # Object does not exist in external DB(10) and Need of assistance(4)
            self.StatusProcess = 10
            self.StatusActivity = 4
            if Debug: logging.info('Change to StatusProcess 10, ObjectIdentifierValue ' +str(self.ObjectIdentifierValue))
        elif self.objectstatus == 111:
            # Object does not have any project code in external DB(11) and Need of assistance(4)
            self.StatusProcess = 11
            self.StatusActivity = 4
            if Debug: logging.info('Change to StatusProcess 11, ObjectIdentifierValue ' +str(self.ObjectIdentifierValue))
        elif self.objectstatus == 112:
            # Object already has an AIP(13) and Need of assistance(4)
            self.StatusProcess = 13
            self.StatusActivity = 4
            if Debug: logging.info('Change to StatusProcess 13, ObjectIdentifierValue ' +str(self.ObjectIdentifierValue))
            self.event_info = 'Object: ' + str(self.ObjectIdentifierValue) + ' already have an AIP!'
            logging.error(self.event_info)
            ESSPGM.Events().create('1010','','ESSArch SIPValidateAIS',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue)
        elif self.objectstatus == 113:
            # Object is not active(14) and Need of assistance(4)
            self.StatusProcess = 14
            self.StatusActivity = 4
            if Debug: logging.info('Change to StatusProcess 14, ObjectIdentifierValue ' +str(self.ObjectIdentifierValue))
            self.event_info = 'Object: ' + str(self.ObjectIdentifierValue) + ' is not active in external DB!'
            logging.error(self.event_info)
            ESSPGM.Events().create('1010','','ESSArch SIPValidateAIS',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue)
        elif self.objectstatus == 114:
            # Object does not have any local policy(12) and Need of assistance(4)
            self.StatusProcess = 12
            self.StatusActivity = 4
            if Debug: logging.info('Change to StatusProcess 12, ObjectIdentifierValue ' +str(self.ObjectIdentifierValue))
            self.event_info = 'Object: ' + str(self.ObjectIdentifierValue) + ' do not have any local policy!'
            logging.error(self.event_info)
            ESSPGM.Events().create('1010','','ESSArch SIPValidateAIS',ProcVersion,'1',self.event_info,self.DBmode,self.ObjectIdentifierValue)
        elif self.objectstatus == 14 or self.objectstatus == 1:
            # Object got a policy(19) and OK(0)
            self.StatusProcess = 19
            self.StatusActivity = 0
            if Debug: logging.info('Change to StatusProcess 19, ObjectIdentifierValue ' +str(self.ObjectIdentifierValue))
            ESSPGM.Events().create('1010','','ESSArch SIPValidateAIS',ProcVersion,'0','',self.DBmode,self.ObjectIdentifierValue)
        logging.info('objectstatus:%s,StatusProcess:%s,StatusActivity:%s,EntryDate:%s' % (self.objectstatus,self.StatusProcess,self.StatusActivity,self.ext_EntryDate))

        ###################################################################
        # Store the result; objectstatus 0 means nothing was determined
        ###################################################################
        if self.objectstatus:
            self.timestamp_utc = datetime.datetime.utcnow().replace(microsecond=0,tzinfo=pytz.utc)
            self.timestamp_dst = self.timestamp_utc.astimezone(self.tz)
            # The local DB stores naive UTC timestamps
            res,errno,why = ESSDB.DB().action(self.IngestTable,'UPD',('PolicyId',self.PolicyID,
                                                                      'ObjectUUID',self.ext_ObjectGuid,
                                                                      'EntryDate',self.ext_EntryDate.replace(tzinfo=None),
                                                                      'EntryAgentIdentifierValue',self.ext_EntryAgentIdentifierValue,
                                                                      'StatusProcess',self.StatusProcess,
                                                                      'StatusActivity',self.StatusActivity,
                                                                      'LastEventDate',self.timestamp_utc.replace(tzinfo=None),
                                                                      'linkingAgentIdentifierValue',AgentIdentifierValue,
                                                                      'OAISPackageType',self.ext_OAISPackageType,
                                                                      'preservationLevelValue',self.ext_preservationLevelValue,
                                                                      'ObjectActive',self.ext_ObjectActive,
                                                                      'LocalDBdatetime',self.timestamp_utc.replace(tzinfo=None)),
                                                                     ('ObjectIdentifierValue',self.ObjectIdentifierValue))
            if errno:
                logging.error('Failed to update Local DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why))
            if errno == 0 and self.ext_IngestTable:
                # ext_IngestTable is set to the same name as IngestTable above;
                # the external DB gets the timestamp converted to self.tz
                ext_res,ext_errno,ext_why = ESSMSSQL.DB().action(self.IngestTable,'UPD',('PolicyId',self.PolicyID,
                                                                                         'StatusProcess',self.StatusProcess,
                                                                                         'StatusActivity',self.StatusActivity,
                                                                                         'LastEventDate',self.timestamp_dst.replace(tzinfo=None),
                                                                                         'linkingAgentIdentifierValue',AgentIdentifierValue),
                                                                                        ('ObjectIdentifierValue',self.ObjectIdentifierValue))
                if ext_errno:
                    logging.error('Failed to update External DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(ext_why))
                else:
                    # Record in the local DB when the external DB was last updated
                    res,errno,why = ESSDB.DB().action(self.IngestTable,'UPD',('ExtDBdatetime',self.timestamp_utc.replace(tzinfo=None)),('ObjectIdentifierValue',self.ObjectIdentifierValue))
                    if errno:
                        logging.error('Failed to update Local DB: ' + str(self.ObjectIdentifierValue) + ' error: ' + str(why))