def subClassProcessItem(self, processingObj):
    '''
    Add one file to the current zip archive unless the log collection
    already holds a newer or identical version of it.

    processingObj = {"fullPath": "D:/tmp/good.txt", "size":100}
    "timestamp" and "headMd5" are also read from processingObj when the
    item is already present in self.logCollection.

    Returns None in every case. On success the item is recorded in
    self.zippedFileInfo with an added "parentEncZip" key.
    '''
    ncl(processingObj)
    fullPath = transform.transformDirToInternal(processingObj["fullPath"])
    relaPath = transform.formatRelativePath(fullPath.replace(self.rootDir, ''))

    if self.logCollection.exists(relaPath):
        #Item exists, check if it is updated
        collectionItem = self.db.getObjFromUuid(self.logCollection.getObjUuid(relaPath))
        if collectionItem["timestamp"] > processingObj["timestamp"]:
            #The recorded version is newer than the incoming file. Ignore it
            return
        if ((collectionItem["timestamp"] == processingObj["timestamp"]) or
                (collectionItem["headMd5"] == processingObj["headMd5"])):
            #The 2 items are the same, ignore it
            return

    if self.curArchive is None:
        self.createNewZip()
    if self.curArchivedSize > MAX_SINGLE_ARCHIVE_SIZE:
        #Size limit exceeded: finish the current archive and start a new one
        self.encZip()
        self.createNewZip()

    #If there is already an item with the same name in this archive, ignore
    #the current one. self.zippedFileInfo is the dict this method records
    #archived items in (see the last line), so it is the one checked here.
    if relaPath in self.zippedFileInfo:
        return

    ##############################
    #Add the file to zip file
    ##############################
    #NOTE(review): paths are encoded as gbk before being handed to addfile;
    #presumably required by the zip wrapper - confirm before changing.
    self.curArchive.addfile(unicode(fullPath).encode('gbk'), unicode(relaPath).encode('gbk'))
    self.curArchivedSize += os.stat(fullPath).st_size

    processingObj["parentEncZip"] = self.curArchiveName.replace(".zip", ".enc")
    self.zippedFileInfo[relaPath] = processingObj
def updateZipLog(self, newLog):
    '''
    Merge newLog into self.zipStorageState.

    newLog maps a path to an item-info dict carrying "zippedTimeStemp".
    A path not yet in the state is always added. A path already present is
    replaced only when
    self.checkTimeNotPrecise(newStamp, storedStamp) returns a true value.
    '''
    for i in newLog:
        relaPath = transform.formatRelativePath(i)
        newItem = newLog[i]
        if relaPath in self.zipStorageState:
            #Conflict, check if update needed
            storedStamp = self.zipStorageState[relaPath]["zippedTimeStemp"]
            if not self.checkTimeNotPrecise(newItem["zippedTimeStemp"], storedStamp):
                continue
            print('update duplicated item: %s %s' % (newItem["zippedTimeStemp"], storedStamp))
        #New item, or duplicate that needs updating
        self.zipStorageState[relaPath] = newItem
def updateZipLog(self, newLog):
    '''
    Store every entry of newLog as a virtual object in self.db and make
    self.logCollection point at the new object, updating the mapping when
    the path is already known and adding it otherwise.
    '''
    for logKey in newLog:
        collectionId = transform.formatRelativePath(logKey)
        alreadyKnown = self.logCollection.exists(collectionId)
        #A fresh object is created in both cases; only the collection call differs
        newUuid = self.db.addVirtualObj(newLog[logKey])
        if alreadyKnown:
            self.logCollection.updateObjUuid(collectionId, newUuid)
        else:
            self.logCollection.addObj(collectionId, newUuid)
def updateZipLog(self, newLog):
    '''
    Merge newLog into self.zipStorageState, keeping the newest entry.

    newLog maps a path to an item-info dict carrying "timestamp". A path not
    yet in the state is added; a path already present is replaced only when
    the new timestamp is strictly greater than the stored one.

    Raises KeyError if an entry of newLog has no "timestamp".
    '''
    for i in newLog:
        relaPath = transform.formatRelativePath(i)
        newItem = newLog[i]
        #Read up front so an entry without "timestamp" fails the same way
        #whether it is new or a duplicate
        newTimestamp = newItem["timestamp"]
        if relaPath in self.zipStorageState:
            #Conflict, check if update needed
            storedTimestamp = self.zipStorageState[relaPath]["timestamp"]
            if newTimestamp > storedTimestamp:
                print('update duplicated item: %s %s' % (newTimestamp, storedTimestamp))
                self.zipStorageState[relaPath] = newItem
        else:
            #New item, add it
            self.zipStorageState[relaPath] = newItem
def store(self, item):
    '''
    Add item's file to the current zip archive and record its info.

    The item is skipped when self.zipStorageState already holds a strictly
    newer timestamp for the same relative path, or when the same relative
    path has already been recorded in self.zippedFileInfo.

    item must provide getRelaPath(), getTimestamp(), getFullPath() and
    getItemInfo() (the last one returning a dict). Returns None.
    '''
    ########################################
    #Check if the target item is already updated
    ########################################
    self.readZipStorageState()
    relaPath = transform.formatRelativePath(item.getRelaPath())
    if ((relaPath in self.zipStorageState) and
            (self.zipStorageState[relaPath]["timestamp"] > item.getTimestamp())):
        print('Want to update an older file to a newer file. Ignore it.')
        return

    if self.curArchive is None:
        self.createNewZip()
    if self.curArchivedSize > MAX_SINGLE_ARCHIVE_SIZE:
        #Size limit exceeded: finish the current archive and start a new one
        self.encZip()
        self.createNewZip()

    #Add the file to zip
    fullPath = item.getFullPath()
    #Skip a name that was already archived. self.zippedFileInfo is the dict
    #this method records archived items in (see the last line).
    if relaPath in self.zippedFileInfo:
        return

    ##############################
    #Add the file to zip file
    ##############################
    self.curArchive.addfile(unicode(fullPath).encode('gbk'), unicode(relaPath).encode('gbk'))
    self.curArchivedSize += os.stat(fullPath).st_size

    itemInfo = item.getItemInfo()
    itemInfo["parentEncZip"] = self.curArchiveName.replace(".zip", ".enc")
    self.zippedFileInfo[relaPath] = itemInfo
def store(self, item):
    '''
    Add item's file to the current zip archive and record its info.

    The item is skipped when self.zipStorageState already holds a newer
    "zippedTimeStemp" for the same relative path, or when the same relative
    path has already been recorded in self.zippedFileInfo.

    item must provide getRelaPath(), getTimestamp() and getFullPath().
    Returns None.
    '''
    ########################################
    #Check if the target item is already updated
    ########################################
    self.readZipStorageState()
    relaPath = transform.formatRelativePath(item.getRelaPath())
    #Only paths already present in the state can be "newer"; a new path has
    #no stored stamp to compare against.
    if relaPath in self.zipStorageState:
        storedSeconds = getTimeInSeconds(self.zipStorageState[relaPath]["zippedTimeStemp"])
        if storedSeconds > int(item.getTimestamp()):
            print('Want to update an older file to a newer file. Ignore it.')
            return

    if self.curArchive is None:
        self.createNewZip()
    if self.curArchivedSize > MAX_SINGLE_ARCHIVE_SIZE:
        #Size limit exceeded: finish the current archive and start a new one
        self.encZip()
        self.createNewZip()

    #Add the file to zip
    fullPath = item.getFullPath()
    #Skip a name that was already archived. self.zippedFileInfo is the dict
    #this method records archived items in (see the last statement).
    if relaPath in self.zippedFileInfo:
        return

    #NOTE(review): the result is never used; the call is kept in case the
    #constructor has side effects - confirm and remove if it has none.
    encZipStorageItem(self.zipStorageDir, self.curArchiveName, relaPath, fullPath)

    ##############################
    #Add the file to zip file
    ##############################
    self.curArchive.addfile(unicode(fullPath).encode('gbk'), unicode(relaPath).encode('gbk'))
    fileStat = os.stat(fullPath)
    self.curArchivedSize += fileStat.st_size
    self.zippedFileInfo[relaPath] = {
        "timestamp": fileStat.st_mtime,
        "fullPath": fullPath,
        "headMd5": infoCollector.getHeadContentMd5(fullPath),
        "parentZip": self.curArchiveName,
        "size": fileStat.st_size,
    }
def getNextUpdatedItem(self):
    '''
    Generator over the members of every updated encrypted zip below
    self.zipStorageDir.

    For each file that self.lastState.updated() reports as updated and whose
    name ends with ".enc", the archive is obtained through self.getZipFile(),
    every member is extracted into self.workingDir and a zipStorageItem for
    it is yielded. When no ".enclog" file exists next to the archive, the
    per-member info is also collected in self.zippedFileInfoRegenerating and
    self.saveRegeneratedState() is called once the archive is exhausted.
    '''
    for dirPath, _dirNames, fileNames in os.walk(self.zipStorageDir):
        for fileName in fileNames:
            encZipFileFullPath = transform.transformDirToInternal(os.path.join(dirPath, fileName))
            print(encZipFileFullPath)
            zipFileFolderStorageItem = folderStorage.storageItem(self.zipStorageDir, encZipFileFullPath)
            if not self.lastState.updated(zipFileFolderStorageItem):
                continue
            ##################################################################
            #For zip storage, if the zip file was updated (or newly created) we
            #should enumerate all element in this zip file
            ##################################################################
            #Match on the suffix: a substring test would also accept the
            #".enclog" log files that live next to the archives.
            if not encZipFileFullPath.endswith('.enc'):
                #Not an encrypted zip file, continue
                print('not a encrypted zip file:  %s' % (encZipFileFullPath,))
                continue
            self.regenerateNeeded = False
            #First decrypt the zip file
            zipFileFullPath = self.getZipFile(encZipFileFullPath)
            #Enumerate all files in the decrypted zip file
            zf = zipClass.ZFile(zipFileFullPath, 'r')
            #Generate a log file if it does not exist
            if not os.path.exists(encZipFileFullPath.replace('.enc', '.enclog')):
                self.regenerateNeeded = True
            for i in zf.list():
                zf.extract(i, self.workingDir)
                extractedItemFullPath = os.path.join(self.workingDir, i)
                extractedItemInfo = {
                    "timestamp": getTimeInSeconds(zf.zfile.getinfo(i).date_time),
                    "headMd5": infoCollector.getHeadContentMd5(extractedItemFullPath),
                    "parentEncryptedZip": encZipFileFullPath,
                    "size": os.stat(extractedItemFullPath).st_size,
                }
                extractedItem = zipStorageItem(self.workingDir, extractedItemFullPath)
                if self.regenerateNeeded:
                    relaPath = transform.formatRelativePath(i)
                    self.zippedFileInfoRegenerating[relaPath] = extractedItemInfo
                yield extractedItem
            if self.regenerateNeeded:
                self.saveRegeneratedState(encZipFileFullPath, zipFileFullPath)
def store(self, processingObj, pendingCollection):
    '''
    Queue one file for archiving and add it to the current zip.

    processingObj = {"fullPath": "D:/tmp/good.txt", "size":100}
    processingObj["uuid"] is also read: it is stored in pendingCollection
    under the file's relative path before anything else happens.

    The file is not added again when its relative path is already present in
    self.zippedFileInfo. Returns None.
    '''
    ncl(processingObj)
    #fullPath must be computed first: the relative path is derived from it
    fullPath = transform.transformDirToInternal(processingObj["fullPath"])
    relaPath = transform.formatRelativePath(fullPath.replace(self.rootDir, ''))
    pendingCollection[relaPath] = processingObj["uuid"]

    #If there is already an item with the same name, ignore the current one
    if relaPath in self.zippedFileInfo:
        return

    if self.curArchive is None:
        self.createNewZip()
    if self.curArchivedSize > MAX_SINGLE_ARCHIVE_SIZE:
        #Size limit exceeded: finish the current archive and start a new one
        self.encZip()
        self.createNewZip()

    ##############################
    #Add the file to zip file
    ##############################
    self.curArchive.addfile(unicode(fullPath).encode('gbk'), unicode(relaPath).encode('gbk'))
    self.curArchivedSize += os.stat(fullPath).st_size

    processingObj["parentEncZip"] = self.curArchiveName.replace(".zip", ".enc")
    self.zippedFileInfo[relaPath] = processingObj
def updateContentStateForFile(self, encZipFileFullPath):
    '''
    Load the log that belongs to an encrypted zip and merge it through
    self.updateZipLog().

    When the log file does not exist yet it is regenerated first: the
    archive is obtained through self.getZipFile(), every member is extracted
    into self.workingDir, its info is collected in
    self.zippedFileInfoRegenerating and self.saveRegeneratedState() is
    called.
    '''
    encZipLogFilePath = getEncZipLogFilenameFromEncrypted(encZipFileFullPath)
    if not os.path.exists(encZipLogFilePath):
        #Regenerate the state info file
        zipFileFullPath = self.getZipFile(encZipFileFullPath)
        #Enumerate all files in the decrypted zip file
        zf = zipClass.ZFile(zipFileFullPath, 'r')
        for i in zf.list():
            zf.extract(i, self.workingDir)
            extractedItem = folderStorage.folderStorageItem(self.workingDir, os.path.join(self.workingDir, i))
            extractedFullPath = extractedItem.getFullPath()
            relaPath = transform.formatRelativePath(i)
            self.zippedFileInfoRegenerating[relaPath] = {
                "zippedTimeStemp": zf.zfile.getinfo(i).date_time,
                "headMd5": infoCollector.getHeadContentMd5(extractedFullPath),
                "parentEncryptedZip": encZipFileFullPath,
                "size": os.stat(extractedFullPath).st_size,
            }
        self.saveRegeneratedState(encZipFileFullPath, zipFileFullPath)
    newLog = readEncryptedZipLog(encZipLogFilePath)
    self.updateZipLog(newLog)
def checkModification(self, item):
    '''
    Normalise the item's relative path and touch self.configDict.

    No comparison is performed and None is returned.
    '''
    #NOTE(review): looks unfinished - presumably meant to compare the item
    #against self.configDict; confirm the intended behaviour.
    transform.formatRelativePath(item.getRelativePath())
    self.configDict
def recordItemInfo(self, item):
    '''
    Remember the current modification time of the item's file in
    self.configDict, keyed by the item's normalised relative path.

    The file is looked up below self.storageRoot.
    '''
    itemKey = transform.formatRelativePath(item.getRelativePath())
    itemStat = os.stat(os.path.join(self.storageRoot, itemKey))
    self.configDict[itemKey] = itemStat.st_mtime
def getRelaPath(self, fullPath):
    '''
    Return fullPath relative to self.rootDir for a flat collection.

    Raises ValueError when the relative path still contains a '/', i.e. the
    element lives in a sub-folder, which this collection does not support.
    '''
    #Here use [1:] to remove the leading '/' in front of the relative path
    relaPath = transform.formatRelativePath(fullPath.replace(self.rootDir, '')[1:])
    if '/' in relaPath:
        #A real exception class is required: raising a plain string is
        #itself a TypeError on Python 2.6 and later.
        raise ValueError("no recursive folder element in this collection")
    return relaPath
def subClassProcessItem(self, processingObj):
    '''
    Generator. Load the encrypted zip log named by
    processingObj["fullPath"], merge its entries into self.logCollection
    and then yield one extractedZipStorageItem per member of the matching
    data archive.

    Merge rule: a path unknown to the collection is added; a known path is
    replaced only when the log entry's "timestamp" is greater than the
    stored one. A "uuid" key in a log entry is renamed to "originalUuid"
    before the entry is stored.

    Raises IOError when a log file extracted from the log zip cannot be
    read. Nothing is yielded when no log was loaded.
    '''
    encZipFileFullPath = processingObj["fullPath"]
    encryptedZipLogPath = encZipFileFullPath
    ncl(encryptedZipLogPath)
    zipLogPath = os.path.join(
        self.decryptionWorkingDir,
        os.path.basename(encryptedZipLogPath).replace(gTargetEncLog, gZipLogExt)
    )
    cl(zipLogPath)
    self.decCopier.copy(encryptedZipLogPath, zipLogPath)

    # Extract log file from zip
    logZip = zipClass.ZFile(zipLogPath, "r")
    logLoaded = False
    for logFileName in logZip.list():
        logZip.extract(logFileName, self.decryptionWorkingDir)
        logPath = os.path.join(self.decryptionWorkingDir, logFileName)
        # Read log file; "with" closes it even when json.load fails
        try:
            with open(logPath, "r") as f:
                newLog = json.load(f)
        except IOError:
            raise IOError("log not read: %s" % (logPath,))
        if newLog is None:
            # The file decoded to JSON null: nothing to merge
            continue

        #########################
        # Log loaded, update collection
        #########################
        logLoaded = True
        print("extracting info from log %s" % (encZipFileFullPath,))
        for i in newLog:
            relaPath = transform.formatRelativePath(i)
            logItem = newLog[i]
            ncl(logItem)
            # Remove the uuid in log file
            if "uuid" in logItem:
                logItem["originalUuid"] = logItem["uuid"]
                del logItem["uuid"]
            #################
            # Get collection
            #################
            if self.logCollection.exists(relaPath):
                itemUuid = self.logCollection.getObjUuid(relaPath)
                ncl("returned uuid:", itemUuid)
                item = self.objDb.getObjFromUuid(itemUuid)
                # Conflict, check if update needed
                ncl(logItem["timestamp"])
                ncl(item["timestamp"])
                if logItem["timestamp"] > item["timestamp"]:
                    # The new item is newer, replace the old one
                    ncl("updating duplicated item to 1st one:", logItem["timestamp"], item["timestamp"])
                    objUuid = self.objDb.addVirtualObj(logItem)
                    self.logCollection.updateObjUuid(relaPath, objUuid)
                else:
                    ncl("no update, ignore")
            else:
                # Add object to obj objDb
                objUuid = self.objDb.addVirtualObj(logItem)
                # Add obj to collection
                self.logCollection.addObj(relaPath, objUuid)
                ncl("added new item", relaPath, logItem)

    if not logLoaded:
        cl("Load log file failed", encZipFileFullPath)
        return

    #################################
    # Process data
    #################################
    print("extracting info from log complete %s" % (encZipFileFullPath,))
    encZipFileFullPath = transform.transformDirToInternal(encZipFileFullPath)
    ncl(encZipFileFullPath)
    zipFileFullPath = self.getZipFile(encZipFileFullPath.replace(gTargetEncLog, gTargetEnc))
    # Enumerate all files in the decrypted zip file
    zf = zipClass.ZFile(zipFileFullPath, "r")
    for i in zf.list():
        # Members are not extracted here; the yielded item carries zf and
        # the member path instead
        extractedItemFullPath = os.path.join(self.workingDir, i)
        relaPath = transform.formatRelativePath(i)
        ncl(relaPath)
        extractedItemInfo = self.getItemState(relaPath)
        extractedItem = extractedZipStorageItem(
            self.workingDir, extractedItemFullPath, extractedItemInfo, zipFileFullPath, zf, relaPath
        )
        ###########################
        # Returning object
        ###########################
        yield extractedItem
def getRelaPath(self, fullPath):
    '''
    Return fullPath with self.rootDir removed, its first character dropped,
    and the result passed through transform.formatRelativePath().
    '''
    belowRoot = fullPath.replace(self.rootDir, '')
    #Drop the first character, expected to be the '/' that followed rootDir
    withoutLeadingSlash = belowRoot[1:]
    return transform.formatRelativePath(withoutLeadingSlash)
def __init__(self, rootPath, itemInfo):
    '''
    Build a sync folder item from a root directory and an item-info dict.

    itemInfo["fullPath"] is required. Both paths are converted with
    transform.transformDirToInternal(); the part of the full path that
    remains after removing the root is handed to the base class as the
    item's id together with itemInfo.
    '''
    self.rootPath = transform.transformDirToInternal(rootPath)
    self.fullPath = transform.transformDirToInternal(itemInfo["fullPath"])
    pathBelowRoot = self.fullPath.replace(self.rootPath, '')
    idInCollection = transform.formatRelativePath(pathBelowRoot)
    super(syncFolderItem, self).__init__(idInCollection, itemInfo)
def getIdInCol(self):
    '''
    Return self.pathInZipFile passed through
    transform.formatRelativePath().
    '''
    idInCollection = transform.formatRelativePath(self.pathInZipFile)
    return idInCollection
def updateZipLog(self, newLog, pendingCollection):
    '''
    For every entry of newLog, remove its path from pendingCollection and
    point self.logCollection at the entry's "uuid".
    '''
    for logKey in newLog:
        collectionId = transform.formatRelativePath(logKey)
        #Raises if the path is not in pendingCollection (KeyError for a dict)
        del pendingCollection[collectionId]
        confirmedUuid = newLog[logKey]["uuid"]
        self.logCollection.updateObjUuid(collectionId, confirmedUuid)
def subClassProcessItem(self, processingObj): encZipFileFullPath = processingObj["fullPath"] encryptedZipLogPath = encZipFileFullPath ncl(encryptedZipLogPath) zipLogPath = os.path.join(self.decryptionWorkingDir, os.path.basename(encryptedZipLogPath).replace(gTargetEncLog, gZipLogExt)) cl(zipLogPath) self.decCopier.copy(encryptedZipLogPath, zipLogPath) #Extract log file from zip logZip = zipClass.ZFile(zipLogPath, 'r') #logPath = zipLogPath.replace(gZipLogExt, gLogExt) logLoaded = False for logFileName in logZip.list(): logZip.extract(logFileName, self.decryptionWorkingDir) logPath = os.path.join(self.decryptionWorkingDir, logFileName) #cl(logPath) #Read log file try: f = open(logPath,'r') newLog = json.load(f) f.close() except IOError: newLog = None raise 'log not read' if not (newLog is None): ######################### #Log loaded, update collection ######################### logLoaded = True print 'extracting info from log', encZipFileFullPath for i in newLog: relaPath = transform.formatRelativePath(i) ncl(newLog[i]) #Remove the uuid in log file if newLog[i].has_key("uuid"): newLog[i]["originalUuid"] = newLog[i]["uuid"] del newLog[i]["uuid"] ################# #Get collection ################# if self.logCollection.exists(relaPath): itemUuid = self.logCollection.getObjUuid(relaPath) ncl("returned uuid:", itemUuid) item = self.objDb.getObjFromUuid(itemUuid) #Conflict, check if update needed ncl(newLog[i]["timestamp"]) ncl(item["timestamp"]) if newLog[i]["timestamp"] > item["timestamp"]: #The new item is newer, replace the old one ncl('updating duplicated item to 1st one:', newLog[i]["timestamp"], item["timestamp"]) objUuid = self.objDb.addVirtualObj(newLog[i]) self.logCollection.updateObjUuid(relaPath, objUuid) else: ncl("no update, ignore") else: #Add object to obj objDb objUuid = self.objDb.addVirtualObj(newLog[i]) #Add obj to collection self.logCollection.addObj(relaPath, objUuid) ncl("added new item", relaPath, newLog[i]) ''' encZipFileFullPath = 
transform.transformDirToInternal(encZipFileFullPath) zipFileFullPath = self.getZipFile(encZipFileFullPath.replace(gTargetEncLog, gTargetEnc)) extractedItemInfo = self.getItemState(relaPath) extractedItemFullPath = os.path.join(self.workingDir, i) extractedItem = extractedZipStorageItem(self.workingDir, extractedItemFullPath, extractedItemInfo, zipFileFullPath, None, relaPath) ''' yield newLog[i] '''
def __init__(self, idInCol, itemInfo):
    '''
    Initialise the base object with itemInfo and store the normalised
    collection id under self.itemInfo["idInCol"].
    '''
    objBase.__init__(self, itemInfo)
    #self.itemInfo is not assigned here; presumably objBase.__init__ sets it
    normalisedId = transform.formatRelativePath(idInCol)
    self.itemInfo["idInCol"] = normalisedId
def subClassProcessItem(self, processingObj):
    '''
    Process one encrypted zip log (".enclog") together with its data file.

    processingObj["fullPath"] names the candidate file.
    - Not an ".enclog" file: nothing is done.
    - The matching ".enc" data file is missing, or the log cannot be
      loaded: the item is handed to
      processorBase.cacheCollectionProcessorBase.subClassProcessItem()
      ("pushed back").
    - Otherwise the log entries are merged into self.logCollection (a known
      path is replaced only when the log entry's "timestamp" is greater)
      and every member of the data archive is passed to
      self.targetCollection.store().

    Returns None.
    '''
    #Check if the log file and data file are both OK
    encZipFileFullPath = processingObj["fullPath"]
    if re.search(r'\.enclog$', encZipFileFullPath) is None:
        ncl('not a encrypted zip file: ', encZipFileFullPath)
        return

    if not os.path.exists(encZipFileFullPath.replace('.enclog', '.enc')):
        #Data file not exist, push it back
        ncl('Data file not exist, push it back: ', encZipFileFullPath)
    else:
        #Log and Data are both OK
        encryptedZipLogPath = encZipFileFullPath
        ncl(encryptedZipLogPath)
        zipLogPath = os.path.join(
            self.decryptionWorkingDir,
            os.path.basename(encryptedZipLogPath).replace('.enclog', '.log'))
        ncl(zipLogPath)
        self.decCopier.copy(encryptedZipLogPath, zipLogPath)
        #"with" closes the file even when json.load raises
        try:
            with open(zipLogPath, 'r') as f:
                newLog = json.load(f)
        except IOError:
            newLog = None

        if newLog is None:
            cl('Load log file failed', encZipFileFullPath)
        else:
            #########################
            #Log loaded, update collection
            #########################
            for i in newLog:
                relaPath = transform.formatRelativePath(i)
                logItem = newLog[i]
                ncl(logItem)
                #################
                #Get collection
                #################
                if self.logCollection.exists(relaPath):
                    itemUuid = self.logCollection.getObjUuid(relaPath)
                    ncl("returned uuid:", itemUuid)
                    item = self.db.getObjFromUuid(itemUuid)
                    #Conflict, check if update needed
                    ncl(logItem["timestamp"])
                    ncl(item["timestamp"])
                    if logItem["timestamp"] > item["timestamp"]:
                        #The new item is newer, replace the old one
                        ncl('updating duplicated item to 1st one:', logItem["timestamp"], item["timestamp"])
                        objUuid = self.db.addVirtualObj(logItem)
                        self.logCollection.updateObjUuid(relaPath, objUuid)
                    else:
                        ncl("no update, ignore")
                else:
                    #Add object to obj db
                    objUuid = self.db.addVirtualObj(logItem)
                    #Add obj to collection
                    self.logCollection.addObj(relaPath, objUuid)
                    ncl("added new item", relaPath, logItem)

            #################################
            #Process data
            #################################
            encZipFileFullPath = transform.transformDirToInternal(encZipFileFullPath)
            ncl(encZipFileFullPath)
            zipFileFullPath = self.getZipFile(encZipFileFullPath.replace(".enclog", ".enc"))
            #Enumerate all files in the decrypted zip file
            zf = zipClass.ZFile(zipFileFullPath, 'r')
            for i in zf.list():
                extractedItemFullPath = os.path.join(self.workingDir, i)
                relaPath = transform.formatRelativePath(i)
                extractedItemInfo = self.getItemState(relaPath)
                extractedItem = encZipStorage.extractedZipStorageItem(
                    self.workingDir, extractedItemFullPath, extractedItemInfo, zf, relaPath)
                ###########################
                #Store the file
                ###########################
                self.targetCollection.store(extractedItem)
            ##########################
            #Everything goes OK
            #Quit
            ##########################
            return

    ############
    #Item not processed, push it back
    ############
    processorBase.cacheCollectionProcessorBase.subClassProcessItem(self, processingObj)
def getItemState(self, relaPath):
    '''
    Refresh the zip storage state through self.readZipStorageState() and
    return the entry stored for relaPath.

    The lookup key is relaPath passed through
    transform.formatRelativePath(); a missing key propagates the lookup
    error of self.zipStorageState.
    '''
    self.readZipStorageState()
    stateKey = transform.formatRelativePath(relaPath)
    return self.zipStorageState[stateKey]
def getRelaPath(self):
    '''
    Return self.fullPath with self.rootPath removed, passed through
    transform.formatRelativePath().
    '''
    pathBelowRoot = self.fullPath.replace(self.rootPath, '')
    return transform.formatRelativePath(pathBelowRoot)