def doScanExistingPackets(self, arg):
    for SupplierNumber in range(self.EccMap.datasegments):
        PacketID = packetid.MakePacketID(self.BackupID, self.BlockNumber, SupplierNumber, 'Data')
        self.OnHandData[SupplierNumber] = os.path.exists(os.path.join(settings.getLocalBackupsDir(), PacketID))
    for SupplierNumber in range(self.EccMap.paritysegments):
        PacketID = packetid.MakePacketID(self.BackupID, self.BlockNumber, SupplierNumber, 'Parity')
        self.OnHandParity[SupplierNumber] = os.path.exists(os.path.join(settings.getLocalBackupsDir(), PacketID))
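# A minimal sketch of the packet ID layout assumed above; the exact
# separators are an assumption inferred from ReadLocalFiles() and
# raidread() below, which work with paths like
# "0/0/1/0/F20131120053803PM/0-1-Data":
#
#     packetid.MakePacketID('0/0/1/0/F20131120053803PM', 0, 1, 'Data')
#     # -> '0/0/1/0/F20131120053803PM/0-1-Data'
#     # i.e. <backupID>/<blockNum>-<supplierNum>-<Data|Parity>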
def FileReceived(self, packet, state):
    if state in ['in queue', 'shutdown', 'exist']:
        return
    if state != 'received':
        dhnio.Dprint(4, "backup_rebuilder.FileReceived WARNING incorrect state [%s] for packet %s" % (str(state), str(packet)))
        return
    packetID = packet.PacketID
    filename = os.path.join(settings.getLocalBackupsDir(), packetID)
    if not packet.Valid():
        # TODO: if we did not get a valid packet - should we re-request it or delete it?
        dhnio.Dprint(2, "backup_rebuilder.FileReceived WARNING " + packetID + " is not a valid packet")
        return
    if os.path.exists(filename):
        dhnio.Dprint(4, "backup_rebuilder.FileReceived WARNING overwriting existing file " + filename)
        try:
            os.remove(filename)
        except:
            dhnio.DprintException()
    dirname = os.path.dirname(filename)
    if not os.path.exists(dirname):
        try:
            dhnio._dirs_make(dirname)
        except:
            dhnio.Dprint(2, "backup_rebuilder.FileReceived ERROR can not create sub dir " + dirname)
            return
    if not dhnio.WriteFile(filename, packet.Payload):
        return
    backup_matrix.LocalFileReport(packetID)
    self.automat('inbox-data-packet', packetID)
def doRemoveUnusedFiles(self, arg):
    # we want to remove the files for this block
    # because we only need them during rebuilding
    if settings.getGeneralLocalBackups() is True:
        # if the user enabled this in settings - he wants to keep the local files
        return
    # ... the user does not want to keep local backups
    if settings.getGeneralWaitSuppliers() is True:
        # but he wants to be sure that all suppliers have been green for a long time
        if contact_status.hasOfflineSuppliers() or time.time() - fire_hire.GetLastFireTime() < 24*60*60:
            # some suppliers are not there, or we do not have a stable team yet -
            # do not remove the files because we need them to rebuild
            return
    count = 0
    for backupID in misc.sorted_backup_ids(backup_matrix.local_files().keys()):
        packets = backup_matrix.ScanBlocksToRemove(backupID, settings.getGeneralWaitSuppliers())
        for packetID in packets:
            filename = os.path.join(settings.getLocalBackupsDir(), packetID)
            if os.path.isfile(filename):
                try:
                    os.remove(filename)
                    # dhnio.Dprint(6, '    ' + os.path.basename(filename))
                except:
                    dhnio.DprintException()
                    continue
                count += 1
    dhnio.Dprint(8, 'data_sender.doRemoveUnusedFiles %d files were removed' % count)
    backup_matrix.ReadLocalFiles()
def doScanAndQueue(self, arg):
    global _ShutdownFlag
    dhnio.Dprint(10, 'data_sender.doScanAndQueue')
    log = open(os.path.join(settings.LogsDir(), 'data_sender.log'), 'w')
    log.write('doScanAndQueue %s\n' % time.asctime())
    if _ShutdownFlag:
        log.write('doScanAndQueue _ShutdownFlag is True\n')
        self.automat('scan-done')
        log.flush()
        log.close()
        return
    if '' not in contacts.getSupplierIDs():
        for backupID in misc.sorted_backup_ids(backup_matrix.local_files().keys(), True):
            packetsBySupplier = backup_matrix.ScanBlocksToSend(backupID)
            log.write('%s\n' % packetsBySupplier)
            for supplierNum in packetsBySupplier.keys():
                supplier_idurl = contacts.getSupplierID(supplierNum)
                if not supplier_idurl:
                    dhnio.Dprint(2, 'data_sender.doScanAndQueue WARNING ?supplierNum? %s for %s' % (supplierNum, backupID))
                    continue
                for packetID in packetsBySupplier[supplierNum]:
                    backupID_, blockNum, supplierNum_, dataORparity = packetid.BidBnSnDp(packetID)
                    if backupID_ != backupID:
                        dhnio.Dprint(2, 'data_sender.doScanAndQueue WARNING ?backupID? %s for %s' % (packetID, backupID))
                        continue
                    if supplierNum_ != supplierNum:
                        dhnio.Dprint(2, 'data_sender.doScanAndQueue WARNING ?supplierNum? %s for %s' % (packetID, backupID))
                        continue
                    if io_throttle.HasPacketInSendQueue(supplier_idurl, packetID):
                        log.write('%s is already in the send queue to %s\n' % (packetID, supplier_idurl))
                        continue
                    if not io_throttle.OkToSend(supplier_idurl):
                        log.write('ok to send to %s ? - NO!\n' % supplier_idurl)
                        continue
                    tranByiID = transport_control.transfers_by_idurl(supplier_idurl)
                    if len(tranByiID) > 3:
                        log.write('transfers by %s: %d\n' % (supplier_idurl, len(tranByiID)))
                        continue
                    filename = os.path.join(settings.getLocalBackupsDir(), packetID)
                    if not os.path.isfile(filename):
                        log.write('%s is not a file\n' % filename)
                        continue
                    io_throttle.QueueSendFile(
                        filename,
                        packetID,
                        supplier_idurl,
                        misc.getLocalID(),
                        self._packetAcked,
                        self._packetFailed)
                    log.write('io_throttle.QueueSendFile %s\n' % packetID)
                    # dhnio.Dprint(6, '    %s for %s' % (packetID, backupID))
    self.automat('scan-done')
    log.flush()
    log.close()
def doReadRaid(self, arg):
    fd, filename = tmpfile.make('restore', prefix=self.BackupID.replace('/', '_') + '_' + str(self.BlockNumber) + '_')
    os.close(fd)
    threads.deferToThread(
        raidread.raidread,
        filename,
        eccmap.CurrentName(),
        self.Version,
        self.BlockNumber,
        os.path.join(settings.getLocalBackupsDir(), self.PathID),
    ).addBoth(
        lambda restored_blocks: self.automat('raid-done', filename))
def run(self):
    """
    Runs a new `Job` from that `Task`.
    Called from the `RunTasks()` method if it is possible to start a new task -
    the maximum number of simultaneously running `Jobs` is limited.
    """
    iter_and_path = backup_fs.WalkByID(self.pathID)
    if iter_and_path is None:
        dhnio.Dprint(4, 'backup_control.Task.run ERROR %s not found in the index' % self.pathID)
        # self.defer.callback('error', self.pathID)
        return
    itemInfo, sourcePath = iter_and_path
    if isinstance(itemInfo, dict):
        try:
            itemInfo = itemInfo[backup_fs.INFO_KEY]
        except:
            dhnio.DprintException()
            return
    if not backup_fs.pathExist(sourcePath):
        dhnio.Dprint(4, 'backup_control.Task.run WARNING path does not exist: %s' % sourcePath)
        reactor.callLater(0, OnTaskFailed, self.pathID, 'not exist')
        return
    dataID = misc.NewBackupID()
    if itemInfo.has_version(dataID):
        # oops - we already have the same version,
        # append 1,2,3... to the end to make an absolutely unique version ID
        i = 1
        while itemInfo.has_version(dataID + str(i)):
            i += 1
        dataID += str(i)
    backupID = self.pathID + '/' + dataID
    try:
        backupPath = backup_fs.MakeLocalDir(settings.getLocalBackupsDir(), backupID)
    except:
        dhnio.DprintException()
        dhnio.Dprint(4, 'backup_control.Task.run ERROR creating destination folder for %s' % self.pathID)
        # self.defer.callback('error', self.pathID)
        return
    if backup_fs.pathIsDir(sourcePath):
        backupPipe = backup_tar.backuptar(sourcePath, compress='gz')
    else:
        backupPipe = backup_tar.backuptarfile(sourcePath, compress='gz')
    backupPipe.make_nonblocking()
    blockSize = settings.getBackupBlockSize()
    job = backup.backup(backupID, backupPipe, OnJobDone, OnBackupBlockReport, blockSize)
    jobs()[backupID] = job
    itemInfo.add_version(dataID)
    if itemInfo.type in [backup_fs.PARENT, backup_fs.DIR]:
        dirsize.ask(sourcePath, FoundFolderSize, (self.pathID, dataID))
    # self.defer.callback('started', backupID)
    reactor.callLater(0, FireTaskStartedCallbacks, self.pathID, dataID)
    dhnio.Dprint(4, 'backup_control.Task.run %s [%s], size=%d' % (self.pathID, dataID, itemInfo.size))
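# A minimal standalone sketch of the version-uniqueness loop above;
# `existing_versions` is a hypothetical stand-in for itemInfo.has_version(),
# and the ID format assumes misc.NewBackupID() returns timestamp-style
# IDs like 'F20131120053803PM'.
def make_unique_version(dataID, existing_versions):
    if dataID in existing_versions:
        i = 1
        while dataID + str(i) in existing_versions:
            i += 1
        dataID += str(i)
    return dataID

# make_unique_version('F20131120053803PM',
#                     {'F20131120053803PM', 'F20131120053803PM1'})
# -> 'F20131120053803PM2'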
def ScanID(pathID, basedir=None):
    """
    Same, but checks only a single item in the index.
    """
    if basedir is None:
        basedir = settings.getLocalBackupsDir()
    iter_and_path = WalkByID(pathID)
    if not iter_and_path:
        return
    iter, path = iter_and_path
    if isinstance(iter, dict):
        if not iter.has_key(INFO_KEY):
            return
        iter = iter[INFO_KEY]
    iter.read_stats(path)
    iter.read_versions(portablePath(os.path.join(basedir, pathID)))
def QueueRequestFile(self, callOnReceived, creatorID, packetID, ownerID, remoteID):
    # make sure that we do not actually already have the file
    # if packetID != settings.BackupInfoFileName():
    if packetID not in [
            settings.BackupInfoFileName(),
            settings.BackupInfoFileNameOld(),
            settings.BackupInfoEncryptedFileName(), ]:
        filename = os.path.join(settings.getLocalBackupsDir(), packetID)
        if os.path.exists(filename):
            dhnio.Dprint(4, "io_throttle.QueueRequestFile WARNING %s already exists" % filename)
            if callOnReceived:
                reactor.callLater(0, callOnReceived, packetID, 'exist')
            return
    if remoteID not in self.supplierQueues.keys():
        # make a new queue for this supplier
        self.supplierQueues[remoteID] = SupplierQueue(remoteID, self.creatorID)
        dhnio.Dprint(6, "io_throttle.QueueRequestFile made a new queue for %s" % nameurl.GetName(remoteID))
    # dhnio.Dprint(10, "io_throttle.QueueRequestFile asking for %s from %s" % (packetID, nameurl.GetName(remoteID)))
    self.supplierQueues[remoteID].SupplierRequestFile(callOnReceived, creatorID, packetID, ownerID)
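# Usage sketch, assuming a module-level io_throttle wrapper that mirrors
# this method's signature (the IDURL below is only a placeholder). The
# callback gets the packet (or just its ID on the 'exist' shortcut) plus
# a state string such as 'received', 'exist' or 'timeout' - compare with
# RunRequest() below.
#
#     def _on_data_received(packet, state):
#         dhnio.Dprint(6, 'got %s in state %s' % (str(packet), state))
#
#     io_throttle.QueueRequestFile(
#         _on_data_received,
#         misc.getLocalID(),                        # creatorID
#         '0/0/1/0/F20131120053803PM/0-1-Data',     # packetID
#         misc.getLocalID(),                        # ownerID
#         'http://identity.server.net/supplier.xml')  # remoteID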
def RunRequest(self):
    # dhnio.Dprint(6, 'io_throttle.RunRequest')
    packetsToRemove = set()
    for i in range(0, min(self.fileRequestMaxLength, len(self.fileRequestQueue))):
        packetID = self.fileRequestQueue[i]
        currentTime = time.time()
        if self.fileRequestDict[packetID].requestTime is not None:
            # the packet was requested
            if self.fileRequestDict[packetID].fileReceivedTime is None:
                # but there is no answer yet ...
                if currentTime - self.fileRequestDict[packetID].requestTime > self.fileRequestDict[packetID].requestTimeout:
                    # ... and the time is out!!!
                    self.fileRequestDict[packetID].result = 'timeout'
                    packetsToRemove.add(packetID)
            else:
                # the packet was received (why is it not removed from the queue yet ???)
                self.fileRequestDict[packetID].result = 'received'
                packetsToRemove.add(packetID)
        if self.fileRequestDict[packetID].requestTime is None:
            if not os.path.exists(os.path.join(settings.getLocalBackupsDir(), packetID)):
                fileRequest = self.fileRequestDict[packetID]
                dhnio.Dprint(10, "io_throttle.RunRequest for packetID " + fileRequest.packetID)
                transport_control.RegisterInterest(
                    self.DataReceived,
                    fileRequest.creatorID,
                    fileRequest.packetID)
                newpacket = dhnpacket.dhnpacket(
                    commands.Retrieve(),
                    fileRequest.ownerID,
                    fileRequest.creatorID,
                    fileRequest.packetID,
                    "",
                    fileRequest.remoteID)
                transport_control.outboxNoAck(newpacket)
                fileRequest.requestTime = time.time()
            else:
                # we already have the data file, no need to request it
                self.fileRequestDict[packetID].result = 'exist'
                packetsToRemove.add(packetID)
    # remember the number of finished requests
    result = len(packetsToRemove)
    # remove finished requests from the queue
    if len(packetsToRemove) > 0:
        for packetID in packetsToRemove:
            self.fileRequestQueue.remove(packetID)
    del packetsToRemove
    return result
def LocalFileReport(packetID=None, backupID=None, blockNum=None, supplierNum=None, dataORparity=None):
    """
    Writes info about a single piece of data into the "local" matrix.

    You can use two forms:
        - pass the `packetID` parameter only
        - pass all the other parameters and do not use `packetID`

    This is called when a new local file is created, for example during the rebuilding process.
    """
    if packetID is not None:
        backupID, blockNum, supplierNum, dataORparity = packetid.Split(packetID)
        if backupID is None:
            dhnio.Dprint(8, 'backup_matrix.LocalFileReport WARNING incorrect filename: ' + packetID)
            return
    else:
        blockNum = int(blockNum)
        supplierNum = int(supplierNum)
        packetID = packetid.MakePacketID(backupID, blockNum, supplierNum, dataORparity)
    filename = packetID
    if dataORparity not in ['Data', 'Parity']:
        dhnio.Dprint(4, 'backup_matrix.LocalFileReport WARNING Data or Parity? ' + filename)
        return
    if supplierNum >= suppliers_set().supplierCount:
        dhnio.Dprint(4, 'backup_matrix.LocalFileReport WARNING supplier number %d > %d %s' % (supplierNum, suppliers_set().supplierCount, filename))
        return
    if not local_files().has_key(backupID):
        local_files()[backupID] = {}
        # dhnio.Dprint(14, 'backup_matrix.LocalFileReport new local entry for %s created in the memory' % backupID)
    if not local_files()[backupID].has_key(blockNum):
        local_files()[backupID][blockNum] = {
            'D': [0] * suppliers_set().supplierCount,
            'P': [0] * suppliers_set().supplierCount}
    local_files()[backupID][blockNum][dataORparity[0]][supplierNum] = 1
    if not local_max_block_numbers().has_key(backupID):
        local_max_block_numbers()[backupID] = -1
    if local_max_block_numbers()[backupID] < blockNum:
        local_max_block_numbers()[backupID] = blockNum
        # dhnio.Dprint(6, 'backup_matrix.LocalFileReport %s max block num is %d' % (backupID, local_max_block_numbers()[backupID]))
    if not local_backup_size().has_key(backupID):
        local_backup_size()[backupID] = 0
    localDest = os.path.join(settings.getLocalBackupsDir(), filename)
    if os.path.isfile(localDest):
        try:
            local_backup_size()[backupID] += os.path.getsize(localDest)
        except:
            dhnio.DprintException()
    RepaintBackup(backupID)
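# Usage sketch for the two call forms documented above (the packet ID
# string is only an example following the layout seen in ReadLocalFiles()):
#
#     # form 1: let LocalFileReport() split the packet ID itself
#     LocalFileReport(packetID='0/0/1/0/F20131120053803PM/5-1-Data')
#
#     # form 2: pass the parts explicitly and leave packetID as None
#     LocalFileReport(backupID='0/0/1/0/F20131120053803PM',
#                     blockNum=5, supplierNum=1, dataORparity='Data')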
def ReadLocalFiles():
    """
    This method scans the local backups and builds the whole "local" matrix.
    """
    global _LocalFilesNotifyCallback
    local_files().clear()
    local_max_block_numbers().clear()
    local_backup_size().clear()
    _counter = [0, ]

    def visit(realpath, subpath, name):
        # subpath is something like 0/0/1/0/F20131120053803PM/0-1-Data
        if not os.path.isfile(realpath):
            return True
        # note: match the basename, not the full path
        if name.startswith('newblock-'):
            return False
        if subpath in [
                settings.BackupIndexFileName(),
                settings.BackupInfoFileName(),
                settings.BackupInfoFileNameOld(),
                settings.BackupInfoEncryptedFileName() ]:
            return False
        try:
            version = subpath.split('/')[-2]
        except:
            return False
        if not packetid.IsCanonicalVersion(version):
            return True
        LocalFileReport(packetID=subpath)
        _counter[0] += 1
        return False

    dhnio.traverse_dir_recursive(visit, settings.getLocalBackupsDir())
    dhnio.Dprint(8, 'backup_matrix.ReadLocalFiles %d files indexed' % _counter[0])
    if dhnio.Debug(8):
        try:
            if sys.version_info >= (2, 6):
                # localSZ = sys.getsizeof(local_files())
                # remoteSZ = sys.getsizeof(remote_files())
                import lib.getsizeof
                localSZ = lib.getsizeof.total_size(local_files())
                remoteSZ = lib.getsizeof.total_size(remote_files())
                indexByName = lib.getsizeof.total_size(backup_fs.fs())
                indexByID = lib.getsizeof.total_size(backup_fs.fsID())
                dhnio.Dprint(10, '    all local info uses %d bytes in the memory' % localSZ)
                dhnio.Dprint(10, '    all remote info uses %d bytes in the memory' % remoteSZ)
                dhnio.Dprint(10, '    index by name takes %d bytes in the memory' % indexByName)
                dhnio.Dprint(10, '    index by ID takes %d bytes in the memory' % indexByID)
        except:
            dhnio.DprintException()
    if _LocalFilesNotifyCallback is not None:
        _LocalFilesNotifyCallback()
def Scan(basedir=None):
    """
    Walk all items in the index and check if local files and folders with the same names exist.
    Parameter `basedir` is the root path of that structure; if None it is taken
    from `lib.settings.getLocalBackupsDir()`.
    Also calculates the size of the files.
    """
    if basedir is None:
        basedir = settings.getLocalBackupsDir()
    iterID = fsID()
    sum = [0, 0, ]

    def visitor(path_id, path, info):
        info.read_stats(path)
        if info.exist():
            sum[0] += info.size
        versions_path = portablePath(os.path.join(basedir, path_id))
        sum[1] += info.read_versions(versions_path)

    TraverseByID(visitor, iterID)
    return sum[0], sum[1]
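# Usage sketch: rescan the whole index against the default backups folder.
# The first number sums the sizes of existing source items; the second is
# the total returned by read_versions(), assumed here to be the size of
# the stored versions on disk.
#
#     itemsSize, versionsSize = Scan()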
def raidread(OutputFileName, eccmapname, backupId, blockNumber, data_parity_dir=None):
    if data_parity_dir is None:
        data_parity_dir = settings.getLocalBackupsDir()
    myeccmap = geteccmap(eccmapname)
    GoodFiles = range(0, 200)
    MakingProgress = 1
    while MakingProgress == 1:
        MakingProgress = 0
        for PSegNum in xrange(myeccmap.paritysegments):
            PFileName = os.path.join(data_parity_dir, backupId, str(blockNumber) + '-' + str(PSegNum) + '-Parity')
            if os.path.exists(PFileName):
                Map = myeccmap.ParityToData[PSegNum]
                TotalDSegs = 0
                GoodDSegs = 0
                for DSegNum in Map:
                    TotalDSegs += 1
                    FileName = os.path.join(data_parity_dir, backupId, str(blockNumber) + '-' + str(DSegNum) + '-Data')
                    if os.path.exists(FileName):
                        GoodFiles[GoodDSegs] = FileName
                        GoodDSegs += 1
                    else:
                        BadName = FileName
                if GoodDSegs == TotalDSegs - 1:
                    # exactly one data segment of this parity is missing -
                    # we can rebuild it from the parity and the good segments
                    MakingProgress = 1
                    GoodFiles[GoodDSegs] = PFileName
                    GoodDSegs += 1
                    RebuildOne(GoodFiles, GoodDSegs, BadName)
    # count up the good segments and combine them into the output file
    GoodDSegs = 0
    output = open(OutputFileName, "wb")
    for DSegNum in xrange(myeccmap.datasegments):
        FileName = os.path.join(data_parity_dir, backupId, str(blockNumber) + '-' + str(DSegNum) + '-Data')
        if os.path.exists(FileName):
            GoodDSegs += 1
            moredata = open(FileName, "rb").read()
            output.write(moredata)
    output.close()
    return GoodDSegs
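# RebuildOne() is referenced above but not shown in this excerpt. A minimal
# sketch of what it has to do, assuming the same ">l" big-endian 4-byte word
# layout used by do_with_files()/do_in_memory() below: since every parity
# word is the XOR of its data words, XOR-ing the parity file with all
# surviving data segments reproduces the one missing segment.
def RebuildOne(GoodFiles, GoodDSegs, BadName):
    import struct
    fds = [open(GoodFiles[i], "rb") for i in range(GoodDSegs)]
    output = open(BadName, "wb")
    while True:
        words = [fd.read(4) for fd in fds]
        if len(words[0]) < 4:
            # all segments are padded to the same length, so they end together
            break
        recovered = 0
        for w in words:
            b, = struct.unpack(">l", w)
            recovered ^= b
        output.write(struct.pack(">l", recovered))
    output.close()
    for fd in fds:
        fd.close()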
def DeleteBackup(backupID, removeLocalFilesToo=True, saveDB=True, calculate=True):
    """
    This removes a single backup ID completely. Performs several operations:

        1) abort the backup if it just started and is running at the moment
        2) if we requested files for this backup, we do not need them anymore - remove the 'Data' requests
        3) remove interests in transport_control, see `lib.transport_control.DeleteBackupInterest()`
        4) remove that ID from the index database
        5) remove the local files for this backup ID
        6) remove all remote info for this backup from the memory, see `p2p.backup_matrix.EraseBackupRemoteInfo()`
        7) also remove the local info from memory, see `p2p.backup_matrix.EraseBackupLocalInfo()`
        8) stop any rebuilding, we will restart it soon
        9) check and calculate used space
        10) save the modified index database, soon it will be synchronized with the "backup_db_keeper()" state machine
    """
    dhnio.Dprint(8, 'backup_control.DeleteBackup ' + backupID)
    # if the user deletes a backup, make sure we remove any work we're doing on it
    # abort the backup if it just started and is running at the moment
    AbortRunningBackup(backupID)
    # if we requested files for this backup - we do not need them anymore
    io_throttle.DeleteBackupRequests(backupID)
    # remove interests in transport_control
    transport_control.DeleteBackupInterest(backupID)
    # mark it as being deleted in the db, well... just remove it from the index now
    backup_fs.DeleteBackupID(backupID)
    # finally remove the local files for this backupID
    if removeLocalFilesToo:
        backup_fs.DeleteLocalBackup(settings.getLocalBackupsDir(), backupID)
    # remove all remote info for this backup from the memory
    backup_matrix.EraseBackupRemoteInfo(backupID)
    # also remove the local info
    backup_matrix.EraseBackupLocalInfo(backupID)
    # stop any rebuilding, we will restart it soon
    backup_rebuilder.RemoveAllBackupsToWork()
    backup_rebuilder.SetStoppedFlag()
    # check and calculate used space
    if calculate:
        backup_fs.Scan()
        backup_fs.Calculate()
    # in some cases we want to save the DB later
    if saveDB:
        Save()
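# Usage sketch: when deleting many backups in a row it is cheaper to skip
# the per-call rescan and index save, then do both once at the end. The
# batching pattern is an assumption; the flags themselves are documented
# above, and `doomed_backup_ids` is only a placeholder.
#
#     for backupID in doomed_backup_ids:
#         DeleteBackup(backupID, saveDB=False, calculate=False)
#     backup_fs.Scan()
#     backup_fs.Calculate()
#     Save()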
def doSavePacket(self, NewPacket):
    packetID = NewPacket.PacketID
    pathID, version, packetBlockNum, SupplierNumber, dataORparity = packetid.SplitFull(packetID)
    if dataORparity == 'Data':
        self.OnHandData[SupplierNumber] = True
    elif dataORparity == 'Parity':
        self.OnHandParity[SupplierNumber] = True
    filename = os.path.join(settings.getLocalBackupsDir(), packetID)
    dirpath = os.path.dirname(filename)
    if not os.path.exists(dirpath):
        try:
            dhnio._dirs_make(dirpath)
        except:
            dhnio.DprintException()
    # either way we try to save the packet payload to the file
    if not dhnio.WriteFile(filename, NewPacket.Payload):
        dhnio.Dprint(6, "restore.doSavePacket WARNING unable to write to %s" % filename)
        return
    dhnio.Dprint(6, "restore.doSavePacket %s saved" % packetID)
    if self.packetInCallback is not None:
        self.packetInCallback(self.BackupID, NewPacket)
def OnJobDone(backupID, result):
    """
    A callback method fired when a backup is finished.
    Here we need to save the index database.
    """
    dhnio.Dprint(4, 'backup_control.OnJobDone [%s] %s, %d more tasks' % (backupID, result, len(tasks())))
    jobs().pop(backupID)
    pathID, version = packetid.SplitBackupID(backupID)
    if result == 'done':
        maxBackupsNum = settings.getGeneralBackupsToKeep()
        if maxBackupsNum:
            item = backup_fs.GetByID(pathID)
            if item:
                versions = item.list_versions(sorted=True, reverse=True)
                if len(versions) > maxBackupsNum:
                    # too many versions of this path are kept - drop the oldest ones
                    for oldVersion in versions[maxBackupsNum:]:
                        item.delete_version(oldVersion)
                        oldBackupID = pathID + '/' + oldVersion
                        backup_rebuilder.RemoveBackupToWork(oldBackupID)
                        io_throttle.DeleteBackupRequests(oldBackupID)
                        transport_control.DeleteBackupInterest(oldBackupID)
                        backup_fs.DeleteLocalBackup(settings.getLocalBackupsDir(), oldBackupID)
                        backup_matrix.EraseBackupRemoteInfo(oldBackupID)
                        backup_matrix.EraseBackupLocalInfo(oldBackupID)
        backup_fs.ScanID(pathID)
        backup_fs.Calculate()
        Save()
        # TODO: check used space, if we are over quota - stop all tasks immediately
        backup_matrix.RepaintBackup(backupID)
    elif result == 'abort':
        DeleteBackup(backupID)
    if len(tasks()) == 0:
        # do we really need to restart backup_monitor after each backup?
        # if we have a lot of tasks started this will produce a lot of unneeded actions,
        # it would be smarter to restart it once we finish all tasks,
        # because the user will probably leave DHN working after starting a long running operation
        backup_monitor.Restart()
    RunTasks()
    reactor.callLater(0, FireTaskFinishedCallbacks, pathID, version, result)
def DeletePathBackups(pathID, removeLocalFilesToo=True, saveDB=True, calculate=True):
    """
    This removes all backups of the given path ID.
    Performs the same operations as `DeleteBackup()`.
    """
    # get the working item
    item = backup_fs.GetByID(pathID)
    if item is None:
        return
    # this is a list of all known backups of this path
    versions = item.list_versions()
    for version in versions:
        backupID = pathID + '/' + version
        # abort the backup if it just started and is running at the moment
        AbortRunningBackup(backupID)
        # if we requested files for this backup - we do not need them anymore
        io_throttle.DeleteBackupRequests(backupID)
        # remove interests in transport_control
        transport_control.DeleteBackupInterest(backupID)
        # remove local files for this backupID
        if removeLocalFilesToo:
            backup_fs.DeleteLocalBackup(settings.getLocalBackupsDir(), backupID)
        # remove remote info for this backup from the memory
        backup_matrix.EraseBackupRemoteInfo(backupID)
        # also remove the local info
        backup_matrix.EraseBackupLocalInfo(backupID)
        # finally remove this backup from the index
        item.delete_version(version)
        dhnio.Dprint(8, 'backup_control.DeletePathBackups ' + backupID)
    # stop any rebuilding, we will restart it soon
    backup_rebuilder.RemoveAllBackupsToWork()
    backup_rebuilder.SetStoppedFlag()
    # check and calculate used space
    if calculate:
        backup_fs.Scan()
        backup_fs.Calculate()
    # save the index if needed
    if saveDB:
        Save()
def LocalBlockReport(newblock, num_suppliers):
    """
    This updates the "local" matrix - several pieces corresponding to the given block of data.
    """
    if suppliers_set().supplierCount != num_suppliers:
        dhnio.Dprint(6, 'backup_matrix.LocalBlockReport %s skipped, because the number of suppliers was changed' % str(newblock))
        return
    try:
        backupID = newblock.BackupID
        blockNum = int(newblock.BlockNumber)
    except:
        dhnio.DprintException()
        return
    for supplierNum in xrange(num_suppliers):
        for dataORparity in ('Data', 'Parity'):
            packetID = packetid.MakePacketID(backupID, blockNum, supplierNum, dataORparity)
            if not local_files().has_key(backupID):
                local_files()[backupID] = {}
                # dhnio.Dprint(14, 'backup_matrix.LocalBlockReport new local entry for %s created in the memory' % backupID)
            if not local_files()[backupID].has_key(blockNum):
                local_files()[backupID][blockNum] = {
                    'D': [0] * suppliers_set().supplierCount,
                    'P': [0] * suppliers_set().supplierCount}
            local_files()[backupID][blockNum][dataORparity[0]][supplierNum] = 1
            # dhnio.Dprint(6, 'backup_matrix.LocalBlockReport %s max block num is %d' % (backupID, local_max_block_numbers()[backupID]))
            if not local_backup_size().has_key(backupID):
                local_backup_size()[backupID] = 0
            try:
                local_backup_size()[backupID] += os.path.getsize(os.path.join(settings.getLocalBackupsDir(), packetID))
            except:
                dhnio.DprintException()
    if not local_max_block_numbers().has_key(backupID):
        local_max_block_numbers()[backupID] = -1
    if local_max_block_numbers()[backupID] < blockNum:
        local_max_block_numbers()[backupID] = blockNum
    RepaintBackup(backupID)
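# Shape of the "local" matrix maintained by LocalFileReport() and
# LocalBlockReport() above (values are only illustrative, for a
# 2-supplier set where block 0 is fully present on disk):
#
#     local_files() == {
#         '0/0/1/0/F20131120053803PM': {
#             0: {'D': [1, 1], 'P': [1, 1]},
#         },
#     }
#     local_max_block_numbers() == {'0/0/1/0/F20131120053803PM': 0}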
def do_with_files(filename, eccmapname, backupId, blockNumber, targetDir=None):
    if targetDir is None:
        targetDir = settings.getLocalBackupsDir() + '/' + backupId
    myeccmap = geteccmap(eccmapname)
    INTSIZE = settings.IntSize()
    # any padding goes at the end, and block.Length fixes it
    misc.RoundupFile(filename, myeccmap.datasegments * INTSIZE)
    wholefile = dhnio.ReadBinaryFile(filename)
    length = len(wholefile)
    seglength = (length + myeccmap.datasegments - 1) / myeccmap.datasegments
    # cut the file into data segments
    for DSegNum in range(myeccmap.datasegments):
        FileName = targetDir + '/' + str(blockNumber) + '-' + str(DSegNum) + '-Data'
        f = open(FileName, "wb")
        segoffset = DSegNum * seglength
        for i in range(seglength):
            offset = segoffset + i
            if offset < length:
                f.write(wholefile[offset])
            else:
                # any padding should go at the end of the last segment,
                # block.Length fixes that
                f.write(" ")
        f.close()
    del wholefile
    # open all data segments for reading
    dfds = {}
    for DSegNum in range(myeccmap.datasegments):
        FileName = targetDir + '/' + str(blockNumber) + '-' + str(DSegNum) + '-Data'
        dfds[DSegNum] = open(FileName, "rb")
    # open all parity segments for writing
    pfds = {}
    for PSegNum in range(myeccmap.paritysegments):
        FileName = targetDir + '/' + str(blockNumber) + '-' + str(PSegNum) + '-Parity'
        pfds[PSegNum] = open(FileName, "wb")
    Parities = {}
    for i in range(seglength / INTSIZE):
        for PSegNum in range(myeccmap.paritysegments):
            Parities[PSegNum] = 0
        for DSegNum in range(myeccmap.datasegments):
            bstr = dfds[DSegNum].read(INTSIZE)
            if len(bstr) == INTSIZE:
                b, = struct.unpack(">l", bstr)
                Map = myeccmap.DataToParity[DSegNum]
                for PSegNum in Map:
                    if PSegNum > myeccmap.paritysegments:
                        dhnio.Dprint(2, "raidmake.raidmake PSegNum out of range " + str(PSegNum))
                        dhnio.Dprint(2, "raidmake.raidmake limit is " + str(myeccmap.paritysegments))
                        myeccmap.check()
                        raise Exception("eccmap error")
                    Parities[PSegNum] = Parities[PSegNum] ^ b
            else:
                # TODO
                dhnio.Dprint(2, 'raidmake.raidmake WARNING strange read under INTSIZE bytes')
                dhnio.Dprint(2, 'raidmake.raidmake len(bstr)=%s DSegNum=%s' % (str(len(bstr)), str(DSegNum)))
        for PSegNum in range(myeccmap.paritysegments):
            bstr = struct.pack(">l", Parities[PSegNum])
            pfds[PSegNum].write(bstr)
    dataNum = 0
    parityNum = 0
    for f in dfds.values():
        f.close()
        dataNum += 1
    for f in pfds.values():
        f.close()
        parityNum += 1
    del dfds
    del pfds
    del Parities
    return dataNum, parityNum
def BuildRaidFileName(self, supplierNumber, dataOrParity):
    """
    Same, but returns an absolute path of that file.
    """
    return os.path.join(settings.getLocalBackupsDir(), self.BuildFileName(supplierNumber, dataOrParity))
def do_in_memory(filename, eccmapname, backupId, blockNumber, targetDir=None):
    if targetDir is None:
        # here backupId is something like
        # /0/0/1/23/F1234567890AM
        targetDir = settings.getLocalBackupsDir() + '/' + backupId
    myeccmap = geteccmap(eccmapname)
    INTSIZE = settings.IntSize()
    # any padding goes at the end, and block.Length fixes it
    misc.RoundupFile(filename, myeccmap.datasegments * INTSIZE)
    wholefile = dhnio.ReadBinaryFile(filename)
    length = len(wholefile)
    seglength = (length + myeccmap.datasegments - 1) / myeccmap.datasegments
    # cut the file into data segments
    for DSegNum in xrange(myeccmap.datasegments):
        FileName = targetDir + '/' + str(blockNumber) + '-' + str(DSegNum) + '-Data'
        f = open(FileName, "wb")
        segoffset = DSegNum * seglength
        for i in xrange(seglength):
            offset = segoffset + i
            if offset < length:
                f.write(wholefile[offset])
            else:
                # any padding should go at the end of the last segment,
                # block.Length fixes that
                f.write(" ")
        f.close()
    dfds = {}
    for DSegNum in xrange(myeccmap.datasegments):
        FileName = targetDir + '/' + str(blockNumber) + '-' + str(DSegNum) + '-Data'
        # instead of reading data from the opened file
        # we'll keep it in memory;
        # cStringIO tracks the current position in the data for us
        dfds[DSegNum] = cStringIO.StringIO(dhnio.ReadBinaryFile(FileName))
    pfds = {}
    for PSegNum in xrange(myeccmap.paritysegments):
        # we will keep the parity data in the memory
        # and write all parts to disk after doing all the calculations
        pfds[PSegNum] = cStringIO.StringIO()
    Parities = {}
    for i in xrange(seglength / INTSIZE):
        for PSegNum in xrange(myeccmap.paritysegments):
            Parities[PSegNum] = 0
        for DSegNum in xrange(myeccmap.datasegments):
            bstr = dfds[DSegNum].read(INTSIZE)
            if len(bstr) == INTSIZE:
                b, = struct.unpack(">l", bstr)
                Map = myeccmap.DataToParity[DSegNum]
                for PSegNum in Map:
                    if PSegNum > myeccmap.paritysegments:
                        dhnio.Dprint(2, "raidmake.raidmake PSegNum out of range " + str(PSegNum))
                        dhnio.Dprint(2, "raidmake.raidmake limit is " + str(myeccmap.paritysegments))
                        myeccmap.check()
                        raise Exception("eccmap error")
                    Parities[PSegNum] = Parities[PSegNum] ^ b
            else:
                raise Exception('strange read under INTSIZE bytes, len(bstr)=%d DSegNum=%d' % (len(bstr), DSegNum))
        for PSegNum in xrange(myeccmap.paritysegments):
            bstr = struct.pack(">l", Parities[PSegNum])
            pfds[PSegNum].write(bstr)
    dataNum = len(dfds)
    parityNum = len(pfds)
    # flush the parity buffers to disk
    for PSegNum in pfds.keys():
        FileName = targetDir + '/' + str(blockNumber) + '-' + str(PSegNum) + '-Parity'
        dhnio.WriteFile(FileName, pfds[PSegNum].getvalue())
    for f in dfds.values():
        f.close()
    for f in pfds.values():
        f.close()
    del dfds
    del pfds
    del Parities
    return dataNum, parityNum
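# A worked example of the parity arithmetic used above: each parity word is
# the running XOR of its data words, so any single lost data word can be
# recovered by XOR-ing the parity with the surviving words - this is
# exactly what raidread()/RebuildOne() rely on.
d0, d1, d2 = 0b1100, 0b1010, 0b0110
parity = d0 ^ d1 ^ d2
assert d1 == parity ^ d0 ^ d2  # one lost word restored from parity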