def isAllSuppliersResponded(self, arg):
    """
    Tell whether enough suppliers have acknowledged, judging by ``self.ackCounter``.

    Returns True when the counter equals the total number of suppliers, or
    when it reaches at least the number of suppliers currently reported
    online minus one. Returns False otherwise. ``arg`` is not used.
    """
    onlineCount = contact_status.countOnlineAmong(contacts.getSupplierIDs())
    everybodyAcked = self.ackCounter == contacts.numSuppliers()
    # one online supplier is allowed to stay silent
    return everybodyAcked or self.ackCounter >= onlineCount - 1
def doPingAllSuppliers(self, arg):
    """
    Ping every supplier and count the acknowledgements in ``self.ackCounter``.

    If the suppliers list contains an empty entry, no ping is sent: the
    counter is set to the number of suppliers and, at most once per ten
    minutes, new suppliers are requested from the central service.
    Pings are also skipped (counter set to the number of suppliers) when the
    previous ping was less than three minutes ago. ``arg`` is not used.
    """
    requestSuppliersPeriod = 10 * 60
    minPingPeriod = 60 * 3

    # an empty ID means we do not have all suppliers yet - ask for more
    if '' in contacts.getSupplierIDs():
        dhnio.Dprint(4, 'backup_monitor.doPingAllSuppliers found empty suppliers !!!!!!!!!!!!!!')
        self.ackCounter = contacts.numSuppliers()
        if time.time() - self.lastRequestSuppliersTime > requestSuppliersPeriod:
            central_service.SendRequestSuppliers()
            self.lastRequestSuppliersTime = time.time()
        return

    # throttle: pretend everybody answered if we pinged recently
    if time.time() - self.pingTime < minPingPeriod:
        self.ackCounter = contacts.numSuppliers()
        return

    self.pingTime = time.time()
    self.ackCounter = 0

    def onSupplierAck(packet):
        # called once for every acknowledgement received
        self.ackCounter += 1

    dhnio.Dprint(6, 'backup_monitor.doPingAllSuppliers going to call suppliers')
    identitypropagate.suppliers(onSupplierAck, True)
def doCleanUpBackups(self, arg):
    """
    Remove old backup versions and shrink the used space to the allowed amount.

    Two passes are made over all backup items:

    1. If the "backups to keep" setting is positive, versions are removed
       from the front of each item's version list until only that many
       remain.
    2. If the per-supplier used space is not below the needed space, extra
       versions (all but the first one in each item's list) are removed
       until the used space drops below the needed space.

    When anything was removed the backup index is rescanned, recalculated
    and saved. A garbage collection is triggered at the end in any case.
    ``arg`` is not used.
    """
    versionsToKeep = settings.getGeneralBackupsToKeep()
    totalBytes = backup_fs.sizebackups()
    numSuppliers = contacts.numSuppliers()
    # with no suppliers the per-supplier usage is undefined;
    # dividing would raise ZeroDivisionError and abort the whole clean up
    if numSuppliers > 0:
        bytesUsed = totalBytes / numSuppliers
    else:
        bytesUsed = 0
    bytesNeeded = diskspace.GetBytesFromString(settings.getCentralMegabytesNeeded(), 0)
    dhnio.Dprint(6, 'backup_monitor.doCleanUpBackups backupsToKeep=%d used=%d needed=%d' % (
        versionsToKeep, bytesUsed, bytesNeeded))
    delete_count = 0

    # user can set how many versions of a file or folder to keep,
    # other versions are removed here
    if versionsToKeep > 0:
        for pathID, localPath, itemInfo in backup_fs.IterateIDs():
            versions = itemInfo.list_versions()
            # TODO do we need to sort the list? it comes from a set, so must be sorted may be
            # NOTE(review): pop(0) removes the oldest version only if the list is ordered oldest first - confirm
            while len(versions) > versionsToKeep:
                backupID = pathID + '/' + versions.pop(0)
                dhnio.Dprint(6, 'backup_monitor.doCleanUpBackups %d of %d backups for %s, so remove older %s' % (
                    len(versions), versionsToKeep, localPath, backupID))
                backup_control.DeleteBackup(backupID, saveDB=False, calculate=False)
                delete_count += 1

    # we need also to fit used space into needed space (given from other users)
    # they trust us - do not need to take extra space from our friends
    # so remove extra backups, but keep at least one for every folder - at least locally!
    # still our suppliers will remove our "extra" files by their "local_tester"
    # this pass is skipped when there are no suppliers: usage can not be measured
    if numSuppliers > 0 and bytesNeeded <= bytesUsed:
        sizeOk = False
        for pathID, localPath, itemInfo in backup_fs.IterateIDs():
            if sizeOk:
                break
            # NOTE(review): presumably the arguments ask for a sorted list - confirm which end is the newest
            versions = itemInfo.list_versions(True, False)
            if len(versions) <= 1:
                continue
            for version in versions[1:]:
                backupID = pathID + '/' + version
                versionInfo = itemInfo.get_version_info(version)
                # versionInfo[1] is treated as the size of that version; empty versions free nothing
                if versionInfo[1] > 0:
                    dhnio.Dprint(6, 'backup_monitor.doCleanUpBackups over use %d of %d, so remove %s of %s' % (
                        bytesUsed, bytesNeeded, backupID, localPath))
                    backup_control.DeleteBackup(backupID, saveDB=False, calculate=False)
                    delete_count += 1
                    bytesUsed -= versionInfo[1]
                    if bytesNeeded > bytesUsed:
                        sizeOk = True
                        break

    # deletions above were made with saveDB=False and calculate=False,
    # so refresh and store the index only once here
    if delete_count > 0:
        backup_fs.Scan()
        backup_fs.Calculate()
        backup_control.Save()
    collected = gc.collect()
    dhnio.Dprint(6, 'backup_monitor.doCleanUpBackups collected %d objects' % collected)
def UpdateListFiles():
    """
    Request a fresh files list from every supplier whose local copy is stale.

    The list for supplier number N is kept in the file named N inside
    ``settings.FileListDir()`` (the folder is created when missing).
    A ``ListFiles`` packet is sent when that file does not exist or is older
    than 24 hours, and ``NumRequestsOutstanding`` is increased for every
    request sent.
    """
    global NumRequestsOutstanding
    if not os.path.exists(settings.FileListDir()):
        os.mkdir(settings.FileListDir())
    for supplierIndex in range(0, contacts.numSuppliers()):
        listPath = os.path.join(settings.FileListDir(), str(supplierIndex))
        dhnio.Dprint(7, "supplierpatrol.UpdateListFiles looking at = " + listPath)
        # a list file that exists and is younger than one day is still fresh
        if os.path.exists(listPath) and not (fileAgeInSeconds(listPath) > 3600*24):
            continue
        dhnio.Dprint(7, "supplierpatrol.UpdateListFiles found one to update " + listPath)
        packetCommand = commands.ListFiles()
        ownerID = misc.getLocalID()
        creatorID = misc.getLocalID()
        packetID = "ListFiles" + str(supplierIndex)
        payload = ""
        remoteID = contacts.getSupplierID(supplierIndex)
        outgoing = dhnpacket.dhnpacket(packetCommand, ownerID, creatorID, packetID, payload, remoteID)
        # register for the reply before the packet leaves
        transport_control.RegisterInterest(ListResult, remoteID, packetID)
        transport_control.outboxAck(outgoing)
        NumRequestsOutstanding += 1
        dhnio.Dprint(7, "supplierpatrol.UpdateListFiles sent request - now outstanding=" + str(NumRequestsOutstanding))
def RequestListFilesAll():
    """
    Call ``RequestListFiles`` for every supplier position.

    Returns the list of results, one per supplier, in supplier order.
    """
    return [RequestListFiles(position) for position in range(contacts.numSuppliers())]
def RandomSample():
    """
    Run ``OneFromList`` on the stored files list of every supplier.

    Suppliers that have no list file in ``settings.FileListDir()`` are skipped.
    """
    for supplierIndex in range(0, contacts.numSuppliers()):
        listPath = os.path.join(settings.FileListDir(), str(supplierIndex))
        if not os.path.exists(listPath):
            continue
        OneFromList(listPath)
def WhoIsLost():
    """
    Decide which supplier, if any, should be fired.

    The checks are made in this order:

    1. A supplier with more than 10 recorded data transfers of which more
       than 50% failed; among several, the one with the highest failure
       ratio is picked.
    2. Among offline suppliers, the one holding the smallest share of our
       blocks, when that share is below 50% and we have some backups at all.
    3. That same offline supplier, when he was not connected during the
       last two days.

    Returns a tuple ``(result, idurl)`` where ``result`` is
    ``'found-one-lost-supplier'`` or ``'not-found-lost-suppliers'``;
    ``idurl`` is an empty string when nobody was found.
    """
    # if we have more than 50% data packets lost to someone and it was a long story - fire this guy
    # we check this first, because this is more important than other things.
    # many things can be a reason: slow connection, old code, network errors, timeout during sending
    # so if we can not send him our data or retrieve it back - how can we do a backups to him even if he is online?
    unreliable_supplier = None
    most_fails = 0.0
    sending_stats = data_sender.statistic()
    for supplierNum in range(contacts.numSuppliers()):
        idurl = contacts.getSupplierID(supplierNum)
        if not idurl:
            continue
        if idurl not in sending_stats:
            continue
        stats = sending_stats[idurl]
        # stats[1] is the number of failures, stats[0] the rest of the transfers
        total = stats[0] + stats[1]
        failed = stats[1]
        if total > 10:
            # float() keeps the ratio fractional: plain "/" on two integers
            # truncates to 0 or 1 under Python 2
            failed_percent = float(failed) / total
            if failed_percent > 0.5 and most_fails < failed_percent:
                most_fails = failed_percent
                unreliable_supplier = idurl
    if unreliable_supplier:
        return 'found-one-lost-supplier', unreliable_supplier

    # we only fire offline suppliers
    # check every offline supplier and see how many files he keeps at the moment
    offline_suppliers = {}
    for supplierNum in range(contacts.numSuppliers()):
        idurl = contacts.getSupplierID(supplierNum)
        if not idurl:
            continue
        if contact_status.isOnline(idurl):
            continue
        blocks, total, stats = backup_matrix.GetSupplierStats(supplierNum)
        # share of blocks stored on that supplier, between 0.0 and 1.0
        offline_suppliers[idurl] = 0.0 if total == 0 else float(blocks) / total

    # if all suppliers are online - we are very happy - no need to fire anybody!
    if not offline_suppliers:
        dhnio.Dprint(4, 'fire_hire.WhoIsLost no offline suppliers, Cool!')
        return 'not-found-lost-suppliers', ''

    # we always fire the worst supplier - the one with the lowest rating
    lost_supplier_idurl = min(offline_suppliers, key=offline_suppliers.get)
    lost_rating = offline_suppliers[lost_supplier_idurl]

    # we do not want to fire this man if he stores at least 50% of our files
    # the fact that he is offline is not enough to fire him!
    if lost_rating < 0.5 and backup_fs.sizebackups() > 0:
        dhnio.Dprint(4, 'fire_hire.WhoIsLost !!!!!!!! %s is offline and keeps only %d%% of our data' % (
            nameurl.GetName(lost_supplier_idurl),
            int(lost_rating * 100.0)))
        return 'found-one-lost-supplier', lost_supplier_idurl

    # but if we did not see him for a long time - we do not want him for sure
    offline_seconds = time.time() - ratings.connected_time(lost_supplier_idurl)
    if offline_seconds > 60 * 60 * 24 * 2:
        dhnio.Dprint(2, 'fire_hire.WhoIsLost !!!!!!!! %s is offline and keeps %d%% of our data, but he was online %d hours ago' % (
            nameurl.GetName(lost_supplier_idurl),
            int(lost_rating * 100.0),
            # seconds to hours
            int(offline_seconds / (60 * 60)),))
        return 'found-one-lost-supplier', lost_supplier_idurl

    dhnio.Dprint(2, 'fire_hire.WhoIsLost some people is not here, but we did not found the bad guy at this time')
    return 'not-found-lost-suppliers', ''
def AttemptRebuild(self):
    """
    Try to rebuild the missing pieces of the block from the pieces we have on hand.

    Passes over all suppliers are repeated while at least one Data piece
    appears locally during a pass. Rebuilt pieces are marked in
    ``self.localData`` / ``self.localParity``; pieces that are on hand and
    also flagged in ``self.missingData`` / ``self.missingParity`` are
    marked in ``self.reconstructedData`` / ``self.reconstructedParity``.

    Returns True if at least one such piece was marked as reconstructed,
    False if there was none or if the number of suppliers changed while
    rebuilding.
    """
    dhnio.Dprint(14, 'block_rebuilder.AttemptRebuild %s %d BEGIN' % (self.backupID, self.blockNum))
    newData = False
    madeProgress = True
    # a piece rebuilt in one pass may allow rebuilding another one in the next pass
    while madeProgress:
        madeProgress = False
        # if the number of suppliers was changed - stop immediately
        if contacts.numSuppliers() != self.supplierCount:
            dhnio.Dprint(10, 'block_rebuilder.AttemptRebuild END - number of suppliers were changed')
            return False
        # will check all data packets we have
        for supplierNum in xrange(self.supplierCount):
            dataFileName = self.BuildRaidFileName(supplierNum, 'Data')
            # if we do not have this item on hand - we will reconstruct it from other items
            if self.localData[supplierNum] == 0:
                parityNum, parityMap = self.eccMap.GetDataFixPath(self.localData, self.localParity, supplierNum)
                # -1 is taken as "no parity can fix this piece right now"
                if parityNum != -1:
                    rebuildFileList = []
                    # the parity file goes first, then the other Data files from its map
                    rebuildFileList.append(self.BuildRaidFileName(parityNum, 'Parity'))
                    for supplierParity in parityMap:
                        if supplierParity != supplierNum:
                            filename = self.BuildRaidFileName(supplierParity, 'Data')
                            if os.path.isfile(filename):
                                rebuildFileList.append(filename)
                    dhnio.Dprint(10, ' rebuilding file %s from %d files' % (os.path.basename(dataFileName), len(rebuildFileList)))
                    raidread.RebuildOne(rebuildFileList, len(rebuildFileList), dataFileName)
                # trust the file system: the piece counts as local only if the file is really there
                if os.path.exists(dataFileName):
                    self.localData[supplierNum] = 1
                    madeProgress = True
                    dhnio.Dprint(10, ' Data file %s found after rebuilding for supplier %d' % (os.path.basename(dataFileName), supplierNum))
            # now we check again if we have the data on hand after rebuild and it is missing - send it
            # but also check to not duplicate sending to this man
            # now sending is separated, see the file data_sender.py
            if self.localData[supplierNum] == 1 and self.missingData[supplierNum] == 1: # and self.dataSent[supplierNum] == 0:
                dhnio.Dprint(10, ' rebuilt a new Data for supplier %d' % supplierNum)
                newData = True
                self.reconstructedData[supplierNum] = 1
                # self.outstandingFilesList.append((dataFileName, self.BuildFileName(supplierNum, 'Data'), supplierNum))
                # self.dataSent[supplierNum] = 1
        # now with parities ...
        for supplierNum in xrange(self.supplierCount):
            parityFileName = self.BuildRaidFileName(supplierNum, 'Parity')
            if self.localParity[supplierNum] == 0:
                parityMap = self.eccMap.ParityToData[supplierNum]
                # a parity is rebuilt only when every Data piece from its map is available
                if self.HaveAllData(parityMap):
                    rebuildFileList = []
                    for supplierParity in parityMap:
                        filename = self.BuildRaidFileName(supplierParity, 'Data') # ??? why not 'Parity'
                        if os.path.isfile(filename):
                            rebuildFileList.append(filename)
                    dhnio.Dprint(10, ' rebuilding file %s from %d files' % (os.path.basename(parityFileName), len(rebuildFileList)))
                    raidread.RebuildOne(rebuildFileList, len(rebuildFileList), parityFileName)
                    if os.path.exists(parityFileName):
                        dhnio.Dprint(10, ' Parity file %s found after rebuilding for supplier %d' % (os.path.basename(parityFileName), supplierNum))
                        self.localParity[supplierNum] = 1
            # so we have the parity on hand and it is missing - send it
            if self.localParity[supplierNum] == 1 and self.missingParity[supplierNum] == 1: # and self.paritySent[supplierNum] == 0:
                dhnio.Dprint(10, ' rebuilt a new Parity for supplier %d' % supplierNum)
                newData = True
                self.reconstructedParity[supplierNum] = 1
                # self.outstandingFilesList.append((parityFileName, self.BuildFileName(supplierNum, 'Parity'), supplierNum))
                # self.paritySent[supplierNum] = 1
    dhnio.Dprint(14, 'block_rebuilder.AttemptRebuild END')
    return newData
def isSuppliersNeeded(self, arg):
    """
    Tell whether the current number of suppliers differs from the configured one.

    Returns True when the configured number of suppliers is zero or negative,
    or when it is not equal to the number of suppliers we have now.
    ``arg`` is not used.
    """
    # a non-positive setting always counts as "suppliers needed"
    if settings.getCentralNumSuppliers() <= 0:
        return True
    return contacts.numSuppliers() != settings.getCentralNumSuppliers()