Example #1
0
 def isAllSuppliersResponded(self, arg):
     """
     Condition: we heard back from everyone we expect to.
     True when every supplier acked, or when at least all-but-one
     of the currently online suppliers acked.
     """
     # how many of our suppliers contact_status currently sees online
     online_count = contact_status.countOnlineAmong(contacts.getSupplierIDs())
     if self.ackCounter >= online_count - 1:
         return True
     return self.ackCounter == contacts.numSuppliers()
Example #2
0
 def doPingAllSuppliers(self, arg):
     """
     Propagate our identity to all suppliers and count their acks in
     ``self.ackCounter``.  Skips the ping when the supplier list is
     incomplete or when we pinged less than 3 minutes ago; in both skip
     cases ackCounter is preset to numSuppliers() so the "all responded"
     condition fires immediately.
     """
     # check our suppliers first, if we do not have enough yet - do request
     if '' in contacts.getSupplierIDs():
         dhnio.Dprint(4, 'backup_monitor.doPingAllSuppliers found empty suppliers !!!!!!!!!!!!!!')
         # pretend everybody answered - nothing useful to wait for
         self.ackCounter = contacts.numSuppliers()
         # ask the central server for new suppliers, at most once per 10 minutes
         if time.time() - self.lastRequestSuppliersTime > 10 * 60:
             central_service.SendRequestSuppliers()
             self.lastRequestSuppliersTime = time.time()
         return
     # do not want to ping very often - rate-limit to once per 3 minutes
     if time.time() - self.pingTime < 60 * 3:
         self.ackCounter = contacts.numSuppliers()
         return
     self.pingTime = time.time()
     self.ackCounter = 0
     def increaseAckCounter(packet):
         # callback invoked for every supplier response
         self.ackCounter += 1
     dhnio.Dprint(6, 'backup_monitor.doPingAllSuppliers going to call suppliers')
     identitypropagate.suppliers(increaseAckCounter, True)
Example #3
0
 def doCleanUpBackups(self, arg):
     """
     Remove old backup versions in two passes:
     1) enforce the user's "versions to keep" setting per item;
     2) if our used space still exceeds the space granted to us,
        delete oldest versions (keeping at least one per item) until it fits.
     Rescans and saves the backup index only if something was deleted.
     """
     # here we check all backups we have and remove the old one
     # user can set how many versions of that file of folder to keep 
     # other versions (older) will be removed here  
     versionsToKeep = settings.getGeneralBackupsToKeep()
     # per-supplier share of the total backup size
     bytesUsed = backup_fs.sizebackups()/contacts.numSuppliers()
     bytesNeeded = diskspace.GetBytesFromString(settings.getCentralMegabytesNeeded(), 0) 
     dhnio.Dprint(6, 'backup_monitor.doCleanUpBackups backupsToKeep=%d used=%d needed=%d' % (versionsToKeep, bytesUsed, bytesNeeded))
     delete_count = 0
     # pass 1: trim each item down to versionsToKeep versions (0 = keep all)
     if versionsToKeep > 0:
         for pathID, localPath, itemInfo in backup_fs.IterateIDs():
             versions = itemInfo.list_versions()
             # TODO do we need to sort the list? it comes from a set, so must be sorted may be
             while len(versions) > versionsToKeep:
                 # pop(0) removes the oldest version first
                 backupID = pathID + '/' + versions.pop(0)
                 dhnio.Dprint(6, 'backup_monitor.doCleanUpBackups %d of %d backups for %s, so remove older %s' % (len(versions), versionsToKeep, localPath, backupID))
                 backup_control.DeleteBackup(backupID, saveDB=False, calculate=False)
                 delete_count += 1
     # we need also to fit used space into needed space (given from other users)
     # they trust us - do not need to take extra space from our friends
     # so remove oldest backups, but keep at least one for every folder - at least locally!
     # still our suppliers will remove our "extra" files by their "local_tester"
     if bytesNeeded <= bytesUsed:
         sizeOk = False 
         for pathID, localPath, itemInfo in backup_fs.IterateIDs():
             if sizeOk:
                 break
             versions = itemInfo.list_versions(True, False)
             # never delete the only remaining version of an item
             if len(versions) <= 1:
                 continue
             # versions[0] is preserved; delete the rest until we fit
             for version in versions[1:]:
                 backupID = pathID+'/'+version
                 versionInfo = itemInfo.get_version_info(version)
                 # versionInfo[1] presumably is the version size in bytes - TODO confirm
                 if versionInfo[1] > 0:
                     dhnio.Dprint(6, 'backup_monitor.doCleanUpBackups over use %d of %d, so remove %s of %s' % (
                         bytesUsed, bytesNeeded, backupID, localPath))
                     backup_control.DeleteBackup(backupID, saveDB=False, calculate=False)
                     delete_count += 1
                     bytesUsed -= versionInfo[1] 
                     if bytesNeeded > bytesUsed:
                         sizeOk = True
                         break
     # only rescan/save the index if something actually changed
     if delete_count > 0:
         backup_fs.Scan()
         backup_fs.Calculate()
         backup_control.Save() 
     collected = gc.collect()
     dhnio.Dprint(6, 'backup_monitor.doCleanUpBackups collected %d objects' % collected)
Example #4
0
def UpdateListFiles():
    """
    For every supplier whose cached file list is missing or older than one
    day, build and send a ListFiles request packet and register interest in
    the reply.  Increments the module-level NumRequestsOutstanding counter
    for each request sent.
    """
    if (not os.path.exists(settings.FileListDir())):
        os.mkdir(settings.FileListDir())
    for supnum in range(0, contacts.numSuppliers()):
        # cached list file for this supplier, named by supplier number
        filename= os.path.join(settings.FileListDir(), str(supnum))
        dhnio.Dprint(7, "supplierpatrol.UpdateListFiles  looking at = " + filename)
        # refresh if the cache is missing or older than 24 hours
        if (not os.path.exists(filename) or (fileAgeInSeconds(filename) > 3600*24)):
            dhnio.Dprint(7, "supplierpatrol.UpdateListFiles  found one to update " + filename)
            command=commands.ListFiles()
            OwnerID=misc.getLocalID()
            CreatorID=misc.getLocalID()
            PacketID="ListFiles" + str(supnum)
            Payload=""
            RemoteID= contacts.getSupplierID(supnum)
            request=dhnpacket.dhnpacket(command, OwnerID, CreatorID, PacketID, Payload, RemoteID)
            # ListResult will be called when the reply packet arrives
            transport_control.RegisterInterest(ListResult, RemoteID, PacketID)
            transport_control.outboxAck(request)
            global NumRequestsOutstanding
            NumRequestsOutstanding += 1
            dhnio.Dprint(7, "supplierpatrol.UpdateListFiles  sent request - now outstanding=" + str(NumRequestsOutstanding))
Example #5
0
def RequestListFilesAll():
    """
    Request the file list from every supplier.
    Returns the list of per-supplier results from RequestListFiles().
    """
    return [RequestListFiles(index) for index in range(contacts.numSuppliers())]
Example #6
0
def RandomSample():
    """
    Run OneFromList() on each supplier's cached file list, skipping
    suppliers whose cache file does not exist yet.
    """
    for supplier_index in range(contacts.numSuppliers()):
        list_path = os.path.join(settings.FileListDir(), str(supplier_index))
        if not os.path.exists(list_path):
            continue
        OneFromList(list_path)
Example #7
0
def WhoIsLost():
    """
    Decide which supplier, if any, should be replaced.

    Checks, in order:
      1. a supplier with more than 50% failed data packets (unreliable link);
      2. an offline supplier holding less than 50% of our data;
      3. an offline supplier not seen online for more than 2 days.

    Returns a tuple (event, idurl): ('found-one-lost-supplier', idurl)
    or ('not-found-lost-suppliers', '').
    """
    # if we have more than 50% data packets lost to someone and it was a long story - fire this guy
    # we check this first, because this is more important than other things.
    # many things can be a reason: slow connection, old code, network errors, timeout during sending
    # so if we can not send him our data or retreive it back - how can we do a backups to him even if he is online?  
    unreliable_supplier = None
    most_fails = 0.0
    for supplierNum in range(contacts.numSuppliers()):
        idurl = contacts.getSupplierID(supplierNum)
        if not idurl:
            continue
        if idurl not in data_sender.statistic():
            continue
        # stats presumably is (sent_ok, sent_failed) counters - TODO confirm
        stats = data_sender.statistic()[idurl]
        total = stats[0] + stats[1]
        failed = stats[1]
        # require some history before judging the link
        if total > 10:
            # float() is essential: plain int/int here is integer division in
            # Python 2 and would always give 0, so the 0.5 threshold never fired
            failed_percent = float(failed) / float(total)
            if failed_percent > 0.5:
                if most_fails < failed_percent:
                    most_fails = failed_percent
                    unreliable_supplier = idurl
    if unreliable_supplier:
        return 'found-one-lost-supplier', unreliable_supplier

    # we only fire offline suppliers
    offline_suppliers = {}

    # ask backup_matrix about current situation
    # check every offline supplier and see how many files he keep at the moment
    for supplierNum in range(contacts.numSuppliers()):
        idurl = contacts.getSupplierID(supplierNum)
        if not idurl:
            continue
        if contact_status.isOnline(idurl):
            continue
        blocks, total, stats = backup_matrix.GetSupplierStats(supplierNum)
        # fraction of our blocks this supplier holds; float() avoids the
        # Python 2 integer-division truncation that made every partial
        # holder look like he keeps 0% of our data
        rating = 0.0 if total == 0 else float(blocks) / float(total)
        offline_suppliers[idurl] = rating

    # if all suppliers are online - we are very happy - no need to fire anybody! 
    if len(offline_suppliers) == 0:
        dhnio.Dprint(4, 'fire_hire.WhoIsLost no offline suppliers, Cool!')
        return 'not-found-lost-suppliers', ''

    # sort users - we always fire worst (lowest-rated) supplier
    by_rating = sorted(offline_suppliers.keys(), key=lambda idurl: offline_suppliers[idurl])
    lost_supplier_idurl = by_rating[0]

    # we do not want to fire this man if he store at least 50% of our files
    # the fact that he is offline is not enough to fire him!
    if offline_suppliers[lost_supplier_idurl] < 0.5 and backup_fs.sizebackups() > 0:
        dhnio.Dprint(4, 'fire_hire.WhoIsLost !!!!!!!! %s is offline and keeps only %d%% of our data' % (
            nameurl.GetName(lost_supplier_idurl), 
            int(offline_suppliers[lost_supplier_idurl] * 100.0)))
        return 'found-one-lost-supplier', lost_supplier_idurl

    # but if we did not saw him for a long time (2 days) - we do not want him for sure
    if time.time() - ratings.connected_time(lost_supplier_idurl) > 60 * 60 * 24 * 2:
        dhnio.Dprint(2, 'fire_hire.WhoIsLost !!!!!!!! %s is offline and keeps %d%% of our data, but he was online %d hours ago' % (
            nameurl.GetName(lost_supplier_idurl), 
            int(offline_suppliers[lost_supplier_idurl] * 100.0),
            # bug fix: seconds -> hours means dividing by 3600, not multiplying
            int((time.time() - ratings.connected_time(lost_supplier_idurl)) / (60 * 60)),))
        return 'found-one-lost-supplier', lost_supplier_idurl

    dhnio.Dprint(2, 'fire_hire.WhoIsLost some people is not here, but we did not found the bad guy at this time')
    return 'not-found-lost-suppliers', ''
Example #8
0
 def AttemptRebuild(self):
     """
     This made an attempt to rebuild the missing pieces from pieces we have on hands.

     First loop repeatedly reconstructs missing Data files from available
     Parity+Data combinations (via the ECC map) until no more progress is
     possible; second loop reconstructs missing Parity files from complete
     Data sets.  Returns True if anything new was rebuilt that a supplier
     is missing, False otherwise (including early abort when the number of
     suppliers changed mid-run).
     """
     dhnio.Dprint(14, 'block_rebuilder.AttemptRebuild %s %d BEGIN' % (self.backupID, self.blockNum))
     newData = False
     madeProgress = True
     # keep sweeping: rebuilding one Data piece may enable rebuilding another
     while madeProgress:
         madeProgress = False
         # if number of suppliers were changed - stop immediately 
         if contacts.numSuppliers() != self.supplierCount:
             dhnio.Dprint(10, 'block_rebuilder.AttemptRebuild END - number of suppliers were changed')
             return False
         # will check all data packets we have 
         for supplierNum in xrange(self.supplierCount):
             dataFileName = self.BuildRaidFileName(supplierNum, 'Data')
             # if we do not have this item on hands - we will reconstruct it from other items 
             if self.localData[supplierNum] == 0:
                 # find a parity (and its data set) that can recover this piece
                 parityNum, parityMap = self.eccMap.GetDataFixPath(self.localData, self.localParity, supplierNum)
                 if parityNum != -1:
                     rebuildFileList = []
                     rebuildFileList.append(self.BuildRaidFileName(parityNum, 'Parity'))
                     for supplierParity in parityMap:
                         if supplierParity != supplierNum:
                             filename = self.BuildRaidFileName(supplierParity, 'Data')
                             # only include source files that actually exist locally
                             if os.path.isfile(filename):
                                 rebuildFileList.append(filename)
                     dhnio.Dprint(10, '    rebuilding file %s from %d files' % (os.path.basename(dataFileName), len(rebuildFileList)))
                     raidread.RebuildOne(rebuildFileList, len(rebuildFileList), dataFileName)
                 # success is detected by the output file appearing on disk
                 if os.path.exists(dataFileName):
                     self.localData[supplierNum] = 1
                     madeProgress = True
                     dhnio.Dprint(10, '        Data file %s found after rebuilding for supplier %d' % (os.path.basename(dataFileName), supplierNum))
             # now we check again if we have the data on hand after rebuild at it is missing - send it
             # but also check to not duplicate sending to this man   
             # now sending is separated, see the file data_sender.py          
             if self.localData[supplierNum] == 1 and self.missingData[supplierNum] == 1: # and self.dataSent[supplierNum] == 0:
                 dhnio.Dprint(10, '            rebuilt a new Data for supplier %d' % supplierNum)
                 newData = True
                 self.reconstructedData[supplierNum] = 1
                 # self.outstandingFilesList.append((dataFileName, self.BuildFileName(supplierNum, 'Data'), supplierNum))
                 # self.dataSent[supplierNum] = 1
     # now with parities ...            
     for supplierNum in xrange(self.supplierCount):
         parityFileName = self.BuildRaidFileName(supplierNum, 'Parity')
         if self.localParity[supplierNum] == 0:
             parityMap = self.eccMap.ParityToData[supplierNum]
             # a parity can only be rebuilt when every data piece it covers is present
             if self.HaveAllData(parityMap):
                 rebuildFileList = []
                 for supplierParity in parityMap:
                     filename = self.BuildRaidFileName(supplierParity, 'Data')  # ??? why not 'Parity'
                     if os.path.isfile(filename): 
                         rebuildFileList.append(filename)
                 dhnio.Dprint(10, '    rebuilding file %s from %d files' % (os.path.basename(parityFileName), len(rebuildFileList)))
                 raidread.RebuildOne(rebuildFileList, len(rebuildFileList), parityFileName)
                 if os.path.exists(parityFileName):
                     dhnio.Dprint(10, '        Parity file %s found after rebuilding for supplier %d' % (os.path.basename(parityFileName), supplierNum))
                     self.localParity[supplierNum] = 1
         # so we have the parity on hand and it is missing - send it
         if self.localParity[supplierNum] == 1 and self.missingParity[supplierNum] == 1: # and self.paritySent[supplierNum] == 0:
             dhnio.Dprint(10, '            rebuilt a new Parity for supplier %d' % supplierNum)
             newData = True
             self.reconstructedParity[supplierNum] = 1
             # self.outstandingFilesList.append((parityFileName, self.BuildFileName(supplierNum, 'Parity'), supplierNum))
             # self.paritySent[supplierNum] = 1
     dhnio.Dprint(14, 'block_rebuilder.AttemptRebuild END')
     return newData
Example #9
0
 def isSuppliersNeeded(self, arg):
     """
     Condition: the desired supplier count is unset (<= 0) or does not
     match how many suppliers we currently have.
     """
     desired = settings.getCentralNumSuppliers()
     if desired <= 0:
         return True
     return contacts.numSuppliers() != desired