def start(self):
    """
    Build ``self.starting_deferred`` and fire it immediately when both the
    key storage and the backup index are already synchronized; otherwise
    leave it pending and log a warning.  Also triggers population of private
    files for listeners when it was requested earlier.

    Returns the deferred so the service manager can wait on it.
    """
    from twisted.internet.defer import Deferred
    from logs import lg
    from main import listeners
    from storage import keys_synchronizer
    from storage import index_synchronizer
    from storage import backup_fs

    def _on_not_started(err):
        # called when the deferred is errback-ed by the service machinery
        lg.warn('service %r was not started: %r' % (
            self.service_name, err.getErrorMessage() if err else 'unknown reason'))

    self.starting_deferred = Deferred()
    self.starting_deferred.addErrback(_on_not_started)
    if keys_synchronizer.is_synchronized() and index_synchronizer.is_synchronized():
        if not self.starting_deferred.called:
            self.starting_deferred.callback(True)
        if listeners.is_populate_requered('private_file'):
            # a listener asked for the private files snapshot earlier - serve it now
            listeners.populate_later().remove('private_file')
            backup_fs.populate_private_files()
        return self.starting_deferred
    # not in sync yet - the deferred stays pending until synchronization completes
    lg.warn('can not start service_my_data right now, keys_synchronizer.is_synchronized=%r index_synchronizer.is_synchronized=%r' % (
        keys_synchronizer.is_synchronized(), index_synchronizer.is_synchronized()))
    return self.starting_deferred
def _do_synchronize_keys(self):
    """
    Make sure all my keys are stored on my suppliers nodes (encrypted with
    my master key).

    If some key I do not have locally, but I know remote copy exists - download it.
    If some key was not stored - make a remote copy on supplier machine.
    When key was renamed (after identity rotate) make sure to store the latest
    copy and remove older one.
    """
    from logs import lg
    from storage import backup_control
    from storage import index_synchronizer
    from twisted.internet.defer import Deferred

    def _make_result():
        # both the success and the failure paths report through the same pair
        # of handlers, so the wiring is shared here
        d = Deferred()
        d.addCallback(self._on_keys_synchronized)
        d.addErrback(self._on_keys_synchronize_failed)
        return d

    if index_synchronizer.is_synchronized() and backup_control.revision() > 0:
        # index database is up to date - go ahead and verify the keys
        self._do_check_sync_keys(_make_result())
        return
    lg.warn('backup index database is not synchronized yet')
    if index_synchronizer.is_synchronizing():
        # a sync round is already running - remember to retry when it finishes
        self.sync_keys_requested = True
        return
    # not synchronized and nothing in progress - report the failure right away
    _make_result().errback(Exception('backup index database is not synchronized'))
    return None
def start(self):
    """
    Build ``self.starting_deferred`` and fire it immediately when both the
    key storage and the backup index are already synchronized; otherwise
    leave it pending and log a warning.

    Returns the deferred so the service manager can wait on it.
    """
    from twisted.internet.defer import Deferred
    from logs import lg
    from storage import keys_synchronizer
    from storage import index_synchronizer

    def _on_not_started(err):
        # called when the deferred is errback-ed by the service machinery
        lg.warn('service %r was not started: %r' % (
            self.service_name, err.getErrorMessage() if err else 'unknown reason'))

    self.starting_deferred = Deferred()
    self.starting_deferred.addErrback(_on_not_started)
    if keys_synchronizer.is_synchronized() and index_synchronizer.is_synchronized():
        if not self.starting_deferred.called:
            self.starting_deferred.callback(True)
        return self.starting_deferred
    # not in sync yet - the deferred stays pending until synchronization completes
    lg.warn('can not start service_my_data right now, keys_synchronizer.is_synchronized=%r index_synchronizer.is_synchronized=%r' % (
        keys_synchronizer.is_synchronized(), index_synchronizer.is_synchronized()))
    return self.starting_deferred
def _do_synchronize_keys(self):
    """
    Make sure all my keys are stored on my suppliers nodes (encrypted with
    my master key).

    If some key I do not have locally, but I know remote copy exists - download it.
    If some key was not stored - make a remote copy on supplier machine.
    When key was renamed (after identity rotate) make sure to store the latest
    copy and remove older one.
    """
    from logs import lg
    from userid import global_id
    from userid import my_id
    from interface import api
    from storage import backup_control
    from storage import index_synchronizer
    from storage import keys_synchronizer
    from twisted.internet.defer import Deferred
    sync_result = Deferred()
    sync_result.addCallback(self._on_keys_synchronized)
    sync_result.addErrback(self._on_keys_synchronize_failed)
    # keys can only be verified against an up-to-date backup index
    if not (index_synchronizer.is_synchronized() and backup_control.revision() > 0):
        lg.warn('backup index database is not synchronized yet')
        sync_result.errback(Exception('backup index database is not synchronized yet'))
        return None
    # remote path of the ".keys" folder in my own catalog
    keys_folder_path = global_id.MakeGlobalID(
        key_alias='master', customer=my_id.getGlobalID(), path='.keys')
    response = api.file_exists(keys_folder_path)
    folder_exists = (
        response['status'] == 'OK'
        and bool(response['result'])
        and bool(response['result'].get('exist'))
    )
    if not folder_exists:
        # the folder is missing from the catalog - create it before syncing keys
        response = api.file_create(keys_folder_path, as_folder=True)
        if response['status'] != 'OK':
            lg.err('failed to create ".keys" folder "%s" in the catalog: %r' % (
                keys_folder_path, response))
            sync_result.errback(Exception(
                'failed to create keys folder "%s" in the catalog: %r' % (
                    keys_folder_path, response)))
            return
        lg.info('created new remote folder ".keys" in the catalog: %r' % keys_folder_path)
    # hand the prepared deferred over to the keys_synchronizer state machine
    keys_synchronizer.A('sync', sync_result)
def ReadRawListFiles(supplierNum, listFileText):
    """
    Read ListFiles packet for given supplier and build a "remote" matrix.

    All lines are something like that::

      Findex 5456
      D0 -1
      D0/1 -1
      V0/1/F20090709034221PM 3 0-1000 7463434
      V0/1/F20090709034221PM 3 0-1000 7463434
      D0/0/123/4567 -1
      V0/0/123/4567/F20090709034221PM 3 0-11 434353 missing Data:1,3
      V0/0/123/4/F20090709012331PM 3 0-5 434353 missing Data:1,3 Parity:0,1,2

    First character can be::

      "F" for files
      "D" for folders
      "V" for backed up data

    Returns a tuple ``(backups2remove, paths2remove)`` - sets of backup IDs
    and path IDs that are present on the supplier but obsolete according to
    the (synchronized) local index.
    """
    from storage import backup_control
    # BUGFIX: import unconditionally - index_synchronizer.A() is referenced in
    # the lg.out() call below even when service_backup_db is not started, which
    # previously raised NameError on that path.
    from storage import index_synchronizer
    if driver.is_started("service_backup_db"):
        is_in_sync = index_synchronizer.is_synchronized() and backup_control.revision() > 0
    else:
        is_in_sync = False
    backups2remove = set()
    paths2remove = set()
    oldfiles = ClearSupplierRemoteInfo(supplierNum)
    newfiles = 0
    lg.out(
        8,
        "backup_matrix.ReadRawListFiles %d bytes to read from supplier #%d, rev:%d, %s, is_in_sync=%s"
        % (len(listFileText), supplierNum, backup_control.revision(), index_synchronizer.A(), is_in_sync),
    )
    inpt = cStringIO.StringIO(listFileText)
    while True:
        line = inpt.readline()
        if line == "":
            break
        typ = line[0]
        line = line[1:]
        line = line.rstrip("\n")
        if line.strip() == "":
            continue
        # also don't consider the identity a backup,
        if line.find("http://") != -1 or line.find(".xml") != -1:
            continue
        lg.out(8, " %s:{%s}" % (typ, line))
        if typ == "F":
            # A file line: "F<path> <size>".  If the path is unknown locally we
            # have several cases:
            #   1. this is an old file and we need to remove it and all its backups
            #   2. we lost our local index and did not restore it from a supplier yet
            #   3. we restored our account and did not restore the index yet
            #   4. we lost the index entirely - no local nor remote copy
            # In the first case the file must be removed from the remote supplier;
            # in the other cases we must keep all remote data and believe we can
            # still restore the index.  How to tell them apart?  Two hints:
            #   - index_synchronizer() state: IN_SYNC means the index is fine
            #   - local index revision: 0 means we have no index yet
            # Both are folded into ``is_in_sync`` above.
            try:
                pth, filesz = line.split(" ")
                filesz = int(filesz)
            except:
                pth = line
                filesz = -1
            if not backup_fs.IsFileID(pth):
                # remote supplier has some file - but we don't have it in the index
                if pth.strip("/") in [settings.BackupIndexFileName()]:
                    # this is the index file saved on the remote supplier -
                    # remember its size and register it in backup_fs
                    item = backup_fs.FSItemInfo(pth.strip("/"), pth.strip("/"), backup_fs.FILE)
                    item.size = filesz
                    backup_fs.SetFile(item)
                else:
                    if is_in_sync:
                        # the index is up to date, so this file is truly old
                        # and must be removed from the remote site
                        paths2remove.add(pth)
                        lg.out(8, " F%s - remove, not found in the index" % pth)
                    # otherwise: keep it and hope the index can still be restored
        elif typ == "D":
            # A directory line: "D<path> <size>"
            try:
                pth = line.split(" ")[0]
            except:
                pth = line
            if not backup_fs.ExistsID(pth):
                if is_in_sync:
                    paths2remove.add(pth)
                    lg.out(8, " D%s - remove, not found in the index" % pth)
        elif typ == "V":
            # A version line, minimum 4 words: "0/0/F20090709034221PM" "3" "0-1000" "123456"
            words = line.split(" ")
            if len(words) < 4:
                lg.warn("incorrect line:[%s]" % line)
                continue
            try:
                pathID, versionName = packetid.SplitBackupID(words[0])
                backupID = pathID + "/" + versionName
                lineSupplierNum = int(words[1])
                minBlockNum, maxBlockNum = words[2].split("-")
                maxBlockNum = int(maxBlockNum)
            except:
                lg.warn("incorrect line:[%s]" % line)
                continue
            if lineSupplierNum != supplierNum:
                # this means the supplier holds old files we no longer need
                backups2remove.add(backupID)
                lg.out(8, " V%s - remove, different supplier number" % backupID)
                continue
            iter_path = backup_fs.WalkByID(pathID)
            if iter_path is None:
                # this version is not found in the index
                if is_in_sync:
                    backups2remove.add(backupID)
                    paths2remove.add(pathID)
                    lg.out(8, " V%s - remove, path not found in the index" % pathID)
                continue
            item, localPath = iter_path
            if isinstance(item, dict):
                try:
                    item = item[backup_fs.INFO_KEY]
                except:
                    item = None
            if not item or not item.has_version(versionName):
                if is_in_sync:
                    backups2remove.add(backupID)
                    lg.out(8, " V%s - remove, version is not found in the index" % backupID)
                continue
            # optional tail, e.g.:
            # "0/0/123/4567/F20090709034221PM/0-Data" "3" "0-5" "434353" "missing" "Data:1,3" "Parity:0,1,2"
            missingBlocksSet = {"Data": set(), "Parity": set()}
            if len(words) > 4:
                if words[4].strip() != "missing":
                    lg.warn("incorrect line:[%s]" % line)
                    continue
                for missingBlocksString in words[5:]:
                    try:
                        dp, blocks = missingBlocksString.split(":")
                        missingBlocksSet[dp] = set(blocks.split(","))
                    except:
                        lg.exc()
                        break
            if backupID not in remote_files():
                remote_files()[backupID] = {}
            # +1 because range(2) give us [0,1] but we want [0,1,2]
            for blockNum in xrange(maxBlockNum + 1):
                if blockNum not in remote_files()[backupID]:
                    remote_files()[backupID][blockNum] = {
                        "D": [0] * contactsdb.num_suppliers(),
                        "P": [0] * contactsdb.num_suppliers(),
                    }
                for dataORparity in ["Data", "Parity"]:
                    # we set -1 if the file is missing and 1 if it exists,
                    # so 0 means "no info yet" ... smart!
                    bit = -1 if str(blockNum) in missingBlocksSet[dataORparity] else 1
                    remote_files()[backupID][blockNum][dataORparity[0]][supplierNum] = bit
                    newfiles += int((bit + 1) / 2)  # this should switch -1 or 1 to 0 or 1
            # save max block number for this backup
            if backupID not in remote_max_block_numbers():
                remote_max_block_numbers()[backupID] = -1
            if maxBlockNum > remote_max_block_numbers()[backupID]:
                remote_max_block_numbers()[backupID] = maxBlockNum
            # mark this backup to be repainted
            RepaintBackup(backupID)
    inpt.close()
    lg.out(
        8,
        " old:%d, new:%d, backups2remove:%d, paths2remove:%d"
        % (oldfiles, newfiles, len(backups2remove), len(paths2remove)),
    )
    # return list of backupID's which is too old but stored on suppliers machines
    return backups2remove, paths2remove
def health_check(self):
    """
    The service is healthy only when both the key storage and the backup
    index report they are synchronized.
    """
    from storage import index_synchronizer
    from storage import keys_synchronizer
    # keep the original short-circuit order: keys first, then the index
    if not keys_synchronizer.is_synchronized():
        return False
    return index_synchronizer.is_synchronized()