def test_file_create(self):
    path = 'cat.jpeg'
    parent_path = os.path.dirname(path)
    customer_idurl = 'http://127.0.0.1:8084/alice.xml'
    key_alias = 'master'
    id_iter_iterID = backup_fs.GetIteratorsByPath(
        parent_path,
        iter=backup_fs.fs(customer_idurl, key_alias),
        iterID=backup_fs.fsID(customer_idurl, key_alias),
    )
    newPathID, itemInfo, _, _ = backup_fs.PutItem(
        name=os.path.basename(path),
        parent_path_id=id_iter_iterID[0],
        as_folder=False,
        iter=id_iter_iterID[1],
        iterID=id_iter_iterID[2],
        key_id=None,
    )
    self.assertEqual(newPathID, itemInfo.path_id)
    self.assertEqual(itemInfo.name(), 'cat.jpeg')
    self.assertEqual(itemInfo.key_alias(), 'master')
    self.assertEqual(backup_fs.fs(customer_idurl, key_alias), {
        'cat.jpeg': int(newPathID),
    })
    self.assertEqual(
        backup_fs.fsID(customer_idurl, key_alias)[int(newPathID)].name(),
        'cat.jpeg')
def _delete_version(params):
    lg.out(6, '_delete_version %s' % str(params))
    backupID = params['backupid']
    if not packetid.Valid(backupID):
        return {'result': {"success": False, "error": "backupID %s is not valid" % backupID}}
    customerGlobalID, remotePath, version = packetid.SplitBackupID(backupID)
    if not customerGlobalID:
        customerGlobalID = my_id.getGlobalID()
    if not backup_fs.ExistsID(
            remotePath,
            iterID=backup_fs.fsID(global_id.GlobalUserToIDURL(customerGlobalID))):
        return {'result': {"success": False, "error": "path %s not found" % remotePath}}
    if version:
        backup_control.DeleteBackup(backupID, saveDB=False, calculate=False)
    backup_fs.Scan()
    backup_fs.Calculate()
    backup_control.Save()
    backup_monitor.A('restart')
    control.request_update([('backupID', backupID), ])
    return {'result': {"success": True, "error": None}}
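# Illustration only (not project code): packetid.SplitBackupID() above breaks a
# backup ID into the customer global ID, the remote path ID and the version, and
# packetid.MakeBackupID() glues them back together. A rough sketch of that split,
# assuming an ID layout like 'alice@127.0.0.1_8084:0/0/1/F20131120053803PM'
# (the exact separators are an assumption made for this example):

def split_backup_id(backup_id):
    customer, _, rest = backup_id.partition(':')     # customer global ID before ':'
    remote_path, _, version = rest.rpartition('/')   # version is the last path component
    return customer, remote_path, version

# split_backup_id('alice@127.0.0.1_8084:0/0/1/F20131120053803PM')
# -> ('alice@127.0.0.1_8084', '0/0/1', 'F20131120053803PM')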
def ReadIndex(text_data, encoding='utf-8'):
    """
    Read the index database, ``text_data`` is a string which keeps the
    serialized catalog.

    This is a simple text format, see the ``p2p.backup_fs.Serialize()``
    method. The first line of the index file keeps the revision number.
    """
    global _LoadingFlag
    if _LoadingFlag:
        return False
    _LoadingFlag = True
    backup_fs.Clear()
    count = 0
    try:
        json_data = jsn.loads(
            text_data,
            encoding=encoding,
        )
    except:
        lg.exc()
        json_data = text_data
    if _Debug:
        lg.args(_DebugLevel, json_data=json_data)
    for customer_id in json_data.keys():
        if customer_id == 'items':
            try:
                count = backup_fs.Unserialize(json_data, from_json=True, decoding=encoding)
            except:
                lg.exc()
                return False
        else:
            customer_idurl = global_id.GlobalUserToIDURL(customer_id)
            if not id_url.is_cached(customer_idurl):
                lg.warn('identity %r is not yet cached, skip reading related catalog items' % customer_idurl)
                identitycache.immediatelyCaching(customer_idurl, try_other_sources=False, ignore_errors=True)
                continue
            try:
                count = backup_fs.Unserialize(
                    json_data[customer_id],
                    iter=backup_fs.fs(customer_idurl),
                    iterID=backup_fs.fsID(customer_idurl),
                    from_json=True,
                    decoding=encoding,
                )
            except:
                lg.exc()
                return False
    if _Debug:
        lg.out(_DebugLevel, 'backup_control.ReadIndex %d items loaded' % count)
    # local_site.update_backup_fs(backup_fs.ListAllBackupIDsSQL())
    # commit(new_revision)
    _LoadingFlag = False
    return True
def WriteIndex(filepath=None, encoding='utf-8'):
    """
    Write the index database to the local file .bitdust/metadata/index.
    """
    global _LoadingFlag
    if _LoadingFlag:
        return
    if filepath is None:
        filepath = settings.BackupIndexFilePath()
    json_data = {}
    for customer_idurl in backup_fs.known_customers():
        customer_id = customer_idurl.to_id()
        json_data[customer_id] = backup_fs.Serialize(
            iterID=backup_fs.fsID(customer_idurl),
            to_json=True,
            encoding=encoding,
        )
    src = '%d\n' % revision()
    src += jsn.dumps(
        json_data,
        indent=1,
        separators=(',', ':'),
        encoding=encoding,
    )
    if _Debug:
        lg.args(_DebugLevel, size=len(src), filepath=filepath)
    return bpio.WriteTextFile(filepath, src)
def WriteIndex(filepath=None, encoding='utf-8'):
    """
    Write the index database to the local file .bitdust/metadata/index.
    """
    global _LoadingFlag
    if _LoadingFlag:
        return
    if filepath is None:
        filepath = settings.BackupIndexFilePath()
    json_data = {}
    # json_data = backup_fs.Serialize(to_json=True, encoding=encoding)
    for customer_idurl in backup_fs.known_customers():
        customer_id = global_id.UrlToGlobalID(customer_idurl)
        json_data[customer_id] = backup_fs.Serialize(
            iterID=backup_fs.fsID(customer_idurl),
            to_json=True,
            encoding=encoding,
        )
    src = '%d\n' % revision()
    src += json.dumps(json_data, indent=1, separators=(',', ':'), encoding=encoding)
    if _Debug:
        import pprint
        lg.out(_DebugLevel, pprint.pformat(json_data))
    return bpio.WriteTextFile(filepath, src)
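# Illustration only (not project code): the index file produced by WriteIndex()
# starts with the revision number on its own line, followed by the JSON catalog
# keyed by customer global ID. A minimal sketch of reading such a file back,
# assuming a hypothetical file path and a plain UTF-8 text file:
import json

def read_index_file(filepath='/tmp/index'):
    # first line: integer revision; the rest: JSON with one entry per customer
    with open(filepath, 'r', encoding='utf-8') as f:
        revision_line = f.readline()
        json_text = f.read()
    return int(revision_line.strip()), json.loads(json_text)

# rev, catalog = read_index_file()
# catalog.keys() would then list customer global IDs, e.g. 'alice@127.0.0.1_8084'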
def OnJobDone(backupID, result):
    """
    A callback method fired when backup is finished.

    Here we need to save the index database.
    """
    from storage import backup_rebuilder
    # from customer import io_throttle
    lg.info('job done [%s] with result "%s", %d more tasks' % (backupID, result, len(tasks())))
    jobs().pop(backupID)
    customerGlobalID, remotePath, version = packetid.SplitBackupID(backupID)
    customer_idurl = global_id.GlobalUserToIDURL(customerGlobalID)
    if result == 'done':
        maxBackupsNum = settings.getBackupsMaxCopies()
        if maxBackupsNum:
            item = backup_fs.GetByID(remotePath, iterID=backup_fs.fsID(customer_idurl))
            if item:
                versions = item.list_versions(sorted=True, reverse=True)
                if len(versions) > maxBackupsNum:
                    for version in versions[maxBackupsNum:]:
                        item.delete_version(version)
                        backupID = packetid.MakeBackupID(customerGlobalID, remotePath, version)
                        backup_rebuilder.RemoveBackupToWork(backupID)
                        # io_throttle.DeleteBackupRequests(backupID)
                        # io_throttle.DeleteBackupSendings(backupID)
                        # callback.delete_backup_interest(backupID)
                        backup_fs.DeleteLocalBackup(settings.getLocalBackupsDir(), backupID)
                        backup_matrix.EraseBackupLocalInfo(backupID)
        backup_fs.ScanID(remotePath)
        backup_fs.Calculate()
        Save()
        control.request_update([('pathID', remotePath), ])
        # TODO: check used space, if we have over use - stop all tasks immediately
        backup_matrix.RepaintBackup(backupID)
    elif result == 'abort':
        DeleteBackup(backupID)
    if len(tasks()) == 0:
        # do we really need to restart backup_monitor after each backup?
        # if we have a lot of tasks started this will produce a lot of unneeded actions
        # it would be smarter to restart it once we finish all tasks,
        # because the user will probably leave BitDust working after starting a long running operation
        from storage import backup_monitor
        if _Debug:
            lg.out(_DebugLevel, 'backup_control.OnJobDone restarting backup_monitor() machine because no tasks left')
        backup_monitor.A('restart')
    reactor.callLater(0, RunTask)  # @UndefinedVariable
    reactor.callLater(0, FireTaskFinishedCallbacks, remotePath, version, result)  # @UndefinedVariable
def _do_select_archive_snapshots(self):
    iterID_and_path = backup_fs.WalkByID(
        self.archive_folder_path,
        iterID=backup_fs.fsID(self.queue_owner_idurl, self.queue_alias))
    if iterID_and_path is None:
        lg.err('did not find archive folder in the catalog: %r' % self.archive_folder_path)
        self.automat('restore-failed')
        return False
    iterID, _ = iterID_and_path
    known_archive_snapshots_list = backup_fs.ListAllBackupIDsFull(iterID=iterID)
    if not known_archive_snapshots_list:
        lg.err('failed to restore data from archive, no snapshots found in folder: %r' % self.archive_folder_path)
        self.automat('restore-failed')
        return False
    snapshots_list = []
    for archive_item in known_archive_snapshots_list:
        snapshots_list.append(archive_item[1])
    if _Debug:
        lg.args(_DebugLevel, snapshots_list=snapshots_list)
    if not snapshots_list:
        lg.err('no available snapshots found in archive list: %r' % known_archive_snapshots_list)
        self.automat('restore-failed')
        return False
    snapshot_sequence_ids = []
    for backup_id in snapshots_list:
        _, path_id, _ = packetid.SplitBackupID(backup_id)
        if not path_id:
            continue
        try:
            snapshot_sequence_id = int(path_id.split('/')[-1])
        except:
            lg.exc()
            continue
        if self.start_sequence_id is not None and self.start_sequence_id > snapshot_sequence_id:
            continue
        if self.end_sequence_id is not None and self.end_sequence_id < snapshot_sequence_id:
            continue
        snapshot_sequence_ids.append((snapshot_sequence_id, backup_id, ))
    snapshot_sequence_ids.sort(key=lambda item: int(item[0]))
    if _Debug:
        lg.args(_DebugLevel, snapshot_sequence_ids=snapshot_sequence_ids)
    self.selected_backups = [item[1] for item in snapshot_sequence_ids]
    if not self.selected_backups:
        lg.err('no backups selected from snapshot list')
        self.automat('restore-failed')
        return False
    if _Debug:
        lg.args(_DebugLevel, selected_backups=self.selected_backups)
    return True
def ReadLocalFiles():
    """
    This method scans local backups and builds the whole "local" matrix.
    """
    global _LocalFilesNotifyCallback
    local_files().clear()
    local_max_block_numbers().clear()
    local_backup_size().clear()
    _counter = [0]

    def visit(realpath, subpath, name):
        # subpath is something like 0/0/1/0/F20131120053803PM/0-1-Data
        if not os.path.isfile(realpath):
            return True
        if realpath.startswith("newblock-"):
            return False
        if subpath in [
                settings.BackupIndexFileName(),
                settings.BackupInfoFileName(),
                settings.BackupInfoFileNameOld(),
                settings.BackupInfoEncryptedFileName(),
        ]:
            return False
        try:
            version = subpath.split("/")[-2]
        except:
            return False
        if not packetid.IsCanonicalVersion(version):
            return True
        LocalFileReport(packetID=subpath)
        _counter[0] += 1
        return False

    bpio.traverse_dir_recursive(visit, settings.getLocalBackupsDir())
    lg.out(8, "backup_matrix.ReadLocalFiles %d files indexed" % _counter[0])
    if lg.is_debug(8):
        try:
            if sys.version_info >= (2, 6):
                # localSZ = sys.getsizeof(local_files())
                # remoteSZ = sys.getsizeof(remote_files())
                import lib.getsizeof
                localSZ = lib.getsizeof.total_size(local_files())
                remoteSZ = lib.getsizeof.total_size(remote_files())
                indexByName = lib.getsizeof.total_size(backup_fs.fs())
                indexByID = lib.getsizeof.total_size(backup_fs.fsID())
                lg.out(10, "    all local info uses %d bytes in the memory" % localSZ)
                lg.out(10, "    all remote info uses %d bytes in the memory" % remoteSZ)
                lg.out(10, "    index by name takes %d bytes in the memory" % indexByName)
                lg.out(10, "    index by ID takes %d bytes in the memory" % indexByID)
        except:
            lg.exc()
    if _LocalFilesNotifyCallback is not None:
        _LocalFilesNotifyCallback()
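# Illustration only (not project code): the local backups folder keeps each data
# piece under a subpath like '0/0/1/0/F20131120053803PM/0-1-Data' (the sample
# path from the comment inside visit() above), where the second component from
# the end is the version. A minimal sketch of how that version is pulled out,
# mirroring the subpath.split("/")[-2] logic:

def _extract_version(subpath):
    # version is the second component from the end, e.g. 'F20131120053803PM'
    parts = subpath.split('/')
    return parts[-2] if len(parts) >= 2 else None

# _extract_version('0/0/1/0/F20131120053803PM/0-1-Data') -> 'F20131120053803PM'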
def DeletePathBackups(pathID, removeLocalFilesToo=True, saveDB=True, calculate=True):
    """
    This removes all backups of the given path ID.

    Does the same operations as ``DeleteBackup()``.
    """
    from . import backup_rebuilder
    from customer import io_throttle
    pathID = global_id.CanonicalID(pathID)
    # get the working item
    customer, remotePath = packetid.SplitPacketID(pathID)
    customer_idurl = global_id.GlobalUserToIDURL(customer)
    item = backup_fs.GetByID(remotePath, iterID=backup_fs.fsID(customer_idurl))
    if item is None:
        return False
    lg.out(8, 'backup_control.DeletePathBackups ' + pathID)
    # this is a list of all known backups of this path
    versions = item.list_versions()
    for version in versions:
        backupID = packetid.MakeBackupID(customer, remotePath, version)
        lg.out(8, '        removing %s' % backupID)
        # abort backup if it just started and is running at the moment
        AbortRunningBackup(backupID)
        # if we requested files for this backup - we do not need them anymore
        io_throttle.DeleteBackupRequests(backupID)
        io_throttle.DeleteBackupSendings(backupID)
        # remove interests in transport_control
        # callback.delete_backup_interest(backupID)
        # remove local files for this backupID
        if removeLocalFilesToo:
            backup_fs.DeleteLocalBackup(settings.getLocalBackupsDir(), backupID)
        # remove remote info for this backup from the memory
        backup_matrix.EraseBackupRemoteInfo(backupID)
        # also remove local info
        backup_matrix.EraseBackupLocalInfo(backupID)
        # finally remove this backup from the index
        item.delete_version(version)
        # lg.out(8, 'backup_control.DeletePathBackups ' + backupID)
    # stop any rebuilding, we will restart it soon
    backup_rebuilder.RemoveAllBackupsToWork()
    backup_rebuilder.SetStoppedFlag()
    # check and calculate used space
    if calculate:
        backup_fs.Scan()
        backup_fs.Calculate()
    # save the index if needed
    if saveDB:
        Save()
        control.request_update()
    return True
def test():
    """
    For tests.
    """
    # backup_fs.Calculate()
    # print backup_fs.counter()
    # print backup_fs.numberfiles()
    # print backup_fs.sizefiles()
    # print backup_fs.sizebackups()
    pprint.pprint(backup_fs.fsID())
    pprint.pprint(backup_fs.fs())
    print(backup_fs.GetByID('0'))
def ReadIndex(text_data, encoding='utf-8'):
    """
    Read the index database, ``text_data`` is a string which keeps the
    serialized catalog.

    This is a simple text format, see the ``p2p.backup_fs.Serialize()``
    method. The first line of the index file keeps the revision number.
    """
    global _LoadingFlag
    if _LoadingFlag:
        return False
    _LoadingFlag = True
    backup_fs.Clear()
    count = 0
    try:
        json_data = jsn.loads(
            text_data,
            encoding=encoding,
        )
    except:
        lg.exc()
        json_data = text_data
    if _Debug:
        import pprint
        lg.out(_DebugLevel, pprint.pformat(json_data))
    for customer_id in json_data.keys():
        if customer_id == 'items':
            try:
                count = backup_fs.Unserialize(json_data, from_json=True, decoding=encoding)
            except:
                lg.exc()
                return False
        else:
            customer_idurl = global_id.GlobalUserToIDURL(customer_id)
            try:
                count = backup_fs.Unserialize(
                    json_data[customer_id],
                    iter=backup_fs.fs(customer_idurl),
                    iterID=backup_fs.fsID(customer_idurl),
                    from_json=True,
                    decoding=encoding,
                )
            except:
                lg.exc()
                return False
    if _Debug:
        lg.out(_DebugLevel, 'backup_control.ReadIndex %d items loaded' % count)
    # local_site.update_backup_fs(backup_fs.ListAllBackupIDsSQL())
    # commit(new_revision)
    _LoadingFlag = False
    return True
def doStartRestoreWorker(self, *args, **kwargs):
    """
    Action method.
    """
    iterID_and_path = backup_fs.WalkByID(
        self.archive_folder_path,
        iterID=backup_fs.fsID(self.queue_owner_idurl))
    if iterID_and_path is None:
        lg.err('did not find archive folder in the catalog: %r' % self.archive_folder_path)
        self.automat('restore-failed')
        return
    iterID, path = iterID_and_path
    known_archive_snapshots_list = backup_fs.ListAllBackupIDsFull(iterID=iterID)
    if not known_archive_snapshots_list:
        lg.err('failed to restore data from archive, no snapshots found in folder: %r' % self.archive_folder_path)
        self.automat('restore-failed')
        return
    snapshots_list = []
    for archive_item in known_archive_snapshots_list:
        snapshots_list.append(archive_item[1])
    if _Debug:
        lg.args(_DebugLevel, snapshots_list=snapshots_list)
    if not snapshots_list:
        lg.err('no available snapshots found in archive list: %r' % known_archive_snapshots_list)
        self.automat('restore-failed')
        return
    backupID = snapshots_list[0]
    outfd, outfilename = tmpfile.make(
        'restore',
        extension='.tar.gz',
        prefix=backupID.replace('@', '_').replace('.', '_').replace('/', '_').replace(':', '_') + '_',
    )
    rw = restore_worker.RestoreWorker(backupID, outfd, KeyID=self.group_key_id)
    rw.MyDeferred.addCallback(self._on_restore_done, backupID, outfd, outfilename)
    rw.MyDeferred.addErrback(self._on_restore_failed, backupID, outfd, outfilename)
    if _Debug:
        rw.MyDeferred.addErrback(
            lg.errback,
            debug=_Debug,
            debug_level=_DebugLevel,
            method='archive_reader.doStartRestoreWorker')
    rw.automat('init')
def test():
    """
    For tests.
    """
    # backup_fs.Calculate()
    # print backup_fs.counter()
    # print backup_fs.numberfiles()
    # print backup_fs.sizefiles()
    # print backup_fs.sizebackups()
    import pprint
    pprint.pprint(backup_fs.fsID())
    pprint.pprint(backup_fs.fs())
    print(backup_fs.GetByID('0'))
def test_file_create_with_key_alias(self):
    key_id = 'share_abcd$alice@127.0.0.1_8084'
    key_alias = key_id.split('$')[0]
    customer_idurl = 'http://127.0.0.1:8084/alice.xml'
    path = 'animals/dog.png'
    parent_path = os.path.dirname(path)
    parentPathID, _, _, _ = backup_fs.AddDir(
        parent_path,
        read_stats=False,
        iter=backup_fs.fs(customer_idurl, key_alias),
        iterID=backup_fs.fsID(customer_idurl, key_alias),
        key_id=key_id,
    )
    id_iter_iterID = backup_fs.GetIteratorsByPath(
        parent_path,
        iter=backup_fs.fs(customer_idurl, key_alias),
        iterID=backup_fs.fsID(customer_idurl, key_alias),
    )
    newPathID, itemInfo, _, _ = backup_fs.PutItem(
        name=os.path.basename(path),
        parent_path_id=parentPathID,
        as_folder=False,
        iter=id_iter_iterID[1],
        iterID=id_iter_iterID[2],
        key_id=key_id,
    )
    self.assertEqual(newPathID, itemInfo.path_id)
    self.assertEqual(itemInfo.name(), 'dog.png')
    self.assertEqual(itemInfo.key_alias(), 'share_abcd')
    p1, p2 = newPathID.split('/')
    self.assertEqual(backup_fs.fs(customer_idurl, key_alias), {'animals': {
        0: int(p1),
        'dog.png': int(p2)
    }})
    self.assertEqual(
        backup_fs.fsID(customer_idurl, key_alias)[int(p1)]['i'].name(),
        'animals')
    self.assertEqual(
        backup_fs.fsID(customer_idurl, key_alias)[int(p1)]['i'].key_id,
        key_id)
    self.assertEqual(
        backup_fs.fsID(customer_idurl, key_alias)[int(p1)][int(p2)].name(),
        'dog.png')
    self.assertEqual(
        backup_fs.fsID(customer_idurl, key_alias)[int(p1)][int(p2)].key_id,
        key_id)
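# Illustration only (not project code): a key_id glues a key alias and the
# owner's global user ID together with a '$' separator, which is why
# key_id.split('$')[0] in the test above yields the alias. A minimal sketch
# (the sample value mirrors the test fixture and is otherwise hypothetical):

def split_key_id(key_id='share_abcd$alice@127.0.0.1_8084'):
    key_alias, _, owner_global_id = key_id.partition('$')
    return key_alias, owner_global_id

# split_key_id() -> ('share_abcd', 'alice@127.0.0.1_8084')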
def OnFoundFolderSize(pth, sz, arg):
    """
    This is a callback, fired from the ``lib.dirsize.ask()`` method after it
    finishes calculating the folder size.
    """
    try:
        pathID, version = arg
        customerGlobID, pathID = packetid.SplitPacketID(pathID)
        customerIDURL = global_id.GlobalUserToIDURL(customerGlobID)
        item = backup_fs.GetByID(pathID, iterID=backup_fs.fsID(customerIDURL))
        if item:
            item.set_size(sz)
            backup_fs.Calculate()
            Save()
        if version:
            backupID = packetid.MakeBackupID(customerGlobID, pathID, version)
            job = GetRunningBackupObject(backupID)
            if job:
                job.totalSize = sz
            if _Debug:
                lg.out(_DebugLevel, 'backup_control.OnFoundFolderSize %s %d' % (backupID, sz))
    except:
        lg.exc()
def _on_files_received(self, newpacket, info):
    import json
    from logs import lg
    from p2p import p2p_service
    from storage import backup_fs
    from storage import backup_control
    from crypt import encrypted
    from crypt import my_keys
    from userid import my_id
    from userid import global_id
    try:
        user_id = newpacket.PacketID.strip().split(':')[0]
        if user_id == my_id.getGlobalID():
            # skip my own Files() packets which come from my suppliers
            # only process list Files() from other users who granted me access
            return False
        key_id = user_id
        if not my_keys.is_valid_key_id(key_id):
            # ignore, invalid key id in packet id
            return False
        if not my_keys.is_key_private(key_id):
            raise Exception('private key is not registered')
    except Exception as exc:
        lg.warn(str(exc))
        p2p_service.SendFail(newpacket, str(exc))
        return False
    block = encrypted.Unserialize(newpacket.Payload)
    if block is None:
        lg.warn('failed reading data from %s' % newpacket.RemoteID)
        return False
    if block.CreatorID != global_id.GlobalUserToIDURL(user_id):
        lg.warn('invalid packet, creator ID must be present in packet ID : %s ~ %s' % (
            block.CreatorID, user_id, ))
        return False
    try:
        json_data = json.loads(block.Data(), encoding='utf-8')
        json_data['items']
        customer_idurl = block.CreatorID
        count = backup_fs.Unserialize(
            raw_data=json_data,
            iter=backup_fs.fs(customer_idurl),
            iterID=backup_fs.fsID(customer_idurl),
            from_json=True,
        )
    except Exception as exc:
        lg.exc()
        p2p_service.SendFail(newpacket, str(exc))
        return False
    p2p_service.SendAck(newpacket)
    if count == 0:
        lg.warn('no files were imported during file sharing')
    else:
        backup_control.Save()
        lg.info('imported %d shared files from %s, key_id=%s' % (
            count, customer_idurl, key_id, ))
    return True
    # from access import shared_access_coordinator
    # this_share = shared_access_coordinator.get_active_share(key_id)
    # if not this_share:
    #     lg.warn('share is not opened: %s' % key_id)
    #     p2p_service.SendFail(newpacket, 'share is not opened')
    #     return False
    # this_share.automat('customer-list-files-received', (newpacket, info, block, ))
    return True
def doScanAndQueue(self, *args, **kwargs):
    """
    Action method.
    """
    global _ShutdownFlag
    if _ShutdownFlag:
        if _Debug:
            lg.out(_DebugLevel, 'data_sender.doScanAndQueue _ShutdownFlag is True\n')
        self.automat('scan-done', 0)
        return
    from storage import backup_matrix
    from storage import backup_fs
    backup_matrix.ReadLocalFiles()
    progress = 0
    if _Debug:
        lg.out(_DebugLevel, 'data_sender.doScanAndQueue with %d known customers' % len(contactsdb.known_customers()))
    for customer_idurl in contactsdb.known_customers():
        if customer_idurl != my_id.getLocalID():
            # TODO: check that later
            if _Debug:
                lg.out(_DebugLevel + 6, 'data_sender.doScanAndQueue skip sending to another customer: %r' % customer_idurl)
            continue
        known_suppliers = contactsdb.suppliers(customer_idurl)
        if not known_suppliers or id_url.is_some_empty(known_suppliers):
            if _Debug:
                lg.out(_DebugLevel, 'data_sender.doScanAndQueue found empty supplier(s) for customer %r, SKIP' % customer_idurl)
            continue
        known_backups = misc.sorted_backup_ids(list(backup_matrix.local_files().keys()), True)
        if _Debug:
            lg.out(_DebugLevel, 'data_sender.doScanAndQueue found %d known suppliers for customer %r with %d backups' % (
                len(known_suppliers), customer_idurl, len(known_backups)))
        for backupID in known_backups:
            this_customer_idurl = packetid.CustomerIDURL(backupID)
            if this_customer_idurl != customer_idurl:
                continue
            customerGlobalID, pathID, _ = packetid.SplitBackupID(backupID, normalize_key_alias=True)
            item = backup_fs.GetByID(pathID, iterID=backup_fs.fsID(customer_idurl=customer_idurl))
            if not item:
                if _Debug:
                    lg.out(_DebugLevel, 'data_sender.doScanAndQueue skip sending backup %r path not exist in catalog' % backupID)
                continue
            if item.key_id and customerGlobalID and customerGlobalID != item.key_id:
                if _Debug:
                    lg.out(_DebugLevel, 'data_sender.doScanAndQueue skip sending backup %r key is different in the catalog' % backupID)
                continue
            packetsBySupplier = backup_matrix.ScanBlocksToSend(backupID, limit_per_supplier=None)
            total_for_customer = sum([len(v) for v in packetsBySupplier.values()])
            if _Debug:
                lg.out(_DebugLevel, 'data_sender.doScanAndQueue to be delivered for customer %r : %d' % (customer_idurl, total_for_customer))
            for supplierNum in packetsBySupplier.keys():
                # supplier_idurl = contactsdb.supplier(supplierNum, customer_idurl=customer_idurl)
                if supplierNum >= 0 and supplierNum < len(known_suppliers):
                    supplier_idurl = known_suppliers[supplierNum]
                else:
                    supplier_idurl = None
                if not supplier_idurl:
                    lg.warn('skip sending, unknown supplier_idurl supplierNum=%s for %s, customer_idurl=%r' % (
                        supplierNum, backupID, customer_idurl))
                    continue
                for packetID in packetsBySupplier[supplierNum]:
                    backupID_, _, supplierNum_, _ = packetid.BidBnSnDp(packetID)
                    if backupID_ != backupID:
                        lg.warn('skip sending, unexpected backupID supplierNum=%s for %s, customer_idurl=%r' % (
                            packetID, backupID, customer_idurl))
                        continue
                    if supplierNum_ != supplierNum:
                        lg.warn('skip sending, unexpected supplierNum %s for %s, customer_idurl=%r' % (
                            packetID, backupID, customer_idurl))
                        continue
                    if io_throttle.HasPacketInSendQueue(supplier_idurl, packetID):
                        if _Debug:
                            lg.out(_DebugLevel, 'data_sender.doScanAndQueue %s already in sending queue for %r' % (packetID, supplier_idurl))
                        continue
                    if not io_throttle.OkToSend(supplier_idurl):
                        if _Debug:
                            lg.out(_DebugLevel + 6, 'data_sender.doScanAndQueue skip sending, queue is busy for %r\n' % supplier_idurl)
                        continue
                    # customerGlobalID, pathID = packetid.SplitPacketID(packetID)
                    # tranByID = gate.transfers_out_by_idurl().get(supplier_idurl, [])
                    # if len(tranByID) > 3:
                    #     log.write(u'transfers by %s: %d\n' % (supplier_idurl, len(tranByID)))
                    #     continue
                    customerGlobalID, pathID = packetid.SplitPacketID(packetID)
                    filename = os.path.join(
                        settings.getLocalBackupsDir(),
                        customerGlobalID,
                        pathID,
                    )
                    if not os.path.isfile(filename):
                        if _Debug:
                            lg.out(_DebugLevel, 'data_sender.doScanAndQueue %s is not a file\n' % filename)
                        continue
                    if io_throttle.QueueSendFile(
                        filename,
                        packetID,
                        supplier_idurl,
                        my_id.getIDURL(),
                        self._packetAcked,
                        self._packetFailed,
                    ):
                        progress += 1
                        if _Debug:
                            lg.out(_DebugLevel, 'data_sender.doScanAndQueue put %s in the queue progress=%d' % (packetID, progress, ))
                    else:
                        if _Debug:
                            lg.out(_DebugLevel, 'data_sender.doScanAndQueue io_throttle.QueueSendFile FAILED %s' % packetID)
    if _Debug:
        lg.out(_DebugLevel, 'data_sender.doScanAndQueue progress=%s' % progress)
    self.automat('scan-done', progress)
def on_files_received(newpacket, info):
    list_files_global_id = global_id.ParseGlobalID(newpacket.PacketID)
    if not list_files_global_id['idurl']:
        lg.warn('invalid PacketID: %s' % newpacket.PacketID)
        return False
    trusted_customer_idurl = list_files_global_id['idurl']
    incoming_key_id = list_files_global_id['key_id']
    if trusted_customer_idurl == my_id.getLocalID():
        if _Debug:
            lg.dbg(_DebugLevel, 'ignore %s packet which seems to come from my own supplier' % newpacket)
        # only process list Files() from other customers who granted me access to their files
        return False
    if not my_keys.is_valid_key_id(incoming_key_id):
        lg.warn('ignore, invalid key id in packet %s' % newpacket)
        return False
    if not my_keys.is_key_private(incoming_key_id):
        lg.warn('private key is not registered : %s' % incoming_key_id)
        p2p_service.SendFail(newpacket, 'private key is not registered')
        return False
    try:
        block = encrypted.Unserialize(
            newpacket.Payload,
            decrypt_key=incoming_key_id,
        )
    except:
        lg.exc(newpacket.Payload)
        return False
    if block is None:
        lg.warn('failed reading data from %s' % newpacket.RemoteID)
        return False
    # if block.CreatorID != trusted_customer_idurl:
    #     lg.warn('invalid packet, creator ID must be present in packet ID : %s ~ %s' % (
    #         block.CreatorID, list_files_global_id['idurl'], ))
    #     return False
    try:
        raw_files = block.Data()
    except:
        lg.exc()
        return False
    if block.CreatorID == trusted_customer_idurl:
        # this is a trusted guy sending some shared files to me
        try:
            json_data = serialization.BytesToDict(raw_files, keys_to_text=True, encoding='utf-8')
            json_data['items']
        except:
            lg.exc()
            return False
        count = backup_fs.Unserialize(
            raw_data=json_data,
            iter=backup_fs.fs(trusted_customer_idurl),
            iterID=backup_fs.fsID(trusted_customer_idurl),
            from_json=True,
        )
        p2p_service.SendAck(newpacket)
        if count == 0:
            lg.warn('no files were imported during file sharing')
        else:
            backup_control.Save()
            lg.info('imported %d shared files from %s, key_id=%s' % (
                count, trusted_customer_idurl, incoming_key_id, ))
        events.send('shared-list-files-received', dict(
            customer_idurl=trusted_customer_idurl,
            new_items=count,
        ))
        return True
    # otherwise this must be an external supplier sending us files he stores for the trusted customer
    external_supplier_idurl = block.CreatorID
    try:
        supplier_raw_list_files = list_files.UnpackListFiles(raw_files, settings.ListFilesFormat())
    except:
        lg.exc()
        return False
    # need to detect supplier position from the list of packets
    # and place that supplier on the correct position in contactsdb
    supplier_pos = backup_matrix.DetectSupplierPosition(supplier_raw_list_files)
    known_supplier_pos = contactsdb.supplier_position(external_supplier_idurl, trusted_customer_idurl)
    if _Debug:
        lg.args(_DebugLevel, supplier_pos=supplier_pos, known_supplier_pos=known_supplier_pos,
                external_supplier=external_supplier_idurl, trusted_customer=trusted_customer_idurl, key_id=incoming_key_id)
    if supplier_pos >= 0:
        if known_supplier_pos >= 0 and known_supplier_pos != supplier_pos:
            lg.err('known external supplier %r position %d is not matching to received list files position %d for customer %s' % (
                external_supplier_idurl, known_supplier_pos, supplier_pos, trusted_customer_idurl))
            # TODO: we should remove that below because we do not need it
            # service_customer_family() should take care of suppliers list for trusted customer
            # so we need to just read that list from DHT
            # contactsdb.erase_supplier(
            #     idurl=external_supplier_idurl,
            #     customer_idurl=trusted_customer_idurl,
            # )
            # contactsdb.add_supplier(
            #     idurl=external_supplier_idurl,
            #     position=supplier_pos,
            #     customer_idurl=trusted_customer_idurl,
            # )
            # contactsdb.save_suppliers(customer_idurl=trusted_customer_idurl)
    else:
        lg.warn('not possible to detect external supplier position for customer %s from received list files, known position is %s' % (
            trusted_customer_idurl, known_supplier_pos))
        supplier_pos = known_supplier_pos
    remote_files_changed, _, _, _ = backup_matrix.process_raw_list_files(
        supplier_num=supplier_pos,
        list_files_text_body=supplier_raw_list_files,
        customer_idurl=trusted_customer_idurl,
        is_in_sync=True,
        auto_create=True,
    )
    if remote_files_changed:
        backup_matrix.SaveLatestRawListFiles(
            supplier_idurl=external_supplier_idurl,
            raw_data=supplier_raw_list_files,
            customer_idurl=trusted_customer_idurl,
        )
    # finally sending Ack() packet back
    p2p_service.SendAck(newpacket)
    if remote_files_changed:
        lg.info('received updated list of files from external supplier %s for customer %s' % (external_supplier_idurl, trusted_customer_idurl))
    return True
def _on_files_received(self, newpacket, info):
    from logs import lg
    from lib import serialization
    from main import settings
    from main import events
    from p2p import p2p_service
    from storage import backup_fs
    from storage import backup_control
    from crypt import encrypted
    from crypt import my_keys
    from userid import my_id
    from userid import global_id
    from storage import backup_matrix
    from supplier import list_files
    from contacts import contactsdb
    list_files_global_id = global_id.ParseGlobalID(newpacket.PacketID)
    if not list_files_global_id['idurl']:
        lg.warn('invalid PacketID: %s' % newpacket.PacketID)
        return False
    trusted_customer_idurl = list_files_global_id['idurl']
    incoming_key_id = list_files_global_id['key_id']
    if trusted_customer_idurl == my_id.getGlobalID():
        lg.warn('skip %s packet which seems to come from my own supplier' % newpacket)
        # only process list Files() from other users who granted me access
        return False
    if not my_keys.is_valid_key_id(incoming_key_id):
        lg.warn('ignore, invalid key id in packet %s' % newpacket)
        return False
    if not my_keys.is_key_private(incoming_key_id):
        lg.warn('private key is not registered : %s' % incoming_key_id)
        p2p_service.SendFail(newpacket, 'private key is not registered')
        return False
    try:
        block = encrypted.Unserialize(
            newpacket.Payload,
            decrypt_key=incoming_key_id,
        )
    except:
        lg.exc(newpacket.Payload)
        return False
    if block is None:
        lg.warn('failed reading data from %s' % newpacket.RemoteID)
        return False
    # if block.CreatorID != trusted_customer_idurl:
    #     lg.warn('invalid packet, creator ID must be present in packet ID : %s ~ %s' % (
    #         block.CreatorID, list_files_global_id['idurl'], ))
    #     return False
    try:
        raw_files = block.Data()
    except:
        lg.exc()
        return False
    if block.CreatorID == trusted_customer_idurl:
        # this is a trusted guy sending some shared files to me
        try:
            json_data = serialization.BytesToDict(raw_files, keys_to_text=True)
            json_data['items']
        except:
            lg.exc()
            return False
        count = backup_fs.Unserialize(
            raw_data=json_data,
            iter=backup_fs.fs(trusted_customer_idurl),
            iterID=backup_fs.fsID(trusted_customer_idurl),
            from_json=True,
        )
        p2p_service.SendAck(newpacket)
        events.send('shared-list-files-received', dict(
            customer_idurl=trusted_customer_idurl,
            new_items=count,
        ))
        if count == 0:
            lg.warn('no files were imported during file sharing')
        else:
            backup_control.Save()
            lg.info('imported %d shared files from %s, key_id=%s' % (
                count, trusted_customer_idurl, incoming_key_id, ))
        return True
    # otherwise this must be an external supplier sending us files he stores for the trusted customer
    external_supplier_idurl = block.CreatorID
    try:
        supplier_raw_list_files = list_files.UnpackListFiles(raw_files, settings.ListFilesFormat())
        backup_matrix.SaveLatestRawListFiles(
            supplier_idurl=external_supplier_idurl,
            raw_data=supplier_raw_list_files,
            customer_idurl=trusted_customer_idurl,
        )
    except:
        lg.exc()
        return False
    # need to detect supplier position from the list of packets
    # and place that supplier on the correct position in contactsdb
    real_supplier_pos = backup_matrix.DetectSupplierPosition(supplier_raw_list_files)
    known_supplier_pos = contactsdb.supplier_position(external_supplier_idurl, trusted_customer_idurl)
    if real_supplier_pos >= 0:
        if known_supplier_pos >= 0 and known_supplier_pos != real_supplier_pos:
            lg.warn('external supplier %s position is not matching to list files, rewriting for customer %s' % (
                external_supplier_idurl, trusted_customer_idurl))
            contactsdb.erase_supplier(
                idurl=external_supplier_idurl,
                customer_idurl=trusted_customer_idurl,
            )
            contactsdb.add_supplier(
                idurl=external_supplier_idurl,
                position=real_supplier_pos,
                customer_idurl=trusted_customer_idurl,
            )
            contactsdb.save_suppliers(customer_idurl=trusted_customer_idurl)
    else:
        lg.warn('not possible to detect external supplier position for customer %s' % trusted_customer_idurl)
    # finally send ack packet back
    p2p_service.SendAck(newpacket)
    lg.info('received list of packets from external supplier %s for customer %s' % (external_supplier_idurl, trusted_customer_idurl))
    return True
def run(self):
    """
    Runs a new ``Job`` from that ``Task``.
    """
    iter_and_path = backup_fs.WalkByID(self.remotePath, iterID=backup_fs.fsID(self.customerIDURL))
    if iter_and_path is None:
        lg.out(4, 'backup_control.Task.run ERROR %s not found in the index' % self.remotePath)
        # self.defer.callback('error', self.pathID)
        # self._on_job_failed(self.pathID)
        err = 'remote path "%s" not found in the catalog' % self.remotePath
        OnTaskFailed(self.pathID, err)
        return err
    itemInfo, sourcePath = iter_and_path
    if isinstance(itemInfo, dict):
        try:
            itemInfo = itemInfo[backup_fs.INFO_KEY]
        except:
            lg.exc()
            # self._on_job_failed(self.pathID)
            err = 'catalog item related to "%s" is broken' % self.remotePath
            OnTaskFailed(self.pathID, err)
            return err
    if not self.localPath:
        self.localPath = sourcePath
        lg.out(4, 'backup_control.Task.run local path was populated from catalog: %s' % self.localPath)
    if self.localPath != sourcePath:
        lg.warn('local path differs from catalog: %s != %s' % (self.localPath, sourcePath))
    if not bpio.pathExist(self.localPath):
        lg.warn('path does not exist: %s' % self.localPath)
        # self._on_job_failed(self.pathID)
        err = 'local path "%s" does not exist' % self.localPath
        OnTaskFailed(self.pathID, err)
        return err
    # if os.path.isfile(self.localPath) and self.localPath != sourcePath:
    #     tmpfile.make(name, extension, prefix)
    dataID = misc.NewBackupID()
    if itemInfo.has_version(dataID):
        # oops - we already have the same version
        # let's add 1,2,3... to the end to make an absolutely unique version ID
        i = 1
        while itemInfo.has_version(dataID + str(i)):
            i += 1
        dataID += str(i)
    self.backupID = packetid.MakeBackupID(
        customer=self.fullCustomerID,
        path_id=self.remotePath,
        version=dataID,
    )
    if self.backupID in jobs():
        lg.warn('backup job %s already started' % self.backupID)
        return 'backup job %s already started' % self.backupID
    try:
        backup_fs.MakeLocalDir(settings.getLocalBackupsDir(), self.backupID)
    except:
        lg.exc()
        lg.out(4, 'backup_control.Task.run ERROR creating destination folder for %s' % self.pathID)
        # self.defer.callback('error', self.pathID)
        # self._on_job_failed(self.backupID)
        err = 'failed creating destination folder for "%s"' % self.backupID
        return OnTaskFailed(self.backupID, err)
    compress_mode = 'bz2'  # 'none' # 'gz'
    arcname = os.path.basename(sourcePath)
    if bpio.pathIsDir(self.localPath):
        backupPipe = backup_tar.backuptardir(self.localPath, arcname=arcname, compress=compress_mode)
    else:
        backupPipe = backup_tar.backuptarfile(self.localPath, arcname=arcname, compress=compress_mode)
    backupPipe.make_nonblocking()
    job = backup.backup(
        self.backupID,
        backupPipe,
        finishCallback=OnJobDone,
        blockResultCallback=OnBackupBlockReport,
        blockSize=settings.getBackupBlockSize(),
        sourcePath=self.localPath,
        keyID=self.keyID or itemInfo.key_id,
    )
    jobs()[self.backupID] = job
    itemInfo.add_version(dataID)
    if itemInfo.type == backup_fs.DIR:
        dirsize.ask(self.localPath, OnFoundFolderSize, (self.pathID, dataID))
    else:
        sz = os.path.getsize(self.localPath)
        jobs()[self.backupID].totalSize = sz
        itemInfo.set_size(sz)
        backup_fs.Calculate()
        Save()
    jobs()[self.backupID].automat('start')
    reactor.callLater(0, FireTaskStartedCallbacks, self.pathID, dataID)
    lg.out(4, 'backup_control.Task-%d.run [%s/%s], size=%d, %s' % (
        self.number, self.pathID, dataID, itemInfo.size, self.localPath))
    return None
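# Illustration only (not project code): when the freshly generated version ID
# already exists on the catalog item, Task.run() above keeps appending 1, 2, 3, ...
# until the ID becomes unique. The same idea in isolation, with a plain set
# standing in for itemInfo.has_version():

def make_unique_version(dataID, existing_versions):
    if dataID in existing_versions:
        i = 1
        while (dataID + str(i)) in existing_versions:
            i += 1
        dataID += str(i)
    return dataID

# make_unique_version('F20131120053803PM', {'F20131120053803PM', 'F20131120053803PM1'})
# -> 'F20131120053803PM2'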
def doScanAndQueue(self, *args, **kwargs):
    """
    Action method.
    """
    global _ShutdownFlag
    if _ShutdownFlag:
        if _Debug:
            lg.out(_DebugLevel, 'data_sender.doScanAndQueue _ShutdownFlag is True\n')
        self.automat('scan-done', 0)
        return
    from storage import backup_matrix
    from storage import backup_fs
    backup_matrix.ReadLocalFiles()
    progress = 0
    # if _Debug:
    #     lg.out(_DebugLevel, 'data_sender.doScanAndQueue with %d known customers' % len(contactsdb.known_customers()))
    for customer_idurl in contactsdb.known_customers():
        if customer_idurl != my_id.getIDURL():
            # TODO: check that later
            if _Debug:
                lg.out(_DebugLevel + 2, 'data_sender.doScanAndQueue skip sending to another customer: %r' % customer_idurl)
            continue
        known_suppliers = contactsdb.suppliers(customer_idurl)
        if not known_suppliers or id_url.is_some_empty(known_suppliers):
            if _Debug:
                lg.out(_DebugLevel, 'data_sender.doScanAndQueue found empty supplier(s) for customer %r, SKIP' % customer_idurl)
            continue
        known_backups = misc.sorted_backup_ids(list(backup_matrix.local_files().keys()), True)
        if _Debug:
            lg.out(_DebugLevel, 'data_sender.doScanAndQueue found %d known suppliers for customer %r with %d backups' % (
                len(known_suppliers), customer_idurl, len(known_backups)))
        for backupID in known_backups:
            this_customer_idurl = packetid.CustomerIDURL(backupID)
            if this_customer_idurl != customer_idurl:
                continue
            customerGlobalID, pathID, _ = packetid.SplitBackupID(backupID, normalize_key_alias=True)
            keyAlias = packetid.KeyAlias(customerGlobalID)
            item = backup_fs.GetByID(pathID, iterID=backup_fs.fsID(customer_idurl, keyAlias))
            if not item:
                if _Debug:
                    lg.out(_DebugLevel, 'data_sender.doScanAndQueue skip sending backup %r path not exist in catalog' % backupID)
                continue
            if item.key_id and customerGlobalID and customerGlobalID != item.key_id:
                if _Debug:
                    lg.out(_DebugLevel, 'data_sender.doScanAndQueue skip sending backup %r key is different in the catalog: %r ~ %r' % (
                        backupID, customerGlobalID, item.key_id, ))
                continue
            packetsBySupplier = backup_matrix.ScanBlocksToSend(backupID, limit_per_supplier=None)
            total_for_customer = sum([len(v) for v in packetsBySupplier.values()])
            if total_for_customer:
                if _Debug:
                    lg.out(_DebugLevel, 'data_sender.doScanAndQueue sending %r for customer %r with %d pieces' % (
                        item.name(), customer_idurl, total_for_customer))
                for supplierNum in packetsBySupplier.keys():
                    # supplier_idurl = contactsdb.supplier(supplierNum, customer_idurl=customer_idurl)
                    if supplierNum >= 0 and supplierNum < len(known_suppliers):
                        supplier_idurl = known_suppliers[supplierNum]
                    else:
                        supplier_idurl = None
                    if not supplier_idurl:
                        lg.warn('skip sending, unknown supplier_idurl supplierNum=%s for %s, customer_idurl=%r' % (
                            supplierNum, backupID, customer_idurl))
                        continue
                    for packetID in packetsBySupplier[supplierNum]:
                        backupID_, _, supplierNum_, _ = packetid.BidBnSnDp(packetID)
                        if backupID_ != backupID:
                            lg.warn('skip sending, unexpected backupID supplierNum=%s for %s, customer_idurl=%r' % (
                                packetID, backupID, customer_idurl))
                            continue
                        if supplierNum_ != supplierNum:
                            lg.warn('skip sending, unexpected supplierNum %s for %s, customer_idurl=%r' % (
                                packetID, backupID, customer_idurl))
                            continue
                        if io_throttle.HasPacketInSendQueue(supplier_idurl, packetID):
                            if _Debug:
                                lg.out(_DebugLevel, 'data_sender.doScanAndQueue %s already in sending queue for %r' % (packetID, supplier_idurl))
                            continue
                        latest_progress = self.statistic.get(supplier_idurl, {}).get('latest', '')
                        if len(latest_progress) >= 3 and latest_progress.endswith('---'):
                            if _Debug:
                                lg.out(_DebugLevel + 2, 'data_sender.doScanAndQueue skip sending to supplier %r because multiple packets already failed' % supplier_idurl)
                            continue
                        if not io_throttle.OkToSend(supplier_idurl):
                            if _Debug:
                                lg.out(_DebugLevel + 2, 'data_sender.doScanAndQueue skip sending, queue is busy for %r' % supplier_idurl)
                            continue
                        customerGlobalID, pathID = packetid.SplitPacketID(packetID)
                        filename = os.path.join(
                            settings.getLocalBackupsDir(),
                            customerGlobalID,
                            pathID,
                        )
                        if not os.path.isfile(filename):
                            if _Debug:
                                lg.out(_DebugLevel, 'data_sender.doScanAndQueue %s is not a file' % filename)
                            continue
                        itemInfo = item.to_json()
                        if io_throttle.QueueSendFile(
                            filename,
                            packetID,
                            supplier_idurl,
                            my_id.getIDURL(),
                            lambda packet, ownerID, packetID: self._packetAcked(packet, ownerID, packetID, itemInfo),
                            lambda remoteID, packetID, why: self._packetFailed(remoteID, packetID, why, itemInfo),
                        ):
                            progress += 1
                            if _Debug:
                                lg.out(_DebugLevel, 'data_sender.doScanAndQueue for %r put %s in the queue progress=%d' % (
                                    item.name(), packetID, progress, ))
                        else:
                            if _Debug:
                                lg.out(_DebugLevel, 'data_sender.doScanAndQueue io_throttle.QueueSendFile FAILED %s' % packetID)
    if _Debug:
        lg.out(_DebugLevel, 'data_sender.doScanAndQueue progress=%s' % progress)
    self.automat('scan-done', progress)