def on_begin(self):
    if self.message.good:
        _now = datetime.utcnow()
        _settings = self.message.settings

        logger.debug('On %s, received the configuration data %r',
                     _now, _settings)

        for name, (value, time) in _settings.iteritems():
            # Ignore the time received from node.
            with db.RDB() as rdbw:
                HostQueries.HostSettings.set(name, value, _now,
                                             direct=True, rdbw=rdbw)
            parsed_value = HostQueries.HostSettings.get(name, value)
            logger_status_settings_modified.info(
                'Setting %r modified to %r', name, parsed_value,
                extra={'s_name': name, 's_value': parsed_value})

        self.manager.app.last_settings_sync_time = _now
    else:
        logger.error('Empty message %r', self.message)
def save_connection_password(arguments):
    """
    Generate digest for the connection password (for the current host)
    and save it.
    """
    if len(arguments) < 1:
        cli_error('The host UUID and (optionally) the password '
                  'should be passed as the arguments!')
    else:
        my_uuid = try_parse_uuid(arguments.popleft())
        password = _get_password_from_arguments(arguments, repeat=True)

        proceed_with_host_uuid_cli(my_uuid)

        with db.RDB() as rdbw:
            my_host = Queries.Inhabitants.get_host_by_uuid(my_uuid, rdbw)
            my_username = my_host.username

            _digest = \
                crypto.generate_digest(my_username,
                                       password,
                                       common_settings.HTTP_AUTH_REALM_NODE)

            Queries.Inhabitants.update_user_digest(my_username, _digest,
                                                   rdbw)

        print(u'For host {host} (with user {user}), '
              u'saving the following digest: {digest}'
                  .format(host=my_uuid, user=my_username, digest=_digest))
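# The internals of crypto.generate_digest() are not shown in this file.
# Below is a minimal sketch, assuming it follows the standard HTTP Digest
# "HA1" construction (MD5 over 'user:realm:password'); both the name and
# the exact scheme are assumptions, not the project's confirmed code:
import hashlib

def generate_digest_sketch(username, password, realm):
    # HA1 = MD5(username ':' realm ':' password), hex-encoded.
    return hashlib.md5('%s:%s:%s' % (username, realm, password)).hexdigest()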
def __create_dataset_from_incoming_message(self, _message):
    """
    When the incoming message is received, the dataset is created
    from its data; i.e. either the existing one is used,
    or a completely new one is generated.

    @rtype: DatasetOnChunks, NoneType
    """
    _my_host = _message.dst

    if _message.dataset_uuid is not None:
        # Using the prebuilt dataset.
        with db.RDB() as rdbw:
            _dataset = \
                HostQueries.HostDatasets.get_my_ds_in_progress(
                    _my_host.uuid, _message.dataset_uuid, rdbw)
        if _dataset is not None:
            assert _dataset.uuid == _message.dataset_uuid, \
                (_dataset.uuid, _message.dataset_uuid)
    else:
        raise Exception('Not supported message: {!r}'.format(_message))

    return _dataset
def is_path_hot(self, path):
    """
    Check whether the path is "hot", i.e. was recently changed
    and will (likely) be re-backupped soon.

    @type path: basestring
    @rtype: bool
    """
    # 1. Is the path already snapshotted in the DB?
    with db.RDB() as rdbw:
        if HostQueries.HostFiles.is_file_going_to_backup(path, rdbw):
            return True

    # 2. Is the path in any intermediate buffers?
    # 2.1
    with self.__cooling_down_to_store_lock:
        if path in self.__cooling_down_to_store:
            return True
    # 2.2
    with self.__file_states_ready_to_write_lock:
        if path in self.__file_states_ready_to_write:
            return True

    # Otherwise, the path is not hot.
    return False
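# A hypothetical call site for is_path_hot(): skip re-queueing the paths
# which are already "hot". Both 'tracker' and schedule_for_backup() are
# assumed names for illustration, not from the original code:
for path in ('/home/user/sync/a.txt', '/home/user/sync/b.txt'):
    if not tracker.is_path_hot(path):
        schedule_for_backup(path)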
def delete_chunks(self, chunk_uuids_to_delete):
    """
    Delete some (real, non-dummy!) chunks (from the FS and from the DB).
    Yields the UUIDs of the chunks after removing each one.

    @note: for now, you need to call update_dummy_chunks_size()
        manually after deleting the chunks.

    @param chunk_uuids_to_delete: the UUIDs of the chunks to be deleted.
    @type chunk_uuids_to_delete: col.Iterable
    """
    # For clarity only
    super(ChunkStorageFS, self).delete_chunks(chunk_uuids_to_delete)

    chunk_uuids_to_delete = list(chunk_uuids_to_delete)

    with self.__chunk_op_lock:
        logger.debug('Removing %i chunks', len(chunk_uuids_to_delete))

        # First remove all the chunks from the DB altogether,..
        with db.RDB() as rdbw:
            Queries.Chunks.delete_chunks(chunk_uuids_to_delete, rdbw)

        # ... then remove them from the FS one by one.
        for chunk_uuid in chunk_uuids_to_delete:
            try:
                os.unlink(self.__get_chunk_file_path(chunk_uuid,
                                                     is_dummy=False))
            except Exception as e:
                logger.warning('Problem during removing chunk %s: %s',
                               chunk_uuid, e)
            # Let's consider the chunk deleted even if an exception
            # occurred during the unlink() call. Anyway, there is
            # no such chunk anymore.
            yield chunk_uuid
            logger.debug('Deleted chunk %s', chunk_uuid)
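# Note that delete_chunks() contains yield, so it is a generator: neither
# the DB nor the FS deletion starts until the generator is consumed.
# A minimal usage sketch ('storage' and 'uuids_to_drop' are assumed names):
for deleted_uuid in storage.delete_chunks(uuids_to_drop):
    logger.debug('Chunk %s is confirmed deleted', deleted_uuid)
storage.update_dummy_chunks_size()  # per the @note in the docstring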
def __backup_snapshotted_files_if_needed(self):
    """
    Look over the existing snapshots and, if some files require
    backing up, start the backup for them.
    """
    assert not in_main_thread()

    max_birth = datetime.utcnow() - FILE_COOL_DOWN_TO_BACKUP

    with db.RDB() as rdbw:
        dirs_with_ugroups = \
            HostQueries.HostFiles.get_base_directories_with_ugroups(rdbw)
        dirs_with_ugroups = list(dirs_with_ugroups)  # to close rdbw

    for base_dir, ugroup in dirs_with_ugroups:
        logger.debug('Checking for non-backed data in %r for %r',
                     base_dir, ugroup)

        with db.RDB() as rdbw:
            need_backup, files = \
                inonempty(HostQueries.HostFiles
                                     .get_files_for_backup_older_than(
                                          max_birth, ugroup.uuid, rdbw))
            # files is an iterable over LocalPhysicalFileStateRel;
            # eagerly evaluate it to leave the RDB context.
            files = list(files)

        # Now we've received the flag whether some files exist which
        # need to be backed up; and also we've received the iterator
        # over the file names to be backed up, together with their
        # sizes and last change times.
        # The extra data (size, last change time) is needed so that we
        # back up only this specific version of the file, but won't
        # back it up if it has been changed since (it will be backed up
        # in a different transaction).

        if not need_backup:
            logger.debug('No changes to be synced for %r', ugroup)
        else:
            logger.debug('On auto sync, creating a new dataset and '
                         'launching the backup for %r', ugroup)
            self.__backup_some_phys_files(base_dir, files, ugroup)
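# inonempty() is not defined in this file. A minimal sketch of what its
# call sites (here and in on_reactor_start() below) imply it does: peek at
# an iterable and return a (non-empty?, equivalent-iterator) pair. The
# implementation below is an assumption, not the project's actual code:
from itertools import chain as _chain

def inonempty_sketch(iterable):
    it = iter(iterable)
    try:
        first = next(it)
    except StopIteration:
        return False, iter(())
    # Re-attach the peeked item so the caller still sees the full sequence.
    return True, _chain((first,), it)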
def on_end(self):
    if not self.failure:
        logger.debug('END RECEIVED: %r', self.message_ack)
        _ack = self.message_ack

        _node_uuid = _ack.src.uuid
        _result = _ack.ack_result
        _username = _ack.ack_username
        _host_uuid = _ack.ack_host_uuid
        _user_groups = _ack.ack_groups
        _ssl_cert = _ack.ack_ssl_cert

        if _host_uuid is not None and _ssl_cert is not None:
            # Received the certificate. But is it valid?
            if ssl.validate_host(_ssl_cert,
                                 node_uuid=_node_uuid,
                                 host_uuid=_host_uuid):
                logger.debug('Received the SSL cert for %r (%r), saving.',
                             _username, _ssl_cert)
                with db.RDB() as rdbw:
                    # Note that this is most likely written
                    # to the NULL_UUID DB.
                    HostQueries.HostSettings.set(
                        HostQueries.HostSettings.SSL_CERTIFICATE,
                        _ssl_cert, rdbw=rdbw)
                    HostQueries.HostUsers.set_my_user_groups(
                        _username, _user_groups, rdbw=rdbw)
                logger.debug('Written the result for the Login '
                             'transaction.')
            else:
                logger.error('Received the wrong certificate %r '
                             'signed by %r: from %r, to %r',
                             _ssl_cert.get_subject(),
                             _ssl_cert.get_issuer(),
                             _node_uuid, _host_uuid)
                _ssl_cert = None  # for future writing to the state

        with self.open_state(for_update=True) as state:
            if state.username != _username:
                # That's ok, it may be a case mismatch.
                logger.debug('Requested username %r but received %r',
                             state.username, _username)
            state.ack_result = _result
            state.ack_username = _username
            state.ack_host_uuid = _host_uuid
            state.ack_groups = _user_groups
            state.ack_ssl_cert = _ssl_cert
def __get_restore_directory(self):
    """
    Get the restore directory to use, or None if it cannot be
    found/used. The result is always normalized/absolute.
    """
    _message = self.message
    my_host = self.manager.app.host
    ugroup = _message.ugroup
    wr_uuid = _message.wr_uuid
    time_started = datetime.utcnow()

    target_dir = None  # we'll modify it and return

    # The dataset is considered "auto-sync" if it was invoked
    # by the node, i.e. its wr_uuid is None.
    #assert _message.sync == (wr_uuid is None) == (ds.uuid is not None), \
    #    (_message.sync, wr_uuid, ds.uuid)

    if _message.sync:
        # The Restore transaction was invoked by the Node itself.
        # For "sync" datasets, the restore directory
        # is the synced directory.
        logger.debug('RDB lookup for restore directory for %r', ugroup)
        with db.RDB() as rdbw:
            target_dir = \
                HostQueries.HostFiles.get_base_directory_for_ugroup(
                    ugroup.uuid, rdbw)
        if target_dir is None:
            logger.debug('%r is not yet bound to the user', ugroup)
            self.manager.app.add_me_to_group(ugroup)
            target_dir = host_settings._get_ugroup_default_sync_dir(
                             my_host.uuid, ugroup)
    elif host_settings.get_feature_set().web_initiated_restore:
        # 1. New-style logic:
        #    the directory name is made of dataset.time_started.
        target_dir = host_settings.get_default_restore_dir(my_host.uuid)
        subdir_name = time_started.strftime('%d_%B_%Y_%H-%M-%S')
        target_dir = os.path.join(target_dir, subdir_name)
    else:
        # 2. Old-style logic:
        #    the directory was saved before, in WantRestore.
        target_dir = \
            RestoreTransaction_Host.restore_targets_by_wr_uuid \
                                   .get(wr_uuid)
        if target_dir is None:
            logger.debug('No target directory stored for %r', wr_uuid)

    # Guard against abspath(None), per the docstring contract.
    return abspath(target_dir) if target_dir is not None else None
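# For reference, the subdir_name format above renders like this:
#     datetime(2013, 5, 1, 12, 30, 15).strftime('%d_%B_%Y_%H-%M-%S')
#     -> '01_May_2013_12-30-15'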
def __start_syncing(self):
    """
    Scan the watched directory for the changes which happened offline,
    and launch the online dir watcher.
    """
    assert not in_main_thread()
    cls = self.__class__

    logger.debug('Starting syncing...')

    if __debug__ and _TMP_ENABLE_DUMMY_DATA_TRANSFER_VENTILATOR:
        from .mocks import _start_dummy_data_transfer_ventilator
        _start_dummy_data_transfer_ventilator()

    watched_directories = host_settings.get_selected_paths()

    if watched_directories is None:
        logger.debug('No "paths to backup" setting, not syncing')
    else:
        _watched_dirs_len = len(watched_directories)
        if _watched_dirs_len != 1:
            logger.error('%i directories to be synced instead of 1, '
                         'not syncing anything then: %s',
                         _watched_dirs_len, watched_directories)
        else:
            _ugroup_uuid = self.host.user.base_group.uuid
            watched_dir = watched_directories.iterkeys().next()

            with db.RDB() as rdbw:
                base_dir_id = \
                    HostQueries.HostFiles.add_or_get_base_directory(
                        watched_dir, _ugroup_uuid, rdbw)

            logger.debug('Checking the directory %s for offline changes',
                         watched_dir)
            cls.take_base_directory_snapshot(watched_dir, _ugroup_uuid)
            logger.debug('Snapshot taken!')

            # We are here after all the preparations have been done
            # (and maybe even the backup was started for the files
            # which were changed offline before the host restart).
            #
            # At this point, we are going to start the directory tracker.
            logger.debug('Enabling the FS notification watcher for %r',
                         watched_dir)
            callFromThread(
                self.__fsnotify_manager.watch,
                watched_dir,
                partial(self.__on_fs_change, watched_dir, base_dir_id))

            callFromThread(self.__do_next_iteration_of_file_state_bunch)
def _on_final_dataset_progress_success(p_state):
    """
    This method is called after the PROGRESS transaction (reporting
    to the node about the dataset) has succeeded.

    @type p_state: ProgressTransaction_Host.State
    """
    logger.debug('Finalizing the %r backup.', self.dataset)
    with db.RDB() as rdbw:
        # Mark the current dataset as completed
        # only after the response from the node is received.
        Queries.Datasets.update_dataset(_my_host.uuid, self.dataset,
                                        rdbw)
def _test_take_snapshot(arguments):
    if len(arguments) < 2:
        cli_error('Must pass the base host UUID and the directory path '
                  'to take the snapshot!')

    my_uuid = try_parse_uuid(arguments.popleft())
    dir_path = arguments.popleft()

    print(u'Taking snapshot of {!r}'.format(dir_path))

    proceed_with_host_uuid_cli(my_uuid)

    with db.RDB() as rdbw:
        # Was get_host_with_user_by_uuid(uuid, rdbw): "uuid" is undefined
        # here, the parsed my_uuid was clearly intended.
        host = Queries.Inhabitants.get_host_with_user_by_uuid(my_uuid,
                                                              rdbw)

    UHostApp.take_base_directory_snapshot(dir_path,
                                          host.user.base_group.uuid)
def __bind_single_file_state_to_file_if_needed(self, ds, base_dir_id,
                                               file_):
    """
    @param base_dir_id: the ID of the base directory in the DB,
        to improve the performance of the queries.
    @type base_dir_id: int, NoneType
    """
    if ds is not None:
        _file_states = [file_]
        logger.verbose('Binding file state %r to %r (base dir %r)',
                       file_, ds, base_dir_id)
        with db.RDB() as rdbw:
            HostQueries.HostFiles.add_file_states(base_dir_id,
                                                  _file_states, rdbw)
            HostQueries.HostFiles.bind_file_states_to_files(
                ds.uuid, _file_states, rdbw)
def __init__(self, *args, **kwargs):
    """Constructor."""
    super(BackupTransaction_Host, self).__init__(*args, **kwargs)

    self.__random = Random(42)

    with db.RDB() as rdbw:
        my_user = HostQueries.HostUsers.get_my_user(rdbw)

    self.__cryptographer = None
    self.__progress_notif_deferreds = []
    self.__progress_notif_deferredlist = None
    self.ack_result_code = BackupMessage.ResultCodes.OK
    self.paused = False
    self.dataset = None
def __get_recently_changed_paths(self):
    """
    Returns the recently changed, "hot" file paths which will probably
    get into the next dataset.

    @note: pretty heavy function, use with care!
    @note: obsoleted!

    @rtype: set
    """
    to_backup_paths = []
    max_birth = datetime.utcnow()

    with db.RDB() as rdbw:
        _dirs_with_ugroups = \
            HostQueries.HostFiles.get_base_directories_with_ugroups(rdbw)
        # Eagerly evaluate, to be able to run the next queries.
        dirs_with_ugroups = list(_dirs_with_ugroups)

        for base_dir, ugroup in dirs_with_ugroups:
            new_paths = \
                (os.path.join(base_dir, f.rel_path)
                     for f in HostQueries.HostFiles
                                         .get_files_for_backup_older_than(
                                              max_birth, ugroup.uuid,
                                              rdbw))
            to_backup_paths.extend(new_paths)

    # to_backup_paths now contains the non-backed-up, but already
    # snapshotted, paths from the DB.

    # Two buffers before snapshotting the files.
    with self.__cooling_down_to_store_lock:
        cooling_down_paths = self.__cooling_down_to_store.keys()
    with self.__file_states_ready_to_write_lock:
        ready_to_write_paths = self.__file_states_ready_to_write.keys()

    # The values may have duplicates; deduplicate them with a set.
    return frozenset(chain(to_backup_paths,
                           cooling_down_paths,
                           ready_to_write_paths))
def on_begin(self):
    """
    @todo: If we ever change the app.known_peers to use the C{PeerBook}
        class, then it is the class which must control the DB storage,
        rather than the caller.
    """
    cls = self.__class__

    assert self.is_incoming(), repr(self)

    _known_peers = self.manager.app.known_peers
    _message = self.message

    # Add the expected hosts to the list of known peers.
    add_peers_to_db = []
    for exp_peer_uuid in chain(_message.expect_replication.iterkeys(),
                               _message.expect_restore.iterkeys()):
        assert isinstance(exp_peer_uuid, PeerUUID), \
            exp_peer_uuid

        if exp_peer_uuid not in _known_peers:
            # Need to add some peer, unknown before.
            peer_to_add = Host(uuid=exp_peer_uuid)
            _known_peers[exp_peer_uuid] = peer_to_add
            # Add to the DB as well, but later.
            add_peers_to_db.append(peer_to_add)

    expect_host_chunk_pairs = \
        chain(cls._expect_mapping_as_list(_message.expect_replication,
                                          is_restore=False),
              cls._expect_mapping_as_list(_message.expect_restore,
                                          is_restore=True))

    # Do we need to actually update the database? Do that, if yes.
    with db.RDB() as rdbw:
        if add_peers_to_db:
            logger.debug('Adding peers %r', add_peers_to_db)
            Queries.Inhabitants.set_peers(add_peers_to_db, rdbw)

        HostQueries.HostChunks.expect_chunks(expect_host_chunk_pairs,
                                             rdbw)
def on_reactor_start(app):
    """
    @type app: UHostApp
    """

    @exceptions_logged(logger)
    @contract_epydoc
    def on_backup_completed(backup_succeeded):
        """
        @param backup_succeeded: whether the backup attempt
            has succeeded overall.
        @type backup_succeeded: bool
        """
        if backup_succeeded:
            print('Backup completed successfully!')
        else:
            print('The node disallowed the backup.')

        if (stay_alive_on_success if backup_succeeded
                                  else stay_alive_on_failure):
            print("Stayin' alive. Stayin' alive.")
        else:
            app.terminate_host()

    with db.RDB() as rdbw:
        all_datasets = Queries.Datasets.get_just_datasets(my_uuid, rdbw)

        incomplete_datasets_exist, incomplete_datasets = \
            inonempty(ds for ds in all_datasets if not ds.completed)

        if not incomplete_datasets_exist:
            # No incomplete datasets to back up.
            on_backup_completed(False)
        else:
            # Start the backup of the first dataset in the sequence.
            incomplete_dataset_to_start = incomplete_datasets.next()
            app.auto_start_backup = False
            app.start_backup(incomplete_dataset_to_start.uuid,
                             on_backup_completed)
def __try_save_next_bunch_of_file_states(self):
    """Check if we have multiple states to store to the DB, and do it."""
    assert not in_main_thread()

    with self.__file_states_ready_to_write_lock:
        all_states = self.__file_states_ready_to_write.values()
        self.__file_states_ready_to_write = {}

    # "all_states" contains tuples like (base_dir_id, state).
    # Group them by base_dir_id, and write multiple file states at once.
    if all_states:
        logger.debug('Writing %i file state(s) at once', len(all_states))

        grouped_by_base_dir = sorted_groupby(all_states,
                                             key=itemgetter(0))
        for base_dir_id, per_base_dir in grouped_by_base_dir:
            states_to_write = imap(itemgetter(1), per_base_dir)

            logger.debug('Writing states for base dir %r', base_dir_id)
            with db.RDB() as rdbw:
                HostQueries.HostFiles.add_file_states(
                    base_dir_id, states_to_write, rdbw)

        logger.debug('Wrote the states')
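# sorted_groupby() is not defined in this file. A minimal sketch, assuming
# it simply runs itertools.groupby() over pre-sorted input (groupby() only
# merges adjacent equal keys, hence the sort); this is an assumption about
# the helper, not its confirmed implementation:
from itertools import groupby

def sorted_groupby_sketch(iterable, key=None):
    return groupby(sorted(iterable, key=key), key=key)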
def __get_dummy_chunk_uuids_on_fs(self):
    """
    For a given host UUID, return the list of the chunk UUIDs for all
    the (dummy-only) chunks which are actually present on the file
    system for this host.

    As a convenient side effect, remove from the DB all the dummy
    chunks which are actually missing from the FS.

    @return: the (possibly non-reiterable) Iterable of the UUIDs
        for the dummy chunks available on the FS.
    @rtype: col.Iterable
    """
    with self.__chunk_op_lock:
        present_chunk_uuids = frozenset(self.__get_chunk_uuids_on_fs())

        with db.RDB() as rdbw:
            dummy_chunk_uuids_in_db = \
                set(HostQueries.HostChunks
                               .get_all_dummy_chunk_uuids(rdbw=rdbw))

        # Do we have dummy chunks in the DB which are actually absent
        # from the FS? Wipe them.
        in_db_not_on_fs = dummy_chunk_uuids_in_db - present_chunk_uuids
        dummy_chunk_uuids_in_db -= in_db_not_on_fs

        assert dummy_chunk_uuids_in_db <= present_chunk_uuids, \
            (dummy_chunk_uuids_in_db, present_chunk_uuids)

        if in_db_not_on_fs:
            logger.debug('%i chunks in DB but not on FS, '
                         'deleting from DB:\n%r',
                         len(in_db_not_on_fs), in_db_not_on_fs)
            HostQueries.HostChunks.delete_dummy_chunks(in_db_not_on_fs)

        # The actual set of dummy chunks is the intersection of the
        # "present chunks" and the "chunks marked as dummy in the DB".
        return present_chunk_uuids & dummy_chunk_uuids_in_db
def __restore_files(self):
    """Internal procedure which actually restores the files.

    @todo: the Fingerprint calculation should be turned into
        "file is read by blocks and then repacked into 16KiB segments";
        then the recalculation of the fingerprint in case of FP mismatch
        won't be needed.
    """
    _message = self.message
    my_host = self.manager.app.host
    feature_set = self.manager.app.feature_set
    ds = _message.dataset
    wr_uuid = _message.wr_uuid
    ugroup = _message.ugroup

    restore_directory = self.__get_restore_directory()

    assert _message.sync == (wr_uuid is None) == (ds.uuid is not None), \
        (_message.sync, wr_uuid, ds)

    base_dir_id = None  # will be used later

    if restore_directory is None:
        logger.error('Do not know the restore directory')
    else:
        logger.debug('Going to restore dataset %r for %r to %r',
                     ds, ugroup, restore_directory)
        if not os.path.exists(restore_directory):
            os.makedirs(restore_directory)

        group_key = ugroup.enc_key if feature_set.per_group_encryption \
                                   else None
        cryptographer = Cryptographer(group_key=group_key,
                                      key_generator=None)

        is_whole_dataset_restored = _message.sync

        logger.debug('Restoring %s files from dataset: %r',
                     'all' if is_whole_dataset_restored else 'selected',
                     coalesce(ds, 'N/A'))

        # TODO: use the "delete time" from the LocalPhysicalFileState!
        _now = datetime.utcnow()

        # If we are syncing-in the whole dataset, we should write it
        # into the DB as a whole. The files/file_locals will be bound
        # to it, so that after the restore, we'll know on this Host
        # that these states are fully synced to the cloud already
        # (in fact, they came from the cloud).
        if _message.sync:
            # Let's hack into the files and substitute the base_dir.
            # TODO: do it better!
            for f in _message.files.iterkeys():
                f.base_dir = restore_directory

            # Write the whole dataset to the DB.
            _small_files = _message.files.keys()  # not iterkeys() for now!
            _dirs = {restore_directory: (_small_files, [])}

            # Given the information in the inbound message about
            # the whole dataset, store this dataset in the DB.
            dataset = DatasetWithDirectories(
                          name=ds.name,
                          sync=ds.sync,
                          directories=_dirs,
                          # TODO: transport real data from the node
                          uuid=DatasetUUID.safe_cast_uuid(ds.uuid),
                          ugroup_uuid=UserGroupUUID.safe_cast_uuid(
                                          ugroup.uuid),
                          time_started=ds.time_started,
                          time_completed=_now)

            with db.RDB() as rdbw:
                # Do we already have the dataset?
                _ds_in_progress = \
                    HostQueries.HostDatasets.get_my_ds_in_progress(
                        host_uuid=my_host.uuid,
                        ds_uuid=dataset.uuid,
                        rdbw=rdbw)

                if _ds_in_progress is None:
                    # We don't have it, insert.
                    dummy_ds_uuid = \
                        HostQueries.HostDatasets.create_dataset_for_backup(
                            my_host.uuid, dataset, rdbw)
                    assert dummy_ds_uuid == dataset.uuid, \
                        (dummy_ds_uuid, dataset.uuid)

                base_dir_id = \
                    HostQueries.HostFiles.add_or_get_base_directory(
                        restore_directory, ugroup.uuid, rdbw)

        error_in_any_file_occurred = False

        #
        # Finally, loop over the files and restore each one.
        #
        for file_, file_blocks in _message.files.iteritems():
            self.__restore_op_for_path(file_, file_blocks,
                                       is_whole_dataset_restored,
                                       base_dir_id, restore_directory,
                                       cryptographer, ds)

        # Loop over the files completed.
        if is_whole_dataset_restored:
            logger.debug('Restoring %r completed, there were %s issues.',
                         ds, 'some' if error_in_any_file_occurred
                                    else 'no')
            if not error_in_any_file_occurred:
                with db.RDB() as rdbw:
                    logger.debug('Updating %r at host %s...',
                                 ds, my_host.uuid)
                    ds_to_finish = \
                        Queries.Datasets.get_dataset_by_uuid(
                            ds.uuid, my_host.uuid, rdbw)
                    ds_to_finish.time_completed = datetime.utcnow()
                    logger.debug('Updating %r as completed', dataset)

                    # Mark the current dataset as completed
                    # only after the response from the node is received.
                    Queries.Datasets.update_dataset(my_host.uuid,
                                                    ds_to_finish, rdbw)

        # Everything seems ok up to this moment.
        with self.open_state(for_update=True) as state:
            state.ack_result_code = RestoreMessage.ResultCodes.OK
def on_begin(self):
    """
    @todo: Add an errback too.
    """
    cls = self.__class__

    _message = self.message
    _host = _message.dst

    logger.debug('Starting backup...')

    _dataset = self.dataset \
             = cls.__create_dataset_from_incoming_message(_message)

    if self.manager.app.feature_set.per_group_encryption:
        # Read the group key from the user group.
        with db.RDB() as rdbw:
            _ugroup = Queries.Inhabitants.get_ugroup_by_uuid(
                          _dataset.ugroup_uuid, rdbw)
        group_key = _ugroup.enc_key
    else:
        group_key = None

    self.__cryptographer = Cryptographer(group_key=group_key,
                                         key_generator=None)

    logger.debug('Created dataset %r.', _dataset)

    if _dataset is None:
        raise Exception('No dataset!')
    else:
        self.__notify_about_backup_started()
        self.__notify_about_backup_running()

        ds_uuid = _dataset.uuid

        with cls.per_dataset_transactions_lock:
            if ds_uuid in cls.per_dataset_transactions:
                self.ack_result_code = \
                    BackupMessage.ResultCodes.GENERAL_FAILURE
                raise Exception('The dataset {} is already being '
                                'backed up'.format(ds_uuid))
            else:
                cls.per_dataset_transactions[ds_uuid] = self
                # Force copying it to a dict, to avoid causing
                # race conditions during the logger message
                # serialization.
                logger.debug('Added backup %r, per dataset transactions '
                             'are now %r',
                             ds_uuid, dict(cls.per_dataset_transactions))

        if _dataset is None:
            raise Exception('The dataset {} is not found.'
                                .format(ds_uuid))

        # Initialize the chunks.
        # Please note that these chunks may include the ones
        # which are actually present in the cloud already,
        # but under a different UUID.
        # This will be fixed later, after NEED_INFO_ACK is received.

        # All chunks, including the already uploaded ones;
        # contains ChunkFromFilesFinal objects.
        # _dataset is a MyDatasetOnChunks;
        # _dataset.__chunks is a list of ChunkFromFilesFinal.
        self.__all_chunks = set(_dataset.chunks())
        assert consists_of(self.__all_chunks, ChunkFromFilesFinal), \
            repr(self.__all_chunks)

        # The already uploaded chunks; contains Chunk objects.
        with db.RDB() as rdbw:
            self.__uploaded_chunks = \
                set(HostQueries.HostChunks
                               .get_uploaded_chunks(_dataset.uuid,
                                                    rdbw=rdbw))
        assert consists_of(self.__uploaded_chunks, Chunk), \
            repr(self.__uploaded_chunks)

        # Only the pending chunks.
        self.__uploading_chunks = \
            {ch for ch in self.__all_chunks
                if ch not in self.__uploaded_chunks}
        assert consists_of(self.__uploading_chunks,
                           ChunkFromFilesFinal), \
            repr(self.__uploading_chunks)

        #
        # Now create the NEED_INFO transaction.
        # But only if we have chunks to ask about!
        #
        if self.__uploading_chunks:
            _query = {'select': ('chunks.uuid', 'uuid'),
                      'from': 'chunks',
                      'where': {'["hash", "size", "uuid"]':
                                    [c for c in self.__uploading_chunks
                                       if c.hash is not None]}}

            nifn_tr = self.manager.create_new_transaction(
                          name='NEED_INFO_FROM_NODE',
                          src=_message.dst,
                          dst=self.manager.app.primary_node,
                          parent=self,
                          # NEED_INFO_FROM_NODE-specific
                          query=_query)

            nifn_tr.completed.addCallbacks(
                self._on_child_nifn_completed,
                partial(logger.error, 'NI issue: %r'))
        else:
            logger.debug('IMHO, no new chunks to upload. '
                         'Proceeding directly.')
            # Go to the next step directly.
            self._ask_for_backup_hosts()
def migrate_chunks(self, old_path, new_path):
    """
    Migrate the chunks from their previous path to the new one.

    @note: only non-dummy chunks are migrated; the dummy chunks
        are removed from the old place and are not regenerated
        at the new place; please call update_dummy_chunks_size()
        manually for that.
    """
    assert old_path != new_path, (old_path, new_path)

    # These two variables will be used to specify the progress
    # of the task.
    num, of = 0, 0

    @exceptions_logged(logger)
    def timercb():
        """Callback function called on timer firing."""
        if (num, of) != (0, 0):
            logger_status_chunks_op.info(
                'The chunk migration takes too long, completed %i/%i',
                num, of,
                extra={'_type': 'chunks_migration.progress',
                       'num': num,
                       'of': of})

    timer_service = TimerService(1.0, timercb)

    # If the task takes more than 3 seconds,
    # start notifying about the progress.
    _callLater = reactor.callLater  # pylint:disable=E1101,C0103
    # Won't worry about deferToThread here, cause it is very fast.
    long_task_timer = _callLater(3.0, timer_service.startService)

    with self.__chunk_op_lock:
        try:
            # What chunk files are present on the FS,
            # and what are the chunk UUIDs?
            present_chunk_uuids_iter = self.__get_chunk_uuids_on_fs()

            with db.RDB() as rdbw:
                dummy_chunks_in_db = \
                    frozenset(HostQueries.HostChunks
                                         .get_all_dummy_chunk_uuids(
                                              rdbw=rdbw))

            # First, remove all the dummy chunks.
            removed_dummy_chunks = []
            for dummy_chunk_uuid in dummy_chunks_in_db:
                try:
                    assert self.__get_chunk_file_path(dummy_chunk_uuid,
                                                      is_dummy=True,
                                                      dir_path=old_path) \
                        == self.__get_chunk_file_path(dummy_chunk_uuid,
                                                      is_dummy=True)
                    _path = self.__get_chunk_file_path(dummy_chunk_uuid,
                                                       is_dummy=True)
                    if os.path.exists(_path):
                        os.unlink(_path)
                    # If we removed the file successfully, let's append
                    # it to the list of the chunks which are to be
                    # removed from the DB.
                    removed_dummy_chunks.append(dummy_chunk_uuid)
                except Exception as e:
                    logger.error('Cannot remove dummy chunk %s: %s',
                                 dummy_chunk_uuid, e)

            HostQueries.HostChunks \
                       .delete_dummy_chunks(removed_dummy_chunks)

            # This dictionary maps the chunk UUID
            # to a tuple of the old filename and the new filename.
            #
            # Btw, no need to convert present_chunk_uuids_iter to a set
            # and do the set difference, as it has the same complexity
            # as "for ... if not in".
            uuid_to_filenames = \
                {u: (self.__get_chunk_file_path(u,
                                                is_dummy=False,
                                                dir_path=old_path),
                     self.__get_chunk_file_path(u,
                                                is_dummy=False,
                                                dir_path=new_path))
                     for u in present_chunk_uuids_iter
                     if u not in dummy_chunks_in_db}

            # Now, move the files to the new directory.
            of = len(uuid_to_filenames)
            for u, (old_filename, new_filename) \
                    in uuid_to_filenames.iteritems():
                logger.debug('Moving chunk %s from %s to %s',
                             u, old_filename, new_filename)
                try:
                    with open(old_filename, 'rb') as rfh:
                        with open_wb(new_filename) as wfh:
                            wfh.write(rfh.read())
                except Exception:
                    logger.error('Cannot move chunk %s from %s to %s',
                                 u, old_filename, new_filename)
                else:
                    try:
                        os.unlink(old_filename)
                    except Exception:
                        logger.error('Cannot remove chunk file %s',
                                     old_filename)
                num += 1

        except Exception as e:
            logger_status_chunks_op_error.error(
                'The chunks migration failed: %r', e,
                extra={'_type': 'chunks_migration.error',
                       '_exc': e,
                       '_tb': traceback.format_exc()})

        finally:
            if (not long_task_timer.called and
                not long_task_timer.cancelled):
                long_task_timer.cancel()

            if timer_service.running:
                timer_service.stopService()
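# Design note (an observation, not from the original comments): the chunks
# are migrated by copying the bytes and then unlink()ing the source, rather
# than via os.rename(). A plain rename() would fail with EXDEV whenever
# old_path and new_path live on different filesystems, which the
# copy-then-unlink pattern handles fine, at the cost of extra I/O.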