Example #1
    def on_begin(self):
        if self.message.good:
            _now = datetime.utcnow()
            _settings = self.message.settings
            logger.debug('On %s, received the configuration data %r',
                         _now, _settings)

            for name, (value, time) in _settings.iteritems():
                # Ignore the time received from node.
                with db.RDB() as rdbw:
                    HostQueries.HostSettings.set(name, value, _now,
                                                 direct=True,
                                                 rdbw=rdbw)

                parsed_value = HostQueries.HostSettings.get(name, value)

                logger_status_settings_modified.info(
                    'Setting %r modified to %r',
                    name, parsed_value,
                    extra={'s_name': name,
                           's_value': parsed_value})

            self.manager.app.last_settings_sync_time = _now

        else:
            logger.error('Empty message %r', self.message)
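
A pattern shared by all of these examples: db.RDB() is used as a context
manager that yields a database wrapper (rdbw), which is passed explicitly to
every query; leaving the "with" block releases the connection. A minimal
sketch of what such a wrapper could look like, backed by sqlite3 purely for
illustration (the real RDB implementation is not shown in these examples):

    import sqlite3

    class RDB(object):
        """Illustrative sketch of a context manager yielding a DB wrapper."""

        def __init__(self, path=':memory:'):
            self.__path = path

        def __enter__(self):
            self.__conn = sqlite3.connect(self.__path)
            return self.__conn  # the "rdbw" passed to the queries

        def __exit__(self, exc_type, exc_value, tb):
            if exc_type is None:
                self.__conn.commit()    # commit on success
            else:
                self.__conn.rollback()  # roll back on error
            self.__conn.close()
            return False  # do not suppress the exception, if any
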
Example #2
def save_connection_password(arguments):
    """
    Generate digest for the connection password (for the current host)
    and save it.
    """
    if len(arguments) < 1:
        cli_error('The host UUID and (optionally) the password '
                  'should be passed as the arguments!')
    else:
        my_uuid = try_parse_uuid(arguments.popleft())
        password = _get_password_from_arguments(arguments, repeat=True)

        proceed_with_host_uuid_cli(my_uuid)

        with db.RDB() as rdbw:
            my_host = Queries.Inhabitants.get_host_by_uuid(my_uuid, rdbw)
            my_username = my_host.username

            _digest = \
                crypto.generate_digest(my_username,
                                       password,
                                       common_settings.HTTP_AUTH_REALM_NODE)

            Queries.Inhabitants.update_user_digest(my_username, _digest, rdbw)

        print(u'For host {host} (with user {user}), '
              u'saving the following digest: {digest}'.format(host=my_uuid,
                                                              user=my_username,
                                                              digest=_digest))
Example #3
    def __create_dataset_from_incoming_message(self, _message):
        """
        When the incoming message is received, the dataset is created
        from its data; i.e. either the existing one is used,
        or a completely new one is generated.

        @rtype: DatasetOnChunks, NoneType
        """
        _my_host = _message.dst

        if _message.dataset_uuid is not None:
            # Using the prebuilt dataset.

            with db.RDB() as rdbw:
                _dataset = \
                    HostQueries.HostDatasets.get_my_ds_in_progress(
                        _my_host.uuid, _message.dataset_uuid, rdbw)

                if _dataset is not None:
                    assert _dataset.uuid == _message.dataset_uuid, \
                           (_dataset.uuid, _message.dataset_uuid)

        else:
            raise Exception('Not supported message: {!r}'.format(_message))

        return _dataset
Example #4
    def is_path_hot(self, path):
        """
        Check whether the path is "hot", i.e. was recently changed and will
        (likely) be backed up again soon.

        @type path: basestring
        @rtype: bool
        """

        # 1. Is the path already snapshotted in the DB?
        with db.RDB() as rdbw:
            if HostQueries.HostFiles.is_file_going_to_backup(path, rdbw):
                return True

        # 2. Is the path in any intermediate buffers?

        # 2.1
        with self.__cooling_down_to_store_lock:
            if path in self.__cooling_down_to_store:
                return True
        # 2.2
        with self.__file_states_ready_to_write_lock:
            if path in self.__file_states_ready_to_write:
                return True

        # Otherwise, the path is not hot
        return False
Example #5
    def delete_chunks(self, chunk_uuids_to_delete):
        """
        Delete some (real, non-dummy!) chunks (from FS and from DB).
        Yields the UUID of each chunk after it has been removed.

        @note: For now, you need to call update_dummy_chunks_size()
               manually after deleting the chunks.

        @param chunk_uuids_to_delete: the UUIDs of the chunks to be deleted.
        @type chunk_uuids_to_delete: col.Iterable
        """
        # For clarity only
        super(ChunkStorageFS, self).delete_chunks(chunk_uuids_to_delete)

        chunk_uuids_to_delete = list(chunk_uuids_to_delete)
        with self.__chunk_op_lock:
            logger.debug('Removing %i chunks', len(chunk_uuids_to_delete))

            # First remove all the chunks from the DB altogether,..
            with db.RDB() as rdbw:
                Queries.Chunks.delete_chunks(chunk_uuids_to_delete, rdbw)

            # ... then remove them from FS one by one.
            for chunk_uuid in chunk_uuids_to_delete:
                try:
                    os.unlink(self.__get_chunk_file_path(chunk_uuid,
                                                         is_dummy=False))
                except Exception as e:
                    logger.warning('Problem during removing chunk %s: %s',
                                   chunk_uuid, e)
                # Let's consider the chunk deleted even if an exception
                # occurred during the unlink() call. Either way, there is
                # no such chunk anymore.
                yield chunk_uuid
                logger.debug('Deleted chunk %s', chunk_uuid)
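
Note that delete_chunks() above is a generator: no chunk is actually removed
until the caller iterates over the result. A self-contained toy illustration
of that laziness (the names here are made up, not the project's API):

    def delete_items(items):
        """Toy generator: delete lazily, yielding each item once removed."""
        for item in list(items):  # iterate over a copy while mutating
            items.discard(item)
            yield item

    pending = {'a', 'b', 'c'}
    gen = delete_items(pending)
    assert len(pending) == 3  # nothing removed yet: generator not consumed
    removed = list(gen)       # driving the generator performs the removals
    assert not pending and sorted(removed) == ['a', 'b', 'c']
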
Example #6
    def __backup_snapshotted_files_if_needed(self):
        """
        Look over the existing snapshots and, if some files require backing up,
        start backup for them.
        """
        assert not in_main_thread()

        max_birth = datetime.utcnow() - FILE_COOL_DOWN_TO_BACKUP

        with db.RDB() as rdbw:
            dirs_with_ugroups = \
                HostQueries.HostFiles.get_base_directories_with_ugroups(rdbw)

            dirs_with_ugroups = list(dirs_with_ugroups)  # to close rdbw

        for base_dir, ugroup in dirs_with_ugroups:
            logger.debug('Checking for non-backed-up data in %r for %r',
                         base_dir,
                         ugroup)

            with db.RDB() as rdbw:
                need_backup, files = \
                    inonempty(HostQueries.HostFiles
                                         .get_files_for_backup_older_than(
                                              max_birth, ugroup.uuid, rdbw))

                # files is an iterable over LocalPhysicalFileStateRel

                # eagerly evaluate to leave RDB context
                files = list(files)

            # Now we've received the flag telling whether any files exist
            # which need to be backed up, as well as the iterator over the
            # file names to be backed up, together with their sizes and
            # last change times.
            # The extra data (size, last change time) is needed so that we
            # back up only that specific version of the file, and won't
            # back it up if it has changed since (it will be backed up
            # in a different transaction).

            if not need_backup:
                logger.debug('No changes to be synced for %r', ugroup)
            else:
                logger.debug(
                    'On auto sync, creating new dataset and '
                    'launching backup for %r', ugroup)

                self.__backup_some_phys_files(base_dir, files, ugroup)
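
The files = list(files) trick above deserves a note: the query result is a
lazy iterable tied to the open rdbw, so it must be consumed eagerly before
the "with" block closes the connection. The same pitfall can be reproduced
with plain sqlite3 (an illustration only, unrelated to this project's code):

    import sqlite3

    conn = sqlite3.connect(':memory:')
    conn.execute('CREATE TABLE t (x INTEGER)')
    conn.executemany('INSERT INTO t VALUES (?)', [(1,), (2,)])

    cursor = conn.execute('SELECT x FROM t')
    rows = list(cursor)  # eager: consume while the connection is alive
    conn.close()
    # Iterating over "cursor" here would raise ProgrammingError,
    # while "rows" remains usable after the close.
    print(rows)  # [(1,), (2,)]
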
Example #7
    def on_end(self):
        if not self.failure:
            logger.debug('END RECEIVED: %r', self.message_ack)

            _ack = self.message_ack
            _node_uuid = _ack.src.uuid

            _result = _ack.ack_result
            _username = _ack.ack_username
            _host_uuid = _ack.ack_host_uuid
            _user_groups = _ack.ack_groups
            _ssl_cert = _ack.ack_ssl_cert

            if _host_uuid is not None and _ssl_cert is not None:
                # Received the certificate. But is it valid?
                if ssl.validate_host(_ssl_cert,
                                     node_uuid=_node_uuid,
                                     host_uuid=_host_uuid):
                    logger.debug('Received the SSL cert for %r (%r), saving.',
                                 _username, _ssl_cert)

                    with db.RDB() as rdbw:
                        # Note that this is most likely written
                        # to the NULL_UUID db.
                        HostQueries.HostSettings.set(
                            HostQueries.HostSettings.SSL_CERTIFICATE,
                            _ssl_cert,
                            rdbw=rdbw)
                        HostQueries.HostUsers.set_my_user_groups(
                            _username, _user_groups, rdbw=rdbw)
                        logger.debug('Written result for Login transaction.')

                else:
                    logger.error('Received the wrong certificate %r '
                                     'signed by %r: '
                                     'from %r, to %r',
                                 _ssl_cert.get_subject(),
                                 _ssl_cert.get_issuer(),
                                 _node_uuid, _host_uuid)
                    _ssl_cert = None  # for future writing to the state

            with self.open_state(for_update=True) as state:
                if state.username != _username:
                    # That's ok, it may be a case mismatch
                    logger.debug('Requested username %r but received %r',
                                 state.username, _username)

                state.ack_result = _result
                state.ack_username = _username
                state.ack_host_uuid = _host_uuid
                state.ack_groups = _user_groups
                state.ack_ssl_cert = _ssl_cert
Example #8
    def __get_restore_directory(self):
        """
        Get the restore directory to use, or None if it cannot be found/used.

        The result is always normalized/absolute.
        """
        _message = self.message

        my_host = self.manager.app.host
        ugroup = _message.ugroup
        wr_uuid = _message.wr_uuid
        time_started = datetime.utcnow()

        target_dir = None  # we'll modify it and return

        # The dataset is considered "auto-sync" if it was invoked by the node,
        # i.e. its wr_uuid is None.
        #assert _message.sync == (wr_uuid is None) == (ds.uuid is not None), \
        #       (_message.sync, wr_uuid, ds.uuid)

        if _message.sync:
            # The Restore transaction was invoked by the Node itself.
            # For "sync" datasets, the restore directory
            # is the synced directory.
            logger.debug('RDB lookup for restore directory for %r', ugroup)
            with db.RDB() as rdbw:
                target_dir = \
                    HostQueries.HostFiles.get_base_directory_for_ugroup(
                        ugroup.uuid, rdbw)
            if target_dir is None:
                logger.debug('%r is not yet bound to the user', ugroup)
                self.manager.app.add_me_to_group(ugroup)
                target_dir = host_settings._get_ugroup_default_sync_dir(
                    my_host.uuid, ugroup)
        elif host_settings.get_feature_set().web_initiated_restore:
            # 1. New-style logic:
            #    the directory is made of dataset.time_started
            target_dir = host_settings.get_default_restore_dir(my_host.uuid)
            subdir_name = time_started.strftime('%d_%B_%Y_%H-%M-%S')
            target_dir = os.path.join(target_dir, subdir_name)
        else:
            # 2. Old-style logic:
            #    the directory was saved before, in WantRestore.
            target_dir = \
                RestoreTransaction_Host.restore_targets_by_wr_uuid \
                                       .get(wr_uuid)
            if target_dir is None:
                logger.debug('No target directory stored for %r', wr_uuid)

        return abspath(target_dir) if target_dir is not None else None
Example #9
    def __start_syncing(self):
        """
        Scan the watched directory for changes that happened offline,
        and launch the online dir watcher.
        """
        assert not in_main_thread()
        cls = self.__class__
        logger.debug('Starting syncing...')

        if __debug__ and _TMP_ENABLE_DUMMY_DATA_TRANSFER_VENTILATOR:
            from .mocks import _start_dummy_data_transfer_ventilator
            _start_dummy_data_transfer_ventilator()

        watched_directories = host_settings.get_selected_paths()

        if watched_directories is None:
            logger.debug('No paths-to-backup setting, not syncing')
        else:
            _watched_dirs_len = len(watched_directories)
            if _watched_dirs_len != 1:
                logger.error(
                    '%i directories to be synced instead of 1, '
                    'not syncing anything then: %s', _watched_dirs_len,
                    watched_directories)
            else:
                _ugroup_uuid = self.host.user.base_group.uuid

                watched_dir = watched_directories.iterkeys().next()
                with db.RDB() as rdbw:
                    base_dir_id = \
                        HostQueries.HostFiles.add_or_get_base_directory(
                            watched_dir, _ugroup_uuid, rdbw)

                logger.debug('Checking the directory %s for offline changes',
                             watched_dir)
                cls.take_base_directory_snapshot(watched_dir, _ugroup_uuid)
                logger.debug('Snapshot taken!')

                # We get here after all preparations have been done (and
                # maybe the backup was even started for the files which were
                # changed offline before the host restart).
                #
                # At this point, we are going to start the directory tracker.
                logger.debug('Enabling the FS notification watcher for %r',
                             watched_dir)

                callFromThread(
                    self.__fsnotify_manager.watch, watched_dir,
                    partial(self.__on_fs_change, watched_dir, base_dir_id))
                callFromThread(self.__do_next_iteration_of_file_state_bunch)
Example #10
            def _on_final_dataset_progress_success(p_state):
                """
                This method is called after the PROGRESS transaction
                (which reports the dataset to the node) has succeeded.

                @type p_state: ProgressTransaction_Host.State
                """
                logger.debug('Finalizing the %r backup.', self.dataset)

                with db.RDB() as rdbw:
                    # Mark the current dataset as completed
                    # only after the response from the node is received.
                    Queries.Datasets.update_dataset(_my_host.uuid,
                                                    self.dataset,
                                                    rdbw)
Example #11
    def _test_take_snapshot(arguments):
        if len(arguments) < 2:
            cli_error('Must pass the base host UUID and the directory path '
                      'to take snapshot!')
        my_uuid = try_parse_uuid(arguments.popleft())
        dir_path = arguments.popleft()
        print(u'Taking snapshot of {!r}'.format(dir_path))

        proceed_with_host_uuid_cli(my_uuid)

        with db.RDB() as rdbw:
            host = Queries.Inhabitants.get_host_with_user_by_uuid(my_uuid,
                                                                  rdbw)

        UHostApp.take_base_directory_snapshot(dir_path,
                                              host.user.base_group.uuid)
Example #12
    def __bind_single_file_state_to_file_if_needed(self, ds, base_dir_id,
                                                   file_):
        """
        @param base_dir_id: the ID of the base directory in the DB,
            to improve performance of queries.
        @type base_dir_id: int, NoneType
        """
        if ds is not None:
            _file_states = [file_]
            logger.verbose('Binding file state %r to %r (base dir %r)', file_,
                           ds, base_dir_id)
            with db.RDB() as rdbw:
                HostQueries.HostFiles.add_file_states(base_dir_id,
                                                      _file_states, rdbw)
                HostQueries.HostFiles.bind_file_states_to_files(
                    ds.uuid, _file_states, rdbw)
Example #13
    def __init__(self, *args, **kwargs):
        """Constructor."""
        super(BackupTransaction_Host, self).__init__(*args, **kwargs)
        self.__random = Random(42)

        with db.RDB() as rdbw:
            my_user = HostQueries.HostUsers.get_my_user(rdbw)

        self.__cryptographer = None

        self.__progress_notif_deferreds = []
        self.__progress_notif_deferredlist = None

        self.ack_result_code = BackupMessage.ResultCodes.OK
        self.paused = False
        self.dataset = None
Example #14
    def __get_recently_changed_paths(self):
        """
        Return the recently changed ("hot") file paths
        that will probably get into the next dataset.

        @note: pretty heavy function, use with care!

        @rtype: set

        @note: obsolete!
        """
        to_backup_paths = []
        max_birth = datetime.utcnow()
        with db.RDB() as rdbw:
            _dirs_with_ugroups = \
                HostQueries.HostFiles.get_base_directories_with_ugroups(rdbw)

            # Eagerly evaluate, to be able to run next queries.
            dirs_with_ugroups = list(_dirs_with_ugroups)

            for base_dir, ugroup in dirs_with_ugroups:
                new_paths = \
                    (os.path.join(base_dir, f.rel_path)
                         for f in HostQueries.HostFiles
                                             .get_files_for_backup_older_than(
                                                  max_birth, ugroup.uuid,
                                                  rdbw))
                to_backup_paths.extend(new_paths)

        # to_backup_paths now contains the non-backed-up, but already
        # snapshotted paths from the DB.

        # Two buffers before snapshotting the files
        with self.__cooling_down_to_store_lock:
            cooling_down_paths = self.__cooling_down_to_store.keys()

        with self.__file_states_ready_to_write_lock:
            ready_to_write_paths = self.__file_states_ready_to_write.keys()

        # The values may have duplicates; deduplicate them via frozenset().
        return frozenset(
            chain(to_backup_paths, cooling_down_paths, ready_to_write_paths))
Example #15
    def on_begin(self):
        """
        @todo: If we ever change the app.known_peers to use C{PeerBook} class,
        then it is the class who must control the DB storage,
        rather than the caller.
        """
        cls = self.__class__

        assert self.is_incoming(), repr(self)

        _known_peers = self.manager.app.known_peers
        _message = self.message

        # Add the expected hosts to the list of known peers.
        add_peers_to_db = []
        for exp_peer_uuid in chain(_message.expect_replication.iterkeys(),
                                   _message.expect_restore.iterkeys()):
            assert isinstance(exp_peer_uuid, PeerUUID), \
                   exp_peer_uuid

            if exp_peer_uuid not in _known_peers:
                # Need to add some peer, unknown before.
                peer_to_add = Host(uuid=exp_peer_uuid)
                _known_peers[exp_peer_uuid] = peer_to_add
                # Add to the DB as well, but later
                add_peers_to_db.append(peer_to_add)

        expect_host_chunk_pairs = \
            chain(cls._expect_mapping_as_list(_message.expect_replication,
                                              is_restore=False),
                  cls._expect_mapping_as_list(_message.expect_restore,
                                              is_restore=True))

        # If we actually need to update the database, do it now.
        with db.RDB() as rdbw:
            if add_peers_to_db:
                logger.debug('Adding peers %r', add_peers_to_db)
                Queries.Inhabitants.set_peers(add_peers_to_db, rdbw)

            HostQueries.HostChunks.expect_chunks(expect_host_chunk_pairs, rdbw)
Example #16
        def on_reactor_start(app):
            """
            @type app: UHostApp
            """
            @exceptions_logged(logger)
            @contract_epydoc
            def on_backup_completed(backup_succeeded):
                """
                @param backup_succeeded: whether the backup attempt
                    has succeeded overall.
                @type backup_succeeded: bool
                """
                if backup_succeeded:
                    print('Backup completed successfully!')
                else:
                    print('The node disallowed the backup.')

                if (stay_alive_on_success
                        if backup_succeeded else stay_alive_on_failure):
                    print("Stayin' alive. Stayin' alive.")
                else:
                    app.terminate_host()

            with db.RDB() as rdbw:
                all_datasets = Queries.Datasets.get_just_datasets(
                    my_uuid, rdbw)

            incomplete_datasets_exist, incomplete_datasets = \
                inonempty(ds for ds in all_datasets if not ds.completed)

            if not incomplete_datasets_exist:
                # No incomplete datasets to backup
                on_backup_completed(False)
            else:
                # Start the backup of the first dataset in the sequence.
                incomplete_dataset_to_start = incomplete_datasets.next()
                app.auto_start_backup = False
                app.start_backup(incomplete_dataset_to_start.uuid,
                                 on_backup_completed)
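
Both this example and Example #6 rely on an inonempty() helper that reports
whether an iterable yields anything, without losing its items. Its actual
implementation is not shown here; a plausible sketch under that assumed
semantics:

    from itertools import chain

    def inonempty(iterable):
        """Return (is_nonempty, iterable), peeking at the first item.

        A sketch only: the semantics are assumed from the call sites above.
        """
        it = iter(iterable)
        try:
            first = next(it)
        except StopIteration:
            return False, iter(())
        return True, chain([first], it)
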
Example #17
    def __try_save_next_bunch_of_file_states(self):
        """Check if we have multiple states to store to the DB, and do it."""
        assert not in_main_thread()

        with self.__file_states_ready_to_write_lock:
            all_states = self.__file_states_ready_to_write.values()
            self.__file_states_ready_to_write = {}

        # "states" contains tuples like (base_dir_id, state).
        # Group them by base_dir_id, and write multiple file states at once.

        if all_states:
            logger.debug('Writing %i file state(s) at once', len(all_states))

            grouped_by_base_dir = sorted_groupby(all_states, key=itemgetter(0))

            for base_dir_id, per_base_dir in grouped_by_base_dir:
                states_to_write = imap(itemgetter(1), per_base_dir)

                logger.debug('Writing states for base dir %r', base_dir_id)
                with db.RDB() as rdbw:
                    HostQueries.HostFiles.add_file_states(
                        base_dir_id, states_to_write, rdbw)
            logger.debug('Wrote the states')
Example #18
    def __get_dummy_chunk_uuids_on_fs(self):
        """
        For a given host UUID, return the list of the chunk UUIDs
        for all the (dummy-only) chunks which are actually present
        on the file system for this host.

        As a convenient side effect, remove from the DB
        all the dummy chunks which are actually missing from the FS.

        @return: the (possibly non-reiterable) Iterable of the UUIDs
                 for the dummy chunks available on the FS.
        @rtype: col.Iterable
        """
        with self.__chunk_op_lock:
            present_chunk_uuids = frozenset(self.__get_chunk_uuids_on_fs())

            with db.RDB() as rdbw:
                dummy_chunk_uuids_in_db = \
                    set(HostQueries.HostChunks
                                   .get_all_dummy_chunk_uuids(rdbw=rdbw))

            # Do we have dummy chunks in DB actually absent from FS?
            # Wipe them.
            in_db_not_on_fs = dummy_chunk_uuids_in_db - present_chunk_uuids
            dummy_chunk_uuids_in_db -= in_db_not_on_fs
            assert dummy_chunk_uuids_in_db <= present_chunk_uuids, \
                   (dummy_chunk_uuids_in_db, present_chunk_uuids)
            if in_db_not_on_fs:
                logger.debug('%i chunks in DB but not on FS, '
                                 'deleting from DB:\n%r',
                             len(in_db_not_on_fs), in_db_not_on_fs)
                HostQueries.HostChunks.delete_dummy_chunks(in_db_not_on_fs)

            # The actual dummy chunks are the intersection of "present chunks"
            # and "chunks marked as dummy in DB".
            return present_chunk_uuids & dummy_chunk_uuids_in_db
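
The reconciliation above is plain set algebra over chunk UUIDs; here is the
same computation on tiny hypothetical values:

    present_on_fs = frozenset(['a', 'b', 'c'])  # chunk files found on disk
    dummy_in_db = set(['b', 'c', 'd'])          # chunks marked dummy in DB

    in_db_not_on_fs = dummy_in_db - present_on_fs  # {'d'}: gone from FS
    dummy_in_db -= in_db_not_on_fs                 # forget it in the DB too
    assert dummy_in_db <= present_on_fs

    usable_dummies = present_on_fs & dummy_in_db   # {'b', 'c'}
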
Example #19
    def __restore_files(self):
        """Internal procedure which actually restores the files.

        @todo: the Fingerprint calculation should be turned into
            "file is read by blocks and then repacked into 16KiB segments";
            then recalculation of the fingerprint in case of FP mismatch
            won't be needed.
        """
        _message = self.message
        my_host = self.manager.app.host
        feature_set = self.manager.app.feature_set
        ds = _message.dataset
        wr_uuid = _message.wr_uuid
        ugroup = _message.ugroup

        restore_directory = self.__get_restore_directory()
        assert _message.sync == (wr_uuid is None) == (ds.uuid is not None), \
               (_message.sync, wr_uuid, ds)

        base_dir_id = None  # will be used later

        if restore_directory is None:
            logger.error('Do not know the restore directory')
        else:
            logger.debug('Going to restore dataset %r for %r to %r', ds,
                         ugroup, restore_directory)
            if not os.path.exists(restore_directory):
                os.makedirs(restore_directory)

            group_key = ugroup.enc_key if feature_set.per_group_encryption \
                                       else None
            cryptographer = Cryptographer(group_key=group_key,
                                          key_generator=None)

            is_whole_dataset_restored = _message.sync

            logger.debug('Restoring %s files from dataset: %r',
                         'all' if is_whole_dataset_restored else 'selected',
                         coalesce(ds, 'N/A'))

            # TODO: use the "delete time" from the LocalPhysicalFileState!
            _now = datetime.utcnow()

            # If we are syncing-in the whole dataset, we should write it
            # into the DB as a whole. The files/file_locals will be bound to it
            # so that after restore, we'll know on this Host that these states
            # are fully synced to the cloud already (in fact, they came
            # from the cloud).
            if _message.sync:
                # Let's hack into the files and substitute the base_dir.
                # TODO: do it better!
                for f in _message.files.iterkeys():
                    f.base_dir = restore_directory

                # Write the whole dataset to the DB
                _small_files = _message.files.keys()  # not iterkeys() for now!
                _dirs = {restore_directory: (_small_files, [])}

                # Given the information in the inbound message about
                # the whole dataset, store this dataset in the DB.
                dataset = DatasetWithDirectories(
                    name=ds.name,
                    sync=ds.sync,
                    directories=_dirs,
                    # TODO: transport real data
                    # from the node
                    uuid=DatasetUUID.safe_cast_uuid(ds.uuid),
                    ugroup_uuid=UserGroupUUID.safe_cast_uuid(ugroup.uuid),
                    time_started=ds.time_started,
                    time_completed=_now)

                with db.RDB() as rdbw:
                    # Do we already have the dataset?
                    _ds_in_progress = \
                        HostQueries.HostDatasets.get_my_ds_in_progress(
                            host_uuid=my_host.uuid,
                            ds_uuid=dataset.uuid,
                            rdbw=rdbw)

                    if _ds_in_progress is None:
                        # We don't have it, insert.
                        dummy_ds_uuid = \
                            HostQueries.HostDatasets.create_dataset_for_backup(
                                my_host.uuid, dataset, rdbw)
                        assert dummy_ds_uuid == dataset.uuid, \
                               (dummy_ds_uuid, dataset.uuid)

                    base_dir_id = \
                        HostQueries.HostFiles.add_or_get_base_directory(
                            restore_directory, ugroup.uuid, rdbw)

            error_in_any_file_occurred = False

            #
            # Finally, loop over the files and restore each one
            #
            for file_, file_blocks in _message.files.iteritems():
                self.__restore_op_for_path(file_, file_blocks,
                                           is_whole_dataset_restored,
                                           base_dir_id, restore_directory,
                                           cryptographer, ds)

            # Loop over the files completed
            if is_whole_dataset_restored:
                logger.debug('Restoring %r completed, there were %s issues.',
                             ds,
                             'some' if error_in_any_file_occurred else 'no')
                if not error_in_any_file_occurred:
                    with db.RDB() as rdbw:
                        logger.debug('Updating %r at host %s...', ds,
                                     my_host.uuid)
                        ds_to_finish = \
                            Queries.Datasets.get_dataset_by_uuid(ds.uuid,
                                                                 my_host.uuid,
                                                                 rdbw)

                        ds_to_finish.time_completed = datetime.utcnow()
                        logger.debug('Updating %r as completed', ds_to_finish)

                        # Mark the current dataset as completed
                        # only after the response from the node is received.
                        Queries.Datasets.update_dataset(
                            my_host.uuid, ds_to_finish, rdbw)

            # Everything seems ok to this moment
            with self.open_state(for_update=True) as state:
                state.ack_result_code = RestoreMessage.ResultCodes.OK
Example #20
    def on_begin(self):
        """
        @todo: Add errback too.
        """
        cls = self.__class__

        _message = self.message
        _host = _message.dst
        logger.debug('Starting backup...')

        _dataset = self.dataset \
                 = cls.__create_dataset_from_incoming_message(_message)

        if self.manager.app.feature_set.per_group_encryption:
            # Read group key from the user group
            with db.RDB() as rdbw:
                _ugroup = Queries.Inhabitants.get_ugroup_by_uuid(
                              _dataset.ugroup_uuid, rdbw)
            group_key = _ugroup.enc_key
        else:
            group_key = None

        self.__cryptographer = Cryptographer(group_key=group_key,
                                             key_generator=None)

        logger.debug('Created dataset %r.', _dataset)
        if _dataset is None:
            raise Exception('No dataset!')
        else:
            self.__notify_about_backup_started()
            self.__notify_about_backup_running()

            ds_uuid = _dataset.uuid

            with cls.per_dataset_transactions_lock:
                if ds_uuid in cls.per_dataset_transactions:
                    self.ack_result_code = BackupMessage.ResultCodes \
                                                        .GENERAL_FAILURE
                    raise Exception('The dataset {} is already being '
                                        'backed up'.format(ds_uuid))
                else:
                    cls.per_dataset_transactions[ds_uuid] = self

                    # Force-copy it to a dict, so as not to cause race
                    # conditions during the logger message serialization.
                    logger.debug('Added backup %r, per dataset transactions '
                                     'are now %r',
                                 ds_uuid, dict(cls.per_dataset_transactions))

            if _dataset is None:
                raise Exception('The dataset {} is not found.'.format(ds_uuid))

            # Initialize chunks.
            # Please note that these chunks may include the ones
            # which are actually present in the cloud already
            # but under a different UUID.
            # This will be fixed later, after NEED_INFO_ACK is received.

            # All chunks, including the already uploaded ones;
            # contains ChunkFromFilesFinal objects.

            # _dataset is MyDatasetOnChunks.
            # dataset.__chunks is list of ChunkFromFilesFinal.

            self.__all_chunks = set(_dataset.chunks())
            assert consists_of(self.__all_chunks, ChunkFromFilesFinal), \
                   repr(self.__all_chunks)
            # Already uploaded chunks; contains Chunk objects.
            with db.RDB() as rdbw:
                self.__uploaded_chunks = \
                    set(HostQueries.HostChunks
                                   .get_uploaded_chunks(_dataset.uuid,
                                                        rdbw=rdbw))
            assert consists_of(self.__uploaded_chunks, Chunk), \
                   repr(self.__uploaded_chunks)
            # Only the pending chunks.
            self.__uploading_chunks = {ch for ch in self.__all_chunks
                                          if ch not in self.__uploaded_chunks}
            assert consists_of(self.__uploading_chunks, ChunkFromFilesFinal), \
                   repr(self.__uploading_chunks)

            #
            # Now create the NEED_INFO transaction.
            # But only if we have chunks to ask!
            #
            if self.__uploading_chunks:
                _query = {
                    'select': ('chunks.uuid', 'uuid'),
                    'from': 'chunks',
                    'where': {'["hash", "size", "uuid"]':
                                  [c for c in self.__uploading_chunks
                                     if c.hash is not None]}
                }

                nifn_tr = self.manager.create_new_transaction(
                              name='NEED_INFO_FROM_NODE',
                              src=_message.dst,
                              dst=self.manager.app.primary_node,
                              parent=self,
                              # NEED_INFO_FROM_NODE-specific
                              query=_query)

                nifn_tr.completed.addCallbacks(self._on_child_nifn_completed,
                                               partial(logger.error,
                                                       'NI issue: %r'))

            else:
                logger.debug('IMHO, no new chunks to upload. '
                             'Proceeding directly.')
                # Go to the next step directly.
                self._ask_for_backup_hosts()
Example #21
    def migrate_chunks(self, old_path, new_path):
        """
        Migrate the chunks from their previous path to the new one.

        @note: Only non-dummy chunks are migrated;
               dummy chunks are removed from the old place and
               not regenerated at the new place,
               please call update_dummy_chunks_size() manually for that.
        """
        assert old_path != new_path, (old_path, new_path)

        # These two variables will be used to report the progress of the task.
        num, of = 0, 0


        @exceptions_logged(logger)
        def timercb():
            """
            Callback function called on timer firing.
            """
            if (num, of) != (0, 0):
                logger_status_chunks_op.info(
                    'The chunk migration takes too long, completed %i/%i',
                    num, of,
                    extra={'_type': 'chunks_migration.progress',
                           'num': num,
                           'of': of})


        timer_service = TimerService(1.0, timercb)

        # If the task takes more than 3 seconds, start notifying
        # about the progress
        _callLater = reactor.callLater  # pylint:disable=E1101,C0103
        # Won't worry about deferToThread here, because it is very fast.
        long_task_timer = _callLater(3.0, timer_service.startService)

        with self.__chunk_op_lock:
            try:
                # What chunk files are present on the FS,
                # and what are the chunk UUIDs?
                present_chunk_uuids_iter = self.__get_chunk_uuids_on_fs()

                with db.RDB() as rdbw:
                    dummy_chunks_in_db = \
                        frozenset(HostQueries.HostChunks
                                             .get_all_dummy_chunk_uuids(
                                                  rdbw=rdbw))

                # First, remove all the dummy chunks
                removed_dummy_chunks = []
                for dummy_chunk_uuid in dummy_chunks_in_db:
                    try:
                        assert self.__get_chunk_file_path(dummy_chunk_uuid,
                                                          is_dummy=True,
                                                          dir_path=old_path) \
                               == self.__get_chunk_file_path(dummy_chunk_uuid,
                                                             is_dummy=True)

                        _path = self.__get_chunk_file_path(dummy_chunk_uuid,
                                                           is_dummy=True)
                        if os.path.exists(_path):
                            os.unlink(_path)
                        # If we removed the file successfully, let's append it
                        # to the list of the chunks which are to be removed
                        # from the DB.
                        removed_dummy_chunks.append(dummy_chunk_uuid)
                    except Exception as e:
                        logger.error('Cannot remove dummy chunk %s: %s',
                                     dummy_chunk_uuid, e)
                HostQueries.HostChunks \
                           .delete_dummy_chunks(removed_dummy_chunks)

                # This dictionary maps the chunk UUID
                # to a tuple of the old filename and the new filename.
                #
                # Btw, no need to convert present_chunk_uuids_iter to set
                # and do the set difference, as it is the same complexity
                # as for ... if not in.
                uuid_to_filenames = \
                    {u: (self.__get_chunk_file_path(u,
                                                    is_dummy=False,
                                                    dir_path=old_path),
                         self.__get_chunk_file_path(u,
                                                    is_dummy=False,
                                                    dir_path=new_path))
                         for u in present_chunk_uuids_iter
                         if u not in dummy_chunks_in_db}

                # Now, move the files to the new directory.
                of = len(uuid_to_filenames)
                for u, (old_filename, new_filename) \
                        in uuid_to_filenames.iteritems():

                    logger.debug('Moving chunk %s from %s to %s',
                                 u, old_filename, new_filename)

                    try:
                        with open(old_filename, 'rb') as rfh:
                            with open_wb(new_filename) as wfh:
                                wfh.write(rfh.read())
                    except Exception:
                        logger.error('Cannot move chunk %s from %s to %s',
                                     u, old_filename, new_filename)
                    else:
                        try:
                            os.unlink(old_filename)
                        except Exception:
                            logger.error('Cannot remove chunk file %s',
                                         old_filename)

                    num += 1

            except Exception as e:
                logger_status_chunks_op_error.error(
                    'The chunks migration failed: %r',
                    e,
                    extra={'_type': 'chunks_migration.error',
                           '_exc': e,
                           '_tb': traceback.format_exc()})

            finally:
                if (not long_task_timer.called and
                    not long_task_timer.cancelled):
                    long_task_timer.cancel()

                if timer_service.running:
                    timer_service.stopService()