Example #1
    def on_begin(self):
        """
        Perform the actual restoring process, and then send
        the RESTORE message to the host.
        """
        assert self.is_outgoing()

        host_uuid = self.message.dst.uuid

        with db.RDB() as rdbw:
            suspended = TrustedQueries.TrustedUsers.is_user_suspended_by_host(
                            host_uuid, rdbw)
        # If user is suspended, we don't need to restore any data.
        if not suspended:
            with self.open_state(for_update=True) as state:
                # Add this transaction to the list of restores-in-progress

                with db.RDB() as rdbw:
                    chunks_for_files_by_uuid = \
                        TrustedQueries.TrustedBlocks.get_chunks_for_files(
                            host_uuid,
                            state.ds_uuid,
                            state.file_paths_for_basedirs,
                            rdbw)

                    state.pending_chunk_uuids = \
                        set(chunks_for_files_by_uuid.iterkeys())

                logger.verbose('%s::Need to restore chunks to '
                                   'host %s (DS %s): (%i)\n%r',
                               self.uuid, host_uuid, state.ds_uuid,
                               len(state.pending_chunk_uuids),
                               state.pending_chunk_uuids)

                self.__recalc_pending_host_uuids_in_state(host_uuid, state)

                logger.verbose('%s::Now need to restore chunks '
                                   'to host %s: (%i) %r',
                               self.uuid, host_uuid,
                               len(state.pending_chunk_uuids),
                               state.pending_chunk_uuids)

                state.of = len(state.pending_chunk_uuids)
                state.of_bytes = \
                    sum(ch.size()
                            for ch in chunks_for_files_by_uuid.itervalues())

                logger.debug('For %r, total number of chunks is %i, '
                                 'total number of bytes is %i',
                             self, state.of, state.of_bytes)

                success = self.__request_restoring_more_chunks(state)

            if not success:
                self.__restore_ops_failed()  # outside the state context!
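
The examples on this page all acquire a short-lived database wrapper with "with db.RDB() as rdbw:" and pass rdbw explicitly into the query helpers. The project's real db.RDB class is not shown here; the snippet below is only a minimal sketch of how such a context manager could be structured (sqlite3 and the rdb_wrapper name are illustrative assumptions, not the actual implementation).

# Hypothetical sketch only: the "short-lived DB wrapper" pattern seen above,
# not the project's actual db.RDB class.
import sqlite3
from contextlib import contextmanager

@contextmanager
def rdb_wrapper(db_path=':memory:'):
    """Yield a connection-like wrapper; commit on success, roll back on error."""
    conn = sqlite3.connect(db_path)
    try:
        yield conn
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    finally:
        conn.close()

# Usage mirrors the examples: keep the wrapper open only as long as needed.
with rdb_wrapper() as rdbw:
    rdbw.execute('CREATE TABLE IF NOT EXISTS datasets (uuid TEXT PRIMARY KEY)')
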
Example #2
    def on_end(self):
        assert self.is_outgoing()
        cls = self.__class__

        yield defer.succeed(None)  # to satisfy @inlineCallbacks

        with self.open_state(for_update=True) as state:
            if not state.success:
                logger.error("Couldn't restore %r", self)

            else:
                _ds_uuid = state.ds_uuid

                # Receiving the RESTORE_ACK from the host
                ack_result_code = self.message_ack.ack_result_code
                host_uuid = self.message_ack.src.uuid

                result_is_good = \
                    RestoreMessage.ResultCodes.is_good(ack_result_code)

                if result_is_good:
                    with db.RDB() as rdbw:
                        ds = Queries.Datasets.get_dataset_by_uuid(_ds_uuid,
                                                                  host_uuid,
                                                                  rdbw)

                    restore_is_sync = state.is_sync
                    logger.debug('%s:: %s restore succeeded for %s',
                                 'Whole-dataset' if restore_is_sync
                                                 else 'selective',
                                 'the whole dataset' if ds.sync
                                                     else 'some files',
                                 self.uuid)
                    if restore_is_sync:
                        # It doesn't necessarily mean that ds.sync is True!
                        logger.debug('%s::Marking whole dataset %r '
                                         'as synced to %r',
                                     self.uuid, _ds_uuid, host_uuid)
                        with db.RDB() as rdbw:
                            TrustedQueries.TrustedDatasets \
                                          .mark_dataset_as_synced_to_host(
                                               _ds_uuid, host_uuid, rdbw)
                else:
                    logger.debug('%s::Restore (DS %r) failed at %r '
                                     'with code %r',
                                 self.uuid, _ds_uuid, host_uuid,
                                 ack_result_code)

                state.ack_result_code = ack_result_code

            logger.debug('%s::on_end done!', self.uuid)
Example #3
    def __test_web_upload_remove(op_upload, arguments):
        """Common code for CLI operations for web upload/web remove."""
        op_name = 'upload' if op_upload else 'remove'

        if len(arguments) < 2:
            cli_error(u'You must pass at least 2 arguments to this command: '
                      u'the UUID of the group, and at least '
                      u'one filename to {op}.'
                          .format(op=op_name))
        else:
            group_uuid = UserGroupUUID.safe_cast_uuid(
                            try_parse_uuid(arguments.popleft()))

            file_paths = []
            while arguments:
                file_paths.append(arguments.popleft())

            print(u'{} file paths for group {}:\n{}'.format(
                      op_name.capitalize(), group_uuid, '\n'.join(file_paths)))

            node_map = proceed_with_node()

            chunk_storage = ChunkStorageBigDB(bdbw_factory=ds.BDB)

            with db.RDB() as rdbw:
                __test_upload_remove_files(group_uuid, file_paths,
                                           chunk_storage, op_upload, rdbw)
Example #4
    def on_begin(self):
        """
        This method is called when the request together with its body
        is received completely.
        """
        _message = self.message

        datasets_to_delete = _message.ds_uuids_to_delete

        __deleted_successfully = set()
        for ds_uuid in datasets_to_delete:
            # Note: we are not using delete_datasets() here,
            # because it is possible that some datasets will fail to delete.
            try:
                with db.RDB() as rdbw:
                    Queries.Datasets.delete_dataset(_message.src.uuid, ds_uuid,
                                                    rdbw)
            except:
                # This is only a warning, as we cannot guarantee that
                # the host sent proper UUIDs that may indeed be deleted.
                logger.warning('Could not delete dataset %s from host %s',
                               ds_uuid, _message.src.uuid)
            else:
                __deleted_successfully.add(ds_uuid)

        with self.open_state(for_update=True) as state:
            state.deleted_successfully = __deleted_successfully
Example #5
    def send_updated_schedules_to_host(self, host_uuid):
        """
        Given the UUID of a host,
        start an appropriate "UPDATE_CONFIGURATION" transaction.

        @type host_uuid: UUID
        """
        _setting_name = Queries.Settings.BACKUP_SCHEDULE

        with db.RDB() as rdbw:
            settings = {
                _setting_name:
                Queries.Settings.get_setting(host_uuid=host_uuid,
                                             setting_name=_setting_name,
                                             direct=True,
                                             with_time=True,
                                             rdbw=rdbw)
            }

        uc_tr = self.server_process.tr_manager.create_new_transaction(
            name='UPDATE_CONFIGURATION',
            src=self.server_process.me,
            dst=self.app.known_hosts[host_uuid],
            parent=None,
            # UPDATE_CONFIGURATION-specific
            settings=settings)
Example #6
    def refreshed_progress(cls, tr_manager, host_uuid, tr_uuid):
        """Get a refreshed (to be up-to-date) progress state for a transaction.

        @type tr_manager: AbstractTransactionManager
        @type host_uuid: PeerUUID
        @type tr_uuid: TransactionUUID

        @return: a newly updated state, or C{None} if the state could not
            be found.
        @rtype: RestoreTransaction_Node.State, NoneType
        """
        logger.debug('Force progress recalculation for %r on %r',
                     tr_uuid, host_uuid)
        try:
            # What chunks are really present on the Host?
            with db.RDB() as rdbw:
                all_chunk_uuids_on_target_host = \
                    set(TrustedQueries.TrustedChunks.get_chunk_uuids_for_host(
                            host_uuid, rdbw))

            with tr_manager.open_tr_state(tr_uuid=tr_uuid,
                                          for_update=False) as state:
                return cls.__refresh_progress(all_chunk_uuids_on_target_host,
                                              refreshed_state=state)
        except:
            logger.exception('Could not recalculate progress for %r', tr_uuid)
            return None
Example #7
    def __recalc_pending_host_uuids_in_state(self, host_uuid, state):
        """Calculate/recalculate the C{pending_host_uuids} field in state.

        The calculation is based on the current situation in the DB
        and on the current transaction state (which may also be updated).
        The C{state.pending_host_uuids} field must already be present!

        @note: C{state} must be opened for update.
        """
        cls = self.__class__

        with db.RDB() as rdbw:
            chunk_uuids_by_host_uuid = \
                cls.__get_chunk_uuids_by_host_uuid(
                    state.pending_chunk_uuids,
                    acceptor_host_uuid=host_uuid,
                    rdbw=rdbw)

        # What chunks are already present on the acceptor host?
        already_available = chunk_uuids_by_host_uuid.get(host_uuid,
                                                         set())
        logger.verbose('%s::But these chunks are already '
                           'available at %s: %r',
                       self.uuid, host_uuid, already_available)

        state.pending_chunk_uuids -= already_available

        # Explicitly create a sorted copy.
        # (The commented-out variant below would also exclude the host_uuid.)
        # state.pending_host_uuids = \
        #     sorted(u for u in chunk_uuids_by_host_uuid.iterkeys()
        #              if u != host_uuid)
        state.pending_host_uuids = \
            sorted(chunk_uuids_by_host_uuid.iterkeys())
Example #8
def get_all_groups_space_info(arguments):
    node_map = proceed_with_node()
    print('Calculating the utilization info for groups.')

    with db.RDB() as rdbw:
        group_space_info = data_ops.get_group_space_info(rdbw)
        print('\nPer-group space info:')
        for per_group in group_space_info.per_group:
            print('   {!r}'.format(per_group))
        print('\nOverall information:')
        print('   {!r}'.format(group_space_info.overall))
Example #9
    def host_just_became_alive(self, me, host_uuid):
        """Given a host, restore (to it) all datasets which are missing on it.

        @param me: my node
        @type me: Node

        @param host_uuid: the UUID of the host which just became alive.
        @type host_uuid: PeerUUID
        """
        assert not in_main_thread()

        host = self.__known_hosts[host_uuid]
        logger.verbose('Reviving %r', host)

        tr_manager = self.__app.tr_manager

        with db.RDB() as rdbw:
            suspended = TrustedQueries.TrustedUsers.is_user_suspended_by_host(
                            host.uuid, rdbw)

        if not suspended:
            # Do we have any datasets to restore to this host?
            with db.RDB() as rdbw:
                _ds_uuids_to_restore = \
                    TrustedQueries.TrustedDatasets \
                                  .get_ds_uuids_not_synced_to_host(
                                       host.uuid, rdbw)

                # Let's hope there are not too many dataset UUIDs,
                # and eagerly evaluate the list (so we can get out
                # of the Database Wrapper context).
                ds_uuids_to_restore = list(_ds_uuids_to_restore)
                del _ds_uuids_to_restore  # help GC

            datasets_exist = bool(ds_uuids_to_restore)

            logger.debug('%r just became alive at %r: %sneed to sync',
                         host, me, '' if datasets_exist else 'do not ')

            self.__restore_datasets_to_host(me, host, ds_uuids_to_restore)
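
The comment in this example highlights a recurring idiom in these snippets: lazy query results must be evaluated eagerly (e.g. via list()) before the "with db.RDB()" block exits, because afterwards the wrapper is gone. The sketch below shows the same idea with plain sqlite3 standing in for db.RDB; the table and values are made up for illustration.

# Sketch of the "materialize eagerly before leaving the wrapper" idiom;
# plain sqlite3 and made-up data stand in for db.RDB here.
import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE datasets (uuid TEXT)')
conn.executemany('INSERT INTO datasets VALUES (?)', [('ds-1',), ('ds-2',)])

cursor = conn.execute('SELECT uuid FROM datasets')
ds_uuids_to_restore = [row[0] for row in cursor]  # eager copy while still open
conn.close()

# The cursor is now useless, but the eagerly built list remains valid.
print(ds_uuids_to_restore)  # ['ds-1', 'ds-2']
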
Example #10
    def on_begin(self):
        """
        Received the WANT_BACKUP request from a host.
        """
        with db.RDB() as rdbw:
            # Do we need to proceed with the backup or not?
            suspended = \
                TrustedQueries.TrustedUsers.is_user_suspended_by_username(
                    self.message.src.user.name, rdbw)
        if not suspended:
            self.__do_nested_backup()
        else:
            with self.open_state(for_update=True) as state:
                state.ack_result_code = WB_Msg.ResultCodes.USER_IS_SUSPENDED
Example #11
def get_user_presence_history(arguments):
    username = arguments.popleft()

    node_map = proceed_with_node()

    if node_map is not None:
        print('Getting the presence history for user {}...'.format(username))

        with db.RDB() as rdbw:
            presence_history = \
                TrustedQueries.PresenceStat.get_user_presence_history(username,
                                                                      rdbw)
            for host_name, data in presence_history.iteritems():
                print('Host "{}"'.format(host_name))
                for dt, count in data:
                    print('  {}: {} time(s)'.format(dt, count))
Example #12
def proceed_with_node():
    """
    Read the node configuration for the node process.
    If needed, the database file is created and initialized as well.

    If it fails, it returns None and prints the reason for the failure
    (so the caller does not need to do it).

    @returns: The port-to-node_settings mapping, like
        C{settings.get_my_nodes_settings()}; or None if failed for some reason.
    @rtype: dict, NoneType
    """
    settings.configure_logging(postfix='common')
    node_settings_map = settings.get_my_nodes_settings()

    rel_db_url = settings.get_common().rel_db_url
    db.init(rel_db_url, SQLALCHEMY_ECHO)

    node_common_settings = settings.get_common()
    try:
        ds.init(fast_db_url=node_common_settings.fast_db_url,
                big_db_url=node_common_settings.big_db_url)

    except ds.MongoDBInitializationException as e:
        print(u'MongoDB problem: {}'.format(e))

    else:
        # Create the database if the file is missing or 0 bytes long.
        if not db.RDB.is_schema_initialized:
            print('Initializing database schema...')
            try:
                db.create_node_db_schema()
            except Exception:
                cli_error('cannot create the RDB schema:\n%s',
                          traceback.format_exc())
            else:
                print('Successfully initialized.')

        else:
            # If the database is available, launch maintenance procedures.
            with db.RDB() as rdbw:
                Queries.System.maintenance(rdbw)
            logger.debug('Maintenance done')

            __migrate_if_needed()

        return node_settings_map
Example #13
    def __poll_restore_requests_in_thread(self):
        """Perform another iteration of polling the restore requests."""
        assert not in_main_thread()

        poll_uuid = gen_uuid()
        logger.debug('Polling restore requests (%s)', poll_uuid)

        restore_request = True
        while restore_request is not None:
            with ds.FDB() as fdbw:
                restore_request = \
                    FDBQueries.RestoreRequests \
                              .atomic_start_oldest_restore_request(fdbw=fdbw)

            logger.debug('Poll (%s) returned %r', poll_uuid, restore_request)
            if restore_request is not None:
                # We do indeed have a restore request that needs processing.

                # Create new "virtual" dataset with all the data
                # to be restored.
                with db.RDB() as rdbw:
                    new_ds_uuid = \
                        Queries.Datasets.restore_files_to_dataset_clone(
                            restore_request.base_ds_uuid,
                            restore_request.paths,
                            restore_request.ts_start,
                            rdbw)

                # Now we know the new dataset to be restored.
                # Btw, write it into the docstore.
                # Doesn't need to be atomic, as only a single node
                # may be processing it at a time.
                with ds.FDB() as fdbw:
                    FDBQueries.RestoreRequests.set_ds_uuid(
                        _id=restore_request._id,
                        new_ds_uuid=new_ds_uuid,
                        fdbw=fdbw)

                # After creating the dataset, let's restore it to all hosts
                # which are alive.
                _syncer = self.__server_process.app.syncer
                _syncer.restore_dataset_to_lacking_hosts(
                    me=self.__server_process.me,
                    host=None,
                    ds_uuid=new_ds_uuid)

        logger.debug('Polling restore requests (%s) - done', poll_uuid)
Example #14
    def reread_cache_for_host(self, host_uuid):
        """
        Reread all the schedules for a single host from the DB.
        It is assumed they've just been updated, so they should not be
        sent back.

        @param host_uuid: UUID of the host whose schedules need to be updated.
        @type host_uuid: UUID
        """
        logger.debug('Rereading the schedules on %r for host %r',
                     self.server_process.me, host_uuid)

        with self.lock:
            old_schedules = self.get_schedules_by_host_uuid(host_uuid)
            logger.debug('Removing schedules for host %s: %r', host_uuid,
                         old_schedules)
            for schedule in old_schedules:
                self.remove(schedule)

            with db.RDB() as rdbw:
                new_schedules = \
                    Queries.Settings.get_setting(
                        host_uuid=host_uuid,
                        setting_name=Queries.Settings.BACKUP_SCHEDULE,
                        rdbw=rdbw)

                tz_name = Queries.Settings.get_setting(
                    host_uuid=host_uuid,
                    setting_name=Queries.Settings.TIMEZONE,
                    rdbw=rdbw)

            if tz_name:
                try:
                    tz_info = pytz.timezone(tz_name)
                except:
                    logger.error(
                        'Cannot parse timezone %r for host %r, '
                        'fallback to UTC', tz_name, host_uuid)
                    tz_info = pytz.utc
            else:
                tz_info = pytz.utc

            logger.debug('Adding new schedules for host %s: %r', host_uuid,
                         new_schedules)
            for schedule in new_schedules:
                self.add(BackupSchedule.from_dict(host_uuid, tz_info,
                                                  schedule))
Example #15
    def test_web_download(arguments):
        node_map = proceed_with_node()

        if len(arguments) < 3:
            cli_error(u'You must pass at least 3 arguments to this command: '
                      u'the dataset UUID, '
                      u'the base directory of the file, '
                      u'and the relative path of the file.')
        else:
            ds_uuid = try_parse_uuid(arguments.popleft())
            base_dir = arguments.popleft()
            rel_path = arguments.popleft()

            edition = settings.get_common().edition

            print(u'Downloading file from dataset {}, base directory {}, '
                  u'path {}'
                      .format(ds_uuid, base_dir, rel_path))

            with db.RDB() as rdbw, ds.BDB() as bdbw:
                cr = data_ops.get_cryptographer(ds_uuid, edition, rdbw)

                file_data = data_ops.download_file(ds_uuid, base_dir, rel_path,
                                                   edition, rdbw)

                result_filename = os.path.basename(rel_path)

                logger.debug('Writing to file %r %i bytes long',
                             result_filename, file_data.size)
                with open(result_filename, 'wb') as fh:
                    # Preallocate file
                    if file_data.size:
                        fh.seek(file_data.size - 1)
                        fh.write('\x00')
                    fh.seek(0)
                    # Now write the file contents.
                    try:
                        for bl in data_ops.get_file_of_blocks_gen(
                                      file_data.blocks, cr, bdbw=bdbw):
                            fh.write(bl)
                    except BDBQueries.Chunks.NoChunkException as e:
                        logger.error('Problem while downloading the file: %s',
                                     e)
                        print(u'Error: file {!r} cannot be created!'.format(
                                  result_filename))
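
Before streaming the decrypted blocks, the example above preallocates the output file by seeking to the last byte and writing a single NUL. That idiom is shown in isolation below; the file name and size are arbitrary illustration values.

# Standalone sketch of the preallocation idiom used above.
import os

result_filename = 'restored.bin'  # hypothetical output path
file_size = 1024 * 1024           # pretend this is file_data.size

with open(result_filename, 'wb') as fh:
    if file_size:
        fh.seek(file_size - 1)    # jump to the last byte...
        fh.write(b'\x00')         # ...forcing the OS to extend the file
    fh.seek(0)                    # rewind before writing the real contents

print(os.path.getsize(result_filename))  # 1048576
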
Example #16
def get_group_space_info(arguments):
    group_uuid = UUID(arguments.popleft())

    node_map = proceed_with_node()

    if node_map is not None:
        print('Calculating the utilization info for group {}.'
                  .format(group_uuid))

        with db.RDB() as rdbw:
            host_space_stat = TrustedQueries.SpaceStat.get_hosts_space_stat2(
                                  group_uuid, rdbw)

            for per_host in host_space_stat:
                print('  Host {}: max {}, used {}'.format(
                          per_host.host_name,
                          per_host.max_size,
                          per_host.used_size))
Example #17
    def __execute_incoming_query(self, host_uuid, query):
        """
        Given a somehow-resembling-SQL query in the dict form,
        return the result.

        @param host_uuid: the UUID of the host which requests the query
            to be executed.
        @type host_uuid: PeerUUID

        @type query: dict
        @precondition: 'select' in query and 'from' in query # query

        @returns: the result of the query, in the form of
            a C{QueryResultData} object.
        @rtype: QueryResultData

        @todo: For cloud_stats, cache the values.
        """
        logger.verbose('%r querying node: %r', self, query)

        res_type = query['from']

        # Execute the query, and put the result into the "result" variable.

        if res_type == 'datasets':
            assert 'where' not in query

            with db.RDB() as rdbw:
                result = list(
                    Queries.Datasets.get_just_datasets(host_uuid, rdbw))

        elif res_type == 'files':
            assert 'where' in query, repr(query)
            where_cond = query['where']

            assert len(where_cond.keys()) == 1, repr(where_cond)
            assert 'dataset' in where_cond, repr(where_cond)
            where_dataset_uuid = UUID(where_cond['dataset'])

            with db.RDB() as rdbw:
                files_iter = \
                    Queries.Files.get_files_for_dataset(host_uuid,
                                                        where_dataset_uuid,
                                                        rdbw)

                result = {}
                for f in files_iter:
                    result.setdefault(f.base_dir, []).append(f)

        elif res_type == 'chunks':

            select_what = query['select']
            where_cond = query['where']

            expected_key = '["hash", "size", "uuid"]'

            assert set(select_what) == {'chunks.uuid', 'uuid'}, \
                   repr(select_what)
            assert where_cond.keys() == [expected_key], \
                   (where_cond, expected_key)

            attempt_count = 0
            successfully_saved = False
            while attempt_count < MAX_ATTEMPTS_TO_SAVE_CHUNKS and \
                  not successfully_saved:
                # Try to save chunks, but if saving fails
                # (due to parallel request completed before),
                # retry several times.
                attempt_count += 1
                try:
                    with db.RDB() as rdbw:
                        result = TrustedQueries.TrustedChunks \
                                               .save_chunks_and_get_duplicates(
                                                    where_cond[expected_key],
                                                    rdbw)
                except Exception as e:
                    if attempt_count < MAX_ATTEMPTS_TO_SAVE_CHUNKS:
                        logger.debug(
                            'Saving chunks failed on attempt %d, '
                            'retrying', attempt_count)
                    else:
                        logger.exception(
                            'Could not save chunks during '
                            '%d attempts', MAX_ATTEMPTS_TO_SAVE_CHUNKS)
                        raise
                else:
                    successfully_saved = True
                    logger.debug('Saved chunks in %d attempt(s)',
                                 attempt_count)

        elif res_type == 'cloud_stats':
            assert 'where' not in query, repr(query)

            select_what = query['select']

            assert set(select_what) == {
                'total_hosts_count', 'alive_hosts_count', 'total_mb', 'used_mb'
            }, repr(select_what)

            _known_hosts = self.manager.app.known_hosts
            total_hosts_count = _known_hosts.peers_count()
            alive_hosts_count = _known_hosts.alive_peers_count()

            total_mb, used_mb = \
                TrustedQueries.TrustedChunks.get_cloud_sizes(
                    ignore_uuid=host_uuid)

            result = {
                'total_hosts_count': total_hosts_count,
                'alive_hosts_count': alive_hosts_count,
                'total_mb': total_mb,
                'used_mb': used_mb
            }

        elif res_type == 'data_stats':
            assert 'where' in query, repr(query)

            select_what = query['select']
            where_cond = query['where']

            assert set(select_what) == {'file_count',
                                        'file_size',
                                        'uniq_file_count',
                                        'uniq_file_size',
                                        'full_replicas_count',
                                        'chunk_count',
                                        'chunk_replicas_count',
                                        'hosts_count'}, \
                   repr(select_what)

            ds_uuid, path = (UUID(where_cond['dataset']),
                             decode_posix_path(where_cond['path']))

            result = TrustedQueries.TrustedChunks.get_data_stats(
                ds_uuid=ds_uuid if ds_uuid != '*' else None,
                path=path if path != '*' else None,
                path_rec=where_cond['rec'] if path != '*' else None)
        else:
            raise NotImplementedError(
                'Unsupported result_type {}'.format(res_type))

        logger.verbose('For %r, result is %r (%r)', query, result, res_type)

        return QueryResultData(result_type=res_type, result=result)
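
Judging only from the asserts in this dispatcher, an incoming query is a plain dict with 'select', 'from' and (for some result types) 'where' keys. The shapes below are reconstructed from those asserts purely for illustration; the exact wire format is not shown in the example, and the '...' select lists are placeholders.

# Illustrative query shapes, reconstructed from the asserts above.
query_datasets = {'select': ['...'],        # no 'where' allowed for datasets
                  'from': 'datasets'}

query_files = {'select': ['...'],
               'from': 'files',
               'where': {'dataset': '123e4567-e89b-12d3-a456-426655440000'}}

query_cloud_stats = {'select': ['total_hosts_count', 'alive_hosts_count',
                                'total_mb', 'used_mb'],
                     'from': 'cloud_stats'}
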
Example #18
    def on_end(self):
        # By this time, whatever process we use to determine the host UUID
        # for the user, we've definitely completed it,
        # and the proper host UUID is present in the source message.
        _host = self.message.src
        _message = self.message
        _ack = self.message_ack = self.message.reply()

        if _host is None:
            _ack.ack_result = 'fail'
        elif self.manager.app.known_hosts.is_peer_alive(_host.uuid):
            _ack.ack_result = 'dual login'
        else:
            _ack.ack_result = 'ok'
            _ack.ack_host_uuid = _host.uuid

            with db.RDB() as rdbw:
                user_groups = list(
                    Queries.Inhabitants.get_groups_for_user(
                        _host.user.name, rdbw))

                thost_uuids = \
                    {h.uuid
                         for h in TrustedQueries.HostAtNode
                                                .get_all_trusted_hosts(
                                                     rdbw=rdbw)}

            is_trusted_host = _host.uuid in thost_uuids

            _ack.ack_username = self.message.src.user.name
            _ack.ack_groups = user_groups

            logger.debug('For user %r, %s %r, groups are: %r',
                         _ack.ack_username,
                         'trusted host' if is_trusted_host else 'host',
                         _host.uuid, _ack.ack_groups)

            # If we have an SSL request, create the certificate
            _req = _message.cert_request
            if _req is not None:
                assert isinstance(_req, crypto.X509Req), \
                       repr(_req)

                subj = _req.get_subject()

                if not ssl.validate_host_req(_req):
                    # Need explicit str() here, as subj is non-pickleable!
                    logger.warning('%s is probably trying to spoof itself: %r',
                                   _host.uuid, str(subj))
                else:
                    # Force CN to be the host UUID
                    subj.commonName = str(_host.uuid)
                    node_cert = get_common().ssl_cert
                    node_key = get_common().ssl_pkey
                    _key_duration = ssl.OPENSSL_TRUSTED_HOST_KEY_DURATION \
                                        if is_trusted_host \
                                        else ssl.OPENSSL_HOST_KEY_DURATION
                    _ack.ack_ssl_cert = \
                        ssl.createCertificate(
                            _req,
                            node_cert, node_key,
                            notAfter=long(_key_duration.total_seconds()))

        self.manager.post_message(self.message_ack)
Example #19
    def on_begin(self):
        """
        Received the PROGRESS request from a host.
        """
        assert self.is_incoming()

        _message = self.message
        host = _message.src
        host_uuid = host.uuid
        me = _message.dst

        # Write into database, and do nothing more,
        # as this completes the transaction.
        dataset = _message.dataset
        ds_uuid = dataset.uuid if dataset is not None else None

        # Multiple independent RDB wrappers are used,
        # to decrease the RDB write lock times.

        # [1/5] Store dataset, either on beginning of backup or on end.
        #       Dataset creation should occur BEFORE chunks/blocks writing.
        #       Dataset finalization should occur AFTER chunks/blocks writing.
        if dataset is not None and not _message.completion:
            logger.info('Backup just started.')

            # Maybe the dataset is present already?
            logger.debug('Is the dataset present? DS %s, host %s',
                         ds_uuid, host_uuid)

            with db.RDB() as rdbw:
                dataset_in_db = Queries.Datasets.get_dataset_by_uuid(
                                    ds_uuid, host_uuid, rdbw)
                if dataset_in_db is None:
                    # Adding the dataset
                    dataset_uuid = Queries.Datasets.create_dataset_for_backup(
                                       host_uuid, dataset, rdbw)
                    assert dataset_uuid == ds_uuid, (dataset_uuid, ds_uuid)
                    logger.info('Dataset %s added to RDB.', dataset_uuid)
                else:
                    logger.debug('The dataset %s is present already, '
                                     'not re-adding.',
                                 ds_uuid)

        # [2/5] Store chunks themselves (may be needed for
        #       bind_blocks_to_files() below).
        if _message.chunks_by_uuid is not None:
            logger.debug('Progress %r with chunks_by_uuid', self)

            try:
                # Run several attempts to write into RDB.
                for i in xrange(MAX_RDB_WRITE_ATTEMPTS):
                    try:
                        with db.RDB() as rdbw:
                            # Actual RDB write
                            Queries.Chunks.add_chunks(
                                _message.chunks_by_uuid.itervalues(), rdbw)
                    except:
                        logger.warning('Attempt %d to write chunks failed', i)
                        if i == MAX_RDB_WRITE_ATTEMPTS - 1:
                            logger.error('Giving up on writing chunks')
                            raise
                        else:
                            # Blocking on sleep() is bad, we know.
                            # But this is the last resort.
                            # So let's sleep and retry again.
                            sleep(SLEEP_ON_RDB_WRITE_RETRY.total_seconds())
                    else:
                        logger.debug('Wrote chunks successfully '
                                         'in %d retries!',
                                     i)
                        break  # need no more retries

            except:
                logger.exception('Could not add chunks!')
                # Debug rather than verbose; on a real error, we want
                # as much information as available.
                logger.debug('chunks_by_uuid = %r', _message.chunks_by_uuid)
                raise

        # [3/5] Store chunks-per-host - chunks uploaded to some host.
        if _message.chunks_map_getter is not None:
            chunks_map = _message.chunks_map_getter()
            logger.debug('Progress %r with chunks_map', self)
            for target_host, notifications in chunks_map.iteritems():
                logger.debug('Marking notifications for host %r: %r',
                             target_host, notifications)

                try:
                    # Run several attempts to write into RDB.
                    for i in xrange(MAX_RDB_WRITE_ATTEMPTS):
                        try:
                            with db.RDB() as rdbw:
                                # Actual RDB write
                                TrustedQueries.TrustedChunks \
                                              .chunks_are_uploaded_to_the_host(
                                                   host_uuid, target_host.uuid,
                                                   notifications, rdbw)
                        except:
                            logger.warning('Attempt %d to write '
                                               'chunks-per-host failed',
                                           i)
                            if i == MAX_RDB_WRITE_ATTEMPTS - 1:
                                logger.error('Giving up on writing '
                                             'chunks-per-host')
                                raise
                            else:
                                # Blocking on sleep() is bad, we know.
                                # But this is the last resort.
                                # So let's sleep and retry again.
                                sleep(SLEEP_ON_RDB_WRITE_RETRY.total_seconds())
                        else:
                            logger.debug('Wrote chunks-per-host successfully '
                                             'in %d retries!',
                                         i)
                            break

                except:
                    logger.exception('Could not mark chunks as uploaded!')
                    # Debug rather than verbose; on a real error, we want
                    # as much information as available.
                    logger.debug('chunks_by_uuid = %r',
                                 _message.chunks_by_uuid)
                    logger.debug('chunks_map = %r', chunks_map)
                    raise

        # [4/5] Store blocks mapping.
        if _message.blocks_map is not None:
            logger.debug('Progress %r with block_map', self)

            try:
                # Run several attempts to write into RDB.
                for i in xrange(MAX_RDB_WRITE_ATTEMPTS):
                    try:
                        with db.RDB() as rdbw:
                            # Actual RDB write
                            Queries.Blocks.bind_blocks_to_files(
                                host_uuid, ds_uuid, _message.blocks_map, rdbw)
                    except:
                        logger.warning('Attempt %d to write blocks failed',
                                       i)
                        if i == MAX_RDB_WRITE_ATTEMPTS - 1:
                            logger.error('Giving up on writing blocks')
                            raise
                        else:
                            # Blocking on sleep() is bad, we know.
                            # But this is the last resort.
                            # So let's sleep and retry again.
                            sleep(SLEEP_ON_RDB_WRITE_RETRY.total_seconds())
                    else:
                        logger.debug('Wrote blocks successfully '
                                         'in %d retries!',
                                     i)
                        break

            except:
                logger.exception('Could not bind blocks!')
                # Debug rather than verbose; on a real error, we want
                # as much information as available.
                logger.debug('chunks_by_uuid = %r', _message.chunks_by_uuid)
                logger.debug('blocks_map = %r', _message.blocks_map)
                raise

        # [5/5] Finalize the dataset if needed, but only if no errors occurred
        #       during chunks/blocks writing.
        if dataset is not None and _message.completion:
            logger.info('Backup just completed!')
            with db.RDB() as rdbw:
                # 1. write the "backup completed" state
                Queries.Datasets.update_dataset(host_uuid, dataset, rdbw)
                # 2. mark that the dataset was synced to the host.
                TrustedQueries.TrustedDatasets.mark_dataset_as_synced_to_host(
                    ds_uuid, host_uuid, rdbw)

        # We are now done with writing the data received from the host.
        # Do the additional operations (with a separate database lock).

        if dataset is not None and _message.completion:
            # Restore the dataset to the hosts which do not
            # contain it yet
            self.manager.app.syncer.restore_dataset_to_lacking_hosts(
                me=me, host=host, ds_uuid=ds_uuid)
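
The same "try N times, sleep between attempts" block appears three times in this transaction (for chunks, chunks-per-host and blocks). If one wanted to factor it out, a generic retry helper might look roughly like the sketch below; MAX_RDB_WRITE_ATTEMPTS and SLEEP_ON_RDB_WRITE_RETRY mirror the constants used above (their values here are guesses), and write_callable is a hypothetical stand-in for the actual RDB write.

# Hedged sketch of factoring out the retry loop used three times above.
import logging
from datetime import timedelta
from time import sleep

logger = logging.getLogger(__name__)

MAX_RDB_WRITE_ATTEMPTS = 5                       # assumed value
SLEEP_ON_RDB_WRITE_RETRY = timedelta(seconds=1)  # assumed value

def retry_rdb_write(write_callable, what='data'):
    """Call write_callable() up to MAX_RDB_WRITE_ATTEMPTS times, sleeping between tries."""
    for i in range(MAX_RDB_WRITE_ATTEMPTS):
        try:
            write_callable()
        except Exception:
            logger.warning('Attempt %d to write %s failed', i, what)
            if i == MAX_RDB_WRITE_ATTEMPTS - 1:
                logger.error('Giving up on writing %s', what)
                raise
            # Blocking on sleep() is bad, but it is the last resort here too.
            sleep(SLEEP_ON_RDB_WRITE_RETRY.total_seconds())
        else:
            logger.debug('Wrote %s successfully on attempt %d', what, i)
            return
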
Example #20
    def restore_dataset_to_lacking_hosts(self, me, host, ds_uuid):
        """
        Given a dataset (by its UUID), restore it to every host which lacks it
        (if it is a sync dataset).

        @param me: my node
        @type me: Node

        @param host: host which just completed the backup and is going
            to restore the data, or C{None} if not applicable.
        @type host: Host, NoneType

        @type ds_uuid: DatasetUUID
        """
        logger.debug('Restoring DS %s to all lacking hosts (if needed), '
                         'except %r',
                     ds_uuid, host)

        with db.RDB() as rdbw:
            # If the dataset is non-syncing, _host_uuids_to_restore
            # will be empty.
            host_uuid = host.uuid if host else None
            _host_uuids_to_restore = \
                TrustedQueries.TrustedDatasets \
                              .get_host_uuids_lacking_sync_dataset(
                                   ds_uuid, rdbw=rdbw)

            # But, if we have a host that definitely just completed the backup,
            # we can filter it out.
            # In the same step, we eagerly evaluate it, so we can get out
            # of the RDB wrapper.
            host_uuids_to_restore = list(_host_uuids_to_restore) \
                                        if host_uuid is None \
                                        else [u for u in _host_uuids_to_restore
                                                if u != host_uuid]

        if host_uuids_to_restore:
            logger.debug('Will probably restore dataset %s to %r',
                         ds_uuid, host_uuids_to_restore)

            for restore_host_uuid in host_uuids_to_restore:
                restore_host_uuid = \
                    PeerUUID.safe_cast_uuid(restore_host_uuid)
                if self.__known_hosts.is_peer_alive(restore_host_uuid):
                    logger.debug('Restoring dataset %s to host %r',
                                 ds_uuid, restore_host_uuid)
                    r_tr = self.__app.tr_manager.create_new_transaction(
                               name='RESTORE',
                               src=me,
                               dst=self.__known_hosts[restore_host_uuid],
                               parent=None,
                               # RESTORE-specific
                               ds_uuid=ds_uuid,
                               # None means "all files"
                               file_paths_for_basedirs=None,
                               wr_uuid=None)
                else:
                    logger.debug("Could've restored dataset %s to host %r, "
                                     'but the host is not alive',
                                 ds_uuid, restore_host_uuid)
        else:
            logger.debug('Dataset %s is likely not auto-syncable', ds_uuid)
Example #21
    def __refresh_progress(cls, uploaded_chunks, refreshed_state):
        """
        Force recalculation of progress for a transaction, and modify the state
        accordingly, after some chunks were considered uploaded.

        The function is expected to be called from inside the opened state
        context with the state that would be used for calculation.

        @note: this function should not take too long, because it may run
            inside the state context opened for updating.

        @param uploaded_chunks: the iterable of UUIDs of the chunks which were
            successfully uploaded.
        @type uploaded_chunks: col.Iterable

        @param refreshed_state: the state which will be updated
            with the latest information.
        @type refreshed_state: RestoreTransaction_Node.State

        @return: a newly updated state (C{refreshed_state}).
        @rtype: RestoreTransaction_Node.State
        """
        uploaded_chunks = set(uploaded_chunks)
        logger.debug('Recalculating progress, while considering these chunks '
                         'uploaded to %r within %r: (%i) %r',
                     refreshed_state.tr_dst_uuid,
                     refreshed_state.tr_uuid,
                     len(uploaded_chunks), uploaded_chunks)

        # Get the current set of pending chunks;
        # remove the chunks that are uploaded;
        # compare the length of the sets (of pending chunks) before and
        # after removal.
        refreshed_state.last_progress_recalc_time = datetime.utcnow()
        pending_count = len(refreshed_state.pending_chunk_uuids)
        refreshed_state.pending_chunk_uuids -= uploaded_chunks
        refreshed_state.num = \
            refreshed_state.of - len(refreshed_state.pending_chunk_uuids)

        logger.debug('Remaining chunks to upload within %r: (%i) %r',
                     refreshed_state.tr_uuid,
                     len(refreshed_state.pending_chunk_uuids),
                     refreshed_state.pending_chunk_uuids)

        if refreshed_state.pending_chunk_uuids:
            with db.RDB() as rdbw:
                pending_bytes = \
                    TrustedQueries.TrustedChunks.get_total_size_of_chunks(
                        refreshed_state.pending_chunk_uuids, rdbw)
        else:
            pending_bytes = 0

        _new_num = refreshed_state.of \
                   - len(refreshed_state.pending_chunk_uuids)
        _new_num_bytes = refreshed_state.of_bytes - pending_bytes
        _st = refreshed_state
        logger.debug('Recalculated progress: chunks - %i->%i of %i, '
                         'bytes - %i->%i of %i',
                     _st.num, _new_num, _st.of,
                     _st.num_bytes, _new_num_bytes, _st.of_bytes)

        refreshed_state.num = max(_new_num, 0)
        refreshed_state.num_bytes = max(_new_num_bytes, 0)

        logger.debug('Current progress %i/%i (%i/%i)',
                     refreshed_state.num, refreshed_state.of,
                     refreshed_state.num_bytes, refreshed_state.of_bytes)
        return refreshed_state
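
The recalculation above boils down to counter arithmetic over the pending set: num = of - len(pending_chunk_uuids) and num_bytes = of_bytes - pending_bytes, both clamped at zero. A tiny worked example with made-up numbers:

# Worked example of the progress arithmetic above, with made-up values.
of, of_bytes = 10, 10 * 1024 * 1024        # total chunks / bytes to restore
pending_chunk_uuids = {'c1', 'c2', 'c3'}   # chunks still missing on the host
pending_bytes = 3 * 1024 * 1024            # their total size

num = max(of - len(pending_chunk_uuids), 0)   # 7 chunks done
num_bytes = max(of_bytes - pending_bytes, 0)  # 7 MiB done

print('%i/%i chunks, %i/%i bytes' % (num, of, num_bytes, of_bytes))
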
Example #22
    def __restore_datasets_to_host(self, me, host, ds_uuids):
        """
        Launch RESTORE transaction(s) to restore several datasets
        (with dataset uuids in C{ds_uuids}) to the host C{host}.

        @param me: my node
        @type me: Node

        @type host: Host
        @type ds_uuids: col.Iterable
        """
        tr_manager = self.__app.tr_manager
        ds_uuids_present, ds_uuids_asked_to_restore = inonempty(ds_uuids)

        if not ds_uuids_present:
            logger.debug('Actually, nothing to restore to %r', host)
        else:
            sync_ds_uuids_for_this_host = \
                {state.ds_uuid
                    for state in tr_manager.get_tr_states(class_name='RESTORE',
                                                          dst_uuid=host.uuid)
                    if state.is_sync}

            if sync_ds_uuids_for_this_host:
                logger.debug('Restoring something to %r, while '
                                 'the following RESTORE transactions are '
                                 'already syncing to it: %r',
                             host, sync_ds_uuids_for_this_host)

            # Let's evaluate the sequence so it can be iterated over
            # multiple times, as well as to count its length.
            ds_uuids_asked_to_restore = frozenset(ds_uuids_asked_to_restore)
            assert ds_uuids_asked_to_restore, repr(ds_uuids_asked_to_restore)

            ds_uuids_to_restore = \
                ds_uuids_asked_to_restore - sync_ds_uuids_for_this_host
            logger.verbose('While asked to restore %i dataset(s) (%r), '
                               'will in fact restore %i one(s) (%r)',
                           len(ds_uuids_asked_to_restore),
                           ds_uuids_asked_to_restore,
                           len(ds_uuids_to_restore),
                           ds_uuids_to_restore)

            # If we are syncing a sole dataset, let's sync it;
            # if there are multiple ones, let's merge them.
            if len(ds_uuids_to_restore) == 1:
                ds_uuids_will_restore = ds_uuids_to_restore
            else:
                with db.RDB() as rdbw:
                    ds_uuids_will_restore = \
                        TrustedQueries.TrustedDatasets.merge_sync_datasets(
                            host.uuid, ds_uuids_to_restore, rdbw)
                    # Eagerly evaluate it, to get it outside the RDB wrapper.
                    ds_uuids_will_restore = list(ds_uuids_will_restore)
                logger.debug('Merged DS UUIDs: %r', ds_uuids_will_restore)

            logger.debug('Will in fact restore these datasets: %r',
                         ds_uuids_will_restore)
            for ds_uuid in ds_uuids_will_restore:
                logger.debug('Restoring files from %s to %r', ds_uuid, host)

                r_tr = tr_manager.create_new_transaction(
                           name='RESTORE',
                           src=me,
                           dst=host,
                           parent=None,
                           # RESTORE-specific
                           ds_uuid=ds_uuid,
                           # None means "all files"
                           file_paths_for_basedirs=None,
                           wr_uuid=None)
Example #23
    def __request_restoring_more_chunks(self, state):
        """
        Try the next host among the available ones, and attempt
        to request it to send all the chunks it can.

        @todo: each call of C{__request_restoring_more_chunks()}
            should be pretty fast, less than a second, because it holds
            the state context open for writing during the whole call.
            If it ever happens to be that long, this needs to be refactored,
            so that it opens a state context for writing only for the duration
            of actual writing.

        @param state: transaction state already opened for update.
        @type state: AbstractTransaction.State

        @return: whether everything has gone well so far. Otherwise,
            the caller must call C{self.__restore_ops_failed()}
            (and do that outside of the state context).
        @rtype: bool
        """
        _message = self.message
        _manager = self.manager
        me = _message.src
        host = _message.dst

        so_far_so_good = True

        logger.debug('%s::Pending hosts (%i), %r',
                     self.uuid,
                     len(state.pending_host_uuids),
                     state.pending_host_uuids)
        logger.debug('%s::Pending chunks for restore (%i), %r',
                     self.uuid,
                     len(state.pending_chunk_uuids),
                     state.pending_chunk_uuids)

        if not state.pending_chunk_uuids:
            logger.debug('%s::Seems done with restoring...', self.uuid)
            so_far_so_good = \
                self.__no_more_chunks_to_restore(state, success=True)
        else:
            # What host shall we use? Use only alive ones.
            host_uuids = \
                deque(ifilter(self.manager.app.known_hosts.is_peer_alive,
                              state.pending_host_uuids))

            if not host_uuids:
                # If there are no hosts now, some may still appear later.
                logger.warning('%s::Cannot restore: no more hosts '
                                   'on attempt %d!',
                               self.uuid, state.no_donors_retries)

                state.no_donors_retries += 1

                if state.no_donors_retries <= MAX_RETRIES_IF_NO_DONORS:
                    logger.debug('Pausing for %s...',
                                 RESTORE_RETRY_PERIOD_IF_NO_HOSTS)
                    callLaterInThread(
                        RESTORE_RETRY_PERIOD_IF_NO_HOSTS.total_seconds(),
                        self.__on_restore_retry_delay_elapsed)
                    so_far_so_good = True
                else:
                    logger.error('In %d attempts, '
                                     "couldn't find any donors, cancelling",
                                 MAX_RETRIES_IF_NO_DONORS)
                    so_far_so_good = \
                        self.__no_more_chunks_to_restore(state, success=False)

            else:
                # No matter how many retries we could've made, but now we found
                # some donors; reset the retries counter.
                state.no_donors_retries = 0

                # Let's use the first host in the list.
                restoring_from_host_uuid = host_uuids.popleft()
                # ... but, in case of a possible failure, move it
                # to the end of the queue.
                host_uuids.append(restoring_from_host_uuid)

                restoring_from_host = \
                    self.manager.app.known_hosts[restoring_from_host_uuid]

                logger.debug("%s::Let's restore some chunks from %r to %r",
                             self.uuid, restoring_from_host, host)

                with db.RDB() as rdbw:
                    all_chunk_uuids_on_from_host = \
                        set(TrustedQueries.TrustedChunks
                                          .get_chunk_uuids_for_host(
                                               restoring_from_host_uuid, rdbw))

                # Among all the chunks on the SRC host, we need only
                # the ones which are needed for the restore.
                available_chunk_uuids = \
                    all_chunk_uuids_on_from_host & state.pending_chunk_uuids
                del all_chunk_uuids_on_from_host  # help GC

                logger.verbose('%s::Chunks available at host %s are: %r',
                               self.uuid,
                               restoring_from_host_uuid,
                               available_chunk_uuids)

                with db.RDB() as rdbw:
                    restoring_chunks = \
                        list(Queries.Chunks.get_chunks_by_uuids(
                                 available_chunk_uuids, rdbw))

                if restoring_chunks:
                    logger.verbose('%s::Restoring chunks from %r to %r: %r',
                                   self.uuid,
                                   restoring_from_host,
                                   host,
                                   available_chunk_uuids)

                    # Start the nested RECEIVE_CHUNKS transaction.
                    rc_tr = _manager.create_new_transaction(
                                name='RECEIVE_CHUNKS',
                                src=me,
                                dst=host,
                                parent=self,
                                # RECEIVE_CHUNKS-specific
                                chunks_to_restore={
                                    restoring_from_host.uuid: restoring_chunks
                                })
                    rc_tr.completed.addCallbacks(
                        partial(self._on_receive_chunks_success,
                                what_chunks=restoring_chunks,
                                from_what_host=restoring_from_host),
                        partial(self._on_receive_chunks_error,
                                from_what_host=restoring_from_host))
                    so_far_so_good = True
                else:
                    logger.debug("%s::Host %s doesn't have any chunks for %s,"
                                     'removing it from the set',
                                 self.uuid, restoring_from_host_uuid, host)
                    state.pending_host_uuids.remove(restoring_from_host_uuid)

                    so_far_so_good = \
                        self.__request_restoring_more_chunks(state)

        return so_far_so_good
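
The donor selection above keeps the candidate host UUIDs in a deque, pops the first one and immediately re-appends it, so a donor that fails simply rotates to the back of the queue for a later retry. A stripped-down sketch of that rotation (the host names are placeholders):

# Stripped-down sketch of the donor rotation used above.
from collections import deque

host_uuids = deque(['host-a', 'host-b', 'host-c'])

donor = host_uuids.popleft()  # try the first candidate...
host_uuids.append(donor)      # ...but requeue it in case this attempt fails

print(donor)             # host-a
print(list(host_uuids))  # ['host-b', 'host-c', 'host-a']
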
Example #24
    def __no_more_chunks_to_restore(self, state, success=True):
        """What to do when there are no more chunks to restore?

        @param state: transaction state already opened for update.
        @type state: AbstractTransaction.State

        @return: whether the restore transaction was successful.
        @rtype: bool
        """
        logger.debug('%s::No more chunks to restore! %s successful.',
                     self.uuid, 'Everything' if success else 'Not')

        state.success = success
        _ds_uuid = state.ds_uuid
        _wr_uuid = state.wr_uuid
        is_sync = state.is_sync

        if success:
            logger.debug('%s::All chunks are already on the host, '
                             "let's send out the RESTORE message",
                         self.uuid)

            _message = self.message
            host = _message.dst

            with db.RDB() as rdbw:
                ds = Queries.Datasets.get_dataset_by_uuid(_ds_uuid, host.uuid,
                                                          rdbw)
                ugroup = Queries.Inhabitants.get_ugroup_by_uuid(ds.ugroup_uuid,
                                                                rdbw)

            # Interesting fact: ds.sync doesn't mean that we are necessarily
            # syncing the dataset. Only state.is_sync knows that (since it is
            # possible that we are restoring some separate files from a "sync"
            # dataset).

            _message.ugroup = ugroup
            _message.dataset = ds if is_sync else None
            _message.sync = is_sync

            # Re-get the files->blocks mapping information
            # to send to the host.
            with db.RDB() as rdbw:
                (files_by_rel_path,
                 blocks_by_file_rel_path) = \
                    TrustedQueries.TrustedBlocks.get_blocks_for_files(
                        host.uuid, state.ds_uuid,
                        state.file_paths_for_basedirs, rdbw)
            _message.files = \
                {f: blocks_by_file_rel_path[f.rel_path]
                     for f in files_by_rel_path.itervalues()}
            _message.wr_uuid = _wr_uuid

            logger.verbose(
                '%s::No more chunks for DS %r/WR %r at %r:'
                    '\n%r',
                self.uuid, _message.dataset, _message.wr_uuid, _message.ugroup,
                _message.files)

            # The transaction is still paused at the moment
            self.manager.post_message(_message)
            return True

        else:
            return False
Example #25
    def __check_schedules(self):

        _now = utcnow()

        with self.lock:
            #
            # First, check whether we need to reread the schedules from the DB.
            #
            assert isinstance(self.__last_reread_from_db, datetime), \
                   repr(self.__last_reread_from_db)
            maxdelta = timedelta(
                seconds=BACKUP_SCHEDULES_REREAD_PERIOD.total_seconds())
            if _now - self.__last_reread_from_db > maxdelta:
                self.reread_cache()

            #
            # Now, we can check the schedules.
            #
            if self.__schedules:
                logger.debug(
                    'Checking for suspect schedules '
                    'at %s among %i schedules...', _now, len(self.__schedules))
            suspect_schedules = self.get_schedules_older_than(_now)
            if suspect_schedules:
                logger.debug(
                    'On node %r, the following (%i) schedules '
                    'have passed their time:\n%s', self.server_process.me,
                    len(suspect_schedules),
                    '\n'.join(repr(s) for s in suspect_schedules))

                # But what hosts are actually alive at the moment?
                alive_host_uuids = \
                    [h.uuid
                         for h in self.app.known_hosts.alive_peers()]
                if alive_host_uuids:
                    logger.debug('Alive hosts at the moment are: %r',
                                 alive_host_uuids)
                # A schedule will fire a backup only if its time has passed
                # and its host is alive.
                process_schedules = {
                    sch
                    for sch in suspect_schedules
                    if sch.host_uuid in alive_host_uuids
                }

                if process_schedules:
                    processed_host_uuids = set()
                    logger.debug(
                        'The following (%i) schedules will fire:\n%s',
                        len(process_schedules),
                        '\n'.join(repr(sch) for sch in process_schedules))

                    # Loop over schedules, and run the backup transactions.
                    for schedule in process_schedules:
                        logger.debug('Firing a backup for schedule %r',
                                     schedule)

                        # TODO: Add "if user is suspended" check when
                        # TODO: BackupScheduler is uncommented.
                        raise NotImplementedError
                        self.start_scheduled_backup(schedule)

                        new_schedule = schedule.copy()
                        new_schedule.advance_by_period()
                        logger.debug('%r advanced to %r', schedule,
                                     new_schedule)

                        # Remove old schedule; add new if needed.
                        self.remove(schedule)
                        if new_schedule.next_backup_datetime is not None:
                            self.add(new_schedule)

                        processed_host_uuids.add(new_schedule.host_uuid)

                    # We're done with the backup transactions.
                    # It would be good to update the settings now.
                    for host_uuid in processed_host_uuids:
                        schedules = self.get_schedules_by_host_uuid(host_uuid)
                        logger.debug(
                            'Updating the schedules '
                            'for host %s:\n%r', host_uuid, schedules)
                        with db.RDB() as rdbw:
                            TrustedQueries.TrustedSettings.set_setting(
                                rdbw=rdbw,
                                host_uuid=host_uuid,
                                setting_name=Queries.Settings.BACKUP_SCHEDULE,
                                setting_value=[s.to_dict() for s in schedules],
                                setting_time=_now.replace(tzinfo=None))
                        self.send_updated_schedules_to_host(host_uuid)