예제 #1
0
 def purge(self, service_name: str) -> None:
     """Remove the rgw config options and stored certificate of a service.

     Issues ``config rm`` for ``rgw_realm`` and ``rgw_zone`` against the
     service's config section, then ``config-key rm`` for
     ``rgw/cert/<service_name>``.

     :param service_name: name of the service being purged.
     """
     # Per-service options, removed in this fixed order.
     for option in ('rgw_realm', 'rgw_zone'):
         removal = {
             'prefix': 'config rm',
             'who': utils.name_to_config_section(service_name),
             'name': option,
         }
         self.mgr.check_mon_command(removal)

     # Finally drop the certificate kept in the config-key store.
     cert_key = f'rgw/cert/{service_name}'
     self.mgr.check_mon_command({'prefix': 'config-key rm', 'key': cert_key})
예제 #2
0
 def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None:
     """Clean up after a daemon was removed.

     Delegates to the parent class first, then removes the daemon's
     ``rgw_frontends`` option from its config section.

     :param daemon: description of the daemon that was removed.
     :param is_failed_deploy: forwarded unchanged to the parent hook.
     """
     super().post_remove(daemon, is_failed_deploy=is_failed_deploy)

     section = utils.name_to_config_section(daemon.name())
     command = {'prefix': 'config rm', 'who': section, 'name': 'rgw_frontends'}
     self.mgr.check_mon_command(command)
예제 #3
0
    def create(self, igw_id, host, spec) -> str:
        """Create an iSCSI gateway daemon on ``host``.

        Obtains (or creates) the gateway's auth keyring, stores the optional
        SSL certificate and key from ``spec`` in the config-key store, renders
        ``iscsi-gateway.cfg`` and hands everything to ``_create_daemon``.

        :param igw_id: gateway id; appended to the iscsi config section name
            to form the auth entity / cluster client name.
        :param host: host forwarded to ``_create_daemon``.
        :param spec: service spec; ``pool``, ``ssl_cert``, ``ssl_key``,
            ``trusted_ip_list`` and the ``api_*`` attributes are read.
        :return: the result of ``self.mgr._create_daemon``.
        :raises: whatever ``check_mon_command`` raises when a mon command
            fails (NOTE(review): it is used throughout this file without
            inspecting the result, so it presumably raises on a non-zero
            return code - confirm).
        """
        # Same value the original rebuilt in four places.
        entity = utils.name_to_config_section('iscsi') + '.' + igw_id

        _, keyring, _ = self.mgr.check_mon_command({
            'prefix': 'auth get-or-create',
            'entity': entity,
            'caps': [
                'mon', 'profile rbd, '
                'allow command "osd blacklist", '
                'allow command "config-key get" with "key" prefix "iscsi/"',
                'osd', f'allow rwx pool={spec.pool}'
            ],
        })

        # Upload certificate and key. The checked variant is used so that a
        # failed upload is not silently ignored before deploying a gateway
        # that expects to find these keys.
        for material, filename in ((spec.ssl_cert, 'iscsi-gateway.crt'),
                                   (spec.ssl_key, 'iscsi-gateway.key')):
            if not material:
                continue
            # The spec may carry the PEM data as a list of lines or one string.
            data = '\n'.join(material) if isinstance(material, list) else material
            self.mgr.check_mon_command({
                'prefix': 'config-key set',
                'key': f'iscsi/{entity}/{filename}',
                'val': data,
            })

        api_secure = 'false' if spec.api_secure is None else spec.api_secure
        # NOTE: the leading whitespace inside this literal ends up in the
        # generated file; it is kept exactly as before.
        igw_conf = f"""
        # generated by cephadm
        [config]
        cluster_client_name = {entity}
        pool = {spec.pool}
        trusted_ip_list = {spec.trusted_ip_list or ''}
        minimum_gateways = 1
        api_port = {spec.api_port or ''}
        api_user = {spec.api_user or ''}
        api_password = {spec.api_password or ''}
        api_secure = {api_secure}
        """
        extra_config = {'iscsi-gateway.cfg': igw_conf}
        return self.mgr._create_daemon('iscsi',
                                       igw_id,
                                       host,
                                       keyring=keyring,
                                       extra_config=extra_config)
예제 #4
0
    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        """Prepare an rgw daemon spec for deployment.

        Fetches the daemon keyring, builds the ``rgw_frontends`` value from the
        service spec (frontend type, ssl, bind ip and port), stores it with
        ``config set`` and fills in ``keyring``, ``final_config`` and ``deps``
        on ``daemon_spec``.

        :param daemon_spec: deploy spec of the daemon; must match ``self.TYPE``.
        :return: the same ``daemon_spec`` object, updated in place.
        """
        assert self.TYPE == daemon_spec.daemon_type
        rgw_id = daemon_spec.daemon_id
        spec = cast(RGWSpec, self.mgr.spec_store[daemon_spec.service_name].spec)

        keyring = self.get_keyring(rgw_id)

        # A redeploy of an older instance has no explicitly assigned port; in
        # that case there is only one daemon per host and it matches the spec.
        port = daemon_spec.ports[0] if daemon_spec.ports else spec.get_port()

        def listen_target() -> str:
            # "<ip>:<port>" style target when an ip is assigned, else the port.
            if daemon_spec.ip:
                return build_url(host=daemon_spec.ip, port=port).lstrip('/')
            return f"{port}"

        # configure frontend
        frontend_type = spec.rgw_frontend_type or "beast"
        frontend_args = []
        if frontend_type == 'beast':
            if spec.ssl:
                option = 'ssl_endpoint' if daemon_spec.ip else 'ssl_port'
                frontend_args.append(f"{option}={listen_target()}")
                frontend_args.append(
                    f"ssl_certificate=config://rgw/cert/{spec.service_name()}")
            else:
                option = 'endpoint' if daemon_spec.ip else 'port'
                frontend_args.append(f"{option}={listen_target()}")
        elif frontend_type == 'civetweb':
            if spec.ssl:
                # civetweb marks an ssl port with an 's' suffix
                frontend_args.append(f"port={listen_target()}s")
                frontend_args.append(
                    f"ssl_certificate=config://rgw/cert/{spec.service_name()}")
            else:
                frontend_args.append(f"port={listen_target()}")
        frontend = f'{frontend_type} {" ".join(frontend_args)}'

        self.mgr.check_mon_command({
            'prefix': 'config set',
            'who': utils.name_to_config_section(daemon_spec.name()),
            'name': 'rgw_frontends',
            'value': frontend
        })

        daemon_spec.keyring = keyring
        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)

        return daemon_spec
예제 #5
0
파일: nfs.py 프로젝트: zhoufeng2021/ceph
    def generate_config(
        self, daemon_spec: CephadmDaemonSpec[NFSServiceSpec]
    ) -> Tuple[Dict[str, Any], List[str]]:
        """Build the cephadm config-json for one nfs daemon.

        Creates (or fetches) the daemon keyring, refreshes its caps, creates
        the rados config object, renders ``ganesha.conf`` from its template and
        merges the result with the daemon's config and keyring.

        :param daemon_spec: spec of the daemon; its ``spec`` must be set and
            its ``daemon_type`` must equal ``self.TYPE``.
        :return: ``(config, deps)`` where ``deps`` is always an empty list.
        """
        assert self.TYPE == daemon_spec.daemon_type
        assert daemon_spec.spec

        nfs_spec = daemon_spec.spec
        svc_type = daemon_spec.daemon_type
        svc_id = daemon_spec.daemon_id

        # Keyring: get-or-create first, then update the caps, because the
        # keyring might already exist with older caps.
        user = f'{svc_type}.{svc_id}'
        entity = utils.name_to_config_section(user)
        keyring = self.get_or_create_keyring(entity)
        self.update_keyring_caps(entity, nfs_spec)

        # Rados config object.
        self.create_rados_config_obj(nfs_spec)

        # Ganesha configuration rendered from the template.
        ganesha_context = {
            'user': user,
            'nodeid': daemon_spec.name(),
            'pool': nfs_spec.pool,
            'namespace': nfs_spec.namespace or '',
            'url': nfs_spec.rados_config_location(),
        }
        ganesha_conf = self.mgr.template.render('services/nfs/ganesha.conf.j2',
                                                ganesha_context)

        # cephadm config json; key order is kept as pool, namespace, userid,
        # extra_args, files, followed by the config/keyring entries.
        config: Dict[str, Any] = {'pool': nfs_spec.pool}
        if nfs_spec.namespace:
            config['namespace'] = nfs_spec.namespace
        config['userid'] = user
        config['extra_args'] = ['-N', 'NIV_EVENT']
        config['files'] = {'ganesha.conf': ganesha_conf}
        config.update(
            self.mgr._get_config_and_keyring(svc_type,
                                             svc_id,
                                             keyring=keyring,
                                             host=daemon_spec.host))
        logger.debug('Generated cephadm config-json: %s' % config)

        return config, []
예제 #6
0
    def _do_upgrade(self):
        # type: () -> None
        """Advance the running upgrade by one step.

        Does nothing when there is no upgrade state. Otherwise it resolves the
        target image (id, version, digests) on first use, validates the target
        version, and then walks ``CEPH_UPGRADE_ORDER`` one daemon type at a
        time. For each type it collects the daemons that still need to be
        redeployed, redeploys a batch of them and returns; the function also
        returns after every failure (reported through ``_fail_upgrade``) and
        after triggering a mgr fail-over. Only when every daemon type needs
        nothing further does it finalize the ``container_image`` settings,
        complete the progress event and clear the upgrade state.
        """
        if not self.upgrade_state:
            logger.debug('_do_upgrade no state, exiting')
            return

        target_image = self.target_image
        target_id = self.upgrade_state.target_id
        target_digests = self.upgrade_state.target_digests
        target_version = self.upgrade_state.target_version

        # `first` is set when the image info is resolved in this call; it only
        # gates the extra "Target is ..." log lines further down.
        first = False
        if not target_id or not target_version or not target_digests:
            # need to learn the container hash
            logger.info('Upgrade: First pull of %s' % target_image)
            self.upgrade_info_str = 'Doing first pull of %s image' % (
                target_image)
            try:
                target_id, target_version, target_digests = CephadmServe(
                    self.mgr)._get_container_image_info(target_image)
            except OrchestratorError as e:
                self._fail_upgrade(
                    'UPGRADE_FAILED_PULL', {
                        'severity': 'warning',
                        'summary': 'Upgrade: failed to pull target image',
                        'count': 1,
                        'detail': [str(e)],
                    })
                return
            if not target_version:
                self._fail_upgrade(
                    'UPGRADE_FAILED_PULL', {
                        'severity':
                        'warning',
                        'summary':
                        'Upgrade: failed to pull target image',
                        'count':
                        1,
                        'detail':
                        ['unable to extract ceph version from container'],
                    })
                return
            self.upgrade_state.target_id = target_id
            # extract the version portion of 'ceph version {version} ({sha1})'
            self.upgrade_state.target_version = target_version.split(' ')[2]
            self.upgrade_state.target_digests = target_digests
            self._save_upgrade_state()
            # re-read the target image now that the upgrade state has changed
            target_image = self.target_image
            first = True

        if target_digests is None:
            target_digests = []
        # The local target_version may still be the long 'ceph version ...'
        # form (fresh pull above, or state saved by an older version); reduce
        # it to the bare version number.
        if target_version.startswith('ceph version '):
            # tolerate/fix upgrade state from older version
            self.upgrade_state.target_version = target_version.split(' ')[2]
            target_version = self.upgrade_state.target_version
        target_major, target_minor, target_patch = target_version.split('.')
        target_major_name = self.mgr.lookup_release_name(int(target_major))

        if first:
            logger.info('Upgrade: Target is version %s (%s)' %
                        (target_version, target_major_name))
            logger.info('Upgrade: Target container is %s, digests %s' %
                        (target_image, target_digests))

        version_error = self._check_target_version(target_version)
        if version_error:
            self._fail_upgrade(
                'UPGRADE_BAD_TARGET_VERSION', {
                    'severity': 'error',
                    'summary':
                    f'Upgrade: cannot upgrade/downgrade to {target_version}',
                    'count': 1,
                    'detail': [version_error],
                })
            return

        image_settings = self.get_distinct_container_image_settings()

        # Only daemon types that take part in the upgrade are considered.
        daemons = [
            d for d in self.mgr.cache.get_daemons()
            if d.daemon_type in CEPH_UPGRADE_ORDER
        ]
        # number of daemons found fully up to date; used for progress
        done = 0
        for daemon_type in CEPH_UPGRADE_ORDER:
            logger.debug('Upgrade: Checking %s daemons' % daemon_type)

            need_upgrade_self = False
            # Entries are (daemon, deployer_only): the flag is False when the
            # daemon's container is outdated and True when only the
            # `deployed_by` digests are outdated.
            need_upgrade: List[Tuple[DaemonDescription, bool]] = []
            need_upgrade_deployer: List[Tuple[DaemonDescription, bool]] = []
            for d in daemons:
                if d.daemon_type != daemon_type:
                    continue
                assert d.daemon_type is not None
                assert d.daemon_id is not None
                correct_digest = False
                # NOTE: the generator expressions below reuse the name `d` for
                # their loop variable. The iterable (`d.container_image_digests`,
                # `d.deployed_by`) is evaluated with the daemon `d`, and the
                # daemon `d` is untouched once the expression finishes.
                if (any(d in target_digests
                        for d in (d.container_image_digests or []))
                        or d.daemon_type in MONITORING_STACK_TYPES):
                    logger.debug('daemon %s.%s container digest correct' %
                                 (daemon_type, d.daemon_id))
                    correct_digest = True
                    if any(d in target_digests for d in (d.deployed_by or [])):
                        logger.debug(
                            'daemon %s.%s deployed by correct version' %
                            (d.daemon_type, d.daemon_id))
                        done += 1
                        continue

                # The active mgr is not redeployed here; it is handled through
                # the fail-over further down.
                if self.mgr.daemon_is_self(d.daemon_type, d.daemon_id):
                    logger.info('Upgrade: Need to upgrade myself (mgr.%s)' %
                                self.mgr.get_mgr_id())
                    need_upgrade_self = True
                    continue

                if correct_digest:
                    logger.debug(
                        'daemon %s.%s not deployed by correct version' %
                        (d.daemon_type, d.daemon_id))
                    need_upgrade_deployer.append((d, True))
                else:
                    logger.debug(
                        'daemon %s.%s not correct (%s, %s, %s)' %
                        (daemon_type, d.daemon_id, d.container_image_name,
                         d.container_image_digests, d.version))
                    need_upgrade.append((d, False))

            if not need_upgrade_self:
                # only after the mgr itself is upgraded can we expect daemons to have
                # deployed_by == target_digests
                need_upgrade += need_upgrade_deployer

            # prepare filesystems for daemon upgrades?
            if (daemon_type == 'mds' and need_upgrade
                    and not self._prepare_for_mds_upgrade(
                        target_major, [d_entry[0]
                                       for d_entry in need_upgrade])):
                return

            if need_upgrade:
                self.upgrade_info_str = 'Currently upgrading %s daemons' % (
                    daemon_type)

            # Select the batch to redeploy in this call. Without a
            # known_ok_to_stop list the batch is a single daemon.
            to_upgrade: List[Tuple[DaemonDescription, bool]] = []
            known_ok_to_stop: List[str] = []
            for d_entry in need_upgrade:
                d = d_entry[0]
                assert d.daemon_type is not None
                assert d.daemon_id is not None
                assert d.hostname is not None

                if not d.container_image_id:
                    if d.container_image_name == target_image:
                        logger.debug(
                            'daemon %s has unknown container_image_id but has correct image name'
                            % (d.name()))
                        continue

                # Once an earlier daemon produced a known_ok_to_stop list, only
                # daemons named in that list are added to the batch.
                if known_ok_to_stop:
                    if d.name() in known_ok_to_stop:
                        logger.info(
                            f'Upgrade: {d.name()} is also safe to restart')
                        to_upgrade.append(d_entry)
                    continue

                if d.daemon_type in ['mon', 'osd', 'mds']:
                    # NOTE: known_ok_to_stop is an output argument for
                    # _wait_for_ok_to_stop
                    if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
                        return

                to_upgrade.append(d_entry)

                # if we don't have a list of others to consider, stop now
                if not known_ok_to_stop:
                    break

            # 1-based position within the batch, used only for logging
            num = 1
            for d_entry in to_upgrade:
                d = d_entry[0]
                assert d.daemon_type is not None
                assert d.daemon_id is not None
                assert d.hostname is not None

                self._update_upgrade_progress(done / len(daemons))

                # make sure host has latest container image
                out, errs, code = CephadmServe(self.mgr)._run_cephadm(
                    d.hostname,
                    '',
                    'inspect-image', [],
                    image=target_image,
                    no_fsid=True,
                    error_ok=True)
                # Pull when inspect-image failed or none of the host's repo
                # digests for the image is a target digest.
                if code or not any(d in target_digests
                                   for d in json.loads(''.join(out)).get(
                                       'repo_digests', [])):
                    logger.info('Upgrade: Pulling %s on %s' %
                                (target_image, d.hostname))
                    self.upgrade_info_str = 'Pulling %s image on host %s' % (
                        target_image, d.hostname)
                    out, errs, code = CephadmServe(self.mgr)._run_cephadm(
                        d.hostname,
                        '',
                        'pull', [],
                        image=target_image,
                        no_fsid=True,
                        error_ok=True)
                    if code:
                        self._fail_upgrade(
                            'UPGRADE_FAILED_PULL', {
                                'severity':
                                'warning',
                                'summary':
                                'Upgrade: failed to pull target image',
                                'count':
                                1,
                                'detail': [
                                    'failed to pull %s on host %s' %
                                    (target_image, d.hostname)
                                ],
                            })
                        return
                    r = json.loads(''.join(out))
                    # The pull produced digests that do not match the target:
                    # adopt them as the new target digests and return so the
                    # next call starts over with them.
                    if not any(d in target_digests
                               for d in r.get('repo_digests', [])):
                        logger.info(
                            'Upgrade: image %s pull on %s got new digests %s (not %s), restarting'
                            % (target_image, d.hostname, r['repo_digests'],
                               target_digests))
                        self.upgrade_info_str = 'Image %s pull on %s got new digests %s (not %s), restarting' % (
                            target_image, d.hostname, r['repo_digests'],
                            target_digests)
                        self.upgrade_state.target_digests = r['repo_digests']
                        self._save_upgrade_state()
                        return

                    self.upgrade_info_str = 'Currently upgrading %s daemons' % (
                        daemon_type)

                if len(to_upgrade) > 1:
                    logger.info(
                        'Upgrade: Updating %s.%s (%d/%d)' %
                        (d.daemon_type, d.daemon_id, num, len(to_upgrade)))
                else:
                    logger.info('Upgrade: Updating %s.%s' %
                                (d.daemon_type, d.daemon_id))
                # d_entry[1] is the deployer-only flag: such daemons are
                # redeployed without passing an explicit image.
                action = 'Upgrading' if not d_entry[1] else 'Redeploying'
                try:
                    daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(
                        d)
                    self.mgr._daemon_action(
                        daemon_spec,
                        'redeploy',
                        image=target_image if not d_entry[1] else None)
                except Exception as e:
                    self._fail_upgrade(
                        'UPGRADE_REDEPLOY_DAEMON', {
                            'severity': 'warning',
                            'summary':
                            f'{action} daemon {d.name()} on host {d.hostname} failed.',
                            'count': 1,
                            'detail': [f'Upgrade daemon: {d.name()}: {e}'],
                        })
                    return
                num += 1
            # A batch was redeployed in this call; stop here and let the next
            # call re-evaluate the daemons.
            if to_upgrade:
                return

            # complete mon upgrade?
            if daemon_type == 'mon':
                if not self.mgr.get("have_local_config_map"):
                    logger.info(
                        'Upgrade: Restarting mgr now that mons are running pacific'
                    )
                    need_upgrade_self = True

            if need_upgrade_self:
                try:
                    self.mgr.mgr_service.fail_over()
                except OrchestratorError as e:
                    self._fail_upgrade(
                        'UPGRADE_NO_STANDBY_MGR', {
                            'severity':
                            'warning',
                            'summary':
                            f'Upgrade: {e}',
                            'count':
                            1,
                            'detail': [
                                'The upgrade process needs to upgrade the mgr, '
                                'but it needs at least one standby to proceed.',
                            ],
                        })
                    return

                return  # unreachable code, as fail_over never returns
            elif daemon_type == 'mgr':
                # no fail-over needed for the mgr: clear a previously raised
                # no-standby health check, if any
                if 'UPGRADE_NO_STANDBY_MGR' in self.mgr.health_checks:
                    del self.mgr.health_checks['UPGRADE_NO_STANDBY_MGR']
                    self.mgr.set_health_checks(self.mgr.health_checks)

            # make sure 'ceph versions' agrees
            ret, out_ver, err = self.mgr.check_mon_command({
                'prefix':
                'versions',
            })
            j = json.loads(out_ver)
            # A mismatch is only logged as a warning; it does not stop the
            # upgrade.
            for version, count in j.get(daemon_type, {}).items():
                short_version = version.split(' ')[2]
                if short_version != target_version:
                    logger.warning(
                        'Upgrade: %d %s daemon(s) are %s != target %s' %
                        (count, daemon_type, short_version, target_version))

            # push down configs
            daemon_type_section = name_to_config_section(daemon_type)
            if image_settings.get(daemon_type_section) != target_image:
                logger.info('Upgrade: Setting container_image for all %s' %
                            daemon_type)
                self.mgr.set_container_image(daemon_type_section, target_image)
            # per-daemon container_image overrides ('<section>.<id>') of this
            # type are removed now that the type-wide setting is in place
            to_clean = []
            for section in image_settings.keys():
                if section.startswith(
                        name_to_config_section(daemon_type) + '.'):
                    to_clean.append(section)
            if to_clean:
                logger.debug('Upgrade: Cleaning up container_image for %s' %
                             to_clean)
                for section in to_clean:
                    ret, image, err = self.mgr.check_mon_command({
                        'prefix':
                        'config rm',
                        'name':
                        'container_image',
                        'who':
                        section,
                    })

            logger.debug('Upgrade: All %s daemons are up to date.' %
                         daemon_type)

            # complete osd upgrade?
            if daemon_type == 'osd':
                osdmap = self.mgr.get("osd_map")
                osd_min_name = osdmap.get("require_osd_release", "argonaut")
                osd_min = ceph_release_to_major(osd_min_name)
                # raise require_osd_release when it is below the target major
                if osd_min < int(target_major):
                    logger.info(
                        f'Upgrade: Setting require_osd_release to {target_major} {target_major_name}'
                    )
                    ret, _, err = self.mgr.check_mon_command({
                        'prefix':
                        'osd require-osd-release',
                        'release':
                        target_major_name,
                    })

            # complete mds upgrade?
            if daemon_type == 'mds' and self.upgrade_state.fs_original_max_mds:
                # restore each filesystem's max_mds to the value recorded in
                # the upgrade state
                for i in self.mgr.get("fs_map")['filesystems']:
                    fs_id = i["id"]
                    fs_name = i['mdsmap']['fs_name']
                    new_max = self.upgrade_state.fs_original_max_mds.get(fs_id)
                    if new_max:
                        self.mgr.log.info(
                            'Upgrade: Scaling up filesystem %s max_mds to %d' %
                            (fs_name, new_max))
                        ret, _, err = self.mgr.check_mon_command({
                            'prefix':
                            'fs set',
                            'fs_name':
                            fs_name,
                            'var':
                            'max_mds',
                            'val':
                            str(new_max),
                        })

                self.upgrade_state.fs_original_max_mds = {}
                self._save_upgrade_state()

        # clean up
        # Reached only when no daemon type returned early above.
        logger.info('Upgrade: Finalizing container_image settings')
        self.mgr.set_container_image('global', target_image)

        # the per-type settings are redundant once the global one is set
        for daemon_type in CEPH_UPGRADE_ORDER:
            ret, image, err = self.mgr.check_mon_command({
                'prefix':
                'config rm',
                'name':
                'container_image',
                'who':
                name_to_config_section(daemon_type),
            })

        logger.info('Upgrade: Complete!')
        if self.upgrade_state.progress_id:
            self.mgr.remote('progress', 'complete',
                            self.upgrade_state.progress_id)
        self.upgrade_state = None
        self._save_upgrade_state()
        return
예제 #7
0
파일: upgrade.py 프로젝트: niliushall/ceph
    def _do_upgrade(self):
        # type: () -> None
        if not self.upgrade_state:
            logger.debug('_do_upgrade no state, exiting')
            return

        target_name = self.upgrade_state.target_name
        target_id = self.upgrade_state.target_id
        if not target_id:
            # need to learn the container hash
            logger.info('Upgrade: First pull of %s' % target_name)
            try:
                target_id, target_version = self.mgr._get_container_image_id(
                    target_name)
            except OrchestratorError as e:
                self._fail_upgrade(
                    'UPGRADE_FAILED_PULL', {
                        'severity': 'warning',
                        'summary': 'Upgrade: failed to pull target image',
                        'count': 1,
                        'detail': [str(e)],
                    })
                return
            self.upgrade_state.target_id = target_id
            self.upgrade_state.target_version = target_version
            self._save_upgrade_state()
        target_version = self.upgrade_state.target_version
        logger.info('Upgrade: Target is %s with id %s' %
                    (target_name, target_id))

        # get all distinct container_image settings
        image_settings = {}
        ret, out, err = self.mgr.check_mon_command({
            'prefix': 'config dump',
            'format': 'json',
        })
        config = json.loads(out)
        for opt in config:
            if opt['name'] == 'container_image':
                image_settings[opt['section']] = opt['value']

        daemons = self.mgr.cache.get_daemons()
        done = 0
        for daemon_type in CEPH_UPGRADE_ORDER:
            logger.info('Upgrade: Checking %s daemons...' % daemon_type)
            need_upgrade_self = False
            for d in daemons:
                if d.daemon_type != daemon_type:
                    continue
                if d.container_image_id == target_id:
                    logger.debug('daemon %s.%s version correct' %
                                 (daemon_type, d.daemon_id))
                    done += 1
                    continue
                logger.debug('daemon %s.%s not correct (%s, %s, %s)' %
                             (daemon_type, d.daemon_id, d.container_image_name,
                              d.container_image_id, d.version))

                if daemon_type == 'mgr' and \
                   d.daemon_id == self.mgr.get_mgr_id():
                    logger.info('Upgrade: Need to upgrade myself (mgr.%s)' %
                                self.mgr.get_mgr_id())
                    need_upgrade_self = True
                    continue

                # make sure host has latest container image
                out, err, code = self.mgr._run_cephadm(d.hostname,
                                                       '',
                                                       'inspect-image', [],
                                                       image=target_name,
                                                       no_fsid=True,
                                                       error_ok=True)
                if code or json.loads(
                        ''.join(out)).get('image_id') != target_id:
                    logger.info('Upgrade: Pulling %s on %s' %
                                (target_name, d.hostname))
                    out, err, code = self.mgr._run_cephadm(d.hostname,
                                                           '',
                                                           'pull', [],
                                                           image=target_name,
                                                           no_fsid=True,
                                                           error_ok=True)
                    if code:
                        self._fail_upgrade(
                            'UPGRADE_FAILED_PULL', {
                                'severity':
                                'warning',
                                'summary':
                                'Upgrade: failed to pull target image',
                                'count':
                                1,
                                'detail': [
                                    'failed to pull %s on host %s' %
                                    (target_name, d.hostname)
                                ],
                            })
                        return
                    r = json.loads(''.join(out))
                    if r.get('image_id') != target_id:
                        logger.info(
                            'Upgrade: image %s pull on %s got new image %s (not %s), restarting'
                            % (target_name, d.hostname, r['image_id'],
                               target_id))
                        self.upgrade_state.target_id = r['image_id']
                        self._save_upgrade_state()
                        return

                self._update_upgrade_progress(done / len(daemons))

                if not d.container_image_id:
                    if d.container_image_name == target_name:
                        logger.debug(
                            'daemon %s has unknown container_image_id but has correct image name'
                            % (d.name()))
                        continue
                if not self._wait_for_ok_to_stop(d):
                    return
                logger.info('Upgrade: Redeploying %s.%s' %
                            (d.daemon_type, d.daemon_id))
                ret, out, err = self.mgr.check_mon_command({
                    'prefix':
                    'config set',
                    'name':
                    'container_image',
                    'value':
                    target_name,
                    'who':
                    name_to_config_section(daemon_type + '.' + d.daemon_id),
                })
                self.mgr._daemon_action(d.daemon_type, d.daemon_id, d.hostname,
                                        'redeploy')
                return

            if need_upgrade_self:
                mgr_map = self.mgr.get('mgr_map')
                num = len(mgr_map.get('standbys'))
                if not num:
                    self._fail_upgrade(
                        'UPGRADE_NO_STANDBY_MGR', {
                            'severity':
                            'warning',
                            'summary':
                            'Upgrade: Need standby mgr daemon',
                            'count':
                            1,
                            'detail': [
                                'The upgrade process needs to upgrade the mgr, '
                                'but it needs at least one standby to proceed.',
                            ],
                        })
                    return

                logger.info('Upgrade: there are %d other already-upgraded '
                            'standby mgrs, failing over' % num)

                self._update_upgrade_progress(done / len(daemons))

                # fail over
                ret, out, err = self.mgr.check_mon_command({
                    'prefix':
                    'mgr fail',
                    'who':
                    self.mgr.get_mgr_id(),
                })
                return
            elif daemon_type == 'mgr':
                if 'UPGRADE_NO_STANDBY_MGR' in self.mgr.health_checks:
                    del self.mgr.health_checks['UPGRADE_NO_STANDBY_MGR']
                    self.mgr.set_health_checks(self.mgr.health_checks)

            # make sure 'ceph versions' agrees
            ret, out_ver, err = self.mgr.check_mon_command({
                'prefix':
                'versions',
            })
            j = json.loads(out_ver)
            for version, count in j.get(daemon_type, {}).items():
                if version != target_version:
                    logger.warning(
                        'Upgrade: %d %s daemon(s) are %s != target %s' %
                        (count, daemon_type, version, target_version))

            # push down configs
            if image_settings.get(daemon_type) != target_name:
                logger.info('Upgrade: Setting container_image for all %s...' %
                            daemon_type)
                ret, out, err = self.mgr.check_mon_command({
                    'prefix':
                    'config set',
                    'name':
                    'container_image',
                    'value':
                    target_name,
                    'who':
                    name_to_config_section(daemon_type),
                })
            to_clean = []
            for section in image_settings.keys():
                if section.startswith(
                        name_to_config_section(daemon_type) + '.'):
                    to_clean.append(section)
            if to_clean:
                logger.debug('Upgrade: Cleaning up container_image for %s...' %
                             to_clean)
                for section in to_clean:
                    ret, image, err = self.mgr.check_mon_command({
                        'prefix':
                        'config rm',
                        'name':
                        'container_image',
                        'who':
                        section,
                    })

            logger.info('Upgrade: All %s daemons are up to date.' %
                        daemon_type)

        # clean up
        logger.info('Upgrade: Finalizing container_image settings')
        ret, out, err = self.mgr.check_mon_command({
            'prefix': 'config set',
            'name': 'container_image',
            'value': target_name,
            'who': 'global',
        })
        for daemon_type in CEPH_UPGRADE_ORDER:
            ret, image, err = self.mgr.check_mon_command({
                'prefix':
                'config rm',
                'name':
                'container_image',
                'who':
                name_to_config_section(daemon_type),
            })

        logger.info('Upgrade: Complete!')
        if self.upgrade_state.progress_id:
            self.mgr.remote('progress', 'complete',
                            self.upgrade_state.progress_id)
        self.upgrade_state = None
        self._save_upgrade_state()
        return
예제 #8
0
파일: nfs.py 프로젝트: zoumingzhe/ceph
 def get_keyring_entity(self) -> str:
     """Return the keyring entity for this service.

     The entity is the RADOS user name (from ``get_rados_user()``) mapped
     through ``utils.name_to_config_section``.
     """
     rados_user = self.get_rados_user()
     return utils.name_to_config_section(rados_user)
예제 #9
0
파일: upgrade.py 프로젝트: zhouwei128/ceph
    def _do_upgrade(self):
        # type: () -> None
        """Advance the in-progress upgrade by (at most) one step.

        Does nothing when there is no upgrade state.  Otherwise it:

        1. Learns the target image id / version / repo digest if they are
           not recorded yet (failing the upgrade if the pull fails).
        2. Walks the daemon types in ``CEPH_UPGRADE_ORDER``.  For the first
           daemon found that does not run the target image id, it makes sure
           the host has the image, waits until the daemon is ok to stop,
           triggers a single redeploy and returns.
        3. When the active mgr itself needs upgrading, asks the mgr service
           to fail over.
        4. Once every daemon of a type is up to date, pushes the
           ``container_image`` setting down to the type-level config section
           and removes the per-daemon overrides.
        5. When all types are done, sets ``container_image`` globally,
           removes the per-type settings, completes the progress event and
           clears the upgrade state.

        The method returns after every state-changing step (redeploy, new
        image id learned, failure); presumably the caller invokes it again
        to continue -- confirm against the serve loop.
        """
        if not self.upgrade_state:
            logger.debug('_do_upgrade no state, exiting')
            return

        target_image = self.target_image
        target_id = self.upgrade_state.target_id
        # (Re)learn the image info when the id is unknown, or when repo
        # digests are in use but none has been recorded yet.
        if not target_id or (self.mgr.use_repo_digest
                             and not self.upgrade_state.repo_digest):
            # need to learn the container hash
            logger.info('Upgrade: First pull of %s' % target_image)
            try:
                target_id, target_version, repo_digest = CephadmServe(
                    self.mgr)._get_container_image_info(target_image)
            except OrchestratorError as e:
                self._fail_upgrade(
                    'UPGRADE_FAILED_PULL', {
                        'severity': 'warning',
                        'summary': 'Upgrade: failed to pull target image',
                        'count': 1,
                        'detail': [str(e)],
                    })
                return
            self.upgrade_state.target_id = target_id
            self.upgrade_state.target_version = target_version
            self.upgrade_state.repo_digest = repo_digest
            self._save_upgrade_state()
            # Re-read after the state update; presumably ``target_image``
            # is derived from the saved state (e.g. the repo digest) --
            # TODO confirm.
            target_image = self.target_image
        target_version = self.upgrade_state.target_version
        logger.info('Upgrade: Target is %s with id %s' %
                    (target_image, target_id))

        image_settings = self.get_distinct_container_image_settings()

        daemons = self.mgr.cache.get_daemons()
        # Number of daemons already on the target image id, accumulated
        # across all daemon types and used for the progress fraction.
        done = 0
        for daemon_type in CEPH_UPGRADE_ORDER:
            logger.info('Upgrade: Checking %s daemons...' % daemon_type)
            need_upgrade_self = False
            for d in daemons:
                if d.daemon_type != daemon_type:
                    continue
                if d.container_image_id == target_id:
                    logger.debug('daemon %s.%s version correct' %
                                 (daemon_type, d.daemon_id))
                    done += 1
                    continue
                logger.debug('daemon %s.%s not correct (%s, %s, %s)' %
                             (daemon_type, d.daemon_id, d.container_image_name,
                              d.container_image_id, d.version))

                # These fields are used unconditionally below.
                assert d.daemon_type is not None
                assert d.daemon_id is not None
                assert d.hostname is not None

                # The running mgr is not redeployed here; it is handled
                # after the loop, once no other daemon of this type needed
                # a redeploy in this pass.
                if self.mgr.daemon_is_self(d.daemon_type, d.daemon_id):
                    logger.info('Upgrade: Need to upgrade myself (mgr.%s)' %
                                self.mgr.get_mgr_id())
                    need_upgrade_self = True
                    continue

                # make sure host has latest container image
                out, err, code = CephadmServe(self.mgr)._run_cephadm(
                    d.hostname,
                    '',
                    'inspect-image', [],
                    image=target_image,
                    no_fsid=True,
                    error_ok=True)
                # Pull when the inspect failed or the host's image id does
                # not match the target id.
                if code or json.loads(
                        ''.join(out)).get('image_id') != target_id:
                    logger.info('Upgrade: Pulling %s on %s' %
                                (target_image, d.hostname))
                    out, err, code = CephadmServe(self.mgr)._run_cephadm(
                        d.hostname,
                        '',
                        'pull', [],
                        image=target_image,
                        no_fsid=True,
                        error_ok=True)
                    if code:
                        self._fail_upgrade(
                            'UPGRADE_FAILED_PULL', {
                                'severity':
                                'warning',
                                'summary':
                                'Upgrade: failed to pull target image',
                                'count':
                                1,
                                'detail': [
                                    'failed to pull %s on host %s' %
                                    (target_image, d.hostname)
                                ],
                            })
                        return
                    r = json.loads(''.join(out))
                    # The pull yielded a different image id than the one
                    # recorded: store the new id and return so the next
                    # invocation starts over against it.
                    # NOTE(review): r['image_id'] raises KeyError if the
                    # pull output has no 'image_id' key -- confirm that
                    # cannot happen.
                    if r.get('image_id') != target_id:
                        logger.info(
                            'Upgrade: image %s pull on %s got new image %s (not %s), restarting'
                            % (target_image, d.hostname, r['image_id'],
                               target_id))
                        self.upgrade_state.target_id = r['image_id']
                        self._save_upgrade_state()
                        return

                self._update_upgrade_progress(done / len(daemons))

                # A daemon with no known image id but the right image name
                # is skipped (and is not counted in ``done``).
                if not d.container_image_id:
                    if d.container_image_name == target_image:
                        logger.debug(
                            'daemon %s has unknown container_image_id but has correct image name'
                            % (d.name()))
                        continue
                # Abort this pass if the daemon cannot be stopped yet.
                if not self._wait_for_ok_to_stop(d):
                    return
                logger.info('Upgrade: Redeploying %s.%s' %
                            (d.daemon_type, d.daemon_id))
                try:
                    self.mgr._daemon_action(d.daemon_type,
                                            d.daemon_id,
                                            d.hostname,
                                            'redeploy',
                                            image=target_image)
                except Exception as e:
                    self._fail_upgrade(
                        'UPGRADE_REDEPLOY_DAEMON', {
                            'severity': 'warning',
                            'summary':
                            f'Upgrading daemon {d.name()} on host {d.hostname} failed.',
                            'count': 1,
                            'detail': [f'Upgrade daemon: {d.name()}: {e}'],
                        })
                # Only one redeploy attempt per invocation, whether it
                # succeeded or failed.
                return

            if need_upgrade_self:
                try:
                    self.mgr.mgr_service.fail_over()
                except OrchestratorError as e:
                    self._fail_upgrade(
                        'UPGRADE_NO_STANDBY_MGR', {
                            'severity':
                            'warning',
                            'summary':
                            f'Upgrade: {e}',
                            'count':
                            1,
                            'detail': [
                                'The upgrade process needs to upgrade the mgr, '
                                'but it needs at least one standby to proceed.',
                            ],
                        })
                    return

                return  # unreachable code, as fail_over never returns
            elif daemon_type == 'mgr':
                # The mgr did not need to upgrade itself in this pass, so
                # drop a previously raised no-standby health check, if any.
                if 'UPGRADE_NO_STANDBY_MGR' in self.mgr.health_checks:
                    del self.mgr.health_checks['UPGRADE_NO_STANDBY_MGR']
                    self.mgr.set_health_checks(self.mgr.health_checks)

            # make sure 'ceph versions' agrees
            ret, out_ver, err = self.mgr.check_mon_command({
                'prefix':
                'versions',
            })
            j = json.loads(out_ver)
            # A mismatch is only logged; it does not stop the upgrade.
            for version, count in j.get(daemon_type, {}).items():
                if version != target_version:
                    logger.warning(
                        'Upgrade: %d %s daemon(s) are %s != target %s' %
                        (count, daemon_type, version, target_version))

            # push down configs
            if image_settings.get(daemon_type) != target_image:
                logger.info('Upgrade: Setting container_image for all %s...' %
                            daemon_type)
                self.mgr.set_container_image(
                    name_to_config_section(daemon_type), target_image)
            # Collect the sections under this type ("<type-section>.<id>")
            # so their container_image overrides can be removed.
            to_clean = []
            for section in image_settings.keys():
                if section.startswith(
                        name_to_config_section(daemon_type) + '.'):
                    to_clean.append(section)
            if to_clean:
                logger.debug('Upgrade: Cleaning up container_image for %s...' %
                             to_clean)
                for section in to_clean:
                    ret, image, err = self.mgr.check_mon_command({
                        'prefix':
                        'config rm',
                        'name':
                        'container_image',
                        'who':
                        section,
                    })

            logger.info('Upgrade: All %s daemons are up to date.' %
                        daemon_type)

        # clean up
        # Every daemon type is done: set the image globally, then remove
        # the per-type container_image settings.
        logger.info('Upgrade: Finalizing container_image settings')
        self.mgr.set_container_image('global', target_image)

        for daemon_type in CEPH_UPGRADE_ORDER:
            ret, image, err = self.mgr.check_mon_command({
                'prefix':
                'config rm',
                'name':
                'container_image',
                'who':
                name_to_config_section(daemon_type),
            })

        logger.info('Upgrade: Complete!')
        if self.upgrade_state.progress_id:
            self.mgr.remote('progress', 'complete',
                            self.upgrade_state.progress_id)
        # Clear and persist the (now empty) upgrade state.
        self.upgrade_state = None
        self._save_upgrade_state()
        return