async def _update(self) -> None:
    """Refresh cached cluster storage statistics and per-pool info.

    Queries the mons via `ceph df` and rebuilds the state's overall
    stats plus the by-id and by-name pool lookup tables.

    Raises:
        StorageError: if the cluster can't be queried.
    """
    try:
        cluster_df = Mon().df()
    except Exception as e:
        raise StorageError("error obtaining info from cluster") from e

    totals = cluster_df.stats
    self._state.stats = StorageStatsModel(
        total=totals.total_bytes,
        available=totals.total_avail_bytes,
        used=totals.total_used_bytes,
        raw_used=totals.total_used_raw_bytes,
        raw_used_ratio=totals.total_used_raw_ratio)

    pool_models = [
        StoragePoolModel(
            id=entry.id,
            name=entry.name,
            stats=StoragePoolStatsModel(
                used=entry.stats.bytes_used,
                percent_used=entry.stats.percent_used,
                max_available=entry.stats.max_avail))
        for entry in cluster_df.pools
    ]
    # both tables reference the same model instances, keyed differently
    self._state.pools_by_id = {pool.id: pool for pool in pool_models}
    self._state.pools_by_name = {pool.name: pool for pool in pool_models}
async def _handle_ready_to_add(
    self, conn: IncomingConnection, msg: ReadyToAddMessageModel
) -> None:
    """Handle a joining node announcing it is ready to be added.

    Rejects peers that never started joining; otherwise registers the
    host with the orchestrator and resets the replicated crush ruleset
    so replicas spread across hosts rather than OSDs.
    """
    logger.debug(f"handle ready to add from {conn}")
    peer_address: str = conn.address

    # guard: only nodes currently in the joining set may be added
    if peer_address not in self._joining:
        logger.info(f"handle ready to add > unknown node {conn}")
        reply = MessageModel(
            type=MessageTypeEnum.ERROR,
            data=ErrorMessageModel(
                what="node not joining",
                code=status.HTTP_428_PRECONDITION_REQUIRED
            )
        )
        await conn.send_msg(reply)
        return

    node: JoiningNodeModel = self._joining[peer_address]
    logger.info("handle ready to add > "
                f"hostname: {node.hostname}, address: {node.address}")

    if not Orchestrator().host_add(node.hostname, node.address):
        logger.error("handle ready > failed adding host to orch")

    # reset default crush ruleset, and adjust pools to use a multi-node
    # ruleset, spreading replicas across hosts rather than osds.
    if not Mon().set_replicated_ruleset():
        logger.error(
            "handle ready to add > unable to set replicated ruleset")
def test_set_pool_size(
    ceph_conf_file_fs: Generator[fake_filesystem.FakeFilesystem, None, None],
    mocker: MockerFixture,
):
    """set_pool_size must issue `osd pool set` for both size and min_size."""
    from gravel.controllers.orch.ceph import Ceph, Mon

    def check_call(cls: Any, args: Dict[str, Any]) -> Any:
        # every invocation must carry the full `osd pool set` argument set
        for key in ("prefix", "pool", "var", "val"):
            assert key in args
        assert args["prefix"] == "osd pool set"
        assert args["pool"] == "foobar"
        assert args["var"] in ["size", "min_size"]
        if args["var"] == "size":
            # size 2 never needs the single-replica safety override
            assert args["val"] == "2"
            assert "i_really_mean_it" not in args
        else:
            assert args["var"] == "min_size"
            assert args["val"] == "1"

    mocker.patch.object(
        Mon, "call", new=check_call  # type:ignore
    )
    mon = Mon(Ceph())
    mon.set_pool_size("foobar", 2)
async def main():
    """Exercise the storage ticker and dump raw `mon df` output."""
    await storage.tick()
    print(await storage.usage())
    # individual figures, then verify they are internally consistent
    for figure in (storage.available, storage.used, storage.total):
        print(figure)
    assert storage.total == storage.used + storage.available
    print(Mon().df().json(indent=2))
def test_mon_df(
    ceph_conf_file_fs: Generator[fake_filesystem.FakeFilesystem, None, None],
    mocker: MockerFixture,
    get_data_contents: Callable[[str, str], str]
):
    """df() should parse canned `mon df` JSON into the stats model."""
    mon = Mon()
    raw = get_data_contents(DATA_DIR, 'mon_df_raw.json')
    mon.call = mocker.MagicMock(return_value=json.loads(raw))
    result = mon.df()
    assert result.stats.total_bytes == 0
def test_get_pools(
    ceph_conf_file_fs: Generator[fake_filesystem.FakeFilesystem, None, None],
    mocker: MockerFixture,
    get_data_contents: Callable[[str, str], str]
):
    """get_pools() should return no pools for the canned empty osdmap."""
    mon = Mon()
    raw = get_data_contents(DATA_DIR, 'mon_osdmap_raw.json')
    mon.call = mocker.MagicMock(return_value=json.loads(raw))
    pools = mon.get_pools()
    assert len(pools) == 0
def test_get_pools(
    ceph_conf_file_fs: Generator[fake_filesystem.FakeFilesystem, None, None],
    mocker: MockerFixture,
    get_data_contents: Callable[[str, str], str],
):
    """get_pools() should return no pools for the canned empty osdmap."""
    from gravel.controllers.orch.ceph import Ceph, Mon

    mon = Mon(Ceph())
    raw = get_data_contents(DATA_DIR, "mon_osdmap_raw.json")
    mon.call = mocker.MagicMock(return_value=json.loads(raw))
    pools = mon.get_pools()
    assert len(pools) == 0
def _create_cephfs(self, svc: ServiceModel) -> None:
    """Create a cephfs volume for `svc` and size its pools.

    Raises:
        ServiceError: on cephfs creation/listing failure, or if a pool
            backing the filesystem is unknown to the cluster.
    """
    cephfs = CephFS()
    try:
        cephfs.create(svc.name)
    except CephFSError as e:
        raise ServiceError("unable to create cephfs service") from e

    try:
        fs: CephFSListEntryModel = cephfs.get_fs_info(svc.name)
    except CephFSError as e:
        raise ServiceError("unable to list cephfs filesystems") from e
    assert fs.name == svc.name

    mon = Mon()
    known_pools = {entry.pool_name: entry for entry in mon.get_pools()}

    def _resize_and_track(name: str) -> None:
        # every pool backing the fs must exist; resize to the service's
        # replica count if needed, and record its id on the service.
        if name not in known_pools:
            raise ServiceError(f"unknown pool {name}")
        pool = known_pools[name]
        if pool.size != svc.replicas:
            mon.set_pool_size(pool.pool_name, svc.replicas)
        svc.pools.append(pool.pool)

    _resize_and_track(fs.metadata_pool)
    for data_pool_name in fs.data_pools:
        _resize_and_track(data_pool_name)
class CephFS:
    """Thin wrapper over the ceph mgr/mon command channels for cephfs."""

    mgr: Mgr
    mon: Mon

    def __init__(self):
        self.mgr = Mgr()
        self.mon = Mon()

    def create(self, name: str) -> None:
        """Create a new cephfs volume called `name`.

        Raises:
            CephFSError: if the underlying mgr command fails.
        """
        try:
            res = self.mgr.call({
                "prefix": "fs volume create",
                "name": name
            })
        except CephCommandError as e:
            raise CephFSError(e) from e
        # this command does not support json at this time, and will output
        # free-form text instead. We are not going to parse it, but we'll make
        # sure we've got something out of it.
        assert "result" in res
        assert len(res["result"]) > 0

    def volume_ls(self) -> CephFSVolumeListModel:
        """Return the list of existing cephfs volumes."""
        try:
            res = self.mgr.call({
                "prefix": "fs volume ls",
                "format": "json"
            })
        except CephCommandError as e:
            raise CephFSError(e) from e
        return CephFSVolumeListModel(
            volumes=parse_obj_as(List[CephFSNameModel], res)
        )

    def ls(self) -> List[CephFSListEntryModel]:
        """Return the list of existing filesystems."""
        try:
            res = self.mon.call({
                "prefix": "fs ls",
                "format": "json"
            })
        except CephCommandError as e:
            raise CephFSError(e) from e
        return parse_obj_as(List[CephFSListEntryModel], res)

    def get_fs_info(self, name: str) -> CephFSListEntryModel:
        """Return the `fs ls` entry for filesystem `name`.

        Raises:
            CephFSError: if no filesystem by that name exists.
        """
        for entry in self.ls():
            if entry.name == name:
                return entry
        raise CephFSError(f"unknown filesystem {name}")
def test_set_pool_default_size(
    ceph_conf_file_fs: Generator[fake_filesystem.FakeFilesystem, None, None],
    mocker: MockerFixture,
):
    """set_pool_default_size must issue a global `config set`."""
    from gravel.controllers.orch.ceph import Ceph, Mon

    def check_call(cls: Any, args: Dict[str, Any]) -> Any:
        expected = {
            "prefix": "config set",
            "who": "global",
            "name": "osd_pool_default_size",
            "value": "2",
        }
        for key, value in expected.items():
            assert key in args
            assert args[key] == value

    mocker.patch.object(Mon, "call", new=check_call)
    mon = Mon(Ceph())
    mon.set_pool_default_size(2)
def test_config_set(
    ceph_conf_file_fs: Generator[fake_filesystem.FakeFilesystem, None, None],
    mocker: MockerFixture,
):
    """config_set must pass who/name/value and avoid key/force args."""
    from gravel.controllers.orch.ceph import Ceph, Mon

    def check_call(cls: Any, args: Dict[str, Any]) -> Any:
        assert "key" not in args  # config set uses `name`
        assert "force" not in args
        expected = {
            "prefix": "config set",
            "who": "foo",
            "name": "bar",
            "value": "baz",
        }
        for key, value in expected.items():
            assert key in args
            assert args[key] == value

    mocker.patch.object(Mon, "call", new=check_call)
    mon = Mon(Ceph())
    mon.config_set("foo", "bar", "baz")
async def _finish_bootstrap_config(self) -> None:
    """Apply post-bootstrap mon settings for single-node operation.

    Both settings are best-effort: failures are logged but never raised.
    """
    mon = Mon()
    adjustments = (
        (mon.set_allow_pool_size_one, "unable to allow pool size 1"),
        (mon.disable_warn_on_no_redundancy,
         "unable to disable redundancy warning"),
    )
    for apply_setting, failure_msg in adjustments:
        try:
            apply_setting()
        except CephCommandError as e:
            logger.error(failure_msg)
            logger.debug(str(e))
def test_ceph_conf(fs: fake_filesystem.FakeFilesystem):
    """Mgr/Mon must load ceph.conf from default, custom, and missing paths."""
    # default location
    fs.add_real_file(  # pyright: reportUnknownMemberType=false
        os.path.join(TEST_DIR, 'data/default_ceph.conf'),
        target_path='/etc/ceph/ceph.conf'
    )
    Mgr()
    Mon()

    # custom location
    conf_file = '/foo/bar/baz.conf'
    fs.add_real_file(
        os.path.join(TEST_DIR, 'data/default_ceph.conf'),
        target_path=conf_file
    )
    Mgr(conf_file=conf_file)
    Mon(conf_file=conf_file)

    # invalid location
    conf_file = "missing.conf"
    # BUG FIX: Mon() used to sit in the same `pytest.raises` block as
    # Mgr(); since Mgr() raises first, Mon() was never executed. Each
    # constructor needs its own expectation so both are actually tested.
    with pytest.raises(FileNotFoundError, match=conf_file):
        Mgr(conf_file=conf_file)
    with pytest.raises(FileNotFoundError, match=conf_file):
        Mon(conf_file=conf_file)
async def aquarium_startup(_: FastAPI, aquarium_api: FastAPI):
    """Build and start all Aquarium backend state.

    Creates the global state, node manager, cephadm handle and ceph
    mgr/mon command channels, registers the periodic tickers (devices,
    status, inventory, storage), starts the node manager and tickers,
    and stashes the instances on the FastAPI application state so
    request handlers can reach them.
    """
    # debug logging only when AQUARIUM_DEBUG is set in the environment
    lvl = "INFO" if not os.getenv("AQUARIUM_DEBUG") else "DEBUG"
    setup_logging(lvl)
    logger.info("Aquarium startup!")

    gstate: GlobalState = GlobalState()

    # init node mgr
    logger.info("starting node manager")
    nodemgr: NodeMgr = NodeMgr(gstate)

    # Prep cephadm
    cephadm: Cephadm = Cephadm(gstate.config.options.containers)
    gstate.add_cephadm(cephadm)

    # Set up Ceph connections: one Ceph handle shared by mgr and mon
    ceph: Ceph = Ceph()
    ceph_mgr: Mgr = Mgr(ceph)
    gstate.add_ceph_mgr(ceph_mgr)
    ceph_mon: Mon = Mon(ceph)
    gstate.add_ceph_mon(ceph_mon)

    # Set up all of the tickers; each probes on its configured interval
    devices: Devices = Devices(
        gstate.config.options.devices.probe_interval,
        nodemgr,
        ceph_mgr,
        ceph_mon,
    )
    gstate.add_devices(devices)

    status: Status = Status(gstate.config.options.status.probe_interval,
                            gstate, nodemgr)
    gstate.add_status(status)

    inventory: Inventory = Inventory(
        gstate.config.options.inventory.probe_interval,
        nodemgr,
        gstate
    )
    gstate.add_inventory(inventory)

    storage: Storage = Storage(gstate.config.options.storage.probe_interval,
                               nodemgr, ceph_mon)
    gstate.add_storage(storage)

    # node manager starts before the tickers begin probing
    await nodemgr.start()
    await gstate.start()

    # Add instances into FastAPI's state:
    aquarium_api.state.gstate = gstate
    aquarium_api.state.nodemgr = nodemgr
async def probe(self) -> None:
    """Collect per-host OSD device info and OSD utilization.

    Combines the orchestrator's device listing with `osd df` output to
    rebuild the host→OSDs and OSD→device caches.
    """
    logger.debug("probe devices")
    host_devices: List[OrchDevicesPerHostModel] = Orchestrator().devices_ls()
    df: CephOSDDFModel = Mon().osd_df()
    if not host_devices or not df.nodes:
        logger.debug("probe > no devices to probe")
        return

    per_host: Dict[str, List[int]] = {}
    entries: Dict[int, DeviceModel] = {}
    for host_entry in host_devices:
        hostname: str = host_entry.name
        host_osds: List[int] = []
        for device in host_entry.devices:
            # only devices already consumed by LVs are backing OSDs
            if device.available or not device.lvs:
                continue
            for lv in device.lvs:
                entries[lv.osd_id] = DeviceModel(
                    host=hostname,
                    osd_id=lv.osd_id,
                    path=device.path,
                    rotational=device.sys_api.rotational,
                    vendor=device.sys_api.vendor,
                    model=device.sys_api.model)
                host_osds.append(lv.osd_id)
        per_host[hostname] = host_osds

    # attach utilization figures from `osd df` to the OSDs we found
    for node in df.nodes:
        if node.id in entries:
            entries[node.id].utilization = DeviceUtilizationModel(
                total_kb=node.kb,
                avail_kb=node.kb_avail,
                used_kb=node.kb_used,
                utilization=node.utilization)

    self._osds_per_host = per_host
    self._osd_entries = entries
class CephFS:
    """Thin wrapper over the ceph mgr/mon command channels for cephfs."""

    mgr: Mgr
    mon: Mon

    def __init__(self):
        self.mgr = Mgr()
        self.mon = Mon()

    def create(self, name: str) -> None:
        """Create a new cephfs volume `name` and schedule its MDS daemons.

        Raises:
            CephFSError: if the underlying mgr command fails.
        """
        try:
            # this is expected to be a silent command
            self.mgr.call({"prefix": "fs volume create", "name": name})
        except CephCommandError as e:
            raise CephFSError(e) from e
        # schedule orchestrator to update the number of mds instances
        Orchestrator().apply_mds(name)

    def volume_ls(self) -> CephFSVolumeListModel:
        """Return the list of existing cephfs volumes."""
        try:
            res = self.mgr.call({"prefix": "fs volume ls", "format": "json"})
        except CephCommandError as e:
            raise CephFSError(e) from e
        return CephFSVolumeListModel(
            volumes=parse_obj_as(List[CephFSNameModel], res))

    def ls(self) -> List[CephFSListEntryModel]:
        """Return the list of existing filesystems."""
        try:
            res = self.mon.call({"prefix": "fs ls", "format": "json"})
        except CephCommandError as e:
            raise CephFSError(e) from e
        return parse_obj_as(List[CephFSListEntryModel], res)

    def get_fs_info(self, name: str) -> CephFSListEntryModel:
        """Return the `fs ls` entry for filesystem `name`.

        Raises:
            CephFSError: if no filesystem by that name exists.
        """
        for entry in self.ls():
            if entry.name == name:
                return entry
        raise CephFSError(f"unknown filesystem {name}")
async def get_status() -> StatusModel:
    """Return node status, including cluster status once bootstrapped."""
    nodemgr: NodeMgr = get_node_mgr()
    stage: NodeStageEnum = nodemgr.stage
    cluster: Optional[CephStatusModel] = None
    # only a bootstrapped node that has finished joining can reach the mons
    can_query = (stage >= NodeStageEnum.BOOTSTRAPPED
                 and stage != NodeStageEnum.JOINING)
    if can_query:
        try:
            cluster = Mon().status
        except Exception:
            # best-effort: report no cluster status rather than failing
            logger.error("unable to obtain cluster status!")
    return StatusModel(cluster=cluster)
def _create_cephfs(self, svc: ServiceModel) -> None:
    """Create a cephfs volume for `svc`, size its pools, and authorize
    the default client.

    Raises:
        ServiceError: on cephfs creation/listing failure, or if a pool
            backing the filesystem is unknown to the cluster.
    """
    cephfs = CephFS()
    try:
        cephfs.create(svc.name)
    except CephFSError as e:
        raise ServiceError("unable to create cephfs service") from e

    try:
        fs: CephFSListEntryModel = cephfs.get_fs_info(svc.name)
    except CephFSError as e:
        raise ServiceError("unable to list cephfs filesystems") from e
    assert fs.name == svc.name

    mon = Mon()
    known_pools = {entry.pool_name: entry for entry in mon.get_pools()}

    def _resize_and_track(name: str) -> None:
        # every pool backing the fs must exist; resize to the service's
        # replica count if needed, and record its id on the service.
        if name not in known_pools:
            raise ServiceError(f"unknown pool {name}")
        pool = known_pools[name]
        if pool.size != svc.replicas:
            mon.set_pool_size(pool.pool_name, svc.replicas)
        svc.pools.append(pool.pool)

    _resize_and_track(fs.metadata_pool)
    for data_pool_name in fs.data_pools:
        _resize_and_track(data_pool_name)

    # create cephfs default user; authorization failure is logged, not fatal
    logger.debug("authorize default user")
    try:
        cephfs.authorize(svc.name, "default")
        logger.info(f"created cephfs client for service '{svc.name}'")
    except CephFSError as e:
        logger.error(f"Unable to authorize cephfs client: {str(e)}")
        logger.exception(e)
class CephFS:
    """Wrapper over the ceph mgr/mon command channels for cephfs
    volume, listing, and client-authorization operations.
    """

    # mgr handles volume operations; mon handles listing and auth
    mgr: Mgr
    mon: Mon

    def __init__(self):
        self.mgr = Mgr()
        self.mon = Mon()
        pass

    def create(self, name: str) -> None:
        """Create a cephfs volume `name` and schedule its MDS daemons.

        Raises:
            CephFSError: if the mgr command fails.
        """
        cmd = {"prefix": "fs volume create", "name": name}
        try:
            # this is expected to be a silent command
            self.mgr.call(cmd)
        except CephCommandError as e:
            raise CephFSError(e) from e
        # schedule orchestrator to update the number of mds instances
        orch = Orchestrator()
        orch.apply_mds(name)

    def volume_ls(self) -> CephFSVolumeListModel:
        """Return the list of existing cephfs volumes.

        Raises:
            CephFSError: if the mgr command fails.
        """
        cmd = {"prefix": "fs volume ls", "format": "json"}
        try:
            res = self.mgr.call(cmd)
        except CephCommandError as e:
            raise CephFSError(e) from e
        return CephFSVolumeListModel(
            volumes=parse_obj_as(List[CephFSNameModel], res))

    def ls(self) -> List[CephFSListEntryModel]:
        """Return the list of existing filesystems.

        Raises:
            CephFSError: if the mon command fails.
        """
        cmd = {"prefix": "fs ls", "format": "json"}
        try:
            res = self.mon.call(cmd)
        except CephCommandError as e:
            raise CephFSError(e) from e
        return parse_obj_as(List[CephFSListEntryModel], res)

    def get_fs_info(self, name: str) -> CephFSListEntryModel:
        """Return the `fs ls` entry for filesystem `name`.

        Raises:
            CephFSError: if no filesystem by that name exists.
        """
        ls: List[CephFSListEntryModel] = self.ls()
        for fs in ls:
            if fs.name == name:
                return fs
        raise CephFSError(f"unknown filesystem {name}")

    def authorize(self, fsname: str,
                  clientid: str) -> CephFSAuthorizationModel:
        """Create credentials `client.{fsname}-{clientid}` with rw caps
        on the root of filesystem `fsname` and return them.

        Raises:
            CephFSError: if the mon command fails.
        """
        assert fsname and clientid
        cmd = {
            "prefix": "fs authorize",
            "filesystem": fsname,
            "entity": f"client.{fsname}-{clientid}",
            "caps": ["/", "rw"],
            "format": "json"
        }
        try:
            res = self.mon.call(cmd)
        except CephCommandError as e:
            raise CephFSError(str(e)) from e
        lst = parse_obj_as(List[CephFSAuthorizationModel], res)
        # `fs authorize` returns exactly one entry for the new entity
        assert len(lst) == 1
        return lst[0]

    def get_authorization(self, fsname: str,
                          clientid: Optional[str]
                          ) -> CephFSAuthorizationModel:
        """Return the existing authorization for a cephfs client,
        defaulting to the "default" client when none is given.

        Raises:
            CephFSNoAuthorizationError: if the entity does not exist.
            CephFSError: on any other mon command failure.
        """
        if not clientid:
            clientid = "default"
        cmd = {
            "prefix": "auth get",
            "entity": f"client.{fsname}-{clientid}",
            "format": "json"
        }
        try:
            res = self.mon.call(cmd)
        except CephCommandError as e:
            # ENOENT from `auth get` means the entity simply doesn't exist
            if e.rc == errno.ENOENT:
                raise CephFSNoAuthorizationError(e.message)
            raise CephFSError(str(e)) from e
        lst = parse_obj_as(List[CephFSAuthorizationModel], res)
        if len(lst) == 0:
            raise CephFSNoAuthorizationError()
        return lst[0]
async def _do_tick(self) -> None:
    """Run one probe, lazily creating the Mon handle on first use."""
    self._mon = self._mon or Mon()
    await self.probe()
# project aquarium's backend
# Copyright (C) 2021 SUSE, LLC.
from gravel.controllers.orch.ceph import Ceph, Mon

if __name__ == "__main__":
    # quick manual check: dump the osdmap and pool list from the mons
    mon = Mon(Ceph())
    print(mon.get_osdmap())
    print(mon.get_pools())
# project aquarium's backend
# Copyright (C) 2021 SUSE, LLC.
from gravel.controllers.orch.ceph import Mon

if __name__ == "__main__":
    # quick manual check: dump the osdmap and pool list from the mons
    cluster_mon = Mon()
    print(cluster_mon.get_osdmap())
    print(cluster_mon.get_pools())
class Status(Ticker):
    """Periodic ticker that caches cluster status and pool statistics."""

    # lazily-created mon handle (set on first tick)
    _mon: Optional[Mon]
    # most recent `ceph status` snapshot, None until the first probe
    _latest_cluster: Optional[CephStatusModel]
    # most recent per-pool statistics, keyed by pool id
    _latest_pools_stats: Dict[int, CephOSDPoolStatsModel]

    def __init__(self):
        super().__init__(
            "status",
            gstate.config.options.status.probe_interval
        )
        self._mon = None
        self._latest_cluster = None
        self._latest_pools_stats = {}

    async def _do_tick(self) -> None:
        """Run one probe, lazily creating the Mon handle on first use."""
        if not self._mon:
            self._mon = Mon()
        await self.probe()

    async def _should_tick(self) -> bool:
        """Only probe once the node has a bootstrapped cluster."""
        nodemgr: NodeMgr = get_node_mgr()
        return nodemgr.stage >= NodeStageEnum.BOOTSTRAPPED

    async def probe(self) -> None:
        """Refresh the cached cluster status and per-pool statistics."""
        assert self._mon
        self._latest_cluster = self._mon.status
        pool_stats: List[CephOSDPoolStatsModel] = self._mon.get_pools_stats()
        latest_pool_stats: Dict[int, CephOSDPoolStatsModel] = {}
        for pool in pool_stats:
            latest_pool_stats[pool.pool_id] = pool
        self._latest_pools_stats = latest_pool_stats

    @property
    def status(self) -> CephStatusModel:
        """Latest cluster status.

        Raises:
            CephStatusNotAvailableError: if no probe has succeeded yet.
        """
        if not self._latest_cluster:
            raise CephStatusNotAvailableError()
        return self._latest_cluster

    @property
    def client_io_rate(self) -> OverallClientIORateModel:
        """Cluster-wide and per-service client I/O rates.

        Raises:
            ClientIORateNotAvailableError: if stats aren't cached yet.
        """
        if len(self._latest_pools_stats) == 0 or not self._latest_cluster:
            raise ClientIORateNotAvailableError()
        services_rates: List[ServiceIORateModel] = []
        services: Services = Services()
        for service in services.ls():
            svc_name: str = service.name
            svc_type: ServiceTypeEnum = service.type
            svc_io_rate: ClientIORateModel = ClientIORateModel()
            # accumulate rates across every pool backing this service
            for poolid in service.pools:
                assert poolid in self._latest_pools_stats
                stats: CephOSDPoolStatsModel = self._latest_pools_stats[poolid]
                svc_io_rate.read += stats.client_io_rate.read_bytes_sec
                svc_io_rate.write += stats.client_io_rate.write_bytes_sec
                svc_io_rate.read_ops += stats.client_io_rate.read_op_per_sec
                svc_io_rate.write_ops += stats.client_io_rate.write_op_per_sec
            services_rates.append(
                ServiceIORateModel(
                    service_name=svc_name,
                    service_type=svc_type,
                    io_rate=svc_io_rate
                )
            )
        # cluster-wide totals come straight from the pgmap
        cluster_rates: ClientIORateModel = ClientIORateModel(
            read=self._latest_cluster.pgmap.read_bytes_sec,
            write=self._latest_cluster.pgmap.write_bytes_sec,
            read_ops=self._latest_cluster.pgmap.read_op_per_sec,
            write_ops=self._latest_cluster.pgmap.write_op_per_sec
        )
        return OverallClientIORateModel(
            cluster=cluster_rates,
            services=services_rates
        )
def __init__(self):
    """Create handles for the ceph mgr and mon command channels."""
    self.mgr = Mgr()
    self.mon = Mon()
async def startup(aquarium_app: FastAPI, aquarium_api: FastAPI):
    """Test-harness startup: build a GlobalState wired with fakes.

    Mirrors the production startup sequence, but substitutes fake
    node-manager, cephadm, ceph-connection, and storage implementations
    so no real cluster (or real host) is required.
    """
    from fastapi.logger import logger as fastapi_logger
    from gravel.cephadm.cephadm import Cephadm
    from gravel.controllers.inventory.inventory import Inventory
    from gravel.controllers.nodes.deployment import NodeDeployment
    from gravel.controllers.nodes.errors import NodeCantDeployError
    from gravel.controllers.nodes.mgr import (
        NodeError,
        NodeInitStage,
        NodeMgr,
    )
    from gravel.controllers.orch.ceph import Ceph, Mgr, Mon
    from gravel.controllers.resources.devices import Devices
    from gravel.controllers.resources.status import Status
    from gravel.controllers.resources.storage import Storage

    logger: logging.Logger = fastapi_logger

    class FakeNodeDeployment(NodeDeployment):
        # Do we still need this thing since removing etcd?
        pass

    class FakeNodeMgr(NodeMgr):
        # NodeMgr variant that skips real deployment work and image pulls.
        def __init__(self, gstate: GlobalState):
            super().__init__(gstate)
            self._deployment = FakeNodeDeployment(gstate, self._connmgr)

        async def start(self) -> None:
            assert self._state
            logger.debug(f"start > {self._state}")
            if not self.deployment_state.can_start():
                raise NodeError("unable to start unstartable node")
            assert self._init_stage == NodeInitStage.NONE
            if self.deployment_state.nostage:
                await self._node_prepare()
            else:
                assert (self.deployment_state.ready
                        or self.deployment_state.deployed)
                assert self._state.hostname
                assert self._state.address
                await self.gstate.store.ensure_connection()

        async def _obtain_images(self) -> bool:
            # pretend container images are always available
            return True

    class FakeCephadm(Cephadm):
        def __init__(self):
            super().__init__(ContainersOptionsModel())

        async def call(
            self,
            cmd: List[str],
            noimage: bool = False,
            outcb: Optional[Callable[[str], None]] = None,
        ) -> Tuple[str, str, int]:
            # Implement expected calls to cephadm with testable responses
            if cmd[0] == "pull":
                return "", "", 0
            elif cmd[0] == "gather-facts":
                return (
                    get_data_contents(DATA_DIR, "gather_facts_real.json"),
                    "",
                    0,
                )
            elif cmd == ["ceph-volume", "inventory", "--format", "json"]:
                return (
                    get_data_contents(DATA_DIR, "inventory_real.json"),
                    "",
                    0,
                )
            else:
                # any other invocation is unexpected in tests
                print(cmd)
                print(outcb)
                raise Exception("Tests should not get here")

    class FakeCeph(Ceph):
        def __init__(self, conf_file: str = "/etc/ceph/ceph.conf"):
            self.conf_file = conf_file
            self._is_connected = False

        def connect(self):
            # replace the rados cluster handle with a mock on first use
            if not self.is_connected():
                self.cluster = mocker.Mock()
                self._is_connected = True

    class FakeStorage(Storage):  # type: ignore
        # fixed capacity figures so storage-dependent checks are stable
        available = 2000  # type: ignore
        total = 2000  # type: ignore

    gstate: GlobalState = GlobalState(FakeKV)

    # init node mgr
    nodemgr: NodeMgr = FakeNodeMgr(gstate)

    # Prep cephadm
    cephadm: Cephadm = FakeCephadm()
    gstate.add_cephadm(cephadm)

    # Set up Ceph connections: one fake Ceph handle shared by mgr and mon
    ceph: Ceph = FakeCeph()
    ceph_mgr: Mgr = Mgr(ceph)
    gstate.add_ceph_mgr(ceph_mgr)
    ceph_mon: Mon = Mon(ceph)
    gstate.add_ceph_mon(ceph_mon)

    # Set up all of the tickers
    devices: Devices = Devices(
        gstate.config.options.devices.probe_interval,
        nodemgr,
        ceph_mgr,
        ceph_mon,
    )
    gstate.add_devices(devices)

    status: Status = Status(gstate.config.options.status.probe_interval,
                            gstate, nodemgr)
    gstate.add_status(status)

    inventory: Inventory = Inventory(
        gstate.config.options.inventory.probe_interval,
        nodemgr,
        gstate
    )
    gstate.add_inventory(inventory)

    storage: Storage = FakeStorage(
        gstate.config.options.storage.probe_interval,
        nodemgr, ceph_mon
    )
    gstate.add_storage(storage)

    await nodemgr.start()
    await gstate.start()

    # Add instances into FastAPI's state:
    aquarium_api.state.gstate = gstate
    aquarium_api.state.nodemgr = nodemgr
def test_ceph_conf(self):
    """Missing ceph.conf must raise FileNotFoundError for both Mgr and Mon."""
    # BUG FIX: Mon() used to sit in the same `pytest.raises` block as
    # Mgr(); since Mgr() raises first, Mon() was never executed. Each
    # constructor needs its own expectation so both are actually tested.
    with pytest.raises(FileNotFoundError, match="ceph.conf"):
        Mgr()
    with pytest.raises(FileNotFoundError, match="ceph.conf"):
        Mon()