Example 1
 def _update_mon_count(current, new):
     # type: (ccl.CephCluster, ccl.CephCluster) -> ccl.CephCluster
     if newcount is None:
         raise orchestrator.OrchestratorError(
             'unable to set mon count to None')
     if not new.spec.mon:
         raise orchestrator.OrchestratorError(
             "mon attribute not specified in new spec")
     new.spec.mon.count = newcount
     return new
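Note that newcount is not a parameter of _update_mon_count: it is a free variable captured from an enclosing method. The sketch below shows a plausible enclosing wrapper; the surrounding names (update_mon_count, self._patch, self.rook_env.cluster_name) are illustrative assumptions, not taken from this excerpt.

 def update_mon_count(self, newcount):
     # type: (Optional[int]) -> str
     def _update_mon_count(current, new):
         # type: (ccl.CephCluster, ccl.CephCluster) -> ccl.CephCluster
         # 'newcount' is captured from the enclosing scope.
         if newcount is None:
             raise orchestrator.OrchestratorError(
                 'unable to set mon count to None')
         if not new.spec.mon:
             raise orchestrator.OrchestratorError(
                 "mon attribute not specified in new spec")
         new.spec.mon.count = newcount
         return new

     # Hypothetical helper that applies the patch callback to the
     # CephCluster custom resource and returns a status string.
     return self._patch(ccl.CephCluster, 'cephclusters',
                        self.rook_env.cluster_name, _update_mon_count)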
Example 2
 def get_ceph_image(self) -> str:
     try:
         api_response = self.coreV1_api.list_namespaced_pod(self.rook_env.namespace,
                                                            label_selector="app=rook-ceph-mon",
                                                            timeout_seconds=10)
         if api_response.items:
             return api_response.items[-1].spec.containers[0].image
         else:
             raise orchestrator.OrchestratorError(
                     "Error getting ceph image. Cluster without monitors")
     except ApiException as e:
         raise orchestrator.OrchestratorError("Error getting ceph image: {}".format(e))
Example 3
    def add_stateless_service(self, service_type, spec):
        """ Add a stateless service in the cluster

        : service_type: Kind of service (nfs, rgw, mds)
        : spec        : an Orchestrator.StatelessServiceSpec object

        : returns     : Completion object
        """

        # Check service_type is supported
        if service_type not in ["rgw"]:
            raise orchestrator.OrchestratorError(
                "{} service not supported".format(service_type))

        # Add the hosts to the inventory in the right group
        hosts = spec.service_spec.hosts
        if not hosts:
            raise orchestrator.OrchestratorError("No hosts provided."\
                "At least one destination host is needed to install the RGW "\
                "service")
        InventoryGroup("{}s".format(service_type),
                       self.ar_client).update(hosts)

        # Limit playbook execution to certain hosts
        limited = ",".join(hosts)

        # Add the settings for this service
        extravars = vars(spec.service_spec)

        # Group hosts by resource (used in rm ops). Only "rgw" reaches this
        # point (see the check above), so resource_group is always defined.
        if service_type == "rgw":
            resource_group = "rgw_zone_{}".format(spec.service_spec.rgw_zone)
        InventoryGroup(resource_group, self.ar_client).update(hosts)

        # Execute the playbook to create the service
        playbook_operation = PlaybookOperation(
            client=self.ar_client,
            playbook=SITE_PLAYBOOK,
            logger=self.log,
            result_pattern="",
            params=extravars,
            querystr_dict={"limit": limited})

        # Filter to get the result
        playbook_operation.output_wizard = ProcessPlaybookResult(
            self.ar_client, self.log)
        playbook_operation.event_filter_list = ["playbook_on_stats"]

        # Execute the playbook
        self._launch_operation(playbook_operation)

        return playbook_operation
Example 4
    def _remove_osds_bg(self) -> None:
        """
        Performs actions in the _serve() loop to remove an OSD
        when the removal criteria are met.
        """
        logger.debug(
            f"{len(self.to_remove_osds)} OSDs are scheduled for removal: {list(self.to_remove_osds)}"
        )
        self._update_osd_removal_status()
        remove_osds: set = self.to_remove_osds.copy()
        for osd in remove_osds:
            if not osd.force:
                self.drain_osd(osd.osd_id)
                # skip criteria
                if not self.is_empty(osd.osd_id):
                    logger.info(
                        f"OSD <{osd.osd_id}> is not empty yet. Waiting a bit more"
                    )
                    continue

            if not self.ok_to_destroy([osd.osd_id]):
                logger.info(
                    f"OSD <{osd.osd_id}> is not safe-to-destroy yet. Waiting a bit more"
                )
                continue

            # abort criteria
            if not self.down_osd([osd.osd_id]):
                # also remove it from the remove_osd list and set a health_check warning?
                raise orchestrator.OrchestratorError(
                    f"Could not set OSD <{osd.osd_id}> to 'down'")

            if osd.replace:
                if not self.destroy_osd(osd.osd_id):
                    # also remove it from the remove_osd list and set a health_check warning?
                    raise orchestrator.OrchestratorError(
                        f"Could not destroy OSD <{osd.osd_id}>")
            else:
                if not self.purge_osd(osd.osd_id):
                    # also remove it from the remove_osd list and set a health_check warning?
                    raise orchestrator.OrchestratorError(
                        f"Could not purge OSD <{osd.osd_id}>")

            self.mgr._remove_daemon(osd.fullname, osd.nodename)
            logger.info(
                f"Successfully removed OSD <{osd.osd_id}> on {osd.nodename}")
            logger.debug(f"Removing {osd.osd_id} from the queue.")
            self.to_remove_osds.remove(osd)
Example 5
 def remove_service(self, service_name: str, force: bool = False) -> str:
     if service_name == 'rbd-mirror':
         return self.rook_cluster.rm_service('cephrbdmirrors',
                                             'default-rbd-mirror')
     service_type, service_id = service_name.split('.', 1)
     if service_type == 'mds':
         return self.rook_cluster.rm_service('cephfilesystems', service_id)
     elif service_type == 'rgw':
         return self.rook_cluster.rm_service('cephobjectstores', service_id)
     elif service_type == 'nfs':
         ret, out, err = self.mon_command({'prefix': 'auth ls'})
         matches = re.findall(rf'client\.nfs-ganesha\.{service_id}\..*',
                              out)
         for match in matches:
             self.check_mon_command({'prefix': 'auth rm', 'entity': match})
         return self.rook_cluster.rm_service('cephnfses', service_id)
     elif service_type == 'rbd-mirror':
         return self.rook_cluster.rm_service('cephrbdmirrors', service_id)
     elif service_type == 'osd':
         if service_id in self._drive_group_map:
             del self._drive_group_map[service_id]
             self._save_drive_groups()
         return f'Removed {service_name}'
     elif service_type == 'ingress':
         self.log.info("{0} service '{1}' does not exist".format(
             'ingress', service_id))
         return 'The Rook orchestrator does not currently support ingress'
     else:
         raise orchestrator.OrchestratorError(
             f'Service type {service_type} not supported')
Example 6
 def remote_from_orchestrator_cli_self_test(self, what: str) -> Any:
     import orchestrator
     if what == 'OrchestratorError':
         return orchestrator.OrchResult(result=None, exception=orchestrator.OrchestratorError('hello, world'))
     elif what == "ZeroDivisionError":
         return orchestrator.OrchResult(result=None, exception=ZeroDivisionError('hello, world'))
     assert False, repr(what)
Example 7
 def get_daemon(self, daemon_name: str) -> orchestrator.DaemonDescription:
     assert not daemon_name.startswith('ha-rgw.')
     for _, dm in self.daemons.items():
         for _, dd in dm.items():
             if dd.name() == daemon_name:
                 return dd
     raise orchestrator.OrchestratorError(f'Unable to find {daemon_name} daemon(s)')
Example 8
 def _update_mon_count(current, new):
     # type: (ccl.CephCluster, ccl.CephCluster) -> ccl.CephCluster
     if newcount is None:
         raise orchestrator.OrchestratorError(
             'unable to set mon count to None')
     new.spec.mon.count = newcount
     return new
Example 9
 def _daemon_action(self, action, name):
     if '.' not in name:
         raise orchestrator.OrchestratorError('%s is not a valid daemon name' % name)
     (daemon_type, daemon_id) = name.split('.', 1)
     completion = self.daemon_action(action, daemon_type, daemon_id)
     self._orchestrator_wait([completion])
     orchestrator.raise_if_exception(completion)
     return HandleCommandResult(stdout=completion.result_str())
Example 10
    def get_daemon(self, daemon_name: str, host: Optional[str] = None) -> orchestrator.DaemonDescription:
        assert not daemon_name.startswith('ha-rgw.')
        dds = self.get_daemons_by_host(host) if host else self._get_daemons()
        for dd in dds:
            if dd.name() == daemon_name:
                return dd

        raise orchestrator.OrchestratorError(f'Unable to find {daemon_name} daemon(s)')
Example 11
 def _daemon_rm(self, names):
     for name in names:
         if '.' not in name:
             raise orchestrator.OrchestratorError('%s is not a valid daemon name' % name)
     completion = self.remove_daemons(names)
     self._orchestrator_wait([completion])
     orchestrator.raise_if_exception(completion)
     return HandleCommandResult(stdout=completion.result_str())
Example 12
    def remove_stateless_service(self, service_type, id_resource):
        """ Remove a stateles services providing <sv_id> resources

        :svc_type    : Kind of service (nfs, rgw, mds)
        :id_resource : Id of the resource provided
                            <zone name> if service is RGW
                            ...
        : returns    : Completion object
        """

        # Check service_type is supported
        if service_type not in ["rgw"]:
            raise orchestrator.OrchestratorError(
                "{} service not supported".format(service_type))

        # Ansible Inventory group for the kind of service
        group = "{}s".format(service_type)

        # Get the list of hosts from which to remove the service
        # (hosts in the resource group)
        if service_type == "rgw":
            group_prefix = "rgw_zone_{}"

        resource_group = group_prefix.format(id_resource)

        hosts_list = list(InventoryGroup(resource_group, self.ar_client))
        limited = ",".join(hosts_list)

        # Avoid manual confirmation
        extravars = {"ireallymeanit": "yes"}

        # Execute the playbook to remove the service
        playbook_operation = PlaybookOperation(
            client=self.ar_client,
            playbook=PURGE_PLAYBOOK,
            logger=self.log,
            result_pattern="",
            params=extravars,
            querystr_dict={"limit": limited})

        # Filter to get the result
        playbook_operation.output_wizard = ProcessPlaybookResult(
            self.ar_client, self.log)
        playbook_operation.event_filter_list = ["playbook_on_stats"]

        # Clean up the inventory after a successful operation
        clean_inventory = {}
        clean_inventory[resource_group] = hosts_list
        clean_inventory[group] = hosts_list
        playbook_operation.clean_hosts_on_success = clean_inventory

        # Execute the playbook
        self.log.info("Removing service %s for resource %s", service_type,
                      id_resource)
        self._launch_operation(playbook_operation)

        return playbook_operation
Example 13
 def remove_service(self, service_name: str) -> str:
     service_type, service_name = service_name.split('.', 1)
     if service_type == 'mds':
         return self.rook_cluster.rm_service('cephfilesystems', service_name)
     elif service_type == 'rgw':
         return self.rook_cluster.rm_service('cephobjectstores', service_name)
     elif service_type == 'nfs':
         return self.rook_cluster.rm_service('cephnfses', service_name)
     else:
         raise orchestrator.OrchestratorError(f'Service type {service_type} not supported')
Example 14
 def remote_from_orchestrator_cli_self_test(self, what):
     import orchestrator
     if what == 'OrchestratorError':
         c = orchestrator.TrivialReadCompletion(result=None)
         c.fail(orchestrator.OrchestratorError('hello, world'))
         return c
     elif what == "ZeroDivisionError":
         c = orchestrator.TrivialReadCompletion(result=None)
         c.fail(ZeroDivisionError('hello, world'))
         return c
     assert False, repr(what)
Example 15
 def _service_rm(self, name):
     if '.' in name:
         (service_type, service_name) = name.split('.')
     else:
         service_type = name
         service_name = None
     if name in ['mon', 'mgr']:
         raise orchestrator.OrchestratorError(
             'The mon and mgr services cannot be removed')
     completion = self.remove_service(service_type, service_name)
     self._orchestrator_wait([completion])
     orchestrator.raise_if_exception(completion)
     return HandleCommandResult(stdout=completion.result_str())
Example 16
 def _proc_daemons(daemons):
     args = []
     for d in daemons:
         args.append((d.service_type, d.service_instance,
                      d.nodename, action))
     if not args:
         if service_name:
             n = service_name + '-*'
         else:
             n = service_id
         raise orchestrator.OrchestratorError(
             'Unable to find %s.%s daemon(s)' % (
                 service_type, n))
     return self._service_action(args)
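Here _proc_daemons is an inner callback: action, service_type, service_name, service_id and self are free variables captured from an enclosing service_action method (compare Example 19, which inlines the same logic). A minimal sketch of a plausible enclosing context follows, assuming a Completion-style API where the daemon list is resolved first and the callback is chained with then(); that chaining call is an assumption, not taken from this excerpt.

 def service_action(self, action, service_type,
                    service_name=None, service_id=None):
     def _proc_daemons(daemons):
         # 'action', 'service_type', 'service_name' and 'service_id'
         # are captured from the enclosing scope.
         args = [(d.service_type, d.service_instance, d.nodename, action)
                 for d in daemons]
         if not args:
             n = service_name + '-*' if service_name else service_id
             raise orchestrator.OrchestratorError(
                 'Unable to find %s.%s daemon(s)' % (service_type, n))
         return self._service_action(args)

     # Hypothetical chaining: resolve the daemon list, then act on it.
     return self._get_services(service_type,
                               service_name=service_name,
                               service_id=service_id).then(_proc_daemons)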
Example 17
 def add_host(self, spec):
     # type: (orchestrator.HostSpec) -> None
     host = spec.hostname
     if host == 'raise_validation_error':
         raise orchestrator.OrchestratorValidationError("MON count must be either 1, 3 or 5")
     if host == 'raise_error':
         raise orchestrator.OrchestratorError("host address is empty")
     if host == 'raise_bug':
         raise ZeroDivisionError()
     if host == 'raise_not_implemented':
         raise NotImplementedError()
     if host == 'raise_no_orchestrator':
         raise orchestrator.NoOrchestrator()
     if host == 'raise_import_error':
         raise ImportError("test_orchestrator not enabled")
     assert isinstance(host, str)
Example 18
 def remove_service(self, service_name: str) -> RookCompletion[str]:
     service_type, service_name = service_name.split('.', 1)
     if service_type == 'mds':
         return self._service_rm_decorate(
             'MDS', service_name, lambda: self.rook_cluster.rm_service(
                 'cephfilesystems', service_name))
     elif service_type == 'rgw':
         return self._service_rm_decorate(
             'RGW', service_name, lambda: self.rook_cluster.rm_service(
                 'cephobjectstores', service_name))
     elif service_type == 'nfs':
         return self._service_rm_decorate(
             'NFS', service_name, lambda: self.rook_cluster.rm_service(
                 'cephnfses', service_name))
     else:
         raise orchestrator.OrchestratorError(
             f'Service type {service_type} not supported')
Example 19
 def service_action(self, action, service_type,
                    service_name=None,
                    service_id=None):
     self.log.debug('service_action action %s type %s name %s id %s' % (
         action, service_type, service_name, service_id))
     if action == 'reload':
         return trivial_result(["Reload is a no-op"])
     daemons = self._get_services(
         service_type,
         service_name=service_name,
         service_id=service_id)
     args = []
     for d in daemons:
         args.append((d.service_type, d.service_instance,
                       d.nodename, action))
     if not args:
         if service_name:
             n = service_name + '-*'
         else:
             n = service_id
         raise orchestrator.OrchestratorError(
             'Unable to find %s.%s daemon(s)' % (
                 service_type, n))
     return self._service_action(args)
Example 20
    def process_removal_queue(self) -> None:
        """
        Performs actions in the _serve() loop to remove an OSD
        when the removal criteria are met.

        We can't hold self.lock, as we're calling _remove_daemon in the loop.
        """

        # make sure that we don't run on OSDs that are not in the cluster anymore.
        self.cleanup()

        # find osds that are ok-to-stop and not yet draining
        ready_to_drain_osds = self._ready_to_drain_osds()
        if ready_to_drain_osds:
            # start draining those
            _ = [osd.start_draining() for osd in ready_to_drain_osds]

        all_osds = self.all_osds()

        logger.debug(f"{self.queue_size()} OSDs are scheduled "
                     f"for removal: {all_osds}")

        # Check all osds for their state and take action (remove, purge etc)
        new_queue: Set[OSD] = set()
        for osd in all_osds:  # type: OSD
            if not osd.force:
                # skip criteria
                if not osd.is_empty:
                    logger.debug(f"{osd} is not empty yet. Waiting a bit more")
                    new_queue.add(osd)
                    continue

            if not osd.safe_to_destroy():
                logger.debug(
                    f"{osd} is not safe-to-destroy yet. Waiting a bit more")
                new_queue.add(osd)
                continue

            # abort criteria
            if not osd.down():
                # also remove it from the remove_osd list and set a health_check warning?
                raise orchestrator.OrchestratorError(
                    f"Could not mark {osd} down")

            # stop and remove daemon
            assert osd.hostname is not None

            if self.mgr.cache.has_daemon(f'osd.{osd.osd_id}'):
                CephadmServe(self.mgr)._remove_daemon(f'osd.{osd.osd_id}',
                                                      osd.hostname)
                logger.info(f"Successfully removed {osd} on {osd.hostname}")
            else:
                logger.info(
                    f"Daemon {osd} on {osd.hostname} was already removed")

            if osd.replace:
                # mark destroyed in osdmap
                if not osd.destroy():
                    raise orchestrator.OrchestratorError(
                        f"Could not destroy {osd}")
                logger.info(
                    f"Successfully destroyed old {osd} on {osd.hostname}; ready for replacement"
                )
            else:
                # purge from osdmap
                if not osd.purge():
                    raise orchestrator.OrchestratorError(
                        f"Could not purge {osd}")
                logger.info(f"Successfully purged {osd} on {osd.hostname}")

            if osd.zap:
                # throws an exception if the zap fails
                logger.info(f"Zapping devices for {osd} on {osd.hostname}")
                osd.do_zap()
                logger.info(
                    f"Successfully zapped devices for {osd} on {osd.hostname}")

            logger.debug(f"Removing {osd} from the queue.")

        # self.osds could change while this is processing (OSDs get added from the CLI).
        # The new set is the intersection of the OSDs that are still not empty/removed
        # (new_queue) and the OSDs that were added while this method was executing.
        with self.lock:
            self.osds.intersection_update(new_queue)
            self._save_to_store()
Example 21
    def process_removal_queue(self) -> None:
        """
        Performs actions in the _serve() loop to remove an OSD
        when the removal criteria are met.
        """

        # make sure that we don't run on OSDs that are not in the cluster anymore.
        self.cleanup()

        logger.debug(
            f"{self.mgr.to_remove_osds.queue_size()} OSDs are scheduled "
            f"for removal: {self.mgr.to_remove_osds.all_osds()}")

        # find osds that are ok-to-stop and not yet draining
        ok_to_stop_osds = self.find_osd_stop_threshold(
            self.mgr.to_remove_osds.idling_osds())
        if ok_to_stop_osds:
            # start draining those
            _ = [osd.start_draining() for osd in ok_to_stop_osds]

        # Check all osds for their state and take action (remove, purge etc)
        to_remove_osds = self.mgr.to_remove_osds.all_osds()
        new_queue = set()
        for osd in to_remove_osds:
            if not osd.force:
                # skip criteria
                if not osd.is_empty:
                    logger.info(
                        f"OSD <{osd.osd_id}> is not empty yet. Waiting a bit more"
                    )
                    new_queue.add(osd)
                    continue

            if not osd.safe_to_destroy():
                logger.info(
                    f"OSD <{osd.osd_id}> is not safe-to-destroy yet. Waiting a bit more"
                )
                new_queue.add(osd)
                continue

            # abort criteria
            if not osd.down():
                # also remove it from the remove_osd list and set a health_check warning?
                raise orchestrator.OrchestratorError(
                    f"Could not set OSD <{osd.osd_id}> to 'down'")

            if osd.replace:
                if not osd.destroy():
                    raise orchestrator.OrchestratorError(
                        f"Could not destroy OSD <{osd.osd_id}>")
            else:
                if not osd.purge():
                    raise orchestrator.OrchestratorError(
                        f"Could not purge OSD <{osd.osd_id}>")

            if not osd.exists:
                continue
            self.mgr._remove_daemon(osd.fullname, osd.hostname)
            logger.info(
                f"Successfully removed OSD <{osd.osd_id}> on {osd.hostname}")
            logger.debug(f"Removing {osd.osd_id} from the queue.")

        # self.mgr.to_remove_osds could change while this is processing (OSDs get added from the CLI).
        # The new set is the intersection of the OSDs that are still not empty/removed
        # (new_queue) and the OSDs that were added while this method was executing.
        self.mgr.to_remove_osds.intersection_update(new_queue)
        self.save_to_store()
Example 22
    def _execute_blight_job(self, ident_fault: str, on: bool,
                            loc: orchestrator.DeviceLightLoc) -> str:
        operation_id = str(hash(loc))
        message = ""

        # job definition
        job_metadata = client.V1ObjectMeta(name=operation_id,
                                           namespace=self.rook_env.namespace,
                                           labels={"ident": operation_id})
        pod_metadata = client.V1ObjectMeta(labels={"ident": operation_id})
        pod_container = client.V1Container(
            name="ceph-lsmcli-command",
            security_context=client.V1SecurityContext(privileged=True),
            image=self.get_ceph_image(),
            command=[
                "lsmcli",
            ],
            args=[
                'local-disk-%s-led-%s' % (ident_fault, 'on' if on else 'off'),
                '--path',
                loc.path or loc.dev,
            ],
            volume_mounts=[
                client.V1VolumeMount(name="devices", mount_path="/dev"),
                client.V1VolumeMount(name="run-udev", mount_path="/run/udev")
            ])
        pod_spec = client.V1PodSpec(
            containers=[pod_container],
            active_deadline_seconds=30,  # Max time to terminate pod
            restart_policy="Never",
            node_selector={"kubernetes.io/hostname": loc.host},
            volumes=[
                client.V1Volume(
                    name="devices",
                    host_path=client.V1HostPathVolumeSource(path="/dev")),
                client.V1Volume(
                    name="run-udev",
                    host_path=client.V1HostPathVolumeSource(path="/run/udev"))
            ])
        pod_template = client.V1PodTemplateSpec(metadata=pod_metadata,
                                                spec=pod_spec)
        job_spec = client.V1JobSpec(
            active_deadline_seconds=60,  # Max time to terminate job
            ttl_seconds_after_finished=10,  # Alpha: lifetime after finishing (either Complete or Failed)
            backoff_limit=0,
            template=pod_template)
        job = client.V1Job(api_version="batch/v1",
                           kind="Job",
                           metadata=job_metadata,
                           spec=job_spec)

        # delete previous job if it exists
        try:
            try:
                api_response = self.batchV1_api.delete_namespaced_job(
                    operation_id,
                    self.rook_env.namespace,
                    propagation_policy="Background")
            except ApiException as e:
                if e.status != 404:  # No problem if the job does not exist
                    raise

            # wait until the job is not present
            deleted = False
            retries = 0
            while not deleted and retries < 10:
                api_response = self.batchV1_api.list_namespaced_job(
                    self.rook_env.namespace,
                    label_selector="ident=%s" % operation_id,
                    timeout_seconds=10)
                deleted = not api_response.items
                if retries > 5:
                    sleep(0.1)
                retries += 1
            if retries == 10 and not deleted:
                raise orchestrator.OrchestratorError(
                    "Light <{}> in <{}:{}> cannot be executed. Cannot delete previous job <{}>"
                    .format(on, loc.host, loc.path or loc.dev, operation_id))

            # create the job
            api_response = self.batchV1_api.create_namespaced_job(
                self.rook_env.namespace, job)

            # get the result
            finished = False
            while not finished:
                api_response = self.batchV1_api.read_namespaced_job(
                    operation_id, self.rook_env.namespace)
                finished = api_response.status.succeeded or api_response.status.failed
                if finished:
                    message = api_response.status.conditions[-1].message

            # get the result of the lsmcli command
            api_response = self.coreV1_api.list_namespaced_pod(
                self.rook_env.namespace,
                label_selector="ident=%s" % operation_id,
                timeout_seconds=10)
            if api_response.items:
                pod_name = api_response.items[-1].metadata.name
                message = self.coreV1_api.read_namespaced_pod_log(
                    pod_name, self.rook_env.namespace)

        except ApiException as e:
            log.exception('K8s API failed. {}'.format(e))
            raise

        # Finally, delete the job.
        # The job sets <ttl_seconds_after_finished>, so the TTL controller deletes it
        # automatically once it finishes. That feature is still in Alpha, so an explicit
        # delete is issued here as well, as a safeguard.
        try:
            api_response = self.batchV1_api.delete_namespaced_job(
                operation_id,
                self.rook_env.namespace,
                propagation_policy="Background")
        except ApiException as e:
            if e.status != 404:  # No problem if the job does not exist
                raise

        return message
Example 23
    def add_rgw(self, spec):
        # type: (orchestrator.RGWSpec) -> orchestrator.Completion
        """ Add a RGW service in the cluster

        : spec        : an Orchestrator.RGWSpec object

        : returns     : Completion object
        """

        # Add the hosts to the inventory in the right group
        hosts = spec.placement.hosts
        if not hosts:
            raise orchestrator.OrchestratorError(
                "No hosts provided. "
                "At least one destination host is needed to install the RGW "
                "service")

        def set_rgwspec_defaults(spec):
            spec.rgw_multisite = spec.rgw_multisite if spec.rgw_multisite is not None else True
            spec.rgw_zonemaster = spec.rgw_zonemaster if spec.rgw_zonemaster is not None else True
            spec.rgw_zonesecondary = spec.rgw_zonesecondary \
                if spec.rgw_zonesecondary is not None else False
            spec.rgw_multisite_proto = spec.rgw_multisite_proto \
                if spec.rgw_multisite_proto is not None else "http"
            spec.rgw_frontend_port = spec.rgw_frontend_port \
                if spec.rgw_frontend_port is not None else 8080

            spec.rgw_zonegroup = spec.rgw_zonegroup if spec.rgw_zonegroup is not None else "default"
            spec.rgw_zone_user = spec.rgw_zone_user if spec.rgw_zone_user is not None else "zone.user"
            spec.rgw_realm = spec.rgw_realm if spec.rgw_realm is not None else "default"

            spec.system_access_key = spec.system_access_key \
                if spec.system_access_key is not None else spec.genkey(20)
            spec.system_secret_key = spec.system_secret_key \
                if spec.system_secret_key is not None else spec.genkey(40)

        set_rgwspec_defaults(spec)
        InventoryGroup("rgws", self.ar_client).update(hosts)

        # Limit playbook execution to certain hosts
        limited = ",".join(str(host) for host in hosts)

        # Add the settings for this service
        extravars = {
            k: v
            for (k, v) in spec.__dict__.items() if k.startswith('rgw_')
        }
        extravars['rgw_zone'] = spec.name
        extravars[
            'rgw_multisite_endpoint_addr'] = spec.rgw_multisite_endpoint_addr
        extravars[
            'rgw_multisite_endpoints_list'] = spec.rgw_multisite_endpoints_list
        extravars['rgw_frontend_port'] = str(spec.rgw_frontend_port)

        # Group hosts by resource (used in rm ops)
        resource_group = "rgw_zone_{}".format(spec.name)
        InventoryGroup(resource_group, self.ar_client).update(hosts)

        # Execute the playbook to create the service
        op = playbook_operation(client=self.ar_client,
                                playbook=SITE_PLAYBOOK,
                                result_pattern="",
                                params=extravars,
                                querystr_dict={"limit": limited},
                                output_wizard=ProcessPlaybookResult(
                                    self.ar_client),
                                event_filter_list=["playbook_on_stats"])

        # Execute the playbook
        self._launch_operation(op)

        return op