class RabbitMQOperatorPeers(Object):
    """RabbitMQ Operator Peer interface"""

    on = RabbitMQOperatorPeersEvents()
    state = StoredState()
    OPERATOR_PASSWORD = "operator_password"
    OPERATOR_USER_CREATED = "operator_user_created"
    ERLANG_COOKIE = "erlang_cookie"

    def __init__(self, charm, relation_name):
        super().__init__(charm, relation_name)
        self.relation_name = relation_name
        self.framework.observe(charm.on[relation_name].relation_created,
                               self.on_created)
        self.framework.observe(charm.on[relation_name].relation_changed,
                               self.on_changed)
        self.framework.observe(charm.on[relation_name].relation_broken,
                               self.on_broken)

    @property
    def peers_rel(self):
        return self.framework.model.get_relation(self.relation_name)

    def on_created(self, event):
        logging.debug("RabbitMQOperatorPeers on_created")
        self.on.connected.emit()

    def on_broken(self, event):
        logging.debug("RabbitMQOperatorPeers on_broken")
        self.on.goneaway.emit()

    def on_changed(self, event):
        logging.debug("RabbitMQOperatorPeers on_changed")
        if self.operator_password and self.erlang_cookie:
            self.on.ready.emit()

    def set_operator_password(self, password: str):
        logging.debug("Setting operator password")
        self.peers_rel.data[self.peers_rel.app][
            self.OPERATOR_PASSWORD] = password

    def set_operator_user_created(self, user: str):
        logging.debug("Setting operator user created")
        self.peers_rel.data[self.peers_rel.app][
            self.OPERATOR_USER_CREATED] = user

    def set_erlang_cookie(self, cookie: str):
        """Set Erlang cookie for RabbitMQ clustering."""
        logging.debug("Setting erlang cookie")
        self.peers_rel.data[self.peers_rel.app][self.ERLANG_COOKIE] = cookie

    def store_password(self, username: str, password: str):
        """Store username and password."""
        logging.debug(f"Storing password for {username}")
        self.peers_rel.data[self.peers_rel.app][username] = password

    def retrieve_password(self, username: str) -> str:
        """Retrieve the persisted password for the provided username."""
        if not self.peers_rel:
            return None
        return self.peers_rel.data[self.peers_rel.app].get(username)

    @property
    def operator_password(self) -> str:
        if not self.peers_rel:
            return None
        return self.peers_rel.data[self.peers_rel.app].get(
            self.OPERATOR_PASSWORD)

    @property
    def operator_user_created(self) -> str:
        if not self.peers_rel:
            return None
        return self.peers_rel.data[self.peers_rel.app].get(
            self.OPERATOR_USER_CREATED)

    @property
    def erlang_cookie(self) -> str:
        if not self.peers_rel:
            return None
        return self.peers_rel.data[self.peers_rel.app].get(self.ERLANG_COOKIE)
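
# Usage sketch (not part of Example #1): a minimal charm composing
# RabbitMQOperatorPeers. The charm class and the peer relation name "peers"
# are assumptions; the event and helper names come from the interface above.
import secrets

from ops.charm import CharmBase


class RabbitMQOperatorCharm(CharmBase):

    def __init__(self, *args):
        super().__init__(*args)
        self.peers = RabbitMQOperatorPeers(self, "peers")
        self.framework.observe(self.peers.on.connected,
                               self._on_peers_connected)

    def _on_peers_connected(self, event):
        # Only the leader may write application relation data.
        if self.unit.is_leader() and not self.peers.operator_password:
            self.peers.set_operator_password(secrets.token_urlsafe(16))
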
Example #2
class AmfCharm(CharmBase):
    """AMF charm events class definition"""

    state = StoredState()

    def __init__(self, *args) -> None:
        """AMF charm constructor."""
        super().__init__(*args)
        # Internal state initialization
        self.state.set_default(pod_spec=None)

        self.image = OCIImageResource(self, "image")

        # Registering regular events
        self.framework.observe(self.on.config_changed, self.configure_pod)

        # Registering required relation changed events
        self.framework.observe(self.on.nrf_relation_changed,
                               self._on_nrf_relation_changed)

        # Registering required relation broken events
        self.framework.observe(self.on.nrf_relation_broken,
                               self._on_nrf_relation_broken)

        # -- initialize states --
        self.state.set_default(nrf_host=None)

    def publish_amf_info(self, _=None) -> None:
        """Publishes AMF information on the "amf" relation."""
        if not self.unit.is_leader():
            return
        for relation in self.model.relations["amf"]:
            relation.data[self.model.app]["hostname"] = self.model.app.name

    def _on_nrf_relation_changed(self, event: EventBase) -> None:
        """Reads information about the NRF relation.

        Args:
           event (EventBase): NRF relation event.
        """
        if event.app not in event.relation.data:
            return

        nrf_host = event.relation.data[event.app].get("hostname")
        if nrf_host and self.state.nrf_host != nrf_host:
            self.state.nrf_host = nrf_host
            self.configure_pod()

    def _on_nrf_relation_broken(self, _=None) -> None:
        """Clears data when the NRF relation is broken."""
        self.state.nrf_host = None
        self.configure_pod()

    def _missing_relations(self) -> str:
        """Checks whether there are any missing relations.

        Returns:
            str: string with missing relations.
        """
        data_status = {"nrf": self.state.nrf_host}
        missing_relations = [k for k, v in data_status.items() if not v]
        return ", ".join(missing_relations)

    @property
    def relation_state(self) -> Dict[str, Any]:
        """Collects relation state configuration for pod spec assembly.

        Returns:
            Dict[str, Any]: relation state information.
        """
        relation_state = {"nrf_host": self.state.nrf_host}

        return relation_state

    def configure_pod(self, _=None) -> None:
        """Assemble the pod spec and apply it, if possible."""
        missing = self._missing_relations()
        if missing:
            status = "Waiting for {0} relation{1}"
            self.unit.status = BlockedStatus(
                status.format(missing, "s" if "," in missing else ""))
            return
        if not self.unit.is_leader():
            self.unit.status = ActiveStatus("ready")
            return

        self.unit.status = MaintenanceStatus("Assembling pod spec")

        # Fetch image information
        try:
            self.unit.status = MaintenanceStatus("Fetching image information")
            image_info = self.image.fetch()
        except OCIImageResourceError:
            self.unit.status = BlockedStatus(
                "Error fetching image information")
            return

        try:
            pod_spec = make_pod_spec(
                image_info,
                self.model.config,
                self.model.app.name,
                self.relation_state,
            )
        except ValueError as exc:
            logger.exception("Config/Relation data validation error")
            self.unit.status = BlockedStatus(str(exc))
            return

        if self.state.pod_spec != pod_spec:
            self.model.pod.set_spec(pod_spec)
            self.state.pod_spec = pod_spec

        self.unit.status = ActiveStatus("ready")
        self.publish_amf_info()
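
# Counterpart sketch (hypothetical, not part of Example #2): the NRF side of
# the handshake publishes its hostname on the "nrf" relation the same way
# AmfCharm publishes "hostname" on "amf", which is what
# _on_nrf_relation_changed above reads back.
from ops.charm import CharmBase


class NrfCharm(CharmBase):

    def __init__(self, *args):
        super().__init__(*args)
        self.framework.observe(self.on["nrf"].relation_joined,
                               self._publish_nrf_info)

    def _publish_nrf_info(self, event):
        if self.unit.is_leader():
            event.relation.data[self.app]["hostname"] = self.app.name
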
Example #3
class RabbitMQAMQPRequires(Object):
    """
    RabbitMQAMQPRequires class
    """

    on = RabbitMQAMQPServerEvents()
    _stored = StoredState()

    def __init__(self, charm, relation_name):
        super().__init__(charm, relation_name)
        self.charm = charm
        self.relation_name = relation_name
        self.framework.observe(
            self.charm.on[relation_name].relation_joined, self._on_amqp_relation_joined
        )
        self.framework.observe(
            self.charm.on[relation_name].relation_changed, self._on_amqp_relation_changed
        )
        self.framework.observe(
            self.charm.on[relation_name].relation_broken, self._on_amqp_relation_broken
        )

    @property
    def _amqp_rel(self):
        """The AMQP relation."""
        return self.framework.model.get_relation(self.relation_name)

    def _on_amqp_relation_joined(self, event):
        """AMQP relation joined."""
        logging.debug("RabbitMQAMQPRequires on_joined")
        self.event = event
        self.on.has_amqp_servers.relation_event = event
        self.on.has_amqp_servers.emit()
        # TODO Move to charm code once the emit has this event attached
        self.request_access(event, self.charm.username, self.charm.vhost)

    def _on_amqp_relation_changed(self, event):
        """AMQP relation changed."""
        logging.debug("RabbitMQAMQPRequires on_changed")
        self.event = event
        self.request_access(event, self.charm.username, self.charm.vhost)
        if self.password(event):
            self.on.ready_amqp_servers.emit()

    def _on_amqp_relation_broken(self, event):
        """AMQP relation broken."""
        # TODO clear data on the relation
        logging.debug("RabbitMQAMQPRequires on_broken")

    def password(self, event):
        """Return the AMQP password from the server side of the relation."""
        return event.relation.data[self._amqp_rel.app].get("password")

    def request_access(self, event, username, vhost):
        """Request access to the AMQP server.

        :param event: The current event
        :type event: EventBase
        :param username: The requested username
        :type username: str
        :param vhost: The requested vhost
        :type vhost: str
        :returns: None
        :rtype: None
        """
        logging.debug("Requesting AMQP user and vhost")
        event.relation.data[self.charm.app]['username'] = username
        event.relation.data[self.charm.app]['vhost'] = vhost
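
# Usage sketch (assumed charm, real interface above): RabbitMQAMQPRequires
# reads `username` and `vhost` from the composing charm, so the charm must
# expose both attributes before the relation fires.
from ops.charm import CharmBase


class AMQPClientCharm(CharmBase):

    username = "myapp"   # AMQP user to request (assumed value)
    vhost = "openstack"  # vhost to request (assumed value)

    def __init__(self, *args):
        super().__init__(*args)
        self.amqp = RabbitMQAMQPRequires(self, "amqp")
        self.framework.observe(self.amqp.on.ready_amqp_servers,
                               self._on_amqp_ready)

    def _on_amqp_ready(self, event):
        # The server has published a password; it can be read back with
        # self.amqp.password(event) inside a relation-changed handler.
        pass
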
class PrometheusCharm(CharmBase):
    """A Juju Charm for Prometheus
    """
    stored = StoredState()

    def __init__(self, *args):
        logger.debug('Initializing Charm')

        super().__init__(*args)
        self.stored.set_default(alertmanagers=dict())

        self.framework.observe(self.on.config_changed, self._on_config_changed)
        self.framework.observe(self.on.stop, self._on_stop)
        self.framework.observe(self.on['alertmanager'].relation_changed,
                               self.on_alertmanager_changed)
        self.framework.observe(self.on['alertmanager'].relation_departed,
                               self.on_alertmanager_departed)
        self.framework.observe(self.on['grafana-source'].relation_changed,
                               self.on_grafana_changed)

    def _on_config_changed(self, _):
        """Set a new Juju pod specification
        """
        self.configure_pod()

    def _on_stop(self, _):
        """Mark unit is inactive
        """
        self.unit.status = MaintenanceStatus('Pod is terminating.')

    def on_grafana_changed(self, event):
        """Provide Grafana with data source information
        """
        event.relation.data[self.unit]['port'] = str(
            self.model.config['advertised-port'])
        event.relation.data[self.unit]['source-type'] = 'prometheus'

    def on_alertmanager_changed(self, event):
        """Set an alertmanager configuation
        """
        if not self.unit.is_leader():
            logger.debug('{} is not leader. '
                         'Not handling alertmanager change.'.format(
                             self.unit.name))
            return

        if event.unit is None:
            logger.warning('Got null event unit on alertmanager changed')
            self.stored.alertmanagers.pop(event.relation.id, None)
            return

        alerting_config = event.relation.data[event.unit].get(
            'alerting_config', {})
        logger.debug('Received alerting config: {}'.format(alerting_config))

        if not alerting_config:
            logger.warning(
                'Got empty alerting config for relation id {}'.format(
                    event.relation.id))
            return

        self.stored.alertmanagers.update({event.relation.id: alerting_config})

        self.configure_pod()

    def on_alertmanager_departed(self, event):
        """Remove an alertmanager configuration
        """
        if not self.unit.is_leader():
            logger.debug('{} is not leader. '
                         'Not handling alertmanager departed.'.format(
                             self.unit.name))
            return

        self.stored.alertmanagers.pop(event.relation.id, None)
        self.configure_pod()

    def _cli_args(self):
        """Construct command line arguments for Prometheus
        """
        config = self.model.config
        args = [
            '--config.file=/etc/prometheus/prometheus.yml',
            '--storage.tsdb.path=/prometheus', '--web.enable-lifecycle',
            '--web.console.templates=/usr/share/prometheus/consoles',
            '--web.console.libraries=/usr/share/prometheus/console_libraries'
        ]

        # get log level
        allowed_log_levels = ['debug', 'info', 'warn', 'error', 'fatal']
        if config.get('log-level'):
            log_level = config['log-level'].lower()
        else:
            log_level = 'info'

        # If log level is invalid set it to debug
        if log_level not in allowed_log_levels:
            logging.error('Invalid loglevel: {0} given, {1} allowed. '
                          'defaulting to DEBUG loglevel.'.format(
                              log_level, '/'.join(allowed_log_levels)))
            log_level = 'debug'

        # set log level
        args.append('--log.level={0}'.format(log_level))

        # Expose Prometheus Administration API only if requested
        if config.get('web-enable-admin-api'):
            args.append('--web.enable-admin-api')

        # User specified Prometheus web page title
        if config.get('web-page-title'):
            # TODO: Validate and sanitize input
            args.append('--web.page-title="{0}"'.format(
                config['web-page-title']))

        # Enable time series database compression
        if config.get('tsdb-wal-compression'):
            args.append('--storage.tsdb.wal-compression')

        # Set time series retention time
        if config.get('tsdb-retention-time') and self._is_valid_timespec(
                config['tsdb-retention-time']):
            args.append('--storage.tsdb.retention.time={}'.format(
                config['tsdb-retention-time']))

        # Set maximum number of connections to prometheus server
        if config.get('web-max-connections'):
            args.append('--web.max-connections={}'.format(
                config['web-max-connections']))

        # Set maximum number of pending alerts
        if config.get('alertmanager-notification-queue-capacity'):
            args.append('--alertmanager.notification-queue-capacity={}'.format(
                config['alertmanager-notification-queue-capacity']))

        # Set timeout for alerts
        if config.get('alertmanager-timeout') and self._is_valid_timespec(
                config['alertmanager-timeout']):
            args.append('--alertmanager.timeout={}'.format(
                config['alertmanager-timeout']))

        logger.debug("CLI args: {0}".format(' '.join(args)))

        return args

    def _is_valid_timespec(self, timeval):
        """Is a time interval unit and value valid
        """
        if not timeval:
            return False

        time, unit = timeval[:-1], timeval[-1]

        if unit not in ['y', 'w', 'd', 'h', 'm', 's']:
            logger.error('Invalid unit {} in time spec'.format(unit))
            return False

        try:
            int(time)
        except ValueError:
            logger.error('Can not convert time {} to integer'.format(time))
            return False

        if not int(time) > 0:
            logger.error('Expected positive time spec but got {}'.format(time))
            return False

        return True

    def _are_valid_labels(self, json_data):
        """Are Prometheus external labels valid
        """
        if not json_data:
            return False

        try:
            labels = json.loads(json_data)
        except (ValueError, TypeError):
            logger.error(
                'Can not parse external labels : {}'.format(json_data))
            return False

        if not isinstance(labels, dict):
            logger.error(
                'Expected label dictionary but got : {}'.format(labels))
            return False

        for key, value in labels.items():
            if not isinstance(key, str) or not isinstance(value, str):
                logger.error('External label keys/values must be strings')
                return False

        return True

    def _external_labels(self):
        """Extract external labels for Prometheus from configuration
        """
        config = self.model.config
        labels = {}

        if config.get('external-labels') and self._are_valid_labels(
                config['external-labels']):
            labels = json.loads(config['external-labels'])

        return labels

    def _prometheus_global_config(self):
        """Construct Prometheus global configuration
        """
        config = self.model.config
        global_config = {}

        labels = self._external_labels()
        if labels:
            global_config['external_labels'] = labels

        if config.get('scrape-interval') and self._is_valid_timespec(
                config['scrape-interval']):
            global_config['scrape_interval'] = config['scrape-interval']

        if config.get('scrape-timeout') and self._is_valid_timespec(
                config['scrape-timeout']):
            global_config['scrape_timeout'] = config['scrape-timeout']

        if config.get('evaluation-interval') and self._is_valid_timespec(
                config['evaluation-interval']):
            global_config['evaluation_interval'] = config[
                'evaluation-interval']

        return global_config

    def _alerting_config(self):
        """Construct Prometheus altering configuation
        """
        alerting_config = ''

        if len(self.stored.alertmanagers) < 1:
            logger.debug('No alertmanagers available')
            return alerting_config

        if len(self.stored.alertmanagers) > 1:
            logger.warning('More than one alertmanager found. Using first!')

        manager = list(self.stored.alertmanagers.keys())[0]
        alerting_config = self.stored.alertmanagers.get(manager, '')

        return alerting_config

    def _prometheus_config(self):
        """Construct Prometheus configuration
        """
        config = self.model.config

        scrape_config = {
            'global': self._prometheus_global_config(),
            'scrape_configs': []
        }

        alerting_config = self._alerting_config()
        if alerting_config:
            scrape_config['alerting'] = alerting_config

        # By default only monitor prometheus server itself
        default_config = {
            'job_name': 'prometheus',
            'scrape_interval': '5s',
            'scrape_timeout': '5s',
            'metrics_path': '/metrics',
            'honor_timestamps': True,
            'scheme': 'http',
            'static_configs': [{
                'targets': ['localhost:{}'.format(config['advertised-port'])]
            }]
        }
        scrape_config['scrape_configs'].append(default_config)

        # If monitoring of k8s is requested gather all scraping configuration for k8s
        if config.get('monitor-k8s'):
            with open('config/prometheus-k8s.yml') as yaml_file:
                k8s_scrape_configs = yaml.safe_load(yaml_file).get(
                    'scrape_configs', [])
            for k8s_config in k8s_scrape_configs:
                scrape_config['scrape_configs'].append(k8s_config)

        logger.debug('Prometheus config : {}'.format(scrape_config))

        return yaml.dump(scrape_config)

    def _build_pod_spec(self):
        """Construct a Juju pod specification for Prometheus
        """
        logger.debug('Building Pod Spec')
        config = self.model.config
        spec = {
            'containers': [{
                'name': self.app.name,
                'imageDetails': {
                    'imagePath': config['prometheus-image-path'],
                    'username': config.get('prometheus-image-username', ''),
                    'password': config.get('prometheus-image-password', '')
                },
                'args': self._cli_args(),
                'readinessProbe': {
                    'httpGet': {
                        'path': '/-/ready',
                        'port': config['advertised-port']
                    },
                    'initialDelaySeconds': 10,
                    'timeoutSeconds': 30
                },
                'livenessProbe': {
                    'httpGet': {
                        'path': '/-/healthy',
                        'port': config['advertised-port']
                    },
                    'initialDelaySeconds': 30,
                    'timeoutSeconds': 30
                },
                'ports': [{
                    'containerPort': config['advertised-port'],
                    'name': 'prometheus-http',
                    'protocol': 'TCP'
                }],
                'files': [{
                    'name': 'prometheus-config',
                    'mountPath': '/etc/prometheus',
                    'files': {
                        'prometheus.yml': self._prometheus_config()
                    }
                }]
            }]
        }

        return spec

    def _check_config(self):
        """Identify missing but required items in configuation

        :returns: list of missing configuration items (configuration keys)
        """
        logger.debug('Checking Config')
        config = self.model.config
        missing = []

        if not config.get('prometheus-image-path'):
            missing.append('prometheus-image-path')

        if config.get('prometheus-image-username') \
                and not config.get('prometheus-image-password'):
            missing.append('prometheus-image-password')

        return missing

    def configure_pod(self):
        """Setup a new Prometheus pod specification
        """
        logger.debug('Configuring Pod')
        missing_config = self._check_config()
        if missing_config:
            logger.error('Incomplete Configuration : {}. '
                         'Application will be blocked.'.format(missing_config))
            self.unit.status = \
                BlockedStatus('Missing configuration: {}'.format(missing_config))
            return

        if not self.unit.is_leader():
            self.unit.status = ActiveStatus('Prometheus unit is ready')
            return

        self.unit.status = MaintenanceStatus('Setting pod spec.')
        pod_spec = self._build_pod_spec()

        self.model.pod.set_spec(pod_spec)
        self.app.status = ActiveStatus('Prometheus Application is ready')
        self.unit.status = ActiveStatus('Prometheus leader unit is ready')
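
# Standalone illustration (mirrors _is_valid_timespec above, outside the
# charm): a Prometheus time spec is a positive integer followed by one of
# y/w/d/h/m/s.
def is_valid_timespec(timeval):
    if not timeval:
        return False
    value, unit = timeval[:-1], timeval[-1]
    if unit not in ('y', 'w', 'd', 'h', 'm', 's'):
        return False
    try:
        return int(value) > 0
    except ValueError:
        return False


assert is_valid_timespec('15d')      # 15 days: valid
assert not is_valid_timespec('15')   # missing unit
assert not is_valid_timespec('0h')   # must be positive
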
class SlurmdRequires(Object):
    """SlurmdRequires."""

    on = SlurmdRequiresEvents()
    _state = StoredState()

    def __init__(self, charm, relation_name):
        """Set self._relation_name and self.charm."""
        super().__init__(charm, relation_name)
        self._charm = charm
        self._relation_name = relation_name

        self._state.set_default(ingress_address=None)

        self.framework.observe(
            self._charm.on[self._relation_name].relation_created,
            self._on_relation_created)
        self.framework.observe(
            self._charm.on[self._relation_name].relation_changed,
            self._on_relation_changed)
        self.framework.observe(
            self._charm.on[self._relation_name].relation_broken,
            self._on_relation_broken)
        self.framework.observe(
            self._charm.on[self._relation_name].relation_departed,
            self._on_relation_departed)

    def _on_relation_created(self, event):
        unit_data = event.relation.data[self.model.unit]
        self._state.ingress_address = unit_data['ingress-address']

    def _on_relation_changed(self, event):
        """Check for slurmdbd and slurmd, write config, set relation data."""
        logger.debug('_on_relation_changed(): entering')

        if len(self.framework.model.relations['slurmd']) > 0:
            if not self._charm.is_slurmd_available():
                self._charm.set_slurmd_available(True)
            self.on.slurmd_available.emit()
        else:
            self._charm.unit.status = BlockedStatus("Need > 0 units of slurmd")
            event.defer()
            return

    def _on_relation_departed(self, event):
        """Account for relation departed activity."""
        relations = len(_get_slurmd_active_units())
        logger.debug(f"number of slurmd relations:  {relations}")
        if relations < 1:
            self._charm.set_slurmd_available(False)
        self.on.slurmd_departed.emit()

    def _on_relation_broken(self, event):
        """Account for relation broken activity."""
        pass

    def _get_partitions(self, node_data):
        """Parse the node_data and return the hosts -> partition mapping."""
        part_dict = collections.defaultdict(dict)
        for node in node_data:
            part_dict[node['partition_name']].setdefault('hosts', [])
            part_dict[node['partition_name']]['hosts'].append(node['hostname'])
            part_dict[node['partition_name']]['partition_default'] = \
                node['partition_default'] == "true"
            if node.get('partition_config'):
                part_dict[node['partition_name']]['partition_config'] = \
                    node['partition_config']
        return dict(part_dict)

    def _get_slurmd_node_data(self):
        """Return the node info for units of applications on the relation."""
        nodes_info = list()
        relations = self.framework.model.relations['slurmd']

        slurmd_active_units = _get_slurmd_active_units()

        for relation in relations:
            app = relation.app
            for unit in relation.units:
                if unit.name in slurmd_active_units:
                    unit_data = relation.data[unit]
                    app_data = relation.data[app]
                    ctxt = {
                        'ingress_address': unit_data['ingress-address'],
                        'hostname': unit_data['hostname'],
                        'inventory': unit_data['inventory'],
                        'partition_name': app_data['partition_name'],
                        'partition_default': app_data['partition_default'],
                    }
                    # Related slurmd units don't specify custom
                    # partition_config by default.
                    # Only get partition_config if it exists in the
                    # related application's data.
                    if app_data.get('partition_config'):
                        ctxt['partition_config'] = \
                                app_data['partition_config']
                    nodes_info.append(ctxt)
        return nodes_info

    def set_slurm_config_on_app_relation_data(
        self,
        relation,
        slurm_config,
    ):
        """Set the slurm_conifg to the app data on the relation.

        Setting data on the relation forces the units of related applications
        to observe the relation-changed event so they can acquire and
        render the updated slurm_config.
        """
        relations = self._charm.framework.model.relations[relation]
        for relation in relations:
            relation.data[self.model.app]['slurm_config'] = json.dumps(
                slurm_config)

    def get_slurm_config(self):
        """Assemble and return the slurm_config."""
        slurmctld_ingress_address = self._state.ingress_address
        slurmctld_hostname = socket.gethostname().split(".")[0]

        slurmdbd_info = dict(self._charm.get_slurmdbd_info())
        slurmd_node_data = self._get_slurmd_node_data()
        partitions = self._get_partitions(slurmd_node_data)

        if slurmd_node_data and partitions:
            return {
                'nodes': slurmd_node_data,
                'partitions': partitions,
                'slurmdbd_port': slurmdbd_info['port'],
                'slurmdbd_hostname': slurmdbd_info['hostname'],
                'slurmdbd_ingress_address': slurmdbd_info['ingress_address'],
                'active_controller_hostname': slurmctld_hostname,
                'active_controller_ingress_address': slurmctld_ingress_address,
                'active_controller_port': "6817",
                'munge_key': self._charm.get_munge_key(),
                **self.model.config,
            }
        else:
            return None
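
# Worked example (sample data is made up): the node_data consumed by
# _get_partitions above is a list of per-unit dicts, and the result groups
# hostnames by partition.
sample_node_data = [
    {'hostname': 'node-0', 'partition_name': 'debug',
     'partition_default': 'true'},
    {'hostname': 'node-1', 'partition_name': 'debug',
     'partition_default': 'true'},
]
# _get_partitions(sample_node_data) returns:
# {'debug': {'hosts': ['node-0', 'node-1'], 'partition_default': True}}
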
Example #6
class CephISCSIGatewayPeers(Object):

    on = CephISCSIGatewayPeerEvents()
    state = StoredState()
    PASSWORD_KEY = 'admin_password'
    READY_KEY = 'gateway_ready'
    FQDN_KEY = 'gateway_fqdn'

    def __init__(self, charm, relation_name):
        super().__init__(charm, relation_name)
        self.relation_name = relation_name
        self.this_unit = self.framework.model.unit
        self.framework.observe(charm.on[relation_name].relation_changed,
                               self.on_changed)

    def on_changed(self, event):
        logging.info("CephISCSIGatewayPeers on_changed")
        self.on.has_peers.emit()
        if self.ready_peer_details:
            self.on.ready_peers.emit()

    def set_admin_password(self, password):
        logging.info("Setting admin password")
        self.peer_rel.data[self.peer_rel.app][self.PASSWORD_KEY] = password

    def announce_ready(self):
        logging.info("announcing ready")
        self.peer_rel.data[self.this_unit][self.READY_KEY] = 'True'
        self.peer_rel.data[self.this_unit][self.FQDN_KEY] = self.fqdn

    @property
    def ready_peer_details(self):
        peers = {
            self.framework.model.unit.name: {
                'fqdn': self.fqdn,
                'ip': self.cluster_bind_address
            }
        }
        for u in self.peer_rel.units:
            if self.peer_rel.data[u].get(self.READY_KEY) == 'True':
                peers[u.name] = {
                    'fqdn': self.peer_rel.data[u][self.FQDN_KEY],
                    'ip': self.peer_rel.data[u]['ingress-address']
                }
        return peers

    @property
    def fqdn(self):
        return socket.getfqdn()

    @property
    def is_joined(self):
        return self.peer_rel is not None

    @property
    def peer_rel(self):
        return self.framework.model.get_relation(self.relation_name)

    @property
    def peer_binding(self):
        return self.framework.model.get_binding(self.peer_rel)

    @property
    def cluster_bind_address(self):
        return str(self.peer_binding.network.bind_address)

    @property
    def admin_password(self):
        # https://github.com/canonical/operator/issues/148
        # return self.peer_rel.data[self.peer_rel.app].get(self.PASSWORD_KEY)
        return 'hardcodedpassword'

    @property
    def peer_addresses(self):
        addresses = [self.cluster_bind_address]
        for u in self.peer_rel.units:
            addresses.append(self.peer_rel.data[u]['ingress-address'])
        return sorted(addresses)
Example #7
"""Operator Charm main library."""
# Load modules from lib directory
import logging

import setuppath  # noqa:F401
from ops.charm import CharmBase
from ops.framework import StoredState
from ops.main import main
from ops.model import ActiveStatus, MaintenanceStatus


class ${class}(CharmBase):
    """Class reprisenting this Operator charm."""

    state = StoredState()

    def __init__(self, *args):
        """Initialize charm and configure states and events to observe."""
        super().__init__(*args)
        # -- standard hook observation
        self.framework.observe(self.on.install, self.on_install)
        self.framework.observe(self.on.start, self.on_start)
        self.framework.observe(self.on.config_changed, self.on_config_changed)
        # -- initialize states --
        self.state.set_default(installed=False)
        self.state.set_default(configured=False)
        self.state.set_default(started=False)

    def on_install(self, event):
        """Handle install state."""
Example #8
class SlurmctldCharm(CharmBase):
    """Slurmctld lifecycle events."""

    _stored = StoredState()

    def __init__(self, *args):
        """Init _stored attributes and interfaces, observe events."""
        super().__init__(*args)

        self._stored.set_default(
            munge_key_available=False,
            slurm_installed=False,
            slurmctld_controller_type=str(),
        )

        self._nrpe = Nrpe(self, "nrpe-external-master")

        self._slurm_manager = SlurmManager(self, "slurmctld")

        self._slurmctld = Slurmctld(self, "slurmctld")
        self._slurmctld_peer = SlurmctldPeer(self, "slurmctld-peer")

        event_handler_bindings = {
            self.on.install: self._on_install,
            self._slurmctld.on.slurm_config_available: self._on_check_status_and_write_config,
            self._slurmctld.on.scontrol_reconfigure: self._on_scontrol_reconfigure,
            self._slurmctld.on.restart_slurmctld: self._on_restart_slurmctld,
            self._slurmctld.on.munge_key_available: self._on_write_munge_key,
            self._slurmctld_peer.on.slurmctld_peer_available: self._on_slurmctld_peer_available,
        }
        for event, handler in event_handler_bindings.items():
            self.framework.observe(event, handler)

    def _on_install(self, event):
        self._slurm_manager.install(self.config["snapstore-channel"])
        self._stored.slurm_installed = True
        self.unit.status = ActiveStatus("slurm snap successfully installed")

    def _on_upgrade(self, event):
        slurm_config = dict(self._check_status())
        snapstore_channel = self.config["snapstore-channel"]
        self._slurm_manager.upgrade(slurm_config, snapstore_channel)

    def _on_write_munge_key(self, event):
        if not self._stored.slurm_installed:
            event.defer()
            return
        munge_key = self._slurmctld.get_stored_munge_key()
        self._slurm_manager.configure_munge_key(munge_key)
        self._slurm_manager.restart_munged()
        self._stored.munge_key_available = True

    def _on_slurmctld_peer_available(self, event):
        if self.framework.model.unit.is_leader():
            if self._slurmctld.is_joined:
                slurmctld_info = self._slurmctld_peer.get_slurmctld_info()
                if slurmctld_info:
                    self._slurmctld.set_slurmctld_info_on_app_relation_data(
                        slurmctld_info
                    )
                    return
            event.defer()
            return

    def _on_check_status_and_write_config(self, event):
        slurm_config = self._check_status()
        if not slurm_config:
            event.defer()
            return

        self._slurm_manager.render_slurm_configs(dict(slurm_config))
        self.unit.status = ActiveStatus("slurmctld available")

    def _on_restart_slurmctld(self, event):
        self._slurm_manager.restart_slurm_component()

    def _on_scontrol_reconfigure(self, event):
        self._slurm_manager.slurm_cmd("scontrol", "reconfigure")

    def _check_status(self):
        munge_key_available = self._stored.munge_key_available
        slurm_installed = self._stored.slurm_installed
        slurm_config = self._slurmctld.get_stored_slurm_config()

        slurmctld_joined = self._slurmctld.is_joined

        if not slurmctld_joined:
            self.unit.status = BlockedStatus(
                "Relations needed: slurm-configurator"
            )
            return None

        elif not (munge_key_available and slurm_installed and slurm_config):
            self.unit.status = WaitingStatus(
                "Waiting on: configuration"
            )
            return None

        return slurm_config

    def get_slurm_component(self):
        """Return the slurm component."""
        return self._slurm_manager.slurm_component

    def get_hostname(self):
        """Return the hostname."""
        return self._slurm_manager.hostname

    def get_port(self):
        """Return the port."""
        return self._slurm_manager.port
class ServingActivatorCharm(CharmBase):
    _stored = StoredState()

    def __init__(self, *args):
        super().__init__(*args)
        if not self.unit.is_leader():
            self.unit.status = WaitingStatus("Waiting for leadership")
            return
        # self.image = OCIImageResource(self, 'knative-activator-image')
        self.framework.observe(self.on.install, self._on_start)
        # self.framework.observe(self.on.config_changed, self._on_config_changed)
        # --- initialize states ---
        # self._stored.set_default(config_hash=self._config_hash())
        self._stored.set_default(started=False)
        # -- base values --
        self._stored.set_default(namespace=os.environ["JUJU_MODEL_NAME"])
    
    def _on_start(self, event):
        """Occurs upon install, start, upgrade, and possibly config changed."""
        if self._stored.started:
            return
        self.unit.status = MaintenanceStatus("Installing Knative Activator...")
        # try:
            #image_info = self.image.fetch()
        image_info = "gcr.io/knative-releases/knative.dev/serving/cmd/activator@sha256:1e3db4f2eeed42d3ef03f41cc3d07c333edab92af3653a530d6d5f370da96ab6"
        # except OCIImageResourceError:
        #     logging.exception('An error occured while fetching the image info')
        #     self.unit.status = BlockedStatus("Error fetching image information")
        #     return

        self.model.pod.set_spec(
            {
                'version': 3,
                'containers': [{
                    'name': 'activator',
                    'image': image_info,
                    # 'imageDetails': image_info,
                    'imagePullPolicy': 'Always',
                    'ports': [{
                        'containerPort': 9090,
                        'name': 'metrics'
                        },
                        {
                        'containerPort': 8008,
                        'name': 'profiling'
                        },
                        {
                        'containerPort': 8012,
                        'name': 'http1'
                        },
                        {
                        'containerPort': 8013,
                        'name': 'h2c'
                        },
                    ],
                    'envConfig': {
                        'GOGC': '500',
                        'POD_NAME':{
                            'field': {
                                'path': "metadata.name"
                            }
                        },
                        'POD_IP':{
                            'field': {
                                'path': "status.podIP"
                            }
                        },
                        'SYSTEM_NAMESPACE':{
                            'field': {
                                'path': "metadata.namespace"
                            }
                        },
                        'CONFIG_LOGGING_NAME':'config-logging',
                        'CONFIG_OBSERVABILITY_NAME': 'config-observability',
                        'METRICS_DOMAIN':'knative.dev/internal/serving',
                    },
                    'kubernetes': {
                        'securityContext': {
                            'privileged': False,
                            'readOnlyRootFilesystem': True,
                            'runAsNonRoot': True,
                            'capabilities': {
                                'drop': ['ALL']
                            }
                        },
                        'readinessProbe': {
                            'httpGet': {
                                'port': 8012,
                                'httpHeaders': [{
                                    'name': 'k-kubelet-probe',
                                    'value': 'activator',
                                }],
                            },
                            'failureThreshold': 12
                        },
                        'livenessProbe': {
                            'initialDelaySeconds': 15,
                            'failureThreshold': 12,
                            'httpGet': {
                                'port': 8012,
                                'httpHeaders': [{
                                    'name': 'k-kubelet-probe',
                                    'value': 'activator',
                                }],
                            }
                        }
                    },
                }],
            },
            k8s_resources={
                'kubernetesResources': {
                    'services': [
                        {
                            # Need to create a 2nd service because of bug 
                            # lp:https://bugs.launchpad.net/juju/+bug/1902000
                            'name': 'activator-service',
                            'spec': {
                                'ports': [
                                    {
                                        'name': 'http-metrics',
                                        'port': 9090,
                                        'targetPort': 9090,
                                    },
                                    {
                                        'name': 'http-profiling',
                                        'port': 8008,
                                        'targetPort': 8008,
                                    },
                                    {
                                        'name': 'http',
                                        'port': 80,
                                        'targetPort': 8012,
                                    },
                                    {
                                        'name': 'http2',
                                        'port': 81,
                                        'targetPort': 8013,
                                    }
                                ],
                                'selector': {'app': 'activator'},
                            }
                        }
                    ],
                }
            }
        )
        self.unit.status = ActiveStatus("Ready")
Example #10
class CephISCSIGatewayCharmBase(ops_openstack.OSBaseCharm):

    state = StoredState()
    PACKAGES = ['ceph-iscsi', 'tcmu-runner', 'ceph-common']
    CEPH_CAPABILITIES = ["osd", "allow *", "mon", "allow *", "mgr", "allow r"]

    RESTART_MAP = {
        '/etc/ceph/ceph.conf': ['rbd-target-api', 'rbd-target-gw'],
        '/etc/ceph/iscsi-gateway.cfg': ['rbd-target-api'],
        '/etc/ceph/ceph.client.ceph-iscsi.keyring': ['rbd-target-api']
    }

    DEFAULT_TARGET = "iqn.2003-01.com.ubuntu.iscsi-gw:iscsi-igw"
    REQUIRED_RELATIONS = ['ceph-client', 'cluster']

    def __init__(self, framework, key):
        super().__init__(framework, key)
        logging.info("Using {} class".format(self.release))
        self.state.set_default(target_created=False)
        self.state.set_default(enable_tls=False)
        self.state.set_default(additional_trusted_ips=[])
        self.ceph_client = interface_ceph_client.CephClientRequires(
            self, 'ceph-client')
        self.peers = interface_ceph_iscsi_peer.CephISCSIGatewayPeers(
            self, 'cluster')
        self.tls = interface_tls_certificates.TlsRequires(self, "certificates")
        self.adapters = CephISCSIGatewayAdapters(
            (self.ceph_client, self.peers, self.tls), self)
        self.framework.observe(self.on.ceph_client_relation_joined, self)
        self.framework.observe(self.ceph_client.on.pools_available, self)
        self.framework.observe(self.peers.on.has_peers, self)
        self.framework.observe(self.peers.on.ready_peers, self)
        self.framework.observe(self.on.create_target_action, self)
        self.framework.observe(self.on.add_trusted_ip_action, self)
        self.framework.observe(self.on.certificates_relation_joined, self)
        self.framework.observe(self.on.certificates_relation_changed, self)
        self.framework.observe(self.on.config_changed, self)
        self.framework.observe(self.on.upgrade_charm, self)

    def on_add_trusted_ip_action(self, event):
        self.state.additional_trusted_ips.extend(
            event.params['ips'].split(' '))
        logging.info(self.state.additional_trusted_ips)

    def on_create_target_action(self, event):
        gw_client = gwcli_client.GatewayClient()
        target = event.params.get('iqn', self.DEFAULT_TARGET)
        gateway_units = event.params.get(
            'gateway-units', [u for u in self.peers.ready_peer_details.keys()])
        gw_client.create_target(target)
        for gw_unit, gw_config in self.peers.ready_peer_details.items():
            added_gateways = []
            if gw_unit in gateway_units:
                gw_client.add_gateway_to_target(target, gw_config['ip'],
                                                gw_config['fqdn'])
                added_gateways.append(gw_unit)
        gw_client.create_pool(self.model.config['rbd-pool'],
                              event.params['image-name'],
                              event.params['image-size'])
        gw_client.add_client_to_target(target,
                                       event.params['client-initiatorname'])
        gw_client.add_client_auth(target, event.params['client-initiatorname'],
                                  event.params['client-username'],
                                  event.params['client-password'])
        gw_client.add_disk_to_client(target,
                                     event.params['client-initiatorname'],
                                     self.model.config['rbd-pool'],
                                     event.params['image-name'])
        event.set_results({'iqn': target})

    def setup_default_target(self):
        gw_client = gwcli_client.GatewayClient()
        gw_client.create_target(self.DEFAULT_TARGET)
        for gw_unit, gw_config in self.peers.ready_peer_details.items():
            gw_client.add_gateway_to_target(self.DEFAULT_TARGET,
                                            gw_config['ip'], gw_config['fqdn'])
        self.state.target_created = True

    def on_ready_peers(self, event):
        if not self.unit.is_leader():
            logging.info("Leader should do setup")
            return
        if not self.state.is_started:
            logging.info("Cannot perform setup yet, not started")
            event.defer()
            return
        if self.state.target_created:
            logging.info("Initial target setup already complete")
            return
        else:
            # This appears to race and sometime runs before the
            # peer is 100% ready. There is probably little value
            # in this anyway so may just remove it.
            # self.setup_default_target()
            return

    def on_has_peers(self, event):
        logging.info("Unit has peers")
        if self.unit.is_leader() and not self.peers.admin_password:
            logging.info("Setting admin password")
            alphabet = string.ascii_letters + string.digits
            password = ''.join(secrets.choice(alphabet) for i in range(8))
            self.peers.set_admin_password(password)

    def on_ceph_client_relation_joined(self, event):
        logging.info("Requesting replicated pool")
        self.ceph_client.create_replicated_pool(self.model.config['rbd-pool'])
        logging.info("Requesting permissions")
        self.ceph_client.request_ceph_permissions('ceph-iscsi',
                                                  self.CEPH_CAPABILITIES)
        self.ceph_client.request_osd_settings({
            'osd heartbeat grace': 20,
            'osd heartbeat interval': 5
        })

    def on_config_changed(self, event):
        if self.state.is_started:
            self.on_pools_available(event)
            self.on_ceph_client_relation_joined(event)

    def on_upgrade_charm(self, event):
        if self.state.is_started:
            self.on_pools_available(event)
            self.on_ceph_client_relation_joined(event)

    def on_pools_available(self, event):
        logging.info("on_pools_available")
        if not self.peers.admin_password:
            logging.info("Defering setup")
            event.defer()
            return

        def daemon_reload_and_restart(service_name):
            subprocess.check_call(['systemctl', 'daemon-reload'])
            subprocess.check_call(['systemctl', 'restart', service_name])

        rfuncs = {'rbd-target-api': daemon_reload_and_restart}

        @ch_host.restart_on_change(self.RESTART_MAP, restart_functions=rfuncs)
        def render_configs():
            for config_file in self.RESTART_MAP.keys():
                ch_templating.render(os.path.basename(config_file),
                                     config_file, self.adapters)

        logging.info("Rendering config")
        render_configs()
        logging.info("Setting started state")
        self.peers.announce_ready()
        self.state.is_started = True
        self.update_status()
        logging.info("on_pools_available: status updated")

    def on_certificates_relation_joined(self, event):
        addresses = set()
        for binding_name in ['public', 'cluster']:
            binding = self.model.get_binding(binding_name)
            addresses.add(binding.network.ingress_address)
            addresses.add(binding.network.bind_address)
        sans = [str(s) for s in addresses]
        sans.append(socket.gethostname())
        self.tls.request_application_cert(socket.getfqdn(), sans)

    def on_certificates_relation_changed(self, event):
        app_certs = self.tls.application_certs
        if not all([self.tls.root_ca_cert, app_certs]):
            return
        ca_cert_data = self.tls.root_ca_cert
        if self.tls.chain:
            # Append the chain so that clients that trust the root CA will
            # trust certs signed by an intermediate in the chain
            ca_cert_data = ca_cert_data + os.linesep + self.tls.chain
        pem_data = app_certs['cert'] + os.linesep + app_certs['key']
        tls_files = {
            '/etc/ceph/iscsi-gateway.crt': app_certs['cert'],
            '/etc/ceph/iscsi-gateway.key': app_certs['key'],
            '/etc/ceph/iscsi-gateway.pem': pem_data,
            '/usr/local/share/ca-certificates/vault_ca_cert.crt': ca_cert_data
        }
        for tls_file, tls_data in tls_files.items():
            with open(tls_file, 'w') as f:
                f.write(tls_data)
        subprocess.check_call(['update-ca-certificates'])
        cert_out = subprocess.check_output(
            ('openssl x509 -inform pem -in /etc/ceph/iscsi-gateway.pem '
             '-pubkey -noout').split())
        with open('/etc/ceph/iscsi-gateway-pub.key', 'w') as f:
            f.write(cert_out.decode('UTF-8'))
        self.state.enable_tls = True
        self.on_pools_available(event)
class Slurmdbd(Object):
    """Slurmdbd."""

    _stored = StoredState()
    on = SlurmdbdEvents()

    def __init__(self, charm, relation_name):
        """Observe relation lifecycle events."""
        super().__init__(charm, relation_name)

        self._charm = charm
        self._relation_name = relation_name

        self._stored.set_default(munge_key=None, )

        self.framework.observe(
            self._charm.on[self._relation_name].relation_joined,
            self._on_relation_joined,
        )

        self.framework.observe(
            self._charm.on[self._relation_name].relation_broken,
            self._on_relation_broken,
        )

    def _on_relation_joined(self, event):
        """Handle the relation-joined event.

        Get the munge_key from slurm-configurator and save it to the
        charm stored state.
        """
        # Since we are in relation-joined (with the app on the other side)
        # we can almost guarantee that the app object will exist in
        # the event, but check for it just in case.
        event_app_data = event.relation.data.get(event.app)
        if not event_app_data:
            event.defer()
            return

        # slurm-configurator sets the munge_key on the relation-created event
        # which happens before relation-joined. We can almost guarantee that
        # the munge key will exist at this point, but check for it just in
        # case.
        munge_key = event_app_data.get("munge_key")
        if not munge_key:
            event.defer()
            return

        # Store the munge_key in the interface's stored state object and emit
        # munge_key_available.
        self._store_munge_key(munge_key)
        self.on.munge_key_available.emit()

    def _on_relation_broken(self, event):
        self.set_slurmdbd_info_on_app_relation_data("")
        self.on.slurmdbd_unavailable.emit()

    def set_slurmdbd_info_on_app_relation_data(self, slurmdbd_info):
        """Set slurmdbd_info."""
        relations = self.framework.model.relations["slurmdbd"]
        # Iterate over each of the relations setting the relation data.
        for relation in relations:
            if slurmdbd_info != "":
                relation.data[self.model.app]["slurmdbd_info"] = json.dumps(
                    slurmdbd_info)
            else:
                relation.data[self.model.app]["slurmdbd_info"] = ""

    def _store_munge_key(self, munge_key):
        """Set the munge key in the stored state."""
        self._stored.munge_key = munge_key

    def get_munge_key(self):
        """Retrieve the munge key from the stored state."""
        return self._stored.munge_key
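
# Usage sketch (hypothetical charm; event and helper names come from the
# Slurmdbd class above): the composing charm observes munge_key_available
# and fetches the stored key.
from ops.charm import CharmBase


class SlurmdbdCharm(CharmBase):

    def __init__(self, *args):
        super().__init__(*args)
        self._slurmdbd = Slurmdbd(self, "slurmdbd")
        self.framework.observe(self._slurmdbd.on.munge_key_available,
                               self._on_munge_key_available)

    def _on_munge_key_available(self, event):
        munge_key = self._slurmdbd.get_munge_key()
        # ...write the key to /etc/munge/munge.key and restart munged here...
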
Example #12
class CephISCSIGatewayCharmOcto(CephISCSIGatewayCharmBase):

    state = StoredState()
    release = 'octopus'
Example #13
class CephISCSIGatewayCharmJewel(CephISCSIGatewayCharmBase):

    state = StoredState()
    release = 'jewel'
class HaCluster(Object):

    state = StoredState()
    PACEMAKER_LOGIN_NAME = 'MSSQLPacemaker'
    PACEMAKER_LOGIN_CREDS_FILE = '/var/opt/mssql/secrets/passwd'
    APT_PACKAGES = ['fence-agents', 'resource-agents', 'mssql-server-ha']
    UNIT_ACTIVE_STATUS = ActiveStatus('Unit is ready and clustered')

    def __init__(self, charm, relation_name):
        super().__init__(charm, relation_name)
        self.state.set_default(pacemaker_login_ready=False,
                               ha_cluster_ready=False)
        self.relation_name = relation_name
        self.app = self.model.app
        self.unit = self.model.unit
        self.cluster = charm.cluster
        self.framework.observe(charm.on[relation_name].relation_joined,
                               self.on_joined)
        self.framework.observe(charm.on[relation_name].relation_changed,
                               self.on_changed)
        self.framework.observe(charm.cluster.on.created_ag, self.on_created_ag)

    def on_joined(self, event):
        if not self.cluster.is_ag_ready:
            logger.warning('The availability group is not ready. Deferring '
                           'hacluster on_joined until AG is ready.')
            event.defer()
            return
        logger.info('Installing Microsoft SQL Server HA components')
        retry_on_error()(apt_install)(packages=self.APT_PACKAGES, fatal=True)
        self.setup_pacemaker_mssql_login()
        rel_data = {
            'resources': {
                'ag_cluster': 'ocf:mssql:ag'
            },
            'resource_params': {
                'ag_cluster':
                'params ag_name="{ag_name}" '
                'meta failure-timeout=60s '
                'op start timeout=60s '
                'op stop timeout=60s '
                'op promote timeout=60s '
                'op demote timeout=10s '
                'op monitor timeout=60s interval=10s '
                'op monitor timeout=60s interval=11s role="Master" '
                'op monitor timeout=60s interval=12s role="Slave" '
                'op notify timeout=60s'.format(ag_name=self.cluster.AG_NAME)
            },
            'ms': {
                'ms-ag_cluster':
                'ag_cluster meta '
                'master-max="1" master-node-max="1" '
                'clone-max="3" clone-node-max="1" notify="true"'
            }
        }
        update_hacluster_vip('mssql', rel_data)
        group_name = VIP_GROUP_NAME.format(service='mssql')
        rel_data.update({
            'colocations': {
                'vip_on_master':
                'inf: {} ms-ag_cluster:Master'.format(group_name)
            },
            'orders': {
                'ag_first':
                'inf: ms-ag_cluster:promote {}:start'.format(group_name)
            }
        })
        rel = self.model.get_relation(event.relation.name, event.relation.id)
        for k, v in rel_data.items():
            rel.data[self.unit]['json_{}'.format(k)] = json.dumps(
                v, **JSON_ENCODE_OPTIONS)

    def on_changed(self, event):
        rel_data = event.relation.data.get(event.unit) or {}
        if rel_data.get('clustered'):
            logger.info('The hacluster relation is ready')
            self.unit.status = self.UNIT_ACTIVE_STATUS
            self.state.ha_cluster_ready = True

    def on_created_ag(self, _):
        self.setup_pacemaker_mssql_login()
        self.cluster.mssql_db_client().exec_t_sql("""
        GRANT ALTER, CONTROL, VIEW DEFINITION
            ON AVAILABILITY GROUP::[{ag_name}] TO [{login_name}]
        GRANT VIEW SERVER STATE TO [{login_name}]
        """.format(ag_name=self.cluster.AG_NAME,
                   login_name=self.PACEMAKER_LOGIN_NAME))

    def setup_pacemaker_mssql_login(self):
        if self.state.pacemaker_login_ready:
            logger.info('The pacemaker login is already configured.')
            return
        login_password = host.pwgen(32)
        self.cluster.mssql_db_client().create_login(
            name=self.PACEMAKER_LOGIN_NAME,
            password=login_password,
            server_roles=['sysadmin'])
        with open(self.PACEMAKER_LOGIN_CREDS_FILE, 'w') as f:
            f.write('{}\n{}\n'.format(self.PACEMAKER_LOGIN_NAME,
                                      login_password))
        os.chown(self.PACEMAKER_LOGIN_CREDS_FILE, 0, 0)
        os.chmod(self.PACEMAKER_LOGIN_CREDS_FILE, 0o400)
        self.state.pacemaker_login_ready = True

    @property
    def is_ha_cluster_ready(self):
        return self.state.ha_cluster_ready

    @property
    def bind_address(self):
        return self.model.config['vip']
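
Wiring HaCluster into a charm takes just the charm object and the relation
endpoint name, but note that the constructor reads charm.cluster, so that
attribute must exist first. A minimal sketch; MSSQLCharm, the
make_cluster_helper factory, and the 'ha' endpoint name are hypothetical:

class MSSQLCharm(CharmBase):

    def __init__(self, *args):
        super().__init__(*args)
        # HaCluster captures charm.cluster in its constructor, so set it
        # before instantiating HaCluster.
        self.cluster = make_cluster_helper(self)  # hypothetical helper
        self.ha = HaCluster(self, 'ha')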
Example #15
class SlurmDBDCharm(CharmBase):
    """This charm demonstrates the 'requires' side of the relationship by
    extending CharmBase with an event object that observes
    the relation-changed hook event.
    """

    _stored = StoredState()

    def __init__(self, *args):
        super().__init__(*args)

        self._stored.set_default(db_info=dict())

        self.framework.observe(self.on.start, self._on_start)
        self.framework.observe(self.on.install, self._on_install)

        self.db_info = MySQLClient(self, "db")
        self.framework.observe(self.db_info.on.db_info_available,
                               self._on_db_info_available)

        self.slurm_ops = SlurmSnapOps(self, "slurm-config")
        self.framework.observe(self.slurm_ops.on.configure_slurm,
                               self._on_configure_slurm)
        self.framework.observe(self.slurm_ops.on.slurm_snap_installed,
                               self._on_slurm_snap_installed)

    def _on_install(self, event):
        pass

    def _on_start(self, event):
        pass

    def _on_slurm_snap_installed(self, event):
        pass

    def _on_db_info_available(self, event):
        """Store the db_info in the StoredState for later use.
        """
        db_info = {
            'user': event.db_info.user,
            'password': event.db_info.password,
            'host': event.db_info.host,
            'port': event.db_info.port,
            'database': event.db_info.database,
        }
        self._stored.db_info = db_info
        self.slurm_ops.on.configure_slurm.emit()

    def _on_configure_slurm(self, event):
        """Render the slurmdbd.yaml and set the snap.mode.
        """
        hostname = socket.gethostname().split(".")[0]
        self.slurm_ops.render_slurm_config(
            f"{os.getcwd()}/slurmdbd.yaml.tmpl",
            #"/var/snap/slurm/common/etc/slurm-configurator/slurmdbd.yaml",
            "/home/ubuntu/slurmdbd.yaml",
            context={
                **{
                    "hostname": hostname
                },
                **self._stored.db_info
            })
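
A charm module like this is normally finished off with the standard
operator-framework entry point; a sketch:

from ops.main import main

if __name__ == "__main__":
    main(SlurmDBDCharm)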
Example #16
class MongoconsumerCharm(CharmBase):
    _stored = StoredState()

    def __init__(self, *args):
        super().__init__(*args)
        self.mongo_consumer = MongoConsumer(self, 'database', self.consumes)
        self.image = OCIImageResource(self, "busybox-image")
        self.framework.observe(self.on.config_changed, self.on_config_changed)
        self.framework.observe(self.mongo_consumer.on.available,
                               self.on_db_available)
        self.framework.observe(self.mongo_consumer.on.invalid,
                               self.on_provider_invalid)
        self.framework.observe(self.mongo_consumer.on.broken,
                               self.on_provider_broken)
        self.framework.observe(self.on.stop, self.on_stop)
        self._stored.set_default(events=[])
        self._stored.set_default(num_dbs=2)
        self._stored.set_default(requested_dbs=0)

    def on_stop(self, _):
        """Mark terminating unit as inactive
        """
        if self.model.config['record_events']:
            self._stored.events.append("stop")

        self.unit.status = MaintenanceStatus('Pod is terminating.')

    def on_config_changed(self, _):
        if self.model.config['record_events']:
            self._stored.events.append("config_chagned")

        if not self.unit.is_leader():
            self.unit.status = ActiveStatus()
            return

        self.configure_pod()

    def on_db_available(self, event):
        if self.model.config['record_events']:
            self._stored.events.append("db_available")

        logger.debug("Got Databases: " + str(self.mongo_consumer.databases()))
        if self._stored.requested_dbs < self._stored.num_dbs:
            num_dbs = self._stored.num_dbs - self._stored.requested_dbs
            logger.debug("Requesting additional {} databases".format(num_dbs))
            for i in range(num_dbs):
                self.mongo_consumer.new_database()
                self._stored.requested_dbs += 1
        else:
            self.test_databases()

    def on_provider_invalid(self, _):
        if self.model.config['record_events']:
            self._stored.events.append("provider_invalid")

        logger.debug("Failed to get a valid provider")

    def on_provider_broken(self, _):
        logger.debug("Database provider relation broken")

    def test_databases(self):
        for provider_id in self.mongo_consumer.provider_ids():
            creds = self.mongo_consumer.credentials(provider_id)
            uri = creds['replica_set_uri']
            client = pymongo.MongoClient(uri)
            for dbname in self.mongo_consumer.databases(provider_id):
                post = {"test": "A test post"}
                logger.debug("writing {} to {}".format(post, dbname))
                db = client[dbname]
                tbl = db["test"]
                tbl.insert_one(post)
                posts = list(tbl.find())
                logger.debug("read {} from {}".format(posts, dbname))

    def configure_pod(self):
        logger.debug(str(sorted(os.environ.items())))
        # Fetch image information
        try:
            self.unit.status = WaitingStatus("Fetching image information")
            image_info = self.image.fetch()
        except OCIImageResourceError:
            self.unit.status = BlockedStatus(
                "Error fetching image information")
            return

        # Build Pod spec
        self.unit.status = WaitingStatus("Assembling pod spec")

        pod_spec = {
            "version":
            3,
            "containers": [{
                "name":
                self.app.name,
                "imageDetails":
                image_info,
                "command": ["sh"],
                "args": ["-c", "while true; do date; sleep 60;done"],
                "imagePullPolicy":
                "Always",
                "ports": [{
                    "name": self.app.name,
                    "containerPort": 80,
                    "protocol": "TCP"
                }]
            }]
        }

        if self.unit.is_leader():
            self.model.pod.set_spec(pod_spec)
            self.unit.status = ActiveStatus()

    @property
    def consumes(self):
        return json.loads(self.model.config['consumes'])
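
The consumes property parses the charm's "consumes" config option as JSON.
A hypothetical value, shown only to illustrate the shape (the schema
MongoConsumer actually expects is outside this excerpt):

import json

consumes_option = '{"mongodb": ">=4.0"}'  # hypothetical config value
assert json.loads(consumes_option) == {"mongodb": ">=4.0"}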
Example #17
class ZookeeperCharm(CharmBase):
    on = ZookeeperCharmEvents()
    state = StoredState()

    def __init__(self, framework, key):
        super().__init__(framework, key)

        self.framework.observe(self.on.start, self)
        #        self.framework.observe(self.on.stop, self)
        self.framework.observe(self.on.update_status, self)
        self.framework.observe(self.on.upgrade_charm, self)
        self.framework.observe(self.on.config_changed, self)
        self.framework.observe(self.on.cluster_relation_changed,
                               self.on_cluster_modified)
        self.framework.observe(self.on.zookeeper_relation_joined,
                               self.expose_relation_data)

        self._unit = 1
        self._zookeeperuri = ""
        self._pod = K8sPod(self.framework.model.app.name)

        self.cluster = ZookeeperCluster(self, 'cluster')
        self.client = ZookeeperClient(self, 'zookeeper',
                                      self.model.config['client-port'])

        self.state.set_default(isStarted=False)

        self.framework.observe(self.on.leader_elected, self)

    def on_start(self, event):
        logging.info('START')
        if self.model.unit.is_leader():
            #        if not self.model.config['ha-mode']:
            #self.model.unit.status = MaintenanceStatus('Starting pod')
            podSpec = self.makePodSpec()
            self.model.pod.set_spec(podSpec)
            self.state.podSpec = podSpec
        self.on.config_changed.emit()

    def expose_relation_data(self, event):
        logging.info('Data Exposed')
        fqdn = socket.getnameinfo((str(self.cluster.ingress_address), 0),
                                  socket.NI_NAMEREQD)[0]
        logging.info(fqdn)
        self.client.set_host(fqdn)
        self.client.set_port(self.model.config['client-port'])
        self.client.set_rest_port(self.model.config['client-port'])
        self.client.expose_zookeeper()
        self.on.config_changed.emit()

    def on_upgrade_charm(self, event):
        logging.info('UPGRADE')
        self.on.config_changed.emit()

    def on_leader_elected(self, event):
        logging.info('LEADER ELECTED')
        self.on.config_changed.emit()

    def getUnits(self):
        logging.info('get_units')
        peer_relation = self.model.get_relation('cluster')
        if peer_relation is not None:
            logging.info(peer_relation)
            if not self.model.config['ha-mode']:
                self._unit = 1
            else:
                self._unit = len(peer_relation.units) + 1
        self.on.update_status.emit()

    def on_cluster_modified(self, event):
        logging.info('on_cluster_modified')
        self.on.config_changed.emit()

    def on_update_status(self, event):
        logging.info('UPDATE STATUS')
        if self._pod.is_ready:
            logging.info('Pod is ready')
            self.state.isStarted = True
            if self.model.unit.is_leader():
                self.model.unit.status = ActiveStatus('ready')
            else:
                self.model.unit.status = ActiveStatus('ready Not a Leader')

    def on_config_changed(self, event):
        logging.info('CONFIG CHANGED')
        if self._pod.is_ready:
            if self.model.unit.is_leader():
                self.getUnits()
                podSpec = self.makePodSpec()
                if self.state.podSpec != podSpec:
                    self.model.pod.set_spec(podSpec)
                    self.state.podSpec = podSpec
        self.on.update_status.emit()

    def on_new_client(self, event):
        logging.info('NEW CLIENT')
        if not self.state.isStarted:
            logging.info('NEW CLIENT DEFERRED')
            return event.defer()
        logging.info('NEW CLIENT SERVING')
        if self.model.unit.is_leader():
            self.client.expose_zookeeper()

    def makePodSpec(self):
        logging.info('MAKING POD SPEC')
        with open("templates/spec_template.yaml") as spec_file:
            podSpecTemplate = spec_file.read()
        dockerImage = self.model.config['image']
        logging.info(self._unit)
        data = {
            "name": self.model.app.name,
            "zookeeper-units": int(self._unit),
            "docker_image_path": dockerImage,
            "server-port": self.model.config['server-port'],
            "client-port": self.model.config['client-port'],
            "leader-election-port":
            int(self.model.config['leader-election-port']),
        }
        logging.info(data)
        podSpec = podSpecTemplate % data
        podSpec = yaml.safe_load(podSpec)
        return podSpec
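
makePodSpec() renders the pod spec with old-style %-substitution against a
YAML template. A self-contained sketch of that mechanism, using a
hypothetical one-line template in place of templates/spec_template.yaml:

import yaml

pod_spec_template = ("containers:\n"
                     "  - name: %(name)s\n"
                     "    image: %(docker_image_path)s\n")
rendered = pod_spec_template % {"name": "zookeeper",
                                "docker_image_path": "zookeeper:3.6"}
pod_spec = yaml.safe_load(rendered)
assert pod_spec["containers"][0]["image"] == "zookeeper:3.6"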
Example #18
class SimpleHAProxyCharm(CharmBase):

    state = StoredState()
    on = ProxyClusterEvents()

    def __init__(self, framework, key):
        super().__init__(framework, key)

        # An example of setting charm state
        # that's persistent across events
        self.state.set_default(is_started=False)

        self.peers = ProxyCluster(self, "proxypeer")

        if not self.state.is_started:
            self.state.is_started = True

        # Register all of the events we want to observe
        for event in (
                # Charm events
                self.on.config_changed,
                self.on.install,
                self.on.start,
                self.on.upgrade_charm,
                # Charm actions (primitives)
                self.on.touch_action,
                # OSM actions (primitives)
                self.on.start_action,
                self.on.stop_action,
                self.on.restart_action,
                self.on.reboot_action,
                self.on.upgrade_action,
                # SSH Proxy actions (primitives)
                self.on.generate_ssh_key_action,
                self.on.get_ssh_public_key_action,
                self.on.run_action,
                self.on.verify_ssh_credentials_action,
        ):
            self.framework.observe(event, self)

        self.framework.observe(self.on.proxypeer_relation_changed, self)

    def get_ssh_proxy(self):
        """Get the SSHProxy instance"""
        proxy = SSHProxy(
            hostname=self.model.config["ssh-hostname"],
            username=self.model.config["ssh-username"],
            password=self.model.config["ssh-password"],
        )
        return proxy

    def on_proxypeer_relation_changed(self, event):
        if self.peers.is_cluster_initialized:
            pubkey = self.peers.ssh_public_key
            privkey = self.peers.ssh_private_key
            SSHProxy.write_ssh_keys(public=pubkey, private=privkey)
            self.on_config_changed(event)
        else:
            event.defer()

    def on_config_changed(self, event):
        """Handle changes in configuration"""
        unit = self.model.unit

        # Unit should go into a waiting state until verify_ssh_credentials is successful
        unit.status = WaitingStatus("Waiting for SSH credentials")
        proxy = self.get_ssh_proxy()

        verified = proxy.verify_credentials()
        if verified:
            unit.status = ActiveStatus()
        else:
            unit.status = BlockedStatus("Invalid SSH credentials.")

    def on_install(self, event):
        pass

    def on_start(self, event):
        """Called when the charm is being installed"""
        if not self.peers.is_joined:
            event.defer()
            return

        unit = self.model.unit

        if not SSHProxy.has_ssh_key():
            unit.status = MaintenanceStatus("Generating SSH keys...")
            pubkey = None
            privkey = None
            if self.is_leader:
                if self.peers.is_cluster_initialized:
                    SSHProxy.write_ssh_keys(
                        public=self.peers.ssh_public_key,
                        private=self.peers.ssh_private_key,
                    )
                else:
                    SSHProxy.generate_ssh_key()
                    self.on.ssh_keys_initialized.emit(
                        SSHProxy.get_ssh_public_key(),
                        SSHProxy.get_ssh_private_key())
                unit.status = ActiveStatus()
            else:
                unit.status = WaitingStatus(
                    "Waiting for leader to populate the keys")

    def on_touch_action(self, event):
        """Touch a file."""

        if self.is_leader:
            filename = event.params["filename"]
            proxy = self.get_ssh_proxy()
            stdout, stderr = proxy.run("touch {}".format(filename))
            event.set_results({"output": stdout})
        else:
            event.fail("Unit is not leader")
            return

    def on_upgrade_charm(self, event):
        """Upgrade the charm."""
        unit = self.model.unit

        # Mark the unit as under Maintenance.
        unit.status = MaintenanceStatus("Upgrading charm")

        self.on_install(event)

        # When maintenance is done, return to an Active state
        unit.status = ActiveStatus()

    ###############
    # OSM methods #
    ###############
    def on_start_action(self, event):
        """Start the VNF service on the VM."""
        pass

    def on_stop_action(self, event):
        """Stop the VNF service on the VM."""
        pass

    def on_restart_action(self, event):
        """Restart the VNF service on the VM."""
        pass

    def on_reboot_action(self, event):
        """Reboot the VM."""
        if self.is_leader:
            proxy = self.get_ssh_proxy()
            stdout, stderr = proxy.run("sudo reboot")
            if stderr:
                event.fail(stderr)
        else:
            event.fail("Unit is not leader")
            return

    def on_upgrade_action(self, event):
        """Upgrade the VNF service on the VM."""
        pass

    #####################
    # SSH Proxy methods #
    #####################
    def on_generate_ssh_key_action(self, event):
        """Generate a new SSH keypair for this unit."""
        if self.is_leader:
            if not SSHProxy.generate_ssh_key():
                event.fail("Unable to generate ssh key")
        else:
            event.fail("Unit is not leader")
            return

    def on_get_ssh_public_key_action(self, event):
        """Get the SSH public key for this unit."""
        if self.is_leader:
            pubkey = SSHProxy.get_ssh_public_key()
            event.set_results({"pubkey": pubkey})
        else:
            event.fail("Unit is not leader")
            return

    def on_run_action(self, event):
        """Run an arbitrary command on the remote host."""
        if self.is_leader:
            cmd = event.params["command"]
            proxy = self.get_ssh_proxy()
            stdout, stderr = proxy.run(cmd)
            event.set_results({"output": stdout})
            if stderr:
                event.fail(stderr)
        else:
            event.fail("Unit is not leader")
            return

    def on_verify_ssh_credentials_action(self, event):
        """Verify the SSH credentials for this unit."""
        if self.is_leader:
            proxy = self.get_ssh_proxy()

            verified = proxy.verify_credentials()
            if verified:
                print("Verified!")
                event.set_results({"verified": True})
            else:
                print("Verification failed!")
                event.set_results({"verified": False})
        else:
            event.fail("Unit is not leader")
            return

    @property
    def is_leader(self):
        # update the framework to include self.unit.is_leader()
        return self.model.unit.is_leader()
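
The SSHProxy helper is only driven through the calls seen above
(verify_credentials, run, and the key-management classmethods). A sketch of
exercising it directly, with hypothetical connection details:

proxy = SSHProxy(hostname="10.0.0.5",  # hypothetical host
                 username="ubuntu",
                 password="secret")
if proxy.verify_credentials():
    stdout, stderr = proxy.run("hostname")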
Example #19
class HelloJujuCharm(CharmBase):
    """Main 'Hello, Juju' charm class"""

    _stored = StoredState()

    def __init__(self, *args):
        super().__init__(*args)
        self.framework.observe(self.on.install, self._on_install)
        self.framework.observe(self.on.start, self._on_start)
        self.framework.observe(self.on.config_changed, self._on_config_changed)
        self._stored.set_default(repo="", port="", conn_str="")

        # Initialise the PostgreSQL Client for the "db" relation
        self.db = pgsql.PostgreSQLClient(self, "db")
        self.framework.observe(self.db.on.database_relation_joined,
                               self._on_database_relation_joined)
        self.framework.observe(self.db.on.master_changed,
                               self._on_database_master_changed)

    def _on_install(self, _):
        """Install prerequisites for the application"""
        # Install some Python packages using apt
        self.unit.status = MaintenanceStatus("installing pip and virtualenv")
        self._install_apt_packages(["python3-pip", "python3-virtualenv"])
        # Clone application code and install dependencies, setup initial db
        self._setup_application()
        # Template out the systemd service file
        self._render_systemd_unit()

    def _on_start(self, _):
        """Start the workload"""
        check_call(["open-port", f"{self._stored.port}/TCP"])
        # Enable and start the "hello-juju" systemd unit
        systemd.service_resume("hello-juju")
        self.unit.status = ActiveStatus()

    def _on_config_changed(self, _):
        """Handle changes to the application configuration"""
        restart = False

        # Check if the application repo has been changed
        if self.config["application-repo"] != self._stored.repo:
            logger.info("application repo changed, installing")
            self._stored.repo = self.config["application-repo"]
            self._setup_application()
            restart = True

        if self.config["port"] != self._stored.port:
            logger.info("port config changed, configuring")
            # Close the existing application port
            check_call(["close-port", f"{self._stored.port}/TCP"])
            # Reconfigure the systemd unit to specify the new port
            self._stored.port = self.config["port"]
            self._render_systemd_unit()
            # Ensure the correct port is opened for the application
            check_call(["open-port", f"{self._stored.port}/TCP"])
            restart = True

        if restart:
            logger.info("restarting hello-juju application")
            systemd.service_restart("hello-juju")

        self.unit.status = ActiveStatus()

    def _on_database_relation_joined(self, event):
        """Handle the event where this application is joined with a database"""
        if self.unit.is_leader():
            # Ask the database to create a database with this app's name
            event.database = self.app.name

    def _on_database_master_changed(self, event):
        """Handler the case where a new PostgreSQL DB master is available"""
        if event.database != self.app.name:
            # Leader has not yet set the database name/requirements.
            return

        # event.master will be None if the master database is unavailable,
        # or a pgsql.ConnectionString instance otherwise
        if event.master:
            self.unit.status = MaintenanceStatus(
                "configuring database settings")
            # Store the connection uri in state
            # Replace the first part of the URL with pg8000 equivalent
            self._stored.conn_str = event.master.uri.replace(
                "postgresql://", "postgresql+pg8000://")
            # Render the settings file with the database connection details
            self._render_settings_file()
            # Ensure the database tables are created in the master
            self._create_database_tables()
            # Restart the service
            systemd.service_restart("hello-juju")
            # Set back to active status
            self.unit.status = ActiveStatus()
        else:
            # Defer this event until the master is available
            event.defer()
            return

    def _setup_application(self):
        """Clone a Flask application into place and setup it's dependencies"""
        self.unit.status = MaintenanceStatus("fetching application code")

        # Delete the application directory if it exists already
        if Path(APP_PATH).is_dir():
            shutil.rmtree(APP_PATH)

        # If this is the first time, set the repo in the stored state
        if not self._stored.repo:
            self._stored.repo = self.config["application-repo"]

        # Fetch the code using git
        Repo.clone_from(self._stored.repo, APP_PATH)
        # Install application dependencies
        check_output(["python3", "-m", "virtualenv", f"{VENV_ROOT}"])
        check_output([f"{VENV_ROOT}/bin/pip3", "install", "gunicorn"])
        check_output([
            f"{VENV_ROOT}/bin/pip3", "install", "-r",
            f"{APP_PATH}/requirements.txt", "--force-reinstall"
        ])

        # If a connection string exists (and relation is defined) then
        # render the settings file for the new app with the connection details
        if self._stored.conn_str:
            self._render_settings_file()

        # Create required database tables
        self._create_database_tables()

    def _install_apt_packages(self, packages: list):
        """Simple wrapper around 'apt-get install -y"""
        try:
            apt.update()
            apt.add_package(packages)
        except apt.PackageNotFoundError:
            logger.error(
                "a specified package not found in package cache or on system")
            self.unit.status = BlockedStatus("Failed to install packages")
        except apt.PackageError:
            logger.error("could not install package")
            self.unit.status = BlockedStatus("Failed to install packages")

    def _render_systemd_unit(self):
        """Render the systemd unit for Gunicorn to a file"""
        # Open the template systemd unit file
        with open("templates/hello-juju.service.j2", "r") as t:
            template = Template(t.read())

        # If this is the first time, set the port in the stored state
        if not self._stored.port:
            self._stored.port = self.config["port"]

        # Render the template files with the correct values
        rendered = template.render(port=self._stored.port,
                                   project_root=APP_PATH,
                                   user="******",
                                   group="www-data")
        # Write the rendered file out to disk
        with open(UNIT_PATH, "w+") as t:
            t.write(rendered)

        # Ensure correct permissions are set on the service
        os.chmod(UNIT_PATH, 0o755)
        # Reload systemd units
        systemd.daemon_reload()

    def _render_settings_file(self):
        """Render the application settings file with database connection details"""
        # Open the template settings files
        with open("templates/settings.py.j2", "r") as t:
            template = Template(t.read())

        # Render the template file with the correct values
        rendered = template.render(conn_str=self._stored.conn_str)

        # Write the rendered file out to disk
        with open(f"{APP_PATH}/settings.py", "w+") as t:
            t.write(rendered)
        # Ensure correct permissions are set on the file
        os.chmod(f"{APP_PATH}/settings.py", 0o644)
        # Get the uid/gid for the www-data user
        u = passwd.user_exists("www-data")
        # Set the correct ownership for the settings file
        os.chown(f"{APP_PATH}/settings.py", uid=u.pw_uid, gid=u.pw_gid)

    def _create_database_tables(self):
        """Initialise the database and populate with initial tables required"""
        self.unit.status = MaintenanceStatus("creating database tables")
        # Call the application's `init.py` file to instantiate the database tables
        check_call([
            "sudo", "-u", "www-data", f"{VENV_ROOT}/bin/python3",
            f"{APP_PATH}/init.py"
        ])
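
The charm above leans on several module-level names defined outside this
excerpt. A sketch of plausible definitions; all three values are
assumptions:

import logging

logger = logging.getLogger(__name__)

APP_PATH = "/srv/app"                                  # assumed
VENV_ROOT = f"{APP_PATH}/venv"                         # assumed
UNIT_PATH = "/etc/systemd/system/hello-juju.service"   # assumed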
Example #20
class HaproxyInstanceManager(Object):

    _stored = StoredState()
    HAPROXY_ENV_FILE = Path('/etc/default/haproxy')

    def __init__(self, charm, key, tcp_backend_manager, bind_addresses=None):
        super().__init__(charm, key)
        self.tcp_backend_manager = tcp_backend_manager
        self.tcp_pool_adapter = TCPLoadBalancerPoolAdapter(
            self.tcp_backend_manager.pools,
            bind_addresses,
        )

        self._stored.set_default(is_started=False)
        self.haproxy_conf_file = Path(
            f'/etc/haproxy/juju-{self.model.app.name}.cfg')

    @property
    def is_started(self):
        return self._stored.is_started

    def install(self):
        self._install_haproxy()
        self._update_haproxy_env_file()

    def _install_haproxy(self):
        logger.info('Installing the haproxy package')
        subprocess.check_call(['apt', 'update'])
        subprocess.check_call(['apt', 'install', '-yq', 'haproxy'])

    def _update_haproxy_env_file(self):
        """Update the maintainer-provided environment file.

        This is done to include the config rendered by us in addition to
        the default config provided by the package.
        """
        ctxt = {'haproxy_app_config': self.haproxy_conf_file}
        env = Environment(loader=FileSystemLoader('templates'))
        template = env.get_template('haproxy.env.j2')
        rendered_content = template.render(ctxt)
        self.HAPROXY_ENV_FILE.write_text(rendered_content)
        self.haproxy_conf_file.write_text('')

    def start(self):
        if not self._stored.is_started:
            logger.info('Starting the haproxy service')
            self._run_start()
            self._stored.is_started = True

    def _run_start(self):
        subprocess.check_call(['systemctl', 'start', 'haproxy'])

    def stop(self):
        if self._stored.is_started:
            logger.info('Stopping the haproxy service')
            subprocess.check_call(['systemctl', 'stop', 'haproxy'])
            self._stored.is_started = False

    def uninstall(self):
        logger.info('Uninstalling the haproxy service')
        subprocess.check_call(['apt', 'purge', '-yq', 'haproxy'])

    def reconfigure(self):
        logger.info('Reconfiguring the haproxy service')
        self._do_reconfigure()
        self._run_restart()

    def _run_restart(self):
        logger.info('Restarting the haproxy service')
        subprocess.check_call(['systemctl', 'restart', 'haproxy'])

    def _do_reconfigure(self):
        logger.info('Rendering the haproxy config file')
        env = Environment(loader=FileSystemLoader('templates'))
        template = env.get_template('haproxy.conf.j2')

        listen_sections = self.tcp_pool_adapter.listen_sections
        rendered_content = template.render(
            {'listen_sections': listen_sections})
        self.haproxy_conf_file.write_text(rendered_content)
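
A sketch of the lifecycle a charm would drive on this manager; the charm
and backend-manager arguments are assumed to be supplied by the caller, and
the method names are the ones defined above:

def manage_haproxy(charm, tcp_backend_manager):
    """Drive the manager through install, start and reconfigure (sketch)."""
    manager = HaproxyInstanceManager(charm, "haproxy-manager",
                                     tcp_backend_manager)
    manager.install()      # apt-installs haproxy, writes the env file
    manager.start()        # systemctl start haproxy (only once)
    manager.reconfigure()  # re-renders the config and restarts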
Example #21
class FileBeatCharm(CharmBase):

    FILEBEAT_CONFIG = '/etc/filebeat/filebeat.yml'
    KUBE_CONFIG = '/root/.kube/config'
    LOGSTASH_SSL_CERT = '/etc/ssl/certs/filebeat-logstash.crt'
    LOGSTASH_SSL_KEY = '/etc/ssl/private/filebeat-logstash.key'
    state = StoredState()

    def __init__(self, *args):
        super().__init__(*args)
        self.framework.observe(self.on.install, self.on_install)
        self.framework.observe(self.on.stop, self.on_remove)
        self.framework.observe(self.on.config_changed, self.on_config_changed)
        self.framework.observe(self.on.reinstall_action,
                               self.on_reinstall_action)
        # NOTE: assumes self.beats_server (an elasticsearch/beats relation
        # helper) is initialised elsewhere; its definition is not part of
        # this excerpt.
        self.framework.observe(self.beats_server.on.server_ready,
                               self.on_beats_server_available)
        self.state.set_default(repo_sources_hash=None, repo_keys_hash=None,
                               needs_reinstall=False, logstash_key=None,
                               logstash_cert=None)

    def on_install(self, event):
        logger.info('Installing filebeat')
        sources = self.model.config.get('install_sources', '')
        keys = self.model.config.get('install_keys', '')
        self.state.repo_sources_hash = hash(sources)
        self.state.repo_keys_hash = hash(keys)
        configure_sources(update=True, sources_var=sources, keys_var=keys)
        apt_install('filebeat')
        self.unit.status = ActiveStatus("Filebeat is installed")

    def on_config_changed(self, event):
        sources = self.model.config.get('install_sources', '')
        keys = self.model.config.get('install_keys', '')
        new_repo_sources_hash = hash(sources)
        new_repo_keys_hash = hash(keys)
        if self.state.repo_sources_hash != new_repo_sources_hash:
            configure_sources(update=True, sources_var=sources, keys_var=keys)
            self.state.repo_sources_hash = new_repo_sources_hash
            self.state.needs_reinstall = True
            msg = ("Filebeat repo changed, use the reinstall action to "
                   "obtain a new version.")
            self.unit.status = BlockedStatus(msg)
            return
        elif self.state.repo_keys_hash != new_repo_keys_hash:
            configure_sources(update=True, sources_var=sources, keys_var=keys)
            self.state.repo_keys_hash = new_repo_keys_hash
        self.render_filebeat_template()

    def on_reinstall_action(self, event):
        if self.state.needs_reinstall:
            logger.info('Reinstalling filebeat')
            apt_purge('filebeat')
            apt_install('filebeat')
            self.state.needs_reinstall = False
            self.render_filebeat_template()

    def render_filebeat_template(self):
        """Create the filebeat.yml config file.

        Renders the appropriate template for the major version of filebeat
        that is installed.
        """
        if self.model.config['kube_logs']:
            if os.path.exists(self.KUBE_CONFIG):
                msg = 'Collecting k8s metadata.'
            else:
                msg = ('kube_logs=True, but {} does not exist. '
                       'No k8s metadata will be collected.'.format(self.KUBE_CONFIG))
            logger.info(msg)

        self.manage_filebeat_logstash_ssl()

        pass #TODO

    def manage_filebeat_logstash_ssl(self):
        """Manage the ssl cert/key that filebeat uses to connect to logstash.

        Create the cert/key files when both logstash_ssl options have been
        set; update when either config option changes; remove if either gets
        unset.
        """
        logstash_ssl_cert = self.model.config['logstash_ssl_cert']
        logstash_ssl_key = self.model.config['logstash_ssl_key']
        if logstash_ssl_cert and logstash_ssl_key:
            cert = base64.b64decode(logstash_ssl_cert).decode('utf8')
            key = base64.b64decode(logstash_ssl_key).decode('utf8')

            if cert != self.state.logstash_cert:
                render(template='{{ data }}',
                       context={'data': cert},
                       target=self.LOGSTASH_SSL_CERT, perms=0o444)
                self.state.logstash_cert = cert

            if key != self.state.logstash_key:
                render(template='{{ data }}',
                       context={'data': key},
                       target=self.LOGSTASH_SSL_KEY, perms=0o400)
                self.state.logstash_key = key
        else:
            # One (or both) of the options is unset: remove any stale files.
            if os.path.exists(self.LOGSTASH_SSL_CERT):
                os.remove(self.LOGSTASH_SSL_CERT)
            if os.path.exists(self.LOGSTASH_SSL_KEY):
                os.remove(self.LOGSTASH_SSL_KEY)
            self.state.logstash_cert = None
            self.state.logstash_key = None

    def on_beats_server_available(self, event):
        """Create the Filebeat index in Elasticsearch.

        Once elasticsearch is available, make 5 attempts to create a
        filebeat index. Set appropriate charm status so the operator knows
        when ES is configured to accept data.
        """
        # Use the first advertised address; socket_addresses is assumed to
        # be a list of {'host': ..., 'port': ...} dicts.
        host = self.beats_server.socket_addresses[0]
        host_string = "{}:{}".format(host['host'], host['port'])

        max_attempts = 5
        for i in range(1, max_attempts + 1):
            if push_beat_index(elasticsearch=host_string,
                               service='filebeat', fatal=False):
                logger.info('Filebeat.index.pushed')
                self.unit.status = ActiveStatus("Filebeat ready")
                break
            else:
                msg = "Attempt {} to push filebeat index failed (retrying)".format(i)
                self.unit.status = WaitingStatus(msg)
                time.sleep(i * 30)  # back off 30s for each attempt
        else:
            msg = "Failed to push filebeat index to http://{}".format(host_string)
            self.unit.status = BlockedStatus(msg)

    def on_remove(self, event):
        logger.info('Removing filebeat')
        self.unit.status = MaintenanceStatus('Removing filebeat')
        apt_purge('filebeat')
        apt_autoremove(purge=True)
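
The logstash_ssl_cert and logstash_ssl_key options are consumed above as
base64-encoded PEM data. A sketch of producing such a value; the PEM body
is elided:

import base64

pem = ("-----BEGIN CERTIFICATE-----\n"
       "...\n"
       "-----END CERTIFICATE-----\n")
encoded = base64.b64encode(pem.encode('utf8')).decode('utf8')
# The result is what would be passed to the logstash_ssl_cert option.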
Example #22
class MetallbSpeakerCharm(CharmBase):
    _stored = StoredState()

    NAMESPACE = os.environ["JUJU_MODEL_NAME"]
    CONTAINER_IMAGE = 'metallb/speaker:v0.9.3'

    def __init__(self, *args):
        super().__init__(*args)
        self.framework.observe(self.on.start, self.on_start)
        self.framework.observe(self.on.config_changed, self._on_config_changed)
        self.framework.observe(self.on.remove, self.on_remove)
        self._stored.set_default(things=[])

    def _on_config_changed(self, _):
        current = self.model.config["thing"]
        if current not in self._stored.things:
            logger.debug("found a new thing: %r", current)
            self._stored.things.append(current)

    def on_start(self, event):
        if not self.framework.model.unit.is_leader():
            return

        logging.info('Setting the pod spec')
        self.framework.model.unit.status = MaintenanceStatus("Configuring pod")
        secret = utils._random_secret(128)

        self.framework.model.pod.set_spec(
            {
                'version':
                3,
                'serviceAccount': {
                    'roles': [
                        {
                            'global':
                            True,
                            'rules': [
                                {
                                    'apiGroups': [''],
                                    'resources':
                                    ['services', 'endpoints', 'nodes'],
                                    'verbs': ['get', 'list', 'watch'],
                                },
                                {
                                    'apiGroups': [''],
                                    'resources': ['events'],
                                    'verbs': ['create', 'patch'],
                                },
                                {
                                    'apiGroups': ['policy'],
                                    'resourceNames': ['speaker'],
                                    'resources': ['podsecuritypolicies'],
                                    'verbs': ['use'],
                                },
                            ],
                        },
                    ],
                },
                'containers': [{
                    'name':
                    'speaker',
                    'image':
                    self.CONTAINER_IMAGE,
                    'imagePullPolicy':
                    'Always',
                    'ports': [{
                        'containerPort': 7472,
                        'protocol': 'TCP',
                        'name': 'monitoring'
                    }],
                    'envConfig': {
                        'METALLB_NODE_NAME': {
                            'field': {
                                'path': 'spec.nodeName',
                                'api-version': 'v1'
                            }
                        },
                        'METALLB_HOST': {
                            'field': {
                                'path': 'status.hostIP',
                                'api-version': 'v1'
                            }
                        },
                        'METALLB_ML_BIND_ADDR': {
                            'field': {
                                'path': 'status.podIP',
                                'api-version': 'v1'
                            }
                        },
                        'METALLB_ML_LABELS': "app=metallb,component=speaker",
                        'METALLB_ML_NAMESPACE': {
                            'field': {
                                'path': 'metadata.namespace',
                                'api-version': 'v1'
                            }
                        },
                        'METALLB_ML_SECRET_KEY': {
                            'secret': {
                                'name': 'memberlist',
                                'key': 'secretkey'
                            }
                        }
                    },
                    # TODO: add constraint fields once it exists in pod_spec
                    # bug : https://bugs.launchpad.net/juju/+bug/1893123
                    # 'resources': {
                    #     'limits': {
                    #         'cpu': '100m',
                    #         'memory': '100Mi',
                    #     }
                    # },
                    'kubernetes': {
                        'securityContext': {
                            'allowPrivilegeEscalation': False,
                            'readOnlyRootFilesystem': True,
                            'capabilities': {
                                'add': ['NET_ADMIN', 'NET_RAW', 'SYS_ADMIN'],
                                'drop': ['ALL']
                            },
                        },
                        # fields do not exist in pod_spec
                        # 'TerminationGracePeriodSeconds': 2,
                    },
                }],
                'kubernetesResources': {
                    'secrets': [{
                        'name': 'memberlist',
                        'type': 'Opaque',
                        'data': {
                            'secretkey':
                            b64encode(secret.encode('utf-8')).decode('utf-8')
                        }
                    }]
                },
                'service': {
                    'annotations': {
                        'prometheus.io/port': '7472',
                        'prometheus.io/scrape': 'true'
                    }
                },
            }, )

        response = utils.create_pod_security_policy_with_k8s_api(
            namespace=self.NAMESPACE, )
        if not response:
            self.framework.model.unit.status = BlockedStatus(
                "An error occured during init. Please check the logs.")
            return

        response = utils.create_namespaced_role_with_api(
            name='config-watcher',
            namespace=self.NAMESPACE,
            labels={'app': 'metallb'},
            resources=['configmaps'],
            verbs=['get', 'list', 'watch'])
        if not response:
            self.framework.model.unit.status = BlockedStatus(
                "An error occured during init. Please check the logs.")
            return

        response = utils.create_namespaced_role_with_api(
            name='pod-lister',
            namespace=self.NAMESPACE,
            labels={'app': 'metallb'},
            resources=['pods'],
            verbs=['list'])
        if not response:
            self.framework.model.unit.status = BlockedStatus(
                "An error occured during init. Please check the logs.")
            return

        response = utils.bind_role_with_api(name='config-watcher',
                                            namespace=self.NAMESPACE,
                                            labels={'app': 'metallb'},
                                            subject_name='speaker')
        if not response:
            self.framework.model.unit.status = BlockedStatus(
                "An error occured during init. Please check the logs.")
            return

        response = utils.bind_role_with_api(name='pod-lister',
                                            namespace=self.NAMESPACE,
                                            labels={'app': 'metallb'},
                                            subject_name='speaker')
        if not response:
            self.framework.model.unit.status = BlockedStatus(
                "An error occured during init. Please check the logs.")
            return

        self.framework.model.unit.status = ActiveStatus("Ready")

    def on_remove(self, event):
        if not self.framework.model.unit.is_leader():
            return
Example #23
class OpenfaasCharm(CharmBase):
    _stored = StoredState()

    def __init__(self, *args):
        super().__init__(*args)
        self._stored.set_default(namespace=os.environ["JUJU_MODEL_NAME"])
        self._stored.set_default(nats_ip="")
        self.framework.observe(self.on.config_changed, self._on_config_changed)
        self.framework.observe(self.on["nats-address"].relation_joined, self._on_nats_relation_joined)
        self.framework.observe(self.on["nats-address"].relation_changed, self._on_nats_relation_changed)

    def _on_nats_relation_changed(self, event):
        ip = event.relation.data[event.unit].get("ip")

        if ip is None:
            return
        self._stored.nats_ip = ip

        logger.info("OF - nats says: {}".format(ip))
        self._on_config_changed()

    def _on_nats_relation_joined(self, event):
        # relation-joined is handled identically to relation-changed.
        self._on_nats_relation_changed(event)

    def _on_config_changed(self, _=None):
        logger.info("OpenFaaS config_change")
        # if not self.unit.is_leader():
        #     self.unit.status = ActiveStatus()
        #     return

        nats_ip = self._stored.nats_ip

        if nats_ip == "":
            self.unit.status = BlockedStatus("OpenFaaS needs a NATS relation")
            return

        self.unit.status = MaintenanceStatus('Setting pod spec.')

        logger.info("OpenFaaS building pod spec with nats_ip {}".format(nats_ip))

        pod_spec = self._build_pod_spec()
        self.model.pod.set_spec(pod_spec)
        self.unit.status = ActiveStatus("OpenFaaS pod ready.")

    def _build_pod_spec(self):
        namespace = self._stored.namespace

        # function_crd = {}
        # profiles_crd = {}

        rules = []
        try:
            with open(Path('files/rbac_rules.yaml'), "r") as f:
                rules = yaml.load(f, Loader=yaml.FullLoader)
        except yaml.YAMLError as exc:
            logger.error("Error in configuration file: %s", exc)

        # try:
        #     function_crd = yaml.load(open(Path('files/function_crd.yaml'),"r"), Loader=yaml.FullLoader)
        # except yaml.YAMLError as exc:
        #     print("Error in configuration file:", exc)

        # try:
        #     profiles_crd = yaml.load(open(Path('files/profiles_crd.yaml'),"r"), Loader=yaml.FullLoader)
        # except yaml.YAMLError as exc:
        #     print("Error in configuration file:", exc)

        # logger.debug(json.dumps(function_crd["spec"]))

        username = self.model.config["admin_username"]
        password = self.model.config["admin_password"]

        vol_config = [
            {
                "name": "auth",
                "mountPath": "/var/secrets",
                "secret": {"name": "basic-auth"}
            },
        ]

# "functions_provider_url": "http://192.168.0.35:8080",
        spec = {
            "version": 3,
            "kubernetesResources": {
                # "customResourceDefinitions": [
                #     {
                #         "name": function_crd["metadata"]["name"],
                #         "labels": {
                #             "juju-global-resource-lifecycle": "model",
                #         },
                #         "spec": function_crd["spec"],
                #     },
                #     {
                #         "name": profiles_crd["metadata"]["name"],
                #         "labels": {
                #             "juju-global-resource-lifecycle": "model",
                #         },
                #         "spec": profiles_crd["spec"],
                #     },
                # ],
                'secrets': [{
                    'name': 'basic-auth',
                    'type': 'Opaque',
                    'data': {
                        'basic-auth-user': b64encode(username.encode('utf-8')).decode('utf-8'),
                        'basic-auth-password': b64encode(password.encode('utf-8')).decode('utf-8'),
                    }
                }],
            },
            'serviceAccount': {
                'roles': [{
                    'global': True,
                    'rules': rules["rules"],
                }],
            },
            "containers": [
                {
                    "name": self.app.name+"-gateway",
                    "imageDetails": {"imagePath": "openfaas/gateway:0.20.2"},
                    "ports": [{"containerPort": 8080, "protocol": "TCP","name":"gateway"}],
                    "envConfig": {
                        "faas_nats_address": self._stored.nats_ip,
                        "faas_nats_port": "4222",
                        "functions_provider_url": "http://127.0.0.1:8081/",
                        "direct_functions": "false",
                        "basic_auth": "true",
                        "faas_nats_channel": "faas-request",
                        "secret_mount_path": "/var/secrets",
                        "faas_prometheus_host": "192.168.0.35",
                        "faas_prometheus_port": "9090",
                        "auth_pass_body": "false",
                        "auth_proxy_url": "http://127.0.0.1:8083/validate",
                        "scale_from_zero": "false",
                        "direct_functions": "false",
                    },
                    "volumeConfig": vol_config,
                },
                {
                    "name": self.app.name+"-auth-plugin",
                    "imageDetails": {"imagePath": "openfaas/basic-auth-plugin:0.20.2"},
                    "ports": [{"containerPort": 8083, "protocol": "TCP","name":"auth"}],
                    "envConfig": {
                        "basic_auth": "true",
                        "secret_mount_path": "/var/secrets",
                        "port": "8083",
                    },
                    "volumeConfig": vol_config,
                },
                {
                    "name": self.app.name+"-provider",
                    "imageDetails": {"imagePath": "ghcr.io/openfaas/faas-netes:0.12.9"},
                    "ports": [{"containerPort": 8081, "protocol": "TCP","name":"provider"}],
                    "command": ["./faas-netes","-operator=true"],
                    "envConfig": {
                        "port": "8081",
                        "operator": "true",
                        "basic_auth": "true",
                        "function_namespace": namespace,
                        "cluster_role": "true",
                        "profiles_namespace": namespace,
                    },
                    "volumeConfig": vol_config,
                }
            ]
        }

        return spec
Example #24
class MetalLBControllerCharm(CharmBase):
    """MetalLB Controller Charm."""

    _stored = StoredState()

    def __init__(self, *args):
        """Charm initialization for events observation."""
        super().__init__(*args)
        if not self.unit.is_leader():
            self.unit.status = WaitingStatus("Waiting for leadership")
            return
        self.image = OCIImageResource(self, 'metallb-controller-image')
        self.framework.observe(self.on.install, self._on_start)
        self.framework.observe(self.on.start, self._on_start)
        self.framework.observe(self.on.leader_elected, self._on_start)
        self.framework.observe(self.on.upgrade_charm, self._on_upgrade)
        self.framework.observe(self.on.config_changed, self._on_config_changed)
        self.framework.observe(self.on.remove, self._on_remove)
        # -- initialize states --
        self._stored.set_default(k8s_objects_created=False)
        self._stored.set_default(started=False)
        self._stored.set_default(config_hash=self._config_hash())
        # -- base values --
        self._stored.set_default(namespace=os.environ["JUJU_MODEL_NAME"])

    def _config_hash(self):
        data = json.dumps({'iprange': self.model.config['iprange']},
                          sort_keys=True)
        return md5(data.encode('utf8')).hexdigest()

    def _on_start(self, event):
        """Occurs upon install, start, upgrade, and possibly config changed."""
        if self._stored.started:
            return
        self.unit.status = MaintenanceStatus("Fetching image information")
        try:
            image_info = self.image.fetch()
        except OCIImageResourceError:
            logging.exception('An error occurred while fetching the image info')
            self.unit.status = BlockedStatus(
                "Error fetching image information")
            return

        if not self._stored.k8s_objects_created:
            self.unit.status = MaintenanceStatus("Creating supplementary "
                                                 "Kubernetes objects")
            utils.create_k8s_objects(self._stored.namespace)
            self._stored.k8s_objects_created = True

        self.unit.status = MaintenanceStatus("Configuring pod")
        self.set_pod_spec(image_info)

        self.unit.status = ActiveStatus()
        self._stored.started = True

    def _on_upgrade(self, event):
        """Occurs when new charm code or image info is available."""
        self._stored.started = False
        self._on_start(event)

    def _on_config_changed(self, event):
        if self.model.config['protocol'] != 'layer2':
            self.unit.status = BlockedStatus(
                'Invalid protocol; '
                'only "layer2" currently supported')
            return
        current_config_hash = self._config_hash()
        if current_config_hash != self._stored.config_hash:
            self._stored.started = False
            self._stored.config_hash = current_config_hash
            self._on_start(event)

    def _on_remove(self, event):
        """Remove of artifacts created by the K8s API."""
        self.unit.status = MaintenanceStatus("Removing supplementary "
                                             "Kubernetes objects")
        utils.remove_k8s_objects(self._stored.namespace)
        self.unit.status = MaintenanceStatus("Removing pod")
        self._stored.started = False
        self._stored.k8s_objects_created = False

    def set_pod_spec(self, image_info):
        """Set pod spec."""
        iprange = self.model.config["iprange"].split(",")
        cm = "address-pools:\n- name: default\n  protocol: layer2\n  addresses:\n"
        for addr_range in iprange:
            cm += "  - " + addr_range + "\n"

        self.model.pod.set_spec(
            {
                'version':
                3,
                'serviceAccount': {
                    'roles': [{
                        'global':
                        True,
                        'rules': [
                            {
                                'apiGroups': [''],
                                'resources': ['services'],
                                'verbs': ['get', 'list', 'watch', 'update'],
                            },
                            {
                                'apiGroups': [''],
                                'resources': ['services/status'],
                                'verbs': ['update'],
                            },
                            {
                                'apiGroups': [''],
                                'resources': ['events'],
                                'verbs': ['create', 'patch'],
                            },
                            {
                                'apiGroups': ['policy'],
                                'resourceNames': ['controller'],
                                'resources': ['podsecuritypolicies'],
                                'verbs': ['use'],
                            },
                        ],
                    }],
                },
                'containers': [{
                    'name': 'controller',
                    'imageDetails': image_info,
                    'imagePullPolicy': 'Always',
                    'ports': [{
                        'containerPort': 7472,
                        'protocol': 'TCP',
                        'name': 'monitoring'
                    }],
                    # TODO: add constraint fields once it exists in pod_spec
                    # bug : https://bugs.launchpad.net/juju/+bug/1893123
                    # 'resources': {
                    #     'limits': {
                    #         'cpu': '100m',
                    #         'memory': '100Mi',
                    #     }
                    # },
                    'kubernetes': {
                        'securityContext': {
                            'privileged': False,
                            'runAsNonRoot': True,
                            'runAsUser': 65534,
                            'readOnlyRootFilesystem': True,
                            'capabilities': {
                                'drop': ['ALL']
                            }
                        },
                        # fields do not exist in pod_spec
                        # 'TerminationGracePeriodSeconds': 0,
                    },
                }],
                'service': {
                    'annotations': {
                        'prometheus.io/port': '7472',
                        'prometheus.io/scrape': 'true'
                    }
                },
                'configMaps': {
                    'config': {
                        'config': cm
                    }
                }
            })
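
# NOTE: the charm above calls self._config_hash(), which is not part of this
# excerpt. A minimal standalone sketch of the idea (an assumption, not the
# original implementation): hash the config mapping deterministically so
# _on_config_changed only restarts the workload when something changed.
import hashlib
import json


def config_hash(config: dict) -> str:
    """Return a stable digest for a config mapping (illustrative helper)."""
    # sort_keys gives an order-independent serialization, so equal configs
    # always produce equal digests
    return hashlib.sha256(
        json.dumps(config, sort_keys=True).encode()).hexdigest()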
Example #25
class MongoDBCharm(CharmBase):
    """A Juju Charm to deploy MongoDB on Kubernetes

    This charm has the following features:
    - Add one or more MongoDB units
    - Reconfigure replica set anytime number of MongoDB units changes
    - Provides a database relation for any MongoDB client
    """
    state = StoredState()

    def __init__(self, *args):
        super().__init__(*args)

        self.state.set_default(pod_spec=None)
        self.state.set_default(mongodb_initialized=False)
        self.state.set_default(replica_set_hosts=None)

        self.port = MONGODB_PORT
        self.image = OCIImageResource(self, "mongodb-image")

        # Register all of the events we want to observe
        self.framework.observe(self.on.config_changed, self.configure_pod)
        self.framework.observe(self.on.upgrade_charm, self.configure_pod)
        self.framework.observe(self.on.start, self.on_start)
        self.framework.observe(self.on.stop, self.on_stop)
        self.framework.observe(self.on.update_status, self.on_update_status)

        self.framework.observe(self.on["database"].relation_changed,
                               self.on_database_relation_changed)
        self.framework.observe(self.on[PEER].relation_changed,
                               self.reconfigure)
        self.framework.observe(self.on[PEER].relation_departed,
                               self.reconfigure)

        logger.debug("MongoDBCharm initialized!")

    ##############################################
    #           CHARM HOOKS HANDLERS             #
    ##############################################

    # Handles config-changed and upgrade-charm events
    def configure_pod(self, event):
        """Configure MongoDB Pod specification

        A new MongoDB pod specification is set only if it is different
        from the current specification.
        """
        # Continue only if the unit is the leader
        if not self.unit.is_leader():
            self.on_update_status(event)
            return

        logger.debug("Running configuring_pod")

        # Fetch image information
        try:
            self.unit.status = WaitingStatus("Fetching image information")
            image_info = self.image.fetch()
        except OCIImageResourceError:
            self.unit.status = BlockedStatus(
                "Error fetching image information")
            return

        # Build Pod spec
        self.unit.status = WaitingStatus("Assembling pod spec")
        builder = PodSpecBuilder(
            name=self.model.app.name,
            replica_set_name=self.replica_set_name,
            port=self.port,
            image_info=image_info,
        )
        pod_spec = builder.make_pod_spec()

        # Update pod spec if the generated one is different
        # from the one previously applied
        if self.state.pod_spec != pod_spec:
            self.model.pod.set_spec(pod_spec)
            self.state.pod_spec = pod_spec

        self.on_update_status(event)
        logger.debug("Running configuring_pod finished")

    # Handles start event
    def on_start(self, event):
        """Initialize MongoDB

        This event handler is deferred if initialization of the MongoDB
        replica set fails. By doing so it is guaranteed that another
        attempt at initialization will be made.
        """
        logger.debug("Running on_start")
        if not self.unit.is_leader():
            return

        if not self.mongo.is_ready():
            self.unit.status = WaitingStatus("Waiting for MongoDB Service")
            logger.debug("Waiting for MongoDB Service")
            event.defer()
            return

        if not self.state.mongodb_initialized:
            logger.debug("Initializing MongoDB")
            self.unit.status = WaitingStatus("Initializing MongoDB")
            try:
                self.mongo.initialize_replica_set(self.cluster_hosts)
                self.state.mongodb_initialized = True
                self.state.replica_set_hosts = self.cluster_hosts
                logger.debug("MongoDB Initialized")
            except Exception as e:
                logger.info("Deferring on_start since : error={}".format(e))
                event.defer()

        self.on_update_status(event)
        logger.debug("Running on_start finished")

    # Handles stop event
    def on_stop(self, _):
        """Mark terminating unit as inactive
        """
        self.unit.status = MaintenanceStatus('Pod is terminating.')

    # Handles update-status event
    def on_update_status(self, event):
        """Set status for all units

        Status may be
        - MongoDB API server not reachable (service is not ready),
        - MongoDB Replication set is not Initialized
        - Unit is active
        """
        if not self.unit.is_leader():
            self.unit.status = ActiveStatus()
            return

        if not self.mongo.is_ready():
            status_message = "service not ready yet"
            self.unit.status = WaitingStatus(status_message)
            return

        if not self.state.mongodb_initialized:
            status_message = "mongodb not initialized"
            self.unit.status = WaitingStatus(status_message)
            return

        self.unit.status = ActiveStatus()

    ##############################################
    #        PEER RELATION HOOK HANDLERS         #
    ##############################################

    # Handles relation-changed and relation-departed events
    def reconfigure(self, event):
        """Reconfigure replicat set

        The number of replicas in the MongoDB replica set is updated.
        """
        logger.debug("Running reconfigure")

        if self.unit.is_leader() and self.need_replica_set_reconfiguration():
            try:
                self.mongo.reconfigure_replica_set(self.cluster_hosts)
            except Exception as e:
                logger.info(
                    "Deferring relation event since : error={}".format(e))
                event.defer()

        self.on_update_status(event)
        logger.debug("Running reconfigure finished")

    ##############################################
    #               RELATIONS                    #
    ##############################################

    # handles client relation for MongoDB
    def on_database_relation_changed(self, event):
        """Connect to database client

        Any MongoDB client is provided with the following information
        - Is MongoDB in a replicated or unitary state
        - Replica set URI
        - Standalone URI

        Using this information a client can establish a database
        connection with MongoDB, for instance using the pymongo
        Python module.
        """
        event.relation.data[self.unit]['replicated'] = str(self.is_joined)
        event.relation.data[
            self.unit]['replica_set_name'] = self.replica_set_name
        event.relation.data[self.unit]['standalone_uri'] = "{}".format(
            self.standalone_uri)
        event.relation.data[self.unit]['replica_set_uri'] = "{}".format(
            self.replica_set_uri)

    ##############################################
    #               PROPERTIES                   #
    ##############################################

    @property
    def mongo(self):
        """Return a MongoDB API client

        A pymongo client is returned.
        """
        return Mongo(standalone_uri=self.standalone_uri,
                     replica_set_uri="{}?replicaSet={}".format(
                         self.replica_set_uri, self.replica_set_name))

    @property
    def replica_set_uri(self):
        """Construct a replica set URI
        """
        uri = "mongodb://"
        for i, host in enumerate(self.cluster_hosts):
            if i:
                uri += ","
            uri += "{}:{}".format(host, self.port)
        uri += "/"
        return uri

    @property
    def standalone_uri(self):
        """Construct a standalone URI
        """
        return "mongodb://{}:{}/".format(self.model.app.name, self.port)

    @property
    def replica_set_name(self):
        """Find the replica set name
        """
        return self.model.config["replica_set_name"]

    @property
    def num_peers(self):
        """Find number of deployed MongoDB units
        """
        peer_relation = self.framework.model.get_relation(PEER)
        return len(peer_relation.units) + 1 if self.is_joined else 1

    @property
    def is_joined(self):
        """Does MongoDB charm have peers
        """
        peer_relation = self.framework.model.get_relation(PEER)
        return peer_relation is not None

    def _get_unit_hostname(self, _id: int) -> str:
        """Construct a DNS name for a MongoDB unit
        """
        return "{}-{}.{}-endpoints".format(self.model.app.name, _id,
                                           self.model.app.name)

    @property
    def cluster_hosts(self) -> list:
        """Find all hostnames for MongoDB units
        """
        return [self._get_unit_hostname(i) for i in range(self.num_peers)]

    def need_replica_set_reconfiguration(self):
        """Does MongoDB replica set need reconfiguration
        """
        return self.cluster_hosts != self.state.replica_set_hosts
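
# Illustrative values for the properties above (assuming app name "mongodb",
# MONGODB_PORT = 27017 and three units):
#   cluster_hosts   -> ["mongodb-0.mongodb-endpoints",
#                       "mongodb-1.mongodb-endpoints",
#                       "mongodb-2.mongodb-endpoints"]
#   standalone_uri  -> "mongodb://mongodb:27017/"
#   replica_set_uri -> "mongodb://mongodb-0.mongodb-endpoints:27017,"
#                      "mongodb-1.mongodb-endpoints:27017,"
#                      "mongodb-2.mongodb-endpoints:27017/"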
Example #26
class PrometheusCharm(CharmBase):
    """A Juju Charm for Prometheus
    """
    _stored = StoredState()

    def __init__(self, *args):
        logger.debug('Initializing Charm')

        super().__init__(*args)

        self._stored.set_default(alertmanagers=[])
        self._stored.set_default(alertmanager_port='9093')

        self.framework.observe(self.on.config_changed, self._on_config_changed)
        self.framework.observe(self.on.stop, self._on_stop)
        self.framework.observe(self.on['alertmanager'].relation_changed,
                               self._on_alertmanager_changed)
        self.framework.observe(self.on['alertmanager'].relation_broken,
                               self._on_alertmanager_broken)

        self.framework.observe(self.on['grafana-source'].relation_changed,
                               self._on_grafana_changed)

    def _on_config_changed(self, _):
        """Set a new Juju pod specification
        """
        self._configure_pod()

    def _on_stop(self, _):
        """Mark unit is inactive
        """
        self.unit.status = MaintenanceStatus('Pod is terminating.')

    def _on_grafana_changed(self, event):
        """Provide Grafana with data source information
        """
        event.relation.data[self.unit]['port'] = str(self.model.config['port'])
        event.relation.data[self.unit]['source-type'] = 'prometheus'

    def _on_alertmanager_changed(self, event):
        """Set an alertmanager configuation
        """
        if not self.unit.is_leader():
            return

        addrs = json.loads(event.relation.data[event.app].get('addrs', '[]'))
        port = event.relation.data[event.app]['port']

        self._stored.alertmanager_port = port
        self._stored.alertmanagers = addrs

        self._configure_pod()

    def _on_alertmanager_broken(self, event):
        """Remove all alertmanager configuration
        """
        if not self.unit.is_leader():
            return
        self._stored.alertmanagers.clear()
        self._configure_pod()

    def _cli_args(self):
        """Construct command line arguments for Prometheus
        """
        config = self.model.config
        args = [
            '--config.file=/etc/prometheus/prometheus.yml',
            '--storage.tsdb.path=/var/lib/prometheus',
            '--web.enable-lifecycle',
            '--web.console.templates=/usr/share/prometheus/consoles',
            '--web.console.libraries=/usr/share/prometheus/console_libraries'
        ]

        # get log level
        allowed_log_levels = ['debug', 'info', 'warn', 'error', 'fatal']
        if config.get('log-level'):
            log_level = config['log-level'].lower()
        else:
            log_level = 'info'

        # If log level is invalid set it to debug
        if log_level not in allowed_log_levels:
            logger.error('Invalid loglevel: {0} given, {1} allowed. '
                         'Defaulting to DEBUG loglevel.'.format(
                             log_level, '/'.join(allowed_log_levels)))
            log_level = 'debug'

        # set log level
        args.append('--log.level={0}'.format(log_level))

        # Enable time series database compression
        if config.get('tsdb-wal-compression'):
            args.append('--storage.tsdb.wal-compression')

        # Set time series retention time
        if config.get('tsdb-retention-time') and self._is_valid_timespec(
                config['tsdb-retention-time']):
            args.append('--storage.tsdb.retention.time={}'.format(
                config['tsdb-retention-time']))

        return args

    def _is_valid_timespec(self, timeval):
        """Is a time interval unit and value valid
        """
        if not timeval:
            return False

        time, unit = timeval[:-1], timeval[-1]

        if unit not in ['y', 'w', 'd', 'h', 'm', 's']:
            logger.error('Invalid unit {} in time spec'.format(unit))
            return False

        try:
            int(time)
        except ValueError:
            logger.error('Can not convert time {} to integer'.format(time))
            return False

        if not int(time) > 0:
            logger.error('Expected positive time spec but got {}'.format(time))
            return False

        return True
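
    # For reference (illustrative), _is_valid_timespec accepts Prometheus
    # style durations built from a positive integer and a known unit:
    #   _is_valid_timespec("15d") -> True
    #   _is_valid_timespec("30s") -> True
    #   _is_valid_timespec("0s")  -> False  (not positive)
    #   _is_valid_timespec("2x")  -> False  (unknown unit)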

    def _are_valid_labels(self, json_data):
        """Are Prometheus external labels valid
        """
        if not json_data:
            return False

        try:
            labels = json.loads(json_data)
        except (ValueError, TypeError):
            logger.error(
                'Can not parse external labels : {}'.format(json_data))
            return False

        if not isinstance(labels, dict):
            logger.error(
                'Expected label dictionary but got : {}'.format(labels))
            return False

        for key, value in labels.items():
            if not isinstance(key, str) or not isinstance(value, str):
                logger.error('External label keys/values must be strings')
                return False

        return True

    def _external_labels(self):
        """Extract external labels for Prometheus from configuration
        """
        config = self.model.config
        labels = {}

        if config.get('external-labels') and self._are_valid_labels(
                config['external-labels']):
            labels = json.loads(config['external-labels'])

        return labels
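
    # Example (illustrative): with the charm config
    #   external-labels: '{"cluster": "prod", "region": "eu-west"}'
    # _external_labels() returns {"cluster": "prod", "region": "eu-west"};
    # malformed JSON or non-string keys/values yield {} instead.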

    def _prometheus_global_config(self):
        """Construct Prometheus global configuration
        """
        config = self.model.config
        global_config = {}

        labels = self._external_labels()
        if labels:
            global_config['external_labels'] = labels

        if config.get('scrape-interval') and self._is_valid_timespec(
                config['scrape-interval']):
            global_config['scrape_interval'] = config['scrape-interval']

        if config.get('scrape-timeout') and self._is_valid_timespec(
                config['scrape-timeout']):
            global_config['scrape_timeout'] = config['scrape-timeout']

        if config.get('evaluation-interval') and self._is_valid_timespec(
                config['evaluation-interval']):
            global_config['evaluation_interval'] = config[
                'evaluation-interval']

        return global_config

    def _alerting_config(self):
        """Construct Prometheus altering configuation
        """
        alerting_config = {}

        if len(self._stored.alertmanagers) < 1:
            logger.debug('No alertmanagers available')
            return alerting_config

        targets = []
        for manager in self._stored.alertmanagers:
            port = self._stored.alertmanager_port
            targets.append("{}:{}".format(manager, port))

        manager_config = {'static_configs': [{'targets': targets}]}
        alerting_config = {'alertmanagers': [manager_config]}

        return alerting_config

    def _prometheus_config(self):
        """Construct Prometheus configuration
        """
        config = self.model.config

        scrape_config = {
            'global': self._prometheus_global_config(),
            'scrape_configs': []
        }

        alerting_config = self._alerting_config()
        if alerting_config:
            scrape_config['alerting'] = alerting_config

        # By default only monitor prometheus server itself
        default_config = {
            'job_name': 'prometheus',
            'scrape_interval': '5s',
            'scrape_timeout': '5s',
            'metrics_path': '/metrics',
            'honor_timestamps': True,
            'scheme': 'http',
            'static_configs': [{
                'targets': ['localhost:{}'.format(config['port'])]
            }]
        }
        scrape_config['scrape_configs'].append(default_config)

        logger.debug('Prometheus config : {}'.format(scrape_config))

        return yaml.dump(scrape_config)
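
    # Sketch of the YAML rendered above with default settings (port 9090, no
    # alertmanagers, no external labels); actual output depends on config:
    #
    #   global: {}
    #   scrape_configs:
    #   - honor_timestamps: true
    #     job_name: prometheus
    #     metrics_path: /metrics
    #     scheme: http
    #     scrape_interval: 5s
    #     scrape_timeout: 5s
    #     static_configs:
    #     - targets:
    #       - localhost:9090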

    def _build_pod_spec(self):
        """Construct a Juju pod specification for Prometheus
        """
        logger.debug('Building Pod Spec')
        config = self.model.config
        spec = {
            'version': 3,
            'containers': [{
                'name': self.app.name,
                'imageDetails': {
                    'imagePath': config['prometheus-image-path'],
                    'username': config.get('prometheus-image-username', ''),
                    'password': config.get('prometheus-image-password', '')
                },
                'args': self._cli_args(),
                'kubernetes': {
                    'readinessProbe': {
                        'httpGet': {
                            'path': '/-/ready',
                            'port': config['port']
                        },
                        'initialDelaySeconds': 10,
                        'timeoutSeconds': 30
                    },
                    'livenessProbe': {
                        'httpGet': {
                            'path': '/-/healthy',
                            'port': config['port']
                        },
                        'initialDelaySeconds': 30,
                        'timeoutSeconds': 30
                    }
                },
                'ports': [{
                    'containerPort': config['port'],
                    'name': 'prometheus-http',
                    'protocol': 'TCP'
                }],
                'volumeConfig': [{
                    'name': 'prometheus-config',
                    'mountPath': '/etc/prometheus',
                    'files': [{
                        'path': 'prometheus.yml',
                        'content': self._prometheus_config()
                    }]
                }]
            }]
        }

        return spec

    def _check_config(self):
        """Identify missing but required items in configuation

        :returns: list of missing configuration items (configuration keys)
        """
        logger.debug('Checking Config')
        config = self.model.config
        missing = []

        if not config.get('prometheus-image-path'):
            missing.append('prometheus-image-path')

        if config.get('prometheus-image-username') \
                and not config.get('prometheus-image-password'):
            missing.append('prometheus-image-password')

        return missing

    def _configure_pod(self):
        """Setup a new Prometheus pod specification
        """
        logger.debug('Configuring Pod')
        missing_config = self._check_config()
        if missing_config:
            logger.error('Incomplete Configuration : {}. '
                         'Application will be blocked.'.format(missing_config))
            self.unit.status = \
                BlockedStatus('Missing configuration: {}'.format(missing_config))
            return

        if not self.unit.is_leader():
            self.unit.status = ActiveStatus()
            return

        self.unit.status = MaintenanceStatus('Setting pod spec.')
        pod_spec = self._build_pod_spec()

        self.model.pod.set_spec(pod_spec)
        self.app.status = ActiveStatus()
        self.unit.status = ActiveStatus()
Example #27
class SlurmdCharm(CharmBase):
    """Operator charm responsible for facilitating slurmd lifecycle events."""

    _stored = StoredState()

    def __init__(self, *args):
        """Initialize charm state, and observe charm lifecycle events."""
        super().__init__(*args)

        self.config = self.model.config
        self.slurm_manager = SlurmManager(self, 'slurmd')
        self.slurmd = SlurmdProvides(self, "slurmd")

        self._stored.set_default(
            slurm_installed=False,
            slurm_config_available=False,
            slurm_config=dict(),
        )

        event_handler_bindings = {
            self.on.install:
            self._on_install,
            self.on.config_changed:
            self._on_config_changed,
            self.on.upgrade_charm:
            self._on_upgrade,
            self.slurmd.on.slurmctld_available:
            self._on_render_config_and_restart,
            self.slurmd.on.slurmctld_unavailable:
            self._on_render_config_and_restart,
        }
        for event, handler in event_handler_bindings.items():
            self.framework.observe(event, handler)

    def _on_install(self, event):
        """Install the slurm scheduler as snap or tar file."""
        self.slurm_manager.install()
        self.unit.status = ActiveStatus("Slurm Installed")
        self._stored.slurm_installed = True

    def _on_upgrade(self, event):
        """Upgrade charm event handler."""
        slurm_config = dict(self._stored.slurm_config)
        self.slurm_manager.upgrade(slurm_config, resource=False)

    def _on_config_changed(self, event):
        self.slurmd.force_set_config_on_app_relation_data()

    def _on_render_config_and_restart(self, event):
        """Retrieve slurm_config from controller and write slurm.conf."""
        slurm_installed = self._stored.slurm_installed
        slurm_config_available = self._stored.slurm_config_available

        if slurm_installed and slurm_config_available:
            # cast StoredState -> python dict
            slurm_config = dict(self._stored.slurm_config)
            self.slurm_manager.render_config_and_restart(slurm_config)
            self.unit.status = ActiveStatus("Slurmd Available")
        else:
            self.unit.status = BlockedStatus(
                "Blocked need relation to slurmctld.")
            event.defer()
            return

    def is_slurm_installed(self):
        """Return true/false based on whether or not slurm is installed."""
        return self._stored.slurm_installed

    def set_slurm_config_available(self, config_available):
        """Set slurm_config_available in local stored state."""
        self._stored.slurm_config_available = config_available

    def set_slurm_config(self, slurm_config):
        """Set the slurm_config in local stored state."""
        self._stored.slurm_config = slurm_config
class CephClientRequires(Object):

    on = CephClientEvents()
    state = StoredState()

    def __init__(self, charm, relation_name):
        super().__init__(charm, relation_name)
        self.name = relation_name
        self.this_unit = self.model.unit
        self.relation_name = relation_name
        self.state.set_default(pools_available=False, broker_req={})
        self.framework.observe(charm.on[relation_name].relation_joined,
                               self.on_joined)
        self.framework.observe(charm.on[relation_name].relation_changed,
                               self.on_changed)

    def on_joined(self, event):
        relation = self.model.get_relation(self.relation_name)
        if relation:
            logging.info("emiting broker_available")
            self.on.broker_available.emit()

    def request_osd_settings(self, settings):
        relation = self.model.get_relation(self.relation_name)
        relation.data[self.model.unit]['osd-settings'] = json.dumps(
            settings, sort_keys=True)

    @property
    def pools_available(self):
        return self.state.pools_available

    def mon_hosts(self, mon_ips):
        """List of all monitor host public addresses"""
        hosts = []
        for ceph_addrs in mon_ips:
            # NOTE(jamespage): This looks odd but deals with
            #                  use with ceph-proxy which
            #                  presents all monitors in
            #                  a single space delimited field.
            for addr in ceph_addrs.split(' '):
                hosts.append(ch_ip.format_ipv6_addr(addr) or addr)
        hosts.sort()
        return hosts
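
    # Example (illustrative): ceph-proxy may present several monitors in a
    # single space-delimited field, so
    #   mon_hosts(["10.0.0.2 10.0.0.3", "10.0.0.1"])
    # returns ["10.0.0.1", "10.0.0.2", "10.0.0.3"].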

    def get_relation_data(self):
        data = {}
        mon_ips = []
        for relation in self.framework.model.relations[self.relation_name]:
            for unit in relation.units:
                _data = {
                    'key': relation.data[unit].get('key'),
                    'auth': relation.data[unit].get('auth')
                }
                mon_ip = relation.data[unit].get('ceph-public-address')
                if mon_ip:
                    mon_ips.append(mon_ip)
                if all(_data.values()):
                    data = _data
        if data:
            data['mon_hosts'] = self.mon_hosts(mon_ips)
        return data

    def existing_request_complete(self):
        rq = self.get_existing_request()
        if rq and self.is_request_complete(rq,
                                           self.model.relations[self.name]):
            return True
        return False

    def on_changed(self, event):
        logging.info("ceph client on_changed")
        relation_data = self.get_relation_data()
        if relation_data:
            if self.existing_request_complete():
                logging.info("emiting pools available")
                self.state.pools_available = True
                self.on.pools_available.emit()
            else:
                logging.info("incomplete request. broker_req not found")

    def get_broker_rsp_key(self):
        return 'broker-rsp-{}'.format(self.this_unit.name.replace('/', '-'))

    def get_existing_request(self):
        logging.info("get_existing_request")
        # json.dumps of the CephBrokerRq()
        rq = ch_ceph.CephBrokerRq()

        if self.state.broker_req:
            try:
                j = json.loads(self.state.broker_req)
                logging.info("Json request: {}".format(self.state.broker_req))
                rq.set_ops(j['ops'])
            except ValueError as err:
                logging.info("Unable to decode broker_req: {}. Error {}"
                             "".format(self.state.broker_req, err))
        return rq

    def create_replicated_pool(self,
                               name,
                               replicas=3,
                               weight=None,
                               pg_num=None,
                               group=None,
                               namespace=None):
        """
        Request pool setup
        @param name: Name of pool to create
        @param replicas: Number of replicas for supporting pools
        @param weight: The percentage of data the pool makes up
        @param pg_num: If not provided, this value will be calculated by the
                       broker based on how many OSDs are in the cluster at the
                       time of creation. Note that, if provided, this value
                       will be capped at the current available maximum.
        @param group: Group to add pool to.
        @param namespace: A group can optionally have a namespace defined that
                          will be used to further restrict pool access.
        """
        logging.info("create_replicated_pool")
        relations = self.framework.model.relations[self.name]
        logging.info("create_replicated_pool: {}".format(relations))
        if not relations:
            return
        rq = self.get_existing_request()
        logging.info("Adding create_replicated_pool request")
        rq.add_op_create_replicated_pool(name=name,
                                         replica_count=replicas,
                                         pg_num=pg_num,
                                         weight=weight,
                                         group=group,
                                         namespace=namespace)
        logging.info("Storing request")
        self.state.broker_req = rq.request
        logging.info("Calling send_request_if_needed")
        # ch_ceph.send_request_if_needed(rq, relation=self.name)
        self.send_request_if_needed(rq, relations)

    def request_ceph_permissions(self, client_name, permissions):
        logging.info("request_ceph_permissions")
        relations = self.framework.model.relations[self.name]
        if not relations:
            return
        rq = self.get_existing_request()
        rq.add_op({
            'op': 'set-key-permissions',
            'permissions': permissions,
            'client': client_name
        })
        self.state.broker_req = rq.request
        # ch_ceph.send_request_if_needed(rq, relation=self.name)
        self.send_request_if_needed(rq, relations)

    def get_previous_request(self, relation):
        """Get the previous request.

        :param relation: Relation to check for existing request.
        :type relation: ops.model.Relation
        :returns: The previous ceph request.
        :rtype: ch_ceph.CephBrokerRq
        """
        request = None
        broker_req = relation.data[self.this_unit].get('broker_req')
        if broker_req:
            request_data = json.loads(broker_req)
            request = ch_ceph.CephBrokerRq(
                api_version=request_data['api-version'],
                request_id=request_data['request-id'])
            request.set_ops(request_data['ops'])

        return request

    def get_request_states(self, request, relations):
        """Get the existing requests and their states.

        :param request: A CephBrokerRq object
        :type request: ch_ceph.CephBrokerRq
        :param relations: List of relations to check for existing request.
        :type relations: [ops.model.Relation, ...]
        :returns: Per-relation request state keyed by relation id, with
                  'sent' and 'complete' flags.
        :rtype: dict
        """
        requests = {}
        for relation in relations:
            previous_request = self.get_previous_request(relation)
            if request == previous_request:
                sent = True
                complete = self.is_request_complete_for_relation(
                    previous_request, relation)
            else:
                sent = False
                complete = False

            rid = "{}:{}".format(relation.name, relation.id)
            requests[rid] = {
                'sent': sent,
                'complete': complete,
            }

        return requests

    def is_request_complete_for_relation(self, request, relation):
        """Check if a given request has been completed on the given relation

        :param request: A CephBrokerRq object
        :type request: ch_ceph.CephBrokerRq
        :param relation: A relation to check for an existing request.
        :type relation: ops.model.Relation
        :returns: Whether request is complete.
        :rtype: bool
        """
        broker_key = self.get_broker_rsp_key()
        for unit in relation.units:
            if relation.data[unit].get(broker_key):
                rsp = ch_ceph.CephBrokerRsp(relation.data[unit][broker_key])
                if rsp.request_id == request.request_id:
                    if not rsp.exit_code:
                        return True
            else:
                if relation.data[unit].get('broker_rsp'):
                    logging.info('No response for this unit yet')
        return False

    def is_request_complete(self, request, relations):
        """Check a functionally equivalent request has already been completed

        Returns True if a similair request has been completed

        :param request: A CephBrokerRq object
        :type request: ch_ceph.CephBrokerRq
        :param relations: List of relations to check for existing request.
        :type relations: [ops.model.Relation, ...]
        :returns: Whether request is complete.
        :rtype: bool
        """
        states = self.get_request_states(request, relations)
        for rid in states.keys():
            if not states[rid]['complete']:
                return False

        return True

    def is_request_sent(self, request, relations):
        """Check if a functionally equivalent request has already been sent

        Returns True if a similar request has been sent

        :param request: A CephBrokerRq object
        :type request: ch_ceph.CephBrokerRq
        :param relations: List of relations to check for existing request.
        :type relations: [ops.model.Relation, ...]
        :returns: Whether equivalent request has been sent.
        :rtype: bool
        """
        states = self.get_request_states(request, relations)
        for rid in states.keys():
            if not states[rid]['sent']:
                return False

        return True

    def send_request_if_needed(self, request, relations):
        """Send request if an equivalent request has not already been sent

        :param request: A CephBrokerRq object
        :type request: ch_ceph.CephBrokerRq
        :param relations: List of relations to check for existing request.
        :type relations: [ops.model.Relation, ...]
        """
        if self.is_request_sent(request, relations):
            logging.debug('Request already sent, not sending new request')
        else:
            for relation in relations:
                logging.debug('Sending request {}'.format(request.request_id))
                relation.data[self.this_unit]['broker_req'] = request.request
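
# Hypothetical consumer sketch (names assumed, not from the original source):
# a charm instantiates the interface, requests pools once the broker is
# available, and configures itself when pools_available fires:
#
#     self.ceph = CephClientRequires(self, "ceph-client")
#     self.framework.observe(self.ceph.on.broker_available, self._request)
#     self.framework.observe(self.ceph.on.pools_available, self._configure)
#
#     def _request(self, event):
#         self.ceph.create_replicated_pool(name="data", replicas=3)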
Example #29
class RabbitMQAMQPProvides(Object):
    """
    RabbitMQAMQPProvides class
    """

    on = RabbitMQAMQPClientEvents()
    _stored = StoredState()

    def __init__(self, charm, relation_name):
        super().__init__(charm, relation_name)
        self.charm = charm
        self.relation_name = relation_name
        self.framework.observe(
            self.charm.on[relation_name].relation_joined, self._on_amqp_relation_joined
        )
        self.framework.observe(
            self.charm.on[relation_name].relation_changed, self._on_amqp_relation_changed
        )
        self.framework.observe(
            self.charm.on[relation_name].relation_broken, self._on_amqp_relation_broken
        )

    @property
    def _amqp_rel(self):
        """This AMQP relationship."""
        return self.framework.model.get_relation(self.relation_name)

    def _on_amqp_relation_joined(self, event):
        """Handle AMQP joined."""
        logging.debug("RabbitMQAMQPProvides on_joined")
        self.on.has_amqp_clients.emit()

    def _on_amqp_relation_changed(self, event):
        """Handle AMQP changed."""
        logging.debug("RabbitMQAMQPProvides on_changed")
        # Validate data on the relation
        if self.username(event) and self.vhost(event):
            self.on.ready_amqp_clients.emit()
            if self.charm.unit.is_leader():
                self.set_amqp_credentials(
                    event,
                    self.username(event),
                    self.vhost(event))

    def _on_amqp_relation_broken(self, event):
        """Handle AMQP broken."""
        logging.debug("RabbitMQAMQPProvides on_departed")
        # TODO clear data on the relation

    def username(self, event):
        """Return the AMQP username from the client side of the relation."""
        return event.relation.data[self._amqp_rel.app].get("username")

    def vhost(self, event):
        """Return the AMQP vhost from the client side of the relation."""
        return event.relation.data[self._amqp_rel.app].get("vhost")

    def set_amqp_credentials(self, event, username, vhost):
        """Set AMQP Credentials.

        :param event: The current event
        :type EventsBase
        :param username: The requested username
        :type username: str
        :param vhost: The requested vhost
        :type vhost: str
        :returns: None
        :rtype: None
        """
        # TODO: Can we move this into the charm code?
        # TODO TLS Support. Existing interfaces set ssl_port and ssl_ca
        logging.debug("Setting amqp connection information.")
        try:
            if not self.charm.does_vhost_exist(vhost):
                self.charm.create_vhost(vhost)
            password = self.charm.create_user(username)
            self.charm.set_user_permissions(username, vhost)
            event.relation.data[self.charm.app]["password"] = password
            event.relation.data[self.charm.app]["hostname"] = self.charm.hostname
        except requests.exceptions.ConnectionError as e:
            logging.warning("Rabbitmq is not ready. Defering. Errno: {}".format(e.errno))
            event.defer()
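
# Flow summary (as implemented above): the requirer writes "username" and
# "vhost" into the relation; once both are present the provider emits
# ready_amqp_clients and, on the leader, answers with "password" and
# "hostname" in its application databag. If RabbitMQ is not reachable yet,
# the event is deferred and retried on a later hook.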
class SlurmdCharm(CharmBase):
    """Slurmd lifecycle events."""

    _stored = StoredState()
    on = SlurmdCharmEvents()

    def __init__(self, *args):
        """Init _stored attributes and interfaces, observe events."""
        super().__init__(*args)

        self._stored.set_default(nhc_conf=str(),
                                 slurm_installed=False,
                                 slurmctld_available=False,
                                 slurmctld_started=False,
                                 cluster_name=str())

        self._slurm_manager = SlurmManager(self, "slurmd")
        self._fluentbit = FluentbitClient(self, "fluentbit")

        # interface to slurmctld, should only have one slurmctld per slurmd app
        self._slurmd = Slurmd(self, "slurmd")
        self._slurmd_peer = SlurmdPeer(self, "slurmd-peer")

        event_handler_bindings = {
            self.on.install: self._on_install,
            self.on.upgrade_charm: self._on_upgrade,
            self.on.update_status: self._on_update_status,
            self.on.config_changed: self._on_config_changed,
            self.on.slurmctld_started: self._on_slurmctld_started,
            self.on.slurmd_start: self._on_slurmd_start,
            self.on.check_etcd: self._on_check_etcd,
            self._slurmd.on.slurmctld_available: self._on_slurmctld_available,
            self._slurmd.on.slurmctld_unavailable:
            self._on_slurmctld_unavailable,
            # fluentbit
            self.on["fluentbit"].relation_created:
            self._on_configure_fluentbit,
            # actions
            self.on.version_action: self._on_version_action,
            self.on.node_configured_action: self._on_node_configured_action,
            self.on.get_node_inventory_action:
            self._on_get_node_inventory_action,
            self.on.show_nhc_config_action: self._on_show_nhc_config,
            # infiniband actions
            self.on.get_infiniband_repo_action: self.get_infiniband_repo,
            self.on.set_infiniband_repo_action: self.set_infiniband_repo,
            self.on.install_infiniband_action: self.install_infiniband,
            self.on.uninstall_infiniband_action: self.uninstall_infiniband,
            self.on.start_infiniband_action: self.start_infiniband,
            self.on.enable_infiniband_action: self.enable_infiniband,
            self.on.stop_infiniband_action: self.stop_infiniband,
            self.on.is_active_infiniband_action: self.is_active_infiniband,
            # nvidia actions
            self.on.nvidia_repo_action: self.nvidia_repo,
            self.on.nvidia_package_action: self.nvidia_package,
            self.on.nvidia_install_action: self.nvidia_install,
        }
        for event, handler in event_handler_bindings.items():
            self.framework.observe(event, handler)

    def _on_install(self, event):
        """Perform installation operations for slurmd."""
        self.unit.set_workload_version(Path("version").read_text().strip())
        self.unit.status = WaitingStatus("Installing slurmd")

        custom_repo = self.config.get("custom-slurm-repo")
        successful_installation = self._slurm_manager.install(custom_repo)
        logger.debug(f"### slurmd installed: {successful_installation}")

        if successful_installation:
            self._stored.slurm_installed = True
        else:
            self.unit.status = BlockedStatus("Error installing slurmd")
            event.defer()

        self._check_status()

    def _on_configure_fluentbit(self, event):
        """Set up Fluentbit log forwarding."""
        self._configure_fluentbit()

    def _configure_fluentbit(self):
        logger.debug("## Configuring fluentbit")
        cfg = list()
        cfg.extend(self._slurm_manager.fluentbit_config_nhc)
        cfg.extend(self._slurm_manager.fluentbit_config_slurm)
        self._fluentbit.configure(cfg)

    def _on_upgrade(self, event):
        """Perform upgrade operations."""
        self.unit.set_workload_version(Path("version").read_text().strip())

    def _on_update_status(self, event):
        """Handle update status."""
        self._check_status()

    def _check_status(self) -> bool:
        """Check if we heve all needed components.

        - partition name
        - slurm installed
        - slurmctld available and working
        - munge key configured and working
        """
        if self._slurm_manager.needs_reboot:
            self.unit.status = BlockedStatus("Machine needs reboot")
            return False

        if not self.get_partition_name():
            self.unit.status = WaitingStatus("Waiting on charm configuration")
            return False

        if not self._stored.slurm_installed:
            self.unit.status = BlockedStatus("Error installing slurmd")
            return False

        if not self._slurmd.is_joined:
            self.unit.status = BlockedStatus("Need relations: slurmctld")
            return False

        if not self._stored.slurmctld_available:
            self.unit.status = WaitingStatus("Waiting on: slurmctld")
            return False

        if not self._slurm_manager.check_munged():
            self.unit.status = BlockedStatus("Error configuring munge key")
            return False

        if not self._stored.slurmctld_started:
            self.unit.status = WaitingStatus("Waiting slurmctld to start")
            return False

        self.unit.status = ActiveStatus("slurmd available")
        return True

    def ensure_slurmd_starts(self, max_attempts=10) -> bool:
        """Ensure slurmd is up and running."""
        logger.debug("## Stopping slurmd")
        self._slurm_manager.slurm_systemctl('stop')

        for i in range(max_attempts):
            if self._slurm_manager.slurm_is_active():
                logger.debug("## Slurmd running")
                break
            else:
                logger.warning("## Slurmd not running, trying to start it")
                self.unit.status = WaitingStatus("Starting slurmd")
                self._slurm_manager.restart_slurm_component()
                sleep(2 + i)

        if self._slurm_manager.slurm_is_active():
            return True
        else:
            self.unit.status = BlockedStatus("Cannot start slurmd")
            return False

    def _set_slurmctld_available(self, flag: bool):
        """Change stored value for slurmctld availability."""
        self._stored.slurmctld_available = flag

    def _set_slurmctld_started(self, flag: bool):
        """Change stored value for slurmctld started."""
        self._stored.slurmctld_started = flag

    def _on_slurmctld_available(self, event):
        """Get data from slurmctld and send inventory."""
        if not self._stored.slurm_installed:
            event.defer()
            return

        logger.debug(
            '#### Slurmctld available - setting overrides for configless')
        # get slurmctld host:port from relation and override systemd services
        host = self._slurmd.slurmctld_hostname
        port = self._slurmd.slurmctld_port
        self._slurm_manager.create_configless_systemd_override(host, port)
        self._slurm_manager.daemon_reload()

        self._write_munge_key_and_restart_munge()

        self._set_slurmctld_available(True)
        self._on_set_partition_info_on_app_relation_data(event)
        self._check_status()

        # check etcd for hostnames
        self.on.check_etcd.emit()

    def _on_check_etcd(self, event):
        """Check if node is accounted for.

        Check if slurmctld accounted for this node's inventory for the first
        time, if so, emit slurmctld_started event, so the node can start the
        daemon.
        """

        host = self._slurmd.slurmctld_address
        port = self._slurmd.etcd_port
        logger.debug(f"## Connecting to etcd3 in {host}:{port}")
        client = Etcd3Client(host=host, port=port, api_path="/v3/")

        logger.debug("## Querying etcd3 for node list")
        try:
            v = client.get(key="all_nodes")
            logger.debug(f"## Got: {v}")
        except Exception as e:
            logger.error(
                f"## Unable to connect to {host} to get list of nodes: {e}")
            event.defer()
            return

        node_accounted = False
        if v:
            hostnames = json.loads(v[0])
            logger.debug(f"### etcd3 node list: {hostnames}")
            if self.hostname in hostnames:
                self.on.slurmctld_started.emit()
                node_accounted = True

        if not node_accounted:
            logger.debug("## Node not accounted for. Deferring.")
            event.defer()
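
    # Illustrative etcd contents assumed by the check above: the "all_nodes"
    # key holds a JSON list of node hostnames, e.g.
    #   all_nodes -> '["node-1", "node-2"]'
    # and the unit only starts slurmd once its own hostname appears there.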

    def _on_slurmctld_unavailable(self, event):
        logger.debug("## Slurmctld unavailable")
        self._set_slurmctld_available(False)
        self._set_slurmctld_started(False)
        self._slurm_manager.slurm_systemctl('stop')
        self._check_status()

    def _on_slurmctld_started(self, event):
        """Set flag to True and emit slurmd_start event."""
        self._set_slurmctld_started(True)
        self.on.slurmd_start.emit()

    def _on_slurmd_start(self, event):
        if not self._check_status():
            event.defer()
            return

        # only set up fluentbit if we have a relation to it
        if self._fluentbit._relation is not None:
            self._configure_fluentbit()

        # at this point, we have slurm installed, munge configured, and we know
        # slurmctld accounted for this node. It should be safe to start slurmd
        if self.ensure_slurmd_starts():
            logger.debug("## slurmctld started and slurmd is running")
        else:
            event.defer()
        self._check_status()

    def _on_config_changed(self, event):
        """Handle charm configuration changes."""
        if self.model.unit.is_leader():
            logger.debug("## slurmd config changed - leader")
            self._on_set_partition_info_on_app_relation_data(event)

        nhc_conf = self.model.config.get('nhc-conf')
        if nhc_conf:
            if nhc_conf != self._stored.nhc_conf:
                self._stored.nhc_conf = nhc_conf
                self._slurm_manager.render_nhc_config(nhc_conf)

    def get_partition_name(self) -> str:
        """Return the partition_name in the slurmd relation."""
        # Determine if a user-supplied partition-name config exists, if so
        # ensure the partition_name is consistent with the supplied config.
        # If no partition name has been specified then generate one.
        partition_name = self._slurmd_peer.partition_name
        partition_name_from_config = self.config.get("partition-name")
        if partition_name:
            if partition_name_from_config:
                partition_name_from_config = partition_name_from_config.replace(
                    ' ', '-')
                if partition_name != partition_name_from_config:
                    self._set_partition_name(partition_name_from_config)
                    partition_name = partition_name_from_config
                else:
                    logger.debug("Partition name unchanged.")
            else:
                logger.debug("Partition name unchanged.")
        else:
            partition_name = f"osd-{self.app.name}"
            logger.debug(f"Partition name: {partition_name}")
            self._set_partition_name(partition_name)

        return partition_name
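
    # Example (illustrative): a user-supplied config of
    #   partition-name: "gpu nodes"
    # is normalized to "gpu-nodes"; with no config and no peer-stored value
    # the charm falls back to a generated name based on the app name.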

    def _set_partition_name(self, name: str):
        """Set the partition_name in the slurmd relation."""
        if self.model.unit.is_leader():
            self._slurmd_peer.partition_name = name

    def _write_munge_key_and_restart_munge(self):
        logger.debug('#### slurmd charm - writing munge key')

        self._slurm_manager.configure_munge_key(
            self._slurmd.get_stored_munge_key())

        if self._slurm_manager.restart_munged():
            logger.debug("## Munge restarted succesfully")
        else:
            logger.error("## Unable to restart munge")

    def _on_version_action(self, event):
        """Return version of installed components.

        - Slurm
        - munge
        - NHC
        - infiniband
        """
        version = {}
        version['slurm'] = self._slurm_manager.slurm_version()
        version['munge'] = self._slurm_manager.munge_version()
        version['nhc'] = self._slurm_manager.nhc_version()
        version['infiniband'] = self._slurm_manager.infiniband_version()

        event.set_results(version)

    def _on_node_configured_action(self, event):
        """Remove node from DownNodes."""
        # trigger reconfig
        self._slurmd.configure_new_node()
        logger.debug('### This node is not new anymore')

    def _on_get_node_inventory_action(self, event):
        """Return node inventory."""
        inventory = self._slurmd.node_inventory
        event.set_results({'inventory': inventory})

    def get_infiniband_repo(self, event):
        """Return the currently used infiniband repository."""
        repo = self._slurm_manager.infiniband.repository
        event.set_results({'infiniband-repo': repo})

    def set_infiniband_repo(self, event):
        """Set the infiniband repository."""
        repo = event.params["repo"]
        logger.debug(f"#### setting custom infiniband repo: {repo}")
        repo = base64.b64decode(repo).decode()
        self._slurm_manager.infiniband.repository = repo

    def install_infiniband(self, event):
        """Install infiniband."""
        logger.debug("#### Installing Infiniband")
        self._slurm_manager.infiniband.install()
        event.set_results({'installation': 'Successful. Please reboot node.'})
        self.unit.status = BlockedStatus("Need reboot for Infiniband")

    def uninstall_infiniband(self, event):
        """Uninstall infiniband."""
        logger.debug("#### Uninstalling Infiniband")
        self._slurm_manager.infiniband.uninstall()

    def start_infiniband(self, event):
        """Start Infiniband systemd service."""
        logger.debug("#### Starting Infiniband service")
        self._slurm_manager.infiniband.start()

    def enable_infiniband(self, event):
        """Enable Infiniband systemd service."""
        logger.debug("#### Enabling Infiniband service")
        self._slurm_manager.infiniband.enable()

    def stop_infiniband(self, event):
        """Stop Infiniband systemd service."""
        logger.debug("#### Stoping Infiniband service")
        self._slurm_manager.infiniband.stop()

    def is_active_infiniband(self, event):
        """Check if Infiniband systemd service is arctive."""
        status = self._slurm_manager.infiniband.is_active()
        logger.debug(f"#### Infiniband service is-active: {status}")
        event.set_results({'infiniband-is-active': status})

    def nvidia_repo(self, event):
        """Set or get the used nvidia repository."""
        repo = event.params.get("repo", None)
        if repo:
            self._slurm_manager.nvidia.repository = base64.b64decode(
                repo).decode()

        event.set_results(
            {'nvidia-repo': self._slurm_manager.nvidia.repository})

    def nvidia_package(self, event):
        """Set or get the used nvidia package."""
        package = event.params.get("package", None)
        if package is not None:
            # an explicitly supplied package name (even "") is stored
            self._slurm_manager.nvidia.package = package

        event.set_results(
            {'nvidia-package': self._slurm_manager.nvidia.package})

    def nvidia_install(self, event):
        """Install nvidia drivers."""
        logger.debug("#### Installing nvidia drivers: %s",
                     self._slurm_manager.nvidia.package)
        self._slurm_manager.nvidia.install()
        event.set_results({'installation': 'Successful. Please reboot node.'})
        self.unit.status = BlockedStatus("Need reboot for nvidia")

    def _on_show_nhc_config(self, event):
        """Show current nhc.conf."""
        nhc_conf = self._slurm_manager.get_nhc_config()
        event.set_results({"nhc.conf": nhc_conf})

    def _on_set_partition_info_on_app_relation_data(self, event):
        """Set the slurm partition info on the application relation data."""
        # Only the leader can set data on the relation.
        if self.model.unit.is_leader():
            # If the relation with slurmctld exists then set our
            # partition info on the application relation data.
            # This handler shouldn't fire if the relation isn't made,
            # but add this extra check here just in case.
            if self._slurmd.is_joined:
                partition = self._assemble_partition()
                if partition:
                    self._slurmd.set_partition_info_on_app_relation_data(
                        partition)
                else:
                    event.defer()
            else:
                event.defer()

    def _assemble_partition(self):
        """Assemble the partition info."""
        partition_name = self.get_partition_name()
        partition_config = self.config.get("partition-config")
        partition_state = self.config.get("partition-state")
        logger.debug(f"## partition_name: {partition_name}")

        return {
            "partition_name": partition_name,
            "partition_state": partition_state,
            "partition_config": partition_config,
        }

    @property
    def hostname(self) -> str:
        """Return the hostname."""
        return self._slurm_manager.hostname

    @property
    def cluster_name(self) -> str:
        """Return the cluster-name."""
        return self._stored.cluster_name

    @cluster_name.setter
    def cluster_name(self, name: str):
        """Set the cluster-name."""
        self._stored.cluster_name = name