class MetalLBSpeakerCharm(CharmBase):
    def __init__(self, *args):
        super().__init__(*args)
        self.speaker_image = OCIImageResource(self, 'speaker-image')
        self.framework.observe(self.on.install, self.set_pod_spec)
        self.framework.observe(self.on.upgrade_charm, self.set_pod_spec)

    @staticmethod
    def _get_pod_spec(config=None):
        # Avoid a mutable default argument and use the safe YAML loader.
        with open('metallb.yaml') as f_in:
            spec = yaml.safe_load(f_in)
            spec.update(config or {})
            return spec

    def set_pod_spec(self, event):
        if not self.model.unit.is_leader():
            print('Not a leader, skipping set_pod_spec')
            self.model.unit.status = ActiveStatus()
            return

        try:
            speaker_details = self.speaker_image.fetch()
        except ResourceError as e:
            self.model.unit.status = e.status
            return

        self.model.unit.status = MaintenanceStatus('Setting pod spec')
        self.model.pod.set_spec(self._get_pod_spec())

        self.model.unit.status = ActiveStatus()
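These pod-spec charms are launched through the standard ops entry point. A minimal sketch of the corresponding stub (its placement in the charm's src/charm.py is an assumption; main() itself is the standard ops.main API):

from ops.main import main

if __name__ == "__main__":
    main(MetalLBSpeakerCharm)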
Example 2
    def test_when_complete_image_info_should_match_given_resource(
            self, read_text, path_exists, charm):
        # Given
        resource = OCIImageResource(charm, "test-image")

        # Monkeypatch fetch as we can't mock the parent Object
        def patched_fetch(name: str) -> Path:
            return Path("/a/b/c/d/e/f")

        resource.model.resources.fetch = patched_fetch
        path_exists.return_value = True

        image = "image:6.0"
        user = "******"
        pwd = "pwd"
        read_text.return_value = """
            "registrypath": {}
            "username": {}
            "password": {}
        """.format(image, user, pwd)

        # When
        image_info = resource.fetch()

        # Then
        self.assertDictEqual(image_info, {
            "imagePath": image,
            "password": pwd,
            "username": user
        })
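The test above pins down the contract of OCIImageResource.fetch: read the YAML file that Juju writes for an oci-image resource and normalise its keys. A simplified sketch of that behaviour (an illustration, not the library's actual implementation):

import yaml
from pathlib import Path

def fetch_image_info(resource_path: Path) -> dict:
    """Map the resource file's keys onto the pod-spec imageDetails keys."""
    data = yaml.safe_load(resource_path.read_text()) or {}
    info = {}
    if "registrypath" in data:
        info["imagePath"] = data["registrypath"]
    if "username" in data:
        info["username"] = data["username"]
    if "password" in data:
        info["password"] = data["password"]
    return info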
Example 3
    def test_when_fetch_fails_with_model_error(self, charm):
        # Given
        resource = OCIImageResource(charm, "test-image")

        # Monkeypatch fetch as we can't mock the parent Object
        def patched_fetch(name: str) -> Path:
            raise ModelError()

        resource.model.resources.fetch = patched_fetch

        # When
        with self.assertRaises(Exception) as context:
            resource.fetch()

        # Then
        self.assertIn("Missing resource: test-image", str(context.exception))
Example 4
    def test_when_resource_path_does_not_exist(self, path_exists, charm):
        # Given
        resource = OCIImageResource(charm, "test-image")

        # Monkeypatch fetch as we can't mock the parent Object
        def patched_fetch(name: str) -> Path:
            return Path("/a/b/c/d/e/f")

        resource.model.resources.fetch = patched_fetch
        path_exists.return_value = False

        # When
        with self.assertRaises(Exception) as context:
            resource.fetch()

        # Then
        self.assertIn("Missing resource: test-image", str(context.exception))
Example 5
    def test_when_resource_file_is_empty(self, read_text, path_exists, charm):
        # Given
        resource = OCIImageResource(charm, "test-image")

        # Monkeypatch fetch as we can't mock the parent Object
        def patched_fetch(name: str) -> Path:
            return Path('/a/b/c/d/e/f')

        resource.model.resources.fetch = patched_fetch
        path_exists.return_value = True
        read_text.return_value = ""

        # When
        with self.assertRaises(Exception) as context:
            resource.fetch()

        # Then
        self.assertIn('Missing resource: test-image', str(context.exception))
Example 6
class CharmedOsmBase(CharmBase):
    """CharmedOsmBase Charm."""

    state = StoredState()

    def __init__(self, *args, oci_image="image") -> None:
        """CharmedOsmBase Charm constructor."""
        super().__init__(*args)

        # Internal state initialization
        self.state.set_default(pod_spec=None)

        self.image = OCIImageResource(self, oci_image)

        # Registering regular events
        self.framework.observe(self.on.config_changed, self.configure_pod)
        self.framework.observe(self.on.leader_elected, self.configure_pod)

    def build_pod_spec(self, image_info):
        raise NotImplementedError("build_pod_spec is not implemented")

    def configure_pod(self, _=None) -> None:
        """Assemble the pod spec and apply it, if possible."""
        try:
            if self.unit.is_leader():
                self.unit.status = MaintenanceStatus("Assembling pod spec")
                image_info = self.image.fetch()
                pod_spec = self.build_pod_spec(image_info)
                self._set_pod_spec(pod_spec)

            self.unit.status = ActiveStatus("ready")
        except OCIImageResourceError:
            self.unit.status = BlockedStatus("Error fetching image information")
        except ValidationError as e:
            logger.exception(f"Config data validation error: {e}")
            self.unit.status = BlockedStatus(str(e))
        except RelationsMissing as e:
            logger.error(f"Relation missing error: {e.message}")
            self.unit.status = BlockedStatus(e.message)
        except ModelError as e:
            self.unit.status = BlockedStatus(str(e))
        except Exception as e:
            error_message = f"Unknown exception: {e}"
            logger.error(error_message)
            self.unit.status = BlockedStatus(error_message)

    def _set_pod_spec(self, pod_spec: Dict[str, Any]) -> None:
        pod_spec_hash = _hash_from_dict(pod_spec)
        if self.state.pod_spec != pod_spec_hash:
            self.model.pod.set_spec(pod_spec)
            self.state.pod_spec = pod_spec_hash
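CharmedOsmBase leaves build_pod_spec to subclasses and relies on a _hash_from_dict helper that is not part of this snippet. A minimal sketch of both, purely for illustration (the subclass name, port, and hashing choice are assumptions):

import hashlib
import json

def _hash_from_dict(data):
    """Deterministic digest of a JSON-serialisable pod spec."""
    return hashlib.sha256(json.dumps(data, sort_keys=True).encode()).hexdigest()

class ExampleOsmCharm(CharmedOsmBase):
    def __init__(self, *args):
        super().__init__(*args, oci_image="image")

    def build_pod_spec(self, image_info):
        return {
            "version": 3,
            "containers": [{
                "name": self.app.name,
                "imageDetails": image_info,
                "ports": [{"containerPort": 8080, "protocol": "TCP"}],
            }],
        }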
Example 7
    def test_when_resource_is_not_a_well_formatted_yaml(
            self, read_text, path_exists, charm):
        # Given
        resource = OCIImageResource(charm, "test-image")

        # Monkeypatch fetch as we can't mock the parent Object
        def patched_fetch(name: str) -> Path:
            return Path("/a/b/c/d/e/f")

        resource.model.resources.fetch = patched_fetch
        path_exists.return_value = True

        read_text.return_value = """
            item
                <<: *item_attributes
        """

        # When
        with self.assertRaises(Exception) as context:
            resource.fetch()

        # Then
        self.assertIn("Invalid resource: test-image", str(context.exception))
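For reference, the resource body used in this test is rejected by any YAML loader (it dereferences an anchor that was never defined, inside a malformed mapping), which is what drives the "Invalid resource" error asserted above. A quick standalone check:

import yaml

try:
    yaml.safe_load("item\n    <<: *item_attributes\n")
except yaml.YAMLError as err:
    print(f"rejected: {type(err).__name__}")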
Example 8
    def test_when_resource_misses_registry_path(self, read_text, path_exists,
                                                charm):
        # Given
        resource = OCIImageResource(charm, "test-image")

        # Monkeypatch fetch as we can't mock the parent Object
        def patched_fetch(name: str) -> Path:
            return Path("/a/b/c/d/e/f")

        resource.model.resources.fetch = patched_fetch
        path_exists.return_value = True

        read_text.return_value = """
            "username": {}
            "password": {}
        """

        # When
        with self.assertRaises(Exception) as context:
            resource.fetch()

        # Then
        self.assertIn("Invalid resource: test-image", str(context.exception))
Example 9
class MattermostK8sCharm(CharmBase):

    state = StoredState()

    def __init__(self, framework, key):
        super().__init__(framework, key)
        # get our mattermost_image from juju
        # ie: juju deploy . --resource mattermost_image=mattermost:latest )
        self.mattermost_image = OCIImageResource(self, 'mattermost_image')
        self.framework.observe(self.on.start, self.configure_pod)
        self.framework.observe(self.on.config_changed, self.configure_pod)
        self.framework.observe(self.on.upgrade_charm, self.configure_pod)

    def configure_pod(self, event):
        if not self.framework.model.unit.is_leader():
            self.model.unit.status = WaitingStatus('Not a leader')
            return

        mattermost_image_details = self.mattermost_image.fetch()
        self.model.unit.status = MaintenanceStatus('Configuring pod')
        config = self.model.config
        self.model.pod.set_spec({
            'containers': [{
                'name':
                self.framework.model.app.name,
                'imageDetails':
                mattermost_image_details,
                'ports': [{
                    'containerPort':
                    int(self.framework.model.config['mattermost_port']),
                    'protocol':
                    'TCP',
                }],
                'config': {
                    'MATTERMOST_HTTPD_LISTEN_PORT':
                    int(config['mattermost_port']),
                    'DB_HOST': config['pg_db_host'],
                    'DB_PORT_NUMBER': int(config['pg_db_port']),
                    'MM_USERNAME': config['pg_user'],
                    'MM_PASSWORD': config['pg_password'],
                    'MM_ENABLEOPENSERVER': config['open_server'],
                    'MM_ENABLEUPLOADS': config['enable_plugin_uploads'],
                },
            }]
        })
        self.state.is_started = True
        self.model.unit.status = ActiveStatus()
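The Mattermost pod spec above reads several charm config options; the values below are purely illustrative placeholders (the charm's real config.yaml and defaults are not part of this snippet):

example_config = {
    "mattermost_port": 8065,
    "pg_db_host": "postgresql",
    "pg_db_port": 5432,
    "pg_user": "mattermost",
    "pg_password": "changeme",
    "open_server": True,
    "enable_plugin_uploads": False,
}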
Example 10
class SRIOVCNICharm(CharmBase):
    def __init__(self, *args):
        super().__init__(*args)
        self.image = OCIImageResource(self, 'sriov-cni-image')
        self.framework.observe(self.on.install, self.set_pod_spec)
        self.framework.observe(self.on.upgrade_charm, self.set_pod_spec)
        self.framework.observe(self.on.config_changed, self.set_pod_spec)

    def set_pod_spec(self, event):
        if not self.model.unit.is_leader():
            log.info('Not a leader, skipping set_pod_spec')
            self.model.unit.status = ActiveStatus()
            return

        try:
            image_details = self.image.fetch()
        except OCIImageResourceError as e:
            self.model.unit.status = e.status
            return

        cni_bin_dir = self.model.config.get('cni-bin-dir', '/opt/cni/bin')

        self.model.unit.status = MaintenanceStatus('Setting pod spec')
        self.model.pod.set_spec({
            'version':
            3,
            'containers': [{
                'name':
                'sriov-cni',
                'imageDetails':
                image_details,
                'volumeConfig': [{
                    'name': 'cni-bin',
                    'mountPath': '/dest',
                    'hostPath': {
                        'path': cni_bin_dir
                    }
                }]
            }],
            'kubernetesResources': {
                'pod': {
                    'hostNetwork': True,
                }
            }
        })
        self.model.unit.status = ActiveStatus()
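A hedged sketch of exercising SRIOVCNICharm.set_pod_spec with ops.testing.Harness (the metadata string is an assumption, and the exact Harness helpers shown here may differ between ops versions):

from ops.testing import Harness

harness = Harness(SRIOVCNICharm, meta="""
    name: sriov-cni
    resources:
      sriov-cni-image:
        type: oci-image
""")
harness.set_leader(True)
harness.add_oci_resource("sriov-cni-image")  # dummy image path and credentials
harness.begin()
harness.charm.on.config_changed.emit()
spec, _k8s_resources = harness.get_pod_spec()
assert spec["kubernetesResources"]["pod"]["hostNetwork"] is True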
Example 11
    def test_when_partial_image_info_should_match_given_resource(
            self, read_text, path_exists, charm):
        # Given
        resource = OCIImageResource(charm, "test-image")

        # Monkeypatch fetch as we can't mock the parent Object
        def patched_fetch(name: str) -> Path:
            return Path('/a/b/c/d/e/f')

        resource.model.resources.fetch = patched_fetch
        path_exists.return_value = True

        image = "image:6.0"
        read_text.return_value = """
            "registrypath": {}
        """.format(image)

        # When
        image_info = resource.fetch()

        # Then
        self.assertDictEqual(image_info, {'imagePath': image})
Example 12
class MongoDBCharm(CharmBase):
    state = StoredState()
    on = MongoDBClusterEvents()

    def __init__(self, *args):
        super().__init__(*args)

        self.state.set_default(started=False)
        self.state.set_default(pod_spec=None)

        self.port = MONGODB_PORT
        self.image = OCIImageResource(self, "mongodb-image")

        # Register all of the events we want to observe
        self.framework.observe(self.on.install, self.configure_pod)
        self.framework.observe(self.on.config_changed, self.configure_pod)
        self.framework.observe(self.on.upgrade_charm, self.configure_pod)
        self.framework.observe(self.on.start, self.on_start)
        self.framework.observe(self.on.update_status, self.on_update_status)

        # Peer relation
        self.cluster = MongoDBCluster(self, "cluster", self.port)

        self.framework.observe(self.on.cluster_relation_changed,
                               self.reconfigure)
        self.framework.observe(self.on.cluster_relation_departed,
                               self.reconfigure)

        # Cluster Events
        self.framework.observe(self.on.mongodb_started,
                               self.on_mongodb_started)

        logger.debug("MongoDBCharm initialized!")

    # #############################################
    # ########## CHARM HOOKS HANDLERS #############
    # #############################################

    # hooks: install, config-changed, upgrade-charm
    def configure_pod(self, event):
        # Continue only if the unit is the leader
        if not self.unit.is_leader():
            self.on_update_status(event)
            return

        logger.debug("Running configuring_pod")
        # Check problems in the settings
        problems = self._check_settings()
        if problems:
            self.unit.status = BlockedStatus(problems)
            return

        # Fetch image information
        try:
            self.unit.status = WaitingStatus("Fetching image information")
            image_info = self.image.fetch()
        except OCIImageResourceError:
            self.unit.status = BlockedStatus(
                "Error fetching image information")
            return

        # Build Pod spec
        self.unit.status = MaintenanceStatus("Assembling pod spec")
        pod_spec = make_pod_spec(
            image_info,
            self.port,
            replica_set_name=self.replica_set_name
            if not self.standalone else None,
        )

        # Update pod spec if the generated one is different
        # from the one previously applied
        if self.state.pod_spec != pod_spec:
            self.model.pod.set_spec(pod_spec)
            self.state.pod_spec = pod_spec

        self.on_update_status(event)
        logger.debug("Running configuring_pod finished")

    # hooks: start
    def on_start(self, event):
        if not self.unit.is_leader():
            return
        logger.debug("Running on_start")
        if MongoConnector.ready(self.standalone_uri):
            self.on.mongodb_started.emit()
        else:
            # This event is not being retriggered before update_status
            event.defer()
            return

        # Don't call self.on_update_status(event) here: on_update_status emits
        # the start event when the replica set is not ready, which would call
        # on_start again and could create an infinite loop.
        logger.debug("Running on_start finished")

    # hooks: update-status
    def on_update_status(self, event):
        status_message = ""
        if self.standalone:
            status_message += "standalone-mode: "
            if MongoConnector.ready(self.standalone_uri):
                status_message += "ready"
                self.unit.status = ActiveStatus(status_message)
            else:
                status_message += "service not ready yet"
                self.unit.status = WaitingStatus(status_message)
        else:
            status_message += f"replica-set-mode({self.replica_set_name}): "
            if MongoConnector.ready(self.standalone_uri):
                status_message += "ready"
                if self.unit.is_leader():
                    if self.cluster.ready:
                        hosts_count = len(self.cluster.replica_set_hosts)
                        status_message += f" ({hosts_count} members)"
                    else:
                        status_message += " (replica set not initialized yet)"
                        # Since on_start is not being properly triggered,
                        # I'm calling it manually here.
                        self.on.start.emit()
                        self.unit.status = WaitingStatus(status_message)
                        return
                self.unit.status = ActiveStatus(status_message)
            else:
                status_message += "service not ready yet"
                self.unit.status = WaitingStatus(status_message)

    # #############################################
    # ####### PEER RELATION HOOK HANDLERS #########
    # #############################################

    # hooks: cluster-relation-changed, cluster-relation-departed
    def reconfigure(self, event):
        logger.debug("Running reconfigure")

        if (self.unit.is_leader() and self.cluster.replica_set_initialized
                and self.cluster.need_replica_set_reconfiguration()):
            uri = self.replica_set_uri
            config = MongoConnector.replset_get_config(uri)
            config = MongoConnector.replset_generate_config(
                self.cluster.hosts,
                self.replica_set_name,
                increase_version=True,
                config=config,
            )
            MongoConnector.replset_reconfigure(uri, config)
            self.on.replica_set_configured.emit(self.cluster.hosts)
        self.on_update_status(event)
        logger.debug("Running reconfigure finished")

    # #############################################
    # ######### CLUSTER EVENT HANDLERS ############
    # #############################################

    def on_mongodb_started(self, event):
        if not self.unit.is_leader() or self.standalone:
            return
        logger.debug("Running on_mongodb_started")
        if not self.cluster.replica_set_initialized:
            self.unit.status = WaitingStatus("Initializing the replica set")
            config = MongoConnector.replset_generate_config(
                self.cluster.hosts, self.replica_set_name)
            MongoConnector.replset_initialize(self.standalone_uri, config)
            self.on.replica_set_configured.emit(self.cluster.hosts)

        self.on.cluster_ready.emit()
        logger.debug("Running on_mongodb_started finished")

    # #############################################
    # ############## PROPERTIES ###################
    # #############################################

    @property
    def replica_set_name(self):
        return self.model.config["replica_set_name"]

    @property
    def standalone(self):
        return self.model.config["standalone"]

    # #############################################
    # ############# PRIVATE METHODS ###############
    # #############################################

    def _check_settings(self):
        problems = []
        config = self.model.config

        for setting in REQUIRED_SETTINGS:
            if config.get(setting) is None:
                problem = f"missing config {setting}"
                problems.append(problem)
        if not self.standalone:
            for setting in REQUIRED_SETTINGS_NOT_STANDALONE:
                if not config.get(setting):
                    problem = f"missing config {setting}"
                    problems.append(problem)

        return ";".join(problems)

    @property
    def replica_set_uri(self):
        uri = "mongodb://"
        for i, host in enumerate(self.cluster.hosts):
            if i:
                uri += ","
            uri += f"{host}:{self.port}"
        uri += f"/?replicaSet={self.replica_set_name}"
        return uri

    @property
    def standalone_uri(self):
        return f"mongodb://{self.model.app.name}:{self.port}/"
Example 13
class DashboardMetricsScraperCharm(CharmBase):
    def __init__(self, *args):
        super().__init__(*args)
        if not self.unit.is_leader():
            # We can't do anything useful when not the leader, so do nothing.
            self.model.unit.status = WaitingStatus('Waiting for leadership')
            return

        ProvideK8sService(self,
                          'metrics-scraper',
                          service_name=self.app.name,
                          service_port=self.model.config["port"])

        self.log = logging.getLogger(__name__)
        self.scraper_image = OCIImageResource(self, 'metrics-scraper-image')
        for event in [
                self.on.install, self.on.leader_elected, self.on.upgrade_charm,
                self.on.config_changed
        ]:
            self.framework.observe(event, self.main)

    def main(self, event):
        try:
            scraper_image_details = self.scraper_image.fetch()
        except OCIImageResourceError as e:
            self.model.unit.status = e.status
            return

        self.model.unit.status = MaintenanceStatus('Setting pod spec')

        self.model.pod.set_spec({
            'version':
            3,
            'service': {
                'updateStrategy': {
                    'type': 'RollingUpdate',
                    'rollingUpdate': {
                        'maxUnavailable': 1
                    },
                },
                'annotations': {
                    'seccomp.security.alpha.kubernetes.io/pod':
                    'runtime/default',
                },
            },
            'containers': [
                {
                    'name':
                    self.model.app.name,
                    'imageDetails':
                    scraper_image_details,
                    'ports': [
                        {
                            'name': 'scraper',
                            'containerPort': self.model.config["port"],
                            'protocol': 'TCP',
                        },
                    ],
                    'volumeConfig': [
                        {
                            'name': 'tmp-volume',
                            'mountPath': '/tmp',
                            'emptyDir': {
                                'medium': 'Memory',
                            },
                        },
                    ],
                    'kubernetes': {
                        'securityContext': {
                            'allowPrivilegeEscalation': False,
                            'readOnlyRootFilesystem': True,
                            'runAsUser': 1001,
                            'runAsGroup': 2001,
                        },
                        'livenessProbe': {
                            'httpGet': {
                                'scheme': 'HTTP',
                                'path': '/',
                                'port': 8000,
                            },
                            'initialDelaySeconds': 30,
                            'timeoutSeconds': 30,
                        },
                    },
                },
            ],
            'serviceAccount': {
                'roles': [
                    {
                        'rules': [
                            {
                                'apiGroups': [''],
                                'resources': ['secrets'],
                                'resourceNames': [
                                    'kubernetes-dashboard-key-holder',
                                    'kubernetes-dashboard-certs',
                                    'kubernetes-dashboard-csrf',
                                ],
                                'verbs': ['get', 'update', 'delete'],
                            },
                            {
                                'apiGroups': [''],
                                'resources': ['configmaps'],
                                'resourceNames':
                                ['kubernetes-dashboard-settings'],
                                'verbs': ['get', 'update'],
                            },
                            {
                                'apiGroups': [''],
                                'resources': ['services'],
                                'resourceNames': [
                                    'heapster',
                                    'dashboard-metrics-scraper',
                                ],
                                'verbs': ['proxy'],
                            },
                            {
                                'apiGroups': [''],
                                'resources': ['services/proxy'],
                                'resourceNames': [
                                    'heapster',
                                    'http:heapster',
                                    'https:heapster',
                                    'dashboard-metrics-scraper',
                                    'http:dashboard-metrics-scraper',
                                ],
                                'verbs': ['get'],
                            },
                            {
                                'apiGroups': ['metrics.k8s.io'],
                                'resources': ['pods', 'nodes'],
                                'verbs': ['get', 'list', 'watch'],
                            },
                        ],
                    },
                    {
                        'global':
                        True,
                        'rules': [
                            {
                                'apiGroups': ['metrics.k8s.io'],
                                'resources': ['pods', 'nodes'],
                                'verbs': ['get', 'list', 'watch'],
                            },
                        ],
                    },
                ],
            },
        })

        self.model.unit.status = ActiveStatus()
Example 14
class MetalLBSpeakerCharm(CharmBase):
    """MetalLB Speaker Charm."""

    _stored = StoredState()

    def __init__(self, *args):
        """Charm initialization for events observation."""
        super().__init__(*args)
        if not self.unit.is_leader():
            self.unit.status = WaitingStatus("Waiting for leadership")
            return
        self.image = OCIImageResource(self, 'metallb-speaker-image')
        self.framework.observe(self.on.install, self._on_start)
        self.framework.observe(self.on.start, self._on_start)
        self.framework.observe(self.on.leader_elected, self._on_start)
        self.framework.observe(self.on.upgrade_charm, self._on_upgrade)
        self.framework.observe(self.on.remove, self._on_remove)
        # -- initialize states --
        self._stored.set_default(k8s_objects_created=False)
        self._stored.set_default(started=False)
        self._stored.set_default(secret=b64encode(
            utils._random_secret(128).encode('utf-8')).decode('utf-8'))
        # -- base values --
        self._stored.set_default(namespace=os.environ["JUJU_MODEL_NAME"])

    def _on_start(self, event):
        """Occurs upon install, start, or upgrade of the charm."""
        if self._stored.started:
            return
        self.unit.status = MaintenanceStatus("Fetching image info")
        try:
            image_info = self.image.fetch()
        except OCIImageResourceError:
            logging.exception('An error occurred while fetching the image info')
            self.unit.status = BlockedStatus(
                "Error fetching image information")
            return

        if not self._stored.k8s_objects_created:
            self.unit.status = MaintenanceStatus("Creating supplementary "
                                                 "Kubernetes objects")
            utils.create_k8s_objects(self._stored.namespace)
            self._stored.k8s_objects_created = True

        self.unit.status = MaintenanceStatus("Configuring pod")
        self.set_pod_spec(image_info)

        self.unit.status = ActiveStatus()
        self._stored.started = True

    def _on_upgrade(self, event):
        """Occurs when new charm code or image info is available."""
        self._stored.started = False
        self._on_start(event)

    def _on_remove(self, event):
        """Remove artifacts created by the K8s API."""
        self.unit.status = MaintenanceStatus("Removing supplementary "
                                             "Kubernetes objects")
        utils.remove_k8s_objects(self._stored.namespace)
        self.unit.status = MaintenanceStatus("Removing pod")
        self._stored.started = False
        self._stored.k8s_objects_created = False

    def set_pod_spec(self, image_info):
        """Set pod spec."""
        self.model.pod.set_spec(
            {
                'version':
                3,
                'serviceAccount': {
                    'roles': [{
                        'global':
                        True,
                        'rules': [
                            {
                                'apiGroups': [''],
                                'resources':
                                ['services', 'endpoints', 'nodes'],
                                'verbs': ['get', 'list', 'watch'],
                            },
                            {
                                'apiGroups': [''],
                                'resources': ['events'],
                                'verbs': ['create', 'patch'],
                            },
                            {
                                'apiGroups': ['policy'],
                                'resourceNames': ['speaker'],
                                'resources': ['podsecuritypolicies'],
                                'verbs': ['use'],
                            },
                        ],
                    }],
                },
                'containers': [{
                    'name':
                    'speaker',
                    'imageDetails':
                    image_info,
                    'imagePullPolicy':
                    'Always',
                    'ports': [{
                        'containerPort': 7472,
                        'protocol': 'TCP',
                        'name': 'monitoring'
                    }],
                    'envConfig': {
                        'METALLB_NODE_NAME': {
                            'field': {
                                'path': 'spec.nodeName',
                                'api-version': 'v1'
                            }
                        },
                        'METALLB_HOST': {
                            'field': {
                                'path': 'status.hostIP',
                                'api-version': 'v1'
                            }
                        },
                        'METALLB_ML_BIND_ADDR': {
                            'field': {
                                'path': 'status.podIP',
                                'api-version': 'v1'
                            }
                        },
                        'METALLB_ML_LABELS': "app=metallb,component=speaker",
                        'METALLB_ML_NAMESPACE': {
                            'field': {
                                'path': 'metadata.namespace',
                                'api-version': 'v1'
                            }
                        },
                        'METALLB_ML_SECRET_KEY': {
                            'secret': {
                                'name': 'memberlist',
                                'key': 'secretkey'
                            }
                        }
                    },
                    # TODO: add constraint fields once it exists in pod_spec
                    # bug : https://bugs.launchpad.net/juju/+bug/1893123
                    # 'resources': {
                    #     'limits': {
                    #         'cpu': '100m',
                    #         'memory': '100Mi',
                    #     }
                    # },
                    'kubernetes': {
                        'securityContext': {
                            'allowPrivilegeEscalation': False,
                            'readOnlyRootFilesystem': True,
                            'capabilities': {
                                'add': ['NET_ADMIN', 'NET_RAW', 'SYS_ADMIN'],
                                'drop': ['ALL']
                            },
                        },
                        # fields do not exist in pod_spec
                        # 'TerminationGracePeriodSeconds': 2,
                    },
                }],
                'kubernetesResources': {
                    'pod': {
                        'hostNetwork': True
                    },
                    'secrets': [{
                        'name': 'memberlist',
                        'type': 'Opaque',
                        'data': {
                            'secretkey': self._stored.secret,
                        }
                    }]
                },
                'service': {
                    'annotations': {
                        'prometheus.io/port': '7472',
                        'prometheus.io/scrape': 'true'
                    }
                },
            }, )
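The speaker charm above delegates to its own utils module (create_k8s_objects, remove_k8s_objects, _random_secret), which is not included in this snippet. A plausible sketch of _random_secret only, based on how it seeds the memberlist secret (an assumption, not the charm's actual helper):

import secrets
import string

def _random_secret(length):
    """Return a random alphanumeric string of the requested length."""
    alphabet = string.ascii_letters + string.digits
    return "".join(secrets.choice(alphabet) for _ in range(length))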
Example 15
class Operator(CharmBase):
    def __init__(self, *args):
        super().__init__(*args)

        self.log = logging.getLogger(__name__)
        self.image = OCIImageResource(self, "oci-image")

        for event in [
                self.on.install,
                self.on.leader_elected,
                self.on.upgrade_charm,
                self.on.config_changed,
                self.on["kubeflow-profiles"].relation_changed,
                self.on["ingress"].relation_changed,
        ]:
            self.framework.observe(event, self.main)

    def main(self, event):
        try:
            self._check_model_name()

            self._check_leader()

            interfaces = self._get_interfaces()

            image_details = self._check_image_details()

            kf_profiles = self._check_kf_profiles(interfaces)
        except CheckFailed as check_failed:
            self.model.unit.status = check_failed.status
            return

        self._configure_mesh(interfaces)

        kf_profiles = list(kf_profiles.get_data().values())[0]
        profiles_service = kf_profiles["service-name"]

        model = self.model.name
        config = self.model.config

        self.model.unit.status = MaintenanceStatus("Setting pod spec")

        self.model.pod.set_spec(
            {
                "version":
                3,
                "serviceAccount": {
                    "roles": [{
                        "global":
                        True,
                        "rules": [
                            {
                                "apiGroups": [""],
                                "resources": ["events", "namespaces", "nodes"],
                                "verbs": ["get", "list", "watch"],
                            },
                            {
                                "apiGroups": ["", "app.k8s.io"],
                                "resources": [
                                    "applications",
                                    "pods",
                                    "pods/exec",
                                    "pods/log",
                                ],
                                "verbs": ["get", "list", "watch"],
                            },
                            {
                                "apiGroups": [""],
                                "resources": ["secrets", "configmaps"],
                                "verbs": ["get"],
                            },
                        ],
                    }]
                },
                "containers": [{
                    "name":
                    "kubeflow-dashboard",
                    "imageDetails":
                    image_details,
                    "envConfig": {
                        "USERID_HEADER": "kubeflow-userid",
                        "USERID_PREFIX": "",
                        "PROFILES_KFAM_SERVICE_HOST":
                        f"{profiles_service}.{model}",
                        "REGISTRATION_FLOW": config["registration-flow"],
                        "DASHBOARD_LINKS_CONFIGMAP":
                        config["dashboard-configmap"],
                    },
                    "ports": [{
                        "name": "ui",
                        "containerPort": config["port"]
                    }],
                    "kubernetes": {
                        "livenessProbe": {
                            "httpGet": {
                                "path": "/healthz",
                                "port": config["port"]
                            },
                            "initialDelaySeconds": 30,
                            "periodSeconds": 30,
                        }
                    },
                }],
            },
            {
                "configMaps": {
                    config["dashboard-configmap"]: {
                        "settings": json.dumps({
                            "DASHBOARD_FORCE_IFRAME": True,
                        }),
                        "links": Path("src/config.json").read_text(),
                    },
                },
                "kubernetesResources": {
                    "customResources": {
                        "profiles.kubeflow.org": [{
                            "apiVersion": "kubeflow.org/v1beta1",
                            "kind": "Profile",
                            "metadata": {
                                "name": config["profile"]
                            },
                            "spec": {
                                "owner": {
                                    "kind": "User",
                                    "name": config["profile"]
                                }
                            },
                        }]
                    },
                },
            },
        )

        self.model.unit.status = ActiveStatus()

    def _configure_mesh(self, interfaces):
        if interfaces["ingress"]:
            interfaces["ingress"].send_data({
                "prefix": "/",
                "rewrite": "/",
                "service": self.model.app.name,
                "port": self.model.config["port"],
            })

    def _check_model_name(self):
        if self.model.name != "kubeflow":
            # Remove when this bug is resolved: https://github.com/kubeflow/kubeflow/issues/6136
            raise CheckFailed(
                "kubeflow-dashboard must be deployed to model named `kubeflow`:"
                " https://git.io/J6d35",
                BlockedStatus,
            )

    def _check_leader(self):
        if not self.unit.is_leader():
            # We can't do anything useful when not the leader, so do nothing.
            raise CheckFailed("Waiting for leadership", WaitingStatus)

    def _get_interfaces(self):
        try:
            interfaces = get_interfaces(self)
        except NoVersionsListed as err:
            raise CheckFailed(err, WaitingStatus)
        except NoCompatibleVersions as err:
            raise CheckFailed(err, BlockedStatus)
        return interfaces

    def _check_image_details(self):
        try:
            image_details = self.image.fetch()
        except OCIImageResourceError as e:
            raise CheckFailed(f"{e.status_message}: oci-image", e.status_type)
        return image_details

    def _check_kf_profiles(self, interfaces):
        if not ((kf_profiles := interfaces["kubeflow-profiles"])
                and kf_profiles.get_data()):
            raise CheckFailed("Waiting for kubeflow-profiles relation data",
                              WaitingStatus)

        return kf_profiles
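CheckFailed is defined in the charm's helper code rather than in this snippet. A minimal sketch consistent with how it is raised and handled above, carrying a message plus a status class and exposing a ready-to-assign status:

class CheckFailed(Exception):
    """A precondition check failed; carries the unit status to set."""

    def __init__(self, msg, status_type):
        super().__init__(str(msg))
        self.msg = str(msg)
        self.status_type = status_type
        self.status = status_type(str(msg))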
Example 16
class Operator(CharmBase):
    def __init__(self, *args):
        super().__init__(*args)

        if not self.model.unit.is_leader():
            log.info("Not a leader, skipping set_pod_spec")
            self.model.unit.status = ActiveStatus()
            return

        self.image = OCIImageResource(self, "oci-image")

        self.framework.observe(self.on.install, self.set_pod_spec)
        self.framework.observe(self.on.upgrade_charm, self.set_pod_spec)
        self.framework.observe(self.on.config_changed, self.set_pod_spec)

        for rel in self.model.relations.keys():
            self.framework.observe(
                self.on[rel].relation_changed,
                self.set_pod_spec,
            )

        self.prometheus_provider = MetricsEndpointProvider(
            charm=self,
            relation_name="metrics-endpoint",
            jobs=[{
                "metrics_path":
                self.config["executor-server-metrics-port-name"],
                "static_configs": [{
                    "targets": ["*:{}".format(self.config["metrics-port"])]
                }],
            }],
        )

        self.dashboard_provider = GrafanaDashboardProvider(
            charm=self,
            relation_name="grafana-dashboard",
        )

    def set_pod_spec(self, event):
        if not self.model.unit.is_leader():
            log.info("Not a leader, skipping set_pod_spec")
            self.model.unit.status = ActiveStatus()
            return

        try:
            image_details = self.image.fetch()
        except OCIImageResourceError as e:
            self.model.unit.status = e.status
            log.info(e)
            return

        config = self.model.config
        tconfig = {k.replace("-", "_"): v for k, v in config.items()}
        tconfig["service"] = self.model.app.name
        tconfig["namespace"] = self.model.name
        env = Environment(loader=FileSystemLoader("src/templates/"), )
        envs = {
            "AMBASSADOR_ENABLED":
            str(bool(self.model.relations["ambassador"])).lower(),
            "AMBASSADOR_SINGLE_NAMESPACE":
            str(config["ambassador-single-namespace"]).lower(),
            "CONTROLLER_ID":
            config["controller-id"],
            "DEFAULT_USER_ID":
            config["default-user-id"],
            "ENGINE_CONTAINER_IMAGE_AND_VERSION":
            config["engine-container-image-and-version"],
            "ENGINE_CONTAINER_IMAGE_PULL_POLICY":
            config["engine-container-image-pull-policy"],
            "ENGINE_CONTAINER_SERVICE_ACCOUNT_NAME":
            config["engine-container-service-account-name"],
            "ENGINE_CONTAINER_USER":
            config["engine-container-user"],
            "ENGINE_DEFAULT_CPU_LIMIT":
            config["engine-default-cpu-limit"],
            "ENGINE_DEFAULT_CPU_REQUEST":
            config["engine-default-cpu-request"],
            "ENGINE_DEFAULT_MEMORY_LIMIT":
            config["engine-default-memory-limit"],
            "ENGINE_DEFAULT_MEMORY_REQUEST":
            config["engine-default-memory-request"],
            "ENGINE_LOG_MESSAGES_EXTERNALLY":
            str(config["engine-log-messages-externally"]).lower(),
            "ENGINE_PROMETHEUS_PATH":
            config["engine-prometheus-path"],
            "ENGINE_SERVER_GRPC_PORT":
            config["engine-server-grpc-port"],
            "ENGINE_SERVER_PORT":
            config["engine-server-port"],
            "EXECUTOR_CONTAINER_IMAGE_AND_VERSION":
            config["executor-container-image-and-version"],
            "EXECUTOR_CONTAINER_IMAGE_PULL_POLICY":
            config["executor-container-image-pull-policy"],
            "EXECUTOR_CONTAINER_SERVICE_ACCOUNT_NAME":
            config["executor-container-service-account-name"],
            "EXECUTOR_CONTAINER_USER":
            config["executor-container-user"],
            "EXECUTOR_DEFAULT_CPU_LIMIT":
            config["executor-default-cpu-limit"],
            "EXECUTOR_DEFAULT_CPU_REQUEST":
            config["executor-default-cpu-request"],
            "EXECUTOR_DEFAULT_MEMORY_LIMIT":
            config["executor-default-memory-limit"],
            "EXECUTOR_DEFAULT_MEMORY_REQUEST":
            config["executor-default-memory-request"],
            "EXECUTOR_PROMETHEUS_PATH":
            config["executor-prometheus-path"],
            "EXECUTOR_REQUEST_LOGGER_DEFAULT_ENDPOINT":
            config["executor-request-logger-default-endpoint"],
            "EXECUTOR_SERVER_METRICS_PORT_NAME":
            config["executor-server-metrics-port-name"],
            "EXECUTOR_SERVER_PORT":
            config["executor-server-port"],
            "ISTIO_ENABLED":
            str(bool(self.model.relations["istio"])).lower(),
            "ISTIO_GATEWAY":
            config["istio-gateway"],
            "ISTIO_TLS_MODE":
            config["istio-tls-mode"],
            "KEDA_ENABLED":
            str(bool(self.model.relations["keda"])).lower(),
            "MANAGER_CREATE_RESOURCES":
            "true",
            "POD_NAMESPACE":
            self.model.name,
            "PREDICTIVE_UNIT_DEFAULT_ENV_SECRET_REF_NAME":
            config["predictive-unit-default-env-secret-ref-name"],
            "PREDICTIVE_UNIT_METRICS_PORT_NAME":
            config["predictive-unit-metrics-port-name"],
            "PREDICTIVE_UNIT_SERVICE_PORT":
            config["predictive-unit-service-port"],
            "RELATED_IMAGE_ENGINE":
            config["related-image-engine"],
            "RELATED_IMAGE_EXECUTOR":
            config["related-image-executor"],
            "RELATED_IMAGE_EXPLAINER":
            config["related-image-explainer"],
            "RELATED_IMAGE_MLFLOWSERVER":
            config["related-image-mlflowserver"],
            "RELATED_IMAGE_MOCK_CLASSIFIER":
            config["related-image-mock-classifier"],
            "RELATED_IMAGE_SKLEARNSERVER":
            config["related-image-sklearnserver"],
            "RELATED_IMAGE_STORAGE_INITIALIZER":
            config["related-image-storage-initializer"],
            "RELATED_IMAGE_TENSORFLOW":
            config["related-image-tensorflow"],
            "RELATED_IMAGE_TFPROXY":
            config["related-image-tfproxy"],
            "RELATED_IMAGE_XGBOOSTSERVER":
            config["related-image-xgboostserver"],
            "USE_EXECUTOR":
            str(config["use-executor"]).lower(),
            "WATCH_NAMESPACE":
            config["watch-namespace"],
        }

        self.model.unit.status = MaintenanceStatus("Setting pod spec")
        self.model.pod.set_spec(
            {
                "version":
                3,
                "serviceAccount": {
                    "roles": [{
                        "global":
                        True,
                        "rules": [
                            {
                                "apiGroups": [""],
                                "resources": ["events"],
                                "verbs": ["create", "patch"],
                            },
                            {
                                "apiGroups": [""],
                                "resources": ["namespaces"],
                                "verbs": ["get", "list", "watch"],
                            },
                            {
                                "apiGroups": [""],
                                "resources": ["services"],
                                "verbs": [
                                    "create",
                                    "delete",
                                    "get",
                                    "list",
                                    "patch",
                                    "update",
                                    "watch",
                                ],
                            },
                            {
                                "apiGroups": ["apps"],
                                "resources": ["deployments"],
                                "verbs": [
                                    "create",
                                    "delete",
                                    "get",
                                    "list",
                                    "patch",
                                    "update",
                                    "watch",
                                ],
                            },
                            {
                                "apiGroups": ["apps"],
                                "resources": ["deployments/status"],
                                "verbs": ["get", "patch", "update"],
                            },
                            {
                                "apiGroups": ["autoscaling"],
                                "resources": ["horizontalpodautoscalers"],
                                "verbs": [
                                    "create",
                                    "delete",
                                    "get",
                                    "list",
                                    "patch",
                                    "update",
                                    "watch",
                                ],
                            },
                            {
                                "apiGroups": ["autoscaling"],
                                "resources":
                                ["horizontalpodautoscalers/status"],
                                "verbs": ["get", "patch", "update"],
                            },
                            {
                                "apiGroups": ["keda.sh"],
                                "resources": ["scaledobjects"],
                                "verbs": [
                                    "create",
                                    "delete",
                                    "get",
                                    "list",
                                    "patch",
                                    "update",
                                    "watch",
                                ],
                            },
                            {
                                "apiGroups": ["keda.sh"],
                                "resources": ["scaledobjects/finalizers"],
                                "verbs": [
                                    "create",
                                    "delete",
                                    "get",
                                    "list",
                                    "patch",
                                    "update",
                                    "watch",
                                ],
                            },
                            {
                                "apiGroups": ["keda.sh"],
                                "resources": ["scaledobjects/status"],
                                "verbs": ["get", "patch", "update"],
                            },
                            {
                                "apiGroups": ["machinelearning.seldon.io"],
                                "resources": ["seldondeployments"],
                                "verbs": [
                                    "create",
                                    "delete",
                                    "get",
                                    "list",
                                    "patch",
                                    "update",
                                    "watch",
                                ],
                            },
                            {
                                "apiGroups": ["machinelearning.seldon.io"],
                                "resources": ["seldondeployments/finalizers"],
                                "verbs": ["get", "patch", "update"],
                            },
                            {
                                "apiGroups": ["machinelearning.seldon.io"],
                                "resources": ["seldondeployments/status"],
                                "verbs": ["get", "patch", "update"],
                            },
                            {
                                "apiGroups": ["networking.istio.io"],
                                "resources": ["destinationrules"],
                                "verbs": [
                                    "create",
                                    "delete",
                                    "get",
                                    "list",
                                    "patch",
                                    "update",
                                    "watch",
                                ],
                            },
                            {
                                "apiGroups": ["networking.istio.io"],
                                "resources": ["destinationrules/status"],
                                "verbs": ["get", "patch", "update"],
                            },
                            {
                                "apiGroups": ["networking.istio.io"],
                                "resources": ["virtualservices"],
                                "verbs": [
                                    "create",
                                    "delete",
                                    "get",
                                    "list",
                                    "patch",
                                    "update",
                                    "watch",
                                ],
                            },
                            {
                                "apiGroups": ["networking.istio.io"],
                                "resources": ["virtualservices/status"],
                                "verbs": ["get", "patch", "update"],
                            },
                            {
                                "apiGroups": ["policy"],
                                "resources": ["poddisruptionbudgets"],
                                "verbs": [
                                    "create",
                                    "delete",
                                    "get",
                                    "list",
                                    "patch",
                                    "update",
                                    "watch",
                                ],
                            },
                            {
                                "apiGroups": ["policy"],
                                "resources": ["poddisruptionbudgets/status"],
                                "verbs": ["get", "patch", "update"],
                            },
                            {
                                "apiGroups": ["v1"],
                                "resources": ["namespaces"],
                                "verbs": ["get", "list", "watch"],
                            },
                            {
                                "apiGroups": ["v1"],
                                "resources": ["services"],
                                "verbs": [
                                    "create",
                                    "delete",
                                    "get",
                                    "list",
                                    "patch",
                                    "update",
                                    "watch",
                                ],
                            },
                            {
                                "apiGroups": ["v1"],
                                "resources": ["services/status"],
                                "verbs": ["get", "patch", "update"],
                            },
                            {
                                "apiGroups": ["apiextensions.k8s.io"],
                                "resources": ["customresourcedefinitions"],
                                "verbs": ["create", "get", "list"],
                            },
                            {
                                "apiGroups": ["admissionregistration.k8s.io"],
                                "resources":
                                ["validatingwebhookconfigurations"],
                                "verbs": [
                                    "get",
                                    "list",
                                    "create",
                                    "delete",
                                    "update",
                                ],
                            },
                            {
                                "apiGroups": [""],
                                "resources": ["configmaps"],
                                "verbs": [
                                    "get",
                                    "list",
                                    "watch",
                                    "create",
                                    "update",
                                    "patch",
                                    "delete",
                                ],
                            },
                        ],
                    }]
                },
                "containers": [{
                    "name":
                    "seldon-core",
                    "command": ["/manager"],
                    "args": [
                        "--enable-leader-election",
                        "--webhook-port",
                        config["webhook-port"],
                        "--create-resources",
                        "true",
                    ],
                    "imageDetails":
                    image_details,
                    "ports": [
                        {
                            "name": "metrics",
                            "containerPort": int(config["metrics-port"]),
                        },
                        {
                            "name": "webhook",
                            "containerPort": int(config["webhook-port"]),
                        },
                    ],
                    "envConfig":
                    envs,
                    "volumeConfig": [{
                        "name":
                        "operator-resources",
                        "mountPath":
                        "/tmp/operator-resources",
                        "files": [{
                            "path":
                            f"{name}.yaml",
                            "content":
                            env.get_template(f"{name}.yaml").render(tconfig),
                        } for name in (
                            "configmap",
                            "crd",
                            "service",
                            "validate",
                        )],
                    }],
                }],
            }, )
        self.model.unit.status = ActiveStatus()
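The pod spec above renders its operator-resources files through a Jinja2 environment named env that is created elsewhere in the charm. A minimal, self-contained sketch of such an environment is shown below; the DictLoader and its dummy template are stand-ins for the charm's real on-disk templates, not part of the original code.
# Sketch only: the Jinja2 environment the example above relies on. The real
# charm presumably loads its templates from files; a DictLoader with a dummy
# template keeps this snippet runnable on its own.
from jinja2 import DictLoader, Environment

env = Environment(loader=DictLoader({
    "configmap.yaml": "namespace: {{ namespace }}",  # stand-in template
}))

tconfig = {"namespace": "kubeflow"}  # illustrative values
rendered = env.get_template("configmap.yaml").render(tconfig)
print(rendered)  # -> namespace: kubeflow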
Example 17
class KfpApiOperator(CharmBase):
    """Charm the Kubeflow Pipelines API.

    https://github.com/canonical/kfp-operators/
    """

    def __init__(self, *args):
        super().__init__(*args)

        self.log = logging.getLogger()
        self.image = OCIImageResource(self, "oci-image")

        self.prometheus_provider = MetricsEndpointProvider(
            charm=self,
            relation_name="metrics-endpoint",
            jobs=[
                {
                    "metrics_path": METRICS_PATH,
                    "static_configs": [{"targets": ["*:{}".format(self.config["http-port"])]}],
                }
            ],
        )

        self.dashboard_provider = GrafanaDashboardProvider(self)

        change_events = [
            self.on.install,
            self.on.upgrade_charm,
            self.on.config_changed,
            self.on["mysql"].relation_changed,
            self.on["object-storage"].relation_changed,
            self.on["kfp-viz"].relation_changed,
            self.on["kfp-api"].relation_changed,
        ]
        for event in change_events:
            self.framework.observe(event, self._main)

    def _send_info(self, interfaces):
        if interfaces["kfp-api"]:
            interfaces["kfp-api"].send_data(
                {
                    "service-name": f"{self.model.app.name}.{self.model.name}",
                    "service-port": self.model.config["http-port"],
                }
            )

    def _main(self, event):
        # Set up all relations/fetch required data
        try:
            self._check_leader()
            mysql = self._get_mysql()
            interfaces = self._get_interfaces()
            image_details = self.image.fetch()
            os = self._get_object_storage(interfaces)
            viz = self._get_viz(interfaces)
        except (CheckFailedError, OCIImageResourceError) as check_failed:
            self.model.unit.status = check_failed.status
            self.log.info(str(check_failed.status))
            return

        self._send_info(interfaces)

        config, config_json = self._generate_config(mysql, os, viz)

        healthz = f"http://localhost:{config['http-port']}/apis/v1beta1/healthz"

        self.model.unit.status = MaintenanceStatus("Setting pod spec")
        self.model.pod.set_spec(
            {
                "version": 3,
                "serviceAccount": {
                    "roles": [
                        {
                            "global": True,
                            "rules": [
                                {
                                    "apiGroups": [""],
                                    "resources": ["pods", "pods/log"],
                                    "verbs": ["get", "list", "delete"],
                                },
                                {
                                    "apiGroups": ["argoproj.io"],
                                    "resources": ["workflows"],
                                    "verbs": [
                                        "create",
                                        "get",
                                        "list",
                                        "watch",
                                        "update",
                                        "patch",
                                        "delete",
                                    ],
                                },
                                {
                                    "apiGroups": ["kubeflow.org"],
                                    "resources": ["scheduledworkflows"],
                                    "verbs": [
                                        "create",
                                        "get",
                                        "list",
                                        "update",
                                        "patch",
                                        "delete",
                                    ],
                                },
                                {
                                    "apiGroups": ["authorization.k8s.io"],
                                    "resources": ["subjectaccessreviews"],
                                    "verbs": ["create"],
                                },
                                {
                                    "apiGroups": ["authentication.k8s.io"],
                                    "resources": ["tokenreviews"],
                                    "verbs": ["create"],
                                },
                            ],
                        }
                    ]
                },
                "containers": [
                    {
                        "name": "ml-pipeline-api-server",
                        "imageDetails": image_details,
                        "ports": [
                            {
                                "name": "http",
                                "containerPort": int(config["http-port"]),
                            },
                            {
                                "name": "grpc",
                                "containerPort": int(config["grpc-port"]),
                            },
                        ],
                        "envConfig": {
                            "POD_NAMESPACE": self.model.name,
                        },
                        "volumeConfig": [
                            {
                                "name": "config",
                                "mountPath": "/config",
                                "files": [
                                    {
                                        "path": "config.json",
                                        "content": json.dumps(config_json),
                                    },
                                    {
                                        "path": "sample_config.json",
                                        "content": Path("src/sample_config.json").read_text(),
                                    },
                                ],
                            }
                        ],
                        "kubernetes": {
                            "readinessProbe": {
                                "exec": {"command": ["wget", "-q", "-S", "-O", "-", healthz]},
                                "initialDelaySeconds": 3,
                                "periodSeconds": 5,
                                "timeoutSeconds": 2,
                            },
                            "livenessProbe": {
                                "exec": {"command": ["wget", "-q", "-S", "-O", "-", healthz]},
                                "initialDelaySeconds": 3,
                                "periodSeconds": 5,
                                "timeoutSeconds": 2,
                            },
                        },
                    }
                ],
            },
            k8s_resources={
                "kubernetesResources": {
                    "services": [
                        {
                            "name": config["k8s-service-name"],
                            "spec": {
                                "selector": {"app.kubernetes.io/name": self.model.app.name},
                                "ports": [
                                    {
                                        "name": "grpc",
                                        "port": int(config["grpc-port"]),
                                        "protocol": "TCP",
                                        "targetPort": int(config["grpc-port"]),
                                    },
                                    {
                                        "name": "http",
                                        "port": int(config["http-port"]),
                                        "protocol": "TCP",
                                        "targetPort": int(config["http-port"]),
                                    },
                                ],
                            },
                        },
                    ],
                }
            },
        )
        self.model.unit.status = ActiveStatus()

    def _generate_config(self, mysql, os, viz):
        config = self.model.config
        config_json = {
            "DBConfig": {
                "ConMaxLifeTimeSec": "120",
                "DBName": mysql["database"],
                "DriverName": "mysql",
                "GroupConcatMaxLen": "4194304",
                "Host": mysql["host"],
                "Password": mysql["root_password"],
                "Port": mysql["port"],
                "User": "******",
            },
            "ObjectStoreConfig": {
                "AccessKey": os["access-key"],
                "BucketName": "mlpipeline",
                "Host": f"{os['service']}.{os['namespace']}",
                "Multipart": {"Disable": "true"},
                "PipelinePath": "pipelines",
                "Port": os["port"],
                "Region": "",
                "SecretAccessKey": os["secret-key"],
                "Secure": str(os["secure"]).lower(),
            },
            "ARCHIVE_CONFIG_LOG_FILE_NAME": config["log-archive-filename"],
            "ARCHIVE_CONFIG_LOG_PATH_PREFIX": config["log-archive-prefix"],
            "AUTO_UPDATE_PIPELINE_DEFAULT_VERSION": config["auto-update-default-version"],
            "CACHE_IMAGE": config["cache-image"],
            "CACHE_NODE_RESTRICTIONS": "false",
            "CacheEnabled": str(config["cache-enabled"]).lower(),
            "DefaultPipelineRunnerServiceAccount": config["runner-sa"],
            "InitConnectionTimeout": config["init-connection-timeout"],
            "KUBEFLOW_USERID_HEADER": "kubeflow-userid",
            "KUBEFLOW_USERID_PREFIX": "",
            "MULTIUSER": "******",
            "ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_HOST": viz["service-name"],
            "ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_PORT": viz["service-port"],
        }
        return config, config_json

    def _check_leader(self):
        if not self.unit.is_leader():
            # We can't do anything useful when not the leader, so do nothing.
            raise CheckFailedError("Waiting for leadership", WaitingStatus)

    def _get_interfaces(self):
        # Remove this abstraction when SDI adds .status attribute to NoVersionsListed,
        # NoCompatibleVersionsListed:
        # https://github.com/canonical/serialized-data-interface/issues/26
        try:
            interfaces = get_interfaces(self)
        except NoVersionsListed as err:
            raise CheckFailedError(str(err), WaitingStatus)
        except NoCompatibleVersions as err:
            raise CheckFailedError(str(err), BlockedStatus)
        return interfaces

    def _get_mysql(self):
        mysql = self.model.relations["mysql"]
        if len(mysql) == 0:
            raise CheckFailedError("Missing required relation for mysql", BlockedStatus)
        elif len(mysql) > 1:
            raise CheckFailedError("Too many mysql relations", BlockedStatus)

        try:
            mysql = mysql[0]
            unit = list(mysql.units)[0]
            mysql = mysql.data[unit]
        except Exception as e:
            self.log.error(
                f"Encountered the following exception when parsing mysql relation: " f"{str(e)}"
            )
            raise CheckFailedError(
                "Unexpected error when parsing mysql relation.  See logs", BlockedStatus
            )

        expected_attributes = ["database", "host", "root_password", "port"]

        missing_attributes = [
            attribute for attribute in expected_attributes if attribute not in mysql
        ]

        if len(missing_attributes) == len(expected_attributes):
            raise CheckFailedError("Waiting for mysql relation data", WaitingStatus)
        elif len(missing_attributes) > 0:
            self.log.error(
                f"mysql relation data missing expected attributes '{missing_attributes}'"
            )
            raise CheckFailedError(
                "Received incomplete data from mysql relation.  See logs", BlockedStatus
            )
        return mysql

    def _get_object_storage(self, interfaces):
        relation_name = "object-storage"
        return self._validate_sdi_interface(interfaces, relation_name)

    def _get_viz(self, interfaces):
        relation_name = "kfp-viz"
        default_viz_data = {"service-name": "unset", "service-port": "1234"}
        return self._validate_sdi_interface(
            interfaces, relation_name, default_return=default_viz_data
        )

    def _validate_sdi_interface(self, interfaces: dict, relation_name: str, default_return=None):
        """Validates data received from SerializedDataInterface, returning the data if valid.

        Optionally can return a default_return value when no relation is established

        Raises:
            CheckFailed(..., Blocked) when no relation established (unless default_return set)
            CheckFailed(..., Blocked) if interface is not using SDI
            CheckFailed(..., Blocked) if data in interface fails schema check
            CheckFailed(..., Waiting) if we have a relation established but no data passed

        Params:
            interfaces: dict of interface objects, as returned by get_interfaces()
            relation_name: name of the relation whose data should be validated
            default_return: value to return when the relation is not established

        Returns:
              (dict) interface data
        """
        # If nothing is related to this relation, return a default value or raise an error
        if relation_name not in interfaces or interfaces[relation_name] is None:
            if default_return is not None:
                return default_return
            else:
                raise CheckFailedError(
                    f"Missing required relation for {relation_name}", BlockedStatus
                )

        relations = interfaces[relation_name]
        if not isinstance(relations, SerializedDataInterface):
            raise CheckFailedError(
                f"Unexpected error with {relation_name} relation data - data not as expected",
                BlockedStatus,
            )

        # Get and validate data from the relation
        try:
            # relations is a dict of {(ops.model.Relation, ops.model.Application): data}
            unpacked_relation_data = relations.get_data()
        except ValidationError as val_error:
            # Validation in .get_data() ensures if data is populated, it matches the schema and is
            # not incomplete
            self.log.error(val_error)
            raise CheckFailedError(
                f"Found incomplete/incorrect relation data for {relation_name}.  See logs",
                BlockedStatus,
            )

        # Check if we have an established relation with no data exchanged
        if len(unpacked_relation_data) == 0:
            raise CheckFailedError(f"Waiting for {relation_name} relation data", WaitingStatus)

        # Unpack data (we care only about the first element)
        data_dict = list(unpacked_relation_data.values())[0]

        # Catch if empty data dict is received (JSONSchema ValidationError above does not raise
        # when this happens)
        # Remove once addressed in:
        # https://github.com/canonical/serialized-data-interface/issues/28
        if len(data_dict) == 0:
            raise CheckFailedError(
                f"Found incomplete/incorrect relation data for {relation_name}.",
                BlockedStatus,
            )

        return data_dict
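CheckFailedError is raised throughout the example above but its definition is not part of this listing. A plausible minimal shape, consistent with how it is used here (a message plus a status class, exposed as .status for the caller to assign to the unit), could look like the sketch below; it is an assumption, not the charm's actual code.
# Sketch only: an exception that carries a unit status for the caller to apply.
class CheckFailedError(Exception):
    """Raise when a precondition check fails, wrapping the resulting status."""

    def __init__(self, msg: str, status_type):
        super().__init__(msg)
        self.msg = str(msg)
        self.status_type = status_type
        self.status = status_type(self.msg)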
Example 18
class OPAAuditCharm(CharmBase):
    """
    A Juju Charm for OPA
    """

    _stored = StoredState()

    def __init__(self, *args):
        super().__init__(*args)
        self.framework.observe(self.on.config_changed, self._on_config_changed)
        self.framework.observe(self.on.stop, self._on_stop)
        self.framework.observe(self.on.install, self._on_install)
        self._stored.set_default(things=[])
        self.image = OCIImageResource(self, "gatekeeper-image")

    def _on_config_changed(self, _):
        """
        Set a new Juju pod specification
        """
        self._configure_pod()

    def _on_stop(self, _):
        """
        Mark unit is inactive
        """
        self.unit.status = MaintenanceStatus("Pod is terminating.")
        logger.info("Pod is terminating.")

    def _on_install(self, event):
        logger.info("Congratulations, the charm was properly installed!")

    def _build_pod_spec(self):
        """
        Construct a Juju pod specification for OPA
        """
        logger.debug("Building Pod Spec")
        crds = []
        try:
            crds = [
                yaml.safe_load(Path(f).read_text()) for f in [
                    "files/configs.config.gatekeeper.sh.yaml",
                    "files/constrainttemplates.templates.gatekeeper.sh.yaml",
                    "files/constraintpodstatuses.status.gatekeeper.sh.yaml",
                    "files/constrainttemplatepodstatuses.status.gatekeeper.sh.yaml",
                ]
            ]
        except yaml.YAMLError as exc:
            logger.error("Error in configuration file: %s", exc)

        crd_objects = [
            CustomResourceDefintion(crd["metadata"]["name"], crd["spec"])
            for crd in crds
        ]

        config = self.model.config
        spec_template = {}
        with open("files/pod-spec.yaml.jinja2") as fh:
            spec_template = Template(fh.read())

        try:
            image_details = self.image.fetch()
        except OCIImageResourceError as e:
            self.model.unit.status = e.status
            return

        template_args = {
            "crds": crd_objects,
            "image_details": image_details,
            "imagePullPolicy": config["imagePullPolicy"],
            "app_name": self.app.name,
            "audit_cli_args": self._audit_cli_args(),
            "namespace": os.environ["JUJU_MODEL_NAME"],
        }

        spec = yaml.safe_load(spec_template.render(**template_args))

        print(f"Pod spec: {spec}")
        return spec

    def _audit_cli_args(self):
        """
        Construct command line arguments for OPA Audit
        """

        args = [
            "--operation=audit",
            "--operation=status",
            "--logtostderr",
        ]

        return args

    def _configure_pod(self):
        """
        Setup a new opa pod specification
        """
        logger.debug("Configuring Pod")

        if not self.unit.is_leader():
            self.unit.status = ActiveStatus()
            return

        self.unit.status = MaintenanceStatus("Setting pod spec.")
        pod_spec = self._build_pod_spec()

        self.model.pod.set_spec(pod_spec)
        self.unit.status = ActiveStatus()
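The _build_pod_spec method above renders files/pod-spec.yaml.jinja2, which is not included in this listing. The fragment below only illustrates the template/render/safe_load round-trip with an assumed, much-simplified template and made-up values; it is not the charm's real template.
# Illustration only: a stand-in template consumed the same way _build_pod_spec
# consumes the real files/pod-spec.yaml.jinja2.
import yaml
from jinja2 import Template

demo_template = Template(
    "version: 3\n"
    "containers:\n"
    "  - name: {{ app_name }}\n"
    "    imageDetails:\n"
    "      imagePath: \"{{ image_details['imagePath'] }}\"\n"
    "    args: {{ audit_cli_args | tojson }}\n"
)

spec = yaml.safe_load(demo_template.render(
    app_name="gatekeeper-audit",                   # assumed app name
    image_details={"imagePath": "gatekeeper:v3"},  # assumed image details
    audit_cli_args=["--operation=audit", "--operation=status", "--logtostderr"],
))
print(spec["containers"][0]["args"])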
Example 19
class Operator(CharmBase):
    """Deploys the katib-db-manager service."""
    def __init__(self, framework):
        super().__init__(framework)

        if not self.model.unit.is_leader():
            logger.info("Not a leader, skipping any work")
            self.model.unit.status = ActiveStatus()
            return

        self.image = OCIImageResource(self, "oci-image")
        self.framework.observe(self.on.install, self.set_pod_spec)
        self.framework.observe(self.on.config_changed, self.set_pod_spec)
        self.framework.observe(self.on.upgrade_charm, self.set_pod_spec)
        self.framework.observe(self.on["mysql"].relation_joined,
                               self.set_pod_spec)
        self.framework.observe(self.on["mysql"].relation_changed,
                               self.set_pod_spec)

    def set_pod_spec(self, event):
        self.model.unit.status = MaintenanceStatus("Setting pod spec")

        try:
            image_details = self.image.fetch()
        except OCIImageResourceError as e:
            self.model.unit.status = e.status
            return

        try:
            relation = self.model.relations["mysql"][0]
            unit = next(iter(relation.units))
            mysql_data = relation.data[unit]
            # Ensure we've got some data sent over the relation
            mysql_data["root_password"]
        except (IndexError, StopIteration, KeyError):
            self.model.unit.status = WaitingStatus(
                "Waiting for mysql connection information")
            return

        self.model.pod.set_spec(
            {
                "version":
                3,
                "serviceAccount": {
                    "roles": [{
                        "global":
                        True,
                        "rules": [
                            {
                                "apiGroups": [""],
                                "resources": [
                                    "configmaps",
                                    "namespaces",
                                ],
                                "verbs": ["*"],
                            },
                            {
                                "apiGroups": ["kubeflow.org"],
                                "resources": [
                                    "experiments",
                                    "trials",
                                    "suggestions",
                                ],
                                "verbs": ["*"],
                            },
                        ],
                    }]
                },
                "containers": [{
                    "name":
                    "katib-db-manager",
                    "command": ["./katib-db-manager"],
                    "imageDetails":
                    image_details,
                    "ports": [{
                        "name": "api",
                        "containerPort": self.model.config["port"],
                    }],
                    "envConfig": {
                        "DB_NAME": "mysql",
                        "DB_USER": "******",
                        "DB_PASSWORD": mysql_data["root_password"],
                        "KATIB_MYSQL_DB_HOST": mysql_data["host"],
                        "KATIB_MYSQL_DB_PORT": mysql_data["port"],
                        "KATIB_MYSQL_DB_DATABASE": mysql_data["database"],
                    },
                    "kubernetes": {
                        "readinessProbe": {
                            "exec": {
                                "command": [
                                    "/bin/grpc_health_probe",
                                    f"-addr=:{self.model.config['port']}",
                                ]
                            },
                            "initialDelaySeconds": 5,
                        },
                        "livenessProbe": {
                            "exec": {
                                "command": [
                                    "/bin/grpc_health_probe",
                                    f"-addr=:{self.model.config['port']}",
                                ]
                            },
                            "initialDelaySeconds": 10,
                            "periodSeconds": 60,
                            "failureThreshold": 5,
                        },
                    },
                }],
            }, )

        self.model.unit.status = ActiveStatus()
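A rough sketch of how this charm could be driven from a unit test with ops.testing.Harness follows. The inline metadata and config YAML, the image path, and the MariaDB relation values are all assumptions made for illustration and may not match the real charm's metadata.yaml or config.yaml.
# Sketch only: exercise set_pod_spec through the test harness.
from ops.testing import Harness

harness = Harness(
    Operator,
    meta="""
name: katib-db-manager
resources:
  oci-image:
    type: oci-image
requires:
  mysql:
    interface: mysql
""",
    config="""
options:
  port:
    type: int
    default: 6789
""",
)
harness.set_leader(True)  # the charm only does work on the leader
harness.add_oci_resource(
    "oci-image",
    {"registrypath": "katib-db-manager:test", "username": "", "password": ""},
)
harness.begin()

# Relating mysql and publishing data triggers the observed relation events,
# which in turn call set_pod_spec.
rel_id = harness.add_relation("mysql", "mariadb-k8s")
harness.add_relation_unit(rel_id, "mariadb-k8s/0")
harness.update_relation_data(
    rel_id,
    "mariadb-k8s/0",
    {"root_password": "pw", "host": "mariadb", "port": "3306", "database": "katib"},
)

# get_pod_spec() returns the (spec, k8s_resources) most recently set by the charm.
spec, _ = harness.get_pod_spec()
assert spec["containers"][0]["envConfig"]["KATIB_MYSQL_DB_HOST"] == "mariadb"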
Example 20
class MultusCharm(CharmBase):
    def __init__(self, framework, key):
        super().__init__(framework, key)
        self.multus_image = OCIImageResource(self, 'multus-image')
        self.nadm_image = OCIImageResource(self,
                                           'net-attach-def-manager-image')
        self.framework.observe(self.on.install, self.set_pod_spec)
        self.framework.observe(self.on.upgrade_charm, self.set_pod_spec)
        self.framework.observe(self.on.config_changed, self.set_pod_spec)

    def set_pod_spec(self, event):
        if not self.model.unit.is_leader():
            log.info('Not a leader, skipping set_pod_spec')
            self.model.unit.status = ActiveStatus()
            return

        try:
            multus_image_details = self.multus_image.fetch()
            nadm_image_details = self.nadm_image.fetch()
        except OCIImageResourceError as e:
            self.model.unit.status = e.status
            return

        net_attach_defs_str = self.model.config.get(
            'network-attachment-definitions', '')
        invalid_net_attach_def_status = BlockedStatus(
            'network-attachment-definitions config is invalid, see debug-log')
        try:
            net_attach_defs = list(yaml.safe_load_all(net_attach_defs_str))
        except yaml.YAMLError:
            log.error(traceback.format_exc())
            self.model.unit.status = invalid_net_attach_def_status
            return

        for net_attach_def in net_attach_defs:
            if net_attach_def.get('apiVersion') != 'k8s.cni.cncf.io/v1':
                log.error('network-attachment-definitions config is invalid:' +
                          ' apiVersion must be k8s.cni.cncf.io/v1')
                self.model.unit.status = invalid_net_attach_def_status
                return
            if net_attach_def.get('kind') != 'NetworkAttachmentDefinition':
                log.error('network-attachment-definitions config is invalid:' +
                          ' kind must be NetworkAttachmentDefinition')
                self.model.unit.status = invalid_net_attach_def_status
                return
            if not net_attach_def.get('metadata', {}).get('name'):
                log.error('network-attachment-definitions config is invalid:' +
                          ' metadata.name is required')
                self.model.unit.status = invalid_net_attach_def_status
                return
            if not net_attach_def.get('spec', {}).get('config'):
                log.error('network-attachment-definitions config is invalid:' +
                          ' spec.config is required')
                self.model.unit.status = invalid_net_attach_def_status
                return

        for net_attach_def in net_attach_defs:
            net_attach_def['metadata'].setdefault('namespace', self.model.name)

        self.model.unit.status = MaintenanceStatus('Setting pod spec')
        pod_spec = {
            'version':
            3,
            'containers': [{
                'name':
                'kube-multus',
                'imageDetails':
                multus_image_details,
                'command': ['/entrypoint.sh'],
                'args': ['--multus-conf-file=auto', '--cni-version=0.3.1'],
                'volumeConfig': [{
                    'name': 'cni',
                    'mountPath': '/host/etc/cni/net.d',
                    'hostPath': {
                        'path': '/etc/cni/net.d'
                    }
                }, {
                    'name': 'cnibin',
                    'mountPath': '/host/opt/cni/bin',
                    'hostPath': {
                        'path': '/opt/cni/bin'
                    }
                }],
                'kubernetes': {
                    'securityContext': {
                        'privileged': True
                    }
                }
            }, {
                'name':
                'net-attach-def-manager',
                'imageDetails':
                nadm_image_details,
                'volumeConfig': [{
                    'name':
                    'config',
                    'mountPath':
                    '/config',
                    'files': [{
                        'path':
                        'manifest.yaml',
                        'content':
                        yaml.safe_dump_all(net_attach_defs) or '# empty'
                    }]
                }]
            }],
            'serviceAccount': {
                'roles': [{
                    'global':
                    True,
                    'rules': [{
                        'apiGroups': ['k8s.cni.cncf.io'],
                        'resources': ['*'],
                        'verbs': ['*']
                    }, {
                        'apiGroups': [''],
                        'resources': ['pods', 'pods/status'],
                        'verbs': ['get', 'update']
                    }]
                }]
            },
            'kubernetesResources': {
                'pod': {
                    'hostNetwork': True
                },
                'customResourceDefinitions': [{
                    'name': 'network-attachment-definitions.k8s.cni.cncf.io',
                    'spec': {
                        'group':
                        'k8s.cni.cncf.io',
                        'scope':
                        'Namespaced',
                        'names': {
                            'plural': 'network-attachment-definitions',
                            'singular': 'network-attachment-definition',
                            'kind': 'NetworkAttachmentDefinition',
                            'shortNames': ['net-attach-def']
                        },
                        'versions': [{
                            'name': 'v1',
                            'served': True,
                            'storage': True
                        }],
                        'validation': {
                            'openAPIV3Schema': {
                                'type': 'object',
                                'properties': {
                                    'spec': {
                                        'type': 'object',
                                        'properties': {
                                            'config': {
                                                'type': 'string'
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }]
            }
        }
        self.model.pod.set_spec(pod_spec)
        self.model.unit.status = ActiveStatus()
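For reference, a network-attachment-definitions config value passes the validation above when it declares apiVersion k8s.cni.cncf.io/v1, kind NetworkAttachmentDefinition, a metadata.name and a spec.config. The macvlan CNI configuration below is purely illustrative.
# Illustrative only: one config value that satisfies every check in set_pod_spec.
import yaml

net_attach_defs_str = """
apiVersion: k8s.cni.cncf.io/v1
kind: NetworkAttachmentDefinition
metadata:
  name: macvlan-conf
spec:
  config: '{"cniVersion": "0.3.1", "type": "macvlan", "master": "eth0", "ipam": {"type": "host-local", "subnet": "10.1.0.0/24"}}'
"""

for nad in yaml.safe_load_all(net_attach_defs_str):
    assert nad.get("apiVersion") == "k8s.cni.cncf.io/v1"
    assert nad.get("kind") == "NetworkAttachmentDefinition"
    assert nad.get("metadata", {}).get("name")
    assert nad.get("spec", {}).get("config")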
Example 21
class KfpUiOperator(CharmBase):
    """Charm for the Kubeflow Pipelines UI.

    https://github.com/canonical/kfp-operators
    """
    def __init__(self, *args):
        super().__init__(*args)

        self.log = logging.getLogger()
        self.image = OCIImageResource(self, "oci-image")

        self.framework.observe(self.on.install, self._main)
        self.framework.observe(self.on.upgrade_charm, self._main)
        self.framework.observe(self.on.config_changed, self._main)
        self.framework.observe(self.on["object-storage"].relation_changed,
                               self._main)
        self.framework.observe(self.on["kfp-api"].relation_changed, self._main)
        self.framework.observe(self.on["ingress"].relation_changed, self._main)
        self.framework.observe(self.on["kfp-ui"].relation_changed, self._main)
        self.framework.observe(self.on.leader_elected, self._main)

    def _main(self, event):
        try:
            self._check_leader()
            interfaces = self._get_interfaces()
            image_details = self.image.fetch()
            os = self._validate_sdi_interface(interfaces, "object-storage")
            kfp_api = self._validate_sdi_interface(interfaces, "kfp-api")
        except (CheckFailedError, OCIImageResourceError) as check_failed:
            self.model.unit.status = check_failed.status
            self.log.info(str(check_failed.status))
            return

        self._send_ui_info(interfaces)
        self._send_ingress_info(interfaces)

        config = self.model.config

        healthz = f"http://localhost:{config['http-port']}/apis/v1beta1/healthz"
        charm_name = self.model.app.name
        env = {
            "ALLOW_CUSTOM_VISUALIZATIONS":
            str(config["allow-custom-visualizations"]).lower(),
            "ARGO_ARCHIVE_ARTIFACTORY":
            "minio",
            "ARGO_ARCHIVE_BUCKETNAME":
            "mlpipeline",
            "ARGO_ARCHIVE_LOGS":
            "false",
            "ARGO_ARCHIVE_PREFIX":
            "logs",
            # TODO: This should come from relation to kfp-profile-controller.  It is the name/port
            #  of the user-specific artifact accessor
            "ARTIFACTS_SERVICE_PROXY_NAME":
            "ml-pipeline-ui-artifact",
            "ARTIFACTS_SERVICE_PROXY_PORT":
            "80",
            "ARTIFACTS_SERVICE_PROXY_ENABLED":
            "true",
            "AWS_ACCESS_KEY_ID":
            "",
            "AWS_SECRET_ACCESS_KEY":
            "",
            "DISABLE_GKE_METADATA":
            "false",
            "ENABLE_AUTHZ":
            "true",
            "DEPLOYMENT":
            "KUBEFLOW",
            "HIDE_SIDENAV":
            str(config["hide-sidenav"]).lower(),
            "HTTP_AUTHORIZATION_DEFAULT_VALUE":
            "",
            "HTTP_AUTHORIZATION_KEY":
            "",
            "HTTP_BASE_URL":
            "",
            "KUBEFLOW_USERID_HEADER":
            "kubeflow-userid",
            "KUBEFLOW_USERID_PREFIX":
            "",
            "METADATA_ENVOY_SERVICE_SERVICE_HOST":
            "localhost",
            "METADATA_ENVOY_SERVICE_SERVICE_PORT":
            "9090",
            "minio-secret": {
                "secret": {
                    "name": f"{charm_name}-minio-secret"
                }
            },
            "MINIO_HOST":
            os["service"],
            "MINIO_NAMESPACE":
            os["namespace"],
            "MINIO_PORT":
            os["port"],
            "MINIO_SSL":
            os["secure"],
            "ML_PIPELINE_SERVICE_HOST":
            kfp_api["service-name"],
            "ML_PIPELINE_SERVICE_PORT":
            kfp_api["service-port"],
            "STREAM_LOGS_FROM_SERVER_API":
            "false",
            # TODO: Think there's a file here we should copy in.  Workload's logs show an error on
            #  start for this
            "VIEWER_TENSORBOARD_POD_TEMPLATE_SPEC_PATH":
            "/etc/config/viewer-pod-template.json",
            "VIEWER_TENSORBOARD_TF_IMAGE_NAME":
            "tensorflow/tensorflow",
        }

        # TODO: Not sure if this gets used.  I don't see it in regular pipeline manifests
        config_json = json.dumps(
            {"spec": {
                "serviceAccountName": "kubeflow-pipelines-viewer"
            }})

        viewer_pod_template = json.dumps(
            {"spec": {
                "serviceAccountName": "default-editor"
            }})

        self.model.unit.status = MaintenanceStatus("Setting pod spec")
        self.model.pod.set_spec(
            {
                "version":
                3,
                "serviceAccount": {
                    "roles": [{
                        "global":
                        True,
                        "rules": [
                            {
                                "apiGroups": [""],
                                "resources": ["pods", "pods/log"],
                                "verbs": ["get"],
                            },
                            {
                                "apiGroups": [""],
                                "resources": ["events"],
                                "verbs": ["list"],
                            },
                            {
                                "apiGroups": [""],
                                "resources": ["secrets"],
                                "verbs": ["get", "list"],
                            },
                            {
                                "apiGroups": ["kubeflow.org"],
                                "resources": ["viewers"],
                                "verbs": [
                                    "create",
                                    "get",
                                    "list",
                                    "watch",
                                    "delete",
                                ],
                            },
                            {
                                "apiGroups": ["argoproj.io"],
                                "resources": ["workflows"],
                                "verbs": ["get", "list"],
                            },
                        ],
                    }]
                },
                "containers": [{
                    "name":
                    "ml-pipeline-ui",
                    "imageDetails":
                    image_details,
                    "ports": [
                        {
                            "name": "http",
                            "containerPort": int(config["http-port"]),
                        },
                    ],
                    "envConfig":
                    env,
                    "volumeConfig": [
                        {
                            "name":
                            "config",
                            "mountPath":
                            "/config",
                            "files": [
                                {
                                    "path": "config.json",
                                    "content": config_json,
                                },
                            ],
                        },
                        {
                            "name":
                            "viewer-pod-template",
                            "mountPath":
                            "/etc/config",
                            "files": [
                                {
                                    "path": "viewer-pod-template.json",
                                    "content": viewer_pod_template,
                                },
                            ],
                        },
                    ],
                    "kubernetes": {
                        "readinessProbe": {
                            "exec": {
                                "command":
                                ["wget", "-q", "-S", "-O", "-", healthz]
                            },
                            "initialDelaySeconds": 3,
                            "periodSeconds": 5,
                            "timeoutSeconds": 2,
                        },
                        "livenessProbe": {
                            "exec": {
                                "command":
                                ["wget", "-q", "-S", "-O", "-", healthz]
                            },
                            "initialDelaySeconds": 3,
                            "periodSeconds": 5,
                            "timeoutSeconds": 2,
                        },
                    },
                }],
                "kubernetesResources": {
                    "secrets": [{
                        "name": f"{charm_name}-minio-secret",
                        "type": "Opaque",
                        "data": {
                            k: b64encode(v.encode("utf-8")).decode("utf-8")
                            for k, v in {
                                "MINIO_ACCESS_KEY": os["access-key"],
                                "MINIO_SECRET_KEY": os["secret-key"],
                            }.items()
                        },
                    }]
                },
            }, )
        self.model.unit.status = ActiveStatus()

    def _send_ui_info(self, interfaces):
        if interfaces["kfp-ui"]:
            interfaces["kfp-ui"].send_data({
                "service-name":
                f"{self.model.app.name}.{self.model.name}",
                "service-port":
                self.model.config["http-port"],
            })

    def _send_ingress_info(self, interfaces):
        if interfaces["ingress"]:
            interfaces["ingress"].send_data({
                "prefix":
                "/pipeline",
                "rewrite":
                "/pipeline",
                "service":
                self.model.app.name,  # TODO: Should this be name.namespace?
                "port":
                int(self.model.config["http-port"]),
            })

    def _check_leader(self):
        if not self.unit.is_leader():
            # We can't do anything useful when not the leader, so do nothing.
            raise CheckFailedError("Waiting for leadership", WaitingStatus)

    def _get_interfaces(self):
        # Remove this abstraction when SDI adds .status attribute to NoVersionsListed,
        # NoCompatibleVersionsListed:
        # https://github.com/canonical/serialized-data-interface/issues/26
        try:
            interfaces = get_interfaces(self)
        except NoVersionsListed as err:
            raise CheckFailedError(str(err), WaitingStatus)
        except NoCompatibleVersions as err:
            raise CheckFailedError(str(err), BlockedStatus)
        return interfaces

    def _validate_sdi_interface(self,
                                interfaces: dict,
                                relation_name: str,
                                default_return=None):
        """Validates data received from SerializedDataInterface, returning the data if valid.

        Optionally can return a default_return value when no relation is established
        Raises:
            CheckFailed(..., Blocked) when no relation established (unless default_return set)
            CheckFailed(..., Blocked) if interface is not using SDI
            CheckFailed(..., Blocked) if data in interface fails schema check
            CheckFailed(..., Waiting) if we have a relation established but no data passed
        Params:
            interfaces: dict of interface objects, as returned by get_interfaces()
            relation_name: name of the relation whose data should be validated
            default_return: value to return when the relation is not established
        Returns:
              (dict) interface data
        """
        # If nothing is related to this relation, return a default value or raise an error
        if relation_name not in interfaces or interfaces[relation_name] is None:
            if default_return is not None:
                return default_return
            else:
                raise CheckFailedError(
                    f"Missing required relation for {relation_name}",
                    BlockedStatus)

        relations = interfaces[relation_name]
        if not isinstance(relations, SerializedDataInterface):
            raise CheckFailedError(
                f"Unexpected error with {relation_name} relation data - data not as expected",
                BlockedStatus,
            )

        # Get and validate data from the relation
        try:
            # relations is a dict of {(ops.model.Relation, ops.model.Application): data}
            unpacked_relation_data = relations.get_data()
        except ValidationError as val_error:
            # Validation in .get_data() ensures if data is populated, it matches the schema and is
            # not incomplete
            self.log.error(val_error)
            raise CheckFailedError(
                f"Found incomplete/incorrect relation data for {relation_name}.  See logs",
                BlockedStatus,
            )

        # Check if we have an established relation with no data exchanged
        if len(unpacked_relation_data) == 0:
            raise CheckFailedError(
                f"Waiting for {relation_name} relation data", WaitingStatus)

        # Unpack data (we care only about the first element)
        data_dict = list(unpacked_relation_data.values())[0]

        # Catch if empty data dict is received (JSONSchema ValidationError above does not raise
        # when this happens)
        # Remove once addressed in:
        # https://github.com/canonical/serialized-data-interface/issues/28
        if len(data_dict) == 0:
            raise CheckFailedError(
                f"Found incomplete/incorrect relation data for {relation_name}.",
                BlockedStatus,
            )

        return data_dict
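The kubernetesResources secret above stores the MinIO credentials base64-encoded, as Kubernetes Opaque secrets require. A standalone illustration of that encoding step, with made-up credentials:
# Standalone illustration of the secret-data encoding used in the pod spec above.
from base64 import b64encode

credentials = {"MINIO_ACCESS_KEY": "minio", "MINIO_SECRET_KEY": "minio123"}  # made up
encoded = {k: b64encode(v.encode("utf-8")).decode("utf-8") for k, v in credentials.items()}
print(encoded)  # {'MINIO_ACCESS_KEY': 'bWluaW8=', 'MINIO_SECRET_KEY': 'bWluaW8xMjM='}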
Example 22
class Operator(CharmBase):
    """Deploys the katib-controller service."""

    _stored = StoredState()

    def __init__(self, framework):
        super().__init__(framework)

        self._stored.set_default(**self.gen_certs())
        self.image = OCIImageResource(self, "oci-image")

        self.prometheus_provider = MetricsEndpointProvider(
            charm=self,
            jobs=[{
                "job_name":
                "katib_controller_metrics",
                "static_configs": [{
                    "targets": [f"*:{self.config['metrics-port']}"]
                }],
            }],
        )
        self.dashboard_provider = GrafanaDashboardProvider(self)

        for event in [
                self.on.config_changed,
                self.on.install,
                self.on.leader_elected,
                self.on.upgrade_charm,
        ]:
            self.framework.observe(event, self.set_pod_spec)

    def set_pod_spec(self, event):
        self.model.unit.status = MaintenanceStatus("Setting pod spec")

        try:
            self._check_leader()

            image_details = self._check_image_details()
        except CheckFailed as check_failed:
            self.model.unit.status = check_failed.status
            return

        validating, mutating = yaml.safe_load_all(
            Path("src/webhooks.yaml").read_text())

        self.model.pod.set_spec(
            {
                "version":
                3,
                "serviceAccount": {
                    "roles": [{
                        "global":
                        True,
                        "rules": [
                            {
                                "apiGroups": [""],
                                "resources": [
                                    "configmaps",
                                    "serviceaccounts",
                                    "services",
                                    "events",
                                    "namespaces",
                                    "persistentvolumes",
                                    "persistentvolumeclaims",
                                    "pods",
                                    "pods/log",
                                    "pods/status",
                                ],
                                "verbs": ["*"],
                            },
                            {
                                "apiGroups": ["apps"],
                                "resources": ["deployments"],
                                "verbs": ["*"],
                            },
                            {
                                "apiGroups": ["rbac.authorization.k8s.io"],
                                "resources": [
                                    "roles",
                                    "rolebindings",
                                ],
                                "verbs": ["*"],
                            },
                            {
                                "apiGroups": ["batch"],
                                "resources": ["jobs", "cronjobs"],
                                "verbs": ["*"],
                            },
                            {
                                "apiGroups": ["kubeflow.org"],
                                "resources": [
                                    "experiments",
                                    "experiments/status",
                                    "experiments/finalizers",
                                    "trials",
                                    "trials/status",
                                    "trials/finalizers",
                                    "suggestions",
                                    "suggestions/status",
                                    "suggestions/finalizers",
                                    "tfjobs",
                                    "pytorchjobs",
                                    "mpijobs",
                                    "xgboostjobs",
                                    "mxjobs",
                                ],
                                "verbs": ["*"],
                            },
                        ],
                    }],
                },
                "containers": [{
                    "name":
                    "katib-controller",
                    "imageDetails":
                    image_details,
                    "command": ["./katib-controller"],
                    "args": [
                        f"--webhook-port={self.model.config['webhook-port']}",
                        "--trial-resources=Job.v1.batch",
                        "--trial-resources=TFJob.v1.kubeflow.org",
                        "--trial-resources=PyTorchJob.v1.kubeflow.org",
                        "--trial-resources=MPIJob.v1.kubeflow.org",
                        "--trial-resources=PipelineRun.v1beta1.tekton.dev",
                    ],
                    "ports": [
                        {
                            "name": "webhook",
                            "containerPort": self.model.config["webhook-port"],
                        },
                        {
                            "name": "metrics",
                            "containerPort": self.model.config["metrics-port"],
                        },
                    ],
                    "envConfig": {
                        "KATIB_CORE_NAMESPACE": self.model.name,
                    },
                    "volumeConfig": [{
                        "name":
                        "certs",
                        "mountPath":
                        "/tmp/cert",
                        "files": [
                            {
                                "path": "tls.crt",
                                "content": self._stored.cert,
                            },
                            {
                                "path": "tls.key",
                                "content": self._stored.key,
                            },
                        ],
                    }],
                }],
            },
            k8s_resources={
                "kubernetesResources": {
                    "customResourceDefinitions": [{
                        "name":
                        crd["metadata"]["name"],
                        "spec":
                        crd["spec"]
                    } for crd in yaml.safe_load_all(
                        Path("src/crds.yaml").read_text())],
                    "mutatingWebhookConfigurations": [{
                        "name":
                        mutating["metadata"]["name"],
                        "webhooks":
                        mutating["webhooks"],
                    }],
                    "validatingWebhookConfigurations": [{
                        "name":
                        validating["metadata"]["name"],
                        "webhooks":
                        validating["webhooks"],
                    }],
                },
                "configMaps": {
                    "katib-config": {
                        f: Path(f"src/{f}.json").read_text()
                        for f in (
                            "metrics-collector-sidecar",
                            "suggestion",
                            "early-stopping",
                        )
                    },
                    "trial-template": {
                        f + suffix: Path(f"src/{f}.yaml").read_text()
                        for f, suffix in (
                            ("defaultTrialTemplate", ".yaml"),
                            ("enasCPUTemplate", ""),
                            ("pytorchJobTemplate", ""),
                        )
                    },
                },
            },
        )

        self.model.unit.status = ActiveStatus()

    def gen_certs(self):
        model = self.model.name
        app = self.model.app.name
        Path("/run/ssl.conf").write_text(f"""[ req ]
default_bits = 2048
prompt = no
default_md = sha256
req_extensions = req_ext
distinguished_name = dn
[ dn ]
C = GB
ST = Canonical
L = Canonical
O = Canonical
OU = Canonical
CN = 127.0.0.1
[ req_ext ]
subjectAltName = @alt_names
[ alt_names ]
DNS.1 = {app}
DNS.2 = {app}.{model}
DNS.3 = {app}.{model}.svc
DNS.4 = {app}.{model}.svc.cluster
DNS.5 = {app}.{model}.svc.cluster.local
IP.1 = 127.0.0.1
[ v3_ext ]
authorityKeyIdentifier=keyid,issuer:always
basicConstraints=CA:FALSE
keyUsage=keyEncipherment,dataEncipherment,digitalSignature
extendedKeyUsage=serverAuth,clientAuth
subjectAltName=@alt_names""")

        check_call(["openssl", "genrsa", "-out", "/run/ca.key", "2048"])
        check_call(["openssl", "genrsa", "-out", "/run/server.key", "2048"])
        check_call([
            "openssl",
            "req",
            "-x509",
            "-new",
            "-sha256",
            "-nodes",
            "-days",
            "3650",
            "-key",
            "/run/ca.key",
            "-subj",
            "/CN=127.0.0.1",
            "-out",
            "/run/ca.crt",
        ])
        check_call([
            "openssl",
            "req",
            "-new",
            "-sha256",
            "-key",
            "/run/server.key",
            "-out",
            "/run/server.csr",
            "-config",
            "/run/ssl.conf",
        ])
        check_call([
            "openssl",
            "x509",
            "-req",
            "-sha256",
            "-in",
            "/run/server.csr",
            "-CA",
            "/run/ca.crt",
            "-CAkey",
            "/run/ca.key",
            "-CAcreateserial",
            "-out",
            "/run/cert.pem",
            "-days",
            "365",
            "-extensions",
            "v3_ext",
            "-extfile",
            "/run/ssl.conf",
        ])

        return {
            "cert": Path("/run/cert.pem").read_text(),
            "key": Path("/run/server.key").read_text(),
            "ca": Path("/run/ca.crt").read_text(),
        }

    def _check_leader(self):
        if not self.unit.is_leader():
            # We can't do anything useful when not the leader, so do nothing.
            raise CheckFailed("Waiting for leadership", WaitingStatus)

    def _check_image_details(self):
        try:
            image_details = self.image.fetch()
        except OCIImageResourceError as e:
            raise CheckFailed(f"{e.status.message}", e.status_type)
        return image_details
Example 23
class CharmedOsmBase(CharmBase):
    """CharmedOsmBase Charm."""

    state = StoredState()

    def __init__(
        self,
        *args,
        oci_image="image",
        debug_mode_config_key=None,
        debug_pubkey_config_key=None,
        vscode_workspace: Dict = {},
        mysql_uri: bool = False,
    ) -> NoReturn:
        """
        CharmedOsmBase Charm constructor

        :params: oci_image: Resource name for main OCI image
        :params: debug_mode_config_key: Key in charm config for enabling debugging mode
        :params: debug_pubkey_config_key: Key in charm config for setting debugging public ssh key
        :params: vscode_workspace: VSCode workspace
        :params: mysql_uri: Whether to pass a mysql_config object (built from the charm config) to build_pod_spec
        """
        super().__init__(*args)

        # Internal state initialization
        self.state.set_default(pod_spec=None)

        self.image = OCIImageResource(self, oci_image)
        self.debugging_supported = debug_mode_config_key and debug_pubkey_config_key
        self.debug_mode_config_key = debug_mode_config_key
        self.debug_pubkey_config_key = debug_pubkey_config_key
        self.vscode_workspace = vscode_workspace
        self.mysql_uri = mysql_uri
        # Registering regular events
        self.framework.observe(self.on.config_changed, self.configure_pod)
        self.framework.observe(self.on.leader_elected, self.configure_pod)

    def build_pod_spec(self, image_info: Dict, **kwargs):
        """
        Method to be implemented by the charm to build the pod spec

        :params: image_info: Image info details
        :params: kwargs:
                    mysql_config (opslib.osm.config.mysql.MysqlModel):
                        Mysql config object. Will be included if the charm has been initialized
                        with mysql_uri=True.
        """
        raise NotImplementedError("build_pod_spec is not implemented")

    def _debug(self, pod_spec: Dict) -> NoReturn:
        """
        Activate debugging mode in the charm

        :params: pod_spec: Pod Spec to be debugged. Note: The first container is
                           the one that will be debugged.
        """
        container = pod_spec["containers"][0]
        if "readinessProbe" in container["kubernetes"]:
            container["kubernetes"].pop("readinessProbe")
        if "livenessProbe" in container["kubernetes"]:
            container["kubernetes"].pop("livenessProbe")
        container["ports"].append({
            "name": "ssh",
            "containerPort": 22,
            "protocol": "TCP",
        })
        container["volumeConfig"].append({
            "name":
            "scripts",
            "mountPath":
            "/osm-debug-scripts",
            "files": [{
                "path":
                "debug.sh",
                "content":
                Template(DEBUG_SCRIPT).substitute(
                    pubkey=self.config[self.debug_pubkey_config_key],
                    vscode_workspace=json.dumps(
                        self.vscode_workspace,
                        sort_keys=True,
                        indent=4,
                        separators=(",", ": "),
                    ),
                ),
                "mode":
                0o777,
            }],
        })
        container["command"] = ["/osm-debug-scripts/debug.sh"]

    def _debug_if_needed(self, pod_spec):
        """
        Debug the pod_spec if needed

        :params: pod_spec: Pod Spec to be debugged.
        """
        if self.debugging_supported and self.config[
                self.debug_mode_config_key]:
            if self.debug_pubkey_config_key not in self.config:
                raise Exception("debug_pubkey config is not set")
            self._debug(pod_spec)

    def _get_build_pod_spec_kwargs(self):
        """Get kwargs for the build_pod_spec function"""
        kwargs = {}
        if self.mysql_uri:
            kwargs["mysql_config"] = MysqlModel(**self.config)
        return kwargs

    def configure_pod(self, _=None) -> NoReturn:
        """Assemble the pod spec and apply it, if possible."""
        try:
            if self.unit.is_leader():
                self.unit.status = MaintenanceStatus("Assembling pod spec")
                image_info = self.image.fetch()
                kwargs = self._get_build_pod_spec_kwargs()
                pod_spec = self.build_pod_spec(image_info, **kwargs)
                self._debug_if_needed(pod_spec)
                self._set_pod_spec(pod_spec)

            self.unit.status = ActiveStatus("ready")
        except OCIImageResourceError:
            self.unit.status = BlockedStatus(
                "Error fetching image information")
        except ValidationError as e:
            logger.error(f"Config data validation error: {e}")
            logger.debug(traceback.format_exc())
            self.unit.status = BlockedStatus(str(e))
        except RelationsMissing as e:
            logger.error(f"Relation missing error: {e.message}")
            logger.debug(traceback.format_exc())
            self.unit.status = BlockedStatus(e.message)
        except ModelError as e:
            self.unit.status = BlockedStatus(str(e))
        except Exception as e:
            error_message = f"Unknown exception: {e}"
            logger.error(error_message)
            logger.debug(traceback.format_exc())
            self.unit.status = BlockedStatus(error_message)

    def _set_pod_spec(self, pod_spec: Dict[str, Any]) -> NoReturn:
        pod_spec_hash = hash_from_dict(pod_spec)
        if self.state.pod_spec != pod_spec_hash:
            self.model.pod.set_spec(pod_spec)
            self.state.pod_spec = pod_spec_hash
            logger.debug(f"applying pod spec with hash {pod_spec_hash}")
Example No. 24
class NrfCharm(CharmBase):
    """NRF charm events class definition"""

    state = StoredState()

    def __init__(self, *args):
        """NRF charm constructor."""
        super().__init__(*args)
        # Internal state initialization
        self.state.set_default(pod_spec=None)

        self.image = OCIImageResource(self, "image")

        # Registering regular events
        self.framework.observe(self.on.config_changed, self.configure_pod)

        # Registering required relation changed events
        self.framework.observe(self.on.mongodb_relation_changed,
                               self._on_mongodb_relation_changed)

        # Registering required relation broken events
        self.framework.observe(self.on.mongodb_relation_broken,
                               self._on_mongodb_relation_broken)

        # -- initialize states --
        self.state.set_default(mongodb_host=None, mongodb_uri=None)

    def publish_nrf_info(self, _=None) -> NoReturn:
        """Publishes NRF information
        relation.7
        """
        if self.unit.is_leader():
            relation_id = self.model.relations.__getitem__("nrf")
            for i in relation_id:
                relation = self.model.get_relation("nrf", i.id)
                relation.data[self.model.app]["hostname"] = self.model.app.name

    def _on_mongodb_relation_changed(self, event: EventBase) -> NoReturn:
        """Reads information about the MongoDB relation.

        Args:
             event (EventBase): MongoDB relation event.
        """
        if event.app not in event.relation.data:
            return
        mongodb_host = event.relation.data[event.app].get("hostname")
        mongodb_uri = event.relation.data[event.app].get("mongodb_uri")
        validate_mongodb = mongodb_host and mongodb_uri
        host_state = self.state.mongodb_host != mongodb_host
        uri_state = self.state.mongodb_uri != mongodb_uri
        validate_state = host_state or uri_state
        if validate_mongodb and validate_state:
            self.state.mongodb_host = mongodb_host
            self.state.mongodb_uri = mongodb_uri
            self.configure_pod()

    def _on_mongodb_relation_broken(self, _=None) -> NoReturn:
        """Clears data from MongoDB relation."""
        self.state.mongodb_host = None
        self.state.mongodb_uri = None
        self.configure_pod()

    def _missing_relations(self) -> str:
        """Checks if there missing relations.

        Returns:
            str: string with missing relations
        """
        data_status = {"mongodb": self.state.mongodb_uri}
        missing_relations = [k for k, v in data_status.items() if not v]
        return ", ".join(missing_relations)

    @property
    def relation_state(self) -> Dict[str, Any]:
        """Collects relation state configuration for pod spec assembly.

        Returns:
            Dict[str, Any]: relation state information.
        """
        relation_state = {
            "mongodb_host": self.state.mongodb_host,
            "mongodb_uri": self.state.mongodb_uri,
        }

        return relation_state

    def configure_pod(self, _=None) -> NoReturn:
        """Assemble the pod spec and apply it, if possible."""
        missing = self._missing_relations()
        if missing:
            status = "Waiting for {0} relation{1}"
            self.unit.status = BlockedStatus(
                status.format(missing, "s" if "," in missing else ""))
            return
        if not self.unit.is_leader():
            self.unit.status = ActiveStatus("ready")
            return

        self.unit.status = MaintenanceStatus("Assembling pod spec")

        # Fetch image information
        try:
            self.unit.status = MaintenanceStatus("Fetching image information")
            image_info = self.image.fetch()
        except OCIImageResourceError:
            self.unit.status = BlockedStatus(
                "Error fetching image information")
            return

        try:
            pod_spec = make_pod_spec(
                image_info,
                self.model.config,
                self.relation_state,
                self.model.app.name,
            )
        except ValueError as exc:
            logger.exception("Config/Relation data validation error")
            self.unit.status = BlockedStatus(str(exc))
            return

        if self.state.pod_spec != pod_spec:
            self.model.pod.set_spec(pod_spec)
            self.state.pod_spec = pod_spec

        self.unit.status = ActiveStatus("ready")
        self.publish_nrf_info()
Example No. 25
class RancherCharm(CharmBase):
    def __init__(self, *args):
        super().__init__(*args)
        if not self.unit.is_leader():
            # We can't do anything useful when not the leader, so do nothing.
            self.model.unit.status = WaitingStatus('Waiting for leadership')
            return
        self.log = logging.getLogger(__name__)
        self.rancher_image = OCIImageResource(self, 'rancher-image')
        for event in [
                self.on.install, self.on.leader_elected, self.on.upgrade_charm,
                self.on.config_changed
        ]:
            self.framework.observe(event, self.main)

    def main(self, event):

        try:
            rancher_image_details = self.rancher_image.fetch()
        except OCIImageResourceError as e:
            self.model.unit.status = e.status
            return

        self.model.unit.status = MaintenanceStatus('Setting pod spec')

        self.model.pod.set_spec({
            'version':
            3,
            'service': {
                'updateStrategy': {
                    'type': 'RollingUpdate',
                    'rollingUpdate': {
                        'maxUnavailable': 1
                    },
                },
            },
            'configMaps': {
                'kubernetes-dashboard-settings': {},
            },
            'containers': [
                {
                    'name':
                    self.model.app.name,
                    'imageDetails':
                    rancher_image_details,
                    'imagePullPolicy':
                    'Always',
                    'ports': [
                        {
                            'name': 'rancher',
                            'containerPort': 80,
                            'protocol': 'TCP',
                        },
                    ],
                    'args': [
                        "--https-listen-port=80",
                        "--https-listen-port=443",
                        "--add-local=true",
                        "--debug",
                    ],
                    'envConfig': {
                        'CATTLE_NAMESPACE': self.model.name,
                        'CATTLE_PEER_SERVICE': self.model.app.name,
                    },
                    'kubernetes': {
                        'livenessProbe': {
                            'httpGet': {
                                'scheme': 'HTTPS',
                                'path': '/healthz',
                                'port': 80,
                            },
                            'initialDelaySeconds': 60,
                            'periodSeconds': 30,
                        },
                        'readinessProbe': {
                            'httpGet': {
                                'scheme': 'HTTPS',
                                'path': '/healthz',
                                'port': 80,
                            },
                            'initialDelaySeconds': 5,
                            'periodSeconds': 30,
                        },
                    },
                },
            ],
            'serviceAccount': {
                'roles': [{
                    'global':
                    True,
                    'rules': [
                        {
                            'apiGroups': ["*"],
                            'resources': ["*"],
                            'verbs': ["*"],
                        },
                        {
                            'nonResourceURLs': ["*"],
                            'verbs': ["*"],
                        },
                    ],
                }],
            },
        })
        self.model.unit.status = ActiveStatus()
Example No. 26
class AdmissionWebhookCharm(CharmBase):
    """Deploys the admission-webhook service.

    Handles injecting common data such as secrets and environment variables
    into Kubeflow pods.
    """
    def __init__(self, framework):
        super().__init__(framework)
        self.image = OCIImageResource(self, "oci-image")
        self.framework.observe(self.on.install, self.set_pod_spec)
        self.framework.observe(self.on.upgrade_charm, self.set_pod_spec)
        self.framework.observe(
            self.on.pod_defaults_relation_changed,
            self.set_pod_spec,
        )

    def set_pod_spec(self, event):
        if not self.model.unit.is_leader():
            logger.info("Not a leader, skipping set_pod_spec")
            self.model.unit.status = ActiveStatus()
            return

        self.model.unit.status = MaintenanceStatus("Setting pod spec")

        pod_defaults = {
            key.name: dict(value)["pod-defaults"]
            for relation in self.model.relations["pod-defaults"]
            for key, value in relation.data.items()
            if isinstance(key, Application) and not key._is_our_app
        }
        custom_resources = {
            "poddefaults.kubeflow.org":
            [{
                "apiVersion": "kubeflow.org/v1alpha1",
                "kind": "PodDefault",
                "metadata": {
                    "name": f"{charm}-{name}",
                },
                "spec": {
                    "selector": {
                        "matchLabels": {
                            f"{charm}-{name}": "true"
                        },
                    },
                    "env": [{
                        "name": k,
                        "value": v
                    } for k, v in value["env"].items()],
                },
            } for charm, defaults in pod_defaults.items()
             for name, value in json.loads(defaults).items()],
        }

        try:
            image_details = self.image.fetch()
        except OCIImageResourceError as e:
            self.model.unit.status = e.status
            return

        model = os.environ["JUJU_MODEL_NAME"]

        gen_certs(model, hookenv.service_name())

        ca_bundle = b64encode(
            Path("/run/cert.pem").read_bytes()).decode("utf-8")

        self.model.pod.set_spec(
            {
                "version":
                3,
                "serviceAccount": {
                    "roles": [{
                        "global":
                        True,
                        "rules": [
                            {
                                "apiGroups": ["kubeflow.org"],
                                "resources": ["poddefaults"],
                                "verbs": [
                                    "get",
                                    "list",
                                    "watch",
                                    "update",
                                    "create",
                                    "patch",
                                    "delete",
                                ],
                            },
                        ],
                    }],
                },
                "containers": [{
                    "name":
                    "admission-webhook",
                    "imageDetails":
                    image_details,
                    "ports": [{
                        "name": "webhook",
                        "containerPort": 443
                    }],
                    "volumeConfig": [{
                        "name":
                        "certs",
                        "mountPath":
                        "/etc/webhook/certs",
                        "files": [
                            {
                                "path": "cert.pem",
                                "content": Path("/run/cert.pem").read_text(),
                            },
                            {
                                "path": "key.pem",
                                "content": Path("/run/server.key").read_text(),
                            },
                        ],
                    }],
                }],
            },
            k8s_resources={
                "kubernetesResources": {
                    "customResourceDefinitions": [{
                        "name":
                        crd["metadata"]["name"],
                        "spec":
                        crd["spec"]
                    } for crd in yaml.safe_load_all(
                        Path("src/crds.yaml").read_text())],
                    "customResources":
                    custom_resources,
                    "mutatingWebhookConfigurations": [{
                        "name":
                        "admission-webhook",
                        "webhooks": [
                            {
                                "name":
                                "admission-webhook.kubeflow.org",
                                "failurePolicy":
                                "Fail",
                                "clientConfig": {
                                    "caBundle": ca_bundle,
                                    "service": {
                                        "name": hookenv.service_name(),
                                        "namespace": model,
                                        "path": "/apply-poddefault",
                                    },
                                },
                                "objectSelector": {
                                    "matchExpressions": [
                                        {
                                            "key": "juju-app",
                                            "operator": "NotIn",
                                            "values": ["admission-webhook"],
                                        },
                                        {
                                            "key": "app.kubernetes.io/name",
                                            "operator": "NotIn",
                                            "values": ["admission-webhook"],
                                        },
                                        {
                                            "key": "juju-operator",
                                            "operator": "NotIn",
                                            "values": ["admission-webhook"],
                                        },
                                        {
                                            "key": "operator.juju.is/name",
                                            "operator": "NotIn",
                                            "values": ["admission-webhook"],
                                        },
                                    ]
                                },
                                "rules": [{
                                    "apiGroups": [""],
                                    "apiVersions": ["v1"],
                                    "operations": ["CREATE"],
                                    "resources": ["pods"],
                                }],
                            },
                        ],
                    }],
                }
            },
        )

        self.model.unit.status = ActiveStatus()
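# Self-contained sketch of the pod_defaults -> PodDefault transformation used in
# set_pod_spec above; the sample relation payload below is purely illustrative.
import json

sample_pod_defaults = {
    "some-charm": json.dumps({
        "access-minio": {"env": {"MINIO_ENDPOINT": "minio:9000"}},
    }),
}

sample_custom_resources = {
    "poddefaults.kubeflow.org": [{
        "apiVersion": "kubeflow.org/v1alpha1",
        "kind": "PodDefault",
        "metadata": {"name": f"{charm}-{name}"},
        "spec": {
            "selector": {"matchLabels": {f"{charm}-{name}": "true"}},
            "env": [{"name": k, "value": v} for k, v in value["env"].items()],
        },
    } for charm, defaults in sample_pod_defaults.items()
      for name, value in json.loads(defaults).items()],
}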
Example No. 27
class RedisCharm(CharmBase):
    state = StoredState()

    def __init__(self, *args):
        super().__init__(*args)
        logger.debug('Initializing charm')

        self.redis = RedisClient(host=self.model.app.name, port=DEFAULT_PORT)
        self.image = OCIImageResource(self, "redis-image")

        self.framework.observe(self.on.start, self.on_start)
        self.framework.observe(self.on.stop, self.on_stop)
        self.framework.observe(self.on.config_changed, self.configure_pod)
        self.framework.observe(self.on.upgrade_charm, self.configure_pod)
        self.framework.observe(self.on.update_status, self.update_status)
        self.framework.observe(self.on["datastore"].relation_changed,
                               self.relation_changed)

    @log_event_handler
    def on_start(self, event):
        """Initialize Redis.

        This event handler is deferred if initialization of Redis fails.
        """
        if not self.unit.is_leader():
            self.unit.status = ActiveStatus()
            return

        if not self.redis.is_ready():
            self.unit.status = WaitingStatus(WAITING_FOR_REDIS_MSG)
            logger.debug(
                "{}: deferring on_start".format(WAITING_FOR_REDIS_MSG))
            event.defer()
            return

        self.set_ready_status()

    @log_event_handler
    def on_stop(self, _):
        """Mark terminating unit as inactive.
        """
        self.redis.close()
        self.unit.status = MaintenanceStatus('Pod is terminating.')

    @log_event_handler
    def configure_pod(self, event):
        """Applies the pod configuration.
        """
        if not self.unit.is_leader():
            logger.debug("Spec changes ignored by non-leader")
            self.unit.status = ActiveStatus()
            return

        self.unit.status = WaitingStatus("Fetching image information ...")
        try:
            image_info = self.image.fetch()
        except OCIImageResourceError:
            self.unit.status = BlockedStatus(
                "Error fetching image information.")
            return

        # Build Pod spec
        builder = PodSpecBuilder(
            name=self.model.app.name,
            port=DEFAULT_PORT,
            image_info=image_info,
        )

        spec = builder.build_pod_spec()
        logger.debug("Pod spec: \n{}".format(yaml.dump(spec)))

        # Applying pod spec. If the spec hasn't changed, this has no effect.
        logger.debug("Applying pod spec.")
        self.model.pod.set_spec(spec)

        if not self.redis.is_ready():
            self.unit.status = WaitingStatus(WAITING_FOR_REDIS_MSG)
            logger.debug(
                "{}: deferring configure_pod".format(WAITING_FOR_REDIS_MSG))
            event.defer()
            return

        self.set_ready_status()

    @log_event_handler
    def update_status(self, _):
        """Set status for all units.

        Status may be
        - Redis API server not reachable (service is not ready),
        - Ready
        """
        if not self.unit.is_leader():
            self.unit.status = ActiveStatus()
            return

        if not self.redis.is_ready():
            self.unit.status = WaitingStatus(WAITING_FOR_REDIS_MSG)
            return

        self.set_ready_status()

    @log_event_handler
    def relation_changed(self, event):
        """This event handler pass the host and port to the remote unit.
         Any Redis client is provided with the following information
        - Redis host
        - Redis port

        Using this information a client can establish a connection with
        Redis, for instances using the redis Python library.
        """

        if not self.unit.is_leader():
            logger.debug("Relation changes ignored by non-leader")
            return

        event.relation.data[self.unit]['hostname'] = str(
            self.bind_address(event))
        event.relation.data[self.unit]['port'] = str(DEFAULT_PORT)
        # The reactive Redis charm exposes also 'password'. When tackling
        # https://github.com/canonical/redis-operator/issues/7 add 'password'
        # field so that it matches the exposed interface information from it.
        # event.relation.data[self.unit]['password'] = ''

    def bind_address(self, event):
        relation = self.model.get_relation(event.relation.name,
                                           event.relation.id)
        if address := self.model.get_binding(relation).network.bind_address:
            return address
        return self.app.name
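# Hedged sketch of the client side described in relation_changed above: a
# requirer reads the published hostname/port from the relation data and connects
# with the redis Python library (assumed to be available on the client side).
import redis


def redis_client_from_relation_data(data: dict) -> redis.Redis:
    """Build a Redis client from the hostname/port the charm publishes."""
    return redis.Redis(host=data["hostname"], port=int(data["port"]))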
Example No. 28
class NssfCharm(CharmBase):
    """NSSF charm events class definition"""

    state = StoredState()

    def __init__(self, *args):
        """NSSF charm constructor."""
        super().__init__(*args)
        self.state.set_default(pod_spec=None)

        self.image = OCIImageResource(self, "image")

        # Registering regular events
        self.framework.observe(self.on.config_changed, self.configure_pod)

        # Registering required relation changed events
        self.framework.observe(self.on.nrf_relation_changed,
                               self._on_nrf_relation_changed)

        # Registering required relation broken events
        self.framework.observe(self.on.nrf_relation_broken,
                               self._on_nrf_relation_broken)

        # -- initialize states --
        self.state.set_default(nrf_host=None)

    def _on_nrf_relation_changed(self, event: EventBase) -> NoReturn:
        """Reads information about the NRF relation.

        Args:
           event (EventBase): NRF relation event.
        """
        if event.app not in event.relation.data:
            return

        nrf_host = event.relation.data[event.app].get("hostname")
        if nrf_host and self.state.nrf_host != nrf_host:
            self.state.nrf_host = nrf_host
            self.configure_pod()

    def _on_nrf_relation_broken(self, _=None) -> NoReturn:
        """Clears data from NRF relation."""
        self.state.nrf_host = None
        self.configure_pod()

    def _missing_relations(self) -> str:
        """Checks if there missing relations.

        Returns:
            str: string with missing relations
        """
        data_status = {"nrf": self.state.nrf_host}
        missing_relations = [k for k, v in data_status.items() if not v]
        return ", ".join(missing_relations)

    @property
    def relation_state(self) -> Dict[str, Any]:
        """Collects relation state configuration for pod spec assembly.

        Returns:
            Dict[str, Any]: relation state information.
        """
        relation_state = {"nrf_host": self.state.nrf_host}

        return relation_state

    def configure_pod(self, _=None) -> NoReturn:
        """Assemble the pod spec and apply it, if possible."""
        missing = self._missing_relations()
        if missing:
            self.unit.status = BlockedStatus(
                "Waiting for {0} relation{1}".format(
                    missing, "s" if "," in missing else ""))
            return
        if not self.unit.is_leader():
            self.unit.status = ActiveStatus("ready")
            return

        self.unit.status = MaintenanceStatus("Assembling pod spec")

        # Fetch image information
        try:
            self.unit.status = MaintenanceStatus("Fetching image information")
            image_info = self.image.fetch()
        except OCIImageResourceError:
            self.unit.status = BlockedStatus(
                "Error fetching image information")
            return

        try:
            pod_spec = make_pod_spec(
                image_info,
                self.model.config,
                self.model.app.name,
                self.relation_state,
            )
        except ValueError as exc:
            logger.exception("Config data validation error")
            self.unit.status = BlockedStatus(str(exc))
            return

        if self.state.pod_spec != pod_spec:
            self.model.pod.set_spec(pod_spec)
            self.state.pod_spec = pod_spec

        self.unit.status = ActiveStatus("ready")
Example No. 29
class Operator(CharmBase):
    """Deploys the katib-ui service."""

    def __init__(self, framework):
        super().__init__(framework)

        self.image = OCIImageResource(self, "oci-image")
        self.framework.observe(self.on.install, self.set_pod_spec)
        self.framework.observe(self.on.upgrade_charm, self.set_pod_spec)
        self.framework.observe(
            self.on["ingress"].relation_changed,
            self.set_pod_spec,
        )

    def set_pod_spec(self, event):
        try:
            self._check_leader()
            interfaces = self._get_interfaces()
            image_details = self._check_image_details()
        except CheckFailed as check_failed:
            self.model.unit.status = check_failed.status
            return

        self._configure_ingress(interfaces)

        self.model.unit.status = MaintenanceStatus("Setting pod spec")

        self.model.pod.set_spec(
            {
                "version": 3,
                "serviceAccount": {
                    "roles": [
                        {
                            "global": True,
                            "rules": [
                                {
                                    "apiGroups": [""],
                                    "resources": [
                                        "configmaps",
                                        "namespaces",
                                    ],
                                    "verbs": ["*"],
                                },
                                {
                                    "apiGroups": ["kubeflow.org"],
                                    "resources": [
                                        "experiments",
                                        "trials",
                                        "suggestions",
                                    ],
                                    "verbs": ["*"],
                                },
                            ],
                        }
                    ]
                },
                "containers": [
                    {
                        "name": "katib-ui",
                        "command": ["./katib-ui"],
                        "args": [f"--port={self.model.config['port']}"],
                        "imageDetails": image_details,
                        "ports": [
                            {
                                "name": "http",
                                "containerPort": self.model.config["port"],
                            }
                        ],
                        "envConfig": {
                            "KATIB_CORE_NAMESPACE": self.model.name,
                        },
                    }
                ],
            },
        )

        self.model.unit.status = ActiveStatus()

    def _configure_ingress(self, interfaces):
        if interfaces["ingress"]:
            interfaces["ingress"].send_data(
                {
                    "prefix": "/katib/",
                    "service": self.model.app.name,
                    "port": self.model.config["port"],
                }
            )

    def _check_leader(self):
        if not self.unit.is_leader():
            # We can't do anything useful when not the leader, so do nothing.
            raise CheckFailed("Waiting for leadership", WaitingStatus)

    def _get_interfaces(self):
        try:
            interfaces = get_interfaces(self)
        except NoVersionsListed as err:
            raise CheckFailed(err, WaitingStatus)
        except NoCompatibleVersions as err:
            raise CheckFailed(err, BlockedStatus)
        return interfaces

    def _check_image_details(self):
        try:
            image_details = self.image.fetch()
        except OCIImageResourceError as e:
            raise CheckFailed(f"{e.status.message}", e.status_type)
        return image_details
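# A minimal sketch of the CheckFailed helper these charms rely on: it carries a
# message plus a status class so callers can do `unit.status = exc.status`.
# This is an assumption about the helper's shape, not its canonical implementation.
class CheckFailed(Exception):
    """Raised when a pre-flight check fails; exposes the status to set."""

    def __init__(self, msg, status_type):
        super().__init__(str(msg))
        self.msg = str(msg)
        self.status_type = status_type
        self.status = status_type(str(msg))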
Example No. 30
class RedisCharm(CharmBase):
    state = StoredState()

    def __init__(self, *args):
        super().__init__(*args)
        logger.debug('Initializing charm')

        self.state.set_default(pod_spec=None)

        self.redis = RedisClient(host=self.model.app.name, port=DEFAULT_PORT)
        self.image = OCIImageResource(self, "redis-image")

        self.framework.observe(self.on.start, self.on_start)
        self.framework.observe(self.on.stop, self.on_stop)
        self.framework.observe(self.on.config_changed, self.configure_pod)
        self.framework.observe(self.on.upgrade_charm, self.configure_pod)
        self.framework.observe(self.on.update_status, self.update_status)

    @log_event_handler
    def on_start(self, event):
        """Initialize Redis.

        This event handler is deferred if initialization of Redis fails.
        """
        if not self.unit.is_leader():
            self.unit.status = ActiveStatus()
            return

        if not self.redis.is_ready():
            self.unit.status = WaitingStatus(WAITING_FOR_REDIS_MSG)
            logger.debug(
                "{}: deferring on_start".format(WAITING_FOR_REDIS_MSG))
            event.defer()
            return

        self.set_ready_status()

    @log_event_handler
    def on_stop(self, _):
        """Mark terminating unit as inactive.
        """
        self.redis.close()
        self.unit.status = MaintenanceStatus('Pod is terminating.')

    @log_event_handler
    def configure_pod(self, event):
        """Applies the pod configuration.
        """
        if not self.unit.is_leader():
            logger.debug("Spec changes ignored by non-leader")
            self.unit.status = ActiveStatus()
            return

        self.unit.status = WaitingStatus("Fetching image information ...")
        try:
            image_info = self.image.fetch()
        except OCIImageResourceError:
            self.unit.status = BlockedStatus(
                "Error fetching image information.")
            return

        # Build Pod spec
        builder = PodSpecBuilder(
            name=self.model.app.name,
            port=DEFAULT_PORT,
            image_info=image_info,
        )

        spec = builder.build_pod_spec()
        logger.debug("Pod spec: \n{}".format(yaml.dump(spec)))

        # Update pod spec if the generated one is different
        # from the one previously applied.
        if self.state.pod_spec == spec:
            logger.debug("Discarding pod spec because it has not changed.")
        else:
            logger.debug("Applying new pod spec.")
            self.model.pod.set_spec(spec)
            self.state.pod_spec = spec

        if not self.redis.is_ready():
            self.unit.status = WaitingStatus(WAITING_FOR_REDIS_MSG)
            logger.debug(
                "{}: deferring configure_pod".format(WAITING_FOR_REDIS_MSG))
            event.defer()
            return

        self.set_ready_status()

    @log_event_handler
    def update_status(self, _):
        """Set status for all units.

        Status may be
        - Redis API server not reachable (service is not ready),
        - Ready
        """
        if not self.unit.is_leader():
            self.unit.status = ActiveStatus()
            return

        if not self.redis.is_ready():
            self.unit.status = WaitingStatus(WAITING_FOR_REDIS_MSG)
            return

        self.set_ready_status()

    def set_ready_status(self):
        logger.debug('Pod is ready.')
        self.unit.status = ActiveStatus()
        self.app.status = ActiveStatus()
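# Hedged sketch of the PodSpecBuilder used by the Redis charms above; the real
# builder lives in the charm's own module, so the container layout here is an
# assumption for illustration.
class PodSpecBuilder:
    """Builds a minimal pod spec for a single-container Redis workload."""

    def __init__(self, name, port, image_info):
        self.name = name
        self.port = port
        self.image_info = image_info

    def build_pod_spec(self):
        return {
            "version": 3,
            "containers": [{
                "name": self.name,
                "imageDetails": self.image_info,
                "ports": [{"name": "redis", "containerPort": self.port, "protocol": "TCP"}],
            }],
        }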