def __init__(self, framework):
    super().__init__(framework)
    if not self.model.unit.is_leader():
        logger.info("Not a leader, skipping any work")
        self.model.unit.status = ActiveStatus()
        return

    self.image = OCIImageResource(self, "oci-image")
    self.framework.observe(self.on.install, self.set_pod_spec)
    self.framework.observe(self.on.upgrade_charm, self.set_pod_spec)
def build_juju_unit_status(pod_status):
    if pod_status.is_unknown:
        unit_status = MaintenanceStatus("Waiting for pod to appear")
    elif not pod_status.is_running:
        unit_status = MaintenanceStatus("Pod is starting")
    elif pod_status.is_running and not pod_status.is_ready:
        unit_status = MaintenanceStatus("Pod is getting ready")
    elif pod_status.is_running and pod_status.is_ready:
        unit_status = ActiveStatus()
    return unit_status
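# A minimal usage sketch (not from the original charm) for build_juju_unit_status above.
# It assumes only that pod_status exposes the three boolean attributes the function reads;
# SimpleNamespace stands in for the real pod-status wrapper.
from types import SimpleNamespace

from ops.model import ActiveStatus, MaintenanceStatus

ready_pod = SimpleNamespace(is_unknown=False, is_running=True, is_ready=True)
starting_pod = SimpleNamespace(is_unknown=False, is_running=False, is_ready=False)

assert build_juju_unit_status(ready_pod) == ActiveStatus()
assert build_juju_unit_status(starting_pod) == MaintenanceStatus("Pod is starting")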
def configure_pod(self, _=None) -> None:
    """Assemble the pod spec and apply it, if possible."""
    missing = self._missing_relations()
    if missing:
        status = "Waiting for {0} relation{1}"
        self.unit.status = BlockedStatus(
            status.format(missing, "s" if "," in missing else ""))
        return

    if not self.unit.is_leader():
        self.unit.status = ActiveStatus("ready")
        return

    self.unit.status = MaintenanceStatus("Assembling pod spec")

    # Fetch image information
    try:
        self.unit.status = MaintenanceStatus("Fetching image information")
        image_info = self.image.fetch()
    except OCIImageResourceError:
        self.unit.status = BlockedStatus("Error fetching image information")
        return

    try:
        pod_spec = make_pod_spec(
            image_info,
            self.model.config,
            self.relation_state,
            self.model.app.name,
        )
    except ValueError as exc:
        logger.exception("Config/Relation data validation error")
        self.unit.status = BlockedStatus(str(exc))
        return

    if self.state.pod_spec != pod_spec:
        self.model.pod.set_spec(pod_spec)
        self.state.pod_spec = pod_spec

    self.unit.status = ActiveStatus("ready")
    self.publish_nrf_info()
def _check_credentials(self):
    if self.client.is_ready:
        return
    try:
        result = subprocess.run(['credential-get', '--format=json'],
                                check=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        creds = json.loads(result.stdout.decode('utf8'))
        endpoint = creds['endpoint']
        client_cert = creds['credential']['attrs']['client-cert']
        client_key = creds['credential']['attrs']['client-key']
        server_cert = creds['credential']['attrs']['server-cert']
        if endpoint and client_cert and client_key and server_cert:
            self.client.set_credentials(endpoint, client_cert, client_key,
                                        server_cert)
            self.model.unit.status = ActiveStatus()
            return True
    except json.JSONDecodeError as e:
        logger.warning(
            'Failed to parse JSON from credential-get: {}'.format(e.msg))
    except FileNotFoundError:
        pass
    except subprocess.CalledProcessError as e:
        if 'permission denied' not in e.stderr.decode('utf8'):
            raise

    endpoint = self.model.config['lxd_endpoint']
    client_cert = self.model.config['lxd_client_cert']
    client_key = self.model.config['lxd_client_key']
    server_cert = self.model.config['lxd_server_cert']
    if endpoint and client_cert and client_key and server_cert:
        self.client.set_credentials(endpoint, client_cert, client_key,
                                    server_cert)
        self.model.unit.status = ActiveStatus()
        return True

    self.model.unit.status = BlockedStatus(
        'Missing credentials access; grant with: juju trust')
    return False
def _on_install(self, _):
    self.unit.status = MaintenanceStatus("Installing dependencies")
    subprocess.run(["apt", "update"])
    subprocess.run(["apt", "install", "-y", "git", "python3-pip"])  # , "openssh-server"
    self.unit.status = MaintenanceStatus("Installing ML app")
    repo_path = "https://github.com/daviddvs/ml_nfv_ec.git"
    wd = os.path.expanduser('~') + "/ml_nfv_ec"
    subprocess.run(["git", "clone", repo_path, wd])
    wd = wd + "/mon"
    subprocess.run(["git", "checkout", "devel"], cwd=wd)
    subprocess.run(["pip3", "install", "-r", "requirements.txt"], cwd=wd)
    self.unit.status = ActiveStatus("ML app installed")
def on_upgrade_charm(self, event):
    if not hasattr(self.state, 'installed'):
        self.on_install(event)
        return
    try:
        check_call(['apt-get', 'update', '-qq'])
        check_call(['apt-get', 'dist-upgrade', '-y'])
        event.framework.model.unit.status = ActiveStatus()
    except Exception as e:
        event.framework.model.unit.status = BlockedStatus('{}: {}'.format(
            "upgrade failed", e))
        event.defer()
def _on_initial_config(self, event: PebbleReadyEvent) -> None:
    if not self._is_workload_ready():
        msg = "Workload is not ready"
        logging.info(msg)
        self.unit.status = WaitingStatus(msg)
        event.defer()
        return

    self.unit.status = WaitingStatus("Performing initial config")
    self._on_update_admin_password_action(event)
    self.unit.status = ActiveStatus("ready")
def _on_peer_relation_changed(self, event: RelationChangedEvent) -> None:
    """Handle the peer relation changed event."""
    # Only execute if peer relation data contains cluster config values
    if not self._is_peer_data_set:
        event.defer()
        return

    # Update the unit's status to ActiveStatus if it was added to the cluster
    unit_label = self.unit.name.replace("/", "-")
    if isinstance(
        self.unit.status, WaitingStatus
    ) and self._mysql.is_instance_in_cluster(unit_label):
        self.unit.status = ActiveStatus()
def on_config_changed(self, _):
    unit = self.model.unit
    # current = self.config["thing"]
    new_spec = self.make_pod_spec()
    # if self._stored.spec != new_spec:
    unit.status = MaintenanceStatus("Applying new pod spec")
    self._apply_spec(new_spec)
    unit.status = ActiveStatus()
def test_on_start_when_redis_is_ready(self, is_ready):
    # Given
    self.harness.set_leader(True)
    is_ready.return_value = True
    # When
    self.harness.charm.on.start.emit()
    # Then
    is_ready.assert_called_once_with()
    self.assertEqual(
        self.harness.charm.unit.status,
        ActiveStatus()
    )
def on_start(self, event): """Called when the charm is being started""" print("Start called") unit = self.model.unit if not SSHProxy.has_ssh_key(): unit.status = MaintenanceStatus("Generating SSH keys...") print("Generating SSH Keys") SSHProxy.generate_ssh_key() unit.status = ActiveStatus()
def on_upgrade_charm(self, event): """Upgrade the charm.""" # raise NotImplementedError("TODO") unit = self.model.unit # Mark the unit as under Maintenance. unit.status = MaintenanceStatus("Upgrading charm") self.on_start(event) # When maintenance is done, return to an Active state unit.status = ActiveStatus()
def _on_daemon_started(self, event):
    if not self.cluster.is_joined and not self.is_single_node:
        self.unit.status = WaitingStatus('Waiting for peer units to join.')
        event.defer()
        return

    if self.cluster.is_cluster_initialized:
        # Skip this event when some other unit has already initialized a cluster.
        self.unit.status = ActiveStatus()
        return
    elif not self.unit.is_leader():
        self.unit.status = WaitingStatus(
            'Waiting for the leader unit to initialize a cluster.')
        event.defer()
        return

    self.unit.status = MaintenanceStatus('Initializing the cluster.')
    # Initialize the cluster if we're a leader in a multi-node deployment; otherwise it has
    # already been initialized by running start-single-node.
    if not self.is_single_node and self.model.unit.is_leader():
        self.instance_manager.init_db()

    self.unit.status = ActiveStatus()
def set_pod_spec(self, event): if not self.model.unit.is_leader(): log.info("Not a leader, skipping set_pod_spec") self.model.unit.status = ActiveStatus() return try: image_details = self.image.fetch() except OCIImageResourceError as e: self.model.unit.status = e.status log.info(e) return secret_key = get_or_set( "password", configured=self.model.config["secret-key"], default=gen_pass(), ) self.model.unit.status = MaintenanceStatus("Setting pod spec") self.model.pod.set_spec({ "version": 3, "containers": [{ "name": "minio", "args": ["server", "/data"], "imageDetails": image_details, "ports": [{ "name": "minio", "containerPort": int(self.model.config["port"]), }], "envConfig": { "MINIO_ACCESS_KEY": self.model.config["access-key"], "MINIO_SECRET_KEY": secret_key, }, }], }) self.model.unit.status = ActiveStatus()
def test_on_config_changed(self, _render, _setup, _call, _restart):
    # Check first run, no change to values set by install/start
    self.harness.charm._stored.repo = "https://github.com/juju/hello-juju"
    self.harness.charm._stored.port = 80
    # Run the handler
    self.harness.charm.on.config_changed.emit()
    _setup.assert_not_called()
    _call.assert_not_called()
    self.assertEqual(self.harness.charm.unit.status, ActiveStatus())

    # Change the application repo, should prompt a restart
    _setup.reset_mock()
    _call.reset_mock()
    self.harness.update_config({"application-repo": "DIFFERENT"})
    self.assertEqual(self.harness.charm._stored.repo, "DIFFERENT")
    _setup.assert_called_once()
    # This also ensures that the port change code wasn't run
    _render.assert_not_called()
    _restart.assert_called_with("hello-juju")
    self.assertEqual(self.harness.charm.unit.status, ActiveStatus())

    # Change the port, should prompt a restart
    _setup.reset_mock()
    _call.reset_mock()
    _restart.reset_mock()
    self.harness.update_config({"port": 8080})
    self.assertEqual(self.harness.charm._stored.port, 8080)
    _render.assert_called_once()
    _setup.assert_not_called()
    # Check the old port is closed, the new is opened and the service restarts
    self.assertEqual(
        _call.call_args_list,
        [
            call(["close-port", "80/TCP"]),
            call(["open-port", "8080/TCP"]),
        ],
    )
    _restart.assert_called_with("hello-juju")
    self.assertEqual(self.harness.charm.unit.status, ActiveStatus())
def _main(self, event):
    # Set up all relations/fetch required data
    try:
        self._check_leader()
        interfaces = self._get_interfaces()
        image_details = self.image.fetch()
        kfpapi = self._get_kfpapi(interfaces)
    except (CheckFailedError, OCIImageResourceError) as check_failed:
        self.model.unit.status = check_failed.status
        self.log.info(str(check_failed.status))
        return

    self.model.unit.status = MaintenanceStatus("Setting pod spec")
    self.model.pod.set_spec(
        {
            "version": 3,
            "serviceAccount": {
                "roles": [{
                    "global": True,
                    "rules": [
                        {
                            "apiGroups": ["argoproj.io"],
                            "resources": ["workflows"],
                            "verbs": ["get", "list", "watch"],
                        },
                        {
                            "apiGroups": ["kubeflow.org"],
                            "resources": ["scheduledworkflows"],
                            "verbs": ["get", "list", "watch"],
                        },
                    ],
                }]
            },
            "containers": [{
                "name": "ml-pipeline-persistenceagent",
                "imageDetails": image_details,
                "command": [
                    "persistence_agent",
                    "--logtostderr=true",
                    "--namespace=",
                    "--ttlSecondsAfterWorkflowFinish=86400",
                    "--numWorker=2",
                    f"--mlPipelineAPIServerName={kfpapi['service-name']}",
                ],
            }],
        },
    )
    self.model.unit.status = ActiveStatus()
def _configure_pod(self):
    """Set up a new Prometheus pod specification."""
    logger.debug('Configuring Pod')
    missing_config = self._check_config()
    if missing_config:
        logger.error('Incomplete Configuration : {}. '
                     'Application will be blocked.'.format(missing_config))
        self.unit.status = BlockedStatus(
            'Missing configuration: {}'.format(missing_config))
        return

    if not self.unit.is_leader():
        self.unit.status = ActiveStatus()
        return

    self.unit.status = MaintenanceStatus('Setting pod spec.')
    pod_spec = self._build_pod_spec()

    self.model.pod.set_spec(pod_spec)
    self.app.status = ActiveStatus()
    self.unit.status = ActiveStatus()
def configure_pod(self, event):
    # Continue only if the unit is the leader
    if not self.unit.is_leader():
        self.unit.status = ActiveStatus()
        return

    # Check problems in the settings
    problems = self._check_settings()
    if problems:
        self.unit.status = BlockedStatus(problems)
        return

    # Assembling the pod spec is a transient working state, so use MaintenanceStatus
    self.unit.status = MaintenanceStatus("Assembling pod spec")
    image_details = self._make_pod_image_details()
    ports = self._make_pod_ports()
    env_config = self._make_pod_envconfig()
    command = self._make_pod_command()
    volume_config = self._make_pod_volume_config()
    ingress_resources = self._make_pod_ingress_resources()
    secrets = self._make_pod_secrets()

    pod_spec = {
        "version": 3,
        "containers": [
            {
                "name": self.framework.model.app.name,
                "imageDetails": image_details,
                "ports": ports,
                "envConfig": env_config,
                "command": command,
                "volumeConfig": volume_config,
            }
        ],
        "kubernetesResources": {
            "ingressResources": ingress_resources or [],
            "secrets": secrets,
        },
    }

    self.model.pod.set_spec(pod_spec)
    self.unit.status = ActiveStatus()
def on_config_changed(self, event):
    """Handle changes in configuration"""
    unit = self.model.unit

    # Unit should go into a waiting state until verify_ssh_credentials is successful
    unit.status = WaitingStatus("Waiting for SSH credentials")
    proxy = self.get_ssh_proxy()

    verified = proxy.verify_credentials()
    if verified:
        unit.status = ActiveStatus()
    else:
        unit.status = BlockedStatus("Invalid SSH credentials.")
def handle_database_available(event, fw_adapter, state):
    """Render the database details into the slurmdbd.yaml and set the snap.mode."""
    state.db_info = {
        'user': event.db_info.user,
        'password': event.db_info.password,
        'host': event.db_info.host,
        'port': event.db_info.port,
        'database': event.db_info.database,
    }
    fw_adapter.set_unit_status(ActiveStatus("mysql available"))
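# A minimal sketch of the fw_adapter expected by handle_database_available above. The real
# adapter in the slurm charms wraps more of the framework; this hypothetical version only
# shows the set_unit_status call used here.
class FrameworkAdapter:
    """Thin wrapper so handlers can set unit status without touching the framework directly."""

    def __init__(self, framework):
        self._framework = framework

    def set_unit_status(self, status):
        # Delegate to the unit status on the charm's model.
        self._framework.model.unit.status = status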
def _restart_kafka(self):
    logger.info("Restarting kafka ...")
    container = self.unit.get_container(SERVICE)
    container.get_plan().to_yaml()
    status = container.get_service(SERVICE)
    if status.current == ServiceStatus.ACTIVE:
        container.stop(SERVICE)
        self.unit.status = MaintenanceStatus("kafka maintenance")
    container.start(SERVICE)
    self.unit.status = ActiveStatus("kafka restarted")
def test_no_relation(harness):
    harness.set_leader(True)
    harness.add_oci_resource(
        "oci-image",
        {
            "registrypath": "ci-test",
            "username": "",
            "password": "",
        },
    )
    harness.begin_with_initial_hooks()

    assert harness.charm.model.unit.status == ActiveStatus()
def _on_start(self, _):
    self.unit.status = MaintenanceStatus("Starting Proxy")
    lines = [
        '',
        'listen mlappbalance',
        ' bind *:5000',
        ' balance roundrobin',
        ' option forwardfor',
        ' option httpchk',
    ]
    for ln in lines:
        cmd = f'echo "{ln}" >> /etc/haproxy/haproxy.cfg'
        subprocess.run(cmd, shell=True)
    self.unit.status = ActiveStatus("Proxy started")
def _on_reconcile_runners(self, event):
    runner_manager = self._get_runner_manager()
    if not runner_manager or not runner_manager.runner_bin_path.exists():
        return

    self.unit.status = MaintenanceStatus("Reconciling runners")
    try:
        self._reconcile_runners(runner_manager)
    except Exception as e:
        logger.exception("Failed to reconcile runners")
        self.unit.status = BlockedStatus(f"Failed to reconcile runners: {e}")
    else:
        self.unit.status = ActiveStatus()
def _on_db_info_available(self, event):
    """Store the db_info in the StoredState for later use."""
    db_info = {
        'user': event.db_info.user,
        'password': event.db_info.password,
        'host': event.db_info.host,
        'port': event.db_info.port,
        'database': event.db_info.database,
    }
    self._stored.db_info = db_info
    self.slurm_ops.on.configure_slurm.emit()
    self.unit.status = ActiveStatus("db info available")
def on_start(self, event): """Handle start state.""" if not self.state.configured: logging.warning("Start called before configuration complete, deferring event: {}".format(event.handle)) self._defer_once(event) return self.unit.status = MaintenanceStatus("Starting charm software") # Start software self.unit.status = ActiveStatus("Unit is ready") self.state.started = True logging.info("Started")
def test_peer_changed_handler_with_single_node_via_update_status_event(self):
    self.harness.set_leader(True)
    seed_config = MINIMAL_CONFIG.copy()
    self.harness.update_config(seed_config)

    # Check that the number of nodes and the status are correct
    # after emitting the update_status event
    self.assertEqual(self.harness.charm.num_hosts, 1)
    self.harness.charm.on.update_status.emit()
    self.assertEqual(
        self.harness.charm.unit.status,
        ActiveStatus()
    )
def __init__(self, *args):
    super().__init__(*args)

    if not self.model.unit.is_leader():
        log.info("Not a leader, skipping set_pod_spec")
        self.model.unit.status = ActiveStatus()
        return

    self._stored.set_default(**self.gen_certs())
    self.image = OCIImageResource(self, "oci-image")
    self.framework.observe(self.on.install, self.set_pod_spec)
    self.framework.observe(self.on.upgrade_charm, self.set_pod_spec)
    self.framework.observe(self.on.config_changed, self.set_pod_spec)
def __init__(self, *args):
    super().__init__(*args)

    if not self.unit.is_leader():
        # We can't do anything useful when not the leader, so do nothing.
        self.model.unit.status = WaitingStatus("Waiting for leadership")
        return
    try:
        self.interfaces = get_interfaces(self)
    except NoVersionsListed as err:
        self.model.unit.status = WaitingStatus(str(err))
        return
    except NoCompatibleVersions as err:
        self.model.unit.status = BlockedStatus(str(err))
        return
    else:
        self.model.unit.status = ActiveStatus()

    self.log = logging.getLogger(__name__)
    self.env = Environment(loader=FileSystemLoader('src'))
    self._resource_handler = ResourceHandler(self.app.name, self.model.name)
    self.lightkube_client = Client(namespace=self.model.name, field_manager="lightkube")
    self._resource_files = [
        "gateway.yaml.j2",
        "auth_filter.yaml.j2",
        "virtual_service.yaml.j2",
    ]

    self.framework.observe(self.on.install, self.install)
    self.framework.observe(self.on.remove, self.remove)
    self.framework.observe(self.on.config_changed, self.handle_default_gateway)
    self.framework.observe(self.on["istio-pilot"].relation_changed, self.send_info)
    self.framework.observe(self.on['ingress'].relation_changed, self.handle_ingress)
    self.framework.observe(self.on['ingress'].relation_broken, self.handle_ingress)
    self.framework.observe(self.on['ingress'].relation_departed, self.handle_ingress)
    self.framework.observe(self.on['ingress-auth'].relation_changed, self.handle_ingress_auth)
    self.framework.observe(self.on['ingress-auth'].relation_departed, self.handle_ingress_auth)
def set_pod_spec(self, event):
    if not self.model.unit.is_leader():
        log.info('Not a leader, skipping set_pod_spec')
        self.model.unit.status = ActiveStatus()
        return

    try:
        image_details = self.image.fetch()
    except OCIImageResourceError as e:
        self.model.unit.status = e.status
        return

    cni_bin_dir = self.model.config.get('cni-bin-dir', '/opt/cni/bin')

    self.model.unit.status = MaintenanceStatus('Setting pod spec')
    self.model.pod.set_spec({
        'version': 3,
        'containers': [{
            'name': 'sriov-cni',
            'imageDetails': image_details,
            'volumeConfig': [{
                'name': 'cni-bin',
                'mountPath': '/dest',
                'hostPath': {
                    'path': cni_bin_dir
                }
            }]
        }],
        'kubernetesResources': {
            'pod': {
                'hostNetwork': True,
            }
        }
    })
    self.model.unit.status = ActiveStatus()