async def test_conflict_logs(faker, namespace, caplog, api_client):
    """
    A conflicting create with ``continue_on_conflict=True`` keeps the
    existing secret and logs that the conflict was ignored.
    """
    caplog.set_level(logging.DEBUG, logger=__name__)
    core = CoreV1Api(api_client)
    name = faker.domain_word()
    ns = namespace.metadata.name
    password1 = faker.password(length=12)
    password2 = faker.password(length=12)

    def opaque_secret(password):
        # Both secrets share the same name, which is what provokes the
        # conflict on the second create.
        return V1Secret(
            data={"password": b64encode(password)},
            metadata=V1ObjectMeta(name=name),
            type="Opaque",
        )

    await core.create_namespaced_secret(
        namespace=ns, body=opaque_secret(password1)
    )
    await call_kubeapi(
        core.create_namespaced_secret,
        logger,
        continue_on_conflict=True,
        namespace=ns,
        body=opaque_secret(password2),
    )

    stored = await core.read_namespaced_secret(name=name, namespace=ns)
    assert b64decode(stored.data["password"]) == password1

    expected_message = (
        f"Failed creating V1Secret '{ns}/{name}' because it already exists. Continuing."
    )
    assert expected_message in caplog.messages
async def create_services(
    owner_references: Optional[List[V1OwnerReference]],
    namespace: str,
    name: str,
    labels: LabelType,
    http_port: int,
    postgres_port: int,
    transport_port: int,
    dns_record: Optional[str],
    logger: logging.Logger,
) -> None:
    """
    Create the data service and the discovery service in ``namespace``.

    The two services are created one after the other. Both calls pass
    ``continue_on_conflict=True`` to :func:`call_kubeapi`.

    :param owner_references: Owner references handed to the service builders.
    :param namespace: The Kubernetes namespace to create the services in.
    :param name: The name handed to the service builders.
    :param labels: The labels handed to the service builders.
    :param http_port: HTTP port for the data service.
    :param postgres_port: PostgreSQL port for the data service.
    :param transport_port: Transport port for the discovery service.
    :param dns_record: Optional DNS record handed to the data service builder.
    :param logger: Logger passed on to :func:`call_kubeapi`.
    """
    async with ApiClient() as api_client:
        core = CoreV1Api(api_client)

        async def _create(service_body) -> None:
            await call_kubeapi(
                core.create_namespaced_service,
                logger,
                continue_on_conflict=True,
                namespace=namespace,
                body=service_body,
            )

        data_service = get_data_service(
            owner_references, name, labels, http_port, postgres_port, dns_record
        )
        await _create(data_service)

        discovery_service = get_discovery_service(
            owner_references, name, labels, transport_port
        )
        await _create(discovery_service)
async def create_debug_volume(
    owner_references: Optional[List[V1OwnerReference]],
    namespace: str,
    name: str,
    labels: LabelType,
    logger: logging.Logger,
) -> None:
    """
    Create the ``PersistentVolume`` and ``PersistentVolumeClaim`` that are
    used for exporting Java heap dumps from CrateDB.

    The volume can be configured with the
    :attr:`~crate.operator.config.Config.DEBUG_VOLUME_SIZE` and
    :attr:`~crate.operator.config.Config.DEBUG_VOLUME_STORAGE_CLASS` settings.

    The volume is created first, then the claim. Both calls pass
    ``continue_on_conflict=True`` to :func:`call_kubeapi`.
    """
    async with ApiClient() as api_client:
        core = CoreV1Api(api_client)

        # The persistent volume is cluster-scoped, hence no ``namespace``
        # keyword on this call.
        volume = get_debug_persistent_volume(
            owner_references, namespace, name, labels
        )
        await call_kubeapi(
            core.create_persistent_volume,
            logger,
            continue_on_conflict=True,
            body=volume,
        )

        claim = get_debug_persistent_volume_claim(owner_references, name, labels)
        await call_kubeapi(
            core.create_namespaced_persistent_volume_claim,
            logger,
            continue_on_conflict=True,
            namespace=namespace,
            body=claim,
        )
async def test_create(self, faker, namespace, cleanup_handler):
    """
    The debug volume helper must produce both the persistent volume and the
    persistent volume claim.
    """
    core = CoreV1Api()
    name = faker.domain_word()
    ns = namespace.metadata.name
    pv_name = f"temp-pv-{ns}-{name}"

    # Persistent volumes are not removed together with the namespace, so
    # schedule an explicit clean-up after the test.
    cleanup_handler.append(core.delete_persistent_volume(name=pv_name))

    pending = create_debug_volume(
        core, None, ns, name, {}, logging.getLogger(__name__)
    )
    # Unpacking doubles as a check that exactly two results come back.
    _pv, _pvc = await asyncio.gather(*pending)

    await assert_wait_for(True, self.does_pv_exist, core, pv_name)
    await assert_wait_for(
        True, self.does_pvc_exist, core, ns, f"local-resource-{name}"
    )
async def test_conflict_raises(faker, namespace, api_client):
    """
    Without ``continue_on_conflict`` a conflicting create raises
    ``ApiException`` and the existing secret keeps its original value.
    """
    core = CoreV1Api(api_client)
    name = faker.domain_word()
    ns = namespace.metadata.name
    password1 = faker.password(length=12)
    password2 = faker.password(length=12)

    def opaque_secret(password):
        return V1Secret(
            data={"password": b64encode(password)},
            metadata=V1ObjectMeta(name=name),
            type="Opaque",
        )

    await core.create_namespaced_secret(
        namespace=ns, body=opaque_secret(password1)
    )

    with pytest.raises(ApiException):
        await call_kubeapi(
            core.create_namespaced_secret,
            logger,
            namespace=ns,
            body=opaque_secret(password2),
        )

    stored = await core.read_namespaced_secret(name=name, namespace=ns)
    assert b64decode(stored.data["password"]) == password1
async def namespace(faker, api_client) -> V1Namespace:
    """
    Yield a freshly created namespace with a random UUID name and delete it
    once the consumer is done with it.
    """
    core = CoreV1Api(api_client)
    name = faker.uuid4()

    # The random name must not be taken yet.
    await assert_wait_for(False, does_namespace_exist, core, name)

    manifest = V1Namespace(metadata=V1ObjectMeta(name=name))
    ns: V1Namespace = await core.create_namespace(body=manifest)
    await assert_wait_for(True, does_namespace_exist, core, name)

    yield ns

    await core.delete_namespace(name=ns.metadata.name, body=V1DeleteOptions())
async def test_absent_raises(faker, namespace, api_client):
    """
    Deleting a secret that does not exist raises ``ApiException`` when
    ``continue_on_absence`` is not requested.
    """
    core = CoreV1Api(api_client)
    name = faker.domain_word()
    ns = namespace.metadata.name

    with pytest.raises(ApiException):
        await call_kubeapi(
            core.delete_namespaced_secret,
            logger,
            namespace=ns,
            name=name,
            body=V1DeleteOptions(),
        )
async def test_create(self, faker, namespace):
    """The SQL exporter config map must exist after the helper has run."""
    core = CoreV1Api()
    name = faker.domain_word()
    ns = namespace.metadata.name
    test_logger = logging.getLogger(__name__)

    await create_sql_exporter_config(core, None, ns, name, {}, test_logger)

    await assert_wait_for(
        True, self.does_configmap_exist, core, ns, f"crate-sql-exporter-{name}"
    )
async def reset_allocation(namespace: str, pod_name: str, has_ssl: bool) -> None:
    """
    Reset all temporary node deallocations to none.

    .. note::

       Ideally, we'd be using the system user to reset the allocation
       exclusions. However, `due to a bug
       <https://github.com/crate/crate/pull/10083>`_, this isn't possible in
       CrateDB <= 4.1.6. We therefore fall back to the "exec-in-container"
       approach that we also use during cluster bootstrapping.

    :param namespace: The Kubernetes namespace for the CrateDB cluster.
    :param pod_name: The pod name of one of the eligible master nodes in the
        cluster. Used to ``exec`` into.
    :param has_ssl: When ``True``, ``crash`` will establish a connection to
        the CrateDB cluster from inside the ``crate`` container using SSL/TLS.
        This must match how the cluster is configured, otherwise ``crash``
        won't be able to connect, since non-encrypted connections are forbidden
        when SSL/TLS is enabled, and encrypted connections aren't possible when
        no SSL/TLS is configured.
    """
    # Once the bug mentioned above no longer matters, the statement below
    # could be executed through a regular database connection instead of
    # ``crash`` inside the container.
    if has_ssl:
        scheme = "https"
    else:
        scheme = "http"

    reset_command = [
        "crash",
        "--verify-ssl=false",
        f"--host={scheme}://localhost:4200",
        "-c",
        'RESET GLOBAL "cluster.routing.allocation.exclude._name";',
    ]

    async with WsApiClient() as ws_api_client:
        core_ws = CoreV1Api(ws_api_client)
        await core_ws.connect_get_namespaced_pod_exec(
            namespace=namespace,
            name=pod_name,
            command=reset_command,
            container="crate",
            stderr=True,
            stdin=False,
            stdout=True,
            tty=False,
        )
async def handle(  # type: ignore
    self,
    namespace: str,
    name: str,
    old: kopf.Body,
    logger: logging.Logger,
    patch: kopf.Patch,
    status: kopf.Status,
    **kwargs: Any,
):
    """
    Restart the cluster ``name`` in ``namespace`` and then send out the
    notifications.

    :param namespace: The Kubernetes namespace of the cluster.
    :param name: The name of the cluster resource.
    :param old: The previous resource body, passed on to ``restart_cluster``.
    :param logger: Logger passed on to ``restart_cluster`` and to
        ``send_notifications``.
    :param patch: The kopf patch object, passed on to ``restart_cluster``.
    :param status: The kopf status object, passed on to ``restart_cluster``.
    """
    async with ApiClient() as api_client:
        await restart_cluster(
            CoreV1Api(api_client), namespace, name, old, logger, patch, status
        )

    await self.send_notifications(logger)
async def create_sql_exporter_config(
    owner_references: Optional[List[V1OwnerReference]],
    namespace: str,
    name: str,
    labels: LabelType,
    logger: logging.Logger,
) -> None:
    """
    Create the config map built by :func:`get_sql_exporter_config` in
    ``namespace``.

    The call passes ``continue_on_conflict=True`` to :func:`call_kubeapi`.

    :param owner_references: Owner references handed to the config builder.
    :param namespace: The Kubernetes namespace to create the config map in.
    :param name: The name handed to the config builder.
    :param labels: The labels handed to the config builder.
    :param logger: Logger passed on to :func:`call_kubeapi`.
    """
    async with ApiClient() as api_client:
        core = CoreV1Api(api_client)
        config_map = get_sql_exporter_config(owner_references, name, labels)
        await call_kubeapi(
            core.create_namespaced_config_map,
            logger,
            continue_on_conflict=True,
            namespace=namespace,
            body=config_map,
        )
async def test_absent_logs(faker, namespace, caplog, api_client):
    """
    Deleting a missing secret with ``continue_on_absence=True`` does not
    raise and logs that the absence was ignored.
    """
    caplog.set_level(logging.DEBUG, logger=__name__)
    core = CoreV1Api(api_client)
    name = faker.domain_word()
    ns = namespace.metadata.name

    await call_kubeapi(
        core.delete_namespaced_secret,
        logger,
        continue_on_absence=True,
        namespace=ns,
        name=name,
        body=V1DeleteOptions(),
    )

    expected_message = (
        f"Failed deleting '{ns}/{name}' because it doesn't exist. Continuing."
    )
    assert expected_message in caplog.messages
async def resolve_secret_key_ref(
    namespace: str,
    secret_key_ref: SecretKeyRef,
    core: Optional[CoreV1Api] = None,
) -> str:
    """
    Look up the secret value that ``secret_key_ref`` points to in
    ``namespace``.

    :param namespace: The namespace in which to look up the secret.
    :param secret_key_ref: A ``secretKeyRef`` holding the secret's ``name``
        and the ``key`` within the secret whose value is wanted.
    :param core: An instance of the Kubernetes Core V1 API. A new one is
        created when none is given.
    :return: The base64-decoded value stored under the referenced key.
    """
    if not core:
        core = CoreV1Api()

    # Read both parts of the reference before talking to the API.
    secret_name, key = secret_key_ref["name"], secret_key_ref["key"]
    secret = await core.read_namespaced_secret(
        namespace=namespace, name=secret_name
    )
    encoded_value = secret.data[key]
    return b64decode(encoded_value)
async def secret_update(
    namespace: str,
    name: str,
    diff: kopf.Diff,
    logger: logging.Logger,
    **kwargs,
):
    """
    React to a change of the secret ``name`` in ``namespace``.

    For every changed field in ``diff``, all CrateDB custom resources in
    ``namespace`` are listed. For each user of such a resource whose password
    ``secretKeyRef`` points at this secret and at the changed key, a
    subhandler running ``update_user_password`` is registered.

    :param namespace: The namespace of the changed secret.
    :param name: The name of the changed secret.
    :param diff: The kopf diff describing the changed fields of the secret.
    :param logger: Logger handed to the registered subhandlers.
    """
    async with ApiClient() as api_client:
        coapi = CustomObjectsApi(api_client)
        core = CoreV1Api(api_client)

        for _operation, field_path, old_value, new_value in diff:
            custom_objects = await coapi.list_namespaced_custom_object(
                namespace=namespace,
                group=API_GROUP,
                version="v1",
                plural=RESOURCE_CRATEDB,
            )

            for crate_custom_object in custom_objects["items"]:
                cluster_name = crate_custom_object["metadata"]["name"]

                # ``users`` is an optional part of the CrateDB spec. A
                # resource without users cannot reference this secret, so it
                # is skipped instead of failing the handler with a KeyError.
                user_specs = crate_custom_object["spec"].get("users") or []
                if not user_specs:
                    continue

                host = await get_host(core, namespace, cluster_name)

                for user_spec in user_specs:
                    secret_key_ref = user_spec["password"]["secretKeyRef"]
                    expected_field_path = ("data", secret_key_ref["key"])
                    if (
                        secret_key_ref["name"] != name
                        or field_path != expected_field_path
                    ):
                        continue

                    kopf.register(
                        fn=subhandler_partial(
                            update_user_password,
                            host,
                            user_spec["name"],
                            old_value,
                            new_value,
                            logger,
                        ),
                        id=f"update-{cluster_name}-{user_spec['name']}",
                        timeout=config.BOOTSTRAP_TIMEOUT,
                    )
async def test_success(faker, namespace, api_client):
    """A secret created through ``call_kubeapi`` can be read back."""
    core = CoreV1Api(api_client)
    name = faker.domain_word()
    password = faker.password(length=12)
    ns = namespace.metadata.name

    new_secret = V1Secret(
        data={"password": b64encode(password)},
        metadata=V1ObjectMeta(name=name),
        type="Opaque",
    )
    await call_kubeapi(
        core.create_namespaced_secret, logger, namespace=ns, body=new_secret
    )

    stored = await core.read_namespaced_secret(name=name, namespace=ns)
    assert b64decode(stored.data["password"]) == password
async def create_system_user(
    owner_references: Optional[List[V1OwnerReference]],
    namespace: str,
    name: str,
    labels: LabelType,
    logger: logging.Logger,
) -> None:
    """
    Create the secret holding the credentials of the ``system`` user.

    The *CrateDB Operator* needs to perform operations on the CrateDB
    cluster. For that, it uses a ``system`` user whose credentials are
    created here. The call passes ``continue_on_conflict=True`` to
    :func:`call_kubeapi`.
    """
    async with ApiClient() as api_client:
        core = CoreV1Api(api_client)
        system_user_secret = get_system_user_secret(owner_references, name, labels)
        await call_kubeapi(
            core.create_namespaced_secret,
            logger,
            continue_on_conflict=True,
            namespace=namespace,
            body=system_user_secret,
        )
async def bootstrap_cluster(
    namespace: str,
    name: str,
    master_node_pod: str,
    license: Optional[SecretKeyRefContainer],
    has_ssl: bool,
    users: Optional[List[Dict[str, Any]]],
    logger: logging.Logger,
):
    """
    Bootstrap an entire cluster: the license (if any), the system user, and
    additional users (if any), in that order.

    :param namespace: The Kubernetes namespace for the CrateDB cluster.
    :param name: The name for the ``CrateDB`` custom resource. Used to look up
        the password for the system user created during deployment.
    :param master_node_pod: The pod name of one of the eligible master nodes
        in the cluster. Used to ``exec`` into.
    :param license: An optional ``secretKeyRef`` to the Kubernetes secret that
        holds the CrateDB license key.
    :param has_ssl: When ``True``, ``crash`` will establish a connection to
        the CrateDB cluster from inside the ``crate`` container using SSL/TLS.
        This must match how the cluster is configured, otherwise ``crash``
        won't be able to connect, since non-encrypted connections are forbidden
        when SSL/TLS is enabled, and encrypted connections aren't possible when
        no SSL/TLS is configured.
    :param users: An optional list of user definitions containing the username
        and the secret key reference to their password.
    :param logger: Logger passed on to the license and system user steps.
    """
    async with ApiClient() as api_client:
        core = CoreV1Api(api_client)

        # The license goes first, in case the CrateDB cluster contains more
        # nodes than the free license allows.
        if license:
            await bootstrap_license(
                core, namespace, master_node_pod, has_ssl, license, logger
            )

        await bootstrap_system_user(
            core, namespace, name, master_node_pod, has_ssl, logger
        )

        if users:
            await bootstrap_users(core, namespace, name, users)
async def test_create(self, faker, namespace):
    """Both the data and the discovery service must exist afterwards."""
    core = CoreV1Api()
    name = faker.domain_word()
    ns = namespace.metadata.name

    pending = create_services(
        core,
        None,
        ns,
        name,
        {},
        1,
        2,
        3,
        faker.domain_name(),
        logging.getLogger(__name__),
    )
    # Unpacking doubles as a check that exactly two results come back.
    _s_data, _s_discovery = await asyncio.gather(*pending)

    expected_services = {f"crate-{name}", f"crate-discovery-{name}"}
    await assert_wait_for(
        True, self.do_services_exist, core, ns, expected_services
    )
async def test_create(self, faker, namespace):
    """
    The system user secret must exist afterwards and carry the generated
    password.
    """
    core = CoreV1Api()
    name = faker.domain_word()
    password = faker.password(length=12)
    ns = namespace.metadata.name

    # Pin the generated password so that it can be compared below.
    password_patch = mock.patch(
        "crate.operator.create.gen_password", return_value=password
    )
    with password_patch:
        secret = await create_system_user(
            core, None, ns, name, {}, logging.getLogger(__name__)
        )

    await assert_wait_for(
        True, self.does_secret_exist, core, ns, f"user-system-{name}"
    )
    assert b64decode(secret.data["password"]) == password
async def update_cratedb_resource(
    namespace: str,
    name: str,
    spec: kopf.Spec,
    **kwargs,
):
    """
    Make sure every user password secret referenced in ``spec`` carries the
    ``LABEL_USER_PASSWORD`` label.

    Each referenced secret is read; when it has no labels at all or lacks the
    label, ``ensure_user_password_label`` is called for it. Nothing happens
    when the spec defines no users.

    :param namespace: The namespace holding the referenced secrets.
    :param name: The name of the CrateDB resource (unused here).
    :param spec: The spec of the CrateDB resource.
    """
    users = spec.get("users")
    if not users:
        # No users: there is nothing to label, so don't open a client.
        return

    async with ApiClient() as api_client:
        # One API object serves all users; it does not depend on the
        # iteration.
        core = CoreV1Api(api_client)

        for user_spec in users:
            secret_name = user_spec["password"]["secretKeyRef"]["name"]
            secret = await core.read_namespaced_secret(
                namespace=namespace, name=secret_name
            )
            labels = secret.metadata.labels
            if labels is None or LABEL_USER_PASSWORD not in labels:
                await ensure_user_password_label(core, namespace, secret_name)
async def _ensure_no_snapshots_in_progress(self, namespace, name, logger):
    """
    Raise ``kopf.TemporaryError`` (retry delay of 30) while a snapshot is
    running on the cluster ``name`` in ``namespace``.

    Before raising, the "backup running" notification subhandler is executed
    directly.
    """
    async with ApiClient() as api_client:
        core = CoreV1Api(api_client)

        host = await get_host(core, namespace, name)
        password = await get_system_user_password(core, namespace, name)
        in_progress, statement = await are_snapshots_in_progress(
            connection_factory(host, password), logger
        )
        if not in_progress:
            return

        # Raising a TemporaryError will clear any registered subhandlers, so
        # this one is executed directly to make sure it runs. The same
        # guarantees about it being executed only once still stand.
        notifier = subhandler_partial(self._notify_backup_running, logger)
        await kopf.execute(fns={"notify_backup_running": notifier})

        raise kopf.TemporaryError(
            "A snapshot is currently in progress, "
            f"waiting for it to finish: {statement}",
            delay=30,
        )
async def restart_cluster(namespace: str, name: str, total_nodes: int,
                          logger: logging.Logger) -> None:
    """
    Perform a rolling restart of the CrateDB cluster ``name`` in ``namespace``.

    One node at a time, this function will terminate first the master nodes and
    then the data nodes in the cluster. After triggering a pod's termination,
    the operator will wait for that pod to be terminated and gone. It will then
    wait for the cluster to have the desired number of nodes again and for the
    cluster to be in a ``GREEN`` state.

    :param namespace: The Kubernetes namespace where to look up CrateDB cluster.
    :param name: The CrateDB custom resource name defining the CrateDB cluster.
    :param total_nodes: The total number of nodes that the cluster should
        consist of, per the CrateDB cluster spec.
    :param logger: Logger passed on to each StatefulSet restart.
    """
    # Imported locally so this function does not depend on the module-level
    # imports providing ``ApiClient``.
    from kubernetes_asyncio.client import ApiClient

    # Share one explicitly managed client between both API objects. API
    # objects created without a client each build their own, which is never
    # closed and leaks its HTTP session.
    async with ApiClient() as api_client:
        coapi = CustomObjectsApi(api_client)
        core = CoreV1Api(api_client)

        cluster = await coapi.get_namespaced_custom_object(
            group=API_GROUP,
            version="v1",
            plural=RESOURCE_CRATEDB,
            namespace=namespace,
            name=name,
        )
        password = await get_system_user_password(namespace, name, core)
        host = await get_host(core, namespace, name)
        conn_factory = connection_factory(host, password)

        nodes_spec = cluster["spec"]["nodes"]

        # Dedicated master nodes, when present, are restarted first.
        if "master" in nodes_spec:
            await restart_statefulset(core, conn_factory, namespace, name,
                                      "master", total_nodes, logger)

        for node_spec in nodes_spec["data"]:
            await restart_statefulset(core, conn_factory, namespace, name,
                                      node_spec["name"], total_nodes, logger)
async def test_bootstrap_license(
    bootstrap_system_user: mock.AsyncMock,
    bootstrap_license_mock: mock.AsyncMock,
    faker,
    namespace,
    cleanup_handler,
    kopf_runner,
    api_client,
):
    """
    Creating a CrateDB resource that references a license secret must lead
    to the license bootstrap being invoked with that reference.
    """
    coapi = CustomObjectsApi(api_client)
    core = CoreV1Api(api_client)
    name = faker.domain_word()
    license_key = base64.b64encode(faker.binary(64)).decode()
    ns = namespace.metadata.name
    secret_name = f"license-{name}"

    def license_ref():
        # A fresh dict on every call so that no object is shared between the
        # request body and the expectation.
        return {"secretKeyRef": {"key": "license", "name": secret_name}}

    cleanup_handler.append(
        core.delete_persistent_volume(name=f"temp-pv-{ns}-{name}")
    )

    await core.create_namespaced_secret(
        namespace=ns,
        body=V1Secret(
            data={"license": b64encode(license_key)},
            metadata=V1ObjectMeta(name=secret_name),
            type="Opaque",
        ),
    )

    data_node = {
        "name": "data",
        "replicas": 1,
        "resources": {
            "cpus": 0.5,
            "memory": "1Gi",
            "heapRatio": 0.25,
            "disk": {
                "storageClass": "default",
                "size": "16GiB",
                "count": 1,
            },
        },
    }
    cratedb = {
        "apiVersion": "cloud.crate.io/v1",
        "kind": "CrateDB",
        "metadata": {"name": name},
        "spec": {
            "cluster": {
                "imageRegistry": "crate",
                "license": license_ref(),
                "name": "my-crate-cluster",
                "version": CRATE_VERSION,
            },
            "nodes": {"data": [data_node]},
        },
    }
    await coapi.create_namespaced_custom_object(
        group=API_GROUP,
        version="v1",
        plural=RESOURCE_CRATEDB,
        namespace=ns,
        body=cratedb,
    )

    await assert_wait_for(
        True,
        was_license_set,
        bootstrap_license_mock,
        mock.ANY,
        ns,
        f"crate-data-data-{name}-0",
        False,
        license_ref(),
        timeout=DEFAULT_TIMEOUT * 3,
    )
async def start_cluster(
    name: str,
    namespace: V1Namespace,
    cleanup_handler,
    core: CoreV1Api,
    coapi: CustomObjectsApi,
    hot_nodes: int = 0,
    crate_version: str = CRATE_VERSION,
) -> Tuple[str, str]:
    """
    Create a CrateDB resource with a single ``hot`` data node spec and wait
    until the cluster reports healthy.

    :param name: The name of the CrateDB resource to create.
    :param namespace: The namespace object to create the resource in.
    :param cleanup_handler: List of awaitables run after the test. The removal
        of the persistent volume is appended to it.
    :param core: An instance of the Kubernetes Core V1 API.
    :param coapi: An instance of the Kubernetes Custom Objects API.
    :param hot_nodes: Number of replicas of the ``hot`` data node spec.
    :param crate_version: The CrateDB version to deploy.
    :return: The public host of the cluster and the system user's password.
    """
    ns = namespace.metadata.name

    # Clean up persistent volume after the test
    cleanup_handler.append(
        core.delete_persistent_volume(name=f"temp-pv-{ns}-{name}")
    )

    hot_spec = {
        "name": "hot",
        "replicas": hot_nodes,
        "resources": {
            "cpus": 0.5,
            "memory": "1Gi",
            "heapRatio": 0.25,
            "disk": {
                "storageClass": "default",
                "size": "16GiB",
                "count": 1,
            },
        },
    }
    body = {
        "apiVersion": "cloud.crate.io/v1",
        "kind": "CrateDB",
        "metadata": {"name": name},
        "spec": {
            "cluster": {
                "imageRegistry": "crate",
                "name": "my-crate-cluster",
                "version": crate_version,
            },
            "nodes": {"data": [hot_spec]},
        },
    }
    await coapi.create_namespaced_custom_object(
        group=API_GROUP,
        version="v1",
        plural=RESOURCE_CRATEDB,
        namespace=ns,
        body=body,
    )

    # It takes a while to retrieve an external IP on AKS.
    long_timeout = DEFAULT_TIMEOUT * 5
    host = await asyncio.wait_for(
        get_public_host(core, ns, name), timeout=long_timeout
    )
    password = await get_system_user_password(core, ns, name)

    await assert_wait_for(
        True,
        is_cluster_healthy,
        connection_factory(host, password),
        hot_nodes,
        err_msg="Cluster wasn't healthy after 5 minutes.",
        timeout=DEFAULT_TIMEOUT * 5,
    )

    return host, password
async def test_bootstrap_users(
    bootstrap_license_mock: mock.AsyncMock,
    faker,
    namespace,
    cleanup_handler,
    kopf_runner,
):
    """
    A CrateDB resource defining two users must end up with the system user
    and both additional users present in the cluster.
    """
    coapi = CustomObjectsApi()
    core = CoreV1Api()
    name = faker.domain_word()
    password1 = faker.password(length=40)
    password2 = faker.password(length=30)
    username1 = faker.user_name()
    username2 = faker.user_name()
    ns = namespace.metadata.name

    def password_secret(secret_name, password):
        return V1Secret(
            data={"password": b64encode(password)},
            metadata=V1ObjectMeta(name=secret_name),
            type="Opaque",
        )

    def user_entry(username, secret_name):
        return {
            "name": username,
            "password": {
                "secretKeyRef": {
                    "key": "password",
                    "name": secret_name,
                }
            },
        }

    cleanup_handler.append(
        core.delete_persistent_volume(name=f"temp-pv-{ns}-{name}")
    )

    await asyncio.gather(
        core.create_namespaced_secret(
            namespace=ns, body=password_secret(f"user-{name}-1", password1)
        ),
        core.create_namespaced_secret(
            namespace=ns, body=password_secret(f"user-{name}-2", password2)
        ),
    )

    data_node = {
        "name": "data",
        "replicas": 1,
        "resources": {
            "cpus": 0.5,
            "memory": "1Gi",
            "heapRatio": 0.25,
            "disk": {
                "storageClass": "default",
                "size": "16GiB",
                "count": 1,
            },
        },
    }
    cratedb = {
        "apiVersion": "cloud.crate.io/v1",
        "kind": "CrateDB",
        "metadata": {"name": name},
        "spec": {
            "cluster": {
                "imageRegistry": "crate",
                "name": "my-crate-cluster",
                "version": "4.1.5",
            },
            "nodes": {"data": [data_node]},
            "users": [
                user_entry(username1, f"user-{name}-1"),
                user_entry(username2, f"user-{name}-2"),
            ],
        },
    }
    await coapi.create_namespaced_custom_object(
        group=API_GROUP,
        version="v1",
        plural=RESOURCE_CRATEDB,
        namespace=ns,
        body=cratedb,
    )

    # It takes a while to retrieve an external IP on AKS.
    host = await asyncio.wait_for(
        get_public_host(core, ns, name), timeout=BACKOFF_TIME * 5
    )
    password_system = await get_system_user_password(ns, name, core)

    await assert_wait_for(
        True,
        does_user_exist,
        host,
        password_system,
        SYSTEM_USERNAME,
        timeout=BACKOFF_TIME * 5,
    )
    for user_password, username in ((password1, username1), (password2, username2)):
        await assert_wait_for(
            True,
            does_user_exist,
            host,
            user_password,
            username,
            timeout=BACKOFF_TIME * 3,
        )
async def test_restart_cluster(faker, namespace, cleanup_handler, cratedb_crd,
                               kopf_runner):
    """
    After a rolling restart, none of the pods that existed before the
    restart may still be around.
    """
    coapi = CustomObjectsApi()
    core = CoreV1Api()
    name = faker.domain_word()
    ns = namespace.metadata.name

    def data_node(node_name, replicas):
        # A fresh dict per node spec; nothing is shared between the two.
        return {
            "name": node_name,
            "replicas": replicas,
            "resources": {
                "cpus": 0.5,
                "memory": "1Gi",
                "heapRatio": 0.25,
                "disk": {
                    "storageClass": "default",
                    "size": "16GiB",
                    "count": 1,
                },
            },
        }

    # Clean up persistent volume after the test
    cleanup_handler.append(
        core.delete_persistent_volume(name=f"temp-pv-{ns}-{name}")
    )

    cratedb = {
        "apiVersion": "cloud.crate.io/v1",
        "kind": "CrateDB",
        "metadata": {"name": name},
        "spec": {
            "cluster": {
                "imageRegistry": "crate",
                "name": "my-crate-cluster",
                "version": "4.1.5",
            },
            "nodes": {
                "data": [data_node("hot", 1), data_node("cold", 2)],
            },
        },
    }
    await coapi.create_namespaced_custom_object(
        group=API_GROUP,
        version="v1",
        plural=RESOURCE_CRATEDB,
        namespace=ns,
        body=cratedb,
    )

    # It takes a while to retrieve an external IP on AKS.
    host = await asyncio.wait_for(
        get_public_host(core, ns, name), timeout=BACKOFF_TIME * 5
    )
    password = await get_system_user_password(ns, name, core)

    expected_pods = {
        f"crate-data-hot-{name}-0",
        f"crate-data-cold-{name}-0",
        f"crate-data-cold-{name}-1",
    }
    await assert_wait_for(True, do_pods_exist, core, ns, expected_pods)

    await assert_wait_for(
        True,
        is_cluster_healthy,
        connection_factory(host, password),
        err_msg="Cluster wasn't healthy after 5 minutes.",
        timeout=BACKOFF_TIME * 5,
    )

    pods_before = await core.list_namespaced_pod(namespace=ns)
    original_pods = {pod.metadata.uid for pod in pods_before.items}

    await asyncio.wait_for(
        restart_cluster(ns, name, 3, logging.getLogger(__name__)),
        BACKOFF_TIME * 15,
    )

    pods_after = await core.list_namespaced_pod(namespace=ns)
    new_pods = {pod.metadata.uid for pod in pods_after.items}

    # Every pod must have been replaced, i.e. no UID survives the restart.
    assert not (original_pods & new_pods)
async def test_scale_cluster(
    repl_master_from,
    repl_master_to,
    repl_hot_from,
    repl_hot_to,
    repl_cold_from,
    repl_cold_to,
    faker,
    namespace,
    cleanup_handler,
    cratedb_crd,
    kopf_runner,
):
    """
    Patch the replica counts of a running cluster and wait for it to become
    healthy again with the new total number of nodes.
    """
    coapi = CustomObjectsApi()
    core = CoreV1Api()
    name = faker.domain_word()
    ns = namespace.metadata.name

    def node_spec(replicas, node_name=None):
        # Builds a fresh spec each time; the master spec carries no name.
        spec = {} if node_name is None else {"name": node_name}
        spec["replicas"] = replicas
        spec["resources"] = {
            "cpus": 0.5,
            "memory": "1Gi",
            "heapRatio": 0.25,
            "disk": {"storageClass": "default", "size": "16GiB", "count": 1},
        }
        return spec

    def replace_replicas(path, value):
        return {"op": "replace", "path": path, "value": value}

    # Clean up persistent volume after the test
    cleanup_handler.append(
        core.delete_persistent_volume(name=f"temp-pv-{ns}-{name}")
    )

    nodes = {"data": []}
    body = {
        "apiVersion": "cloud.crate.io/v1",
        "kind": "CrateDB",
        "metadata": {"name": name},
        "spec": {
            "cluster": {
                "imageRegistry": "crate",
                "name": "my-crate-cluster",
                "version": "4.1.5",
            },
            "nodes": nodes,
        },
    }
    if repl_master_from:
        nodes["master"] = node_spec(repl_master_from)
    # The hot nodes are always part of the cluster; cold ones are optional.
    nodes["data"].append(node_spec(repl_hot_from, "hot"))
    if repl_cold_from:
        nodes["data"].append(node_spec(repl_cold_from, "cold"))

    await coapi.create_namespaced_custom_object(
        group=API_GROUP,
        version="v1",
        plural=RESOURCE_CRATEDB,
        namespace=ns,
        body=body,
    )

    # It takes a while to retrieve an external IP on AKS.
    host = await asyncio.wait_for(
        get_public_host(core, ns, name), timeout=BACKOFF_TIME * 5
    )
    password = await get_system_user_password(ns, name, core)

    await assert_wait_for(
        True,
        is_cluster_healthy,
        connection_factory(host, password),
        repl_master_from + repl_hot_from + repl_cold_from,
        err_msg="Cluster wasn't healthy after 5 minutes.",
        timeout=BACKOFF_TIME * 5,
    )

    patch_body = []
    if repl_master_from != repl_master_to:
        patch_body.append(
            replace_replicas("/spec/nodes/master/replicas", repl_master_to)
        )
    if repl_hot_from != repl_hot_to:
        patch_body.append(
            replace_replicas("/spec/nodes/data/0/replicas", repl_hot_to)
        )
    if repl_cold_from != repl_cold_to:
        patch_body.append(
            replace_replicas("/spec/nodes/data/1/replicas", repl_cold_to)
        )

    await coapi.patch_namespaced_custom_object(
        group=API_GROUP,
        version="v1",
        plural=RESOURCE_CRATEDB,
        namespace=ns,
        name=name,
        body=patch_body,
    )

    await assert_wait_for(
        True,
        is_cluster_healthy,
        connection_factory(host, password),
        repl_master_to + repl_hot_to + repl_cold_to,
        err_msg="Cluster wasn't healthy after 5 minutes.",
        timeout=BACKOFF_TIME * 5,
    )
async def test_update_cluster_password(faker, namespace, cleanup_handler,
                                       kopf_runner, api_client):
    """
    Patching a user's password secret must result in the new password being
    set for that user in the cluster.
    """
    coapi = CustomObjectsApi(api_client)
    core = CoreV1Api(api_client)
    name = faker.domain_word()
    password = faker.password(length=40)
    new_password = faker.password(length=40)
    username = faker.user_name()
    ns = namespace.metadata.name
    secret_name = f"user-{name}"

    cleanup_handler.append(
        core.delete_persistent_volume(name=f"temp-pv-{ns}-{name}")
    )

    user_secret = V1Secret(
        data={"password": b64encode(password)},
        metadata=V1ObjectMeta(
            name=secret_name, labels={LABEL_USER_PASSWORD: "******"}
        ),
        type="Opaque",
    )
    await asyncio.gather(
        core.create_namespaced_secret(namespace=ns, body=user_secret),
    )

    data_node = {
        "name": "data",
        "replicas": 1,
        "resources": {
            "cpus": 0.5,
            "memory": "1Gi",
            "heapRatio": 0.25,
            "disk": {
                "storageClass": "default",
                "size": "16GiB",
                "count": 1,
            },
        },
    }
    user = {
        "name": username,
        "password": {
            "secretKeyRef": {
                "key": "password",
                "name": secret_name,
            }
        },
    }
    cratedb = {
        "apiVersion": "cloud.crate.io/v1",
        "kind": "CrateDB",
        "metadata": {"name": name},
        "spec": {
            "cluster": {
                "imageRegistry": "crate",
                "name": "my-crate-cluster",
                "version": CRATE_VERSION,
            },
            "nodes": {"data": [data_node]},
            "users": [user],
        },
    }
    await coapi.create_namespaced_custom_object(
        group=API_GROUP,
        version="v1",
        plural=RESOURCE_CRATEDB,
        namespace=ns,
        body=cratedb,
    )

    # It takes a while to retrieve an external IP on AKS.
    host = await asyncio.wait_for(
        get_public_host(core, ns, name), timeout=DEFAULT_TIMEOUT * 5
    )

    await core.patch_namespaced_secret(
        namespace=ns,
        name=secret_name,
        body=V1Secret(data={"password": b64encode(new_password)}),
    )

    await assert_wait_for(
        True,
        is_password_set,
        host,
        new_password,
        username,
        timeout=DEFAULT_TIMEOUT * 5,
    )
async def cluster_create(namespace: str, meta: kopf.Meta, spec: kopf.Spec,
                         logger: logging.Logger, **kwargs):
    """
    Create all Kubernetes resources for a CrateDB cluster and bootstrap it.

    In order, this function:

    1. derives the labels, the owner reference, the ports and the container
       image from ``meta`` and ``spec``,
    2. collects one StatefulSet creation per node spec (an optional
       ``master`` spec first, then every ``data`` spec),
    3. awaits, concurrently, the creation of the SQL exporter config, the
       debug volume, the system user, the StatefulSets and the services,
    4. bootstraps the cluster, bounded by ``config.BOOTSTRAP_TIMEOUT``,
    5. creates the backup resources if ``spec`` contains ``backups``.

    NOTE(review): presumably registered as the kopf creation handler for
    ``CrateDB`` resources -- the decorator is not visible here, confirm.

    :param namespace: The Kubernetes namespace of the CrateDB resource.
    :param meta: The resource's metadata; ``name``, ``uid`` and the optional
        ``labels`` are read from it.
    :param spec: The resource's spec.
    :param logger: Logger passed on to the resource creation helpers.
    """
    name = meta["name"]
    base_labels = {
        LABEL_MANAGED_BY: "crate-operator",
        LABEL_NAME: name,
        LABEL_PART_OF: "cratedb",
    }
    # Labels set on the custom resource itself are merged in last and thus
    # override the defaults above.
    cratedb_labels = base_labels.copy()
    cratedb_labels[LABEL_COMPONENT] = "cratedb"
    cratedb_labels.update(meta.get("labels", {}))
    # NOTE(review): these API objects are created without an explicit API
    # client -- confirm that the implicitly created clients get closed.
    apps = AppsV1Api()
    batchv1_beta1 = BatchV1beta1Api()
    core = CoreV1Api()
    owner_references = [
        V1OwnerReference(
            api_version=f"{API_GROUP}/v1",
            block_owner_deletion=True,
            controller=True,
            kind="CrateDB",
            name=name,
            uid=meta["uid"],
        )
    ]
    # ``None`` rather than an empty list when no pull secrets are configured.
    image_pull_secrets = ([
        V1LocalObjectReference(name=secret)
        for secret in config.IMAGE_PULL_SECRETS
    ] if config.IMAGE_PULL_SECRETS else None)
    # Every port falls back to its ``Port`` enum default when not in the spec.
    ports_spec = spec.get("ports", {})
    http_port = ports_spec.get("http", Port.HTTP.value)
    jmx_port = ports_spec.get("jmx", Port.JMX.value)
    postgres_port = ports_spec.get("postgres", Port.POSTGRES.value)
    prometheus_port = ports_spec.get("prometheus", Port.PROMETHEUS.value)
    transport_port = ports_spec.get("transport", Port.TRANSPORT.value)
    master_nodes = get_master_nodes_names(spec["nodes"])
    total_nodes_count = get_total_nodes_count(spec["nodes"])
    crate_image = spec["cluster"]["imageRegistry"] + ":" + spec["cluster"][
        "version"]
    has_master_nodes = "master" in spec["nodes"]
    # The first StatefulSet we create references a set of master nodes. These
    # can either be explicit CrateDB master nodes, or implicit ones, which
    # would be the first set of nodes from the data nodes list.
    #
    # After the first StatefulSet was created, we set `treat_as_master` to
    # `False` to indicate that all remaining StatefulSets are neither explicit
    # nor implicit master nodes.
    treat_as_master = True
    # Collects the results of ``create_statefulset``; they are awaited
    # together with the other resources further down.
    sts = []
    cluster_name = spec["cluster"]["name"]
    if has_master_nodes:
        sts.append(
            create_statefulset(
                apps,
                owner_references,
                namespace,
                name,
                cratedb_labels,
                treat_as_master,
                False,
                cluster_name,
                "master",
                "master-",
                spec["nodes"]["master"],
                master_nodes,
                total_nodes_count,
                http_port,
                jmx_port,
                postgres_port,
                prometheus_port,
                transport_port,
                crate_image,
                spec["cluster"].get("ssl"),
                spec["cluster"].get("settings"),
                image_pull_secrets,
                logger,
            ))
        treat_as_master = False
    for node_spec in spec["nodes"]["data"]:
        node_name = node_spec["name"]
        sts.append(
            create_statefulset(
                apps,
                owner_references,
                namespace,
                name,
                cratedb_labels,
                treat_as_master,
                True,
                cluster_name,
                node_name,
                f"data-{node_name}-",
                node_spec,
                master_nodes,
                total_nodes_count,
                http_port,
                jmx_port,
                postgres_port,
                prometheus_port,
                transport_port,
                crate_image,
                spec["cluster"].get("ssl"),
                spec["cluster"].get("settings"),
                image_pull_secrets,
                logger,
            ))
        treat_as_master = False
    # Create everything concurrently. The helpers that are unpacked with
    # ``*`` return several awaitables each.
    await asyncio.gather(
        create_sql_exporter_config(core, owner_references, namespace, name,
                                   cratedb_labels, logger),
        *create_debug_volume(core, owner_references, namespace, name,
                             cratedb_labels, logger),
        create_system_user(core, owner_references, namespace, name,
                           cratedb_labels, logger),
        *sts,
        *create_services(
            core,
            owner_references,
            namespace,
            name,
            cratedb_labels,
            http_port,
            postgres_port,
            transport_port,
            spec.get("cluster", {}).get("externalDNS"),
            logger,
        ),
    )
    # Bootstrapping happens through the first pod of the master StatefulSet
    # or, without dedicated masters, of the first data StatefulSet.
    if has_master_nodes:
        master_node_pod = f"crate-master-{name}-0"
    else:
        node_name = spec["nodes"]["data"][0]["name"]
        master_node_pod = f"crate-data-{node_name}-{name}-0"
    await with_timeout(
        bootstrap_cluster(
            core,
            namespace,
            name,
            master_node_pod,
            spec["cluster"].get("license"),
            "ssl" in spec["cluster"],
            spec.get("users"),
            logger,
        ),
        config.BOOTSTRAP_TIMEOUT,
        (f"Failed to bootstrap cluster {namespace}/{name} after "
         f"{config.BOOTSTRAP_TIMEOUT} seconds."),
    )
    # Backups are optional and get their own component label.
    if "backups" in spec:
        backup_metrics_labels = base_labels.copy()
        backup_metrics_labels[LABEL_COMPONENT] = "backup"
        backup_metrics_labels.update(meta.get("labels", {}))
        await asyncio.gather(*create_backups(
            apps,
            batchv1_beta1,
            owner_references,
            namespace,
            name,
            backup_metrics_labels,
            http_port,
            prometheus_port,
            spec["backups"],
            image_pull_secrets,
            "ssl" in spec["cluster"],
            logger,
        ))
async def handle(  # type: ignore
    self,
    namespace: str,
    name: str,
    spec: kopf.Spec,
    old: kopf.Body,
    diff: kopf.Diff,
    logger: logging.Logger,
    **kwargs: Any,
):
    """
    Scale the cluster according to the replica changes found in ``diff``,
    then schedule and send the scale notification.

    Only changes to the master replica count and to the ``replicas`` of the
    data node specs are acted upon; every other change is logged and ignored.

    :param namespace: The Kubernetes namespace of the cluster.
    :param name: The name of the cluster resource.
    :param spec: The new spec of the resource.
    :param old: The previous body of the resource.
    :param diff: The kopf diff between the old and the new resource.
    :param logger: Logger used for the "ignoring" messages and passed on.
    :raises kopf.PermanentError: When data node specs were added or removed.
    """
    scale_master_diff_item: Optional[kopf.DiffItem] = None
    scale_data_diff_items: Optional[List[kopf.DiffItem]] = None

    for operation, field_path, old_value, new_value in diff:
        if field_path == ("spec", "nodes", "master", "replicas"):
            scale_master_diff_item = kopf.DiffItem(
                operation, field_path, old_value, new_value
            )
            continue

        if field_path != ("spec", "nodes", "data"):
            logger.info("Ignoring operation %s on field %s", operation, field_path)
            continue

        # TODO: check for data node order, added or removed types, ...
        if len(old_value) != len(new_value):
            raise kopf.PermanentError(
                "Adding and removing node specs is not supported at this time."
            )

        scale_data_diff_items = []
        # Both lists have the same length here, so pairing them up by
        # position covers every node spec.
        for node_spec_idx, (old_spec, new_spec) in enumerate(
            zip(old_value, new_value)
        ):
            index_path = (str(node_spec_idx),)
            for (
                inner_operation,
                inner_field_path,
                inner_old_value,
                inner_new_value,
            ) in calc_diff(old_spec, new_spec):
                if inner_field_path != ("replicas",):
                    logger.info(
                        "Ignoring operation %s on field %s",
                        operation,
                        field_path + index_path + inner_field_path,
                    )
                    continue
                scale_data_diff_items.append(
                    kopf.DiffItem(
                        inner_operation,
                        index_path + inner_field_path,
                        inner_old_value,
                        inner_new_value,
                    )
                )

    # An empty list of data changes is handed over as ``None``.
    data_diff = kopf.Diff(scale_data_diff_items) if scale_data_diff_items else None

    async with ApiClient() as api_client:
        apps = AppsV1Api(api_client)
        core = CoreV1Api(api_client)
        await scale_cluster(
            apps,
            core,
            namespace,
            name,
            old,
            scale_master_diff_item,
            data_diff,
            logger,
        )

    old_nodes = old["spec"]["nodes"]
    new_nodes = spec["nodes"]
    payload = WebhookScalePayload(
        old_data_replicas=[
            WebhookScaleNodePayload(name=item["name"], replicas=item["replicas"])
            for item in old_nodes["data"]
        ],
        new_data_replicas=[
            WebhookScaleNodePayload(name=item["name"], replicas=item["replicas"])
            for item in new_nodes["data"]
        ],
        old_master_replicas=old_nodes.get("master", {}).get("replicas"),
        new_master_replicas=new_nodes.get("master", {}).get("replicas"),
    )
    self.schedule_notification(WebhookEvent.SCALE, payload, WebhookStatus.SUCCESS)
    await self.send_notifications(logger)